44#include "EST_SCFG_Chart.h"
46EST_SCFG_Chart_Edge::EST_SCFG_Chart_Edge()
50EST_SCFG_Chart_Edge::EST_SCFG_Chart_Edge(
double prob,
60EST_SCFG_Chart_Edge::~EST_SCFG_Chart_Edge()
64LISP EST_SCFG_Chart::print_edge(
int start,
int end,
int p,
73 else if (start+1 == end)
77 cons(flocons(
e->prob()),
89 d1 = edges[start][
e->pos()][
e->d1()];
90 d2 = edges[
e->pos()][end][
e->d2()];
93 cons(print_edge(start,
e->pos(),
e->d1(),d1),
94 cons(print_edge(
e->pos(),end,
e->d2(),d2),
97 cons(flocons(
e->prob()),
105EST_SCFG_Chart::EST_SCFG_Chart()
111 grammar_local = TRUE;
115EST_SCFG_Chart::~EST_SCFG_Chart()
128 grammar_local = FALSE;
152 for (n_vertices=1,p=s; p !=
e; p=inext(p))
156 for (n=0,p=s; p !=
e; p=inext(p),n++)
161 cerr <<
"SCFG_Chart: unknown terminal symbol \"" <<
162 p->f(name).string() <<
"\"" <<
endl;
169void EST_SCFG_Chart::delete_edge_table()
173 if (wfst == 0)
return;
175 for (i=0; i < n_vertices; i++)
178 for (
j=0;
j < n_vertices;
j++)
181 if (edges[i][
j][k] != emptyedge)
182 delete edges[i][
j][k];
183 delete [] edges[i][
j];
196void EST_SCFG_Chart::setup_edge_table()
205 for (i=0; i < n_vertices; i++)
209 for (
j=0;
j < n_vertices;
j++)
212 for (k=0; k <
nt; k++)
218double EST_SCFG_Chart::find_best_tree_cal(
int start,
int end,
int p)
229 edges[start][end][p] =
231 wfst[start]->d1(),0,-1);
233 edges[start][end][p] = emptyedge;
239 double s=0,
t_prob,left,right;
243 for (
q=0;
q <
nt;
q++)
244 for (r=0; r <
nt; r++)
249 for (
j=start+1;
j < end;
j++)
251 left=find_best_tree(start,
j,
q);
254 right = find_best_tree(
j,end,r);
270 edges[start][end][p] =
273 edges[start][end][p] = emptyedge;
281 if (n_vertices - 1 > 0)
282 find_best_tree(0,n_vertices-1,grammar->distinguished_symbol());
291 top = edges[0][n_vertices-1][grammar->distinguished_symbol()];
296 return print_edge(0,n_vertices-1,grammar->distinguished_symbol(),top);
323 cerr <<
"SCFG_Chart: extract_parse, number of items in link stream " <<
324 " different from those in parse tree" <<
endl;
331 top = edges[0][n_vertices-1][grammar->distinguished_symbol()];
339 extract_edge(0,n_vertices-1,grammar->distinguished_symbol(),top,
342 if ((
force) && (!daughter1(
ss)))
343 extract_forced_parse(0,n_vertices-1,
ss,w);
348void EST_SCFG_Chart::extract_forced_parse(
int start,
int end,
356 s->append_daughter(w);
357 s->set_name(grammar->
nonterminal(grammar->distinguished_symbol()));
362 extract_forced_parse(start,start+1,s->append_daughter(),w);
364 st->set_name(grammar->
nonterminal(grammar->distinguished_symbol()));
367 extract_forced_parse(start+1,end,
st,
nw);
371void EST_SCFG_Chart::extract_edge(
int start,
int end,
int p,
382 else if (start+1 == end)
385 s->append_daughter((*
word));
387 s->
set(
"prob",(
float)
e->prob());
396 d1 = edges[start][
e->pos()][
e->d1()];
397 d2 = edges[
e->pos()][end][
e->d2()];
400 s->append_daughter();
401 s->append_daughter();
403 extract_edge(start,
e->pos(),
e->d1(),d1,daughter1(s),
word);
404 extract_edge(
e->pos(),end,
e->d2(),d2,daughter2(s),
word);
407 s->
set(
"prob",(
float)
e->prob());
419 for (w=
sent; w != NIL; w=cdr(w))
425 word->set_name(get_c_string(car(car(w))));
426 if (consp(car(cdr(car(w)))))
427 for (f=car(cdr(car(w))); f != NIL; f=cdr(f))
429 if (FLONUMP(car(cdr(car(f)))))
430 word->set(get_c_string(car(car(f))),
431 get_c_float(car(cdr(car(f)))));
433 word->set(get_c_string(car(car(f))),
434 get_c_string(car(cdr(car(f)))));
437 word->set(
"name",get_c_string(car(cdr(car(w)))));
440 word->set(
"name",get_c_string(car(w)));
452 chart.set_grammar_rules(grammar);
467 chart.set_grammar_rules(grammar);
469 EST_SCFG_chart_load_relation(
words,
string);
472 parse =
chart.find_parse();
484 chart.set_grammar_rules(grammar);
486 EST_SCFG_chart_load_relation(
words,
string);
489 parse =
chart.find_parse();
494LISP scfg_bracketing_only(
LISP parse)
496 if (consp(siod_nth(4,parse)))
500 for (d=cdr(cdr(cdr(cdr(parse)))),
ds=NIL; d != NIL; d=cdr(d))
501 ds = cons(scfg_bracketing_only(car(d)),
ds);
505 return siod_nth(4,parse);
521 for (c=0; c < corpus.
length(); c++)
525 parse = scfg_parse(
flat,*
this);
542 cout <<
"cross bracketing " <<
cb.mean()*100 <<
" (" <<
failed <<
544 "% fully consistent from " << corpus.
length()
545 <<
" sentences)" <<
endl;
555 if (ref.length() != test.length())
557 EST_error(
"bracket_crossing: sentences of different lengths");
560 for (i=0; i < ref.length(); i++)
561 for (
j=i+1;
j <= ref.length();
j++)
void set(const EST_String &name, int ival)
int pos(void)
Postion, 0 1 or 2, where 0 is empty, 1 is incomplete 2 is complete.
double prob(void)
Edge probability.
int d1()
(Non)terminal of daughter 1
int d2()
(Non)terminal of daughter 2
void extract_parse(EST_Relation *syn, EST_Relation *word, int force=0)
Extract parse tree and add it to syn linking leafs to word.
void setup_wfst(EST_Relation *s, const EST_String &name="name")
void parse()
Parses the loaded WFST with the loaded grammar.
LISP find_parse()
Return the parse in full LISP form.
void set_grammar_rules(LISP r)
Initialize from LISP rules set.
void test_crossbrackets()
EST_String nonterminal(int p) const
Convert nonterminal index to string form.
int num_nonterminals() const
Number of nonterminals.
double prob_B(int p, int q, int r) const
The rule probability of given binary rule.
EST_String terminal(int m) const
Convert terminal index to string form.
void set_rules(LISP rules)
Set (or reset) rules from external source after construction.
double prob_U(int p, int m) const
The rule probability of given unary rule.
INLINE int length() const
number of items in vector.
INLINE const T & a_no_check(int n) const
read-only const access operator: without bounds checking
int valid(int i, int k) const
If a bracketing from i to k is valid in string.