44#include "EST_TDeque.h"
48#include "rxp/XML_Parser.h"
50#include "ling_class_init.h"
52#if defined(ESTLIBDIRC)
53# define __STRINGIZE(X) #X
54# define ESTLIBDIR __STRINGIZE(ESTLIBDIRC)
// File-local patterns used when interpreting id/href attribute values and
// feature definitions.
// NOTE(review): the patterns read as if a bare '(' is a literal character and
// an escaped pair \( ... \) delimits a capturing group -- confirm against the
// EST_Regex documentation.

// Single-item reference: any text without '#', then "#id(" NAME ")".
//   group 1 = NAME (one or more of lowercase letters, digits or '-').
58static EST_Regex simpleIDRegex(
"[^#]*#id(\\([-a-z0-9]+\\))");
// Range reference: "#id(" FIRST ")" ... "id(" LAST ")", where FIRST and LAST
// each have the shape  letters  digits  zero-or-more "-digits" suffixes.
//   groups 1-4 describe FIRST: 1 = letter prefix, 2 = number,
//                              3 = a "-digits" suffix, 4 = digits of that suffix.
//   groups 5-8 describe LAST in the same order.
// Everything between the two id(...) expressions is skipped by ".*".
59static EST_Regex rangeIDRegex(
"[^#]*#id(\\([a-z]*\\)\\([0-9]*\\)\\(-\\([0-9]+\\)\\)*).*id(\\([a-z]*\\)\\([0-9]*\\)\\(-\\([0-9]+\\)\\)*)");
// Feature definition of the form LEFT ":" RIGHT.
//   group 1 = LEFT  (text containing no ':'),
//   group 2 = RIGHT (the remainder after that first ':').
60static EST_Regex featureDefRegex(
"\\([^:]*\\):\\(.*\\)");
73class GenXML_Parse_State
94 GenXML_Parse_State() : contents(100) {}
145void EST_GenXML::class_init(
void)
147 ling_class_init::use();
149 pclass =
new GenXML_Parser_Class();
151 printf(
"Register estlib in genxml %s\n", ESTLIBDIR
"/\\1.dtd");
154 pclass->register_id(
"//CSTR EST//DTD \\(.*\\)//[A-Z]*",
155 ESTLIBDIR
"/\\1.dtd");
156 pclass->register_id(
"//CSTR EST//ENTITIES \\(.*\\)//[A-Z]*",
157 ESTLIBDIR
"/\\1.ent");
163 EST_GenXML::pclass->register_id(
pattern, result);
168 EST_GenXML::pclass->registered_ids(list);
173 return EST_GenXML::pclass->try_and_open(
ent);
177EST_read_status EST_GenXML::read_xml(
FILE *
file,
183 (
void)print_attributes;
184 GenXML_Parse_State state;
191 parser->track_context(TRUE);
205 if (state->rel!=NULL && name == state->relName)
208 state->rel = state->utt->create_relation(state->relName=name);
218 state->contents.add_item(
id, c);
223 if (c->relations.
present(state->relName))
230static EST_String make_new_id(
const char *root)
246 val = attributes.
val(
"id");
247#if defined(EST_DEBUGGING)
252 else if (attributes.
present(
"href"))
254 val = attributes.
val(
"href");
255 int starts[EST_Regex_max_subexpressions];
256 int ends[EST_Regex_max_subexpressions];
258 if (val.matches(simpleIDRegex, 0,
starts,
ends))
261#if defined(EST_DEBUGGING)
266 else if (val.matches(rangeIDRegex, 0,
starts,
ends))
275#if defined(EST_DEBUGGING)
320 for(
int i=
n1; i<=
n2; i++)
366 EST_warning(
"element with bad ID or HREF '%s'", (
const char *)val);
369 ids.append(make_new_id(
"n"));
382 (
const char *)
them->
k,
383 (
const char *)
them->
v);
394 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
397 state->open_depth=-1;
398 state->rel_start_depth=-1;
399 state->depth_stack.clear();
419 int starts[EST_Regex_max_subexpressions];
420 int ends[EST_Regex_max_subexpressions];
422 int n = split(
defs, names, MAX_FEATS, feat_sep);
423 for(
int i=0; i<n; i++)
443 printf(
"on %s got %s(%s)=%s\n", name,
460 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
467 if (state->utt != NULL
469 proccess_features(name, val, attributes, state->utt->f);
472 if (state->rel != NULL
474 proccess_features(name, val, attributes, state->rel->f);
485 EST_warning(
"%s\nNo feature '%s' to name relation\n", get_error(p), (
const char *)val);
490 ensure_relation(state, relName);
491 state->rel_start_depth=state->depth;
495 printf(
"start of relation depth=%d name=%s type=%s\n", state->depth, (
const char *)relName, state->linear?
"linear":
"tree");
498 else if ((state->rel_start_depth >= 0 &&
504 printf(
"push depth=%d name=%s ig=%s\n", state->depth, name, (
const char *)
ig);
507 ensure_relation(state, val);
509 state->depth_stack.push(state->open_depth);
510 state->open_depth=state->depth;
516 extract_ids(attributes,
ids);
519 ids.append(state->id);
521 switch (
ids.length())
546 cont->
f.
set(
"id",
id);
551 if (state->current == NULL)
552 item = state->rel->append();
554 item = state->current->insert_after();
555 else if (state->current == NULL)
556 if (state->parent == NULL)
557 item = state->rel->append();
559 item = state->parent->append_daughter();
561 if (state->parent == NULL)
562 item = state->current->insert_after();
564 item = state->parent->append_daughter();
566 item->set_contents(cont);
575 bool embed = (attributes.
val(
"estExpansion") ==
"embed");
578 state->id=make_new_id(
"e");
579 element_open(c, p, data, name, attributes);
591 element_close(c, p, data, name);
596 element_open(c, p, data, name, attributes);
601 element_close(c, p, data, name);
607 if (state->parent!=NULL)
608 state->contentAttr = attributes.
val(
"estContentFeature");
611 printf(
"\t current=%s parent=%s contA=%s\n",
612 (
const char *)state->current->name(),
613 (
const char *)state->parent->name(),
614 (
const char *)state->contentAttr);
631 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
634 element_open(c, p, data, name, attributes);
635 element_close(c, p, data, name);
645 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
650 if (state->depth == state->rel_start_depth )
653 printf(
"end of relation depth=%d name=%s\n", state->depth, name);
655 state->rel_start_depth=-1;
659 state->depth == state->open_depth)
662 printf(
"pop depth=%d name=%s\n", state->depth, name);
664 state->current = state->parent;
665 state->parent=parent(state->parent);
666 state->open_depth = state->depth_stack.pop();
668 printf(
"\t current=%s parent=%s\n",
669 (
const char *)state->current->name(),
670 (
const char *)state->parent->name());
686 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
690 state->parent->set(state->contentAttr,
chars);
693 printf(
"GEN XML Parser [pcdata[%s]] %d\n",
chars, state->depth);
707 printf(
"GEN XML Parser [cdata[%s]] %d\n",
chars, state->depth);
718 GenXML_Parse_State *state = (GenXML_Parse_State *)data;
734 EST_error(
"GEN XML Parser %s", get_error(p));
742#if defined(INSTANTIATE_TEMPLATES)
744#include "../base_class/EST_THash.cc"
void set(const EST_String &name, int ival)
EST_Features f
General features for this item.
static const EST_String Empty
Constant empty string.
static EST_String cat(const EST_String s1, const EST_String s2=Empty, const EST_String s3=Empty, const EST_String s4=Empty, const EST_String s5=Empty, const EST_String s6=Empty, const EST_String s7=Empty, const EST_String s8=Empty, const EST_String s9=Empty)
int matches(const char *e, int pos=0) const
Exactly match this string?
EST_String at(int from, int len=0) const
Return part at position.
V & val(const K &key, int &found) const
int present(const K &key) const
Does the key have an entry?
const int present(const K &rkey) const
Returns true if key is present.
void clear()
remove everything in utterance
virtual void element(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name, XML_Attribute_List &attributes)
virtual void error(XML_Parser_Class &c, XML_Parser &p, void *data)
virtual void processing(XML_Parser_Class &c, XML_Parser &p, void *data, const char *instruction)
virtual void document_close(XML_Parser_Class &c, XML_Parser &p, void *data)
virtual void pcdata(XML_Parser_Class &c, XML_Parser &p, void *data, const char *chars)
virtual void document_open(XML_Parser_Class &c, XML_Parser &p, void *data)
virtual void element_open(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name, XML_Attribute_List &attributes)
virtual void cdata(XML_Parser_Class &c, XML_Parser &p, void *data, const char *chars)
virtual void element_close(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name)
void error(XML_Parser_Class &c, XML_Parser &p, void *data, EST_String message)