Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
wfst_regex.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1997 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Alan W Black */
34/* Date : November 1997 */
35/*-----------------------------------------------------------------------*/
36/* */
37/* WFST functions for building from REGEXs */
38/* */
39/*=======================================================================*/
40#include <iostream>
41#include "EST_cutils.h"
42#include "EST_WFST.h"
43
45{
46 // Choice of either disjunct
47 LISP l;
48 int intermed;
49
50 if (disjunctions == NIL)
51 cerr << "WFST construct: disjunct is nil\n";
52
53 for (l=disjunctions; l != NIL; l=cdr(l))
54 {
55 // Can't go directly to end as other transitions could be added there
56 intermed = add_state(wfst_nonfinal);
57 build_wfst(start,intermed,car(l));
59 }
60}
61
63{
64 // require each conjunct in turn
65 int intermed,lstart;
66 LISP l;
67
68 if (conjunctions == NIL)
69 cerr << "WFST build: conjunct is nil\n";
70
71 lstart = start;
72 for (l=conjunctions; cdr(l) != NIL; l=cdr(l))
73 {
74 intermed = add_state(wfst_nonfinal);
77 }
78 build_wfst(lstart,end,car(l));
79
80}
81
82int EST_WFST::terminal(LISP l)
83{
84 // true, l is a terminal in a regex
85
86 if (atomp(l))
87 return TRUE;
88 else
89 return FALSE;
90}
91
92int EST_WFST::operator_or(LISP l)
93{
94 if (l && !consp(l) && (streq("or",get_c_string(l))))
95 return TRUE;
96 else
97 return FALSE;
98}
99
100int EST_WFST::operator_plus(LISP l)
101{
102 if (l && !consp(l) && (streq("+",get_c_string(l))))
103 return TRUE;
104 else
105 return FALSE;
106}
107
108int EST_WFST::operator_not(LISP l)
109{
110 if (l && !consp(l) && (streq("not",get_c_string(l))))
111 return TRUE;
112 else
113 return FALSE;
114}
115
116int EST_WFST::operator_star(LISP l)
117{
118 if (l && !consp(l) && (streq("*",get_c_string(l))))
119 return TRUE;
120 else
121 return FALSE;
122}
123
124int EST_WFST::operator_optional(LISP l)
125{
126 if (l && !consp(l) && (streq("?",get_c_string(l))))
127 return TRUE;
128 else
129 return FALSE;
130}
131
132int EST_WFST::operator_and(LISP l)
133{
134 if (l && !consp(l) && (streq("and",get_c_string(l))))
135 return TRUE;
136 else
137 return FALSE;
138}
139
140void EST_WFST::build_wfst(int start, int end,LISP regex)
141{
142 if (terminal(regex))
143 {
144 // unpack the label
145 int in,out;
146 EST_String s_name(get_c_string(regex));
147 if (s_name.contains("/"))
148 {
149 in = p_in_symbols.name(s_name.before("/"));
150 out = p_out_symbols.name(s_name.after("/"));
151 }
152 else
153 {
154 in = p_in_symbols.name(get_c_string(regex));
155 out = p_out_symbols.name(get_c_string(regex));
156 }
157 if ((in == -1) || (out == -1))
158 cerr << "WFST_build: symbol " << get_c_string(regex) <<
159 " not in alphabet\n";
160 p_states[start]->add_transition(0,end,in,out);
161 }
162 else if (operator_or(car(regex)))
163 build_or_transition(start,end,cdr(regex));
164 else if (operator_plus(car(regex)))
165 {
166 build_wfst(start,end,cdr(regex));
167 build_wfst(end,end,cdr(regex));
168 }
169 else if (operator_star(car(regex)))
170 {
171 build_wfst(start,start,cdr(regex));
172 build_wfst(start,end,epsilon_label());
173 }
174 else if (operator_not(car(regex)))
175 {
176 int errstate = add_state(wfst_error);
178 }
179 else if (operator_optional(car(regex)))
180 {
181 build_wfst(start,end,cdr(regex));
182 build_wfst(start,end,epsilon_label());
183 }
184 else if (operator_and(car(regex)))
185 build_and_transition(start,end,cdr(regex));
186 else
187 build_and_transition(start,end,regex); // default is and
188}
189
190void EST_WFST::build_from_regex(LISP inalpha, LISP outalpha, LISP regex)
191{
192
193 clear();
194
195 cout << "building from regex: " << endl;
196 pprint(regex);
197
198 init(inalpha,outalpha); // alphabets
199 if (regex == NIL)
200 p_start_state = add_state(wfst_final); // empty WFST
201 else
202 {
203 p_start_state = add_state(wfst_nonfinal);
204 int end = add_state(wfst_final);
205 build_wfst(p_start_state,end,regex);
206 }
207}
208
const EST_String & name(const int n) const
The name for the given index.
int add_state(enum wfst_state_type state_type)
Add a new state, returns new name.
Definition EST_WFST.cc:652
void init(int init_num_states=10)
Clear with (estimation of number of states required)
Definition EST_WFST.cc:145
void clear()
clear removing existing states if any
Definition EST_WFST.cc:115
void build_wfst(int start, int end, LISP regex)
Basic regex constructor.
LISP epsilon_label() const
LISP form of the epsilon symbol.
Definition EST_WFST.h:216
void build_or_transition(int start, int end, LISP disjunctions)
Basic disjunction constructor.
Definition wfst_regex.cc:44
void build_and_transition(int start, int end, LISP conjunctions)
Basic conjunction constructor.
Definition wfst_regex.cc:62