Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
EST_String.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Authors : Alan W Black (awb@cstr.ed.ac.uk) */
34 /* Date : January, February 1997 */
35 /* -------------------------------------------------------------------- */
36 /* */
37 /* A non-GNU implementation of an EST_String class to use with non-G++ */
38 /* compilers. */
39 /* */
40 /* Note this is not a full implementation of libg++'s EST_String class */
41 /* just the bits we need */
42 /* */
43 /*************************************************************************/
44
45
46#include <iostream>
47#include <cstring>
48#include <cstdio>
49#include <cctype>
50#include "EST_String.h"
51// #include "EST_error.h"
52#include "string_version.h"
53#include "EST_math.h"
54
55extern "C" {
56#include "regexp.h"
57}
58
59const char *EST_String::version = "CSTR String Class " STRING_VERSION " " STRING_DATE;
60
62
63EST_String EST_String_nullString = "";
64
/** One pending replacement recorded by the gsub/subst routines.
  *
  * `start`/`end` delimit the span of the original text that is to be
  * replaced; `s`/`slen` carry per-match replacement text for the
  * callers that need it.
  */
struct subst {
    int start, end;   // accessed as substitutions[n].start / .end by the gsub code
    char *s;
    int slen;
} ;

#if !__GSUB_REENTRANT__
// Scratch table used by the substitution routines when they are not
// built reentrant: it is grown with wrealloc() on demand and is not
// freed between calls.
static struct subst *substitutions=NULL;
int num_substitutions=0;
#endif
75
76
77 /********************************************************************\
78 * *
79 * Locate is the basic utility method behind many of the *
80 * manipulations, it finds something in a EST_String, returns a *
81 * success or failure flag and sets start and end to where it was. *
82 * *
83 \********************************************************************/
84
85int EST_String::locate(const char *s, int len, int from, int &start, int &end) const
86{
87 CHECK_STRING_ARG(s);
88
89 const char *sub=NULL;
90
91 if (!s)
92 return 0;
93
94 if (from < 0 && -from < size)
95 {
96 int endpos=size+from+1;
97 int p=0;
98 const char *nextsub;
99
100 while ((nextsub=strstr(str()+p, s)))
101 {
102 p=nextsub-str()+1;
103 if (p > endpos)
104 break;
105 sub=nextsub;
106 }
107 }
108 else if (from>=0 && from <= size)
109 sub= strstr(str()+from, s);
110
111 if (sub != NULL)
112 {
113 start = sub-str();
114 end = start + len;
115 return 1;
116 }
117 else
118 {
119 return 0;
120 }
121
122}
123
124int EST_String::locate(EST_Regex &ex, int from, int &start, int &end, int *starts, int *ends) const
125{
127
128 if (from < 0 && -from < size)
129 {
130 int endpos=size+from+1;
131 int p=0;
132 int found=0;
133
134 while (ex.run(str(), p, match_start, match_end, starts, ends))
135 {
136 found++;
137 start=match_start;
138 end=match_end;
139 p = match_start+1;
140 if (p > endpos)
141 break;
142 }
143 return found >0;
144 }
145 else if (from >=0 && from <= size)
146 {
147 if (ex.run(str(), from, match_start, match_end, starts, ends))
148 {
149 start = match_start;
150 end=match_end;
151 return 1;
152 }
153 else
154 return 0;
155 }
156 else
157 return 0;
158}
159
160int EST_String::extract(const char *s, int len, int pos, int &start, int &end) const
161{
162 CHECK_STRING_ARG(s);
163
164 if (!s)
165 return 0;
166
167 if (pos < 0)
168 return locate(s, len, 0, start, end);
169
170 if (pos <= size-len && memcmp(str()+pos, s, len)==0)
171 {
172 start = pos;
173 end = pos + len;
174 return 1;
175 }
176 else
177 return 0;
178}
179
180int EST_String::extract(EST_Regex &ex, int pos, int &start, int &end) const
181{
183
184 if (pos < 0)
185 return locate(ex, 0, start, end);
186
187 if (pos < size && ex.run(str(), pos, match_start, match_end) && match_start == pos)
188 {
189 start = match_start;
190 end = match_end;
191 return 1;
192 }
193 else
194 return 0;
195}
196
197EST_String EST_String::chop_internal(int from, int len, EST_chop_direction mode) const
198{
199 int start, end;
200
201 if (from < 0)
202 {
203 start = size+from;
204 }
205 else
206 {
207 start = from;
208 }
209
210 end=start+len;
211
212 if (start >=0 && end <=size && size > 0)
213 switch (mode)
214 {
215 case Chop_Before:
216 return EST_String(str(), size, 0, start); break;
217 case Chop_At:
218 return EST_String(str(), size, start, end-start); break;
219 case Chop_After:
220 return EST_String(str(), size, end, -1);
221 }
222 return EST_String();
223
224}
225
226EST_String EST_String::chop_internal(const char *it, int len, int from, EST_chop_direction mode) const
227{
228 CHECK_STRING_ARG(it);
229
230 int start, end;
231
232 if (it && locate(it, len, from, start, end))
233 switch (mode)
234 {
235 case Chop_Before:
236 return EST_String(str(), size, 0, start); break;
237 case Chop_At:
238 return EST_String(str(), size, start, end-start); break;
239 case Chop_After:
240 return EST_String(str(), size, end, -1);
241 }
242 return EST_String();
243
244}
245
246EST_String EST_String::chop_internal (EST_Regex &it, int from, EST_chop_direction mode) const
247{
248 int start=0, end=0;
249
250 if (locate(it, from, start, end))
251 switch (mode)
252 {
253 case Chop_Before:
254 return EST_String(str(), size, 0, start); break;
255 case Chop_At:
256 return EST_String(str(), size, start, end-start); break;
257 case Chop_After:
258 return EST_String(str(), size, end, -1);
259 }
260 return EST_String();
261
262}
263
264
265int EST_String::gsub_internal (const char *os, int olength, const char *s, int length)
266{
267 CHECK_STRING_ARG(os);
268 CHECK_STRING_ARG(s);
269
270 int pos=0, n=0, change=0;
272
273 const char *from;
274 char *to;
275
276#if __GSUB_REENTRANT__
277 struct subst {
279 } *substitutions=NULL;
280
281 int num_substitutions=0;
282#endif
283
284 if (s && os && size > 0 && *os != '\0')
285 {
286 {
287 int start, end;
288 while (locate(os, olength, pos, start, end))
289 {
290 if (num_substitutions <= n)
291 substitutions = wrealloc(substitutions, struct subst, (num_substitutions +=10));
292
293 substitutions[n].start = start;
294 substitutions[n].end = end;
295
296 change += length - (end-start);
297
298 n++;
299 pos=end;
300 }
301 }
302
303 // dubious dealings with the inside of the string
304
305 from = (const char *)memory;
306
307 if (change > 0)
308 {
309 // Spurious braces make temporary ref to chunk go away
310 {new_memory = chunk_allocate(size+change+1);}
311 to = new_memory;
312 }
313 else
314 {
315 cp_make_updatable(memory, size);
316 to = memory;
317 }
318
319 int i, at=0;
320 char *p=to;
321
322 for(i=0; i<n; i++)
323 {
324 int start = substitutions[i].start;
325 int end = substitutions[i].end;
326 memcpy(p, from+at, start-at);
327 p += start-at;
328 memcpy(p, s, length);
329 p += length;
330 at=end;
331 }
332 if (p != from+at)
333 memcpy(p, from+at, size-at);
334
335 p += size-at;
336 *p = '\0';
337
338 if (change > 0)
339 memory = new_memory;
340
341
342 size += change;
343 }
344
345 // cout << "gsub n=" << memory.count() << "\n";
346
347#if __GSUB_REENTRANT__
348 if (substitutions)
349 wfree(substitutions);
350#endif
351
352 return n;
353
354}
355
356int EST_String::gsub_internal (EST_Regex &ex, const char *s, int length)
357{
358
359 int bracket_num=-1;
360
361 if (s==NULL)
363
364 int pos=0, n=0, change=0;
366
367 const char *from;
368 char *to;
369
370#if __GSUB_REENTRANT__
371 struct subst *substitutions=NULL;
372
373 int num_substitutions=0;
374#endif
375
376 // printf("match '%s'\n", (const char *)(*this));
377
378 if (size > 0)
379 {
380 {
381 int start, starts[EST_Regex_max_subexpressions], ends[EST_Regex_max_subexpressions], mlen;
382 while ((start = search(ex, mlen, pos, starts, ends))>=0)
383 {
384 // printf("match %d-%d, %d-%d, %d-%d\n", start, start+mlen, starts[0], ends[0], starts[1], ends[1]);
385 if (num_substitutions <= n)
386 substitutions = wrealloc(substitutions, struct subst, (num_substitutions +=10));
387
388 substitutions[n].start = start;
389 substitutions[n].end = start+mlen;
390
391 if (s)
392 change += length - mlen;
393 else
394 {
395 int slen = ends[bracket_num]-starts[bracket_num];
396 change += slen - mlen;
397 substitutions[n].slen = slen;
398 substitutions[n].s = walloc(char, slen);
399 memcpy(substitutions[n].s, (const char *)memory+starts[bracket_num], slen);
400
401 }
402
403 n++;
404 pos=start+mlen;
405 }
406 }
407
408 // dubious dealings with the inside of the string
409
410 from = (const char *)memory;
411
412 if (change > 0)
413 {
414 // Spurious braces make temporary ref to chunk go away
415 {new_memory = chunk_allocate(size+change+1);}
416 to = new_memory;
417 }
418 else
419 {
420 cp_make_updatable(memory, size);
421 to = memory;
422 }
423
424 int i, at=0;
425 char *p=to;
426
427 for(i=0; i<n; i++)
428 {
429 int start = substitutions[i].start;
430 int end = substitutions[i].end;
431 memcpy(p, from+at, start-at);
432 p += start-at;
433 if (s)
434 {
435 memcpy(p, s, length);
436 p += length;
437 }
438 else
439 {
440 memcpy(p, substitutions[i].s, substitutions[i].slen);
441 wfree(substitutions[i].s);
442 substitutions[i].s=NULL;
443 p += substitutions[i].slen;
444 }
445 at=end;
446 }
447 memcpy(p, from+at, size-at);
448
449 p += size-at;
450 *p = '\0';
451
452 if (change > 0)
453 memory = new_memory;
454
455 size += change;
456 }
457
458#if __GSUB_REENTRANT__
459 if (substitutions)
460 wfree(substitutions);
461#endif
462
463 return n;
464
465}
466
468 int (&starts)[EST_Regex_max_subexpressions],
469 int (&ends)[EST_Regex_max_subexpressions])
470{
471 int n=0, change=0;
473
474 const char *from;
475 char *to;
476
477#if __GSUB_REENTRANT__
478 struct subst *substitutions=NULL;
479
480 int num_substitutions=0;
481#endif
482
483 // printf("match '%s'\n", (const char *)(*this));
484
485 int i;
486 if (size > 0)
487 {
488 int escaped=0;
489
490 for(i=0; i<size; i++)
491 {
492 if (escaped)
493 {
494 if (memory[i] >= '0' &&memory[i] <= '9')
495 {
496 int snum = memory[i] - '0';
497 if (ends[snum] >= 0 && starts[snum] >=0)
498 {
499 if (num_substitutions <= n)
500 substitutions = wrealloc(substitutions, struct subst, (num_substitutions +=10));
501
502 substitutions[n].start = i-1;
503 substitutions[n].end = i+1;
504 substitutions[n].s = ((char *)(void *)(const char *)source.memory) + starts[snum];
505 substitutions[n].slen = ends[snum] - starts[snum];
506 change += substitutions[n].slen - 2;
507
508 n++;
509 }
510 }
511 escaped=0;
512 }
513 else if (memory[i] == '\\')
514 escaped=1;
515 }
516
517
518 // dubious dealings with the inside of the string
519
520 from = (const char *)memory;
521
522 if (change > 0)
523 {
524 // Spurious braces make temporary ref to chunk go away
525 {new_memory = chunk_allocate(size+change+1);}
526 to = new_memory;
527 }
528 else
529 {
530 cp_make_updatable(memory, size);
531 to = memory;
532 }
533
534 int at=0;
535 char *p=to;
536
537 for(i=0; i<n; i++)
538 {
539 int start = substitutions[i].start;
540 int end = substitutions[i].end;
541 memcpy(p, from+at, start-at);
542 p += start-at;
543
544 memcpy(p, substitutions[i].s, substitutions[i].slen);
545 substitutions[i].s=NULL;
546 p += substitutions[i].slen;
547 at=end;
548 }
549 memcpy(p, from+at, size-at);
550
551 p += size-at;
552 *p = '\0';
553
554 if (change > 0)
555 memory = new_memory;
556
557 size += change;
558 }
559
560#if __GSUB_REENTRANT__
561 if (substitutions)
562 wfree(substitutions);
563#endif
564
565 return n;
566}
567
568// Pass in the two possible separators as pointers so we don't have to
569// duplicate all the code. Inline definitions of the friend functions
570// takes care of the pretty interface.
571
572int EST_String::split_internal(EST_String result[], int max,
573 const char *s_seperator, int slen,
575 char quote) const
576{
577 int n=0;
578 int pos=0;
579 int start, end;
580 int lastspace=0;
581
582 if (size>0)
583 {
584 while (pos < length())
585 {
586 start= -1;
587 end= -1;
588 if ((*this)(pos) == quote)
589 {
590 start=pos;
591 pos++;
592 while (pos < length())
593 {
594 if ((*this)(pos) == quote)
595 {
596 pos++;
597 if ((*this)(pos) != quote)
598 break;
599 else
600 pos++;
601 }
602 else
603 pos++;
604 }
605 end=pos;
606 }
607 else
608 {
609 int mstart, mend, matched;
610 if (s_seperator)
611 matched = locate(s_seperator, slen, pos, mstart, mend);
612 else
613 matched = locate(*re_seperator, pos, mstart, mend);
614
615 if (matched)
616 if (mstart != pos)
617 {
618 start=pos;
619 end=mstart;
620 pos=mend;
622 }
623 else if (pos ==lastspace)
624 {
625 start=pos;
626 end=pos;
627 pos=mend;
629 }
630 else
631 {
632 pos=mend;
634 }
635 else
636 {
637 start=pos;
638 end=length();
639 pos=end;
640 }
641 }
642 if (start>=0)
643 result[n++] = EST_String(*this, start, end-start);
644 if (n==max)
645 break;
646 }
647 }
648
649 return n;
650}
651
652int EST_String::matches(const char *s, int pos) const
653{
654 CHECK_STRING_ARG(s);
655
656 int start, end;
657
658 if (!s)
659 return 0;
660
661 int len=safe_strlen(s);
662
663 if (extract(s, len, pos, start, end))
664 return start==pos && end==len;
665 else
666 return 0;
667}
668
669int EST_String::matches(const EST_String &s, int pos) const
670{
671 int start, end;
672
673 if (extract(s.str(), s.size, pos, start, end))
674 return start==pos && end==s.size;
675 else
676 return 0;
677}
678
679int EST_String::matches(EST_Regex &e, int pos, int *starts, int *ends) const
680{
681 if (size==0)
682 return e.run_match("", pos, starts, ends) >0;
683 else
684 return e.run_match(str(), pos, starts, ends) >0;
685}
686
687
688EST_String operator + (const EST_String &a, const char *b)
689{
690 CHECK_STRING_ARG(b);
691
692 int al = a.size;
693 int bl = safe_strlen(b);
694
695 if (al == 0)
696 return EST_String(b, 0, bl);
697 if (bl == 0)
698 return EST_String(a);
699
700 EST_ChunkPtr c = chunk_allocate(al+bl+1, a.str(), al);
701
702 if (bl>0)
703 memmove((char *)c + al, b, bl);
704 c(al+bl)='\0';
705
706 return EST_String(al+bl, c);
707}
708
710{
711 int al = a.size;
712 int bl = b.size;
713
714 if (al == 0)
715 return EST_String(b);
716 if (bl == 0)
717 return EST_String(a);
718
719 EST_ChunkPtr c = chunk_allocate(al+bl+1, a.str(), al);
720
721 memmove((char *)c+al,b.str(),bl);
722 c(al+bl)='\0';
723
724 return EST_String(al+bl, c);
725}
726
727EST_String operator + (const char *a, const EST_String &b)
728{
729 CHECK_STRING_ARG(a);
730
731 int al = safe_strlen(a);
732 int bl = b.size;
733
734 if (bl == 0)
735 return EST_String(a, 0, al);
736 if (al == 0)
737 return EST_String(b);
738
739 EST_ChunkPtr c = chunk_allocate(al+bl+1, a, al);
740
741 memmove((char *)c + al, b.str(), bl);
742
743 c(al+bl)='\0';
744
745 return EST_String(al+bl, c);
746}
747
749{
750
751 if (n<1)
752 return "";
753
754 int l = s.length();
755 int sz = n * l;
756
757 EST_String it(NULL, 0, sz);
758
759 for(int j=0; j<n; j++)
760 strncpy(((char *)it)+j*l, (const char *)s, l);
761
762 return it;
763}
764
766
767{
768 CHECK_STRING_ARG(b);
769
770 int bl = safe_strlen(b);
771
772 if (size == 0)
773 {
774 memory = chunk_allocate(bl+1, b, bl);
775 size = bl;
776 return *this;
777 }
778
779 grow_chunk(memory, size, size+bl+1);
780
781 memmove((char *)memory + size,b,bl);
782 memory(size+bl)='\0';
783 size += bl;
784
785 return *this;
786}
787
789
790{
791 int bl = b.size;
792
793 if (size == 0)
794 {
795 memory = NON_CONST_CHUNKPTR(b.memory);
796 size = b.size;
797 return *this;
798 }
799
800 grow_chunk(memory, size, size+bl+1);
801
802 if (bl >0)
803 memmove((char *)memory + size,b.str(),bl);
804
805 memory(size+bl)='\0';
806 size += bl;
807
808 return *this;
809}
810
812{
813 CHECK_STRING_ARG(s);
814
815 size=safe_strlen(s);
816
817 if (size != 0)
818 memory = chunk_allocate(size+1, s, size);
819 else
820 memory=NULL;
821 }
822
823
824EST_String::EST_String(const char *s, int start_or_fill, int len)
825{
826
827 if (s)
828 {
829 int start= start_or_fill;
830 if (len <0)
831 len=safe_strlen(s)-start;
832
833 size=len;
834 if (size != 0)
835 memory = chunk_allocate(len+1, s+start, len);
836 else
837 memory=NULL;
838 }
839 else
840 {
841 char fill = start_or_fill;
842 if (len<0) len=0;
843 size=len;
844 if (size != 0)
845 {
846 memory = chunk_allocate(len+1);
847 char *p = memory;
848 for(int j=0; j<len;j++)
849 p[j] = fill;
850 p[len]='\0';
851 }
852 else
853 memory=NULL;
854 }
855}
856
857EST_String::EST_String(const char *s, int s_size, int start, int len)
858{
859 CHECK_STRING_ARG(s);
860
861 if (len <0)
862 len=s_size-start;
863
864 size=len;
865 if (size != 0)
866 memory = chunk_allocate(len+1, s+start, len);
867 else
868 memory=NULL;
869}
870
871EST_String::EST_String(const EST_String &s, int start, int len)
872{
873 if (len <0)
874 len=s.size-start;
875
876 size=len;
877
878 if (start == 0 && len == s.size)
879 memory = NON_CONST_CHUNKPTR(s.memory);
880 else if (size != 0)
881 memory = chunk_allocate(len+1, s.memory, start, len);
882 else
883 memory = NULL;
884}
885
886/*
887EST_String::EST_String(const EST_String &s)
888{
889#if 1
890 static EST_ChunkPtr hack = NON_CONST_CHUNKPTR(s.memory);
891 memory = NON_CONST_CHUNKPTR(s.memory);
892 size = s.size;
893#else
894 *(struct EST_dumb_string *)this = *(struct EST_dumb_string *)(&s);
895#endif
896}
897*/
898
#if __FSF_COMPATIBILITY__
/** Construct a one-character string (only built when
  * __FSF_COMPATIBILITY__ is set).
  *
  * @param c the character.
  */
EST_String::EST_String(const char c)
{
    memory = chunk_allocate(2, &c, 1);
    size = 1;
}
#endif
906
908{
909 CHECK_STRING_ARG(str);
910 int len = safe_strlen(str);
911 if (!len)
912 memory = NULL;
913 else if (!shareing() && len < size)
914 memcpy((char *)memory, str, len+1);
915 else if (len)
916 memory = chunk_allocate(len+1, str, len);
917 size=len;
918 return *this;
919}
920
921#if 0
923{
924 memory = chunk_allocate(2, &c, 1);
925 size=1;
926 return *this;
927}
928
930{
931 const char *str = (const char *)s;
932 CHECK_STRING_ARG(str);
933 int len = safe_strlen(str);
934 if (!len)
935 memory = NULL;
936 else if (!shareing() && len < size)
937 memcpy((char *)memory, str, len+1);
938 else if (len)
939 memory = chunk_allocate(len+1, str, len);
940 size=len;
941 return *this;
942 //
943 //#if 1
944 ///* static EST_ChunkPtr hack = s.memory; */
945 // memory = NON_CONST_CHUNKPTR(s.memory);
946 // size = s.size;
947 //#else
948 // *(struct EST_dumb_string *)this = *(struct EST_dumb_string *)(&s);
949 //#endif
950 // return *this;
951}
952#endif
953
954EST_String downcase(const EST_String &s)
955{
956 EST_String t = EST_String(s.size, chunk_allocate(s.size+1, s.str(), s.size));
957 int i;
958
959 for (i=0; i < s.length(); i++)
960 if (isupper(s(i)))
961 t[i] = tolower(s(i));
962 else
963 t[i] = s(i);
964 return t;
965}
966
967EST_String upcase(const EST_String &s)
968{
969 EST_String t = EST_String(s.size, chunk_allocate(s.size+1, s.str(), s.size));
970 int i;
971
972 for (i=0; i < s.length(); i++)
973 if (islower(s(i)))
974 t[i] = toupper(s(i));
975 else
976 t[i] = s(i);
977 return t;
978}
979
980
981int
983{
984 int pos=0;
985 int n=0;
986 int start, end;
987
988 while (locate(s, pos, start, end))
989 {
990 n++;
991 pos=end;
992 }
993 return n;
994}
995
996int
997EST_String::freq(const char *s) const
998{
999 CHECK_STRING_ARG(s);
1000
1001 int pos=0;
1002 int n=0;
1003 int start, end;
1004 int len=safe_strlen(s);
1005
1006 while (locate(s, len, pos, start, end))
1007 {
1008 n++;
1009 pos=end;
1010 }
1011 return n;
1012}
1013
1014int
1016{
1017 int pos=0;
1018 int n=0;
1019 int start, end=0;
1020
1021 while (locate(ex, pos, start, end))
1022 {
1023 n++;
1024 pos=end;
1025 }
1026 return n;
1027}
1028
1030{
1031
1032 const char quotequote[3] = {quotec, quotec, '\0'};
1033
1034 EST_String result(*this);
1035
1036 result.gsub(quotequote+1, quotequote+0);
1037
1038 return EST_String::cat(quotequote+1, result, quotequote+1);
1039}
1040
1042{
1043
1044 const char quotequote[3] = {quotec, quotec, '\0'};
1045
1046 EST_String result(*this);
1047
1048 // cout << "before unqote '" << result << "'\n";
1049
1050 result.gsub(quotequote+0, quotequote+1);
1051
1052 // cout << "after unqote '" << result << "'\n";
1053
1054 if (result[0] == quotec && result[result.length()-1] == quotec )
1055 {
1056#if 1
1057 /* Spurious local variable to get arounf SunCC 4.0 being broken */
1058 EST_String res= result.at(1, result.length()-2);
1059 return res;
1060#else
1061 return result.at(1, result.length()-2);
1062#endif
1063 }
1064 else
1065 return result;
1066}
1067
1069{
1070
1071 if (contains(RXwhite) || contains(quotec))
1072 return quote(quotec);
1073
1074 return *this;
1075}
1076
1077
1079{
1080
1081 if ((*this)(0) == quotec && (*this)(length()-1) == quotec )
1082 return unquote(quotec);
1083
1084 return *this;
1085}
1086
1088
1089{
1090 if (str.size > 0)
1091 return (s << str.str());
1092 else
1093 return (s << "");
1094}
1095
1097 const EST_String s2,
1098 const EST_String s3,
1099 const EST_String s4,
1100 const EST_String s5,
1101 const EST_String s6,
1102 const EST_String s7,
1103 const EST_String s8,
1104 const EST_String s9
1105 )
1106{
1107 int len=(s1.length()+s2.length()+s3.length()+s4.length()+s5.length() +
1108 s6.length()+s7.length()+s8.length()+s9.length());
1109
1110 EST_String result;
1111
1112 result.size=len;
1113 result.memory= chunk_allocate(len+1, (const char *)s1, s1.length());
1114
1115 int p = s1.length();
1116 if (s2.length())
1117 { strncpy((char *)result.memory + p, (const char *)s2, s2.length()); p += s2.length(); }
1118 if (s3.length())
1119 { strncpy((char *)result.memory + p, (const char *)s3, s3.length()); p += s3.length(); }
1120 if (s4.length())
1121 { strncpy((char *)result.memory + p, (const char *)s4, s4.length()); p += s4.length(); }
1122 if (s5.length())
1123 { strncpy((char *)result.memory + p, (const char *)s5, s5.length()); p += s5.length(); }
1124 if (s6.length())
1125 { strncpy((char *)result.memory + p, (const char *)s6, s6.length()); p += s6.length(); }
1126 if (s7.length())
1127 { strncpy((char *)result.memory + p, (const char *)s7, s7.length()); p += s7.length(); }
1128 if (s8.length())
1129 { strncpy((char *)result.memory + p, (const char *)s8, s8.length()); p += s8.length(); }
1130 if (s9.length())
1131 { strncpy((char *)result.memory + p, (const char *)s9, s9.length()); p += s9.length(); }
1132
1133 result.memory(p) = '\0';
1134
1135 return result;
1136}
1137
1138int compare(const EST_String &a, const EST_String &b)
1139{
1140 if (a.size == 0 && b.size == 0)
1141 return 0;
1142 else if (a.size == 0)
1143 return -1;
1144 else if (b.size == 0)
1145 return 1;
1146 else
1147 return strcmp(a.str(), b.str());
1148}
1149
1150int compare(const EST_String &a, const char *b)
1151{
1152 if (a.size == 0 && (b==NULL || *b == '\0'))
1153 return 0;
1154 else if (a.size == 0)
1155 return -1;
1156 else if (b == NULL || *b == '\0')
1157 return 1;
1158 else
1159 return strcmp(a.str(), b);
1160}
1161
1162int fcompare(const EST_String &a, const EST_String &b,
1163 const unsigned char *table)
1164{
1165 if (a.size == 0 && b.size == 0)
1166 return 0;
1167 else if (a.size == 0)
1168 return -1;
1169 else if (b.size == 0)
1170 return 1;
1171 else
1172 return EST_strcasecmp(a.str(), b.str(), table);
1173}
1174
1175int fcompare(const EST_String &a, const char *b,
1176 const unsigned char *table)
1177{
1178 int bsize = (b ? strlen((const char *)b) : 0);
1179 if (a.size == 0 && bsize == 0)
1180 return 0;
1181 else if (a.size == 0)
1182 return -1;
1183 else if (bsize == 0)
1184 return 1;
1185 else
1186 return EST_strcasecmp(a.str(), (const char *)b, table);
1187}
1188
1189int operator == (const char *a, const EST_String &b)
1190{
1191 CHECK_STRING_ARG(a);
1192
1193 if (!a)
1194 return 0;
1195 else if (b.size==0)
1196 return *a == '\0';
1197 else
1198 return (*a == b(0)) && strcmp(a, b.str())==0;
1199}
1200
1201int operator == (const EST_String &a, const EST_String &b)
1202{
1203 if (a.size==0)
1204 return b.size == 0;
1205 else if (b.size == 0)
1206 return 0;
1207 else
1208 return a.size == b.size && a(0) == b(0) && memcmp(a.str(),b.str(),a.size)==0;
1209};
1210
1212{
1213 char buf[64];
1214 const char *format;
1215
1216 switch (b)
1217 {
1218 case 8:
1219 format="0%o";
1220 break;
1221 case 10:
1222 format="%d";
1223 break;
1224 case 16:
1225 format="0x%x";
1226 break;
1227 default:
1228 format="??%d??";
1229 break;
1230 }
1231 sprintf(buf, format, i);
1232
1233 return EST_String(buf);
1234}
1235
1237{
1238 char buf[64];
1239 const char *format;
1240
1241 switch (b)
1242 {
1243 case 8:
1244 format="0%lo";
1245 break;
1246 case 10:
1247 format="%ld";
1248 break;
1249 case 16:
1250 format="0x%lx";
1251 break;
1252 default:
1253 format="??%ld??";
1254 break;
1255 }
1256 sprintf(buf, format, i);
1257
1258 return EST_String(buf);
1259}
1260
1262{
1263 char buf[64];
1264
1265 sprintf(buf, "%f", f);
1266
1267 return EST_String(buf);
1268}
1269
1271{
1272 char buf[64];
1273
1274 sprintf(buf, "%f", d);
1275
1276 return EST_String(buf);
1277}
1278
1279long EST_String::Long(bool *valid) const
1280{
1281 char *end;
1282
1283 long val = strtol(str(), &end, 10);
1284
1285 if (end==NULL|| *end != '\0')
1286 {
1287 if (valid != NULL)
1288 {
1289 *valid=0;
1290 return 0L;
1291 }
1292 else
1293 {
1294 printf("bad integer number format '%s'\n",
1295 (const char *)str());
1296 exit(0);
1297 }
1298 }
1299
1300 if (valid)
1301 *valid=1;
1302
1303 return val;
1304}
1305
1306int EST_String::Int(bool *valid) const
1307{
1308 long val = Long(valid);
1309
1310 if (valid && !*valid)
1311 return 0L;
1312
1313 if (val > INT_MAX || val < INT_MIN)
1314 {
1315 if (valid != NULL)
1316 {
1317 *valid=0;
1318 return 0L;
1319 }
1320 else
1321 {
1322 printf("number out of range for integer %ld",
1323 val);
1324 exit(0);
1325 }
1326 }
1327
1328 return val;
1329}
1330
1331double EST_String::Double(bool *valid) const
1332{
1333 char *end;
1334
1335 double val = strtod(str(), &end);
1336
1337 if (end==NULL|| *end != '\0')
1338 {
1339 if (valid != NULL)
1340 {
1341 *valid=0;
1342 return 0.0;
1343 }
1344 else
1345 {
1346 printf("bad decimal number format '%s'",
1347 (const char *)str());
1348 exit(0);
1349 }
1350 }
1351
1352 if (valid)
1353 *valid=1;
1354
1355 return val;
1356}
1357
1358float EST_String::Float(bool *valid) const
1359{
1360 double val = Double(valid);
1361
1362 if (valid && !*valid)
1363 return 0.0;
1364
1365 if (val > FLT_MAX || val < -FLT_MAX)
1366 {
1367 if (valid != NULL)
1368 {
1369 *valid=0;
1370 return 0.0;
1371 }
1372 else
1373 {
1374 printf("number out of range for float %f",
1375 val);
1376 exit(0);
1377 }
1378 }
1379
1380 return val;
1381}
1382
1383
1384
static EST_String Number(int i, int base=10)
Build string from an integer.
int gsub(const char *os, const EST_String &s)
Substitute one string for another.
Definition EST_String.h:401
int subst(EST_String source, int(&starts)[EST_Regex_max_subexpressions], int(&ends)[EST_Regex_max_subexpressions])
Substitute the result of a match into a string.
EST_String unquote_if_needed(const char quotec) const
Remove quotes if any.
static const EST_String Empty
Constant empty string.
Definition EST_String.h:111
EST_String unquote(const char quotec) const
Remove quotes and unprotect internal quotes.
int freq(const char *s) const
Number of occurrences of substring.
EST_String(void)
Construct an empty string.
Definition EST_String.h:201
int EST_string_size
Type of string size field.
Definition EST_String.h:114
EST_String quote(const char quotec) const
Return the string in quotes with internal quotes protected.
static EST_String cat(const EST_String s1, const EST_String s2=Empty, const EST_String s3=Empty, const EST_String s4=Empty, const EST_String s5=Empty, const EST_String s6=Empty, const EST_String s7=Empty, const EST_String s8=Empty, const EST_String s9=Empty)
int search(const char *s, int len, int &mlen, int pos=0) const
Find a substring.
Definition EST_String.h:334
int length(void) const
Length of string ({not} length of underlying chunk)
Definition EST_String.h:241
EST_String & operator+=(const char *b)
Add C string to end of EST_String.
int contains(const char *s, int pos=-1) const
Does it contain this substring?
Definition EST_String.h:375
EST_String quote_if_needed(const char quotec) const
Return in quotes if there is something to protect (e.g. spaces)
static const char * version
Global version string.
Definition EST_String.h:108
int matches(const char *e, int pos=0) const
Exactly match this string?
const char * str(void) const
Get a const-pointer to the actual memory.
Definition EST_String.h:245
EST_String at(int from, int len=0) const
Return part at position.
Definition EST_String.h:302
EST_String & operator=(const char *str)
Assign C string to EST_String.