comparison regen/src/parse.c @ 11:1e0cd7fade8b

add regen
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Sun, 19 Jun 2011 16:36:05 +0900
parents
children
comparison
equal deleted inserted replaced
10:41634f26cd6f 11:1e0cd7fade8b
1 #include "re.h"
2 #include "util.h"
3 #include "parse.h"
4 #include "codegen.h"
5
6 /*
7 Grammar:
8
9 EOP: eall: { e0 } EOP
10 Union: e0: e1 { '|' e1 }*
11 Concatination: e1: e2 { e2 }*
12 Repetition: e2: e3 { '+' | '*' | '?' }
13 Literal: e3: Literal | '.' | '$' | '[]' | '^' | '$' | Backref | '(' e0 ')'
14 */
15
16 static EXPR_TYPE Toktype;
17 static UCHARP Toklit, Ptr, End;
18 static REGEX *R;
19 EXPR *test;
20
21 void
22 lex()
23 {
24 if (Ptr == End) {
25 Toktype = EOP;
26 Toklit = 0;
27 } else switch (Toklit = Ptr++, *Toklit) {
28 case '.': Toktype = Dot; break;
29 case '[': Toktype = Charclass; break;
30 case '|': Toktype = Union; break;
31 case '?': Toktype = Qmark; break;
32 case '+': Toktype = Plus; break;
33 case '*': Toktype = Star; break;
34 case '(': Toktype = Lpar; break;
35 case ')': Toktype = Rpar; break;
36 case '^': Toktype = BegLine; break;
37 case '$': Toktype = EndLine; break;
38 case '\\': Toktype = Literal;
39 if (Ptr == End) exitmsg("bad \\");
40 Toklit = Ptr++;
41 break;
42 default:
43 Toktype = Literal;
44 }
45 }
46
47 void
48 parse(REGEX *r, CODEGEN *g)
49 {
50 EXPR *e, *dotstar;
51 R = r;
52 Ptr = (UCHARP)r->regex;
53 End = (UCHARP)r->regex_end;
54 lex();
55
56 e = e0(r);
57 if (Toktype != EOP) exitmsg("expected end of pattern.");
58
59 r->min_length = e->min_length;
60 r->max_length = e->max_length;
61
62 fill_words(e);
63
64 r->must = e->must;
65
66 int i;
67 for (i = 0; i < r->must.in->size; i++) {
68 if (strlen(r->must.in->words[i]) > r->must_max_length) {
69 r->must_max_length = strlen(r->must.in->words[i]);
70 r->must_max_word = r->must.in->words[i];
71 }
72 }
73 for (i = 0; i < NCHAR; i++) {
74 if (r->involve[i]) r->ninvolve++;
75 }
76
77 if (r->anchored == FALSE) {
78 dotstar = newexpr(r, Dot, '*', (EXPR *)0, (EXPR *)0);
79 dotstar = newexpr(r, Star, '\0', dotstar, (EXPR *)0);
80 e = newexpr(r, Concat, '\0', dotstar, e);
81 }
82
83 e = newexpr(r, EOP, '#', e, (EXPR *)0);
84
85 fill_follow(e);
86
87 r->root = e;
88
89 if (r->debug) {
90 printf("regex: %s\n\n", r->regex);
91 printf("min-length: %d\n", r->min_length);
92 printf("max-length: %d\n", r->max_length);
93 printf("ninvolve: %d\n\n", r->ninvolve);
94 for (i = 0; i < NCHAR; i++) {
95 if (r->involve[i]) {
96 printf("\"%s\", ", ASCII[i]);
97 }
98 }
99 puts("\n");
100 if (r->must.is != NULL) {
101 printf("is: %s\n", r->must.is);
102 }
103 if (r->must.in != NULL) {
104 for (i = 0; i < r->must.in->size; i++) {
105 printf("in: %s\n", r->must.in->words[i]);
106 }
107 }
108 if (r->must.left != NULL) {
109 printf("left: %s\n", r->must.left);
110 }
111 if (r->must.right != NULL) {
112 printf("right: %s\n", r->must.right);
113 }
114 for (i = 0; i < r->maxid; i++) {
115 print_pos(r->lit_expr[i]);
116 }
117 exit(0);
118 }
119 }
120
121 EXPR *
122 e0(REGEX *r)
123 {
124 EXPR *e, *f;
125
126 e = e1(r);
127 while (Toktype == Union) {
128 lex();
129 f = e1(r);
130 e = newexpr(r, Union, '\0', e, f);
131 }
132
133 return e;
134 }
135
136 EXPR *
137 e1(REGEX *r)
138 {
139 EXPR *e, *f;
140
141 e = e2(r);
142
143 while (Toktype == Literal || Toktype == Charclass
144 || Toktype == Dot || Toktype == Lpar
145 || Toktype == EndLine || Toktype == BegLine) {
146 f = e2(r);
147 e = newexpr(r, Concat, '\0', e, f);
148 }
149 return(e);
150 }
151
152 EXPR *
153 e2(REGEX *r)
154 {
155 EXPR *e;
156 e = e3(r);
157 for ( ; PlusStarQmark(Toktype); lex()) {
158 e = newexpr(r, Toktype, '\0', e, (EXPR *)0);
159 }
160 return(e);
161 }
162
163 EXPR *
164 e3(REGEX *r)
165 {
166 EXPR *e;
167 UCHAR *tbl;
168 int count;
169
170 switch(Toktype) {
171 case Literal:
172 e = newexpr(r, Literal, *Toklit, (EXPR *)0, (EXPR *)0);
173 break;
174 case BegLine:
175 e = newexpr(r, BegLine, '^', (EXPR *)0, (EXPR *)0);
176 break;
177 case EndLine:
178 e = newexpr(r, EndLine, '$', (EXPR *)0, (EXPR *)0);
179 break;
180 case Dot:
181 e = newexpr(r, Dot, '.', (EXPR *)0, (EXPR *)0);
182 break;
183 case Charclass:
184 tbl = (UCHARP)xmalloc(NCHAR, "charclass tbl");
185 count = ccl(tbl);
186 if (count == 1) {
187 e = newexpr(r, Literal, *tbl, (EXPR *)0, (EXPR *)0);
188 xfree(tbl);
189 } else {
190 e = newexpr(r, Charclass, '[', (EXPR *)count, (EXPR *)tbl);
191 }
192 break;
193 case Lpar:
194 lex(r);
195 e = e0(r);
196 if (Toktype != Rpar) exitmsg("expected a ')'");
197 break;
198 default:
199 exitmsg("expected a lit or '('");
200 break;
201 }
202 lex();
203 return e;
204 }
205
206 int
207 ccl(UCHAR *tbl) {
208 int i;
209 UCHAR lastc = 0;;
210 int range = 0;
211 int count = 0;
212 UCHAR comp;
213
214 tbl[NCHAR] = 0; // count of characters in chaclass.
215 memset((char *)tbl, 0, NCHAR);
216
217 if (*Ptr == '^') {
218 Ptr++;
219 for (i = 32; i < 126; i ++) {
220 tbl[i] = 1;
221 }
222 comp = 0;
223 } else {
224 comp = 1;
225 }
226
227 if (*Ptr == ']' || *Ptr == '-') {
228 tbl[*Ptr] = comp;
229 lastc = *Ptr++;
230 }
231
232 for (; (Ptr < End) && (*Ptr != ']'); Ptr++) {
233 if (!range && *Ptr == '-') {
234 range = 1;
235 continue;
236 }
237 if (*Ptr == '\\') {
238 if (++Ptr >= End) {
239 exitmsg(" [ ] imbalance");
240 }
241 }
242
243 tbl[*Ptr] = comp;
244
245 if (range) {
246 for (i = (*Ptr) - 1; i > lastc; i--) {
247 tbl[i] = comp;
248 }
249 range = 0;
250 }
251 lastc = *Ptr;
252 }
253 if (Ptr == End) exitmsg(" [ ] imbalance");
254
255 if (range) {
256 tbl['-'] = comp;
257 range = 0;
258 }
259
260 Ptr++;
261
262 for (i = 0; i < NCHAR; i++) {
263 if (tbl[i]) {
264 lastc = i;
265 count++;
266 }
267 }
268 if (count == 1) *tbl = (char)lastc;
269
270 /* Return the character count. */
271 return count;
272 }
273
274 EXPR *
275 newexpr(REGEX *r, EXPR_TYPE t, UCHAR lit, EXPR* left, EXPR* right)
276 {
277 int i;
278 EXPR *e = (EXPR *)xmalloc(sizeof(EXPR), "newexpr");
279 e->type = t;
280 e->lit = lit;
281 e->l = left;
282 e->r = right;
283 e->first_pos = newpos();
284 e->last_pos = newpos();
285 e->follow_pos = newpos();
286 e->before_pos = newpos();
287 e->parent = 0;
288
289 switch (t) {
290 case EOP:
291 e->id = 0;
292 r->lit_expr[e->id] = e;
293 break;
294 case Dot: case Charclass: case Literal: case BegLine: case EndLine:
295 e->id = r->maxid++;
296 r->lit_expr[e->id] = e;
297 break;
298 default:
299 e->id = -1;
300 break;
301 }
302
303 switch (t) {
304 case Dot:
305 if (e->lit != '*') {
306 for (i = 0; i < NCHAR; i++) {
307 r->involve[i] = 1;
308 }
309 }
310 break;
311 case Charclass:
312 for (i = 0; i < NCHAR; i++) {
313 if (e->tbl[i]) {
314 r->involve[i] = 1;
315 }
316 }
317 break;
318 case BegLine: case EndLine:
319 r->involve[NL] = 1;
320 break;
321 case Literal:
322 r->involve[e->lit] = 1;
323 break;
324 default:
325 break;
326 }
327
328 switch (t) {
329 case Union: case Concat:
330 left->parent = e;
331 right->parent = e;
332 break;
333 case EOP: case Plus: case Star: case Qmark:
334 left->parent = e;
335 break;
336 default:
337 break;
338 }
339
340 rept_compress(e, e->l); /* optimize */
341 fill_expr(e);
342 return e;
343 }
344
345 void
346 fill_expr(e)
347 EXPR *e;
348 {
349 short l, r;
350
351 switch(e->type) {
352 case EOP:
353 e->max_length = e->l->max_length;
354 e->min_length = e->l->min_length;
355 break;
356 case Plus:
357 e->max_length = -1;
358 e->min_length = e->l->min_length;
359 break;
360 case Star:
361 e->max_length = -1;
362 e->min_length = 0;
363 break;
364 case Qmark:
365 e->max_length = e->l->max_length;
366 e->min_length = 0;
367 break;
368 case Union:
369 l = e->l->max_length; r = e->r->max_length;
370 e->max_length = (INFOR(l, r) ? -1 : MAX(l, r));
371 l = e->l->min_length; r = e->r->min_length;
372 e->min_length = MIN(e->l->min_length, e->r->min_length);
373 break;
374 case Concat:
375 l = e->l->max_length; r = e->r->max_length;
376 e->max_length = (INFOR(l, r) ? -1 : l + r);
377 l = e->l->min_length; r = e->r->min_length;
378 e->min_length = l + r;
379 break;
380 default:
381 e->min_length = e->max_length = 1;
382 }
383
384 switch (e->type) {
385 case Star: case Qmark: case Plus:
386 copypos(e->l->first_pos, e->first_pos);
387 copypos(e->l->last_pos, e->last_pos);
388 break;
389 case Union:
390 copypos(e->r->first_pos, e->first_pos);
391 copypos(e->l->first_pos, e->first_pos);
392 copypos(e->r->last_pos, e->last_pos);
393 copypos(e->l->last_pos, e->last_pos);
394 break;
395 case Concat:
396 if (e->l->min_length == 0) {
397 copypos(e->l->first_pos, e->first_pos);
398 copypos(e->r->first_pos, e->first_pos);
399 } else {
400 copypos(e->l->first_pos, e->first_pos);
401 }
402 if (e->r->min_length == 0) {
403 copypos(e->l->last_pos, e->last_pos);
404 copypos(e->r->last_pos, e->last_pos);
405 } else {
406 copypos(e->r->last_pos, e->last_pos);
407 }
408 break;
409 case BegLine: case EndLine: case Literal: case Dot: case Charclass: case EOP:
410 addpos(e->first_pos, e->id);
411 addpos(e->last_pos, e->id);
412 default:
413 break;
414 }
415 }
416
417 /* singleton */
418 static const WORD_SET NOWORDS = {NULL, 0};
419 static const MUST NOMUST = {
420 (WORD_SET *)&NOWORDS, NULL, NULL, NULL
421 };
422
423 void fill_words(EXPR *e) {
424 int l1, l2, i, j;
425 char *str;
426
427 switch (e->type) {
428 case EOP:
429 fill_words(e->l);
430 e->must = e->l->must;
431 break;
432 case Plus:
433 fill_words(e->l);
434 e->must = e->l->must;
435 e->must.is = NULL;
436 break;
437 case Union:
438 fill_words(e->l);
439 fill_words(e->r);
440 /* fill must.is */
441 if (e->l->must.is && e->r->must.is
442 && !strcmp(e->l->must.is, e->r->must.is)) {
443 e->must.is = e->must.left = e->must.right = e->l->must.is;
444 e->must.in = word_set_new(1);
445 e->must.in->words[0] = e->must.is;
446 return;
447 } else {
448 e->must.is = NULL;
449 }
450
451 /* fill must.left */
452 if (e->l->must.left && e->r->must.left) {
453 l1 = strlen(e->l->must.left);
454 l2 = strlen(e->r->must.left);
455 for (i = 0; i < l1 && i < l2; i++) {
456 if (e->l->must.left[i] != e->r->must.left[i]) {
457 break;
458 }
459 }
460 if (i) {
461 str = (char *)xmalloc(sizeof(char) * (i+1), "must: union-left");
462 for (j = 0; j < i; j++) str[j] = e->l->must.left[j];
463 str[j] = '\0';
464 e->must.left = str;
465 } else {
466 /* no common words */
467 e->must.left = NULL;
468 }
469 } else {
470 e->must.left = NULL;
471 }
472
473 /* fill must.right */
474 if (e->l->must.right && e->r->must.right) {
475 l1 = strlen(e->l->must.right);
476 l2 = strlen(e->r->must.right);
477 for (i = 0; i < l1 && i < l2; i++) {
478 if (e->l->must.right[l1-i-1] != e->r->must.right[l2-i-1]) {
479 break;
480 }
481 }
482 if (i) {
483 str = (char *)xmalloc(sizeof(char) * (i+1), "must: union-right");
484 for (j = 0; j < i; j++) str[i-j-1] = e->l->must.right[l1-j-1];
485 str[j] = '\0';
486 e->must.right = str;
487 } else {
488 /* no common words */
489 e->must.right = NULL;
490 }
491 } else {
492 e->must.right = NULL;
493 }
494
495 /* fill must.in */
496 if (e->must.left && e->must.right) {
497 l1 = strlen(e->must.left);
498 l2 = strlen(e->must.right);
499 e->must.in = word_set_new(1);
500 e->must.in->words[0] = l1 > l2 ? e->must.left : e->must.right;
501 } else if (e->must.left || e->must.right) {
502 e->must.in = word_set_new(1);
503 e->must.in->words[0] = e->must.left ? e->must.left : e->must.right;
504 } else {
505 e->must.in = word_set_new(0);
506 }
507
508 break;
509 case Concat:
510 /* fill must.is */
511 fill_words(e->l);
512 fill_words(e->r);
513 if (e->l->must.is && e->r->must.is) {
514 l1 = strlen(e->l->must.is);
515 l2 = strlen(e->r->must.is);
516 str = (char *)xmalloc(sizeof(char) * (l1+l2+1), "must: concat-is");
517 strcpy(str, e->l->must.is);
518 strcat(str, e->r->must.is);
519 e->must.is = e->must.left = e->must.right = str;
520 e->must.in = word_set_new(1);
521 e->must.in->words[0] = str;
522 break;
523 } else {
524 e->must.is = NULL;
525 }
526
527 /* fill must.left */
528 if (e->l->must.is && e->r->must.left) {
529 l1 = strlen(e->l->must.is);
530 l2 = strlen(e->r->must.left);
531 str = (char *)xmalloc(sizeof(char) * (l1+l2+1), "must: concat-is");
532 strcpy(str, e->l->must.is);
533 strcat(str, e->r->must.left);
534 e->must.left = str;
535 } else {
536 e->must.left = e->l->must.left;
537 }
538
539 /* fill must.right */
540 if (e->r->must.is && e->l->must.right) {
541 l1 = strlen(e->r->must.is);
542 l2 = strlen(e->l->must.right);
543 str = (char *)xmalloc(sizeof(char) * (l1+l2+1), "must: concat-is");
544 strcpy(str, e->l->must.right);
545 strcat(str, e->r->must.is);
546 e->must.right = str;
547 } else {
548 e->must.right = e->r->must.right;
549 }
550
551 /* fill must.in */
552 if (e->must.is) {
553 e->must.in = word_set_new(1);
554 e->must.in->words[0] = e->must.is;
555 } else if (e->l->must.is && e->r->must.left) {
556 l1 = e->r->must.in->size;
557 e->must.in = word_set_new(l1);
558 e->must.in->words[0] = e->must.left;
559 for (i = 0, j = 0; i < l1; i++) {
560 if (!strcmp(e->r->must.left, e->r->must.in->words[i]) && j == 0) {
561 j = 1;
562 continue;
563 }
564 e->must.in->words[i+1-j] = e->r->must.in->words[i];
565 }
566 } else if (e->l->must.right && e->r->must.is) {
567 l1 = e->l->must.in->size;
568 e->must.in = word_set_new(l1);
569 e->must.in->words[0] = e->must.right;
570 for (i = 0, j = 0; i < l1; i++) {
571 if (!strcmp(e->l->must.right, e->l->must.in->words[i]) && j == 0) {
572 j = 1;
573 continue;
574 }
575 e->must.in->words[i+1-j] = e->l->must.in->words[i];
576 }
577 } else if (e->l->must.right && e->r->must.left) {
578 l1 = strlen(e->l->must.right);
579 l2 = strlen(e->r->must.left);
580 str = (char *)xmalloc(sizeof(char) * (l1+l2+1), "must: concat-in");
581 strcpy(str, e->l->must.right);
582 strcat(str, e->r->must.left);
583 l1 = e->l->must.in->size;
584 l2 = e->r->must.in->size;
585 e->must.in = word_set_new(l1+l2-1);
586 e->must.in->words[0] = str;
587 for (i = 0, j = 0; i < l1; i++) {
588 if (!strcmp(e->l->must.right, e->l->must.in->words[i]) && j == 0) {
589 j = 1;
590 continue;
591 }
592 e->must.in->words[i+1-j] = e->l->must.in->words[i];
593 }
594 for (i = 0, j = 0; i < l2; i++) {
595 if (!strcmp(e->l->must.right, e->l->must.in->words[i]) && j == 0) {
596 j = 1;
597 continue;
598 }
599 e->must.in->words[i+l1-j] = e->r->must.in->words[i];
600 }
601 } else {
602 l1 = e->l->must.in->size;
603 l2 = e->r->must.in->size;
604 e->must.in = word_set_new(l1+l2);
605 for (i = 0; i < l1; i++) {
606 e->must.in->words[i] = e->l->must.in->words[i];
607 }
608 for (i = 0; i < l2; i++) {
609 e->must.in->words[i+l1] = e->r->must.in->words[i];
610 }
611 }
612 break;
613 case Literal:
614 str = (char *)xcalloc(sizeof(char), 2, "must: literal");
615 str[0] = e->lit;
616 e->must.is = e->must.left = e->must.right = str;
617 e->must.in = word_set_new(1);
618 e->must.in->words[0] = str;
619 break;
620 default:
621 e->must = NOMUST;
622 }
623 }
624
625 WORD_SET *
626 word_set_new (int size)
627 {
628 if (size == 0) return (WORD_SET *)&NOWORDS;
629
630 WORD_SET *word_set = (WORD_SET *)xmalloc(sizeof(WORD_SET), "word_set");
631 word_set->size = size;
632 if (size > 0) {
633 word_set->words = (char **)xcalloc(sizeof(char *), size, "words");
634 }
635 return word_set;
636 }
637
638 void fill_follow(EXPR *e)
639 {
640 switch (e->type) {
641 case EOP:
642 connect(e->l->last_pos, e->first_pos);
643 fill_follow(e->l);
644 break;
645 case Concat:
646 connect(e->l->last_pos, e->r->first_pos);
647 fill_follow(e->r);
648 fill_follow(e->l);
649 break;
650 case Union:
651 fill_follow(e->r);
652 fill_follow(e->l);
653 break;
654 case Star: case Plus:
655 connect(e->l->last_pos, e->l->first_pos);
656 fill_follow(e->l);
657 break;
658 case Qmark:
659 fill_follow(e->l);
660 break;
661 default:
662 break;
663 }
664 return;
665 }
666
667 POSITION_SET *
668 newpos()
669 {
670 static const int init_size = 10;
671 POSITION_SET *pos = (POSITION_SET *)xmalloc(sizeof(POSITION_SET) ,"position set");
672 pos->npos = 0;
673 pos->pos = (int *)xmalloc(sizeof(int)*init_size, "array in position set");
674 pos->size = init_size;
675 return pos;
676 }
677
678 void
679 connect(POSITION_SET *set1, POSITION_SET *set2)
680 {
681 int i, j;
682 EXPR **tbl = R->lit_expr;
683 for (i = 0; i < set1->npos; i++) {
684 for (j = 0; j < set2->npos; j++) {
685 addpos(tbl[set1->pos[i]]->follow_pos,
686 set2->pos[j]);
687 addpos(tbl[set2->pos[j]]->before_pos,
688 set1->pos[i]);
689 }
690 }
691 }
692
693 void
694 copypos(POSITION_SET *src, POSITION_SET *dst)
695 {
696 int i;
697 for (i = 0; i < src->npos; i++) {
698 addpos(dst, src->pos[i]);
699 }
700 }
701
702 void
703 addpos(POSITION_SET *set, int id)
704 {
705 int i;
706
707 if (id < 0 || R->maxid < id) {
708 exitmsg("invalid id for addpos: %d\n", id);
709 }
710
711 /* check duplication. */
712 for (i = 0; i < set->npos; i++) {
713 if (set->pos[i] == id) {
714 return;
715 }
716 }
717
718 /* check for realloc. before adding. */
719 if (set->npos >= set->size) {
720 set->size *= 2;
721 set->pos = (int *)xrealloc(set->pos, (set->size)*sizeof(int), "realloc addpos");
722 }
723
724 set->pos[set->npos] = id;
725 set->npos++;
726 }
727
728 void
729 delete_rept(EXPR *e)
730 {
731 #ifdef DEBUG
732 print_expr(R->root);
733 #endif
734 e->l->parent = e->parent;
735 if (e->parent->l == e) e->parent->l = e->l;
736 else e->parent->r = e->l;
737
738 percolate(e->parent);
739 rept_compress (e->parent, e->l);
740
741 xfree(e);
742 #ifdef DEBUG
743 print_expr(R->root);
744 #endif
745 }
746
747 void
748 rept_compress(EXPR *parent, EXPR* child)
749 {
750 switch(parent->type) {
751 case Plus: case Star: case Qmark: break;
752 default: return;
753 }
754 switch(child->type) {
755 case Plus: case Star: case Qmark: break;
756 default: return;
757 }
758
759 if (parent->type != child->type && parent->type != Star) {
760 change_rept(parent, Star);
761 }
762
763 delete_rept(child);
764 }
765
766 void
767 percolate(EXPR *e)
768 {
769 fill_expr(e);
770 if (e->parent) percolate(e->parent);
771 }
772
773 void
774 change_rept(EXPR *e, EXPR_TYPE type)
775 {
776 #ifdef DEBUG
777 print_expr(R->root);
778 #endif
779 e->type = type;
780 percolate(e);
781 #ifdef DEBUG
782 print_expr(R->root);
783 #endif
784 }
785
786 void
787 delete_expr(EXPR *e)
788 {
789 EXPR *parent = e->parent;
790 EXPR *grandparent, *sibling;
791
792 if (!parent) {
793 exitmsg("INTERNAL ERROR: delete_expr abort %d\n", e->type);
794 }
795
796 switch(parent->type) {
797 case Plus: case Star: case Qmark: case Union:
798 delete_expr(parent);
799 return;
800 case Concat:
801 break;
802 default:
803 exitmsg("INTERNAL ERROR: delete_expr abort %d\n", parent->type);
804 }
805 #ifdef DEBUG
806 print_expr(R->root);
807 #endif
808 grandparent = parent->parent;
809
810 if (e == parent->r) {
811 sibling = parent->l;
812 } else {
813 sibling = parent->r;
814 }
815
816 sibling->parent = grandparent;
817
818 if (parent == grandparent->r) {
819 grandparent->r = sibling;
820 } else {
821 grandparent->l = sibling;
822 }
823
824 percolate(grandparent);
825
826 parent->r = parent->l = 0;
827 free_expr(e);
828
829 rept_compress(grandparent, sibling);
830 #ifdef DEBUG
831 print_expr(R->root);
832 #endif
833 }
834
835 BOOL
836 match_every_line(REGEX *r)
837 {
838 if (r->root->min_length == 0) {
839 free_expr(r->root);
840 r->maxid = 1;
841 r->root = newexpr(r, Literal, NL, (EXPR *)0, (EXPR *)0);
842 r->root = newexpr(r, EOP, '#', r->root, (EXPR *)0);
843 return TRUE;
844 }
845 return FALSE;
846 }
847
848 void
849 free_expr(EXPR *e)
850 {
851 if (e == 0) return;
852 switch (e->type) {
853 case Charclass:
854 /* Don't free the table, because it may be shared,
855 * but zero the pointer to it. TODO
856 */
857 e->r = 0;
858 default:
859 xfree(e->first_pos);
860 xfree(e->last_pos);
861 xfree(e->follow_pos);
862 }
863
864 if (e->r) free_expr(e->r);
865 if (e->l) free_expr(e->l);
866
867 xfree(e);
868 }
869
870 void
871 print_expr(EXPR *e)
872 {
873 switch(e->type) {
874 case EOP:
875 print_expr(e->l); printf("#\n"); return;
876 case Plus:
877 print_expr(e->l); printf("+"); return;
878 case Star:
879 print_expr(e->l); printf("*"); return;
880 case Qmark:
881 print_expr(e->l); printf("?"); return;
882 case Concat:
883 if (PlusStarQmark(e->parent->type)) printf("(");
884 print_expr(e->l); print_expr(e->r);
885 if (PlusStarQmark(e->parent->type)) printf(")");
886 return;
887 case Union:
888 if (PlusStarQmark(e->parent->type)) printf("(");
889 if (e->parent->type == Concat) printf("(");
890 print_expr(e->l); printf("|"); print_expr(e->r);
891 if (PlusStarQmark(e->parent->type)) printf(")");
892 if (e->parent->type == Concat) printf(")");
893 return;
894 case Dot:
895 printf("."); return;
896 case Charclass:;
897 int i;
898 printf("[");
899 for (i = 0; i < NCHAR; i++) {
900 if (((UCHARP)e->r)[i]) {
901 printf("%c", (UCHAR)i);
902 }
903 }
904 printf("]"); return;
905 case BegLine:
906 printf("^");
907 return;
908 case EndLine:
909 printf("$");
910 return;
911 case Literal:
912 switch (e->lit) {
913 case '\n':
914 printf("\\n"); return;
915 case '\v':
916 printf("\\v"); return;
917 case '\f':
918 printf("\\f"); return;
919 case '\t':
920 printf("\\t"); return;
921 case '\r':
922 printf("\\r"); return;
923 default:
924 printf("%c", e->lit); return;
925 }
926 default:
927 printf("%c", e->lit); return;
928 }
929 }
930
931 void print_pos(EXPR *e)
932 {
933 int i;
934 printf("expr %d (lit='%c')\n", e->id, e->lit);
935 printf("first_pos:\n");
936 for (i = 0; i < e->first_pos->npos; i++) {
937 printf("\texpr %d;\n", e->first_pos->pos[i]);
938 }
939 printf("last_pos:\n");
940 for (i = 0; i < e->last_pos->npos; i++) {
941 printf("\texpr %d;\n", e->last_pos->pos[i]);
942 }
943 printf("follow_pos:\n");
944 for (i = 0; i < e->follow_pos->npos; i++) {
945 printf("\texpr %d;\n", e->follow_pos->pos[i]);
946 }
947 printf("before_pos:\n");
948 for (i = 0; i < e->before_pos->npos; i++) {
949 printf("\texpr %d;\n", e->before_pos->pos[i]);
950 }
951
952 printf("\n");
953 }