Mercurial > hg > Members > nobuyasu > test

diff regen/src/parse.c @ 11:1e0cd7fade8b
add regen
author: nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date: Sun, 19 Jun 2011 16:36:05 +0900
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/regen/src/parse.c	Sun Jun 19 16:36:05 2011 +0900
@@ -0,0 +1,953 @@
+#include "re.h"
+#include "util.h"
+#include "parse.h"
+#include "codegen.h"
+
+/*
+Grammar:
+
+EOP:          eall:   { e0 } EOP
+Union:          e0:   e1 { '|' e1 }*
+Concatination:  e1:   e2 { e2 }*
+Repetition:     e2:   e3 { '+' | '*' | '?' }
+Literal:        e3:   Literal | '.' | '$' | '[]' | '^' | '$' | Backref | '(' e0 ')'
+*/
+
+static EXPR_TYPE Toktype;
+static UCHARP Toklit, Ptr, End;
+static REGEX *R;
+EXPR *test;
+
+void
+lex()
+{
+  if (Ptr == End) {
+    Toktype = EOP;
+    Toklit = 0;
+  } else switch (Toklit = Ptr++, *Toklit) {
+    case '.': Toktype = Dot; break;
+    case '[': Toktype = Charclass; break;
+    case '|': Toktype = Union; break;
+    case '?': Toktype = Qmark; break;
+    case '+': Toktype = Plus; break;
+    case '*': Toktype = Star; break;
+    case '(': Toktype = Lpar; break;
+    case ')': Toktype = Rpar; break;
+    case '^': Toktype = BegLine; break;
+    case '$': Toktype = EndLine; break;
+    case '\\': Toktype = Literal;
+      if (Ptr == End) exitmsg("bad \\");
+      Toklit = Ptr++;
+      break;
+    default:
+      Toktype = Literal;
+  }
+}
+
+void
+parse(REGEX *r, CODEGEN *g)
+{
+  EXPR *e, *dotstar;
+  R = r;
+  Ptr = (UCHARP)r->regex;
+  End = (UCHARP)r->regex_end;
+  lex();
+
+  e = e0(r);
+  if (Toktype != EOP) exitmsg("expected end of pattern.");
+
+  r->min_length = e->min_length;
+  r->max_length = e->max_length;
+
+  fill_words(e);
+
+  r->must = e->must;
+
+  int i;
+  for (i = 0; i < r->must.in->size; i++) {
+    if (strlen(r->must.in->words[i]) > r->must_max_length) {
+      r->must_max_length = strlen(r->must.in->words[i]);
+      r->must_max_word = r->must.in->words[i];
+    }
+  }
+  for (i = 0; i < NCHAR; i++) {
+    if (r->involve[i]) r->ninvolve++;
+  }
+
+  if (r->anchored == FALSE) {
+    dotstar = newexpr(r, Dot, '*', (EXPR *)0, (EXPR *)0);
+    dotstar = newexpr(r, Star, '\0', dotstar, (EXPR *)0);
+    e = newexpr(r, Concat, '\0', dotstar, e);
+  }
+
+  e = newexpr(r, EOP, '#', e, (EXPR *)0);
+
+  fill_follow(e);
+
+  r->root = e;
+
+  if (r->debug) {
+    printf("regex: %s\n\n", r->regex);
+    printf("min-length: %d\n", r->min_length);
+    printf("max-length: %d\n", r->max_length);
+    printf("ninvolve:   %d\n\n", r->ninvolve);
+    for (i = 0; i < NCHAR; i++) {
+      if (r->involve[i]) {
+        printf("\"%s\", ", ASCII[i]);
+      }
+    }
+    puts("\n");
+    if (r->must.is != NULL) {
+      printf("is: %s\n", r->must.is);
+    }
+    if (r->must.in != NULL) {
+      for (i = 0; i < r->must.in->size; i++) {
+        printf("in: %s\n", r->must.in->words[i]);
+      }
+    }
+    if (r->must.left != NULL) {
+      printf("left: %s\n", r->must.left);
+    }
+    if (r->must.right != NULL) {
+      printf("right: %s\n", r->must.right);
+    }
+    for (i = 0; i < r->maxid; i++) {
+      print_pos(r->lit_expr[i]);
+    }
+    exit(0);
+  }
+}
+
+EXPR *
+e0(REGEX *r)
+{
+  EXPR *e, *f;
+
+  e = e1(r);
+  while (Toktype == Union) {
+    lex();
+    f = e1(r);
+    e = newexpr(r, Union, '\0', e, f);
+  }
+
+  return e;
+}
+
+EXPR *
+e1(REGEX *r)
+{
+  EXPR *e, *f;
+
+  e = e2(r);
+
+  while (Toktype == Literal || Toktype == Charclass
+         || Toktype == Dot || Toktype == Lpar
+         || Toktype == EndLine || Toktype == BegLine) {
+    f = e2(r);
+    e = newexpr(r, Concat, '\0', e, f);
+  }
+  return(e);
+}
+
+EXPR *
+e2(REGEX *r)
+{
+  EXPR *e;
+  e = e3(r);
+  for ( ; PlusStarQmark(Toktype); lex()) {
+    e = newexpr(r, Toktype, '\0', e, (EXPR *)0);
+  }
+  return(e);
+}
+
+EXPR *
+e3(REGEX *r)
+{
+  EXPR *e;
+  UCHAR *tbl;
+  int count;
+
+  switch(Toktype) {
+  case Literal:
+    e = newexpr(r, Literal, *Toklit, (EXPR *)0, (EXPR *)0);
+    break;
+  case BegLine:
+    e = newexpr(r, BegLine, '^', (EXPR *)0, (EXPR *)0);
+    break;
+  case EndLine:
+    e = newexpr(r, EndLine, '$', (EXPR *)0, (EXPR *)0);
+    break;
+  case Dot:
+    e = newexpr(r, Dot, '.', (EXPR *)0, (EXPR *)0);
+    break;
+  case Charclass:
+    tbl = (UCHARP)xmalloc(NCHAR, "charclass tbl");
+    count = ccl(tbl);
+    if (count == 1) {
+      e = newexpr(r, Literal, *tbl, (EXPR *)0, (EXPR *)0);
+      xfree(tbl);
+    } else {
+      e = newexpr(r, Charclass, '[', (EXPR *)count, (EXPR *)tbl);
+    }
+    break;
+  case Lpar:
+    lex(r);
+    e = e0(r);
+    if (Toktype != Rpar) exitmsg("expected a ')'");
+    break;
+  default:
+    exitmsg("expected a lit or '('");
+    break;
+  }
+  lex();
+  return e;
+}
+
+int
+ccl(UCHAR *tbl) {
+  int i;
+  UCHAR lastc = 0;;
+  int range = 0;
+  int count = 0;
+  UCHAR comp;
+
+  tbl[NCHAR] = 0; // count of characters in chaclass.
+  memset((char *)tbl, 0, NCHAR);
+
+  if (*Ptr == '^') {
+    Ptr++;
+    for (i = 32; i < 126; i ++) {
+      tbl[i] = 1;
+    }
+    comp = 0;
+  } else {
+    comp = 1;
+  }
+
+  if (*Ptr == ']' || *Ptr == '-') {
+    tbl[*Ptr] = comp;
+    lastc = *Ptr++;
+  }
+
+  for (; (Ptr < End) && (*Ptr != ']'); Ptr++) {
+    if (!range && *Ptr == '-') {
+      range = 1;
+      continue;
+    }
+    if (*Ptr == '\\') {
+      if (++Ptr >= End) {
+        exitmsg(" [ ] imbalance");
+      }
+    }
+
+    tbl[*Ptr] = comp;
+
+    if (range) {
+      for (i = (*Ptr) - 1; i > lastc; i--) {
+        tbl[i] = comp;
+      }
+      range = 0;
+    }
+    lastc = *Ptr;
+  }
+  if (Ptr == End) exitmsg(" [ ] imbalance");
+
+  if (range) {
+    tbl['-'] = comp;
+    range = 0;
+  }
+
+  Ptr++;
+
+  for (i = 0; i < NCHAR; i++) {
+    if (tbl[i]) {
+      lastc = i;
+      count++;
+    }
+  }
+  if (count == 1) *tbl = (char)lastc;
+
+  /* Return the character count. */
+  return count;
+}
+
+EXPR *
+newexpr(REGEX *r, EXPR_TYPE t, UCHAR lit,  EXPR* left, EXPR* right)
+{
+  int i;
+  EXPR *e = (EXPR *)xmalloc(sizeof(EXPR), "newexpr");
+  e->type = t;
+  e->lit = lit;
+  e->l = left;
+  e->r = right;
+  e->first_pos = newpos();
+  e->last_pos = newpos();
+  e->follow_pos = newpos();
+  e->before_pos = newpos();
+  e->parent = 0;
+
+  switch (t) {
+  case EOP:
+    e->id = 0;
+    r->lit_expr[e->id] = e;
+    break;
+  case Dot: case Charclass: case Literal:  case BegLine: case EndLine:
+    e->id = r->maxid++;
+    r->lit_expr[e->id] = e;
+    break;
+  default:
+    e->id = -1;
+    break;
+  }
+
+  switch (t) {
+  case Dot:
+    if (e->lit != '*') {
+      for (i = 0; i < NCHAR; i++) {
+        r->involve[i] = 1;
+      }
+    }
+    break;
+  case Charclass:
+    for (i = 0; i < NCHAR; i++) {
+      if (e->tbl[i]) {
+        r->involve[i] = 1;
+      }
+    }
+    break;
+ case BegLine: case EndLine:
+    r->involve[NL] = 1;
+    break;
+  case Literal:
+    r->involve[e->lit] = 1;
+    break;
+  default:
+    break;
+  }
+
+  switch (t) {
+  case Union: case Concat:
+    left->parent = e;
+    right->parent = e;
+    break;
+  case EOP: case Plus: case Star: case Qmark:
+    left->parent = e;
+    break;
+  default:
+    break;
+  }
+
+  rept_compress(e, e->l); /* optimize */
+  fill_expr(e);
+  return e;
+}
+
+void
+fill_expr(e)
+EXPR *e;
+{
+  short l, r;
+
+  switch(e->type) {
+  case EOP:
+    e->max_length = e->l->max_length;
+    e->min_length = e->l->min_length;
+    break;
+  case Plus:
+    e->max_length = -1;
+    e->min_length = e->l->min_length;
+    break;
+  case Star:
+    e->max_length = -1;
+    e->min_length = 0;
+    break;
+  case Qmark:
+    e->max_length = e->l->max_length;
+    e->min_length = 0;
+    break;
+  case Union:
+    l = e->l->max_length; r = e->r->max_length;
+    e->max_length = (INFOR(l, r) ? -1 : MAX(l, r));
+    l = e->l->min_length; r = e->r->min_length;
+    e->min_length = MIN(e->l->min_length, e->r->min_length);
+    break;
+  case Concat:
+    l = e->l->max_length; r = e->r->max_length;
+    e->max_length = (INFOR(l, r) ? -1 : l + r);
+    l = e->l->min_length; r = e->r->min_length;
+    e->min_length = l + r;
+    break;
+  default:
+    e->min_length = e->max_length = 1;
+  }
+
+  switch (e->type) {
+  case Star: case Qmark: case Plus:
+    copypos(e->l->first_pos, e->first_pos);
+    copypos(e->l->last_pos, e->last_pos);
+    break;
+  case Union:
+    copypos(e->r->first_pos, e->first_pos);
+    copypos(e->l->first_pos, e->first_pos);
+    copypos(e->r->last_pos, e->last_pos);
+    copypos(e->l->last_pos, e->last_pos);
+    break;
+  case Concat:
+    if (e->l->min_length == 0) {
+      copypos(e->l->first_pos, e->first_pos);
+      copypos(e->r->first_pos, e->first_pos);
+    } else {
+      copypos(e->l->first_pos, e->first_pos);
+    }
+    if (e->r->min_length == 0) {
+      copypos(e->l->last_pos, e->last_pos);
+      copypos(e->r->last_pos, e->last_pos);
+    } else {
+      copypos(e->r->last_pos, e->last_pos);
+    }
+    break;
+  case BegLine: case EndLine: case Literal: case Dot: case Charclass: case EOP:
+    addpos(e->first_pos, e->id);
+    addpos(e->last_pos, e->id);
+  default:
+    break;
+  }
+}
+
+/* singleton */
+static const WORD_SET NOWORDS = {NULL, 0};
+static const MUST NOMUST = {
+  (WORD_SET *)&NOWORDS, NULL, NULL, NULL
+};
+
+void fill_words(EXPR *e) {
+  int l1, l2, i, j;
+  char *str;
+
+  switch (e->type) {
+  case EOP:
+    fill_words(e->l);
+    e->must = e->l->must;
+    break;
+  case Plus:
+    fill_words(e->l);
+    e->must = e->l->must;
+    e->must.is = NULL;
+    break;
+  case Union:
+    fill_words(e->l);
+    fill_words(e->r);
+    /* fill must.is */
+    if (e->l->must.is && e->r->must.is
+        && !strcmp(e->l->must.is, e->r->must.is)) {
+      e->must.is = e->must.left = e->must.right = e->l->must.is;
+      e->must.in = word_set_new(1);
+      e->must.in->words[0] = e->must.is;
+      return;
+    } else {
+      e->must.is = NULL;
+    }
+
+    /* fill must.left */
+    if (e->l->must.left && e->r->must.left) {
+      l1 = strlen(e->l->must.left);
+      l2 = strlen(e->r->must.left);
+      for (i = 0; i < l1 && i < l2; i++) {
+        if (e->l->must.left[i] != e->r->must.left[i]) {
+          break;
+        }
+      }
+      if (i) {
+        str = (char *)xmalloc(sizeof(char) * (i+1), "must: union-left");
+        for (j = 0; j < i; j++) str[j] = e->l->must.left[j];
+        str[j] = '\0';
+        e->must.left = str;
+      } else {
+        /* no common words */
+        e->must.left = NULL;
+      }
+    } else {
+      e->must.left = NULL;
+    }
+
+    /* fill must.right */
+    if (e->l->must.right && e->r->must.right) {
+      l1 = strlen(e->l->must.right);
+      l2 = strlen(e->r->must.right);
+      for (i = 0; i < l1 && i < l2; i++) {
+        if (e->l->must.right[l1-i-1] != e->r->must.right[l2-i-1]) {
+          break;
+        }
+      }
+      if (i) {
+        str = (char *)xmalloc(sizeof(char) * (i+1), "must: union-right");
+        for (j = 0; j < i; j++) str[i-j-1] = e->l->must.right[l1-j-1];
+        str[j] = '\0';
+        e->must.right = str;
+      } else {
+        /* no common words */
+        e->must.right = NULL;
+      }
+    } else {
+      e->must.right = NULL;
+    }
+
+    /* fill must.in */
+    if (e->must.left && e->must.right) {
+      l1 = strlen(e->must.left);
+      l2 = strlen(e->must.right);
+      e->must.in = word_set_new(1);
+      e->must.in->words[0] = l1 > l2 ? e->must.left : e->must.right;
+    } else if (e->must.left || e->must.right) {
+      e->must.in = word_set_new(1);
+      e->must.in->words[0] = e->must.left ? e->must.left : e->must.right;
+    } else {
+      e->must.in = word_set_new(0);
+    }
+
+    break;
+  case Concat:
+    /* fill must.is */
+    fill_words(e->l);
+    fill_words(e->r);
+    if (e->l->must.is && e->r->must.is) {
+      l1 = strlen(e->l->must.is);
+      l2 = strlen(e->r->must.is);
+      str = (char *)xmalloc(sizeof(char) * (l1+l2+1), "must: concat-is");
+      strcpy(str, e->l->must.is);
+      strcat(str, e->r->must.is);
+      e->must.is = e->must.left = e->must.right = str;
+      e->must.in = word_set_new(1);
+      e->must.in->words[0] = str;
+      break;
+    } else {
+      e->must.is = NULL;
+    }
+
+    /* fill must.left */
+    if (e->l->must.is && e->r->must.left) {
+      l1 = strlen(e->l->must.is);
+      l2 = strlen(e->r->must.left);
+      str = (char *)xmalloc(sizeof(char) * (l1+l2+1), "must: concat-is");
+      strcpy(str, e->l->must.is);
+      strcat(str, e->r->must.left);
+      e->must.left = str;
+    } else {
+      e->must.left = e->l->must.left;
+    }
+
+    /* fill must.right */
+    if (e->r->must.is && e->l->must.right) {
+      l1 = strlen(e->r->must.is);
+      l2 = strlen(e->l->must.right);
+      str = (char *)xmalloc(sizeof(char) * (l1+l2+1), "must: concat-is");
+      strcpy(str, e->l->must.right);
+      strcat(str, e->r->must.is);
+      e->must.right = str;
+    } else {
+      e->must.right = e->r->must.right;
+    }
+
+    /* fill must.in */
+    if (e->must.is) {
+      e->must.in = word_set_new(1);
+      e->must.in->words[0] = e->must.is;
+    } else if (e->l->must.is && e->r->must.left) {
+      l1 = e->r->must.in->size;
+      e->must.in = word_set_new(l1);
+      e->must.in->words[0] = e->must.left;
+      for (i = 0, j = 0; i < l1; i++) {
+        if (!strcmp(e->r->must.left, e->r->must.in->words[i]) && j == 0) {
+          j = 1;
+          continue;
+        }
+        e->must.in->words[i+1-j] = e->r->must.in->words[i];
+      }
+    } else if (e->l->must.right && e->r->must.is) {
+      l1 = e->l->must.in->size;
+      e->must.in = word_set_new(l1);
+      e->must.in->words[0] = e->must.right;
+      for (i = 0, j = 0; i < l1; i++) {
+        if (!strcmp(e->l->must.right, e->l->must.in->words[i]) && j == 0) {
+          j = 1;
+          continue;
+        }
+        e->must.in->words[i+1-j] = e->l->must.in->words[i];
+      }
+    } else if (e->l->must.right && e->r->must.left) {
+      l1 = strlen(e->l->must.right);
+      l2 = strlen(e->r->must.left);
+      str = (char *)xmalloc(sizeof(char) * (l1+l2+1), "must: concat-in");
+      strcpy(str, e->l->must.right);
+      strcat(str, e->r->must.left);
+      l1 = e->l->must.in->size;
+      l2 = e->r->must.in->size;
+      e->must.in = word_set_new(l1+l2-1);
+      e->must.in->words[0] = str;
+      for (i = 0, j = 0; i < l1; i++) {
+        if (!strcmp(e->l->must.right, e->l->must.in->words[i]) && j == 0) {
+          j = 1;
+          continue;
+        }
+        e->must.in->words[i+1-j] = e->l->must.in->words[i];
+      }
+      for (i = 0, j = 0; i < l2; i++) {
+        if (!strcmp(e->l->must.right, e->l->must.in->words[i]) && j == 0) {
+          j = 1;
+          continue;
+        }
+        e->must.in->words[i+l1-j] = e->r->must.in->words[i];
+      }
+    } else {
+      l1 = e->l->must.in->size;
+      l2 = e->r->must.in->size;
+      e->must.in = word_set_new(l1+l2);
+      for (i = 0; i < l1; i++) {
+        e->must.in->words[i] = e->l->must.in->words[i];
+      }
+      for (i = 0; i < l2; i++) {
+        e->must.in->words[i+l1] = e->r->must.in->words[i];
+      }
+    }
+    break;
+  case Literal:
+    str = (char *)xcalloc(sizeof(char), 2, "must: literal");
+    str[0] = e->lit;
+    e->must.is = e->must.left = e->must.right = str;
+    e->must.in = word_set_new(1);
+    e->must.in->words[0] = str;
+    break;
+  default:
+    e->must = NOMUST;
+  }
+}
+
+WORD_SET *
+word_set_new (int size)
+{
+  if (size == 0) return (WORD_SET *)&NOWORDS;
+
+  WORD_SET *word_set = (WORD_SET *)xmalloc(sizeof(WORD_SET), "word_set");
+  word_set->size = size;
+  if (size > 0) {
+    word_set->words = (char **)xcalloc(sizeof(char *), size, "words");
+  }
+  return word_set;
+}
+
+void fill_follow(EXPR *e)
+{
+  switch (e->type) {
+  case EOP:
+    connect(e->l->last_pos, e->first_pos);
+    fill_follow(e->l);
+    break;
+  case Concat:
+    connect(e->l->last_pos, e->r->first_pos);
+    fill_follow(e->r);
+    fill_follow(e->l);
+    break;
+  case Union:
+    fill_follow(e->r);
+    fill_follow(e->l);
+    break;
+  case Star: case Plus:
+    connect(e->l->last_pos, e->l->first_pos);
+    fill_follow(e->l);
+    break;
+  case Qmark:
+    fill_follow(e->l);
+    break;
+  default:
+    break;
+  }
+  return;
+}
+
+POSITION_SET *
+newpos()
+{
+  static const int init_size = 10;
+  POSITION_SET *pos = (POSITION_SET *)xmalloc(sizeof(POSITION_SET) ,"position set");
+  pos->npos = 0;
+  pos->pos = (int *)xmalloc(sizeof(int)*init_size, "array in position set");
+  pos->size = init_size;
+  return pos;
+}
+
+void
+connect(POSITION_SET *set1, POSITION_SET *set2)
+{
+  int i, j;
+  EXPR **tbl = R->lit_expr;
+  for (i = 0; i < set1->npos; i++) {
+    for (j = 0; j < set2->npos; j++) {
+      addpos(tbl[set1->pos[i]]->follow_pos,
+             set2->pos[j]);
+      addpos(tbl[set2->pos[j]]->before_pos,
+             set1->pos[i]);
+    }
+  }
+}
+
+void
+copypos(POSITION_SET *src, POSITION_SET *dst)
+{
+  int i;
+  for (i = 0; i < src->npos; i++) {
+    addpos(dst, src->pos[i]);
+  }
+}
+
+void
+addpos(POSITION_SET *set, int id)
+{
+  int i;
+
+  if (id < 0 || R->maxid < id) {
+    exitmsg("invalid id for addpos: %d\n", id);
+  }
+
+  /* check duplication. */
+  for (i = 0; i < set->npos; i++) {
+    if (set->pos[i] == id) {
+      return;
+    }
+  }
+
+  /* check for realloc. before adding. */
+  if (set->npos >= set->size) {
+    set->size *= 2;
+    set->pos = (int *)xrealloc(set->pos, (set->size)*sizeof(int), "realloc addpos");
+  }
+
+  set->pos[set->npos] = id;
+  set->npos++;
+}
+
+void
+delete_rept(EXPR *e)
+{
+#ifdef DEBUG
+  print_expr(R->root);
+#endif
+  e->l->parent = e->parent;
+  if (e->parent->l == e) e->parent->l = e->l;
+  else e->parent->r = e->l;
+
+  percolate(e->parent);
+  rept_compress (e->parent, e->l);
+
+  xfree(e);
+#ifdef DEBUG
+  print_expr(R->root);
+#endif
+}
+
+void
+rept_compress(EXPR *parent, EXPR* child)
+{
+  switch(parent->type) {
+  case Plus: case Star: case Qmark: break;
+  default: return;
+  }
+  switch(child->type) {
+  case Plus: case Star: case Qmark: break;
+  default: return;
+  }
+
+  if (parent->type != child->type && parent->type != Star) {
+    change_rept(parent, Star);
+  }
+
+  delete_rept(child);
+}
+
+void
+percolate(EXPR *e)
+{
+  fill_expr(e);
+  if (e->parent) percolate(e->parent);
+}
+
+void
+change_rept(EXPR *e, EXPR_TYPE type)
+{
+#ifdef DEBUG
+  print_expr(R->root);
+#endif
+  e->type = type;
+  percolate(e);
+#ifdef DEBUG
+  print_expr(R->root);
+#endif
+}
+
+void
+delete_expr(EXPR *e)
+{
+  EXPR *parent = e->parent;
+  EXPR *grandparent, *sibling;
+
+  if (!parent) {
+    exitmsg("INTERNAL ERROR: delete_expr abort %d\n", e->type);
+  }
+
+  switch(parent->type) {
+  case Plus: case Star: case Qmark: case Union:
+    delete_expr(parent);
+    return;
+  case Concat:
+    break;
+  default:
+    exitmsg("INTERNAL ERROR: delete_expr abort %d\n", parent->type);
+  }
+#ifdef DEBUG
+  print_expr(R->root);
+#endif
+  grandparent = parent->parent;
+
+  if (e == parent->r) {
+    sibling = parent->l;
+  } else {
+    sibling = parent->r;
+  }
+
+  sibling->parent = grandparent;
+
+  if (parent == grandparent->r) {
+    grandparent->r = sibling;
+  } else {
+    grandparent->l = sibling;
+  }
+
+  percolate(grandparent);
+
+  parent->r = parent->l = 0;
+  free_expr(e);
+
+  rept_compress(grandparent, sibling);
+#ifdef DEBUG
+  print_expr(R->root);
+#endif
+}
+
+BOOL
+match_every_line(REGEX *r)
+{
+  if (r->root->min_length == 0) {
+    free_expr(r->root);
+    r->maxid = 1;
+    r->root = newexpr(r, Literal, NL, (EXPR *)0, (EXPR *)0);
+    r->root = newexpr(r, EOP, '#', r->root, (EXPR *)0);
+    return TRUE;
+  }
+  return FALSE;
+}
+
+void
+free_expr(EXPR *e)
+{
+  if (e == 0) return;
+  switch (e->type) {
+  case Charclass:
+    /* Don't free the table, because it may be shared,
+     * but zero the pointer to it. TODO
+     */
+    e->r = 0;
+  default:
+    xfree(e->first_pos);
+    xfree(e->last_pos);
+    xfree(e->follow_pos);
+  }
+
+  if (e->r) free_expr(e->r);
+  if (e->l) free_expr(e->l);
+
+  xfree(e);
+}
+
+void
+print_expr(EXPR *e)
+{
+  switch(e->type) {
+  case EOP:
+    print_expr(e->l); printf("#\n"); return;
+  case Plus:
+    print_expr(e->l); printf("+"); return;
+  case Star:
+    print_expr(e->l); printf("*"); return;
+  case Qmark:
+    print_expr(e->l); printf("?"); return;
+  case Concat:
+    if (PlusStarQmark(e->parent->type)) printf("(");
+    print_expr(e->l); print_expr(e->r);
+    if (PlusStarQmark(e->parent->type)) printf(")");
+    return;
+  case Union:
+    if (PlusStarQmark(e->parent->type)) printf("(");
+    if (e->parent->type == Concat) printf("(");
+    print_expr(e->l); printf("|"); print_expr(e->r);
+    if (PlusStarQmark(e->parent->type)) printf(")");
+    if (e->parent->type == Concat) printf(")");
+    return;
+  case Dot:
+    printf("."); return;
+  case Charclass:;
+    int i;
+    printf("[");
+    for (i = 0; i < NCHAR; i++) {
+      if (((UCHARP)e->r)[i]) {
+        printf("%c", (UCHAR)i);
+      }
+    }
+    printf("]"); return;
+  case BegLine:
+    printf("^");
+    return;
+  case EndLine:
+    printf("$");
+    return;
+  case Literal:
+    switch (e->lit) {
+    case '\n':
+      printf("\\n"); return;
+    case '\v':
+      printf("\\v"); return;
+    case '\f':
+      printf("\\f"); return;
+    case '\t':
+      printf("\\t"); return;
+    case '\r':
+      printf("\\r"); return;
+    default:
+      printf("%c", e->lit); return;
+    }
+  default:
+    printf("%c", e->lit); return;
+  }
+}
+
+void print_pos(EXPR *e)
+{
+  int i;
+  printf("expr %d (lit='%c')\n", e->id, e->lit);
+  printf("first_pos:\n");
+  for (i = 0; i < e->first_pos->npos; i++) {
+    printf("\texpr %d;\n", e->first_pos->pos[i]);
+  }
+  printf("last_pos:\n");
+  for (i = 0; i < e->last_pos->npos; i++) {
+    printf("\texpr %d;\n", e->last_pos->pos[i]);
+  }
+  printf("follow_pos:\n");
+  for (i = 0; i < e->follow_pos->npos; i++) {
+    printf("\texpr %d;\n", e->follow_pos->pos[i]);
+  }
+  printf("before_pos:\n");
+  for (i = 0; i < e->before_pos->npos; i++) {
+    printf("\texpr %d;\n", e->before_pos->pos[i]);
+  }
+
+  printf("\n");
+}
author	nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date	Sun, 19 Jun 2011 16:36:05 +0900
parents
children