view regen/src/parse.c @ 11:1e0cd7fade8b

add regen
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Sun, 19 Jun 2011 16:36:05 +0900
parents
children
line wrap: on
line source

#include "re.h"
#include "util.h"
#include "parse.h"
#include "codegen.h"

/*
Grammar:

EOP:          eall:   { e0 } EOP
Union:          e0:   e1 { '|' e1 }*
Concatination:  e1:   e2 { e2 }*
Repetition:     e2:   e3 { '+' | '*' | '?' }
Literal:        e3:   Literal | '.' | '$' | '[]' | '^' | '$' | Backref | '(' e0 ')'
*/

static EXPR_TYPE Toktype;
static UCHARP Toklit, Ptr, End;
static REGEX *R;
EXPR *test;

void
lex()
{
  if (Ptr == End) {
    Toktype = EOP;
    Toklit = 0;
  } else switch (Toklit = Ptr++, *Toklit) {
    case '.': Toktype = Dot; break;
    case '[': Toktype = Charclass; break;
    case '|': Toktype = Union; break;
    case '?': Toktype = Qmark; break;
    case '+': Toktype = Plus; break;
    case '*': Toktype = Star; break;
    case '(': Toktype = Lpar; break;
    case ')': Toktype = Rpar; break;
    case '^': Toktype = BegLine; break;
    case '$': Toktype = EndLine; break;
    case '\\': Toktype = Literal;
      if (Ptr == End) exitmsg("bad \\");
      Toklit = Ptr++;
      break;
    default:
      Toktype = Literal;
  }
}

void
parse(REGEX *r, CODEGEN *g)
{
  EXPR *e, *dotstar;
  R = r;
  Ptr = (UCHARP)r->regex;
  End = (UCHARP)r->regex_end;
  lex();

  e = e0(r);
  if (Toktype != EOP) exitmsg("expected end of pattern.");

  r->min_length = e->min_length;
  r->max_length = e->max_length;

  fill_words(e);

  r->must = e->must;

  int i;
  for (i = 0; i < r->must.in->size; i++) {
    if (strlen(r->must.in->words[i]) > r->must_max_length) {
      r->must_max_length = strlen(r->must.in->words[i]);
      r->must_max_word = r->must.in->words[i];
    }
  }
  for (i = 0; i < NCHAR; i++) {
    if (r->involve[i]) r->ninvolve++;
  }

  if (r->anchored == FALSE) {
    dotstar = newexpr(r, Dot, '*', (EXPR *)0, (EXPR *)0);
    dotstar = newexpr(r, Star, '\0', dotstar, (EXPR *)0);
    e = newexpr(r, Concat, '\0', dotstar, e);
  }

  e = newexpr(r, EOP, '#', e, (EXPR *)0);

  fill_follow(e);

  r->root = e;

  if (r->debug) {
    printf("regex: %s\n\n", r->regex);
    printf("min-length: %d\n", r->min_length);
    printf("max-length: %d\n", r->max_length);
    printf("ninvolve:   %d\n\n", r->ninvolve);
    for (i = 0; i < NCHAR; i++) {
      if (r->involve[i]) {
        printf("\"%s\", ", ASCII[i]);
      }
    }
    puts("\n");
    if (r->must.is != NULL) {
      printf("is: %s\n", r->must.is);
    }
    if (r->must.in != NULL) {
      for (i = 0; i < r->must.in->size; i++) {
        printf("in: %s\n", r->must.in->words[i]);
      }
    }
    if (r->must.left != NULL) {
      printf("left: %s\n", r->must.left);
    }
    if (r->must.right != NULL) {
      printf("right: %s\n", r->must.right);
    }
    for (i = 0; i < r->maxid; i++) {
      print_pos(r->lit_expr[i]);
    }
    exit(0);
  }
}

EXPR *
e0(REGEX *r)
{
  EXPR *e, *f;

  e = e1(r);
  while (Toktype == Union) {
    lex();
    f = e1(r);
    e = newexpr(r, Union, '\0', e, f);
  }

  return e;
}

EXPR *
e1(REGEX *r)
{
  EXPR *e, *f;

  e = e2(r);

  while (Toktype == Literal || Toktype == Charclass
         || Toktype == Dot || Toktype == Lpar
         || Toktype == EndLine || Toktype == BegLine) {
    f = e2(r);
    e = newexpr(r, Concat, '\0', e, f);
  }
  return(e);
}

EXPR *
e2(REGEX *r)
{
  EXPR *e;
  e = e3(r);
  for ( ; PlusStarQmark(Toktype); lex()) {
    e = newexpr(r, Toktype, '\0', e, (EXPR *)0);
  }
  return(e);
}

EXPR *
e3(REGEX *r)
{
  EXPR *e;
  UCHAR *tbl;
  int count;

  switch(Toktype) {
  case Literal:
    e = newexpr(r, Literal, *Toklit, (EXPR *)0, (EXPR *)0);
    break;
  case BegLine:
    e = newexpr(r, BegLine, '^', (EXPR *)0, (EXPR *)0);
    break;
  case EndLine:
    e = newexpr(r, EndLine, '$', (EXPR *)0, (EXPR *)0);
    break;
  case Dot:
    e = newexpr(r, Dot, '.', (EXPR *)0, (EXPR *)0);
    break;
  case Charclass:
    tbl = (UCHARP)xmalloc(NCHAR, "charclass tbl");
    count = ccl(tbl);
    if (count == 1) {
      e = newexpr(r, Literal, *tbl, (EXPR *)0, (EXPR *)0);
      xfree(tbl);
    } else {
      e = newexpr(r, Charclass, '[', (EXPR *)count, (EXPR *)tbl);
    }
    break;
  case Lpar:
    lex(r);
    e = e0(r);
    if (Toktype != Rpar) exitmsg("expected a ')'");
    break;
  default:
    exitmsg("expected a lit or '('");
    break;
  }
  lex();
  return e;
}

int
ccl(UCHAR *tbl) {
  int i;
  UCHAR lastc = 0;;
  int range = 0;
  int count = 0;
  UCHAR comp;

  tbl[NCHAR] = 0; // count of characters in chaclass.
  memset((char *)tbl, 0, NCHAR);

  if (*Ptr == '^') {
    Ptr++;
    for (i = 32; i < 126; i ++) {
      tbl[i] = 1;
    }
    comp = 0;
  } else {
    comp = 1;
  }

  if (*Ptr == ']' || *Ptr == '-') {
    tbl[*Ptr] = comp;
    lastc = *Ptr++;
  }

  for (; (Ptr < End) && (*Ptr != ']'); Ptr++) {
    if (!range && *Ptr == '-') {
      range = 1;
      continue;
    }
    if (*Ptr == '\\') {
      if (++Ptr >= End) {
        exitmsg(" [ ] imbalance");
      }
    }

    tbl[*Ptr] = comp;

    if (range) {
      for (i = (*Ptr) - 1; i > lastc; i--) {
        tbl[i] = comp;
      }
      range = 0;
    }
    lastc = *Ptr;
  }
  if (Ptr == End) exitmsg(" [ ] imbalance");

  if (range) {
    tbl['-'] = comp;
    range = 0;
  }

  Ptr++;

  for (i = 0; i < NCHAR; i++) {
    if (tbl[i]) {
      lastc = i;
      count++;
    }
  }
  if (count == 1) *tbl = (char)lastc;

  /* Return the character count. */
  return count;
}

EXPR *
newexpr(REGEX *r, EXPR_TYPE t, UCHAR lit,  EXPR* left, EXPR* right)
{
  int i;
  EXPR *e = (EXPR *)xmalloc(sizeof(EXPR), "newexpr");
  e->type = t;
  e->lit = lit;
  e->l = left;
  e->r = right;
  e->first_pos = newpos();
  e->last_pos = newpos();
  e->follow_pos = newpos();
  e->before_pos = newpos();
  e->parent = 0;

  switch (t) {
  case EOP:
    e->id = 0;
    r->lit_expr[e->id] = e;
    break;
  case Dot: case Charclass: case Literal:  case BegLine: case EndLine:
    e->id = r->maxid++;
    r->lit_expr[e->id] = e;
    break;
  default:
    e->id = -1;
    break;
  }

  switch (t) {
  case Dot:
    if (e->lit != '*') {
      for (i = 0; i < NCHAR; i++) {
        r->involve[i] = 1;
      }
    }
    break;
  case Charclass:
    for (i = 0; i < NCHAR; i++) {
      if (e->tbl[i]) {
        r->involve[i] = 1;
      }
    }
    break;
 case BegLine: case EndLine:
    r->involve[NL] = 1;
    break;
  case Literal:
    r->involve[e->lit] = 1;
    break;
  default:
    break;
  }

  switch (t) {
  case Union: case Concat:
    left->parent = e;
    right->parent = e;
    break;
  case EOP: case Plus: case Star: case Qmark:
    left->parent = e;
    break;
  default:
    break;
  }

  rept_compress(e, e->l); /* optimize */
  fill_expr(e);
  return e;
}

void
fill_expr(e)
EXPR *e;
{
  short l, r;

  switch(e->type) {
  case EOP:
    e->max_length = e->l->max_length;
    e->min_length = e->l->min_length;
    break;
  case Plus:
    e->max_length = -1;
    e->min_length = e->l->min_length;
    break;
  case Star:
    e->max_length = -1;
    e->min_length = 0;
    break;
  case Qmark:
    e->max_length = e->l->max_length;
    e->min_length = 0;
    break;
  case Union:
    l = e->l->max_length; r = e->r->max_length;
    e->max_length = (INFOR(l, r) ? -1 : MAX(l, r));
    l = e->l->min_length; r = e->r->min_length;
    e->min_length = MIN(e->l->min_length, e->r->min_length);
    break;
  case Concat:
    l = e->l->max_length; r = e->r->max_length;
    e->max_length = (INFOR(l, r) ? -1 : l + r);
    l = e->l->min_length; r = e->r->min_length;
    e->min_length = l + r;
    break;
  default:
    e->min_length = e->max_length = 1;
  }

  switch (e->type) {
  case Star: case Qmark: case Plus:
    copypos(e->l->first_pos, e->first_pos);
    copypos(e->l->last_pos, e->last_pos);
    break;
  case Union:
    copypos(e->r->first_pos, e->first_pos);
    copypos(e->l->first_pos, e->first_pos);
    copypos(e->r->last_pos, e->last_pos);
    copypos(e->l->last_pos, e->last_pos);
    break;
  case Concat:
    if (e->l->min_length == 0) {
      copypos(e->l->first_pos, e->first_pos);
      copypos(e->r->first_pos, e->first_pos);
    } else {
      copypos(e->l->first_pos, e->first_pos);
    }
    if (e->r->min_length == 0) {
      copypos(e->l->last_pos, e->last_pos);
      copypos(e->r->last_pos, e->last_pos);
    } else {
      copypos(e->r->last_pos, e->last_pos);
    }
    break;
  case BegLine: case EndLine: case Literal: case Dot: case Charclass: case EOP:
    addpos(e->first_pos, e->id);
    addpos(e->last_pos, e->id);
  default:
    break;
  }
}

/* singleton */
static const WORD_SET NOWORDS = {NULL, 0};
static const MUST NOMUST = {
  (WORD_SET *)&NOWORDS, NULL, NULL, NULL
};

void fill_words(EXPR *e) {
  int l1, l2, i, j;
  char *str;

  switch (e->type) {
  case EOP:
    fill_words(e->l);
    e->must = e->l->must;
    break;
  case Plus:
    fill_words(e->l);
    e->must = e->l->must;
    e->must.is = NULL;
    break;
  case Union:
    fill_words(e->l);
    fill_words(e->r);
    /* fill must.is */
    if (e->l->must.is && e->r->must.is
        && !strcmp(e->l->must.is, e->r->must.is)) {
      e->must.is = e->must.left = e->must.right = e->l->must.is;
      e->must.in = word_set_new(1);
      e->must.in->words[0] = e->must.is;
      return;
    } else {
      e->must.is = NULL;
    }

    /* fill must.left */
    if (e->l->must.left && e->r->must.left) {
      l1 = strlen(e->l->must.left);
      l2 = strlen(e->r->must.left);
      for (i = 0; i < l1 && i < l2; i++) {
        if (e->l->must.left[i] != e->r->must.left[i]) {
          break;
        }
      }
      if (i) {
        str = (char *)xmalloc(sizeof(char) * (i+1), "must: union-left");
        for (j = 0; j < i; j++) str[j] = e->l->must.left[j];
        str[j] = '\0';
        e->must.left = str;
      } else {
        /* no common words */
        e->must.left = NULL;
      }
    } else {
      e->must.left = NULL;
    }

    /* fill must.right */
    if (e->l->must.right && e->r->must.right) {
      l1 = strlen(e->l->must.right);
      l2 = strlen(e->r->must.right);
      for (i = 0; i < l1 && i < l2; i++) {
        if (e->l->must.right[l1-i-1] != e->r->must.right[l2-i-1]) {
          break;
        }
      }
      if (i) {
        str = (char *)xmalloc(sizeof(char) * (i+1), "must: union-right");
        for (j = 0; j < i; j++) str[i-j-1] = e->l->must.right[l1-j-1];
        str[j] = '\0';
        e->must.right = str;
      } else {
        /* no common words */
        e->must.right = NULL;
      }
    } else {
      e->must.right = NULL;
    }

    /* fill must.in */
    if (e->must.left && e->must.right) {
      l1 = strlen(e->must.left);
      l2 = strlen(e->must.right);
      e->must.in = word_set_new(1);
      e->must.in->words[0] = l1 > l2 ? e->must.left : e->must.right;
    } else if (e->must.left || e->must.right) {
      e->must.in = word_set_new(1);
      e->must.in->words[0] = e->must.left ? e->must.left : e->must.right;
    } else {
      e->must.in = word_set_new(0);
    }

    break;
  case Concat:
    /* fill must.is */
    fill_words(e->l);
    fill_words(e->r);
    if (e->l->must.is && e->r->must.is) {
      l1 = strlen(e->l->must.is);
      l2 = strlen(e->r->must.is);
      str = (char *)xmalloc(sizeof(char) * (l1+l2+1), "must: concat-is");
      strcpy(str, e->l->must.is);
      strcat(str, e->r->must.is);
      e->must.is = e->must.left = e->must.right = str;
      e->must.in = word_set_new(1);
      e->must.in->words[0] = str;
      break;
    } else {
      e->must.is = NULL;
    }

    /* fill must.left */
    if (e->l->must.is && e->r->must.left) {
      l1 = strlen(e->l->must.is);
      l2 = strlen(e->r->must.left);
      str = (char *)xmalloc(sizeof(char) * (l1+l2+1), "must: concat-is");
      strcpy(str, e->l->must.is);
      strcat(str, e->r->must.left);
      e->must.left = str;
    } else {
      e->must.left = e->l->must.left;
    }

    /* fill must.right */
    if (e->r->must.is && e->l->must.right) {
      l1 = strlen(e->r->must.is);
      l2 = strlen(e->l->must.right);
      str = (char *)xmalloc(sizeof(char) * (l1+l2+1), "must: concat-is");
      strcpy(str, e->l->must.right);
      strcat(str, e->r->must.is);
      e->must.right = str;
    } else {
      e->must.right = e->r->must.right;
    }

    /* fill must.in */
    if (e->must.is) {
      e->must.in = word_set_new(1);
      e->must.in->words[0] = e->must.is;
    } else if (e->l->must.is && e->r->must.left) {
      l1 = e->r->must.in->size;
      e->must.in = word_set_new(l1);
      e->must.in->words[0] = e->must.left;
      for (i = 0, j = 0; i < l1; i++) {
        if (!strcmp(e->r->must.left, e->r->must.in->words[i]) && j == 0) {
          j = 1;
          continue;
        }
        e->must.in->words[i+1-j] = e->r->must.in->words[i];
      }
    } else if (e->l->must.right && e->r->must.is) {
      l1 = e->l->must.in->size;
      e->must.in = word_set_new(l1);
      e->must.in->words[0] = e->must.right;
      for (i = 0, j = 0; i < l1; i++) {
        if (!strcmp(e->l->must.right, e->l->must.in->words[i]) && j == 0) {
          j = 1;
          continue;
        }
        e->must.in->words[i+1-j] = e->l->must.in->words[i];
      }
    } else if (e->l->must.right && e->r->must.left) {
      l1 = strlen(e->l->must.right);
      l2 = strlen(e->r->must.left);
      str = (char *)xmalloc(sizeof(char) * (l1+l2+1), "must: concat-in");
      strcpy(str, e->l->must.right);
      strcat(str, e->r->must.left);
      l1 = e->l->must.in->size;
      l2 = e->r->must.in->size;
      e->must.in = word_set_new(l1+l2-1);
      e->must.in->words[0] = str;
      for (i = 0, j = 0; i < l1; i++) {
        if (!strcmp(e->l->must.right, e->l->must.in->words[i]) && j == 0) {
          j = 1;
          continue;
        }
        e->must.in->words[i+1-j] = e->l->must.in->words[i];
      }
      for (i = 0, j = 0; i < l2; i++) {
        if (!strcmp(e->l->must.right, e->l->must.in->words[i]) && j == 0) {
          j = 1;
          continue;
        }
        e->must.in->words[i+l1-j] = e->r->must.in->words[i];
      }
    } else {
      l1 = e->l->must.in->size;
      l2 = e->r->must.in->size;
      e->must.in = word_set_new(l1+l2);
      for (i = 0; i < l1; i++) {
        e->must.in->words[i] = e->l->must.in->words[i];
      }
      for (i = 0; i < l2; i++) {
        e->must.in->words[i+l1] = e->r->must.in->words[i];
      }
    }
    break;
  case Literal:
    str = (char *)xcalloc(sizeof(char), 2, "must: literal");
    str[0] = e->lit;
    e->must.is = e->must.left = e->must.right = str;
    e->must.in = word_set_new(1);
    e->must.in->words[0] = str;
    break;
  default:
    e->must = NOMUST;
  }
}

WORD_SET *
word_set_new (int size)
{
  if (size == 0) return (WORD_SET *)&NOWORDS;

  WORD_SET *word_set = (WORD_SET *)xmalloc(sizeof(WORD_SET), "word_set");
  word_set->size = size;
  if (size > 0) {
    word_set->words = (char **)xcalloc(sizeof(char *), size, "words");
  }
  return word_set;
}

void fill_follow(EXPR *e)
{
  switch (e->type) {
  case EOP:
    connect(e->l->last_pos, e->first_pos);
    fill_follow(e->l);
    break;
  case Concat:
    connect(e->l->last_pos, e->r->first_pos);
    fill_follow(e->r);
    fill_follow(e->l);
    break;
  case Union:
    fill_follow(e->r);
    fill_follow(e->l);
    break;
  case Star: case Plus:
    connect(e->l->last_pos, e->l->first_pos);
    fill_follow(e->l);
    break;
  case Qmark:
    fill_follow(e->l);
    break;
  default:
    break;
  }
  return;
}

POSITION_SET *
newpos()
{
  static const int init_size = 10;
  POSITION_SET *pos = (POSITION_SET *)xmalloc(sizeof(POSITION_SET) ,"position set");
  pos->npos = 0;
  pos->pos = (int *)xmalloc(sizeof(int)*init_size, "array in position set");
  pos->size = init_size;
  return pos;
}

void
connect(POSITION_SET *set1, POSITION_SET *set2)
{
  int i, j;
  EXPR **tbl = R->lit_expr;
  for (i = 0; i < set1->npos; i++) {
    for (j = 0; j < set2->npos; j++) {
      addpos(tbl[set1->pos[i]]->follow_pos,
             set2->pos[j]);
      addpos(tbl[set2->pos[j]]->before_pos,
             set1->pos[i]);
    }
  }
}

void
copypos(POSITION_SET *src, POSITION_SET *dst)
{
  int i;
  for (i = 0; i < src->npos; i++) {
    addpos(dst, src->pos[i]);
  }
}

void
addpos(POSITION_SET *set, int id)
{
  int i;

  if (id < 0 || R->maxid < id) {
    exitmsg("invalid id for addpos: %d\n", id);
  }

  /* check duplication. */
  for (i = 0; i < set->npos; i++) {
    if (set->pos[i] == id) {
      return;
    }
  }

  /* check for realloc. before adding. */
  if (set->npos >= set->size) {
    set->size *= 2;
    set->pos = (int *)xrealloc(set->pos, (set->size)*sizeof(int), "realloc addpos");
  }

  set->pos[set->npos] = id;
  set->npos++;
}

void
delete_rept(EXPR *e)
{
#ifdef DEBUG
  print_expr(R->root);
#endif
  e->l->parent = e->parent;
  if (e->parent->l == e) e->parent->l = e->l;
  else e->parent->r = e->l;

  percolate(e->parent);
  rept_compress (e->parent, e->l);

  xfree(e);
#ifdef DEBUG
  print_expr(R->root);
#endif
}

void
rept_compress(EXPR *parent, EXPR* child)
{
  switch(parent->type) {
  case Plus: case Star: case Qmark: break;
  default: return;
  }
  switch(child->type) {
  case Plus: case Star: case Qmark: break;
  default: return;
  }

  if (parent->type != child->type && parent->type != Star) {
    change_rept(parent, Star);
  }

  delete_rept(child);
}

void
percolate(EXPR *e)
{
  fill_expr(e);
  if (e->parent) percolate(e->parent);
}

void
change_rept(EXPR *e, EXPR_TYPE type)
{
#ifdef DEBUG
  print_expr(R->root);
#endif
  e->type = type;
  percolate(e);
#ifdef DEBUG
  print_expr(R->root);
#endif
}

void
delete_expr(EXPR *e)
{
  EXPR *parent = e->parent;
  EXPR *grandparent, *sibling;

  if (!parent) {
    exitmsg("INTERNAL ERROR: delete_expr abort %d\n", e->type);
  }

  switch(parent->type) {
  case Plus: case Star: case Qmark: case Union:
    delete_expr(parent);
    return;
  case Concat:
    break;
  default:
    exitmsg("INTERNAL ERROR: delete_expr abort %d\n", parent->type);
  }
#ifdef DEBUG
  print_expr(R->root);
#endif
  grandparent = parent->parent;

  if (e == parent->r) {
    sibling = parent->l;
  } else {
    sibling = parent->r;
  }

  sibling->parent = grandparent;

  if (parent == grandparent->r) {
    grandparent->r = sibling;
  } else {
    grandparent->l = sibling;
  }

  percolate(grandparent);

  parent->r = parent->l = 0;
  free_expr(e);

  rept_compress(grandparent, sibling);
#ifdef DEBUG
  print_expr(R->root);
#endif
}

BOOL
match_every_line(REGEX *r)
{
  if (r->root->min_length == 0) {
    free_expr(r->root);
    r->maxid = 1;
    r->root = newexpr(r, Literal, NL, (EXPR *)0, (EXPR *)0);
    r->root = newexpr(r, EOP, '#', r->root, (EXPR *)0);
    return TRUE;
  }
  return FALSE;
}

void
free_expr(EXPR *e)
{
  if (e == 0) return;
  switch (e->type) {
  case Charclass:
    /* Don't free the table, because it may be shared,
     * but zero the pointer to it. TODO
     */
    e->r = 0;
  default:
    xfree(e->first_pos);
    xfree(e->last_pos);
    xfree(e->follow_pos);
  }

  if (e->r) free_expr(e->r);
  if (e->l) free_expr(e->l);

  xfree(e);
}

void
print_expr(EXPR *e)
{
  switch(e->type) {
  case EOP:
    print_expr(e->l); printf("#\n"); return;
  case Plus:
    print_expr(e->l); printf("+"); return;
  case Star:
    print_expr(e->l); printf("*"); return;
  case Qmark:
    print_expr(e->l); printf("?"); return;
  case Concat:
    if (PlusStarQmark(e->parent->type)) printf("(");
    print_expr(e->l); print_expr(e->r);
    if (PlusStarQmark(e->parent->type)) printf(")");
    return;
  case Union:
    if (PlusStarQmark(e->parent->type)) printf("(");
    if (e->parent->type == Concat) printf("(");
    print_expr(e->l); printf("|"); print_expr(e->r);
    if (PlusStarQmark(e->parent->type)) printf(")");
    if (e->parent->type == Concat) printf(")");
    return;
  case Dot:
    printf("."); return;
  case Charclass:;
    int i;
    printf("[");
    for (i = 0; i < NCHAR; i++) {
      if (((UCHARP)e->r)[i]) {
        printf("%c", (UCHAR)i);
      }
    }
    printf("]"); return;
  case BegLine:
    printf("^");
    return;
  case EndLine:
    printf("$");
    return;
  case Literal:
    switch (e->lit) {
    case '\n':
      printf("\\n"); return;
    case '\v':
      printf("\\v"); return;
    case '\f':
      printf("\\f"); return;
    case '\t':
      printf("\\t"); return;
    case '\r':
      printf("\\r"); return;
    default:
      printf("%c", e->lit); return;
    }
  default:
    printf("%c", e->lit); return;
  }
}

void print_pos(EXPR *e)
{
  int i;
  printf("expr %d (lit='%c')\n", e->id, e->lit);
  printf("first_pos:\n");
  for (i = 0; i < e->first_pos->npos; i++) {
    printf("\texpr %d;\n", e->first_pos->pos[i]);
  }
  printf("last_pos:\n");
  for (i = 0; i < e->last_pos->npos; i++) {
    printf("\texpr %d;\n", e->last_pos->pos[i]);
  }
  printf("follow_pos:\n");
  for (i = 0; i < e->follow_pos->npos; i++) {
    printf("\texpr %d;\n", e->follow_pos->pos[i]);
  }
  printf("before_pos:\n");
  for (i = 0; i < e->before_pos->npos; i++) {
    printf("\texpr %d;\n", e->before_pos->pos[i]);
  }

  printf("\n");
}