view regen/src/re.h @ 11:1e0cd7fade8b

add regen
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Sun, 19 Jun 2011 16:36:05 +0900
parents
children
line wrap: on
line source

#ifndef __RE_H__
#define __RE_H__ 1
#include <stdio.h>

/* Scalar aliases used throughout this header. */
typedef int ID;               /* identifier type, e.g. EXPR.id */
typedef unsigned char UCHAR;  /* one raw byte */
typedef UCHAR *UCHARP;        /* pointer to raw bytes, e.g. a character table */

/* Size used for per-character tables such as DFA.transition. */
#define NCHAR 256
/* Newline character. */
#define NL '\n'

/*
 * Special state markers, listed in ascending numeric order.
 * NOTE(review): the negative values look like sentinels that cannot collide
 * with ordinary non-negative state numbers -- confirm against the users.
 */
typedef enum STATE_TYPE {
  ACCEPT  = -2,
  REJECT  = -1,
  DEFAULT =  0,
  START   =  1
} STATE_TYPE;

/*
 * Set of integer positions; EXPR keeps four of these (first_pos, last_pos,
 * follow_pos, before_pos).
 * NOTE(review): the field names suggest a growable array (npos entries in
 * use out of size allocated) -- confirm against the code that fills it.
 */
typedef struct POSITION_SET {
  int *pos;   /* the position values */
  int npos;   /* presumably the number of entries stored in pos */
  int size;   /* presumably the allocated capacity of pos */
} POSITION_SET;

/* Collection of words (strings); referenced through MUST.in. */
typedef struct WORD_SET {
  char **words;  /* the words */
  int size;      /* presumably the number of entries in words -- TODO confirm */
}  WORD_SET;

/*
 * Kind tag stored in EXPR.type.  The numeric values are spelled out so the
 * numbering stays fixed if an entry is ever inserted in the middle.
 * NOTE(review): EOP presumably means "end of pattern"; the other names follow
 * the usual regular-expression operators -- confirm against the parser.
 */
typedef enum EXPR_TYPE {
  EOP       = 0,
  Literal   = 1,
  Dot       = 2,
  Charclass = 3,
  Concat    = 4,
  Union     = 5,
  Qmark     = 6,
  Plus      = 7,
  Star      = 8,
  Rpar      = 9,
  Lpar      = 10,
  BegLine   = 11,
  EndLine   = 12
} EXPR_TYPE;

/*
 * Literal-string information.  Embedded in every EXPR (EXPR.must) and in
 * REGEX (REGEX.must), where it is used for fixed-string filtering.
 */
typedef struct MUST {
  WORD_SET *in;    /* w1 '\0' w2 '\0' w3 ... */
  char *is;    /* fixed-string when not NULL. */
  char *left;  /* left-edge string  */
  char *right; /* right-edge string */
} MUST;

/*
 * Expression node.  Nodes link to each other through l, r and parent.
 *
 * The two anonymous unions overlay alternative payloads on the same storage:
 * l shares space with count, and r shares space with tbl, so only one member
 * of each pair is meaningful for a given node.
 * NOTE(review): which member is active is presumably decided by `type` --
 * confirm against the parser before reading either side.
 * Anonymous unions are standard from C11 on; older compilers need an
 * extension for them.
 */
typedef struct EXPR {
  ID id;                     /* node identifier */
  union {
    struct EXPR *l;          /* left operand ...            */
    int count;               /* ... or an integer count     */
  };
  union {
    struct EXPR *r;          /* right operand ...           */
    UCHARP tbl;              /* ... or a byte table         */
  };
  struct EXPR *parent;       /* enclosing node */
  UCHAR lit;                 /* literal byte; presumably used by Literal nodes */
  EXPR_TYPE type;            /* kind of node */
  short min_length;          /* presumably shortest text this node can match */
  short max_length;          /* presumably longest text this node can match */
  POSITION_SET *first_pos;   /* position sets attached to this node */
  POSITION_SET *last_pos;
  POSITION_SET *follow_pos;
  POSITION_SET *before_pos;
  MUST must;                 /* literal-string information for this node */
} EXPR;

/* Boolean type for flag fields; FALSE is 0 and TRUE is 1 by enum numbering. */
typedef enum BOOL { FALSE, TRUE } BOOL;

/* Side selector; LEFT is 0 and RIGHT is 1 by enum numbering. */
typedef enum DIRECTION {
  LEFT,
  RIGHT
} DIRECTION;

/*
 * DFA record handed to the code generator through CODEGEN.dfa.
 * NOTE(review): with one id and one transition table this looks like a single
 * DFA state, CODEGEN.dfa then being an array of them -- confirm.
 */
typedef struct DFA {
  int id;                  /* identifier of this record */
  int transition[NCHAR];   /* one slot per character value (NCHAR slots);
                              presumably the target for that input character */
  int ntransition;         /* presumably the number of slots in use -- TODO confirm */
  int default_transition;  /* presumably the fallback target -- TODO confirm */
  BOOL accept;             /* accept flag for this record */
} DFA;

/*
 * One regular expression: the source text, the parsed expression tree, and
 * the data derived from it for optimization.
 */
typedef struct REGEX {
  /* data for compiling */
  char *regex;       /* Regular expression. */
  char *regex_end;   /* Pointer to the end of regex. */
  char *escaped_regex;       /* Escaped regular expression for embedding. */
  /* data for matching */
  EXPR *root;        /* Pointer to root expression. */
  unsigned maxid;   /* Number of lit expressions. */
  /* data for optimization */
  MUST must;     /* For fixed-string filtering. Only the longest key is used. */
  char *filter_key;
  EXPR** lit_expr;   /* Pointer to array of lit-expressions. */
  UCHARP involve; /* Character table of the characters that occur in the regex. */
  short ninvolve;
  int min_length;  /* Min length of matching text. */
  int max_length;  /* Max length of matching text; -1 if unlimited. */
  char *must_max_word; /* Best word (used as the filter). */
  int must_max_length; /* Max length of must words. */
  BOOL anchored;     /* Regex must match from the beginning of a line. */
  BOOL table_lookup; /* Enable threaded-code optimization (table lookup). */
  BOOL debug; /* Print regex statistics. */
} REGEX;

/*
 * Code-generator context: output streams, per-stage callbacks, the DFA to
 * emit, and switches for the optional generation rules.
 *
 * Every callback returns nothing and takes the REGEX plus a CODEGEN pointer
 * (presumably this same object -- confirm at the call sites).
 */
typedef struct CODEGEN {
  FILE *output;
  FILE *header; /* finally merged into output together with source */
  FILE *source; /* finally merged into output together with header */
  char *genroot; /* path to the regen source, as defined by $REGENROOT */
  void (*initialize)(REGEX *, struct CODEGEN *);
  void (*filter)(REGEX *, struct CODEGEN *);
  void (*predict)(REGEX *, struct CODEGEN *);
  void (*declaration)(REGEX *, struct CODEGEN *);
  void (*transition)(REGEX *, struct CODEGEN *);
  void (*finalize)(REGEX *, struct CODEGEN *);
  DFA *dfa;
  int begline_state;
  /* optional/optimal generation rules. */
  BOOL enable_filter;
  BOOL enable_predict;
  BOOL gen_table_lookup;
} CODEGEN;

#endif