annotate regen/src/regen.c @ 11:1e0cd7fade8b

add regen
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Sun, 19 Jun 2011 16:36:05 +0900
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 #include "re.h"
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2 #include "util.h"
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
3 #include "codegen.h"
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
4 #include "generator.h"
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
5 #include <unistd.h>
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
6 #include <getopt.h>
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
7
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 void initialize(REGEX *, CODEGEN*, int, char *[]);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 extern void parse(REGEX *, CODEGEN*);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 void compile(REGEX *, CODEGEN*);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
11 extern void codegen(REGEX *, CODEGEN*);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
12 void finalize(REGEX *, CODEGEN*);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
13
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 int
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
15 main(int argc, char *argv[])
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
16 {
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
17 REGEX* r = (REGEX *)xcalloc(sizeof(REGEX), 1, "regex");
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
18 CODEGEN* g = (CODEGEN *)xcalloc(sizeof(CODEGEN), 1, "codegen");
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
19 initialize(r, g, argc, argv);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
20 parse(r, g);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
21 codegen(r, g);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
22 compile(r, g);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
23 finalize(r, g);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
24 return 0;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
25 }
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
26
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
27 void
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
28 initialize(REGEX *r, CODEGEN *g, int argc, char *argv[])
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
29 {
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
30 int opt, len;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
31 FILE *fp;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
32
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
33 static char buf[1024];
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
34 g->output = stdout;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
35 g->enable_filter = TRUE;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
36 g->enable_predict = TRUE;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
37 g->gen_table_lookup = TRUE;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
38 set_gen_function(r, g);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
39
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
40 g->genroot = getenv("REGENROOT");
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
41 if (g->genroot == NULL) {
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
42 exitmsg("$REGENROOT not defined.");
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
43 }
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
44
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
45 while ((opt=getopt(argc, argv, "dlbglso:f:")) != -1) {
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
46 switch (opt) {
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
47 case 'f':
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
48 {
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
49 fp = xfopen(optarg, "r");
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
50 fgets(buf, sizeof(buf), fp);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
51 r->regex = buf;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
52 if (buf[strlen(buf)-1] == '\n') {
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
53 buf[strlen(buf)-1] = '\0';
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 }
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
55 fclose(fp);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
56 }
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
57 break;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
58 case 'o':
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
59 fp = xfopen(optarg, "w+");
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
60 g->output = fp;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
61 break;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
62 case 's':
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
63 g->gen_table_lookup = FALSE;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
64 break;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
65 case 'l':
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
66 set_gen_label(r, g);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
67 break;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
68 case 'b':
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
69 set_gen_cbc(r, g);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
70 break;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
71 case 'd':
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
72 set_gen_dot(r, g);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
73 break;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
74 case 'g':
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
75 r->debug = TRUE;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
76 break;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
77 default:
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
78 exit(1);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
79 }
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
80 }
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
81
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
82 if (r->regex == NULL) {
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
83 if (optind >= argc) {
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
84 exitmsg("USAGE: regen [options] regexp\n");
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
85 }
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
86 r->regex = argv[optind];
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
87 }
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
88
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
89 len = strlen(r->regex);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
90 r->regex_end = r->regex+len;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
91 r->escaped_regex = escape(r->regex);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
92 r->maxid = 1;
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
93 r->lit_expr = (EXPR **)xmalloc(len*sizeof(EXPR *), "regexp base set");
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
94 r->involve = (UCHARP)xcalloc(sizeof(UCHAR), NCHAR, "involve");
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
95 }
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
96
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
97 void
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
98 compile(REGEX *r, CODEGEN *g)
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
99 {
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
100 }
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
101
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
102 void
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
103 finalize(REGEX *r, CODEGEN *g)
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
104 {
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
105 //FIXME: this code make corrupted double-linked list. (why?)
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
106 //fclose(g->output);
1e0cd7fade8b add regen
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
107 }