Mercurial > hg > Applications > Grep
annotate regexParser/regexParser.cc @ 212:b0ae5273925c
implement allocateCCState()
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 28 Dec 2015 16:42:02 +0900 |
parents | ecf70fb215a5 |
children | a94f57af1600 |
rev | line source |
---|---|
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1 #include <stdlib.h> |
89
50a146c05192
add NodeNumber in Regex Parser tree
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
87
diff
changeset
|
2 #include <stdio.h> |
122 | 3 #include <string.h> |
4 #include <ctype.h> | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
5 #include "regexParser.h" |
115
ca30f8334741
rename createRegexParser.cc to regexParser.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
112
diff
changeset
|
6 |
112
ec485345daf9
some function use static
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
108
diff
changeset
|
7 static NodePtr charClass(RegexInfoPtr); |
ec485345daf9
some function use static
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
108
diff
changeset
|
8 static void token(RegexInfoPtr); |
ec485345daf9
some function use static
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
108
diff
changeset
|
9 static NodePtr regexAtom(RegexInfoPtr); |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
10 |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
11 /** |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
12 * Create a node of regex parse tree. |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
13 * tokenType |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
14 * regexPosition(state) |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
15 * stateTransitionTable |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
16 */ |
118 | 17 static |
116
66c633575b53
remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
115
diff
changeset
|
18 NodePtr allocateNode() { |
129
b930be74a16e
remove word.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
128
diff
changeset
|
19 NodePtr n = NEW(Node); |
125 | 20 n->cc = NULL; |
21 n->left = NULL; | |
22 n->right = NULL; | |
192
ecf70fb215a5
print charclass
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
190
diff
changeset
|
23 n->stateNum = 0; |
ecf70fb215a5
print charclass
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
190
diff
changeset
|
24 n->nextStateNum = 0; |
116
66c633575b53
remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
115
diff
changeset
|
25 return n; |
66c633575b53
remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
115
diff
changeset
|
26 } |
66c633575b53
remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
115
diff
changeset
|
27 |
187
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
185
diff
changeset
|
28 NodePtr createNode(RegexInfoPtr ri,unsigned char type,CharClassPtr cc, NodePtr left, NodePtr right) { |
116
66c633575b53
remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
115
diff
changeset
|
29 NodePtr n = allocateNode(); |
125 | 30 n->tokenType = type; |
134 | 31 n->cc = cc; |
183
7ae0a3070647
implement generateTransitionList
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
180
diff
changeset
|
32 n->state = NULL; |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
33 n->left = left; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
34 n->right = right; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
35 return n; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
36 } |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
37 |
144
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
38 CharClassPtr createCharClassWord(RegexInfoPtr ri) { |
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
39 CharClassPtr cc = NEW(CharClass); |
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
40 cc->type = 'a'; |
192
ecf70fb215a5
print charclass
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
190
diff
changeset
|
41 cc->left = NULL; |
ecf70fb215a5
print charclass
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
190
diff
changeset
|
42 cc->right = NULL; |
144
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
43 cc->cond.w.word = ri->tokenValue; |
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
44 cc->cond.w.length = ri->ptr - ri->tokenValue; |
178
5e8c6857934c
implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
175
diff
changeset
|
45 cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue; |
192
ecf70fb215a5
print charclass
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
190
diff
changeset
|
46 cc->cond.range.next = NULL; |
144
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
47 return cc; |
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
48 } |
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
49 |
149
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
50 /* |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
51 cond.range.begin cond.range.end |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
52 |----------------| |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
53 1.b---e |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
54 2.b------e |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
55 3.b------------e |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
56 4.b-----------------------e |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
57 5.b----------------------------e |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
58 |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
59 |----------------| |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
60 6. b---------e |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
61 7. b----------------e |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
62 8. b---------------------e |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
63 |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
64 |----------------| |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
65 9. b-----e |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
66 10. b--------e |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
67 11. b-------------e |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
68 |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
69 |----------------| |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
70 12. b-----e |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
71 |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
72 |----------------| |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
73 13. b--e |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
74 |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
75 */ |
180
d97bcab546e8
implement getNext
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
179
diff
changeset
|
76 CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end) { |
178
5e8c6857934c
implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
175
diff
changeset
|
77 if (cc == NULL) { |
188
109d22faf7b5
remove errors and warnings
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
187
diff
changeset
|
78 createCharClassRange(begin,end,0,0,0); |
178
5e8c6857934c
implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
175
diff
changeset
|
79 } |
149
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
80 if (end < cc->cond.range.begin ) { // 1 |
147 | 81 if (cc->left) { |
180
d97bcab546e8
implement getNext
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
179
diff
changeset
|
82 cc->left = insertCharClass(cc->left,begin,end); |
147 | 83 } else { |
188
109d22faf7b5
remove errors and warnings
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
187
diff
changeset
|
84 cc->left = createCharClassRange(begin,end,0,0,0); |
147 | 85 } |
152 | 86 return cc; |
149
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
87 } else if (end == cc->cond.range.begin ) { // 2 |
147 | 88 cc->cond.range.begin = begin; |
150 | 89 return cc; |
149
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
90 } else if (end <= cc->cond.range.end) { // 3,4,6,7,9,10 |
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
91 if (begin < cc->cond.range.begin) { // 3,4 |
147 | 92 cc->cond.range.begin = begin; |
93 } | |
150 | 94 return cc; |
149
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
95 } else if (begin > cc->cond.range.end ) { // 13 |
147 | 96 if (cc->right) { |
180
d97bcab546e8
implement getNext
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
179
diff
changeset
|
97 cc->right = insertCharClass(cc->right,begin,end); |
147 | 98 } else { |
188
109d22faf7b5
remove errors and warnings
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
187
diff
changeset
|
99 cc->right = createCharClassRange(begin,end,0,0,0); |
147 | 100 } |
151 | 101 return cc; |
150 | 102 } |
103 if (cc->right) { | |
151 | 104 CharClassPtr right = cc->right; |
105 begin = cc->cond.range.begin; | |
106 free(cc); | |
180
d97bcab546e8
implement getNext
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
179
diff
changeset
|
107 return insertCharClass(right,begin,end); |
150 | 108 } |
109 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { // 12 | |
151 | 110 if (end > cc->cond.range.end) cc->cond.range.end = end; // 11,8 |
149
f1880f25fabf
add insertCharClass images
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
148
diff
changeset
|
111 } else if (begin < cc->cond.range.begin) { // 5 |
147 | 112 cc->cond.range.begin = begin; |
150 | 113 cc->cond.range.end = end; |
114 } else { | |
115 printf("insertCharClass Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); | |
147 | 116 } |
117 return cc; | |
142 | 118 } |
125 | 119 |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
120 // <charClass> ::= '['<literal>'-'<literal>']' |
112
ec485345daf9
some function use static
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
108
diff
changeset
|
121 static |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
122 NodePtr charClass(RegexInfoPtr ri) { |
125 | 123 CharClassPtr cc = NEW(CharClass); |
135 | 124 NodePtr n = createNode(ri,'c',cc,0,0); |
126 | 125 cc->type = 'r'; |
142 | 126 cc->nextState.bitContainer = 0; |
148
d1ebba6e117a
add test routing
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
147
diff
changeset
|
127 cc->left = NULL; |
d1ebba6e117a
add test routing
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
147
diff
changeset
|
128 cc->right = NULL; |
212
b0ae5273925c
implement allocateCCState()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
192
diff
changeset
|
129 cc->stateNum = 0; |
b0ae5273925c
implement allocateCCState()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
192
diff
changeset
|
130 cc->state = NULL; |
142 | 131 RangeListPtr rangeList = &cc->cond.range; |
147 | 132 rangeList->begin = *ri->ptr; |
133 rangeList->end = *ri->ptr; | |
145
50217a0545e8
fix charClass()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
144
diff
changeset
|
134 rangeList->next = NULL; |
130
7925e9abb078
add or flag
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
129
diff
changeset
|
135 |
135 | 136 for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) { |
137 if (*ri->ptr == '-') { | |
147 | 138 rangeList->end = *(ri->ptr + 1); |
135 | 139 ri->ptr++; |
140 continue; | |
141 } | |
142 if (ri->ptr[0] == 0 || ri->ptr[0] == ']') break; | |
126 | 143 rangeList->next = NEW(RangeList); |
125 | 144 rangeList = rangeList->next; |
147 | 145 rangeList->begin = *ri->ptr; |
146 rangeList->end = *ri->ptr; | |
125 | 147 rangeList->next = NULL; |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
148 } |
147 | 149 |
150 | 150 RangeListPtr r = cc->cond.range.next; |
151 cc->cond.range.next = 0; | |
152 for (; r; r = r->next) { | |
180
d97bcab546e8
implement getNext
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
179
diff
changeset
|
153 cc = insertCharClass(cc, r->begin, r->end); |
147 | 154 } |
192
ecf70fb215a5
print charclass
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
190
diff
changeset
|
155 cc->cond.range.next = 0; |
142 | 156 // TODO literal support |
157 // merge rangeList here | |
135 | 158 if (*ri->ptr) ri->ptr++; |
159 token(ri); | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
160 return n; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
161 } |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
162 |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
163 // <literal> ::= [a-z][A-Z][0-9] |
112
ec485345daf9
some function use static
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
108
diff
changeset
|
164 static |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
165 NodePtr literal(RegexInfoPtr ri) { |
134 | 166 CharClassPtr cc = createCharClassWord(ri); |
144
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
143
diff
changeset
|
167 token(ri); |
134 | 168 NodePtr n = createNode(ri,'a',cc,0,0); |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
169 return n; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
170 } |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
171 |
133
ccc673449351
Look ahead '*'
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
132
diff
changeset
|
172 static |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
173 void token(RegexInfoPtr ri) { |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
174 while (ri->ptr[0] != '\0') { |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
175 if (ri->ptr[0] == '('){ |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
176 ri->ptr++; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
177 ri->tokenType = '('; |
118 | 178 ri->tokenValue = NULL; |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
179 return; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
180 } else if (ri->ptr[0] == ')') { |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
181 ri->ptr++; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
182 ri->tokenType = ')'; |
118 | 183 ri->tokenValue = ri->ptr; |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
184 return; |
135 | 185 } else if (ri->ptr[0] == ']') { |
186 ri->ptr++; | |
187 ri->tokenType = ']'; | |
188 ri->tokenValue = ri->ptr; | |
189 return; | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
190 } else if (ri->ptr[0] == '|'){ |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
191 ri->ptr++; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
192 ri->tokenType = '|'; |
118 | 193 ri->tokenValue = NULL; |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
194 return; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
195 } else if (ri->ptr[0] == '*'){ |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
196 ri->ptr++; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
197 ri->tokenType = '*'; |
118 | 198 ri->tokenValue = NULL; |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
199 return; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
200 } else if (ri->ptr[0] == '\\'){ |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
201 // need more proccesing |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
202 /* |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
203 \277 |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
204 \0xa5 |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
205 \[ |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
206 \\ |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
207 \utf-8 etc... |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
208 */ |
142 | 209 } else if (ri->ptr[0] == '[') { |
210 ri->ptr++; | |
211 ri->tokenType = 'c'; | |
212 ri->tokenValue = ri->ptr; | |
213 return; | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
214 } else { |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
215 ri->tokenType = 'a'; |
118 | 216 ri->tokenValue = ri->ptr; |
134 | 217 if (isalnum(ri->ptr[0])) { |
122 | 218 ri->ptr++; |
219 } | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
220 return; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
221 } |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
222 } |
134 | 223 ri->tokenType = 0; |
224 ri->tokenValue = NULL; | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
225 return; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
226 } |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
227 |
130
7925e9abb078
add or flag
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
129
diff
changeset
|
228 // <regexAtom> ::= <literal>|<charClass>|<group> |
112
ec485345daf9
some function use static
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
108
diff
changeset
|
229 static |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
230 NodePtr regexAtom(RegexInfoPtr ri) { |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
231 |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
232 NodePtr n = NULL; |
124 | 233 if (ri->tokenType == 'c') n = charClass(ri); |
134 | 234 else if (ri->tokenType == 'a') n = literal(ri); |
235 else if (ri->tokenType == '(') { | |
236 n = regex(ri); | |
237 if (ri->tokenType != ')') { | |
238 // error | |
239 } | |
240 token(ri); | |
241 } | |
242 if (ri->tokenType == '*') { | |
243 n = createNode(ri,'*',0,n,0); | |
244 token(ri); | |
245 } | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
246 return n; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
247 } |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
248 |
133
ccc673449351
Look ahead '*'
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
132
diff
changeset
|
249 // <regex> ::= <regexAtom> | <regexAtom>'*'<regex> | <regexAtom>'|'<regex> | <regexAtom><regexAtom>'*' | <regexAtom><regex> |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
250 NodePtr regex(RegexInfoPtr ri) { |
134 | 251 token(ri); |
128 | 252 NodePtr n = regexAtom(ri); |
134 | 253 while (ri->tokenType) { |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
254 if (ri->tokenType == '*') { |
134 | 255 n = createNode(ri,'*',0,n,0); |
256 token(ri); | |
257 return n; | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
258 } else if (ri->tokenType == '|') { |
134 | 259 n = createNode(ri,'|',0,n,0); |
133
ccc673449351
Look ahead '*'
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
132
diff
changeset
|
260 NodePtr n1 = regex(ri); |
134 | 261 n->right = n1; |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
262 } else if (ri->tokenType == ')') { |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
263 return n; |
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
264 } else { |
134 | 265 n = createNode(ri,'+',0,n,0); |
180
d97bcab546e8
implement getNext
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
179
diff
changeset
|
266 NodePtr n1 = regexAtom(ri); |
d97bcab546e8
implement getNext
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
179
diff
changeset
|
267 n->right = n1; |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
268 } |
134 | 269 } |
270 return n; | |
82
1d9bbf922bb6
add createRegexTree.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
271 } |