Mercurial > hg > Applications > Grep
annotate c/regexParser/main.cc @ 63:8fd3d35e9861
add token function
author | masa |
---|---|
date | Thu, 23 Jul 2015 18:01:02 +0900 |
parents | a49b4a8b8c14 |
children | e0ad6c145f89 |
rev | line source |
---|---|
55 | 1 /* |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
2 * <literal> ::= [a-z][A-Z][0-9] |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
3 * <charClass> ::= '['<literal>'-'<literal>']' |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
4 * <string> ::= <literal><literal>* |
58
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
5 * <group> ::= '('<regex>')' |
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
6 * <or> ::= <regex>'|'<regex> |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
7 * <*> ::= <regex>'*' |
58
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
8 * <regex> ::= <string>|<or>|<charClass>|<group>|<*> |
55 | 9 */ |
10 | |
45 | 11 #include <stdio.h> |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
12 #include <stdlib.h> |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
13 #include <string.h> |
57
71b497d25273
fix literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
56
diff
changeset
|
14 |
63 | 15 typedef struct charClass { |
16 unsigned char table[256]; | |
17 struct utf8Range { | |
18 unsigned char *begin; | |
19 unsigned char *end; | |
20 struct utf8Range next; | |
21 } *rangeList; | |
22 } | |
23 | |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
24 typedef struct node { |
63 | 25 unsigned char type; |
26 union value { | |
27 charClass *cc; | |
28 unsigned char *string; | |
29 } | |
58
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
30 struct node *self; |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
31 struct node *left; |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
32 struct node *right; |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
33 } Node, *NodePtr; |
52
a2826bf4e80a
remove magic number
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
51
diff
changeset
|
34 |
58
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
35 char *ptr; |
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
36 NodePtr regexHeadNode; |
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
37 |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
38 NodePtr charClass(); |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
39 NodePtr string(); |
58
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
40 NodePtr group(); |
63 | 41 NodePtr orexp(); |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
42 NodePtr asterisk(); |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
43 NodePtr regex(); |
57
71b497d25273
fix literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
56
diff
changeset
|
44 NodePtr createNode(char,NodePtr,NodePtr); |
55 | 45 |
62
a49b4a8b8c14
implement isLiteral
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
61
diff
changeset
|
46 bool isLiteral(char c) { |
63 | 47 if (*ptr > 0x7f) return true; |
48 else if (*ptr == '(') return false; | |
49 else if (*ptr == '[') return false; | |
50 else if (*ptr == '|') return false; | |
51 else if (*ptr == '*') return false; | |
52 return true; | |
62
a49b4a8b8c14
implement isLiteral
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
61
diff
changeset
|
53 } |
a49b4a8b8c14
implement isLiteral
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
61
diff
changeset
|
54 |
60
8616a045a7f4
impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
59
diff
changeset
|
55 void printNodeDate(NodePtr n) { |
8616a045a7f4
impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
59
diff
changeset
|
56 puts("---------------------"); |
8616a045a7f4
impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
59
diff
changeset
|
57 printf("Self Node char : %c\n", n->character); |
8616a045a7f4
impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
59
diff
changeset
|
58 printf("Self Node addr : %p\n", n->self); |
8616a045a7f4
impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
59
diff
changeset
|
59 printf("left Node addr : %p\n", n->left); |
8616a045a7f4
impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
59
diff
changeset
|
60 printf("right Node addr : %p\n", n->right); |
8616a045a7f4
impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
59
diff
changeset
|
61 puts("---------------------"); |
8616a045a7f4
impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
59
diff
changeset
|
62 puts(""); |
8616a045a7f4
impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
59
diff
changeset
|
63 } |
8616a045a7f4
impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
59
diff
changeset
|
64 |
57
71b497d25273
fix literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
56
diff
changeset
|
65 NodePtr createNode(char character, NodePtr left, NodePtr right) { |
71b497d25273
fix literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
56
diff
changeset
|
66 NodePtr n = (NodePtr)malloc(sizeof(Node)); |
58
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
67 n->self = n; |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
68 n->character = character; |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
69 n->left = left; |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
70 n->right = right; |
58
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
71 |
60
8616a045a7f4
impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
59
diff
changeset
|
72 printNodeDate(n); |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
73 return n; |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
74 } |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
75 |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
76 // <charClass> ::= '['<literal>'-'<literal>']' |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
77 NodePtr charClass() { |
62
a49b4a8b8c14
implement isLiteral
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
61
diff
changeset
|
78 ptr++; |
a49b4a8b8c14
implement isLiteral
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
61
diff
changeset
|
79 NodePtr n = (NodePtr)malloc(sizeof(Node)); |
a49b4a8b8c14
implement isLiteral
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
61
diff
changeset
|
80 return n; |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
81 } |
55 | 82 |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
83 // <literal> ::= [a-z][A-Z][0-9] |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
84 NodePtr literal() { |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
85 char c = *ptr; |
57
71b497d25273
fix literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
56
diff
changeset
|
86 NodePtr n = createNode(c,0,0); |
71b497d25273
fix literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
56
diff
changeset
|
87 ptr++; |
71b497d25273
fix literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
56
diff
changeset
|
88 return n; |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
89 } |
55 | 90 |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
91 // <string> ::= <literal><literal>* |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
92 NodePtr string() { |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
93 char c = *ptr; |
58
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
94 NodePtr n = NULL; |
61
67cade0e35b0
impl isLiteral
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
60
diff
changeset
|
95 if (isLiteral(c)) { |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
96 n = createNode(0,literal(),string()); |
55 | 97 } else { |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
98 n = createNode(0,0,0); |
55 | 99 } |
58
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
100 return n; |
55 | 101 } |
102 | |
63 | 103 // <group> ::= '('<regex>')' |
58
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
104 NodePtr group() { |
63 | 105 token(); |
106 NodePtr n = regex(); | |
107 token(); | |
58
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
108 if (*ptr == ')') { |
63 | 109 n = createNode('(',n,0); |
58
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
110 } else { |
63 | 111 // ) reqiured |
58
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
112 } |
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
113 return n; |
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
114 } |
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
115 |
63 | 116 // <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex> |
117 NodePtr regex() { | |
118 NodePtr n = regexAtom(); | |
119 while (*ptr) { | |
120 token(); | |
121 if (tokenType == '*') { | |
122 n = createNode('*',n,0); | |
123 } else if (tokenType == '|') { | |
124 NodePtr n1 = regex(); | |
125 n = createNode('|',n,n1); | |
126 } else { | |
127 NodePtr n1 = regex(); | |
128 n = createNode('+',n,n1); | |
129 } | |
130 } | |
55 | 131 } |
132 | |
63 | 133 // <regexAtom> ::= <literal>|<charClass>|<group> |
134 NodePtr regexAtom() { | |
135 | |
136 token(); | |
137 NodePter n; | |
138 if (tokenType == 'a') n = literal(); | |
139 else if (tokenType == '[') n = charClass(); | |
140 else if (tokenType == '(') n = group(); | |
141 | |
60
8616a045a7f4
impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
59
diff
changeset
|
142 return n; |
55 | 143 } |
144 | |
63 | 145 void token() { |
58
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
146 while (*ptr != '\0') { |
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
147 if ((*ptr == '(') || (*ptr == ')')) { |
63 | 148 tokenType = *ptr++; |
149 tokenValue = 0; | |
150 return ; | |
58
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
151 } else if (*ptr == '[') { |
63 | 152 tokenType = '['; |
153 tokenValue = ptr; | |
154 if (ptr[1] == ']') { | |
155 ptr++; | |
156 } | |
157 while (*ptr != ']') ptr++; | |
158 ptr++; | |
159 return; | |
58
4053c3e0fa7f
implement group()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
57
diff
changeset
|
160 } else if (*ptr == '|'){ |
63 | 161 tokenType = '|'; |
162 tokenValue = 0; | |
163 return; | |
60
8616a045a7f4
impl asterisk
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
59
diff
changeset
|
164 } else if (*ptr == '*'){ |
63 | 165 tokenType = '*'; |
166 tokenValue = 0; | |
167 return; | |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
168 } |
63 | 169 |
170 tokenType = 'a'; | |
171 tokenValue = ptr; | |
172 | |
173 if (*ptr == '\\') ptr++; // need more proccesing | |
174 /* | |
175 \277 | |
176 \0xa5 | |
177 \[ | |
178 \\ | |
179 \utf-8 etc... | |
180 */ | |
181 | |
55 | 182 } |
183 } | |
184 | |
62
a49b4a8b8c14
implement isLiteral
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
61
diff
changeset
|
185 int main(int argc, char **argv) |
55 | 186 { |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
187 for (int i = 1; i < argc; i++) { |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
188 if (strcmp(argv[i],"-regex") == 0) { |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
189 ptr = argv[i+1]; i++; |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
190 } |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
191 } |
55 | 192 |
56
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
193 printf("regex : %s\n",ptr); |
8901bc071d33
implement string() and literal()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
194 NodePtr n = regex(); |
45 | 195 return 0; |
196 } |