Mercurial > hg > Members > masakoha > testcode
comparison c/regexParser/main.cc @ 80:0a452d69f0e2
remove global variable in main.cc
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 07 Oct 2015 16:08:34 +0900 |
parents | 52da06c3f050 |
children | 27883946b2dc |
comparison
equal
deleted
inserted
replaced
79:52da06c3f050 | 80:0a452d69f0e2 |
---|---|
9 #include <stdio.h> | 9 #include <stdio.h> |
10 #include <stdlib.h> | 10 #include <stdlib.h> |
11 #include <string.h> | 11 #include <string.h> |
12 #include "regexParser.h" | 12 #include "regexParser.h" |
13 | 13 |
14 unsigned char *ptr; | 14 typedef struct regexInfo { |
15 unsigned char tokenType; | 15 unsigned char *ptr; |
16 int tokenValue; | 16 unsigned char tokenType; |
17 int tokenValue; | |
18 } RegexInfo, *RegexInfoPtr; | |
17 | 19 |
18 NodePtr charClass(); | 20 NodePtr charClass(); |
19 NodePtr group(); | 21 NodePtr group(); |
20 NodePtr regex(); | 22 NodePtr regex(RegexInfoPtr); |
21 NodePtr createNode(unsigned char,NodePtr,NodePtr); | 23 NodePtr createNode(unsigned char,NodePtr,NodePtr); |
22 void token(); | 24 void token(); |
23 NodePtr regexAtom(); | 25 NodePtr regexAtom(); |
24 extern void printTree(NodePtr); | 26 extern void printTree(NodePtr); |
25 | |
26 | |
27 bool isLiteral(char c) { | |
28 if (*ptr > 0x7f) return true; | |
29 else if (*ptr == '(') return false; | |
30 else if (*ptr == '[') return false; | |
31 else if (*ptr == '|') return false; | |
32 else if (*ptr == '*') return false; | |
33 return true; | |
34 } | |
35 | 27 |
36 /** | 28 /** |
37 * Create a node of regex parse tree. | 29 * Create a node of regex parse tree. |
38 * tokenType | 30 * tokenType |
39 * regexPosition(state) | 31 * regexPosition(state) |
47 n->right = right; | 39 n->right = right; |
48 return n; | 40 return n; |
49 } | 41 } |
50 | 42 |
51 // <charClass> ::= '['<literal>'-'<literal>']' | 43 // <charClass> ::= '['<literal>'-'<literal>']' |
52 NodePtr charClass() { | 44 NodePtr charClass(RegexInfoPtr ri) { |
53 NodePtr n = (NodePtr)malloc(sizeof(Node)); | 45 NodePtr n = (NodePtr)malloc(sizeof(Node)); |
54 unsigned char startChar = *ptr; | 46 unsigned char startChar = ri->ptr[0]; |
55 while (*ptr == '-') { | 47 while (ri->ptr[0] == '-') { |
56 ptr++; | 48 ri->ptr++; |
57 } | 49 } |
58 unsigned char endChar = *ptr; | 50 unsigned char endChar = ri->ptr[0]; |
59 unsigned char *charTable = (unsigned char*)malloc(sizeof(char)*256); | 51 unsigned char *charTable = (unsigned char*)malloc(sizeof(char)*256); |
60 | 52 |
61 return n; | 53 return n; |
62 } | 54 } |
63 | 55 |
64 // <literal> ::= [a-z][A-Z][0-9] | 56 // <literal> ::= [a-z][A-Z][0-9] |
65 NodePtr literal() { | 57 NodePtr literal(RegexInfoPtr ri) { |
66 NodePtr n = createNode(*ptr,0,0); | 58 NodePtr n = createNode(ri->ptr[0],0,0); |
67 ptr++; | 59 ri->ptr++; |
68 return n; | 60 return n; |
69 } | 61 } |
70 | 62 |
71 // <group> ::= '('<regex>')' | 63 // <group> ::= '('<regex>')' |
72 NodePtr group() { | 64 NodePtr group(RegexInfoPtr ri) { |
73 return regex(); | 65 return regex(ri); |
74 } | 66 } |
75 | 67 |
76 | 68 |
77 | 69 |
78 void token() { | 70 void token(RegexInfoPtr ri) { |
79 while (*ptr != '\0') { | 71 while (ri->ptr[0] != '\0') { |
80 if (*ptr == '('){ | 72 if (ri->ptr[0] == '('){ |
81 ptr++; | 73 ri->ptr++; |
82 tokenType = '('; | 74 ri->tokenType = '('; |
83 tokenValue = 0; | 75 ri->tokenValue = 0; |
84 if (ptr[1] == ')') { | 76 if (ri->ptr[1] == ')') { |
85 ptr++; | 77 ri->ptr++; |
86 } | 78 } |
87 return; | 79 return; |
88 } else if (*ptr == ')') { | 80 } else if (ri->ptr[0] == ')') { |
89 ptr++; | 81 ri->ptr++; |
90 tokenType = ')'; | 82 ri->tokenType = ')'; |
91 tokenValue = *ptr; | 83 ri->tokenValue = ri->ptr[0]; |
92 return; | 84 return; |
93 } else if (*ptr == '[') { | 85 } else if (ri->ptr[0] == '[') { |
94 ptr++; | 86 ri->ptr++; |
95 tokenType = '['; | 87 ri->tokenType = '['; |
96 tokenValue = *ptr; | 88 ri->tokenValue = ri->ptr[0]; |
97 if (ptr[1] == ']') { | 89 if (ri->ptr[1] == ']') { |
98 ptr++; | 90 ri->ptr++; |
99 } | 91 } |
100 return; | 92 return; |
101 } else if (*ptr == '|'){ | 93 } else if (ri->ptr[0] == '|'){ |
102 ptr++; | 94 ri->ptr++; |
103 tokenType = '|'; | 95 ri->tokenType = '|'; |
104 tokenValue = 0; | 96 ri->tokenValue = 0; |
105 return; | 97 return; |
106 } else if (*ptr == '*'){ | 98 } else if (ri->ptr[0] == '*'){ |
107 ptr++; | 99 ri->ptr++; |
108 tokenType = '*'; | 100 ri->tokenType = '*'; |
109 tokenValue = 0; | 101 ri->tokenValue = 0; |
110 return; | 102 return; |
111 } else if (*ptr == '\\'){ | 103 } else if (ri->ptr[0] == '\\'){ |
112 // need more processing | 104 // need more processing |
113 /* | 105 /* |
114 \277 | 106 \277 |
115 \0xa5 | 107 \0xa5 |
116 \[ | 108 \[ |
117 \\ | 109 \\ |
118 \utf-8 etc... | 110 \utf-8 etc... |
119 */ | 111 */ |
120 } else { | 112 } else { |
121 tokenType = 'a'; | 113 ri->tokenType = 'a'; |
122 tokenValue = *ptr; | 114 ri->tokenValue = ri->ptr[0]; |
123 return; | 115 return; |
124 } | 116 } |
125 } | 117 } |
126 | 118 |
127 tokenType = 0; | 119 ri->tokenType = 0; |
128 tokenValue = 0; | 120 ri->tokenValue = 0; |
129 return; | 121 return; |
130 } | 122 } |
131 | 123 |
132 // <regexAtom> ::= <literal>|<charClass>|<group> | 124 // <regexAtom> ::= <literal>|<charClass>|<group> |
133 NodePtr regexAtom() { | 125 NodePtr regexAtom(RegexInfoPtr ri) { |
134 | 126 |
135 token(); | 127 token(ri); |
136 NodePtr n = NULL; | 128 NodePtr n = NULL; |
137 if (tokenType == 'a') n = literal(); | 129 if (ri->tokenType == 'a') n = literal(ri); |
138 else if (tokenType == '[') n = charClass(); | 130 else if (ri->tokenType == '[') n = charClass(ri); |
139 else if (tokenType == '(') n = group(); | 131 else if (ri->tokenType == '(') n = group(ri); |
140 | 132 |
141 return n; | 133 return n; |
142 } | 134 } |
143 | 135 |
144 // <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex> | 136 // <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex> |
145 NodePtr regex() { | 137 NodePtr regex(RegexInfoPtr ri) { |
146 NodePtr n = regexAtom(); | 138 NodePtr n = regexAtom(ri); |
147 while (*ptr) { | 139 while (ri->ptr[0]) { |
148 token(); | 140 token(ri); |
149 if (tokenType == '*') { | 141 if (ri->tokenType == '*') { |
150 n = createNode('*',n,0); | 142 n = createNode('*',n,0); |
151 } else if (tokenType == '|') { | 143 } else if (ri->tokenType == '|') { |
152 NodePtr n1 = regex(); | 144 NodePtr n1 = regex(ri); |
153 n = createNode('|',n,n1); | 145 n = createNode('|',n,n1); |
154 } else if (tokenType == ')') { | 146 } else if (ri->tokenType == ')') { |
155 return n; | 147 return n; |
156 } else { | 148 } else { |
157 NodePtr n1 = regex(); | 149 NodePtr n1 = regex(ri); |
158 n = createNode('+',n,n1); | 150 n = createNode('+',n,n1); |
159 } | 151 } |
160 } return n; | 152 } return n; |
161 } | 153 } |
162 | 154 |
163 | 155 |
164 int main(int argc, char **argv) | 156 int main(int argc, char **argv) |
165 { | 157 { |
158 RegexInfoPtr ri = (RegexInfoPtr)malloc(sizeof(RegexInfo)); | |
159 | |
166 for (int i = 1; i < argc; i++) { | 160 for (int i = 1; i < argc; i++) { |
167 if (strcmp(argv[i],"-regex") == 0) { | 161 if (strcmp(argv[i],"-regex") == 0) { |
168 ptr = (unsigned char*)argv[i+1]; i++; | 162 ri->ptr = (unsigned char*)argv[i+1]; i++; |
169 } | 163 } |
170 } | 164 } |
171 | 165 |
172 printf("regex : %s\n",ptr); | 166 printf("regex : %s\n",ri->ptr); |
173 NodePtr n = regex(); | 167 NodePtr n = regex(ri); |
174 printTree(n); | 168 printTree(n); |
175 return 0; | 169 return 0; |
176 } | 170 } |