Mercurial > hg > Applications > Grep
annotate regexParser/regexParser.h @ 220:4dc8d327cc7d
subset construction worked
how to handle EOF?
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 02 Jan 2016 00:01:22 +0900 |
parents | 63e9224c7b2b |
children | 1a34e702776a |
rev | line source |
---|---|
126 | 1 #define NEW(type) (type*)malloc(sizeof(type)) |
2 | |
188
109d22faf7b5
remove errors and warnings
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
187
diff
changeset
|
3 #ifndef INCLUDED_STRUCT |
109d22faf7b5
remove errors and warnings
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
187
diff
changeset
|
4 #define INCLUDED_STRUCT |
191
02031fb73af8
remove somefiles and fix header files
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
190
diff
changeset
|
5 |
02031fb73af8
remove somefiles and fix header files
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
190
diff
changeset
|
6 #define BITBLOCK 64 |
144
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
142
diff
changeset
|
7 typedef struct bitVector { |
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
142
diff
changeset
|
8 unsigned long bitContainer; |
191
02031fb73af8
remove somefiles and fix header files
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
190
diff
changeset
|
9 } BitVector,*BitVectorPtr; |
144
d8a4922eceae
remove some errors (not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
142
diff
changeset
|
10 |
129
b930be74a16e
remove word.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
126
diff
changeset
|
11 typedef struct word { |
b930be74a16e
remove word.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
126
diff
changeset
|
12 unsigned char *word; |
b930be74a16e
remove word.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
126
diff
changeset
|
13 int length; |
b930be74a16e
remove word.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
126
diff
changeset
|
14 } Word, *WordPtr; |
b930be74a16e
remove word.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
126
diff
changeset
|
15 |
126 | 16 typedef struct utf8Range { |
142 | 17 unsigned long begin; |
18 unsigned long end; | |
19 struct utf8Range *next; // only used in the parser. | |
126 | 20 } RangeList , *RangeListPtr; |
21 | |
178
5e8c6857934c
implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
175
diff
changeset
|
22 typedef struct condition { |
142 | 23 RangeList range; |
24 Word w; | |
116
66c633575b53
remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
115
diff
changeset
|
25 } Condition, *ConditionList; |
66c633575b53
remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
115
diff
changeset
|
26 |
77
7f53a587bf97
add regexParser.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
27 typedef struct charClass { |
111 | 28 unsigned char type; |
115
ca30f8334741
rename createRegexParser.cc to regexParser.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
111
diff
changeset
|
29 struct charClass *left; |
ca30f8334741
rename createRegexParser.cc to regexParser.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
111
diff
changeset
|
30 struct charClass *right; |
142 | 31 Condition cond; |
212
b0ae5273925c
implement allocateCCState()
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
203
diff
changeset
|
32 int stateNum; |
142 | 33 BitVector nextState; |
77
7f53a587bf97
add regexParser.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
34 } CharClass, *CharClassPtr; |
7f53a587bf97
add regexParser.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
35 |
190
3e8e5780ad4a
change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
188
diff
changeset
|
36 struct node; |
3e8e5780ad4a
change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
188
diff
changeset
|
37 |
3e8e5780ad4a
change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
188
diff
changeset
|
38 typedef struct state { |
192
ecf70fb215a5
print charclass
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
191
diff
changeset
|
39 int stateNum; |
190
3e8e5780ad4a
change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
188
diff
changeset
|
40 BitVector bitState; |
3e8e5780ad4a
change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
188
diff
changeset
|
41 CharClassPtr cc; |
3e8e5780ad4a
change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
188
diff
changeset
|
42 struct node *node; |
3e8e5780ad4a
change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
188
diff
changeset
|
43 struct state *next; |
3e8e5780ad4a
change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
188
diff
changeset
|
44 } State, *StatePtr; |
3e8e5780ad4a
change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
188
diff
changeset
|
45 |
77
7f53a587bf97
add regexParser.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
46 typedef struct node { |
89
50a146c05192
add NodeNumber in Regex Parser tree
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
85
diff
changeset
|
47 unsigned char tokenType; |
111 | 48 CharClassPtr cc; |
183
7ae0a3070647
implement generateTransitionList
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
180
diff
changeset
|
49 int stateNum; |
7ae0a3070647
implement generateTransitionList
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
180
diff
changeset
|
50 int nextStateNum; |
7ae0a3070647
implement generateTransitionList
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
180
diff
changeset
|
51 StatePtr state; |
195
4fefd80c05f2
change variable name (TGValue tg -> TGValue tgv)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
192
diff
changeset
|
52 StatePtr nextState; |
77
7f53a587bf97
add regexParser.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
53 struct node *left; |
7f53a587bf97
add regexParser.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
54 struct node *right; |
7f53a587bf97
add regexParser.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
55 } Node, *NodePtr; |
116
66c633575b53
remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
115
diff
changeset
|
56 |
187
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
57 typedef struct stateStack { |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
58 BitVector state; |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
59 struct stateStack *next; |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
60 } StateStack, *StateStackPtr; |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
61 |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
62 typedef struct transitionGenerator { |
203
e809a2dd0731
add scValue
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
201
diff
changeset
|
63 long totalStateCount; |
187
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
64 StateStackPtr stack; |
192
ecf70fb215a5
print charclass
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
191
diff
changeset
|
65 StatePtr *stateArray; |
ecf70fb215a5
print charclass
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
191
diff
changeset
|
66 StatePtr stateList; |
187
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
67 } TransitionGenerator, *TransitionGeneratorPtr; |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
68 |
220
4dc8d327cc7d
subset construction worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
215
diff
changeset
|
69 // Value Container is passed directly in functions |
4dc8d327cc7d
subset construction worked
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
215
diff
changeset
|
70 // Don't use it's pointer |
203
e809a2dd0731
add scValue
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
201
diff
changeset
|
71 typedef struct scValue { |
e809a2dd0731
add scValue
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
201
diff
changeset
|
72 StatePtr stateTop; |
e809a2dd0731
add scValue
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
201
diff
changeset
|
73 StatePtr stateEnd; |
e809a2dd0731
add scValue
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
201
diff
changeset
|
74 TransitionGeneratorPtr tg; |
e809a2dd0731
add scValue
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
201
diff
changeset
|
75 } SCValue, *SCValuePtr; |
e809a2dd0731
add scValue
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
201
diff
changeset
|
76 |
187
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
77 typedef struct tgValue { |
215 | 78 StatePtr asterisk; // last * state of the expression |
192
ecf70fb215a5
print charclass
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
191
diff
changeset
|
79 StatePtr startState; |
ecf70fb215a5
print charclass
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
191
diff
changeset
|
80 StatePtr endState; |
187
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
81 TransitionGeneratorPtr tg; |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
82 } TGValue, *TGValuePtr; |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
83 |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
84 enum charClassStackState { |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
85 LEFT, |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
86 SELF, |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
87 RIGHT |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
88 }; |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
89 |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
90 typedef struct charClassStack { |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
91 charClassStackState turn; |
184
1da1b2eacb84
gather struct
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
183
diff
changeset
|
92 CharClassPtr cc; |
187
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
93 struct charClassStack *next; |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
94 } CharClassStack, *CharClassStackPtr; |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
95 |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
96 typedef struct charClassWalker { |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
97 CharClassStackPtr stack; |
201
b8bc24abaf8a
add TODO and fix CharClassWalker
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
195
diff
changeset
|
98 charClassStackState turn; |
187
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
99 CharClassPtr next; |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
100 } CharClassWalker, *CharClassWalkerPtr; |
ef798db705e9
remove some warnings and errors(not working)
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
186
diff
changeset
|
101 |
184
1da1b2eacb84
gather struct
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
183
diff
changeset
|
102 |
116
66c633575b53
remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
115
diff
changeset
|
103 typedef struct regexInfo { |
66c633575b53
remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
115
diff
changeset
|
104 unsigned char *ptr; |
66c633575b53
remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
115
diff
changeset
|
105 unsigned char tokenType; |
118 | 106 unsigned char *tokenValue; |
178
5e8c6857934c
implement charClassMerge
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
175
diff
changeset
|
107 int stateNumber; |
116
66c633575b53
remove error and warning
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
115
diff
changeset
|
108 } RegexInfo, *RegexInfoPtr; |
188
109d22faf7b5
remove errors and warnings
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
187
diff
changeset
|
109 #endif |
190
3e8e5780ad4a
change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
188
diff
changeset
|
110 |
3e8e5780ad4a
change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
188
diff
changeset
|
111 extern NodePtr createNode(RegexInfoPtr ri,unsigned char type,CharClassPtr cc, NodePtr left, NodePtr right); |
188
109d22faf7b5
remove errors and warnings
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
187
diff
changeset
|
112 extern CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right); |
190
3e8e5780ad4a
change node::State to State
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
188
diff
changeset
|
113 extern NodePtr regex(RegexInfoPtr); |