Mercurial > hg > Applications > Grep
comparison regexParser/CharClass.cc @ 309:058c87665213
small fix
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 08 Feb 2016 18:04:28 +0900 |
parents | 1188debbef10 |
children | df27e6cab846 |
comparison
equal
deleted
inserted
replaced
308:1188debbef10 | 309:058c87665213 |
---|---|
8 #include "BitVector.h" | 8 #include "BitVector.h" |
9 #include "error.h" | 9 #include "error.h" |
10 | 10 |
11 #include "CharClass.h" | 11 #include "CharClass.h" |
12 | 12 |
13 | |
14 CharClassPtr createCharClassWord(RegexInfoPtr ri) { | |
15 CharClassPtr cc = NEW(CharClass); | |
16 cc->type = 'a'; | |
17 cc->left = NULL; | |
18 cc->right = NULL; | |
19 cc->cond.w.word = ri->tokenValue; | |
20 cc->cond.w.length = ri->ptr - ri->tokenValue; | |
21 cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue; | |
22 return cc; | |
23 } | |
24 | |
25 /* | |
26    cond.range.begin  cond.range.end | |
27            |----------------| | |
28   1.b---e | |
29   2.b------e | |
30   3.b------------e | |
31   4.b-----------------------e | |
32   5.b----------------------------e | |
33 | |
34            |----------------| | |
35   6.       b---------e | |
36   7.       b----------------e | |
37   8.       b---------------------e | |
38 | |
39            |----------------| | |
40   9.               b-----e | |
41   10.              b--------e | |
42   11.              b-------------e | |
43 | |
44            |----------------| | |
45   12.                       b-----e | |
46 | |
47            |----------------| | |
48   13.                          b--e | |
49 | |
50 */ | |
51 CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end) { | |
52 if (begin>end) { | |
53 unsigned long tmp = begin; begin = end; end = tmp; | |
54 } | |
55 if (cc == NULL) { | |
56 return createCharClassRange(begin,end,0,0,0); | |
57 } | |
58 if (end < cc->cond.range.begin ) { // 1 | |
59 if (cc->left) { | |
60 cc->left = insertCharClass(cc->left,begin,end); | |
61 } else { | |
62 cc->left = createCharClassRange(begin,end,0,0,0); | |
63 } | |
64 return cc; | |
65 } else if (end == cc->cond.range.begin ) { // 2 | |
66 cc->cond.range.begin = begin; | |
67 return cc; | |
68 } else if (end <= cc->cond.range.end) { // 3,4,6,7,9,10 | |
69 if (begin < cc->cond.range.begin) { // 3,4 | |
70 cc->cond.range.begin = begin; | |
71 } | |
72 return cc; | |
73 } else if (begin > cc->cond.range.end ) { // 13 | |
74 if (cc->right) { | |
75 cc->right = insertCharClass(cc->right,begin,end); | |
76 } else { | |
77 cc->right = createCharClassRange(begin,end,0,0,0); | |
78 } | |
79 return cc; | |
80 } | |
81 if (cc->right) { | |
82 CharClassPtr right = cc->right; | |
83 begin = cc->cond.range.begin; | |
84 free(cc); | |
85 return insertCharClass(right,begin,end); | |
86 } | |
87 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { // 12 | |
88 if (end > cc->cond.range.end) cc->cond.range.end = end; // 11,8 | |
89 } else if (begin < cc->cond.range.begin) { // 5 | |
90 cc->cond.range.begin = begin; | |
91 cc->cond.range.end = end; | |
92 } else { | |
93 printf("insertCharClass Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); | |
94 } | |
95 return cc; | |
96 } | |
97 | |
98 | |
99 CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) { | 13 CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) { |
100 CharClassPtr cc = NEW(CharClass); | 14 CharClassPtr cc = NEW(CharClass); |
101 cc->type = 'r'; | |
102 cc->cond.range.begin = begin; | 15 cc->cond.range.begin = begin; |
103 cc->cond.range.end = end; | 16 cc->cond.range.end = end; |
104 cc->cond.w.word = NULL; | 17 cc->cond.w.word = NULL; |
105 cc->cond.w.length = 0; | 18 cc->cond.w.length = 0; |
106 cc->left = left; | 19 cc->left = left; |
107 cc->right = right; | 20 cc->right = right; |
108 cc->nextState.bitContainer = state; | 21 cc->nextState.bitContainer = state; |
109 return cc; | 22 return cc; |
110 } | 23 } |
24 | |
25 CharClassPtr createCharClassWord(RegexInfoPtr ri) { | |
26 CharClassPtr cc = NEW(CharClass); | |
27 cc->left = NULL; | |
28 cc->right = NULL; | |
29 cc->cond.w.word = ri->tokenValue; | |
30 cc->cond.w.length = ri->ptr - ri->tokenValue; | |
31 cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue; | |
32 return cc; | |
33 } | |
34 | |
35 /* | |
36    cond.range.begin  cond.range.end | |
37            |----------------| | |
38   1.b---e | |
39   2.b------e | |
40   3.b------------e | |
41   4.b-----------------------e | |
42   5.b----------------------------e | |
43 | |
44            |----------------| | |
45   6.       b---------e | |
46   7.       b----------------e | |
47   8.       b---------------------e | |
48 | |
49            |----------------| | |
50   9.               b-----e | |
51   10.              b--------e | |
52   11.              b-------------e | |
53 | |
54            |----------------| | |
55   12.                       b-----e | |
56 | |
57            |----------------| | |
58   13.                          b--e | |
59 | |
60 */ | |
61 CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end) { | |
62 if (begin>end) { | |
63 unsigned long tmp = begin; begin = end; end = tmp; | |
64 } | |
65 if (cc == NULL) { | |
66 return createCharClassRange(begin,end,0,0,0); | |
67 } | |
68 if (end < cc->cond.range.begin ) { // 1 | |
69 if (cc->left) { | |
70 cc->left = insertCharClass(cc->left,begin,end); | |
71 } else { | |
72 cc->left = createCharClassRange(begin,end,0,0,0); | |
73 } | |
74 return cc; | |
75 } else if (end == cc->cond.range.begin ) { // 2 | |
76 cc->cond.range.begin = begin; | |
77 return cc; | |
78 } else if (end <= cc->cond.range.end) { // 3,4,6,7,9,10 | |
79 if (begin < cc->cond.range.begin) { // 3,4 | |
80 cc->cond.range.begin = begin; | |
81 } | |
82 return cc; | |
83 } else if (begin > cc->cond.range.end ) { // 13 | |
84 if (cc->right) { | |
85 cc->right = insertCharClass(cc->right,begin,end); | |
86 } else { | |
87 cc->right = createCharClassRange(begin,end,0,0,0); | |
88 } | |
89 return cc; | |
90 } | |
91 if (cc->right) { | |
92 CharClassPtr right = cc->right; | |
93 begin = cc->cond.range.begin; | |
94 free(cc); | |
95 return insertCharClass(right,begin,end); | |
96 } | |
97 if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { // 12 | |
98 if (end > cc->cond.range.end) cc->cond.range.end = end; // 11,8 | |
99 } else if (begin < cc->cond.range.begin) { // 5 | |
100 cc->cond.range.begin = begin; | |
101 cc->cond.range.end = end; | |
102 } else { | |
103 printf("insertCharClass Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); | |
104 } | |
105 return cc; | |
106 } | |
107 | |
108 | |
111 | 109 |
112 CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) ; | 110 CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) ; |
113 | 111 |
114 CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) { | 112 CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) { |
115 CharClassPtr cc1; | 113 CharClassPtr cc1; |