Mercurial > hg > Applications > Grep
changeset 147:84d32375383a pairPro
implement insertCharClass
author | masa |
---|---|
date | Tue, 15 Dec 2015 17:14:35 +0900 |
parents | 1c74ac7d56ec |
children | d1ebba6e117a |
files | c/regexParser/main.cc c/regexParser/node.cc c/regexParser/regexParser.cc |
diffstat | 3 files changed, 64 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/c/regexParser/main.cc Tue Dec 15 16:20:35 2015 +0900
+++ b/c/regexParser/main.cc Tue Dec 15 17:14:35 2015 +0900
@@ -20,6 +20,5 @@
     printf("regex : %s\n",ri->ptr);
     NodePtr n = regex(ri);
     printTree(n);
-    TransitionGenerator tg = generateTransitionList(n);
     return 0;
 }
```
```diff
--- a/c/regexParser/node.cc Tue Dec 15 16:20:35 2015 +0900
+++ b/c/regexParser/node.cc Tue Dec 15 17:14:35 2015 +0900
@@ -3,6 +3,9 @@
 static void printCharacterClass(CharClassPtr cc, long nodeNumber,int d) {
     if (cc->type == 'r') {
+        if (cc->left) {
+            printCharacterClass(cc->left,nodeNumber,d+1);
+        }
         printf("%*c",d*4, ' ');
         for (RangeList range = cc->cond.range; range.begin != 0;) {
             printf("[%c-%c] ",(unsigned char)range.begin,(unsigned char)range.end);
@@ -13,14 +16,15 @@
             }
         }
         printf("(%lu)\n",nodeNumber);
+        if (cc->right) {
+            printCharacterClass(cc->right,nodeNumber,d+1);
+        }
     }
 }
 static void descendTree(NodePtr n, int d) {
     if (n->left != NULL) {
-        d++;
-        descendTree(n->left, d);
-        d--;
+        descendTree(n->left, d+1);
     }
     if (n->tokenType == 'a') {
         printf("%*c",d*4, ' ');
@@ -35,9 +39,7 @@
     }
     if (n->right != NULL) {
-        d++;
-        descendTree(n->right, d);
-        d--;
+        descendTree(n->right, d+1);
     }
 }
```
```diff
--- a/c/regexParser/regexParser.cc Tue Dec 15 16:20:35 2015 +0900
+++ b/c/regexParser/regexParser.cc Tue Dec 15 17:14:35 2015 +0900
@@ -39,6 +39,17 @@
     return n;
 }
+CharClassPtr createCharClassRange(unsigned long begin, unsigned long end, CharClassPtr left, CharClassPtr right) {
+    CharClassPtr cc = NEW(CharClass);
+    cc->type = 'r';
+    cc->cond.range.begin = begin;
+    cc->cond.range.end = end;
+    cc->left = left;
+    cc->right = right;
+    cc->nextState.bitContainer = 0;
+    return cc;
+}
+
 CharClassPtr createCharClassWord(RegexInfoPtr ri) {
     CharClassPtr cc = NEW(CharClass);
     cc->type = 'a';
@@ -48,9 +59,39 @@
     return cc;
 }
-CharClassPtr charClassMerge(CharClassPtr cc, unsigned char begin, unsigned char end, CharClassPtr next) {
-    CharClassPtr cc1 = NEW(CharClass);
-    return cc1;
+CharClassPtr insertCharClass(CharClassPtr cc, unsigned char begin, unsigned char end) {
+    if (end < cc->cond.range.begin ) {
+        CharClassPtr cc1 = createCharClassRange(cc->begin,cc->end,cc->left,cc->right);
+        if (cc->left) {
+            cc1->left = insertCharClass(cc->left,begin,end);
+            return cc1;
+        } else {
+            CharClassPtr cc2 = createCharClassRange(begin,end,0,0);
+            cc1->left = cc2;
+            return cc1;
+        }
+    } else if (end == cc->cond.range.begin ) {
+        cc->cond.range.begin = begin;
+    } else if (end <= cc->cond.range.end) {
+        if (begin < cc->cond.range.begin) {
+            cc->cond.range.begin = begin;
+        }
+    } else if (begin > cc->cond.range.end ) {
+        CharClassPtr cc1 = createCharClassRange(cc->begin,cc->end,cc->left,cc->right);
+        if (cc->right) {
+            cc1->rigt = insertCharClass(cc->right,begin,end);
+            return cc1;
+        } else {
+            CharClassPtr cc2 = createCharClassRange(begin,end,0,0);
+            cc1->right = cc2;
+            return cc1;
+        }
+    } else if (begin == cc->cond.range.end ) {
+        cc->cond.range.end = end;
+    } else if (begin < cc->cond.range.begin) {
+        cc->cond.range.begin = begin;
+    }
+    return cc;
 }
 // <charClass> ::= '['<literal>'-'<literal>']'
@@ -61,27 +102,33 @@
     cc->type = 'r';
     cc->nextState.bitContainer = 0;
     RangeListPtr rangeList = &cc->cond.range;
-    rangeList->begin = (unsigned long)*ri->ptr;
-    rangeList->end = (unsigned long)*ri->ptr;
+    rangeList->begin = *ri->ptr;
+    rangeList->end = *ri->ptr;
     rangeList->next = NULL;
     for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) {
         if (*ri->ptr == '-') {
-            rangeList->end = (unsigned long)*(ri->ptr + 1);
+            rangeList->end = *(ri->ptr + 1);
             ri->ptr++;
             continue;
         }
         if (ri->ptr[0] == 0 || ri->ptr[0] == ']') break;
         if (ri->ptr[0] == rangeList->end + 1) {
-            rangeList->end = (unsigned long)*ri->ptr;
+            rangeList->end = *ri->ptr;
             continue;
         }
         rangeList->next = NEW(RangeList);
         rangeList = rangeList->next;
-        rangeList->begin = (unsigned long)*ri->ptr;
-        rangeList->end = (unsigned long)*ri->ptr;
+        rangeList->begin = *ri->ptr;
+        rangeList->end = *ri->ptr;
         rangeList->next = NULL;
     }
+
+    for (RangeListPtr r = &cc->cond.range; r; r = r->next) {
+        cc = insertCharClass(cc, r->begin, r->end);
+    }
+
+    n->cc = cc;
     // TODO literal support
     // merge rangeList here
     if (*ri->ptr) ri->ptr++;
```