# HG changeset patch # User Shinji KONO # Date 1454927017 -32400 # Node ID df27e6cab8466cab6594c7c325b12486374e5b10 # Parent 058c87665213600925fb2787d0cb1995514c2c69 CharClassMerge with Word ( no match implementation ) diff -r 058c87665213 -r df27e6cab846 regexParser/CharClass.cc --- a/regexParser/CharClass.cc Mon Feb 08 18:04:28 2016 +0900 +++ b/regexParser/CharClass.cc Mon Feb 08 19:23:37 2016 +0900 @@ -16,6 +16,7 @@ cc->cond.range.end = end; cc->cond.w.word = NULL; cc->cond.w.length = 0; + cc->cond.w.next = NULL; cc->left = left; cc->right = right; cc->nextState.bitContainer = state; @@ -28,10 +29,41 @@ cc->right = NULL; cc->cond.w.word = ri->tokenValue; cc->cond.w.length = ri->ptr - ri->tokenValue; + cc->cond.w.next = NULL; cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue; return cc; } +CharClassPtr createCharClassRangeWord(unsigned long begin, unsigned long end,CharClassPtr cc,CharClassPtr m, CharClassPtr left, CharClassPtr right) { + // same range and seme nextState? + CharClassPtr ncc = NEW(CharClass); + ncc->cond.range.begin = begin; + ncc->cond.range.end = end; + ncc->cond.w = cc->cond.w; + ncc->left = left; + ncc->right = right; + if (!m) { + ncc->nextState.bitContainer = cc->nextState.bitContainer; + return ncc; + } + if (m->cond.w.word) { + WordPtr w = &m->cond.w; + WordPtr *next = &ncc->cond.w.next; + while(w->next) { // insert sort? + WordPtr n = NEW(Word); + n->word = w->word; + n->length = w->length; + *next = n; + next = &n->next; + w = w->next; + } + *next = &m->cond.w; + } + ncc->nextState.bitContainer = m->nextState.bitContainer | cc->nextState.bitContainer; + return ncc; +} + + /* cond.range.begin cond.range.end |----------------| @@ -107,14 +139,14 @@ -CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) ; +CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, CharClassPtr m) ; -CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) { +CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,CharClassPtr m) { CharClassPtr cc1; if (cc) { - cc1 = charClassMerge(cc,mBegin,mEnd,nextState); + cc1 = charClassMerge(cc,mBegin,mEnd,m); } else { - cc1 = createCharClassRange(mBegin,mEnd,nextState.bitContainer,NULL,NULL); + cc1 = createCharClassRangeWord(mBegin,mEnd,m,NULL,NULL,NULL); } return cc1; } @@ -146,74 +178,74 @@ */ -CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) { +CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, CharClassPtr m) { // 重なっているccの領域を分割する // 必要ならばnextStateを重ねあわせる // 変更があった場合は新しくリストを作って返す if (end < cc->cond.range.begin ) { // 1 if (cc->left) { - return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,charClassMerge(cc->left,begin,end,nextState),cc->right); + return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.end,m,NULL,charClassMerge(cc->left,begin,end,m),cc->right); } else { - return createCharClassRange(begin,end,nextState.bitContainer,NULL,cc); + return createCharClassRangeWord(begin,end,m,NULL,NULL,cc); } } else if (end == cc->cond.range.begin && begin != end ) { // 2 - CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState); + CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,m); if (cc->cond.range.begin == cc->cond.range.end) { - return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, - cc->nextState.bitContainer | nextState.bitContainer, cc1,cc->right); + return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.end, + cc,m, cc1,cc->right); } - CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); - return createCharClassRange(cc->cond.range.begin,cc->cond.range.begin, - cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); + CharClassPtr cc3 = createCharClassRangeWord(cc->cond.range.begin+1,cc->cond.range.end,cc,NULL,cc->left,cc->right); + return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.begin, + cc,m,cc1,cc3); } else if (end < cc->cond.range.end) { // range.begin < end if (begin < cc->cond.range.begin) { // 3 - CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); - CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); - return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); + CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,m); + CharClassPtr cc3 = createCharClassRangeWord(end+1,cc->cond.range.end,cc,NULL,cc->left,cc->right); + return createCharClassRangeWord(cc->cond.range.begin,end,m,cc,cc1,cc3); } if (begin == cc->cond.range.begin) { // 6 - CharClassPtr cc2 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); - return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc2); + CharClassPtr cc2 = createCharClassRangeWord(end+1,cc->cond.range.end,cc,NULL,NULL,cc->right); + return createCharClassRangeWord(begin,end,cc,m,cc->left,cc2); } // 9 - CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); - CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); - return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc2,cc3); + CharClassPtr cc2 = createCharClassRangeWord(cc->cond.range.begin,begin-1,cc,NULL,cc->left,NULL); + CharClassPtr cc3 = createCharClassRangeWord(end+1,cc->cond.range.end,cc,NULL,NULL,cc->right); + return createCharClassRangeWord(begin,end,cc,m,cc2,cc3); } else if (end == cc->cond.range.end) { if (begin == cc->cond.range.begin) { // 7 - return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc->right); + return createCharClassRangeWord(begin,end,cc,m,cc->left,cc->right); } else if (begin < cc->cond.range.begin) { // 4 - CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); - return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right); + CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,m); + return createCharClassRangeWord(cc->cond.range.begin,end,cc,m,cc1,cc->right); } // 10 cond.range.begin < begin - CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,NULL,cc->right); - return createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,cc2); + CharClassPtr cc2 = createCharClassRangeWord(begin,cc->cond.range.end,cc,m,NULL,cc->right); + return createCharClassRangeWord(cc->cond.range.begin,begin-1,cc,NULL,cc->left,cc2); } if (begin > cc->cond.range.end ) { // 13 if (cc->right) { - return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer,cc->left,charClassMerge(cc->right,begin,end,nextState)); + return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.end,cc,NULL,cc->left,charClassMerge(cc->right,begin,end,m)); } else { - return createCharClassRange(begin,end,nextState.bitContainer,cc,NULL); + return createCharClassRangeWord(begin,end,m,NULL,cc,NULL); } } if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { if (end > cc->cond.range.end) { // cond.range.end < end if (begin == cc->cond.range.begin) { // 8 - CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); - return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, - cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc1); + CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,m); + return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.end, + cc,m,cc->left,cc1); } if (begin > cc->cond.range.begin) { // 11,12 - CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); - CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); - return createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc3,cc1); + CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,m); + CharClassPtr cc3 = createCharClassRangeWord(cc->cond.range.begin,begin-1,cc,m,cc->left,NULL); + return createCharClassRangeWord(begin,cc->cond.range.end,cc,m,cc3,cc1); } } } else if (begin < cc->cond.range.begin) { // 5 - CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); - CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); - return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); + CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,m); + CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,m); + return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.end,cc,m,cc1,cc3); } else { printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); } @@ -294,13 +326,11 @@ } CharClassWalkerPtr walk = createCharClassWalker(x->cc); CharClassPtr ccy = y; - BitVector bi; while (hasNext(walk)) { CharClassPtr cc = getNext(walk); unsigned long begin = cc->cond.range.begin; unsigned long end = cc->cond.range.end; - bi = cc->nextState; - ccy = charClassMerge(ccy,begin,end,bi); + ccy = charClassMerge(ccy,begin,end,cc); } free(walk); return ccy; diff -r 058c87665213 -r df27e6cab846 regexParser/regexParser.h --- a/regexParser/regexParser.h Mon Feb 08 18:04:28 2016 +0900 +++ b/regexParser/regexParser.h Mon Feb 08 19:23:37 2016 +0900 @@ -13,7 +13,7 @@ int length; // look up table for BM search. // BitVector nextState; - // struct word *next; + struct word *next; } Word, *WordPtr; typedef struct utf8Range { diff -r 058c87665213 -r df27e6cab846 regexParser/test/ccMerge.cc --- a/regexParser/test/ccMerge.cc Mon Feb 08 18:04:28 2016 +0900 +++ b/regexParser/test/ccMerge.cc Mon Feb 08 19:23:37 2016 +0900 @@ -24,6 +24,8 @@ NodePtr n = NULL; StatePtr s = NULL; TGValue tgv = createTGValue(); + State dummy; + tgv.tg->stateEnd = &dummy; for (int i = 1; i < argc; i++) { if (strcmp(argv[i],"-regex") == 0) { ri.ptr = (unsigned char*)argv[i+1]; i++;