Mercurial > hg > Members > masakoha > testcode
changeset 308:1188debbef10
separate CharClass
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 08 Feb 2016 12:45:45 +0900 |
parents | 9f0df6ce89a2 |
children | 058c87665213 |
files | regexParser/CharClass.cc regexParser/CharClass.h regexParser/Makefile regexParser/generateSequentialSearch.cc regexParser/grepWalk.cc regexParser/regexParser.cc regexParser/subsetConstruction.cc regexParser/subsetConstruction.h regexParser/test/ccMerge.cc regexParser/threadedSearch.cc |
diffstat | 10 files changed, 332 insertions(+), 305 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/regexParser/CharClass.cc Mon Feb 08 12:45:45 2016 +0900 @@ -0,0 +1,311 @@ +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> + +#include "regexParser.h" +#include "subsetConstruction.h" +#include "node.h" +#include "BitVector.h" +#include "error.h" + +#include "CharClass.h" + + +CharClassPtr createCharClassWord(RegexInfoPtr ri) { + CharClassPtr cc = NEW(CharClass); + cc->type = 'a'; + cc->left = NULL; + cc->right = NULL; + cc->cond.w.word = ri->tokenValue; + cc->cond.w.length = ri->ptr - ri->tokenValue; + cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue; + return cc; +} + +/* + cond.range.begin cond.range.end + |----------------| + 1.b---e + 2.b------e + 3.b------------e + 4.b-----------------------e + 5.b----------------------------e + + |----------------| + 6. b---------e + 7. b----------------e + 8. b---------------------e + + |----------------| + 9. b-----e + 10. b--------e + 11. b-------------e + + |----------------| + 12. b-----e + + |----------------| + 13. b--e + + */ +CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end) { + if (begin>end) { + unsigned long tmp = begin; begin = end; end = tmp; + } + if (cc == NULL) { + return createCharClassRange(begin,end,0,0,0); + } + if (end < cc->cond.range.begin ) { // 1 + if (cc->left) { + cc->left = insertCharClass(cc->left,begin,end); + } else { + cc->left = createCharClassRange(begin,end,0,0,0); + } + return cc; + } else if (end == cc->cond.range.begin ) { // 2 + cc->cond.range.begin = begin; + return cc; + } else if (end <= cc->cond.range.end) { // 3,4,6,7,9,10 + if (begin < cc->cond.range.begin) { // 3,4 + cc->cond.range.begin = begin; + } + return cc; + } else if (begin > cc->cond.range.end ) { // 13 + if (cc->right) { + cc->right = insertCharClass(cc->right,begin,end); + } else { + cc->right = createCharClassRange(begin,end,0,0,0); + } + return cc; + } + if (cc->right) { + CharClassPtr right = cc->right; + begin = cc->cond.range.begin; + free(cc); + return insertCharClass(right,begin,end); + } + if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { // 12 + if (end > cc->cond.range.end) cc->cond.range.end = end; // 11,8 + } else if (begin < cc->cond.range.begin) { // 5 + cc->cond.range.begin = begin; + cc->cond.range.end = end; + } else { + printf("insertCharClass Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); + } + return cc; +} + + +CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) { + CharClassPtr cc = NEW(CharClass); + cc->type = 'r'; + cc->cond.range.begin = begin; + cc->cond.range.end = end; + cc->cond.w.word = NULL; + cc->cond.w.length = 0; + cc->left = left; + cc->right = right; + cc->nextState.bitContainer = state; + return cc; +} + +CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) ; + +CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) { + CharClassPtr cc1; + if (cc) { + cc1 = charClassMerge(cc,mBegin,mEnd,nextState); + } else { + cc1 = createCharClassRange(mBegin,mEnd,nextState.bitContainer,NULL,NULL); + } + return cc1; +} + +/* + cond.range.begin cond.range.end + |----------------| + 1.b---e + 2.b------e + 3.b------------e + 4.b-----------------------e + 5.b----------------------------e + + |----------------| + 6. b---------e + 7. b----------------e + 8. b---------------------e + + |----------------| + 9. b-----e + 10. b--------e + 11. b-------------e + + |----------------| + 12. b-----e + + |----------------| + 13. b--e + + */ + +CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) { + // 重なっているccの領域を分割する + // 必要ならばnextStateを重ねあわせる + // 変更があった場合は新しくリストを作って返す + if (end < cc->cond.range.begin ) { // 1 + if (cc->left) { + return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,charClassMerge(cc->left,begin,end,nextState),cc->right); + } else { + return createCharClassRange(begin,end,nextState.bitContainer,NULL,cc); + } + } else if (end == cc->cond.range.begin && begin != end ) { // 2 + CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState); + if (cc->cond.range.begin == cc->cond.range.end) { + return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, + cc->nextState.bitContainer | nextState.bitContainer, cc1,cc->right); + } + CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); + return createCharClassRange(cc->cond.range.begin,cc->cond.range.begin, + cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); + } else if (end < cc->cond.range.end) { // range.begin < end + if (begin < cc->cond.range.begin) { // 3 + CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); + CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); + return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); + } + if (begin == cc->cond.range.begin) { // 6 + CharClassPtr cc2 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); + return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc2); + } + // 9 + CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); + CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); + return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc2,cc3); + } else if (end == cc->cond.range.end) { + if (begin == cc->cond.range.begin) { // 7 + return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc->right); + } else if (begin < cc->cond.range.begin) { // 4 + CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); + return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right); + } + // 10 cond.range.begin < begin + CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,NULL,cc->right); + return createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,cc2); + } + if (begin > cc->cond.range.end ) { // 13 + if (cc->right) { + return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer,cc->left,charClassMerge(cc->right,begin,end,nextState)); + } else { + return createCharClassRange(begin,end,nextState.bitContainer,cc,NULL); + } + } + if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { + if (end > cc->cond.range.end) { // cond.range.end < end + if (begin == cc->cond.range.begin) { // 8 + CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); + return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, + cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc1); + } + if (begin > cc->cond.range.begin) { // 11,12 + CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); + CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); + return createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc3,cc1); + } + } + } else if (begin < cc->cond.range.begin) { // 5 + CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); + CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); + return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); + } else { + printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); + } + return cc; +} + +void findLeftMost(CharClassPtr next,CharClassWalkerPtr walk) { + while (next->left) { + CharClassStackPtr ccs = NEW(CharClassStack); + ccs->next = walk->stack; + ccs->turn = walk->turn; + walk->turn = LEFT; + ccs->cc = next; + walk->stack = ccs; + next = next->left; + } + walk->turn = SELF; + walk->next = next; +} + +CharClassWalkerPtr createCharClassWalker (CharClassPtr next) { + CharClassWalkerPtr walk = NEW(CharClassWalker); + walk->next = NULL; + walk->stack = NULL; + walk->turn = LEFT; + if (!next) return walk; + findLeftMost(next,walk); + return walk; +} + +bool hasNext(CharClassWalkerPtr walk) { + return walk->next != NULL; +} + +CharClassStackPtr charClassStackPop(CharClassWalkerPtr walk) { + CharClassStackPtr prev = walk->stack->next; + walk->turn = walk->stack->turn; + free(walk->stack); + walk->stack = prev; + return prev; +} + +CharClassPtr getNext(CharClassWalkerPtr walk) { + CharClassPtr current = walk->next; + walk->next = NULL; + if (walk->turn == SELF) { + if (current->right) { + walk->turn = RIGHT; + findLeftMost(current->right,walk); + return current; + } + } + while (walk->stack) { + walk->next = walk->stack->cc; + charClassStackPop(walk); + if (walk->turn == LEFT) { + walk->turn = SELF; + return current; + } + } + return current; +} + +void setState(CharClassPtr cc, BitVector bi) { + // if (word case) setNext(bitVector to the word list) + cc->nextState = bi; + if (cc->left) { + setState(cc->left,bi); + } + if (cc->right) { + setState(cc->right,bi); + } +} + +CharClassPtr mergeTransition(StatePtr x,CharClassPtr y) { + if (x->cc == NULL) { + return y; + } + CharClassWalkerPtr walk = createCharClassWalker(x->cc); + CharClassPtr ccy = y; + BitVector bi; + while (hasNext(walk)) { + CharClassPtr cc = getNext(walk); + unsigned long begin = cc->cond.range.begin; + unsigned long end = cc->cond.range.end; + bi = cc->nextState; + ccy = charClassMerge(ccy,begin,end,bi); + } + free(walk); + return ccy; +} + +/* end */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/regexParser/CharClass.h Mon Feb 08 12:45:45 2016 +0900 @@ -0,0 +1,11 @@ +#include "regexParser.h" + +extern CharClassPtr createCharClassWord(RegexInfoPtr ri) ; +extern CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end) ; +extern CharClassWalkerPtr createCharClassWalker (CharClassPtr next) ; +extern bool hasNext(CharClassWalkerPtr walk) ; +extern CharClassPtr getNext(CharClassWalkerPtr walk) ; +extern void setState(CharClassPtr cc, BitVector bi) ; +extern CharClassPtr mergeTransition(StatePtr x,CharClassPtr y) ; + +/* end */
--- a/regexParser/Makefile Mon Feb 08 12:26:53 2016 +0900 +++ b/regexParser/Makefile Mon Feb 08 12:45:45 2016 +0900 @@ -1,6 +1,6 @@ TARGET= regexParser test/ccMerge CFLAGS= -Wall -O0 -g -I$(CERIUM)/include/TaskManager -I. -SEQCFLAGS= CFLAGS= -Wall -O -g -I$(CERIUM)/include/TaskManager -I. +SEQCFLAGS= -Wall -O -g -I$(CERIUM)/include/TaskManager -I. CC= clang++ CbC= clang++ CERIUM= ../../Cerium @@ -36,7 +36,7 @@ $(CC) $(CFLAGS) $< bitVector.cc -o $@ test/ccMerge: test/ccMerge.o subsetConstruction.o regexParser.o node.o error.o bitVector.o - $(CC) $(CFLAGS) $< subsetConstruction.o regexParser.o node.o error.o bitVector.o -o $@ + $(CC) $(CFLAGS) $< subsetConstruction.o regexParser.o node.o error.o bitVector.o CharClass.o -o $@ parallelSearch: $(AR) cd cerium ; $(MAKE) -f Makefile.macosx CERIUM=../$(CERIUM) @@ -79,7 +79,7 @@ sequentialSearch: sequentialSearch.cc regexParser fileread.o ./regexParser -seq -subset -regex $(REGEX) - $(CC) $(CFLAGS) -c sequentialSearch.cc + $(CC) $(SEQCFLAGS) -c sequentialSearch.cc $(CC) $(SEQDFLAGS) sequentialSearch.o generateSequentialSearch.o $(OBJS) -o $@ - ./$@ -file $(TESTFILE)
--- a/regexParser/generateSequentialSearch.cc Mon Feb 08 12:26:53 2016 +0900 +++ b/regexParser/generateSequentialSearch.cc Mon Feb 08 12:45:45 2016 +0900 @@ -2,6 +2,7 @@ #include <stdlib.h> #include "generateSequentialSearch.h" +#include "CharClass.h" #include "subsetConstruction.h" void
--- a/regexParser/grepWalk.cc Mon Feb 08 12:26:53 2016 +0900 +++ b/regexParser/grepWalk.cc Mon Feb 08 12:45:45 2016 +0900 @@ -3,6 +3,7 @@ #include "grepWalk.h" #include "subsetConstruction.h" +#include "CharClass.h" #include "threadedSearch.h" StatePtr nextState(BitVector bi,TransitionGeneratorPtr tg) {
--- a/regexParser/regexParser.cc Mon Feb 08 12:26:53 2016 +0900 +++ b/regexParser/regexParser.cc Mon Feb 08 12:45:45 2016 +0900 @@ -3,6 +3,7 @@ #include <string.h> #include <ctype.h> #include "regexParser.h" +#include "CharClass.h" static NodePtr charClass(RegexInfoPtr); static void token(RegexInfoPtr); @@ -37,90 +38,6 @@ return n; } -CharClassPtr createCharClassWord(RegexInfoPtr ri) { - CharClassPtr cc = NEW(CharClass); - cc->type = 'a'; - cc->left = NULL; - cc->right = NULL; - cc->cond.w.word = ri->tokenValue; - cc->cond.w.length = ri->ptr - ri->tokenValue; - cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue; - return cc; -} - -/* - cond.range.begin cond.range.end - |----------------| - 1.b---e - 2.b------e - 3.b------------e - 4.b-----------------------e - 5.b----------------------------e - - |----------------| - 6. b---------e - 7. b----------------e - 8. b---------------------e - - |----------------| - 9. b-----e - 10. b--------e - 11. b-------------e - - |----------------| - 12. b-----e - - |----------------| - 13. b--e - - */ -CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end) { - if (begin>end) { - unsigned long tmp = begin; begin = end; end = tmp; - } - if (cc == NULL) { - return createCharClassRange(begin,end,0,0,0); - } - if (end < cc->cond.range.begin ) { // 1 - if (cc->left) { - cc->left = insertCharClass(cc->left,begin,end); - } else { - cc->left = createCharClassRange(begin,end,0,0,0); - } - return cc; - } else if (end == cc->cond.range.begin ) { // 2 - cc->cond.range.begin = begin; - return cc; - } else if (end <= cc->cond.range.end) { // 3,4,6,7,9,10 - if (begin < cc->cond.range.begin) { // 3,4 - cc->cond.range.begin = begin; - } - return cc; - } else if (begin > cc->cond.range.end ) { // 13 - if (cc->right) { - cc->right = insertCharClass(cc->right,begin,end); - } else { - cc->right = createCharClassRange(begin,end,0,0,0); - } - return cc; - } - if (cc->right) { - CharClassPtr right = cc->right; - begin = cc->cond.range.begin; - free(cc); - return insertCharClass(right,begin,end); - } - if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { // 12 - if (end > cc->cond.range.end) cc->cond.range.end = end; // 11,8 - } else if (begin < cc->cond.range.begin) { // 5 - cc->cond.range.begin = begin; - cc->cond.range.end = end; - } else { - printf("insertCharClass Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); - } - return cc; -} - // <charClass> ::= '['<literal>'-'<literal>']' static NodePtr charClass(RegexInfoPtr ri) {
--- a/regexParser/subsetConstruction.cc Mon Feb 08 12:26:53 2016 +0900 +++ b/regexParser/subsetConstruction.cc Mon Feb 08 12:45:45 2016 +0900 @@ -5,221 +5,10 @@ #include "regexParser.h" #include "subsetConstruction.h" #include "node.h" -#include "BitVector.h" +#include "bitVector.h" +#include "CharClass.h" #include "error.h" -#define SIZE 256 - -CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) { - CharClassPtr cc = NEW(CharClass); - cc->type = 'r'; - cc->cond.range.begin = begin; - cc->cond.range.end = end; - cc->cond.w.word = NULL; - cc->cond.w.length = 0; - cc->left = left; - cc->right = right; - cc->nextState.bitContainer = state; - return cc; -} - -CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) { - CharClassPtr cc1; - if (cc) { - cc1 = charClassMerge(cc,mBegin,mEnd,nextState); - } else { - cc1 = createCharClassRange(mBegin,mEnd,nextState.bitContainer,NULL,NULL); - } - return cc1; -} - -/* - cond.range.begin cond.range.end - |----------------| - 1.b---e - 2.b------e - 3.b------------e - 4.b-----------------------e - 5.b----------------------------e - - |----------------| - 6. b---------e - 7. b----------------e - 8. b---------------------e - - |----------------| - 9. b-----e - 10. b--------e - 11. b-------------e - - |----------------| - 12. b-----e - - |----------------| - 13. b--e - - */ - -CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) { - // 重なっているccの領域を分割する - // 必要ならばnextStateを重ねあわせる - // 変更があった場合は新しくリストを作って返す - if (end < cc->cond.range.begin ) { // 1 - if (cc->left) { - return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,charClassMerge(cc->left,begin,end,nextState),cc->right); - } else { - return createCharClassRange(begin,end,nextState.bitContainer,NULL,cc); - } - } else if (end == cc->cond.range.begin && begin != end ) { // 2 - CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState); - if (cc->cond.range.begin == cc->cond.range.end) { - return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, - cc->nextState.bitContainer | nextState.bitContainer, cc1,cc->right); - } - CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); - return createCharClassRange(cc->cond.range.begin,cc->cond.range.begin, - cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); - } else if (end < cc->cond.range.end) { // range.begin < end - if (begin < cc->cond.range.begin) { // 3 - CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); - CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); - return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); - } - if (begin == cc->cond.range.begin) { // 6 - CharClassPtr cc2 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); - return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc2); - } - // 9 - CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); - CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); - return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc2,cc3); - } else if (end == cc->cond.range.end) { - if (begin == cc->cond.range.begin) { // 7 - return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc->right); - } else if (begin < cc->cond.range.begin) { // 4 - CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); - return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right); - } - // 10 cond.range.begin < begin - CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,NULL,cc->right); - return createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,cc2); - } - if (begin > cc->cond.range.end ) { // 13 - if (cc->right) { - return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer,cc->left,charClassMerge(cc->right,begin,end,nextState)); - } else { - return createCharClassRange(begin,end,nextState.bitContainer,cc,NULL); - } - } - if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { - if (end > cc->cond.range.end) { // cond.range.end < end - if (begin == cc->cond.range.begin) { // 8 - CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); - return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, - cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc1); - } - if (begin > cc->cond.range.begin) { // 11,12 - CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); - CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); - return createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc3,cc1); - } - } - } else if (begin < cc->cond.range.begin) { // 5 - CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); - CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); - return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); - } else { - printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); - } - return cc; -} - -void findLeftMost(CharClassPtr next,CharClassWalkerPtr walk) { - while (next->left) { - CharClassStackPtr ccs = NEW(CharClassStack); - ccs->next = walk->stack; - ccs->turn = walk->turn; - walk->turn = LEFT; - ccs->cc = next; - walk->stack = ccs; - next = next->left; - } - walk->turn = SELF; - walk->next = next; -} - -CharClassWalkerPtr createCharClassWalker (CharClassPtr next) { - CharClassWalkerPtr walk = NEW(CharClassWalker); - walk->next = NULL; - walk->stack = NULL; - walk->turn = LEFT; - if (!next) return walk; - findLeftMost(next,walk); - return walk; -} - -bool hasNext(CharClassWalkerPtr walk) { - return walk->next != NULL; -} - -CharClassStackPtr charClassStackPop(CharClassWalkerPtr walk) { - CharClassStackPtr prev = walk->stack->next; - walk->turn = walk->stack->turn; - free(walk->stack); - walk->stack = prev; - return prev; -} - -CharClassPtr getNext(CharClassWalkerPtr walk) { - CharClassPtr current = walk->next; - walk->next = NULL; - if (walk->turn == SELF) { - if (current->right) { - walk->turn = RIGHT; - findLeftMost(current->right,walk); - return current; - } - } - while (walk->stack) { - walk->next = walk->stack->cc; - charClassStackPop(walk); - if (walk->turn == LEFT) { - walk->turn = SELF; - return current; - } - } - return current; -} - -void setState(CharClassPtr cc, BitVector bi) { - // if (word case) setNext(bitVector to the word list) - cc->nextState = bi; - if (cc->left) { - setState(cc->left,bi); - } - if (cc->right) { - setState(cc->right,bi); - } -} - -CharClassPtr mergeTransition(StatePtr x,CharClassPtr y) { - if (x->cc == NULL) { - return y; - } - CharClassWalkerPtr walk = createCharClassWalker(x->cc); - CharClassPtr ccy = y; - BitVector bi; - while (hasNext(walk)) { - CharClassPtr cc = getNext(walk); - unsigned long begin = cc->cond.range.begin; - unsigned long end = cc->cond.range.end; - bi = cc->nextState; - ccy = charClassMerge(ccy,begin,end,bi); - } - free(walk); - return ccy; -} - /** 作成する state を linked list bitvector を index とした配列に BitVectorPtr を格納
--- a/regexParser/subsetConstruction.h Mon Feb 08 12:26:53 2016 +0900 +++ b/regexParser/subsetConstruction.h Mon Feb 08 12:45:45 2016 +0900 @@ -1,14 +1,9 @@ -extern CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState); extern TGValue createTGValue(); -extern CharClassPtr mergeTransition(StatePtr x,CharClassPtr y); extern void exportState(TransitionGeneratorPtr tg); extern void setState(CharClassPtr cc, BitVector bi); extern StatePtr createState(TGValue tgv,NodePtr n); extern StatePtr createState(TransitionGeneratorPtr tg,BitVector bi); extern TGValue generateTransitionList(NodePtr n); -extern CharClassPtr getNext(CharClassWalkerPtr walk); -extern bool hasNext(CharClassWalkerPtr walk); -extern CharClassWalkerPtr createCharClassWalker (CharClassPtr next); extern void printState(TransitionGeneratorPtr tg); extern void printState(StatePtr state); extern void determinize(StatePtr s, TransitionGeneratorPtr tg);
--- a/regexParser/test/ccMerge.cc Mon Feb 08 12:26:53 2016 +0900 +++ b/regexParser/test/ccMerge.cc Mon Feb 08 12:45:45 2016 +0900 @@ -3,6 +3,7 @@ #include <string.h> #include "regexParser.h" #include "node.h" +#include "CharClass.h" #include "subsetConstruction.h" void printCCTree(CharClassPtr cc) {