Mercurial > hg > Members > masakoha > testcode
changeset 299:bdfe0a32c48f
grepWalk
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 03 Feb 2016 13:11:10 +0900 |
parents | 63213964502a |
children | 3e78631a6222 |
files | regexParser/CeriumGrep.cc regexParser/Makefile regexParser/grepWalk.cc regexParser/grepWalk.h regexParser/threadedSearch.cc |
diffstat | 5 files changed, 58 insertions(+), 46 deletions(-) [+] |
line wrap: on
line diff
--- a/regexParser/CeriumGrep.cc Wed Feb 03 12:24:34 2016 +0900 +++ b/regexParser/CeriumGrep.cc Wed Feb 03 13:11:10 2016 +0900 @@ -65,7 +65,7 @@ st_mmap_t st_mmap = createSt_mmap(filename,fd); Buffer buff = createBuffer(st_mmap); if (ts) threadedSearch(tgv.tg,buff); - else grepWalk(tgv.tg,buff.buffptr,buff); + else grepWalk(tgv.tg,buff); close(fd); }
--- a/regexParser/Makefile Wed Feb 03 12:24:34 2016 +0900 +++ b/regexParser/Makefile Wed Feb 03 13:11:10 2016 +0900 @@ -71,7 +71,7 @@ ./sequentialSearch -file $(TESTFILE) $(WC) grepWalk: regexParser $(TESTFILE) - ./regexParser -$(SUBSET) -regex $(REGEX)-file $(TESTFILE) $(WC) + ./regexParser $(SUBSET) -regex $(REGEX) -file $(TESTFILE) $(WC) sequentialSearch: sequentialSearch.cc regexParser fileread.o ./regexParser -seq -subset -regex $(REGEX)
--- a/regexParser/grepWalk.cc Wed Feb 03 12:24:34 2016 +0900 +++ b/regexParser/grepWalk.cc Wed Feb 03 13:11:10 2016 +0900 @@ -1,54 +1,65 @@ #include <stdio.h> +#include <stdlib.h> #include "grepWalk.h" #include "subsetConstruction.h" - -void grep(TransitionGeneratorPtr tg,unsigned char *matchBegin,Buffer buff,unsigned long d) ; - -void grepSkip(TransitionGeneratorPtr tg,unsigned char *matchBegin, Buffer buff) { - matchBegin = buff.buffptr; - grep(tg,matchBegin,buff,1); // 1 is initState -} +#include "threadedSearch.h" -void grepWalk(TransitionGeneratorPtr tg, unsigned char *matchBegin, Buffer buff) { - grepSkip(tg,matchBegin,buff); -} - -void grepMatch(TransitionGeneratorPtr tg,unsigned char *matchBegin, Buffer buff) { - fwrite(matchBegin,buff.buffptr-matchBegin,1,stdout); - puts("\n"); - grepSkip(tg,matchBegin,buff); +StatePtr nextState(BitVector bi,TransitionGeneratorPtr tg) { + // create tSearch in next state. + StatePtr state = tg->stateArray[bi.bitContainer]; + if (state == NULL) { + // on the fly subset construction. + state = createState(tg,bi); + determinize(state,tg); + tg->stateArray[bi.bitContainer] = state; + } + return state; } -void grep(TransitionGeneratorPtr tg,unsigned char *matchBegin,Buffer buff,unsigned long d) { - unsigned char c = *buff.buffptr++; - if (c=='\0') return; - StatePtr state = tg->stateList; +void grepWalk(TransitionGeneratorPtr tg,Buffer buff) { + TSValue tsv = createTSValue(tg,buff); + tg->stateStart = NEW(State); + *tg->stateStart = *tg->stateList; + tg->stateStart->accept = false; // Start state never accept + StatePtr state = tg->stateStart; - while (state->bitState.bitContainer != d) state = state->next; // 配列へのアクセスへ変更 - CharClassWalkerPtr ccw = createCharClassWalker(state->cc); - CharClassPtr cc = NULL; - bool found = false; - while (hasNext(ccw)) { - cc = getNext(ccw); - unsigned long begin = cc->cond.range.begin; - unsigned long end = cc->cond.range.end; - if (begin == end) { - if (c == begin) found = true; - else found = false; - } else { - if (c < begin) found = false; - else if (c < end) found = true; +#if DEBUG + TSValuePtr tsvp = &tsv; // make tsv visible in lldb +#endif + next: while (tsv.buff.buffptr < tsv.buff.buffend) { + if (state->accept) { + tsv = tg->stateMatch(tsv); + } + CharClassWalkerPtr ccw = createCharClassWalker(state->cc); + if (!hasNext(ccw)) { + // matched start again + state = tg->stateStart; + ccw = createCharClassWalker(state->cc); } - if (found == true) break; + unsigned char c = *tsv.buff.buffptr++; +// printState(tsv.current->state); + while (hasNext(ccw)) { + CharClassPtr cc = getNext(ccw); + if (c<cc->cond.range.begin) { + state = tg->stateStart; + tsv = tg->stateSkip(tsv); + goto next; + } else if (c<=cc->cond.range.end) { + // range matched. + if (cc->cond.w.word) { + // match the word. + // if (not match) continue; + } + state = nextState(cc->nextState,tg); + goto next; + } + } + state = tg->stateStart; + tsv = tg->stateSkip(tsv); } - - if (found == false) { - grepSkip(tg,matchBegin,buff); - } else if (found == true && (cc->nextState.bitContainer | 2)) { // Accept - grepMatch(tg,matchBegin,buff); - } else { - grep(tg,matchBegin,buff,cc->nextState.bitContainer); - } +#if DEBUG + *tsvp = tsv; +#endif }
--- a/regexParser/grepWalk.h Wed Feb 03 12:24:34 2016 +0900 +++ b/regexParser/grepWalk.h Wed Feb 03 13:11:10 2016 +0900 @@ -1,3 +1,3 @@ #include "regexParser.h" -extern void grepWalk(TransitionGeneratorPtr tg, unsigned char *matchBegin, Buffer buff); +extern void grepWalk(TransitionGeneratorPtr tg, Buffer buff);
--- a/regexParser/threadedSearch.cc Wed Feb 03 12:24:34 2016 +0900 +++ b/regexParser/threadedSearch.cc Wed Feb 03 13:11:10 2016 +0900 @@ -12,7 +12,6 @@ static TSValue stateSkip(TSValue tsv) { - tsv.current = tsv.tg->stateStart->tState; if (tsv.matchEnd) { fwrite(tsv.matchBegin,tsv.matchEnd-tsv.matchBegin,1,stdout); puts(""); @@ -100,6 +99,7 @@ for (int i = 0; i < tsv.current->ccvSize; i++) { CCVPtr ccv = &tsv.current->ccv[i]; if (c<ccv->begin) { + tsv.current = tsv.tg->stateStart->tState; tsv = tsv.current->stateSkip(tsv); goto next; } else if (c<=ccv->end) { @@ -117,6 +117,7 @@ goto next; } } + tsv.current = tsv.tg->stateStart->tState; tsv = tsv.current->stateSkip(tsv); } #if DEBUG