Mercurial > hg > Applications > Grep
changeset 321:a1b65d39b947
bmSearch fix
author | mir3636 |
---|---|
date | Mon, 16 May 2016 17:03:17 +0900 |
parents | da02a7258d54 |
children | 62f4628d2c0d |
files | regexParser/CeriumGrep.cc regexParser/regexParser.cc regexParser/threadedSearch.cc |
diffstat | 3 files changed, 18 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/regexParser/CeriumGrep.cc Sun May 08 23:31:14 2016 +0900
+++ b/regexParser/CeriumGrep.cc Mon May 16 17:03:17 2016 +0900
@@ -24,6 +24,7 @@
     RegexInfo ri;
     ri.stateNumber = 1;
     ri.wordMode = true;
+    ri.maxWordLen = 0;
     for (int i = 1; i < argc; i++) {
         if (strcmp(argv[i],"-regex") == 0) {
             ri.ptr = (unsigned char*)argv[i+1]; i++;
@@ -68,6 +69,8 @@
         }
     }
 
+    tgv.tg->maxWordLen = ri.maxWordLen;
+
     if (filename != NULL && !parallel) {
         int fd = 0;
         st_mmap_t st_mmap = createSt_mmap(filename,fd);
```
```diff
--- a/regexParser/regexParser.cc Sun May 08 23:31:14 2016 +0900
+++ b/regexParser/regexParser.cc Mon May 16 17:03:17 2016 +0900
@@ -77,8 +77,12 @@
             token(ri);
         }
         cc->cond.w.length = ri->ptr - word - 1;
+        if (ri->tokenType == '\0')
+            cc->cond.w.length++;
         if (cc->cond.w.length > 1)
             cc->cond.w.word = word;
+        if (ri->maxWordLen < cc->cond.w.length)
+            ri->maxWordLen = cc->cond.w.length;
     } else
         token(ri);
     NodePtr n = createNode(ri,'a',cc,0,0);
```
```diff
--- a/regexParser/threadedSearch.cc Sun May 08 23:31:14 2016 +0900
+++ b/regexParser/threadedSearch.cc Mon May 16 17:03:17 2016 +0900
@@ -6,6 +6,7 @@
 #include "CharClass.h"
 #include "threadedSearch.h"
 #include "subsetConstruction.h"
+#include "bmSearch.h"
 
 #define max(a,b)((a)>(b)?a:b)
 #define min(a,b)((a)<(b)?a:b)
@@ -26,11 +27,12 @@
     }
     tsv.matchBegin = tsv.buff.buffptr; // next char may be matchBegin
     // if possible use bmsearch
+    if (!tsv.current || !tsv.current->ccv[0].w.bm ) return tsv;
     while (tsv.buff.buffptr < tsv.buff.buffend) {
         long skip = tsv.tg->maxWordLen;
         for (int k = 0; k < tsv.current->ccvSize; k++) {
             CCVPtr ccv = &tsv.current->ccv[k];
-            if (ccv->w.word) {
+            if (ccv->w.bm) {
                 int i = ccv->w.length - 1;
                 while (tsv.buff.buffptr[i] == ccv->w.word[i]) {
                     if (i == 0) {
@@ -40,7 +42,8 @@
                             tsv.current = nextTState(ccv->state,tsv.tg);
                             ccv->tState = tsv.current;
                         }
-                        tsv.buff.buffptr += ccv->w.length - 1;
+                        tsv.matchBegin = tsv.buff.buffptr;
+                        tsv.buff.buffptr += ccv->w.length;
                         return tsv;
                     }
                     --i;
@@ -50,6 +53,7 @@
         }
         tsv.buff.buffptr += skip;
     }
+    tsv.matchBegin = tsv.buff.buffptr;
     return tsv;
 }
 
@@ -179,17 +183,20 @@
     tsv.tg->stateSkip = stateSkip;
     tsv.tg->stateMatch = stateMatch;
     tsv.tg->stateNothing = stateNothing;
+    tsv.current = NULL;
     return tsv;
 }
 
 void threadedSearch(TransitionGeneratorPtr tg, Buffer buff)
 {
     TSValue tsv = createTSValue(tg,buff);
-    tsv.current = generateTState(tg->stateList,tg);
+    generateTState(tg->stateList,tg);
     tg->stateStart = NEW(State);
     *tg->stateStart = *tg->stateList;
     tg->stateStart->accept = false; // Start state never accept
-    generateTState(tg->stateStart,tg);
+    StatePtr state = tg->stateStart;
+    checkBMSearch(state->cc);
+    tsv.current = generateTState(tg->stateStart,tg);
     tSearch(tsv);
 }
 
```