Mercurial > hg > Applications > Grep
view regexParser/threadedSearch.cc @ 293:948428caf616
NFA maximum match worked
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 02 Feb 2016 10:38:45 +0900 |
parents | 868f01f1ba8e |
children | 63213964502a |
line wrap: on
line source
#include <stdio.h> #include <stdlib.h> #include "regexParser.h" #include "threadedSearch.h" #include "subsetConstruction.h" static TSValue stateNothing(TSValue tsv) { return tsv; } static TSValue stateSkip(TSValue tsv) { tsv.current = tsv.tg->stateStart->tState; if (tsv.matchEnd) { fwrite(tsv.matchBegin,tsv.matchEnd-tsv.matchBegin,1,stdout); puts(""); tsv.matchEnd = NULL; } return tsv; } static TSValue stateMatch(TSValue tsv) { tsv.matchEnd = tsv.buff.buffptr; // next char of the match return tsv; } TStatePtr generateTState(StatePtr state, TransitionGeneratorPtr tg) { TStatePtr tState = NEW(TState); tState->state = state; int ccvSize = 0; CharClassWalkerPtr ccw = createCharClassWalker(state->cc); while (hasNext(ccw)) { getNext(ccw); ccvSize++; } tState->ccvSize = ccvSize; if (state->accept) { tState->stateSkip = tg->stateSkip; tState->stateMatch = tg->stateMatch; } else { tState->stateSkip = tg->stateSkip; tState->stateMatch = tg->stateNothing; } if (ccvSize == 0) { tState->ccv = NULL; state->tState = tState; return tState; } else tState->ccv = (ccv*)malloc(sizeof(ccv)*ccvSize); ccw = createCharClassWalker(state->cc); int i = 0; while (hasNext(ccw)) { CharClassPtr cc = getNext(ccw); unsigned long begin = cc->cond.range.begin; unsigned long end = cc->cond.range.end; struct ccv *ccv = &tState->ccv[i++]; ccv->begin = begin; ccv->end = end; ccv->tState = NULL; ccv->state = cc->nextState; ccv->w = cc->cond.w; } free(ccw); state->tState = tState; return tState; } TStatePtr nextTState(BitVector bi,TransitionGeneratorPtr tg) { // create tSearch in next state. StatePtr state = tg->stateArray[bi.bitContainer]; if (state == NULL) { // on the fly subset construction. state = createState(tg,bi); determinize(state,tg); tg->stateArray[bi.bitContainer] = state; } if (state->tState == NULL) { generateTState(state,tg); } return state->tState; } #define DEBUG 0 TSValue tSearch(TSValue tsv) { #if DEBUG TSValuePtr tsvp = &tsv; // make tsv visible in lldb #endif next: while (tsv.buff.buffptr < tsv.buff.buffend) { unsigned char c = *tsv.buff.buffptr++; // printState(tsv.current->state); for (int i = 0; i < tsv.current->ccvSize; i++) { CCVPtr ccv = &tsv.current->ccv[i]; if (c<ccv->begin) { tsv = tsv.current->stateSkip(tsv); tsv.matchBegin = tsv.buff.buffptr; goto next; } else if (c<=ccv->end) { // range matched. if (ccv->w.word) { // match the word. // if (not match) continue; } tsv = tsv.current->stateMatch(tsv); if (ccv->tState) { tsv.current = ccv->tState; } else { tsv.current = nextTState(ccv->state,tsv.tg); ccv->tState = tsv.current; } goto next; } } tsv = tsv.current->stateSkip(tsv); tsv.matchBegin = tsv.buff.buffptr; } #if DEBUG *tsvp = tsv; return *tsvp; #else return tsv; #endif } void threadedSearch(TransitionGeneratorPtr tg, Buffer buff) { TSValue tsv; tsv.buff = buff; tsv.tg = tg; tsv.blk = NULL; tsv.tg->stateSkip = stateSkip; tsv.tg->stateMatch = stateMatch; tsv.tg->stateNothing = stateNothing; tsv.matchBegin = buff.buffptr; tsv.matchEnd = NULL; tsv.current = generateTState(tg->stateList,tg); tg->stateStart = NEW(State); *tg->stateStart = *tg->stateList; tg->stateStart->accept = false; // Start state never accept generateTState(tg->stateStart,tg); tSearch(tsv); }