Mercurial > hg > Applications > Grep
view regexParser/subsetConstruction.cc @ 311:1d79e61a9365
CbC state generator is not work on -O2 but -O1
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 08 Feb 2016 22:12:14 +0900 |
parents | 1188debbef10 |
children |
line wrap: on
line source
#include <stdio.h> #include <stdlib.h> #include <ctype.h> #include "regexParser.h" #include "subsetConstruction.h" #include "node.h" #include "bitVector.h" #include "CharClass.h" #include "error.h" /** 作成する state を linked list bitvector を index とした配列に BitVectorPtr を格納 state に対応する NodePtr を */ StatePtr createState(TransitionGeneratorPtr tg,BitVector bi) { StatePtr s = NEW(State); s->stateNum = tg->totalStateCount++; s->next = NULL; tg->stateEnd->next = s; tg->stateEnd = s; s->bitState = bi; s->cc = NULL; s->node = NULL; s->tState = NULL; return s; } StatePtr createState(TGValue tgv,NodePtr n) { BitVector bi = createBitVector(tgv.tg->totalStateCount); StatePtr s = createState(tgv.tg,bi); n->stateNum = s->stateNum; s->node = n; s->bitState = bi; s->accept = false; s->cc = n->cc; return s; } /** pass 1 : 正規表現に必要な状態を探して、それぞれに番号を割り振る 前が * でない + は新しく状態を作る * があったら、次の状態はその時の先頭の状態か次の状態が終了状態ならば終了状態との組み合わせになる 常に先頭の状態を返す 最後が*ならば、それを持ち歩く pass 2: 割り当てられた状態に沿って charclass の行き先を書き換える 書き換えた charclass を merge する 前の部分に * がない + は新しい状態をつくるので、state を切り替えて生成する */ TGValue generateTransition(NodePtr n,TGValue tgv, int pass) { if (n->tokenType == '+') { TGValue tgvLeft = tgv; tgvLeft.endState = n->right->state; tgvLeft.asterisk = NULL; tgvLeft = generateTransition(n->left,tgvLeft,pass); TGValue tgvRight = tgv; if (tgvLeft.asterisk) { n->right->state = tgv.endState; tgvRight.startState = tgvLeft.asterisk; tgvRight = generateTransition(n->right,tgvRight,pass); tgvLeft.asterisk = tgvRight.asterisk; return tgvLeft; } tgvRight.asterisk = NULL; if (pass==1) { n->right->state = tgvRight.startState = createState(tgvRight,n->right); } else { tgvRight.startState = n->right->state; tgvRight.tg->stateArray[tgvRight.startState->bitState.bitContainer] = tgvRight.startState ; } tgvRight = generateTransition(n->right,tgvRight,pass); if (tgv.endState && tgvRight.asterisk) tgvRight.startState->accept = tgv.endState->accept; tgvLeft.asterisk = tgvRight.asterisk; return tgvLeft; } else if (n->tokenType == '|') { TGValue tgv1 = generateTransition(n->left,tgv,pass); tgv1.endState = tgv.endState; TGValue tgv2 = generateTransition(n->right,tgv1,pass); return tgv2; } else if (n->tokenType == '*') { TGValue tgvAstah = tgv; tgvAstah.endState = tgvAstah.startState; if (pass==2) tgvAstah.endState->accept = tgv.endState->accept; tgvAstah = generateTransition(n->left,tgvAstah,pass); tgvAstah.asterisk = tgvAstah.startState; return tgvAstah; } else if (n->tokenType == 'c' || n->tokenType == 'a'){ TGValue tgv1 = tgv; if (pass==1) { n->stateNum = tgv.startState->stateNum; n->state = tgv.startState; } else { int nextState = tgv.endState->stateNum; n->nextStateNum = nextState; n->nextState = tgv.endState; BitVector bi = createBitVector(nextState); if (n->nextState->accept) bi = bitSet(bi,1); setState(n->cc,bi); tgv1.startState->cc = mergeTransition(tgv1.startState,n->cc); } return tgv1; } else { return tgv; } } TransitionGeneratorPtr createTransitionGenerator() { TransitionGeneratorPtr tg = NEW(TransitionGenerator); tg->totalStateCount = 0; tg->stack = NULL; tg->stateArray = NULL; tg->stateList = NULL; return tg; } TGValue createTGValue() { TGValue tgv; tgv.startState = NULL; tgv.endState = NULL; tgv.asterisk = NULL; tgv.tg = createTransitionGenerator(); return tgv; } TGValue generateTransitionList(NodePtr n) { TGValue tgv = createTGValue(); TransitionGeneratorPtr tg = tgv.tg; State dummy; tg->stateEnd = &dummy; StatePtr startState = tgv.startState = createState(tgv,n); tg->stateList = startState; NodePtr eof = createNode(NULL,'e',NULL,NULL,NULL); StatePtr endState = tgv.endState = createState(tgv,eof); endState->accept = true; tgv.startState = startState; tgv.endState = endState; tgv = generateTransition(n,tgv,1); printTree(n); if (tg->totalStateCount > BITBLOCK) { errorMassege("StateMax > BITBLOCK",__LINE__,__FILE__); } BitVector bi = createBitVector(tg->totalStateCount); tg->stateArray = (StatePtr*)calloc(bi.bitContainer*2,sizeof(StatePtr*)); tg->stateArray[startState->bitState.bitContainer] = startState; tg->stateArray[endState->bitState.bitContainer] = endState; tg->totalBasicState = tg->totalStateCount; tgv.startState = startState; tgv.endState = endState; tgv = generateTransition(n,tgv,2); return tgv; } void createAnyState(TransitionGeneratorPtr tg) { BitVector anyBi = createBitVector(tg->totalBasicState); anyBi.bitContainer = anyBi.bitContainer - 1; // all bit 1 state anyBi.bitContainer ^= 2; // exclude Accept State tg->anyState = createState(tg,anyBi); determinize(tg->anyState,tg); tg->stateArray[tg->anyState->bitState.bitContainer] = tg->anyState; } void printState(StatePtr state) { printf("state : %lx%c\n",state->bitState.bitContainer,state->accept?'*':' '); long nodeNumber = 0; if (state->node) { BitVector bi = createBitVector(state->node->nextStateNum); printf("node : %c %lx -> %lx\n",state->node->tokenType,state->bitState.bitContainer,bi.bitContainer); if (state->node->state) nodeNumber = state->node->state->bitState.bitContainer; } if (state->cc) { printCharacterClass(state->cc,nodeNumber,4); } } void printState(TransitionGeneratorPtr tg) { StatePtr state = tg->stateList; for (;state;state = state->next) { printState(state); putchar('\n'); } } /** 現在のステートに含まれる組み合わせ状態をとってくる 組み合わされた個々の charclass をmerge して新しい charclass をつくり、組み合わせ状態に登録する 生成した状態は stateArray に格納するA 新しい状態ができなくなったら終了 charClassMerge の段階で新しい状態をリストに登録したら、charclasswalk をする必要がなくなる */ void determinize(StatePtr s, TransitionGeneratorPtr tg) { BitVector bi = s->bitState; for (;;) { int bitPosition = searchBit(bi); if (!bitPosition) break; unsigned long baseNum = 1 << (bitPosition-1); // printf("bit %lx pos %d baseNum %lx\n",bi.bitContainer,bitPosition,baseNum); bi.bitContainer ^= baseNum; if (baseNum==2) { s->accept = true; continue; // EOF case } StatePtr base = tg->stateArray[baseNum]; if (base == NULL) { fprintf(stderr,"baseNum=%lx ",baseNum); errorMassege("No base state",__LINE__,__FILE__); break; } CharClassPtr merge = mergeTransition(s,base->cc); s->cc = merge; } } void subsetConstruction(TransitionGeneratorPtr tg) { for (StatePtr st = tg->stateList;st;st = st->next) { CharClassWalkerPtr cw = createCharClassWalker(st->cc); while (hasNext(cw)) { CharClassPtr cc = getNext(cw); BitVector bi = cc->nextState; if (tg->stateArray[bi.bitContainer]) continue; // already done StatePtr s = createState(tg,bi); // s is added at the end of stateList. determinize(s,tg); tg->stateArray[bi.bitContainer] = s; } free(cw); } }