Mercurial > hg > Applications > Grep
view regexParser/subsetConstraction.cc @ 213:11b6332f0a42
fix tgv.tg->totalStateCount increment
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 28 Dec 2015 19:02:14 +0900 (2015-12-28) |
parents | b0ae5273925c |
children | a94f57af1600 |
line wrap: on
line source
#include <stdio.h> #include <stdlib.h> #include <ctype.h> #include "regexParser.h" #include "subsetConstraction.h" #include "node.h" #include "BitVector.h" #include "error.h" CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) { CharClassPtr cc = NEW(CharClass); cc->type = 'r'; cc->cond.range.begin = begin; cc->cond.range.end = end; cc->cond.range.next = NULL; cc->left = left; cc->right = right; cc->nextState.bitContainer = state; return cc; } CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) { CharClassPtr cc1; if (cc) { cc1 = charClassMerge(cc,mBegin,mEnd,nextState); } else { cc1 = createCharClassRange(mBegin,mEnd,nextState.bitContainer,NULL,NULL); } return cc1; } CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) { // 重なっているccの領域を分割する // 必要ならばnextStateを重ねあわせる // 変更があった場合は新しくリストを作って返す if (end < cc->cond.range.begin ) { // 1 if (cc->left) { return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,charClassMerge(cc->left,begin,end,nextState),cc->right); } else { return createCharClassRange(begin,end,nextState.bitContainer,NULL,cc); } } else if (end == cc->cond.range.begin && begin != end ) { // 2 CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState); if (cc->cond.range.begin == cc->cond.range.end) { return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, cc->nextState.bitContainer | nextState.bitContainer, cc1,cc->right); } CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); return createCharClassRange(cc->cond.range.begin,cc->cond.range.begin, cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); } else if (end < cc->cond.range.end) { // range.begin < end if (begin < cc->cond.range.begin) { // 3 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); } if (begin == cc->cond.range.begin) { // 6 CharClassPtr cc2 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc2); } // 9 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc2,cc3); } else if (end == cc->cond.range.end) { if (begin == cc->cond.range.begin) { // 7 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc->right); } else if (begin < cc->cond.range.begin) { // 4 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right); } // 10 cond.range.begin < begin CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,NULL,cc->right); return createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,cc2); } if (begin > cc->cond.range.end ) { // 13 if (cc->right) { return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,cc->left,charClassMerge(cc->right,begin,end,nextState)); } else { return createCharClassRange(begin,end,nextState.bitContainer,cc,NULL); } } if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { if (end > cc->cond.range.end) { // cond.range.end < end if (begin == cc->cond.range.begin) { // 8 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc1); } if (begin > cc->cond.range.begin) { // 11,12 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); return createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc3,cc1); } } } else if (begin < cc->cond.range.begin) { // 5 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); } else { printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); } return cc; } void findLeftMost(CharClassPtr next,CharClassWalkerPtr walk) { while (next->left) { CharClassStackPtr ccs = NEW(CharClassStack); ccs->next = walk->stack; ccs->turn = walk->turn; walk->turn = LEFT; ccs->cc = next; walk->stack = ccs; next = next->left; } walk->turn = SELF; walk->next = next; } CharClassWalkerPtr createCharClassWalker (CharClassPtr next) { CharClassWalkerPtr walk = NEW(CharClassWalker); walk->next = NULL; walk->stack = NULL; if (!next) return walk; findLeftMost(next,walk); return walk; } bool hasNext(CharClassWalkerPtr walk) { return walk->next != NULL; } CharClassStackPtr charClassStackPop(CharClassWalkerPtr walk) { CharClassStackPtr prev = walk->stack->next; walk->turn = walk->stack->turn; free(walk->stack); walk->stack = prev; return prev; } CharClassPtr getNext(CharClassWalkerPtr walk) { CharClassPtr current = walk->next; walk->next = NULL; if (walk->turn == SELF) { if (current->right) { walk->turn = RIGHT; findLeftMost(current->right,walk); return current; } } while (walk->stack) { walk->next = walk->stack->cc; charClassStackPop(walk); if (walk->turn == LEFT) { walk->turn = SELF; return current; } } walk->next = NULL; return current; } void setState(CharClassPtr cc, BitVector bi) { cc->nextState = bi; if (cc->left) { setState(cc->left,bi); } cc->nextState = bi; if (cc->right) { setState(cc->right,bi); } } CharClassPtr mergeTransition(StatePtr x,CharClassPtr y) { if (x->cc == NULL) { return y; } CharClassWalkerPtr walk = createCharClassWalker(x->cc); CharClassPtr ccy = y; BitVector bi; while (hasNext(walk)) { CharClassPtr cc = getNext(walk); unsigned long begin = cc->cond.range.begin; unsigned long end = cc->cond.range.end; bi = cc->nextState; ccy = charClassMerge(ccy,begin,end,bi); } free(walk); return ccy; } /** 作成する state を linked list bitvector を index とした配列に BitVectorPtr を格納 state に対応する NodePtr を */ StatePtr createCCState(CharClassPtr cc,TGValue tgv) { StatePtr s = NEW(State); s->stateNum = cc->stateNum = ++tgv.tg->totalStateCount; s->next = tgv.tg->stateList; tgv.tg->stateList = s; BitVector bi = createBitVector(cc->stateNum); s->bitState = bi; s->cc = cc; return s; } CharClassPtr allocateCCState(CharClassPtr cc, TGValue tgv) { if (cc->left) { allocateCCState(cc->left,tgv); } cc->state = createCCState(cc,tgv); if (cc->right) { allocateCCState(cc->right,tgv); } return cc; } StatePtr createState(TGValue tgv,NodePtr n) { StatePtr s = NEW(State); s->stateNum = n->stateNum = tgv.tg->totalStateCount; s->next = tgv.tg->stateList; tgv.tg->stateList = s; s->node = n; BitVector bi = createBitVector(n->stateNum); s->bitState = bi; if (n->cc) { allocateCCState(n->cc,tgv); } else { tgv.tg->totalStateCount++; } s->cc = n->cc; return s; } /** 正規表現に必要な状態を探して、それぞれに番号を割り振る 前が * でない + は新しく状態を作る * があったら、次の状態はその時の先頭の状態になる */ TGValue stateAllocate(NodePtr n,TGValue tgv) { if (n->tokenType == '+') { TGValue tgvLeft = stateAllocate(n->left,tgv); if (tgvLeft.asterisk) { TGValue tgvRight = tgvLeft; tgvRight.asterisk = false; tgvRight = stateAllocate(n->right,tgvRight); tgvRight.asterisk = true; return tgvRight; } TGValue tgvRight = tgvLeft; n->right->state = createState(tgvRight,n->right); tgvRight.startState = n->right->state; stateAllocate(n->right,tgvRight); return tgvLeft; } else if (n->tokenType == '|') { TGValue tgv1 = stateAllocate(n->left,tgv); TGValue tgv2 = stateAllocate(n->right,tgv1); return tgv2; } else if (n->tokenType == '*') { TGValue tgvAstah = tgv; tgvAstah.endState = tgvAstah.startState; tgvAstah = stateAllocate(n->left,tgvAstah); tgvAstah.asterisk = true; return tgvAstah; } else if (n->tokenType == 'c' || n->tokenType == 'a'){ TGValue tgv1 = tgv; tgv1.asterisk = false; n->stateNum = tgv.startState->stateNum; n->nextStateNum = tgv.endState->stateNum; n->state = tgv.startState;; n->nextState = tgv.endState; return tgv1; } else { return tgv; } } /** 割り当てられた状態に沿って charclass の行き先を書き換える 書き換えた charclass を merge する 前の部分に * がない + は新しい状態をつくるので、state を切り替えて生成する */ TGValue generateTransition(NodePtr n,TGValue tgv) { if (n->tokenType == '+') { TGValue tgvLeft = generateTransition(n->left,tgv); if (tgvLeft.asterisk) { TGValue tgvRight = tgvLeft; tgvRight.asterisk = false; tgvRight = generateTransition(n->right,tgvRight); tgvRight.asterisk = true; return tgvRight; } StatePtr left = tgvLeft.startState; tgvLeft.startState = n->right->state; tgvLeft.tg->stateArray[tgvLeft.startState->bitState.bitContainer] = left; TGValue tgv1 = generateTransition(n->right,tgvLeft); tgv1.startState = left; return tgv1; } else if (n->tokenType == '|') { TGValue tgv1 = generateTransition(n->left,tgv); TGValue tgv2 = generateTransition(n->right,tgv1); return tgv2; } else if (n->tokenType == '*') { TGValue tgvAstah = generateTransition(n->left,tgv); tgvAstah.asterisk = true; return tgvAstah; } else if (n->tokenType == 'c' || n->tokenType == 'a'){ TGValue tgv1 = tgv; tgv1.asterisk = false; BitVector bi = createBitVector(n->nextStateNum); setState(n->cc,bi); tgv1.startState->cc = mergeTransition(tgv1.startState,n->cc); return tgv1; } else { return tgv; } } TransitionGeneratorPtr createTransitionGenerator() { TransitionGeneratorPtr tg = NEW(TransitionGenerator); tg->totalStateCount = 0; tg->stack = NULL; tg->stateArray = NULL; tg->stateList = NULL; return tg; } TGValue createTGValue() { TGValue tgv; tgv.startState = NULL; tgv.endState = NULL; tgv.asterisk = false; tgv.tg = createTransitionGenerator(); return tgv; } TransitionGeneratorPtr generateTransitionList(NodePtr n) { TGValue tgv = createTGValue(); StatePtr startState = tgv.startState = createState(tgv,n); NodePtr eof = createNode(NULL,'e',NULL,NULL,NULL); StatePtr endState = tgv.endState = createState(tgv,eof); tgv = stateAllocate(n,tgv); if (tgv.tg->totalStateCount > BITBLOCK) { errorMassege("StateMax > BITBLOCK",__LINE__,__FILE__); } BitVector bi = createBitVector(tgv.tg->totalStateCount); tgv.tg->stateArray = (StatePtr*)calloc(bi.bitContainer*2,sizeof(StatePtr*)); tgv.tg->stateArray[startState->bitState.bitContainer] = startState; tgv.tg->stateArray[endState->bitState.bitContainer] = endState; generateTransition(n,tgv); return tgv.tg; } void printState(StatePtr state) { printf("state : %lx\n",state->bitState.bitContainer); long nodeNumber = 0; if (state->node) { printf("node : %c %lx -> %d\n",state->node->tokenType,state->bitState.bitContainer,state->node->nextStateNum); if (state->node->state) nodeNumber = state->node->state->bitState.bitContainer; } if (state->cc) { printCharacterClass(state->cc,nodeNumber,4); } } void printState(TransitionGeneratorPtr tg) { StatePtr state = tg->stateList; for (;state;state = state->next) { printState(state); putchar('\n'); } } SCValue createSCValue(TGValue tgv) { SCValue scv; scv.stateTop = tgv.tg->stateList; scv.stateEnd = scv.stateTop; while (scv.stateEnd->next) { scv.stateEnd = scv.stateEnd->next; } return scv; } SCValue createState(SCValue scv,BitVector bi) { StatePtr s = NEW(State); s->stateNum = ++scv.tg->totalStateCount; s->next = NULL; scv.stateEnd->next = s; scv.stateEnd = s; s->bitState = bi; s->cc = NULL; return scv; } /** 現在のステートに含まれる組み合わせ状態をとってくる 組み合わされた個々の charclass をmerge して新しい charclass をつくり、組み合わせ状態に登録する 生成した状態は stateArray に格納するA 新しい状態ができなくなったら終了 charClassMerge の段階で新しい状態をリストに登録したら、charclasswalk をする必要がなくなる */ SCValue subsetConstraction(SCValue scv) { for (;scv.stateTop;scv.stateTop = scv.stateTop->next) { CharClassWalkerPtr cw = createCharClassWalker(scv.stateTop->cc); while (hasNext(cw)) { CharClassPtr cc = getNext(cw); BitVector bi = cc->nextState; if (scv.stateArray[bi.bitContainer]) continue; scv = createState(scv,bi); StatePtr s = scv.stateEnd; for (;bi.bitContainer;) { int bitPosition = searchBit(bi); unsigned long baseNum = 1 << bitPosition; bi.bitContainer ^= baseNum; StatePtr base = scv.stateArray[baseNum]; if (base == NULL) {// error continue; } CharClassPtr merge = mergeTransition(s,base->cc); s->cc = merge; } scv.stateArray[bi.bitContainer] = s; } } return scv; }