Mercurial > hg > Applications > Grep
view regexParser/subsetConstruction.cc @ 224:474fc9f844db
fix exportState()
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 15 Jan 2016 16:12:19 +0900 |
parents | e430b7d0b33d |
children | 0c28ff35b4f0 |
line wrap: on
line source
#include <stdio.h> #include <stdlib.h> #include <ctype.h> #include "regexParser.h" #include "subsetConstruction.h" #include "node.h" #include "BitVector.h" #include "error.h" #define SIZE 256 CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) { CharClassPtr cc = NEW(CharClass); cc->type = 'r'; cc->cond.range.begin = begin; cc->cond.range.end = end; cc->cond.range.next = NULL; cc->left = left; cc->right = right; cc->nextState.bitContainer = state; return cc; } CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) { CharClassPtr cc1; if (cc) { cc1 = charClassMerge(cc,mBegin,mEnd,nextState); } else { cc1 = createCharClassRange(mBegin,mEnd,nextState.bitContainer,NULL,NULL); } return cc1; } CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) { // 重なっているccの領域を分割する // 必要ならばnextStateを重ねあわせる // 変更があった場合は新しくリストを作って返す if (end < cc->cond.range.begin ) { // 1 if (cc->left) { return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,charClassMerge(cc->left,begin,end,nextState),cc->right); } else { return createCharClassRange(begin,end,nextState.bitContainer,NULL,cc); } } else if (end == cc->cond.range.begin && begin != end ) { // 2 CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState); if (cc->cond.range.begin == cc->cond.range.end) { return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, cc->nextState.bitContainer | nextState.bitContainer, cc1,cc->right); } CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); return createCharClassRange(cc->cond.range.begin,cc->cond.range.begin, cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); } else if (end < cc->cond.range.end) { // range.begin < end if (begin < cc->cond.range.begin) { // 3 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); } if (begin == cc->cond.range.begin) { // 6 CharClassPtr cc2 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc2); } // 9 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc2,cc3); } else if (end == cc->cond.range.end) { if (begin == cc->cond.range.begin) { // 7 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc->right); } else if (begin < cc->cond.range.begin) { // 4 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right); } // 10 cond.range.begin < begin CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,NULL,cc->right); return createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,cc2); } if (begin > cc->cond.range.end ) { // 13 if (cc->right) { return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,cc->left,charClassMerge(cc->right,begin,end,nextState)); } else { return createCharClassRange(begin,end,nextState.bitContainer,cc,NULL); } } if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { if (end > cc->cond.range.end) { // cond.range.end < end if (begin == cc->cond.range.begin) { // 8 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc1); } if (begin > cc->cond.range.begin) { // 11,12 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); return createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc3,cc1); } } } else if (begin < cc->cond.range.begin) { // 5 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); } else { printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); } return cc; } void findLeftMost(CharClassPtr next,CharClassWalkerPtr walk) { while (next->left) { CharClassStackPtr ccs = NEW(CharClassStack); ccs->next = walk->stack; ccs->turn = walk->turn; walk->turn = LEFT; ccs->cc = next; walk->stack = ccs; next = next->left; } walk->turn = SELF; walk->next = next; } CharClassWalkerPtr createCharClassWalker (CharClassPtr next) { CharClassWalkerPtr walk = NEW(CharClassWalker); walk->next = NULL; walk->stack = NULL; if (!next) return walk; findLeftMost(next,walk); return walk; } bool hasNext(CharClassWalkerPtr walk) { return walk->next != NULL; } CharClassStackPtr charClassStackPop(CharClassWalkerPtr walk) { CharClassStackPtr prev = walk->stack->next; walk->turn = walk->stack->turn; free(walk->stack); walk->stack = prev; return prev; } CharClassPtr getNext(CharClassWalkerPtr walk) { CharClassPtr current = walk->next; walk->next = NULL; if (walk->turn == SELF) { if (current->right) { walk->turn = RIGHT; findLeftMost(current->right,walk); return current; } } while (walk->stack) { walk->next = walk->stack->cc; charClassStackPop(walk); if (walk->turn == LEFT) { walk->turn = SELF; return current; } } walk->next = NULL; return current; } void setState(CharClassPtr cc, BitVector bi) { cc->nextState = bi; if (cc->left) { setState(cc->left,bi); } cc->nextState = bi; if (cc->right) { setState(cc->right,bi); } } CharClassPtr mergeTransition(StatePtr x,CharClassPtr y) { if (x->cc == NULL) { return y; } CharClassWalkerPtr walk = createCharClassWalker(x->cc); CharClassPtr ccy = y; BitVector bi; while (hasNext(walk)) { CharClassPtr cc = getNext(walk); unsigned long begin = cc->cond.range.begin; unsigned long end = cc->cond.range.end; bi = cc->nextState; ccy = charClassMerge(ccy,begin,end,bi); } free(walk); return ccy; } /** 作成する state を linked list bitvector を index とした配列に BitVectorPtr を格納 state に対応する NodePtr を */ StatePtr createState(TGValue tgv,NodePtr n) { StatePtr s = NEW(State); s->stateNum = n->stateNum = tgv.tg->totalStateCount; s->next = tgv.tg->stateList; tgv.tg->stateList = s; s->node = n; BitVector bi = createBitVector(n->stateNum); s->bitState = bi; tgv.tg->totalStateCount++; s->cc = n->cc; return s; } /** pass 1 : 正規表現に必要な状態を探して、それぞれに番号を割り振る 前が * でない + は新しく状態を作る * があったら、次の状態はその時の先頭の状態になる 常に先頭の状態を返す 最後が*ならば、それを持ち歩く pass 2: 割り当てられた状態に沿って charclass の行き先を書き換える 書き換えた charclass を merge する 前の部分に * がない + は新しい状態をつくるので、state を切り替えて生成する */ TGValue generateTransition(NodePtr n,TGValue tgv, int pass) { if (n->tokenType == '+') { TGValue tgvLeft = tgv; tgvLeft.endState = n->right->state; tgvLeft.asterisk = NULL; tgvLeft = generateTransition(n->left,tgvLeft,pass); TGValue tgvRight = tgv; if (tgvLeft.asterisk) { n->right->state = tgv.endState; tgvRight.startState = tgvLeft.asterisk; tgvRight = generateTransition(n->right,tgvRight,pass); tgvLeft.asterisk = tgvRight.asterisk; return tgvLeft; } tgvRight.asterisk = NULL; if (pass==1) { n->right->state = tgvRight.startState = createState(tgvRight,n->right); } else { tgvRight.startState = n->right->state; tgvRight.tg->stateArray[tgvRight.startState->bitState.bitContainer] = tgvRight.startState ; } tgvRight = generateTransition(n->right,tgvRight,pass); tgvLeft.asterisk = tgvRight.asterisk; return tgvLeft; } else if (n->tokenType == '|') { TGValue tgv1 = generateTransition(n->left,tgv,pass); TGValue tgv2 = generateTransition(n->right,tgv1,pass); return tgv2; } else if (n->tokenType == '*') { TGValue tgvAstah = tgv; tgvAstah.endState = tgvAstah.startState; tgvAstah = generateTransition(n->left,tgvAstah,pass); tgvAstah.asterisk = tgvAstah.startState; return tgvAstah; } else if (n->tokenType == 'c' || n->tokenType == 'a'){ TGValue tgv1 = tgv; if (pass==1) { n->stateNum = tgv.startState->stateNum; n->state = tgv.startState; } else { int nextState = tgv.endState->stateNum; n->nextStateNum = nextState; n->nextState = tgv.endState; BitVector bi = createBitVector(nextState); setState(n->cc,bi); tgv1.startState->cc = mergeTransition(tgv1.startState,n->cc); } return tgv1; } else { return tgv; } } TransitionGeneratorPtr createTransitionGenerator() { TransitionGeneratorPtr tg = NEW(TransitionGenerator); tg->totalStateCount = 0; tg->stack = NULL; tg->stateArray = NULL; tg->stateList = NULL; return tg; } TGValue createTGValue() { TGValue tgv; tgv.startState = NULL; tgv.endState = NULL; tgv.asterisk = NULL; tgv.tg = createTransitionGenerator(); return tgv; } TGValue generateTransitionList(NodePtr n) { TGValue tgv = createTGValue(); StatePtr startState = tgv.startState = createState(tgv,n); NodePtr eof = createNode(NULL,'e',NULL,NULL,NULL); StatePtr endState = tgv.endState = createState(tgv,eof); tgv.startState = startState; tgv.endState = endState; tgv = generateTransition(n,tgv,1); printTree(n); if (tgv.tg->totalStateCount > BITBLOCK) { errorMassege("StateMax > BITBLOCK",__LINE__,__FILE__); } BitVector bi = createBitVector(tgv.tg->totalStateCount); tgv.tg->stateArray = (StatePtr*)calloc(bi.bitContainer*2,sizeof(StatePtr*)); tgv.tg->stateArray[startState->bitState.bitContainer] = startState; tgv.tg->stateArray[endState->bitState.bitContainer] = endState; tgv.startState = startState; tgv.endState = endState; tgv = generateTransition(n,tgv,2); return tgv; } void exportState(TransitionGeneratorPtr tg) { StatePtr state = tg->stateList; FILE *fp = fopen("state.cc","w"); char s[SIZE]; fputs("unsigned char *buff, *buffptr, buffend;\n",fp); for (;state;state = state->next) { sprintf(s,"void state%lx() {\n",state->bitState.bitContainer); fputs(s,fp); if (state->bitState.bitContainer == 2) { // Accept fputs(" // Accept\n",fp); } else { // not Accept fputs(" unsigned char c = *buffptr++;\n",fp); CharClassWalkerPtr ccw = createCharClassWalker(state->cc); bool flag = true; while (hasNext(ccw)) { CharClassPtr cc = getNext(ccw); unsigned long begin = cc->cond.range.begin; unsigned long end = cc->cond.range.end; BitVector bi = cc->nextState; if (flag) { flag = false; } else { fputs(" else ",fp); } if (begin == end) { sprintf(s," if (c=='%c') state%lu();\n",(unsigned char)begin, bi.bitContainer); fputs(s,fp); } else { sprintf(s," if (c<'%c') state1();\n",(unsigned char)begin); fputs(s,fp); sprintf(s," else if (c<='%c') state%lu();\n",(unsigned char)end, bi.bitContainer); fputs(s,fp); } } sprintf(s," else state1();\n"); fputs(s,fp); } fputs("}\n\n",fp); } fclose(fp); } void printState(StatePtr state) { printf("state : %lx\n",state->bitState.bitContainer); long nodeNumber = 0; if (state->node) { printf("node : %c %lx -> %d\n",state->node->tokenType,state->bitState.bitContainer,state->node->nextStateNum); if (state->node->state) nodeNumber = state->node->state->bitState.bitContainer; } if (state->cc) { printCharacterClass(state->cc,nodeNumber,4); } } void printState(TransitionGeneratorPtr tg) { StatePtr state = tg->stateList; for (;state;state = state->next) { printState(state); putchar('\n'); } } SCValue createSCValue(TGValue tgv) { SCValue scv; scv.tg = tgv.tg; scv.stateTop = tgv.tg->stateList; scv.stateEnd = scv.stateTop; while (scv.stateEnd->next) { scv.stateEnd = scv.stateEnd->next; } return scv; } SCValue createState(SCValue scv,BitVector bi) { StatePtr s = NEW(State); s->stateNum = ++scv.tg->totalStateCount; s->next = NULL; scv.stateEnd->next = s; scv.stateEnd = s; s->bitState = bi; s->cc = NULL; return scv; } /** 現在のステートに含まれる組み合わせ状態をとってくる 組み合わされた個々の charclass をmerge して新しい charclass をつくり、組み合わせ状態に登録する 生成した状態は stateArray に格納するA 新しい状態ができなくなったら終了 charClassMerge の段階で新しい状態をリストに登録したら、charclasswalk をする必要がなくなる */ SCValue subsetConstruction(SCValue scv) { for (;scv.stateTop;scv.stateTop = scv.stateTop->next) { CharClassWalkerPtr cw = createCharClassWalker(scv.stateTop->cc); while (hasNext(cw)) { CharClassPtr cc = getNext(cw); BitVector bi = cc->nextState; if (scv.tg->stateArray[bi.bitContainer]) continue; // already done scv = createState(scv,bi); StatePtr s = scv.stateEnd; scv.tg->stateArray[bi.bitContainer] = s; for (;;) { int bitPosition = searchBit(bi); if (!bitPosition) break; unsigned long baseNum = 1 << (bitPosition-1); // printf("bit %lx pos %d baseNum %lx\n",bi.bitContainer,bitPosition,baseNum); bi.bitContainer ^= baseNum; if (baseNum==2) continue; // EOF case StatePtr base = scv.tg->stateArray[baseNum]; if (base == NULL) { errorMassege("No base state",__LINE__,__FILE__); break; } CharClassPtr merge = mergeTransition(s,base->cc); s->cc = merge; } } } return scv; }