Mercurial > hg > Members > masakoha > testcode
view regexParser/subsetConstraction.cc @ 195:4fefd80c05f2 pairPro
change variable name (TGValue tg -> TGValue tgv)
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 26 Dec 2015 13:48:12 +0900 |
parents | ecf70fb215a5 |
children | 35608dc85e83 98ff9d94566a |
line wrap: on
line source
#include <stdio.h> #include <stdlib.h> #include <ctype.h> #include "regexParser.h" #include "subsetConstraction.h" #include "node.h" #include "BitVector.h" #include "error.h" CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) { CharClassPtr cc = NEW(CharClass); cc->type = 'r'; cc->cond.range.begin = begin; cc->cond.range.end = end; cc->cond.range.next = NULL; cc->left = left; cc->right = right; cc->nextState.bitContainer = state; return cc; } CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) { CharClassPtr cc1; if (cc) { cc1 = charClassMerge(cc,mBegin,mEnd,nextState); } else { cc1 = createCharClassRange(mBegin,mEnd,nextState.bitContainer,NULL,NULL); } return cc1; } CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) { // 重なっているccの領域を分割する // 必要ならばnextStateを重ねあわせる // 変更があった場合は新しくリストを作って返す if (end < cc->cond.range.begin ) { // 1 if (cc->left) { return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,charClassMerge(cc->left,begin,end,nextState),cc->right); } else { return createCharClassRange(begin,end,nextState.bitContainer,NULL,cc); } } else if (end == cc->cond.range.begin && begin != end ) { // 2 CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState); if (cc->cond.range.begin == cc->cond.range.end) { return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, cc->nextState.bitContainer | nextState.bitContainer, cc1,cc->right); } CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); return createCharClassRange(cc->cond.range.begin,cc->cond.range.begin, cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); } else if (end < cc->cond.range.end) { // range.begin < end if (begin < cc->cond.range.begin) { // 3 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); } if (begin == cc->cond.range.begin) { // 6 CharClassPtr cc2 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc2); } // 9 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc2,cc3); } else if (end == cc->cond.range.end) { if (begin == cc->cond.range.begin) { // 7 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc->right); } else if (begin < cc->cond.range.begin) { // 4 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right); } // 10 cond.range.begin < begin CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,NULL,cc->right); return createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,cc2); } if (begin > cc->cond.range.end ) { // 13 if (cc->right) { return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,nextState.bitContainer,cc->left,charClassMerge(cc->right,begin,end,nextState)); } else { return createCharClassRange(begin,end,nextState.bitContainer,cc,NULL); } } if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { if (end > cc->cond.range.end) { // cond.range.end < end if (begin == cc->cond.range.begin) { // 8 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc1); } if (begin > cc->cond.range.begin) { // 11 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); return createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc3,cc1); } } // begin != end && end != cc->cond.range.end if (begin == cc->cond.range.end) { // 12 CharClassPtr cc1 = mergeCCTree(cc->right,begin+1,end,nextState); if (cc->cond.range.begin == cc->cond.range.end) { return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right); } CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end-1,cc->nextState.bitContainer,cc->left,NULL); return createCharClassRange(cc->cond.range.end,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); } } else if (begin < cc->cond.range.begin) { // 5 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); } else { printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); } return cc; } void findLeftMost(CharClassPtr next,CharClassWalkerPtr walk) { while (next->left) { CharClassStackPtr ccs = NEW(CharClassStack); ccs->next = walk->stack; ccs->turn = LEFT; ccs->cc = next; walk->stack = ccs; next = next->left; } walk->next = next; } CharClassWalkerPtr createCharClassWalker (CharClassPtr next) { CharClassWalkerPtr walk = NEW(CharClassWalker); walk->next = NULL; walk->stack = NULL; if (!next) return walk; if (!next->left) { walk->next = next; return walk; } findLeftMost(next,walk); return walk; } bool hasNext(CharClassWalkerPtr walk) { return walk->next != NULL; } CharClassStackPtr charClassStackPop(CharClassWalkerPtr walk) { if (!walk->stack) { return NULL; } CharClassStackPtr prev = walk->stack->next; free(walk->stack); walk->stack = prev; return prev; } CharClassPtr getNext(CharClassWalkerPtr walk) { CharClassPtr current = walk->next; walk->next = NULL; while (walk->stack) { while (walk->stack->turn == RIGHT) { if (charClassStackPop(walk) == NULL) { return current; } } if (walk->stack->turn == LEFT) { walk->next = walk->stack->cc; walk->stack->turn = SELF; return current; } if (current->right) { walk->stack->turn = RIGHT; findLeftMost(current->right,walk); return current; } charClassStackPop(walk); } return current; } void setState(CharClassPtr cc, BitVector bi) { cc->nextState = bi; if (cc->left) { setState(cc->left,bi); } cc->nextState = bi; if (cc->right) { setState(cc->right,bi); } } CharClassPtr mergeTransition(StatePtr x,CharClassPtr y) { if (x->cc == NULL) { return y; } CharClassWalkerPtr walk = createCharClassWalker(x->cc); CharClassPtr ccy = y; BitVector bi; for (CharClassPtr cc = getNext(walk); hasNext(walk); cc=getNext(walk)) { unsigned long begin = cc->cond.range.begin; unsigned long end = cc->cond.range.end; bi = cc->nextState; ccy = charClassMerge(ccy,begin,end,bi); } free(walk); return ccy; } /** 作成する state を linked list bitvector を index とした配列に BitVectorPtr を格納 state に対応する NodePtr を */ StatePtr createState(TGValue tgv,NodePtr n) { StatePtr s = NEW(State); s->stateNum = n->stateNum = ++tgv.tg->stateMax; s->next = tgv.tg->stateList; tgv.tg->stateList = s; s->node = n; BitVector bi = createBitVector(n->stateNum); s->bitState = bi; s->cc = NULL; return s; } /** 正規表現に必要な状態を探して、それぞれに番号を割り振る 前が * でない + は新しく状態を作る * があったら、次の状態はその時の先頭の状態になる */ TGValue stateAllocate(NodePtr n,TGValue tgv) { if (n->tokenType == '+') { TGValue tgvLeft = stateAllocate(n->left,tgv); if (tgvLeft.asterisk) { TGValue tgvRight = tgvLeft; tgvRight.asterisk = false; tgvRight = stateAllocate(n->right,tgvRight); tgvRight.asterisk = true; return tgvRight; } TGValue tgvRight = tgvLeft; n->right->state = createState(tgvRight,n->right); tgvRight.startState = n->right->state; stateAllocate(n->right,tgvRight); return tgvLeft; } else if (n->tokenType == '|') { TGValue tgv1 = stateAllocate(n->left,tgv); TGValue tgv2 = stateAllocate(n->right,tgv1); return tgv2; } else if (n->tokenType == '*') { TGValue tgvAstah = tgv; tgvAstah.endState = tgvAstah.startState; tgvAstah = stateAllocate(n->left,tgvAstah); tgvAstah.asterisk = true; return tgvAstah; } else if (n->tokenType == 'c' || n->tokenType == 'a'){ TGValue tgv1 = tgv; tgv1.asterisk = false; n->stateNum = tgv.startState->stateNum; n->nextStateNum = tgv.endState->stateNum; n->state = tgv.startState; n->nextState = tgv.endState; return tgv1; } else { return tgv; } } /** 割り当てられた状態に沿って charclass の行き先を書き換える 書き換えた charclass を merge する 前の部分に * がない + は新しい状態をつくるので、state を切り替えて生成する */ TGValue generateTransition(NodePtr n,TGValue tgv) { if (n->tokenType == '+') { TGValue tgvLeft = generateTransition(n->left,tgv); if (tgvLeft.asterisk) { TGValue tgvRight = tgvLeft; tgvRight.asterisk = false; tgvRight = generateTransition(n->right,tgvRight); tgvRight.asterisk = true; return tgvRight; } StatePtr left = tgvLeft.startState; tgvLeft.startState = n->right->state; tgvLeft.tg->stateArray[tgvLeft.startState->bitState.bitContainer] = left; TGValue tgv1 = generateTransition(n->right,tgvLeft); tgv1.startState = left; return tgv1; } else if (n->tokenType == '|') { TGValue tgv1 = generateTransition(n->left,tgv); TGValue tgv2 = generateTransition(n->right,tgv1); return tgv2; } else if (n->tokenType == '*') { TGValue tgvAstah = generateTransition(n->left,tgv); tgvAstah.asterisk = true; return tgvAstah; } else if (n->tokenType == 'c' || n->tokenType == 'a'){ TGValue tgv1 = tgv; tgv1.asterisk = false; BitVector bi = createBitVector(n->nextStateNum); setState(n->cc,bi); tgv1.startState->cc = mergeTransition(tgv1.startState,n->cc); return tgv1; } else { return tgv; } } TransitionGeneratorPtr createTransitionGenerator() { TransitionGeneratorPtr tg = NEW(TransitionGenerator); tg->stateMax = -1; tg->stack = NULL; tg->stateArray = NULL; tg->stateList = NULL; return tg; } TransitionGeneratorPtr generateTransitionList(NodePtr n) { TransitionGeneratorPtr tg = createTransitionGenerator(); TGValue tgv; // initiarize tgv tgv.asterisk = false; tgv.tg = tg; StatePtr startState = tgv.startState = createState(tgv,n); NodePtr eof = createNode(NULL,'e',NULL,NULL,NULL); StatePtr endState = tgv.endState = createState(tgv,eof); tgv = stateAllocate(n,tgv); if (tg->stateMax > BITBLOCK) { errorMassege("StateMax > BITBLOCK",__LINE__,__FILE__); } BitVector bi = createBitVector(tg->stateMax); tgv.tg->stateArray = (StatePtr*)calloc(bi.bitContainer*2,sizeof(StatePtr*)); tgv.tg->stateArray[startState->bitState.bitContainer] = startState; tgv.tg->stateArray[endState->bitState.bitContainer] = endState; generateTransition(n,tgv); return tg; } void printState(StatePtr state) { printf("state : %lx\n",state->bitState.bitContainer); long nodeNumber = 0; if (state->node) { printf("node : %c %lx -> %d\n",state->node->tokenType,state->bitState.bitContainer,state->node->nextStateNum); if (state->node->state) nodeNumber = state->node->state->bitState.bitContainer; } if (state->cc) { printCharacterClass(state->cc,nodeNumber,4); } } void printState(TransitionGeneratorPtr tg) { StatePtr state = tg->stateList; for (;state;state = state->next) { printState(state); putchar('\n'); } }