Mercurial > hg > Applications > Grep
view regexParser/subsetConstraction.cc @ 185:d25f4f3b4c34 pairPro
add comment
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 24 Dec 2015 20:27:49 +0900 |
parents | 1da1b2eacb84 |
children | 3e8aae8beba9 |
line wrap: on
line source
#include <stdio.h> #include <stdlib.h> #include <ctype.h> #include "subsetConstraction.h" CharClassPtr createCharClassWord(unsigned char *w, CharClassPtr cc1, CharClassPtr cc2) { CharClassPtr cc = NEW(CharClass); return cc; } CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) { CharClassPtr cc = NEW(CharClass); cc->type = 'r'; cc->cond.range.begin = begin; cc->cond.range.end = end; cc->left = left; cc->right = right; cc->nextState.bitContainer = state; return cc; } CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) { CharClassPtr cc1; if (cc) { cc1 = charClassMerge(cc,mBegin,mEnd,nextState); } else { cc1 = createCharClassRange(mBegin,mEnd,nextState.bitContainer,NULL,NULL); } return cc1; } CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) { // 重なっているccの領域を分割する // 必要ならばnextStateを重ねあわせる // 変更があった場合は新しくリストを作って返す if (end < cc->cond.range.begin ) { // 1 if (cc->left) { return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,charClassMerge(cc->left,begin,end,nextState),cc->right); } else { return createCharClassRange(begin,end,nextState.bitContainer,NULL,cc); } } else if (end == cc->cond.range.begin && begin != end ) { // 2 CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState); if (cc->cond.range.begin == cc->cond.range.end) { return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, cc->nextState.bitContainer | nextState.bitContainer, cc1,cc->right); } CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); return createCharClassRange(cc->cond.range.begin,cc->cond.range.begin, cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); } else if (end < cc->cond.range.end) { // range.begin < end if (begin < cc->cond.range.begin) { // 3 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right); return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); } if (begin == cc->cond.range.begin) { // 6 CharClassPtr cc2 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc2); } // 9 CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right); return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc2,cc3); } else if (end == cc->cond.range.end) { if (begin == cc->cond.range.begin) { // 7 return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc->right); } else if (begin < cc->cond.range.begin) { // 4 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right); } // 10 cond.range.begin < begin CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,NULL,cc->right); return createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,cc2); } if (begin > cc->cond.range.end ) { // 13 if (cc->right) { return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->left,charClassMerge(cc->right,begin,end,nextState)); } else { return createCharClassRange(begin,end,nextState.bitContainer,cc,NULL); } } if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { if (end > cc->cond.range.end) { // cond.range.end < end if (begin == cc->cond.range.begin) { // 8 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); return createCharClassRange(cc->cond.range.begin,cc->cond.range.end, cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc1); } if (begin > cc->cond.range.begin) { // 11 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL); return createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc3,cc1); } } // begin != end && end != cc->cond.range.end if (begin == cc->cond.range.end) { // 12 CharClassPtr cc1 = mergeCCTree(cc->right,begin+1,end,nextState); if (cc->cond.range.begin == cc->cond.range.end) { return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right); } CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end-1,cc->nextState.bitContainer,cc->left,NULL); return createCharClassRange(cc->cond.range.end,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); } } else if (begin < cc->cond.range.begin) { // 5 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState); CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState); return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3); } else { printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); } return cc; } void findLeftMost(CharClassPtr next,CharClassWalkerPtr walk) { while (next->left) { CharClassStackPtr ccs = NEW(CharClassStack); ccs->next = walk->stack; ccs->turn = LEFT; ccs->cc = next; walk->stack = ccs; next = next->left; } walk->next = next; } CharClassWalkerPtr createCharClassWalker (CharClassPtr next) { CharClassWalkerPtr walk = NEW(CharClassWalker); walk->next = NULL; walk->stack = NULL; if (!next) return walk; if (!next->left) { walk->next = next; return walk; } findLeftMost(next,walk); return walk; } bool hasNext(CharClassWalkerPtr walk) { return walk->next != NULL; } CharClassStackPtr charClassStackPop(CharClassWalkerPtr walk) { if (!walk->stack) { return NULL; } CharClassStackPtr prev = walk->stack->next; free(walk->stack); walk->stack = prev; return prev; } CharClassPtr getNext(CharClassWalkerPtr walk) { CharClassPtr current = walk->next; walk->next = NULL; while (walk->stack) { while (walk->stack->turn == RIGHT) { if (charClassStackPop(walk) == NULL) { return current; } } if (walk->stack->turn == LEFT) { walk->next = walk->stack->cc; walk->stack->turn = SELF; return current; } if (current->right) { walk->stack->turn = RIGHT; findLeftMost(current->right,walk); return current; } charClassStackPop(walk); } return current; } void setState(CharClassPtr cc, BitVector bi) { setState(cc,bi); if (cc->left) { setState(cc->left,bi); } cc->nextState = bi; if (cc->right) { setState(cc->right,bi); } } CharClassPtr mergeTransition(StatePtr x,CharClassPtr y) { if (x->cc == NULL) { return y; } CharClassWalkerPtr walk = createCharClassWalker(x->cc); CharClassPtr ccy = y; BitVector bi; for (CharClassPtr cc = getNext(walk); hasNext(walk); cc=getNext(walk)) { unsigned long begin = cc->cond.range.begin; unsigned long end = cc->cond.range.end; bi = cc->nextState; ccy = charClassMerge(ccy,begin,end,bi); } free(walk); return ccy; } /** 作成する state を linked list bitvector を index とした配列に BitVectorPtr を格納 state に対応する NodePtr を */ TGValue createState(TGValue tg,NodePtr n) { StatePtr s = NEW(State); s->next = tg.tg->currentState; tg.tg->currentState = s; s->node = n; BitVector bi = createBitVector(tg.stateBegin); s->bitState = bi; s->cc = NULL; } /** 正規表現に必要な状態を探して、それぞれに番号を割り振る 前が * でない + は新しく状態を作る * があったら、次の状態はその時の先頭の状態になる */ TGValue stateAllocate(NodePtr n,TGValue tg) { if (n->tokenType == '+') { TGValue tgLeft = stateAllocate(n->left,tg); if (tgLeft.asterisk) { TGValue tgRight = tgLeft; tgRight.asterisk = false; tgRight = stateAllocate(n->right,tgRight); tgRight.asterisk = true; return tgRight; } TGValue tgRight = tgLeft; tgRight.stateBegin = ++tgRight.stateNum; n->right->state = createState(tgRight,n->right); TGValue tgv1 = stateAllocate(n->right,tgLeft); return tgLeft; } else if (n->tokenType == '|') { TGValue tgv = stateAllocate(n->left,tg); TGValue tgv1 = stateAllocate(n->right,tgv); return tgv1; } else if (n->tokenType == '*') { TGValue tgAstah = tg; tgAstah.stateEnd = tgAstah.stateBegin; tgAstah = stateAllocate(n->left,tgAstah); tgAstah.asterisk = true; return tgAstah; } else if (n->tokenType == 'c' || n->tokenType == 'a'){ TGValue tgv = tg; tgv.asterisk = false; n->stateNum = tg.stateBegin; n->nextStateNum = tg.stateEnd; return tgv; } else { return tg; } } /** 割り当てられた状態に沿って charclass の行き先を書き換える 書き換えた charclass を merge する 前の部分に * がない + は新しい状態をつくるので、state を切り替えて生成する */ TGValue generateTransition(NodePtr n,TGValue tg) { if (n->tokenType == '+') { if (tg.asterisk) { TGValue tgRight = tg; tgRight.asterisk = false; tgRight = generateTransition(n->right,tgRight); tgRight.asterisk = true; return tgRight; } StatePtr left = tg.state; tg.state = n->left->state; tg.tg.stateArray[tg.state->bitState.bitContainer] = tg.state; TGValue tgLeft = generateTransition(n->left,tg); tg.state = left; TGValue tgv1 = generateTransition(n->right,tgLeft); return tgv1; } else if (n->tokenType == '|') { TGValue tgv = generateTransition(n->left,tg); TGValue tgv1 = generateTransition(n->right,tgv); return tgv1; } else if (n->tokenType == '*') { tgAstah = generateTransition(n->left,tgAstah); tgAstah.asterisk = true; return tgAstah; } else if (n->tokenType == 'c' || n->tokenType == 'a'){ TGValue tgv = tg; tgv.asterisk = false; BitVector bi = createBitVector(n->nextStateNum); setState(n->cc,bi); tgv.state->cc = mergeTransition(tgv.state,n->cc); return tgv; } else { return tg; } } void printTransitionList(TransitionPtr ts) { for (;ts;ts = ts->next) { printf("\n"); } } TransitionGenerator createTransitionGenerator() { TransitionGenerator tg; tg.ts = NEW(Transition); tg.state = NEW(State); tg.transitionList = NEW(Transition); // Init State : 00...00(64bit) BitVectorPtr initStateBi = NEW(BitVector); bitSet(initStateBi,INIT_STATE_BIT); StatePtr initState = createState(*initStateBi); // Last State : 10...00(64bit) BitVectorPtr lastStateBi = NEW(BitVector); bitSet(lastStateBi,END_STATE_BIT); StatePtr lastState = createState(*lastStateBi); tg.stateArray = appendState(initState,lastState); tg.stateArrayLast = lastState; tg.currentState = initState; tg.nextState = NEW(State); return tg; } TransitionGenerator generateTransitionList(NodePtr n) { TransitionGenerator tg = createTransitionGenerator(); TGValue tgv; tgv.asterisk = false; tgv.tg = tg; StatePtr start = createState(tgv,n); NodePtr eof = createNode(NULL,'e',NULL,NULL,NULL); StatePtr end = createState(tgv,eof); tgv.stateBegin = 0; tgv.stateEnd = 1; stateAllocate(n,tgv); tgv.tg.stateArray = (StatePtr)calloc(tg.stateNum,sizeof(StatePtr)); generateTransition(n,tgv); printTransitionList(tg.ts); return tg; }