view regexParser/subsetConstraction.cc @ 185:d25f4f3b4c34 pairPro

add comment
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Thu, 24 Dec 2015 20:27:49 +0900
parents 1da1b2eacb84
children 3e8aae8beba9
line wrap: on
line source

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "subsetConstraction.h"

CharClassPtr createCharClassWord(unsigned char *w, CharClassPtr cc1, CharClassPtr cc2) {
    CharClassPtr cc = NEW(CharClass);
    return cc;
}

CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) {
    CharClassPtr cc = NEW(CharClass);
    cc->type = 'r';
    cc->cond.range.begin = begin;
    cc->cond.range.end = end;
    cc->left = left;
    cc->right = right;
    cc->nextState.bitContainer = state;
    return cc;
}

CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,BitVector nextState) {
    CharClassPtr cc1;
    if (cc) {
        cc1 = charClassMerge(cc,mBegin,mEnd,nextState);
    } else {
        cc1 = createCharClassRange(mBegin,mEnd,nextState.bitContainer,NULL,NULL);
    }
    return cc1;
}

CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, BitVector nextState) {
    // 重なっているccの領域を分割する
    // 必要ならばnextStateを重ねあわせる
    // 変更があった場合は新しくリストを作って返す
    if (end < cc->cond.range.begin ) { // 1
        if (cc->left) {
            return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,charClassMerge(cc->left,begin,end,nextState),cc->right);
        } else {
            return createCharClassRange(begin,end,nextState.bitContainer,NULL,cc);
        }
    } else if (end == cc->cond.range.begin && begin != end ) { // 2
        CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,nextState);
        if (cc->cond.range.begin == cc->cond.range.end) {
            return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,
                cc->nextState.bitContainer | nextState.bitContainer, cc1,cc->right);
        }
        CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right);
        return createCharClassRange(cc->cond.range.begin,cc->cond.range.begin,
            cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3);
    } else if (end < cc->cond.range.end) { // range.begin < end
        if (begin < cc->cond.range.begin) {  // 3
            CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
            CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,cc->left,cc->right);
            return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3);
        }
        if (begin == cc->cond.range.begin) {  // 6
            CharClassPtr cc2 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right);
            return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc2);
        }
        // 9
        CharClassPtr cc2 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL);
        CharClassPtr cc3 = createCharClassRange(end+1,cc->cond.range.end,cc->nextState.bitContainer,NULL,cc->right);
        return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc2,cc3);
    } else if (end == cc->cond.range.end) {
        if (begin == cc->cond.range.begin) { // 7
            return createCharClassRange(begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc->right);
        } else if (begin < cc->cond.range.begin) { // 4
            CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
            return createCharClassRange(cc->cond.range.begin,end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right);
        }
        // 10 cond.range.begin < begin
        CharClassPtr cc2 = createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,NULL,cc->right);
        return createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,cc2);
    }
    if (begin > cc->cond.range.end ) { // 13
        if (cc->right) {
            return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->left,charClassMerge(cc->right,begin,end,nextState));
        } else {
            return createCharClassRange(begin,end,nextState.bitContainer,cc,NULL);
        }
    }
    if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) {
        if (end > cc->cond.range.end) { // cond.range.end < end
            if (begin == cc->cond.range.begin) {    // 8
                CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
                return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,
                    cc->nextState.bitContainer | nextState.bitContainer,cc->left,cc1);
            }
            if (begin > cc->cond.range.begin) {  // 11
                CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
                CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,begin-1,cc->nextState.bitContainer,cc->left,NULL);
                return createCharClassRange(begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc3,cc1);
            }
        }
        // begin != end && end != cc->cond.range.end
        if (begin == cc->cond.range.end) { // 12
            CharClassPtr cc1 = mergeCCTree(cc->right,begin+1,end,nextState);
            if (cc->cond.range.begin == cc->cond.range.end) {
                return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc->right);
            }
            CharClassPtr cc3 = createCharClassRange(cc->cond.range.begin,cc->cond.range.end-1,cc->nextState.bitContainer,cc->left,NULL);
            return createCharClassRange(cc->cond.range.end,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3);
        }
    } else if (begin < cc->cond.range.begin) { // 5
        CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,nextState);
        CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,nextState);
        return createCharClassRange(cc->cond.range.begin,cc->cond.range.end,cc->nextState.bitContainer | nextState.bitContainer,cc1,cc3);
    } else {
        printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end);
    }
    return cc;
}

void findLeftMost(CharClassPtr next,CharClassWalkerPtr walk) {
    while (next->left) {
        CharClassStackPtr ccs = NEW(CharClassStack);
        ccs->next = walk->stack;
        ccs->turn = LEFT;
        ccs->cc = next;
        walk->stack = ccs;
        next = next->left;
    }
    walk->next = next;
}

CharClassWalkerPtr createCharClassWalker (CharClassPtr next) {
    CharClassWalkerPtr walk = NEW(CharClassWalker);
    walk->next = NULL;
    walk->stack = NULL;
    if (!next) return walk;
    if (!next->left) {
        walk->next = next;
        return walk;
    }
    findLeftMost(next,walk);
    return walk;
}

bool hasNext(CharClassWalkerPtr walk) {
    return walk->next != NULL;
}

CharClassStackPtr charClassStackPop(CharClassWalkerPtr walk) {
    if (!walk->stack) {
        return NULL;
    }
    CharClassStackPtr prev = walk->stack->next;
    free(walk->stack);
    walk->stack = prev;
    return prev;
}

CharClassPtr getNext(CharClassWalkerPtr walk) {
    CharClassPtr current = walk->next;
    walk->next = NULL;
    while (walk->stack) {
        while (walk->stack->turn == RIGHT) {
            if (charClassStackPop(walk) == NULL) {
                return current;
            }
        }
        if (walk->stack->turn == LEFT) {
            walk->next = walk->stack->cc;
            walk->stack->turn = SELF;
            return current;
        }
        if (current->right) {
            walk->stack->turn = RIGHT;
            findLeftMost(current->right,walk);
            return current;
        }
        charClassStackPop(walk);
    }
    return current;
}

void setState(CharClassPtr cc, BitVector bi) {
    setState(cc,bi);
    if (cc->left) {
        setState(cc->left,bi);
    }
    cc->nextState = bi;
    if (cc->right) {
        setState(cc->right,bi);
    }
}

CharClassPtr mergeTransition(StatePtr x,CharClassPtr y) {
    if (x->cc == NULL) {
        return y;
    }
    CharClassWalkerPtr walk = createCharClassWalker(x->cc);
    CharClassPtr ccy = y;
    BitVector bi;
    for (CharClassPtr cc = getNext(walk); hasNext(walk); cc=getNext(walk)) {
        unsigned long begin = cc->cond.range.begin;
        unsigned long end = cc->cond.range.end;
        bi = cc->nextState;
        ccy = charClassMerge(ccy,begin,end,bi);
    }
    free(walk);
    return ccy;
}

/**
 作成する state を linked list
bitvector を index とした配列に BitVectorPtr を格納
state に対応する NodePtr を
 */
TGValue createState(TGValue tg,NodePtr n) {
    StatePtr s = NEW(State);
    s->next = tg.tg->currentState;
    tg.tg->currentState = s;
    s->node = n;
    BitVector bi = createBitVector(tg.stateBegin);
    s->bitState = bi;
    s->cc = NULL;
}

/**
正規表現に必要な状態を探して、それぞれに番号を割り振る
前が * でない + は新しく状態を作る
* があったら、次の状態はその時の先頭の状態になる
 */
TGValue stateAllocate(NodePtr n,TGValue tg) {
    if (n->tokenType == '+') {
        TGValue tgLeft = stateAllocate(n->left,tg);
        if (tgLeft.asterisk) {
            TGValue tgRight = tgLeft;
            tgRight.asterisk = false;
            tgRight = stateAllocate(n->right,tgRight);
            tgRight.asterisk = true;
            return tgRight;
        }
        TGValue tgRight = tgLeft;
        tgRight.stateBegin = ++tgRight.stateNum;
        n->right->state = createState(tgRight,n->right);
        TGValue tgv1 = stateAllocate(n->right,tgLeft);
        return tgLeft;
    } else if (n->tokenType == '|') {
        TGValue tgv  = stateAllocate(n->left,tg);
        TGValue tgv1 = stateAllocate(n->right,tgv);
        return tgv1;
    } else if (n->tokenType == '*') {
        TGValue tgAstah = tg;
        tgAstah.stateEnd = tgAstah.stateBegin;
        tgAstah = stateAllocate(n->left,tgAstah);
        tgAstah.asterisk = true;
        return tgAstah;
    } else if (n->tokenType == 'c' || n->tokenType == 'a'){
        TGValue tgv = tg;
        tgv.asterisk = false;
        n->stateNum = tg.stateBegin;
        n->nextStateNum = tg.stateEnd;
        return tgv;
    } else {
        return tg;
    }
}

/**
割り当てられた状態に沿って charclass の行き先を書き換える
書き換えた charclass を merge する
前の部分に * がない + は新しい状態をつくるので、state を切り替えて生成する
 */
TGValue generateTransition(NodePtr n,TGValue tg) {
    if (n->tokenType == '+') {
        if (tg.asterisk) {
            TGValue tgRight = tg;
            tgRight.asterisk = false;
            tgRight = generateTransition(n->right,tgRight);
            tgRight.asterisk = true;
            return tgRight;
        }
        StatePtr left = tg.state;
        tg.state = n->left->state;
        tg.tg.stateArray[tg.state->bitState.bitContainer] = tg.state;
        TGValue tgLeft = generateTransition(n->left,tg);
        tg.state = left;
        TGValue tgv1 = generateTransition(n->right,tgLeft);
        return tgv1;
    } else if (n->tokenType == '|') {
        TGValue tgv  = generateTransition(n->left,tg);
        TGValue tgv1 = generateTransition(n->right,tgv);
        return tgv1;
    } else if (n->tokenType == '*') {
        tgAstah = generateTransition(n->left,tgAstah);
        tgAstah.asterisk = true;
        return tgAstah;
    } else if (n->tokenType == 'c' || n->tokenType == 'a'){
        TGValue tgv = tg;
        tgv.asterisk = false;
        BitVector bi = createBitVector(n->nextStateNum);
        setState(n->cc,bi);
        tgv.state->cc = mergeTransition(tgv.state,n->cc);
        return tgv;
    } else {
        return tg;
    }
}

void printTransitionList(TransitionPtr ts) {
    for (;ts;ts = ts->next) {
        printf("\n");
    }
}

TransitionGenerator createTransitionGenerator() {
    TransitionGenerator tg;
    tg.ts = NEW(Transition);
    tg.state = NEW(State);
    tg.transitionList = NEW(Transition);
    // Init State : 00...00(64bit)
    BitVectorPtr initStateBi = NEW(BitVector);
    bitSet(initStateBi,INIT_STATE_BIT);
    StatePtr initState = createState(*initStateBi);
    // Last State : 10...00(64bit)
    BitVectorPtr lastStateBi = NEW(BitVector);
    bitSet(lastStateBi,END_STATE_BIT);
    StatePtr lastState = createState(*lastStateBi);
    tg.stateArray = appendState(initState,lastState);
    tg.stateArrayLast = lastState;
    tg.currentState = initState;
    tg.nextState = NEW(State);
    return tg;
}

TransitionGenerator generateTransitionList(NodePtr n) {
    TransitionGenerator tg = createTransitionGenerator();
    TGValue tgv;
    tgv.asterisk = false;
    tgv.tg = tg;
    StatePtr start = createState(tgv,n);
    NodePtr eof = createNode(NULL,'e',NULL,NULL,NULL);
    StatePtr end = createState(tgv,eof);
    tgv.stateBegin = 0;
    tgv.stateEnd = 1;
    stateAllocate(n,tgv);
    tgv.tg.stateArray = (StatePtr)calloc(tg.stateNum,sizeof(StatePtr));
    generateTransition(n,tgv);
    printTransitionList(tg.ts);
    return tg;
}