Mercurial > hg > Members > masakoha > testcode
changeset 192:ecf70fb215a5 pairPro
print charclass
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 25 Dec 2015 19:31:53 +0900 |
parents | 02031fb73af8 |
children | 093bbd028ee3 b9db09ff630f |
files | regexParser/error.cc regexParser/error.h regexParser/node.cc regexParser/regexParser.cc regexParser/regexParser.h regexParser/subsetConstraction.cc |
diffstat | 6 files changed, 50 insertions(+), 52 deletions(-) [+] |
line wrap: on
line diff
--- a/regexParser/error.cc Fri Dec 25 16:08:02 2015 +0900
+++ b/regexParser/error.cc Fri Dec 25 19:31:53 2015 +0900
@@ -1,7 +1,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-void mallocFailedMessage() {
-    fprintf(stderr, "Failed to allocate memory.\n");
+void errorMassege(const char* ers,int lineNum,const char* filename) {
+    fprintf(stderr, "%s:%d: error: %s\n",filename,lineNum,ers);
     exit(-1);
 }
--- a/regexParser/error.h Fri Dec 25 16:08:02 2015 +0900
+++ b/regexParser/error.h Fri Dec 25 19:31:53 2015 +0900
@@ -1,1 +1,1 @@
-extern void mallocFailedMessage();
+extern void errorMassege(const char* ers,int lineNum,const char* filename);
--- a/regexParser/node.cc Fri Dec 25 16:08:02 2015 +0900
+++ b/regexParser/node.cc Fri Dec 25 19:31:53 2015 +0900
@@ -3,7 +3,8 @@
 #include "node.h"
 
 void printCharacterClass(CharClassPtr cc, long nodeNumber,int d) {
-    if (cc->type == 'r') {
+    if (!cc) return;
+    if (cc->type == 'a') {
         if (cc->left) {
             printCharacterClass(cc->left,nodeNumber,d+1);
         }
--- a/regexParser/regexParser.cc Fri Dec 25 16:08:02 2015 +0900
+++ b/regexParser/regexParser.cc Fri Dec 25 19:31:53 2015 +0900
@@ -20,6 +20,8 @@
     n->cc = NULL;
     n->left = NULL;
     n->right = NULL;
+    n->stateNum = 0;
+    n->nextStateNum = 0;
     return n;
 }
 
@@ -36,9 +38,12 @@
 CharClassPtr createCharClassWord(RegexInfoPtr ri) {
     CharClassPtr cc = NEW(CharClass);
     cc->type = 'a';
+    cc->left = NULL;
+    cc->right = NULL;
     cc->cond.w.word = ri->tokenValue;
     cc->cond.w.length = ri->ptr - ri->tokenValue;
     cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue;
+    cc->cond.range.next = NULL;
     return cc;
 }
 
@@ -145,7 +150,7 @@
     for (; r; r = r->next) {
         cc = insertCharClass(cc, r->begin, r->end);
     }
-
+    cc->cond.range.next = 0;
     // TODO literal support
     // merge rangeList here
     if (*ri->ptr) ri->ptr++;
@@ -239,12 +244,6 @@
     return n;
 }
 
-RegexInfoPtr createRegexInfo (RegexInfoPtr ri) {
-    ri->stateNumber++;
-    return ri;
-}
-
-
 // <regex> ::= <regexAtom> | <regexAtom>'*'<regex> | <regexAtom>'|'<regex> | <regexAtom><regexAtom>'*' | <regexAtom><regex>
 NodePtr regex(RegexInfoPtr ri) {
     token(ri);
--- a/regexParser/regexParser.h Fri Dec 25 16:08:02 2015 +0900
+++ b/regexParser/regexParser.h Fri Dec 25 19:31:53 2015 +0900
@@ -35,6 +35,7 @@
 struct node;
 
 typedef struct state {
+    int stateNum;
     BitVector bitState;
     CharClassPtr cc;
     struct node *node;
@@ -59,19 +60,14 @@
 typedef struct transitionGenerator {
     long stateMax;
     StateStackPtr stack;
-    StatePtr state;
-    StatePtr stateArray;
-    StatePtr currentState;
-    StatePtr startState;
-    StatePtr endState;
+    StatePtr *stateArray;
+    StatePtr stateList;
 } TransitionGenerator, *TransitionGeneratorPtr;
 
 typedef struct tgValue {
     bool asterisk;
-    int stateNum;
-    int stateBegin;
-    int stateEnd;
-    StatePtr tgState;
+    StatePtr startState;
+    StatePtr endState;
     TransitionGeneratorPtr tg;
 } TGValue, *TGValuePtr;
 
--- a/regexParser/subsetConstraction.cc Fri Dec 25 16:08:02 2015 +0900
+++ b/regexParser/subsetConstraction.cc Fri Dec 25 19:31:53 2015 +0900
@@ -6,17 +6,14 @@
 #include "subsetConstraction.h"
 #include "node.h"
 #include "BitVector.h"
-
-CharClassPtr createCharClassWord(unsigned char *w, CharClassPtr cc1, CharClassPtr cc2) {
-    CharClassPtr cc = NEW(CharClass);
-    return cc;
-}
+#include "error.h"
 
 CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) {
     CharClassPtr cc = NEW(CharClass);
     cc->type = 'r';
     cc->cond.range.begin = begin;
     cc->cond.range.end = end;
+    cc->cond.range.next = NULL;
     cc->left = left;
     cc->right = right;
     cc->nextState.bitContainer = state;
@@ -214,8 +211,9 @@
 */
 StatePtr createState(TGValue tg,NodePtr n) {
     StatePtr s = NEW(State);
-    s->next = tg.tg->currentState;
-    tg.tg->currentState = s;
+    s->stateNum = n->stateNum = ++tg.tg->stateMax;
+    s->next = tg.tg->stateList;
+    tg.tg->stateList = s;
     s->node = n;
     BitVector bi = createBitVector(n->stateNum);
     s->bitState = bi;
@@ -239,8 +237,9 @@
             return tgRight;
         }
         TGValue tgRight = tgLeft;
-        tgRight.stateBegin = ++tgRight.stateNum;
         n->right->state = createState(tgRight,n->right);
+        tgRight.startState = n->right->state;
+        stateAllocate(n->right,tgRight);
         return tgLeft;
     } else if (n->tokenType == '|') {
         TGValue tgv = stateAllocate(n->left,tg);
@@ -248,15 +247,16 @@
         return tgv1;
     } else if (n->tokenType == '*') {
         TGValue tgAstah = tg;
-        tgAstah.stateEnd = tgAstah.stateBegin;
+        tgAstah.endState = tgAstah.startState;
         tgAstah = stateAllocate(n->left,tgAstah);
         tgAstah.asterisk = true;
         return tgAstah;
     } else if (n->tokenType == 'c' || n->tokenType == 'a'){
         TGValue tgv = tg;
         tgv.asterisk = false;
-        n->stateNum = tg.stateBegin;
-        n->nextStateNum = tg.stateEnd;
+        n->stateNum = tg.startState->stateNum;
+        n->nextStateNum = tg.endState->stateNum;
+        n->state = tg.startState;
         return tgv;
     } else {
         return tg;
@@ -270,19 +270,19 @@
 */
 TGValue generateTransition(NodePtr n,TGValue tg) {
     if (n->tokenType == '+') {
-        if (tg.asterisk) {
-            TGValue tgRight = tg;
+        TGValue tgLeft = generateTransition(n->left,tg);
+        if (tgLeft.asterisk) {
+            TGValue tgRight = tgLeft;
             tgRight.asterisk = false;
             tgRight = generateTransition(n->right,tgRight);
             tgRight.asterisk = true;
             return tgRight;
         }
-        StatePtr left = tg.tgState;
-        tg.tgState = n->left->state;
-        // tg.tg->stateArray[tg.tgState->bitState.bitContainer] = tg.tgState;
-        TGValue tgLeft = generateTransition(n->left,tg);
-        tg.tgState = left;
+        StatePtr left = tgLeft.startState;
+        tgLeft.startState = n->right->state;
+        tgLeft.tg->stateArray[tgLeft.startState->bitState.bitContainer] = left;
         TGValue tgv1 = generateTransition(n->right,tgLeft);
+        tgv1.startState = left;
         return tgv1;
     } else if (n->tokenType == '|') {
         TGValue tgv = generateTransition(n->left,tg);
@@ -297,7 +297,7 @@
         tgv.asterisk = false;
         BitVector bi = createBitVector(n->nextStateNum);
         setState(n->cc,bi);
-        tgv.tgState->cc = mergeTransition(tgv.tgState,n->cc);
+        tgv.startState->cc = mergeTransition(tgv.startState,n->cc);
         return tgv;
     } else {
         return tg;
@@ -306,11 +306,10 @@
 
 TransitionGeneratorPtr createTransitionGenerator() {
     TransitionGeneratorPtr tg = NEW(TransitionGenerator);
-    tg->stateMax = 0;
+    tg->stateMax = -1;
     tg->stack = NULL;
-    tg->state = NEW(State);
     tg->stateArray = NULL;
-    tg->currentState = NULL;
+    tg->stateList = NULL;
     return tg;
 }
 
@@ -320,16 +319,17 @@
     // initiarize tgv
     tgv.asterisk = false;
     tgv.tg = tg;
-    tgv.tg->startState = createState(tgv,n);
+    StatePtr startState = tgv.startState = createState(tgv,n);
     NodePtr eof = createNode(NULL,'e',NULL,NULL,NULL);
-    tgv.tg->endState = createState(tgv,eof);
-    tgv.stateBegin = 0;
-    tgv.stateEnd = 1;
-    tgv.tgState = NULL;
-    stateAllocate(n,tgv);
-    tgv.tg->stateMax = tg->stateMax;
+    StatePtr endState = tgv.endState = createState(tgv,eof);
+    tgv = stateAllocate(n,tgv);
+    if (tg->stateMax > BITBLOCK) {
+        errorMassege("StateMax > BITBLOCK",__LINE__,__FILE__);
+    }
     BitVector bi = createBitVector(tg->stateMax);
-    tgv.tg->stateArray = (StatePtr)calloc(bi.bitContainer*2,sizeof(StatePtr));
+    tgv.tg->stateArray = (StatePtr*)calloc(bi.bitContainer*2,sizeof(StatePtr*));
+    tgv.tg->stateArray[startState->bitState.bitContainer] = startState;
+    tgv.tg->stateArray[endState->bitState.bitContainer] = endState;
     generateTransition(n,tgv);
     return tg;
 }
@@ -341,11 +341,13 @@
         printf("node : %c %d -> %d\n",state->node->tokenType,state->node->stateNum,state->node->nextStateNum);
         nodeNumber = state->node->stateNum;
     }
-    printCharacterClass(state->cc,nodeNumber,4);
+    if (state->cc) {
+        printCharacterClass(state->cc,nodeNumber,4);
+    }
 }
 
 void printState(TransitionGeneratorPtr tg) {
-    StatePtr state = tg->currentState;
+    StatePtr state = tg->stateList;
     for (;state;state = state->next) {
         printState(state);
         putchar('\n');