changeset 192:ecf70fb215a5 pairPro

print charclass
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Fri, 25 Dec 2015 19:31:53 +0900
parents 02031fb73af8
children 093bbd028ee3 b9db09ff630f
files regexParser/error.cc regexParser/error.h regexParser/node.cc regexParser/regexParser.cc regexParser/regexParser.h regexParser/subsetConstraction.cc
diffstat 6 files changed, 50 insertions(+), 52 deletions(-) [+]
line wrap: on
line diff
--- a/regexParser/error.cc	Fri Dec 25 16:08:02 2015 +0900
+++ b/regexParser/error.cc	Fri Dec 25 19:31:53 2015 +0900
@@ -1,7 +1,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-void mallocFailedMessage() {
-    fprintf(stderr, "Failed to allocate memory.\n");
+void errorMassege(const char* ers,int lineNum,const char* filename) {
+    fprintf(stderr, "%s:%d: error: %s\n",filename,lineNum,ers);
     exit(-1);
 }
--- a/regexParser/error.h	Fri Dec 25 16:08:02 2015 +0900
+++ b/regexParser/error.h	Fri Dec 25 19:31:53 2015 +0900
@@ -1,1 +1,1 @@
-extern void mallocFailedMessage();
+extern void errorMassege(const char* ers,int lineNum,const char* filename);
--- a/regexParser/node.cc	Fri Dec 25 16:08:02 2015 +0900
+++ b/regexParser/node.cc	Fri Dec 25 19:31:53 2015 +0900
@@ -3,7 +3,8 @@
 #include "node.h"
 
 void printCharacterClass(CharClassPtr cc, long nodeNumber,int d) {
-    if (cc->type == 'r') {
+    if (!cc) return;
+    if (cc->type == 'a') {
         if (cc->left) {
             printCharacterClass(cc->left,nodeNumber,d+1);
         }
--- a/regexParser/regexParser.cc	Fri Dec 25 16:08:02 2015 +0900
+++ b/regexParser/regexParser.cc	Fri Dec 25 19:31:53 2015 +0900
@@ -20,6 +20,8 @@
     n->cc = NULL;
     n->left = NULL;
     n->right = NULL;
+    n->stateNum = 0;
+    n->nextStateNum = 0;
     return n;
 }
 
@@ -36,9 +38,12 @@
 CharClassPtr createCharClassWord(RegexInfoPtr ri) {
     CharClassPtr cc = NEW(CharClass);
     cc->type = 'a';
+    cc->left = NULL;
+    cc->right = NULL;
     cc->cond.w.word = ri->tokenValue;
     cc->cond.w.length = ri->ptr - ri->tokenValue;
     cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue;
+    cc->cond.range.next = NULL;
     return cc;
 }
 
@@ -145,7 +150,7 @@
     for (; r; r = r->next) {
         cc = insertCharClass(cc, r->begin, r->end);
     }
-
+    cc->cond.range.next = 0;
     // TODO literal support
     // merge rangeList here
     if (*ri->ptr) ri->ptr++;
@@ -239,12 +244,6 @@
     return n;
 }
 
-RegexInfoPtr createRegexInfo (RegexInfoPtr ri) {
-    ri->stateNumber++;
-    return ri;
-}
-
-
 // <regex> ::= <regexAtom> | <regexAtom>'*'<regex> | <regexAtom>'|'<regex> | <regexAtom><regexAtom>'*' | <regexAtom><regex>
 NodePtr regex(RegexInfoPtr ri) {
     token(ri);
--- a/regexParser/regexParser.h	Fri Dec 25 16:08:02 2015 +0900
+++ b/regexParser/regexParser.h	Fri Dec 25 19:31:53 2015 +0900
@@ -35,6 +35,7 @@
 struct node;
 
 typedef struct state {
+    int stateNum;
     BitVector bitState;
     CharClassPtr cc;
     struct node *node;
@@ -59,19 +60,14 @@
 typedef struct transitionGenerator {
     long stateMax;
     StateStackPtr stack;
-    StatePtr state;
-    StatePtr stateArray;
-    StatePtr currentState;
-    StatePtr startState;
-    StatePtr endState;
+    StatePtr *stateArray;
+    StatePtr stateList;
 } TransitionGenerator, *TransitionGeneratorPtr;
 
 typedef struct tgValue {
     bool asterisk;
-    int stateNum;
-    int stateBegin;
-    int stateEnd;
-    StatePtr tgState;
+    StatePtr startState;
+    StatePtr endState;
     TransitionGeneratorPtr tg;
 } TGValue, *TGValuePtr;
 
--- a/regexParser/subsetConstraction.cc	Fri Dec 25 16:08:02 2015 +0900
+++ b/regexParser/subsetConstraction.cc	Fri Dec 25 19:31:53 2015 +0900
@@ -6,17 +6,14 @@
 #include "subsetConstraction.h"
 #include "node.h"
 #include "BitVector.h"
-
-CharClassPtr createCharClassWord(unsigned char *w, CharClassPtr cc1, CharClassPtr cc2) {
-    CharClassPtr cc = NEW(CharClass);
-    return cc;
-}
+#include "error.h"
 
 CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) {
     CharClassPtr cc = NEW(CharClass);
     cc->type = 'r';
     cc->cond.range.begin = begin;
     cc->cond.range.end = end;
+    cc->cond.range.next = NULL;
     cc->left = left;
     cc->right = right;
     cc->nextState.bitContainer = state;
@@ -214,8 +211,9 @@
  */
 StatePtr createState(TGValue tg,NodePtr n) {
     StatePtr s = NEW(State);
-    s->next = tg.tg->currentState;
-    tg.tg->currentState = s;
+    s->stateNum = n->stateNum = ++tg.tg->stateMax;
+    s->next = tg.tg->stateList;
+    tg.tg->stateList = s;
     s->node = n;
     BitVector bi = createBitVector(n->stateNum);
     s->bitState = bi;
@@ -239,8 +237,9 @@
             return tgRight;
         }
         TGValue tgRight = tgLeft;
-        tgRight.stateBegin = ++tgRight.stateNum;
         n->right->state = createState(tgRight,n->right);
+        tgRight.startState = n->right->state;
+        stateAllocate(n->right,tgRight);
         return tgLeft;
     } else if (n->tokenType == '|') {
         TGValue tgv  = stateAllocate(n->left,tg);
@@ -248,15 +247,16 @@
         return tgv1;
     } else if (n->tokenType == '*') {
         TGValue tgAstah = tg;
-        tgAstah.stateEnd = tgAstah.stateBegin;
+        tgAstah.endState = tgAstah.startState;
         tgAstah = stateAllocate(n->left,tgAstah);
         tgAstah.asterisk = true;
         return tgAstah;
     } else if (n->tokenType == 'c' || n->tokenType == 'a'){
         TGValue tgv = tg;
         tgv.asterisk = false;
-        n->stateNum = tg.stateBegin;
-        n->nextStateNum = tg.stateEnd;
+        n->stateNum = tg.startState->stateNum;
+        n->nextStateNum = tg.endState->stateNum;
+        n->state = tg.startState;
         return tgv;
     } else {
         return tg;
@@ -270,19 +270,19 @@
  */
 TGValue generateTransition(NodePtr n,TGValue tg) {
     if (n->tokenType == '+') {
-        if (tg.asterisk) {
-            TGValue tgRight = tg;
+        TGValue tgLeft = generateTransition(n->left,tg);
+        if (tgLeft.asterisk) {
+            TGValue tgRight = tgLeft;
             tgRight.asterisk = false;
             tgRight = generateTransition(n->right,tgRight);
             tgRight.asterisk = true;
             return tgRight;
         }
-        StatePtr left = tg.tgState;
-        tg.tgState = n->left->state;
-        // tg.tg->stateArray[tg.tgState->bitState.bitContainer] = tg.tgState;
-        TGValue tgLeft = generateTransition(n->left,tg);
-        tg.tgState = left;
+        StatePtr left = tgLeft.startState;
+        tgLeft.startState = n->right->state;
+        tgLeft.tg->stateArray[tgLeft.startState->bitState.bitContainer] = left;
         TGValue tgv1 = generateTransition(n->right,tgLeft);
+        tgv1.startState = left;
         return tgv1;
     } else if (n->tokenType == '|') {
         TGValue tgv  = generateTransition(n->left,tg);
@@ -297,7 +297,7 @@
         tgv.asterisk = false;
         BitVector bi = createBitVector(n->nextStateNum);
         setState(n->cc,bi);
-        tgv.tgState->cc = mergeTransition(tgv.tgState,n->cc);
+        tgv.startState->cc = mergeTransition(tgv.startState,n->cc);
         return tgv;
     } else {
         return tg;
@@ -306,11 +306,10 @@
 
 TransitionGeneratorPtr createTransitionGenerator() {
     TransitionGeneratorPtr tg = NEW(TransitionGenerator);
-    tg->stateMax = 0;
+    tg->stateMax = -1;
     tg->stack = NULL;
-    tg->state = NEW(State);
     tg->stateArray = NULL;
-    tg->currentState = NULL;
+    tg->stateList = NULL;
     return tg;
 }
 
@@ -320,16 +319,17 @@
     // initiarize tgv
     tgv.asterisk = false;
     tgv.tg = tg;
-    tgv.tg->startState = createState(tgv,n);
+    StatePtr startState = tgv.startState = createState(tgv,n);
     NodePtr eof = createNode(NULL,'e',NULL,NULL,NULL);
-    tgv.tg->endState = createState(tgv,eof);
-    tgv.stateBegin = 0;
-    tgv.stateEnd = 1;
-    tgv.tgState = NULL;
-    stateAllocate(n,tgv);
-    tgv.tg->stateMax = tg->stateMax;
+    StatePtr endState = tgv.endState = createState(tgv,eof);
+    tgv = stateAllocate(n,tgv);
+    if (tg->stateMax > BITBLOCK) {
+        errorMassege("StateMax > BITBLOCK",__LINE__,__FILE__);
+    }
     BitVector bi = createBitVector(tg->stateMax);
-    tgv.tg->stateArray = (StatePtr)calloc(bi.bitContainer*2,sizeof(StatePtr));
+    tgv.tg->stateArray = (StatePtr*)calloc(bi.bitContainer*2,sizeof(StatePtr*));
+    tgv.tg->stateArray[startState->bitState.bitContainer] = startState;
+    tgv.tg->stateArray[endState->bitState.bitContainer] = endState;
     generateTransition(n,tgv);
     return tg;
 }
@@ -341,11 +341,13 @@
         printf("node : %c %d -> %d\n",state->node->tokenType,state->node->stateNum,state->node->nextStateNum);
         nodeNumber = state->node->stateNum;
     }
-    printCharacterClass(state->cc,nodeNumber,4);
+    if (state->cc) {
+        printCharacterClass(state->cc,nodeNumber,4);
+    }
 }
 
 void printState(TransitionGeneratorPtr tg) {
-    StatePtr state = tg->currentState;
+    StatePtr state = tg->stateList;
     for (;state;state = state->next) {
         printState(state);
         putchar('\n');