changeset 299:bdfe0a32c48f

grepWalk
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Wed, 03 Feb 2016 13:11:10 +0900
parents 63213964502a
children 3e78631a6222
files regexParser/CeriumGrep.cc regexParser/Makefile regexParser/grepWalk.cc regexParser/grepWalk.h regexParser/threadedSearch.cc
diffstat 5 files changed, 58 insertions(+), 46 deletions(-) [+]
line wrap: on
line diff
--- a/regexParser/CeriumGrep.cc	Wed Feb 03 12:24:34 2016 +0900
+++ b/regexParser/CeriumGrep.cc	Wed Feb 03 13:11:10 2016 +0900
@@ -65,7 +65,7 @@
         st_mmap_t st_mmap = createSt_mmap(filename,fd);
         Buffer buff = createBuffer(st_mmap);
         if (ts) threadedSearch(tgv.tg,buff);
-        else grepWalk(tgv.tg,buff.buffptr,buff);
+        else grepWalk(tgv.tg,buff);
         close(fd);
     }
 
--- a/regexParser/Makefile	Wed Feb 03 12:24:34 2016 +0900
+++ b/regexParser/Makefile	Wed Feb 03 13:11:10 2016 +0900
@@ -71,7 +71,7 @@
 	./sequentialSearch -file $(TESTFILE) $(WC)
 
 grepWalk: regexParser $(TESTFILE)
-	./regexParser -$(SUBSET)  -regex $(REGEX)-file $(TESTFILE) $(WC)
+	./regexParser $(SUBSET)  -regex $(REGEX) -file $(TESTFILE) $(WC)
 
 sequentialSearch: sequentialSearch.cc regexParser fileread.o
 	./regexParser -seq -subset -regex $(REGEX)
--- a/regexParser/grepWalk.cc	Wed Feb 03 12:24:34 2016 +0900
+++ b/regexParser/grepWalk.cc	Wed Feb 03 13:11:10 2016 +0900
@@ -1,54 +1,65 @@
 #include <stdio.h>
+#include <stdlib.h>
 
 #include "grepWalk.h"
 #include "subsetConstruction.h"
-
-void grep(TransitionGeneratorPtr tg,unsigned char *matchBegin,Buffer buff,unsigned long d) ;
-
-void grepSkip(TransitionGeneratorPtr tg,unsigned char *matchBegin, Buffer buff) {
-    matchBegin = buff.buffptr;
-    grep(tg,matchBegin,buff,1); // 1 is initState
-}
+#include "threadedSearch.h"
 
-void grepWalk(TransitionGeneratorPtr tg, unsigned char *matchBegin, Buffer buff) {
-    grepSkip(tg,matchBegin,buff);
-}
-
-void grepMatch(TransitionGeneratorPtr tg,unsigned char *matchBegin, Buffer buff) {
-    fwrite(matchBegin,buff.buffptr-matchBegin,1,stdout);
-    puts("\n");
-    grepSkip(tg,matchBegin,buff);
+StatePtr nextState(BitVector bi,TransitionGeneratorPtr tg) {
+    // Look up the state for this bit vector in tg->stateArray.
+    StatePtr state = tg->stateArray[bi.bitContainer];
+    if (state == NULL) {
+        // Not cached yet: on-the-fly subset construction, then cache the new state.
+        state = createState(tg,bi);
+        determinize(state,tg);
+        tg->stateArray[bi.bitContainer] = state;
+    }
+    return state;
 }
 
-void grep(TransitionGeneratorPtr tg,unsigned char *matchBegin,Buffer buff,unsigned long d) {
-    unsigned char c = *buff.buffptr++;
-    if (c=='\0') return;
-    StatePtr state = tg->stateList;
+void grepWalk(TransitionGeneratorPtr tg,Buffer buff) {
+    TSValue tsv = createTSValue(tg,buff);
+    tg->stateStart = NEW(State);
+    *tg->stateStart = *tg->stateList;
+    tg->stateStart->accept = false; // the start state never accepts
+    StatePtr state = tg->stateStart;
 
-    while (state->bitState.bitContainer != d) state = state->next; // 配列へのアクセスへ変更
-    CharClassWalkerPtr ccw = createCharClassWalker(state->cc);
-    CharClassPtr cc = NULL;
-    bool found = false;
-    while (hasNext(ccw)) {
-        cc = getNext(ccw);
-        unsigned long begin = cc->cond.range.begin;
-        unsigned long end = cc->cond.range.end;
-        if (begin == end) {
-            if (c == begin) found = true;
-            else found = false;
-        } else {
-            if (c < begin) found = false;
-            else if (c < end) found = true;
+#if DEBUG
+    TSValuePtr tsvp = &tsv;   // make tsv visible in lldb
+#endif
+    next: while (tsv.buff.buffptr < tsv.buff.buffend) {
+        if (state->accept) {
+            tsv = tg->stateMatch(tsv);
+        }
+        CharClassWalkerPtr ccw = createCharClassWalker(state->cc);
+        if (!hasNext(ccw)) {
+            // matched and no transitions remain: start again from the start state
+            state = tg->stateStart;
+            ccw = createCharClassWalker(state->cc);
         }
-        if (found == true) break;
+        unsigned char c = *tsv.buff.buffptr++;
+//        printState(tsv.current->state);
+        while (hasNext(ccw)) {
+            CharClassPtr cc = getNext(ccw);
+            if (c<cc->cond.range.begin) {
+                state = tg->stateStart;
+                tsv = tg->stateSkip(tsv);
+                goto next;
+            } else if (c<=cc->cond.range.end) {
+                // range matched.
+                if (cc->cond.w.word) {
+                    // match the word.
+                    // if (not match) continue;
+                }
+                state = nextState(cc->nextState,tg);
+                goto next;
+            }
+        }
+        state = tg->stateStart;
+        tsv = tg->stateSkip(tsv);
     }
-
-    if (found == false) {
-        grepSkip(tg,matchBegin,buff);
-    } else if (found == true && (cc->nextState.bitContainer | 2)) { // Accept
-        grepMatch(tg,matchBegin,buff);
-    } else {
-        grep(tg,matchBegin,buff,cc->nextState.bitContainer);
-    }
+#if DEBUG
+    *tsvp = tsv;
+#endif
 }
 
--- a/regexParser/grepWalk.h	Wed Feb 03 12:24:34 2016 +0900
+++ b/regexParser/grepWalk.h	Wed Feb 03 13:11:10 2016 +0900
@@ -1,3 +1,3 @@
 #include "regexParser.h"
 
-extern void grepWalk(TransitionGeneratorPtr tg, unsigned char *matchBegin, Buffer buff);
+extern void grepWalk(TransitionGeneratorPtr tg, Buffer buff);
--- a/regexParser/threadedSearch.cc	Wed Feb 03 12:24:34 2016 +0900
+++ b/regexParser/threadedSearch.cc	Wed Feb 03 13:11:10 2016 +0900
@@ -12,7 +12,6 @@
 
 static
 TSValue stateSkip(TSValue tsv) {
-    tsv.current = tsv.tg->stateStart->tState;
     if (tsv.matchEnd) {
         fwrite(tsv.matchBegin,tsv.matchEnd-tsv.matchBegin,1,stdout);
         puts("");
@@ -100,6 +99,7 @@
         for (int i = 0; i < tsv.current->ccvSize; i++) {
             CCVPtr ccv = &tsv.current->ccv[i];
             if (c<ccv->begin) {
+                tsv.current = tsv.tg->stateStart->tState;
                 tsv = tsv.current->stateSkip(tsv);
                 goto next;
             } else if (c<=ccv->end) {
@@ -117,6 +117,7 @@
                 goto next;
             }
         }
+        tsv.current = tsv.tg->stateStart->tState;
         tsv = tsv.current->stateSkip(tsv);
     }
 #if DEBUG