# HG changeset patch
# User Masataka Kohagura
# Date 1455852054 -32400
# Node ID 7e40d0e6fba04e6a277a06a43dd70df56a443a19
# Parent 8223a72a80e5ff0a6e53ebb2b1ff3193070b2a5f
add tSearch generateTState source

diff -r 8223a72a80e5 -r 7e40d0e6fba0 slide/s6/index.html
--- a/slide/s6/index.html Fri Feb 19 11:49:29 2016 +0900
+++ b/slide/s6/index.html Fri Feb 19 12:20:54 2016 +0900
@@ -434,6 +434,43 @@
+

文字クラスの構造体

+
+typedef struct utf8Range {      /* a range described by a begin/end pair of unsigned long values */
+    unsigned long begin;        /* start of the range */
+    unsigned long end;          /* end of the range; NOTE(review): whether this bound is inclusive is not shown here - confirm */
+} RangeList , *RangeListPtr;    /* RangeList names the struct itself; RangeListPtr is a pointer to it */
+
+typedef struct condition {      /* pairs one range with one Word value */
+    RangeList range;            /* stored by value, not through a pointer */
+    Word w;                     /* Word is declared outside this snippet */
+} Condition, *ConditionList;    /* note: ConditionList is a plain pointer to Condition, not a separate container type */
+
+typedef struct charClass {      /* node holding two links to other charClass nodes plus a condition and state data */
+    struct charClass *left;     /* link to another charClass node */
+    struct charClass *right;    /* link to another charClass node */
+    Condition cond;             /* range/word condition, stored by value */
+    int stateNum;               /* presumably the automaton state number tied to this node - TODO confirm */
+    BitVector nextState;        /* BitVector is declared outside this snippet; presumably the set of following states - TODO confirm */
+} CharClass, *CharClassPtr;     /* CharClass names the struct itself; CharClassPtr is a pointer to it */
+
+ +
+ +

正規表現木をオートマトンの状態遷移に沿って状態割当


@@ -498,13 +535,25 @@
@@ -555,43 +604,6 @@
-

文字クラスの構造体

-
-typedef struct utf8Range {
-    unsigned long begin;
-    unsigned long end;
-} RangeList , *RangeListPtr;
-
-typedef struct condition {
-    RangeList range;
-    Word w;
-} Condition, *ConditionList;
-
-typedef struct charClass {
-    struct charClass *left;
-    struct charClass *right;
-    Condition cond;
-    int stateNum;
-    BitVector nextState;
-} CharClass, *CharClassPtr;
-
- -
- -

Subset Construction


- + + +
+

並列処理時の正規表現のマッチング

+
+/*
+ * tSearch: consume the bytes between tsv.buff.buffptr and tsv.buff.buffend,
+ * moving tsv.current from state to state.
+ *
+ * Each iteration calls the current state's stateMatch hook, reads one byte,
+ * and looks for an entry in the current state's ccv table whose
+ * [begin, end] range contains that byte. On a hit the state advances to the
+ * entry's target; the target is created with nextTState() on first use and
+ * cached in ccv->tState. On a miss the state is reset to the start state and
+ * that state's stateSkip hook is called.
+ *
+ * Takes and returns the search context by value; the returned TSValue
+ * carries the updated buffer position and current state.
+ */
+TSValue tSearch(TSValue tsv) {
+    next: while (tsv.buff.buffptr < tsv.buff.buffend) {
+        tsv = tsv.current->stateMatch(tsv);
+        if (tsv.current->ccvSize==0) {
+            // no outgoing entries from this state: restart from the start state
+            tsv.current = tsv.tg->stateStart->tState;
+        }
+        unsigned char c = *tsv.buff.buffptr++;
+        for (int i = 0; i < tsv.current->ccvSize; i++) {
+            CCVPtr ccv = &tsv.current->ccv[i];
+            // Giving up as soon as c is below an entry's begin presumably
+            // relies on the table being sorted by ascending range - TODO confirm.
+            if (c < ccv->begin) {
+                tsv.current = tsv.tg->stateStart->tState;
+                tsv = tsv.current->stateSkip(tsv);
+                goto next;
+            } else if (c<=ccv->end) {
+                // range matched.
+                if (ccv->w.word) {
+                    // match the word.
+                    // if (not match) continue;
+                }
+                if (ccv->tState) {
+                    // target state was already built on an earlier visit
+                    tsv.current = ccv->tState;
+                } else {
+                    // build the target state on first use and cache it on the entry
+                    tsv.current = nextTState(ccv->state,tsv.tg);
+                    ccv->tState = tsv.current;
+                }
+                goto next;
+            }
+        }
+        // c is above every entry's end: no transition, restart from the start state
+        tsv.current = tsv.tg->stateStart->tState;
+        tsv = tsv.current->stateSkip(tsv);
+    }
+    return tsv;
+}
+
+
+ +

正規表現の整合性