changeset 135:e1a262ec75f0 pairPro

impl charclass
author masa
date Fri, 04 Dec 2015 19:07:10 +0900
parents dbafc753078e
children 15815fcb6c2f
files c/regexParser/Makefile c/regexParser/node.cc c/regexParser/regexParser.cc
diffstat 3 files changed, 56 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- a/c/regexParser/Makefile	Fri Dec 04 17:45:09 2015 +0900
+++ b/c/regexParser/Makefile	Fri Dec 04 19:07:10 2015 +0900
@@ -25,22 +25,22 @@
 	rm -f *~ \#*
 
 test:
-	./$(TARGET) -regex "a"
-	./$(TARGET) -regex "ab"
-	./$(TARGET) -regex "ab*"
-	./$(TARGET) -regex "(ab)*"
-	./$(TARGET) -regex "(ab)*c"
-	./$(TARGET) -regex "(ab)c"
-	./$(TARGET) -regex "(a|b)c"
-	./$(TARGET) -regex "a(bc)*d"
-	./$(TARGET) -regex "abc*d"
-	./$(TARGET) -regex "(ab)c*d"
-	./$(TARGET) -regex "a(b)c"
-	./$(TARGET) -regex "(a|b|c)d"
-	./$(TARGET) -regex "(a|b|c)*d"
-	./$(TARGET) -regex "[a]"
-	./$(TARGET) -regex "[ab]"
-	./$(TARGET) -regex "[a-z]"
-	./$(TARGET) -regex "[a-zA-Z]"
-	./$(TARGET) -regex "[a-zA-Z]*"
-	./$(TARGET) -regex "a[a-zA-Z]*"
+	./$(TARGET) -regex 'a'
+	./$(TARGET) -regex 'ab'
+	./$(TARGET) -regex 'ab*'
+	./$(TARGET) -regex '(ab)*'
+	./$(TARGET) -regex '(ab)*c'
+	./$(TARGET) -regex '(ab)c'
+	./$(TARGET) -regex '(a|b)c'
+	./$(TARGET) -regex 'a(bc)*d'
+	./$(TARGET) -regex 'abc*d'
+	./$(TARGET) -regex '(ab)c*d'
+	./$(TARGET) -regex 'a(b)c'
+	./$(TARGET) -regex '(a|b|c)d'
+	./$(TARGET) -regex '(a|b|c)*d'
+	./$(TARGET) -regex '[a]'
+	./$(TARGET) -regex '[ab]'
+	./$(TARGET) -regex '[a-z]'
+	./$(TARGET) -regex '[a-zA-Z]'
+	./$(TARGET) -regex '[a-zA-Z]*'
+	./$(TARGET) -regex 'a[a-zA-Z]*'
--- a/c/regexParser/node.cc	Fri Dec 04 17:45:09 2015 +0900
+++ b/c/regexParser/node.cc	Fri Dec 04 19:07:10 2015 +0900
@@ -1,20 +1,32 @@
 #include <stdio.h>
 #include "node.h"
 
+// Print every range of a range-type ('r') character class on one line, indented for depth d.
+static void printCharacterClass(CharClassPtr cc, int d) {
+    if (cc->type != 'r') return;   // any other class type produces no output
+    printf("%*c",d*4, ' ');
+    for (RangeListPtr cur = cc->cond->range; cur != NULL; cur = cur->next) {
+        printf("%c-%c ",*cur->begin,*cur->end);   // "<first>-<last> " for each list entry
+    }
+    printf("\n");
+}
+
 static void descendTree(NodePtr n, int d) {
     if (n->left != NULL) {
         d++;
         descendTree(n->left, d);
         d--;
     }
-    if (n->tokenType != 'a') {
-        printf("%*c%c(%lu)\n",d*4, ' ',n->tokenType,n->nodeNumber);
-    } else {
+    if (n->tokenType == 'a') {
         printf("%*c",d*4, ' ');
         for (int i = 0; i < n->cc->cond->w->length; i++) {
             putchar(n->cc->cond->w->word[i]);
         }
         printf("(%lu)\n",n->nodeNumber);
+    } else if (n->tokenType == 'c') {
+        printCharacterClass(n->cc,d);
+    } else {
+        printf("%*c%c(%lu)\n",d*4, ' ',n->tokenType,n->nodeNumber);
     }
 
     if (n->right != NULL) {
--- a/c/regexParser/regexParser.cc	Fri Dec 04 17:45:09 2015 +0900
+++ b/c/regexParser/regexParser.cc	Fri Dec 04 19:07:10 2015 +0900
@@ -57,33 +57,39 @@
 // <charClass> ::= '['<literal>'-'<literal>']'
 static
 NodePtr charClass(RegexInfoPtr ri) {
-
     CharClassPtr cc = NEW(CharClass);
+    NodePtr n = createNode(ri,'c',cc,0,0);   // 'c' node wrapping cc; created before the ranges are scanned
     cc->type = 'r';
     cc->cond = NEW(Condition);
     cc->cond->range = NEW(RangeList);
     cc->cond->range->begin = ri->ptr;
-    cc->cond->range->end = ri->ptr + 1;
+    cc->cond->range->end = ri->ptr;          // first range starts out as the single character at ri->ptr
     cc->cond->range->next = NULL;


     RangeListPtr rangeList = cc->cond->range;

-    while (ri->ptr[i] != ']') {
-        if (ri->ptr[i] == '-') i++;
-
-        rangeList->end = ri->ptr + i;
+    if (*ri->ptr) ri->ptr++;   // first member is already recorded; never step over the NUL of a bare "["
+    for (; *ri->ptr && *ri->ptr != ']'; ri->ptr++) {   // scan members until ']' or end of pattern
+        if (*ri->ptr == '-') {
+            if (ri->ptr[1] == 0) { ri->ptr++; break; }   // dangling '-' at end of pattern: stop on the NUL
+            rangeList->end = ++ri->ptr;   // character after '-' closes the current range; loop step then skips it
+            continue;
+        }
+        if (ri->ptr[0] == rangeList->end[0] + 1) {   // successor of the current end: widen the range
+            rangeList->end = ri->ptr;
+            continue;
+        }
         rangeList->next = NEW(RangeList);
         rangeList = rangeList->next;
-        rangeList->begin = ri->ptr+i+1;
+        rangeList->begin = ri->ptr;   // any other character opens a new single-character range
+        rangeList->end = ri->ptr;
         rangeList->next = NULL;
-        i++;
     }
             // TODO literal support

-    rangeList->end = ri->ptr + i - 1;
-    NodePtr n = createNode(ri,'c',cc,0,0);
+    if (*ri->ptr) ri->ptr++;   // here *ri->ptr is ']' or NUL; step over the ']' only
+    token(ri);                 // NOTE(review): a second token() follows immediately - confirm both advances are intended
     token(ri);
     return n;
 }
@@ -114,6 +120,11 @@
             ri->tokenType = 'c';
             ri->tokenValue = ri->ptr;
             return;
+        } else if (ri->ptr[0] == ']') {
+            ri->ptr++;
+            ri->tokenType = ']';
+            ri->tokenValue = ri->ptr;
+            return;
         } else if (ri->ptr[0] == '|'){
             ri->ptr++;
             ri->tokenType = '|';