diff c/regexParser/main.cc @ 82:1d9bbf922bb6

add createRegexTree.cc
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Wed, 07 Oct 2015 18:13:27 +0900
parents 27883946b2dc
children 912d7bd51f38
line wrap: on
line diff
--- a/c/regexParser/main.cc	Wed Oct 07 16:47:03 2015 +0900
+++ b/c/regexParser/main.cc	Wed Oct 07 18:13:27 2015 +0900
@@ -11,141 +11,9 @@
 #include <string.h>
 #include "regexParser.h"
 
-NodePtr createNode(unsigned char,NodePtr,NodePtr);
-NodePtr charClass(RegexInfoPtr);
-NodePtr group(RegexInfoPtr);
-void token(RegexInfoPtr);
-NodePtr regexAtom(RegexInfoPtr);
-NodePtr regex(RegexInfoPtr);
+extern NodePtr regex(RegexInfoPtr);
 extern void printTree(NodePtr);
 
-/**
- * Create a node of regex parse tree.
- *     tokenType
- *     regexPosition(state)
- *     stateTransitionTable
- */
-NodePtr createNode(unsigned char character, NodePtr left, NodePtr right) {
-    NodePtr n = (NodePtr)malloc(sizeof(Node));
-    n->self = n;
-    n->Value.character = character;
-    n->left = left;
-    n->right = right;
-    return n;
-}
-
-// <charClass> ::= '['<literal>'-'<literal>']'
-NodePtr charClass(RegexInfoPtr ri) {
-    NodePtr n = (NodePtr)malloc(sizeof(Node));
-    unsigned char startChar = ri->ptr[0];
-    while (ri->ptr[0] == '-') {
-        ri->ptr++;
-    }
-    unsigned char endChar = ri->ptr[0];
-    unsigned char *charTable = (unsigned char*)malloc(sizeof(char)*256);
-
-    return n;
-}
-
-// <literal> ::= [a-z][A-Z][0-9]
-NodePtr literal(RegexInfoPtr ri) {
-    NodePtr n = createNode(ri->ptr[0],0,0);
-    ri->ptr++;
-    return n;
-}
-
-// <group> ::= '('<regex>')'
-NodePtr group(RegexInfoPtr ri) {
-    return regex(ri);
-}
-
-
-
-void token(RegexInfoPtr ri) {
-    while (ri->ptr[0] != '\0') {
-        if (ri->ptr[0] == '('){
-            ri->ptr++;
-            ri->tokenType = '(';
-            ri->tokenValue = 0;
-            if (ri->ptr[1] == ')') {
-                ri->ptr++;
-            }
-            return;
-        } else if (ri->ptr[0] == ')') {
-            ri->ptr++;
-            ri->tokenType = ')';
-            ri->tokenValue = ri->ptr[0];
-            return;
-        } else if (ri->ptr[0] == '[') {
-            ri->ptr++;
-            ri->tokenType = '[';
-            ri->tokenValue = ri->ptr[0];
-            if (ri->ptr[1] == ']') {
-                ri->ptr++;
-            }
-            return;
-        } else if (ri->ptr[0] == '|'){
-            ri->ptr++;
-            ri->tokenType = '|';
-            ri->tokenValue = 0;
-            return;
-        } else if (ri->ptr[0] == '*'){
-            ri->ptr++;
-            ri->tokenType = '*';
-            ri->tokenValue = 0;
-            return;
-        } else if (ri->ptr[0] == '\\'){
-            // need more proccesing 
-            /*
-                \277
-                \0xa5
-                \[
-                \\
-                \utf-8 etc...
-            */
-        } else {
-            ri->tokenType = 'a';
-            ri->tokenValue = ri->ptr[0];
-            return;
-        }
-    }
-
-    ri->tokenType = 0;
-    ri->tokenValue = 0;
-    return;
-}
-
-// <regexAtom> ::= <literal>|<charClass>|<group>
-NodePtr regexAtom(RegexInfoPtr ri) {
-
-    token(ri);
-    NodePtr n = NULL;
-    if (ri->tokenType == 'a') n = literal(ri);
-    else if (ri->tokenType == '[') n = charClass(ri);
-    else if (ri->tokenType == '(') n = group(ri);
-
-    return n;
-}
-
-// <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex>
-NodePtr regex(RegexInfoPtr ri) {
-    NodePtr n = regexAtom(ri);
-    while (ri->ptr[0]) {
-        token(ri);
-        if (ri->tokenType == '*') {
-            n = createNode('*',n,0);
-        } else if (ri->tokenType == '|') {
-            NodePtr n1 = regex(ri);
-            n = createNode('|',n,n1);
-        } else if (ri->tokenType == ')') {
-            return n;
-        } else {
-            NodePtr n1 = regex(ri);
-            n = createNode('+',n,n1);
-        }
-    } return n;
-}
-
 
 int main(int argc, char **argv)
 {