Mercurial > hg > Members > masakoha > testcode
view c/regexParser/main.cc @ 66:f8fb3b463f70
fix processing when '|' is encountered
author | Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 04 Aug 2015 16:41:41 +0900 |
parents | 20b7d4e958bb |
children | 4842ca2cf8ee |
line wrap: on
line source
/*
 * Recursive-descent parser for a small regular-expression grammar.
 *
 *   <literal>   ::= [a-z][A-Z][0-9]
 *   <charClass> ::= '['<literal>'-'<literal>']'
 *   <string>    ::= <literal><literal>*
 *   <group>     ::= '('<regex>')'
 *   <regexAtom> ::= <literal>|<charClass>|<group>
 *   <regex>     ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex>
 *
 * The parser reads the pattern through the global cursor `ptr` and builds a
 * binary tree of Node objects.  The code is written so that it builds both
 * as C11 and as C++.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Character-class payload: a byte table plus a linked list of ranges. */
typedef struct charClass {
    unsigned char table[256];
    struct utf8Range {
        unsigned char *begin;
        unsigned char *end;
        struct utf8Range *next;
    } *rangeList;
} CharClass, *CharClassPtr;

/*
 * Syntax-tree node.  The union is anonymous so that `cc` and `string` are
 * real members of the node (a tagged union without a declarator would only
 * declare a type and add no storage).
 */
typedef struct node {
    unsigned char type;
    union {
        struct charClass *cc;
        unsigned char *string;
    };
    struct node *self;
    struct node *left;
    struct node *right;
} Node, *NodePtr;

unsigned char *ptr;         /* cursor into the NUL-terminated pattern        */
unsigned char tokenType;    /* kind of the token last classified by token()  */
int tokenValue;             /* byte value associated with that token, or 0   */
NodePtr regexHeadNode;

NodePtr charClass(void);
NodePtr literal(void);
NodePtr string(void);
NodePtr group(void);
NodePtr orexp(void);
NodePtr asterisk(void);
NodePtr regex(void);
NodePtr createNode(char, NodePtr, NodePtr);
extern void token(void);
extern NodePtr regexAtom(void);

/*
 * Returns true when `c` can start a literal, i.e. it is neither the end of
 * the pattern nor one of the meta characters '(' ')' '[' '|' '*'.
 * Bytes above 0x7f fall through to the default case and count as literals.
 */
bool isLiteral(char c)
{
    switch (c) {
    case '\0':
    case '(':
    case ')':
    case '[':
    case '|':
    case '*':
        return false;
    default:
        return true;
    }
}

/* Debug helper: dumps the addresses stored in node `n` to stdout. */
void printNodeDate(NodePtr n)
{
    puts("---------------------");
    /* %p requires a void pointer argument. */
    printf("Self Node addr : %p\n", (void *)n->self);
    printf("left Node addr : %p\n", (void *)n->left);
    printf("right Node addr : %p\n", (void *)n->right);
    puts("---------------------");
    puts("");
}

/*
 * Allocates a zero-initialised node, links `left` and `right` below it,
 * prints it with printNodeDate() and returns it.  Terminates the process
 * if the allocation fails.
 *
 * NOTE(review): `character` is accepted but not recorded in the node; the
 * original assignment was commented out.  `type` therefore stays 0.
 */
NodePtr createNode(char character, NodePtr left, NodePtr right)
{
    /* The cast is needed when this file is compiled as C++. */
    NodePtr n = (NodePtr)calloc(1, sizeof *n);
    if (n == NULL) {
        perror("createNode");
        exit(EXIT_FAILURE);
    }
    (void)character;
    n->self = n;
    n->left = left;
    n->right = right;
    printNodeDate(n);
    return n;
}

/*
 * <charClass> ::= '['<literal>'-'<literal>']'
 *
 * token() has already moved `ptr` past the closing ']' when this is called,
 * so the cursor must not be advanced again here.
 */
NodePtr charClass(void)
{
    return createNode('[', NULL, NULL);
}

/*
 * <literal> ::= [a-z][A-Z][0-9]
 *
 * Creates a leaf for the byte under the cursor and steps over it.  The
 * cursor is never moved past the terminating NUL.
 */
NodePtr literal(void)
{
    NodePtr n = createNode((char)*ptr, NULL, NULL);
    if (*ptr != '\0') {
        ptr++;
    }
    return n;
}

/*
 * <string> ::= <literal><literal>*
 *
 * Builds a right-leaning chain of nodes, one per literal, ending in an
 * empty node at the first non-literal byte.
 */
NodePtr string(void)
{
    if (!isLiteral((char)*ptr)) {
        return createNode(0, NULL, NULL);
    }
    /*
     * Both calls move `ptr`; they are sequenced explicitly because the
     * evaluation order of function arguments is unspecified.
     */
    NodePtr head = literal();
    NodePtr rest = string();
    return createNode(0, head, rest);
}

/*
 * <group> ::= '('<regex>')'
 *
 * Called after token() has consumed the opening '('.  Parses the enclosed
 * expression and consumes the matching ')'.  When ')' is missing a message
 * is written to stderr and the inner expression is returned unwrapped.
 */
NodePtr group(void)
{
    NodePtr n = regex();
    if (*ptr == ')') {
        token();    /* consumes ')' */
        return createNode('(', n, NULL);
    }
    fprintf(stderr, "regex : missing ')'\n");
    return n;
}

/*
 * Classifies the token under the cursor into tokenType / tokenValue.
 *
 *   end of input : tokenType = '\0'; cursor unchanged
 *   '(' or ')'   : tokenType = that byte; the byte is consumed
 *   '['          : tokenType = '['; everything through the first ']' is
 *                  consumed (the scan stops at the NUL if ']' is missing)
 *   '|' or '*'   : tokenType = that byte; cursor unchanged
 *   anything else: tokenType = 'a', tokenValue = the byte; cursor unchanged
 *
 * TODO: backslash escapes (\277, \0xa5, \[, \\, UTF-8 sequences, ...) are
 * not handled yet.
 */
void token(void)
{
    tokenValue = 0;
    switch (*ptr) {
    case '\0':
        /* Reset so that callers never act on a stale token at the end. */
        tokenType = '\0';
        return;
    case '(':
    case ')':
        tokenType = *ptr++;
        return;
    case '[':
        tokenType = '[';
        tokenValue = '[';
        while (*ptr != '\0' && *ptr != ']') {
            ptr++;
        }
        if (*ptr == ']') {
            ptr++;
        } else {
            fprintf(stderr, "regex : missing ']'\n");
        }
        return;
    case '|':
    case '*':
        tokenType = *ptr;
        return;
    default:
        tokenType = 'a';
        tokenValue = *ptr;
        return;
    }
}

/*
 * <regexAtom> ::= <literal>|<charClass>|<group>
 *
 * Returns NULL when the cursor is not at the start of an atom.  A ')' is
 * left in place so that the enclosing group() can consume it.
 */
NodePtr regexAtom(void)
{
    if (*ptr == ')') {
        return NULL;
    }
    token();
    switch (tokenType) {
    case 'a':
        return literal();
    case '[':
        return charClass();
    case '(':
        return group();
    default:
        return NULL;
    }
}

/*
 * <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex>
 *
 * Parses until the end of the pattern or an unconsumed ')'.  The loop peeks
 * at *ptr instead of calling token() up front, because token() consumes
 * '(' and '[...]' and regexAtom() must be the one to see them.
 */
NodePtr regex(void)
{
    NodePtr n = regexAtom();
    while (*ptr != '\0' && *ptr != ')') {
        if (*ptr == '*') {
            token();
            ptr++;
            n = createNode('*', n, NULL);
        } else if (*ptr == '|') {
            token();
            ptr++;
            NodePtr alternative = regex();
            n = createNode('|', n, alternative);
        } else {
            /* Start of another atom: concatenate with the rest. */
            NodePtr rest = regex();
            n = createNode('+', n, rest);
        }
    }
    return n;
}

/*
 * e.g. (column alignment approximated)
 *
 * % ./regexParser -regex abc
 *
 *     #-c
 *   #-+
 *   # #-b
 * +
 *   #-a
 *
 * % ./regexParser -regex (a*|bc)d
 *
 *   #-d
 * +
 * #     #-c
 * #   #-+
 * #   # #-b
 * #-|
 *   #
 *   #-*
 *     #-a
 */
/* Not implemented yet: intended to print the tree as sketched above. */
void printTree(NodePtr n)
{
    (void)n;
}

/*
 * Usage: regexParser -regex <pattern>
 *
 * Parses <pattern>, printing every node as it is created.  Returns a
 * failure status when no pattern is given or when the pattern contains an
 * unmatched ')'.
 */
int main(int argc, char **argv)
{
    for (int i = 1; i < argc; i++) {
        /* Only accept "-regex" when a value actually follows it. */
        if (strcmp(argv[i], "-regex") == 0 && i + 1 < argc) {
            ptr = (unsigned char *)argv[i + 1];
            i++;
        }
    }
    if (ptr == NULL) {
        fprintf(stderr, "usage: %s -regex <pattern>\n",
                argc > 0 ? argv[0] : "regexParser");
        return EXIT_FAILURE;
    }

    printf("regex : %s\n", (char *)ptr);
    NodePtr n = regex();
    if (*ptr != '\0') {
        /* regex() only stops early on a ')' that no group claimed. */
        fprintf(stderr, "regex : unmatched ')'\n");
        return EXIT_FAILURE;
    }
    printTree(n);
    return 0;
}