Mercurial > hg > Members > masakoha > testcode
view c/regexParser/main.cc @ 74:b11dfc48f698
fix grouping function
author | masa |
---|---|
date | Fri, 28 Aug 2015 18:35:14 +0900 |
parents | 0a5dd7486bb2 |
children | 6541eae41a73 |
line wrap: on
line source
/* * <literal> ::= [a-z][A-Z][0-9] * <charClass> ::= '['<literal>'-'<literal>']' * <group> ::= '('<regex>')' * <regexAtom> ::= <literal>|<charClass>|<group> * <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex> */ #include <stdio.h> #include <stdlib.h> #include <string.h> typedef struct charClass { unsigned char table[256]; struct utf8Range { unsigned char *begin; unsigned char *end; struct utf8Range *next; } *rangeList; } CharClass, *CharClassPtr; typedef struct node { unsigned char type; union value { charClass *cc; unsigned char character; } Value, *ValuePtr; struct node *self; struct node *parent; struct node *left; struct node *right; } Node, *NodePtr; unsigned char *ptr; unsigned char tokenType; int tokenValue; NodePtr regexHeadNode; NodePtr charClass(); NodePtr group(); NodePtr orexp(); NodePtr asterisk(); NodePtr regex(); NodePtr createNode(unsigned char,NodePtr,NodePtr); extern void token(); extern NodePtr regexAtom(); bool isLiteral(char c) { if (*ptr > 0x7f) return true; else if (*ptr == '(') return false; else if (*ptr == '[') return false; else if (*ptr == '|') return false; else if (*ptr == '*') return false; return true; } NodePtr createNode(unsigned char character, NodePtr left, NodePtr right) { NodePtr n = (NodePtr)malloc(sizeof(Node)); n->self = n; n->Value.character = character; n->left = left; n->right = right; return n; } // <charClass> ::= '['<literal>'-'<literal>']' NodePtr charClass() { NodePtr n = (NodePtr)malloc(sizeof(Node)); unsigned char startChar = *ptr; while (*ptr == '-') { ptr++; } unsigned char endChar = *ptr; unsigned char *charTable = (unsigned char*)malloc(sizeof(char)*256); return n; } // <literal> ::= [a-z][A-Z][0-9] NodePtr literal() { NodePtr n = createNode(*ptr,0,0); ptr++; return n; } // <group> ::= '('<regex>')' NodePtr group() { NodePtr n = NULL; if (*ptr == ')') { n = createNode('(',n,0); } else { n = createNode('+', regex(),0); } return n; } void token() { while (*ptr != '\0') { if (*ptr == '('){ ptr++; tokenType = '('; tokenValue = 0; if (ptr[1] == ')') { ptr++; } return; } else if (*ptr == ')') { ptr++; tokenType = ')'; tokenValue = *ptr; return; } else if (*ptr == '[') { ptr++; tokenType = '['; tokenValue = *ptr; if (ptr[1] == ']') { ptr++; } return; } else if (*ptr == '|'){ ptr++; tokenType = '|'; tokenValue = 0; return; } else if (*ptr == '*'){ ptr++; tokenType = '*'; tokenValue = 0; return; } else if (*ptr == '\\'){ // need more proccesing /* \277 \0xa5 \[ \\ \utf-8 etc... */ } else { tokenType = 'a'; tokenValue = *ptr; return; } } tokenType = 0; tokenValue = 0; return; } // <regexAtom> ::= <literal>|<charClass>|<group> NodePtr regexAtom() { token(); NodePtr n = NULL; if (tokenType == 'a') n = literal(); else if (tokenType == '[') n = charClass(); else if (tokenType == '(') n = group(); return n; } // <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex> NodePtr regex() { NodePtr n = regexAtom(); while (*ptr) { token(); if (tokenType == '*') { n = createNode('*',n,0); } else if (tokenType == '|') { NodePtr n1 = regex(); n = createNode('|',n,n1); } else if (tokenType == ')') { return n; } else { NodePtr n1 = regex(); n = createNode('+',n,n1); } } return n; } void descendTree(NodePtr n) { static int d = 0; if (n->right != NULL) { d++; descendTree(n->right); d--; } printf("%*c%c\n",d*4, ' ',n->Value.character); if (n->left != NULL) { d++; descendTree(n->left); d--; } } void printTree(NodePtr n) { puts("---Print Node----"); descendTree(n); puts("-----------------"); } int main(int argc, char **argv) { for (int i = 1; i < argc; i++) { if (strcmp(argv[i],"-regex") == 0) { ptr = (unsigned char*)argv[i+1]; i++; } } printf("regex : %s\n",ptr); NodePtr n = regex(); printTree(n); return 0; }