Mercurial > hg > Applications > Grep
view c/regexParser/main.cc @ 63:8fd3d35e9861
add token function
author | masa |
---|---|
date | Thu, 23 Jul 2015 18:01:02 +0900 |
parents | a49b4a8b8c14 |
children | e0ad6c145f89 |
line wrap: on
line source
/*
 * <literal> ::= [a-z][A-Z][0-9]
 * <charClass> ::= '['<literal>'-'<literal>']'
 * <string> ::= <literal><literal>*
 * <group> ::= '('<regex>')'
 * <or> ::= <regex>'|'<regex>
 * <*> ::= <regex>'*'
 * <regex> ::= <string>|<or>|<charClass>|<group>|<*>
 */

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Payload reserved for character-class nodes.  Nothing in this file fills it
 * in yet.  NOTE(review): presumably `table` is a per-byte membership table
 * and `rangeList` holds multi-byte ranges -- confirm once charClass() builds it.
 *
 * The typedef is spelled CharClass because the parser function is already
 * called charClass(), and in C a typedef name and a function name share one
 * name space.
 */
typedef struct charClass {
    unsigned char table[256];
    struct utf8Range {
        unsigned char *begin;
        unsigned char *end;
        struct utf8Range *next;  /* a pointer: a struct cannot contain itself by value */
    } *rangeList;
} CharClass, *CharClassPtr;

/*
 * One node of the parse tree.
 *
 * `character` is what createNode() stores and printNodeDate() prints: the
 * literal byte for a literal node, an operator tag such as '(' or '[' for
 * structural nodes, and 0 for the list cells built by string().
 * `type` and `value` are not populated by anything in this file yet;
 * createNode() leaves them zeroed.
 */
typedef struct node {
    unsigned char type;
    union value {
        CharClass *cc;
        unsigned char *string;
    } value;
    char character;
    struct node *self;
    struct node *left;
    struct node *right;
} Node, *NodePtr;

char *ptr;              /* read cursor into the regex text being parsed */
NodePtr regexHeadNode;

NodePtr charClass(void);
NodePtr literal(void);
NodePtr string(void);
NodePtr group(void);
NodePtr orexp(void);
NodePtr asterisk(void);
NodePtr regex(void);
NodePtr createNode(char character, NodePtr left, NodePtr right);

/*
 * Returns true when `c` can be part of a <string>.
 *
 * The regex metacharacters '(' ')' '[' '|' '*' and the terminating NUL are
 * not literals; every other byte is, including bytes above 0x7f (the
 * pieces of a multi-byte UTF-8 sequence).  The decision is made on the
 * argument, not on the global cursor.
 */
bool isLiteral(char c)
{
    switch (c) {
    case '\0':  /* end of input must stop string(), not become a node */
    case '(':
    case ')':
    case '[':
    case '|':
    case '*':
        return false;
    default:
        return true;
    }
}

/* Dumps one node (its character and the addresses of self/left/right) to stdout. */
void printNodeDate(NodePtr n)
{
    puts("---------------------");
    printf("Self Node char : %c\n", n->character);
    /* %p requires a void * argument. */
    printf("Self Node addr : %p\n", (void *)n->self);
    printf("left Node addr : %p\n", (void *)n->left);
    printf("right Node addr : %p\n", (void *)n->right);
    puts("---------------------");
    puts("");
}

/*
 * Allocates a zero-initialised node, fills in character/left/right, prints it
 * with printNodeDate() and returns it.  Terminates the program if memory
 * cannot be allocated, so callers never receive NULL.
 */
NodePtr createNode(char character, NodePtr left, NodePtr right)
{
    /* The cast is kept on purpose: this file is named main.cc and must also
     * build as C++, where void * does not convert implicitly. */
    NodePtr n = (NodePtr)calloc(1, sizeof *n);
    if (n == NULL) {
        fputs("createNode: out of memory\n", stderr);
        exit(EXIT_FAILURE);
    }
    n->self = n;
    n->character = character;
    n->left = left;
    n->right = right;
    printNodeDate(n);
    return n;
}

// <charClass> ::= '['<literal>'-'<literal>']'
/*
 * Returns a node tagged '[' for a character class.
 *
 * token() has already moved ptr past the closing ']' by the time this is
 * called, so the cursor is deliberately left alone here.  The class contents
 * are not parsed yet.
 */
NodePtr charClass(void)
{
    return createNode('[', NULL, NULL);
}

// <literal> ::= [a-z][A-Z][0-9]
/*
 * Wraps the byte under the cursor in a leaf node and steps the cursor past
 * it.  The cursor is never moved beyond the terminating NUL.
 */
NodePtr literal(void)
{
    NodePtr n = createNode(*ptr, NULL, NULL);
    if (*ptr != '\0') {
        ptr++;
    }
    return n;
}

// <string> ::= <literal><literal>*
/*
 * Parses a run of literals into a right-leaning list: each cell has
 * character 0, the literal on the left and the rest of the run on the right.
 * The run ends with an empty cell (both children NULL), which is also what
 * is returned when the cursor is not on a literal at all.
 */
NodePtr string(void)
{
    if (!isLiteral(*ptr)) {
        return createNode(0, NULL, NULL);
    }
    /* Both calls move ptr, and C leaves the evaluation order of function
     * arguments unspecified, so they must be sequenced by hand. */
    NodePtr head = literal();
    NodePtr rest = string();
    return createNode(0, head, rest);
}

// <group> ::= '('<regex>')'
/*
 * Lexer state, written by token() and read by the parser functions.
 * tokenType always describes the next unparsed token (one-token lookahead):
 *
 *   '(' ')' '|' '*'  the operator itself; ptr is already past it
 *   '['              a character class; ptr is already past its ']'
 *   'a'              a literal; ptr still points AT the character, because
 *                    literal() is the one that reads and consumes it
 *   0                end of input
 *
 * tokenValue points at the start of the token text for '[' and 'a' and is
 * NULL for every other token.
 */
char tokenType;
char *tokenValue;

void token(void);
NodePtr regexAtom(void);

/*
 * Parses a parenthesised group.
 *
 * Entered with tokenType == '(' (the parenthesis itself is already consumed).
 * On success returns a '(' node whose left child is the enclosed regex and
 * leaves the token after ')' as the lookahead.  If the ')' is missing a
 * diagnostic goes to stderr and the enclosed regex is returned unwrapped.
 */
NodePtr group(void)
{
    token();  /* move the lookahead from '(' to the first token of the body */
    NodePtr n = regex();
    if (tokenType != ')') {
        fprintf(stderr, "regex : ')' required\n");
        return n;
    }
    n = createNode('(', n, NULL);
    token();  /* step over ')' */
    return n;
}

// <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex>
/*
 * Parses a regex starting at the current lookahead token and stops at ')'
 * or at end of input, leaving that token as the lookahead.
 *
 * Node tags: '*' for repetition (operand on the left), '|' for alternation
 * and '+' for concatenation (operands on the left and right).
 * Returns NULL when there is nothing to parse.
 *
 * NOTE(review): the grammar is right-recursive as written above, so in
 * "ab|c" the alternation groups as a(b|c).  This is kept as specified.
 */
NodePtr regex(void)
{
    NodePtr n = regexAtom();
    for (;;) {
        if (tokenType == '*') {
            n = createNode('*', n, NULL);
            token();
        } else if (tokenType == '|') {
            token();  /* step over '|' before parsing the right-hand side */
            NodePtr rhs = regex();
            n = createNode('|', n, rhs);
        } else if (tokenType == 'a' || tokenType == '[' || tokenType == '(') {
            /* Another atom follows directly: concatenation. */
            NodePtr rhs = regex();
            n = createNode('+', n, rhs);
        } else {
            break;  /* ')' or end of input belongs to the caller */
        }
    }
    return n;
}

// <regexAtom> ::= <literal>|<charClass>|<group>
/*
 * Parses one atom selected by the lookahead token and advances the
 * lookahead past it.  Returns NULL, without consuming anything, when the
 * lookahead does not start an atom.
 */
NodePtr regexAtom(void)
{
    NodePtr n = NULL;
    switch (tokenType) {
    case 'a':
        n = literal();  /* consumes the character under ptr */
        token();
        break;
    case '[':
        n = charClass();
        token();
        break;
    case '(':
        n = group();    /* group() advances the lookahead itself */
        break;
    default:
        break;
    }
    return n;
}

/*
 * Scans the next token at ptr into tokenType/tokenValue; see the table at
 * the top of this section for what each token type leaves in ptr.
 *
 * Inside '[' ... ']' a ']' that comes first is taken as a member of the
 * class rather than as its end.  An unterminated class ends at the NUL.
 */
void token(void)
{
    tokenValue = NULL;
    switch (*ptr) {
    case '\0':
        tokenType = 0;  /* never leave a stale token type at end of input */
        return;
    case '(':
    case ')':
    case '|':
    case '*':
        tokenType = *ptr++;
        return;
    case '[':
        tokenType = '[';
        tokenValue = ptr;
        ptr++;
        if (*ptr == ']') {
            ptr++;
        }
        while (*ptr != '\0' && *ptr != ']') {
            ptr++;
        }
        if (*ptr == ']') {
            ptr++;
        }
        return;
    default:
        tokenType = 'a';
        tokenValue = ptr;
        /* A backslash makes the following byte a literal.  A trailing
         * backslash has nothing to escape and stands for itself.
         * TODO: \277, \0xa5, \[, \\, \utf-8 etc. need more processing. */
        if (*ptr == '\\' && ptr[1] != '\0') {
            ptr++;
        }
        return;
    }
}

/*
 * Usage: <program> -regex <pattern>
 *
 * Parses the pattern; the tree is shown only through the per-node dump that
 * createNode() prints.  Exits with failure when no pattern is given or when
 * the parser stops before the end of the pattern.
 */
int main(int argc, char **argv)
{
    for (int i = 1; i < argc; i++) {
        /* Only accept -regex when an argument actually follows it. */
        if (strcmp(argv[i], "-regex") == 0 && i + 1 < argc) {
            ptr = argv[++i];
        }
    }
    if (ptr == NULL) {
        fprintf(stderr, "usage: %s -regex <pattern>\n",
                argc > 0 ? argv[0] : "regexParser");
        return EXIT_FAILURE;
    }

    printf("regex : %s\n", ptr);
    token();  /* prime the lookahead */
    NodePtr n = regex();
    (void)n;  /* nothing consumes the tree yet */

    if (tokenType != 0) {
        fprintf(stderr, "regex : unexpected '%c'\n", tokenType);
        return EXIT_FAILURE;
    }
    return 0;
}