Mercurial > hg > Applications > Grep
view c/regexParser/main.cc @ 81:27883946b2dc
sort function
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 07 Oct 2015 16:47:03 +0900 |
parents | 0a452d69f0e2 |
children | 1d9bbf922bb6 |
line wrap: on
line source
/* * <literal> ::= [a-z][A-Z][0-9] * <charClass> ::= '['<literal>'-'<literal>']' * <group> ::= '('<regex>')' * <regexAtom> ::= <literal>|<charClass>|<group> * <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex> */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include "regexParser.h" NodePtr createNode(unsigned char,NodePtr,NodePtr); NodePtr charClass(RegexInfoPtr); NodePtr group(RegexInfoPtr); void token(RegexInfoPtr); NodePtr regexAtom(RegexInfoPtr); NodePtr regex(RegexInfoPtr); extern void printTree(NodePtr); /** * Create a node of regex parse tree. * tokenType * regexPosition(state) * stateTransitionTable */ NodePtr createNode(unsigned char character, NodePtr left, NodePtr right) { NodePtr n = (NodePtr)malloc(sizeof(Node)); n->self = n; n->Value.character = character; n->left = left; n->right = right; return n; } // <charClass> ::= '['<literal>'-'<literal>']' NodePtr charClass(RegexInfoPtr ri) { NodePtr n = (NodePtr)malloc(sizeof(Node)); unsigned char startChar = ri->ptr[0]; while (ri->ptr[0] == '-') { ri->ptr++; } unsigned char endChar = ri->ptr[0]; unsigned char *charTable = (unsigned char*)malloc(sizeof(char)*256); return n; } // <literal> ::= [a-z][A-Z][0-9] NodePtr literal(RegexInfoPtr ri) { NodePtr n = createNode(ri->ptr[0],0,0); ri->ptr++; return n; } // <group> ::= '('<regex>')' NodePtr group(RegexInfoPtr ri) { return regex(ri); } void token(RegexInfoPtr ri) { while (ri->ptr[0] != '\0') { if (ri->ptr[0] == '('){ ri->ptr++; ri->tokenType = '('; ri->tokenValue = 0; if (ri->ptr[1] == ')') { ri->ptr++; } return; } else if (ri->ptr[0] == ')') { ri->ptr++; ri->tokenType = ')'; ri->tokenValue = ri->ptr[0]; return; } else if (ri->ptr[0] == '[') { ri->ptr++; ri->tokenType = '['; ri->tokenValue = ri->ptr[0]; if (ri->ptr[1] == ']') { ri->ptr++; } return; } else if (ri->ptr[0] == '|'){ ri->ptr++; ri->tokenType = '|'; ri->tokenValue = 0; return; } else if (ri->ptr[0] == '*'){ ri->ptr++; ri->tokenType = '*'; ri->tokenValue = 0; return; } else if (ri->ptr[0] == '\\'){ // need more proccesing /* \277 \0xa5 \[ \\ \utf-8 etc... */ } else { ri->tokenType = 'a'; ri->tokenValue = ri->ptr[0]; return; } } ri->tokenType = 0; ri->tokenValue = 0; return; } // <regexAtom> ::= <literal>|<charClass>|<group> NodePtr regexAtom(RegexInfoPtr ri) { token(ri); NodePtr n = NULL; if (ri->tokenType == 'a') n = literal(ri); else if (ri->tokenType == '[') n = charClass(ri); else if (ri->tokenType == '(') n = group(ri); return n; } // <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex> NodePtr regex(RegexInfoPtr ri) { NodePtr n = regexAtom(ri); while (ri->ptr[0]) { token(ri); if (ri->tokenType == '*') { n = createNode('*',n,0); } else if (ri->tokenType == '|') { NodePtr n1 = regex(ri); n = createNode('|',n,n1); } else if (ri->tokenType == ')') { return n; } else { NodePtr n1 = regex(ri); n = createNode('+',n,n1); } } return n; } int main(int argc, char **argv) { RegexInfoPtr ri = (RegexInfoPtr)malloc(sizeof(RegexInfo)); for (int i = 1; i < argc; i++) { if (strcmp(argv[i],"-regex") == 0) { ri->ptr = (unsigned char*)argv[i+1]; i++; } } printf("regex : %s\n",ri->ptr); NodePtr n = regex(ri); printTree(n); return 0; }