Mercurial > hg > Members > masakoha > testcode
diff c/regexParser/main.cc @ 82:1d9bbf922bb6
add createRegexTree.cc
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 07 Oct 2015 18:13:27 +0900 |
parents | 27883946b2dc |
children | 912d7bd51f38 |
line wrap: on
line diff
--- a/c/regexParser/main.cc Wed Oct 07 16:47:03 2015 +0900 +++ b/c/regexParser/main.cc Wed Oct 07 18:13:27 2015 +0900 @@ -11,141 +11,9 @@ #include <string.h> #include "regexParser.h" -NodePtr createNode(unsigned char,NodePtr,NodePtr); -NodePtr charClass(RegexInfoPtr); -NodePtr group(RegexInfoPtr); -void token(RegexInfoPtr); -NodePtr regexAtom(RegexInfoPtr); -NodePtr regex(RegexInfoPtr); +extern NodePtr regex(RegexInfoPtr); extern void printTree(NodePtr); -/** - * Create a node of regex parse tree. - * tokenType - * regexPosition(state) - * stateTransitionTable - */ -NodePtr createNode(unsigned char character, NodePtr left, NodePtr right) { - NodePtr n = (NodePtr)malloc(sizeof(Node)); - n->self = n; - n->Value.character = character; - n->left = left; - n->right = right; - return n; -} - -// <charClass> ::= '['<literal>'-'<literal>']' -NodePtr charClass(RegexInfoPtr ri) { - NodePtr n = (NodePtr)malloc(sizeof(Node)); - unsigned char startChar = ri->ptr[0]; - while (ri->ptr[0] == '-') { - ri->ptr++; - } - unsigned char endChar = ri->ptr[0]; - unsigned char *charTable = (unsigned char*)malloc(sizeof(char)*256); - - return n; -} - -// <literal> ::= [a-z][A-Z][0-9] -NodePtr literal(RegexInfoPtr ri) { - NodePtr n = createNode(ri->ptr[0],0,0); - ri->ptr++; - return n; -} - -// <group> ::= '('<regex>')' -NodePtr group(RegexInfoPtr ri) { - return regex(ri); -} - - - -void token(RegexInfoPtr ri) { - while (ri->ptr[0] != '\0') { - if (ri->ptr[0] == '('){ - ri->ptr++; - ri->tokenType = '('; - ri->tokenValue = 0; - if (ri->ptr[1] == ')') { - ri->ptr++; - } - return; - } else if (ri->ptr[0] == ')') { - ri->ptr++; - ri->tokenType = ')'; - ri->tokenValue = ri->ptr[0]; - return; - } else if (ri->ptr[0] == '[') { - ri->ptr++; - ri->tokenType = '['; - ri->tokenValue = ri->ptr[0]; - if (ri->ptr[1] == ']') { - ri->ptr++; - } - return; - } else if (ri->ptr[0] == '|'){ - ri->ptr++; - ri->tokenType = '|'; - ri->tokenValue = 0; - return; - } else if (ri->ptr[0] == '*'){ - ri->ptr++; - ri->tokenType = '*'; - ri->tokenValue = 0; - return; - } else if (ri->ptr[0] == '\\'){ - // need more proccesing - /* - \277 - \0xa5 - \[ - \\ - \utf-8 etc... - */ - } else { - ri->tokenType = 'a'; - ri->tokenValue = ri->ptr[0]; - return; - } - } - - ri->tokenType = 0; - ri->tokenValue = 0; - return; -} - -// <regexAtom> ::= <literal>|<charClass>|<group> -NodePtr regexAtom(RegexInfoPtr ri) { - - token(ri); - NodePtr n = NULL; - if (ri->tokenType == 'a') n = literal(ri); - else if (ri->tokenType == '[') n = charClass(ri); - else if (ri->tokenType == '(') n = group(ri); - - return n; -} - -// <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex> -NodePtr regex(RegexInfoPtr ri) { - NodePtr n = regexAtom(ri); - while (ri->ptr[0]) { - token(ri); - if (ri->tokenType == '*') { - n = createNode('*',n,0); - } else if (ri->tokenType == '|') { - NodePtr n1 = regex(ri); - n = createNode('|',n,n1); - } else if (ri->tokenType == ')') { - return n; - } else { - NodePtr n1 = regex(ri); - n = createNode('+',n,n1); - } - } return n; -} - int main(int argc, char **argv) {