Mercurial > hg > Applications > Grep
view c/regexParser/regexParser.cc @ 121:aa266a4db47c pairPro
merge
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 26 Nov 2015 21:17:26 +0900 |
parents | 2f0653f8eabb 5d29b6a1b50f |
children | 188d866227a4 |
line wrap: on
line source
#include <stdlib.h> #include <stdio.h> #include "regexParser.h" #include "error.h" static NodePtr allocateNode(); static NodePtr createNode(RegexInfoPtr,unsigned char*,NodePtr,NodePtr); static NodePtr charClass(RegexInfoPtr); static NodePtr group(RegexInfoPtr); static void token(RegexInfoPtr); static NodePtr regexAtom(RegexInfoPtr); NodePtr regex(RegexInfoPtr); /** * Create a node of regex parse tree. * tokenType * regexPosition(state) * stateTransitionTable */ static NodePtr allocateNode() { NodePtr n = (NodePtr)malloc(sizeof(node)); n->cc = (CharClassPtr)malloc(sizeof(CharClass)); n->cc->cond = (ConditionList)malloc(sizeof(Condition)); return n; } static NodePtr createNode(RegexInfoPtr ri,unsigned char *character, NodePtr left, NodePtr right) { NodePtr n = allocateNode(); if (n == NULL) { mallocFailedMessage(); } n->tokenType = ri->tokenType; n->left = left; n->right = right; n->nodeNumber = ri->nodeNumber; ri->nodeNumber++; if (ri->tokenType == 'a') { ri->tokenType = 0; n->cc->cond->w = getWord(ri->tokenValue); ri->ptr += n->cc->cond->w->length-1; } else { WordPtr w = (WordPtr)malloc(sizeof(Word)); w->word = character; w->length = 1; n->cc->cond->w = w; } return n; } // <charClass> ::= '['<literal>'-'<literal>']' static NodePtr charClass(RegexInfoPtr ri) { NodePtr n = (NodePtr)malloc(sizeof(Node)); if (n == NULL) { mallocFailedMessage(); } while (ri->ptr[0] == '-') { ri->ptr++; } return n; } // <literal> ::= [a-z][A-Z][0-9] static NodePtr literal(RegexInfoPtr ri) { NodePtr n = createNode(ri,ri->ptr,0,0); return n; } // <group> ::= '('<regex>')' static NodePtr group(RegexInfoPtr ri) { return regex(ri); } static void token(RegexInfoPtr ri) { while (ri->ptr[0] != '\0') { if (ri->ptr[0] == '('){ ri->ptr++; ri->tokenType = '('; ri->tokenValue = NULL; if (ri->ptr[1] == ')') { ri->ptr++; } return; } else if (ri->ptr[0] == ')') { ri->ptr++; ri->tokenType = ')'; ri->tokenValue = ri->ptr; return; } else if (ri->ptr[0] == '[') { ri->ptr++; ri->tokenType = '['; ri->tokenValue = ri->ptr; if (ri->ptr[1] == ']') { ri->ptr++; } return; } else if (ri->ptr[0] == '|'){ ri->ptr++; ri->tokenType = '|'; ri->tokenValue = NULL; return; } else if (ri->ptr[0] == '*'){ ri->ptr++; ri->tokenType = '*'; ri->tokenValue = NULL; return; } else if (ri->ptr[0] == '\\'){ // need more proccesing /* \277 \0xa5 \[ \\ \utf-8 etc... */ } else { ri->tokenType = 'a'; ri->tokenValue = ri->ptr; ri->ptr++; return; } } return; } // <regexAtom> ::= <literal>|<charClass>|<group> static NodePtr regexAtom(RegexInfoPtr ri) { token(ri); NodePtr n = NULL; if (ri->tokenType == 'a') n = literal(ri); else if (ri->tokenType == '[') n = charClass(ri); else if (ri->tokenType == '(') n = group(ri); return n; } // <regex> ::= <regexAtom>|<regexAtom>'*'|<regexAtom>'|'<regex>|<regexAtom><regex> NodePtr regex(RegexInfoPtr ri) { NodePtr n = regexAtom(ri); while (ri->ptr[0]) { token(ri); if (ri->tokenType == '*') { unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); syntax[0] = '*'; n = createNode(ri,syntax,n,0); } else if (ri->tokenType == '|') { NodePtr n1 = regex(ri); unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); syntax[0] = '|'; n = createNode(ri,syntax,n,n1); } else if (ri->tokenType == ')') { return n; } else { NodePtr n1 = regex(ri); unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); syntax[0] = '+'; n = createNode(ri,syntax,n,n1); } } return n; }