Applications/Grep: c/regexParser/regexParser.cc comparison

comparison c/regexParser/regexParser.cc @ 121:aa266a4db47c pairPro

merge

author	Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date	Thu, 26 Nov 2015 21:17:26 +0900
parents	2f0653f8eabb 5d29b6a1b50f
children	188d866227a4

comparison

equal deleted inserted replaced

-:2f0653f8eabb
+:aa266a4db47c
 #include <stdlib.h>
 #include <stdio.h>
 #include "regexParser.h"
 #include "error.h"
-static NodePtr createNode(RegexInfoPtr,NodePtr,NodePtr);
+static NodePtr allocateNode();
+static NodePtr createNode(RegexInfoPtr,unsigned char*,NodePtr,NodePtr);
 static NodePtr charClass(RegexInfoPtr);
 static NodePtr group(RegexInfoPtr);
 static void token(RegexInfoPtr);
 static NodePtr regexAtom(RegexInfoPtr);
 NodePtr regex(RegexInfoPtr);
 *     tokenType
 *     regexPosition(state)
 *     stateTransitionTable
 */
+static
 NodePtr allocateNode() {
 NodePtr n = (NodePtr)malloc(sizeof(node));
 n->cc = (CharClassPtr)malloc(sizeof(CharClass));
 n->cc->cond = (ConditionList)malloc(sizeof(Condition));
 return n;
 }
 static
-NodePtr createNode(RegexInfoPtr ri, NodePtr left, NodePtr right) {
+NodePtr createNode(RegexInfoPtr ri,unsigned char *character, NodePtr left, NodePtr right) {
 NodePtr n = allocateNode();
 if (n == NULL) {
 mallocFailedMessage();
 }
 n->tokenType = ri->tokenType;
-n->cc->cond->character = ri->tokenValue;
 n->left = left;
 n->right = right;
+n->nodeNumber = ri->nodeNumber;
+ri->nodeNumber++;
 if (ri->tokenType == 'a') {
-n->nodeNumber = ri->nodeNumber;
-ri->nodeNumber++;
 ri->tokenType = 0;
+n->cc->cond->w = getWord(ri->tokenValue);
+ri->ptr += n->cc->cond->w->length-1;
+} else {
+WordPtr w = (WordPtr)malloc(sizeof(Word));
+w->word = character;
+w->length = 1;
+n->cc->cond->w = w;
 }
 return n;
 }
 // <charClass> ::= '['<literal>'-'<literal>']'
 }
 // <literal> ::= [a-z][A-Z][0-9]
 static
 NodePtr literal(RegexInfoPtr ri) {
-NodePtr n = createNode(ri,0,0);
+NodePtr n = createNode(ri,ri->ptr,0,0);
-ri->ptr++;
 return n;
 }
 // <group> ::= '('<regex>')'
 static
 void token(RegexInfoPtr ri) {
 while (ri->ptr[0] != '\0') {
 if (ri->ptr[0] == '('){
 ri->ptr++;
 ri->tokenType = '(';
-ri->tokenValue = 0;
+ri->tokenValue = NULL;
 if (ri->ptr[1] == ')') {
 ri->ptr++;
 }
 return;
 } else if (ri->ptr[0] == ')') {
 ri->ptr++;
 ri->tokenType = ')';
-ri->tokenValue = ri->ptr[0];
+ri->tokenValue = ri->ptr;
 return;
 } else if (ri->ptr[0] == '[') {
 ri->ptr++;
 ri->tokenType = '[';
-ri->tokenValue = ri->ptr[0];
+ri->tokenValue = ri->ptr;
 if (ri->ptr[1] == ']') {
 ri->ptr++;
 }
 return;
 } else if (ri->ptr[0] == '|'){
 ri->ptr++;
 ri->tokenType = '|';
-ri->tokenValue = '|';
+ri->tokenValue = NULL;
 return;
 } else if (ri->ptr[0] == '*'){
 ri->ptr++;
 ri->tokenType = '*';
-ri->tokenValue = '*';
+ri->tokenValue = NULL;
 return;
 } else if (ri->ptr[0] == '\\'){
 // need more processing
 /*
 \277
 \\
 \utf-8 etc...
 */
 } else {
 ri->tokenType = 'a';
-ri->tokenValue = ri->ptr[0];
+ri->tokenValue = ri->ptr;
+ri->ptr++;
 return;
 }
 }
-ri->tokenType = 0;
-ri->tokenValue = 0;
 return;
 }
 // <regexAtom> ::= <literal>|<charClass>|<group>
 static
 NodePtr regex(RegexInfoPtr ri) {
 NodePtr n = regexAtom(ri);
 while (ri->ptr[0]) {
 token(ri);
 if (ri->tokenType == '*') {
-n = createNode(ri,n,0);
+unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
+syntax[0] = '*';
+n = createNode(ri,syntax,n,0);
 } else if (ri->tokenType == '|') {
 NodePtr n1 = regex(ri);
-ri->tokenValue = '|';
+unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
-n = createNode(ri,n,n1);
+syntax[0] = '|';
+n = createNode(ri,syntax,n,n1);
 } else if (ri->tokenType == ')') {
 return n;
 } else {
 NodePtr n1 = regex(ri);
-ri->tokenValue = '+';
+unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char));
-n = createNode(ri,n,n1);
+syntax[0] = '+';
+n = createNode(ri,syntax,n,n1);
 }
 } return n;
 }

Mercurial > hg > Applications > Grep

comparison c/regexParser/regexParser.cc @ 121:aa266a4db47c pairPro