Mercurial > hg > Members > masakoha > testcode
changeset 127:b061cd8205cc pairPro
merge
author | masa |
---|---|
date | Tue, 01 Dec 2015 21:50:09 +0900 |
parents | 639b0b437ebf (diff) c363a66dc1a7 (current diff) |
children | f827682d4687 |
files | c/regexParser/regexParser.cc |
diffstat | 4 files changed, 56 insertions(+), 90 deletions(-) [+] |
line wrap: on
line diff
--- a/c/regexParser/regexParser.cc Tue Dec 01 17:06:26 2015 +0900 +++ b/c/regexParser/regexParser.cc Tue Dec 01 21:50:09 2015 +0900 @@ -6,7 +6,7 @@ #include "error.h" static NodePtr allocateNode(); -static NodePtr createNode(RegexInfoPtr,unsigned char*,NodePtr,NodePtr); +static NodePtr createNode(RegexInfoPtr,unsigned char,NodePtr,NodePtr); static NodePtr charClass(RegexInfoPtr); static NodePtr group(RegexInfoPtr); static void token(RegexInfoPtr); @@ -23,64 +23,76 @@ static NodePtr allocateNode() { NodePtr n = (NodePtr)malloc(sizeof(node)); - n->cc = (CharClassPtr)malloc(sizeof(CharClass)); - n->cc->cond = (ConditionList)malloc(sizeof(Condition)); + n->cc = NULL; + n->left = NULL; + n->right = NULL; return n; } static -NodePtr createNode(RegexInfoPtr ri,unsigned char *character, NodePtr left, NodePtr right) { +CharClassPtr createCharClassWord(RegexInfoPtr ri) { + CharClassPtr cc = NEW(CharClass); + cc->type = 'a'; + cc->cond = NEW(Condition); + cc->cond->w = NEW(Word); + cc->cond->w->word = ri->tokenValue; + cc->cond->w->length = ri->ptr - ri->tokenValue; + + return cc; +} + +static +NodePtr createNode(RegexInfoPtr ri,unsigned char type, NodePtr left, NodePtr right) { NodePtr n = allocateNode(); - if (n == NULL) { - mallocFailedMessage(); - } - n->tokenType = ri->tokenType; + n->tokenType = type; n->left = left; n->right = right; n->nodeNumber = ri->nodeNumber; ri->nodeNumber++; - if (ri->tokenType == 'a') { - ri->tokenType = 0; - n->cc->cond->w = getWord(ri->tokenValue); - } else { - WordPtr w = (WordPtr)malloc(sizeof(Word)); - w->word = character; - w->length = 1; - n->cc->cond->w = w; + if (type == 'a') { + n->cc = createCharClassWord(ri); } + return n; } + // <charClass> ::= '['<literal>'-'<literal>']' static NodePtr charClass(RegexInfoPtr ri) { NodePtr n = allocateNode(); - if (n == NULL) { - mallocFailedMessage(); - } - n->tokenType = ri->tokenType; + n->tokenType = 'c'; n->nodeNumber = ri->nodeNumber; ri->nodeNumber++; - n->cc->cond->w = (WordPtr)malloc(sizeof(Word)); + + CharClassPtr cc = NEW(CharClass); + cc->type = 'r'; + cc->cond = NEW(Condition); + cc->cond->range = NEW(RangeList); + cc->cond->range->begin = ri->ptr; + cc->cond->range->end = ri->ptr + 1; + cc->cond->range->next = NULL; int i = 0; + RangeListPtr rangeList = cc->cond->range; + while (ri->ptr[i] != ']') { - if (ri->ptr[i] == '-') { - n->cc->begin = ri->ptr[i-1]; - n->cc->end = ri->ptr[i+1]; - } + if (ri->ptr[i] == '-') i++; + + rangeList->end = ri->ptr + i; + rangeList->next = NEW(RangeList); + rangeList = rangeList->next; + rangeList->begin = ri->ptr+i+1; + rangeList->next = NULL; i++; } // TODO literal support - n->cc->cond->w->word = (unsigned char*)malloc(sizeof(unsigned char)*(i+1)); - strncpy((char*)n->cc->cond->w->word, (char*)ri->ptr,i+1); - n->cc->cond->w->word[i] = '\0'; - ri->ptr += i+1; + rangeList->end = ri->ptr + i - 1; return n; } @@ -88,7 +100,7 @@ // <literal> ::= [a-z][A-Z][0-9] static NodePtr literal(RegexInfoPtr ri) { - NodePtr n = createNode(ri,ri->ptr,0,0); + NodePtr n = createNode(ri,'a',0,0); return n; } @@ -158,31 +170,16 @@ return n; } -// <regex> ::= <regexAtom> | <regexAtom><regex>'*' | <regexAtom>'*' | <regexAtom>'|'<regex> | <regexAtom><regex> | '(' regex ')' +// <regex> ::= <regexAtom> | <regexAtom>'*' | <regexAtom>'|'<regex> | <regexAtom><regex> | '(' regex ')' NodePtr regex(RegexInfoPtr ri) { NodePtr n = NULL; while (ri->ptr[0]) { token(ri); if (ri->tokenType == '*') { - // TODO literal support - unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); - syntax[0] = '*'; - NodePtr n1 = createNode(ri,syntax,n->right,0); - - unsigned char *syntax1 = (unsigned char*)malloc(sizeof(unsigned char)); - syntax1[0] = '+'; - - n = createNode(ri,syntax1,n->left,n1); + n = createNode(ri,'*',n,0); } else if (ri->tokenType == '|') { NodePtr n1 = regex(ri); - unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); - syntax[0] = '|'; - n = createNode(ri,syntax,n,n1); - } else if (ri->tokenType == '(') { - NodePtr n1 = regex(ri); - unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); - syntax[0] = '+'; - n = createNode(ri,syntax,n,n1); + n = createNode(ri,'|',n,n1); } else if (ri->tokenType == ')') { return n; } else if (ri->tokenType == 'a') { @@ -193,9 +190,7 @@ } else { // return NULL NodePtr n1 = regex(ri); - unsigned char *syntax = (unsigned char*)malloc(sizeof(unsigned char)); - syntax[0] = '+'; - n = createNode(ri,syntax,n,n1); + n = createNode(ri,'a',n,n1); } } return n; }
--- a/c/regexParser/regexParser.h Tue Dec 01 17:06:26 2015 +0900 +++ b/c/regexParser/regexParser.h Tue Dec 01 21:50:09 2015 +0900 @@ -1,11 +1,16 @@ #include "word.h" #include "error.h" + +#define NEW(type) (type*)malloc(sizeof(type)) + +typedef struct utf8Range { + unsigned char *begin; + unsigned char *end; + struct utf8Range *next; +} RangeList , *RangeListPtr; + typedef union condition { - struct utf8Range { - unsigned char *begin; - unsigned char *end; - struct utf8Range *next; - } rangeList; + RangeListPtr range; unsigned char character; WordPtr w; } Condition, *ConditionList;
--- a/c/regexParser/word.cc Tue Dec 01 17:06:26 2015 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -#include <ctype.h> -#include <string.h> -#include <stdio.h> -#include <stdlib.h> -#include "word.h" - -int getWordLength(unsigned char* w){ - int i = 0; - - for (i=0;isalnum(w[i]);i++); - - return i; -} - -WordPtr getWord(unsigned char *string) { - - WordPtr w = (WordPtr)malloc(sizeof(Word)); - - int i = getWordLength(string); - int wordLength; - int allocateWordSize; - - wordLength = i; - allocateWordSize = i+1; - unsigned char *word = (unsigned char*)malloc(sizeof(unsigned char)*allocateWordSize); - strncpy((char*)word, (char*)string, allocateWordSize); - word[wordLength] = '\0'; - w->word = word; - w->length = wordLength; - return w; -}