Mercurial > hg > Applications > Grep
changeset 142:de0f332d560c pairPro
insert charClassMerge function
author | masa |
---|---|
date | Fri, 11 Dec 2015 14:54:00 +0900 |
parents | 71f36a59cf6a |
children | 32977f5a2ed0 |
files | c/regexParser/regexParser.cc c/regexParser/regexParser.h c/regexParser/subsetConstraction.cc |
diffstat | 3 files changed, 52 insertions(+), 32 deletions(-) [+] |
line wrap: on
line diff
--- a/c/regexParser/regexParser.cc Fri Dec 11 13:12:42 2015 +0900 +++ b/c/regexParser/regexParser.cc Fri Dec 11 14:54:00 2015 +0900 @@ -30,10 +30,9 @@ CharClassPtr createCharClassWord(RegexInfoPtr ri) { CharClassPtr cc = NEW(CharClass); cc->type = 'a'; - cc->cond = NEW(Condition); - cc->cond->w = NEW(Word); - cc->cond->w->word = ri->tokenValue; - cc->cond->w->length = ri->ptr - ri->tokenValue; + cc->cond.w.word = ri->tokenValue; + cc->cond.w.length = ri->ptr - ri->tokenValue; + cc->nextState.bitContainer = 0; token(ri); return cc; @@ -53,6 +52,28 @@ return n; } +CharClassPtr charClassMerge(CharClassPtr src, CharClassPtr add) { + // 重なっているccの領域を分割する + // 必要ならばnextStateを重ねあわせる + // 変更があった場合は新しくリストを作って返す + if (src->type == 'a') { + if (add->type == 'a') { + if (src->cond.w.word[0] > add->cond.w.word[0]) { + // add のほうが小さいので小さい順のccをつくる + CharClassPtr left = charClassMerge(add->left.src); + return createCharClassWord(add->cond->w.word, left, add->right); + } else { + + } + } else if (add->type == 'c') { + // + if (src->cond.w.word[0] < add->cond.range.begin) { + + } else (src->cond->w.word[0] < add->end) { + } else if (src->type == 'c') { + + } +} // <charClass> ::= '['<literal>'-'<literal>']' static @@ -60,14 +81,10 @@ CharClassPtr cc = NEW(CharClass); NodePtr n = createNode(ri,'c',cc,0,0); cc->type = 'r'; - cc->cond = NEW(Condition); - cc->cond->range = NEW(RangeList); - cc->cond->range->begin = ri->ptr; - cc->cond->range->end = ri->ptr; - cc->cond->range->next = NULL; - - - RangeListPtr rangeList = cc->cond->range; + cc->nextState.bitContainer = 0; + RangeListPtr rangeList = &cc->cond.range; + rangeList->begin = ri->ptr; + rangeList->end = ri->ptr; for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) { if (*ri->ptr == '-') { @@ -86,8 +103,8 @@ rangeList->end = ri->ptr; rangeList->next = NULL; } - // TODO literal support - + // TODO literal support + // merge rangeList here if (*ri->ptr) ri->ptr++; token(ri); return n; @@ -114,11 +131,6 @@ ri->tokenType = ')'; 
ri->tokenValue = ri->ptr; return; - } else if (ri->ptr[0] == '[') { - ri->ptr++; - ri->tokenType = 'c'; - ri->tokenValue = ri->ptr; - return; } else if (ri->ptr[0] == ']') { ri->ptr++; ri->tokenType = ']'; @@ -144,6 +156,11 @@ \\ \utf-8 etc... */ + } else if (ri->ptr[0] == '[') { + ri->ptr++; + ri->tokenType = 'c'; + ri->tokenValue = ri->ptr; + return; } else { ri->tokenType = 'a'; ri->tokenValue = ri->ptr;
```diff
--- a/c/regexParser/regexParser.h Fri Dec 11 13:12:42 2015 +0900
+++ b/c/regexParser/regexParser.h Fri Dec 11 14:54:00 2015 +0900
@@ -8,23 +8,22 @@
 } Word, *WordPtr;
 
 typedef struct utf8Range {
-    unsigned char *begin;
-    unsigned char *end;
-    struct utf8Range *next;
+    unsigned long begin;
+    unsigned long end;
+    struct utf8Range *next; // only used in the parser.
 } RangeList , *RangeListPtr;
 
 typedef union condition {
-    RangeListPtr range;
-    WordPtr w;
+    RangeList range;
+    Word w;
 } Condition, *ConditionList;
 
 typedef struct charClass {
     unsigned char type;
-    ConditionList cond;
     struct charClass *left;
     struct charClass *right;
-    unsigned char begin;
-    unsigned char end;
+    Condition cond;
+    BitVector nextState;
 } CharClass, *CharClassPtr;
 
 typedef struct node {
```
--- a/c/regexParser/subsetConstraction.cc Fri Dec 11 13:12:42 2015 +0900 +++ b/c/regexParser/subsetConstraction.cc Fri Dec 11 14:54:00 2015 +0900 @@ -12,13 +12,17 @@ } } -TGValuePtr generateTransition(NodePtr n,TransitionGeneratorPtr tg) { - TGValuePtr tgv0 = NULL; - TGValuePtr tgv1 = NULL; +TGValue generateTransition(NodePtr n,TransitionGenerator tg) { if (n->tokenType == '+') { - tgv0 = generateTransition(n->left,tg); - tgv1 = generateTransition(n->right,tg); + TGValue tgv = generateTransition(n->left,tg); + if (tgv.asterisk) { + TGValue tgv1 = generateTransition(n->right,tg); + tgv.state |= tgv1.state; + return tgv; + } + tgv.state = n->right->nodeNumber; + return tgv; } else if (n->tokenType == '|') { tgv0 = generateTransition(n->left,tg); tgv1 = generateTransition(n->right,tg);