# HG changeset patch # User masa # Date 1449813240 -32400 # Node ID de0f332d560cd29e8cf861f17b10ea20baadfe04 # Parent 71f36a59cf6a70d33f3ae3e8ce95a4bee813877b insert charClassMerge function diff -r 71f36a59cf6a -r de0f332d560c c/regexParser/regexParser.cc --- a/c/regexParser/regexParser.cc Fri Dec 11 13:12:42 2015 +0900 +++ b/c/regexParser/regexParser.cc Fri Dec 11 14:54:00 2015 +0900 @@ -30,10 +30,9 @@ CharClassPtr createCharClassWord(RegexInfoPtr ri) { CharClassPtr cc = NEW(CharClass); cc->type = 'a'; - cc->cond = NEW(Condition); - cc->cond->w = NEW(Word); - cc->cond->w->word = ri->tokenValue; - cc->cond->w->length = ri->ptr - ri->tokenValue; + cc->cond.w.word = ri->tokenValue; + cc->cond.w.length = ri->ptr - ri->tokenValue; + cc->nextState.bitContainer = 0; token(ri); return cc; @@ -53,6 +52,28 @@ return n; } +CharClassPtr charClassMerge(CharClassPtr src, CharClassPtr add) { + // 重なっているccの領域を分割する + // 必要ならばnextStateを重ねあわせる + // 変更があった場合は新しくリストを作って返す + if (src->type == 'a') { + if (add->type == 'a') { + if (src->cond.w.word[0] > add->cond.w.word[0]) { + // add のほうが小さいので小さい順のccをつくる + CharClassPtr left = charClassMerge(add->left.src); + return createCharClassWord(add->cond->w.word, left, add->right); + } else { + + } + } else if (add->type == 'c') { + // + if (src->cond.w.word[0] < add->cond.range.begin) { + + } else (src->cond->w.word[0] < add->end) { + } else if (src->type == 'c') { + + } +} // ::= '[''-'']' static @@ -60,14 +81,10 @@ CharClassPtr cc = NEW(CharClass); NodePtr n = createNode(ri,'c',cc,0,0); cc->type = 'r'; - cc->cond = NEW(Condition); - cc->cond->range = NEW(RangeList); - cc->cond->range->begin = ri->ptr; - cc->cond->range->end = ri->ptr; - cc->cond->range->next = NULL; - - - RangeListPtr rangeList = cc->cond->range; + cc->nextState.bitContainer = 0; + RangeListPtr rangeList = &cc->cond.range; + rangeList->begin = ri->ptr; + rangeList->end = ri->ptr; for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) { if (*ri->ptr == '-') { @@ -86,8 +103,8 @@ rangeList->end = ri->ptr; rangeList->next = NULL; } - // TODO literal support - + // TODO literal support + // merge rangeList here if (*ri->ptr) ri->ptr++; token(ri); return n; @@ -114,11 +131,6 @@ ri->tokenType = ')'; ri->tokenValue = ri->ptr; return; - } else if (ri->ptr[0] == '[') { - ri->ptr++; - ri->tokenType = 'c'; - ri->tokenValue = ri->ptr; - return; } else if (ri->ptr[0] == ']') { ri->ptr++; ri->tokenType = ']'; @@ -144,6 +156,11 @@ \\ \utf-8 etc... */ + } else if (ri->ptr[0] == '[') { + ri->ptr++; + ri->tokenType = 'c'; + ri->tokenValue = ri->ptr; + return; } else { ri->tokenType = 'a'; ri->tokenValue = ri->ptr; diff -r 71f36a59cf6a -r de0f332d560c c/regexParser/regexParser.h --- a/c/regexParser/regexParser.h Fri Dec 11 13:12:42 2015 +0900 +++ b/c/regexParser/regexParser.h Fri Dec 11 14:54:00 2015 +0900 @@ -8,23 +8,22 @@ } Word, *WordPtr; typedef struct utf8Range { - unsigned char *begin; - unsigned char *end; - struct utf8Range *next; + unsigned long begin; + unsigned long end; + struct utf8Range *next; // only used in the parser. } RangeList , *RangeListPtr; typedef union condition { - RangeListPtr range; - WordPtr w; + RangeList range; + Word w; } Condition, *ConditionList; typedef struct charClass { unsigned char type; - ConditionList cond; struct charClass *left; struct charClass *right; - unsigned char begin; - unsigned char end; + Condition cond; + BitVector nextState; } CharClass, *CharClassPtr; typedef struct node { diff -r 71f36a59cf6a -r de0f332d560c c/regexParser/subsetConstraction.cc --- a/c/regexParser/subsetConstraction.cc Fri Dec 11 13:12:42 2015 +0900 +++ b/c/regexParser/subsetConstraction.cc Fri Dec 11 14:54:00 2015 +0900 @@ -12,13 +12,17 @@ } } -TGValuePtr generateTransition(NodePtr n,TransitionGeneratorPtr tg) { - TGValuePtr tgv0 = NULL; - TGValuePtr tgv1 = NULL; +TGValue generateTransition(NodePtr n,TransitionGenerator tg) { if (n->tokenType == '+') { - tgv0 = generateTransition(n->left,tg); - tgv1 = generateTransition(n->right,tg); + TGValue tgv = generateTransition(n->left,tg); + if (tgv.asterisk) { + TGValue tgv1 = generateTransition(n->right,tg); + tgv.state |= tgv1.state; + return tgv; + } + tgv.state = n->right->nodeNumber; + return tgv; } else if (n->tokenType == '|') { tgv0 = generateTransition(n->left,tg); tgv1 = generateTransition(n->right,tg);