Mercurial > hg > Members > masakoha > testcode
view regexParser/CharClass.cc @ 319:7b8234c090f7
bmSearch
author | mir3636 |
---|---|
date | Sun, 08 May 2016 22:53:20 +0900 |
parents | 66012db6a717 |
children |
line wrap: on
line source
#include <stdio.h> #include <stdlib.h> #include <ctype.h> #include "regexParser.h" #include "subsetConstruction.h" #include "node.h" #include "BitVector.h" #include "error.h" #include "CharClass.h" CharClassPtr createCharClassRange(unsigned long begin, unsigned long end,unsigned long state, CharClassPtr left, CharClassPtr right) { CharClassPtr cc = NEW(CharClass); cc->cond.range.begin = begin; cc->cond.range.end = end; cc->cond.w.word = NULL; cc->cond.w.length = 0; cc->cond.w.next = NULL; cc->cond.w.bm = NULL; cc->left = left; cc->right = right; cc->nextState.bitContainer = state; return cc; } CharClassPtr createCharClassWord(RegexInfoPtr ri) { CharClassPtr cc = NEW(CharClass); cc->left = NULL; cc->right = NULL; cc->cond.w.word = ri->tokenValue; cc->cond.w.length = ri->ptr - ri->tokenValue; cc->cond.w.next = NULL; cc->cond.w.bm = NULL; cc->cond.range.begin = cc->cond.range.end = *ri->tokenValue; return cc; } CharClassPtr createCharClassRangeWord(unsigned long begin, unsigned long end,CharClassPtr cc,CharClassPtr m, CharClassPtr left, CharClassPtr right) { // same range and seme nextState? CharClassPtr ncc = NEW(CharClass); ncc->cond.range.begin = begin; ncc->cond.range.end = end; ncc->cond.w = cc->cond.w; ncc->left = left; ncc->right = right; if (!m) { ncc->nextState.bitContainer = cc->nextState.bitContainer; return ncc; } if (m->cond.w.word) { WordPtr n = &ncc->cond.w; n->word = m->cond.w.word; n->length = m->cond.w.length; n->next = &cc->cond.w; } ncc->nextState.bitContainer = m->nextState.bitContainer | cc->nextState.bitContainer; return ncc; } /* cond.range.begin cond.range.end |----------------| 1.b---e 2.b------e 3.b------------e 4.b-----------------------e 5.b----------------------------e |----------------| 6. b---------e 7. b----------------e 8. b---------------------e |----------------| 9. b-----e 10. b--------e 11. b-------------e |----------------| 12. b-----e |----------------| 13. b--e */ CharClassPtr insertCharClass(CharClassPtr cc, unsigned long begin, unsigned long end) { if (begin>end) { unsigned long tmp = begin; begin = end; end = tmp; } if (cc == NULL) { return createCharClassRange(begin,end,0,0,0); } if (end < cc->cond.range.begin ) { // 1 if (cc->left) { cc->left = insertCharClass(cc->left,begin,end); } else { cc->left = createCharClassRange(begin,end,0,0,0); } return cc; } else if (end == cc->cond.range.begin ) { // 2 cc->cond.range.begin = begin; return cc; } else if (end <= cc->cond.range.end) { // 3,4,6,7,9,10 if (begin < cc->cond.range.begin) { // 3,4 cc->cond.range.begin = begin; } return cc; } else if (begin > cc->cond.range.end ) { // 13 if (cc->right) { cc->right = insertCharClass(cc->right,begin,end); } else { cc->right = createCharClassRange(begin,end,0,0,0); } return cc; } if (cc->right) { CharClassPtr right = cc->right; begin = cc->cond.range.begin; free(cc); return insertCharClass(right,begin,end); } if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { // 12 if (end > cc->cond.range.end) cc->cond.range.end = end; // 11,8 } else if (begin < cc->cond.range.begin) { // 5 cc->cond.range.begin = begin; cc->cond.range.end = end; } else { printf("insertCharClass Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); } return cc; } CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, CharClassPtr m) ; CharClassPtr mergeCCTree(CharClassPtr cc,unsigned char mBegin,unsigned char mEnd,CharClassPtr m) { CharClassPtr cc1; if (cc) { cc1 = charClassMerge(cc,mBegin,mEnd,m); } else { cc1 = createCharClassRangeWord(mBegin,mEnd,m,NULL,NULL,NULL); } return cc1; } /* cond.range.begin cond.range.end |----------------| 1.b---e 2.b------e 3.b------------e 4.b-----------------------e 5.b----------------------------e |----------------| 6. b---------e 7. b----------------e 8. b---------------------e |----------------| 9. b-----e 10. b--------e 11. b-------------e |----------------| 12. b-----e |----------------| 13. b--e */ CharClassPtr charClassMerge(CharClassPtr cc,unsigned long begin, unsigned long end, CharClassPtr m) { // 重なっているccの領域を分割する // 必要ならばnextStateを重ねあわせる // 変更があった場合は新しくリストを作って返す if (end < cc->cond.range.begin ) { // 1 if (cc->left) { return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.end,m,NULL,charClassMerge(cc->left,begin,end,m),cc->right); } else { return createCharClassRangeWord(begin,end,m,NULL,NULL,cc); } } else if (end == cc->cond.range.begin && begin != end ) { // 2 CharClassPtr cc1 = mergeCCTree(cc->left,begin,end-1,m); if (cc->cond.range.begin == cc->cond.range.end) { return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.end, cc,m, cc1,cc->right); } CharClassPtr cc3 = createCharClassRangeWord(cc->cond.range.begin+1,cc->cond.range.end,cc,NULL,cc->left,cc->right); return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.begin, cc,m,cc1,cc3); } else if (end < cc->cond.range.end) { // range.begin < end if (begin < cc->cond.range.begin) { // 3 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,m); CharClassPtr cc3 = createCharClassRangeWord(end+1,cc->cond.range.end,cc,NULL,cc->left,cc->right); return createCharClassRangeWord(cc->cond.range.begin,end,m,cc,cc1,cc3); } if (begin == cc->cond.range.begin) { // 6 CharClassPtr cc2 = createCharClassRangeWord(end+1,cc->cond.range.end,cc,NULL,NULL,cc->right); return createCharClassRangeWord(begin,end,cc,m,cc->left,cc2); } // 9 CharClassPtr cc2 = createCharClassRangeWord(cc->cond.range.begin,begin-1,cc,NULL,cc->left,NULL); CharClassPtr cc3 = createCharClassRangeWord(end+1,cc->cond.range.end,cc,NULL,NULL,cc->right); return createCharClassRangeWord(begin,end,cc,m,cc2,cc3); } else if (end == cc->cond.range.end) { if (begin == cc->cond.range.begin) { // 7 return createCharClassRangeWord(begin,end,cc,m,cc->left,cc->right); } else if (begin < cc->cond.range.begin) { // 4 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,m); return createCharClassRangeWord(cc->cond.range.begin,end,cc,m,cc1,cc->right); } // 10 cond.range.begin < begin CharClassPtr cc2 = createCharClassRangeWord(begin,cc->cond.range.end,cc,m,NULL,cc->right); return createCharClassRangeWord(cc->cond.range.begin,begin-1,cc,NULL,cc->left,cc2); } if (begin > cc->cond.range.end ) { // 13 if (cc->right) { return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.end,cc,NULL,cc->left,charClassMerge(cc->right,begin,end,m)); } else { return createCharClassRangeWord(begin,end,m,NULL,cc,NULL); } } if (begin >= cc->cond.range.begin && begin <= cc->cond.range.end) { if (end > cc->cond.range.end) { // cond.range.end < end if (begin == cc->cond.range.begin) { // 8 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,m); return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.end, cc,m,cc->left,cc1); } if (begin > cc->cond.range.begin) { // 11,12 CharClassPtr cc1 = mergeCCTree(cc->right,cc->cond.range.end+1,end,m); CharClassPtr cc3 = createCharClassRangeWord(cc->cond.range.begin,begin-1,cc,m,cc->left,NULL); return createCharClassRangeWord(begin,cc->cond.range.end,cc,m,cc3,cc1); } } } else if (begin < cc->cond.range.begin) { // 5 CharClassPtr cc1 = mergeCCTree(cc->left,begin,cc->cond.range.begin-1,m); CharClassPtr cc3 = mergeCCTree(cc->right,cc->cond.range.end+1,end,m); return createCharClassRangeWord(cc->cond.range.begin,cc->cond.range.end,cc,m,cc1,cc3); } else { printf("charClassMerge Error : begin %lu end %lu cc->begin %lu cc->end %lu\n", begin,end,cc->cond.range.begin,cc->cond.range.end); } return cc; } void findLeftMost(CharClassPtr next,CharClassWalkerPtr walk) { while (next->left) { CharClassStackPtr ccs = NEW(CharClassStack); ccs->next = walk->stack; ccs->turn = walk->turn; walk->turn = LEFT; ccs->cc = next; walk->stack = ccs; next = next->left; } walk->turn = SELF; walk->next = next; } CharClassWalkerPtr createCharClassWalker (CharClassPtr next) { CharClassWalkerPtr walk = NEW(CharClassWalker); walk->next = NULL; walk->stack = NULL; walk->turn = LEFT; if (!next) return walk; findLeftMost(next,walk); return walk; } bool hasNext(CharClassWalkerPtr walk) { return walk->next != NULL; } CharClassStackPtr charClassStackPop(CharClassWalkerPtr walk) { CharClassStackPtr prev = walk->stack->next; walk->turn = walk->stack->turn; free(walk->stack); walk->stack = prev; return prev; } CharClassPtr getNext(CharClassWalkerPtr walk) { CharClassPtr current = walk->next; walk->next = NULL; if (walk->turn == SELF) { if (current->right) { walk->turn = RIGHT; findLeftMost(current->right,walk); return current; } } while (walk->stack) { walk->next = walk->stack->cc; charClassStackPop(walk); if (walk->turn == LEFT) { walk->turn = SELF; return current; } } return current; } void setState(CharClassPtr cc, BitVector bi) { // if (word case) setNext(bitVector to the word list) cc->nextState = bi; if (cc->left) { setState(cc->left,bi); } if (cc->right) { setState(cc->right,bi); } } CharClassPtr mergeTransition(StatePtr x,CharClassPtr y) { if (x->cc == NULL) { return y; } CharClassWalkerPtr walk = createCharClassWalker(x->cc); CharClassPtr ccy = y; while (hasNext(walk)) { CharClassPtr cc = getNext(walk); unsigned long begin = cc->cond.range.begin; unsigned long end = cc->cond.range.end; ccy = charClassMerge(ccy,begin,end,cc); } free(walk); return ccy; } /* end */