Mercurial > hg > Applications > Grep
changeset 135:e1a262ec75f0 pairPro
impl charclass
author | masa |
---|---|
date | Fri, 04 Dec 2015 19:07:10 +0900 |
parents | dbafc753078e |
children | 15815fcb6c2f |
files | c/regexParser/Makefile c/regexParser/node.cc c/regexParser/regexParser.cc |
diffstat | 3 files changed, 56 insertions(+), 33 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/c/regexParser/Makefile	Fri Dec 04 17:45:09 2015 +0900
+++ b/c/regexParser/Makefile	Fri Dec 04 19:07:10 2015 +0900
@@ -25,22 +25,22 @@
 	rm -f *~ \#*
 
 test:
-	./$(TARGET) -regex "a"
-	./$(TARGET) -regex "ab"
-	./$(TARGET) -regex "ab*"
-	./$(TARGET) -regex "(ab)*"
-	./$(TARGET) -regex "(ab)*c"
-	./$(TARGET) -regex "(ab)c"
-	./$(TARGET) -regex "(a|b)c"
-	./$(TARGET) -regex "a(bc)*d"
-	./$(TARGET) -regex "abc*d"
-	./$(TARGET) -regex "(ab)c*d"
-	./$(TARGET) -regex "a(b)c"
-	./$(TARGET) -regex "(a|b|c)d"
-	./$(TARGET) -regex "(a|b|c)*d"
-	./$(TARGET) -regex "[a]"
-	./$(TARGET) -regex "[ab]"
-	./$(TARGET) -regex "[a-z]"
-	./$(TARGET) -regex "[a-zA-Z]"
-	./$(TARGET) -regex "[a-zA-Z]*"
-	./$(TARGET) -regex "a[a-zA-Z]*"
+	./$(TARGET) -regex 'a'
+	./$(TARGET) -regex 'ab'
+	./$(TARGET) -regex 'ab*'
+	./$(TARGET) -regex '(ab)*'
+	./$(TARGET) -regex '(ab)*c'
+	./$(TARGET) -regex '(ab)c'
+	./$(TARGET) -regex '(a|b)c'
+	./$(TARGET) -regex 'a(bc)*d'
+	./$(TARGET) -regex 'abc*d'
+	./$(TARGET) -regex '(ab)c*d'
+	./$(TARGET) -regex 'a(b)c'
+	./$(TARGET) -regex '(a|b|c)d'
+	./$(TARGET) -regex '(a|b|c)*d'
+	./$(TARGET) -regex '[a]'
+	./$(TARGET) -regex '[ab]'
+	./$(TARGET) -regex '[a-z]'
+	./$(TARGET) -regex '[a-zA-Z]'
+	./$(TARGET) -regex '[a-zA-Z]*'
+	./$(TARGET) -regex 'a[a-zA-Z]*'
```
```diff
--- a/c/regexParser/node.cc	Fri Dec 04 17:45:09 2015 +0900
+++ b/c/regexParser/node.cc	Fri Dec 04 19:07:10 2015 +0900
@@ -1,20 +1,32 @@
 #include <stdio.h>
 #include "node.h"
 
+static void printCharacterClass(CharClassPtr cc, int d) {
+    if (cc->type == 'r') {
+        printf("%*c",d*4, ' ');
+        for (RangeListPtr range = cc->cond->range; range; range = range->next) {
+            printf("%c-%c ",*range->begin,*range->end);
+        }
+        printf("\n");
+    }
+}
+
 static void descendTree(NodePtr n, int d) {
     if (n->left != NULL) {
         d++;
         descendTree(n->left, d);
         d--;
     }
-    if (n->tokenType != 'a') {
-        printf("%*c%c(%lu)\n",d*4, ' ',n->tokenType,n->nodeNumber);
-    } else {
+    if (n->tokenType == 'a') {
         printf("%*c",d*4, ' ');
         for (int i = 0; i < n->cc->cond->w->length; i++) {
             putchar(n->cc->cond->w->word[i]);
         }
         printf("(%lu)\n",n->nodeNumber);
+    } else if (n->tokenType == 'c') {
+        printCharacterClass(n->cc,d);
+    } else {
+        printf("%*c%c(%lu)\n",d*4, ' ',n->tokenType,n->nodeNumber);
     }
 
     if (n->right != NULL) {
```
--- a/c/regexParser/regexParser.cc Fri Dec 04 17:45:09 2015 +0900 +++ b/c/regexParser/regexParser.cc Fri Dec 04 19:07:10 2015 +0900 @@ -57,33 +57,39 @@ // <charClass> ::= '['<literal>'-'<literal>']' static NodePtr charClass(RegexInfoPtr ri) { - CharClassPtr cc = NEW(CharClass); + NodePtr n = createNode(ri,'c',cc,0,0); cc->type = 'r'; cc->cond = NEW(Condition); cc->cond->range = NEW(RangeList); cc->cond->range->begin = ri->ptr; - cc->cond->range->end = ri->ptr + 1; + cc->cond->range->end = ri->ptr; cc->cond->range->next = NULL; - int i = 0; RangeListPtr rangeList = cc->cond->range; - while (ri->ptr[i] != ']') { - if (ri->ptr[i] == '-') i++; - - rangeList->end = ri->ptr + i; + for (ri->ptr++; *ri->ptr && *ri->ptr != ']'; ri->ptr++) { + if (*ri->ptr == '-') { + rangeList->end = ri->ptr + 1; + ri->ptr++; + continue; + } + if (ri->ptr[0] == 0 || ri->ptr[0] == ']') break; + if (ri->ptr[0] == rangeList->end[0] + 1) { + rangeList->end = ri->ptr; + continue; + } rangeList->next = NEW(RangeList); rangeList = rangeList->next; - rangeList->begin = ri->ptr+i+1; + rangeList->begin = ri->ptr; + rangeList->end = ri->ptr; rangeList->next = NULL; - i++; } // TODO literal support - rangeList->end = ri->ptr + i - 1; - NodePtr n = createNode(ri,'c',cc,0,0); + if (*ri->ptr) ri->ptr++; + token(ri); token(ri); return n; } @@ -114,6 +120,11 @@ ri->tokenType = 'c'; ri->tokenValue = ri->ptr; return; + } else if (ri->ptr[0] == ']') { + ri->ptr++; + ri->tokenType = ']'; + ri->tokenValue = ri->ptr; + return; } else if (ri->ptr[0] == '|'){ ri->ptr++; ri->tokenType = '|';