Mercurial > hg > Applications > Grep
changeset 56:8901bc071d33
implement string() and literal()
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 11 Jun 2015 16:24:40 +0900 |
parents | 883e3473a9f5 |
children | 71b497d25273 |
files | c/regexParser/main.cc |
diffstat | 1 files changed, 82 insertions(+), 157 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/c/regexParser/main.cc Mon Jun 08 23:40:09 2015 +0900
+++ b/c/regexParser/main.cc Thu Jun 11 16:24:40 2015 +0900
@@ -1,186 +1,111 @@
 /*
-    Very Simple Calculator
-    $Id$
+ * <literal> ::= [a-z][A-Z][0-9]
+ * <charClass> ::= '['<literal>'-'<literal>']'
+ * <string> ::= <literal><literal>*
+ * <or> ::= '('<regex>'|'<regex>')'
+ * <*> ::= <regex>'*'
+ * <regex> ::= <literal>|<conc>|<or>|<charClass>
  */
 #include <stdio.h>
-
-static char *ptr,*last_ptr;
-static int value,lvalue;
-static int last_token;
-static int variable[48];
+#include <stdlib.h>
+#include <string.h>
+char *ptr;
 
-static int expr();
-static int aexpr();
-static int mexpr();
-static int term();
-static int token();
-static void error(char *);
+typedef struct node {
+    int character;
+    struct node *left;
+    struct node *right;
+} Node, *NodePtr;
 
-
-static int
-token()
-{
-    int c,d;
+NodePtr charClass();
+NodePtr string();
+NodePtr _or();
+NodePtr asterisk();
+NodePtr regex();
+NodePtr createNode(int,NodePtr,NodePtr);
 
-    last_ptr = ptr; /* for error position */
-    c= *ptr;
-    if(!c) {
-        last_token = EOF;
-        return last_token;
-    }
-    ptr++;
-    if (c<=' ') { /* comment */
-        while(*ptr++);
-        ptr--;
-        last_token = EOF;
-        last_ptr = ptr;
-        return last_token;
-    }
+NodePtr createNode(int character, NodePtr left, NodePtr right) {
+    NodePtr n;
+    n = (NodePtr)malloc(sizeof(Node));
+    n->character = character;
+    n->left = left;
+    n->right = right;
+    return n;
+}
+
+// <charClass> ::= '['<literal>'-'<literal>']'
+NodePtr charClass() {
+    NodePtr n = createNode(0,0,0);
+    return n;
+}
 
-    if('0'<=c && c<='9') { /* Decimal */
-        d = c-'0';
-        while((c= *ptr++)) {
-            if('0'<=c && c<='9') {
-                d = d*10 + (c - '0');
-            } else {
-                break;
-            }
-        }
-        c && ptr--;
-        value = d;
-        last_token = '0';
-        return last_token;
+// <literal> ::= [a-z][A-Z][0-9]
+NodePtr literal() {
+    char c = *ptr;
+    createNode(c,0,0);
+}
 
-    } else if ('a'<=c && c<='z') { /* variable */
-        value = c-'a'; /* return variable reference */
-        last_token = 'v';
-        return last_token;
+// <string> ::= <literal><literal>*
+NodePtr string() {
+    char c = *ptr;
+    NodePtr n = (NodePtr)malloc(sizeof(Node));
+
+    if (('a'<=c && c<='z')||('A'<=c && c<='Z')||('0'<=c && c<='9')) {
+        n = createNode(0,literal(),string());
+        return n;
     } else {
-        last_token = c;
-        return last_token;
-        return c;
+        n = createNode(0,0,0);
     }
 }
 
-static int
-expr()
-{
-    int d,assign;
-
-    d = aexpr();
-    assign = lvalue;
-    switch(last_token) {
-    case '>':
-        d = (d > aexpr());
-        return d;
-    case '=':
-        if(assign>=0) {
-            d = expr();
-            variable[assign] = d;
-            return d;
-        } else {
-            error("Bad assignment");
-            return 0;
+// <or> ::= '('<regex>'|'<regex>')'
+NodePtr _or() {
+    regex();
+    while(*ptr++ == ')') {
+        if (*ptr == '|') {
+            ptr++;
+            regex();
         }
-    case ')':
-        return d;
-    case EOF:
-        return d;
-    default:
-        error("Bad expression");
-        return d;
-    }
-}
-
-static int
-aexpr()
-{
-    int d;
-
-    d = mexpr();
-    switch(last_token) {
-    case '-':
-        d -= aexpr();
-        return d;
-    case '+':
-        d += aexpr();
-        return d;
-    default:
-        return d;
     }
 }
 
-static int
-mexpr()
-{
-    int d;
-    d = term();
-    switch(last_token) {
-    case '*':
-        d *= mexpr();
-        return d;
-    case '/':
-        d /= mexpr();
-        return d;
-    default:
-        return d;
-    }
+// <*> ::= <regex>'*'
+NodePtr asterisk() {
+
 }
 
-static int
-term()
-{
-    int d;
+// <regex> ::= <literal>|<string>|<or>|<charClass>
+// <literal> は <string> に内包されるから、<regex> ::= <string>|<or>|<charClass>が正しい??
+NodePtr regex() {
+
+    NodePtr n;
 
-    lvalue= -1;
-    token();
-    if(last_token==EOF) {
-        error("Term expected");
+    while (int c = *ptr++) {
+        if (c == '(') {
+            ptr++;
+            _or();
+        } else if (c == '[') {
+            charClass();
+        } else {
+            n = createNode(0,string(),regex());
+        }
     }
-    switch(last_token) {
-    case '0':
-        d = value;
-        token();
-        return d;
-    case 'v':
-        d = lvalue = value;
-        token();
-        return variable[d];
-    case '(':
-        d = expr();
-        if(last_token != ')') {
-            error("Unbalanced parenthsis");
-        }
-        token();
-        return d;
-    default:
-        token();
-        error("Unknown term");
-        return 0;
-    }
-}
 
-static int lineno = 0;
-
-void
-error(char *msg)
-{
-    fprintf(stderr,"%s on line %d\n",msg, lineno);
+    return n;
 }
 
 int
-main()
+main(int argc, char **argv)
 {
-    int d;
-    char buf[BUFSIZ];
+    for (int i = 1; i < argc; i++) {
+        if (strcmp(argv[i],"-regex") == 0) {
+            ptr = argv[i+1]; i++;
+        }
+    }
 
-    while (fgets(buf,BUFSIZ,stdin)) {
-        ptr = buf;
-        d = expr();
-        printf("%s = 0x%08x = %d\n",buf,d,d);
-        fflush(stdout);
-        lineno++;
-    }
+    printf("regex : %s\n",ptr);
+    NodePtr n = regex();
     return 0;
 }
```