Mercurial > hg > Applications > Grep
comparison c/regexParser/main.cc @ 58:4053c3e0fa7f
implement group()
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 12 Jun 2015 19:02:00 +0900 |
parents | 71b497d25273 |
children | af189c727733 |
comparison
equal
deleted
inserted
replaced
57:71b497d25273 | 58:4053c3e0fa7f |
---|---|
1 /* | 1 /* |
2 * <literal> ::= [a-z][A-Z][0-9] | 2 * <literal> ::= [a-z][A-Z][0-9] |
3 * <charClass> ::= '['<literal>'-'<literal>']' | 3 * <charClass> ::= '['<literal>'-'<literal>']' |
4 * <string> ::= <literal><literal>* | 4 * <string> ::= <literal><literal>* |
5 * <or> ::= '('<regex>'|'<regex>')' | 5 * <group> ::= '('<regex>')' |
6 * <or> ::= <regex>'|'<regex> | |
6 * <*> ::= <regex>'*' | 7 * <*> ::= <regex>'*' |
7 * <regex> ::= <literal>|<conc>|<or>|<charClass> | 8 * <regex> ::= <string>|<or>|<charClass>|<group>|<*> |
8 */ | 9 */ |
9 | 10 |
10 #include <stdio.h> | 11 #include <stdio.h> |
11 #include <stdlib.h> | 12 #include <stdlib.h> |
12 #include <string.h> | 13 #include <string.h> |
13 | 14 |
14 char *ptr; | |
15 typedef struct node { | 15 typedef struct node { |
16 struct node *self; | |
16 char character; | 17 char character; |
17 struct node *left; | 18 struct node *left; |
18 struct node *right; | 19 struct node *right; |
19 } Node, *NodePtr; | 20 } Node, *NodePtr; |
20 | 21 |
22 char *ptr; | |
23 NodePtr regexHeadNode; | |
24 | |
21 NodePtr charClass(); | 25 NodePtr charClass(); |
22 NodePtr string(); | 26 NodePtr string(); |
27 NodePtr group(); | |
23 NodePtr _or(); | 28 NodePtr _or(); |
24 NodePtr asterisk(); | 29 NodePtr asterisk(); |
25 NodePtr regex(); | 30 NodePtr regex(); |
26 NodePtr createNode(char,NodePtr,NodePtr); | 31 NodePtr createNode(char,NodePtr,NodePtr); |
27 | 32 |
28 NodePtr createNode(char character, NodePtr left, NodePtr right) { | 33 NodePtr createNode(char character, NodePtr left, NodePtr right) { |
29 NodePtr n = (NodePtr)malloc(sizeof(Node)); | 34 NodePtr n = (NodePtr)malloc(sizeof(Node)); |
35 n->self = n; | |
30 n->character = character; | 36 n->character = character; |
31 n->left = left; | 37 n->left = left; |
32 n->right = right; | 38 n->right = right; |
39 | |
33 return n; | 40 return n; |
34 } | 41 } |
35 | 42 |
36 // <charClass> ::= '['<literal>'-'<literal>']' | 43 // <charClass> ::= '['<literal>'-'<literal>']' |
37 NodePtr charClass() { | 44 NodePtr charClass() { |
45 ptr++; | |
38 NodePtr n = createNode(0,0,0); | 46 NodePtr n = createNode(0,0,0); |
39 return n; | 47 return n; |
40 } | 48 } |
41 | 49 |
42 // <literal> ::= [a-z][A-Z][0-9] | 50 // <literal> ::= [a-z][A-Z][0-9] |
48 } | 56 } |
49 | 57 |
50 // <string> ::= <literal><literal>* | 58 // <string> ::= <literal><literal>* |
51 NodePtr string() { | 59 NodePtr string() { |
52 char c = *ptr; | 60 char c = *ptr; |
53 NodePtr n; | 61 NodePtr n = NULL; |
54 | 62 printf("%c\n",c); |
55 if (('a'<=c && c<='z')||('A'<=c && c<='Z')||('0'<=c && c<='9')) { | 63 if (('a'<=c && c<='z')||('A'<=c && c<='Z')||('0'<=c && c<='9')) { |
56 n = createNode(0,literal(),string()); | 64 n = createNode(0,literal(),string()); |
57 return n; | |
58 } else { | 65 } else { |
59 n = createNode(0,0,0); | 66 n = createNode(0,0,0); |
60 } | 67 } |
68 return n; | |
61 } | 69 } |
62 | 70 |
63 // <or> ::= '('<regex>'|'<regex>')' | 71 // <group> ::= '('<regex>')' | '('<regex>'|'<regex>')' |
72 NodePtr group() { | |
73 NodePtr n; | |
74 if (*ptr == ')') { | |
75 n = createNode(0,0,0); | |
76 ptr++; | |
77 } else { | |
78 ptr++; | |
79 n = regex(); | |
80 } | |
81 | |
82 return n; | |
83 } | |
84 | |
85 | |
86 // <or> ::= <regex>'|'<regex> | |
64 NodePtr _or() { | 87 NodePtr _or() { |
65 regex(); | 88 NodePtr n = createNode('|',regexHeadNode,regex()); |
66 while(*ptr++ == ')') { | 89 return n; |
67 if (*ptr == '|') { | |
68 ptr++; | |
69 regex(); | |
70 } | |
71 } | |
72 } | 90 } |
73 | 91 |
74 // <*> ::= <regex>'*' | 92 // <*> ::= <regex>'*' |
75 NodePtr asterisk() { | 93 NodePtr asterisk() { |
76 | 94 |
77 } | 95 } |
78 | 96 |
79 // <regex> ::= <literal>|<string>|<or>|<charClass> | 97 // <regex> ::= <string>|<or>|<charClass>|<group>|<*> |
80 // Since <literal> is subsumed by <string>, is <regex> ::= <string>|<or>|<charClass> the correct rule?? | |
81 NodePtr regex() { | 98 NodePtr regex() { |
82 | 99 |
83 NodePtr n; | 100 NodePtr n; |
84 | 101 |
85 while (char c = *ptr) { | 102 while (*ptr != '\0') { |
86 if (c == '(') { | 103 if ((*ptr == '(') || (*ptr == ')')) { |
87 ptr++; | 104 n = group(); |
105 } else if (*ptr == '[') { | |
106 n = charClass(); | |
107 } else if (*ptr == '|'){ | |
88 n = _or(); | 108 n = _or(); |
89 } else if (c == '[') { | |
90 n = charClass(); | |
91 } else { | 109 } else { |
92 n = string(); | 110 n = string(); |
111 regexHeadNode = n; | |
93 } | 112 } |
94 ptr++; | |
95 } | 113 } |
96 | 114 |
97 return n; | 115 return n; |
98 } | 116 } |
99 | 117 |