annotate CbC-examples/regexp/dfareg.py @ 62:70947ddafad7

added test code, CbC-example/regexp
author ryoma <e075725@ie.u-ryukyu.ac.jp>
date Sun, 25 Apr 2010 17:46:01 +0900
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
62
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 #!/usr/bin/env python
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3 import copy
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
4 import sys
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
5
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
6 class NondeterministicFiniteAutomaton(object):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 def __init__(self, transition, start, accepts, map_):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 self.transition = transition
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 self.start = start
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 self.accepts = accepts
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
11 self.map = map_
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
12
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
13 def epsilonExpand(self, set_):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 que = set(set_)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
15 done = set()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
16 while que:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
17 stat = que.pop()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
18 nexts = self.transition(stat, "")
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
19 done.add(stat)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
20 for nextStat in nexts:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
21 if not nextStat in done: que.add(nextStat)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
22 return frozenset(done)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
23
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
24 class DeterministicFiniteAutomaton(object):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
25 def __init__(self, transition, start, accepts, map_):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
26 self.transition = transition
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
27 self.start = start
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
28 self.accepts = accepts
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
29 self.map = map_
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
30 def getRuntime(self):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
31 return DFARuntime(self)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
32
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
33 class DFARuntime(object):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
34 def __init__(self, DFA):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
35 self.DFA = DFA
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
36 self.curState = self.DFA.start
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
37
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
38 def doTransition(self, char):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
39 self.curState = self.DFA.transition(
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
40 self.curState, char)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
41
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
42 def isAcceptState(self):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
43 return self.curState in self.DFA.accepts
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
44
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
45 def doesAccept(self, input):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
46 for alphabet in input:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
47 self.doTransition(alphabet)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
48 return self.isAcceptState()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
49
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
50 class Token(object):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
51 CHARACTER = 0
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
52 OPE_UNION = 1
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
53 OPE_STAR = 2
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 LPAREN = 3
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
55 RPAREN = 4
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
56 EOF = 5
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
57
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
58 def __init__(self, value, kind):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
59 self.value = value
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
60 self.kind = kind
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
61
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
62
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
63 class Lexer(object):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
64 def __init__(self, str):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
65 self.stringList = list(str)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
66
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
67 def scan(self):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
68 if not self.stringList:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
69 return Token(None, Token.EOF)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
70
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
71 ch = self.stringList.pop(0)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
72 if ch == '\\':
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
73 return Token(self.stringList.pop(0), Token.CHARACTER)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
74 elif ch == '|':
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
75 return Token(ch, Token.OPE_UNION)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
76 elif ch == '(':
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
77 return Token(ch, Token.LPAREN)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
78 elif ch == ')':
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
79 return Token(ch, Token.RPAREN)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
80 elif ch == '*':
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
81 return Token(ch, Token.OPE_STAR)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
82 else:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
83 return Token(ch, Token.CHARACTER)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
84
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
85 """
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
86 Context-free Grammer:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
87 (A) expression -> subexpr EOF
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
88 (B) subexpr -> seq '|' subexpr | seq
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
89 (C) seq -> subseq | ''
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
90 (D) subseq -> star subseq | star
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
91 (E) star -> factor '*' | factor
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
92 (F) factor -> '(' subexpr ')' | CHARACTER
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
93 """
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
94
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
95 class Parser(object):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
96 def __init__(self, lexer):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
97 self.lexer = lexer
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
98 self.look = None
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
99 self.move()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
100
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
101 def match(self, tag):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
102 if self.look.kind != tag:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
103 raise Exeption("syntax error")
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
104 self.move()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
105
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
106 def move(self):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
107 self.look = self.lexer.scan()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
108
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
109 def factor(self):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
110 if self.look.kind == Token.LPAREN:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
111 # factor -> '(' subexpr ')'
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
112 self.match(Token.LPAREN)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
113 node = self.subexpr()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
114 self.match(Token.RPAREN)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
115 return node
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
116 else:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
117 # factor -> CHARACTER
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
118 node = Character(self.look.value)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
119 self.match(Token.CHARACTER)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
120 return node
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
121
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
122 def star(self):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
123 # star -> factor '*' | factor
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
124 node = self.factor()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
125 if self.look.kind == Token.OPE_STAR:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
126 self.match(Token.OPE_STAR)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
127 node = Star(node)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
128 return node
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
129
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
130 def seq(self):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
131 if self.look.kind == Token.LPAREN \
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
132 or self.look.kind == Token.CHARACTER:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
133 # seq -> subseq
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
134 return self.subseq()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
135 else:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
136 # seq -> ''
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
137 return Character("")
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
138
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
139 def subseq(self):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
140 node1 = self.star()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
141 if self.look.kind == Token.LPAREN \
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
142 or self.look.kind == Token.CHARACTER:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
143 # subseq -> star subseq
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
144 node2 = self.subseq()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
145 node = Concat(node1, node2)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
146 return node
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
147 else:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
148 # subseq -> star
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
149 return node1
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
150
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
151 def subexpr(self):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
152 node1 = self.seq()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
153 if self.look.kind == Token.OPE_UNION:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
154 self.match(Token.OPE_UNION)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
155 # subexpr -> seq '|' subexpr
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
156 node2 = self.subexpr()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
157 node = Union(node1, node2)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
158 return node
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
159 else:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
160 # subexpr -> seq
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
161 return node1
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
162
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
163 def expression(self):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
164 # expression -> subexpr EOF
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
165 node = self.subexpr()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
166 self.match(Token.EOF)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
167
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
168 # Create TREE, NFA
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
169 context = Context()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
170 fragment = node.assemble(context)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
171 return fragment.build()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
172
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
173 class Context(object):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
174 def __init__(self):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
175 self.stateCount = 0
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
176
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
177 def newState(self):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
178 self.stateCount += 1
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
179 return self.stateCount
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
180
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
181 class NFAFragment(object):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
182 def __init__(self):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
183 self.start = None # integer
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
184 self.accepts = None # frozenset
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
185 self.map = dict() # transition
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
186
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
187 def connect(self, from_, char, to):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
188 slot = self.map.setdefault((from_, char), set())
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
189 slot.add(to)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
190
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
191 def newSkelton(self):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
192 # copy fragment (self), and it return
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
193 newFrag = NFAFragment();
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
194 newFrag.map = copy.deepcopy(self.map)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
195 return newFrag
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
196
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
197 def __or__(self, frag):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
198 newFrag = self.newSkelton()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
199 for k, v in frag.map.iteritems():
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
200 newFrag.map[k] = v.copy()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
201 return newFrag
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
202
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
203 def build(self):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
204 map_ = self.map
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
205 def transition(state, char):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
206 return frozenset(map_.get((state, char), []))
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
207
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
208 return NondeterministicFiniteAutomaton(
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
209 transition,
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
210 self.start,
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
211 self.accepts,
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
212 self.map
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
213 )
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
214
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
215 class Character(object):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
216 def __init__(self, char):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
217 self.char = char
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
218
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
219 def assemble(self, context):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
220 frag = NFAFragment()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
221 s1 = context.newState()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
222 s2 = context.newState()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
223 frag.connect(s1, self.char, s2)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
224 frag.start = s1
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
225 frag.accepts = frozenset([s2])
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
226 return frag
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
227
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
228 class Union(object):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
229 def __init__(self, op1, op2):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
230 self.op1 = op1
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
231 self.op2 = op2
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
232
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
233 def assemble(self, context):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
234 frag1 = self.op1.assemble(context)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
235 frag2 = self.op2.assemble(context)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
236 frag = frag1 | frag2
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
237 s = context.newState()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
238 frag.connect(s, "", frag1.start)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
239 frag.connect(s, "", frag2.start)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
240 frag.start = s
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
241 frag.accepts = frag1.accepts | frag2.accepts
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
242 return frag
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
243
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
244 class Concat(object):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
245 def __init__(self, op1, op2):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
246 self.op1 = op1
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
247 self.op2 = op2
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
248
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
249 def assemble(self, context):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
250 frag1 = self.op1.assemble(context)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
251 frag2 = self.op2.assemble(context)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
252 frag = frag1 | frag2
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
253
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
254 for state in frag1.accepts:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
255 frag.connect(state, "", frag2.start)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
256
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
257 frag.start = frag1.start
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
258 frag.accepts = frag2.accepts
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
259 return frag
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
260
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
261 class Star(object):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
262 def __init__(self, op):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
263 self.op = op
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
264
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
265 def assemble(self, context):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
266 fragOrig = self.op.assemble(context)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
267 frag = fragOrig.newSkelton()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
268
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
269 for state in fragOrig.accepts:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
270 frag.connect(state, "", fragOrig.start)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
271
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
272 s = context.newState()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
273 frag.connect(s, "", fragOrig.start)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
274 frag.start = s
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
275 frag.accepts = fragOrig.accepts | frozenset([s])
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
276 return frag
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
277
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
278 class NonDisjoinSets(object):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
279 def __init__(self, sub):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
280 self.sub = sub
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
281 def __contains__(self, a_set):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
282 return a_set & self.sub
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
283
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
284 def nfa2dfa(nfa):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
285 def transition(set_, alpha):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
286 ret = set()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
287 for elem in set_:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
288 ret |= nfa.transition(elem, alpha)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
289 return nfa.epsilonExpand(frozenset(ret))
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
290
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
291 def stateSetTransition(stateSet, char):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
292 trans = set()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
293 for state in stateSet:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
294 t = nfa.map.setdefault((state, char), None)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
295 if not t == None: trans.update(nfa.epsilonExpand(t))
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
296 return frozenset(trans)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
297
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
298 map_ = dict()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
299 que = set(frozenset([nfa.epsilonExpand(set([nfa.start]))]))
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
300 done = set()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
301
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
302 while que:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
303 stateSet = que.pop()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
304
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
305 for state in stateSet:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
306 for k, v in nfa.map.iteritems():
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
307 if state == k[0] and k[1] != '':
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
308 slot = map_.setdefault((stateSet, k[1]), set())
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
309 slot.update(nfa.epsilonExpand(v))
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
310
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
311 done.add(stateSet)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
312
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
313 for v in map_.itervalues():
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
314 if not v in done:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
315 que.add(frozenset(v))
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
316
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
317 return DeterministicFiniteAutomaton(
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
318 transition,
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
319 nfa.epsilonExpand(frozenset([nfa.start])),
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
320 NonDisjoinSets(nfa.accepts),
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
321 map_
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
322 )
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
323
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
324 # emit CbC-souce, from dfa
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
325 def dfa2CbC(dfa, regexp):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
326
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
327 # return state name from set
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
328 def set2name(set_):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
329 l = list(set_)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
330 l.sort()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
331 return '_'.join(str(x) for x in l)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
332
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
333 funcMap = dict()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
334 funcMap[dfa.start] = set()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
335
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
336 # dfa.map => {(frozenset([1, 15]), 'd'): frozenset([16]), ....}
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
337 # : frozenset(1, 15) x 'd' -> frozenset([16])
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
338
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
339 for v in dfa.map.itervalues():
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
340 funcMap[frozenset(v)] = set()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
341
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
342 for key in dfa.map.iterkeys():
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
343 slot = funcMap.setdefault(key[0], set())
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
344 slot.add(key[1])
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
345
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
346 # emit cbc code
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
347 print "#include <stdio.h>\n"
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
348 print "#include <stdlib.h>\n"
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
349 for k in funcMap.iterkeys():
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
350 print '__code state_' + set2name(k) + "(char* s);"
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
351 print '__code accept();'
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
352 print '__code reject();'
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
353 print """
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
354 int main(int argc, char* argv[]) {
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
355 \tif (argc == 1) {
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
356 \t\tprintf(\"usage: %%s text\\n\", argv[0]);
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
357 \t\texit(0);
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
358 }
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
359
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
360 \tputs(\"regexp: %s\");
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
361 \tputs(\"number of state: %d\");
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
362 \tprintf(\"string: %%s\\n\", argv[1]);
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
363 \tgoto state_%s(argv[1]);
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
364 \treturn 0;
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
365 }\n""" % (regexp, len(funcMap), set2name(dfa.start))
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
366
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
367 for k, v in funcMap.iteritems():
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
368 print '__code state_' + set2name(k) + "(char* s) {"
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
369 print "\tswitch(*s++) {"
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
370 for case in v:
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
371 print "\t\tcase '%c': " % (case)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
372 print "\t\t\tgoto state_%s(s);\n\t\t\tbreak;" % set2name(dfa.map[(frozenset(k), case)])
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
373 if k in dfa.accepts: print "\t\tcase '\\0': goto accept();\n\t\t\tbreak;"
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
374 print "\t\tdefault: goto reject();\n\t}"
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
375 print "}\n"
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
376
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
377 print """
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
378 __code accept() {
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
379 \tprintf(\"\\nstring matches regexp. \\n\\n\");
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
380 }\n"""
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
381 print """
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
382 __code reject() {
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
383 \tprintf(\"\\nstring does not match regexp. \\n\\n\");
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
384 }\n"""
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
385
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
386 class Regexp(object):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
387 def __init__(self, regexp):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
388 self.regexp = regexp
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
389 self.nfa = None
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
390 self.dfa = None
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
391 self._compile()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
392
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
393 def _compile(self):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
394 lexer_ = Lexer(self.regexp)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
395 parser_ = Parser(lexer_)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
396 self.nfa = parser_.expression()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
397 self.dfa = nfa2dfa(self.nfa)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
398
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
399 def emitCbC(self):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
400 dfa2CbC(self.dfa, self.regexp)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
401
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
402 def matches(self, string):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
403 runtime = self.dfa.getRuntime()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
404 return runtime.doesAccept(string)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
405
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
406 def compile(regexp):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
407 return Regexp(regexp)
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
408
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
409 def main(argv):
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
410 if len(argv) == 1 : exit("usage: dfareg.py regexp [> file]")
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
411 r = compile(argv[1])
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
412 r.emitCbC()
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
413
70947ddafad7 added test code, CbC-example/regexp
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
414 if __name__ == '__main__' : main(sys.argv)