comparison src/cTranslator.py @ 6:168d60b03e2c

add dotTranslator(Translator), that can translate from DFA or NFA into Dot-file(Dot is graph generater using tex.)
author ryoma <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Fri, 02 Jul 2010 02:43:28 +0900
parents 11fba907c0af
children a28f87d353bb
comparison
equal deleted inserted replaced
5:11fba907c0af 6:168d60b03e2c
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 import sys
4 from dfareg import Regexp, CallGraph 3 from dfareg import Regexp, CallGraph
5 from translator import Translator 4 from translator import Translator
6 5
7 class CTranslator(Translator): 6 class CTranslator(Translator):
8 """ 7 """
9 CTranslator 8 CTranslator
9 This Calss can translate from DFA or NFA into C source code.
10 DFA: A simple state transition as tail call (also can implement with CbC).
11 NFA: using stack, deepening depth-first search.
10 >>> string = \"(A|B)*C\" 12 >>> string = \"(A|B)*C\"
11 >>> reg = Regexp(string) 13 >>> reg = Regexp(string)
12 >>> dfacg = CallGraph(reg.dfa) 14 >>> dfacg = CallGraph(reg.dfa)
13 >>> nfacg = CallGraph(reg.nfa) 15 >>> nfacg = CallGraph(reg.nfa)
14 >>> CTranslator(string, dfacg).translate() 16 >>> CTranslator(string, dfacg).translate()
15 >>> CTranslator(string, nfacg).translate() 17 >>> CTranslator(string, nfacg).translate()
16 """ 18 """
17 def __init__(self, regexp, cg, stream=sys.stdout): 19 def __init__(self, regexp, cg):
18 self.regexp = regexp 20 self.regexp = regexp
19 self.cg = cg 21 self.cg = cg
20 self.stream = stream 22 self.stream = None
21 self.funType = 'void ' 23 self.funType = 'void '
22 self.callType = '' 24 self.callType = ''
23 self.breakStatement = '\t\t\tbreak;' 25 self.breakStatement = '\t\t\tbreak;'
24 self.debug = False 26 self.debug = False
25 27
26 def translateFromCallGraph(self): 28 def translateFromCallGraph(self):
29 def prefix(string):
30 return "state_"+string
27 # self.emit C-source code 31 # self.emit C-source code
28 self.emit("#include <stdio.h>\n") 32 self.emit("#include <stdio.h>\n")
29 for k in self.cg.map.iterkeys(): 33 for k in self.cg.map.iterkeys():
30 self.emit(self.funType + "state_" + k + "(char* s);\n") 34 self.emit(self.funType + prefix(k) + "(char* s);\n")
31 self.emit(self.funType + 'accept(char* s);\n') 35 self.emit(self.funType + 'accept(char* s);\n')
32 self.emit(self.funType + 'reject(char* s);\n') 36 self.emit(self.funType + 'reject(char* s);\n')
33 self.emit(""" 37 self.emit("""
34 int main(int argc, char* argv[]) { 38 int main(int argc, char* argv[]) {
35 \tputs(\"regexp: %s\"); 39 \tputs(\"regexp: %s\");
36 \tputs(\"number of state: %d\"); 40 \tputs(\"number of state: %d\");
37 \tprintf(\"string: %%s\\n\", argv[1]); 41 \tprintf(\"string: %%s\\n\", argv[1]);
38 \t%s%s(argv[1]); 42 \t%s%s(argv[1]);
39 """ % (self.regexp, len(self.cg.states), self.callType, "state_"+self.cg.start)) 43 """ % (self.regexp, len(self.cg.states), self.callType, prefix(self.cg.start)))
40 if self.cg.type is "NFA" : self.emit("\treject(argv[1]);\n") 44 if self.cg.type is "NFA" : self.emit("\treject(argv[1]);\n")
41 self.emit(""" 45 self.emit("""
42 \treturn 0; 46 \treturn 0;
43 }\n\n""") 47 }\n\n""")
44 48
45 for k, v in self.cg.map.iteritems(): 49 for k, v in self.cg.map.iteritems():
46 self.emit(self.funType + "state_" + k + "(char* s) {\n") 50 self.emit(self.funType + prefix(k) + "(char* s) {\n")
47 if self.debug: self.emit("\tprintf(\"state: %s, input: %%s\\n\", s);\n" % (k)) 51 if self.debug: self.emit("\tprintf(\"state: %s, input: %%s\\n\", s);\n" % (k))
48 if self.cg.type is "NFA": 52 if self.cg.type is "NFA":
49 sLocal = "s_local" 53 sLocal = "s_local"
50 self.emit("\tchar* %s = s;\n" % (sLocal)) 54 self.emit("\tchar* %s = s;\n" % (sLocal))
51 # epsilon-transition 55 # epsilon-transition
52 for ss in (ss for i,ss in v.iteritems() if i == ''): 56 for ss in (ss for i,ss in v.iteritems() if i == ''):
53 for s in ss: 57 for s in ss:
54 self.emit("\t%s%s(%s);\n" % (self.callType, "state_"+s, sLocal)) 58 self.emit("\t%s%s(%s);\n" % (self.callType, prefix(s), sLocal))
55 else: 59 else:
56 sLocal = "s" 60 sLocal = "s"
57 61
58 self.emit("\tswitch(*%s++) {\n" % (sLocal)) 62 self.emit("\tswitch(*%s++) {\n" % (sLocal))
59 63
60 for input, nextStates in v.iteritems(): 64 for input, nextStates in v.iteritems():
61 if input != '': 65 if input != '':
62 self.emit("\t\tcase '%s': \n" % (input)) 66 self.emit("\t\tcase '%s': \n" % (input))
63 for nextState in nextStates: 67 for nextState in nextStates:
64 self.emit("\t\t\t%s%s(%s);\n" % (self.callType, "state_"+nextState, sLocal)) 68 self.emit("\t\t\t%s%s(%s);\n" % (self.callType, prefix(nextState), sLocal))
65 if self.breakStatement != '' : self.emit(self.breakStatement+'\n') 69 if self.breakStatement != '' : self.emit(self.breakStatement+'\n')
66 70
67 if k in self.cg.accepts : 71 if k in self.cg.accepts :
68 self.emit( """\t\tcase '\\0':\n\t\t\t%saccept(%s);\n%s\n""" % (self.callType, sLocal, self.breakStatement)) 72 self.emit( """\t\tcase '\\0':\n\t\t\t%saccept(%s);\n%s\n""" % (self.callType, sLocal, self.breakStatement))
69 if self.cg.type is "DFA": 73 if self.cg.type is "DFA":
79 self.emit (""" 83 self.emit ("""
80 %sreject(char* s) { 84 %sreject(char* s) {
81 \tprintf(\"\\nstring does not match regexp. \\n\\n\"); 85 \tprintf(\"\\nstring does not match regexp. \\n\\n\");
82 }\n""" % self.funType) 86 }\n""" % self.funType)
83 87
84 def main(): 88 def test():
85 import doctest 89 import doctest
86 doctest.testmod() 90 doctest.testmod()
87 ''' 91 '''
88 reg = Regexp("(A|B)*C") 92 reg = Regexp("(A|B)*C")
89 ct = CTranslator(reg.regexp, CallGraph(reg.dfa)) 93 ct = CTranslator(reg.regexp, CallGraph(reg.dfa))
90 ct.translate() 94 ct.translate()
91 ''' 95 '''
92 96
93 if __name__ == '__main__' : main() 97 if __name__ == '__main__' : test()