Mercurial > hg > Members > shinya > pyrect
view src/cTranslator.py @ 8:a28f87d353bb
simplify DFA state name. (in C/Dot Translator. ex: "1_2_3" -> "1")
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 03 Jul 2010 01:40:36 +0900 |
parents | 168d60b03e2c |
children | 973b596bd124 |
line wrap: on
line source
#!/Usr/bin/env python from dfareg import Regexp, CallGraph from translator import Translator class CTranslator(Translator): """ CTranslator This Calss can translate from DFA or NFA into C source code. DFA: A simple state transition as tail call (also can implement with CbC). NFA: using stack, deepening depth-first search. >>> string = \"(A|B)*C\" >>> reg = Regexp(string) >>> dfacg = CallGraph(reg.dfa) >>> nfacg = CallGraph(reg.nfa) >>> CTranslator(string, dfacg).translate() >>> CTranslator(string, nfacg).translate() """ def __init__(self, regexp, cg): Translator.__init__(self, regexp, cg) self.funType = 'void ' self.callType = '' self.breakStatement = '\t\t\tbreak;' self.debug = False if self.cg.type is "DFA": self.dfaStateNameHash = self.createDFAStateNameHash() def modifyStateName(self, stateName): if self.cg.type is "DFA": return "state_"+self.dfaStateNameHash[stateName] else: return "state_"+stateName def translateFromCallGraph(self): # self.emit C-source code self.emit("#include <stdio.h>\n") for k in self.cg.map.iterkeys(): self.emit(self.funType + self.modifyStateName(k) + "(char* s);\n") self.emit(self.funType + 'accept(char* s);\n') self.emit(self.funType + 'reject(char* s);\n') self.emit(""" int main(int argc, char* argv[]) { \tputs(\"regexp: %s\"); \tputs(\"number of state: %d\"); \tprintf(\"string: %%s\\n\", argv[1]); \t%s%s(argv[1]); """ % (self.regexp, len(self.cg.states), self.callType, self.modifyStateName(self.cg.start))) if self.cg.type is "NFA" : self.emit("\treject(argv[1]);\n") self.emit(""" \treturn 0; }\n\n""") for k, v in self.cg.map.iteritems(): self.emit(self.funType + self.modifyStateName(k) + "(char* s) {\n") if self.debug: self.emit("\tprintf(\"state: %s, input: %%s\\n\", s);\n" % (k)) if self.cg.type is "NFA": sLocal = "s_local" self.emit("\tchar* %s = s;\n" % (sLocal)) # epsilon-transition for ss in (ss for i,ss in v.iteritems() if i == ''): for s in ss: self.emit("\t%s%s(%s);\n" % (self.callType, self.modifyStateName(s), sLocal)) else: sLocal = "s" self.emit("\tswitch(*%s++) {\n" % (sLocal)) for input, nextStates in v.iteritems(): if input != '': self.emit("\t\tcase '%s': \n" % (input)) for nextState in nextStates: self.emit("\t\t\t%s%s(%s);\n" % (self.callType, self.modifyStateName(nextState), sLocal)) if self.breakStatement != '' : self.emit(self.breakStatement+'\n') if k in self.cg.accepts : self.emit( """\t\tcase '\\0':\n\t\t\t%saccept(%s);\n%s\n""" % (self.callType, sLocal, self.breakStatement)) if self.cg.type is "DFA": self.emit("\t\tdefault: %sreject(%s);\n\t}\n" % (self.callType, sLocal)) else: self.emit("\t}\n") self.emit("}\n\n") self.emit (""" %saccept(char* s) { \tprintf(\"\\nstring matches regexp. \\n\\n\"); }\n""" % self.funType) self.emit (""" %sreject(char* s) { \tprintf(\"\\nstring does not match regexp. \\n\\n\"); }\n""" % self.funType) def test(): import doctest doctest.testmod() ''' reg = Regexp("(A|B)*C") ct = CTranslator(reg.regexp, CallGraph(reg.dfa)) ct.translate() ''' if __name__ == '__main__' : test()