Mercurial > hg > Members > shinya > pyrect
changeset 5:11fba907c0af
add Translater(object), that can translate C/CbC source code
from NFA or DFA(CbC can translate from DFA).
and refactoring, refactoring, refactoring...
not implimented translation-DotFile.
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 01 Jul 2010 00:40:51 +0900 |
parents | 7e47839a54ad |
children | 168d60b03e2c |
files | code/c/dfa.c code/c/nfa.c src/cTranslator.py src/cbcTranslator.py src/converter.py src/dfareg.py src/translator.py |
diffstat | 7 files changed, 419 insertions(+), 96 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/c/dfa.c Thu Jul 01 00:40:51 2010 +0900 @@ -0,0 +1,49 @@ +#include <stdio.h> +void state_1_3(char* s); +void state_1_2(char* s); +void accept(char* s); +void reject(char* s); + +int main(int argc, char* argv[]) { + puts("regexp: A*"); + puts("number of state: 2"); + printf("string: %s\n", argv[1]); + state_1_3(argv[1]); + + return 0; +} + +void state_1_3(char* s) { + printf("state: 1_3, input: %s\n", s); + switch(*s++) { + case 'A': + state_1_2(s); + break; + case '\0': + accept(s); + break; + default: reject(s); + } +} + +void state_1_2(char* s) { + printf("state: 1_2, input: %s\n", s); + switch(*s++) { + case 'A': + state_1_2(s); + break; + case '\0': + accept(s); + break; + default: reject(s); + } +} + + +void accept(char* s) { + printf("\nstring matches regexp. \n\n"); +} + +void reject(char* s) { + printf("\nstring does not match regexp. \n\n"); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/c/nfa.c Thu Jul 01 00:40:51 2010 +0900 @@ -0,0 +1,106 @@ +#include <stdio.h> +void state_1(char* s); +void state_3(char* s); +void state_2(char* s); +void state_5(char* s); +void state_4(char* s); +void state_7(char* s); +void state_6(char* s); +void state_8(char* s); +void accept(char* s); +void reject(char* s); + +int main(int argc, char* argv[]) { + puts("regexp: (A|B)*C"); + puts("number of state: 8"); + printf("string: %s\n", argv[1]); + state_6(argv[1]); + reject(argv[1]); + + return 0; +} + +void state_1(char* s) { + printf("state: 1, input: %s\n", s); + char* s_local = s; + switch(*s_local++) { + case 'A': + state_2(s_local); + break; + } +} + +void state_3(char* s) { + printf("state: 3, input: %s\n", s); + char* s_local = s; + switch(*s_local++) { + case 'B': + state_4(s_local); + break; + } +} + +void state_2(char* s) { + printf("state: 2, input: %s\n", s); + char* s_local = s; + state_5(s_local); + state_7(s_local); + switch(*s_local++) { + } +} + +void state_5(char* s) { + printf("state: 5, input: %s\n", s); + char* s_local = s; + state_1(s_local); + state_3(s_local); + switch(*s_local++) { + } +} + +void state_4(char* s) { + printf("state: 4, input: %s\n", s); + char* s_local = s; + state_5(s_local); + state_7(s_local); + switch(*s_local++) { + } +} + +void state_7(char* s) { + printf("state: 7, input: %s\n", s); + char* s_local = s; + switch(*s_local++) { + case 'C': + state_8(s_local); + break; + } +} + +void state_6(char* s) { + printf("state: 6, input: %s\n", s); + char* s_local = s; + state_5(s_local); + state_7(s_local); + switch(*s_local++) { + } +} + +void state_8(char* s) { + printf("state: 8, input: %s\n", s); + char* s_local = s; + switch(*s_local++) { + case '\0': + accept(s_local); + break; + } +} + + +void accept(char* s) { + printf("\nstring matches regexp. \n\n"); +} + +void reject(char* s) { + printf("\nstring does not match regexp. \n\n"); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cTranslator.py Thu Jul 01 00:40:51 2010 +0900 @@ -0,0 +1,93 @@ +#!/usr/bin/env python + +import sys +from dfareg import Regexp, CallGraph +from translator import Translator + +class CTranslator(Translator): + """ + CTranslator + >>> string = \"(A|B)*C\" + >>> reg = Regexp(string) + >>> dfacg = CallGraph(reg.dfa) + >>> nfacg = CallGraph(reg.nfa) + >>> CTranslator(string, dfacg).translate() + >>> CTranslator(string, nfacg).translate() + """ + def __init__(self, regexp, cg, stream=sys.stdout): + self.regexp = regexp + self.cg = cg + self.stream = stream + self.funType = 'void ' + self.callType = '' + self.breakStatement = '\t\t\tbreak;' + self.debug = False + + def translateFromCallGraph(self): + # self.emit C-source code + self.emit("#include <stdio.h>\n") + for k in self.cg.map.iterkeys(): + self.emit(self.funType + "state_" + k + "(char* s);\n") + self.emit(self.funType + 'accept(char* s);\n') + self.emit(self.funType + 'reject(char* s);\n') + self.emit(""" +int main(int argc, char* argv[]) { +\tputs(\"regexp: %s\"); +\tputs(\"number of state: %d\"); +\tprintf(\"string: %%s\\n\", argv[1]); +\t%s%s(argv[1]); +""" % (self.regexp, len(self.cg.states), self.callType, "state_"+self.cg.start)) + if self.cg.type is "NFA" : self.emit("\treject(argv[1]);\n") + self.emit(""" +\treturn 0; +}\n\n""") + + for k, v in self.cg.map.iteritems(): + self.emit(self.funType + "state_" + k + "(char* s) {\n") + if self.debug: self.emit("\tprintf(\"state: %s, input: %%s\\n\", s);\n" % (k)) + if self.cg.type is "NFA": + sLocal = "s_local" + self.emit("\tchar* %s = s;\n" % (sLocal)) + # epsilon-transition + for ss in (ss for i,ss in v.iteritems() if i == ''): + for s in ss: + self.emit("\t%s%s(%s);\n" % (self.callType, "state_"+s, sLocal)) + else: + sLocal = "s" + + self.emit("\tswitch(*%s++) {\n" % (sLocal)) + + for input, nextStates in v.iteritems(): + if input != '': + self.emit("\t\tcase '%s': \n" % (input)) + for nextState in nextStates: + self.emit("\t\t\t%s%s(%s);\n" % (self.callType, "state_"+nextState, sLocal)) + if self.breakStatement != '' : self.emit(self.breakStatement+'\n') + + if k in self.cg.accepts : + self.emit( """\t\tcase '\\0':\n\t\t\t%saccept(%s);\n%s\n""" % (self.callType, sLocal, self.breakStatement)) + if self.cg.type is "DFA": + self.emit("\t\tdefault: %sreject(%s);\n\t}\n" % (self.callType, sLocal)) + else: + self.emit("\t}\n") + self.emit("}\n\n") + + self.emit (""" +%saccept(char* s) { +\tprintf(\"\\nstring matches regexp. \\n\\n\"); +}\n""" % self.funType) + self.emit (""" +%sreject(char* s) { +\tprintf(\"\\nstring does not match regexp. \\n\\n\"); +}\n""" % self.funType) + +def main(): + import doctest + doctest.testmod() + ''' + reg = Regexp("(A|B)*C") + ct = CTranslator(reg.regexp, CallGraph(reg.dfa)) + ct.translate() + ''' + +if __name__ == '__main__' : main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cbcTranslator.py Thu Jul 01 00:40:51 2010 +0900 @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +import sys +from dfareg import Regexp, CallGraph +from cTranslator import CTranslator + +class CbCTranslateExeption(Exception): + pass + +class CbCTranslator(CTranslator): + def __init__(self, regexp, cg, stream=sys): + if cg.type is "NFA": raise CbCTranslateExeption("can't translate CbC from NFA") + self.regexp = regexp + self.cg = cg + self.stream = stream + self.funType = '__code ' + self.callType = 'goto ' + self.breakStatement = '' + self.debug = False + +def main(): + reg = Regexp("(A|B)*C") + cbct = CbCTranslator(reg.regexp, CallGraph(reg.dfa)) + cbct.translate() + +if __name__ == '__main__' : main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/converter.py Thu Jul 01 00:40:51 2010 +0900 @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +import sys +from dfareg import Regexp, CallGraph +from cTranslator import CTranslator +from cbcTranslator import CbCTranslator +from optparse import OptionParser + +def main(argv): + myusage = "%prog [-C] regexp" + psr = OptionParser(usage=myusage) + psr.add_option("-C", action="store_true", dest="emitC", default=False, help="emit C-source") + psr.add_option("--from-dfa", action="store_true", dest="dfa", default=True, help="translate from DFA") + psr.add_option("--from-nfa", action="store_true", dest="nfa", default=False, help="translate from NFA") + psr.add_option("-o", action="store", type="string", dest="output", default=False, help="output file", metavar="FILE") + psr.add_option("-g", action="store_true", dest="debug", default=False, help="embed debug info") + psr.add_option("-D", action="store_true", dest="emitDot", default=False, help="emit Dot file") + (opts, args) = psr.parse_args(sys.argv) + if len(args) == 1: + psr.print_help() + exit() + reg = Regexp(args[1]) + if not opts.output: + output = sys.stdout + else: + output = open(opts.output,"w") + + if opts.nfa: + fa = reg.nfa + else: + fa = reg.dfa + + if opts.emitDot: + r.emitDot() + elif opts.emitC: + translator = CTranslator(reg.regexp, CallGraph(fa)) + translator.debug = opts.debug + else: + translator = CbCTranslator(reg.regexp, CallGraph(fa)) + translator.debug = opts.debug + + translator.translate(output) + +if __name__ == '__main__' : main(sys.argv)
--- a/src/dfareg.py Tue Jun 29 12:46:29 2010 +0900 +++ b/src/dfareg.py Thu Jul 01 00:40:51 2010 +0900 @@ -1,7 +1,7 @@ #!/usr/bin/env python import copy -import sys, re +import re from optparse import OptionParser class NondeterministicFiniteAutomaton(object): @@ -324,101 +324,98 @@ # create call graph (as dictionary) class CallGraph(object): - def __init__(self, dfa): - self.dfa = dfa - self.start = "state_"+self.set2name(dfa.start) - (self.map, self.accepts) = self.getCallGraph(self.dfa) + """ + CallGraph is State Transition Diagram. + it's can be create from DFA or DFA. + >>> reg = Regexp(\"AA*|B\") + >>> dfacg = CallGraph(reg.dfa) + >>> nfacg = CallGraph(reg.nfa) + >>> dfacg.map + {'1_6_8': {'A': ['2_3_5'], 'B': ['7']}, '2_3_5': {'A': ['3_4']}, '7': {}, '3_4': {'A': ['3_4']}} + >>> dfacg.states + set(['1_6_8', '2_3_5', '7', '3_4']) + >>> dfacg.accepts + set(['2_3_5', '7', '3_4']) + >>> nfacg.map + {'1': {'A': ['2']}, '3': {'A': ['4']}, '2': {'': ['5']}, '5': {'': ['3']}, '4': {'': ['3']}, '7': {}, '6': {'B': ['7']}, '8': {'': ['1', '6']}} + >>> nfacg.states + set(['1', '3', '2', '5', '4', '7', '6', '8']) + >>> nfacg.accepts + set(['5', '4', '7']) + """ + + def __init__(self, fa): + self.fa = fa + if isinstance(fa, DeterministicFiniteAutomaton): + self.type = "DFA" + self.start = self.set2name(fa.start) + else: + self.type = "NFA" + self.start = str(fa.start) + self.inputs = set() + self.states = set() + (self.map, self.accepts) = self.getCallGraph(self.fa) # return state name from set def set2name(self, set_): l = list(set_) l.sort() - return '_'.join(str(x) for x in l) - - def getCallGraph(self, dfa): + return ('_'.join(str(x) for x in l)) - funcMap = dict() - funcMap[dfa.start] = set() + def getCallGraph(self, fa): + transitionCases = dict() + if self.type is "DFA": + transitionCases[fa.start] = set() + else: + transitionCases[frozenset([fa.start])] = set() + # transitionCases is hash table that binds a state and inputs, + # it's useful for transition definition + + # fa is dfa or nfa # dfa.map => {(frozenset([1, 15]), 'd'): frozenset([16]), ....} # : frozenset(1, 15) x 'd' -> frozenset([16]) - - for v in dfa.map.itervalues(): - funcMap[frozenset(v)] = set() + # nfa.map => {(6, ''): set([5, 7]), (1, 'A'): set([2]), ....} + # : 6 x '' -> set([5, 7]) - for key in dfa.map.iterkeys(): - slot = funcMap.setdefault(key[0], set()) - slot.add(key[1]) + for nextState in fa.map.itervalues(): + if self.type is "DFA": + transitionCases[frozenset(nextState)] = set() + else: + for nextState_ in nextState: + transitionCases[frozenset([nextState_])] = set() - for v in dfa.map.itervalues(): - funcMap[frozenset(v)] = set() - - for key in dfa.map.iterkeys(): - slot = funcMap.setdefault(key[0], set()) - slot.add(key[1]) + for (state, input) in fa.map.iterkeys(): + if self.type is "NFA": + state = [state] + slot = transitionCases.setdefault(frozenset(state), set()) + slot.add(input) callGraph = dict() - accepts = list() - - # emit cbc code - for k in funcMap.iterkeys(): - callGraph["state_"+self.set2name(k)] = set() + accepts = set() - for k, v in funcMap.iteritems(): - caseMap = dict() - for case in v: - caseMap[case] = "state_"+self.set2name(dfa.map[(frozenset(k), case)]) - if k in dfa.accepts: - accepts.append("state_"+self.set2name(k)) - caseMap['\\0'] = "accept" - callGraph["state_"+self.set2name(k)] = caseMap - - return (callGraph, accepts) - -# emit CbC-souce, from CallGraph -def dfa2CbC(cg, regexp, emitCFlag): - if emitCFlag: - fun_type = 'void ' - call_t = '' - break_s = '\n\t\t\tbreak;' - else: - fun_type = '__code ' - call_t = 'goto ' - break_s = '' + # create CallGraph + for state in transitionCases.iterkeys(): + callGraph[self.set2name(state)] = set() - # emit cbc(or c) code - print "#include <stdio.h>\n" - for k in cg.map.iterkeys(): - print fun_type + k + "(char* s);" - print fun_type + 'accept(char* s);' - print fun_type + 'reject(char* s);' - print """ -int main(int argc, char* argv[]) { -\tputs(\"regexp: %s\"); -\tputs(\"number of state: %d\"); -\tprintf(\"string: %%s\\n\", argv[1]); -\t%s%s(argv[1]); -\treturn 0; -}\n""" % (regexp, len(cg.map), call_t, cg.start) - - for k, v in cg.map.iteritems(): - print fun_type + k + "(char* s) {" - print "\tswitch(*s++) {" - for case, next_state in v.iteritems(): - print "\t\tcase '%s': " % (case) - print "\t\t\t%s%s(s);%s" % (call_t, next_state, break_s) - - print "\t\tdefault: %sreject(s);\n\t}" % call_t - print "}\n" - - print """ -%saccept(char* s) { -\tprintf(\"\\nstring matches regexp. \\n\\n\"); -}\n""" % fun_type - print """ -%sreject(char* s) { -\tprintf(\"\\nstring does not match regexp. \\n\\n\"); -}\n""" % fun_type + for (state, input) in transitionCases.iteritems(): + caseMap = dict() + self.states.add(self.set2name(state)) + if self.type is "DFA": + for case in input: + self.inputs.add(case) + caseMap[case] = [self.set2name(fa.map[frozenset(state), case])] + if state in fa.accepts: + accepts.add(self.set2name(state)) + else: + self.states.add(str(list(state)[0])) + for case in input: + self.inputs.add(case) + caseMap[case] = [str(next_) for next_ in fa.map[list(state)[0], case]] + if list(state)[0] in fa.accepts: + accepts.add(self.set2name(state)) + callGraph[self.set2name(state)] = caseMap + return (callGraph, accepts) def dfa2Dot(cg, regexp): print """ @@ -473,19 +470,8 @@ def compile(regexp): return Regexp(regexp) -def main(argv): - myusage = "%prog [-C] regexp" - psr = OptionParser(usage=myusage) - psr.add_option("-C", action="store_true", dest="emitCFlag", default=False, help="emit C-source") - psr.add_option("-D", action="store_true", dest="emitDot", default=False, help="emit Dot file") - (opts, args) = psr.parse_args(sys.argv) - if len(args) == 1: - psr.print_help() - exit() - r = compile(args[1]) - if opts.emitDot: - r.emitDot() - else: - r.emitCbC(opts.emitCFlag) +def test(a): + import doctest + doctest.testmod() -if __name__ == '__main__' : main(sys.argv) +if __name__ == '__main__' : test()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/translator.py Thu Jul 01 00:40:51 2010 +0900 @@ -0,0 +1,19 @@ +#!/usr/bin/env python + +import sys + +class Translator(object): + def __init__(self, regexp, cg, stream=sys.stdout): + self.regexp = regexp + self.cg = cg + self.stream = stream + + def emit(self, string): + self.stream.write(string) + + def translateFromCallGraph(self): + pass + + def translate(self, stream=sys.stdout): + self.stream = stream + self.translateFromCallGraph()