view src/c_translator.py @ 22:5149b48b22a9

bug fix: c_translator.py, grep_translator.py
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Mon, 05 Jul 2010 20:30:14 +0900
parents a24acddedf89
children 0e90ae1a2d9b
line wrap: on
line source

#!/Usr/bin/env python

from dfareg import Regexp, CallGraph
from translator import Translator

class CTranslator(Translator):
    """CTranslator
    This Class can translate from DFA or NFA into C source code.
    DFA: A simple state transition as tail call (also can implement with CbC).
    NFA: using stack, deepening depth-first search.
    >>> string = '(A|B)*C'
    >>> reg = Regexp(string)
    >>> dfacg = CallGraph(reg.dfa)
    >>> nfacg = CallGraph(reg.nfa)
    >>> CTranslator(string, dfacg).translate()
    >>> CTranslator(string, nfacg).translate()
    """
    def __init__(self, regexp, cg):
        Translator.__init__(self, regexp, cg)
        self.funType = 'void '
        self.callType = ''
        self.breakStatement = '\t\t\tbreak;'
        self.debug = False
        if self.cg.type == "DFA":
            self.name_hash = self.create_name_hash()

    def modify_state_name(self, state_name):
        if state_name == "accept" or state_name == "reject":
            return state_name
        if self.cg.type == "DFA":
            return "state_"+self.name_hash[state_name]
        else:
            return "state_"+state_name

    def emit_accept_state(self):
        self.emit ("""
%saccept(char* s) {
\tprintf(\"\\nstring matches regexp. \\n\\n\");
}\n""" % self.funType)

    def emit_reject_state(self):
        self.emit ("""
%sreject(char* s) {
\tprintf(\"\\nstring does not match regexp. \\n\\n\");
}\n""" % self.funType)

    def emit_driver(self):
        self.emit("""
int main(int argc, char* argv[]) {
\tputs(\"regexp: %s\");
\tputs(\"number of state: %d\");
\tprintf(\"string: %%s\\n\", argv[1]);
\t%s%s(argv[1]);
""" % (self.regexp, len(self.cg.states), self.callType, self.modify_state_name(self.cg.start)))
        if self.cg.type == "NFA":
            self.emit("\treject(argv[1]);\n")
        self.emit("""
\treturn 0;
}\n\n""")

    def emit_switch(self, case, default=None):
        self.emit("\tswitch(*s++) {\n")
        for input, next_states in case.iteritems():
            if input != '':
                self.emit("\t\tcase '%s': \n" % (input))
                for next_state in next_states:
                    self.emit("\t\t\t%s%s(s);\n" % (self.callType, self.modify_state_name(next_state)))
                if self.breakStatement != '': self.emit(self.breakStatement+'\n')

        if default:
            self.emit( """\t\tdefault:\n\t\t\t%s%s(NULL);\n""" % (self.callType, default))
        self.emit("\t}\n")


    def emit_state(self, cur_state, transition):
        self.emit(self.funType + self.modify_state_name(cur_state) + "(char* s) {\n")
        if self.debug:
            self.emit("\tprintf(\"state: %s, input: %%s\\n\", s);\n" % (cur_state))
        if self.cg.type == "NFA":
            if '' in transition:
                epsilon_transition = transition.pop('')
                for n in epsilon_transition:
                    self.emit("\t%s%s(s);\n" % (self.callType, self.modify_state_name(n)))

        if cur_state in self.cg.accepts:
            transition['\\0'] = ["accept"]

        if transition:
            if self.cg.type == "DFA":
                self.emit_switch(transition, default="reject")
            else:
                self.emit_switch(transition)
        self.emit("}\n\n")

    def emit_initialization(self):
        self.emit("#include <stdio.h>\n\n")
        for state in self.cg.map.iterkeys():
            self.emit(self.funType + self.modify_state_name(state) + "(char* s);\n")
        self.emit(self.funType + 'accept(char* s);\n')
        self.emit(self.funType + 'reject(char* s);\n')

    def emit_from_callgraph(self):
        # self.emit C-source code
        self.emit_initialization()
        self.emit_driver()

        for cur_state, transition in self.cg.map.iteritems():
            self.emit_state(cur_state, transition)

        self.emit_accept_state()
        self.emit_reject_state()

def test():
    import doctest
    doctest.testmod()

if __name__ == '__main__': test()