view src/cTranslator.py @ 8:a28f87d353bb

simplify DFA state name. (in C/Dot Translator. ex: "1_2_3" -> "1")
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Sat, 03 Jul 2010 01:40:36 +0900
parents 168d60b03e2c
children 973b596bd124
line wrap: on
line source

#!/Usr/bin/env python

from dfareg import Regexp, CallGraph
from translator import Translator

class CTranslator(Translator):
    """
    CTranslator
    This Calss can translate from DFA or NFA into C source code.
    DFA: A simple state transition as tail call (also can implement with CbC).
    NFA: using stack, deepening depth-first search.
    >>> string = \"(A|B)*C\"
    >>> reg = Regexp(string)
    >>> dfacg = CallGraph(reg.dfa)
    >>> nfacg = CallGraph(reg.nfa)
    >>> CTranslator(string, dfacg).translate()
    >>> CTranslator(string, nfacg).translate()
    """
    def __init__(self, regexp, cg):
        Translator.__init__(self, regexp, cg)
        self.funType = 'void '
        self.callType = ''
        self.breakStatement = '\t\t\tbreak;'
        self.debug = False
        if self.cg.type is "DFA":
            self.dfaStateNameHash = self.createDFAStateNameHash()

    def modifyStateName(self, stateName):
        if self.cg.type is "DFA":
            return "state_"+self.dfaStateNameHash[stateName]
        else:
            return "state_"+stateName

    def translateFromCallGraph(self):
        # self.emit C-source code
        self.emit("#include <stdio.h>\n")
        for k in self.cg.map.iterkeys():
            self.emit(self.funType + self.modifyStateName(k) + "(char* s);\n")
        self.emit(self.funType + 'accept(char* s);\n')
        self.emit(self.funType + 'reject(char* s);\n')
        self.emit("""
int main(int argc, char* argv[]) {
\tputs(\"regexp: %s\");
\tputs(\"number of state: %d\");
\tprintf(\"string: %%s\\n\", argv[1]);
\t%s%s(argv[1]);
""" % (self.regexp, len(self.cg.states), self.callType, self.modifyStateName(self.cg.start)))
        if self.cg.type is "NFA" : self.emit("\treject(argv[1]);\n")
        self.emit("""
\treturn 0;
}\n\n""")

        for k, v in self.cg.map.iteritems():
            self.emit(self.funType + self.modifyStateName(k) + "(char* s) {\n")
            if self.debug: self.emit("\tprintf(\"state: %s, input: %%s\\n\", s);\n" % (k))
            if self.cg.type is "NFA":
                sLocal = "s_local"
                self.emit("\tchar* %s = s;\n" % (sLocal))
                # epsilon-transition
                for ss in (ss for i,ss in v.iteritems() if i == ''):
                    for s in ss:
                        self.emit("\t%s%s(%s);\n" % (self.callType, self.modifyStateName(s), sLocal))
            else:
                sLocal = "s"

            self.emit("\tswitch(*%s++) {\n" % (sLocal))

            for input, nextStates in v.iteritems():
                if input != '':
                    self.emit("\t\tcase '%s': \n" % (input))
                    for nextState in nextStates:
                        self.emit("\t\t\t%s%s(%s);\n" % (self.callType, self.modifyStateName(nextState), sLocal))
                    if self.breakStatement != '' : self.emit(self.breakStatement+'\n')

            if k in self.cg.accepts :
                self.emit( """\t\tcase '\\0':\n\t\t\t%saccept(%s);\n%s\n""" % (self.callType, sLocal, self.breakStatement))
            if self.cg.type is "DFA":
                self.emit("\t\tdefault: %sreject(%s);\n\t}\n" % (self.callType, sLocal))
            else:
                self.emit("\t}\n")
            self.emit("}\n\n")

        self.emit ("""
%saccept(char* s) {
\tprintf(\"\\nstring matches regexp. \\n\\n\");
}\n""" % self.funType)
        self.emit ("""
%sreject(char* s) {
\tprintf(\"\\nstring does not match regexp. \\n\\n\");
}\n""" % self.funType)

def test():
    import doctest
    doctest.testmod()
    '''
    reg = Regexp("(A|B)*C")
    ct = CTranslator(reg.regexp, CallGraph(reg.dfa))
    ct.translate()
    '''

if __name__ == '__main__' : test()