Mercurial > hg > Members > shinya > pyrect
changeset 28:0e90ae1a2d9b
Add DFATranslator(GREPTranslator), which can translate into GNU grep's DFA-based matching function.
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 08 Jul 2010 20:02:42 +0900 |
parents | 3db85244784b |
children | b833746d9d92 |
files | src/c_translator.py src/dfa_translator.py |
diffstat | 2 files changed, 78 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/src/c_translator.py Thu Jul 08 06:35:39 2010 +0900 +++ b/src/c_translator.py Thu Jul 08 20:02:42 2010 +0900 @@ -21,6 +21,7 @@ self.callType = '' self.breakStatement = '\t\t\tbreak;' self.debug = False + self.eols = ('\\0', '\\n') if self.cg.type == "DFA": self.name_hash = self.create_name_hash() @@ -68,7 +69,7 @@ if self.breakStatement != '': self.emit(self.breakStatement+'\n') if default: - self.emit( """\t\tdefault:\n\t\t\t%s%s(NULL);\n""" % (self.callType, default)) + self.emit( """\t\tdefault:\n\t\t\t%s%s(s);\n""" % (self.callType, default)) self.emit("\t}\n") @@ -83,7 +84,8 @@ self.emit("\t%s%s(s);\n" % (self.callType, self.modify_state_name(n))) if cur_state in self.cg.accepts: - transition['\\0'] = ["accept"] + for eol in self.eols: + transition[eol] = ["accept"] if transition: if self.cg.type == "DFA":
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/dfa_translator.py Thu Jul 08 20:02:42 2010 +0900 @@ -0,0 +1,74 @@ +#!/usr/bin/env python + +from grep_translator import GREPTranslator +from dfareg import Regexp, CallGraph + +'''(build|fndecl|gcc)''' +class DFATranslator(GREPTranslator): + """DFATranslator + This class can translate from DFA into size_t DFA(char* s). + which is entirely equivalent to dfaexec(..) in GNU-grep (see src/dfa.c). + * but which is not work currently. (when search large-file, there is fewer + * accepted-lines than grep's dfaexec.) + * probably, there is some problem exists about buffering. + >>> string = '(build|fndecl|gcc)' + >>> reg = Regexp(string) + >>> dfacg = CallGraph(reg.dfa) + >>> tje = DFATranslator(string, dfacg) + >>> tje.translate() + """ + + def __init__(self, regexp, cg): + GREPTranslator.__init__(self, regexp, cg) + self.funType = 'size_t ' + self.callType = 'return ' + self.breakStatement = '' + + def emit_initialization(self): + for state in self.cg.map.iterkeys(): + self.emit(self.funType + self.modify_state_name(state) + "(char* s);\n") + self.emit(self.funType + 'accept(char* s);\n') + self.emit(self.funType + 'reject(char* s);\n') + + def emit_accept_state(self): + self.emit (""" +%saccept(char* s) { +\treturn 1; +}\n""" % self.funType) + + def emit_reject_state(self): + self.emit (""" +%sreject(char* s) { +\treturn 0; +}\n""" % self.funType) + + def emit_driver(self): + self.emit(""" +/* This DFA accept only \'%s\'*/ +%sDFA(char *s) { + char *begin = s; + do { + if (%s(s)) { //(matchhere(regexp+1, text)) + return (char const *) s - begin; + } + } while (*s != '\\n' && *s++ != '\\0'); + return (size_t) -1; +}\n\n""" % (self.regexp, self.funType, self.modify_state_name(self.cg.start))) + + def emit_state(self, cur_state, transition): + self.emit(self.funType + self.modify_state_name(cur_state) + "(char* s) {\n") + if cur_state in self.cg.accepts: + self.emit("\treturn accept(s);\n") + else: + if transition: + if 
self.cg.type == "DFA": + self.emit_switch(transition, default="reject") + else: + self.emit_switch(transition) + self.emit("}\n\n") + +def test(): + import doctest + doctest.testmod() + +if __name__ == '__main__': test()