Mercurial > hg > Members > shinya > pyrect
changeset 12:41391400fe68
Add GREPTranslator(Translator) and implement jit-compile-grep,
which is faster than grep for regular expression searches in large files.
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 04 Jul 2010 08:40:59 +0900 |
parents | 94984eaa03e2 |
children | fb7922f6d9ef |
files | src/__init__.py src/benchgrep.sh src/benchgrep.sh~ src/cTranslator.py src/cTranslator.pyc src/cbcTranslator.py src/cbcTranslator.pyc src/converter.py src/dfareg.pyc src/dotTranslator.py src/grep_translator.py src/grep_translator.pyc src/jitgrep.py src/jitgrep.py~ src/reg2llvm.pyc src/template/grep.template src/translator.py src/translator.pyc |
diffstat | 13 files changed, 212 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
#!/bin/sh
# src/benchgrep.sh (new file in this changeset)
#
# Benchmark jitgrep against egrep: run both with the same arguments
# (regexp followed by file names), time each run, and diff their outputs.

egrepout="/tmp/egrep.out"
jitgrepout="/tmp/jitgrep.out"

echo "[jitgrep]"
# "$@" is quoted so patterns and file names containing spaces or glob
# characters reach both programs unchanged.
time ./jitgrep.py "$@" > "$jitgrepout"

# printf instead of echo "\n...": whether echo interprets backslash
# escapes differs between /bin/sh implementations.
printf '\n[egrep]\n'
time egrep "$@" > "$egrepout"

printf '\n[diff egrep jitgrep]\n'
diff "$egrepout" "$jitgrepout"

# The output files are left in place for inspection.
#rm -f $egrepout $jitgrepout
#!/bin/sh
# src/benchgrep.sh~ (editor backup of benchgrep.sh, new file in this changeset)
#
# Same benchmark as benchgrep.sh, but egrep runs before jitgrep.

egrepout="/tmp/egrep.out"
jitgrepout="/tmp/jitgrep.out"

echo "[egrep]"
# "$@" is quoted so patterns and file names containing spaces or glob
# characters reach both programs unchanged.
time egrep "$@" > "$egrepout"

# printf instead of echo "\n...": whether echo interprets backslash
# escapes differs between /bin/sh implementations.
printf '\n[jitgrep]\n'
time ./jitgrep.py "$@" > "$jitgrepout"

printf '\n[diff egrep jitgrep]\n'
diff "$egrepout" "$jitgrepout"

# The output files are left in place for inspection.
#rm -f $egrepout $jitgrepout
--- a/src/cTranslator.py Sun Jul 04 00:48:24 2010 +0900 +++ b/src/cTranslator.py Sun Jul 04 08:40:59 2010 +0900 @@ -45,8 +45,6 @@ }\n""" % self.funType) def emit_driver(self): - self.emit(self.funType + 'accept(char* s);\n') - self.emit(self.funType + 'reject(char* s);\n') self.emit(""" int main(int argc, char* argv[]) { \tputs(\"regexp: %s\"); @@ -77,7 +75,7 @@ if default: self.emit( """\t\tdefault:\n\t\t\t%s%s(NULL);\n""" % (self.callType, default)) - self.emit("\t}") + self.emit("\t}\n") def emit_state(self, cur_state, transition): @@ -102,14 +100,18 @@ self.emit_switch(transition, default="reject") else: self.emit_switch(transition) - self.emit("\n}\n\n") + self.emit("}\n\n") + + def emit_initialization(self): + self.emit("#include <stdio.h>\n\n") + for state in self.cg.map.iterkeys(): + self.emit(self.funType + self.modify_state_name(state) + "(char* s);\n") + self.emit(self.funType + 'accept(char* s);\n') + self.emit(self.funType + 'reject(char* s);\n') def emit_from_callgraph(self): # self.emit C-source code - self.emit("#include <stdio.h>\n") - for k in self.cg.map.iterkeys(): - self.emit(self.funType + self.modify_state_name(k) + "(char* s);\n") - + self.emit_initialization() self.emit_driver() for cur_state, transition in self.cg.map.iteritems():
# src/grep_translator.py (new file in this changeset)
"""Translator that emits the C source of a grep-like program for one regexp."""

import os

from cTranslator import CTranslator
from dfareg import Regexp, CallGraph

# The C driver template is looked up next to this module, so the translator
# works regardless of the current working directory.
_TEMPLATE_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "template", "grep.template")


class GREPTranslator(CTranslator):
    """GREPTranslator

    Emits one C function per call-graph state.  Every state function has
    type ``int``: ``accept`` returns 1 and ``reject`` returns 0, and each
    state returns the result of the state it transfers to.

    >>> string = \"(A|B)*C\"
    >>> reg = Regexp(string)
    >>> dfacg = CallGraph(reg.dfa)
    >>> tje = GREPTranslator(string, dfacg)
    >>> tje.translate()
    """

    def __init__(self, regexp, cg):
        CTranslator.__init__(self, regexp, cg)
        # State functions return a match flag instead of printing a verdict.
        self.funType = 'int '
        self.callType = 'return '
        self.breakStatement = ''

    def emit_accept_state(self):
        """Emit the accept() function, which reports a match (1)."""
        self.emit("""
%saccept(char* s) {
\treturn 1;
}\n""" % self.funType)

    def emit_reject_state(self):
        """Emit the reject() function, which reports no match (0)."""
        self.emit("""
%sreject(char* s) {
\treturn 0;
}\n""" % self.funType)

    def emit_initialization(self):
        """Emit the includes, BUFSIZE, and prototypes for every state."""
        self.emit("#include <stdio.h>\n")
        self.emit("#include <stdlib.h>\n")
        self.emit("#include <string.h>\n\n")
        self.emit("#define BUFSIZE 1024\n\n")
        for state in self.cg.map.iterkeys():
            self.emit(self.funType + self.modify_state_name(state) + "(char* s);\n")
        self.emit(self.funType + 'accept(char* s);\n')
        self.emit(self.funType + 'reject(char* s);\n')

    def emit_driver(self):
        """Emit match() followed by the grep()/main() driver template.

        match() tries the start state at every offset of the text,
        including the terminating NUL, and returns 1 on the first success.
        """
        self.emit("""
int match(char *text) {
\tdo {
\t\tif (%s(text))
\t\t\treturn 1;
\t} while (*text++ != '\\0');
\treturn 0;
}\n\n""" % (self.modify_state_name(self.cg.start)))
        # Close the template file promptly instead of leaking the handle.
        with open(_TEMPLATE_PATH, "r") as template:
            self.emit(template.read())
        self.emit("\n")

    def emit_state(self, cur_state, transition):
        """Emit the C function for one state.

        Accepting states return accept(s) immediately; other states switch
        on their transitions.  For a DFA, unmatched input goes to reject.
        """
        self.emit(self.funType + self.modify_state_name(cur_state) + "(char* s) {\n")
        if cur_state in self.cg.accepts:
            self.emit("\treturn accept(s);\n")
        elif transition:
            # Compare by value: identity ("is") against a string literal
            # only works by accident of interning.
            if self.cg.type == "DFA":
                self.emit_switch(transition, default="reject")
            else:
                self.emit_switch(transition)
        else:
            # A non-accepting state with no way out can never match; without
            # this the int function would fall off its end with no value.
            self.emit("\treturn reject(s);\n")
        self.emit("}\n\n")


def test():
    import doctest
    doctest.testmod()


if __name__ == '__main__':
    test()
#!/usr/bin/env python
# src/jitgrep.py (new file in this changeset)
"""jitgrep: compile a regexp to a native matcher with gcc, then run it."""

import sys
import os
import re
import shutil
import subprocess
import tempfile

from grep_translator import GREPTranslator
from dfareg import Regexp, CallGraph


def main(argv):
    """Translate argv[1] to C, compile it, and grep with the result.

    With only a regexp, each line read from stdin is passed to the compiled
    matcher as the text to test.  With additional arguments, they are
    passed to the matcher as file names to search.
    """
    if len(argv) < 2:
        print("usage: jitgrep regexp [file ..]")
        return

    string = argv[1]
    reg = Regexp(string)
    dfacg = CallGraph(reg.dfa)
    tje = GREPTranslator(string, dfacg)

    # A private temporary directory avoids clashes between concurrent runs
    # and the predictable /tmp file names of a fixed path.
    workdir = tempfile.mkdtemp(prefix="jitgrep_")
    srcpath = os.path.join(workdir, "jitgrep_emit.c")
    binpath = os.path.join(workdir, "jitgrep_emit")

    try:
        with open(srcpath, 'w') as tmpsrc:
            tje.translate(tmpsrc)

        # Argument lists (no shell) keep patterns and file names with spaces
        # or shell metacharacters intact and rule out command injection.
        if subprocess.call(['gcc', srcpath, '-o', binpath]) != 0:
            sys.stderr.write("jitgrep: gcc failed to compile the generated source\n")
            return

        if len(argv) == 2:
            while True:
                try:
                    # The whole input line is one argument: the text to match.
                    subprocess.call([binpath, raw_input()])
                except (KeyboardInterrupt, EOFError):
                    # Ctrl-C or end of input ends the interactive loop.
                    break
        else:
            # The compiled driver treats argv[2:] as file names, so argv[1]
            # is filled with a placeholder.
            subprocess.call([binpath, 'dummy_option'] + argv[2:])
    finally:
        # Always remove the generated source and binary, even on errors.
        shutil.rmtree(workdir, ignore_errors=True)


if __name__ == '__main__':
    main(sys.argv)
# src/jitgrep.py~ (editor backup of an early jitgrep.py, new file in this changeset)
import sys
from grep_translator import GREPTranslator
from dfareg import Regexp, CallGraph


def main(argv):
    """Emit the C matcher for a fixed sample regexp and echo the arguments.

    The parameter was originally spelled ``sys.argv``, which is not a valid
    parameter name and made the whole file a SyntaxError.
    """
    string = "(gcc|fndecl|build)"
    reg = Regexp(string)
    dfacg = CallGraph(reg.dfa)
    tje = GREPTranslator(string, dfacg)
    # Close the output file so the generated source is flushed to disk.
    with open("/tmp/jitgrep_emit.c", "w") as tmpsrc:
        tje.translate(tmpsrc)
    print(argv)


if __name__ == '__main__':
    main(sys.argv)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/template/grep.template Sun Jul 04 08:40:59 2010 +0900 @@ -0,0 +1,43 @@ +int grep(FILE *f, char *name) { + int n, nmatch; + char buf[BUFSIZE]; + nmatch = 0; + while (fgets(buf, sizeof buf, f) != NULL) { + n = strlen(buf); + if (n > 0 && buf[n-1] == '\n') + buf[n-1] = '\0'; + if (match(buf)) { + nmatch++; + if (name != NULL) + printf("%s:", name); + printf("%s\n", buf); + } + } + return nmatch; +} + +int main(int argc, char* argv[]) { + int i, nmatch; + FILE *f; + nmatch = 0; + /* for (i = 0; i < argc; printf("%s\n", argv[i++])); */ + if (argc == 2) { + if (match(argv[1])) { + printf("%s\n", argv[1]); + nmatch++; + } + } else { + for (i = 2; i < argc; i++) { + f = fopen(argv[i], "r"); + if (f == NULL) { + fprintf(stderr, "can't open: %s\n", argv[i]); + continue; + } + if (grep(f, argc > 3 ? argv[i] : NULL) > 0) + nmatch++; + fclose(f); + } + } + + return nmatch; +}