Mercurial > hg > Members > shinya > pyrect
changeset 52:abb0691e792a
bug fix. remove unnecessary files.
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 23 Aug 2010 20:00:04 +0900 |
parents | c48284580d5a |
children | 1f8c474ca8b3 |
files | pyrect/grep_bench.sh pyrect/jitgrep.py pyrect/regexp/lexer.py pyrect/translator/c_translator.py pyrect/translator/cbc_grep_translator.py pyrect/translator/cbc_translator.py pyrect/translator/dot_translator.py pyrect/translator/grep_translator.py |
diffstat | 8 files changed, 214 insertions(+), 79 deletions(-) [+] |
line wrap: on
line diff
--- a/pyrect/grep_bench.sh Tue Aug 10 15:56:23 2010 +0900 +++ b/pyrect/grep_bench.sh Mon Aug 23 20:00:04 2010 +0900 @@ -7,28 +7,39 @@ cgrepout="/tmp/cgrep.out" dgrepout="/tmp/dgrep.out" -echo "[jitgrep]" -time ./jitgrep.py $@ > $jitgrepout +echo "[jitgrep - compiling]" +time ./jitgrep.py -c $@ > /dev/null -echo "[jitgrep - with out compiling]" +echo +echo "[jitgrep - matching with out compiling]" +time /tmp/jitgrep $@ > $jitgrepout + +echo +echo "[jitgrep - cbc matching with out compiling]" time /tmp/jitgrep $@ > /dev/null -#echo "\n[llgrep]" +#echo +#echo "[llgrep]" #time ./llgrep.py -O $@ 2> /dev/null > $llgrepout -echo "\n[cgrep]" +echo +echo "[cgrep]" time cgrep -E $@ > $cgrepout -echo "\n[egrep]" +echo +echo "[egrep]" time egrep $@ > $egrepout -#echo "\n[dgrep (non-filter grep)]" +#echo +#echo "[dgrep (non-filter grep)]" #time dgrep -E $@ > $dgrepout -#echo "\n[agrep]" +#echo +#echo "[agrep]" #time agrep $@ > $agrepout -echo "\n[diff egrep jitgrep]" +echo +echo "[diff egrep jitgrep]" diff $egrepout $jitgrepout #echo "[diff egrep llgrep]" @@ -40,10 +51,7 @@ #echo "[diff cgrep llgrep]" #diff $cgrepout $llgrepout -echo "\n[matches]" +echo "[matches]" wc $egrepout -#echo "[diff agrep jitgrep]" -#diff $agrepout $jitgrepout - #rm -f $egrepout $jitgrepout $agrepout $cgrepout $llgrepout
--- a/pyrect/jitgrep.py Tue Aug 10 15:56:23 2010 +0900 +++ b/pyrect/jitgrep.py Mon Aug 23 20:00:04 2010 +0900 @@ -10,7 +10,7 @@ def main(argv): myusage = """%prog [--buf-size=size] [--dump] - [--time] [--debug] [--cc=compiler] + [--time] [--debug] [--cc=compiler] [-c] [-Olevel] regexp [file..] [--out=file]""" psr = OptionParser(usage=myusage) @@ -20,8 +20,9 @@ psr.add_option("--cc", action="store", type="string", dest="cc", default="gcc", metavar="FILE", help="Choose compiler (default is gcc).") + psr.add_option("-c", action="store_true", dest="compile", default=False , help="compile only.") psr.add_option("--buf-size=size", action="store", type="string", dest="bufsize", default="1M" , help="Set read-buffer size (e.x. 1024, 1024K, 2M)") - psr.add_option("--CFLAGS", action="store", type="string", dest="cflags", default="-O3 -fomit-frame-pointer", help="Print compile/matching time.") + psr.add_option("--CFLAGS", action="store", type="string", dest="cflags", default="-O3", help="Print compile/matching time.") psr.add_option("--time", action="store_true", dest="time", default=False, help="Print compile/matching time.") psr.add_option("--debug", action="store_true", dest="debug", default=False, help="Dump commands, not evalute matching (except interactive mode).") psr.add_option("--label", action="store_true", dest="label", default=False, help="label implimentation in C.") @@ -32,12 +33,12 @@ if len(args) < 2: psr.print_usage() - exit(0) + return if opts.cc == "cbc": cbc = True opts.cc = "$CBCROOT/INSTALL_DIR/bin/gcc" - opts.cflags += " -L$CBCROOT/gcc" + opts.cflags += " -L$CBCROOT/gcc -w" else: cbc = False @@ -60,7 +61,7 @@ bufsize = int(opts.bufsize) except ValueError: psr.print_usage() - exit(0) + return if opts.time : start_time = time.time() reg = Regexp(string) @@ -75,7 +76,7 @@ if opts.dump: grept.translate() - exit(0) + return else: tmpsrc = open(srcpath, 'w') grept.translate(tmpsrc) @@ -100,6 +101,9 @@ print("args=", args) print("opts=", opts) + if 
opts.compile: + return + if len(args) == 2 and not opts.debug: while True: try:
--- a/pyrect/regexp/lexer.py Tue Aug 10 15:56:23 2010 +0900 +++ b/pyrect/regexp/lexer.py Mon Aug 23 20:00:04 2010 +0900 @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- encoding: utf-8 -*- +#-*- encoding: utf-8 -*- from ply import lex
--- a/pyrect/translator/c_translator.py Tue Aug 10 15:56:23 2010 +0900 +++ b/pyrect/translator/c_translator.py Mon Aug 23 20:00:04 2010 +0900 @@ -140,7 +140,7 @@ if '' in transition: epsilon_transition = transition.pop('') for n in epsilon_transition: - self.emit("\t%s%s(s);\n" % (self.callType, self.state_name(n))) + self.emit("return %s(s);\n" % self.state_name(n)) else: default = "reject" @@ -197,7 +197,7 @@ self._emit("/* %s */" % input_node.__repr__()) def visit_Character(self, char): - self._emit("case %d:" % char.char) + self._emit("case %d: /* match %s */" % (char.char, chr(char.char))) self._emit(" return %s(s);" % self.next) def visit_EndLine(self, endline):
--- a/pyrect/translator/cbc_grep_translator.py Tue Aug 10 15:56:23 2010 +0900 +++ b/pyrect/translator/cbc_grep_translator.py Mon Aug 23 20:00:04 2010 +0900 @@ -1,12 +1,13 @@ #!/usr/bin/env python -from grep_translator import GREPTranslator +import os from pyrect.regexp import Regexp +from pyrect.translator import CbCTranslator class CbCGREPTranslateExeption(Exception): pass -class CbCGREPTranslator(GREPTranslator): +class CbCGREPTranslator(CbCTranslator): """CbCGREPTranslator This Class can translate form DFA into grep source-code. which based on (beautiful) mini-grep introduced \"The Practice of Programming\" @@ -17,15 +18,15 @@ >>> tje.translate() """ + BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + def __init__(self, regexp): - GREPTranslator.__init__(self, regexp) - self.funType = '__code ' - self.interface = "char *s, char* cur, char* buf, FILE *f, char* filename" + CbCTranslator.__init__(self, regexp) + self.interface = "unsigned char *s, unsigned char* cur, unsigned char* buf, FILE *f, char* filename" self.args = "s, cur, buf, f, filename" - self.callType = 'goto ' - self.breakStatement = '' self.print_file = False - self.__bufsize = 1024 + self.__bufsize = 1024 * 1024 + self.trans_stmt = self._trans_stmt(self.emit, self.args) def getbufsize(self): return self.__bufsize @@ -34,6 +35,13 @@ bufsize = property(getbufsize, setbufsize) + def state_name(self, state): + if state in ("accept", "reject", "next_ptr", "next_line", "returner"): + return state + else: + return "state_" + state + + def emit_accept_state(self): self.emit("__code accept(%s) {\n" % self.interface) if self.print_file: @@ -62,7 +70,7 @@ self.emit(""" __code next_line(%s) { if(fgets(buf, LINEBUFSIZE, f) == NULL) { - goto returner(); + goto returner(%s); } int n = strlen(buf); if (n > 0 && buf[n-1] == '\\n') @@ -71,28 +79,24 @@ s = cur; goto DFA(%s); } -""" % (self.interface, self.args)) +""" % (self.interface, self.args, self.args)) self.emit(""" -__code returner() { +__code 
returner(%s) { return; -}""") +}""" % self.interface) def emit_initialization(self): - self.emit("#include <stdio.h>\n") - self.emit("#include <stdlib.h>\n") - self.emit("#include <string.h>\n\n") - self.emit("#define LINEBUFSIZE 1024\n") - self.emit("#define READBUFSIZE %d\n\n" % self.bufsize) + self.emit("#include <stdio.h>") + self.emit("#include <stdlib.h>") + self.emit("#include <string.h>", 2) + self.emit("#define LINEBUFSIZE 1024") + self.emit("#define READBUFSIZE %d" % self.bufsize, 2) - self.emit("%sDFA(%s);\n" % (self.funType, self.interface)) - for state in self.cg.map.iterkeys(): - self.emit(self.funType + self.state_name(state) + "(" + self.interface + ");\n") - self.emit(self.funType + 'accept(%s);\n' % self.interface) - self.emit(self.funType + 'reject(%s);\n' % self.interface) - self.emit(self.funType + 'next_ptr(%s);\n' % self.interface) - self.emit(self.funType + 'next_line(%s);\n' % self.interface) - self.emit(self.funType + 'returner();\n\n') - grepsource = open("template/grep.cbc") + self.emit("__code DFA(%s);\n" % self.interface) + for state in self.cg.map.keys() + ["accept", "reject", "next_ptr", "next_line", "returner"]: + self.emit("__code %s(%s);" % (self.state_name(state), self.interface)) + self.emit() + grepsource = open(self.BASE_DIR + "/template/grep.cbc") self.emit(grepsource.read()) self.emit_next_state() @@ -107,34 +111,18 @@ } """) self.emit(""" -%sDFA(%s) { +__code DFA(%s) { goto %s(%s); } -""" % (self.funType, self.interface, self.state_name(self.cg.start), self.args)) - - def emit_switch(self, case, default=None): - self.emit("\tswitch(*s++) {\n") - for input, next_state in case.iteritems(): - if input != '': - self.emit("\t\tcase '%s': \n" % (input)) - self.emit("\t\t\t%s%s(%s);\n" % (self.callType, self.state_name(next_state), self.args)) - if self.breakStatement != '': self.emit(self.breakStatement+'\n') - - if default: - self.emit( """\t\tdefault:\n\t\t\t%s%s(%s);\n""" % (self.callType, default, self.args)) - 
self.emit("\t}\n") +""" % (self.interface, self.state_name(self.cg.start), self.args)) def emit_state(self, cur_state, transition): - self.emit(self.funType + self.state_name(cur_state) + "(" + self.interface + ") {\n") if cur_state in self.cg.accepts: - self.emit("\tgoto accept(%s);\n" % self.args) + self.emiti("__code %s(%s) {" % (self.state_name(cur_state), self.interface)) + self.emit( "goto accept(%s);" % self.args) + self.emitd("}") else: - if transition: - if self.cg.type == "DFA": - self.emit_switch(transition, default="reject") - else: - self.emit_switch(transition) - self.emit("}\n\n") + CbCTranslator.emit_state(self, cur_state, transition) def test(): import doctest
--- a/pyrect/translator/cbc_translator.py Tue Aug 10 15:56:23 2010 +0900 +++ b/pyrect/translator/cbc_translator.py Mon Aug 23 20:00:04 2010 +0900 @@ -1,6 +1,8 @@ #!/usr/bin/env python from pyrect.regexp import Regexp +from pyrect.regexp.ast import * +from translator import Translator from c_translator import CTranslator class CbCTranslator(CTranslator): @@ -10,14 +12,147 @@ >>> reg = Regexp(string) >>> ct = CbCTranslator(reg) >>> ct.translate() - >>> ct.debug = True - >>> ct.translate() """ def __init__(self, regexp): - CTranslator.__init__(self, regexp) - self.funType = '__code ' - self.callType = 'goto ' - self.breakStatement = '' + Translator.__init__(self, regexp) + self.special_rule = (Range, BegLine, MBCharacter) + self.cg = regexp.dfacg + self.debug = False + self.interface = "unsigned char *s" + self.args = "s" + self.trans_stmt = self._trans_stmt(self.emit, self.args) + + def emit_accept_state(self): + self.emiti("__code accept(%s) {" % self.interface) + self.emit( "return;") + self.emitd("}", 2) + + def emit_reject_state(self): + self.emiti("__code reject(%s) {" % self.interface) + self.emit( "return;") + self.emitd("}", 2) + + def emit_driver(self): + self.emiti("int main(int argc, unsigned char* argv[]) {") + self.emit( 'buf = argv[1];') + self.emit( 'puts("regexp: %s");' % self.regexp.regexp) + self.emit( 'puts("number of state: %d");' % len(self.cg.states)) + self.emit( r'printf("string: %s\n", argv[1]);') + self.emit0( "goto %s((unsigned char*)argv[1]);" % self.state_name(self.cg.start)) + self.emit( "return 0;") + self.emitd("}", 2) + + def emit_switch(self, case, default=None): + if not case: + if default: + self.emit("goto %s(%s);" % (default, self.args)) + return + self.emiti("switch(*s++) {") + for case, next_ in case.iteritems(): + self.trans_stmt.emit(case, self.state_name(next_)) + if default: + self.emit("default: goto %s(%s);" % (default, self.args)) + self.emitd("}") + + def emit_state(self, cur_state, transition): + self.emiti("__code 
%s(%s) {" % (self.state_name(cur_state), self.interface)) + + if self.debug: + self.emit(r'printf("state: %s, input: %%s\n", s);' % cur_state) + if self.cg.type == "NFA": + default = None + if '' in transition: + epsilon_transition = transition.pop('') + for n in epsilon_transition: + self.emit("goto %s(%s);\n" % (self.state_name(n), self.args)) + else: + default = "reject" + + any_ = None + + for input_ in transition.keys(): + if type(input_) in self.special_rule: + self.trans_stmt.emit(input_, self.state_name(transition.pop(input_))) + elif type(input_) is AnyChar: + any_ = (input_, self.state_name(transition.pop(input_))) + default = None + + if cur_state in self.cg.accepts: + eol = Character('\0') + transition[eol] = "accept" + + self.emit_switch(transition, default) + + if any_: + self.trans_stmt.emit(any_[0], any_[1]) + + self.emitd("}", 2) + + def emit_initialization(self): + self.emit("#include <stdio.h>") + for state in self.cg.map.keys() + ["accept", "reject"]: + self.emit("__code %s(%s);" % (self.state_name(state), self.interface)) + self.emit('unsigned char* buf;') + self.emit_skip() + + def emit_from_callgraph(self): + # self.emit C-source code + self.emit_initialization() + self.emit_driver() + + for cur_state, transition in self.cg.map.iteritems(): + self.emit_state(cur_state, transition) + + self.emit_accept_state() + self.emit_reject_state() + + class _trans_stmt(CTranslator._trans_stmt): + def __init__(self, emit, args): + CTranslator._trans_stmt.__init__(self, emit) + self.args = args + + def visit_Character(self, char): + self._emit("case %d: /* match %s */" % (char.char, chr(char.char))) + self._emit(" goto %s(%s);" % (self.next, self.args)) + + def visit_EndLine(self, endline): + self._emit(r"case '\0':") + self._emit(" goto %s($s);" % (self.next, self.args)) + + # Special Rule + + def visit_MBCharacter(self, mbchar): + self._emit("/* match %s */" % mbchar) + bytes = mbchar.bytes + self._emit(" if(%s)" % \ + " && ".join(["*(s+%d) == 0x%x" % 
(d, x) for d, x in enumerate(bytes)])) + self._emit(" s += %d;" % len(bytes)) + self._emit(" goto %s(%s);" % (self.next, self.args), 2) + + def visit_BegLine(self, begline): + self._emit("if (s == buf)") + self._emit(" goto %s(%s);" % (self.next, self.args), 2) + + def visit_Range(self, range): + if isinstance(range.lower, MBCharacter) and not \ + isinstance(range.upper, MBCharacter) or \ + isinstance(range.upper, MBCharacter) and not \ + isinstance(range.lower, MBCharacter): + return + + if isinstance(range.lower, MBCharacter): + self.visit(range) + else: + self._emit("if ('%s' <= *s && *s <= '%s')" % (range.lower.char, range.upper.char)) + self._emit(" s++;") + self._emit(" goto %s(%s);" % (self.next, self.args), 2) + + def visit_AnyChar(self, anychar): + self._emit(r"if (*s != '\0') {") + self._emit(" s = SKIP(s);") + self._emit(" goto %s(%s);" % (self.next, self.args), 2) + self._emit("}") + self._emit("goto reject(%s);" % self.args) def test(): import doctest
--- a/pyrect/translator/dot_translator.py Tue Aug 10 15:56:23 2010 +0900 +++ b/pyrect/translator/dot_translator.py Mon Aug 23 20:00:04 2010 +0900 @@ -22,8 +22,8 @@ self.cg = regexp.nfacg else: self.cg = regexp.dfacg - self.fill_color = "lightsteelblue1" - self.frame_color = "navyblue" + self.fill_color = "white" #"lightsteelblue1" + self.frame_color = "black" #"navyblue" def state_name(self, name): return "q"+name
--- a/pyrect/translator/grep_translator.py Tue Aug 10 15:56:23 2010 +0900 +++ b/pyrect/translator/grep_translator.py Mon Aug 23 20:00:04 2010 +0900 @@ -51,7 +51,7 @@ self.emiti( "if(%s(text))" % self.state_name(self.cg.start)) self.emit( "return 1;") self.emitd( r"} while (*text++ != '\0');") - self.emit("return 0;") + self.emitd("return 0;") self.emitd("}", 2) def emit_state(self, cur_state, transition):