# HG changeset patch # User Ryoma SHINYA # Date 1289067043 -32400 # Node ID 56a997f2c1219fd91885293525734caac121e104 # Parent b02b321d0e062e2e9b37307ee9fc59a054b6ec24 improve codegen. remove needless code (when filter-only, no need to emit dfa-code). diff -r b02b321d0e06 -r 56a997f2c121 pyrect/translator/grep_translator.py --- a/pyrect/translator/grep_translator.py Sun Nov 07 01:48:19 2010 +0900 +++ b/pyrect/translator/grep_translator.py Sun Nov 07 03:10:43 2010 +0900 @@ -27,6 +27,7 @@ self.thread_dfa = 1 self.thread_line = 1 self.filter = True + self.filter_only = False self.start = "matcher" self.interface = "UCHARP beg, UCHARP buf, UCHARP end" self.args = "beg, buf, end" @@ -50,7 +51,7 @@ self.emit("#include ") self.emit("#include ") - self.emit_skip() + #self.emit_skip() for state in self.cg.map.iterkeys(): self.emit("void %s(%s);" % (self.state_name(state), self.interface)) @@ -65,6 +66,9 @@ grepsource = open(self.BASE_DIR + "/template/grep.c") self.emit(grepsource.read()) + #TODO: filter is faster than dfa in matching. + #but slower than that in compiling. + #We have to improve this problem. def emit_filter(self, words): def longest(s1, s2): if len(s1) >= len(s2): @@ -75,16 +79,15 @@ key = reduce(longest, words) if len(words) == 1 and len(key) == self.regexp.min_len: - filter_only = True + self.filter_only = True else: - filter_only = False filter_prefix = False def emit_next(): self.emit("beg = memrchr(buf, '\\n', beg);") - if filter_only: + if self.filter_only: self.emit("accept(%s);" % self.args) - elif filter_prefix: + elif self.filter_prefix: self.emit("buf -= %d;" % len(key)) self.emit("dfa(%s);" % self.args) else: @@ -106,26 +109,23 @@ for i in range(l - 1): skip[ord(key[i])] = str(l-1-i) - self.emit('static UCHAR key[] = "%s";' % key) - self.emiti( "static int skip[256] = {") + self.emit('static const UCHAR key[] = "%s";' % key) + + self.emiti( "static const UCHAR skip[256] = {") for i in range(8): i = i * 32 self.emit(",".join(skip[i:i+32]) + ",") self.emitd( "};") - self.emit("UCHARP tmp; register UCHAR c;", 2) - self.emit("int i; buf += %d;" % (l-1)) - self.emiti("while (buf < end) {") + self.emit("UCHARP tmp = buf; UCHAR c;", 2) + self.emit("int i; buf += %d - skip[c];" % (l-1)) + self.emiti("while ((buf += skip[c]) < end) {") self.emiti( "if ((c = *buf) == %d /*'%c'*/) {" % (ord(key[l-1]), key[l-1])) self.emit( "i = %d; tmp = buf;" % (l-1)) - self.emiti( "while (key[--i] == *(--tmp)) {") - self.emiti( "if (i == 0) {") - self.emit( "beg = memrchr(buf, '\\n', beg);") - self.emit( "goto next;") - self.emitd( "}") + self.emit( "while (key[--i] == *(--tmp)) {") + self.emit( "if (i == 0) goto next;") self.emitd( "}") self.emitd( "}") - self.emit( "buf += skip[c];") self.emitd("}") self.emit( "return;") self.emit( "next:") @@ -144,8 +144,6 @@ def emit_accept_state(self): self.emiti("void accept(%s) {" % self.interface) - #self.emit( "printf(\"*beg = %c, *buf = %c, *end = %c; \\n\", *beg, *buf, *end);") - #self.emit( "printf(\"beg = %x, buf = %x, end = %x; \\n\", beg, buf, end);") self.emit( "buf--;") self.emit( "UCHARP ret = (UCHARP)memchr(buf, '\\n', (buf - end));") self.emit( 'if (ret == NULL) {fprintf(stderr, "memchr NULL err!"); exit(0);}') @@ -180,6 +178,8 @@ self.emitd("}") def emit_state(self, cur_state, transition): + if self.filter_only: return + self.emiti("void %s(%s) {" % (self.state_name(cur_state), self.interface)) if cur_state in self.cg.accepts: