Mercurial > hg > Members > shinya > pyrect
changeset 75:e06786b3c2dc
modify filtering algorithm, unloop string-compare!!
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 08 Nov 2010 03:45:14 +0900 |
parents | a6a0504dea7b |
children | dd6d2b9e48ad |
files | pyrect/jitgrep.py pyrect/translator/grep_translator.py |
diffstat | 2 files changed, 70 insertions(+), 19 deletions(-) [+] |
line wrap: on
line diff
--- a/pyrect/jitgrep.py Mon Nov 08 02:19:18 2010 +0900 +++ b/pyrect/jitgrep.py Mon Nov 08 03:45:14 2010 +0900 @@ -12,7 +12,7 @@ myusage = """%prog [--buf-size=size] [--dump] [--time] [--debug] [--cc=compiler] [-c] [-Olevel] regexp [file..] [--out=file] - [--thread=thread_num] [--disable-filter] + [--thread=thread_num] [--filter=algorithm] [--disable-booster]""" psr = OptionParser(usage=myusage) @@ -27,8 +27,8 @@ psr.add_option("--cflags", action="store", type="string", dest="cflags", default="-O3", help="Print compile/matching time.") psr.add_option("--time", action="store_true", dest="time", default=False, help="Print compile/matching time.") psr.add_option("--thread", action="store", type="string", dest="thread", default="0", metavar="FILE", help="number of thread.") - psr.add_option("--disable-filter", action="store_true", dest="no_filter", default=False, help="disable fixed-string filter(Boyer-Moore).") - psr.add_option("--disable-booster", action="store_true", dest="no_booster", default=False, help="disable booster.") + psr.add_option("--disable-booster", action="store_true", dest="no_boost", default=False, help="disable boosetr (default: use booster).") + psr.add_option("--filter", action="store", type="string", dest="filter", default="bmh", help="chose filtering-algorithm bmh(default), quick, or none.") psr.add_option("--debug", action="store_true", dest="debug", default=False, help="Dump commands, not evalute matching (except interactive mode).") psr.add_option("--label", action="store_true", dest="label", default=False, help="label implimentation in C.") psr.add_option("--dump", action="store_true", dest="dump", default=False, help="Dump generated grep-source.") @@ -75,8 +75,8 @@ grept = GoToGREPTranslator(reg) else: grept = GREPTranslator(reg) - grept.filter = not opts.no_filter - grept.skip_boost = not opts.no_booster + grept.filter = opts.filter + grept.skip_boost = not opts.no_boost grept.thread_line = int(opts.thread) grept.bufsize = bufsize
--- a/pyrect/translator/grep_translator.py Mon Nov 08 02:19:18 2010 +0900 +++ b/pyrect/translator/grep_translator.py Mon Nov 08 03:45:14 2010 +0900 @@ -26,7 +26,7 @@ self.__bufsize = 1024 * 1024 self.thread_dfa = 1 self.thread_line = 1 - self.filter = True + self.filter = "bmh" self.filter_only = False self.filter_prefix = False self.skip_boost = True @@ -39,7 +39,7 @@ def setbufsize(self, bufsize): self.__bufsize = abs(bufsize) - bufsize = property(getbufsize, setbufsize) + bufsize = property(getbufsize, setbufsize) def emit_initialization(self): self.emit("#include <stdio.h>") @@ -58,7 +58,9 @@ self.emit('void accept(%s);' % self.interface) key = None - if self.filter and self.regexp.must_words: + + if (self.filter == "bmh" or self.filter == "quick")\ + and self.regexp.must_words: key = max(self.regexp.must_words, key=len) if len(self.regexp.must_words) == 1 and len(key) == self.regexp.min_len: self.filter_only = True @@ -70,7 +72,10 @@ self.emit("void %s(%s);" % (self.state_name(state), self.interface)) self.emit() - if self.filter: self.emit_filter(key) + if self.filter == "bmh": + self.emit_bmh_filter(key) + else: + self.emit_quick_filter(key) if self.skip_boost and not self.filter_only and \ not AnyChar() in self.regexp.chars and \ @@ -82,7 +87,7 @@ grepsource = open(self.BASE_DIR + "/template/grep.c") self.emit(grepsource.read()) - def emit_filter(self, key): + def emit_bmh_filter(self, key): l = len(key) def emit_next(): if self.filter_only: @@ -97,7 +102,7 @@ self.emit("UCHARP get_line_beg(UCHARP p, UCHARP beg);", 2) - self.emiti("void bm_filter(%s) {" % self.interface) + self.emiti("void bmh_filter(%s) {" % self.interface) l = len(key) if l == 1: self.emit("buf = memchr(buf, %d, (end - buf));" % ord(key)) @@ -112,14 +117,12 @@ for i in range(l-1): skip[key[i]] = l-1-i - self.emit("UCHARP tmp1, *tmp2;", 2) + self.emit("UCHARP tmp;", 2) self.emit("int i; buf += %d;" % (l-1)) - self.emiti("while (buf < end) {") - self.emiti( "if (*buf == %d /*'%c'*/) {" % (ord(key[l-1]), key[l-1])) - self.emit( "tmp1 = key+%d; tmp2 = buf;" % (l-1)) - self.emit( "while (*(--tmp1) == *(--tmp2)) {") - self.emit( "if (tmp1 == key) goto next;") - self.emitd( "}") + self.emiti("while ((tmp = buf) < end) {") + self.emiti( "if (%s) {" % + " && ".join(reversed(["*tmp-- == %d" % ord(c) for c in key]))) + self.emit( "goto next;") self.emitd( "}") self.emiti( "switch(*buf) {") for k, v in skip.iteritems(): @@ -133,6 +136,54 @@ emit_next() self.emitd("}", 2) + def emit_quick_filter(self, key): + l = len(key) + def emit_next(): + if self.filter_only: + self.emit("accept(%s);" % self.args) + elif self.filter_prefix: + self.emit("buf+%d;" % l) + self.emit("%s(%s);" % (self.state_name(self.cg.start) ,self.args)) + else: + self.emit("beg = get_line_beg(buf, beg);") + self.emit("buf = beg;") + self.emit("%s(%s);" % (self.state_name(self.cg.start), self.args)) + + self.emit("UCHARP get_line_beg(UCHARP p, UCHARP beg);", 2) + + self.emiti("void quick_filter(%s) {" % self.interface) + l = len(key) + if l == 1: + self.emit("buf = memchr(buf, %d, (end - buf));" % ord(key)) + self.emit("if (buf == NULL) return;") + emit_next() + self.emitd("}", 2) + return + + self.emit('static UCHAR key[] = "%s";' % key) + + skip = dict() + for i in range(l): + skip[key[i]] = l-i + + self.emit("UCHARP tmp, *end_ = end - %d;" % l, 2) + self.emiti("while ((tmp = buf) <= end_) {") + self.emiti( "if (%s) {" % + " && ".join(["*tmp++ == %d" % ord(c) for c in key])) + self.emit( "goto next;") + self.emitd( "}") + self.emiti( "switch(buf[%d]) {" % l) + for k, v in skip.iteritems(): + self.emiti( "case %d: /* %s */" % (ord(k), Character.ascii(k))) + self.emit( "buf += %d; break;" % v), self.dedent() + self.emiti("default: buf += %d;" % (l+1)), self.dedent() + self.emitd( "}") + self.emitd("}") + self.emit( "return;") + self.emit( "next:") + emit_next() + self.emitd("}", 2) + def emit_booster(self, min_len, chars): self.emiti("void booster(%s) {" % self.interface) self.emiti( "do {") @@ -149,7 +200,7 @@ def emit_driver(self): self.emiti("void matcher(%s) {" % self.interface) if self.filter: - self.emit( "%s(%s);" % ("bm_filter", self.args)) + self.emit( "%s(%s);" % (self.filter + "_filter", self.args)) else: self.emit( "%s(%s);" % (self.state_name(self.cg.start), self.args)) self.emit( "return;")