Mercurial > hg > Members > shinya > pyrect
changeset 90:8cfa81638130
bug-fix: goto booster possibly, and improve code-gen routine (add some useful functions -> demiti, iemit, ...).
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 16 Nov 2010 06:06:25 +0900 |
parents | 933d422f21f0 |
children | 19a88707bd29 |
files | pyrect/translator/goto_grep_translator.py pyrect/translator/grep_translator.py pyrect/translator/translator.py |
diffstat | 3 files changed, 72 insertions(+), 90 deletions(-) [+] |
line wrap: on
line diff
--- a/pyrect/translator/goto_grep_translator.py Tue Nov 16 06:01:56 2010 +0900 +++ b/pyrect/translator/goto_grep_translator.py Tue Nov 16 06:06:25 2010 +0900 @@ -13,9 +13,9 @@ This Class can translate form DFA into grep source-code. which based on (beautiful) mini-grep introduced \"The Practice of Programming\" written by Rob Pike & Brian W. Kernighan. (see template/grep.c) - >>> string = \"(def)\" + >>> string = \"(hoge|fuga|piyo)\" >>> reg = Regexp(string) - >>> tje = GREPLABELTranslator(reg) + >>> tje = GOTOGREPTranslator(reg) >>> tje.translate() """ @@ -35,6 +35,8 @@ self.start = self.cg.start self.interface = "UCHARP beg, UCHARP buf, UCHARP end" self.args = "beg, buf, end" + self.thread_interface = "UCHARP beg, UCHARP buf, UCHARP end, thread_arg_t *targ" + self.thread_args = "beg, buf, end, targ" def getbufsize(self,): return self.__bufsize @@ -50,8 +52,8 @@ else: return "state_"+str(name) - def emit_goto(self, state): - self.emit("goto %s;" % self.state_name(state)) + def emit_goto(self, state, n=1): + self.emit("goto %s;" % self.state_name(state), n) def emit_from_callgraph(self): # self.emit C-source code @@ -64,7 +66,7 @@ self.emit_accept_state() self.emit_reject_state() self.emit("return;") - self.emitd("}", 2) + self.demit("}", 1) def emit_initialization(self): self.emit("#include <stdio.h>") @@ -74,7 +76,7 @@ self.emit("#include <sys/stat.h>") self.emit("#include <fcntl.h>") self.emit("#include <unistd.h>") - self.emit("#include <string.h>", 2) + self.emit("#include <string.h>", 1) self.emit("typedef unsigned char UCHAR;") self.emit("typedef unsigned char *UCHARP;") @@ -108,7 +110,7 @@ def emit_bmh_filter(self, key): l = len(key) - self.emiti("bmh_filter:") + self.demiti("bmh_filter:") def emit_next(): if self.filter_only: @@ -126,14 +128,14 @@ self.emit("buf = memchr(buf, %d, (end - buf));" % ord(key)) self.emit("if (buf == NULL) return;") emit_next() - self.emitd("", 2) + self.demit("", 1) return skip = dict() for i in range(l-1): skip[key[i]] = l-1-i 
- self.emit("buf += %d;" % (l-1), 2) + self.emit("buf += %d;" % (l-1), 1) self.emiti("while (buf < end) {") self.emiti( "if (*buf == %d /* %s */) {" % (ord(key[-1]), Character.ascii(key[-1]))) @@ -141,23 +143,23 @@ self.emiti( "while (*(--tmp1) == *(--tmp2)) {") self.emiti( "if (tmp2 == key) {") emit_next() - self.emitd( "}") - self.emitd( "}") - self.emitd( "}") + self.demit( "}") + self.demit( "}") + self.demit( "}") self.emiti( "switch(*buf) {") for k, v in skip.iteritems(): - self.emiti( "case %d: /* %s */" % (ord(k), Character.ascii(k))) - self.emit( "buf += %d; break;" % v), self.dedent() - self.emiti("default: buf += %d;" % l), self.dedent() - self.emitd( "}") - self.emitd("}") + self.demiti( "case %d: /* %s */" % (ord(k), Character.ascii(k))) + self.emit( "buf += %d; break;" % v) + self.emiti("default: buf += %d;" % l) + self.demit( "}") + self.demit("}") self.emit( "return;") - self.emitd("", 2) + self.demit("", 1) def emit_quick_filter(self, key): l = len(key) - self.emiti("quick_filter:") + self.demiti("quick_filter:") def emit_next(): if self.filter_only: @@ -175,54 +177,54 @@ self.emit("buf = memchr(buf, %d, (end - buf));" % ord(key)) self.emit("if (buf == NULL) return;") emit_next() - self.emitd("}", 2) + self.demit("}", 1) return skip = dict() for i in range(l): skip[key[i]] = l-i - self.emit("end_ = end - %d;" % (l-1), 2) + self.emit("end_ = end - %d;" % (l-1), 1) self.emiti("while (buf < end_) {") self.emiti( "if (*buf == %d /* %s */) {" % (ord(key[0]), Character.ascii(key[0]))) self.emit( "tmp1 = buf, tmp2 = (UCHARP)key;") self.emiti( "while (*(++tmp1) == *(++tmp2)){") self.emiti( "if (tmp2 == key+%d) {" % (l-1)) emit_next() - self.emitd( "}") - self.emitd( "}") - self.emitd( "}") + self.demit( "}") + self.demit( "}") + self.demit( "}") self.emiti( "switch(buf[%d]) {" % l) for k, v in skip.iteritems(): - self.emiti( "case %d: /* %s */" % (ord(k), Character.ascii(k))) - self.emit( "buf += %d; break;" % v), self.dedent() - self.emiti("default: buf 
+= %d;" % (l+1)), self.dedent() - self.emitd( "}") - self.emitd("}") + self.demiti( "case %d: /* %s */" % (ord(k), Character.ascii(k))) + self.emit( "buf += %d; break;" % v) + self.emiti("default: buf += %d;" % (l+1)) + self.demit( "}") + self.demit("}") self.emit( "return;") - self.emitd("", 2) + self.demit("", 1) def emit_booster(self, min_len, chars): - self.emiti("booster:") + self.demiti("booster:") self.emit( "end_ = end - %d;" % (min_len-1)) self.emit( "if (buf > end_) return;") self.emiti( "do {") self.emiti( "switch (buf[%d]) {" % (min_len-1)) for c in chars: - self.emit( "case %d: /* %s */" % (ord(c), Character.ascii(c))) + self.demiti( "case %d: /* %s */" % (ord(c), Character.ascii(c))) self.emit_goto(self.cg.start) - self.emitd( "}") - self.emitd( "} while((buf += %d) <= end_);" % min_len) + self.demit( "}") + self.demit( "} while((buf += %d) <= end_);" % min_len) self.emit("return;") - self.emit("", 2) + self.emit("", 1) def emit_driver(self): - self.emit("UCHARP end_, ret;") + self.emit("UCHARP end_, ret;", 2) if self.skip_boost: self.emit_booster(self.regexp.min_len, self.regexp.chars) if self.filter: self.emit("UCHARP tmp1, tmp2; static const UCHAR key[] = \"%s\";" % self.filter_key) - self.emit_goto(self.filter + "_filter") + self.emit_goto(self.filter + "_filter", 2) if self.filter == "bmh": self.emit_bmh_filter(self.filter_key) else: @@ -232,25 +234,25 @@ return def emit_accept_state(self): - self.emiti("accept:") + self.demiti("accept:") self.emit( "ret = (UCHARP)memchr(buf, '\\n', (buf - end));") if self.skip_boost or self.filter: self.emit( "beg = get_line_beg(buf, beg);") self.emiti( "if (ret == NULL) {") self.emit( "print_line(beg, end);") self.emit( "return;") - self.emitd( "}") + self.demit( "}") self.emit( "print_line(beg, ret);") self.emit( "beg = buf = ret + 1;") self.emit_goto(self.start) - self.emitd("", 2) + self.demit("", 1) def emit_reject_state(self): - self.emiti("reject:") + self.demiti("reject:") self.emit( "if (buf >= end) 
return;") self.emit( "beg = buf;") self.emit_goto(self.start) - self.emitd("") + self.demit("") def emit_switch(self, case, default=None): if not case: @@ -259,24 +261,25 @@ return self.emiti("switch(*buf++) {") for case, next_ in case.iteritems(): - self.trans_stmt.emit(case, self.state_name(next_)) - if default == self.state_name(self.cg.start) and self.skip_boost: - self.emit("default: ") + self.demiti("case %d: /* %s */" % (case.char, case)) + self.emit_goto(next_) + if default == self.cg.start and self.skip_boost: + self.demiti("default: ") self.emit_goto("booster") else: - self.emit("default: ") + self.demiti("default: ") self.emit_goto(default) - self.emitd("}") + self.demit("}") def emit_state(self, cur_state, transition): if self.filter_only: return - self.emiti("%s:" % self.state_name(cur_state)) + self.demiti("%s:" % self.state_name(cur_state)) if cur_state in self.cg.accepts: self.emit( "buf--;") self.emit_goto("accept") - self.emitd("", 2) + self.demit("", 1) return if transition.has_key(AnyChar()): @@ -296,7 +299,7 @@ self.emit( "static const void *%s_table[256] = {%s};" % (self.state_name(cur_state), ", ".join(["&&"+x for x in tbl]))) self.emit( "goto *%s_table[*buf++];" % (self.state_name(cur_state), self.args)) - self.emitd("", 2) + self.demit("", 1) return for eol in self.eols: @@ -308,44 +311,7 @@ self.emit_switch(transition, default) - self.emitd("", 2) - - class _trans_stmt(ASTWalker): - def __init__(self, emit): - self._emit = emit - self.args = "beg, buf, end" - - def emit(self, input_node, next_): - self.next = next_ - input_node.accept(self) - - def visit(self, input_node): - self._emit("/* UNKNOW RULE */") - self._emit("/* %s */" % input_node.__repr__()) - - def visit_Character(self, char): - self._emit("case %d: /* %s */" % (char.char, char)) - self._emit(" goto %s;" % self.next) - - # Special Rule - def visit_BegLine(self, begline): - self._emit("/* begin of line */") - self._emit("if (buf == beg)") - self._emit(" goto %s;" % self.next, 
2) - - def visit_Range(self, range): - if isinstance(range.lower, MBCharacter) and not \ - isinstance(range.upper, MBCharacter) or \ - isinstance(range.upper, MBCharacter) and not \ - isinstance(range.lower, MBCharacter): - return - - if isinstance(range.lower, MBCharacter): - self.visit(range) - else: - self._emit("if ('%s' <= *buf && *buf <= '%s')" % (range.lower.char, range.upper.char)) - self._emit(" buf++;") - self._emit(" goto %s;" % self.next, 2) + self.demit("", 1) def test(): import doctest
--- a/pyrect/translator/grep_translator.py Tue Nov 16 06:01:56 2010 +0900 +++ b/pyrect/translator/grep_translator.py Tue Nov 16 06:06:25 2010 +0900 @@ -109,7 +109,7 @@ self.emit("buf = memchr(buf, %d, (end - buf));" % ord(key)) self.emit("if (buf == NULL) return;") emit_next() - self.emitd("}", 2) + self.demit("}", 2) return self.emit('static const UCHAR key[] = "%s";' % key)
--- a/pyrect/translator/translator.py Tue Nov 16 06:01:56 2010 +0900 +++ b/pyrect/translator/translator.py Tue Nov 16 06:06:25 2010 +0900 @@ -23,12 +23,28 @@ self.emit(*arg) self.indent() + def iemit(self, *arg): + self.indent() + self.emit(*arg) + def emitd(self, *arg): + self.emit(*arg) + self.dedent() + + def demit(self, *arg): self.dedent() self.emit(*arg) + def iemitd(self, *arg): + self.iemit0(*arg) + self.dedent() + + def demiti(self, *arg): + self.demit(*arg) + self.indent() + def emit0(self, string): - self.stream.write(self.tab * self.__indent + string) + self.stream.write(self.tab*self.__indent + string) def state_name(self, state_name): return str(state_name)