changeset 90:8cfa81638130

bug-fix: goto booster possibly, and improve code-gen routine (add some useful functions -> demiti, iemit, ...).
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Tue, 16 Nov 2010 06:06:25 +0900
parents 933d422f21f0
children 19a88707bd29
files pyrect/translator/goto_grep_translator.py pyrect/translator/grep_translator.py pyrect/translator/translator.py
diffstat 3 files changed, 72 insertions(+), 90 deletions(-) [+]
line wrap: on
line diff
--- a/pyrect/translator/goto_grep_translator.py	Tue Nov 16 06:01:56 2010 +0900
+++ b/pyrect/translator/goto_grep_translator.py	Tue Nov 16 06:06:25 2010 +0900
@@ -13,9 +13,9 @@
     This Class can translate form DFA into grep source-code.
     which based on (beautiful) mini-grep introduced  \"The Practice of Programming\"
     written by Rob Pike & Brian W. Kernighan. (see template/grep.c)
-    >>> string = \"(def)\"
+    >>> string = \"(hoge|fuga|piyo)\"
     >>> reg = Regexp(string)
-    >>> tje = GREPLABELTranslator(reg)
+    >>> tje = GOTOGREPTranslator(reg)
     >>> tje.translate()
     """
 
@@ -35,6 +35,8 @@
         self.start = self.cg.start
         self.interface = "UCHARP beg, UCHARP buf, UCHARP end"
         self.args = "beg, buf, end"
+        self.thread_interface = "UCHARP beg, UCHARP buf, UCHARP end, thread_arg_t *targ"
+        self.thread_args = "beg, buf, end, targ"
 
     def getbufsize(self,):
         return self.__bufsize
@@ -50,8 +52,8 @@
         else:
             return "state_"+str(name)
 
-    def emit_goto(self, state):
-        self.emit("goto %s;" % self.state_name(state))
+    def emit_goto(self, state, n=1):
+        self.emit("goto %s;" % self.state_name(state), n)
 
     def emit_from_callgraph(self):
         # self.emit C-source code
@@ -64,7 +66,7 @@
         self.emit_accept_state()
         self.emit_reject_state()
         self.emit("return;")
-        self.emitd("}", 2)
+        self.demit("}", 1)
 
     def emit_initialization(self):
         self.emit("#include <stdio.h>")
@@ -74,7 +76,7 @@
         self.emit("#include <sys/stat.h>")
         self.emit("#include <fcntl.h>")
         self.emit("#include <unistd.h>")
-        self.emit("#include <string.h>", 2)
+        self.emit("#include <string.h>", 1)
 
         self.emit("typedef unsigned char   UCHAR;")
         self.emit("typedef unsigned char *UCHARP;")
@@ -108,7 +110,7 @@
     def emit_bmh_filter(self, key):
         l = len(key)
 
-        self.emiti("bmh_filter:")
+        self.demiti("bmh_filter:")
 
         def emit_next():
             if self.filter_only:
@@ -126,14 +128,14 @@
             self.emit("buf = memchr(buf, %d, (end - buf));" % ord(key))
             self.emit("if (buf == NULL) return;")
             emit_next()
-            self.emitd("", 2)
+            self.demit("", 1)
             return
 
         skip = dict()
         for i in range(l-1):
             skip[key[i]] = l-1-i
 
-        self.emit("buf += %d;" % (l-1), 2)
+        self.emit("buf += %d;" % (l-1), 1)
 
         self.emiti("while (buf < end) {")
         self.emiti(  "if (*buf == %d /* %s */) {" % (ord(key[-1]), Character.ascii(key[-1])))
@@ -141,23 +143,23 @@
         self.emiti(    "while (*(--tmp1) == *(--tmp2)) {")
         self.emiti(       "if (tmp2 == key) {")
         emit_next()
-        self.emitd(       "}")
-        self.emitd(    "}")
-        self.emitd(  "}")
+        self.demit(       "}")
+        self.demit(    "}")
+        self.demit(  "}")
         self.emiti(  "switch(*buf) {")
         for k, v in skip.iteritems():
-            self.emiti(  "case %d: /* %s */" % (ord(k), Character.ascii(k)))
-            self.emit(     "buf += %d; break;" % v), self.dedent()
-        self.emiti("default: buf += %d;" % l), self.dedent()
-        self.emitd(  "}")
-        self.emitd("}")
+            self.demiti(  "case %d: /* %s */" % (ord(k), Character.ascii(k)))
+            self.emit(     "buf += %d; break;" % v)
+        self.emiti("default: buf += %d;" % l)
+        self.demit(  "}")
+        self.demit("}")
         self.emit( "return;")
-        self.emitd("", 2)
+        self.demit("", 1)
 
     def emit_quick_filter(self, key):
         l = len(key)
 
-        self.emiti("quick_filter:")
+        self.demiti("quick_filter:")
 
         def emit_next():
             if self.filter_only:
@@ -175,54 +177,54 @@
             self.emit("buf = memchr(buf, %d, (end - buf));" % ord(key))
             self.emit("if (buf == NULL) return;")
             emit_next()
-            self.emitd("}", 2)
+            self.demit("}", 1)
             return
 
         skip = dict()
         for i in range(l):
             skip[key[i]] = l-i
 
-        self.emit("end_ = end - %d;" % (l-1),  2)
+        self.emit("end_ = end - %d;" % (l-1),  1)
         self.emiti("while (buf < end_) {")
         self.emiti(  "if (*buf == %d /* %s */) {" % (ord(key[0]), Character.ascii(key[0])))
         self.emit(     "tmp1 = buf, tmp2 = (UCHARP)key;")
         self.emiti(    "while (*(++tmp1) == *(++tmp2)){")
         self.emiti(       "if (tmp2 == key+%d) {" % (l-1))
         emit_next()
-        self.emitd(       "}")
-        self.emitd(    "}")
-        self.emitd(  "}")
+        self.demit(       "}")
+        self.demit(    "}")
+        self.demit(  "}")
         self.emiti(  "switch(buf[%d]) {" % l)
         for k, v in skip.iteritems():
-            self.emiti(  "case %d: /* %s */" % (ord(k), Character.ascii(k)))
-            self.emit(     "buf += %d; break;" % v), self.dedent()
-        self.emiti("default: buf += %d;" % (l+1)), self.dedent()
-        self.emitd(  "}")
-        self.emitd("}")
+            self.demiti(  "case %d: /* %s */" % (ord(k), Character.ascii(k)))
+            self.emit(     "buf += %d; break;" % v)
+        self.emiti("default: buf += %d;" % (l+1))
+        self.demit(  "}")
+        self.demit("}")
         self.emit( "return;")
-        self.emitd("", 2)
+        self.demit("", 1)
 
     def emit_booster(self, min_len, chars):
-        self.emiti("booster:")
+        self.demiti("booster:")
         self.emit(   "end_ = end - %d;" % (min_len-1))
         self.emit(   "if (buf > end_) return;")
         self.emiti(  "do {")
         self.emiti(    "switch (buf[%d]) {" % (min_len-1))
         for c in chars:
-            self.emit(   "case %d: /* %s */" % (ord(c), Character.ascii(c)))
+            self.demiti(   "case %d: /* %s */" % (ord(c), Character.ascii(c)))
         self.emit_goto(self.cg.start)
-        self.emitd(    "}")
-        self.emitd(  "} while((buf += %d) <= end_);" % min_len)
+        self.demit(    "}")
+        self.demit(  "} while((buf += %d) <= end_);" % min_len)
         self.emit("return;")
-        self.emit("", 2)
+        self.emit("", 1)
 
     def emit_driver(self):
-        self.emit("UCHARP end_, ret;")
+        self.emit("UCHARP end_, ret;", 2)
         if self.skip_boost:
             self.emit_booster(self.regexp.min_len, self.regexp.chars)
         if self.filter:
             self.emit("UCHARP tmp1, tmp2; static const UCHAR key[] = \"%s\";" % self.filter_key)
-            self.emit_goto(self.filter + "_filter")
+            self.emit_goto(self.filter + "_filter", 2)
             if self.filter == "bmh":
                 self.emit_bmh_filter(self.filter_key)
             else:
@@ -232,25 +234,25 @@
         return
 
     def emit_accept_state(self):
-        self.emiti("accept:")
+        self.demiti("accept:")
         self.emit(   "ret = (UCHARP)memchr(buf, '\\n', (buf - end));")
         if self.skip_boost or self.filter:
             self.emit(   "beg = get_line_beg(buf, beg);")
         self.emiti(  "if (ret == NULL) {")
         self.emit(     "print_line(beg, end);")
         self.emit(     "return;")
-        self.emitd(  "}")
+        self.demit(  "}")
         self.emit(   "print_line(beg, ret);")
         self.emit(   "beg = buf = ret + 1;")
         self.emit_goto(self.start)
-        self.emitd("", 2)
+        self.demit("", 1)
 
     def emit_reject_state(self):
-        self.emiti("reject:")
+        self.demiti("reject:")
         self.emit(   "if (buf >= end) return;")
         self.emit(   "beg = buf;")
         self.emit_goto(self.start)
-        self.emitd("")
+        self.demit("")
 
     def emit_switch(self, case, default=None):
         if not case:
@@ -259,24 +261,25 @@
             return
         self.emiti("switch(*buf++) {")
         for case, next_ in case.iteritems():
-            self.trans_stmt.emit(case, self.state_name(next_))
-        if default == self.state_name(self.cg.start) and self.skip_boost:
-            self.emit("default: ")
+            self.demiti("case %d: /* %s */" % (case.char, case))
+            self.emit_goto(next_)
+        if default == self.cg.start and self.skip_boost:
+            self.demiti("default: ")
             self.emit_goto("booster")
         else:
-            self.emit("default: ")
+            self.demiti("default: ")
             self.emit_goto(default)
-        self.emitd("}")
+        self.demit("}")
 
     def emit_state(self, cur_state, transition):
         if self.filter_only: return
 
-        self.emiti("%s:" % self.state_name(cur_state))
+        self.demiti("%s:" % self.state_name(cur_state))
 
         if cur_state in self.cg.accepts:
             self.emit(   "buf--;")
             self.emit_goto("accept")
-            self.emitd("", 2)
+            self.demit("", 1)
             return
 
         if transition.has_key(AnyChar()):
@@ -296,7 +299,7 @@
                 self.emit(   "static const void *%s_table[256] = {%s};"
                              % (self.state_name(cur_state), ", ".join(["&&"+x for x in tbl])))
                 self.emit(   "goto *%s_table[*buf++];" % (self.state_name(cur_state), self.args))
-                self.emitd("", 2)
+                self.demit("", 1)
             return
 
         for eol in self.eols:
@@ -308,44 +311,7 @@
 
         self.emit_switch(transition, default)
 
-        self.emitd("", 2)
-
-    class _trans_stmt(ASTWalker):
-        def __init__(self, emit):
-            self._emit = emit
-            self.args = "beg, buf, end"
-
-        def emit(self, input_node, next_):
-            self.next = next_
-            input_node.accept(self)
-
-        def visit(self, input_node):
-            self._emit("/* UNKNOW RULE */")
-            self._emit("/* %s */" % input_node.__repr__())
-
-        def visit_Character(self, char):
-            self._emit("case %d: /* %s */" % (char.char, char))
-            self._emit("  goto %s;" % self.next)
-
-        # Special Rule
-        def visit_BegLine(self, begline):
-            self._emit("/* begin of line  */")
-            self._emit("if (buf == beg)")
-            self._emit("  goto %s;" % self.next, 2)
-
-        def visit_Range(self, range):
-            if isinstance(range.lower, MBCharacter) and not \
-               isinstance(range.upper, MBCharacter) or  \
-               isinstance(range.upper, MBCharacter) and not \
-               isinstance(range.lower, MBCharacter):
-                return
-
-            if isinstance(range.lower, MBCharacter):
-                self.visit(range)
-            else:
-                self._emit("if ('%s' <= *buf && *buf <= '%s')" % (range.lower.char, range.upper.char))
-                self._emit("  buf++;")
-                self._emit("  goto %s;" % self.next, 2)
+        self.demit("", 1)
 
 def test():
     import doctest
--- a/pyrect/translator/grep_translator.py	Tue Nov 16 06:01:56 2010 +0900
+++ b/pyrect/translator/grep_translator.py	Tue Nov 16 06:06:25 2010 +0900
@@ -109,7 +109,7 @@
             self.emit("buf = memchr(buf, %d, (end - buf));" % ord(key))
             self.emit("if (buf == NULL) return;")
             emit_next()
-            self.emitd("}", 2)
+            self.demit("}", 2)
             return
 
         self.emit('static const UCHAR key[] = "%s";' % key)
--- a/pyrect/translator/translator.py	Tue Nov 16 06:01:56 2010 +0900
+++ b/pyrect/translator/translator.py	Tue Nov 16 06:06:25 2010 +0900
@@ -23,12 +23,28 @@
         self.emit(*arg)
         self.indent()
 
+    def iemit(self, *arg):
+        self.indent()
+        self.emit(*arg)
+
     def emitd(self, *arg):
+        self.emit(*arg)
+        self.dedent()
+
+    def demit(self, *arg):
         self.dedent()
         self.emit(*arg)
 
+    def iemitd(self, *arg):  # indent, emit, then dedent (net indent unchanged)
+        self.iemit(*arg)
+        self.dedent()
+
+    def demiti(self, *arg):
+        self.demit(*arg)
+        self.indent()
+
     def emit0(self, string):
-        self.stream.write(self.tab * self.__indent + string)
+        self.stream.write(self.tab*self.__indent + string)
 
     def state_name(self, state_name):
         return str(state_name)