Mercurial > hg > Members > shinya > pyrect
changeset 106:8102bf4bbec6
modify range stmt.
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 14 Dec 2010 15:02:25 +0900 |
parents | 14faa199c3bf |
children | 492daa4d7fa5 |
files | pyrect/jitgrep.py pyrect/regexp/__init__.py pyrect/regexp/analyzer.py pyrect/regexp/ast.py pyrect/regexp/char_collector.py pyrect/translator/cbc_grep_translator.py pyrect/translator/grep_translator.py |
diffstat | 7 files changed, 30 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/pyrect/jitgrep.py Tue Dec 14 11:49:59 2010 +0900 +++ b/pyrect/jitgrep.py Tue Dec 14 15:02:25 2010 +0900 @@ -73,8 +73,10 @@ if opts.time : start_time = time.time() + chars = Regexp.get_chars(string) + chars = Regexp.get_chars(string) reg = Regexp(".*"+string) - reg.chars = Regexp.get_chars(string) + reg.chars = chars (reg.max_len, _, _) = Regexp.get_analyze(string) if opts.cbc:
--- a/pyrect/regexp/__init__.py Tue Dec 14 11:49:59 2010 +0900 +++ b/pyrect/regexp/__init__.py Tue Dec 14 15:02:25 2010 +0900 @@ -47,7 +47,8 @@ @classmethod def parse(cls, regexp): psr = Parser() - return psr.parse(regexp) + ast = psr.parse(regexp) + return ast def matches(self, string): runtime = self.dfa.get_runtime()
--- a/pyrect/regexp/analyzer.py Tue Dec 14 11:49:59 2010 +0900 +++ b/pyrect/regexp/analyzer.py Tue Dec 14 15:02:25 2010 +0900 @@ -69,6 +69,9 @@ (m, _, _) = qmark.op.accept(self) return m, 0, ["", ""] + def visit_CharClass(self, cclass): + return 1, 1, ["", ""] + def test(): import doctest doctest.testmod()
--- a/pyrect/regexp/ast.py Tue Dec 14 11:49:59 2010 +0900 +++ b/pyrect/regexp/ast.py Tue Dec 14 15:02:25 2010 +0900 @@ -195,6 +195,18 @@ self.inverse = inverse self.factor = factor + def get_chars(self): + char = set() + for f in self.factor: + if type(f) == Range: + for ff in f: + char.add(ff) + else: + char.add(f.char) + if self.inverse: + char = set(range(256)) - char + return char + def __repr__(self): return self.__class__.__name__+"[%s]" \ % ",".join((s.__repr__() for s in self.factor)) @@ -210,6 +222,10 @@ self.lower = lower self.upper = upper + def __iter__(self): + for c in range(self.lower.char, self.upper.char+1): + yield c + def __contains__(self, input_node): if isinstance(input_node, Character): self.lower
--- a/pyrect/regexp/char_collector.py Tue Dec 14 11:49:59 2010 +0900 +++ b/pyrect/regexp/char_collector.py Tue Dec 14 15:02:25 2010 +0900 @@ -36,6 +36,10 @@ def visit_AnyChar(self, anychar): return [anychar] + def visit_CharClass(self, cclass): + chars = [chr(x) for x in cclass.get_chars()] + return chars + def visit_Character(self, char): return [chr(char.char)]
--- a/pyrect/translator/cbc_grep_translator.py Tue Dec 14 11:49:59 2010 +0900 +++ b/pyrect/translator/cbc_grep_translator.py Tue Dec 14 15:02:25 2010 +0900 @@ -88,7 +88,7 @@ if self.skip_boost and not self.filter_only and \ not AnyChar() in self.regexp.chars and \ - self.regexp.min_len > 2: + self.regexp.min_len >= 2: self.emit_booster(self.regexp.min_len, self.regexp.chars) else: self.skip_boost = False
--- a/pyrect/translator/grep_translator.py Tue Dec 14 11:49:59 2010 +0900 +++ b/pyrect/translator/grep_translator.py Tue Dec 14 15:02:25 2010 +0900 @@ -81,7 +81,7 @@ if self.skip_boost and not self.filter_only and \ not AnyChar() in self.regexp.chars and \ - self.regexp.min_len > 2: + self.regexp.min_len >= 2: self.emit_booster(self.regexp.min_len, self.regexp.chars) else: self.skip_boost = False