Mercurial > hg > Members > nobuyasu > test
diff pyrect/pyrect/regexp/analyzer.py @ 9:493c96d030c0
add pyrect
author | nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 14 Jun 2011 17:24:03 +0900 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/pyrect/regexp/analyzer.py Tue Jun 14 17:24:03 2011 +0900 @@ -0,0 +1,79 @@ +#!/usr/bin/env python + +""" +Extract Keywords from AST. Keywords, +which are necessary words to be accepted with Regular-Expression. +and which are used to Fixed-String-Filtering (ex: Boyer-Moore). +kwset is also used in GNU-GREP. +""" + +from pyrect.regexp.parser import Parser +from pyrect.regexp.ast import ASTWalker, Plus + +class Analyzer(ASTWalker): + """ Extract with Visitor-Pattern. + AST (ast), is represented by Node-Tree. + >>> prs = Parser() + >>> an = Analyzer() + >>> an.analyze(prs.parse('fixed-string')) + (12, 12, ['fixed-string']) + >>> an.analyze(prs.parse('(build|fndecl|gcc)')) + (6, 3, []) + >>> an.analyze(prs.parse('123(AB|CD)*456')) + (inf, 6, ['123', '456']) + >>> an.analyze(prs.parse('((12)+|3)|456')) + (inf, 1, []) + >>> an.analyze(prs.parse('^(plus)?(qmark)?')) + (9, 0, []) + >>> an.analyze(prs.parse('\*+ \[\[')) + (inf, 4, ['* [[']) + """ + + def __init__(self, ast=None): + if ast: + self.analyze(ast) + else: + self.max_len = 0 + self.min_len = 0 + + def analyze(self, ast=None): + if ast: + self.max_len, self.min_len, self.must_words = ast.accept(self) + self.must_words = [x for x in self.must_words if x != ""] + return self.max_len, self.min_len, self.must_words + + def visit(self, ast): + """Following Classes contain no-Keywords. + Union, Star + """ + return 1, 1, [str(ast)] + + def visit_Character(self, ast): + return 1, 1, [chr(ast.char)] + + def concat(self, (max1, min1, key1), (max2, min2, key2)): + return max1 + max2, min1 + min2, key1[0:-1] \ + + ([key1[-1] + key2[0]]) + key2[1:] + + def union(self, (max1, min1, _), (max2, min2, __)): + return max(max1, max2), min(min1, min2), ["", ""] + + def visit_Star(self, star): + return float("inf"), 0, ["", ""] + + def visit_Plus(self, plus): + (_, m, k) = plus.op.accept(self) + return float("inf"), m, k + [""] + k + + def visit_Qmark(self, qmark): + (m, _, _) = qmark.op.accept(self) + return m, 0, ["", ""] + + def visit_CharClass(self, cclass): + return 1, 1, ["", ""] + +def test(): + import doctest + doctest.testmod() + +if __name__ == "__main__": test()