Mercurial > hg > Members > shinya > pyrect
view pyrect/regexp/analyzer.py @ 55:4ae288b37591
ddd analyzer. analyzer can analyzing to regexp max-length.
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 26 Oct 2010 16:37:43 +0900 |
parents | |
children | 81b44ae1cd73 |
line wrap: on
line source
#!/usr/bin/env python """ Extract Keywords from AST. Keywords, which are necessary words to be accepted with Regular-Expression. and which are used to Fixed-String-Filtering (ex: Boyer-Moore). kwset is also used in GNU-GREP. """ from pyrect.regexp.parser import Parser from pyrect.regexp.ast import ASTWalker class Analyzer(ASTWalker): """ Extract with Visitor-Pattern. AST (ast), is represented by Node-Tree. >>> prs = Parser() >>> an = Analyzer() >>> an.analyze(prs.parse('fixed-string')) 12 >>> an.analyze(prs.parse('(build|fndecl|gcc)')) 6 >>> an.analyze(prs.parse('(AB|CD)*123')) inf >>> an.analyze(prs.parse('((12)*|3)|456')) inf >>> an.analyze(prs.parse('(plus)?(qmark)?')) 9 """ def __init__(self): self.maxlen = 0 def analyze(self, ast=None): if ast: self.maxlen = ast.accept(self) return self.maxlen def visit(self, ast): """Following Classes contain no-Keywords. Union, Star """ return 1 def visit_Concat(self, concat): a1 = concat.op1.accept(self) a2 = concat.op2.accept(self) return a1 + a2 def visit_Union(self, union): a1 = union.op1.accept(self) a2 = union.op2.accept(self) return max(a1, a2) def visit_Star(self, star): return float("inf") def visit_Plus(self, plus): return float("inf") def visit_Qmark(self, qmark): return qmark.op.accept(self) def test(): import doctest doctest.testmod() if __name__ == "__main__": test()