view pyrect/regexp/analyzer.py @ 55:4ae288b37591

ddd analyzer. analyzer can analyzing to regexp max-length.
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Tue, 26 Oct 2010 16:37:43 +0900
parents
children 81b44ae1cd73
line wrap: on
line source

#!/usr/bin/env python

"""
Extract Keywords from AST. Keywords,
which are necessary words to be accepted with Regular-Expression.
and which are used to Fixed-String-Filtering (ex: Boyer-Moore).
kwset is also used in GNU-GREP.
"""

from pyrect.regexp.parser import Parser
from pyrect.regexp.ast import ASTWalker

class Analyzer(ASTWalker):
    """ Extract with Visitor-Pattern.
    AST (ast), is represented by Node-Tree.
    >>> prs = Parser()
    >>> an  = Analyzer()
    >>> an.analyze(prs.parse('fixed-string'))
    12
    >>> an.analyze(prs.parse('(build|fndecl|gcc)'))
    6
    >>> an.analyze(prs.parse('(AB|CD)*123'))
    inf
    >>> an.analyze(prs.parse('((12)*|3)|456'))
    inf
    >>> an.analyze(prs.parse('(plus)?(qmark)?'))
    9
    """
    def __init__(self):
        self.maxlen = 0

    def analyze(self, ast=None):
        if ast:
            self.maxlen = ast.accept(self)
        return self.maxlen

    def visit(self, ast):
        """Following Classes contain no-Keywords.
        Union, Star
        """
        return 1

    def visit_Concat(self, concat):
        a1 = concat.op1.accept(self)
        a2 = concat.op2.accept(self)

        return a1 + a2

    def visit_Union(self, union):
        a1 = union.op1.accept(self)
        a2 = union.op2.accept(self)
        return max(a1, a2)

    def visit_Star(self, star):
        return float("inf")

    def visit_Plus(self, plus):
        return float("inf")

    def visit_Qmark(self, qmark):
        return qmark.op.accept(self)

def test():
    import doctest
    doctest.testmod()

if __name__ == "__main__": test()