Mercurial > hg > Members > shinya > pyrect
changeset 55:4ae288b37591
add analyzer. the analyzer can compute the max-length of a regexp.
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 26 Oct 2010 16:37:43 +0900 |
parents | 36cdfcaf5420 |
children | ee9945561f80 |
files | pyrect/regexp/analyzer.py pyrect/regexp/kwset.py pyrect/translator/c_translator.py pyrect/translator/dot_translator.py |
diffstat | 4 files changed, 69 insertions(+), 78 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# File: pyrect/regexp/analyzer.py (added by this changeset).

"""
Analyze a Regular-Expression AST and compute its max-length,
i.e. an upper bound on the length of the strings it accepts.
The max-length is float("inf") as soon as the expression
contains a repetition (Star or Plus).
"""

# Parser is not used by the analyzer itself; it is needed by the
# doctests below, which run with this module's globals.
from pyrect.regexp.parser import Parser
from pyrect.regexp.ast import ASTWalker

# Max-length reported for unbounded repetitions (Star, Plus).
_UNBOUNDED = float("inf")

class Analyzer(ASTWalker):
    """ Compute max-length with Visitor-Pattern.
    AST (ast), is represented by Node-Tree.
    >>> prs = Parser()
    >>> an = Analyzer()
    >>> an.analyze(prs.parse('fixed-string'))
    12
    >>> an.analyze(prs.parse('(build|fndecl|gcc)'))
    6
    >>> an.analyze(prs.parse('(AB|CD)*123'))
    inf
    >>> an.analyze(prs.parse('((12)*|3)|456'))
    inf
    >>> an.analyze(prs.parse('(plus)?(qmark)?'))
    9
    """
    def __init__(self):
        # Result of the most recent analyze() call; 0 until one succeeds.
        self.maxlen = 0

    def analyze(self, ast=None):
        """Return the max-length of ast.

        When ast is falsy (e.g. the default None) nothing is
        computed and the result of the previous call is returned.
        """
        if ast:
            self.maxlen = ast.accept(self)
        return self.maxlen

    def visit(self, ast):
        """Generic visitor: the node counts as one character.

        NOTE(review): presumably reached for every node type that
        has no dedicated visit_* method below (e.g. a single
        character) -- confirm against ASTWalker's dispatch.
        """
        return 1

    def visit_Concat(self, concat):
        """Concatenation: the max-lengths of both operands add up."""
        a1 = concat.op1.accept(self)
        a2 = concat.op2.accept(self)
        return a1 + a2

    def visit_Union(self, union):
        """Alternation: the longer of the two operands wins."""
        a1 = union.op1.accept(self)
        a2 = union.op2.accept(self)
        return max(a1, a2)

    def visit_Star(self, star):
        """Star is reported as unbounded; the operand is not visited."""
        return _UNBOUNDED

    def visit_Plus(self, plus):
        """Plus is reported as unbounded; the operand is not visited."""
        return _UNBOUNDED

    def visit_Qmark(self, qmark):
        """Optional operand: the max-length is that of the operand."""
        return qmark.op.accept(self)

def test():
    """Run the doctests embedded in this module."""
    import doctest
    doctest.testmod()

if __name__ == "__main__":
    test()
--- a/pyrect/regexp/kwset.py Wed Aug 25 22:22:54 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ -#!/usr/bin/env python - -""" -Extract Keywords from AST. Keywords, -which are necessary words to be accepted with Regular-Expression. -and which are used to Fixed-String-Filtering (ex: Boyer-Moore). -kwset is also used in GNU-GREP. -""" - -from pyrect.regexp.parser import Parser -from pyrect.regexp.ast import ASTWalker - -class KeywordsExtractor(ASTWalker): - """ Extract with Visitor-Pattern. - AST (ast), is represented by Node-Tree. - >>> prs = Parser() - >>> kex = KeywordsExtractor() - >>> kex.extract_keywords(prs.parse('(AB|CD)*123')) - ['', '123'] - >>> kex.extract_keywords(prs.parse('WOOO*PS!!')) - ['WOO', '', 'PS!!'] - >>> kex.extract_keywords(prs.parse('(build|fndecl|gcc)')) - [''] - >>> kex.extract_keywords(prs.parse('(plus)+(qmark)?')) - ['plus', ''] - """ - def __init__(self): - self.keywords = [] - - def extract_keywords(self, ast=None): - if ast: - self.keywords = ast.accept(self) - return self.keywords - - def visit(self, ast): - """Following Classes contain no-Keywords. - Union, Star - """ - return [''] - - def visit_Character(self, character): - return character.char - - def visit_Concat(self, concat): - key1 = concat.op1.accept(self) - key2 = concat.op2.accept(self) - - if isinstance(key1, str) and isinstance(key2, str): - return key1 + key2 - elif isinstance(key1, str) and isinstance(key2, list): - if key2[0]: - key2[0] = key1 + key2[0] - else: - key2 = [key1] + key2 - return key2 - elif isinstance(key1, list) and isinstance(key2, str): - if key1[-1]: - key1[-1] = key1[-1] + key2 - else: - key1 = key1 + [key2] - return key1 - else: - return key1 + key2 - - def visit_Plus(self, plus): - return plus.op.accept(self) - -def extract_keywords(ast): - return KeywordsExtractor().extract_keywords(ast) - -def test(): - import doctest - doctest.testmod() - -if __name__ == "__main__": test()
--- a/pyrect/translator/c_translator.py Wed Aug 25 22:22:54 2010 +0900 +++ b/pyrect/translator/c_translator.py Tue Oct 26 16:37:43 2010 +0900 @@ -50,7 +50,6 @@ self.emit("3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,") self.emit("};") self.emitd("#define SKIP(s) ((s) + skip_tbl[*(unsigned char *)s])", 2) - #self.emitd("#define SKIP(s) s+1", 2) def emit_driver(self): self.emiti("int main(int argc, unsigned char* argv[]) {")
--- a/pyrect/translator/dot_translator.py Wed Aug 25 22:22:54 2010 +0900 +++ b/pyrect/translator/dot_translator.py Tue Oct 26 16:37:43 2010 +0900 @@ -22,8 +22,8 @@ self.cg = regexp.nfacg else: self.cg = regexp.dfacg - self.fill_color = "white" #"lightsteelblue1" - self.frame_color = "black" #"navyblue" + self.fill_color = "lightsteelblue1" + self.frame_color = "navyblue" def state_name(self, name): return "q"+name