Mercurial > hg > Members > shinya > pyrect
view src/dfareg.py @ 9:973b596bd124
remove Regexp.emitDot()
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 03 Jul 2010 03:49:41 +0900 |
parents | 11fba907c0af |
children | 2d00b46ce34d |
line wrap: on
line source
#!/usr/bin/env python import copy import re from optparse import OptionParser class NondeterministicFiniteAutomaton(object): def __init__(self, transition, start, accepts, map_): self.transition = transition self.start = start self.accepts = accepts self.map = map_ def epsilonExpand(self, set_): que = set(set_) done = set() while que: stat = que.pop() nexts = self.transition(stat, "") done.add(stat) for nextStat in nexts: if not nextStat in done: que.add(nextStat) return frozenset(done) class DeterministicFiniteAutomaton(object): def __init__(self, transition, start, accepts, map_): self.transition = transition self.start = start self.accepts = accepts self.map = map_ def getRuntime(self): return DFARuntime(self) class DFARuntime(object): def __init__(self, DFA): self.DFA = DFA self.curState = self.DFA.start def doTransition(self, char): self.curState = self.DFA.transition( self.curState, char) def isAcceptState(self): return self.curState in self.DFA.accepts def doesAccept(self, input): for alphabet in input: self.doTransition(alphabet) return self.isAcceptState() class Token(object): CHARACTER = 0 OPE_UNION = 1 OPE_STAR = 2 LPAREN = 3 RPAREN = 4 EOF = 5 def __init__(self, value, kind): self.value = value self.kind = kind class Lexer(object): def __init__(self, str): self.stringList = list(str) def scan(self): if not self.stringList: return Token(None, Token.EOF) ch = self.stringList.pop(0) if ch == '\\': return Token(self.stringList.pop(0), Token.CHARACTER) elif ch == '|': return Token(ch, Token.OPE_UNION) elif ch == '(': return Token(ch, Token.LPAREN) elif ch == ')': return Token(ch, Token.RPAREN) elif ch == '*': return Token(ch, Token.OPE_STAR) else: return Token(ch, Token.CHARACTER) """Context-free Grammer: (A) expression -> subexpr EOF (B) subexpr -> seq '|' subexpr | seq (C) seq -> subseq | '' (D) subseq -> star subseq | star (E) star -> factor '*' | factor (F) factor -> '(' subexpr ')' | CHARACTER """ class Parser(object): def __init__(self, lexer): self.lexer = lexer self.look = None self.move() def match(self, tag): if self.look.kind != tag: raise Exeption("syntax error") self.move() def move(self): self.look = self.lexer.scan() def factor(self): if self.look.kind == Token.LPAREN: # factor -> '(' subexpr ')' self.match(Token.LPAREN) node = self.subexpr() self.match(Token.RPAREN) return node else: # factor -> CHARACTER node = Character(self.look.value) self.match(Token.CHARACTER) return node def star(self): # star -> factor '*' | factor node = self.factor() if self.look.kind == Token.OPE_STAR: self.match(Token.OPE_STAR) node = Star(node) return node def seq(self): if self.look.kind == Token.LPAREN \ or self.look.kind == Token.CHARACTER: # seq -> subseq return self.subseq() else: # seq -> '' return Character("") def subseq(self): node1 = self.star() if self.look.kind == Token.LPAREN \ or self.look.kind == Token.CHARACTER: # subseq -> star subseq node2 = self.subseq() node = Concat(node1, node2) return node else: # subseq -> star return node1 def subexpr(self): node1 = self.seq() if self.look.kind == Token.OPE_UNION: self.match(Token.OPE_UNION) # subexpr -> seq '|' subexpr node2 = self.subexpr() node = Union(node1, node2) return node else: # subexpr -> seq return node1 def expression(self): # expression -> subexpr EOF node = self.subexpr() self.match(Token.EOF) # Create TREE, NFA context = Context() fragment = node.assemble(context) return fragment.build() class Context(object): def __init__(self): self.stateCount = 0 def newState(self): self.stateCount += 1 return self.stateCount class NFAFragment(object): def __init__(self): self.start = None # integer self.accepts = None # frozenset self.map = dict() # transition def connect(self, from_, char, to): slot = self.map.setdefault((from_, char), set()) slot.add(to) def newSkelton(self): # copy fragment (self), and it return newFrag = NFAFragment(); newFrag.map = copy.deepcopy(self.map) return newFrag def __or__(self, frag): newFrag = self.newSkelton() for k, v in frag.map.iteritems(): newFrag.map[k] = v.copy() return newFrag def build(self): map_ = self.map def transition(state, char): return frozenset(map_.get((state, char), [])) return NondeterministicFiniteAutomaton( transition, self.start, self.accepts, self.map ) class Character(object): def __init__(self, char): self.char = char def assemble(self, context): frag = NFAFragment() s1 = context.newState() s2 = context.newState() frag.connect(s1, self.char, s2) frag.start = s1 frag.accepts = frozenset([s2]) return frag class Union(object): def __init__(self, op1, op2): self.op1 = op1 self.op2 = op2 def assemble(self, context): frag1 = self.op1.assemble(context) frag2 = self.op2.assemble(context) frag = frag1 | frag2 s = context.newState() frag.connect(s, "", frag1.start) frag.connect(s, "", frag2.start) frag.start = s frag.accepts = frag1.accepts | frag2.accepts return frag class Concat(object): def __init__(self, op1, op2): self.op1 = op1 self.op2 = op2 def assemble(self, context): frag1 = self.op1.assemble(context) frag2 = self.op2.assemble(context) frag = frag1 | frag2 for state in frag1.accepts: frag.connect(state, "", frag2.start) frag.start = frag1.start frag.accepts = frag2.accepts return frag class Star(object): def __init__(self, op): self.op = op def assemble(self, context): fragOrig = self.op.assemble(context) frag = fragOrig.newSkelton() for state in fragOrig.accepts: frag.connect(state, "", fragOrig.start) s = context.newState() frag.connect(s, "", fragOrig.start) frag.start = s frag.accepts = fragOrig.accepts | frozenset([s]) return frag class NonDisjoinSets(object): def __init__(self, sub): self.sub = sub def __contains__(self, a_set): return a_set & self.sub def nfa2dfa(nfa): def transition(set_, alpha): ret = set() for elem in set_: ret |= nfa.transition(elem, alpha) return nfa.epsilonExpand(frozenset(ret)) def stateSetTransition(stateSet, char): trans = set() for state in stateSet: t = nfa.map.setdefault((state, char), None) if not t == None: trans.update(nfa.epsilonExpand(t)) return frozenset(trans) map_ = dict() que = set(frozenset([nfa.epsilonExpand(set([nfa.start]))])) done = set() while que: stateSet = que.pop() for state in stateSet: for k, v in nfa.map.iteritems(): if state == k[0] and k[1] != '': slot = map_.setdefault((stateSet, k[1]), set()) slot.update(nfa.epsilonExpand(v)) done.add(stateSet) for v in map_.itervalues(): if not v in done: que.add(frozenset(v)) return DeterministicFiniteAutomaton( transition, nfa.epsilonExpand(frozenset([nfa.start])), NonDisjoinSets(nfa.accepts), map_ ) # create call graph (as dictionary) class CallGraph(object): """CallGraph is State Transition Diagram. it's can be create from DFA or DFA. >>> reg = Regexp(\"AA*|B\") >>> dfacg = CallGraph(reg.dfa) >>> nfacg = CallGraph(reg.nfa) >>> dfacg.map {'1_6_8': {'A': ['2_3_5'], 'B': ['7']}, '2_3_5': {'A': ['3_4']}, '7': {}, '3_4': {'A': ['3_4']}} >>> dfacg.states set(['1_6_8', '2_3_5', '7', '3_4']) >>> dfacg.accepts set(['2_3_5', '7', '3_4']) >>> nfacg.map {'1': {'A': ['2']}, '3': {'A': ['4']}, '2': {'': ['5']}, '5': {'': ['3']}, '4': {'': ['3']}, '7': {}, '6': {'B': ['7']}, '8': {'': ['1', '6']}} >>> nfacg.states set(['1', '3', '2', '5', '4', '7', '6', '8']) >>> nfacg.accepts set(['5', '4', '7']) """ def __init__(self, fa): self.fa = fa if isinstance(fa, DeterministicFiniteAutomaton): self.type = "DFA" self.start = self.set2name(fa.start) else: self.type = "NFA" self.start = str(fa.start) self.inputs = set() self.states = set() (self.map, self.accepts) = self.getCallGraph(self.fa) # return state name from set def set2name(self, set_): l = list(set_) l.sort() return ('_'.join(str(x) for x in l)) def getCallGraph(self, fa): transitionCases = dict() if self.type is "DFA": transitionCases[fa.start] = set() else: transitionCases[frozenset([fa.start])] = set() # transitionCases is hash table that binds a state and inputs, # it's useful for transition definition # fa is dfa or nfa # dfa.map => {(frozenset([1, 15]), 'd'): frozenset([16]), ....} # : frozenset(1, 15) x 'd' -> frozenset([16]) # nfa.map => {(6, ''): set([5, 7]), (1, 'A'): set([2]), ....} # : 6 x '' -> set([5, 7]) for nextState in fa.map.itervalues(): if self.type is "DFA": transitionCases[frozenset(nextState)] = set() else: for nextState_ in nextState: transitionCases[frozenset([nextState_])] = set() for (state, input) in fa.map.iterkeys(): if self.type is "NFA": state = [state] slot = transitionCases.setdefault(frozenset(state), set()) slot.add(input) callGraph = dict() accepts = set() # create CallGraph for state in transitionCases.iterkeys(): callGraph[self.set2name(state)] = set() for (state, input) in transitionCases.iteritems(): caseMap = dict() self.states.add(self.set2name(state)) if self.type is "DFA": for case in input: self.inputs.add(case) caseMap[case] = [self.set2name(fa.map[frozenset(state), case])] if state in fa.accepts: accepts.add(self.set2name(state)) else: self.states.add(str(list(state)[0])) for case in input: self.inputs.add(case) caseMap[case] = [str(next_) for next_ in fa.map[list(state)[0], case]] if list(state)[0] in fa.accepts: accepts.add(self.set2name(state)) callGraph[self.set2name(state)] = caseMap return (callGraph, accepts) class Regexp(object): def __init__(self, regexp): self.regexp = regexp self.nfa = None self.dfa = None self._compile() def _compile(self): lexer_ = Lexer(self.regexp) parser_ = Parser(lexer_) self.nfa = parser_.expression() self.dfa = nfa2dfa(self.nfa) def matches(self, string): runtime = self.dfa.getRuntime() return runtime.doesAccept(string) def compile(regexp): return Regexp(regexp) def test(): import doctest doctest.testmod() if __name__ == '__main__' : test()