Mercurial > hg > Members > shinya > pyrect
changeset 47:701beabd7d97
add input-rules, Range, CharacterClass, Anchor and MultiByte-Char(but not work)\nand more simplify NFA (is global improvement).
line wrap: on
line diff
--- a/pyrect/c_translator.py Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,162 +0,0 @@ -#!/Usr/bin/env python - -from pyrect.regexp import Regexp -from pyrect.translator import Translator - -class CTranslator(Translator): - """CTranslator - This Class can translate from DFA or NFA into C source code. - DFA: A simple state transition as tail call (also can implement with CbC). - NFA: using stack, deepening depth-first search. - >>> string = '(A|B)*C' - >>> reg = Regexp(string) - >>> CTranslator(reg).translate() - >>> CTranslator(reg).translate() - """ - def __init__(self, regexp): - Translator.__init__(self, regexp) - self.funType = 'void ' - self.callType = '' - self.breakStatement = '\t\t\tbreak;' - self.debug = False - self.eols = ('\\0', '\\n') - - def state_name(self, state_name): - if state_name == "accept" or state_name == "reject": - return str(state_name) - else: - return "state_"+str(state_name) - - def emit_accept_state(self): - self.emit (""" -%saccept(char* s) { -\tprintf(\"\\nstring matches regexp. \\n\\n\"); -}\n""" % self.funType) - - def emit_reject_state(self): - self.emit (""" -%sreject(char* s) { -\tprintf(\"\\nstring does not match regexp. \\n\\n\"); -}\n""" % self.funType) - - def emit_driver(self): - self.emit(""" -int main(int argc, char* argv[]) { -\tputs(\"regexp: %s\"); -\tputs(\"number of state: %d\"); -\tprintf(\"string: %%s\\n\", argv[1]); -\t%s%s(argv[1]); -""" % (self.regexp, len(self.cg.states), self.callType, self.state_name(self.cg.start))) - if self.cg.type == "NFA": - self.emit("\treject(argv[1]);\n") - self.emit(""" -\treturn 0; -}\n\n""") - - def emit_strcmp1(self, string, next): - cmp_stmt = list() - offset = 0 - if len(string) >= 4: - for n in range(len(string)/4): - type_ = "unsigned int *" - ptr = "intp" + str(n) - self.emit("\tstatic %s%s = (%s)\"%s\";\n" - % (type_, ptr, type_, string[:4])) - cmp_stmt.append((type_, offset, "*"+ptr)) - string = string[4:] - offset += 4 - if len(string) >= 2: - type_ = "unsigned short int *" - ptr = "shortp" - self.emit("\tstatic %s%s = (%s)\"%s\";\n" - % (type_, ptr, type_, string[:2])) - cmp_stmt.append((type_, offset, "*"+ptr)) - offset += 2 - string = string[2:] - if len(string) == 1: - ptr = "'%s'" % string[0] - cmp_stmt.append(("char *", offset, ptr)) - offset += 1 - - self.emit("\n\tif (") - for stmt in cmp_stmt: - self.emit("*(%s)((char *)s+%d) == %s" % stmt) - if stmt != cmp_stmt[-1]: - self.emit(" && ") - self.emit(")\n\t\t return %s(s+%d);\n\n" % (self.state_name(next), offset)) - - def emit_strcmp2(self, string, next): - self.emit("\tls = s;\n") - self.emit("\tif (") - self.emit("*ls++ == '%c'" % string[0]) - for char in string[1:]: - self.emit(" && *ls++ == '%c'" % char) - self.emit(")\n\t\t return %s(ls);\n\n" % self.state_name(next)) - - def emit_strcmp3(self, string, next): - self.emit("\tstatic char* string = \"%s\";\n" % string) - self.emit("\tif (memcmp(string, s, %d) == 0)\n" % len(string)) - self.emit("\t\t return %s(s+%d);\n" % (self.state_name(next), len(string))) - - - def emit_switch(self, case, default=None): - self.emit("\tswitch(*s++) {\n") - for input, next_states in case.iteritems(): - if input != '': - self.emit("\t\tcase '%s': \n" % (input)) - if self.cg.type == "NFA": - for next_state in next_states: - self.emit("\t\t\t%s%s(s);\n" % (self.callType, self.state_name(next_state))) - else: - self.emit("\t\t\t%s%s(s);\n" % (self.callType, self.state_name(next_states))) - if self.breakStatement != '': self.emit(self.breakStatement+'\n') - - if default: - self.emit( """\t\tdefault:\n\t\t\t%s%s(s);\n""" % (self.callType, default)) - self.emit("\t}\n") - - - def emit_state(self, cur_state, transition): - self.emit(self.funType + self.state_name(cur_state) + "(char* s) {\n") - if self.debug: - self.emit("\tprintf(\"state: %s, input: %%s\\n\", s);\n" % (cur_state)) - if self.cg.type == "NFA": - if '' in transition: - epsilon_transition = transition.pop('') - for n in epsilon_transition: - self.emit("\t%s%s(s);\n" % (self.callType, self.state_name(n))) - - if cur_state in self.cg.accepts: - for eol in self.eols: - transition[eol] = ["accept"] - - if transition: - if self.cg.type == "DFA": - self.emit_switch(transition, default="reject") - else: - self.emit_switch(transition) - self.emit("}\n\n") - - def emit_initialization(self): - self.emit("#include <stdio.h>\n\n") - for state in self.cg.map.iterkeys(): - self.emit(self.funType + self.state_name(state) + "(char* s);\n") - self.emit(self.funType + 'accept(char* s);\n') - self.emit(self.funType + 'reject(char* s);\n') - - def emit_from_callgraph(self): - # self.emit C-source code - self.emit_initialization() - self.emit_driver() - - for cur_state, transition in self.cg.map.iteritems(): - self.emit_state(cur_state, transition) - - self.emit_accept_state() - self.emit_reject_state() - -def test(): - import doctest - doctest.testmod() - -if __name__ == '__main__': test()
--- a/pyrect/cbc_translator.py Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -#!/usr/bin/env python - -from pyrect.regexp import Regexp -from pyrect.c_translator import CTranslator - -class CbCTranslator(CTranslator): - """ - CbCTranslator - >>> string = \"(A|B)*C\" - >>> reg = Regexp(string) - >>> ct = CbCTranslator(reg) - >>> ct.translate() - >>> ct.debug = True - >>> ct.translate() - """ - def __init__(self, regexp): - CTranslator.__init__(self, regexp) - self.funType = '__code ' - self.callType = 'goto ' - self.breakStatement = '' - -def test(): - import doctest - doctest.testmod() - -if __name__ == '__main__' : test()
--- a/pyrect/cbcgrep_translator.py Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,143 +0,0 @@ -#!/usr/bin/env python - -from pyrect.grep_translator import GREPTranslator -from pyrect.regexp import Regexp - -class CbCGREPTranslateExeption(Exception): - pass - -class CbCGREPTranslator(GREPTranslator): - """CbCGREPTranslator - This Class can translate form DFA into grep source-code. - which based on (beautiful) mini-grep introduced \"The Practice of Programming\" - written by Rob Pike & Brian W. Kernighan. (see template/grep.c) - >>> string = \"(build|fndecl|gcc)\" - >>> reg = Regexp(string) - >>> tje = GREPTranslator(reg) - >>> tje.translate() - """ - - def __init__(self, regexp): - GREPTranslator.__init__(self, regexp) - self.funType = '__code ' - self.interface = "char *s, char* cur, char* buf, FILE *f, char* filename" - self.args = "s, cur, buf, f, filename" - self.callType = 'goto ' - self.breakStatement = '' - self.print_file = False - self.__bufsize = 1024 - - def getbufsize(self): - return self.__bufsize - def setbufsize(self, bufsize): - self.__bufsize = abs(bufsize) - - bufsize = property(getbufsize, setbufsize) - - def emit_accept_state(self): - self.emit("__code accept(%s) {\n" % self.interface) - if self.print_file: - self.emit(" printf(\"%s: %s\\n\", filename, buf);\n") - else: - self.emit(" printf(\"%s\\n\", buf);\n") - self.emit(" goto next_line(%s);\n}\n\n" % self.args) - - def emit_reject_state(self): - self.emit(""" -__code reject(%s) { - goto next_ptr(%s); -} -""" % (self.interface, self.args)) - - def emit_next_state(self): - self.emit (""" -__code next_ptr(%s) { - if(*cur++ == '\\0') - goto next_line(%s); - s = cur; - goto DFA(%s); -} -""" % (self.interface, self.args, self.args)) - - self.emit(""" -__code next_line(%s) { - if(fgets(buf, LINEBUFSIZE, f) == NULL) { - goto returner(); - } - int n = strlen(buf); - if (n > 0 && buf[n-1] == '\\n') - buf[n-1] = '\\0'; - cur = buf; - s = cur; - goto DFA(%s); -} -""" % (self.interface, self.args)) - self.emit(""" -__code returner() { - return; -}""") - - def emit_initialization(self): - self.emit("#include <stdio.h>\n") - self.emit("#include <stdlib.h>\n") - self.emit("#include <string.h>\n\n") - self.emit("#define LINEBUFSIZE 1024\n") - self.emit("#define READBUFSIZE %d\n\n" % self.bufsize) - - self.emit("%sDFA(%s);\n" % (self.funType, self.interface)) - for state in self.cg.map.iterkeys(): - self.emit(self.funType + self.state_name(state) + "(" + self.interface + ");\n") - self.emit(self.funType + 'accept(%s);\n' % self.interface) - self.emit(self.funType + 'reject(%s);\n' % self.interface) - self.emit(self.funType + 'next_ptr(%s);\n' % self.interface) - self.emit(self.funType + 'next_line(%s);\n' % self.interface) - self.emit(self.funType + 'returner();\n\n') - grepsource = open("template/grep.cbc") - self.emit(grepsource.read()) - self.emit_next_state() - - def emit_filter(self): - pass - - def emit_driver(self): - self.emit(""" -int main(int argc, char* argv[]) { - grepmain(argc, argv); - return; -} -""") - self.emit(""" -%sDFA(%s) { - goto %s(%s); -} -""" % (self.funType, self.interface, self.state_name(self.cg.start), self.args)) - - def emit_switch(self, case, default=None): - self.emit("\tswitch(*s++) {\n") - for input, next_state in case.iteritems(): - if input != '': - self.emit("\t\tcase '%s': \n" % (input)) - self.emit("\t\t\t%s%s(%s);\n" % (self.callType, self.state_name(next_state), self.args)) - if self.breakStatement != '': self.emit(self.breakStatement+'\n') - - if default: - self.emit( """\t\tdefault:\n\t\t\t%s%s(%s);\n""" % (self.callType, default, self.args)) - self.emit("\t}\n") - - def emit_state(self, cur_state, transition): - self.emit(self.funType + self.state_name(cur_state) + "(" + self.interface + ") {\n") - if cur_state in self.cg.accepts: - self.emit("\tgoto accept(%s);\n" % self.args) - else: - if transition: - if self.cg.type == "DFA": - self.emit_switch(transition, default="reject") - else: - self.emit_switch(transition) - self.emit("}\n\n") - -def test(): - import doctest - doctest.testmod() - -if __name__ == '__main__': test()
--- a/pyrect/converter.py Fri Aug 06 20:18:58 2010 +0900 +++ b/pyrect/converter.py Sun Aug 08 04:13:14 2010 +0900 @@ -2,10 +2,7 @@ import sys from pyrect.regexp import Regexp -from pyrect.c_translator import CTranslator -from pyrect.cbc_translator import CbCTranslator -from pyrect.dot_translator import DotTranslator -from pyrect.llvm_translator import LLVMTranslator +from pyrect.translator import * from optparse import OptionParser def main(argv):
--- a/pyrect/dfa_translator.py Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,72 +0,0 @@ -#!/usr/bin/env python - -from pyrect.grep_translator import GREPTranslator -from pyrect.regexp import Regexp - -class GNUGREPTranslator(GREPTranslator): - """GNUGREPTranslator - This class can translate from DFA into size_t DFA(char* s). - which is entirely equivalent to dfaexec(..) in GNU-grep (see src/dfa.c). - * but which is not work currently. (when search large-file, there is fewer - * accepted-lines than grep's dfaexec.) - * probably, there is some problem exists about buffering. - >>> string = '(build|fndecl|gcc)' - >>> reg = Regexp(string) - >>> tje = GNUGREPTranslator(reg) - >>> tje.translate() - """ - - def __init__(self, regexp): - GREPTranslator.__init__(self, regexp) - self.funType = 'size_t ' - self.callType = 'return ' - self.breakStatement = '' - - def emit_initialization(self): - for state in self.cg.map.iterkeys(): - self.emit(self.funType + self.state_name(state) + "(char* s);\n") - self.emit(self.funType + 'accept(char* s);\n') - self.emit(self.funType + 'reject(char* s);\n') - - def emit_accept_state(self): - self.emit (""" -%saccept(char* s) { -\treturn 1; -}\n""" % self.funType) - - def emit_reject_state(self): - self.emit (""" -%sreject(char* s) { -\treturn 0; -}\n""" % self.funType) - - def emit_driver(self): - self.emit(""" -/* This DFA accept only \'%s\'*/ -%sDFA(char *s) { - char *begin = s; - do { - if (%s(s)) { //(matchhere(regexp+1, text)) - return (char const *) s - begin; - } - } while (*s != '\\n' && *s++ != '\\0'); - return (size_t) -1; -}\n\n""" % (self.regexp.regexp, self.funType, self.state_name(self.cg.start))) - - def emit_state(self, cur_state, transition): - self.emit(self.funType + self.state_name(cur_state) + "(char* s) {\n") - if cur_state in self.cg.accepts: - self.emit("\treturn accept(s);\n") - else: - if transition: - if self.cg.type == "DFA": - self.emit_switch(transition, default="reject") - else: - self.emit_switch(transition) - self.emit("}\n\n") - -def test(): - import doctest - doctest.testmod() - -if __name__ == '__main__': test()
--- a/pyrect/dfareg.py Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,443 +0,0 @@ -#!/Usr/bin/env python - -import copy -import re -from optparse import OptionParser - -class NondeterministicFiniteAutomaton(object): - def __init__(self, transition, start, accepts, map_): - self.transition = transition - self.start = start - self.accepts = accepts - self.map = map_ - - def epsilon_expand(self, set_): - que = set(set_) - done = set() - while que: - stat = que.pop() - nexts = self.transition(stat, "") - done.add(stat) - for nextStat in nexts: - if not nextStat in done: que.add(nextStat) - return frozenset(done) - -class DeterministicFiniteAutomaton(object): - def __init__(self, transition, start, accepts, map_): - self.transition = transition - self.start = start - self.accepts = accepts - self.map = map_ - def get_runtime(self): - return DFARuntime(self) - -class DFARuntime(object): - def __init__(self, DFA): - self.DFA = DFA - self.curState = self.DFA.start - - def do_transition(self, char): - self.curState = self.DFA.transition( - self.curState, char) - - def is_accept_state(self): - return self.curState in self.DFA.accepts - - def does_accept(self, input): - for alphabet in input: - self.do_transition(alphabet) - return self.is_accept_state() - -class Token(object): - CHARACTER = 0 - OPE_UNION = 1 - OPE_STAR = 2 - LPAREN = 3 - RPAREN = 4 - EOF = 5 - - def __init__(self, value, kind): - self.value = value - self.kind = kind - - -class Lexer(object): - def __init__(self, str): - self.stringList = list(str) - - def scan(self): - if not self.stringList: - return Token(None, Token.EOF) - - ch = self.stringList.pop(0) - if ch == '\\': - return Token(self.stringList.pop(0), Token.CHARACTER) - elif ch == '|': - return Token(ch, Token.OPE_UNION) - elif ch == '(': - return Token(ch, Token.LPAREN) - elif ch == ')': - return Token(ch, Token.RPAREN) - elif ch == '*': - return Token(ch, Token.OPE_STAR) - else: - return Token(ch, Token.CHARACTER) - -"""Context-free Grammer: - (A) expression -> subexpr EOF - (B) subexpr -> seq '|' subexpr | seq - (C) seq -> subseq | '' - (D) subseq -> star subseq | star - (E) star -> factor '*' | factor - (F) factor -> '(' subexpr ')' | CHARACTER -""" - -class Parser(object): - def __init__(self, lexer): - self.lexer = lexer - self.look = None - self.move() - - def match(self, tag): - if self.look.kind != tag: - raise Exeption("syntax error") - self.move() - - def move(self): - self.look = self.lexer.scan() - - def factor(self): - if self.look.kind == Token.LPAREN: - # factor -> '(' subexpr ')' - self.match(Token.LPAREN) - node = self.subexpr() - self.match(Token.RPAREN) - return node - else: - # factor -> CHARACTER - node = Character(self.look.value) - self.match(Token.CHARACTER) - return node - - def star(self): - # star -> factor '*' | factor - node = self.factor() - if self.look.kind == Token.OPE_STAR: - self.match(Token.OPE_STAR) - node = Star(node) - return node - - def seq(self): - if self.look.kind == Token.LPAREN \ - or self.look.kind == Token.CHARACTER: - # seq -> subseq - return self.subseq() - else: - # seq -> '' - return Character("") - - def subseq(self): - node1 = self.star() - if self.look.kind == Token.LPAREN \ - or self.look.kind == Token.CHARACTER: - # subseq -> star subseq - node2 = self.subseq() - node = Concat(node1, node2) - return node - else: - # subseq -> star - return node1 - - def subexpr(self): - node1 = self.seq() - if self.look.kind == Token.OPE_UNION: - self.match(Token.OPE_UNION) - # subexpr -> seq '|' subexpr - node2 = self.subexpr() - node = Union(node1, node2) - return node - else: - # subexpr -> seq - return node1 - - def expression(self): - # expression -> subexpr EOF - node = self.subexpr() - self.match(Token.EOF) - - # Create TREE, NFA - context = Context() - fragment = node.assemble(context) - return fragment.build() - -class Context(object): - def __init__(self): - self.stateCount = 0 - - def new_state(self): - self.stateCount += 1 - return self.stateCount - -class NFAFragment(object): - def __init__(self): - self.start = None # integer - self.accepts = None # frozenset - self.map = dict() # transition - - def connect(self, from_, char, to): - slot = self.map.setdefault((from_, char), set()) - slot.add(to) - - def new_skelton(self): - # copy fragment (self), and it return - newFrag = NFAFragment(); - newFrag.map = copy.deepcopy(self.map) - return newFrag - - def __or__(self, frag): - newFrag = self.new_skelton() - for k, v in frag.map.iteritems(): - newFrag.map[k] = v.copy() - return newFrag - - def build(self): - map_ = self.map - def transition(state, char): - return frozenset(map_.get((state, char), [])) - - return NondeterministicFiniteAutomaton( - transition, - self.start, - self.accepts, - self.map - ) - -class Character(object): - def __init__(self, char): - self.char = char - - def assemble(self, context): - frag = NFAFragment() - s1 = context.new_state() - s2 = context.new_state() - frag.connect(s1, self.char, s2) - frag.start = s1 - frag.accepts = frozenset([s2]) - return frag - -class Union(object): - def __init__(self, op1, op2): - self.op1 = op1 - self.op2 = op2 - - def assemble(self, context): - frag1 = self.op1.assemble(context) - frag2 = self.op2.assemble(context) - frag = frag1 | frag2 - s = context.new_state() - frag.connect(s, "", frag1.start) - frag.connect(s, "", frag2.start) - frag.start = s - frag.accepts = frag1.accepts | frag2.accepts - return frag - -class Concat(object): - def __init__(self, op1, op2): - self.op1 = op1 - self.op2 = op2 - - def assemble(self, context): - frag1 = self.op1.assemble(context) - frag2 = self.op2.assemble(context) - frag = frag1 | frag2 - - for state in frag1.accepts: - frag.connect(state, "", frag2.start) - - frag.start = frag1.start - frag.accepts = frag2.accepts - return frag - -class Star(object): - def __init__(self, op): - self.op = op - - def assemble(self, context): - fragOrig = self.op.assemble(context) - frag = fragOrig.new_skelton() - - for state in fragOrig.accepts: - frag.connect(state, "", fragOrig.start) - - s = context.new_state() - frag.connect(s, "", fragOrig.start) - frag.start = s - frag.accepts = fragOrig.accepts | frozenset([s]) - return frag - -class NonDisjoinSets(object): - def __init__(self, sub): - self.sub = sub - def __contains__(self, a_set): - return a_set & self.sub - -def nfa2dfa(nfa): - def transition(set_, alpha): - ret = set() - for elem in set_: - ret |= nfa.transition(elem, alpha) - return nfa.epsilon_expand(frozenset(ret)) - - def state_set_transition(stateSet, char): - trans = set() - for state in stateSet: - t = nfa.map.setdefault((state, char), None) - if not t == None: trans.update(nfa.epsilon_expand(t)) - return frozenset(trans) - - map_ = dict() - que = set(frozenset([nfa.epsilon_expand(set([nfa.start]))])) - done = set() - - while que: - stateSet = que.pop() - - for state in stateSet: - for k, v in nfa.map.iteritems(): - if state == k[0] and k[1] != '': - slot = map_.setdefault((stateSet, k[1]), set()) - slot.update(nfa.epsilon_expand(v)) - - done.add(stateSet) - - for v in map_.itervalues(): - if not v in done: - que.add(frozenset(v)) - - return DeterministicFiniteAutomaton( - transition, - nfa.epsilon_expand(frozenset([nfa.start])), - NonDisjoinSets(nfa.accepts), - map_ - ) - -# create call graph (as dictionary) -class CallGraph(object): - """CallGraph is State Transition Diagram. - it's can be create from DFA or DFA. - >>> reg = Regexp(\"AA*|B\") - >>> dfacg = CallGraph(reg.dfa) - >>> nfacg = CallGraph(reg.nfa) - >>> dfacg.map - {'1_6_8': {'A': ['2_3_5'], 'B': ['7']}, '2_3_5': {'A': ['3_4']}, '7': {}, '3_4': {'A': ['3_4']}} - >>> dfacg.states - set(['1_6_8', '2_3_5', '7', '3_4']) - >>> dfacg.accepts - set(['2_3_5', '7', '3_4']) - >>> nfacg.map - {'1': {'A': ['2']}, '3': {'A': ['4']}, '2': {'': ['5']}, '5': {'': ['3']}, '4': {'': ['3']}, '7': {}, '6': {'B': ['7']}, '8': {'': ['1', '6']}} - >>> nfacg.states - set(['1', '3', '2', '5', '4', '7', '6', '8']) - >>> nfacg.accepts - set(['5', '4', '7']) - """ - - def __init__(self, fa): - self.fa = fa - if isinstance(fa, DeterministicFiniteAutomaton): - self.type = "DFA" - self.start = self.set2name(fa.start) - else: - self.type = "NFA" - self.start = str(fa.start) - self.inputs = set() - self.states = set() - (self.map, self.accepts) = self.create_call_graph(self.fa) - - # return state name from set - def set2name(self, set_): - l = list(set_) - l.sort() - return ('_'.join(str(x) for x in l)) - - def create_call_graph(self, fa): - transitionCases = dict() - if self.type == "DFA": - transitionCases[fa.start] = set() - else: - transitionCases[frozenset([fa.start])] = set() - - # transitionCases is hash table that binds a state and inputs, - # it's useful for transition definition - - # fa is dfa or nfa - # dfa.map => {(frozenset([1, 15]), 'd'): frozenset([16]), ....} - # : frozenset(1, 15) x 'd' -> frozenset([16]) - # nfa.map => {(6, ''): set([5, 7]), (1, 'A'): set([2]), ....} - # : 6 x '' -> set([5, 7]) - - for nextState in fa.map.itervalues(): - if self.type == "DFA": - transitionCases[frozenset(nextState)] = set() - else: - for nextState_ in nextState: - transitionCases[frozenset([nextState_])] = set() - - for (state, input) in fa.map.iterkeys(): - if self.type == "NFA": - state = [state] - slot = transitionCases.setdefault(frozenset(state), set()) - slot.add(input) - - callGraph = dict() - accepts = set() - - # create CallGraph - for state in transitionCases.iterkeys(): - callGraph[self.set2name(state)] = set() - - for (state, input) in transitionCases.iteritems(): - caseMap = dict() - self.states.add(self.set2name(state)) - if self.type == "DFA": - for case in input: - self.inputs.add(case) - caseMap[case] = [self.set2name(fa.map[frozenset(state), case])] - if state in fa.accepts: - accepts.add(self.set2name(state)) - else: - self.states.add(str(list(state)[0])) - for case in input: - self.inputs.add(case) - caseMap[case] = [str(next_) for next_ in fa.map[list(state)[0], case]] - if list(state)[0] in fa.accepts: - accepts.add(self.set2name(state)) - callGraph[self.set2name(state)] = caseMap - return (callGraph, accepts) - - -class Regexp(object): - def __init__(self, regexp): - self.regexp = regexp - self.nfa = None - self.dfa = None - self._compile() - - def _compile(self): - lexer_ = Lexer(self.regexp) - parser_ = Parser(lexer_) - self.nfa = parser_.expression() - self.dfa = nfa2dfa(self.nfa) - - def matches(self, string): - runtime = self.dfa.get_runtime() - return runtime.does_accept(string) - -def compile(regexp): - return Regexp(regexp) - -def test(): - import doctest - doctest.testmod() - -if __name__ == '__main__' : test()
--- a/pyrect/dot_translator.py Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,61 +0,0 @@ -#!/usr/bin/env python - -import re -from pyrect.translator import Translator -from pyrect.regexp import Regexp - -class DotTranslator(Translator): - """ - DotToranslator - This Class can translate from DFA or NFA into Dot - Dot is Graph-generater using TeX. - --code/graph/makepdf.sh is generate graph script. - >>> string = \"(A|B)*C\" - >>> reg = Regexp(string) - >>> DotTranslator(reg, 'DFA').translate() - >>> DotTranslator(reg, 'NFA').translate() - """ - def __init__(self, regexp, fa="DFA"): - Translator.__init__(self, regexp) - if fa == "NFA": - self.cg = regexp.nfacg - self.fill_color = "lightsteelblue1" - self.frame_color = "navyblue" - - def state_name(self, name): - return "q"+name - - def emit_from_callgraph(self): - self.emit(''' -digraph G{ -\trankdir=LR -\tregex [shape=plaintext, label="%s"] -''' % self.regexp.regexp) - color = "fillcolor=%s, style=filled, color = %s" % (self.fill_color, self.frame_color) - for state in self.cg.states: - if state in self.cg.accepts: - self.emit("\t%s [shape=doublecircle, %s]\n" % (self.state_name(state), color)) - else: - self.emit("\t%s [shape=circle, %s]\n" % (self.state_name(state), color)) - self.emit("\tstart [shape=point]\n\tstart -> %s\n" % self.state_name(self.cg.start)) - - for cur_state, trans in self.cg.map.iteritems(): - for input, next_states in trans.iteritems(): - if self.cg.type == "DFA": - next_states = [next_states] - for next_state in next_states: - self.emit("\t%s -> %s [label=\"'%s'\"]\n" - % (self.state_name(cur_state), self.state_name(next_state), input)) - - self.emit("}") - -def test(): - import doctest - doctest.testmod() - ''' - reg = Regexp("(A|B)*C") - ct = CTranslator(reg.regexp, CallGraph(reg.dfa)) - ct.translate() - ''' - -if __name__ == '__main__' : test()
--- a/pyrect/goto_grep_translator.py Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,118 +0,0 @@ -#!/usr/bin/env python - -from c_translator import CTranslator -from pyrect.regexp import Regexp - -class GoToGREPTranslator(CTranslator): - """GoToGREPTranslator - This Class can translate form DFA into grep source-code. - which based on (beautiful) mini-grep introduced \"The Practice of Programming\" - written by Rob Pike & Brian W. Kernighan. (see template/grep.label) - >>> string = \"(build|fndecl|gcc)\" - >>> reg = Regexp(string) - >>> tje = GoToGREPTranslator(reg) - >>> tje.translate() - """ - - def __init__(self, regexp): - CTranslator.__init__(self, regexp) - self.funType = 'void ' - self.callType = 'return ' - self.breakStatement = '' - self.begline = False - self.__bufsize = 1024 - - def getbufsize(self,): - return self.__bufsize - def setbufsize(self, bufsize): - self.__bufsize = abs(bufsize) - - bufsize = property(getbufsize, setbufsize) - - def emit_accept_state(self): - self.emit (""" -\taccept: -\t\tprintf(\"%s\\n\", buf); -\t\treturn; -""") - - def emit_reject_state(self): - self.emit("\treject:\n") - if not self.begline: - self.emit(""" -\t\tif (*cur++ == '\\0') -\t\t\treturn; -\t\ts = cur; -\t\tgoto %s; -""" % self.state_name(self.cg.start)) - self.emit("return;\n") - - - def emit_initialization(self): - self.emit("#include <stdio.h>\n") - self.emit("#include <stdlib.h>\n") - self.emit("#include <string.h>\n\n") - - self.emit("#define LINEBUFSIZE 1024\n") - self.emit("#define READBUFSIZE %d\n\n" % (self.bufsize)) - self.emit("char readbuf[%d];\n\n" % (self.bufsize)) - - self.emit("%sDFA(char* s, char *buf, char* filename);\n" % self.funType) - grepsource = open("template/grep.label") - self.emit(grepsource.read()) - - def emit_filter(self): - pass - - def emit_driver(self): - self.emit(""" -int main(int argc, char* argv[]) { -\treturn grepmain(argc, argv); -}\n\n""") - - def emit_switch(self, case, default=None): - self.emit("\t\tswitch(*s++) {\n") - for input, next_state in case.iteritems(): - if input != '': - self.emit("\t\t\tcase '%s': \n" % (input)) - self.emit("\t\t\t\tgoto %s;\n" % (self.state_name(next_state))) - if self.breakStatement != '': self.emit(self.breakStatement+'\n') - - if default: - self.emit( """\t\t\tdefault:\n\t\t\tgoto %s;\n""" % default) - self.emit("\t\t}\n") - - def emit_state(self, cur_state, transition): - self.emit("\t" + self.state_name(cur_state) + ":\n") - if cur_state in self.cg.accepts: - self.emit("\t\tgoto accept;\n") - else: - if transition: - if self.cg.type == "DFA": - self.emit_switch(transition, default="reject") - else: - self.emit_switch(transition) - self.emit("\n") - - def emit_from_callgraph(self): - # self.emit C-source code - self.emit_initialization() - self.emit_driver() - - self.emit("void DFA(char *s, char *buf, char* filename) {\n") - self.emit("\tchar *cur = s;\n") - self.emit("\tgoto %s;\n" % self.state_name(self.cg.start)) - - for cur_state, transition in self.cg.map.iteritems(): - self.emit_state(cur_state, transition) - - self.emit_accept_state() - self.emit_reject_state() - - self.emit("}\n\n") - -def test(): - import doctest - doctest.testmod() - -if __name__ == '__main__': test()
--- a/pyrect/grep_translator.py Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,118 +0,0 @@ -#!/usr/bin/env python - -import copy -from pyrect.c_translator import CTranslator -from pyrect.regexp import Regexp - -class GREPTranslateExeption(Exception): - pass - -class GREPTranslator(CTranslator): - """GREPTranslator - This Class can translate form DFA into grep source-code. - which based on (beautiful) mini-grep introduced \"The Practice of Programming\" - written by Rob Pike & Brian W. Kernighan. (see template/grep.c) - >>> string = \"(build|fndecl|gcc)\" - >>> reg = Regexp(string) - >>> tje = GREPTranslator(reg) - >>> tje.translate() - """ - - def __init__(self, regexp): - CTranslator.__init__(self, regexp) - self.funType = 'int ' - self.callType = 'return ' - self.breakStatement = '' - self.begline = False - self.__bufsize = 1024 - - def getbufsize(self,): - return self.__bufsize - def setbufsize(self, bufsize): - self.__bufsize = abs(bufsize) - - bufsize = property(getbufsize, setbufsize) - - def emit_accept_state(self): - self.emit (""" -%saccept(char* s) { -\treturn 1; -}\n""" % self.funType) - - def emit_reject_state(self): - self.emit (""" -%sreject(char* s) { -\treturn 0; -}\n""" % self.funType) - - def emit_initialization(self): - self.emit("#include <stdio.h>\n") - self.emit("#include <stdlib.h>\n") - self.emit("#include <string.h>\n\n") - - self.emit("#define LINEBUFSIZE 1024\n") - self.emit("#define READBUFSIZE %d\n\n" % (self.bufsize)) - self.emit("char readbuf[%d];\n\n" % (self.bufsize)) - - self.emit("%sDFA(char* s);\n" % (self.funType)) - for state in self.cg.map.iterkeys(): - self.emit(self.funType + self.state_name(state) + "(char* s);\n") - self.emit(self.funType + 'accept(char* s);\n') - self.emit(self.funType + 'reject(char* s);\n') - grepsource = open("template/grep.c") - self.emit(grepsource.read()) - - def emit_filter(self): - pass - - def emit_matcher(self): - self.emit("int match(char *regexp, char *text) {\n") - if self.begline: - self.emit("\t\treturn DFA(text);\n") - else: - self.emit(""" -\tdo { -\t\tif (DFA(text)) -\t\t\treturn 1; -\t} while (*text++ != '\\0'); -\treturn 0; -""") - self.emit("}\n\n") - - def emit_driver(self): - self.emit(""" -int main(int argc, char* argv[]) { -\treturn grepmain(argc, argv); -}\n\n""") - self.emit_matcher() - self.emit(""" -%sDFA(char *s) { - return %s(s); -}\n\n""" % (self.funType, self.state_name(self.cg.start))) - - def emit_state(self, cur_state, transition): - transition = copy.deepcopy(transition) - self.emit(self.funType + self.state_name(cur_state) + "(char* s) {\n") - strings = [x for x in transition.keys() if len(x) > 1] - if strings: - self.emit("\tchar *ls;\n") - for string in strings: - self.emit_strcmp1(string, transition.pop(string)) - - if cur_state in self.cg.accepts: - self.emit("\treturn accept(s);\n") - else: - if transition: - if self.cg.type == "DFA": - self.emit_switch(transition, default="reject") - else: - self.emit_switch(transition) - else: - self.emit("\t return reject(s);\n") - self.emit("}\n\n") - -def test(): - import doctest - doctest.testmod() - -if __name__ == '__main__': test()
--- a/pyrect/jitgrep.py Fri Aug 06 20:18:58 2010 +0900 +++ b/pyrect/jitgrep.py Sun Aug 08 04:13:14 2010 +0900 @@ -5,9 +5,7 @@ import re import time from optparse import OptionParser -from pyrect.grep_translator import GREPTranslator -from pyrect.goto_grep_translator import GoToGREPTranslator -from pyrect.cbcgrep_translator import CbCGREPTranslator +from pyrect.translator import * from pyrect.regexp import Regexp def main(argv):
--- a/pyrect/llvm_grep_translator.py Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,146 +0,0 @@ -#!/usr/bin/env python - -from llvm.core import * -from llvm.passes import * -from llvm.ee import * -from llvm_translator import LLVMTranslator -from pyrect.regexp import Regexp - -class LLVMGREPTranslator(LLVMTranslator): - """LLVMGREPTranslator - This class can translate from DFA into grep LLVM-module. - which can translate LLVM-IR, and also can execute it's self. - >>> string = 'def' - >>> reg = Regexp(string) - >>> lt = LLVMGREPTranslator(reg) - >>> lt.translate() - >>> ret = lt.execute() - >>> isinstance(ret, llvm.ee.GenericValue) - True - """ - - def __init__(self, regexp): - LLVMTranslator.__init__(self, regexp) - llfile = file("template/grep.ll") - self.llvm_module = Module.from_assembly(llfile) - self.compiled = False - self.string = regexp.regexp - self.args = [] - - def state_name(self, state_name): - return state_name - - def emit_driver(self): - self.regexp_str = self.new_str_const(self.string) - dfa = self.llvm_module.get_or_insert_function( - Type.function(self.int_t, (self.charptr_t,)), "DFA") - dfa_entry = dfa.append_basic_block("entry") - emit = Builder.new(dfa_entry) - ret = emit.call(self.llvm_module.get_function_named(self.cg.start) - ,(dfa.args[0],)) - emit.ret(ret) - - main = self.llvm_module.add_function( - Type.function(Type.void(), (self.int_t,)), "pre_main") - main_entry = main.append_basic_block("entry") - emit = Builder.new(main_entry) - - index = len(self.args) - - if index == 1: - grep = self.llvm_module.get_function_named("llgrep") - else: - grep = self.llvm_module.get_function_named("llgrep_with_name") - - for i in range(index): - emit.call(grep, (self.gep_first(emit, self.regexp_str), - self.gep_first(emit, self.new_str_const(self.args[i])))) - emit.ret_void() - - self.main = main - - def jitcompile(self): - self.debug_str = self.new_str_const("state: %s, arg: %c(int %d)\n") - def optional_func_decl(fun): - #fun.calling_convertion = CC_X86_FASTCALL - fun.args[0].name = "index" - - def func_decl(state): - optional_func_decl(state) - - state_ref = dict() - - # Create function - accept and reject (final state). - accept_state = self.llvm_module.add_function( - Type.function(self.int_t, (self.charptr_t,)), "accept") - optional_func_decl(accept_state) - reject_state = self.llvm_module.add_function( - Type.function(self.int_t, (self.charptr_t,)), "reject") - optional_func_decl(reject_state) - - state_ref["accept"] = accept_state - state_ref["reject"] = reject_state - - # add state to module, (as function or label). - for state in self.cg.map.iterkeys(): - fun = self.llvm_module.add_function( - Type.function(self.int_t, (self.charptr_t,)), state) - optional_func_decl(fun) - state_ref[state] = fun - - # emit instructions - emit = Builder.new(accept_state.append_basic_block("entry")) - emit.ret(self.const_one) - - emit = Builder.new(reject_state.append_basic_block("entry")) - emit.ret(self.const_zero) - - for state, transition in self.cg.map.iteritems(): - cases = dict() - state_fun = state_ref[state] - emit = Builder.new(state_fun.append_basic_block("entry")) - - if state in self.cg.accepts: - ret = emit.call(accept_state, (state_fun.args[0],)) - emit.ret(ret) - continue - - for case, next_state in transition.iteritems(): - cases[self.char_const(case)] = state_ref[next_state] - - char = emit.load(state_fun.args[0]) - next_ptr = emit.gep(state_fun.args[0], (self.const_one,)) - if (self.debug): self.emit_call_printf(emit, self.debug_str, self.gep_first(emit, self.new_str_const(fun.name)), char, char) - - label = 0 - default_bb = state_fun.append_basic_block("default") #create default bb - builder = Builder.new(default_bb) # default is reject. - ret = builder.call(reject_state, (next_ptr,)) - builder.ret(ret) - - si = emit.switch(char, default_bb, len(cases)) # create switch instruction with deafult case. - for case, nextFun in cases.iteritems(): - bb = state_fun.append_basic_block("case%d" % label) #create default bb - builder = Builder.new(bb) - ret = builder.call(nextFun, (next_ptr,)) - builder.ret(ret) - si.add_case(case, bb) - label += 1 - - self.emit_driver() - self.mp = ModuleProvider.new(self.llvm_module) - if (self.optimize): self.do_optimize() - self.ee = ExecutionEngine.new(self.mp) - self.compiled = True - - def execute(self): - if not self.compiled: - self.jitcompile() - return self.ee.run_function(self.main, - (GenericValue.int(self.int_t, 0),)) - -def test(): - import doctest - doctest.testmod() - -if __name__ == "__main__": test()
--- a/pyrect/llvm_translator.py Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,200 +0,0 @@ -#!/usr/bin/env python - -from llvm.core import * -from llvm.passes import * -from llvm.ee import * -from pyrect.translator import Translator -from pyrect.regexp import Regexp - -class LLVMTranslator(Translator): - """LLVMTranslator - This Class can translate from DFA or NFA into LLVM-IR. - and also can JIT-Compile/evaluate it's self using llvm-py. - >>> string = '(A|B)*C' - >>> reg = Regexp(string) - >>> lt = LLVMTranslator(reg) - >>> lt.debug = True - >>> lt.translate() - >>> lt.execute() - """ - - # define llvm core types, and const - int_t = Type.int(32) - char_t = Type.int(8) - charptr_t = Type.pointer(char_t) - charptrptr_t = Type.pointer(charptr_t) - const_zero = Constant.int(int_t, 0) - const_one = Constant.int(int_t, 1) - llvm.GuaranteedTailCallOpt = True - - def __init__(self, regexp): - Translator.__init__(self, regexp) - self.optimize = False - self.debug = False - self.string = "ABC" - self.llvm_module = Module.new(self.cg.type) - self.compiled = False - - def emit_driver(self): - main = self.llvm_module.add_function( - Type.function(self.int_t, (self.int_t,)), "unitmain") - main.args[0].name = "index" - main_entry = main.append_basic_block("entry") - - emit = Builder.new(main_entry) - start = self.llvm_module.get_function_named(self.cg.start) - ret = emit.call(start, (main.args[0],)) - emit.ret(ret) - self.main = main - - def jitcompile(self): - self.matchp_str = self.new_str_const(self.string) - self.debug_str = self.new_str_const("state: %s, arg: %c(int %d)\n") - - def optional_func_decl(fun): - fun.calling_convertion = CC_X86_FASTCALL - fun.args[0].name = "index" - - def func_decl(state): - optional_func_decl(state) - - state_ref = dict() - - # Create function - accept and reject (final state). - accept_state = self.llvm_module.add_function( - Type.function(self.int_t, (self.int_t,)), "accept") - optional_func_decl(accept_state) - reject_state = self.llvm_module.add_function( - Type.function(self.int_t, (self.int_t,)), "reject") - optional_func_decl(reject_state) - - state_ref["accept"] = accept_state - state_ref["reject"] = reject_state - - # add state to module, (as function or label). - for state in self.cg.map.iterkeys(): - fun = self.llvm_module.add_function( - Type.function(self.int_t, (self.int_t,)), state) - optional_func_decl(fun) - state_ref[state] = fun - - # emit instructions - emit = Builder.new(accept_state.append_basic_block("entry")) - if self.debug: self.emit_call_printf(emit, "%s does match regexp\n", self.gep_first(emit, self.matchp_str)) - emit.ret(self.const_one) - - emit = Builder.new(reject_state.append_basic_block("entry")) - if self.debug: self.emit_call_printf(emit, "%s does not match regexp\n", self.gep_first(emit, self.matchp_str)) - emit.ret(self.const_zero) - - for state, transition in self.cg.map.iteritems(): - cases = dict() - if state in self.cg.accepts: - transition['\\0'] = ["accept"] - for case, next_states in transition.iteritems(): - cases[self.char_const(case)] = state_ref[next_states[0]] - state_fun = state_ref[state] - emit = Builder.new(state_fun.append_basic_block("entry")) - ptr = emit.gep(self.matchp_str, (self.const_zero, state_fun.args[0])) - next_index = emit.add(state_fun.args[0], self.const_one) - char = emit.load(ptr) - - if (self.debug): self.emit_call_printf(emit, self.debug_str, self.gep_first(emit, self.new_str_const(fun.name)), char, char) - - label = 0 - default_bb = state_fun.append_basic_block("default") #create default bb - builder = Builder.new(default_bb) # default is reject. - ret = builder.call(reject_state, (next_index,)) - builder.ret(ret) - - si = emit.switch(char, default_bb, len(cases)) # create switch instruction with deafult case. - for case, nextFun in cases.iteritems(): - bb = state_fun.append_basic_block("case%d" % label) #create default bb - builder = Builder.new(bb) - ret = builder.call(nextFun, (next_index,)) - builder.ret(ret) - si.add_case(case, bb) - label += 1 - - self.mp = ModuleProvider.new(self.llvm_module) - if (self.optimize): self.do_optimize() - self.ee = ExecutionEngine.new(self.mp) - self.emit_driver() - self.compiled = True - - def emit_from_callgraph(self): - if not self.compiled: - self.jitcompile() - self.emit(str(self.llvm_module)) - def get_execution_engine(self): - if not self.compiled: - self.jitcompile() - return self.ee - - def do_optimize(self): - #optimization passes - pm = PassManager.new() - pm.add(TargetData.new('')) - pm.add(PASS_FUNCTION_INLINING) - pm.run(self.llvm_module) - fp = FunctionPassManager.new(self.mp) - fp.add(TargetData.new('')) - fp.add(PASS_BLOCK_PLACEMENT) - fp.add(PASS_INSTRUCTION_COMBINING) - fp.add(PASS_TAIL_CALL_ELIMINATION) - fp.add(PASS_AGGRESSIVE_DCE) - fp.add(PASS_DEAD_INST_ELIMINATION) - fp.add(PASS_DEAD_CODE_ELIMINATION) - for fun in self.llvm_module.functions: - fp.run(fun) - - def print_module(self): - if not self.compiled: - self.jitcompile() - print self.llvm_module - - def execute(self): - if not self.compiled: - self.jitcompile() - self.ee.run_function(self.main, - (GenericValue.int(self.int_t, 0),)) - return - - def new_str_const(self, val): - '''create string(array of int) as a global value ''' - str = self.llvm_module.add_global_variable(Type.array(self.char_t, len(val) + 1), "") - str.initializer = Constant.stringz(val) - return str - - def gep_first(self, emit, val): - '''get pointer of array''' - return emit.gep(val, (self.const_zero, self.const_zero)) - - def char_const(self, val): - '''create constant int value''' - if isinstance(val, str): - if val == '\\0': - return Constant.int(self.char_t, 0) - else: - return Constant.int(self.char_t, ord(val)) - else: - exit('char_const: invalid argument.', val) - - def emit_call_printf(self, emit, string, *args): - '''emit libc printf function call instruction''' - try: - printf = self.llvm_module.get_function_named("printf") - except llvm.LLVMException: - printf = self.llvm_module.add_function( - Type.function(Type.void(), - (Type.pointer(self.char_t, 0),), 1), "printf") - if isinstance(string, str): - string = self.new_str_const(string) - emit.call(printf, - [self.gep_first(emit, string)]+list(args)) - -def test(): - import doctest - doctest.testmod() - -if __name__ == "__main__": test()
--- a/pyrect/regexp/ast.py Fri Aug 06 20:18:58 2010 +0900 +++ b/pyrect/regexp/ast.py Sun Aug 08 04:13:14 2010 +0900 @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- encoding: utf-8 -*- """ General-Node-set. Parser create AST (be composed of Nodes) from Regexp. @@ -13,41 +14,30 @@ class Node(object): def __init__(self): pass + def __str__(self): return str(self.__class__) + + def __repr__(self): + return "("+self.__class__.__name__+":"+str(self)+")" + def accept(self, visitor): visit = "visit_%s" % self.__class__.__name__ return getattr(visitor, visit, visitor.visit)(self) -class Character(Node): - def __init__(self, char): - self.char = char - - def __str__(self): - return "'" + self.char + "'" +""" +NFA basic elements. +Concat, Union, Star, Qmark, Plus +""" class Concat(Node): def __init__(self, op1, op2): self.op1 = op1 self.op2 = op2 - def concat(self, key1, key2): - if isinstance(key1, str) and isinstance(key2, str): - return key1 + key2 - elif isinstance(key1, str) and isinstance(key2, list): - if key2[0]: - key2[0] = key1 + key2[0] - else: - key2 = [key1] + key2 - return key2 - elif isinstance(key1, list) and isinstance(key2, str): - if key1[-1]: - key1[-1] = key1[-1] + key2 - else: - key1 = key1 + [key2] - return key1 - else: - key1 + key2 + def __repr__(self): + return self.__class__.__name__ + "(%s.%s)" \ + % (self.op1.__repr__(), self.op2.__repr__()) def __str__(self): return "(%s.%s)" % (self.op1, self.op2) @@ -57,6 +47,10 @@ self.op1 = op1 self.op2 = op2 + def __repr__(self): + return "(Union:(%s|%s))" % \ + (self.op1.__repr__(), self.op2.__repr__()) + def __str__(self): return "(%s|%s)" % (self.op1, self.op2) @@ -80,3 +74,114 @@ def __str__(self): return "(%s)+" % self.op + +""" +following Nodes are'nt convert NFA/DFA's each state, +InputNode remains as input which is decided at matching. +""" + +""" +basic elements. +Character, MBCharacter +""" + +class Singleton(type): + def __new__(self, name, bases, dict): + dict['instances'] = {} + return type.__new__(self, name, bases, dict) + + def __call__(self, *args): + if not args in self.instances: + self.instances[args] = type.__call__(self, *args) + return self.instances[args] + +class InputNode(Node): + __metaclass__ = Singleton + + def __add__(self, other): + return FixedString(self, other) + + def __hash__(self): + return id(self) + + def __cmp__(self, other): + if self.__hash__() == other.__hash__(): + return 0 + elif self.__hash__() > other.__hash__(): + return 1 + else: + return -1 + +class Character(InputNode): + def __init__(self, char): + self.char = char + + def __str__(self): + return "'" + self.char + "'" + +class MBCharacter(Character): + def __init__(self, mbchar): + Character.__init__(self, unicode(mbchar)) + + def __str__(self): + return "'" + self.char.encode('utf-8') + "'" + +class EscapeCharacter(Character): + def __init__(self, char): + Character.__init__(self, char) + +class FixedString(InputNode): + def __init__(self, char): + self.string = list() + + def appfront(self, input_): + self.string.insert(0, input_) + return self + +""" +Anchor, is Special-Input rules to match specify text position. +BegLine, EndLine, +""" + +class Anchor(InputNode): + pass + +class BegLine(Anchor): + def __str__(self): + return "^" + +class EndLine(Anchor): + def __str__(self): + return "$" + +""" +other Special Inputs. +AnyChar, CharClass +""" + +class AnyChar(InputNode): + def __str__(self): + return "." + +class CharClass(InputNode): + def __init__(self, factor, inverse=False): + self.inverse = inverse + self.factor = factor + + def __repr__(self): + return self.__class__.__name__+"[%s]" \ + % ",".join((s.__repr__() for s in self.factor)) + + def __str__(self): + if self.inverse: + return "[^%s]" % "".join(map(str, self.factor)) + else: + return "[%s]" % "".join(map(str, self.factor)) + +class Range(InputNode): + def __init__(self, lower, upper): + self.lower = lower + self.upper = upper + + def __str__(self): + return "%s-%s" % (self.upper, self.lower)
--- a/pyrect/regexp/callgraph.py Fri Aug 06 20:18:58 2010 +0900 +++ b/pyrect/regexp/callgraph.py Sun Aug 08 04:13:14 2010 +0900 @@ -1,4 +1,5 @@ #!/Usr/bin/env python +# -*- encoding: utf-8 -*- import copy from pyrect.regexp.parser import Parser @@ -6,34 +7,15 @@ from pyrect.regexp.dfa_translator import DFATranslator from pyrect.regexp.dfa import DFA from pyrect.regexp.nfa import NFA +from pyrect.regexp.ast import Character # create call graph (as dictionary) class CallGraph(object): """CallGraph is State Transition Diagram. it's can be create from DFA or DFA. >>> prs = Parser() - >>> reg = \"AA*|B\" - >>> nfa = NFATranslator().translate(prs.parse(reg)) - >>> dfa = DFATranslator().translate(prs.parse(reg)) - >>> dfacg = CallGraph(dfa) - >>> nfacg = CallGraph(nfa) - >>> dfacg.map - {'1': {'A': '1'}, '0': {}, '3': {'A': '1'}, '2': {'A': '3', 'B': '0'}} - >>> dfacg.states - ['0', '1', '2', '3'] - >>> dfacg.accepts - ['0', '1', '3'] - >>> nfacg.map - {'1': {'': ['4']}, '0': {'A': ['1']}, '3': {'': ['2']}, '2': {'A': ['3']}, '5': {'B': ['6']}, '4': {'': ['2']}, '7': {'': ['0', '5']}, '6': {}} - >>> nfacg.states - ['0', '1', '2', '3', '4', '5', '6', '7'] - >>> nfacg.accepts - ['3', '4', '6'] - >>> cg = CallGraph(DFATranslator().translate(prs.parse('(argc|argv|hoge)'))) - >>> cg.combine() - >>> cg.start - >>> cg.map - >>> cg.accepts + >>> dfat = DFATranslator() + >>> cg = CallGraph(dfat.translate(prs.parse('(argv|argc|args|あれ|これ)'))) """ def __init__(self, fa): @@ -55,7 +37,8 @@ for state, value in self.map.iteritems(): if len(value) == 1: input_, next_ = value.items()[0] - if state != next_ and not state in self.accepts and state != self.start: + if state != next_ and not state in self.accepts\ + and state != self.start and isinstance(input_, Character): com[state] = [input_, next_, set()] for state in com.iterkeys(): @@ -68,7 +51,6 @@ def combine_(): recursion = False for state, value in com.iteritems(): - input_, state if value[1] in com: com_next = com[value[1]] com[state] = [value[0]+com_next[0], com_next[1], value[2]] @@ -86,7 +68,7 @@ for refer in value[2]: for input_ , next_ in self.map[refer].iteritems(): if key == next_: - self.map[refer][input_+value[0]] = value[1] + self.map[refer][input_ + value[0]] = value[1] self.map[refer].pop(input_) self.map.pop(key) self.states.remove(key)
--- a/pyrect/regexp/dfa_translator.py Fri Aug 06 20:18:58 2010 +0900 +++ b/pyrect/regexp/dfa_translator.py Sun Aug 08 04:13:14 2010 +0900 @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- encoding: utf-8 -*- from pyrect.regexp.parser import Parser from pyrect.regexp.ast import ASTWalker @@ -11,18 +12,12 @@ actually DFATranslator use NFATranslator and it convert NFA into DFA. >>> prs = Parser() >>> dfat = DFATranslator() - >>> dfat.translate(prs.parse('ABC')).map - {(2, 'A'): 3, (0, 'C'): 1, (3, 'B'): 0} - >>> dfat.translate(prs.parse('A|B')).map - {(1, 'A'): 2, (1, 'B'): 0} >>> dfat.translate(prs.parse('(A|B)*C')).map - {(1, 'C'): 2, (3, 'C'): 2, (0, 'A'): 1, (3, 'B'): 0, (3, 'A'): 1, (1, 'B'): 0, (1, 'A'): 1, (0, 'B'): 0, (0, 'C'): 2} - >>> dfat.translate(prs.parse('(A|B)*C')).start - 3 + {(0, (Character:'C')): 1, (0, (Character:'A')): 0, (0, (Character:'B')): 0} >>> dfat.translate(prs.parse('(A|B)*C')).accepts - [2] - >>> dfat.translate(prs.parse('(A|B)*C')).states - [0, 3, 1, 2] + [1] + >>> dfat.translate(prs.parse('ほ?げ+')).map + {(2, (MBCharacter:'げ')): 2, (0, (MBCharacter:'げ')): 2, (0, (MBCharacter:'ほ')): 1, (1, (MBCharacter:'げ')): 2} """ def __init__(self):
--- a/pyrect/regexp/lexer.py Fri Aug 06 20:18:58 2010 +0900 +++ b/pyrect/regexp/lexer.py Sun Aug 08 04:13:14 2010 +0900 @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- encoding: utf-8 -*- from ply import lex @@ -7,6 +8,14 @@ 'STAR', 'LPAREN', 'RPAREN', + 'LBRACKET', + 'RBRACKET', + 'CARET', + 'DOLLAR', + 'NORMALCHAR', + 'ESCAPECHAR', + 'MBCHAR', + 'DASH', 'ANYCHAR', 'PLUS', 'QMARK' @@ -36,9 +45,42 @@ ur'\)' return t +def t_DASH(t): + ur'-' + return t + +def t_CARET(t): + ur'\^' + return t + +def t_DOLLAR(t): + ur'\$' + return t + +def t_RBRACKET(t): + ur'\[' + return t + +def t_LBRACKET(t): + ur'\]' + return t + def t_ANYCHAR(t): - ur'\\?(.)' - t.value = t.value[-1:] + ur'\.' + return t + +def t_ESCAPECHAR(t): + ur'\\[ -~]' + t.value = t.value[-1] + return t + +def t_NORMALCHAR(t): + ur'[ -~]' # match ascii + t.value + return t + +def t_MBCHAR(t): + ur'[^ -~]+' # match multi byte code. return t def t_error(t):
--- a/pyrect/regexp/nfa.py Fri Aug 06 20:18:58 2010 +0900 +++ b/pyrect/regexp/nfa.py Sun Aug 08 04:13:14 2010 +0900 @@ -7,13 +7,21 @@ class NFAFragment(object): def __init__(self): self.start = None # integer - self.accepts = None # frozenset + self.accepts = set() # frozenset self.map = dict() # transition, char or special -> frozenset([states]) def connect(self, from_, char, to): slot = self.map.setdefault((from_, char), set()) slot.add(to) + def accept_to(self, from_, char): + self.accepts.add((from_, char)) + + def set_accept(self, to): + for from_, char in self.accepts: + self.connect(from_, char, to) + self.accepts = set() + def new_skelton(self): # copy fragment (self), and it return newFrag = NFAFragment();
--- a/pyrect/regexp/nfa_translator.py Fri Aug 06 20:18:58 2010 +0900 +++ b/pyrect/regexp/nfa_translator.py Sun Aug 08 04:13:14 2010 +0900 @@ -1,7 +1,8 @@ #!/usr/bin/env python +# -*- encoding: utf-8 -*- from pyrect.regexp.parser import Parser -from pyrect.regexp.ast import ASTWalker +from pyrect.regexp.ast import * from pyrect.regexp.nfa import * class NFATranslator(ASTWalker): @@ -9,16 +10,11 @@ AST (ast), is represented by Node-Tree. >>> prs = Parser() >>> nfat = NFATranslator() - >>> nfat.translate(prs.parse('ABC')).map - {(3, ''): set([4]), (4, 'C'): set([5]), (1, ''): set([2]), (0, 'A'): set([1]), (2, 'B'): set([3])} - >>> nfat.translate(prs.parse('A|B')).map - {(0, 'A'): set([1]), (2, 'B'): set([3]), (4, ''): set([0, 2])} >>> nfat.translate(prs.parse('(A|B)*C')).map - {(0, 'A'): set([1]), (3, ''): set([4, 6]), (6, 'C'): set([7]), (2, 'B'): set([3]), (5, ''): set([4, 6]), (1, ''): set([4, 6]), (4, ''): set([0, 2])} + {(3, ''): set([2, 4]), (2, ''): set([0, 1]), (1, (Character:'B')): set([3]), (0, (Character:'A')): set([3]), (4, (Character:'C')): set([5])} >>> nfat.translate(prs.parse('(A|B)*C')).accepts - [7] - >>> nfat.translate(prs.parse('(AB|CD)?E+')).map - {(9, ''): set([8, 12]), (0, 'A'): set([1]), (7, ''): set([12]), (10, 'E'): set([11]), (12, ''): set([10]), (3, ''): set([12]), (8, ''): set([0, 4]), (11, ''): set([10]), (2, 'B'): set([3]), (5, ''): set([6]), (4, 'C'): set([5]), (1, ''): set([2]), (6, 'D'): set([7])} + [5] + >>> nfat.translate(prs.parse('ほ?げ+')).map """ def __init__(self): @@ -36,23 +32,32 @@ self.ast = ast if self.ast: self.__state_id = -1 - self.nfa = ast.accept(self).build() + frag = ast.accept(self) + frag.set_accept(self.state_id) + self.nfa = frag.build() self.nfa.states = range(self.__state_id + 1) - self.nfa.accepts = list(self.nfa.accepts) + self.nfa.accepts = [self.__state_id] return self.nfa - def visit(self, ast): - return None - - def visit_Character(self, character): + def visit(self, input_node): frag = NFAFragment() - s1 = self.state_id - s2 = self.state_id - frag.connect(s1, character.char, s2) - frag.start = s1 - frag.accepts = frozenset([s2]) + s = self.state_id + frag.start = s + frag.accept_to(s, input_node) return frag + def visit_CharClass(self, charclass): + # CharClass be converted Union in NFA + factor = list(charclass.factor) + if len(factor) == 1: + return factor[0].accept(self) + + union = factor[0] + for op in factor[1:]: + union = Union(union, op) + + return union.accept(self) + def visit_Union(self, union): frag1 = union.op1.accept(self) frag2 = union.op2.accept(self) @@ -68,49 +73,42 @@ frag1 = concat.op1.accept(self) frag2 = concat.op2.accept(self) + frag1.set_accept(frag2.start) frag = frag1 | frag2 - for state in frag1.accepts: - frag.connect(state, '', frag2.start) - frag.start = frag1.start frag.accepts = frag2.accepts return frag def visit_Star(self, star): - fragOrig = star.op.accept(self) - frag = fragOrig.new_skelton() - - for state in fragOrig.accepts: - frag.connect(state, '', fragOrig.start) + frag = star.op.accept(self) s = self.state_id - frag.connect(s, '', fragOrig.start) + frag.connect(s, '', frag.start) + frag.set_accept(s) + frag.accept_to(s, '') frag.start = s - frag.accepts = fragOrig.accepts | frozenset([s]) + return frag def visit_Qmark(self, qmark): - fragOrig = qmark.op.accept(self) - frag = fragOrig.new_skelton() + frag = qmark.op.accept(self) s = self.state_id - frag.connect(s, '', fragOrig.start) + frag.connect(s, '', frag.start) + frag.accept_to(s, '') frag.start = s - frag.accepts = fragOrig.accepts | frozenset([s]) + return frag def visit_Plus(self, plus): - fragOrig = plus.op.accept(self) - frag = fragOrig.new_skelton() - - for state in fragOrig.accepts: - frag.connect(state, '', fragOrig.start) + frag = plus.op.accept(self) s = self.state_id - frag.connect(s, '', fragOrig.start) - frag.start = s - frag.accepts = fragOrig.accepts + frag.set_accept(s) + frag.connect(s, '', frag.start) + frag.accept_to(s, '') + return frag def test():
--- a/pyrect/regexp/parser.py Fri Aug 06 20:18:58 2010 +0900 +++ b/pyrect/regexp/parser.py Sun Aug 08 04:13:14 2010 +0900 @@ -1,7 +1,9 @@ #!/usr/bin/env python +# -*- encoding: utf-8 -*- from ply import yacc import os +import unicodedata from pyrect.regexp.lexer import lex, tokens from pyrect.regexp.ast import * @@ -13,27 +15,52 @@ >>> ast = parser.parse('(AB|CD)*123') >>> print ast (((((('A'.'B')|('C'.'D')))*.'1').'2').'3') - >>> ast = parser.parse('(A|B)?C+') + >>> ast = parser.parse('^\^(A|B)?C+$') >>> print ast - ((('A'|'B'))?.('C')+) + ((((^.'^').(('A'|'B'))?).('C')+).$) + + multi byte も OK!! + >>> parser.parse('aあいc?') + Concat(Concat((Character:'a').Concat((MBCharacter:'あ').(MBCharacter:'い'))).(Qmark:('c')?)) + >>> parser.parse('[123A-Zあ]') + CharClass[(Character:'1'),(Character:'2'),(Range:'Z'-'A'),(Character:'3'),(MBCharacter:'あ')] """ BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + @staticmethod + def mbparse(uni_bytes): + mbchars = list() + + for mbchar in uni_bytes: + mbchars.append(MBCharacter(mbchar)) + + ret = mbchars[0] + + for mbchar in mbchars[1:]: + ret = Concat(ret, mbchar) + + return ret + def __init__(self): self.yacc = yacc.yacc(outputdir=self.BASE_DIR, debug=False) self.lexer = lex def parse(self, expression): - self.lexer.input(expression) + self.lexer.input(unicode(expression, 'utf-8')) self.ast = self.yacc.parse(lexer=self.lexer) return self.ast """Parse following language ---------------------------------------- -regexp -> regexp UNION branch | branch -branch -> branch closure | closure -closure -> closure STAR | closure QMARK | closure PLUS | atom -atom -> LPAREN regexp RPAREN | ANYCHAR +regexp -> regexp UNION branch | branch +branch -> branch closure | closure +closure -> closure STAR | closure QMARK | closure PLUS | atom +atom -> LPAREN regexp RPAREN | LBRACKET charclass RBRACKET + | ANYCHAR | NORMALCHAR | CARET | DOLLAR | MBCHAR | ESCAPECHAR +charclass -> charclass cclass | cclass +cclass -> cset DASH cset | cset +cset -> NORMALCHAR | LPAREN | RPAREN | ANYCHAR | CARET | DOLLAR + | PLUS | QMARK | STAR | DASH | MBCHAR old parse rule (A) expression -> subexpr EOF @@ -120,9 +147,71 @@ p[0] = p[2] def p_atom2(p): + '''atom : NORMALCHAR + | DASH''' + p[0] = Character(p[1]) + +def p_atom3(p): 'atom : ANYCHAR' + p[0] = AnyChar() + +def p_atom4(p): + '''atom : RBRACKET charclass LBRACKET + | RBRACKET CARET charclass LBRACKET''' + if p[2] == '^': + p[0] = CharClass(p[3], inverse=True) + else: + p[0] = CharClass(p[2]) + +def p_atom5(p): + 'atom : CARET' + p[0] = BegLine() + +def p_atom6(p): + 'atom : DOLLAR' + p[0] = EndLine() + +def p_atom7(p): + 'atom : MBCHAR' + p[0] = Parser.mbparse(p[1]) + +def p_atom8(p): + 'atom : ESCAPECHAR' + p[0] = EscapeCharacter(p[1]) + +def p_charclass2(p): + 'charclass : charclass cclass' + p[0] = p[1].union(p[2]) + +def p_charclass1(p): + 'charclass : cclass' + p[0] = p[1] + +def p_cclass1(p): + 'cclass : cset DASH cset' + p[0] = frozenset([Range(p[1], p[3])]) + +def p_cclass2(p): + 'cclass : cset' + p[0] = frozenset([p[1]]) + +def p_cset1(p): + '''cset : NORMALCHAR + | LPAREN + | RPAREN + | ANYCHAR + | CARET + | DOLLAR + | PLUS + | QMARK + | STAR + | DASH''' p[0] = Character(p[1]) +def p_cset2(p): + 'cset : MBCHAR' + p[0] = MBCharacter(p[1]) + def p_error(p): raise Exception("syntax error")
--- a/pyrect/template/grep.c Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,50 +0,0 @@ -/* Excerpted from 'The Practice of Programming' */ -/* by Brian W. Kernighan and Rob Pike */ - -int grep(char * regexp, FILE *f, char *name) { - int n, nmatch; - char buf[LINEBUFSIZE]; - nmatch = 0; - while (fgets(buf, sizeof buf, f) != NULL) { - n = strlen(buf); - if (n > 0 && buf[n-1] == '\n') - buf[n-1] = '\0'; - if (match(regexp, buf)) { - nmatch++; - if (name != NULL) - printf("%s:", name); - printf("%s\n", buf); - } - } - return nmatch; -} - -int grepmain(int argc, char* argv[]) { - int i, nmatch; - FILE *f; - - if (argc < 2) { - fprintf(stderr, "usage: grep regexp [file ...]"); - exit(0); - } - nmatch = 0; - if (argc == 2) { - if (grep(argv[1], stdin, NULL)) - nmatch; - } else { - for (i = 2; i < argc; i++) { - f = fopen(argv[i], "r"); - if (f == NULL) { - fprintf(stderr, "can't open %s:", argv[i]); - continue; - } - if (READBUFSIZE > 0) - setvbuf(f, readbuf, _IOFBF, READBUFSIZE); - if (grep(argv[1], f, argc > 3 ? argv[i] : NULL) > 0) - nmatch++; - fclose(f); - } - } - - return nmatch; -}
--- a/pyrect/template/grep.cbc Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,35 +0,0 @@ -void grep(char* s, char *cur, char *buf, FILE *f, char* filename) { - goto next_line(s, cur, buf, f, filename); - return; -} - -void grepmain(int argc, char* argv[]) { - int i; - char buf[LINEBUFSIZE]; - char readbuf[READBUFSIZE]; - char *filename; - FILE *f; - - if (argc < 2) { - fprintf(stderr, "usage: grep regexp [file ...]"); - exit(0); - } - if (argc == 2) { - grep(buf, buf, buf, stdin, NULL); - } else { - for (i = 2; i < argc; i++) { - filename = argv[i]; - f = fopen(filename, "r"); - if (f == NULL) { - fprintf(stderr, "can't open %s:", filename); - continue; - } - if (READBUFSIZE > 0) - setvbuf(f, readbuf, _IOFBF, READBUFSIZE); - grep(buf, buf, buf, f, filename); - fclose(f); - } - } - - return; -}
--- a/pyrect/template/grep.label Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,44 +0,0 @@ -/* Excerpted from 'The Practice of Programming' */ -/* by Brian W. Kernighan and Rob Pike */ - -int grep(char * regexp, FILE *f, char *name) { - int n; - char buf[LINEBUFSIZE]; - while (fgets(buf, sizeof buf, f) != NULL) { - n = strlen(buf); - if (n > 0 && buf[n-1] == '\n') - buf[n-1] = '\0'; - DFA(buf, buf, name); - } - return 1; -} - -int grepmain(int argc, char* argv[]) { - int i, nmatch; - FILE *f; - - if (argc < 2) { - fprintf(stderr, "usage: grep regexp [file ...]"); - exit(0); - } - nmatch = 0; - if (argc == 2) { - if (grep(argv[1], stdin, NULL)) - nmatch; - } else { - for (i = 2; i < argc; i++) { - f = fopen(argv[i], "r"); - if (f == NULL) { - fprintf(stderr, "can't open %s:", argv[i]); - continue; - } - if (READBUFSIZE > 0) - setvbuf(f, readbuf, _IOFBF, READBUFSIZE); - if (grep(argv[1], f, argc > 3 ? argv[i] : NULL) > 0) - nmatch++; - fclose(f); - } - } - - return nmatch; -}
--- a/pyrect/template/grep.ll Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,377 +0,0 @@ -; ModuleID = 'template/grep.ll.c' -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin9" - %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } - %struct.__sFILEX = type opaque - %struct.__sbuf = type { i8*, i32 } -@"\01LC" = internal constant [4 x i8] c"%s:\00" ; <[4 x i8]*> [#uses=1] -@"\01LC1" = internal constant [2 x i8] c"r\00" ; <[2 x i8]*> [#uses=1] -@__stderrp = external global %struct.FILE* ; <%struct.FILE**> [#uses=4] -@"\01LC2" = internal constant [15 x i8] c"can't open %s:\00" ; <[15 x i8]*> [#uses=1] -@readbuf = common global [1048576 x i8] zeroinitializer, align 32 ; <[1048576 x i8]*> [#uses=1] -@"\01LC3" = internal constant [30 x i8] c"usage: grep regexp [file ...]\00" ; <[30 x i8]*> [#uses=1] -@__stdinp = external global %struct.FILE* ; <%struct.FILE**> [#uses=1] - -define i32 @match(i8* %regexp, i8* %text) nounwind { -entry: - %regexp_addr = alloca i8* ; <i8**> [#uses=2] - %text_addr = alloca i8* ; <i8**> [#uses=6] - %retval = alloca i32 ; <i32*> [#uses=2] - alloca i32 ; <i32*>:0 [#uses=4] - %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] - store i8* %regexp, i8** %regexp_addr - store i8* %text, i8** %text_addr - load i8** %regexp_addr, align 4 ; <i8*>:1 [#uses=1] - getelementptr i8* %1, i32 0 ; <i8*>:2 [#uses=1] - load i8* %2, align 1 ; <i8>:3 [#uses=1] - icmp eq i8 %3, 94 ; <i1>:4 [#uses=1] - zext i1 %4 to i8 ; <i8>:5 [#uses=1] - %toBool = icmp ne i8 %5, 0 ; <i1> [#uses=1] - br i1 %toBool, label %bb, label %bb1 - -bb: ; preds = %entry - load i8** %text_addr, align 4 ; <i8*>:6 [#uses=1] - call i32 @DFA( i8* %6 ) nounwind ; <i32>:7 [#uses=1] - store i32 %7, i32* %0, align 4 - br label %bb7 - -bb1: ; preds = %bb4, %entry - load i8** %text_addr, align 4 ; <i8*>:8 [#uses=1] - call i32 @DFA( i8* %8 ) nounwind ; <i32>:9 [#uses=1] - icmp ne i32 %9, 0 ; <i1>:10 [#uses=1] - zext i1 %10 to i8 ; <i8>:11 [#uses=1] - %toBool2 = icmp ne i8 %11, 0 ; <i1> [#uses=1] - br i1 %toBool2, label %bb3, label %bb4 - -bb3: ; preds = %bb1 - store i32 1, i32* %0, align 4 - br label %bb7 - -bb4: ; preds = %bb1 - load i8** %text_addr, align 4 ; <i8*>:12 [#uses=1] - load i8* %12, align 1 ; <i8>:13 [#uses=1] - icmp ne i8 %13, 0 ; <i1>:14 [#uses=1] - zext i1 %14 to i8 ; <i8>:15 [#uses=1] - load i8** %text_addr, align 4 ; <i8*>:16 [#uses=1] - getelementptr i8* %16, i64 1 ; <i8*>:17 [#uses=1] - store i8* %17, i8** %text_addr, align 4 - %toBool5 = icmp ne i8 %15, 0 ; <i1> [#uses=1] - br i1 %toBool5, label %bb1, label %bb6 - -bb6: ; preds = %bb4 - store i32 0, i32* %0, align 4 - br label %bb7 - -bb7: ; preds = %bb6, %bb3, %bb - load i32* %0, align 4 ; <i32>:18 [#uses=1] - store i32 %18, i32* %retval, align 4 - br label %return - -return: ; preds = %bb7 - %retval8 = load i32* %retval ; <i32> [#uses=1] - ret i32 %retval8 -} - -declare i32 @DFA(i8*) - -define void @grep(i8* %regexp, %struct.FILE* %f, i8* %name) nounwind { -entry: - %regexp_addr = alloca i8* ; <i8**> [#uses=2] - %f_addr = alloca %struct.FILE* ; <%struct.FILE**> [#uses=2] - %name_addr = alloca i8* ; <i8**> [#uses=3] - %buf = alloca [1024 x i8] ; <[1024 x i8]*> [#uses=6] - %n = alloca i32 ; <i32*> [#uses=4] - %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] - store i8* %regexp, i8** %regexp_addr - store %struct.FILE* %f, %struct.FILE** %f_addr - store i8* %name, i8** %name_addr - br label %bb13 - -bb: ; preds = %bb13 - %buf1 = bitcast [1024 x i8]* %buf to i8* ; <i8*> [#uses=1] - call i32 @strlen( i8* %buf1 ) nounwind readonly ; <i32>:0 [#uses=1] - store i32 %0, i32* %n, align 4 - load i32* %n, align 4 ; <i32>:1 [#uses=1] - icmp sgt i32 %1, 0 ; <i1>:2 [#uses=1] - zext i1 %2 to i8 ; <i8>:3 [#uses=1] - %toBool = icmp ne i8 %3, 0 ; <i1> [#uses=1] - br i1 %toBool, label %bb2, label %bb5 - -bb2: ; preds = %bb - load i32* %n, align 4 ; <i32>:4 [#uses=1] - sub i32 %4, 1 ; <i32>:5 [#uses=1] - getelementptr [1024 x i8]* %buf, i32 0, i32 %5 ; <i8*>:6 [#uses=1] - load i8* %6, align 1 ; <i8>:7 [#uses=1] - icmp eq i8 %7, 10 ; <i1>:8 [#uses=1] - zext i1 %8 to i8 ; <i8>:9 [#uses=1] - %toBool3 = icmp ne i8 %9, 0 ; <i1> [#uses=1] - br i1 %toBool3, label %bb4, label %bb5 - -bb4: ; preds = %bb2 - load i32* %n, align 4 ; <i32>:10 [#uses=1] - sub i32 %10, 1 ; <i32>:11 [#uses=1] - getelementptr [1024 x i8]* %buf, i32 0, i32 %11 ; <i8*>:12 [#uses=1] - store i8 0, i8* %12, align 1 - br label %bb5 - -bb5: ; preds = %bb4, %bb2, %bb - load i8** %regexp_addr, align 4 ; <i8*>:13 [#uses=1] - %buf6 = bitcast [1024 x i8]* %buf to i8* ; <i8*> [#uses=1] - call i32 @match( i8* %13, i8* %buf6 ) nounwind ; <i32>:14 [#uses=1] - icmp ne i32 %14, 0 ; <i1>:15 [#uses=1] - zext i1 %15 to i8 ; <i8>:16 [#uses=1] - %toBool7 = icmp ne i8 %16, 0 ; <i1> [#uses=1] - br i1 %toBool7, label %bb8, label %bb13 - -bb8: ; preds = %bb5 - load i8** %name_addr, align 4 ; <i8*>:17 [#uses=1] - icmp ne i8* %17, null ; <i1>:18 [#uses=1] - zext i1 %18 to i8 ; <i8>:19 [#uses=1] - %toBool9 = icmp ne i8 %19, 0 ; <i1> [#uses=1] - br i1 %toBool9, label %bb10, label %bb11 - -bb10: ; preds = %bb8 - load i8** %name_addr, align 4 ; <i8*>:20 [#uses=1] - call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), i8* %20 ) nounwind ; <i32>:21 [#uses=0] - br label %bb11 - -bb11: ; preds = %bb10, %bb8 - %buf12 = bitcast [1024 x i8]* %buf to i8* ; <i8*> [#uses=1] - call i32 @puts( i8* %buf12 ) nounwind ; <i32>:22 [#uses=0] - br label %bb13 - -bb13: ; preds = %bb11, %bb5, %entry - %buf14 = bitcast [1024 x i8]* %buf to i8* ; <i8*> [#uses=1] - load %struct.FILE** %f_addr, align 4 ; <%struct.FILE*>:23 [#uses=1] - call i8* @fgets( i8* %buf14, i32 1024, %struct.FILE* %23 ) nounwind ; <i8*>:24 [#uses=1] - icmp ne i8* %24, null ; <i1>:25 [#uses=1] - zext i1 %25 to i8 ; <i8>:26 [#uses=1] - %toBool15 = icmp ne i8 %26, 0 ; <i1> [#uses=1] - br i1 %toBool15, label %bb, label %bb16 - -bb16: ; preds = %bb13 - br label %return - -return: ; preds = %bb16 - ret void -} - -declare i32 @strlen(i8*) nounwind readonly - -declare i32 @printf(i8*, ...) nounwind - -declare i32 @puts(i8*) - -declare i8* @fgets(i8*, i32, %struct.FILE*) - -define void @llgrep(i8* %regexp, i8* %filename) nounwind { -entry: - %regexp_addr = alloca i8* ; <i8**> [#uses=2] - %filename_addr = alloca i8* ; <i8**> [#uses=3] - %f = alloca %struct.FILE* ; <%struct.FILE**> [#uses=5] - %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] - store i8* %regexp, i8** %regexp_addr - store i8* %filename, i8** %filename_addr - load i8** %filename_addr, align 4 ; <i8*>:0 [#uses=1] - call %struct.FILE* @fopen( i8* %0, i8* getelementptr ([2 x i8]* @"\01LC1", i32 0, i32 0) ) nounwind ; <%struct.FILE*>:1 [#uses=1] - store %struct.FILE* %1, %struct.FILE** %f, align 4 - load %struct.FILE** %f, align 4 ; <%struct.FILE*>:2 [#uses=1] - icmp eq %struct.FILE* %2, null ; <i1>:3 [#uses=1] - zext i1 %3 to i8 ; <i8>:4 [#uses=1] - %toBool = icmp ne i8 %4, 0 ; <i1> [#uses=1] - br i1 %toBool, label %bb, label %bb1 - -bb: ; preds = %entry - load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*>:5 [#uses=1] - load i8** %filename_addr, align 4 ; <i8*>:6 [#uses=1] - call i32 (%struct.FILE*, i8*, ...)* @fprintf( %struct.FILE* %5, i8* getelementptr ([15 x i8]* @"\01LC2", i32 0, i32 0), i8* %6 ) nounwind ; <i32>:7 [#uses=0] - br label %bb1 - -bb1: ; preds = %bb, %entry - load %struct.FILE** %f, align 4 ; <%struct.FILE*>:8 [#uses=1] - call i32 @setvbuf( %struct.FILE* %8, i8* getelementptr ([1048576 x i8]* @readbuf, i32 0, i32 0), i32 0, i32 1048576 ) nounwind ; <i32>:9 [#uses=0] - load i8** %regexp_addr, align 4 ; <i8*>:10 [#uses=1] - load %struct.FILE** %f, align 4 ; <%struct.FILE*>:11 [#uses=1] - call void @grep( i8* %10, %struct.FILE* %11, i8* null ) nounwind - load %struct.FILE** %f, align 4 ; <%struct.FILE*>:12 [#uses=1] - call i32 @fclose( %struct.FILE* %12 ) nounwind ; <i32>:13 [#uses=0] - br label %return - -return: ; preds = %bb1 - ret void -} - -declare %struct.FILE* @fopen(i8*, i8*) - -declare i32 @fprintf(%struct.FILE*, i8*, ...) nounwind - -declare i32 @setvbuf(%struct.FILE*, i8*, i32, i32) - -declare i32 @fclose(%struct.FILE*) - -define void @llgrep_with_name(i8* %regexp, i8* %filename) nounwind { -entry: - %regexp_addr = alloca i8* ; <i8**> [#uses=2] - %filename_addr = alloca i8* ; <i8**> [#uses=4] - %f = alloca %struct.FILE* ; <%struct.FILE**> [#uses=5] - %nmatch = alloca i32 ; <i32*> [#uses=1] - %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] - store i8* %regexp, i8** %regexp_addr - store i8* %filename, i8** %filename_addr - store i32 0, i32* %nmatch, align 4 - load i8** %filename_addr, align 4 ; <i8*>:0 [#uses=1] - call %struct.FILE* @fopen( i8* %0, i8* getelementptr ([2 x i8]* @"\01LC1", i32 0, i32 0) ) nounwind ; <%struct.FILE*>:1 [#uses=1] - store %struct.FILE* %1, %struct.FILE** %f, align 4 - load %struct.FILE** %f, align 4 ; <%struct.FILE*>:2 [#uses=1] - icmp eq %struct.FILE* %2, null ; <i1>:3 [#uses=1] - zext i1 %3 to i8 ; <i8>:4 [#uses=1] - %toBool = icmp ne i8 %4, 0 ; <i1> [#uses=1] - br i1 %toBool, label %bb, label %bb1 - -bb: ; preds = %entry - load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*>:5 [#uses=1] - load i8** %filename_addr, align 4 ; <i8*>:6 [#uses=1] - call i32 (%struct.FILE*, i8*, ...)* @fprintf( %struct.FILE* %5, i8* getelementptr ([15 x i8]* @"\01LC2", i32 0, i32 0), i8* %6 ) nounwind ; <i32>:7 [#uses=0] - br label %bb1 - -bb1: ; preds = %bb, %entry - load %struct.FILE** %f, align 4 ; <%struct.FILE*>:8 [#uses=1] - call i32 @setvbuf( %struct.FILE* %8, i8* getelementptr ([1048576 x i8]* @readbuf, i32 0, i32 0), i32 0, i32 1048576 ) nounwind ; <i32>:9 [#uses=0] - load i8** %regexp_addr, align 4 ; <i8*>:10 [#uses=1] - load %struct.FILE** %f, align 4 ; <%struct.FILE*>:11 [#uses=1] - load i8** %filename_addr, align 4 ; <i8*>:12 [#uses=1] - call void @grep( i8* %10, %struct.FILE* %11, i8* %12 ) nounwind - load %struct.FILE** %f, align 4 ; <%struct.FILE*>:13 [#uses=1] - call i32 @fclose( %struct.FILE* %13 ) nounwind ; <i32>:14 [#uses=0] - br label %return - -return: ; preds = %bb1 - ret void -} - -define i32 @main(i32 %argc, i8** %argv) nounwind { -entry: - %argc_addr = alloca i32 ; <i32*> [#uses=5] - %argv_addr = alloca i8** ; <i8***> [#uses=6] - %retval = alloca i32 ; <i32*> [#uses=2] - %f = alloca %struct.FILE* ; <%struct.FILE**> [#uses=5] - %i = alloca i32 ; <i32*> [#uses=7] - alloca i32 ; <i32*>:0 [#uses=2] - %iftmp.5 = alloca i8* ; <i8**> [#uses=3] - %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] - store i32 %argc, i32* %argc_addr - store i8** %argv, i8*** %argv_addr - load i32* %argc_addr, align 4 ; <i32>:1 [#uses=1] - icmp sle i32 %1, 1 ; <i1>:2 [#uses=1] - zext i1 %2 to i8 ; <i8>:3 [#uses=1] - %toBool = icmp ne i8 %3, 0 ; <i1> [#uses=1] - br i1 %toBool, label %bb, label %bb1 - -bb: ; preds = %entry - load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*>:4 [#uses=1] - bitcast %struct.FILE* %4 to i8* ; <i8*>:5 [#uses=1] - call i32 @"\01_fwrite$UNIX2003"( i8* getelementptr ([30 x i8]* @"\01LC3", i32 0, i32 0), i32 1, i32 29, i8* %5 ) nounwind ; <i32>:6 [#uses=0] - call void @exit( i32 0 ) noreturn nounwind - unreachable - -bb1: ; preds = %entry - load i32* %argc_addr, align 4 ; <i32>:7 [#uses=1] - icmp eq i32 %7, 2 ; <i1>:8 [#uses=1] - zext i1 %8 to i8 ; <i8>:9 [#uses=1] - %toBool2 = icmp ne i8 %9, 0 ; <i1> [#uses=1] - br i1 %toBool2, label %bb3, label %bb4 - -bb3: ; preds = %bb1 - load %struct.FILE** @__stdinp, align 4 ; <%struct.FILE*>:10 [#uses=1] - load i8*** %argv_addr, align 4 ; <i8**>:11 [#uses=1] - getelementptr i8** %11, i32 1 ; <i8**>:12 [#uses=1] - load i8** %12, align 4 ; <i8*>:13 [#uses=1] - call void @grep( i8* %13, %struct.FILE* %10, i8* null ) nounwind - br label %bb16 - -bb4: ; preds = %bb1 - store i32 2, i32* %i, align 4 - br label %bb14 - -bb5: ; preds = %bb14 - load i8*** %argv_addr, align 4 ; <i8**>:14 [#uses=1] - load i32* %i, align 4 ; <i32>:15 [#uses=1] - getelementptr i8** %14, i32 %15 ; <i8**>:16 [#uses=1] - load i8** %16, align 4 ; <i8*>:17 [#uses=1] - call %struct.FILE* @fopen( i8* %17, i8* getelementptr ([2 x i8]* @"\01LC1", i32 0, i32 0) ) nounwind ; <%struct.FILE*>:18 [#uses=1] - store %struct.FILE* %18, %struct.FILE** %f, align 4 - load %struct.FILE** %f, align 4 ; <%struct.FILE*>:19 [#uses=1] - icmp eq %struct.FILE* %19, null ; <i1>:20 [#uses=1] - zext i1 %20 to i8 ; <i8>:21 [#uses=1] - %toBool6 = icmp ne i8 %21, 0 ; <i1> [#uses=1] - br i1 %toBool6, label %bb7, label %bb8 - -bb7: ; preds = %bb5 - load i8*** %argv_addr, align 4 ; <i8**>:22 [#uses=1] - load i32* %i, align 4 ; <i32>:23 [#uses=1] - getelementptr i8** %22, i32 %23 ; <i8**>:24 [#uses=1] - load i8** %24, align 4 ; <i8*>:25 [#uses=1] - load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*>:26 [#uses=1] - call i32 (%struct.FILE*, i8*, ...)* @fprintf( %struct.FILE* %26, i8* getelementptr ([15 x i8]* @"\01LC2", i32 0, i32 0), i8* %25 ) nounwind ; <i32>:27 [#uses=0] - br label %bb13 - -bb8: ; preds = %bb5 - load %struct.FILE** %f, align 4 ; <%struct.FILE*>:28 [#uses=1] - call i32 @setvbuf( %struct.FILE* %28, i8* getelementptr ([1048576 x i8]* @readbuf, i32 0, i32 0), i32 0, i32 1048576 ) nounwind ; <i32>:29 [#uses=0] - load i32* %argc_addr, align 4 ; <i32>:30 [#uses=1] - icmp sgt i32 %30, 3 ; <i1>:31 [#uses=1] - zext i1 %31 to i8 ; <i8>:32 [#uses=1] - %toBool9 = icmp ne i8 %32, 0 ; <i1> [#uses=1] - br i1 %toBool9, label %bb10, label %bb11 - -bb10: ; preds = %bb8 - load i8*** %argv_addr, align 4 ; <i8**>:33 [#uses=1] - load i32* %i, align 4 ; <i32>:34 [#uses=1] - getelementptr i8** %33, i32 %34 ; <i8**>:35 [#uses=1] - load i8** %35, align 4 ; <i8*>:36 [#uses=1] - store i8* %36, i8** %iftmp.5, align 4 - br label %bb12 - -bb11: ; preds = %bb8 - store i8* null, i8** %iftmp.5, align 4 - br label %bb12 - -bb12: ; preds = %bb11, %bb10 - load i8*** %argv_addr, align 4 ; <i8**>:37 [#uses=1] - getelementptr i8** %37, i32 1 ; <i8**>:38 [#uses=1] - load i8** %38, align 4 ; <i8*>:39 [#uses=1] - load %struct.FILE** %f, align 4 ; <%struct.FILE*>:40 [#uses=1] - load i8** %iftmp.5, align 4 ; <i8*>:41 [#uses=1] - call void @grep( i8* %39, %struct.FILE* %40, i8* %41 ) nounwind - load %struct.FILE** %f, align 4 ; <%struct.FILE*>:42 [#uses=1] - call i32 @fclose( %struct.FILE* %42 ) nounwind ; <i32>:43 [#uses=0] - br label %bb13 - -bb13: ; preds = %bb12, %bb7 - load i32* %i, align 4 ; <i32>:44 [#uses=1] - add i32 %44, 1 ; <i32>:45 [#uses=1] - store i32 %45, i32* %i, align 4 - br label %bb14 - -bb14: ; preds = %bb13, %bb4 - load i32* %i, align 4 ; <i32>:46 [#uses=1] - load i32* %argc_addr, align 4 ; <i32>:47 [#uses=1] - icmp slt i32 %46, %47 ; <i1>:48 [#uses=1] - zext i1 %48 to i8 ; <i8>:49 [#uses=1] - %toBool15 = icmp ne i8 %49, 0 ; <i1> [#uses=1] - br i1 %toBool15, label %bb5, label %bb16 - -bb16: ; preds = %bb14, %bb3 - store i32 0, i32* %0, align 4 - load i32* %0, align 4 ; <i32>:50 [#uses=1] - store i32 %50, i32* %retval, align 4 - br label %return - -return: ; preds = %bb16 - %retval17 = load i32* %retval ; <i32> [#uses=1] - ret i32 %retval17 -} - -declare i32 @"\01_fwrite$UNIX2003"(i8*, i32, i32, i8*) - -declare void @exit(i32) noreturn nounwind
--- a/pyrect/template/grep.ll.c Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,89 +0,0 @@ -#include <stdio.h> -#include <string.h> -#include <stdlib.h> - -#define LINEBUFSIZE 1024 -#define READBUFSIZE 1024*1024 -char readbuf[READBUFSIZE]; - -extern int DFA(char* text); - -int match(char *regexp, char *text) { - if (regexp[0] == '^') - return DFA(text); - do { - if (DFA(text)) - return 1; - } while (*text++ != '\0'); - return 0; -} - -void grep(char *regexp, FILE *f, char *name) { - int n; - char buf[LINEBUFSIZE]; - while (fgets(buf, sizeof buf, f) != NULL) { - n = strlen(buf); - if (n > 0 && buf[n-1] == '\n') - buf[n-1] = '\0'; - if (match(regexp, buf)) { - if (name != NULL) - printf("%s:", name); - printf("%s\n", buf); - } - } - return; -} - -void llgrep(char *regexp, char* filename) { - FILE *f; - f = fopen(filename, "r"); - if (f == NULL) { - fprintf(stderr, "can't open %s:", filename); - } - if (READBUFSIZE > 0) - setvbuf(f, readbuf, _IOFBF, READBUFSIZE); - grep(regexp, f, NULL); - fclose(f); - return; -} - -void llgrep_with_name(char *regexp, char* filename) { - int nmatch = 0; - FILE *f; - f = fopen(filename, "r"); - if (f == NULL) { - fprintf(stderr, "can't open %s:", filename); - } - if (READBUFSIZE > 0) - setvbuf(f, readbuf, _IOFBF, READBUFSIZE); - grep(regexp, f, filename); - fclose(f); - return; -} - -int main(int argc, char* argv[]) { - int i; - FILE *f; - - if (argc < 2) { - fprintf(stderr, "usage: grep regexp [file ...]"); - exit(0); - } - if (argc == 2) { - grep(argv[1], stdin, NULL); - } else { - for (i = 2; i < argc; i++) { - f = fopen(argv[i], "r"); - if (f == NULL) { - fprintf(stderr, "can't open %s:", argv[i]); - continue; - } - if (READBUFSIZE > 0) - setvbuf(f, readbuf, _IOFBF, READBUFSIZE); - grep(argv[1], f, argc > 3 ? argv[i] : NULL); - fclose(f); - } - } - - return 0; -}
--- a/pyrect/template/grep.ll.c~ Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,89 +0,0 @@ -#include <stdio.h> -#include <string.h> -#include <stdlib.h> - -#define LINEBUFSIZE 1024 -#define READBUFSIZE 1024*1024 -char readbuf[READBUFSIZE]; - -extern int DFA(char* text); - -int match(char *regexp, char *text) { - if (regexp[0] == '^') - return DFA(text); - do { - if (DFA(text)) - return 1; - } while (text++ != '\0'); - return 0; -} - -void grep(char *regexp, FILE *f, char *name) { - int n; - char buf[LINEBUFSIZE]; - while (fgets(buf, sizeof buf, f) != NULL) { - n = strlen(buf); - if (n > 0 && buf[n-1] == '\n') - buf[n-1] = '\0'; - if (match(regexp, buf)) { - if (name != NULL) - printf("%s:", name); - printf("%s\n", buf); - } - } - return; -} - -void llgrep(char *regexp, char* filename) { - FILE *f; - f = fopen(filename, "r"); - if (f == NULL) { - fprintf(stderr, "can't open %s:", filename); - } - if (READBUFSIZE > 0) - setvbuf(f, readbuf, _IOFBF, READBUFSIZE); - grep(regexp, f, NULL); - fclose(f); - return; -} - -void llgrep_with_name(char *regexp, char* filename) { - int nmatch = 0; - FILE *f; - f = fopen(filename, "r"); - if (f == NULL) { - fprintf(stderr, "can't open %s:", filename); - } - if (READBUFSIZE > 0) - setvbuf(f, readbuf, _IOFBF, READBUFSIZE); - grep(regexp, f, filename); - fclose(f); - return; -} - -int main(int argc, char* argv[]) { - int i; - FILE *f; - - if (argc < 2) { - fprintf(stderr, "usage: grep regexp [file ...]"); - exit(0); - } - if (argc == 2) { - grep(argv[1], stdin, NULL); - } else { - for (i = 2; i < argc; i++) { - f = fopen(argv[i], "r"); - if (f == NULL) { - fprintf(stderr, "can't open %s:", argv[i]); - continue; - } - if (READBUFSIZE > 0) - setvbuf(f, readbuf, _IOFBF, READBUFSIZE); - grep(argv[1], f, argc > 3 ? argv[i] : NULL); - fclose(f); - } - } - - return 0; -}
--- a/pyrect/template/grep.ll~ Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,284 +0,0 @@ -; ModuleID = 'template/grep.ll.c' -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin9" - %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } - %struct.__sFILEX = type opaque - %struct.__sbuf = type { i8*, i32 } -@"\01LC" = internal constant [4 x i8] c"%s:\00" ; <[4 x i8]*> [#uses=1] -@__stderrp = external global %struct.FILE* ; <%struct.FILE**> [#uses=2] -@"\01LC1" = internal constant [30 x i8] c"usage: grep regexp [file ...]\00" ; <[30 x i8]*> [#uses=1] -@__stdinp = external global %struct.FILE* ; <%struct.FILE**> [#uses=1] -@"\01LC2" = internal constant [2 x i8] c"r\00" ; <[2 x i8]*> [#uses=1] -@"\01LC3" = internal constant [15 x i8] c"can't open %s:\00" ; <[15 x i8]*> [#uses=1] -@readbuf = common global [1048576 x i8] zeroinitializer, align 32 ; <[1048576 x i8]*> [#uses=1] - -define i32 @match(i8* %regexp, i8* %text) nounwind { -entry: - %regexp_addr = alloca i8* ; <i8**> [#uses=1] - %text_addr = alloca i8* ; <i8**> [#uses=1] - %retval = alloca i32 ; <i32*> [#uses=2] - alloca i32 ; <i32*>:0 [#uses=2] - %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] - store i8* %regexp, i8** %regexp_addr - store i8* %text, i8** %text_addr - store i32 1, i32* %0, align 4 - load i32* %0, align 4 ; <i32>:1 [#uses=1] - store i32 %1, i32* %retval, align 4 - br label %return - -return: ; preds = %entry - %retval1 = load i32* %retval ; <i32> [#uses=1] - ret i32 %retval1 -} - -define i32 @grep(i8* %regexp, %struct.FILE* %f, i8* %name) nounwind { -entry: - %regexp_addr = alloca i8* ; <i8**> [#uses=2] - %f_addr = alloca %struct.FILE* ; <%struct.FILE**> [#uses=2] - %name_addr = alloca i8* ; <i8**> [#uses=3] - %retval = alloca i32 ; <i32*> [#uses=2] - %buf = alloca [1024 x i8] ; <[1024 x i8]*> [#uses=6] - %nmatch = alloca i32 ; <i32*> [#uses=4] - %n = alloca i32 ; <i32*> [#uses=4] - alloca i32 ; <i32*>:0 [#uses=2] - %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] - store i8* %regexp, i8** %regexp_addr - store %struct.FILE* %f, %struct.FILE** %f_addr - store i8* %name, i8** %name_addr - store i32 0, i32* %nmatch, align 4 - br label %bb13 - -bb: ; preds = %bb13 - %buf1 = bitcast [1024 x i8]* %buf to i8* ; <i8*> [#uses=1] - call i32 @strlen( i8* %buf1 ) nounwind readonly ; <i32>:1 [#uses=1] - store i32 %1, i32* %n, align 4 - load i32* %n, align 4 ; <i32>:2 [#uses=1] - icmp sgt i32 %2, 0 ; <i1>:3 [#uses=1] - zext i1 %3 to i8 ; <i8>:4 [#uses=1] - %toBool = icmp ne i8 %4, 0 ; <i1> [#uses=1] - br i1 %toBool, label %bb2, label %bb5 - -bb2: ; preds = %bb - load i32* %n, align 4 ; <i32>:5 [#uses=1] - sub i32 %5, 1 ; <i32>:6 [#uses=1] - getelementptr [1024 x i8]* %buf, i32 0, i32 %6 ; <i8*>:7 [#uses=1] - load i8* %7, align 1 ; <i8>:8 [#uses=1] - icmp eq i8 %8, 10 ; <i1>:9 [#uses=1] - zext i1 %9 to i8 ; <i8>:10 [#uses=1] - %toBool3 = icmp ne i8 %10, 0 ; <i1> [#uses=1] - br i1 %toBool3, label %bb4, label %bb5 - -bb4: ; preds = %bb2 - load i32* %n, align 4 ; <i32>:11 [#uses=1] - sub i32 %11, 1 ; <i32>:12 [#uses=1] - getelementptr [1024 x i8]* %buf, i32 0, i32 %12 ; <i8*>:13 [#uses=1] - store i8 0, i8* %13, align 1 - br label %bb5 - -bb5: ; preds = %bb4, %bb2, %bb - load i8** %regexp_addr, align 4 ; <i8*>:14 [#uses=1] - %buf6 = bitcast [1024 x i8]* %buf to i8* ; <i8*> [#uses=1] - call i32 @match( i8* %14, i8* %buf6 ) nounwind ; <i32>:15 [#uses=1] - icmp ne i32 %15, 0 ; <i1>:16 [#uses=1] - zext i1 %16 to i8 ; <i8>:17 [#uses=1] - %toBool7 = icmp ne i8 %17, 0 ; <i1> [#uses=1] - br i1 %toBool7, label %bb8, label %bb13 - -bb8: ; preds = %bb5 - load i32* %nmatch, align 4 ; <i32>:18 [#uses=1] - add i32 %18, 1 ; <i32>:19 [#uses=1] - store i32 %19, i32* %nmatch, align 4 - load i8** %name_addr, align 4 ; <i8*>:20 [#uses=1] - icmp ne i8* %20, null ; <i1>:21 [#uses=1] - zext i1 %21 to i8 ; <i8>:22 [#uses=1] - %toBool9 = icmp ne i8 %22, 0 ; <i1> [#uses=1] - br i1 %toBool9, label %bb10, label %bb11 - -bb10: ; preds = %bb8 - load i8** %name_addr, align 4 ; <i8*>:23 [#uses=1] - call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), i8* %23 ) nounwind ; <i32>:24 [#uses=0] - br label %bb11 - -bb11: ; preds = %bb10, %bb8 - %buf12 = bitcast [1024 x i8]* %buf to i8* ; <i8*> [#uses=1] - call i32 @puts( i8* %buf12 ) nounwind ; <i32>:25 [#uses=0] - br label %bb13 - -bb13: ; preds = %bb11, %bb5, %entry - %buf14 = bitcast [1024 x i8]* %buf to i8* ; <i8*> [#uses=1] - load %struct.FILE** %f_addr, align 4 ; <%struct.FILE*>:26 [#uses=1] - call i8* @fgets( i8* %buf14, i32 1024, %struct.FILE* %26 ) nounwind ; <i8*>:27 [#uses=1] - icmp ne i8* %27, null ; <i1>:28 [#uses=1] - zext i1 %28 to i8 ; <i8>:29 [#uses=1] - %toBool15 = icmp ne i8 %29, 0 ; <i1> [#uses=1] - br i1 %toBool15, label %bb, label %bb16 - -bb16: ; preds = %bb13 - load i32* %nmatch, align 4 ; <i32>:30 [#uses=1] - store i32 %30, i32* %0, align 4 - load i32* %0, align 4 ; <i32>:31 [#uses=1] - store i32 %31, i32* %retval, align 4 - br label %return - -return: ; preds = %bb16 - %retval17 = load i32* %retval ; <i32> [#uses=1] - ret i32 %retval17 -} - -declare i32 @strlen(i8*) nounwind readonly - -declare i32 @printf(i8*, ...) nounwind - -declare i32 @puts(i8*) - -declare i8* @fgets(i8*, i32, %struct.FILE*) - -define i32 @grepmain(i32 %argc, i8** %argv) nounwind { -entry: - %argc_addr = alloca i32 ; <i32*> [#uses=5] - %argv_addr = alloca i8** ; <i8***> [#uses=6] - %retval = alloca i32 ; <i32*> [#uses=2] - %f = alloca %struct.FILE* ; <%struct.FILE**> [#uses=5] - %nmatch = alloca i32 ; <i32*> [#uses=4] - %i = alloca i32 ; <i32*> [#uses=7] - alloca i32 ; <i32*>:0 [#uses=2] - %iftmp.3 = alloca i8* ; <i8**> [#uses=3] - %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] - store i32 %argc, i32* %argc_addr - store i8** %argv, i8*** %argv_addr - load i32* %argc_addr, align 4 ; <i32>:1 [#uses=1] - icmp sle i32 %1, 1 ; <i1>:2 [#uses=1] - zext i1 %2 to i8 ; <i8>:3 [#uses=1] - %toBool = icmp ne i8 %3, 0 ; <i1> [#uses=1] - br i1 %toBool, label %bb, label %bb1 - -bb: ; preds = %entry - load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*>:4 [#uses=1] - bitcast %struct.FILE* %4 to i8* ; <i8*>:5 [#uses=1] - call i32 @"\01_fwrite$UNIX2003"( i8* getelementptr ([30 x i8]* @"\01LC1", i32 0, i32 0), i32 1, i32 29, i8* %5 ) nounwind ; <i32>:6 [#uses=0] - call void @exit( i32 0 ) noreturn nounwind - unreachable - -bb1: ; preds = %entry - store i32 0, i32* %nmatch, align 4 - load i32* %argc_addr, align 4 ; <i32>:7 [#uses=1] - icmp eq i32 %7, 2 ; <i1>:8 [#uses=1] - zext i1 %8 to i8 ; <i8>:9 [#uses=1] - %toBool2 = icmp ne i8 %9, 0 ; <i1> [#uses=1] - br i1 %toBool2, label %bb3, label %bb4 - -bb3: ; preds = %bb1 - load %struct.FILE** @__stdinp, align 4 ; <%struct.FILE*>:10 [#uses=1] - load i8*** %argv_addr, align 4 ; <i8**>:11 [#uses=1] - getelementptr i8** %11, i32 1 ; <i8**>:12 [#uses=1] - load i8** %12, align 4 ; <i8*>:13 [#uses=1] - call i32 @grep( i8* %13, %struct.FILE* %10, i8* null ) nounwind ; <i32>:14 [#uses=0] - br label %bb19 - -bb4: ; preds = %bb1 - store i32 2, i32* %i, align 4 - br label %bb17 - -bb5: ; preds = %bb17 - load i8*** %argv_addr, align 4 ; <i8**>:15 [#uses=1] - load i32* %i, align 4 ; <i32>:16 [#uses=1] - getelementptr i8** %15, i32 %16 ; <i8**>:17 [#uses=1] - load i8** %17, align 4 ; <i8*>:18 [#uses=1] - call %struct.FILE* @fopen( i8* %18, i8* getelementptr ([2 x i8]* @"\01LC2", i32 0, i32 0) ) nounwind ; <%struct.FILE*>:19 [#uses=1] - store %struct.FILE* %19, %struct.FILE** %f, align 4 - load %struct.FILE** %f, align 4 ; <%struct.FILE*>:20 [#uses=1] - icmp eq %struct.FILE* %20, null ; <i1>:21 [#uses=1] - zext i1 %21 to i8 ; <i8>:22 [#uses=1] - %toBool6 = icmp ne i8 %22, 0 ; <i1> [#uses=1] - br i1 %toBool6, label %bb7, label %bb8 - -bb7: ; preds = %bb5 - load i8*** %argv_addr, align 4 ; <i8**>:23 [#uses=1] - load i32* %i, align 4 ; <i32>:24 [#uses=1] - getelementptr i8** %23, i32 %24 ; <i8**>:25 [#uses=1] - load i8** %25, align 4 ; <i8*>:26 [#uses=1] - load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*>:27 [#uses=1] - call i32 (%struct.FILE*, i8*, ...)* @fprintf( %struct.FILE* %27, i8* getelementptr ([15 x i8]* @"\01LC3", i32 0, i32 0), i8* %26 ) nounwind ; <i32>:28 [#uses=0] - br label %bb16 - -bb8: ; preds = %bb5 - load %struct.FILE** %f, align 4 ; <%struct.FILE*>:29 [#uses=1] - call i32 @setvbuf( %struct.FILE* %29, i8* getelementptr ([1048576 x i8]* @readbuf, i32 0, i32 0), i32 0, i32 1048576 ) nounwind ; <i32>:30 [#uses=0] - load i32* %argc_addr, align 4 ; <i32>:31 [#uses=1] - icmp sgt i32 %31, 3 ; <i1>:32 [#uses=1] - zext i1 %32 to i8 ; <i8>:33 [#uses=1] - %toBool9 = icmp ne i8 %33, 0 ; <i1> [#uses=1] - br i1 %toBool9, label %bb10, label %bb11 - -bb10: ; preds = %bb8 - load i8*** %argv_addr, align 4 ; <i8**>:34 [#uses=1] - load i32* %i, align 4 ; <i32>:35 [#uses=1] - getelementptr i8** %34, i32 %35 ; <i8**>:36 [#uses=1] - load i8** %36, align 4 ; <i8*>:37 [#uses=1] - store i8* %37, i8** %iftmp.3, align 4 - br label %bb12 - -bb11: ; preds = %bb8 - store i8* null, i8** %iftmp.3, align 4 - br label %bb12 - -bb12: ; preds = %bb11, %bb10 - load i8*** %argv_addr, align 4 ; <i8**>:38 [#uses=1] - getelementptr i8** %38, i32 1 ; <i8**>:39 [#uses=1] - load i8** %39, align 4 ; <i8*>:40 [#uses=1] - load %struct.FILE** %f, align 4 ; <%struct.FILE*>:41 [#uses=1] - load i8** %iftmp.3, align 4 ; <i8*>:42 [#uses=1] - call i32 @grep( i8* %40, %struct.FILE* %41, i8* %42 ) nounwind ; <i32>:43 [#uses=1] - icmp sgt i32 %43, 0 ; <i1>:44 [#uses=1] - zext i1 %44 to i8 ; <i8>:45 [#uses=1] - %toBool13 = icmp ne i8 %45, 0 ; <i1> [#uses=1] - br i1 %toBool13, label %bb14, label %bb15 - -bb14: ; preds = %bb12 - load i32* %nmatch, align 4 ; <i32>:46 [#uses=1] - add i32 %46, 1 ; <i32>:47 [#uses=1] - store i32 %47, i32* %nmatch, align 4 - br label %bb15 - -bb15: ; preds = %bb14, %bb12 - load %struct.FILE** %f, align 4 ; <%struct.FILE*>:48 [#uses=1] - call i32 @fclose( %struct.FILE* %48 ) nounwind ; <i32>:49 [#uses=0] - br label %bb16 - -bb16: ; preds = %bb15, %bb7 - load i32* %i, align 4 ; <i32>:50 [#uses=1] - add i32 %50, 1 ; <i32>:51 [#uses=1] - store i32 %51, i32* %i, align 4 - br label %bb17 - -bb17: ; preds = %bb16, %bb4 - load i32* %i, align 4 ; <i32>:52 [#uses=1] - load i32* %argc_addr, align 4 ; <i32>:53 [#uses=1] - icmp slt i32 %52, %53 ; <i1>:54 [#uses=1] - zext i1 %54 to i8 ; <i8>:55 [#uses=1] - %toBool18 = icmp ne i8 %55, 0 ; <i1> [#uses=1] - br i1 %toBool18, label %bb5, label %bb19 - -bb19: ; preds = %bb17, %bb3 - load i32* %nmatch, align 4 ; <i32>:56 [#uses=1] - store i32 %56, i32* %0, align 4 - load i32* %0, align 4 ; <i32>:57 [#uses=1] - store i32 %57, i32* %retval, align 4 - br label %return - -return: ; preds = %bb19 - %retval20 = load i32* %retval ; <i32> [#uses=1] - ret i32 %retval20 -} - -declare i32 @"\01_fwrite$UNIX2003"(i8*, i32, i32, i8*) - -declare void @exit(i32) noreturn nounwind - -declare %struct.FILE* @fopen(i8*, i8*) - -declare i32 @fprintf(%struct.FILE*, i8*, ...) nounwind - -declare i32 @setvbuf(%struct.FILE*, i8*, i32, i32) - -declare i32 @fclose(%struct.FILE*)
--- a/pyrect/translator.py Fri Aug 06 20:18:58 2010 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,22 +0,0 @@ -#!/usr/bin/env python - -import sys - -class Translator(object): - def __init__(self, regexp): - self.regexp = regexp - self.cg = regexp.dfacg - self.stream = None - - def emit(self, string): - self.stream.write(string) - - def state_name(self, state_name): - return str(state_name) - - def emit_from_callgraph(self): - pass - - def translate(self, stream=sys.stdout): - self.stream = stream - self.emit_from_callgraph()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/translator/c_translator.py Sun Aug 08 04:13:14 2010 +0900 @@ -0,0 +1,207 @@ +#!/Usr/bin/env python + +from __future__ import with_statement +from pyrect.regexp import Regexp +from pyrect.regexp.ast import * +from translator import Translator + +class CTranslator(Translator): + """CTranslator + This Class can translate from DFA or NFA into C source code. + DFA: A simple state transition as tail call (also can implement with CbC). + NFA: using stack, deepening depth-first search. + >>> string = '(A|B)*C' + >>> reg = Regexp(string) + >>> CTranslator(reg).translate() + >>> CTranslator(reg).translate() + """ + def __init__(self, regexp, fa="DFA"): + Translator.__init__(self, regexp) + if fa == "DFA": + self.cg = regexp.dfacg + self.debug = False + self.special_type = (Range, Anchor) + self.trans_stmt = self._trans_stmt(self.emit) + + def state_name(self, name): + if name == "accept" or name == "reject": + return name + else: + return "state_"+str(name) + + def emit_accept_state(self): + self.emiti("void accept(char* s) {") + self.emit( r'printf("string matches regexp. \n\n");') + self.emitd("}", 2) + + def emit_reject_state(self): + self.emiti("void reject(char* s) {") + self.emit( r'printf("string matches regexp. \n\n");') + self.emitd("}", 2) + + def emit_driver(self): + self.emiti("int main(int argc, char* argv[]) {") + self.emit( 'buf = argv[1];') + self.emit( 'puts("regexp: %s");"' % self.regexp.regexp) + self.emit( 'puts("number of state: %d");' % len(self.cg.states)) + self.emit( r'printf(\"string: %s\n\", argv[1]);') + self.emit( "%s(argv[1]);" % self.state_name(self.cg.start)) + if self.cg.type == "NFA": + self.emit("reject(argv[1]);") + self.emit( "return 0;") + self.emitd("}", 2) + + def emit_strcmp1(self, string, next): + cmp_stmt = list() + offset = 0 + if len(string) >= 4: + for n in range(len(string)/4): + type_ = "unsigned int *" + ptr = "intp" + str(n) + self.emit('static %s%s = (%s)\"%s\";' + % (type_, ptr, type_, string[:4])) + cmp_stmt.append((type_, offset, "*"+ptr)) + string = string[4:] + offset += 4 + if len(string) >= 2: + type_ = "unsigned short int *" + ptr = "shortp" + self.emit('static %s%s = (%s)\"%s\";' + % (type_, ptr, type_, string[:2])) + cmp_stmt.append((type_, offset, "*"+ptr)) + offset += 2 + string = string[2:] + if len(string) == 1: + ptr = "'%s'" % string[0] + cmp_stmt.append(("char *", offset, ptr)) + offset += 1 + + self.emit() + self.emit0("if (") + for stmt in cmp_stmt: + self.emit0("*(%s)((char *)s+%d) == %s" % stmt) + if stmt != cmp_stmt[-1]: + self.emit(" && ") + self.emiti(")") + self.emit ("return %s(s+%d);" % (self.state_name(next), offset)) + self.emitd() + + def emit_strcmp2(self, string, next): + self.indent() + self.emit ( "ls = s;") + # emit -> if (cmp_stmt && cmp_stmt && ...) + self.emit0("if (") + self.emit ("*ls++ == '%c'" % string[0]) + for char in string[1:]: + self.emit0(" && *ls++ == '%c'" % char) + self.emiti(")") + self.emit ("return %s(ls);" % self.state_name(next)) + self.emitd() + + def emit_strcmp3(self, string, next): + self.emit('static char* string = \"%s\";' % string) + self.emiti("if (memcmp(string, s, %d) == 0)" % len(string)) + self.emit("return %s(s+%d);" % (self.state_name(next), len(string))) + self.emitd() + + def emit_switch(self, case, default=None): + self.emiti("switch(*s++) {") + for case, next_ in case.iteritems(): + self.trans_stmt.emit(case, self.state_name(next_)) + if default: + self.emit("default: return %s(s);" % default) + self.emitd("}") + + def emit_state(self, cur_state, transition): + self.emiti("int %s(char* s) {" % self.state_name(cur_state)) + + if self.debug: + self.emit(r'printf("state: %s, input: %%s\n", s);' % cur_state) + if self.cg.type == "NFA": + if '' in transition: + epsilon_transition = transition.pop('') + for n in epsilon_transition: + self.emit("\t%s%s(s);\n" % (self.callType, self.state_name(n))) + + if cur_state in self.cg.accepts: + eol = Character(r'\0') + transition[eol] = "accept" + + for input_ in transition.keys(): + for special in self.special_type: + if isinstance(input_, special): + self.trans_stmt.emit(input_, self.state_name(transition.pop(input_))) + + if transition: + if self.cg.type == "DFA": + self.emit_switch(transition, default="reject") + else: + self.emit_switch(transition) + + self.emitd("}", 2) + + def emit_initialization(self): + self.emit("#include <stdio.h>") + for state in self.cg.map.iterkeys(): + self.emit("int %s(char* s);" % self.state_name(state)) + self.emit('int accept(char* s);') + self.emit('int reject(char* s);') + self.emit('char* buf;', 2) + + def emit_from_callgraph(self): + # self.emit C-source code + self.emit_initialization() + self.emit_driver() + + for cur_state, transition in self.cg.map.iteritems(): + self.emit_state(cur_state, transition) + + self.emit_accept_state() + self.emit_reject_state() + + class _trans_stmt(ASTWalker): + def __init__(self, emit): + self._emit = emit + + def emit(self, input_node, next_): + self.next = next_ + input_node.accept(self) + + def visit(self, input_node): + self._emit("/* UNKNOW RULE */") + self._emit("/* %s */" % input_node.__repr__()) + + def visit_MBCharacter(self, mbchar): + self.visit(mbchar) + + def visit_Character(self, char): + self._emit("case '%s':" % char.char) + self._emit(" return %s(s);" % self.next) + + def visit_BegLine(self, begline): + self._emit("if (s == buf)") + self._emit(" return %s(s);" % self.next) + + def visit_EndLine(self, endline): + self._emit(r"if (*s == '\0')") + self._emit(" return %s(s);" % self.next) + + # Special Rule + def visit_Range(self, range): + if isinstance(range.lower, MBCharacter) and not \ + isinstance(range.upper, MBCharacter) or \ + isinstance(range.upper, MBCharacter) and not \ + isinstance(range.lower, MBCharacter): + return + + if isinstance(range.lower, MBCharacter): + self.visit(range) + else: + self._emit("if ('%s' <= *s && *s =< '%s')" % (range.lower.char, range.upper.char)) + self._emit(" return %s(s+1);" % self.next) + +def test(): + import doctest + doctest.testmod() + +if __name__ == '__main__': test()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/translator/cbc_grep_translator.py Sun Aug 08 04:13:14 2010 +0900 @@ -0,0 +1,143 @@ +#!/usr/bin/env python + +from grep_translator import GREPTranslator +from pyrect.regexp import Regexp + +class CbCGREPTranslateExeption(Exception): + pass + +class CbCGREPTranslator(GREPTranslator): + """CbCGREPTranslator + This Class can translate form DFA into grep source-code. + which based on (beautiful) mini-grep introduced \"The Practice of Programming\" + written by Rob Pike & Brian W. Kernighan. (see template/grep.c) + >>> string = \"(build|fndecl|gcc)\" + >>> reg = Regexp(string) + >>> tje = GREPTranslator(reg) + >>> tje.translate() + """ + + def __init__(self, regexp): + GREPTranslator.__init__(self, regexp) + self.funType = '__code ' + self.interface = "char *s, char* cur, char* buf, FILE *f, char* filename" + self.args = "s, cur, buf, f, filename" + self.callType = 'goto ' + self.breakStatement = '' + self.print_file = False + self.__bufsize = 1024 + + def getbufsize(self): + return self.__bufsize + def setbufsize(self, bufsize): + self.__bufsize = abs(bufsize) + + bufsize = property(getbufsize, setbufsize) + + def emit_accept_state(self): + self.emit("__code accept(%s) {\n" % self.interface) + if self.print_file: + self.emit(" printf(\"%s: %s\\n\", filename, buf);\n") + else: + self.emit(" printf(\"%s\\n\", buf);\n") + self.emit(" goto next_line(%s);\n}\n\n" % self.args) + + def emit_reject_state(self): + self.emit(""" +__code reject(%s) { + goto next_ptr(%s); +} +""" % (self.interface, self.args)) + + def emit_next_state(self): + self.emit (""" +__code next_ptr(%s) { + if(*cur++ == '\\0') + goto next_line(%s); + s = cur; + goto DFA(%s); +} +""" % (self.interface, self.args, self.args)) + + self.emit(""" +__code next_line(%s) { + if(fgets(buf, LINEBUFSIZE, f) == NULL) { + goto returner(); + } + int n = strlen(buf); + if (n > 0 && buf[n-1] == '\\n') + buf[n-1] = '\\0'; + cur = buf; + s = cur; + goto DFA(%s); +} +""" % (self.interface, self.args)) + self.emit(""" +__code returner() { + return; +}""") + + def emit_initialization(self): + self.emit("#include <stdio.h>\n") + self.emit("#include <stdlib.h>\n") + self.emit("#include <string.h>\n\n") + self.emit("#define LINEBUFSIZE 1024\n") + self.emit("#define READBUFSIZE %d\n\n" % self.bufsize) + + self.emit("%sDFA(%s);\n" % (self.funType, self.interface)) + for state in self.cg.map.iterkeys(): + self.emit(self.funType + self.state_name(state) + "(" + self.interface + ");\n") + self.emit(self.funType + 'accept(%s);\n' % self.interface) + self.emit(self.funType + 'reject(%s);\n' % self.interface) + self.emit(self.funType + 'next_ptr(%s);\n' % self.interface) + self.emit(self.funType + 'next_line(%s);\n' % self.interface) + self.emit(self.funType + 'returner();\n\n') + grepsource = open("template/grep.cbc") + self.emit(grepsource.read()) + self.emit_next_state() + + def emit_filter(self): + pass + + def emit_driver(self): + self.emit(""" +int main(int argc, char* argv[]) { + grepmain(argc, argv); + return; +} +""") + self.emit(""" +%sDFA(%s) { + goto %s(%s); +} +""" % (self.funType, self.interface, self.state_name(self.cg.start), self.args)) + + def emit_switch(self, case, default=None): + self.emit("\tswitch(*s++) {\n") + for input, next_state in case.iteritems(): + if input != '': + self.emit("\t\tcase '%s': \n" % (input)) + self.emit("\t\t\t%s%s(%s);\n" % (self.callType, self.state_name(next_state), self.args)) + if self.breakStatement != '': self.emit(self.breakStatement+'\n') + + if default: + self.emit( """\t\tdefault:\n\t\t\t%s%s(%s);\n""" % (self.callType, default, self.args)) + self.emit("\t}\n") + + def emit_state(self, cur_state, transition): + self.emit(self.funType + self.state_name(cur_state) + "(" + self.interface + ") {\n") + if cur_state in self.cg.accepts: + self.emit("\tgoto accept(%s);\n" % self.args) + else: + if transition: + if self.cg.type == "DFA": + self.emit_switch(transition, default="reject") + else: + self.emit_switch(transition) + self.emit("}\n\n") + +def test(): + import doctest + doctest.testmod() + +if __name__ == '__main__': test()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/translator/cbc_translator.py Sun Aug 08 04:13:14 2010 +0900 @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +from pyrect.regexp import Regexp +from c_translator import CTranslator + +class CbCTranslator(CTranslator): + """ + CbCTranslator + >>> string = \"(A|B)*C\" + >>> reg = Regexp(string) + >>> ct = CbCTranslator(reg) + >>> ct.translate() + >>> ct.debug = True + >>> ct.translate() + """ + def __init__(self, regexp): + CTranslator.__init__(self, regexp) + self.funType = '__code ' + self.callType = 'goto ' + self.breakStatement = '' + +def test(): + import doctest + doctest.testmod() + +if __name__ == '__main__' : test()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/translator/dfa_translator.py Sun Aug 08 04:13:14 2010 +0900 @@ -0,0 +1,72 @@ +#!/usr/bin/env python + +from grep_translator import GREPTranslator +from pyrect.regexp import Regexp + +class GNUGREPTranslator(GREPTranslator): + """GNUGREPTranslator + This class can translate from DFA into size_t DFA(char* s). + which is entirely equivalent to dfaexec(..) in GNU-grep (see src/dfa.c). + * but which is not work currently. (when search large-file, there is fewer + * accepted-lines than grep's dfaexec.) + * probably, there is some problem exists about buffering. + >>> string = '(build|fndecl|gcc)' + >>> reg = Regexp(string) + >>> tje = GNUGREPTranslator(reg) + >>> tje.translate() + """ + + def __init__(self, regexp): + GREPTranslator.__init__(self, regexp) + self.funType = 'size_t ' + self.callType = 'return ' + self.breakStatement = '' + + def emit_initialization(self): + for state in self.cg.map.iterkeys(): + self.emit(self.funType + self.state_name(state) + "(char* s);\n") + self.emit(self.funType + 'accept(char* s);\n') + self.emit(self.funType + 'reject(char* s);\n') + + def emit_accept_state(self): + self.emit (""" +%saccept(char* s) { +\treturn 1; +}\n""" % self.funType) + + def emit_reject_state(self): + self.emit (""" +%sreject(char* s) { +\treturn 0; +}\n""" % self.funType) + + def emit_driver(self): + self.emit(""" +/* This DFA accept only \'%s\'*/ +%sDFA(char *s) { + char *begin = s; + do { + if (%s(s)) { //(matchhere(regexp+1, text)) + return (char const *) s - begin; + } + } while (*s != '\\n' && *s++ != '\\0'); + return (size_t) -1; +}\n\n""" % (self.regexp.regexp, self.funType, self.state_name(self.cg.start))) + + def emit_state(self, cur_state, transition): + self.emit(self.funType + self.state_name(cur_state) + "(char* s) {\n") + if cur_state in self.cg.accepts: + self.emit("\treturn accept(s);\n") + else: + if transition: + if self.cg.type == "DFA": + self.emit_switch(transition, default="reject") + else: + self.emit_switch(transition) + self.emit("}\n\n") + +def test(): + import doctest + doctest.testmod() + +if __name__ == '__main__': test()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/translator/dot_translator.py Sun Aug 08 04:13:14 2010 +0900 @@ -0,0 +1,68 @@ +#!/usr/bin/env python + +import re + +from translator import Translator +from pyrect.regexp import Regexp + +class DotTranslator(Translator): + """ + DotToranslator + This Class can translate from DFA or NFA into Dot + Dot is Graph-generater using TeX. + --code/graph/makepdf.sh is generate graph script. + >>> string = \"(A|B)*C\" + >>> reg = Regexp(string) + >>> DotTranslator(reg, 'DFA').translate() + >>> DotTranslator(reg, 'NFA').translate() + """ + def __init__(self, regexp, fa="DFA"): + Translator.__init__(self, regexp) + if fa == "NFA": + self.cg = regexp.nfacg + else: + self.cg = regexp.dfacg + self.fill_color = "lightsteelblue1" + self.frame_color = "navyblue" + + def state_name(self, name): + return "q"+name + + def emit_from_callgraph(self): + color = "fillcolor=%s, style=filled, color = %s" % (self.fill_color, self.frame_color) + self.emit('digraph G{'), self.indent() + self.emit( 'rankdir=LR') + self.emit( 'regex [shape=plaintext, label="%s"]' % self.regexp.regexp) + + # emit transition + for state in self.cg.states: + if state in self.cg.accepts: + self.emit("%s [shape=doublecircle, %s]" % (self.state_name(state), color)) + else: + self.emit("%s [shape=circle, %s]" % (self.state_name(state), color)) + + # edge to start state + self.emit("start [shape=point]") + self.emit("start -> %s" % self.state_name(self.cg.start)) + self.emit() + + for cur_state, trans in self.cg.map.iteritems(): + for input, next_states in trans.iteritems(): + if self.cg.type == "DFA": + next_states = [next_states] + for next_state in next_states: + self.emit("%s -> %s [label=\"%s\"]" + % (self.state_name(cur_state), self.state_name(next_state), input)) + self.dedent() + self.emit("}", 2) + +def test(): + import doctest + doctest.testmod() + ''' + reg = Regexp("(A|B)*C") + ct = CTranslator(reg.regexp, CallGraph(reg.dfa)) + ct.translate() + ''' + +if __name__ == '__main__' : test()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/translator/goto_grep_translator.py Sun Aug 08 04:13:14 2010 +0900 @@ -0,0 +1,118 @@ +#!/usr/bin/env python + +from c_translator import CTranslator +from pyrect.regexp import Regexp + +class GoToGREPTranslator(CTranslator): + """GoToGREPTranslator + This Class can translate form DFA into grep source-code. + which based on (beautiful) mini-grep introduced \"The Practice of Programming\" + written by Rob Pike & Brian W. Kernighan. (see template/grep.label) + >>> string = \"(build|fndecl|gcc)\" + >>> reg = Regexp(string) + >>> tje = GoToGREPTranslator(reg) + >>> tje.translate() + """ + + def __init__(self, regexp): + CTranslator.__init__(self, regexp) + self.funType = 'void ' + self.callType = 'return ' + self.breakStatement = '' + self.begline = False + self.__bufsize = 1024 + + def getbufsize(self,): + return self.__bufsize + def setbufsize(self, bufsize): + self.__bufsize = abs(bufsize) + + bufsize = property(getbufsize, setbufsize) + + def emit_accept_state(self): + self.emit (""" +\taccept: +\t\tprintf(\"%s\\n\", buf); +\t\treturn; +""") + + def emit_reject_state(self): + self.emit("\treject:\n") + if not self.begline: + self.emit(""" +\t\tif (*cur++ == '\\0') +\t\t\treturn; +\t\ts = cur; +\t\tgoto %s; +""" % self.state_name(self.cg.start)) + self.emit("return;\n") + + + def emit_initialization(self): + self.emit("#include <stdio.h>\n") + self.emit("#include <stdlib.h>\n") + self.emit("#include <string.h>\n\n") + + self.emit("#define LINEBUFSIZE 1024\n") + self.emit("#define READBUFSIZE %d\n\n" % (self.bufsize)) + self.emit("char readbuf[%d];\n\n" % (self.bufsize)) + + self.emit("%sDFA(char* s, char *buf, char* filename);\n" % self.funType) + grepsource = open("template/grep.label") + self.emit(grepsource.read()) + + def emit_filter(self): + pass + + def emit_driver(self): + self.emit(""" +int main(int argc, char* argv[]) { +\treturn grepmain(argc, argv); +}\n\n""") + + def emit_switch(self, case, default=None): + self.emit("\t\tswitch(*s++) {\n") + for input, next_state in case.iteritems(): + if input != '': + self.emit("\t\t\tcase '%s': \n" % (input)) + self.emit("\t\t\t\tgoto %s;\n" % (self.state_name(next_state))) + if self.breakStatement != '': self.emit(self.breakStatement+'\n') + + if default: + self.emit( """\t\t\tdefault:\n\t\t\tgoto %s;\n""" % default) + self.emit("\t\t}\n") + + def emit_state(self, cur_state, transition): + self.emit("\t" + self.state_name(cur_state) + ":\n") + if cur_state in self.cg.accepts: + self.emit("\t\tgoto accept;\n") + else: + if transition: + if self.cg.type == "DFA": + self.emit_switch(transition, default="reject") + else: + self.emit_switch(transition) + self.emit("\n") + + def emit_from_callgraph(self): + # self.emit C-source code + self.emit_initialization() + self.emit_driver() + + self.emit("void DFA(char *s, char *buf, char* filename) {\n") + self.emit("\tchar *cur = s;\n") + self.emit("\tgoto %s;\n" % self.state_name(self.cg.start)) + + for cur_state, transition in self.cg.map.iteritems(): + self.emit_state(cur_state, transition) + + self.emit_accept_state() + self.emit_reject_state() + + self.emit("}\n\n") + +def test(): + import doctest + doctest.testmod() + +if __name__ == '__main__': test()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/translator/grep_translator.py Sun Aug 08 04:13:14 2010 +0900 @@ -0,0 +1,118 @@ +#!/usr/bin/env python + +import copy +from c_translator import CTranslator +from pyrect.regexp import Regexp + +class GREPTranslateExeption(Exception): + pass + +class GREPTranslator(CTranslator): + """GREPTranslator + This Class can translate form DFA into grep source-code. + which based on (beautiful) mini-grep introduced \"The Practice of Programming\" + written by Rob Pike & Brian W. Kernighan. (see template/grep.c) + >>> string = \"(build|fndecl|gcc)\" + >>> reg = Regexp(string) + >>> tje = GREPTranslator(reg) + >>> tje.translate() + """ + + def __init__(self, regexp): + CTranslator.__init__(self, regexp) + self.funType = 'int ' + self.callType = 'return ' + self.breakStatement = '' + self.begline = False + self.__bufsize = 1024 + + def getbufsize(self,): + return self.__bufsize + def setbufsize(self, bufsize): + self.__bufsize = abs(bufsize) + + bufsize = property(getbufsize, setbufsize) + + def emit_accept_state(self): + self.emit (""" +%saccept(char* s) { +\treturn 1; +}\n""" % self.funType) + + def emit_reject_state(self): + self.emit (""" +%sreject(char* s) { +\treturn 0; +}\n""" % self.funType) + + def emit_initialization(self): + self.emit("#include <stdio.h>\n") + self.emit("#include <stdlib.h>\n") + self.emit("#include <string.h>\n\n") + + self.emit("#define LINEBUFSIZE 1024\n") + self.emit("#define READBUFSIZE %d\n\n" % (self.bufsize)) + self.emit("char readbuf[%d];\n\n" % (self.bufsize)) + + self.emit("%sDFA(char* s);\n" % (self.funType)) + for state in self.cg.map.iterkeys(): + self.emit(self.funType + self.state_name(state) + "(char* s);\n") + self.emit(self.funType + 'accept(char* s);\n') + self.emit(self.funType + 'reject(char* s);\n') + grepsource = open("template/grep.c") + self.emit(grepsource.read()) + + def emit_filter(self): + pass + + def emit_matcher(self): + self.emit("int match(char *regexp, char *text) {\n") + if self.begline: + self.emit("\t\treturn DFA(text);\n") + else: + self.emit(""" +\tdo { +\t\tif (DFA(text)) +\t\t\treturn 1; +\t} while (*text++ != '\\0'); +\treturn 0; +""") + self.emit("}\n\n") + + def emit_driver(self): + self.emit(""" +int main(int argc, char* argv[]) { +\treturn grepmain(argc, argv); +}\n\n""") + self.emit_matcher() + self.emit(""" +%sDFA(char *s) { + return %s(s); +}\n\n""" % (self.funType, self.state_name(self.cg.start))) + + def emit_state(self, cur_state, transition): + transition = copy.deepcopy(transition) + self.emit(self.funType + self.state_name(cur_state) + "(char* s) {\n") + strings = [x for x in transition.keys() if len(x) > 1] + if strings: + self.emit("\tchar *ls;\n") + for string in strings: + self.emit_strcmp2(string, transition.pop(string)) + + if cur_state in self.cg.accepts: + self.emit("\treturn accept(s);\n") + else: + if transition: + if self.cg.type == "DFA": + self.emit_switch(transition, default="reject") + else: + self.emit_switch(transition) + else: + self.emit("\t return reject(s);\n") + self.emit("}\n\n") + +def test(): + import doctest + doctest.testmod() + +if __name__ == '__main__': test()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/translator/llvm_grep_translator.py Sun Aug 08 04:13:14 2010 +0900 @@ -0,0 +1,146 @@ +#!/usr/bin/env python + +from llvm.core import * +from llvm.passes import * +from llvm.ee import * +from llvm_translator import LLVMTranslator +from pyrect.regexp import Regexp + +class LLVMGREPTranslator(LLVMTranslator): + """LLVMGREPTranslator + This class can translate from DFA into grep LLVM-module. + which can translate LLVM-IR, and also can execute it's self. + >>> string = 'def' + >>> reg = Regexp(string) + >>> lt = LLVMGREPTranslator(reg) + >>> lt.translate() + >>> ret = lt.execute() + >>> isinstance(ret, llvm.ee.GenericValue) + True + """ + + def __init__(self, regexp): + LLVMTranslator.__init__(self, regexp) + llfile = file("template/grep.ll") + self.llvm_module = Module.from_assembly(llfile) + self.compiled = False + self.string = regexp.regexp + self.args = [] + + def state_name(self, state_name): + return state_name + + def emit_driver(self): + self.regexp_str = self.new_str_const(self.string) + dfa = self.llvm_module.get_or_insert_function( + Type.function(self.int_t, (self.charptr_t,)), "DFA") + dfa_entry = dfa.append_basic_block("entry") + emit = Builder.new(dfa_entry) + ret = emit.call(self.llvm_module.get_function_named(self.cg.start) + ,(dfa.args[0],)) + emit.ret(ret) + + main = self.llvm_module.add_function( + Type.function(Type.void(), (self.int_t,)), "pre_main") + main_entry = main.append_basic_block("entry") + emit = Builder.new(main_entry) + + index = len(self.args) + + if index == 1: + grep = self.llvm_module.get_function_named("llgrep") + else: + grep = self.llvm_module.get_function_named("llgrep_with_name") + + for i in range(index): + emit.call(grep, (self.gep_first(emit, self.regexp_str), + self.gep_first(emit, self.new_str_const(self.args[i])))) + emit.ret_void() + + self.main = main + + def jitcompile(self): + self.debug_str = self.new_str_const("state: %s, arg: %c(int %d)\n") + def optional_func_decl(fun): + #fun.calling_convertion = CC_X86_FASTCALL + fun.args[0].name = "index" + + def func_decl(state): + optional_func_decl(state) + + state_ref = dict() + + # Create function - accept and reject (final state). + accept_state = self.llvm_module.add_function( + Type.function(self.int_t, (self.charptr_t,)), "accept") + optional_func_decl(accept_state) + reject_state = self.llvm_module.add_function( + Type.function(self.int_t, (self.charptr_t,)), "reject") + optional_func_decl(reject_state) + + state_ref["accept"] = accept_state + state_ref["reject"] = reject_state + + # add state to module, (as function or label). + for state in self.cg.map.iterkeys(): + fun = self.llvm_module.add_function( + Type.function(self.int_t, (self.charptr_t,)), state) + optional_func_decl(fun) + state_ref[state] = fun + + # emit instructions + emit = Builder.new(accept_state.append_basic_block("entry")) + emit.ret(self.const_one) + + emit = Builder.new(reject_state.append_basic_block("entry")) + emit.ret(self.const_zero) + + for state, transition in self.cg.map.iteritems(): + cases = dict() + state_fun = state_ref[state] + emit = Builder.new(state_fun.append_basic_block("entry")) + + if state in self.cg.accepts: + ret = emit.call(accept_state, (state_fun.args[0],)) + emit.ret(ret) + continue + + for case, next_state in transition.iteritems(): + cases[self.char_const(case)] = state_ref[next_state] + + char = emit.load(state_fun.args[0]) + next_ptr = emit.gep(state_fun.args[0], (self.const_one,)) + if (self.debug): self.emit_call_printf(emit, self.debug_str, self.gep_first(emit, self.new_str_const(fun.name)), char, char) + + label = 0 + default_bb = state_fun.append_basic_block("default") #create default bb + builder = Builder.new(default_bb) # default is reject. + ret = builder.call(reject_state, (next_ptr,)) + builder.ret(ret) + + si = emit.switch(char, default_bb, len(cases)) # create switch instruction with deafult case. + for case, nextFun in cases.iteritems(): + bb = state_fun.append_basic_block("case%d" % label) #create default bb + builder = Builder.new(bb) + ret = builder.call(nextFun, (next_ptr,)) + builder.ret(ret) + si.add_case(case, bb) + label += 1 + + self.emit_driver() + self.mp = ModuleProvider.new(self.llvm_module) + if (self.optimize): self.do_optimize() + self.ee = ExecutionEngine.new(self.mp) + self.compiled = True + + def execute(self): + if not self.compiled: + self.jitcompile() + return self.ee.run_function(self.main, + (GenericValue.int(self.int_t, 0),)) + +def test(): + import doctest + doctest.testmod() + +if __name__ == "__main__": test()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/translator/llvm_translator.py Sun Aug 08 04:13:14 2010 +0900 @@ -0,0 +1,200 @@ +#!/usr/bin/env python + +from llvm.core import * +from llvm.passes import * +from llvm.ee import * +from pyrect.regexp import Regexp +from translator import Translator + +class LLVMTranslator(Translator): + """LLVMTranslator + This Class can translate from DFA or NFA into LLVM-IR. + and also can JIT-Compile/evaluate it's self using llvm-py. + >>> string = '(A|B)*C' + >>> reg = Regexp(string) + >>> lt = LLVMTranslator(reg) + >>> lt.debug = True + >>> lt.translate() + >>> lt.execute() + """ + + # define llvm core types, and const + int_t = Type.int(32) + char_t = Type.int(8) + charptr_t = Type.pointer(char_t) + charptrptr_t = Type.pointer(charptr_t) + const_zero = Constant.int(int_t, 0) + const_one = Constant.int(int_t, 1) + llvm.GuaranteedTailCallOpt = True + + def __init__(self, regexp): + Translator.__init__(self, regexp) + self.optimize = False + self.debug = False + self.string = "ABC" + self.llvm_module = Module.new(self.cg.type) + self.compiled = False + + def emit_driver(self): + main = self.llvm_module.add_function( + Type.function(self.int_t, (self.int_t,)), "unitmain") + main.args[0].name = "index" + main_entry = main.append_basic_block("entry") + + emit = Builder.new(main_entry) + start = self.llvm_module.get_function_named(self.cg.start) + ret = emit.call(start, (main.args[0],)) + emit.ret(ret) + self.main = main + + def jitcompile(self): + self.matchp_str = self.new_str_const(self.string) + self.debug_str = self.new_str_const("state: %s, arg: %c(int %d)\n") + + def optional_func_decl(fun): + fun.calling_convertion = CC_X86_FASTCALL + fun.args[0].name = "index" + + def func_decl(state): + optional_func_decl(state) + + state_ref = dict() + + # Create function - accept and reject (final state). + accept_state = self.llvm_module.add_function( + Type.function(self.int_t, (self.int_t,)), "accept") + optional_func_decl(accept_state) + reject_state = self.llvm_module.add_function( + Type.function(self.int_t, (self.int_t,)), "reject") + optional_func_decl(reject_state) + + state_ref["accept"] = accept_state + state_ref["reject"] = reject_state + + # add state to module, (as function or label). + for state in self.cg.map.iterkeys(): + fun = self.llvm_module.add_function( + Type.function(self.int_t, (self.int_t,)), state) + optional_func_decl(fun) + state_ref[state] = fun + + # emit instructions + emit = Builder.new(accept_state.append_basic_block("entry")) + if self.debug: self.emit_call_printf(emit, "%s does match regexp\n", self.gep_first(emit, self.matchp_str)) + emit.ret(self.const_one) + + emit = Builder.new(reject_state.append_basic_block("entry")) + if self.debug: self.emit_call_printf(emit, "%s does not match regexp\n", self.gep_first(emit, self.matchp_str)) + emit.ret(self.const_zero) + + for state, transition in self.cg.map.iteritems(): + cases = dict() + if state in self.cg.accepts: + transition['\\0'] = ["accept"] + for case, next_states in transition.iteritems(): + cases[self.char_const(case)] = state_ref[next_states[0]] + state_fun = state_ref[state] + emit = Builder.new(state_fun.append_basic_block("entry")) + ptr = emit.gep(self.matchp_str, (self.const_zero, state_fun.args[0])) + next_index = emit.add(state_fun.args[0], self.const_one) + char = emit.load(ptr) + + if (self.debug): self.emit_call_printf(emit, self.debug_str, self.gep_first(emit, self.new_str_const(fun.name)), char, char) + + label = 0 + default_bb = state_fun.append_basic_block("default") #create default bb + builder = Builder.new(default_bb) # default is reject. + ret = builder.call(reject_state, (next_index,)) + builder.ret(ret) + + si = emit.switch(char, default_bb, len(cases)) # create switch instruction with deafult case. + for case, nextFun in cases.iteritems(): + bb = state_fun.append_basic_block("case%d" % label) #create default bb + builder = Builder.new(bb) + ret = builder.call(nextFun, (next_index,)) + builder.ret(ret) + si.add_case(case, bb) + label += 1 + + self.mp = ModuleProvider.new(self.llvm_module) + if (self.optimize): self.do_optimize() + self.ee = ExecutionEngine.new(self.mp) + self.emit_driver() + self.compiled = True + + def emit_from_callgraph(self): + if not self.compiled: + self.jitcompile() + self.emit(str(self.llvm_module)) + def get_execution_engine(self): + if not self.compiled: + self.jitcompile() + return self.ee + + def do_optimize(self): + #optimization passes + pm = PassManager.new() + pm.add(TargetData.new('')) + pm.add(PASS_FUNCTION_INLINING) + pm.run(self.llvm_module) + fp = FunctionPassManager.new(self.mp) + fp.add(TargetData.new('')) + fp.add(PASS_BLOCK_PLACEMENT) + fp.add(PASS_INSTRUCTION_COMBINING) + fp.add(PASS_TAIL_CALL_ELIMINATION) + fp.add(PASS_AGGRESSIVE_DCE) + fp.add(PASS_DEAD_INST_ELIMINATION) + fp.add(PASS_DEAD_CODE_ELIMINATION) + for fun in self.llvm_module.functions: + fp.run(fun) + + def print_module(self): + if not self.compiled: + self.jitcompile() + print self.llvm_module + + def execute(self): + if not self.compiled: + self.jitcompile() + self.ee.run_function(self.main, + (GenericValue.int(self.int_t, 0),)) + return + + def new_str_const(self, val): + '''create string(array of int) as a global value ''' + str = self.llvm_module.add_global_variable(Type.array(self.char_t, len(val) + 1), "") + str.initializer = Constant.stringz(val) + return str + + def gep_first(self, emit, val): + '''get pointer of array''' + return emit.gep(val, (self.const_zero, self.const_zero)) + + def char_const(self, val): + '''create constant int value''' + if isinstance(val, str): + if val == '\\0': + return Constant.int(self.char_t, 0) + else: + return Constant.int(self.char_t, ord(val)) + else: + exit('char_const: invalid argument.', val) + + def emit_call_printf(self, emit, string, *args): + '''emit libc printf function call instruction''' + try: + printf = self.llvm_module.get_function_named("printf") + except llvm.LLVMException: + printf = self.llvm_module.add_function( + Type.function(Type.void(), + (Type.pointer(self.char_t, 0),), 1), "printf") + if isinstance(string, str): + string = self.new_str_const(string) + emit.call(printf, + [self.gep_first(emit, string)]+list(args)) + +def test(): + import doctest + doctest.testmod() + +if __name__ == "__main__": test()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/translator/template/grep.c Sun Aug 08 04:13:14 2010 +0900 @@ -0,0 +1,47 @@ +int grep(char * regexp, FILE *f, char *name) { + int n, nmatch; + char buf[LINEBUFSIZE]; + nmatch = 0; + while (fgets(buf, sizeof buf, f) != NULL) { + n = strlen(buf); + if (n > 0 && buf[n-1] == '\n') + buf[n-1] = '\0'; + if (match(regexp, buf)) { + nmatch++; + if (name != NULL) + printf("%s:", name); + printf("%s\n", buf); + } + } + return nmatch; +} + +int grepmain(int argc, char* argv[]) { + int i, nmatch; + FILE *f; + + if (argc < 2) { + fprintf(stderr, "usage: grep regexp [file ...]"); + exit(0); + } + nmatch = 0; + if (argc == 2) { + if (grep(argv[1], stdin, NULL)) + nmatch; + } else { + for (i = 2; i < argc; i++) { + f = fopen(argv[i], "r"); + if (f == NULL) { + fprintf(stderr, "can't open %s:", argv[i]); + continue; + } + if (READBUFSIZE > 0) + setvbuf(f, readbuf, _IOFBF, READBUFSIZE); + if (grep(argv[1], f, argc > 3 ? argv[i] : NULL) > 0) + nmatch++; + fclose(f); + } + } + + return nmatch; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/translator/template/grep.cbc Sun Aug 08 04:13:14 2010 +0900 @@ -0,0 +1,35 @@ +void grep(char* s, char *cur, char *buf, FILE *f, char* filename) { + goto next_line(s, cur, buf, f, filename); + return; +} + +void grepmain(int argc, char* argv[]) { + int i; + char buf[LINEBUFSIZE]; + char readbuf[READBUFSIZE]; + char *filename; + FILE *f; + + if (argc < 2) { + fprintf(stderr, "usage: grep regexp [file ...]"); + exit(0); + } + if (argc == 2) { + grep(buf, buf, buf, stdin, NULL); + } else { + for (i = 2; i < argc; i++) { + filename = argv[i]; + f = fopen(filename, "r"); + if (f == NULL) { + fprintf(stderr, "can't open %s:", filename); + continue; + } + if (READBUFSIZE > 0) + setvbuf(f, readbuf, _IOFBF, READBUFSIZE); + grep(buf, buf, buf, f, filename); + fclose(f); + } + } + + return; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/translator/template/grep.label Sun Aug 08 04:13:14 2010 +0900 @@ -0,0 +1,44 @@ +/* Excerpted from 'The Practice of Programming' */ +/* by Brian W. Kernighan and Rob Pike */ + +int grep(char * regexp, FILE *f, char *name) { + int n; + char buf[LINEBUFSIZE]; + while (fgets(buf, sizeof buf, f) != NULL) { + n = strlen(buf); + if (n > 0 && buf[n-1] == '\n') + buf[n-1] = '\0'; + DFA(buf, buf, name); + } + return 1; +} + +int grepmain(int argc, char* argv[]) { + int i, nmatch; + FILE *f; + + if (argc < 2) { + fprintf(stderr, "usage: grep regexp [file ...]"); + exit(0); + } + nmatch = 0; + if (argc == 2) { + if (grep(argv[1], stdin, NULL)) + nmatch; + } else { + for (i = 2; i < argc; i++) { + f = fopen(argv[i], "r"); + if (f == NULL) { + fprintf(stderr, "can't open %s:", argv[i]); + continue; + } + if (READBUFSIZE > 0) + setvbuf(f, readbuf, _IOFBF, READBUFSIZE); + if (grep(argv[1], f, argc > 3 ? argv[i] : NULL) > 0) + nmatch++; + fclose(f); + } + } + + return nmatch; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/translator/template/grep.ll Sun Aug 08 04:13:14 2010 +0900 @@ -0,0 +1,377 @@ +; ModuleID = 'template/grep.ll.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9" + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } +@"\01LC" = internal constant [4 x i8] c"%s:\00" ; <[4 x i8]*> [#uses=1] +@"\01LC1" = internal constant [2 x i8] c"r\00" ; <[2 x i8]*> [#uses=1] +@__stderrp = external global %struct.FILE* ; <%struct.FILE**> [#uses=4] +@"\01LC2" = internal constant [15 x i8] c"can't open %s:\00" ; <[15 x i8]*> [#uses=1] +@readbuf = common global [1048576 x i8] zeroinitializer, align 32 ; <[1048576 x i8]*> [#uses=1] +@"\01LC3" = internal constant [30 x i8] c"usage: grep regexp [file ...]\00" ; <[30 x i8]*> [#uses=1] +@__stdinp = external global %struct.FILE* ; <%struct.FILE**> [#uses=1] + +define i32 @match(i8* %regexp, i8* %text) nounwind { +entry: + %regexp_addr = alloca i8* ; <i8**> [#uses=2] + %text_addr = alloca i8* ; <i8**> [#uses=6] + %retval = alloca i32 ; <i32*> [#uses=2] + alloca i32 ; <i32*>:0 [#uses=4] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i8* %regexp, i8** %regexp_addr + store i8* %text, i8** %text_addr + load i8** %regexp_addr, align 4 ; <i8*>:1 [#uses=1] + getelementptr i8* %1, i32 0 ; <i8*>:2 [#uses=1] + load i8* %2, align 1 ; <i8>:3 [#uses=1] + icmp eq i8 %3, 94 ; <i1>:4 [#uses=1] + zext i1 %4 to i8 ; <i8>:5 [#uses=1] + %toBool = icmp ne i8 %5, 0 ; <i1> [#uses=1] + br i1 %toBool, label %bb, label %bb1 + +bb: ; preds = %entry + load i8** %text_addr, align 4 ; <i8*>:6 [#uses=1] + call i32 @DFA( i8* %6 ) nounwind ; <i32>:7 [#uses=1] + store i32 %7, i32* %0, align 4 + br label %bb7 + +bb1: ; preds = %bb4, %entry + load i8** %text_addr, align 4 ; <i8*>:8 [#uses=1] + call i32 @DFA( i8* %8 ) nounwind ; <i32>:9 [#uses=1] + icmp ne i32 %9, 0 ; <i1>:10 [#uses=1] + zext i1 %10 to i8 ; <i8>:11 [#uses=1] + %toBool2 = icmp ne i8 %11, 0 ; <i1> [#uses=1] + br i1 %toBool2, label %bb3, label %bb4 + +bb3: ; preds = %bb1 + store i32 1, i32* %0, align 4 + br label %bb7 + +bb4: ; preds = %bb1 + load i8** %text_addr, align 4 ; <i8*>:12 [#uses=1] + load i8* %12, align 1 ; <i8>:13 [#uses=1] + icmp ne i8 %13, 0 ; <i1>:14 [#uses=1] + zext i1 %14 to i8 ; <i8>:15 [#uses=1] + load i8** %text_addr, align 4 ; <i8*>:16 [#uses=1] + getelementptr i8* %16, i64 1 ; <i8*>:17 [#uses=1] + store i8* %17, i8** %text_addr, align 4 + %toBool5 = icmp ne i8 %15, 0 ; <i1> [#uses=1] + br i1 %toBool5, label %bb1, label %bb6 + +bb6: ; preds = %bb4 + store i32 0, i32* %0, align 4 + br label %bb7 + +bb7: ; preds = %bb6, %bb3, %bb + load i32* %0, align 4 ; <i32>:18 [#uses=1] + store i32 %18, i32* %retval, align 4 + br label %return + +return: ; preds = %bb7 + %retval8 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval8 +} + +declare i32 @DFA(i8*) + +define void @grep(i8* %regexp, %struct.FILE* %f, i8* %name) nounwind { +entry: + %regexp_addr = alloca i8* ; <i8**> [#uses=2] + %f_addr = alloca %struct.FILE* ; <%struct.FILE**> [#uses=2] + %name_addr = alloca i8* ; <i8**> [#uses=3] + %buf = alloca [1024 x i8] ; <[1024 x i8]*> [#uses=6] + %n = alloca i32 ; <i32*> [#uses=4] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i8* %regexp, i8** %regexp_addr + store %struct.FILE* %f, %struct.FILE** %f_addr + store i8* %name, i8** %name_addr + br label %bb13 + +bb: ; preds = %bb13 + %buf1 = bitcast [1024 x i8]* %buf to i8* ; <i8*> [#uses=1] + call i32 @strlen( i8* %buf1 ) nounwind readonly ; <i32>:0 [#uses=1] + store i32 %0, i32* %n, align 4 + load i32* %n, align 4 ; <i32>:1 [#uses=1] + icmp sgt i32 %1, 0 ; <i1>:2 [#uses=1] + zext i1 %2 to i8 ; <i8>:3 [#uses=1] + %toBool = icmp ne i8 %3, 0 ; <i1> [#uses=1] + br i1 %toBool, label %bb2, label %bb5 + +bb2: ; preds = %bb + load i32* %n, align 4 ; <i32>:4 [#uses=1] + sub i32 %4, 1 ; <i32>:5 [#uses=1] + getelementptr [1024 x i8]* %buf, i32 0, i32 %5 ; <i8*>:6 [#uses=1] + load i8* %6, align 1 ; <i8>:7 [#uses=1] + icmp eq i8 %7, 10 ; <i1>:8 [#uses=1] + zext i1 %8 to i8 ; <i8>:9 [#uses=1] + %toBool3 = icmp ne i8 %9, 0 ; <i1> [#uses=1] + br i1 %toBool3, label %bb4, label %bb5 + +bb4: ; preds = %bb2 + load i32* %n, align 4 ; <i32>:10 [#uses=1] + sub i32 %10, 1 ; <i32>:11 [#uses=1] + getelementptr [1024 x i8]* %buf, i32 0, i32 %11 ; <i8*>:12 [#uses=1] + store i8 0, i8* %12, align 1 + br label %bb5 + +bb5: ; preds = %bb4, %bb2, %bb + load i8** %regexp_addr, align 4 ; <i8*>:13 [#uses=1] + %buf6 = bitcast [1024 x i8]* %buf to i8* ; <i8*> [#uses=1] + call i32 @match( i8* %13, i8* %buf6 ) nounwind ; <i32>:14 [#uses=1] + icmp ne i32 %14, 0 ; <i1>:15 [#uses=1] + zext i1 %15 to i8 ; <i8>:16 [#uses=1] + %toBool7 = icmp ne i8 %16, 0 ; <i1> [#uses=1] + br i1 %toBool7, label %bb8, label %bb13 + +bb8: ; preds = %bb5 + load i8** %name_addr, align 4 ; <i8*>:17 [#uses=1] + icmp ne i8* %17, null ; <i1>:18 [#uses=1] + zext i1 %18 to i8 ; <i8>:19 [#uses=1] + %toBool9 = icmp ne i8 %19, 0 ; <i1> [#uses=1] + br i1 %toBool9, label %bb10, label %bb11 + +bb10: ; preds = %bb8 + load i8** %name_addr, align 4 ; <i8*>:20 [#uses=1] + call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), i8* %20 ) nounwind ; <i32>:21 [#uses=0] + br label %bb11 + +bb11: ; preds = %bb10, %bb8 + %buf12 = bitcast [1024 x i8]* %buf to i8* ; <i8*> [#uses=1] + call i32 @puts( i8* %buf12 ) nounwind ; <i32>:22 [#uses=0] + br label %bb13 + +bb13: ; preds = %bb11, %bb5, %entry + %buf14 = bitcast [1024 x i8]* %buf to i8* ; <i8*> [#uses=1] + load %struct.FILE** %f_addr, align 4 ; <%struct.FILE*>:23 [#uses=1] + call i8* @fgets( i8* %buf14, i32 1024, %struct.FILE* %23 ) nounwind ; <i8*>:24 [#uses=1] + icmp ne i8* %24, null ; <i1>:25 [#uses=1] + zext i1 %25 to i8 ; <i8>:26 [#uses=1] + %toBool15 = icmp ne i8 %26, 0 ; <i1> [#uses=1] + br i1 %toBool15, label %bb, label %bb16 + +bb16: ; preds = %bb13 + br label %return + +return: ; preds = %bb16 + ret void +} + +declare i32 @strlen(i8*) nounwind readonly + +declare i32 @printf(i8*, ...) nounwind + +declare i32 @puts(i8*) + +declare i8* @fgets(i8*, i32, %struct.FILE*) + +define void @llgrep(i8* %regexp, i8* %filename) nounwind { +entry: + %regexp_addr = alloca i8* ; <i8**> [#uses=2] + %filename_addr = alloca i8* ; <i8**> [#uses=3] + %f = alloca %struct.FILE* ; <%struct.FILE**> [#uses=5] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i8* %regexp, i8** %regexp_addr + store i8* %filename, i8** %filename_addr + load i8** %filename_addr, align 4 ; <i8*>:0 [#uses=1] + call %struct.FILE* @fopen( i8* %0, i8* getelementptr ([2 x i8]* @"\01LC1", i32 0, i32 0) ) nounwind ; <%struct.FILE*>:1 [#uses=1] + store %struct.FILE* %1, %struct.FILE** %f, align 4 + load %struct.FILE** %f, align 4 ; <%struct.FILE*>:2 [#uses=1] + icmp eq %struct.FILE* %2, null ; <i1>:3 [#uses=1] + zext i1 %3 to i8 ; <i8>:4 [#uses=1] + %toBool = icmp ne i8 %4, 0 ; <i1> [#uses=1] + br i1 %toBool, label %bb, label %bb1 + +bb: ; preds = %entry + load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*>:5 [#uses=1] + load i8** %filename_addr, align 4 ; <i8*>:6 [#uses=1] + call i32 (%struct.FILE*, i8*, ...)* @fprintf( %struct.FILE* %5, i8* getelementptr ([15 x i8]* @"\01LC2", i32 0, i32 0), i8* %6 ) nounwind ; <i32>:7 [#uses=0] + br label %bb1 + +bb1: ; preds = %bb, %entry + load %struct.FILE** %f, align 4 ; <%struct.FILE*>:8 [#uses=1] + call i32 @setvbuf( %struct.FILE* %8, i8* getelementptr ([1048576 x i8]* @readbuf, i32 0, i32 0), i32 0, i32 1048576 ) nounwind ; <i32>:9 [#uses=0] + load i8** %regexp_addr, align 4 ; <i8*>:10 [#uses=1] + load %struct.FILE** %f, align 4 ; <%struct.FILE*>:11 [#uses=1] + call void @grep( i8* %10, %struct.FILE* %11, i8* null ) nounwind + load %struct.FILE** %f, align 4 ; <%struct.FILE*>:12 [#uses=1] + call i32 @fclose( %struct.FILE* %12 ) nounwind ; <i32>:13 [#uses=0] + br label %return + +return: ; preds = %bb1 + ret void +} + +declare %struct.FILE* @fopen(i8*, i8*) + +declare i32 @fprintf(%struct.FILE*, i8*, ...) nounwind + +declare i32 @setvbuf(%struct.FILE*, i8*, i32, i32) + +declare i32 @fclose(%struct.FILE*) + +define void @llgrep_with_name(i8* %regexp, i8* %filename) nounwind { +entry: + %regexp_addr = alloca i8* ; <i8**> [#uses=2] + %filename_addr = alloca i8* ; <i8**> [#uses=4] + %f = alloca %struct.FILE* ; <%struct.FILE**> [#uses=5] + %nmatch = alloca i32 ; <i32*> [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i8* %regexp, i8** %regexp_addr + store i8* %filename, i8** %filename_addr + store i32 0, i32* %nmatch, align 4 + load i8** %filename_addr, align 4 ; <i8*>:0 [#uses=1] + call %struct.FILE* @fopen( i8* %0, i8* getelementptr ([2 x i8]* @"\01LC1", i32 0, i32 0) ) nounwind ; <%struct.FILE*>:1 [#uses=1] + store %struct.FILE* %1, %struct.FILE** %f, align 4 + load %struct.FILE** %f, align 4 ; <%struct.FILE*>:2 [#uses=1] + icmp eq %struct.FILE* %2, null ; <i1>:3 [#uses=1] + zext i1 %3 to i8 ; <i8>:4 [#uses=1] + %toBool = icmp ne i8 %4, 0 ; <i1> [#uses=1] + br i1 %toBool, label %bb, label %bb1 + +bb: ; preds = %entry + load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*>:5 [#uses=1] + load i8** %filename_addr, align 4 ; <i8*>:6 [#uses=1] + call i32 (%struct.FILE*, i8*, ...)* @fprintf( %struct.FILE* %5, i8* getelementptr ([15 x i8]* @"\01LC2", i32 0, i32 0), i8* %6 ) nounwind ; <i32>:7 [#uses=0] + br label %bb1 + +bb1: ; preds = %bb, %entry + load %struct.FILE** %f, align 4 ; <%struct.FILE*>:8 [#uses=1] + call i32 @setvbuf( %struct.FILE* %8, i8* getelementptr ([1048576 x i8]* @readbuf, i32 0, i32 0), i32 0, i32 1048576 ) nounwind ; <i32>:9 [#uses=0] + load i8** %regexp_addr, align 4 ; <i8*>:10 [#uses=1] + load %struct.FILE** %f, align 4 ; <%struct.FILE*>:11 [#uses=1] + load i8** %filename_addr, align 4 ; <i8*>:12 [#uses=1] + call void @grep( i8* %10, %struct.FILE* %11, i8* %12 ) nounwind + load %struct.FILE** %f, align 4 ; <%struct.FILE*>:13 [#uses=1] + call i32 @fclose( %struct.FILE* %13 ) nounwind ; <i32>:14 [#uses=0] + br label %return + +return: ; preds = %bb1 + ret void +} + +define i32 @main(i32 %argc, i8** %argv) nounwind { +entry: + %argc_addr = alloca i32 ; <i32*> [#uses=5] + %argv_addr = alloca i8** ; <i8***> [#uses=6] + %retval = alloca i32 ; <i32*> [#uses=2] + %f = alloca %struct.FILE* ; <%struct.FILE**> [#uses=5] + %i = alloca i32 ; <i32*> [#uses=7] + alloca i32 ; <i32*>:0 [#uses=2] + %iftmp.5 = alloca i8* ; <i8**> [#uses=3] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i32 %argc, i32* %argc_addr + store i8** %argv, i8*** %argv_addr + load i32* %argc_addr, align 4 ; <i32>:1 [#uses=1] + icmp sle i32 %1, 1 ; <i1>:2 [#uses=1] + zext i1 %2 to i8 ; <i8>:3 [#uses=1] + %toBool = icmp ne i8 %3, 0 ; <i1> [#uses=1] + br i1 %toBool, label %bb, label %bb1 + +bb: ; preds = %entry + load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*>:4 [#uses=1] + bitcast %struct.FILE* %4 to i8* ; <i8*>:5 [#uses=1] + call i32 @"\01_fwrite$UNIX2003"( i8* getelementptr ([30 x i8]* @"\01LC3", i32 0, i32 0), i32 1, i32 29, i8* %5 ) nounwind ; <i32>:6 [#uses=0] + call void @exit( i32 0 ) noreturn nounwind + unreachable + +bb1: ; preds = %entry + load i32* %argc_addr, align 4 ; <i32>:7 [#uses=1] + icmp eq i32 %7, 2 ; <i1>:8 [#uses=1] + zext i1 %8 to i8 ; <i8>:9 [#uses=1] + %toBool2 = icmp ne i8 %9, 0 ; <i1> [#uses=1] + br i1 %toBool2, label %bb3, label %bb4 + +bb3: ; preds = %bb1 + load %struct.FILE** @__stdinp, align 4 ; <%struct.FILE*>:10 [#uses=1] + load i8*** %argv_addr, align 4 ; <i8**>:11 [#uses=1] + getelementptr i8** %11, i32 1 ; <i8**>:12 [#uses=1] + load i8** %12, align 4 ; <i8*>:13 [#uses=1] + call void @grep( i8* %13, %struct.FILE* %10, i8* null ) nounwind + br label %bb16 + +bb4: ; preds = %bb1 + store i32 2, i32* %i, align 4 + br label %bb14 + +bb5: ; preds = %bb14 + load i8*** %argv_addr, align 4 ; <i8**>:14 [#uses=1] + load i32* %i, align 4 ; <i32>:15 [#uses=1] + getelementptr i8** %14, i32 %15 ; <i8**>:16 [#uses=1] + load i8** %16, align 4 ; <i8*>:17 [#uses=1] + call %struct.FILE* @fopen( i8* %17, i8* getelementptr ([2 x i8]* @"\01LC1", i32 0, i32 0) ) nounwind ; <%struct.FILE*>:18 [#uses=1] + store %struct.FILE* %18, %struct.FILE** %f, align 4 + load %struct.FILE** %f, align 4 ; <%struct.FILE*>:19 [#uses=1] + icmp eq %struct.FILE* %19, null ; <i1>:20 [#uses=1] + zext i1 %20 to i8 ; <i8>:21 [#uses=1] + %toBool6 = icmp ne i8 %21, 0 ; <i1> [#uses=1] + br i1 %toBool6, label %bb7, label %bb8 + +bb7: ; preds = %bb5 + load i8*** %argv_addr, align 4 ; <i8**>:22 [#uses=1] + load i32* %i, align 4 ; <i32>:23 [#uses=1] + getelementptr i8** %22, i32 %23 ; <i8**>:24 [#uses=1] + load i8** %24, align 4 ; <i8*>:25 [#uses=1] + load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*>:26 [#uses=1] + call i32 (%struct.FILE*, i8*, ...)* @fprintf( %struct.FILE* %26, i8* getelementptr ([15 x i8]* @"\01LC2", i32 0, i32 0), i8* %25 ) nounwind ; <i32>:27 [#uses=0] + br label %bb13 + +bb8: ; preds = %bb5 + load %struct.FILE** %f, align 4 ; <%struct.FILE*>:28 [#uses=1] + call i32 @setvbuf( %struct.FILE* %28, i8* getelementptr ([1048576 x i8]* @readbuf, i32 0, i32 0), i32 0, i32 1048576 ) nounwind ; <i32>:29 [#uses=0] + load i32* %argc_addr, align 4 ; <i32>:30 [#uses=1] + icmp sgt i32 %30, 3 ; <i1>:31 [#uses=1] + zext i1 %31 to i8 ; <i8>:32 [#uses=1] + %toBool9 = icmp ne i8 %32, 0 ; <i1> [#uses=1] + br i1 %toBool9, label %bb10, label %bb11 + +bb10: ; preds = %bb8 + load i8*** %argv_addr, align 4 ; <i8**>:33 [#uses=1] + load i32* %i, align 4 ; <i32>:34 [#uses=1] + getelementptr i8** %33, i32 %34 ; <i8**>:35 [#uses=1] + load i8** %35, align 4 ; <i8*>:36 [#uses=1] + store i8* %36, i8** %iftmp.5, align 4 + br label %bb12 + +bb11: ; preds = %bb8 + store i8* null, i8** %iftmp.5, align 4 + br label %bb12 + +bb12: ; preds = %bb11, %bb10 + load i8*** %argv_addr, align 4 ; <i8**>:37 [#uses=1] + getelementptr i8** %37, i32 1 ; <i8**>:38 [#uses=1] + load i8** %38, align 4 ; <i8*>:39 [#uses=1] + load %struct.FILE** %f, align 4 ; <%struct.FILE*>:40 [#uses=1] + load i8** %iftmp.5, align 4 ; <i8*>:41 [#uses=1] + call void @grep( i8* %39, %struct.FILE* %40, i8* %41 ) nounwind + load %struct.FILE** %f, align 4 ; <%struct.FILE*>:42 [#uses=1] + call i32 @fclose( %struct.FILE* %42 ) nounwind ; <i32>:43 [#uses=0] + br label %bb13 + +bb13: ; preds = %bb12, %bb7 + load i32* %i, align 4 ; <i32>:44 [#uses=1] + add i32 %44, 1 ; <i32>:45 [#uses=1] + store i32 %45, i32* %i, align 4 + br label %bb14 + +bb14: ; preds = %bb13, %bb4 + load i32* %i, align 4 ; <i32>:46 [#uses=1] + load i32* %argc_addr, align 4 ; <i32>:47 [#uses=1] + icmp slt i32 %46, %47 ; <i1>:48 [#uses=1] + zext i1 %48 to i8 ; <i8>:49 [#uses=1] + %toBool15 = icmp ne i8 %49, 0 ; <i1> [#uses=1] + br i1 %toBool15, label %bb5, label %bb16 + +bb16: ; preds = %bb14, %bb3 + store i32 0, i32* %0, align 4 + load i32* %0, align 4 ; <i32>:50 [#uses=1] + store i32 %50, i32* %retval, align 4 + br label %return + +return: ; preds = %bb16 + %retval17 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval17 +} + +declare i32 @"\01_fwrite$UNIX2003"(i8*, i32, i32, i8*) + +declare void @exit(i32) noreturn nounwind
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/translator/template/grep.ll.c Sun Aug 08 04:13:14 2010 +0900 @@ -0,0 +1,89 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +#define LINEBUFSIZE 1024 +#define READBUFSIZE 1024*1024 +char readbuf[READBUFSIZE]; + +extern int DFA(char* text); + +int match(char *regexp, char *text) { + if (regexp[0] == '^') + return DFA(text); + do { + if (DFA(text)) + return 1; + } while (*text++ != '\0'); + return 0; +} + +void grep(char *regexp, FILE *f, char *name) { + int n; + char buf[LINEBUFSIZE]; + while (fgets(buf, sizeof buf, f) != NULL) { + n = strlen(buf); + if (n > 0 && buf[n-1] == '\n') + buf[n-1] = '\0'; + if (match(regexp, buf)) { + if (name != NULL) + printf("%s:", name); + printf("%s\n", buf); + } + } + return; +} + +void llgrep(char *regexp, char* filename) { + FILE *f; + f = fopen(filename, "r"); + if (f == NULL) { + fprintf(stderr, "can't open %s:", filename); + } + if (READBUFSIZE > 0) + setvbuf(f, readbuf, _IOFBF, READBUFSIZE); + grep(regexp, f, NULL); + fclose(f); + return; +} + +void llgrep_with_name(char *regexp, char* filename) { + int nmatch = 0; + FILE *f; + f = fopen(filename, "r"); + if (f == NULL) { + fprintf(stderr, "can't open %s:", filename); + } + if (READBUFSIZE > 0) + setvbuf(f, readbuf, _IOFBF, READBUFSIZE); + grep(regexp, f, filename); + fclose(f); + return; +} + +int main(int argc, char* argv[]) { + int i; + FILE *f; + + if (argc < 2) { + fprintf(stderr, "usage: grep regexp [file ...]"); + exit(0); + } + if (argc == 2) { + grep(argv[1], stdin, NULL); + } else { + for (i = 2; i < argc; i++) { + f = fopen(argv[i], "r"); + if (f == NULL) { + fprintf(stderr, "can't open %s:", argv[i]); + continue; + } + if (READBUFSIZE > 0) + setvbuf(f, readbuf, _IOFBF, READBUFSIZE); + grep(argv[1], f, argc > 3 ? argv[i] : NULL); + fclose(f); + } + } + + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/translator/template/grep.ll.c~ Sun Aug 08 04:13:14 2010 +0900 @@ -0,0 +1,89 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> + +#define LINEBUFSIZE 1024 +#define READBUFSIZE 1024*1024 +char readbuf[READBUFSIZE]; + +extern int DFA(char* text); + +int match(char *regexp, char *text) { + if (regexp[0] == '^') + return DFA(text); + do { + if (DFA(text)) + return 1; + } while (text++ != '\0'); + return 0; +} + +void grep(char *regexp, FILE *f, char *name) { + int n; + char buf[LINEBUFSIZE]; + while (fgets(buf, sizeof buf, f) != NULL) { + n = strlen(buf); + if (n > 0 && buf[n-1] == '\n') + buf[n-1] = '\0'; + if (match(regexp, buf)) { + if (name != NULL) + printf("%s:", name); + printf("%s\n", buf); + } + } + return; +} + +void llgrep(char *regexp, char* filename) { + FILE *f; + f = fopen(filename, "r"); + if (f == NULL) { + fprintf(stderr, "can't open %s:", filename); + } + if (READBUFSIZE > 0) + setvbuf(f, readbuf, _IOFBF, READBUFSIZE); + grep(regexp, f, NULL); + fclose(f); + return; +} + +void llgrep_with_name(char *regexp, char* filename) { + int nmatch = 0; + FILE *f; + f = fopen(filename, "r"); + if (f == NULL) { + fprintf(stderr, "can't open %s:", filename); + } + if (READBUFSIZE > 0) + setvbuf(f, readbuf, _IOFBF, READBUFSIZE); + grep(regexp, f, filename); + fclose(f); + return; +} + +int main(int argc, char* argv[]) { + int i; + FILE *f; + + if (argc < 2) { + fprintf(stderr, "usage: grep regexp [file ...]"); + exit(0); + } + if (argc == 2) { + grep(argv[1], stdin, NULL); + } else { + for (i = 2; i < argc; i++) { + f = fopen(argv[i], "r"); + if (f == NULL) { + fprintf(stderr, "can't open %s:", argv[i]); + continue; + } + if (READBUFSIZE > 0) + setvbuf(f, readbuf, _IOFBF, READBUFSIZE); + grep(argv[1], f, argc > 3 ? argv[i] : NULL); + fclose(f); + } + } + + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/translator/template/grep.ll~ Sun Aug 08 04:13:14 2010 +0900 @@ -0,0 +1,284 @@ +; ModuleID = 'template/grep.ll.c' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9" + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } +@"\01LC" = internal constant [4 x i8] c"%s:\00" ; <[4 x i8]*> [#uses=1] +@__stderrp = external global %struct.FILE* ; <%struct.FILE**> [#uses=2] +@"\01LC1" = internal constant [30 x i8] c"usage: grep regexp [file ...]\00" ; <[30 x i8]*> [#uses=1] +@__stdinp = external global %struct.FILE* ; <%struct.FILE**> [#uses=1] +@"\01LC2" = internal constant [2 x i8] c"r\00" ; <[2 x i8]*> [#uses=1] +@"\01LC3" = internal constant [15 x i8] c"can't open %s:\00" ; <[15 x i8]*> [#uses=1] +@readbuf = common global [1048576 x i8] zeroinitializer, align 32 ; <[1048576 x i8]*> [#uses=1] + +define i32 @match(i8* %regexp, i8* %text) nounwind { +entry: + %regexp_addr = alloca i8* ; <i8**> [#uses=1] + %text_addr = alloca i8* ; <i8**> [#uses=1] + %retval = alloca i32 ; <i32*> [#uses=2] + alloca i32 ; <i32*>:0 [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i8* %regexp, i8** %regexp_addr + store i8* %text, i8** %text_addr + store i32 1, i32* %0, align 4 + load i32* %0, align 4 ; <i32>:1 [#uses=1] + store i32 %1, i32* %retval, align 4 + br label %return + +return: ; preds = %entry + %retval1 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval1 +} + +define i32 @grep(i8* %regexp, %struct.FILE* %f, i8* %name) nounwind { +entry: + %regexp_addr = alloca i8* ; <i8**> [#uses=2] + %f_addr = alloca %struct.FILE* ; <%struct.FILE**> [#uses=2] + %name_addr = alloca i8* ; <i8**> [#uses=3] + %retval = alloca i32 ; <i32*> [#uses=2] + %buf = alloca [1024 x i8] ; <[1024 x i8]*> [#uses=6] + %nmatch = alloca i32 ; <i32*> [#uses=4] + %n = alloca i32 ; <i32*> [#uses=4] + alloca i32 ; <i32*>:0 [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i8* %regexp, i8** %regexp_addr + store %struct.FILE* %f, %struct.FILE** %f_addr + store i8* %name, i8** %name_addr + store i32 0, i32* %nmatch, align 4 + br label %bb13 + +bb: ; preds = %bb13 + %buf1 = bitcast [1024 x i8]* %buf to i8* ; <i8*> [#uses=1] + call i32 @strlen( i8* %buf1 ) nounwind readonly ; <i32>:1 [#uses=1] + store i32 %1, i32* %n, align 4 + load i32* %n, align 4 ; <i32>:2 [#uses=1] + icmp sgt i32 %2, 0 ; <i1>:3 [#uses=1] + zext i1 %3 to i8 ; <i8>:4 [#uses=1] + %toBool = icmp ne i8 %4, 0 ; <i1> [#uses=1] + br i1 %toBool, label %bb2, label %bb5 + +bb2: ; preds = %bb + load i32* %n, align 4 ; <i32>:5 [#uses=1] + sub i32 %5, 1 ; <i32>:6 [#uses=1] + getelementptr [1024 x i8]* %buf, i32 0, i32 %6 ; <i8*>:7 [#uses=1] + load i8* %7, align 1 ; <i8>:8 [#uses=1] + icmp eq i8 %8, 10 ; <i1>:9 [#uses=1] + zext i1 %9 to i8 ; <i8>:10 [#uses=1] + %toBool3 = icmp ne i8 %10, 0 ; <i1> [#uses=1] + br i1 %toBool3, label %bb4, label %bb5 + +bb4: ; preds = %bb2 + load i32* %n, align 4 ; <i32>:11 [#uses=1] + sub i32 %11, 1 ; <i32>:12 [#uses=1] + getelementptr [1024 x i8]* %buf, i32 0, i32 %12 ; <i8*>:13 [#uses=1] + store i8 0, i8* %13, align 1 + br label %bb5 + +bb5: ; preds = %bb4, %bb2, %bb + load i8** %regexp_addr, align 4 ; <i8*>:14 [#uses=1] + %buf6 = bitcast [1024 x i8]* %buf to i8* ; <i8*> [#uses=1] + call i32 @match( i8* %14, i8* %buf6 ) nounwind ; <i32>:15 [#uses=1] + icmp ne i32 %15, 0 ; <i1>:16 [#uses=1] + zext i1 %16 to i8 ; <i8>:17 [#uses=1] + %toBool7 = icmp ne i8 %17, 0 ; <i1> [#uses=1] + br i1 %toBool7, label %bb8, label %bb13 + +bb8: ; preds = %bb5 + load i32* %nmatch, align 4 ; <i32>:18 [#uses=1] + add i32 %18, 1 ; <i32>:19 [#uses=1] + store i32 %19, i32* %nmatch, align 4 + load i8** %name_addr, align 4 ; <i8*>:20 [#uses=1] + icmp ne i8* %20, null ; <i1>:21 [#uses=1] + zext i1 %21 to i8 ; <i8>:22 [#uses=1] + %toBool9 = icmp ne i8 %22, 0 ; <i1> [#uses=1] + br i1 %toBool9, label %bb10, label %bb11 + +bb10: ; preds = %bb8 + load i8** %name_addr, align 4 ; <i8*>:23 [#uses=1] + call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), i8* %23 ) nounwind ; <i32>:24 [#uses=0] + br label %bb11 + +bb11: ; preds = %bb10, %bb8 + %buf12 = bitcast [1024 x i8]* %buf to i8* ; <i8*> [#uses=1] + call i32 @puts( i8* %buf12 ) nounwind ; <i32>:25 [#uses=0] + br label %bb13 + +bb13: ; preds = %bb11, %bb5, %entry + %buf14 = bitcast [1024 x i8]* %buf to i8* ; <i8*> [#uses=1] + load %struct.FILE** %f_addr, align 4 ; <%struct.FILE*>:26 [#uses=1] + call i8* @fgets( i8* %buf14, i32 1024, %struct.FILE* %26 ) nounwind ; <i8*>:27 [#uses=1] + icmp ne i8* %27, null ; <i1>:28 [#uses=1] + zext i1 %28 to i8 ; <i8>:29 [#uses=1] + %toBool15 = icmp ne i8 %29, 0 ; <i1> [#uses=1] + br i1 %toBool15, label %bb, label %bb16 + +bb16: ; preds = %bb13 + load i32* %nmatch, align 4 ; <i32>:30 [#uses=1] + store i32 %30, i32* %0, align 4 + load i32* %0, align 4 ; <i32>:31 [#uses=1] + store i32 %31, i32* %retval, align 4 + br label %return + +return: ; preds = %bb16 + %retval17 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval17 +} + +declare i32 @strlen(i8*) nounwind readonly + +declare i32 @printf(i8*, ...) nounwind + +declare i32 @puts(i8*) + +declare i8* @fgets(i8*, i32, %struct.FILE*) + +define i32 @grepmain(i32 %argc, i8** %argv) nounwind { +entry: + %argc_addr = alloca i32 ; <i32*> [#uses=5] + %argv_addr = alloca i8** ; <i8***> [#uses=6] + %retval = alloca i32 ; <i32*> [#uses=2] + %f = alloca %struct.FILE* ; <%struct.FILE**> [#uses=5] + %nmatch = alloca i32 ; <i32*> [#uses=4] + %i = alloca i32 ; <i32*> [#uses=7] + alloca i32 ; <i32*>:0 [#uses=2] + %iftmp.3 = alloca i8* ; <i8**> [#uses=3] + %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] + store i32 %argc, i32* %argc_addr + store i8** %argv, i8*** %argv_addr + load i32* %argc_addr, align 4 ; <i32>:1 [#uses=1] + icmp sle i32 %1, 1 ; <i1>:2 [#uses=1] + zext i1 %2 to i8 ; <i8>:3 [#uses=1] + %toBool = icmp ne i8 %3, 0 ; <i1> [#uses=1] + br i1 %toBool, label %bb, label %bb1 + +bb: ; preds = %entry + load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*>:4 [#uses=1] + bitcast %struct.FILE* %4 to i8* ; <i8*>:5 [#uses=1] + call i32 @"\01_fwrite$UNIX2003"( i8* getelementptr ([30 x i8]* @"\01LC1", i32 0, i32 0), i32 1, i32 29, i8* %5 ) nounwind ; <i32>:6 [#uses=0] + call void @exit( i32 0 ) noreturn nounwind + unreachable + +bb1: ; preds = %entry + store i32 0, i32* %nmatch, align 4 + load i32* %argc_addr, align 4 ; <i32>:7 [#uses=1] + icmp eq i32 %7, 2 ; <i1>:8 [#uses=1] + zext i1 %8 to i8 ; <i8>:9 [#uses=1] + %toBool2 = icmp ne i8 %9, 0 ; <i1> [#uses=1] + br i1 %toBool2, label %bb3, label %bb4 + +bb3: ; preds = %bb1 + load %struct.FILE** @__stdinp, align 4 ; <%struct.FILE*>:10 [#uses=1] + load i8*** %argv_addr, align 4 ; <i8**>:11 [#uses=1] + getelementptr i8** %11, i32 1 ; <i8**>:12 [#uses=1] + load i8** %12, align 4 ; <i8*>:13 [#uses=1] + call i32 @grep( i8* %13, %struct.FILE* %10, i8* null ) nounwind ; <i32>:14 [#uses=0] + br label %bb19 + +bb4: ; preds = %bb1 + store i32 2, i32* %i, align 4 + br label %bb17 + +bb5: ; preds = %bb17 + load i8*** %argv_addr, align 4 ; <i8**>:15 [#uses=1] + load i32* %i, align 4 ; <i32>:16 [#uses=1] + getelementptr i8** %15, i32 %16 ; <i8**>:17 [#uses=1] + load i8** %17, align 4 ; <i8*>:18 [#uses=1] + call %struct.FILE* @fopen( i8* %18, i8* getelementptr ([2 x i8]* @"\01LC2", i32 0, i32 0) ) nounwind ; <%struct.FILE*>:19 [#uses=1] + store %struct.FILE* %19, %struct.FILE** %f, align 4 + load %struct.FILE** %f, align 4 ; <%struct.FILE*>:20 [#uses=1] + icmp eq %struct.FILE* %20, null ; <i1>:21 [#uses=1] + zext i1 %21 to i8 ; <i8>:22 [#uses=1] + %toBool6 = icmp ne i8 %22, 0 ; <i1> [#uses=1] + br i1 %toBool6, label %bb7, label %bb8 + +bb7: ; preds = %bb5 + load i8*** %argv_addr, align 4 ; <i8**>:23 [#uses=1] + load i32* %i, align 4 ; <i32>:24 [#uses=1] + getelementptr i8** %23, i32 %24 ; <i8**>:25 [#uses=1] + load i8** %25, align 4 ; <i8*>:26 [#uses=1] + load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*>:27 [#uses=1] + call i32 (%struct.FILE*, i8*, ...)* @fprintf( %struct.FILE* %27, i8* getelementptr ([15 x i8]* @"\01LC3", i32 0, i32 0), i8* %26 ) nounwind ; <i32>:28 [#uses=0] + br label %bb16 + +bb8: ; preds = %bb5 + load %struct.FILE** %f, align 4 ; <%struct.FILE*>:29 [#uses=1] + call i32 @setvbuf( %struct.FILE* %29, i8* getelementptr ([1048576 x i8]* @readbuf, i32 0, i32 0), i32 0, i32 1048576 ) nounwind ; <i32>:30 [#uses=0] + load i32* %argc_addr, align 4 ; <i32>:31 [#uses=1] + icmp sgt i32 %31, 3 ; <i1>:32 [#uses=1] + zext i1 %32 to i8 ; <i8>:33 [#uses=1] + %toBool9 = icmp ne i8 %33, 0 ; <i1> [#uses=1] + br i1 %toBool9, label %bb10, label %bb11 + +bb10: ; preds = %bb8 + load i8*** %argv_addr, align 4 ; <i8**>:34 [#uses=1] + load i32* %i, align 4 ; <i32>:35 [#uses=1] + getelementptr i8** %34, i32 %35 ; <i8**>:36 [#uses=1] + load i8** %36, align 4 ; <i8*>:37 [#uses=1] + store i8* %37, i8** %iftmp.3, align 4 + br label %bb12 + +bb11: ; preds = %bb8 + store i8* null, i8** %iftmp.3, align 4 + br label %bb12 + +bb12: ; preds = %bb11, %bb10 + load i8*** %argv_addr, align 4 ; <i8**>:38 [#uses=1] + getelementptr i8** %38, i32 1 ; <i8**>:39 [#uses=1] + load i8** %39, align 4 ; <i8*>:40 [#uses=1] + load %struct.FILE** %f, align 4 ; <%struct.FILE*>:41 [#uses=1] + load i8** %iftmp.3, align 4 ; <i8*>:42 [#uses=1] + call i32 @grep( i8* %40, %struct.FILE* %41, i8* %42 ) nounwind ; <i32>:43 [#uses=1] + icmp sgt i32 %43, 0 ; <i1>:44 [#uses=1] + zext i1 %44 to i8 ; <i8>:45 [#uses=1] + %toBool13 = icmp ne i8 %45, 0 ; <i1> [#uses=1] + br i1 %toBool13, label %bb14, label %bb15 + +bb14: ; preds = %bb12 + load i32* %nmatch, align 4 ; <i32>:46 [#uses=1] + add i32 %46, 1 ; <i32>:47 [#uses=1] + store i32 %47, i32* %nmatch, align 4 + br label %bb15 + +bb15: ; preds = %bb14, %bb12 + load %struct.FILE** %f, align 4 ; <%struct.FILE*>:48 [#uses=1] + call i32 @fclose( %struct.FILE* %48 ) nounwind ; <i32>:49 [#uses=0] + br label %bb16 + +bb16: ; preds = %bb15, %bb7 + load i32* %i, align 4 ; <i32>:50 [#uses=1] + add i32 %50, 1 ; <i32>:51 [#uses=1] + store i32 %51, i32* %i, align 4 + br label %bb17 + +bb17: ; preds = %bb16, %bb4 + load i32* %i, align 4 ; <i32>:52 [#uses=1] + load i32* %argc_addr, align 4 ; <i32>:53 [#uses=1] + icmp slt i32 %52, %53 ; <i1>:54 [#uses=1] + zext i1 %54 to i8 ; <i8>:55 [#uses=1] + %toBool18 = icmp ne i8 %55, 0 ; <i1> [#uses=1] + br i1 %toBool18, label %bb5, label %bb19 + +bb19: ; preds = %bb17, %bb3 + load i32* %nmatch, align 4 ; <i32>:56 [#uses=1] + store i32 %56, i32* %0, align 4 + load i32* %0, align 4 ; <i32>:57 [#uses=1] + store i32 %57, i32* %retval, align 4 + br label %return + +return: ; preds = %bb19 + %retval20 = load i32* %retval ; <i32> [#uses=1] + ret i32 %retval20 +} + +declare i32 @"\01_fwrite$UNIX2003"(i8*, i32, i32, i8*) + +declare void @exit(i32) noreturn nounwind + +declare %struct.FILE* @fopen(i8*, i8*) + +declare i32 @fprintf(%struct.FILE*, i8*, ...) nounwind + +declare i32 @setvbuf(%struct.FILE*, i8*, i32, i32) + +declare i32 @fclose(%struct.FILE*)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pyrect/translator/translator.py Sun Aug 08 04:13:14 2010 +0900 @@ -0,0 +1,58 @@ +#!/usr/bin/env python + +import sys + +class Translator(object): + def __init__(self, regexp): + self.regexp = regexp + self.stream = None + self.__indent = 0 + self.tab = " " + + def indent(self): + self.__indent += 1 + + def dedent(self): + if self.__indent > 0: + self.__indent -= 1 + + def emit(self, string='', ret=1): + self.emit0(string+"\n"*ret) + + def emiti(self, *arg): + self.emit(*arg) + self.indent() + + def emitd(self, *arg): + self.dedent() + self.emit(*arg) + + def emit0(self, string): + self.stream.write(self.tab * self.__indent + string) + + def state_name(self, state_name): + return str(state_name) + + def emit_from_callgraph(self): + pass + + def translate(self, stream=sys.stdout): + self.stream = stream + self.emit_from_callgraph() + + class stmt(object): + def __init__(self, stream=sys.stdout): + self.indent = indent + def __enter__(self): + pass + def __exit__(self, *arg): + pass + + class global_stmt(stmt): + pass + + class state_stmt(stmt): + pass + + class transition_stmt(stmt): + pass