comparison src/llvm_translator.py @ 18:ec36e784df2e

add LLVMTranslator(Translator) (and remove reg2llvm.py), add --LLVM option to converter.py.
author Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp>
date Mon, 05 Jul 2010 08:36:11 +0900
parents
children a24acddedf89
comparison
equal deleted inserted replaced
17:5ff3f1efa76a 18:ec36e784df2e
1 #!/usr/bin/env python
2
3 from llvm.core import *
4 from llvm.passes import *
5 from llvm.ee import *
6 from translator import Translator
7 from dfareg import Regexp, CallGraph
8
9 class LLVMTranslator(Translator):
10 """LLVMTranslator
11 This Class can translate from DFA or NFA into LLVM-IR.
12 and also can JIT-Compile/evaluate it's self using llvm-py.
13 >>> string = '(A|B)*C'
14 >>> reg = Regexp(string)
15 >>> dfacg = CallGraph(reg.dfa)
16 >>> lt = LLVMTranslator(string, dfacg)
17 >>> lt.translate()
18 >>> isinstance(lt.execute(), llvm.ee.GenericValue)
19 True
20 """
21 # define llvm core types, and const
22 int_t = Type.int()
23 char_t = Type.int(8)
24 charptr_t = Type.pointer(char_t)
25 charptrptr_t = Type.pointer(charptr_t)
26 const_zero = Constant.int(int_t, 0)
27 const_one = Constant.int(int_t, 1)
28 llvm.GuaranteedTailCallOpt = True
29
30 def __init__(self, regexp, cg): #(self, modName, regexp, string, self.impl_label, optimized, debug):
31 Translator.__init__(self, regexp, cg)
32 self.mod = Module.new(self.cg.type)
33 self.optimize = False
34 self.debug = False
35 self.impl_label = False
36 self.matchp_str = self.new_str_const("ABC")
37 self.debug_str = self.new_str_const("state: %s, arg: %c(int %d)\n")
38 if self.cg.type == "DFA":
39 self.name_hash = self.create_name_hash()
40
41 def modify_state_name(self, state_name):
42 if self.cg.type == "DFA":
43 return self.name_hash[state_name]
44 else:
45 return state_name
46
47 def emit_from_callgraph(self):
48 def optional_func_decl(fun):
49 fun.calling_convertion = CC_X86_FASTCALL
50 fun.args[0].name = "index"
51
52 def func_decl(state):
53 optional_func_decl(state)
54
55 state_ref = dict()
56 main = self.mod.add_function(
57 Type.function(self.int_t, (self.int_t,)), "main")
58 optional_func_decl(main)
59 main_entry = main.append_basic_block("entry")
60
61 if self.impl_label:
62 accept_state = main.append_basic_block("accpet")
63 reject_state = main.append_basic_block("reject")
64 index_ptr = Builder.new(main_entry).malloc(self.int_t)
65 Builder.new(accept_state).free(index_ptr)
66 Builder.new(reject_state).free(index_ptr)
67 else:
68 # Create function - accept and reject (final state).
69 accept_state = self.mod.add_function(
70 Type.function(self.int_t, (self.int_t,)), "accept")
71 optional_func_decl(accept_state)
72 reject_state = self.mod.add_function(
73 Type.function(self.int_t, (self.int_t,)), "reject")
74 optional_func_decl(reject_state)
75
76
77 state_ref["accept"] = accept_state
78 state_ref["reject"] = reject_state
79
80 # add state to module, (as function or label).
81 if (self.impl_label):
82 for state in self.cg.map.iterkeys():
83 label = main.append_basic_block(state)
84 state_ref[state] = label
85 else:
86 for state in self.cg.map.iterkeys():
87 fun = self.mod.add_function(
88 Type.function(self.int_t, (self.int_t,)), state)
89 optional_func_decl(fun)
90 state_ref[state] = fun
91
92 # emit instructions
93 if (self.impl_label): emit = Builder.new(accept_state)
94 else: emit = Builder.new(accept_state.append_basic_block("entry"))
95
96 self.emit_call_printf(emit, "%s does match regexp\n", self.gep_first(emit, self.matchp_str))
97 emit.ret(self.const_one)
98
99 if (self.impl_label): emit = Builder.new(reject_state)
100 else: emit = Builder.new(reject_state.append_basic_block("entry"))
101 self.emit_call_printf(emit, "%s does not match regexp\n", self.gep_first(emit, self.matchp_str))
102 emit.ret(self.const_zero)
103
104 if (self.impl_label):
105 # emit transition instruction with jump instruction
106 emit = Builder.new(main_entry)
107 emit.store(main.args[0], index_ptr)
108 emit.branch(state_ref[self.cg.start])
109
110 for state, transition in self.cg.map.iteritems():
111 emit = Builder.new(state_ref[state])
112 index = emit.load(index_ptr)
113 ptr = emit.gep(self.matchp_str, (self.const_zero, index))
114 emit.store(emit.add(self.const_one, index), index_ptr)
115 char = emit.load(ptr)
116 si = emit.switch(char, state_ref['reject'], len(transition))
117 local_label = 0
118 for case, next_states in transition.iteritems():
119 bb = main.append_basic_block("%s_case%d" % (state, local_label)) #create default bb
120 emit = Builder.new(bb)
121 emit.branch(state_ref[next_states[0]])
122 si.add_case(self.char_const(case), bb)
123 local_label += 1
124 else:
125 for state, transition in self.cg.map.iteritems():
126 cases = dict()
127 for case, next_states in transition.iteritems():
128 cases[self.char_const(case)] = state_ref[next_states[0]]
129 state_fun = state_ref[state]
130 emit = Builder.new(state_fun.append_basic_block("entry"))
131 ptr = emit.gep(self.matchp_str, (self.const_zero, state_fun.args[0]))
132 next_index = emit.add(state_fun.args[0], self.const_one)
133 char = emit.load(ptr)
134
135 if (self.debug): self.emit_call_printf(emit, self.debug_str, self.gep_first(emit, self.new_str_const(fun.name)), char, char)
136
137 label = 0
138 default_bb = state_fun.append_basic_block("default") #create default bb
139 builder = Builder.new(default_bb) # default is reject.
140 ret = builder.call(reject_state, (next_index,))
141 builder.ret(ret)
142
143 si = emit.switch(char, default_bb, len(cases)) # create switch instruction with deafult case.
144 for case, nextFun in cases.iteritems():
145 bb = state_fun.append_basic_block("case%d" % label) #create default bb
146 builder = Builder.new(bb)
147 ret = builder.call(nextFun, (next_index,))
148 builder.ret(ret)
149 si.add_case(case, bb)
150 label += 1
151 emit = Builder.new(main_entry)
152 ret = emit.call(state_ref[self.cg.start], (main.args[0],))
153 emit.ret(ret)
154
155 self.mp = ModuleProvider.new(self.mod)
156 if (self.optimize): self.do_optimize()
157 self.ee = ExecutionEngine.new(self.mp)
158 self.main = main
159 self.emit(str(self.mod))
160
161 def get_execution_engine(self):
162 return self.ee
163
164 def do_optimize(self):
165 #optimization passes
166 pm = PassManager.new()
167 pm.add(TargetData.new(''))
168 pm.add(PASS_FUNCTION_INLINING)
169 pm.run(self.mod)
170 fp = FunctionPassManager.new(self.mp)
171 fp.add(TargetData.new(''))
172 fp.add(PASS_BLOCK_PLACEMENT)
173 fp.add(PASS_INSTRUCTION_COMBINING)
174 fp.add(PASS_TAIL_CALL_ELIMINATION)
175 fp.add(PASS_AGGRESSIVE_DCE)
176 fp.add(PASS_DEAD_INST_ELIMINATION)
177 fp.add(PASS_DEAD_CODE_ELIMINATION)
178 for fun in self.mod.functions:
179 fp.run(fun)
180
181 def print_module(self):
182 print self.mod
183
184 def execute(self):
185 return self.ee.run_function(self.main,
186 (GenericValue.int(self.int_t, 0),))
187
188 def new_str_const(self, val):
189 '''create string(array of int) as a global value '''
190 str = self.mod.add_global_variable(Type.array(self.char_t, len(val) + 1), "")
191 str.initializer = Constant.stringz(val)
192 return str
193
194 def gep_first(self, emit, val):
195 '''get pointer of array'''
196 return emit.gep(val, (self.const_zero, self.const_zero))
197
198 def char_const(self, val):
199 '''create constant int value'''
200 if isinstance(val, str):
201 if val == '\\0':
202 return Constant.int(self.char_t, 0)
203 else:
204 return Constant.int(self.char_t, ord(val))
205 else:
206 exit('char_const: invalid argument.', val)
207
208 def emit_call_printf(self, emit, string, *args):
209 '''emit libc printf function call instruction'''
210 try:
211 printf = self.mod.get_function_named("printf")
212 except llvm.LLVMException:
213 printf = self.mod.add_function(
214 Type.function(Type.void(),
215 (Type.pointer(self.char_t, 0),), 1), "printf")
216 if isinstance(string, str):
217 string = self.new_str_const(string)
218 emit.call(printf,
219 [self.gep_first(emit, string)]+list(args))
220
221 def test():
222 import doctest
223 doctest.testmod()
224
225 if __name__ == "__main__": test()