Mercurial > hg > Members > shinya > pyrect
comparison src/llvm_translator.py @ 18:ec36e784df2e
add LLVMTranslator(Translator) (and remove reg2llvm.py), add --LLVM option to converter.py.
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 05 Jul 2010 08:36:11 +0900 |
parents | |
children | a24acddedf89 |
comparison
equal
deleted
inserted
replaced
17:5ff3f1efa76a | 18:ec36e784df2e |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 from llvm.core import * | |
4 from llvm.passes import * | |
5 from llvm.ee import * | |
6 from translator import Translator | |
7 from dfareg import Regexp, CallGraph | |
8 | |
9 class LLVMTranslator(Translator): | |
10 """LLVMTranslator | |
11 This Class can translate from DFA or NFA into LLVM-IR. | |
12 and also can JIT-Compile/evaluate it's self using llvm-py. | |
13 >>> string = '(A|B)*C' | |
14 >>> reg = Regexp(string) | |
15 >>> dfacg = CallGraph(reg.dfa) | |
16 >>> lt = LLVMTranslator(string, dfacg) | |
17 >>> lt.translate() | |
18 >>> isinstance(lt.execute(), llvm.ee.GenericValue) | |
19 True | |
20 """ | |
21 # define llvm core types, and const | |
22 int_t = Type.int() | |
23 char_t = Type.int(8) | |
24 charptr_t = Type.pointer(char_t) | |
25 charptrptr_t = Type.pointer(charptr_t) | |
26 const_zero = Constant.int(int_t, 0) | |
27 const_one = Constant.int(int_t, 1) | |
28 llvm.GuaranteedTailCallOpt = True | |
29 | |
30 def __init__(self, regexp, cg): #(self, modName, regexp, string, self.impl_label, optimized, debug): | |
31 Translator.__init__(self, regexp, cg) | |
32 self.mod = Module.new(self.cg.type) | |
33 self.optimize = False | |
34 self.debug = False | |
35 self.impl_label = False | |
36 self.matchp_str = self.new_str_const("ABC") | |
37 self.debug_str = self.new_str_const("state: %s, arg: %c(int %d)\n") | |
38 if self.cg.type == "DFA": | |
39 self.name_hash = self.create_name_hash() | |
40 | |
41 def modify_state_name(self, state_name): | |
42 if self.cg.type == "DFA": | |
43 return self.name_hash[state_name] | |
44 else: | |
45 return state_name | |
46 | |
47 def emit_from_callgraph(self): | |
48 def optional_func_decl(fun): | |
49 fun.calling_convertion = CC_X86_FASTCALL | |
50 fun.args[0].name = "index" | |
51 | |
52 def func_decl(state): | |
53 optional_func_decl(state) | |
54 | |
55 state_ref = dict() | |
56 main = self.mod.add_function( | |
57 Type.function(self.int_t, (self.int_t,)), "main") | |
58 optional_func_decl(main) | |
59 main_entry = main.append_basic_block("entry") | |
60 | |
61 if self.impl_label: | |
62 accept_state = main.append_basic_block("accpet") | |
63 reject_state = main.append_basic_block("reject") | |
64 index_ptr = Builder.new(main_entry).malloc(self.int_t) | |
65 Builder.new(accept_state).free(index_ptr) | |
66 Builder.new(reject_state).free(index_ptr) | |
67 else: | |
68 # Create function - accept and reject (final state). | |
69 accept_state = self.mod.add_function( | |
70 Type.function(self.int_t, (self.int_t,)), "accept") | |
71 optional_func_decl(accept_state) | |
72 reject_state = self.mod.add_function( | |
73 Type.function(self.int_t, (self.int_t,)), "reject") | |
74 optional_func_decl(reject_state) | |
75 | |
76 | |
77 state_ref["accept"] = accept_state | |
78 state_ref["reject"] = reject_state | |
79 | |
80 # add state to module, (as function or label). | |
81 if (self.impl_label): | |
82 for state in self.cg.map.iterkeys(): | |
83 label = main.append_basic_block(state) | |
84 state_ref[state] = label | |
85 else: | |
86 for state in self.cg.map.iterkeys(): | |
87 fun = self.mod.add_function( | |
88 Type.function(self.int_t, (self.int_t,)), state) | |
89 optional_func_decl(fun) | |
90 state_ref[state] = fun | |
91 | |
92 # emit instructions | |
93 if (self.impl_label): emit = Builder.new(accept_state) | |
94 else: emit = Builder.new(accept_state.append_basic_block("entry")) | |
95 | |
96 self.emit_call_printf(emit, "%s does match regexp\n", self.gep_first(emit, self.matchp_str)) | |
97 emit.ret(self.const_one) | |
98 | |
99 if (self.impl_label): emit = Builder.new(reject_state) | |
100 else: emit = Builder.new(reject_state.append_basic_block("entry")) | |
101 self.emit_call_printf(emit, "%s does not match regexp\n", self.gep_first(emit, self.matchp_str)) | |
102 emit.ret(self.const_zero) | |
103 | |
104 if (self.impl_label): | |
105 # emit transition instruction with jump instruction | |
106 emit = Builder.new(main_entry) | |
107 emit.store(main.args[0], index_ptr) | |
108 emit.branch(state_ref[self.cg.start]) | |
109 | |
110 for state, transition in self.cg.map.iteritems(): | |
111 emit = Builder.new(state_ref[state]) | |
112 index = emit.load(index_ptr) | |
113 ptr = emit.gep(self.matchp_str, (self.const_zero, index)) | |
114 emit.store(emit.add(self.const_one, index), index_ptr) | |
115 char = emit.load(ptr) | |
116 si = emit.switch(char, state_ref['reject'], len(transition)) | |
117 local_label = 0 | |
118 for case, next_states in transition.iteritems(): | |
119 bb = main.append_basic_block("%s_case%d" % (state, local_label)) #create default bb | |
120 emit = Builder.new(bb) | |
121 emit.branch(state_ref[next_states[0]]) | |
122 si.add_case(self.char_const(case), bb) | |
123 local_label += 1 | |
124 else: | |
125 for state, transition in self.cg.map.iteritems(): | |
126 cases = dict() | |
127 for case, next_states in transition.iteritems(): | |
128 cases[self.char_const(case)] = state_ref[next_states[0]] | |
129 state_fun = state_ref[state] | |
130 emit = Builder.new(state_fun.append_basic_block("entry")) | |
131 ptr = emit.gep(self.matchp_str, (self.const_zero, state_fun.args[0])) | |
132 next_index = emit.add(state_fun.args[0], self.const_one) | |
133 char = emit.load(ptr) | |
134 | |
135 if (self.debug): self.emit_call_printf(emit, self.debug_str, self.gep_first(emit, self.new_str_const(fun.name)), char, char) | |
136 | |
137 label = 0 | |
138 default_bb = state_fun.append_basic_block("default") #create default bb | |
139 builder = Builder.new(default_bb) # default is reject. | |
140 ret = builder.call(reject_state, (next_index,)) | |
141 builder.ret(ret) | |
142 | |
143 si = emit.switch(char, default_bb, len(cases)) # create switch instruction with deafult case. | |
144 for case, nextFun in cases.iteritems(): | |
145 bb = state_fun.append_basic_block("case%d" % label) #create default bb | |
146 builder = Builder.new(bb) | |
147 ret = builder.call(nextFun, (next_index,)) | |
148 builder.ret(ret) | |
149 si.add_case(case, bb) | |
150 label += 1 | |
151 emit = Builder.new(main_entry) | |
152 ret = emit.call(state_ref[self.cg.start], (main.args[0],)) | |
153 emit.ret(ret) | |
154 | |
155 self.mp = ModuleProvider.new(self.mod) | |
156 if (self.optimize): self.do_optimize() | |
157 self.ee = ExecutionEngine.new(self.mp) | |
158 self.main = main | |
159 self.emit(str(self.mod)) | |
160 | |
161 def get_execution_engine(self): | |
162 return self.ee | |
163 | |
164 def do_optimize(self): | |
165 #optimization passes | |
166 pm = PassManager.new() | |
167 pm.add(TargetData.new('')) | |
168 pm.add(PASS_FUNCTION_INLINING) | |
169 pm.run(self.mod) | |
170 fp = FunctionPassManager.new(self.mp) | |
171 fp.add(TargetData.new('')) | |
172 fp.add(PASS_BLOCK_PLACEMENT) | |
173 fp.add(PASS_INSTRUCTION_COMBINING) | |
174 fp.add(PASS_TAIL_CALL_ELIMINATION) | |
175 fp.add(PASS_AGGRESSIVE_DCE) | |
176 fp.add(PASS_DEAD_INST_ELIMINATION) | |
177 fp.add(PASS_DEAD_CODE_ELIMINATION) | |
178 for fun in self.mod.functions: | |
179 fp.run(fun) | |
180 | |
181 def print_module(self): | |
182 print self.mod | |
183 | |
184 def execute(self): | |
185 return self.ee.run_function(self.main, | |
186 (GenericValue.int(self.int_t, 0),)) | |
187 | |
188 def new_str_const(self, val): | |
189 '''create string(array of int) as a global value ''' | |
190 str = self.mod.add_global_variable(Type.array(self.char_t, len(val) + 1), "") | |
191 str.initializer = Constant.stringz(val) | |
192 return str | |
193 | |
194 def gep_first(self, emit, val): | |
195 '''get pointer of array''' | |
196 return emit.gep(val, (self.const_zero, self.const_zero)) | |
197 | |
198 def char_const(self, val): | |
199 '''create constant int value''' | |
200 if isinstance(val, str): | |
201 if val == '\\0': | |
202 return Constant.int(self.char_t, 0) | |
203 else: | |
204 return Constant.int(self.char_t, ord(val)) | |
205 else: | |
206 exit('char_const: invalid argument.', val) | |
207 | |
208 def emit_call_printf(self, emit, string, *args): | |
209 '''emit libc printf function call instruction''' | |
210 try: | |
211 printf = self.mod.get_function_named("printf") | |
212 except llvm.LLVMException: | |
213 printf = self.mod.add_function( | |
214 Type.function(Type.void(), | |
215 (Type.pointer(self.char_t, 0),), 1), "printf") | |
216 if isinstance(string, str): | |
217 string = self.new_str_const(string) | |
218 emit.call(printf, | |
219 [self.gep_first(emit, string)]+list(args)) | |
220 | |
221 def test(): | |
222 import doctest | |
223 doctest.testmod() | |
224 | |
225 if __name__ == "__main__": test() |