Mercurial > hg > Members > shinya > pyrect
changeset 0:02656ea776f3
Regexp-Compiler with LLVM
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 15 Jun 2010 00:54:59 +0900 |
parents | |
children | d5e81ef9afba |
files | __init__.py benchReg.py code/as/reg.gcc.s code/as/reg.ll.s code/as/regcbc.s code/c/reg.c code/c/regcbc.c code/ll/emit.ll code/ll/reg.ll code/ll/regllvm.label.ll code/ll/regllvm.label_optimized.ll dfareg.py ll reg2llvm.py test/test_DFARuntime.py test/test_NFA2DFA.py |
diffstat | 16 files changed, 2258 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/__init__.py Tue Jun 15 00:54:59 2010 +0900 @@ -0,0 +1,2 @@ +def compile(regexp): + return Regexp(regexp)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/benchReg.py Tue Jun 15 00:54:59 2010 +0900 @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +import sys,re +from optparse import OptionParser +from timeit import Timer +from dfareg import Regexp +from reg2llvm import RegLLVM + +myusage = "%prog [-O] [-p] [-s string] [-r regexp] [-E]" +psr = OptionParser(usage=myusage) +psr.add_option("-O", action="store_true", dest="optimizeFlag", default=False, help="optimimzation") +psr.add_option("-L", action="store_true", dest="implLabel", default=False, help="impliment with label") +psr.add_option("-E", action="store_true", dest="executeFlag", default=False, help="execute Module") +psr.add_option("-p", action="store_true", dest="printFlag", default=False, help="print module") +psr.add_option("-d", action="store_true", dest="dumpFlag", default=False, help="add debug stmt to module") +psr.add_option("-n", action="store", type="int", dest="number", default=1000, help="number of eval", metavar="INT") +psr.add_option("-r", action="store", type="string", dest="regexp", default="(A|B)*C", help="regexp", metavar="FILE") +psr.add_option("-s", action="store", type="string", dest="string", default="A"+"B"*500+"C", help="string", metavar="FILE") +(opts, args) = psr.parse_args(sys.argv) + +print "regexp = \"%s\", string = \"%s\", loop = %d" % (opts.regexp, opts.string, opts.number) + +regLLVM = RegLLVM("regllvm", opts.regexp, opts.string, opts.implLabel, opts.optimizeFlag, opts.dumpFlag) + +if (opts.printFlag): regLLVM.printModule() + +if (opts.executeFlag): + print "LLVM :", Timer(setup='from __main__ import regLLVM', stmt='regLLVM.execute()').timeit(opts.number) + + reg = re.compile("^%s$" % opts.regexp) + print "re :", Timer(setup='from __main__ import reg', stmt='reg.search("%s")' % opts.string).timeit(opts.number) + + reg = Regexp(opts.regexp) + print "dfareg:", Timer(setup='from __main__ import reg', stmt='reg.matches("%s")' % opts.string).timeit(opts.number) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/as/reg.gcc.s Tue Jun 15 00:54:59 2010 +0900 @@ -0,0 +1,326 @@ + .cstring +LC0: + .ascii "\12string matches regexp. \12\0" + .align 2 +LC1: + .ascii "\12string does not match regexp. \12\0" + .text + .align 4,0x90 +.globl _state_8 +_state_8: + pushl %ebp + movl %esp, %ebp + pushl %ebx + subl $20, %esp + call ___i686.get_pc_thunk.bx +"L00000000001$pb": + cmpb $0, (%ecx) + jne L5 + leal LC0-"L00000000001$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + addl $20, %esp + popl %ebx + popl %ebp + ret + .align 4,0x90 +L5: + leal LC1-"L00000000001$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + addl $20, %esp + popl %ebx + popl %ebp + ret + .align 4,0x90 +.globl _state_1_2_3_5_7 +_state_1_2_3_5_7: + pushl %ebp + movl %esp, %ebp + pushl %ebx + call ___i686.get_pc_thunk.bx +"L00000000002$pb": + subl $20, %esp +L22: + movzbl (%ecx), %eax + addl $1, %ecx + cmpb $66, %al + je L22 + cmpb $67, %al + je L18 + cmpb $65, %al + je L22 +L26: + leal LC1-"L00000000002$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + addl $20, %esp + popl %ebx + popl %ebp + ret + .align 4,0x90 +L18: + cmpb $0, (%ecx) + jne L26 + leal LC0-"L00000000002$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + addl $20, %esp + popl %ebx + popl %ebp + ret + .align 4,0x90 +.globl _state_1_3_4_5_7 +_state_1_3_4_5_7: + pushl %ebp + movl %esp, %ebp + pushl %ebx + call ___i686.get_pc_thunk.bx +"L00000000003$pb": + subl $20, %esp +L38: + movzbl (%ecx), %eax + addl $1, %ecx + cmpb $66, %al + je L38 + cmpb $67, %al + je L41 + cmpb $65, %al + je L48 +L46: + leal LC1-"L00000000003$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + addl $20, %esp + popl %ebx + popl %ebp + ret + .align 4,0x90 +L48: + addl $20, %esp + popl %ebx + popl %ebp + jmp _state_1_2_3_5_7 + .align 4,0x90 +L41: + cmpb $0, (%ecx) + jne L46 + leal LC0-"L00000000003$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + addl $20, %esp + popl %ebx + popl %ebp + ret + .align 4,0x90 +.globl _state_1_3_5_6_7 +_state_1_3_5_6_7: + pushl %ebp + movl %esp, %ebp + pushl %ebx + subl $20, %esp + movzbl (%ecx), %eax + call ___i686.get_pc_thunk.bx +"L00000000004$pb": + leal 1(%ecx), %edx + cmpb $66, %al + je L63 + cmpb $67, %al + je L53 +L72: + cmpb $65, %al + je L73 +L65: + leal LC1-"L00000000004$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + addl $20, %esp + popl %ebx + popl %ebp + ret + .align 4,0x90 +L73: + addl $20, %esp + movl %edx, %ecx + popl %ebx + popl %ebp + jmp _state_1_2_3_5_7 + .align 4,0x90 +L63: + movzbl (%edx), %eax + addl $1, %edx + cmpb $66, %al + je L63 + cmpb $67, %al + jne L72 + cmpb $0, (%edx) + jne L65 + leal LC0-"L00000000004$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + .align 4,0x90 +L74: + addl $20, %esp + popl %ebx + popl %ebp + ret + .align 4,0x90 +L53: + cmpb $0, 1(%ecx) + jne L65 + leal LC0-"L00000000004$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + jmp L74 + .cstring +LC2: + .ascii "regexp: (A|B)*C\0" +LC3: + .ascii "number of state: 4\0" +LC4: + .ascii "string: %s\12\0" + .text + .align 4,0x90 +.globl _main +_main: + pushl %ebp + movl %esp, %ebp + pushl %esi + pushl %ebx + call ___i686.get_pc_thunk.bx +"L00000000005$pb": + subl $16, %esp + movl 12(%ebp), %esi + leal LC2-"L00000000005$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + leal LC3-"L00000000005$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + movl 4(%esi), %eax + movl %eax, 4(%esp) + leal LC4-"L00000000005$pb"(%ebx), %eax + movl %eax, (%esp) + call L_printf$stub + movl 4(%esi), %eax + movzbl (%eax), %edx + leal 1(%eax), %ecx + cmpb $66, %dl + je L89 + cmpb $67, %dl + je L79 + cmpb $65, %dl + je L97 +L91: + leal LC1-"L00000000005$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + addl $16, %esp + popl %ebx + popl %esi + popl %ebp + ret + .align 4,0x90 +L89: + movzbl (%ecx), %eax + addl $1, %ecx + cmpb $66, %al + je L89 + cmpb $67, %al + je L86 + cmpb $65, %al + jne L91 +L97: + addl $16, %esp + popl %ebx + popl %esi + popl %ebp + jmp _state_1_2_3_5_7 + .align 4,0x90 +L79: + cmpb $0, 1(%eax) + jne L91 +L96: + leal LC0-"L00000000005$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + addl $16, %esp + popl %ebx + popl %esi + popl %ebp + ret + .align 4,0x90 +L86: + cmpb $0, (%ecx) + jne L91 + .p2align 4,,3 + jmp L96 + .align 4,0x90 +.globl _accept +_accept: + pushl %ebp + movl %esp, %ebp + pushl %ebx + call ___i686.get_pc_thunk.bx +"L00000000006$pb": + subl $20, %esp + leal LC0-"L00000000006$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + addl $20, %esp + popl %ebx + popl %ebp + ret + .align 4,0x90 +.globl _reject +_reject: + pushl %ebp + movl %esp, %ebp + pushl %ebx + call ___i686.get_pc_thunk.bx +"L00000000007$pb": + subl $20, %esp + leal LC1-"L00000000007$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + addl $20, %esp + popl %ebx + popl %ebp + ret + .picsymbol_stub +L_printf$stub: + .indirect_symbol _printf + call LPC$1 +LPC$1: popl %eax + movl L1$lz-LPC$1(%eax),%edx + jmp *%edx +L_printf$stub_binder: + lea L1$lz-LPC$1(%eax),%eax + pushl %eax + jmp dyld_stub_binding_helper + .lazy_symbol_pointer +L1$lz: + .indirect_symbol _printf + .long L_printf$stub_binder + .picsymbol_stub +L_puts$stub: + .indirect_symbol _puts + call LPC$2 +LPC$2: popl %eax + movl L2$lz-LPC$2(%eax),%edx + jmp *%edx +L_puts$stub_binder: + lea L2$lz-LPC$2(%eax),%eax + pushl %eax + jmp dyld_stub_binding_helper + .lazy_symbol_pointer +L2$lz: + .indirect_symbol _puts + .long L_puts$stub_binder + .subsections_via_symbols + .section __TEXT,__textcoal_nt,coalesced,pure_instructions + .weak_definition ___i686.get_pc_thunk.bx + .private_extern ___i686.get_pc_thunk.bx +___i686.get_pc_thunk.bx: + movl (%esp), %ebx + ret
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/as/reg.ll.s Tue Jun 15 00:54:59 2010 +0900 @@ -0,0 +1,98 @@ + .text + .align 4,0x90 + .globl _main +_main: +Leh_func_begin1: +Llabel1: + subl $12, %esp + movl $4, (%esp) + call _malloc + movl 16(%esp), %ecx + movl %ecx, (%eax) + .align 4,0x90 +LBB1_1: ## state_1_3_5_6_7 + movl (%eax), %ecx + leal 1(%ecx), %edx + movl %edx, (%eax) + movb __unnamed_1_0(%ecx), %cl + cmpb $65, %cl + je LBB1_1 ## state_1_3_5_6_7 +LBB1_2: ## state_1_3_5_6_7 + cmpb $66, %cl + je LBB1_1 ## state_1_3_5_6_7 +LBB1_3: ## state_1_3_5_6_7 + cmpb $67, %cl + jne LBB1_6 ## reject +LBB1_4: ## state_8 + movl (%eax), %ecx + leal 1(%ecx), %edx + movl %edx, (%eax) + movb __unnamed_1_0(%ecx), %cl + testb %cl, %cl + jne LBB1_6 ## reject +LBB1_5: ## accpet + movl %eax, (%esp) + call _free + movl $1, %eax + addl $12, %esp + ret +LBB1_6: ## reject + movl %eax, (%esp) + call _free + xorl %eax, %eax + addl $12, %esp + ret +Leh_func_end1: + .data + .globl __unnamed_1_0 +__unnamed_1_0: ## + .asciz "ABBBBBBC" + .globl __unnamed_2_1 + .align 4 +__unnamed_2_1: ## + .asciz "state: %s, arg: %c(int %d)\n" + +.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EH_frame0: +Lsection_eh_frame: +Leh_frame_common: + .set Lset1eh,Leh_frame_common_end-Leh_frame_common_begin + .long Lset1eh +Leh_frame_common_begin: + .long 0x0 + .byte 0x1 + .asciz "zR" + .byte 0x1 + .byte 0x7C + .byte 0x8 + .byte 0x1 + .byte 0x1B + .byte 0xC + .byte 0x5 + .byte 0x4 + .byte 0x88 + .byte 0x1 + .align 2 +Leh_frame_common_end: + + .globl _main.eh +_main.eh: + .set Lset2eh,Leh_frame_end1-Leh_frame_begin1 + .long Lset2eh +Leh_frame_begin1: + .long Leh_frame_begin1-Leh_frame_common + .long Leh_func_begin1-. + .set Lset3eh,Leh_func_end1-Leh_func_begin1 + .long Lset3eh + .byte 0x0 + .byte 0xE + .byte 0x10 + .byte 0x4 + .set Lset4eh,Llabel1-Leh_func_begin1 + .long Lset4eh + .byte 0xD + .byte 0x5 + .align 2 +Leh_frame_end1: + .subsections_via_symbols +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/as/regcbc.s Tue Jun 15 00:54:59 2010 +0900 @@ -0,0 +1,317 @@ + .cstring + .align 2 +LC0: + .ascii "\12string does not match regexp. \12\0" + .text + .align 4,0x90 +.globl _reject +_reject: + call L3 +"L00000000001$pb": +L3: + popl %ecx + pushl %ebp + movl %esp, %ebp + leal LC0-"L00000000001$pb"(%ecx), %eax + movl %eax, 8(%ebp) + leave + jmp L_puts$stub + .cstring +LC1: + .ascii "\12string matches regexp. \12\0" + .text + .align 4,0x90 +.globl _accept +_accept: + call L6 +"L00000000002$pb": +L6: + popl %ecx + pushl %ebp + movl %esp, %ebp + leal LC1-"L00000000002$pb"(%ecx), %eax + movl %eax, 8(%ebp) + leave + jmp L_puts$stub + .align 4,0x90 +.globl _state_14 +_state_14: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + call L13 +"L00000000003$pb": +L13: + popl %ecx + cmpb $0, (%eax) + jne L8 + leal LC1-"L00000000003$pb"(%ecx), %eax + movl %eax, 8(%ebp) + leave + jmp L_puts$stub + .align 4,0x90 +L8: + leal LC0-"L00000000003$pb"(%ecx), %eax + movl %eax, 8(%ebp) + leave + jmp L_puts$stub + .align 4,0x90 +.globl _state_12 +_state_12: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + call L19 +"L00000000004$pb": +L19: + popl %ecx + cmpb $0, (%eax) + jne L15 + leal LC1-"L00000000004$pb"(%ecx), %eax + movl %eax, 8(%ebp) + leave + jmp L_puts$stub + .align 4,0x90 +L15: + leal LC0-"L00000000004$pb"(%ecx), %eax + movl %eax, 8(%ebp) + leave + jmp L_puts$stub + .align 4,0x90 +.globl _state_9_11_13_15 +_state_9_11_13_15: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %edx + call L31 +"L00000000005$pb": +L31: + popl %ecx + movzbl (%edx), %eax + cmpb $79, %al + je L22 + cmpb $111, %al + je L22 +L21: + leal LC0-"L00000000005$pb"(%ecx), %eax + movl %eax, 8(%ebp) + leave + jmp L_puts$stub + .align 4,0x90 +L22: + cmpb $0, 1(%edx) + jne L21 + leal LC1-"L00000000005$pb"(%ecx), %eax + movl %eax, 8(%ebp) + leave + jmp L_puts$stub + .align 4,0x90 +.globl _state_7_11_13_15 +_state_7_11_13_15: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %edx + call L43 +"L00000000006$pb": +L43: + popl %ecx + movzbl (%edx), %eax + cmpb $79, %al + je L34 + cmpb $111, %al + je L34 +L33: + leal LC0-"L00000000006$pb"(%ecx), %eax + movl %eax, 8(%ebp) + leave + jmp L_puts$stub + .align 4,0x90 +L34: + cmpb $0, 1(%edx) + jne L33 + leal LC1-"L00000000006$pb"(%ecx), %eax + movl %eax, 8(%ebp) + leave + jmp L_puts$stub + .align 4,0x90 +.globl _state_4_6_8_10 +_state_4_6_8_10: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + call L52 +"L00000000007$pb": +L52: + popl %ecx + leal 1(%eax), %edx + movzbl (%eax), %eax + cmpb $79, %al + je L46 + cmpb $111, %al + je L51 + leal LC0-"L00000000007$pb"(%ecx), %eax + movl %eax, 8(%ebp) + leave + jmp L_puts$stub + .align 4,0x90 +L46: + movl %edx, 8(%ebp) + leave + jmp _state_9_11_13_15 + .align 4,0x90 +L51: + movl %edx, 8(%ebp) + leave + jmp _state_7_11_13_15 + .align 4,0x90 +.globl _state_2_6_8_10 +_state_2_6_8_10: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax + call L60 +"L00000000008$pb": +L60: + popl %ecx + leal 1(%eax), %edx + movzbl (%eax), %eax + cmpb $79, %al + je L55 + cmpb $111, %al + je L59 + leal LC0-"L00000000008$pb"(%ecx), %eax + movl %eax, 8(%ebp) + leave + jmp L_puts$stub + .align 4,0x90 +L55: + movl %edx, 8(%ebp) + leave + jmp _state_9_11_13_15 + .align 4,0x90 +L59: + movl %edx, 8(%ebp) + leave + jmp _state_7_11_13_15 + .cstring +LC2: + .ascii "regexp: (f|F)(o|O)(o|O)\0" +LC3: + .ascii "number of state: 7\0" +LC4: + .ascii "string: %s\12\0" + .text + .align 4,0x90 +.globl _main +_main: + pushl %ebp + movl %esp, %ebp + pushl %esi + pushl %ebx + call L75 +"L00000000009$pb": +L75: + popl %ebx + subl $16, %esp + movl 12(%ebp), %esi + leal LC2-"L00000000009$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + leal LC3-"L00000000009$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + movl 4(%esi), %eax + movl %eax, 4(%esp) + leal LC4-"L00000000009$pb"(%ebx), %eax + movl %eax, (%esp) + call L_printf$stub + movl 4(%esi), %edx + movzbl (%edx), %eax + cmpb $70, %al + je L64 + cmpb $102, %al + je L64 +L69: + leal LC0-"L00000000009$pb"(%ebx), %eax + movl %eax, (%esp) + call L_puts$stub + addl $16, %esp + xorl %eax, %eax + popl %ebx + popl %esi + leave + ret + .align 4,0x90 +L64: + movzbl 1(%edx), %eax + leal 2(%edx), %ecx + cmpb $79, %al + je L70 + cmpb $111, %al + jne L69 + movl %ecx, (%esp) + call _state_7_11_13_15 + addl $16, %esp + xorl %eax, %eax + popl %ebx + popl %esi + leave + ret + .align 4,0x90 +L70: + movl %ecx, (%esp) + call _state_9_11_13_15 + addl $16, %esp + xorl %eax, %eax + popl %ebx + popl %esi + leave + ret + .align 4,0x90 +.globl _state_1_3_5 +_state_1_3_5: + pushl %ebp + movl %esp, %ebp + pushl %ebx + movl 8(%ebp), %edx + call L91 +"L00000000010$pb": +L91: + popl %ebx + movzbl (%edx), %eax + cmpb $70, %al + je L79 + cmpb $102, %al + je L79 +L88: + leal LC0-"L00000000010$pb"(%ebx), %eax + movl %eax, 8(%ebp) + popl %ebx + leave + jmp L_puts$stub + .align 4,0x90 +L79: + movzbl 1(%edx), %eax + leal 2(%edx), %ecx + cmpb $79, %al + je L84 + cmpb $111, %al + jne L88 + movl %ecx, 8(%ebp) + popl %ebx + leave + jmp _state_7_11_13_15 + .align 4,0x90 +L84: + movl %ecx, 8(%ebp) + popl %ebx + leave + jmp _state_9_11_13_15 + .section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5 +L_printf$stub: + .indirect_symbol _printf + hlt ; hlt ; hlt ; hlt ; hlt +L_puts$stub: + .indirect_symbol _puts + hlt ; hlt ; hlt ; hlt ; hlt + .subsections_via_symbols
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/c/reg.c Tue Jun 15 00:54:59 2010 +0900 @@ -0,0 +1,81 @@ +#include <stdio.h> + +__code state_8(char* s); +__code state_1_3_5_6_7(char* s); +__code state_1_3_4_5_7(char* s); +__code state_1_2_3_5_7(char* s); +__code accept(char* s); +__code reject(char* s); + +int main(int argc, char* argv[]) { + puts("regexp: (A|B)*C"); + puts("number of state: 4"); + printf("string: %s\n", argv[1]); + goto state_1_3_5_6_7(argv[1]); + return 0; +} + +__code state_8(char* s) { + switch(*s++) { + case '\0': + goto accept(s); + break; + default: goto reject(s); + } +} + +__code state_1_3_5_6_7(char* s) { + switch(*s++) { + case 'A': + goto state_1_2_3_5_7(s); + break; + case 'C': + goto state_8(s); + break; + case 'B': + goto state_1_3_4_5_7(s); + break; + default: goto reject(s); + } +} + +__code state_1_3_4_5_7(char* s) { + switch(*s++) { + case 'A': + goto state_1_2_3_5_7(s); + break; + case 'C': + goto state_8(s); + break; + case 'B': + goto state_1_3_4_5_7(s); + break; + default: goto reject(s); + } +} + +__code state_1_2_3_5_7(char* s) { + switch(*s++) { + case 'A': + goto state_1_2_3_5_7(s); + break; + case 'C': + goto state_8(s); + break; + case 'B': + goto state_1_3_4_5_7(s); + break; + default: goto reject(s); + } +} + + +__code accept(char* s) { + printf("\nstring matches regexp. \n\n"); +} + + +__code reject(char* s) { + printf("\nstring does not match regexp. \n\n"); +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/c/regcbc.c Tue Jun 15 00:54:59 2010 +0900 @@ -0,0 +1,108 @@ +#include <stdio.h> + +__code state_14(char* s); +__code state_12(char* s); +__code state_9_11_13_15(char* s); +__code state_7_11_13_15(char* s); +__code state_2_6_8_10(char* s); +__code state_4_6_8_10(char* s); +__code state_1_3_5(char* s); +__code accept(char* s); +__code reject(char* s); + +int main(int argc, char* argv[]) { + puts("regexp: (f|F)(o|O)(o|O)"); + puts("number of state: 7"); + printf("string: %s\n", argv[1]); + goto state_1_3_5(argv[1]); + return 0; +} + +__code state_14(char* s) { + switch(*s++) { + case '\0': + goto accept(s); + break; + default: goto reject(s); + } +} + +__code state_12(char* s) { + switch(*s++) { + case '\0': + goto accept(s); + break; + default: goto reject(s); + } +} + +__code state_9_11_13_15(char* s) { + switch(*s++) { + case 'o': + goto state_12(s); + break; + case 'O': + goto state_14(s); + break; + default: goto reject(s); + } +} + +__code state_7_11_13_15(char* s) { + switch(*s++) { + case 'o': + goto state_12(s); + break; + case 'O': + goto state_14(s); + break; + default: goto reject(s); + } +} + +__code state_2_6_8_10(char* s) { + switch(*s++) { + case 'o': + goto state_7_11_13_15(s); + break; + case 'O': + goto state_9_11_13_15(s); + break; + default: goto reject(s); + } +} + +__code state_4_6_8_10(char* s) { + switch(*s++) { + case 'O': + goto state_9_11_13_15(s); + break; + case 'o': + goto state_7_11_13_15(s); + break; + default: goto reject(s); + } +} + +__code state_1_3_5(char* s) { + switch(*s++) { + case 'F': + goto state_4_6_8_10(s); + break; + case 'f': + goto state_2_6_8_10(s); + break; + default: goto reject(s); + } +} + + +__code accept(char* s) { + printf("\nstring matches regexp. \n\n"); +} + + +__code reject(char* s) { + printf("\nstring does not match regexp. \n\n"); +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/ll/emit.ll Tue Jun 15 00:54:59 2010 +0900 @@ -0,0 +1,38 @@ + +define fastcc void @driver(i32) { +entry: + %1 = load i8* getelementptr ([4 x i8]* @0, i32 0, i32 0) ; <i8> [#uses=1] + switch i8 %1, label %default [ + i8 65, label %case0 + i8 66, label %case1 + ] + +return: ; preds = %case1, %case0, %default + ret void + +default: ; preds = %entry + call void @reject(i32 0) + br label %return + +case0: ; preds = %entry + call void @accept(i32 1) + br label %return + +case1: ; preds = %entry + call void @accept(i32 1) + br label %return +} + +define fastcc void @accept(i32) { +entry: + call void (i8*, ...)* @printf(i8* getelementptr ([24 x i8]* @1, i32 0, i32 0), i8* getelementptr ([5 x i8]* @2, i32 0, i32 0)) + ret void +} + +define fastcc void @reject(i32) { +entry: + %1 = getelementptr [4 x i8]* @0, i32 0, i32 %0 ; <i8*> [#uses=1] + call void (i8*, ...)* @printf(i8* getelementptr ([30 x i8]* @3, i32 0, i32 0), i8* %1, i8* getelementptr ([5 x i8]* @4, i32 0, i32 0)) + ret void +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/ll/reg.ll Tue Jun 15 00:54:59 2010 +0900 @@ -0,0 +1,99 @@ +; ModuleID = 'regllvm' +global [9 x i8] c"ABBBBBBC\00" ; <[9 x i8]*>:0 [#uses=4] +global [28 x i8] c"state: %s, arg: %c(int %d)\0A\00" ; <[28 x i8]*>:1 [#uses=0] + +define i32 @main(i32 %index) { +entry: + %0 = malloc i32 ; <i32*> [#uses=11] + store i32 %index, i32* %0, align 8 + br label %state_1_3_5_6_7 + +state_1_3_5_6_7: ; preds = %entry + %1 = load i32* %0, align 8 ; <i32> [#uses=2] + %2 = sext i32 %1 to i64 ; <i64> [#uses=1] + %3 = getelementptr [9 x i8]* @0, i64 0, i64 %2 ; <i8*> [#uses=1] + %4 = add i32 %1, 1 ; <i32> [#uses=1] + store i32 %4, i32* %0, align 8 + %5 = load i8* %3 ; <i8> [#uses=1] + switch i8 %5, label %reject [ + i8 65, label %state_1_3_5_6_7_case0 + i8 67, label %state_1_3_5_6_7_case1 + i8 66, label %state_1_3_5_6_7_case2 + ] + +reject: ; preds = %state_1_2_3_5_7, %state_1_3_4_5_7, %state_1_3_5_6_7, %state_8 + free i32* %0 + ret i32 0 + +state_1_3_5_6_7_case0: ; preds = %state_1_3_5_6_7 + br label %state_1_2_3_5_7 + +state_1_2_3_5_7: ; preds = %state_1_2_3_5_7_case0, %state_1_3_4_5_7_case0, %state_1_3_5_6_7_case0 + %6 = load i32* %0, align 8 ; <i32> [#uses=2] + %7 = sext i32 %6 to i64 ; <i64> [#uses=1] + %8 = getelementptr [9 x i8]* @0, i64 0, i64 %7 ; <i8*> [#uses=1] + %9 = add i32 %6, 1 ; <i32> [#uses=1] + store i32 %9, i32* %0, align 8 + %10 = load i8* %8 ; <i8> [#uses=1] + switch i8 %10, label %reject [ + i8 65, label %state_1_2_3_5_7_case0 + i8 67, label %state_1_2_3_5_7_case1 + i8 66, label %state_1_2_3_5_7_case2 + ] + +state_1_2_3_5_7_case0: ; preds = %state_1_2_3_5_7 + br label %state_1_2_3_5_7 + +state_1_2_3_5_7_case1: ; preds = %state_1_2_3_5_7 + br label %state_8 + +state_8: ; preds = %state_1_2_3_5_7_case1, %state_1_3_4_5_7_case1, %state_1_3_5_6_7_case1 + %11 = load i32* %0, align 8 ; <i32> [#uses=2] + %12 = sext i32 %11 to i64 ; <i64> [#uses=1] + %13 = getelementptr [9 x i8]* @0, i64 0, i64 %12 ; <i8*> [#uses=1] + %14 = add i32 %11, 1 ; <i32> [#uses=1] + store i32 %14, i32* %0, align 8 + %15 = load i8* %13 ; <i8> [#uses=1] + switch i8 %15, label %reject [ + i8 0, label %state_8_case0 + ] + +state_8_case0: ; preds = %state_8 + br label %accpet + +accpet: ; preds = %state_8_case0 + free i32* %0 + ret i32 1 + +state_1_2_3_5_7_case2: ; preds = %state_1_2_3_5_7 + br label %state_1_3_4_5_7 + +state_1_3_4_5_7: ; preds = %state_1_2_3_5_7_case2, %state_1_3_4_5_7_case2, %state_1_3_5_6_7_case2 + %16 = load i32* %0, align 8 ; <i32> [#uses=2] + %17 = sext i32 %16 to i64 ; <i64> [#uses=1] + %18 = getelementptr [9 x i8]* @0, i64 0, i64 %17 ; <i8*> [#uses=1] + %19 = add i32 %16, 1 ; <i32> [#uses=1] + store i32 %19, i32* %0, align 8 + %20 = load i8* %18 ; <i8> [#uses=1] + switch i8 %20, label %reject [ + i8 65, label %state_1_3_4_5_7_case0 + i8 67, label %state_1_3_4_5_7_case1 + i8 66, label %state_1_3_4_5_7_case2 + ] + +state_1_3_4_5_7_case0: ; preds = %state_1_3_4_5_7 + br label %state_1_2_3_5_7 + +state_1_3_4_5_7_case1: ; preds = %state_1_3_4_5_7 + br label %state_8 + +state_1_3_4_5_7_case2: ; preds = %state_1_3_4_5_7 + br label %state_1_3_4_5_7 + +state_1_3_5_6_7_case1: ; preds = %state_1_3_5_6_7 + br label %state_8 + +state_1_3_5_6_7_case2: ; preds = %state_1_3_5_6_7 + br label %state_1_3_4_5_7 +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/ll/regllvm.label.ll Tue Jun 15 00:54:59 2010 +0900 @@ -0,0 +1,196 @@ +; ModuleID = 'regllvm' +global [6 x i8] c"ABBBC\00" ; <[6 x i8]*>:0 [#uses=8] +global [28 x i8] c"state: %s, arg: %c(int %d)\0A\00" ; <[28 x i8]*>:1 [#uses=1] +global [22 x i8] c"%s does match regexp\0A\00" ; <[22 x i8]*>:2 [#uses=1] +global [26 x i8] c"%s does not match regexp\0A\00" ; <[26 x i8]*>:3 [#uses=1] +global [16 x i8] c"state_1_2_3_5_7\00" ; <[16 x i8]*>:4 [#uses=1] +global [16 x i8] c"state_1_2_3_5_7\00" ; <[16 x i8]*>:5 [#uses=1] +global [16 x i8] c"state_1_2_3_5_7\00" ; <[16 x i8]*>:6 [#uses=1] +global [16 x i8] c"state_1_2_3_5_7\00" ; <[16 x i8]*>:7 [#uses=1] + +define i32 @main(i32) { +entry: + %1 = tail call i32 @state_1_3_5_6_7(i32 %0) ; <i32> [#uses=1] + ret i32 %1 +} + +define i32 @accept(i32) { +entry: + tail call void (i8*, ...)* @printf(i8* getelementptr ([22 x i8]* @2, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0)) + ret i32 1 +} + +define i32 @reject(i32) { +entry: + tail call void (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0)) + ret i32 0 +} + +define i32 @state_8(i32 %index) { +entry: + %index1 = sext i32 %index to i64 ; <i64> [#uses=1] + %0 = getelementptr [6 x i8]* @0, i64 0, i64 %index1 ; <i8*> [#uses=1] + %1 = load i8* %0 ; <i8> [#uses=3] + tail call void (i8*, ...)* @printf(i8* getelementptr ([28 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @4, i32 0, i32 0), i8 %1, i8 %1) + switch i8 %1, label %default [ + i8 0, label %case0 + ] + +default: ; preds = %entry + tail call void (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0)) + ret i32 0 + +case0: ; preds = %entry + tail call void (i8*, ...)* @printf(i8* getelementptr ([22 x i8]* @2, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0)) + ret i32 1 +} + +define i32 @state_1_3_5_6_7(i32 %index) { +entry: + %index1 = sext i32 %index to i64 ; <i64> [#uses=1] + %0 = getelementptr [6 x i8]* @0, i64 0, i64 %index1 ; <i8*> [#uses=1] + %1 = add i32 %index, 1 ; <i32> [#uses=3] + %2 = load i8* %0 ; <i8> [#uses=3] + tail call void (i8*, ...)* @printf(i8* getelementptr ([28 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @5, i32 0, i32 0), i8 %2, i8 %2) + switch i8 %2, label %default [ + i8 65, label %case0 + i8 67, label %case1 + i8 66, label %case2 + ] + +default: ; preds = %entry + tail call void (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0)) + ret i32 0 + +case0: ; preds = %entry + %3 = tail call i32 @state_1_2_3_5_7(i32 %1) ; <i32> [#uses=1] + ret i32 %3 + +case1: ; preds = %entry + %4 = sext i32 %1 to i64 ; <i64> [#uses=1] + %5 = getelementptr [6 x i8]* @0, i64 0, i64 %4 ; <i8*> [#uses=1] + %6 = load i8* %5 ; <i8> [#uses=3] + tail call void (i8*, ...)* @printf(i8* getelementptr ([28 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @4, i32 0, i32 0), i8 %6, i8 %6) + switch i8 %6, label %default.i [ + i8 0, label %case0.i + ] + +default.i: ; preds = %case1 + tail call void (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0)) + br label %state_8.exit + +state_8.exit: ; preds = %default.i, %case0.i + %7 = phi i32 [ 1, %case0.i ], [ 0, %default.i ] ; <i32> [#uses=1] + ret i32 %7 + +case0.i: ; preds = %case1 + tail call void (i8*, ...)* @printf(i8* getelementptr ([22 x i8]* @2, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0)) + br label %state_8.exit + +case2: ; preds = %entry + %8 = tail call i32 @state_1_3_4_5_7(i32 %1) ; <i32> [#uses=1] + ret i32 %8 +} + +define i32 @state_1_3_4_5_7(i32 %index) { +entry: + br label %tailrecurse + +tailrecurse: ; preds = %case2, %entry + %index.tr = phi i32 [ %index, %entry ], [ %1, %case2 ] ; <i32> [#uses=2] + %index1 = sext i32 %index.tr to i64 ; <i64> [#uses=1] + %0 = getelementptr [6 x i8]* @0, i64 0, i64 %index1 ; <i8*> [#uses=1] + %1 = add i32 %index.tr, 1 ; <i32> [#uses=3] + %2 = load i8* %0 ; <i8> [#uses=3] + tail call void (i8*, ...)* @printf(i8* getelementptr ([28 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @6, i32 0, i32 0), i8 %2, i8 %2) + switch i8 %2, label %default [ + i8 65, label %case0 + i8 67, label %case1 + i8 66, label %case2 + ] + +default: ; preds = %tailrecurse + tail call void (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0)) + ret i32 0 + +case0: ; preds = %tailrecurse + %3 = tail call i32 @state_1_2_3_5_7(i32 %1) ; <i32> [#uses=1] + ret i32 %3 + +case1: ; preds = %tailrecurse + %4 = sext i32 %1 to i64 ; <i64> [#uses=1] + %5 = getelementptr [6 x i8]* @0, i64 0, i64 %4 ; <i8*> [#uses=1] + %6 = load i8* %5 ; <i8> [#uses=3] + tail call void (i8*, ...)* @printf(i8* getelementptr ([28 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @4, i32 0, i32 0), i8 %6, i8 %6) + switch i8 %6, label %default.i [ + i8 0, label %case0.i + ] + +default.i: ; preds = %case1 + tail call void (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0)) + br label %state_8.exit + +state_8.exit: ; preds = %default.i, %case0.i + %7 = phi i32 [ 1, %case0.i ], [ 0, %default.i ] ; <i32> [#uses=1] + ret i32 %7 + +case0.i: ; preds = %case1 + tail call void (i8*, ...)* @printf(i8* getelementptr ([22 x i8]* @2, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0)) + br label %state_8.exit + +case2: ; preds = %tailrecurse + br label %tailrecurse +} + +define i32 @state_1_2_3_5_7(i32 %index) { +entry: + br label %tailrecurse + +tailrecurse: ; preds = %case0, %entry + %index.tr = phi i32 [ %index, %entry ], [ %1, %case0 ] ; <i32> [#uses=2] + %index1 = sext i32 %index.tr to i64 ; <i64> [#uses=1] + %0 = getelementptr [6 x i8]* @0, i64 0, i64 %index1 ; <i8*> [#uses=1] + %1 = add i32 %index.tr, 1 ; <i32> [#uses=3] + %2 = load i8* %0 ; <i8> [#uses=3] + tail call void (i8*, ...)* @printf(i8* getelementptr ([28 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @7, i32 0, i32 0), i8 %2, i8 %2) + switch i8 %2, label %default [ + i8 65, label %case0 + i8 67, label %case1 + i8 66, label %case2 + ] + +default: ; preds = %tailrecurse + tail call void (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0)) + ret i32 0 + +case0: ; preds = %tailrecurse + br label %tailrecurse + +case1: ; preds = %tailrecurse + %3 = sext i32 %1 to i64 ; <i64> [#uses=1] + %4 = getelementptr [6 x i8]* @0, i64 0, i64 %3 ; <i8*> [#uses=1] + %5 = load i8* %4 ; <i8> [#uses=3] + tail call void (i8*, ...)* @printf(i8* getelementptr ([28 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @4, i32 0, i32 0), i8 %5, i8 %5) + switch i8 %5, label %default.i [ + i8 0, label %case0.i + ] + +default.i: ; preds = %case1 + tail call void (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0)) + br label %state_8.exit + +state_8.exit: ; preds = %default.i, %case0.i + %6 = phi i32 [ 1, %case0.i ], [ 0, %default.i ] ; <i32> [#uses=1] + ret i32 %6 + +case0.i: ; preds = %case1 + tail call void (i8*, ...)* @printf(i8* getelementptr ([22 x i8]* @2, i32 0, i32 0), i8* getelementptr ([6 x i8]* @0, i32 0, i32 0)) + br label %state_8.exit + +case2: ; preds = %tailrecurse + %7 = tail call i32 @state_1_3_4_5_7(i32 %1) ; <i32> [#uses=1] + ret i32 %7 +} + +declare void @printf(i8*, ...) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/code/ll/regllvm.label_optimized.ll Tue Jun 15 00:54:59 2010 +0900 @@ -0,0 +1,97 @@ +; ModuleID = 'regllvm' +global [6 x i8] c"ABBBC\00" ; <[6 x i8]*>:0 [#uses=4] +global [28 x i8] c"state: %s, arg: %c(int %d)\0A\00" ; <[28 x i8]*>:1 [#uses=0] + +define i32 @main(i32) { +entry: + %1 = malloc i32 ; <i32*> [#uses=9] + store i32 %0, i32* %1, align 8 + br label %state_1_3_5_6_7 + +state_1_3_5_6_7: ; preds = %entry + %2 = load i32* %1, align 8 ; <i32> [#uses=2] + %3 = sext i32 %2 to i64 ; <i64> [#uses=1] + %4 = getelementptr [6 x i8]* @0, i64 0, i64 %3 ; <i8*> [#uses=1] + %5 = add i32 %2, 1 ; <i32> [#uses=1] + store i32 %5, i32* %1, align 8 + %6 = load i8* %4 ; <i8> [#uses=1] + switch i8 %6, label %reject [ + i8 65, label %state_1_3_5_6_7_case0 + i8 67, label %state_1_3_5_6_7_case1 + i8 66, label %state_1_3_5_6_7_case2 + ] + +reject: ; preds = %state_1_2_3_5_7, %state_1_3_4_5_7, %state_1_3_5_6_7, %state_8 + ret i32 0 + +state_1_3_5_6_7_case0: ; preds = %state_1_3_5_6_7 + br label %state_1_2_3_5_7 + +state_1_2_3_5_7: ; preds = %state_1_2_3_5_7_case0, %state_1_3_4_5_7_case0, %state_1_3_5_6_7_case0 + %7 = load i32* %1, align 8 ; <i32> [#uses=2] + %8 = sext i32 %7 to i64 ; <i64> [#uses=1] + %9 = getelementptr [6 x i8]* @0, i64 0, i64 %8 ; <i8*> [#uses=1] + %10 = add i32 %7, 1 ; <i32> [#uses=1] + store i32 %10, i32* %1, align 8 + %11 = load i8* %9 ; <i8> [#uses=1] + switch i8 %11, label %reject [ + i8 65, label %state_1_2_3_5_7_case0 + i8 67, label %state_1_2_3_5_7_case1 + i8 66, label %state_1_2_3_5_7_case2 + ] + +state_1_2_3_5_7_case0: ; preds = %state_1_2_3_5_7 + br label %state_1_2_3_5_7 + +state_1_2_3_5_7_case1: ; preds = %state_1_2_3_5_7 + br label %state_8 + +state_8: ; preds = %state_1_2_3_5_7_case1, %state_1_3_4_5_7_case1, %state_1_3_5_6_7_case1 + %12 = load i32* %1, align 8 ; <i32> [#uses=2] + %13 = sext i32 %12 to i64 ; <i64> [#uses=1] + %14 = getelementptr [6 x i8]* @0, i64 0, i64 %13 ; <i8*> [#uses=1] + %15 = add i32 %12, 1 ; <i32> [#uses=1] + store i32 %15, i32* %1, align 8 + %16 = load i8* %14 ; <i8> [#uses=1] + switch i8 %16, label %reject [ + i8 0, label %state_8_case0 + ] + +state_8_case0: ; preds = %state_8 + br label %accpet + +accpet: ; preds = %state_8_case0 + ret i32 1 + +state_1_2_3_5_7_case2: ; preds = %state_1_2_3_5_7 + br label %state_1_3_4_5_7 + +state_1_3_4_5_7: ; preds = %state_1_2_3_5_7_case2, %state_1_3_4_5_7_case2, %state_1_3_5_6_7_case2 + %17 = load i32* %1, align 8 ; <i32> [#uses=2] + %18 = sext i32 %17 to i64 ; <i64> [#uses=1] + %19 = getelementptr [6 x i8]* @0, i64 0, i64 %18 ; <i8*> [#uses=1] + %20 = add i32 %17, 1 ; <i32> [#uses=1] + store i32 %20, i32* %1, align 8 + %21 = load i8* %19 ; <i8> [#uses=1] + switch i8 %21, label %reject [ + i8 65, label %state_1_3_4_5_7_case0 + i8 67, label %state_1_3_4_5_7_case1 + i8 66, label %state_1_3_4_5_7_case2 + ] + +state_1_3_4_5_7_case0: ; preds = %state_1_3_4_5_7 + br label %state_1_2_3_5_7 + +state_1_3_4_5_7_case1: ; preds = %state_1_3_4_5_7 + br label %state_8 + +state_1_3_4_5_7_case2: ; preds = %state_1_3_4_5_7 + br label %state_1_3_4_5_7 + +state_1_3_5_6_7_case1: ; preds = %state_1_3_5_6_7 + br label %state_8 + +state_1_3_5_6_7_case2: ; preds = %state_1_3_5_6_7 + br label %state_1_3_4_5_7 +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dfareg.py Tue Jun 15 00:54:59 2010 +0900 @@ -0,0 +1,456 @@ +#!/usr/bin/env python + +import copy +import sys +from optparse import OptionParser + +class NondeterministicFiniteAutomaton(object): + def __init__(self, transition, start, accepts, map_): + self.transition = transition + self.start = start + self.accepts = accepts + self.map = map_ + + def epsilonExpand(self, set_): + que = set(set_) + done = set() + while que: + stat = que.pop() + nexts = self.transition(stat, "") + done.add(stat) + for nextStat in nexts: + if not nextStat in done: que.add(nextStat) + return frozenset(done) + +class DeterministicFiniteAutomaton(object): + def __init__(self, transition, start, accepts, map_): + self.transition = transition + self.start = start + self.accepts = accepts + self.map = map_ + def getRuntime(self): + return DFARuntime(self) + +class DFARuntime(object): + def __init__(self, DFA): + self.DFA = DFA + self.curState = self.DFA.start + + def doTransition(self, char): + self.curState = self.DFA.transition( + self.curState, char) + + def isAcceptState(self): + return self.curState in self.DFA.accepts + + def doesAccept(self, input): + for alphabet in input: + self.doTransition(alphabet) + return self.isAcceptState() + +class Token(object): + CHARACTER = 0 + OPE_UNION = 1 + OPE_STAR = 2 + LPAREN = 3 + RPAREN = 4 + EOF = 5 + + def __init__(self, value, kind): + self.value = value + self.kind = kind + + +class Lexer(object): + def __init__(self, str): + self.stringList = list(str) + + def scan(self): + if not self.stringList: + return Token(None, Token.EOF) + + ch = self.stringList.pop(0) + if ch == '\\': + return Token(self.stringList.pop(0), Token.CHARACTER) + elif ch == '|': + return Token(ch, Token.OPE_UNION) + elif ch == '(': + return Token(ch, Token.LPAREN) + elif ch == ')': + return Token(ch, Token.RPAREN) + elif ch == '*': + return Token(ch, Token.OPE_STAR) + else: + return Token(ch, Token.CHARACTER) + +""" + Context-free Grammer: + (A) expression -> subexpr EOF + (B) subexpr -> seq '|' subexpr | seq + (C) seq -> subseq | '' + (D) subseq -> star subseq | star + (E) star -> factor '*' | factor + (F) factor -> '(' subexpr ')' | CHARACTER +""" + +class Parser(object): + def __init__(self, lexer): + self.lexer = lexer + self.look = None + self.move() + + def match(self, tag): + if self.look.kind != tag: + raise Exeption("syntax error") + self.move() + + def move(self): + self.look = self.lexer.scan() + + def factor(self): + if self.look.kind == Token.LPAREN: + # factor -> '(' subexpr ')' + self.match(Token.LPAREN) + node = self.subexpr() + self.match(Token.RPAREN) + return node + else: + # factor -> CHARACTER + node = Character(self.look.value) + self.match(Token.CHARACTER) + return node + + def star(self): + # star -> factor '*' | factor + node = self.factor() + if self.look.kind == Token.OPE_STAR: + self.match(Token.OPE_STAR) + node = Star(node) + return node + + def seq(self): + if self.look.kind == Token.LPAREN \ + or self.look.kind == Token.CHARACTER: + # seq -> subseq + return self.subseq() + else: + # seq -> '' + return Character("") + + def subseq(self): + node1 = self.star() + if self.look.kind == Token.LPAREN \ + or self.look.kind == Token.CHARACTER: + # subseq -> star subseq + node2 = self.subseq() + node = Concat(node1, node2) + return node + else: + # subseq -> star + return node1 + + def subexpr(self): + node1 = self.seq() + if self.look.kind == Token.OPE_UNION: + self.match(Token.OPE_UNION) + # subexpr -> seq '|' subexpr + node2 = self.subexpr() + node = Union(node1, node2) + return node + else: + # subexpr -> seq + return node1 + + def expression(self): + # expression -> subexpr EOF + node = self.subexpr() + self.match(Token.EOF) + + # Create TREE, NFA + context = Context() + fragment = node.assemble(context) + return fragment.build() + +class Context(object): + def __init__(self): + self.stateCount = 0 + + def newState(self): + self.stateCount += 1 + return self.stateCount + +class NFAFragment(object): + def __init__(self): + self.start = None # integer + self.accepts = None # frozenset + self.map = dict() # transition + + def connect(self, from_, char, to): + slot = self.map.setdefault((from_, char), set()) + slot.add(to) + + def newSkelton(self): + # copy fragment (self), and it return + newFrag = NFAFragment(); + newFrag.map = copy.deepcopy(self.map) + return newFrag + + def __or__(self, frag): + newFrag = self.newSkelton() + for k, v in frag.map.iteritems(): + newFrag.map[k] = v.copy() + return newFrag + + def build(self): + map_ = self.map + def transition(state, char): + return frozenset(map_.get((state, char), [])) + + return NondeterministicFiniteAutomaton( + transition, + self.start, + self.accepts, + self.map + ) + +class Character(object): + def __init__(self, char): + self.char = char + + def assemble(self, context): + frag = NFAFragment() + s1 = context.newState() + s2 = context.newState() + frag.connect(s1, self.char, s2) + frag.start = s1 + frag.accepts = frozenset([s2]) + return frag + +class Union(object): + def __init__(self, op1, op2): + self.op1 = op1 + self.op2 = op2 + + def assemble(self, context): + frag1 = self.op1.assemble(context) + frag2 = self.op2.assemble(context) + frag = frag1 | frag2 + s = context.newState() + frag.connect(s, "", frag1.start) + frag.connect(s, "", frag2.start) + frag.start = s + frag.accepts = frag1.accepts | frag2.accepts + return frag + +class Concat(object): + def __init__(self, op1, op2): + self.op1 = op1 + self.op2 = op2 + + def assemble(self, context): + frag1 = self.op1.assemble(context) + frag2 = self.op2.assemble(context) + frag = frag1 | frag2 + + for state in frag1.accepts: + frag.connect(state, "", frag2.start) + + frag.start = frag1.start + frag.accepts = frag2.accepts + return frag + +class Star(object): + def __init__(self, op): + self.op = op + + def assemble(self, context): + fragOrig = self.op.assemble(context) + frag = fragOrig.newSkelton() + + for state in fragOrig.accepts: + frag.connect(state, "", fragOrig.start) + + s = context.newState() + frag.connect(s, "", fragOrig.start) + frag.start = s + frag.accepts = fragOrig.accepts | frozenset([s]) + return frag + +class NonDisjoinSets(object): + def __init__(self, sub): + self.sub = sub + def __contains__(self, a_set): + return a_set & self.sub + +def nfa2dfa(nfa): + def transition(set_, alpha): + ret = set() + for elem in set_: + ret |= nfa.transition(elem, alpha) + return nfa.epsilonExpand(frozenset(ret)) + + def stateSetTransition(stateSet, char): + trans = set() + for state in stateSet: + t = nfa.map.setdefault((state, char), None) + if not t == None: trans.update(nfa.epsilonExpand(t)) + return frozenset(trans) + + map_ = dict() + que = set(frozenset([nfa.epsilonExpand(set([nfa.start]))])) + done = set() + + while que: + stateSet = que.pop() + + for state in stateSet: + for k, v in nfa.map.iteritems(): + if state == k[0] and k[1] != '': + slot = map_.setdefault((stateSet, k[1]), set()) + slot.update(nfa.epsilonExpand(v)) + + done.add(stateSet) + + for v in map_.itervalues(): + if not v in done: + que.add(frozenset(v)) + + return DeterministicFiniteAutomaton( + transition, + nfa.epsilonExpand(frozenset([nfa.start])), + NonDisjoinSets(nfa.accepts), + map_ + ) + +# create call graph (as dictionary) +class CallGraph(object): + def __init__(self, dfa): + self.dfa = dfa + self.start = "state_"+self.set2name(dfa.start) + (self.map, self.accepts) = self.getCallGraph(self.dfa) + + # return state name from set + def set2name(self, set_): + l = list(set_) + l.sort() + return '_'.join(str(x) for x in l) + + def getCallGraph(self, dfa): + + funcMap = dict() + funcMap[dfa.start] = set() + + # dfa.map => {(frozenset([1, 15]), 'd'): frozenset([16]), ....} + # : frozenset(1, 15) x 'd' -> frozenset([16]) + + for v in dfa.map.itervalues(): + funcMap[frozenset(v)] = set() + + for key in dfa.map.iterkeys(): + slot = funcMap.setdefault(key[0], set()) + slot.add(key[1]) + + for v in dfa.map.itervalues(): + funcMap[frozenset(v)] = set() + + for key in dfa.map.iterkeys(): + slot = funcMap.setdefault(key[0], set()) + slot.add(key[1]) + + callGraph = dict() + accepts = list() + + # emit cbc code + for k in funcMap.iterkeys(): + callGraph["state_"+self.set2name(k)] = set() + + for k, v in funcMap.iteritems(): + caseMap = dict() + for case in v: + caseMap[case] = "state_"+self.set2name(dfa.map[(frozenset(k), case)]) + if k in dfa.accepts: + accepts.append("state_"+self.set2name(k)) + caseMap['\\0'] = "accept" + callGraph["state_"+self.set2name(k)] = caseMap + + return (callGraph, accepts) + +# emit CbC-souce, from CallGraph +def dfa2CbC(cg, regexp, emitCFlag): + if emitCFlag: + fun_type = 'void ' + call_t = '' + else: + fun_type = '__code ' + call_t = 'goto ' + + # emit cbc(or c) code + print "#include <stdio.h>\n" + for k in cg.map.iterkeys(): + print fun_type + k + "(char* s);" + print fun_type + 'accept(char* s);' + print fun_type + 'reject(char* s);' + print """ +int main(int argc, char* argv[]) { +\tputs(\"regexp: %s\"); +\tputs(\"number of state: %d\"); +\tprintf(\"string: %%s\\n\", argv[1]); +\t%s%s(argv[1]); +\treturn 0; +}\n""" % (regexp, len(cg.map), call_t, cg.start) + + for k, v in cg.map.iteritems(): + print fun_type + k + "(char* s) {" + print "\tswitch(*s++) {" + for case, next_state in v.iteritems(): + print "\t\tcase '%s': " % (case) + print "\t\t\t%s%s(s);\n\t\t\tbreak;" % (call_t, next_state) + + print "\t\tdefault: %sreject(s);\n\t}" % call_t + print "}\n" + + print """ +%saccept(char* s) { +\tprintf(\"\\nstring matches regexp. \\n\\n\"); +}\n""" % fun_type + print """ +%sreject(char* s) { +\tprintf(\"\\nstring does not match regexp. \\n\\n\"); +}\n""" % fun_type + +class Regexp(object): + def __init__(self, regexp): + self.regexp = regexp + self.nfa = None + self.dfa = None + self._compile() + + def _compile(self): + lexer_ = Lexer(self.regexp) + parser_ = Parser(lexer_) + self.nfa = parser_.expression() + self.dfa = nfa2dfa(self.nfa) + + def emitCbC(self, emitCFlag=False): + cg = CallGraph(self.dfa) + dfa2CbC(cg, self.regexp, emitCFlag) + + def matches(self, string): + runtime = self.dfa.getRuntime() + return runtime.doesAccept(string) + +def compile(regexp): + return Regexp(regexp) + +def main(argv): + myusage = "%prog [-C] regexp" + psr = OptionParser(usage=myusage) + psr.add_option("-C", action="store_true", dest="emitCFlag", default=False, help="emit C-source") + (opts, args) = psr.parse_args(sys.argv) + if len(args) == 1: + psr.print_help() + exit() + r = compile(args[1]) + r.emitCbC(opts.emitCFlag) + +if __name__ == '__main__' : main(sys.argv)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ll Tue Jun 15 00:54:59 2010 +0900 @@ -0,0 +1,146 @@ +; ModuleID = 'dfareg' +global [2 x i8] c"C\00" ; <[2 x i8]*>:0 [#uses=5] +global [20 x i8] c"state: %s, arg: %s\0A\00" ; <[20 x i8]*>:1 [#uses=1] +global [24 x i8] c"%s does match regexp. \0A\00" ; <[24 x i8]*>:2 [#uses=1] +global [28 x i8] c"%s does not match regexp. \0A\00" ; <[28 x i8]*>:3 [#uses=1] +global [8 x i8] c"state_8\00" ; <[8 x i8]*>:4 [#uses=1] +global [16 x i8] c"state_1_3_5_6_7\00" ; <[16 x i8]*>:5 [#uses=1] +global [16 x i8] c"state_1_3_4_5_7\00" ; <[16 x i8]*>:6 [#uses=1] +global [16 x i8] c"state_1_2_3_5_7\00" ; <[16 x i8]*>:7 [#uses=1] + +define fastcc void @accept(i32 %index) { +entry: + call void (i8*, ...)* @printf(i8* getelementptr ([24 x i8]* @2, i32 0, i32 0), i8* getelementptr ([2 x i8]* @0, i32 0, i32 0)) + ret void +} + +define fastcc void @reject(i32 %index) { +entry: + call void (i8*, ...)* @printf(i8* getelementptr ([28 x i8]* @3, i32 0, i32 0), i8* getelementptr ([2 x i8]* @0, i32 0, i32 0)) + ret void +} + +define fastcc void @state_8(i32 %index) { +entry: + %0 = getelementptr [2 x i8]* @0, i32 0, i32 %index ; <i8*> [#uses=2] + %1 = add i32 %index, 1 ; <i32> [#uses=2] + call void (i8*, ...)* @printf(i8* getelementptr ([20 x i8]* @1, i32 0, i32 0), i8* getelementptr ([8 x i8]* @4, i32 0, i32 0), i8* %0) + %2 = load i8* %0 ; <i8> [#uses=1] + switch i8 %2, label %default [ + i8 0, label %case0 + ] + +return: ; preds = %case0, %default + ret void + +default: ; preds = %entry + call void @reject(i32 %1) + br label %return + +case0: ; preds = %entry + call void @accept(i32 %1) + br label %return +} + +define fastcc void @state_1_3_5_6_7(i32 %index) { +entry: + %0 = getelementptr [2 x i8]* @0, i32 0, i32 %index ; <i8*> [#uses=2] + %1 = add i32 %index, 1 ; <i32> [#uses=4] + call void (i8*, ...)* @printf(i8* getelementptr ([20 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @5, i32 0, i32 0), i8* %0) + %2 = load i8* %0 ; <i8> [#uses=1] + switch i8 %2, label %default [ + i8 65, label %case0 + i8 67, label %case1 + i8 66, label %case2 + ] + +return: ; preds = %case2, %case1, %case0, %default + ret void + +default: ; preds = %entry + call void @reject(i32 %1) + br label %return + +case0: ; preds = %entry + call void @state_1_2_3_5_7(i32 %1) + br label %return + +case1: ; preds = %entry + call void @state_8(i32 %1) + br label %return + +case2: ; preds = %entry + call void @state_1_3_4_5_7(i32 %1) + br label %return +} + +define fastcc void @state_1_3_4_5_7(i32 %index) { +entry: + %0 = getelementptr [2 x i8]* @0, i32 0, i32 %index ; <i8*> [#uses=2] + %1 = add i32 %index, 1 ; <i32> [#uses=4] + call void (i8*, ...)* @printf(i8* getelementptr ([20 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @6, i32 0, i32 0), i8* %0) + %2 = load i8* %0 ; <i8> [#uses=1] + switch i8 %2, label %default [ + i8 65, label %case0 + i8 67, label %case1 + i8 66, label %case2 + ] + +return: ; preds = %case2, %case1, %case0, %default + ret void + +default: ; preds = %entry + call void @reject(i32 %1) + br label %return + +case0: ; preds = %entry + call void @state_1_2_3_5_7(i32 %1) + br label %return + +case1: ; preds = %entry + call void @state_8(i32 %1) + br label %return + +case2: ; preds = %entry + call void @state_1_3_4_5_7(i32 %1) + br label %return +} + +define fastcc void @state_1_2_3_5_7(i32 %index) { +entry: + %0 = getelementptr [2 x i8]* @0, i32 0, i32 %index ; <i8*> [#uses=2] + %1 = add i32 %index, 1 ; <i32> [#uses=4] + call void (i8*, ...)* @printf(i8* getelementptr ([20 x i8]* @1, i32 0, i32 0), i8* getelementptr ([16 x i8]* @7, i32 0, i32 0), i8* %0) + %2 = load i8* %0 ; <i8> [#uses=1] + switch i8 %2, label %default [ + i8 65, label %case0 + i8 67, label %case1 + i8 66, label %case2 + ] + +return: ; preds = %case2, %case1, %case0, %default + ret void + +default: ; preds = %entry + call void @reject(i32 %1) + br label %return + +case0: ; preds = %entry + call void @state_1_2_3_5_7(i32 %1) + br label %return + +case1: ; preds = %entry + call void @state_8(i32 %1) + br label %return + +case2: ; preds = %entry + call void @state_1_3_4_5_7(i32 %1) + br label %return +} + +declare void @printf(i8*, ...) + +start: state_1_3_5_6_7 +state: state_1_3_5_6_7, arg: C +state: state_8, arg: +C does match regexp.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reg2llvm.py Tue Jun 15 00:54:59 2010 +0900 @@ -0,0 +1,202 @@ +#!/usr/bin/env python + +# Import the llvm-py modules. +# encodingfrom llvm import * +from llvm.core import * +from llvm.passes import * +from llvm.ee import * # new import: ee = Execution Engine +from dfareg import * + +class RegLLVM(object): + # define llvm core types, and const + int_t = Type.int() + char_t = Type.int(8) + charptr_t = Type.pointer(char_t) + charptrptr_t = Type.pointer(charptr_t) + const_zero = Constant.int(int_t, 0) + const_one = Constant.int(int_t, 1) + llvm.GuaranteedTailCallOpt = True + + def __init__(self, modName, regexp, string, impl_label, optimized, dump): + # Create a module + self.mod = Module.new(modName) + self.regexp = regexp + self.optimized = optimized + self.dump = dump + self.impl_label = impl_label + + self.matchp_str = self.new_str_const(string) + self.dump_str = self.new_str_const("state: %s, arg: %c(int %d)\n") + + def optional_func_decl(fun): + fun.calling_convertion = CC_X86_FASTCALL + fun.args[0].name = "index" + + state_list = dict() + main = self.mod.add_function( + Type.function(self.int_t, (self.int_t,)), "main") + optional_func_decl(main) + main_entry = main.append_basic_block("entry") + + + if (impl_label): + accept_state = main.append_basic_block("accpet") + reject_state = main.append_basic_block("reject") + index_ptr = Builder.new(main_entry).malloc(self.int_t) + Builder.new(accept_state).free(index_ptr) + Builder.new(reject_state).free(index_ptr) + else: + # Create function - accept and reject (final state). + accept_state = self.mod.add_function( + Type.function(self.int_t, (self.int_t,)), "accept") + optional_func_decl(accept_state) + reject_state = self.mod.add_function( + Type.function(self.int_t, (self.int_t,)), "reject") + optional_func_decl(reject_state) + + state_list["accept"] = accept_state + state_list["reject"] = reject_state + + # create Regexp Object (Regexp has dfa) + r = Regexp(regexp) + self.cg = CallGraph(r.dfa) + + # add state to module, (as function or label). + if (impl_label): + for state in self.cg.map.iterkeys(): + label = main.append_basic_block(state) + state_list[state] = label + else: + for state in self.cg.map.iterkeys(): + fun = self.mod.add_function( + Type.function(self.int_t, (self.int_t,)), state) + optional_func_decl(fun) + state_list[state] = fun + + # emit instructions + if (impl_label): emit = Builder.new(accept_state) + else: emit = Builder.new(accept_state.append_basic_block("entry")) + + if (dump): self.emit_call_printf(emit, "%s does match regexp\n", self.gep_first(emit, self.matchp_str)) + emit.ret(self.const_one) + + if (impl_label): emit = Builder.new(reject_state) + else: emit = Builder.new(reject_state.append_basic_block("entry")) + if (dump): self.emit_call_printf(emit, "%s does not match regexp\n", self.gep_first(emit, self.matchp_str)) + emit.ret(self.const_zero) + + if (impl_label): + # emit transition instruction with jump instruction + emit = Builder.new(main_entry) + emit.store(main.args[0], index_ptr) + emit.branch(state_list[self.cg.start]) + + for state, transition in self.cg.map.iteritems(): + emit = Builder.new(state_list[state]) + index = emit.load(index_ptr) + ptr = emit.gep(self.matchp_str, (self.const_zero, index)) + emit.store(emit.add(self.const_one, index), index_ptr) + char = emit.load(ptr) + si = emit.switch(char, state_list['reject'], len(transition)) + local_label = 0 + for case, next_state in transition.iteritems(): + bb = main.append_basic_block("%s_case%d" % (state, local_label)) #create default bb + emit = Builder.new(bb) + emit.branch(state_list[next_state]) + si.add_case(self.char_const(case), bb) + local_label += 1 + else: + for state, transition in self.cg.map.iteritems(): + cases = dict() + for case, next_state in transition.iteritems(): + cases[self.char_const(case)] = state_list[next_state] + state_fun = state_list[state] + emit = Builder.new(state_fun.append_basic_block("entry")) + ptr = emit.gep(self.matchp_str, (self.const_zero, state_fun.args[0])) + next_index = emit.add(state_fun.args[0], self.const_one) + char = emit.load(ptr) + + if (dump): self.emit_call_printf(emit, self.dump_str, self.gep_first(emit, self.new_str_const(fun.name)), char, char) + + label = 0 + default_bb = state_fun.append_basic_block("default") #create default bb + builder = Builder.new(default_bb) # default is reject. + ret = builder.call(reject_state, (next_index,)) + builder.ret(ret) + + si = emit.switch(char, default_bb, len(cases)) # create switch instruction with deafult case. + for case, nextFun in cases.iteritems(): + bb = state_fun.append_basic_block("case%d" % label) #create default bb + builder = Builder.new(bb) + ret = builder.call(nextFun, (next_index,)) + builder.ret(ret) + si.add_case(case, bb) + label += 1 + emit = Builder.new(main_entry) + ret = emit.call(state_list[self.cg.start], (main.args[0],)) + emit.ret(ret) + + self.mp = ModuleProvider.new(self.mod) + if (optimized): self.optimize() + self.ee = ExecutionEngine.new(self.mp) + self.main = main + + def getExecutionEngine(self): + return self.ee + + def optimize(self): + #optimization passes + pm = PassManager.new() + pm.add(TargetData.new('')) + pm.add(PASS_FUNCTION_INLINING) + pm.run(self.mod) + fp = FunctionPassManager.new(self.mp) + fp.add(TargetData.new('')) + fp.add(PASS_BLOCK_PLACEMENT) + fp.add(PASS_INSTRUCTION_COMBINING) + fp.add(PASS_TAIL_CALL_ELIMINATION) + fp.add(PASS_AGGRESSIVE_DCE) + fp.add(PASS_DEAD_INST_ELIMINATION) + fp.add(PASS_DEAD_CODE_ELIMINATION) + for fun in self.mod.functions: + fp.run(fun) + + def printModule(self): + print self.mod + + def execute(self): + return self.ee.run_function(self.main, + (GenericValue.int(self.int_t, 0),)) + + def new_str_const(self, val): + '''create string(array of int) as a global value ''' + str = self.mod.add_global_variable(Type.array(self.char_t, len(val) + 1), "") + str.initializer = Constant.stringz(val) + return str + + def gep_first(self, emit, val): + '''get pointer of array''' + return emit.gep(val, (self.const_zero, self.const_zero)) + + def char_const(self, val): + '''create constant int value''' + if isinstance(val, str): + if val == '\\0': + return Constant.int(self.char_t, 0) + else: + return Constant.int(self.char_t, ord(val)) + else: + exit('char_const: invalid argument.', val) + + def emit_call_printf(self, emit, string, *args): + '''emit libc printf function call instruction''' + try: + printf = self.mod.get_function_named("printf") + except llvm.LLVMException: + printf = self.mod.add_function( + Type.function(Type.void(), + (Type.pointer(self.char_t, 0),), 1), "printf") + if isinstance(string, str): + string = self.new_str_const(string) + emit.call(printf, + [self.gep_first(emit, string)]+list(args))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/test_DFARuntime.py Tue Jun 15 00:54:59 2010 +0900 @@ -0,0 +1,21 @@ +from dfareg import * + +# -*- coding: utf-8 -*- + +def transition(stat, char): + if stat == 1 and char == 'a': return 2 + if stat == 2 and char == 'b': return 3 + return 0 + +dfa = DeterministicFiniteAutomaton( + transition, + 1, + frozenset([3]), + ) + +for str in (u'ab', u'ba'): + runtime = dfa.getRuntime() + if runtime.doesAccept(str): + print u"%s is accepted" % str + else: + print u"%s is not accepted" % str
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/test_NFA2DFA.py Tue Jun 15 00:54:59 2010 +0900 @@ -0,0 +1,36 @@ +from dfareg import * + +def transition(state, char): + if state == 0 and char == u"a": return frozenset([1, 2]) + if state == 1 and char == u"b": return frozenset([2]) + if state == 2 and char == u"" : return frozenset([0]) + return frozenset([]) + +nfa = NondeterministicFiniteAutomaton( + transition, + 0, + frozenset([2]) + ) + +class Token(object): + CHARACTER = 0 + OPE_UNION = 1 + OPE_STAR = 2 + LPAREN = 3 + RPAREN = 4 + EOF = 5 + +# Convert NFA into DFA + +def nfa2dfa(nfa): + def transition(set_, alpha): + ret = set() + for elem in set_: + ret |= nfa.transition(elem, alpha) + return nfa.epsilonExpand(frozenset(ret)) + return DeterministicFiniteAutomaton( + transition, + nfa.epsilonExpand(frozenset([nfa.start])), + NondeterministicFiniteAutomaton(nfa.accepts) + ) +