# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck %s

# Purpose: run only the si-insert-waitcnts pass over a hand-written machine
# function in which an EXP_DONE reads $vgpr0-$vgpr3 and the very next
# instructions overwrite those same registers. The FileCheck directives further
# down require an S_WAITCNT directly after the export and before the
# overwriting V_MOV_B32 instructions.

--- |
  ; LLVM IR module backing the machine function below. The MIR refers to it
  ; through the memory operands on the loads and through %ir-block.2.
  ;
  ; The function loads four volatile floats, passes them to
  ; llvm.amdgcn.exp.f32, and returns a constant <4 x float>.
  define amdgpu_ps <4 x float> @exp_done_waitcnt(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) #0 {
    %a = load volatile float, float addrspace(1)* undef
    %b = load volatile float, float addrspace(1)* undef
    %c = load volatile float, float addrspace(1)* undef
    %d = load volatile float, float addrspace(1)* undef
    call void @llvm.amdgcn.exp.f32(i32 15, i32 1, float %a, float %b, float %c, float %d, i1 true, i1 false)
    ret <4 x float> <float 5.000000e-01, float 1.000000e+00, float 2.000000e+00, float 4.000000e+00>
  }

  declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0

  attributes #0 = { nounwind }

...
---

# Expected pass output: the wait must sit on the line immediately after
# EXP_DONE, followed (not necessarily adjacently) by the four V_MOV_B32 writes.
# 3855 is 0xF0F. NOTE(review): presumably this is the encoding that waits only
# on the export counter while leaving the other counters at their maximum;
# confirm against the S_WAITCNT field layout for the default amdgcn subtarget.
# CHECK-LABEL: name: exp_done_waitcnt{{$}}
# CHECK: EXP_DONE
# CHECK-NEXT: S_WAITCNT 3855
# CHECK: $vgpr0 = V_MOV_B32
# CHECK: $vgpr1 = V_MOV_B32
# CHECK: $vgpr2 = V_MOV_B32
# CHECK: $vgpr3 = V_MOV_B32
name:            exp_done_waitcnt
alignment:       1
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  ; The two unnamed IR arguments take value numbers %0 and %1, so the IR entry
  ; block is %2.
  bb.0 (%ir-block.2):
    ; Write the upper two dwords of the $sgpr0_sgpr1_sgpr2_sgpr3 tuple that the
    ; loads below take as their first operand.
    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1

    ; Four volatile loads into $vgpr0-$vgpr3; the last one kills the tuple.
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    $vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)

    ; The export reads $vgpr0-$vgpr3. This is the instruction after which the
    ; pass is expected to place its wait.
    EXP_DONE 0, killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3, -1, -1, 15, implicit $exec

    ; Overwrite the exported registers with the return value. The immediates
    ; are the IEEE-754 single-precision bit patterns of 0.5, 1.0, 2.0 and 4.0
    ; (0x3F000000, 0x3F800000, 0x40000000, 0x40800000), matching the IR `ret`.
    $vgpr0 = V_MOV_B32_e32 1056964608, implicit $exec
    $vgpr1 = V_MOV_B32_e32 1065353216, implicit $exec
    $vgpr2 = V_MOV_B32_e32 1073741824, implicit $exec
    $vgpr3 = V_MOV_B32_e32 1082130432, implicit $exec
    SI_RETURN_TO_EPILOG killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3

...