comparison test/CodeGen/X86/load-slice.ll @ 121:803732b1fca8

LLVM 5.0
author kono
date Fri, 27 Oct 2017 17:07:41 +0900
parents afa8332a0e37
children 3a76565eade5
comparison
equal deleted inserted replaced
120:1172e4bd9c6f 121:803732b1fca8
17 ; High slice starts at 4 (base + 4-bytes) and is 4-bytes aligned. 17 ; High slice starts at 4 (base + 4-bytes) and is 4-bytes aligned.
18 ; 18 ;
19 ; STRESS-LABEL: t1: 19 ; STRESS-LABEL: t1:
20 ; Load out[out_start + 8].real, this is base + 8 * 8 + 0. 20 ; Load out[out_start + 8].real, this is base + 8 * 8 + 0.
21 ; STRESS: vmovss 64([[BASE:[^(]+]]), [[OUT_Real:%xmm[0-9]+]] 21 ; STRESS: vmovss 64([[BASE:[^(]+]]), [[OUT_Real:%xmm[0-9]+]]
22 ; Load out[out_start + 8].imm, this is base + 8 * 8 + 4.
23 ; STRESS-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]]
22 ; Add low slice: out[out_start].real, this is base + 0. 24 ; Add low slice: out[out_start].real, this is base + 0.
23 ; STRESS-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]] 25 ; STRESS-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]]
24 ; Load out[out_start + 8].imm, this is base + 8 * 8 + 4.
25 ; STRESS-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]]
26 ; Add high slice: out[out_start].imm, this is base + 4. 26 ; Add high slice: out[out_start].imm, this is base + 4.
27 ; STRESS-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]] 27 ; STRESS-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]]
28 ; Swap Imm and Real. 28 ; Swap Imm and Real.
29 ; STRESS-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]] 29 ; STRESS-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]]
30 ; Put the results back into out[out_start]. 30 ; Put the results back into out[out_start].
32 ; 32 ;
33 ; Same for REGULAR, we eliminate register bank copy with each slices. 33 ; Same for REGULAR, we eliminate register bank copy with each slices.
34 ; REGULAR-LABEL: t1: 34 ; REGULAR-LABEL: t1:
35 ; Load out[out_start + 8].real, this is base + 8 * 8 + 0. 35 ; Load out[out_start + 8].real, this is base + 8 * 8 + 0.
36 ; REGULAR: vmovss 64([[BASE:[^)]+]]), [[OUT_Real:%xmm[0-9]+]] 36 ; REGULAR: vmovss 64([[BASE:[^)]+]]), [[OUT_Real:%xmm[0-9]+]]
37 ; Load out[out_start + 8].imm, this is base + 8 * 8 + 4.
38 ; REGULAR-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]]
37 ; Add low slice: out[out_start].real, this is base + 0. 39 ; Add low slice: out[out_start].real, this is base + 0.
38 ; REGULAR-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]] 40 ; REGULAR-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]]
39 ; Load out[out_start + 8].imm, this is base + 8 * 8 + 4.
40 ; REGULAR-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]]
41 ; Add high slice: out[out_start].imm, this is base + 4. 41 ; Add high slice: out[out_start].imm, this is base + 4.
42 ; REGULAR-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]] 42 ; REGULAR-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]]
43 ; Swap Imm and Real. 43 ; Swap Imm and Real.
44 ; REGULAR-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]] 44 ; REGULAR-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]]
45 ; Put the results back into out[out_start]. 45 ; Put the results back into out[out_start].
71 71
72 ; Function Attrs: nounwind 72 ; Function Attrs: nounwind
73 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 73 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
74 74
75 ; Function Attrs: nounwind 75 ; Function Attrs: nounwind
76 declare void @llvm.lifetime.start(i64, i8* nocapture) 76 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
77 77
78 ; Function Attrs: nounwind 78 ; Function Attrs: nounwind
79 declare void @llvm.lifetime.end(i64, i8* nocapture) 79 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
80 80
81 ; Check that we do not read outside of the chunk of bits of the original loads. 81 ; Check that we do not read outside of the chunk of bits of the original loads.
82 ; 82 ;
83 ; The 64-bits should have been split in one 32-bits and one 16-bits slices. 83 ; The 64-bits should have been split in one 32-bits and one 16-bits slices.
84 ; The 16-bits should be zero extended to match the final type. 84 ; The 16-bits should be zero extended to match the final type.