Mercurial > hg > CbC > CbC_llvm
comparison test/CodeGen/X86/vec_loadsingles.ll @ 95:afa8332a0e37 LLVM3.8
LLVM 3.8
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 13 Oct 2015 17:48:58 +0900 |
parents | 60c9769439b8 |
children | 1172e4bd9c6f |
comparison
equal
deleted
inserted
replaced
84:f3e34b893a5f | 95:afa8332a0e37 |
---|---|
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,-slow-unaligned-mem-32 | FileCheck %s --check-prefix=ALL --check-prefix=FAST32 | 1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,-slow-unaligned-mem-32 | FileCheck %s --check-prefix=ALL --check-prefix=FAST32 |
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+slow-unaligned-mem-32 | FileCheck %s --check-prefix=ALL --check-prefix=SLOW32 | 2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+slow-unaligned-mem-32 | FileCheck %s --check-prefix=ALL --check-prefix=SLOW32 |
3 | 3 |
4 define <4 x float> @merge_2_floats(float* nocapture %p) nounwind readonly { | 4 define <4 x float> @merge_2_floats(float* nocapture %p) nounwind readonly { |
5 %tmp1 = load float* %p | 5 %tmp1 = load float, float* %p |
6 %vecins = insertelement <4 x float> undef, float %tmp1, i32 0 | 6 %vecins = insertelement <4 x float> undef, float %tmp1, i32 0 |
7 %add.ptr = getelementptr float* %p, i32 1 | 7 %add.ptr = getelementptr float, float* %p, i32 1 |
8 %tmp5 = load float* %add.ptr | 8 %tmp5 = load float, float* %add.ptr |
9 %vecins7 = insertelement <4 x float> %vecins, float %tmp5, i32 1 | 9 %vecins7 = insertelement <4 x float> %vecins, float %tmp5, i32 1 |
10 ret <4 x float> %vecins7 | 10 ret <4 x float> %vecins7 |
11 | 11 |
12 ; ALL-LABEL: merge_2_floats | 12 ; ALL-LABEL: merge_2_floats |
13 ; ALL: vmovq | 13 ; ALL: vmovq |
15 } | 15 } |
16 | 16 |
17 ; Test-case generated due to a crash when trying to treat loading the first | 17 ; Test-case generated due to a crash when trying to treat loading the first |
18 ; two i64s of a <4 x i64> as a load of two i32s. | 18 ; two i64s of a <4 x i64> as a load of two i32s. |
19 define <4 x i64> @merge_2_floats_into_4() { | 19 define <4 x i64> @merge_2_floats_into_4() { |
20 %1 = load i64** undef, align 8 | 20 %1 = load i64*, i64** undef, align 8 |
21 %2 = getelementptr inbounds i64* %1, i64 0 | 21 %2 = getelementptr inbounds i64, i64* %1, i64 0 |
22 %3 = load i64* %2 | 22 %3 = load i64, i64* %2 |
23 %4 = insertelement <4 x i64> undef, i64 %3, i32 0 | 23 %4 = insertelement <4 x i64> undef, i64 %3, i32 0 |
24 %5 = load i64** undef, align 8 | 24 %5 = load i64*, i64** undef, align 8 |
25 %6 = getelementptr inbounds i64* %5, i64 1 | 25 %6 = getelementptr inbounds i64, i64* %5, i64 1 |
26 %7 = load i64* %6 | 26 %7 = load i64, i64* %6 |
27 %8 = insertelement <4 x i64> %4, i64 %7, i32 1 | 27 %8 = insertelement <4 x i64> %4, i64 %7, i32 1 |
28 %9 = shufflevector <4 x i64> %8, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 5> | 28 %9 = shufflevector <4 x i64> %8, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 5> |
29 ret <4 x i64> %9 | 29 ret <4 x i64> %9 |
30 | 30 |
31 ; ALL-LABEL: merge_2_floats_into_4 | 31 ; ALL-LABEL: merge_2_floats_into_4 |
32 ; ALL: vmovups | 32 ; ALL: vmovups |
33 ; ALL-NEXT: retq | 33 ; ALL-NEXT: retq |
34 } | 34 } |
35 | 35 |
36 define <4 x float> @merge_4_floats(float* %ptr) { | 36 define <4 x float> @merge_4_floats(float* %ptr) { |
37 %a = load float* %ptr, align 8 | 37 %a = load float, float* %ptr, align 8 |
38 %vec = insertelement <4 x float> undef, float %a, i32 0 | 38 %vec = insertelement <4 x float> undef, float %a, i32 0 |
39 %idx1 = getelementptr inbounds float* %ptr, i64 1 | 39 %idx1 = getelementptr inbounds float, float* %ptr, i64 1 |
40 %b = load float* %idx1, align 8 | 40 %b = load float, float* %idx1, align 8 |
41 %vec2 = insertelement <4 x float> %vec, float %b, i32 1 | 41 %vec2 = insertelement <4 x float> %vec, float %b, i32 1 |
42 %idx3 = getelementptr inbounds float* %ptr, i64 2 | 42 %idx3 = getelementptr inbounds float, float* %ptr, i64 2 |
43 %c = load float* %idx3, align 8 | 43 %c = load float, float* %idx3, align 8 |
44 %vec4 = insertelement <4 x float> %vec2, float %c, i32 2 | 44 %vec4 = insertelement <4 x float> %vec2, float %c, i32 2 |
45 %idx5 = getelementptr inbounds float* %ptr, i64 3 | 45 %idx5 = getelementptr inbounds float, float* %ptr, i64 3 |
46 %d = load float* %idx5, align 8 | 46 %d = load float, float* %idx5, align 8 |
47 %vec6 = insertelement <4 x float> %vec4, float %d, i32 3 | 47 %vec6 = insertelement <4 x float> %vec4, float %d, i32 3 |
48 ret <4 x float> %vec6 | 48 ret <4 x float> %vec6 |
49 | 49 |
50 ; ALL-LABEL: merge_4_floats | 50 ; ALL-LABEL: merge_4_floats |
51 ; ALL: vmovups | 51 ; ALL: vmovups |
56 ; Make sure that 32-byte vectors are handled efficiently. | 56 ; Make sure that 32-byte vectors are handled efficiently. |
57 ; If the target has slow 32-byte accesses, we should still generate | 57 ; If the target has slow 32-byte accesses, we should still generate |
58 ; 16-byte loads. | 58 ; 16-byte loads. |
59 | 59 |
60 define <8 x float> @merge_8_floats(float* %ptr) { | 60 define <8 x float> @merge_8_floats(float* %ptr) { |
61 %a = load float* %ptr, align 4 | 61 %a = load float, float* %ptr, align 4 |
62 %vec = insertelement <8 x float> undef, float %a, i32 0 | 62 %vec = insertelement <8 x float> undef, float %a, i32 0 |
63 %idx1 = getelementptr inbounds float* %ptr, i64 1 | 63 %idx1 = getelementptr inbounds float, float* %ptr, i64 1 |
64 %b = load float* %idx1, align 4 | 64 %b = load float, float* %idx1, align 4 |
65 %vec2 = insertelement <8 x float> %vec, float %b, i32 1 | 65 %vec2 = insertelement <8 x float> %vec, float %b, i32 1 |
66 %idx3 = getelementptr inbounds float* %ptr, i64 2 | 66 %idx3 = getelementptr inbounds float, float* %ptr, i64 2 |
67 %c = load float* %idx3, align 4 | 67 %c = load float, float* %idx3, align 4 |
68 %vec4 = insertelement <8 x float> %vec2, float %c, i32 2 | 68 %vec4 = insertelement <8 x float> %vec2, float %c, i32 2 |
69 %idx5 = getelementptr inbounds float* %ptr, i64 3 | 69 %idx5 = getelementptr inbounds float, float* %ptr, i64 3 |
70 %d = load float* %idx5, align 4 | 70 %d = load float, float* %idx5, align 4 |
71 %vec6 = insertelement <8 x float> %vec4, float %d, i32 3 | 71 %vec6 = insertelement <8 x float> %vec4, float %d, i32 3 |
72 %idx7 = getelementptr inbounds float* %ptr, i64 4 | 72 %idx7 = getelementptr inbounds float, float* %ptr, i64 4 |
73 %e = load float* %idx7, align 4 | 73 %e = load float, float* %idx7, align 4 |
74 %vec8 = insertelement <8 x float> %vec6, float %e, i32 4 | 74 %vec8 = insertelement <8 x float> %vec6, float %e, i32 4 |
75 %idx9 = getelementptr inbounds float* %ptr, i64 5 | 75 %idx9 = getelementptr inbounds float, float* %ptr, i64 5 |
76 %f = load float* %idx9, align 4 | 76 %f = load float, float* %idx9, align 4 |
77 %vec10 = insertelement <8 x float> %vec8, float %f, i32 5 | 77 %vec10 = insertelement <8 x float> %vec8, float %f, i32 5 |
78 %idx11 = getelementptr inbounds float* %ptr, i64 6 | 78 %idx11 = getelementptr inbounds float, float* %ptr, i64 6 |
79 %g = load float* %idx11, align 4 | 79 %g = load float, float* %idx11, align 4 |
80 %vec12 = insertelement <8 x float> %vec10, float %g, i32 6 | 80 %vec12 = insertelement <8 x float> %vec10, float %g, i32 6 |
81 %idx13 = getelementptr inbounds float* %ptr, i64 7 | 81 %idx13 = getelementptr inbounds float, float* %ptr, i64 7 |
82 %h = load float* %idx13, align 4 | 82 %h = load float, float* %idx13, align 4 |
83 %vec14 = insertelement <8 x float> %vec12, float %h, i32 7 | 83 %vec14 = insertelement <8 x float> %vec12, float %h, i32 7 |
84 ret <8 x float> %vec14 | 84 ret <8 x float> %vec14 |
85 | 85 |
86 ; ALL-LABEL: merge_8_floats | 86 ; ALL-LABEL: merge_8_floats |
87 | 87 |
92 ; SLOW32-NEXT: vinsertf128 | 92 ; SLOW32-NEXT: vinsertf128 |
93 ; SLOW32-NEXT: retq | 93 ; SLOW32-NEXT: retq |
94 } | 94 } |
95 | 95 |
96 define <4 x double> @merge_4_doubles(double* %ptr) { | 96 define <4 x double> @merge_4_doubles(double* %ptr) { |
97 %a = load double* %ptr, align 8 | 97 %a = load double, double* %ptr, align 8 |
98 %vec = insertelement <4 x double> undef, double %a, i32 0 | 98 %vec = insertelement <4 x double> undef, double %a, i32 0 |
99 %idx1 = getelementptr inbounds double* %ptr, i64 1 | 99 %idx1 = getelementptr inbounds double, double* %ptr, i64 1 |
100 %b = load double* %idx1, align 8 | 100 %b = load double, double* %idx1, align 8 |
101 %vec2 = insertelement <4 x double> %vec, double %b, i32 1 | 101 %vec2 = insertelement <4 x double> %vec, double %b, i32 1 |
102 %idx3 = getelementptr inbounds double* %ptr, i64 2 | 102 %idx3 = getelementptr inbounds double, double* %ptr, i64 2 |
103 %c = load double* %idx3, align 8 | 103 %c = load double, double* %idx3, align 8 |
104 %vec4 = insertelement <4 x double> %vec2, double %c, i32 2 | 104 %vec4 = insertelement <4 x double> %vec2, double %c, i32 2 |
105 %idx5 = getelementptr inbounds double* %ptr, i64 3 | 105 %idx5 = getelementptr inbounds double, double* %ptr, i64 3 |
106 %d = load double* %idx5, align 8 | 106 %d = load double, double* %idx5, align 8 |
107 %vec6 = insertelement <4 x double> %vec4, double %d, i32 3 | 107 %vec6 = insertelement <4 x double> %vec4, double %d, i32 3 |
108 ret <4 x double> %vec6 | 108 ret <4 x double> %vec6 |
109 | 109 |
110 ; ALL-LABEL: merge_4_doubles | 110 ; ALL-LABEL: merge_4_doubles |
111 ; FAST32: vmovups | 111 ; FAST32: vmovups |
118 | 118 |
119 ; PR21771 ( http://llvm.org/bugs/show_bug.cgi?id=21771 ) | 119 ; PR21771 ( http://llvm.org/bugs/show_bug.cgi?id=21771 ) |
120 ; Recognize and combine consecutive loads even when the | 120 ; Recognize and combine consecutive loads even when the |
121 ; first of the combined loads is offset from the base address. | 121 ; first of the combined loads is offset from the base address. |
122 define <4 x double> @merge_4_doubles_offset(double* %ptr) { | 122 define <4 x double> @merge_4_doubles_offset(double* %ptr) { |
123 %arrayidx4 = getelementptr inbounds double* %ptr, i64 4 | 123 %arrayidx4 = getelementptr inbounds double, double* %ptr, i64 4 |
124 %arrayidx5 = getelementptr inbounds double* %ptr, i64 5 | 124 %arrayidx5 = getelementptr inbounds double, double* %ptr, i64 5 |
125 %arrayidx6 = getelementptr inbounds double* %ptr, i64 6 | 125 %arrayidx6 = getelementptr inbounds double, double* %ptr, i64 6 |
126 %arrayidx7 = getelementptr inbounds double* %ptr, i64 7 | 126 %arrayidx7 = getelementptr inbounds double, double* %ptr, i64 7 |
127 %e = load double* %arrayidx4, align 8 | 127 %e = load double, double* %arrayidx4, align 8 |
128 %f = load double* %arrayidx5, align 8 | 128 %f = load double, double* %arrayidx5, align 8 |
129 %g = load double* %arrayidx6, align 8 | 129 %g = load double, double* %arrayidx6, align 8 |
130 %h = load double* %arrayidx7, align 8 | 130 %h = load double, double* %arrayidx7, align 8 |
131 %vecinit4 = insertelement <4 x double> undef, double %e, i32 0 | 131 %vecinit4 = insertelement <4 x double> undef, double %e, i32 0 |
132 %vecinit5 = insertelement <4 x double> %vecinit4, double %f, i32 1 | 132 %vecinit5 = insertelement <4 x double> %vecinit4, double %f, i32 1 |
133 %vecinit6 = insertelement <4 x double> %vecinit5, double %g, i32 2 | 133 %vecinit6 = insertelement <4 x double> %vecinit5, double %g, i32 2 |
134 %vecinit7 = insertelement <4 x double> %vecinit6, double %h, i32 3 | 134 %vecinit7 = insertelement <4 x double> %vecinit6, double %h, i32 3 |
135 ret <4 x double> %vecinit7 | 135 ret <4 x double> %vecinit7 |