95
|
1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
2
|
77
|
3 declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float>, i8*, <16 x i32>, i16, i32)
|
|
4 declare void @llvm.x86.avx512.scatter.dps.512 (i8*, i16, <16 x i32>, <16 x float>, i32)
|
|
5 declare <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double>, i8*, <8 x i32>, i8, i32)
|
|
6 declare void @llvm.x86.avx512.scatter.dpd.512 (i8*, i8, <8 x i32>, <8 x double>, i32)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
7
|
77
|
8 declare <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float>, i8*, <8 x i64>, i8, i32)
|
|
9 declare void @llvm.x86.avx512.scatter.qps.512 (i8*, i8, <8 x i64>, <8 x float>, i32)
|
|
10 declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double>, i8*, <8 x i64>, i8, i32)
|
|
11 declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, i8, <8 x i64>, <8 x double>, i32)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
12
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
13 define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
|
95
|
14 ; CHECK-LABEL: gather_mask_dps:
|
|
15 ; CHECK: ## BB#0:
|
|
16 ; CHECK-NEXT: kmovw %edi, %k1
|
|
17 ; CHECK-NEXT: kmovw %k1, %k2
|
|
18 ; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2}
|
|
19 ; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
|
|
20 ; CHECK-NEXT: vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
|
|
21 ; CHECK-NEXT: retq
|
77
|
22 %x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
23 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
77
|
24 call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
25 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
26 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
27
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
28 define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
|
95
|
29 ; CHECK-LABEL: gather_mask_dpd:
|
|
30 ; CHECK: ## BB#0:
|
|
31 ; CHECK-NEXT: kmovb %edi, %k1
|
|
32 ; CHECK-NEXT: kmovw %k1, %k2
|
|
33 ; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2}
|
|
34 ; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
|
|
35 ; CHECK-NEXT: vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
|
|
36 ; CHECK-NEXT: retq
|
77
|
37 %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
38 %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
77
|
39 call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
40 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
41 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
42
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
43 define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) {
|
95
|
44 ; CHECK-LABEL: gather_mask_qps:
|
|
45 ; CHECK: ## BB#0:
|
|
46 ; CHECK-NEXT: kmovb %edi, %k1
|
|
47 ; CHECK-NEXT: kmovw %k1, %k2
|
|
48 ; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2}
|
|
49 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
|
|
50 ; CHECK-NEXT: vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
|
|
51 ; CHECK-NEXT: retq
|
77
|
52 %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
53 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
|
77
|
54 call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
55 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
56 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
57
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
58 define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
|
95
|
59 ; CHECK-LABEL: gather_mask_qpd:
|
|
60 ; CHECK: ## BB#0:
|
|
61 ; CHECK-NEXT: kmovb %edi, %k1
|
|
62 ; CHECK-NEXT: kmovw %k1, %k2
|
|
63 ; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2}
|
|
64 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
|
|
65 ; CHECK-NEXT: vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
|
|
66 ; CHECK-NEXT: retq
|
77
|
67 %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
68 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
|
77
|
69 call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
70 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
71 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
72 ;;
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
73 ;; Integer Gather/Scatter
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
74 ;;
|
77
|
75 declare <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>, i8*, <16 x i32>, i16, i32)
|
|
76 declare void @llvm.x86.avx512.scatter.dpi.512 (i8*, i16, <16 x i32>, <16 x i32>, i32)
|
|
77 declare <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64>, i8*, <8 x i32>, i8, i32)
|
|
78 declare void @llvm.x86.avx512.scatter.dpq.512 (i8*, i8, <8 x i32>, <8 x i64>, i32)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
79
|
77
|
80 declare <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32>, i8*, <8 x i64>, i8, i32)
|
|
81 declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, i8, <8 x i64>, <8 x i32>, i32)
|
|
82 declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, <8 x i64>, i8, i32)
|
|
83 declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
84
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
85 define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
|
95
|
86 ; CHECK-LABEL: gather_mask_dd:
|
|
87 ; CHECK: ## BB#0:
|
|
88 ; CHECK-NEXT: kmovw %edi, %k1
|
|
89 ; CHECK-NEXT: kmovw %k1, %k2
|
|
90 ; CHECK-NEXT: vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2}
|
|
91 ; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
|
|
92 ; CHECK-NEXT: vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
|
|
93 ; CHECK-NEXT: retq
|
77
|
94 %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
95 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
77
|
96 call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
97 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
98 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
99
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
100 define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) {
|
95
|
101 ; CHECK-LABEL: gather_mask_qd:
|
|
102 ; CHECK: ## BB#0:
|
|
103 ; CHECK-NEXT: kmovb %edi, %k1
|
|
104 ; CHECK-NEXT: kmovw %k1, %k2
|
|
105 ; CHECK-NEXT: vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2}
|
|
106 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
|
|
107 ; CHECK-NEXT: vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
|
|
108 ; CHECK-NEXT: retq
|
77
|
109 %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
110 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
|
77
|
111 call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
112 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
113 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
114
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
115 define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
|
95
|
116 ; CHECK-LABEL: gather_mask_qq:
|
|
117 ; CHECK: ## BB#0:
|
|
118 ; CHECK-NEXT: kmovb %edi, %k1
|
|
119 ; CHECK-NEXT: kmovw %k1, %k2
|
|
120 ; CHECK-NEXT: vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2}
|
|
121 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
|
|
122 ; CHECK-NEXT: vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
|
|
123 ; CHECK-NEXT: retq
|
77
|
124 %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
125 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
|
77
|
126 call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
127 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
128 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
129
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
130 define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
|
95
|
131 ; CHECK-LABEL: gather_mask_dq:
|
|
132 ; CHECK: ## BB#0:
|
|
133 ; CHECK-NEXT: kmovb %edi, %k1
|
|
134 ; CHECK-NEXT: kmovw %k1, %k2
|
|
135 ; CHECK-NEXT: vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2}
|
|
136 ; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
|
|
137 ; CHECK-NEXT: vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
|
|
138 ; CHECK-NEXT: retq
|
77
|
139 %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
140 %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
77
|
141 call void @llvm.x86.avx512.scatter.dpq.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4)
|
|
142 ret void
|
|
143 }
|
|
144
|
|
145 define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
|
95
|
146 ; CHECK-LABEL: gather_mask_dpd_execdomain:
|
|
147 ; CHECK: ## BB#0:
|
|
148 ; CHECK-NEXT: kmovb %edi, %k1
|
|
149 ; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k1}
|
|
150 ; CHECK-NEXT: vmovapd %zmm1, (%rdx)
|
|
151 ; CHECK-NEXT: retq
|
77
|
152 %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
|
|
153 store <8 x double> %x, <8 x double>* %stbuf
|
|
154 ret void
|
|
155 }
|
|
156
|
|
157 define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
|
95
|
158 ; CHECK-LABEL: gather_mask_qpd_execdomain:
|
|
159 ; CHECK: ## BB#0:
|
|
160 ; CHECK-NEXT: kmovb %edi, %k1
|
|
161 ; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k1}
|
|
162 ; CHECK-NEXT: vmovapd %zmm1, (%rdx)
|
|
163 ; CHECK-NEXT: retq
|
77
|
164 %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
|
|
165 store <8 x double> %x, <8 x double>* %stbuf
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
166 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
167 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
168
|
77
|
169 define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) {
|
95
|
170 ; CHECK-LABEL: gather_mask_dps_execdomain:
|
|
171 ; CHECK: ## BB#0:
|
|
172 ; CHECK-NEXT: kmovw %edi, %k1
|
|
173 ; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k1}
|
|
174 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
|
|
175 ; CHECK-NEXT: retq
|
77
|
176 %res = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
|
|
177 ret <16 x float> %res;
|
|
178 }
|
|
179
|
|
180 define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) {
|
95
|
181 ; CHECK-LABEL: gather_mask_qps_execdomain:
|
|
182 ; CHECK: ## BB#0:
|
|
183 ; CHECK-NEXT: kmovb %edi, %k1
|
|
184 ; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k1}
|
|
185 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
|
|
186 ; CHECK-NEXT: retq
|
77
|
187 %res = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
|
|
188 ret <8 x float> %res;
|
|
189 }
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
190
|
77
|
191 define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
|
95
|
192 ; CHECK-LABEL: scatter_mask_dpd_execdomain:
|
|
193 ; CHECK: ## BB#0:
|
|
194 ; CHECK-NEXT: kmovb %esi, %k1
|
|
195 ; CHECK-NEXT: vmovapd (%rdi), %zmm1
|
|
196 ; CHECK-NEXT: vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
|
|
197 ; CHECK-NEXT: retq
|
|
198 %x = load <8 x double>, <8 x double>* %src, align 64
|
77
|
199 call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4)
|
|
200 ret void
|
|
201 }
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
202
|
77
|
203 define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
|
95
|
204 ; CHECK-LABEL: scatter_mask_qpd_execdomain:
|
|
205 ; CHECK: ## BB#0:
|
|
206 ; CHECK-NEXT: kmovb %esi, %k1
|
|
207 ; CHECK-NEXT: vmovapd (%rdi), %zmm1
|
|
208 ; CHECK-NEXT: vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
|
|
209 ; CHECK-NEXT: retq
|
|
210 %x = load <8 x double>, <8 x double>* %src, align 64
|
77
|
211 call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4)
|
|
212 ret void
|
|
213 }
|
|
214
|
|
215 define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
|
95
|
216 ; CHECK-LABEL: scatter_mask_dps_execdomain:
|
|
217 ; CHECK: ## BB#0:
|
|
218 ; CHECK-NEXT: kmovw %esi, %k1
|
|
219 ; CHECK-NEXT: vmovaps (%rdi), %zmm1
|
|
220 ; CHECK-NEXT: vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
|
|
221 ; CHECK-NEXT: retq
|
|
222 %x = load <16 x float>, <16 x float>* %src, align 64
|
77
|
223 call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4)
|
|
224 ret void
|
|
225 }
|
|
226
|
|
227 define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
|
95
|
228 ; CHECK-LABEL: scatter_mask_qps_execdomain:
|
|
229 ; CHECK: ## BB#0:
|
|
230 ; CHECK-NEXT: kmovb %esi, %k1
|
|
231 ; CHECK-NEXT: vmovaps (%rdi), %ymm1
|
|
232 ; CHECK-NEXT: vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
|
|
233 ; CHECK-NEXT: retq
|
|
234 %x = load <8 x float>, <8 x float>* %src, align 32
|
77
|
235 call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
236 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
237 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
238
|
77
|
239 define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
|
95
|
240 ; CHECK-LABEL: gather_qps:
|
|
241 ; CHECK: ## BB#0:
|
|
242 ; CHECK-NEXT: kxnorw %k1, %k1, %k1
|
|
243 ; CHECK-NEXT: kxnorw %k2, %k2, %k2
|
|
244 ; CHECK-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
|
|
245 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
|
|
246 ; CHECK-NEXT: vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
|
|
247 ; CHECK-NEXT: retq
|
77
|
248 %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 -1, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
249 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
|
77
|
250 call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 -1, <8 x i64>%ind2, <8 x float> %x, i32 4)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
251 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
252 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
253
|
77
|
254 declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
|
|
255 declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
|
|
256 define void @prefetch(<8 x i64> %ind, i8* %base) {
|
95
|
257 ; CHECK-LABEL: prefetch:
|
|
258 ; CHECK: ## BB#0:
|
|
259 ; CHECK-NEXT: kxnorw %k1, %k1, %k1
|
|
260 ; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1}
|
|
261 ; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1}
|
|
262 ; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1}
|
|
263 ; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1}
|
|
264 ; CHECK-NEXT: retq
|
77
|
265 call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0)
|
|
266 call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1)
|
|
267 call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
|
|
268 call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1)
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
269 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
270 }
|
95
|
271
|
|
272
|
|
273 declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)
|
|
274
|
|
275 define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
|
|
276 ; CHECK-LABEL: test_int_x86_avx512_gather3div2_df:
|
|
277 ; CHECK: ## BB#0:
|
|
278 ; CHECK-NEXT: kmovb %esi, %k1
|
|
279 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
|
|
280 ; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,4), %xmm2 {%k1}
|
|
281 ; CHECK-NEXT: kxnorw %k1, %k1, %k1
|
|
282 ; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,0), %xmm0 {%k1}
|
|
283 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
|
|
284 ; CHECK-NEXT: retq
|
|
285 %res = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
|
|
286 %res1 = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 0)
|
|
287 %res2 = fadd <2 x double> %res, %res1
|
|
288 ret <2 x double> %res2
|
|
289 }
|
|
290
|
|
291 declare <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64>, i8*, <2 x i64>, i8, i32)
|
|
292
|
|
293 define <4 x i32>@test_int_x86_avx512_gather3div2_di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
|
|
294 ; CHECK-LABEL: test_int_x86_avx512_gather3div2_di:
|
|
295 ; CHECK: ## BB#0:
|
|
296 ; CHECK-NEXT: kmovb %esi, %k1
|
|
297 ; CHECK-NEXT: vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
|
|
298 ; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
|
|
299 ; CHECK-NEXT: retq
|
|
300 %res = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
|
|
301 %res1 = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
|
|
302 %res2 = add <4 x i32> %res, %res1
|
|
303 ret <4 x i32> %res2
|
|
304 }
|
|
305
|
|
306 declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, i8*, <4 x i64>, i8, i32)
|
|
307
|
|
308 define <4 x double>@test_int_x86_avx512_gather3div4_df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
|
|
309 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_df:
|
|
310 ; CHECK: ## BB#0:
|
|
311 ; CHECK-NEXT: kmovb %esi, %k1
|
|
312 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
|
|
313 ; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,4), %ymm2 {%k1}
|
|
314 ; CHECK-NEXT: kxnorw %k1, %k1, %k1
|
|
315 ; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,0), %ymm0 {%k1}
|
|
316 ; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
|
|
317 ; CHECK-NEXT: retq
|
|
318 %res = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
|
|
319 %res1 = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 0)
|
|
320 %res2 = fadd <4 x double> %res, %res1
|
|
321 ret <4 x double> %res2
|
|
322 }
|
|
323
|
|
324 declare <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64>, i8*, <4 x i64>, i8, i32)
|
|
325
|
|
326 define <8 x i32>@test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
|
|
327 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_di:
|
|
328 ; CHECK: ## BB#0:
|
|
329 ; CHECK-NEXT: kmovb %esi, %k1
|
|
330 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
|
|
331 ; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
|
|
332 ; CHECK-NEXT: kxnorw %k1, %k1, %k1
|
|
333 ; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
|
|
334 ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
|
|
335 ; CHECK-NEXT: retq
|
|
336 %res = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 8)
|
|
337 %res1 = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 8)
|
|
338 %res2 = add <8 x i32> %res, %res1
|
|
339 ret <8 x i32> %res2
|
|
340 }
|
|
341
|
|
342 declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, i8*, <2 x i64>, i8, i32)
|
|
343
|
|
344 define <4 x float>@test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
|
|
345 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_sf:
|
|
346 ; CHECK: ## BB#0:
|
|
347 ; CHECK-NEXT: kmovb %esi, %k1
|
|
348 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
|
|
349 ; CHECK-NEXT: vgatherqps (%rdi,%xmm1,4), %xmm2 {%k1}
|
|
350 ; CHECK-NEXT: kxnorw %k1, %k1, %k1
|
|
351 ; CHECK-NEXT: vgatherqps (%rdi,%xmm1,0), %xmm0 {%k1}
|
|
352 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
|
|
353 ; CHECK-NEXT: retq
|
|
354 %res = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
|
|
355 %res1 = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 0)
|
|
356 %res2 = fadd <4 x float> %res, %res1
|
|
357 ret <4 x float> %res2
|
|
358 }
|
|
359
|
|
360 declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, i8*, <2 x i64>, i8, i32)
|
|
361
|
|
362 define <4 x i32>@test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
|
|
363 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
|
|
364 ; CHECK: ## BB#0:
|
|
365 ; CHECK-NEXT: kmovb %esi, %k1
|
|
366 ; CHECK-NEXT: kxnorw %k2, %k2, %k2
|
|
367 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
|
|
368 ; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
|
|
369 ; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1}
|
|
370 ; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
|
|
371 ; CHECK-NEXT: retq
|
|
372 %res = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 4)
|
|
373 %res1 = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
|
|
374 %res2 = add <4 x i32> %res, %res1
|
|
375 ret <4 x i32> %res2
|
|
376 }
|
|
377
|
|
378 declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)
|
|
379
|
|
380 define <4 x float>@test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
|
|
381 ; CHECK-LABEL: test_int_x86_avx512_gather3div8_sf:
|
|
382 ; CHECK: ## BB#0:
|
|
383 ; CHECK-NEXT: kmovb %esi, %k1
|
|
384 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
|
|
385 ; CHECK-NEXT: vgatherqps (%rdi,%ymm1,4), %xmm2 {%k1}
|
|
386 ; CHECK-NEXT: kxnorw %k1, %k1, %k1
|
|
387 ; CHECK-NEXT: vgatherqps (%rdi,%ymm1,0), %xmm0 {%k1}
|
|
388 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
|
|
389 ; CHECK-NEXT: retq
|
|
390 %res = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
|
|
391 %res1 = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 0)
|
|
392 %res2 = fadd <4 x float> %res, %res1
|
|
393 ret <4 x float> %res2
|
|
394 }
|
|
395
|
|
396 declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, i8*, <4 x i64>, i8, i32)
|
|
397
|
|
398 define <4 x i32>@test_int_x86_avx512_gather3div8_si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
|
|
399 ; CHECK-LABEL: test_int_x86_avx512_gather3div8_si:
|
|
400 ; CHECK: ## BB#0:
|
|
401 ; CHECK-NEXT: kmovb %esi, %k1
|
|
402 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
|
|
403 ; CHECK-NEXT: kmovw %k1, %k2
|
|
404 ; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2}
|
|
405 ; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1}
|
|
406 ; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
|
|
407 ; CHECK-NEXT: retq
|
|
408 %res = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
|
|
409 %res1 = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 2)
|
|
410 %res2 = add <4 x i32> %res, %res1
|
|
411 ret <4 x i32> %res2
|
|
412 }
|
|
413
|
|
414 declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)
|
|
415
|
|
416 define <2 x double>@test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
|
|
417 ; CHECK-LABEL: test_int_x86_avx512_gather3siv2_df:
|
|
418 ; CHECK: ## BB#0:
|
|
419 ; CHECK-NEXT: kmovb %esi, %k1
|
|
420 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
|
|
421 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %xmm2 {%k1}
|
|
422 ; CHECK-NEXT: kxnorw %k1, %k1, %k1
|
|
423 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,0), %xmm0 {%k1}
|
|
424 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
|
|
425 ; CHECK-NEXT: retq
|
|
426 %res = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
|
|
427 %res1 = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
|
|
428 %res2 = fadd <2 x double> %res, %res1
|
|
429 ret <2 x double> %res2
|
|
430 }
|
|
431
|
|
432 declare <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, i8, i32)
|
|
433
|
|
434 define <4 x i32>@test_int_x86_avx512_gather3siv2_di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
|
|
435 ; CHECK-LABEL: test_int_x86_avx512_gather3siv2_di:
|
|
436 ; CHECK: ## BB#0:
|
|
437 ; CHECK-NEXT: kmovb %esi, %k1
|
|
438 ; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
|
|
439 ; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
|
|
440 ; CHECK-NEXT: retq
|
|
441 %res = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
|
|
442 %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
|
|
443 %res2 = add <4 x i32> %res, %res1
|
|
444 ret <4 x i32> %res2
|
|
445 }
|
|
446
|
|
447 declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, i8*, <4 x i32>, i8, i32)
|
|
448
|
|
449 define <4 x double>@test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
|
|
450 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_df:
|
|
451 ; CHECK: ## BB#0:
|
|
452 ; CHECK-NEXT: kmovb %esi, %k1
|
|
453 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
|
|
454 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %ymm2 {%k1}
|
|
455 ; CHECK-NEXT: kxnorw %k1, %k1, %k1
|
|
456 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,0), %ymm0 {%k1}
|
|
457 ; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
|
|
458 ; CHECK-NEXT: retq
|
|
459 %res = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
|
|
460 %res1 = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
|
|
461 %res2 = fadd <4 x double> %res, %res1
|
|
462 ret <4 x double> %res2
|
|
463 }
|
|
464
|
|
465 declare <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, i8, i32)
|
|
466
|
|
467 define <8 x i32>@test_int_x86_avx512_gather3siv4_di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
|
|
468 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_di:
|
|
469 ; CHECK: ## BB#0:
|
|
470 ; CHECK-NEXT: kmovb %esi, %k1
|
|
471 ; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %ymm0 {%k1}
|
|
472 ; CHECK-NEXT: vpaddd %ymm0, %ymm0, %ymm0
|
|
473 ; CHECK-NEXT: retq
|
|
474 %res = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
|
|
475 %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
|
|
476 %res2 = add <8 x i32> %res, %res1
|
|
477 ret <8 x i32> %res2
|
|
478 }
|
|
479
|
|
480 declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, i8, i32)
|
|
481
|
|
482 define <4 x float>@test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
|
|
483 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_sf:
|
|
484 ; CHECK: ## BB#0:
|
|
485 ; CHECK-NEXT: kmovb %esi, %k1
|
|
486 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
|
|
487 ; CHECK-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm2 {%k1}
|
|
488 ; CHECK-NEXT: kxnorw %k1, %k1, %k1
|
|
489 ; CHECK-NEXT: vgatherdps (%rdi,%xmm1,0), %xmm0 {%k1}
|
|
490 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
|
|
491 ; CHECK-NEXT: retq
|
|
492 %res = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
|
|
493 %res1 = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
|
|
494 %res2 = fadd <4 x float> %res, %res1
|
|
495 ret <4 x float> %res2
|
|
496 }
|
|
497
|
|
498 declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, i8, i32)
|
|
499
|
|
500 define <4 x i32>@test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
|
|
501 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
|
|
502 ; CHECK: ## BB#0:
|
|
503 ; CHECK-NEXT: kmovb %esi, %k1
|
|
504 ; CHECK-NEXT: kxnorw %k2, %k2, %k2
|
|
505 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
|
|
506 ; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
|
|
507 ; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,0), %xmm0 {%k1}
|
|
508 ; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
|
|
509 ; CHECK-NEXT: retq
|
|
510 %res = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 4)
|
|
511 %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 0)
|
|
512 %res2 = add <4 x i32> %res, %res1
|
|
513 ret <4 x i32> %res2
|
|
514 }
|
|
515
|
|
declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)

; Gathers <8 x float> via dword indices: first with the runtime mask %x3
; (scale 4), then with an all-ones mask (i8 -1, scale 0) expected to be
; built by kxnorw; the two results are summed with fadd.
define <8 x float>@test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm2 {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vgatherdps (%rdi,%ymm1,0), %ymm0 {%k1}
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}
|
|
533
|
|
declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, i8, i32)

; Gathers <8 x i32> twice with the SAME runtime mask %x3, differing only in
; the scale operand (4 vs 0). Because a gather clobbers its mask, the asm
; expectations show the mask copied (kmovw %k1, %k2) so it survives for the
; second gather.
define <8 x i32>@test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: kmovw %k1, %k2
; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2}
; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,0), %ymm0 {%k1}
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 0)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}
|
|
551
|
|
declare void @llvm.x86.avx512.scatterdiv2.df(i8*, i8, <2 x i64>, <2 x double>, i32)

; Scatters <2 x double> through 64-bit indices: all-ones mask (i8 -1,
; scale 0) first, runtime mask %x1 (scale 4) second. The all-ones mask is
; expected to be materialized with kxnorw into %k2.
define void@test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: kxnorw %k2, %k2, %k2
; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,0) {%k2}
; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 -1, <2 x i64> %x2, <2 x double> %x3, i32 0)
  call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3, i32 4)
  ret void
}
|
|
566
|
|
declare void @llvm.x86.avx512.scatterdiv2.di(i8*, i8, <2 x i64>, <2 x i64>, i32)

; Scatters <2 x i64> through 64-bit indices: runtime mask %x1 (scale 0)
; first, then all-ones mask (i8 -1, scale 4) built with kxnorw.
define void@test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 0)
  call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 -1, <2 x i64> %x2, <2 x i64> %x3, i32 4)
  ret void
}
|
|
581
|
|
declare void @llvm.x86.avx512.scatterdiv4.df(i8*, i8, <4 x i64>, <4 x double>, i32)

; Scatters <4 x double> through 64-bit (ymm) indices: runtime mask %x1
; (scale 0) first, then all-ones mask (i8 -1, scale 4) built with kxnorw.
define void@test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 0)
  call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 -1, <4 x i64> %x2, <4 x double> %x3, i32 4)
  ret void
}
|
|
596
|
|
declare void @llvm.x86.avx512.scatterdiv4.di(i8*, i8, <4 x i64>, <4 x i64>, i32)

; Scatters <4 x i64> through 64-bit (ymm) indices: runtime mask %x1
; (scale 0) first, then all-ones mask (i8 -1, scale 4) built with kxnorw.
define void@test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 0)
  call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i64> %x3, i32 4)
  ret void
}
|
|
611
|
|
declare void @llvm.x86.avx512.scatterdiv4.sf(i8*, i8, <2 x i64>, <4 x float>, i32)

; Scatters <4 x float> through two 64-bit indices: runtime mask %x1
; (scale 0) first, then all-ones mask (i8 -1, scale 4) built with kxnorw.
define void@test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 0)
  call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 -1, <2 x i64> %x2, <4 x float> %x3, i32 4)
  ret void
}
|
|
626
|
|
declare void @llvm.x86.avx512.scatterdiv4.si(i8*, i8, <2 x i64>, <4 x i32>, i32)

; Scatters <4 x i32> through two 64-bit indices: all-ones mask (i8 -1,
; scale 0) first — materialized with kxnorw into %k2 — then runtime mask
; %x1 (scale 4).
define void@test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: kxnorw %k2, %k2, %k2
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,0) {%k2}
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 -1, <2 x i64> %x2, <4 x i32> %x3, i32 0)
  call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3, i32 4)
  ret void
}
|
|
641
|
|
declare void @llvm.x86.avx512.scatterdiv8.sf(i8*, i8, <4 x i64>, <4 x float>, i32)

; Scatters <4 x float> through four 64-bit (ymm) indices: runtime mask %x1
; (scale 0) first, then all-ones mask (i8 -1, scale 4) built with kxnorw.
define void@test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 0)
  call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 -1, <4 x i64> %x2, <4 x float> %x3, i32 4)
  ret void
}
|
|
656
|
|
declare void @llvm.x86.avx512.scatterdiv8.si(i8*, i8, <4 x i64>, <4 x i32>, i32)

; Scatters <4 x i32> through four 64-bit (ymm) indices: runtime mask %x1
; (scale 0) first, then all-ones mask (i8 -1, scale 4) built with kxnorw.
define void@test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 0)
  call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i32> %x3, i32 4)
  ret void
}
|
|
671
|
|
declare void @llvm.x86.avx512.scattersiv2.df(i8*, i8, <4 x i32>, <2 x double>, i32)

; Scatters <2 x double> through 32-bit indices: all-ones mask (i8 -1,
; scale 0) first — materialized with kxnorw into %k2 — then runtime mask
; %x1 (scale 4).
define void@test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: kxnorw %k2, %k2, %k2
; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,0) {%k2}
; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 -1, <4 x i32> %x2, <2 x double> %x3, i32 0)
  call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3, i32 4)
  ret void
}
|
|
686
|
|
declare void @llvm.x86.avx512.scattersiv2.di(i8*, i8, <4 x i32>, <2 x i64>, i32)

; Scatters <2 x i64> through 32-bit indices: all-ones mask (i8 -1, scale 0)
; first — materialized with kxnorw into %k2 — then runtime mask %x1 (scale 4).
define void@test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: kxnorw %k2, %k2, %k2
; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,0) {%k2}
; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 -1, <4 x i32> %x2, <2 x i64> %x3, i32 0)
  call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3, i32 4)
  ret void
}
|
|
701
|
|
declare void @llvm.x86.avx512.scattersiv4.df(i8*, i8, <4 x i32>, <4 x double>, i32)

; Scatters <4 x double> through 32-bit indices: runtime mask %x1 (scale 0)
; first, then all-ones mask (i8 -1, scale 4) built with kxnorw.
define void@test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 0)
  call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 -1, <4 x i32> %x2, <4 x double> %x3, i32 4)
  ret void
}
|
|
716
|
|
declare void @llvm.x86.avx512.scattersiv4.di(i8*, i8, <4 x i32>, <4 x i64>, i32)

; Scatters <4 x i64> through 32-bit indices: all-ones mask (i8 -1, scale 0)
; first — materialized with kxnorw into %k2 — then runtime mask %x1 (scale 4).
define void@test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: kxnorw %k2, %k2, %k2
; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,0) {%k2}
; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i64> %x3, i32 0)
  call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3, i32 4)
  ret void
}
|
|
731
|
|
declare void @llvm.x86.avx512.scattersiv4.sf(i8*, i8, <4 x i32>, <4 x float>, i32)

; Scatters <4 x float> through 32-bit indices: runtime mask %x1 (scale 0)
; first, then all-ones mask (i8 -1, scale 4) built with kxnorw.
define void@test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 0)
  call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 -1, <4 x i32> %x2, <4 x float> %x3, i32 4)
  ret void
}
|
|
746
|
|
declare void @llvm.x86.avx512.scattersiv4.si(i8*, i8, <4 x i32>, <4 x i32>, i32)

; Scatters <4 x i32> through 32-bit indices: runtime mask %x1 (scale 0)
; first, then all-ones mask (i8 -1, scale 4) built with kxnorw.
define void@test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 0)
  call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i32> %x3, i32 4)
  ret void
}
|
|
761
|
|
declare void @llvm.x86.avx512.scattersiv8.sf(i8*, i8, <8 x i32>, <8 x float>, i32)

; Scatters <8 x float> through 32-bit (ymm) indices: runtime mask %x1
; (scale 0) first, then all-ones mask (i8 -1, scale 4) built with kxnorw.
define void@test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 0)
  call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 -1, <8 x i32> %x2, <8 x float> %x3, i32 4)
  ret void
}
|
|
776
|
|
declare void @llvm.x86.avx512.scattersiv8.si(i8*, i8, <8 x i32>, <8 x i32>, i32)

; Scatters <8 x i32> through 32-bit (ymm) indices: runtime mask %x1
; (scale 0) first, then all-ones mask (i8 -1, scale 4) built with kxnorw.
define void@test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,0) {%k1}
; CHECK-NEXT: kxnorw %k1, %k1, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 0)
  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 4)
  ret void
}
|
|
791
|