comparison: test/CodeGen/X86/avx512-gather-scatter-intrin.ll @ 95:afa8332a0e37

branch    LLVM 3.8
author    Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date      Tue, 13 Oct 2015 17:48:58 +0900
parents   54457678186b
children  7d135dc70f03
comparing 84:f3e34b893a5f with 95:afa8332a0e37

@@ -1,63 +1,71 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
 
 declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float>, i8*, <16 x i32>, i16, i32)
 declare void @llvm.x86.avx512.scatter.dps.512 (i8*, i16, <16 x i32>, <16 x float>, i32)
 declare <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double>, i8*, <8 x i32>, i8, i32)
 declare void @llvm.x86.avx512.scatter.dpd.512 (i8*, i8, <8 x i32>, <8 x double>, i32)
 
 declare <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float>, i8*, <8 x i64>, i8, i32)
 declare void @llvm.x86.avx512.scatter.qps.512 (i8*, i8, <8 x i64>, <8 x float>, i32)
 declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double>, i8*, <8 x i64>, i8, i32)
 declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, i8, <8 x i64>, <8 x double>, i32)
 
-;CHECK-LABEL: gather_mask_dps
-;CHECK: kmovw
-;CHECK: vgatherdps
-;CHECK: vpadd
-;CHECK: vscatterdps
-;CHECK: ret
 define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_mask_dps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2}
+; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
   %x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
   %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: gather_mask_dpd
-;CHECK: kmovw
-;CHECK: vgatherdpd
-;CHECK: vpadd
-;CHECK: vscatterdpd
-;CHECK: ret
 define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_mask_dpd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2}
+; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT: vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
   %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
   %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: gather_mask_qps
-;CHECK: kmovw
-;CHECK: vgatherqps
-;CHECK: vpadd
-;CHECK: vscatterqps
-;CHECK: ret
 define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_mask_qps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2}
+; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
   %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
   %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
   call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: gather_mask_qpd
-;CHECK: kmovw
-;CHECK: vgatherqpd
-;CHECK: vpadd
-;CHECK: vscatterqpd
-;CHECK: ret
 define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_mask_qpd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2}
+; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
   %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
   %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
   call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4)
   ret void
 }
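
For orientation: each gather_mask_* test above drives a masked gather through the raw LLVM intrinsic and then scatters the result. A minimal C-level sketch of the same dps pattern, assuming the standard immintrin.h AVX-512F intrinsics (the function name and buffers are illustrative, not taken from the test):

    #include <immintrin.h>

    /* Masked dword-index gather of 16 floats, then a masked scatter of
       the gathered values -- the shape of gather_mask_dps above. */
    void gather_scatter_dps(const float *base, float *stbuf,
                            __m512i ind, __m512 src, __mmask16 mask) {
      /* vgatherdps: lanes whose mask bit is 0 keep their value from src */
      __m512 x = _mm512_mask_i32gather_ps(src, mask, ind, base, 4);
      /* vscatterdps: only lanes whose mask bit is 1 are written */
      _mm512_mask_i32scatter_ps(stbuf, mask, ind, x, 4);
    }
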
@@ -72,164 +80,712 @@
 declare <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32>, i8*, <8 x i64>, i8, i32)
 declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, i8, <8 x i64>, <8 x i32>, i32)
 declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, <8 x i64>, i8, i32)
 declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)
 
-;CHECK-LABEL: gather_mask_dd
-;CHECK: kmovw
-;CHECK: vpgatherdd
-;CHECK: vpadd
-;CHECK: vpscatterdd
-;CHECK: ret
 define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_mask_dd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2}
+; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
   %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
   %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: gather_mask_qd
-;CHECK: kmovw
-;CHECK: vpgatherqd
-;CHECK: vpadd
-;CHECK: vpscatterqd
-;CHECK: ret
 define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_mask_qd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2}
+; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
   %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
   %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
   call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: gather_mask_qq
-;CHECK: kmovw
-;CHECK: vpgatherqq
-;CHECK: vpadd
-;CHECK: vpscatterqq
-;CHECK: ret
 define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_mask_qq:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2}
+; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
   %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
   %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
   call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: gather_mask_dq
-;CHECK: kmovw
-;CHECK: vpgatherdq
-;CHECK: vpadd
-;CHECK: vpscatterdq
-;CHECK: ret
 define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_mask_dq:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2}
+; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT: vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
   %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
   %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   call void @llvm.x86.avx512.scatter.dpq.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4)
   ret void
 }
 
-
-;CHECK-LABEL: gather_mask_dpd_execdomain
-;CHECK: vgatherdpd
-;CHECK: vmovapd
-;CHECK: ret
 define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
+; CHECK-LABEL: gather_mask_dpd_execdomain:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k1}
+; CHECK-NEXT: vmovapd %zmm1, (%rdx)
+; CHECK-NEXT: retq
   %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
   store <8 x double> %x, <8 x double>* %stbuf
   ret void
 }
 
-;CHECK-LABEL: gather_mask_qpd_execdomain
-;CHECK: vgatherqpd
-;CHECK: vmovapd
-;CHECK: ret
 define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
+; CHECK-LABEL: gather_mask_qpd_execdomain:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k1}
+; CHECK-NEXT: vmovapd %zmm1, (%rdx)
+; CHECK-NEXT: retq
   %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
   store <8 x double> %x, <8 x double>* %stbuf
   ret void
 }
 
-;CHECK-LABEL: gather_mask_dps_execdomain
-;CHECK: vgatherdps
-;CHECK: vmovaps
-;CHECK: ret
 define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) {
+; CHECK-LABEL: gather_mask_dps_execdomain:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
   %res = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
   ret <16 x float> %res;
 }
 
-;CHECK-LABEL: gather_mask_qps_execdomain
-;CHECK: vgatherqps
-;CHECK: vmovaps
-;CHECK: ret
 define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) {
+; CHECK-LABEL: gather_mask_qps_execdomain:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
   %res = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
   ret <8 x float> %res;
 }
 
-;CHECK-LABEL: scatter_mask_dpd_execdomain
-;CHECK: vmovapd
-;CHECK: vscatterdpd
-;CHECK: ret
 define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
-  %x = load <8 x double>* %src, align 64
+; CHECK-LABEL: scatter_mask_dpd_execdomain:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovapd (%rdi), %zmm1
+; CHECK-NEXT: vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
+  %x = load <8 x double>, <8 x double>* %src, align 64
   call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: scatter_mask_qpd_execdomain
-;CHECK: vmovapd
-;CHECK: vscatterqpd
-;CHECK: ret
 define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
-  %x = load <8 x double>* %src, align 64
+; CHECK-LABEL: scatter_mask_qpd_execdomain:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovapd (%rdi), %zmm1
+; CHECK-NEXT: vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
+  %x = load <8 x double>, <8 x double>* %src, align 64
   call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: scatter_mask_dps_execdomain
-;CHECK: vmovaps
-;CHECK: vscatterdps
-;CHECK: ret
 define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
-  %x = load <16 x float>* %src, align 64
+; CHECK-LABEL: scatter_mask_dps_execdomain:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vmovaps (%rdi), %zmm1
+; CHECK-NEXT: vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
+  %x = load <16 x float>, <16 x float>* %src, align 64
   call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: scatter_mask_qps_execdomain
-;CHECK: vmovaps
-;CHECK: vscatterqps
-;CHECK: ret
 define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
-  %x = load <8 x float>* %src, align 32
+; CHECK-LABEL: scatter_mask_qps_execdomain:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps (%rdi), %ymm1
+; CHECK-NEXT: vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
+  %x = load <8 x float>, <8 x float>* %src, align 32
   call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: gather_qps
-;CHECK: kxnorw
-;CHECK: vgatherqps
-;CHECK: vpadd
-;CHECK: vscatterqps
-;CHECK: ret
 define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_qps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
+; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
   %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 -1, i32 4)
   %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
   call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 -1, <8 x i64>%ind2, <8 x float> %x, i32 4)
   ret void
 }
 
-;CHECK-LABEL: prefetch
-;CHECK: gatherpf0
-;CHECK: gatherpf1
-;CHECK: scatterpf0
-;CHECK: scatterpf1
-;CHECK: ret
 declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
 declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
 define void @prefetch(<8 x i64> %ind, i8* %base) {
+; CHECK-LABEL: prefetch:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1}
+; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1}
+; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1}
+; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1}
+; CHECK-NEXT: retq
   call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0)
   call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1)
   call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
   call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1)
   ret void
 }
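
The prefetch forms above load or reserve cache lines without writing any register. A minimal C-level sketch of the same sequence, assuming the AVX-512PF prefetch intrinsics from immintrin.h (function name illustrative; the hint argument uses the usual _MM_HINT_T0/_MM_HINT_T1 constants):

    #include <immintrin.h>

    /* Gather prefetches, then scatter prefetches, with all lanes
       enabled -- the shape of @prefetch above. */
    void prefetch_qps(const float *base, __m512i ind) {
      _mm512_prefetch_i64gather_ps(ind, base, 4, _MM_HINT_T0);
      _mm512_prefetch_i64gather_ps(ind, base, 4, _MM_HINT_T1);
      _mm512_prefetch_i64scatter_ps((void *)base, ind, 2, _MM_HINT_T0);
      _mm512_prefetch_i64scatter_ps((void *)base, ind, 2, _MM_HINT_T1);
    }
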
+
+
+declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)
+
+define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div2_df:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,4), %xmm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+  %res = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
+  %res1 = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 0)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64>, i8*, <2 x i64>, i8, i32)
+
+define <4 x i32>@test_int_x86_avx512_gather3div2_di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div2_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
+; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
+  %res = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
+  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
+  %res2 = add <4 x i32> %res, %res1
+  ret <4 x i32> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, i8*, <4 x i64>, i8, i32)
+
+define <4 x double>@test_int_x86_avx512_gather3div4_df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div4_df:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,4), %ymm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,0), %ymm0 {%k1}
+; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+  %res = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
+  %res1 = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 0)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64>, i8*, <4 x i64>, i8, i32)
+
+define <8 x i32>@test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div4_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+  %res = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 8)
+  %res1 = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 8)
+  %res2 = add <8 x i32> %res, %res1
+  ret <8 x i32> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, i8*, <2 x i64>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div4_sf:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vgatherqps (%rdi,%xmm1,4), %xmm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherqps (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+  %res = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
+  %res1 = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 0)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, i8*, <2 x i64>, i8, i32)
+
+define <4 x i32>@test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
+; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1}
+; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+  %res = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 4)
+  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
+  %res2 = add <4 x i32> %res, %res1
+  ret <4 x i32> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div8_sf:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vgatherqps (%rdi,%ymm1,4), %xmm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherqps (%rdi,%ymm1,0), %xmm0 {%k1}
+; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+  %res = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
+  %res1 = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 0)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, i8*, <4 x i64>, i8, i32)
+
+define <4 x i32>@test_int_x86_avx512_gather3div8_si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div8_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2}
+; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1}
+; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+  %res = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
+  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 2)
+  %res2 = add <4 x i32> %res, %res1
+  ret <4 x i32> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)
+
+define <2 x double>@test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv2_df:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %xmm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+  %res = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
+  %res1 = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
+  %res2 = fadd <2 x double> %res, %res1
+  ret <2 x double> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, i8, i32)
+
+define <4 x i32>@test_int_x86_avx512_gather3siv2_di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv2_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
+; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
+  %res = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
+  %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
+  %res2 = add <4 x i32> %res, %res1
+  ret <4 x i32> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, i8*, <4 x i32>, i8, i32)
+
+define <4 x double>@test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv4_df:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %ymm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,0), %ymm0 {%k1}
+; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+  %res = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
+  %res1 = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
+  %res2 = fadd <4 x double> %res, %res1
+  ret <4 x double> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, i8, i32)
+
+define <8 x i32>@test_int_x86_avx512_gather3siv4_di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv4_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %ymm0 {%k1}
+; CHECK-NEXT: vpaddd %ymm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+  %res = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
+  %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
+  %res2 = add <8 x i32> %res, %res1
+  ret <8 x i32> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv4_sf:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherdps (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+  %res = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
+  %res1 = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
+  %res2 = fadd <4 x float> %res, %res1
+  ret <4 x float> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, i8, i32)
+
+define <4 x i32>@test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
+; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+  %res = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 4)
+  %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 0)
+  %res2 = add <4 x i32> %res, %res1
+  ret <4 x i32> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)
+
+define <8 x float>@test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv8_sf:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherdps (%rdi,%ymm1,0), %ymm0 {%k1}
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+  %res = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
+  %res1 = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 -1, i32 0)
+  %res2 = fadd <8 x float> %res, %res1
+  ret <8 x float> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, i8, i32)
+
+define <8 x i32>@test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv8_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2}
+; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,0), %ymm0 {%k1}
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+  %res = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
+  %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 0)
+  %res2 = add <8 x i32> %res, %res1
+  ret <8 x i32> %res2
+}
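
The gather3* intrinsics added above are the 128/256-bit AVX-512VL counterparts of the 512-bit gathers earlier in the file. A minimal C-level sketch of one of them, assuming the standard immintrin.h VL gather intrinsics (function name illustrative):

    #include <immintrin.h>

    /* Masked qword-index gather of two doubles into an XMM register --
       the shape of test_int_x86_avx512_gather3div2_df above. */
    __m128d gather_div2_df(const double *base, __m128i ind,
                           __m128d src, __mmask8 mask) {
      return _mm_mmask_i64gather_pd(src, mask, ind, base, 8);
    }
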
+
+declare void @llvm.x86.avx512.scatterdiv2.df(i8*, i8, <2 x i64>, <2 x double>, i32)
+
+define void@test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 -1, <2 x i64> %x2, <2 x double> %x3, i32 0)
+  call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv2.di(i8*, i8, <2 x i64>, <2 x i64>, i32)
+
+define void@test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 0)
+  call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 -1, <2 x i64> %x2, <2 x i64> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv4.df(i8*, i8, <4 x i64>, <4 x double>, i32)
+
+define void@test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 0)
+  call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 -1, <4 x i64> %x2, <4 x double> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv4.di(i8*, i8, <4 x i64>, <4 x i64>, i32)
+
+define void@test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 0)
+  call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i64> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv4.sf(i8*, i8, <2 x i64>, <4 x float>, i32)
+
+define void@test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 0)
+  call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 -1, <2 x i64> %x2, <4 x float> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv4.si(i8*, i8, <2 x i64>, <4 x i32>, i32)
+
+define void@test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 -1, <2 x i64> %x2, <4 x i32> %x3, i32 0)
+  call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv8.sf(i8*, i8, <4 x i64>, <4 x float>, i32)
+
+define void@test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 0)
+  call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 -1, <4 x i64> %x2, <4 x float> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv8.si(i8*, i8, <4 x i64>, <4 x i32>, i32)
+
+define void@test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 0)
+  call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i32> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv2.df(i8*, i8, <4 x i32>, <2 x double>, i32)
+
+define void@test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 -1, <4 x i32> %x2, <2 x double> %x3, i32 0)
+  call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv2.di(i8*, i8, <4 x i32>, <2 x i64>, i32)
+
+define void@test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 -1, <4 x i32> %x2, <2 x i64> %x3, i32 0)
+  call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv4.df(i8*, i8, <4 x i32>, <4 x double>, i32)
+
+define void@test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 0)
+  call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 -1, <4 x i32> %x2, <4 x double> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv4.di(i8*, i8, <4 x i32>, <4 x i64>, i32)
+
+define void@test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i64> %x3, i32 0)
+  call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv4.sf(i8*, i8, <4 x i32>, <4 x float>, i32)
+
+define void@test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 0)
+  call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 -1, <4 x i32> %x2, <4 x float> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv4.si(i8*, i8, <4 x i32>, <4 x i32>, i32)
+
+define void@test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 0)
+  call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i32> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv8.sf(i8*, i8, <8 x i32>, <8 x float>, i32)
+
+define void@test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 0)
+  call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 -1, <8 x i32> %x2, <8 x float> %x3, i32 4)
+  ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv8.si(i8*, i8, <8 x i32>, <8 x i32>, i32)
+
+define void@test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
+  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 0)
+  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 4)
+  ret void
+}
+
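
As with the gathers, the scatterdiv*/scattersiv* intrinsics above are the 128/256-bit AVX-512VL scatters. A minimal C-level sketch, assuming the standard immintrin.h VL scatter intrinsics (function name illustrative):

    #include <immintrin.h>

    /* Masked dword-index scatter of eight i32 lanes from a YMM register --
       the shape of test_int_x86_avx512_scattersiv8_si above. */
    void scatter_siv8_si(int *base, __mmask8 mask, __m256i ind, __m256i val) {
      _mm256_mask_i32scatter_epi32(base, mask, ind, val, 4);
    }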