comparison test/CodeGen/X86/avx512-gather-scatter-intrin.ll @ 95:afa8332a0e37 (LLVM 3.8)

author:   Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date:     Tue, 13 Oct 2015 17:48:58 +0900
parents:  54457678186b
children: 7d135dc70f03

comparing 84:f3e34b893a5f with 95:afa8332a0e37 (lines marked "-" are deleted in 95, lines marked "+" are inserted in 95, unmarked lines are equal in both revisions)
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
 
 declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float>, i8*, <16 x i32>, i16, i32)
 declare void @llvm.x86.avx512.scatter.dps.512 (i8*, i16, <16 x i32>, <16 x float>, i32)
 declare <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double>, i8*, <8 x i32>, i8, i32)
 declare void @llvm.x86.avx512.scatter.dpd.512 (i8*, i8, <8 x i32>, <8 x double>, i32)
 declare <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float>, i8*, <8 x i64>, i8, i32)
 declare void @llvm.x86.avx512.scatter.qps.512 (i8*, i8, <8 x i64>, <8 x float>, i32)
 declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double>, i8*, <8 x i64>, i8, i32)
 declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, i8, <8 x i64>, <8 x double>, i32)
 
-;CHECK-LABEL: gather_mask_dps
-;CHECK: kmovw
-;CHECK: vgatherdps
-;CHECK: vpadd
-;CHECK: vscatterdps
-;CHECK: ret
 define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_mask_dps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2}
+; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
 %x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4)
 ret void
 }
 
-;CHECK-LABEL: gather_mask_dpd
-;CHECK: kmovw
-;CHECK: vgatherdpd
-;CHECK: vpadd
-;CHECK: vscatterdpd
-;CHECK: ret
 define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_mask_dpd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2}
+; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT: vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
 %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
 %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4)
 ret void
 }
 
-;CHECK-LABEL: gather_mask_qps
-;CHECK: kmovw
-;CHECK: vgatherqps
-;CHECK: vpadd
-;CHECK: vscatterqps
-;CHECK: ret
 define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_mask_qps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2}
+; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
 %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
 call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4)
 ret void
 }
 
-;CHECK-LABEL: gather_mask_qpd
-;CHECK: kmovw
-;CHECK: vgatherqpd
-;CHECK: vpadd
-;CHECK: vscatterqpd
-;CHECK: ret
 define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_mask_qpd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2}
+; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
 %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
 call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4)
 ret void
 }
 declare <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32>, i8*, <8 x i64>, i8, i32)
 declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, i8, <8 x i64>, <8 x i32>, i32)
 declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, <8 x i64>, i8, i32)
 declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)
 
-;CHECK-LABEL: gather_mask_dd
-;CHECK: kmovw
-;CHECK: vpgatherdd
-;CHECK: vpadd
-;CHECK: vpscatterdd
-;CHECK: ret
 define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_mask_dd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2}
+; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
 %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4)
 ret void
 }
 
-;CHECK-LABEL: gather_mask_qd
-;CHECK: kmovw
-;CHECK: vpgatherqd
-;CHECK: vpadd
-;CHECK: vpscatterqd
-;CHECK: ret
 define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_mask_qd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2}
+; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
 %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
 call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4)
 ret void
 }
 
-;CHECK-LABEL: gather_mask_qq
-;CHECK: kmovw
-;CHECK: vpgatherqq
-;CHECK: vpadd
-;CHECK: vpscatterqq
-;CHECK: ret
 define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_mask_qq:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2}
+; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
 %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
 call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4)
 ret void
 }
 
-;CHECK-LABEL: gather_mask_dq
-;CHECK: kmovw
-;CHECK: vpgatherdq
-;CHECK: vpadd
-;CHECK: vpscatterdq
-;CHECK: ret
 define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_mask_dq:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2}
+; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT: vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
 %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
 %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
 call void @llvm.x86.avx512.scatter.dpq.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4)
 ret void
 }
 
-
-;CHECK-LABEL: gather_mask_dpd_execdomain
-;CHECK: vgatherdpd
-;CHECK: vmovapd
-;CHECK: ret
 define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
+; CHECK-LABEL: gather_mask_dpd_execdomain:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k1}
+; CHECK-NEXT: vmovapd %zmm1, (%rdx)
+; CHECK-NEXT: retq
 %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
 store <8 x double> %x, <8 x double>* %stbuf
 ret void
 }
 
-;CHECK-LABEL: gather_mask_qpd_execdomain
-;CHECK: vgatherqpd
-;CHECK: vmovapd
-;CHECK: ret
 define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
+; CHECK-LABEL: gather_mask_qpd_execdomain:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k1}
+; CHECK-NEXT: vmovapd %zmm1, (%rdx)
+; CHECK-NEXT: retq
 %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
 store <8 x double> %x, <8 x double>* %stbuf
 ret void
 }
 
-;CHECK-LABEL: gather_mask_dps_execdomain
-;CHECK: vgatherdps
-;CHECK: vmovaps
-;CHECK: ret
 define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) {
+; CHECK-LABEL: gather_mask_dps_execdomain:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
 %res = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
 ret <16 x float> %res;
 }
 
-;CHECK-LABEL: gather_mask_qps_execdomain
-;CHECK: vgatherqps
-;CHECK: vmovaps
-;CHECK: ret
 define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) {
+; CHECK-LABEL: gather_mask_qps_execdomain:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
 %res = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
 ret <8 x float> %res;
 }
 
-;CHECK-LABEL: scatter_mask_dpd_execdomain
-;CHECK: vmovapd
-;CHECK: vscatterdpd
-;CHECK: ret
 define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
-%x = load <8 x double>* %src, align 64
+; CHECK-LABEL: scatter_mask_dpd_execdomain:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovapd (%rdi), %zmm1
+; CHECK-NEXT: vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
+%x = load <8 x double>, <8 x double>* %src, align 64
 call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4)
 ret void
 }
 
-;CHECK-LABEL: scatter_mask_qpd_execdomain
-;CHECK: vmovapd
-;CHECK: vscatterqpd
-;CHECK: ret
 define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
-%x = load <8 x double>* %src, align 64
+; CHECK-LABEL: scatter_mask_qpd_execdomain:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovapd (%rdi), %zmm1
+; CHECK-NEXT: vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
+%x = load <8 x double>, <8 x double>* %src, align 64
 call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4)
 ret void
 }
 
-;CHECK-LABEL: scatter_mask_dps_execdomain
-;CHECK: vmovaps
-;CHECK: vscatterdps
-;CHECK: ret
 define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
-%x = load <16 x float>* %src, align 64
+; CHECK-LABEL: scatter_mask_dps_execdomain:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vmovaps (%rdi), %zmm1
+; CHECK-NEXT: vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
+%x = load <16 x float>, <16 x float>* %src, align 64
 call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4)
 ret void
 }
 
-;CHECK-LABEL: scatter_mask_qps_execdomain
-;CHECK: vmovaps
-;CHECK: vscatterqps
-;CHECK: ret
 define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
-%x = load <8 x float>* %src, align 32
+; CHECK-LABEL: scatter_mask_qps_execdomain:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps (%rdi), %ymm1
+; CHECK-NEXT: vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
+%x = load <8 x float>, <8 x float>* %src, align 32
 call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4)
 ret void
 }
 
-;CHECK-LABEL: gather_qps
-;CHECK: kxnorw
-;CHECK: vgatherqps
-;CHECK: vpadd
-;CHECK: vscatterqps
-;CHECK: ret
 define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
+; CHECK-LABEL: gather_qps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
+; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
+; CHECK-NEXT: retq
 %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 -1, i32 4)
 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
 call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 -1, <8 x i64>%ind2, <8 x float> %x, i32 4)
 ret void
 }
 
-;CHECK-LABEL: prefetch
-;CHECK: gatherpf0
-;CHECK: gatherpf1
-;CHECK: scatterpf0
-;CHECK: scatterpf1
-;CHECK: ret
 declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
 declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
 define void @prefetch(<8 x i64> %ind, i8* %base) {
+; CHECK-LABEL: prefetch:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1}
+; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1}
+; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1}
+; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1}
+; CHECK-NEXT: retq
 call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0)
 call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1)
 call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
 call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1)
 ret void
 }
+
+
+declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)
+
+define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div2_df:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,4), %xmm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+%res = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
+%res1 = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 0)
+%res2 = fadd <2 x double> %res, %res1
+ret <2 x double> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64>, i8*, <2 x i64>, i8, i32)
+
+define <4 x i32>@test_int_x86_avx512_gather3div2_di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div2_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
+; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
+%res = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
+%res1 = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
+%res2 = add <4 x i32> %res, %res1
+ret <4 x i32> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, i8*, <4 x i64>, i8, i32)
+
+define <4 x double>@test_int_x86_avx512_gather3div4_df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div4_df:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,4), %ymm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,0), %ymm0 {%k1}
+; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+%res = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
+%res1 = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 0)
+%res2 = fadd <4 x double> %res, %res1
+ret <4 x double> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64>, i8*, <4 x i64>, i8, i32)
+
+define <8 x i32>@test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div4_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+%res = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 8)
+%res1 = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 8)
+%res2 = add <8 x i32> %res, %res1
+ret <8 x i32> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, i8*, <2 x i64>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div4_sf:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vgatherqps (%rdi,%xmm1,4), %xmm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherqps (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+%res = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
+%res1 = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 0)
+%res2 = fadd <4 x float> %res, %res1
+ret <4 x float> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, i8*, <2 x i64>, i8, i32)
+
+define <4 x i32>@test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
+; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1}
+; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+%res = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 4)
+%res1 = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
+%res2 = add <4 x i32> %res, %res1
+ret <4 x i32> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div8_sf:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vgatherqps (%rdi,%ymm1,4), %xmm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherqps (%rdi,%ymm1,0), %xmm0 {%k1}
+; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+%res = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
+%res1 = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 0)
+%res2 = fadd <4 x float> %res, %res1
+ret <4 x float> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, i8*, <4 x i64>, i8, i32)
+
+define <4 x i32>@test_int_x86_avx512_gather3div8_si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3div8_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2}
+; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1}
+; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+%res = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
+%res1 = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 2)
+%res2 = add <4 x i32> %res, %res1
+ret <4 x i32> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)
+
+define <2 x double>@test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv2_df:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %xmm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+%res = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
+%res1 = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
+%res2 = fadd <2 x double> %res, %res1
+ret <2 x double> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, i8, i32)
+
+define <4 x i32>@test_int_x86_avx512_gather3siv2_di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv2_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
+; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
+%res = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
+%res1 = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
+%res2 = add <4 x i32> %res, %res1
+ret <4 x i32> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, i8*, <4 x i32>, i8, i32)
+
+define <4 x double>@test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv4_df:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %ymm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,0), %ymm0 {%k1}
+; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+%res = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
+%res1 = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
+%res2 = fadd <4 x double> %res, %res1
+ret <4 x double> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, i8, i32)
+
+define <8 x i32>@test_int_x86_avx512_gather3siv4_di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv4_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %ymm0 {%k1}
+; CHECK-NEXT: vpaddd %ymm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+%res = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
+%res1 = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
+%res2 = add <8 x i32> %res, %res1
+ret <8 x i32> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv4_sf:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherdps (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+%res = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
+%res1 = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
+%res2 = fadd <4 x float> %res, %res1
+ret <4 x float> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, i8, i32)
+
+define <4 x i32>@test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
+; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+%res = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 4)
+%res1 = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 0)
+%res2 = add <4 x i32> %res, %res1
+ret <4 x i32> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)
+
+define <8 x float>@test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv8_sf:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm2 {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vgatherdps (%rdi,%ymm1,0), %ymm0 {%k1}
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+%res = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
+%res1 = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 -1, i32 0)
+%res2 = fadd <8 x float> %res, %res1
+ret <8 x float> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, i8, i32)
+
+define <8 x i32>@test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_gather3siv8_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm2
+; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2}
+; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,0), %ymm0 {%k1}
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+%res = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
+%res1 = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 0)
+%res2 = add <8 x i32> %res, %res1
+ret <8 x i32> %res2
+}
+
+declare void @llvm.x86.avx512.scatterdiv2.df(i8*, i8, <2 x i64>, <2 x double>, i32)
+
+define void@test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 -1, <2 x i64> %x2, <2 x double> %x3, i32 0)
+call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3, i32 4)
+ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv2.di(i8*, i8, <2 x i64>, <2 x i64>, i32)
+
+define void@test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 0)
+call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 -1, <2 x i64> %x2, <2 x i64> %x3, i32 4)
+ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv4.df(i8*, i8, <4 x i64>, <4 x double>, i32)
+
+define void@test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
+call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 0)
+call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 -1, <4 x i64> %x2, <4 x double> %x3, i32 4)
+ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv4.di(i8*, i8, <4 x i64>, <4 x i64>, i32)
+
+define void@test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
+call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 0)
+call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i64> %x3, i32 4)
+ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv4.sf(i8*, i8, <2 x i64>, <4 x float>, i32)
+
+define void@test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 0)
+call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 -1, <2 x i64> %x2, <4 x float> %x3, i32 4)
+ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv4.si(i8*, i8, <2 x i64>, <4 x i32>, i32)
+
+define void@test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 -1, <2 x i64> %x2, <4 x i32> %x3, i32 0)
+call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3, i32 4)
+ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv8.sf(i8*, i8, <4 x i64>, <4 x float>, i32)
+
+define void@test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
+call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 0)
+call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 -1, <4 x i64> %x2, <4 x float> %x3, i32 4)
+ret void
+}
+
+declare void @llvm.x86.avx512.scatterdiv8.si(i8*, i8, <4 x i64>, <4 x i32>, i32)
+
+define void@test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
+call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 0)
+call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i32> %x3, i32 4)
+ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv2.df(i8*, i8, <4 x i32>, <2 x double>, i32)
+
+define void@test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 -1, <4 x i32> %x2, <2 x double> %x3, i32 0)
+call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3, i32 4)
+ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv2.di(i8*, i8, <4 x i32>, <2 x i64>, i32)
+
+define void@test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 -1, <4 x i32> %x2, <2 x i64> %x3, i32 0)
+call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3, i32 4)
+ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv4.df(i8*, i8, <4 x i32>, <4 x double>, i32)
+
+define void@test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 0)
+call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 -1, <4 x i32> %x2, <4 x double> %x3, i32 4)
+ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv4.di(i8*, i8, <4 x i32>, <4 x i64>, i32)
+
+define void@test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i64> %x3, i32 0)
+call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3, i32 4)
+ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv4.sf(i8*, i8, <4 x i32>, <4 x float>, i32)
+
+define void@test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 0)
+call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 -1, <4 x i32> %x2, <4 x float> %x3, i32 4)
+ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv4.si(i8*, i8, <4 x i32>, <4 x i32>, i32)
+
+define void@test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
+; CHECK-NEXT: retq
+call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 0)
+call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i32> %x3, i32 4)
+ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv8.sf(i8*, i8, <8 x i32>, <8 x float>, i32)
+
+define void@test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
+call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 0)
+call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 -1, <8 x i32> %x2, <8 x float> %x3, i32 4)
+ret void
+}
+
+declare void @llvm.x86.avx512.scattersiv8.si(i8*, i8, <8 x i32>, <8 x i32>, i32)
+
+define void@test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) {
+; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,0) {%k1}
+; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
+; CHECK-NEXT: retq
+call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 0)
+call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 4)
+ret void
+}
+