comparison test/CodeGen/X86/avx512-gather-scatter-intrin.ll @ 77:54457678186b LLVM3.6

LLVM 3.6
author Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date Mon, 08 Sep 2014 22:06:00 +0900
parents 95c75e76d11b
children afa8332a0e37
comparison
equal deleted inserted replaced
34:e874dbf0ad9d 77:54457678186b
1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s 1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
2 2
3 declare <16 x float> @llvm.x86.avx512.gather.dps.mask.512 (<16 x float>, i16, <16 x i32>, i8*, i32) 3 declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float>, i8*, <16 x i32>, i16, i32)
4 declare void @llvm.x86.avx512.scatter.dps.mask.512 (i8*, i16, <16 x i32>, <16 x float>, i32) 4 declare void @llvm.x86.avx512.scatter.dps.512 (i8*, i16, <16 x i32>, <16 x float>, i32)
5 declare <8 x double> @llvm.x86.avx512.gather.dpd.mask.512 (<8 x double>, i8, <8 x i32>, i8*, i32) 5 declare <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double>, i8*, <8 x i32>, i8, i32)
6 declare void @llvm.x86.avx512.scatter.dpd.mask.512 (i8*, i8, <8 x i32>, <8 x double>, i32) 6 declare void @llvm.x86.avx512.scatter.dpd.512 (i8*, i8, <8 x i32>, <8 x double>, i32)
7 7
8 declare <8 x float> @llvm.x86.avx512.gather.qps.mask.512 (<8 x float>, i8, <8 x i64>, i8*, i32) 8 declare <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float>, i8*, <8 x i64>, i8, i32)
9 declare void @llvm.x86.avx512.scatter.qps.mask.512 (i8*, i8, <8 x i64>, <8 x float>, i32) 9 declare void @llvm.x86.avx512.scatter.qps.512 (i8*, i8, <8 x i64>, <8 x float>, i32)
10 declare <8 x double> @llvm.x86.avx512.gather.qpd.mask.512 (<8 x double>, i8, <8 x i64>, i8*, i32) 10 declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double>, i8*, <8 x i64>, i8, i32)
11 declare void @llvm.x86.avx512.scatter.qpd.mask.512 (i8*, i8, <8 x i64>, <8 x double>, i32) 11 declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, i8, <8 x i64>, <8 x double>, i32)
12 12
13 ;CHECK-LABEL: gather_mask_dps 13 ;CHECK-LABEL: gather_mask_dps
14 ;CHECK: kmovw 14 ;CHECK: kmovw
15 ;CHECK: vgatherdps 15 ;CHECK: vgatherdps
16 ;CHECK: vpadd 16 ;CHECK: vpadd
17 ;CHECK: vscatterdps 17 ;CHECK: vscatterdps
18 ;CHECK: ret 18 ;CHECK: ret
19 define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) { 19 define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
20 %x = call <16 x float> @llvm.x86.avx512.gather.dps.mask.512 (<16 x float> %src, i16 %mask, <16 x i32>%ind, i8* %base, i32 4) 20 %x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
21 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 21 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
22 call void @llvm.x86.avx512.scatter.dps.mask.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4) 22 call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4)
23 ret void 23 ret void
24 } 24 }
25 25
26 ;CHECK-LABEL: gather_mask_dpd 26 ;CHECK-LABEL: gather_mask_dpd
27 ;CHECK: kmovw 27 ;CHECK: kmovw
28 ;CHECK: vgatherdpd 28 ;CHECK: vgatherdpd
29 ;CHECK: vpadd 29 ;CHECK: vpadd
30 ;CHECK: vscatterdpd 30 ;CHECK: vscatterdpd
31 ;CHECK: ret 31 ;CHECK: ret
32 define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) { 32 define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
33 %x = call <8 x double> @llvm.x86.avx512.gather.dpd.mask.512 (<8 x double> %src, i8 %mask, <8 x i32>%ind, i8* %base, i32 4) 33 %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
34 %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 34 %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
35 call void @llvm.x86.avx512.scatter.dpd.mask.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4) 35 call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4)
36 ret void 36 ret void
37 } 37 }
38 38
39 ;CHECK-LABEL: gather_mask_qps 39 ;CHECK-LABEL: gather_mask_qps
40 ;CHECK: kmovw 40 ;CHECK: kmovw
41 ;CHECK: vgatherqps 41 ;CHECK: vgatherqps
42 ;CHECK: vpadd 42 ;CHECK: vpadd
43 ;CHECK: vscatterqps 43 ;CHECK: vscatterqps
44 ;CHECK: ret 44 ;CHECK: ret
45 define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) { 45 define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) {
46 %x = call <8 x float> @llvm.x86.avx512.gather.qps.mask.512 (<8 x float> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4) 46 %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
47 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> 47 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
48 call void @llvm.x86.avx512.scatter.qps.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4) 48 call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4)
49 ret void 49 ret void
50 } 50 }
51 51
52 ;CHECK-LABEL: gather_mask_qpd 52 ;CHECK-LABEL: gather_mask_qpd
53 ;CHECK: kmovw 53 ;CHECK: kmovw
54 ;CHECK: vgatherqpd 54 ;CHECK: vgatherqpd
55 ;CHECK: vpadd 55 ;CHECK: vpadd
56 ;CHECK: vscatterqpd 56 ;CHECK: vscatterqpd
57 ;CHECK: ret 57 ;CHECK: ret
58 define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) { 58 define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
59 %x = call <8 x double> @llvm.x86.avx512.gather.qpd.mask.512 (<8 x double> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4) 59 %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
60 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> 60 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
61 call void @llvm.x86.avx512.scatter.qpd.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4) 61 call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4)
62 ret void 62 ret void
63 } 63 }
64 ;; 64 ;;
65 ;; Integer Gather/Scatter 65 ;; Integer Gather/Scatter
66 ;; 66 ;;
67 declare <16 x i32> @llvm.x86.avx512.gather.dpi.mask.512 (<16 x i32>, i16, <16 x i32>, i8*, i32) 67 declare <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>, i8*, <16 x i32>, i16, i32)
68 declare void @llvm.x86.avx512.scatter.dpi.mask.512 (i8*, i16, <16 x i32>, <16 x i32>, i32) 68 declare void @llvm.x86.avx512.scatter.dpi.512 (i8*, i16, <16 x i32>, <16 x i32>, i32)
69 declare <8 x i64> @llvm.x86.avx512.gather.dpq.mask.512 (<8 x i64>, i8, <8 x i32>, i8*, i32) 69 declare <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64>, i8*, <8 x i32>, i8, i32)
70 declare void @llvm.x86.avx512.scatter.dpq.mask.512 (i8*, i8, <8 x i32>, <8 x i64>, i32) 70 declare void @llvm.x86.avx512.scatter.dpq.512 (i8*, i8, <8 x i32>, <8 x i64>, i32)
71 71
72 declare <8 x i32> @llvm.x86.avx512.gather.qpi.mask.512 (<8 x i32>, i8, <8 x i64>, i8*, i32) 72 declare <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32>, i8*, <8 x i64>, i8, i32)
73 declare void @llvm.x86.avx512.scatter.qpi.mask.512 (i8*, i8, <8 x i64>, <8 x i32>, i32) 73 declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, i8, <8 x i64>, <8 x i32>, i32)
74 declare <8 x i64> @llvm.x86.avx512.gather.qpq.mask.512 (<8 x i64>, i8, <8 x i64>, i8*, i32) 74 declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, <8 x i64>, i8, i32)
75 declare void @llvm.x86.avx512.scatter.qpq.mask.512 (i8*, i8, <8 x i64>, <8 x i64>, i32) 75 declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)
76 76
77 ;CHECK-LABEL: gather_mask_dd 77 ;CHECK-LABEL: gather_mask_dd
78 ;CHECK: kmovw 78 ;CHECK: kmovw
79 ;CHECK: vpgatherdd 79 ;CHECK: vpgatherdd
80 ;CHECK: vpadd 80 ;CHECK: vpadd
81 ;CHECK: vpscatterdd 81 ;CHECK: vpscatterdd
82 ;CHECK: ret 82 ;CHECK: ret
83 define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) { 83 define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
84 %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.mask.512 (<16 x i32> %src, i16 %mask, <16 x i32>%ind, i8* %base, i32 4) 84 %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
85 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 85 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
86 call void @llvm.x86.avx512.scatter.dpi.mask.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4) 86 call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4)
87 ret void 87 ret void
88 } 88 }
89 89
90 ;CHECK-LABEL: gather_mask_qd 90 ;CHECK-LABEL: gather_mask_qd
91 ;CHECK: kmovw 91 ;CHECK: kmovw
92 ;CHECK: vpgatherqd 92 ;CHECK: vpgatherqd
93 ;CHECK: vpadd 93 ;CHECK: vpadd
94 ;CHECK: vpscatterqd 94 ;CHECK: vpscatterqd
95 ;CHECK: ret 95 ;CHECK: ret
96 define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) { 96 define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) {
97 %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.mask.512 (<8 x i32> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4) 97 %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
98 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> 98 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
99 call void @llvm.x86.avx512.scatter.qpi.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4) 99 call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4)
100 ret void 100 ret void
101 } 101 }
102 102
103 ;CHECK-LABEL: gather_mask_qq 103 ;CHECK-LABEL: gather_mask_qq
104 ;CHECK: kmovw 104 ;CHECK: kmovw
105 ;CHECK: vpgatherqq 105 ;CHECK: vpgatherqq
106 ;CHECK: vpadd 106 ;CHECK: vpadd
107 ;CHECK: vpscatterqq 107 ;CHECK: vpscatterqq
108 ;CHECK: ret 108 ;CHECK: ret
109 define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) { 109 define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
110 %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.mask.512 (<8 x i64> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4) 110 %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
111 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> 111 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
112 call void @llvm.x86.avx512.scatter.qpq.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4) 112 call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4)
113 ret void 113 ret void
114 } 114 }
115 115
116 ;CHECK-LABEL: gather_mask_dq 116 ;CHECK-LABEL: gather_mask_dq
117 ;CHECK: kmovw 117 ;CHECK: kmovw
118 ;CHECK: vpgatherdq 118 ;CHECK: vpgatherdq
119 ;CHECK: vpadd 119 ;CHECK: vpadd
120 ;CHECK: vpscatterdq 120 ;CHECK: vpscatterdq
121 ;CHECK: ret 121 ;CHECK: ret
122 define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) { 122 define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
123 %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.mask.512 (<8 x i64> %src, i8 %mask, <8 x i32>%ind, i8* %base, i32 4) 123 %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
124 %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 124 %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
125 call void @llvm.x86.avx512.scatter.dpq.mask.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4) 125 call void @llvm.x86.avx512.scatter.dpq.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4)
126 ret void 126 ret void
127 } 127 }
128 128
129 ;; FP Intinsics without masks 129
130 130 ;CHECK-LABEL: gather_mask_dpd_execdomain
131 declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x i32>, i8*, i32) 131 ;CHECK: vgatherdpd
132 declare void @llvm.x86.avx512.scatter.dps.512 (i8*, <16 x i32>, <16 x float>, i32) 132 ;CHECK: vmovapd
133 declare <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x i64>, i8*, i32) 133 ;CHECK: ret
134 declare void @llvm.x86.avx512.scatter.qps.512 (i8*, <8 x i64>, <8 x float>, i32) 134 define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
135 declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x i64>, i8*, i32) 135 %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
136 declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, <8 x i64>, <8 x double>, i32) 136 store <8 x double> %x, <8 x double>* %stbuf
137 137 ret void
138 ;CHECK-LABEL: gather_dps 138 }
139 ;CHECK: kxnorw 139
140 ;CHECK-LABEL: gather_mask_qpd_execdomain
141 ;CHECK: vgatherqpd
142 ;CHECK: vmovapd
143 ;CHECK: ret
144 define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
145 %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
146 store <8 x double> %x, <8 x double>* %stbuf
147 ret void
148 }
149
150 ;CHECK-LABEL: gather_mask_dps_execdomain
140 ;CHECK: vgatherdps 151 ;CHECK: vgatherdps
152 ;CHECK: vmovaps
153 ;CHECK: ret
154 define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) {
155 %res = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
156 ret <16 x float> %res;
157 }
158
159 ;CHECK-LABEL: gather_mask_qps_execdomain
160 ;CHECK: vgatherqps
161 ;CHECK: vmovaps
162 ;CHECK: ret
163 define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) {
164 %res = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
165 ret <8 x float> %res;
166 }
167
168 ;CHECK-LABEL: scatter_mask_dpd_execdomain
169 ;CHECK: vmovapd
170 ;CHECK: vscatterdpd
171 ;CHECK: ret
172 define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
173 %x = load <8 x double>* %src, align 64
174 call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4)
175 ret void
176 }
177
178 ;CHECK-LABEL: scatter_mask_qpd_execdomain
179 ;CHECK: vmovapd
180 ;CHECK: vscatterqpd
181 ;CHECK: ret
182 define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
183 %x = load <8 x double>* %src, align 64
184 call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4)
185 ret void
186 }
187
188 ;CHECK-LABEL: scatter_mask_dps_execdomain
189 ;CHECK: vmovaps
141 ;CHECK: vscatterdps 190 ;CHECK: vscatterdps
142 ;CHECK: ret 191 ;CHECK: ret
143 define void @gather_dps(<16 x i32> %ind, i8* %base, i8* %stbuf) { 192 define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
144 %x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x i32>%ind, i8* %base, i32 4) 193 %x = load <16 x float>* %src, align 64
145 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 194 call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4)
146 call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, <16 x i32>%ind2, <16 x float> %x, i32 4) 195 ret void
196 }
197
198 ;CHECK-LABEL: scatter_mask_qps_execdomain
199 ;CHECK: vmovaps
200 ;CHECK: vscatterqps
201 ;CHECK: ret
202 define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
203 %x = load <8 x float>* %src, align 32
204 call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4)
147 ret void 205 ret void
148 } 206 }
149 207
150 ;CHECK-LABEL: gather_qps 208 ;CHECK-LABEL: gather_qps
151 ;CHECK: kxnorw 209 ;CHECK: kxnorw
152 ;CHECK: vgatherqps 210 ;CHECK: vgatherqps
211 ;CHECK: vpadd
153 ;CHECK: vscatterqps 212 ;CHECK: vscatterqps
154 ;CHECK: ret 213 ;CHECK: ret
155 define void @gather_qps(<8 x i64> %ind, i8* %base, i8* %stbuf) { 214 define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
156 %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x i64>%ind, i8* %base, i32 4) 215 %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 -1, i32 4)
157 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> 216 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
158 call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, <8 x i64>%ind2, <8 x float> %x, i32 4) 217 call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 -1, <8 x i64>%ind2, <8 x float> %x, i32 4)
159 ret void 218 ret void
160 } 219 }
161 220
162 ;CHECK-LABEL: gather_qpd 221 ;CHECK-LABEL: prefetch
163 ;CHECK: kxnorw 222 ;CHECK: gatherpf0
164 ;CHECK: vgatherqpd 223 ;CHECK: gatherpf1
165 ;CHECK: vpadd 224 ;CHECK: scatterpf0
166 ;CHECK: vscatterqpd 225 ;CHECK: scatterpf1
167 ;CHECK: ret 226 ;CHECK: ret
168 define void @gather_qpd(<8 x i64> %ind, i8* %base, i8* %stbuf) { 227 declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
169 %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x i64>%ind, i8* %base, i32 4) 228 declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
170 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> 229 define void @prefetch(<8 x i64> %ind, i8* %base) {
171 call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, <8 x i64>%ind2, <8 x double> %x, i32 4) 230 call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0)
172 ret void 231 call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1)
173 } 232 call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
174 233 call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1)
175 ;; Integer Intinsics without masks 234 ret void
176 235 }
177 declare <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>, i8*, i32)
178 declare void @llvm.x86.avx512.scatter.dpi.512 (i8*, <16 x i32>, <16 x i32>, i32)
179 declare <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i32>, i8*, i32)
180 declare void @llvm.x86.avx512.scatter.dpq.512 (i8*, <8 x i32>, <8 x i64>, i32)
181
182 declare <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i64>, i8*, i32)
183 declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, <8 x i64>, <8 x i32>, i32)
184 declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, i32)
185 declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, <8 x i64>, <8 x i64>, i32)
186
187 ;CHECK-LABEL: gather_dpi
188 ;CHECK: kxnorw
189 ;CHECK: vpgatherdd
190 ;CHECK: vpscatterdd
191 ;CHECK: ret
192 define void @gather_dpi(<16 x i32> %ind, i8* %base, i8* %stbuf) {
193 %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>%ind, i8* %base, i32 4)
194 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
195 call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, <16 x i32>%ind2, <16 x i32> %x, i32 4)
196 ret void
197 }
198
199 ;CHECK-LABEL: gather_qpq
200 ;CHECK: vpxord %zmm
201 ;CHECK: kxnorw
202 ;CHECK: vpgatherqq
203 ;CHECK: vpadd
204 ;CHECK: vpscatterqq
205 ;CHECK: ret
206 define void @gather_qpq(<8 x i64> %ind, i8* %base, i8* %stbuf) {
207 %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>%ind, i8* %base, i32 4)
208 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
209 call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, <8 x i64>%ind2, <8 x i64> %x, i32 4)
210 ret void
211 }
212
213 ;CHECK-LABEL: gather_qpi
214 ;CHECK: vpxor %ymm
215 ;CHECK: kxnorw
216 ;CHECK: vpgatherqd
217 ;CHECK: vpadd
218 ;CHECK: vpscatterqd
219 ;CHECK: ret
220 define void @gather_qpi(<8 x i64> %ind, i8* %base, i8* %stbuf) {
221 %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i64>%ind, i8* %base, i32 4)
222 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
223 call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, <8 x i64>%ind2, <8 x i32> %x, i32 4)
224 ret void
225 }