Mercurial > hg > CbC > CbC_llvm
comparison test/CodeGen/X86/avx512-gather-scatter-intrin.ll @ 77:54457678186b LLVM3.6
LLVM 3.6
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 08 Sep 2014 22:06:00 +0900 |
parents | 95c75e76d11b |
children | afa8332a0e37 |
comparison
equal
deleted
inserted
replaced
34:e874dbf0ad9d | 77:54457678186b |
---|---|
1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s | 1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s |
2 | 2 |
3 declare <16 x float> @llvm.x86.avx512.gather.dps.mask.512 (<16 x float>, i16, <16 x i32>, i8*, i32) | 3 declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float>, i8*, <16 x i32>, i16, i32) |
4 declare void @llvm.x86.avx512.scatter.dps.mask.512 (i8*, i16, <16 x i32>, <16 x float>, i32) | 4 declare void @llvm.x86.avx512.scatter.dps.512 (i8*, i16, <16 x i32>, <16 x float>, i32) |
5 declare <8 x double> @llvm.x86.avx512.gather.dpd.mask.512 (<8 x double>, i8, <8 x i32>, i8*, i32) | 5 declare <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double>, i8*, <8 x i32>, i8, i32) |
6 declare void @llvm.x86.avx512.scatter.dpd.mask.512 (i8*, i8, <8 x i32>, <8 x double>, i32) | 6 declare void @llvm.x86.avx512.scatter.dpd.512 (i8*, i8, <8 x i32>, <8 x double>, i32) |
7 | 7 |
8 declare <8 x float> @llvm.x86.avx512.gather.qps.mask.512 (<8 x float>, i8, <8 x i64>, i8*, i32) | 8 declare <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float>, i8*, <8 x i64>, i8, i32) |
9 declare void @llvm.x86.avx512.scatter.qps.mask.512 (i8*, i8, <8 x i64>, <8 x float>, i32) | 9 declare void @llvm.x86.avx512.scatter.qps.512 (i8*, i8, <8 x i64>, <8 x float>, i32) |
10 declare <8 x double> @llvm.x86.avx512.gather.qpd.mask.512 (<8 x double>, i8, <8 x i64>, i8*, i32) | 10 declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double>, i8*, <8 x i64>, i8, i32) |
11 declare void @llvm.x86.avx512.scatter.qpd.mask.512 (i8*, i8, <8 x i64>, <8 x double>, i32) | 11 declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, i8, <8 x i64>, <8 x double>, i32) |
12 | 12 |
13 ;CHECK-LABEL: gather_mask_dps | 13 ;CHECK-LABEL: gather_mask_dps |
14 ;CHECK: kmovw | 14 ;CHECK: kmovw |
15 ;CHECK: vgatherdps | 15 ;CHECK: vgatherdps |
16 ;CHECK: vpadd | 16 ;CHECK: vpadd |
17 ;CHECK: vscatterdps | 17 ;CHECK: vscatterdps |
18 ;CHECK: ret | 18 ;CHECK: ret |
19 define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) { | 19 define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) { |
20 %x = call <16 x float> @llvm.x86.avx512.gather.dps.mask.512 (<16 x float> %src, i16 %mask, <16 x i32>%ind, i8* %base, i32 4) | 20 %x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4) |
21 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> | 21 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> |
22 call void @llvm.x86.avx512.scatter.dps.mask.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4) | 22 call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4) |
23 ret void | 23 ret void |
24 } | 24 } |
25 | 25 |
26 ;CHECK-LABEL: gather_mask_dpd | 26 ;CHECK-LABEL: gather_mask_dpd |
27 ;CHECK: kmovw | 27 ;CHECK: kmovw |
28 ;CHECK: vgatherdpd | 28 ;CHECK: vgatherdpd |
29 ;CHECK: vpadd | 29 ;CHECK: vpadd |
30 ;CHECK: vscatterdpd | 30 ;CHECK: vscatterdpd |
31 ;CHECK: ret | 31 ;CHECK: ret |
32 define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) { | 32 define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) { |
33 %x = call <8 x double> @llvm.x86.avx512.gather.dpd.mask.512 (<8 x double> %src, i8 %mask, <8 x i32>%ind, i8* %base, i32 4) | 33 %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4) |
34 %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> | 34 %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> |
35 call void @llvm.x86.avx512.scatter.dpd.mask.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4) | 35 call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4) |
36 ret void | 36 ret void |
37 } | 37 } |
38 | 38 |
39 ;CHECK-LABEL: gather_mask_qps | 39 ;CHECK-LABEL: gather_mask_qps |
40 ;CHECK: kmovw | 40 ;CHECK: kmovw |
41 ;CHECK: vgatherqps | 41 ;CHECK: vgatherqps |
42 ;CHECK: vpadd | 42 ;CHECK: vpadd |
43 ;CHECK: vscatterqps | 43 ;CHECK: vscatterqps |
44 ;CHECK: ret | 44 ;CHECK: ret |
45 define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) { | 45 define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) { |
46 %x = call <8 x float> @llvm.x86.avx512.gather.qps.mask.512 (<8 x float> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4) | 46 %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4) |
47 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> | 47 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> |
48 call void @llvm.x86.avx512.scatter.qps.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4) | 48 call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4) |
49 ret void | 49 ret void |
50 } | 50 } |
51 | 51 |
52 ;CHECK-LABEL: gather_mask_qpd | 52 ;CHECK-LABEL: gather_mask_qpd |
53 ;CHECK: kmovw | 53 ;CHECK: kmovw |
54 ;CHECK: vgatherqpd | 54 ;CHECK: vgatherqpd |
55 ;CHECK: vpadd | 55 ;CHECK: vpadd |
56 ;CHECK: vscatterqpd | 56 ;CHECK: vscatterqpd |
57 ;CHECK: ret | 57 ;CHECK: ret |
58 define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) { | 58 define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) { |
59 %x = call <8 x double> @llvm.x86.avx512.gather.qpd.mask.512 (<8 x double> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4) | 59 %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4) |
60 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> | 60 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> |
61 call void @llvm.x86.avx512.scatter.qpd.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4) | 61 call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4) |
62 ret void | 62 ret void |
63 } | 63 } |
64 ;; | 64 ;; |
65 ;; Integer Gather/Scatter | 65 ;; Integer Gather/Scatter |
66 ;; | 66 ;; |
67 declare <16 x i32> @llvm.x86.avx512.gather.dpi.mask.512 (<16 x i32>, i16, <16 x i32>, i8*, i32) | 67 declare <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>, i8*, <16 x i32>, i16, i32) |
68 declare void @llvm.x86.avx512.scatter.dpi.mask.512 (i8*, i16, <16 x i32>, <16 x i32>, i32) | 68 declare void @llvm.x86.avx512.scatter.dpi.512 (i8*, i16, <16 x i32>, <16 x i32>, i32) |
69 declare <8 x i64> @llvm.x86.avx512.gather.dpq.mask.512 (<8 x i64>, i8, <8 x i32>, i8*, i32) | 69 declare <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64>, i8*, <8 x i32>, i8, i32) |
70 declare void @llvm.x86.avx512.scatter.dpq.mask.512 (i8*, i8, <8 x i32>, <8 x i64>, i32) | 70 declare void @llvm.x86.avx512.scatter.dpq.512 (i8*, i8, <8 x i32>, <8 x i64>, i32) |
71 | 71 |
72 declare <8 x i32> @llvm.x86.avx512.gather.qpi.mask.512 (<8 x i32>, i8, <8 x i64>, i8*, i32) | 72 declare <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32>, i8*, <8 x i64>, i8, i32) |
73 declare void @llvm.x86.avx512.scatter.qpi.mask.512 (i8*, i8, <8 x i64>, <8 x i32>, i32) | 73 declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, i8, <8 x i64>, <8 x i32>, i32) |
74 declare <8 x i64> @llvm.x86.avx512.gather.qpq.mask.512 (<8 x i64>, i8, <8 x i64>, i8*, i32) | 74 declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, <8 x i64>, i8, i32) |
75 declare void @llvm.x86.avx512.scatter.qpq.mask.512 (i8*, i8, <8 x i64>, <8 x i64>, i32) | 75 declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, i8, <8 x i64>, <8 x i64>, i32) |
76 | 76 |
77 ;CHECK-LABEL: gather_mask_dd | 77 ;CHECK-LABEL: gather_mask_dd |
78 ;CHECK: kmovw | 78 ;CHECK: kmovw |
79 ;CHECK: vpgatherdd | 79 ;CHECK: vpgatherdd |
80 ;CHECK: vpadd | 80 ;CHECK: vpadd |
81 ;CHECK: vpscatterdd | 81 ;CHECK: vpscatterdd |
82 ;CHECK: ret | 82 ;CHECK: ret |
83 define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) { | 83 define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) { |
84 %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.mask.512 (<16 x i32> %src, i16 %mask, <16 x i32>%ind, i8* %base, i32 4) | 84 %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4) |
85 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> | 85 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> |
86 call void @llvm.x86.avx512.scatter.dpi.mask.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4) | 86 call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4) |
87 ret void | 87 ret void |
88 } | 88 } |
89 | 89 |
90 ;CHECK-LABEL: gather_mask_qd | 90 ;CHECK-LABEL: gather_mask_qd |
91 ;CHECK: kmovw | 91 ;CHECK: kmovw |
92 ;CHECK: vpgatherqd | 92 ;CHECK: vpgatherqd |
93 ;CHECK: vpadd | 93 ;CHECK: vpadd |
94 ;CHECK: vpscatterqd | 94 ;CHECK: vpscatterqd |
95 ;CHECK: ret | 95 ;CHECK: ret |
96 define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) { | 96 define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) { |
97 %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.mask.512 (<8 x i32> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4) | 97 %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4) |
98 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> | 98 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> |
99 call void @llvm.x86.avx512.scatter.qpi.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4) | 99 call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4) |
100 ret void | 100 ret void |
101 } | 101 } |
102 | 102 |
103 ;CHECK-LABEL: gather_mask_qq | 103 ;CHECK-LABEL: gather_mask_qq |
104 ;CHECK: kmovw | 104 ;CHECK: kmovw |
105 ;CHECK: vpgatherqq | 105 ;CHECK: vpgatherqq |
106 ;CHECK: vpadd | 106 ;CHECK: vpadd |
107 ;CHECK: vpscatterqq | 107 ;CHECK: vpscatterqq |
108 ;CHECK: ret | 108 ;CHECK: ret |
109 define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) { | 109 define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) { |
110 %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.mask.512 (<8 x i64> %src, i8 %mask, <8 x i64>%ind, i8* %base, i32 4) | 110 %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4) |
111 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> | 111 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> |
112 call void @llvm.x86.avx512.scatter.qpq.mask.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4) | 112 call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4) |
113 ret void | 113 ret void |
114 } | 114 } |
115 | 115 |
116 ;CHECK-LABEL: gather_mask_dq | 116 ;CHECK-LABEL: gather_mask_dq |
117 ;CHECK: kmovw | 117 ;CHECK: kmovw |
118 ;CHECK: vpgatherdq | 118 ;CHECK: vpgatherdq |
119 ;CHECK: vpadd | 119 ;CHECK: vpadd |
120 ;CHECK: vpscatterdq | 120 ;CHECK: vpscatterdq |
121 ;CHECK: ret | 121 ;CHECK: ret |
122 define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) { | 122 define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) { |
123 %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.mask.512 (<8 x i64> %src, i8 %mask, <8 x i32>%ind, i8* %base, i32 4) | 123 %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4) |
124 %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> | 124 %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> |
125 call void @llvm.x86.avx512.scatter.dpq.mask.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4) | 125 call void @llvm.x86.avx512.scatter.dpq.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4) |
126 ret void | 126 ret void |
127 } | 127 } |
128 | 128 |
129 ;; FP Intrinsics without masks | 129 |
130 | 130 ;CHECK-LABEL: gather_mask_dpd_execdomain |
131 declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x i32>, i8*, i32) | 131 ;CHECK: vgatherdpd |
132 declare void @llvm.x86.avx512.scatter.dps.512 (i8*, <16 x i32>, <16 x float>, i32) | 132 ;CHECK: vmovapd |
133 declare <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x i64>, i8*, i32) | 133 ;CHECK: ret |
134 declare void @llvm.x86.avx512.scatter.qps.512 (i8*, <8 x i64>, <8 x float>, i32) | 134 define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) { |
135 declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x i64>, i8*, i32) | 135 %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4) |
136 declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, <8 x i64>, <8 x double>, i32) | 136 store <8 x double> %x, <8 x double>* %stbuf |
137 | 137 ret void |
138 ;CHECK-LABEL: gather_dps | 138 } |
139 ;CHECK: kxnorw | 139 |
140 ;CHECK-LABEL: gather_mask_qpd_execdomain | |
141 ;CHECK: vgatherqpd | |
142 ;CHECK: vmovapd | |
143 ;CHECK: ret | |
144 define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) { | |
145 %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4) | |
146 store <8 x double> %x, <8 x double>* %stbuf | |
147 ret void | |
148 } | |
149 | |
150 ;CHECK-LABEL: gather_mask_dps_execdomain | |
140 ;CHECK: vgatherdps | 151 ;CHECK: vgatherdps |
152 ;CHECK: vmovaps | |
153 ;CHECK: ret | |
154 define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) { | |
155 %res = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4) | |
156 ret <16 x float> %res; | |
157 } | |
158 | |
159 ;CHECK-LABEL: gather_mask_qps_execdomain | |
160 ;CHECK: vgatherqps | |
161 ;CHECK: vmovaps | |
162 ;CHECK: ret | |
163 define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) { | |
164 %res = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4) | |
165 ret <8 x float> %res; | |
166 } | |
167 | |
168 ;CHECK-LABEL: scatter_mask_dpd_execdomain | |
169 ;CHECK: vmovapd | |
170 ;CHECK: vscatterdpd | |
171 ;CHECK: ret | |
172 define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) { | |
173 %x = load <8 x double>* %src, align 64 | |
174 call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4) | |
175 ret void | |
176 } | |
177 | |
178 ;CHECK-LABEL: scatter_mask_qpd_execdomain | |
179 ;CHECK: vmovapd | |
180 ;CHECK: vscatterqpd | |
181 ;CHECK: ret | |
182 define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) { | |
183 %x = load <8 x double>* %src, align 64 | |
184 call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4) | |
185 ret void | |
186 } | |
187 | |
188 ;CHECK-LABEL: scatter_mask_dps_execdomain | |
189 ;CHECK: vmovaps | |
141 ;CHECK: vscatterdps | 190 ;CHECK: vscatterdps |
142 ;CHECK: ret | 191 ;CHECK: ret |
143 define void @gather_dps(<16 x i32> %ind, i8* %base, i8* %stbuf) { | 192 define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) { |
144 %x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x i32>%ind, i8* %base, i32 4) | 193 %x = load <16 x float>* %src, align 64 |
145 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> | 194 call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4) |
146 call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, <16 x i32>%ind2, <16 x float> %x, i32 4) | 195 ret void |
196 } | |
197 | |
198 ;CHECK-LABEL: scatter_mask_qps_execdomain | |
199 ;CHECK: vmovaps | |
200 ;CHECK: vscatterqps | |
201 ;CHECK: ret | |
202 define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) { | |
203 %x = load <8 x float>* %src, align 32 | |
204 call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4) | |
147 ret void | 205 ret void |
148 } | 206 } |
149 | 207 |
150 ;CHECK-LABEL: gather_qps | 208 ;CHECK-LABEL: gather_qps |
151 ;CHECK: kxnorw | 209 ;CHECK: kxnorw |
152 ;CHECK: vgatherqps | 210 ;CHECK: vgatherqps |
211 ;CHECK: vpadd | |
153 ;CHECK: vscatterqps | 212 ;CHECK: vscatterqps |
154 ;CHECK: ret | 213 ;CHECK: ret |
155 define void @gather_qps(<8 x i64> %ind, i8* %base, i8* %stbuf) { | 214 define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) { |
156 %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x i64>%ind, i8* %base, i32 4) | 215 %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 -1, i32 4) |
157 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> | 216 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> |
158 call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, <8 x i64>%ind2, <8 x float> %x, i32 4) | 217 call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 -1, <8 x i64>%ind2, <8 x float> %x, i32 4) |
159 ret void | 218 ret void |
160 } | 219 } |
161 | 220 |
162 ;CHECK-LABEL: gather_qpd | 221 ;CHECK-LABEL: prefetch |
163 ;CHECK: kxnorw | 222 ;CHECK: gatherpf0 |
164 ;CHECK: vgatherqpd | 223 ;CHECK: gatherpf1 |
165 ;CHECK: vpadd | 224 ;CHECK: scatterpf0 |
166 ;CHECK: vscatterqpd | 225 ;CHECK: scatterpf1 |
167 ;CHECK: ret | 226 ;CHECK: ret |
168 define void @gather_qpd(<8 x i64> %ind, i8* %base, i8* %stbuf) { | 227 declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8* , i32, i32); |
169 %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x i64>%ind, i8* %base, i32 4) | 228 declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8* , i32, i32); |
170 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> | 229 define void @prefetch(<8 x i64> %ind, i8* %base) { |
171 call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, <8 x i64>%ind2, <8 x double> %x, i32 4) | 230 call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0) |
172 ret void | 231 call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1) |
173 } | 232 call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0) |
174 | 233 call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1) |
175 ;; Integer Intrinsics without masks | 234 ret void |
176 | 235 } |
177 declare <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>, i8*, i32) | |
178 declare void @llvm.x86.avx512.scatter.dpi.512 (i8*, <16 x i32>, <16 x i32>, i32) | |
179 declare <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i32>, i8*, i32) | |
180 declare void @llvm.x86.avx512.scatter.dpq.512 (i8*, <8 x i32>, <8 x i64>, i32) | |
181 | |
182 declare <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i64>, i8*, i32) | |
183 declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, <8 x i64>, <8 x i32>, i32) | |
184 declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, i32) | |
185 declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, <8 x i64>, <8 x i64>, i32) | |
186 | |
187 ;CHECK-LABEL: gather_dpi | |
188 ;CHECK: kxnorw | |
189 ;CHECK: vpgatherdd | |
190 ;CHECK: vpscatterdd | |
191 ;CHECK: ret | |
192 define void @gather_dpi(<16 x i32> %ind, i8* %base, i8* %stbuf) { | |
193 %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>%ind, i8* %base, i32 4) | |
194 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> | |
195 call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, <16 x i32>%ind2, <16 x i32> %x, i32 4) | |
196 ret void | |
197 } | |
198 | |
199 ;CHECK-LABEL: gather_qpq | |
200 ;CHECK: vpxord %zmm | |
201 ;CHECK: kxnorw | |
202 ;CHECK: vpgatherqq | |
203 ;CHECK: vpadd | |
204 ;CHECK: vpscatterqq | |
205 ;CHECK: ret | |
206 define void @gather_qpq(<8 x i64> %ind, i8* %base, i8* %stbuf) { | |
207 %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>%ind, i8* %base, i32 4) | |
208 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> | |
209 call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, <8 x i64>%ind2, <8 x i64> %x, i32 4) | |
210 ret void | |
211 } | |
212 | |
213 ;CHECK-LABEL: gather_qpi | |
214 ;CHECK: vpxor %ymm | |
215 ;CHECK: kxnorw | |
216 ;CHECK: vpgatherqd | |
217 ;CHECK: vpadd | |
218 ;CHECK: vpscatterqd | |
219 ;CHECK: ret | |
220 define void @gather_qpi(<8 x i64> %ind, i8* %base, i8* %stbuf) { | |
221 %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i64>%ind, i8* %base, i32 4) | |
222 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3> | |
223 call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, <8 x i64>%ind2, <8 x i32> %x, i32 4) | |
224 ret void | |
225 } |