// RUN: mlir-opt %s -test-vector-to-vector-lowering="unroll" | FileCheck %s
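
// The "unroll" option of the test pass splits vector ops into tiles of a
// fixed native shape and reassembles the results. As an illustrative sketch
// (comments only, not checked by FileCheck; the 2x2 tile shape is inferred
// from the checks below), an op such as
//   %r = arith.addf %a, %a : vector<4x2xf32>
// is rewritten per tile as
//   %s = vector.extract_strided_slice %a {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf32> to vector<2x2xf32>
//   %t = arith.addf %s, %s : vector<2x2xf32>
//   %r0 = vector.insert_strided_slice %t, %acc {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x2xf32>
// and so on for the remaining tiles.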

// CHECK-DAG: #[[MAP1:map[0-9]*]] = affine_map<(d0, d1, d2) -> (d1, d2)>

// CHECK-LABEL: func @add4x2
// CHECK: %[[S1:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S2:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A1:.*]] = arith.addf %[[S1]], %[[S2]] : vector<2x2xf32>
// CHECK-NEXT: %[[VEC0:.*]] = vector.insert_strided_slice %[[A1]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x2xf32>
// CHECK-NEXT: %[[S3:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S4:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A2:.*]] = arith.addf %[[S3]], %[[S4]] : vector<2x2xf32>
// CHECK-NEXT: %[[VEC1:.*]] = vector.insert_strided_slice %[[A2]], %[[VEC0]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x2xf32>
// CHECK-NEXT: return %[[VEC1]] : vector<4x2xf32>

func.func @add4x2(%0: vector<4x2xf32>) -> vector<4x2xf32> {
  %1 = arith.addf %0, %0: vector<4x2xf32>
  return %1: vector<4x2xf32>
}

// CHECK-LABEL: func @add4x4
// CHECK: %[[S1:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S2:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>

// CHECK-NEXT: %[[A1:.*]] = arith.addf %[[S1]], %[[S2]] : vector<2x2xf32>

// CHECK-NEXT: %[[S3:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S4:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>

// CHECK-NEXT: %[[A2:.*]] = arith.addf %[[S3]], %[[S4]] : vector<2x2xf32>

// CHECK-NEXT: %[[S5:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S6:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A3:.*]] = arith.addf %[[S5]], %[[S6]] : vector<2x2xf32>

// CHECK-NEXT: %[[S7:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S8:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A4:.*]] = arith.addf %[[S7]], %[[S8]] : vector<2x2xf32>

// CHECK-NEXT: %[[S9:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A5:.*]] = arith.addf %[[S9]], %[[A1]] : vector<2x2xf32>
// CHECK-NEXT: %[[R1:.*]] = vector.insert_strided_slice %[[A5]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>

// CHECK-NEXT: %[[S11:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A6:.*]] = arith.addf %[[S11]], %[[A2]] : vector<2x2xf32>
// CHECK-NEXT: %[[R2:.*]] = vector.insert_strided_slice %[[A6]], %[[R1]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>

// CHECK-NEXT: %[[S13:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A7:.*]] = arith.addf %[[S13]], %[[A3]] : vector<2x2xf32>
// CHECK-NEXT: %[[R3:.*]] = vector.insert_strided_slice %[[A7]], %[[R2]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>

// CHECK-NEXT: %[[S15:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A8:.*]] = arith.addf %[[S15]], %[[A4]] : vector<2x2xf32>
// CHECK-NEXT: %[[R4:.*]] = vector.insert_strided_slice %[[A8]], %[[R3]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>

// CHECK-NEXT: return %[[R4]] : vector<4x4xf32>

func.func @add4x4(%0: vector<4x4xf32>, %1: vector<4x4xf32>) -> vector<4x4xf32> {
  %2 = arith.addf %0, %1: vector<4x4xf32>
  %3 = arith.addf %1, %2: vector<4x4xf32>
  return %3: vector<4x4xf32>
}

// CHECK-LABEL: func @contraction4x4_ikj_xfer_read

// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index

// Check that the LHS vector.transfer_read is split for each user.

// CHECK: %[[VTR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<4x2xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR1:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C0]]], %{{.*}} : memref<4x2xf32>, vector<2x2xf32>

// CHECK-NEXT: %[[VTR2:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<2x4xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR3:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C2]]], %{{.*}} : memref<2x4xf32>, vector<2x2xf32>

// CHECK-NEXT: %[[VTR4:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR5:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C2]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR6:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR7:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C2]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>

// CHECK-NEXT: %[[R0:.*]] = vector.contract {indexing_maps = [#map{{.*}}, #map{{.*}}, #map{{.*}}], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR0]], %[[VTR2]], %[[VTR4]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT: %[[R1:.*]] = vector.contract {indexing_maps = [#map{{.*}}, #map{{.*}}, #map{{.*}}], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR0]], %[[VTR3]], %[[VTR5]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT: %[[R2:.*]] = vector.contract {indexing_maps = [#map{{.*}}, #map{{.*}}, #map{{.*}}], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR1]], %[[VTR2]], %[[VTR6]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT: %[[R3:.*]] = vector.contract {indexing_maps = [#map{{.*}}, #map{{.*}}, #map{{.*}}], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR1]], %[[VTR3]], %[[VTR7]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// CHECK-NEXT: vector.transfer_write %[[R0]], %{{.*}}[%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<2x2xf32>, memref<4x4xf32>
// CHECK-NEXT: vector.transfer_write %[[R1]], %{{.*}}[%[[C0]], %[[C2]]] {in_bounds = [true, true]} : vector<2x2xf32>, memref<4x4xf32>
// CHECK-NEXT: vector.transfer_write %[[R2]], %{{.*}}[%[[C2]], %[[C0]]] {in_bounds = [true, true]} : vector<2x2xf32>, memref<4x4xf32>
// CHECK-NEXT: vector.transfer_write %[[R3]], %{{.*}}[%[[C2]], %[[C2]]] {in_bounds = [true, true]} : vector<2x2xf32>, memref<4x4xf32>
// CHECK-NEXT: return

#contraction_accesses1 = [
  affine_map<(i, k, j) -> (i, k)>,
  affine_map<(i, k, j) -> (k, j)>,
  affine_map<(i, k, j) -> (i, j)>
]
#contraction_trait1 = {
  indexing_maps = #contraction_accesses1,
  iterator_types = ["parallel", "reduction", "parallel"]
}
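
// #contraction_trait1 describes a matmul-like contraction with iterator
// order (i, k, j): the maps read A[i, k] and B[k, j] and accumulate into
// C[i, j], with k the single reduction dimension. As loop pseudocode (an
// illustrative sketch, not part of the test):
//   for i:        // parallel
//     for k:      // reduction
//       for j:    // parallel
//         C[i, j] += A[i, k] * B[k, j]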
func.func @contraction4x4_ikj_xfer_read(%arg0 : memref<4x2xf32>,
                                        %arg1 : memref<2x4xf32>,
                                        %arg2 : memref<4x4xf32>) {
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32

  %0 = vector.transfer_read %arg0[%c0, %c0], %cf0
    { permutation_map = affine_map<(d0, d1) -> (d0, d1)> }
    : memref<4x2xf32>, vector<4x2xf32>

  %1 = vector.transfer_read %arg1[%c0, %c0], %cf0
    { permutation_map = affine_map<(d0, d1) -> (d0, d1)> }
    : memref<2x4xf32>, vector<2x4xf32>

  %2 = vector.transfer_read %arg2[%c0, %c0], %cf0
    { permutation_map = affine_map<(d0, d1) -> (d0, d1)> }
    : memref<4x4xf32>, vector<4x4xf32>

  %3 = vector.contract #contraction_trait1 %0, %1, %2
    : vector<4x2xf32>, vector<2x4xf32> into vector<4x4xf32>

  vector.transfer_write %3, %arg2[%c0, %c0]
    {permutation_map = affine_map<(d0, d1) -> (d0, d1)>}
    : vector<4x4xf32>, memref<4x4xf32>
  return
}
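
// The 4x4 contract above is unrolled into four 2x2 contracts, one per
// output tile: %[[R0]] = A0*B0 + C00, %[[R1]] = A0*B1 + C01,
// %[[R2]] = A1*B0 + C10, %[[R3]] = A1*B1 + C11, where A0/A1 are the 2x2
// row tiles of the LHS and B0/B1 the 2x2 column tiles of the RHS.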

// TODO: Update test with VTR split transform.
// CHECK-LABEL: func @vector_transfers
// CHECK-COUNT-8: vector.transfer_read
// CHECK-COUNT-4: arith.addf
// CHECK-COUNT-4: vector.transfer_write

func.func @vector_transfers(%arg0: index, %arg1: index) {
  %cst = arith.constant 0.000000e+00 : f32
  %0 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
  %1 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
  %2 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
  %cst_0 = arith.constant 1.000000e+00 : f32
  %cst_1 = arith.constant 2.000000e+00 : f32
  affine.for %arg2 = 0 to %arg0 step 4 {
    affine.for %arg3 = 0 to %arg1 step 4 {
      %4 = vector.transfer_read %0[%arg2, %arg3], %cst {permutation_map = affine_map<(d0, d1) -> (d0, d1)>} : memref<?x?xf32>, vector<4x4xf32>
      %5 = vector.transfer_read %1[%arg2, %arg3], %cst {permutation_map = affine_map<(d0, d1) -> (d0, d1)>} : memref<?x?xf32>, vector<4x4xf32>
      %6 = arith.addf %4, %5 : vector<4x4xf32>
      vector.transfer_write %6, %2[%arg2, %arg3] {permutation_map = affine_map<(d0, d1) -> (d0, d1)>} : vector<4x4xf32>, memref<?x?xf32>
    }
  }
  return
}

// CHECK-LABEL: func @cancelling_shape_cast_ops
// CHECK-SAME: %[[A0:.*0]]: vector<2x4xf32>
// CHECK: return %[[A0]] : vector<2x4xf32>
func.func @cancelling_shape_cast_ops(%arg0 : vector<2x4xf32>) -> vector<2x4xf32> {
  %0 = vector.shape_cast %arg0 : vector<2x4xf32> to vector<8xf32>
  %1 = vector.shape_cast %0 : vector<8xf32> to vector<2x4xf32>
  return %1 : vector<2x4xf32>
}

// CHECK-LABEL: func @elementwise_unroll
// CHECK-SAME: (%[[ARG0:.*]]: memref<4x4xf32>, %[[ARG1:.*]]: memref<4x4xf32>)
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[VT0:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT1:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT2:.*]] = vector.transfer_read %[[ARG0]][%[[C2]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT3:.*]] = vector.transfer_read %[[ARG0]][%[[C2]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT4:.*]] = vector.transfer_read %[[ARG1]][%[[C0]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT5:.*]] = vector.transfer_read %[[ARG1]][%[[C0]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT6:.*]] = vector.transfer_read %[[ARG1]][%[[C2]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT7:.*]] = vector.transfer_read %[[ARG1]][%[[C2]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[CMP0:.*]] = arith.cmpf ult, %[[VT0]], %[[VT4]] : vector<2x2xf32>
// CHECK: %[[CMP1:.*]] = arith.cmpf ult, %[[VT1]], %[[VT5]] : vector<2x2xf32>
// CHECK: %[[CMP2:.*]] = arith.cmpf ult, %[[VT2]], %[[VT6]] : vector<2x2xf32>
// CHECK: %[[CMP3:.*]] = arith.cmpf ult, %[[VT3]], %[[VT7]] : vector<2x2xf32>
// CHECK: %[[VT0:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT1:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT2:.*]] = vector.transfer_read %[[ARG0]][%[[C2]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT3:.*]] = vector.transfer_read %[[ARG0]][%[[C2]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT4:.*]] = vector.transfer_read %[[ARG1]][%[[C0]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT5:.*]] = vector.transfer_read %[[ARG1]][%[[C0]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT6:.*]] = vector.transfer_read %[[ARG1]][%[[C2]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT7:.*]] = vector.transfer_read %[[ARG1]][%[[C2]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[SEL0:.*]] = arith.select %[[CMP0]], %[[VT0]], %[[VT4]] : vector<2x2xi1>, vector<2x2xf32>
// CHECK: %[[SEL1:.*]] = arith.select %[[CMP1]], %[[VT1]], %[[VT5]] : vector<2x2xi1>, vector<2x2xf32>
// CHECK: %[[SEL2:.*]] = arith.select %[[CMP2]], %[[VT2]], %[[VT6]] : vector<2x2xi1>, vector<2x2xf32>
// CHECK: %[[SEL3:.*]] = arith.select %[[CMP3]], %[[VT3]], %[[VT7]] : vector<2x2xi1>, vector<2x2xf32>
// CHECK: vector.transfer_write %[[SEL0]], %[[ARG0]][%[[C0]], %[[C0]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// CHECK: vector.transfer_write %[[SEL1]], %[[ARG0]][%[[C0]], %[[C2]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// CHECK: vector.transfer_write %[[SEL2]], %[[ARG0]][%[[C2]], %[[C0]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// CHECK: vector.transfer_write %[[SEL3]], %[[ARG0]][%[[C2]], %[[C2]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
func.func @elementwise_unroll(%arg0 : memref<4x4xf32>, %arg1 : memref<4x4xf32>) {
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %0 = vector.transfer_read %arg0[%c0, %c0], %cf0 : memref<4x4xf32>, vector<4x4xf32>
  %1 = vector.transfer_read %arg1[%c0, %c0], %cf0 : memref<4x4xf32>, vector<4x4xf32>
  %cond = arith.cmpf ult, %0, %1 : vector<4x4xf32>
  // The vector transfer split pattern only supports a single user right now.
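  // The test therefore re-reads %arg0 and %arg1 instead of reusing %0 and
  // %1, which is why the checks above match two batches of eight 2x2
  // transfer_read ops.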
  %2 = vector.transfer_read %arg0[%c0, %c0], %cf0 : memref<4x4xf32>, vector<4x4xf32>
  %3 = vector.transfer_read %arg1[%c0, %c0], %cf0 : memref<4x4xf32>, vector<4x4xf32>
  %4 = arith.select %cond, %2, %3 : vector<4x4xi1>, vector<4x4xf32>
  vector.transfer_write %4, %arg0[%c0, %c0] : vector<4x4xf32>, memref<4x4xf32>
  return
}

// CHECK-LABEL: func @contraction4x4_ikj_xfer_read_tensor
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// Check that the vector.transfer_read/write ops are split based on the contract unrolling.
// CHECK: %[[VTR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : tensor<4x2xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR1:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C0]]], %{{.*}} : tensor<4x2xf32>, vector<2x2xf32>

// CHECK-NEXT: %[[VTR2:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : tensor<2x4xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR3:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C2]]], %{{.*}} : tensor<2x4xf32>, vector<2x2xf32>

// CHECK-NEXT: %[[VTR4:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR5:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C2]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR6:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C0]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR7:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C2]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>

// CHECK-NEXT: %[[R0:.*]] = vector.contract {indexing_maps = [#map{{.*}}, #map{{.*}}, #map{{.*}}], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR0]], %[[VTR2]], %[[VTR4]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT: %[[R1:.*]] = vector.contract {indexing_maps = [#map{{.*}}, #map{{.*}}, #map{{.*}}], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR0]], %[[VTR3]], %[[VTR5]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT: %[[R2:.*]] = vector.contract {indexing_maps = [#map{{.*}}, #map{{.*}}, #map{{.*}}], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR1]], %[[VTR2]], %[[VTR6]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT: %[[R3:.*]] = vector.contract {indexing_maps = [#map{{.*}}, #map{{.*}}, #map{{.*}}], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR1]], %[[VTR3]], %[[VTR7]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// CHECK-NEXT: %[[VTW0:.*]] = vector.transfer_write %[[R0]], %{{.*}}[%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<2x2xf32>, tensor<4x4xf32>
// CHECK-NEXT: %[[VTW1:.*]] = vector.transfer_write %[[R1]], %[[VTW0]][%[[C0]], %[[C2]]] {in_bounds = [true, true]} : vector<2x2xf32>, tensor<4x4xf32>
// CHECK-NEXT: %[[VTW2:.*]] = vector.transfer_write %[[R2]], %[[VTW1]][%[[C2]], %[[C0]]] {in_bounds = [true, true]} : vector<2x2xf32>, tensor<4x4xf32>
// CHECK-NEXT: %[[VTW3:.*]] = vector.transfer_write %[[R3]], %[[VTW2]][%[[C2]], %[[C2]]] {in_bounds = [true, true]} : vector<2x2xf32>, tensor<4x4xf32>
// CHECK-NEXT: return %[[VTW3]] : tensor<4x4xf32>

func.func @contraction4x4_ikj_xfer_read_tensor(%arg0 : tensor<4x2xf32>,
                                               %arg1 : tensor<2x4xf32>,
                                               %arg2 : tensor<4x4xf32>) ->
  tensor<4x4xf32> {
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %0 = vector.transfer_read %arg0[%c0, %c0], %cf0 :
    tensor<4x2xf32>, vector<4x2xf32>
  %1 = vector.transfer_read %arg1[%c0, %c0], %cf0 :
    tensor<2x4xf32>, vector<2x4xf32>
  %2 = vector.transfer_read %arg2[%c0, %c0], %cf0 :
    tensor<4x4xf32>, vector<4x4xf32>
  %3 = vector.contract #contraction_trait1 %0, %1, %2
    : vector<4x2xf32>, vector<2x4xf32> into vector<4x4xf32>
  %r = vector.transfer_write %3, %arg2[%c0, %c0]
    : vector<4x4xf32>, tensor<4x4xf32>
  return %r : tensor<4x4xf32>
}
// CHECK-LABEL: func @bubble_down_bitcast_in_extract
// CHECK-SAME: %[[SRC:.+]]: vector<4xf32>
func.func @bubble_down_bitcast_in_extract(%src: vector<4xf32>) -> (f16, f16) {
  %0 = vector.bitcast %src : vector<4xf32> to vector<8xf16>
  // CHECK: %[[EXTRACT1:.+]] = vector.extract_strided_slice %[[SRC]] {offsets = [1], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
  // CHECK: %[[CAST1:.+]] = vector.bitcast %[[EXTRACT1]] : vector<1xf32> to vector<2xf16>
  // CHECK: %[[EXTRACT2:.+]] = vector.extract %[[CAST1]][1] : vector<2xf16>
  %1 = vector.extract %0[3] : vector<8xf16>
  // CHECK: %[[EXTRACT3:.+]] = vector.extract_strided_slice %[[SRC]] {offsets = [2], sizes = [1], strides = [1]} : vector<4xf32> to vector<1xf32>
  // CHECK: %[[CAST2:.+]] = vector.bitcast %[[EXTRACT3]] : vector<1xf32> to vector<2xf16>
  // CHECK: %[[EXTRACT4:.+]] = vector.extract %[[CAST2]][0] : vector<2xf16>
  %2 = vector.extract %0[4] : vector<8xf16>
  // CHECK: return %[[EXTRACT2]], %[[EXTRACT4]]
  return %1, %2: f16, f16
}
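
// Index arithmetic behind the checks above, given the 2:1 f16-to-f32
// element ratio: f16 element i of the 8xf16 view lives in f32 element
// i / 2 at sub-index i % 2, so i = 3 maps to f32 element 1, sub-index 1,
// and i = 4 maps to f32 element 2, sub-index 0.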

// CHECK-LABEL: func @bubble_down_bitcast_in_strided_slice_extract
// CHECK-SAME: %[[SRC:.+]]: vector<4xf32>
func.func @bubble_down_bitcast_in_strided_slice_extract(%arg0: vector<4xf32>) -> vector<4xf16> {
  // CHECK: %[[EXTRACT:.+]] = vector.extract_strided_slice %[[SRC]] {offsets = [2], sizes = [2], strides = [1]} : vector<4xf32> to vector<2xf32>
  // CHECK: %[[CAST:.+]] = vector.bitcast %[[EXTRACT]] : vector<2xf32> to vector<4xf16>
  %cast = vector.bitcast %arg0: vector<4xf32> to vector<8xf16>
  %0 = vector.extract_strided_slice %cast {offsets = [4], sizes = [4], strides = [1]} : vector<8xf16> to vector<4xf16>
  // CHECK: return %[[CAST]]
  return %0: vector<4xf16>
}

// CHECK-LABEL: func @bubble_down_bitcast_in_strided_slice_extract_full_last_dim
// CHECK-SAME: %[[SRC:.+]]: vector<4x2xf32>
func.func @bubble_down_bitcast_in_strided_slice_extract_full_last_dim(%arg0: vector<4x2xf32>) -> vector<2x4xf16> {
  // CHECK: %[[EXTRACT:.+]] = vector.extract_strided_slice %[[SRC]] {offsets = [1], sizes = [2], strides = [1]} : vector<4x2xf32> to vector<2x2xf32>
  // CHECK: %[[CAST:.+]] = vector.bitcast %[[EXTRACT]] : vector<2x2xf32> to vector<2x4xf16>
  %cast = vector.bitcast %arg0: vector<4x2xf32> to vector<4x4xf16>
  %0 = vector.extract_strided_slice %cast {offsets = [1], sizes = [2], strides = [1]} : vector<4x4xf16> to vector<2x4xf16>
  // CHECK: return %[[CAST]]
  return %0: vector<2x4xf16>
}

// CHECK-LABEL: func @bubble_down_bitcast_in_strided_slice_extract_odd_offset
func.func @bubble_down_bitcast_in_strided_slice_extract_odd_offset(%arg0: vector<4xf32>) -> vector<4xf16> {
  // CHECK: vector.bitcast
  // CHECK-NEXT: vector.extract_strided_slice
  %cast = vector.bitcast %arg0: vector<4xf32> to vector<8xf16>
  %0 = vector.extract_strided_slice %cast {offsets = [3], sizes = [4], strides = [1]} : vector<8xf16> to vector<4xf16>
  return %0: vector<4xf16>
}

// CHECK-LABEL: func @bubble_down_bitcast_in_strided_slice_extract_odd_size
func.func @bubble_down_bitcast_in_strided_slice_extract_odd_size(%arg0: vector<4xf32>) -> vector<3xf16> {
  // CHECK: vector.bitcast
  // CHECK-NEXT: vector.extract_strided_slice
  %cast = vector.bitcast %arg0: vector<4xf32> to vector<8xf16>
  %0 = vector.extract_strided_slice %cast {offsets = [0], sizes = [3], strides = [1]} : vector<8xf16> to vector<3xf16>
  return %0: vector<3xf16>
}
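
// The two negative cases above keep the bitcast first: with the 2:1
// f16-to-f32 ratio, bubbling the bitcast below the slice requires the f16
// offset and size to be even, so offset 3 and size 3 block the pattern.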

// CHECK-LABEL: func @bubble_up_bitcast_in_strided_slice_insert
// CHECK-SAME: (%[[DST:.+]]: vector<8xf16>, %[[SRC1:.+]]: vector<4xf16>, %[[SRC2:.+]]: vector<4xf16>)
func.func @bubble_up_bitcast_in_strided_slice_insert(%dst: vector<8xf16>, %src1: vector<4xf16>, %src2: vector<4xf16>) -> vector<4xf32> {
  // CHECK-DAG: %[[CAST_SRC1:.+]] = vector.bitcast %[[SRC1]] : vector<4xf16> to vector<2xf32>
  // CHECK-DAG: %[[CAST_SRC2:.+]] = vector.bitcast %[[SRC2]] : vector<4xf16> to vector<2xf32>
  // CHECK-DAG: %[[CAST_DST:.+]] = vector.bitcast %[[DST]] : vector<8xf16> to vector<4xf32>
  // CHECK: %[[INSERT1:.+]] = vector.insert_strided_slice %[[CAST_SRC1]], %[[CAST_DST]] {offsets = [0], strides = [1]} : vector<2xf32> into vector<4xf32>
  // CHECK: %[[INSERT2:.+]] = vector.insert_strided_slice %[[CAST_SRC2]], %[[INSERT1]] {offsets = [2], strides = [1]} : vector<2xf32> into vector<4xf32>
  %0 = vector.insert_strided_slice %src1, %dst {offsets = [0], strides = [1]} : vector<4xf16> into vector<8xf16>
  %1 = vector.insert_strided_slice %src2, %0 {offsets = [4], strides = [1]} : vector<4xf16> into vector<8xf16>
  %cast = vector.bitcast %1: vector<8xf16> to vector<4xf32>
  // CHECK: return %[[INSERT2]]
  return %cast: vector<4xf32>
}
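
// When the bitcast is bubbled up across the inserts, slice offsets are
// rescaled by the element-width ratio: the f16 offset 4 above becomes the
// f32 offset 2 in the checks.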

// CHECK-LABEL: func @bubble_up_bitcast_in_strided_slice_insert_odd_offset
func.func @bubble_up_bitcast_in_strided_slice_insert_odd_offset(%dst: vector<8xf16>, %src: vector<4xf16>) -> vector<4xf32> {
  // CHECK: vector.insert_strided_slice
  // CHECK-NEXT: vector.bitcast
  %0 = vector.insert_strided_slice %src, %dst {offsets = [3], strides = [1]} : vector<4xf16> into vector<8xf16>
  %cast = vector.bitcast %0: vector<8xf16> to vector<4xf32>
  return %cast: vector<4xf32>
}

// CHECK-LABEL: func @bubble_up_bitcast_in_strided_slice_insert_different_rank
func.func @bubble_up_bitcast_in_strided_slice_insert_different_rank(%dst: vector<16x4x8xf16>, %src: vector<2x4xf16>) -> vector<16x4x4xf32> {
  // CHECK: vector.insert_strided_slice
  // CHECK-NEXT: vector.bitcast
  %0 = vector.insert_strided_slice %src, %dst {offsets = [0, 0, 2], strides = [1, 1]} : vector<2x4xf16> into vector<16x4x8xf16>
  %cast = vector.bitcast %0: vector<16x4x8xf16> to vector<16x4x4xf32>
  return %cast: vector<16x4x4xf32>
}

// CHECK-LABEL: func @bubble_up_bitcast_in_strided_slice_insert_odd_shape
func.func @bubble_up_bitcast_in_strided_slice_insert_odd_shape(%dst: vector<2xf16>, %src: vector<1xf16>) -> vector<1xf32> {
  // CHECK: vector.insert_strided_slice
  // CHECK-NEXT: vector.bitcast
  %0 = vector.insert_strided_slice %src, %dst {offsets = [0], strides = [1]} : vector<1xf16> into vector<2xf16>
  %cast = vector.bitcast %0: vector<2xf16> to vector<1xf32>
  return %cast: vector<1xf32>
}

// CHECK-LABEL: func @bubble_up_bitcast_in_strided_slice_insert_larger_odd_shape
func.func @bubble_up_bitcast_in_strided_slice_insert_larger_odd_shape(%dst: vector<8xf16>, %src: vector<3xf16>) -> vector<4xf32> {
  // CHECK: vector.insert_strided_slice
  // CHECK-NEXT: vector.bitcast
  %0 = vector.insert_strided_slice %src, %dst {offsets = [0], strides = [1]} : vector<3xf16> into vector<8xf16>
  %cast = vector.bitcast %0: vector<8xf16> to vector<4xf32>
  return %cast: vector<4xf32>
}
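
// The four negative cases above preserve the insert + bitcast order: an odd
// insertion offset (3), a source rank different from the destination rank,
// or a source length (1 or 3 f16 elements) that does not form whole f32
// elements all block bubbling the bitcast up.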

// Make sure we do not crash on 0-D vectors.
// CHECK-LABEL: func.func @vec_0D
// CHECK-NEXT: vector.bitcast
func.func @vec_0D(%arg0: vector<f32>) -> vector<i32> {
  %0 = vector.bitcast %arg0 : vector<f32> to vector<i32>
  return %0 : vector<i32>
}
|