// RUN: mlir-opt %s --test-transform-dialect-interpreter --split-input-file | FileCheck %s
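
// This file exercises the "vector-transfer" split strategy: each
// vector.transfer_read / vector.transfer_write whose accessed tile may run
// out of bounds is rewritten into an scf.if that selects between a fast path
// operating directly on the original memref and a slow path that goes through
// a full-tile memref.alloca buffer.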

// CHECK-DAG: #[[$map_p4:.*]] = affine_map<()[s0] -> (s0 + 4)>
// CHECK-DAG: #[[$map_p8:.*]] = affine_map<()[s0] -> (s0 + 8)>

// CHECK-LABEL: split_vector_transfer_read_2d(
// CHECK-SAME: %[[A:[a-zA-Z0-9_]*]]: memref
// CHECK-SAME: %[[i:[a-zA-Z0-9_]*]]: index
// CHECK-SAME: %[[j:[a-zA-Z0-9_]*]]: index

func.func @split_vector_transfer_read_2d(%A: memref<?x8xf32>, %i: index, %j: index) -> vector<4x8xf32> {
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32

  // CHECK-DAG: %[[c8:.*]] = arith.constant 8 : index
  // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
  // alloca for boundary full tile
  // CHECK: %[[alloc:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
  // %i + 4 <= dim(%A, 0)
  // CHECK: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]]
  // CHECK: %[[d0:.*]] = memref.dim %[[A]], %[[c0]] : memref<?x8xf32>
  // CHECK: %[[cmp0:.*]] = arith.cmpi sle, %[[idx0]], %[[d0]] : index
  // %j + 8 <= dim(%A, 1)
  // CHECK: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]]
  // CHECK: %[[cmp1:.*]] = arith.cmpi sle, %[[idx1]], %[[c8]] : index
  // Are both conditions true?
  // CHECK: %[[cond:.*]] = arith.andi %[[cmp0]], %[[cmp1]] : i1
  // CHECK: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref<?x8xf32>, index, index) {
  // inBounds, just yield %A
  // CHECK: scf.yield %[[A]], %[[i]], %[[j]] : memref<?x8xf32>, index, index
  // CHECK: } else {
  // slow path, fill tmp alloc and yield a memref_casted version of it
  // CHECK: %[[slow:.*]] = vector.transfer_read %[[A]][%[[i]], %[[j]]], %cst :
  // CHECK-SAME: memref<?x8xf32>, vector<4x8xf32>
  // CHECK: %[[cast_alloc:.*]] = vector.type_cast %[[alloc]] :
  // CHECK-SAME: memref<4x8xf32> to memref<vector<4x8xf32>>
  // CHECK: store %[[slow]], %[[cast_alloc]][] : memref<vector<4x8xf32>>
  // CHECK: %[[yielded:.*]] = memref.cast %[[alloc]] :
  // CHECK-SAME: memref<4x8xf32> to memref<?x8xf32>
  // CHECK: scf.yield %[[yielded]], %[[c0]], %[[c0]] :
  // CHECK-SAME: memref<?x8xf32>, index, index
  // CHECK: }
  // CHECK: %[[res:.*]] = vector.transfer_read %[[ifres]]#0[%[[ifres]]#1, %[[ifres]]#2], %cst
  // CHECK-SAME: {in_bounds = [true, true]} : memref<?x8xf32>, vector<4x8xf32>

  %1 = vector.transfer_read %A[%i, %j], %f0 : memref<?x8xf32>, vector<4x8xf32>

  return %1 : vector<4x8xf32>
}

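// Same splitting as above, but on a memref with a strided layout: the source
// and the alloca buffer are not cast-compatible, so even the in-bounds fast
// path must memref.cast %A to the common type yielded by the scf.if.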
// CHECK-LABEL: split_vector_transfer_read_strided_2d(
// CHECK-SAME: %[[A:[a-zA-Z0-9_]*]]: memref
// CHECK-SAME: %[[i:[a-zA-Z0-9_]*]]: index
// CHECK-SAME: %[[j:[a-zA-Z0-9_]*]]: index

func.func @split_vector_transfer_read_strided_2d(
    %A: memref<7x8xf32, strided<[?, 1], offset: ?>>,
    %i: index, %j: index) -> vector<4x8xf32> {
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32

  // CHECK-DAG: %[[c7:.*]] = arith.constant 7 : index
  // CHECK-DAG: %[[c8:.*]] = arith.constant 8 : index
  // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
  // alloca for boundary full tile
  // CHECK: %[[alloc:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
  // %i + 4 <= dim(%A, 0)
  // CHECK: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]]
  // CHECK: %[[cmp0:.*]] = arith.cmpi sle, %[[idx0]], %[[c7]] : index
  // %j + 8 <= dim(%A, 1)
  // CHECK: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]]
  // CHECK: %[[cmp1:.*]] = arith.cmpi sle, %[[idx1]], %[[c8]] : index
  // Are both conditions true?
  // CHECK: %[[cond:.*]] = arith.andi %[[cmp0]], %[[cmp1]] : i1
  // CHECK: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref<?x8xf32, strided<[?, 1], offset: ?>>, index, index) {
  // inBounds but not cast-compatible: yield a memref_casted form of %A
  // CHECK: %[[casted:.*]] = memref.cast %arg0 :
  // CHECK-SAME: memref<7x8xf32, strided<[?, 1], offset: ?>> to memref<?x8xf32, strided<[?, 1], offset: ?>>
  // CHECK: scf.yield %[[casted]], %[[i]], %[[j]] :
  // CHECK-SAME: memref<?x8xf32, strided<[?, 1], offset: ?>>, index, index
  // CHECK: } else {
  // slow path, fill tmp alloc and yield a memref_casted version of it
  // CHECK: %[[slow:.*]] = vector.transfer_read %[[A]][%[[i]], %[[j]]], %cst :
  // CHECK-SAME: memref<7x8xf32, strided<[?, 1], offset: ?>>, vector<4x8xf32>
  // CHECK: %[[cast_alloc:.*]] = vector.type_cast %[[alloc]] :
  // CHECK-SAME: memref<4x8xf32> to memref<vector<4x8xf32>>
  // CHECK: store %[[slow]], %[[cast_alloc]][] :
  // CHECK-SAME: memref<vector<4x8xf32>>
  // CHECK: %[[yielded:.*]] = memref.cast %[[alloc]] :
  // CHECK-SAME: memref<4x8xf32> to memref<?x8xf32, strided<[?, 1], offset: ?>>
  // CHECK: scf.yield %[[yielded]], %[[c0]], %[[c0]] :
  // CHECK-SAME: memref<?x8xf32, strided<[?, 1], offset: ?>>, index, index
  // CHECK: }
  // CHECK: %[[res:.*]] = vector.transfer_read {{.*}} {in_bounds = [true, true]} :
  // CHECK-SAME: memref<?x8xf32, strided<[?, 1], offset: ?>>, vector<4x8xf32>
  %1 = vector.transfer_read %A[%i, %j], %f0 :
    memref<7x8xf32, strided<[?, 1], offset: ?>>, vector<4x8xf32>

  // CHECK: return %[[res]] : vector<4x8xf32>
  return %1 : vector<4x8xf32>
}

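// Splitting with a non-default memory space: the fast path additionally
// inserts a memref.memory_space_cast before the memref.cast so that both
// scf.if branches yield the same memref type.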
func.func @split_vector_transfer_read_mem_space(%A: memref<?x8xf32, 3>, %i: index, %j: index) -> vector<4x8xf32> {
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32

  // CHECK: scf.if {{.*}} -> (memref<?x8xf32, strided<[8, 1]>>, index, index) {
  // inBounds with a different memory space
  // CHECK: %[[space_cast:.*]] = memref.memory_space_cast %{{.*}} :
  // CHECK-SAME: memref<?x8xf32, 3> to memref<?x8xf32>
  // CHECK: %[[cast:.*]] = memref.cast %[[space_cast]] :
  // CHECK-SAME: memref<?x8xf32> to memref<?x8xf32, strided<[8, 1]>>
  // CHECK: scf.yield %[[cast]], {{.*}} : memref<?x8xf32, strided<[8, 1]>>, index, index
  // CHECK: } else {
  // slow path, fill tmp alloc and yield a memref_casted version of it
  // CHECK: %[[slow:.*]] = vector.transfer_read %[[A]][%[[i]], %[[j]]], %cst :
  // CHECK-SAME: memref<?x8xf32, 3>, vector<4x8xf32>
  // CHECK: %[[cast_alloc:.*]] = vector.type_cast %[[alloc]] :
  // CHECK-SAME: memref<4x8xf32> to memref<vector<4x8xf32>>
  // CHECK: store %[[slow]], %[[cast_alloc]][] : memref<vector<4x8xf32>>
  // CHECK: %[[yielded:.*]] = memref.cast %[[alloc]] :
  // CHECK-SAME: memref<4x8xf32> to memref<?x8xf32, strided<[8, 1]>>
  // CHECK: scf.yield %[[yielded]], %[[c0]], %[[c0]] :
  // CHECK-SAME: memref<?x8xf32, strided<[8, 1]>>, index, index
  // CHECK: }
  // CHECK: %[[res:.*]] = vector.transfer_read %[[ifres]]#0[%[[ifres]]#1, %[[ifres]]#2], %cst
  // CHECK-SAME: {in_bounds = [true, true]} : memref<?x8xf32, strided<[8, 1]>>, vector<4x8xf32>

  %1 = vector.transfer_read %A[%i, %j], %f0 : memref<?x8xf32, 3>, vector<4x8xf32>

  return %1 : vector<4x8xf32>
}

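// Transform script: apply the split_transfer_full_partial patterns with the
// "vector-transfer" strategy to the payload func.func.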
transform.sequence failures(propagate) {
^bb1(%func_op: !transform.op<"func.func">):
  transform.apply_patterns to %func_op {
    transform.apply_patterns.vector.split_transfer_full_partial split_transfer_strategy = "vector-transfer"
  } : !transform.op<"func.func">
}

// -----

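// Splitting a vector.transfer_write: the in-bounds write targets either the
// original destination or the full-tile alloca, and a second scf.if copies the
// alloca contents back into the destination on the out-of-bounds path.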
func.func @split_vector_transfer_write_2d(%V: vector<4x8xf32>, %A: memref<?x8xf32>, %i: index, %j: index) {
  vector.transfer_write %V, %A[%i, %j] :
    vector<4x8xf32>, memref<?x8xf32>
  return
}

// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 4)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)>
// CHECK: func @split_vector_transfer_write_2d(
// CHECK-SAME: %[[VEC:.*]]: vector<4x8xf32>,
// CHECK-SAME: %[[DEST:.*]]: memref<?x8xf32>,
// CHECK-SAME: %[[I:.*]]: index,
// CHECK-SAME: %[[J:.*]]: index) {
// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[CT:.*]] = arith.constant true
// CHECK: %[[TEMP:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
// CHECK: %[[VAL_8:.*]] = affine.apply #[[MAP0]]()[%[[I]]]
// CHECK: %[[DIM0:.*]] = memref.dim %[[DEST]], %[[C0]] : memref<?x8xf32>
// CHECK: %[[DIM0_IN:.*]] = arith.cmpi sle, %[[VAL_8]], %[[DIM0]] : index
// CHECK: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[J]]]
// CHECK: %[[DIM1_IN:.*]] = arith.cmpi sle, %[[DIM1]], %[[C8]] : index
// CHECK: %[[IN_BOUNDS:.*]] = arith.andi %[[DIM0_IN]], %[[DIM1_IN]] : i1
// CHECK: %[[IN_BOUND_DEST:.*]]:3 = scf.if %[[IN_BOUNDS]] ->
// CHECK-SAME: (memref<?x8xf32>, index, index) {
// CHECK: scf.yield %[[DEST]], %[[I]], %[[J]] : memref<?x8xf32>, index, index
// CHECK: } else {
// CHECK: %[[VAL_15:.*]] = memref.cast %[[TEMP]]
// CHECK-SAME: : memref<4x8xf32> to memref<?x8xf32>
// CHECK: scf.yield %[[VAL_15]], %[[C0]], %[[C0]]
// CHECK-SAME: : memref<?x8xf32>, index, index
// CHECK: }
// CHECK: vector.transfer_write %[[VEC]],
// CHECK-SAME: %[[IN_BOUND_DEST:.*]]#0[%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
// CHECK-SAME: {in_bounds = [true, true]} : vector<4x8xf32>, memref<?x8xf32>
// CHECK: %[[OUT_BOUNDS:.*]] = arith.xori %[[IN_BOUNDS]], %[[CT]] : i1
// CHECK: scf.if %[[OUT_BOUNDS]] {
// CHECK: %[[CASTED:.*]] = vector.type_cast %[[TEMP]]
// CHECK-SAME: : memref<4x8xf32> to memref<vector<4x8xf32>>
// CHECK: %[[RESULT_COPY:.*]] = memref.load %[[CASTED]][]
// CHECK-SAME: : memref<vector<4x8xf32>>
// CHECK: vector.transfer_write %[[RESULT_COPY]],
// CHECK-SAME: %[[DEST]][%[[I]], %[[J]]]
// CHECK-SAME: : vector<4x8xf32>, memref<?x8xf32>
// CHECK: }
// CHECK: return
// CHECK: }

transform.sequence failures(propagate) {
^bb1(%func_op: !transform.op<"func.func">):
  transform.apply_patterns to %func_op {
    transform.apply_patterns.vector.split_transfer_full_partial split_transfer_strategy = "vector-transfer"
  } : !transform.op<"func.func">
}

// -----

func.func @split_vector_transfer_write_strided_2d(
    %V: vector<4x8xf32>, %A: memref<7x8xf32, strided<[?, 1], offset: ?>>,
    %i: index, %j: index) {
  vector.transfer_write %V, %A[%i, %j] :
    vector<4x8xf32>, memref<7x8xf32, strided<[?, 1], offset: ?>>
  return
}

// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 4)>
// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (s0 + 8)>
// CHECK: func @split_vector_transfer_write_strided_2d(
// CHECK-SAME: %[[VEC:.*]]: vector<4x8xf32>,
// CHECK-SAME: %[[DEST:.*]]: memref<7x8xf32, strided<[?, 1], offset: ?>>,
// CHECK-SAME: %[[I:.*]]: index,
// CHECK-SAME: %[[J:.*]]: index) {
// CHECK-DAG: %[[C7:.*]] = arith.constant 7 : index
// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[CT:.*]] = arith.constant true
// CHECK: %[[TEMP:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
// CHECK: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[I]]]
// CHECK: %[[DIM0_IN:.*]] = arith.cmpi sle, %[[DIM0]], %[[C7]] : index
// CHECK: %[[DIM1:.*]] = affine.apply #[[MAP2]]()[%[[J]]]
// CHECK: %[[DIM1_IN:.*]] = arith.cmpi sle, %[[DIM1]], %[[C8]] : index
// CHECK: %[[IN_BOUNDS:.*]] = arith.andi %[[DIM0_IN]], %[[DIM1_IN]] : i1
// CHECK: %[[IN_BOUND_DEST:.*]]:3 = scf.if %[[IN_BOUNDS]]
// CHECK-SAME: -> (memref<?x8xf32, strided<[?, 1], offset: ?>>, index, index) {
// CHECK: %[[VAL_15:.*]] = memref.cast %[[DEST]]
// CHECK-SAME: : memref<7x8xf32, strided<[?, 1], offset: ?>> to memref<?x8xf32, strided<[?, 1], offset: ?>>
// CHECK: scf.yield %[[VAL_15]], %[[I]], %[[J]]
// CHECK-SAME: : memref<?x8xf32, strided<[?, 1], offset: ?>>, index, index
// CHECK: } else {
// CHECK: %[[VAL_16:.*]] = memref.cast %[[TEMP]]
// CHECK-SAME: : memref<4x8xf32> to memref<?x8xf32, strided<[?, 1], offset: ?>>
// CHECK: scf.yield %[[VAL_16]], %[[C0]], %[[C0]]
// CHECK-SAME: : memref<?x8xf32, strided<[?, 1], offset: ?>>, index, index
// CHECK: }
// CHECK: vector.transfer_write %[[VEC]],
// CHECK-SAME: %[[IN_BOUND_DEST:.*]]#0
// CHECK-SAME: [%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
// CHECK-SAME: {in_bounds = [true, true]} : vector<4x8xf32>, memref<?x8xf32, strided<[?, 1], offset: ?>>
// CHECK: %[[OUT_BOUNDS:.*]] = arith.xori %[[IN_BOUNDS]], %[[CT]] : i1
// CHECK: scf.if %[[OUT_BOUNDS]] {
// CHECK: %[[VAL_19:.*]] = vector.type_cast %[[TEMP]]
// CHECK-SAME: : memref<4x8xf32> to memref<vector<4x8xf32>>
// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_19]][]
// CHECK-SAME: : memref<vector<4x8xf32>>
// CHECK: vector.transfer_write %[[VAL_20]], %[[DEST]][%[[I]], %[[J]]]
// CHECK-SAME: : vector<4x8xf32>, memref<7x8xf32, strided<[?, 1], offset: ?>>
// CHECK: }
// CHECK: return
// CHECK: }

transform.sequence failures(propagate) {
^bb1(%func_op: !transform.op<"func.func">):
  transform.apply_patterns to %func_op {
    transform.apply_patterns.vector.split_transfer_full_partial split_transfer_strategy = "vector-transfer"
  } : !transform.op<"func.func">
}

// -----

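// Splitting a vector.transfer_write with a non-default memory space: as in the
// read case, the fast path needs a memref.memory_space_cast before casting to
// the common yielded memref type.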
func.func @split_vector_transfer_write_mem_space(%V: vector<4x8xf32>, %A: memref<?x8xf32, 3>, %i: index, %j: index) {
  vector.transfer_write %V, %A[%i, %j] :
    vector<4x8xf32>, memref<?x8xf32, 3>
  return
}

// CHECK: func @split_vector_transfer_write_mem_space(
// CHECK: scf.if {{.*}} -> (memref<?x8xf32, strided<[8, 1]>>, index, index) {
// CHECK: %[[space_cast:.*]] = memref.memory_space_cast %{{.*}} :
// CHECK-SAME: memref<?x8xf32, 3> to memref<?x8xf32>
// CHECK: %[[cast:.*]] = memref.cast %[[space_cast]] :
// CHECK-SAME: memref<?x8xf32> to memref<?x8xf32, strided<[8, 1]>>
// CHECK: scf.yield %[[cast]], {{.*}} : memref<?x8xf32, strided<[8, 1]>>, index, index
// CHECK: } else {
// CHECK: %[[VAL_15:.*]] = memref.cast %[[TEMP]]
// CHECK-SAME: : memref<4x8xf32> to memref<?x8xf32, strided<[8, 1]>>
// CHECK: scf.yield %[[VAL_15]], %[[C0]], %[[C0]]
// CHECK-SAME: : memref<?x8xf32, strided<[8, 1]>>, index, index
// CHECK: }
// CHECK: vector.transfer_write %[[VEC]],
// CHECK-SAME: %[[IN_BOUND_DEST:.*]]#0[%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
// CHECK-SAME: {in_bounds = [true, true]} : vector<4x8xf32>, memref<?x8xf32, strided<[8, 1]>>

transform.sequence failures(propagate) {
^bb1(%func_op: !transform.op<"func.func">):
  transform.apply_patterns to %func_op {
    transform.apply_patterns.vector.split_transfer_full_partial split_transfer_strategy = "vector-transfer"
  } : !transform.op<"func.func">
}

// -----

func.func private @fake_side_effecting_fun(%0: vector<2x2xf32>) -> ()

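// The temporary alloca created by the split must stay inside the
// async.execute region; it must not be hoisted above the async boundary.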
// CHECK-LABEL: transfer_read_within_async_execute
func.func @transfer_read_within_async_execute(%A : memref<?x?xf32>) -> !async.token {
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32
  // CHECK-NOT: alloca
  // CHECK: async.execute
  // CHECK: alloca
  %token = async.execute {
    %0 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x?xf32>, vector<2x2xf32>
    func.call @fake_side_effecting_fun(%0) : (vector<2x2xf32>) -> ()
    async.yield
  }
  return %token : !async.token
}

// Ensure that `alloca`s are inserted outside of loops even though loops are
// considered allocation scopes.
// CHECK-LABEL: transfer_read_within_scf_for
func.func @transfer_read_within_scf_for(%A : memref<?x?xf32>, %lb : index, %ub : index, %step : index) {
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32
  // CHECK: memref.alloca
  // CHECK: scf.for
  // CHECK-NOT: memref.alloca
  scf.for %i = %lb to %ub step %step {
    %0 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x?xf32>, vector<2x2xf32>
    func.call @fake_side_effecting_fun(%0) : (vector<2x2xf32>) -> ()
  }
  return
}

transform.sequence failures(propagate) {
^bb1(%func_op: !transform.op<"func.func">):
  transform.apply_patterns to %func_op {
    transform.apply_patterns.vector.split_transfer_full_partial split_transfer_strategy = "vector-transfer"
  } : !transform.op<"func.func">
}