// RUN: mlir-opt %s -test-vector-to-vector-conversion="unroll" | FileCheck %s

// CHECK-DAG: #[[MAP1:map[0-9]+]] = affine_map<(d0, d1, d2) -> (d1, d2)>

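// The addf on vector<4x2xf32> below is unrolled into two 2x2 tiles (offsets
// [0, 0] and [2, 0]): each tile is extracted, added, and inserted back into
// the 4x2 result.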
// CHECK-LABEL: func @add4x2
// CHECK: %[[S1:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S2:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A1:.*]] = addf %[[S1]], %[[S2]] : vector<2x2xf32>
// CHECK-NEXT: %[[VEC0:.*]] = vector.insert_strided_slice %[[A1]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x2xf32>
// CHECK-NEXT: %[[S3:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S4:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A2:.*]] = addf %[[S3]], %[[S4]] : vector<2x2xf32>
// CHECK-NEXT: %[[VEC1:.*]] = vector.insert_strided_slice %[[A2]], %[[VEC0]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x2xf32>
// CHECK-NEXT: return %[[VEC1]] : vector<4x2xf32>

func @add4x2(%0: vector<4x2xf32>) -> vector<4x2xf32> {
  %1 = addf %0, %0: vector<4x2xf32>
  return %1: vector<4x2xf32>
}

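// Both 4x4 addf ops are unrolled into four 2x2 tiles. The second addf
// consumes the per-tile sums A1..A4 of the first directly, so no intermediate
// 4x4 vector is rebuilt between the two ops.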
// CHECK-LABEL: func @add4x4
// CHECK: %[[S1:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S2:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A1:.*]] = addf %[[S1]], %[[S2]] : vector<2x2xf32>

// CHECK-NEXT: %[[S3:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S4:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A2:.*]] = addf %[[S3]], %[[S4]] : vector<2x2xf32>

// CHECK-NEXT: %[[S5:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S6:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A3:.*]] = addf %[[S5]], %[[S6]] : vector<2x2xf32>

// CHECK-NEXT: %[[S7:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S8:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A4:.*]] = addf %[[S7]], %[[S8]] : vector<2x2xf32>

// CHECK-NEXT: %[[S9:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A5:.*]] = addf %[[S9]], %[[A1]] : vector<2x2xf32>
// CHECK-NEXT: %[[R1:.*]] = vector.insert_strided_slice %[[A5]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>

// CHECK-NEXT: %[[S11:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A6:.*]] = addf %[[S11]], %[[A2]] : vector<2x2xf32>
// CHECK-NEXT: %[[R2:.*]] = vector.insert_strided_slice %[[A6]], %[[R1]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>

// CHECK-NEXT: %[[S13:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A7:.*]] = addf %[[S13]], %[[A3]] : vector<2x2xf32>
// CHECK-NEXT: %[[R3:.*]] = vector.insert_strided_slice %[[A7]], %[[R2]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>

// CHECK-NEXT: %[[S15:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[A8:.*]] = addf %[[S15]], %[[A4]] : vector<2x2xf32>
// CHECK-NEXT: %[[R4:.*]] = vector.insert_strided_slice %[[A8]], %[[R3]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>

// CHECK-NEXT: return %[[R4]] : vector<4x4xf32>

func @add4x4(%0: vector<4x4xf32>, %1: vector<4x4xf32>) -> vector<4x4xf32> {
  %2 = addf %0, %1: vector<4x4xf32>
  %3 = addf %1, %2: vector<4x4xf32>
  return %3: vector<4x4xf32>
}

#contraction_accesses0 = [
  affine_map<(i, j, k) -> (i, k)>,
  affine_map<(i, j, k) -> (k, j)>,
  affine_map<(i, j, k) -> (i, j)>
]
#contraction_trait0 = {
  indexing_maps = #contraction_accesses0,
  iterator_types = ["parallel", "parallel", "reduction"]
}

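// The masked 4x6 * 6x4 contraction below unrolls into 2x2 tiles: each of the
// four output tiles is accumulated over three k-slices (offsets 0, 2, 4), and
// a 2x2 constant mask is produced for every unrolled operand slice.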
// CHECK-LABEL: func @contraction4x4_ijk

// Reducing output vector [0, 0]

// CHECK: %[[S1:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x6xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S4:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S2:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S5:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S3:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[R1S00:.*]] = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %[[S1]], %[[S2]], %[[S3]], %[[S4]], %[[S5]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// CHECK-NEXT: %[[S6:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x6xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S8:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S7:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S9:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[R2S00:.*]] = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %[[S6]], %[[S7]], %[[R1S00]], %[[S8]], %[[S9]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// CHECK-NEXT: %[[S10:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 4], sizes = [2, 2], strides = [1, 1]} : vector<4x6xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S12:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S11:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [4, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S13:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[R3S00:.*]] = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %[[S10]], %[[S11]], %[[R2S00]], %[[S12]], %[[S13]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// Reducing output vector [0, 2]

// CHECK-NEXT: %[[S14:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x6xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S17:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S15:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S18:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S16:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[R1S02:.*]] = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %[[S14]], %[[S15]], %[[S16]], %[[S17]], %[[S18]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// CHECK-NEXT: %[[S19:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x6xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S21:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S20:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S22:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[R2S02:.*]] = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %[[S19]], %[[S20]], %[[R1S02]], %[[S21]], %[[S22]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// CHECK-NEXT: %[[S23:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 4], sizes = [2, 2], strides = [1, 1]} : vector<4x6xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S25:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S24:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [4, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S26:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[R3S02:.*]] = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %[[S23]], %[[S24]], %[[R2S02]], %[[S25]], %[[S26]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// Reducing output vector [2, 0]

// CHECK-NEXT: %[[S27:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x6xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S30:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S28:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S31:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S29:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[R1S20:.*]] = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %[[S27]], %[[S28]], %[[S29]], %[[S30]], %[[S31]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// CHECK-NEXT: %[[S32:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x6xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S34:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S33:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S35:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[R2S20:.*]] = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %[[S32]], %[[S33]], %[[R1S20]], %[[S34]], %[[S35]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// CHECK-NEXT: %[[S36:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 4], sizes = [2, 2], strides = [1, 1]} : vector<4x6xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S38:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S37:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [4, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S39:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[R3S20:.*]] = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %[[S36]], %[[S37]], %[[R2S20]], %[[S38]], %[[S39]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// Reducing output vector [2, 2]

// CHECK-NEXT: %[[S40:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x6xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S43:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S41:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S44:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S42:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[R1S22:.*]] = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %[[S40]], %[[S41]], %[[S42]], %[[S43]], %[[S44]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// CHECK-NEXT: %[[S45:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x6xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S47:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S46:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S48:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[R2S22:.*]] = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %[[S45]], %[[S46]], %[[R1S22]], %[[S47]], %[[S48]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// CHECK-NEXT: %[[S49:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 4], sizes = [2, 2], strides = [1, 1]} : vector<4x6xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S51:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S50:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [4, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S52:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[R3S22:.*]] = vector.contract {indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %[[S49]], %[[S50]], %[[R2S22]], %[[S51]], %[[S52]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// CHECK-NEXT: %[[VEC1:.*]] = vector.insert_strided_slice %[[R3S00]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// CHECK-NEXT: %[[VEC2:.*]] = vector.insert_strided_slice %[[R3S02]], %[[VEC1]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// CHECK-NEXT: %[[VEC3:.*]] = vector.insert_strided_slice %[[R3S20]], %[[VEC2]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// CHECK-NEXT: %[[VEC4:.*]] = vector.insert_strided_slice %[[R3S22]], %[[VEC3]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>

// CHECK-NEXT: return %[[VEC4]] : vector<4x4xf32>

func @contraction4x4_ijk(%arg0 : vector<4x6xf32>, %arg1 : vector<6x4xf32>,
                         %arg2 : vector<4x4xf32>, %arg3 : index)
    -> (vector<4x4xf32>) {
  %lhsm = vector.constant_mask [4, 6] : vector<4x6xi1>
  %rhsm = vector.constant_mask [6, 4] : vector<6x4xi1>
  %0 = vector.contract #contraction_trait0 %arg0, %arg1, %arg2, %lhsm, %rhsm
    : vector<4x6xf32>, vector<6x4xf32> into vector<4x4xf32>

  return %0 : vector<4x4xf32>
}

#contraction_accesses1 = [
  affine_map<(i, k, j) -> (i, k)>,
  affine_map<(i, k, j) -> (k, j)>,
  affine_map<(i, k, j) -> (i, j)>
]
#contraction_trait1 = {
  indexing_maps = #contraction_accesses1,
  iterator_types = ["parallel", "reduction", "parallel"]
}

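// Same contraction with the reduction iterator in the middle position and a
// k extent of only 2: each 2x2 output tile needs a single unrolled
// contraction, so no accumulator chaining occurs.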
// CHECK-LABEL: func @contraction4x4_ikj

// Reducing output vector [0, 0]

// CHECK: %[[S1:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S4:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S2:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<2x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S5:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S3:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[R1S00:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[S1]], %[[S2]], %[[S3]], %[[S4]], %[[S5]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// Reducing output vector [0, 2]

// CHECK-NEXT: %[[S6:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S9:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S7:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<2x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S10:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S8:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[R1S02:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[S6]], %[[S7]], %[[S8]], %[[S9]], %[[S10]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// Reducing output vector [2, 0]

// CHECK-NEXT: %[[S11:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S14:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S12:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<2x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S15:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S13:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[R1S20:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[S11]], %[[S12]], %[[S13]], %[[S14]], %[[S15]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// Reducing output vector [2, 2]

// CHECK-NEXT: %[[S16:.*]] = vector.extract_strided_slice %arg0 {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x2xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S19:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S17:.*]] = vector.extract_strided_slice %arg1 {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<2x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[S20:.*]] = vector.constant_mask [2, 2] : vector<2x2xi1>
// CHECK-NEXT: %[[S18:.*]] = vector.extract_strided_slice %arg2 {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT: %[[R1S22:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[S16]], %[[S17]], %[[S18]], %[[S19]], %[[S20]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// CHECK-NEXT: %[[VEC0:.*]] = vector.insert_strided_slice %[[R1S00]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// CHECK-NEXT: %[[VEC1:.*]] = vector.insert_strided_slice %[[R1S02]], %[[VEC0]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// CHECK-NEXT: %[[VEC2:.*]] = vector.insert_strided_slice %[[R1S20]], %[[VEC1]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// CHECK-NEXT: %[[VEC3:.*]] = vector.insert_strided_slice %[[R1S22]], %[[VEC2]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// CHECK-NEXT: return %[[VEC3]] : vector<4x4xf32>

func @contraction4x4_ikj(%arg0 : vector<4x2xf32>, %arg1 : vector<2x4xf32>,
                         %arg2 : vector<4x4xf32>, %arg3 : index)
    -> (vector<4x4xf32>) {
  %lhsm = vector.constant_mask [4, 2] : vector<4x2xi1>
  %rhsm = vector.constant_mask [2, 4] : vector<2x4xi1>
  %0 = vector.contract #contraction_trait1 %arg0, %arg1, %arg2, %lhsm, %rhsm
    : vector<4x2xf32>, vector<2x4xf32> into vector<4x4xf32>

  return %0 : vector<4x4xf32>
}

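// Unrolling propagates through the transfers feeding the contraction: the
// 4x2, 2x4, and 4x4 transfer_reads and the 4x4 transfer_write are each split
// into 2x2 transfers at the corresponding offsets.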
// CHECK-LABEL: func @contraction4x4_ikj_xfer_read
// CHECK-DAG: %[[C2:.*]] = constant 2 : index
// CHECK-DAG: %[[C0:.*]] = constant 0 : index

// Check that the LHS vector.transfer_read is split for each user.

// CHECK: %[[VTR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<4x2xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR1:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C0]]], %{{.*}} : memref<4x2xf32>, vector<2x2xf32>

// CHECK-NEXT: %[[VTR2:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<2x4xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR3:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C2]]], %{{.*}} : memref<2x4xf32>, vector<2x2xf32>

// CHECK-NEXT: %[[VTR4:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR5:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C2]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR6:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR7:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C2]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>

// CHECK-NEXT: %[[R0:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR0]], %[[VTR2]], %[[VTR4]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT: %[[R1:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR0]], %[[VTR3]], %[[VTR5]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT: %[[R2:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR1]], %[[VTR2]], %[[VTR6]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT: %[[R3:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR1]], %[[VTR3]], %[[VTR7]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// CHECK-NEXT: vector.transfer_write %[[R0]], %{{.*}}[%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<2x2xf32>, memref<4x4xf32>
// CHECK-NEXT: vector.transfer_write %[[R1]], %{{.*}}[%[[C0]], %[[C2]]] {in_bounds = [true, true]} : vector<2x2xf32>, memref<4x4xf32>
// CHECK-NEXT: vector.transfer_write %[[R2]], %{{.*}}[%[[C2]], %[[C0]]] {in_bounds = [true, true]} : vector<2x2xf32>, memref<4x4xf32>
// CHECK-NEXT: vector.transfer_write %[[R3]], %{{.*}}[%[[C2]], %[[C2]]] {in_bounds = [true, true]} : vector<2x2xf32>, memref<4x4xf32>
// CHECK-NEXT: return

func @contraction4x4_ikj_xfer_read(%arg0 : memref<4x2xf32>,
                                   %arg1 : memref<2x4xf32>,
                                   %arg2 : memref<4x4xf32>) {
  %c0 = constant 0 : index
  %cf0 = constant 0.0 : f32

  %0 = vector.transfer_read %arg0[%c0, %c0], %cf0
    { permutation_map = affine_map<(d0, d1) -> (d0, d1)> }
    : memref<4x2xf32>, vector<4x2xf32>

  %1 = vector.transfer_read %arg1[%c0, %c0], %cf0
    { permutation_map = affine_map<(d0, d1) -> (d0, d1)> }
    : memref<2x4xf32>, vector<2x4xf32>

  %2 = vector.transfer_read %arg2[%c0, %c0], %cf0
    { permutation_map = affine_map<(d0, d1) -> (d0, d1)> }
    : memref<4x4xf32>, vector<4x4xf32>

  %3 = vector.contract #contraction_trait1 %0, %1, %2
    : vector<4x2xf32>, vector<2x4xf32> into vector<4x4xf32>

  vector.transfer_write %3, %arg2[%c0, %c0]
    {permutation_map = affine_map<(d0, d1) -> (d0, d1)>}
    : vector<4x4xf32>, memref<4x4xf32>
  return
}

// TODO: Update test with VTR split transform.
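// The 4x4 reads, addf, and write inside the loop nest unroll into 2x2 pieces:
// eight transfer_reads, four addf ops, and four transfer_writes.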
// CHECK-LABEL: func @vector_transfers
// CHECK-COUNT-8: vector.transfer_read
// CHECK-COUNT-4: addf
// CHECK-COUNT-4: vector.transfer_write

func @vector_transfers(%arg0: index, %arg1: index) {
  %cst = constant 0.000000e+00 : f32
  %0 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
  %1 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
  %2 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
  %cst_0 = constant 1.000000e+00 : f32
  %cst_1 = constant 2.000000e+00 : f32
  affine.for %arg2 = 0 to %arg0 step 4 {
    affine.for %arg3 = 0 to %arg1 step 4 {
      %4 = vector.transfer_read %0[%arg2, %arg3], %cst {permutation_map = affine_map<(d0, d1) -> (d0, d1)>} : memref<?x?xf32>, vector<4x4xf32>
      %5 = vector.transfer_read %1[%arg2, %arg3], %cst {permutation_map = affine_map<(d0, d1) -> (d0, d1)>} : memref<?x?xf32>, vector<4x4xf32>
      %6 = addf %4, %5 : vector<4x4xf32>
      vector.transfer_write %6, %2[%arg2, %arg3] {permutation_map = affine_map<(d0, d1) -> (d0, d1)>} : vector<4x4xf32>, memref<?x?xf32>
    }
  }
  return
}

// CHECK-LABEL: func @cancelling_shape_cast_ops
// CHECK-SAME: %[[A0:.*0]]: vector<2x4xf32>
// CHECK: return %[[A0]] : vector<2x4xf32>
func @cancelling_shape_cast_ops(%arg0 : vector<2x4xf32>) -> vector<2x4xf32> {
  %0 = vector.shape_cast %arg0 : vector<2x4xf32> to vector<8xf32>
  %1 = vector.shape_cast %0 : vector<8xf32> to vector<2x4xf32>
  return %1 : vector<2x4xf32>
}

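// cmpf, select, and the surrounding 4x4 transfers are unrolled into 2x2
// pieces. Each memref is read twice in the input because the transfer split
// pattern only handles transfers with a single user.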
// CHECK-LABEL: func @elementwise_unroll
// CHECK-SAME: (%[[ARG0:.*]]: memref<4x4xf32>, %[[ARG1:.*]]: memref<4x4xf32>)
// CHECK-DAG: %[[C2:.*]] = constant 2 : index
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
// CHECK: %[[VT0:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT1:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT2:.*]] = vector.transfer_read %[[ARG0]][%[[C2]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT3:.*]] = vector.transfer_read %[[ARG0]][%[[C2]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT4:.*]] = vector.transfer_read %[[ARG1]][%[[C0]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT5:.*]] = vector.transfer_read %[[ARG1]][%[[C0]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT6:.*]] = vector.transfer_read %[[ARG1]][%[[C2]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT7:.*]] = vector.transfer_read %[[ARG1]][%[[C2]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[CMP0:.*]] = cmpf ult, %[[VT0]], %[[VT4]] : vector<2x2xf32>
// CHECK: %[[CMP1:.*]] = cmpf ult, %[[VT1]], %[[VT5]] : vector<2x2xf32>
// CHECK: %[[CMP2:.*]] = cmpf ult, %[[VT2]], %[[VT6]] : vector<2x2xf32>
// CHECK: %[[CMP3:.*]] = cmpf ult, %[[VT3]], %[[VT7]] : vector<2x2xf32>
// CHECK: %[[VT0:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT1:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT2:.*]] = vector.transfer_read %[[ARG0]][%[[C2]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT3:.*]] = vector.transfer_read %[[ARG0]][%[[C2]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT4:.*]] = vector.transfer_read %[[ARG1]][%[[C0]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT5:.*]] = vector.transfer_read %[[ARG1]][%[[C0]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT6:.*]] = vector.transfer_read %[[ARG1]][%[[C2]], %[[C0]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[VT7:.*]] = vector.transfer_read %[[ARG1]][%[[C2]], %[[C2]]], {{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK: %[[SEL0:.*]] = select %[[CMP0]], %[[VT0]], %[[VT4]] : vector<2x2xi1>, vector<2x2xf32>
// CHECK: %[[SEL1:.*]] = select %[[CMP1]], %[[VT1]], %[[VT5]] : vector<2x2xi1>, vector<2x2xf32>
// CHECK: %[[SEL2:.*]] = select %[[CMP2]], %[[VT2]], %[[VT6]] : vector<2x2xi1>, vector<2x2xf32>
// CHECK: %[[SEL3:.*]] = select %[[CMP3]], %[[VT3]], %[[VT7]] : vector<2x2xi1>, vector<2x2xf32>
// CHECK: vector.transfer_write %[[SEL0]], %[[ARG0]][%[[C0]], %[[C0]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// CHECK: vector.transfer_write %[[SEL1]], %[[ARG0]][%[[C0]], %[[C2]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// CHECK: vector.transfer_write %[[SEL2]], %[[ARG0]][%[[C2]], %[[C0]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// CHECK: vector.transfer_write %[[SEL3]], %[[ARG0]][%[[C2]], %[[C2]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
func @elementwise_unroll(%arg0 : memref<4x4xf32>, %arg1 : memref<4x4xf32>) {
  %c0 = constant 0 : index
  %cf0 = constant 0.0 : f32
  %0 = vector.transfer_read %arg0[%c0, %c0], %cf0 : memref<4x4xf32>, vector<4x4xf32>
  %1 = vector.transfer_read %arg1[%c0, %c0], %cf0 : memref<4x4xf32>, vector<4x4xf32>
  %cond = cmpf ult, %0, %1 : vector<4x4xf32>
  // The vector transfer split pattern only supports a single user right now.
  %2 = vector.transfer_read %arg0[%c0, %c0], %cf0 : memref<4x4xf32>, vector<4x4xf32>
  %3 = vector.transfer_read %arg1[%c0, %c0], %cf0 : memref<4x4xf32>, vector<4x4xf32>
  %4 = select %cond, %2, %3 : vector<4x4xi1>, vector<4x4xf32>
  vector.transfer_write %4, %arg0[%c0, %c0] : vector<4x4xf32>, memref<4x4xf32>
  return
}

// Check that vector.transfer_read/write ops are split based on contract
// unrolling.
// CHECK-LABEL: func @contraction4x4_ikj_xfer_read_tensor
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
// CHECK-DAG: %[[C2:.*]] = constant 2 : index
// CHECK: %[[VTR0:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : tensor<4x2xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR1:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C0]]], %{{.*}} : tensor<4x2xf32>, vector<2x2xf32>

// CHECK-NEXT: %[[VTR2:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : tensor<2x4xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR3:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C2]]], %{{.*}} : tensor<2x4xf32>, vector<2x2xf32>

// CHECK-NEXT: %[[VTR4:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR5:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C2]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR6:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C0]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT: %[[VTR7:.*]] = vector.transfer_read %{{.*}}[%[[C2]], %[[C2]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>

// CHECK-NEXT: %[[R0:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR0]], %[[VTR2]], %[[VTR4]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT: %[[R1:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR0]], %[[VTR3]], %[[VTR5]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT: %[[R2:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR1]], %[[VTR2]], %[[VTR6]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT: %[[R3:.*]] = vector.contract {indexing_maps = [#map2, #map3, #map0], iterator_types = ["parallel", "reduction", "parallel"], kind = #vector.kind<add>} %[[VTR1]], %[[VTR3]], %[[VTR7]] : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32>

// CHECK-NEXT: %[[VTW0:.*]] = vector.transfer_write %[[R0]], %{{.*}}[%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<2x2xf32>, tensor<4x4xf32>
// CHECK-NEXT: %[[VTW1:.*]] = vector.transfer_write %[[R1]], %[[VTW0]][%[[C0]], %[[C2]]] {in_bounds = [true, true]} : vector<2x2xf32>, tensor<4x4xf32>
// CHECK-NEXT: %[[VTW2:.*]] = vector.transfer_write %[[R2]], %[[VTW1]][%[[C2]], %[[C0]]] {in_bounds = [true, true]} : vector<2x2xf32>, tensor<4x4xf32>
// CHECK-NEXT: %[[VTW3:.*]] = vector.transfer_write %[[R3]], %[[VTW2]][%[[C2]], %[[C2]]] {in_bounds = [true, true]} : vector<2x2xf32>, tensor<4x4xf32>
// CHECK-NEXT: return %[[VTW3]] : tensor<4x4xf32>

func @contraction4x4_ikj_xfer_read_tensor(%arg0 : tensor<4x2xf32>,
                                          %arg1 : tensor<2x4xf32>,
                                          %arg2 : tensor<4x4xf32>) ->
    tensor<4x4xf32> {
  %c0 = constant 0 : index
  %cf0 = constant 0.0 : f32
  %0 = vector.transfer_read %arg0[%c0, %c0], %cf0 :
    tensor<4x2xf32>, vector<4x2xf32>
  %1 = vector.transfer_read %arg1[%c0, %c0], %cf0 :
    tensor<2x4xf32>, vector<2x4xf32>
  %2 = vector.transfer_read %arg2[%c0, %c0], %cf0 :
    tensor<4x4xf32>, vector<4x4xf32>
  %3 = vector.contract #contraction_trait1 %0, %1, %2
    : vector<4x2xf32>, vector<2x4xf32> into vector<4x4xf32>
  %r = vector.transfer_write %3, %arg2[%c0, %c0]
    : vector<4x4xf32>, tensor<4x4xf32>
  return %r : tensor<4x4xf32>
}

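// The cast_away_* tests below check that leading unit dimensions are dropped:
// each op is rewritten on a rank-reduced vector, with vector.shape_cast ops
// bridging back to the original 1x... shapes.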
// CHECK-LABEL: func @cast_away_extract_strided_slice_leading_one_dims
func @cast_away_extract_strided_slice_leading_one_dims(%arg0: vector<1x8x8xf16>) -> vector<1x1x8xf16> {
  // CHECK: %[[SRC:.+]] = vector.shape_cast %{{.*}} : vector<1x8x8xf16> to vector<8x8xf16>
  // CHECK: %[[EXTRACT:.+]] = vector.extract_strided_slice %[[SRC]] {offsets = [4], sizes = [1], strides = [1]} : vector<8x8xf16> to vector<1x8xf16>
  %0 = vector.extract_strided_slice %arg0 {offsets = [0, 4], sizes = [1, 1], strides = [1, 1]} : vector<1x8x8xf16> to vector<1x1x8xf16>
  // CHECK: %[[RET:.+]] = vector.shape_cast %[[EXTRACT]] : vector<1x8xf16> to vector<1x1x8xf16>
  // CHECK: return %[[RET]]
  return %0: vector<1x1x8xf16>
}

// CHECK-LABEL: func @cast_away_insert_strided_slice_leading_one_dims
func @cast_away_insert_strided_slice_leading_one_dims(%arg0: vector<1x8xf16>, %arg1: vector<1x8x8xf16>) -> vector<1x8x8xf16> {
  // CHECK: %[[SRC:.+]] = vector.shape_cast %{{.*}} : vector<1x8xf16> to vector<8xf16>
  // CHECK: %[[DST:.+]] = vector.shape_cast %{{.*}} : vector<1x8x8xf16> to vector<8x8xf16>
  // CHECK: %[[INSERT:.+]] = vector.insert_strided_slice %[[SRC]], %[[DST]] {offsets = [0, 0], strides = [1]} : vector<8xf16> into vector<8x8xf16>
  %0 = vector.insert_strided_slice %arg0, %arg1 {offsets = [0, 0, 0], strides = [1, 1]} : vector<1x8xf16> into vector<1x8x8xf16>
  // CHECK: %[[RET:.+]] = vector.shape_cast %[[INSERT]] : vector<8x8xf16> to vector<1x8x8xf16>
  // CHECK: return %[[RET]]
  return %0: vector<1x8x8xf16>
}

// CHECK-LABEL: func @cast_away_insert_strided_slice_leading_one_dims_one_element
func @cast_away_insert_strided_slice_leading_one_dims_one_element(%arg0: vector<1x1xf16>, %arg1: vector<1x1x1xf16>) -> vector<1x1x1xf16> {
  // CHECK: vector.shape_cast %{{.+}} : vector<1x1xf16> to vector<1xf16>
  // CHECK: vector.shape_cast %{{.+}} : vector<1x1x1xf16> to vector<1xf16>
  %0 = vector.insert_strided_slice %arg0, %arg1 {offsets = [0, 0, 0], strides = [1, 1]} : vector<1x1xf16> into vector<1x1x1xf16>
  return %0: vector<1x1x1xf16>
}

// CHECK-LABEL: func @cast_away_transfer_read_leading_one_dims
func @cast_away_transfer_read_leading_one_dims(%arg0: memref<1x4x8x16xf16>) -> vector<1x4xf16> {
  // CHECK: %[[C0:.+]] = constant 0 : index
  %c0 = constant 0 : index
  // CHECK: %[[F0:.+]] = constant 0.000000e+00 : f16
  %f0 = constant 0. : f16
  // CHECK: %[[READ:.+]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]], %[[C0]]], %[[F0]] {in_bounds = [true]} : memref<1x4x8x16xf16>, vector<4xf16>
  // CHECK: %[[CAST:.+]] = vector.shape_cast %[[READ]] : vector<4xf16> to vector<1x4xf16>
  %0 = vector.transfer_read %arg0[%c0, %c0, %c0, %c0], %f0 {in_bounds = [true, true]} : memref<1x4x8x16xf16>, vector<1x4xf16>
  // CHECK: return %[[CAST]]
  return %0: vector<1x4xf16>
}

// CHECK-LABEL: func @cast_away_transfer_read_leading_one_dims_one_element
func @cast_away_transfer_read_leading_one_dims_one_element(%arg0: memref<1x1x1x1xf16>) -> vector<1x1xf16> {
  %c0 = constant 0 : index
  %f0 = constant 0. : f16
  // CHECK: vector.shape_cast %{{.+}} : vector<1xf16> to vector<1x1xf16>
  %0 = vector.transfer_read %arg0[%c0, %c0, %c0, %c0], %f0 {in_bounds = [true, true]} : memref<1x1x1x1xf16>, vector<1x1xf16>
  return %0: vector<1x1xf16>
}

// CHECK-LABEL: func @cast_away_transfer_write_leading_one_dims
func @cast_away_transfer_write_leading_one_dims(%arg0: memref<1x4x8x16xf16>, %arg1: vector<1x4xf16>) {
  // CHECK: %[[C0:.+]] = constant 0 : index
  %c0 = constant 0 : index
  // CHECK: %[[CAST:.+]] = vector.shape_cast %{{.*}} : vector<1x4xf16> to vector<4xf16>
  // CHECK: vector.transfer_write %[[CAST]], %{{.*}}[%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true]} : vector<4xf16>, memref<1x4x8x16xf16>

  vector.transfer_write %arg1, %arg0[%c0, %c0, %c0, %c0] {in_bounds = [true, true]} : vector<1x4xf16>, memref<1x4x8x16xf16>
  return
}

// CHECK-LABEL: func @cast_away_transfer_write_leading_one_dims_one_element
func @cast_away_transfer_write_leading_one_dims_one_element(%arg0: memref<1x1x1x1xf16>, %arg1: vector<1x1xf16>) {
  %c0 = constant 0 : index
  // CHECK: vector.shape_cast %{{.+}} : vector<1x1xf16> to vector<1xf16>
  vector.transfer_write %arg1, %arg0[%c0, %c0, %c0, %c0] {in_bounds = [true, true]} : vector<1x1xf16>, memref<1x1x1x1xf16>
  return
}

// CHECK-LABEL: func @cast_away_broadcast_leading_one_dims
func @cast_away_broadcast_leading_one_dims(
  %arg0: vector<8xf32>, %arg1: f32, %arg2: vector<1x4xf32>) ->
  (vector<1x1x8xf32>, vector<1x1x4xf32>, vector<1x3x4xf32>, vector<1x1x4xf32>) {
  // CHECK: vector.broadcast %{{.*}} : vector<8xf32> to vector<8xf32>
  // CHECK: vector.shape_cast %{{.*}} : vector<8xf32> to vector<1x1x8xf32>
  %0 = vector.broadcast %arg0 : vector<8xf32> to vector<1x1x8xf32>
  // CHECK: vector.broadcast %{{.*}} : f32 to vector<4xf32>
  // CHECK: vector.shape_cast %{{.*}} : vector<4xf32> to vector<1x1x4xf32>
  %1 = vector.broadcast %arg1 : f32 to vector<1x1x4xf32>
  // CHECK: vector.shape_cast %{{.*}} : vector<1x4xf32> to vector<4xf32>
  // CHECK: vector.broadcast %{{.*}} : vector<4xf32> to vector<3x4xf32>
  // CHECK: vector.shape_cast %{{.*}} : vector<3x4xf32> to vector<1x3x4xf32>
  %2 = vector.broadcast %arg2 : vector<1x4xf32> to vector<1x3x4xf32>
  // CHECK: splat %{{.*}} : vector<4xf32>
  // CHECK: vector.shape_cast %{{.*}} : vector<4xf32> to vector<1x1x4xf32>
  %3 = splat %arg1 : vector<1x1x4xf32>
  return %0, %1, %2, %3: vector<1x1x8xf32>, vector<1x1x4xf32>, vector<1x3x4xf32>, vector<1x1x4xf32>
}

// CHECK-LABEL: func @cast_away_elementwise_leading_one_dims
func @cast_away_elementwise_leading_one_dims(
  %arg0: vector<1x1x8xf32>, %arg1: f32, %arg2: vector<1x4xf32>,
  %arg3: vector<1x4xf32>, %arg4: i1) ->
  (vector<1x1x8xf32>, vector<1x4xi1>, vector<1x4xf32>, vector<1x4xf32>) {
  // CHECK: vector.shape_cast %{{.*}} : vector<1x1x8xf32> to vector<8xf32>
  // CHECK: vector.shape_cast %{{.*}} : vector<1x1x8xf32> to vector<8xf32>
  // CHECK: addf %{{.*}}, %{{.*}} : vector<8xf32>
  // CHECK: vector.shape_cast %{{.*}} : vector<8xf32> to vector<1x1x8xf32>
  %0 = addf %arg0, %arg0 : vector<1x1x8xf32>
  // CHECK: vector.shape_cast %{{.*}} : vector<1x4xf32> to vector<4xf32>
  // CHECK: vector.shape_cast %{{.*}} : vector<1x4xf32> to vector<4xf32>
  // CHECK: cmpf ogt, %{{.*}}, %{{.*}} : vector<4xf32>
  // CHECK: vector.shape_cast %{{.*}} : vector<4xi1> to vector<1x4xi1>
  %1 = cmpf ogt, %arg2, %arg3 : vector<1x4xf32>
  // CHECK: vector.shape_cast %{{.*}} : vector<1x4xf32> to vector<4xf32>
  // CHECK: vector.shape_cast %{{.*}} : vector<1x4xf32> to vector<4xf32>
  // CHECK: select %{{.*}}, %{{.*}}, %{{.*}} : vector<4xi1>, vector<4xf32>
  // CHECK: vector.shape_cast %{{.*}} : vector<4xf32> to vector<1x4xf32>
  %2 = select %1, %arg3, %arg2 : vector<1x4xi1>, vector<1x4xf32>
  // CHECK: vector.shape_cast %{{.*}} : vector<1x4xf32> to vector<4xf32>
  // CHECK: vector.shape_cast %{{.*}} : vector<1x4xf32> to vector<4xf32>
  // CHECK: select %arg4, %12, %{{.*}} : vector<4xf32>
  // CHECK: vector.shape_cast %{{.*}} : vector<4xf32> to vector<1x4xf32>
  %3 = select %arg4, %arg3, %arg2 : vector<1x4xf32>
  return %0, %1, %2, %3: vector<1x1x8xf32>, vector<1x4xi1>, vector<1x4xf32>, vector<1x4xf32>
}

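// The bubble_*_bitcast tests check that vector.bitcast can be swapped with
// neighboring extract/insert ops when offsets and sizes divide evenly across
// the element-width change; odd offsets, odd sizes, or mismatched ranks keep
// the ops in their original order.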
// CHECK-LABEL: func @bubble_down_bitcast_in_extract
// CHECK-SAME: %[[SRC:.+]]: vector<4xf32>
func @bubble_down_bitcast_in_extract(%src: vector<4xf32>) -> (f16, f16) {
  %0 = vector.bitcast %src : vector<4xf32> to vector<8xf16>
  // CHECK: %[[EXTRACT1:.+]] = vector.extract %[[SRC]][1] : vector<4xf32>
  // CHECK: %[[CAST1:.+]] = vector.bitcast %[[EXTRACT1]] : vector<1xf32> to vector<2xf16>
  // CHECK: %[[EXTRACT2:.+]] = vector.extract %[[CAST1]][1] : vector<2xf16>
  %1 = vector.extract %0[3] : vector<8xf16>
  // CHECK: %[[EXTRACT3:.+]] = vector.extract %[[SRC]][2] : vector<4xf32>
  // CHECK: %[[CAST2:.+]] = vector.bitcast %[[EXTRACT3]] : vector<1xf32> to vector<2xf16>
  // CHECK: %[[EXTRACT4:.+]] = vector.extract %[[CAST2]][0] : vector<2xf16>
  %2 = vector.extract %0[4] : vector<8xf16>
  // CHECK: return %[[EXTRACT2]], %[[EXTRACT4]]
  return %1, %2: f16, f16
}

// CHECK-LABEL: func @bubble_down_bitcast_in_strided_slice_extract
// CHECK-SAME: %[[SRC:.+]]: vector<4xf32>
func @bubble_down_bitcast_in_strided_slice_extract(%arg0: vector<4xf32>) -> vector<4xf16> {
  // CHECK: %[[EXTRACT:.+]] = vector.extract_strided_slice %[[SRC]] {offsets = [2], sizes = [2], strides = [1]} : vector<4xf32> to vector<2xf32>
  // CHECK: %[[CAST:.+]] = vector.bitcast %[[EXTRACT]] : vector<2xf32> to vector<4xf16>
  %cast = vector.bitcast %arg0: vector<4xf32> to vector<8xf16>
  %0 = vector.extract_strided_slice %cast {offsets = [4], sizes = [4], strides = [1]} : vector<8xf16> to vector<4xf16>
  // CHECK: return %[[CAST]]
  return %0: vector<4xf16>
}

// CHECK-LABEL: func @bubble_down_bitcast_in_strided_slice_extract_full_last_dim
// CHECK-SAME: %[[SRC:.+]]: vector<4x2xf32>
func @bubble_down_bitcast_in_strided_slice_extract_full_last_dim(%arg0: vector<4x2xf32>) -> vector<2x4xf16> {
  // CHECK: %[[EXTRACT:.+]] = vector.extract_strided_slice %[[SRC]] {offsets = [1], sizes = [2], strides = [1]} : vector<4x2xf32> to vector<2x2xf32>
  // CHECK: %[[CAST:.+]] = vector.bitcast %[[EXTRACT]] : vector<2x2xf32> to vector<2x4xf16>
  %cast = vector.bitcast %arg0: vector<4x2xf32> to vector<4x4xf16>
  %0 = vector.extract_strided_slice %cast {offsets = [1], sizes = [2], strides = [1]} : vector<4x4xf16> to vector<2x4xf16>
  // CHECK: return %[[CAST]]
  return %0: vector<2x4xf16>
}

// CHECK-LABEL: func @bubble_down_bitcast_in_strided_slice_extract_odd_offset
func @bubble_down_bitcast_in_strided_slice_extract_odd_offset(%arg0: vector<4xf32>) -> vector<4xf16> {
  // CHECK: vector.bitcast
  // CHECK-NEXT: vector.extract_strided_slice
  %cast = vector.bitcast %arg0: vector<4xf32> to vector<8xf16>
  %0 = vector.extract_strided_slice %cast {offsets = [3], sizes = [4], strides = [1]} : vector<8xf16> to vector<4xf16>
  return %0: vector<4xf16>
}

// CHECK-LABEL: func @bubble_down_bitcast_in_strided_slice_extract_odd_size
func @bubble_down_bitcast_in_strided_slice_extract_odd_size(%arg0: vector<4xf32>) -> vector<3xf16> {
  // CHECK: vector.bitcast
  // CHECK-NEXT: vector.extract_strided_slice
  %cast = vector.bitcast %arg0: vector<4xf32> to vector<8xf16>
  %0 = vector.extract_strided_slice %cast {offsets = [0], sizes = [3], strides = [1]} : vector<8xf16> to vector<3xf16>
  return %0: vector<3xf16>
}

// CHECK-LABEL: func @bubble_up_bitcast_in_strided_slice_insert
// CHECK-SAME: (%[[DST:.+]]: vector<8xf16>, %[[SRC1:.+]]: vector<4xf16>, %[[SRC2:.+]]: vector<4xf16>)
func @bubble_up_bitcast_in_strided_slice_insert(%dst: vector<8xf16>, %src1: vector<4xf16>, %src2: vector<4xf16>) -> vector<4xf32> {
  // CHECK-DAG: %[[CAST_SRC1:.+]] = vector.bitcast %[[SRC1]] : vector<4xf16> to vector<2xf32>
  // CHECK-DAG: %[[CAST_SRC2:.+]] = vector.bitcast %[[SRC2]] : vector<4xf16> to vector<2xf32>
  // CHECK-DAG: %[[CAST_DST:.+]] = vector.bitcast %[[DST]] : vector<8xf16> to vector<4xf32>
  // CHECK: %[[INSERT1:.+]] = vector.insert_strided_slice %[[CAST_SRC1]], %[[CAST_DST]] {offsets = [0], strides = [1]} : vector<2xf32> into vector<4xf32>
  // CHECK: %[[INSERT2:.+]] = vector.insert_strided_slice %[[CAST_SRC2]], %[[INSERT1]] {offsets = [2], strides = [1]} : vector<2xf32> into vector<4xf32>
  %0 = vector.insert_strided_slice %src1, %dst {offsets = [0], strides = [1]} : vector<4xf16> into vector<8xf16>
  %1 = vector.insert_strided_slice %src2, %0 {offsets = [4], strides = [1]} : vector<4xf16> into vector<8xf16>
  %cast = vector.bitcast %1: vector<8xf16> to vector<4xf32>
  // CHECK: return %[[INSERT2]]
  return %cast: vector<4xf32>
}

// CHECK-LABEL: func @bubble_up_bitcast_in_strided_slice_insert_odd_offset
func @bubble_up_bitcast_in_strided_slice_insert_odd_offset(%dst: vector<8xf16>, %src: vector<4xf16>) -> vector<4xf32> {
  // CHECK: vector.insert_strided_slice
  // CHECK-NEXT: vector.bitcast
  %0 = vector.insert_strided_slice %src, %dst {offsets = [3], strides = [1]} : vector<4xf16> into vector<8xf16>
  %cast = vector.bitcast %0: vector<8xf16> to vector<4xf32>
  return %cast: vector<4xf32>
}

// CHECK-LABEL: func @bubble_up_bitcast_in_strided_slice_insert_different_rank
func @bubble_up_bitcast_in_strided_slice_insert_different_rank(%dst: vector<16x4x8xf16>, %src: vector<2x4xf16>) -> vector<16x4x4xf32> {
  // CHECK: vector.insert_strided_slice
  // CHECK-NEXT: vector.bitcast
  %0 = vector.insert_strided_slice %src, %dst {offsets = [0, 0, 2], strides = [1, 1]} : vector<2x4xf16> into vector<16x4x8xf16>
  %cast = vector.bitcast %0: vector<16x4x8xf16> to vector<16x4x4xf32>
  return %cast: vector<16x4x4xf32>
}