83
|
1 //===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===//
|
|
2 //
|
|
3 // The LLVM Compiler Infrastructure
|
|
4 //
|
|
5 // This file is distributed under the University of Illinois Open Source
|
|
6 // License. See LICENSE.TXT for details.
|
|
7 //
|
|
8 //===----------------------------------------------------------------------===//
|
|
9 //
|
|
10 // This file defines the itinerary class data for the POWER8 processor.
|
|
11 //
|
|
12 //===----------------------------------------------------------------------===//
|
|
13
|
|
14 // Scheduling for the P8 involves tracking two types of resources:
|
|
15 // 1. The dispatch bundle slots
|
|
16 // 2. The functional unit resources
|
|
17
|
|
// Dispatch bundle slots. Each slot is modelled as its own functional unit so
// that an itinerary can reserve one (or several) of them.
def P8_DU1 : FuncUnit;
def P8_DU2 : FuncUnit;
def P8_DU3 : FuncUnit;
def P8_DU4 : FuncUnit;
def P8_DU5 : FuncUnit;
def P8_DU6 : FuncUnit;
// DU7 and DU8 are used by branch instructions only.
def P8_DU7 : FuncUnit;
def P8_DU8 : FuncUnit;
|
|
27
|
|
// Issue resources: up to 10 insns per cycle
// (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).

// Load units: loads or fixed-point operations.
def P8_LU1  : FuncUnit; // Load pipeline 1
def P8_LU2  : FuncUnit; // Load pipeline 2

// Load/Store pipelines: these handle stores, fixed-point loads, and simple
// fixed-point operations.
def P8_LSU1 : FuncUnit; // Load/Store pipeline 1
def P8_LSU2 : FuncUnit; // Load/Store pipeline 2

// Fixed-point units.
def P8_FXU1 : FuncUnit; // FX pipeline 1
def P8_FXU2 : FuncUnit; // FX pipeline 2

// On P7 and newer, the Floating-Point Unit (FPU) and the Vector Media
// Extension (VMX) unit are combined into a single Vector Scalar Unit (VSU).
// The P8 instruction latency document still refers to this unit as the FPU,
// so keep in mind that FPU == VSU here.
// Unlike on the P7, the VMX units on the P8 are symmetric, so there is no need
// to steer vector integer ops or 128-bit load/store/perms to a specific unit.
def P8_FPU1 : FuncUnit; // VS pipeline 1
def P8_FPU2 : FuncUnit; // VS pipeline 2

def P8_CRU  : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
def P8_BRU  : FuncUnit; // BR unit
|
|
53
|
|
// Itinerary data for the POWER8.
//
// Each InstrItinData entry pairs an itinerary class with:
//   * a list of InstrStage<cycles, [alternative units], timeinc> records
//     (dispatch-slot stages come first, followed by the issue-unit stages;
//     a trailing 0 is the stage's time increment), and
//   * a list of operand cycles.
// The two lists are separate template arguments and must be separated by a
// comma.
def P8Itineraries : ProcessorItineraries<
  [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6, P8_DU7, P8_DU8,
   P8_LU1, P8_LU2, P8_LSU1, P8_LSU2, P8_FXU1, P8_FXU2,
   P8_FPU1, P8_FPU2, P8_CRU, P8_BRU], [], [
    InstrItinData<IIC_IntSimple   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2,
                                                    P8_LU1, P8_LU2,
                                                    P8_LSU1, P8_LSU2]>],
                                    [1, 1, 1]>,
    InstrItinData<IIC_IntGeneral  , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2, P8_LU1,
                                                    P8_LU2, P8_LSU1, P8_LSU2]>],
                                    [1, 1, 1]>,
    InstrItinData<IIC_IntISEL,      [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
                                     InstrStage<1, [P8_BRU]>],
                                    [1, 1, 1, 1]>,
    InstrItinData<IIC_IntCompare  , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [1, 1, 1]>,
    InstrItinData<IIC_IntDivW     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<15, [P8_FXU1, P8_FXU2]>],
                                    [15, 1, 1]>,
    InstrItinData<IIC_IntDivD     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<23, [P8_FXU1, P8_FXU2]>],
                                    [23, 1, 1]>,
    InstrItinData<IIC_IntMulHW    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [4, 1, 1]>,
    InstrItinData<IIC_IntMulHWU   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [4, 1, 1]>,
    InstrItinData<IIC_IntMulLI    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [4, 1, 1]>,
    InstrItinData<IIC_IntRotate   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [1, 1, 1]>,
    InstrItinData<IIC_IntRotateD  , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [1, 1, 1]>,
    InstrItinData<IIC_IntShift    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [1, 1, 1]>,
    InstrItinData<IIC_IntTrapW    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [1, 1]>,
    InstrItinData<IIC_IntTrapD    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [1, 1]>,
    // Branches dispatch through the two branch-only slots (DU7, DU8).
    InstrItinData<IIC_BrB         , [InstrStage<1, [P8_DU7, P8_DU8], 0>,
                                     InstrStage<1, [P8_BRU]>],
                                    [3, 1, 1]>,
    // FIXME - the Br* groups below are not branch related, so should probably
    // be renamed.
    // IIC_BrCR consists of the cr* instructions (crand, crnor, creqv, etc.)
    // and should be 'First' in dispatch.
    InstrItinData<IIC_BrCR        , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_CRU]>],
                                    [3, 1, 1]>,
    // IIC_BrMCR consists of the mcrf instruction.
    InstrItinData<IIC_BrMCR       , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_CRU]>],
                                    [3, 1, 1]>,
    // IIC_BrMCRX consists of mcrxr (obsolete instruction) and mtcrf, which
    // should be first in the dispatch group.
    // NOTE(review): IIC_BrMCRX is listed twice, differing only in the operand
    // cycles ([3, 1, 1] vs. [3, 1]). Both entries are kept as found; confirm
    // which one is intended and drop the other.
    InstrItinData<IIC_BrMCRX      , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [3, 1, 1]>,
    InstrItinData<IIC_BrMCRX      , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [3, 1]>,
    InstrItinData<IIC_LdStLoad    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2,
                                                    P8_LU1, P8_LU2]>],
                                    [2, 1, 1]>,
    InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_DU2], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2,
                                                    P8_LU1, P8_LU2], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [2, 2, 1, 1]>,
    // Update-Indexed form loads/stores are no longer first and last in the
    // dispatch group. They are simply cracked, so require DU1,DU2.
    InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_DU2], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2,
                                                    P8_LU1, P8_LU2], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [3, 3, 1, 1]>,
    InstrItinData<IIC_LdStLD      , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2,
                                                    P8_LU1, P8_LU2]>],
                                    [2, 1, 1]>,
    InstrItinData<IIC_LdStLDU     , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_DU2], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2,
                                                    P8_LU1, P8_LU2], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [2, 2, 1, 1]>,
    InstrItinData<IIC_LdStLDUX    , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_DU2], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2,
                                                    P8_LU1, P8_LU2], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [3, 3, 1, 1]>,
    InstrItinData<IIC_LdStLFD     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_LU1, P8_LU2]>],
                                    [3, 1, 1]>,
    InstrItinData<IIC_LdStLVecX   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_LU1, P8_LU2]>],
                                    [3, 1, 1]>,
    InstrItinData<IIC_LdStLFDU    , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_DU2], 0>,
                                     InstrStage<1, [P8_LU1, P8_LU2], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [3, 3, 1, 1]>,
    InstrItinData<IIC_LdStLFDUX   , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_DU2], 0>,
                                     InstrStage<1, [P8_LU1, P8_LU2], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [3, 3, 1, 1]>,
    InstrItinData<IIC_LdStLHA     , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_DU2], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2,
                                                    P8_LU1, P8_LU2], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2,
                                                    P8_LU1, P8_LU2]>],
                                    [3, 1, 1]>,
    InstrItinData<IIC_LdStLHAU    , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_DU2], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2,
                                                    P8_LU1, P8_LU2], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [4, 4, 1, 1]>,
    // first+last in dispatch group.
    InstrItinData<IIC_LdStLHAUX   , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_DU2], 0>,
                                     InstrStage<1, [P8_DU3], 0>,
                                     InstrStage<1, [P8_DU4], 0>,
                                     InstrStage<1, [P8_DU5], 0>,
                                     InstrStage<1, [P8_DU6], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2,
                                                    P8_LU1, P8_LU2], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [4, 4, 1, 1]>,
    InstrItinData<IIC_LdStLWA     , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_DU2], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2,
                                                    P8_LU1, P8_LU2]>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [3, 1, 1]>,
    InstrItinData<IIC_LdStLWARX,    [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_DU2], 0>,
                                     InstrStage<1, [P8_DU3], 0>,
                                     InstrStage<1, [P8_DU4], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2,
                                                    P8_LU1, P8_LU2]>],
                                    [3, 1, 1]>,
    // first+last
    InstrItinData<IIC_LdStLDARX,    [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_DU2], 0>,
                                     InstrStage<1, [P8_DU3], 0>,
                                     InstrStage<1, [P8_DU4], 0>,
                                     InstrStage<1, [P8_DU5], 0>,
                                     InstrStage<1, [P8_DU6], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2,
                                                    P8_LU1, P8_LU2]>],
                                    [3, 1, 1]>,
    InstrItinData<IIC_LdStLMW     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2,
                                                    P8_LU1, P8_LU2]>],
                                    [2, 1, 1]>,
    // Stores are dual-issued from the issue queue, so may only take up one
    // dispatch slot. The instruction will be broken into two IOPS. The agen
    // op is issued to the LSU, and the data op (register fetch) is issued
    // to either the LU (GPR store) or the VSU (FPR store).
    InstrItinData<IIC_LdStStore   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2]>,
                                     InstrStage<1, [P8_LU1, P8_LU2]>],
                                    [1, 1, 1]>,
    // The comma after the stage list is required: without it the operand
    // cycle list is not passed as a separate template argument.
    InstrItinData<IIC_LdStSTD     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_LU1, P8_LU2,
                                                    P8_LSU1, P8_LSU2]>],
                                    [1, 1, 1]>,
    InstrItinData<IIC_LdStSTDU    , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_DU2], 0>,
                                     InstrStage<1, [P8_LU1, P8_LU2,
                                                    P8_LSU1, P8_LSU2], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [2, 1, 1, 1]>,
    // First+last
    InstrItinData<IIC_LdStSTDUX   , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_DU2], 0>,
                                     InstrStage<1, [P8_DU3], 0>,
                                     InstrStage<1, [P8_DU4], 0>,
                                     InstrStage<1, [P8_DU5], 0>,
                                     InstrStage<1, [P8_DU6], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [2, 1, 1, 1]>,
    InstrItinData<IIC_LdStSTFD    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [1, 1, 1]>,
    InstrItinData<IIC_LdStSTFDU   , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_DU2], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [2, 1, 1, 1]>,
    InstrItinData<IIC_LdStSTVEBX  , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [1, 1, 1]>,
    InstrItinData<IIC_LdStSTDCX   , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_DU2], 0>,
                                     InstrStage<1, [P8_DU3], 0>,
                                     InstrStage<1, [P8_DU4], 0>,
                                     InstrStage<1, [P8_DU5], 0>,
                                     InstrStage<1, [P8_DU6], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
                                     InstrStage<1, [P8_LU1, P8_LU2]>],
                                    [1, 1, 1]>,
    InstrItinData<IIC_LdStSTWCX   , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_DU2], 0>,
                                     InstrStage<1, [P8_DU3], 0>,
                                     InstrStage<1, [P8_DU4], 0>,
                                     InstrStage<1, [P8_DU5], 0>,
                                     InstrStage<1, [P8_DU6], 0>,
                                     InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
                                     InstrStage<1, [P8_LU1, P8_LU2]>],
                                    [1, 1, 1]>,
    InstrItinData<IIC_SprMFCR     , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_CRU]>],
                                    [6, 1]>,
    InstrItinData<IIC_SprMFCRF    , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_CRU]>],
                                    [3, 1]>,
    InstrItinData<IIC_SprMTSPR    , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_FXU1, P8_FXU2]>],
                                    [4, 1]>, // mtctr
    InstrItinData<IIC_FPGeneral   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [5, 1, 1]>,
    InstrItinData<IIC_FPAddSub    , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [5, 1, 1]>,
    InstrItinData<IIC_FPCompare   , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [8, 1, 1]>,
    InstrItinData<IIC_FPDivD      , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [33, 1, 1]>,
    InstrItinData<IIC_FPDivS      , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [27, 1, 1]>,
    InstrItinData<IIC_FPSqrtD     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [44, 1, 1]>,
    InstrItinData<IIC_FPSqrtS     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [32, 1, 1]>,
    InstrItinData<IIC_FPFused     , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [5, 1, 1, 1]>,
    InstrItinData<IIC_FPRes       , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
                                                    P8_DU4, P8_DU5, P8_DU6], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [5, 1, 1]>,
    InstrItinData<IIC_VecGeneral  , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [2, 1, 1]>,
    InstrItinData<IIC_VecVSL      , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [2, 1, 1]>,
    InstrItinData<IIC_VecVSR      , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [2, 1, 1]>,
    InstrItinData<IIC_VecFP       , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [6, 1, 1]>,
    InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [6, 1, 1]>,
    InstrItinData<IIC_VecFPRound  , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [6, 1, 1]>,
    InstrItinData<IIC_VecComplex  , [InstrStage<1, [P8_DU1], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [7, 1, 1]>,
    // The P8 vector pipes are symmetric, so a permute may use either one.
    InstrItinData<IIC_VecPerm     , [InstrStage<1, [P8_DU1, P8_DU2], 0>,
                                     InstrStage<1, [P8_FPU1, P8_FPU2]>],
                                    [3, 1, 1]>
  ]>;
|
|
383
|
|
384 // ===---------------------------------------------------------------------===//
|
|
385 // P8 machine model for scheduling and other instruction cost heuristics.
|
|
386 // P8 has an 8 insn dispatch group (6 non-branch, 2 branch) and can issue up
|
|
387 // to 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
|
|
388
|
|
def P8Model : SchedMachineModel {
  // Up to 8 instructions are dispatched per cycle: at most six non-branch
  // instructions plus at most two branches in a dispatch group.
  let IssueWidth = 8;

  // Out-of-order dispatch.
  let MinLatency = 0;

  // Optimistic load latency assuming bypass. This is overridden by
  // OperandCycles if the Itineraries are queried instead.
  let LoadLatency = 3;

  let MispredictPenalty = 16;

  // Try to make sure we have at least 10 dispatch groups in a loop.
  let LoopMicroOpBufferSize = 60;

  let Itineraries = P8Itineraries;
}
|
|
405
|