comparison lib/Target/ARM/ARMScheduleA57.td @ 148:63bd29f05246

merged
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Wed, 14 Aug 2019 19:46:37 +0900
parents c2174574ed3a
children
comparison
equal deleted inserted replaced
146:3fc4d5c3e21e 148:63bd29f05246
1 //=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=// 1 //=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
2 // 2 //
3 // The LLVM Compiler Infrastructure 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // 4 // See https://llvm.org/LICENSE.txt for license information.
5 // This file is distributed under the University of Illinois Open Source 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 // License. See LICENSE.TXT for details.
7 // 6 //
8 //===----------------------------------------------------------------------===// 7 //===----------------------------------------------------------------------===//
9 // 8 //
10 // This file defines the machine model for ARM Cortex-A57 to support 9 // This file defines the machine model for ARM Cortex-A57 to support
11 // instruction scheduling and other instruction cost heuristics. 10 // instruction scheduling and other instruction cost heuristics.
90 let MispredictPenalty = 16; // Fetch + Decode/Rename/Dispatch + Branch 89 let MispredictPenalty = 16; // Fetch + Decode/Rename/Dispatch + Branch
91 90
92 // Enable partial & runtime unrolling. 91 // Enable partial & runtime unrolling.
93 let LoopMicroOpBufferSize = 16; 92 let LoopMicroOpBufferSize = 16;
94 let CompleteModel = 1; 93 let CompleteModel = 1;
94
95 // FIXME: Remove when all errors have been fixed.
96 let FullInstRWOverlapCheck = 0;
97
98 let UnsupportedFeatures = [HasV8_1MMainline, HasMVEInt, HasMVEFloat,
99 HasFPRegsV8_1M];
95 } 100 }
96 101
97 //===----------------------------------------------------------------------===// 102 //===----------------------------------------------------------------------===//
98 // Define each kind of processor resource and number available on Cortex-A57. 103 // Define each kind of processor resource and number available on Cortex-A57.
99 // Cortex-A57 has 8 pipelines that each has its own 8-entry queue where 104 // Cortex-A57 has 8 pipelines that each has its own 8-entry queue where
123 def : InstRW<[WriteNoop], (instregex "(t)?BKPT$", "(t2)?CDP(2)?$", 128 def : InstRW<[WriteNoop], (instregex "(t)?BKPT$", "(t2)?CDP(2)?$",
124 "(t2)?CLREX$", "CONSTPOOL_ENTRY$", "COPY_STRUCT_BYVAL_I32$", 129 "(t2)?CLREX$", "CONSTPOOL_ENTRY$", "COPY_STRUCT_BYVAL_I32$",
125 "(t2)?CPS[123]p$", "(t2)?DBG$", "(t2)?DMB$", "(t2)?DSB$", "ERET$", 130 "(t2)?CPS[123]p$", "(t2)?DBG$", "(t2)?DMB$", "(t2)?DSB$", "ERET$",
126 "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$", "ITasm$", 131 "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$", "ITasm$",
127 "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC", "SPACE", 132 "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC", "SPACE",
128 "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "UDF$", "t2DCPS", "t2SG", 133 "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "(t2|t)?UDF$", "t2DCPS", "t2SG",
129 "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "CompilerBarrier")>; 134 "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "CompilerBarrier",
135 "t__brkdiv0")>;
130 136
131 def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>; 137 def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>;
132 138
133 // Specific memory instrs 139 // Specific memory instrs
134 def : InstRW<[WriteNoop, WriteNoop], (instregex "(t2)?LDA", "(t2)?LDC", "(t2)?STC", 140 def : InstRW<[WriteNoop, WriteNoop], (instregex "(t2)?LDA", "(t2)?LDC", "(t2)?STC",
144 def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>; 150 def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>;
145 151
146 // Pseudos 152 // Pseudos
147 def : InstRW<[WriteNoop], (instregex "(t2)?ABS$", 153 def : InstRW<[WriteNoop], (instregex "(t2)?ABS$",
148 "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj", 154 "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj",
149 "tLDRpci_pic", "t2SUBS_PC_LR", 155 "tLDRpci_pic", "(t2)?SUBS_PC_LR",
150 "JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp", 156 "JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp",
151 "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm", 157 "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
152 "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm", 158 "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
153 "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm", 159 "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
154 "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm", 160 "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
276 // from similar μops, allowing a typical sequence of multiply-accumulate μops 282 // from similar μops, allowing a typical sequence of multiply-accumulate μops
277 // to issue one every 1 cycle (sched advance = 2). 283 // to issue one every 1 cycle (sched advance = 2).
278 def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; } 284 def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
279 def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; } 285 def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
280 def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>; 286 def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>;
287
288 def : InstRW<[A57WriteMLA],
289 (instregex "t2SMLAD", "t2SMLADX", "t2SMLSD", "t2SMLSDX")>;
281 290
282 def : SchedAlias<WriteMAC16, A57WriteMLA>; 291 def : SchedAlias<WriteMAC16, A57WriteMLA>;
283 def : SchedAlias<WriteMAC32, A57WriteMLA>; 292 def : SchedAlias<WriteMAC32, A57WriteMLA>;
284 def : SchedAlias<ReadMAC, A57ReadMLA>; 293 def : SchedAlias<ReadMAC, A57ReadMLA>;
285 294
585 594
586 // TODO: no writeback latency defined in documentation (implemented as 1 cyc) 595 // TODO: no writeback latency defined in documentation (implemented as 1 cyc)
587 def : InstRW<[A57WriteLDM_Upd], 596 def : InstRW<[A57WriteLDM_Upd],
588 (instregex "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)", "tPOP")>; 597 (instregex "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)", "tPOP")>;
589 598
599 def : InstRW<[A57Write_5cyc_1L], (instregex "VLLDM")>;
600
590 // --- 3.9 Store Instructions --- 601 // --- 3.9 Store Instructions ---
591 602
592 // Store, immed offset 603 // Store, immed offset
593 def : InstRW<[A57Write_1cyc_1S], (instregex "STRi12", "STRBi12", "PICSTR", 604 def : InstRW<[A57Write_1cyc_1S], (instregex "STRi12", "STRBi12", "PICSTR",
594 "t2STR(B?)(T|i12|i8|s)", "t2STRDi8", "t2STRH(i12|i8|s)", "tSTR")>; 605 "t2STR(B?)(T|i12|i8|s)", "t2STRDi8", "t2STRH(i12|i8|s)", "tSTR")>;
703 714
704 def : InstRW<[A57WriteSTM], (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>; 715 def : InstRW<[A57WriteSTM], (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>;
705 def : InstRW<[A57WrBackOne, A57WriteSTM_Upd], 716 def : InstRW<[A57WrBackOne, A57WriteSTM_Upd],
706 (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)", "tPUSH")>; 717 (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)", "tPUSH")>;
707 718
719 def : InstRW<[A57Write_5cyc_1S], (instregex "VLSTM")>;
720
708 // --- 3.10 FP Data Processing Instructions --- 721 // --- 3.10 FP Data Processing Instructions ---
709 def : SchedAlias<WriteFPALU32, A57Write_5cyc_1V>; 722 def : SchedAlias<WriteFPALU32, A57Write_5cyc_1V>;
710 def : SchedAlias<WriteFPALU64, A57Write_5cyc_1V>; 723 def : SchedAlias<WriteFPALU64, A57Write_5cyc_1V>;
711 724
712 def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(S|D|H)")>; 725 def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(S|D|H)")>;
720 (instregex "VCMP(D|S|H|ZD|ZS|ZH)$", "VCMPE(D|S|H|ZD|ZS|ZH)")>; 733 (instregex "VCMP(D|S|H|ZD|ZS|ZH)$", "VCMPE(D|S|H|ZD|ZS|ZH)")>;
721 734
722 // fp convert 735 // fp convert
723 def : InstRW<[A57Write_5cyc_1V], (instregex 736 def : InstRW<[A57Write_5cyc_1V], (instregex
724 "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)", "VCVT(BDH|THD|TDH)")>; 737 "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)", "VCVT(BDH|THD|TDH)")>;
725 738 def : InstRW<[A57Write_5cyc_1V], (instregex "VTOSLS", "VTOUHS", "VTOULS")>;
726 def : SchedAlias<WriteFPCVT, A57Write_5cyc_1V>; 739 def : SchedAlias<WriteFPCVT, A57Write_5cyc_1V>;
740
741 def : InstRW<[A57Write_5cyc_1V], (instregex "VJCVT")>;
727 742
728 // FP round to integral 743 // FP round to integral
729 def : InstRW<[A57Write_5cyc_1V], (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>; 744 def : InstRW<[A57Write_5cyc_1V], (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>;
730 745
731 // FP divide, FP square root 746 // FP divide, FP square root
732 def : SchedAlias<WriteFPDIV32, A57Write_17cyc_1W>; 747 def : SchedAlias<WriteFPDIV32, A57Write_17cyc_1W>;
733 def : SchedAlias<WriteFPDIV64, A57Write_32cyc_1W>; 748 def : SchedAlias<WriteFPDIV64, A57Write_32cyc_1W>;
734 def : SchedAlias<WriteFPSQRT32, A57Write_17cyc_1W>; 749 def : SchedAlias<WriteFPSQRT32, A57Write_17cyc_1W>;
735 def : SchedAlias<WriteFPSQRT64, A57Write_32cyc_1W>; 750 def : SchedAlias<WriteFPSQRT64, A57Write_32cyc_1W>;
751
752 def : InstRW<[A57Write_17cyc_1W], (instregex "VSQRTH")>;
736 753
737 // FP max/min 754 // FP max/min
738 def : InstRW<[A57Write_5cyc_1V], (instregex "VMAX", "VMIN")>; 755 def : InstRW<[A57Write_5cyc_1V], (instregex "VMAX", "VMIN")>;
739 756
740 // FP multiply-accumulate pipelines support late forwarding of the result 757 // FP multiply-accumulate pipelines support late forwarding of the result
765 782
766 def : SchedAlias<WriteFPMAC32, A57WriteVFMA>; 783 def : SchedAlias<WriteFPMAC32, A57WriteVFMA>;
767 def : SchedAlias<WriteFPMAC64, A57WriteVFMA>; 784 def : SchedAlias<WriteFPMAC64, A57WriteVFMA>;
768 def : SchedAlias<ReadFPMAC, A57ReadVFMA5>; 785 def : SchedAlias<ReadFPMAC, A57ReadVFMA5>;
769 786
787 // VMLAH/VMLSH are not bound to scheduling classes by default; bind them here:
788 def : InstRW<[A57WriteVFMA, A57ReadVFMA5, ReadFPMUL, ReadFPMUL],
789 (instregex "VMLAH", "VMLSH", "VNMLAH", "VNMLSH")>;
790
791 def : InstRW<[A57WriteVMUL],
792 (instregex "VUDOTD", "VSDOTD", "VUDOTQ", "VSDOTQ")>;
793
770 def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG")>; 794 def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG")>;
771 def : InstRW<[A57Write_3cyc_1V], (instregex "VSEL")>; 795 def : InstRW<[A57Write_3cyc_1V], (instregex "VSEL")>;
772 796
773 // --- 3.11 FP Miscellaneous Instructions --- 797 // --- 3.11 FP Miscellaneous Instructions ---
774 // VMOV: 3cyc "F0/F1" for imm/reg 798 // VMOV: 3cyc "F0/F1" for imm/reg
775 def : InstRW<[A57Write_3cyc_1V], (instregex "FCONST(D|S|H)")>; 799 def : InstRW<[A57Write_3cyc_1V], (instregex "FCONST(D|S|H)")>;
776 def : InstRW<[A57Write_3cyc_1V], (instregex "VMOV(D|S|H)(cc)?$")>; 800 def : InstRW<[A57Write_3cyc_1V], (instregex "VMOV(D|S|H)(cc)?$")>;
801
802 def : InstRW<[A57Write_3cyc_1V], (instregex "VINSH")>;
777 803
778 // 5cyc L for FP transfer, vfp to core reg, 804 // 5cyc L for FP transfer, vfp to core reg,
779 // 5cyc L for FP transfer, core reg to vfp 805 // 5cyc L for FP transfer, core reg to vfp
780 def : SchedAlias<WriteFPMOV, A57Write_5cyc_1L>; 806 def : SchedAlias<WriteFPMOV, A57Write_5cyc_1L>;
781 // VMOVRRS/VMOVRRD in common code declared with one WriteFPMOV (instead of 2). 807 // VMOVRRS/VMOVRRD in common code declared with one WriteFPMOV (instead of 2).
1060 SchedVar<NoSchedPred, [SchedReadAdvance<3, [A57WriteVQDMLAL_VecInt]>]> 1086 SchedVar<NoSchedPred, [SchedReadAdvance<3, [A57WriteVQDMLAL_VecInt]>]>
1061 ]>; 1087 ]>;
1062 def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt], 1088 def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt],
1063 (instregex "VQDMLAL", "VQDMLSL")>; 1089 (instregex "VQDMLAL", "VQDMLSL")>;
1064 1090
1091 // Vector Saturating Rounding Doubling Multiply Accumulate/Subtract Long
1092 // Scheduling info from VQDMLAL/VQDMLSL
1093 def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt],
1094 (instregex "VQRDMLAH", "VQRDMLSH")>;
1095
1065 // ASIMD multiply long 1096 // ASIMD multiply long
1066 // 5cyc F0 for r0px, 4cyc F0 for r1p0 and later 1097 // 5cyc F0 for r0px, 4cyc F0 for r1p0 and later
1067 def A57WriteVMULL_VecInt : SchedWriteVariant<[ 1098 def A57WriteVMULL_VecInt : SchedWriteVariant<[
1068 SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, 1099 SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
1069 SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; 1100 SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
1124 1155
1125 // ASIMD FP arith 1156 // ASIMD FP arith
1126 def : InstRW<[A57Write_5cyc_1V], (instregex "VABD(fd|fq|hd|hq)", 1157 def : InstRW<[A57Write_5cyc_1V], (instregex "VABD(fd|fq|hd|hq)",
1127 "VADD(fd|fq|hd|hq)", "VPADD(f|h)", "VSUB(fd|fq|hd|hq)")>; 1158 "VADD(fd|fq|hd|hq)", "VPADD(f|h)", "VSUB(fd|fq|hd|hq)")>;
1128 1159
1160 def : InstRW<[A57Write_5cyc_1V], (instregex "VCADD", "VCMLA")>;
1161
1129 // ASIMD FP compare 1162 // ASIMD FP compare
1130 def : InstRW<[A57Write_5cyc_1V], (instregex "VAC(GE|GT|LE|LT)", 1163 def : InstRW<[A57Write_5cyc_1V], (instregex "VAC(GE|GT|LE|LT)",
1131 "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>; 1164 "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>;
1132 1165
1133 // ASIMD FP convert, integer 1166 // ASIMD FP convert, integer
1142 "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)", 1175 "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)",
1143 "VCVT(f2h|h2f)")>; 1176 "VCVT(f2h|h2f)")>;
1144 1177
1145 // ASIMD FP max/min 1178 // ASIMD FP max/min
1146 def : InstRW<[A57Write_5cyc_1V], (instregex 1179 def : InstRW<[A57Write_5cyc_1V], (instregex
1147 "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "VMAXNM", "VMINNM")>; 1180 "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "(NEON|VFP)_VMAXNM",
1181 "(NEON|VFP)_VMINNM")>;
1148 1182
1149 // ASIMD FP multiply 1183 // ASIMD FP multiply
1150 def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 5; } 1184 def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
1151 def : InstRW<[A57WriteVMUL_VecFP], (instregex "VMUL(sl)?(fd|fq|hd|hq)")>; 1185 def : InstRW<[A57WriteVMUL_VecFP], (instregex "VMUL(sl)?(fd|fq|hd|hq)")>;
1152 1186
1182 def : InstRW<[A57Write_3cyc_1V], (instregex "VEXT(d|q)(8|16|32|64)")>; 1216 def : InstRW<[A57Write_3cyc_1V], (instregex "VEXT(d|q)(8|16|32|64)")>;
1183 1217
1184 // ASIMD move, immed 1218 // ASIMD move, immed
1185 def : InstRW<[A57Write_3cyc_1V], (instregex 1219 def : InstRW<[A57Write_3cyc_1V], (instregex
1186 "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)", 1220 "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)",
1187 "VMOVQ0")>; 1221 "VMOVD0", "VMOVQ0")>;
1188 1222
1189 // ASIMD move, narrowing 1223 // ASIMD move, narrowing
1190 def : InstRW<[A57Write_3cyc_1V], (instregex "VMOVN")>; 1224 def : InstRW<[A57Write_3cyc_1V], (instregex "VMOVN")>;
1191 1225
1192 // ASIMD move, saturating 1226 // ASIMD move, saturating