Mercurial > hg > CbC > CbC_llvm
comparison lib/Target/ARM/ARMScheduleA57.td @ 148:63bd29f05246
merged
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 14 Aug 2019 19:46:37 +0900 |
parents | c2174574ed3a |
children |
comparison
equal
deleted
inserted
replaced
146:3fc4d5c3e21e | 148:63bd29f05246 |
---|---|
1 //=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=// | 1 //=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=// |
2 // | 2 // |
3 // The LLVM Compiler Infrastructure | 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 // | 4 // See https://llvm.org/LICENSE.txt for license information. |
5 // This file is distributed under the University of Illinois Open Source | 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 // License. See LICENSE.TXT for details. | |
7 // | 6 // |
8 //===----------------------------------------------------------------------===// | 7 //===----------------------------------------------------------------------===// |
9 // | 8 // |
10 // This file defines the machine model for ARM Cortex-A57 to support | 9 // This file defines the machine model for ARM Cortex-A57 to support |
11 // instruction scheduling and other instruction cost heuristics. | 10 // instruction scheduling and other instruction cost heuristics. |
90 let MispredictPenalty = 16; // Fetch + Decode/Rename/Dispatch + Branch | 89 let MispredictPenalty = 16; // Fetch + Decode/Rename/Dispatch + Branch |
91 | 90 |
92 // Enable partial & runtime unrolling. | 91 // Enable partial & runtime unrolling. |
93 let LoopMicroOpBufferSize = 16; | 92 let LoopMicroOpBufferSize = 16; |
94 let CompleteModel = 1; | 93 let CompleteModel = 1; |
94 | |
95 // FIXME: Remove when all errors have been fixed. | |
96 let FullInstRWOverlapCheck = 0; | |
97 | |
98 let UnsupportedFeatures = [HasV8_1MMainline, HasMVEInt, HasMVEFloat, | |
99 HasFPRegsV8_1M]; | |
95 } | 100 } |
96 | 101 |
97 //===----------------------------------------------------------------------===// | 102 //===----------------------------------------------------------------------===// |
98 // Define each kind of processor resource and number available on Cortex-A57. | 103 // Define each kind of processor resource and number available on Cortex-A57. |
99 // Cortex-A57 has 8 pipelines, each of which has its own 8-entry queue where | 104 // Cortex-A57 has 8 pipelines, each of which has its own 8-entry queue where |
123 def : InstRW<[WriteNoop], (instregex "(t)?BKPT$", "(t2)?CDP(2)?$", | 128 def : InstRW<[WriteNoop], (instregex "(t)?BKPT$", "(t2)?CDP(2)?$", |
124 "(t2)?CLREX$", "CONSTPOOL_ENTRY$", "COPY_STRUCT_BYVAL_I32$", | 129 "(t2)?CLREX$", "CONSTPOOL_ENTRY$", "COPY_STRUCT_BYVAL_I32$", |
125 "(t2)?CPS[123]p$", "(t2)?DBG$", "(t2)?DMB$", "(t2)?DSB$", "ERET$", | 130 "(t2)?CPS[123]p$", "(t2)?DBG$", "(t2)?DMB$", "(t2)?DSB$", "ERET$", |
126 "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$", "ITasm$", | 131 "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$", "ITasm$", |
127 "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC", "SPACE", | 132 "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC", "SPACE", |
128 "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "UDF$", "t2DCPS", "t2SG", | 133 "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "(t2|t)?UDF$", "t2DCPS", "t2SG", |
129 "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "CompilerBarrier")>; | 134 "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "CompilerBarrier", |
135 "t__brkdiv0")>; | |
130 | 136 |
131 def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>; | 137 def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>; |
132 | 138 |
133 // Specific memory instrs | 139 // Specific memory instrs |
134 def : InstRW<[WriteNoop, WriteNoop], (instregex "(t2)?LDA", "(t2)?LDC", "(t2)?STC", | 140 def : InstRW<[WriteNoop, WriteNoop], (instregex "(t2)?LDA", "(t2)?LDC", "(t2)?STC", |
144 def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>; | 150 def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>; |
145 | 151 |
146 // Pseudos | 152 // Pseudos |
147 def : InstRW<[WriteNoop], (instregex "(t2)?ABS$", | 153 def : InstRW<[WriteNoop], (instregex "(t2)?ABS$", |
148 "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj", | 154 "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj", |
149 "tLDRpci_pic", "t2SUBS_PC_LR", | 155 "tLDRpci_pic", "(t2)?SUBS_PC_LR", |
150 "JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp", | 156 "JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp", |
151 "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm", | 157 "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm", |
152 "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm", | 158 "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm", |
153 "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm", | 159 "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm", |
154 "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm", | 160 "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm", |
276 // from similar μops, allowing a typical sequence of multiply-accumulate μops | 282 // from similar μops, allowing a typical sequence of multiply-accumulate μops |
277 // to issue one every 1 cycle (sched advance = 2). | 283 // to issue one every 1 cycle (sched advance = 2). |
278 def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; } | 284 def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; } |
279 def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; } | 285 def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; } |
280 def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>; | 286 def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>; |
287 | |
288 def : InstRW<[A57WriteMLA], | |
289 (instregex "t2SMLAD", "t2SMLADX", "t2SMLSD", "t2SMLSDX")>; | |
281 | 290 |
282 def : SchedAlias<WriteMAC16, A57WriteMLA>; | 291 def : SchedAlias<WriteMAC16, A57WriteMLA>; |
283 def : SchedAlias<WriteMAC32, A57WriteMLA>; | 292 def : SchedAlias<WriteMAC32, A57WriteMLA>; |
284 def : SchedAlias<ReadMAC, A57ReadMLA>; | 293 def : SchedAlias<ReadMAC, A57ReadMLA>; |
285 | 294 |
585 | 594 |
586 // TODO: no writeback latency defined in documentation (implemented as 1 cyc) | 595 // TODO: no writeback latency defined in documentation (implemented as 1 cyc) |
587 def : InstRW<[A57WriteLDM_Upd], | 596 def : InstRW<[A57WriteLDM_Upd], |
588 (instregex "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)", "tPOP")>; | 597 (instregex "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)", "tPOP")>; |
589 | 598 |
599 def : InstRW<[A57Write_5cyc_1L], (instregex "VLLDM")>; | |
600 | |
590 // --- 3.9 Store Instructions --- | 601 // --- 3.9 Store Instructions --- |
591 | 602 |
592 // Store, immed offset | 603 // Store, immed offset |
593 def : InstRW<[A57Write_1cyc_1S], (instregex "STRi12", "STRBi12", "PICSTR", | 604 def : InstRW<[A57Write_1cyc_1S], (instregex "STRi12", "STRBi12", "PICSTR", |
594 "t2STR(B?)(T|i12|i8|s)", "t2STRDi8", "t2STRH(i12|i8|s)", "tSTR")>; | 605 "t2STR(B?)(T|i12|i8|s)", "t2STRDi8", "t2STRH(i12|i8|s)", "tSTR")>; |
703 | 714 |
704 def : InstRW<[A57WriteSTM], (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>; | 715 def : InstRW<[A57WriteSTM], (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>; |
705 def : InstRW<[A57WrBackOne, A57WriteSTM_Upd], | 716 def : InstRW<[A57WrBackOne, A57WriteSTM_Upd], |
706 (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)", "tPUSH")>; | 717 (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)", "tPUSH")>; |
707 | 718 |
719 def : InstRW<[A57Write_5cyc_1S], (instregex "VLSTM")>; | |
720 | |
708 // --- 3.10 FP Data Processing Instructions --- | 721 // --- 3.10 FP Data Processing Instructions --- |
709 def : SchedAlias<WriteFPALU32, A57Write_5cyc_1V>; | 722 def : SchedAlias<WriteFPALU32, A57Write_5cyc_1V>; |
710 def : SchedAlias<WriteFPALU64, A57Write_5cyc_1V>; | 723 def : SchedAlias<WriteFPALU64, A57Write_5cyc_1V>; |
711 | 724 |
712 def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(S|D|H)")>; | 725 def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(S|D|H)")>; |
720 (instregex "VCMP(D|S|H|ZD|ZS|ZH)$", "VCMPE(D|S|H|ZD|ZS|ZH)")>; | 733 (instregex "VCMP(D|S|H|ZD|ZS|ZH)$", "VCMPE(D|S|H|ZD|ZS|ZH)")>; |
721 | 734 |
722 // fp convert | 735 // fp convert |
723 def : InstRW<[A57Write_5cyc_1V], (instregex | 736 def : InstRW<[A57Write_5cyc_1V], (instregex |
724 "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)", "VCVT(BDH|THD|TDH)")>; | 737 "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)", "VCVT(BDH|THD|TDH)")>; |
725 | 738 def : InstRW<[A57Write_5cyc_1V], (instregex "VTOSLS", "VTOUHS", "VTOULS")>; |
726 def : SchedAlias<WriteFPCVT, A57Write_5cyc_1V>; | 739 def : SchedAlias<WriteFPCVT, A57Write_5cyc_1V>; |
740 | |
741 def : InstRW<[A57Write_5cyc_1V], (instregex "VJCVT")>; | |
727 | 742 |
728 // FP round to integral | 743 // FP round to integral |
729 def : InstRW<[A57Write_5cyc_1V], (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>; | 744 def : InstRW<[A57Write_5cyc_1V], (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>; |
730 | 745 |
731 // FP divide, FP square root | 746 // FP divide, FP square root |
732 def : SchedAlias<WriteFPDIV32, A57Write_17cyc_1W>; | 747 def : SchedAlias<WriteFPDIV32, A57Write_17cyc_1W>; |
733 def : SchedAlias<WriteFPDIV64, A57Write_32cyc_1W>; | 748 def : SchedAlias<WriteFPDIV64, A57Write_32cyc_1W>; |
734 def : SchedAlias<WriteFPSQRT32, A57Write_17cyc_1W>; | 749 def : SchedAlias<WriteFPSQRT32, A57Write_17cyc_1W>; |
735 def : SchedAlias<WriteFPSQRT64, A57Write_32cyc_1W>; | 750 def : SchedAlias<WriteFPSQRT64, A57Write_32cyc_1W>; |
751 | |
752 def : InstRW<[A57Write_17cyc_1W], (instregex "VSQRTH")>; | |
736 | 753 |
737 // FP max/min | 754 // FP max/min |
738 def : InstRW<[A57Write_5cyc_1V], (instregex "VMAX", "VMIN")>; | 755 def : InstRW<[A57Write_5cyc_1V], (instregex "VMAX", "VMIN")>; |
739 | 756 |
740 // FP multiply-accumulate pipelines support late forwarding of the result | 757 // FP multiply-accumulate pipelines support late forwarding of the result |
765 | 782 |
766 def : SchedAlias<WriteFPMAC32, A57WriteVFMA>; | 783 def : SchedAlias<WriteFPMAC32, A57WriteVFMA>; |
767 def : SchedAlias<WriteFPMAC64, A57WriteVFMA>; | 784 def : SchedAlias<WriteFPMAC64, A57WriteVFMA>; |
768 def : SchedAlias<ReadFPMAC, A57ReadVFMA5>; | 785 def : SchedAlias<ReadFPMAC, A57ReadVFMA5>; |
769 | 786 |
787 // VMLAH/VMLSH are not bound to scheduling classes by default, so bind them explicitly here: | |
788 def : InstRW<[A57WriteVFMA, A57ReadVFMA5, ReadFPMUL, ReadFPMUL], | |
789 (instregex "VMLAH", "VMLSH", "VNMLAH", "VNMLSH")>; | |
790 | |
791 def : InstRW<[A57WriteVMUL], | |
792 (instregex "VUDOTD", "VSDOTD", "VUDOTQ", "VSDOTQ")>; | |
793 | |
770 def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG")>; | 794 def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG")>; |
771 def : InstRW<[A57Write_3cyc_1V], (instregex "VSEL")>; | 795 def : InstRW<[A57Write_3cyc_1V], (instregex "VSEL")>; |
772 | 796 |
773 // --- 3.11 FP Miscellaneous Instructions --- | 797 // --- 3.11 FP Miscellaneous Instructions --- |
774 // VMOV: 3cyc "F0/F1" for imm/reg | 798 // VMOV: 3cyc "F0/F1" for imm/reg |
775 def : InstRW<[A57Write_3cyc_1V], (instregex "FCONST(D|S|H)")>; | 799 def : InstRW<[A57Write_3cyc_1V], (instregex "FCONST(D|S|H)")>; |
776 def : InstRW<[A57Write_3cyc_1V], (instregex "VMOV(D|S|H)(cc)?$")>; | 800 def : InstRW<[A57Write_3cyc_1V], (instregex "VMOV(D|S|H)(cc)?$")>; |
801 | |
802 def : InstRW<[A57Write_3cyc_1V], (instregex "VINSH")>; | |
777 | 803 |
778 // 5cyc L for FP transfer, vfp to core reg, | 804 // 5cyc L for FP transfer, vfp to core reg, |
779 // 5cyc L for FP transfer, core reg to vfp | 805 // 5cyc L for FP transfer, core reg to vfp |
780 def : SchedAlias<WriteFPMOV, A57Write_5cyc_1L>; | 806 def : SchedAlias<WriteFPMOV, A57Write_5cyc_1L>; |
781 // VMOVRRS/VMOVRRD in common code declared with one WriteFPMOV (instead of 2). | 807 // VMOVRRS/VMOVRRD in common code declared with one WriteFPMOV (instead of 2). |
1060 SchedVar<NoSchedPred, [SchedReadAdvance<3, [A57WriteVQDMLAL_VecInt]>]> | 1086 SchedVar<NoSchedPred, [SchedReadAdvance<3, [A57WriteVQDMLAL_VecInt]>]> |
1061 ]>; | 1087 ]>; |
1062 def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt], | 1088 def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt], |
1063 (instregex "VQDMLAL", "VQDMLSL")>; | 1089 (instregex "VQDMLAL", "VQDMLSL")>; |
1064 | 1090 |
1091 // Vector Saturating Rounding Doubling Multiply Accumulate/Subtract Returning High Half | |
1092 // (VQRDMLAH/VQRDMLSH); scheduling info reused from VQDMLAL/VQDMLSL | |
1093 def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt], | |
1094 (instregex "VQRDMLAH", "VQRDMLSH")>; | |
1095 | |
1065 // ASIMD multiply long | 1096 // ASIMD multiply long |
1066 // 5cyc F0 for r0px, 4cyc F0 for r1p0 and later | 1097 // 5cyc F0 for r0px, 4cyc F0 for r1p0 and later |
1067 def A57WriteVMULL_VecInt : SchedWriteVariant<[ | 1098 def A57WriteVMULL_VecInt : SchedWriteVariant<[ |
1068 SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, | 1099 SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, |
1069 SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; | 1100 SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; |
1124 | 1155 |
1125 // ASIMD FP arith | 1156 // ASIMD FP arith |
1126 def : InstRW<[A57Write_5cyc_1V], (instregex "VABD(fd|fq|hd|hq)", | 1157 def : InstRW<[A57Write_5cyc_1V], (instregex "VABD(fd|fq|hd|hq)", |
1127 "VADD(fd|fq|hd|hq)", "VPADD(f|h)", "VSUB(fd|fq|hd|hq)")>; | 1158 "VADD(fd|fq|hd|hq)", "VPADD(f|h)", "VSUB(fd|fq|hd|hq)")>; |
1128 | 1159 |
1160 def : InstRW<[A57Write_5cyc_1V], (instregex "VCADD", "VCMLA")>; | |
1161 | |
1129 // ASIMD FP compare | 1162 // ASIMD FP compare |
1130 def : InstRW<[A57Write_5cyc_1V], (instregex "VAC(GE|GT|LE|LT)", | 1163 def : InstRW<[A57Write_5cyc_1V], (instregex "VAC(GE|GT|LE|LT)", |
1131 "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>; | 1164 "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>; |
1132 | 1165 |
1133 // ASIMD FP convert, integer | 1166 // ASIMD FP convert, integer |
1142 "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)", | 1175 "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)", |
1143 "VCVT(f2h|h2f)")>; | 1176 "VCVT(f2h|h2f)")>; |
1144 | 1177 |
1145 // ASIMD FP max/min | 1178 // ASIMD FP max/min |
1146 def : InstRW<[A57Write_5cyc_1V], (instregex | 1179 def : InstRW<[A57Write_5cyc_1V], (instregex |
1147 "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "VMAXNM", "VMINNM")>; | 1180 "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "(NEON|VFP)_VMAXNM", |
1181 "(NEON|VFP)_VMINNM")>; | |
1148 | 1182 |
1149 // ASIMD FP multiply | 1183 // ASIMD FP multiply |
1150 def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 5; } | 1184 def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 5; } |
1151 def : InstRW<[A57WriteVMUL_VecFP], (instregex "VMUL(sl)?(fd|fq|hd|hq)")>; | 1185 def : InstRW<[A57WriteVMUL_VecFP], (instregex "VMUL(sl)?(fd|fq|hd|hq)")>; |
1152 | 1186 |
1182 def : InstRW<[A57Write_3cyc_1V], (instregex "VEXT(d|q)(8|16|32|64)")>; | 1216 def : InstRW<[A57Write_3cyc_1V], (instregex "VEXT(d|q)(8|16|32|64)")>; |
1183 | 1217 |
1184 // ASIMD move, immed | 1218 // ASIMD move, immed |
1185 def : InstRW<[A57Write_3cyc_1V], (instregex | 1219 def : InstRW<[A57Write_3cyc_1V], (instregex |
1186 "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)", | 1220 "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)", |
1187 "VMOVQ0")>; | 1221 "VMOVD0", "VMOVQ0")>; |
1188 | 1222 |
1189 // ASIMD move, narrowing | 1223 // ASIMD move, narrowing |
1190 def : InstRW<[A57Write_3cyc_1V], (instregex "VMOVN")>; | 1224 def : InstRW<[A57Write_3cyc_1V], (instregex "VMOVN")>; |
1191 | 1225 |
1192 // ASIMD move, saturating | 1226 // ASIMD move, saturating |