Mercurial > hg > CbC > CbC_gcc
diff gcc/config/arm/cortex-a9.md @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | f6334be47118 |
children | 84e7813d76e9 |
line wrap: on
line diff
--- a/gcc/config/arm/cortex-a9.md Sun Aug 21 07:07:55 2011 +0900 +++ b/gcc/config/arm/cortex-a9.md Fri Oct 27 22:46:09 2017 +0900 @@ -1,5 +1,5 @@ ;; ARM Cortex-A9 pipeline description -;; Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc. +;; Copyright (C) 2008-2017 Free Software Foundation, Inc. ;; Originally written by CodeSourcery for VFP. ;; ;; Rewritten by Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> @@ -68,7 +68,8 @@ "cortex_a9_mac_m1*2, cortex_a9_mac_m2, cortex_a9_p0_wb") (define_reservation "cortex_a9_mac" "cortex_a9_multcycle1*2 ,cortex_a9_mac_m2, cortex_a9_p0_wb") - +(define_reservation "cortex_a9_mult_long" + "cortex_a9_mac_m1*3, cortex_a9_mac_m2, cortex_a9_p0_wb") ;; Issue at the same time along the load store pipeline and ;; the VFP / Neon pipeline is not possible. @@ -79,18 +80,24 @@ ;; which can go down E2 without any problem. (define_insn_reservation "cortex_a9_dp" 2 (and (eq_attr "tune" "cortexa9") - (ior (and (eq_attr "type" "alu") - (eq_attr "neon_type" "none")) - (and (and (eq_attr "type" "alu_shift_reg, alu_shift") - (eq_attr "insn" "mov")) - (eq_attr "neon_type" "none")))) + (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ + alu_sreg,alus_sreg,logic_reg,logics_reg,\ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,clz,rbit,rev,alu_dsp_reg,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mov_shift_reg,mov_shift,\ + mrs,multiple,no_insn")) "cortex_a9_p0_default|cortex_a9_p1_default") ;; An instruction using the shifter will go down E1. (define_insn_reservation "cortex_a9_dp_shift" 3 (and (eq_attr "tune" "cortexa9") - (and (eq_attr "type" "alu_shift_reg, alu_shift") - (not (eq_attr "insn" "mov")))) + (eq_attr "type" "alu_shift_imm,alus_shift_imm,\ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + extend,mvn_shift,mvn_shift_reg")) "cortex_a9_p0_shift | cortex_a9_p1_shift") ;; Loads have a latency of 4 cycles. 
@@ -100,7 +107,7 @@ (define_insn_reservation "cortex_a9_load1_2" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "load1, load2, load_byte, f_loads, f_loadd")) + (eq_attr "type" "load_4, load_8, load_byte, f_loads, f_loadd")) "cortex_a9_ls") ;; Load multiples and store multiples can't be issued for 2 cycles in a @@ -109,12 +116,12 @@ (define_insn_reservation "cortex_a9_load3_4" 5 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "load3, load4")) + (eq_attr "type" "load_12, load_16")) "cortex_a9_ls, cortex_a9_ls") (define_insn_reservation "cortex_a9_store1_2" 0 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "store1, store2, f_stores, f_stored")) + (eq_attr "type" "store_4, store_8, f_stores, f_stored")) "cortex_a9_ls") ;; Almost all our store multiples use an auto-increment @@ -123,45 +130,51 @@ (define_insn_reservation "cortex_a9_store3_4" 0 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "store3, store4")) + (eq_attr "type" "store_12, store_16")) "cortex_a9_ls+(cortex_a9_p0_default | cortex_a9_p1_default), cortex_a9_ls") ;; We get 16*16 multiply / mac results in 3 cycles. (define_insn_reservation "cortex_a9_mult16" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "smulxy")) + (eq_attr "type" "smulxy")) "cortex_a9_mult16") ;; The 16*16 mac is slightly different in that it ;; reserves M1 and M2 in the same cycle. 
(define_insn_reservation "cortex_a9_mac16" 3 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "smlaxy")) + (eq_attr "type" "smlaxy")) "cortex_a9_mac16") - (define_insn_reservation "cortex_a9_multiply" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "mul")) + (eq_attr "type" "mul,smmul,smmulr")) "cortex_a9_mult") (define_insn_reservation "cortex_a9_mac" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "insn" "mla")) + (eq_attr "type" "mla,smmla")) "cortex_a9_mac") +(define_insn_reservation "cortex_a9_multiply_long" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals")) + "cortex_a9_mult_long") + ;; An instruction with a result in E2 can be forwarded ;; to E2 or E1 or M1 or the load store unit in the next cycle. (define_bypass 1 "cortex_a9_dp" "cortex_a9_dp_shift, cortex_a9_multiply, cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2, - cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4") + cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4, + cortex_a9_multiply_long") (define_bypass 2 "cortex_a9_dp_shift" "cortex_a9_dp_shift, cortex_a9_multiply, cortex_a9_load1_2, cortex_a9_dp, cortex_a9_store1_2, - cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4") + cortex_a9_mult16, cortex_a9_mac16, cortex_a9_mac, cortex_a9_store3_4, cortex_a9_load3_4, + cortex_a9_multiply_long") ;; An instruction in the load store pipeline can provide ;; read access to a DP instruction in the P0 default pipeline @@ -194,9 +207,9 @@ ;; Pipelining for VFP instructions. ;; Issue happens either along load store unit or the VFP / Neon unit. ;; Pipeline Instruction Classification. 
-;; FPS - fcpys, ffariths, ffarithd,r_2_f,f_2_r +;; FPS - fmov, ffariths, ffarithd,f_mcr,f_mcrr,f_mrc,f_mrrc ;; FP_ADD - fadds, faddd, fcmps (1) -;; FPMUL - fmul{s,d}, fmac{s,d} +;; FPMUL - fmul{s,d}, fmac{s,d}, ffma{s,d} ;; FPDIV - fdiv{s,d} (define_cpu_unit "ca9fps" "cortex_a9") (define_cpu_unit "ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4" "cortex_a9") @@ -207,19 +220,20 @@ ;; fmrs, fmrrd, fmstat and fmrx - The data is available after 1 cycle. (define_insn_reservation "cortex_a9_fps" 2 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fcpys, fconsts, fconstd, ffariths, ffarithd, r_2_f, f_2_r, f_flag")) + (eq_attr "type" "fmov, fconsts, fconstd, ffariths, ffarithd,\ + f_mcr, f_mcrr, f_mrc, f_mrrc, f_flag")) "ca9_issue_vfp_neon + ca9fps") (define_bypass 1 "cortex_a9_fps" - "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply") + "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply, cortex_a9_multiply_long") ;; Scheduling on the FP_ADD pipeline. (define_reservation "ca9fp_add" "ca9_issue_vfp_neon + ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4") (define_insn_reservation "cortex_a9_fadd" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fadds, faddd, f_cvt")) + (eq_attr "type" "fadds, faddd, f_cvt, f_cvtf2i, f_cvti2f")) "ca9fp_add") (define_insn_reservation "cortex_a9_fcmp" 1 @@ -246,23 +260,23 @@ (define_insn_reservation "cortex_a9_fmacs" 8 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fmacs")) + (eq_attr "type" "fmacs,ffmas")) "ca9fmuls, ca9fp_add") (define_insn_reservation "cortex_a9_fmacd" 9 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fmacd")) + (eq_attr "type" "fmacd,ffmad")) "ca9fmuld, ca9fp_add") ;; Division pipeline description. 
(define_insn_reservation "cortex_a9_fdivs" 15 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fdivs")) + (eq_attr "type" "fdivs, fsqrts")) "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*14") (define_insn_reservation "cortex_a9_fdivd" 25 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fdivd")) + (eq_attr "type" "fdivd, fsqrtd")) "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*24") ;; Include Neon pipeline description