diff --git a/gnu/dist/toolchain/gcc/config/arm/arm.c b/gnu/dist/toolchain/gcc/config/arm/arm.c
index 5b441291cf09..1c0999856512 100644
--- a/gnu/dist/toolchain/gcc/config/arm/arm.c
+++ b/gnu/dist/toolchain/gcc/config/arm/arm.c
@@ -103,6 +103,7 @@ int arm_structure_size_boundary = 32; /* Used to be 8 */
 #define FL_THUMB      0x20	/* Thumb aware */
 #define FL_LDSCHED    0x40	/* Load scheduling necessary */
 #define FL_STRONG     0x80	/* StrongARM */
+#define FL_XSCALE     0x100	/* XScale */
 
 /* The bits in this mask specify which instructions we are allowed to
    generate.  */
 static int insn_flags = 0;
@@ -127,6 +128,9 @@ int arm_ld_sched = 0;
 /* Nonzero if this chip is a StrongARM.  */
 int arm_is_strong = 0;
 
+/* Nonzero if this chip is an XScale.  */
+int arm_is_xscale = 0;
+
 /* Nonzero if this chip is a an ARM6 or an ARM7.  */
 int arm_is_6_or_7 = 0;
 
@@ -235,7 +239,7 @@ static struct processors all_cores[] =
      --thorpej@netbsd.org */
   {"arm10tdmi",	FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED },
   {"arm1020t",	FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED },
-  {"xscale",	FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_LDSCHED | FL_STRONG },
+  {"xscale",	FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_LDSCHED | FL_STRONG | FL_XSCALE },
   {NULL, 0}
 };
 
@@ -523,6 +527,7 @@ arm_override_options ()
   /* Initialise boolean versions of the flags, for use in the arm.md file.  */
   arm_fast_multiply = (insn_flags & FL_FAST_MULT) != 0;
   arm_arch4 = (insn_flags & FL_ARCH4) != 0;
+  arm_is_xscale = (insn_flags & FL_XSCALE) != 0;
 
   arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
   arm_is_strong = (tune_flags & FL_STRONG) != 0;
@@ -574,6 +579,9 @@ arm_override_options ()
      to load a constant, and the load scheduler may well reduce that to 1.  */
   if (optimize_size || (tune_flags & FL_LDSCHED))
     arm_constant_limit = 1;
+
+  if (arm_is_xscale)
+    arm_constant_limit = 2;
 
   /* If optimizing for size, bump the number of instructions that we
      are prepared to conditionally execute (even on a StrongARM).  */
@@ -1867,6 +1875,47 @@ arm_adjust_cost (insn, link, dep, cost)
 {
   rtx i_pat, d_pat;
 
+  /* Some true dependencies can have a higher cost depending
+     on precisely how certain input operands are used.  */
+  if (arm_is_xscale
+      && REG_NOTE_KIND (link) == 0
+      && recog_memoized (insn) >= 0
+      && recog_memoized (dep) >= 0)
+    {
+      int shift_opnum = get_attr_shift (insn);
+      enum attr_type attr_type = get_attr_type (dep);
+
+      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
+	 operand for INSN.  If we have a shifted input operand and the
+	 instruction we depend on is another ALU instruction, then we may
+	 have to account for an additional stall.  */
+      if (shift_opnum != 0 && attr_type == TYPE_NORMAL)
+	{
+	  rtx shifted_operand;
+	  int opno;
+
+	  /* Get the shifted operand.  */
+	  extract_insn (insn);
+	  shifted_operand = recog_operand[shift_opnum];
+
+	  /* Iterate over all the operands in DEP.  If we write an operand
+	     that overlaps with SHIFTED_OPERAND, then we have to increase
+	     the cost of this dependency.  */
+	  extract_insn (dep);
+	  preprocess_constraints ();
+	  for (opno = 0; opno < recog_n_operands; opno++)
+	    {
+	      /* We can ignore strict inputs.  */
+	      if (recog_op_type[opno] == OP_IN)
+		continue;
+
+	      if (reg_overlap_mentioned_p (recog_operand[opno],
+					   shifted_operand))
+		return 2;
+	    }
+	}
+    }
+
   /* XXX This is not strictly true for the FPA.  */
   if (REG_NOTE_KIND(link) == REG_DEP_ANTI
       || REG_NOTE_KIND(link) == REG_DEP_OUTPUT)
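A note on what the new arm_adjust_cost logic catches: on XScale, an ALU result
that feeds the barrel shifter of a following instruction costs an extra cycle,
so the hook reports cost 2 for that dependency. The fragment below is an
illustration only, not part of the patch; the register names and instruction
selection shown are typical ARM compiler output, not guaranteed:

    /* Illustration: the add feeding the shifter is the dependency
       arm_adjust_cost now prices at 2 cycles instead of 1.  */
    int
    scale5 (int a, int b)
    {
      int t = a + b;   /* dep: ALU result written (e.g. add r1, r2, r3) */
      return t * 5;    /* insn: t consumed as a shifted operand,
                          t + (t << 2) (e.g. add r0, r1, r1, lsl #2) */
    }
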
@@ -3164,6 +3213,58 @@ arm_gen_load_multiple (base_regno, count, from, up, write_back, unchanging_p,
   int sign = up ? 1 : -1;
   rtx mem;
 
+  /* XScale has load-store double instructions, but they have stricter
+     alignment requirements than load-store multiple, so we cannot
+     use them.
+
+     For XScale ldm requires 2 + NREGS cycles to complete and blocks
+     the pipeline until completion.
+
+	NREGS CYCLES
+	  1	3
+	  2	4
+	  3	5
+	  4	6
+
+     An ldr instruction takes 1-3 cycles, but does not block the
+     pipeline.
+
+	NREGS CYCLES
+	  1	1-3
+	  2	2-6
+	  3	3-9
+	  4	4-12
+
+     In the best case ldr will always win.  However, the more ldr
+     instructions we issue, the less likely we are to be able to
+     schedule them well.  Using ldr instructions also increases
+     code size.
+
+     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
+     for counts of 3 or 4 regs.  */
+  if (arm_is_xscale && count <= 2 && ! optimize_size)
+    {
+      rtx seq;
+
+      start_sequence ();
+
+      for (i = 0; i < count; i++)
+	{
+	  mem = gen_rtx_MEM (SImode, plus_constant (from, i * 4 * sign));
+	  RTX_UNCHANGING_P (mem) = unchanging_p;
+	  MEM_IN_STRUCT_P (mem) = in_struct_p;
+	  MEM_SCALAR_P (mem) = scalar_p;
+	  emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
+	}
+
+      if (write_back)
+	emit_move_insn (from, plus_constant (from, count * 4 * sign));
+
+      seq = gen_sequence ();
+      end_sequence ();
+
+      return seq;
+    }
+
   result = gen_rtx_PARALLEL (VOIDmode,
			      rtvec_alloc (count + (write_back ? 2 : 0)));
   if (write_back)
@@ -3208,6 +3309,32 @@ arm_gen_store_multiple (base_regno, count, to, up, write_back, unchanging_p,
   int sign = up ? 1 : -1;
   rtx mem;
 
+  /* See arm_gen_load_multiple for a discussion of
+     the pros/cons of ldm/stm usage for XScale.  */
+  if (arm_is_xscale && count <= 2 && ! optimize_size)
+    {
+      rtx seq;
+
+      start_sequence ();
+
+      for (i = 0; i < count; i++)
+	{
+	  mem = gen_rtx_MEM (SImode, plus_constant (to, i * 4 * sign));
+	  RTX_UNCHANGING_P (mem) = unchanging_p;
+	  MEM_IN_STRUCT_P (mem) = in_struct_p;
+	  MEM_SCALAR_P (mem) = scalar_p;
+	  emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
+	}
+
+      if (write_back)
+	emit_move_insn (to, plus_constant (to, count * 4 * sign));
+
+      seq = gen_sequence ();
+      end_sequence ();
+
+      return seq;
+    }
+
   result = gen_rtx_PARALLEL (VOIDmode,
			      rtvec_alloc (count + (write_back ? 2 : 0)));
   if (write_back)
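The count <= 2 crossover in arm_gen_load_multiple/arm_gen_store_multiple
follows directly from the cycle figures quoted in the comment above. This
stand-alone sketch (numbers taken from that comment, not measured on
hardware) just tabulates them:

    #include <stdio.h>

    /* ldm blocks the pipeline for 2 + NREGS cycles; each ldr takes 1-3
       cycles but does not block.  Best case, ldr always wins, but every
       extra ldr costs code size and scheduling freedom -- hence the
       compromise of ldr for 1-2 registers and ldm for 3-4.  */
    int
    main (void)
    {
      int nregs;

      for (nregs = 1; nregs <= 4; nregs++)
        printf ("%d regs: ldm = %d cycles, ldr = %d-%d cycles\n",
                nregs, 2 + nregs, nregs * 1, nregs * 3);
      return 0;
    }
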
diff --git a/gnu/dist/toolchain/gcc/config/arm/arm.h b/gnu/dist/toolchain/gcc/config/arm/arm.h
index 900376e2a117..ee450b43f6bd 100644
--- a/gnu/dist/toolchain/gcc/config/arm/arm.h
+++ b/gnu/dist/toolchain/gcc/config/arm/arm.h
@@ -477,6 +477,9 @@ extern int arm_ld_sched;
 /* Nonzero if this chip is a StrongARM.  */
 extern int arm_is_strong;
 
+/* Nonzero if this chip is an XScale.  */
+extern int arm_is_xscale;
+
 /* Nonzero if this chip is a an ARM6 or an ARM7.  */
 extern int arm_is_6_or_7;
 
@@ -614,9 +617,12 @@ extern int arm_is_6_or_7;
 #define BIGGEST_ALIGNMENT 32
 
 /* Make strings word-aligned so strcpy from constants will be faster.  */
-#define CONSTANT_ALIGNMENT(EXP, ALIGN)				\
-  (TREE_CODE (EXP) == STRING_CST				\
-   && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+#define CONSTANT_ALIGNMENT_FACTOR (! arm_is_xscale ? 1 : 2)
+
+#define CONSTANT_ALIGNMENT(EXP, ALIGN)					\
+  ((TREE_CODE (EXP) == STRING_CST					\
+    && (ALIGN) < BITS_PER_WORD * CONSTANT_ALIGNMENT_FACTOR)		\
+   ? BITS_PER_WORD * CONSTANT_ALIGNMENT_FACTOR : (ALIGN))
 
 /* Every structures size must be a multiple of 32 bits.  */
 /* This is for compatibility with ARMCC.  ARM SDT Reference Manual
@@ -1703,6 +1709,9 @@ extern struct rtx_def *legitimize_pic_address ();
    in one reasonably fast instruction.  */
 #define MOVE_MAX 4
 
+#undef  MOVE_RATIO
+#define MOVE_RATIO (arm_is_xscale ? 4 : 2)
+
 /* Define if operations between registers always perform the operation
    on the full register even if a narrower mode is specified.  */
 #define WORD_REGISTER_OPERATIONS
diff --git a/gnu/dist/toolchain/gcc/config/arm/arm.md b/gnu/dist/toolchain/gcc/config/arm/arm.md
index 923a4dddab15..ab544092a7ee 100644
--- a/gnu/dist/toolchain/gcc/config/arm/arm.md
+++ b/gnu/dist/toolchain/gcc/config/arm/arm.md
@@ -48,6 +48,11 @@
 (define_attr "is_strongarm" "no,yes" (const (symbol_ref "arm_is_strong")))
 
+(define_attr "is_xscale" "no,yes" (const (symbol_ref "arm_is_xscale")))
+
+;; Operand number of an input operand that is shifted.  Zero if the
+;; given instruction does not shift one of its input operands.
+(define_attr "shift" "" (const_int 0))
+
 ; Floating Point Unit.  If we only have floating point emulation, then there
 ; is no point in scheduling the floating point insns.  (Well, for best
 ; performance we should try and group them together).
@@ -238,12 +243,26 @@
 ;;--------------------------------------------------------------------
 ;; Core unit
 ;;--------------------------------------------------------------------
 ;; Everything must spend at least one cycle in the core unit
+(define_function_unit "core" 1 0 (eq_attr "core_cycles" "single") 1 1)
+
 (define_function_unit "core" 1 0
   (and (eq_attr "ldsched" "yes") (eq_attr "type" "store1")) 1 1)
 
 (define_function_unit "core" 1 0
   (and (eq_attr "ldsched" "yes") (eq_attr "type" "load")) 2 1)
 
+;; We do not need to conditionalize the define_function_unit immediately
+;; above.  This one will be ignored for anything other than XScale
+;; compiles, and for XScale compiles it provides a larger delay
+;; and the scheduler will DTRT.
+;; FIXME: this test needs to be revamped so that it does not depend on
+;; this feature of the scheduler.
+
+(define_function_unit "core" 1 0
+  (and (and (eq_attr "ldsched" "yes") (eq_attr "type" "load"))
+       (eq_attr "is_xscale" "yes"))
+  3 1)
+
 (define_function_unit "core" 1 0
   (and (eq_attr "ldsched" "!yes") (eq_attr "type" "load,store1")) 2 2)
 
@@ -275,6 +294,10 @@
 (define_function_unit "core" 1 0 (eq_attr "type" "store3") 4 4)
 
 (define_function_unit "core" 1 0 (eq_attr "type" "store4") 5 5)
+
+(define_function_unit "core" 1 0
+  (and (eq_attr "core_cycles" "multi")
+       (eq_attr "type" "!mult,load,store1,store2,store3,store4")) 32 32)
 
 ;; Note: For DImode insns, there is normally no reason why operands should
 ;; not be in the same register, what we don't want is for something being
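The new is_xscale function unit raises the modeled load-result latency from
2 cycles to 3, so the scheduler tries to place more independent work between
a load and its first use. A hypothetical example follows; the assembly shown
is illustrative codegen only, and actual schedules depend on the surrounding
code:

    /* With -mcpu=xscale the 3-cycle load-result delay encourages the
       scheduler to fill the load shadow, roughly:
           ldr  r0, [r4]        @ load
           add  r2, r2, #1      @ independent filler
           add  r3, r3, #1      @ independent filler
           add  r0, r0, r1      @ first use, 3 cycles after the ldr
       (register assignments are illustrative).  */
    int
    load_then_use (int *p, int x, int *a, int *b)
    {
      int v = *p;      /* result not needed for 3 cycles */
      ++*a;            /* independent work */
      ++*b;
      return v + x;    /* first use of the loaded value */
    }
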
%1%S3") + "mvn%?\\t%0, %1%S3" + [(set_attr "shift" "1")] +) (define_insn "*notsi_shiftsi_compare0" [(set (reg:CC_NOOV 24) @@ -1826,7 +1861,10 @@ (not:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])))] "" "mvn%?s\\t%0, %1%S3" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "1") + ] +) (define_insn "*not_shiftsi_compare0_scratch" [(set (reg:CC_NOOV 24) @@ -1837,7 +1875,10 @@ (clobber (match_scratch:SI 0 "=r"))] "" "mvn%?s\\t%0, %1%S3" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "1") + ] +) ;; Unary arithmetic insns @@ -1900,6 +1941,7 @@ cmp\\t%0, #0\;rsblt\\t%0, %0, #0 eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31" [(set_attr "conds" "clob,*") + (set_attr "shift" "1") (set_attr "length" "8")]) (define_insn "*neg_abssi2" @@ -1911,6 +1953,7 @@ cmp\\t%0, #0\;rsbgt\\t%0, %0, #0 eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31" [(set_attr "conds" "clob,*") + (set_attr "shift" "1") (set_attr "length" "8")]) (define_insn "abssf2" @@ -2163,7 +2206,10 @@ output_asm_insn (\"mov%?\\t%Q0, %1\", operands); return \"mov%?\\t%R0, %Q0, asr #31\"; " -[(set_attr "length" "8")]) +[(set_attr "length" "8") + (set_attr "shift" "1") + ] +) (define_expand "zero_extendhisi2" [(set (match_dup 2) (ashift:SI (match_operand:HI 1 "nonimmediate_operand" "") @@ -3597,7 +3643,10 @@ (match_operand:SI 2 "arm_rhs_operand" "rM")])))] "" "cmp%?\\t%0, %1%S3" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "1") + ] +) (define_insn "*cmpsi_shiftsi_swp" [(set (reg:CC_SWP 24) @@ -3607,7 +3656,10 @@ (match_operand:SI 0 "s_register_operand" "r")))] "" "cmp%?\\t%0, %1%S3" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "1") + ] +) (define_insn "*cmpsi_neg_shiftsi" [(set (reg:CC 24) @@ -3617,7 +3669,10 @@ (match_operand:SI 2 "arm_rhs_operand" "rM")]))))] "" "cmn%?\\t%0, %1%S3" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "1") + ] +) (define_insn "*cmpsf_insn" [(set (reg:CCFP 24) @@ -4467,7 +4522,9 @@ (match_operand:SI 5 "reg_or_int_operand" "rI")]) (match_operand:SI 2 "s_register_operand" "r")]))] "" - "%i1%?\\t%0, %2, %4%S3") + "%i1%?\\t%0, %2, %4%S3" + [(set_attr "shift" "4")] +) (define_insn "*arith_shiftsi_compare0" [(set (reg:CC_NOOV 24) @@ -4482,7 +4539,10 @@ (match_dup 2)]))] "" "%i1%?s\\t%0, %2, %4%S3" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "4") + ] +) (define_insn "*arith_shiftsi_compare0_scratch" [(set (reg:CC_NOOV 24) @@ -4495,7 +4555,10 @@ (clobber (match_scratch:SI 0 "=r"))] "" "%i1%?s\\t%0, %2, %4%S3" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "4") + ] +) (define_insn "*sub_shiftsi" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -4504,7 +4567,9 @@ [(match_operand:SI 3 "s_register_operand" "r") (match_operand:SI 4 "reg_or_int_operand" "rM")])))] "" - "sub%?\\t%0, %1, %3%S2") + "sub%?\\t%0, %1, %3%S2" + [(set_attr "shift" "3")] +) (define_insn "*sub_shiftsi_compare0" [(set (reg:CC_NOOV 24) @@ -4519,7 +4584,10 @@ (match_dup 4)])))] "" "sub%?s\\t%0, %1, %3%S2" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "3") + ] +) (define_insn "*sub_shiftsi_compare0_scratch" [(set (reg:CC_NOOV 24) @@ -4532,7 +4600,10 @@ (clobber (match_scratch:SI 0 "=r"))] "" "sub%?s\\t%0, %1, %3%S2" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "3") + ] +) ;; These variants of the above insns can occur if the first operand is the ;; frame pointer and we 
@@ -5236,6 +5307,7 @@
    mov%D5\\t%0, %1\;mov%d5\\t%0, %2%S4
    mvn%D5\\t%0, #%B1\;mov%d5\\t%0, %2%S4"
 [(set_attr "conds" "use")
+ (set_attr "shift" "2")
  (set_attr "length" "4,8,8")])
 
 (define_insn "*ifcompare_move_shift"
@@ -5269,6 +5341,7 @@
    mov%d5\\t%0, %1\;mov%D5\\t%0, %2%S4
    mvn%d5\\t%0, #%B1\;mov%D5\\t%0, %2%S4"
 [(set_attr "conds" "use")
+ (set_attr "shift" "2")
  (set_attr "length" "4,8,8")])
 
 (define_insn "*ifcompare_shift_shift"
@@ -5303,6 +5376,7 @@
   ""
   "mov%d5\\t%0, %1%S6\;mov%D5\\t%0, %3%S7"
 [(set_attr "conds" "use")
+ (set_attr "shift" "1")
  (set_attr "length" "8")])
 
 (define_insn "*ifcompare_not_arith"
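The final hunks tag the conditional-move patterns, which can also feed the
shifter. A source fragment of the shape they match (illustrative; the exact
instructions emitted depend on the compiler's if-conversion):

    /* On ARM this can become a compare plus two conditional moves, the
       second with a shifted operand, e.g.
           cmp    r0, #0
           movge  r0, r1
           movlt  r0, r2, lsl #1
       which is the shape of the conditional-move patterns tagged above.  */
    int
    cond_shift (int c, int a, int b)
    {
      return c >= 0 ? a : (b << 1);
    }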