diff --git a/gnu/dist/toolchain/gcc/config/arm/arm.c b/gnu/dist/toolchain/gcc/config/arm/arm.c
index 5b441291cf09..1c0999856512 100644
--- a/gnu/dist/toolchain/gcc/config/arm/arm.c
+++ b/gnu/dist/toolchain/gcc/config/arm/arm.c
@@ -103,6 +103,7 @@ int arm_structure_size_boundary = 32; /* Used to be 8 */
 #define FL_THUMB      0x20	/* Thumb aware */
 #define FL_LDSCHED    0x40	/* Load scheduling necessary */
 #define FL_STRONG     0x80	/* StrongARM */
+#define FL_XSCALE     0x100	/* XScale */
 
 /* The bits in this mask specify which instructions we are allowed to
    generate.  */
 static int insn_flags = 0;
@@ -127,6 +128,9 @@ int arm_ld_sched = 0;
 /* Nonzero if this chip is a StrongARM.  */
 int arm_is_strong = 0;
 
+/* Nonzero if this chip is an XScale.  */
+int arm_is_xscale = 0;
+
 /* Nonzero if this chip is a an ARM6 or an ARM7.  */
 int arm_is_6_or_7 = 0;
 
@@ -235,7 +239,7 @@ static struct processors all_cores[] =
      --thorpej@netbsd.org */
   {"arm10tdmi",	FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED },
   {"arm1020t",	FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED },
-  {"xscale",	FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_LDSCHED | FL_STRONG },
+  {"xscale",	FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_LDSCHED | FL_STRONG | FL_XSCALE },
   {NULL, 0}
 };
 
@@ -523,6 +527,7 @@ arm_override_options ()
   /* Initialise boolean versions of the flags, for use in the arm.md file.  */
   arm_fast_multiply = (insn_flags & FL_FAST_MULT) != 0;
   arm_arch4 = (insn_flags & FL_ARCH4) != 0;
+  arm_is_xscale = (insn_flags & FL_XSCALE) != 0;
 
   arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
   arm_is_strong = (tune_flags & FL_STRONG) != 0;
@@ -574,6 +579,9 @@ arm_override_options ()
      to load a constant, and the load scheduler may well reduce that to 1.  */
   if (optimize_size || (tune_flags & FL_LDSCHED))
     arm_constant_limit = 1;
+
+  if (arm_is_xscale)
+    arm_constant_limit = 2;
 
   /* If optimizing for size, bump the number of instructions that we
      are prepared to conditionally execute (even on a StrongARM).  */
@@ -1867,6 +1875,47 @@ arm_adjust_cost (insn, link, dep, cost)
 {
   rtx i_pat, d_pat;
 
+  /* Some true dependencies can have a higher cost depending
+     on precisely how certain input operands are used.  */
+  if (arm_is_xscale
+      && REG_NOTE_KIND (link) == 0
+      && recog_memoized (insn) >= 0
+      && recog_memoized (dep) >= 0)
+    {
+      int shift_opnum = get_attr_shift (insn);
+      enum attr_type attr_type = get_attr_type (dep);
+
+      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
+	 operand for INSN.  If we have a shifted input operand and the
+	 instruction we depend on is another ALU instruction, then we may
+	 have to account for an additional stall.  */
+      if (shift_opnum != 0 && attr_type == TYPE_NORMAL)
+	{
+	  rtx shifted_operand;
+	  int opno;
+
+	  /* Get the shifted operand.  */
+	  extract_insn (insn);
+	  shifted_operand = recog_operand[shift_opnum];
+
+	  /* Iterate over all the operands in DEP.  If we write an operand
+	     that overlaps with SHIFTED_OPERAND, then we have to increase
+	     the cost of this dependency.  */
+	  extract_insn (dep);
+	  preprocess_constraints ();
+	  for (opno = 0; opno < recog_n_operands; opno++)
+	    {
+	      /* We can ignore strict inputs.  */
+	      if (recog_op_type[opno] == OP_IN)
+		continue;
+
+	      if (reg_overlap_mentioned_p (recog_operand[opno],
+					   shifted_operand))
+		return 2;
+	    }
+	}
+    }
+
   /* XXX This is not strictly true for the FPA.  */
   if (REG_NOTE_KIND(link) == REG_DEP_ANTI
       || REG_NOTE_KIND(link) == REG_DEP_OUTPUT)
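A note on what the new arm_adjust_cost logic catches: on XScale, an ALU result
that feeds the barrel shifter of a following instruction costs an extra cycle,
so the hook reports cost 2 for that dependency. The fragment below is an
illustration only, not part of the patch; the register names and instruction
selection shown are typical ARM compiler output, not guaranteed:

    /* Illustration: the add feeding the shifter is the dependency
       arm_adjust_cost now prices at 2 cycles instead of 1.  */
    int
    scale5 (int a, int b)
    {
      int t = a + b;   /* dep: ALU result written (e.g. add r1, r2, r3) */
      return t * 5;    /* insn: t consumed as a shifted operand,
                          t + (t << 2) (e.g. add r0, r1, r1, lsl #2) */
    }
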
@@ -3164,6 +3213,58 @@ arm_gen_load_multiple (base_regno, count, from, up, write_back, unchanging_p,
   int sign = up ? 1 : -1;
   rtx mem;
 
+  /* XScale has load-store double instructions, but they have stricter
+     alignment requirements than load-store multiple, so we cannot
+     use them.
+
+     For XScale ldm requires 2 + NREGS cycles to complete and blocks
+     the pipeline until completion.
+
+	NREGS CYCLES
+	  1	3
+	  2	4
+	  3	5
+	  4	6
+
+     An ldr instruction takes 1-3 cycles, but does not block the
+     pipeline.
+
+	NREGS CYCLES
+	  1	1-3
+	  2	2-6
+	  3	3-9
+	  4	4-12
+
+     In the best case ldr will always win.  However, the more ldr
+     instructions we issue, the less likely we are to be able to
+     schedule them well.  Using ldr instructions also increases
+     code size.
+
+     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
+     for counts of 3 or 4 regs.  */
+  if (arm_is_xscale && count <= 2 && ! optimize_size)
+    {
+      rtx seq;
+
+      start_sequence ();
+
+      for (i = 0; i < count; i++)
+	{
+	  mem = gen_rtx_MEM (SImode, plus_constant (from, i * 4 * sign));
+	  RTX_UNCHANGING_P (mem) = unchanging_p;
+	  MEM_IN_STRUCT_P (mem) = in_struct_p;
+	  MEM_SCALAR_P (mem) = scalar_p;
+	  emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
+	}
+
+      if (write_back)
+	emit_move_insn (from, plus_constant (from, count * 4 * sign));
+
+      seq = gen_sequence ();
+      end_sequence ();
+
+      return seq;
+    }
+
   result = gen_rtx_PARALLEL (VOIDmode,
			      rtvec_alloc (count + (write_back ? 2 : 0)));
   if (write_back)
@@ -3208,6 +3309,32 @@ arm_gen_store_multiple (base_regno, count, to, up, write_back, unchanging_p,
   int sign = up ? 1 : -1;
   rtx mem;
 
+  /* See arm_gen_load_multiple for a discussion of
+     the pros/cons of ldm/stm usage for XScale.  */
+  if (arm_is_xscale && count <= 2 && ! optimize_size)
+    {
+      rtx seq;
+
+      start_sequence ();
+
+      for (i = 0; i < count; i++)
+	{
+	  mem = gen_rtx_MEM (SImode, plus_constant (to, i * 4 * sign));
+	  RTX_UNCHANGING_P (mem) = unchanging_p;
+	  MEM_IN_STRUCT_P (mem) = in_struct_p;
+	  MEM_SCALAR_P (mem) = scalar_p;
+	  emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
+	}
+
+      if (write_back)
+	emit_move_insn (to, plus_constant (to, count * 4 * sign));
+
+      seq = gen_sequence ();
+      end_sequence ();
+
+      return seq;
+    }
+
   result = gen_rtx_PARALLEL (VOIDmode,
			      rtvec_alloc (count + (write_back ? 2 : 0)));
   if (write_back)
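The count <= 2 crossover in arm_gen_load_multiple/arm_gen_store_multiple
follows directly from the cycle figures quoted in the comment above. This
stand-alone sketch (numbers taken from that comment, not measured on
hardware) just tabulates them:

    #include <stdio.h>

    /* ldm blocks the pipeline for 2 + NREGS cycles; each ldr takes 1-3
       cycles but does not block.  Best case, ldr always wins, but every
       extra ldr costs code size and scheduling freedom -- hence the
       compromise of ldr for 1-2 registers and ldm for 3-4.  */
    int
    main (void)
    {
      int nregs;

      for (nregs = 1; nregs <= 4; nregs++)
        printf ("%d regs: ldm = %d cycles, ldr = %d-%d cycles\n",
                nregs, 2 + nregs, nregs * 1, nregs * 3);
      return 0;
    }
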
diff --git a/gnu/dist/toolchain/gcc/config/arm/arm.h b/gnu/dist/toolchain/gcc/config/arm/arm.h
index 900376e2a117..ee450b43f6bd 100644
--- a/gnu/dist/toolchain/gcc/config/arm/arm.h
+++ b/gnu/dist/toolchain/gcc/config/arm/arm.h
@@ -477,6 +477,9 @@ extern int arm_ld_sched;
 /* Nonzero if this chip is a StrongARM.  */
 extern int arm_is_strong;
 
+/* Nonzero if this chip is an XScale.  */
+extern int arm_is_xscale;
+
 /* Nonzero if this chip is a an ARM6 or an ARM7.  */
 extern int arm_is_6_or_7;
 
@@ -614,9 +617,12 @@ extern int arm_is_6_or_7;
 #define BIGGEST_ALIGNMENT 32
 
 /* Make strings word-aligned so strcpy from constants will be faster.  */
-#define CONSTANT_ALIGNMENT(EXP, ALIGN)				\
-  (TREE_CODE (EXP) == STRING_CST				\
-   && (ALIGN) < BITS_PER_WORD ? BITS_PER_WORD : (ALIGN))
+#define CONSTANT_ALIGNMENT_FACTOR (! arm_is_xscale ? 1 : 2)
+
+#define CONSTANT_ALIGNMENT(EXP, ALIGN)					\
+  ((TREE_CODE (EXP) == STRING_CST					\
+    && (ALIGN) < BITS_PER_WORD * CONSTANT_ALIGNMENT_FACTOR)		\
+   ? BITS_PER_WORD * CONSTANT_ALIGNMENT_FACTOR : (ALIGN))
 
 /* Every structures size must be a multiple of 32 bits.  */
 /* This is for compatibility with ARMCC.  ARM SDT Reference Manual
@@ -1703,6 +1709,9 @@ extern struct rtx_def *legitimize_pic_address ();
    in one reasonably fast instruction.  */
 #define MOVE_MAX 4
 
+#undef  MOVE_RATIO
+#define MOVE_RATIO (arm_is_xscale ? 4 : 2)
+
 /* Define if operations between registers always perform the operation
    on the full register even if a narrower mode is specified.  */
 #define WORD_REGISTER_OPERATIONS
diff --git a/gnu/dist/toolchain/gcc/config/arm/arm.md b/gnu/dist/toolchain/gcc/config/arm/arm.md
index 923a4dddab15..ab544092a7ee 100644
--- a/gnu/dist/toolchain/gcc/config/arm/arm.md
+++ b/gnu/dist/toolchain/gcc/config/arm/arm.md
@@ -48,6 +48,11 @@
 (define_attr "is_strongarm" "no,yes" (const (symbol_ref "arm_is_strong")))
 
+(define_attr "is_xscale" "no,yes" (const (symbol_ref "arm_is_xscale")))
+
+;; Operand number of an input operand that is shifted.  Zero if the
+;; given instruction does not shift one of its input operands.
+(define_attr "shift" "" (const_int 0))
+
 ; Floating Point Unit.  If we only have floating point emulation, then there
 ; is no point in scheduling the floating point insns.  (Well, for best
 ; performance we should try and group them together).
@@ -238,12 +243,26 @@
 ;;--------------------------------------------------------------------
 ;; Core unit
 ;;--------------------------------------------------------------------
 ;; Everything must spend at least one cycle in the core unit
+(define_function_unit "core" 1 0 (eq_attr "core_cycles" "single") 1 1)
+
 (define_function_unit "core" 1 0
   (and (eq_attr "ldsched" "yes") (eq_attr "type" "store1")) 1 1)
 
 (define_function_unit "core" 1 0
   (and (eq_attr "ldsched" "yes") (eq_attr "type" "load")) 2 1)
 
+;; We do not need to conditionalize the define_function_unit immediately
+;; above.  This one will be ignored for anything other than XScale
+;; compiles, and for XScale compiles it provides a larger delay
+;; and the scheduler will DTRT.
+;; FIXME: this test needs to be revamped so that it does not depend on
+;; this feature of the scheduler.
+
+(define_function_unit "core" 1 0
+  (and (and (eq_attr "ldsched" "yes") (eq_attr "type" "load"))
+       (eq_attr "is_xscale" "yes"))
+  3 1)
+
 (define_function_unit "core" 1 0
   (and (eq_attr "ldsched" "!yes") (eq_attr "type" "load,store1")) 2 2)
 
@@ -275,6 +294,10 @@
 (define_function_unit "core" 1 0 (eq_attr "type" "store3") 4 4)
 
 (define_function_unit "core" 1 0 (eq_attr "type" "store4") 5 5)
+
+(define_function_unit "core" 1 0
+  (and (eq_attr "core_cycles" "multi")
+       (eq_attr "type" "!mult,load,store1,store2,store3,store4")) 32 32)
 
 ;; Note: For DImode insns, there is normally no reason why operands should
 ;; not be in the same register, what we don't want is for something being
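The new is_xscale function unit raises the modeled load-result latency from
2 cycles to 3, so the scheduler tries to place more independent work between
a load and its first use. A hypothetical example follows; the assembly shown
is illustrative codegen only, and actual schedules depend on the surrounding
code:

    /* With -mcpu=xscale the 3-cycle load-result delay encourages the
       scheduler to fill the load shadow, roughly:
           ldr  r0, [r4]        @ load
           add  r2, r2, #1      @ independent filler
           add  r3, r3, #1      @ independent filler
           add  r0, r0, r1      @ first use, 3 cycles after the ldr
       (register assignments are illustrative).  */
    int
    load_then_use (int *p, int x, int *a, int *b)
    {
      int v = *p;      /* result not needed for 3 cycles */
      ++*a;            /* independent work */
      ++*b;
      return v + x;    /* first use of the loaded value */
    }
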
%1%S3") + "mvn%?\\t%0, %1%S3" + [(set_attr "shift" "1")] +) (define_insn "*notsi_shiftsi_compare0" [(set (reg:CC_NOOV 24) @@ -1826,7 +1861,10 @@ (not:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])))] "" "mvn%?s\\t%0, %1%S3" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "1") + ] +) (define_insn "*not_shiftsi_compare0_scratch" [(set (reg:CC_NOOV 24) @@ -1837,7 +1875,10 @@ (clobber (match_scratch:SI 0 "=r"))] "" "mvn%?s\\t%0, %1%S3" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "1") + ] +) ;; Unary arithmetic insns @@ -1900,6 +1941,7 @@ cmp\\t%0, #0\;rsblt\\t%0, %0, #0 eor%?\\t%0, %1, %1, asr #31\;sub%?\\t%0, %0, %1, asr #31" [(set_attr "conds" "clob,*") + (set_attr "shift" "1") (set_attr "length" "8")]) (define_insn "*neg_abssi2" @@ -1911,6 +1953,7 @@ cmp\\t%0, #0\;rsbgt\\t%0, %0, #0 eor%?\\t%0, %1, %1, asr #31\;rsb%?\\t%0, %0, %1, asr #31" [(set_attr "conds" "clob,*") + (set_attr "shift" "1") (set_attr "length" "8")]) (define_insn "abssf2" @@ -2163,7 +2206,10 @@ output_asm_insn (\"mov%?\\t%Q0, %1\", operands); return \"mov%?\\t%R0, %Q0, asr #31\"; " -[(set_attr "length" "8")]) +[(set_attr "length" "8") + (set_attr "shift" "1") + ] +) (define_expand "zero_extendhisi2" [(set (match_dup 2) (ashift:SI (match_operand:HI 1 "nonimmediate_operand" "") @@ -3597,7 +3643,10 @@ (match_operand:SI 2 "arm_rhs_operand" "rM")])))] "" "cmp%?\\t%0, %1%S3" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "1") + ] +) (define_insn "*cmpsi_shiftsi_swp" [(set (reg:CC_SWP 24) @@ -3607,7 +3656,10 @@ (match_operand:SI 0 "s_register_operand" "r")))] "" "cmp%?\\t%0, %1%S3" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "1") + ] +) (define_insn "*cmpsi_neg_shiftsi" [(set (reg:CC 24) @@ -3617,7 +3669,10 @@ (match_operand:SI 2 "arm_rhs_operand" "rM")]))))] "" "cmn%?\\t%0, %1%S3" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "1") + ] +) (define_insn "*cmpsf_insn" [(set (reg:CCFP 24) @@ -4467,7 +4522,9 @@ (match_operand:SI 5 "reg_or_int_operand" "rI")]) (match_operand:SI 2 "s_register_operand" "r")]))] "" - "%i1%?\\t%0, %2, %4%S3") + "%i1%?\\t%0, %2, %4%S3" + [(set_attr "shift" "4")] +) (define_insn "*arith_shiftsi_compare0" [(set (reg:CC_NOOV 24) @@ -4482,7 +4539,10 @@ (match_dup 2)]))] "" "%i1%?s\\t%0, %2, %4%S3" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "4") + ] +) (define_insn "*arith_shiftsi_compare0_scratch" [(set (reg:CC_NOOV 24) @@ -4495,7 +4555,10 @@ (clobber (match_scratch:SI 0 "=r"))] "" "%i1%?s\\t%0, %2, %4%S3" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "4") + ] +) (define_insn "*sub_shiftsi" [(set (match_operand:SI 0 "s_register_operand" "=r") @@ -4504,7 +4567,9 @@ [(match_operand:SI 3 "s_register_operand" "r") (match_operand:SI 4 "reg_or_int_operand" "rM")])))] "" - "sub%?\\t%0, %1, %3%S2") + "sub%?\\t%0, %1, %3%S2" + [(set_attr "shift" "3")] +) (define_insn "*sub_shiftsi_compare0" [(set (reg:CC_NOOV 24) @@ -4519,7 +4584,10 @@ (match_dup 4)])))] "" "sub%?s\\t%0, %1, %3%S2" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "3") + ] +) (define_insn "*sub_shiftsi_compare0_scratch" [(set (reg:CC_NOOV 24) @@ -4532,7 +4600,10 @@ (clobber (match_scratch:SI 0 "=r"))] "" "sub%?s\\t%0, %1, %3%S2" -[(set_attr "conds" "set")]) +[(set_attr "conds" "set") + (set_attr "shift" "3") + ] +) ;; These variants of the above insns can occur if the first operand is the ;; frame pointer and we 
@@ -5236,6 +5307,7 @@
    mov%D5\\t%0, %1\;mov%d5\\t%0, %2%S4
    mvn%D5\\t%0, #%B1\;mov%d5\\t%0, %2%S4"
 [(set_attr "conds" "use")
+ (set_attr "shift" "2")
  (set_attr "length" "4,8,8")])
 
 (define_insn "*ifcompare_move_shift"
@@ -5269,6 +5341,7 @@
    mov%d5\\t%0, %1\;mov%D5\\t%0, %2%S4
    mvn%d5\\t%0, #%B1\;mov%D5\\t%0, %2%S4"
 [(set_attr "conds" "use")
+ (set_attr "shift" "2")
  (set_attr "length" "4,8,8")])
 
 (define_insn "*ifcompare_shift_shift"
@@ -5303,6 +5376,7 @@
   ""
   "mov%d5\\t%0, %1%S6\;mov%D5\\t%0, %3%S7"
 [(set_attr "conds" "use")
+ (set_attr "shift" "1")
  (set_attr "length" "8")])
 
 (define_insn "*ifcompare_not_arith"
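The final hunks tag the conditional-move patterns, which can also feed the
shifter. A source fragment of the shape they match (illustrative; the exact
instructions emitted depend on the compiler's if-conversion):

    /* On ARM this can become a compare plus two conditional moves, the
       second with a shifted operand, e.g.
           cmp    r0, #0
           movge  r0, r1
           movlt  r0, r2, lsl #1
       which is the shape of the conditional-move patterns tagged above.  */
    int
    cond_shift (int c, int a, int b)
    {
      return c >= 0 ? a : (b << 1);
    }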