Import LLVM 3.4 RC2 r196603.
Bug fixes, most noticeable for inconsistencies in vectorized code.
This commit is contained in:
parent
1d57ee3e45
commit
49d3a2eb83
4 external/bsd/llvm/dist/llvm/CODE_OWNERS.TXT vendored
@@ -109,6 +109,10 @@ N: Nadav Rotem
 E: nrotem@apple.com
 D: X86 Backend, Loop Vectorizer
 
+N: Daniel Sanders
+E: daniel.sanders@imgtec.com
+D: MIPS Backend (lib/Target/Mips/*)
+
 N: Richard Sandiford
 E: rsandifo@linux.vnet.ibm.com
 D: SystemZ Backend
@@ -78,6 +78,15 @@ Compile.CMI := $(strip $(OCAMLC) -c $(OCAMLCFLAGS) $(OCAMLDEBUGFLAG) -o)
 Compile.CMO := $(strip $(OCAMLC) -c $(OCAMLCFLAGS) $(OCAMLDEBUGFLAG) -o)
 Compile.CMX := $(strip $(OCAMLOPT) -c $(OCAMLCFLAGS) $(OCAMLDEBUGFLAG) -o)
 
+ifdef OCAMLSTUBS
+# Avoid the need for LD_LIBRARY_PATH
+ifneq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+ifneq ($(HOST_OS),Darwin)
+OCAMLRPATH := $(RPATH) -Wl,'$(SharedLibDir)'
+endif
+endif
+endif
+
 ifdef OCAMLSTUBS
 Archive.CMA := $(strip $(OCAMLC) -a -dllib -l$(LIBRARYNAME) $(OCAMLDEBUGFLAG) \
                        -o)
@@ -88,7 +97,8 @@ endif
 
 ifdef OCAMLSTUBS
 Archive.CMXA := $(strip $(OCAMLOPT) -a $(patsubst %,-cclib %, \
-                        $(LLVMLibsOptions) -l$(LIBRARYNAME)) \
+                        $(LLVMLibsOptions) -l$(LIBRARYNAME) \
+                        -L$(SharedLibDir) $(OCAMLRPATH)) \
                         $(OCAMLDEBUGFLAG) -o)
 else
 Archive.CMXA := $(strip $(OCAMLOPT) -a $(OCAMLAFLAGS) $(OCAMLDEBUGFLAG) -o)
@@ -233,7 +243,7 @@ uninstall-local:: uninstall-shared
 
 $(SharedLib): $(ObjectsO) $(OcamlDir)/.dir
 	$(Echo) "Building $(BuildMode) $(notdir $@)"
-	$(Verb) $(Link) $(SharedLinkOptions) $(LLVMLibsOptions) \
+	$(Verb) $(Link) $(SharedLinkOptions) $(OCAMLRPATH) $(LLVMLibsOptions) \
 	  -o $@ $(ObjectsO)
 
 clean-shared::
38 external/bsd/llvm/dist/llvm/docs/Extensions.rst vendored
@@ -105,3 +105,41 @@ Supported COMDAT types:
   .section .xdata$foo
   .linkonce associative .text$foo
   ...
+
+``.section`` Directive
+^^^^^^^^^^^^^^^^^^^^^^
+
+MC supports passing the information in ``.linkonce`` at the end of
+``.section``. For example, these two code sequences are equivalent:
+
+.. code-block:: gas
+
+  .section secName, "dr", discard, "Symbol1"
+  .globl Symbol1
+  Symbol1:
+  .long 1
+
+.. code-block:: gas
+
+  .section secName, "dr"
+  .linkonce discard
+  .globl Symbol1
+  Symbol1:
+  .long 1
+
+Note that in the combined form the COMDAT symbol is explicit. This
+extension exists to support multiple sections with the same name in
+different COMDATs:
+
+
+.. code-block:: gas
+
+  .section secName, "dr", discard, "Symbol1"
+  .globl Symbol1
+  Symbol1:
+  .long 1
+
+  .section secName, "dr", discard, "Symbol2"
+  .globl Symbol2
+  Symbol2:
+  .long 1
28 external/bsd/llvm/dist/llvm/docs/GoldPlugin.rst vendored
@@ -30,29 +30,22 @@ by running ``/usr/bin/ld -plugin``. If it complains "missing argument" then
 you have plugin support. If not, such as an "unknown option" error then you
 will either need to build gold or install a version with plugin support.
 
-* To build gold with plugin support:
+* Download, configure and build gold with plugin support:
 
   .. code-block:: bash
 
     $ mkdir binutils
    $ cd binutils
-    $ cvs -z 9 -d :pserver:anoncvs@sourceware.org:/cvs/src login
-    {enter "anoncvs" as the password}
-    $ cvs -z 9 -d :pserver:anoncvs@sourceware.org:/cvs/src co binutils
+    $ git clone --depth 1 git://sourceware.org/git/binutils-gdb.git binutils
    $ mkdir build
    $ cd build
-    $ ../src/configure --enable-gold --enable-plugins
+    $ ../binutils/configure --enable-gold --enable-plugins --disable-werror
    $ make all-gold
 
-  That should leave you with ``binutils/build/gold/ld-new`` which supports
-  the ``-plugin`` option. It also built would have
-  ``binutils/build/binutils/ar`` and ``nm-new`` which support plugins but
-  don't have a visible -plugin option, instead relying on the gold plugin
-  being present in ``../lib/bfd-plugins`` relative to where the binaries
-  are placed.
+  That should leave you with ``build/gold/ld-new`` which supports
+  the ``-plugin`` option. Running ``make`` will additionally build
+  ``build/binutils/ar`` and ``nm-new`` binaries supporting plugins.
 
 * Build the LLVMgold plugin: Configure LLVM with
-  ``--with-binutils-include=/path/to/binutils/src/include`` and run
+  ``--with-binutils-include=/path/to/binutils/include`` and run
   ``make``.
 
 Usage
@@ -72,9 +65,10 @@ the ``lib`` directory under its prefix and pass the ``-plugin`` option to
 ``ld``. It will not look for an alternate linker, which is why you need
 gold to be the installed system linker in your path.
 
-If you want ``ar`` and ``nm`` to work seamlessly as well, install
-``LLVMgold.so`` to ``/usr/lib/bfd-plugins``. If you built your own gold, be
-sure to install the ``ar`` and ``nm-new`` you built to ``/usr/bin``.
+``ar`` and ``nm`` also accept the ``-plugin`` option and it's possible
+to install ``LLVMgold.so`` to ``/usr/lib/bfd-plugins`` for a seamless setup.
+If you built your own gold, be sure to install the ``ar`` and ``nm-new`` you
+built to ``/usr/bin``.
 
 
 Example of link time optimization
4 external/bsd/llvm/dist/llvm/docs/LangRef.rst vendored
@@ -128,7 +128,9 @@ lexical features of LLVM:
 #. Unnamed temporaries are created when the result of a computation is
    not assigned to a named value.
 #. Unnamed temporaries are numbered sequentially (using a per-function
-   incrementing counter, starting with 0).
+   incrementing counter, starting with 0). Note that basic blocks are
+   included in this numbering. For example, if the entry basic block is not
+   given a label name, then it will get number 0.
 
 It also shows a convention that we follow in this document. When
 demonstrating instructions, we will follow an instruction with a comment
@@ -84,6 +84,9 @@ Non-comprehensive list of changes in this release
 * Different sized pointers for different address spaces should now
   generally work. This is primarily useful for GPU targets.
 
+* OCaml bindings have been significantly extended to cover almost all of the
+  LLVM libraries.
+
 * ... next change ...
 
 .. NOTE
@@ -107,6 +110,19 @@ For more information on MSA (including documentation for the instruction set),
 see the `MIPS SIMD page at Imagination Technologies
 <http://imgtec.com/mips/mips-simd.asp>`_
 
+SPARC Target
+------------
+
+The SPARC backend got many improvements, namely
+
+* experimental SPARC V9 backend
+* JIT support for SPARC
+* fp128 support
+* exception handling
+* TLS support
+* leaf functions optimization
+* bug fixes
+
 External Open Source Projects Using LLVM 3.4
 ============================================
 
@@ -160,6 +176,23 @@ statically parallelize multiple work-items with the kernel compiler, even in
 the presence of work-group barriers. This enables static parallelization of
 the fine-grained static concurrency in the work groups in multiple ways.
 
+Portable Native Client (PNaCl)
+------------------------------
+
+`Portable Native Client (PNaCl) <http://www.chromium.org/nativeclient/pnacl>`_
+is a Chrome initiative to bring the performance and low-level control of native
+code to modern web browsers, without sacrificing the security benefits and
+portability of web applications. PNaCl works by compiling native C and C++ code
+to an intermediate representation using the LLVM clang compiler. This
+intermediate representation is a subset of LLVM bytecode that is wrapped into a
+portable executable, which can be hosted on a web server like any other website
+asset. When the site is accessed, Chrome fetches and translates the portable
+executable into an architecture-specific machine code optimized directly for
+the underlying device. PNaCl lets developers compile their code once to run on
+any hardware platform and embed their PNaCl application in any website,
+enabling developers to directly leverage the power of the underlying CPU and
+GPU.
+
 TTA-based Co-design Environment (TCE)
 -------------------------------------
@@ -564,6 +564,8 @@ public:
                                   unsigned SubReg = 0,
                                   bool isDebug = false,
                                   bool isInternalRead = false) {
+    assert(!(isDead && !isDef) && "Dead flag on non-def");
+    assert(!(isKill && isDef) && "Kill flag on def");
     MachineOperand Op(MachineOperand::MO_Register);
     Op.IsDef = isDef;
     Op.IsImp = isImp;
@@ -54,6 +54,10 @@ def int_aarch64_neon_fcvtas :
   Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
 def int_aarch64_neon_fcvtau :
   Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+def int_aarch64_neon_fcvtzs :
+  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+def int_aarch64_neon_fcvtzu :
+  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
 
 // Vector maxNum (Floating Point)
 def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;
@@ -308,6 +312,9 @@ def int_aarch64_neon_vsqadd : Neon_2Arg_Intrinsic;
 def int_aarch64_neon_vabs :
   Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;
 
+// Scalar Absolute Difference
+def int_aarch64_neon_vabd : Neon_2Arg_Intrinsic;
+
 // Scalar Negate Value
 def int_aarch64_neon_vneg :
   Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;
@@ -258,9 +258,15 @@ namespace llvm {
 
     const MCSectionCOFF *getCOFFSection(StringRef Section,
                                         unsigned Characteristics,
-                                        SectionKind Kind, int Selection = 0,
+                                        SectionKind Kind,
+                                        StringRef COMDATSymName,
+                                        int Selection,
                                         const MCSectionCOFF *Assoc = 0);
 
+    const MCSectionCOFF *getCOFFSection(StringRef Section,
+                                        unsigned Characteristics,
+                                        SectionKind Kind);
+
     const MCSectionCOFF *getCOFFSection(StringRef Section);
 
     /// @}
@@ -19,6 +19,7 @@
 #include "llvm/Support/COFF.h"
 
 namespace llvm {
+  class MCSymbol;
 
 /// MCSectionCOFF - This represents a section on Windows
   class MCSectionCOFF : public MCSection {
@@ -32,6 +33,11 @@ namespace llvm {
     /// drawn from the enums below.
     mutable unsigned Characteristics;
 
+    /// The COMDAT symbol of this section. Only valid if this is a COMDAT
+    /// section. Two COMDAT sections are merged if they have the same
+    /// COMDAT symbol.
+    const MCSymbol *COMDATSymbol;
+
     /// Selection - This is the Selection field for the section symbol, if
     /// it is a COMDAT section (Characteristics & IMAGE_SCN_LNK_COMDAT) != 0
     mutable int Selection;
@@ -44,9 +50,11 @@ namespace llvm {
   private:
     friend class MCContext;
     MCSectionCOFF(StringRef Section, unsigned Characteristics,
-                  int Selection, const MCSectionCOFF *Assoc, SectionKind K)
-      : MCSection(SV_COFF, K), SectionName(Section),
-        Characteristics(Characteristics), Selection(Selection), Assoc(Assoc) {
+                  const MCSymbol *COMDATSymbol, int Selection,
+                  const MCSectionCOFF *Assoc, SectionKind K)
+      : MCSection(SV_COFF, K), SectionName(Section),
+        Characteristics(Characteristics), COMDATSymbol(COMDATSymbol),
+        Selection(Selection), Assoc(Assoc) {
       assert ((Characteristics & 0x00F00000) == 0 &&
         "alignment must not be set upon section creation");
       assert ((Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) ==
@@ -3342,7 +3342,6 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
   unsigned OpSizeInBits = VT.getSizeInBits();
   SDValue LHSShiftArg = LHSShift.getOperand(0);
   SDValue LHSShiftAmt = LHSShift.getOperand(1);
-  SDValue RHSShiftArg = RHSShift.getOperand(0);
   SDValue RHSShiftAmt = RHSShift.getOperand(1);
 
   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
@@ -3402,32 +3401,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
     // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
     //   (rotr x, (sub 32, y))
     if (ConstantSDNode *SUBC =
-          dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
-      if (SUBC->getAPIntValue() == OpSizeInBits) {
+          dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0)))
+      if (SUBC->getAPIntValue() == OpSizeInBits)
         return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
                            HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
-      } else if (LHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
-                 LHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
-        // fold (or (shl (*ext x), (*ext y)),
-        //          (srl (*ext x), (*ext (sub 32, y)))) ->
-        //   (*ext (rotl x, y))
-        // fold (or (shl (*ext x), (*ext y)),
-        //          (srl (*ext x), (*ext (sub 32, y)))) ->
-        //   (*ext (rotr x, (sub 32, y)))
-        SDValue LArgExtOp0 = LHSShiftArg.getOperand(0);
-        EVT LArgVT = LArgExtOp0.getValueType();
-        bool HasROTRWithLArg = TLI.isOperationLegalOrCustom(ISD::ROTR, LArgVT);
-        bool HasROTLWithLArg = TLI.isOperationLegalOrCustom(ISD::ROTL, LArgVT);
-        if (HasROTRWithLArg || HasROTLWithLArg) {
-          if (LArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
-            SDValue V =
-              DAG.getNode(HasROTLWithLArg ? ISD::ROTL : ISD::ROTR, DL, LArgVT,
-                          LArgExtOp0, HasROTL ? LHSShiftAmt : RHSShiftAmt);
-            return DAG.getNode(LHSShiftArg.getOpcode(), DL, VT, V).getNode();
-          }
-        }
-      }
-    }
   } else if (LExtOp0.getOpcode() == ISD::SUB &&
              RExtOp0 == LExtOp0.getOperand(1)) {
     // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
@@ -3435,32 +3412,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
     // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
     //   (rotl x, (sub 32, y))
     if (ConstantSDNode *SUBC =
-          dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
-      if (SUBC->getAPIntValue() == OpSizeInBits) {
+          dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0)))
+      if (SUBC->getAPIntValue() == OpSizeInBits)
        return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
                           HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
-      } else if (RHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
-                 RHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
-        // fold (or (shl (*ext x), (*ext (sub 32, y))),
-        //          (srl (*ext x), (*ext y))) ->
-        //   (*ext (rotl x, y))
-        // fold (or (shl (*ext x), (*ext (sub 32, y))),
-        //          (srl (*ext x), (*ext y))) ->
-        //   (*ext (rotr x, (sub 32, y)))
-        SDValue RArgExtOp0 = RHSShiftArg.getOperand(0);
-        EVT RArgVT = RArgExtOp0.getValueType();
-        bool HasROTRWithRArg = TLI.isOperationLegalOrCustom(ISD::ROTR, RArgVT);
-        bool HasROTLWithRArg = TLI.isOperationLegalOrCustom(ISD::ROTL, RArgVT);
-        if (HasROTRWithRArg || HasROTLWithRArg) {
-          if (RArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
-            SDValue V =
-              DAG.getNode(HasROTRWithRArg ? ISD::ROTR : ISD::ROTL, DL, RArgVT,
-                          RArgExtOp0, HasROTR ? RHSShiftAmt : LHSShiftAmt);
-            return DAG.getNode(RHSShiftArg.getOpcode(), DL, VT, V).getNode();
-          }
-        }
-      }
-    }
   }
 
   return 0;
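The surviving code still matches the plain rotate idiom named in the fold comments above; only the extend-wrapped variants were removed. As a reminder of what that idiom looks like at the source level, a minimal sketch in ordinary C++ (independent of the SelectionDAG API):

    #include <cstdint>

    // The source-level pattern MatchRotate recognizes:
    // (or (shl x, c), (srl x, 32 - c)) becomes a single rotate-left node.
    uint32_t rotl32(uint32_t x, unsigned c) {
      c &= 31;                                  // keep the shift amount in range
      return (x << c) | (x >> ((32 - c) & 31)); // folds to one rotl instruction
    }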
@@ -733,6 +733,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
   return getContext().getCOFFSection(Name,
                                      Characteristics,
                                      Kind,
+                                     "",
                                      Selection);
 }
 
@@ -768,7 +769,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
     Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
 
     return getContext().getCOFFSection(Name.str(), Characteristics,
-                                       Kind, COFF::IMAGE_COMDAT_SELECT_ANY);
+                                       Kind, "", COFF::IMAGE_COMDAT_SELECT_ANY);
   }
 
   if (Kind.isText())
44 external/bsd/llvm/dist/llvm/lib/MC/MCContext.cpp vendored
@@ -34,8 +34,7 @@ typedef std::pair<std::string, std::string> SectionGroupPair;
 
 typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
 typedef std::map<SectionGroupPair, const MCSectionELF *> ELFUniqueMapTy;
-typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
-
+typedef std::map<SectionGroupPair, const MCSectionCOFF *> COFFUniqueMapTy;
 
 MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
                      const MCObjectFileInfo *mofi, const SourceMgr *mgr,
@@ -280,32 +279,51 @@ const MCSectionELF *MCContext::CreateELFGroupSection() {
   return Result;
 }
 
-const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
-                                               unsigned Characteristics,
-                                               SectionKind Kind, int Selection,
-                                               const MCSectionCOFF *Assoc) {
+const MCSectionCOFF *
+MCContext::getCOFFSection(StringRef Section, unsigned Characteristics,
+                          SectionKind Kind, StringRef COMDATSymName,
+                          int Selection, const MCSectionCOFF *Assoc) {
   if (COFFUniquingMap == 0)
     COFFUniquingMap = new COFFUniqueMapTy();
   COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap;
 
-  // Do the lookup, if we have a hit, return it.
-  StringMapEntry<const MCSectionCOFF*> &Entry = Map.GetOrCreateValue(Section);
-  if (Entry.getValue()) return Entry.getValue();
-
-  MCSectionCOFF *Result = new (*this) MCSectionCOFF(Entry.getKey(),
-                                                    Characteristics,
-                                                    Selection, Assoc, Kind);
+  SectionGroupPair P(Section, COMDATSymName);
+  std::pair<COFFUniqueMapTy::iterator, bool> Entry =
+    Map.insert(std::make_pair(P, (MCSectionCOFF *)0));
+  COFFUniqueMapTy::iterator Iter = Entry.first;
+  if (!Entry.second)
+    return Iter->second;
 
-  Entry.setValue(Result);
+  const MCSymbol *COMDATSymbol = NULL;
+  if (!COMDATSymName.empty())
+    COMDATSymbol = GetOrCreateSymbol(COMDATSymName);
+
+  MCSectionCOFF *Result =
+    new (*this) MCSectionCOFF(Iter->first.first, Characteristics,
+                              COMDATSymbol, Selection, Assoc, Kind);
+
+  Iter->second = Result;
   return Result;
 }
 
+const MCSectionCOFF *
+MCContext::getCOFFSection(StringRef Section, unsigned Characteristics,
+                          SectionKind Kind) {
+  return getCOFFSection(Section, Characteristics, Kind, "", 0);
+}
+
 const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) {
   if (COFFUniquingMap == 0)
     COFFUniquingMap = new COFFUniqueMapTy();
   COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap;
 
-  return Map.lookup(Section);
+  SectionGroupPair P(Section, "");
+  COFFUniqueMapTy::iterator Iter = Map.find(P);
+  if (Iter == Map.end())
+    return 0;
+  return Iter->second;
 }
 
 //===----------------------------------------------------------------------===//
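The change above switches COFF section uniquing from a name-keyed StringMap to a map keyed on the (section name, COMDAT symbol) pair, so that sections sharing a name but belonging to different COMDATs stay distinct. A minimal sketch of that keying scheme in plain C++ (names are illustrative, independent of the MC classes):

    #include <map>
    #include <string>
    #include <utility>

    struct Section { /* section properties elided */ };

    // Key on (name, COMDAT symbol): "secName"/"Symbol1" and "secName"/"Symbol2"
    // yield two distinct sections, as the Extensions.rst example requires.
    using SectionKey = std::pair<std::string, std::string>;
    std::map<SectionKey, Section *> uniqueSections;

    Section *getOrCreate(const std::string &name, const std::string &comdat) {
      auto ins = uniqueSections.insert({{name, comdat}, nullptr});
      if (!ins.second)
        return ins.first->second;      // existing entry: return it
      ins.first->second = new Section; // first use of this (name, comdat) pair
      return ins.first->second;
    }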
@@ -15,6 +15,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCELF.h"
@@ -96,6 +97,9 @@ void MCELFStreamer::EmitDebugLabel(MCSymbol *Symbol) {
 }
 
 void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+  // Let the target do whatever target specific stuff it needs to do.
+  getAssembler().getBackend().handleAssemblerFlag(Flag);
+  // Do any generic stuff we need to do.
   switch (Flag) {
   case MCAF_SyntaxUnified: return; // no-op here.
   case MCAF_Code16: return; // Change parsing mode; no-op here.
@@ -35,6 +35,10 @@ class COFFAsmParser : public MCAsmParserExtension {
                           unsigned Characteristics,
                           SectionKind Kind);
 
+  bool ParseSectionSwitch(StringRef Section, unsigned Characteristics,
+                          SectionKind Kind, StringRef COMDATSymName,
+                          COFF::COMDATType Type, const MCSectionCOFF *Assoc);
+
   bool ParseSectionName(StringRef &SectionName);
   bool ParseSectionFlags(StringRef FlagsString, unsigned* Flags);
 
@@ -111,6 +115,8 @@ class COFFAsmParser : public MCAsmParserExtension {
   bool ParseDirectiveType(StringRef, SMLoc);
   bool ParseDirectiveEndef(StringRef, SMLoc);
   bool ParseDirectiveSecRel32(StringRef, SMLoc);
+  bool parseCOMDATTypeAndAssoc(COFF::COMDATType &Type,
+                               const MCSectionCOFF *&Assoc);
   bool ParseDirectiveLinkOnce(StringRef, SMLoc);
 
   // Win64 EH directives.
@@ -284,12 +290,22 @@ bool COFFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
 bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
                                        unsigned Characteristics,
                                        SectionKind Kind) {
+  return ParseSectionSwitch(Section, Characteristics, Kind, "",
+                            COFF::IMAGE_COMDAT_SELECT_ANY, 0);
+}
+
+bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
+                                       unsigned Characteristics,
+                                       SectionKind Kind,
+                                       StringRef COMDATSymName,
+                                       COFF::COMDATType Type,
+                                       const MCSectionCOFF *Assoc) {
   if (getLexer().isNot(AsmToken::EndOfStatement))
     return TokError("unexpected token in section switching directive");
   Lex();
 
   getStreamer().SwitchSection(getContext().getCOFFSection(
-                                Section, Characteristics, Kind));
+                                Section, Characteristics, Kind, COMDATSymName,
+                                Type, Assoc));
 
   return false;
 }
@@ -303,7 +319,7 @@ bool COFFAsmParser::ParseSectionName(StringRef &SectionName) {
   return false;
 }
 
-// .section name [, "flags"]
+// .section name [, "flags"] [, identifier [ identifier ], identifier]
 //
 // Supported flags:
 //   a: Ignored.
@@ -340,11 +356,30 @@ bool COFFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
       return true;
   }
 
+  COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY;
+  const MCSectionCOFF *Assoc = 0;
+  StringRef COMDATSymName;
+  if (getLexer().is(AsmToken::Comma)) {
+    Lex();
+
+    Flags |= COFF::IMAGE_SCN_LNK_COMDAT;
+
+    if (parseCOMDATTypeAndAssoc(Type, Assoc))
+      return true;
+
+    if (getLexer().isNot(AsmToken::Comma))
+      return TokError("expected comma in directive");
+    Lex();
+
+    if (getParser().parseIdentifier(COMDATSymName))
+      return TokError("expected identifier in directive");
+  }
+
   if (getLexer().isNot(AsmToken::EndOfStatement))
     return TokError("unexpected token in directive");
 
   SectionKind Kind = computeSectionKind(Flags);
-  ParseSectionSwitch(SectionName, Flags, Kind);
+  ParseSectionSwitch(SectionName, Flags, Kind, COMDATSymName, Type, Assoc);
   return false;
 }
@@ -409,37 +444,29 @@ bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) {
   return false;
 }
 
-/// ParseDirectiveLinkOnce
-/// ::= .linkonce [ identifier [ identifier ] ]
-bool COFFAsmParser::ParseDirectiveLinkOnce(StringRef, SMLoc Loc) {
-  COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY;
+/// ::= [ identifier [ identifier ] ]
+bool COFFAsmParser::parseCOMDATTypeAndAssoc(COFF::COMDATType &Type,
+                                            const MCSectionCOFF *&Assoc) {
+  StringRef TypeId = getTok().getIdentifier();
 
-  if (getLexer().is(AsmToken::Identifier)) {
-    StringRef TypeId = getTok().getIdentifier();
-    Type = StringSwitch<COFF::COMDATType>(TypeId)
-      .Case("one_only", COFF::IMAGE_COMDAT_SELECT_NODUPLICATES)
-      .Case("discard", COFF::IMAGE_COMDAT_SELECT_ANY)
-      .Case("same_size", COFF::IMAGE_COMDAT_SELECT_SAME_SIZE)
-      .Case("same_contents", COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH)
-      .Case("associative", COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
-      .Case("largest", COFF::IMAGE_COMDAT_SELECT_LARGEST)
-      .Case("newest", COFF::IMAGE_COMDAT_SELECT_NEWEST)
-      .Default((COFF::COMDATType)0);
+  Type = StringSwitch<COFF::COMDATType>(TypeId)
+    .Case("one_only", COFF::IMAGE_COMDAT_SELECT_NODUPLICATES)
+    .Case("discard", COFF::IMAGE_COMDAT_SELECT_ANY)
+    .Case("same_size", COFF::IMAGE_COMDAT_SELECT_SAME_SIZE)
+    .Case("same_contents", COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH)
+    .Case("associative", COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
+    .Case("largest", COFF::IMAGE_COMDAT_SELECT_LARGEST)
+    .Case("newest", COFF::IMAGE_COMDAT_SELECT_NEWEST)
+    .Default((COFF::COMDATType)0);
 
-    if (Type == 0)
-      return TokError(Twine("unrecognized COMDAT type '" + TypeId + "'"));
+  if (Type == 0)
+    return TokError(Twine("unrecognized COMDAT type '" + TypeId + "'"));
 
-    Lex();
-  }
+  Lex();
 
-  const MCSectionCOFF *Current = static_cast<const MCSectionCOFF*>(
-                                       getStreamer().getCurrentSection().first);
-
-  const MCSectionCOFF *Assoc = 0;
   if (Type == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
-    StringRef AssocName;
     SMLoc Loc = getTok().getLoc();
+    StringRef AssocName;
     if (ParseSectionName(AssocName))
       return TokError("expected associated section name");
 
@@ -447,14 +474,33 @@ bool COFFAsmParser::ParseDirectiveLinkOnce(StringRef, SMLoc Loc) {
                                         getContext().getCOFFSection(AssocName));
     if (!Assoc)
       return Error(Loc, "cannot associate unknown section '" + AssocName + "'");
-    if (Assoc == Current)
-      return Error(Loc, "cannot associate a section with itself");
     if (!(Assoc->getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT))
       return Error(Loc, "associated section must be a COMDAT section");
     if (Assoc->getSelection() == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
      return Error(Loc, "associated section cannot be itself associative");
   }
 
+  return false;
+}
+
+/// ParseDirectiveLinkOnce
+/// ::= .linkonce [ identifier [ identifier ] ]
+bool COFFAsmParser::ParseDirectiveLinkOnce(StringRef, SMLoc Loc) {
+  COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY;
+  const MCSectionCOFF *Assoc = 0;
+  if (getLexer().is(AsmToken::Identifier))
+    if (parseCOMDATTypeAndAssoc(Type, Assoc))
+      return true;
+
+  const MCSectionCOFF *Current = static_cast<const MCSectionCOFF*>(
+                                       getStreamer().getCurrentSection().first);
+
+
+  if (Type == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+    if (Assoc == Current)
+      return Error(Loc, "cannot associate a section with itself");
+  }
+
   if (Current->getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT)
     return Error(Loc, Twine("section '") + Current->getSectionName() +
                                                    "' is already linkonce");
@@ -151,7 +151,8 @@ void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
   int Selection = COFF::IMAGE_COMDAT_SELECT_LARGEST;
 
   const MCSection *Section = MCStreamer::getContext().getCOFFSection(
-      SectionName, Characteristics, SectionKind::getBSS(), Selection);
+      SectionName, Characteristics, SectionKind::getBSS(), Symbol->getName(),
+      Selection);
 
   MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section);
 
@@ -321,6 +322,7 @@ void WinCOFFStreamer::EmitWin64EHHandlerData() {
 }
 
 void WinCOFFStreamer::FinishImpl() {
+  EmitFrames(NULL, true);
   EmitW64Tables();
   MCObjectStreamer::FinishImpl();
 }
@@ -4231,6 +4231,23 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
       return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT,
                          V1.getOperand(Lane));
     }
+
+    // Test if V1 is an EXTRACT_SUBVECTOR.
+    if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+      int ExtLane = cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
+      return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1.getOperand(0),
+                         DAG.getConstant(Lane + ExtLane, MVT::i64));
+    }
+    // Test if V1 is a CONCAT_VECTORS.
+    if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
+        V1.getOperand(1).getOpcode() == ISD::UNDEF) {
+      SDValue Op0 = V1.getOperand(0);
+      assert((unsigned)Lane < Op0.getValueType().getVectorNumElements() &&
+             "Invalid vector lane access");
+      return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, Op0,
+                         DAG.getConstant(Lane, MVT::i64));
+    }
+
     return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1,
                        DAG.getConstant(Lane, MVT::i64));
   }
File diff suppressed because it is too large
@@ -238,6 +238,10 @@ static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn,
                                                    uint64_t Address,
                                                    const void *Decoder);
 
+static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn,
+                                          uint64_t Address,
+                                          const void *Decoder);
+
 static bool Check(DecodeStatus &Out, DecodeStatus In);
 
 #include "AArch64GenDisassemblerTables.inc"
@@ -1342,13 +1346,13 @@ static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn,
   case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register: {
     switch (Opc) {
     case AArch64::LD4LN_WB_B_fixed: case AArch64::LD4LN_WB_B_register:
-      TransferBytes = 3; break;
+      TransferBytes = 4; break;
     case AArch64::LD4LN_WB_H_fixed: case AArch64::LD4LN_WB_H_register:
-      TransferBytes = 6; break;
+      TransferBytes = 8; break;
    case AArch64::LD4LN_WB_S_fixed: case AArch64::LD4LN_WB_S_register:
-      TransferBytes = 12; break;
+      TransferBytes = 16; break;
    case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register:
-      TransferBytes = 24; break;
+      TransferBytes = 32; break;
    }
    IsLoad = true;
    NumVecs = 4;
@@ -1534,3 +1538,35 @@ static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn,
 
   return MCDisassembler::Success;
 }
+
+static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn,
+                                          uint64_t Address,
+                                          const void *Decoder) {
+  unsigned Rd = fieldFromInstruction(Insn, 0, 5);
+  unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+  unsigned size = fieldFromInstruction(Insn, 22, 2);
+  unsigned Q = fieldFromInstruction(Insn, 30, 1);
+
+  DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder);
+
+  if (Q)
+    DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder);
+  else
+    DecodeFPR64RegisterClass(Inst, Rn, Address, Decoder);
+
+  switch (size) {
+  case 0:
+    Inst.addOperand(MCOperand::CreateImm(8));
+    break;
+  case 1:
+    Inst.addOperand(MCOperand::CreateImm(16));
+    break;
+  case 2:
+    Inst.addOperand(MCOperand::CreateImm(32));
+    break;
+  default:
+    return MCDisassembler::Fail;
+  }
+  return MCDisassembler::Success;
+}
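The corrected write-back amounts in the LD4LN hunk follow from the structure of LD4 single-lane loads: four registers, one element each, so the post-increment is four times the element size (the old values matched LD3, hence the vectorized-code inconsistencies mentioned in the commit message). A standalone check of that arithmetic, assuming one element per register (illustrative C++, not LLVM code):

    // Post-index write-back for LD4 "one lane per register" loads:
    // 4 registers x element size in bytes.
    constexpr unsigned transferBytes(unsigned numVecs, unsigned elemBytes) {
      return numVecs * elemBytes;
    }
    static_assert(transferBytes(4, 1) == 4,  "B lanes");
    static_assert(transferBytes(4, 2) == 8,  "H lanes");
    static_assert(transferBytes(4, 4) == 16, "S lanes");
    static_assert(transferBytes(4, 8) == 32, "D lanes");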
@@ -1913,28 +1913,40 @@ bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF,
 
   MachineBasicBlock *MBB = MI->getParent();
   const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
+  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
 
   // Now try to find enough space in the reglist to allocate NumBytes.
   for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded;
-       --CurReg, --RegsNeeded) {
+       --CurReg) {
     if (!IsPop) {
       // Pushing any register is completely harmless, mark the
       // register involved as undef since we don't care about it in
       // the slightest.
       RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
                                                   false, false, true));
+      --RegsNeeded;
       continue;
     }
 
-    // However, we can only pop an extra register if it's not live. Otherwise we
-    // might clobber a return value register. We assume that once we find a live
-    // return register all lower ones will be too so there's no use proceeding.
-    if (MBB->computeRegisterLiveness(TRI, CurReg, MI) !=
-        MachineBasicBlock::LQR_Dead)
-      return false;
+    // However, we can only pop an extra register if it's not live. For
+    // registers live within the function we might clobber a return value
+    // register; the other way a register can be live here is if it's
+    // callee-saved.
+    if (isCalleeSavedRegister(CurReg, CSRegs) ||
+        MBB->computeRegisterLiveness(TRI, CurReg, MI) !=
+        MachineBasicBlock::LQR_Dead) {
+      // VFP pops don't allow holes in the register list, so any skip is fatal
+      // for our transformation. GPR pops do, so we should just keep looking.
+      if (IsVFPPushPop)
+        return false;
+      else
+        continue;
+    }
 
     // Mark the unimportant registers as <def,dead> in the POP.
-    RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, true));
+    RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
+                                                true));
+    --RegsNeeded;
   }
 
   if (RegsNeeded > 0)
@@ -72,6 +72,14 @@ static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
   }
 }
 
+static inline bool isCalleeSavedRegister(unsigned Reg,
+                                         const MCPhysReg *CSRegs) {
+  for (unsigned i = 0; CSRegs[i]; ++i)
+    if (Reg == CSRegs[i])
+      return true;
+  return false;
+}
+
 class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
 protected:
   const ARMSubtarget &STI;
@@ -82,13 +82,6 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
   return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
 }
 
-static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
-  for (unsigned i = 0; CSRegs[i]; ++i)
-    if (Reg == CSRegs[i])
-      return true;
-  return false;
-}
-
 static bool isCSRestore(MachineInstr *MI,
                         const ARMBaseInstrInfo &TII,
                         const uint16_t *CSRegs) {
@@ -215,13 +215,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
     AFI->setShouldRestoreSPFromFP(true);
 }
 
-static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
-  for (unsigned i = 0; CSRegs[i]; ++i)
-    if (Reg == CSRegs[i])
-      return true;
-  return false;
-}
-
 static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) {
   if (MI->getOpcode() == ARM::tLDRspi &&
       MI->getOperand(1).isFI() &&
@@ -437,8 +437,10 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
     if (!I->Br || I->HasLongBranch)
       continue;
 
+    int ShVal = TM.getSubtarget<MipsSubtarget>().inMicroMipsMode() ? 2 : 4;
+
     // Check if offset fits into 16-bit immediate field of branches.
-    if (!ForceLongBranch && isInt<16>(computeOffset(I->Br) / 4))
+    if (!ForceLongBranch && isInt<16>(computeOffset(I->Br) / ShVal))
      continue;
 
    I->HasLongBranch = true;
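The divisor change reflects that branch immediates count instructions, not bytes: microMIPS instructions are 2 bytes, classic MIPS instructions are 4. A sketch of that range test (illustrative C++; the semantics of LLVM's isInt<16> are assumed):

    #include <cstdint>

    // A branch immediate encodes byteOffset / instruction granularity in a
    // signed 16-bit field.
    bool fitsBranchImm16(int64_t byteOffset, bool microMips) {
      const int64_t units = byteOffset / (microMips ? 2 : 4);
      return units >= INT16_MIN && units <= INT16_MAX;
    }
    // The same byte offset can need a long branch only in microMIPS mode:
    // 120000 / 4 = 30000 fits in 16 bits, 120000 / 2 = 60000 does not.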
@@ -62,6 +62,24 @@ MipsSERegisterInfo::intRegClass(unsigned Size) const {
   return &Mips::GPR64RegClass;
 }
 
+/// Determine whether a given opcode is an MSA load/store (supporting 10-bit
+/// offsets) or a non-MSA load/store (supporting 16-bit offsets).
+static inline bool isMSALoadOrStore(const unsigned Opcode) {
+  switch (Opcode) {
+  case Mips::LD_B:
+  case Mips::LD_H:
+  case Mips::LD_W:
+  case Mips::LD_D:
+  case Mips::ST_B:
+  case Mips::ST_H:
+  case Mips::ST_W:
+  case Mips::ST_D:
+    return true;
+  default:
+    return false;
+  }
+}
+
 void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
                                      unsigned OpNo, int FrameIndex,
                                      uint64_t StackSize,
@@ -111,23 +129,49 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
 
   DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
 
-  // If MI is not a debug value, make sure Offset fits in the 16-bit immediate
-  // field.
-  if (!MI.isDebugValue() && !isInt<16>(Offset)) {
-    MachineBasicBlock &MBB = *MI.getParent();
-    DebugLoc DL = II->getDebugLoc();
-    unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
-    unsigned NewImm;
-    const MipsSEInstrInfo &TII =
-      *static_cast<const MipsSEInstrInfo*>(
-        MBB.getParent()->getTarget().getInstrInfo());
-    unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL, &NewImm);
-    BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg)
-      .addReg(Reg, RegState::Kill);
+  if (!MI.isDebugValue()) {
+    // Make sure Offset fits within the field available.
+    // For MSA instructions, this is a 10-bit signed immediate, otherwise it is
+    // a 16-bit signed immediate.
+    unsigned OffsetBitSize = isMSALoadOrStore(MI.getOpcode()) ? 10 : 16;
 
-    FrameReg = Reg;
-    Offset = SignExtend64<16>(NewImm);
-    IsKill = true;
+    if (OffsetBitSize == 10 && !isInt<10>(Offset) && isInt<16>(Offset)) {
+      // If we have an offset that needs to fit into a signed 10-bit immediate
+      // and doesn't, but does fit into 16-bits then use an ADDiu
+      MachineBasicBlock &MBB = *MI.getParent();
+      DebugLoc DL = II->getDebugLoc();
+      unsigned ADDiu = Subtarget.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
+      const TargetRegisterClass *RC =
+          Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+      MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
+      unsigned Reg = RegInfo.createVirtualRegister(RC);
+      const MipsSEInstrInfo &TII =
+          *static_cast<const MipsSEInstrInfo *>(
+               MBB.getParent()->getTarget().getInstrInfo());
+      BuildMI(MBB, II, DL, TII.get(ADDiu), Reg).addReg(FrameReg).addImm(Offset);
+
+      FrameReg = Reg;
+      Offset = 0;
+      IsKill = true;
+    } else if (!isInt<16>(Offset)) {
+      // Otherwise split the offset into 16-bit pieces and add it in multiple
+      // instructions.
+      MachineBasicBlock &MBB = *MI.getParent();
+      DebugLoc DL = II->getDebugLoc();
+      unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+      unsigned NewImm = 0;
+      const MipsSEInstrInfo &TII =
+          *static_cast<const MipsSEInstrInfo *>(
+               MBB.getParent()->getTarget().getInstrInfo());
+      unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL,
+                                       OffsetBitSize == 16 ? &NewImm : NULL);
+      BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg)
+        .addReg(Reg, RegState::Kill);
+
+      FrameReg = Reg;
+      Offset = SignExtend64<16>(NewImm);
+      IsKill = true;
+    }
   }
 
   MI.getOperand(OpNo).ChangeToRegister(FrameReg, false, false, IsKill);
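The rewritten logic classifies frame offsets against two signed-immediate widths before deciding how to materialize them. A small sketch of the classification, assuming the semantics of LLVM's isInt<N> (illustrative C++):

    #include <cstdint>

    // True if x fits an N-bit signed immediate, mirroring LLVM's isInt<N>.
    template <unsigned N> bool fitsSignedImm(int64_t x) {
      return x >= -(INT64_C(1) << (N - 1)) && x < (INT64_C(1) << (N - 1));
    }

    // The three cases distinguished above for an MSA load/store offset:
    //   1. fitsSignedImm<10>(off)                 -> encode directly
    //   2. !fits 10 bits but fitsSignedImm<16>    -> one ADDiu, offset becomes 0
    //   3. !fitsSignedImm<16>(off)                -> materialize in 16-bit pieces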
@@ -58,6 +58,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::FABS,   MVT::f32, Legal);
   setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
   setOperationAction(ISD::FRINT,  MVT::f32, Legal);
+  setOperationAction(ISD::FROUND, MVT::f32, Legal);
 
   // The hardware supports ROTR, but not ROTL
   setOperationAction(ISD::ROTL, MVT::i32, Expand);
@@ -178,6 +179,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
 
   for (unsigned int x = 0; x < NumFloatTypes; ++x) {
     MVT::SimpleValueType VT = FloatTypes[x];
+    setOperationAction(ISD::FABS, VT, Expand);
     setOperationAction(ISD::FADD, VT, Expand);
     setOperationAction(ISD::FDIV, VT, Expand);
     setOperationAction(ISD::FFLOOR, VT, Expand);
@@ -83,3 +83,6 @@ def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
 def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
                         SDTypeProfile<0, 2, []>,
                         [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def AMDGPUround : SDNode<"ISD::FROUND",
+                         SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;
@@ -1110,6 +1110,10 @@ class COS_Common <bits<11> inst> : R600_1OP <
   let Itinerary = TransALU;
 }
 
+def CLAMP_R600 : CLAMP <R600_Reg32>;
+def FABS_R600 : FABS<R600_Reg32>;
+def FNEG_R600 : FNEG<R600_Reg32>;
+
 //===----------------------------------------------------------------------===//
 // Helper patterns for complex intrinsics
 //===----------------------------------------------------------------------===//
@@ -1132,6 +1136,13 @@ class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ie
   (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
 >;
 
+// FROUND pattern
+class FROUNDPat<Instruction CNDGE> : Pat <
+  (AMDGPUround f32:$x),
+  (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x))
+>;
+
+
 //===----------------------------------------------------------------------===//
 // R600 / R700 Instructions
 //===----------------------------------------------------------------------===//
@@ -1173,6 +1184,7 @@ let Predicates = [isR600] in {
   def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
 
   def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
+  def : FROUNDPat <CNDGE_r600>;
 
   def R600_ExportSwz : ExportSwzInst {
     let Word1{20-17} = 0; // BURST_COUNT
@@ -1726,6 +1738,8 @@ def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET",
 // SHA-256 Patterns
 def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
 
+def : FROUNDPat <CNDGE_eg>;
+
 def EG_ExportSwz : ExportSwzInst {
   let Word1{19-16} = 0; // BURST_COUNT
   let Word1{20} = 0; // VALID_PIXEL_MODE
@@ -2090,10 +2104,6 @@ def TXD_SHADOW: InstR600 <
 } // End isPseudo = 1
 } // End usesCustomInserter = 1
 
-def CLAMP_R600 : CLAMP <R600_Reg32>;
-def FABS_R600 : FABS<R600_Reg32>;
-def FNEG_R600 : FNEG<R600_Reg32>;
-
 //===---------------------------------------------------------------------===//
 // Return instruction
 //===---------------------------------------------------------------------===//
@@ -489,14 +489,17 @@ def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_F
 
 let mayLoad = 1 in {
 
-defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>;
+// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
+// SMRD instructions, because the SGPR_32 register class does not include M0
+// and writing to M0 from an SMRD instruction will hang the GPU.
+defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SGPR_32>;
 defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
 defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
 defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
 defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
 
 defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
-  0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32
+  0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SGPR_32
 >;
 
 defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
@@ -105,5 +105,22 @@ namespace llvm {
     }
     llvm_unreachable("Invalid cond code");
   }
+
+  inline static unsigned HI22(int64_t imm) {
+    return (unsigned)((imm >> 10) & ((1 << 22)-1));
+  }
+
+  inline static unsigned LO10(int64_t imm) {
+    return (unsigned)(imm & 0x3FF);
+  }
+
+  inline static unsigned HIX22(int64_t imm) {
+    return HI22(~imm);
+  }
+
+  inline static unsigned LOX10(int64_t imm) {
+    return ~LO10(~imm);
+  }
+
 } // end namespace llvm
 #endif
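These helpers split immediates for sethi-based materialization: sethi writes bits 31..10, then or (nonnegative values) or xor (negative values, whose 13-bit operand sign-extends) restores bits 9..0. A self-contained check of those identities (it mirrors the helpers added above; the sign-extension of the xor immediate is an assumption about the instruction encoding):

    #include <cassert>
    #include <cstdint>

    static unsigned HI22(int64_t imm)  { return (unsigned)((imm >> 10) & ((1 << 22) - 1)); }
    static unsigned LO10(int64_t imm)  { return (unsigned)(imm & 0x3FF); }
    static unsigned HIX22(int64_t imm) { return HI22(~imm); }
    static unsigned LOX10(int64_t imm) { return ~LO10(~imm); }

    int main() {
      int32_t pos = 123456;
      // sethi %hi(pos), %g1 ; or %g1, %lo(pos), %g1
      assert((int32_t)((HI22(pos) << 10) | LO10(pos)) == pos);

      int32_t neg = -123456;
      // sethi %hix(neg), %g1 ; xor %g1, %lox(neg), %g1
      assert((int32_t)((HIX22(neg) << 10) ^ LOX10(neg)) == neg);
      return 0;
    }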
@@ -88,7 +88,7 @@ void SparcAsmPrinter::EmitFunctionBodyStart() {
   const unsigned globalRegs[] = { SP::G2, SP::G3, SP::G6, SP::G7, 0 };
   for (unsigned i = 0; globalRegs[i] != 0; ++i) {
     unsigned reg = globalRegs[i];
-    if (!MRI.isPhysRegUsed(reg))
+    if (MRI.use_empty(reg))
       continue;
     EmitGlobalRegisterDecl(reg);
   }
@@ -33,6 +33,51 @@ DisableLeafProc("disable-sparc-leaf-proc",
                 cl::Hidden);
 
 
+void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF,
+                                          MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator MBBI,
+                                          int NumBytes,
+                                          unsigned ADDrr,
+                                          unsigned ADDri) const {
+
+  DebugLoc dl = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc();
+  const SparcInstrInfo &TII =
+      *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+
+  if (NumBytes >= -4096 && NumBytes < 4096) {
+    BuildMI(MBB, MBBI, dl, TII.get(ADDri), SP::O6)
+      .addReg(SP::O6).addImm(NumBytes);
+    return;
+  }
+
+  // Emit this the hard way. This clobbers G1 which we always know is
+  // available here.
+  if (NumBytes >= 0) {
+    // Emit nonnegative numbers with sethi + or.
+    // sethi %hi(NumBytes), %g1
+    // or %g1, %lo(NumBytes), %g1
+    // add %sp, %g1, %sp
+    BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1)
+      .addImm(HI22(NumBytes));
+    BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
+      .addReg(SP::G1).addImm(LO10(NumBytes));
+    BuildMI(MBB, MBBI, dl, TII.get(ADDrr), SP::O6)
+      .addReg(SP::O6).addReg(SP::G1);
+    return;
+  }
+
+  // Emit negative numbers with sethi + xor.
+  // sethi %hix(NumBytes), %g1
+  // xor %g1, %lox(NumBytes), %g1
+  // add %sp, %g1, %sp
+  BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1)
+    .addImm(HIX22(NumBytes));
+  BuildMI(MBB, MBBI, dl, TII.get(SP::XORri), SP::G1)
+    .addReg(SP::G1).addImm(LOX10(NumBytes));
+  BuildMI(MBB, MBBI, dl, TII.get(ADDrr), SP::O6)
+    .addReg(SP::O6).addReg(SP::G1);
+}
+
 void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
   SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
 
@@ -55,21 +100,8 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
     SAVErr = SP::ADDrr;
   }
   NumBytes = - SubTarget.getAdjustedFrameSize(NumBytes);
-
-  if (NumBytes >= -4096) {
-    BuildMI(MBB, MBBI, dl, TII.get(SAVEri), SP::O6)
-      .addReg(SP::O6).addImm(NumBytes);
-  } else {
-    // Emit this the hard way. This clobbers G1 which we always know is
-    // available here.
-    unsigned OffHi = (unsigned)NumBytes >> 10U;
-    BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
-    // Emit G1 = G1 + I6
-    BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
-      .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
-    BuildMI(MBB, MBBI, dl, TII.get(SAVErr), SP::O6)
-      .addReg(SP::O6).addReg(SP::G1);
-  }
+  emitSPAdjustment(MF, MBB, MBBI, NumBytes, SAVErr, SAVEri);
+
   MachineModuleInfo &MMI = MF.getMMI();
   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
   MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
@@ -96,15 +128,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
   if (!hasReservedCallFrame(MF)) {
     MachineInstr &MI = *I;
-    DebugLoc DL = MI.getDebugLoc();
     int Size = MI.getOperand(0).getImm();
     if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
       Size = -Size;
-    const SparcInstrInfo &TII =
-      *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+
     if (Size)
-      BuildMI(MBB, I, DL, TII.get(SP::ADDri), SP::O6).addReg(SP::O6)
-        .addImm(Size);
+      emitSPAdjustment(MF, MBB, I, Size, SP::ADDrr, SP::ADDri);
   }
   MBB.erase(I);
 }
@@ -131,21 +160,7 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
     return;
 
   NumBytes = SubTarget.getAdjustedFrameSize(NumBytes);
-
-  if (NumBytes < 4096) {
-    BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), SP::O6)
-      .addReg(SP::O6).addImm(NumBytes);
-  } else {
-    // Emit this the hard way. This clobbers G1 which we always know is
-    // available here.
-    unsigned OffHi = (unsigned)NumBytes >> 10U;
-    BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
-    // Emit G1 = G1 + I6
-    BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
-      .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
-    BuildMI(MBB, MBBI, dl, TII.get(SP::ADDrr), SP::O6)
-      .addReg(SP::O6).addReg(SP::G1);
-  }
+  emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri);
 }
 
 bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
@@ -49,6 +49,14 @@ private:
 
   // Returns true if MF is a leaf procedure.
   bool isLeafProc(MachineFunction &MF) const;
+
+
+  // Emits code for adjusting SP in function prologue/epilogue.
+  void emitSPAdjustment(MachineFunction &MF,
+                        MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator MBBI,
+                        int NumBytes, unsigned ADDrr, unsigned ADDri) const;
+
 };
 
 } // End llvm namespace
@@ -1411,6 +1411,7 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::BSWAP, MVT::i64, Expand);
     setOperationAction(ISD::ROTL , MVT::i64, Expand);
     setOperationAction(ISD::ROTR , MVT::i64, Expand);
+    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
   }
 
   // FIXME: There are instructions available for ATOMIC_FENCE
@@ -2289,20 +2290,23 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
                        std::min(PtrVT.getSizeInBits(), VT.getSizeInBits())/8);
 }
 
-static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
+                                       bool is64Bit) {
   SDValue Chain = Op.getOperand(0);  // Legalize the chain.
   SDValue Size  = Op.getOperand(1);  // Legalize the size.
+  EVT VT = Size->getValueType(0);
   SDLoc dl(Op);
 
   unsigned SPReg = SP::O6;
-  SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
-  SDValue NewSP = DAG.getNode(ISD::SUB, dl, MVT::i32, SP, Size); // Value
+  SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+  SDValue NewSP = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
   Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain
 
   // The resultant pointer is actually 16 words from the bottom of the stack,
   // to provide a register spill area.
-  SDValue NewVal = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP,
-                               DAG.getConstant(96, MVT::i32));
+  unsigned regSpillArea = (is64Bit) ? 128 : 96;
+  SDValue NewVal = DAG.getNode(ISD::ADD, dl, VT, NewSP,
+                               DAG.getConstant(regSpillArea, VT));
   SDValue Ops[2] = { NewVal, Chain };
   return DAG.getMergeValues(Ops, 2, dl);
 }
@@ -2626,7 +2630,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
                                                        hasHardQuad);
   case ISD::VASTART:            return LowerVASTART(Op, DAG, *this);
   case ISD::VAARG:              return LowerVAARG(Op, DAG);
-  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG,
+                                                               is64Bit);
 
   case ISD::LOAD:               return LowerF128Load(Op, DAG);
   case ISD::STORE:              return LowerF128Store(Op, DAG);
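The lowering computes the new stack pointer and then biases the returned pointer past the register-save area at the bottom of the frame, whose size differs between the 32-bit (96-byte) and 64-bit (128-byte) SPARC ABIs. A sketch of just that address arithmetic, with plain integers standing in for SDValues:

    #include <cstdint>

    // new SP = old SP - allocation size; the pointer handed back to the
    // program sits above the ABI-mandated register spill area.
    uint64_t lowerDynamicAlloca(uint64_t sp, uint64_t size, bool is64Bit,
                                uint64_t &newSP) {
      const uint64_t regSpillArea = is64Bit ? 128 : 96;
      newSP = sp - size;
      return newSP + regSpillArea;
    }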
@@ -171,6 +171,12 @@ def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (CMPri $a, (as_i32imm $b))>;
 
 def : Pat<(ctpop i64:$src), (POPCrr $src)>;
 
+// "LEA" form of add
+def LEAX_ADDri : F3_2<2, 0b000000,
+                   (outs I64Regs:$dst), (ins MEMri:$addr),
+                   "add ${addr:arith}, $dst",
+                   [(set iPTR:$dst, ADDRri:$addr)]>;
+
 } // Predicates = [Is64Bit]
@@ -502,10 +502,11 @@ defm SRA : F3_12<"sra", 0b100111, sra>;
 defm ADD   : F3_12<"add", 0b000000, add>;
 
 // "LEA" forms of add (patterns to make tblgen happy)
-def LEA_ADDri   : F3_2<2, 0b000000,
-                   (outs IntRegs:$dst), (ins MEMri:$addr),
-                   "add ${addr:arith}, $dst",
-                   [(set iPTR:$dst, ADDRri:$addr)]>;
+let Predicates = [Is32Bit] in
+  def LEA_ADDri   : F3_2<2, 0b000000,
+                     (outs IntRegs:$dst), (ins MEMri:$addr),
+                     "add ${addr:arith}, $dst",
+                     [(set iPTR:$dst, ADDRri:$addr)]>;
 
 let Defs = [ICC] in
   defm ADDCC  : F3_12<"addcc", 0b010000, addc>;
@@ -105,19 +105,46 @@ static void replaceFI(MachineFunction &MF,
     // encode it.
     MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false);
     MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
-  } else {
-    // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to
-    // scavenge a register here instead of reserving G1 all of the time.
-    const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
-    unsigned OffHi = (unsigned)Offset >> 10U;
-    BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+    return;
+  }
+
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  // FIXME: it would be better to scavenge a register here instead of
+  // reserving G1 all of the time.
+  if (Offset >= 0) {
+    // Emit nonnegative immediates with sethi + or.
+    // sethi %hi(Offset), %g1
+    // add %g1, %fp, %g1
+    // Insert G1+%lo(offset) into the user.
+    BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1)
+      .addImm(HI22(Offset));
+
     // Emit G1 = G1 + I6
     BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
       .addReg(FramePtr);
     // Insert: G1+%lo(offset) into the user.
     MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
-    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset & ((1 << 10)-1));
+    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(LO10(Offset));
+    return;
   }
+
+  // Emit negative numbers with sethi + xor.
+  // sethi %hix(Offset), %g1
+  // xor %g1, %lox(offset), %g1
+  // add %g1, %fp, %g1
+  // Insert: G1 + 0 into the user.
+  BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1)
+    .addImm(HIX22(Offset));
+  BuildMI(*MI.getParent(), II, dl, TII.get(SP::XORri), SP::G1)
+    .addReg(SP::G1).addImm(LOX10(Offset));
+
+  BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
+    .addReg(FramePtr);
+  MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
+  MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
 }

@ -1933,10 +1933,10 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
  // high 32 bits and just masks out low bits. We can skip it if so.
  if (HighOp.getOpcode() == ISD::AND &&
      HighOp.getOperand(1).getOpcode() == ISD::Constant) {
    ConstantSDNode *MaskNode = cast<ConstantSDNode>(HighOp.getOperand(1));
    uint64_t Mask = MaskNode->getZExtValue() | Masks[High];
    if ((Mask >> 32) == 0xffffffff)
      HighOp = HighOp.getOperand(0);
    SDValue HighOp0 = HighOp.getOperand(0);
    uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
    if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
      HighOp = HighOp0;
  }

  // Take advantage of the fact that all GR32 operations only change the
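
The rewritten condition subsumes the old one: the AND can be dropped not only
when it preserves all 32 high bits, but whenever every high bit it would clear
is already known to be zero. A self-contained illustration of the bit logic
(illustrative only, not the in-tree SelectionDAG API):

    #include <cassert>
    #include <cstdint>

    // The AND is redundant iff every high bit it would clear,
    // ~(Mask | 0xffffffff), is already known to be zero in the input.
    static bool andIsRedundant(uint64_t Mask, uint64_t KnownZero) {
      uint64_t ClearedHighBits = ~(Mask | 0xffffffffULL);
      return (ClearedHighBits & ~KnownZero) == 0;
    }

    int main() {
      // A mask that keeps all high bits is trivially skippable (the old test).
      assert(andIsRedundant(0xffffffff00000000ULL, 0));
      // With known-zero bits, a narrower mask is skippable too (the new test).
      assert(andIsRedundant(0x0000ffff00000000ULL, 0xffff000000000000ULL));
      return 0;
    }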

@ -495,17 +495,17 @@ private:
  X86Operand *ParseATTOperand();
  X86Operand *ParseIntelOperand();
  X86Operand *ParseIntelOffsetOfOperator();
  X86Operand *ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
  bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
  X86Operand *ParseIntelOperator(unsigned OpKind);
  X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
  X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc,
                                   unsigned Size);
  X86Operand *ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
  bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
  X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
                                       int64_t ImmDisp, unsigned Size);
  X86Operand *ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
                                   InlineAsmIdentifierInfo &Info,
                                   bool IsUnevaluatedOperand, SMLoc &End);
  bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
                            InlineAsmIdentifierInfo &Info,
                            bool IsUnevaluatedOperand, SMLoc &End);

  X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
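
The signature changes above move these methods from an ad-hoc convention
(return a non-null X86Operand* to signal an error) to the usual MC parser
convention (return true on error, deliver results through reference
parameters). A minimal sketch of the two conventions side by side
(hypothetical names, not the in-tree code):

    #include <cstdio>

    struct Operand { const char *Msg; };

    // Old style: a non-null pointer smuggles the diagnostic out.
    static Operand *parseOld(bool Fail) {
      return Fail ? new Operand{"unknown token"} : nullptr;
    }

    // New style: bool means "had error"; results travel via out-parameters.
    static bool parseNew(bool Fail, const char **Err) {
      if (Fail) { *Err = "unknown token"; return true; }
      return false;
    }

    int main() {
      const char *Err = nullptr;
      if (Operand *E = parseOld(true)) { std::printf("error: %s\n", E->Msg); delete E; }
      if (parseNew(true, &Err)) std::printf("error: %s\n", Err);
      return 0;
    }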

@ -1269,8 +1269,7 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
    }
  }

X86Operand *
X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
  const AsmToken &Tok = Parser.getTok();

  bool Done = false;

@ -1292,7 +1291,7 @@ X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
        Done = true;
        break;
      }
      return ErrorOperand(Tok.getLoc(), "Unexpected token!");
      return Error(Tok.getLoc(), "unknown token in expression");
    }
    case AsmToken::EndOfStatement: {
      Done = true;

@ -1311,18 +1310,18 @@ X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
      } else {
        if (!isParsingInlineAsm()) {
          if (getParser().parsePrimaryExpr(Val, End))
            return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
            return Error(Tok.getLoc(), "Unexpected identifier!");
        } else {
          InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
          if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
                                                     /*Unevaluated*/ false, End))
            return Err;
          if (ParseIntelIdentifier(Val, Identifier, Info,
                                   /*Unevaluated=*/false, End))
            return true;
        }
        SM.onIdentifierExpr(Val, Identifier);
        UpdateLocLex = false;
        break;
      }
      return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
      return Error(Tok.getLoc(), "Unexpected identifier!");
    }
    case AsmToken::Integer:
      if (isParsingInlineAsm() && SM.getAddImmPrefix())

@ -1340,14 +1339,14 @@ X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
    case AsmToken::RParen: SM.onRParen(); break;
    }
    if (SM.hadError())
      return ErrorOperand(Tok.getLoc(), "Unexpected token!");
      return Error(Tok.getLoc(), "unknown token in expression");

    if (!Done && UpdateLocLex) {
      End = Tok.getLoc();
      Parser.Lex(); // Consume the token.
    }
  }
  return 0;
  return false;
}

X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,

@ -1364,8 +1363,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
  // may have already parsed an immediate displacement before the bracketed
  // expression.
  IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
  if (X86Operand *Err = ParseIntelExpression(SM, End))
    return Err;
  if (ParseIntelExpression(SM, End))
    return 0;

  const MCExpr *Disp;
  if (const MCExpr *Sym = SM.getSym()) {

@ -1383,8 +1382,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
  // Parse the dot operator (e.g., [ebx].foo.bar).
  if (Tok.getString().startswith(".")) {
    const MCExpr *NewDisp;
    if (X86Operand *Err = ParseIntelDotOperator(Disp, NewDisp))
      return Err;
    if (ParseIntelDotOperator(Disp, NewDisp))
      return 0;

    End = Tok.getEndLoc();
    Parser.Lex(); // Eat the field.

@ -1412,11 +1411,10 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
}

// Inline assembly may use variable names with namespace alias qualifiers.
X86Operand *X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
                                               StringRef &Identifier,
                                               InlineAsmIdentifierInfo &Info,
                                               bool IsUnevaluatedOperand,
                                               SMLoc &End) {
bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
                                        StringRef &Identifier,
                                        InlineAsmIdentifierInfo &Info,
                                        bool IsUnevaluatedOperand, SMLoc &End) {
  assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
  Val = 0;

@ -1441,7 +1439,7 @@ X86Operand *X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
  MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
  Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
  return 0;
  return false;
}

/// \brief Parse intel style segment override.

@ -1481,16 +1479,16 @@ X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
  SMLoc End;
  if (!isParsingInlineAsm()) {
    if (getParser().parsePrimaryExpr(Val, End))
      return ErrorOperand(Tok.getLoc(), "Unexpected token!");
      return ErrorOperand(Tok.getLoc(), "unknown token in expression");

    return X86Operand::CreateMem(Val, Start, End, Size);
  }

  InlineAsmIdentifierInfo Info;
  StringRef Identifier = Tok.getString();
  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
                                             /*Unevaluated*/ false, End))
    return Err;
  if (ParseIntelIdentifier(Val, Identifier, Info,
                           /*Unevaluated=*/false, End))
    return 0;
  return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
                               /*Scale=*/1, Start, End, Size, Identifier, Info);
}

@ -1508,22 +1506,22 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
  const MCExpr *Val;
  if (!isParsingInlineAsm()) {
    if (getParser().parsePrimaryExpr(Val, End))
      return ErrorOperand(Tok.getLoc(), "Unexpected token!");
      return ErrorOperand(Tok.getLoc(), "unknown token in expression");

    return X86Operand::CreateMem(Val, Start, End, Size);
  }

  InlineAsmIdentifierInfo Info;
  StringRef Identifier = Tok.getString();
  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
                                             /*Unevaluated*/ false, End))
    return Err;
  if (ParseIntelIdentifier(Val, Identifier, Info,
                           /*Unevaluated=*/false, End))
    return 0;
  return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
                               /*Scale=*/1, Start, End, Size, Identifier, Info);
}

/// Parse the '.' operator.
X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
                                         const MCExpr *&NewDisp) {
  const AsmToken &Tok = Parser.getTok();
  int64_t OrigDispVal, DotDispVal;

@ -1532,7 +1530,7 @@ X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
  if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
    OrigDispVal = OrigDisp->getValue();
  else
    return ErrorOperand(Tok.getLoc(), "Non-constant offsets are not supported!");
    return Error(Tok.getLoc(), "Non-constant offsets are not supported!");

  // Drop the '.'.
  StringRef DotDispStr = Tok.getString().drop_front(1);

@ -1547,10 +1545,10 @@ X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
    std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
    if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
                                           DotDisp))
      return ErrorOperand(Tok.getLoc(), "Unable to lookup field reference!");
      return Error(Tok.getLoc(), "Unable to lookup field reference!");
    DotDispVal = DotDisp;
  } else
    return ErrorOperand(Tok.getLoc(), "Unexpected token type!");
    return Error(Tok.getLoc(), "Unexpected token type!");

  if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
    SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());

@ -1561,7 +1559,7 @@ X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
  }

  NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
  return 0;
  return false;
}

/// Parse the 'offset' operator. This operator is used to specify the

@ -1575,9 +1573,9 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
  InlineAsmIdentifierInfo Info;
  SMLoc Start = Tok.getLoc(), End;
  StringRef Identifier = Tok.getString();
  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
                                             /*Unevaluated*/ false, End))
    return Err;
  if (ParseIntelIdentifier(Val, Identifier, Info,
                           /*Unevaluated=*/false, End))
    return 0;

  // Don't emit the offset operator.
  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));

@ -1611,9 +1609,12 @@ X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
  InlineAsmIdentifierInfo Info;
  SMLoc Start = Tok.getLoc(), End;
  StringRef Identifier = Tok.getString();
  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
                                             /*Unevaluated*/ true, End))
    return Err;
  if (ParseIntelIdentifier(Val, Identifier, Info,
                           /*Unevaluated=*/true, End))
    return 0;

  if (!Info.OpDecl)
    return ErrorOperand(Start, "unable to lookup expression");

  unsigned CVal = 0;
  switch(OpKind) {

@ -1664,8 +1665,8 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
  AsmToken StartTok = Tok;
  IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
                           /*AddImmPrefix=*/false);
  if (X86Operand *Err = ParseIntelExpression(SM, End))
    return Err;
  if (ParseIntelExpression(SM, End))
    return 0;

  int64_t Imm = SM.getImm();
  if (isParsingInlineAsm()) {

@ -13120,19 +13120,27 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
    // fall through
  case MVT::v4i32:
  case MVT::v8i16: {
    // (sext (vzext x)) -> (vsext x)
    SDValue Op0 = Op.getOperand(0);
    SDValue Op00 = Op0.getOperand(0);
    SDValue Tmp1;
    // Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
    if (Op0.getOpcode() == ISD::BITCAST &&
        Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
        Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
      // (sext (vzext x)) -> (vsext x)
      Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG);
    if (Tmp1.getNode()) {
      SDValue Tmp1Op0 = Tmp1.getOperand(0);
      assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
             "This optimization is invalid without a VZEXT.");
      return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
      if (Tmp1.getNode()) {
        EVT ExtraEltVT = ExtraVT.getVectorElementType();
        // This folding is only valid when the in-reg type is a vector of i8,
        // i16, or i32.
        if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
            ExtraEltVT == MVT::i32) {
          SDValue Tmp1Op0 = Tmp1.getOperand(0);
          assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
                 "This optimization is invalid without a VZEXT.");
          return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
        }
        Op0 = Tmp1;
      }
    }

    // If the above didn't work, then just use Shift-Left + Shift-Right.

@ -210,19 +210,20 @@ private:
// Any two pointers in the same address space are equivalent, intptr_t and
// pointers are equivalent. Otherwise, standard type equivalence rules apply.
bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {

  PointerType *PTy1 = dyn_cast<PointerType>(Ty1);
  PointerType *PTy2 = dyn_cast<PointerType>(Ty2);

  if (TD) {
    if (PTy1 && PTy1->getAddressSpace() == 0) Ty1 = TD->getIntPtrType(Ty1);
    if (PTy2 && PTy2->getAddressSpace() == 0) Ty2 = TD->getIntPtrType(Ty2);
  }

  if (Ty1 == Ty2)
    return true;
  if (Ty1->getTypeID() != Ty2->getTypeID()) {
    if (TD) {

      if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ty1))
        return true;

      if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ty2))
        return true;
    }
    if (Ty1->getTypeID() != Ty2->getTypeID())
      return false;
  }

  switch (Ty1->getTypeID()) {
  default:

@ -244,8 +245,7 @@ bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {
    return true;

  case Type::PointerTyID: {
    PointerType *PTy1 = cast<PointerType>(Ty1);
    PointerType *PTy2 = cast<PointerType>(Ty2);
    assert(PTy1 && PTy2 && "Both types must be pointers here.");
    return PTy1->getAddressSpace() == PTy2->getAddressSpace();
  }
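
The rule stated in the comment, that an address-space-0 pointer and the
target's intptr_t-sized integer are interchangeable once target data is
available, can be modeled standalone (a sketch only, not the in-tree
FunctionComparator):

    #include <cassert>

    enum TypeKind { IntTy, PtrTy };
    struct Ty { TypeKind Kind; unsigned Bits; unsigned AddrSpace; };

    static bool isEquivalentType(Ty A, Ty B, unsigned IntPtrBits) {
      // Normalize address-space-0 pointers to the intptr integer type.
      if (A.Kind == PtrTy && A.AddrSpace == 0) A = Ty{IntTy, IntPtrBits, 0};
      if (B.Kind == PtrTy && B.AddrSpace == 0) B = Ty{IntTy, IntPtrBits, 0};
      if (A.Kind != B.Kind) return false;
      if (A.Kind == IntTy) return A.Bits == B.Bits;
      return A.AddrSpace == B.AddrSpace; // non-zero address spaces must match
    }

    int main() {
      Ty I64{IntTy, 64, 0}, P0{PtrTy, 64, 0}, P1{PtrTy, 64, 1};
      assert(isEquivalentType(P0, I64, 64));  // i8* equals intptr on LP64
      assert(!isEquivalentType(P1, I64, 64)); // other address spaces do not
      return 0;
    }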

@ -1198,11 +1198,16 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
    Type *AndTy = AndCST->getType();          // Type of the and.

    // We can fold this as long as we can't shift unknown bits
    // into the mask. This can only happen with signed shift
    // rights, as they sign-extend.
    // into the mask. This can happen with signed shift
    // rights, as they sign-extend. With logical shifts,
    // we must still make sure the comparison is not signed
    // because we are effectively changing the
    // position of the sign bit (PR17827).
    // TODO: We can relax these constraints a bit more.
    if (ShAmt) {
      bool CanFold = Shift->isLogicalShift();
      if (!CanFold) {
      bool CanFold = false;
      unsigned ShiftOpcode = Shift->getOpcode();
      if (ShiftOpcode == Instruction::AShr) {
        // To test for the bad case of the signed shr, see if any
        // of the bits shifted in could be tested after the mask.
        uint32_t TyBits = Ty->getPrimitiveSizeInBits();

@ -1212,6 +1217,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
        if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
             AndCST->getValue()) == 0)
          CanFold = true;
      } else if (ShiftOpcode == Instruction::Shl ||
                 ShiftOpcode == Instruction::LShr) {
        CanFold = !ICI.isSigned();
      }

      if (CanFold) {
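
Why a signed comparison blocks the fold (PR17827), shown on int8_t; this is a
standalone illustration of the miscompile the stricter check avoids:

    #include <cassert>
    #include <cstdint>

    int main() {
      // ((x << 7) & 0x80) s< 0 tests bit 0 of x, but rewriting it to
      // (x & 1) s< 0 would test x's own sign bit instead.
      int8_t x = 1;
      bool original  = (int8_t)((x << 7) & 0x80) < 0; // true: bit 0 of x is set
      bool rewritten = (int8_t)(x & 0x01) < 0;        // false: 1 is nonnegative
      assert(original != rewritten);                  // the naive fold miscompiles
      return 0;
    }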

@ -432,7 +432,7 @@ namespace {
    bool Partial;

    /// The current position in the sequence.
    Sequence Seq : 8;
    unsigned char Seq : 8;

    /// Unidirectional information about the current sequence.
    RRInfo RRI;

@ -498,7 +498,7 @@ namespace {
    }

    Sequence GetSeq() const {
      return Seq;
      return static_cast<Sequence>(Seq);
    }

    void ClearSequenceProgress() {

@ -538,7 +538,8 @@ namespace {

void
PtrState::Merge(const PtrState &Other, bool TopDown) {
  Seq = MergeSeqs(Seq, Other.Seq, TopDown);
  Seq = MergeSeqs(static_cast<Sequence>(Seq), static_cast<Sequence>(Other.Seq),
                  TopDown);
  KnownPositiveRefCount &= Other.KnownPositiveRefCount;

  // If we're not in a sequence (anymore), drop all associated state.
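
Storing the raw bits in an unsigned char and casting back at the accessor
sidesteps the portability trap with enum bit-fields: whether a plain enum
bit-field is signed is implementation-defined, so an 8-bit field can
sign-extend enumerator values on some compilers. A standalone sketch of the
pattern (simplified enumerators, not the in-tree ObjCARC code):

    #include <cassert>

    enum Sequence { S_None, S_Retain, S_CanRelease, S_Use, S_Stop, S_Release };

    struct PtrState {
      unsigned char Seq : 8; // raw storage; cast at the boundaries
      Sequence GetSeq() const { return static_cast<Sequence>(Seq); }
      void SetSeq(Sequence S) { Seq = static_cast<unsigned char>(S); }
    };

    int main() {
      PtrState S{};
      S.SetSeq(S_Use);
      assert(S.GetSeq() == S_Use);
      return 0;
    }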

@ -1537,6 +1537,15 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
  const SCEV *ExitCount = SE->getBackedgeTakenCount(OrigLoop);
  assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");

  // The exit count might have the type of i64 while the phi is i32. This can
  // happen if we have an induction variable that is sign extended before the
  // compare. The only way that we get a backedge taken count is that the
  // induction variable was signed and as such will not overflow. In such a case
  // truncation is legal.
  if (ExitCount->getType()->getPrimitiveSizeInBits() >
      IdxTy->getPrimitiveSizeInBits())
    ExitCount = SE->getTruncateOrNoop(ExitCount, IdxTy);

  ExitCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy);
  // Get the total trip count from the count by adding 1.
  ExitCount = SE->getAddExpr(ExitCount,
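
A minimal C illustration of the situation the comment describes: the i32
induction variable is sign-extended before the compare, so the backedge-taken
count is computed in 64 bits even though the loop phi is 32 bits wide; signed
overflow of i would be undefined behavior, so truncating the count back to
i32 is safe.

    // The compare is done in 64 bits; the induction phi stays 32 bits.
    void scale(float *x, float *y, long n) {
      for (int i = 0; (long)i < n; ++i)
        y[i] += 2.0f * x[i];
    }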

@ -520,6 +520,8 @@ private:

  /// Holds all of the instructions that we gathered.
  SetVector<Instruction *> GatherSeq;
  /// A list of blocks that we are going to CSE.
  SmallSet<BasicBlock *, 8> CSEBlocks;

  /// Numbers instructions in different blocks.
  DenseMap<BasicBlock *, BlockNumbering> BlocksNumbers;

@ -562,10 +564,8 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
         UE = Scalar->use_end(); User != UE; ++User) {
      DEBUG(dbgs() << "SLP: Checking user:" << **User << ".\n");

      bool Gathered = MustGather.count(*User);

      // Skip in-tree scalars that become vectors.
      if (ScalarToTreeEntry.count(*User) && !Gathered) {
      if (ScalarToTreeEntry.count(*User)) {
        DEBUG(dbgs() << "SLP: \tInternal user will be removed:" <<
              **User << ".\n");
        int Idx = ScalarToTreeEntry[*User]; (void) Idx;

@ -1274,6 +1274,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
    Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
    if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) {
      GatherSeq.insert(Insrt);
      CSEBlocks.insert(Insrt->getParent());

      // Add to our 'need-to-extract' list.
      if (ScalarToTreeEntry.count(VL[i])) {

@ -1588,6 +1589,7 @@ Value *BoUpSLP::vectorizeTree() {
      if (PHINode *PN = dyn_cast<PHINode>(Vec)) {
        Builder.SetInsertPoint(PN->getParent()->getFirstInsertionPt());
        Value *Ex = Builder.CreateExtractElement(Vec, Lane);
        CSEBlocks.insert(PN->getParent());
        User->replaceUsesOfWith(Scalar, Ex);
      } else if (isa<Instruction>(Vec)){
        if (PHINode *PH = dyn_cast<PHINode>(User)) {

@ -1595,17 +1597,20 @@ Value *BoUpSLP::vectorizeTree() {
            if (PH->getIncomingValue(i) == Scalar) {
              Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
              Value *Ex = Builder.CreateExtractElement(Vec, Lane);
              CSEBlocks.insert(PH->getIncomingBlock(i));
              PH->setOperand(i, Ex);
            }
          }
        } else {
          Builder.SetInsertPoint(cast<Instruction>(User));
          Value *Ex = Builder.CreateExtractElement(Vec, Lane);
          CSEBlocks.insert(cast<Instruction>(User)->getParent());
          User->replaceUsesOfWith(Scalar, Ex);
        }
      } else {
        Builder.SetInsertPoint(F->getEntryBlock().begin());
        Value *Ex = Builder.CreateExtractElement(Vec, Lane);
        CSEBlocks.insert(&F->getEntryBlock());
        User->replaceUsesOfWith(Scalar, Ex);
      }

@ -1631,8 +1636,6 @@ Value *BoUpSLP::vectorizeTree() {
      for (Value::use_iterator User = Scalar->use_begin(),
           UE = Scalar->use_end(); User != UE; ++User) {
        DEBUG(dbgs() << "SLP: \tvalidating user:" << **User << ".\n");
        assert(!MustGather.count(*User) &&
               "Replacing gathered value with undef");

        assert((ScalarToTreeEntry.count(*User) ||
                // It is legal to replace the reduction users by undef.

@ -1668,9 +1671,6 @@ public:
void BoUpSLP::optimizeGatherSequence() {
  DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size()
        << " gather sequences instructions.\n");
  // Keep a list of visited BBs to run CSE on. It is typically small.
  SmallPtrSet<BasicBlock *, 4> VisitedBBs;
  SmallVector<BasicBlock *, 4> CSEWorkList;
  // LICM InsertElementInst sequences.
  for (SetVector<Instruction *>::iterator it = GatherSeq.begin(),
       e = GatherSeq.end(); it != e; ++it) {

@ -1679,9 +1679,6 @@ void BoUpSLP::optimizeGatherSequence() {
    if (!Insert)
      continue;

    if (VisitedBBs.insert(Insert->getParent()))
      CSEWorkList.push_back(Insert->getParent());

    // Check if this block is inside a loop.
    Loop *L = LI->getLoopFor(Insert->getParent());
    if (!L)

@ -1708,6 +1705,7 @@ void BoUpSLP::optimizeGatherSequence() {

  // Sort blocks by domination. This ensures we visit a block after all blocks
  // dominating it are visited.
  SmallVector<BasicBlock *, 8> CSEWorkList(CSEBlocks.begin(), CSEBlocks.end());
  std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), DTCmp(DT));

  // Perform O(N^2) search over the gather sequences and merge identical

@ -1723,8 +1721,7 @@ void BoUpSLP::optimizeGatherSequence() {
    // For all instructions in blocks containing gather sequences:
    for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
      Instruction *In = it++;
      if ((!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In)) ||
          !GatherSeq.count(In))
      if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
        continue;

      // Check if we can replace this instruction with any of the

@ -1746,6 +1743,8 @@ void BoUpSLP::optimizeGatherSequence() {
      }
    }
  }
  CSEBlocks.clear();
  GatherSeq.clear();
}

/// The SLPVectorizer Pass.
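
The "sort blocks by domination" step above depends on a comparator with
DTCmp's shape. A standalone model (the real comparator queries DominatorTree;
dominator-tree depth is used here as a stand-in, since a block is always
deeper than the blocks that dominate it):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct Block { int DomDepth; }; // depth in the dominator tree, entry = 0

    struct DTCmp {
      bool operator()(const Block *A, const Block *B) const {
        return A->DomDepth < B->DomDepth; // stand-in for properlyDominates()
      }
    };

    int main() {
      Block Entry{0}, Mid{1}, Exit{2};  // straight-line chain Entry->Mid->Exit
      std::vector<Block *> Work{&Exit, &Mid, &Entry};
      std::stable_sort(Work.begin(), Work.end(), DTCmp());
      assert(Work[0] == &Entry && Work[1] == &Mid && Work[2] == &Exit);
      return 0;
    }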

File diff suppressed because it is too large
222
external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-bsl.ll
vendored
Normal file
@ -0,0 +1,222 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s

declare <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double>, <2 x double>, <2 x double>)

declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)

declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)

declare <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float>, <4 x float>, <4 x float>)

declare <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)

declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>)

declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)

declare <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double>, <1 x double>, <1 x double>)

declare <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float>, <2 x float>, <2 x float>)

declare <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64>, <1 x i64>, <1 x i64>)

declare <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)

define <8 x i8> @test_vbsl_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
; CHECK-LABEL: test_vbsl_s8:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
  ret <8 x i8> %vbsl.i
}

define <8 x i8> @test_vbsl_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
; CHECK-LABEL: test_vbsl_s16:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
  %0 = bitcast <4 x i16> %vbsl3.i to <8 x i8>
  ret <8 x i8> %0
}

define <2 x i32> @test_vbsl_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {
; CHECK-LABEL: test_vbsl_s32:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3)
  ret <2 x i32> %vbsl3.i
}

define <1 x i64> @test_vbsl_s64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
; CHECK-LABEL: test_vbsl_s64:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3)
  ret <1 x i64> %vbsl3.i
}

define <8 x i8> @test_vbsl_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
; CHECK-LABEL: test_vbsl_u8:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
  ret <8 x i8> %vbsl.i
}

define <4 x i16> @test_vbsl_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
; CHECK-LABEL: test_vbsl_u16:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
  ret <4 x i16> %vbsl3.i
}

define <2 x i32> @test_vbsl_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {
; CHECK-LABEL: test_vbsl_u32:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3)
  ret <2 x i32> %vbsl3.i
}

define <1 x i64> @test_vbsl_u64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
; CHECK-LABEL: test_vbsl_u64:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3)
  ret <1 x i64> %vbsl3.i
}

define <2 x float> @test_vbsl_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) {
; CHECK-LABEL: test_vbsl_f32:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl3.i = tail call <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3)
  ret <2 x float> %vbsl3.i
}

define <1 x double> @test_vbsl_f64(<1 x i64> %v1, <1 x double> %v2, <1 x double> %v3) {
; CHECK-LABEL: test_vbsl_f64:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl.i = bitcast <1 x i64> %v1 to <1 x double>
  %vbsl3.i = tail call <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double> %vbsl.i, <1 x double> %v2, <1 x double> %v3)
  ret <1 x double> %vbsl3.i
}

define <8 x i8> @test_vbsl_p8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
; CHECK-LABEL: test_vbsl_p8:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
  ret <8 x i8> %vbsl.i
}

define <4 x i16> @test_vbsl_p16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
; CHECK-LABEL: test_vbsl_p16:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
  ret <4 x i16> %vbsl3.i
}

define <16 x i8> @test_vbslq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
; CHECK-LABEL: test_vbslq_s8:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
  ret <16 x i8> %vbsl.i
}

define <8 x i16> @test_vbslq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
; CHECK-LABEL: test_vbslq_s16:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
  ret <8 x i16> %vbsl3.i
}

define <4 x i32> @test_vbslq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-LABEL: test_vbslq_s32:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3)
  ret <4 x i32> %vbsl3.i
}

define <2 x i64> @test_vbslq_s64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) {
; CHECK-LABEL: test_vbslq_s64:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3)
  ret <2 x i64> %vbsl3.i
}

define <16 x i8> @test_vbslq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
; CHECK-LABEL: test_vbslq_u8:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
  ret <16 x i8> %vbsl.i
}

define <8 x i16> @test_vbslq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
; CHECK-LABEL: test_vbslq_u16:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
  ret <8 x i16> %vbsl3.i
}

define <4 x i32> @test_vbslq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-LABEL: test_vbslq_u32:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3)
  ret <4 x i32> %vbsl3.i
}

define <2 x i64> @test_vbslq_u64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) {
; CHECK-LABEL: test_vbslq_u64:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3)
  ret <2 x i64> %vbsl3.i
}

define <4 x float> @test_vbslq_f32(<4 x i32> %v1, <4 x float> %v2, <4 x float> %v3) {
; CHECK-LABEL: test_vbslq_f32:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl.i = bitcast <4 x i32> %v1 to <4 x float>
  %vbsl3.i = tail call <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float> %vbsl.i, <4 x float> %v2, <4 x float> %v3)
  ret <4 x float> %vbsl3.i
}

define <16 x i8> @test_vbslq_p8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
; CHECK-LABEL: test_vbslq_p8:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
  ret <16 x i8> %vbsl.i
}

define <8 x i16> @test_vbslq_p16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
; CHECK-LABEL: test_vbslq_p16:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
  ret <8 x i16> %vbsl3.i
}

define <2 x double> @test_vbslq_f64(<2 x i64> %v1, <2 x double> %v2, <2 x double> %v3) {
; CHECK-LABEL: test_vbslq_f64:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl.i = bitcast <2 x i64> %v1 to <2 x double>
  %vbsl3.i = tail call <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double> %vbsl.i, <2 x double> %v2, <2 x double> %v3)
  ret <2 x double> %vbsl3.i
}

@ -11,3 +11,14 @@ entry:
  ret <2 x float> %add
}

define <4 x i32> @test_vshrn_not_match(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_not_match
; CHECK-NOT: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #35
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = ashr <2 x i64> %b, <i64 35, i64 35>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

@ -78,3 +78,11 @@ define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) {
  ret <1 x i64> %shuffle.i
}

define <1 x i64> @test_vector_copy_dup_dv2D(<1 x i64> %a, <2 x i64> %c) {
;CHECK: test_vector_copy_dup_dv2D
;CHECK: dup {{d[0-31]+}}, {{v[0-31]+}}.d[1]
  %vget_lane = extractelement <2 x i64> %c, i32 1
  %vset_lane = insertelement <1 x i64> undef, i64 %vget_lane, i32 0
  ret <1 x i64> %vset_lane
}

@ -90,10 +90,10 @@ declare double @llvm.aarch64.neon.vcvtf64.n.u64(<1 x i64>, i32)

define i32 @test_vcvts_n_s32_f32(float %a) {
; CHECK: test_vcvts_n_s32_f32
; CHECK: fcvtzs {{s[0-9]+}}, {{s[0-9]+}}, #0
; CHECK: fcvtzs {{s[0-9]+}}, {{s[0-9]+}}, #1
entry:
  %fcvtzs = insertelement <1 x float> undef, float %a, i32 0
  %fcvtzs1 = call <1 x i32> @llvm.aarch64.neon.vcvts.n.s32.f32(<1 x float> %fcvtzs, i32 0)
  %fcvtzs1 = call <1 x i32> @llvm.aarch64.neon.vcvts.n.s32.f32(<1 x float> %fcvtzs, i32 1)
  %0 = extractelement <1 x i32> %fcvtzs1, i32 0
  ret i32 %0
}

@ -102,10 +102,10 @@ declare <1 x i32> @llvm.aarch64.neon.vcvts.n.s32.f32(<1 x float>, i32)

define i64 @test_vcvtd_n_s64_f64(double %a) {
; CHECK: test_vcvtd_n_s64_f64
; CHECK: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}, #0
; CHECK: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}, #1
entry:
  %fcvtzs = insertelement <1 x double> undef, double %a, i32 0
  %fcvtzs1 = call <1 x i64> @llvm.aarch64.neon.vcvtd.n.s64.f64(<1 x double> %fcvtzs, i32 0)
  %fcvtzs1 = call <1 x i64> @llvm.aarch64.neon.vcvtd.n.s64.f64(<1 x double> %fcvtzs, i32 1)
  %0 = extractelement <1 x i64> %fcvtzs1, i32 0
  ret i64 %0
}

@ -114,10 +114,10 @@ declare <1 x i64> @llvm.aarch64.neon.vcvtd.n.s64.f64(<1 x double>, i32)

define i32 @test_vcvts_n_u32_f32(float %a) {
; CHECK: test_vcvts_n_u32_f32
; CHECK: fcvtzu {{s[0-9]+}}, {{s[0-9]+}}, #0
; CHECK: fcvtzu {{s[0-9]+}}, {{s[0-9]+}}, #32
entry:
  %fcvtzu = insertelement <1 x float> undef, float %a, i32 0
  %fcvtzu1 = call <1 x i32> @llvm.aarch64.neon.vcvts.n.u32.f32(<1 x float> %fcvtzu, i32 0)
  %fcvtzu1 = call <1 x i32> @llvm.aarch64.neon.vcvts.n.u32.f32(<1 x float> %fcvtzu, i32 32)
  %0 = extractelement <1 x i32> %fcvtzu1, i32 0
  ret i32 %0
}

@ -126,10 +126,10 @@ declare <1 x i32> @llvm.aarch64.neon.vcvts.n.u32.f32(<1 x float>, i32)

define i64 @test_vcvtd_n_u64_f64(double %a) {
; CHECK: test_vcvtd_n_u64_f64
; CHECK: fcvtzu {{d[0-9]+}}, {{d[0-9]+}}, #0
; CHECK: fcvtzu {{d[0-9]+}}, {{d[0-9]+}}, #64
entry:
  %fcvtzu = insertelement <1 x double> undef, double %a, i32 0
  %fcvtzu1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtd.n.u64.f64(<1 x double> %fcvtzu, i32 0)
  %fcvtzu1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtd.n.u64.f64(<1 x double> %fcvtzu, i32 64)
  %0 = extractelement <1 x i64> %fcvtzu1, i32 0
  ret i64 %0
}

26
external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-fabd.ll
vendored
Normal file
@ -0,0 +1,26 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s

define float @test_vabds_f32(float %a, float %b) {
; CHECK-LABEL: test_vabds_f32
; CHECK: fabd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
entry:
  %vabd.i = insertelement <1 x float> undef, float %a, i32 0
  %vabd1.i = insertelement <1 x float> undef, float %b, i32 0
  %vabd2.i = call <1 x float> @llvm.aarch64.neon.vabd.v1f32(<1 x float> %vabd.i, <1 x float> %vabd1.i)
  %0 = extractelement <1 x float> %vabd2.i, i32 0
  ret float %0
}

define double @test_vabdd_f64(double %a, double %b) {
; CHECK-LABEL: test_vabdd_f64
; CHECK: fabd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
entry:
  %vabd.i = insertelement <1 x double> undef, double %a, i32 0
  %vabd1.i = insertelement <1 x double> undef, double %b, i32 0
  %vabd2.i = call <1 x double> @llvm.aarch64.neon.vabd.v1f64(<1 x double> %vabd.i, <1 x double> %vabd1.i)
  %0 = extractelement <1 x double> %vabd2.i, i32 0
  ret double %0
}

declare <1 x double> @llvm.aarch64.neon.vabd.v1f64(<1 x double>, <1 x double>)
declare <1 x float> @llvm.aarch64.neon.vabd.v1f32(<1 x float>, <1 x float>)

255
external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-fcvt.ll
vendored
Normal file
@ -0,0 +1,255 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

;; Scalar Floating-point Convert

define float @test_vcvtxn(double %a) {
; CHECK: test_vcvtxn
; CHECK: fcvtxn {{s[0-9]}}, {{d[0-9]}}
entry:
  %vcvtf.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtf1.i = tail call <1 x float> @llvm.aarch64.neon.fcvtxn.v1f32.v1f64(<1 x double> %vcvtf.i)
  %0 = extractelement <1 x float> %vcvtf1.i, i32 0
  ret float %0
}

declare <1 x float> @llvm.aarch64.neon.fcvtxn.v1f32.v1f64(<1 x double>)

define i32 @test_vcvtass(float %a) {
; CHECK: test_vcvtass
; CHECK: fcvtas {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtas.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtas1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.v1f32(<1 x float> %vcvtas.i)
  %0 = extractelement <1 x i32> %vcvtas1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.v1f32(<1 x float>)

define i64 @test_test_vcvtasd(double %a) {
; CHECK: test_test_vcvtasd
; CHECK: fcvtas {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtas.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtas1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %vcvtas.i)
  %0 = extractelement <1 x i64> %vcvtas1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtaus(float %a) {
; CHECK: test_vcvtaus
; CHECK: fcvtau {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtau.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtau1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.v1f32(<1 x float> %vcvtau.i)
  %0 = extractelement <1 x i32> %vcvtau1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtaud(double %a) {
; CHECK: test_vcvtaud
; CHECK: fcvtau {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtau.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtau1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %vcvtau.i)
  %0 = extractelement <1 x i64> %vcvtau1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtmss(float %a) {
; CHECK: test_vcvtmss
; CHECK: fcvtms {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtms.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtms1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.v1f32(<1 x float> %vcvtms.i)
  %0 = extractelement <1 x i32> %vcvtms1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtmd_s64_f64(double %a) {
; CHECK: test_vcvtmd_s64_f64
; CHECK: fcvtms {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtms.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtms1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %vcvtms.i)
  %0 = extractelement <1 x i64> %vcvtms1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtmus(float %a) {
; CHECK: test_vcvtmus
; CHECK: fcvtmu {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtmu.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtmu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.v1f32(<1 x float> %vcvtmu.i)
  %0 = extractelement <1 x i32> %vcvtmu1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtmud(double %a) {
; CHECK: test_vcvtmud
; CHECK: fcvtmu {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtmu.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtmu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %vcvtmu.i)
  %0 = extractelement <1 x i64> %vcvtmu1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtnss(float %a) {
; CHECK: test_vcvtnss
; CHECK: fcvtns {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtns.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtns1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.v1f32(<1 x float> %vcvtns.i)
  %0 = extractelement <1 x i32> %vcvtns1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtnd_s64_f64(double %a) {
; CHECK: test_vcvtnd_s64_f64
; CHECK: fcvtns {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtns.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtns1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %vcvtns.i)
  %0 = extractelement <1 x i64> %vcvtns1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtnus(float %a) {
; CHECK: test_vcvtnus
; CHECK: fcvtnu {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtnu.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtnu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.v1f32(<1 x float> %vcvtnu.i)
  %0 = extractelement <1 x i32> %vcvtnu1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtnud(double %a) {
; CHECK: test_vcvtnud
; CHECK: fcvtnu {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtnu.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtnu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %vcvtnu.i)
  %0 = extractelement <1 x i64> %vcvtnu1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtpss(float %a) {
; CHECK: test_vcvtpss
; CHECK: fcvtps {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtps.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtps1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.v1f32(<1 x float> %vcvtps.i)
  %0 = extractelement <1 x i32> %vcvtps1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtpd_s64_f64(double %a) {
; CHECK: test_vcvtpd_s64_f64
; CHECK: fcvtps {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtps.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtps1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %vcvtps.i)
  %0 = extractelement <1 x i64> %vcvtps1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtpus(float %a) {
; CHECK: test_vcvtpus
; CHECK: fcvtpu {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtpu.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtpu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.v1f32(<1 x float> %vcvtpu.i)
  %0 = extractelement <1 x i32> %vcvtpu1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtpud(double %a) {
; CHECK: test_vcvtpud
; CHECK: fcvtpu {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtpu.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtpu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %vcvtpu.i)
  %0 = extractelement <1 x i64> %vcvtpu1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtss(float %a) {
; CHECK: test_vcvtss
; CHECK: fcvtzs {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtzs.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtzs1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.v1f32(<1 x float> %vcvtzs.i)
  %0 = extractelement <1 x i32> %vcvtzs1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtd_s64_f64(double %a) {
; CHECK: test_vcvtd_s64_f64
; CHECK: fcvtzs {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvzs.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvzs1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %vcvzs.i)
  %0 = extractelement <1 x i64> %vcvzs1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtus(float %a) {
; CHECK: test_vcvtus
; CHECK: fcvtzu {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtzu.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtzu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.v1f32(<1 x float> %vcvtzu.i)
  %0 = extractelement <1 x i32> %vcvtzu1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtud(double %a) {
; CHECK: test_vcvtud
; CHECK: fcvtzu {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtzu.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtzu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %vcvtzu.i)
  %0 = extractelement <1 x i64> %vcvtzu1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double>)

@ -316,10 +316,10 @@ entry:

define i8 @test_vqshrnh_n_s16(i16 %a) {
; CHECK: test_vqshrnh_n_s16
; CHECK: sqshrn {{b[0-9]+}}, {{h[0-9]+}}, #15
; CHECK: sqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
entry:
  %vsqshrn = insertelement <1 x i16> undef, i16 %a, i32 0
  %vsqshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16> %vsqshrn, i32 15)
  %vsqshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16> %vsqshrn, i32 8)
  %0 = extractelement <1 x i8> %vsqshrn1, i32 0
  ret i8 %0
}

@ -328,10 +328,10 @@ declare <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16>, i32)

define i16 @test_vqshrns_n_s32(i32 %a) {
; CHECK: test_vqshrns_n_s32
; CHECK: sqshrn {{h[0-9]+}}, {{s[0-9]+}}, #31
; CHECK: sqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
entry:
  %vsqshrn = insertelement <1 x i32> undef, i32 %a, i32 0
  %vsqshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32> %vsqshrn, i32 31)
  %vsqshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32> %vsqshrn, i32 16)
  %0 = extractelement <1 x i16> %vsqshrn1, i32 0
  ret i16 %0
}

@ -340,10 +340,10 @@ declare <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32>, i32)

define i32 @test_vqshrnd_n_s64(i64 %a) {
; CHECK: test_vqshrnd_n_s64
; CHECK: sqshrn {{s[0-9]+}}, {{d[0-9]+}}, #63
; CHECK: sqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
entry:
  %vsqshrn = insertelement <1 x i64> undef, i64 %a, i32 0
  %vsqshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64> %vsqshrn, i32 63)
  %vsqshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64> %vsqshrn, i32 32)
  %0 = extractelement <1 x i32> %vsqshrn1, i32 0
  ret i32 %0
}

@ -352,10 +352,10 @@ declare <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64>, i32)

define i8 @test_vqshrnh_n_u16(i16 %a) {
; CHECK: test_vqshrnh_n_u16
; CHECK: uqshrn {{b[0-9]+}}, {{h[0-9]+}}, #15
; CHECK: uqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
entry:
  %vuqshrn = insertelement <1 x i16> undef, i16 %a, i32 0
  %vuqshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16> %vuqshrn, i32 15)
  %vuqshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16> %vuqshrn, i32 8)
  %0 = extractelement <1 x i8> %vuqshrn1, i32 0
  ret i8 %0
}

@ -364,10 +364,10 @@ declare <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16>, i32)

define i16 @test_vqshrns_n_u32(i32 %a) {
; CHECK: test_vqshrns_n_u32
; CHECK: uqshrn {{h[0-9]+}}, {{s[0-9]+}}, #31
; CHECK: uqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
entry:
  %vuqshrn = insertelement <1 x i32> undef, i32 %a, i32 0
  %vuqshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32> %vuqshrn, i32 31)
  %vuqshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32> %vuqshrn, i32 16)
  %0 = extractelement <1 x i16> %vuqshrn1, i32 0
  ret i16 %0
}

@ -376,10 +376,10 @@ declare <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32>, i32)

define i32 @test_vqshrnd_n_u64(i64 %a) {
; CHECK: test_vqshrnd_n_u64
; CHECK: uqshrn {{s[0-9]+}}, {{d[0-9]+}}, #63
; CHECK: uqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
entry:
  %vuqshrn = insertelement <1 x i64> undef, i64 %a, i32 0
  %vuqshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64> %vuqshrn, i32 63)
  %vuqshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64> %vuqshrn, i32 32)
  %0 = extractelement <1 x i32> %vuqshrn1, i32 0
  ret i32 %0
}

@ -388,10 +388,10 @@ declare <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64>, i32)

define i8 @test_vqrshrnh_n_s16(i16 %a) {
; CHECK: test_vqrshrnh_n_s16
; CHECK: sqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #15
; CHECK: sqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
entry:
  %vsqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0
  %vsqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16> %vsqrshrn, i32 15)
  %vsqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16> %vsqrshrn, i32 8)
  %0 = extractelement <1 x i8> %vsqrshrn1, i32 0
  ret i8 %0
}

@ -400,10 +400,10 @@ declare <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16>, i32)

define i16 @test_vqrshrns_n_s32(i32 %a) {
; CHECK: test_vqrshrns_n_s32
; CHECK: sqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #31
; CHECK: sqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
entry:
  %vsqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0
  %vsqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32> %vsqrshrn, i32 31)
  %vsqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32> %vsqrshrn, i32 16)
  %0 = extractelement <1 x i16> %vsqrshrn1, i32 0
  ret i16 %0
}

@ -412,10 +412,10 @@ declare <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32>, i32)

define i32 @test_vqrshrnd_n_s64(i64 %a) {
; CHECK: test_vqrshrnd_n_s64
; CHECK: sqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #63
; CHECK: sqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
entry:
  %vsqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0
  %vsqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64> %vsqrshrn, i32 63)
  %vsqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64> %vsqrshrn, i32 32)
  %0 = extractelement <1 x i32> %vsqrshrn1, i32 0
  ret i32 %0
}

@ -424,10 +424,10 @@ declare <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64>, i32)

define i8 @test_vqrshrnh_n_u16(i16 %a) {
; CHECK: test_vqrshrnh_n_u16
; CHECK: uqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #15
; CHECK: uqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
entry:
  %vuqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0
  %vuqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16> %vuqrshrn, i32 15)
  %vuqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16> %vuqrshrn, i32 8)
  %0 = extractelement <1 x i8> %vuqrshrn1, i32 0
  ret i8 %0
}

@ -436,10 +436,10 @@ declare <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16>, i32)

define i16 @test_vqrshrns_n_u32(i32 %a) {
; CHECK: test_vqrshrns_n_u32
; CHECK: uqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #31
; CHECK: uqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
entry:
  %vuqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0
  %vuqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %vuqrshrn, i32 31)
  %vuqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %vuqrshrn, i32 16)
  %0 = extractelement <1 x i16> %vuqrshrn1, i32 0
  ret i16 %0
}

@ -448,10 +448,10 @@ declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32)

define i32 @test_vqrshrnd_n_u64(i64 %a) {
; CHECK: test_vqrshrnd_n_u64
; CHECK: uqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #63
; CHECK: uqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
entry:
  %vuqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0
  %vuqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64> %vuqrshrn, i32 63)
  %vuqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64> %vuqrshrn, i32 32)
  %0 = extractelement <1 x i32> %vuqrshrn1, i32 0
  ret i32 %0
}

@ -460,10 +460,10 @@ declare <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64>, i32)

define i8 @test_vqshrunh_n_s16(i16 %a) {
; CHECK: test_vqshrunh_n_s16
; CHECK: sqshrun {{b[0-9]+}}, {{h[0-9]+}}, #15
; CHECK: sqshrun {{b[0-9]+}}, {{h[0-9]+}}, #8
entry:
  %vsqshrun = insertelement <1 x i16> undef, i16 %a, i32 0
  %vsqshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16> %vsqshrun, i32 15)
  %vsqshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16> %vsqshrun, i32 8)
  %0 = extractelement <1 x i8> %vsqshrun1, i32 0
  ret i8 %0
}

@ -472,10 +472,10 @@ declare <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16>, i32)

define i16 @test_vqshruns_n_s32(i32 %a) {
; CHECK: test_vqshruns_n_s32
; CHECK: sqshrun {{h[0-9]+}}, {{s[0-9]+}}, #31
; CHECK: sqshrun {{h[0-9]+}}, {{s[0-9]+}}, #16
entry:
  %vsqshrun = insertelement <1 x i32> undef, i32 %a, i32 0
  %vsqshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32> %vsqshrun, i32 31)
  %vsqshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32> %vsqshrun, i32 16)
  %0 = extractelement <1 x i16> %vsqshrun1, i32 0
  ret i16 %0
}

@ -484,10 +484,10 @@ declare <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32>, i32)

define i32 @test_vqshrund_n_s64(i64 %a) {
; CHECK: test_vqshrund_n_s64
; CHECK: sqshrun {{s[0-9]+}}, {{d[0-9]+}}, #63
; CHECK: sqshrun {{s[0-9]+}}, {{d[0-9]+}}, #32
entry:
  %vsqshrun = insertelement <1 x i64> undef, i64 %a, i32 0
  %vsqshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64> %vsqshrun, i32 63)
  %vsqshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64> %vsqshrun, i32 32)
  %0 = extractelement <1 x i32> %vsqshrun1, i32 0
  ret i32 %0
}

@ -496,10 +496,10 @@ declare <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64>, i32)

define i8 @test_vqrshrunh_n_s16(i16 %a) {
; CHECK: test_vqrshrunh_n_s16
; CHECK: sqrshrun {{b[0-9]+}}, {{h[0-9]+}}, #15
; CHECK: sqrshrun {{b[0-9]+}}, {{h[0-9]+}}, #8
entry:
  %vsqrshrun = insertelement <1 x i16> undef, i16 %a, i32 0
  %vsqrshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16> %vsqrshrun, i32 15)
  %vsqrshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16> %vsqrshrun, i32 8)
  %0 = extractelement <1 x i8> %vsqrshrun1, i32 0
  ret i8 %0
}

@ -508,10 +508,10 @@ declare <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16>, i32)

define i16 @test_vqrshruns_n_s32(i32 %a) {
; CHECK: test_vqrshruns_n_s32
; CHECK: sqrshrun {{h[0-9]+}}, {{s[0-9]+}}, #31
; CHECK: sqrshrun {{h[0-9]+}}, {{s[0-9]+}}, #16
entry:
  %vsqrshrun = insertelement <1 x i32> undef, i32 %a, i32 0
  %vsqrshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32> %vsqrshrun, i32 31)
  %vsqrshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32> %vsqrshrun, i32 16)
  %0 = extractelement <1 x i16> %vsqrshrun1, i32 0
  ret i16 %0
}

@ -520,10 +520,10 @@ declare <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32>, i32)

define i32 @test_vqrshrund_n_s64(i64 %a) {
; CHECK: test_vqrshrund_n_s64
; CHECK: sqrshrun {{s[0-9]+}}, {{d[0-9]+}}, #63
; CHECK: sqrshrun {{s[0-9]+}}, {{d[0-9]+}}, #32
entry:
  %vsqrshrun = insertelement <1 x i64> undef, i64 %a, i32 0
  %vsqrshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64> %vsqrshrun, i32 63)
  %vsqrshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64> %vsqrshrun, i32 32)
  %0 = extractelement <1 x i32> %vsqrshrun1, i32 0
|
||||
ret i32 %0
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ define void @check_simple() minsize {
; CHECK-NOT: sub sp, sp,
; ...
; CHECK-NOT: add sp, sp,
; CHECK: pop.w {r7, r8, r9, r10, r11, pc}
; CHECK: pop.w {r0, r1, r2, r3, r11, pc}

; CHECK-T1-LABEL: check_simple:
; CHECK-T1: push {r3, r4, r5, r6, r7, lr}
@ -23,7 +23,7 @@ define void @check_simple() minsize {
; CHECK-T1-NOT: sub sp, sp,
; ...
; CHECK-T1-NOT: add sp, sp,
; CHECK-T1: pop {r3, r4, r5, r6, r7, pc}
; CHECK-T1: pop {r0, r1, r2, r3, r7, pc}

; iOS always has a frame pointer and messing with the push affects
; how it's set in the prologue. Make sure we get that right.

85
external/bsd/llvm/dist/llvm/test/CodeGen/Mips/msa/frameindex.ll
vendored
Normal file
@ -0,0 +1,85 @@
; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32-AE -check-prefix=MIPS32-BE %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32-AE -check-prefix=MIPS32-LE %s

define void @loadstore_v16i8_near() nounwind {
; MIPS32-AE: loadstore_v16i8_near:

%1 = alloca <16 x i8>
%2 = load volatile <16 x i8>* %1
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0($sp)
store volatile <16 x i8> %2, <16 x i8>* %1
; MIPS32-AE: st.b [[R1]], 0($sp)

ret void
; MIPS32-AE: .size loadstore_v16i8_near
}

define void @loadstore_v16i8_just_under_simm10() nounwind {
; MIPS32-AE: loadstore_v16i8_just_under_simm10:

%1 = alloca <16 x i8>
%2 = alloca [496 x i8] ; Push the frame right up to 512 bytes

%3 = load volatile <16 x i8>* %1
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 496($sp)
store volatile <16 x i8> %3, <16 x i8>* %1
; MIPS32-AE: st.b [[R1]], 496($sp)

ret void
; MIPS32-AE: .size loadstore_v16i8_just_under_simm10
}

define void @loadstore_v16i8_just_over_simm10() nounwind {
; MIPS32-AE: loadstore_v16i8_just_over_simm10:

%1 = alloca <16 x i8>
%2 = alloca [497 x i8] ; Push the frame just over 512 bytes

%3 = load volatile <16 x i8>* %1
; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <16 x i8> %3, <16 x i8>* %1
; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512
; MIPS32-AE: st.b [[R1]], 0([[BASE]])

ret void
; MIPS32-AE: .size loadstore_v16i8_just_over_simm10
}

define void @loadstore_v16i8_just_under_simm16() nounwind {
; MIPS32-AE: loadstore_v16i8_just_under_simm16:

%1 = alloca <16 x i8>
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes

%3 = load volatile <16 x i8>* %1
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <16 x i8> %3, <16 x i8>* %1
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
; MIPS32-AE: st.b [[R1]], 0([[BASE]])

ret void
; MIPS32-AE: .size loadstore_v16i8_just_under_simm16
}

define void @loadstore_v16i8_just_over_simm16() nounwind {
; MIPS32-AE: loadstore_v16i8_just_over_simm16:

%1 = alloca <16 x i8>
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes

%3 = load volatile <16 x i8>* %1
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <16 x i8> %3, <16 x i8>* %1
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
; MIPS32-AE: st.b [[R1]], 0([[BASE]])

ret void
; MIPS32-AE: .size loadstore_v16i8_just_over_simm16
}

@ -5,10 +5,10 @@
; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
; unless isFabsFree returns true

; R600-CHECK: @fabs_free
; R600-CHECK-LABEL: @fabs_free
; R600-CHECK-NOT: AND
; R600-CHECK: |PV.{{[XYZW]}}|
; SI-CHECK: @fabs_free
; SI-CHECK-LABEL: @fabs_free
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0

define void @fabs_free(float addrspace(1)* %out, i32 %in) {
@ -19,4 +19,36 @@ entry:
ret void
}

; R600-CHECK-LABEL: @fabs_v2
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
; SI-CHECK-LABEL: @fabs_v2
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
define void @fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
entry:
%0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
store <2 x float> %0, <2 x float> addrspace(1)* %out
ret void
}

; R600-CHECK-LABEL: @fabs_v4
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
; SI-CHECK-LABEL: @fabs_v4
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
define void @fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
store <4 x float> %0, <4 x float> addrspace(1)* %out
ret void
}

declare float @fabs(float ) readnone
declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone
declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone

41
external/bsd/llvm/dist/llvm/test/CodeGen/R600/llvm.round.ll
vendored
Normal file
@ -0,0 +1,41 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 --check-prefix=FUNC

; FUNC-LABEL: @f32
; R600: FRACT
; R600-DAG: ADD
; R600-DAG: CEIL
; R600-DAG: FLOOR
; R600: CNDGE
define void @f32(float addrspace(1)* %out, float %in) {
entry:
%0 = call float @llvm.round.f32(float %in)
store float %0, float addrspace(1)* %out
ret void
}

; The vector tests are really difficult to verify, since it can be hard to
; predict how the scheduler will order the instructions. We already have
; a test for the scalar case, so the vector tests just check that the
; compiler doesn't crash.

; FUNC-LABEL: v2f32
; R600: CF_END
define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
entry:
%0 = call <2 x float> @llvm.round.v2f32(<2 x float> %in)
store <2 x float> %0, <2 x float> addrspace(1)* %out
ret void
}

; FUNC-LABEL: v4f32
; R600: CF_END
define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.round.v4f32(<4 x float> %in)
store <4 x float> %0, <4 x float> addrspace(1)* %out
ret void
}

declare float @llvm.round.f32(float)
declare <2 x float> @llvm.round.v2f32(<2 x float>)
declare <4 x float> @llvm.round.v4f32(<4 x float>)

692
external/bsd/llvm/dist/llvm/test/CodeGen/R600/si-sgpr-spill.ll
vendored
Normal file
@ -0,0 +1,692 @@
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s

; XXX: Enable when spilling is supported
; XFAIL: *

; These tests check that the compiler won't crash when it needs to spill
; SGPRs.

; CHECK-LABEL: @main
; Writing to M0 from an SMRD instruction will hang the GPU.
; CHECK-NOT: S_BUFFER_LOAD_DWORD m0
; CHECK: S_ENDPGM
@ddxy_lds = external addrspace(3) global [64 x i32]

define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%21 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
|
||||
%22 = load <16 x i8> addrspace(2)* %21, !tbaa !0
|
||||
%23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 96)
|
||||
%24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100)
|
||||
%25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 104)
|
||||
%26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112)
|
||||
%27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116)
|
||||
%28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120)
|
||||
%29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128)
|
||||
%30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132)
|
||||
%31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 140)
|
||||
%32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144)
|
||||
%33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160)
|
||||
%34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 176)
|
||||
%35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 180)
|
||||
%36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 184)
|
||||
%37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 192)
|
||||
%38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 196)
|
||||
%39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 200)
|
||||
%40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 208)
|
||||
%41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 212)
|
||||
%42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 216)
|
||||
%43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 224)
|
||||
%44 = call float @llvm.SI.load.const(<16 x i8> %22, i32 240)
|
||||
%45 = call float @llvm.SI.load.const(<16 x i8> %22, i32 244)
|
||||
%46 = call float @llvm.SI.load.const(<16 x i8> %22, i32 248)
|
||||
%47 = call float @llvm.SI.load.const(<16 x i8> %22, i32 256)
|
||||
%48 = call float @llvm.SI.load.const(<16 x i8> %22, i32 272)
|
||||
%49 = call float @llvm.SI.load.const(<16 x i8> %22, i32 276)
|
||||
%50 = call float @llvm.SI.load.const(<16 x i8> %22, i32 280)
|
||||
%51 = call float @llvm.SI.load.const(<16 x i8> %22, i32 288)
|
||||
%52 = call float @llvm.SI.load.const(<16 x i8> %22, i32 292)
|
||||
%53 = call float @llvm.SI.load.const(<16 x i8> %22, i32 296)
|
||||
%54 = call float @llvm.SI.load.const(<16 x i8> %22, i32 304)
|
||||
%55 = call float @llvm.SI.load.const(<16 x i8> %22, i32 308)
|
||||
%56 = call float @llvm.SI.load.const(<16 x i8> %22, i32 312)
|
||||
%57 = call float @llvm.SI.load.const(<16 x i8> %22, i32 368)
|
||||
%58 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372)
|
||||
%59 = call float @llvm.SI.load.const(<16 x i8> %22, i32 376)
|
||||
%60 = call float @llvm.SI.load.const(<16 x i8> %22, i32 384)
|
||||
%61 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
|
||||
%62 = load <32 x i8> addrspace(2)* %61, !tbaa !0
|
||||
%63 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
|
||||
%64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
|
||||
%65 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1
|
||||
%66 = load <32 x i8> addrspace(2)* %65, !tbaa !0
|
||||
%67 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1
|
||||
%68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
|
||||
%69 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2
|
||||
%70 = load <32 x i8> addrspace(2)* %69, !tbaa !0
|
||||
%71 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2
|
||||
%72 = load <16 x i8> addrspace(2)* %71, !tbaa !0
|
||||
%73 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3
|
||||
%74 = load <32 x i8> addrspace(2)* %73, !tbaa !0
|
||||
%75 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3
|
||||
%76 = load <16 x i8> addrspace(2)* %75, !tbaa !0
|
||||
%77 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4
|
||||
%78 = load <32 x i8> addrspace(2)* %77, !tbaa !0
|
||||
%79 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4
|
||||
%80 = load <16 x i8> addrspace(2)* %79, !tbaa !0
|
||||
%81 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5
|
||||
%82 = load <32 x i8> addrspace(2)* %81, !tbaa !0
|
||||
%83 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5
|
||||
%84 = load <16 x i8> addrspace(2)* %83, !tbaa !0
|
||||
%85 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6
|
||||
%86 = load <32 x i8> addrspace(2)* %85, !tbaa !0
|
||||
%87 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6
|
||||
%88 = load <16 x i8> addrspace(2)* %87, !tbaa !0
|
||||
%89 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7
|
||||
%90 = load <32 x i8> addrspace(2)* %89, !tbaa !0
|
||||
%91 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7
|
||||
%92 = load <16 x i8> addrspace(2)* %91, !tbaa !0
|
||||
%93 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %4, <2 x i32> %6)
|
||||
%94 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %4, <2 x i32> %6)
|
||||
%95 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %4, <2 x i32> %6)
|
||||
%96 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %4, <2 x i32> %6)
|
||||
%97 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %4, <2 x i32> %6)
|
||||
%98 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %4, <2 x i32> %6)
|
||||
%99 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %4, <2 x i32> %6)
|
||||
%100 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %4, <2 x i32> %6)
|
||||
%101 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %4, <2 x i32> %6)
|
||||
%102 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %4, <2 x i32> %6)
|
||||
%103 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %4, <2 x i32> %6)
|
||||
%104 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %4, <2 x i32> %6)
|
||||
%105 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %4, <2 x i32> %6)
|
||||
%106 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %4, <2 x i32> %6)
|
||||
%107 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %4, <2 x i32> %6)
|
||||
%108 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %4, <2 x i32> %6)
|
||||
%109 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %4, <2 x i32> %6)
|
||||
%110 = call i32 @llvm.SI.tid()
|
||||
%111 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %110
|
||||
%112 = bitcast float %93 to i32
|
||||
store i32 %112, i32 addrspace(3)* %111
|
||||
%113 = bitcast float %94 to i32
|
||||
store i32 %113, i32 addrspace(3)* %111
|
||||
%114 = call i32 @llvm.SI.tid()
|
||||
%115 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %114
|
||||
%116 = and i32 %114, -4
|
||||
%117 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %116
|
||||
%118 = add i32 %116, 1
|
||||
%119 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %118
|
||||
%120 = bitcast float %93 to i32
|
||||
store i32 %120, i32 addrspace(3)* %115
|
||||
%121 = load i32 addrspace(3)* %117
|
||||
%122 = bitcast i32 %121 to float
|
||||
%123 = load i32 addrspace(3)* %119
|
||||
%124 = bitcast i32 %123 to float
|
||||
%125 = fsub float %124, %122
|
||||
%126 = bitcast float %94 to i32
|
||||
store i32 %126, i32 addrspace(3)* %115
|
||||
%127 = load i32 addrspace(3)* %117
|
||||
%128 = bitcast i32 %127 to float
|
||||
%129 = load i32 addrspace(3)* %119
|
||||
%130 = bitcast i32 %129 to float
|
||||
%131 = fsub float %130, %128
|
||||
%132 = insertelement <4 x float> undef, float %125, i32 0
|
||||
%133 = insertelement <4 x float> %132, float %131, i32 1
|
||||
%134 = insertelement <4 x float> %133, float %131, i32 2
|
||||
%135 = insertelement <4 x float> %134, float %131, i32 3
|
||||
%136 = extractelement <4 x float> %135, i32 0
|
||||
%137 = extractelement <4 x float> %135, i32 1
|
||||
%138 = fmul float %60, %93
|
||||
%139 = fmul float %60, %94
|
||||
%140 = fmul float %60, %94
|
||||
%141 = fmul float %60, %94
|
||||
%142 = call i32 @llvm.SI.tid()
|
||||
%143 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %142
|
||||
%144 = bitcast float %138 to i32
|
||||
store i32 %144, i32 addrspace(3)* %143
|
||||
%145 = bitcast float %139 to i32
|
||||
store i32 %145, i32 addrspace(3)* %143
|
||||
%146 = bitcast float %140 to i32
|
||||
store i32 %146, i32 addrspace(3)* %143
|
||||
%147 = bitcast float %141 to i32
|
||||
store i32 %147, i32 addrspace(3)* %143
|
||||
%148 = call i32 @llvm.SI.tid()
|
||||
%149 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %148
|
||||
%150 = and i32 %148, -4
|
||||
%151 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %150
|
||||
%152 = add i32 %150, 2
|
||||
%153 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %152
|
||||
%154 = bitcast float %138 to i32
|
||||
store i32 %154, i32 addrspace(3)* %149
|
||||
%155 = load i32 addrspace(3)* %151
|
||||
%156 = bitcast i32 %155 to float
|
||||
%157 = load i32 addrspace(3)* %153
|
||||
%158 = bitcast i32 %157 to float
|
||||
%159 = fsub float %158, %156
|
||||
%160 = bitcast float %139 to i32
|
||||
store i32 %160, i32 addrspace(3)* %149
|
||||
%161 = load i32 addrspace(3)* %151
|
||||
%162 = bitcast i32 %161 to float
|
||||
%163 = load i32 addrspace(3)* %153
|
||||
%164 = bitcast i32 %163 to float
|
||||
%165 = fsub float %164, %162
|
||||
%166 = bitcast float %140 to i32
|
||||
store i32 %166, i32 addrspace(3)* %149
|
||||
%167 = load i32 addrspace(3)* %151
|
||||
%168 = bitcast i32 %167 to float
|
||||
%169 = load i32 addrspace(3)* %153
|
||||
%170 = bitcast i32 %169 to float
|
||||
%171 = fsub float %170, %168
|
||||
%172 = bitcast float %141 to i32
|
||||
store i32 %172, i32 addrspace(3)* %149
|
||||
%173 = load i32 addrspace(3)* %151
|
||||
%174 = bitcast i32 %173 to float
|
||||
%175 = load i32 addrspace(3)* %153
|
||||
%176 = bitcast i32 %175 to float
|
||||
%177 = fsub float %176, %174
|
||||
%178 = insertelement <4 x float> undef, float %159, i32 0
|
||||
%179 = insertelement <4 x float> %178, float %165, i32 1
|
||||
%180 = insertelement <4 x float> %179, float %171, i32 2
|
||||
%181 = insertelement <4 x float> %180, float %177, i32 3
|
||||
%182 = extractelement <4 x float> %181, i32 0
|
||||
%183 = extractelement <4 x float> %181, i32 1
|
||||
%184 = fdiv float 1.000000e+00, %97
|
||||
%185 = fmul float %33, %184
|
||||
%186 = fcmp uge float 1.000000e+00, %185
|
||||
%187 = select i1 %186, float %185, float 1.000000e+00
|
||||
%188 = fmul float %187, %30
|
||||
%189 = call float @ceil(float %188)
|
||||
%190 = fcmp uge float 3.000000e+00, %189
|
||||
%191 = select i1 %190, float 3.000000e+00, float %189
|
||||
%192 = fdiv float 1.000000e+00, %191
|
||||
%193 = fdiv float 1.000000e+00, %30
|
||||
%194 = fmul float %191, %193
|
||||
%195 = fmul float %31, %194
|
||||
%196 = fmul float %95, %95
|
||||
%197 = fmul float %96, %96
|
||||
%198 = fadd float %197, %196
|
||||
%199 = fmul float %97, %97
|
||||
%200 = fadd float %198, %199
|
||||
%201 = call float @llvm.AMDGPU.rsq(float %200)
|
||||
%202 = fmul float %95, %201
|
||||
%203 = fmul float %96, %201
|
||||
%204 = fmul float %202, %29
|
||||
%205 = fmul float %203, %29
|
||||
%206 = fmul float %204, -1.000000e+00
|
||||
%207 = fmul float %205, 1.000000e+00
|
||||
%208 = fmul float %206, %32
|
||||
%209 = fmul float %207, %32
|
||||
%210 = fsub float -0.000000e+00, %208
|
||||
%211 = fadd float %93, %210
|
||||
%212 = fsub float -0.000000e+00, %209
|
||||
%213 = fadd float %94, %212
|
||||
%214 = fmul float %206, %192
|
||||
%215 = fmul float %207, %192
|
||||
%216 = fmul float -1.000000e+00, %192
|
||||
%217 = bitcast float %136 to i32
|
||||
%218 = bitcast float %182 to i32
|
||||
%219 = bitcast float %137 to i32
|
||||
%220 = bitcast float %183 to i32
|
||||
%221 = insertelement <8 x i32> undef, i32 %217, i32 0
|
||||
%222 = insertelement <8 x i32> %221, i32 %218, i32 1
|
||||
%223 = insertelement <8 x i32> %222, i32 %219, i32 2
|
||||
%224 = insertelement <8 x i32> %223, i32 %220, i32 3
|
||||
br label %LOOP
|
||||
|
||||
LOOP: ; preds = %ENDIF, %main_body
|
||||
%temp24.0 = phi float [ 1.000000e+00, %main_body ], [ %258, %ENDIF ]
|
||||
%temp28.0 = phi float [ %211, %main_body ], [ %253, %ENDIF ]
|
||||
%temp29.0 = phi float [ %213, %main_body ], [ %255, %ENDIF ]
|
||||
%temp30.0 = phi float [ 1.000000e+00, %main_body ], [ %257, %ENDIF ]
|
||||
%225 = fcmp oge float %temp24.0, %191
|
||||
%226 = sext i1 %225 to i32
|
||||
%227 = bitcast i32 %226 to float
|
||||
%228 = bitcast float %227 to i32
|
||||
%229 = icmp ne i32 %228, 0
|
||||
br i1 %229, label %IF, label %ENDIF
|
||||
|
||||
IF: ; preds = %LOOP
|
||||
%230 = bitcast float %136 to i32
|
||||
%231 = bitcast float %182 to i32
|
||||
%232 = bitcast float %137 to i32
|
||||
%233 = bitcast float %183 to i32
|
||||
%234 = insertelement <8 x i32> undef, i32 %230, i32 0
|
||||
%235 = insertelement <8 x i32> %234, i32 %231, i32 1
|
||||
%236 = insertelement <8 x i32> %235, i32 %232, i32 2
|
||||
%237 = insertelement <8 x i32> %236, i32 %233, i32 3
|
||||
br label %LOOP65
|
||||
|
||||
ENDIF: ; preds = %LOOP
|
||||
%238 = bitcast float %temp28.0 to i32
|
||||
%239 = bitcast float %temp29.0 to i32
|
||||
%240 = insertelement <8 x i32> %224, i32 %238, i32 4
|
||||
%241 = insertelement <8 x i32> %240, i32 %239, i32 5
|
||||
%242 = insertelement <8 x i32> %241, i32 undef, i32 6
|
||||
%243 = insertelement <8 x i32> %242, i32 undef, i32 7
|
||||
%244 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %243, <32 x i8> %62, <16 x i8> %64, i32 2)
|
||||
%245 = extractelement <4 x float> %244, i32 3
|
||||
%246 = fcmp oge float %temp30.0, %245
|
||||
%247 = sext i1 %246 to i32
|
||||
%248 = bitcast i32 %247 to float
|
||||
%249 = bitcast float %248 to i32
|
||||
%250 = and i32 %249, 1065353216
|
||||
%251 = bitcast i32 %250 to float
|
||||
%252 = fmul float %214, %251
|
||||
%253 = fadd float %252, %temp28.0
|
||||
%254 = fmul float %215, %251
|
||||
%255 = fadd float %254, %temp29.0
|
||||
%256 = fmul float %216, %251
|
||||
%257 = fadd float %256, %temp30.0
|
||||
%258 = fadd float %temp24.0, 1.000000e+00
|
||||
br label %LOOP
|
||||
|
||||
LOOP65: ; preds = %ENDIF66, %IF
|
||||
%temp24.1 = phi float [ 0.000000e+00, %IF ], [ %610, %ENDIF66 ]
|
||||
%temp28.1 = phi float [ %temp28.0, %IF ], [ %605, %ENDIF66 ]
|
||||
%temp29.1 = phi float [ %temp29.0, %IF ], [ %607, %ENDIF66 ]
|
||||
%temp30.1 = phi float [ %temp30.0, %IF ], [ %609, %ENDIF66 ]
|
||||
%temp32.0 = phi float [ 1.000000e+00, %IF ], [ %611, %ENDIF66 ]
|
||||
%259 = fcmp oge float %temp24.1, %195
|
||||
%260 = sext i1 %259 to i32
|
||||
%261 = bitcast i32 %260 to float
|
||||
%262 = bitcast float %261 to i32
|
||||
%263 = icmp ne i32 %262, 0
|
||||
br i1 %263, label %IF67, label %ENDIF66
|
||||
|
||||
IF67: ; preds = %LOOP65
|
||||
%264 = bitcast float %136 to i32
|
||||
%265 = bitcast float %182 to i32
|
||||
%266 = bitcast float %137 to i32
|
||||
%267 = bitcast float %183 to i32
|
||||
%268 = bitcast float %temp28.1 to i32
|
||||
%269 = bitcast float %temp29.1 to i32
|
||||
%270 = insertelement <8 x i32> undef, i32 %264, i32 0
|
||||
%271 = insertelement <8 x i32> %270, i32 %265, i32 1
|
||||
%272 = insertelement <8 x i32> %271, i32 %266, i32 2
|
||||
%273 = insertelement <8 x i32> %272, i32 %267, i32 3
|
||||
%274 = insertelement <8 x i32> %273, i32 %268, i32 4
|
||||
%275 = insertelement <8 x i32> %274, i32 %269, i32 5
|
||||
%276 = insertelement <8 x i32> %275, i32 undef, i32 6
|
||||
%277 = insertelement <8 x i32> %276, i32 undef, i32 7
|
||||
%278 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %277, <32 x i8> %66, <16 x i8> %68, i32 2)
|
||||
%279 = extractelement <4 x float> %278, i32 0
|
||||
%280 = extractelement <4 x float> %278, i32 1
|
||||
%281 = extractelement <4 x float> %278, i32 2
|
||||
%282 = extractelement <4 x float> %278, i32 3
|
||||
%283 = fmul float %282, %47
|
||||
%284 = bitcast float %136 to i32
|
||||
%285 = bitcast float %182 to i32
|
||||
%286 = bitcast float %137 to i32
|
||||
%287 = bitcast float %183 to i32
|
||||
%288 = bitcast float %temp28.1 to i32
|
||||
%289 = bitcast float %temp29.1 to i32
|
||||
%290 = insertelement <8 x i32> undef, i32 %284, i32 0
|
||||
%291 = insertelement <8 x i32> %290, i32 %285, i32 1
|
||||
%292 = insertelement <8 x i32> %291, i32 %286, i32 2
|
||||
%293 = insertelement <8 x i32> %292, i32 %287, i32 3
|
||||
%294 = insertelement <8 x i32> %293, i32 %288, i32 4
|
||||
%295 = insertelement <8 x i32> %294, i32 %289, i32 5
|
||||
%296 = insertelement <8 x i32> %295, i32 undef, i32 6
|
||||
%297 = insertelement <8 x i32> %296, i32 undef, i32 7
|
||||
%298 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %297, <32 x i8> %82, <16 x i8> %84, i32 2)
|
||||
%299 = extractelement <4 x float> %298, i32 0
|
||||
%300 = extractelement <4 x float> %298, i32 1
|
||||
%301 = extractelement <4 x float> %298, i32 2
|
||||
%302 = bitcast float %136 to i32
|
||||
%303 = bitcast float %182 to i32
|
||||
%304 = bitcast float %137 to i32
|
||||
%305 = bitcast float %183 to i32
|
||||
%306 = bitcast float %temp28.1 to i32
|
||||
%307 = bitcast float %temp29.1 to i32
|
||||
%308 = insertelement <8 x i32> undef, i32 %302, i32 0
|
||||
%309 = insertelement <8 x i32> %308, i32 %303, i32 1
|
||||
%310 = insertelement <8 x i32> %309, i32 %304, i32 2
|
||||
%311 = insertelement <8 x i32> %310, i32 %305, i32 3
|
||||
%312 = insertelement <8 x i32> %311, i32 %306, i32 4
|
||||
%313 = insertelement <8 x i32> %312, i32 %307, i32 5
|
||||
%314 = insertelement <8 x i32> %313, i32 undef, i32 6
|
||||
%315 = insertelement <8 x i32> %314, i32 undef, i32 7
|
||||
%316 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %315, <32 x i8> %78, <16 x i8> %80, i32 2)
|
||||
%317 = extractelement <4 x float> %316, i32 0
|
||||
%318 = extractelement <4 x float> %316, i32 1
|
||||
%319 = extractelement <4 x float> %316, i32 2
|
||||
%320 = fmul float %317, %23
|
||||
%321 = fmul float %318, %24
|
||||
%322 = fmul float %319, %25
|
||||
%323 = fmul float %299, %26
|
||||
%324 = fadd float %323, %320
|
||||
%325 = fmul float %300, %27
|
||||
%326 = fadd float %325, %321
|
||||
%327 = fmul float %301, %28
|
||||
%328 = fadd float %327, %322
|
||||
%329 = fadd float %279, %324
|
||||
%330 = fadd float %280, %326
|
||||
%331 = fadd float %281, %328
|
||||
%332 = bitcast float %136 to i32
|
||||
%333 = bitcast float %182 to i32
|
||||
%334 = bitcast float %137 to i32
|
||||
%335 = bitcast float %183 to i32
|
||||
%336 = bitcast float %temp28.1 to i32
|
||||
%337 = bitcast float %temp29.1 to i32
|
||||
%338 = insertelement <8 x i32> undef, i32 %332, i32 0
|
||||
%339 = insertelement <8 x i32> %338, i32 %333, i32 1
|
||||
%340 = insertelement <8 x i32> %339, i32 %334, i32 2
|
||||
%341 = insertelement <8 x i32> %340, i32 %335, i32 3
|
||||
%342 = insertelement <8 x i32> %341, i32 %336, i32 4
|
||||
%343 = insertelement <8 x i32> %342, i32 %337, i32 5
|
||||
%344 = insertelement <8 x i32> %343, i32 undef, i32 6
|
||||
%345 = insertelement <8 x i32> %344, i32 undef, i32 7
|
||||
%346 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %345, <32 x i8> %62, <16 x i8> %64, i32 2)
|
||||
%347 = extractelement <4 x float> %346, i32 0
|
||||
%348 = extractelement <4 x float> %346, i32 1
|
||||
%349 = extractelement <4 x float> %346, i32 2
|
||||
%350 = fadd float %347, -5.000000e-01
|
||||
%351 = fadd float %348, -5.000000e-01
|
||||
%352 = fadd float %349, -5.000000e-01
|
||||
%353 = fmul float %350, %350
|
||||
%354 = fmul float %351, %351
|
||||
%355 = fadd float %354, %353
|
||||
%356 = fmul float %352, %352
|
||||
%357 = fadd float %355, %356
|
||||
%358 = call float @llvm.AMDGPU.rsq(float %357)
|
||||
%359 = fmul float %350, %358
|
||||
%360 = fmul float %351, %358
|
||||
%361 = fmul float %352, %358
|
||||
%362 = bitcast float %136 to i32
|
||||
%363 = bitcast float %182 to i32
|
||||
%364 = bitcast float %137 to i32
|
||||
%365 = bitcast float %183 to i32
|
||||
%366 = bitcast float %temp28.1 to i32
|
||||
%367 = bitcast float %temp29.1 to i32
|
||||
%368 = insertelement <8 x i32> undef, i32 %362, i32 0
|
||||
%369 = insertelement <8 x i32> %368, i32 %363, i32 1
|
||||
%370 = insertelement <8 x i32> %369, i32 %364, i32 2
|
||||
%371 = insertelement <8 x i32> %370, i32 %365, i32 3
|
||||
%372 = insertelement <8 x i32> %371, i32 %366, i32 4
|
||||
%373 = insertelement <8 x i32> %372, i32 %367, i32 5
|
||||
%374 = insertelement <8 x i32> %373, i32 undef, i32 6
|
||||
%375 = insertelement <8 x i32> %374, i32 undef, i32 7
|
||||
%376 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %375, <32 x i8> %70, <16 x i8> %72, i32 2)
|
||||
%377 = extractelement <4 x float> %376, i32 0
|
||||
%378 = extractelement <4 x float> %376, i32 1
|
||||
%379 = extractelement <4 x float> %376, i32 2
|
||||
%380 = extractelement <4 x float> %376, i32 3
|
||||
%381 = fsub float -0.000000e+00, %95
|
||||
%382 = fsub float -0.000000e+00, %96
|
||||
%383 = fsub float -0.000000e+00, %97
|
||||
%384 = fmul float %359, %381
|
||||
%385 = fmul float %360, %382
|
||||
%386 = fadd float %385, %384
|
||||
%387 = fmul float %361, %383
|
||||
%388 = fadd float %386, %387
|
||||
%389 = fmul float %388, %359
|
||||
%390 = fmul float %388, %360
|
||||
%391 = fmul float %388, %361
|
||||
%392 = fmul float 2.000000e+00, %389
|
||||
%393 = fmul float 2.000000e+00, %390
|
||||
%394 = fmul float 2.000000e+00, %391
|
||||
%395 = fsub float -0.000000e+00, %392
|
||||
%396 = fadd float %381, %395
|
||||
%397 = fsub float -0.000000e+00, %393
|
||||
%398 = fadd float %382, %397
|
||||
%399 = fsub float -0.000000e+00, %394
|
||||
%400 = fadd float %383, %399
|
||||
%401 = fmul float %396, %98
|
||||
%402 = fmul float %396, %99
|
||||
%403 = fmul float %396, %100
|
||||
%404 = fmul float %398, %101
|
||||
%405 = fadd float %404, %401
|
||||
%406 = fmul float %398, %102
|
||||
%407 = fadd float %406, %402
|
||||
%408 = fmul float %398, %103
|
||||
%409 = fadd float %408, %403
|
||||
%410 = fmul float %400, %104
|
||||
%411 = fadd float %410, %405
|
||||
%412 = fmul float %400, %105
|
||||
%413 = fadd float %412, %407
|
||||
%414 = fmul float %400, %106
|
||||
%415 = fadd float %414, %409
|
||||
%416 = bitcast float %136 to i32
|
||||
%417 = bitcast float %182 to i32
|
||||
%418 = bitcast float %137 to i32
|
||||
%419 = bitcast float %183 to i32
|
||||
%420 = bitcast float %temp28.1 to i32
|
||||
%421 = bitcast float %temp29.1 to i32
|
||||
%422 = insertelement <8 x i32> undef, i32 %416, i32 0
|
||||
%423 = insertelement <8 x i32> %422, i32 %417, i32 1
|
||||
%424 = insertelement <8 x i32> %423, i32 %418, i32 2
|
||||
%425 = insertelement <8 x i32> %424, i32 %419, i32 3
|
||||
%426 = insertelement <8 x i32> %425, i32 %420, i32 4
|
||||
%427 = insertelement <8 x i32> %426, i32 %421, i32 5
|
||||
%428 = insertelement <8 x i32> %427, i32 undef, i32 6
|
||||
%429 = insertelement <8 x i32> %428, i32 undef, i32 7
|
||||
%430 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %429, <32 x i8> %86, <16 x i8> %88, i32 2)
|
||||
%431 = extractelement <4 x float> %430, i32 0
|
||||
%432 = extractelement <4 x float> %430, i32 1
|
||||
%433 = extractelement <4 x float> %430, i32 2
|
||||
%434 = fmul float %48, %411
|
||||
%435 = fmul float %49, %411
|
||||
%436 = fmul float %50, %411
|
||||
%437 = fmul float %51, %413
|
||||
%438 = fadd float %437, %434
|
||||
%439 = fmul float %52, %413
|
||||
%440 = fadd float %439, %435
|
||||
%441 = fmul float %53, %413
|
||||
%442 = fadd float %441, %436
|
||||
%443 = fmul float %54, %415
|
||||
%444 = fadd float %443, %438
|
||||
%445 = fmul float %55, %415
|
||||
%446 = fadd float %445, %440
|
||||
%447 = fmul float %56, %415
|
||||
%448 = fadd float %447, %442
|
||||
%449 = insertelement <4 x float> undef, float %444, i32 0
|
||||
%450 = insertelement <4 x float> %449, float %446, i32 1
|
||||
%451 = insertelement <4 x float> %450, float %448, i32 2
|
||||
%452 = insertelement <4 x float> %451, float %195, i32 3
|
||||
%453 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %452)
|
||||
%454 = extractelement <4 x float> %453, i32 0
|
||||
%455 = extractelement <4 x float> %453, i32 1
|
||||
%456 = extractelement <4 x float> %453, i32 2
|
||||
%457 = extractelement <4 x float> %453, i32 3
|
||||
%458 = call float @fabs(float %456)
|
||||
%459 = fdiv float 1.000000e+00, %458
|
||||
%460 = fmul float %454, %459
|
||||
%461 = fadd float %460, 1.500000e+00
|
||||
%462 = fmul float %455, %459
|
||||
%463 = fadd float %462, 1.500000e+00
|
||||
%464 = bitcast float %463 to i32
|
||||
%465 = bitcast float %461 to i32
|
||||
%466 = bitcast float %457 to i32
|
||||
%467 = insertelement <4 x i32> undef, i32 %464, i32 0
|
||||
%468 = insertelement <4 x i32> %467, i32 %465, i32 1
|
||||
%469 = insertelement <4 x i32> %468, i32 %466, i32 2
|
||||
%470 = insertelement <4 x i32> %469, i32 undef, i32 3
|
||||
%471 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %470, <32 x i8> %90, <16 x i8> %92, i32 4)
|
||||
%472 = extractelement <4 x float> %471, i32 0
|
||||
%473 = extractelement <4 x float> %471, i32 1
|
||||
%474 = extractelement <4 x float> %471, i32 2
|
||||
%475 = fmul float %431, %472
|
||||
%476 = fadd float %475, %329
|
||||
%477 = fmul float %432, %473
|
||||
%478 = fadd float %477, %330
|
||||
%479 = fmul float %433, %474
|
||||
%480 = fadd float %479, %331
|
||||
%481 = fmul float %107, %107
|
||||
%482 = fmul float %108, %108
|
||||
%483 = fadd float %482, %481
|
||||
%484 = fmul float %109, %109
|
||||
%485 = fadd float %483, %484
|
||||
%486 = call float @llvm.AMDGPU.rsq(float %485)
|
||||
%487 = fmul float %107, %486
|
||||
%488 = fmul float %108, %486
|
||||
%489 = fmul float %109, %486
|
||||
%490 = fmul float %377, %40
|
||||
%491 = fmul float %378, %41
|
||||
%492 = fmul float %379, %42
|
||||
%493 = fmul float %359, %487
|
||||
%494 = fmul float %360, %488
|
||||
%495 = fadd float %494, %493
|
||||
%496 = fmul float %361, %489
|
||||
%497 = fadd float %495, %496
|
||||
%498 = fmul float %497, %359
|
||||
%499 = fmul float %497, %360
|
||||
%500 = fmul float %497, %361
|
||||
%501 = fmul float 2.000000e+00, %498
|
||||
%502 = fmul float 2.000000e+00, %499
|
||||
%503 = fmul float 2.000000e+00, %500
|
||||
%504 = fsub float -0.000000e+00, %501
|
||||
%505 = fadd float %487, %504
|
||||
%506 = fsub float -0.000000e+00, %502
|
||||
%507 = fadd float %488, %506
|
||||
%508 = fsub float -0.000000e+00, %503
|
||||
%509 = fadd float %489, %508
|
||||
%510 = fmul float %95, %95
|
||||
%511 = fmul float %96, %96
|
||||
%512 = fadd float %511, %510
|
||||
%513 = fmul float %97, %97
|
||||
%514 = fadd float %512, %513
|
||||
%515 = call float @llvm.AMDGPU.rsq(float %514)
|
||||
%516 = fmul float %95, %515
|
||||
%517 = fmul float %96, %515
|
||||
%518 = fmul float %97, %515
|
||||
%519 = fmul float %505, %516
|
||||
%520 = fmul float %507, %517
|
||||
%521 = fadd float %520, %519
|
||||
%522 = fmul float %509, %518
|
||||
%523 = fadd float %521, %522
|
||||
%524 = fsub float -0.000000e+00, %523
|
||||
%525 = fcmp uge float %524, 0.000000e+00
|
||||
%526 = select i1 %525, float %524, float 0.000000e+00
|
||||
%527 = fmul float %43, %380
|
||||
%528 = fadd float %527, 1.000000e+00
|
||||
%529 = call float @llvm.pow.f32(float %526, float %528)
|
||||
%530 = fmul float %476, %37
|
||||
%531 = fmul float %478, %38
|
||||
%532 = fmul float %480, %39
|
||||
%533 = fmul float %359, %487
|
||||
%534 = fmul float %360, %488
|
||||
%535 = fadd float %534, %533
|
||||
%536 = fmul float %361, %489
|
||||
%537 = fadd float %535, %536
|
||||
%538 = fcmp uge float %537, 0.000000e+00
|
||||
%539 = select i1 %538, float %537, float 0.000000e+00
|
||||
%540 = fmul float %530, %539
|
||||
%541 = fmul float %531, %539
|
||||
%542 = fmul float %532, %539
|
||||
%543 = fmul float %490, %529
|
||||
%544 = fadd float %543, %540
|
||||
%545 = fmul float %491, %529
|
||||
%546 = fadd float %545, %541
|
||||
%547 = fmul float %492, %529
|
||||
%548 = fadd float %547, %542
|
||||
%549 = fmul float %476, %34
|
||||
%550 = fmul float %478, %35
|
||||
%551 = fmul float %480, %36
|
||||
%552 = fmul float %544, %57
|
||||
%553 = fadd float %552, %549
|
||||
%554 = fmul float %546, %58
|
||||
%555 = fadd float %554, %550
|
||||
%556 = fmul float %548, %59
|
||||
%557 = fadd float %556, %551
|
||||
%558 = bitcast float %136 to i32
|
||||
%559 = bitcast float %182 to i32
|
||||
%560 = bitcast float %137 to i32
|
||||
%561 = bitcast float %183 to i32
|
||||
%562 = bitcast float %temp28.1 to i32
|
||||
%563 = bitcast float %temp29.1 to i32
|
||||
%564 = insertelement <8 x i32> undef, i32 %558, i32 0
|
||||
%565 = insertelement <8 x i32> %564, i32 %559, i32 1
|
||||
%566 = insertelement <8 x i32> %565, i32 %560, i32 2
|
||||
%567 = insertelement <8 x i32> %566, i32 %561, i32 3
|
||||
%568 = insertelement <8 x i32> %567, i32 %562, i32 4
|
||||
%569 = insertelement <8 x i32> %568, i32 %563, i32 5
|
||||
%570 = insertelement <8 x i32> %569, i32 undef, i32 6
|
||||
%571 = insertelement <8 x i32> %570, i32 undef, i32 7
|
||||
%572 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %571, <32 x i8> %74, <16 x i8> %76, i32 2)
|
||||
%573 = extractelement <4 x float> %572, i32 0
|
||||
%574 = extractelement <4 x float> %572, i32 1
|
||||
%575 = extractelement <4 x float> %572, i32 2
|
||||
%576 = fmul float %573, %44
|
||||
%577 = fadd float %576, %553
|
||||
%578 = fmul float %574, %45
|
||||
%579 = fadd float %578, %555
|
||||
%580 = fmul float %575, %46
|
||||
%581 = fadd float %580, %557
|
||||
%582 = call i32 @llvm.SI.packf16(float %577, float %579)
|
||||
%583 = bitcast i32 %582 to float
|
||||
%584 = call i32 @llvm.SI.packf16(float %581, float %283)
|
||||
%585 = bitcast i32 %584 to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %583, float %585, float %583, float %585)
|
||||
ret void
|
||||
|
||||
ENDIF66: ; preds = %LOOP65
|
||||
%586 = bitcast float %temp28.1 to i32
|
||||
%587 = bitcast float %temp29.1 to i32
|
||||
%588 = insertelement <8 x i32> %237, i32 %586, i32 4
|
||||
%589 = insertelement <8 x i32> %588, i32 %587, i32 5
|
||||
%590 = insertelement <8 x i32> %589, i32 undef, i32 6
|
||||
%591 = insertelement <8 x i32> %590, i32 undef, i32 7
|
||||
%592 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %591, <32 x i8> %62, <16 x i8> %64, i32 2)
|
||||
%593 = extractelement <4 x float> %592, i32 3
|
||||
%594 = fcmp oge float %temp30.1, %593
|
||||
%595 = sext i1 %594 to i32
|
||||
%596 = bitcast i32 %595 to float
|
||||
%597 = bitcast float %596 to i32
|
||||
%598 = and i32 %597, 1065353216
|
||||
%599 = bitcast i32 %598 to float
|
||||
%600 = fmul float 5.000000e-01, %temp32.0
|
||||
%601 = fsub float -0.000000e+00, %600
|
||||
%602 = fmul float %599, %temp32.0
|
||||
%603 = fadd float %602, %601
|
||||
%604 = fmul float %214, %603
|
||||
%605 = fadd float %604, %temp28.1
|
||||
%606 = fmul float %215, %603
|
||||
%607 = fadd float %606, %temp29.1
|
||||
%608 = fmul float %216, %603
|
||||
%609 = fadd float %608, %temp30.1
|
||||
%610 = fadd float %temp24.1, 1.000000e+00
|
||||
%611 = fmul float %temp32.0, 5.000000e-01
|
||||
br label %LOOP65
|
||||
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare i32 @llvm.SI.tid() #2

; Function Attrs: readonly
declare float @ceil(float) #3

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #4

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { readonly }
attributes #4 = { nounwind readonly }

!0 = metadata !{metadata !"const", null, i32 1}

@ -1,10 +1,20 @@
; RUN: llc -march=sparc < %s | FileCheck %s
; RUN: llc -march=sparc < %s | FileCheck %s --check-prefix=V8
; RUN: llc -march=sparcv9 < %s | FileCheck %s --check-prefix=SPARC64

; V8-LABEL: variable_alloca_with_adj_call_stack
; V8: save %sp, -96, %sp
; V8: add {{.+}}, 96, %o0
; V8: add %sp, -16, %sp
; V8: call foo
; V8: add %sp, 16, %sp

; SPARC64-LABEL: variable_alloca_with_adj_call_stack
; SPARC64: save %sp, -128, %sp
; SPARC64: add {{.+}}, 128, %o0
; SPARC64: add %sp, -80, %sp
; SPARC64: call foo
; SPARC64: add %sp, 80, %sp

; CHECK: variable_alloca_with_adj_call_stack
; CHECK: save %sp, -96, %sp
; CHECK: add %sp, -16, %sp
; CHECK: call foo
; CHECK: add %sp, 16, %sp
define void @variable_alloca_with_adj_call_stack(i32 %num) {
entry:
%0 = alloca i8, i32 %num, align 8

@ -380,8 +380,6 @@ define signext i32 @ret_nozext(i32 signext %a0) {
; CHECK-LABEL: test_register_directive
; CHECK: .register %g2, #scratch
; CHECK: .register %g3, #scratch
; CHECK: .register %g6, #ignore
; CHECK: .register %g7, #ignore
; CHECK: add %i0, 2, %g2
; CHECK: add %i0, 3, %g3
define i32 @test_register_directive(i32 %i0) {
@ -392,3 +390,24 @@ entry:
%2 = add nsw i32 %0, %1
ret i32 %2
}

; CHECK-LABEL: test_large_stack

; CHECK: sethi 16, %g1
; CHECK: xor %g1, -176, %g1
; CHECK: save %sp, %g1, %sp

; CHECK: sethi 14, %g1
; CHECK: xor %g1, -1, %g1
; CHECK: add %g1, %fp, %g1
; CHECK: call use_buf

define i32 @test_large_stack() {
entry:
%buffer1 = alloca [16384 x i8], align 8
%buffer1.sub = getelementptr inbounds [16384 x i8]* %buffer1, i32 0, i32 0
%0 = call i32 @use_buf(i32 16384, i8* %buffer1.sub)
ret i32 %0
}

declare i32 @use_buf(i32, i8*)

@ -165,3 +165,16 @@ define i64 @f13(i64 %a, i32 %b) {
%or = or i64 %shift, %low
ret i64 %or
}

; We previously wrongly removed the upper AND as dead.
define i64 @f14(i64 %a, i64 %b) {
; CHECK-LABEL: f14:
; CHECK: risbg {{%r[0-5]}}, %r2, 6, 134, 0
; CHECK: br %r14
%and1 = and i64 %a, 144115188075855872
%and2 = and i64 %b, 15
%or = or i64 %and1, %and2
%res = icmp eq i64 %or, 0
%ext = sext i1 %res to i64
ret i64 %ext
}

10
external/bsd/llvm/dist/llvm/test/CodeGen/X86/pr18054.ll
vendored
Normal file
@ -0,0 +1,10 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=penryn | FileCheck %s

define void @foo(<16 x i32>* %p, <16 x i1> %x) {
%ret = sext <16 x i1> %x to <16 x i32>
store <16 x i32> %ret, <16 x i32>* %p
ret void
; CHECK: foo
; CHECK-NOT: pmovsxbd
; CHECK: ret
}
@ -7141,3 +7141,178 @@
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Lower Precision Narrow, Rounding To
// Odd
//----------------------------------------------------------------------

fcvtxn s0, s1

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtxn s0, s1
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
// With Ties To Away
//----------------------------------------------------------------------

fcvtas s0, d0
fcvtas d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtas s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtas d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding To
// Nearest With Ties To Away
//----------------------------------------------------------------------

fcvtau s0, d0
fcvtau d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtau s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtau d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding Toward
// Minus Infinity
//----------------------------------------------------------------------

fcvtms s0, d0
fcvtms d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtms s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtms d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
// Minus Infinity
//----------------------------------------------------------------------

fcvtmu s0, d0
fcvtmu d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtmu s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtmu d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
// With Ties To Even
//----------------------------------------------------------------------

fcvtns s0, d0
fcvtns d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtns s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtns d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding To
// Nearest With Ties To Even
//----------------------------------------------------------------------

fcvtnu s0, d0
fcvtnu d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtnu s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtnu d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding Toward
// Positive Infinity
//----------------------------------------------------------------------

fcvtps s0, d0
fcvtps d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtps s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtps d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
// Positive Infinity
//----------------------------------------------------------------------

fcvtpu s0, d0
fcvtpu d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtpu s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtpu d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding Toward Zero
//----------------------------------------------------------------------

fcvtzs s0, d0
fcvtzs d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtzs s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtzs d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
// Zero
//----------------------------------------------------------------------

fcvtzu s0, d0
fcvtzu d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtzu s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtzu d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Absolute Difference
//----------------------------------------------------------------------

fabd s29, d24, s20
fabd d29, s24, d20

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fabd s29, d24, s20
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fabd d29, s24, d20
// CHECK-ERROR: ^

@ -9,6 +9,16 @@
abs d29, d24

// CHECK: abs d29, d24 // encoding: [0x1d,0xbb,0xe0,0x5e]

//----------------------------------------------------------------------
// Scalar Floating-point Absolute Difference
//----------------------------------------------------------------------

fabd s29, s24, s20
fabd d29, d24, d20

// CHECK: fabd s29, s24, s20 // encoding: [0x1d,0xd7,0xb4,0x7e]
// CHECK: fabd d29, d24, d20 // encoding: [0x1d,0xd7,0xf4,0x7e]

//----------------------------------------------------------------------
// Scalar Signed Saturating Absolute Value

@ -61,3 +61,121 @@

// CHECK: fcvtzu s21, s12, #1 // encoding: [0x95,0xfd,0x3f,0x7f]
// CHECK: fcvtzu d21, d12, #1 // encoding: [0x95,0xfd,0x7f,0x7f]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Lower Precision Narrow, Rounding To
// Odd
//----------------------------------------------------------------------

fcvtxn s22, d13

// CHECK: fcvtxn s22, d13 // encoding: [0xb6,0x69,0x61,0x7e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
// With Ties To Away
//----------------------------------------------------------------------

fcvtas s12, s13
fcvtas d21, d14

// CHECK: fcvtas s12, s13 // encoding: [0xac,0xc9,0x21,0x5e]
// CHECK: fcvtas d21, d14 // encoding: [0xd5,0xc9,0x61,0x5e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding To
// Nearest With Ties To Away
//----------------------------------------------------------------------

fcvtau s12, s13
fcvtau d21, d14

// CHECK: fcvtau s12, s13 // encoding: [0xac,0xc9,0x21,0x7e]
// CHECK: fcvtau d21, d14 // encoding: [0xd5,0xc9,0x61,0x7e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding Toward
// Minus Infinity
//----------------------------------------------------------------------

fcvtms s22, s13
fcvtms d21, d14

// CHECK: fcvtms s22, s13 // encoding: [0xb6,0xb9,0x21,0x5e]
// CHECK: fcvtms d21, d14 // encoding: [0xd5,0xb9,0x61,0x5e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
// Minus Infinity
//----------------------------------------------------------------------

fcvtmu s12, s13
fcvtmu d21, d14

// CHECK: fcvtmu s12, s13 // encoding: [0xac,0xb9,0x21,0x7e]
// CHECK: fcvtmu d21, d14 // encoding: [0xd5,0xb9,0x61,0x7e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
// With Ties To Even
//----------------------------------------------------------------------

fcvtns s22, s13
fcvtns d21, d14

// CHECK: fcvtns s22, s13 // encoding: [0xb6,0xa9,0x21,0x5e]
// CHECK: fcvtns d21, d14 // encoding: [0xd5,0xa9,0x61,0x5e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding To
// Nearest With Ties To Even
//----------------------------------------------------------------------

fcvtnu s12, s13
fcvtnu d21, d14

// CHECK: fcvtnu s12, s13 // encoding: [0xac,0xa9,0x21,0x7e]
// CHECK: fcvtnu d21, d14 // encoding: [0xd5,0xa9,0x61,0x7e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding Toward
// Positive Infinity
//----------------------------------------------------------------------

fcvtps s22, s13
fcvtps d21, d14

// CHECK: fcvtps s22, s13 // encoding: [0xb6,0xa9,0xa1,0x5e]
// CHECK: fcvtps d21, d14 // encoding: [0xd5,0xa9,0xe1,0x5e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
// Positive Infinity
//----------------------------------------------------------------------

fcvtpu s12, s13
fcvtpu d21, d14

// CHECK: fcvtpu s12, s13 // encoding: [0xac,0xa9,0xa1,0x7e]
// CHECK: fcvtpu d21, d14 // encoding: [0xd5,0xa9,0xe1,0x7e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding Toward Zero
//----------------------------------------------------------------------

fcvtzs s12, s13
fcvtzs d21, d14

// CHECK: fcvtzs s12, s13 // encoding: [0xac,0xb9,0xa1,0x5e]
// CHECK: fcvtzs d21, d14 // encoding: [0xd5,0xb9,0xe1,0x5e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
// Zero
//----------------------------------------------------------------------

fcvtzu s12, s13
fcvtzu d21, d14

// CHECK: fcvtzu s12, s13 // encoding: [0xac,0xb9,0xa1,0x7e]
// CHECK: fcvtzu d21, d14 // encoding: [0xd5,0xb9,0xe1,0x7e]
15
external/bsd/llvm/dist/llvm/test/MC/ARM/align_arm_2_thumb.s
vendored
Normal file
@ -0,0 +1,15 @@
@ RUN: llvm-mc -triple armv7-none-linux -filetype=obj -o %t.o %s
@ RUN: llvm-objdump -triple thumbv7-none-linux -d %t.o | FileCheck --check-prefix=ARM_2_THUMB %s

@ RUN: llvm-mc -triple armv7-apple-darwin -filetype=obj -o %t_darwin.o %s
@ RUN: llvm-objdump -triple thumbv7-apple-darwin -d %t_darwin.o | FileCheck --check-prefix=ARM_2_THUMB %s

.syntax unified
.code 16
@ ARM_2_THUMB-LABEL: foo
foo:
add r0, r0
.align 3
@ ARM_2_THUMB: 2: 00 bf nop
add r0, r0
15
external/bsd/llvm/dist/llvm/test/MC/ARM/align_thumb_2_arm.s
vendored
Normal file
@ -0,0 +1,15 @@
@ RUN: llvm-mc -triple thumbv7-none-linux -filetype=obj -o %t.o %s
@ RUN: llvm-objdump -triple armv7-none-linux -d %t.o | FileCheck --check-prefix=THUMB_2_ARM %s

@ RUN: llvm-mc -triple thumbv7-apple-darwin -filetype=obj -o %t_darwin.o %s
@ RUN: llvm-objdump -triple armv7-apple-darwin -d %t_darwin.o | FileCheck --check-prefix=THUMB_2_ARM %s

.syntax unified
.code 32
@ THUMB_2_ARM-LABEL: foo
foo:
add r0, r0
.align 3
@ THUMB_2_ARM: 4: 00 f0 20 e3 nop
add r0, r0
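
@ Editor's note: read together with align_arm_2_thumb.s above, this test pins
@ down that alignment padding follows the current .code mode rather than the
@ module triple: the Thumb region is padded with the Thumb nop (00 bf) and the
@ ARM region with the ARM nop (00 f0 20 e3), as the CHECK lines verify.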
14
external/bsd/llvm/dist/llvm/test/MC/COFF/eh-frame.s
vendored
Normal file
@ -0,0 +1,14 @@
// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - | llvm-readobj -s | FileCheck %s

.def _main;
.scl 2;
.type 32;
.endef
.text
.globl _main
_main:
.cfi_startproc
ret
.cfi_endproc

// CHECK: Name: .eh_frame
188
external/bsd/llvm/dist/llvm/test/MC/COFF/section-comdat.s
vendored
Normal file
@ -0,0 +1,188 @@
// RUN: llvm-mc -triple i386-pc-win32 -filetype=obj %s | llvm-readobj -s -t | FileCheck %s
// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -t | FileCheck %s

.section assocSec
.linkonce
.long 1

.section secName, "dr", discard, "Symbol1"
.globl Symbol1
Symbol1:
.long 1

.section secName, "dr", one_only, "Symbol2"
.globl Symbol2
Symbol2:
.long 1

.section SecName, "dr", same_size, "Symbol3"
.globl Symbol3
Symbol3:
.long 1

.section SecName, "dr", same_contents, "Symbol4"
.globl Symbol4
Symbol4:
.long 1

.section SecName, "dr", associative assocSec, "Symbol5"
.globl Symbol5
Symbol5:
.long 1

.section SecName, "dr", largest, "Symbol6"
.globl Symbol6
Symbol6:
.long 1

.section SecName, "dr", newest, "Symbol7"
.globl Symbol7
Symbol7:
.long 1

// CHECK: Sections [
// CHECK: Section {
// CHECK: Number: 1
// CHECK: Name: assocSec
// CHECK: Characteristics [
// CHECK: IMAGE_SCN_LNK_COMDAT
// CHECK: ]
// CHECK: }
// CHECK: Section {
// CHECK: Number: 2
// CHECK: Name: secName
// CHECK: Characteristics [
// CHECK: IMAGE_SCN_LNK_COMDAT
// CHECK: ]
// CHECK: }
// CHECK: Section {
// CHECK: Number: 3
// CHECK: Name: secName
// CHECK: Characteristics [
// CHECK: IMAGE_SCN_LNK_COMDAT
// CHECK: ]
// CHECK: }
// CHECK: Section {
// CHECK: Number: 4
// CHECK: Name: SecName
// CHECK: Characteristics [
// CHECK: IMAGE_SCN_LNK_COMDAT
// CHECK: ]
// CHECK: }
// CHECK: Section {
// CHECK: Number: 5
// CHECK: Name: SecName
// CHECK: Characteristics [
// CHECK: IMAGE_SCN_LNK_COMDAT
// CHECK: ]
// CHECK: }
// CHECK: Section {
// CHECK: Number: 6
// CHECK: Name: SecName
// CHECK: Characteristics [
// CHECK: IMAGE_SCN_LNK_COMDAT
// CHECK: ]
// CHECK: }
// CHECK: Section {
// CHECK: Number: 7
// CHECK: Name: SecName
// CHECK: Characteristics [
// CHECK: IMAGE_SCN_LNK_COMDAT
// CHECK: ]
// CHECK: }
// CHECK: Section {
// CHECK: Number: 8
// CHECK: Name: SecName
// CHECK: Characteristics [
// CHECK: IMAGE_SCN_LNK_COMDAT
// CHECK: ]
// CHECK: }
// CHECK: ]
// CHECK: Symbols [
// CHECK: Symbol {
// CHECK: Name: assocSec
// CHECK: Section: assocSec (1)
// CHECK: AuxSectionDef {
// CHECK: Selection: Any
// CHECK: }
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: secName
// CHECK: Section: secName (2)
// CHECK: AuxSectionDef {
// CHECK: Selection: Any
// CHECK: }
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: secName
// CHECK: Section: secName (3)
// CHECK: AuxSectionDef {
// CHECK: Selection: NoDuplicates
// CHECK: }
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: SecName
// CHECK: Section: SecName (4)
// CHECK: AuxSectionDef {
// CHECK: Selection: SameSize
// CHECK: }
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: SecName
// CHECK: Section: SecName (5)
// CHECK: AuxSymbolCount: 1
// CHECK: AuxSectionDef {
// CHECK: Selection: ExactMatch
// CHECK: }
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: SecName
// CHECK: Section: SecName (6)
// CHECK: AuxSectionDef {
// CHECK: Selection: Associative
// CHECK: AssocSection: assocSec (1)
// CHECK: }
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: SecName
// CHECK: Section: SecName (7)
// CHECK: AuxSectionDef {
// CHECK: Selection: Largest
// CHECK: }
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: SecName
// CHECK: Section: SecName (8)
// CHECK: AuxSectionDef {
// CHECK: Selection: Newest (0x7)
// CHECK: }
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: Symbol1
// CHECK: Section: secName (2)
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: Symbol2
// CHECK: Section: secName (3)
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: Symbol3
// CHECK: Section: SecName (4)
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: Symbol4
// CHECK: Section: SecName (5)
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: Symbol5
// CHECK: Section: SecName (6)
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: Symbol6
// CHECK: Section: SecName (7)
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: Symbol7
// CHECK: Section: SecName (8)
// CHECK: }
// CHECK: ]
@ -674,6 +674,23 @@
0xf5 0xdd 0x23 0x4e
0xab 0xdc 0x77 0x4e

#----------------------------------------------------------------------
# Vector Shift Left long
#----------------------------------------------------------------------
# CHECK: shll2 v2.8h, v4.16b, #8
# CHECK: shll2 v6.4s, v8.8h, #16
# CHECK: shll2 v6.2d, v8.4s, #32
# CHECK: shll v2.8h, v4.8b, #8
# CHECK: shll v6.4s, v8.4h, #16
# CHECK: shll v6.2d, v8.2s, #32

0x82,0x38,0x21,0x6e
0x06,0x39,0x61,0x6e
0x06,0x39,0xa1,0x6e
0x82,0x38,0x21,0x2e
0x06,0x39,0x61,0x2e
0x06,0x39,0xa1,0x2e

#----------------------------------------------------------------------
# Vector Shift Left by Immediate
#----------------------------------------------------------------------
@ -2129,7 +2146,8 @@
# CHECK: ld1 {v0.b}[9], [x0], #1
# CHECK: ld2 {v15.h, v16.h}[7], [x15], #4
# CHECK: ld3 {v31.s, v0.s, v1.s}[3], [sp], x3
# CHECK: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #24
# CHECK: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32
# CHECK: ld4 {v0.h, v1.h, v2.h, v3.h}[7], [x0], x0
# CHECK: st1 {v0.d}[1], [x0], #8
# CHECK: st2 {v31.s, v0.s}[3], [sp], #8
# CHECK: st3 {v15.h, v16.h, v17.h}[7], [x15], #6
@ -2138,6 +2156,7 @@
0xef,0x59,0xff,0x4d
0xff,0xb3,0xc3,0x4d
0x00,0xa4,0xff,0x4d
0x00,0x78,0xe0,0x4d
0x00,0x84,0x9f,0x4d
0xff,0x93,0xbf,0x4d
0xef,0x79,0x9f,0x4d
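
# Editor's note: the ld4 change in the hunk above is simple addressing
# arithmetic: a post-indexed ld4 of one 64-bit lane advances the base by
# 4 registers x 8 bytes = 32 bytes, so the writeback immediate is #32,
# not the #24 the old CHECK line expected.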
@ -2510,3 +2529,110 @@
# CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
# CHECK: tbx v16.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.16b

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Lower Precision Narrow, Rounding To
# Odd
#----------------------------------------------------------------------
# CHECK: fcvtxn s22, d13
0xb6,0x69,0x61,0x7e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
# With Ties To Away
#----------------------------------------------------------------------
# CHECK: fcvtas s12, s13
# CHECK: fcvtas d21, d14

0xac,0xc9,0x21,0x5e
0xd5,0xc9,0x61,0x5e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Unsigned Integer, Rounding To
# Nearest With Ties To Away
#----------------------------------------------------------------------
# CHECK: fcvtau s12, s13
# CHECK: fcvtau d21, d14
0xac,0xc9,0x21,0x7e
0xd5,0xc9,0x61,0x7e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Signed Integer, Rounding Toward
# Minus Infinity
#----------------------------------------------------------------------
# CHECK: fcvtms s22, s13
# CHECK: fcvtms d21, d14
0xb6,0xb9,0x21,0x5e
0xd5,0xb9,0x61,0x5e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
# Minus Infinity
#----------------------------------------------------------------------
# CHECK: fcvtmu s12, s13
# CHECK: fcvtmu d21, d14
0xac,0xb9,0x21,0x7e
0xd5,0xb9,0x61,0x7e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
# With Ties To Even
#----------------------------------------------------------------------

# CHECK: fcvtns s22, s13
# CHECK: fcvtns d21, d14

0xb6,0xa9,0x21,0x5e
0xd5,0xa9,0x61,0x5e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Unsigned Integer, Rounding To
# Nearest With Ties To Even
#----------------------------------------------------------------------

# CHECK: fcvtnu s12, s13
# CHECK: fcvtnu d21, d14
0xac,0xa9,0x21,0x7e
0xd5,0xa9,0x61,0x7e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Signed Integer, Rounding Toward
# Positive Infinity
#----------------------------------------------------------------------
# CHECK: fcvtps s22, s13
# CHECK: fcvtps d21, d14
0xb6,0xa9,0xa1,0x5e
0xd5,0xa9,0xe1,0x5e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
# Positive Infinity
#----------------------------------------------------------------------
# CHECK: fcvtpu s12, s13
# CHECK: fcvtpu d21, d14
0xac,0xa9,0xa1,0x7e
0xd5,0xa9,0xe1,0x7e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Signed Integer, Rounding Toward Zero
#----------------------------------------------------------------------
# CHECK: fcvtzs s12, s13
# CHECK: fcvtzs d21, d14
0xac,0xb9,0xa1,0x5e
0xd5,0xb9,0xe1,0x5e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
# Zero
#----------------------------------------------------------------------
# CHECK: fcvtzu s12, s13
# CHECK: fcvtzu d21, d14
0xac,0xb9,0xa1,0x7e
0xd5,0xb9,0xe1,0x7e

#----------------------------------------------------------------------
# Scalar Floating-point Absolute Difference
#----------------------------------------------------------------------
# CHECK: fabd s29, s24, s20
# CHECK: fabd d29, d24, d20
0x1d,0xd7,0xb4,0x7e
0x1d,0xd7,0xf4,0x7e
16437
external/bsd/llvm/dist/llvm/test/MC/Mips/micromips-long-branch.ll
vendored
Normal file
File diff suppressed because it is too large
74
external/bsd/llvm/dist/llvm/test/Transforms/InstCombine/pr17827.ll
vendored
Normal file
@ -0,0 +1,74 @@
; RUN: opt < %s -instcombine -S | FileCheck %s

; With left shift, the comparison should not be modified.
; CHECK-LABEL: @test_shift_and_cmp_not_changed1(
; CHECK: icmp slt i8 %andp, 32
define i1 @test_shift_and_cmp_not_changed1(i8 %p) #0 {
entry:
%shlp = shl i8 %p, 5
%andp = and i8 %shlp, -64
%cmp = icmp slt i8 %andp, 32
ret i1 %cmp
}

; With arithmetic right shift, the comparison should not be modified.
; CHECK-LABEL: @test_shift_and_cmp_not_changed2(
; CHECK: icmp slt i8 %andp, 32
define i1 @test_shift_and_cmp_not_changed2(i8 %p) #0 {
entry:
%shlp = ashr i8 %p, 5
%andp = and i8 %shlp, -64
%cmp = icmp slt i8 %andp, 32
ret i1 %cmp
}

; This should simplify functionally to the left shift case.
; The extra input parameter should be optimized away.
; CHECK-LABEL: @test_shift_and_cmp_changed1(
; CHECK: %andp = shl i8 %p, 5
; CHECK-NEXT: %shl = and i8 %andp, -64
; CHECK-NEXT: %cmp = icmp slt i8 %shl, 32
define i1 @test_shift_and_cmp_changed1(i8 %p, i8 %q) #0 {
entry:
%andp = and i8 %p, 6
%andq = and i8 %q, 8
%or = or i8 %andq, %andp
%shl = shl i8 %or, 5
%ashr = ashr i8 %shl, 5
%cmp = icmp slt i8 %ashr, 1
ret i1 %cmp
}

; Unsigned compare allows a transformation to compare against 0.
; CHECK-LABEL: @test_shift_and_cmp_changed2(
; CHECK: icmp eq i8 %andp, 0
define i1 @test_shift_and_cmp_changed2(i8 %p) #0 {
entry:
%shlp = shl i8 %p, 5
%andp = and i8 %shlp, -64
%cmp = icmp ult i8 %andp, 32
ret i1 %cmp
}

; nsw on the shift should not affect the comparison.
; CHECK-LABEL: @test_shift_and_cmp_changed3(
; CHECK: icmp slt i8 %andp, 32
define i1 @test_shift_and_cmp_changed3(i8 %p) #0 {
entry:
%shlp = shl nsw i8 %p, 5
%andp = and i8 %shlp, -64
%cmp = icmp slt i8 %andp, 32
ret i1 %cmp
}

; Logical shift right allows a return true because the 'and' guarantees no bits are set.
; CHECK-LABEL: @test_shift_and_cmp_changed4(
; CHECK: ret i1 true
define i1 @test_shift_and_cmp_changed4(i8 %p) #0 {
entry:
%shlp = lshr i8 %p, 5
%andp = and i8 %shlp, -64
%cmp = icmp slt i8 %andp, 32
ret i1 %cmp
}
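
; Editor's note: a worked example of the last fold, using nothing beyond the
; IR above: (lshr i8 %p, 5) is at most 7, and 7 & -64 (0b11000000) is 0, so
; %andp is always 0 and "icmp slt i8 0, 32" folds to true.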
39
external/bsd/llvm/dist/llvm/test/Transforms/LoopVectorize/X86/tripcount.ll
vendored
Normal file
@ -0,0 +1,39 @@
; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -mcpu=prescott < %s | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
target triple = "i386-unknown-freebsd11.0"

@big = external global [0 x i32]

; PR18049
; We need to truncate the exit count to i32. This is legal because the
; arithmetic is signed (%inc is nsw).

; CHECK-LABEL: tripcount
; CHECK: trunc i64 %count to i32

define void @tripcount(i64 %count) {
entry:
%cmp6 = icmp sgt i64 %count, 0
br i1 %cmp6, label %for.body.preheader, label %for.end

for.body.preheader:
br label %for.body

for.body:
%i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%arrayidx = getelementptr inbounds [0 x i32]* @big, i32 0, i32 %i.07
%0 = load i32* %arrayidx, align 4
%neg = xor i32 %0, -1
store i32 %neg, i32* %arrayidx, align 4
%inc = add nsw i32 %i.07, 1
%conv = sext i32 %inc to i64
%cmp = icmp slt i64 %conv, %count
br i1 %cmp, label %for.body, label %for.end.loopexit

for.end.loopexit:
br label %for.end

for.end:
ret void
}
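
; Editor's note: the legality argument spelled out: since %inc is "add nsw",
; the i32 induction value never wraps, so a terminating loop has an exit count
; that fits in i32; truncating %count to i32 (the trunc the CHECK line looks
; for) is therefore safe.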
21
external/bsd/llvm/dist/llvm/test/Transforms/MergeFunc/ptr-int-transitivity-1.ll
vendored
Normal file
@ -0,0 +1,21 @@
; RUN: opt -S -mergefunc < %s | not grep "functions merged"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

declare void @stuff()

define void @f0(i64 %p0) {
entry:
call void @stuff()
call void @stuff()
call void @stuff()
ret void
}

define void @f2(i64 addrspace(1)* %p0) {
entry:
call void @stuff()
call void @stuff()
call void @stuff()
ret void
}
25
external/bsd/llvm/dist/llvm/test/Transforms/MergeFunc/ptr-int-transitivity-2.ll
vendored
Normal file
@ -0,0 +1,25 @@
; RUN: opt -S -mergefunc < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

declare void @stuff()

define void @f0(i64 %p0) {
entry:
call void @stuff()
call void @stuff()
call void @stuff()
ret void
}

; CHECK-LABEL: @f0
; CHECK: %2 = ptrtoint i64* %0 to i64
; CHECK: tail call void @f0(i64 %2)
; CHECK: ret void
define void @f1(i64 addrspace(0)* %p0) {
entry:
call void @stuff()
call void @stuff()
call void @stuff()
ret void
}
21
external/bsd/llvm/dist/llvm/test/Transforms/MergeFunc/ptr-int-transitivity-3.ll
vendored
Normal file
@ -0,0 +1,21 @@
; RUN: opt -S -mergefunc < %s | not grep "functions merged"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

declare void @stuff()

define void @f0(i64 addrspace(0)* %p0) {
entry:
call void @stuff()
call void @stuff()
call void @stuff()
ret void
}

define void @f2(i64 addrspace(1)* %p0) {
entry:
call void @stuff()
call void @stuff()
call void @stuff()
ret void
}
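
; Editor's note: the three ptr-int-transitivity tests bracket one rule: i64
; and i64 addrspace(0)* may merge through a ptrtoint thunk (test 2), but
; neither i64 nor an addrspace(0) pointer may merge with an addrspace(1)
; pointer (tests 1 and 3), since that equivalence would otherwise leak across
; address spaces by transitivity.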
@ -59,3 +59,38 @@ for.end: ; preds = %for.body
ret double %mul3
}

; A need-to-gather entry cannot be an external use of the scalar element.
; Instead the insertelement instructions of the need-to-gather entry are the
; external users.
; This test would assert because we would keep the scalar fpext and fadd alive.
; PR18129

; CHECK-LABEL: needtogather
define i32 @needtogather(double *noalias %a, i32 *noalias %b, float * noalias %c,
i32 * noalias %d) {
entry:
%0 = load i32* %d, align 4
%conv = sitofp i32 %0 to float
%1 = load float* %c
%sub = fsub float 0.000000e+00, %1
%mul = fmul float %sub, 0.000000e+00
%add = fadd float %conv, %mul
%conv1 = fpext float %add to double
%sub3 = fsub float 1.000000e+00, %1
%mul4 = fmul float %sub3, 0.000000e+00
%add5 = fadd float %conv, %mul4
%conv6 = fpext float %add5 to double
%tobool = fcmp une float %add, 0.000000e+00
br i1 %tobool, label %if.then, label %if.end

if.then:
br label %if.end

if.end:
%storemerge = phi double [ %conv6, %if.then ], [ %conv1, %entry ]
%e.0 = phi double [ %conv1, %if.then ], [ %conv6, %entry ]
store double %storemerge, double* %a, align 8
%conv7 = fptosi double %e.0 to i32
store i32 %conv7, i32* %b, align 4
ret i32 undef
}
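
; Editor's note: "need-to-gather" above is SLP-vectorizer jargon for a bundle
; that cannot be vectorized directly and is instead rebuilt from its scalars
; with insertelement instructions; the fix tracked by PR18129 credits those
; insertelements, rather than the original scalars, as the external users.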
47
external/bsd/llvm/dist/llvm/test/Transforms/SLPVectorizer/X86/pr18060.ll
vendored
Normal file
@ -0,0 +1,47 @@
; RUN: opt < %s -slp-vectorizer -S -mtriple=i386-pc-linux

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
target triple = "i386-pc-linux"

; Function Attrs: nounwind
define i32 @_Z16adjustFixupValueyj(i64 %Value, i32 %Kind) {
entry:
%extract.t = trunc i64 %Value to i32
%extract = lshr i64 %Value, 12
%extract.t6 = trunc i64 %extract to i32
switch i32 %Kind, label %sw.default [
i32 0, label %return
i32 1, label %return
i32 129, label %sw.bb1
i32 130, label %sw.bb2
]

sw.default: ; preds = %entry
call void @_Z25llvm_unreachable_internalv()
unreachable

sw.bb1: ; preds = %entry
%shr = lshr i64 %Value, 16
%extract.t5 = trunc i64 %shr to i32
%extract7 = lshr i64 %Value, 28
%extract.t8 = trunc i64 %extract7 to i32
br label %sw.bb2

sw.bb2: ; preds = %sw.bb1, %entry
%Value.addr.0.off0 = phi i32 [ %extract.t, %entry ], [ %extract.t5, %sw.bb1 ]
%Value.addr.0.off12 = phi i32 [ %extract.t6, %entry ], [ %extract.t8, %sw.bb1 ]
%conv6 = and i32 %Value.addr.0.off0, 4095
%conv4 = shl i32 %Value.addr.0.off12, 16
%shl = and i32 %conv4, 983040
%or = or i32 %shl, %conv6
%or11 = or i32 %or, 8388608
br label %return

return: ; preds = %sw.bb2, %entry, %entry
%retval.0 = phi i32 [ %or11, %sw.bb2 ], [ %extract.t, %entry ], [ %extract.t, %entry ]
ret i32 %retval.0
}

; Function Attrs: noreturn
declare void @_Z25llvm_unreachable_internalv()
@ -1,21 +1,13 @@
The LLVM Gold LTO Plugin
========================

This directory contains a plugin that is designed to work with binutils
gold linker. At present time, this is not the default linker in
binutils, and the default build of gold does not support plugins.

Obtaining binutils:
See docs/GoldPlugin.html for complete build and usage instructions.

cvs -z 9 -d :pserver:anoncvs@sourceware.org:/cvs/src login
{enter "anoncvs" as the password}
cvs -z 9 -d :pserver:anoncvs@sourceware.org:/cvs/src co binutils

This will create a src/ directory. Make a build/ directory and from
there configure binutils with "../src/configure --enable-gold --enable-plugins".
Then build binutils with "make all-gold".

To build the LLVMgold plugin, configure LLVM with the option
--with-binutils-include=/path/to/binutils/src/include/ --enable-pic. To use the
plugin, run "ld-new --plugin /path/to/LLVMgold.so".
Without PIC libLTO and LLVMgold are not being built (because they would fail
link on x86-64 with a relocation error: PIC and non-PIC can't be combined).
NOTE: libLTO and LLVMgold aren't built without PIC because they would fail
to link on x86-64 with a relocation error: PIC and non-PIC can't be combined.
As an alternative to passing --enable-pic, you can use 'make ENABLE_PIC=1' in
your entire LLVM build.