Import LLVM 3.4 RC2 r196603.
Bug fixes, most noticeable for inconsistencies in vectorized code.
This commit is contained in:
parent
1d57ee3e45
commit
49d3a2eb83
4 external/bsd/llvm/dist/llvm/CODE_OWNERS.TXT vendored
@@ -109,6 +109,10 @@ N: Nadav Rotem
 E: nrotem@apple.com
 D: X86 Backend, Loop Vectorizer
 
+N: Daniel Sanders
+E: daniel.sanders@imgtec.com
+D: MIPS Backend (lib/Target/Mips/*)
+
 N: Richard Sandiford
 E: rsandifo@linux.vnet.ibm.com
 D: SystemZ Backend
@@ -78,6 +78,15 @@ Compile.CMI := $(strip $(OCAMLC) -c $(OCAMLCFLAGS) $(OCAMLDEBUGFLAG) -o)
 Compile.CMO := $(strip $(OCAMLC) -c $(OCAMLCFLAGS) $(OCAMLDEBUGFLAG) -o)
 Compile.CMX := $(strip $(OCAMLOPT) -c $(OCAMLCFLAGS) $(OCAMLDEBUGFLAG) -o)
 
+ifdef OCAMLSTUBS
+# Avoid the need for LD_LIBRARY_PATH
+ifneq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+ifneq ($(HOST_OS),Darwin)
+OCAMLRPATH := $(RPATH) -Wl,'$(SharedLibDir)'
+endif
+endif
+endif
+
 ifdef OCAMLSTUBS
 Archive.CMA := $(strip $(OCAMLC) -a -dllib -l$(LIBRARYNAME) $(OCAMLDEBUGFLAG) \
                        -o)
@@ -88,7 +97,8 @@ endif
 
 ifdef OCAMLSTUBS
 Archive.CMXA := $(strip $(OCAMLOPT) -a $(patsubst %,-cclib %, \
-                        $(LLVMLibsOptions) -l$(LIBRARYNAME)) \
+                        $(LLVMLibsOptions) -l$(LIBRARYNAME) \
+                        -L$(SharedLibDir) $(OCAMLRPATH)) \
                         $(OCAMLDEBUGFLAG) -o)
 else
 Archive.CMXA := $(strip $(OCAMLOPT) -a $(OCAMLAFLAGS) $(OCAMLDEBUGFLAG) -o)
@@ -233,7 +243,7 @@ uninstall-local:: uninstall-shared
 
 $(SharedLib): $(ObjectsO) $(OcamlDir)/.dir
 	$(Echo) "Building $(BuildMode) $(notdir $@)"
-	$(Verb) $(Link) $(SharedLinkOptions) $(LLVMLibsOptions) \
+	$(Verb) $(Link) $(SharedLinkOptions) $(OCAMLRPATH) $(LLVMLibsOptions) \
 	  -o $@ $(ObjectsO)
 
 clean-shared::
38 external/bsd/llvm/dist/llvm/docs/Extensions.rst vendored
@@ -105,3 +105,41 @@ Supported COMDAT types:
   .section .xdata$foo
   .linkonce associative .text$foo
   ...
+
+``.section`` Directive
+^^^^^^^^^^^^^^^^^^^^^^
+
+MC supports passing the information in ``.linkonce`` at the end of
+``.section``. For example, these two code sequences are equivalent:
+
+.. code-block:: gas
+
+  .section secName, "dr", discard, "Symbol1"
+  .globl Symbol1
+  Symbol1:
+  .long 1
+
+.. code-block:: gas
+
+  .section secName, "dr"
+  .linkonce discard
+  .globl Symbol1
+  Symbol1:
+  .long 1
+
+Note that in the combined form the COMDAT symbol is explicit. This
+extension exists to support multiple sections with the same name in
+different COMDATs:
+
+
+.. code-block:: gas
+
+  .section secName, "dr", discard, "Symbol1"
+  .globl Symbol1
+  Symbol1:
+  .long 1
+
+  .section secName, "dr", discard, "Symbol2"
+  .globl Symbol2
+  Symbol2:
+  .long 1
28 external/bsd/llvm/dist/llvm/docs/GoldPlugin.rst vendored
@@ -30,29 +30,22 @@ by running ``/usr/bin/ld -plugin``. If it complains "missing argument" then
 you have plugin support. If not, such as an "unknown option" error then you
 will either need to build gold or install a version with plugin support.
 
-* To build gold with plugin support:
+* Download, configure and build gold with plugin support:
 
   .. code-block:: bash
 
     $ mkdir binutils
    $ cd binutils
-    $ cvs -z 9 -d :pserver:anoncvs@sourceware.org:/cvs/src login
-    {enter "anoncvs" as the password}
-    $ cvs -z 9 -d :pserver:anoncvs@sourceware.org:/cvs/src co binutils
+    $ git clone --depth 1 git://sourceware.org/git/binutils-gdb.git binutils
    $ mkdir build
    $ cd build
-    $ ../src/configure --enable-gold --enable-plugins
+    $ ../binutils/configure --enable-gold --enable-plugins --disable-werror
    $ make all-gold
 
-  That should leave you with ``binutils/build/gold/ld-new`` which supports
-  the ``-plugin`` option. It also built would have
-  ``binutils/build/binutils/ar`` and ``nm-new`` which support plugins but
-  don't have a visible -plugin option, instead relying on the gold plugin
-  being present in ``../lib/bfd-plugins`` relative to where the binaries
-  are placed.
+  That should leave you with ``build/gold/ld-new`` which supports
+  the ``-plugin`` option. Running ``make`` will additionally build
+  ``build/binutils/ar`` and ``nm-new`` binaries supporting plugins.
 
 * Build the LLVMgold plugin: Configure LLVM with
-  ``--with-binutils-include=/path/to/binutils/src/include`` and run
+  ``--with-binutils-include=/path/to/binutils/include`` and run
   ``make``.
 
 Usage
@@ -72,9 +65,10 @@ the ``lib`` directory under its prefix and pass the ``-plugin`` option to
 ``ld``. It will not look for an alternate linker, which is why you need
 gold to be the installed system linker in your path.
 
-If you want ``ar`` and ``nm`` to work seamlessly as well, install
-``LLVMgold.so`` to ``/usr/lib/bfd-plugins``. If you built your own gold, be
-sure to install the ``ar`` and ``nm-new`` you built to ``/usr/bin``.
+``ar`` and ``nm`` also accept the ``-plugin`` option and it's possible
+to install ``LLVMgold.so`` to ``/usr/lib/bfd-plugins`` for a seamless setup.
+If you built your own gold, be sure to install the ``ar`` and ``nm-new`` you
+built to ``/usr/bin``.
 
 
 Example of link time optimization
4 external/bsd/llvm/dist/llvm/docs/LangRef.rst vendored
@@ -128,7 +128,9 @@ lexical features of LLVM:
 #. Unnamed temporaries are created when the result of a computation is
    not assigned to a named value.
 #. Unnamed temporaries are numbered sequentially (using a per-function
-   incrementing counter, starting with 0).
+   incrementing counter, starting with 0). Note that basic blocks are
+   included in this numbering. For example, if the entry basic block is not
+   given a label name, then it will get number 0.
 
 It also shows a convention that we follow in this document. When
 demonstrating instructions, we will follow an instruction with a comment
@@ -84,6 +84,9 @@ Non-comprehensive list of changes in this release
 * Different sized pointers for different address spaces should now
   generally work. This is primarily useful for GPU targets.
 
+* OCaml bindings have been significantly extended to cover almost all of the
+  LLVM libraries.
+
 * ... next change ...
 
 .. NOTE
@@ -107,6 +110,19 @@ For more information on MSA (including documentation for the instruction set),
 see the `MIPS SIMD page at Imagination Technologies
 <http://imgtec.com/mips/mips-simd.asp>`_
 
+SPARC Target
+------------
+
+The SPARC backend got many improvements, namely
+
+* experimental SPARC V9 backend
+* JIT support for SPARC
+* fp128 support
+* exception handling
+* TLS support
+* leaf functions optimization
+* bug fixes
+
 External Open Source Projects Using LLVM 3.4
 ============================================
 
@@ -160,6 +176,23 @@ statically parallelize multiple work-items with the kernel compiler, even in
 the presence of work-group barriers. This enables static parallelization of
 the fine-grained static concurrency in the work groups in multiple ways.
 
+Portable Native Client (PNaCl)
+------------------------------
+
+`Portable Native Client (PNaCl) <http://www.chromium.org/nativeclient/pnacl>`_
+is a Chrome initiative to bring the performance and low-level control of native
+code to modern web browsers, without sacrificing the security benefits and
+portability of web applications. PNaCl works by compiling native C and C++ code
+to an intermediate representation using the LLVM clang compiler. This
+intermediate representation is a subset of LLVM bytecode that is wrapped into a
+portable executable, which can be hosted on a web server like any other website
+asset. When the site is accessed, Chrome fetches and translates the portable
+executable into an architecture-specific machine code optimized directly for
+the underlying device. PNaCl lets developers compile their code once to run on
+any hardware platform and embed their PNaCl application in any website,
+enabling developers to directly leverage the power of the underlying CPU and
+GPU.
+
 TTA-based Co-design Environment (TCE)
 -------------------------------------
@@ -564,6 +564,8 @@ public:
                                   unsigned SubReg = 0,
                                   bool isDebug = false,
                                   bool isInternalRead = false) {
+    assert(!(isDead && !isDef) && "Dead flag on non-def");
+    assert(!(isKill && isDef) && "Kill flag on def");
     MachineOperand Op(MachineOperand::MO_Register);
     Op.IsDef = isDef;
     Op.IsImp = isImp;
@@ -54,6 +54,10 @@ def int_aarch64_neon_fcvtas :
   Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
 def int_aarch64_neon_fcvtau :
   Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+def int_aarch64_neon_fcvtzs :
+  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+def int_aarch64_neon_fcvtzu :
+  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
 
 // Vector maxNum (Floating Point)
 def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;
@@ -308,6 +312,9 @@ def int_aarch64_neon_vsqadd : Neon_2Arg_Intrinsic;
 def int_aarch64_neon_vabs :
   Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;
 
+// Scalar Absolute Difference
+def int_aarch64_neon_vabd : Neon_2Arg_Intrinsic;
+
 // Scalar Negate Value
 def int_aarch64_neon_vneg :
   Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;
@@ -258,9 +258,15 @@ namespace llvm {
 
     const MCSectionCOFF *getCOFFSection(StringRef Section,
                                         unsigned Characteristics,
-                                        SectionKind Kind, int Selection = 0,
+                                        SectionKind Kind,
+                                        StringRef COMDATSymName,
+                                        int Selection,
                                         const MCSectionCOFF *Assoc = 0);
 
+    const MCSectionCOFF *getCOFFSection(StringRef Section,
+                                        unsigned Characteristics,
+                                        SectionKind Kind);
+
     const MCSectionCOFF *getCOFFSection(StringRef Section);
 
     /// @}
@@ -19,6 +19,7 @@
 #include "llvm/Support/COFF.h"
 
 namespace llvm {
+  class MCSymbol;
 
 /// MCSectionCOFF - This represents a section on Windows
   class MCSectionCOFF : public MCSection {
@@ -32,6 +33,11 @@ namespace llvm {
     /// drawn from the enums below.
     mutable unsigned Characteristics;
 
+    /// The COMDAT symbol of this section. Only valid if this is a COMDAT
+    /// section. Two COMDAT sections are merged if they have the same
+    /// COMDAT symbol.
+    const MCSymbol *COMDATSymbol;
+
     /// Selection - This is the Selection field for the section symbol, if
     /// it is a COMDAT section (Characteristics & IMAGE_SCN_LNK_COMDAT) != 0
     mutable int Selection;
@@ -44,9 +50,11 @@ namespace llvm {
   private:
     friend class MCContext;
     MCSectionCOFF(StringRef Section, unsigned Characteristics,
-                  int Selection, const MCSectionCOFF *Assoc, SectionKind K)
-      : MCSection(SV_COFF, K), SectionName(Section),
-        Characteristics(Characteristics), Selection(Selection), Assoc(Assoc) {
+                  const MCSymbol *COMDATSymbol, int Selection,
+                  const MCSectionCOFF *Assoc, SectionKind K)
+      : MCSection(SV_COFF, K), SectionName(Section),
+        Characteristics(Characteristics), COMDATSymbol(COMDATSymbol),
+        Selection(Selection), Assoc(Assoc) {
       assert ((Characteristics & 0x00F00000) == 0 &&
         "alignment must not be set upon section creation");
       assert ((Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) ==
@@ -3342,7 +3342,6 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
   unsigned OpSizeInBits = VT.getSizeInBits();
   SDValue LHSShiftArg = LHSShift.getOperand(0);
   SDValue LHSShiftAmt = LHSShift.getOperand(1);
-  SDValue RHSShiftArg = RHSShift.getOperand(0);
   SDValue RHSShiftAmt = RHSShift.getOperand(1);
 
   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
@@ -3402,32 +3401,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
     // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
     //   (rotr x, (sub 32, y))
     if (ConstantSDNode *SUBC =
-          dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
-      if (SUBC->getAPIntValue() == OpSizeInBits) {
+          dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0)))
+      if (SUBC->getAPIntValue() == OpSizeInBits)
         return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
                            HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
-      } else if (LHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
-                 LHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
-        // fold (or (shl (*ext x), (*ext y)),
-        //          (srl (*ext x), (*ext (sub 32, y)))) ->
-        //   (*ext (rotl x, y))
-        // fold (or (shl (*ext x), (*ext y)),
-        //          (srl (*ext x), (*ext (sub 32, y)))) ->
-        //   (*ext (rotr x, (sub 32, y)))
-        SDValue LArgExtOp0 = LHSShiftArg.getOperand(0);
-        EVT LArgVT = LArgExtOp0.getValueType();
-        bool HasROTRWithLArg = TLI.isOperationLegalOrCustom(ISD::ROTR, LArgVT);
-        bool HasROTLWithLArg = TLI.isOperationLegalOrCustom(ISD::ROTL, LArgVT);
-        if (HasROTRWithLArg || HasROTLWithLArg) {
-          if (LArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
-            SDValue V =
-              DAG.getNode(HasROTLWithLArg ? ISD::ROTL : ISD::ROTR, DL, LArgVT,
-                          LArgExtOp0, HasROTL ? LHSShiftAmt : RHSShiftAmt);
-            return DAG.getNode(LHSShiftArg.getOpcode(), DL, VT, V).getNode();
-          }
-        }
-      }
-    }
   } else if (LExtOp0.getOpcode() == ISD::SUB &&
              RExtOp0 == LExtOp0.getOperand(1)) {
     // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
@@ -3435,32 +3412,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
     // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
     //   (rotl x, (sub 32, y))
     if (ConstantSDNode *SUBC =
-          dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
-      if (SUBC->getAPIntValue() == OpSizeInBits) {
+          dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0)))
+      if (SUBC->getAPIntValue() == OpSizeInBits)
        return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
                           HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
-      } else if (RHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
-                 RHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
-        // fold (or (shl (*ext x), (*ext (sub 32, y))),
-        //          (srl (*ext x), (*ext y))) ->
-        //   (*ext (rotl x, y))
-        // fold (or (shl (*ext x), (*ext (sub 32, y))),
-        //          (srl (*ext x), (*ext y))) ->
-        //   (*ext (rotr x, (sub 32, y)))
-        SDValue RArgExtOp0 = RHSShiftArg.getOperand(0);
-        EVT RArgVT = RArgExtOp0.getValueType();
-        bool HasROTRWithRArg = TLI.isOperationLegalOrCustom(ISD::ROTR, RArgVT);
-        bool HasROTLWithRArg = TLI.isOperationLegalOrCustom(ISD::ROTL, RArgVT);
-        if (HasROTRWithRArg || HasROTLWithRArg) {
-          if (RArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
-            SDValue V =
-              DAG.getNode(HasROTRWithRArg ? ISD::ROTR : ISD::ROTL, DL, RArgVT,
-                          RArgExtOp0, HasROTR ? RHSShiftAmt : LHSShiftAmt);
-            return DAG.getNode(RHSShiftArg.getOpcode(), DL, VT, V).getNode();
-          }
-        }
-      }
-    }
   }
 
   return 0;
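The surviving code still matches the plain rotate idiom named in the fold comments above; only the extend-wrapped variants were removed. As a reminder of what that idiom looks like at the source level, a minimal sketch in ordinary C++ (independent of the SelectionDAG API):

    #include <cstdint>

    // The source-level pattern MatchRotate recognizes:
    // (or (shl x, c), (srl x, 32 - c)) becomes a single rotate-left node.
    uint32_t rotl32(uint32_t x, unsigned c) {
      c &= 31;                                  // keep the shift amount in range
      return (x << c) | (x >> ((32 - c) & 31)); // folds to one rotl instruction
    }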
@@ -733,6 +733,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
   return getContext().getCOFFSection(Name,
                                      Characteristics,
                                      Kind,
+                                     "",
                                      Selection);
 }
 
@@ -768,7 +769,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
     Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
 
     return getContext().getCOFFSection(Name.str(), Characteristics,
-                                       Kind, COFF::IMAGE_COMDAT_SELECT_ANY);
+                                       Kind, "", COFF::IMAGE_COMDAT_SELECT_ANY);
   }
 
   if (Kind.isText())
44 external/bsd/llvm/dist/llvm/lib/MC/MCContext.cpp vendored
@@ -34,8 +34,7 @@ typedef std::pair<std::string, std::string> SectionGroupPair;
 
 typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
 typedef std::map<SectionGroupPair, const MCSectionELF *> ELFUniqueMapTy;
-typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
-
+typedef std::map<SectionGroupPair, const MCSectionCOFF *> COFFUniqueMapTy;
 
 MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
                      const MCObjectFileInfo *mofi, const SourceMgr *mgr,
@@ -280,32 +279,51 @@ const MCSectionELF *MCContext::CreateELFGroupSection() {
   return Result;
 }
 
-const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
-                                               unsigned Characteristics,
-                                               SectionKind Kind, int Selection,
-                                               const MCSectionCOFF *Assoc) {
+const MCSectionCOFF *
+MCContext::getCOFFSection(StringRef Section, unsigned Characteristics,
+                          SectionKind Kind, StringRef COMDATSymName,
+                          int Selection, const MCSectionCOFF *Assoc) {
   if (COFFUniquingMap == 0)
     COFFUniquingMap = new COFFUniqueMapTy();
   COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap;
 
-  // Do the lookup, if we have a hit, return it.
-  StringMapEntry<const MCSectionCOFF*> &Entry = Map.GetOrCreateValue(Section);
-  if (Entry.getValue()) return Entry.getValue();
-
-  MCSectionCOFF *Result = new (*this) MCSectionCOFF(Entry.getKey(),
-                                                    Characteristics,
-                                                    Selection, Assoc, Kind);
+  SectionGroupPair P(Section, COMDATSymName);
+  std::pair<COFFUniqueMapTy::iterator, bool> Entry =
+    Map.insert(std::make_pair(P, (MCSectionCOFF *)0));
+  COFFUniqueMapTy::iterator Iter = Entry.first;
+  if (!Entry.second)
+    return Iter->second;
 
-  Entry.setValue(Result);
+  const MCSymbol *COMDATSymbol = NULL;
+  if (!COMDATSymName.empty())
+    COMDATSymbol = GetOrCreateSymbol(COMDATSymName);
+
+  MCSectionCOFF *Result =
+    new (*this) MCSectionCOFF(Iter->first.first, Characteristics,
+                              COMDATSymbol, Selection, Assoc, Kind);
+
+  Iter->second = Result;
   return Result;
 }
 
+const MCSectionCOFF *
+MCContext::getCOFFSection(StringRef Section, unsigned Characteristics,
+                          SectionKind Kind) {
+  return getCOFFSection(Section, Characteristics, Kind, "", 0);
+}
+
 const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) {
   if (COFFUniquingMap == 0)
     COFFUniquingMap = new COFFUniqueMapTy();
   COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap;
 
-  return Map.lookup(Section);
+  SectionGroupPair P(Section, "");
+  COFFUniqueMapTy::iterator Iter = Map.find(P);
+  if (Iter == Map.end())
+    return 0;
+  return Iter->second;
 }
 
 //===----------------------------------------------------------------------===//
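The change above switches COFF section uniquing from a name-keyed StringMap to a map keyed on the (section name, COMDAT symbol) pair, so that sections sharing a name but belonging to different COMDATs stay distinct. A minimal sketch of that keying scheme in plain C++ (names are illustrative, independent of the MC classes):

    #include <map>
    #include <string>
    #include <utility>

    struct Section { /* section properties elided */ };

    // Key on (name, COMDAT symbol): "secName"/"Symbol1" and "secName"/"Symbol2"
    // yield two distinct sections, as the Extensions.rst example requires.
    using SectionKey = std::pair<std::string, std::string>;
    std::map<SectionKey, Section *> uniqueSections;

    Section *getOrCreate(const std::string &name, const std::string &comdat) {
      auto ins = uniqueSections.insert({{name, comdat}, nullptr});
      if (!ins.second)
        return ins.first->second;      // existing entry: return it
      ins.first->second = new Section; // first use of this (name, comdat) pair
      return ins.first->second;
    }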
@@ -15,6 +15,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCELF.h"
@@ -96,6 +97,9 @@ void MCELFStreamer::EmitDebugLabel(MCSymbol *Symbol) {
 }
 
 void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+  // Let the target do whatever target specific stuff it needs to do.
+  getAssembler().getBackend().handleAssemblerFlag(Flag);
+  // Do any generic stuff we need to do.
   switch (Flag) {
   case MCAF_SyntaxUnified: return; // no-op here.
   case MCAF_Code16: return; // Change parsing mode; no-op here.
@@ -35,6 +35,10 @@ class COFFAsmParser : public MCAsmParserExtension {
                           unsigned Characteristics,
                           SectionKind Kind);
 
+  bool ParseSectionSwitch(StringRef Section, unsigned Characteristics,
+                          SectionKind Kind, StringRef COMDATSymName,
+                          COFF::COMDATType Type, const MCSectionCOFF *Assoc);
+
   bool ParseSectionName(StringRef &SectionName);
   bool ParseSectionFlags(StringRef FlagsString, unsigned* Flags);
 
@@ -111,6 +115,8 @@ class COFFAsmParser : public MCAsmParserExtension {
   bool ParseDirectiveType(StringRef, SMLoc);
   bool ParseDirectiveEndef(StringRef, SMLoc);
   bool ParseDirectiveSecRel32(StringRef, SMLoc);
+  bool parseCOMDATTypeAndAssoc(COFF::COMDATType &Type,
+                               const MCSectionCOFF *&Assoc);
   bool ParseDirectiveLinkOnce(StringRef, SMLoc);
 
   // Win64 EH directives.
@@ -284,12 +290,22 @@ bool COFFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
 bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
                                        unsigned Characteristics,
                                        SectionKind Kind) {
+  return ParseSectionSwitch(Section, Characteristics, Kind, "",
+                            COFF::IMAGE_COMDAT_SELECT_ANY, 0);
+}
+
+bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
+                                       unsigned Characteristics,
+                                       SectionKind Kind,
+                                       StringRef COMDATSymName,
+                                       COFF::COMDATType Type,
+                                       const MCSectionCOFF *Assoc) {
   if (getLexer().isNot(AsmToken::EndOfStatement))
     return TokError("unexpected token in section switching directive");
   Lex();
 
   getStreamer().SwitchSection(getContext().getCOFFSection(
-                                Section, Characteristics, Kind));
+                                Section, Characteristics, Kind, COMDATSymName,
+                                Type, Assoc));
 
   return false;
 }
@@ -303,7 +319,7 @@ bool COFFAsmParser::ParseSectionName(StringRef &SectionName) {
   return false;
 }
 
-// .section name [, "flags"]
+// .section name [, "flags"] [, identifier [ identifier ], identifier]
 //
 // Supported flags:
 //   a: Ignored.
@@ -340,11 +356,30 @@ bool COFFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
       return true;
   }
 
+  COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY;
+  const MCSectionCOFF *Assoc = 0;
+  StringRef COMDATSymName;
+  if (getLexer().is(AsmToken::Comma)) {
+    Lex();
+
+    Flags |= COFF::IMAGE_SCN_LNK_COMDAT;
+
+    if (parseCOMDATTypeAndAssoc(Type, Assoc))
+      return true;
+
+    if (getLexer().isNot(AsmToken::Comma))
+      return TokError("expected comma in directive");
+    Lex();
+
+    if (getParser().parseIdentifier(COMDATSymName))
+      return TokError("expected identifier in directive");
+  }
+
   if (getLexer().isNot(AsmToken::EndOfStatement))
     return TokError("unexpected token in directive");
 
   SectionKind Kind = computeSectionKind(Flags);
-  ParseSectionSwitch(SectionName, Flags, Kind);
+  ParseSectionSwitch(SectionName, Flags, Kind, COMDATSymName, Type, Assoc);
   return false;
 }
@@ -409,37 +444,29 @@ bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) {
   return false;
 }
 
-/// ParseDirectiveLinkOnce
-/// ::= .linkonce [ identifier [ identifier ] ]
-bool COFFAsmParser::ParseDirectiveLinkOnce(StringRef, SMLoc Loc) {
-  COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY;
+/// ::= [ identifier [ identifier ] ]
+bool COFFAsmParser::parseCOMDATTypeAndAssoc(COFF::COMDATType &Type,
+                                            const MCSectionCOFF *&Assoc) {
+  StringRef TypeId = getTok().getIdentifier();
 
-  if (getLexer().is(AsmToken::Identifier)) {
-    StringRef TypeId = getTok().getIdentifier();
-    Type = StringSwitch<COFF::COMDATType>(TypeId)
-      .Case("one_only", COFF::IMAGE_COMDAT_SELECT_NODUPLICATES)
-      .Case("discard", COFF::IMAGE_COMDAT_SELECT_ANY)
-      .Case("same_size", COFF::IMAGE_COMDAT_SELECT_SAME_SIZE)
-      .Case("same_contents", COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH)
-      .Case("associative", COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
-      .Case("largest", COFF::IMAGE_COMDAT_SELECT_LARGEST)
-      .Case("newest", COFF::IMAGE_COMDAT_SELECT_NEWEST)
-      .Default((COFF::COMDATType)0);
+  Type = StringSwitch<COFF::COMDATType>(TypeId)
+    .Case("one_only", COFF::IMAGE_COMDAT_SELECT_NODUPLICATES)
+    .Case("discard", COFF::IMAGE_COMDAT_SELECT_ANY)
+    .Case("same_size", COFF::IMAGE_COMDAT_SELECT_SAME_SIZE)
+    .Case("same_contents", COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH)
+    .Case("associative", COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
+    .Case("largest", COFF::IMAGE_COMDAT_SELECT_LARGEST)
+    .Case("newest", COFF::IMAGE_COMDAT_SELECT_NEWEST)
+    .Default((COFF::COMDATType)0);
 
-    if (Type == 0)
-      return TokError(Twine("unrecognized COMDAT type '" + TypeId + "'"));
+  if (Type == 0)
+    return TokError(Twine("unrecognized COMDAT type '" + TypeId + "'"));
 
-    Lex();
-  }
+  Lex();
 
-  const MCSectionCOFF *Current = static_cast<const MCSectionCOFF*>(
-                                       getStreamer().getCurrentSection().first);
-
-  const MCSectionCOFF *Assoc = 0;
   if (Type == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
-    StringRef AssocName;
     SMLoc Loc = getTok().getLoc();
+    StringRef AssocName;
     if (ParseSectionName(AssocName))
       return TokError("expected associated section name");
 
@@ -447,14 +474,33 @@ bool COFFAsmParser::ParseDirectiveLinkOnce(StringRef, SMLoc Loc) {
                                         getContext().getCOFFSection(AssocName));
     if (!Assoc)
       return Error(Loc, "cannot associate unknown section '" + AssocName + "'");
-    if (Assoc == Current)
-      return Error(Loc, "cannot associate a section with itself");
     if (!(Assoc->getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT))
       return Error(Loc, "associated section must be a COMDAT section");
     if (Assoc->getSelection() == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
      return Error(Loc, "associated section cannot be itself associative");
   }
 
+  return false;
+}
+
+/// ParseDirectiveLinkOnce
+/// ::= .linkonce [ identifier [ identifier ] ]
+bool COFFAsmParser::ParseDirectiveLinkOnce(StringRef, SMLoc Loc) {
+  COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY;
+  const MCSectionCOFF *Assoc = 0;
+  if (getLexer().is(AsmToken::Identifier))
+    if (parseCOMDATTypeAndAssoc(Type, Assoc))
+      return true;
+
+  const MCSectionCOFF *Current = static_cast<const MCSectionCOFF*>(
+                                       getStreamer().getCurrentSection().first);
+
+
+  if (Type == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+    if (Assoc == Current)
+      return Error(Loc, "cannot associate a section with itself");
+  }
+
   if (Current->getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT)
     return Error(Loc, Twine("section '") + Current->getSectionName() +
                                                    "' is already linkonce");
@@ -151,7 +151,8 @@ void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
   int Selection = COFF::IMAGE_COMDAT_SELECT_LARGEST;
 
   const MCSection *Section = MCStreamer::getContext().getCOFFSection(
-      SectionName, Characteristics, SectionKind::getBSS(), Selection);
+      SectionName, Characteristics, SectionKind::getBSS(), Symbol->getName(),
+      Selection);
 
   MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section);
 
@@ -321,6 +322,7 @@ void WinCOFFStreamer::EmitWin64EHHandlerData() {
 }
 
 void WinCOFFStreamer::FinishImpl() {
+  EmitFrames(NULL, true);
   EmitW64Tables();
   MCObjectStreamer::FinishImpl();
 }
@@ -4231,6 +4231,23 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
       return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT,
                          V1.getOperand(Lane));
     }
+
+    // Test if V1 is an EXTRACT_SUBVECTOR.
+    if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+      int ExtLane = cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
+      return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1.getOperand(0),
+                         DAG.getConstant(Lane + ExtLane, MVT::i64));
+    }
+    // Test if V1 is a CONCAT_VECTORS.
+    if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
+        V1.getOperand(1).getOpcode() == ISD::UNDEF) {
+      SDValue Op0 = V1.getOperand(0);
+      assert((unsigned)Lane < Op0.getValueType().getVectorNumElements() &&
+             "Invalid vector lane access");
+      return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, Op0,
+                         DAG.getConstant(Lane, MVT::i64));
+    }
+
     return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1,
                        DAG.getConstant(Lane, MVT::i64));
   }
File diff suppressed because it is too large
@@ -238,6 +238,10 @@ static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn,
                                                    uint64_t Address,
                                                    const void *Decoder);
 
+static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn,
+                                          uint64_t Address,
+                                          const void *Decoder);
+
 static bool Check(DecodeStatus &Out, DecodeStatus In);
 
 #include "AArch64GenDisassemblerTables.inc"
@@ -1342,13 +1346,13 @@ static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn,
   case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register: {
     switch (Opc) {
     case AArch64::LD4LN_WB_B_fixed: case AArch64::LD4LN_WB_B_register:
-      TransferBytes = 3; break;
+      TransferBytes = 4; break;
     case AArch64::LD4LN_WB_H_fixed: case AArch64::LD4LN_WB_H_register:
-      TransferBytes = 6; break;
+      TransferBytes = 8; break;
    case AArch64::LD4LN_WB_S_fixed: case AArch64::LD4LN_WB_S_register:
-      TransferBytes = 12; break;
+      TransferBytes = 16; break;
    case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register:
-      TransferBytes = 24; break;
+      TransferBytes = 32; break;
    }
    IsLoad = true;
    NumVecs = 4;
@@ -1534,3 +1538,35 @@ static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn,
 
   return MCDisassembler::Success;
 }
+
+static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn,
+                                          uint64_t Address,
+                                          const void *Decoder) {
+  unsigned Rd = fieldFromInstruction(Insn, 0, 5);
+  unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+  unsigned size = fieldFromInstruction(Insn, 22, 2);
+  unsigned Q = fieldFromInstruction(Insn, 30, 1);
+
+  DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder);
+
+  if (Q)
+    DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder);
+  else
+    DecodeFPR64RegisterClass(Inst, Rn, Address, Decoder);
+
+  switch (size) {
+  case 0:
+    Inst.addOperand(MCOperand::CreateImm(8));
+    break;
+  case 1:
+    Inst.addOperand(MCOperand::CreateImm(16));
+    break;
+  case 2:
+    Inst.addOperand(MCOperand::CreateImm(32));
+    break;
+  default:
+    return MCDisassembler::Fail;
+  }
+  return MCDisassembler::Success;
+}
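The corrected write-back amounts in the LD4LN hunk follow from the structure of LD4 single-lane loads: four registers, one element each, so the post-increment is four times the element size (the old values matched LD3, hence the vectorized-code inconsistencies mentioned in the commit message). A standalone check of that arithmetic, assuming one element per register (illustrative C++, not LLVM code):

    // Post-index write-back for LD4 "one lane per register" loads:
    // 4 registers x element size in bytes.
    constexpr unsigned transferBytes(unsigned numVecs, unsigned elemBytes) {
      return numVecs * elemBytes;
    }
    static_assert(transferBytes(4, 1) == 4,  "B lanes");
    static_assert(transferBytes(4, 2) == 8,  "H lanes");
    static_assert(transferBytes(4, 4) == 16, "S lanes");
    static_assert(transferBytes(4, 8) == 32, "D lanes");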
@@ -1913,28 +1913,40 @@ bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF,
 
   MachineBasicBlock *MBB = MI->getParent();
   const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
+  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
 
   // Now try to find enough space in the reglist to allocate NumBytes.
   for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded;
-       --CurReg, --RegsNeeded) {
+       --CurReg) {
     if (!IsPop) {
       // Pushing any register is completely harmless, mark the
       // register involved as undef since we don't care about it in
       // the slightest.
       RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
                                                   false, false, true));
+      --RegsNeeded;
       continue;
     }
 
-    // However, we can only pop an extra register if it's not live. Otherwise we
-    // might clobber a return value register. We assume that once we find a live
-    // return register all lower ones will be too so there's no use proceeding.
-    if (MBB->computeRegisterLiveness(TRI, CurReg, MI) !=
-        MachineBasicBlock::LQR_Dead)
-      return false;
+    // However, we can only pop an extra register if it's not live. For
+    // registers live within the function we might clobber a return value
+    // register; the other way a register can be live here is if it's
+    // callee-saved.
+    if (isCalleeSavedRegister(CurReg, CSRegs) ||
+        MBB->computeRegisterLiveness(TRI, CurReg, MI) !=
+        MachineBasicBlock::LQR_Dead) {
+      // VFP pops don't allow holes in the register list, so any skip is fatal
+      // for our transformation. GPR pops do, so we should just keep looking.
+      if (IsVFPPushPop)
+        return false;
+      else
+        continue;
+    }
 
     // Mark the unimportant registers as <def,dead> in the POP.
-    RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, true));
+    RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
+                                                true));
+    --RegsNeeded;
   }
 
   if (RegsNeeded > 0)
@@ -72,6 +72,14 @@ static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
   }
 }
 
+static inline bool isCalleeSavedRegister(unsigned Reg,
+                                         const MCPhysReg *CSRegs) {
+  for (unsigned i = 0; CSRegs[i]; ++i)
+    if (Reg == CSRegs[i])
+      return true;
+  return false;
+}
+
 class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
 protected:
   const ARMSubtarget &STI;
@@ -82,13 +82,6 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
   return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
 }
 
-static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
-  for (unsigned i = 0; CSRegs[i]; ++i)
-    if (Reg == CSRegs[i])
-      return true;
-  return false;
-}
-
 static bool isCSRestore(MachineInstr *MI,
                         const ARMBaseInstrInfo &TII,
                         const uint16_t *CSRegs) {
@@ -215,13 +215,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
     AFI->setShouldRestoreSPFromFP(true);
 }
 
-static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
-  for (unsigned i = 0; CSRegs[i]; ++i)
-    if (Reg == CSRegs[i])
-      return true;
-  return false;
-}
-
 static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) {
   if (MI->getOpcode() == ARM::tLDRspi &&
       MI->getOperand(1).isFI() &&
@@ -437,8 +437,10 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
     if (!I->Br || I->HasLongBranch)
       continue;
 
+    int ShVal = TM.getSubtarget<MipsSubtarget>().inMicroMipsMode() ? 2 : 4;
+
     // Check if offset fits into 16-bit immediate field of branches.
-    if (!ForceLongBranch && isInt<16>(computeOffset(I->Br) / 4))
+    if (!ForceLongBranch && isInt<16>(computeOffset(I->Br) / ShVal))
      continue;
 
    I->HasLongBranch = true;
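The divisor change reflects that branch immediates count instructions, not bytes: microMIPS instructions are 2 bytes, classic MIPS instructions are 4. A sketch of that range test (illustrative C++; the semantics of LLVM's isInt<16> are assumed):

    #include <cstdint>

    // A branch immediate encodes byteOffset / instruction granularity in a
    // signed 16-bit field.
    bool fitsBranchImm16(int64_t byteOffset, bool microMips) {
      const int64_t units = byteOffset / (microMips ? 2 : 4);
      return units >= INT16_MIN && units <= INT16_MAX;
    }
    // The same byte offset can need a long branch only in microMIPS mode:
    // 120000 / 4 = 30000 fits in 16 bits, 120000 / 2 = 60000 does not.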
@@ -62,6 +62,24 @@ MipsSERegisterInfo::intRegClass(unsigned Size) const {
   return &Mips::GPR64RegClass;
 }
 
+/// Determine whether a given opcode is an MSA load/store (supporting 10-bit
+/// offsets) or a non-MSA load/store (supporting 16-bit offsets).
+static inline bool isMSALoadOrStore(const unsigned Opcode) {
+  switch (Opcode) {
+  case Mips::LD_B:
+  case Mips::LD_H:
+  case Mips::LD_W:
+  case Mips::LD_D:
+  case Mips::ST_B:
+  case Mips::ST_H:
+  case Mips::ST_W:
+  case Mips::ST_D:
+    return true;
+  default:
+    return false;
+  }
+}
+
 void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
                                      unsigned OpNo, int FrameIndex,
                                      uint64_t StackSize,
@@ -111,23 +129,49 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
 
   DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
 
-  // If MI is not a debug value, make sure Offset fits in the 16-bit immediate
-  // field.
-  if (!MI.isDebugValue() && !isInt<16>(Offset)) {
-    MachineBasicBlock &MBB = *MI.getParent();
-    DebugLoc DL = II->getDebugLoc();
-    unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
-    unsigned NewImm;
-    const MipsSEInstrInfo &TII =
-      *static_cast<const MipsSEInstrInfo*>(
-        MBB.getParent()->getTarget().getInstrInfo());
-    unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL, &NewImm);
-    BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg)
-      .addReg(Reg, RegState::Kill);
+  if (!MI.isDebugValue()) {
+    // Make sure Offset fits within the field available.
+    // For MSA instructions, this is a 10-bit signed immediate, otherwise it is
+    // a 16-bit signed immediate.
+    unsigned OffsetBitSize = isMSALoadOrStore(MI.getOpcode()) ? 10 : 16;
 
-    FrameReg = Reg;
-    Offset = SignExtend64<16>(NewImm);
-    IsKill = true;
+    if (OffsetBitSize == 10 && !isInt<10>(Offset) && isInt<16>(Offset)) {
+      // If we have an offset that needs to fit into a signed 10-bit immediate
+      // and doesn't, but does fit into 16-bits then use an ADDiu
+      MachineBasicBlock &MBB = *MI.getParent();
+      DebugLoc DL = II->getDebugLoc();
+      unsigned ADDiu = Subtarget.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
+      const TargetRegisterClass *RC =
+          Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+      MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
+      unsigned Reg = RegInfo.createVirtualRegister(RC);
+      const MipsSEInstrInfo &TII =
+          *static_cast<const MipsSEInstrInfo *>(
+               MBB.getParent()->getTarget().getInstrInfo());
+      BuildMI(MBB, II, DL, TII.get(ADDiu), Reg).addReg(FrameReg).addImm(Offset);
+
+      FrameReg = Reg;
+      Offset = 0;
+      IsKill = true;
+    } else if (!isInt<16>(Offset)) {
+      // Otherwise split the offset into 16-bit pieces and add it in multiple
+      // instructions.
+      MachineBasicBlock &MBB = *MI.getParent();
+      DebugLoc DL = II->getDebugLoc();
+      unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+      unsigned NewImm = 0;
+      const MipsSEInstrInfo &TII =
+          *static_cast<const MipsSEInstrInfo *>(
+               MBB.getParent()->getTarget().getInstrInfo());
+      unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL,
+                                       OffsetBitSize == 16 ? &NewImm : NULL);
+      BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg)
+        .addReg(Reg, RegState::Kill);
+
+      FrameReg = Reg;
+      Offset = SignExtend64<16>(NewImm);
+      IsKill = true;
+    }
   }
 
   MI.getOperand(OpNo).ChangeToRegister(FrameReg, false, false, IsKill);
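The rewritten logic classifies frame offsets against two signed-immediate widths before deciding how to materialize them. A small sketch of the classification, assuming the semantics of LLVM's isInt<N> (illustrative C++):

    #include <cstdint>

    // True if x fits an N-bit signed immediate, mirroring LLVM's isInt<N>.
    template <unsigned N> bool fitsSignedImm(int64_t x) {
      return x >= -(INT64_C(1) << (N - 1)) && x < (INT64_C(1) << (N - 1));
    }

    // The three cases distinguished above for an MSA load/store offset:
    //   1. fitsSignedImm<10>(off)                 -> encode directly
    //   2. !fits 10 bits but fitsSignedImm<16>    -> one ADDiu, offset becomes 0
    //   3. !fitsSignedImm<16>(off)                -> materialize in 16-bit pieces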
@@ -58,6 +58,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::FABS,   MVT::f32, Legal);
   setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
   setOperationAction(ISD::FRINT,  MVT::f32, Legal);
+  setOperationAction(ISD::FROUND, MVT::f32, Legal);
 
   // The hardware supports ROTR, but not ROTL
   setOperationAction(ISD::ROTL, MVT::i32, Expand);
@@ -178,6 +179,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
 
   for (unsigned int x = 0; x < NumFloatTypes; ++x) {
     MVT::SimpleValueType VT = FloatTypes[x];
+    setOperationAction(ISD::FABS, VT, Expand);
     setOperationAction(ISD::FADD, VT, Expand);
     setOperationAction(ISD::FDIV, VT, Expand);
     setOperationAction(ISD::FFLOOR, VT, Expand);
@@ -83,3 +83,6 @@ def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
 def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
                         SDTypeProfile<0, 2, []>,
                         [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def AMDGPUround : SDNode<"ISD::FROUND",
+                         SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;
@@ -1110,6 +1110,10 @@ class COS_Common <bits<11> inst> : R600_1OP <
   let Itinerary = TransALU;
 }
 
+def CLAMP_R600 : CLAMP <R600_Reg32>;
+def FABS_R600 : FABS<R600_Reg32>;
+def FNEG_R600 : FNEG<R600_Reg32>;
+
 //===----------------------------------------------------------------------===//
 // Helper patterns for complex intrinsics
 //===----------------------------------------------------------------------===//
@@ -1132,6 +1136,13 @@ class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ie
   (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
 >;
 
+// FROUND pattern
+class FROUNDPat<Instruction CNDGE> : Pat <
+  (AMDGPUround f32:$x),
+  (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x))
+>;
+
+
 //===----------------------------------------------------------------------===//
 // R600 / R700 Instructions
 //===----------------------------------------------------------------------===//
@@ -1173,6 +1184,7 @@ let Predicates = [isR600] in {
   def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
 
   def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
+  def : FROUNDPat <CNDGE_r600>;
 
   def R600_ExportSwz : ExportSwzInst {
     let Word1{20-17} = 0; // BURST_COUNT
@@ -1726,6 +1738,8 @@ def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET",
 // SHA-256 Patterns
 def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
 
+def : FROUNDPat <CNDGE_eg>;
+
 def EG_ExportSwz : ExportSwzInst {
   let Word1{19-16} = 0; // BURST_COUNT
   let Word1{20} = 0; // VALID_PIXEL_MODE
@@ -2090,10 +2104,6 @@ def TXD_SHADOW: InstR600 <
 } // End isPseudo = 1
 } // End usesCustomInserter = 1
 
-def CLAMP_R600 : CLAMP <R600_Reg32>;
-def FABS_R600 : FABS<R600_Reg32>;
-def FNEG_R600 : FNEG<R600_Reg32>;
-
 //===---------------------------------------------------------------------===//
 // Return instruction
 //===---------------------------------------------------------------------===//
@@ -489,14 +489,17 @@ def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_F
 
 let mayLoad = 1 in {
 
-defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>;
+// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
+// SMRD instructions, because the SGPR_32 register class does not include M0
+// and writing to M0 from an SMRD instruction will hang the GPU.
+defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SGPR_32>;
 defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
 defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
 defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
 defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
 
 defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
-  0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32
+  0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SGPR_32
 >;
 
 defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
@@ -105,5 +105,22 @@ namespace llvm {
     }
     llvm_unreachable("Invalid cond code");
   }
+
+  inline static unsigned HI22(int64_t imm) {
+    return (unsigned)((imm >> 10) & ((1 << 22)-1));
+  }
+
+  inline static unsigned LO10(int64_t imm) {
+    return (unsigned)(imm & 0x3FF);
+  }
+
+  inline static unsigned HIX22(int64_t imm) {
+    return HI22(~imm);
+  }
+
+  inline static unsigned LOX10(int64_t imm) {
+    return ~LO10(~imm);
+  }
+
 } // end namespace llvm
 #endif
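These helpers split immediates for sethi-based materialization: sethi writes bits 31..10, then or (nonnegative values) or xor (negative values, whose 13-bit operand sign-extends) restores bits 9..0. A self-contained check of those identities (it mirrors the helpers added above; the sign-extension of the xor immediate is an assumption about the instruction encoding):

    #include <cassert>
    #include <cstdint>

    static unsigned HI22(int64_t imm)  { return (unsigned)((imm >> 10) & ((1 << 22) - 1)); }
    static unsigned LO10(int64_t imm)  { return (unsigned)(imm & 0x3FF); }
    static unsigned HIX22(int64_t imm) { return HI22(~imm); }
    static unsigned LOX10(int64_t imm) { return ~LO10(~imm); }

    int main() {
      int32_t pos = 123456;
      // sethi %hi(pos), %g1 ; or %g1, %lo(pos), %g1
      assert((int32_t)((HI22(pos) << 10) | LO10(pos)) == pos);

      int32_t neg = -123456;
      // sethi %hix(neg), %g1 ; xor %g1, %lox(neg), %g1
      assert((int32_t)((HIX22(neg) << 10) ^ LOX10(neg)) == neg);
      return 0;
    }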
@@ -88,7 +88,7 @@ void SparcAsmPrinter::EmitFunctionBodyStart() {
   const unsigned globalRegs[] = { SP::G2, SP::G3, SP::G6, SP::G7, 0 };
   for (unsigned i = 0; globalRegs[i] != 0; ++i) {
     unsigned reg = globalRegs[i];
-    if (!MRI.isPhysRegUsed(reg))
+    if (MRI.use_empty(reg))
       continue;
     EmitGlobalRegisterDecl(reg);
   }
@@ -33,6 +33,51 @@ DisableLeafProc("disable-sparc-leaf-proc",
                 cl::Hidden);
 
 
+void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF,
+                                          MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator MBBI,
+                                          int NumBytes,
+                                          unsigned ADDrr,
+                                          unsigned ADDri) const {
+
+  DebugLoc dl = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc();
+  const SparcInstrInfo &TII =
+      *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+
+  if (NumBytes >= -4096 && NumBytes < 4096) {
+    BuildMI(MBB, MBBI, dl, TII.get(ADDri), SP::O6)
+      .addReg(SP::O6).addImm(NumBytes);
+    return;
+  }
+
+  // Emit this the hard way. This clobbers G1 which we always know is
+  // available here.
+  if (NumBytes >= 0) {
+    // Emit nonnegative numbers with sethi + or.
+    // sethi %hi(NumBytes), %g1
+    // or %g1, %lo(NumBytes), %g1
+    // add %sp, %g1, %sp
+    BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1)
+      .addImm(HI22(NumBytes));
+    BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
+      .addReg(SP::G1).addImm(LO10(NumBytes));
+    BuildMI(MBB, MBBI, dl, TII.get(ADDrr), SP::O6)
+      .addReg(SP::O6).addReg(SP::G1);
+    return;
+  }
+
+  // Emit negative numbers with sethi + xor.
+  // sethi %hix(NumBytes), %g1
+  // xor %g1, %lox(NumBytes), %g1
+  // add %sp, %g1, %sp
+  BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1)
+    .addImm(HIX22(NumBytes));
+  BuildMI(MBB, MBBI, dl, TII.get(SP::XORri), SP::G1)
+    .addReg(SP::G1).addImm(LOX10(NumBytes));
+  BuildMI(MBB, MBBI, dl, TII.get(ADDrr), SP::O6)
+    .addReg(SP::O6).addReg(SP::G1);
+}
+
 void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
   SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
 
@@ -55,21 +100,8 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
     SAVErr = SP::ADDrr;
   }
   NumBytes = - SubTarget.getAdjustedFrameSize(NumBytes);
-
-  if (NumBytes >= -4096) {
-    BuildMI(MBB, MBBI, dl, TII.get(SAVEri), SP::O6)
-      .addReg(SP::O6).addImm(NumBytes);
-  } else {
-    // Emit this the hard way. This clobbers G1 which we always know is
-    // available here.
-    unsigned OffHi = (unsigned)NumBytes >> 10U;
-    BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
-    // Emit G1 = G1 + I6
-    BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
-      .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
-    BuildMI(MBB, MBBI, dl, TII.get(SAVErr), SP::O6)
-      .addReg(SP::O6).addReg(SP::G1);
-  }
+  emitSPAdjustment(MF, MBB, MBBI, NumBytes, SAVErr, SAVEri);
+
   MachineModuleInfo &MMI = MF.getMMI();
   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
   MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
@@ -96,15 +128,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
   if (!hasReservedCallFrame(MF)) {
     MachineInstr &MI = *I;
-    DebugLoc DL = MI.getDebugLoc();
     int Size = MI.getOperand(0).getImm();
     if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
       Size = -Size;
-    const SparcInstrInfo &TII =
-      *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+
     if (Size)
-      BuildMI(MBB, I, DL, TII.get(SP::ADDri), SP::O6).addReg(SP::O6)
-        .addImm(Size);
+      emitSPAdjustment(MF, MBB, I, Size, SP::ADDrr, SP::ADDri);
   }
   MBB.erase(I);
 }
@@ -131,21 +160,7 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
     return;
 
   NumBytes = SubTarget.getAdjustedFrameSize(NumBytes);
-
-  if (NumBytes < 4096) {
-    BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), SP::O6)
-      .addReg(SP::O6).addImm(NumBytes);
-  } else {
-    // Emit this the hard way. This clobbers G1 which we always know is
-    // available here.
-    unsigned OffHi = (unsigned)NumBytes >> 10U;
-    BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
-    // Emit G1 = G1 + I6
-    BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
-      .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
-    BuildMI(MBB, MBBI, dl, TII.get(SP::ADDrr), SP::O6)
-      .addReg(SP::O6).addReg(SP::G1);
-  }
+  emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri);
 }
 
 bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
@@ -49,6 +49,14 @@ private:
 
   // Returns true if MF is a leaf procedure.
   bool isLeafProc(MachineFunction &MF) const;
+
+
+  // Emits code for adjusting SP in function prologue/epilogue.
+  void emitSPAdjustment(MachineFunction &MF,
+                        MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator MBBI,
+                        int NumBytes, unsigned ADDrr, unsigned ADDri) const;
+
 };
 
 } // End llvm namespace
@@ -1411,6 +1411,7 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::BSWAP, MVT::i64, Expand);
     setOperationAction(ISD::ROTL , MVT::i64, Expand);
     setOperationAction(ISD::ROTR , MVT::i64, Expand);
+    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
   }
 
   // FIXME: There are instructions available for ATOMIC_FENCE
@@ -2289,20 +2290,23 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
                        std::min(PtrVT.getSizeInBits(), VT.getSizeInBits())/8);
 }
 
-static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
+                                       bool is64Bit) {
   SDValue Chain = Op.getOperand(0);  // Legalize the chain.
   SDValue Size  = Op.getOperand(1);  // Legalize the size.
+  EVT VT = Size->getValueType(0);
   SDLoc dl(Op);
 
   unsigned SPReg = SP::O6;
-  SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
-  SDValue NewSP = DAG.getNode(ISD::SUB, dl, MVT::i32, SP, Size); // Value
+  SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+  SDValue NewSP = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
   Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain
 
   // The resultant pointer is actually 16 words from the bottom of the stack,
   // to provide a register spill area.
-  SDValue NewVal = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP,
-                               DAG.getConstant(96, MVT::i32));
+  unsigned regSpillArea = (is64Bit) ? 128 : 96;
+  SDValue NewVal = DAG.getNode(ISD::ADD, dl, VT, NewSP,
+                               DAG.getConstant(regSpillArea, VT));
   SDValue Ops[2] = { NewVal, Chain };
   return DAG.getMergeValues(Ops, 2, dl);
 }
@@ -2626,7 +2630,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
                                                        hasHardQuad);
   case ISD::VASTART:            return LowerVASTART(Op, DAG, *this);
   case ISD::VAARG:              return LowerVAARG(Op, DAG);
-  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG,
+                                                               is64Bit);
 
   case ISD::LOAD:               return LowerF128Load(Op, DAG);
   case ISD::STORE:              return LowerF128Store(Op, DAG);
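The lowering computes the new stack pointer and then biases the returned pointer past the register-save area at the bottom of the frame, whose size differs between the 32-bit (96-byte) and 64-bit (128-byte) SPARC ABIs. A sketch of just that address arithmetic, with plain integers standing in for SDValues:

    #include <cstdint>

    // new SP = old SP - allocation size; the pointer handed back to the
    // program sits above the ABI-mandated register spill area.
    uint64_t lowerDynamicAlloca(uint64_t sp, uint64_t size, bool is64Bit,
                                uint64_t &newSP) {
      const uint64_t regSpillArea = is64Bit ? 128 : 96;
      newSP = sp - size;
      return newSP + regSpillArea;
    }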
@@ -171,6 +171,12 @@ def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (CMPri $a, (as_i32imm $b))>;
 
 def : Pat<(ctpop i64:$src), (POPCrr $src)>;
 
+// "LEA" form of add
+def LEAX_ADDri : F3_2<2, 0b000000,
+                   (outs I64Regs:$dst), (ins MEMri:$addr),
+                   "add ${addr:arith}, $dst",
+                   [(set iPTR:$dst, ADDRri:$addr)]>;
+
 } // Predicates = [Is64Bit]
@@ -502,10 +502,11 @@ defm SRA : F3_12<"sra", 0b100111, sra>;
 defm ADD   : F3_12<"add", 0b000000, add>;
 
 // "LEA" forms of add (patterns to make tblgen happy)
-def LEA_ADDri   : F3_2<2, 0b000000,
-                   (outs IntRegs:$dst), (ins MEMri:$addr),
-                   "add ${addr:arith}, $dst",
-                   [(set iPTR:$dst, ADDRri:$addr)]>;
+let Predicates = [Is32Bit] in
+  def LEA_ADDri   : F3_2<2, 0b000000,
+                     (outs IntRegs:$dst), (ins MEMri:$addr),
+                     "add ${addr:arith}, $dst",
+                     [(set iPTR:$dst, ADDRri:$addr)]>;
 
 let Defs = [ICC] in
   defm ADDCC  : F3_12<"addcc", 0b010000, addc>;
@@ -105,19 +105,46 @@ static void replaceFI(MachineFunction &MF,
     // encode it.
     MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false);
     MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
-  } else {
-    // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to
-    // scavenge a register here instead of reserving G1 all of the time.
-    const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
-    unsigned OffHi = (unsigned)Offset >> 10U;
-    BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+    return;
+  }
+
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  // FIXME: it would be better to scavenge a register here instead of
+  // reserving G1 all of the time.
+  if (Offset >= 0) {
+    // Emit nonnegative immediates with sethi + or.
+    // sethi %hi(Offset), %g1
+    // add %g1, %fp, %g1
+    // Insert G1+%lo(offset) into the user.
+    BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1)
+      .addImm(HI22(Offset));
+
     // Emit G1 = G1 + I6
     BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
       .addReg(FramePtr);
     // Insert: G1+%lo(offset) into the user.
     MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
-    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset & ((1 << 10)-1));
+    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(LO10(Offset));
+    return;
   }
+
+  // Emit negative numbers with sethi + xor.
+  // sethi %hix(Offset), %g1
+  // xor %g1, %lox(offset), %g1
+  // add %g1, %fp, %g1
+  // Insert: G1 + 0 into the user.
+  BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1)
+    .addImm(HIX22(Offset));
+  BuildMI(*MI.getParent(), II, dl, TII.get(SP::XORri), SP::G1)
+    .addReg(SP::G1).addImm(LOX10(Offset));
+
+  BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
+    .addReg(FramePtr);
+  MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
+  MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
 }

@ -1933,10 +1933,10 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
  // high 32 bits and just masks out low bits. We can skip it if so.
  if (HighOp.getOpcode() == ISD::AND &&
      HighOp.getOperand(1).getOpcode() == ISD::Constant) {
    ConstantSDNode *MaskNode = cast<ConstantSDNode>(HighOp.getOperand(1));
    uint64_t Mask = MaskNode->getZExtValue() | Masks[High];
    if ((Mask >> 32) == 0xffffffff)
      HighOp = HighOp.getOperand(0);
    SDValue HighOp0 = HighOp.getOperand(0);
    uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
    if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
      HighOp = HighOp0;
  }

  // Take advantage of the fact that all GR32 operations only change the
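
The rewritten condition subsumes the old one: the AND can be dropped not only
when it preserves all 32 high bits, but whenever every high bit it would clear
is already known to be zero. A self-contained illustration of the bit logic
(illustrative only, not the in-tree SelectionDAG API):

    #include <cassert>
    #include <cstdint>

    // The AND is redundant iff every high bit it would clear,
    // ~(Mask | 0xffffffff), is already known to be zero in the input.
    static bool andIsRedundant(uint64_t Mask, uint64_t KnownZero) {
      uint64_t ClearedHighBits = ~(Mask | 0xffffffffULL);
      return (ClearedHighBits & ~KnownZero) == 0;
    }

    int main() {
      // A mask that keeps all high bits is trivially skippable (the old test).
      assert(andIsRedundant(0xffffffff00000000ULL, 0));
      // With known-zero bits, a narrower mask is skippable too (the new test).
      assert(andIsRedundant(0x0000ffff00000000ULL, 0xffff000000000000ULL));
      return 0;
    }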

@ -495,17 +495,17 @@ private:
  X86Operand *ParseATTOperand();
  X86Operand *ParseIntelOperand();
  X86Operand *ParseIntelOffsetOfOperator();
  X86Operand *ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
  bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
  X86Operand *ParseIntelOperator(unsigned OpKind);
  X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
  X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc,
                                   unsigned Size);
  X86Operand *ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
  bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
  X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
                                       int64_t ImmDisp, unsigned Size);
  X86Operand *ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
                                   InlineAsmIdentifierInfo &Info,
                                   bool IsUnevaluatedOperand, SMLoc &End);
  bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
                            InlineAsmIdentifierInfo &Info,
                            bool IsUnevaluatedOperand, SMLoc &End);

  X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
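
The signature changes above move these methods from an ad-hoc convention
(return a non-null X86Operand* to signal an error) to the usual MC parser
convention (return true on error, deliver results through reference
parameters). A minimal sketch of the two conventions side by side
(hypothetical names, not the in-tree code):

    #include <cstdio>

    struct Operand { const char *Msg; };

    // Old style: a non-null pointer smuggles the diagnostic out.
    static Operand *parseOld(bool Fail) {
      return Fail ? new Operand{"unknown token"} : nullptr;
    }

    // New style: bool means "had error"; results travel via out-parameters.
    static bool parseNew(bool Fail, const char **Err) {
      if (Fail) { *Err = "unknown token"; return true; }
      return false;
    }

    int main() {
      const char *Err = nullptr;
      if (Operand *E = parseOld(true)) { std::printf("error: %s\n", E->Msg); delete E; }
      if (parseNew(true, &Err)) std::printf("error: %s\n", Err);
      return 0;
    }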

@ -1269,8 +1269,7 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
    }
  }

X86Operand *
X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
  const AsmToken &Tok = Parser.getTok();

  bool Done = false;

@ -1292,7 +1291,7 @@ X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
        Done = true;
        break;
      }
      return ErrorOperand(Tok.getLoc(), "Unexpected token!");
      return Error(Tok.getLoc(), "unknown token in expression");
    }
    case AsmToken::EndOfStatement: {
      Done = true;

@ -1311,18 +1310,18 @@ X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
      } else {
        if (!isParsingInlineAsm()) {
          if (getParser().parsePrimaryExpr(Val, End))
            return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
            return Error(Tok.getLoc(), "Unexpected identifier!");
        } else {
          InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
          if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
                                                     /*Unevaluated*/ false, End))
            return Err;
          if (ParseIntelIdentifier(Val, Identifier, Info,
                                   /*Unevaluated=*/false, End))
            return true;
        }
        SM.onIdentifierExpr(Val, Identifier);
        UpdateLocLex = false;
        break;
      }
      return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
      return Error(Tok.getLoc(), "Unexpected identifier!");
    }
    case AsmToken::Integer:
      if (isParsingInlineAsm() && SM.getAddImmPrefix())

@ -1340,14 +1339,14 @@ X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
    case AsmToken::RParen: SM.onRParen(); break;
    }
    if (SM.hadError())
      return ErrorOperand(Tok.getLoc(), "Unexpected token!");
      return Error(Tok.getLoc(), "unknown token in expression");

    if (!Done && UpdateLocLex) {
      End = Tok.getLoc();
      Parser.Lex(); // Consume the token.
    }
  }
  return 0;
  return false;
}

X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,

@ -1364,8 +1363,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
  // may have already parsed an immediate displacement before the bracketed
  // expression.
  IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
  if (X86Operand *Err = ParseIntelExpression(SM, End))
    return Err;
  if (ParseIntelExpression(SM, End))
    return 0;

  const MCExpr *Disp;
  if (const MCExpr *Sym = SM.getSym()) {

@ -1383,8 +1382,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
  // Parse the dot operator (e.g., [ebx].foo.bar).
  if (Tok.getString().startswith(".")) {
    const MCExpr *NewDisp;
    if (X86Operand *Err = ParseIntelDotOperator(Disp, NewDisp))
      return Err;
    if (ParseIntelDotOperator(Disp, NewDisp))
      return 0;

    End = Tok.getEndLoc();
    Parser.Lex(); // Eat the field.

@ -1412,11 +1411,10 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
}

// Inline assembly may use variable names with namespace alias qualifiers.
X86Operand *X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
                                               StringRef &Identifier,
                                               InlineAsmIdentifierInfo &Info,
                                               bool IsUnevaluatedOperand,
                                               SMLoc &End) {
bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
                                        StringRef &Identifier,
                                        InlineAsmIdentifierInfo &Info,
                                        bool IsUnevaluatedOperand, SMLoc &End) {
  assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
  Val = 0;

@ -1441,7 +1439,7 @@ X86Operand *X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
  MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
  Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
  return 0;
  return false;
}

/// \brief Parse intel style segment override.

@ -1481,16 +1479,16 @@ X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
  SMLoc End;
  if (!isParsingInlineAsm()) {
    if (getParser().parsePrimaryExpr(Val, End))
      return ErrorOperand(Tok.getLoc(), "Unexpected token!");
      return ErrorOperand(Tok.getLoc(), "unknown token in expression");

    return X86Operand::CreateMem(Val, Start, End, Size);
  }

  InlineAsmIdentifierInfo Info;
  StringRef Identifier = Tok.getString();
  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
                                             /*Unevaluated*/ false, End))
    return Err;
  if (ParseIntelIdentifier(Val, Identifier, Info,
                           /*Unevaluated=*/false, End))
    return 0;
  return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
                               /*Scale=*/1, Start, End, Size, Identifier, Info);
}

@ -1508,22 +1506,22 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
  const MCExpr *Val;
  if (!isParsingInlineAsm()) {
    if (getParser().parsePrimaryExpr(Val, End))
      return ErrorOperand(Tok.getLoc(), "Unexpected token!");
      return ErrorOperand(Tok.getLoc(), "unknown token in expression");

    return X86Operand::CreateMem(Val, Start, End, Size);
  }

  InlineAsmIdentifierInfo Info;
  StringRef Identifier = Tok.getString();
  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
                                             /*Unevaluated*/ false, End))
    return Err;
  if (ParseIntelIdentifier(Val, Identifier, Info,
                           /*Unevaluated=*/false, End))
    return 0;
  return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
                               /*Scale=*/1, Start, End, Size, Identifier, Info);
}

/// Parse the '.' operator.
X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
                                         const MCExpr *&NewDisp) {
  const AsmToken &Tok = Parser.getTok();
  int64_t OrigDispVal, DotDispVal;

@ -1532,7 +1530,7 @@ X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
  if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
    OrigDispVal = OrigDisp->getValue();
  else
    return ErrorOperand(Tok.getLoc(), "Non-constant offsets are not supported!");
    return Error(Tok.getLoc(), "Non-constant offsets are not supported!");

  // Drop the '.'.
  StringRef DotDispStr = Tok.getString().drop_front(1);

@ -1547,10 +1545,10 @@ X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
    std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
    if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
                                           DotDisp))
      return ErrorOperand(Tok.getLoc(), "Unable to lookup field reference!");
      return Error(Tok.getLoc(), "Unable to lookup field reference!");
    DotDispVal = DotDisp;
  } else
    return ErrorOperand(Tok.getLoc(), "Unexpected token type!");
    return Error(Tok.getLoc(), "Unexpected token type!");

  if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
    SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());

@ -1561,7 +1559,7 @@ X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
  }

  NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
  return 0;
  return false;
}

/// Parse the 'offset' operator. This operator is used to specify the

@ -1575,9 +1573,9 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
  InlineAsmIdentifierInfo Info;
  SMLoc Start = Tok.getLoc(), End;
  StringRef Identifier = Tok.getString();
  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
                                             /*Unevaluated*/ false, End))
    return Err;
  if (ParseIntelIdentifier(Val, Identifier, Info,
                           /*Unevaluated=*/false, End))
    return 0;

  // Don't emit the offset operator.
  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));

@ -1611,9 +1609,12 @@ X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
  InlineAsmIdentifierInfo Info;
  SMLoc Start = Tok.getLoc(), End;
  StringRef Identifier = Tok.getString();
  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
                                             /*Unevaluated*/ true, End))
    return Err;
  if (ParseIntelIdentifier(Val, Identifier, Info,
                           /*Unevaluated=*/true, End))
    return 0;

  if (!Info.OpDecl)
    return ErrorOperand(Start, "unable to lookup expression");

  unsigned CVal = 0;
  switch(OpKind) {

@ -1664,8 +1665,8 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
  AsmToken StartTok = Tok;
  IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
                           /*AddImmPrefix=*/false);
  if (X86Operand *Err = ParseIntelExpression(SM, End))
    return Err;
  if (ParseIntelExpression(SM, End))
    return 0;

  int64_t Imm = SM.getImm();
  if (isParsingInlineAsm()) {

@ -13120,19 +13120,27 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
    // fall through
  case MVT::v4i32:
  case MVT::v8i16: {
    // (sext (vzext x)) -> (vsext x)
    SDValue Op0 = Op.getOperand(0);
    SDValue Op00 = Op0.getOperand(0);
    SDValue Tmp1;
    // Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
    if (Op0.getOpcode() == ISD::BITCAST &&
        Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
        Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
      // (sext (vzext x)) -> (vsext x)
      Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG);
    if (Tmp1.getNode()) {
      SDValue Tmp1Op0 = Tmp1.getOperand(0);
      assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
             "This optimization is invalid without a VZEXT.");
      return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
      if (Tmp1.getNode()) {
        EVT ExtraEltVT = ExtraVT.getVectorElementType();
        // This folding is only valid when the in-reg type is a vector of i8,
        // i16, or i32.
        if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
            ExtraEltVT == MVT::i32) {
          SDValue Tmp1Op0 = Tmp1.getOperand(0);
          assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
                 "This optimization is invalid without a VZEXT.");
          return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
        }
        Op0 = Tmp1;
      }
    }

    // If the above didn't work, then just use Shift-Left + Shift-Right.

@ -210,19 +210,20 @@ private:
// Any two pointers in the same address space are equivalent, intptr_t and
// pointers are equivalent. Otherwise, standard type equivalence rules apply.
bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {

  PointerType *PTy1 = dyn_cast<PointerType>(Ty1);
  PointerType *PTy2 = dyn_cast<PointerType>(Ty2);

  if (TD) {
    if (PTy1 && PTy1->getAddressSpace() == 0) Ty1 = TD->getIntPtrType(Ty1);
    if (PTy2 && PTy2->getAddressSpace() == 0) Ty2 = TD->getIntPtrType(Ty2);
  }

  if (Ty1 == Ty2)
    return true;
  if (Ty1->getTypeID() != Ty2->getTypeID()) {
    if (TD) {

      if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ty1))
        return true;

      if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ty2))
        return true;
    }
    if (Ty1->getTypeID() != Ty2->getTypeID())
      return false;
  }

  switch (Ty1->getTypeID()) {
  default:

@ -244,8 +245,7 @@ bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {
    return true;

  case Type::PointerTyID: {
    PointerType *PTy1 = cast<PointerType>(Ty1);
    PointerType *PTy2 = cast<PointerType>(Ty2);
    assert(PTy1 && PTy2 && "Both types must be pointers here.");
    return PTy1->getAddressSpace() == PTy2->getAddressSpace();
  }
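
The rule stated in the comment, that an address-space-0 pointer and the
target's intptr_t-sized integer are interchangeable once target data is
available, can be modeled standalone (a sketch only, not the in-tree
FunctionComparator):

    #include <cassert>

    enum TypeKind { IntTy, PtrTy };
    struct Ty { TypeKind Kind; unsigned Bits; unsigned AddrSpace; };

    static bool isEquivalentType(Ty A, Ty B, unsigned IntPtrBits) {
      // Normalize address-space-0 pointers to the intptr integer type.
      if (A.Kind == PtrTy && A.AddrSpace == 0) A = Ty{IntTy, IntPtrBits, 0};
      if (B.Kind == PtrTy && B.AddrSpace == 0) B = Ty{IntTy, IntPtrBits, 0};
      if (A.Kind != B.Kind) return false;
      if (A.Kind == IntTy) return A.Bits == B.Bits;
      return A.AddrSpace == B.AddrSpace; // non-zero address spaces must match
    }

    int main() {
      Ty I64{IntTy, 64, 0}, P0{PtrTy, 64, 0}, P1{PtrTy, 64, 1};
      assert(isEquivalentType(P0, I64, 64));  // i8* equals intptr on LP64
      assert(!isEquivalentType(P1, I64, 64)); // other address spaces do not
      return 0;
    }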

@ -1198,11 +1198,16 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
    Type *AndTy = AndCST->getType();          // Type of the and.

    // We can fold this as long as we can't shift unknown bits
    // into the mask. This can only happen with signed shift
    // rights, as they sign-extend.
    // into the mask. This can happen with signed shift
    // rights, as they sign-extend. With logical shifts,
    // we must still make sure the comparison is not signed
    // because we are effectively changing the
    // position of the sign bit (PR17827).
    // TODO: We can relax these constraints a bit more.
    if (ShAmt) {
      bool CanFold = Shift->isLogicalShift();
      if (!CanFold) {
      bool CanFold = false;
      unsigned ShiftOpcode = Shift->getOpcode();
      if (ShiftOpcode == Instruction::AShr) {
        // To test for the bad case of the signed shr, see if any
        // of the bits shifted in could be tested after the mask.
        uint32_t TyBits = Ty->getPrimitiveSizeInBits();

@ -1212,6 +1217,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
        if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) &
             AndCST->getValue()) == 0)
          CanFold = true;
      } else if (ShiftOpcode == Instruction::Shl ||
                 ShiftOpcode == Instruction::LShr) {
        CanFold = !ICI.isSigned();
      }

      if (CanFold) {
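
Why a signed comparison blocks the fold (PR17827), shown on int8_t; this is a
standalone illustration of the miscompile the stricter check avoids:

    #include <cassert>
    #include <cstdint>

    int main() {
      // ((x << 7) & 0x80) s< 0 tests bit 0 of x, but rewriting it to
      // (x & 1) s< 0 would test x's own sign bit instead.
      int8_t x = 1;
      bool original  = (int8_t)((x << 7) & 0x80) < 0; // true: bit 0 of x is set
      bool rewritten = (int8_t)(x & 0x01) < 0;        // false: 1 is nonnegative
      assert(original != rewritten);                  // the naive fold miscompiles
      return 0;
    }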

@ -432,7 +432,7 @@ namespace {
    bool Partial;

    /// The current position in the sequence.
    Sequence Seq : 8;
    unsigned char Seq : 8;

    /// Unidirectional information about the current sequence.
    RRInfo RRI;

@ -498,7 +498,7 @@ namespace {
    }

    Sequence GetSeq() const {
      return Seq;
      return static_cast<Sequence>(Seq);
    }

    void ClearSequenceProgress() {

@ -538,7 +538,8 @@ namespace {

void
PtrState::Merge(const PtrState &Other, bool TopDown) {
  Seq = MergeSeqs(Seq, Other.Seq, TopDown);
  Seq = MergeSeqs(static_cast<Sequence>(Seq), static_cast<Sequence>(Other.Seq),
                  TopDown);
  KnownPositiveRefCount &= Other.KnownPositiveRefCount;

  // If we're not in a sequence (anymore), drop all associated state.
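
Storing the raw bits in an unsigned char and casting back at the accessor
sidesteps the portability trap with enum bit-fields: whether a plain enum
bit-field is signed is implementation-defined, so an 8-bit field can
sign-extend enumerator values on some compilers. A standalone sketch of the
pattern (simplified enumerators, not the in-tree ObjCARC code):

    #include <cassert>

    enum Sequence { S_None, S_Retain, S_CanRelease, S_Use, S_Stop, S_Release };

    struct PtrState {
      unsigned char Seq : 8; // raw storage; cast at the boundaries
      Sequence GetSeq() const { return static_cast<Sequence>(Seq); }
      void SetSeq(Sequence S) { Seq = static_cast<unsigned char>(S); }
    };

    int main() {
      PtrState S{};
      S.SetSeq(S_Use);
      assert(S.GetSeq() == S_Use);
      return 0;
    }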

@ -1537,6 +1537,15 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
  const SCEV *ExitCount = SE->getBackedgeTakenCount(OrigLoop);
  assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");

  // The exit count might have the type of i64 while the phi is i32. This can
  // happen if we have an induction variable that is sign extended before the
  // compare. The only way that we get a backedge taken count is that the
  // induction variable was signed and as such will not overflow. In such a case
  // truncation is legal.
  if (ExitCount->getType()->getPrimitiveSizeInBits() >
      IdxTy->getPrimitiveSizeInBits())
    ExitCount = SE->getTruncateOrNoop(ExitCount, IdxTy);

  ExitCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy);
  // Get the total trip count from the count by adding 1.
  ExitCount = SE->getAddExpr(ExitCount,
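
A minimal C illustration of the situation the comment describes: the i32
induction variable is sign-extended before the compare, so the backedge-taken
count is computed in 64 bits even though the loop phi is 32 bits wide; signed
overflow of i would be undefined behavior, so truncating the count back to
i32 is safe.

    // The compare is done in 64 bits; the induction phi stays 32 bits.
    void scale(float *x, float *y, long n) {
      for (int i = 0; (long)i < n; ++i)
        y[i] += 2.0f * x[i];
    }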

@ -520,6 +520,8 @@ private:

  /// Holds all of the instructions that we gathered.
  SetVector<Instruction *> GatherSeq;
  /// A list of blocks that we are going to CSE.
  SmallSet<BasicBlock *, 8> CSEBlocks;

  /// Numbers instructions in different blocks.
  DenseMap<BasicBlock *, BlockNumbering> BlocksNumbers;

@ -562,10 +564,8 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
         UE = Scalar->use_end(); User != UE; ++User) {
      DEBUG(dbgs() << "SLP: Checking user:" << **User << ".\n");

      bool Gathered = MustGather.count(*User);

      // Skip in-tree scalars that become vectors.
      if (ScalarToTreeEntry.count(*User) && !Gathered) {
      if (ScalarToTreeEntry.count(*User)) {
        DEBUG(dbgs() << "SLP: \tInternal user will be removed:" <<
              **User << ".\n");
        int Idx = ScalarToTreeEntry[*User]; (void) Idx;

@ -1274,6 +1274,7 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
    Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
    if (Instruction *Insrt = dyn_cast<Instruction>(Vec)) {
      GatherSeq.insert(Insrt);
      CSEBlocks.insert(Insrt->getParent());

      // Add to our 'need-to-extract' list.
      if (ScalarToTreeEntry.count(VL[i])) {

@ -1588,6 +1589,7 @@ Value *BoUpSLP::vectorizeTree() {
      if (PHINode *PN = dyn_cast<PHINode>(Vec)) {
        Builder.SetInsertPoint(PN->getParent()->getFirstInsertionPt());
        Value *Ex = Builder.CreateExtractElement(Vec, Lane);
        CSEBlocks.insert(PN->getParent());
        User->replaceUsesOfWith(Scalar, Ex);
      } else if (isa<Instruction>(Vec)){
        if (PHINode *PH = dyn_cast<PHINode>(User)) {

@ -1595,17 +1597,20 @@ Value *BoUpSLP::vectorizeTree() {
            if (PH->getIncomingValue(i) == Scalar) {
              Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
              Value *Ex = Builder.CreateExtractElement(Vec, Lane);
              CSEBlocks.insert(PH->getIncomingBlock(i));
              PH->setOperand(i, Ex);
            }
          }
        } else {
          Builder.SetInsertPoint(cast<Instruction>(User));
          Value *Ex = Builder.CreateExtractElement(Vec, Lane);
          CSEBlocks.insert(cast<Instruction>(User)->getParent());
          User->replaceUsesOfWith(Scalar, Ex);
        }
      } else {
        Builder.SetInsertPoint(F->getEntryBlock().begin());
        Value *Ex = Builder.CreateExtractElement(Vec, Lane);
        CSEBlocks.insert(&F->getEntryBlock());
        User->replaceUsesOfWith(Scalar, Ex);
      }

@ -1631,8 +1636,6 @@ Value *BoUpSLP::vectorizeTree() {
      for (Value::use_iterator User = Scalar->use_begin(),
           UE = Scalar->use_end(); User != UE; ++User) {
        DEBUG(dbgs() << "SLP: \tvalidating user:" << **User << ".\n");
        assert(!MustGather.count(*User) &&
               "Replacing gathered value with undef");

        assert((ScalarToTreeEntry.count(*User) ||
                // It is legal to replace the reduction users by undef.

@ -1668,9 +1671,6 @@ public:
void BoUpSLP::optimizeGatherSequence() {
  DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size()
        << " gather sequences instructions.\n");
  // Keep a list of visited BBs to run CSE on. It is typically small.
  SmallPtrSet<BasicBlock *, 4> VisitedBBs;
  SmallVector<BasicBlock *, 4> CSEWorkList;
  // LICM InsertElementInst sequences.
  for (SetVector<Instruction *>::iterator it = GatherSeq.begin(),
       e = GatherSeq.end(); it != e; ++it) {

@ -1679,9 +1679,6 @@ void BoUpSLP::optimizeGatherSequence() {
    if (!Insert)
      continue;

    if (VisitedBBs.insert(Insert->getParent()))
      CSEWorkList.push_back(Insert->getParent());

    // Check if this block is inside a loop.
    Loop *L = LI->getLoopFor(Insert->getParent());
    if (!L)

@ -1708,6 +1705,7 @@ void BoUpSLP::optimizeGatherSequence() {

  // Sort blocks by domination. This ensures we visit a block after all blocks
  // dominating it are visited.
  SmallVector<BasicBlock *, 8> CSEWorkList(CSEBlocks.begin(), CSEBlocks.end());
  std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), DTCmp(DT));

  // Perform O(N^2) search over the gather sequences and merge identical

@ -1723,8 +1721,7 @@ void BoUpSLP::optimizeGatherSequence() {
    // For all instructions in blocks containing gather sequences:
    for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
      Instruction *In = it++;
      if ((!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In)) ||
          !GatherSeq.count(In))
      if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
        continue;

      // Check if we can replace this instruction with any of the

@ -1746,6 +1743,8 @@ void BoUpSLP::optimizeGatherSequence() {
      }
    }
  }
  CSEBlocks.clear();
  GatherSeq.clear();
}

/// The SLPVectorizer Pass.
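
The "sort blocks by domination" step above depends on a comparator with
DTCmp's shape. A standalone model (the real comparator queries DominatorTree;
dominator-tree depth is used here as a stand-in, since a block is always
deeper than the blocks that dominate it):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct Block { int DomDepth; }; // depth in the dominator tree, entry = 0

    struct DTCmp {
      bool operator()(const Block *A, const Block *B) const {
        return A->DomDepth < B->DomDepth; // stand-in for properlyDominates()
      }
    };

    int main() {
      Block Entry{0}, Mid{1}, Exit{2};  // straight-line chain Entry->Mid->Exit
      std::vector<Block *> Work{&Exit, &Mid, &Entry};
      std::stable_sort(Work.begin(), Work.end(), DTCmp());
      assert(Work[0] == &Entry && Work[1] == &Mid && Work[2] == &Exit);
      return 0;
    }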

File diff suppressed because it is too large
222
external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-bsl.ll
vendored
Normal file
@ -0,0 +1,222 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s

declare <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double>, <2 x double>, <2 x double>)

declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)

declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)

declare <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float>, <4 x float>, <4 x float>)

declare <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)

declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>)

declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)

declare <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double>, <1 x double>, <1 x double>)

declare <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float>, <2 x float>, <2 x float>)

declare <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64>, <1 x i64>, <1 x i64>)

declare <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)

define <8 x i8> @test_vbsl_s8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
; CHECK-LABEL: test_vbsl_s8:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
  ret <8 x i8> %vbsl.i
}

define <8 x i8> @test_vbsl_s16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
; CHECK-LABEL: test_vbsl_s16:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
  %0 = bitcast <4 x i16> %vbsl3.i to <8 x i8>
  ret <8 x i8> %0
}

define <2 x i32> @test_vbsl_s32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {
; CHECK-LABEL: test_vbsl_s32:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3)
  ret <2 x i32> %vbsl3.i
}

define <1 x i64> @test_vbsl_s64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
; CHECK-LABEL: test_vbsl_s64:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3)
  ret <1 x i64> %vbsl3.i
}

define <8 x i8> @test_vbsl_u8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
; CHECK-LABEL: test_vbsl_u8:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
  ret <8 x i8> %vbsl.i
}

define <4 x i16> @test_vbsl_u16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
; CHECK-LABEL: test_vbsl_u16:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
  ret <4 x i16> %vbsl3.i
}

define <2 x i32> @test_vbsl_u32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) {
; CHECK-LABEL: test_vbsl_u32:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3)
  ret <2 x i32> %vbsl3.i
}

define <1 x i64> @test_vbsl_u64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
; CHECK-LABEL: test_vbsl_u64:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3)
  ret <1 x i64> %vbsl3.i
}

define <2 x float> @test_vbsl_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) {
; CHECK-LABEL: test_vbsl_f32:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl3.i = tail call <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3)
  ret <2 x float> %vbsl3.i
}

define <1 x double> @test_vbsl_f64(<1 x i64> %v1, <1 x double> %v2, <1 x double> %v3) {
; CHECK-LABEL: test_vbsl_f64:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl.i = bitcast <1 x i64> %v1 to <1 x double>
  %vbsl3.i = tail call <1 x double> @llvm.arm.neon.vbsl.v1f64(<1 x double> %vbsl.i, <1 x double> %v2, <1 x double> %v3)
  ret <1 x double> %vbsl3.i
}

define <8 x i8> @test_vbsl_p8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) {
; CHECK-LABEL: test_vbsl_p8:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3)
  ret <8 x i8> %vbsl.i
}

define <4 x i16> @test_vbsl_p16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) {
; CHECK-LABEL: test_vbsl_p16:
; CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
entry:
  %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3)
  ret <4 x i16> %vbsl3.i
}

define <16 x i8> @test_vbslq_s8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
; CHECK-LABEL: test_vbslq_s8:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
  ret <16 x i8> %vbsl.i
}

define <8 x i16> @test_vbslq_s16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
; CHECK-LABEL: test_vbslq_s16:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
  ret <8 x i16> %vbsl3.i
}

define <4 x i32> @test_vbslq_s32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-LABEL: test_vbslq_s32:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3)
  ret <4 x i32> %vbsl3.i
}

define <2 x i64> @test_vbslq_s64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) {
; CHECK-LABEL: test_vbslq_s64:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3)
  ret <2 x i64> %vbsl3.i
}

define <16 x i8> @test_vbslq_u8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
; CHECK-LABEL: test_vbslq_u8:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
  ret <16 x i8> %vbsl.i
}

define <8 x i16> @test_vbslq_u16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
; CHECK-LABEL: test_vbslq_u16:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
  ret <8 x i16> %vbsl3.i
}

define <4 x i32> @test_vbslq_u32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-LABEL: test_vbslq_u32:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3)
  ret <4 x i32> %vbsl3.i
}

define <2 x i64> @test_vbslq_u64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) {
; CHECK-LABEL: test_vbslq_u64:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3)
  ret <2 x i64> %vbsl3.i
}

define <4 x float> @test_vbslq_f32(<4 x i32> %v1, <4 x float> %v2, <4 x float> %v3) {
; CHECK-LABEL: test_vbslq_f32:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl.i = bitcast <4 x i32> %v1 to <4 x float>
  %vbsl3.i = tail call <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float> %vbsl.i, <4 x float> %v2, <4 x float> %v3)
  ret <4 x float> %vbsl3.i
}

define <16 x i8> @test_vbslq_p8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
; CHECK-LABEL: test_vbslq_p8:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3)
  ret <16 x i8> %vbsl.i
}

define <8 x i16> @test_vbslq_p16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) {
; CHECK-LABEL: test_vbslq_p16:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3)
  ret <8 x i16> %vbsl3.i
}

define <2 x double> @test_vbslq_f64(<2 x i64> %v1, <2 x double> %v2, <2 x double> %v3) {
; CHECK-LABEL: test_vbslq_f64:
; CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
entry:
  %vbsl.i = bitcast <2 x i64> %v1 to <2 x double>
  %vbsl3.i = tail call <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double> %vbsl.i, <2 x double> %v2, <2 x double> %v3)
  ret <2 x double> %vbsl3.i
}

@ -11,3 +11,14 @@ entry:
  ret <2 x float> %add
}

define <4 x i32> @test_vshrn_not_match(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_not_match
; CHECK-NOT: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #35
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = ashr <2 x i64> %b, <i64 35, i64 35>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

@ -78,3 +78,11 @@ define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) {
  ret <1 x i64> %shuffle.i
}

define <1 x i64> @test_vector_copy_dup_dv2D(<1 x i64> %a, <2 x i64> %c) {
;CHECK: test_vector_copy_dup_dv2D
;CHECK: dup {{d[0-31]+}}, {{v[0-31]+}}.d[1]
  %vget_lane = extractelement <2 x i64> %c, i32 1
  %vset_lane = insertelement <1 x i64> undef, i64 %vget_lane, i32 0
  ret <1 x i64> %vset_lane
}

@ -90,10 +90,10 @@ declare double @llvm.aarch64.neon.vcvtf64.n.u64(<1 x i64>, i32)

define i32 @test_vcvts_n_s32_f32(float %a) {
; CHECK: test_vcvts_n_s32_f32
; CHECK: fcvtzs {{s[0-9]+}}, {{s[0-9]+}}, #0
; CHECK: fcvtzs {{s[0-9]+}}, {{s[0-9]+}}, #1
entry:
  %fcvtzs = insertelement <1 x float> undef, float %a, i32 0
  %fcvtzs1 = call <1 x i32> @llvm.aarch64.neon.vcvts.n.s32.f32(<1 x float> %fcvtzs, i32 0)
  %fcvtzs1 = call <1 x i32> @llvm.aarch64.neon.vcvts.n.s32.f32(<1 x float> %fcvtzs, i32 1)
  %0 = extractelement <1 x i32> %fcvtzs1, i32 0
  ret i32 %0
}

@ -102,10 +102,10 @@ declare <1 x i32> @llvm.aarch64.neon.vcvts.n.s32.f32(<1 x float>, i32)

define i64 @test_vcvtd_n_s64_f64(double %a) {
; CHECK: test_vcvtd_n_s64_f64
; CHECK: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}, #0
; CHECK: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}, #1
entry:
  %fcvtzs = insertelement <1 x double> undef, double %a, i32 0
  %fcvtzs1 = call <1 x i64> @llvm.aarch64.neon.vcvtd.n.s64.f64(<1 x double> %fcvtzs, i32 0)
  %fcvtzs1 = call <1 x i64> @llvm.aarch64.neon.vcvtd.n.s64.f64(<1 x double> %fcvtzs, i32 1)
  %0 = extractelement <1 x i64> %fcvtzs1, i32 0
  ret i64 %0
}

@ -114,10 +114,10 @@ declare <1 x i64> @llvm.aarch64.neon.vcvtd.n.s64.f64(<1 x double>, i32)

define i32 @test_vcvts_n_u32_f32(float %a) {
; CHECK: test_vcvts_n_u32_f32
; CHECK: fcvtzu {{s[0-9]+}}, {{s[0-9]+}}, #0
; CHECK: fcvtzu {{s[0-9]+}}, {{s[0-9]+}}, #32
entry:
  %fcvtzu = insertelement <1 x float> undef, float %a, i32 0
  %fcvtzu1 = call <1 x i32> @llvm.aarch64.neon.vcvts.n.u32.f32(<1 x float> %fcvtzu, i32 0)
  %fcvtzu1 = call <1 x i32> @llvm.aarch64.neon.vcvts.n.u32.f32(<1 x float> %fcvtzu, i32 32)
  %0 = extractelement <1 x i32> %fcvtzu1, i32 0
  ret i32 %0
}

@ -126,10 +126,10 @@ declare <1 x i32> @llvm.aarch64.neon.vcvts.n.u32.f32(<1 x float>, i32)

define i64 @test_vcvtd_n_u64_f64(double %a) {
; CHECK: test_vcvtd_n_u64_f64
; CHECK: fcvtzu {{d[0-9]+}}, {{d[0-9]+}}, #0
; CHECK: fcvtzu {{d[0-9]+}}, {{d[0-9]+}}, #64
entry:
  %fcvtzu = insertelement <1 x double> undef, double %a, i32 0
  %fcvtzu1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtd.n.u64.f64(<1 x double> %fcvtzu, i32 0)
  %fcvtzu1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtd.n.u64.f64(<1 x double> %fcvtzu, i32 64)
  %0 = extractelement <1 x i64> %fcvtzu1, i32 0
  ret i64 %0
}

26
external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-fabd.ll
vendored
Normal file
@ -0,0 +1,26 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s

define float @test_vabds_f32(float %a, float %b) {
; CHECK-LABEL: test_vabds_f32
; CHECK: fabd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
entry:
  %vabd.i = insertelement <1 x float> undef, float %a, i32 0
  %vabd1.i = insertelement <1 x float> undef, float %b, i32 0
  %vabd2.i = call <1 x float> @llvm.aarch64.neon.vabd.v1f32(<1 x float> %vabd.i, <1 x float> %vabd1.i)
  %0 = extractelement <1 x float> %vabd2.i, i32 0
  ret float %0
}

define double @test_vabdd_f64(double %a, double %b) {
; CHECK-LABEL: test_vabdd_f64
; CHECK: fabd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
entry:
  %vabd.i = insertelement <1 x double> undef, double %a, i32 0
  %vabd1.i = insertelement <1 x double> undef, double %b, i32 0
  %vabd2.i = call <1 x double> @llvm.aarch64.neon.vabd.v1f64(<1 x double> %vabd.i, <1 x double> %vabd1.i)
  %0 = extractelement <1 x double> %vabd2.i, i32 0
  ret double %0
}

declare <1 x double> @llvm.aarch64.neon.vabd.v1f64(<1 x double>, <1 x double>)
declare <1 x float> @llvm.aarch64.neon.vabd.v1f32(<1 x float>, <1 x float>)

255
external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-fcvt.ll
vendored
Normal file
@ -0,0 +1,255 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

;; Scalar Floating-point Convert

define float @test_vcvtxn(double %a) {
; CHECK: test_vcvtxn
; CHECK: fcvtxn {{s[0-9]}}, {{d[0-9]}}
entry:
  %vcvtf.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtf1.i = tail call <1 x float> @llvm.aarch64.neon.fcvtxn.v1f32.v1f64(<1 x double> %vcvtf.i)
  %0 = extractelement <1 x float> %vcvtf1.i, i32 0
  ret float %0
}

declare <1 x float> @llvm.aarch64.neon.fcvtxn.v1f32.v1f64(<1 x double>)

define i32 @test_vcvtass(float %a) {
; CHECK: test_vcvtass
; CHECK: fcvtas {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtas.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtas1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.v1f32(<1 x float> %vcvtas.i)
  %0 = extractelement <1 x i32> %vcvtas1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.v1f32(<1 x float>)

define i64 @test_test_vcvtasd(double %a) {
; CHECK: test_test_vcvtasd
; CHECK: fcvtas {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtas.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtas1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %vcvtas.i)
  %0 = extractelement <1 x i64> %vcvtas1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtaus(float %a) {
; CHECK: test_vcvtaus
; CHECK: fcvtau {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtau.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtau1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.v1f32(<1 x float> %vcvtau.i)
  %0 = extractelement <1 x i32> %vcvtau1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtaud(double %a) {
; CHECK: test_vcvtaud
; CHECK: fcvtau {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtau.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtau1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %vcvtau.i)
  %0 = extractelement <1 x i64> %vcvtau1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtmss(float %a) {
; CHECK: test_vcvtmss
; CHECK: fcvtms {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtms.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtms1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.v1f32(<1 x float> %vcvtms.i)
  %0 = extractelement <1 x i32> %vcvtms1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtmd_s64_f64(double %a) {
; CHECK: test_vcvtmd_s64_f64
; CHECK: fcvtms {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtms.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtms1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %vcvtms.i)
  %0 = extractelement <1 x i64> %vcvtms1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtmus(float %a) {
; CHECK: test_vcvtmus
; CHECK: fcvtmu {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtmu.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtmu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.v1f32(<1 x float> %vcvtmu.i)
  %0 = extractelement <1 x i32> %vcvtmu1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtmud(double %a) {
; CHECK: test_vcvtmud
; CHECK: fcvtmu {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtmu.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtmu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %vcvtmu.i)
  %0 = extractelement <1 x i64> %vcvtmu1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtnss(float %a) {
; CHECK: test_vcvtnss
; CHECK: fcvtns {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtns.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtns1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.v1f32(<1 x float> %vcvtns.i)
  %0 = extractelement <1 x i32> %vcvtns1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtnd_s64_f64(double %a) {
; CHECK: test_vcvtnd_s64_f64
; CHECK: fcvtns {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtns.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtns1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %vcvtns.i)
  %0 = extractelement <1 x i64> %vcvtns1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtnus(float %a) {
; CHECK: test_vcvtnus
; CHECK: fcvtnu {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtnu.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtnu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.v1f32(<1 x float> %vcvtnu.i)
  %0 = extractelement <1 x i32> %vcvtnu1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtnud(double %a) {
; CHECK: test_vcvtnud
; CHECK: fcvtnu {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtnu.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtnu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %vcvtnu.i)
  %0 = extractelement <1 x i64> %vcvtnu1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtpss(float %a) {
; CHECK: test_vcvtpss
; CHECK: fcvtps {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtps.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtps1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.v1f32(<1 x float> %vcvtps.i)
  %0 = extractelement <1 x i32> %vcvtps1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtpd_s64_f64(double %a) {
; CHECK: test_vcvtpd_s64_f64
; CHECK: fcvtps {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtps.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtps1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %vcvtps.i)
  %0 = extractelement <1 x i64> %vcvtps1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtpus(float %a) {
; CHECK: test_vcvtpus
; CHECK: fcvtpu {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtpu.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtpu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.v1f32(<1 x float> %vcvtpu.i)
  %0 = extractelement <1 x i32> %vcvtpu1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtpud(double %a) {
; CHECK: test_vcvtpud
; CHECK: fcvtpu {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtpu.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtpu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %vcvtpu.i)
  %0 = extractelement <1 x i64> %vcvtpu1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtss(float %a) {
; CHECK: test_vcvtss
; CHECK: fcvtzs {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtzs.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtzs1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.v1f32(<1 x float> %vcvtzs.i)
  %0 = extractelement <1 x i32> %vcvtzs1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtd_s64_f64(double %a) {
; CHECK: test_vcvtd_s64_f64
; CHECK: fcvtzs {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvzs.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvzs1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %vcvzs.i)
  %0 = extractelement <1 x i64> %vcvzs1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double>)

define i32 @test_vcvtus(float %a) {
; CHECK: test_vcvtus
; CHECK: fcvtzu {{s[0-9]}}, {{s[0-9]}}
entry:
  %vcvtzu.i = insertelement <1 x float> undef, float %a, i32 0
  %vcvtzu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.v1f32(<1 x float> %vcvtzu.i)
  %0 = extractelement <1 x i32> %vcvtzu1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.v1f32(<1 x float>)

define i64 @test_vcvtud(double %a) {
; CHECK: test_vcvtud
; CHECK: fcvtzu {{d[0-9]}}, {{d[0-9]}}
entry:
  %vcvtzu.i = insertelement <1 x double> undef, double %a, i32 0
  %vcvtzu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %vcvtzu.i)
  %0 = extractelement <1 x i64> %vcvtzu1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double>)

@ -316,10 +316,10 @@ entry:

define i8 @test_vqshrnh_n_s16(i16 %a) {
; CHECK: test_vqshrnh_n_s16
; CHECK: sqshrn {{b[0-9]+}}, {{h[0-9]+}}, #15
; CHECK: sqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
entry:
  %vsqshrn = insertelement <1 x i16> undef, i16 %a, i32 0
  %vsqshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16> %vsqshrn, i32 15)
  %vsqshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16> %vsqshrn, i32 8)
  %0 = extractelement <1 x i8> %vsqshrn1, i32 0
  ret i8 %0
}

@ -328,10 +328,10 @@ declare <1 x i8> @llvm.aarch64.neon.vsqshrn.v1i8(<1 x i16>, i32)

define i16 @test_vqshrns_n_s32(i32 %a) {
; CHECK: test_vqshrns_n_s32
; CHECK: sqshrn {{h[0-9]+}}, {{s[0-9]+}}, #31
; CHECK: sqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
entry:
  %vsqshrn = insertelement <1 x i32> undef, i32 %a, i32 0
  %vsqshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32> %vsqshrn, i32 31)
  %vsqshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32> %vsqshrn, i32 16)
  %0 = extractelement <1 x i16> %vsqshrn1, i32 0
  ret i16 %0
}

@ -340,10 +340,10 @@ declare <1 x i16> @llvm.aarch64.neon.vsqshrn.v1i16(<1 x i32>, i32)

define i32 @test_vqshrnd_n_s64(i64 %a) {
; CHECK: test_vqshrnd_n_s64
; CHECK: sqshrn {{s[0-9]+}}, {{d[0-9]+}}, #63
; CHECK: sqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
entry:
  %vsqshrn = insertelement <1 x i64> undef, i64 %a, i32 0
  %vsqshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64> %vsqshrn, i32 63)
  %vsqshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64> %vsqshrn, i32 32)
  %0 = extractelement <1 x i32> %vsqshrn1, i32 0
  ret i32 %0
}

@ -352,10 +352,10 @@ declare <1 x i32> @llvm.aarch64.neon.vsqshrn.v1i32(<1 x i64>, i32)

define i8 @test_vqshrnh_n_u16(i16 %a) {
; CHECK: test_vqshrnh_n_u16
; CHECK: uqshrn {{b[0-9]+}}, {{h[0-9]+}}, #15
; CHECK: uqshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
entry:
  %vuqshrn = insertelement <1 x i16> undef, i16 %a, i32 0
  %vuqshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16> %vuqshrn, i32 15)
  %vuqshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16> %vuqshrn, i32 8)
  %0 = extractelement <1 x i8> %vuqshrn1, i32 0
  ret i8 %0
}

@ -364,10 +364,10 @@ declare <1 x i8> @llvm.aarch64.neon.vuqshrn.v1i8(<1 x i16>, i32)

define i16 @test_vqshrns_n_u32(i32 %a) {
; CHECK: test_vqshrns_n_u32
; CHECK: uqshrn {{h[0-9]+}}, {{s[0-9]+}}, #31
; CHECK: uqshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
entry:
  %vuqshrn = insertelement <1 x i32> undef, i32 %a, i32 0
  %vuqshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32> %vuqshrn, i32 31)
  %vuqshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32> %vuqshrn, i32 16)
  %0 = extractelement <1 x i16> %vuqshrn1, i32 0
  ret i16 %0
}

@ -376,10 +376,10 @@ declare <1 x i16> @llvm.aarch64.neon.vuqshrn.v1i16(<1 x i32>, i32)

define i32 @test_vqshrnd_n_u64(i64 %a) {
; CHECK: test_vqshrnd_n_u64
; CHECK: uqshrn {{s[0-9]+}}, {{d[0-9]+}}, #63
; CHECK: uqshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
entry:
  %vuqshrn = insertelement <1 x i64> undef, i64 %a, i32 0
  %vuqshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64> %vuqshrn, i32 63)
  %vuqshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64> %vuqshrn, i32 32)
  %0 = extractelement <1 x i32> %vuqshrn1, i32 0
  ret i32 %0
}

@ -388,10 +388,10 @@ declare <1 x i32> @llvm.aarch64.neon.vuqshrn.v1i32(<1 x i64>, i32)

define i8 @test_vqrshrnh_n_s16(i16 %a) {
; CHECK: test_vqrshrnh_n_s16
; CHECK: sqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #15
; CHECK: sqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
entry:
  %vsqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0
  %vsqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16> %vsqrshrn, i32 15)
  %vsqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16> %vsqrshrn, i32 8)
  %0 = extractelement <1 x i8> %vsqrshrn1, i32 0
  ret i8 %0
}

@ -400,10 +400,10 @@ declare <1 x i8> @llvm.aarch64.neon.vsqrshrn.v1i8(<1 x i16>, i32)

define i16 @test_vqrshrns_n_s32(i32 %a) {
; CHECK: test_vqrshrns_n_s32
; CHECK: sqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #31
; CHECK: sqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
entry:
  %vsqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0
  %vsqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32> %vsqrshrn, i32 31)
  %vsqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32> %vsqrshrn, i32 16)
  %0 = extractelement <1 x i16> %vsqrshrn1, i32 0
  ret i16 %0
}

@ -412,10 +412,10 @@ declare <1 x i16> @llvm.aarch64.neon.vsqrshrn.v1i16(<1 x i32>, i32)

define i32 @test_vqrshrnd_n_s64(i64 %a) {
; CHECK: test_vqrshrnd_n_s64
; CHECK: sqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #63
; CHECK: sqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
entry:
  %vsqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0
  %vsqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64> %vsqrshrn, i32 63)
  %vsqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64> %vsqrshrn, i32 32)
  %0 = extractelement <1 x i32> %vsqrshrn1, i32 0
  ret i32 %0
}

@ -424,10 +424,10 @@ declare <1 x i32> @llvm.aarch64.neon.vsqrshrn.v1i32(<1 x i64>, i32)

define i8 @test_vqrshrnh_n_u16(i16 %a) {
; CHECK: test_vqrshrnh_n_u16
; CHECK: uqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #15
; CHECK: uqrshrn {{b[0-9]+}}, {{h[0-9]+}}, #8
entry:
  %vuqrshrn = insertelement <1 x i16> undef, i16 %a, i32 0
  %vuqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16> %vuqrshrn, i32 15)
  %vuqrshrn1 = call <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16> %vuqrshrn, i32 8)
  %0 = extractelement <1 x i8> %vuqrshrn1, i32 0
  ret i8 %0
}

@ -436,10 +436,10 @@ declare <1 x i8> @llvm.aarch64.neon.vuqrshrn.v1i8(<1 x i16>, i32)

define i16 @test_vqrshrns_n_u32(i32 %a) {
; CHECK: test_vqrshrns_n_u32
; CHECK: uqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #31
; CHECK: uqrshrn {{h[0-9]+}}, {{s[0-9]+}}, #16
entry:
  %vuqrshrn = insertelement <1 x i32> undef, i32 %a, i32 0
  %vuqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %vuqrshrn, i32 31)
  %vuqrshrn1 = call <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32> %vuqrshrn, i32 16)
  %0 = extractelement <1 x i16> %vuqrshrn1, i32 0
  ret i16 %0
}

@ -448,10 +448,10 @@ declare <1 x i16> @llvm.aarch64.neon.vuqrshrn.v1i16(<1 x i32>, i32)

define i32 @test_vqrshrnd_n_u64(i64 %a) {
; CHECK: test_vqrshrnd_n_u64
; CHECK: uqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #63
; CHECK: uqrshrn {{s[0-9]+}}, {{d[0-9]+}}, #32
entry:
  %vuqrshrn = insertelement <1 x i64> undef, i64 %a, i32 0
  %vuqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64> %vuqrshrn, i32 63)
  %vuqrshrn1 = call <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64> %vuqrshrn, i32 32)
  %0 = extractelement <1 x i32> %vuqrshrn1, i32 0
  ret i32 %0
}

@ -460,10 +460,10 @@ declare <1 x i32> @llvm.aarch64.neon.vuqrshrn.v1i32(<1 x i64>, i32)

define i8 @test_vqshrunh_n_s16(i16 %a) {
; CHECK: test_vqshrunh_n_s16
; CHECK: sqshrun {{b[0-9]+}}, {{h[0-9]+}}, #15
; CHECK: sqshrun {{b[0-9]+}}, {{h[0-9]+}}, #8
entry:
  %vsqshrun = insertelement <1 x i16> undef, i16 %a, i32 0
  %vsqshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16> %vsqshrun, i32 15)
  %vsqshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16> %vsqshrun, i32 8)
  %0 = extractelement <1 x i8> %vsqshrun1, i32 0
  ret i8 %0
}

@ -472,10 +472,10 @@ declare <1 x i8> @llvm.aarch64.neon.vsqshrun.v1i8(<1 x i16>, i32)

define i16 @test_vqshruns_n_s32(i32 %a) {
; CHECK: test_vqshruns_n_s32
; CHECK: sqshrun {{h[0-9]+}}, {{s[0-9]+}}, #31
; CHECK: sqshrun {{h[0-9]+}}, {{s[0-9]+}}, #16
entry:
  %vsqshrun = insertelement <1 x i32> undef, i32 %a, i32 0
  %vsqshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32> %vsqshrun, i32 31)
  %vsqshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32> %vsqshrun, i32 16)
  %0 = extractelement <1 x i16> %vsqshrun1, i32 0
  ret i16 %0
}

@ -484,10 +484,10 @@ declare <1 x i16> @llvm.aarch64.neon.vsqshrun.v1i16(<1 x i32>, i32)

define i32 @test_vqshrund_n_s64(i64 %a) {
; CHECK: test_vqshrund_n_s64
; CHECK: sqshrun {{s[0-9]+}}, {{d[0-9]+}}, #63
; CHECK: sqshrun {{s[0-9]+}}, {{d[0-9]+}}, #32
entry:
  %vsqshrun = insertelement <1 x i64> undef, i64 %a, i32 0
  %vsqshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64> %vsqshrun, i32 63)
  %vsqshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64> %vsqshrun, i32 32)
  %0 = extractelement <1 x i32> %vsqshrun1, i32 0
  ret i32 %0
}

@ -496,10 +496,10 @@ declare <1 x i32> @llvm.aarch64.neon.vsqshrun.v1i32(<1 x i64>, i32)

define i8 @test_vqrshrunh_n_s16(i16 %a) {
; CHECK: test_vqrshrunh_n_s16
; CHECK: sqrshrun {{b[0-9]+}}, {{h[0-9]+}}, #15
; CHECK: sqrshrun {{b[0-9]+}}, {{h[0-9]+}}, #8
entry:
  %vsqrshrun = insertelement <1 x i16> undef, i16 %a, i32 0
  %vsqrshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16> %vsqrshrun, i32 15)
  %vsqrshrun1 = call <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16> %vsqrshrun, i32 8)
  %0 = extractelement <1 x i8> %vsqrshrun1, i32 0
  ret i8 %0
}

@ -508,10 +508,10 @@ declare <1 x i8> @llvm.aarch64.neon.vsqrshrun.v1i8(<1 x i16>, i32)

define i16 @test_vqrshruns_n_s32(i32 %a) {
; CHECK: test_vqrshruns_n_s32
; CHECK: sqrshrun {{h[0-9]+}}, {{s[0-9]+}}, #31
; CHECK: sqrshrun {{h[0-9]+}}, {{s[0-9]+}}, #16
entry:
  %vsqrshrun = insertelement <1 x i32> undef, i32 %a, i32 0
  %vsqrshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32> %vsqrshrun, i32 31)
  %vsqrshrun1 = call <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32> %vsqrshrun, i32 16)
  %0 = extractelement <1 x i16> %vsqrshrun1, i32 0
  ret i16 %0
}

@ -520,10 +520,10 @@ declare <1 x i16> @llvm.aarch64.neon.vsqrshrun.v1i16(<1 x i32>, i32)

define i32 @test_vqrshrund_n_s64(i64 %a) {
; CHECK: test_vqrshrund_n_s64
; CHECK: sqrshrun {{s[0-9]+}}, {{d[0-9]+}}, #63
; CHECK: sqrshrun {{s[0-9]+}}, {{d[0-9]+}}, #32
entry:
  %vsqrshrun = insertelement <1 x i64> undef, i64 %a, i32 0
  %vsqrshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64> %vsqrshrun, i32 63)
  %vsqrshrun1 = call <1 x i32> @llvm.aarch64.neon.vsqrshrun.v1i32(<1 x i64> %vsqrshrun, i32 32)
  %0 = extractelement <1 x i32> %vsqrshrun1, i32 0
|
||||
ret i32 %0
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ define void @check_simple() minsize {
; CHECK-NOT: sub sp, sp,
; ...
; CHECK-NOT: add sp, sp,
; CHECK: pop.w {r7, r8, r9, r10, r11, pc}
; CHECK: pop.w {r0, r1, r2, r3, r11, pc}

; CHECK-T1-LABEL: check_simple:
; CHECK-T1: push {r3, r4, r5, r6, r7, lr}
@ -23,7 +23,7 @@ define void @check_simple() minsize {
; CHECK-T1-NOT: sub sp, sp,
; ...
; CHECK-T1-NOT: add sp, sp,
; CHECK-T1: pop {r3, r4, r5, r6, r7, pc}
; CHECK-T1: pop {r0, r1, r2, r3, r7, pc}

; iOS always has a frame pointer and messing with the push affects
; how it's set in the prologue. Make sure we get that right.

85
external/bsd/llvm/dist/llvm/test/CodeGen/Mips/msa/frameindex.ll
vendored
Normal file
@ -0,0 +1,85 @@
; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32-AE -check-prefix=MIPS32-BE %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32-AE -check-prefix=MIPS32-LE %s

define void @loadstore_v16i8_near() nounwind {
; MIPS32-AE: loadstore_v16i8_near:

%1 = alloca <16 x i8>
%2 = load volatile <16 x i8>* %1
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0($sp)
store volatile <16 x i8> %2, <16 x i8>* %1
; MIPS32-AE: st.b [[R1]], 0($sp)

ret void
; MIPS32-AE: .size loadstore_v16i8_near
}

define void @loadstore_v16i8_just_under_simm10() nounwind {
; MIPS32-AE: loadstore_v16i8_just_under_simm10:

%1 = alloca <16 x i8>
%2 = alloca [496 x i8] ; Push the frame right up to 512 bytes

%3 = load volatile <16 x i8>* %1
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 496($sp)
store volatile <16 x i8> %3, <16 x i8>* %1
; MIPS32-AE: st.b [[R1]], 496($sp)

ret void
; MIPS32-AE: .size loadstore_v16i8_just_under_simm10
}

define void @loadstore_v16i8_just_over_simm10() nounwind {
; MIPS32-AE: loadstore_v16i8_just_over_simm10:

%1 = alloca <16 x i8>
%2 = alloca [497 x i8] ; Push the frame just over 512 bytes

%3 = load volatile <16 x i8>* %1
; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <16 x i8> %3, <16 x i8>* %1
; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 512
; MIPS32-AE: st.b [[R1]], 0([[BASE]])

ret void
; MIPS32-AE: .size loadstore_v16i8_just_over_simm10
}

define void @loadstore_v16i8_just_under_simm16() nounwind {
; MIPS32-AE: loadstore_v16i8_just_under_simm16:

%1 = alloca <16 x i8>
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes

%3 = load volatile <16 x i8>* %1
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <16 x i8> %3, <16 x i8>* %1
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
; MIPS32-AE: st.b [[R1]], 0([[BASE]])

ret void
; MIPS32-AE: .size loadstore_v16i8_just_under_simm16
}

define void @loadstore_v16i8_just_over_simm16() nounwind {
; MIPS32-AE: loadstore_v16i8_just_over_simm16:

%1 = alloca <16 x i8>
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes

%3 = load volatile <16 x i8>* %1
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <16 x i8> %3, <16 x i8>* %1
; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768
; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]]
; MIPS32-AE: st.b [[R1]], 0([[BASE]])

ret void
; MIPS32-AE: .size loadstore_v16i8_just_over_simm16
}

@ -5,10 +5,10 @@
; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
; unless isFabsFree returns true

; R600-CHECK: @fabs_free
; R600-CHECK-LABEL: @fabs_free
; R600-CHECK-NOT: AND
; R600-CHECK: |PV.{{[XYZW]}}|
; SI-CHECK: @fabs_free
; SI-CHECK-LABEL: @fabs_free
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0

define void @fabs_free(float addrspace(1)* %out, i32 %in) {
@ -19,4 +19,36 @@ entry:
ret void
}

; R600-CHECK-LABEL: @fabs_v2
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
; SI-CHECK-LABEL: @fabs_v2
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
define void @fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
entry:
%0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
store <2 x float> %0, <2 x float> addrspace(1)* %out
ret void
}

; R600-CHECK-LABEL: @fabs_v4
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
; SI-CHECK-LABEL: @fabs_v4
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
define void @fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
store <4 x float> %0, <4 x float> addrspace(1)* %out
ret void
}

declare float @fabs(float ) readnone
declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone
declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone

41
external/bsd/llvm/dist/llvm/test/CodeGen/R600/llvm.round.ll
vendored
Normal file
@ -0,0 +1,41 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 --check-prefix=FUNC

; FUNC-LABEL: @f32
; R600: FRACT
; R600-DAG: ADD
; R600-DAG: CEIL
; R600-DAG: FLOOR
; R600: CNDGE
define void @f32(float addrspace(1)* %out, float %in) {
entry:
%0 = call float @llvm.round.f32(float %in)
store float %0, float addrspace(1)* %out
ret void
}

; The vector tests are really difficult to verify, since it can be hard to
; predict how the scheduler will order the instructions. We already have
; a test for the scalar case, so the vector tests just check that the
; compiler doesn't crash.

; FUNC-LABEL: v2f32
; R600: CF_END
define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
entry:
%0 = call <2 x float> @llvm.round.v2f32(<2 x float> %in)
store <2 x float> %0, <2 x float> addrspace(1)* %out
ret void
}

; FUNC-LABEL: v4f32
; R600: CF_END
define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
entry:
%0 = call <4 x float> @llvm.round.v4f32(<4 x float> %in)
store <4 x float> %0, <4 x float> addrspace(1)* %out
ret void
}

declare float @llvm.round.f32(float)
declare <2 x float> @llvm.round.v2f32(<2 x float>)
declare <4 x float> @llvm.round.v4f32(<4 x float>)

692
external/bsd/llvm/dist/llvm/test/CodeGen/R600/si-sgpr-spill.ll
vendored
Normal file
@ -0,0 +1,692 @@
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s

; XXX: Enable when spilling is supported
; XFAIL: *

; These tests check that the compiler won't crash when it needs to spill
; SGPRs.

; CHECK-LABEL: @main
; Writing to M0 from an SMRD instruction will hang the GPU.
; CHECK-NOT: S_BUFFER_LOAD_DWORD m0
; CHECK: S_ENDPGM
@ddxy_lds = external addrspace(3) global [64 x i32]

define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%21 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
|
||||
%22 = load <16 x i8> addrspace(2)* %21, !tbaa !0
|
||||
%23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 96)
|
||||
%24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100)
|
||||
%25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 104)
|
||||
%26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 112)
|
||||
%27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 116)
|
||||
%28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 120)
|
||||
%29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128)
|
||||
%30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132)
|
||||
%31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 140)
|
||||
%32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144)
|
||||
%33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160)
|
||||
%34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 176)
|
||||
%35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 180)
|
||||
%36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 184)
|
||||
%37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 192)
|
||||
%38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 196)
|
||||
%39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 200)
|
||||
%40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 208)
|
||||
%41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 212)
|
||||
%42 = call float @llvm.SI.load.const(<16 x i8> %22, i32 216)
|
||||
%43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 224)
|
||||
%44 = call float @llvm.SI.load.const(<16 x i8> %22, i32 240)
|
||||
%45 = call float @llvm.SI.load.const(<16 x i8> %22, i32 244)
|
||||
%46 = call float @llvm.SI.load.const(<16 x i8> %22, i32 248)
|
||||
%47 = call float @llvm.SI.load.const(<16 x i8> %22, i32 256)
|
||||
%48 = call float @llvm.SI.load.const(<16 x i8> %22, i32 272)
|
||||
%49 = call float @llvm.SI.load.const(<16 x i8> %22, i32 276)
|
||||
%50 = call float @llvm.SI.load.const(<16 x i8> %22, i32 280)
|
||||
%51 = call float @llvm.SI.load.const(<16 x i8> %22, i32 288)
|
||||
%52 = call float @llvm.SI.load.const(<16 x i8> %22, i32 292)
|
||||
%53 = call float @llvm.SI.load.const(<16 x i8> %22, i32 296)
|
||||
%54 = call float @llvm.SI.load.const(<16 x i8> %22, i32 304)
|
||||
%55 = call float @llvm.SI.load.const(<16 x i8> %22, i32 308)
|
||||
%56 = call float @llvm.SI.load.const(<16 x i8> %22, i32 312)
|
||||
%57 = call float @llvm.SI.load.const(<16 x i8> %22, i32 368)
|
||||
%58 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372)
|
||||
%59 = call float @llvm.SI.load.const(<16 x i8> %22, i32 376)
|
||||
%60 = call float @llvm.SI.load.const(<16 x i8> %22, i32 384)
|
||||
%61 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
|
||||
%62 = load <32 x i8> addrspace(2)* %61, !tbaa !0
|
||||
%63 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
|
||||
%64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
|
||||
%65 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1
|
||||
%66 = load <32 x i8> addrspace(2)* %65, !tbaa !0
|
||||
%67 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1
|
||||
%68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
|
||||
%69 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2
|
||||
%70 = load <32 x i8> addrspace(2)* %69, !tbaa !0
|
||||
%71 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2
|
||||
%72 = load <16 x i8> addrspace(2)* %71, !tbaa !0
|
||||
%73 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3
|
||||
%74 = load <32 x i8> addrspace(2)* %73, !tbaa !0
|
||||
%75 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3
|
||||
%76 = load <16 x i8> addrspace(2)* %75, !tbaa !0
|
||||
%77 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4
|
||||
%78 = load <32 x i8> addrspace(2)* %77, !tbaa !0
|
||||
%79 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4
|
||||
%80 = load <16 x i8> addrspace(2)* %79, !tbaa !0
|
||||
%81 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5
|
||||
%82 = load <32 x i8> addrspace(2)* %81, !tbaa !0
|
||||
%83 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5
|
||||
%84 = load <16 x i8> addrspace(2)* %83, !tbaa !0
|
||||
%85 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6
|
||||
%86 = load <32 x i8> addrspace(2)* %85, !tbaa !0
|
||||
%87 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6
|
||||
%88 = load <16 x i8> addrspace(2)* %87, !tbaa !0
|
||||
%89 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7
|
||||
%90 = load <32 x i8> addrspace(2)* %89, !tbaa !0
|
||||
%91 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7
|
||||
%92 = load <16 x i8> addrspace(2)* %91, !tbaa !0
|
||||
%93 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %4, <2 x i32> %6)
|
||||
%94 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %4, <2 x i32> %6)
|
||||
%95 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %4, <2 x i32> %6)
|
||||
%96 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %4, <2 x i32> %6)
|
||||
%97 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %4, <2 x i32> %6)
|
||||
%98 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %4, <2 x i32> %6)
|
||||
%99 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %4, <2 x i32> %6)
|
||||
%100 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %4, <2 x i32> %6)
|
||||
%101 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %4, <2 x i32> %6)
|
||||
%102 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %4, <2 x i32> %6)
|
||||
%103 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %4, <2 x i32> %6)
|
||||
%104 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %4, <2 x i32> %6)
|
||||
%105 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %4, <2 x i32> %6)
|
||||
%106 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %4, <2 x i32> %6)
|
||||
%107 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %4, <2 x i32> %6)
|
||||
%108 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %4, <2 x i32> %6)
|
||||
%109 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %4, <2 x i32> %6)
|
||||
%110 = call i32 @llvm.SI.tid()
|
||||
%111 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %110
|
||||
%112 = bitcast float %93 to i32
|
||||
store i32 %112, i32 addrspace(3)* %111
|
||||
%113 = bitcast float %94 to i32
|
||||
store i32 %113, i32 addrspace(3)* %111
|
||||
%114 = call i32 @llvm.SI.tid()
|
||||
%115 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %114
|
||||
%116 = and i32 %114, -4
|
||||
%117 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %116
|
||||
%118 = add i32 %116, 1
|
||||
%119 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %118
|
||||
%120 = bitcast float %93 to i32
|
||||
store i32 %120, i32 addrspace(3)* %115
|
||||
%121 = load i32 addrspace(3)* %117
|
||||
%122 = bitcast i32 %121 to float
|
||||
%123 = load i32 addrspace(3)* %119
|
||||
%124 = bitcast i32 %123 to float
|
||||
%125 = fsub float %124, %122
|
||||
%126 = bitcast float %94 to i32
|
||||
store i32 %126, i32 addrspace(3)* %115
|
||||
%127 = load i32 addrspace(3)* %117
|
||||
%128 = bitcast i32 %127 to float
|
||||
%129 = load i32 addrspace(3)* %119
|
||||
%130 = bitcast i32 %129 to float
|
||||
%131 = fsub float %130, %128
|
||||
%132 = insertelement <4 x float> undef, float %125, i32 0
|
||||
%133 = insertelement <4 x float> %132, float %131, i32 1
|
||||
%134 = insertelement <4 x float> %133, float %131, i32 2
|
||||
%135 = insertelement <4 x float> %134, float %131, i32 3
|
||||
%136 = extractelement <4 x float> %135, i32 0
|
||||
%137 = extractelement <4 x float> %135, i32 1
|
||||
%138 = fmul float %60, %93
|
||||
%139 = fmul float %60, %94
|
||||
%140 = fmul float %60, %94
|
||||
%141 = fmul float %60, %94
|
||||
%142 = call i32 @llvm.SI.tid()
|
||||
%143 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %142
|
||||
%144 = bitcast float %138 to i32
|
||||
store i32 %144, i32 addrspace(3)* %143
|
||||
%145 = bitcast float %139 to i32
|
||||
store i32 %145, i32 addrspace(3)* %143
|
||||
%146 = bitcast float %140 to i32
|
||||
store i32 %146, i32 addrspace(3)* %143
|
||||
%147 = bitcast float %141 to i32
|
||||
store i32 %147, i32 addrspace(3)* %143
|
||||
%148 = call i32 @llvm.SI.tid()
|
||||
%149 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %148
|
||||
%150 = and i32 %148, -4
|
||||
%151 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %150
|
||||
%152 = add i32 %150, 2
|
||||
%153 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %152
|
||||
%154 = bitcast float %138 to i32
|
||||
store i32 %154, i32 addrspace(3)* %149
|
||||
%155 = load i32 addrspace(3)* %151
|
||||
%156 = bitcast i32 %155 to float
|
||||
%157 = load i32 addrspace(3)* %153
|
||||
%158 = bitcast i32 %157 to float
|
||||
%159 = fsub float %158, %156
|
||||
%160 = bitcast float %139 to i32
|
||||
store i32 %160, i32 addrspace(3)* %149
|
||||
%161 = load i32 addrspace(3)* %151
|
||||
%162 = bitcast i32 %161 to float
|
||||
%163 = load i32 addrspace(3)* %153
|
||||
%164 = bitcast i32 %163 to float
|
||||
%165 = fsub float %164, %162
|
||||
%166 = bitcast float %140 to i32
|
||||
store i32 %166, i32 addrspace(3)* %149
|
||||
%167 = load i32 addrspace(3)* %151
|
||||
%168 = bitcast i32 %167 to float
|
||||
%169 = load i32 addrspace(3)* %153
|
||||
%170 = bitcast i32 %169 to float
|
||||
%171 = fsub float %170, %168
|
||||
%172 = bitcast float %141 to i32
|
||||
store i32 %172, i32 addrspace(3)* %149
|
||||
%173 = load i32 addrspace(3)* %151
|
||||
%174 = bitcast i32 %173 to float
|
||||
%175 = load i32 addrspace(3)* %153
|
||||
%176 = bitcast i32 %175 to float
|
||||
%177 = fsub float %176, %174
|
||||
%178 = insertelement <4 x float> undef, float %159, i32 0
|
||||
%179 = insertelement <4 x float> %178, float %165, i32 1
|
||||
%180 = insertelement <4 x float> %179, float %171, i32 2
|
||||
%181 = insertelement <4 x float> %180, float %177, i32 3
|
||||
%182 = extractelement <4 x float> %181, i32 0
|
||||
%183 = extractelement <4 x float> %181, i32 1
|
||||
%184 = fdiv float 1.000000e+00, %97
|
||||
%185 = fmul float %33, %184
|
||||
%186 = fcmp uge float 1.000000e+00, %185
|
||||
%187 = select i1 %186, float %185, float 1.000000e+00
|
||||
%188 = fmul float %187, %30
|
||||
%189 = call float @ceil(float %188)
|
||||
%190 = fcmp uge float 3.000000e+00, %189
|
||||
%191 = select i1 %190, float 3.000000e+00, float %189
|
||||
%192 = fdiv float 1.000000e+00, %191
|
||||
%193 = fdiv float 1.000000e+00, %30
|
||||
%194 = fmul float %191, %193
|
||||
%195 = fmul float %31, %194
|
||||
%196 = fmul float %95, %95
|
||||
%197 = fmul float %96, %96
|
||||
%198 = fadd float %197, %196
|
||||
%199 = fmul float %97, %97
|
||||
%200 = fadd float %198, %199
|
||||
%201 = call float @llvm.AMDGPU.rsq(float %200)
|
||||
%202 = fmul float %95, %201
|
||||
%203 = fmul float %96, %201
|
||||
%204 = fmul float %202, %29
|
||||
%205 = fmul float %203, %29
|
||||
%206 = fmul float %204, -1.000000e+00
|
||||
%207 = fmul float %205, 1.000000e+00
|
||||
%208 = fmul float %206, %32
|
||||
%209 = fmul float %207, %32
|
||||
%210 = fsub float -0.000000e+00, %208
|
||||
%211 = fadd float %93, %210
|
||||
%212 = fsub float -0.000000e+00, %209
|
||||
%213 = fadd float %94, %212
|
||||
%214 = fmul float %206, %192
|
||||
%215 = fmul float %207, %192
|
||||
%216 = fmul float -1.000000e+00, %192
|
||||
%217 = bitcast float %136 to i32
|
||||
%218 = bitcast float %182 to i32
|
||||
%219 = bitcast float %137 to i32
|
||||
%220 = bitcast float %183 to i32
|
||||
%221 = insertelement <8 x i32> undef, i32 %217, i32 0
|
||||
%222 = insertelement <8 x i32> %221, i32 %218, i32 1
|
||||
%223 = insertelement <8 x i32> %222, i32 %219, i32 2
|
||||
%224 = insertelement <8 x i32> %223, i32 %220, i32 3
|
||||
br label %LOOP
|
||||
|
||||
LOOP: ; preds = %ENDIF, %main_body
|
||||
%temp24.0 = phi float [ 1.000000e+00, %main_body ], [ %258, %ENDIF ]
|
||||
%temp28.0 = phi float [ %211, %main_body ], [ %253, %ENDIF ]
|
||||
%temp29.0 = phi float [ %213, %main_body ], [ %255, %ENDIF ]
|
||||
%temp30.0 = phi float [ 1.000000e+00, %main_body ], [ %257, %ENDIF ]
|
||||
%225 = fcmp oge float %temp24.0, %191
|
||||
%226 = sext i1 %225 to i32
|
||||
%227 = bitcast i32 %226 to float
|
||||
%228 = bitcast float %227 to i32
|
||||
%229 = icmp ne i32 %228, 0
|
||||
br i1 %229, label %IF, label %ENDIF
|
||||
|
||||
IF: ; preds = %LOOP
|
||||
%230 = bitcast float %136 to i32
|
||||
%231 = bitcast float %182 to i32
|
||||
%232 = bitcast float %137 to i32
|
||||
%233 = bitcast float %183 to i32
|
||||
%234 = insertelement <8 x i32> undef, i32 %230, i32 0
|
||||
%235 = insertelement <8 x i32> %234, i32 %231, i32 1
|
||||
%236 = insertelement <8 x i32> %235, i32 %232, i32 2
|
||||
%237 = insertelement <8 x i32> %236, i32 %233, i32 3
|
||||
br label %LOOP65
|
||||
|
||||
ENDIF: ; preds = %LOOP
|
||||
%238 = bitcast float %temp28.0 to i32
|
||||
%239 = bitcast float %temp29.0 to i32
|
||||
%240 = insertelement <8 x i32> %224, i32 %238, i32 4
|
||||
%241 = insertelement <8 x i32> %240, i32 %239, i32 5
|
||||
%242 = insertelement <8 x i32> %241, i32 undef, i32 6
|
||||
%243 = insertelement <8 x i32> %242, i32 undef, i32 7
|
||||
%244 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %243, <32 x i8> %62, <16 x i8> %64, i32 2)
|
||||
%245 = extractelement <4 x float> %244, i32 3
|
||||
%246 = fcmp oge float %temp30.0, %245
|
||||
%247 = sext i1 %246 to i32
|
||||
%248 = bitcast i32 %247 to float
|
||||
%249 = bitcast float %248 to i32
|
||||
%250 = and i32 %249, 1065353216
|
||||
%251 = bitcast i32 %250 to float
|
||||
%252 = fmul float %214, %251
|
||||
%253 = fadd float %252, %temp28.0
|
||||
%254 = fmul float %215, %251
|
||||
%255 = fadd float %254, %temp29.0
|
||||
%256 = fmul float %216, %251
|
||||
%257 = fadd float %256, %temp30.0
|
||||
%258 = fadd float %temp24.0, 1.000000e+00
|
||||
br label %LOOP
|
||||
|
||||
LOOP65: ; preds = %ENDIF66, %IF
|
||||
%temp24.1 = phi float [ 0.000000e+00, %IF ], [ %610, %ENDIF66 ]
|
||||
%temp28.1 = phi float [ %temp28.0, %IF ], [ %605, %ENDIF66 ]
|
||||
%temp29.1 = phi float [ %temp29.0, %IF ], [ %607, %ENDIF66 ]
|
||||
%temp30.1 = phi float [ %temp30.0, %IF ], [ %609, %ENDIF66 ]
|
||||
%temp32.0 = phi float [ 1.000000e+00, %IF ], [ %611, %ENDIF66 ]
|
||||
%259 = fcmp oge float %temp24.1, %195
|
||||
%260 = sext i1 %259 to i32
|
||||
%261 = bitcast i32 %260 to float
|
||||
%262 = bitcast float %261 to i32
|
||||
%263 = icmp ne i32 %262, 0
|
||||
br i1 %263, label %IF67, label %ENDIF66
|
||||
|
||||
IF67: ; preds = %LOOP65
|
||||
%264 = bitcast float %136 to i32
|
||||
%265 = bitcast float %182 to i32
|
||||
%266 = bitcast float %137 to i32
|
||||
%267 = bitcast float %183 to i32
|
||||
%268 = bitcast float %temp28.1 to i32
|
||||
%269 = bitcast float %temp29.1 to i32
|
||||
%270 = insertelement <8 x i32> undef, i32 %264, i32 0
|
||||
%271 = insertelement <8 x i32> %270, i32 %265, i32 1
|
||||
%272 = insertelement <8 x i32> %271, i32 %266, i32 2
|
||||
%273 = insertelement <8 x i32> %272, i32 %267, i32 3
|
||||
%274 = insertelement <8 x i32> %273, i32 %268, i32 4
|
||||
%275 = insertelement <8 x i32> %274, i32 %269, i32 5
|
||||
%276 = insertelement <8 x i32> %275, i32 undef, i32 6
|
||||
%277 = insertelement <8 x i32> %276, i32 undef, i32 7
|
||||
%278 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %277, <32 x i8> %66, <16 x i8> %68, i32 2)
|
||||
%279 = extractelement <4 x float> %278, i32 0
|
||||
%280 = extractelement <4 x float> %278, i32 1
|
||||
%281 = extractelement <4 x float> %278, i32 2
|
||||
%282 = extractelement <4 x float> %278, i32 3
|
||||
%283 = fmul float %282, %47
|
||||
%284 = bitcast float %136 to i32
|
||||
%285 = bitcast float %182 to i32
|
||||
%286 = bitcast float %137 to i32
|
||||
%287 = bitcast float %183 to i32
|
||||
%288 = bitcast float %temp28.1 to i32
|
||||
%289 = bitcast float %temp29.1 to i32
|
||||
%290 = insertelement <8 x i32> undef, i32 %284, i32 0
|
||||
%291 = insertelement <8 x i32> %290, i32 %285, i32 1
|
||||
%292 = insertelement <8 x i32> %291, i32 %286, i32 2
|
||||
%293 = insertelement <8 x i32> %292, i32 %287, i32 3
|
||||
%294 = insertelement <8 x i32> %293, i32 %288, i32 4
|
||||
%295 = insertelement <8 x i32> %294, i32 %289, i32 5
|
||||
%296 = insertelement <8 x i32> %295, i32 undef, i32 6
|
||||
%297 = insertelement <8 x i32> %296, i32 undef, i32 7
|
||||
%298 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %297, <32 x i8> %82, <16 x i8> %84, i32 2)
|
||||
%299 = extractelement <4 x float> %298, i32 0
|
||||
%300 = extractelement <4 x float> %298, i32 1
|
||||
%301 = extractelement <4 x float> %298, i32 2
|
||||
%302 = bitcast float %136 to i32
|
||||
%303 = bitcast float %182 to i32
|
||||
%304 = bitcast float %137 to i32
|
||||
%305 = bitcast float %183 to i32
|
||||
%306 = bitcast float %temp28.1 to i32
|
||||
%307 = bitcast float %temp29.1 to i32
|
||||
%308 = insertelement <8 x i32> undef, i32 %302, i32 0
|
||||
%309 = insertelement <8 x i32> %308, i32 %303, i32 1
|
||||
%310 = insertelement <8 x i32> %309, i32 %304, i32 2
|
||||
%311 = insertelement <8 x i32> %310, i32 %305, i32 3
|
||||
%312 = insertelement <8 x i32> %311, i32 %306, i32 4
|
||||
%313 = insertelement <8 x i32> %312, i32 %307, i32 5
|
||||
%314 = insertelement <8 x i32> %313, i32 undef, i32 6
|
||||
%315 = insertelement <8 x i32> %314, i32 undef, i32 7
|
||||
%316 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %315, <32 x i8> %78, <16 x i8> %80, i32 2)
|
||||
%317 = extractelement <4 x float> %316, i32 0
|
||||
%318 = extractelement <4 x float> %316, i32 1
|
||||
%319 = extractelement <4 x float> %316, i32 2
|
||||
%320 = fmul float %317, %23
|
||||
%321 = fmul float %318, %24
|
||||
%322 = fmul float %319, %25
|
||||
%323 = fmul float %299, %26
|
||||
%324 = fadd float %323, %320
|
||||
%325 = fmul float %300, %27
|
||||
%326 = fadd float %325, %321
|
||||
%327 = fmul float %301, %28
|
||||
%328 = fadd float %327, %322
|
||||
%329 = fadd float %279, %324
|
||||
%330 = fadd float %280, %326
|
||||
%331 = fadd float %281, %328
|
||||
%332 = bitcast float %136 to i32
|
||||
%333 = bitcast float %182 to i32
|
||||
%334 = bitcast float %137 to i32
|
||||
%335 = bitcast float %183 to i32
|
||||
%336 = bitcast float %temp28.1 to i32
|
||||
%337 = bitcast float %temp29.1 to i32
|
||||
%338 = insertelement <8 x i32> undef, i32 %332, i32 0
|
||||
%339 = insertelement <8 x i32> %338, i32 %333, i32 1
|
||||
%340 = insertelement <8 x i32> %339, i32 %334, i32 2
|
||||
%341 = insertelement <8 x i32> %340, i32 %335, i32 3
|
||||
%342 = insertelement <8 x i32> %341, i32 %336, i32 4
|
||||
%343 = insertelement <8 x i32> %342, i32 %337, i32 5
|
||||
%344 = insertelement <8 x i32> %343, i32 undef, i32 6
|
||||
%345 = insertelement <8 x i32> %344, i32 undef, i32 7
|
||||
%346 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %345, <32 x i8> %62, <16 x i8> %64, i32 2)
|
||||
%347 = extractelement <4 x float> %346, i32 0
|
||||
%348 = extractelement <4 x float> %346, i32 1
|
||||
%349 = extractelement <4 x float> %346, i32 2
|
||||
%350 = fadd float %347, -5.000000e-01
|
||||
%351 = fadd float %348, -5.000000e-01
|
||||
%352 = fadd float %349, -5.000000e-01
|
||||
%353 = fmul float %350, %350
|
||||
%354 = fmul float %351, %351
|
||||
%355 = fadd float %354, %353
|
||||
%356 = fmul float %352, %352
|
||||
%357 = fadd float %355, %356
|
||||
%358 = call float @llvm.AMDGPU.rsq(float %357)
|
||||
%359 = fmul float %350, %358
|
||||
%360 = fmul float %351, %358
|
||||
%361 = fmul float %352, %358
|
||||
%362 = bitcast float %136 to i32
|
||||
%363 = bitcast float %182 to i32
|
||||
%364 = bitcast float %137 to i32
|
||||
%365 = bitcast float %183 to i32
|
||||
%366 = bitcast float %temp28.1 to i32
|
||||
%367 = bitcast float %temp29.1 to i32
|
||||
%368 = insertelement <8 x i32> undef, i32 %362, i32 0
|
||||
%369 = insertelement <8 x i32> %368, i32 %363, i32 1
|
||||
%370 = insertelement <8 x i32> %369, i32 %364, i32 2
|
||||
%371 = insertelement <8 x i32> %370, i32 %365, i32 3
|
||||
%372 = insertelement <8 x i32> %371, i32 %366, i32 4
|
||||
%373 = insertelement <8 x i32> %372, i32 %367, i32 5
|
||||
%374 = insertelement <8 x i32> %373, i32 undef, i32 6
|
||||
%375 = insertelement <8 x i32> %374, i32 undef, i32 7
|
||||
%376 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %375, <32 x i8> %70, <16 x i8> %72, i32 2)
|
||||
%377 = extractelement <4 x float> %376, i32 0
|
||||
%378 = extractelement <4 x float> %376, i32 1
|
||||
%379 = extractelement <4 x float> %376, i32 2
|
||||
%380 = extractelement <4 x float> %376, i32 3
|
||||
%381 = fsub float -0.000000e+00, %95
|
||||
%382 = fsub float -0.000000e+00, %96
|
||||
%383 = fsub float -0.000000e+00, %97
|
||||
%384 = fmul float %359, %381
|
||||
%385 = fmul float %360, %382
|
||||
%386 = fadd float %385, %384
|
||||
%387 = fmul float %361, %383
|
||||
%388 = fadd float %386, %387
|
||||
%389 = fmul float %388, %359
|
||||
%390 = fmul float %388, %360
|
||||
%391 = fmul float %388, %361
|
||||
%392 = fmul float 2.000000e+00, %389
|
||||
%393 = fmul float 2.000000e+00, %390
|
||||
%394 = fmul float 2.000000e+00, %391
|
||||
%395 = fsub float -0.000000e+00, %392
|
||||
%396 = fadd float %381, %395
|
||||
%397 = fsub float -0.000000e+00, %393
|
||||
%398 = fadd float %382, %397
|
||||
%399 = fsub float -0.000000e+00, %394
|
||||
%400 = fadd float %383, %399
|
||||
%401 = fmul float %396, %98
|
||||
%402 = fmul float %396, %99
|
||||
%403 = fmul float %396, %100
|
||||
%404 = fmul float %398, %101
|
||||
%405 = fadd float %404, %401
|
||||
%406 = fmul float %398, %102
|
||||
%407 = fadd float %406, %402
|
||||
%408 = fmul float %398, %103
|
||||
%409 = fadd float %408, %403
|
||||
%410 = fmul float %400, %104
|
||||
%411 = fadd float %410, %405
|
||||
%412 = fmul float %400, %105
|
||||
%413 = fadd float %412, %407
|
||||
%414 = fmul float %400, %106
|
||||
%415 = fadd float %414, %409
|
||||
%416 = bitcast float %136 to i32
|
||||
%417 = bitcast float %182 to i32
|
||||
%418 = bitcast float %137 to i32
|
||||
%419 = bitcast float %183 to i32
|
||||
%420 = bitcast float %temp28.1 to i32
|
||||
%421 = bitcast float %temp29.1 to i32
|
||||
%422 = insertelement <8 x i32> undef, i32 %416, i32 0
|
||||
%423 = insertelement <8 x i32> %422, i32 %417, i32 1
|
||||
%424 = insertelement <8 x i32> %423, i32 %418, i32 2
|
||||
%425 = insertelement <8 x i32> %424, i32 %419, i32 3
|
||||
%426 = insertelement <8 x i32> %425, i32 %420, i32 4
|
||||
%427 = insertelement <8 x i32> %426, i32 %421, i32 5
|
||||
%428 = insertelement <8 x i32> %427, i32 undef, i32 6
|
||||
%429 = insertelement <8 x i32> %428, i32 undef, i32 7
|
||||
%430 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %429, <32 x i8> %86, <16 x i8> %88, i32 2)
|
||||
%431 = extractelement <4 x float> %430, i32 0
|
||||
%432 = extractelement <4 x float> %430, i32 1
|
||||
%433 = extractelement <4 x float> %430, i32 2
|
||||
%434 = fmul float %48, %411
|
||||
%435 = fmul float %49, %411
|
||||
%436 = fmul float %50, %411
|
||||
%437 = fmul float %51, %413
|
||||
%438 = fadd float %437, %434
|
||||
%439 = fmul float %52, %413
|
||||
%440 = fadd float %439, %435
|
||||
%441 = fmul float %53, %413
|
||||
%442 = fadd float %441, %436
|
||||
%443 = fmul float %54, %415
|
||||
%444 = fadd float %443, %438
|
||||
%445 = fmul float %55, %415
|
||||
%446 = fadd float %445, %440
|
||||
%447 = fmul float %56, %415
|
||||
%448 = fadd float %447, %442
|
||||
%449 = insertelement <4 x float> undef, float %444, i32 0
|
||||
%450 = insertelement <4 x float> %449, float %446, i32 1
|
||||
%451 = insertelement <4 x float> %450, float %448, i32 2
|
||||
%452 = insertelement <4 x float> %451, float %195, i32 3
|
||||
%453 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %452)
|
||||
%454 = extractelement <4 x float> %453, i32 0
|
||||
%455 = extractelement <4 x float> %453, i32 1
|
||||
%456 = extractelement <4 x float> %453, i32 2
|
||||
%457 = extractelement <4 x float> %453, i32 3
|
||||
%458 = call float @fabs(float %456)
|
||||
%459 = fdiv float 1.000000e+00, %458
|
||||
%460 = fmul float %454, %459
|
||||
%461 = fadd float %460, 1.500000e+00
|
||||
%462 = fmul float %455, %459
|
||||
%463 = fadd float %462, 1.500000e+00
|
||||
%464 = bitcast float %463 to i32
|
||||
%465 = bitcast float %461 to i32
|
||||
%466 = bitcast float %457 to i32
|
||||
%467 = insertelement <4 x i32> undef, i32 %464, i32 0
|
||||
%468 = insertelement <4 x i32> %467, i32 %465, i32 1
|
||||
%469 = insertelement <4 x i32> %468, i32 %466, i32 2
|
||||
%470 = insertelement <4 x i32> %469, i32 undef, i32 3
|
||||
%471 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %470, <32 x i8> %90, <16 x i8> %92, i32 4)
|
||||
%472 = extractelement <4 x float> %471, i32 0
|
||||
%473 = extractelement <4 x float> %471, i32 1
|
||||
%474 = extractelement <4 x float> %471, i32 2
|
||||
%475 = fmul float %431, %472
|
||||
%476 = fadd float %475, %329
|
||||
%477 = fmul float %432, %473
|
||||
%478 = fadd float %477, %330
|
||||
%479 = fmul float %433, %474
|
||||
%480 = fadd float %479, %331
|
||||
%481 = fmul float %107, %107
|
||||
%482 = fmul float %108, %108
|
||||
%483 = fadd float %482, %481
|
||||
%484 = fmul float %109, %109
|
||||
%485 = fadd float %483, %484
|
||||
%486 = call float @llvm.AMDGPU.rsq(float %485)
|
||||
%487 = fmul float %107, %486
|
||||
%488 = fmul float %108, %486
|
||||
%489 = fmul float %109, %486
|
||||
%490 = fmul float %377, %40
|
||||
%491 = fmul float %378, %41
|
||||
%492 = fmul float %379, %42
|
||||
%493 = fmul float %359, %487
|
||||
%494 = fmul float %360, %488
|
||||
%495 = fadd float %494, %493
|
||||
%496 = fmul float %361, %489
|
||||
%497 = fadd float %495, %496
|
||||
%498 = fmul float %497, %359
|
||||
%499 = fmul float %497, %360
|
||||
%500 = fmul float %497, %361
|
||||
%501 = fmul float 2.000000e+00, %498
|
||||
%502 = fmul float 2.000000e+00, %499
|
||||
%503 = fmul float 2.000000e+00, %500
|
||||
%504 = fsub float -0.000000e+00, %501
|
||||
%505 = fadd float %487, %504
|
||||
%506 = fsub float -0.000000e+00, %502
|
||||
%507 = fadd float %488, %506
|
||||
%508 = fsub float -0.000000e+00, %503
|
||||
%509 = fadd float %489, %508
|
||||
%510 = fmul float %95, %95
|
||||
%511 = fmul float %96, %96
|
||||
%512 = fadd float %511, %510
|
||||
%513 = fmul float %97, %97
|
||||
%514 = fadd float %512, %513
|
||||
%515 = call float @llvm.AMDGPU.rsq(float %514)
|
||||
%516 = fmul float %95, %515
|
||||
%517 = fmul float %96, %515
|
||||
%518 = fmul float %97, %515
|
||||
%519 = fmul float %505, %516
|
||||
%520 = fmul float %507, %517
|
||||
%521 = fadd float %520, %519
|
||||
%522 = fmul float %509, %518
|
||||
%523 = fadd float %521, %522
|
||||
%524 = fsub float -0.000000e+00, %523
|
||||
%525 = fcmp uge float %524, 0.000000e+00
|
||||
%526 = select i1 %525, float %524, float 0.000000e+00
|
||||
%527 = fmul float %43, %380
|
||||
%528 = fadd float %527, 1.000000e+00
|
||||
%529 = call float @llvm.pow.f32(float %526, float %528)
|
||||
%530 = fmul float %476, %37
|
||||
%531 = fmul float %478, %38
|
||||
%532 = fmul float %480, %39
|
||||
%533 = fmul float %359, %487
|
||||
%534 = fmul float %360, %488
|
||||
%535 = fadd float %534, %533
|
||||
%536 = fmul float %361, %489
|
||||
%537 = fadd float %535, %536
|
||||
%538 = fcmp uge float %537, 0.000000e+00
|
||||
%539 = select i1 %538, float %537, float 0.000000e+00
|
||||
%540 = fmul float %530, %539
|
||||
%541 = fmul float %531, %539
|
||||
%542 = fmul float %532, %539
|
||||
%543 = fmul float %490, %529
|
||||
%544 = fadd float %543, %540
|
||||
%545 = fmul float %491, %529
|
||||
%546 = fadd float %545, %541
|
||||
%547 = fmul float %492, %529
|
||||
%548 = fadd float %547, %542
|
||||
%549 = fmul float %476, %34
|
||||
%550 = fmul float %478, %35
|
||||
%551 = fmul float %480, %36
|
||||
%552 = fmul float %544, %57
|
||||
%553 = fadd float %552, %549
|
||||
%554 = fmul float %546, %58
|
||||
%555 = fadd float %554, %550
|
||||
%556 = fmul float %548, %59
|
||||
%557 = fadd float %556, %551
|
||||
%558 = bitcast float %136 to i32
|
||||
%559 = bitcast float %182 to i32
|
||||
%560 = bitcast float %137 to i32
|
||||
%561 = bitcast float %183 to i32
|
||||
%562 = bitcast float %temp28.1 to i32
|
||||
%563 = bitcast float %temp29.1 to i32
|
||||
%564 = insertelement <8 x i32> undef, i32 %558, i32 0
|
||||
%565 = insertelement <8 x i32> %564, i32 %559, i32 1
|
||||
%566 = insertelement <8 x i32> %565, i32 %560, i32 2
|
||||
%567 = insertelement <8 x i32> %566, i32 %561, i32 3
|
||||
%568 = insertelement <8 x i32> %567, i32 %562, i32 4
|
||||
%569 = insertelement <8 x i32> %568, i32 %563, i32 5
|
||||
%570 = insertelement <8 x i32> %569, i32 undef, i32 6
|
||||
%571 = insertelement <8 x i32> %570, i32 undef, i32 7
|
||||
%572 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %571, <32 x i8> %74, <16 x i8> %76, i32 2)
|
||||
%573 = extractelement <4 x float> %572, i32 0
|
||||
%574 = extractelement <4 x float> %572, i32 1
|
||||
%575 = extractelement <4 x float> %572, i32 2
|
||||
%576 = fmul float %573, %44
|
||||
%577 = fadd float %576, %553
|
||||
%578 = fmul float %574, %45
|
||||
%579 = fadd float %578, %555
|
||||
%580 = fmul float %575, %46
|
||||
%581 = fadd float %580, %557
|
||||
%582 = call i32 @llvm.SI.packf16(float %577, float %579)
|
||||
%583 = bitcast i32 %582 to float
|
||||
%584 = call i32 @llvm.SI.packf16(float %581, float %283)
|
||||
%585 = bitcast i32 %584 to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %583, float %585, float %583, float %585)
|
||||
ret void
|
||||
|
||||
ENDIF66: ; preds = %LOOP65
|
||||
%586 = bitcast float %temp28.1 to i32
|
||||
%587 = bitcast float %temp29.1 to i32
|
||||
%588 = insertelement <8 x i32> %237, i32 %586, i32 4
|
||||
%589 = insertelement <8 x i32> %588, i32 %587, i32 5
|
||||
%590 = insertelement <8 x i32> %589, i32 undef, i32 6
|
||||
%591 = insertelement <8 x i32> %590, i32 undef, i32 7
|
||||
%592 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %591, <32 x i8> %62, <16 x i8> %64, i32 2)
|
||||
%593 = extractelement <4 x float> %592, i32 3
|
||||
%594 = fcmp oge float %temp30.1, %593
|
||||
%595 = sext i1 %594 to i32
|
||||
%596 = bitcast i32 %595 to float
|
||||
%597 = bitcast float %596 to i32
|
||||
%598 = and i32 %597, 1065353216
|
||||
%599 = bitcast i32 %598 to float
|
||||
%600 = fmul float 5.000000e-01, %temp32.0
|
||||
%601 = fsub float -0.000000e+00, %600
|
||||
%602 = fmul float %599, %temp32.0
|
||||
%603 = fadd float %602, %601
|
||||
%604 = fmul float %214, %603
|
||||
%605 = fadd float %604, %temp28.1
|
||||
%606 = fmul float %215, %603
|
||||
%607 = fadd float %606, %temp29.1
|
||||
%608 = fmul float %216, %603
|
||||
%609 = fadd float %608, %temp30.1
|
||||
%610 = fadd float %temp24.1, 1.000000e+00
|
||||
%611 = fmul float %temp32.0, 5.000000e-01
|
||||
br label %LOOP65
|
||||
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare i32 @llvm.SI.tid() #2

; Function Attrs: readonly
declare float @ceil(float) #3

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #4

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { readonly }
attributes #4 = { nounwind readonly }

!0 = metadata !{metadata !"const", null, i32 1}

@ -1,10 +1,20 @@
; RUN: llc -march=sparc < %s | FileCheck %s
; RUN: llc -march=sparc < %s | FileCheck %s --check-prefix=V8
; RUN: llc -march=sparcv9 < %s | FileCheck %s --check-prefix=SPARC64

; V8-LABEL: variable_alloca_with_adj_call_stack
; V8: save %sp, -96, %sp
; V8: add {{.+}}, 96, %o0
; V8: add %sp, -16, %sp
; V8: call foo
; V8: add %sp, 16, %sp

; SPARC64-LABEL: variable_alloca_with_adj_call_stack
; SPARC64: save %sp, -128, %sp
; SPARC64: add {{.+}}, 128, %o0
; SPARC64: add %sp, -80, %sp
; SPARC64: call foo
; SPARC64: add %sp, 80, %sp

; CHECK: variable_alloca_with_adj_call_stack
; CHECK: save %sp, -96, %sp
; CHECK: add %sp, -16, %sp
; CHECK: call foo
; CHECK: add %sp, 16, %sp
define void @variable_alloca_with_adj_call_stack(i32 %num) {
entry:
%0 = alloca i8, i32 %num, align 8

@ -380,8 +380,6 @@ define signext i32 @ret_nozext(i32 signext %a0) {
; CHECK-LABEL: test_register_directive
; CHECK: .register %g2, #scratch
; CHECK: .register %g3, #scratch
; CHECK: .register %g6, #ignore
; CHECK: .register %g7, #ignore
; CHECK: add %i0, 2, %g2
; CHECK: add %i0, 3, %g3
define i32 @test_register_directive(i32 %i0) {
@ -392,3 +390,24 @@ entry:
%2 = add nsw i32 %0, %1
ret i32 %2
}

; CHECK-LABEL: test_large_stack

; CHECK: sethi 16, %g1
; CHECK: xor %g1, -176, %g1
; CHECK: save %sp, %g1, %sp

; CHECK: sethi 14, %g1
; CHECK: xor %g1, -1, %g1
; CHECK: add %g1, %fp, %g1
; CHECK: call use_buf

define i32 @test_large_stack() {
entry:
%buffer1 = alloca [16384 x i8], align 8
%buffer1.sub = getelementptr inbounds [16384 x i8]* %buffer1, i32 0, i32 0
%0 = call i32 @use_buf(i32 16384, i8* %buffer1.sub)
ret i32 %0
}

declare i32 @use_buf(i32, i8*)

@ -165,3 +165,16 @@ define i64 @f13(i64 %a, i32 %b) {
%or = or i64 %shift, %low
ret i64 %or
}

; We previously wrongly removed the upper AND as dead.
define i64 @f14(i64 %a, i64 %b) {
; CHECK-LABEL: f14:
; CHECK: risbg {{%r[0-5]}}, %r2, 6, 134, 0
; CHECK: br %r14
%and1 = and i64 %a, 144115188075855872
%and2 = and i64 %b, 15
%or = or i64 %and1, %and2
%res = icmp eq i64 %or, 0
%ext = sext i1 %res to i64
ret i64 %ext
}

10
external/bsd/llvm/dist/llvm/test/CodeGen/X86/pr18054.ll
vendored
Normal file
@ -0,0 +1,10 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=penryn | FileCheck %s

define void @foo(<16 x i32>* %p, <16 x i1> %x) {
%ret = sext <16 x i1> %x to <16 x i32>
store <16 x i32> %ret, <16 x i32>* %p
ret void
; CHECK: foo
; CHECK-NOT: pmovsxbd
; CHECK: ret
}
@ -7141,3 +7141,178 @@
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Lower Precision Narrow, Rounding To
// Odd
//----------------------------------------------------------------------

fcvtxn s0, s1

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtxn s0, s1
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
// With Ties To Away
//----------------------------------------------------------------------

fcvtas s0, d0
fcvtas d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtas s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtas d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding To
// Nearest With Ties To Away
//----------------------------------------------------------------------

fcvtau s0, d0
fcvtau d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtau s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtau d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding Toward
// Minus Infinity
//----------------------------------------------------------------------

fcvtms s0, d0
fcvtms d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtms s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtms d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
// Minus Infinity
//----------------------------------------------------------------------

fcvtmu s0, d0
fcvtmu d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtmu s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtmu d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
// With Ties To Even
//----------------------------------------------------------------------

fcvtns s0, d0
fcvtns d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtns s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtns d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding To
// Nearest With Ties To Even
//----------------------------------------------------------------------

fcvtnu s0, d0
fcvtnu d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtnu s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtnu d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding Toward
// Positive Infinity
//----------------------------------------------------------------------

fcvtps s0, d0
fcvtps d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtps s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtps d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
// Positive Infinity
//----------------------------------------------------------------------

fcvtpu s0, d0
fcvtpu d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtpu s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtpu d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding Toward Zero
//----------------------------------------------------------------------

fcvtzs s0, d0
fcvtzs d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtzs s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtzs d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
// Zero
//----------------------------------------------------------------------

fcvtzu s0, d0
fcvtzu d0, s0

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtzu s0, d0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtzu d0, s0
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Floating-point Absolute Difference
//----------------------------------------------------------------------

fabd s29, d24, s20
fabd d29, s24, d20

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fabd s29, d24, s20
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fabd d29, s24, d20
// CHECK-ERROR: ^

@ -9,6 +9,16 @@
abs d29, d24

// CHECK: abs d29, d24 // encoding: [0x1d,0xbb,0xe0,0x5e]

//----------------------------------------------------------------------
// Scalar Floating-point Absolute Difference
//----------------------------------------------------------------------

fabd s29, s24, s20
fabd d29, d24, d20

// CHECK: fabd s29, s24, s20 // encoding: [0x1d,0xd7,0xb4,0x7e]
// CHECK: fabd d29, d24, d20 // encoding: [0x1d,0xd7,0xf4,0x7e]

//----------------------------------------------------------------------
// Scalar Signed Saturating Absolute Value

@ -61,3 +61,121 @@

// CHECK: fcvtzu s21, s12, #1 // encoding: [0x95,0xfd,0x3f,0x7f]
// CHECK: fcvtzu d21, d12, #1 // encoding: [0x95,0xfd,0x7f,0x7f]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Lower Precision Narrow, Rounding To
// Odd
//----------------------------------------------------------------------

fcvtxn s22, d13

// CHECK: fcvtxn s22, d13 // encoding: [0xb6,0x69,0x61,0x7e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
// With Ties To Away
//----------------------------------------------------------------------

fcvtas s12, s13
fcvtas d21, d14

// CHECK: fcvtas s12, s13 // encoding: [0xac,0xc9,0x21,0x5e]
// CHECK: fcvtas d21, d14 // encoding: [0xd5,0xc9,0x61,0x5e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding To
// Nearest With Ties To Away
//----------------------------------------------------------------------

fcvtau s12, s13
fcvtau d21, d14

// CHECK: fcvtau s12, s13 // encoding: [0xac,0xc9,0x21,0x7e]
// CHECK: fcvtau d21, d14 // encoding: [0xd5,0xc9,0x61,0x7e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding Toward
// Minus Infinity
//----------------------------------------------------------------------

fcvtms s22, s13
fcvtms d21, d14

// CHECK: fcvtms s22, s13 // encoding: [0xb6,0xb9,0x21,0x5e]
// CHECK: fcvtms d21, d14 // encoding: [0xd5,0xb9,0x61,0x5e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
// Minus Infinity
//----------------------------------------------------------------------

fcvtmu s12, s13
fcvtmu d21, d14

// CHECK: fcvtmu s12, s13 // encoding: [0xac,0xb9,0x21,0x7e]
// CHECK: fcvtmu d21, d14 // encoding: [0xd5,0xb9,0x61,0x7e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
// With Ties To Even
//----------------------------------------------------------------------

fcvtns s22, s13
fcvtns d21, d14

// CHECK: fcvtns s22, s13 // encoding: [0xb6,0xa9,0x21,0x5e]
// CHECK: fcvtns d21, d14 // encoding: [0xd5,0xa9,0x61,0x5e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding To
// Nearest With Ties To Even
//----------------------------------------------------------------------

fcvtnu s12, s13
fcvtnu d21, d14

// CHECK: fcvtnu s12, s13 // encoding: [0xac,0xa9,0x21,0x7e]
// CHECK: fcvtnu d21, d14 // encoding: [0xd5,0xa9,0x61,0x7e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding Toward
// Positive Infinity
//----------------------------------------------------------------------

fcvtps s22, s13
fcvtps d21, d14

// CHECK: fcvtps s22, s13 // encoding: [0xb6,0xa9,0xa1,0x5e]
// CHECK: fcvtps d21, d14 // encoding: [0xd5,0xa9,0xe1,0x5e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
// Positive Infinity
//----------------------------------------------------------------------

fcvtpu s12, s13
fcvtpu d21, d14

// CHECK: fcvtpu s12, s13 // encoding: [0xac,0xa9,0xa1,0x7e]
// CHECK: fcvtpu d21, d14 // encoding: [0xd5,0xa9,0xe1,0x7e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Signed Integer, Rounding Toward Zero
//----------------------------------------------------------------------

fcvtzs s12, s13
fcvtzs d21, d14

// CHECK: fcvtzs s12, s13 // encoding: [0xac,0xb9,0xa1,0x5e]
// CHECK: fcvtzs d21, d14 // encoding: [0xd5,0xb9,0xe1,0x5e]

//----------------------------------------------------------------------
// Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
// Zero
//----------------------------------------------------------------------

fcvtzu s12, s13
fcvtzu d21, d14

// CHECK: fcvtzu s12, s13 // encoding: [0xac,0xb9,0xa1,0x7e]
// CHECK: fcvtzu d21, d14 // encoding: [0xd5,0xb9,0xe1,0x7e]
15
external/bsd/llvm/dist/llvm/test/MC/ARM/align_arm_2_thumb.s
vendored
Normal file
@ -0,0 +1,15 @@
@ RUN: llvm-mc -triple armv7-none-linux -filetype=obj -o %t.o %s
@ RUN: llvm-objdump -triple thumbv7-none-linux -d %t.o | FileCheck --check-prefix=ARM_2_THUMB %s

@ RUN: llvm-mc -triple armv7-apple-darwin -filetype=obj -o %t_darwin.o %s
@ RUN: llvm-objdump -triple thumbv7-apple-darwin -d %t_darwin.o | FileCheck --check-prefix=ARM_2_THUMB %s

.syntax unified
.code 16
@ ARM_2_THUMB-LABEL: foo
foo:
add r0, r0
.align 3
@ ARM_2_THUMB: 2: 00 bf nop
add r0, r0
15
external/bsd/llvm/dist/llvm/test/MC/ARM/align_thumb_2_arm.s
vendored
Normal file
@ -0,0 +1,15 @@
@ RUN: llvm-mc -triple thumbv7-none-linux -filetype=obj -o %t.o %s
@ RUN: llvm-objdump -triple armv7-none-linux -d %t.o | FileCheck --check-prefix=THUMB_2_ARM %s

@ RUN: llvm-mc -triple thumbv7-apple-darwin -filetype=obj -o %t_darwin.o %s
@ RUN: llvm-objdump -triple armv7-apple-darwin -d %t_darwin.o | FileCheck --check-prefix=THUMB_2_ARM %s

.syntax unified
.code 32
@ THUMB_2_ARM-LABEL: foo
foo:
add r0, r0
.align 3
@ THUMB_2_ARM: 4: 00 f0 20 e3 nop
add r0, r0
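
@ Editor's note: read together with align_arm_2_thumb.s above, this test pins
@ down that alignment padding follows the current .code mode rather than the
@ module triple: the Thumb region is padded with the Thumb nop (00 bf) and the
@ ARM region with the ARM nop (00 f0 20 e3), as the CHECK lines verify.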
14
external/bsd/llvm/dist/llvm/test/MC/COFF/eh-frame.s
vendored
Normal file
@ -0,0 +1,14 @@
// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - | llvm-readobj -s | FileCheck %s

.def _main;
.scl 2;
.type 32;
.endef
.text
.globl _main
_main:
.cfi_startproc
ret
.cfi_endproc

// CHECK: Name: .eh_frame
188
external/bsd/llvm/dist/llvm/test/MC/COFF/section-comdat.s
vendored
Normal file
@ -0,0 +1,188 @@
// RUN: llvm-mc -triple i386-pc-win32 -filetype=obj %s | llvm-readobj -s -t | FileCheck %s
// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -t | FileCheck %s

.section assocSec
.linkonce
.long 1

.section secName, "dr", discard, "Symbol1"
.globl Symbol1
Symbol1:
.long 1

.section secName, "dr", one_only, "Symbol2"
.globl Symbol2
Symbol2:
.long 1

.section SecName, "dr", same_size, "Symbol3"
.globl Symbol3
Symbol3:
.long 1

.section SecName, "dr", same_contents, "Symbol4"
.globl Symbol4
Symbol4:
.long 1

.section SecName, "dr", associative assocSec, "Symbol5"
.globl Symbol5
Symbol5:
.long 1

.section SecName, "dr", largest, "Symbol6"
.globl Symbol6
Symbol6:
.long 1

.section SecName, "dr", newest, "Symbol7"
.globl Symbol7
Symbol7:
.long 1

// CHECK: Sections [
// CHECK: Section {
// CHECK: Number: 1
// CHECK: Name: assocSec
// CHECK: Characteristics [
// CHECK: IMAGE_SCN_LNK_COMDAT
// CHECK: ]
// CHECK: }
// CHECK: Section {
// CHECK: Number: 2
// CHECK: Name: secName
// CHECK: Characteristics [
// CHECK: IMAGE_SCN_LNK_COMDAT
// CHECK: ]
// CHECK: }
// CHECK: Section {
// CHECK: Number: 3
// CHECK: Name: secName
// CHECK: Characteristics [
// CHECK: IMAGE_SCN_LNK_COMDAT
// CHECK: ]
// CHECK: }
// CHECK: Section {
// CHECK: Number: 4
// CHECK: Name: SecName
// CHECK: Characteristics [
// CHECK: IMAGE_SCN_LNK_COMDAT
// CHECK: ]
// CHECK: }
// CHECK: Section {
// CHECK: Number: 5
// CHECK: Name: SecName
// CHECK: Characteristics [
// CHECK: IMAGE_SCN_LNK_COMDAT
// CHECK: ]
// CHECK: }
// CHECK: Section {
// CHECK: Number: 6
// CHECK: Name: SecName
// CHECK: Characteristics [
// CHECK: IMAGE_SCN_LNK_COMDAT
// CHECK: ]
// CHECK: }
// CHECK: Section {
// CHECK: Number: 7
// CHECK: Name: SecName
// CHECK: Characteristics [
// CHECK: IMAGE_SCN_LNK_COMDAT
// CHECK: ]
// CHECK: }
// CHECK: Section {
// CHECK: Number: 8
// CHECK: Name: SecName
// CHECK: Characteristics [
// CHECK: IMAGE_SCN_LNK_COMDAT
// CHECK: ]
// CHECK: }
// CHECK: ]
// CHECK: Symbols [
// CHECK: Symbol {
// CHECK: Name: assocSec
// CHECK: Section: assocSec (1)
// CHECK: AuxSectionDef {
// CHECK: Selection: Any
// CHECK: }
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: secName
// CHECK: Section: secName (2)
// CHECK: AuxSectionDef {
// CHECK: Selection: Any
// CHECK: }
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: secName
// CHECK: Section: secName (3)
// CHECK: AuxSectionDef {
// CHECK: Selection: NoDuplicates
// CHECK: }
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: SecName
// CHECK: Section: SecName (4)
// CHECK: AuxSectionDef {
// CHECK: Selection: SameSize
// CHECK: }
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: SecName
// CHECK: Section: SecName (5)
// CHECK: AuxSymbolCount: 1
// CHECK: AuxSectionDef {
// CHECK: Selection: ExactMatch
// CHECK: }
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: SecName
// CHECK: Section: SecName (6)
// CHECK: AuxSectionDef {
// CHECK: Selection: Associative
// CHECK: AssocSection: assocSec (1)
// CHECK: }
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: SecName
// CHECK: Section: SecName (7)
// CHECK: AuxSectionDef {
// CHECK: Selection: Largest
// CHECK: }
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: SecName
// CHECK: Section: SecName (8)
// CHECK: AuxSectionDef {
// CHECK: Selection: Newest (0x7)
// CHECK: }
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: Symbol1
// CHECK: Section: secName (2)
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: Symbol2
// CHECK: Section: secName (3)
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: Symbol3
// CHECK: Section: SecName (4)
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: Symbol4
// CHECK: Section: SecName (5)
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: Symbol5
// CHECK: Section: SecName (6)
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: Symbol6
// CHECK: Section: SecName (7)
// CHECK: }
// CHECK: Symbol {
// CHECK: Name: Symbol7
// CHECK: Section: SecName (8)
// CHECK: }
// CHECK: ]
@ -674,6 +674,23 @@
0xf5 0xdd 0x23 0x4e
0xab 0xdc 0x77 0x4e

#----------------------------------------------------------------------
# Vector Shift Left long
#----------------------------------------------------------------------
# CHECK: shll2 v2.8h, v4.16b, #8
# CHECK: shll2 v6.4s, v8.8h, #16
# CHECK: shll2 v6.2d, v8.4s, #32
# CHECK: shll v2.8h, v4.8b, #8
# CHECK: shll v6.4s, v8.4h, #16
# CHECK: shll v6.2d, v8.2s, #32

0x82,0x38,0x21,0x6e
0x06,0x39,0x61,0x6e
0x06,0x39,0xa1,0x6e
0x82,0x38,0x21,0x2e
0x06,0x39,0x61,0x2e
0x06,0x39,0xa1,0x2e

#----------------------------------------------------------------------
# Vector Shift Left by Immediate
#----------------------------------------------------------------------
@ -2129,7 +2146,8 @@
# CHECK: ld1 {v0.b}[9], [x0], #1
# CHECK: ld2 {v15.h, v16.h}[7], [x15], #4
# CHECK: ld3 {v31.s, v0.s, v1.s}[3], [sp], x3
# CHECK: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #24
# CHECK: ld4 {v0.d, v1.d, v2.d, v3.d}[1], [x0], #32
# CHECK: ld4 {v0.h, v1.h, v2.h, v3.h}[7], [x0], x0
# CHECK: st1 {v0.d}[1], [x0], #8
# CHECK: st2 {v31.s, v0.s}[3], [sp], #8
# CHECK: st3 {v15.h, v16.h, v17.h}[7], [x15], #6
@ -2138,6 +2156,7 @@
0xef,0x59,0xff,0x4d
0xff,0xb3,0xc3,0x4d
0x00,0xa4,0xff,0x4d
0x00,0x78,0xe0,0x4d
0x00,0x84,0x9f,0x4d
0xff,0x93,0xbf,0x4d
0xef,0x79,0x9f,0x4d
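
# Editor's note: the ld4 change in the hunk above is simple addressing
# arithmetic: a post-indexed ld4 of one 64-bit lane advances the base by
# 4 registers x 8 bytes = 32 bytes, so the writeback immediate is #32,
# not the #24 the old CHECK line expected.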
@ -2510,3 +2529,110 @@
# CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
# CHECK: tbx v16.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.16b

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Lower Precision Narrow, Rounding To
# Odd
#----------------------------------------------------------------------
# CHECK: fcvtxn s22, d13
0xb6,0x69,0x61,0x7e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
# With Ties To Away
#----------------------------------------------------------------------
# CHECK: fcvtas s12, s13
# CHECK: fcvtas d21, d14

0xac,0xc9,0x21,0x5e
0xd5,0xc9,0x61,0x5e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Unsigned Integer, Rounding To
# Nearest With Ties To Away
#----------------------------------------------------------------------
# CHECK: fcvtau s12, s13
# CHECK: fcvtau d21, d14
0xac,0xc9,0x21,0x7e
0xd5,0xc9,0x61,0x7e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Signed Integer, Rounding Toward
# Minus Infinity
#----------------------------------------------------------------------
# CHECK: fcvtms s22, s13
# CHECK: fcvtms d21, d14
0xb6,0xb9,0x21,0x5e
0xd5,0xb9,0x61,0x5e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
# Minus Infinity
#----------------------------------------------------------------------
# CHECK: fcvtmu s12, s13
# CHECK: fcvtmu d21, d14
0xac,0xb9,0x21,0x7e
0xd5,0xb9,0x61,0x7e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Signed Integer, Rounding To Nearest
# With Ties To Even
#----------------------------------------------------------------------

# CHECK: fcvtns s22, s13
# CHECK: fcvtns d21, d14

0xb6,0xa9,0x21,0x5e
0xd5,0xa9,0x61,0x5e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Unsigned Integer, Rounding To
# Nearest With Ties To Even
#----------------------------------------------------------------------

# CHECK: fcvtnu s12, s13
# CHECK: fcvtnu d21, d14
0xac,0xa9,0x21,0x7e
0xd5,0xa9,0x61,0x7e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Signed Integer, Rounding Toward
# Positive Infinity
#----------------------------------------------------------------------
# CHECK: fcvtps s22, s13
# CHECK: fcvtps d21, d14
0xb6,0xa9,0xa1,0x5e
0xd5,0xa9,0xe1,0x5e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
# Positive Infinity
#----------------------------------------------------------------------
# CHECK: fcvtpu s12, s13
# CHECK: fcvtpu d21, d14
0xac,0xa9,0xa1,0x7e
0xd5,0xa9,0xe1,0x7e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Signed Integer, Rounding Toward Zero
#----------------------------------------------------------------------
# CHECK: fcvtzs s12, s13
# CHECK: fcvtzs d21, d14
0xac,0xb9,0xa1,0x5e
0xd5,0xb9,0xe1,0x5e

#----------------------------------------------------------------------
# Scalar Floating-point Convert To Unsigned Integer, Rounding Toward
# Zero
#----------------------------------------------------------------------
# CHECK: fcvtzu s12, s13
# CHECK: fcvtzu d21, d14
0xac,0xb9,0xa1,0x7e
0xd5,0xb9,0xe1,0x7e

#----------------------------------------------------------------------
# Scalar Floating-point Absolute Difference
#----------------------------------------------------------------------
# CHECK: fabd s29, s24, s20
# CHECK: fabd d29, d24, d20
0x1d,0xd7,0xb4,0x7e
0x1d,0xd7,0xf4,0x7e
16437
external/bsd/llvm/dist/llvm/test/MC/Mips/micromips-long-branch.ll
vendored
Normal file
File diff suppressed because it is too large
74
external/bsd/llvm/dist/llvm/test/Transforms/InstCombine/pr17827.ll
vendored
Normal file
@ -0,0 +1,74 @@
; RUN: opt < %s -instcombine -S | FileCheck %s

; With left shift, the comparison should not be modified.
; CHECK-LABEL: @test_shift_and_cmp_not_changed1(
; CHECK: icmp slt i8 %andp, 32
define i1 @test_shift_and_cmp_not_changed1(i8 %p) #0 {
entry:
%shlp = shl i8 %p, 5
%andp = and i8 %shlp, -64
%cmp = icmp slt i8 %andp, 32
ret i1 %cmp
}

; With arithmetic right shift, the comparison should not be modified.
; CHECK-LABEL: @test_shift_and_cmp_not_changed2(
; CHECK: icmp slt i8 %andp, 32
define i1 @test_shift_and_cmp_not_changed2(i8 %p) #0 {
entry:
%shlp = ashr i8 %p, 5
%andp = and i8 %shlp, -64
%cmp = icmp slt i8 %andp, 32
ret i1 %cmp
}

; This should simplify functionally to the left shift case.
; The extra input parameter should be optimized away.
; CHECK-LABEL: @test_shift_and_cmp_changed1(
; CHECK: %andp = shl i8 %p, 5
; CHECK-NEXT: %shl = and i8 %andp, -64
; CHECK-NEXT: %cmp = icmp slt i8 %shl, 32
define i1 @test_shift_and_cmp_changed1(i8 %p, i8 %q) #0 {
entry:
%andp = and i8 %p, 6
%andq = and i8 %q, 8
%or = or i8 %andq, %andp
%shl = shl i8 %or, 5
%ashr = ashr i8 %shl, 5
%cmp = icmp slt i8 %ashr, 1
ret i1 %cmp
}

; Unsigned compare allows a transformation to compare against 0.
; CHECK-LABEL: @test_shift_and_cmp_changed2(
; CHECK: icmp eq i8 %andp, 0
define i1 @test_shift_and_cmp_changed2(i8 %p) #0 {
entry:
%shlp = shl i8 %p, 5
%andp = and i8 %shlp, -64
%cmp = icmp ult i8 %andp, 32
ret i1 %cmp
}

; nsw on the shift should not affect the comparison.
; CHECK-LABEL: @test_shift_and_cmp_changed3(
; CHECK: icmp slt i8 %andp, 32
define i1 @test_shift_and_cmp_changed3(i8 %p) #0 {
entry:
%shlp = shl nsw i8 %p, 5
%andp = and i8 %shlp, -64
%cmp = icmp slt i8 %andp, 32
ret i1 %cmp
}

; Logical shift right allows a return true because the 'and' guarantees no bits are set.
; CHECK-LABEL: @test_shift_and_cmp_changed4(
; CHECK: ret i1 true
define i1 @test_shift_and_cmp_changed4(i8 %p) #0 {
entry:
%shlp = lshr i8 %p, 5
%andp = and i8 %shlp, -64
%cmp = icmp slt i8 %andp, 32
ret i1 %cmp
}
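
; Editor's note: a worked example of the last fold, using nothing beyond the
; IR above: (lshr i8 %p, 5) is at most 7, and 7 & -64 (0b11000000) is 0, so
; %andp is always 0 and "icmp slt i8 0, 32" folds to true.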
39
external/bsd/llvm/dist/llvm/test/Transforms/LoopVectorize/X86/tripcount.ll
vendored
Normal file
@ -0,0 +1,39 @@
; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -mcpu=prescott < %s | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
target triple = "i386-unknown-freebsd11.0"

@big = external global [0 x i32]

; PR18049
; We need to truncate the exit count to i32. This is legal because the
; arithmetic is signed (%inc is nsw).

; CHECK-LABEL: tripcount
; CHECK: trunc i64 %count to i32

define void @tripcount(i64 %count) {
entry:
%cmp6 = icmp sgt i64 %count, 0
br i1 %cmp6, label %for.body.preheader, label %for.end

for.body.preheader:
br label %for.body

for.body:
%i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%arrayidx = getelementptr inbounds [0 x i32]* @big, i32 0, i32 %i.07
%0 = load i32* %arrayidx, align 4
%neg = xor i32 %0, -1
store i32 %neg, i32* %arrayidx, align 4
%inc = add nsw i32 %i.07, 1
%conv = sext i32 %inc to i64
%cmp = icmp slt i64 %conv, %count
br i1 %cmp, label %for.body, label %for.end.loopexit

for.end.loopexit:
br label %for.end

for.end:
ret void
}
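
; Editor's note: the legality argument spelled out: since %inc is "add nsw",
; the i32 induction value never wraps, so a terminating loop has an exit count
; that fits in i32; truncating %count to i32 (the trunc the CHECK line looks
; for) is therefore safe.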
21
external/bsd/llvm/dist/llvm/test/Transforms/MergeFunc/ptr-int-transitivity-1.ll
vendored
Normal file
@ -0,0 +1,21 @@
; RUN: opt -S -mergefunc < %s | not grep "functions merged"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

declare void @stuff()

define void @f0(i64 %p0) {
entry:
call void @stuff()
call void @stuff()
call void @stuff()
ret void
}

define void @f2(i64 addrspace(1)* %p0) {
entry:
call void @stuff()
call void @stuff()
call void @stuff()
ret void
}
25
external/bsd/llvm/dist/llvm/test/Transforms/MergeFunc/ptr-int-transitivity-2.ll
vendored
Normal file
@ -0,0 +1,25 @@
; RUN: opt -S -mergefunc < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

declare void @stuff()

define void @f0(i64 %p0) {
entry:
call void @stuff()
call void @stuff()
call void @stuff()
ret void
}

; CHECK-LABEL: @f0
; CHECK: %2 = ptrtoint i64* %0 to i64
; CHECK: tail call void @f0(i64 %2)
; CHECK: ret void
define void @f1(i64 addrspace(0)* %p0) {
entry:
call void @stuff()
call void @stuff()
call void @stuff()
ret void
}
21
external/bsd/llvm/dist/llvm/test/Transforms/MergeFunc/ptr-int-transitivity-3.ll
vendored
Normal file
@ -0,0 +1,21 @@
; RUN: opt -S -mergefunc < %s | not grep "functions merged"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

declare void @stuff()

define void @f0(i64 addrspace(0)* %p0) {
entry:
call void @stuff()
call void @stuff()
call void @stuff()
ret void
}

define void @f2(i64 addrspace(1)* %p0) {
entry:
call void @stuff()
call void @stuff()
call void @stuff()
ret void
}
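
; Editor's note: the three ptr-int-transitivity tests bracket one rule: i64
; and i64 addrspace(0)* may merge through a ptrtoint thunk (test 2), but
; neither i64 nor an addrspace(0) pointer may merge with an addrspace(1)
; pointer (tests 1 and 3), since that equivalence would otherwise leak across
; address spaces by transitivity.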
@ -59,3 +59,38 @@ for.end: ; preds = %for.body
ret double %mul3
}

; A need-to-gather entry cannot be an external use of the scalar element.
; Instead the insertelement instructions of the need-to-gather entry are the
; external users.
; This test would assert because we would keep the scalar fpext and fadd alive.
; PR18129

; CHECK-LABEL: needtogather
define i32 @needtogather(double *noalias %a, i32 *noalias %b, float * noalias %c,
i32 * noalias %d) {
entry:
%0 = load i32* %d, align 4
%conv = sitofp i32 %0 to float
%1 = load float* %c
%sub = fsub float 0.000000e+00, %1
%mul = fmul float %sub, 0.000000e+00
%add = fadd float %conv, %mul
%conv1 = fpext float %add to double
%sub3 = fsub float 1.000000e+00, %1
%mul4 = fmul float %sub3, 0.000000e+00
%add5 = fadd float %conv, %mul4
%conv6 = fpext float %add5 to double
%tobool = fcmp une float %add, 0.000000e+00
br i1 %tobool, label %if.then, label %if.end

if.then:
br label %if.end

if.end:
%storemerge = phi double [ %conv6, %if.then ], [ %conv1, %entry ]
%e.0 = phi double [ %conv1, %if.then ], [ %conv6, %entry ]
store double %storemerge, double* %a, align 8
%conv7 = fptosi double %e.0 to i32
store i32 %conv7, i32* %b, align 4
ret i32 undef
}
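
; Editor's note: "need-to-gather" above is SLP-vectorizer jargon for a bundle
; that cannot be vectorized directly and is instead rebuilt from its scalars
; with insertelement instructions; the fix tracked by PR18129 credits those
; insertelements, rather than the original scalars, as the external users.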
47
external/bsd/llvm/dist/llvm/test/Transforms/SLPVectorizer/X86/pr18060.ll
vendored
Normal file
@ -0,0 +1,47 @@
; RUN: opt < %s -slp-vectorizer -S -mtriple=i386-pc-linux

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
target triple = "i386-pc-linux"

; Function Attrs: nounwind
define i32 @_Z16adjustFixupValueyj(i64 %Value, i32 %Kind) {
entry:
%extract.t = trunc i64 %Value to i32
%extract = lshr i64 %Value, 12
%extract.t6 = trunc i64 %extract to i32
switch i32 %Kind, label %sw.default [
i32 0, label %return
i32 1, label %return
i32 129, label %sw.bb1
i32 130, label %sw.bb2
]

sw.default: ; preds = %entry
call void @_Z25llvm_unreachable_internalv()
unreachable

sw.bb1: ; preds = %entry
%shr = lshr i64 %Value, 16
%extract.t5 = trunc i64 %shr to i32
%extract7 = lshr i64 %Value, 28
%extract.t8 = trunc i64 %extract7 to i32
br label %sw.bb2

sw.bb2: ; preds = %sw.bb1, %entry
%Value.addr.0.off0 = phi i32 [ %extract.t, %entry ], [ %extract.t5, %sw.bb1 ]
%Value.addr.0.off12 = phi i32 [ %extract.t6, %entry ], [ %extract.t8, %sw.bb1 ]
%conv6 = and i32 %Value.addr.0.off0, 4095
%conv4 = shl i32 %Value.addr.0.off12, 16
%shl = and i32 %conv4, 983040
%or = or i32 %shl, %conv6
%or11 = or i32 %or, 8388608
br label %return

return: ; preds = %sw.bb2, %entry, %entry
%retval.0 = phi i32 [ %or11, %sw.bb2 ], [ %extract.t, %entry ], [ %extract.t, %entry ]
ret i32 %retval.0
}

; Function Attrs: noreturn
declare void @_Z25llvm_unreachable_internalv()
@ -1,21 +1,13 @@
The LLVM Gold LTO Plugin
========================

This directory contains a plugin that is designed to work with binutils
gold linker. At present time, this is not the default linker in
binutils, and the default build of gold does not support plugins.

Obtaining binutils:
See docs/GoldPlugin.html for complete build and usage instructions.

cvs -z 9 -d :pserver:anoncvs@sourceware.org:/cvs/src login
{enter "anoncvs" as the password}
cvs -z 9 -d :pserver:anoncvs@sourceware.org:/cvs/src co binutils

This will create a src/ directory. Make a build/ directory and from
there configure binutils with "../src/configure --enable-gold --enable-plugins".
Then build binutils with "make all-gold".

To build the LLVMgold plugin, configure LLVM with the option
--with-binutils-include=/path/to/binutils/src/include/ --enable-pic. To use the
plugin, run "ld-new --plugin /path/to/LLVMgold.so".
Without PIC libLTO and LLVMgold are not being built (because they would fail
link on x86-64 with a relocation error: PIC and non-PIC can't be combined).
NOTE: libLTO and LLVMgold aren't built without PIC because they would fail
to link on x86-64 with a relocation error: PIC and non-PIC can't be combined.
As an alternative to passing --enable-pic, you can use 'make ENABLE_PIC=1' in
your entire LLVM build.