diff --git a/external/bsd/llvm/dist/llvm/autoconf/configure.ac b/external/bsd/llvm/dist/llvm/autoconf/configure.ac index a9d491548f11..3d0a2b3b1b78 100644 --- a/external/bsd/llvm/dist/llvm/autoconf/configure.ac +++ b/external/bsd/llvm/dist/llvm/autoconf/configure.ac @@ -31,14 +31,14 @@ dnl=== dnl===-----------------------------------------------------------------------=== dnl Initialize autoconf and define the package name, version number and dnl address for reporting bugs. -AC_INIT([LLVM],[3.4],[http://llvm.org/bugs/]) +AC_INIT([LLVM],[3.5svn],[http://llvm.org/bugs/]) AC_DEFINE([LLVM_VERSION_MAJOR], [3], [Major version of the LLVM API]) -AC_DEFINE([LLVM_VERSION_MINOR], [4], [Minor version of the LLVM API]) +AC_DEFINE([LLVM_VERSION_MINOR], [5], [Minor version of the LLVM API]) dnl Provide a copyright substitution and ensure the copyright notice is included dnl in the output of --version option of the generated configure script. -AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign."]) -AC_COPYRIGHT([Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign.]) +AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign."]) +AC_COPYRIGHT([Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign.]) dnl Indicate that we require autoconf 2.60 or later. AC_PREREQ(2.60) diff --git a/external/bsd/llvm/dist/llvm/bindings/ocaml/Makefile.ocaml b/external/bsd/llvm/dist/llvm/bindings/ocaml/Makefile.ocaml index f8ed841d668e..1b964eec0622 100644 --- a/external/bsd/llvm/dist/llvm/bindings/ocaml/Makefile.ocaml +++ b/external/bsd/llvm/dist/llvm/bindings/ocaml/Makefile.ocaml @@ -37,7 +37,7 @@ OcamlDir := $(LibDir)/ocaml # Info from llvm-config and similar ifndef IS_CLEANING_TARGET ifdef UsedComponents -UsedLibs = $(shell $(LLVM_CONFIG) --libs $(UsedComponents)) +UsedLibs = $(shell $(LLVM_CONFIG) --libs --system-libs $(UsedComponents)) UsedLibNames = $(shell $(LLVM_CONFIG) --libnames $(UsedComponents)) endif endif diff --git a/external/bsd/llvm/dist/llvm/bindings/ocaml/transforms/scalar/llvm_scalar_opts.mli b/external/bsd/llvm/dist/llvm/bindings/ocaml/transforms/scalar/llvm_scalar_opts.mli index d69abe209c45..ab6fa4a64662 100644 --- a/external/bsd/llvm/dist/llvm/bindings/ocaml/transforms/scalar/llvm_scalar_opts.mli +++ b/external/bsd/llvm/dist/llvm/bindings/ocaml/transforms/scalar/llvm_scalar_opts.mli @@ -12,7 +12,7 @@ This interface provides an OCaml API for LLVM scalar transforms, the classes in the [LLVMScalarOpts] library. *) -(** See the [llvm::createConstantPropogationPass] function. *) +(** See the [llvm::createConstantPropagationPass] function. 
*) external add_constant_propagation : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit = "llvm_add_constant_propagation" diff --git a/external/bsd/llvm/dist/llvm/bindings/python/llvm/tests/test_disassembler.py b/external/bsd/llvm/dist/llvm/bindings/python/llvm/tests/test_disassembler.py index e960dc0ba9ca..37a04e4fc7e7 100644 --- a/external/bsd/llvm/dist/llvm/bindings/python/llvm/tests/test_disassembler.py +++ b/external/bsd/llvm/dist/llvm/bindings/python/llvm/tests/test_disassembler.py @@ -16,9 +16,9 @@ class TestDisassembler(TestBase): self.assertEqual(count, 3) self.assertEqual(s, '\tjcxz\t-127') - def test_nonexistant_triple(self): + def test_nonexistent_triple(self): with self.assertRaisesRegexp(Exception, "Could not obtain disassembler for triple"): - Disassembler("nonexistant-triple-raises") + Disassembler("nonexistent-triple-raises") def test_get_instructions(self): sequence = '\x67\xe3\x81\x01\xc7' # jcxz -127; addl %eax, %edi diff --git a/external/bsd/llvm/dist/llvm/docs/CMake.rst b/external/bsd/llvm/dist/llvm/docs/CMake.rst index c9fe538c9dfa..9fb4f489b44d 100644 --- a/external/bsd/llvm/dist/llvm/docs/CMake.rst +++ b/external/bsd/llvm/dist/llvm/docs/CMake.rst @@ -211,6 +211,9 @@ LLVM-specific variables **LLVM_ENABLE_THREADS**:BOOL Build with threads support, if available. Defaults to ON. +**LLVM_ENABLE_CXX11**:BOOL + Build in C++11 mode, if available. Defaults to OFF. + **LLVM_ENABLE_ASSERTIONS**:BOOL Enables code assertions. Defaults to OFF if and only if ``CMAKE_BUILD_TYPE`` is *Release*. diff --git a/external/bsd/llvm/dist/llvm/docs/CodingStandards.rst b/external/bsd/llvm/dist/llvm/docs/CodingStandards.rst index 9418680edc74..b454e49664f0 100644 --- a/external/bsd/llvm/dist/llvm/docs/CodingStandards.rst +++ b/external/bsd/llvm/dist/llvm/docs/CodingStandards.rst @@ -844,7 +844,7 @@ Here are more examples: .. code-block:: c++ - assert(Ty->isPointerType() && "Can't allocate a non pointer type!"); + assert(Ty->isPointerType() && "Can't allocate a non-pointer type!"); assert((Opcode == Shl || Opcode == Shr) && "ShiftInst Opcode invalid!"); diff --git a/external/bsd/llvm/dist/llvm/docs/CommandGuide/index.rst b/external/bsd/llvm/dist/llvm/docs/CommandGuide/index.rst index d50542ddc9da..d799941aeaea 100644 --- a/external/bsd/llvm/dist/llvm/docs/CommandGuide/index.rst +++ b/external/bsd/llvm/dist/llvm/docs/CommandGuide/index.rst @@ -22,7 +22,6 @@ Basic Commands llvm-link llvm-ar llvm-nm - llvm-prof llvm-config llvm-diff llvm-cov diff --git a/external/bsd/llvm/dist/llvm/docs/CommandGuide/llvm-symbolizer.rst b/external/bsd/llvm/dist/llvm/docs/CommandGuide/llvm-symbolizer.rst index e03be9b19876..dfbdb3ac434a 100644 --- a/external/bsd/llvm/dist/llvm/docs/CommandGuide/llvm-symbolizer.rst +++ b/external/bsd/llvm/dist/llvm/docs/CommandGuide/llvm-symbolizer.rst @@ -10,7 +10,9 @@ DESCRIPTION ----------- :program:`llvm-symbolizer` reads object file names and addresses from standard -input and prints corresponding source code locations to standard output. This +input and prints corresponding source code locations to standard output. +If an object file is specified on the command line, :program:`llvm-symbolizer` reads +only addresses from standard input. This program uses debug info sections and symbol table in the object files. EXAMPLE @@ -45,10 +47,22 @@ EXAMPLE _main /tmp/source_x86_64.cc:8 + $ cat addr2.txt + 0x4004f4 + 0x401000 + $ llvm-symbolizer -obj=a.out < addr2.txt + main + /tmp/a.cc:4 + + foo(int) + /tmp/a.cc:12 OPTIONS ------- +.. option:: -obj + Path to object file to be symbolized. + ..
option:: -functions Print function names as well as source file/line locations. Defaults to true. diff --git a/external/bsd/llvm/dist/llvm/docs/CommandLine.rst b/external/bsd/llvm/dist/llvm/docs/CommandLine.rst index 4c84d23297b4..1b342e34bf50 100644 --- a/external/bsd/llvm/dist/llvm/docs/CommandLine.rst +++ b/external/bsd/llvm/dist/llvm/docs/CommandLine.rst @@ -1276,7 +1276,7 @@ The ``cl::getRegisteredOptions`` function ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``cl::getRegisteredOptions`` function is designed to give a programmer -access to declared non positional command line options so that how they appear +access to declared non-positional command line options so that how they appear in ``-help`` can be modified prior to calling `cl::ParseCommandLineOptions`_. Note this method should not be called during any static initialisation because it cannot be guaranteed that all options will have been initialised. Hence it diff --git a/external/bsd/llvm/dist/llvm/docs/CompilerWriterInfo.rst b/external/bsd/llvm/dist/llvm/docs/CompilerWriterInfo.rst index 7b02a7876a7a..e69cd1a6107c 100644 --- a/external/bsd/llvm/dist/llvm/docs/CompilerWriterInfo.rst +++ b/external/bsd/llvm/dist/llvm/docs/CompilerWriterInfo.rst @@ -80,8 +80,9 @@ R600 SPARC ----- -* `SPARC resources `_ -* `SPARC standards `_ +* `SPARC standards `_ +* `SPARC V9 ABI `_ +* `SPARC V8 ABI `_ SystemZ ------- diff --git a/external/bsd/llvm/dist/llvm/docs/Extensions.rst b/external/bsd/llvm/dist/llvm/docs/Extensions.rst index e308dbcdfc43..f499bdd57e6e 100644 --- a/external/bsd/llvm/dist/llvm/docs/Extensions.rst +++ b/external/bsd/llvm/dist/llvm/docs/Extensions.rst @@ -37,7 +37,7 @@ X86/COFF-Dependent Relocations ^^^^^^^^^^^ -The following additional relocation type is supported: +The following additional relocation types are supported: **@IMGREL** (AT&T syntax only) generates an image-relative relocation that corresponds to the COFF relocation types ``IMAGE_REL_I386_DIR32NB`` (32-bit) or @@ -54,6 +54,22 @@ corresponds to the COFF relocation types ``IMAGE_REL_I386_DIR32NB`` (32-bit) or .long (fun@imgrel + 0x3F) .long $unwind$fun@imgrel +**.secrel32** generates a relocation that corresponds to the COFF relocation +types ``IMAGE_REL_I386_SECREL`` (32-bit) or ``IMAGE_REL_AMD64_SECREL`` (64-bit). + +**.secidx** relocation generates an index of the section that contains +the target. It corresponds to the COFF relocation types +``IMAGE_REL_I386_SECTION`` (32-bit) or ``IMAGE_REL_AMD64_SECTION`` (64-bit). + +.. code-block:: gas + + .section .debug$S,"rn" + .long 4 + .long 242 + .long 40 + .secrel32 _function_name + .secidx _function_name + ... ``.linkonce`` Directive ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -127,7 +143,7 @@ MC supports passing the information in ``.linkonce`` at the end of Symbol1: .long 1 -Note that in the combined form the COMDAT symbol is explict. This +Note that in the combined form the COMDAT symbol is explicit. This extension exists to support multiple sections with the same name in different comdats: diff --git a/external/bsd/llvm/dist/llvm/docs/HowToReleaseLLVM.rst b/external/bsd/llvm/dist/llvm/docs/HowToReleaseLLVM.rst index bc3b093d755e..6194fdfc8645 100644 --- a/external/bsd/llvm/dist/llvm/docs/HowToReleaseLLVM.rst +++ b/external/bsd/llvm/dist/llvm/docs/HowToReleaseLLVM.rst @@ -238,6 +238,8 @@ when qualifying the build of ``llvm``, ``clang``, and ``dragonegg``.
+--------------+---------------+----------------------+ | x86-64 | FreeBSD | gcc 4.2.X | +--------------+---------------+----------------------+ +| ARMv7 | Linux | gcc 4.6.X, gcc 4.7.X | ++--------------+---------------+----------------------+ Release Qualification Criteria ------------------------------ @@ -298,6 +300,10 @@ Specific Target Qualification Details | | | | clang regression tests, | | | | | test-suite | +--------------+-------------+----------------+-----------------------------+ +| ARMv7A | Linux | last release | llvm regression tests, | +| | | | clang regression tests, | +| | | | test-suite | ++--------------+-------------+----------------+-----------------------------+ Community Testing ----------------- diff --git a/external/bsd/llvm/dist/llvm/docs/InAlloca.rst b/external/bsd/llvm/dist/llvm/docs/InAlloca.rst new file mode 100644 index 000000000000..b1779874e0e2 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/docs/InAlloca.rst @@ -0,0 +1,140 @@ +========================================== +Design and Usage of the InAlloca Attribute +========================================== + +Introduction +============ + +.. Warning:: This feature is unstable and not fully implemented. + +The :ref:`attr_inalloca` attribute is designed to allow taking the +address of an aggregate argument that is being passed by value through +memory. Primarily, this feature is required for compatibility with the +Microsoft C++ ABI. Under that ABI, class instances that are passed by +value are constructed directly into argument stack memory. Prior to the +addition of inalloca, calls in LLVM were indivisible instructions. +There was no way to perform intermediate work, such as object +construction, between the first stack adjustment and the final control +transfer. With inalloca, each argument is modelled as an alloca, which +can be stored to independently of the call. Unfortunately, this +complicated feature comes with a large set of restrictions designed to +bound the lifetime of the argument memory around the call, which are +explained in this document. + +For now, it is recommended that frontends and optimizers avoid producing +this construct, primarily because it forces the use of a base pointer. +This feature may grow in the future to allow general mid-level +optimization, but for now, it should be regarded as less efficient than +passing by value with a copy. + +Intended Usage +============== + +In the example below, ``f`` is attempting to pass a default-constructed +``Foo`` object to ``g`` by value. + +.. code-block:: llvm + + %Foo = type { i32, i32 } + declare void @Foo_ctor(%Foo* %this) + declare void @g(%Foo* inalloca %arg) + + define void @f() { + ... + + bb1: + %base = call i8* @llvm.stacksave() + %arg = alloca %Foo + invoke void @Foo_ctor(%Foo* %arg) + to label %invoke.cont unwind %invoke.unwind + + invoke.cont: + call void @g(%Foo* inalloca %arg) + call void @llvm.stackrestore(i8* %base) + ... + + invoke.unwind: + call void @llvm.stackrestore(i8* %base) + ... + } + +The alloca in this example is dynamic, meaning it is not in the entry +block, and it can be executed more than once. Due to the restrictions +against allocas between an alloca used with inalloca and its associated +call site, all allocas used with inalloca are considered dynamic. + +To avoid any stack leakage, the frontend saves the current stack pointer +with a call to :ref:`llvm.stacksave `. Then, it +allocates the argument stack space with alloca and calls the default +constructor. 
One important consideration is that the default +constructor could throw an exception, so the frontend has to create a +landing pad. At this point, if there were any other inalloca arguments, +the frontend would have to destruct them before restoring the stack +pointer. If the constructor does not unwind, ``g`` is called, and then +the stack is restored. + +Design Considerations +===================== + +Lifetime +-------- + +The biggest design consideration for this feature is object lifetime. +We cannot model the arguments as static allocas in the entry block, +because all calls need to use the memory that is at the end of the call +frame to pass arguments. We cannot vend pointers to that memory at +function entry because after code generation they will alias. In the +current design, the rule against allocas between the inalloca alloca +values and the call site avoids this problem, but it creates a cleanup +problem. Cleanup and lifetime is handled explicitly with stack save and +restore calls. In the future, we may be able to avoid this by using +:ref:`llvm.lifetime.start ` and :ref:`llvm.lifetime.end +` instead. + +Nested Calls and Copy Elision +----------------------------- + +The next consideration is the ability for the frontend to perform copy +elision in the face of nested calls. Consider the evaluation of +``foo(foo(Bar()))``, where ``foo`` takes and returns a ``Bar`` object by +value and ``Bar`` has non-trivial constructors. In this case, we want +to be able to elide copies into ``foo``'s argument slots. That means we +need to have more than one set of argument frames active at the same +time. First, we need to allocate the frame for the outer call so we can +pass it in as the hidden struct return pointer to the middle call. Then +we do the same for the middle call, allocating a frame and passing its +address to ``Bar``'s default constructor. By wrapping the evaluation of +the inner ``foo`` with stack save and restore, we can have multiple +overlapping active call frames. + +Callee-cleanup Calling Conventions +---------------------------------- + +Another wrinkle is the existence of callee-cleanup conventions. On +Windows, all methods and many other functions adjust the stack to clear +the memory used to pass their arguments. In some sense, this means that +the allocas are automatically cleared by the call. However, LLVM +instead models this as a write of undef to all of the inalloca values +passed to the call instead of a stack adjustment. Frontends should +still restore the stack pointer to avoid a stack leak. + +Exceptions +---------- + +There is also the possibility of an exception. If argument evaluation +or copy construction throws an exception, the landing pad must do +cleanup, which includes adjusting the stack pointer to avoid a stack +leak. This means the cleanup of the stack memory cannot be tied to the +call itself. There needs to be a separate IR-level instruction that can +perform independent cleanup of arguments. + +Efficiency +---------- + +Eventually, it should be possible to generate efficient code for this +construct. In particular, using inalloca should not require a base +pointer. If the backend can prove that all points in the CFG only have +one possible stack level, then it can address the stack directly from +the stack pointer. While this is not yet implemented, the plan is that +the inalloca attribute should not change much, but the frontend IR +generation recommendations may change. 
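To make the "Nested Calls and Copy Elision" discussion above concrete, here is a
minimal sketch of how ``foo(foo(Bar()))`` might be laid out. This is illustrative
IR only, not a normative lowering: ``@foo`` and ``%Bar`` are hypothetical, the
``sret``/``inalloca`` split follows the description in that section, and the
exception-handling edges shown in the earlier example are omitted for brevity.

.. code-block:: llvm

    %Bar = type { i32, i32 }
    declare void @Bar_ctor(%Bar* %this)
    declare void @foo(%Bar* sret %ret, %Bar* inalloca %arg)
    declare i8* @llvm.stacksave()
    declare void @llvm.stackrestore(i8*)

    define void @call_foo_foo() {
    entry:
      %ret = alloca %Bar                       ; result of the outer call
      %outerbase = call i8* @llvm.stacksave()
      %outerarg = alloca %Bar                  ; argument frame for the outer call
      %innerbase = call i8* @llvm.stacksave()
      %innerarg = alloca %Bar                  ; argument frame for the inner call
      call void @Bar_ctor(%Bar* %innerarg)
      ; The inner call constructs its result directly into the outer call's
      ; argument frame, eliding a copy.
      call void @foo(%Bar* sret %outerarg, %Bar* inalloca %innerarg)
      call void @llvm.stackrestore(i8* %innerbase)
      call void @foo(%Bar* sret %ret, %Bar* inalloca %outerarg)
      call void @llvm.stackrestore(i8* %outerbase)
      ret void
    }

Note that the inner argument frame is released with ``llvm.stackrestore`` before
the outer call, which keeps the sketch within the rule against uncleared allocas
between an ``inalloca`` alloca and its call site.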
diff --git a/external/bsd/llvm/dist/llvm/docs/LLVMBuild.rst b/external/bsd/llvm/dist/llvm/docs/LLVMBuild.rst index 040b04480ee6..c0c96d3f3ca8 100644 --- a/external/bsd/llvm/dist/llvm/docs/LLVMBuild.rst +++ b/external/bsd/llvm/dist/llvm/docs/LLVMBuild.rst @@ -315,7 +315,7 @@ the properties which are associated with that component. ``BuildTool`` components are like ``Tool`` components, except that the tool is supposed to be built for the platform where the build is running - (instead of that platform being targetted). Build systems are expected + (instead of that platform being targeted). Build systems are expected to handle the fact that required libraries may need to be built for multiple platforms in order to be able to link this tool. diff --git a/external/bsd/llvm/dist/llvm/docs/LangRef.rst b/external/bsd/llvm/dist/llvm/docs/LangRef.rst index 810455cbc3a7..cc932515d247 100644 --- a/external/bsd/llvm/dist/llvm/docs/LangRef.rst +++ b/external/bsd/llvm/dist/llvm/docs/LangRef.rst @@ -4,7 +4,7 @@ LLVM Language Reference Manual .. contents:: :local: - :depth: 3 + :depth: 4 Abstract ======== @@ -289,13 +289,9 @@ symbols from (to) DLLs (Dynamic Link Libraries). pointer to a pointer in a DLL, so that it can be referenced with the ``dllimport`` attribute. On Microsoft Windows targets, the pointer name is formed by combining ``__imp_`` and the function or variable - name. -For example, since the "``.LC0``" variable is defined to be internal, if -another module defined a "``.LC0``" variable and was linked with this -one, one of the two would be renamed, preventing a collision. Since -"``main``" and "``puts``" are external (i.e., lacking any linkage -declarations), they are accessible outside of the current module. + name. Since this linkage exists for defining a dll interface, the + compiler, assembler and linker know it is externally referenced and + must refrain from deleting the symbol. It is illegal for a function *declaration* to have any linkage type other than ``external``, ``dllimport`` or ``extern_weak``. @@ -370,6 +366,18 @@ added in the future: accessed runtime components pinned to specific hardware registers. At the moment only X86 supports this convention (both 32 and 64 bit). +"``webkit_jscc``" - WebKit's JavaScript calling convention + This calling convention has been implemented for `WebKit FTL JIT +`_. It passes arguments on the + stack right to left (as cdecl does), and returns a value in the + platform's customary return register. +"``anyregcc``" - Dynamic calling convention for code patching + This is a special convention that supports patching an arbitrary code + sequence in place of a call site. This convention forces the call + arguments into registers but allows them to be dynamically + allocated. This can currently only be used with calls to + llvm.experimental.patchpoint because only this intrinsic records + the location of its arguments in a side table. See :doc:`StackMaps`. "``cc <n>``" - Numbered convention Any calling convention may be specified by number, allowing target-specific calling conventions to be used. Target specific @@ -507,8 +515,8 @@ variables defined within the module are not modified from their initial values before the start of the global initializer. This is true even for variables potentially accessible from outside the module, including those with external linkage or appearing in -``@llvm.used``. This assumption may be suppressed by marking the -variable with ``externally_initialized``. +``@llvm.used`` or dllexported variables.
This assumption may be suppressed +by marking the variable with ``externally_initialized``. An explicit alignment may be specified for a global, which must be a power of 2. If not present, or if the alignment is set to zero, the @@ -618,7 +626,7 @@ Syntax:: The linkage must be one of ``private``, ``linker_private``, ``linker_private_weak``, ``internal``, ``linkonce``, ``weak``, ``linkonce_odr``, ``weak_odr``, ``external``. Note that some system linkers -might not correctly handle dropping a weak symbol that is aliased by a non weak +might not correctly handle dropping a weak symbol that is aliased by a non-weak alias. .. _namedmetadatastructure: @@ -701,6 +709,39 @@ Currently, only the following parameter attributes are defined: site. If the alignment is not specified, then the code generator makes a target-specific assumption. +.. _attr_inalloca: + +``inalloca`` + +.. Warning:: This feature is unstable and not fully implemented. + + The ``inalloca`` argument attribute allows the caller to get the + address of an outgoing argument to a ``call`` or ``invoke`` before + it executes. It is similar to ``byval`` in that it is used to pass + arguments by value, but it guarantees that the argument will not be + copied. + + To be :ref:`well formed <wellformed>`, the caller must pass in an + alloca value into an ``inalloca`` parameter, and an alloca may be + used as an ``inalloca`` argument at most once. The attribute can + only be applied to parameters that would be passed in memory and not + registers. The ``inalloca`` attribute cannot be used in conjunction + with other attributes that affect argument storage, like ``inreg``, + ``nest``, ``sret``, or ``byval``. The ``inalloca`` stack space is + considered to be clobbered by any call that uses it, so any + ``inalloca`` parameters cannot be marked ``readonly``. + + Allocas passed with ``inalloca`` to a call must be in the opposite + order of the parameter list, meaning that the rightmost argument + must be allocated first. If a call has inalloca arguments, no other + allocas can occur between the first alloca used by the call and the + call site, unless they are cleared by calls to + :ref:`llvm.stackrestore <int_stackrestore>`. Violating these rules + results in undefined behavior at runtime. + + See :doc:`InAlloca` for more information on how to use this + attribute. + ``sret`` This indicates that the pointer parameter specifies the address of a structure that is the return value of the function in the source @@ -1119,9 +1160,15 @@ as follows: ``a<size>:<abi>:<pref>`` This specifies the alignment for an aggregate type of a given bit ``<size>``. -``s<size>:<abi>:<pref>`` - This specifies the alignment for a stack object of a given bit - ``<size>``. +``m:<mangling>`` + If present, specifies that llvm names are mangled in the output. The + options are + * ``e``: ELF mangling: Private symbols get a ``.L`` prefix. + * ``m``: Mips mangling: Private symbols get a ``$`` prefix. + * ``o``: Mach-O mangling: Private symbols get ``L`` prefix. Other + symbols get a ``_`` prefix. + * ``c``: COFF prefix: Similar to Mach-O, but stdcall and fastcall + functions also get a suffix based on the frame size. ``n<size1>:<size2>:<size3>...`` This specifies a set of native integer widths for the target CPU in bits.
For example, it might contain ``n32`` for 32-bit PowerPC, @@ -1151,7 +1198,7 @@ specifications are given in this list: - ``f128:128:128`` - quad is 128-bit aligned - ``v64:64:64`` - 64-bit vector is 64-bit aligned - ``v128:128:128`` - 128-bit vector is 128-bit aligned -- ``a0:0:64`` - aggregates are 64-bit aligned +- ``a:0:64`` - aggregates are 64-bit aligned When LLVM is determining the alignment for a given type, it uses the following rules: @@ -1480,80 +1527,90 @@ transformation. A strong type system makes it easier to read the generated code and enables novel analyses and transformations that are not feasible to perform on normal three address code representations. -.. _typeclassifications: +.. _t_void: -Type Classifications --------------------- +Void Type +--------- -The types fall into a few useful classifications: +:Overview: -.. list-table:: - :header-rows: 1 +The void type does not represent any value and has no size. - * - Classification - - Types - - * - :ref:`integer ` - - ``i1``, ``i2``, ``i3``, ... ``i8``, ... ``i16``, ... ``i32``, ... - ``i64``, ... - - * - :ref:`floating point ` - - ``half``, ``float``, ``double``, ``x86_fp80``, ``fp128``, - ``ppc_fp128`` +:Syntax: - * - first class +:: - .. _t_firstclass: + void - - :ref:`integer `, :ref:`floating point `, - :ref:`pointer `, :ref:`vector `, - :ref:`structure `, :ref:`array `, - :ref:`label `, :ref:`metadata `. - * - :ref:`primitive ` - - :ref:`label `, - :ref:`void `, - :ref:`integer `, - :ref:`floating point `, - :ref:`x86mmx `, - :ref:`metadata `. +.. _t_function: - * - :ref:`derived ` - - :ref:`array `, - :ref:`function `, - :ref:`pointer `, - :ref:`structure `, - :ref:`vector `, - :ref:`opaque `. +Function Type +------------- + +:Overview: + + +The function type can be thought of as a function signature. It consists of a +return type and a list of formal parameter types. The return type of a function +type is a void type or first class type --- except for :ref:`label ` +and :ref:`metadata ` types. + +:Syntax: + +:: + + () + +...where '````' is a comma-separated list of type +specifiers. Optionally, the parameter list may include a type ``...``, which +indicates that the function takes a variable number of arguments. Variable +argument functions can access their arguments with the :ref:`variable argument +handling intrinsic ` functions. '````' is any type +except :ref:`label ` and :ref:`metadata `. + +:Examples: + ++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``i32 (i32)`` | function taking an ``i32``, returning an ``i32`` | ++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``float (i16, i32 *) *`` | :ref:`Pointer ` to a function that takes an ``i16`` and a :ref:`pointer ` to ``i32``, returning ``float``. | ++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``i32 (i8*, ...)`` | A vararg function that takes at least one :ref:`pointer ` to ``i8`` (char in C), which returns an integer. This is the signature for ``printf`` in LLVM. 
| ++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ``{i32, i32} (i32)`` | A function taking an ``i32``, returning a :ref:`structure ` containing two ``i32`` values | ++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +.. _t_firstclass: + +First Class Types +----------------- The :ref:`first class ` types are perhaps the most important. Values of these types are the only ones which can be produced by instructions. -.. _t_primitive: +.. _t_single_value: -Primitive Types ---------------- +Single Value Types +^^^^^^^^^^^^^^^^^^ -The primitive types are the fundamental building blocks of the LLVM -system. +These are the types that are valid in registers from CodeGen's perspective. .. _t_integer: Integer Type -^^^^^^^^^^^^ +"""""""""""" -Overview: -""""""""" +:Overview: The integer type is a very simple type that simply specifies an arbitrary bit width for the integer type desired. Any bit width from 1 bit to 2\ :sup:`23`\ -1 (about 8 million) can be specified. -Syntax: -""""""" +:Syntax: :: @@ -1563,7 +1620,7 @@ The number of bits the integer will occupy is specified by the ``N`` value. Examples: -""""""""" +********* +----------------+------------------------------------------------+ | ``i1`` | a single-bit integer. | @@ -1576,7 +1633,7 @@ Examples: .. _t_floating: Floating Point Types -^^^^^^^^^^^^^^^^^^^^ +"""""""""""""""""""" .. list-table:: :header-rows: 1 @@ -1605,10 +1662,9 @@ Floating Point Types .. _t_x86mmx: X86mmx Type -^^^^^^^^^^^ +""""""""""" -Overview: -""""""""" +:Overview: The x86mmx type represents a value held in an MMX register on an x86 machine. The operations allowed on it are quite limited: parameters and @@ -1617,42 +1673,92 @@ instructions are represented as intrinsic or asm calls with arguments and/or results of this type. There are no arrays, vectors or constants of this type. -Syntax: -""""""" +:Syntax: :: x86mmx -.. _t_void: -Void Type -^^^^^^^^^ +.. _t_pointer: -Overview: -""""""""" +Pointer Type +"""""""""""" -The void type does not represent any value and has no size. +:Overview: -Syntax: -""""""" +The pointer type is used to specify memory locations. Pointers are +commonly used to reference objects in memory. + +Pointer types may have an optional address space attribute defining the +numbered address space where the pointed-to object resides. The default +address space is number zero. The semantics of non-zero address spaces +are target-specific. + +Note that LLVM does not permit pointers to void (``void*``) nor does it +permit pointers to labels (``label*``). Use ``i8*`` instead. + +:Syntax: :: - void + * + +:Examples: + ++-------------------------+--------------------------------------------------------------------------------------------------------------+ +| ``[4 x i32]*`` | A :ref:`pointer ` to :ref:`array ` of four ``i32`` values. | ++-------------------------+--------------------------------------------------------------------------------------------------------------+ +| ``i32 (i32*) *`` | A :ref:`pointer ` to a :ref:`function ` that takes an ``i32*``, returning an ``i32``. 
| ++-------------------------+--------------------------------------------------------------------------------------------------------------+ +| ``i32 addrspace(5)*`` | A :ref:`pointer ` to an ``i32`` value that resides in address space #5. | ++-------------------------+--------------------------------------------------------------------------------------------------------------+ + +.. _t_vector: + +Vector Type +""""""""""" + +:Overview: + +A vector type is a simple derived type that represents a vector of +elements. Vector types are used when multiple primitive data are +operated in parallel using a single instruction (SIMD). A vector type +requires a size (number of elements) and an underlying primitive data +type. Vector types are considered :ref:`first class `. + +:Syntax: + +:: + + < <# elements> x > + +The number of elements is a constant integer value larger than 0; +elementtype may be any integer or floating point type, or a pointer to +these types. Vectors of size zero are not allowed. + +:Examples: + ++-------------------+--------------------------------------------------+ +| ``<4 x i32>`` | Vector of 4 32-bit integer values. | ++-------------------+--------------------------------------------------+ +| ``<8 x float>`` | Vector of 8 32-bit floating-point values. | ++-------------------+--------------------------------------------------+ +| ``<2 x i64>`` | Vector of 2 64-bit integer values. | ++-------------------+--------------------------------------------------+ +| ``<4 x i64*>`` | Vector of 4 pointers to 64-bit integer values. | ++-------------------+--------------------------------------------------+ .. _t_label: Label Type ^^^^^^^^^^ -Overview: -""""""""" +:Overview: The label type represents code labels. -Syntax: -""""""" +:Syntax: :: @@ -1663,31 +1769,17 @@ Syntax: Metadata Type ^^^^^^^^^^^^^ -Overview: -""""""""" +:Overview: The metadata type represents embedded metadata. No derived types may be created from metadata except for :ref:`function ` arguments. -Syntax: -""""""" +:Syntax: :: metadata -.. _t_derived: - -Derived Types -------------- - -The real power in LLVM comes from the derived types in the system. This -is what allows a programmer to represent arrays, functions, pointers, -and other useful types. Each of these types contain one or more element -types which may be a primitive type, or another derived type. For -example, it is possible to have a two dimensional array, using an array -as the element type of another array. - .. _t_aggregate: Aggregate Types @@ -1701,17 +1793,15 @@ aggregate types. .. _t_array: Array Type -^^^^^^^^^^ +"""""""""" -Overview: -""""""""" +:Overview: The array type is a very simple derived type that arranges elements sequentially in memory. The array type requires a size (number of elements) and an underlying data type. -Syntax: -""""""" +:Syntax: :: @@ -1720,8 +1810,7 @@ Syntax: The number of elements is a constant integer value; ``elementtype`` may be any type with a size. -Examples: -""""""""" +:Examples: +------------------+--------------------------------------+ | ``[40 x i32]`` | Array of 40 32-bit integer values. | @@ -1749,53 +1838,12 @@ LLVM with a zero length array type. An implementation of 'pascal style arrays' in LLVM could use the type "``{ i32, [0 x float]}``", for example. -.. _t_function: - -Function Type -^^^^^^^^^^^^^ - -Overview: -""""""""" - -The function type can be thought of as a function signature. It consists of a -return type and a list of formal parameter types. 
The return type of a function -type is a void type or first class type --- except for :ref:`label ` -and :ref:`metadata ` types. - -Syntax: -""""""" - -:: - - () - -...where '````' is a comma-separated list of type -specifiers. Optionally, the parameter list may include a type ``...``, which -indicates that the function takes a variable number of arguments. Variable -argument functions can access their arguments with the :ref:`variable argument -handling intrinsic ` functions. '````' is any type -except :ref:`label ` and :ref:`metadata `. - -Examples: -""""""""" - -+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| ``i32 (i32)`` | function taking an ``i32``, returning an ``i32`` | -+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| ``float (i16, i32 *) *`` | :ref:`Pointer ` to a function that takes an ``i16`` and a :ref:`pointer ` to ``i32``, returning ``float``. | -+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| ``i32 (i8*, ...)`` | A vararg function that takes at least one :ref:`pointer ` to ``i8`` (char in C), which returns an integer. This is the signature for ``printf`` in LLVM. | -+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| ``{i32, i32} (i32)`` | A function taking an ``i32``, returning a :ref:`structure ` containing two ``i32`` values | -+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - .. _t_struct: Structure Type -^^^^^^^^^^^^^^ +"""""""""""""" -Overview: -""""""""" +:Overview: The structure type is used to represent a collection of data members together in memory. The elements of a structure may be any type that has @@ -1819,16 +1867,14 @@ Literal types are uniqued by their contents and can never be recursive or opaque since there is no way to write one. Identified types can be recursive, can be opaqued, and are never uniqued. -Syntax: -""""""" +:Syntax: :: %T1 = type { } ; Identified normal struct type %T2 = type <{ }> ; Identified packed struct type -Examples: -""""""""" +:Examples: +------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | ``{ i32, i32, i32 }`` | A triple of three ``i32`` values | @@ -1841,105 +1887,27 @@ Examples: .. _t_opaque: Opaque Structure Types -^^^^^^^^^^^^^^^^^^^^^^ +"""""""""""""""""""""" -Overview: -""""""""" +:Overview: Opaque structure types are used to represent named structure types that do not have a body specified. This corresponds (for example) to the C notion of a forward declared structure. -Syntax: -""""""" +:Syntax: :: %X = type opaque %52 = type opaque -Examples: -""""""""" +:Examples: +--------------+-------------------+ | ``opaque`` | An opaque type. | +--------------+-------------------+ -.. 
_t_pointer: - -Pointer Type -^^^^^^^^^^^^ - -Overview: -""""""""" - -The pointer type is used to specify memory locations. Pointers are -commonly used to reference objects in memory. - -Pointer types may have an optional address space attribute defining the -numbered address space where the pointed-to object resides. The default -address space is number zero. The semantics of non-zero address spaces -are target-specific. - -Note that LLVM does not permit pointers to void (``void*``) nor does it -permit pointers to labels (``label*``). Use ``i8*`` instead. - -Syntax: -""""""" - -:: - - * - -Examples: -""""""""" - -+-------------------------+--------------------------------------------------------------------------------------------------------------+ -| ``[4 x i32]*`` | A :ref:`pointer ` to :ref:`array ` of four ``i32`` values. | -+-------------------------+--------------------------------------------------------------------------------------------------------------+ -| ``i32 (i32*) *`` | A :ref:`pointer ` to a :ref:`function ` that takes an ``i32*``, returning an ``i32``. | -+-------------------------+--------------------------------------------------------------------------------------------------------------+ -| ``i32 addrspace(5)*`` | A :ref:`pointer ` to an ``i32`` value that resides in address space #5. | -+-------------------------+--------------------------------------------------------------------------------------------------------------+ - -.. _t_vector: - -Vector Type -^^^^^^^^^^^ - -Overview: -""""""""" - -A vector type is a simple derived type that represents a vector of -elements. Vector types are used when multiple primitive data are -operated in parallel using a single instruction (SIMD). A vector type -requires a size (number of elements) and an underlying primitive data -type. Vector types are considered :ref:`first class `. - -Syntax: -""""""" - -:: - - < <# elements> x > - -The number of elements is a constant integer value larger than 0; -elementtype may be any integer or floating point type, or a pointer to -these types. Vectors of size zero are not allowed. - -Examples: -""""""""" - -+-------------------+--------------------------------------------------+ -| ``<4 x i32>`` | Vector of 4 32-bit integer values. | -+-------------------+--------------------------------------------------+ -| ``<8 x float>`` | Vector of 8 32-bit floating-point values. | -+-------------------+--------------------------------------------------+ -| ``<2 x i64>`` | Vector of 2 64-bit integer values. | -+-------------------+--------------------------------------------------+ -| ``<4 x i64*>`` | Vector of 4 pointers to 64-bit integer values. | -+-------------------+--------------------------------------------------+ - Constants ========= @@ -8502,6 +8470,8 @@ Memory Use Markers This class of intrinsics exists to information about the lifetime of memory objects and ranges where variables are immutable. +.. _int_lifestart: + '``llvm.lifetime.start``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -8533,6 +8503,8 @@ of the memory pointed to by ``ptr`` is dead. This means that it is known to never be used and has an undefined value. A load from the pointer that precedes this intrinsic can be replaced with ``'undef'``. +.. _int_lifeend: + '``llvm.lifetime.end``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -8958,3 +8930,10 @@ Semantics: This intrinsic does nothing, and it's removed by optimizers and ignored by codegen. 
+ +Stack Map Intrinsics +-------------------- + +LLVM provides experimental intrinsics to support runtime patching +mechanisms commonly desired in dynamic language JITs. These intrinsics +are described in :doc:`StackMaps`. diff --git a/external/bsd/llvm/dist/llvm/docs/NVPTXUsage.rst b/external/bsd/llvm/dist/llvm/docs/NVPTXUsage.rst index a9065ce74ba5..e1c401df877a 100644 --- a/external/bsd/llvm/dist/llvm/docs/NVPTXUsage.rst +++ b/external/bsd/llvm/dist/llvm/docs/NVPTXUsage.rst @@ -273,7 +273,7 @@ there is a separate version for each compute architecture. For a list of all math functions implemented in libdevice, see `libdevice Users Guide `_. -To accomodate various math-related compiler flags that can affect code +To accommodate various math-related compiler flags that can affect code generation of libdevice code, the library code depends on a special LLVM IR pass (``NVVMReflect``) to handle conditional compilation within LLVM IR. This pass looks for calls to the ``@__nvvm_reflect`` function and replaces them @@ -839,7 +839,7 @@ Libdevice provides an ``__nv_powf`` function that we will use. %valB = load float addrspace(1)* %ptrB, align 4 ; Compute C = pow(A, B) - %valC = call float @__nv_exp2f(float %valA, float %valB) + %valC = call float @__nv_powf(float %valA, float %valB) ; Store back to C store float %valC, float addrspace(1)* %ptrC, align 4 @@ -850,7 +850,7 @@ Libdevice provides an ``__nv_powf`` function that we will use. !nvvm.annotations = !{!0} !0 = metadata !{void (float addrspace(1)*, float addrspace(1)*, - float addrspace(1)*)* @kernel, metadata !"kernel", i32 1}% + float addrspace(1)*)* @kernel, metadata !"kernel", i32 1} To compile this kernel, we perform the following steps: diff --git a/external/bsd/llvm/dist/llvm/docs/ReleaseNotes.rst b/external/bsd/llvm/dist/llvm/docs/ReleaseNotes.rst index ec9b62e0bc7e..53dd6f9d7d48 100644 --- a/external/bsd/llvm/dist/llvm/docs/ReleaseNotes.rst +++ b/external/bsd/llvm/dist/llvm/docs/ReleaseNotes.rst @@ -1,15 +1,21 @@ ====================== -LLVM 3.4 Release Notes +LLVM 3.5 Release Notes ====================== .. contents:: :local: +.. warning:: + These are in-progress notes for the upcoming LLVM 3.5 release. You may + prefer the `LLVM 3.4 Release Notes `_. + + Introduction ============ This document contains the release notes for the LLVM Compiler Infrastructure, -release 3.4. Here we describe the status of LLVM, including major improvements +release 3.5. Here we describe the status of LLVM, including major improvements from the previous release, improvements in various subprojects of LLVM, and some of the current users of the code. All LLVM releases may be downloaded from the `LLVM releases web site `_. @@ -35,58 +41,6 @@ Non-comprehensive list of changes in this release functionality, or simply have a lot to talk about), see the `NOTE` below for adding a new subsection. -* This is expected to be the last release of LLVM which compiles using a C++98 - toolchain. We expect to start using some C++11 features in LLVM and other - sub-projects starting after this release. That said, we are committed to - supporting a reasonable set of modern C++ toolchains as the host compiler on - all of the platforms. This will at least include Visual Studio 2012 on - Windows, and Clang 3.1 or GCC 4.7.x on Mac and Linux. The final set of - compilers (and the C++11 features they support) is not set in stone, but we - wanted users of LLVM to have a heads up that the next release will involve - a substantial change in the host toolchain requirements. 
- -* The regression tests now fail if any command in a pipe fails. To disable it in - a directory, just add ``config.pipefail = False`` to its ``lit.local.cfg``. - See :doc:`Lit ` for the details. - -* Support for exception handling has been removed from the old JIT. Use MCJIT - if you need EH support. - -* The R600 backend is not marked experimental anymore and is built by default. - -* APFloat::isNormal() was renamed to APFloat::isFiniteNonZero() and - APFloat::isIEEENormal() was renamed to APFloat::isNormal(). This ensures that - APFloat::isNormal() conforms to IEEE-754R-2008. - -* The library call simplification pass has been removed. Its functionality - has been integrated into the instruction combiner and function attribute - marking passes. - -* Support for building using Visual Studio 2008 has been dropped. Use VS 2010 - or later instead. For more information, see the `Getting Started using Visual - Studio `_ page. - -* The Loop Vectorizer that was previously enabled for -O3 is now enabled for - -Os and -O2. - -* The new SLP Vectorizer is now enabled by default. - -* llvm-ar now uses the new Object library and produces archives and - symbol tables in the gnu format. - -* FileCheck now allows specifing -check-prefix multiple times. This - helps reduce duplicate check lines when using multiple RUN lines. - -* The bitcast instruction no longer allows casting between pointers - with different address spaces. To achieve this, use the new - addrspacecast instruction. - -* Different sized pointers for different address spaces should now - generally work. This is primarily useful for GPU targets. - -* OCaml bindings have been significantly extended to cover almost all of the - LLVM libraries. - * ... next change ... .. NOTE @@ -99,126 +53,12 @@ Non-comprehensive list of changes in this release Makes programs 10x faster by doing Special New Thing. -Mips Target ------------ - -Support for the MIPS SIMD Architecture (MSA) has been added. MSA is supported -through inline assembly, intrinsics with the prefix '__builtin_msa', and normal -code generation. - -For more information on MSA (including documentation for the instruction set), -see the `MIPS SIMD page at Imagination Technologies -`_ - -SPARC Target ------------- - -The SPARC backend got many improvements, namely - -* experimental SPARC V9 backend -* JIT support for SPARC -* fp128 support -* exception handling -* TLS support -* leaf functions optimization -* bug fixes - -External Open Source Projects Using LLVM 3.4 +External Open Source Projects Using LLVM 3.5 ============================================ An exciting aspect of LLVM is that it is used as an enabling technology for a lot of other language and tools projects. This section lists some of the -projects that have already been updated to work with LLVM 3.4. - -DXR ---- - -`DXR `_ is Mozilla's code search and navigation -tool, aimed at making sense of large projects like Firefox. It supports -full-text and regex searches as well as structural queries like "Find all the -callers of this function." Behind the scenes, it uses a custom trigram index, -the re2 library, and structural data collected by a clang compiler plugin. - -LDC - the LLVM-based D compiler -------------------------------- - -`D `_ is a language with C-like syntax and static typing. It -pragmatically combines efficiency, control, and modeling power, with safety and -programmer productivity. 
D supports powerful concepts like Compile-Time Function -Execution (CTFE) and Template Meta-Programming, provides an innovative approach -to concurrency and offers many classical paradigms. - -`LDC `_ uses the frontend from the reference compiler -combined with LLVM as backend to produce efficient native code. LDC targets -x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux/PPC64. -Ports to other architectures like ARM and AArch64 are underway. - -Likely ------- - -`Likely `_ is an open source domain specific -language for image recognition. Algorithms are just-in-time compiled using -LLVM's MCJIT infrastructure to execute on single or multi-threaded CPUs as well -as OpenCL SPIR or CUDA enabled GPUs. Likely exploits the observation that while -image processing and statistical learning kernels must be written generically -to handle any matrix datatype, at runtime they tend to be executed repeatedly -on the same type. - -Portable Computing Language (pocl) ----------------------------------- - -In addition to producing an easily portable open source OpenCL -implementation, another major goal of `pocl `_ -is improving performance portability of OpenCL programs with -compiler optimizations, reducing the need for target-dependent manual -optimizations. An important part of pocl is a set of LLVM passes used to -statically parallelize multiple work-items with the kernel compiler, even in -the presence of work-group barriers. This enables static parallelization of -the fine-grained static concurrency in the work groups in multiple ways. - -Portable Native Client (PNaCl) ------------------------------- - -`Portable Native Client (PNaCl) `_ -is a Chrome initiative to bring the performance and low-level control of native -code to modern web browsers, without sacrificing the security benefits and -portability of web applications. PNaCl works by compiling native C and C++ code -to an intermediate representation using the LLVM clang compiler. This -intermediate representation is a subset of LLVM bytecode that is wrapped into a -portable executable, which can be hosted on a web server like any other website -asset. When the site is accessed, Chrome fetches and translates the portable -executable into an architecture-specific machine code optimized directly for -the underlying device. PNaCl lets developers compile their code once to run on -any hardware platform and embed their PNaCl application in any website, -enabling developers to directly leverage the power of the underlying CPU and -GPU. - -TTA-based Co-design Environment (TCE) -------------------------------------- - -`TCE `_ is a toolset for designing new -exposed datapath processors based on the Transport triggered architecture (TTA). -The toolset provides a complete co-design flow from C/C++ -programs down to synthesizable VHDL/Verilog and parallel program binaries. -Processor customization points include the register files, function units, -supported operations, and the interconnection network. - -TCE uses Clang and LLVM for C/C++/OpenCL C language support, target independent -optimizations and also for parts of code generation. It generates -new LLVM-based code generators "on the fly" for the designed processors and -loads them in to the compiler backend as runtime libraries to avoid -per-target recompilation of larger parts of the compiler chain. - -WebCL Validator ---------------- - -`WebCL Validator `_ implements -validation for WebCL C language which is a subset of OpenCL ES 1.1. 
Validator -checks the correctness of WebCL C, and implements memory protection for it as a -source-2-source transformation. The transformation converts WebCL to memory -protected OpenCL. The protected OpenCL cannot access any memory ranges which -were not allocated for it, and its memory is always initialized to prevent -information leakage from other programs. +projects that have already been updated to work with LLVM 3.5. Additional Information diff --git a/external/bsd/llvm/dist/llvm/docs/ReleaseProcess.rst b/external/bsd/llvm/dist/llvm/docs/ReleaseProcess.rst index 0836b6e92a3b..c4bbc91c63ce 100644 --- a/external/bsd/llvm/dist/llvm/docs/ReleaseProcess.rst +++ b/external/bsd/llvm/dist/llvm/docs/ReleaseProcess.rst @@ -52,18 +52,16 @@ The scripts are in the ``utils/release`` directory. test-release.sh --------------- -This script will check-out, configure and compile LLVM+Clang (+ most add-ons, -like ``compiler-rt``, ``libcxx`` and ``clang-extra-tools``) in three stages, and -will test the final stage. It'll have installed the final binaries on the -Phase3/Releasei(+Asserts) directory, and that's the one you should use for the -test-suite and other external tests. +This script will check-out, configure and compile LLVM+Clang (+ most add-ons, like ``compiler-rt``, +``libcxx`` and ``clang-extra-tools``) in three stages, and will test the final stage. +It'll have installed the final binaries on the Phase3/Releasei(+Asserts) directory, and +that's the one you should use for the test-suite and other external tests. To run the script on a specific release candidate run:: ./test-release.sh \ - -release 3.4 \ + -release 3.3 \ -rc 1 \ - -triple x86_64-apple-darwin \ -no-64bit \ -test-asserts \ -no-compare-files diff --git a/external/bsd/llvm/dist/llvm/docs/SourceLevelDebugging.rst b/external/bsd/llvm/dist/llvm/docs/SourceLevelDebugging.rst index a1d8110637f6..a6349fba86a4 100644 --- a/external/bsd/llvm/dist/llvm/docs/SourceLevelDebugging.rst +++ b/external/bsd/llvm/dist/llvm/docs/SourceLevelDebugging.rst @@ -2306,7 +2306,7 @@ stringWithCString:]``") and the basename is the selector only Mach-O Changes """""""""""""" -The sections names for the apple hash tables are for non mach-o files. For +The sections names for the apple hash tables are for non-mach-o files. For mach-o files, the sections should be contained in the ``__DWARF`` segment with names as follows: diff --git a/external/bsd/llvm/dist/llvm/docs/StackMaps.rst b/external/bsd/llvm/dist/llvm/docs/StackMaps.rst new file mode 100644 index 000000000000..0dac62b595d0 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/docs/StackMaps.rst @@ -0,0 +1,480 @@ +=================================== +Stack maps and patch points in LLVM +=================================== + +.. contents:: + :local: + :depth: 2 + +Definitions +=========== + +In this document we refer to the "runtime" collectively as all +components that serve as the LLVM client, including the LLVM IR +generator, object code consumer, and code patcher. + +A stack map records the location of ``live values`` at a particular +instruction address. These ``live values`` do not refer to all the +LLVM values live across the stack map. Instead, they are only the +values that the runtime requires to be live at this point. For +example, they may be the values the runtime will need to resume +program execution at that point independent of the compiled function +containing the stack map. + +LLVM emits stack map data into the object code within a designated +:ref:`stackmap-section`. 
This stack map data contains a record for +each stack map. The record stores the stack map's instruction address +and contains an entry for each mapped value. Each entry encodes a +value's location as a register, stack offset, or constant. + +A patch point is an instruction address at which space is reserved for +patching a new instruction sequence at run time. Patch points look +much like calls to LLVM. They take arguments that follow a calling +convention and may return a value. They also imply stack map +generation, which allows the runtime to locate the patchpoint and +find the location of ``live values`` at that point. + +Motivation +========== + +This functionality is currently experimental but is potentially useful +in a variety of settings, the most obvious being a runtime (JIT) +compiler. Example applications of the patchpoint intrinsics are +implementing an inline call cache for polymorphic method dispatch or +optimizing the retrieval of properties in dynamically typed languages +such as JavaScript. + +The intrinsics documented here are currently used by the JavaScript +compiler within the open source WebKit project, see the `FTL JIT +`_, but they are designed to be +used whenever stack maps or code patching are needed. Because the +intrinsics have experimental status, compatibility across LLVM +releases is not guaranteed. + +The stack map functionality described in this document is separate +from the functionality described in +:ref:`stack-map`. `GCFunctionMetadata` provides the location of +pointers into a collected heap captured by the `GCRoot` intrinsic, +which can also be considered a "stack map". Unlike the stack maps +defined above, the `GCFunctionMetadata` stack map interface does not +provide a way to associate live register values of arbitrary type with +an instruction address, nor does it specify a format for the resulting +stack map. The stack maps described here could potentially provide +richer information to a garbage collecting runtime, but that usage +will not be discussed in this document. + +Intrinsics +========== + +The following two kinds of intrinsics can be used to implement stack +maps and patch points: ``llvm.experimental.stackmap`` and +``llvm.experimental.patchpoint``. Both kinds of intrinsics generate a +stack map record, and they both allow some form of code patching. They +can be used independently (i.e. ``llvm.experimental.patchpoint`` +implicitly generates a stack map without the need for an additional +call to ``llvm.experimental.stackmap``). The choice of which to use +depends on whether it is necessary to reserve space for code patching +and whether any of the intrinsic arguments should be lowered according +to calling conventions. ``llvm.experimental.stackmap`` does not +reserve any space, nor does it expect any call arguments. If the +runtime patches code at the stack map's address, it will destructively +overwrite the program text. This is unlike +``llvm.experimental.patchpoint``, which reserves space for in-place +patching without overwriting surrounding code. The +``llvm.experimental.patchpoint`` intrinsic also lowers a specified +number of arguments according to its calling convention. This allows +patched code to make in-place function calls without marshaling. + +Each instance of one of these intrinsics generates a stack map record +in the :ref:`stackmap-section`. The record includes an ID, allowing +the runtime to uniquely identify the stack map, and the offset within +the code from the beginning of the enclosing function.
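Before the detailed descriptions below, here is a minimal sketch of how the two
intrinsics are invoked. The declarations match the Syntax sections that follow;
the IDs, the shadow/reserved byte counts, and the ``%target`` constant are
arbitrary illustrative values.

.. code-block:: llvm

    declare void @llvm.experimental.stackmap(i64, i32, ...)
    declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)

    define i64 @example(i64* %ptr) {
    entry:
      ; Record the location of %ptr in a stack map with ID 1. A shadow of
      ; zero bytes makes this a bare-bones stack map with no patch space.
      call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 0, i64* %ptr)

      ; Reserve 15 bytes of patchable space (ID 2) and emit a call to
      ; %target, passing %ptr as the single call argument.
      %target = inttoptr i64 -281474976710654 to i8*
      %val = call i64 (i64, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %target, i32 1, i64* %ptr)
      ret i64 %val
    }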
+ +'``llvm.experimental.stackmap``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void + @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>, ...) + +Overview: +""""""""" + +The '``llvm.experimental.stackmap``' intrinsic records the location of +specified values in the stack map without generating any code. + +Operands: +""""""""" + +The first operand is an ID to be encoded within the stack map. The +second operand is the number of shadow bytes following the +intrinsic. The variable number of operands that follow are the ``live +values`` for which locations will be recorded in the stack map. + +To use this intrinsic as a bare-bones stack map, with no code patching +support, the number of shadow bytes can be set to zero. + +Semantics: +"""""""""" + +The stack map intrinsic generates no code in place, unless nops are +needed to cover its shadow (see below). However, its offset from +function entry is stored in the stack map. This is the relative +instruction address immediately following the instructions that +precede the stack map. + +The stack map ID allows a runtime to locate the desired stack map +record. LLVM passes this ID through directly to the stack map +record without checking uniqueness. + +LLVM guarantees a shadow of instructions following the stack map's +instruction offset during which neither the end of the basic block nor +another call to ``llvm.experimental.stackmap`` or +``llvm.experimental.patchpoint`` may occur. This allows the runtime to +patch the code at this point in response to an event triggered from +outside the code. The code for instructions following the stack map +may be emitted in the stack map's shadow, and these instructions may +be overwritten by destructive patching. Without shadow bytes, this +destructive patching could overwrite program text or data outside the +current function. We disallow overlapping stack map shadows so that +the runtime does not need to consider this corner case. + +For example, a stack map with an 8-byte shadow: + +.. code-block:: llvm + + call void @runtime() + call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 77, i32 8, + i64* %ptr) + %val = load i64* %ptr + %add = add i64 %val, 3 + ret i64 %add + +May require one byte of nop-padding: + +.. code-block:: none + + 0x00 callq _runtime + 0x05 nop <--- stack map address + 0x06 movq (%rdi), %rax + 0x07 addq $3, %rax + 0x0a popq %rdx + 0x0b ret <---- end of 8-byte shadow + +Now, if the runtime needs to invalidate the compiled code, it may +patch 8 bytes of code at the stack map's address as follows: + +.. code-block:: none + + 0x00 callq _runtime + 0x05 movl $0xffff, %rax <--- patched code at stack map address + 0x0a callq *%rax <---- end of 8-byte shadow + +This way, after the normal call to the runtime returns, the code will +execute a patched call to a special entry point that can rebuild a +stack frame from the values located by the stack map. + +'``llvm.experimental.patchpoint.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void + @llvm.experimental.patchpoint.void(i64 <id>, i32 <numBytes>, + i8* <target>, i32 <numArgs>, ...) + declare i64 + @llvm.experimental.patchpoint.i64(i64 <id>, i32 <numBytes>, + i8* <target>, i32 <numArgs>, ...) + +Overview: +""""""""" + +The '``llvm.experimental.patchpoint.*``' intrinsics create a function +call to the specified ``<target>`` and record the location of specified +values in the stack map.
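
Before the operand-by-operand description that follows, here is a hedged sketch (again not part of this patch) of creating the ``i64`` variant from C++; the ID 78 and the 15 reserved bytes mirror the example later in this section, and ``Target`` is assumed to already have ``i8*`` type (a ``ConstantPointerNull`` would request a pure nop sled):

.. code-block:: c++

   // Minimal sketch: emit llvm.experimental.patchpoint.i64 via IRBuilder.
   #include "llvm/IR/IRBuilder.h"
   #include "llvm/IR/Intrinsics.h"
   #include "llvm/IR/Module.h"

   using namespace llvm;

   static Value *emitPatchPoint(Module &M, IRBuilder<> &B, Value *Target,
                                Value *CallArg) {
     Function *PatchPoint = Intrinsic::getDeclaration(
         &M, Intrinsic::experimental_patchpoint_i64);
     Value *Args[] = {
         B.getInt64(78), // ID for the generated stack map record
         B.getInt32(15), // bytes reserved for runtime patching
         Target,         // call target; assumed to be of i8* type
         B.getInt32(1),  // one trailing operand is a real call argument
         CallArg         // lowered according to the calling convention
     };
     return B.CreateCall(PatchPoint, Args, "val");
   }
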
+ +Operands: +""""""""" + +The first operand is an ID, the second operand is the number of bytes +reserved for the patchable region, the third operand is the target +address of a function (optionally null), and the fourth operand +specifies how many of the following variable operands are considered +function call arguments. The remaining variable number of operands are +the ``live values`` for which locations will be recorded in the stack +map. + +Semantics: +"""""""""" + +The patch point intrinsic generates a stack map. It also emits a +function call to the address specified by ``<target>`` if the address +is not a constant null. The function call and its arguments are +lowered according to the calling convention specified at the +intrinsic's callsite. Variants of the intrinsic with non-void return +type also return a value according to calling convention. + +Requesting zero patch point arguments is valid. In this case, all +variable operands are handled just like +``llvm.experimental.stackmap``. The difference is that space will +still be reserved for patching, a call will be emitted, and a return +value is allowed. + +The locations of the arguments are not normally recorded in the stack +map because they are already fixed by the calling convention. The +remaining ``live values`` will have their location recorded, which +could be a register, stack location, or constant. A special calling +convention has been introduced for use with stack maps, anyregcc, +which forces the arguments to be loaded into registers but allows +those registers to be dynamically allocated. These argument registers +will have their register locations recorded in the stack map in +addition to the remaining ``live values``. + +The patch point also emits nops to cover at least ``<numBytes>`` of +instruction encoding space. Hence, the client must ensure that +``<numBytes>`` is enough to encode a call to the target address on the +supported targets. If the call target is constant null, then there is +no minimum requirement. A zero-byte null target patchpoint is +valid. + +The runtime may patch the code emitted for the patch point, including +the call sequence and nops. However, the runtime may not assume +anything about the code LLVM emits within the reserved space. Partial +patching is not allowed. The runtime must patch all reserved bytes, +padding with nops if necessary. + +This example shows a patch point reserving 15 bytes, with one argument +in %rdi, and a return value in %rax per native calling convention: + +.. code-block:: llvm + + %target = inttoptr i64 -281474976710654 to i8* + %val = call i64 (i64, i32, ...)* + @llvm.experimental.patchpoint.i64(i64 78, i32 15, + i8* %target, i32 1, i64* %ptr) + %add = add i64 %val, 3 + ret i64 %add + +May generate: + +.. code-block:: none + + 0x00 movabsq $0xffff000000000002, %r11 <--- patch point address + 0x0a callq *%r11 + 0x0d nop + 0x0e nop <--- end of reserved 15-bytes + 0x0f addq $0x3, %rax + 0x10 movq %rax, 8(%rsp) + +Note that no stack map locations will be recorded. If the patched code +sequence does not need arguments fixed to specific calling convention +registers, then the ``anyregcc`` convention may be used: + +.. code-block:: none + + %val = call anyregcc @llvm.experimental.patchpoint(i64 78, i32 15, + i8* %target, i32 1, + i64* %ptr) + +The stack map now indicates the location of the %ptr argument and +return value: + +.. 
code-block:: none + + Stack Map: ID=78, Loc0=%r9 Loc1=%r8 + +The patch code sequence may now use the argument that happened to be +allocated in %r8 and return a value allocated in %r9: + +.. code-block:: none + + 0x00 movslq 4(%r8), %r9 <--- patched code at patch point address + 0x03 nop + ... + 0x0e nop <--- end of reserved 15-bytes + 0x0f addq $0x3, %r9 + 0x10 movq %r9, 8(%rsp) + +.. _stackmap-format: + +Stack Map Format +================ + +The existence of a stack map or patch point intrinsic within an LLVM +Module forces code emission to create a :ref:`stackmap-section`. The +format of this section follows: + +.. code-block:: none + + uint32 : Reserved (header) + uint32 : NumConstants + Constants[NumConstants] { + uint64 : LargeConstant + } + uint32 : NumRecords + StkMapRecord[NumRecords] { + uint64 : PatchPoint ID + uint32 : Instruction Offset + uint16 : Reserved (record flags) + uint16 : NumLocations + Location[NumLocations] { + uint8 : Register | Direct | Indirect | Constant | ConstantIndex + uint8 : Reserved (location flags) + uint16 : Dwarf RegNum + int32 : Offset or SmallConstant + } + uint16 : NumLiveOuts + LiveOuts[NumLiveOuts] { + uint16 : Dwarf RegNum + uint8 : Reserved + uint8 : Size in Bytes + } + } + +The first byte of each location encodes a type that indicates how to +interpret the ``RegNum`` and ``Offset`` fields as follows: + +======== ========== =================== =========================== +Encoding Type Value Description +-------- ---------- ------------------- --------------------------- +0x1 Register Reg Value in a register +0x2 Direct Reg + Offset Frame index value +0x3 Indirect [Reg + Offset] Spilled value +0x4 Constant Offset Small constant +0x5 ConstIndex Constants[Offset] Large constant +======== ========== =================== =========================== + +In the common case, a value is available in a register, and the +``Offset`` field will be zero. Values spilled to the stack are encoded +as ``Indirect`` locations. The runtime must load those values from a +stack address, typically in the form ``[BP + Offset]``. If an +``alloca`` value is passed directly to a stack map intrinsic, then +LLVM may fold the frame index into the stack map as an optimization to +avoid allocating a register or stack slot. These frame indices will be +encoded as ``Direct`` locations in the form ``BP + Offset``. LLVM may +also optimize constants by emitting them directly in the stack map, +either in the ``Offset`` of a ``Constant`` location or in the constant +pool, referred to by ``ConstantIndex`` locations. + +At each callsite, a "liveout" register list is also recorded. These +are the registers that are live across the stackmap and therefore must +be saved by the runtime. This is an important optimization when the +patchpoint intrinsic is used with a calling convention that by default +preserves most registers as callee-save. + +Each entry in the liveout register list contains a DWARF register +number and size in bytes. The stackmap format deliberately omits +specific subregister information. Instead the runtime must interpret +this information conservatively. For example, if the stackmap reports +one byte at ``%rax``, then the value may be in either ``%al`` or +``%ah``. It doesn't matter in practice, because the runtime will +simply save ``%rax``. However, if the stackmap reports 16 bytes at +``%ymm0``, then the runtime can safely optimize by saving only +``%xmm0``. + +The stack map format is a contract between an LLVM SVN revision and +the runtime. 
It is currently experimental and may change in the short +term, but minimizing the need to update the runtime is +important. Consequently, the stack map design is motivated by +simplicity and extensibility. Compactness of the representation is +secondary because the runtime is expected to parse the data +immediately after compiling a module and encode the information in its +own format. Since the runtime controls the allocation of sections, it +can reuse the same stack map space for multiple modules. + +.. _stackmap-section: + +Stack Map Section +^^^^^^^^^^^^^^^^^ + +A JIT compiler can easily access this section by providing its own +memory manager via the LLVM C API +``LLVMCreateSimpleMCJITMemoryManager()``. When creating the memory +manager, the JIT provides a callback: +``LLVMMemoryManagerAllocateDataSectionCallback()``. When LLVM creates +this section, it invokes the callback and passes the section name. The +JIT can record the in-memory address of the section at this time and +later parse it to recover the stack map data. + +On Darwin, the stack map section name is "__llvm_stackmaps". The +segment name is "__LLVM_STACKMAPS". + +Stack Map Usage +=============== + +The stack map support described in this document can be used to +precisely determine the location of values at a specific position in +the code. LLVM does not maintain any mapping between those values and +any higher-level entity. The runtime must be able to interpret the +stack map record given only the ID, offset, and the order of the +locations, which LLVM preserves. + +Note that this is quite different from the goal of debug information, +which is a best-effort attempt to track the location of named +variables at every instruction. + +An important motivation for this design is to allow a runtime to +commandeer a stack frame when execution reaches an instruction address +associated with a stack map. The runtime must be able to rebuild a +stack frame and resume program execution using the information +provided by the stack map. For example, execution may resume in an +interpreter or a recompiled version of the same function. + +This usage restricts LLVM optimization. Clearly, LLVM must not move +stores across a stack map. However, loads must also be handled +conservatively. If the load may trigger an exception, hoisting it +above a stack map could be invalid. For example, the runtime may +determine that a load is safe to execute without a type check given +the current state of the type system. If the type system changes while +some activation of the load's function exists on the stack, the load +becomes unsafe. The runtime can prevent subsequent execution of that +load by immediately patching any stack map location that lies between +the current call site and the load (typically, the runtime would +simply patch all stack map locations to invalidate the function). If +the compiler had hoisted the load above the stack map, then the +program could crash before the runtime could take back control. + +To enforce these semantics, stackmap and patchpoint intrinsics are +considered to potentially read and write all memory. This may limit +optimization more than some clients desire. To address this problem, +metadata could be added to the intrinsic call to express aliasing, +thereby allowing optimizations to hoist certain loads above stack +maps. + +Direct Stack Map Entries +^^^^^^^^^^^^^^^^^^^^^^^^ + +As shown in :ref:`stackmap-format`, a Direct stack map location +records the address of a frame index. 
This address is itself the value +that the runtime requested. This differs from Indirect locations, +which refer to stack locations from which the requested values must +be loaded. Direct locations can communicate the address of an alloca, +while Indirect locations handle register spills. + +For example: + +.. code-block:: none + + entry: + %a = alloca i64... + llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>, i64* %a) + +The runtime can determine this alloca's relative location on the +stack immediately after compilation, or at any time thereafter. This +differs from Register and Indirect locations, because the runtime can +only read the values in those locations when execution reaches the +instruction address of the stack map. + +This functionality requires LLVM to treat entry-block allocas +specially when they are directly consumed by an intrinsic. (This is +the same requirement imposed by the llvm.gcroot intrinsic.) LLVM +transformations must not substitute the alloca with any intervening +value. This can be verified by the runtime simply by checking that the +stack map's location is a Direct location type. diff --git a/external/bsd/llvm/dist/llvm/docs/TableGenFundamentals.rst b/external/bsd/llvm/dist/llvm/docs/TableGenFundamentals.rst index 4fe4bb986a2f..a28026f78400 100644 --- a/external/bsd/llvm/dist/llvm/docs/TableGenFundamentals.rst +++ b/external/bsd/llvm/dist/llvm/docs/TableGenFundamentals.rst @@ -601,7 +601,7 @@ the classes multiple times yourself, e.g. by writing: ... A ``defm`` can also be used inside a multiclass providing several levels of -multiclass instanciations. +multiclass instantiations. .. code-block:: llvm @@ -727,7 +727,7 @@ opened, as in the case with the ``CALL*`` instructions above. It's also possible to use "let" expressions inside multiclasses, providing more ways to factor out commonality from the records, specially if using several -levels of multiclass instanciations. This also avoids the need of using "let" +levels of multiclass instantiations. This also avoids the need of using "let" expressions within subsequent records inside a multiclass. .. code-block:: llvm diff --git a/external/bsd/llvm/dist/llvm/docs/WritingAnLLVMBackend.rst b/external/bsd/llvm/dist/llvm/docs/WritingAnLLVMBackend.rst index 35a2d164a909..429f52a3526a 100644 --- a/external/bsd/llvm/dist/llvm/docs/WritingAnLLVMBackend.rst +++ b/external/bsd/llvm/dist/llvm/docs/WritingAnLLVMBackend.rst @@ -238,6 +238,12 @@ For some targets, you also need to support the following methods: * ``getTargetLowering()`` * ``getJITInfo()`` +Some architectures, such as GPUs, do not support jumping to an arbitrary +program location and implement branching using masked execution and looping +using special instructions around the loop body. In order to avoid CFG modifications +that introduce irreducible control flow not handled by such hardware, a target +must call `setRequiresStructuredCFG(true)` when being initialized.
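
For illustration, such a target might opt in from its ``TargetMachine`` constructor along the following lines; this is a sketch rather than code from the patch, the ``XYZ`` name is hypothetical, and ``setRequiresStructuredCFG`` plus the ``LLVMTargetMachine`` constructor signature are assumed to match this 3.5svn-era tree:

.. code-block:: c++

   // Sketch of a hypothetical GPU-style target preserving structured CFG.
   #include "llvm/Target/TargetMachine.h"

   using namespace llvm;

   class XYZTargetMachine : public LLVMTargetMachine {
   public:
     XYZTargetMachine(const Target &T, StringRef TT, StringRef CPU,
                      StringRef FS, const TargetOptions &Options,
                      Reloc::Model RM, CodeModel::Model CM,
                      CodeGenOpt::Level OL)
         : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
       // Branching here is done via masked execution, so generic codegen
       // must not introduce irreducible control flow.
       setRequiresStructuredCFG(true);
     }
   };
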
+ + In addition, the ``XXXTargetMachine`` constructor should specify a ``TargetDescription`` string that determines the data layout for the target machine, including characteristics such as pointer size, alignment, and diff --git a/external/bsd/llvm/dist/llvm/docs/YamlIO.rst b/external/bsd/llvm/dist/llvm/docs/YamlIO.rst index 3ecd03afb24d..b1917b6469e8 100644 --- a/external/bsd/llvm/dist/llvm/docs/YamlIO.rst +++ b/external/bsd/llvm/dist/llvm/docs/YamlIO.rst @@ -234,6 +234,7 @@ The following types have built-in support in YAML I/O: * float * double * StringRef +* std::string * int64_t * int32_t * int16_t @@ -640,12 +641,50 @@ The YAML syntax supports tags as a way to specify the type of a node before it is parsed. This allows dynamic types of nodes. But the YAML I/O model uses static typing, so there are limits to how you can use tags with the YAML I/O model. Recently, we added support to YAML I/O for checking/setting the optional -tag on a map. Using this functionality it is even possbile to support differnt +tag on a map. Using this functionality it is even possible to support different mappings, as long as they are convertable. To check a tag, inside your mapping() method you can use io.mapTag() to specify what the tag should be. This will also add that tag when writing yaml. +Validation +---------- + +Sometimes in a yaml map, each key/value pair is valid, but the combination is +not. This is similar to something having no syntax errors, but still having +semantic errors. To support semantic level checking, YAML I/O allows +an optional ``validate()`` method in a MappingTraits template specialization. + +When parsing yaml, the ``validate()`` method is called *after* all key/values in +the map have been processed. Any error message returned by the ``validate()`` +method during input will be printed just like a syntax error would be printed. +When writing yaml, the ``validate()`` method is called *before* the yaml +key/values are written. Any error during output will trigger an ``assert()`` +because it is a programming error to have invalid struct values. + + +.. code-block:: c++ + + using llvm::yaml::MappingTraits; + using llvm::yaml::IO; + + struct Stuff { + ... + }; + + template <> + struct MappingTraits<Stuff> { + static void mapping(IO &io, Stuff &stuff) { + ... + } + static StringRef validate(IO &io, Stuff &stuff) { + // Look at all fields in 'stuff' and if there + // are any bad values return a string describing + // the error. Otherwise return an empty string. + return StringRef(); + } + }; + Sequence ======== diff --git a/external/bsd/llvm/dist/llvm/docs/conf.py b/external/bsd/llvm/dist/llvm/docs/conf.py index d71f46e99917..17d21f31424d 100644 --- a/external/bsd/llvm/dist/llvm/docs/conf.py +++ b/external/bsd/llvm/dist/llvm/docs/conf.py @@ -40,7 +40,7 @@ master_doc = 'index' # General information about the project. project = u'LLVM' -copyright = u'2003-2013, LLVM Project' +copyright = u'2003-2014, LLVM Project' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/external/bsd/llvm/dist/llvm/docs/doxygen.cfg.in b/external/bsd/llvm/dist/llvm/docs/doxygen.cfg.in index 0ed686b9349f..612c5391abe7 100644 --- a/external/bsd/llvm/dist/llvm/docs/doxygen.cfg.in +++ b/external/bsd/llvm/dist/llvm/docs/doxygen.cfg.in @@ -280,10 +280,10 @@ TYPEDEF_HIDES_STRUCT = NO # For small to medium size projects (<1000 input files) the default value is # probably good enough. 
For larger projects a too small cache size can cause # doxygen to be busy swapping symbols to and from disk most of the time -# causing a significant performance penality. +# causing a significant performance penalty. # If the system has enough physical memory increasing the cache will improve the # performance by keeping more symbols in memory. Note that the value works on -# a logarithmic scale so increasing the size by one will rougly double the +# a logarithmic scale so increasing the size by one will roughly double the # memory usage. The cache size is given by this formula: # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, # corresponding to a cache size of 2^16 = 65536 symbols diff --git a/external/bsd/llvm/dist/llvm/docs/doxygen.footer b/external/bsd/llvm/dist/llvm/docs/doxygen.footer index 95d5434f6712..6f8bc9ea023d 100644 --- a/external/bsd/llvm/dist/llvm/docs/doxygen.footer +++ b/external/bsd/llvm/dist/llvm/docs/doxygen.footer @@ -3,7 +3,7 @@ Generated on $datetime for $projectname by Doxygen$doxygenversion
-Copyright © 2003-2013 University of Illinois at Urbana-Champaign. +Copyright © 2003-2014 University of Illinois at Urbana-Champaign. All Rights Reserved.
diff --git a/external/bsd/llvm/dist/llvm/docs/index.rst b/external/bsd/llvm/dist/llvm/docs/index.rst index 62766f10342f..d040632dc691 100644 --- a/external/bsd/llvm/dist/llvm/docs/index.rst +++ b/external/bsd/llvm/dist/llvm/docs/index.rst @@ -234,6 +234,7 @@ For API clients and LLVM developers. TableGen/LangRef HowToUseAttributes NVPTXUsage + StackMaps :doc:`WritingAnLLVMPass` Information on how to write LLVM transformations and analyses. @@ -308,6 +309,9 @@ For API clients and LLVM developers. :doc:`NVPTXUsage` This document describes using the NVPTX back-end to compile GPU kernels. +:doc:`StackMaps` + LLVM support for mapping instruction addresses to the location of + values and allowing code to be patched. Development Process Documentation ================================= diff --git a/external/bsd/llvm/dist/llvm/examples/BrainF/CMakeLists.txt b/external/bsd/llvm/dist/llvm/examples/BrainF/CMakeLists.txt index 7bec105cdc8a..025d09336405 100644 --- a/external/bsd/llvm/dist/llvm/examples/BrainF/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/examples/BrainF/CMakeLists.txt @@ -1,4 +1,11 @@ -set(LLVM_LINK_COMPONENTS jit bitwriter nativecodegen interpreter) +set(LLVM_LINK_COMPONENTS + BitWriter + Core + ExecutionEngine + JIT + Support + nativecodegen + ) add_llvm_example(BrainF BrainF.cpp diff --git a/external/bsd/llvm/dist/llvm/examples/ExceptionDemo/CMakeLists.txt b/external/bsd/llvm/dist/llvm/examples/ExceptionDemo/CMakeLists.txt index ea818faf3b33..5324acd21eab 100644 --- a/external/bsd/llvm/dist/llvm/examples/ExceptionDemo/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/examples/ExceptionDemo/CMakeLists.txt @@ -1,4 +1,11 @@ -set(LLVM_LINK_COMPONENTS jit mcjit nativecodegen) +set(LLVM_LINK_COMPONENTS + Core + ExecutionEngine + MCJIT + Support + nativecodegen + ) + set(LLVM_REQUIRES_EH 1) add_llvm_example(ExceptionDemo diff --git a/external/bsd/llvm/dist/llvm/examples/Fibonacci/CMakeLists.txt b/external/bsd/llvm/dist/llvm/examples/Fibonacci/CMakeLists.txt index 693761241fc1..724a0f6715d3 100644 --- a/external/bsd/llvm/dist/llvm/examples/Fibonacci/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/examples/Fibonacci/CMakeLists.txt @@ -1,4 +1,11 @@ -set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen) +set(LLVM_LINK_COMPONENTS + Core + ExecutionEngine + Interpreter + JIT + Support + nativecodegen + ) add_llvm_example(Fibonacci fibonacci.cpp diff --git a/external/bsd/llvm/dist/llvm/examples/HowToUseJIT/CMakeLists.txt b/external/bsd/llvm/dist/llvm/examples/HowToUseJIT/CMakeLists.txt index 428b53ffb9be..88aed026bf6f 100644 --- a/external/bsd/llvm/dist/llvm/examples/HowToUseJIT/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/examples/HowToUseJIT/CMakeLists.txt @@ -1,4 +1,11 @@ -set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen) +set(LLVM_LINK_COMPONENTS + Core + ExecutionEngine + Interpreter + JIT + Support + nativecodegen + ) add_llvm_example(HowToUseJIT HowToUseJIT.cpp diff --git a/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter3/CMakeLists.txt b/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter3/CMakeLists.txt index 1af8db00a172..a98d7df1049c 100644 --- a/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter3/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter3/CMakeLists.txt @@ -1,4 +1,7 @@ -set(LLVM_LINK_COMPONENTS core) +set(LLVM_LINK_COMPONENTS + Core + Support + ) add_llvm_example(Kaleidoscope-Ch3 toy.cpp diff --git a/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter4/CMakeLists.txt 
b/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter4/CMakeLists.txt index 0d1ac533f02d..72a9f0512cd2 100644 --- a/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter4/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter4/CMakeLists.txt @@ -1,4 +1,13 @@ -set(LLVM_LINK_COMPONENTS core jit interpreter native) +set(LLVM_LINK_COMPONENTS + Analysis + Core + ExecutionEngine + InstCombine + JIT + ScalarOpts + Support + nativecodegen + ) add_llvm_example(Kaleidoscope-Ch4 toy.cpp diff --git a/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter5/CMakeLists.txt b/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter5/CMakeLists.txt index 2d75ad35923f..c7d0276194cf 100644 --- a/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter5/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter5/CMakeLists.txt @@ -1,4 +1,13 @@ -set(LLVM_LINK_COMPONENTS core jit interpreter native) +set(LLVM_LINK_COMPONENTS + Analysis + Core + ExecutionEngine + InstCombine + JIT + ScalarOpts + Support + nativecodegen + ) add_llvm_example(Kaleidoscope-Ch5 toy.cpp diff --git a/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter6/CMakeLists.txt b/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter6/CMakeLists.txt index 2e15a5f7dfc6..669c7eb171b8 100644 --- a/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter6/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter6/CMakeLists.txt @@ -1,4 +1,13 @@ -set(LLVM_LINK_COMPONENTS core jit interpreter native) +set(LLVM_LINK_COMPONENTS + Analysis + Core + ExecutionEngine + InstCombine + JIT + ScalarOpts + Support + nativecodegen + ) add_llvm_example(Kaleidoscope-Ch6 toy.cpp diff --git a/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter7/CMakeLists.txt b/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter7/CMakeLists.txt index da3839843bd0..0a0c8e7cab58 100644 --- a/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter7/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/examples/Kaleidoscope/Chapter7/CMakeLists.txt @@ -1,4 +1,15 @@ -set(LLVM_LINK_COMPONENTS core jit interpreter native) +set(LLVM_LINK_COMPONENTS + Analysis + Core + ExecutionEngine + InstCombine + JIT + ScalarOpts + Support + TransformUtils + nativecodegen + ) + set(LLVM_REQUIRES_RTTI 1) add_llvm_example(Kaleidoscope-Ch7 diff --git a/external/bsd/llvm/dist/llvm/examples/ModuleMaker/CMakeLists.txt b/external/bsd/llvm/dist/llvm/examples/ModuleMaker/CMakeLists.txt index 81e911560bdc..a73909a13bd4 100644 --- a/external/bsd/llvm/dist/llvm/examples/ModuleMaker/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/examples/ModuleMaker/CMakeLists.txt @@ -1,4 +1,8 @@ -set(LLVM_LINK_COMPONENTS bitwriter) +set(LLVM_LINK_COMPONENTS + BitWriter + Core + Support + ) add_llvm_example(ModuleMaker ModuleMaker.cpp diff --git a/external/bsd/llvm/dist/llvm/examples/ParallelJIT/CMakeLists.txt b/external/bsd/llvm/dist/llvm/examples/ParallelJIT/CMakeLists.txt index fbdc6e5fc10b..8673917f5589 100644 --- a/external/bsd/llvm/dist/llvm/examples/ParallelJIT/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/examples/ParallelJIT/CMakeLists.txt @@ -1,4 +1,11 @@ -set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen) +set(LLVM_LINK_COMPONENTS + Core + ExecutionEngine + Interpreter + JIT + Support + nativecodegen + ) add_llvm_example(ParallelJIT ParallelJIT.cpp diff --git a/external/bsd/llvm/dist/llvm/include/llvm-c/Core.h b/external/bsd/llvm/dist/llvm/include/llvm-c/Core.h index 9953d52edd62..269869ef0da7 100644 --- 
a/external/bsd/llvm/dist/llvm/include/llvm-c/Core.h +++ b/external/bsd/llvm/dist/llvm/include/llvm-c/Core.h @@ -167,7 +167,8 @@ typedef enum { LLVMAddressSafety = 1ULL << 32, LLVMStackProtectStrongAttribute = 1ULL<<33, LLVMCold = 1ULL << 34, - LLVMOptimizeNone = 1ULL << 35 + LLVMOptimizeNone = 1ULL << 35, + LLVMInAllocaAttribute = 1ULL << 36 */ } LLVMAttribute; @@ -2663,7 +2664,9 @@ LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef, LLVMValueRef Val, const char *Name); LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); -LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op, +LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering ordering, + LLVMBool singleThread, const char *Name); +LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B, LLVMAtomicRMWBinOp op, LLVMValueRef PTR, LLVMValueRef Val, LLVMAtomicOrdering ordering, LLVMBool singleThread); diff --git a/external/bsd/llvm/dist/llvm/include/llvm-c/Transforms/Scalar.h b/external/bsd/llvm/dist/llvm/include/llvm-c/Transforms/Scalar.h index 355e8dc299fb..9b820b2334ca 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm-c/Transforms/Scalar.h +++ b/external/bsd/llvm/dist/llvm/include/llvm-c/Transforms/Scalar.h @@ -41,6 +41,9 @@ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM); /** See llvm::createDeadStoreEliminationPass function. */ void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM); +/** See llvm::createScalarizerPass function. */ +void LLVMAddScalarizerPass(LLVMPassManagerRef PM); + /** See llvm::createGVNPass function. */ void LLVMAddGVNPass(LLVMPassManagerRef PM); diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/BlockFrequencyImpl.h b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/BlockFrequencyImpl.h index 817a44188b89..2d04c5d4a988 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/BlockFrequencyImpl.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/BlockFrequencyImpl.h @@ -48,7 +48,7 @@ class BlockFrequencyImpl { typedef GraphTraits< Inverse > GT; - const uint32_t EntryFreq; + static const uint64_t EntryFreq = 1 << 14; std::string getBlockName(BasicBlock *BB) const { return BB->getName().str(); @@ -67,7 +67,8 @@ class BlockFrequencyImpl { void setBlockFreq(BlockT *BB, BlockFrequency Freq) { Freqs[BB] = Freq; - DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") = " << Freq << "\n"); + DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") = "; + printBlockFreq(dbgs(), Freq) << "\n"); } /// getEdgeFreq - Return edge frequency based on SRC frequency and Src -> Dst @@ -81,8 +82,9 @@ class BlockFrequencyImpl { /// void incBlockFreq(BlockT *BB, BlockFrequency Freq) { Freqs[BB] += Freq; - DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") += " << Freq - << " --> " << Freqs[BB] << "\n"); + DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") += "; + printBlockFreq(dbgs(), Freq) << " --> "; + printBlockFreq(dbgs(), Freqs[BB]) << "\n"); } // All blocks in postorder. @@ -194,7 +196,8 @@ class BlockFrequencyImpl { typename LoopExitProbMap::const_iterator I = LoopExitProb.find(BB); assert(I != LoopExitProb.end() && "Loop header missing from table"); Freqs[BB] /= I->second; - DEBUG(dbgs() << "Loop header scaled to " << Freqs[BB] << ".\n"); + DEBUG(dbgs() << "Loop header scaled to "; + printBlockFreq(dbgs(), Freqs[BB]) << ".\n"); } /// doLoop - Propagate block frequency down through the loop. 
@@ -256,14 +259,15 @@ class BlockFrequencyImpl { BranchProbability LEP = BranchProbability(N, D); LoopExitProb.insert(std::make_pair(Head, LEP)); DEBUG(dbgs() << "LoopExitProb[" << getBlockName(Head) << "] = " << LEP - << " from 1 - " << BackFreq << " / " << getBlockFreq(Head) - << ".\n"); + << " from 1 - "; + printBlockFreq(dbgs(), BackFreq) << " / "; + printBlockFreq(dbgs(), getBlockFreq(Head)) << ".\n"); } friend class BlockFrequencyInfo; friend class MachineBlockFrequencyInfo; - BlockFrequencyImpl() : EntryFreq(BlockFrequency::getEntryFrequency()) { } + BlockFrequencyImpl() { } void doFunction(FunctionT *fn, BlockProbInfoT *bpi) { Fn = fn; @@ -312,6 +316,9 @@ class BlockFrequencyImpl { } public: + + uint64_t getEntryFreq() { return EntryFreq; } + /// getBlockFreq - Return block frequency. Return 0 if we don't have it. BlockFrequency getBlockFreq(const BlockT *BB) const { typename DenseMap::const_iterator @@ -325,14 +332,15 @@ public: OS << "\n\n---- Block Freqs ----\n"; for (typename FunctionT::iterator I = Fn->begin(), E = Fn->end(); I != E;) { BlockT *BB = I++; - OS << " " << getBlockName(BB) << " = " << getBlockFreq(BB) << "\n"; + OS << " " << getBlockName(BB) << " = "; + printBlockFreq(OS, getBlockFreq(BB)) << "\n"; for (typename GraphTraits::ChildIteratorType SI = GraphTraits::child_begin(BB), SE = GraphTraits::child_end(BB); SI != SE; ++SI) { BlockT *Succ = *SI; OS << " " << getBlockName(BB) << " -> " << getBlockName(Succ) - << " = " << getEdgeFreq(BB, Succ) << "\n"; + << " = "; printBlockFreq(OS, getEdgeFreq(BB, Succ)) << "\n"; } } } @@ -340,6 +348,30 @@ public: void dump() const { print(dbgs()); } + + // Utility method that looks up the block frequency associated with BB and + // prints it to OS. + raw_ostream &printBlockFreq(raw_ostream &OS, + const BlockT *BB) { + return printBlockFreq(OS, getBlockFreq(BB)); + } + + raw_ostream &printBlockFreq(raw_ostream &OS, + const BlockFrequency &Freq) const { + // Convert fixed-point number to decimal. + uint64_t Frequency = Freq.getFrequency(); + OS << Frequency / EntryFreq << "."; + uint64_t Rem = Frequency % EntryFreq; + uint64_t Eps = 1; + do { + Rem *= 10; + Eps *= 10; + OS << Rem / EntryFreq; + Rem = Rem % EntryFreq; + } while (Rem >= Eps/2); + return OS; + } + }; } diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/BlockFrequencyInfo.h b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/BlockFrequencyInfo.h index a123d0b8c136..e594448f4789 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/BlockFrequencyInfo.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/BlockFrequencyInfo.h @@ -50,6 +50,17 @@ public: /// comparison to the other block frequencies. We do this to avoid using of /// floating points. BlockFrequency getBlockFreq(const BasicBlock *BB) const; + + // Print the block frequency Freq to OS using the current functions entry + // frequency to convert freq into a relative decimal form. + raw_ostream &printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const; + + // Convenience method that attempts to look up the frequency associated with + // BB and print it to OS. 
+ raw_ostream &printBlockFreq(raw_ostream &OS, const BasicBlock *BB) const; + + uint64_t getEntryFreq() const; + }; } diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index 4ff7121728ec..fdad168ab1e0 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -16,6 +16,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/CFG.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" @@ -98,6 +99,9 @@ public: /// It is guaranteed to fall between 1 and UINT32_MAX. uint32_t getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const; + uint32_t getEdgeWeight(const BasicBlock *Src, + succ_const_iterator Dst) const; + /// \brief Set the raw edge weight for a given edge. /// /// This allows a pass to explicitly set the edge weight for an edge. It can diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/CallGraph.h b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/CallGraph.h index d00c2ed327c5..6b44546e76d0 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/CallGraph.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/CallGraph.h @@ -6,52 +6,54 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This interface is used to build and manipulate a call graph, which is a very -// useful tool for interprocedural optimization. -// -// Every function in a module is represented as a node in the call graph. The -// callgraph node keeps track of which functions the are called by the function -// corresponding to the node. -// -// A call graph may contain nodes where the function that they correspond to is -// null. These 'external' nodes are used to represent control flow that is not -// represented (or analyzable) in the module. In particular, this analysis -// builds one external node such that: -// 1. All functions in the module without internal linkage will have edges -// from this external node, indicating that they could be called by -// functions outside of the module. -// 2. All functions whose address is used for something more than a direct -// call, for example being stored into a memory location will also have an -// edge from this external node. Since they may be called by an unknown -// caller later, they must be tracked as such. -// -// There is a second external node added for calls that leave this module. -// Functions have a call edge to the external node iff: -// 1. The function is external, reflecting the fact that they could call -// anything without internal linkage or that has its address taken. -// 2. The function contains an indirect function call. -// -// As an extension in the future, there may be multiple nodes with a null -// function. These will be used when we can prove (through pointer analysis) -// that an indirect call site can call only a specific set of functions. -// -// Because of these properties, the CallGraph captures a conservative superset -// of all of the caller-callee relationships, which is useful for -// transformations. -// -// The CallGraph class also attempts to figure out what the root of the -// CallGraph is, which it currently does by looking for a function named 'main'. 
-// If no function named 'main' is found, the external node is used as the entry -// node, reflecting the fact that any function without internal linkage could -// be called into (which is common for libraries). -// +/// \file +/// +/// This file provides interfaces used to build and manipulate a call graph, +/// which is a very useful tool for interprocedural optimization. +/// +/// Every function in a module is represented as a node in the call graph. The +/// callgraph node keeps track of which functions the are called by the +/// function corresponding to the node. +/// +/// A call graph may contain nodes where the function that they correspond to +/// is null. These 'external' nodes are used to represent control flow that is +/// not represented (or analyzable) in the module. In particular, this +/// analysis builds one external node such that: +/// 1. All functions in the module without internal linkage will have edges +/// from this external node, indicating that they could be called by +/// functions outside of the module. +/// 2. All functions whose address is used for something more than a direct +/// call, for example being stored into a memory location will also have +/// an edge from this external node. Since they may be called by an +/// unknown caller later, they must be tracked as such. +/// +/// There is a second external node added for calls that leave this module. +/// Functions have a call edge to the external node iff: +/// 1. The function is external, reflecting the fact that they could call +/// anything without internal linkage or that has its address taken. +/// 2. The function contains an indirect function call. +/// +/// As an extension in the future, there may be multiple nodes with a null +/// function. These will be used when we can prove (through pointer analysis) +/// that an indirect call site can call only a specific set of functions. +/// +/// Because of these properties, the CallGraph captures a conservative superset +/// of all of the caller-callee relationships, which is useful for +/// transformations. +/// +/// The CallGraph class also attempts to figure out what the root of the +/// CallGraph is, which it currently does by looking for a function named +/// 'main'. If no function named 'main' is found, the external node is used as +/// the entry node, reflecting the fact that any function without internal +/// linkage could be called into (which is common for libraries). +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_CALLGRAPH_H #define LLVM_ANALYSIS_CALLGRAPH_H #include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Function.h" #include "llvm/Pass.h" @@ -66,171 +68,142 @@ class Function; class Module; class CallGraphNode; -//===----------------------------------------------------------------------===// -// CallGraph class definition -// -class CallGraph : public ModulePass { - Module *Mod; // The module this call graph represents +/// \brief The basic data container for the call graph of a \c Module of IR. +/// +/// This class exposes both the interface to the call graph for a module of IR. +/// +/// The core call graph itself can also be updated to reflect changes to the IR. +class CallGraph { + Module &M; typedef std::map FunctionMapTy; - FunctionMapTy FunctionMap; // Map from a function to its node - // Root is root of the call graph, or the external node if a 'main' function - // couldn't be found. 
- // + /// \brief A map from \c Function* to \c CallGraphNode*. + FunctionMapTy FunctionMap; + + /// \brief Root is root of the call graph, or the external node if a 'main' + /// function couldn't be found. CallGraphNode *Root; - // ExternalCallingNode - This node has edges to all external functions and - // those internal functions that have their address taken. + /// \brief This node has edges to all external functions and those internal + /// functions that have their address taken. CallGraphNode *ExternalCallingNode; - // CallsExternalNode - This node has edges to it from all functions making - // indirect calls or calling an external function. + /// \brief This node has edges to it from all functions making indirect calls + /// or calling an external function. CallGraphNode *CallsExternalNode; - /// Replace the function represented by this node by another. + /// \brief Replace the function represented by this node by another. + /// /// This does not rescan the body of the function, so it is suitable when /// splicing the body of one function to another while also updating all /// callers from the old function to the new. - /// void spliceFunction(const Function *From, const Function *To); - // Add a function to the call graph, and link the node to all of the functions - // that it calls. + /// \brief Add a function to the call graph, and link the node to all of the + /// functions that it calls. void addToCallGraph(Function *F); public: - static char ID; // Class identification, replacement for typeinfo - //===--------------------------------------------------------------------- - // Accessors. - // + CallGraph(Module &M); + ~CallGraph(); + + void print(raw_ostream &OS) const; + void dump() const; + typedef FunctionMapTy::iterator iterator; typedef FunctionMapTy::const_iterator const_iterator; - /// getModule - Return the module the call graph corresponds to. - /// - Module &getModule() const { return *Mod; } + /// \brief Returns the module the call graph corresponds to. + Module &getModule() const { return M; } - inline iterator begin() { return FunctionMap.begin(); } - inline iterator end() { return FunctionMap.end(); } + inline iterator begin() { return FunctionMap.begin(); } + inline iterator end() { return FunctionMap.end(); } inline const_iterator begin() const { return FunctionMap.begin(); } - inline const_iterator end() const { return FunctionMap.end(); } + inline const_iterator end() const { return FunctionMap.end(); } - // Subscripting operators, return the call graph node for the provided - // function + /// \brief Returns the call graph node for the provided function. inline const CallGraphNode *operator[](const Function *F) const { const_iterator I = FunctionMap.find(F); assert(I != FunctionMap.end() && "Function not in callgraph!"); return I->second; } + + /// \brief Returns the call graph node for the provided function. inline CallGraphNode *operator[](const Function *F) { const_iterator I = FunctionMap.find(F); assert(I != FunctionMap.end() && "Function not in callgraph!"); return I->second; } - /// Returns the CallGraphNode which is used to represent undetermined calls - /// into the callgraph. + /// \brief Returns the \c CallGraphNode which is used to represent + /// undetermined calls into the callgraph. CallGraphNode *getExternalCallingNode() const { return ExternalCallingNode; } - CallGraphNode *getCallsExternalNode() const { return CallsExternalNode; } - /// Return the root/main method in the module, or some other root node, such - /// as the externalcallingnode. 
- CallGraphNode *getRoot() { return Root; } - const CallGraphNode *getRoot() const { return Root; } + CallGraphNode *getCallsExternalNode() const { return CallsExternalNode; } //===--------------------------------------------------------------------- // Functions to keep a call graph up to date with a function that has been // modified. // - /// removeFunctionFromModule - Unlink the function from this module, returning - /// it. Because this removes the function from the module, the call graph - /// node is destroyed. This is only valid if the function does not call any - /// other functions (ie, there are no edges in it's CGN). The easiest way to - /// do this is to dropAllReferences before calling this. + /// \brief Unlink the function from this module, returning it. /// + /// Because this removes the function from the module, the call graph node is + /// destroyed. This is only valid if the function does not call any other + /// functions (ie, there are no edges in it's CGN). The easiest way to do + /// this is to dropAllReferences before calling this. Function *removeFunctionFromModule(CallGraphNode *CGN); - /// getOrInsertFunction - This method is identical to calling operator[], but - /// it will insert a new CallGraphNode for the specified function if one does - /// not already exist. + /// \brief Similar to operator[], but this will insert a new CallGraphNode for + /// \c F if one does not already exist. CallGraphNode *getOrInsertFunction(const Function *F); - - CallGraph(); - virtual ~CallGraph() { releaseMemory(); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool runOnModule(Module &M); - virtual void releaseMemory(); - - void print(raw_ostream &o, const Module *) const; - void dump() const; }; -//===----------------------------------------------------------------------===// -// CallGraphNode class definition. -// +/// \brief A node in the call graph for a module. +/// +/// Typically represents a function in the call graph. There are also special +/// "null" nodes used to represent theoretical entries in the call graph. class CallGraphNode { - friend class CallGraph; - - AssertingVH F; - - // CallRecord - This is a pair of the calling instruction (a call or invoke) - // and the callgraph node being called. public: - typedef std::pair CallRecord; -private: - std::vector CalledFunctions; - - /// NumReferences - This is the number of times that this CallGraphNode occurs - /// in the CalledFunctions array of this or other CallGraphNodes. - unsigned NumReferences; + /// \brief A pair of the calling instruction (a call or invoke) + /// and the call graph node being called. + typedef std::pair CallRecord; - CallGraphNode(const CallGraphNode &) LLVM_DELETED_FUNCTION; - void operator=(const CallGraphNode &) LLVM_DELETED_FUNCTION; - - void DropRef() { --NumReferences; } - void AddRef() { ++NumReferences; } public: typedef std::vector CalledFunctionsVector; - - // CallGraphNode ctor - Create a node for the specified function. - inline CallGraphNode(Function *f) : F(f), NumReferences(0) {} + /// \brief Creates a node for the specified function. + inline CallGraphNode(Function *F) : F(F), NumReferences(0) {} + ~CallGraphNode() { assert(NumReferences == 0 && "Node deleted while references remain"); } - - //===--------------------------------------------------------------------- - // Accessor methods. 
- // typedef std::vector::iterator iterator; typedef std::vector::const_iterator const_iterator; - // getFunction - Return the function that this call graph node represents. + /// \brief Returns the function that this call graph node represents. Function *getFunction() const { return F; } inline iterator begin() { return CalledFunctions.begin(); } - inline iterator end() { return CalledFunctions.end(); } + inline iterator end() { return CalledFunctions.end(); } inline const_iterator begin() const { return CalledFunctions.begin(); } - inline const_iterator end() const { return CalledFunctions.end(); } + inline const_iterator end() const { return CalledFunctions.end(); } inline bool empty() const { return CalledFunctions.empty(); } inline unsigned size() const { return (unsigned)CalledFunctions.size(); } - /// getNumReferences - Return the number of other CallGraphNodes in this - /// CallGraph that reference this node in their callee list. + /// \brief Returns the number of other CallGraphNodes in this CallGraph that + /// reference this node in their callee list. unsigned getNumReferences() const { return NumReferences; } - - // Subscripting operator - Return the i'th called function. - // + + /// \brief Returns the i'th called function. CallGraphNode *operator[](unsigned i) const { assert(i < CalledFunctions.size() && "Invalid index"); return CalledFunctions[i].second; } - /// dump - Print out this call graph node. - /// + /// \brief Print out this call graph node. void dump() const; void print(raw_ostream &OS) const; @@ -239,29 +212,25 @@ public: // modified // - /// removeAllCalledFunctions - As the name implies, this removes all edges - /// from this CallGraphNode to any functions it calls. + /// \brief Removes all edges from this CallGraphNode to any functions it + /// calls. void removeAllCalledFunctions() { while (!CalledFunctions.empty()) { CalledFunctions.back().second->DropRef(); CalledFunctions.pop_back(); } } - - /// stealCalledFunctionsFrom - Move all the callee information from N to this - /// node. + + /// \brief Moves all the callee information from N to this node. void stealCalledFunctionsFrom(CallGraphNode *N) { assert(CalledFunctions.empty() && "Cannot steal callsite information if I already have some"); std::swap(CalledFunctions, N->CalledFunctions); } - - /// addCalledFunction - Add a function to the list of functions called by this - /// one. + /// \brief Adds a function to the list of functions called by this one. void addCalledFunction(CallSite CS, CallGraphNode *M) { - assert(!CS.getInstruction() || - !CS.getCalledFunction() || + assert(!CS.getInstruction() || !CS.getCalledFunction() || !CS.getCalledFunction()->isIntrinsic()); CalledFunctions.push_back(std::make_pair(CS.getInstruction(), M)); M->AddRef(); @@ -272,32 +241,152 @@ public: *I = CalledFunctions.back(); CalledFunctions.pop_back(); } - - - /// removeCallEdgeFor - This method removes the edge in the node for the - /// specified call site. Note that this method takes linear time, so it - /// should be used sparingly. + + /// \brief Removes the edge in the node for the specified call site. + /// + /// Note that this method takes linear time, so it should be used sparingly. void removeCallEdgeFor(CallSite CS); - /// removeAnyCallEdgeTo - This method removes all call edges from this node - /// to the specified callee function. This takes more time to execute than - /// removeCallEdgeTo, so it should not be used unless necessary. 
+ /// \brief Removes all call edges from this node to the specified callee + /// function. + /// + /// This takes more time to execute than removeCallEdgeTo, so it should not + /// be used unless necessary. void removeAnyCallEdgeTo(CallGraphNode *Callee); - /// removeOneAbstractEdgeTo - Remove one edge associated with a null callsite - /// from this node to the specified callee function. + /// \brief Removes one edge associated with a null callsite from this node to + /// the specified callee function. void removeOneAbstractEdgeTo(CallGraphNode *Callee); - - /// replaceCallEdge - This method replaces the edge in the node for the - /// specified call site with a new one. Note that this method takes linear - /// time, so it should be used sparingly. + + /// \brief Replaces the edge in the node for the specified call site with a + /// new one. + /// + /// Note that this method takes linear time, so it should be used sparingly. void replaceCallEdge(CallSite CS, CallSite NewCS, CallGraphNode *NewNode); - - /// allReferencesDropped - This is a special function that should only be - /// used by the CallGraph class. - void allReferencesDropped() { - NumReferences = 0; + +private: + friend class CallGraph; + + AssertingVH F; + + std::vector CalledFunctions; + + /// \brief The number of times that this CallGraphNode occurs in the + /// CalledFunctions array of this or other CallGraphNodes. + unsigned NumReferences; + + CallGraphNode(const CallGraphNode &) LLVM_DELETED_FUNCTION; + void operator=(const CallGraphNode &) LLVM_DELETED_FUNCTION; + + void DropRef() { --NumReferences; } + void AddRef() { ++NumReferences; } + + /// \brief A special function that should only be used by the CallGraph class. + void allReferencesDropped() { NumReferences = 0; } +}; + +/// \brief An analysis pass to compute the \c CallGraph for a \c Module. +/// +/// This class implements the concept of an analysis pass used by the \c +/// ModuleAnalysisManager to run an analysis over a module and cache the +/// resulting data. +class CallGraphAnalysis { +public: + /// \brief A formulaic typedef to inform clients of the result type. + typedef CallGraph Result; + + static void *ID() { return (void *)&PassID; } + + /// \brief Compute the \c CallGraph for the module \c M. + /// + /// The real work here is done in the \c CallGraph constructor. + CallGraph run(Module *M) { return CallGraph(*M); } + +private: + static char PassID; +}; + +/// \brief The \c ModulePass which wraps up a \c CallGraph and the logic to +/// build it. +/// +/// This class exposes both the interface to the call graph container and the +/// module pass which runs over a module of IR and produces the call graph. The +/// call graph interface is entirelly a wrapper around a \c CallGraph object +/// which is stored internally for each module. +class CallGraphWrapperPass : public ModulePass { + OwningPtr G; + +public: + static char ID; // Class identification, replacement for typeinfo + + CallGraphWrapperPass(); + virtual ~CallGraphWrapperPass(); + + /// \brief The internal \c CallGraph around which the rest of this interface + /// is wrapped. + const CallGraph &getCallGraph() const { return *G; } + CallGraph &getCallGraph() { return *G; } + + typedef CallGraph::iterator iterator; + typedef CallGraph::const_iterator const_iterator; + + /// \brief Returns the module the call graph corresponds to. 
+ Module &getModule() const { return G->getModule(); } + + inline iterator begin() { return G->begin(); } + inline iterator end() { return G->end(); } + inline const_iterator begin() const { return G->begin(); } + inline const_iterator end() const { return G->end(); } + + /// \brief Returns the call graph node for the provided function. + inline const CallGraphNode *operator[](const Function *F) const { + return (*G)[F]; } + + /// \brief Returns the call graph node for the provided function. + inline CallGraphNode *operator[](const Function *F) { return (*G)[F]; } + + /// \brief Returns the \c CallGraphNode which is used to represent + /// undetermined calls into the callgraph. + CallGraphNode *getExternalCallingNode() const { + return G->getExternalCallingNode(); + } + + CallGraphNode *getCallsExternalNode() const { + return G->getCallsExternalNode(); + } + + //===--------------------------------------------------------------------- + // Functions to keep a call graph up to date with a function that has been + // modified. + // + + /// \brief Unlink the function from this module, returning it. + /// + /// Because this removes the function from the module, the call graph node is + /// destroyed. This is only valid if the function does not call any other + /// functions (ie, there are no edges in it's CGN). The easiest way to do + /// this is to dropAllReferences before calling this. + Function *removeFunctionFromModule(CallGraphNode *CGN) { + return G->removeFunctionFromModule(CGN); + } + + /// \brief Similar to operator[], but this will insert a new CallGraphNode for + /// \c F if one does not already exist. + CallGraphNode *getOrInsertFunction(const Function *F) { + return G->getOrInsertFunction(F); + } + + //===--------------------------------------------------------------------- + // Implementation of the ModulePass interface needed here. + // + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnModule(Module &M); + virtual void releaseMemory(); + + void print(raw_ostream &o, const Module *) const; + void dump() const; }; //===----------------------------------------------------------------------===// @@ -307,11 +396,12 @@ public: // Provide graph traits for tranversing call graphs using standard graph // traversals. 
-template <> struct GraphTraits { +template <> struct GraphTraits { typedef CallGraphNode NodeType; typedef CallGraphNode::CallRecord CGNPairTy; - typedef std::pointer_to_unary_function CGNDerefFun; + typedef std::pointer_to_unary_function + CGNDerefFun; static NodeType *getEntryNode(CallGraphNode *CGN) { return CGN; } @@ -320,55 +410,54 @@ template <> struct GraphTraits { static inline ChildIteratorType child_begin(NodeType *N) { return map_iterator(N->begin(), CGNDerefFun(CGNDeref)); } - static inline ChildIteratorType child_end (NodeType *N) { + static inline ChildIteratorType child_end(NodeType *N) { return map_iterator(N->end(), CGNDerefFun(CGNDeref)); } - static CallGraphNode *CGNDeref(CGNPairTy P) { - return P.second; - } - + static CallGraphNode *CGNDeref(CGNPairTy P) { return P.second; } }; -template <> struct GraphTraits { +template <> struct GraphTraits { typedef const CallGraphNode NodeType; typedef NodeType::const_iterator ChildIteratorType; static NodeType *getEntryNode(const CallGraphNode *CGN) { return CGN; } - static inline ChildIteratorType child_begin(NodeType *N) { return N->begin();} - static inline ChildIteratorType child_end (NodeType *N) { return N->end(); } + static inline ChildIteratorType child_begin(NodeType *N) { + return N->begin(); + } + static inline ChildIteratorType child_end(NodeType *N) { return N->end(); } }; -template<> struct GraphTraits : public GraphTraits { +template <> +struct GraphTraits : public GraphTraits { static NodeType *getEntryNode(CallGraph *CGN) { - return CGN->getExternalCallingNode(); // Start at the external node! + return CGN->getExternalCallingNode(); // Start at the external node! } - typedef std::pair PairTy; - typedef std::pointer_to_unary_function DerefFun; + typedef std::pair PairTy; + typedef std::pointer_to_unary_function DerefFun; // nodes_iterator/begin/end - Allow iteration over all nodes in the graph typedef mapped_iterator nodes_iterator; static nodes_iterator nodes_begin(CallGraph *CG) { return map_iterator(CG->begin(), DerefFun(CGdereference)); } - static nodes_iterator nodes_end (CallGraph *CG) { + static nodes_iterator nodes_end(CallGraph *CG) { return map_iterator(CG->end(), DerefFun(CGdereference)); } - static CallGraphNode &CGdereference(PairTy P) { - return *P.second; - } + static CallGraphNode &CGdereference(PairTy P) { return *P.second; } }; -template<> struct GraphTraits : - public GraphTraits { +template <> +struct GraphTraits : public GraphTraits< + const CallGraphNode *> { static NodeType *getEntryNode(const CallGraph *CGN) { return CGN->getExternalCallingNode(); } // nodes_iterator/begin/end - Allow iteration over all nodes in the graph typedef CallGraph::const_iterator nodes_iterator; static nodes_iterator nodes_begin(const CallGraph *CG) { return CG->begin(); } - static nodes_iterator nodes_end (const CallGraph *CG) { return CG->end(); } + static nodes_iterator nodes_end(const CallGraph *CG) { return CG->end(); } }; } // End llvm namespace diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h index 0fc1c2dc360d..ed26a06a581a 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/DOTGraphTraitsPass.h @@ -19,50 +19,62 @@ namespace llvm { -template +/// \brief Default traits class for extracting a graph from an analysis pass. +/// +/// This assumes that 'GraphT' is 'AnalysisT *' and so just passes it through. 
+template +struct DefaultAnalysisGraphTraits { + static GraphT getGraph(AnalysisT *A) { return A; } +}; + +template < + typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, + typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits > class DOTGraphTraitsViewer : public FunctionPass { public: DOTGraphTraitsViewer(StringRef GraphName, char &ID) - : FunctionPass(ID), Name(GraphName) {} + : FunctionPass(ID), Name(GraphName) {} virtual bool runOnFunction(Function &F) { - Analysis *Graph = &getAnalysis(); - std::string GraphName = DOTGraphTraits::getGraphName(Graph); + GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis()); + std::string GraphName = DOTGraphTraits::getGraphName(Graph); std::string Title = GraphName + " for '" + F.getName().str() + "' function"; - ViewGraph(Graph, Name, Simple, Title); + ViewGraph(Graph, Name, IsSimple, Title); return false; } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired(); + AU.addRequired(); } private: std::string Name; }; -template +template < + typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, + typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits > class DOTGraphTraitsPrinter : public FunctionPass { public: DOTGraphTraitsPrinter(StringRef GraphName, char &ID) - : FunctionPass(ID), Name(GraphName) {} + : FunctionPass(ID), Name(GraphName) {} virtual bool runOnFunction(Function &F) { - Analysis *Graph = &getAnalysis(); + GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis()); std::string Filename = Name + "." + F.getName().str() + ".dot"; std::string ErrorInfo; errs() << "Writing '" << Filename << "'..."; raw_fd_ostream File(Filename.c_str(), ErrorInfo); - std::string GraphName = DOTGraphTraits::getGraphName(Graph); + std::string GraphName = DOTGraphTraits::getGraphName(Graph); std::string Title = GraphName + " for '" + F.getName().str() + "' function"; if (ErrorInfo.empty()) - WriteGraph(File, Graph, Simple, Title); + WriteGraph(File, Graph, IsSimple, Title); else errs() << " error opening file for writing!"; errs() << "\n"; @@ -72,55 +84,59 @@ public: virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired(); + AU.addRequired(); } private: std::string Name; }; -template +template < + typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, + typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits > class DOTGraphTraitsModuleViewer : public ModulePass { public: DOTGraphTraitsModuleViewer(StringRef GraphName, char &ID) - : ModulePass(ID), Name(GraphName) {} + : ModulePass(ID), Name(GraphName) {} virtual bool runOnModule(Module &M) { - Analysis *Graph = &getAnalysis(); - std::string Title = DOTGraphTraits::getGraphName(Graph); + GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis()); + std::string Title = DOTGraphTraits::getGraphName(Graph); - ViewGraph(Graph, Name, Simple, Title); + ViewGraph(Graph, Name, IsSimple, Title); return false; } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired(); + AU.addRequired(); } private: std::string Name; }; -template +template < + typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, + typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits > class DOTGraphTraitsModulePrinter : public ModulePass { public: DOTGraphTraitsModulePrinter(StringRef GraphName, char &ID) - : ModulePass(ID), Name(GraphName) {} + : ModulePass(ID), Name(GraphName) {} virtual bool runOnModule(Module &M) { - Analysis *Graph = 
&getAnalysis(); + GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis()); std::string Filename = Name + ".dot"; std::string ErrorInfo; errs() << "Writing '" << Filename << "'..."; raw_fd_ostream File(Filename.c_str(), ErrorInfo); - std::string Title = DOTGraphTraits::getGraphName(Graph); + std::string Title = DOTGraphTraits::getGraphName(Graph); if (ErrorInfo.empty()) - WriteGraph(File, Graph, Simple, Title); + WriteGraph(File, Graph, IsSimple, Title); else errs() << " error opening file for writing!"; errs() << "\n"; @@ -130,7 +146,7 @@ public: virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired(); + AU.addRequired(); } private: diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/Dominators.h b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/Dominators.h index 3aa0beb6bb1e..896664c1c10f 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/Dominators.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/Dominators.h @@ -346,12 +346,14 @@ public: DomTreeNodeBase *getRootNode() { return RootNode; } const DomTreeNodeBase *getRootNode() const { return RootNode; } - /// Get all nodes dominated by R, including R itself. Return true on success. + /// Get all nodes dominated by R, including R itself. void getDescendants(NodeT *R, SmallVectorImpl &Result) const { + Result.clear(); const DomTreeNodeBase *RN = getNode(R); + if (RN == NULL) + return; // If R is unreachable, it will not be present in the DOM tree. SmallVector *, 8> WL; WL.push_back(RN); - Result.clear(); while (!WL.empty()) { const DomTreeNodeBase *N = WL.pop_back_val(); @@ -769,7 +771,7 @@ public: return DT->getRootNode(); } - /// Get all nodes dominated by R, including R itself. Return true on success. + /// Get all nodes dominated by R, including R itself. void getDescendants(BasicBlock *R, SmallVectorImpl &Result) const { DT->getDescendants(R, Result); diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/IntervalPartition.h b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/IntervalPartition.h index 8cade58cd324..1af7d6b0bd34 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/IntervalPartition.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/IntervalPartition.h @@ -34,7 +34,7 @@ namespace llvm { // IntervalPartition - This class builds and holds an "interval partition" for // a function. This partition divides the control flow graph into a set of // maximal intervals, as defined with the properties above. Intuitively, an -// interval is a (possibly nonexistent) loop with a "tail" of non looping +// interval is a (possibly nonexistent) loop with a "tail" of non-looping // nodes following it. 
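The getDescendants fix above changes observable behavior for unreachable blocks: the result vector is now cleared up front and left empty when R has no node in the dominator tree, instead of walking from a null node. A small sketch of the resulting contract (the helper name collectDominated is illustrative only):

.. code-block:: c++

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/Dominators.h"

  using namespace llvm;

  // With the early return above, an unreachable BB yields an empty vector
  // instead of dereferencing a null DomTreeNode.
  static void collectDominated(DominatorTree &DT, BasicBlock *BB,
                               SmallVectorImpl<BasicBlock *> &Dominated) {
    DT.getDescendants(BB, Dominated);
    // Dominated now holds BB and every block it dominates, or nothing at
    // all if BB is not reachable from the entry block.
  }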
// class IntervalPartition : public FunctionPass { diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/LoopInfo.h b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/LoopInfo.h index 62f5acad5668..0fb21aa4e41a 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/LoopInfo.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/LoopInfo.h @@ -33,8 +33,10 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/Dominators.h" +#include "llvm/IR/Instruction.h" +#include "llvm/Support/CFG.h" #include "llvm/Pass.h" #include @@ -53,6 +55,7 @@ class Loop; class MDNode; class PHINode; class raw_ostream; +template class DominatorTreeBase; template class LoopInfoBase; template class LoopBase; @@ -228,6 +231,18 @@ public: /// A latch block is a block that contains a branch back to the header. BlockT *getLoopLatch() const; + /// getLoopLatches - Return all loop latch blocks of this loop. A latch block + /// is a block that contains a branch back to the header. + void getLoopLatches(SmallVectorImpl &LoopLatches) const { + BlockT *H = getHeader(); + typedef GraphTraits > InvBlockTraits; + for (typename InvBlockTraits::ChildIteratorType I = + InvBlockTraits::child_begin(H), + E = InvBlockTraits::child_end(H); I != E; ++I) + if (contains(*I)) + LoopLatches.push_back(*I); + } + //===--------------------------------------------------------------------===// // APIs for updating loop information after changing the CFG // diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/LoopInfoImpl.h b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/LoopInfoImpl.h index c98cb589108b..934f7cf9a097 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/LoopInfoImpl.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/LoopInfoImpl.h @@ -15,8 +15,10 @@ #ifndef LLVM_ANALYSIS_LOOPINFOIMPL_H #define LLVM_ANALYSIS_LOOPINFOIMPL_H +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" namespace llvm { diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/MemoryBuiltins.h b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/MemoryBuiltins.h index 91224ad94ac2..ec2fea838fa7 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/MemoryBuiltins.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/MemoryBuiltins.h @@ -190,6 +190,8 @@ public: return knownSize(SizeOffset) && knownOffset(SizeOffset); } + // These are "private", except they can't actually be made private. Only + // compute() should be used by external users. SizeOffsetType visitAllocaInst(AllocaInst &I); SizeOffsetType visitArgument(Argument &A); SizeOffsetType visitCallSite(CallSite CS); @@ -256,6 +258,7 @@ public: return knownSize(SizeOffset) && knownOffset(SizeOffset); } + // The individual instruction visitors should be treated as private. 
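The new getLoopLatches accessor above generalizes getLoopLatch to loops with multiple back edges. A minimal usage sketch (the helper countBackEdges is hypothetical and assumes a Loop * obtained from LoopInfo in the usual way):

.. code-block:: c++

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/LoopInfo.h"

  using namespace llvm;

  // Each latch is an in-loop predecessor of the header, so the number of
  // latches equals the number of back edges. getLoopLatch() would return
  // NULL whenever there is more than one latch.
  static unsigned countBackEdges(Loop *L) {
    SmallVector<BasicBlock *, 4> Latches;
    L->getLoopLatches(Latches);
    return Latches.size();
  }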
SizeOffsetEvalType visitAllocaInst(AllocaInst &I); SizeOffsetEvalType visitCallSite(CallSite CS); SizeOffsetEvalType visitExtractElementInst(ExtractElementInst &I); diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/PostDominators.h b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/PostDominators.h index 88ebab4edecf..c99ecb38c5d4 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/PostDominators.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/PostDominators.h @@ -79,6 +79,12 @@ struct PostDominatorTree : public FunctionPass { return DT->findNearestCommonDominator(A, B); } + /// Get all nodes post-dominated by R, including R itself. + void getDescendants(BasicBlock *R, + SmallVectorImpl &Result) const { + DT->getDescendants(R, Result); + } + virtual void releaseMemory() { DT->releaseMemory(); } diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/RegionInfo.h b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/RegionInfo.h index e87319516cd1..8af02e3efbdd 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/Analysis/RegionInfo.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/Analysis/RegionInfo.h @@ -312,11 +312,11 @@ public: /// The toplevel region represents the whole function. bool isTopLevelRegion() const { return exit == NULL; } - /// @brief Return a new (non canonical) region, that is obtained by joining + /// @brief Return a new (non-canonical) region, that is obtained by joining /// this region with its predecessors. /// /// @return A region also starting at getEntry(), but reaching to the next - /// basic block that forms with getEntry() a (non canonical) region. + /// basic block that forms with getEntry() a (non-canonical) region. /// NULL if such a basic block does not exist. Region *getExpandedRegion() const; diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/external/bsd/llvm/dist/llvm/include/llvm/Bitcode/LLVMBitCodes.h index b3d24661d70b..7e6831bb5f80 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -370,7 +370,8 @@ namespace bitc { ATTR_KIND_Z_EXT = 34, ATTR_KIND_BUILTIN = 35, ATTR_KIND_COLD = 36, - ATTR_KIND_OPTIMIZE_NONE = 37 + ATTR_KIND_OPTIMIZE_NONE = 37, + ATTR_KIND_IN_ALLOCA = 38 }; } // End bitc namespace diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/AsmPrinter.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/AsmPrinter.h index 4bda0f1603ac..c22d1ea306d8 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -22,6 +22,7 @@ #include "llvm/Support/ErrorHandling.h" namespace llvm { + class AsmPrinterHandler; class BlockAddress; class GCStrategy; class Constant; @@ -110,13 +111,21 @@ namespace llvm { /// function. MachineLoopInfo *LI; + struct HandlerInfo { + AsmPrinterHandler *Handler; + const char *TimerName, *TimerGroupName; + HandlerInfo(AsmPrinterHandler *Handler, const char *TimerName, + const char *TimerGroupName) + : Handler(Handler), TimerName(TimerName), + TimerGroupName(TimerGroupName) {} + }; + /// Handlers - a vector of all debug/EH info emitters we should use. + /// This vector maintains ownership of the emitters. + SmallVector Handlers; + /// DD - If the target supports dwarf debug info, this pointer is non-null. DwarfDebug *DD; - /// DE - If the target supports dwarf exception info, this pointer is - /// non-null. 
- DwarfException *DE; - protected: explicit AsmPrinter(TargetMachine &TM, MCStreamer &Streamer); @@ -200,11 +209,6 @@ bool needsSEHMoves(); - /// needsRelocationsForDwarfStringPool - Specifies whether the object format - /// expects to use relocations to refer to debug entries. Alternatively we - /// emit section offsets in bytes from the start of the string pool. - bool needsRelocationsForDwarfStringPool() const; - /// EmitConstantPool - Print to the current output stream assembly /// representations of the constants in the constant pool MCP. This is /// used to print out constants which have been "spilled to memory" by @@ -304,13 +308,10 @@ /// stem. MCSymbol *GetTempSymbol(StringRef Name) const; - - /// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with - /// global value name as its base, with the specified suffix, and where the - /// symbol is forced to have private linkage if ForcePrivate is true. - MCSymbol *GetSymbolWithGlobalValueBase(const GlobalValue *GV, - StringRef Suffix, - bool ForcePrivate = true) const; + /// Return the MCSymbol for a private symbol with global value name as its + /// base, with the specified suffix. + MCSymbol *getSymbolWithGlobalValueBase(const GlobalValue *GV, + StringRef Suffix) const; /// GetExternalSymbolSymbol - Return the MCSymbol for the specified /// ExternalSymbol. diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ISDOpcodes.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ISDOpcodes.h index 48a0523dc618..da8ac792e693 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -603,7 +603,7 @@ namespace ISD { /// This corresponds to "load atomic" instruction. ATOMIC_LOAD, - /// OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr, val) + /// OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) /// This corresponds to "store atomic" instruction. ATOMIC_STORE, diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/LexicalScopes.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/LexicalScopes.h index 26563a605574..af1f8470a425 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/LexicalScopes.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/LexicalScopes.h @@ -44,32 +44,35 @@ typedef std::pair<const MachineInstr *, const MachineInstr *> InsnRange; /// class LexicalScopes { public: - LexicalScopes() : MF(NULL), CurrentFnLexicalScope(NULL) { } - virtual ~LexicalScopes(); + LexicalScopes() : MF(NULL), CurrentFnLexicalScope(NULL) {} + ~LexicalScopes(); - /// initialize - Scan machine function and constuct lexical scope nest. - virtual void initialize(const MachineFunction &); + /// initialize - Scan machine function and construct lexical scope nest, resetting + /// the instance if necessary. + void initialize(const MachineFunction &); /// releaseMemory - release memory. - virtual void releaseMemory(); - + void reset(); + /// empty - Return true if there is any lexical scope information available. bool empty() { return CurrentFnLexicalScope == NULL; } - /// isCurrentFunctionScope - Return true if given lexical scope represents + /// isCurrentFunctionScope - Return true if given lexical scope represents /// current function. - bool isCurrentFunctionScope(const LexicalScope *LS) { + bool isCurrentFunctionScope(const LexicalScope *LS) { return LS == CurrentFnLexicalScope; } /// getCurrentFunctionScope - Return lexical scope for the current function.
- LexicalScope *getCurrentFunctionScope() const { return CurrentFnLexicalScope;} + LexicalScope *getCurrentFunctionScope() const { + return CurrentFnLexicalScope; + } /// getMachineBasicBlocks - Populate given set using machine basic blocks /// which have machine instructions that belong to lexical scope identified by /// DebugLoc. void getMachineBasicBlocks(DebugLoc DL, - SmallPtrSet &MBBs); + SmallPtrSet &MBBs); /// dominates - Return true if DebugLoc's lexical scope dominates at least one /// machine instruction's lexical scope in a given machine basic block. @@ -104,7 +107,6 @@ public: void dump(); private: - /// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If /// not available then create new lexical scope. LexicalScope *getOrCreateLexicalScope(DebugLoc DL); @@ -123,8 +125,9 @@ private: void extractLexicalScopes(SmallVectorImpl &MIRanges, DenseMap &M); void constructScopeNest(LexicalScope *Scope); - void assignInstructionRanges(SmallVectorImpl &MIRanges, - DenseMap &M); + void + assignInstructionRanges(SmallVectorImpl &MIRanges, + DenseMap &M); private: const MachineFunction *MF; @@ -133,10 +136,11 @@ private: /// contained LexicalScope*s. DenseMap LexicalScopeMap; - /// InlinedLexicalScopeMap - Tracks inlined function scopes in current function. + /// InlinedLexicalScopeMap - Tracks inlined function scopes in current + /// function. DenseMap InlinedLexicalScopeMap; - /// AbstractScopeMap - These scopes are not included LexicalScopeMap. + /// AbstractScopeMap - These scopes are not included LexicalScopeMap. /// AbstractScopes owns its LexicalScope*s. DenseMap AbstractScopeMap; @@ -153,26 +157,23 @@ private: /// LexicalScope - This class is used to track scope information. /// class LexicalScope { - virtual void anchor(); public: LexicalScope(LexicalScope *P, const MDNode *D, const MDNode *I, bool A) - : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(A), - LastInsn(0), FirstInsn(0), DFSIn(0), DFSOut(0) { + : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(A), LastInsn(0), + FirstInsn(0), DFSIn(0), DFSOut(0) { if (Parent) Parent->addChild(this); } - virtual ~LexicalScope() {} - // Accessors. - LexicalScope *getParent() const { return Parent; } - const MDNode *getDesc() const { return Desc; } - const MDNode *getInlinedAt() const { return InlinedAtLocation; } - const MDNode *getScopeNode() const { return Desc; } - bool isAbstractScope() const { return AbstractScope; } + LexicalScope *getParent() const { return Parent; } + const MDNode *getDesc() const { return Desc; } + const MDNode *getInlinedAt() const { return InlinedAtLocation; } + const MDNode *getScopeNode() const { return Desc; } + bool isAbstractScope() const { return AbstractScope; } SmallVectorImpl &getChildren() { return Children; } - SmallVectorImpl &getRanges() { return Ranges; } + SmallVectorImpl &getRanges() { return Ranges; } /// addChild - Add a child scope. void addChild(LexicalScope *S) { Children.push_back(S); } @@ -189,7 +190,7 @@ public: /// extendInsnRange - Extend the current instruction range covered by /// this scope. void extendInsnRange(const MachineInstr *MI) { - assert (FirstInsn && "MI Range is not open!"); + assert(FirstInsn && "MI Range is not open!"); LastInsn = MI; if (Parent) Parent->extendInsnRange(MI); @@ -199,7 +200,7 @@ public: /// until now. This is used when a new scope is encountered while walking /// machine instructions. 
void closeInsnRange(LexicalScope *NewScope = NULL) { - assert (LastInsn && "Last insn missing!"); + assert(LastInsn && "Last insn missing!"); Ranges.push_back(InsnRange(FirstInsn, LastInsn)); FirstInsn = NULL; LastInsn = NULL; @@ -219,28 +220,28 @@ public: } // Depth First Search support to walk and manipulate LexicalScope hierarchy. - unsigned getDFSOut() const { return DFSOut; } - void setDFSOut(unsigned O) { DFSOut = O; } - unsigned getDFSIn() const { return DFSIn; } - void setDFSIn(unsigned I) { DFSIn = I; } + unsigned getDFSOut() const { return DFSOut; } + void setDFSOut(unsigned O) { DFSOut = O; } + unsigned getDFSIn() const { return DFSIn; } + void setDFSIn(unsigned I) { DFSIn = I; } /// dump - print lexical scope. void dump(unsigned Indent = 0) const; private: - LexicalScope *Parent; // Parent to this scope. - AssertingVH Desc; // Debug info descriptor. - AssertingVH InlinedAtLocation; // Location at which this - // scope is inlined. - bool AbstractScope; // Abstract Scope - SmallVector Children; // Scopes defined in scope. - // Contents not owned. + LexicalScope *Parent; // Parent to this scope. + AssertingVH Desc; // Debug info descriptor. + AssertingVH InlinedAtLocation; // Location at which this + // scope is inlined. + bool AbstractScope; // Abstract Scope + SmallVector Children; // Scopes defined in scope. + // Contents not owned. SmallVector Ranges; - const MachineInstr *LastInsn; // Last instruction of this scope. - const MachineInstr *FirstInsn; // First instruction of this scope. - unsigned DFSIn, DFSOut; // In & Out Depth use to determine - // scope nesting. + const MachineInstr *LastInsn; // Last instruction of this scope. + const MachineInstr *FirstInsn; // First instruction of this scope. + unsigned DFSIn, DFSOut; // In & Out Depth use to determine + // scope nesting. }; } // end llvm namespace diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h index d8437f09aaa7..74e47413d8e3 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -45,6 +45,7 @@ namespace llvm { class TargetInstrInfo; class TargetRegisterClass; class VirtRegMap; + class MachineBlockFrequencyInfo; class LiveIntervals : public MachineFunctionPass { MachineFunction* MF; @@ -100,7 +101,9 @@ namespace llvm { virtual ~LiveIntervals(); // Calculate the spill weight to assign to a single instruction. - static float getSpillWeight(bool isDef, bool isUse, BlockFrequency freq); + static float getSpillWeight(bool isDef, bool isUse, + const MachineBlockFrequencyInfo *MBFI, + const MachineInstr *Instr); LiveInterval &getInterval(unsigned Reg) { if (hasInterval(Reg)) diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/LivePhysRegs.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/LivePhysRegs.h new file mode 100644 index 000000000000..c93eaf5b13eb --- /dev/null +++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/LivePhysRegs.h @@ -0,0 +1,146 @@ +//===- llvm/CodeGen/LivePhysRegs.h - Live Physical Register Set -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LivePhysRegs utility for tracking liveness of +// physical registers. 
This can be used for ad-hoc liveness tracking after +// register allocation. You can start with the live-ins/live-outs at the +// beginning/end of a block and update the information while walking the +// instructions inside the block. This implementation tracks the liveness on a +// sub-register granularity. +// +// We assume that the high bits of a physical super-register are not preserved +// unless the instruction has an implicit-use operand reading the super- +// register. +// +// X86 Example: +// %YMM0 = ... +// %XMM0 = ... (Kills %XMM0, all %XMM0s sub-registers, and %YMM0) +// +// %YMM0 = ... +// %XMM0 = ..., %YMM0 (%YMM0 and all its sub-registers are alive) +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_LIVE_PHYS_REGS_H +#define LLVM_CODEGEN_LIVE_PHYS_REGS_H + +#include "llvm/ADT/SparseSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> + +namespace llvm { + +class MachineInstr; + +/// \brief A set of live physical registers with functions to track liveness +/// when walking backward/forward through a basic block. +class LivePhysRegs { + const TargetRegisterInfo *TRI; + SparseSet<unsigned> LiveRegs; + + LivePhysRegs(const LivePhysRegs&) LLVM_DELETED_FUNCTION; + LivePhysRegs &operator=(const LivePhysRegs&) LLVM_DELETED_FUNCTION; +public: + /// \brief Constructs a new empty LivePhysRegs set. + LivePhysRegs() : TRI(0), LiveRegs() {} + + /// \brief Constructs and initializes an empty LivePhysRegs set. + LivePhysRegs(const TargetRegisterInfo *TRI) : TRI(TRI) { + assert(TRI && "Invalid TargetRegisterInfo pointer."); + LiveRegs.setUniverse(TRI->getNumRegs()); + } + + /// \brief Clear and initialize the LivePhysRegs set. + void init(const TargetRegisterInfo *_TRI) { + assert(_TRI && "Invalid TargetRegisterInfo pointer."); + TRI = _TRI; + LiveRegs.clear(); + LiveRegs.setUniverse(TRI->getNumRegs()); + } + + /// \brief Clears the LivePhysRegs set. + void clear() { LiveRegs.clear(); } + + /// \brief Returns true if the set is empty. + bool empty() const { return LiveRegs.empty(); } + + /// \brief Adds a physical register and all its sub-registers to the set. + void addReg(unsigned Reg) { + assert(TRI && "LivePhysRegs is not initialized."); + assert(Reg <= TRI->getNumRegs() && "Expected a physical register."); + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + LiveRegs.insert(*SubRegs); + } + + /// \brief Removes a physical register, all its sub-registers, and all its + /// super-registers from the set. + void removeReg(unsigned Reg) { + assert(TRI && "LivePhysRegs is not initialized."); + assert(Reg <= TRI->getNumRegs() && "Expected a physical register."); + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + LiveRegs.erase(*SubRegs); + for (MCSuperRegIterator SuperRegs(Reg, TRI, /*IncludeSelf=*/false); + SuperRegs.isValid(); ++SuperRegs) + LiveRegs.erase(*SuperRegs); + } + + /// \brief Removes physical registers clobbered by the regmask operand @p MO. + void removeRegsInMask(const MachineOperand &MO); + + /// \brief Returns true if register @p Reg is contained in the set. This also + /// works if only the super register of @p Reg has been defined, because we + /// always also add all sub-registers to the set. + bool contains(unsigned Reg) const { return LiveRegs.count(Reg); } + + /// \brief Simulates liveness when stepping backwards over an + /// instruction (bundle): Remove Defs, add uses.
This is the recommended way of + /// calculating liveness. + void stepBackward(const MachineInstr &MI); + + /// \brief Simulates liveness when stepping forward over an + /// instruction (bundle): Remove killed-uses, add defs. This is not the + /// recommended way, because it depends on accurate kill flags. If possible + /// use stepBackward() instead of this function. + void stepForward(const MachineInstr &MI); + + /// \brief Adds all live-in registers of basic block @p MBB. + void addLiveIns(const MachineBasicBlock *MBB) { + for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), + LE = MBB->livein_end(); LI != LE; ++LI) + addReg(*LI); + } + + /// \brief Adds all live-out registers of basic block @p MBB. + void addLiveOuts(const MachineBasicBlock *MBB) { + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) + addLiveIns(*SI); + } + + typedef SparseSet<unsigned>::const_iterator const_iterator; + const_iterator begin() const { return LiveRegs.begin(); } + const_iterator end() const { return LiveRegs.end(); } + + /// \brief Prints the currently live registers to @p OS. + void print(raw_ostream &OS) const; + + /// \brief Dumps the currently live registers to the debug output. + void dump() const; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const LivePhysRegs& LR) { + LR.print(OS); + return OS; +} + +} // namespace llvm + +#endif // LLVM_CODEGEN_LIVE_PHYS_REGS_H diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h index a9c7bf7dbc60..5ec4660c850b 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h @@ -1,4 +1,4 @@ -//====----- MachineBlockFrequencyInfo.h - MachineBlock Frequency Analysis ----====// +//====-- MachineBlockFrequencyInfo.h - MBB Frequency Analysis -*- C++ -*--====// // // The LLVM Compiler Infrastructure // @@ -49,6 +49,21 @@ public: /// the other block frequencies. We do this to avoid using of floating points. /// BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const; + + MachineFunction *getFunction() const; + void view() const; + + // Print the block frequency Freq to OS using the current function's entry + // frequency to convert freq into a relative decimal form. + raw_ostream &printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const; + + // Convenience method that attempts to look up the frequency associated with + // BB and print it to OS. + raw_ostream &printBlockFreq(raw_ostream &OS, + const MachineBasicBlock *MBB) const; + + uint64_t getEntryFreq() const; + }; } diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineFrameInfo.h index 022634df87cf..0699863311f9 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -101,11 +101,6 @@ class MachineFrameInfo { // cannot alias any other memory objects. bool isSpillSlot; - // MayNeedSP - If true the stack object triggered the creation of the stack - // protector. We should allocate this object right after the stack - // protector. - bool MayNeedSP; - /// Alloca - If this stack object is originated from an Alloca instruction /// this value saves the original IR allocation. Can be NULL.
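To make the intended use of the new LivePhysRegs utility concrete, here is a hedged sketch of the backward walk it is designed for; the helper scanLiveness and the way TRI is obtained are assumptions, while the LivePhysRegs calls come from the header above:

.. code-block:: c++

  #include "llvm/CodeGen/LivePhysRegs.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"

  using namespace llvm;

  // Walk MBB bottom-up while maintaining the set of live physical registers.
  // Seeding with the live-outs and calling stepBackward() per instruction is
  // the direction recommended above because it does not rely on kill flags.
  static void scanLiveness(const MachineBasicBlock &MBB,
                           const TargetRegisterInfo *TRI) {
    LivePhysRegs LiveRegs(TRI);
    LiveRegs.addLiveOuts(&MBB);
    for (MachineBasicBlock::const_reverse_iterator I = MBB.rbegin(),
                                                   E = MBB.rend();
         I != E; ++I) {
      LiveRegs.stepBackward(*I);
      // LiveRegs now describes liveness immediately before *I.
    }
  }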
const AllocaInst *Alloca; @@ -115,9 +110,9 @@ class MachineFrameInfo { bool PreAllocated; StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM, - bool isSS, bool NSP, const AllocaInst *Val) + bool isSS, const AllocaInst *Val) : SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM), - isSpillSlot(isSS), MayNeedSP(NSP), Alloca(Val), PreAllocated(false) {} + isSpillSlot(isSS), Alloca(Val), PreAllocated(false) {} }; const TargetMachine &TM; @@ -145,6 +140,14 @@ class MachineFrameInfo { /// to builtin \@llvm.returnaddress. bool ReturnAddressTaken; + /// HasStackMap - This boolean keeps track of whether there is a call + /// to builtin \@llvm.experimental.stackmap. + bool HasStackMap; + + /// HasPatchPoint - This boolean keeps track of whether there is a call + /// to builtin \@llvm.experimental.patchpoint. + bool HasPatchPoint; + /// StackSize - The prolog/epilog code inserter calculates the final stack /// offsets for all of the fixed size objects, updating the Objects list /// above. It then updates StackSize to contain the number of bytes that need @@ -223,6 +226,10 @@ class MachineFrameInfo { /// Whether the "realign-stack" option is on. bool RealignOption; + /// True if the function includes inline assembly that adjusts the stack + /// pointer. + bool HasInlineAsmWithSPAdjust; + const TargetFrameLowering *getFrameLowering() const; public: explicit MachineFrameInfo(const TargetMachine &TM, bool RealignOpt) @@ -231,6 +238,8 @@ public: HasVarSizedObjects = false; FrameAddressTaken = false; ReturnAddressTaken = false; + HasStackMap = false; + HasPatchPoint = false; AdjustsStack = false; HasCalls = false; StackProtectorIdx = -1; @@ -276,6 +285,18 @@ public: bool isReturnAddressTaken() const { return ReturnAddressTaken; } void setReturnAddressIsTaken(bool s) { ReturnAddressTaken = s; } + /// hasStackMap - This method may be called any time after instruction + /// selection is complete to determine if there is a call to builtin + /// \@llvm.experimental.stackmap. + bool hasStackMap() const { return HasStackMap; } + void setHasStackMap(bool s = true) { HasStackMap = s; } + + /// hasPatchPoint - This method may be called any time after instruction + /// selection is complete to determine if there is a call to builtin + /// \@llvm.experimental.patchpoint. + bool hasPatchPoint() const { return HasPatchPoint; } + void setHasPatchPoint(bool s = true) { HasPatchPoint = s; } + /// getObjectIndexBegin - Return the minimum frame object index. /// int getObjectIndexBegin() const { return -NumFixedObjects; } @@ -380,14 +401,6 @@ public: return Objects[ObjectIdx+NumFixedObjects].Alloca; } - /// NeedsStackProtector - Returns true if the object may need stack - /// protectors. - bool MayNeedStackProtector(int ObjectIdx) const { - assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && - "Invalid Object Idx!"); - return Objects[ObjectIdx+NumFixedObjects].MayNeedSP; - } - /// getObjectOffset - Return the assigned stack offset of the specified object /// from the incoming stack pointer. /// @@ -451,6 +464,10 @@ public: bool hasCalls() const { return HasCalls; } void setHasCalls(bool V) { HasCalls = V; } + /// Returns true if the function contains any stack-adjusting inline assembly. + bool hasInlineAsmWithSPAdjust() const { return HasInlineAsmWithSPAdjust; } + void setHasInlineAsmWithSPAdjust(bool B) { HasInlineAsmWithSPAdjust = B; } + /// getMaxCallFrameSize - Return the maximum size of a call frame that must be /// allocated for an outgoing function call. 
This is only available if /// CallFrameSetup/Destroy pseudo instructions are used by the target, and @@ -501,7 +518,7 @@ public: /// a nonnegative identifier to represent it. /// int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, - bool MayNeedSP = false, const AllocaInst *Alloca = 0); + const AllocaInst *Alloca = 0); /// CreateSpillStackObject - Create a new statically sized stack object that /// represents a spill slot, returning a nonnegative identifier to represent @@ -521,7 +538,7 @@ public: /// variable sized object is created, whether or not the index returned is /// actually used. /// - int CreateVariableSizedObject(unsigned Alignment); + int CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca); /// getCalleeSavedInfo - Returns a reference to call saved info vector for the /// current function. diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineFunction.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineFunction.h index c886e256e044..09cc1e5dfb89 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineFunction.h @@ -131,8 +131,8 @@ class MachineFunction { /// about the control flow of such functions. bool ExposesReturnsTwice; - /// True if the function includes MS-style inline assembly. - bool HasMSInlineAsm; + /// True if the function includes any inline assembly. + bool HasInlineAsm; MachineFunction(const MachineFunction &) LLVM_DELETED_FUNCTION; void operator=(const MachineFunction&) LLVM_DELETED_FUNCTION; @@ -218,15 +218,14 @@ public: ExposesReturnsTwice = B; } - /// Returns true if the function contains any MS-style inline assembly. - bool hasMSInlineAsm() const { - return HasMSInlineAsm; + /// Returns true if the function contains any inline assembly. + bool hasInlineAsm() const { + return HasInlineAsm; } - /// Set a flag that indicates that the function contains MS-style inline - /// assembly. - void setHasMSInlineAsm(bool B) { - HasMSInlineAsm = B; + /// Set a flag that indicates that the function contains inline assembly. + void setHasInlineAsm(bool B) { + HasInlineAsm = B; } /// getInfo - Keep track of various per-function pieces of information for @@ -427,6 +426,15 @@ public: OperandRecycler.deallocate(Cap, Array); } + /// \brief Allocate and initialize a register mask with @p NumRegister bits. + uint32_t *allocateRegisterMask(unsigned NumRegister) { + unsigned Size = (NumRegister + 31) / 32; + uint32_t *Mask = Allocator.Allocate(Size); + for (unsigned i = 0; i != Size; ++i) + Mask[i] = 0; + return Mask; + } + /// allocateMemRefsArray - Allocate an array to hold MachineMemOperand /// pointers. This array is owned by the MachineFunction. MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num); diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineInstr.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineInstr.h index cccab81efbb2..5af7b8937d81 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineInstr.h @@ -830,6 +830,37 @@ public: const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const; + /// \brief Applies the constraints (def/use) implied by this MI on \p Reg to + /// the given \p CurRC. + /// If \p ExploreBundle is set and MI is part of a bundle, all the + /// instructions inside the bundle will be taken into account. 
In other words, + /// this method accumulates all the constraints of the operand of this MI and + /// the related bundle if MI is a bundle or inside a bundle. + /// + /// Returns the register class that satisfies both \p CurRC and the + /// constraints set by MI. Returns NULL if such a register class does not + /// exist. + /// + /// \pre CurRC must not be NULL. + const TargetRegisterClass *getRegClassConstraintEffectForVReg( + unsigned Reg, const TargetRegisterClass *CurRC, + const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, + bool ExploreBundle = false) const; + + /// \brief Applies the constraints (def/use) implied by the \p OpIdx operand + /// to the given \p CurRC. + /// + /// Returns the register class that satisfies both \p CurRC and the + /// constraints set by \p OpIdx MI. Returns NULL if such a register class + /// does not exist. + /// + /// \pre CurRC must not be NULL. + /// \pre The operand at \p OpIdx must be a register. + const TargetRegisterClass * + getRegClassConstraintEffect(unsigned OpIdx, const TargetRegisterClass *CurRC, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) const; + /// tieOperands - Add a tie between the register operands at DefIdx and /// UseIdx. The tie will cause the register allocator to ensure that the two /// operands are assigned the same physical register. @@ -1038,6 +1069,13 @@ private: /// hasPropertyInBundle - Slow path for hasProperty when we're dealing with a /// bundle. bool hasPropertyInBundle(unsigned Mask, QueryType Type) const; + + /// \brief Implements the logic of getRegClassConstraintEffectForVReg for + /// this MI and the given operand index \p OpIdx. + /// If the related operand does not constrain Reg, this returns CurRC. + const TargetRegisterClass *getRegClassConstraintEffectForVRegImpl( + unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC, + const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const; }; /// MachineInstrExpressionTrait - Special DenseMapInfo traits to compare diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineLoopInfo.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineLoopInfo.h index b058ecb4c279..4b8e245840ca 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineLoopInfo.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineLoopInfo.h @@ -31,6 +31,7 @@ #define LLVM_CODEGEN_MACHINELOOPINFO_H #include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineMemOperand.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineMemOperand.h index 00a55b57f334..f01b8ebca12f 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineMemOperand.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineMemOperand.h @@ -134,6 +134,8 @@ public: /// number. int64_t getOffset() const { return PtrInfo.Offset; } + + unsigned getAddrSpace() const { return PtrInfo.getAddrSpace(); } + /// getSize - Return the size in bytes of the memory reference.
uint64_t getSize() const { return Size; } diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineOperand.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineOperand.h index 40f3580bfdb4..c2a0f6566632 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineOperand.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineOperand.h @@ -56,6 +56,7 @@ public: MO_GlobalAddress, ///< Address of a global value MO_BlockAddress, ///< Address of a basic block MO_RegisterMask, ///< Mask of preserved registers. + MO_RegisterLiveOut, ///< Mask of live-out registers. MO_Metadata, ///< Metadata reference (for debug info) MO_MCSymbol ///< MCSymbol reference (for debug/eh info) }; @@ -153,7 +154,7 @@ private: const ConstantFP *CFP; // For MO_FPImmediate. const ConstantInt *CI; // For MO_CImmediate. Integers > 64bit. int64_t ImmVal; // For MO_Immediate. - const uint32_t *RegMask; // For MO_RegisterMask. + const uint32_t *RegMask; // For MO_RegisterMask and MO_RegisterLiveOut. const MDNode *MD; // For MO_Metadata. MCSymbol *Sym; // For MO_MCSymbol @@ -246,6 +247,8 @@ public: bool isBlockAddress() const { return OpKind == MO_BlockAddress; } /// isRegMask - Tests if this is a MO_RegisterMask operand. bool isRegMask() const { return OpKind == MO_RegisterMask; } + /// isRegLiveOut - Tests if this is a MO_RegisterLiveOut operand. + bool isRegLiveOut() const { return OpKind == MO_RegisterLiveOut; } /// isMetadata - Tests if this is a MO_Metadata operand. bool isMetadata() const { return OpKind == MO_Metadata; } bool isMCSymbol() const { return OpKind == MO_MCSymbol; } @@ -476,6 +479,12 @@ public: return Contents.RegMask; } + /// getRegLiveOut - Returns a bit mask of live-out registers. + const uint32_t *getRegLiveOut() const { + assert(isRegLiveOut() && "Wrong MachineOperand accessor"); + return Contents.RegMask; + } + const MDNode *getMetadata() const { assert(isMetadata() && "Wrong MachineOperand accessor"); return Contents.MD; @@ -659,6 +668,12 @@ public: Op.Contents.RegMask = Mask; return Op; } + static MachineOperand CreateRegLiveOut(const uint32_t *Mask) { + assert(Mask && "Missing live-out register mask"); + MachineOperand Op(MachineOperand::MO_RegisterLiveOut); + Op.Contents.RegMask = Mask; + return Op; + } static MachineOperand CreateMetadata(const MDNode *Meta) { MachineOperand Op(MachineOperand::MO_Metadata); Op.Contents.MD = Meta; diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineScheduler.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineScheduler.h index 77828953347c..421f8a1caa13 100644 --- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -23,7 +23,7 @@ // return new CustomMachineScheduler(C); // } // -// The default scheduler, ScheduleDAGMI, builds the DAG and drives list +// The default scheduler, ScheduleDAGMILive, builds the DAG and drives list // scheduling while updating the instruction stream, register pressure, and live // intervals. Most targets don't need to override the DAG builder and list // schedulier, but subtargets that require custom scheduling heuristics may @@ -93,6 +93,7 @@ class MachineLoopInfo; class RegisterClassInfo; class ScheduleDAGInstrs; class SchedDFSResult; +class ScheduleHazardRecognizer; /// MachineSchedContext provides enough context from the MachineScheduler pass /// for the target to instantiate a scheduler. 
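The MachineScheduler.h header comment above describes the plug-in point for a custom scheduler; the following sketch shows the shape of that wiring under the interfaces declared in this file. TrivialStrategy, its (deliberately useless) heuristic, and the registry name are hypothetical; MachineSchedStrategy, ScheduleDAGMILive, and MachineSchedRegistry are the header's own types:

.. code-block:: c++

  #include "llvm/CodeGen/MachineScheduler.h"
  #include <vector>

  using namespace llvm;

  namespace {
  // Hypothetical strategy stub: releases top nodes into a plain stack and
  // always schedules top-down. It only illustrates which virtuals a
  // MachineSchedStrategy must implement, not a useful heuristic.
  struct TrivialStrategy : MachineSchedStrategy {
    std::vector<SUnit *> Ready;

    virtual void initialize(ScheduleDAGMI *DAG) { Ready.clear(); }
    virtual SUnit *pickNode(bool &IsTopNode) {
      IsTopNode = true;
      if (Ready.empty())
        return NULL;
      SUnit *SU = Ready.back();
      Ready.pop_back();
      return SU;
    }
    virtual void schedNode(SUnit *SU, bool IsTopNode) {}
    virtual void releaseTopNode(SUnit *SU) { Ready.push_back(SU); }
    virtual void releaseBottomNode(SUnit *SU) {}
  };

  // Factory wiring the stub into the default ScheduleDAGMILive driver, then
  // registered so -misched=trivial-example can select it.
  ScheduleDAGInstrs *createTrivialSched(MachineSchedContext *C) {
    return new ScheduleDAGMILive(C, new TrivialStrategy());
  }

  MachineSchedRegistry TrivialSchedRegistry("trivial-example",
                                            "Illustrative trivial scheduler",
                                            createTrivialSched);
  }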
@@ -154,8 +155,8 @@ struct MachineSchedPolicy { bool OnlyTopDown; bool OnlyBottomUp; - MachineSchedPolicy(): - ShouldTrackPressure(false), OnlyTopDown(false), OnlyBottomUp(false) {} + MachineSchedPolicy(): ShouldTrackPressure(false), OnlyTopDown(false), + OnlyBottomUp(false) {} }; /// MachineSchedStrategy - Interface to the scheduling algorithm used by @@ -204,6 +205,262 @@ public: virtual void releaseBottomNode(SUnit *SU) = 0; }; +/// Mutate the DAG as a postpass after normal DAG building. +class ScheduleDAGMutation { + virtual void anchor(); +public: + virtual ~ScheduleDAGMutation() {} + + virtual void apply(ScheduleDAGMI *DAG) = 0; +}; + +/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply +/// schedules machine instructions according to the given MachineSchedStrategy +/// without much extra book-keeping. This is the common functionality between +/// PreRA and PostRA MachineScheduler. +class ScheduleDAGMI : public ScheduleDAGInstrs { +protected: + AliasAnalysis *AA; + MachineSchedStrategy *SchedImpl; + + /// Topo - A topological ordering for SUnits which permits fast IsReachable + /// and similar queries. + ScheduleDAGTopologicalSort Topo; + + /// Ordered list of DAG postprocessing steps. + std::vector Mutations; + + /// The top of the unscheduled zone. + MachineBasicBlock::iterator CurrentTop; + + /// The bottom of the unscheduled zone. + MachineBasicBlock::iterator CurrentBottom; + + /// Record the next node in a scheduled cluster. + const SUnit *NextClusterPred; + const SUnit *NextClusterSucc; + +#ifndef NDEBUG + /// The number of instructions scheduled so far. Used to cut off the + /// scheduler at the point determined by misched-cutoff. + unsigned NumInstrsScheduled; +#endif +public: + ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S, bool IsPostRA): + ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, IsPostRA, + /*RemoveKillFlags=*/IsPostRA, C->LIS), + AA(C->AA), SchedImpl(S), Topo(SUnits, &ExitSU), CurrentTop(), + CurrentBottom(), NextClusterPred(NULL), NextClusterSucc(NULL) { +#ifndef NDEBUG + NumInstrsScheduled = 0; +#endif + } + + virtual ~ScheduleDAGMI(); + + /// Return true if this DAG supports VReg liveness and RegPressure. + virtual bool hasVRegLiveness() const { return false; } + + /// Add a postprocessing step to the DAG builder. + /// Mutations are applied in the order that they are added after normal DAG + /// building and before MachineSchedStrategy initialization. + /// + /// ScheduleDAGMI takes ownership of the Mutation object. + void addMutation(ScheduleDAGMutation *Mutation) { + Mutations.push_back(Mutation); + } + + /// \brief True if an edge can be added from PredSU to SuccSU without creating + /// a cycle. + bool canAddEdge(SUnit *SuccSU, SUnit *PredSU); + + /// \brief Add a DAG edge to the given SU with the given predecessor + /// dependence data. + /// + /// \returns true if the edge may be added without creating a cycle OR if an + /// equivalent edge already existed (false indicates failure). + bool addEdge(SUnit *SuccSU, const SDep &PredDep); + + MachineBasicBlock::iterator top() const { return CurrentTop; } + MachineBasicBlock::iterator bottom() const { return CurrentBottom; } + + /// Implement the ScheduleDAGInstrs interface for handling the next scheduling + /// region. This covers all instructions in a block, while schedule() may only + /// cover a subset. 
+ void enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned regioninstrs) LLVM_OVERRIDE; + + /// Implement ScheduleDAGInstrs interface for scheduling a sequence of + /// reorderable instructions. + virtual void schedule(); + + /// Change the position of an instruction within the basic block and update + /// live ranges and region boundary iterators. + void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); + + const SUnit *getNextClusterPred() const { return NextClusterPred; } + + const SUnit *getNextClusterSucc() const { return NextClusterSucc; } + + void viewGraph(const Twine &Name, const Twine &Title) LLVM_OVERRIDE; + void viewGraph() LLVM_OVERRIDE; + +protected: + // Top-Level entry points for the schedule() driver... + + /// Apply each ScheduleDAGMutation step in order. This allows different + /// instances of ScheduleDAGMI to perform custom DAG postprocessing. + void postprocessDAG(); + + /// Release ExitSU predecessors and setup scheduler queues. + void initQueues(ArrayRef TopRoots, ArrayRef BotRoots); + + /// Update scheduler DAG and queues after scheduling an instruction. + void updateQueues(SUnit *SU, bool IsTopNode); + + /// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues. + void placeDebugValues(); + + /// \brief dump the scheduled Sequence. + void dumpSchedule() const; + + // Lesser helpers... + bool checkSchedLimit(); + + void findRootsAndBiasEdges(SmallVectorImpl &TopRoots, + SmallVectorImpl &BotRoots); + + void releaseSucc(SUnit *SU, SDep *SuccEdge); + void releaseSuccessors(SUnit *SU); + void releasePred(SUnit *SU, SDep *PredEdge); + void releasePredecessors(SUnit *SU); +}; + +/// ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules +/// machine instructions while updating LiveIntervals and tracking regpressure. +class ScheduleDAGMILive : public ScheduleDAGMI { +protected: + RegisterClassInfo *RegClassInfo; + + /// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees + /// will be empty. + SchedDFSResult *DFSResult; + BitVector ScheduledTrees; + + MachineBasicBlock::iterator LiveRegionEnd; + + // Map each SU to its summary of pressure changes. This array is updated for + // liveness during bottom-up scheduling. Top-down scheduling may proceed but + // has no effect on the pressure diffs. + PressureDiffs SUPressureDiffs; + + /// Register pressure in this region computed by initRegPressure. + bool ShouldTrackPressure; + IntervalPressure RegPressure; + RegPressureTracker RPTracker; + + /// List of pressure sets that exceed the target's pressure limit before + /// scheduling, listed in increasing set ID order. Each pressure set is paired + /// with its max pressure in the currently scheduled regions. + std::vector RegionCriticalPSets; + + /// The top of the unscheduled zone. + IntervalPressure TopPressure; + RegPressureTracker TopRPTracker; + + /// The bottom of the unscheduled zone. + IntervalPressure BotPressure; + RegPressureTracker BotRPTracker; + +public: + ScheduleDAGMILive(MachineSchedContext *C, MachineSchedStrategy *S): + ScheduleDAGMI(C, S, /*IsPostRA=*/false), RegClassInfo(C->RegClassInfo), + DFSResult(0), ShouldTrackPressure(false), RPTracker(RegPressure), + TopRPTracker(TopPressure), BotRPTracker(BotPressure) + {} + + virtual ~ScheduleDAGMILive(); + + /// Return true if this DAG supports VReg liveness and RegPressure.
+ virtual bool hasVRegLiveness() const { return true; } + + /// \brief Return true if register pressure tracking is enabled. + bool isTrackingPressure() const { return ShouldTrackPressure; } + + /// Get current register pressure for the top scheduled instructions. + const IntervalPressure &getTopPressure() const { return TopPressure; } + const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; } + + /// Get current register pressure for the bottom scheduled instructions. + const IntervalPressure &getBotPressure() const { return BotPressure; } + const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; } + + /// Get register pressure for the entire scheduling region before scheduling. + const IntervalPressure &getRegPressure() const { return RegPressure; } + + const std::vector &getRegionCriticalPSets() const { + return RegionCriticalPSets; + } + + PressureDiff &getPressureDiff(const SUnit *SU) { + return SUPressureDiffs[SU->NodeNum]; + } + + /// Compute a DFSResult after DAG building is complete, and before any + /// queue comparisons. + void computeDFSResult(); + + /// Return a non-null DFS result if the scheduling strategy initialized it. + const SchedDFSResult *getDFSResult() const { return DFSResult; } + + BitVector &getScheduledTrees() { return ScheduledTrees; } + + /// Implement the ScheduleDAGInstrs interface for handling the next scheduling + /// region. This covers all instructions in a block, while schedule() may only + /// cover a subset. + void enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned regioninstrs) LLVM_OVERRIDE; + + /// Implement ScheduleDAGInstrs interface for scheduling a sequence of + /// reorderable instructions. + virtual void schedule(); + + /// Compute the cyclic critical path through the DAG. + unsigned computeCyclicCriticalPath(); + +protected: + // Top-Level entry points for the schedule() driver... + + /// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking + /// enabled. This sets up three trackers. RPTracker will cover the entire DAG + /// region, TopTracker and BottomTracker will be initialized to the top and + /// bottom of the DAG region without covering any unscheduled instruction. + void buildDAGWithRegPressure(); + + /// Move an instruction and update register pressure. + void scheduleMI(SUnit *SU, bool IsTopNode); + + // Lesser helpers... + + void initRegPressure(); + + void updatePressureDiffs(ArrayRef LiveUses); + + void updateScheduledPressure(const SUnit *SU, + const std::vector &NewMaxPressure); +}; + +//===----------------------------------------------------------------------===// +/// +/// Helpers for implementing custom MachineSchedStrategy classes. These take +/// care of the book-keeping associated with list scheduling heuristics. +/// +//===----------------------------------------------------------------------===// + /// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience /// methods for pushing and removing nodes. ReadyQueue's are uniquely identified /// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in. @@ -261,213 +518,220 @@ public: #endif }; -/// Mutate the DAG as a postpass after normal DAG building. -class ScheduleDAGMutation { - virtual void anchor(); -public: - virtual ~ScheduleDAGMutation() {} +/// Summarize the unscheduled region. +struct SchedRemainder { + // Critical path through the DAG in expected latency.
+  unsigned CriticalPath;
+  unsigned CyclicCritPath;
 
-  virtual void apply(ScheduleDAGMI *DAG) = 0;
+  // Scaled count of micro-ops left to schedule.
+  unsigned RemIssueCount;
+
+  bool IsAcyclicLatencyLimited;
+
+  // Unscheduled resources
+  SmallVector<unsigned, 16> RemainingCounts;
+
+  void reset() {
+    CriticalPath = 0;
+    CyclicCritPath = 0;
+    RemIssueCount = 0;
+    IsAcyclicLatencyLimited = false;
+    RemainingCounts.clear();
+  }
+
+  SchedRemainder() { reset(); }
+
+  void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
 };
 
-/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules
-/// machine instructions while updating LiveIntervals and tracking regpressure.
-class ScheduleDAGMI : public ScheduleDAGInstrs {
-protected:
-  AliasAnalysis *AA;
-  RegisterClassInfo *RegClassInfo;
-  MachineSchedStrategy *SchedImpl;
+/// Each Scheduling boundary is associated with ready queues. It tracks the
+/// current cycle in the direction of movement, and maintains the state
+/// of "hazards" and other interlocks at the current cycle.
+class SchedBoundary {
+public:
+  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
+  enum {
+    TopQID = 1,
+    BotQID = 2,
+    LogMaxQID = 2
+  };
 
-  /// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees
-  /// will be empty.
-  SchedDFSResult *DFSResult;
-  BitVector ScheduledTrees;
+  ScheduleDAGMI *DAG;
+  const TargetSchedModel *SchedModel;
+  SchedRemainder *Rem;
 
-  /// Topo - A topological ordering for SUnits which permits fast IsReachable
-  /// and similar queries.
-  ScheduleDAGTopologicalSort Topo;
+  ReadyQueue Available;
+  ReadyQueue Pending;
 
-  /// Ordered list of DAG postprocessing steps.
-  std::vector<ScheduleDAGMutation*> Mutations;
+  ScheduleHazardRecognizer *HazardRec;
 
-  MachineBasicBlock::iterator LiveRegionEnd;
+private:
+  /// True if the pending Q should be checked/updated before scheduling another
+  /// instruction.
+  bool CheckPending;
 
-  // Map each SU to its summary of pressure changes. This array is updated for
-  // liveness during bottom-up scheduling. Top-down scheduling may proceed but
-  // has no affect on the pressure diffs.
-  PressureDiffs SUPressureDiffs;
+  // For heuristics, keep a list of the nodes that immediately depend on the
+  // most recently scheduled node.
+  SmallPtrSet<const SUnit*, 8> NextSUs;
 
-  /// Register pressure in this region computed by initRegPressure.
-  bool ShouldTrackPressure;
-  IntervalPressure RegPressure;
-  RegPressureTracker RPTracker;
+  /// Number of cycles it takes to issue the instructions scheduled in this
+  /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
+  /// See getStalls().
+  unsigned CurrCycle;
 
-  /// List of pressure sets that exceed the target's pressure limit before
-  /// scheduling, listed in increasing set ID order. Each pressure set is paired
-  /// with its max pressure in the currently scheduled regions.
-  std::vector<PressureChange> RegionCriticalPSets;
+  /// Micro-ops issued in the current cycle
+  unsigned CurrMOps;
 
-  /// The top of the unscheduled zone.
-  MachineBasicBlock::iterator CurrentTop;
-  IntervalPressure TopPressure;
-  RegPressureTracker TopRPTracker;
+  /// MinReadyCycle - Cycle of the soonest available instruction.
+  unsigned MinReadyCycle;
 
-  /// The bottom of the unscheduled zone.
-  MachineBasicBlock::iterator CurrentBottom;
-  IntervalPressure BotPressure;
-  RegPressureTracker BotRPTracker;
+  // The expected latency of the critical path in this scheduled zone.
+  unsigned ExpectedLatency;
 
-  /// Record the next node in a scheduled cluster.
-  const SUnit *NextClusterPred;
-  const SUnit *NextClusterSucc;
+  // The latency of dependence chains leading into this zone.
+  // For each node scheduled bottom-up: DLat = max DLat, N.Depth.
+  // For each cycle scheduled: DLat -= 1.
+  unsigned DependentLatency;
+
+  /// Count the scheduled (issued) micro-ops that can be retired by
+  /// time=CurrCycle assuming the first scheduled instr is retired at time=0.
+  unsigned RetiredMOps;
+
+  // Count scheduled resources that have been executed. Resources are
+  // considered executed if they become ready in the time that it takes to
+  // saturate any resource including the one in question. Counts are scaled
+  // for direct comparison with other resources. Counts can be compared with
+  // MOps * getMicroOpFactor and Latency * getLatencyFactor.
+  SmallVector<unsigned, 16> ExecutedResCounts;
+
+  /// Cache the max count for a single resource.
+  unsigned MaxExecutedResCount;
+
+  // Cache the critical resources ID in this scheduled zone.
+  unsigned ZoneCritResIdx;
+
+  // Is the scheduled region resource limited vs. latency limited.
+  bool IsResourceLimited;
+
+  // Record the highest cycle at which each resource has been reserved by a
+  // scheduled instruction.
+  SmallVector<unsigned, 16> ReservedCycles;
 
 #ifndef NDEBUG
-  /// The number of instructions scheduled so far. Used to cut off the
-  /// scheduler at the point determined by misched-cutoff.
-  unsigned NumInstrsScheduled;
+  // Remember the greatest operand latency as an upper bound on the number of
+  // times we should retry the pending queue because of a hazard.
+  unsigned MaxObservedLatency;
 #endif
 
 public:
-  ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S):
-    ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
-    AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S), DFSResult(0),
-    Topo(SUnits, &ExitSU), ShouldTrackPressure(false),
-    RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure),
-    CurrentBottom(), BotRPTracker(BotPressure),
-    NextClusterPred(NULL), NextClusterSucc(NULL) {
+  /// Pending queues extend the ready queues with the same ID and the
+  /// PendingFlag set.
+  SchedBoundary(unsigned ID, const Twine &Name):
+    DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
+    Pending(ID << LogMaxQID, Name+".P"),
+    HazardRec(0) {
+    reset();
+  }
+
+  ~SchedBoundary();
+
+  void reset();
+
+  void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
+            SchedRemainder *rem);
+
+  bool isTop() const {
+    return Available.getID() == TopQID;
+  }
+
+  /// Number of cycles to issue the instructions scheduled in this zone.
+  unsigned getCurrCycle() const { return CurrCycle; }
+
+  /// Micro-ops issued in the current cycle
+  unsigned getCurrMOps() const { return CurrMOps; }
+
+  /// Return true if the given SU is used by the most recently scheduled
+  /// instruction.
+  bool isNextSU(const SUnit *SU) const { return NextSUs.count(SU); }
+
+  // The latency of dependence chains leading into this zone.
+  unsigned getDependentLatency() const { return DependentLatency; }
+
+  /// Get the number of latency cycles "covered" by the scheduled
+  /// instructions. This is the larger of the critical path within the zone
+  /// and the number of cycles required to issue the instructions.
+  unsigned getScheduledLatency() const {
+    return std::max(ExpectedLatency, CurrCycle);
+  }
+
+  unsigned getUnscheduledLatency(SUnit *SU) const {
+    return isTop() ? SU->getHeight() : SU->getDepth();
+  }
+
+  unsigned getResourceCount(unsigned ResIdx) const {
+    return ExecutedResCounts[ResIdx];
+  }
+
+  /// Get the scaled count of scheduled micro-ops and resources, including
+  /// executed resources.
+  unsigned getCriticalCount() const {
+    if (!ZoneCritResIdx)
+      return RetiredMOps * SchedModel->getMicroOpFactor();
+    return getResourceCount(ZoneCritResIdx);
+  }
+
+  /// Get a scaled count for the minimum execution time of the scheduled
+  /// micro-ops that are ready to execute by getExecutedCount. Notice the
+  /// feedback loop.
+  unsigned getExecutedCount() const {
+    return std::max(CurrCycle * SchedModel->getLatencyFactor(),
+                    MaxExecutedResCount);
+  }
+
+  unsigned getZoneCritResIdx() const { return ZoneCritResIdx; }
+
+  // Is the scheduled region resource limited vs. latency limited.
+  bool isResourceLimited() const { return IsResourceLimited; }
+
+  /// Get the difference between the given SUnit's ready time and the current
+  /// cycle.
+  unsigned getLatencyStallCycles(SUnit *SU);
+
+  unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles);
+
+  bool checkHazard(SUnit *SU);
+
+  unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);
+
+  unsigned getOtherResourceCount(unsigned &OtherCritIdx);
+
+  void releaseNode(SUnit *SU, unsigned ReadyCycle);
+
+  void releaseTopNode(SUnit *SU);
+
+  void releaseBottomNode(SUnit *SU);
+
+  void bumpCycle(unsigned NextCycle);
+
+  void incExecutedResources(unsigned PIdx, unsigned Count);
+
+  unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle);
+
+  void bumpNode(SUnit *SU);
+
+  void releasePending();
+
+  void removeReady(SUnit *SU);
+
+  /// Call this before applying any other heuristics to the Available queue.
+  /// Updates the Available/Pending Q's if necessary and returns the single
+  /// available instruction, or NULL if there are multiple candidates.
+  SUnit *pickOnlyChoice();
+
 #ifndef NDEBUG
-    NumInstrsScheduled = 0;
+  void dumpScheduledState();
 #endif
-  }
-
-  virtual ~ScheduleDAGMI();
-
-  /// \brief Return true if register pressure tracking is enabled.
-  bool isTrackingPressure() const { return ShouldTrackPressure; }
-
-  /// Add a postprocessing step to the DAG builder.
-  /// Mutations are applied in the order that they are added after normal DAG
-  /// building and before MachineSchedStrategy initialization.
-  ///
-  /// ScheduleDAGMI takes ownership of the Mutation object.
-  void addMutation(ScheduleDAGMutation *Mutation) {
-    Mutations.push_back(Mutation);
-  }
-
-  /// \brief True if an edge can be added from PredSU to SuccSU without creating
-  /// a cycle.
-  bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
-
-  /// \brief Add a DAG edge to the given SU with the given predecessor
-  /// dependence data.
-  ///
-  /// \returns true if the edge may be added without creating a cycle OR if an
-  /// equivalent edge already existed (false indicates failure).
-  bool addEdge(SUnit *SuccSU, const SDep &PredDep);
-
-  MachineBasicBlock::iterator top() const { return CurrentTop; }
-  MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
-
-  /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
-  /// region. This covers all instructions in a block, while schedule() may only
-  /// cover a subset.
-  void enterRegion(MachineBasicBlock *bb,
-                   MachineBasicBlock::iterator begin,
-                   MachineBasicBlock::iterator end,
-                   unsigned regioninstrs) LLVM_OVERRIDE;
-
-  /// Implement ScheduleDAGInstrs interface for scheduling a sequence of
-  /// reorderable instructions.
-  virtual void schedule();
-
-  /// Change the position of an instruction within the basic block and update
-  /// live ranges and region boundary iterators.
-  void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
-
-  /// Get current register pressure for the top scheduled instructions.
-  const IntervalPressure &getTopPressure() const { return TopPressure; }
-  const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
-
-  /// Get current register pressure for the bottom scheduled instructions.
-  const IntervalPressure &getBotPressure() const { return BotPressure; }
-  const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }
-
-  /// Get register pressure for the entire scheduling region before scheduling.
-  const IntervalPressure &getRegPressure() const { return RegPressure; }
-
-  const std::vector<PressureChange> &getRegionCriticalPSets() const {
-    return RegionCriticalPSets;
-  }
-
-  PressureDiff &getPressureDiff(const SUnit *SU) {
-    return SUPressureDiffs[SU->NodeNum];
-  }
-
-  const SUnit *getNextClusterPred() const { return NextClusterPred; }
-
-  const SUnit *getNextClusterSucc() const { return NextClusterSucc; }
-
-  /// Compute a DFSResult after DAG building is complete, and before any
-  /// queue comparisons.
-  void computeDFSResult();
-
-  /// Return a non-null DFS result if the scheduling strategy initialized it.
-  const SchedDFSResult *getDFSResult() const { return DFSResult; }
-
-  BitVector &getScheduledTrees() { return ScheduledTrees; }
-
-  /// Compute the cyclic critical path through the DAG.
-  unsigned computeCyclicCriticalPath();
-
-  void viewGraph(const Twine &Name, const Twine &Title) LLVM_OVERRIDE;
-  void viewGraph() LLVM_OVERRIDE;
-
-protected:
-  // Top-Level entry points for the schedule() driver...
-
-  /// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking
-  /// enabled. This sets up three trackers. RPTracker will cover the entire DAG
-  /// region, TopTracker and BottomTracker will be initialized to the top and
-  /// bottom of the DAG region without covereing any unscheduled instruction.
-  void buildDAGWithRegPressure();
-
-  /// Apply each ScheduleDAGMutation step in order. This allows different
-  /// instances of ScheduleDAGMI to perform custom DAG postprocessing.
-  void postprocessDAG();
-
-  /// Release ExitSU predecessors and setup scheduler queues.
-  void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);
-
-  /// Move an instruction and update register pressure.
-  void scheduleMI(SUnit *SU, bool IsTopNode);
-
-  /// Update scheduler DAG and queues after scheduling an instruction.
-  void updateQueues(SUnit *SU, bool IsTopNode);
-
-  /// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
-  void placeDebugValues();
-
-  /// \brief dump the scheduled Sequence.
-  void dumpSchedule() const;
-
-  // Lesser helpers...
-
-  void initRegPressure();
-
-  void updatePressureDiffs(ArrayRef<unsigned> LiveUses);
-
-  void updateScheduledPressure(const SUnit *SU,
-                               const std::vector<unsigned> &NewMaxPressure);
-
-  bool checkSchedLimit();
-
-  void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
-                             SmallVectorImpl<SUnit*> &BotRoots);
-
-  void releaseSucc(SUnit *SU, SDep *SuccEdge);
-  void releaseSuccessors(SUnit *SU);
-  void releasePred(SUnit *SU, SDep *PredEdge);
-  void releasePredecessors(SUnit *SU);
 };
 
 } // namespace llvm
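Editorial note: the restructuring above splits the old monolithic scheduler into a generic ScheduleDAGMI and a liveness/pressure-aware ScheduleDAGMILive, with SchedRemainder and SchedBoundary carrying the per-zone book-keeping. As a rough illustration of how a strategy is expected to consult the SchedBoundary API, here is a minimal C++ sketch; pickFromZones is a hypothetical helper and the tie-break heuristic is deliberately simplistic, but pickOnlyChoice(), isResourceLimited(), and the public Available queue are the members declared above.

// Hypothetical helper, not part of the patch: pick the next node from the
// top or bottom scheduling zone.
static SUnit *pickFromZones(SchedBoundary &Top, SchedBoundary &Bot,
                            bool &IsTopNode) {
  // A zone with exactly one available candidate needs no heuristics.
  if (SUnit *SU = Top.pickOnlyChoice()) { IsTopNode = true;  return SU; }
  if (SUnit *SU = Bot.pickOnlyChoice()) { IsTopNode = false; return SU; }
  // Otherwise prefer the zone that is not resource limited, since it can
  // still hide latency by issuing more instructions.
  IsTopNode = !Top.isResourceLimited() && Bot.isResourceLimited();
  ReadyQueue &Q = IsTopNode ? Top.Available : Bot.Available;
  return Q.empty() ? 0 : *Q.begin();
}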
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/PBQP/Graph.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/PBQP/Graph.h
index aca0a9130342..07371439249f 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/PBQP/Graph.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/PBQP/Graph.h
@@ -437,8 +437,8 @@ namespace PBQP {
 
       for (NodeItr nodeItr = nodesBegin(), nodeEnd = nodesEnd();
            nodeItr != nodeEnd; ++nodeItr) {
-        os << "  node" << nodeItr << " [ label=\""
-           << nodeItr << ": " << getNodeCosts(*nodeItr) << "\" ]\n";
+        os << "  node" << *nodeItr << " [ label=\""
+           << *nodeItr << ": " << getNodeCosts(*nodeItr) << "\" ]\n";
       }
 
       os << "  edge [ len=" << getNumNodes() << " ]\n";
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/Passes.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/Passes.h
index ae4a2fa0bf83..3be21e024217 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/Passes.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/Passes.h
@@ -207,9 +207,9 @@ public:
   /// Fully developed targets will not generally override this.
   virtual void addMachinePasses();
 
-  /// createTargetScheduler - Create an instance of ScheduleDAGInstrs to be run
-  /// within the standard MachineScheduler pass for this function and target at
-  /// the current optimization level.
+  /// Create an instance of ScheduleDAGInstrs to be run within the standard
+  /// MachineScheduler pass for this function and target at the current
+  /// optimization level.
   ///
   /// This can also be used to plug a new MachineSchedStrategy into an instance
   /// of the standard ScheduleDAGMI:
@@ -221,6 +221,13 @@ public:
     return 0;
   }
 
+  /// Similar to createMachineScheduler but used when postRA machine scheduling
+  /// is enabled.
+  virtual ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const {
+    return 0;
+  }
+
 protected:
   // Helper to verify the analysis is really immutable.
   void setOpt(bool &Opt, bool Val);
@@ -403,6 +410,9 @@ namespace llvm {
   /// MachineScheduler - This pass schedules machine instructions.
   extern char &MachineSchedulerID;
 
+  /// PostMachineScheduler - This pass schedules machine instructions postRA.
+  extern char &PostMachineSchedulerID;
+
   /// SpillPlacement analysis. Suggest optimal placement of spill code between
   /// basic blocks.
   extern char &SpillPlacementID;
@@ -568,6 +578,11 @@ namespace llvm {
   /// bundles (created earlier, e.g. during pre-RA scheduling).
   extern char &FinalizeMachineBundlesID;
 
+  /// StackMapLiveness - This pass analyses the register live-out set of
+  /// stackmap/patchpoint intrinsics and attaches the calculated information to
+  /// the intrinsic for later emission to the StackMap.
+  extern char &StackMapLivenessID;
+
 } // End llvm namespace
 
 #endif
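Editorial note: a sketch of how a target would adopt the new hook pair. MyTargetPassConfig and MyTargetSchedStrategy are hypothetical names; the overridden signatures are exactly the ones added to TargetPassConfig above, and the ScheduleDAGMILive constructor is the one shown in the MachineScheduler.h diff.

class MyTargetPassConfig : public TargetPassConfig {
public:
  MyTargetPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {}

  // Plug a custom strategy into the standard pre-RA scheduler.
  virtual ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const {
    return new ScheduleDAGMILive(C, new MyTargetSchedStrategy());
  }

  // Returning 0 keeps the default post-RA scheduler selection.
  virtual ScheduleDAGInstrs *
  createPostMachineScheduler(MachineSchedContext *C) const {
    return 0;
  }
};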
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ScheduleDAG.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ScheduleDAG.h
index ccba1b0364e4..c49a9a7f569a 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ScheduleDAG.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ScheduleDAG.h
@@ -184,6 +184,12 @@ namespace llvm {
           || Contents.OrdKind == MustAliasMem);
     }
 
+    /// isBarrier - Test if this is an Order dependence that is marked
+    /// as a barrier.
+    bool isBarrier() const {
+      return getKind() == Order && Contents.OrdKind == Barrier;
+    }
+
     /// isMustAlias - Test if this is an Order dependence that is marked
     /// as "must alias", meaning that the SUnits at either end of the edge
     /// have a memory dependence on a known memory location.
@@ -292,6 +298,8 @@ namespace llvm {
    bool isScheduleHigh : 1;        // True if preferable to schedule high.
    bool isScheduleLow : 1;         // True if preferable to schedule low.
    bool isCloned : 1;              // True if this node has been cloned.
+    bool isUnbuffered : 1;          // Uses an unbuffered resource.
+    bool hasReservedResource : 1;   // Uses a reserved resource.
    Sched::Preference SchedulingPref; // Scheduling preference.
 
   private:
@@ -316,7 +324,8 @@ namespace llvm {
       isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
       hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
       isAvailable(false), isScheduled(false), isScheduleHigh(false),
-      isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
+      isScheduleLow(false), isCloned(false), isUnbuffered(false),
+      hasReservedResource(false), SchedulingPref(Sched::None),
       isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
       TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
 
@@ -330,7 +339,8 @@ namespace llvm {
       isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
       hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
       isAvailable(false), isScheduled(false), isScheduleHigh(false),
-      isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
+      isScheduleLow(false), isCloned(false), isUnbuffered(false),
+      hasReservedResource(false), SchedulingPref(Sched::None),
       isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
       TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
 
@@ -343,7 +353,8 @@ namespace llvm {
       isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
       hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
       isAvailable(false), isScheduled(false), isScheduleHigh(false),
-      isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
+      isScheduleLow(false), isCloned(false), isUnbuffered(false),
+      hasReservedResource(false), SchedulingPref(Sched::None),
       isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
       TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
 
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
index fe4f3c2de3b6..cf449f607b8d 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -43,7 +43,7 @@ namespace llvm {
   };
 
   /// Record a physical register access.
-  /// For non data-dependent uses, OpIdx == -1.
+  /// For non-data-dependent uses, OpIdx == -1.
   struct PhysRegSUOper {
     SUnit *SU;
     int OpIdx;
@@ -88,6 +88,10 @@ namespace llvm {
     /// isPostRA flag indicates vregs cannot be present.
     bool IsPostRA;
 
+    /// True if the DAG builder should remove kill flags (in preparation for
+    /// rescheduling).
+    bool RemoveKillFlags;
+
     /// The standard DAG builder does not normally include terminators as DAG
     /// nodes because it does not create the necessary dependencies to prevent
     /// reordering. A specialized scheduler can override
@@ -145,15 +149,21 @@ namespace llvm {
     DbgValueVector DbgValues;
     MachineInstr *FirstDbgValue;
 
+    /// Set of live physical registers for updating kill flags.
+    BitVector LiveRegs;
+
   public:
     explicit ScheduleDAGInstrs(MachineFunction &mf,
                                const MachineLoopInfo &mli,
                                const MachineDominatorTree &mdt,
                                bool IsPostRAFlag,
+                               bool RemoveKillFlags = false,
                                LiveIntervals *LIS = 0);
 
     virtual ~ScheduleDAGInstrs() {}
 
+    bool isPostRA() const { return IsPostRA; }
+
     /// \brief Expose LiveIntervals for use in DAG mutators and such.
     LiveIntervals *getLIS() const { return LIS; }
 
@@ -227,12 +237,23 @@ namespace llvm {
     /// Return a label for the region of code covered by the DAG.
     virtual std::string getDAGName() const;
 
+    /// \brief Fix register kill flags that scheduling has made invalid.
+    void fixupKills(MachineBasicBlock *MBB);
   protected:
     void initSUnits();
     void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx);
     void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
     void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
     void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
+
+    /// \brief PostRA helper for rewriting kill flags.
+    void startBlockForKills(MachineBasicBlock *BB);
+
+    /// \brief Toggle a register operand kill flag.
+    ///
+    /// Other adjustments may be made to the instruction if necessary. Return
+    /// true if the operand has been deleted, false if not.
+    bool toggleKillFlag(MachineInstr *MI, MachineOperand &MO);
   };
 
   /// newSUnit - Creates a new SUnit and return a ptr to it.
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h
index 9dfa3446ef50..8a40e7212ff6 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h
@@ -70,6 +70,22 @@ public:
   /// emitted, to advance the hazard state.
   virtual void EmitInstruction(SUnit *) {}
 
+  /// PreEmitNoops - This callback is invoked prior to emitting an instruction.
+  /// It should return the number of noops to emit prior to the provided
+  /// instruction.
+  /// Note: This is only used during PostRA scheduling. EmitNoop is not called
+  /// for these noops.
+  virtual unsigned PreEmitNoops(SUnit *) {
+    return 0;
+  }
+
+  /// ShouldPreferAnother - This callback may be invoked if getHazardType
+  /// returns NoHazard. If, even though there is no hazard, it would be better to
+  /// schedule another available instruction, this callback should return true.
+  virtual bool ShouldPreferAnother(SUnit *) {
+    return false;
+  }
+
   /// AdvanceCycle - This callback is invoked whenever the next top-down
   /// instruction to be scheduled cannot issue in the current cycle, either
   /// because of latency or resource conflicts.  This should increment the
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 70c15e6c6e6f..08eda723c6bc 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -70,6 +70,10 @@ namespace ISD {
   /// BUILD_VECTOR where all of the elements are 0 or undef.
   bool isBuildVectorAllZeros(const SDNode *N);
 
+  /// \brief Return true if the specified node is a BUILD_VECTOR node of
+  /// all ConstantSDNode or undef.
+  bool isBuildVectorOfConstantSDNodes(const SDNode *N);
+
   /// isScalarToVector - Return true if the specified node is a
   /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
   /// element is not an undef.
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/StackMapLivenessAnalysis.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/StackMapLivenessAnalysis.h
new file mode 100644
index 000000000000..86ff5ebebddf
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/StackMapLivenessAnalysis.h
@@ -0,0 +1,65 @@
+//===--- StackMapLivenessAnalysis - StackMap Liveness Analysis --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass calculates the liveness for each basic block in a function and
+// attaches the register live-out information to a stackmap or patchpoint
+// intrinsic if present.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_STACKMAP_LIVENESS_ANALYSIS_H
+#define LLVM_CODEGEN_STACKMAP_LIVENESS_ANALYSIS_H
+
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+
+namespace llvm {
+
+/// \brief This pass calculates the liveness information for each basic block in
+/// a function and attaches the register live-out information to a stackmap or
+/// patchpoint intrinsic if present.
+///
+/// This is an optional pass that has to be explicitly enabled via the
+/// -enable-stackmap-liveness and/or -enable-patchpoint-liveness flag. The pass
+/// skips functions that don't have any stackmap or patchpoint intrinsics. The
+/// information provided by this pass is optional and not required by the
+/// aforementioned intrinsics to function.
+class StackMapLiveness : public MachineFunctionPass {
+  MachineFunction *MF;
+  const TargetRegisterInfo *TRI;
+  LivePhysRegs LiveRegs;
+public:
+  static char ID;
+
+  /// \brief Default construct and initialize the pass.
+  StackMapLiveness();
+
+  /// \brief Tell the pass manager which passes we depend on and what
+  /// information we preserve.
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+  /// \brief Calculate the liveness information for the given machine function.
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+private:
+  /// \brief Performs the actual liveness calculation for the function.
+  bool calculateLiveness();
+
+  /// \brief Add the current register live set to the instruction.
+  void addLiveOutSetToMI(MachineInstr &MI);
+
+  /// \brief Create a register mask and initialize it with the registers from
+  /// the register live set.
+  uint32_t *createRegisterMask() const;
+};
+
+} // llvm namespace
+
+#endif // LLVM_CODEGEN_STACKMAP_LIVENESS_ANALYSIS_H
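Editorial note: the liveness calculation behind this interface is a bottom-up walk of each block. A condensed sketch of the idea, assuming the LivePhysRegs interface included above (init/addLiveOuts/stepBackward); the helper name is hypothetical and the register-mask construction that the real pass performs is reduced to a comment.

static bool sampleLivenessAtPatchPoints(MachineBasicBlock &MBB,
                                        const TargetRegisterInfo *TRI,
                                        LivePhysRegs &LiveRegs) {
  bool HasStackMap = false;
  LiveRegs.init(TRI);
  LiveRegs.addLiveOuts(&MBB);
  for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
       I != E; ++I) {
    if (I->getOpcode() == TargetOpcode::PATCHPOINT) {
      // The real pass builds a register mask from LiveRegs here and attaches
      // it to the instruction (see addLiveOutSetToMI/createRegisterMask).
      HasStackMap = true;
    }
    // Update the live set to reflect the program point above this instr.
    LiveRegs.stepBackward(*I);
  }
  return HasStackMap;
}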
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/StackMaps.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/StackMaps.h
index e90f22e5b69a..508606ac9bab 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/StackMaps.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/StackMaps.h
@@ -1,4 +1,5 @@
 //===------------------- StackMaps.h - StackMaps ----------------*- C++ -*-===//
+
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -92,19 +93,28 @@ public:
       : LocType(LocType), Size(Size), Reg(Reg), Offset(Offset) {}
   };
 
-  // Typedef a function pointer for functions that parse sequences of operands
-  // and return a Location, plus a new "next" operand iterator.
-  typedef std::pair<Location, MachineInstr::const_mop_iterator>
-    (*OperandParser)(MachineInstr::const_mop_iterator,
-                     MachineInstr::const_mop_iterator, const TargetMachine&);
+  struct LiveOutReg {
+    unsigned short Reg;
+    unsigned short RegNo;
+    unsigned short Size;
+
+    LiveOutReg() : Reg(0), RegNo(0), Size(0) {}
+    LiveOutReg(unsigned short Reg, unsigned short RegNo, unsigned short Size)
+      : Reg(Reg), RegNo(RegNo), Size(Size) {}
+
+    void MarkInvalid() { Reg = 0; }
+
+    // Only sort by the dwarf register number.
+    bool operator< (const LiveOutReg &LO) const { return RegNo < LO.RegNo; }
+    static bool IsInvalid(const LiveOutReg &LO) { return LO.Reg == 0; }
+  };
 
   // OpTypes are used to encode information about the following logical
   // operand (which may consist of several MachineOperands) for the
   // OpParser.
   typedef enum { DirectMemRefOp, IndirectMemRefOp, ConstantOp } OpType;
 
-  StackMaps(AsmPrinter &AP, OperandParser OpParser)
-    : AP(AP), OpParser(OpParser) {}
+  StackMaps(AsmPrinter &AP) : AP(AP) {}
 
   /// \brief Generate a stackmap record for a stackmap instruction.
   ///
@@ -121,15 +131,18 @@ public:
 
 private:
   typedef SmallVector<Location, 8> LocationVec;
+  typedef SmallVector<LiveOutReg, 8> LiveOutVec;
 
   struct CallsiteInfo {
     const MCExpr *CSOffsetExpr;
-    unsigned ID;
+    uint64_t ID;
     LocationVec Locations;
+    LiveOutVec LiveOuts;
     CallsiteInfo() : CSOffsetExpr(0), ID(0) {}
-    CallsiteInfo(const MCExpr *CSOffsetExpr, unsigned ID,
-                 LocationVec Locations)
-      : CSOffsetExpr(CSOffsetExpr), ID(ID), Locations(Locations) {}
+    CallsiteInfo(const MCExpr *CSOffsetExpr, uint64_t ID,
+                 LocationVec &Locations, LiveOutVec &LiveOuts)
+      : CSOffsetExpr(CSOffsetExpr), ID(ID), Locations(Locations),
+        LiveOuts(LiveOuts) {}
   };
 
   typedef std::vector<CallsiteInfo> CallsiteInfoList;
@@ -155,16 +168,28 @@ private:
   };
 
   AsmPrinter &AP;
-  OperandParser OpParser;
   CallsiteInfoList CSInfos;
   ConstantPool ConstPool;
 
+  MachineInstr::const_mop_iterator
+  parseOperand(MachineInstr::const_mop_iterator MOI,
+               MachineInstr::const_mop_iterator MOE,
+               LocationVec &Locs, LiveOutVec &LiveOuts) const;
+
+  /// \brief Create a live-out register record for the given register @p Reg.
+  LiveOutReg createLiveOutReg(unsigned Reg, const MCRegisterInfo &MCRI,
+                              const TargetRegisterInfo *TRI) const;
+
+  /// \brief Parse the register live-out mask and return a vector of live-out
+  /// registers that need to be recorded in the stackmap.
+  LiveOutVec parseRegisterLiveOutMask(const uint32_t *Mask) const;
+
   /// This should be called by the MC lowering code _immediately_ before
   /// lowering the MI to an MCInst. It records where the operands for the
   /// instruction are stored, and outputs a label to record the offset of
   /// the call from the start of the text section. In special cases (e.g. AnyReg
   /// calling convention) the return register is also recorded if requested.
-  void recordStackMapOpers(const MachineInstr &MI, uint32_t ID,
+  void recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
                            MachineInstr::const_mop_iterator MOI,
                            MachineInstr::const_mop_iterator MOE,
                            bool recordResult = false);
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/StackProtector.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/StackProtector.h
index d09a933a663b..7815a49bc24a 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/StackProtector.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/StackProtector.h
@@ -20,11 +20,11 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/ValueMap.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Pass.h"
 #include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
-class DominatorTree;
 class Function;
 class Module;
 class PHINode;
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/TargetSchedule.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/TargetSchedule.h
index 8ef26b7ca548..19a172beeaaa 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/TargetSchedule.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/TargetSchedule.h
@@ -98,6 +98,14 @@ public:
     return SchedModel.getProcResource(PIdx);
   }
 
+#ifndef NDEBUG
+  const char *getResourceName(unsigned PIdx) const {
+    if (!PIdx)
+      return "MOps";
+    return SchedModel.getProcResource(PIdx)->Name;
+  }
+#endif
+
   typedef const MCWriteProcResEntry *ProcResIter;
 
   // \brief Get an iterator into the processor resources consumed by this
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ValueTypes.h b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ValueTypes.h
index 79f323341fd0..071bd1498e6d 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ValueTypes.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/CodeGen/ValueTypes.h
@@ -880,18 +880,18 @@ namespace llvm {
     static EVT getExtendedIntegerVT(LLVMContext &C, unsigned BitWidth);
     static EVT getExtendedVectorVT(LLVMContext &C, EVT VT, unsigned NumElements);
 
-    bool isExtendedFloatingPoint() const;
-    bool isExtendedInteger() const;
-    bool isExtendedVector() const;
-    bool isExtended16BitVector() const;
-    bool isExtended32BitVector() const;
-    bool isExtended64BitVector() const;
-    bool isExtended128BitVector() const;
-    bool isExtended256BitVector() const;
-    bool isExtended512BitVector() const;
-    bool isExtended1024BitVector() const;
+    bool isExtendedFloatingPoint() const LLVM_READONLY;
+    bool isExtendedInteger() const LLVM_READONLY;
+    bool isExtendedVector() const LLVM_READONLY;
+    bool isExtended16BitVector() const LLVM_READONLY;
+    bool isExtended32BitVector() const LLVM_READONLY;
+    bool isExtended64BitVector() const LLVM_READONLY;
+    bool isExtended128BitVector() const LLVM_READONLY;
+    bool isExtended256BitVector() const LLVM_READONLY;
+    bool isExtended512BitVector() const LLVM_READONLY;
+    bool isExtended1024BitVector() const LLVM_READONLY;
     EVT getExtendedVectorElementType() const;
-    unsigned getExtendedVectorNumElements() const;
+    unsigned getExtendedVectorNumElements() const LLVM_READONLY;
     unsigned getExtendedSizeInBits() const;
   };
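Editorial note: LLVM_READONLY expands to __attribute__((__pure__)) on compilers that support it, promising that the result depends only on the arguments and readable memory, so the optimizer may merge repeated calls. A small hypothetical caller that benefits; public predicates such as isVector() bottom out in the isExtended* helpers annotated above.

// Hypothetical example, not from the patch: the repeated EVT predicate calls
// in this loop may be folded by the compiler once the underlying helpers are
// marked pure.
static unsigned countNonSimpleVectors(const SmallVectorImpl<EVT> &VTs) {
  unsigned N = 0;
  for (unsigned i = 0, e = VTs.size(); i != e; ++i)
    if (!VTs[i].isSimple() && VTs[i].isVector())
      ++N;
  return N;
}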
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h b/external/bsd/llvm/dist/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
index 233084dd50fa..ee43cbcf0ef9 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -232,7 +232,7 @@ public:
   ///
   /// This function is deprecated for the MCJIT execution engine.
   ///
-  /// FIXME: the JIT and MCJIT interfaces should be disentangled or united
+  /// FIXME: the JIT and MCJIT interfaces should be disentangled or united
   /// again, if possible.
   ///
   virtual void *getPointerToNamedFunction(const std::string &Name,
@@ -550,7 +550,7 @@ public:
     WhichEngine = w;
     return *this;
   }
-
+
   /// setMCJITMemoryManager - Sets the MCJIT memory manager to use. This allows
   /// clients to customize their memory allocation policies for the MCJIT. This
   /// is only appropriate for the MCJIT; setting this and configuring the builder
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/LTO/LTOModule.h b/external/bsd/llvm/dist/llvm/include/llvm/LTO/LTOModule.h
index f4693c8d2260..d7205d8d9a67 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/LTO/LTOModule.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/LTO/LTOModule.h
@@ -19,6 +19,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetMachine.h"
 #include <vector>
@@ -49,6 +50,7 @@ private:
 
   llvm::OwningPtr<llvm::Module> _module;
   llvm::OwningPtr<llvm::TargetMachine> _target;
+  llvm::MCObjectFileInfo ObjFileInfo;
   std::vector<NameAndAttributes> _symbols;
 
   // _defines and _undefines only needed to disambiguate tentative definitions
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Object/COFF.h b/external/bsd/llvm/dist/llvm/include/llvm/Object/COFF.h
index e05ae6c654c7..aaffbbcb5e79 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/Object/COFF.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/Object/COFF.h
@@ -157,6 +157,28 @@ struct import_lookup_table_entry32 {
   }
 };
 
+struct export_directory_table_entry {
+  support::ulittle32_t ExportFlags;
+  support::ulittle32_t TimeDateStamp;
+  support::ulittle16_t MajorVersion;
+  support::ulittle16_t MinorVersion;
+  support::ulittle32_t NameRVA;
+  support::ulittle32_t OrdinalBase;
+  support::ulittle32_t AddressTableEntries;
+  support::ulittle32_t NumberOfNamePointers;
+  support::ulittle32_t ExportAddressTableRVA;
+  support::ulittle32_t NamePointerRVA;
+  support::ulittle32_t OrdinalTableRVA;
+};
+
+union export_address_table_entry {
+  support::ulittle32_t ExportRVA;
+  support::ulittle32_t ForwarderRVA;
+};
+
+typedef support::ulittle32_t export_name_pointer_table_entry;
+typedef support::ulittle16_t export_ordinal_table_entry;
+
 struct coff_symbol {
   struct StringTableOffset {
     support::ulittle32_t Zeroes;
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Target/Mangler.h b/external/bsd/llvm/dist/llvm/include/llvm/Target/Mangler.h
index eee7bf6d6979..78ce264b5e37 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/Target/Mangler.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/Target/Mangler.h
@@ -18,10 +18,10 @@
 
 namespace llvm {
 
+class DataLayout;
 class GlobalValue;
 class MCContext;
 template <typename T> class SmallVectorImpl;
-class TargetMachine;
 class Twine;
 
 class Mangler {
@@ -33,7 +33,7 @@ public:
   };
 
 private:
-  const TargetMachine *TM;
+  const DataLayout *DL;
 
   /// AnonGlobalIDs - We need to give global values the same name every time
   /// they are mangled.  This keeps track of the number we give to anonymous
@@ -46,20 +46,18 @@ private:
   unsigned NextAnonGlobalID;
 
 public:
-  Mangler(const TargetMachine *TM) : TM(TM), NextAnonGlobalID(1) {}
+  Mangler(const DataLayout *DL) : DL(DL), NextAnonGlobalID(1) {}
 
   /// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
   /// and the specified global variable's name. If the global variable doesn't
   /// have a name, this fills in a unique name for the global.
-  void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV,
-                         bool isImplicitlyPrivate, bool UseGlobalPrefix = true);
+  void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV);
 
   /// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
   /// and the specified name as the global variable name. GVName must not be
   /// empty.
   void getNameWithPrefix(SmallVectorImpl<char> &OutName, const Twine &GVName,
-                         ManglerPrefixTy PrefixTy = Mangler::Default,
-                         bool UseGlobalPrefix = true);
+                         ManglerPrefixTy PrefixTy = Mangler::Default);
 };
 
 } // End llvm namespace
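Editorial note: with this change a Mangler is constructed from a DataLayout instead of a TargetMachine, and the isImplicitlyPrivate/UseGlobalPrefix knobs are gone. A minimal usage sketch; printMangledName is a hypothetical helper, while the constructor and getNameWithPrefix signatures are the ones above.

static void printMangledName(raw_ostream &OS, const GlobalValue *GV,
                             const TargetMachine &TM) {
  Mangler Mang(TM.getDataLayout());  // The DataLayout now drives mangling.
  SmallString<128> Name;
  Mang.getNameWithPrefix(Name, GV);
  OS << Name.str() << '\n';
}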
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Target/Target.td b/external/bsd/llvm/dist/llvm/include/llvm/Target/Target.td
index 3f6eae6bb20a..31da7ebca42c 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/Target/Target.td
+++ b/external/bsd/llvm/dist/llvm/include/llvm/Target/Target.td
@@ -685,6 +685,18 @@ class InstrInfo {
   //
   // This option is a temporary migration help. It will go away.
   bit guessInstructionProperties = 1;
+
+  // TableGen's instruction encoder generator has support for matching operands
+  // to bit-field variables both by name and by position. While matching by
+  // name is preferred, this is currently not possible for complex operands,
+  // and some targets still rely on the positional encoding rules. When
+  // generating a decoder for such targets, the positional encoding rules must
+  // be used by the decoder generator as well.
+  //
+  // This option is temporary; it will go away once the TableGen decoder
+  // generator has better support for complex operands and targets have
+  // migrated away from using positionally encoded operands.
+  bit decodePositionallyEncodedOperands = 0;
 }
 
 // Standard Pseudo Instructions.
@@ -805,6 +817,7 @@ def STACKMAP : Instruction {
   let InOperandList = (ins i32imm:$id, i32imm:$nbytes, variable_ops);
   let isCall = 1;
   let mayLoad = 1;
+  let usesCustomInserter = 1;
 }
 def PATCHPOINT : Instruction {
   let OutOperandList = (outs unknown:$dst);
@@ -812,6 +825,7 @@ def PATCHPOINT : Instruction {
                        i32imm:$nargs, i32imm:$cc, variable_ops);
   let isCall = 1;
   let mayLoad = 1;
+  let usesCustomInserter = 1;
 }
 }
 
@@ -947,7 +961,7 @@ class AsmWriter {
   // AsmWriterClassName - This specifies the suffix to use for the asmwriter
   // class. Generated AsmWriter classes are always prefixed with the target
   // name.
-  string AsmWriterClassName  = "AsmPrinter";
+  string AsmWriterClassName  = "InstPrinter";
 
   // Variant - AsmWriters can be of multiple different variants. Variants are
   // used to support targets that need to emit assembly code in ways that are
   // == 1, will expand to "y".
   int Variant = 0;
 
-
-  // FirstOperandColumn/OperandSpacing - If the assembler syntax uses a columnar
-  // layout, the asmwriter can actually generate output in this columns (in
-  // verbose-asm mode). These two values indicate the width of the first column
-  // (the "opcode" area) and the width to reserve for subsequent operands. When
-  // verbose asm mode is enabled, operands will be indented to respect this.
-  int FirstOperandColumn = -1;
-
   // OperandSpacing - Space between operand columns.
   int OperandSpacing = -1;
 
   // isMCAsmWriter - Is this assembly writer for an MC emitter? This controls
   // generation of the printInstruction() method. For MC printers, it takes
   // an MCInstr* operand, otherwise it takes a MachineInstr*.
-  bit isMCAsmWriter = 0;
+  bit isMCAsmWriter = 1;
 }
 
 def DefaultAsmWriter : AsmWriter;
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetLibraryInfo.h b/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetLibraryInfo.h
index 46eaef2871b1..326104da0e93 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetLibraryInfo.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetLibraryInfo.h
@@ -251,6 +251,18 @@ namespace llvm {
       floorf,
       /// long double floorl(long double x);
       floorl,
+      /// double fmax(double x, double y);
+      fmax,
+      /// float fmaxf(float x, float y);
+      fmaxf,
+      /// long double fmaxl(long double x, long double y);
+      fmaxl,
+      /// double fmin(double x, double y);
+      fmin,
+      /// float fminf(float x, float y);
+      fminf,
+      /// long double fminl(long double x, long double y);
+      fminl,
       /// double fmod(double x, double y);
       fmod,
       /// float fmodf(float x, float y);
@@ -703,6 +715,8 @@ public:
     case LibFunc::sqrt: case LibFunc::sqrtf: case LibFunc::sqrtl:
     case LibFunc::sqrt_finite: case LibFunc::sqrtf_finite: case LibFunc::sqrtl_finite:
+    case LibFunc::fmax: case LibFunc::fmaxf: case LibFunc::fmaxl:
+    case LibFunc::fmin: case LibFunc::fminf: case LibFunc::fminl:
     case LibFunc::floor: case LibFunc::floorf: case LibFunc::floorl:
     case LibFunc::nearbyint: case LibFunc::nearbyintf: case LibFunc::nearbyintl:
     case LibFunc::ceil: case LibFunc::ceilf: case LibFunc::ceill:
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetLowering.h b/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetLowering.h
index 5ab04f794452..e6917b74e7dc 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetLowering.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetLowering.h
@@ -173,6 +173,11 @@ public:
     return true;
   }
 
+  /// Return true if multiple condition registers are available.
+  bool hasMultipleConditionRegisters() const {
+    return HasMultipleConditionRegisters;
+  }
+
   /// Return true if a vector of the given type should be split
   /// (TypeSplitVector) instead of promoted (TypePromoteInteger) during type
   /// legalization.
@@ -880,13 +885,13 @@ protected:
   }
 
   /// Indicate whether this target prefers to use _setjmp to implement
-  /// llvm.setjmp or the non _ version. Defaults to false.
+  /// llvm.setjmp or the version without _. Defaults to false.
   void setUseUnderscoreSetJmp(bool Val) {
     UseUnderscoreSetJmp = Val;
   }
 
   /// Indicate whether this target prefers to use _longjmp to implement
-  /// llvm.longjmp or the non _ version. Defaults to false.
+  /// llvm.longjmp or the version without _. Defaults to false.
   void setUseUnderscoreLongJmp(bool Val) {
     UseUnderscoreLongJmp = Val;
   }
@@ -926,6 +931,15 @@ protected:
     SelectIsExpensive = isExpensive;
   }
 
+  /// Tells the code generator that the target has multiple (allocatable)
+  /// condition registers that can be used to store the results of comparisons
+  /// for use by selects and conditional branches. With multiple condition
+  /// registers, the code generator will not aggressively sink comparisons into
+  /// the blocks of their users.
+  void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
+    HasMultipleConditionRegisters = hasManyRegs;
+  }
+
   /// Tells the code generator not to expand sequence of operations into a
   /// separate sequences that increases the amount of flow control.
   void setJumpIsExpensive(bool isExpensive = true) {
@@ -1321,6 +1335,13 @@ private:
   /// the select operations if possible.
   bool SelectIsExpensive;
 
+  /// Tells the code generator that the target has multiple (allocatable)
+  /// condition registers that can be used to store the results of comparisons
+  /// for use by selects and conditional branches. With multiple condition
+  /// registers, the code generator will not aggressively sink comparisons into
+  /// the blocks of their users.
+  bool HasMultipleConditionRegisters;
+
   /// Tells the code generator not to expand integer divides by constants into a
   /// sequence of muls, adds, and shifts. This is a hack until a real cost
   /// model is in place. If we ever optimize for size, this will be set to true
@@ -1685,6 +1706,10 @@ protected:
   /// Return true if the value types that can be represented by the specified
   /// register class are all legal.
   bool isLegalRC(const TargetRegisterClass *RC) const;
+
+  /// Replace/modify any TargetFrameIndex operands with a target-dependent
+  /// sequence of memory operands that is recognized by PrologEpilogInserter.
+  MachineBasicBlock *emitPatchPoint(MachineInstr *MI, MachineBasicBlock *MBB) const;
 };
 
 /// This class defines information used to lower LLVM code to legal SelectionDAG
@@ -2078,6 +2103,18 @@ public:
     return NULL;
   }
 
+  /// This callback is used to prepare for a volatile or atomic load.
+  /// It takes a chain node as input and returns the chain for the load itself.
+  ///
+  /// Having a callback like this is necessary for targets like SystemZ,
+  /// which allows a CPU to reuse the result of a previous load indefinitely,
+  /// even if a cache-coherent store is performed by another CPU.  The default
+  /// implementation does nothing.
+  virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL,
+                                              SelectionDAG &DAG) const {
+    return Chain;
+  }
+
   /// This callback is invoked by the type legalizer to legalize nodes with an
   /// illegal operand type but legal result types. It replaces the
   /// LowerOperation callback in the type Legalizer. The reason we can not do
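Editorial note: a target opts in from its lowering constructor. The fragment below is hypothetical (MyTargetLowering is not a real class, and the base-class constructor arguments follow the usual pattern of in-tree targets of this era), but setHasMultipleConditionRegisters() is the setter added above.

MyTargetLowering::MyTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
  // Comparison results can live in any of several allocatable condition
  // registers, so do not sink comparisons into the blocks of their users.
  setHasMultipleConditionRegisters();
}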
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index 284b6bbdb897..f389e1966c06 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -34,6 +34,7 @@ namespace llvm {
 
 class TargetLoweringObjectFile : public MCObjectFileInfo {
   MCContext *Ctx;
+  const DataLayout *DL;
 
   TargetLoweringObjectFile(
     const TargetLoweringObjectFile&) LLVM_DELETED_FUNCTION;
@@ -42,7 +43,7 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
 public:
   MCContext &getContext() const { return *Ctx; }
 
-  TargetLoweringObjectFile() : MCObjectFileInfo(), Ctx(0) {}
+  TargetLoweringObjectFile() : MCObjectFileInfo(), Ctx(0), DL(0) {}
 
   virtual ~TargetLoweringObjectFile();
 
@@ -121,6 +122,11 @@ public:
   /// main label that is the address of the global
   MCSymbol *getSymbol(Mangler &M, const GlobalValue *GV) const;
 
+  /// Return the MCSymbol for a private symbol with global value name as its
+  /// base, with the specified suffix.
+  MCSymbol *getSymbolWithGlobalValueBase(Mangler &M, const GlobalValue *GV,
+                                         StringRef Suffix) const;
+
   // getCFIPersonalitySymbol - The symbol that gets passed to .cfi_personality.
   virtual MCSymbol *
   getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetMachine.h b/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetMachine.h
index 11b0f5fb77fc..c27ffdfcd77b 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetMachine.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetMachine.h
@@ -88,6 +88,7 @@ protected: // Can only create subclasses.
   unsigned MCUseLoc : 1;
   unsigned MCUseCFI : 1;
   unsigned MCUseDwarfDirectory : 1;
+  unsigned RequireStructuredCFG : 1;
 
 public:
   virtual ~TargetMachine();
@@ -108,7 +109,7 @@ public:
   void resetTargetOptions(const MachineFunction *MF) const;
 
   // Interfaces to the major aspects of target machine information:
-  //
+  //
   // -- Instruction opcode and operand information
   // -- Pipelines and scheduling information
   // -- Stack frame information
@@ -156,6 +157,9 @@ public:
     return 0;
   }
 
+  bool requiresStructuredCFG() const { return RequireStructuredCFG; }
+  void setRequiresStructuredCFG(bool Value) { RequireStructuredCFG = Value; }
+
   /// hasMCRelaxAll - Check whether all machine code instructions should be
   /// relaxed.
   bool hasMCRelaxAll() const { return MCRelaxAll; }
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetRegisterInfo.h b/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetRegisterInfo.h
index 958bea6f2b95..5f50e402b9ce 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetRegisterInfo.h
+++ b/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetRegisterInfo.h
@@ -672,6 +672,17 @@ public:
     // Do nothing.
   }
 
+  /// Allow the target to reverse allocation order of local live ranges. This
+  /// will generally allocate shorter local live ranges first. For targets with
+  /// many registers, this could reduce regalloc compile time by a large
+  /// factor. It should still achieve optimal coloring; however, it can change
+  /// register eviction decisions. It is disabled by default for two reasons:
+  /// (1) Top-down allocation is simpler and easier to debug for targets that
+  ///     don't benefit from reversing the order.
+  /// (2) Bottom-up allocation could result in poor eviction decisions on some
+  ///     targets affecting the performance of compiled code.
+  virtual bool reverseLocalAssignment() const { return false; }
+
   /// requiresRegisterScavenging - returns true if the target requires (and can
   /// make use of) the register scavenger.
   virtual bool requiresRegisterScavenging(const MachineFunction &MF) const {
diff --git a/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetSchedule.td b/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetSchedule.td
index 9d4858ac32f2..b4d0c44448ec 100644
--- a/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetSchedule.td
+++ b/external/bsd/llvm/dist/llvm/include/llvm/Target/TargetSchedule.td
@@ -114,14 +114,46 @@ class ProcResourceKind;
 // resources implies using one of the super resources.
 //
 // ProcResourceUnits normally model a few buffered resources within an
-// out-of-order engine that the compiler attempts to conserve.
-// Buffered resources may be held for multiple clock cycles, but the
-// scheduler does not pin them to a particular clock cycle relative to
-// instruction dispatch. Setting BufferSize=0 changes this to an
-// in-order resource. In this case, the scheduler counts down from the
-// cycle that the instruction issues in-order, forcing an interlock
-// with subsequent instructions that require the same resource until
-// the number of ResourceCyles specified in WriteRes expire.
+// out-of-order engine. Buffered resources may be held for multiple
+// clock cycles, but the scheduler does not pin them to a particular
+// clock cycle relative to instruction dispatch. Setting BufferSize=0
+// changes this to an in-order issue/dispatch resource. In this case,
+// the scheduler counts down from the cycle that the instruction
+// issues in-order, forcing a stall whenever a subsequent instruction
+// requires the same resource until the number of ResourceCycles
+// specified in WriteRes expire. Setting BufferSize=1 changes this to
+// an in-order latency resource. In this case, the scheduler models
+// producer/consumer stalls between instructions that use the
+// resource.
+//
+// Examples (all assume an out-of-order engine):
+//
+// Use BufferSize = -1 for "issue ports" fed by a unified reservation
+// station. Here the size of the reservation station is modeled by
+// MicroOpBufferSize, which should be the minimum size of either the
+// register rename pool, unified reservation station, or reorder
+// buffer.
+//
+// Use BufferSize = 0 for resources that force "dispatch/issue
+// groups". (Different processors define dispatch/issue
+// differently. Here we refer to the stage between decoding into
+// micro-ops and moving them into a reservation station.) Normally
+// NumMicroOps is sufficient to limit dispatch/issue groups. However,
+// some processors can form groups with only certain combinations of
+// instruction types. e.g. POWER7.
+//
+// Use BufferSize = 1 for in-order execution units. This is used for
+// an in-order pipeline within an out-of-order core where scheduling
+// dependent operations back-to-back is guaranteed to cause a
+// bubble. e.g. Cortex-a9 floating-point.
+//
+// Use BufferSize > 1 for out-of-order execution units with a
+// separate reservation station. This simply models the size of the
+// reservation station.
+//
+// To model both dispatch/issue groups and in-order execution units,
+// create two types of units, one with BufferSize=0 and one with
+// BufferSize=1.
 //
 // SchedModel ties these units to a processor for any stand-alone defs
 // of this class. Instances of subclass ProcResource will be automatically
diff --git a/external/bsd/llvm/dist/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/external/bsd/llvm/dist/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index b2c20110e90e..f1a9dd991c2f 100644
--- a/external/bsd/llvm/dist/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -18,7 +18,10 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Constants.h"
@@ -38,6 +41,12 @@
 #include <algorithm>
 using namespace llvm;
 
+/// Cutoff after which to stop analysing a set of phi nodes potentially involved
+/// in a cycle. Because we are analysing 'through' phi nodes we need to be
+/// careful with value equivalence. We use reachability to make sure a value
+/// cannot be involved in a cycle.
+const unsigned MaxNumPhiBBsValueReachabilityCheck = 20;
+
 //===----------------------------------------------------------------------===//
 // Useful predicates
 //===----------------------------------------------------------------------===//
@@ -403,42 +412,6 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
   return V;
 }
 
-/// GetIndexDifference - Dest and Src are the variable indices from two
-/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
-/// pointers.  Subtract the GEP2 indices from GEP1 to find the symbolic
-/// difference between the two pointers.
-static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
-                               const SmallVectorImpl<VariableGEPIndex> &Src) {
-  if (Src.empty()) return;
-
-  for (unsigned i = 0, e = Src.size(); i != e; ++i) {
-    const Value *V = Src[i].V;
-    ExtensionKind Extension = Src[i].Extension;
-    int64_t Scale = Src[i].Scale;
-
-    // Find V in Dest.  This is N^2, but pointer indices almost never have more
-    // than a few variable indexes.
-    for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
-      if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
-
-      // If we found it, subtract off Scale V's from the entry in Dest.  If it
-      // goes to zero, remove the entry.
-      if (Dest[j].Scale != Scale)
-        Dest[j].Scale -= Scale;
-      else
-        Dest.erase(Dest.begin()+j);
-      Scale = 0;
-      break;
-    }
-
-    // If we didn't consume this entry, add it to the end of the Dest list.
-    if (Scale) {
-      VariableGEPIndex Entry = { V, Extension, -Scale };
-      Dest.push_back(Entry);
-    }
-  }
-}
-
 //===----------------------------------------------------------------------===//
 // BasicAliasAnalysis Pass
 //===----------------------------------------------------------------------===//
@@ -492,6 +465,7 @@ namespace {
       // SmallDenseMap if it ever grows larger.
       // FIXME: This should really be shrink_to_inline_capacity_and_clear().
       AliasCache.shrink_and_clear();
+      VisitedPhiBBs.clear();
       return Alias;
     }
 
@@ -532,9 +506,39 @@ namespace {
     typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy;
     AliasCacheTy AliasCache;
 
+    /// \brief Track phi nodes we have visited. When interpreting "Value"
+    /// pointer equality as value equality we need to make sure that the
+    /// "Value" is not part of a cycle. Otherwise, two uses could come from
+    /// different "iterations" of a cycle and see different values for the same
+    /// "Value" pointer.
+    /// The following example shows the problem:
+    ///   %p = phi(%alloca1, %addr2)
+    ///   %l = load %ptr
+    ///   %addr1 = gep %alloca2, 0, %l
+    ///   %addr2 = gep %alloca2, 0, (%l + 1)
+    ///   alias(%p, %addr1) -> MayAlias !
+    ///   store %l, ...
+    SmallPtrSet<const BasicBlock*, 8> VisitedPhiBBs;
+
     // Visited - Track instructions visited by pointsToConstantMemory.
     SmallPtrSet<const Value*, 16> Visited;
 
+    /// \brief Check whether two Values can be considered equivalent.
+    ///
+    /// In addition to pointer equivalence of \p V1 and \p V2 this checks
+    /// whether they can not be part of a cycle in the value graph by looking at
+    /// all visited phi nodes and making sure that the phis cannot reach the
+    /// value. We have to do this because we are looking through phi nodes (that
+    /// is, we say noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB)).
+    bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
+
+    /// \brief Dest and Src are the variable indices from two decomposed
+    /// GetElementPtr instructions GEP1 and GEP2 which have common base
+    /// pointers.  Subtract the GEP2 indices from GEP1 to find the symbolic
+    /// difference between the two pointers.
+ void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest, + const SmallVectorImpl<VariableGEPIndex> &Src); + // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP // instruction against another. AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size, @@ -1094,6 +1098,10 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, const MDNode *PNTBAAInfo, const Value *V2, uint64_t V2Size, const MDNode *V2TBAAInfo) { + // Track phi nodes we have visited. We use this information when we determine + // value equivalence. + VisitedPhiBBs.insert(PN->getParent()); + // If the values are PHIs in the same block, we can do a more precise // as well as efficient check: just check for aliases between the values // on corresponding edges. @@ -1187,7 +1195,13 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, V2 = V2->stripPointerCasts(); // Are we checking for alias of the same value? - if (V1 == V2) return MustAlias; + // Because we look 'through' phi nodes we could look at "Value" pointers from + // different iterations. We must therefore make sure that this is not the + // case. The function isValueEqualInPotentialCycles ensures that this cannot + // happen by looking at the visited phi nodes and making sure they cannot + // reach the value. + if (isValueEqualInPotentialCycles(V1, V2)) + return MustAlias; if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy()) return NoAlias; // Scalars cannot alias each other @@ -1307,3 +1321,71 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, Location(V2, V2Size, V2TBAAInfo)); return AliasCache[Locs] = Result; } + +bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V, + const Value *V2) { + if (V != V2) + return false; + + const Instruction *Inst = dyn_cast<Instruction>(V); + if (!Inst) + return true; + + if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck) + return false; + + // Use dominance or loop info if available. + DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); + LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>(); + + // Make sure that the visited phis cannot reach the Value. This ensures that + // the Values cannot come from different iterations of a potential cycle the + // phi nodes could be involved in. + for (SmallPtrSet<const BasicBlock *, 8>::iterator PI = VisitedPhiBBs.begin(), + PE = VisitedPhiBBs.end(); + PI != PE; ++PI) + if (isPotentiallyReachable((*PI)->begin(), Inst, DT, LI)) + return false; + + return true; +} + +/// GetIndexDifference - Dest and Src are the variable indices from two +/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base +/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic +/// difference between the two pointers. +void BasicAliasAnalysis::GetIndexDifference( + SmallVectorImpl<VariableGEPIndex> &Dest, + const SmallVectorImpl<VariableGEPIndex> &Src) { + if (Src.empty()) + return; + + for (unsigned i = 0, e = Src.size(); i != e; ++i) { + const Value *V = Src[i].V; + ExtensionKind Extension = Src[i].Extension; + int64_t Scale = Src[i].Scale; + + // Find V in Dest. This is N^2, but pointer indices almost never have more + // than a few variable indexes. + for (unsigned j = 0, e = Dest.size(); j != e; ++j) { + if (!isValueEqualInPotentialCycles(Dest[j].V, V) || + Dest[j].Extension != Extension) + continue; + + // If we found it, subtract off Scale V's from the entry in Dest. If it + // goes to zero, remove the entry.
+ if (Dest[j].Scale != Scale) + Dest[j].Scale -= Scale; + else + Dest.erase(Dest.begin() + j); + Scale = 0; + break; + } + + // If we didn't consume this entry, add it to the end of the Dest list. + if (Scale) { + VariableGEPIndex Entry = { V, Extension, -Scale }; + Dest.push_back(Entry); + } + } +} diff --git a/external/bsd/llvm/dist/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Analysis/BlockFrequencyInfo.cpp index 62f3ab16ca7c..0088c2074557 100644 --- a/external/bsd/llvm/dist/llvm/lib/Analysis/BlockFrequencyInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -86,7 +86,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits { OS << Node->getName().str() << ":"; switch (ViewBlockFreqPropagationDAG) { case GVDT_Fraction: - Graph->getBlockFreq(Node).print(OS); + Graph->printBlockFreq(OS, Node); break; case GVDT_Integer: OS << Graph->getBlockFreq(Node).getFrequency(); @@ -159,3 +159,18 @@ void BlockFrequencyInfo::view() const { const Function *BlockFrequencyInfo::getFunction() const { return BFI->Fn; } + +raw_ostream &BlockFrequencyInfo:: +printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const { + return BFI->printBlockFreq(OS, Freq); +} + +raw_ostream & +BlockFrequencyInfo::printBlockFreq(raw_ostream &OS, + const BasicBlock *BB) const { + return BFI->printBlockFreq(OS, BB); +} + +uint64_t BlockFrequencyInfo::getEntryFreq() const { + return BFI->getEntryFreq(); +} diff --git a/external/bsd/llvm/dist/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 86560ca33d0c..15491f072cc8 100644 --- a/external/bsd/llvm/dist/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "branch-prob" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/LoopInfo.h" @@ -483,6 +484,8 @@ void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { } bool BranchProbabilityInfo::runOnFunction(Function &F) { + DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() + << " ----\n\n"); LastF = &F; // Store the last function we ran on for printing. LI = &getAnalysis<LoopInfo>(); assert(PostDominatedByUnreachable.empty()); @@ -591,6 +594,13 @@ getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const { return DEFAULT_WEIGHT; } +uint32_t +BranchProbabilityInfo:: +getEdgeWeight(const BasicBlock *Src, succ_const_iterator Dst) const { + size_t index = std::distance(succ_begin(Src), Dst); + return getEdgeWeight(Src, index); +} + /// Get the raw edge weight calculated for the block pair. This returns the sum /// of all raw edge weights from Src to Dst.
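// Illustrative sketch (hypothetical client, not part of this patch): using
// the succ_const_iterator overload of getEdgeWeight() added above. The
// overload recovers the successor index internally via std::distance, so a
// caller iterating successors does not have to track the index itself.
// Assumes the usual CFG.h and raw_ostream.h helpers are available.
static void dumpSuccessorWeights(const BasicBlock *Src,
                                 const BranchProbabilityInfo &BPI) {
  for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
    errs() << (*I)->getName() << " <- weight "
           << BPI.getEdgeWeight(Src, I) << "\n";
}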
uint32_t BranchProbabilityInfo:: diff --git a/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/CallGraph.cpp b/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/CallGraph.cpp index f042964c21d9..de8164cc2dbd 100644 --- a/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/CallGraph.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/CallGraph.cpp @@ -16,9 +16,37 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -CallGraph::CallGraph() - : ModulePass(ID), Root(0), ExternalCallingNode(0), CallsExternalNode(0) { - initializeCallGraphPass(*PassRegistry::getPassRegistry()); +//===----------------------------------------------------------------------===// +// Implementations of the CallGraph class methods. +// + +CallGraph::CallGraph(Module &M) + : M(M), Root(0), ExternalCallingNode(getOrInsertFunction(0)), + CallsExternalNode(new CallGraphNode(0)) { + // Add every function to the call graph. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + addToCallGraph(I); + + // If we didn't find a main function, use the external call graph node + if (Root == 0) + Root = ExternalCallingNode; +} + +CallGraph::~CallGraph() { + // CallsExternalNode is not in the function map, delete it explicitly. + CallsExternalNode->allReferencesDropped(); + delete CallsExternalNode; + +// Reset all nodes' use counts to zero before deleting them to prevent an +// assertion from firing. +#ifndef NDEBUG + for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); + I != E; ++I) + I->second->allReferencesDropped(); +#endif + for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); + I != E; ++I) + delete I->second; } void CallGraph::addToCallGraph(Function *F) { @@ -62,59 +90,7 @@ void CallGraph::addToCallGraph(Function *F) { } } -void CallGraph::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); -} - -bool CallGraph::runOnModule(Module &M) { - Mod = &M; - - ExternalCallingNode = getOrInsertFunction(0); - assert(!CallsExternalNode); - CallsExternalNode = new CallGraphNode(0); - Root = 0; - - // Add every function to the call graph. - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - addToCallGraph(I); - - // If we didn't find a main function, use the external call graph node - if (Root == 0) - Root = ExternalCallingNode; - - return false; -} - -INITIALIZE_PASS(CallGraph, "basiccg", "CallGraph Construction", false, true) - -char CallGraph::ID = 0; - -void CallGraph::releaseMemory() { - /// CallsExternalNode is not in the function map, delete it explicitly. - if (CallsExternalNode) { - CallsExternalNode->allReferencesDropped(); - delete CallsExternalNode; - CallsExternalNode = 0; - } - - if (FunctionMap.empty()) - return; - -// Reset all node's use counts to zero before deleting them to prevent an -// assertion from firing.
-#ifndef NDEBUG - for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); - I != E; ++I) - I->second->allReferencesDropped(); -#endif - - for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); - I != E; ++I) - delete I->second; - FunctionMap.clear(); -} - -void CallGraph::print(raw_ostream &OS, const Module*) const { +void CallGraph::print(raw_ostream &OS) const { OS << "CallGraph Root is: "; if (Function *F = Root->getFunction()) OS << F->getName() << "\n"; @@ -125,15 +101,10 @@ void CallGraph::print(raw_ostream &OS, const Module*) const { for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I) I->second->print(OS); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void CallGraph::dump() const { - print(dbgs(), 0); -} -#endif -//===----------------------------------------------------------------------===// -// Implementations of public modification methods -// +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void CallGraph::dump() const { print(dbgs()); } +#endif // removeFunctionFromModule - Unlink the function from this module, returning // it. Because this removes the function from the module, the call graph node @@ -148,7 +119,7 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) { delete CGN; // Delete the call graph node for this func FunctionMap.erase(F); // Remove the call graph node from the map - Mod->getFunctionList().remove(F); + M.getFunctionList().remove(F); return F; } @@ -172,12 +143,17 @@ void CallGraph::spliceFunction(const Function *From, const Function *To) { // not already exist. CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) { CallGraphNode *&CGN = FunctionMap[F]; - if (CGN) return CGN; - - assert((!F || F->getParent() == Mod) && "Function not in current module!"); + if (CGN) + return CGN; + + assert((!F || F->getParent() == &M) && "Function not in current module!"); return CGN = new CallGraphNode(const_cast<Function*>(F)); } +//===----------------------------------------------------------------------===// +// Implementations of the CallGraphNode class methods. +// + void CallGraphNode::print(raw_ostream &OS) const { if (Function *F = getFunction()) OS << "Call graph node for function: '" << F->getName() << "'"; @@ -260,5 +236,46 @@ void CallGraphNode::replaceCallEdge(CallSite CS, } } +//===----------------------------------------------------------------------===// +// Implementations of the CallGraphWrapperPass class methods. +// + +CallGraphWrapperPass::CallGraphWrapperPass() : ModulePass(ID) { + initializeCallGraphWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +CallGraphWrapperPass::~CallGraphWrapperPass() {} + +void CallGraphWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); +} + +bool CallGraphWrapperPass::runOnModule(Module &M) { + // All the real work is done in the constructor for the CallGraph. + G.reset(new CallGraph(M)); + return false; +} + +INITIALIZE_PASS(CallGraphWrapperPass, "basiccg", "CallGraph Construction", + false, true) + +char CallGraphWrapperPass::ID = 0; + +void CallGraphWrapperPass::releaseMemory() { G.reset(0); } + +void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const { + if (!G) { + OS << "No call graph has been built!\n"; + return; + } + + // Just delegate.
+ G->print(OS); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void CallGraphWrapperPass::dump() const { print(dbgs(), 0); } +#endif + // Ensure that users of CallGraph.h also link with this file DEFINING_FILE_FOR(CallGraph) diff --git a/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp index 182beca3643e..a3f77b7eb470 100644 --- a/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -60,7 +60,7 @@ public: /// Pass Manager itself does not invalidate any analysis info. void getAnalysisUsage(AnalysisUsage &Info) const { // CGPassManager walks SCC and it needs CallGraph. - Info.addRequired<CallGraph>(); + Info.addRequired<CallGraphWrapperPass>(); Info.setPreservesAll(); } @@ -424,7 +424,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, /// run - Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the module, and if so, return true. bool CGPassManager::runOnModule(Module &M) { - CallGraph &CG = getAnalysis<CallGraph>(); + CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); bool Changed = doInitialization(CG); // Walk the callgraph in bottom-up SCC order. @@ -570,8 +570,8 @@ void CallGraphSCCPass::assignPassManager(PMStack &PMS, /// the call graph. If the derived class implements this method, it should /// always explicitly call the implementation here. void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<CallGraph>(); - AU.addPreserved<CallGraph>(); + AU.addRequired<CallGraphWrapperPass>(); + AU.addPreserved<CallGraphWrapperPass>(); } diff --git a/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/CallPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/CallPrinter.cpp index 306ae7a4dbfb..68dcd3c06427 100644 --- a/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/CallPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/CallPrinter.cpp @@ -22,13 +22,10 @@ using namespace llvm; namespace llvm { -template<> -struct DOTGraphTraits<CallGraph*> : public DefaultDOTGraphTraits { - DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} +template <> struct DOTGraphTraits<CallGraph *> : public DefaultDOTGraphTraits { + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} - static std::string getGraphName(CallGraph *Graph) { - return "Call graph"; - } + static std::string getGraphName(CallGraph *Graph) { return "Call graph"; } std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) { if (Function *Func = Node->getFunction()) @@ -38,49 +35,57 @@ struct DOTGraphTraits<CallGraph *> : public DefaultDOTGraphTraits { } }; +struct AnalysisCallGraphWrapperPassTraits { + static CallGraph *getGraph(CallGraphWrapperPass *P) { + return &P->getCallGraph(); + } +}; + } // end llvm namespace namespace { struct CallGraphViewer - : public DOTGraphTraitsModuleViewer<CallGraph, true> { + : public DOTGraphTraitsModuleViewer<CallGraphWrapperPass, true, CallGraph *, + AnalysisCallGraphWrapperPassTraits> { static char ID; CallGraphViewer() - : DOTGraphTraitsModuleViewer<CallGraph, true>("callgraph", ID) { + : DOTGraphTraitsModuleViewer<CallGraphWrapperPass, true, CallGraph *, + AnalysisCallGraphWrapperPassTraits>( + "callgraph", ID) { initializeCallGraphViewerPass(*PassRegistry::getPassRegistry()); } }; -struct CallGraphPrinter - : public DOTGraphTraitsModulePrinter<CallGraph, true> { +struct CallGraphPrinter : public DOTGraphTraitsModulePrinter< + CallGraphWrapperPass, true, CallGraph *, + AnalysisCallGraphWrapperPassTraits> { static char ID; CallGraphPrinter() - : DOTGraphTraitsModulePrinter<CallGraph, true>("callgraph", ID) { - initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry()); + : DOTGraphTraitsModulePrinter<CallGraphWrapperPass, true, CallGraph *, + AnalysisCallGraphWrapperPassTraits>( + "callgraph", ID) { +
initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry()); } }; } // end anonymous namespace char CallGraphViewer::ID = 0; -INITIALIZE_PASS(CallGraphViewer, "view-callgraph", - "View call graph", - false, false) +INITIALIZE_PASS(CallGraphViewer, "view-callgraph", "View call graph", false, + false) char CallGraphPrinter::ID = 0; INITIALIZE_PASS(CallGraphPrinter, "dot-callgraph", - "Print call graph to 'dot' file", - false, false) + "Print call graph to 'dot' file", false, false) // Create methods available outside of this file, to use them // "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by // the link time optimization. -ModulePass *llvm::createCallGraphViewerPass() { - return new CallGraphViewer(); -} +ModulePass *llvm::createCallGraphViewerPass() { return new CallGraphViewer(); } ModulePass *llvm::createCallGraphPrinterPass() { return new CallGraphPrinter(); diff --git a/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/GlobalsModRef.cpp index 7ec46442bf4c..e0723026de72 100644 --- a/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/GlobalsModRef.cpp @@ -95,15 +95,19 @@ namespace { } bool runOnModule(Module &M) { - InitializeAliasAnalysis(this); // set up super class - AnalyzeGlobals(M); // find non-addr taken globals - AnalyzeCallGraph(getAnalysis<CallGraph>(), M); // Propagate on CG + InitializeAliasAnalysis(this); + + // Find non-addr taken globals. + AnalyzeGlobals(M); + + // Propagate on CG. + AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M); return false; } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AliasAnalysis::getAnalysisUsage(AU); - AU.addRequired<CallGraph>(); + AU.addRequired<CallGraphWrapperPass>(); AU.setPreservesAll(); // Does not transform code } @@ -189,7 +193,7 @@ char GlobalsModRef::ID = 0; INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", "Simple mod/ref analysis for globals", false, true, false) -INITIALIZE_PASS_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", "Simple mod/ref analysis for globals", false, true, false) diff --git a/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/IPA.cpp b/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/IPA.cpp index 47357cf92127..b07271abd42e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/IPA.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/IPA.cpp @@ -19,7 +19,7 @@ using namespace llvm; /// initializeIPA - Initialize all passes linked into the IPA library. void llvm::initializeIPA(PassRegistry &Registry) { - initializeCallGraphPass(Registry); + initializeCallGraphWrapperPassPass(Registry); initializeCallGraphPrinterPass(Registry); initializeCallGraphViewerPass(Registry); initializeFindUsedTypesPass(Registry); diff --git a/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/InlineCost.cpp b/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/InlineCost.cpp index 013691b668b2..453658386d79 100644 --- a/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/InlineCost.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Analysis/IPA/InlineCost.cpp @@ -59,6 +59,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool ExposesReturnsTwice; bool HasDynamicAlloca; bool ContainsNoDuplicateCall; + bool HasReturn; + bool HasIndirectBr; /// Number of bytes allocated statically by the callee.
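// Illustrative sketch (hypothetical pass, not from this tree): the client
// migration implied by the CallGraphSCCPass/GlobalsModRef hunks above. The
// call graph is now owned by CallGraphWrapperPass and fetched explicitly.
struct ExampleCGClient : public ModulePass {
  static char ID;
  ExampleCGClient() : ModulePass(ID) {}
  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<CallGraphWrapperPass>(); // previously: CallGraph
    AU.setPreservesAll();
  }
  virtual bool runOnModule(Module &M) {
    CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
    CG.print(errs()); // print() lost its unused Module* parameter
    return false;
  }
};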
uint64_t AllocatedSize; @@ -132,6 +134,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool visitExtractValue(ExtractValueInst &I); bool visitInsertValue(InsertValueInst &I); bool visitCallSite(CallSite CS); + bool visitReturnInst(ReturnInst &RI); + bool visitBranchInst(BranchInst &BI); + bool visitSwitchInst(SwitchInst &SI); + bool visitIndirectBrInst(IndirectBrInst &IBI); + bool visitResumeInst(ResumeInst &RI); + bool visitUnreachableInst(UnreachableInst &I); public: CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI, @@ -139,12 +147,13 @@ public: : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), - ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0), - NumVectorInstructions(0), FiftyPercentVectorBonus(0), - TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), - NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), - NumConstantPtrDiffs(0), NumInstructionsSimplified(0), - SROACostSavings(0), SROACostSavingsLost(0) {} + ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), + AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), + FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), + NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), + NumConstantPtrCmps(0), NumConstantPtrDiffs(0), + NumInstructionsSimplified(0), SROACostSavings(0), + SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -704,7 +713,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { } bool CallAnalyzer::visitCallSite(CallSite CS) { - if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() && + if (CS.hasFnAttr(Attribute::ReturnsTwice) && !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::ReturnsTwice)) { // This aborts the entire analysis. @@ -785,6 +794,60 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { return Base::visitCallSite(CS); } +bool CallAnalyzer::visitReturnInst(ReturnInst &RI) { + // At least one return instruction will be free after inlining. + bool Free = !HasReturn; + HasReturn = true; + return Free; +} + +bool CallAnalyzer::visitBranchInst(BranchInst &BI) { + // We model unconditional branches as essentially free -- they really + // shouldn't exist at all, but handling them makes the behavior of the + // inliner more regular and predictable. Interestingly, conditional branches + // which will fold away are also free. + return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) || + dyn_cast_or_null<ConstantInt>( + SimplifiedValues.lookup(BI.getCondition())); +} + +bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { + // We model unconditional switches as free, see the comments on handling + // branches. + return isa<ConstantInt>(SI.getCondition()) || + dyn_cast_or_null<ConstantInt>( + SimplifiedValues.lookup(SI.getCondition())); +} + +bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) { + // We never want to inline functions that contain an indirectbr. This is + // incorrect because all the blockaddress's (in static global initializers + // for example) would be referring to the original function, and this + // indirect jump would jump from the inlined copy of the function into the + // original function which is extremely undefined behavior. + // FIXME: This logic isn't really right; we can safely inline functions with + // indirectbr's as long as no other function or global references the + // blockaddress of a block within the current function.
And as a QOI issue, + // if someone is using a blockaddress without an indirectbr, and that + // reference somehow ends up in another function or global, we probably don't + // want to inline this function. + HasIndirectBr = true; + return false; +} + +bool CallAnalyzer::visitResumeInst(ResumeInst &RI) { + // FIXME: It's not clear that a single instruction is an accurate model for + // the inline cost of a resume instruction. + return false; +} + +bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) { + // FIXME: It might be reasonable to discount the cost of instructions leading + // to unreachable as they have the lowest possible impact on both runtime and + // code size. + return true; // No actual code is needed for unreachable. +} + bool CallAnalyzer::visitInstruction(Instruction &I) { // Some instructions are free. All of the free intrinsics can also be // handled by SROA, etc. @@ -808,8 +871,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) { /// construct has been detected. It returns false if inlining is no longer /// viable, and true if inlining remains viable. bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { - for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end()); - I != E; ++I) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { ++NumInstructions; if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy()) ++NumVectorInstructions; @@ -825,7 +887,8 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { Cost += InlineConstants::InstrCost; // If visiting this instruction detected an uninlinable pattern, abort. - if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) + if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || + HasIndirectBr) return false; // If the caller is a recursive function then we don't want to inline @@ -989,10 +1052,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { } } - // Track whether we've seen a return instruction. The first return - // instruction is free, as at least one will usually disappear in inlining. - bool HasReturn = false; - // Populate our simplified values by mapping from function arguments to call // arguments with known important simplifications. CallSite::arg_iterator CAI = CS.arg_begin(); @@ -1039,33 +1098,11 @@ ... if (BB->empty()) continue; - // Handle the terminator cost here where we can track returns and other - // function-wide constructs. - TerminatorInst *TI = BB->getTerminator(); - - // We never want to inline functions that contain an indirectbr. This is - // incorrect because all the blockaddress's (in static global initializers - // for example) would be referring to the original function, and this - // indirect jump would jump from the inlined copy of the function into the - // original function which is extremely undefined behavior. - // FIXME: This logic isn't really right; we can safely inline functions - // with indirectbr's as long as no other function or global references the - // blockaddress of a block within the current function. And as a QOI issue, - // if someone is using a blockaddress without an indirectbr, and that - // reference somehow ends up in another function or global, we probably - // don't want to inline this function. - if (isa<IndirectBrInst>(TI)) - return false; - - if (!HasReturn && isa<ReturnInst>(TI)) - HasReturn = true; - else - Cost += InlineConstants::InstrCost; - // Analyze the cost of this block. If we blow through the threshold, this // returns false, and we can bail out.
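// Illustrative sketch (hypothetical helper, not from this tree): the folding
// test used by the new visitBranchInst/visitSwitchInst handlers above. A
// conditional terminator is treated as free when its condition is already a
// constant, or simplifies to one for this particular call site, because the
// dead successors will never be added to the analysis worklist.
static bool terminatorFoldsAway(
    Value *Cond, const DenseMap<Value *, Constant *> &SimplifiedValues) {
  return isa<ConstantInt>(Cond) ||
         dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond));
}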
if (!analyzeBlock(BB)) { - if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) + if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || + HasIndirectBr) return false; // If the caller is a recursive function then we don't want to inline @@ -1078,6 +1115,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { break; } + TerminatorInst *TI = BB->getTerminator(); + // Add in the live successors by first checking whether we have a terminator // that may be simplified based on the values simplified by this call. if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { @@ -1115,7 +1154,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { } } - // If this is a noduplicate call, we can still inline as long as + // If this is a noduplicate call, we can still inline as long as // inlining this would cause the removal of the caller (so the instruction // is not actually duplicated, just moved). if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) diff --git a/external/bsd/llvm/dist/llvm/lib/Analysis/MemDepPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Analysis/MemDepPrinter.cpp index d26aaf1b9048..d4f023589fc4 100644 --- a/external/bsd/llvm/dist/llvm/lib/Analysis/MemDepPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Analysis/MemDepPrinter.cpp @@ -68,7 +68,7 @@ namespace { return InstTypePair(dep.getInst(), Def); if (dep.isNonFuncLocal()) return InstTypePair(dep.getInst(), NonFuncLocal); - assert(dep.isUnknown() && "unexptected dependence type"); + assert(dep.isUnknown() && "unexpected dependence type"); return InstTypePair(dep.getInst(), Unknown); } static InstTypePair getInstTypePair(const Instruction* inst, DepType type) { diff --git a/external/bsd/llvm/dist/llvm/lib/Analysis/MemoryBuiltins.cpp b/external/bsd/llvm/dist/llvm/lib/Analysis/MemoryBuiltins.cpp index 1db0f634c941..37e2e271ceea 100644 --- a/external/bsd/llvm/dist/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -399,12 +399,14 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *DL, LLVMContext &Context, bool RoundToAlign) : DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) { - IntegerType *IntTy = DL->getIntPtrType(Context); - IntTyBits = IntTy->getBitWidth(); - Zero = APInt::getNullValue(IntTyBits); + // Pointer size must be rechecked for each object visited since it could have + // a different address space. } SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { + IntTyBits = DL->getPointerTypeSizeInBits(V->getType()); + Zero = APInt::getNullValue(IntTyBits); + V = V->stripPointerCasts(); if (Instruction *I = dyn_cast<Instruction>(V)) { // If we have already seen this instruction, bail out. Cycles can happen in @@ -592,11 +594,15 @@ ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *DL, bool RoundToAlign) : DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)), RoundToAlign(RoundToAlign) { - IntTy = DL->getIntPtrType(Context); - Zero = ConstantInt::get(IntTy, 0); + // IntTy and Zero must be set for each compute() since the address space may + // be different for later objects. } SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { + // XXX - Are vectors of pointers possible here?
+ IntTy = cast<IntegerType>(DL->getIntPtrType(V->getType())); + Zero = ConstantInt::get(IntTy, 0); + SizeOffsetEvalType Result = compute_(V); if (!bothKnown(Result)) { diff --git a/external/bsd/llvm/dist/llvm/lib/Analysis/PHITransAddr.cpp b/external/bsd/llvm/dist/llvm/lib/Analysis/PHITransAddr.cpp index e6af0663feaa..6c85d1195f27 100644 --- a/external/bsd/llvm/dist/llvm/lib/Analysis/PHITransAddr.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Analysis/PHITransAddr.cpp @@ -72,7 +72,7 @@ static bool VerifySubExpr(Value *Expr, // If it isn't in the InstInputs list it is a subexpr incorporated into the // address. Sanity check that it is phi translatable. if (!CanPHITrans(I)) { - errs() << "Non phi translatable instruction found in PHITransAddr:\n"; + errs() << "Instruction in PHITransAddr is not phi-translatable:\n"; errs() << *I << '\n'; llvm_unreachable("Either something is missing from InstInputs or " "CanPHITrans is wrong."); diff --git a/external/bsd/llvm/dist/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/external/bsd/llvm/dist/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index 86a557b55f7e..7a9efdaa4c24 100644 --- a/external/bsd/llvm/dist/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" @@ -1528,7 +1529,7 @@ Value *SCEVExpander::expand(const SCEV *S) { // // This is independent of PostIncLoops. The mapped value simply materializes // the expression at this insertion point. If the mapped value happened to be - // a postinc expansion, it could be reused by a non postinc user, but only if + // a postinc expansion, it could be reused by a non-postinc user, but only if // its insertion point was already at the head of the loop. InsertedExpressions[std::make_pair(S, InsertPt)] = V; return V; diff --git a/external/bsd/llvm/dist/llvm/lib/Analysis/ValueTracking.cpp b/external/bsd/llvm/dist/llvm/lib/Analysis/ValueTracking.cpp index e39ee628ff0c..803051d0bb2d 100644 --- a/external/bsd/llvm/dist/llvm/lib/Analysis/ValueTracking.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Analysis/ValueTracking.cpp @@ -2006,7 +2006,9 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, } case Instruction::Load: { const LoadInst *LI = cast<LoadInst>(Inst); - if (!LI->isUnordered()) + if (!LI->isUnordered() || + // Speculative load may create a race that did not exist in the source.
+ LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) return false; return LI->getPointerOperand()->isDereferenceablePointer(); } diff --git a/external/bsd/llvm/dist/llvm/lib/AsmParser/LLLexer.cpp b/external/bsd/llvm/dist/llvm/lib/AsmParser/LLLexer.cpp index 1e6085b443f2..1b32047e4bf6 100644 --- a/external/bsd/llvm/dist/llvm/lib/AsmParser/LLLexer.cpp +++ b/external/bsd/llvm/dist/llvm/lib/AsmParser/LLLexer.cpp @@ -275,6 +275,10 @@ lltok::Kind LLLexer::LexAt() { if (CurChar == '"') { StrVal.assign(TokStart+2, CurPtr-1); UnEscapeLexed(StrVal); + if (StringRef(StrVal).find_first_of(0) != StringRef::npos) { + Error("Null bytes are not allowed in names"); + return lltok::Error; + } return lltok::GlobalVar; } } @@ -568,6 +572,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(alwaysinline); KEYWORD(builtin); KEYWORD(byval); + KEYWORD(inalloca); KEYWORD(cold); KEYWORD(inlinehint); KEYWORD(inreg); diff --git a/external/bsd/llvm/dist/llvm/lib/AsmParser/LLParser.cpp b/external/bsd/llvm/dist/llvm/lib/AsmParser/LLParser.cpp index 20fa0f4eab1e..a5b2aa586e46 100644 --- a/external/bsd/llvm/dist/llvm/lib/AsmParser/LLParser.cpp +++ b/external/bsd/llvm/dist/llvm/lib/AsmParser/LLParser.cpp @@ -182,6 +182,8 @@ bool LLParser::ValidateEndOfModule() { for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ) UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove + UpgradeDebugInfo(*M); + return false; } @@ -942,6 +944,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, "invalid use of attribute on a function"); break; case lltok::kw_byval: + case lltok::kw_inalloca: case lltok::kw_nest: case lltok::kw_noalias: case lltok::kw_nocapture: @@ -1154,6 +1157,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { continue; } case lltok::kw_byval: B.addAttribute(Attribute::ByVal); break; + case lltok::kw_inalloca: B.addAttribute(Attribute::InAlloca); break; case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break; case lltok::kw_nest: B.addAttribute(Attribute::Nest); break; case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; @@ -1216,6 +1220,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { // Error handling. 
case lltok::kw_align: case lltok::kw_byval: + case lltok::kw_inalloca: case lltok::kw_nest: case lltok::kw_nocapture: case lltok::kw_returned: diff --git a/external/bsd/llvm/dist/llvm/lib/AsmParser/LLToken.h b/external/bsd/llvm/dist/llvm/lib/AsmParser/LLToken.h index 786d84d76634..5a6866dd1498 100644 --- a/external/bsd/llvm/dist/llvm/lib/AsmParser/LLToken.h +++ b/external/bsd/llvm/dist/llvm/lib/AsmParser/LLToken.h @@ -99,6 +99,7 @@ namespace lltok { kw_sanitize_address, kw_builtin, kw_byval, + kw_inalloca, kw_cold, kw_inlinehint, kw_inreg, diff --git a/external/bsd/llvm/dist/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/external/bsd/llvm/dist/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 2c95c920f840..37515eb6f285 100644 --- a/external/bsd/llvm/dist/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -522,6 +522,8 @@ static Attribute::AttrKind GetAttrFromCode(uint64_t Code) { return Attribute::Builtin; case bitc::ATTR_KIND_BY_VAL: return Attribute::ByVal; + case bitc::ATTR_KIND_IN_ALLOCA: + return Attribute::InAlloca; case bitc::ATTR_KIND_COLD: return Attribute::Cold; case bitc::ATTR_KIND_INLINE_HINT: @@ -3152,6 +3154,7 @@ error_code BitcodeReader::MaterializeModule(Module *M) { for (unsigned I = 0, E = InstsWithTBAATag.size(); I < E; I++) UpgradeInstWithTBAATag(InstsWithTBAATag[I]); + UpgradeDebugInfo(*M); return error_code::success(); } diff --git a/external/bsd/llvm/dist/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/external/bsd/llvm/dist/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 4cfc6bde76ed..be19b781d4aa 100644 --- a/external/bsd/llvm/dist/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -169,6 +169,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_BUILTIN; case Attribute::ByVal: return bitc::ATTR_KIND_BY_VAL; + case Attribute::InAlloca: + return bitc::ATTR_KIND_IN_ALLOCA; case Attribute::Cold: return bitc::ATTR_KIND_COLD; case Attribute::InlineHint: @@ -382,7 +384,6 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { unsigned Code = 0; switch (T->getTypeID()) { - default: llvm_unreachable("Unknown type!"); case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break; case Type::HalfTyID: Code = bitc::TYPE_CODE_HALF; break; case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break; diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AllocationOrder.h b/external/bsd/llvm/dist/llvm/lib/CodeGen/AllocationOrder.h index aed461a7ed02..64ff2a7ce836 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AllocationOrder.h +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AllocationOrder.h @@ -45,10 +45,12 @@ public: /// Return the next physical register in the allocation order, or 0. /// It is safe to call next() again after it returned 0, it will keep /// returning 0 until rewind() is called. 
- unsigned next() { + unsigned next(unsigned Limit = 0) { if (Pos < 0) return Hints.end()[Pos++]; - while (Pos < int(Order.size())) { + if (!Limit) + Limit = Order.size(); + while (Pos < int(Limit)) { unsigned Reg = Order[Pos++]; if (!isHint(Reg)) return Reg; diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index 5d82dd9e9a7e..247867ff6be8 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -52,21 +52,21 @@ ARMTargetStreamer &ARMException::getTargetStreamer() { return static_cast<ARMTargetStreamer &>(TS); } -void ARMException::EndModule() { +void ARMException::endModule() { } -/// BeginFunction - Gather pre-function exception information. Assumes it's +/// beginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. -void ARMException::BeginFunction(const MachineFunction *MF) { +void ARMException::beginFunction(const MachineFunction *MF) { getTargetStreamer().emitFnStart(); if (Asm->MF->getFunction()->needsUnwindTableEntry()) Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber())); } -/// EndFunction - Gather and emit post-function exception information. +/// endFunction - Gather and emit post-function exception information. /// -void ARMException::EndFunction() { +void ARMException::endFunction(const MachineFunction *) { ARMTargetStreamer &ATS = getTargetStreamer(); if (!Asm->MF->getFunction()->needsUnwindTableEntry()) ATS.emitCantUnwind(); diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 308b0e091ac5..6028318dcef4 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -99,14 +99,14 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) OutContext(Streamer.getContext()), OutStreamer(Streamer), LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) { - DD = 0; DE = 0; MMI = 0; LI = 0; MF = 0; + DD = 0; MMI = 0; LI = 0; MF = 0; CurrentFnSym = CurrentFnSymForSize = 0; GCMetadataPrinters = 0; VerboseAsm = Streamer.isVerboseAsm(); } AsmPrinter::~AsmPrinter() { - assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized"); + assert(DD == 0 && Handlers.empty() && "Debug/EH info didn't get finalized"); if (GCMetadataPrinters != 0) { gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); @@ -165,7 +165,7 @@ bool AsmPrinter::doInitialization(Module &M) { OutStreamer.InitStreamer(); - Mang = new Mangler(&TM); + Mang = new Mangler(TM.getDataLayout()); // Allow the target to emit any magic that it wants at the start of the file.
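// Illustrative sketch (hypothetical caller, not from this tree): the Limit
// parameter added to AllocationOrder::next() above restricts iteration to a
// prefix of the allocation order; Limit = 0 keeps the old full scan.
static void scanOrderPrefix(AllocationOrder &Order) {
  Order.rewind();
  while (unsigned PhysReg = Order.next(8)) // consider only the first 8 entries
    tryCandidate(PhysReg);                 // tryCandidate is hypothetical
}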
EmitStartOfAsmFile(M); @@ -192,25 +192,29 @@ bool AsmPrinter::doInitialization(Module &M) { OutStreamer.AddBlankLine(); } - if (MAI->doesSupportDebugInformation()) + if (MAI->doesSupportDebugInformation()) { DD = new DwarfDebug(this, &M); + Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName)); + } + DwarfException *DE = 0; switch (MAI->getExceptionHandlingType()) { case ExceptionHandling::None: - return false; + break; case ExceptionHandling::SjLj: case ExceptionHandling::DwarfCFI: DE = new DwarfCFIException(this); - return false; + break; case ExceptionHandling::ARM: DE = new ARMException(this); - return false; + break; case ExceptionHandling::Win64: DE = new Win64Exception(this); - return false; + break; } - - llvm_unreachable("Unknown exception type."); + if (DE) + Handlers.push_back(HandlerInfo(DE, EHTimerName, DWARFGroupName)); + return false; } void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { @@ -222,13 +226,14 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::LinkerPrivateWeakLinkage: - if (MAI->getWeakDefDirective() != 0) { + if (MAI->hasWeakDefDirective()) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); bool CanBeHidden = false; - if (Linkage == GlobalValue::LinkOnceODRLinkage) { + if (Linkage == GlobalValue::LinkOnceODRLinkage && + MAI->hasWeakDefCanBeHiddenDirective()) { if (GV->hasUnnamedAddr()) { CanBeHidden = true; } else { @@ -243,7 +248,7 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); else OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate); - } else if (MAI->getLinkOnceDirective() != 0) { + } else if (MAI->hasLinkOnceDirective()) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); //NOTE: linkonce is handled by the section the symbol was assigned to. @@ -311,8 +316,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // sections and expected to be contiguous (e.g. ObjC metadata). unsigned AlignLog = getGVAlignmentLog2(GV, *DL); - if (DD) - DD->setSymbolSize(GVSym, Size); + for (unsigned I = 0, E = Handlers.size(); I != E; ++I) { + const HandlerInfo &OI = Handlers[I]; + NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, TimePassesIsEnabled); + OI.Handler->setSymbolSize(GVSym, Size); + } // Handle common and BSS local symbols (.lcomm). if (GVKind.isCommon() || GVKind.isBSSLocal()) { @@ -482,13 +490,10 @@ void AsmPrinter::EmitFunctionHeader() { } // Emit pre-function debug and/or EH information. - if (DE) { - NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); - DE->BeginFunction(MF); - } - if (DD) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->beginFunction(MF); + for (unsigned I = 0, E = Handlers.size(); I != E; ++I) { + const HandlerInfo &OI = Handlers[I]; + NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, TimePassesIsEnabled); + OI.Handler->beginFunction(MF); } // Emit the prefix data. @@ -578,8 +583,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { raw_svector_ostream OS(Str); OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. 
- DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata())); + DIVariable V(MI->getOperand(2).getMetadata()); if (V.getContext().isSubprogram()) { StringRef Name = DISubprogram(V.getContext()).getDisplayName(); if (!Name.empty()) @@ -657,10 +661,6 @@ bool AsmPrinter::needsSEHMoves() { MF->getFunction()->needsUnwindTableEntry(); } -bool AsmPrinter::needsRelocationsForDwarfStringPool() const { - return MAI->doesDwarfUseRelocationsAcrossSections(); -} - void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { const MCSymbol *Label = MI.getOperand(0).getMCSymbol(); @@ -693,7 +693,7 @@ void AsmPrinter::EmitFunctionBody() { // Emit target-specific gunk before the function body. EmitFunctionBodyStart(); - bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo(); + bool ShouldPrintDebugScopes = MMI->hasDebugInfo(); // Print out code for the function. bool HasAnyRealCode = false; @@ -714,8 +714,12 @@ } if (ShouldPrintDebugScopes) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->beginInstruction(II); + for (unsigned III = 0, EEE = Handlers.size(); III != EEE; ++III) { + const HandlerInfo &OI = Handlers[III]; + NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, + TimePassesIsEnabled); + OI.Handler->beginInstruction(II); + } } if (isVerbose()) @@ -754,8 +758,12 @@ } if (ShouldPrintDebugScopes) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->endInstruction(II); + for (unsigned III = 0, EEE = Handlers.size(); III != EEE; ++III) { + const HandlerInfo &OI = Handlers[III]; + NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, + TimePassesIsEnabled); + OI.Handler->endInstruction(); + } } } } @@ -811,14 +819,11 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.EmitELFSize(CurrentFnSym, SizeExp); } - // Emit post-function debug information. - if (DD) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->endFunction(MF); - } - if (DE) { - NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); - DE->EndFunction(); + // Emit post-function debug and/or EH information. + for (unsigned I = 0, E = Handlers.size(); I != E; ++I) { + const HandlerInfo &OI = Handlers[I]; + NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, TimePassesIsEnabled); + OI.Handler->endFunction(MF); } MMI->EndFunction(); @@ -907,20 +912,15 @@ bool AsmPrinter::doFinalization(Module &M) { OutStreamer.Flush(); // Finalize debug and EH information. - if (DE) { - { - NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); - DE->EndModule(); - } - delete DE; DE = 0; - } - if (DD) { - { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->endModule(); - } - delete DD; DD = 0; + for (unsigned I = 0, E = Handlers.size(); I != E; ++I) { + const HandlerInfo &OI = Handlers[I]; + NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, + TimePassesIsEnabled); + OI.Handler->endModule(); + delete OI.Handler; } + Handlers.clear(); + DD = 0; // If the target wants to know about weak references, print them all. if (MAI->getWeakRefDirective()) { @@ -1106,6 +1106,7 @@ void AsmPrinter::EmitConstantPool() { /// by the current function to the current output stream.
/// void AsmPrinter::EmitJumpTableInfo() { + const DataLayout *DL = MF->getTarget().getDataLayout(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (MJTI == 0) return; if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return; @@ -1171,7 +1172,7 @@ void AsmPrinter::EmitJumpTableInfo() { // before each jump table. The first label is never referenced, but tells // the assembler and linker the extents of the jump table object. The // second label is actually referenced by the code. - if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0]) + if (JTInDiffSection && DL->hasLinkerPrivateGlobalPrefix()) // FIXME: This doesn't have to have any specific name, just any randomly // named and numbered 'l' label would work. Simplify GetJTISymbol. OutStreamer.EmitLabel(GetJTISymbol(JTI, true)); @@ -1422,8 +1423,8 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, /// where the size in bytes of the directive is specified by Size and Hi/Lo /// specify the labels. This implicitly uses .set if it is available. void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, - const MCSymbol *Lo, unsigned Size) - const { + const MCSymbol *Lo, + unsigned Size) const { // Emit Hi+Offset - Lo // Get the Hi+Offset expression. @@ -1452,8 +1453,8 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, /// where the size in bytes of the directive is specified by Size and Label /// specifies the label. This implicitly uses .set if it is available. void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, - unsigned Size, bool IsSectionRelative) - const { + unsigned Size, + bool IsSectionRelative) const { if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) { OutStreamer.EmitCOFFSecRel32(Label); return; @@ -1462,14 +1463,12 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // Emit Label+Offset (or just Label if Offset is zero) const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext); if (Offset) - Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(Offset, OutContext), - OutContext); + Expr = MCBinaryExpr::CreateAdd( + Expr, MCConstantExpr::Create(Offset, OutContext), OutContext); OutStreamer.EmitValue(Expr, Size); } - //===----------------------------------------------------------------------===// // EmitAlignment - Emit an alignment directive to the specified power of @@ -1995,14 +1994,16 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const { /// GetTempSymbol - Return the MCSymbol corresponding to the assembler /// temporary label with the specified stem and unique ID. MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name, unsigned ID) const { - return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + + const DataLayout *DL = TM.getDataLayout(); + return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) + Name + Twine(ID)); } /// GetTempSymbol - Return an assembler temporary label with the specified /// stem. MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name) const { - return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix())+ + const DataLayout *DL = TM.getDataLayout(); + return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ Name); } @@ -2017,8 +2018,9 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { /// GetCPISymbol - Return the symbol for the specified constant pool entry. 
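// Illustrative sketch (hypothetical snippet, not from this tree): with the
// GetTempSymbol changes above, assembler-temporary label prefixes now come
// from the DataLayout rather than MCAsmInfo, so a printer would build a
// private symbol roughly like this:
static MCSymbol *makeTempSymbol(MCContext &Ctx, const DataLayout &DL,
                                StringRef Stem, unsigned ID) {
  // e.g. ".Lstem42" on ELF targets, "Lstem42" on Mach-O.
  return Ctx.GetOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + Stem +
                               Twine(ID));
}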
MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { + const DataLayout *DL = TM.getDataLayout(); return OutContext.GetOrCreateSymbol - (Twine(MAI->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) + (Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) + "_" + Twine(CPID)); } @@ -2030,21 +2032,15 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { /// GetJTSetSymbol - Return the symbol for the specified jump table .set /// FIXME: privatize to AsmPrinter. MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { + const DataLayout *DL = TM.getDataLayout(); return OutContext.GetOrCreateSymbol - (Twine(MAI->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" + + (Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" + Twine(UID) + "_set_" + Twine(MBBID)); } -/// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with -/// global value name as its base, with the specified suffix, and where the -/// symbol is forced to have private linkage if ForcePrivate is true. -MCSymbol *AsmPrinter::GetSymbolWithGlobalValueBase(const GlobalValue *GV, - StringRef Suffix, - bool ForcePrivate) const { - SmallString<60> NameStr; - Mang->getNameWithPrefix(NameStr, GV, ForcePrivate); - NameStr.append(Suffix.begin(), Suffix.end()); - return OutContext.GetOrCreateSymbol(NameStr.str()); +MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV, + StringRef Suffix) const { + return getObjFileLowering().getSymbolWithGlobalValueBase(*Mang, GV, Suffix); } /// GetExternalSymbolSymbol - Return the MCSymbol for the specified @@ -2261,3 +2257,6 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); } + +/// Pin vtable to this file. +AsmPrinterHandler::~AsmPrinterHandler() {} diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index b92f49cfaed8..2dfa98cc6236 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -52,9 +52,9 @@ void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, /// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value. 
void AsmPrinter::EmitCFAByte(unsigned Val) const { if (isVerbose()) { - if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64) + if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset + 64) OutStreamer.AddComment("DW_CFA_offset + Reg (" + - Twine(Val-dwarf::DW_CFA_offset) + ")"); + Twine(Val - dwarf::DW_CFA_offset) + ")"); else OutStreamer.AddComment(dwarf::CallFrameString(Val)); } @@ -63,43 +63,56 @@ static const char *DecodeDWARFEncoding(unsigned Encoding) { switch (Encoding) { - case dwarf::DW_EH_PE_absptr: return "absptr"; - case dwarf::DW_EH_PE_omit: return "omit"; - case dwarf::DW_EH_PE_pcrel: return "pcrel"; - case dwarf::DW_EH_PE_udata4: return "udata4"; - case dwarf::DW_EH_PE_udata8: return "udata8"; - case dwarf::DW_EH_PE_sdata4: return "sdata4"; - case dwarf::DW_EH_PE_sdata8: return "sdata8"; - case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: return "pcrel udata4"; - case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: return "pcrel sdata4"; - case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: return "pcrel udata8"; - case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: return "pcrel sdata8"; - case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4: + case dwarf::DW_EH_PE_absptr: + return "absptr"; + case dwarf::DW_EH_PE_omit: + return "omit"; + case dwarf::DW_EH_PE_pcrel: + return "pcrel"; + case dwarf::DW_EH_PE_udata4: + return "udata4"; + case dwarf::DW_EH_PE_udata8: + return "udata8"; + case dwarf::DW_EH_PE_sdata4: + return "sdata4"; + case dwarf::DW_EH_PE_sdata8: + return "sdata8"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: + return "pcrel udata4"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: + return "pcrel sdata4"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: + return "pcrel udata8"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: + return "pcrel sdata8"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4 + : return "indirect pcrel udata4"; - case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4: + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 + : return "indirect pcrel sdata4"; - case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8: + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8 + : return "indirect pcrel udata8"; - case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8: + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8 + : return "indirect pcrel sdata8"; } return "<unknown encoding>"; } - /// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an /// encoding. If verbose assembly output is enabled, we output comments /// describing the encoding. Desc is an optional string saying what the /// encoding is specifying (e.g. "LSDA").
void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const { if (isVerbose()) { - if (Desc != 0) - OutStreamer.AddComment(Twine(Desc)+" Encoding = " + + if (Desc) + OutStreamer.AddComment(Twine(Desc) + " Encoding = " + Twine(DecodeDWARFEncoding(Val))); else - OutStreamer.AddComment(Twine("Encoding = ") + - DecodeDWARFEncoding(Val)); + OutStreamer.AddComment(Twine("Encoding = ") + DecodeDWARFEncoding(Val)); } OutStreamer.EmitIntValue(Val, 1); @@ -111,11 +124,16 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { return 0; switch (Encoding & 0x07) { - default: llvm_unreachable("Invalid encoded value."); - case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize(); - case dwarf::DW_EH_PE_udata2: return 2; - case dwarf::DW_EH_PE_udata4: return 4; - case dwarf::DW_EH_PE_udata8: return 8; + default: + llvm_unreachable("Invalid encoded value."); + case dwarf::DW_EH_PE_absptr: + return TM.getDataLayout()->getPointerSize(); + case dwarf::DW_EH_PE_udata2: + return 2; + case dwarf::DW_EH_PE_udata4: + return 4; + case dwarf::DW_EH_PE_udata8: + return 8; } } @@ -125,7 +143,7 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, const TargetLoweringObjectFile &TLOF = getObjFileLowering(); const MCExpr *Exp = - TLOF.getTTypeGlobalReference(GV, Mang, MMI, Encoding, OutStreamer); + TLOF.getTTypeGlobalReference(GV, Mang, MMI, Encoding, OutStreamer); OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding)); } else OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding)); diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h new file mode 100644 index 000000000000..2825367abd8f --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h @@ -0,0 +1,57 @@ +//===-- lib/CodeGen/AsmPrinter/AsmPrinterHandler.h -------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a generic interface for AsmPrinter handlers, +// like debug and EH info emitters. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H__ +#define CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H__ + +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class MachineFunction; +class MachineInstr; +class MCSymbol; + +/// \brief Collects and handles AsmPrinter objects required to build debug +/// or EH information. +class AsmPrinterHandler { +public: + virtual ~AsmPrinterHandler(); + + /// \brief For symbols that have a size designated (e.g. common symbols), + /// this tracks that size. + virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) = 0; + + /// \brief Emit all sections that should come after the content. + virtual void endModule() = 0; + + /// \brief Gather pre-function debug information. + /// Every beginFunction(MF) call should be followed by an endFunction(MF) + /// call. + virtual void beginFunction(const MachineFunction *MF) = 0; + + /// \brief Gather post-function debug information. + /// Please note that some AsmPrinter implementations may not call + /// beginFunction at all. + virtual void endFunction(const MachineFunction *MF) = 0; + + /// \brief Process beginning of an instruction. 
+ virtual void beginInstruction(const MachineInstr *MI) = 0; + + /// \brief Process end of an instruction. + virtual void endInstruction() = 0; +}; +} // End of namespace llvm + +#endif diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 4f927f6ba80c..f60cbfddb47c 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" @@ -491,8 +492,9 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { /// for their own strange codes. void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, const char *Code) const { + const DataLayout *DL = TM.getDataLayout(); if (!strcmp(Code, "private")) { - OS << MAI->getPrivateGlobalPrefix(); + OS << DL->getPrivateGlobalPrefix(); } else if (!strcmp(Code, "comment")) { OS << MAI->getCommentString(); } else if (!strcmp(Code, "uid")) { diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt index be484a6fc9d9..2ad24e1af992 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -7,9 +7,9 @@ add_llvm_library(LLVMAsmPrinter DIEHash.cpp DwarfAccelTable.cpp DwarfCFIException.cpp - DwarfCompileUnit.cpp DwarfDebug.cpp DwarfException.cpp + DwarfUnit.cpp ErlangGCPrinter.cpp OcamlGCPrinter.cpp Win64Exception.cpp diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 69444285f4ec..e973541ee5be 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -13,6 +13,7 @@ #include "DIE.h" #include "DwarfDebug.h" +#include "DwarfUnit.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/IR/DataLayout.h" @@ -112,27 +113,28 @@ DIE::~DIE() { delete Children[i]; } -/// Climb up the parent chain to get the compile unit DIE to which this DIE +/// Climb up the parent chain to get the unit DIE to which this DIE /// belongs. -const DIE *DIE::getCompileUnit() const { - const DIE *Cu = getCompileUnitOrNull(); +const DIE *DIE::getUnit() const { + const DIE *Cu = getUnitOrNull(); assert(Cu && "We should not have orphaned DIEs."); return Cu; } -/// Climb up the parent chain to get the compile unit DIE this DIE belongs +/// Climb up the parent chain to get the unit DIE this DIE belongs /// to. Return NULL if DIE is not added to an owner yet. 
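The AsmPrinterHandler interface introduced above is a bracketing protocol: begin/endFunction pair per function, begin/endInstruction pair per instruction, endModule once at the end. A toy driver that honors the contract; the Handler type is a stand-in for illustration, not LLVM's class:

    #include <vector>

    struct Handler {                  // stand-in for AsmPrinterHandler
      virtual ~Handler() {}
      virtual void beginFunction() = 0;
      virtual void endFunction() = 0;
      virtual void beginInstruction() = 0;
      virtual void endInstruction() = 0;
      virtual void endModule() = 0;
    };

    void drive(std::vector<Handler *> &Hs, unsigned Funcs, unsigned Insns) {
      for (unsigned F = 0; F != Funcs; ++F) {
        for (size_t H = 0; H != Hs.size(); ++H)
          Hs[H]->beginFunction();
        for (unsigned I = 0; I != Insns; ++I)
          for (size_t H = 0; H != Hs.size(); ++H) {
            Hs[H]->beginInstruction(); // every begin...
            Hs[H]->endInstruction();   // ...is matched by an end
          }
        for (size_t H = 0; H != Hs.size(); ++H)
          Hs[H]->endFunction();
      }
      for (size_t H = 0; H != Hs.size(); ++H)
        Hs[H]->endModule();            // sections that follow the content
    }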
-const DIE *DIE::getCompileUnitOrNull() const {
+const DIE *DIE::getUnitOrNull() const {
   const DIE *p = this;
   while (p) {
-    if (p->getTag() == dwarf::DW_TAG_compile_unit)
+    if (p->getTag() == dwarf::DW_TAG_compile_unit ||
+        p->getTag() == dwarf::DW_TAG_type_unit)
       return p;
     p = p->getParent();
   }
   return NULL;
 }
 
-DIEValue *DIE::findAttribute(uint16_t Attribute) {
+DIEValue *DIE::findAttribute(uint16_t Attribute) const {
   const SmallVectorImpl<DIEValue *> &Values = getValues();
   const DIEAbbrev &Abbrevs = getAbbrev();
 
@@ -227,6 +229,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
   case dwarf::DW_FORM_ref4: // Fall thru
   case dwarf::DW_FORM_data4: Size = 4; break;
   case dwarf::DW_FORM_ref8: // Fall thru
+  case dwarf::DW_FORM_ref_sig8: // Fall thru
   case dwarf::DW_FORM_data8: Size = 8; break;
   case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return;
   case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return;
@@ -253,6 +256,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
   case dwarf::DW_FORM_ref4: // Fall thru
   case dwarf::DW_FORM_data4: return sizeof(int32_t);
   case dwarf::DW_FORM_ref8: // Fall thru
+  case dwarf::DW_FORM_ref_sig8: // Fall thru
   case dwarf::DW_FORM_data8: return sizeof(int64_t);
   case dwarf::DW_FORM_GNU_str_index: return MCAsmInfo::getULEB128Size(Integer);
   case dwarf::DW_FORM_GNU_addr_index: return MCAsmInfo::getULEB128Size(Integer);
@@ -338,6 +342,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
 ///
 unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
+  if (Form == dwarf::DW_FORM_sec_offset) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
   return AP->getDataLayout().getPointerSize();
 }
@@ -386,7 +391,9 @@ unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) {
   // specified to be four bytes in the DWARF 32-bit format and eight bytes
   // in the DWARF 64-bit format, while DWARF Version 2 specifies that such
   // references have the same size as an address on the target system.
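getUnitOrNull above is the classic climb-to-the-root walk: follow parent links until a node carrying one of the unit tags appears, or fall off the root. Reduced to its shape, with a hypothetical Node type standing in for DIE:

    struct Node {
      int Tag;
      const Node *Parent;
    };

    // Returns the nearest enclosing node tagged UnitTagA or UnitTagB, or
    // null when N is not attached to such an owner yet.
    const Node *findEnclosingUnit(const Node *N, int UnitTagA, int UnitTagB) {
      for (const Node *P = N; P; P = P->Parent)
        if (P->Tag == UnitTagA || P->Tag == UnitTagB)
          return P;
      return 0;
    }

Accepting two tags is the whole point of the rename: with type units in play, a DIE's owner may be DW_TAG_type_unit rather than DW_TAG_compile_unit.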
-  if (AP->getDwarfDebug()->getDwarfVersion() == 2)
+  const DwarfDebug *DD = AP->getDwarfDebug();
+  assert(DD && "Expected Dwarf Debug info to be available");
+  if (DD->getDwarfVersion() == 2)
     return AP->getDataLayout().getPointerSize();
   return sizeof(int32_t);
 }
@@ -397,6 +404,22 @@ void DIEEntry::print(raw_ostream &O) const {
 }
 #endif
 
+//===----------------------------------------------------------------------===//
+// DIETypeSignature Implementation
+//===----------------------------------------------------------------------===//
+void DIETypeSignature::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+  assert(Form == dwarf::DW_FORM_ref_sig8);
+  Asm->OutStreamer.EmitIntValue(Unit.getTypeSignature(), 8);
+}
+
+#ifndef NDEBUG
+void DIETypeSignature::print(raw_ostream &O) const {
+  O << format("Type Unit: 0x%lx", Unit.getTypeSignature());
+}
+
+void DIETypeSignature::dump() const { print(dbgs()); }
+#endif
+
 //===----------------------------------------------------------------------===//
 // DIEBlock Implementation
 //===----------------------------------------------------------------------===//
diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DIE.h b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DIE.h
index f4fa326ef67a..1655c8f1b209 100644
--- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DIE.h
+++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DIE.h
@@ -26,6 +26,7 @@ namespace llvm {
   class MCSymbol;
   class MCSymbolRefExpr;
   class raw_ostream;
+  class DwarfTypeUnit;
 
   //===--------------------------------------------------------------------===//
   /// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a
@@ -134,7 +135,7 @@
     explicit DIE(unsigned Tag)
         : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no),
           Parent(0) {}
-    virtual ~DIE();
+    ~DIE();
 
     // Accessors.
     DIEAbbrev &getAbbrev() { return Abbrev; }
@@ -146,12 +147,12 @@
     const std::vector<DIE *> &getChildren() const { return Children; }
     const SmallVectorImpl<DIEValue *> &getValues() const { return Values; }
     DIE *getParent() const { return Parent; }
-    /// Climb up the parent chain to get the compile unit DIE this DIE belongs
-    /// to.
-    const DIE *getCompileUnit() const;
-    /// Similar to getCompileUnit, returns null when DIE is not added to an
+    /// Climb up the parent chain to get the compile or type unit DIE this DIE
+    /// belongs to.
+    const DIE *getUnit() const;
+    /// Similar to getUnit, returns null when DIE is not added to an
     /// owner yet.
-    const DIE *getCompileUnitOrNull() const;
+    const DIE *getUnitOrNull() const;
 
     void setOffset(unsigned O) { Offset = O; }
     void setSize(unsigned S) { Size = S; }
@@ -172,9 +173,9 @@
       Child->Parent = this;
     }
 
-    /// findAttribute - Find a value in the DIE with the attribute given, returns NULL
-    /// if no such attribute exists.
-    DIEValue *findAttribute(uint16_t Attribute);
+    /// findAttribute - Find a value in the DIE with the attribute given,
+    /// returns NULL if no such attribute exists.
+    DIEValue *findAttribute(uint16_t Attribute) const;
 
 #ifndef NDEBUG
     void print(raw_ostream &O, unsigned IndentCount = 0) const;
@@ -195,6 +196,7 @@
       isLabel,
       isDelta,
       isEntry,
+      isTypeSignature,
       isBlock
     };
   protected:
@@ -411,6 +413,33 @@
 #endif
   };
 
+  //===--------------------------------------------------------------------===//
+  /// \brief A signature reference to a type unit.
+  class DIETypeSignature : public DIEValue {
+    const DwarfTypeUnit &Unit;
+  public:
+    explicit DIETypeSignature(const DwarfTypeUnit &Unit)
+        : DIEValue(isTypeSignature), Unit(Unit) {}
+
+    /// \brief Emit type unit signature.
+    virtual void EmitValue(AsmPrinter *Asm, dwarf::Form Form) const;
+
+    /// Returns size of a ref_sig8 entry.
+    virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+      assert(Form == dwarf::DW_FORM_ref_sig8);
+      return 8;
+    }
+
+    // \brief Implement isa/cast/dyncast.
+    static bool classof(const DIEValue *E) {
+      return E->getType() == isTypeSignature;
+    }
+#ifndef NDEBUG
+    virtual void print(raw_ostream &O) const;
+    void dump() const;
+#endif
+  };
+
   //===--------------------------------------------------------------------===//
   /// DIEBlock - A block of values. Primarily used for location expressions.
   //
diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 95eca90ef046..11cc454b33c3 100644
--- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -16,7 +16,6 @@
 
 #include "DIEHash.h"
 #include "DIE.h"
-#include "DwarfCompileUnit.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Debug.h"
@@ -92,10 +91,12 @@ void DIEHash::addParentContext(const DIE &Parent) {
   // outermost such construct...
   SmallVector<const DIE *, 1> Parents;
   const DIE *Cur = &Parent;
-  while (Cur->getTag() != dwarf::DW_TAG_compile_unit) {
+  while (Cur->getParent()) {
     Parents.push_back(Cur);
     Cur = Cur->getParent();
  }
+  assert(Cur->getTag() == dwarf::DW_TAG_compile_unit ||
+         Cur->getTag() == dwarf::DW_TAG_type_unit);
 
   // Reverse iterate over our list to go from the outermost construct to the
   // innermost.
diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
index f0c4ef98bb7a..bac3c5578447 100644
--- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -136,7 +136,8 @@ private:
                         StringRef Name);
 
   /// \brief Hashes a reference to a previously referenced type DIE.
-  void hashRepeatedTypeReference(dwarf::Attribute Attribute, unsigned DieNumber);
+  void hashRepeatedTypeReference(dwarf::Attribute Attribute,
+                                 unsigned DieNumber);
 
   void hashNestedType(const DIE &Die, StringRef Name);
 
diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index 689aeda86a58..bcbb6c8455c9 100644
--- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -31,7 +31,7 @@ DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList)
 
 DwarfAccelTable::~DwarfAccelTable() {}
 
-void DwarfAccelTable::AddName(StringRef Name, DIE *die, char Flags) {
+void DwarfAccelTable::AddName(StringRef Name, const DIE *die, char Flags) {
   assert(Data.empty() && "Already finalized!");
   // If the string is in the list already then add this die to the list
   // otherwise add a new one.
@@ -172,7 +172,7 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
 // Walk through the buckets and emit the full data for each element in
 // the bucket. For the string case emit the dies and the various offsets.
 // Terminate each HashData bucket with 0.
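DIETypeSignature above writes an 8-byte value naming a type unit. DWARF4 derives that signature from an MD5 over the type's context and contents, which is roughly what the DIEHash machinery (including the addParentContext walk) computes. As a stand-in for the idea, and explicitly not the real algorithm, any stable 64-bit hash of the qualified name illustrates it:

    #include <stdint.h>

    // FNV-1a, 64-bit: illustrative only; real type signatures use MD5.
    uint64_t toySignature(const char *QualifiedName) {
      uint64_t H = 14695981039346656037ULL;  // FNV offset basis
      for (const char *P = QualifiedName; *P; ++P) {
        H ^= (unsigned char)*P;
        H *= 1099511628211ULL;               // FNV prime
      }
      return H;
    }

The essential property is only that two producers hashing the same type arrive at the same 8 bytes, so a DW_FORM_ref_sig8 reference resolves across units and even across object files.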
-void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) {
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) {
   uint64_t PrevHash = UINT64_MAX;
   for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
     for (HashList::const_iterator HI = Buckets[i].begin(),
@@ -207,7 +207,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) {
 }
 
 // Emit the entire data structure to the output file.
-void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfUnits *D) {
+void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfFile *D) {
   // Emit the header.
   EmitHeader(Asm);
 
diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 7627313d28f1..18d1138228a8 100644
--- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -63,7 +63,7 @@ namespace llvm {
 
 class AsmPrinter;
 class DIE;
-class DwarfUnits;
+class DwarfFile;
 
 class DwarfAccelTable {
 
@@ -165,10 +165,10 @@ private:
   // HashData[hash_data_count]
 public:
   struct HashDataContents {
-    DIE *Die;   // Offsets
+    const DIE *Die; // Offsets
     char Flags; // Specific flags to output
 
-    HashDataContents(DIE *D, char Flags) : Die(D), Flags(Flags) {}
+    HashDataContents(const DIE *D, char Flags) : Die(D), Flags(Flags) {}
 #ifndef NDEBUG
     void print(raw_ostream &O) const {
       O << "  Offset: " << Die->getOffset() << "\n";
@@ -216,7 +216,7 @@ private:
   void EmitBuckets(AsmPrinter *);
   void EmitHashes(AsmPrinter *);
   void EmitOffsets(AsmPrinter *, MCSymbol *);
-  void EmitData(AsmPrinter *, DwarfUnits *D);
+  void EmitData(AsmPrinter *, DwarfFile *D);
 
   // Allocator for HashData and HashDataContents.
   BumpPtrAllocator Allocator;
@@ -241,9 +241,9 @@ private:
 public:
   DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
   ~DwarfAccelTable();
-  void AddName(StringRef, DIE *, char = 0);
+  void AddName(StringRef, const DIE *, char = 0);
   void FinalizeTable(AsmPrinter *, StringRef);
-  void Emit(AsmPrinter *, MCSymbol *, DwarfUnits *);
+  void Emit(AsmPrinter *, MCSymbol *, DwarfFile *);
 #ifndef NDEBUG
   void print(raw_ostream &O);
   void dump() { print(dbgs()); }
diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 8918f3de0a6a..4d5682d9998d 100644
--- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -46,9 +46,9 @@ DwarfCFIException::DwarfCFIException(AsmPrinter *A)
 
 DwarfCFIException::~DwarfCFIException() {}
 
-/// EndModule - Emit all exception information that should come after the
+/// endModule - Emit all exception information that should come after the
 /// content.
-void DwarfCFIException::EndModule() {
+void DwarfCFIException::endModule() {
   if (moveTypeModule == AsmPrinter::CFI_M_Debug)
     Asm->OutStreamer.EmitCFISections(false, true);
 
@@ -82,9 +82,9 @@ void DwarfCFIException::EndModule() {
   }
 }
 
-/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// beginFunction - Gather pre-function exception information. Assumes it's
 /// being emitted immediately after the function entry point.
-void DwarfCFIException::BeginFunction(const MachineFunction *MF) {
+void DwarfCFIException::beginFunction(const MachineFunction *MF) {
   shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
 
   // If any landing pads survive, we need an EH table.
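The accelerator table touched above groups each name into one of a fixed set of hash buckets; the Apple-style tables use a Bernstein (DJB) string hash for this. The bucketing step in isolation, with names local to the sketch:

    #include <stdint.h>
    #include <string>

    uint32_t djbHash(const std::string &S) {
      uint32_t H = 5381;
      for (size_t I = 0; I != S.size(); ++I)
        H = H * 33 + (unsigned char)S[I];
      return H;
    }

    size_t bucketFor(const std::string &Name, size_t BucketCount) {
      return djbHash(Name) % BucketCount; // equal hashes share a bucket
    }

A consumer recomputes the same hash at lookup time, jumps to the bucket, and only then compares strings, which is what makes these tables faster than scanning .debug_info.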
@@ -135,9 +135,9 @@ void DwarfCFIException::BeginFunction(const MachineFunction *MF) {
                                       LSDAEncoding);
 }
 
-/// EndFunction - Gather and emit post-function exception information.
+/// endFunction - Gather and emit post-function exception information.
 ///
-void DwarfCFIException::EndFunction() {
+void DwarfCFIException::endFunction(const MachineFunction *) {
   if (!shouldEmitPersonality && !shouldEmitMoves)
     return;
 
diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 24e2c0539843..f8058d24022a 100644
--- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -16,7 +16,7 @@
 #include "DIE.h"
 #include "DIEHash.h"
 #include "DwarfAccelTable.h"
-#include "DwarfCompileUnit.h"
+#include "DwarfUnit.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
@@ -58,15 +58,9 @@ static cl::opt<bool> UnknownLocations(
     cl::desc("Make an absence of debug location information explicit."),
    cl::init(false));
 
-static cl::opt<bool>
-GenerateODRHash("generate-odr-hash", cl::Hidden,
-                cl::desc("Add an ODR hash to external type DIEs."),
-                cl::init(false));
-
-static cl::opt<bool>
-GenerateCUHash("generate-cu-hash", cl::Hidden,
-               cl::desc("Add the CU hash as the dwo_id."),
-               cl::init(false));
+static cl::opt<bool> GenerateCUHash("generate-cu-hash", cl::Hidden,
+                                    cl::desc("Add the CU hash as the dwo_id."),
+                                    cl::init(false));
 
 static cl::opt<bool>
 GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden,
@@ -91,7 +85,7 @@ DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
 
 static cl::opt<DefaultOnOff>
 SplitDwarf("split-dwarf", cl::Hidden,
-           cl::desc("Output prototype dwarf split debug info."),
+           cl::desc("Output DWARF5 split debug info."),
            cl::values(clEnumVal(Default, "Default for platform"),
                       clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled"),
                       clEnumValEnd),
@@ -105,21 +99,25 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
                  clEnumVal(Disable, "Disabled"), clEnumValEnd),
                  cl::init(Default));
 
+static cl::opt<unsigned>
+DwarfVersionNumber("dwarf-version", cl::Hidden,
+                   cl::desc("Generate DWARF for dwarf version."), cl::init(0));
+
+static cl::opt<bool>
+DwarfCURanges("generate-dwarf-cu-ranges", cl::Hidden,
+              cl::desc("Generate DW_AT_ranges for compile units"),
+              cl::init(false));
+
 static const char *const DWARFGroupName = "DWARF Emission";
 static const char *const DbgTimerName = "DWARF Debug Writer";
 
 //===----------------------------------------------------------------------===//
-// Configuration values for initial hash set sizes (log2).
-//
-static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
-
 namespace llvm {
 
 /// resolve - Look in the DwarfDebug map for the MDNode that
 /// corresponds to the reference.
-template -T DbgVariable::resolve(DIRef Ref) const { +template T DbgVariable::resolve(DIRef Ref) const { return DD->resolve(Ref); } @@ -179,22 +177,18 @@ static unsigned getDwarfVersionFromModule(const Module *M) { } DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) - : Asm(A), MMI(Asm->MMI), FirstCU(0), - AbbreviationsSet(InitAbbreviationsSetSize), - SourceIdMap(DIEValueAllocator), - PrevLabel(NULL), GlobalCUIndexCount(0), - InfoHolder(A, &AbbreviationsSet, Abbreviations, "info_string", - DIEValueAllocator), - SkeletonAbbrevSet(InitAbbreviationsSetSize), - SkeletonHolder(A, &SkeletonAbbrevSet, SkeletonAbbrevs, "skel_string", - DIEValueAllocator) { + : Asm(A), MMI(Asm->MMI), FirstCU(0), SourceIdMap(DIEValueAllocator), + PrevLabel(NULL), GlobalRangeCount(0), + InfoHolder(A, "info_string", DIEValueAllocator), + SkeletonHolder(A, "skel_string", DIEValueAllocator) { - DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; - DwarfStrSectionSym = TextSectionSym = 0; + DwarfInfoSectionSym = DwarfAbbrevSectionSym = DwarfStrSectionSym = 0; DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0; DwarfAddrSectionSym = 0; DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; + CurFn = 0; + CurMI = 0; // Turn on accelerator tables for Darwin by default, pubnames by // default for non-Darwin, and handle split dwarf. @@ -215,7 +209,9 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) else HasDwarfPubSections = DwarfPubSections == Enable; - DwarfVersion = getDwarfVersionFromModule(MMI->getModule()); + DwarfVersion = DwarfVersionNumber + ? DwarfVersionNumber + : getDwarfVersionFromModule(MMI->getModule()); { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); @@ -228,41 +224,50 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section, const char *SymbolStem = 0) { Asm->OutStreamer.SwitchSection(Section); - if (!SymbolStem) return 0; + if (!SymbolStem) + return 0; MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem); Asm->OutStreamer.EmitLabel(TmpSym); return TmpSym; } -MCSymbol *DwarfUnits::getStringPoolSym() { +DwarfFile::~DwarfFile() { + for (SmallVectorImpl::iterator I = CUs.begin(), E = CUs.end(); + I != E; ++I) + delete *I; +} + +MCSymbol *DwarfFile::getStringPoolSym() { return Asm->GetTempSymbol(StringPref); } -MCSymbol *DwarfUnits::getStringPoolEntry(StringRef Str) { - std::pair &Entry = - StringPool.GetOrCreateValue(Str).getValue(); - if (Entry.first) return Entry.first; +MCSymbol *DwarfFile::getStringPoolEntry(StringRef Str) { + std::pair &Entry = + StringPool.GetOrCreateValue(Str).getValue(); + if (Entry.first) + return Entry.first; Entry.second = NextStringPoolNumber++; return Entry.first = Asm->GetTempSymbol(StringPref, Entry.second); } -unsigned DwarfUnits::getStringPoolIndex(StringRef Str) { - std::pair &Entry = - StringPool.GetOrCreateValue(Str).getValue(); - if (Entry.first) return Entry.second; +unsigned DwarfFile::getStringPoolIndex(StringRef Str) { + std::pair &Entry = + StringPool.GetOrCreateValue(Str).getValue(); + if (Entry.first) + return Entry.second; Entry.second = NextStringPoolNumber++; Entry.first = Asm->GetTempSymbol(StringPref, Entry.second); return Entry.second; } -unsigned DwarfUnits::getAddrPoolIndex(const MCSymbol *Sym) { +unsigned DwarfFile::getAddrPoolIndex(const MCSymbol *Sym) { return getAddrPoolIndex(MCSymbolRefExpr::Create(Sym, Asm->OutContext)); } -unsigned DwarfUnits::getAddrPoolIndex(const MCExpr *Sym) { +unsigned 
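The DwarfFile string-pool routines above follow a standard interning pattern: the first lookup of a string creates its label and index, and every later lookup returns the memoized pair. The same pattern with plain containers, names local to the sketch:

    #include <map>
    #include <string>

    struct StringPool {
      std::map<std::string, unsigned> Index; // string -> pool slot
      unsigned Next;
      StringPool() : Next(0) {}

      unsigned intern(const std::string &S) {
        std::map<std::string, unsigned>::iterator I = Index.find(S);
        if (I != Index.end())
          return I->second;        // seen before: reuse its slot
        return Index[S] = Next++;  // first use: hand out the next slot
      }
    };

Interning is what lets DW_FORM_strp and the split-DWARF string index refer to each distinct string exactly once in .debug_str.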
DwarfFile::getAddrPoolIndex(const MCExpr *Sym) { std::pair::iterator, bool> P = AddressPool.insert(std::make_pair(Sym, NextAddrPoolNumber)); if (P.second) @@ -272,9 +277,9 @@ unsigned DwarfUnits::getAddrPoolIndex(const MCExpr *Sym) { // Define a unique number for the abbreviation. // -void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) { +void DwarfFile::assignAbbrevNumber(DIEAbbrev &Abbrev) { // Check the set for priors. - DIEAbbrev *InSet = AbbreviationsSet->GetOrInsertNode(&Abbrev); + DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev); // If it's newly added. if (InSet == &Abbrev) { @@ -294,7 +299,8 @@ static bool isObjCClass(StringRef Name) { } static bool hasObjCCategory(StringRef Name) { - if (!isObjCClass(Name)) return false; + if (!isObjCClass(Name)) + return false; return Name.find(") ") != StringRef::npos; } @@ -318,35 +324,35 @@ static StringRef getObjCMethodName(StringRef In) { // Helper for sorting sections into a stable output order. static bool SectionSort(const MCSection *A, const MCSection *B) { - std::string LA = (A ? A->getLabelBeginName() : ""); - std::string LB = (B ? B->getLabelBeginName() : ""); - return LA < LB; + std::string LA = (A ? A->getLabelBeginName() : ""); + std::string LB = (B ? B->getLabelBeginName() : ""); + return LA < LB; } // Add the various names to the Dwarf accelerator table names. // TODO: Determine whether or not we should add names for programs // that do not have a DW_AT_name or DW_AT_linkage_name field - this // is only slightly different than the lookup of non-standard ObjC names. -static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP, - DIE* Die) { - if (!SP.isDefinition()) return; - TheCU->addAccelName(SP.getName(), Die); +static void addSubprogramNames(DwarfUnit *TheU, DISubprogram SP, DIE *Die) { + if (!SP.isDefinition()) + return; + TheU->addAccelName(SP.getName(), Die); // If the linkage name is different than the name, go ahead and output // that as well into the name table. if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName()) - TheCU->addAccelName(SP.getLinkageName(), Die); + TheU->addAccelName(SP.getLinkageName(), Die); // If this is an Objective-C selector name add it to the ObjC accelerator // too. if (isObjCClass(SP.getName())) { StringRef Class, Category; getObjCClassCategory(SP.getName(), Class, Category); - TheCU->addAccelObjC(Class, Die); + TheU->addAccelObjC(Class, Die); if (Category != "") - TheCU->addAccelObjC(Category, Die); + TheU->addAccelObjC(Category, Die); // Also add the base method name to the name table. - TheCU->addAccelName(getObjCMethodName(SP.getName()), Die); + TheU->addAccelName(getObjCMethodName(SP.getName()), Die); } } @@ -366,7 +372,8 @@ bool DwarfDebug::isSubprogramContext(const MDNode *Context) { // Find DIE for the given subprogram and attach appropriate DW_AT_low_pc // and DW_AT_high_pc attributes. If there are global variables in this // scope then create and insert DIEs for these variables. -DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP) { +DIE *DwarfDebug::updateSubprogramScopeDIE(DwarfCompileUnit *SPCU, + DISubprogram SP) { DIE *SPDie = SPCU->getDIE(SP); assert(SPDie && "Unable to find subprogram DIE!"); @@ -376,7 +383,8 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP) { // concrete DIE twice. if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) { // Pick up abstract subprogram DIE. 
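assignAbbrevNumber above unique-ifies abbreviation declarations through a FoldingSet so that structurally identical DIE shapes share one abbreviation code. With the profile reduced to a string key, the scheme looks like this sketch:

    #include <map>
    #include <string>
    #include <vector>

    struct AbbrevTable {
      std::map<std::string, unsigned> Number; // profile -> abbrev code
      std::vector<std::string> Ordered;       // emitted in numeric order

      unsigned assign(const std::string &Profile) {
        std::map<std::string, unsigned>::iterator I = Number.find(Profile);
        if (I != Number.end())
          return I->second;                 // prior identical abbreviation
        Ordered.push_back(Profile);
        return Number[Profile] =
                   (unsigned)Ordered.size(); // DWARF codes start at 1
      }
    };

Sharing codes keeps .debug_abbrev small: thousands of DIEs with the same attribute layout collapse to a single table entry.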
- SPDie = SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie()); + SPDie = + SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getUnitDie()); SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, AbsSPDIE); } else { DISubprogram SPDecl = SP.getFunctionDeclaration(); @@ -388,8 +396,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP) { // specification DIE for a function defined inside a function. DIScope SPContext = resolve(SP.getContext()); if (SP.isDefinition() && !SPContext.isCompileUnit() && - !SPContext.isFile() && - !isSubprogramContext(SPContext)) { + !SPContext.isFile() && !isSubprogramContext(SPContext)) { SPCU->addFlag(SPDie, dwarf::DW_AT_declaration); // Add arguments. @@ -408,19 +415,20 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP) { SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, Arg); } DIE *SPDeclDie = SPDie; - SPDie = - SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie()); + SPDie = SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, + *SPCU->getUnitDie()); SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, SPDeclDie); } } } - SPCU->addLabelAddress(SPDie, dwarf::DW_AT_low_pc, - Asm->GetTempSymbol("func_begin", - Asm->getFunctionNumber())); - SPCU->addLabelAddress(SPDie, dwarf::DW_AT_high_pc, - Asm->GetTempSymbol("func_end", - Asm->getFunctionNumber())); + SPCU->addLabelAddress(SPDie, dwarf::DW_AT_low_pc, FunctionBeginSym); + SPCU->addLabelAddress(SPDie, dwarf::DW_AT_high_pc, FunctionEndSym); + + // Add this range to the list of ranges for the CU. + RangeSpan Span(FunctionBeginSym, FunctionEndSym); + SPCU->addRange(llvm_move(Span)); + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); MachineLocation Location(RI->getFrameRegister(*Asm->MF)); SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location); @@ -453,9 +461,39 @@ bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) { return !End; } +static void addSectionLabel(AsmPrinter *Asm, DwarfUnit *U, DIE *D, + dwarf::Attribute A, const MCSymbol *L, + const MCSymbol *Sec) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + U->addSectionLabel(D, A, L); + else + U->addSectionDelta(D, A, L, Sec); +} + +void DwarfDebug::addScopeRangeList(DwarfCompileUnit *TheCU, DIE *ScopeDIE, + const SmallVectorImpl &Range) { + // Emit offset in .debug_range as a relocatable label. emitDIE will handle + // emitting it appropriately. + MCSymbol *RangeSym = Asm->GetTempSymbol("debug_ranges", GlobalRangeCount++); + addSectionLabel(Asm, TheCU, ScopeDIE, dwarf::DW_AT_ranges, RangeSym, + DwarfDebugRangeSectionSym); + + RangeSpanList List(RangeSym); + for (SmallVectorImpl::const_iterator RI = Range.begin(), + RE = Range.end(); + RI != RE; ++RI) { + RangeSpan Span(getLabelBeforeInsn(RI->first), + getLabelAfterInsn(RI->second)); + List.addRange(llvm_move(Span)); + } + + // Add the range list to the set of ranges to be emitted. + TheCU->addRangeList(llvm_move(List)); +} + // Construct new DW_TAG_lexical_block for this scope and attach // DW_AT_low_pc/DW_AT_high_pc labels. -DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, +DIE *DwarfDebug::constructLexicalScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope) { if (isLexicalScopeDIENull(Scope)) return 0; @@ -464,29 +502,16 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, if (Scope->isAbstractScope()) return ScopeDIE; - const SmallVectorImpl &Ranges = Scope->getRanges(); - // If we have multiple ranges, emit them into the range section. 
- if (Ranges.size() > 1) { - // .debug_range section has not been laid out yet. Emit offset in - // .debug_range as a uint, size 4, for now. emitDIE will handle - // DW_AT_ranges appropriately. - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() - * Asm->getDataLayout().getPointerSize()); - for (SmallVectorImpl::const_iterator RI = Ranges.begin(), - RE = Ranges.end(); RI != RE; ++RI) { - DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); - DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); - } + const SmallVectorImpl &ScopeRanges = Scope->getRanges(); - // Terminate the range list. - DebugRangeSymbols.push_back(NULL); - DebugRangeSymbols.push_back(NULL); + // If we have multiple ranges, emit them into the range section. + if (ScopeRanges.size() > 1) { + addScopeRangeList(TheCU, ScopeDIE, ScopeRanges); return ScopeDIE; } // Construct the address range for this DIE. - SmallVectorImpl::const_iterator RI = Ranges.begin(); + SmallVectorImpl::const_iterator RI = ScopeRanges.begin(); MCSymbol *Start = getLabelBeforeInsn(RI->first); MCSymbol *End = getLabelAfterInsn(RI->second); assert(End && "End label should not be null!"); @@ -502,10 +527,10 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, // This scope represents inlined body of a function. Construct DIE to // represent this concrete inlined copy of the function. -DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, +DIE *DwarfDebug::constructInlinedScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope) { - const SmallVectorImpl &Ranges = Scope->getRanges(); - assert(Ranges.empty() == false && + const SmallVectorImpl &ScopeRanges = Scope->getRanges(); + assert(!ScopeRanges.empty() && "LexicalScope does not have instruction markers!"); if (!Scope->getScopeNode()) @@ -521,22 +546,11 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, OriginDIE); - if (Ranges.size() > 1) { - // .debug_range section has not been laid out yet. Emit offset in - // .debug_range as a uint, size 4, for now. emitDIE will handle - // DW_AT_ranges appropriately. - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() - * Asm->getDataLayout().getPointerSize()); - for (SmallVectorImpl::const_iterator RI = Ranges.begin(), - RE = Ranges.end(); RI != RE; ++RI) { - DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); - DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); - } - DebugRangeSymbols.push_back(NULL); - DebugRangeSymbols.push_back(NULL); - } else { - SmallVectorImpl::const_iterator RI = Ranges.begin(); + // If we have multiple ranges, emit them into the range section. + if (ScopeRanges.size() > 1) + addScopeRangeList(TheCU, ScopeDIE, ScopeRanges); + else { + SmallVectorImpl::const_iterator RI = ScopeRanges.begin(); MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); MCSymbol *EndLabel = getLabelAfterInsn(RI->second); @@ -567,27 +581,31 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, return ScopeDIE; } -DIE *DwarfDebug::createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope, - SmallVectorImpl &Children) { - DIE *ObjectPointer = NULL; +DIE *DwarfDebug::createScopeChildrenDIE(DwarfCompileUnit *TheCU, + LexicalScope *Scope, + SmallVectorImpl &Children) { + DIE *ObjectPointer = NULL; // Collect arguments for current function. 
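The logic above settles on one of DWARF's two ways to describe a scope's extent: a single contiguous chunk gets DW_AT_low_pc/DW_AT_high_pc, while anything fragmented gets DW_AT_ranges pointing at a .debug_ranges list that ends with a pair of zeros. Schematically, with labels reduced to integers:

    #include <cstdio>
    #include <utility>
    #include <vector>

    typedef std::pair<unsigned long, unsigned long> Span; // [begin, end)

    void describeScope(const std::vector<Span> &Ranges) {
      if (Ranges.size() == 1) {              // contiguous: low/high pc pair
        std::printf("DW_AT_low_pc  = 0x%lx\n", Ranges[0].first);
        std::printf("DW_AT_high_pc = 0x%lx\n", Ranges[0].second);
        return;
      }
      std::printf("DW_AT_ranges -> .debug_ranges:\n"); // fragmented: list
      for (size_t I = 0; I != Ranges.size(); ++I)
        std::printf("  [0x%lx, 0x%lx)\n", Ranges[I].first, Ranges[I].second);
      std::printf("  (0, 0) terminator\n");
    }

The refactoring above moves the list-building into addScopeRangeList and makes it symbol-based, so emitDIE can relocate the offset instead of precomputing it.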
if (LScopes.isCurrentFunctionScope(Scope)) for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i) if (DbgVariable *ArgDV = CurrentFnArguments[i]) if (DIE *Arg = - TheCU->constructVariableDIE(*ArgDV, Scope->isAbstractScope())) { + TheCU->constructVariableDIE(*ArgDV, Scope->isAbstractScope())) { Children.push_back(Arg); - if (ArgDV->isObjectPointer()) ObjectPointer = Arg; + if (ArgDV->isObjectPointer()) + ObjectPointer = Arg; } // Collect lexical scope children first. - const SmallVectorImpl &Variables =ScopeVariables.lookup(Scope); + const SmallVectorImpl &Variables = + ScopeVariables.lookup(Scope); for (unsigned i = 0, N = Variables.size(); i < N; ++i) - if (DIE *Variable = - TheCU->constructVariableDIE(*Variables[i], Scope->isAbstractScope())) { + if (DIE *Variable = TheCU->constructVariableDIE(*Variables[i], + Scope->isAbstractScope())) { Children.push_back(Variable); - if (Variables[i]->isObjectPointer()) ObjectPointer = Variable; + if (Variables[i]->isObjectPointer()) + ObjectPointer = Variable; } const SmallVectorImpl &Scopes = Scope->getChildren(); for (unsigned j = 0, M = Scopes.size(); j < M; ++j) @@ -597,7 +615,8 @@ DIE *DwarfDebug::createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope, } // Construct a DIE for this scope. -DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { +DIE *DwarfDebug::constructScopeDIE(DwarfCompileUnit *TheCU, + LexicalScope *Scope) { if (!Scope || !Scope->getScopeNode()) return NULL; @@ -634,10 +653,12 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { // There is no need to emit empty lexical block DIE. std::pair Range = std::equal_range( - ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(), - std::pair(DS, (const MDNode*)0), - less_first()); + ImportedEntityMap::const_iterator> Range = + std::equal_range( + ScopesWithImportedEntities.begin(), + ScopesWithImportedEntities.end(), + std::pair(DS, (const MDNode *)0), + less_first()); if (Children.empty() && Range.first == Range.second) return NULL; ScopeDIE = constructLexicalScopeDIE(TheCU, Scope); @@ -658,15 +679,13 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { // Add children for (SmallVectorImpl::iterator I = Children.begin(), - E = Children.end(); I != E; ++I) + E = Children.end(); + I != E; ++I) ScopeDIE->addChild(*I); if (DS.isSubprogram() && ObjectPointer != NULL) TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, ObjectPointer); - if (DS.isSubprogram()) - TheCU->addPubTypes(DISubprogram(DS)); - return ScopeDIE; } @@ -674,8 +693,8 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { // If none currently exists, create a new id and insert it in the // SourceIds map. This can update DirectoryNames and SourceFileNames maps // as well. -unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, - StringRef DirName, unsigned CUID) { +unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, StringRef DirName, + unsigned CUID) { // If we use .loc in assembly, we can't separate .file entries according to // compile units. Thus all files will belong to the default compile unit. 
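getOrCreateSourceID, whose body continues below, hands out DWARF file numbers memoized per (directory, file, compile unit) triple, since with .loc/.file in assembly all entries fall into one table. The cache in miniature; this is a hypothetical stand-in for SourceIdMap, and the numbering is global here where the real code counts per CU:

    #include <map>
    #include <sstream>
    #include <string>

    struct SourceIdTable {
      std::map<std::string, unsigned> Ids;

      unsigned getOrCreate(const std::string &Dir, const std::string &File,
                           unsigned CUID) {
        std::ostringstream Key;
        Key << Dir << '\0' << File << '\0' << CUID; // composite lookup key
        std::map<std::string, unsigned>::iterator I = Ids.find(Key.str());
        if (I != Ids.end())
          return I->second;            // already registered for this CU
        unsigned Id = (unsigned)Ids.size() + 1; // file numbers are 1-based
        Ids[Key.str()] = Id;
        return Id;
      }
    };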
@@ -714,15 +733,29 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, return SrcId; } -// Create new CompileUnit for the given metadata node with tag +void DwarfDebug::addGnuPubAttributes(DwarfUnit *U, DIE *D) const { + if (!GenerateGnuPubSections) + return; + + addSectionLabel(Asm, U, D, dwarf::DW_AT_GNU_pubnames, + Asm->GetTempSymbol("gnu_pubnames", U->getUniqueID()), + DwarfGnuPubNamesSectionSym); + + addSectionLabel(Asm, U, D, dwarf::DW_AT_GNU_pubtypes, + Asm->GetTempSymbol("gnu_pubtypes", U->getUniqueID()), + DwarfGnuPubTypesSectionSym); +} + +// Create new DwarfCompileUnit for the given metadata node with tag // DW_TAG_compile_unit. -CompileUnit *DwarfDebug::constructCompileUnit(DICompileUnit DIUnit) { +DwarfCompileUnit *DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { StringRef FN = DIUnit.getFilename(); CompilationDir = DIUnit.getDirectory(); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, Die, DIUnit, Asm, - this, &InfoHolder); + DwarfCompileUnit *NewCU = new DwarfCompileUnit( + InfoHolder.getUnits().size(), Die, DIUnit, Asm, this, &InfoHolder); + InfoHolder.addUnit(NewCU); FileIDCUMap[NewCU->getUniqueID()] = 0; // Call this to emit a .file directive if it wasn't emitted for the source @@ -734,15 +767,9 @@ CompileUnit *DwarfDebug::constructCompileUnit(DICompileUnit DIUnit) { DIUnit.getLanguage()); NewCU->addString(Die, dwarf::DW_AT_name, FN); - // 2.17.1 requires that we use DW_AT_low_pc for a single entry point - // into an entity. We're using 0 (or a NULL label) for this. For - // split dwarf it's in the skeleton CU so omit it here. - if (!useSplitDwarf()) - NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL); - // Define start line table label for each Compile Unit. - MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start", - NewCU->getUniqueID()); + MCSymbol *LineTableStartSym = + Asm->GetTempSymbol("line_table_start", NewCU->getUniqueID()); Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym, NewCU->getUniqueID()); @@ -758,45 +785,21 @@ CompileUnit *DwarfDebug::constructCompileUnit(DICompileUnit DIUnit) { // The line table entries are not always emitted in assembly, so it // is not okay to use line_table_start here. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, - UseTheFirstCU ? Asm->GetTempSymbol("section_line") - : LineTableStartSym); + NewCU->addSectionLabel(Die, dwarf::DW_AT_stmt_list, + UseTheFirstCU ? Asm->GetTempSymbol("section_line") + : LineTableStartSym); else if (UseTheFirstCU) - NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); + NewCU->addSectionOffset(Die, dwarf::DW_AT_stmt_list, 0); else - NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - LineTableStartSym, DwarfLineSectionSym); + NewCU->addSectionDelta(Die, dwarf::DW_AT_stmt_list, LineTableStartSym, + DwarfLineSectionSym); // If we're using split dwarf the compilation dir is going to be in the // skeleton CU and so we don't need to duplicate it here. if (!CompilationDir.empty()) NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); - // Flags to let the linker know we have emitted new style pubnames. Only - // emit it here if we don't have a skeleton CU for split dwarf. 
- if (GenerateGnuPubSections) { - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubnames, - dwarf::DW_FORM_sec_offset, - Asm->GetTempSymbol("gnu_pubnames", - NewCU->getUniqueID())); - else - NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubnames, dwarf::DW_FORM_data4, - Asm->GetTempSymbol("gnu_pubnames", - NewCU->getUniqueID()), - DwarfGnuPubNamesSectionSym); - - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubtypes, - dwarf::DW_FORM_sec_offset, - Asm->GetTempSymbol("gnu_pubtypes", - NewCU->getUniqueID())); - else - NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubtypes, dwarf::DW_FORM_data4, - Asm->GetTempSymbol("gnu_pubtypes", - NewCU->getUniqueID()), - DwarfGnuPubTypesSectionSym); - } + addGnuPubAttributes(NewCU, Die); } if (DIUnit.isOptimized()) @@ -808,12 +811,18 @@ CompileUnit *DwarfDebug::constructCompileUnit(DICompileUnit DIUnit) { if (unsigned RVer = DIUnit.getRunTimeVersion()) NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, - dwarf::DW_FORM_data1, RVer); + dwarf::DW_FORM_data1, RVer); if (!FirstCU) FirstCU = NewCU; - InfoHolder.addUnit(NewCU); + if (useSplitDwarf()) { + NewCU->initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(), + DwarfInfoDWOSectionSym); + NewCU->setSkeleton(constructSkeletonCU(NewCU)); + } else + NewCU->initSection(Asm->getObjFileLowering().getDwarfInfoSection(), + DwarfInfoSectionSym); CUMap.insert(std::make_pair(DIUnit, NewCU)); CUDieMap.insert(std::make_pair(Die, NewCU)); @@ -821,12 +830,13 @@ CompileUnit *DwarfDebug::constructCompileUnit(DICompileUnit DIUnit) { } // Construct subprogram DIE. -void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) { +void DwarfDebug::constructSubprogramDIE(DwarfCompileUnit *TheCU, + const MDNode *N) { // FIXME: We should only call this routine once, however, during LTO if a // program is defined in multiple CUs we could end up calling it out of // beginModule as we walk the CUs. 
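The block removed above inlined, twice, the same decision that addSectionLabel and addGnuPubAttributes now make once: reference a section-relative label directly when the assembler can relocate across sections, otherwise emit an explicit delta against the section start. In assembler terms, as a sketch:

    #include <cstdio>

    // Emits a 4-byte section offset for Label, one of two ways.
    void emitSectionOffset(bool CrossSectionRelocs, const char *Label,
                           const char *SectionBegin) {
      if (CrossSectionRelocs)
        std::printf(".long %s\n", Label);          // linker relocates it
      else
        std::printf(".long %s-%s\n", Label,
                    SectionBegin);                 // precomputed delta
    }

Centralizing the choice is why the DW_AT_GNU_pubnames/pubtypes emission shrinks to a two-call helper in the new code.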
- CompileUnit *&CURef = SPMap[N]; + DwarfCompileUnit *&CURef = SPMap[N]; if (CURef) return; CURef = TheCU; @@ -843,7 +853,7 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) { TheCU->addGlobalName(SP.getName(), SubprogramDie, resolve(SP.getContext())); } -void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, +void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit *TheCU, const MDNode *N) { DIImportedEntity Module(N); if (!Module.Verify()) @@ -852,15 +862,15 @@ void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, constructImportedEntityDIE(TheCU, Module, D); } -void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N, - DIE *Context) { +void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit *TheCU, + const MDNode *N, DIE *Context) { DIImportedEntity Module(N); if (!Module.Verify()) return; return constructImportedEntityDIE(TheCU, Module, Context); } -void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, +void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit *TheCU, const DIImportedEntity &Module, DIE *Context) { assert(Module.Verify() && @@ -911,7 +921,7 @@ void DwarfDebug::beginModule() { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { DICompileUnit CUNode(CU_Nodes->getOperand(i)); - CompileUnit *CU = constructCompileUnit(CUNode); + DwarfCompileUnit *CU = constructDwarfCompileUnit(CUNode); DIArray ImportedEntities = CUNode.getImportedEntities(); for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) ScopesWithImportedEntities.push_back(std::make_pair( @@ -948,12 +958,14 @@ void DwarfDebug::beginModule() { void DwarfDebug::computeInlinedDIEs() { // Attach DW_AT_inline attribute with inlined subprogram DIEs. for (SmallPtrSet::iterator AI = InlinedSubprogramDIEs.begin(), - AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { + AE = InlinedSubprogramDIEs.end(); + AI != AE; ++AI) { DIE *ISP = *AI; FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); } for (DenseMap::iterator AI = AbstractSPDies.begin(), - AE = AbstractSPDies.end(); AI != AE; ++AI) { + AE = AbstractSPDies.end(); + AI != AE; ++AI) { DIE *ISP = AI->second; if (InlinedSubprogramDIEs.count(ISP)) continue; @@ -982,7 +994,8 @@ void DwarfDebug::collectDeadVariables() { continue; // Construct subprogram DIE and add variables DIEs. - CompileUnit *SPCU = CUMap.lookup(TheCU); + DwarfCompileUnit *SPCU = + static_cast(CUMap.lookup(TheCU)); assert(SPCU && "Unable to find Compile Unit!"); // FIXME: See the comment in constructSubprogramDIE about duplicate // subprogram DIEs. @@ -993,8 +1006,7 @@ void DwarfDebug::collectDeadVariables() { if (!DV.isVariable()) continue; DbgVariable NewVar(DV, NULL, this); - if (DIE *VariableDIE = - SPCU->constructVariableDIE(NewVar, false)) + if (DIE *VariableDIE = SPCU->constructVariableDIE(NewVar, false)) SPDIE->addChild(VariableDIE); } } @@ -1002,41 +1014,6 @@ void DwarfDebug::collectDeadVariables() { } } -// Type Signature [7.27] and ODR Hash code. - -/// \brief Grabs the string in whichever attribute is passed in and returns -/// a reference to it. Returns "" if the attribute doesn't exist. -static StringRef getDIEStringAttr(DIE *Die, unsigned Attr) { - DIEValue *V = Die->findAttribute(Attr); - - if (DIEString *S = dyn_cast_or_null(V)) - return S->getString(); - - return StringRef(""); -} - -/// Return true if the current DIE is contained within an anonymous namespace. 
-static bool isContainedInAnonNamespace(DIE *Die) { - DIE *Parent = Die->getParent(); - - while (Parent) { - if (Parent->getTag() == dwarf::DW_TAG_namespace && - getDIEStringAttr(Parent, dwarf::DW_AT_name) == "") - return true; - Parent = Parent->getParent(); - } - - return false; -} - -/// Test if the current CU language is C++ and that we have -/// a named type that is not contained in an anonymous namespace. -static bool shouldAddODRHash(CompileUnit *CU, DIE *Die) { - return CU->getLanguage() == dwarf::DW_LANG_C_plus_plus && - getDIEStringAttr(Die, dwarf::DW_AT_name) != "" && - !isContainedInAnonNamespace(Die); -} - void DwarfDebug::finalizeModuleInfo() { // Collect info for variables that were optimized out. collectDeadVariables(); @@ -1044,47 +1021,48 @@ void DwarfDebug::finalizeModuleInfo() { // Attach DW_AT_inline attribute with inlined subprogram DIEs. computeInlinedDIEs(); - // Split out type units and conditionally add an ODR tag to the split - // out type. - // FIXME: Do type splitting. - for (unsigned i = 0, e = TypeUnits.size(); i != e; ++i) { - DIE *Die = TypeUnits[i]; - DIEHash Hash; - // If we've requested ODR hashes and it's applicable for an ODR hash then - // add the ODR signature now. - // FIXME: This should be added onto the type unit, not the type, but this - // works as an intermediate stage. - if (GenerateODRHash && shouldAddODRHash(CUMap.begin()->second, Die)) - CUMap.begin()->second->addUInt(Die, dwarf::DW_AT_GNU_odr_signature, - dwarf::DW_FORM_data8, - Hash.computeDIEODRSignature(*Die)); - } - - // Handle anything that needs to be done on a per-cu basis. - for (DenseMap::iterator CUI = CUMap.begin(), - CUE = CUMap.end(); - CUI != CUE; ++CUI) { - CompileUnit *TheCU = CUI->second; + // Handle anything that needs to be done on a per-unit basis after + // all other generation. + for (SmallVectorImpl::const_iterator I = getUnits().begin(), + E = getUnits().end(); + I != E; ++I) { + DwarfUnit *TheU = *I; // Emit DW_AT_containing_type attribute to connect types with their // vtable holding type. - TheCU->constructContainingTypeDIEs(); + TheU->constructContainingTypeDIEs(); - // If we're splitting the dwarf out now that we've got the entire - // CU then construct a skeleton CU based upon it. - if (useSplitDwarf()) { - uint64_t ID = 0; - if (GenerateCUHash) { - DIEHash CUHash; - ID = CUHash.computeCUSignature(*TheCU->getCUDie()); + // Add CU specific attributes if we need to add any. + if (TheU->getUnitDie()->getTag() == dwarf::DW_TAG_compile_unit) { + // If we're splitting the dwarf out now that we've got the entire + // CU then add the dwo id to it. + DwarfCompileUnit *SkCU = + static_cast(TheU->getSkeleton()); + if (useSplitDwarf()) { + // This should be a unique identifier when we want to build .dwp files. + uint64_t ID = 0; + if (GenerateCUHash) { + DIEHash CUHash; + ID = CUHash.computeCUSignature(*TheU->getUnitDie()); + } + TheU->addUInt(TheU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); } - // This should be a unique identifier when we want to build .dwp files. - TheCU->addUInt(TheCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, - dwarf::DW_FORM_data8, ID); - // Now construct the skeleton CU associated. - CompileUnit *SkCU = constructSkeletonCU(TheCU); - // This should be a unique identifier when we want to build .dwp files. 
- SkCU->addUInt(SkCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, - dwarf::DW_FORM_data8, ID); + + // If we've requested ranges and have them emit a DW_AT_ranges attribute + // on the unit that will remain in the .o file, otherwise add a + // DW_AT_low_pc. + // FIXME: Also add a high pc if we can. + // FIXME: We should use ranges if we have multiple compile units. + DwarfCompileUnit *U = SkCU ? SkCU : static_cast(TheU); + if (DwarfCURanges && TheU->getRanges().size()) + addSectionLabel(Asm, U, U->getUnitDie(), dwarf::DW_AT_ranges, + Asm->GetTempSymbol("cu_ranges", U->getUniqueID()), + DwarfDebugRangeSectionSym); + else + U->addUInt(U->getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + 0); } } @@ -1095,7 +1073,7 @@ void DwarfDebug::finalizeModuleInfo() { } void DwarfDebug::endSections() { - // Filter labels by section. + // Filter labels by section. for (size_t n = 0; n < ArangeLabels.size(); n++) { const SymbolCU &SCU = ArangeLabels[n]; if (SCU.Sym->isInSection()) { @@ -1124,14 +1102,16 @@ void DwarfDebug::endSections() { std::sort(Sections.begin(), Sections.end(), SectionSort); // Add terminating symbols for each section. - for (unsigned ID=0;IDGetTempSymbol("debug_end", ID); Asm->OutStreamer.SwitchSection(Section); @@ -1145,8 +1125,11 @@ void DwarfDebug::endSections() { // Emit all Dwarf sections that should come after the content. void DwarfDebug::endModule() { + assert(CurFn == 0); + assert(CurMI == 0); - if (!FirstCU) return; + if (!FirstCU) + return; // End any existing sections. // TODO: Does this need to happen? @@ -1155,57 +1138,29 @@ void DwarfDebug::endModule() { // Finalize the debug info for the module. finalizeModuleInfo(); - if (!useSplitDwarf()) { - emitDebugStr(); + emitDebugStr(); - // Emit all the DIEs into a debug info section. - emitDebugInfo(); + // Emit all the DIEs into a debug info section. + emitDebugInfo(); - // Corresponding abbreviations into a abbrev section. - emitAbbreviations(); + // Corresponding abbreviations into a abbrev section. + emitAbbreviations(); - // Emit info into a debug loc section. - emitDebugLoc(); + // Emit info into a debug loc section. + emitDebugLoc(); - // Emit info into a debug aranges section. - emitDebugARanges(); + // Emit info into a debug aranges section. + emitDebugARanges(); - // Emit info into a debug ranges section. - emitDebugRanges(); + // Emit info into a debug ranges section. + emitDebugRanges(); - // Emit info into a debug macinfo section. - emitDebugMacInfo(); - - } else { - // TODO: Fill this in for separated debug sections and separate - // out information into new sections. - emitDebugStr(); - if (useSplitDwarf()) - emitDebugStrDWO(); - - // Emit the debug info section and compile units. - emitDebugInfo(); + if (useSplitDwarf()) { + emitDebugStrDWO(); emitDebugInfoDWO(); - - // Corresponding abbreviations into a abbrev section. - emitAbbreviations(); emitDebugAbbrevDWO(); - - // Emit info into a debug loc section. - emitDebugLoc(); - - // Emit info into a debug aranges section. - emitDebugARanges(); - - // Emit info into a debug ranges section. - emitDebugRanges(); - - // Emit info into a debug macinfo section. - emitDebugMacInfo(); - // Emit DWO addresses. InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection()); - } // Emit info into the dwarf accelerator table sections. @@ -1224,13 +1179,6 @@ void DwarfDebug::endModule() { // clean up. 
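In the split-DWARF branch above, the same 64-bit dwo_id is written twice, once into the unit headed for the .dwo file and once into the skeleton left in the .o, so a consumer can match the pair; the value comes from a CU hash when -generate-cu-hash is on. The invariant spelled out as a sketch, with a hypothetical Unit type:

    #include <stdint.h>
    #include <cassert>

    struct Unit { uint64_t DwoId; };

    void pairSkeleton(Unit &Full, Unit &Skeleton, uint64_t Id) {
      Full.DwoId = Id;      // DW_AT_GNU_dwo_id in the .dwo unit
      Skeleton.DwoId = Id;  // identical value in the .o skeleton
      assert(Full.DwoId == Skeleton.DwoId && "consumers match on equality");
    }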
SPMap.clear(); - for (DenseMap::iterator I = CUMap.begin(), - E = CUMap.end(); I != E; ++I) - delete I->second; - - for (SmallVectorImpl::iterator I = SkeletonCUs.begin(), - E = SkeletonCUs.end(); I != E; ++I) - delete *I; // Reset these for the next Module if we have one. FirstCU = NULL; @@ -1257,8 +1205,7 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV, } // If Var is a current function argument then add it to CurrentFnArguments list. -bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF, - DbgVariable *Var, LexicalScope *Scope) { +bool DwarfDebug::addCurrentFnArgument(DbgVariable *Var, LexicalScope *Scope) { if (!LScopes.isCurrentFunctionScope(Scope)) return false; DIVariable DV = Var->getVariable(); @@ -1270,7 +1217,7 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF, size_t Size = CurrentFnArguments.size(); if (Size == 0) - CurrentFnArguments.resize(MF->getFunction()->arg_size()); + CurrentFnArguments.resize(CurFn->getFunction()->arg_size()); // llvm::Function argument size is not good indicator of how many // arguments does the function have at source level. if (ArgNo > Size) @@ -1280,14 +1227,15 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF, } // Collect variable information from side table maintained by MMI. -void -DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF, - SmallPtrSet &Processed) { +void DwarfDebug::collectVariableInfoFromMMITable( + SmallPtrSet &Processed) { MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), - VE = VMap.end(); VI != VE; ++VI) { + VE = VMap.end(); + VI != VE; ++VI) { const MDNode *Var = VI->first; - if (!Var) continue; + if (!Var) + continue; Processed.insert(Var); DIVariable DV(Var); const std::pair &VP = VI->second; @@ -1301,7 +1249,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF, DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second); DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable, this); RegVar->setFrameIndex(VP.first); - if (!addCurrentFnArgument(MF, RegVar, Scope)) + if (!addCurrentFnArgument(RegVar, Scope)) addScopeVariable(Scope, RegVar); if (AbsDbgVariable) AbsDbgVariable->setFrameIndex(VP.first); @@ -1312,8 +1260,8 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF, // defined reg. static bool isDbgValueInDefinedReg(const MachineInstr *MI) { assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); - return MI->getNumOperands() == 3 && - MI->getOperand(0).isReg() && MI->getOperand(0).getReg() && + return MI->getNumOperands() == 3 && MI->getOperand(0).isReg() && + MI->getOperand(0).getReg() && (MI->getOperand(1).isImm() || (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == 0U)); } @@ -1323,7 +1271,7 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, const MCSymbol *FLabel, const MCSymbol *SLabel, const MachineInstr *MI) { - const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata(); + const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata(); assert(MI->getNumOperands() == 3); if (MI->getOperand(0).isReg()) { @@ -1348,22 +1296,22 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, // Find variables for each lexical scope. 
void -DwarfDebug::collectVariableInfo(const MachineFunction *MF, - SmallPtrSet &Processed) { +DwarfDebug::collectVariableInfo(SmallPtrSet &Processed) { // Grab the variable info that was squirreled away in the MMI side-table. - collectVariableInfoFromMMITable(MF, Processed); + collectVariableInfoFromMMITable(Processed); - for (SmallVectorImpl::const_iterator - UVI = UserVariables.begin(), UVE = UserVariables.end(); UVI != UVE; - ++UVI) { + for (SmallVectorImpl::const_iterator + UVI = UserVariables.begin(), + UVE = UserVariables.end(); + UVI != UVE; ++UVI) { const MDNode *Var = *UVI; if (Processed.count(Var)) continue; // History contains relevant DBG_VALUE instructions for Var and instructions // clobbering it. - SmallVectorImpl &History = DbgValues[Var]; + SmallVectorImpl &History = DbgValues[Var]; if (History.empty()) continue; const MachineInstr *MInsn = History.front(); @@ -1371,7 +1319,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, DIVariable DV(Var); LexicalScope *Scope = NULL; if (DV.getTag() == dwarf::DW_TAG_arg_variable && - DISubprogram(DV.getContext()).describes(MF->getFunction())) + DISubprogram(DV.getContext()).describes(CurFn->getFunction())) Scope = LScopes.getCurrentFunctionScope(); else if (MDNode *IA = DV.getInlinedAt()) Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA)); @@ -1385,14 +1333,14 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, assert(MInsn->isDebugValue() && "History must begin with debug value"); DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc()); DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this); - if (!addCurrentFnArgument(MF, RegVar, Scope)) + if (!addCurrentFnArgument(RegVar, Scope)) addScopeVariable(Scope, RegVar); if (AbsVar) AbsVar->setMInsn(MInsn); // Simplify ranges that are fully coalesced. - if (History.size() <= 1 || (History.size() == 2 && - MInsn->isIdenticalTo(History.back()))) { + if (History.size() <= 1 || + (History.size() == 2 && MInsn->isIdenticalTo(History.back()))) { RegVar->setMInsn(MInsn); continue; } @@ -1400,14 +1348,16 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, // Handle multiple DBG_VALUE instructions describing one variable. RegVar->setDotDebugLocOffset(DotDebugLocEntries.size()); - for (SmallVectorImpl::const_iterator - HI = History.begin(), HE = History.end(); HI != HE; ++HI) { + for (SmallVectorImpl::const_iterator + HI = History.begin(), + HE = History.end(); + HI != HE; ++HI) { const MachineInstr *Begin = *HI; assert(Begin->isDebugValue() && "Invalid History entry"); // Check if DBG_VALUE is truncating a range. - if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() - && !Begin->getOperand(0).getReg()) + if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() && + !Begin->getOperand(0).getReg()) continue; // Compute the range for a register location. @@ -1421,7 +1371,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, else { const MachineInstr *End = HI[1]; DEBUG(dbgs() << "DotDebugLoc Pair:\n" - << "\t" << *Begin << "\t" << *End << "\n"); + << "\t" << *Begin << "\t" << *End << "\n"); if (End->isDebugValue()) SLabel = getLabelBeforeInsn(End); else { @@ -1433,8 +1383,8 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, } // The value is valid until the next DBG_VALUE or clobber. 
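collectVariableInfo above compresses a variable's DBG_VALUE history into .debug_loc ranges: each entry is live from its own label until the label of the next entry, or the end of the function for the last one. The sweep in isolation, with labels reduced to integers and the location payload omitted:

    #include <utility>
    #include <vector>

    typedef std::pair<unsigned, unsigned> LiveRange; // [begin, end) labels

    std::vector<LiveRange> buildLocList(const std::vector<unsigned> &Events,
                                        unsigned FunctionEndLabel) {
      std::vector<LiveRange> Out;
      for (size_t I = 0; I != Events.size(); ++I) {
        unsigned End =
            I + 1 != Events.size() ? Events[I + 1] : FunctionEndLabel;
        Out.push_back(LiveRange(Events[I], End)); // live until next event
      }
      return Out;
    }

The single-entry and fully coalesced cases above skip this path entirely and pin the location on the DIE itself, which is cheaper than a one-element location list.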
-      DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel,
-                                                    Begin));
+      DotDebugLocEntries.push_back(
+          getDebugLocEntry(Asm, FLabel, SLabel, Begin));
     }
     DotDebugLocEntries.push_back(DotDebugLocEntry());
   }
@@ -1465,6 +1415,8 @@ MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {

 // Process beginning of an instruction.
 void DwarfDebug::beginInstruction(const MachineInstr *MI) {
+  assert(CurMI == 0);
+  CurMI = MI;
   // Check if source location changes, but ignore DBG_VALUE locations.
   if (!MI->isDebugValue()) {
     DebugLoc DL = MI->getDebugLoc();
@@ -1487,8 +1439,8 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
   }

   // Insert labels where requested.
-  DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
-    LabelsBeforeInsn.find(MI);
+  DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+      LabelsBeforeInsn.find(MI);

   // No label needed.
   if (I == LabelsBeforeInsn.end())
@@ -1506,14 +1458,16 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
 }

 // Process end of an instruction.
-void DwarfDebug::endInstruction(const MachineInstr *MI) {
+void DwarfDebug::endInstruction() {
+  assert(CurMI != 0);
   // Don't create a new label after DBG_VALUE instructions.
   // They don't generate code.
-  if (!MI->isDebugValue())
+  if (!CurMI->isDebugValue())
     PrevLabel = 0;

-  DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
-    LabelsAfterInsn.find(MI);
+  DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+      LabelsAfterInsn.find(CurMI);
+  CurMI = 0;

   // No label needed.
   if (I == LabelsAfterInsn.end())
@@ -1543,8 +1497,10 @@ void DwarfDebug::identifyScopeMarkers() {
     const SmallVectorImpl<LexicalScope *> &Children = S->getChildren();
     if (!Children.empty())
-      for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(),
-             SE = Children.end(); SI != SE; ++SI)
+      for (SmallVectorImpl<LexicalScope *>::const_iterator
+               SI = Children.begin(),
+               SE = Children.end();
+           SI != SE; ++SI)
         WorkList.push_back(*SI);

     if (S->isAbstractScope())
@@ -1554,7 +1510,8 @@ void DwarfDebug::identifyScopeMarkers() {
     if (Ranges.empty())
       continue;
     for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(),
-           RE = Ranges.end(); RI != RE; ++RI) {
+                                                    RE = Ranges.end();
+         RI != RE; ++RI) {
       assert(RI->first && "InsnRange does not have first instruction!");
       assert(RI->second && "InsnRange does not have second instruction!");
       requestLabelBeforeInsn(RI->first);
@@ -1590,6 +1547,7 @@ static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
 // Gather pre-function debug information.  Assumes being called immediately
 // after the function entry point has been emitted.
 void DwarfDebug::beginFunction(const MachineFunction *MF) {
+  CurFn = MF;

   // If there's no debug info for the function we're not going to do anything.
   if (!MMI->hasDebugInfo())
@@ -1606,11 +1564,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
   // Make sure that each lexical scope will have a begin/end label.
   identifyScopeMarkers();

-  // Set DwarfCompileUnitID in MCContext to the Compile Unit this function
+  // Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function
   // belongs to so that we add to the correct per-cu line table in the
   // non-asm case.
   LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
-  CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+  DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
   assert(TheCU && "Unable to find compile unit!");
   if (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport())
     // Use a single line table if we are using .loc and generating assembly.
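For context on the hunks above: the DBG_VALUE history collected in collectVariableInfo turns into [begin, end) location ranges roughly as sketched below. This is an illustrative stand-alone sketch with stand-in types (Insn, Range), not LLVM's MachineInstr and DotDebugLocEntry classes.

    #include <cstddef>
    #include <vector>

    struct Insn { bool IsDebugValue; };        // stand-in for MachineInstr
    struct Range { const Insn *Begin, *End; }; // stand-in for a loc-list entry

    // Each DBG_VALUE opens a range that stays live until the next history
    // entry (another DBG_VALUE or a clobbering instruction); a null End means
    // the value is live to the end of the function.
    std::vector<Range> buildRanges(const std::vector<const Insn *> &History) {
      std::vector<Range> Ranges;
      for (std::size_t I = 0, E = History.size(); I != E; ++I) {
        if (!History[I]->IsDebugValue)
          continue; // clobbers only terminate the previous range
        const Insn *End = (I + 1 != E) ? History[I + 1] : 0;
        Ranges.push_back(Range{History[I], End});
      }
      return Ranges;
    }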
@@ -1817,21 +1775,32 @@ void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {

 // Gather and emit post-function debug information.
 void DwarfDebug::endFunction(const MachineFunction *MF) {
-  if (!MMI->hasDebugInfo() || LScopes.empty()) return;
+  // Every beginFunction(MF) call should be followed by an endFunction(MF)
+  // call, though the beginFunction may not be called at all.
+  // We should handle both cases.
+  if (CurFn == 0)
+    CurFn = MF;
+  else
+    assert(CurFn == MF);
+  assert(CurFn != 0);
+
+  if (!MMI->hasDebugInfo() || LScopes.empty()) {
+    CurFn = 0;
+    return;
+  }

   // Define end label for subprogram.
-  FunctionEndSym = Asm->GetTempSymbol("func_end",
-                                      Asm->getFunctionNumber());
+  FunctionEndSym = Asm->GetTempSymbol("func_end", Asm->getFunctionNumber());
   // Assumes in correct section after the entry point.
   Asm->OutStreamer.EmitLabel(FunctionEndSym);
-  // Set DwarfCompileUnitID in MCContext to default value.
+  // Set DwarfDwarfCompileUnitID in MCContext to default value.
   Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);

   SmallPtrSet<const MDNode *, 16> ProcessedVars;
-  collectVariableInfo(MF, ProcessedVars);
+  collectVariableInfo(ProcessedVars);

   LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
-  CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+  DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
   assert(TheCU && "Unable to find compile unit!");

   // Construct abstract scopes.
@@ -1862,12 +1831,13 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {

   DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope);

-  if (!MF->getTarget().Options.DisableFramePointerElim(*MF))
+  if (!CurFn->getTarget().Options.DisableFramePointerElim(*CurFn))
     TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr);

   // Clear debug info
-  for (ScopeVariablesMap::iterator
-         I = ScopeVariables.begin(), E = ScopeVariables.end(); I != E; ++I)
+  for (ScopeVariablesMap::iterator I = ScopeVariables.begin(),
+                                   E = ScopeVariables.end();
+       I != E; ++I)
     DeleteContainerPointers(I->second);
   ScopeVariables.clear();
   DeleteContainerPointers(CurrentFnArguments);
@@ -1877,6 +1847,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
   LabelsBeforeInsn.clear();
   LabelsAfterInsn.clear();
   PrevLabel = NULL;
+  CurFn = 0;
 }

 // Register a source line with debug info. Returns the unique label that was
@@ -1912,8 +1883,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
     } else
       llvm_unreachable("Unexpected scope info");

-    Src = getOrCreateSourceID(Fn, Dir,
-            Asm->OutStreamer.getContext().getDwarfCompileUnitID());
+    Src = getOrCreateSourceID(
+        Fn, Dir, Asm->OutStreamer.getContext().getDwarfCompileUnitID());
   }
   Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn);
 }
@@ -1924,8 +1895,7 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,

 // Compute the size and offset of a DIE. The offset is relative to start of the
 // CU. It returns the offset after laying out the DIE.
-unsigned
-DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
+unsigned DwarfFile::computeSizeAndOffset(DIE *Die, unsigned Offset) {
   // Get the children.
   const std::vector<DIE *> &Children = Die->getChildren();

@@ -1933,17 +1903,16 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
   assignAbbrevNumber(Die->getAbbrev());

   // Get the abbreviation for this DIE.
-  unsigned AbbrevNumber = Die->getAbbrevNumber();
-  const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+  const DIEAbbrev &Abbrev = Die->getAbbrev();

   // Set DIE offset
   Die->setOffset(Offset);

   // Start the size with the size of abbreviation code.
-  Offset += MCAsmInfo::getULEB128Size(AbbrevNumber);
+  Offset += MCAsmInfo::getULEB128Size(Die->getAbbrevNumber());

-  const SmallVectorImpl<DIEValue *> &Values = Die->getValues();
-  const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
+  const SmallVectorImpl<DIEValue *> &Values = Die->getValues();
+  const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();

   // Size the DIE attribute values.
   for (unsigned i = 0, N = Values.size(); i < N; ++i)
@@ -1952,7 +1921,7 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {

   // Size the DIE children if any.
   if (!Children.empty()) {
-    assert(Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes &&
+    assert(Abbrev.getChildrenFlag() == dwarf::DW_CHILDREN_yes &&
            "Children flag not set");

     for (unsigned j = 0, M = Children.size(); j < M; ++j)
@@ -1967,23 +1936,24 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
 }

 // Compute the size and offset for each DIE.
-void DwarfUnits::computeSizeAndOffsets() {
+void DwarfFile::computeSizeAndOffsets() {
   // Offset from the first CU in the debug info section is 0 initially.
   unsigned SecOffset = 0;

   // Iterate over each compile unit and set the size and offsets for each
   // DIE within each compile unit. All offsets are CU relative.
-  for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
-         E = CUs.end(); I != E; ++I) {
+  for (SmallVectorImpl<DwarfUnit *>::const_iterator I = CUs.begin(),
+                                                    E = CUs.end();
+       I != E; ++I) {
     (*I)->setDebugInfoOffset(SecOffset);

     // CU-relative offset is reset to 0 here.
-    unsigned Offset = sizeof(int32_t) + // Length of Unit Info
+    unsigned Offset = sizeof(int32_t) +    // Length of Unit Info
                       (*I)->getHeaderSize(); // Unit-specific headers

     // EndOffset here is CU-relative, after laying out
     // all of the CU DIE.
-    unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset);
+    unsigned EndOffset = computeSizeAndOffset((*I)->getUnitDie(), Offset);
     SecOffset += EndOffset;
   }
 }
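The layout pass above can be summarized by the following self-contained sketch (illustrative FakeDIE type, not LLVM's DIE): a DIE occupies the ULEB128-encoded abbreviation code, then its attribute values, then its recursively laid-out children, plus one terminating zero byte when children are present.

    #include <vector>

    // Size of an unsigned LEB128 encoding: 7 payload bits per byte.
    static unsigned getULEB128Size(unsigned Value) {
      unsigned Size = 0;
      do {
        Value >>= 7;
        ++Size;
      } while (Value);
      return Size;
    }

    struct FakeDIE {
      unsigned AbbrevCode;
      unsigned Offset;
      std::vector<unsigned> ValueSizes; // pre-computed attribute value sizes
      std::vector<FakeDIE> Children;
    };

    unsigned computeSizeAndOffset(FakeDIE &Die, unsigned Offset) {
      Die.Offset = Offset;
      Offset += getULEB128Size(Die.AbbrevCode);
      for (std::size_t I = 0; I != Die.ValueSizes.size(); ++I)
        Offset += Die.ValueSizes[I];
      if (!Die.Children.empty()) {
        for (std::size_t J = 0; J != Die.Children.size(); ++J)
          Offset = computeSizeAndOffset(Die.Children[J], Offset);
        Offset += 1; // null byte terminating the sibling chain
      }
      return Offset;
    }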
@@ -1994,20 +1964,22 @@ void DwarfDebug::emitSectionLabels() {
   // Dwarf sections base addresses.
   DwarfInfoSectionSym =
-    emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
-  DwarfAbbrevSectionSym =
-    emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
+      emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
   if (useSplitDwarf())
-    DwarfAbbrevDWOSectionSym =
-      emitSectionSym(Asm, TLOF.getDwarfAbbrevDWOSection(),
-                     "section_abbrev_dwo");
+    DwarfInfoDWOSectionSym =
+        emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo");
+  DwarfAbbrevSectionSym =
+      emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
+  if (useSplitDwarf())
+    DwarfAbbrevDWOSectionSym = emitSectionSym(
+        Asm, TLOF.getDwarfAbbrevDWOSection(), "section_abbrev_dwo");
   emitSectionSym(Asm, TLOF.getDwarfARangesSection());

   if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
     emitSectionSym(Asm, MacroInfo);

   DwarfLineSectionSym =
-    emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
+      emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
   emitSectionSym(Asm, TLOF.getDwarfLocSection());
   if (GenerateGnuPubSections) {
     DwarfGnuPubNamesSectionSym =
@@ -2020,39 +1992,35 @@ void DwarfDebug::emitSectionLabels() {
   }

   DwarfStrSectionSym =
-    emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
+      emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
   if (useSplitDwarf()) {
     DwarfStrDWOSectionSym =
-      emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string");
+        emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string");
     DwarfAddrSectionSym =
-      emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec");
+        emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec");
   }
-  DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(),
-                                             "debug_range");
+  DwarfDebugRangeSectionSym =
+      emitSectionSym(Asm, TLOF.getDwarfRangesSection(), "debug_range");

-  DwarfDebugLocSectionSym = emitSectionSym(Asm, TLOF.getDwarfLocSection(),
-                                           "section_debug_loc");
-
-  TextSectionSym = emitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
-  emitSectionSym(Asm, TLOF.getDataSection());
+  DwarfDebugLocSectionSym =
+      emitSectionSym(Asm, TLOF.getDwarfLocSection(), "section_debug_loc");
 }

 // Recursively emits a debug information entry.
-void DwarfDebug::emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs) {
+void DwarfDebug::emitDIE(DIE *Die) {
   // Get the abbreviation for this DIE.
-  unsigned AbbrevNumber = Die->getAbbrevNumber();
-  const DIEAbbrev *Abbrev = Abbrevs[AbbrevNumber - 1];
+  const DIEAbbrev &Abbrev = Die->getAbbrev();

   // Emit the code (index) for the abbreviation.
   if (Asm->isVerbose())
-    Asm->OutStreamer.AddComment("Abbrev [" + Twine(AbbrevNumber) + "] 0x" +
-                                Twine::utohexstr(Die->getOffset()) + ":0x" +
-                                Twine::utohexstr(Die->getSize()) + " " +
-                                dwarf::TagString(Abbrev->getTag()));
-  Asm->EmitULEB128(AbbrevNumber);
+    Asm->OutStreamer.AddComment("Abbrev [" + Twine(Abbrev.getNumber()) +
+                                "] 0x" + Twine::utohexstr(Die->getOffset()) +
+                                ":0x" + Twine::utohexstr(Die->getSize()) + " " +
+                                dwarf::TagString(Abbrev.getTag()));
+  Asm->EmitULEB128(Abbrev.getNumber());

-  const SmallVectorImpl<DIEValue *> &Values = Die->getValues();
-  const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
+  const SmallVectorImpl<DIEValue *> &Values = Die->getValues();
+  const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();

   // Emit the DIE attribute values.
   for (unsigned i = 0, N = Values.size(); i < N; ++i) {
@@ -2078,40 +2046,24 @@ void DwarfDebug::emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs) {
       // For DW_FORM_ref_addr, output the offset from beginning of debug info
       // section. Origin->getOffset() returns the offset from start of the
       // compile unit.
-      CompileUnit *CU = CUDieMap.lookup(Origin->getCompileUnit());
+      DwarfCompileUnit *CU = CUDieMap.lookup(Origin->getUnit());
       assert(CU && "CUDie should belong to a CU.");
       Addr += CU->getDebugInfoOffset();
       if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-        Asm->EmitLabelPlusOffset(DwarfInfoSectionSym, Addr,
+        Asm->EmitLabelPlusOffset(CU->getSectionSym(), Addr,
                                  DIEEntry::getRefAddrSize(Asm));
       else
-        Asm->EmitLabelOffsetDifference(DwarfInfoSectionSym, Addr,
-                                       DwarfInfoSectionSym,
+        Asm->EmitLabelOffsetDifference(CU->getSectionSym(), Addr,
+                                       CU->getSectionSym(),
                                        DIEEntry::getRefAddrSize(Asm));
     } else {
       // Make sure Origin belong to the same CU.
-      assert(Die->getCompileUnit() == Origin->getCompileUnit() &&
+      assert(Die->getUnit() == Origin->getUnit() &&
              "The referenced DIE should belong to the same CU in ref4");
       Asm->EmitInt32(Addr);
     }
     break;
   }
-  case dwarf::DW_AT_ranges: {
-    // DW_AT_range Value encodes offset in debug_range section.
-    DIEInteger *V = cast<DIEInteger>(Values[i]);
-
-    if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) {
-      Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
-                               V->getValue(),
-                               4);
-    } else {
-      Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym,
-                                     V->getValue(),
-                                     DwarfDebugRangeSectionSym,
-                                     4);
-    }
-    break;
-  }
   case dwarf::DW_AT_location: {
     if (DIELabel *L = dyn_cast<DIELabel>(Values[i])) {
       if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
@@ -2139,79 +2091,67 @@ void DwarfDebug::emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs) {
   }

   // Emit the DIE children if any.
-  if (Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes) {
+  if (Abbrev.getChildrenFlag() == dwarf::DW_CHILDREN_yes) {
     const std::vector<DIE *> &Children = Die->getChildren();

     for (unsigned j = 0, M = Children.size(); j < M; ++j)
-      emitDIE(Children[j], Abbrevs);
+      emitDIE(Children[j]);

-    if (Asm->isVerbose())
-      Asm->OutStreamer.AddComment("End Of Children Mark");
+    Asm->OutStreamer.AddComment("End Of Children Mark");
     Asm->EmitInt8(0);
   }
 }

 // Emit the various dwarf units to the unit section USection with
 // the abbreviations going into ASection.
-void DwarfUnits::emitUnits(DwarfDebug *DD,
-                           const MCSection *USection,
-                           const MCSection *ASection,
-                           const MCSymbol *ASectionSym) {
-  Asm->OutStreamer.SwitchSection(USection);
-  for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
-         E = CUs.end(); I != E; ++I) {
-    CompileUnit *TheCU = *I;
-    DIE *Die = TheCU->getCUDie();
+void DwarfFile::emitUnits(DwarfDebug *DD, const MCSection *ASection,
+                          const MCSymbol *ASectionSym) {
+  for (SmallVectorImpl<DwarfUnit *>::iterator I = CUs.begin(), E = CUs.end();
+       I != E; ++I) {
+    DwarfUnit *TheU = *I;
+    DIE *Die = TheU->getUnitDie();
+    const MCSection *USection = TheU->getSection();
+    Asm->OutStreamer.SwitchSection(USection);

     // Emit the compile units header.
-    Asm->OutStreamer
-      .EmitLabel(Asm->GetTempSymbol(USection->getLabelBeginName(),
-                                    TheCU->getUniqueID()));
+    Asm->OutStreamer.EmitLabel(TheU->getLabelBegin());

     // Emit size of content not including length itself
     Asm->OutStreamer.AddComment("Length of Unit");
-    Asm->EmitInt32(TheCU->getHeaderSize() + Die->getSize());
+    Asm->EmitInt32(TheU->getHeaderSize() + Die->getSize());

-    TheCU->emitHeader(ASection, ASectionSym);
+    TheU->emitHeader(ASection, ASectionSym);

-    DD->emitDIE(Die, Abbreviations);
-    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(USection->getLabelEndName(),
-                                                  TheCU->getUniqueID()));
+    DD->emitDIE(Die);
+    Asm->OutStreamer.EmitLabel(TheU->getLabelEnd());
   }
 }
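For the "Length of Unit" field emitted above, the 32-bit value covers everything after the length word itself: the remainder of the unit header plus the laid-out DIE tree. A hedged arithmetic example (the 7-byte header matches a DWARF-4 compile unit: 2-byte version, 4-byte abbrev offset, 1-byte address size; the DIE tree size is a made-up figure):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t HeaderSize = 7;   // unit header bytes after the length field
      uint32_t DieTreeSize = 42; // Die->getSize() after computeSizeAndOffsets()
      uint32_t UnitLength = HeaderSize + DieTreeSize;
      // Total bytes in the section for this unit = 4-byte length + UnitLength.
      std::printf("unit length field: %u, bytes on disk: %u\n", UnitLength,
                  4 + UnitLength);
      return 0;
    }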
 // Emit the debug info section.
 void DwarfDebug::emitDebugInfo() {
-  DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+  DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;

-  Holder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoSection(),
-                   Asm->getObjFileLowering().getDwarfAbbrevSection(),
+  Holder.emitUnits(this, Asm->getObjFileLowering().getDwarfAbbrevSection(),
                    DwarfAbbrevSectionSym);
 }

 // Emit the abbreviation section.
 void DwarfDebug::emitAbbreviations() {
-  if (!useSplitDwarf())
-    emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection(),
-                &Abbreviations);
-  else
-    emitSkeletonAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
+  DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+
+  Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
 }

-void DwarfDebug::emitAbbrevs(const MCSection *Section,
-                             std::vector<DIEAbbrev *> *Abbrevs) {
+void DwarfFile::emitAbbrevs(const MCSection *Section) {
   // Check to see if it is worth the effort.
-  if (!Abbrevs->empty()) {
+  if (!Abbreviations.empty()) {
     // Start the debug abbrev section.
     Asm->OutStreamer.SwitchSection(Section);

-    MCSymbol *Begin = Asm->GetTempSymbol(Section->getLabelBeginName());
-    Asm->OutStreamer.EmitLabel(Begin);
-
     // For each abbrevation.
-    for (unsigned i = 0, N = Abbrevs->size(); i < N; ++i) {
+    for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) {
       // Get abbreviation data
-      const DIEAbbrev *Abbrev = Abbrevs->at(i);
+      const DIEAbbrev *Abbrev = Abbreviations[i];

       // Emit the abbrevations code (base 1 index.)
       Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
@@ -2222,9 +2162,6 @@ void DwarfDebug::emitAbbrevs(const MCSection *Section,

     // Mark end of abbreviations.
     Asm->EmitULEB128(0, "EOM(3)");
-
-    MCSymbol *End = Asm->GetTempSymbol(Section->getLabelEndName());
-    Asm->OutStreamer.EmitLabel(End);
   }
 }

@@ -2241,8 +2178,9 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {

   Asm->OutStreamer.AddComment("Section end label");

-  Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
-                                   Asm->getDataLayout().getPointerSize());
+  Asm->OutStreamer.EmitSymbolValue(
+      Asm->GetTempSymbol("section_end", SectionEnd),
+      Asm->getDataLayout().getPointerSize());

   // Mark end of matrix.
   Asm->OutStreamer.AddComment("DW_LNE_end_sequence");
@@ -2253,25 +2191,29 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {

 // Emit visible names into a hashed accelerator table section.
 void DwarfDebug::emitAccelNames() {
-  DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
-                                           dwarf::DW_FORM_data4));
-  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
-         E = CUMap.end(); I != E; ++I) {
-    CompileUnit *TheCU = I->second;
-    const StringMap<std::vector<DIE *> > &Names = TheCU->getAccelNames();
-    for (StringMap<std::vector<DIE *> >::const_iterator
-           GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+  DwarfAccelTable AT(
+      DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4));
+  for (SmallVectorImpl<DwarfUnit *>::const_iterator I = getUnits().begin(),
+                                                    E = getUnits().end();
+       I != E; ++I) {
+    DwarfUnit *TheU = *I;
+    const StringMap<std::vector<const DIE *> > &Names = TheU->getAccelNames();
+    for (StringMap<std::vector<const DIE *> >::const_iterator
+             GI = Names.begin(),
+             GE = Names.end();
+         GI != GE; ++GI) {
       StringRef Name = GI->getKey();
-      const std::vector<DIE *> &Entities = GI->second;
-      for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
-             DE = Entities.end(); DI != DE; ++DI)
-        AT.AddName(Name, (*DI));
+      const std::vector<const DIE *> &Entities = GI->second;
+      for (std::vector<const DIE *>::const_iterator DI = Entities.begin(),
+                                                    DE = Entities.end();
+           DI != DE; ++DI)
+        AT.AddName(Name, *DI);
     }
   }

   AT.FinalizeTable(Asm, "Names");
   Asm->OutStreamer.SwitchSection(
-    Asm->getObjFileLowering().getDwarfAccelNamesSection());
+      Asm->getObjFileLowering().getDwarfAccelNamesSection());
   MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin");
   Asm->OutStreamer.EmitLabel(SectionBegin);

@@ -2282,25 +2224,29 @@ void DwarfDebug::emitAccelNames() {
 // Emit objective C classes and categories into a hashed accelerator table
 // section.
 void DwarfDebug::emitAccelObjC() {
-  DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
-                                           dwarf::DW_FORM_data4));
-  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
-         E = CUMap.end(); I != E; ++I) {
-    CompileUnit *TheCU = I->second;
-    const StringMap<std::vector<DIE *> > &Names = TheCU->getAccelObjC();
-    for (StringMap<std::vector<DIE *> >::const_iterator
-           GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+  DwarfAccelTable AT(
+      DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4));
+  for (SmallVectorImpl<DwarfUnit *>::const_iterator I = getUnits().begin(),
+                                                    E = getUnits().end();
+       I != E; ++I) {
+    DwarfUnit *TheU = *I;
+    const StringMap<std::vector<const DIE *> > &Names = TheU->getAccelObjC();
+    for (StringMap<std::vector<const DIE *> >::const_iterator
+             GI = Names.begin(),
+             GE = Names.end();
+         GI != GE; ++GI) {
       StringRef Name = GI->getKey();
-      const std::vector<DIE *> &Entities = GI->second;
-      for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
-             DE = Entities.end(); DI != DE; ++DI)
-        AT.AddName(Name, (*DI));
+      const std::vector<const DIE *> &Entities = GI->second;
+      for (std::vector<const DIE *>::const_iterator DI = Entities.begin(),
+                                                    DE = Entities.end();
+           DI != DE; ++DI)
+        AT.AddName(Name, *DI);
     }
   }

   AT.FinalizeTable(Asm, "ObjC");
-  Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
-                                 .getDwarfAccelObjCSection());
+  Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfAccelObjCSection());
   MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin");
   Asm->OutStreamer.EmitLabel(SectionBegin);

@@ -2310,25 +2256,30 @@ void DwarfDebug::emitAccelObjC() {

 // Emit namespace dies into a hashed accelerator table.
 void DwarfDebug::emitAccelNamespaces() {
-  DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
-                                           dwarf::DW_FORM_data4));
-  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
-         E = CUMap.end(); I != E; ++I) {
-    CompileUnit *TheCU = I->second;
-    const StringMap<std::vector<DIE *> > &Names = TheCU->getAccelNamespace();
-    for (StringMap<std::vector<DIE *> >::const_iterator
-           GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+  DwarfAccelTable AT(
+      DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4));
+  for (SmallVectorImpl<DwarfUnit *>::const_iterator I = getUnits().begin(),
+                                                    E = getUnits().end();
+       I != E; ++I) {
+    DwarfUnit *TheU = *I;
+    const StringMap<std::vector<const DIE *> > &Names =
+        TheU->getAccelNamespace();
+    for (StringMap<std::vector<const DIE *> >::const_iterator
+             GI = Names.begin(),
+             GE = Names.end();
+         GI != GE; ++GI) {
       StringRef Name = GI->getKey();
-      const std::vector<DIE *> &Entities = GI->second;
-      for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
-             DE = Entities.end(); DI != DE; ++DI)
-        AT.AddName(Name, (*DI));
+      const std::vector<const DIE *> &Entities = GI->second;
+      for (std::vector<const DIE *>::const_iterator DI = Entities.begin(),
+                                                    DE = Entities.end();
+           DI != DE; ++DI)
+        AT.AddName(Name, *DI);
     }
   }

   AT.FinalizeTable(Asm, "namespac");
-  Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
-                                 .getDwarfAccelNamespaceSection());
+  Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfAccelNamespaceSection());
   MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin");
   Asm->OutStreamer.EmitLabel(SectionBegin);

@@ -2339,31 +2290,38 @@ void DwarfDebug::emitAccelNamespaces() {

 // Emit type dies into a hashed accelerator table.
 void DwarfDebug::emitAccelTypes() {
   std::vector<DwarfAccelTable::Atom> Atoms;
-  Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
-                                        dwarf::DW_FORM_data4));
-  Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag,
-                                        dwarf::DW_FORM_data2));
-  Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags,
-                                        dwarf::DW_FORM_data1));
+  Atoms.push_back(
+      DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4));
+  Atoms.push_back(
+      DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2));
+  Atoms.push_back(
+      DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1));
   DwarfAccelTable AT(Atoms);
-  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
-         E = CUMap.end(); I != E; ++I) {
-    CompileUnit *TheCU = I->second;
-    const StringMap<std::vector<std::pair<DIE *, unsigned> > > &Names
-      = TheCU->getAccelTypes();
-    for (StringMap<std::vector<std::pair<DIE *, unsigned> > >::const_iterator
-           GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+  for (SmallVectorImpl<DwarfUnit *>::const_iterator I = getUnits().begin(),
+                                                    E = getUnits().end();
+       I != E; ++I) {
+    DwarfUnit *TheU = *I;
+    const StringMap<std::vector<std::pair<const DIE *, unsigned> > > &Names =
+        TheU->getAccelTypes();
+    for (StringMap<
+             std::vector<std::pair<const DIE *, unsigned> > >::const_iterator
+             GI = Names.begin(),
+             GE = Names.end();
+         GI != GE; ++GI) {
       StringRef Name = GI->getKey();
-      const std::vector<std::pair<DIE *, unsigned> > &Entities = GI->second;
-      for (std::vector<std::pair<DIE *, unsigned> >::const_iterator DI
-             = Entities.begin(), DE = Entities.end(); DI !=DE; ++DI)
-        AT.AddName(Name, (*DI).first, (*DI).second);
+      const std::vector<std::pair<const DIE *, unsigned> > &Entities =
+          GI->second;
+      for (std::vector<std::pair<const DIE *, unsigned> >::const_iterator
+               DI = Entities.begin(),
+               DE = Entities.end();
+           DI != DE; ++DI)
+        AT.AddName(Name, DI->first, DI->second);
     }
   }

   AT.FinalizeTable(Asm, "types");
-  Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
-                                 .getDwarfAccelTypesSection());
+  Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfAccelTypesSection());
   MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin");
   Asm->OutStreamer.EmitLabel(SectionBegin);
@@ -2386,8 +2344,8 @@ void DwarfDebug::emitAccelTypes() {
 // reference in the pubname header doesn't change.

 /// computeIndexValue - Compute the gdb index value for the DIE and CU.
-static dwarf::PubIndexEntryDescriptor computeIndexValue(CompileUnit *CU,
-                                                        DIE *Die) {
+static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
+                                                        const DIE *Die) {
   dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC;

   // We could have a specification DIE that has our most of our knowledge,
@@ -2431,135 +2389,124 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(CompileUnit *CU,
 /// emitDebugPubNames - Emit visible names into a debug pubnames section.
 ///
 void DwarfDebug::emitDebugPubNames(bool GnuStyle) {
-  const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
   const MCSection *PSec =
       GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
                : Asm->getObjFileLowering().getDwarfPubNamesSection();

-  typedef DenseMap<const MDNode *, CompileUnit *> CUMapType;
-  for (CUMapType::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) {
-    CompileUnit *TheCU = I->second;
-    unsigned ID = TheCU->getUniqueID();
+  DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+  const SmallVectorImpl<DwarfUnit *> &Units = Holder.getUnits();
+  for (unsigned i = 0; i != Units.size(); ++i) {
+    DwarfUnit *TheU = Units[i];
+    unsigned ID = TheU->getUniqueID();

     // Start the dwarf pubnames section.
     Asm->OutStreamer.SwitchSection(PSec);

     // Emit a label so we can reference the beginning of this pubname section.
     if (GnuStyle)
-      Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubnames",
-                                                    TheCU->getUniqueID()));
+      Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubnames", ID));

     // Emit the header.
     Asm->OutStreamer.AddComment("Length of Public Names Info");
-    Asm->EmitLabelDifference(Asm->GetTempSymbol("pubnames_end", ID),
-                             Asm->GetTempSymbol("pubnames_begin", ID), 4);
+    MCSymbol *BeginLabel = Asm->GetTempSymbol("pubnames_begin", ID);
+    MCSymbol *EndLabel = Asm->GetTempSymbol("pubnames_end", ID);
+    Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);

-    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID));
+    Asm->OutStreamer.EmitLabel(BeginLabel);

     Asm->OutStreamer.AddComment("DWARF Version");
     Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION);

     Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
-    Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
-                           DwarfInfoSectionSym);
+    Asm->EmitSectionOffset(TheU->getLabelBegin(), TheU->getSectionSym());

     Asm->OutStreamer.AddComment("Compilation Unit Length");
-    Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(), ID),
-                             Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
-                             4);
+    Asm->EmitLabelDifference(TheU->getLabelEnd(), TheU->getLabelBegin(), 4);

     // Emit the pubnames for this compilation unit.
-    const StringMap<DIE *> &Globals = TheCU->getGlobalNames();
-    for (StringMap<DIE *>::const_iterator
-           GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+    const StringMap<const DIE *> &Globals = getUnits()[ID]->getGlobalNames();
+    for (StringMap<const DIE *>::const_iterator GI = Globals.begin(),
+                                                GE = Globals.end();
+         GI != GE; ++GI) {
       const char *Name = GI->getKeyData();
-      DIE *Entity = GI->second;
+      const DIE *Entity = GI->second;

       Asm->OutStreamer.AddComment("DIE offset");
       Asm->EmitInt32(Entity->getOffset());

       if (GnuStyle) {
-        dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity);
+        dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
         Asm->OutStreamer.AddComment(
             Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +
             dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
         Asm->EmitInt8(Desc.toBits());
       }

-      if (Asm->isVerbose())
-        Asm->OutStreamer.AddComment("External Name");
-      Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1));
+      Asm->OutStreamer.AddComment("External Name");
+      Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength() + 1));
     }

     Asm->OutStreamer.AddComment("End Mark");
     Asm->EmitInt32(0);
-    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", ID));
+    Asm->OutStreamer.EmitLabel(EndLabel);
   }
 }

 void DwarfDebug::emitDebugPubTypes(bool GnuStyle) {
-  const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
   const MCSection *PSec =
       GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
               : Asm->getObjFileLowering().getDwarfPubTypesSection();

-  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
-                                                         E = CUMap.end();
-       I != E; ++I) {
-    CompileUnit *TheCU = I->second;
+  DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+  const SmallVectorImpl<DwarfUnit *> &Units = Holder.getUnits();
+  for (unsigned i = 0; i != Units.size(); ++i) {
+    DwarfUnit *TheU = Units[i];
+    unsigned ID = TheU->getUniqueID();
+
     // Start the dwarf pubtypes section.
     Asm->OutStreamer.SwitchSection(PSec);

     // Emit a label so we can reference the beginning of this pubtype section.
     if (GnuStyle)
-      Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubtypes",
-                                                    TheCU->getUniqueID()));
+      Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubtypes", ID));

     // Emit the header.
     Asm->OutStreamer.AddComment("Length of Public Types Info");
-    Asm->EmitLabelDifference(
-        Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()),
-        Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4);
+    MCSymbol *BeginLabel = Asm->GetTempSymbol("pubtypes_begin", ID);
+    MCSymbol *EndLabel = Asm->GetTempSymbol("pubtypes_end", ID);
+    Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);

-    Asm->OutStreamer.EmitLabel(
-        Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()));
+    Asm->OutStreamer.EmitLabel(BeginLabel);

-    if (Asm->isVerbose())
-      Asm->OutStreamer.AddComment("DWARF Version");
+    Asm->OutStreamer.AddComment("DWARF Version");
     Asm->EmitInt16(dwarf::DW_PUBTYPES_VERSION);

     Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
-    Asm->EmitSectionOffset(
-        Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()),
-        DwarfInfoSectionSym);
+    Asm->EmitSectionOffset(TheU->getLabelBegin(), TheU->getSectionSym());

     Asm->OutStreamer.AddComment("Compilation Unit Length");
-    Asm->EmitLabelDifference(
-        Asm->GetTempSymbol(ISec->getLabelEndName(), TheCU->getUniqueID()),
-        Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()), 4);
+    Asm->EmitLabelDifference(TheU->getLabelEnd(), TheU->getLabelBegin(), 4);

     // Emit the pubtypes.
-    const StringMap<DIE *> &Globals = TheCU->getGlobalTypes();
-    for (StringMap<DIE *>::const_iterator GI = Globals.begin(),
-           GE = Globals.end();
+    const StringMap<const DIE *> &Globals = getUnits()[ID]->getGlobalTypes();
+    for (StringMap<const DIE *>::const_iterator GI = Globals.begin(),
+                                                GE = Globals.end();
          GI != GE; ++GI) {
       const char *Name = GI->getKeyData();
-      DIE *Entity = GI->second;
+      const DIE *Entity = GI->second;

-      if (Asm->isVerbose())
-        Asm->OutStreamer.AddComment("DIE offset");
+      Asm->OutStreamer.AddComment("DIE offset");
       Asm->EmitInt32(Entity->getOffset());

       if (GnuStyle) {
-        dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity);
+        dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
         Asm->OutStreamer.AddComment(
             Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +
             dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
         Asm->EmitInt8(Desc.toBits());
       }

-      if (Asm->isVerbose())
-        Asm->OutStreamer.AddComment("External Name");
+      Asm->OutStreamer.AddComment("External Name");

       // Emit the name with a terminating null byte.
       Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength() + 1));
@@ -2567,28 +2514,30 @@ void DwarfDebug::emitDebugPubTypes(bool GnuStyle) {

     Asm->OutStreamer.AddComment("End Mark");
     Asm->EmitInt32(0);
-    Asm->OutStreamer.EmitLabel(
-        Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()));
+    Asm->OutStreamer.EmitLabel(EndLabel);
   }
 }

 // Emit strings into a string section.
-void DwarfUnits::emitStrings(const MCSection *StrSection,
-                             const MCSection *OffsetSection = NULL,
-                             const MCSymbol *StrSecSym = NULL) {
+void DwarfFile::emitStrings(const MCSection *StrSection,
+                            const MCSection *OffsetSection = NULL,
+                            const MCSymbol *StrSecSym = NULL) {

-  if (StringPool.empty()) return;
+  if (StringPool.empty())
+    return;

   // Start the dwarf str section.
   Asm->OutStreamer.SwitchSection(StrSection);

   // Get all of the string pool entries and put them in an array by their ID so
   // we can sort them.
-  SmallVector<std::pair<unsigned,
-                        StringMapEntry<std::pair<MCSymbol *, unsigned> >*>, 64> Entries;
+  SmallVector<
+      std::pair<unsigned, StringMapEntry<std::pair<MCSymbol *, unsigned> > *>,
+      64> Entries;

-  for (StringMap<std::pair<MCSymbol *, unsigned> >::iterator
-         I = StringPool.begin(), E = StringPool.end();
+  for (StringMap<std::pair<MCSymbol *, unsigned> >::iterator
+           I = StringPool.begin(),
+           E = StringPool.end();
        I != E; ++I)
     Entries.push_back(std::make_pair(I->second.second, &*I));

@@ -2599,8 +2548,9 @@ void DwarfUnits::emitStrings(const MCSection *StrSection,
     Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first);

     // Emit the string itself with a terminating null byte.
-    Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(),
-                                         Entries[i].second->getKeyLength()+1));
+    Asm->OutStreamer.EmitBytes(
+        StringRef(Entries[i].second->getKeyData(),
+                  Entries[i].second->getKeyLength() + 1));
   }

   // If we've got an offset section go ahead and emit that now as well.
@@ -2615,10 +2565,11 @@ void DwarfUnits::emitStrings(const MCSection *StrSection,
   }
 }

-// Emit strings into a string section.
-void DwarfUnits::emitAddresses(const MCSection *AddrSection) {
+// Emit addresses into the section given.
+void DwarfFile::emitAddresses(const MCSection *AddrSection) {

-  if (AddressPool.empty()) return;
+  if (AddressPool.empty())
+    return;

   // Start the dwarf addr section.
   Asm->OutStreamer.SwitchSection(AddrSection);
@@ -2638,12 +2589,11 @@ void DwarfUnits::emitAddresses(const MCSection *AddrSection) {
     else
       Asm->OutStreamer.EmitIntValue(0, Asm->getDataLayout().getPointerSize());
   }
-
 }

 // Emit visible names into a debug str section.
 void DwarfDebug::emitDebugStr() {
-  DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+  DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
   Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
 }

@@ -2653,24 +2603,27 @@ void DwarfDebug::emitDebugLoc() {
     return;

   for (SmallVectorImpl<DotDebugLocEntry>::iterator
-         I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+           I = DotDebugLocEntries.begin(),
+           E = DotDebugLocEntries.end();
        I != E; ++I) {
     DotDebugLocEntry &Entry = *I;
     if (I + 1 != DotDebugLocEntries.end())
-      Entry.Merge(I+1);
+      Entry.Merge(I + 1);
   }

   // Start the dwarf loc section.
   Asm->OutStreamer.SwitchSection(
-    Asm->getObjFileLowering().getDwarfLocSection());
+      Asm->getObjFileLowering().getDwarfLocSection());
   unsigned char Size = Asm->getDataLayout().getPointerSize();
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
   unsigned index = 1;
   for (SmallVectorImpl<DotDebugLocEntry>::iterator
-         I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+           I = DotDebugLocEntries.begin(),
+           E = DotDebugLocEntries.end();
        I != E; ++I, ++index) {
     DotDebugLocEntry &Entry = *I;
-    if (Entry.isMerged()) continue;
+    if (Entry.isMerged())
+      continue;
     if (Entry.isEmpty()) {
       Asm->OutStreamer.EmitIntValue(0, Size);
       Asm->OutStreamer.EmitIntValue(0, Size);
@@ -2686,9 +2639,8 @@ void DwarfDebug::emitDebugLoc() {
       Asm->OutStreamer.EmitLabel(begin);
       if (Entry.isInt()) {
         DIBasicType BTy(DV.getType());
-        if (BTy.Verify() &&
-            (BTy.getEncoding() == dwarf::DW_ATE_signed
-             || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
+        if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed ||
+                             BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
           Asm->OutStreamer.AddComment("DW_OP_consts");
           Asm->EmitInt8(dwarf::DW_OP_consts);
           Asm->EmitSLEB128(Entry.getInt());
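The Entry.Merge(I + 1) coalescing above follows the shape of DotDebugLocEntry (visible in the header diff further down): when the next entry describes the same value and begins exactly where this one ends, the next entry is extended backwards and this one is marked merged so the emitter skips it. A minimal sketch with illustrative fields:

    struct LocEntry {
      const void *Begin, *End; // range-delimiting labels
      int Value;               // stand-in for the described location/constant
      bool Merged;

      // Mirrors DotDebugLocEntry::Merge: pull Next's begin back to ours and
      // mark *this* merged; emission later skips merged entries.
      void merge(LocEntry *Next) {
        if (Next->Value != Value || Next->Begin != End)
          return; // not adjacent, or a different value
        Next->Begin = Begin;
        Merged = true;
      }
    };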
@@ -2751,7 +2703,7 @@ struct SymbolCUSorter {
   SymbolCUSorter(const MCStreamer &s) : Streamer(s) {}
   const MCStreamer &Streamer;

-  bool operator() (const SymbolCU &A, const SymbolCU &B) {
+  bool operator()(const SymbolCU &A, const SymbolCU &B) {
     unsigned IA = A.Sym ? Streamer.GetSymbolOrder(A.Sym) : 0;
     unsigned IB = B.Sym ? Streamer.GetSymbolOrder(B.Sym) : 0;

@@ -2765,8 +2717,8 @@ struct SymbolCUSorter {
   }
 };

-static bool CUSort(const CompileUnit *A, const CompileUnit *B) {
-  return (A->getUniqueID() < B->getUniqueID());
+static bool CUSort(const DwarfUnit *A, const DwarfUnit *B) {
+  return (A->getUniqueID() < B->getUniqueID());
 }

 struct ArangeSpan {
@@ -2777,10 +2729,10 @@ struct ArangeSpan {
 // address we can tie back to a CU.
 void DwarfDebug::emitDebugARanges() {
   // Start the dwarf aranges section.
-  Asm->OutStreamer
-      .SwitchSection(Asm->getObjFileLowering().getDwarfARangesSection());
+  Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfARangesSection());

-  typedef DenseMap<CompileUnit *, std::vector<ArangeSpan> > SpansType;
+  typedef DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan> > SpansType;

   SpansType Spans;

@@ -2797,7 +2749,7 @@ void DwarfDebug::emitDebugARanges() {
   std::sort(Sections.begin(), Sections.end(), SectionSort);

   // Build a set of address spans, sorted by CU.
-  for (size_t SecIdx=0;SecIdx<Sections.size();SecIdx++) {
+  for (size_t SecIdx = 0; SecIdx < Sections.size(); SecIdx++) {
     const MCSection *Section = Sections[SecIdx];
     SmallVectorImpl<SymbolCU> &List = SectionMap[Section];
     if (List.size() < 2)
@@ -2838,13 +2790,12 @@ void DwarfDebug::emitDebugARanges() {
     }
   }

-  const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
   unsigned PtrSize = Asm->getDataLayout().getPointerSize();

   // Build a list of CUs used.
-  std::vector<CompileUnit *> CUs;
+  std::vector<DwarfCompileUnit *> CUs;
   for (SpansType::iterator it = Spans.begin(); it != Spans.end(); it++) {
-    CompileUnit *CU = it->first;
+    DwarfCompileUnit *CU = it->first;
     CUs.push_back(CU);
   }

@@ -2852,16 +2803,16 @@ void DwarfDebug::emitDebugARanges() {
   std::sort(CUs.begin(), CUs.end(), CUSort);

   // Emit an arange table for each CU we used.
-  for (size_t CUIdx=0;CUIdx<CUs.size();CUIdx++) {
-    CompileUnit *CU = CUs[CUIdx];
-    std::vector<ArangeSpan> &List = Spans[CU];
+  for (size_t CUIdx = 0; CUIdx < CUs.size(); CUIdx++) {
+    DwarfCompileUnit *CU = CUs[CUIdx];
+    std::vector<ArangeSpan> &List = Spans[CU];

     // Emit size of content not including length itself.
-    unsigned ContentSize
-        = sizeof(int16_t) // DWARF ARange version number
-        + sizeof(int32_t) // Offset of CU in the .debug_info section
-        + sizeof(int8_t)  // Pointer Size (in bytes)
-        + sizeof(int8_t); // Segment Size (in bytes)
+    unsigned ContentSize =
+        sizeof(int16_t) + // DWARF ARange version number
+        sizeof(int32_t) + // Offset of CU in the .debug_info section
+        sizeof(int8_t) +  // Pointer Size (in bytes)
+        sizeof(int8_t);   // Segment Size (in bytes)

     unsigned TupleSize = PtrSize * 2;

@@ -2879,9 +2830,7 @@ void DwarfDebug::emitDebugARanges() {
     Asm->OutStreamer.AddComment("DWARF Arange version number");
     Asm->EmitInt16(dwarf::DW_ARANGES_VERSION);
     Asm->OutStreamer.AddComment("Offset Into Debug Info Section");
-    Asm->EmitSectionOffset(
-        Asm->GetTempSymbol(ISec->getLabelBeginName(), CU->getUniqueID()),
-        DwarfInfoSectionSym);
+    Asm->EmitSectionOffset(CU->getLocalLabelBegin(), CU->getLocalSectionSym());
     Asm->OutStreamer.AddComment("Address Size (in bytes)");
     Asm->EmitInt8(PtrSize);
     Asm->OutStreamer.AddComment("Segment Size (in bytes)");
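The aranges header written above must pad out so the (address, length) tuples start on a 2*PtrSize boundary. An equivalent stand-alone form of the padding arithmetic implied by ContentSize and TupleSize:

    #include <cstdint>

    // Bytes after the initial 4-byte length field are rounded up to a
    // multiple of the tuple size before the first (address, length) pair.
    unsigned arangesPadding(unsigned PtrSize) {
      unsigned ContentSize = sizeof(int16_t)    // DWARF ARange version number
                             + sizeof(int32_t)  // offset of CU in .debug_info
                             + sizeof(int8_t)   // pointer size (in bytes)
                             + sizeof(int8_t);  // segment size (in bytes)
      unsigned TupleSize = PtrSize * 2;
      unsigned Padding = 0;
      while ((sizeof(int32_t) + ContentSize + Padding) % TupleSize != 0)
        ++Padding;
      return Padding;
    }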
@@ -2917,25 +2866,67 @@ void DwarfDebug::emitDebugARanges() {

 // Emit visible names into a debug ranges section.
 void DwarfDebug::emitDebugRanges() {
   // Start the dwarf ranges section.
-  Asm->OutStreamer
-      .SwitchSection(Asm->getObjFileLowering().getDwarfRangesSection());
-  unsigned char Size = Asm->getDataLayout().getPointerSize();
-  for (SmallVectorImpl<const MCSymbol *>::iterator
-         I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
-       I != E; ++I) {
-    if (*I)
-      Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol *>(*I), Size);
-    else
-      Asm->OutStreamer.EmitIntValue(0, Size);
-  }
-}
+  Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfRangesSection());

-// Emit visible names into a debug macinfo section.
-void DwarfDebug::emitDebugMacInfo() {
-  if (const MCSection *LineInfo =
-      Asm->getObjFileLowering().getDwarfMacroInfoSection()) {
-    // Start the dwarf macinfo section.
-    Asm->OutStreamer.SwitchSection(LineInfo);
+  // Size for our labels.
+  unsigned char Size = Asm->getDataLayout().getPointerSize();
+
+  // Grab the specific ranges for the compile units in the module.
+  for (DenseMap<const MDNode *, DwarfCompileUnit *>::iterator
+           I = CUMap.begin(),
+           E = CUMap.end();
+       I != E; ++I) {
+    DwarfCompileUnit *TheCU = I->second;
+
+    // Emit a symbol so we can find the beginning of our ranges.
+    Asm->OutStreamer.EmitLabel(TheCU->getLabelRange());
+
+    // Iterate over the misc ranges for the compile units in the module.
+    const SmallVectorImpl<RangeSpanList> &RangeLists = TheCU->getRangeLists();
+    for (SmallVectorImpl<RangeSpanList>::const_iterator I = RangeLists.begin(),
+                                                        E = RangeLists.end();
+         I != E; ++I) {
+      const RangeSpanList &List = *I;
+
+      // Emit our symbol so we can find the beginning of the range.
+      Asm->OutStreamer.EmitLabel(List.getSym());
+
+      for (SmallVectorImpl<RangeSpan>::const_iterator
+               RI = List.getRanges().begin(),
+               RE = List.getRanges().end();
+           RI != RE; ++RI) {
+        const RangeSpan &Range = *RI;
+        const MCSymbol *Begin = Range.getStart();
+        const MCSymbol *End = Range.getEnd();
+        assert(Begin && "Range without a begin symbol?");
+        assert(End && "Range without an end symbol?");
+        Asm->OutStreamer.EmitSymbolValue(Begin, Size);
+        Asm->OutStreamer.EmitSymbolValue(End, Size);
+      }
+
+      // And terminate the list with two 0 values.
+      Asm->OutStreamer.EmitIntValue(0, Size);
+      Asm->OutStreamer.EmitIntValue(0, Size);
+    }
+
+    // Now emit a range for the CU itself.
+    if (DwarfCURanges) {
+      Asm->OutStreamer.EmitLabel(
+          Asm->GetTempSymbol("cu_ranges", TheCU->getUniqueID()));
+      const SmallVectorImpl<RangeSpan> &Ranges = TheCU->getRanges();
+      for (uint32_t i = 0, e = Ranges.size(); i != e; ++i) {
+        RangeSpan Range = Ranges[i];
+        const MCSymbol *Begin = Range.getStart();
+        const MCSymbol *End = Range.getEnd();
+        assert(Begin && "Range without a begin symbol?");
+        assert(End && "Range without an end symbol?");
+        Asm->OutStreamer.EmitSymbolValue(Begin, Size);
+        Asm->OutStreamer.EmitSymbolValue(End, Size);
+      }
+      // And terminate the list with two 0 values.
+      Asm->OutStreamer.EmitIntValue(0, Size);
+      Asm->OutStreamer.EmitIntValue(0, Size);
+    }
   }
 }

@@ -2944,11 +2935,14 @@ void DwarfDebug::emitDebugMacInfo() {
 // This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list,
 // DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id,
 // DW_AT_ranges_base, DW_AT_addr_base.
-CompileUnit *DwarfDebug::constructSkeletonCU(const CompileUnit *CU) {
+// TODO: Implement DW_AT_ranges_base.
+DwarfCompileUnit *DwarfDebug::constructSkeletonCU(const DwarfCompileUnit *CU) {
   DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
-  CompileUnit *NewCU = new CompileUnit(CU->getUniqueID(), Die, CU->getNode(),
-                                       Asm, this, &SkeletonHolder);
+  DwarfCompileUnit *NewCU = new DwarfCompileUnit(
+      CU->getUniqueID(), Die, CU->getNode(), Asm, this, &SkeletonHolder);
+  NewCU->initSection(Asm->getObjFileLowering().getDwarfInfoSection(),
+                     DwarfInfoSectionSym);

   NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name,
                         CU->getNode().getSplitDebugFilename());
@@ -2956,73 +2950,34 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const CompileUnit *CU) {
   // Relocate to the beginning of the addr_base section, else 0 for the
   // beginning of the one for this compile unit.
   if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-    NewCU->addLabel(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset,
-                    DwarfAddrSectionSym);
+    NewCU->addSectionLabel(Die, dwarf::DW_AT_GNU_addr_base,
+                           DwarfAddrSectionSym);
   else
-    NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base,
-                   dwarf::DW_FORM_sec_offset, 0);
-
-  // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
-  // into an entity. We're using 0, or a NULL label for this.
-  NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
+    NewCU->addSectionOffset(Die, dwarf::DW_AT_GNU_addr_base, 0);

   // DW_AT_stmt_list is a offset of line number information for this
   // compile unit in debug_line section.
   // FIXME: Should handle multiple compile units.
   if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-    NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset,
-                    DwarfLineSectionSym);
+    NewCU->addSectionLabel(Die, dwarf::DW_AT_stmt_list, DwarfLineSectionSym);
   else
-    NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, 0);
+    NewCU->addSectionOffset(Die, dwarf::DW_AT_stmt_list, 0);

   if (!CompilationDir.empty())
     NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir);

-  // Flags to let the linker know we have emitted new style pubnames.
-  if (GenerateGnuPubSections) {
-    if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-      NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubnames, dwarf::DW_FORM_sec_offset,
-                      Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID()));
-    else
-      NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubnames, dwarf::DW_FORM_data4,
-                      Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID()),
-                      DwarfGnuPubNamesSectionSym);
-
-    if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-      NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubtypes, dwarf::DW_FORM_sec_offset,
-                      Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID()));
-    else
-      NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubtypes, dwarf::DW_FORM_data4,
-                      Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID()),
-                      DwarfGnuPubTypesSectionSym);
-  }
-
-  // Flag if we've emitted any ranges and their location for the compile unit.
-  if (DebugRangeSymbols.size()) {
-    if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-      NewCU->addLabel(Die, dwarf::DW_AT_GNU_ranges_base,
-                      dwarf::DW_FORM_sec_offset, DwarfDebugRangeSectionSym);
-    else
-      NewCU->addUInt(Die, dwarf::DW_AT_GNU_ranges_base, dwarf::DW_FORM_data4,
-                     0);
-  }
+  addGnuPubAttributes(NewCU, Die);

   SkeletonHolder.addUnit(NewCU);
-  SkeletonCUs.push_back(NewCU);

   return NewCU;
 }

-void DwarfDebug::emitSkeletonAbbrevs(const MCSection *Section) {
-  assert(useSplitDwarf() && "No split dwarf debug info?");
-  emitAbbrevs(Section, &SkeletonAbbrevs);
-}
-
 // Emit the .debug_info.dwo section for separated dwarf. This contains the
 // compile units that would normally be in debug_info.
 void DwarfDebug::emitDebugInfoDWO() {
   assert(useSplitDwarf() && "No split dwarf debug info?");
-  InfoHolder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoDWOSection(),
+  InfoHolder.emitUnits(this,
                        Asm->getObjFileLowering().getDwarfAbbrevDWOSection(),
                        DwarfAbbrevDWOSectionSym);
 }
@@ -3031,8 +2986,7 @@ void DwarfDebug::emitDebugInfoDWO() {
 // abbreviations for the .debug_info.dwo section.
 void DwarfDebug::emitDebugAbbrevDWO() {
   assert(useSplitDwarf() && "No split dwarf?");
-  emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection(),
-              &Abbreviations);
+  InfoHolder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection());
 }

 // Emit the .debug_str.dwo section for separated dwarf. This contains the
 // string offsets and full string section emitted in the standard dwarf
@@ -3040,9 +2994,45 @@ void DwarfDebug::emitDebugAbbrevDWO() {
 // sections.
 void DwarfDebug::emitDebugStrDWO() {
   assert(useSplitDwarf() && "No split dwarf?");
-  const MCSection *OffSec = Asm->getObjFileLowering()
-                            .getDwarfStrOffDWOSection();
+  const MCSection *OffSec =
+      Asm->getObjFileLowering().getDwarfStrOffDWOSection();
   const MCSymbol *StrSym = DwarfStrSectionSym;
   InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(),
                          OffSec, StrSym);
 }
+
+void DwarfDebug::addDwarfTypeUnitType(uint16_t Language, StringRef Identifier,
+                                      DIE *RefDie, DICompositeType CTy) {
+  const DwarfTypeUnit *&TU = DwarfTypeUnits[CTy];
+  if (!TU) {
+    DIE *UnitDie = new DIE(dwarf::DW_TAG_type_unit);
+    DwarfTypeUnit *NewTU =
+        new DwarfTypeUnit(InfoHolder.getUnits().size(), UnitDie, Language, Asm,
+                          this, &InfoHolder);
+    TU = NewTU;
+    InfoHolder.addUnit(NewTU);
+
+    NewTU->addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+                   Language);
+
+    DIE *Die = NewTU->createTypeDIE(CTy);
+
+    MD5 Hash;
+    Hash.update(Identifier);
+    // ... take the least significant 8 bytes and return those. Our MD5
+    // implementation always returns its results in little endian, swap bytes
+    // appropriately.
+    MD5::MD5Result Result;
+    Hash.final(Result);
+    uint64_t Signature = *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+    NewTU->setTypeSignature(Signature);
+    NewTU->setType(Die);
+
+    NewTU->initSection(
+        useSplitDwarf()
+            ? Asm->getObjFileLowering().getDwarfTypesDWOSection(Signature)
+            : Asm->getObjFileLowering().getDwarfTypesSection(Signature));
+  }
+
+  CUMap.begin()->second->addDIETypeSignature(RefDie, *TU);
+}
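The signature computation in addDwarfTypeUnitType above reduces to: MD5 the unique type identifier, keep the last 8 digest bytes, read them little-endian. A library-free sketch of that step, assuming a precomputed 16-byte digest:

    #include <cstdint>

    // Given the 16-byte MD5 digest of the type's unique identifier, the
    // 64-bit DWARF type signature is digest bytes 8..15 read as a
    // little-endian integer (matching the support::ulittle64_t cast above).
    uint64_t signatureFromDigest(const uint8_t Digest[16]) {
      uint64_t Signature = 0;
      for (int I = 0; I < 8; ++I)
        Signature |= static_cast<uint64_t>(Digest[8 + I]) << (8 * I);
      return Signature;
    }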
diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index cebac39a19b0..84d9cae7a1d9 100644
--- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -14,6 +14,7 @@
 #ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
 #define CODEGEN_ASMPRINTER_DWARFDEBUG_H__

+#include "AsmPrinterHandler.h"
 #include "DIE.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
@@ -29,7 +30,8 @@

 namespace llvm {

-class CompileUnit;
+class DwarfUnit;
+class DwarfCompileUnit;
 class ConstantInt;
 class ConstantFP;
 class DbgVariable;
@@ -37,6 +39,7 @@ class MachineFrameInfo;
 class MachineModuleInfo;
 class MachineOperand;
 class MCAsmInfo;
+class MCObjectFileInfo;
 class DIEAbbrev;
 class DIE;
 class DIEBlock;
@@ -45,13 +48,13 @@ class DIEEntry;
 //===----------------------------------------------------------------------===//
 /// \brief This class is used to record source line correspondence.
 class SrcLineInfo {
-  unsigned Line;      // Source line number.
-  unsigned Column;    // Source column.
-  unsigned SourceID;  // Source ID number.
-  MCSymbol *Label;    // Label in code ID number.
+  unsigned Line;     // Source line number.
+  unsigned Column;   // Source column.
+  unsigned SourceID; // Source ID number.
+  MCSymbol *Label;   // Label in code ID number.
 public:
   SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label)
-    : Line(L), Column(C), SourceID(S), Label(label) {}
+      : Line(L), Column(C), SourceID(S), Label(label) {}

   // Accessors
   unsigned getLine() const { return Line; }
@@ -128,12 +131,12 @@ public:
     Next->Begin = Begin;
     Merged = true;
   }
-  bool isLocation() const    { return EntryKind == E_Location; }
-  bool isInt() const         { return EntryKind == E_Integer; }
-  bool isConstantFP() const  { return EntryKind == E_ConstantFP; }
+  bool isLocation() const { return EntryKind == E_Location; }
+  bool isInt() const { return EntryKind == E_Integer; }
+  bool isConstantFP() const { return EntryKind == E_ConstantFP; }
   bool isConstantInt() const { return EntryKind == E_ConstantInt; }
-  int64_t getInt() const                    { return Constants.Int; }
-  const ConstantFP *getConstantFP() const   { return Constants.CFP; }
+  int64_t getInt() const { return Constants.Int; }
+  const ConstantFP *getConstantFP() const { return Constants.CFP; }
   const ConstantInt *getConstantInt() const { return Constants.CIP; }
   const MDNode *getVariable() const { return Variable; }
   const MCSymbol *getBeginSym() const { return Begin; }
@@ -144,40 +147,41 @@ public:
 //===----------------------------------------------------------------------===//
 /// \brief This class is used to track local variable information.
 class DbgVariable {
-  DIVariable Var;             // Variable Descriptor.
-  DIE *TheDIE;                // Variable DIE.
-  unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
-  DbgVariable *AbsVar;        // Corresponding Abstract variable, if any.
-  const MachineInstr *MInsn;  // DBG_VALUE instruction of the variable.
+  DIVariable Var;             // Variable Descriptor.
+  DIE *TheDIE;                // Variable DIE.
+  unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
+  DbgVariable *AbsVar;        // Corresponding Abstract variable, if any.
+  const MachineInstr *MInsn;  // DBG_VALUE instruction of the variable.
   int FrameIndex;
   DwarfDebug *DD;
+
 public:
   // AbsVar may be NULL.
   DbgVariable(DIVariable V, DbgVariable *AV, DwarfDebug *DD)
-    : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0),
-      FrameIndex(~0), DD(DD) {}
+      : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0),
+        FrameIndex(~0), DD(DD) {}

   // Accessors.
-  DIVariable getVariable() const           { return Var; }
-  void setDIE(DIE *D)                      { TheDIE = D; }
-  DIE *getDIE() const                      { return TheDIE; }
-  void setDotDebugLocOffset(unsigned O)    { DotDebugLocOffset = O; }
-  unsigned getDotDebugLocOffset() const    { return DotDebugLocOffset; }
-  StringRef getName() const                { return Var.getName(); }
+  DIVariable getVariable() const { return Var; }
+  void setDIE(DIE *D) { TheDIE = D; }
+  DIE *getDIE() const { return TheDIE; }
+  void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
+  unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
+  StringRef getName() const { return Var.getName(); }
   DbgVariable *getAbstractVariable() const { return AbsVar; }
-  const MachineInstr *getMInsn() const     { return MInsn; }
-  void setMInsn(const MachineInstr *M)     { MInsn = M; }
-  int getFrameIndex() const                { return FrameIndex; }
-  void setFrameIndex(int FI)               { FrameIndex = FI; }
+  const MachineInstr *getMInsn() const { return MInsn; }
+  void setMInsn(const MachineInstr *M) { MInsn = M; }
+  int getFrameIndex() const { return FrameIndex; }
+  void setFrameIndex(int FI) { FrameIndex = FI; }
   // Translate tag to proper Dwarf tag.
-  uint16_t getTag() const {
+  uint16_t getTag() const {
     if (Var.getTag() == dwarf::DW_TAG_arg_variable)
       return dwarf::DW_TAG_formal_parameter;

     return dwarf::DW_TAG_variable;
   }

   /// \brief Return true if DbgVariable is artificial.
-  bool isArtificial() const {
+  bool isArtificial() const {
     if (Var.isArtificial())
       return true;
     if (getType().isArtificial())
@@ -185,7 +189,7 @@ public:
     return false;
   }

-  bool isObjectPointer() const {
+  bool isObjectPointer() const {
     if (Var.isObjectPointer())
       return true;
     if (getType().isObjectPointer())
@@ -193,21 +197,19 @@ public:
     return false;
   }

-  bool variableHasComplexAddress() const {
+  bool variableHasComplexAddress() const {
     assert(Var.isVariable() && "Invalid complex DbgVariable!");
     return Var.hasComplexAddress();
   }
-  bool isBlockByrefVariable() const {
+  bool isBlockByrefVariable() const {
     assert(Var.isVariable() && "Invalid complex DbgVariable!");
     return Var.isBlockByrefVariable();
   }
-  unsigned getNumAddrElements() const {
+  unsigned getNumAddrElements() const {
     assert(Var.isVariable() && "Invalid complex DbgVariable!");
     return Var.getNumAddrElements();
   }
-  uint64_t getAddrElement(unsigned i) const {
-    return Var.getAddrElement(i);
-  }
+  uint64_t getAddrElement(unsigned i) const { return Var.getAddrElement(i); }
   DIType getType() const;

 private:
@@ -217,25 +219,26 @@ private:
 };

 /// \brief Collects and handles information specific to a particular
-/// collection of units.
-class DwarfUnits {
+/// collection of units. This collection represents all of the units
+/// that will be ultimately output into a single object file.
+class DwarfFile {
   // Target of Dwarf emission, used for sizing of abbreviations.
   AsmPrinter *Asm;

   // Used to uniquely define abbreviations.
-  FoldingSet<DIEAbbrev> *AbbreviationsSet;
+  FoldingSet<DIEAbbrev> AbbreviationsSet;

   // A list of all the unique abbreviations in use.
-  std::vector<DIEAbbrev *> &Abbreviations;
+  std::vector<DIEAbbrev *> Abbreviations;

   // A pointer to all units in the section.
-  SmallVector<CompileUnit *, 1> CUs;
+  SmallVector<DwarfUnit *, 1> CUs;

   // Collection of strings for this unit and assorted symbols.
   // A String->Symbol mapping of strings used by indirect
   // references.
-  typedef StringMap<std::pair<MCSymbol *, unsigned>,
-                    BumpPtrAllocator&> StrPool;
+  typedef StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &>
+  StrPool;
   StrPool StringPool;
   unsigned NextStringPoolNumber;
   std::string StringPref;
@@ -248,12 +251,13 @@ class DwarfUnits {
   unsigned NextAddrPoolNumber;

 public:
-  DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS,
-             std::vector<DIEAbbrev *> &A, const char *Pref,
-             BumpPtrAllocator &DA)
-    : Asm(AP), AbbreviationsSet(AS), Abbreviations(A), StringPool(DA),
-      NextStringPoolNumber(0), StringPref(Pref), AddressPool(),
-      NextAddrPoolNumber(0) {}
+  DwarfFile(AsmPrinter *AP, const char *Pref, BumpPtrAllocator &DA)
+      : Asm(AP), StringPool(DA), NextStringPoolNumber(0), StringPref(Pref),
+        AddressPool(), NextAddrPoolNumber(0) {}
+
+  ~DwarfFile();
+
+  const SmallVectorImpl<DwarfUnit *> &getUnits() { return CUs; }

   /// \brief Compute the size and offset of a DIE given an incoming Offset.
   unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
@@ -265,12 +269,15 @@ public:
   void assignAbbrevNumber(DIEAbbrev &Abbrev);

   /// \brief Add a unit to the list of CUs.
-  void addUnit(CompileUnit *CU) { CUs.push_back(CU); }
+  void addUnit(DwarfUnit *CU) { CUs.push_back(CU); }

   /// \brief Emit all of the units to the section listed with the given
   /// abbreviation section.
-  void emitUnits(DwarfDebug *DD, const MCSection *USection,
-                 const MCSection *ASection, const MCSymbol *ASectionSym);
+  void emitUnits(DwarfDebug *DD, const MCSection *ASection,
+                 const MCSymbol *ASectionSym);
+
+  /// \brief Emit a set of abbreviations to the specific section.
+  void emitAbbrevs(const MCSection *);
 
   /// \brief Emit all of the strings to the section given.
   void emitStrings(const MCSection *StrSection, const MCSection *OffsetSection,
@@ -304,13 +311,13 @@ public:
 
 /// \brief Helper used to pair up a symbol and its DWARF compile unit.
 struct SymbolCU {
-  SymbolCU(CompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {}
+  SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {}
   const MCSymbol *Sym;
-  CompileUnit *CU;
+  DwarfCompileUnit *CU;
 };
 
 /// \brief Collects and handles dwarf debug information.
-class DwarfDebug {
+class DwarfDebug : public AsmPrinterHandler {
   // Target of Dwarf emission.
   AsmPrinter *Asm;
 
@@ -320,40 +327,37 @@ class DwarfDebug {
   // All DIEValues are allocated through this allocator.
   BumpPtrAllocator DIEValueAllocator;
 
-  // Handle to the a compile unit used for the inline extension handling.
-  CompileUnit *FirstCU;
+  // Handle to the compile unit used for the inline extension handling;
+  // this is just so that the DIEValue allocator has a place to store
+  // the particular elements.
+  // FIXME: Store these off of DwarfDebug instead?
+  DwarfCompileUnit *FirstCU;
 
-  // Maps MDNode with its corresponding CompileUnit.
-  DenseMap CUMap;
+  // Maps MDNode with its corresponding DwarfCompileUnit.
+  DenseMap CUMap;
 
-  // Maps subprogram MDNode with its corresponding CompileUnit.
-  DenseMap SPMap;
+  // Maps subprogram MDNode with its corresponding DwarfCompileUnit.
+  DenseMap SPMap;
 
-  // Maps a CU DIE with its corresponding CompileUnit.
-  DenseMap CUDieMap;
+  // Maps a CU DIE with its corresponding DwarfCompileUnit.
+  DenseMap CUDieMap;
 
   /// Maps MDNodes for the type system with the corresponding DIEs. These DIEs
   /// can be shared across CUs, which is why we keep the map here instead
-  /// of in CompileUnit.
+  /// of in DwarfCompileUnit.
   DenseMap MDTypeNodeToDieMap;
 
-  // Used to uniquely define abbreviations.
-  FoldingSet AbbreviationsSet;
-
-  // A list of all the unique abbreviations in use.
-  std::vector Abbreviations;
-
   // Stores the current file ID for a given compile unit.
-  DenseMap FileIDCUMap;
+  DenseMap FileIDCUMap;
   // Source id map, i.e. CUID, source filename and directory,
   // separated by a zero byte, mapped to a unique id.
-  StringMap SourceIdMap;
+  StringMap SourceIdMap;
 
   // List of all labels used in aranges generation.
   std::vector ArangeLabels;
 
   // Size of each symbol emitted (for those symbols that have a specific size).
-  DenseMap SymSize;
+  DenseMap SymSize;
 
   // Provides a unique id per text section.
   typedef DenseMap > SectionMapType;
@@ -368,8 +372,8 @@ class DwarfDebug {
   DenseMap AbstractSPDies;
 
   // Collection of dbg variables of a scope.
-  typedef DenseMap > ScopeVariablesMap;
+  typedef DenseMap >
+      ScopeVariablesMap;
   ScopeVariablesMap ScopeVariables;
 
   // Collection of abstract variables.
@@ -394,17 +398,15 @@ class DwarfDebug {
 
   // Every user variable mentioned by a DBG_VALUE instruction in order of
   // appearance.
-  SmallVector UserVariables;
+  SmallVector UserVariables;
 
   // For each user variable, keep a list of DBG_VALUE instructions in order.
   // The list can also contain normal instructions that clobber the previous
   // DBG_VALUE.
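The history map described in the comment above is easy to picture with a small stand-alone model. The following is an illustrative sketch only, using std::map and std::vector in place of LLVM's DenseMap and SmallVector; every name in it is invented for the example:

#include <map>
#include <string>
#include <vector>

struct HistoryEntry {
  bool IsDbgValue;     // true: a DBG_VALUE; false: an instruction that
                       // clobbers the location of the previous DBG_VALUE
  unsigned InstrIndex; // stand-in for const MachineInstr *
};

// Per-variable, in order of appearance, mirroring DbgValueHistoryMap.
using DbgValueHistory = std::map<std::string, std::vector<HistoryEntry> >;

void recordDbgValue(DbgValueHistory &H, const std::string &Var, unsigned I) {
  H[Var].push_back({true, I});
}

void recordClobber(DbgValueHistory &H, const std::string &Var, unsigned I) {
  // Ends the live range opened by the most recent DBG_VALUE for Var.
  H[Var].push_back({false, I});
}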
-  typedef DenseMap >
-  DbgValueHistoryMap;
+  typedef DenseMap >
+      DbgValueHistoryMap;
   DbgValueHistoryMap DbgValues;
 
-  SmallVector DebugRangeSymbols;
-
   // Previous instruction's location information. This is used to determine
   // label location to indicate scope boundaries in dwarf debug info.
   DebugLoc PrevInstLoc;
@@ -414,6 +416,12 @@ class DwarfDebug {
   // body.
   DebugLoc PrologEndLoc;
 
+  // If nonnull, stores the current machine function we're processing.
+  const MachineFunction *CurFn;
+
+  // If nonnull, stores the current machine instruction we're processing.
+  const MachineInstr *CurMI;
+
   // Section Symbols: these are assembler temporary labels that are emitted at
   // the beginning of each supported dwarf section. These are used to form
   // section offsets and are created by EmitSectionLabels.
@@ -421,29 +429,31 @@ class DwarfDebug {
   MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
   MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym;
   MCSymbol *FunctionBeginSym, *FunctionEndSym;
-  MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym;
+  MCSymbol *DwarfInfoDWOSectionSym, *DwarfAbbrevDWOSectionSym;
+  MCSymbol *DwarfStrDWOSectionSym;
   MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym;
 
   // As an optimization, there is no need to emit an entry in the directory
   // table for the same directory as DW_AT_comp_dir.
   StringRef CompilationDir;
 
-  // Counter for assigning globally unique IDs for CUs.
-  unsigned GlobalCUIndexCount;
+  // Counter for assigning globally unique IDs for ranges.
+  unsigned GlobalRangeCount;
 
   // Holder for the file specific debug information.
-  DwarfUnits InfoHolder;
+  DwarfFile InfoHolder;
 
   // Holders for the various debug information flags that we might need to
   // have exposed. See accessor functions below for description.
 
   // Holder for imported entities.
   typedef SmallVector, 32>
-  ImportedEntityMap;
+      ImportedEntityMap;
   ImportedEntityMap ScopesWithImportedEntities;
 
-  // Holder for types that are going to be extracted out into a type unit.
-  std::vector TypeUnits;
+  // Map from MDNodes for user-defined types to the type units that describe
+  // them.
+  DenseMap DwarfTypeUnits;
 
   // Whether to emit the pubnames/pubtypes sections.
   bool HasDwarfPubSections;
@@ -451,6 +461,9 @@ class DwarfDebug {
   // Version of dwarf we're emitting.
   unsigned DwarfVersion;
 
+  // Maps from a type identifier to the actual MDNode.
+  DITypeIdentifierMap TypeIdentifierMap;
+
   // DWARF5 Experimental Options
   bool HasDwarfAccelTables;
   bool HasSplitDwarf;
@@ -460,25 +473,15 @@ class DwarfDebug {
   // original object file, rather than things that are meant
   // to be in the .dwo sections.
 
-  // The CUs left in the original object file for separated debug info.
-  SmallVector SkeletonCUs;
-
-  // Used to uniquely define abbreviations for the skeleton emission.
-  FoldingSet SkeletonAbbrevSet;
-
-  // A list of all the unique abbreviations in use.
-  std::vector SkeletonAbbrevs;
-
   // Holder for the skeleton information.
-  DwarfUnits SkeletonHolder;
-
-  // Maps from a type identifier to the actual MDNode.
-  DITypeIdentifierMap TypeIdentifierMap;
-
-private:
+  DwarfFile SkeletonHolder;
 
   void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
 
+  const SmallVectorImpl &getUnits() {
+    return InfoHolder.getUnits();
+  }
+
   /// \brief Find abstract variable associated with Var.
   DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
 
@@ -486,24 +489,30 @@ private:
   /// DW_AT_low_pc and DW_AT_high_pc attributes.
If there are global /// variables in this scope then create and insert DIEs for these /// variables. - DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP); + DIE *updateSubprogramScopeDIE(DwarfCompileUnit *SPCU, DISubprogram SP); + + /// \brief A helper function to check whether the DIE for a given Scope is + /// going to be null. + bool isLexicalScopeDIENull(LexicalScope *Scope); + + /// \brief A helper function to construct a RangeSpanList for a given + /// lexical scope. + void addScopeRangeList(DwarfCompileUnit *TheCU, DIE *ScopeDIE, + const SmallVectorImpl &Range); /// \brief Construct new DW_TAG_lexical_block for this scope and /// attach DW_AT_low_pc/DW_AT_high_pc labels. - DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); - /// A helper function to check whether the DIE for a given Scope is going - /// to be null. - bool isLexicalScopeDIENull(LexicalScope *Scope); + DIE *constructLexicalScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope); /// \brief This scope represents inlined body of a function. Construct /// DIE to represent this concrete inlined copy of the function. - DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); + DIE *constructInlinedScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope); /// \brief Construct a DIE for this scope. - DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); + DIE *constructScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope); /// A helper function to create children of a Scope DIE. - DIE *createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope, - SmallVectorImpl &Children); + DIE *createScopeChildrenDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope, + SmallVectorImpl &Children); /// \brief Emit initial Dwarf sections with a label at the start of each one. void emitSectionLabels(); @@ -528,9 +537,6 @@ private: /// open. void endSections(); - /// \brief Emit a set of abbreviations to the specific section. - void emitAbbrevs(const MCSection *, std::vector *); - /// \brief Emit the debug info section. void emitDebugInfo(); @@ -578,9 +584,6 @@ private: /// \brief Emit visible names into a debug ranges section. void emitDebugRanges(); - /// \brief Emit visible names into a debug macinfo section. - void emitDebugMacInfo(); - /// \brief Emit inline info using custom format. void emitDebugInlineInfo(); @@ -588,10 +591,7 @@ private: /// \brief Construct the split debug info compile unit for the debug info /// section. - CompileUnit *constructSkeletonCU(const CompileUnit *CU); - - /// \brief Emit the local split abbreviations. - void emitSkeletonAbbrevs(const MCSection *); + DwarfCompileUnit *constructSkeletonCU(const DwarfCompileUnit *CU); /// \brief Emit the debug info dwo section. void emitDebugInfoDWO(); @@ -602,24 +602,27 @@ private: /// \brief Emit the debug str dwo section. void emitDebugStrDWO(); - /// \brief Create new CompileUnit for the given metadata node with tag + /// Flags to let the linker know we have emitted new style pubnames. Only + /// emit it here if we don't have a skeleton CU for split dwarf. + void addGnuPubAttributes(DwarfUnit *U, DIE *D) const; + + /// \brief Create new DwarfCompileUnit for the given metadata node with tag /// DW_TAG_compile_unit. - CompileUnit *constructCompileUnit(DICompileUnit DIUnit); + DwarfCompileUnit *constructDwarfCompileUnit(DICompileUnit DIUnit); /// \brief Construct subprogram DIE. 
-  void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N);
+  void constructSubprogramDIE(DwarfCompileUnit *TheCU, const MDNode *N);
 
   /// \brief Construct imported_module or imported_declaration DIE.
-  void constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N);
+  void constructImportedEntityDIE(DwarfCompileUnit *TheCU, const MDNode *N);
 
   /// \brief Construct import_module DIE.
-  void constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N,
+  void constructImportedEntityDIE(DwarfCompileUnit *TheCU, const MDNode *N,
                                   DIE *Context);
 
   /// \brief Construct import_module DIE.
-  void constructImportedEntityDIE(CompileUnit *TheCU,
-                                  const DIImportedEntity &Module,
-                                  DIE *Context);
+  void constructImportedEntityDIE(DwarfCompileUnit *TheCU,
+                                  const DIImportedEntity &Module, DIE *Context);
 
   /// \brief Register a source line with debug info. Returns the unique
   /// label that was emitted and which provides correspondence to the
@@ -633,21 +636,18 @@ private:
 
   /// \brief If Var is a current function argument, add it to the
   /// CurrentFnArguments list.
-  bool addCurrentFnArgument(const MachineFunction *MF,
-                            DbgVariable *Var, LexicalScope *Scope);
+  bool addCurrentFnArgument(DbgVariable *Var, LexicalScope *Scope);
 
   /// \brief Populate LexicalScope entries with variables' info.
-  void collectVariableInfo(const MachineFunction *,
-                           SmallPtrSet &ProcessedVars);
+  void collectVariableInfo(SmallPtrSet &ProcessedVars);
 
   /// \brief Collect variable information from the side table maintained
   /// by MMI.
-  void collectVariableInfoFromMMITable(const MachineFunction * MF,
-                                       SmallPtrSet &P);
+  void collectVariableInfoFromMMITable(SmallPtrSet &P);
 
   /// \brief Ensure that a label will be emitted before MI.
   void requestLabelBeforeInsn(const MachineInstr *MI) {
-    LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+    LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol *)0));
   }
 
   /// \brief Return Label preceding the instruction.
@@ -655,7 +655,7 @@ private:
 
   /// \brief Ensure that a label will be emitted after MI.
   void requestLabelAfterInsn(const MachineInstr *MI) {
-    LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+    LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol *)0));
   }
 
   /// \brief Return Label immediately following the instruction.
@@ -691,18 +691,21 @@ public:
   void beginInstruction(const MachineInstr *MI);
 
   /// \brief Process end of an instruction.
-  void endInstruction(const MachineInstr *MI);
+  void endInstruction();
 
   /// \brief Add a DIE to the set of types that we're going to pull into
   /// type units.
-  void addTypeUnitType(DIE *Die) { TypeUnits.push_back(Die); }
+  void addDwarfTypeUnitType(uint16_t Language, StringRef Identifier, DIE *Die,
+                            DICompositeType CTy);
 
   /// \brief Add a label so that arange data can be generated for it.
   void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
 
   /// \brief For symbols that have a size designated (e.g. common symbols),
   /// this tracks that size.
-  void setSymbolSize(const MCSymbol *Sym, uint64_t Size) { SymSize[Sym] = Size;}
+  void setSymbolSize(const MCSymbol *Sym, uint64_t Size) {
+    SymSize[Sym] = Size;
+  }
 
   /// \brief Look up the source id with the given directory and source file
   /// names. If none currently exists, create a new id and insert it in the
@@ -711,7 +714,7 @@ public:
                                unsigned CUID);
 
   /// \brief Recursively emits a debug information entry.
-  void emitDIE(DIE *Die, ArrayRef Abbrevs);
+  void emitDIE(DIE *Die);
 
   // Experimental DWARF5 features.
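requestLabelBeforeInsn and requestLabelAfterInsn above rely on a request-then-fill idiom: a null placeholder marks an instruction as needing a label, and the symbol is filled in later while the instruction stream is walked. A minimal sketch of that idiom, with std::map standing in for DenseMap and a char standing in for MCSymbol; all names are illustrative:

#include <map>

typedef int Instr;         // stand-in for MachineInstr
typedef const char Symbol; // stand-in for MCSymbol

std::map<const Instr *, Symbol *> LabelsBeforeInsn;

// Phase 1: request. map::insert is a no-op if the key is already present,
// so repeated requests for the same instruction are harmless.
void requestLabel(const Instr *MI) {
  LabelsBeforeInsn.insert(std::make_pair(MI, (Symbol *)0));
}

// Phase 2: while emitting, fill in only the slots that were requested.
void fillLabel(const Instr *MI, Symbol *Sym) {
  std::map<const Instr *, Symbol *>::iterator I = LabelsBeforeInsn.find(MI);
  if (I != LabelsBeforeInsn.end() && !I->second)
    I->second = Sym;
}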
@@ -734,7 +737,6 @@ public: /// isSubprogramContext - Return true if Context is either a subprogram /// or another context nested inside a subprogram. bool isSubprogramContext(const MDNode *Context); - }; } // End of namespace llvm diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp index 7133458129cc..5346907499e3 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -717,20 +717,19 @@ void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) { } } -/// EndModule - Emit all exception information that should come after the +/// endModule - Emit all exception information that should come after the /// content. -void DwarfException::EndModule() { +void DwarfException::endModule() { llvm_unreachable("Should be implemented"); } -/// BeginFunction - Gather pre-function exception information. Assumes it's +/// beginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. -void DwarfException::BeginFunction(const MachineFunction *MF) { +void DwarfException::beginFunction(const MachineFunction *MF) { llvm_unreachable("Should be implemented"); } -/// EndFunction - Gather and emit post-function exception information. -/// -void DwarfException::EndFunction() { +/// endFunction - Gather and emit post-function exception information. +void DwarfException::endFunction(const MachineFunction *) { llvm_unreachable("Should be implemented"); } diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index 15751615b7a4..5a2ee9e490a5 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -14,6 +14,7 @@ #ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H #define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H +#include "AsmPrinterHandler.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/AsmPrinter.h" #include @@ -35,7 +36,7 @@ class AsmPrinter; //===----------------------------------------------------------------------===// /// DwarfException - Emits Dwarf exception handling directives. /// -class DwarfException { +class DwarfException : public AsmPrinterHandler { protected: /// Asm - Target of Dwarf emission. AsmPrinter *Asm; @@ -130,16 +131,21 @@ public: DwarfException(AsmPrinter *A); virtual ~DwarfException(); - /// EndModule - Emit all exception information that should come after the + /// endModule - Emit all exception information that should come after the /// content. - virtual void EndModule(); + virtual void endModule(); - /// BeginFunction - Gather pre-function exception information. Assumes being + /// beginFunction - Gather pre-function exception information. Assumes being /// emitted immediately after the function entry point. - virtual void BeginFunction(const MachineFunction *MF); + virtual void beginFunction(const MachineFunction *MF); - /// EndFunction - Gather and emit post-function exception information. - virtual void EndFunction(); + /// endFunction - Gather and emit post-function exception information. + virtual void endFunction(const MachineFunction *); + + // We don't need these. 
+ virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) {} + virtual void beginInstruction(const MachineInstr *MI) {} + virtual void endInstruction() {} }; class DwarfCFIException : public DwarfException { @@ -164,16 +170,16 @@ public: DwarfCFIException(AsmPrinter *A); virtual ~DwarfCFIException(); - /// EndModule - Emit all exception information that should come after the + /// endModule - Emit all exception information that should come after the /// content. - virtual void EndModule(); + virtual void endModule(); - /// BeginFunction - Gather pre-function exception information. Assumes being + /// beginFunction - Gather pre-function exception information. Assumes being /// emitted immediately after the function entry point. - virtual void BeginFunction(const MachineFunction *MF); + virtual void beginFunction(const MachineFunction *MF); - /// EndFunction - Gather and emit post-function exception information. - virtual void EndFunction(); + /// endFunction - Gather and emit post-function exception information. + virtual void endFunction(const MachineFunction *); }; class ARMException : public DwarfException { @@ -187,16 +193,16 @@ public: ARMException(AsmPrinter *A); virtual ~ARMException(); - /// EndModule - Emit all exception information that should come after the + /// endModule - Emit all exception information that should come after the /// content. - virtual void EndModule(); + virtual void endModule(); - /// BeginFunction - Gather pre-function exception information. Assumes being + /// beginFunction - Gather pre-function exception information. Assumes being /// emitted immediately after the function entry point. - virtual void BeginFunction(const MachineFunction *MF); + virtual void beginFunction(const MachineFunction *MF); - /// EndFunction - Gather and emit post-function exception information. - virtual void EndFunction(); + /// endFunction - Gather and emit post-function exception information. + virtual void endFunction(const MachineFunction *); }; class Win64Exception : public DwarfException { @@ -219,16 +225,16 @@ public: Win64Exception(AsmPrinter *A); virtual ~Win64Exception(); - /// EndModule - Emit all exception information that should come after the + /// endModule - Emit all exception information that should come after the /// content. - virtual void EndModule(); + virtual void endModule(); - /// BeginFunction - Gather pre-function exception information. Assumes being + /// beginFunction - Gather pre-function exception information. Assumes being /// emitted immediately after the function entry point. - virtual void BeginFunction(const MachineFunction *MF); + virtual void beginFunction(const MachineFunction *MF); - /// EndFunction - Gather and emit post-function exception information. - virtual void EndFunction(); + /// endFunction - Gather and emit post-function exception information. + virtual void endFunction(const MachineFunction *); }; } // End of namespace llvm diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp new file mode 100644 index 000000000000..7de4b1e1875e --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -0,0 +1,2007 @@ +//===-- llvm/CodeGen/DwarfUnit.cpp - Dwarf Type and Compile Units ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains support for constructing a dwarf compile unit. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dwarfdebug" + +#include "DwarfUnit.h" +#include "DwarfAccelTable.h" +#include "DwarfDebug.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/DIBuilder.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +static cl::opt +GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, + cl::desc("Generate DWARF4 type units."), + cl::init(false)); + +/// Unit - Unit constructor. +DwarfUnit::DwarfUnit(unsigned UID, DIE *D, DICompileUnit Node, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU) + : UniqueID(UID), Node(Node), UnitDie(D), DebugInfoOffset(0), Asm(A), DD(DW), + DU(DWU), IndexTyDie(0), Section(0), Skeleton(0) { + DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); +} + +DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DIE *D, DICompileUnit Node, + AsmPrinter *A, DwarfDebug *DW, + DwarfFile *DWU) + : DwarfUnit(UID, D, Node, A, DW, DWU) { + insertDIE(Node, D); +} + +DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DIE *D, uint16_t Language, + AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) + : DwarfUnit(UID, D, DICompileUnit(), A, DW, DWU), Language(Language) {} + +/// ~Unit - Destructor for compile unit. +DwarfUnit::~DwarfUnit() { + for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) + DIEBlocks[j]->~DIEBlock(); +} + +/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug +/// information entry. +DIEEntry *DwarfUnit::createDIEEntry(DIE *Entry) { + DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); + return Value; +} + +/// getDefaultLowerBound - Return the default lower bound for an array. If the +/// DWARF version doesn't handle the language, return -1. +int64_t DwarfUnit::getDefaultLowerBound() const { + switch (getLanguage()) { + default: + break; + + case dwarf::DW_LANG_C89: + case dwarf::DW_LANG_C99: + case dwarf::DW_LANG_C: + case dwarf::DW_LANG_C_plus_plus: + case dwarf::DW_LANG_ObjC: + case dwarf::DW_LANG_ObjC_plus_plus: + return 0; + + case dwarf::DW_LANG_Fortran77: + case dwarf::DW_LANG_Fortran90: + case dwarf::DW_LANG_Fortran95: + return 1; + + // The languages below have valid values only if the DWARF version >= 4. + case dwarf::DW_LANG_Java: + case dwarf::DW_LANG_Python: + case dwarf::DW_LANG_UPC: + case dwarf::DW_LANG_D: + if (dwarf::DWARF_VERSION >= 4) + return 0; + break; + + case dwarf::DW_LANG_Ada83: + case dwarf::DW_LANG_Ada95: + case dwarf::DW_LANG_Cobol74: + case dwarf::DW_LANG_Cobol85: + case dwarf::DW_LANG_Modula2: + case dwarf::DW_LANG_Pascal83: + case dwarf::DW_LANG_PLI: + if (dwarf::DWARF_VERSION >= 4) + return 1; + break; + } + + return -1; +} + +/// Check whether the DIE for this MDNode can be shared across CUs. +static bool isShareableAcrossCUs(DIDescriptor D) { + // When the MDNode can be part of the type system, the DIE can be shared + // across CUs. 
+ // Combining type units and cross-CU DIE sharing is lower value (since + // cross-CU DIE sharing is used in LTO and removes type redundancy at that + // level already) but may be implementable for some value in projects + // building multiple independent libraries with LTO and then linking those + // together. + return (D.isType() || + (D.isSubprogram() && !DISubprogram(D).isDefinition())) && + !GenerateDwarfTypeUnits; +} + +/// getDIE - Returns the debug information entry map slot for the +/// specified debug variable. We delegate the request to DwarfDebug +/// when the DIE for this MDNode can be shared across CUs. The mappings +/// will be kept in DwarfDebug for shareable DIEs. +DIE *DwarfUnit::getDIE(DIDescriptor D) const { + if (isShareableAcrossCUs(D)) + return DD->getDIE(D); + return MDNodeToDieMap.lookup(D); +} + +/// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug +/// when the DIE for this MDNode can be shared across CUs. The mappings +/// will be kept in DwarfDebug for shareable DIEs. +void DwarfUnit::insertDIE(DIDescriptor Desc, DIE *D) { + if (isShareableAcrossCUs(Desc)) { + DD->insertDIE(Desc, D); + return; + } + MDNodeToDieMap.insert(std::make_pair(Desc, D)); +} + +/// addFlag - Add a flag that is true. +void DwarfUnit::addFlag(DIE *Die, dwarf::Attribute Attribute) { + if (DD->getDwarfVersion() >= 4) + Die->addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne); + else + Die->addValue(Attribute, dwarf::DW_FORM_flag, DIEIntegerOne); +} + +/// addUInt - Add an unsigned integer attribute data and value. +/// +void DwarfUnit::addUInt(DIE *Die, dwarf::Attribute Attribute, + Optional Form, uint64_t Integer) { + if (!Form) + Form = DIEInteger::BestForm(false, Integer); + DIEValue *Value = Integer == 1 ? DIEIntegerOne : new (DIEValueAllocator) + DIEInteger(Integer); + Die->addValue(Attribute, *Form, Value); +} + +void DwarfUnit::addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer) { + addUInt(Block, (dwarf::Attribute)0, Form, Integer); +} + +/// addSInt - Add an signed integer attribute data and value. +/// +void DwarfUnit::addSInt(DIE *Die, dwarf::Attribute Attribute, + Optional Form, int64_t Integer) { + if (!Form) + Form = DIEInteger::BestForm(true, Integer); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); + Die->addValue(Attribute, *Form, Value); +} + +void DwarfUnit::addSInt(DIEBlock *Die, Optional Form, + int64_t Integer) { + addSInt(Die, (dwarf::Attribute)0, Form, Integer); +} + +/// addString - Add a string attribute data and value. We always emit a +/// reference to the string pool instead of immediate strings so that DIEs have +/// more predictable sizes. In the case of split dwarf we emit an index +/// into another table which gets us the static offset into the string +/// table. +void DwarfUnit::addString(DIE *Die, dwarf::Attribute Attribute, + StringRef String) { + + if (!DD->useSplitDwarf()) + return addLocalString(Die, Attribute, String); + + unsigned idx = DU->getStringPoolIndex(String); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); + DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String); + Die->addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Str); +} + +/// addLocalString - Add a string attribute data and value. This is guaranteed +/// to be in the local string pool instead of indirected. 
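addString above chooses between two encodings: under split DWARF it emits a DW_FORM_GNU_str_index into an indexed string table, so that .dwo contents stay free of relocations, and otherwise a direct DW_FORM_strp section offset via addLocalString, whose definition continues below. A reduced model of that decision; the form constants come from the DWARF spec and its GNU extensions, and the rest is illustrative:

#include <cstdint>

enum StrForm : uint16_t {
  DW_FORM_strp = 0x0e,           // offset into .debug_str
  DW_FORM_GNU_str_index = 0x1f02 // index into the string offsets table
};

StrForm chooseStringForm(bool UseSplitDwarf) {
  return UseSplitDwarf ? DW_FORM_GNU_str_index : DW_FORM_strp;
}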
+void DwarfUnit::addLocalString(DIE *Die, dwarf::Attribute Attribute, + StringRef String) { + MCSymbol *Symb = DU->getStringPoolEntry(String); + DIEValue *Value; + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + Value = new (DIEValueAllocator) DIELabel(Symb); + else { + MCSymbol *StringPool = DU->getStringPoolSym(); + Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); + } + DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String); + Die->addValue(Attribute, dwarf::DW_FORM_strp, Str); +} + +/// addExpr - Add a Dwarf expression attribute data and value. +/// +void DwarfUnit::addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr) { + DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr); + Die->addValue((dwarf::Attribute)0, Form, Value); +} + +/// addLabel - Add a Dwarf label attribute data and value. +/// +void DwarfUnit::addLabel(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form, + const MCSymbol *Label) { + DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); + Die->addValue(Attribute, Form, Value); +} + +void DwarfUnit::addLabel(DIEBlock *Die, dwarf::Form Form, + const MCSymbol *Label) { + addLabel(Die, (dwarf::Attribute)0, Form, Label); +} + +/// addSectionLabel - Add a Dwarf section label attribute data and value. +/// +void DwarfUnit::addSectionLabel(DIE *Die, dwarf::Attribute Attribute, + const MCSymbol *Label) { + if (DD->getDwarfVersion() >= 4) + addLabel(Die, Attribute, dwarf::DW_FORM_sec_offset, Label); + else + addLabel(Die, Attribute, dwarf::DW_FORM_data4, Label); +} + +/// addSectionOffset - Add an offset into a section attribute data and value. +/// +void DwarfUnit::addSectionOffset(DIE *Die, dwarf::Attribute Attribute, + uint64_t Integer) { + if (DD->getDwarfVersion() >= 4) + addUInt(Die, Attribute, dwarf::DW_FORM_sec_offset, Integer); + else + addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer); +} + +/// addLabelAddress - Add a dwarf label attribute data and value using +/// DW_FORM_addr or DW_FORM_GNU_addr_index. +/// +void DwarfCompileUnit::addLabelAddress(DIE *Die, dwarf::Attribute Attribute, + MCSymbol *Label) { + if (Label) + DD->addArangeLabel(SymbolCU(this, Label)); + + if (!DD->useSplitDwarf()) { + if (Label) { + DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); + Die->addValue(Attribute, dwarf::DW_FORM_addr, Value); + } else { + DIEValue *Value = new (DIEValueAllocator) DIEInteger(0); + Die->addValue(Attribute, dwarf::DW_FORM_addr, Value); + } + } else { + unsigned idx = DU->getAddrPoolIndex(Label); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); + Die->addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value); + } +} + +/// addOpAddress - Add a dwarf op address data and value using the +/// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. +/// +void DwarfUnit::addOpAddress(DIEBlock *Die, const MCSymbol *Sym) { + if (!DD->useSplitDwarf()) { + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addLabel(Die, dwarf::DW_FORM_udata, Sym); + } else { + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); + addUInt(Die, dwarf::DW_FORM_GNU_addr_index, DU->getAddrPoolIndex(Sym)); + } +} + +/// addSectionDelta - Add a section label delta attribute data and value. 
+/// +void DwarfUnit::addSectionDelta(DIE *Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo) { + DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); + if (DD->getDwarfVersion() >= 4) + Die->addValue(Attribute, dwarf::DW_FORM_sec_offset, Value); + else + Die->addValue(Attribute, dwarf::DW_FORM_data4, Value); +} + +/// addDIEEntry - Add a DIE attribute data and value. +/// +void DwarfUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry) { + addDIEEntry(Die, Attribute, createDIEEntry(Entry)); +} + +void DwarfUnit::addDIETypeSignature(DIE *Die, const DwarfTypeUnit &Type) { + Die->addValue(dwarf::DW_AT_signature, dwarf::DW_FORM_ref_sig8, + new (DIEValueAllocator) DIETypeSignature(Type)); +} + +void DwarfUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, + DIEEntry *Entry) { + const DIE *DieCU = Die->getUnitOrNull(); + const DIE *EntryCU = Entry->getEntry()->getUnitOrNull(); + if (!DieCU) + // We assume that Die belongs to this CU, if it is not linked to any CU yet. + DieCU = getUnitDie(); + if (!EntryCU) + EntryCU = getUnitDie(); + Die->addValue(Attribute, EntryCU == DieCU ? dwarf::DW_FORM_ref4 + : dwarf::DW_FORM_ref_addr, + Entry); +} + +/// Create a DIE with the given Tag, add the DIE to its parent, and +/// call insertDIE if MD is not null. +DIE *DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) { + DIE *Die = new DIE(Tag); + Parent.addChild(Die); + if (N) + insertDIE(N, Die); + return Die; +} + +/// addBlock - Add block data. +/// +void DwarfUnit::addBlock(DIE *Die, dwarf::Attribute Attribute, + DIEBlock *Block) { + Block->ComputeSize(Asm); + DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. + Die->addValue(Attribute, Block->BestForm(), Block); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void DwarfUnit::addSourceLine(DIE *Die, DIVariable V) { + // Verify variable. + if (!V.isVariable()) + return; + + unsigned Line = V.getLineNumber(); + if (Line == 0) + return; + unsigned FileID = + DD->getOrCreateSourceID(V.getContext().getFilename(), + V.getContext().getDirectory(), getUniqueID()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void DwarfUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { + // Verify global variable. + if (!G.isGlobalVariable()) + return; + + unsigned Line = G.getLineNumber(); + if (Line == 0) + return; + unsigned FileID = + DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(), getUniqueID()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void DwarfUnit::addSourceLine(DIE *Die, DISubprogram SP) { + // Verify subprogram. + if (!SP.isSubprogram()) + return; + + // If the line number is 0, don't add it. + unsigned Line = SP.getLineNumber(); + if (Line == 0) + return; + + unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(), SP.getDirectory(), + getUniqueID()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. 
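Every addSourceLine overload in this run follows the same three steps: take the entity's line number, bail out when it is 0 (DWARF's "no source location"), then attach DW_AT_decl_file and DW_AT_decl_line. A hypothetical consolidation, not part of this patch, that each overload could delegate to; it assumes the surrounding DwarfUnit members shown in this file, and the DIType overload continues below:

// Hypothetical private helper; File/Dir/Line would come from the overload's
// descriptor (DIVariable, DISubprogram, DIType, ...).
void DwarfUnit::addSourceLineImpl(DIE *Die, unsigned Line, StringRef File,
                                  StringRef Dir) {
  if (Line == 0)
    return; // line 0 means "no source location"
  unsigned FileID = DD->getOrCreateSourceID(File, Dir, getUniqueID());
  assert(FileID && "Invalid file id");
  addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
  addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
}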
+void DwarfUnit::addSourceLine(DIE *Die, DIType Ty) { + // Verify type. + if (!Ty.isType()) + return; + + unsigned Line = Ty.getLineNumber(); + if (Line == 0) + return; + unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(), Ty.getDirectory(), + getUniqueID()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void DwarfUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { + // Verify type. + if (!Ty.isObjCProperty()) + return; + + unsigned Line = Ty.getLineNumber(); + if (Line == 0) + return; + DIFile File = Ty.getFile(); + unsigned FileID = DD->getOrCreateSourceID(File.getFilename(), + File.getDirectory(), getUniqueID()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void DwarfUnit::addSourceLine(DIE *Die, DINameSpace NS) { + // Verify namespace. + if (!NS.Verify()) + return; + + unsigned Line = NS.getLineNumber(); + if (Line == 0) + return; + StringRef FN = NS.getFilename(); + + unsigned FileID = + DD->getOrCreateSourceID(FN, NS.getDirectory(), getUniqueID()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); +} + +/// addVariableAddress - Add DW_AT_location attribute for a +/// DbgVariable based on provided MachineLocation. +void DwarfUnit::addVariableAddress(const DbgVariable &DV, DIE *Die, + MachineLocation Location) { + if (DV.variableHasComplexAddress()) + addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); + else if (DV.isBlockByrefVariable()) + addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); + else + addAddress(Die, dwarf::DW_AT_location, Location, + DV.getVariable().isIndirect()); +} + +/// addRegisterOp - Add register operand. +void DwarfUnit::addRegisterOp(DIEBlock *TheDie, unsigned Reg) { + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned DWReg = RI->getDwarfRegNum(Reg, false); + if (DWReg < 32) + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); + else { + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(TheDie, dwarf::DW_FORM_udata, DWReg); + } +} + +/// addRegisterOffset - Add register offset. +void DwarfUnit::addRegisterOffset(DIEBlock *TheDie, unsigned Reg, + int64_t Offset) { + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned DWReg = RI->getDwarfRegNum(Reg, false); + const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); + if (Reg == TRI->getFrameRegister(*Asm->MF)) + // If variable offset is based in frame register then use fbreg. + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); + else if (DWReg < 32) + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg); + else { + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(TheDie, dwarf::DW_FORM_udata, DWReg); + } + addSInt(TheDie, dwarf::DW_FORM_sdata, Offset); +} + +/// addAddress - Add an address attribute to a die based on the location +/// provided. 
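For concreteness, these are the byte sequences addRegisterOp and addRegisterOffset above produce. The opcode values come from the DWARF spec; the register numbers and offsets are arbitrary examples, and the addAddress definition continues below.

// reg 5             -> DW_OP_reg5                : 0x55           (0x50 + 5)
// reg 40            -> DW_OP_regx, ULEB128(40)   : 0x90 0x28
// reg 5, offset -16 -> DW_OP_breg5, SLEB128(-16) : 0x75 0x70
// reg 40, offset 8  -> DW_OP_bregx, ULEB128(40),
//                      SLEB128(8)                : 0x92 0x28 0x08
// frame reg, offset -> DW_OP_fbreg, SLEB128(off) : 0x91 ...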
+void DwarfUnit::addAddress(DIE *Die, dwarf::Attribute Attribute, + const MachineLocation &Location, bool Indirect) { + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + if (Location.isReg() && !Indirect) + addRegisterOp(Block, Location.getReg()); + else { + addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + if (Indirect && !Location.isReg()) { + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, Block); +} + +/// addComplexAddress - Start with the address based on the location provided, +/// and generate the DWARF information necessary to find the actual variable +/// given the extra address information encoded in the DbgVariable, starting +/// from the starting location. Add the DWARF information to the die. +/// +void DwarfUnit::addComplexAddress(const DbgVariable &DV, DIE *Die, + dwarf::Attribute Attribute, + const MachineLocation &Location) { + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + unsigned N = DV.getNumAddrElements(); + unsigned i = 0; + if (Location.isReg()) { + if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { + // If first address element is OpPlus then emit + // DW_OP_breg + Offset instead of DW_OP_reg + Offset. + addRegisterOffset(Block, Location.getReg(), DV.getAddrElement(1)); + i = 2; + } else + addRegisterOp(Block, Location.getReg()); + } else + addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + + for (; i < N; ++i) { + uint64_t Element = DV.getAddrElement(i); + if (Element == DIBuilder::OpPlus) { + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); + } else if (Element == DIBuilder::OpDeref) { + if (!Location.isReg()) + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } else + llvm_unreachable("unknown DIBuilder Opcode"); + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, Block); +} + +/* Byref variables, in Blocks, are declared by the programmer as "SomeType + VarName;", but the compiler creates a __Block_byref_x_VarName struct, and + gives the variable VarName either the struct, or a pointer to the struct, as + its type. This is necessary for various behind-the-scenes things the + compiler needs to do with by-reference variables in Blocks. + + However, as far as the original *programmer* is concerned, the variable + should still have type 'SomeType', as originally declared. + + The function getBlockByrefType dives into the __Block_byref_x_VarName + struct to find the original type of the variable, which is then assigned to + the variable's Debug Information Entry as its real type. So far, so good. + However now the debugger will expect the variable VarName to have the type + SomeType. So we need the location attribute for the variable to be an + expression that explains to the debugger how to navigate through the + pointers and struct to find the actual variable of type SomeType. + + The following function does just that. We start by getting + the "normal" location for the variable. This will be the location + of either the struct __Block_byref_x_VarName or the pointer to the + struct __Block_byref_x_VarName. + + The struct will look something like: + + struct __Block_byref_x_VarName { + ... + struct __Block_byref_x_VarName *forwarding; + ... + SomeType VarName; + ... + }; + + If we are given the struct directly (as our starting point) we + need to tell the debugger to: + + 1). 
Add the offset of the forwarding field.
+
+   2). Follow that pointer to get the real __Block_byref_x_VarName
+   struct to use (the real one may have been copied onto the heap).
+
+   3). Add the offset for the field VarName, to find the actual variable.
+
+   If we started with a pointer to the struct, then we need to
+   dereference that pointer first, before the other steps.
+   Translating this into DWARF ops, we will need to append the following
+   to the current location description for the variable:
+
+   DW_OP_deref                    -- optional, if we start with a pointer
+   DW_OP_plus_uconst
+   DW_OP_deref
+   DW_OP_plus_uconst
+
+   That is what this function does.  */
+
+/// addBlockByrefAddress - Start with the address based on the location
+/// provided, and generate the DWARF information necessary to find the
+/// actual Block variable (navigating the Block struct) based on the
+/// starting location.  Add the DWARF information to the die.  For
+/// more information, read the large comment just above here.
+///
+void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die,
+                                     dwarf::Attribute Attribute,
+                                     const MachineLocation &Location) {
+  DIType Ty = DV.getType();
+  DIType TmpTy = Ty;
+  uint16_t Tag = Ty.getTag();
+  bool isPointer = false;
+
+  StringRef varName = DV.getName();
+
+  if (Tag == dwarf::DW_TAG_pointer_type) {
+    DIDerivedType DTy(Ty);
+    TmpTy = resolve(DTy.getTypeDerivedFrom());
+    isPointer = true;
+  }
+
+  DICompositeType blockStruct(TmpTy);
+
+  // Find the __forwarding field and the variable field in the __Block_byref
+  // struct.
+  DIArray Fields = blockStruct.getTypeArray();
+  DIDerivedType varField;
+  DIDerivedType forwardingField;
+
+  for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
+    DIDerivedType DT(Fields.getElement(i));
+    StringRef fieldName = DT.getName();
+    if (fieldName == "__forwarding")
+      forwardingField = DT;
+    else if (fieldName == varName)
+      varField = DT;
+  }
+
+  // Get the offsets for the forwarding field and the variable field,
+  // converting bit offsets to byte offsets.
+  unsigned forwardingFieldOffset = forwardingField.getOffsetInBits() >> 3;
+  unsigned varFieldOffset = varField.getOffsetInBits() >> 3;
+
+  // Decode the original location, and use that as the start of the byref
+  // variable's location.
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+  if (Location.isReg())
+    addRegisterOp(Block, Location.getReg());
+  else
+    addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+
+  // If we started with a pointer to the __Block_byref... struct, then
+  // the first thing we need to do is dereference the pointer (DW_OP_deref).
+  if (isPointer)
+    addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+  // Next add the offset for the '__forwarding' field:
+  // DW_OP_plus_uconst ForwardingFieldOffset.  Note there's no point in
+  // adding the offset if it's 0.
+  if (forwardingFieldOffset > 0) {
+    addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+    addUInt(Block, dwarf::DW_FORM_udata, forwardingFieldOffset);
+  }
+
+  // Now dereference the __forwarding field to get to the real __Block_byref
+  // struct:  DW_OP_deref.
+  addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+  // Now that we've got the real __Block_byref... struct, add the offset
+  // for the variable's field to get to the location of the actual variable:
+  // DW_OP_plus_uconst varFieldOffset.  Again, don't add if it's 0.
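  // Illustrative end-to-end expression for the pointer-to-struct case, with
  // hypothetical offsets (__forwarding at byte 8, VarName at byte 16) and the
  // byref pointer living in DWARF register 6:
  //   DW_OP_breg6 0         start: address of the byref pointer
  //   DW_OP_deref           we started with a pointer, so follow it
  //   DW_OP_plus_uconst 8   skip to the __forwarding field
  //   DW_OP_deref           follow it to the real __Block_byref struct
  //   DW_OP_plus_uconst 16  land on the actual variable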
+ if (varFieldOffset > 0) { + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, dwarf::DW_FORM_udata, varFieldOffset); + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, Block); +} + +/// isTypeSigned - Return true if the type is signed. +static bool isTypeSigned(DwarfDebug *DD, DIType Ty, int *SizeInBits) { + if (Ty.isDerivedType()) + return isTypeSigned(DD, DD->resolve(DIDerivedType(Ty).getTypeDerivedFrom()), + SizeInBits); + if (Ty.isBasicType()) + if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed || + DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) { + *SizeInBits = Ty.getSizeInBits(); + return true; + } + return false; +} + +/// Return true if type encoding is unsigned. +static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { + DIDerivedType DTy(Ty); + if (DTy.isDerivedType()) + return isUnsignedDIType(DD, DD->resolve(DTy.getTypeDerivedFrom())); + + DIBasicType BTy(Ty); + if (BTy.isBasicType()) { + unsigned Encoding = BTy.getEncoding(); + if (Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_boolean) + return true; + } + return false; +} + +/// If this type is derived from a base type then return base type size. +static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) { + unsigned Tag = Ty.getTag(); + + if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && + Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && + Tag != dwarf::DW_TAG_restrict_type) + return Ty.getSizeInBits(); + + DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom()); + + // If this type is not derived from any type then take conservative approach. + if (!BaseType.isValid()) + return Ty.getSizeInBits(); + + // If this is a derived type, go ahead and get the base type, unless it's a + // reference then it's just the size of the field. Pointer types have no need + // of this since they're a different type of qualification on the type. + if (BaseType.getTag() == dwarf::DW_TAG_reference_type || + BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type) + return Ty.getSizeInBits(); + + if (BaseType.isDerivedType()) + return getBaseTypeSize(DD, DIDerivedType(BaseType)); + + return BaseType.getSizeInBits(); +} + +/// addConstantValue - Add constant value entry in variable DIE. +void DwarfUnit::addConstantValue(DIE *Die, const MachineOperand &MO, + DIType Ty) { + // FIXME: This is a bit conservative/simple - it emits negative values at + // their maximum bit width which is a bit unfortunate (& doesn't prefer + // udata/sdata over dataN as suggested by the DWARF spec) + assert(MO.isImm() && "Invalid machine operand!"); + int SizeInBits = -1; + bool SignedConstant = isTypeSigned(DD, Ty, &SizeInBits); + dwarf::Form Form; + + // If we're a signed constant definitely use sdata. + if (SignedConstant) { + addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, MO.getImm()); + return; + } + + // Else use data for now unless it's larger than we can deal with. + switch (SizeInBits) { + case 8: + Form = dwarf::DW_FORM_data1; + break; + case 16: + Form = dwarf::DW_FORM_data2; + break; + case 32: + Form = dwarf::DW_FORM_data4; + break; + case 64: + Form = dwarf::DW_FORM_data8; + break; + default: + Form = dwarf::DW_FORM_udata; + addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm()); + return; + } + addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm()); +} + +/// addConstantFPValue - Add constant value entry in variable DIE. 
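The function below emits the raw floating-point bytes in target memory order: it walks forward from byte 0 on little-endian targets and backward from the top byte otherwise. A stand-alone illustration of that loop shape, assuming a plain 64-bit payload instead of an APInt and, like the code below, assuming the source buffer holds the low byte first:

#include <cstdint>
#include <cstdio>
#include <cstring>

void emitBytes(uint64_t Raw, bool TargetLittleEndian) {
  unsigned char Bytes[8];
  std::memcpy(Bytes, &Raw, 8); // raw storage bytes of the value
  int NumBytes = 8;
  int Incr = TargetLittleEndian ? 1 : -1;
  int Start = TargetLittleEndian ? 0 : NumBytes - 1;
  int Stop = TargetLittleEndian ? NumBytes : -1;
  // Emit one byte at a time, low byte first for little-endian targets.
  for (; Start != Stop; Start += Incr)
    std::printf(".byte 0x%02x\n", Bytes[Start]);
}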
+void DwarfUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { + assert(MO.isFPImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + APFloat FPImm = MO.getFPImm()->getValueAPF(); + + // Get the raw data form of the floating point. + const APInt FltVal = FPImm.bitcastToAPInt(); + const char *FltPtr = (const char *)FltVal.getRawData(); + + int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getDataLayout().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, Block); +} + +/// addConstantFPValue - Add constant value entry in variable DIE. +void DwarfUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { + // Pass this down to addConstantValue as an unsigned bag of bits. + addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true); +} + +/// addConstantValue - Add constant value entry in variable DIE. +void DwarfUnit::addConstantValue(DIE *Die, const ConstantInt *CI, + bool Unsigned) { + addConstantValue(Die, CI->getValue(), Unsigned); +} + +// addConstantValue - Add constant value entry in variable DIE. +void DwarfUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) { + unsigned CIBitWidth = Val.getBitWidth(); + if (CIBitWidth <= 64) { + // If we're a signed constant definitely use sdata. + if (!Unsigned) { + addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, + Val.getSExtValue()); + return; + } + + // Else use data for now unless it's larger than we can deal with. + dwarf::Form Form; + switch (CIBitWidth) { + case 8: + Form = dwarf::DW_FORM_data1; + break; + case 16: + Form = dwarf::DW_FORM_data2; + break; + case 32: + Form = dwarf::DW_FORM_data4; + break; + case 64: + Form = dwarf::DW_FORM_data8; + break; + default: + addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, + Val.getZExtValue()); + return; + } + addUInt(Die, dwarf::DW_AT_const_value, Form, Val.getZExtValue()); + return; + } + + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + // Get the raw data form of the large APInt. + const uint64_t *Ptr64 = Val.getRawData(); + + int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getDataLayout().isLittleEndian(); + + // Output the constant to DWARF one byte at a time. + for (int i = 0; i < NumBytes; i++) { + uint8_t c; + if (LittleEndian) + c = Ptr64[i / 8] >> (8 * (i & 7)); + else + c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7)); + addUInt(Block, dwarf::DW_FORM_data1, c); + } + + addBlock(Die, dwarf::DW_AT_const_value, Block); +} + +/// addTemplateParams - Add template parameters into buffer. +void DwarfUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { + // Add template parameters. + for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { + DIDescriptor Element = TParams.getElement(i); + if (Element.isTemplateTypeParameter()) + constructTemplateTypeParameterDIE(Buffer, + DITemplateTypeParameter(Element)); + else if (Element.isTemplateValueParameter()) + constructTemplateValueParameterDIE(Buffer, + DITemplateValueParameter(Element)); + } +} + +/// getOrCreateContextDIE - Get context owner's DIE. 
+DIE *DwarfUnit::getOrCreateContextDIE(DIScope Context) {
+  if (!Context || Context.isFile())
+    return getUnitDie();
+  if (Context.isType())
+    return getOrCreateTypeDIE(DIType(Context));
+  if (Context.isNameSpace())
+    return getOrCreateNameSpace(DINameSpace(Context));
+  if (Context.isSubprogram())
+    return getOrCreateSubprogramDIE(DISubprogram(Context));
+  return getDIE(Context);
+}
+
+DIE *DwarfUnit::createTypeDIE(DICompositeType Ty) {
+  DIScope Context = resolve(Ty.getContext());
+  DIE *ContextDIE = getOrCreateContextDIE(Context);
+
+  DIE *TyDIE = getDIE(Ty);
+  if (TyDIE)
+    return TyDIE;
+
+  // Create new type.
+  TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty);
+
+  constructTypeDIE(*TyDIE, Ty);
+
+  updateAcceleratorTables(Context, Ty, TyDIE);
+  return TyDIE;
+}
+
+/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+/// given DIType.
+DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
+  if (!TyNode)
+    return NULL;
+
+  DIType Ty(TyNode);
+  assert(Ty.isType());
+
+  // Construct the context before querying for the existence of the DIE in case
+  // such construction creates the DIE.
+  DIScope Context = resolve(Ty.getContext());
+  DIE *ContextDIE = getOrCreateContextDIE(Context);
+  assert(ContextDIE);
+
+  DIE *TyDIE = getDIE(Ty);
+  if (TyDIE)
+    return TyDIE;
+
+  // Create new type.
+  TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty);
+
+  if (Ty.isBasicType())
+    constructTypeDIE(*TyDIE, DIBasicType(Ty));
+  else if (Ty.isCompositeType()) {
+    DICompositeType CTy(Ty);
+    if (GenerateDwarfTypeUnits && !Ty.isForwardDecl())
+      if (MDString *TypeId = CTy.getIdentifier()) {
+        DD->addDwarfTypeUnitType(getLanguage(), TypeId->getString(), TyDIE,
+                                 CTy);
+        // Skip updating the accelerator tables since this is not the full
+        // type.
+        return TyDIE;
+      }
+    constructTypeDIE(*TyDIE, CTy);
+  } else {
+    assert(Ty.isDerivedType() && "Unknown kind of DIType");
+    constructTypeDIE(*TyDIE, DIDerivedType(Ty));
+  }
+
+  updateAcceleratorTables(Context, Ty, TyDIE);
+
+  return TyDIE;
+}
+
+void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty,
+                                        const DIE *TyDIE) {
+  if (!Ty.getName().empty() && !Ty.isForwardDecl()) {
+    bool IsImplementation = false;
+    if (Ty.isCompositeType()) {
+      DICompositeType CT(Ty);
+      // A runtime language of 0 actually means C/C++, and any other value is
+      // some version of Objective-C/C++.
+      IsImplementation = (CT.getRunTimeLang() == 0) || CT.isObjcClassComplete();
+    }
+    unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
+    addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags));
+
+    if (!Context || Context.isCompileUnit() || Context.isFile() ||
+        Context.isNameSpace())
+      GlobalTypes[getParentContextString(Context) + Ty.getName().str()] = TyDIE;
+  }
+}
+
+/// addType - Add a new type attribute to the specified entity.
+void DwarfUnit::addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute) {
+  assert(Ty && "Trying to add a type that doesn't exist?");
+
+  // Check for pre-existence.
+  DIEEntry *Entry = getDIEEntry(Ty);
+  // If it exists then use the existing value.
+  if (Entry) {
+    addDIEEntry(Entity, Attribute, Entry);
+    return;
+  }
+
+  // Construct type.
+  DIE *Buffer = getOrCreateTypeDIE(Ty);
+
+  // Set up proxy.
+  Entry = createDIEEntry(Buffer);
+  insertDIEEntry(Ty, Entry);
+  addDIEEntry(Entity, Attribute, Entry);
+}
+
+// Accelerator table mutators - add each name along with its companion
+// DIE to the proper table while ensuring that the name that we're going
+// to reference is in the string table.
We do this since the names we +// add may not only be identical to the names in the DIE. +void DwarfUnit::addAccelName(StringRef Name, const DIE *Die) { + DU->getStringPoolEntry(Name); + std::vector &DIEs = AccelNames[Name]; + DIEs.push_back(Die); +} + +void DwarfUnit::addAccelObjC(StringRef Name, const DIE *Die) { + DU->getStringPoolEntry(Name); + std::vector &DIEs = AccelObjC[Name]; + DIEs.push_back(Die); +} + +void DwarfUnit::addAccelNamespace(StringRef Name, const DIE *Die) { + DU->getStringPoolEntry(Name); + std::vector &DIEs = AccelNamespace[Name]; + DIEs.push_back(Die); +} + +void DwarfUnit::addAccelType(StringRef Name, + std::pair Die) { + DU->getStringPoolEntry(Name); + std::vector > &DIEs = AccelTypes[Name]; + DIEs.push_back(Die); +} + +/// addGlobalName - Add a new global name to the compile unit. +void DwarfUnit::addGlobalName(StringRef Name, DIE *Die, DIScope Context) { + std::string FullName = getParentContextString(Context) + Name.str(); + GlobalNames[FullName] = Die; +} + +/// getParentContextString - Walks the metadata parent chain in a language +/// specific manner (using the compile unit language) and returns +/// it as a string. This is done at the metadata level because DIEs may +/// not currently have been added to the parent context and walking the +/// DIEs looking for names is more expensive than walking the metadata. +std::string DwarfUnit::getParentContextString(DIScope Context) const { + if (!Context) + return ""; + + // FIXME: Decide whether to implement this for non-C++ languages. + if (getLanguage() != dwarf::DW_LANG_C_plus_plus) + return ""; + + std::string CS; + SmallVector Parents; + while (!Context.isCompileUnit()) { + Parents.push_back(Context); + if (Context.getContext()) + Context = resolve(Context.getContext()); + else + // Structure, etc types will have a NULL context if they're at the top + // level. + break; + } + + // Reverse iterate over our list to go from the outermost construct to the + // innermost. + for (SmallVectorImpl::reverse_iterator I = Parents.rbegin(), + E = Parents.rend(); + I != E; ++I) { + DIScope Ctx = *I; + StringRef Name = Ctx.getName(); + if (!Name.empty()) { + CS += Name; + CS += "::"; + } + } + return CS; +} + +/// constructTypeDIE - Construct basic type die from DIBasicType. +void DwarfUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { + // Get core information. + StringRef Name = BTy.getName(); + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, Name); + + // An unspecified type only has a name attribute. + if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) + return; + + addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + BTy.getEncoding()); + + uint64_t Size = BTy.getSizeInBits() >> 3; + addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); +} + +/// constructTypeDIE - Construct derived type die from DIDerivedType. +void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { + // Get core information. + StringRef Name = DTy.getName(); + uint64_t Size = DTy.getSizeInBits() >> 3; + uint16_t Tag = Buffer.getTag(); + + // Map to main type, void will not have a type. + DIType FromTy = resolve(DTy.getTypeDerivedFrom()); + if (FromTy) + addType(&Buffer, FromTy); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, Name); + + // Add size if non-zero (derived types might be zero-sized.) 
+  if (Size && Tag != dwarf::DW_TAG_pointer_type)
+    addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size);
+
+  if (Tag == dwarf::DW_TAG_ptr_to_member_type)
+    addDIEEntry(&Buffer, dwarf::DW_AT_containing_type,
+                getOrCreateTypeDIE(resolve(DTy.getClassType())));
+  // Add source line info if available and DTy is not a forward declaration.
+  if (!DTy.isForwardDecl())
+    addSourceLine(&Buffer, DTy);
+}
+
+/// constructTypeDIE - Construct type DIE from DICompositeType.
+void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
+  // Add name if not anonymous or intermediate type.
+  StringRef Name = CTy.getName();
+
+  uint64_t Size = CTy.getSizeInBits() >> 3;
+  uint16_t Tag = Buffer.getTag();
+
+  switch (Tag) {
+  case dwarf::DW_TAG_array_type:
+    constructArrayTypeDIE(Buffer, CTy);
+    break;
+  case dwarf::DW_TAG_enumeration_type:
+    constructEnumTypeDIE(Buffer, CTy);
+    break;
+  case dwarf::DW_TAG_subroutine_type: {
+    // Add return type. A void return won't have a type.
+    DIArray Elements = CTy.getTypeArray();
+    DIType RTy(Elements.getElement(0));
+    if (RTy)
+      addType(&Buffer, RTy);
+
+    bool isPrototyped = true;
+    // Add arguments.
+    for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
+      DIDescriptor Ty = Elements.getElement(i);
+      if (Ty.isUnspecifiedParameter()) {
+        createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer);
+        isPrototyped = false;
+      } else {
+        DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer);
+        addType(Arg, DIType(Ty));
+        if (DIType(Ty).isArtificial())
+          addFlag(Arg, dwarf::DW_AT_artificial);
+      }
+    }
+    // Add prototype flag if we're dealing with a C language and the
+    // function has been prototyped.
+    uint16_t Language = getLanguage();
+    if (isPrototyped &&
+        (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
+         Language == dwarf::DW_LANG_ObjC))
+      addFlag(&Buffer, dwarf::DW_AT_prototyped);
+
+    if (CTy.isLValueReference())
+      addFlag(&Buffer, dwarf::DW_AT_reference);
+
+    if (CTy.isRValueReference())
+      addFlag(&Buffer, dwarf::DW_AT_rvalue_reference);
+  } break;
+  case dwarf::DW_TAG_structure_type:
+  case dwarf::DW_TAG_union_type:
+  case dwarf::DW_TAG_class_type: {
+    // Add elements to structure type.
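+    // (Editorial sketch of what the loop below produces; the example type is
+    // hypothetical: given `struct S { static int a; int b; void f(); };`,
+    // member `a` goes through getOrCreateStaticMemberDIE, `b` through
+    // constructMemberDIE, and `f` through getOrCreateSubprogramDIE.)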
+    DIArray Elements = CTy.getTypeArray();
+    for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+      DIDescriptor Element = Elements.getElement(i);
+      DIE *ElemDie = NULL;
+      if (Element.isSubprogram()) {
+        DISubprogram SP(Element);
+        ElemDie = getOrCreateSubprogramDIE(SP);
+        if (SP.isProtected())
+          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+                  dwarf::DW_ACCESS_protected);
+        else if (SP.isPrivate())
+          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+                  dwarf::DW_ACCESS_private);
+        else
+          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+                  dwarf::DW_ACCESS_public);
+        if (SP.isExplicit())
+          addFlag(ElemDie, dwarf::DW_AT_explicit);
+      } else if (Element.isDerivedType()) {
+        DIDerivedType DDTy(Element);
+        if (DDTy.getTag() == dwarf::DW_TAG_friend) {
+          ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer);
+          addType(ElemDie, resolve(DDTy.getTypeDerivedFrom()),
+                  dwarf::DW_AT_friend);
+        } else if (DDTy.isStaticMember()) {
+          getOrCreateStaticMemberDIE(DDTy);
+        } else {
+          constructMemberDIE(Buffer, DDTy);
+        }
+      } else if (Element.isObjCProperty()) {
+        DIObjCProperty Property(Element);
+        ElemDie = createAndAddDIE(Property.getTag(), Buffer);
+        StringRef PropertyName = Property.getObjCPropertyName();
+        addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
+        addType(ElemDie, Property.getType());
+        addSourceLine(ElemDie, Property);
+        StringRef GetterName = Property.getObjCPropertyGetterName();
+        if (!GetterName.empty())
+          addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
+        StringRef SetterName = Property.getObjCPropertySetterName();
+        if (!SetterName.empty())
+          addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
+        unsigned PropertyAttributes = 0;
+        if (Property.isReadOnlyObjCProperty())
+          PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
+        if (Property.isReadWriteObjCProperty())
+          PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
+        if (Property.isAssignObjCProperty())
+          PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
+        if (Property.isRetainObjCProperty())
+          PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
+        if (Property.isCopyObjCProperty())
+          PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
+        if (Property.isNonAtomicObjCProperty())
+          PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
+        if (PropertyAttributes)
+          addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None,
+                  PropertyAttributes);
+
+        DIEEntry *Entry = getDIEEntry(Element);
+        if (!Entry) {
+          Entry = createDIEEntry(ElemDie);
+          insertDIEEntry(Element, Entry);
+        }
+      } else
+        continue;
+    }
+
+    if (CTy.isAppleBlockExtension())
+      addFlag(&Buffer, dwarf::DW_AT_APPLE_block);
+
+    DICompositeType ContainingType(resolve(CTy.getContainingType()));
+    if (ContainingType)
+      addDIEEntry(&Buffer, dwarf::DW_AT_containing_type,
+                  getOrCreateTypeDIE(ContainingType));
+
+    if (CTy.isObjcClassComplete())
+      addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
+
+    // Add template parameters to class, structure and union types.
+    // FIXME: The support isn't in the metadata for this yet.
+    if (Tag == dwarf::DW_TAG_class_type ||
+        Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
+      addTemplateParams(Buffer, CTy.getTemplateParams());
+
+    break;
+  }
+  default:
+    break;
+  }
+
+  // Add name if not anonymous or intermediate type.
+  if (!Name.empty())
+    addString(&Buffer, dwarf::DW_AT_name, Name);
+
+  if (Tag == dwarf::DW_TAG_enumeration_type ||
+      Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type ||
+      Tag == dwarf::DW_TAG_union_type) {
+    // Add size if non-zero (derived types might be zero-sized).
+    // TODO: Do we care about size for enum forward declarations?
+    if (Size)
+      addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size);
+    else if (!CTy.isForwardDecl())
+      // Add zero size if it is not a forward declaration.
+      addUInt(&Buffer, dwarf::DW_AT_byte_size, None, 0);
+
+    // If we're a forward decl, say so.
+    if (CTy.isForwardDecl())
+      addFlag(&Buffer, dwarf::DW_AT_declaration);
+
+    // Add source line info if available.
+    if (!CTy.isForwardDecl())
+      addSourceLine(&Buffer, CTy);
+
+    // No harm in adding the runtime language to the declaration.
+    unsigned RLang = CTy.getRunTimeLang();
+    if (RLang)
+      addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1,
+              RLang);
+  }
+}
+
+/// constructTemplateTypeParameterDIE - Construct new DIE for the given
+/// DITemplateTypeParameter.
+void DwarfUnit::constructTemplateTypeParameterDIE(DIE &Buffer,
+                                                  DITemplateTypeParameter TP) {
+  DIE *ParamDIE =
+      createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer);
+  // Add the type if it exists, it could be void and therefore no type.
+  if (TP.getType())
+    addType(ParamDIE, resolve(TP.getType()));
+  if (!TP.getName().empty())
+    addString(ParamDIE, dwarf::DW_AT_name, TP.getName());
+}
+
+/// constructTemplateValueParameterDIE - Construct new DIE for the given
+/// DITemplateValueParameter.
+void
+DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer,
+                                              DITemplateValueParameter VP) {
+  DIE *ParamDIE = createAndAddDIE(VP.getTag(), Buffer);
+
+  // Add the type if there is one; template template parameters and template
+  // parameter packs will not have a type.
+  if (VP.getTag() == dwarf::DW_TAG_template_value_parameter)
+    addType(ParamDIE, resolve(VP.getType()));
+  if (!VP.getName().empty())
+    addString(ParamDIE, dwarf::DW_AT_name, VP.getName());
+  if (Value *Val = VP.getValue()) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Val))
+      addConstantValue(ParamDIE, CI,
+                       isUnsignedDIType(DD, resolve(VP.getType())));
+    else if (GlobalValue *GV = dyn_cast<GlobalValue>(Val)) {
+      // For declaration non-type template parameters (such as global values
+      // and functions).
+      DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+      addOpAddress(Block, Asm->getSymbol(GV));
+      // Emit DW_OP_stack_value to use the address as the immediate value of
+      // the parameter, rather than a pointer to it.
+      addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
+      addBlock(ParamDIE, dwarf::DW_AT_location, Block);
+    } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_template_param) {
+      assert(isa<MDString>(Val));
+      addString(ParamDIE, dwarf::DW_AT_GNU_template_name,
+                cast<MDString>(Val)->getString());
+    } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) {
+      assert(isa<MDNode>(Val));
+      DIArray A(cast<MDNode>(Val));
+      addTemplateParams(*ParamDIE, A);
+    }
+  }
+}
+
+/// getOrCreateNameSpace - Create a DIE for DINameSpace.
+DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) {
+  // Construct the context before querying for the existence of the DIE in case
+  // such construction creates the DIE.
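+  // (Editorial example, hypothetical source: `namespace A { namespace B { } }`
+  // produces nested DW_TAG_namespace DIEs, with B's DIE created as a child of
+  // A's because A's context DIE is constructed first.)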
+  DIE *ContextDIE = getOrCreateContextDIE(NS.getContext());
+
+  DIE *NDie = getDIE(NS);
+  if (NDie)
+    return NDie;
+  NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS);
+
+  if (!NS.getName().empty()) {
+    addString(NDie, dwarf::DW_AT_name, NS.getName());
+    addAccelNamespace(NS.getName(), NDie);
+    addGlobalName(NS.getName(), NDie, NS.getContext());
+  } else
+    addAccelNamespace("(anonymous namespace)", NDie);
+  addSourceLine(NDie, NS);
+  return NDie;
+}
+
+/// getOrCreateSubprogramDIE - Create new DIE using SP.
+DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
+  // Construct the context before querying for the existence of the DIE in case
+  // such construction creates the DIE (as is the case for member function
+  // declarations).
+  DIE *ContextDIE = getOrCreateContextDIE(resolve(SP.getContext()));
+
+  DIE *SPDie = getDIE(SP);
+  if (SPDie)
+    return SPDie;
+
+  DISubprogram SPDecl = SP.getFunctionDeclaration();
+  if (SPDecl.isSubprogram())
+    // Add subprogram definitions to the CU die directly.
+    ContextDIE = UnitDie.get();
+
+  // DW_TAG_inlined_subroutine may refer to this DIE.
+  SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP);
+
+  DIE *DeclDie = NULL;
+  if (SPDecl.isSubprogram())
+    DeclDie = getOrCreateSubprogramDIE(SPDecl);
+
+  // Add function template parameters.
+  addTemplateParams(*SPDie, SP.getTemplateParams());
+
+  // If this DIE is going to refer to declaration info using AT_specification
+  // then there is no need to add other attributes.
+  if (DeclDie) {
+    // Refer to the function declaration directly.
+    addDIEEntry(SPDie, dwarf::DW_AT_specification, DeclDie);
+
+    return SPDie;
+  }
+
+  // Add the linkage name if we have one.
+  StringRef LinkageName = SP.getLinkageName();
+  if (!LinkageName.empty())
+    addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
+              GlobalValue::getRealLinkageName(LinkageName));
+
+  // Constructors and operators for anonymous aggregates do not have names.
+  if (!SP.getName().empty())
+    addString(SPDie, dwarf::DW_AT_name, SP.getName());
+
+  addSourceLine(SPDie, SP);
+
+  // Add the prototype if we have a prototype and we have a C like
+  // language.
+  uint16_t Language = getLanguage();
+  if (SP.isPrototyped() &&
+      (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
+       Language == dwarf::DW_LANG_ObjC))
+    addFlag(SPDie, dwarf::DW_AT_prototyped);
+
+  DICompositeType SPTy = SP.getType();
+  assert(SPTy.getTag() == dwarf::DW_TAG_subroutine_type &&
+         "the type of a subprogram should be a subroutine");
+
+  DIArray Args = SPTy.getTypeArray();
+  // Add a return type. If this is a type like a C/C++ void type we don't add a
+  // return type.
+  if (Args.getElement(0))
+    addType(SPDie, DIType(Args.getElement(0)));
+
+  unsigned VK = SP.getVirtuality();
+  if (VK) {
+    addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
+    DIEBlock *Block = getDIEBlock();
+    addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    addUInt(Block, dwarf::DW_FORM_udata, SP.getVirtualIndex());
+    addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block);
+    ContainingTypeMap.insert(
+        std::make_pair(SPDie, resolve(SP.getContainingType())));
+  }
+
+  if (!SP.isDefinition()) {
+    addFlag(SPDie, dwarf::DW_AT_declaration);
+
+    // Add arguments. Do not add arguments for subprogram definitions; those
+    // are handled while processing variables.
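+    // (Editorial example, hypothetical source: a declaration such as
+    // `int f(int, float);` gets two DW_TAG_formal_parameter children here,
+    // one per argument type in the subroutine type's array.)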
+    for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+      DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, *SPDie);
+      DIType ATy(Args.getElement(i));
+      addType(Arg, ATy);
+      if (ATy.isArtificial())
+        addFlag(Arg, dwarf::DW_AT_artificial);
+    }
+  }
+
+  if (SP.isArtificial())
+    addFlag(SPDie, dwarf::DW_AT_artificial);
+
+  if (!SP.isLocalToUnit())
+    addFlag(SPDie, dwarf::DW_AT_external);
+
+  if (SP.isOptimized())
+    addFlag(SPDie, dwarf::DW_AT_APPLE_optimized);
+
+  if (unsigned isa = Asm->getISAEncoding()) {
+    addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
+  }
+
+  if (SP.isLValueReference())
+    addFlag(SPDie, dwarf::DW_AT_reference);
+
+  if (SP.isRValueReference())
+    addFlag(SPDie, dwarf::DW_AT_rvalue_reference);
+
+  return SPDie;
+}
+
+// Return the const expression if the value is a GEP that accesses a merged
+// global constant, e.g.
+//   i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
+static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
+  const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
+  if (!CE || CE->getNumOperands() != 3 ||
+      CE->getOpcode() != Instruction::GetElementPtr)
+    return NULL;
+
+  // First operand points to a global struct.
+  Value *Ptr = CE->getOperand(0);
+  if (!isa<GlobalValue>(Ptr) ||
+      !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType()))
+    return NULL;
+
+  // Second operand is zero.
+  const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
+  if (!CI || !CI->isZero())
+    return NULL;
+
+  // Third operand is offset.
+  if (!isa<ConstantInt>(CE->getOperand(2)))
+    return NULL;
+
+  return CE;
+}
+
+/// createGlobalVariableDIE - create global variable DIE.
+void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
+  // Check for pre-existence.
+  if (getDIE(GV))
+    return;
+
+  if (!GV.isGlobalVariable())
+    return;
+
+  DIScope GVContext = GV.getContext();
+  DIType GTy = GV.getType();
+
+  // If this is a static data member definition, some attributes belong
+  // to the declaration DIE.
+  DIE *VariableDIE = NULL;
+  bool IsStaticMember = false;
+  DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration();
+  if (SDMDecl.Verify()) {
+    assert(SDMDecl.isStaticMember() && "Expected static member decl");
+    // We need the declaration DIE that is in the static member's class.
+    VariableDIE = getOrCreateStaticMemberDIE(SDMDecl);
+    IsStaticMember = true;
+  }
+
+  // If this is not a static data member definition, create the variable
+  // DIE and add the initial set of attributes to it.
+  if (!VariableDIE) {
+    // Construct the context before querying for the existence of the DIE in
+    // case such construction creates the DIE.
+    DIE *ContextDIE = getOrCreateContextDIE(GVContext);
+
+    // Add to map.
+    VariableDIE = createAndAddDIE(GV.getTag(), *ContextDIE, GV);
+
+    // Add name and type.
+    addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
+    addType(VariableDIE, GTy);
+
+    // Add scoping info.
+    if (!GV.isLocalToUnit())
+      addFlag(VariableDIE, dwarf::DW_AT_external);
+
+    // Add line number info.
+    addSourceLine(VariableDIE, GV);
+  }
+
+  // Add location.
+  bool addToAccelTable = false;
+  DIE *VariableSpecDIE = NULL;
+  bool isGlobalVariable = GV.getGlobal() != NULL;
+  if (isGlobalVariable) {
+    addToAccelTable = true;
+    DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+    const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal());
+    if (GV.getGlobal()->isThreadLocal()) {
+      // FIXME: Make this work with -gsplit-dwarf.
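+      // Editorial sketch of the location expression assembled below (the
+      // exact ops depend on pointer size and split-dwarf mode):
+      //   DW_OP_const4u/const8u <TLS offset>, DW_OP_GNU_push_tls_address
+      // or, under split-dwarf:
+      //   DW_OP_GNU_const_index <address pool index>,
+      //   DW_OP_GNU_push_tls_address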
+      unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+      assert((PointerSize == 4 || PointerSize == 8) &&
+             "Add support for other sizes if necessary");
+      const MCExpr *Expr =
+          Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym);
+      // Based on GCC's support for TLS:
+      if (!DD->useSplitDwarf()) {
+        // 1) Start with a constNu (DW_OP_const4u or DW_OP_const8u) of the
+        //    appropriate pointer size
+        addUInt(Block, dwarf::DW_FORM_data1,
+                PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u);
+        // 2) containing the (relocated) offset of the TLS variable
+        //    within the module's TLS block.
+        addExpr(Block, dwarf::DW_FORM_udata, Expr);
+      } else {
+        addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+        addUInt(Block, dwarf::DW_FORM_udata, DU->getAddrPoolIndex(Expr));
+      }
+      // 3) followed by a custom OP to make the debugger do a TLS lookup.
+      addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address);
+    } else {
+      DD->addArangeLabel(SymbolCU(this, Sym));
+      addOpAddress(Block, Sym);
+    }
+    // Do not create specification DIE if context is either compile unit
+    // or a subprogram.
+    if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() &&
+        !GVContext.isFile() && !DD->isSubprogramContext(GVContext)) {
+      // Create specification DIE.
+      VariableSpecDIE = createAndAddDIE(dwarf::DW_TAG_variable, *UnitDie);
+      addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, VariableDIE);
+      addBlock(VariableSpecDIE, dwarf::DW_AT_location, Block);
+      // A static member's declaration is already flagged as such.
+      if (!SDMDecl.Verify())
+        addFlag(VariableDIE, dwarf::DW_AT_declaration);
+    } else {
+      addBlock(VariableDIE, dwarf::DW_AT_location, Block);
+    }
+    // Add the linkage name.
+    StringRef LinkageName = GV.getLinkageName();
+    if (!LinkageName.empty())
+      // From DWARF4: DIEs to which DW_AT_linkage_name may apply include:
+      // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and
+      // TAG_variable.
+      addString(IsStaticMember && VariableSpecDIE ? VariableSpecDIE
+                                                  : VariableDIE,
+                dwarf::DW_AT_MIPS_linkage_name,
+                GlobalValue::getRealLinkageName(LinkageName));
+  } else if (const ConstantInt *CI =
+                 dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
+    // AT_const_value was added when the static member was created. To avoid
+    // emitting AT_const_value multiple times, we only add AT_const_value when
+    // it is not a static member.
+    if (!IsStaticMember)
+      addConstantValue(VariableDIE, CI, isUnsignedDIType(DD, GTy));
+  } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getOperand(11))) {
+    addToAccelTable = true;
+    // GV is a merged global.
+    DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+    Value *Ptr = CE->getOperand(0);
+    MCSymbol *Sym = Asm->getSymbol(cast<GlobalValue>(Ptr));
+    DD->addArangeLabel(SymbolCU(this, Sym));
+    addOpAddress(Block, Sym);
+    addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end());
+    addUInt(Block, dwarf::DW_FORM_udata,
+            Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
+    addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+    addBlock(VariableDIE, dwarf::DW_AT_location, Block);
+  }
+
+  if (addToAccelTable) {
+    DIE *AddrDIE = VariableSpecDIE ? VariableSpecDIE : VariableDIE;
+    addAccelName(GV.getName(), AddrDIE);
+
+    // If the linkage name is different from the name, go ahead and output
+    // that as well into the name table.
+    if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
+      addAccelName(GV.getLinkageName(), AddrDIE);
+  }
+
+  if (!GV.isLocalToUnit())
+    addGlobalName(GV.getName(), VariableSpecDIE ? VariableSpecDIE : VariableDIE,
+                  GV.getContext());
+}
+
+/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
+  DIE *DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer);
+  addDIEEntry(DW_Subrange, dwarf::DW_AT_type, IndexTy);
+
+  // The LowerBound value defines the lower bound, which is typically zero for
+  // C/C++. The Count value is the number of elements. Values are 64 bit. If
+  // Count == -1 then the array is unbounded and we do not emit
+  // DW_AT_lower_bound and DW_AT_upper_bound attributes. If LowerBound == 0 and
+  // Count == 0, then the array has zero elements in which case we do not emit
+  // an upper bound.
+  int64_t LowerBound = SR.getLo();
+  int64_t DefaultLowerBound = getDefaultLowerBound();
+  int64_t Count = SR.getCount();
+
+  if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
+    addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound);
+
+  if (Count != -1 && Count != 0)
+    // FIXME: An unbounded array should reference the expression that defines
+    // the array.
+    addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, None,
+            LowerBound + Count - 1);
+}
+
+/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) {
+  if (CTy.isVector())
+    addFlag(&Buffer, dwarf::DW_AT_GNU_vector);
+
+  // Emit the element type.
+  addType(&Buffer, resolve(CTy.getTypeDerivedFrom()));
+
+  // Get an anonymous type for the index type.
+  // FIXME: This type should be passed down from the front end
+  // as different languages may have different sizes for indexes.
+  DIE *IdxTy = getIndexTyDie();
+  if (!IdxTy) {
+    // Construct an anonymous type for index type.
+    IdxTy = createAndAddDIE(dwarf::DW_TAG_base_type, *UnitDie);
+    addString(IdxTy, dwarf::DW_AT_name, "int");
+    addUInt(IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int32_t));
+    addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+            dwarf::DW_ATE_signed);
+    setIndexTyDie(IdxTy);
+  }
+
+  // Add subranges to array type.
+  DIArray Elements = CTy.getTypeArray();
+  for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+    DIDescriptor Element = Elements.getElement(i);
+    if (Element.getTag() == dwarf::DW_TAG_subrange_type)
+      constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy);
+  }
+}
+
+/// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType.
+void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) {
+  DIArray Elements = CTy.getTypeArray();
+
+  // Add enumerators to enumeration type.
+  for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+    DIEnumerator Enum(Elements.getElement(i));
+    if (Enum.isEnumerator()) {
+      DIE *Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer);
+      StringRef Name = Enum.getName();
+      addString(Enumerator, dwarf::DW_AT_name, Name);
+      int64_t Value = Enum.getEnumValue();
+      addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
+              Value);
+    }
+  }
+  DIType DTy = resolve(CTy.getTypeDerivedFrom());
+  if (DTy) {
+    addType(&Buffer, DTy);
+    addFlag(&Buffer, dwarf::DW_AT_enum_class);
+  }
+}
+
+/// constructContainingTypeDIEs - Construct DIEs for types that contain
+/// vtables.
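+/// (Editorial example, hypothetical source: a virtual method of `struct S`
+/// is recorded in ContainingTypeMap by getOrCreateSubprogramDIE; this pass
+/// then adds a DW_AT_containing_type on that subprogram DIE referring back
+/// to S's DIE.)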
+void DwarfUnit::constructContainingTypeDIEs() {
+  for (DenseMap<DIE *, const MDNode *>::iterator
+           CI = ContainingTypeMap.begin(),
+           CE = ContainingTypeMap.end();
+       CI != CE; ++CI) {
+    DIE *SPDie = CI->first;
+    DIDescriptor D(CI->second);
+    if (!D)
+      continue;
+    DIE *NDie = getDIE(D);
+    if (!NDie)
+      continue;
+    addDIEEntry(SPDie, dwarf::DW_AT_containing_type, NDie);
+  }
+}
+
+/// constructVariableDIE - Construct a DIE for the given DbgVariable.
+DIE *DwarfUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) {
+  StringRef Name = DV.getName();
+
+  // Define variable debug information entry.
+  DIE *VariableDie = new DIE(DV.getTag());
+  DbgVariable *AbsVar = DV.getAbstractVariable();
+  DIE *AbsDIE = AbsVar ? AbsVar->getDIE() : NULL;
+  if (AbsDIE)
+    addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, AbsDIE);
+  else {
+    if (!Name.empty())
+      addString(VariableDie, dwarf::DW_AT_name, Name);
+    addSourceLine(VariableDie, DV.getVariable());
+    addType(VariableDie, DV.getType());
+  }
+
+  if (DV.isArtificial())
+    addFlag(VariableDie, dwarf::DW_AT_artificial);
+
+  if (isScopeAbstract) {
+    DV.setDIE(VariableDie);
+    return VariableDie;
+  }
+
+  // Add variable address.
+
+  unsigned Offset = DV.getDotDebugLocOffset();
+  if (Offset != ~0U) {
+    addSectionLabel(VariableDie, dwarf::DW_AT_location,
+                    Asm->GetTempSymbol("debug_loc", Offset));
+    DV.setDIE(VariableDie);
+    return VariableDie;
+  }
+
+  // Check if variable is described by a DBG_VALUE instruction.
+  if (const MachineInstr *DVInsn = DV.getMInsn()) {
+    assert(DVInsn->getNumOperands() == 3);
+    if (DVInsn->getOperand(0).isReg()) {
+      const MachineOperand RegOp = DVInsn->getOperand(0);
+      // If the second operand is an immediate, this is an indirect value.
+      if (DVInsn->getOperand(1).isImm()) {
+        MachineLocation Location(RegOp.getReg(),
+                                 DVInsn->getOperand(1).getImm());
+        addVariableAddress(DV, VariableDie, Location);
+      } else if (RegOp.getReg())
+        addVariableAddress(DV, VariableDie, MachineLocation(RegOp.getReg()));
+    } else if (DVInsn->getOperand(0).isImm())
+      addConstantValue(VariableDie, DVInsn->getOperand(0), DV.getType());
+    else if (DVInsn->getOperand(0).isFPImm())
+      addConstantFPValue(VariableDie, DVInsn->getOperand(0));
+    else if (DVInsn->getOperand(0).isCImm())
+      addConstantValue(VariableDie, DVInsn->getOperand(0).getCImm(),
+                       isUnsignedDIType(DD, DV.getType()));
+
+    DV.setDIE(VariableDie);
+    return VariableDie;
+  } else {
+    // ... else use the frame index.
+    int FI = DV.getFrameIndex();
+    if (FI != ~0) {
+      unsigned FrameReg = 0;
+      const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+      int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+      MachineLocation Location(FrameReg, Offset);
+      addVariableAddress(DV, VariableDie, Location);
+    }
+  }
+
+  DV.setDIE(VariableDie);
+  return VariableDie;
+}
+
+/// constructMemberDIE - Construct member DIE from DIDerivedType.
+void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
+  DIE *MemberDie = createAndAddDIE(DT.getTag(), Buffer);
+  StringRef Name = DT.getName();
+  if (!Name.empty())
+    addString(MemberDie, dwarf::DW_AT_name, Name);
+
+  addType(MemberDie, resolve(DT.getTypeDerivedFrom()));
+
+  addSourceLine(MemberDie, DT);
+
+  if (DT.getTag() == dwarf::DW_TAG_inheritance && DT.isVirtual()) {
+
+    // For C++, virtual base classes are not at a fixed offset. Use the
+    // following expression to extract the appropriate offset from the vtable.
+    // BaseAddr = ObAddr + *((*ObAddr) - Offset)
+
+    DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
+    addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+    addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+    addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    addUInt(VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+    addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+    addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+    addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+    addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie);
+  } else {
+    uint64_t Size = DT.getSizeInBits();
+    uint64_t FieldSize = getBaseTypeSize(DD, DT);
+    uint64_t OffsetInBytes;
+
+    if (Size != FieldSize) {
+      // Handle bitfield.
+      addUInt(MemberDie, dwarf::DW_AT_byte_size, None,
+              getBaseTypeSize(DD, DT) >> 3);
+      addUInt(MemberDie, dwarf::DW_AT_bit_size, None, DT.getSizeInBits());
+
+      uint64_t Offset = DT.getOffsetInBits();
+      uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+      uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+      uint64_t FieldOffset = (HiMark - FieldSize);
+      Offset -= FieldOffset;
+
+      // Maybe we need to work from the other end.
+      if (Asm->getDataLayout().isLittleEndian())
+        Offset = FieldSize - (Offset + Size);
+      addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset);
+
+      // Here DW_AT_data_member_location points to the anonymous
+      // field that includes this bit field.
+      OffsetInBytes = FieldOffset >> 3;
+    } else
+      // This is not a bitfield.
+      OffsetInBytes = DT.getOffsetInBits() >> 3;
+
+    if (DD->getDwarfVersion() <= 2) {
+      DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
+      addUInt(MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+      addUInt(MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes);
+      addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie);
+    } else
+      addUInt(MemberDie, dwarf::DW_AT_data_member_location, None,
+              OffsetInBytes);
+  }
+
+  if (DT.isProtected())
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_protected);
+  else if (DT.isPrivate())
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_private);
+  // Otherwise C++ members and base classes are considered public.
+  else
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_public);
+  if (DT.isVirtual())
+    addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1,
+            dwarf::DW_VIRTUALITY_virtual);
+
+  // Objective-C properties.
+  if (MDNode *PNode = DT.getObjCProperty())
+    if (DIEEntry *PropertyDie = getDIEEntry(PNode))
+      MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
+                          PropertyDie);
+
+  if (DT.isArtificial())
+    addFlag(MemberDie, dwarf::DW_AT_artificial);
+}
+
+/// getOrCreateStaticMemberDIE - Create new DIE for C++ static member.
+DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) {
+  if (!DT.Verify())
+    return NULL;
+
+  // Construct the context before querying for the existence of the DIE in case
+  // such construction creates the DIE.
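+  // (Editorial sketch, hypothetical source: for
+  // `struct X { static const int N = 7; };` this yields a DW_TAG_member
+  // declaration under X's DIE carrying DW_AT_external, DW_AT_declaration
+  // and a DW_AT_const_value of 7.)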
+  DIE *ContextDIE = getOrCreateContextDIE(resolve(DT.getContext()));
+  assert(dwarf::isType(ContextDIE->getTag()) &&
+         "Static member should belong to a type.");
+
+  DIE *StaticMemberDIE = getDIE(DT);
+  if (StaticMemberDIE)
+    return StaticMemberDIE;
+
+  StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT);
+
+  DIType Ty = resolve(DT.getTypeDerivedFrom());
+
+  addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName());
+  addType(StaticMemberDIE, Ty);
+  addSourceLine(StaticMemberDIE, DT);
+  addFlag(StaticMemberDIE, dwarf::DW_AT_external);
+  addFlag(StaticMemberDIE, dwarf::DW_AT_declaration);
+
+  // FIXME: We could omit private if the parent is a class_type, and
+  // public if the parent is something else.
+  if (DT.isProtected())
+    addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_protected);
+  else if (DT.isPrivate())
+    addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_private);
+  else
+    addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_public);
+
+  if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant()))
+    addConstantValue(StaticMemberDIE, CI, isUnsignedDIType(DD, Ty));
+  if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant()))
+    addConstantFPValue(StaticMemberDIE, CFP);
+
+  return StaticMemberDIE;
+}
+
+void DwarfUnit::emitHeader(const MCSection *ASection,
+                           const MCSymbol *ASectionSym) const {
+  Asm->OutStreamer.AddComment("DWARF version number");
+  Asm->EmitInt16(DD->getDwarfVersion());
+  Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
+  // We share one abbreviations table across all units so it's always at the
+  // start of the section. Use a relocatable offset where needed to ensure
+  // linking doesn't invalidate that offset.
+  Asm->EmitSectionOffset(ASectionSym, ASectionSym);
+  Asm->OutStreamer.AddComment("Address Size (in bytes)");
+  Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+}
+
+DwarfCompileUnit::~DwarfCompileUnit() {}
+DwarfTypeUnit::~DwarfTypeUnit() {}
+
+void DwarfTypeUnit::emitHeader(const MCSection *ASection,
+                               const MCSymbol *ASectionSym) const {
+  DwarfUnit::emitHeader(ASection, ASectionSym);
+  Asm->OutStreamer.AddComment("Type Signature");
+  Asm->OutStreamer.EmitIntValue(TypeSignature, sizeof(TypeSignature));
+  Asm->OutStreamer.AddComment("Type DIE Offset");
+  Asm->OutStreamer.EmitIntValue(Ty->getOffset(), sizeof(Ty->getOffset()));
+}
+
+void DwarfTypeUnit::initSection(const MCSection *Section) {
+  assert(!this->Section);
+  this->Section = Section;
+  // Since each type unit is contained in its own COMDAT section, the begin
+  // label and the section label are the same. Using the begin label emission
+  // in DwarfDebug to emit the section label as well is slightly subtle/sneaky,
+  // but simpler than the only other alternative: lazily constructing
+  // start-of-section labels and storing a mapping in DwarfDebug (or
+  // AsmPrinter).
+  this->SectionSym = this->LabelBegin =
+      Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID());
+  this->LabelEnd =
+      Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID());
+  this->LabelRange = Asm->GetTempSymbol("gnu_ranges", getUniqueID());
+}
diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
new file mode 100644
index 000000000000..524cdc1b59f4
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -0,0 +1,578 @@
+//===-- llvm/CodeGen/DwarfUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF compile units.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+
+#include "DIE.h"
+#include "DwarfDebug.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+
+namespace llvm {
+
+class MachineLocation;
+class MachineOperand;
+class ConstantInt;
+class ConstantFP;
+class DbgVariable;
+
+// Data structure to hold a range for range lists.
+class RangeSpan {
+public:
+  RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {}
+  const MCSymbol *getStart() const { return Start; }
+  const MCSymbol *getEnd() const { return End; }
+
+private:
+  const MCSymbol *Start, *End;
+};
+
+class RangeSpanList {
+private:
+  // Index for locating within the debug_range section this particular span.
+  MCSymbol *RangeSym;
+  // List of ranges.
+  SmallVector<RangeSpan, 2> Ranges;
+
+public:
+  RangeSpanList(MCSymbol *Sym) : RangeSym(Sym) {}
+  MCSymbol *getSym() const { return RangeSym; }
+  const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; }
+  void addRange(RangeSpan Range) { Ranges.push_back(Range); }
+};
+
+//===----------------------------------------------------------------------===//
+/// Unit - This dwarf writer support class manages information associated
+/// with a source file.
+class DwarfUnit {
+protected:
+  /// UniqueID - a numeric ID unique among all CUs in the module
+  unsigned UniqueID;
+
+  /// Node - MDNode for the compile unit.
+  DICompileUnit Node;
+
+  /// Unit debug information entry.
+  const OwningPtr<DIE> UnitDie;
+
+  /// Offset of the UnitDie from beginning of debug info section.
+  unsigned DebugInfoOffset;
+
+  /// Asm - Target of Dwarf emission.
+  AsmPrinter *Asm;
+
+  // Holders for some common dwarf information.
+  DwarfDebug *DD;
+  DwarfFile *DU;
+
+  /// IndexTyDie - An anonymous type for index type. Owned by UnitDie.
+  DIE *IndexTyDie;
+
+  /// MDNodeToDieMap - Tracks the mapping of unit level debug information
+  /// variables to debug information entries.
+  DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
+
+  /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information
+  /// descriptors to debug information entries using a DIEEntry proxy.
+  DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
+
+  /// GlobalNames - A map of globally visible named entities for this unit.
+  StringMap<const DIE *> GlobalNames;
+
+  /// GlobalTypes - A map of globally visible types for this unit.
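+  /// (Editorial note: keys are language-qualified names as produced by
+  /// getParentContextString, e.g. "A::B::C" for a C++ type C nested in
+  /// namespaces A and B.)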
+  StringMap<const DIE *> GlobalTypes;
+
+  /// AccelNames - A map of names for the name accelerator table.
+  StringMap<std::vector<const DIE *> > AccelNames;
+
+  /// AccelObjC - A map of objc spec for the objc accelerator table.
+  StringMap<std::vector<const DIE *> > AccelObjC;
+
+  /// AccelNamespace - A map of names for the namespace accelerator table.
+  StringMap<std::vector<const DIE *> > AccelNamespace;
+
+  /// AccelTypes - A map of names for the type accelerator table.
+  StringMap<std::vector<std::pair<const DIE *, unsigned> > > AccelTypes;
+
+  /// DIEBlocks - A list of all the DIEBlocks in use.
+  std::vector<DIEBlock *> DIEBlocks;
+
+  /// ContainingTypeMap - This map is used to keep track of subprogram DIEs
+  /// that need DW_AT_containing_type attribute. This attribute points to a
+  /// DIE that corresponds to the MDNode mapped with the subprogram DIE.
+  DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+
+  // List of ranges for a given compile unit.
+  SmallVector<RangeSpan, 1> CURanges;
+
+  // List of range lists for a given compile unit, separate from the ranges
+  // for the CU itself.
+  SmallVector<RangeSpanList, 1> CURangeLists;
+
+  // DIEValueAllocator - All DIEValues are allocated through this allocator.
+  BumpPtrAllocator DIEValueAllocator;
+
+  // DIEIntegerOne - A preallocated DIEValue because 1 is used frequently.
+  DIEInteger *DIEIntegerOne;
+
+  /// The section this unit will be emitted in.
+  const MCSection *Section;
+
+  /// A label at the start of the non-dwo section related to this unit.
+  MCSymbol *SectionSym;
+
+  /// The start of the unit within its section.
+  MCSymbol *LabelBegin;
+
+  /// The end of the unit within its section.
+  MCSymbol *LabelEnd;
+
+  /// The label for the start of the range sets for the elements of this unit.
+  MCSymbol *LabelRange;
+
+  /// Skeleton unit associated with this unit.
+  DwarfUnit *Skeleton;
+
+  DwarfUnit(unsigned UID, DIE *D, DICompileUnit CU, AsmPrinter *A,
+            DwarfDebug *DW, DwarfFile *DWU);
+
+public:
+  virtual ~DwarfUnit();
+
+  /// Set the skeleton unit associated with this unit.
+  void setSkeleton(DwarfUnit *Skel) { Skeleton = Skel; }
+
+  /// Get the skeleton unit associated with this unit.
+  DwarfUnit *getSkeleton() const { return Skeleton; }
+
+  /// Pass in the SectionSym even though we could recreate it in every compile
+  /// unit (type units will actually have distinct symbols once they're in
+  /// comdat sections).
+  void initSection(const MCSection *Section, MCSymbol *SectionSym) {
+    assert(!this->Section);
+    this->Section = Section;
+    this->SectionSym = SectionSym;
+    this->LabelBegin =
+        Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID());
+    this->LabelEnd =
+        Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID());
+    this->LabelRange = Asm->GetTempSymbol("gnu_ranges", getUniqueID());
+  }
+
+  const MCSection *getSection() const {
+    assert(Section);
+    return Section;
+  }
+
+  /// If there's a skeleton then return the section symbol for the skeleton
+  /// unit, otherwise return the section symbol for this unit.
+  MCSymbol *getLocalSectionSym() const {
+    if (Skeleton)
+      return Skeleton->getSectionSym();
+    return getSectionSym();
+  }
+
+  MCSymbol *getSectionSym() const {
+    assert(Section);
+    return SectionSym;
+  }
+
+  /// If there's a skeleton then return the begin label for the skeleton unit,
+  /// otherwise return the local label for this unit.
+  MCSymbol *getLocalLabelBegin() const {
+    if (Skeleton)
+      return Skeleton->getLabelBegin();
+    return getLabelBegin();
+  }
+
+  MCSymbol *getLabelBegin() const {
+    assert(Section);
+    return LabelBegin;
+  }
+
+  MCSymbol *getLabelEnd() const {
+    assert(Section);
+    return LabelEnd;
+  }
+
+  MCSymbol *getLabelRange() const {
+    assert(Section);
+    return LabelRange;
+  }
+
+  // Accessors.
+  unsigned getUniqueID() const { return UniqueID; }
+  virtual uint16_t getLanguage() const = 0;
+  DICompileUnit getNode() const { return Node; }
+  DIE *getUnitDie() const { return UnitDie.get(); }
+  const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; }
+  const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; }
+
+  const StringMap<std::vector<const DIE *> > &getAccelNames() const {
+    return AccelNames;
+  }
+  const StringMap<std::vector<const DIE *> > &getAccelObjC() const {
+    return AccelObjC;
+  }
+  const StringMap<std::vector<const DIE *> > &getAccelNamespace() const {
+    return AccelNamespace;
+  }
+  const StringMap<std::vector<std::pair<const DIE *, unsigned> > > &
+  getAccelTypes() const {
+    return AccelTypes;
+  }
+
+  unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
+  void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
+
+  /// hasContent - Return true if this compile unit has something to write out.
+  bool hasContent() const { return !UnitDie->getChildren().empty(); }
+
+  /// addRange - Add an address range to the list of ranges for this unit.
+  void addRange(RangeSpan Range) { CURanges.push_back(Range); }
+
+  /// getRanges - Get the list of ranges for this unit.
+  const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; }
+  SmallVectorImpl<RangeSpan> &getRanges() { return CURanges; }
+
+  /// addRangeList - Add an address range list to the list of range lists.
+  void addRangeList(RangeSpanList Ranges) { CURangeLists.push_back(Ranges); }
+
+  /// getRangeLists - Get the vector of range lists.
+  const SmallVectorImpl<RangeSpanList> &getRangeLists() const {
+    return CURangeLists;
+  }
+  SmallVectorImpl<RangeSpanList> &getRangeLists() { return CURangeLists; }
+
+  /// getParentContextString - Get a string containing the language specific
+  /// context for a global name.
+  std::string getParentContextString(DIScope Context) const;
+
+  /// addGlobalName - Add a new global entity to the compile unit.
+  ///
+  void addGlobalName(StringRef Name, DIE *Die, DIScope Context);
+
+  /// addAccelName - Add a new name to the name accelerator table.
+  void addAccelName(StringRef Name, const DIE *Die);
+
+  /// addAccelObjC - Add a new name to the ObjC accelerator table.
+  void addAccelObjC(StringRef Name, const DIE *Die);
+
+  /// addAccelNamespace - Add a new name to the namespace accelerator table.
+  void addAccelNamespace(StringRef Name, const DIE *Die);
+
+  /// addAccelType - Add a new type to the type accelerator table.
+  void addAccelType(StringRef Name, std::pair<const DIE *, unsigned> Die);
+
+  /// getDIE - Returns the debug information entry map slot for the
+  /// specified debug variable. We delegate the request to DwarfDebug
+  /// when the MDNode can be part of the type system, since DIEs for
+  /// the type system can be shared across CUs and the mappings are
+  /// kept in DwarfDebug.
+  DIE *getDIE(DIDescriptor D) const;
+
+  /// getDIEBlock - Returns a fresh newly allocated DIEBlock.
+  DIEBlock *getDIEBlock() { return new (DIEValueAllocator) DIEBlock(); }
+
+  /// insertDIE - Insert DIE into the map. We delegate the request to
+  /// DwarfDebug when the MDNode can be part of the type system, since DIEs
+  /// for the type system can be shared across CUs and the mappings are
+  /// kept in DwarfDebug.
+  void insertDIE(DIDescriptor Desc, DIE *D);
+
+  /// addDie - Adds or interns the DIE to the compile unit.
+  ///
+  void addDie(DIE *Buffer) { UnitDie->addChild(Buffer); }
+
+  /// addFlag - Add a flag that is true to the DIE.
+  void addFlag(DIE *Die, dwarf::Attribute Attribute);
+
+  /// addUInt - Add an unsigned integer attribute data and value.
+  void addUInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
+               uint64_t Integer);
+
+  void addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer);
+
+  /// addSInt - Add a signed integer attribute data and value.
+  void addSInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
+               int64_t Integer);
+
+  void addSInt(DIEBlock *Die, Optional<dwarf::Form> Form, int64_t Integer);
+
+  /// addString - Add a string attribute data and value.
+  void addString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str);
+
+  /// addLocalString - Add a string attribute data and value.
+  void addLocalString(DIE *Die, dwarf::Attribute Attribute,
+                      const StringRef Str);
+
+  /// addExpr - Add a Dwarf expression attribute data and value.
+  void addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr);
+
+  /// addLabel - Add a Dwarf label attribute data and value.
+  void addLabel(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form,
+                const MCSymbol *Label);
+
+  void addLabel(DIEBlock *Die, dwarf::Form Form, const MCSymbol *Label);
+
+  /// addSectionLabel - Add a Dwarf section label attribute data and value.
+  ///
+  void addSectionLabel(DIE *Die, dwarf::Attribute Attribute,
+                       const MCSymbol *Label);
+
+  /// addSectionOffset - Add an offset into a section attribute data and value.
+  ///
+  void addSectionOffset(DIE *Die, dwarf::Attribute Attribute, uint64_t Integer);
+
+  /// addOpAddress - Add a dwarf op address data and value using the
+  /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
+  void addOpAddress(DIEBlock *Die, const MCSymbol *Label);
+
+  /// addSectionDelta - Add a label delta attribute data and value.
+  void addSectionDelta(DIE *Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
+                       const MCSymbol *Lo);
+
+  /// addDIEEntry - Add a DIE attribute data and value.
+  void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry);
+
+  /// addDIEEntry - Add a DIE attribute data and value.
+  void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIEEntry *Entry);
+
+  void addDIETypeSignature(DIE *Die, const DwarfTypeUnit &Type);
+
+  /// addBlock - Add block data.
+  void addBlock(DIE *Die, dwarf::Attribute Attribute, DIEBlock *Block);
+
+  /// addSourceLine - Add location information to specified debug information
+  /// entry.
+  void addSourceLine(DIE *Die, DIVariable V);
+  void addSourceLine(DIE *Die, DIGlobalVariable G);
+  void addSourceLine(DIE *Die, DISubprogram SP);
+  void addSourceLine(DIE *Die, DIType Ty);
+  void addSourceLine(DIE *Die, DINameSpace NS);
+  void addSourceLine(DIE *Die, DIObjCProperty Ty);
+
+  /// addAddress - Add an address attribute to a die based on the location
+  /// provided.
+  void addAddress(DIE *Die, dwarf::Attribute Attribute,
+                  const MachineLocation &Location, bool Indirect = false);
+
+  /// addConstantValue - Add constant value entry in variable DIE.
+  void addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty);
+  void addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned);
+  void addConstantValue(DIE *Die, const APInt &Val, bool Unsigned);
+
+  /// addConstantFPValue - Add constant value entry in variable DIE.
+  void addConstantFPValue(DIE *Die, const MachineOperand &MO);
+  void addConstantFPValue(DIE *Die, const ConstantFP *CFP);
+
+  /// addTemplateParams - Add template parameters in buffer.
+  void addTemplateParams(DIE &Buffer, DIArray TParams);
+
+  /// addRegisterOp - Add register operand.
+  void addRegisterOp(DIEBlock *TheDie, unsigned Reg);
+
+  /// addRegisterOffset - Add register offset.
+  void addRegisterOffset(DIEBlock *TheDie, unsigned Reg, int64_t Offset);
+
+  /// addComplexAddress - Start with the address based on the location
+  /// provided, and generate the DWARF information necessary to find the
+  /// actual variable (navigating the extra location information encoded in
+  /// the type) based on the starting location. Add the DWARF information to
+  /// the die.
+  void addComplexAddress(const DbgVariable &DV, DIE *Die,
+                         dwarf::Attribute Attribute,
+                         const MachineLocation &Location);
+
+  // FIXME: Should be reformulated in terms of addComplexAddress.
+  /// addBlockByrefAddress - Start with the address based on the location
+  /// provided, and generate the DWARF information necessary to find the
+  /// actual Block variable (navigating the Block struct) based on the
+  /// starting location. Add the DWARF information to the die. Obsolete,
+  /// please use addComplexAddress instead.
+  void addBlockByrefAddress(const DbgVariable &DV, DIE *Die,
+                            dwarf::Attribute Attribute,
+                            const MachineLocation &Location);
+
+  /// addVariableAddress - Add DW_AT_location attribute for a
+  /// DbgVariable based on provided MachineLocation.
+  void addVariableAddress(const DbgVariable &DV, DIE *Die,
+                          MachineLocation Location);
+
+  /// addType - Add a new type attribute to the specified entity. This takes
+  /// an attribute parameter because DW_AT_friend attributes are also
+  /// type references.
+  void addType(DIE *Entity, DIType Ty,
+               dwarf::Attribute Attribute = dwarf::DW_AT_type);
+
+  /// getOrCreateNameSpace - Create a DIE for DINameSpace.
+  DIE *getOrCreateNameSpace(DINameSpace NS);
+
+  /// getOrCreateSubprogramDIE - Create new DIE using SP.
+  DIE *getOrCreateSubprogramDIE(DISubprogram SP);
+
+  /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+  /// given DIType.
+  DIE *getOrCreateTypeDIE(const MDNode *N);
+
+  /// createTypeDIE - Create a new type DIE for the given DICompositeType.
+  DIE *createTypeDIE(DICompositeType Ty);
+
+  /// getOrCreateContextDIE - Get context owner's DIE.
+  DIE *getOrCreateContextDIE(DIScope Context);
+
+  /// constructContainingTypeDIEs - Construct DIEs for types that contain
+  /// vtables.
+  void constructContainingTypeDIEs();
+
+  /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+  DIE *constructVariableDIE(DbgVariable &DV, bool isScopeAbstract);
+
+  /// Create a DIE with the given Tag, add the DIE to its parent, and
+  /// call insertDIE if MD is not null.
+  DIE *createAndAddDIE(unsigned Tag, DIE &Parent,
+                       DIDescriptor N = DIDescriptor());
+
+  /// Compute the size of a header for this unit, not including the initial
+  /// length field.
+  virtual unsigned getHeaderSize() const {
+    return sizeof(int16_t) + // DWARF version number
+           sizeof(int32_t) + // Offset Into Abbrev. Section
+           sizeof(int8_t);   // Pointer Size (in bytes)
+  }
+
+  /// Emit the header for this unit, not including the initial length field.
+  virtual void emitHeader(const MCSection *ASection,
+                          const MCSymbol *ASectionSym) const;
+
+protected:
+  /// getOrCreateStaticMemberDIE - Create new static data member DIE.
+  DIE *getOrCreateStaticMemberDIE(DIDerivedType DT);
+
+private:
+  /// constructTypeDIE - Construct basic type die from DIBasicType.
+  void constructTypeDIE(DIE &Buffer, DIBasicType BTy);
+
+  /// constructTypeDIE - Construct derived type die from DIDerivedType.
+  void constructTypeDIE(DIE &Buffer, DIDerivedType DTy);
+
+  /// constructTypeDIE - Construct type DIE from DICompositeType.
+  void constructTypeDIE(DIE &Buffer, DICompositeType CTy);
+
+  /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+  void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
+
+  /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+  void constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy);
+
+  /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+  void constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy);
+
+  /// constructMemberDIE - Construct member DIE from DIDerivedType.
+  void constructMemberDIE(DIE &Buffer, DIDerivedType DT);
+
+  /// constructTemplateTypeParameterDIE - Construct new DIE for the given
+  /// DITemplateTypeParameter.
+  void constructTemplateTypeParameterDIE(DIE &Buffer,
+                                         DITemplateTypeParameter TP);
+
+  /// constructTemplateValueParameterDIE - Construct new DIE for the given
+  /// DITemplateValueParameter.
+  void constructTemplateValueParameterDIE(DIE &Buffer,
+                                          DITemplateValueParameter TVP);
+
+  /// getDefaultLowerBound - Return the default lower bound for an array. If
+  /// the DWARF version doesn't handle the language, return -1.
+  int64_t getDefaultLowerBound() const;
+
+  /// getDIEEntry - Returns the debug information entry for the specified
+  /// debug variable.
+  DIEEntry *getDIEEntry(const MDNode *N) const {
+    return MDNodeToDIEEntryMap.lookup(N);
+  }
+
+  /// insertDIEEntry - Insert debug information entry into the map.
+  void insertDIEEntry(const MDNode *N, DIEEntry *E) {
+    MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
+  }
+
+  // getIndexTyDie - Get an anonymous type for index type.
+  DIE *getIndexTyDie() { return IndexTyDie; }
+
+  // setIndexTyDie - Set D as anonymous type for index which can be reused
+  // later.
+  void setIndexTyDie(DIE *D) { IndexTyDie = D; }
+
+  /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+  /// information entry.
+  DIEEntry *createDIEEntry(DIE *Entry);
+
+  /// resolve - Look in the DwarfDebug map for the MDNode that
+  /// corresponds to the reference.
+  template <typename T> T resolve(DIRef<T> Ref) const {
+    return DD->resolve(Ref);
+  }
+
+  /// If this is a named finished type then include it in the list of types for
+  /// the accelerator tables.
+  void updateAcceleratorTables(DIScope Context, DIType Ty, const DIE *TyDIE);
+};
+
+class DwarfCompileUnit : public DwarfUnit {
+public:
+  DwarfCompileUnit(unsigned UID, DIE *D, DICompileUnit Node, AsmPrinter *A,
+                   DwarfDebug *DW, DwarfFile *DWU);
+  virtual ~DwarfCompileUnit() LLVM_OVERRIDE;
+
+  /// createGlobalVariableDIE - create global variable DIE.
+  void createGlobalVariableDIE(DIGlobalVariable GV);
+
+  /// addLabelAddress - Add a dwarf label attribute data and value using
+  /// either DW_FORM_addr or DW_FORM_GNU_addr_index.
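+  /// (Editorial note: DW_FORM_GNU_addr_index is the split-dwarf form, where
+  /// the attribute holds an index into the .debug_addr address pool rather
+  /// than an inline address.)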
+ void addLabelAddress(DIE *Die, dwarf::Attribute Attribute, MCSymbol *Label); + + uint16_t getLanguage() const LLVM_OVERRIDE { return getNode().getLanguage(); } +}; + +class DwarfTypeUnit : public DwarfUnit { +private: + uint16_t Language; + uint64_t TypeSignature; + const DIE *Ty; + +public: + DwarfTypeUnit(unsigned UID, DIE *D, uint16_t Language, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU); + virtual ~DwarfTypeUnit() LLVM_OVERRIDE; + + void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; } + uint64_t getTypeSignature() const { return TypeSignature; } + void setType(const DIE *Ty) { this->Ty = Ty; } + + uint16_t getLanguage() const LLVM_OVERRIDE { return Language; } + /// Emit the header for this unit, not including the initial length field. + void emitHeader(const MCSection *ASection, const MCSymbol *ASectionSym) const + LLVM_OVERRIDE; + unsigned getHeaderSize() const LLVM_OVERRIDE { + return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature + sizeof(uint32_t); // Type DIE Offset + } + void initSection(const MCSection *Section); +}; +} // end llvm namespace +#endif diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt index 20b1f7b45b31..bbdb0c7fc3c7 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = AsmPrinter parent = Libraries -required_libraries = Analysis CodeGen Core MC MCParser Support Target +required_libraries = Analysis CodeGen Core MC MCParser Support Target TransformUtils diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 156101286b75..422b0fd4ffe6 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -44,14 +44,14 @@ Win64Exception::Win64Exception(AsmPrinter *A) Win64Exception::~Win64Exception() {} -/// EndModule - Emit all exception information that should come after the +/// endModule - Emit all exception information that should come after the /// content. -void Win64Exception::EndModule() { +void Win64Exception::endModule() { } -/// BeginFunction - Gather pre-function exception information. Assumes it's +/// beginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. -void Win64Exception::BeginFunction(const MachineFunction *MF) { +void Win64Exception::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; // If any landing pads survive, we need an EH table. @@ -86,9 +86,9 @@ void Win64Exception::BeginFunction(const MachineFunction *MF) { Asm->getFunctionNumber())); } -/// EndFunction - Gather and emit post-function exception information. +/// endFunction - Gather and emit post-function exception information. 
 ///
-void Win64Exception::EndFunction() {
+void Win64Exception::endFunction(const MachineFunction *) {
   if (!shouldEmitPersonality && !shouldEmitMoves)
     return;
diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/BranchFolding.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/BranchFolding.cpp
index 9cd4208d6461..a4a3712de8be 100644
--- a/external/bsd/llvm/dist/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/BranchFolding.cpp
@@ -83,7 +83,11 @@ INITIALIZE_PASS(BranchFolderPass, "branch-folder",
 bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
   TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
-  BranchFolder Folder(PassConfig->getEnableTailMerge(), /*CommonHoist=*/true);
+  // TailMerge can create jumps into the middle of if branches, which makes
+  // the CFG irreducible for HW that requires a structured CFG.
+  bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
+                         PassConfig->getEnableTailMerge();
+  BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true);
   return Folder.OptimizeFunction(MF,
                                  MF.getTarget().getInstrInfo(),
                                  MF.getTarget().getRegisterInfo(),
diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/CodeGen/CMakeLists.txt
index 10cc9fff9a84..123d86f9504f 100644
--- a/external/bsd/llvm/dist/llvm/lib/CodeGen/CMakeLists.txt
+++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/CMakeLists.txt
@@ -35,7 +35,7 @@ add_llvm_library(LLVMCodeGen
   LiveRangeCalc.cpp
   LiveRangeEdit.cpp
   LiveRegMatrix.cpp
-  LiveRegUnits.cpp
+  LivePhysRegs.cpp
   LiveStackAnalysis.cpp
   LiveVariables.cpp
   LocalStackSlotAllocation.cpp
@@ -97,6 +97,7 @@ add_llvm_library(LLVMCodeGen
   StackColoring.cpp
   StackProtector.cpp
  StackSlotColoring.cpp
+  StackMapLivenessAnalysis.cpp
   StackMaps.cpp
   TailDuplication.cpp
   TargetFrameLoweringImpl.cpp
diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/CalcSpillWeights.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 4925c4db1e0a..1a45de069ead 100644
--- a/external/bsd/llvm/dist/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -132,7 +132,7 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
     bool reads, writes;
     tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
     weight = LiveIntervals::getSpillWeight(
-        writes, reads, MBFI.getBlockFreq(mi->getParent()));
+        writes, reads, &MBFI, mi);
 
     // Give extra weight to what looks like a loop induction variable update.
if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb)) diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/CodeGen.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/CodeGen.cpp index 7430c53b8da2..368e7d491ba6 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/CodeGen.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/CodeGen.cpp @@ -51,6 +51,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeOptimizePHIsPass(Registry); initializePHIEliminationPass(Registry); initializePeepholeOptimizerPass(Registry); + initializePostMachineSchedulerPass(Registry); initializePostRASchedulerPass(Registry); initializeProcessImplicitDefsPass(Registry); initializePEIPass(Registry); @@ -69,6 +70,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeVirtRegRewriterPass(Registry); initializeLowerIntrinsicsPass(Registry); initializeMachineFunctionPrinterPassPass(Registry); + initializeStackMapLivenessPass(Registry); } void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/ExecutionDepsFix.cpp index 031f19c135a9..b31b029e77c0 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/ExecutionDepsFix.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/ExecutionDepsFix.cpp @@ -23,7 +23,7 @@ #define DEBUG_TYPE "execution-fix" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Allocator.h" @@ -141,7 +141,7 @@ class ExeDepsFix : public MachineFunctionPass { std::vector > UndefReads; /// Storage for register unit liveness. - LiveRegUnits LiveUnits; + LivePhysRegs LiveRegSet; /// Current instruction number. /// The first instruction in each basic block is 0. @@ -352,7 +352,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // Set up UndefReads to track undefined register reads. UndefReads.clear(); - LiveUnits.clear(); + LiveRegSet.clear(); // Set up LiveRegs to represent registers entering MBB. if (!LiveRegs) @@ -547,21 +547,19 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) { return; // Collect this block's live out register units. - LiveUnits.init(TRI); - for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - LiveUnits.addLiveIns(*SI, *TRI); - } + LiveRegSet.init(TRI); + LiveRegSet.addLiveOuts(MBB); + MachineInstr *UndefMI = UndefReads.back().first; unsigned OpIdx = UndefReads.back().second; for (MachineBasicBlock::reverse_iterator I = MBB->rbegin(), E = MBB->rend(); I != E; ++I) { - // Update liveness, including the current instrucion's defs. - LiveUnits.stepBackward(*I, *TRI); + // Update liveness, including the current instruction's defs. 
+ LiveRegSet.stepBackward(*I); if (UndefMI == &*I) { - if (!LiveUnits.contains(UndefMI->getOperand(OpIdx).getReg(), *TRI)) + if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg())) TII->breakPartialRegDependency(UndefMI, OpIdx, TRI); UndefReads.pop_back(); diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/IfConversion.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/IfConversion.cpp index e2d0eb44da06..c6cef9b114b8 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/IfConversion.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/IfConversion.cpp @@ -23,7 +23,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -162,8 +162,8 @@ namespace { const MachineBranchProbabilityInfo *MBPI; MachineRegisterInfo *MRI; - LiveRegUnits Redefs; - LiveRegUnits DontKill; + LivePhysRegs Redefs; + LivePhysRegs DontKill; bool PreRegAlloc; bool MadeChange; @@ -968,23 +968,22 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) { /// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all /// values defined in MI which are not live/used by MI. -static void UpdatePredRedefs(MachineInstr *MI, LiveRegUnits &Redefs, - const TargetRegisterInfo *TRI) { +static void UpdatePredRedefs(MachineInstr *MI, LivePhysRegs &Redefs) { for (ConstMIBundleOperands Ops(MI); Ops.isValid(); ++Ops) { if (!Ops->isReg() || !Ops->isKill()) continue; unsigned Reg = Ops->getReg(); if (Reg == 0) continue; - Redefs.removeReg(Reg, *TRI); + Redefs.removeReg(Reg); } for (MIBundleOperands Ops(MI); Ops.isValid(); ++Ops) { if (!Ops->isReg() || !Ops->isDef()) continue; unsigned Reg = Ops->getReg(); - if (Reg == 0 || Redefs.contains(Reg, *TRI)) + if (Reg == 0 || Redefs.contains(Reg)) continue; - Redefs.addReg(Reg, *TRI); + Redefs.addReg(Reg); MachineOperand &Op = *Ops; MachineInstr *MI = Op.getParent(); @@ -996,12 +995,11 @@ static void UpdatePredRedefs(MachineInstr *MI, LiveRegUnits &Redefs, /** * Remove kill flags from operands with registers in the @p DontKill set. */ -static void RemoveKills(MachineInstr &MI, const LiveRegUnits &DontKill, - const MCRegisterInfo &MCRI) { +static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) { for (MIBundleOperands O(&MI); O.isValid(); ++O) { if (!O->isReg() || !O->isKill()) continue; - if (DontKill.contains(O->getReg(), MCRI)) + if (DontKill.contains(O->getReg())) O->setIsKill(false); } } @@ -1012,10 +1010,10 @@ static void RemoveKills(MachineInstr &MI, const LiveRegUnits &DontKill, */ static void RemoveKills(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, - const LiveRegUnits &DontKill, + const LivePhysRegs &DontKill, const MCRegisterInfo &MCRI) { for ( ; I != E; ++I) - RemoveKills(*I, DontKill, MCRI); + RemoveKills(*I, DontKill); } /// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG. @@ -1049,13 +1047,13 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. Redefs.init(TRI); - Redefs.addLiveIns(CvtBBI->BB, *TRI); - Redefs.addLiveIns(NextBBI->BB, *TRI); + Redefs.addLiveIns(CvtBBI->BB); + Redefs.addLiveIns(NextBBI->BB); // Compute a set of registers which must not be killed by instructions in // BB1: This is everything live-in to BB2.
DontKill.init(TRI); - DontKill.addLiveIns(NextBBI->BB, *TRI); + DontKill.addLiveIns(NextBBI->BB); if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); @@ -1154,8 +1152,8 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. Redefs.init(TRI); - Redefs.addLiveIns(CvtBBI->BB, *TRI); - Redefs.addLiveIns(NextBBI->BB, *TRI); + Redefs.addLiveIns(CvtBBI->BB); + Redefs.addLiveIns(NextBBI->BB); DontKill.clear(); @@ -1284,7 +1282,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. Redefs.init(TRI); - Redefs.addLiveIns(BBI1->BB, *TRI); + Redefs.addLiveIns(BBI1->BB); // Remove the duplicated instructions at the beginnings of both paths. MachineBasicBlock::iterator DI1 = BBI1->BB->begin(); @@ -1317,12 +1315,12 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, DontKill.init(TRI); for (MachineBasicBlock::reverse_iterator I = BBI2->BB->rbegin(), E = MachineBasicBlock::reverse_iterator(DI2); I != E; ++I) { - DontKill.stepBackward(*I, *TRI); + DontKill.stepBackward(*I); } for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E; ++I) { - Redefs.stepForward(*I, *TRI); + Redefs.stepForward(*I); } BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); @@ -1506,7 +1504,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, // If the predicated instruction now redefines a register as the result of // if-conversion, add an implicit kill. - UpdatePredRedefs(I, Redefs, TRI); + UpdatePredRedefs(I, Redefs); } std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); @@ -1552,11 +1550,11 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, // If the predicated instruction now redefines a register as the result of // if-conversion, add an implicit kill. - UpdatePredRedefs(MI, Redefs, TRI); + UpdatePredRedefs(MI, Redefs); // Some kill flags may not be correct anymore. if (!DontKill.empty()) - RemoveKills(*MI, DontKill, *TRI); + RemoveKills(*MI, DontKill); } if (!IgnoreBr) { diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/CodeGen/LLVMBuild.txt index 81ef1aa89dd4..fee0347ea659 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/LLVMBuild.txt @@ -22,4 +22,4 @@ subdirectories = AsmPrinter SelectionDAG type = Library name = CodeGen parent = Libraries -required_libraries = Analysis Core MC Scalar Support Target TransformUtils ObjCARC +required_libraries = Analysis Core MC Scalar Support Target TransformUtils diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/LexicalScopes.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/LexicalScopes.cpp index ffe407ac53cc..e58145826ff6 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/LexicalScopes.cpp @@ -25,12 +25,11 @@ #include "llvm/Support/FormattedStream.h" using namespace llvm; -LexicalScopes::~LexicalScopes() { - releaseMemory(); -} +/// ~LexicalScopes - final cleanup after ourselves. +LexicalScopes::~LexicalScopes() { reset(); } -/// releaseMemory - release memory. -void LexicalScopes::releaseMemory() { +/// reset - Reset the instance so that it's prepared for another function. 
+void LexicalScopes::reset() { MF = NULL; CurrentFnLexicalScope = NULL; DeleteContainerSeconds(LexicalScopeMap); @@ -41,7 +40,7 @@ void LexicalScopes::releaseMemory() { /// initialize - Scan machine function and construct lexical scope nest. void LexicalScopes::initialize(const MachineFunction &Fn) { - releaseMemory(); + reset(); MF = &Fn; SmallVector<InsnRange, 4> MIRanges; DenseMap<const MachineInstr *, LexicalScope *> MI2ScopeMap; @@ -54,13 +53,13 @@ void LexicalScopes::initialize(const MachineFunction &Fn) { /// extractLexicalScopes - Extract instruction ranges for each lexical scopes /// for the given machine function. -void LexicalScopes:: -extractLexicalScopes(SmallVectorImpl<InsnRange> &MIRanges, - DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) { +void LexicalScopes::extractLexicalScopes( + SmallVectorImpl<InsnRange> &MIRanges, + DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) { // Scan each instruction and create scopes. First build working set of scopes. - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) { + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; + ++I) { const MachineInstr *RangeBeginMI = NULL; const MachineInstr *PrevMI = NULL; DebugLoc PrevDL; @@ -117,14 +116,15 @@ LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { MDNode *Scope = NULL; MDNode *IA = NULL; DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext()); - if (!Scope) return NULL; + if (!Scope) + return NULL; // The scope that we were created with could have an extra file - which // isn't what we care about in this case. DIDescriptor D = DIDescriptor(Scope); if (D.isLexicalBlockFile()) Scope = DILexicalBlockFile(Scope).getScope(); - + if (IA) return InlinedLexicalScopeMap.lookup(DebugLoc::getFromDILocation(IA)); return LexicalScopeMap.lookup(Scope); @@ -143,7 +143,7 @@ LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) { // Create an inlined scope for inlined function. return getOrCreateInlinedScope(Scope, InlinedAt); } - + return getOrCreateRegularScope(Scope); } @@ -154,7 +154,7 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { Scope = DILexicalBlockFile(Scope).getScope(); D = DIDescriptor(Scope); } - + LexicalScope *WScope = LexicalScopeMap.lookup(Scope); if (WScope) return WScope; @@ -164,15 +164,15 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope)); WScope = new LexicalScope(Parent, DIDescriptor(Scope), NULL, false); LexicalScopeMap.insert(std::make_pair(Scope, WScope)); - if (!Parent && DIDescriptor(Scope).isSubprogram() - && DISubprogram(Scope).describes(MF->getFunction())) + if (!Parent && DIDescriptor(Scope).isSubprogram() && + DISubprogram(Scope).describes(MF->getFunction())) CurrentFnLexicalScope = WScope; - + return WScope; } /// getOrCreateInlinedScope - Find or create an inlined lexical scope.
-LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *Scope, +LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *Scope, MDNode *InlinedAt) { LexicalScope *InlinedScope = LexicalScopeMap.lookup(InlinedAt); if (InlinedScope) @@ -212,7 +212,7 @@ LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) { /// constructScopeNest void LexicalScopes::constructScopeNest(LexicalScope *Scope) { - assert (Scope && "Unable to calculate scope dominance graph!"); + assert(Scope && "Unable to calculate scope dominance graph!"); SmallVector WorkStack; WorkStack.push_back(Scope); unsigned Counter = 0; @@ -221,7 +221,8 @@ void LexicalScopes::constructScopeNest(LexicalScope *Scope) { const SmallVectorImpl &Children = WS->getChildren(); bool visitedChildren = false; for (SmallVectorImpl::const_iterator SI = Children.begin(), - SE = Children.end(); SI != SE; ++SI) { + SE = Children.end(); + SI != SE; ++SI) { LexicalScope *ChildScope = *SI; if (!ChildScope->getDFSOut()) { WorkStack.push_back(ChildScope); @@ -239,17 +240,17 @@ void LexicalScopes::constructScopeNest(LexicalScope *Scope) { /// assignInstructionRanges - Find ranges of instructions covered by each /// lexical scope. -void LexicalScopes:: -assignInstructionRanges(SmallVectorImpl &MIRanges, - DenseMap &MI2ScopeMap) -{ - +void LexicalScopes::assignInstructionRanges( + SmallVectorImpl &MIRanges, + DenseMap &MI2ScopeMap) { + LexicalScope *PrevLexicalScope = NULL; for (SmallVectorImpl::const_iterator RI = MIRanges.begin(), - RE = MIRanges.end(); RI != RE; ++RI) { + RE = MIRanges.end(); + RI != RE; ++RI) { const InsnRange &R = *RI; LexicalScope *S = MI2ScopeMap.lookup(R.first); - assert (S && "Lost LexicalScope for a machine instruction!"); + assert(S && "Lost LexicalScope for a machine instruction!"); if (PrevLexicalScope && !PrevLexicalScope->dominates(S)) PrevLexicalScope->closeInsnRange(S); S->openInsnRange(R.first); @@ -262,26 +263,26 @@ assignInstructionRanges(SmallVectorImpl &MIRanges, } /// getMachineBasicBlocks - Populate given set using machine basic blocks which -/// have machine instructions that belong to lexical scope identified by +/// have machine instructions that belong to lexical scope identified by /// DebugLoc. -void LexicalScopes:: -getMachineBasicBlocks(DebugLoc DL, - SmallPtrSet &MBBs) { +void LexicalScopes::getMachineBasicBlocks( + DebugLoc DL, SmallPtrSet &MBBs) { MBBs.clear(); LexicalScope *Scope = getOrCreateLexicalScope(DL); if (!Scope) return; - + if (Scope == CurrentFnLexicalScope) { - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; + ++I) MBBs.insert(I); return; } SmallVectorImpl &InsnRanges = Scope->getRanges(); for (SmallVectorImpl::iterator I = InsnRanges.begin(), - E = InsnRanges.end(); I != E; ++I) { + E = InsnRanges.end(); + I != E; ++I) { InsnRange &R = *I; MBBs.insert(R.first->getParent()); } @@ -299,8 +300,8 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { return true; bool Result = false; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { DebugLoc IDL = I->getDebugLoc(); if (IDL.isUnknown()) continue; @@ -311,8 +312,6 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { return Result; } -void LexicalScope::anchor() { } - /// dump - Print data structures. 
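Aside: constructScopeNest above computes DFS-in/DFS-out numbers with an explicit work stack so that LexicalScope::dominates can later be answered by interval containment. A toy reduction of the same worklist pattern (type and names hypothetical, not from the patch):

#include <vector>

struct ToyScope {
  std::vector<ToyScope *> Children;
  unsigned DFSIn, DFSOut;
  ToyScope() : DFSIn(0), DFSOut(0) {}
};

// Iterative DFS numbering: a node's [DFSIn, DFSOut] interval encloses the
// intervals of everything below it in the scope tree.
static void numberScopes(ToyScope *Root) {
  std::vector<ToyScope *> WorkStack;
  WorkStack.push_back(Root);
  unsigned Counter = 0;
  Root->DFSIn = ++Counter;
  while (!WorkStack.empty()) {
    ToyScope *WS = WorkStack.back();
    bool visitedChildren = false;
    for (size_t i = 0, e = WS->Children.size(); i != e; ++i) {
      ToyScope *Child = WS->Children[i];
      if (!Child->DFSOut) { // this child's subtree is not finished yet
        WorkStack.push_back(Child);
        Child->DFSIn = ++Counter;
        visitedChildren = true;
        break;
      }
    }
    if (!visitedChildren) { // all children done: close this scope
      WorkStack.pop_back();
      WS->DFSOut = ++Counter;
    }
  }
}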
void LexicalScope::dump(unsigned Indent) const { #ifndef NDEBUG @@ -332,4 +331,3 @@ void LexicalScope::dump(unsigned Indent) const { Children[i]->dump(Indent + 2); #endif } - diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/LiveDebugVariables.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/LiveDebugVariables.cpp index 25645e088ece..52b7ee0f2a6b 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -72,7 +72,7 @@ LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) { typedef IntervalMap LocMap; namespace { -/// UserValueScopes - Keeps track of lexical scopes associated with an +/// UserValueScopes - Keeps track of lexical scopes associated with a /// user value's source location. class UserValueScopes { DebugLoc DL; @@ -704,7 +704,6 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { bool Changed = collectDebugValues(mf); computeIntervals(); DEBUG(print(dbgs())); - LS.releaseMemory(); ModifiedMF = Changed; return Changed; } diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp index e1c3217a775e..4329ffc015b3 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -620,9 +621,12 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { } float -LiveIntervals::getSpillWeight(bool isDef, bool isUse, BlockFrequency freq) { - const float Scale = 1.0f / BlockFrequency::getEntryFrequency(); - return (isDef + isUse) * (freq.getFrequency() * Scale); +LiveIntervals::getSpillWeight(bool isDef, bool isUse, + const MachineBlockFrequencyInfo *MBFI, + const MachineInstr *MI) { + BlockFrequency Freq = MBFI->getBlockFreq(MI->getParent()); + const float Scale = 1.0f / MBFI->getEntryFreq(); + return (isDef + isUse) * (Freq.getFrequency() * Scale); } LiveRange::Segment diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/LivePhysRegs.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/LivePhysRegs.cpp new file mode 100644 index 000000000000..7efd941322b3 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -0,0 +1,114 @@ +//===--- LivePhysRegs.cpp - Live Physical Register Set --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LivePhysRegs utility for tracking liveness of +// physical registers across machine instructions in forward or backward order. +// A more detailed description can be found in the corresponding header file. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + + +/// \brief Remove all registers from the set that get clobbered by the register +/// mask. 
+void LivePhysRegs::removeRegsInMask(const MachineOperand &MO) { + SparseSet<unsigned>::iterator LRI = LiveRegs.begin(); + while (LRI != LiveRegs.end()) { + if (MO.clobbersPhysReg(*LRI)) + LRI = LiveRegs.erase(LRI); + else + ++LRI; + } +} + +/// Simulates liveness when stepping backwards over an instruction (bundle): +/// Remove Defs, add uses. This is the recommended way of calculating liveness. +void LivePhysRegs::stepBackward(const MachineInstr &MI) { + // Remove defined registers and regmask kills from the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (O->isReg()) { + if (!O->isDef()) + continue; + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + removeReg(Reg); + } else if (O->isRegMask()) + removeRegsInMask(*O); + } + + // Add uses to the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (!O->isReg() || !O->readsReg() || O->isUndef()) + continue; + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + addReg(Reg); + } +} + +/// Simulates liveness when stepping forward over an instruction (bundle): Remove +/// killed-uses, add defs. This is not the recommended way, because it depends +/// on accurate kill flags. If possible use stepBackward() instead of this +/// function. +void LivePhysRegs::stepForward(const MachineInstr &MI) { + SmallVector<unsigned, 4> Defs; + // Remove killed registers from the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (O->isReg()) { + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + if (O->isDef()) { + if (!O->isDead()) + Defs.push_back(Reg); + } else { + if (!O->isKill()) + continue; + assert(O->isUse()); + removeReg(Reg); + } + } else if (O->isRegMask()) + removeRegsInMask(*O); + } + + // Add defs to the set. + for (unsigned i = 0, e = Defs.size(); i != e; ++i) + addReg(Defs[i]); +} + +/// Print the currently live registers to OS. +void LivePhysRegs::print(raw_ostream &OS) const { + OS << "Live Registers:"; + if (!TRI) { + OS << " (uninitialized)\n"; + return; + } + + if (empty()) { + OS << " (empty)\n"; + return; + } + + for (const_iterator I = begin(), E = end(); I != E; ++I) + OS << " " << PrintReg(*I, TRI); + OS << "\n"; +} + +/// Dumps the currently live registers to the debug output.
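// Aside, not part of this file or patch: a minimal sketch of the intended
// bottom-up driver for this utility, using only operations that appear in
// this diff (init, addLiveOuts, stepBackward, contains); the helper name is
// hypothetical.
static bool isLiveAtSomePointSketch(MachineBasicBlock *MBB,
                                    const TargetRegisterInfo *TRI,
                                    unsigned Reg) {
  LivePhysRegs LPR;
  LPR.init(TRI);        // Seed with the block's live-outs, then walk the
  LPR.addLiveOuts(MBB); // instructions in reverse, updating liveness.
  for (MachineBasicBlock::reverse_iterator I = MBB->rbegin(), E = MBB->rend();
       I != E; ++I) {
    if (LPR.contains(Reg))
      return true;
    LPR.stepBackward(*I);
  }
  return LPR.contains(Reg); // still live here == live-in to the block
}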
+void LivePhysRegs::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + dbgs() << " " << *this; +#endif +} diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index 26a117652b08..f521548b2138 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -17,12 +17,14 @@ #define DEBUG_TYPE "localstackalloc" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" @@ -60,18 +62,27 @@ namespace { class LocalStackSlotPass: public MachineFunctionPass { SmallVector LocalOffsets; + /// StackObjSet - A set of stack object indexes + typedef SmallSetVector StackObjSet; void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset, bool StackGrowsDown, unsigned &MaxAlign); + void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, + SmallSet &ProtectedObjs, + MachineFrameInfo *MFI, bool StackGrowsDown, + int64_t &Offset, unsigned &MaxAlign); void calculateFrameObjectOffsets(MachineFunction &Fn); bool insertFrameReferenceRegisters(MachineFunction &Fn); public: static char ID; // Pass identification, replacement for typeid - explicit LocalStackSlotPass() : MachineFunctionPass(ID) { } + explicit LocalStackSlotPass() : MachineFunctionPass(ID) { + initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry()); + } bool runOnMachineFunction(MachineFunction &MF); virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -81,8 +92,12 @@ namespace { char LocalStackSlotPass::ID = 0; char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID; -INITIALIZE_PASS(LocalStackSlotPass, "localstackalloc", - "Local Stack Slot Allocation", false, false) +INITIALIZE_PASS_BEGIN(LocalStackSlotPass, "localstackalloc", + "Local Stack Slot Allocation", false, false) +INITIALIZE_PASS_DEPENDENCY(StackProtector) +INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc", + "Local Stack Slot Allocation", false, false) + bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -145,6 +160,22 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI, ++NumAllocations; } +/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., +/// those required to be close to the Stack Protector) to stack offsets. +void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs, + SmallSet &ProtectedObjs, + MachineFrameInfo *MFI, + bool StackGrowsDown, int64_t &Offset, + unsigned &MaxAlign) { + + for (StackObjSet::const_iterator I = UnassignedObjs.begin(), + E = UnassignedObjs.end(); I != E; ++I) { + int i = *I; + AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); + ProtectedObjs.insert(i); + } +} + /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the /// abstract stack objects. 
/// @@ -156,11 +187,13 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; int64_t Offset = 0; unsigned MaxAlign = 0; + StackProtector *SP = &getAnalysis(); // Make sure that the stack protector comes before the local variables on the // stack. - SmallSet LargeStackObjs; + SmallSet ProtectedObjs; if (MFI->getStackProtectorIndex() >= 0) { + StackObjSet LargeArrayObjs; AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset, StackGrowsDown, MaxAlign); @@ -170,12 +203,21 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (MFI->getStackProtectorIndex() == (int)i) continue; - if (!MFI->MayNeedStackProtector(i)) - continue; - AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); - LargeStackObjs.insert(i); + switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) { + case StackProtector::SSPLK_None: + case StackProtector::SSPLK_SmallArray: + case StackProtector::SSPLK_AddrOf: + continue; + case StackProtector::SSPLK_LargeArray: + LargeArrayObjs.insert(i); + continue; + } + llvm_unreachable("Unexpected SSPLayoutKind."); } + + AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); } // Then assign frame offsets to stack objects that are not used to spill @@ -185,7 +227,7 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (MFI->getStackProtectorIndex() == (int)i) continue; - if (LargeStackObjs.count(i)) + if (ProtectedObjs.count(i)) continue; AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); @@ -233,9 +275,11 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { MachineInstr *MI = I; - // Debug value instructions can't be out of range, so they don't need - // any updates. - if (MI->isDebugValue()) + // Debug value, stackmap and patchpoint instructions can't be out of + // range, so they don't need any updates. + if (MI->isDebugValue() || + MI->getOpcode() == TargetOpcode::STACKMAP || + MI->getOpcode() == TargetOpcode::PATCHPOINT) continue; // For now, allocate the base register(s) within the basic block diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineBasicBlock.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineBasicBlock.cpp index ca71e3bf8062..044947a636e6 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -52,7 +52,8 @@ MCSymbol *MachineBasicBlock::getSymbol() const { if (!CachedMCSymbol) { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); - const char *Prefix = Ctx.getAsmInfo()->getPrivateGlobalPrefix(); + const TargetMachine &TM = MF->getTarget(); + const char *Prefix = TM.getDataLayout()->getPrivateGlobalPrefix(); CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber())); @@ -677,6 +678,11 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { MachineFunction *MF = getParent(); DebugLoc dl; // FIXME: this is nowhere + // Performance might be harmed on HW that implements branching using exec mask + // where both sides of the branches are always executed. + if (MF->getTarget().requiresStructuredCFG()) + return NULL; + // We may need to update this's terminator, but we can't do that if // AnalyzeBranch fails. 
If this uses a jump table, we won't touch it. const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index e269d24e1d56..cc2f31795b8c 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -1,4 +1,4 @@ -//====----- MachineBlockFrequencyInfo.cpp - Machine Block Frequency Analysis ----====// +//====------ MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis ------====// // // The LLVM Compiler Infrastructure // @@ -16,9 +16,99 @@ #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" using namespace llvm; +#ifndef NDEBUG +enum GVDAGType { + GVDT_None, + GVDT_Fraction, + GVDT_Integer +}; + +static cl::opt +ViewMachineBlockFreqPropagationDAG("view-machine-block-freq-propagation-dags", + cl::Hidden, + cl::desc("Pop up a window to show a dag displaying how machine block " + "frequencies propagate through the CFG."), + cl::values( + clEnumValN(GVDT_None, "none", + "do not display graphs."), + clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " + "fractional block frequency representation."), + clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " + "integer fractional block frequency representation."), + clEnumValEnd)); + +namespace llvm { + +template <> +struct GraphTraits { + typedef const MachineBasicBlock NodeType; + typedef MachineBasicBlock::const_succ_iterator ChildIteratorType; + typedef MachineFunction::const_iterator nodes_iterator; + + static inline + const NodeType *getEntryNode(const MachineBlockFrequencyInfo *G) { + return G->getFunction()->begin(); + } + + static ChildIteratorType child_begin(const NodeType *N) { + return N->succ_begin(); + } + + static ChildIteratorType child_end(const NodeType *N) { + return N->succ_end(); + } + + static nodes_iterator nodes_begin(const MachineBlockFrequencyInfo *G) { + return G->getFunction()->begin(); + } + + static nodes_iterator nodes_end(const MachineBlockFrequencyInfo *G) { + return G->getFunction()->end(); + } +}; + +template<> +struct DOTGraphTraits : + public DefaultDOTGraphTraits { + explicit DOTGraphTraits(bool isSimple=false) : + DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const MachineBlockFrequencyInfo *G) { + return G->getFunction()->getName(); + } + + std::string getNodeLabel(const MachineBasicBlock *Node, + const MachineBlockFrequencyInfo *Graph) { + std::string Result; + raw_string_ostream OS(Result); + + OS << Node->getName().str() << ":"; + switch (ViewMachineBlockFreqPropagationDAG) { + case GVDT_Fraction: + Graph->printBlockFreq(OS, Node); + break; + case GVDT_Integer: + OS << Graph->getBlockFreq(Node).getFrequency(); + break; + case GVDT_None: + llvm_unreachable("If we are not supposed to render a graph we should " + "never reach this point."); + } + + return Result; + } +}; + + +} // end namespace llvm +#endif + INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq", "Machine Block Frequency Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) @@ -28,7 +118,8 @@ INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq", char MachineBlockFrequencyInfo::ID = 0; 
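Aside: the GraphTraits and DOTGraphTraits specializations above are what let the generic ViewGraph machinery traverse and render the machine CFG; with them in place, the new -view-machine-block-freq-propagation-dags={none,fraction,integer} option selects the node-label rendering. The same protocol works for any graph-shaped type; a toy sketch (names hypothetical, assuming only the entry/children protocol from llvm/ADT/GraphTraits.h):

#include "llvm/ADT/GraphTraits.h"
#include <vector>

struct ToyNode {
  std::vector<ToyNode *> Succs;
};
struct ToyGraph {
  ToyNode *Entry;
};

namespace llvm {
// Expose the entry node and per-node successor iteration; that is all the
// generic graph algorithms (and ViewGraph) need to walk the structure.
template <> struct GraphTraits<const ToyGraph *> {
  typedef const ToyNode NodeType;
  typedef std::vector<ToyNode *>::const_iterator ChildIteratorType;
  static NodeType *getEntryNode(const ToyGraph *G) { return G->Entry; }
  static ChildIteratorType child_begin(const NodeType *N) {
    return N->Succs.begin();
  }
  static ChildIteratorType child_end(const NodeType *N) {
    return N->Succs.end();
  }
};
} // end namespace llvm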
-MachineBlockFrequencyInfo::MachineBlockFrequencyInfo() : MachineFunctionPass(ID) { +MachineBlockFrequencyInfo:: +MachineBlockFrequencyInfo() :MachineFunctionPass(ID) { initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry()); MBFI = new BlockFrequencyImpl(); @@ -45,12 +136,51 @@ void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { } bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { - MachineBranchProbabilityInfo &MBPI = getAnalysis(); + MachineBranchProbabilityInfo &MBPI = + getAnalysis(); MBFI->doFunction(&F, &MBPI); +#ifndef NDEBUG + if (ViewMachineBlockFreqPropagationDAG != GVDT_None) { + view(); + } +#endif return false; } +/// Pop up a ghostview window with the current block frequency propagation +/// rendered using dot. +void MachineBlockFrequencyInfo::view() const { +// This code is only for debugging. +#ifndef NDEBUG + ViewGraph(const_cast(this), + "MachineBlockFrequencyDAGs"); +#else + errs() << "MachineBlockFrequencyInfo::view is only available in debug builds " + "on systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + BlockFrequency MachineBlockFrequencyInfo:: getBlockFreq(const MachineBasicBlock *MBB) const { return MBFI->getBlockFreq(MBB); } + +MachineFunction *MachineBlockFrequencyInfo::getFunction() const { + return MBFI->Fn; +} + +raw_ostream & +MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS, + const BlockFrequency Freq) const { + return MBFI->printBlockFreq(OS, Freq); +} + +raw_ostream & +MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS, + const MachineBasicBlock *MBB) const { + return MBFI->printBlockFreq(OS, MBB); +} + +uint64_t MachineBlockFrequencyInfo::getEntryFreq() const { + return MBFI->getEntryFreq(); +} diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 4b0f7f38fcc9..f297c5f67862 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -58,6 +58,13 @@ static cl::opt AlignAllBlock("align-all-blocks", "blocks in the function."), cl::init(0), cl::Hidden); +// FIXME: Find a good default for this flag and remove the flag. +static cl::opt +ExitBlockBias("block-placement-exit-block-bias", + cl::desc("Block frequency percentage a loop exit block needs " + "over the original exit to be considered the new exit."), + cl::init(0), cl::Hidden); + namespace { class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. @@ -360,7 +367,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( // any CFG constraints. 
if (SuccChain.LoopPredecessors != 0) { if (SuccProb < HotProb) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n"); + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob) (CFG conflict)\n"); continue; } @@ -383,8 +391,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( } } if (BadCFGConflict) { - DEBUG(dbgs() << " " << getBlockName(*SI) - << " -> non-cold CFG conflict\n"); + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob) (non-cold CFG conflict)\n"); continue; } } @@ -453,8 +461,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); - DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq - << " (freq)\n"); + DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> "; + MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); if (BestBlock && BestFreq >= CandidateFreq) continue; BestBlock = *WBI; @@ -575,8 +583,8 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, if (!LoopBlockSet.count(Pred)) continue; DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " - << Pred->succ_size() << " successors, " - << MBFI->getBlockFreq(Pred) << " freq\n"); + << Pred->succ_size() << " successors, "; + MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); if (Pred->succ_size() > 1) continue; @@ -690,14 +698,17 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " << getBlockName(*SI) << " [L:" << SuccLoopDepth - << "] (" << ExitEdgeFreq << ")\n"); - // Note that we slightly bias this toward an existing layout successor to - // retain incoming order in the absence of better information. - // FIXME: Should we bias this more strongly? It's pretty weak. + << "] ("; + MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); + // Note that we bias this toward an existing layout successor to retain + // incoming order in the absence of better information. The exit must have + // a frequency higher than the current exit before we consider breaking + // the layout. + BranchProbability Bias(100 - ExitBlockBias, 100); if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth || ExitEdgeFreq > BestExitEdgeFreq || ((*I)->isLayoutSuccessor(*SI) && - !(ExitEdgeFreq < BestExitEdgeFreq))) { + !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) { BestExitEdgeFreq = ExitEdgeFreq; ExitingBB = *I; } @@ -939,7 +950,9 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { BlockChain &FunctionChain = *BlockToChain[&F.front()]; buildChain(&F.front(), FunctionChain, BlockWorkList); +#ifndef NDEBUG typedef SmallPtrSet FunctionBlockSetType; +#endif DEBUG({ // Crash at the end so we get all of the debugging output first. 
bool BadFunc = false; diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineCSE.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineCSE.cpp index d228286d9db8..35ba7ff35e6f 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineCSE.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineCSE.cpp @@ -131,9 +131,24 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, unsigned SrcReg = DefMI->getOperand(1).getReg(); if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) continue; - if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) + if (DefMI->getOperand(0).getSubReg()) continue; - if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg))) + // FIXME: We should trivially coalesce subregister copies to expose CSE + // opportunities on instructions with truncated operands (see + // cse-add-with-overflow.ll). This can be done here as follows: + // if (SrcSubReg) + // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC, + // SrcSubReg); + // MO.substVirtReg(SrcReg, SrcSubReg, *TRI); + // + // The 2-addr pass has been updated to handle coalesced subregs. However, + // some machine-specific code still can't handle it. + // To handle it properly we also need a way to find a constrained subregister + // class given a super-reg class and subreg index. + if (DefMI->getOperand(1).getSubReg()) + continue; + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + if (!MRI->constrainRegClass(SrcReg, RC)) continue; DEBUG(dbgs() << "Coalescing: " << *DefMI); DEBUG(dbgs() << "*** to: " << *MI); @@ -513,7 +528,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool DoCSE = true; unsigned NumDefs = MI->getDesc().getNumDefs() + MI->getDesc().getNumImplicitDefs(); - + for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineFunction.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineFunction.cpp index 0703df09a60e..4091e4274add 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineFunction.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineFunction.cpp @@ -425,7 +425,16 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, MachineRegisterInfo &MRI = getRegInfo(); unsigned VReg = MRI.getLiveInVirtReg(PReg); if (VReg) { - assert(MRI.getRegClass(VReg) == RC && "Register class mismatch!"); + const TargetRegisterClass *VRegRC = MRI.getRegClass(VReg); + (void)VRegRC; + // A physical register can be added several times. + // Between two calls, the register class of the related virtual register + // may have been constrained to match some operation constraints. + // In that case, check that the current register class includes the + // physical register and is a sub class of the specified RC. + assert((VRegRC == RC || (VRegRC->contains(PReg) && + RC->hasSubClassEq(VRegRC))) && + "Register class mismatch!") ; return VReg; } VReg = MRI.createVirtualRegister(RC); @@ -438,12 +447,12 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, /// normal 'L' label is returned. MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate) const { + const DataLayout *DL = getTarget().getDataLayout(); assert(JumpTableInfo && "No jump tables"); assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); - const MCAsmInfo &MAI = *getTarget().getMCAsmInfo(); - const char *Prefix = isLinkerPrivate ?
MAI.getLinkerPrivateGlobalPrefix() : - MAI.getPrivateGlobalPrefix(); + const char *Prefix = isLinkerPrivate ? DL->getLinkerPrivateGlobalPrefix() : + DL->getPrivateGlobalPrefix(); SmallString<60> Name; raw_svector_ostream(Name) << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; @@ -453,8 +462,8 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, /// getPICBaseSymbol - Return a function-local symbol to represent the PIC /// base. MCSymbol *MachineFunction::getPICBaseSymbol() const { - const MCAsmInfo &MAI = *Target.getMCAsmInfo(); - return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+ + const DataLayout *DL = getTarget().getDataLayout(); + return Ctx.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ Twine(getFunctionNumber())+"$pb"); } @@ -490,14 +499,13 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, /// a nonnegative identifier to represent it. /// int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, - bool isSS, bool MayNeedSP, const AllocaInst *Alloca) { + bool isSS, const AllocaInst *Alloca) { assert(Size != 0 && "Cannot allocate zero size stack objects!"); Alignment = clampStackAlignment(!getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, getFrameLowering()->getStackAlignment()); - Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP, - Alloca)); + Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca)); int Index = (int)Objects.size() - NumFixedObjects - 1; assert(Index >= 0 && "Bad frame index!"); ensureMaxAlignment(Alignment); @@ -514,7 +522,7 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, clampStackAlignment(!getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, getFrameLowering()->getStackAlignment()); - CreateStackObject(Size, Alignment, true, false); + CreateStackObject(Size, Alignment, true); int Index = (int)Objects.size() - NumFixedObjects - 1; ensureMaxAlignment(Alignment); return Index; @@ -525,13 +533,14 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, /// variable sized object is created, whether or not the index returned is /// actually used. 
/// -int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) { +int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, + const AllocaInst *Alloca) { HasVarSizedObjects = true; Alignment = clampStackAlignment(!getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, getFrameLowering()->getStackAlignment()); - Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0)); + Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; } @@ -556,7 +565,6 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, Align, getFrameLowering()->getStackAlignment()); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, - /*NeedSP*/ false, /*Alloca*/ 0)); return -++NumFixedObjects; } diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineFunctionPass.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineFunctionPass.cpp index 674cc80a006c..789f2042a073 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineFunctionPass.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -51,6 +51,7 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved("domfrontier"); AU.addPreserved("loops"); AU.addPreserved("lda"); + AU.addPreserved("stack-protector"); FunctionPass::getAnalysisUsage(AU); } diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineInstr.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineInstr.cpp index 295b450a0f2f..e3df010312a0 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineInstr.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineInstr.cpp @@ -199,7 +199,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { case MachineOperand::MO_BlockAddress: return getBlockAddress() == Other.getBlockAddress() && getOffset() == Other.getOffset(); - case MO_RegisterMask: + case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: return getRegMask() == Other.getRegMask(); case MachineOperand::MO_MCSymbol: return getMCSymbol() == Other.getMCSymbol(); @@ -241,6 +242,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) { return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getBlockAddress(), MO.getOffset()); case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); case MachineOperand::MO_Metadata: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata()); @@ -368,6 +370,9 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { case MachineOperand::MO_RegisterMask: OS << ""; break; + case MachineOperand::MO_RegisterLiveOut: + OS << ""; + break; case MachineOperand::MO_Metadata: OS << '<'; WriteAsOperand(OS, getMetadata(), /*PrintType=*/false); @@ -481,6 +486,10 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { else WriteAsOperand(OS, MMO.getValue(), /*PrintType=*/false); + unsigned AS = MMO.getAddrSpace(); + if (AS != 0) + OS << "(addrspace=" << AS << ')'; + // If the alignment of the memory reference itself differs from the alignment // of the base pointer, print the base alignment explicitly, next to the base // pointer. 
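Aside: the relaxed assertion in MachineFunction::addLiveIn earlier in this patch tolerates a virtual register whose class was constrained between two addLiveIn calls for the same physical register. A sketch of the now-accepted sequence (assumes an X86 target context; the concrete register classes are illustrative, not from the patch):

// First call creates the vreg with the broad class; a later pass then
// constrains it (as MachineCSE does via constrainRegClass above) to a
// subclass that still contains the physreg.
static void addLiveInTwiceSketch(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned V = MF.addLiveIn(X86::EAX, &X86::GR32RegClass);
  MRI.constrainRegClass(V, &X86::GR32_ABCDRegClass);
  // The second call returns the same vreg instead of asserting, because
  // GR32_ABCD contains EAX and is a subclass of GR32.
  unsigned V2 = MF.addLiveIn(X86::EAX, &X86::GR32RegClass);
  (void)V; (void)V2; // V2 == V
}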
@@ -984,6 +993,54 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, return NULL; } +const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg( + unsigned Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, bool ExploreBundle) const { + // Check every operand inside the bundle if we have + // been asked to. + if (ExploreBundle) + for (ConstMIBundleOperands OpndIt(this); OpndIt.isValid() && CurRC; + ++OpndIt) + CurRC = OpndIt->getParent()->getRegClassConstraintEffectForVRegImpl( + OpndIt.getOperandNo(), Reg, CurRC, TII, TRI); + else + // Otherwise, just check the current operands. + for (ConstMIOperands OpndIt(this); OpndIt.isValid() && CurRC; ++OpndIt) + CurRC = getRegClassConstraintEffectForVRegImpl(OpndIt.getOperandNo(), Reg, + CurRC, TII, TRI); + return CurRC; +} + +const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVRegImpl( + unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC, + const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const { + assert(CurRC && "Invalid initial register class"); + // Check if Reg is constrained by some of its use/def from MI. + const MachineOperand &MO = getOperand(OpIdx); + if (!MO.isReg() || MO.getReg() != Reg) + return CurRC; + // If yes, accumulate the constraints through the operand. + return getRegClassConstraintEffect(OpIdx, CurRC, TII, TRI); +} + +const TargetRegisterClass *MachineInstr::getRegClassConstraintEffect( + unsigned OpIdx, const TargetRegisterClass *CurRC, + const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const { + const TargetRegisterClass *OpRC = getRegClassConstraint(OpIdx, TII, TRI); + const MachineOperand &MO = getOperand(OpIdx); + assert(MO.isReg() && + "Cannot get register constraints for non-register operand"); + assert(CurRC && "Invalid initial register class"); + if (unsigned SubIdx = MO.getSubReg()) { + if (OpRC) + CurRC = TRI->getMatchingSuperRegClass(CurRC, OpRC, SubIdx); + else + CurRC = TRI->getSubClassWithSubReg(CurRC, SubIdx); + } else if (OpRC) + CurRC = TRI->getCommonSubClass(CurRC, OpRC); + return CurRC; +} + /// Return the number of instructions inside the MI bundle, not counting the /// header instruction. unsigned MachineInstr::getBundleSize() const { diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineRegisterInfo.cpp index f8b8796b25fb..bf4c23dcf70e 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -79,17 +79,9 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { // Accumulate constraints from all uses. for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E; ++I) { - const TargetRegisterClass *OpRC = - I->getRegClassConstraint(I.getOperandNo(), TII, - getTargetRegisterInfo()); - if (unsigned SubIdx = I.getOperand().getSubReg()) { - if (OpRC) - NewRC = getTargetRegisterInfo()->getMatchingSuperRegClass(NewRC, OpRC, - SubIdx); - else - NewRC = getTargetRegisterInfo()->getSubClassWithSubReg(NewRC, SubIdx); - } else if (OpRC) - NewRC = getTargetRegisterInfo()->getCommonSubClass(NewRC, OpRC); + // Apply the effect of the given operand to NewRC.
+ NewRC = I->getRegClassConstraintEffect(I.getOperandNo(), NewRC, TII, + getTargetRegisterInfo()); if (!NewRC || NewRC == OldRC) return false; } diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineScheduler.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineScheduler.cpp index e71c4df0b797..b1dd34bcb7dd 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/MachineScheduler.cpp @@ -49,6 +49,11 @@ static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); + +static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden, + cl::desc("Only schedule this function")); +static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden, + cl::desc("Only schedule this MBB#")); #else static bool ViewMISchedDAGs = false; #endif // NDEBUG @@ -90,25 +95,47 @@ MachineSchedContext::~MachineSchedContext() { } namespace { +/// Base class for a machine scheduler class that can run at any point. +class MachineSchedulerBase : public MachineSchedContext, + public MachineFunctionPass {
+public: + MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {} + + virtual void print(raw_ostream &O, const Module* = 0) const; + +protected: + void scheduleRegions(ScheduleDAGInstrs &Scheduler); +}; + /// MachineScheduler runs after coalescing and before register allocation. -class MachineScheduler : public MachineSchedContext, - public MachineFunctionPass { +class MachineScheduler : public MachineSchedulerBase { public: MachineScheduler(); virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual void releaseMemory() {} - virtual bool runOnMachineFunction(MachineFunction&); - virtual void print(raw_ostream &O, const Module* = 0) const; - static char ID; // Class identification, replacement for typeinfo protected: ScheduleDAGInstrs *createMachineScheduler(); }; + +/// PostMachineScheduler runs shortly before code emission.
+class PostMachineScheduler : public MachineSchedulerBase { +public: + PostMachineScheduler(); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + virtual bool runOnMachineFunction(MachineFunction&); + + static char ID; // Class identification, replacement for typeinfo + +protected: + ScheduleDAGInstrs *createPostMachineScheduler(); +}; } // namespace char MachineScheduler::ID = 0; @@ -124,7 +151,7 @@ INITIALIZE_PASS_END(MachineScheduler, "misched", "Machine Instruction Scheduler", false, false) MachineScheduler::MachineScheduler() -: MachineFunctionPass(ID) { +: MachineSchedulerBase(ID) { initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); } @@ -141,6 +168,26 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } +char PostMachineScheduler::ID = 0; + +char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID; + +INITIALIZE_PASS(PostMachineScheduler, "postmisched", + "PostRA Machine Instruction Scheduler", false, false) + +PostMachineScheduler::PostMachineScheduler() +: MachineSchedulerBase(ID) { + initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry()); +} + +void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequiredID(MachineDominatorsID); + AU.addRequired(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); +} + MachinePassRegistry MachineSchedRegistry::Registry; /// A dummy default scheduler factory indicates whether the scheduler @@ -162,8 +209,8 @@ DefaultSchedRegistry("default", "Use the target's default scheduler choice.", /// Forward declare the standard machine scheduler. This will be used as the /// default scheduler if the target does not set a default. -static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C); - +static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C); +static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C); /// Decrement this iterator until reaching the top or a non-debug instr. static MachineBasicBlock::const_iterator @@ -222,7 +269,20 @@ ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() { return Scheduler; // Default to GenericScheduler. - return createGenericSched(this); + return createGenericSchedLive(this); +} + +/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by +/// the caller. We don't have a command line option to override the postRA +/// scheduler. The Target must configure it. +ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() { + // Get the postRA scheduler set by the target for this function. + ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this); + if (Scheduler) + return Scheduler; + + // Default to GenericScheduler. + return createGenericSchedPostRA(this); } /// Top-level MachineScheduler pass driver. @@ -252,7 +312,6 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { AA = &getAnalysis(); LIS = &getAnalysis(); - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); if (VerifyScheduling) { DEBUG(LIS->dump()); @@ -263,6 +322,56 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Instantiate the selected scheduler for this target, function, and // optimization level. 
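// Aside, not part of this patch: a backend opts in to the new post-RA
// scheduler through the TargetPassConfig hook that
// PostMachineScheduler::createPostMachineScheduler queries above. A sketch
// of a target override (class name hypothetical; the signature is assumed
// from the call site in this diff):
class MyTargetPassConfig : public TargetPassConfig {
public:
  MyTargetPassConfig(TargetMachine *TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}
  virtual ScheduleDAGInstrs *
  createPostMachineScheduler(MachineSchedContext *C) const {
    // Return a target-specific ScheduleDAGInstrs here; returning 0 falls
    // back to the generic post-RA scheduler (createGenericSchedPostRA).
    return 0;
  }
};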
OwningPtr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); + scheduleRegions(*Scheduler); + + DEBUG(LIS->dump()); + if (VerifyScheduling) + MF->verify(this, "After machine scheduling."); + return true; +} + +bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { + DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs())); + + // Initialize the context of the pass. + MF = &mf; + PassConfig = &getAnalysis<TargetPassConfig>(); + + if (VerifyScheduling) + MF->verify(this, "Before post machine scheduling."); + + // Instantiate the selected scheduler for this target, function, and + // optimization level. + OwningPtr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler()); + scheduleRegions(*Scheduler); + + if (VerifyScheduling) + MF->verify(this, "After post machine scheduling."); + return true; +} + +/// Return true if the given instruction should not be included in a scheduling +/// region. +/// +/// MachineScheduler does not currently support scheduling across calls. To +/// handle calls, the DAG builder needs to be modified to create register +/// anti/output dependencies on the registers clobbered by the call's regmask +/// operand. In PreRA scheduling, the stack pointer adjustment already prevents +/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce +/// the boundary, but there would be no benefit to postRA scheduling across +/// calls this late anyway. +static bool isSchedBoundary(MachineBasicBlock::iterator MI, + MachineBasicBlock *MBB, + MachineFunction *MF, + const TargetInstrInfo *TII, + bool IsPostRA) { + return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF); +} + +/// Main driver for both MachineScheduler and PostMachineScheduler. +void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + bool IsPostRA = Scheduler.isPostRA(); // Visit all machine basic blocks. // @@ -271,7 +380,15 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); MBB != MBBEnd; ++MBB) { - Scheduler->startBlock(MBB); + Scheduler.startBlock(MBB); + +#ifndef NDEBUG + if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName()) + continue; + if (SchedOnlyBlock.getNumOccurrences() + && (int)SchedOnlyBlock != MBB->getNumber()) + continue; +#endif // Break the block into scheduling regions [I, RegionEnd), and schedule each // region as soon as it is discovered. RegionEnd points the scheduling @@ -283,13 +400,16 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // The Scheduler may insert instructions during either schedule() or // exitRegion(), even for empty regions. So the local iterators 'I' and // 'RegionEnd' are invalid across these calls. - unsigned RemainingInstrs = MBB->size(); + // + // MBB::size() uses instr_iterator to count. Here we need a bundle to count + // as a single instruction. + unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end()); for(MachineBasicBlock::iterator RegionEnd = MBB->end(); - RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) { + RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) { // Avoid decrementing RegionEnd for blocks with no terminator. if (RegionEnd != MBB->end() - || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) { + || isSchedBoundary(llvm::prior(RegionEnd), MBB, MF, TII, IsPostRA)) { --RegionEnd; // Count the boundary instruction.
        --RemainingInstrs;
@@ -300,21 +420,22 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
       unsigned NumRegionInstrs = 0;
       MachineBasicBlock::iterator I = RegionEnd;
       for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) {
-        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
+        if (isSchedBoundary(llvm::prior(I), MBB, MF, TII, IsPostRA))
           break;
       }
       // Notify the scheduler of the region, even if we may skip scheduling
       // it. Perhaps it still needs to be bundled.
-      Scheduler->enterRegion(MBB, I, RegionEnd, NumRegionInstrs);
+      Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs);

       // Skip empty scheduling regions (0 or 1 schedulable instructions).
       if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
         // Close the current region. Bundle the terminator if needed.
         // This invalidates 'RegionEnd' and 'I'.
-        Scheduler->exitRegion();
+        Scheduler.exitRegion();
         continue;
       }
-      DEBUG(dbgs() << "********** MI Scheduling **********\n");
+      DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ? "PostRA " : "")
+            << "MI Scheduling **********\n");
       DEBUG(dbgs() << MF->getName()
             << ":BB#" << MBB->getNumber() << " " << MBB->getName()
             << "\n  From: " << *I << "    To: ";
@@ -325,26 +446,27 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
       // Schedule a region: possibly reorder instructions.
       // This invalidates 'RegionEnd' and 'I'.
-      Scheduler->schedule();
+      Scheduler.schedule();

       // Close the current region.
-      Scheduler->exitRegion();
+      Scheduler.exitRegion();

       // Scheduling has invalidated the current iterator 'I'. Ask the
       // scheduler for the top of its scheduled region.
-      RegionEnd = Scheduler->begin();
+      RegionEnd = Scheduler.begin();
     }
     assert(RemainingInstrs == 0 && "Instruction count mismatch!");
-    Scheduler->finishBlock();
+    Scheduler.finishBlock();
+    if (Scheduler.isPostRA()) {
+      // FIXME: Ideally, no further passes should rely on kill flags. However,
+      // thumb2 size reduction is currently an exception.
+      Scheduler.fixupKills(MBB);
+    }
   }
-  Scheduler->finalizeSchedule();
-  DEBUG(LIS->dump());
-  if (VerifyScheduling)
-    MF->verify(this, "After machine scheduling.");
-  return true;
+  Scheduler.finalizeSchedule();
 }

-void MachineScheduler::print(raw_ostream &O, const Module* m) const {
+void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
   // unimplemented
 }

@@ -358,12 +480,12 @@ void ReadyQueue::dump() {
 #endif

 //===----------------------------------------------------------------------===//
-// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
-// preservation.
-//===----------------------------------------------------------------------===//
+// ScheduleDAGMI - Basic machine instruction scheduling. This is
+// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
+// virtual registers.
+//===----------------------------------------------------------------------===//

 ScheduleDAGMI::~ScheduleDAGMI() {
-  delete DFSResult;
   DeleteContainerPointers(Mutations);
   delete SchedImpl;
 }

@@ -453,10 +575,24 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
   }
 }

+/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
+/// crossing a scheduling boundary. [begin, end) includes all instructions in
+/// the region, including the boundary itself and single-instruction regions
+/// that don't get scheduled.
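The rewritten banner above is the crux of this patch: ScheduleDAGMI no longer owns any virtual-register state (note delete DFSResult leaving this destructor), so the PostRA pass can reuse it, while the LiveIntervals bookkeeping moves into the new ScheduleDAGMILive subclass introduced further down. A compressed sketch of that ownership split, using hypothetical class names rather than the real interfaces:

    #include <cstdio>

    struct Strategy { virtual ~Strategy() {} };

    // Base layer: no virtual-register bookkeeping; usable before or after
    // register allocation.
    class DAGBase {
    public:
      explicit DAGBase(Strategy *S) : Impl(S) {}
      virtual ~DAGBase() { delete Impl; } // strategy dies here, like SchedImpl
      virtual bool hasVRegLiveness() const { return false; }
    protected:
      Strategy *Impl;
    };

    // Derived layer: owns the liveness-only state (DFS data, trackers).
    class DAGLive : public DAGBase {
    public:
      explicit DAGLive(Strategy *S) : DAGBase(S), DFSData(new int(0)) {}
      virtual ~DAGLive() { delete DFSData; } // mirrors ~ScheduleDAGMILive
      virtual bool hasVRegLiveness() const { return true; }
    private:
      int *DFSData; // stand-in for SchedDFSResult
    };

    int main() {
      DAGBase *DAG = new DAGLive(new Strategy());
      std::printf("vreg liveness: %d\n", (int)DAG->hasVRegLiveness()); // 1
      delete DAG; // virtual dtor runs DAGLive's cleanup, then DAGBase's
      return 0;
    }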
+void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
+                                MachineBasicBlock::iterator begin,
+                                MachineBasicBlock::iterator end,
+                                unsigned regioninstrs)
+{
+  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
+
+  SchedImpl->initPolicy(begin, end, regioninstrs);
+}
+
 /// This is normally called from the main scheduler loop but may also be invoked
 /// by the scheduling strategy to perform additional code motion.
-void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
-                                    MachineBasicBlock::iterator InsertPos) {
+void ScheduleDAGMI::moveInstruction(
+  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
   // Advance RegionBegin if the first instruction moves down.
   if (&*RegionBegin == MI)
     ++RegionBegin;
@@ -465,7 +601,8 @@ void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
   BB->splice(InsertPos, BB, MI);

   // Update LiveIntervals
-  LIS->handleMove(MI, /*UpdateFlags=*/true);
+  if (LIS)
+    LIS->handleMove(MI, /*UpdateFlags=*/true);

   // Recede RegionBegin if an instruction moves above the first.
   if (RegionBegin == InsertPos)
@@ -483,16 +620,200 @@ bool ScheduleDAGMI::checkSchedLimit() {
   return true;
 }

+/// Per-region scheduling driver, called back from
+/// MachineScheduler::runOnMachineFunction. This is a simplified driver that
+/// does not consider liveness or register pressure. It is useful for PostRA
+/// scheduling and potentially other custom schedulers.
+void ScheduleDAGMI::schedule() {
+  // Build the DAG.
+  buildSchedGraph(AA);
+
+  Topo.InitDAGTopologicalSorting();
+
+  postprocessDAG();
+
+  SmallVector<SUnit*, 8> TopRoots, BotRoots;
+  findRootsAndBiasEdges(TopRoots, BotRoots);
+
+  // Initialize the strategy before modifying the DAG.
+  // This may initialize a DFSResult to be used for queue priority.
+  SchedImpl->initialize(this);
+
+  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          SUnits[su].dumpAll(this));
+  if (ViewMISchedDAGs) viewGraph();
+
+  // Initialize ready queues now that the DAG and priority data are finalized.
+  initQueues(TopRoots, BotRoots);
+
+  bool IsTopNode = false;
+  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+    assert(!SU->isScheduled && "Node already scheduled");
+    if (!checkSchedLimit())
+      break;
+
+    MachineInstr *MI = SU->getInstr();
+    if (IsTopNode) {
+      assert(SU->isTopReady() && "node still has unscheduled dependencies");
+      if (&*CurrentTop == MI)
+        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
+      else
+        moveInstruction(MI, CurrentTop);
+    }
+    else {
+      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+      MachineBasicBlock::iterator priorII =
+        priorNonDebug(CurrentBottom, CurrentTop);
+      if (&*priorII == MI)
+        CurrentBottom = priorII;
+      else {
+        if (&*CurrentTop == MI)
+          CurrentTop = nextIfDebug(++CurrentTop, priorII);
+        moveInstruction(MI, CurrentBottom);
+        CurrentBottom = MI;
+      }
+    }
+    updateQueues(SU, IsTopNode);
+
+    // Notify the scheduling strategy after updating the DAG.
+    SchedImpl->schedNode(SU, IsTopNode);
+  }
+  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+  placeDebugValues();
+
+  DEBUG({
+      unsigned BBNum = begin()->getParent()->getNumber();
+      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
+      dumpSchedule();
+      dbgs() << '\n';
+    });
+}
+
+/// Apply each ScheduleDAGMutation step in order.
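ScheduleDAGMI::schedule() above shrinks the unscheduled zone from both ends: pickNode() reports which end each node came from, and CurrentTop/CurrentBottom converge until the final assert sees an empty zone. A toy version of that converging two-ended loop over plain integers, alternating ends instead of consulting a real MachineSchedStrategy (all names hypothetical):

    #include <cstdio>
    #include <deque>

    // Toy pickNode(): alternate ends, reporting the end used via IsTopNode.
    static int pickNode(std::deque<int> &Zone, bool &IsTopNode) {
      IsTopNode = !IsTopNode;
      int N = IsTopNode ? Zone.front() : Zone.back();
      if (IsTopNode)
        Zone.pop_front();
      else
        Zone.pop_back();
      return N;
    }

    int main() {
      std::deque<int> Zone; // unscheduled zone between CurrentTop/CurrentBottom
      for (int i = 0; i != 5; ++i)
        Zone.push_back(i);
      bool IsTopNode = false;
      while (!Zone.empty()) {
        int N = pickNode(Zone, IsTopNode);
        std::printf("%s schedules %d\n", IsTopNode ? "top" : "bot", N);
      }
      // Zone is now empty: the analogue of CurrentTop == CurrentBottom.
      return 0;
    }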
+void ScheduleDAGMI::postprocessDAG() { + for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { + Mutations[i]->apply(this); + } +} + +void ScheduleDAGMI:: +findRootsAndBiasEdges(SmallVectorImpl &TopRoots, + SmallVectorImpl &BotRoots) { + for (std::vector::iterator + I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { + SUnit *SU = &(*I); + assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits"); + + // Order predecessors so DFSResult follows the critical path. + SU->biasCriticalPath(); + + // A SUnit is ready to top schedule if it has no predecessors. + if (!I->NumPredsLeft) + TopRoots.push_back(SU); + // A SUnit is ready to bottom schedule if it has no successors. + if (!I->NumSuccsLeft) + BotRoots.push_back(SU); + } + ExitSU.biasCriticalPath(); +} + +/// Identify DAG roots and setup scheduler queues. +void ScheduleDAGMI::initQueues(ArrayRef TopRoots, + ArrayRef BotRoots) { + NextClusterSucc = NULL; + NextClusterPred = NULL; + + // Release all DAG roots for scheduling, not including EntrySU/ExitSU. + // + // Nodes with unreleased weak edges can still be roots. + // Release top roots in forward order. + for (SmallVectorImpl::const_iterator + I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) { + SchedImpl->releaseTopNode(*I); + } + // Release bottom roots in reverse order so the higher priority nodes appear + // first. This is more natural and slightly more efficient. + for (SmallVectorImpl::const_reverse_iterator + I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) { + SchedImpl->releaseBottomNode(*I); + } + + releaseSuccessors(&EntrySU); + releasePredecessors(&ExitSU); + + SchedImpl->registerRoots(); + + // Advance past initial DebugValues. + CurrentTop = nextIfDebug(RegionBegin, RegionEnd); + CurrentBottom = RegionEnd; +} + +/// Update scheduler queues after scheduling an instruction. +void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { + // Release dependent instructions for scheduling. + if (IsTopNode) + releaseSuccessors(SU); + else + releasePredecessors(SU); + + SU->isScheduled = true; +} + +/// Reinsert any remaining debug_values, just like the PostRA scheduler. +void ScheduleDAGMI::placeDebugValues() { + // If first instruction was a DBG_VALUE then put it back. + if (FirstDbgValue) { + BB->splice(RegionBegin, BB, FirstDbgValue); + RegionBegin = FirstDbgValue; + } + + for (std::vector >::iterator + DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { + std::pair P = *prior(DI); + MachineInstr *DbgValue = P.first; + MachineBasicBlock::iterator OrigPrevMI = P.second; + if (&*RegionBegin == DbgValue) + ++RegionBegin; + BB->splice(++OrigPrevMI, BB, DbgValue); + if (OrigPrevMI == llvm::prior(RegionEnd)) + RegionEnd = DbgValue; + } + DbgValues.clear(); + FirstDbgValue = NULL; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ScheduleDAGMI::dumpSchedule() const { + for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { + if (SUnit *SU = getSUnit(&(*MI))) + SU->dump(this); + else + dbgs() << "Missing SUnit\n"; + } +} +#endif + +//===----------------------------------------------------------------------===// +// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals +// preservation. +//===----------------------------------------------------------------------===// + +ScheduleDAGMILive::~ScheduleDAGMILive() { + delete DFSResult; +} + /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after /// crossing a scheduling boundary. 
[begin, end) includes all instructions in /// the region, including the boundary itself and single-instruction regions /// that don't get scheduled. -void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, +void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) { - ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); + // ScheduleDAGMI initializes SchedImpl's per-region policy. + ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs); // For convenience remember the end of the liveness region. LiveRegionEnd = @@ -500,14 +821,12 @@ void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, SUPressureDiffs.clear(); - SchedImpl->initPolicy(begin, end, regioninstrs); - ShouldTrackPressure = SchedImpl->shouldTrackPressure(); } // Setup the register pressure trackers for the top scheduled top and bottom // scheduled regions. -void ScheduleDAGMI::initRegPressure() { +void ScheduleDAGMILive::initRegPressure() { TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); @@ -567,7 +886,7 @@ void ScheduleDAGMI::initRegPressure() { dbgs() << "\n"); } -void ScheduleDAGMI:: +void ScheduleDAGMILive:: updateScheduledPressure(const SUnit *SU, const std::vector &NewMaxPressure) { const PressureDiff &PDiff = getPressureDiff(SU); @@ -595,7 +914,7 @@ updateScheduledPressure(const SUnit *SU, /// Update the PressureDiff array for liveness after scheduling this /// instruction. -void ScheduleDAGMI::updatePressureDiffs(ArrayRef LiveUses) { +void ScheduleDAGMILive::updatePressureDiffs(ArrayRef LiveUses) { for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) { /// FIXME: Currently assuming single-use physregs. unsigned Reg = LiveUses[LUIdx]; @@ -644,9 +963,9 @@ void ScheduleDAGMI::updatePressureDiffs(ArrayRef LiveUses) { /// so that it can be easilly extended by experimental schedulers. Generally, /// implementing MachineSchedStrategy should be sufficient to implement a new /// scheduling algorithm. However, if a scheduler further subclasses -/// ScheduleDAGMI then it will want to override this virtual method in order to -/// update any specialized state. -void ScheduleDAGMI::schedule() { +/// ScheduleDAGMILive then it will want to override this virtual method in order +/// to update any specialized state. +void ScheduleDAGMILive::schedule() { buildDAGWithRegPressure(); Topo.InitDAGTopologicalSorting(); @@ -667,6 +986,11 @@ void ScheduleDAGMI::schedule() { // Initialize ready queues now that the DAG and priority data are finalized. initQueues(TopRoots, BotRoots); + if (ShouldTrackPressure) { + assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); + TopRPTracker.setPos(CurrentTop); + } + bool IsTopNode = false; while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { assert(!SU->isScheduled && "Node already scheduled"); @@ -676,6 +1000,18 @@ void ScheduleDAGMI::schedule() { scheduleMI(SU, IsTopNode); updateQueues(SU, IsTopNode); + + if (DFSResult) { + unsigned SubtreeID = DFSResult->getSubtreeID(SU); + if (!ScheduledTrees.test(SubtreeID)) { + ScheduledTrees.set(SubtreeID); + DFSResult->scheduleTree(SubtreeID); + SchedImpl->scheduleTree(SubtreeID); + } + } + + // Notify the scheduling strategy after updating the DAG. 
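The pressure code above ultimately maintains a running maximum of simultaneously live values per pressure set as the region is walked. A drastically simplified, single-pressure-set version of that bookkeeping; the deltas are hypothetical and nothing here models the real PressureDiff or RegPressureTracker machinery:

    #include <algorithm>
    #include <cstdio>

    int main() {
      // Per-instruction pressure deltas: +1 where a live range begins,
      // -1 at the last use where it ends.
      const int Delta[] = {+1, +1, -1, +1, -1, -1};
      int Curr = 0, Max = 0;
      for (unsigned i = 0; i != sizeof(Delta) / sizeof(Delta[0]); ++i) {
        Curr += Delta[i];
        Max = std::max(Max, Curr); // running max, as in RegionCriticalPSets
      }
      std::printf("max pressure: %d\n", Max); // 2
      return 0;
    }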
+ SchedImpl->schedNode(SU, IsTopNode); } assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); @@ -690,7 +1026,7 @@ void ScheduleDAGMI::schedule() { } /// Build the DAG and setup three register pressure trackers. -void ScheduleDAGMI::buildDAGWithRegPressure() { +void ScheduleDAGMILive::buildDAGWithRegPressure() { if (!ShouldTrackPressure) { RPTracker.reset(); RegionCriticalPSets.clear(); @@ -713,14 +1049,7 @@ void ScheduleDAGMI::buildDAGWithRegPressure() { initRegPressure(); } -/// Apply each ScheduleDAGMutation step in order. -void ScheduleDAGMI::postprocessDAG() { - for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { - Mutations[i]->apply(this); - } -} - -void ScheduleDAGMI::computeDFSResult() { +void ScheduleDAGMILive::computeDFSResult() { if (!DFSResult) DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize); DFSResult->clear(); @@ -730,26 +1059,6 @@ void ScheduleDAGMI::computeDFSResult() { ScheduledTrees.resize(DFSResult->getNumSubtrees()); } -void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl &TopRoots, - SmallVectorImpl &BotRoots) { - for (std::vector::iterator - I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { - SUnit *SU = &(*I); - assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits"); - - // Order predecessors so DFSResult follows the critical path. - SU->biasCriticalPath(); - - // A SUnit is ready to top schedule if it has no predecessors. - if (!I->NumPredsLeft) - TopRoots.push_back(SU); - // A SUnit is ready to bottom schedule if it has no successors. - if (!I->NumSuccsLeft) - BotRoots.push_back(SU); - } - ExitSU.biasCriticalPath(); -} - /// Compute the max cyclic critical path through the DAG. The scheduling DAG /// only provides the critical path for single block loops. To handle loops that /// span blocks, we could use the vreg path latencies provided by @@ -773,7 +1082,10 @@ void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl &TopRoots, /// LiveOutDepth - LiveInDepth = 3 - 1 = 2 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2 /// CyclicCriticalPath = min(2, 2) = 2 -unsigned ScheduleDAGMI::computeCyclicCriticalPath() { +/// +/// This could be relevant to PostRA scheduling, but is currently implemented +/// assuming LiveIntervals. +unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { // This only applies to single block loop. if (!BB->isSuccessor(BB)) return 0; @@ -835,44 +1147,8 @@ unsigned ScheduleDAGMI::computeCyclicCriticalPath() { return MaxCyclicLatency; } -/// Identify DAG roots and setup scheduler queues. -void ScheduleDAGMI::initQueues(ArrayRef TopRoots, - ArrayRef BotRoots) { - NextClusterSucc = NULL; - NextClusterPred = NULL; - - // Release all DAG roots for scheduling, not including EntrySU/ExitSU. - // - // Nodes with unreleased weak edges can still be roots. - // Release top roots in forward order. - for (SmallVectorImpl::const_iterator - I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) { - SchedImpl->releaseTopNode(*I); - } - // Release bottom roots in reverse order so the higher priority nodes appear - // first. This is more natural and slightly more efficient. - for (SmallVectorImpl::const_reverse_iterator - I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) { - SchedImpl->releaseBottomNode(*I); - } - - releaseSuccessors(&EntrySU); - releasePredecessors(&ExitSU); - - SchedImpl->registerRoots(); - - // Advance past initial DebugValues. 
- CurrentTop = nextIfDebug(RegionBegin, RegionEnd); - CurrentBottom = RegionEnd; - - if (ShouldTrackPressure) { - assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); - TopRPTracker.setPos(CurrentTop); - } -} - /// Move an instruction and update register pressure. -void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { +void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { // Move the instruction to its new location in the instruction stream. MachineInstr *MI = SU->getInstr(); @@ -917,63 +1193,6 @@ void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { } } -/// Update scheduler queues after scheduling an instruction. -void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { - // Release dependent instructions for scheduling. - if (IsTopNode) - releaseSuccessors(SU); - else - releasePredecessors(SU); - - SU->isScheduled = true; - - if (DFSResult) { - unsigned SubtreeID = DFSResult->getSubtreeID(SU); - if (!ScheduledTrees.test(SubtreeID)) { - ScheduledTrees.set(SubtreeID); - DFSResult->scheduleTree(SubtreeID); - SchedImpl->scheduleTree(SubtreeID); - } - } - - // Notify the scheduling strategy after updating the DAG. - SchedImpl->schedNode(SU, IsTopNode); -} - -/// Reinsert any remaining debug_values, just like the PostRA scheduler. -void ScheduleDAGMI::placeDebugValues() { - // If first instruction was a DBG_VALUE then put it back. - if (FirstDbgValue) { - BB->splice(RegionBegin, BB, FirstDbgValue); - RegionBegin = FirstDbgValue; - } - - for (std::vector >::iterator - DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { - std::pair P = *prior(DI); - MachineInstr *DbgValue = P.first; - MachineBasicBlock::iterator OrigPrevMI = P.second; - if (&*RegionBegin == DbgValue) - ++RegionBegin; - BB->splice(++OrigPrevMI, BB, DbgValue); - if (OrigPrevMI == llvm::prior(RegionEnd)) - RegionEnd = DbgValue; - } - DbgValues.clear(); - FirstDbgValue = NULL; -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void ScheduleDAGMI::dumpSchedule() const { - for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { - if (SUnit *SU = getSUnit(&(*MI))) - SU->dump(this); - else - dbgs() << "Missing SUnit\n"; - } -} -#endif - //===----------------------------------------------------------------------===// // LoadClusterMutation - DAG post-processing to cluster loads. //===----------------------------------------------------------------------===// @@ -1154,7 +1373,7 @@ public: virtual void apply(ScheduleDAGMI *DAG); protected: - void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG); + void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG); }; } // anonymous @@ -1177,7 +1396,7 @@ protected: /// this algorithm should handle extended blocks. An EBB is a set of /// contiguously numbered blocks such that the previous block in the EBB is /// always the single predecessor. -void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) { +void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { LiveIntervals *LIS = DAG->getLIS(); MachineInstr *Copy = CopySU->getInstr(); @@ -1302,6 +1521,8 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) { /// \brief Callback from DAG postProcessing to create weak edges to encourage /// copy elimination. 
void CopyConstrain::apply(ScheduleDAGMI *DAG) { + assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals"); + MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end()); if (FirstPos == DAG->end()) return; @@ -1314,28 +1535,582 @@ void CopyConstrain::apply(ScheduleDAGMI *DAG) { if (!SU->getInstr()->isCopy()) continue; - constrainLocalCopy(SU, DAG); + constrainLocalCopy(SU, static_cast(DAG)); } } //===----------------------------------------------------------------------===// -// GenericScheduler - Implementation of the generic MachineSchedStrategy. +// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler +// and possibly other custom schedulers. +//===----------------------------------------------------------------------===// + +static const unsigned InvalidCycle = ~0U; + +SchedBoundary::~SchedBoundary() { delete HazardRec; } + +void SchedBoundary::reset() { + // A new HazardRec is created for each DAG and owned by SchedBoundary. + // Destroying and reconstructing it is very expensive though. So keep + // invalid, placeholder HazardRecs. + if (HazardRec && HazardRec->isEnabled()) { + delete HazardRec; + HazardRec = 0; + } + Available.clear(); + Pending.clear(); + CheckPending = false; + NextSUs.clear(); + CurrCycle = 0; + CurrMOps = 0; + MinReadyCycle = UINT_MAX; + ExpectedLatency = 0; + DependentLatency = 0; + RetiredMOps = 0; + MaxExecutedResCount = 0; + ZoneCritResIdx = 0; + IsResourceLimited = false; + ReservedCycles.clear(); +#ifndef NDEBUG + // Track the maximum number of stall cycles that could arise either from the + // latency of a DAG edge or the number of cycles that a processor resource is + // reserved (SchedBoundary::ReservedCycles). + MaxObservedLatency = 0; +#endif + // Reserve a zero-count for invalid CritResIdx. + ExecutedResCounts.resize(1); + assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); +} + +void SchedRemainder:: +init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { + reset(); + if (!SchedModel->hasInstrSchedModel()) + return; + RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); + for (std::vector::iterator + I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { + const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); + RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC) + * SchedModel->getMicroOpFactor(); + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + unsigned PIdx = PI->ProcResourceIdx; + unsigned Factor = SchedModel->getResourceFactor(PIdx); + RemainingCounts[PIdx] += (Factor * PI->Cycles); + } + } +} + +void SchedBoundary:: +init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { + reset(); + DAG = dag; + SchedModel = smodel; + Rem = rem; + if (SchedModel->hasInstrSchedModel()) { + ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); + ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle); + } +} + +/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat +/// these "soft stalls" differently than the hard stall cycles based on CPU +/// resources and computed by checkHazard(). A fully in-order model +/// (MicroOpBufferSize==0) will not make use of this since instructions are not +/// available for scheduling until they are ready. However, a weaker in-order +/// model may use this for heuristics. 
For example, if a processor has in-order
+/// behavior when reading certain resources, this may come into play.
+unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
+  if (!SU->isUnbuffered)
+    return 0;
+
+  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
+  if (ReadyCycle > CurrCycle)
+    return ReadyCycle - CurrCycle;
+  return 0;
+}
+
+/// Compute the next cycle at which the given processor resource can be
+/// scheduled.
+unsigned SchedBoundary::
+getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
+  unsigned NextUnreserved = ReservedCycles[PIdx];
+  // If this resource has never been used, always return cycle zero.
+  if (NextUnreserved == InvalidCycle)
+    return 0;
+  // For bottom-up scheduling add the cycles needed for the current operation.
+  if (!isTop())
+    NextUnreserved += Cycles;
+  return NextUnreserved;
+}
+
+/// Does this SU have a hazard within the current instruction group.
+///
+/// The scheduler supports two modes of hazard recognition. The first is the
+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
+/// supports highly complicated in-order reservation tables
+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
+///
+/// The second is a streamlined mechanism that checks for hazards based on
+/// simple counters that the scheduler itself maintains. It explicitly checks
+/// for instruction dispatch limitations, including the number of micro-ops that
+/// can dispatch per cycle.
+///
+/// TODO: Also check whether the SU must start a new group.
+bool SchedBoundary::checkHazard(SUnit *SU) {
+  if (HazardRec->isEnabled()
+      && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
+    return true;
+  }
+  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
+  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
+    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
+          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
+    return true;
+  }
+  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
+    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter
+           PI = SchedModel->getWriteProcResBegin(SC),
+           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+      if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle)
+        return true;
+    }
+  }
+  return false;
+}
+
+// Find the unscheduled node in ReadySUs with the highest latency.
+unsigned SchedBoundary::
+findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
+  SUnit *LateSU = 0;
+  unsigned RemLatency = 0;
+  for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
+       I != E; ++I) {
+    unsigned L = getUnscheduledLatency(*I);
+    if (L > RemLatency) {
+      RemLatency = L;
+      LateSU = *I;
+    }
+  }
+  if (LateSU) {
+    DEBUG(dbgs() << Available.getName() << " RemLatency SU("
+          << LateSU->NodeNum << ") " << RemLatency << "c\n");
+  }
+  return RemLatency;
+}
+
+// Count resources in this zone and the remaining unscheduled
+// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
+// resource index, or zero if the zone is issue limited.
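Of the three tests in checkHazard() above, the micro-op one is pure counter arithmetic: a node is a hazard if issuing its micro-ops would overflow the current dispatch group. That test in isolation, assuming a hypothetical 4-wide machine:

    #include <cstdio>

    static const unsigned IssueWidth = 4; // hypothetical: 4 micro-ops/cycle

    // Mirrors the CurrMOps test in SchedBoundary::checkHazard(): a node that
    // starts the group (CurrMOps == 0) is never rejected by this check.
    static bool wouldOverflowGroup(unsigned CurrMOps, unsigned UOps) {
      return CurrMOps > 0 && CurrMOps + UOps > IssueWidth;
    }

    int main() {
      const unsigned CurrMOps = 3; // three micro-ops already issued this cycle
      std::printf("1-uop instr: %s\n",
                  wouldOverflowGroup(CurrMOps, 1) ? "hazard" : "ok"); // ok
      std::printf("2-uop instr: %s\n",
                  wouldOverflowGroup(CurrMOps, 2) ? "hazard" : "ok"); // hazard
      return 0;
    }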
+unsigned SchedBoundary:: +getOtherResourceCount(unsigned &OtherCritIdx) { + OtherCritIdx = 0; + if (!SchedModel->hasInstrSchedModel()) + return 0; + + unsigned OtherCritCount = Rem->RemIssueCount + + (RetiredMOps * SchedModel->getMicroOpFactor()); + DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: " + << OtherCritCount / SchedModel->getMicroOpFactor() << '\n'); + for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds(); + PIdx != PEnd; ++PIdx) { + unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx]; + if (OtherCount > OtherCritCount) { + OtherCritCount = OtherCount; + OtherCritIdx = PIdx; + } + } + if (OtherCritIdx) { + DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: " + << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) + << " " << SchedModel->getResourceName(OtherCritIdx) << "\n"); + } + return OtherCritCount; +} + +void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + // Check for interlocks first. For the purpose of other heuristics, an + // instruction that cannot issue appears as if it's not in the ReadyQueue. + bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; + if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU)) + Pending.push(SU); + else + Available.push(SU); + + // Record this node as an immediate dependent of the scheduled node. + NextSUs.insert(SU); +} + +void SchedBoundary::releaseTopNode(SUnit *SU) { + if (SU->isScheduled) + return; + + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isWeak()) + continue; + unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; + unsigned Latency = I->getLatency(); +#ifndef NDEBUG + MaxObservedLatency = std::max(Latency, MaxObservedLatency); +#endif + if (SU->TopReadyCycle < PredReadyCycle + Latency) + SU->TopReadyCycle = PredReadyCycle + Latency; + } + releaseNode(SU, SU->TopReadyCycle); +} + +void SchedBoundary::releaseBottomNode(SUnit *SU) { + if (SU->isScheduled) + return; + + assert(SU->getInstr() && "Scheduled SUnit must have instr"); + + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isWeak()) + continue; + unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; + unsigned Latency = I->getLatency(); +#ifndef NDEBUG + MaxObservedLatency = std::max(Latency, MaxObservedLatency); +#endif + if (SU->BotReadyCycle < SuccReadyCycle + Latency) + SU->BotReadyCycle = SuccReadyCycle + Latency; + } + releaseNode(SU, SU->BotReadyCycle); +} + +/// Move the boundary of scheduled code by one cycle. +void SchedBoundary::bumpCycle(unsigned NextCycle) { + if (SchedModel->getMicroOpBufferSize() == 0) { + assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); + if (MinReadyCycle > NextCycle) + NextCycle = MinReadyCycle; + } + // Update the current micro-ops, which will issue in the next cycle. + unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); + CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; + + // Decrement DependentLatency based on the next cycle. + if ((NextCycle - CurrCycle) > DependentLatency) + DependentLatency = 0; + else + DependentLatency -= (NextCycle - CurrCycle); + + if (!HazardRec->isEnabled()) { + // Bypass HazardRec virtual calls. + CurrCycle = NextCycle; + } + else { + // Bypass getHazardType calls in case of long latency. 
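releaseTopNode() and releaseBottomNode() above implement a single recurrence: a node's ready cycle is the maximum, over its strong (non-weak) predecessors or successors, of their ready cycle plus the connecting edge's latency. The same recurrence on hypothetical numbers:

    #include <algorithm>
    #include <cstdio>

    struct PredEdge { unsigned PredReadyCycle, Latency; };

    int main() {
      // Three strong predecessors of a node being released top-down.
      const PredEdge Preds[] = {{0, 3}, {1, 1}, {2, 4}};
      unsigned TopReadyCycle = 0;
      for (unsigned i = 0; i != 3; ++i)
        TopReadyCycle = std::max(TopReadyCycle,
                                 Preds[i].PredReadyCycle + Preds[i].Latency);
      std::printf("TopReadyCycle = %u\n", TopReadyCycle); // 6 = 2 + 4
      return 0;
    }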
+ for (; CurrCycle != NextCycle; ++CurrCycle) { + if (isTop()) + HazardRec->AdvanceCycle(); + else + HazardRec->RecedeCycle(); + } + } + CheckPending = true; + unsigned LFactor = SchedModel->getLatencyFactor(); + IsResourceLimited = + (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) + > (int)LFactor; + + DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); +} + +void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { + ExecutedResCounts[PIdx] += Count; + if (ExecutedResCounts[PIdx] > MaxExecutedResCount) + MaxExecutedResCount = ExecutedResCounts[PIdx]; +} + +/// Add the given processor resource to this scheduled zone. +/// +/// \param Cycles indicates the number of consecutive (non-pipelined) cycles +/// during which this resource is consumed. +/// +/// \return the next cycle at which the instruction may execute without +/// oversubscribing resources. +unsigned SchedBoundary:: +countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { + unsigned Factor = SchedModel->getResourceFactor(PIdx); + unsigned Count = Factor * Cycles; + DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) + << " +" << Cycles << "x" << Factor << "u\n"); + + // Update Executed resources counts. + incExecutedResources(PIdx, Count); + assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); + Rem->RemainingCounts[PIdx] -= Count; + + // Check if this resource exceeds the current critical resource. If so, it + // becomes the critical resource. + if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) { + ZoneCritResIdx = PIdx; + DEBUG(dbgs() << " *** Critical resource " + << SchedModel->getResourceName(PIdx) << ": " + << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n"); + } + // For reserved resources, record the highest cycle using the resource. + unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles); + if (NextAvailable > CurrCycle) { + DEBUG(dbgs() << " Resource conflict: " + << SchedModel->getProcResource(PIdx)->Name << " reserved until @" + << NextAvailable << "\n"); + } + return NextAvailable; +} + +/// Move the boundary of scheduled code by one SUnit. +void SchedBoundary::bumpNode(SUnit *SU) { + // Update the reservation table. + if (HazardRec->isEnabled()) { + if (!isTop() && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + HazardRec->EmitInstruction(SU); + } + // checkHazard should prevent scheduling multiple instructions per cycle that + // exceed the issue width. + const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); + assert( + (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) && + "Cannot schedule this instruction's MicroOps in the current cycle."); + + unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); + DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); + + unsigned NextCycle = CurrCycle; + switch (SchedModel->getMicroOpBufferSize()) { + case 0: + assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); + break; + case 1: + if (ReadyCycle > NextCycle) { + NextCycle = ReadyCycle; + DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n"); + } + break; + default: + // We don't currently model the OOO reorder buffer, so consider all + // scheduled MOps to be "retired". We do loosely model in-order resource + // latency. 
If this instruction uses an in-order resource, account for any + // likely stall cycles. + if (SU->isUnbuffered && ReadyCycle > NextCycle) + NextCycle = ReadyCycle; + break; + } + RetiredMOps += IncMOps; + + // Update resource counts and critical resource. + if (SchedModel->hasInstrSchedModel()) { + unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); + assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); + Rem->RemIssueCount -= DecRemIssue; + if (ZoneCritResIdx) { + // Scale scheduled micro-ops for comparing with the critical resource. + unsigned ScaledMOps = + RetiredMOps * SchedModel->getMicroOpFactor(); + + // If scaled micro-ops are now more than the previous critical resource by + // a full cycle, then micro-ops issue becomes critical. + if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) + >= (int)SchedModel->getLatencyFactor()) { + ZoneCritResIdx = 0; + DEBUG(dbgs() << " *** Critical resource NumMicroOps: " + << ScaledMOps / SchedModel->getLatencyFactor() << "c\n"); + } + } + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + unsigned RCycle = + countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle); + if (RCycle > NextCycle) + NextCycle = RCycle; + } + if (SU->hasReservedResource) { + // For reserved resources, record the highest cycle using the resource. + // For top-down scheduling, this is the cycle in which we schedule this + // instruction plus the number of cycles the operations reserves the + // resource. For bottom-up is it simply the instruction's cycle. + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + unsigned PIdx = PI->ProcResourceIdx; + if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { + ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle; +#ifndef NDEBUG + MaxObservedLatency = std::max(PI->Cycles, MaxObservedLatency); +#endif + } + } + } + } + // Update ExpectedLatency and DependentLatency. + unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; + unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency; + if (SU->getDepth() > TopLatency) { + TopLatency = SU->getDepth(); + DEBUG(dbgs() << " " << Available.getName() + << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n"); + } + if (SU->getHeight() > BotLatency) { + BotLatency = SU->getHeight(); + DEBUG(dbgs() << " " << Available.getName() + << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n"); + } + // If we stall for any reason, bump the cycle. + if (NextCycle > CurrCycle) { + bumpCycle(NextCycle); + } + else { + // After updating ZoneCritResIdx and ExpectedLatency, check if we're + // resource limited. If a stall occured, bumpCycle does this. + unsigned LFactor = SchedModel->getLatencyFactor(); + IsResourceLimited = + (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) + > (int)LFactor; + } + // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle + // resets CurrMOps. Loop to handle instructions with more MOps than issue in + // one cycle. Since we commonly reach the max MOps here, opportunistically + // bump the cycle to avoid uselessly checking everything in the readyQ. 
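The bookkeeping in countResource() above compares heterogeneous resources by scaling each count with a per-resource factor, so that saturation of, say, one FPU and two ALUs land on the same scale; whichever scaled count pulls ahead of scaled micro-op issue becomes the zone's critical resource. A toy version of that update for a hypothetical two-resource machine:

    #include <cstdio>

    int main() {
      // Index 0 is reserved for micro-op issue, as in the real code.
      // Hypothetical machine: 2 ALUs (factor 1), 1 FPU (factor 2).
      unsigned Counts[3] = {0, 0, 0};
      const unsigned Factor[3] = {0, 1, 2};
      const unsigned MicroOpFactor = 1;
      unsigned ZoneCritResIdx = 0;
      unsigned RetiredMOps = 3;

      for (unsigned step = 0; step != 2; ++step) {
        // Two scheduling steps: 2 ALU cycles, then 2 FPU cycles.
        const unsigned PIdx = step + 1, Cycles = 2;
        Counts[PIdx] += Factor[PIdx] * Cycles; // countResource()-style scaling
        const unsigned Crit = ZoneCritResIdx ? Counts[ZoneCritResIdx]
                                             : RetiredMOps * MicroOpFactor;
        if (PIdx != ZoneCritResIdx && Counts[PIdx] > Crit)
          ZoneCritResIdx = PIdx; // this resource now limits the zone
      }
      // ALU count 2 stays below scaled MOps 3; FPU count 4 becomes critical.
      std::printf("critical resource: %u (scaled count %u)\n",
                  ZoneCritResIdx, Counts[ZoneCritResIdx]);
      return 0;
    }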
+ CurrMOps += IncMOps; + while (CurrMOps >= SchedModel->getIssueWidth()) { + DEBUG(dbgs() << " *** Max MOps " << CurrMOps + << " at cycle " << CurrCycle << '\n'); + bumpCycle(++NextCycle); + } + DEBUG(dumpScheduledState()); +} + +/// Release pending ready nodes in to the available queue. This makes them +/// visible to heuristics. +void SchedBoundary::releasePending() { + // If the available queue is empty, it is safe to reset MinReadyCycle. + if (Available.empty()) + MinReadyCycle = UINT_MAX; + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; + for (unsigned i = 0, e = Pending.size(); i != e; ++i) { + SUnit *SU = *(Pending.begin()+i); + unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; + + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + if (!IsBuffered && ReadyCycle > CurrCycle) + continue; + + if (checkHazard(SU)) + continue; + + Available.push(SU); + Pending.remove(Pending.begin()+i); + --i; --e; + } + DEBUG(if (!Pending.empty()) Pending.dump()); + CheckPending = false; +} + +/// Remove SU from the ready set for this boundary. +void SchedBoundary::removeReady(SUnit *SU) { + if (Available.isInQueue(SU)) + Available.remove(Available.find(SU)); + else { + assert(Pending.isInQueue(SU) && "bad ready count"); + Pending.remove(Pending.find(SU)); + } +} + +/// If this queue only has one ready candidate, return it. As a side effect, +/// defer any nodes that now hit a hazard, and advance the cycle until at least +/// one node is ready. If multiple instructions are ready, return NULL. +SUnit *SchedBoundary::pickOnlyChoice() { + if (CheckPending) + releasePending(); + + if (CurrMOps > 0) { + // Defer any ready instrs that now have a hazard. + for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { + if (checkHazard(*I)) { + Pending.push(*I); + I = Available.remove(I); + continue; + } + ++I; + } + } + for (unsigned i = 0; Available.empty(); ++i) { + assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) && + "permanent hazard"); (void)i; + bumpCycle(CurrCycle + 1); + releasePending(); + } + if (Available.size() == 1) + return *Available.begin(); + return NULL; +} + +#ifndef NDEBUG +// This is useful information to dump after bumpNode. +// Note that the Queue contents are more useful before pickNodeFromQueue. +void SchedBoundary::dumpScheduledState() { + unsigned ResFactor; + unsigned ResCount; + if (ZoneCritResIdx) { + ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); + ResCount = getResourceCount(ZoneCritResIdx); + } + else { + ResFactor = SchedModel->getMicroOpFactor(); + ResCount = RetiredMOps * SchedModel->getMicroOpFactor(); + } + unsigned LFactor = SchedModel->getLatencyFactor(); + dbgs() << Available.getName() << " @" << CurrCycle << "c\n" + << " Retired: " << RetiredMOps; + dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; + dbgs() << "\n Critical: " << ResCount / LFactor << "c, " + << ResCount / ResFactor << " " + << SchedModel->getResourceName(ZoneCritResIdx) + << "\n ExpectedLatency: " << ExpectedLatency << "c\n" + << (IsResourceLimited ? " - Resource" : " - Latency") + << " limited.\n"; +} +#endif + +//===----------------------------------------------------------------------===// +// GenericScheduler - Generic implementation of MachineSchedStrategy. 
//===----------------------------------------------------------------------===// namespace { -/// GenericScheduler shrinks the unscheduled zone using heuristics to balance -/// the schedule. -class GenericScheduler : public MachineSchedStrategy { +/// Base class for GenericScheduler. This class maintains information about +/// scheduling candidates based on TargetSchedModel making it easy to implement +/// heuristics for either preRA or postRA scheduling. +class GenericSchedulerBase : public MachineSchedStrategy { public: /// Represent the type of SchedCandidate found within a single queue. /// pickNodeBidirectional depends on these listed by decreasing priority. enum CandReason { - NoCand, PhysRegCopy, RegExcess, RegCritical, Cluster, Weak, RegMax, + NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; #ifndef NDEBUG - static const char *getReasonStr(GenericScheduler::CandReason Reason); + static const char *getReasonStr(GenericSchedulerBase::CandReason Reason); #endif /// Policy for scheduling the next instruction in the candidate's zone. @@ -1407,252 +2182,299 @@ public: const TargetSchedModel *SchedModel); }; - /// Summarize the unscheduled region. - struct SchedRemainder { - // Critical path through the DAG in expected latency. - unsigned CriticalPath; - unsigned CyclicCritPath; - - // Scaled count of micro-ops left to schedule. - unsigned RemIssueCount; - - bool IsAcyclicLatencyLimited; - - // Unscheduled resources - SmallVector RemainingCounts; - - void reset() { - CriticalPath = 0; - CyclicCritPath = 0; - RemIssueCount = 0; - IsAcyclicLatencyLimited = false; - RemainingCounts.clear(); - } - - SchedRemainder() { reset(); } - - void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); - }; - - /// Each Scheduling boundary is associated with ready queues. It tracks the - /// current cycle in the direction of movement, and maintains the state - /// of "hazards" and other interlocks at the current cycle. - struct SchedBoundary { - ScheduleDAGMI *DAG; - const TargetSchedModel *SchedModel; - SchedRemainder *Rem; - - ReadyQueue Available; - ReadyQueue Pending; - bool CheckPending; - - // For heuristics, keep a list of the nodes that immediately depend on the - // most recently scheduled node. - SmallPtrSet NextSUs; - - ScheduleHazardRecognizer *HazardRec; - - /// Number of cycles it takes to issue the instructions scheduled in this - /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls. - /// See getStalls(). - unsigned CurrCycle; - - /// Micro-ops issued in the current cycle - unsigned CurrMOps; - - /// MinReadyCycle - Cycle of the soonest available instruction. - unsigned MinReadyCycle; - - // The expected latency of the critical path in this scheduled zone. - unsigned ExpectedLatency; - - // The latency of dependence chains leading into this zone. - // For each node scheduled bottom-up: DLat = max DLat, N.Depth. - // For each cycle scheduled: DLat -= 1. - unsigned DependentLatency; - - /// Count the scheduled (issued) micro-ops that can be retired by - /// time=CurrCycle assuming the first scheduled instr is retired at time=0. - unsigned RetiredMOps; - - // Count scheduled resources that have been executed. Resources are - // considered executed if they become ready in the time that it takes to - // saturate any resource including the one in question. Counts are scaled - // for direct comparison with other resources. 
Counts can be compared with - // MOps * getMicroOpFactor and Latency * getLatencyFactor. - SmallVector ExecutedResCounts; - - /// Cache the max count for a single resource. - unsigned MaxExecutedResCount; - - // Cache the critical resources ID in this scheduled zone. - unsigned ZoneCritResIdx; - - // Is the scheduled region resource limited vs. latency limited. - bool IsResourceLimited; - -#ifndef NDEBUG - // Remember the greatest operand latency as an upper bound on the number of - // times we should retry the pending queue because of a hazard. - unsigned MaxObservedLatency; -#endif - - void reset() { - // A new HazardRec is created for each DAG and owned by SchedBoundary. - // Destroying and reconstructing it is very expensive though. So keep - // invalid, placeholder HazardRecs. - if (HazardRec && HazardRec->isEnabled()) { - delete HazardRec; - HazardRec = 0; - } - Available.clear(); - Pending.clear(); - CheckPending = false; - NextSUs.clear(); - CurrCycle = 0; - CurrMOps = 0; - MinReadyCycle = UINT_MAX; - ExpectedLatency = 0; - DependentLatency = 0; - RetiredMOps = 0; - MaxExecutedResCount = 0; - ZoneCritResIdx = 0; - IsResourceLimited = false; -#ifndef NDEBUG - MaxObservedLatency = 0; -#endif - // Reserve a zero-count for invalid CritResIdx. - ExecutedResCounts.resize(1); - assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); - } - - /// Pending queues extend the ready queues with the same ID and the - /// PendingFlag set. - SchedBoundary(unsigned ID, const Twine &Name): - DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), - Pending(ID << GenericScheduler::LogMaxQID, Name+".P"), - HazardRec(0) { - reset(); - } - - ~SchedBoundary() { delete HazardRec; } - - void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, - SchedRemainder *rem); - - bool isTop() const { - return Available.getID() == GenericScheduler::TopQID; - } - -#ifndef NDEBUG - const char *getResourceName(unsigned PIdx) { - if (!PIdx) - return "MOps"; - return SchedModel->getProcResource(PIdx)->Name; - } -#endif - - /// Get the number of latency cycles "covered" by the scheduled - /// instructions. This is the larger of the critical path within the zone - /// and the number of cycles required to issue the instructions. - unsigned getScheduledLatency() const { - return std::max(ExpectedLatency, CurrCycle); - } - - unsigned getUnscheduledLatency(SUnit *SU) const { - return isTop() ? SU->getHeight() : SU->getDepth(); - } - - unsigned getResourceCount(unsigned ResIdx) const { - return ExecutedResCounts[ResIdx]; - } - - /// Get the scaled count of scheduled micro-ops and resources, including - /// executed resources. - unsigned getCriticalCount() const { - if (!ZoneCritResIdx) - return RetiredMOps * SchedModel->getMicroOpFactor(); - return getResourceCount(ZoneCritResIdx); - } - - /// Get a scaled count for the minimum execution time of the scheduled - /// micro-ops that are ready to execute by getExecutedCount. Notice the - /// feedback loop. 
- unsigned getExecutedCount() const { - return std::max(CurrCycle * SchedModel->getLatencyFactor(), - MaxExecutedResCount); - } - - bool checkHazard(SUnit *SU); - - unsigned findMaxLatency(ArrayRef ReadySUs); - - unsigned getOtherResourceCount(unsigned &OtherCritIdx); - - void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone); - - void releaseNode(SUnit *SU, unsigned ReadyCycle); - - void bumpCycle(unsigned NextCycle); - - void incExecutedResources(unsigned PIdx, unsigned Count); - - unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle); - - void bumpNode(SUnit *SU); - - void releasePending(); - - void removeReady(SUnit *SU); - - SUnit *pickOnlyChoice(); - -#ifndef NDEBUG - void dumpScheduledState(); -#endif - }; - -private: +protected: const MachineSchedContext *Context; - ScheduleDAGMI *DAG; const TargetSchedModel *SchedModel; const TargetRegisterInfo *TRI; - // State of the top and bottom scheduled instruction boundaries. SchedRemainder Rem; +protected: + GenericSchedulerBase(const MachineSchedContext *C): + Context(C), SchedModel(0), TRI(0) {} + + void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, + SchedBoundary *OtherZone); + +#ifndef NDEBUG + void traceCandidate(const SchedCandidate &Cand); +#endif +}; +} // namespace + +void GenericSchedulerBase::SchedCandidate:: +initResourceDelta(const ScheduleDAGMI *DAG, + const TargetSchedModel *SchedModel) { + if (!Policy.ReduceResIdx && !Policy.DemandResIdx) + return; + + const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + if (PI->ProcResourceIdx == Policy.ReduceResIdx) + ResDelta.CritResources += PI->Cycles; + if (PI->ProcResourceIdx == Policy.DemandResIdx) + ResDelta.DemandedResources += PI->Cycles; + } +} + +/// Set the CandPolicy given a scheduling zone given the current resources and +/// latencies inside and outside the zone. +void GenericSchedulerBase::setPolicy(CandPolicy &Policy, + bool IsPostRA, + SchedBoundary &CurrZone, + SchedBoundary *OtherZone) { + // Apply preemptive heuristics based on the the total latency and resources + // inside and outside this zone. Potential stalls should be considered before + // following this policy. + + // Compute remaining latency. We need this both to determine whether the + // overall schedule has become latency-limited and whether the instructions + // outside this zone are resource or latency limited. + // + // The "dependent" latency is updated incrementally during scheduling as the + // max height/depth of scheduled nodes minus the cycles since it was + // scheduled: + // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone + // + // The "independent" latency is the max ready queue depth: + // ILat = max N.depth for N in Available|Pending + // + // RemainingLatency is the greater of independent and dependent latency. + unsigned RemLatency = CurrZone.getDependentLatency(); + RemLatency = std::max(RemLatency, + CurrZone.findMaxLatency(CurrZone.Available.elements())); + RemLatency = std::max(RemLatency, + CurrZone.findMaxLatency(CurrZone.Pending.elements())); + + // Compute the critical resource outside the zone. + unsigned OtherCritIdx = 0; + unsigned OtherCount = + OtherZone ? 
OtherZone->getOtherResourceCount(OtherCritIdx) : 0; + + bool OtherResLimited = false; + if (SchedModel->hasInstrSchedModel()) { + unsigned LFactor = SchedModel->getLatencyFactor(); + OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; + } + // Schedule aggressively for latency in PostRA mode. We don't check for + // acyclic latency during PostRA, and highly out-of-order processors will + // skip PostRA scheduling. + if (!OtherResLimited) { + if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) { + Policy.ReduceLatency |= true; + DEBUG(dbgs() << " " << CurrZone.Available.getName() + << " RemainingLatency " << RemLatency << " + " + << CurrZone.getCurrCycle() << "c > CritPath " + << Rem.CriticalPath << "\n"); + } + } + // If the same resource is limiting inside and outside the zone, do nothing. + if (CurrZone.getZoneCritResIdx() == OtherCritIdx) + return; + + DEBUG( + if (CurrZone.isResourceLimited()) { + dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: " + << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) + << "\n"; + } + if (OtherResLimited) + dbgs() << " RemainingLimit: " + << SchedModel->getResourceName(OtherCritIdx) << "\n"; + if (!CurrZone.isResourceLimited() && !OtherResLimited) + dbgs() << " Latency limited both directions.\n"); + + if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx) + Policy.ReduceResIdx = CurrZone.getZoneCritResIdx(); + + if (OtherResLimited) + Policy.DemandResIdx = OtherCritIdx; +} + +#ifndef NDEBUG +const char *GenericSchedulerBase::getReasonStr( + GenericSchedulerBase::CandReason Reason) { + switch (Reason) { + case NoCand: return "NOCAND "; + case PhysRegCopy: return "PREG-COPY"; + case RegExcess: return "REG-EXCESS"; + case RegCritical: return "REG-CRIT "; + case Stall: return "STALL "; + case Cluster: return "CLUSTER "; + case Weak: return "WEAK "; + case RegMax: return "REG-MAX "; + case ResourceReduce: return "RES-REDUCE"; + case ResourceDemand: return "RES-DEMAND"; + case TopDepthReduce: return "TOP-DEPTH "; + case TopPathReduce: return "TOP-PATH "; + case BotHeightReduce:return "BOT-HEIGHT"; + case BotPathReduce: return "BOT-PATH "; + case NextDefUse: return "DEF-USE "; + case NodeOrder: return "ORDER "; + }; + llvm_unreachable("Unknown reason!"); +} + +void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { + PressureChange P; + unsigned ResIdx = 0; + unsigned Latency = 0; + switch (Cand.Reason) { + default: + break; + case RegExcess: + P = Cand.RPDelta.Excess; + break; + case RegCritical: + P = Cand.RPDelta.CriticalMax; + break; + case RegMax: + P = Cand.RPDelta.CurrentMax; + break; + case ResourceReduce: + ResIdx = Cand.Policy.ReduceResIdx; + break; + case ResourceDemand: + ResIdx = Cand.Policy.DemandResIdx; + break; + case TopDepthReduce: + Latency = Cand.SU->getDepth(); + break; + case TopPathReduce: + Latency = Cand.SU->getHeight(); + break; + case BotHeightReduce: + Latency = Cand.SU->getHeight(); + break; + case BotPathReduce: + Latency = Cand.SU->getDepth(); + break; + } + dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); + if (P.isValid()) + dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) + << ":" << P.getUnitInc() << " "; + else + dbgs() << " "; + if (ResIdx) + dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; + else + dbgs() << " "; + if (Latency) + dbgs() << " " << Latency << " cycles "; + else + dbgs() << " "; + dbgs() << '\n'; +} +#endif + +/// Return true if this heuristic determines order. 
+static bool tryLess(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { + if (TryVal < CandVal) { + TryCand.Reason = Reason; + return true; + } + if (TryVal > CandVal) { + if (Cand.Reason > Reason) + Cand.Reason = Reason; + return true; + } + Cand.setRepeat(Reason); + return false; +} + +static bool tryGreater(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { + if (TryVal > CandVal) { + TryCand.Reason = Reason; + return true; + } + if (TryVal < CandVal) { + if (Cand.Reason > Reason) + Cand.Reason = Reason; + return true; + } + Cand.setRepeat(Reason); + return false; +} + +static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + SchedBoundary &Zone) { + if (Zone.isTop()) { + if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { + if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, GenericSchedulerBase::TopDepthReduce)) + return true; + } + if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, GenericSchedulerBase::TopPathReduce)) + return true; + } + else { + if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { + if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, GenericSchedulerBase::BotHeightReduce)) + return true; + } + if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, GenericSchedulerBase::BotPathReduce)) + return true; + } + return false; +} + +static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand, + bool IsTop) { + DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") + << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n'); +} + +namespace { +/// GenericScheduler shrinks the unscheduled zone using heuristics to balance +/// the schedule. +class GenericScheduler : public GenericSchedulerBase { + ScheduleDAGMILive *DAG; + + // State of the top and bottom scheduled instruction boundaries. 
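tryLess() and tryGreater() above encode the heuristic cascade: each comparison either decides in the trial candidate's favor, records on the incumbent the strongest reason it has already won by (so weaker heuristics cannot later override it), or ties and falls through. A self-contained sketch of that pattern with a reduced, hypothetical CandReason set:

    #include <cstdio>

    enum CandReason { NoCand, RegExcess, Stall, NodeOrder }; // by priority

    struct Cand { int SU; CandReason Reason; };

    // Same shape as tryLess(): smaller TryVal wins outright; larger TryVal
    // lets the incumbent win, recording the stronger reason; equal values tie.
    static bool tryLess(int TryVal, int CandVal, Cand &Try, Cand &Best,
                        CandReason Reason) {
      if (TryVal < CandVal) { Try.Reason = Reason; return true; }
      if (TryVal > CandVal) {
        if (Best.Reason > Reason)
          Best.Reason = Reason;
        return true;
      }
      return false; // tie: fall through to the next, weaker heuristic
    }

    int main() {
      Cand Try = {1, NoCand}, Best = {2, NodeOrder};
      // Run heuristics in decreasing priority; the first decisive one wins.
      if (!tryLess(/*TryPressure=*/3, /*BestPressure=*/3, Try, Best, RegExcess))
        tryLess(/*TryStall=*/0, /*BestStall=*/2, Try, Best, Stall);
      if (Try.Reason != NoCand)
        std::printf("picked SU(%d), reason %d\n", Try.SU, Try.Reason); // 2
      return 0;
    }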
SchedBoundary Top; SchedBoundary Bot; MachineSchedPolicy RegionPolicy; public: - /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) - enum { - TopQID = 1, - BotQID = 2, - LogMaxQID = 2 - }; - GenericScheduler(const MachineSchedContext *C): - Context(C), DAG(0), SchedModel(0), TRI(0), - Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} + GenericSchedulerBase(C), DAG(0), Top(SchedBoundary::TopQID, "TopQ"), + Bot(SchedBoundary::BotQID, "BotQ") {} virtual void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, - unsigned NumRegionInstrs); + unsigned NumRegionInstrs) LLVM_OVERRIDE; - bool shouldTrackPressure() const { return RegionPolicy.ShouldTrackPressure; } + virtual bool shouldTrackPressure() const LLVM_OVERRIDE { + return RegionPolicy.ShouldTrackPressure; + } - virtual void initialize(ScheduleDAGMI *dag); + virtual void initialize(ScheduleDAGMI *dag) LLVM_OVERRIDE; - virtual SUnit *pickNode(bool &IsTopNode); + virtual SUnit *pickNode(bool &IsTopNode) LLVM_OVERRIDE; - virtual void schedNode(SUnit *SU, bool IsTopNode); + virtual void schedNode(SUnit *SU, bool IsTopNode) LLVM_OVERRIDE; - virtual void releaseTopNode(SUnit *SU); + virtual void releaseTopNode(SUnit *SU) LLVM_OVERRIDE { + Top.releaseTopNode(SU); + } - virtual void releaseBottomNode(SUnit *SU); + virtual void releaseBottomNode(SUnit *SU) LLVM_OVERRIDE { + Bot.releaseBottomNode(SU); + } - virtual void registerRoots(); + virtual void registerRoots() LLVM_OVERRIDE; protected: void checkAcyclicLatency(); @@ -1670,48 +2492,40 @@ protected: SchedCandidate &Candidate); void reschedulePhysRegCopies(SUnit *SU, bool isTop); - -#ifndef NDEBUG - void traceCandidate(const SchedCandidate &Cand); -#endif }; } // namespace -void GenericScheduler::SchedRemainder:: -init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { - reset(); - if (!SchedModel->hasInstrSchedModel()) - return; - RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); - for (std::vector::iterator - I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { - const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); - RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC) - * SchedModel->getMicroOpFactor(); - for (TargetSchedModel::ProcResIter - PI = SchedModel->getWriteProcResBegin(SC), - PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { - unsigned PIdx = PI->ProcResourceIdx; - unsigned Factor = SchedModel->getResourceFactor(PIdx); - RemainingCounts[PIdx] += (Factor * PI->Cycles); - } - } -} +void GenericScheduler::initialize(ScheduleDAGMI *dag) { + assert(dag->hasVRegLiveness() && + "(PreRA)GenericScheduler needs vreg liveness"); + DAG = static_cast(dag); + SchedModel = DAG->getSchedModel(); + TRI = DAG->TRI; -void GenericScheduler::SchedBoundary:: -init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { - reset(); - DAG = dag; - SchedModel = smodel; - Rem = rem; - if (SchedModel->hasInstrSchedModel()) - ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); + Rem.init(DAG, SchedModel); + Top.init(DAG, SchedModel, &Rem); + Bot.init(DAG, SchedModel, &Rem); + + // Initialize resource counts. + + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or + // are disabled, then these HazardRecs will be disabled. 
+ const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); + const TargetMachine &TM = DAG->MF.getTarget(); + if (!Top.HazardRec) { + Top.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + } + if (!Bot.HazardRec) { + Bot.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + } } /// Initialize the per-region scheduling policy. void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, - MachineBasicBlock::iterator End, - unsigned NumRegionInstrs) { + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) { const TargetMachine &TM = Context->MF->getTarget(); // Avoid setting up the register pressure tracker for small regions to save @@ -1750,71 +2564,6 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, } } -void GenericScheduler::initialize(ScheduleDAGMI *dag) { - DAG = dag; - SchedModel = DAG->getSchedModel(); - TRI = DAG->TRI; - - Rem.init(DAG, SchedModel); - Top.init(DAG, SchedModel, &Rem); - Bot.init(DAG, SchedModel, &Rem); - - // Initialize resource counts. - - // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or - // are disabled, then these HazardRecs will be disabled. - const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); - const TargetMachine &TM = DAG->MF.getTarget(); - if (!Top.HazardRec) { - Top.HazardRec = - TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); - } - if (!Bot.HazardRec) { - Bot.HazardRec = - TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); - } -} - -void GenericScheduler::releaseTopNode(SUnit *SU) { - if (SU->isScheduled) - return; - - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isWeak()) - continue; - unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned Latency = I->getLatency(); -#ifndef NDEBUG - Top.MaxObservedLatency = std::max(Latency, Top.MaxObservedLatency); -#endif - if (SU->TopReadyCycle < PredReadyCycle + Latency) - SU->TopReadyCycle = PredReadyCycle + Latency; - } - Top.releaseNode(SU, SU->TopReadyCycle); -} - -void GenericScheduler::releaseBottomNode(SUnit *SU) { - if (SU->isScheduled) - return; - - assert(SU->getInstr() && "Scheduled SUnit must have instr"); - - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isWeak()) - continue; - unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; - unsigned Latency = I->getLatency(); -#ifndef NDEBUG - Bot.MaxObservedLatency = std::max(Latency, Bot.MaxObservedLatency); -#endif - if (SU->BotReadyCycle < SuccReadyCycle + Latency) - SU->BotReadyCycle = SuccReadyCycle + Latency; - } - Bot.releaseNode(SU, SU->BotReadyCycle); -} - /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic /// critical path by more cycles than it takes to drain the instruction buffer. /// We estimate an upper bounds on in-flight instructions as: @@ -1869,493 +2618,11 @@ void GenericScheduler::registerRoots() { } } -/// Does this SU have a hazard within the current instruction group. -/// -/// The scheduler supports two modes of hazard recognition. The first is the -/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that -/// supports highly complicated in-order reservation tables -/// (ScoreboardHazardRecognizer) and arbitraty target-specific logic. -/// -/// The second is a streamlined mechanism that checks for hazards based on -/// simple counters that the scheduler itself maintains. 
It explicitly checks -/// for instruction dispatch limitations, including the number of micro-ops that -/// can dispatch per cycle. -/// -/// TODO: Also check whether the SU must start a new group. -bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) { - if (HazardRec->isEnabled()) - return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; - - unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); - if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { - DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" - << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); - return true; - } - return false; -} - -// Find the unscheduled node in ReadySUs with the highest latency. -unsigned GenericScheduler::SchedBoundary:: -findMaxLatency(ArrayRef<SUnit*> ReadySUs) { - SUnit *LateSU = 0; - unsigned RemLatency = 0; - for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end(); - I != E; ++I) { - unsigned L = getUnscheduledLatency(*I); - if (L > RemLatency) { - RemLatency = L; - LateSU = *I; - } - } - if (LateSU) { - DEBUG(dbgs() << Available.getName() << " RemLatency SU(" - << LateSU->NodeNum << ") " << RemLatency << "c\n"); - } - return RemLatency; -} - -// Count resources in this zone and the remaining unscheduled -// instruction. Return the max count, scaled. Set OtherCritIdx to the critical -// resource index, or zero if the zone is issue limited. -unsigned GenericScheduler::SchedBoundary:: -getOtherResourceCount(unsigned &OtherCritIdx) { - OtherCritIdx = 0; - if (!SchedModel->hasInstrSchedModel()) - return 0; - - unsigned OtherCritCount = Rem->RemIssueCount - + (RetiredMOps * SchedModel->getMicroOpFactor()); - DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: " - << OtherCritCount / SchedModel->getMicroOpFactor() << '\n'); - for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds(); - PIdx != PEnd; ++PIdx) { - unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx]; - if (OtherCount > OtherCritCount) { - OtherCritCount = OtherCount; - OtherCritIdx = PIdx; - } - } - if (OtherCritIdx) { - DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: " - << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) - << " " << getResourceName(OtherCritIdx) << "\n"); - } - return OtherCritCount; -} - -/// Set the CandPolicy for this zone given the current resources and latencies -/// inside and outside the zone. -void GenericScheduler::SchedBoundary::setPolicy(CandPolicy &Policy, - SchedBoundary &OtherZone) { - // Now that potential stalls have been considered, apply preemptive heuristics - // based on the total latency and resources inside and outside this - // zone. - - // Compute remaining latency. We need this both to determine whether the - // overall schedule has become latency-limited and whether the instructions - // outside this zone are resource or latency limited. - // - // The "dependent" latency is updated incrementally during scheduling as the - // max height/depth of scheduled nodes minus the cycles since it was - // scheduled: - // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone - // - // The "independent" latency is the max ready queue depth: - // ILat = max N.depth for N in Available|Pending - // - // RemainingLatency is the greater of independent and dependent latency.
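The removed checkHazard above also documents the cheap fallback used when no target hazard recognizer is enabled: an instruction is treated as hazarded if its micro-ops would push the current dispatch group past the issue width. A self-contained sketch of just that counter test (IssueWidth and the micro-op counts are invented numbers):

#include <cassert>

struct IssueState {
  unsigned IssueWidth;   // micro-ops the machine can dispatch per cycle
  unsigned CurrMOps = 0; // micro-ops already dispatched this cycle

  // Mirrors the counter-based hazard test: a non-empty group that would
  // overflow the issue width must wait for the next cycle.
  bool wouldHazard(unsigned UOps) const {
    return CurrMOps > 0 && CurrMOps + UOps > IssueWidth;
  }
};

int main() {
  IssueState S{/*IssueWidth=*/4};
  assert(!S.wouldHazard(4)); // empty group: always allowed to start
  S.CurrMOps = 3;
  assert(S.wouldHazard(2));  // 3 + 2 > 4: stall until the cycle is bumped
}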
- unsigned RemLatency = DependentLatency; - RemLatency = std::max(RemLatency, findMaxLatency(Available.elements())); - RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements())); - - // Compute the critical resource outside the zone. - unsigned OtherCritIdx; - unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx); - - bool OtherResLimited = false; - if (SchedModel->hasInstrSchedModel()) { - unsigned LFactor = SchedModel->getLatencyFactor(); - OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; - } - if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) { - Policy.ReduceLatency |= true; - DEBUG(dbgs() << " " << Available.getName() << " RemainingLatency " - << RemLatency << " + " << CurrCycle << "c > CritPath " - << Rem->CriticalPath << "\n"); - } - // If the same resource is limiting inside and outside the zone, do nothing. - if (ZoneCritResIdx == OtherCritIdx) - return; - - DEBUG( - if (IsResourceLimited) { - dbgs() << " " << Available.getName() << " ResourceLimited: " - << getResourceName(ZoneCritResIdx) << "\n"; - } - if (OtherResLimited) - dbgs() << " RemainingLimit: " << getResourceName(OtherCritIdx) << "\n"; - if (!IsResourceLimited && !OtherResLimited) - dbgs() << " Latency limited both directions.\n"); - - if (IsResourceLimited && !Policy.ReduceResIdx) - Policy.ReduceResIdx = ZoneCritResIdx; - - if (OtherResLimited) - Policy.DemandResIdx = OtherCritIdx; -} - -void GenericScheduler::SchedBoundary::releaseNode(SUnit *SU, - unsigned ReadyCycle) { - if (ReadyCycle < MinReadyCycle) - MinReadyCycle = ReadyCycle; - - // Check for interlocks first. For the purpose of other heuristics, an - // instruction that cannot issue appears as if it's not in the ReadyQueue. - bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; - if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU)) - Pending.push(SU); - else - Available.push(SU); - - // Record this node as an immediate dependent of the scheduled node. - NextSUs.insert(SU); -} - -/// Move the boundary of scheduled code by one cycle. -void GenericScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) { - if (SchedModel->getMicroOpBufferSize() == 0) { - assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); - if (MinReadyCycle > NextCycle) - NextCycle = MinReadyCycle; - } - // Update the current micro-ops, which will issue in the next cycle. - unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); - CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; - - // Decrement DependentLatency based on the next cycle. - if ((NextCycle - CurrCycle) > DependentLatency) - DependentLatency = 0; - else - DependentLatency -= (NextCycle - CurrCycle); - - if (!HazardRec->isEnabled()) { - // Bypass HazardRec virtual calls. - CurrCycle = NextCycle; - } - else { - // Bypass getHazardType calls in case of long latency. 
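The bumpCycle logic shown above does two pieces of bookkeeping when the boundary advances: it retires IssueWidth micro-ops per elapsed cycle from the current group, and it ages DependentLatency by the same number of cycles, clamping both at zero. A tiny sketch of that arithmetic in isolation (invented field set, no hazard recognizer):

struct Boundary {
  unsigned IssueWidth, CurrCycle = 0, CurrMOps = 0, DependentLatency = 0;

  void bumpCycle(unsigned NextCycle) {
    unsigned Cycles = NextCycle - CurrCycle;
    unsigned DecMOps = IssueWidth * Cycles;           // capacity that elapsed
    CurrMOps = CurrMOps <= DecMOps ? 0 : CurrMOps - DecMOps;
    DependentLatency = Cycles > DependentLatency ? 0
                                                 : DependentLatency - Cycles;
    CurrCycle = NextCycle;
  }
};

int main() {
  Boundary B{/*IssueWidth=*/2};
  B.CurrMOps = 3;
  B.DependentLatency = 5;
  B.bumpCycle(2); // two cycles pass: 3 - 4 -> 0 MOps, latency 5 - 2 -> 3
  return B.CurrMOps == 0 && B.DependentLatency == 3 ? 0 : 1;
}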
- for (; CurrCycle != NextCycle; ++CurrCycle) { - if (isTop()) - HazardRec->AdvanceCycle(); - else - HazardRec->RecedeCycle(); - } - } - CheckPending = true; - unsigned LFactor = SchedModel->getLatencyFactor(); - IsResourceLimited = - (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) - > (int)LFactor; - - DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); -} - -void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx, - unsigned Count) { - ExecutedResCounts[PIdx] += Count; - if (ExecutedResCounts[PIdx] > MaxExecutedResCount) - MaxExecutedResCount = ExecutedResCounts[PIdx]; -} - -/// Add the given processor resource to this scheduled zone. -/// -/// \param Cycles indicates the number of consecutive (non-pipelined) cycles -/// during which this resource is consumed. -/// -/// \return the next cycle at which the instruction may execute without -/// oversubscribing resources. -unsigned GenericScheduler::SchedBoundary:: -countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) { - unsigned Factor = SchedModel->getResourceFactor(PIdx); - unsigned Count = Factor * Cycles; - DEBUG(dbgs() << " " << getResourceName(PIdx) - << " +" << Cycles << "x" << Factor << "u\n"); - - // Update Executed resources counts. - incExecutedResources(PIdx, Count); - assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); - Rem->RemainingCounts[PIdx] -= Count; - - // Check if this resource exceeds the current critical resource. If so, it - // becomes the critical resource. - if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) { - ZoneCritResIdx = PIdx; - DEBUG(dbgs() << " *** Critical resource " - << getResourceName(PIdx) << ": " - << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n"); - } - // TODO: We don't yet model reserved resources. It's not hard though. - return CurrCycle; -} - -/// Move the boundary of scheduled code by one SUnit. -void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) { - // Update the reservation table. - if (HazardRec->isEnabled()) { - if (!isTop() && SU->isCall) { - // Calls are scheduled with their preceding instructions. For bottom-up - // scheduling, clear the pipeline state before emitting. - HazardRec->Reset(); - } - HazardRec->EmitInstruction(SU); - } - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); - unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); - CurrMOps += IncMOps; - // checkHazard prevents scheduling multiple instructions per cycle that exceed - // issue width. However, we commonly reach the maximum. In this case - // opportunistically bump the cycle to avoid uselessly checking everything in - // the readyQ. Furthermore, a single instruction may produce more than one - // cycle's worth of micro-ops. - // - // TODO: Also check if this SU must end a dispatch group. - unsigned NextCycle = CurrCycle; - if (CurrMOps >= SchedModel->getIssueWidth()) { - ++NextCycle; - DEBUG(dbgs() << " *** Max MOps " << CurrMOps - << " at cycle " << CurrCycle << '\n'); - } - unsigned ReadyCycle = (isTop() ? 
SU->TopReadyCycle : SU->BotReadyCycle); - DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); - - switch (SchedModel->getMicroOpBufferSize()) { - case 0: - assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); - break; - case 1: - if (ReadyCycle > NextCycle) { - NextCycle = ReadyCycle; - DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n"); - } - break; - default: - // We don't currently model the OOO reorder buffer, so consider all - // scheduled MOps to be "retired". - break; - } - RetiredMOps += IncMOps; - - // Update resource counts and critical resource. - if (SchedModel->hasInstrSchedModel()) { - unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); - assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); - Rem->RemIssueCount -= DecRemIssue; - if (ZoneCritResIdx) { - // Scale scheduled micro-ops for comparing with the critical resource. - unsigned ScaledMOps = - RetiredMOps * SchedModel->getMicroOpFactor(); - - // If scaled micro-ops are now more than the previous critical resource by - // a full cycle, then micro-ops issue becomes critical. - if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) - >= (int)SchedModel->getLatencyFactor()) { - ZoneCritResIdx = 0; - DEBUG(dbgs() << " *** Critical resource NumMicroOps: " - << ScaledMOps / SchedModel->getLatencyFactor() << "c\n"); - } - } - for (TargetSchedModel::ProcResIter - PI = SchedModel->getWriteProcResBegin(SC), - PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { - unsigned RCycle = - countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle); - if (RCycle > NextCycle) - NextCycle = RCycle; - } - } - // Update ExpectedLatency and DependentLatency. - unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; - unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency; - if (SU->getDepth() > TopLatency) { - TopLatency = SU->getDepth(); - DEBUG(dbgs() << " " << Available.getName() - << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n"); - } - if (SU->getHeight() > BotLatency) { - BotLatency = SU->getHeight(); - DEBUG(dbgs() << " " << Available.getName() - << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n"); - } - // If we stall for any reason, bump the cycle. - if (NextCycle > CurrCycle) { - bumpCycle(NextCycle); - } - else { - // After updating ZoneCritResIdx and ExpectedLatency, check if we're - // resource limited. If a stall occured, bumpCycle does this. - unsigned LFactor = SchedModel->getLatencyFactor(); - IsResourceLimited = - (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) - > (int)LFactor; - } - DEBUG(dumpScheduledState()); -} - -/// Release pending ready nodes in to the available queue. This makes them -/// visible to heuristics. -void GenericScheduler::SchedBoundary::releasePending() { - // If the available queue is empty, it is safe to reset MinReadyCycle. - if (Available.empty()) - MinReadyCycle = UINT_MAX; - - // Check to see if any of the pending instructions are ready to issue. If - // so, add them to the available queue. - bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; - for (unsigned i = 0, e = Pending.size(); i != e; ++i) { - SUnit *SU = *(Pending.begin()+i); - unsigned ReadyCycle = isTop() ? 
SU->TopReadyCycle : SU->BotReadyCycle; - - if (ReadyCycle < MinReadyCycle) - MinReadyCycle = ReadyCycle; - - if (!IsBuffered && ReadyCycle > CurrCycle) - continue; - - if (checkHazard(SU)) - continue; - - Available.push(SU); - Pending.remove(Pending.begin()+i); - --i; --e; - } - DEBUG(if (!Pending.empty()) Pending.dump()); - CheckPending = false; -} - -/// Remove SU from the ready set for this boundary. -void GenericScheduler::SchedBoundary::removeReady(SUnit *SU) { - if (Available.isInQueue(SU)) - Available.remove(Available.find(SU)); - else { - assert(Pending.isInQueue(SU) && "bad ready count"); - Pending.remove(Pending.find(SU)); - } -} - -/// If this queue only has one ready candidate, return it. As a side effect, -/// defer any nodes that now hit a hazard, and advance the cycle until at least -/// one node is ready. If multiple instructions are ready, return NULL. -SUnit *GenericScheduler::SchedBoundary::pickOnlyChoice() { - if (CheckPending) - releasePending(); - - if (CurrMOps > 0) { - // Defer any ready instrs that now have a hazard. - for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { - if (checkHazard(*I)) { - Pending.push(*I); - I = Available.remove(I); - continue; - } - ++I; - } - } - for (unsigned i = 0; Available.empty(); ++i) { - assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) && - "permanent hazard"); (void)i; - bumpCycle(CurrCycle + 1); - releasePending(); - } - if (Available.size() == 1) - return *Available.begin(); - return NULL; -} - -#ifndef NDEBUG -// This is useful information to dump after bumpNode. -// Note that the Queue contents are more useful before pickNodeFromQueue. -void GenericScheduler::SchedBoundary::dumpScheduledState() { - unsigned ResFactor; - unsigned ResCount; - if (ZoneCritResIdx) { - ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); - ResCount = getResourceCount(ZoneCritResIdx); - } - else { - ResFactor = SchedModel->getMicroOpFactor(); - ResCount = RetiredMOps * SchedModel->getMicroOpFactor(); - } - unsigned LFactor = SchedModel->getLatencyFactor(); - dbgs() << Available.getName() << " @" << CurrCycle << "c\n" - << " Retired: " << RetiredMOps; - dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; - dbgs() << "\n Critical: " << ResCount / LFactor << "c, " - << ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx) - << "\n ExpectedLatency: " << ExpectedLatency << "c\n" - << (IsResourceLimited ? " - Resource" : " - Latency") - << " limited.\n"; -} -#endif - -void GenericScheduler::SchedCandidate:: -initResourceDelta(const ScheduleDAGMI *DAG, - const TargetSchedModel *SchedModel) { - if (!Policy.ReduceResIdx && !Policy.DemandResIdx) - return; - - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); - for (TargetSchedModel::ProcResIter - PI = SchedModel->getWriteProcResBegin(SC), - PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { - if (PI->ProcResourceIdx == Policy.ReduceResIdx) - ResDelta.CritResources += PI->Cycles; - if (PI->ProcResourceIdx == Policy.DemandResIdx) - ResDelta.DemandedResources += PI->Cycles; - } -} - - -/// Return true if this heuristic determines order. 
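pickOnlyChoice above is a shortcut: advance the cycle until something is ready, and skip the whole heuristic cascade whenever exactly one node is available. Roughly, as a standalone toy (plain int node ids; the stand-in releasePending ripens one node per cycle):

#include <cassert>
#include <deque>

// Toy model of the pickOnlyChoice shortcut: advance the cycle until the ready
// queue is non-empty, then short-circuit only when there is exactly one choice.
struct ToyZone {
  std::deque<int> Available, Pending;   // node ids; Pending is not ready yet
  unsigned CurrCycle = 0;

  void releasePending() {               // stand-in: one node ripens per cycle
    if (!Pending.empty()) {
      Available.push_back(Pending.front());
      Pending.pop_front();
    }
  }

  const int *pickOnlyChoice() {
    while (Available.empty()) {
      assert(!Pending.empty() && "permanent hazard"); // mirrors the real assert
      ++CurrCycle;                      // bumpCycle(CurrCycle + 1)
      releasePending();
    }
    return Available.size() == 1 ? &Available.front() : nullptr;
  }
};

int main() {
  ToyZone Z;
  Z.Pending = {7};
  return Z.pickOnlyChoice() && *Z.pickOnlyChoice() == 7 ? 0 : 1;
}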
-static bool tryLess(int TryVal, int CandVal, - GenericScheduler::SchedCandidate &TryCand, - GenericScheduler::SchedCandidate &Cand, - GenericScheduler::CandReason Reason) { - if (TryVal < CandVal) { - TryCand.Reason = Reason; - return true; - } - if (TryVal > CandVal) { - if (Cand.Reason > Reason) - Cand.Reason = Reason; - return true; - } - Cand.setRepeat(Reason); - return false; -} - -static bool tryGreater(int TryVal, int CandVal, - GenericScheduler::SchedCandidate &TryCand, - GenericScheduler::SchedCandidate &Cand, - GenericScheduler::CandReason Reason) { - if (TryVal > CandVal) { - TryCand.Reason = Reason; - return true; - } - if (TryVal < CandVal) { - if (Cand.Reason > Reason) - Cand.Reason = Reason; - return true; - } - Cand.setRepeat(Reason); - return false; -} - static bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, - GenericScheduler::SchedCandidate &TryCand, - GenericScheduler::SchedCandidate &Cand, - GenericScheduler::CandReason Reason) { + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { int TryRank = TryP.getPSetOrMax(); int CandRank = CandP.getPSetOrMax(); // If both candidates affect the same set, go with the smallest increase. @@ -2407,32 +2674,6 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) { return 0; } -static bool tryLatency(GenericScheduler::SchedCandidate &TryCand, - GenericScheduler::SchedCandidate &Cand, - GenericScheduler::SchedBoundary &Zone) { - if (Zone.isTop()) { - if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { - if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), - TryCand, Cand, GenericScheduler::TopDepthReduce)) - return true; - } - if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), - TryCand, Cand, GenericScheduler::TopPathReduce)) - return true; - } - else { - if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { - if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), - TryCand, Cand, GenericScheduler::BotHeightReduce)) - return true; - } - if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), - TryCand, Cand, GenericScheduler::BotPathReduce)) - return true; - } - return false; -} - /// Apply a set of heursitics to a new candidate. Heuristics are currently /// hierarchical. This may be more efficient than a graduated cost model because /// we don't need to evaluate all aspects of the model for each node in the @@ -2445,10 +2686,10 @@ static bool tryLatency(GenericScheduler::SchedCandidate &TryCand, /// \param RPTracker describes reg pressure within the scheduled zone. /// \param TempTracker is a scratch pressure tracker to reuse in queries. void GenericScheduler::tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary &Zone, - const RegPressureTracker &RPTracker, - RegPressureTracker &TempTracker) { + SchedCandidate &TryCand, + SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + RegPressureTracker &TempTracker) { if (DAG->isTrackingPressure()) { // Always initialize TryCand's RPDelta. @@ -2510,10 +2751,15 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // For loops that are acyclic path limited, aggressively schedule for latency. // This can result in very long dependence chains scheduled in sequence, so // once every cycle (when CurrMOps == 0), switch to normal heuristics. 
- if (Rem.IsAcyclicLatencyLimited && !Zone.CurrMOps + if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps() && tryLatency(TryCand, Cand, Zone)) return; + // Prioritize instructions that read unbuffered resources by stall cycles. + if (tryLess(Zone.getLatencyStallCycles(TryCand.SU), + Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) + return; + // Keep clustered nodes together to encourage downstream peephole // optimizations which may reduce resource requirements. // @@ -2558,7 +2804,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // Prefer immediate defs/users of the last scheduled instruction. This is a // local pressure avoidance strategy that also makes the machine code // readable. - if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU), + if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU), TryCand, Cand, NextDefUse)) return; @@ -2569,90 +2815,14 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, } } -#ifndef NDEBUG -const char *GenericScheduler::getReasonStr( - GenericScheduler::CandReason Reason) { - switch (Reason) { - case NoCand: return "NOCAND "; - case PhysRegCopy: return "PREG-COPY"; - case RegExcess: return "REG-EXCESS"; - case RegCritical: return "REG-CRIT "; - case Cluster: return "CLUSTER "; - case Weak: return "WEAK "; - case RegMax: return "REG-MAX "; - case ResourceReduce: return "RES-REDUCE"; - case ResourceDemand: return "RES-DEMAND"; - case TopDepthReduce: return "TOP-DEPTH "; - case TopPathReduce: return "TOP-PATH "; - case BotHeightReduce:return "BOT-HEIGHT"; - case BotPathReduce: return "BOT-PATH "; - case NextDefUse: return "DEF-USE "; - case NodeOrder: return "ORDER "; - }; - llvm_unreachable("Unknown reason!"); -} - -void GenericScheduler::traceCandidate(const SchedCandidate &Cand) { - PressureChange P; - unsigned ResIdx = 0; - unsigned Latency = 0; - switch (Cand.Reason) { - default: - break; - case RegExcess: - P = Cand.RPDelta.Excess; - break; - case RegCritical: - P = Cand.RPDelta.CriticalMax; - break; - case RegMax: - P = Cand.RPDelta.CurrentMax; - break; - case ResourceReduce: - ResIdx = Cand.Policy.ReduceResIdx; - break; - case ResourceDemand: - ResIdx = Cand.Policy.DemandResIdx; - break; - case TopDepthReduce: - Latency = Cand.SU->getDepth(); - break; - case TopPathReduce: - Latency = Cand.SU->getHeight(); - break; - case BotHeightReduce: - Latency = Cand.SU->getHeight(); - break; - case BotPathReduce: - Latency = Cand.SU->getDepth(); - break; - } - dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); - if (P.isValid()) - dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) - << ":" << P.getUnitInc() << " "; - else - dbgs() << " "; - if (ResIdx) - dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; - else - dbgs() << " "; - if (Latency) - dbgs() << " " << Latency << " cycles "; - else - dbgs() << " "; - dbgs() << '\n'; -} -#endif - /// Pick the best candidate from the queue. /// /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during /// DAG building. To adjust for the current scheduling location we need to /// maintain the number of vreg uses remaining to be top-scheduled. 
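pickNodeFromQueue below scans every available node, re-running the heuristic cascade against the current best and keeping whichever wins. Stripped of zones and register pressure, the scan reduces to the following shape (toy types; the two-level tryCandidate here, stalls then node order, merely stands in for the real cascade):

#include <vector>

enum Reason { NoCand, Stall, NodeOrder };

struct Cand {
  int Node = -1, Stalls = 0;
  Reason R = NoCand;
};

// Stand-in for the real cascade: fewer stall cycles wins, then lower node id.
static void tryCandidate(Cand &Best, Cand &Try) {
  if (Best.R == NoCand) { Try.R = NodeOrder; return; }  // first candidate
  if (Try.Stalls != Best.Stalls) {
    if (Try.Stalls < Best.Stalls) Try.R = Stall;
    return;
  }
  if (Try.Node < Best.Node) Try.R = NodeOrder;
}

static Cand pickNodeFromQueue(const std::vector<Cand> &Q) {
  Cand Best;                          // invalid (R == NoCand) if Q is empty
  for (Cand Try : Q) {
    tryCandidate(Best, Try);
    if (Try.R != NoCand) Best = Try;  // Try won: it becomes the incumbent.
  }
  return Best;
}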
void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, - const RegPressureTracker &RPTracker, - SchedCandidate &Cand) { + const RegPressureTracker &RPTracker, + SchedCandidate &Cand) { ReadyQueue &Q = Zone.Available; DEBUG(Q.dump()); @@ -2675,12 +2845,6 @@ void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, } } -static void tracePick(const GenericScheduler::SchedCandidate &Cand, - bool IsTop) { - DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") - << GenericScheduler::getReasonStr(Cand.Reason) << '\n'); -} - /// Pick the best candidate node from either the top or bottom queue. SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { // Schedule as far as possible in the direction of no choice. This is most @@ -2698,8 +2862,12 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { CandPolicy NoPolicy; SchedCandidate BotCand(NoPolicy); SchedCandidate TopCand(NoPolicy); - Bot.setPolicy(BotCand.Policy, Top); - Top.setPolicy(TopCand.Policy, Bot); + // Set the bottom-up policy based on the state of the current bottom zone and + // the instructions outside the zone, including the top zone. + setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top); + // Set the top-down policy based on the state of the current top zone and + // the instructions outside the zone, including the bottom zone. + setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot); // Prefer bottom scheduling when heuristics are silent. pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); @@ -2809,20 +2977,21 @@ void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { } /// Update the scheduler's state after scheduling a node. This is the same node -/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update -/// its state based on the current cycle before MachineSchedStrategy does. +/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to +/// update its state based on the current cycle before MachineSchedStrategy +/// does. /// /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling /// them here. See comments in biasPhysRegCopy. void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { - SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle); + SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); Top.bumpNode(SU); if (SU->hasPhysRegUses) reschedulePhysRegCopies(SU, true); } else { - SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle); + SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); Bot.bumpNode(SU); if (SU->hasPhysRegDefs) reschedulePhysRegCopies(SU, false); @@ -2831,8 +3000,8 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { /// Create the standard converging machine scheduler. This will be used as the /// default scheduler if the target does not set a default. -static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C) { - ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new GenericScheduler(C)); +static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) { + ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, new GenericScheduler(C)); // Register DAG post-processors.
// // FIXME: extend the mutation API to allow earlier mutations to instantiate @@ -2845,9 +3014,191 @@ static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C) { DAG->addMutation(new MacroFusion(DAG->TII)); return DAG; } + static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", - createGenericSched); + createGenericSchedLive); + +//===----------------------------------------------------------------------===// +// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy. +//===----------------------------------------------------------------------===// + +namespace { +/// PostGenericScheduler - Interface to the scheduling algorithm used by +/// ScheduleDAGMI. +/// +/// Callbacks from ScheduleDAGMI: +/// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ... +class PostGenericScheduler : public GenericSchedulerBase { + ScheduleDAGMI *DAG; + SchedBoundary Top; + SmallVector<SUnit*, 8> BotRoots; +public: + PostGenericScheduler(const MachineSchedContext *C): + GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {} + + virtual ~PostGenericScheduler() {} + + virtual void initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) LLVM_OVERRIDE { + /* no configurable policy */ + }; + + /// PostRA scheduling does not track pressure. + virtual bool shouldTrackPressure() const LLVM_OVERRIDE { return false; } + + virtual void initialize(ScheduleDAGMI *Dag) LLVM_OVERRIDE { + DAG = Dag; + SchedModel = DAG->getSchedModel(); + TRI = DAG->TRI; + + Rem.init(DAG, SchedModel); + Top.init(DAG, SchedModel, &Rem); + BotRoots.clear(); + + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, + // or are disabled, then these HazardRecs will be disabled. + const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); + const TargetMachine &TM = DAG->MF.getTarget(); + if (!Top.HazardRec) { + Top.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + } + } + + virtual void registerRoots() LLVM_OVERRIDE; + + virtual SUnit *pickNode(bool &IsTopNode) LLVM_OVERRIDE; + + virtual void scheduleTree(unsigned SubtreeID) LLVM_OVERRIDE { + llvm_unreachable("PostRA scheduler does not support subtree analysis."); + } + + virtual void schedNode(SUnit *SU, bool IsTopNode) LLVM_OVERRIDE; + + virtual void releaseTopNode(SUnit *SU) LLVM_OVERRIDE { + Top.releaseTopNode(SU); + } + + // Only called for roots. + virtual void releaseBottomNode(SUnit *SU) LLVM_OVERRIDE { + BotRoots.push_back(SU); + } + +protected: + void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand); + + void pickNodeFromQueue(SchedCandidate &Cand); +}; +} // namespace + +void PostGenericScheduler::registerRoots() { + Rem.CriticalPath = DAG->ExitSU.getDepth(); + + // Some roots may not feed into ExitSU. Check all of them in case. + for (SmallVectorImpl<SUnit*>::const_iterator + I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) { + if ((*I)->getDepth() > Rem.CriticalPath) + Rem.CriticalPath = (*I)->getDepth(); + } + DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); +} + +/// Apply a set of heuristics to a new candidate for PostRA scheduling. +/// +/// \param Cand provides the policy and current best candidate. +/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. +void PostGenericScheduler::tryCandidate(SchedCandidate &Cand, + SchedCandidate &TryCand) { + + // Initialize the candidate if needed.
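registerRoots above seeds the critical path with the exit node's depth and then raises it for any bottom roots that do not feed into ExitSU. In isolation, and assuming depths are already computed, the calculation is just a running maximum:

#include <algorithm>
#include <vector>

// Toy version of PostGenericScheduler::registerRoots(): the critical path is
// the deepest bottom root, seeded with the exit node's depth.
static unsigned criticalPath(unsigned ExitDepth,
                             const std::vector<unsigned> &BotRootDepths) {
  unsigned CP = ExitDepth;
  for (unsigned D : BotRootDepths)   // some roots may not feed into ExitSU
    CP = std::max(CP, D);
  return CP;
}

int main() {
  return criticalPath(10, {4, 12, 7}) == 12 ? 0 : 1;
}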
+ if (!Cand.isValid()) { + TryCand.Reason = NodeOrder; + return; + } + + // Prioritize instructions that read unbuffered resources by stall cycles. + if (tryLess(Top.getLatencyStallCycles(TryCand.SU), + Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) + return; + + // Avoid critical resource consumption and balance the schedule. + if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, + TryCand, Cand, ResourceReduce)) + return; + if (tryGreater(TryCand.ResDelta.DemandedResources, + Cand.ResDelta.DemandedResources, + TryCand, Cand, ResourceDemand)) + return; + + // Avoid serializing long latency dependence chains. + if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { + return; + } + + // Fall through to original instruction order. + if (TryCand.SU->NodeNum < Cand.SU->NodeNum) + TryCand.Reason = NodeOrder; +} + +void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { + ReadyQueue &Q = Top.Available; + + DEBUG(Q.dump()); + + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + SchedCandidate TryCand(Cand.Policy); + TryCand.SU = *I; + TryCand.initResourceDelta(DAG, SchedModel); + tryCandidate(Cand, TryCand); + if (TryCand.Reason != NoCand) { + Cand.setBest(TryCand); + DEBUG(traceCandidate(Cand)); + } + } +} + +/// Pick the next node to schedule. +SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) { + assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage"); + return NULL; + } + SUnit *SU; + do { + SU = Top.pickOnlyChoice(); + if (!SU) { + CandPolicy NoPolicy; + SchedCandidate TopCand(NoPolicy); + // Set the top-down policy based on the state of the current top zone and + // the instructions outside the zone, including the bottom zone. + setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, NULL); + pickNodeFromQueue(TopCand); + assert(TopCand.Reason != NoCand && "failed to find a candidate"); + tracePick(TopCand, true); + SU = TopCand.SU; + } + } while (SU->isScheduled); + + IsTopNode = true; + Top.removeReady(SU); + + DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); + return SU; +} + +/// Called after ScheduleDAGMI has scheduled an instruction and updated +/// scheduled/remaining flags in the DAG nodes. +void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { + SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); + Top.bumpNode(SU); +} + +/// Create a generic scheduler with no vreg liveness or DAG mutation passes. +static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) { + return new ScheduleDAGMI(C, new PostGenericScheduler(C), /*IsPostRA=*/true); +} //===----------------------------------------------------------------------===// // ILP Scheduler. Currently for experimental analysis of heuristics. @@ -2889,7 +3240,7 @@ struct ILPOrder { /// \brief Schedule based on the ILP metric. 
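createGenericSchedPostRA above follows the same factory-plus-registry pattern as createGenericSchedLive: a free function constructs the DAG with its strategy, and a static registration object makes it selectable by name. A minimal registry of the same shape, with toy types standing in for MachineSchedRegistry and ScheduleDAGInstrs:

#include <functional>
#include <map>
#include <memory>
#include <string>

struct Scheduler {                       // toy stand-in for ScheduleDAGInstrs
  virtual ~Scheduler() {}
};
struct PostRAScheduler : Scheduler {};

using Factory = std::function<std::unique_ptr<Scheduler>()>;

static std::map<std::string, Factory> &registry() {
  static std::map<std::string, Factory> R;   // constructed on first use
  return R;
}

// Mirrors "static MachineSchedRegistry X(name, desc, createFn);": the static
// object's constructor runs before main and records the factory.
struct Registrar {
  Registrar(const std::string &Name, Factory F) { registry()[Name] = F; }
};

static Registrar PostRAReg("postra-toy",
                           [] { return std::make_unique<PostRAScheduler>(); });

int main() {
  auto S = registry()["postra-toy"]();   // look up and construct by name
  return S ? 0 : 1;
}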
class ILPScheduler : public MachineSchedStrategy { - ScheduleDAGMI *DAG; + ScheduleDAGMILive *DAG; ILPOrder Cmp; std::vector<SUnit*> ReadyQ; @@ -2897,7 +3248,8 @@ public: ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {} virtual void initialize(ScheduleDAGMI *dag) { - DAG = dag; + assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness"); + DAG = static_cast<ScheduleDAGMILive*>(dag); DAG->computeDFSResult(); Cmp.DFSResult = DAG->getDFSResult(); Cmp.ScheduledTrees = &DAG->getScheduledTrees(); @@ -2949,10 +3301,10 @@ public: } // namespace static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { - return new ScheduleDAGMI(C, new ILPScheduler(true)); + return new ScheduleDAGMILive(C, new ILPScheduler(true)); } static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { - return new ScheduleDAGMI(C, new ILPScheduler(false)); + return new ScheduleDAGMILive(C, new ILPScheduler(false)); } static MachineSchedRegistry ILPMaxRegistry( "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); @@ -2994,7 +3346,7 @@ public: InstructionShuffler(bool alternate, bool topdown) : IsAlternating(alternate), IsTopDown(topdown) {} - virtual void initialize(ScheduleDAGMI *) { + virtual void initialize(ScheduleDAGMI*) { TopQ.clear(); BottomQ.clear(); } @@ -3041,7 +3393,7 @@ static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { bool TopDown = !ForceBottomUp; assert((TopDown || !ForceTopDown) && "-misched-topdown incompatible with -misched-bottomup"); - return new ScheduleDAGMILive(C, new InstructionShuffler(Alternate, TopDown)); + return new ScheduleDAGMILive(C, new InstructionShuffler(Alternate, TopDown)); } static MachineSchedRegistry ShufflerRegistry( "shuffle", "Shuffle machine instructions alternating directions", @@ -3049,7 +3401,7 @@ static MachineSchedRegistry ShufflerRegistry( #endif // !NDEBUG //===----------------------------------------------------------------------===// -// GraphWriter support for ScheduleDAGMI. +// GraphWriter support for ScheduleDAGMILive. //===----------------------------------------------------------------------===// #ifndef NDEBUG @@ -3095,8 +3447,9 @@ struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits { static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { std::string Str; raw_string_ostream SS(Str); - const SchedDFSResult *DFS = - static_cast<const ScheduleDAGMI*>(G)->getDFSResult(); + const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); + const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? + static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : 0; SS << "SU:" << SU->NodeNum; if (DFS) SS << " I:" << DFS->getNumInstrs(SU); @@ -3106,11 +3459,11 @@ struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits { return G->getGraphNodeLabel(SU); } - static std::string getNodeAttributes(const SUnit *N, - const ScheduleDAG *Graph) { + static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) { std::string Str("shape=Mrecord"); - const SchedDFSResult *DFS = - static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult(); + const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); + const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
+ static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : 0; if (DFS) { Str += ",style=filled,fillcolor=\"#"; Str += DOT::getColorString(DFS->getSubtreeID(N)); diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/Passes.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/Passes.cpp index f4ffd03ec3ea..fed5a4aaca56 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/Passes.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/Passes.cpp @@ -30,6 +30,11 @@ using namespace llvm; +namespace llvm { +extern cl::opt<bool> EnableStackMapLiveness; +extern cl::opt<bool> EnablePatchPointLiveness; +} + static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden, cl::desc("Disable Post Regalloc")); static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden, @@ -83,6 +88,14 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), cl::value_desc("pass-name"), cl::init("option-unspecified")); +// Temporary option to allow experimenting with MachineScheduler as a post-RA +// scheduler. Targets can "properly" enable this with +// substitutePass(&PostRASchedulerID, &MachineSchedulerID); Ideally it wouldn't +// be part of the standard pass pipeline, and the target would just add a PostRA +// scheduling pass wherever it wants. +static cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden, + cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)")); + // Experimental option to run live interval analysis early. static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden, cl::desc("Run live interval analysis earlier in the pipeline")); @@ -422,10 +435,10 @@ void TargetPassConfig::addCodeGenPrepare() { /// Add common passes that perform LLVM IR to IR transforms in preparation for /// instruction selection. void TargetPassConfig::addISelPrepare() { - addPass(createStackProtectorPass(TM)); - addPreISel(); + addPreISel(); + addPass(createStackProtectorPass(TM)); + if (PrintISelInput) addPass(createPrintFunctionPass("\n\n" "*** Final LLVM Code input to ISel ***\n", @@ -520,7 +533,10 @@ void TargetPassConfig::addMachinePasses() { // Second pass scheduler. if (getOptLevel() != CodeGenOpt::None) { - addPass(&PostRASchedulerID); + if (MISchedPostRA) + addPass(&PostMachineSchedulerID); + else + addPass(&PostRASchedulerID); printAndVerify("After PostRAScheduler"); } @@ -536,6 +552,9 @@ if (addPreEmitPass()) printAndVerify("After PreEmit passes"); + + if (EnableStackMapLiveness || EnablePatchPointLiveness) + addPass(&StackMapLivenessID); } /// Add passes that optimize machine instructions in SSA form. @@ -725,7 +744,10 @@ void TargetPassConfig::addMachineLateOptimization() { printAndVerify("After BranchFolding"); // Tail duplication. - if (addPass(&TailDuplicateID)) + // Note that duplicating tail just increases code size and degrades + // performance for targets that require Structured Control Flow. + // In addition it can also make CFG irreducible. Thus we disable it. + if (!TM->requiresStructuredCFG() && addPass(&TailDuplicateID)) printAndVerify("After TailDuplicate"); // Copy propagation.
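The new -misched-postra handling above boils down to one branch in pipeline construction: at -O0 no second scheduler runs, otherwise the flag selects which pass id is queued. A reduced sketch of that decision (a string list stands in for the real pass manager):

#include <string>
#include <vector>

// Toy stand-in for the choice made in addMachinePasses(): one flag selects
// which second-pass scheduler gets queued.
static void addSecondPassScheduler(bool MISchedPostRA, bool Optimizing,
                                   std::vector<std::string> &Pipeline) {
  if (!Optimizing)
    return;                                   // -O0: no post-RA scheduling
  Pipeline.push_back(MISchedPostRA ? "PostMachineScheduler"
                                   : "PostRAScheduler");
}

int main() {
  std::vector<std::string> P;
  addSecondPassScheduler(/*MISchedPostRA=*/true, /*Optimizing=*/true, P);
  return P.size() == 1 && P[0] == "PostMachineScheduler" ? 0 : 1;
}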
diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/PostRASchedulerList.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/PostRASchedulerList.cpp index 1afc1eca00a9..859643f9b8ec 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -30,7 +30,6 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" @@ -121,9 +120,6 @@ namespace { /// AA - AliasAnalysis for making memory reference queries. AliasAnalysis *AA; - /// LiveRegs - true if the register is live. - BitVector LiveRegs; - /// The schedule. Null SUnit*'s represent noop instructions. std::vector<SUnit*> Sequence; @@ -174,11 +170,6 @@ namespace { /// void finishBlock(); - /// FixupKills - Fix register kill flags that have been made - /// invalid due to scheduling - /// - void FixupKills(MachineBasicBlock *MBB); - private: void ReleaseSucc(SUnit *SU, SDep *SuccEdge); void ReleaseSuccessors(SUnit *SU); @@ -186,12 +177,8 @@ namespace { void ListScheduleTopDown(); void StartBlockForKills(MachineBasicBlock *BB); - // ToggleKillFlag - Toggle a register operand kill flag. Other - // adjustments may be made to the instruction if necessary. Return - // true if the operand has been deleted, false if not. - bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO); - void dumpSchedule() const; + void emitNoop(unsigned CurCycle); }; } @@ -205,9 +192,8 @@ SchedulePostRATDList::SchedulePostRATDList( AliasAnalysis *AA, const RegisterClassInfo &RCI, TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs) - : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA), - LiveRegs(TRI->getNumRegs()), EndIndex(0) -{ + : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA), EndIndex(0) { + const TargetMachine &TM = MF.getTarget(); const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); HazardRec = @@ -352,7 +338,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Scheduler.finishBlock(); // Update register kills - Scheduler.FixupKills(MBB); + Scheduler.fixupKills(MBB); } return true; @@ -423,148 +409,6 @@ void SchedulePostRATDList::finishBlock() { ScheduleDAGInstrs::finishBlock(); } -/// StartBlockForKills - Initialize register live-range state for updating kills -/// -void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { - // Start with no live registers. - LiveRegs.reset(); - - // Examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) { - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - // Repeat, for reg and all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.set(*SubRegs); - } - } -} - -bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, - MachineOperand &MO) { - // Setting kill flag... - if (!MO.isKill()) { - MO.setIsKill(true); - return false; - } - - // If MO itself is live, clear the kill flag... - if (LiveRegs.test(MO.getReg())) { - MO.setIsKill(false); - return false; - } - - // If any subreg of MO is live, then create an imp-def for that - // subreg and keep MO marked as killed.
- MO.setIsKill(false); - bool AllDead = true; - const unsigned SuperReg = MO.getReg(); - MachineInstrBuilder MIB(MF, MI); - for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) { - if (LiveRegs.test(*SubRegs)) { - MIB.addReg(*SubRegs, RegState::ImplicitDefine); - AllDead = false; - } - } - - if(AllDead) - MO.setIsKill(true); - return false; -} - -/// FixupKills - Fix the register kill flags, they may have been made -/// incorrect by instruction reordering. -/// -void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); - - BitVector killedRegs(TRI->getNumRegs()); - - StartBlockForKills(MBB); - - // Examine block from end to start... - unsigned Count = MBB->size(); - for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); - I != E; --Count) { - MachineInstr *MI = --I; - if (MI->isDebugValue()) - continue; - - // Update liveness. Registers that are defed but not used in this - // instruction are now dead. Mark register and all subregs as they - // are completely defined. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isRegMask()) - LiveRegs.clearBitsNotInMask(MO.getRegMask()); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - if (!MO.isDef()) continue; - // Ignore two-addr defs. - if (MI->isRegTiedToUseOperand(i)) continue; - - // Repeat for reg and all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.reset(*SubRegs); - } - - // Examine all used registers and set/clear kill flag. When a - // register is used multiple times we only set the kill flag on - // the first use. Don't set kill flags on undef operands. - killedRegs.reset(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; - unsigned Reg = MO.getReg(); - if ((Reg == 0) || MRI.isReserved(Reg)) continue; - - bool kill = false; - if (!killedRegs.test(Reg)) { - kill = true; - // A register is not killed if any subregs are live... - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { - if (LiveRegs.test(*SubRegs)) { - kill = false; - break; - } - } - - // If subreg is not live, then register is killed if it became - // live in this instruction - if (kill) - kill = !LiveRegs.test(Reg); - } - - if (MO.isKill() != kill) { - DEBUG(dbgs() << "Fixing " << MO << " in "); - // Warning: ToggleKillFlag may invalidate MO. - ToggleKillFlag(MI, MO); - DEBUG(MI->dump()); - } - - killedRegs.set(Reg); - } - - // Mark any used register (that is not using undef) and subregs as - // now live... 
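The FixupKills walk being deleted here (the work now happens in the shared ScheduleDAGInstrs::fixupKills) recomputes kill flags with one backward pass: defs make a register dead above them, and a use is a kill exactly when the register is not live below it. A reduced model over plain integer registers, ignoring subregisters, reserved registers, and two-address defs:

#include <set>
#include <vector>

struct Inst {
  std::vector<int> Defs, Uses;
  std::vector<bool> Kill;              // parallel to Uses; output of the pass
};

// Backward scan: a use is a kill iff the register is not live below it.
static void fixupKills(std::vector<Inst> &Block, std::set<int> LiveOut) {
  std::set<int> Live = LiveOut;
  for (auto I = Block.rbegin(); I != Block.rend(); ++I) {
    for (int R : I->Defs)
      Live.erase(R);                   // defined here: the value is dead above
    I->Kill.assign(I->Uses.size(), false);
    for (size_t U = 0; U < I->Uses.size(); ++U) {
      I->Kill[U] = !Live.count(I->Uses[U]);
      Live.insert(I->Uses[U]);         // live from here up to its def
    }
  }
}

As in the removed code, defs are cleared from the live set before uses are examined, so an instruction that both reads and redefines a register still marks the read as the last use of the old value.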
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; - unsigned Reg = MO.getReg(); - if ((Reg == 0) || MRI.isReserved(Reg)) continue; - - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.set(*SubRegs); - } - } -} - //===----------------------------------------------------------------------===// // Top-Down Scheduling //===----------------------------------------------------------------------===// @@ -630,6 +474,14 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { AvailableQueue.scheduledNode(SU); } +/// emitNoop - Add a noop to the current instruction sequence. +void SchedulePostRATDList::emitNoop(unsigned CurCycle) { + DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n'); + HazardRec->EmitNoop(); + Sequence.push_back(0); // NULL here means noop + ++NumNoops; +} + /// ListScheduleTopDown - The main loop of list scheduling for top-down /// schedulers. void SchedulePostRATDList::ListScheduleTopDown() { @@ -678,7 +530,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this)); - SUnit *FoundSUnit = 0; + SUnit *FoundSUnit = 0, *NotPreferredSUnit = 0; bool HasNoopHazards = false; while (!AvailableQueue.empty()) { SUnit *CurSUnit = AvailableQueue.pop(); @@ -686,8 +538,19 @@ void SchedulePostRATDList::ListScheduleTopDown() { ScheduleHazardRecognizer::HazardType HT = HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); if (HT == ScheduleHazardRecognizer::NoHazard) { - FoundSUnit = CurSUnit; - break; + if (HazardRec->ShouldPreferAnother(CurSUnit)) { + if (!NotPreferredSUnit) { + // If this is the first non-preferred node for this cycle, then + // record it and continue searching for a preferred node. If this + // is not the first non-preferred node, then treat it as though + // there had been a hazard. + NotPreferredSUnit = CurSUnit; + continue; + } + } else { + FoundSUnit = CurSUnit; + break; + } } // Remember if this is a noop hazard. @@ -696,6 +559,20 @@ void SchedulePostRATDList::ListScheduleTopDown() { NotReady.push_back(CurSUnit); } + // If we have a non-preferred node, push it back onto the available list. + // If we did not find a preferred node, then schedule this first + // non-preferred node. + if (NotPreferredSUnit) { + if (!FoundSUnit) { + DEBUG(dbgs() << "*** Will schedule a non-preferred instruction...\n"); + FoundSUnit = NotPreferredSUnit; + } else { + AvailableQueue.push(NotPreferredSUnit); + } + + NotPreferredSUnit = 0; + } + // Add the nodes that aren't ready back onto the available list. if (!NotReady.empty()) { AvailableQueue.push_all(NotReady); @@ -704,6 +581,11 @@ void SchedulePostRATDList::ListScheduleTopDown() { // If we found a node to schedule... if (FoundSUnit) { + // If we need to emit noops prior to this instruction, then do so. + unsigned NumPreNoops = HazardRec->PreEmitNoops(FoundSUnit); + for (unsigned i = 0; i != NumPreNoops; ++i) + emitNoop(CurCycle); + // ... schedule the node... ScheduleNodeTopDown(FoundSUnit, CurCycle); HazardRec->EmitInstruction(FoundSUnit); @@ -728,10 +610,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { // Otherwise, we have no instructions to issue and we have instructions // that will fault if we don't do this right. This is the case for // processors without pipeline interlocks and other cases. 
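The new ShouldPreferAnother handling in ListScheduleTopDown remembers the first hazard-free but non-preferred unit and keeps searching; only if no preferred unit turns up does it fall back to the remembered one. The selection logic, reduced to a toy queue:

#include <deque>

struct Unit { int Id; bool Preferred; };

// Mirrors the new selection loop: hazard-free units that the target would
// rather not issue are held back unless nothing preferred is available.
static const Unit *pick(const std::deque<Unit> &Q) {
  const Unit *NotPreferred = nullptr;
  for (const Unit &U : Q) {            // all units here are hazard-free
    if (U.Preferred)
      return &U;                       // best case: a preferred unit
    if (!NotPreferred)
      NotPreferred = &U;               // remember the first fallback
  }
  return NotPreferred;                 // may be null if the queue is empty
}

int main() {
  std::deque<Unit> Q = {{1, false}, {2, true}};
  return pick(Q)->Id == 2 ? 0 : 1;
}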
- DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n'); - HazardRec->EmitNoop(); - Sequence.push_back(0); // NULL here means noop - ++NumNoops; + emitNoop(CurCycle); } ++CurCycle; diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/PrologEpilogInserter.cpp index b0e494ffcdc0..6a43efbec7ea 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -20,6 +20,7 @@ #include "PrologEpilogInserter.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" @@ -29,7 +30,10 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" @@ -54,6 +58,7 @@ INITIALIZE_PASS_BEGIN(PEI, "prologepilog", "Prologue/Epilogue Insertion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(StackProtector) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_END(PEI, "prologepilog", "Prologue/Epilogue Insertion & Frame Finalization", @@ -67,6 +72,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreserved(); AU.addPreserved(); + AU.addRequired(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -95,6 +101,9 @@ void PEI::calculateSets(MachineFunction &Fn) { return; } +/// StackObjSet - A set of stack object indexes +typedef SmallSetVector StackObjSet; + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. /// @@ -160,10 +169,11 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Warn on stack size when we exceeds the given limit. MachineFrameInfo *MFI = Fn.getFrameInfo(); - if (WarnStackSize.getNumOccurrences() > 0 && - WarnStackSize < MFI->getStackSize()) - errs() << "warning: Stack size limit exceeded (" << MFI->getStackSize() - << ") in " << Fn.getName() << ".\n"; + uint64_t StackSize = MFI->getStackSize(); + if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) { + DiagnosticInfoStackSize DiagStackSize(*F, StackSize); + F->getContext().diagnose(DiagStackSize); + } delete RS; ReturnBlocks.clear(); @@ -409,11 +419,28 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, } } +/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., +/// those required to be close to the Stack Protector) to stack offsets. +static void +AssignProtectedObjSet(const StackObjSet &UnassignedObjs, + SmallSet &ProtectedObjs, + MachineFrameInfo *MFI, bool StackGrowsDown, + int64_t &Offset, unsigned &MaxAlign) { + + for (StackObjSet::const_iterator I = UnassignedObjs.begin(), + E = UnassignedObjs.end(); I != E; ++I) { + int i = *I; + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + ProtectedObjs.insert(i); + } +} + /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the /// abstract stack objects. 
/// void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + StackProtector *SP = &getAnalysis<StackProtector>(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; @@ -523,8 +550,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure that the stack protector comes before the local variables on the // stack. - SmallSet<int, 16> LargeStackObjs; + SmallSet<int, 16> ProtectedObjs; if (MFI->getStackProtectorIndex() >= 0) { + StackObjSet LargeArrayObjs; AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, Offset, MaxAlign); @@ -541,12 +569,21 @@ continue; if (MFI->getStackProtectorIndex() == (int)i) continue; - if (!MFI->MayNeedStackProtector(i)) - continue; - AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); - LargeStackObjs.insert(i); + switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) { + case StackProtector::SSPLK_None: + case StackProtector::SSPLK_SmallArray: + case StackProtector::SSPLK_AddrOf: + continue; + case StackProtector::SSPLK_LargeArray: + LargeArrayObjs.insert(i); + continue; + } + llvm_unreachable("Unexpected SSPLayoutKind."); } + + AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); } // Then assign frame offsets to stack objects that are not used to spill @@ -563,7 +600,7 @@ continue; if (MFI->getStackProtectorIndex() == (int)i) continue; - if (LargeStackObjs.count(i)) + if (ProtectedObjs.count(i)) continue; AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); @@ -731,15 +768,18 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, // Frame indices in debug values are encoded in a target independent // way with simply the frame index and offset rather than any // target-specific addressing mode. - if (MI->isDebugValue()) { - assert(i == 0 && "Frame indices can only appear as the first " - "operand of a DBG_VALUE machine instruction"); + if (MI->isDebugValue() || + MI->getOpcode() == TargetOpcode::STACKMAP || + MI->getOpcode() == TargetOpcode::PATCHPOINT) { + assert((!MI->isDebugValue() || i == 0) && + "Frame indices can only appear as the first operand of a " + "DBG_VALUE machine instruction"); unsigned Reg; - MachineOperand &Offset = MI->getOperand(1); + MachineOperand &Offset = MI->getOperand(i + 1); Offset.setImm(Offset.getImm() + TFI->getFrameIndexReference( - Fn, MI->getOperand(0).getIndex(), Reg)); - MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/); + Fn, MI->getOperand(i).getIndex(), Reg)); + MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/); continue; } diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/RegAllocBase.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/RegAllocBase.cpp index 293e306a2913..2e433811f44a 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/RegAllocBase.cpp @@ -101,8 +101,8 @@ void RegAllocBase::allocatePhysRegs() { // register if possible and populate a list of new live intervals that // result from splitting.
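Returning to the stack-protector change above: the old single MayNeedStackProtector test becomes a classification, and under this patch only SSPLK_LargeArray objects are collected and laid out next to the guard before the general allocation loop runs. A toy version of that bucketing (indices into a kind array stand in for frame indices; alignment and stack direction are ignored):

#include <vector>

enum SSPLayoutKind { SSPLK_None, SSPLK_LargeArray, SSPLK_SmallArray,
                     SSPLK_AddrOf };

// Toy version of the new layout rule: only large arrays are placed in the
// protected region next to the stack protector; everything else waits for
// the normal allocation loop.
static std::vector<int>
protectedObjects(const std::vector<SSPLayoutKind> &Objs) {
  std::vector<int> LargeArrayObjs;
  for (int i = 0, e = (int)Objs.size(); i != e; ++i)
    if (Objs[i] == SSPLK_LargeArray)
      LargeArrayObjs.push_back(i);     // assigned first, closest to the guard
  return LargeArrayObjs;
}

int main() {
  std::vector<SSPLayoutKind> Objs = {SSPLK_None, SSPLK_LargeArray,
                                     SSPLK_AddrOf};
  return protectedObjects(Objs).size() == 1 ? 0 : 1;
}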
DEBUG(dbgs() << "\nselectOrSplit " - << MRI->getRegClass(VirtReg->reg)->getName() - << ':' << *VirtReg << '\n'); + << MRI->getRegClass(VirtReg->reg)->getName() + << ':' << *VirtReg << " w=" << VirtReg->weight << '\n'); typedef SmallVector VirtRegVec; VirtRegVec SplitVRegs; unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/RegAllocGreedy.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/RegAllocGreedy.cpp index c08d95554060..50b7553ad16d 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -35,6 +35,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/PassAnalysisSupport.h" #include "llvm/Support/CommandLine.h" @@ -70,6 +71,11 @@ class RAGreedy : public MachineFunctionPass, // context MachineFunction *MF; + // Shortcuts to some useful interface. + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + RegisterClassInfo RCI; + // analyses SlotIndexes *Indexes; MachineBlockFrequencyInfo *MBFI; @@ -160,10 +166,14 @@ class RAGreedy : public MachineFunctionPass, unsigned BrokenHints; ///< Total number of broken hints. float MaxWeight; ///< Maximum spill weight evicted. - EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {} + EvictionCost(): BrokenHints(0), MaxWeight(0) {} bool isMax() const { return BrokenHints == ~0u; } + void setMax() { BrokenHints = ~0u; } + + void setBrokenHints(unsigned NHints) { BrokenHints = NHints; } + bool operator<(const EvictionCost &O) const { if (BrokenHints != O.BrokenHints) return BrokenHints < O.BrokenHints; @@ -217,7 +227,7 @@ class RAGreedy : public MachineFunctionPass, } }; - /// Candidate info for for each PhysReg in AllocationOrder. + /// Candidate info for each PhysReg in AllocationOrder. /// This vector never shrinks, but grows to the size of the largest register /// class. SmallVector GlobalCand; @@ -419,7 +429,14 @@ void RAGreedy::enqueue(LiveInterval *LI) { // Allocate original local ranges in linear instruction order. Since they // are singly defined, this produces optimal coloring in the absence of // global interference and other constraints. - Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); + if (!TRI->reverseLocalAssignment()) + Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); + else { + // Allocating bottom up may allow many short LRGs to be assigned first + // to one of the cheap registers. This could be much faster for very + // large blocks on targets with many physical registers. + Prio = Indexes->getZeroIndex().getInstrDistance(LI->beginIndex()); + } } else { // Allocate global and split ranges in long->short order. 
Long ranges that @@ -471,7 +488,8 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) if (Order.isHint(Hint)) { DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); - EvictionCost MaxCost(1); + EvictionCost MaxCost; + MaxCost.setBrokenHints(1); if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { evictInterference(VirtReg, Hint, NewVRegs); return Hint; @@ -543,7 +561,11 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, if (CanSplit && IsHint && !BreaksHint) return true; - return A.weight > B.weight; + if (A.weight > B.weight) { + DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n'); + return true; + } + return false; } /// canEvictInterference - Return true if all interferences between VirtReg and @@ -618,6 +640,9 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, return false; if (Urgent) continue; + // Apply the eviction policy for non-urgent evictions. + if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) + return false; // If !MaxCost.isMax(), then we're just looking for a cheap register. // Evicting another local live range in this case could lead to suboptimal // coloring. @@ -625,9 +650,6 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, !canReassign(*Intf, PhysReg)) { return false; } - // Finally, apply the eviction policy for non-urgent evictions. - if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) - return false; } } MaxCost = Cost; @@ -685,7 +707,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); // Keep track of the cheapest interference seen so far. - EvictionCost BestCost(~0u); + EvictionCost BestCost; + BestCost.setMax(); unsigned BestPhys = 0; unsigned OrderLimit = Order.getOrder().size(); @@ -713,7 +736,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, } Order.rewind(); - while (unsigned PhysReg = Order.nextWithDups(OrderLimit)) { + while (unsigned PhysReg = Order.next(OrderLimit)) { if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) continue; // The first use of a callee-saved register in a function has cost 1. @@ -1186,7 +1209,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, // No benefit from the compact region, our fallback will be per-block // splitting. Make sure we find a solution that is cheaper than spilling. 
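The EvictionCost rework above replaces the old one-argument constructor with named setters, so the two sentinel uses (a one-broken-hint budget in tryAssign, an "infinite" budget in tryEvict) now read explicitly at the call sites, and the comparison stays lexicographic: an eviction that breaks fewer hints always wins, and spill weight only breaks ties. A small self-contained rendering of that ordering (plain C++; field names borrowed from the patch):

    #include <cassert>

    struct EvictionCost {
      unsigned BrokenHints; // Total number of broken hints.
      float MaxWeight;      // Maximum spill weight evicted.

      EvictionCost() : BrokenHints(0), MaxWeight(0) {}
      bool isMax() const { return BrokenHints == ~0u; }
      void setMax() { BrokenHints = ~0u; }
      void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }
      bool operator<(const EvictionCost &O) const {
        if (BrokenHints != O.BrokenHints)
          return BrokenHints < O.BrokenHints; // fewer broken hints always wins
        return MaxWeight < O.MaxWeight;       // spill weight only breaks ties
      }
    };

    int main() {
      EvictionCost HeavyNoHints, LightOneHint, Budget;
      HeavyNoHints.MaxWeight = 100.0f; // large weight, no hints broken
      LightOneHint.setBrokenHints(1);  // tiny weight, but one hint broken
      LightOneHint.MaxWeight = 0.5f;
      assert(HeavyNoHints < LightOneHint); // hint preservation dominates weight

      Budget.setMax();                 // tryEvict's starting "infinite" budget
      assert(LightOneHint < Budget && Budget.isMax());

      Budget.setBrokenHints(1);        // tryAssign's one-broken-hint budget
      assert(HeavyNoHints < Budget);   // breaking no hints fits the budget
      return 0;
    }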
BestCost = calcSpillCost(); - DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); + DEBUG(dbgs() << "Cost of isolating all blocks = "; + MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); } Order.rewind(); @@ -1220,7 +1244,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); continue; } - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost); + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = "; + MBFI->printBlockFreq(dbgs(), Cost)); if (Cost >= BestCost) { DEBUG({ if (BestCand == NoCand) @@ -1243,7 +1268,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, Cost += calcGlobalSplitCost(Cand); DEBUG({ - dbgs() << ", total = " << Cost << " with bundles"; + dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) + << " with bundles"; for (int i = Cand.LiveBundles.find_first(); i>=0; i = Cand.LiveBundles.find_next(i)) dbgs() << " EB#" << i; @@ -1348,6 +1374,22 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Per-Instruction Splitting //===----------------------------------------------------------------------===// +/// Get the number of allocatable registers that match the constraints of \p Reg +/// on \p MI and that are also in \p SuperRC. +static unsigned getNumAllocatableRegsForConstraints( + const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC, + const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, + const RegisterClassInfo &RCI) { + assert(SuperRC && "Invalid register class"); + + const TargetRegisterClass *ConstrainedRC = + MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI, + /* ExploreBundle */ true); + if (!ConstrainedRC) + return 0; + return RCI.getNumAllocatableRegs(ConstrainedRC); +} + /// tryInstructionSplit - Split a live range around individual instructions. /// This is normally not worthwhile since the spiller is doing essentially the /// same thing. However, when the live range is in a constrained register @@ -1358,8 +1400,9 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs) { + const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); // There is no point to this if there are no larger sub-classes. - if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg))) + if (!RegClassInfo.isProperSubClass(CurRC)) return 0; // Always enable split spill mode, since we're effectively spilling to a @@ -1373,10 +1416,18 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); - // Split around every non-copy instruction. + const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC); + unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); + // Split around every non-copy instruction if this split will relax + // the constraints on the virtual register. + // Otherwise, splitting just inserts uncoalescable copies that do not help + // the allocation. 
for (unsigned i = 0; i != Uses.size(); ++i) { if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i])) - if (MI->isFullCopy()) { + if (MI->isFullCopy() || + SuperRCNumAllocatableRegs == + getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII, + TRI, RCI)) { DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); continue; } @@ -1571,7 +1622,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() * - (1.0f / BlockFrequency::getEntryFrequency()); + (1.0f / MBFI->getEntryFreq()); SmallVector GapWeight; Order.rewind(); @@ -1823,6 +1874,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { << "********** Function: " << mf.getName() << '\n'); MF = &mf; + TRI = MF->getTarget().getRegisterInfo(); + TII = MF->getTarget().getInstrInfo(); + RCI.runOnMachineFunction(mf); if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/RegAllocPBQP.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/RegAllocPBQP.cpp index 88c8201fd4b2..483f2e1ae860 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -348,8 +348,7 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, // value plucked randomly out of the air. PBQP::PBQPNum cBenefit = - copyFactor * LiveIntervals::getSpillWeight(false, true, - mbfi->getBlockFreq(mbb)); + copyFactor * LiveIntervals::getSpillWeight(false, true, mbfi, mi); if (cp.isPhys()) { if (!mf->getRegInfo().isAllocatable(dst)) { diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/RegisterClassInfo.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/RegisterClassInfo.cpp index cacd7de4bbf2..aa844466496f 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// // // This file implements the RegisterClassInfo class which provides dynamic -// information about target register classes. Callee saved and reserved -// registers depends on calling conventions and other dynamic information, so -// some things cannot be determined statically. +// information about target register classes. Callee-saved vs. caller-saved and +// reserved registers depend on calling conventions and other dynamic +// information, so some things cannot be determined statically. 
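The new gate in tryInstructionSplit above compares two counts: how many registers are allocatable in the largest legal super-class, and how many remain allocatable once a particular instruction's operand constraints are applied. When the counts match, splitting around that instruction cannot relax anything and would only insert uncoalescable copies, so it is skipped. A toy version of the comparison (plain C++; the register classes and counts are invented for illustration):

    #include <cstdio>

    // Pretend register file: GPR is the super-class, GPR_LOW a constrained
    // subset that some instructions require.
    static const unsigned NumGPR = 16;
    static const unsigned NumGPRLow = 8;

    // Stand-in for getNumAllocatableRegsForConstraints: how many registers
    // remain allocatable after applying one instruction's constraint.
    static unsigned numAllocatableUnderConstraint(bool NeedsLowRegs) {
      return NeedsLowRegs ? NumGPRLow : NumGPR;
    }

    static bool worthSplittingAround(bool NeedsLowRegs) {
      // Mirrors the patch's test: skip the split when the constrained count
      // equals the super-class count, i.e. the split would relax nothing.
      return numAllocatableUnderConstraint(NeedsLowRegs) != NumGPR;
    }

    int main() {
      std::printf("unconstrained use: split? %d\n", worthSplittingAround(false)); // 0
      std::printf("low-regs-only use: split? %d\n", worthSplittingAround(true));  // 1
      return 0;
    }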
// //===----------------------------------------------------------------------===// diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 7f1f9c4e7be1..703ae3d9cc58 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -48,9 +49,11 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo &mli, const MachineDominatorTree &mdt, bool IsPostRAFlag, + bool RemoveKillFlags, LiveIntervals *lis) : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis), - IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) { + IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags), + CanHandleTerminators(false), FirstDbgValue(0) { assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); assert(!(IsPostRA && MRI.getNumVirtRegs()) && @@ -284,8 +287,8 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { /// this SUnit to following instructions in the same scheduling region that /// depend the physical register referenced at OperIdx. void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { - const MachineInstr *MI = SU->getInstr(); - const MachineOperand &MO = MI->getOperand(OperIdx); + MachineInstr *MI = SU->getInstr(); + MachineOperand &MO = MI->getOperand(OperIdx); // Optionally add output and anti dependencies. For anti // dependencies we use a latency of 0 because for a multi-issue @@ -323,6 +326,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // retrieve the existing SUnits list for this register's uses. // Push this SUnit on the use list. Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg())); + if (RemoveKillFlags) + MO.setIsKill(false); } else { addPhysRegDataDeps(SU, OperIdx); @@ -687,10 +692,32 @@ void ScheduleDAGInstrs::initSUnits() { // Assign the Latency field of SU using target-provided information. SU->Latency = SchedModel.computeInstrLatency(SU->getInstr()); + + // If this SUnit uses an unbuffered resource, mark it as such. + // These resources are used for in-order execution pipelines within an + // out-of-order core and are identified by BufferSize=1. BufferSize=0 is + // used for dispatch/issue groups and is not considered here. + if (SchedModel.hasInstrSchedModel()) { + const MCSchedClassDesc *SC = getSchedClass(SU); + for (TargetSchedModel::ProcResIter + PI = SchedModel.getWriteProcResBegin(SC), + PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { + switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) { + case 0: + SU->hasReservedResource = true; + break; + case 1: + SU->isUnbuffered = true; + break; + default: + break; + } + } + } } } -/// If RegPressure is non null, compute register pressure as a side effect. The +/// If RegPressure is non-null, compute register pressure as a side effect. The /// DAG builder is an efficient place to do it because it already visits /// operands. 
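The initSUnits change above encodes a convention of the machine scheduling model: a processor resource with BufferSize 0 must be reserved for dispatch/issue groups, BufferSize 1 marks an unbuffered, strictly in-order pipeline inside an otherwise out-of-order core, and anything larger is an ordinary buffered resource needing no special handling. A compact restatement of that dispatch (plain C++; the flags mirror the SUnit fields touched in the patch, nothing else is LLVM's API):

    #include <cstdio>

    struct SUnitFlags {
      bool hasReservedResource = false; // a BufferSize == 0 resource was seen
      bool isUnbuffered = false;        // a BufferSize == 1 resource was seen
    };

    static void classifyProcResource(int BufferSize, SUnitFlags &SU) {
      switch (BufferSize) {
      case 0:  SU.hasReservedResource = true; break; // dispatch/issue group slot
      case 1:  SU.isUnbuffered = true;        break; // strictly in-order pipe
      default: break;                                // buffered: nothing special
      }
    }

    int main() {
      SUnitFlags SU;
      for (int BufferSize : {0, 1, 4}) // resources written by one sched class
        classifyProcResource(BufferSize, SU);
      std::printf("reserved=%d unbuffered=%d\n",
                  SU.hasReservedResource, SU.isUnbuffered);
      return 0;
    }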
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, @@ -999,6 +1026,145 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, PendingLoads.clear(); } +/// \brief Initialize register live-range state for updating kills. +void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { + // Start with no live registers. + LiveRegs.reset(); + + // Examine the live-in regs of all successors. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) { + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + unsigned Reg = *I; + // Repeat, for reg and all subregs. + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); + } + } +} + +bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) { + // Setting kill flag... + if (!MO.isKill()) { + MO.setIsKill(true); + return false; + } + + // If MO itself is live, clear the kill flag... + if (LiveRegs.test(MO.getReg())) { + MO.setIsKill(false); + return false; + } + + // If any subreg of MO is live, then create an imp-def for that + // subreg and keep MO marked as killed. + MO.setIsKill(false); + bool AllDead = true; + const unsigned SuperReg = MO.getReg(); + MachineInstrBuilder MIB(MF, MI); + for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) { + if (LiveRegs.test(*SubRegs)) { + MIB.addReg(*SubRegs, RegState::ImplicitDefine); + AllDead = false; + } + } + + if(AllDead) + MO.setIsKill(true); + return false; +} + +// FIXME: Reuse the LivePhysRegs utility for this. +void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); + + LiveRegs.resize(TRI->getNumRegs()); + BitVector killedRegs(TRI->getNumRegs()); + + startBlockForKills(MBB); + + // Examine block from end to start... + unsigned Count = MBB->size(); + for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); + I != E; --Count) { + MachineInstr *MI = --I; + if (MI->isDebugValue()) + continue; + + // Update liveness. Registers that are defed but not used in this + // instruction are now dead. Mark register and all subregs as they + // are completely defined. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + LiveRegs.clearBitsNotInMask(MO.getRegMask()); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isDef()) continue; + // Ignore two-addr defs. + if (MI->isRegTiedToUseOperand(i)) continue; + + // Repeat for reg and all subregs. + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + LiveRegs.reset(*SubRegs); + } + + // Examine all used registers and set/clear kill flag. When a + // register is used multiple times we only set the kill flag on + // the first use. Don't set kill flags on undef operands. + killedRegs.reset(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || MRI.isReserved(Reg)) continue; + + bool kill = false; + if (!killedRegs.test(Reg)) { + kill = true; + // A register is not killed if any subregs are live... 
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + if (LiveRegs.test(*SubRegs)) { + kill = false; + break; + } + } + + // If subreg is not live, then register is killed if it became + // live in this instruction + if (kill) + kill = !LiveRegs.test(Reg); + } + + if (MO.isKill() != kill) { + DEBUG(dbgs() << "Fixing " << MO << " in "); + // Warning: toggleKillFlag may invalidate MO. + toggleKillFlag(MI, MO); + DEBUG(MI->dump()); + } + + killedRegs.set(Reg); + } + + // Mark any used register (that is not using undef) and subregs as + // now live... + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || MRI.isReserved(Reg)) continue; + + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); + } + } +} + void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) SU->getInstr()->dump(); diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7dab11a7e934..3b87922b7897 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3342,6 +3342,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { unsigned OpSizeInBits = VT.getSizeInBits(); SDValue LHSShiftArg = LHSShift.getOperand(0); SDValue LHSShiftAmt = LHSShift.getOperand(1); + SDValue RHSShiftArg = RHSShift.getOperand(0); SDValue RHSShiftAmt = RHSShift.getOperand(1); // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) @@ -3401,10 +3402,32 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> // (rotr x, (sub 32, y)) if (ConstantSDNode *SUBC = - dyn_cast(RExtOp0.getOperand(0))) - if (SUBC->getAPIntValue() == OpSizeInBits) + dyn_cast(RExtOp0.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); + } else if (LHSShiftArg.getOpcode() == ISD::ZERO_EXTEND || + LHSShiftArg.getOpcode() == ISD::ANY_EXTEND) { + // fold (or (shl (*ext x), (*ext y)), + // (srl (*ext x), (*ext (sub 32, y)))) -> + // (*ext (rotl x, y)) + // fold (or (shl (*ext x), (*ext y)), + // (srl (*ext x), (*ext (sub 32, y)))) -> + // (*ext (rotr x, (sub 32, y))) + SDValue LArgExtOp0 = LHSShiftArg.getOperand(0); + EVT LArgVT = LArgExtOp0.getValueType(); + bool HasROTRWithLArg = TLI.isOperationLegalOrCustom(ISD::ROTR, LArgVT); + bool HasROTLWithLArg = TLI.isOperationLegalOrCustom(ISD::ROTL, LArgVT); + if (HasROTRWithLArg || HasROTLWithLArg) { + if (LArgVT.getSizeInBits() == SUBC->getAPIntValue()) { + SDValue V = + DAG.getNode(HasROTLWithLArg ? ISD::ROTL : ISD::ROTR, DL, LArgVT, + LArgExtOp0, HasROTL ? 
LHSShiftAmt : RHSShiftAmt); + return DAG.getNode(LHSShiftArg.getOpcode(), DL, VT, V).getNode(); + } + } + } + } } else if (LExtOp0.getOpcode() == ISD::SUB && RExtOp0 == LExtOp0.getOperand(1)) { // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> @@ -3412,10 +3435,32 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> // (rotl x, (sub 32, y)) if (ConstantSDNode *SUBC = - dyn_cast(LExtOp0.getOperand(0))) - if (SUBC->getAPIntValue() == OpSizeInBits) + dyn_cast(LExtOp0.getOperand(0))) { + if (SUBC->getAPIntValue() == OpSizeInBits) { return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); + } else if (RHSShiftArg.getOpcode() == ISD::ZERO_EXTEND || + RHSShiftArg.getOpcode() == ISD::ANY_EXTEND) { + // fold (or (shl (*ext x), (*ext (sub 32, y))), + // (srl (*ext x), (*ext y))) -> + // (*ext (rotl x, y)) + // fold (or (shl (*ext x), (*ext (sub 32, y))), + // (srl (*ext x), (*ext y))) -> + // (*ext (rotr x, (sub 32, y))) + SDValue RArgExtOp0 = RHSShiftArg.getOperand(0); + EVT RArgVT = RArgExtOp0.getValueType(); + bool HasROTRWithRArg = TLI.isOperationLegalOrCustom(ISD::ROTR, RArgVT); + bool HasROTLWithRArg = TLI.isOperationLegalOrCustom(ISD::ROTL, RArgVT); + if (HasROTRWithRArg || HasROTLWithRArg) { + if (RArgVT.getSizeInBits() == SUBC->getAPIntValue()) { + SDValue V = + DAG.getNode(HasROTRWithRArg ? ISD::ROTR : ISD::ROTL, DL, RArgVT, + RArgExtOp0, HasROTR ? RHSShiftAmt : LHSShiftAmt); + return DAG.getNode(RHSShiftArg.getOpcode(), DL, VT, V).getNode(); + } + } + } + } } return 0; @@ -4925,7 +4970,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } if (N0.getOpcode() == ISD::SETCC) { - if (!LegalOperations && VT.isVector()) { + if (!LegalOperations && VT.isVector() && + N0.getValueType().getVectorElementType() == MVT::i1) { // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. // Only do this before legalize for now. EVT N0VT = N0.getOperand(0).getValueType(); @@ -5466,6 +5512,29 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { BSwap, N1); } + // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs + // into a build_vector. + if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { + SmallVector Elts; + unsigned NumElts = N0->getNumOperands(); + unsigned ShAmt = VTBits - EVTBits; + + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Op = N0->getOperand(i); + if (Op->getOpcode() == ISD::UNDEF) { + Elts.push_back(Op); + continue; + } + + ConstantSDNode *CurrentND = cast(Op); + const APInt &C = CurrentND->getAPIntValue(); + Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt), + Op.getValueType())); + } + + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Elts[0], NumElts); + } + return SDValue(); } @@ -5587,6 +5656,20 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue Reduced = ReduceLoadWidth(N); if (Reduced.getNode()) return Reduced; + // Handle the case where the load remains an extending load even + // after truncation. + if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast(N0); + if (!LN0->isVolatile() && + LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { + SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), + VT, LN0->getChain(), LN0->getBasePtr(), + LN0->getMemoryVT(), + LN0->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1)); + return NewLoad; + } + } } // fold (trunc (concat ... 
x ...)) -> (concat ..., (trunc x), ...)), // where ... are all 'undef'. @@ -8075,7 +8158,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { // The width of the type must be a power of 2 and greater than 8-bits. // Otherwise the load cannot be represented in LLVM IR. - // Moreover, if we shifted with a non 8-bits multiple, the slice + // Moreover, if we shifted with a non-8-bits multiple, the slice // will be accross several bytes. We do not support that. unsigned Width = User->getValueSizeInBits(0); if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) @@ -8717,7 +8800,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { } else if (ConstantFPSDNode *C = dyn_cast(StoredVal)) { NonZero |= !C->getConstantFPValue()->isNullValue(); } else { - // Non constant. + // Non-constant. break; } @@ -9814,7 +9897,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (N->getNumOperands() == 2 && N->getOperand(1)->getOpcode() == ISD::UNDEF) { SDValue In = N->getOperand(0); - assert(In->getValueType(0).isVector() && "Must concat vectors"); + assert(In.getValueType().isVector() && "Must concat vectors"); // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr). if (In->getOpcode() == ISD::BITCAST && diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 4309dc1d48cb..82e97f40fc05 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -85,16 +85,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. - // The object may need to be placed onto the stack near the stack - // protector if one exists. Determine here if this object is a suitable - // candidate. I.e., it would trigger the creation of a stack protector. - bool MayNeedSP = - (AI->isArrayAllocation() || - (TySize >= 8 && isa(Ty) && - cast(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, - MayNeedSP, AI); + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); } for (; BB != EB; ++BB) diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 9061ae9f7621..d0fa353e4ceb 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -807,7 +807,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, - TLI.getShiftAmountTy(Value.getValueType()))); + TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, @@ -818,7 +818,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the top RoundWidth bits. 
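The MatchRotate cases added in the DAGCombiner hunks above extend the classic fold (or (shl x, y), (srl x, (sub 32, y))) -> (rot x, y) through zero- and any-extends: when the subtraction constant equals the width of the pre-extension type, the rotate can be done at the narrow width with the extend hoisted outside. The arithmetic behind it is easy to check in plain C++: the low 32 bits of the wide shl/srl/or expression are exactly the narrow rotate, which is what the combine keeps (the extension's high bits must be ignorable, trivially so for any_extend). Assume 0 < y < 32 to avoid undefined shifts:

    #include <cassert>
    #include <cstdint>

    static uint32_t rotl32(uint32_t X, unsigned Y) {
      return (X << Y) | (X >> (32 - Y)); // valid for 0 < Y < 32
    }

    int main() {
      const uint32_t X = 0xDEADBEEFu;
      for (unsigned Y = 1; Y < 32; ++Y) {
        // Basic fold: (or (shl x, y), (srl x, (sub 32, y))) is a rotate.
        assert(((X << Y) | (X >> (32 - Y))) == rotl32(X, Y));

        // Extended form: do the shifts at i64 on (*ext x). The low 32 bits
        // of the wide OR equal the narrow rotate.
        uint64_t Wide = (uint64_t)X; // *ext i32 -> i64
        uint64_t WideOr = (Wide << Y) | (Wide >> (32 - Y));
        assert((uint32_t)WideOr == rotl32(X, Y));
      }
      return 0;
    }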
Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(ExtraWidth, - TLI.getShiftAmountTy(Value.getValueType()))); + TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); @@ -826,7 +826,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, @@ -1017,7 +1017,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Move the top bits to the right place. Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, DAG.getConstant(RoundWidth, - TLI.getShiftAmountTy(Hi.getValueType()))); + TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1047,7 +1047,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Move the top bits to the right place. Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, DAG.getConstant(ExtraWidth, - TLI.getShiftAmountTy(Hi.getValueType()))); + TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1072,8 +1072,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = Res.getValue(1); } } else { - // If this is an unaligned load and the target doesn't support it, - // expand it. + // If this is an unaligned load and the target doesn't support + // it, expand it. if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); @@ -1088,7 +1088,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { break; } case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && + TLI.isTypeLegal(SrcVT)) { SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, LD->getMemOperand()); unsigned ExtendOp; @@ -1109,15 +1110,16 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { assert(!SrcVT.isVector() && "Vector Loads are handled in LegalizeVectorOps"); - // FIXME: This does not work for vectors on most targets. Sign- and - // zero-extend operations are currently folded into extending loads, - // whether they are legal or not, and then we end up here without any - // support for legalizing them. + // FIXME: This does not work for vectors on most targets. Sign- + // and zero-extend operations are currently folded into extending + // loads, whether they are legal or not, and then we end up here + // without any support for legalizing them. assert(ExtType != ISD::EXTLOAD && "EXTLOAD should always be supported!"); - // Turn the unsupported load into an EXTLOAD followed by an explicit - // zero/sign extend inreg. - SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + // Turn the unsupported load into an EXTLOAD followed by an + // explicit zero/sign extend inreg. 
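Two of the combines above lean on the same scalar identity: a value whose meaningful bits are only the low N can be sign-extended "in register" with a left shift followed by an arithmetic right shift (the sext_inreg constant fold computes exactly C.shl(ShAmt).ashr(ShAmt)), or zero-extended in register with a mask, which is what an EXTLOAD plus an explicit extend-in-reg amounts to after legalization. A plain C++ check of both, assuming 32-bit int and the usual arithmetic right shift of signed values:

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low Bits of V in register: the shl/ashr pair used by
    // the sext_inreg constant fold (ShAmt = 32 - Bits).
    static int32_t signExtendInReg(int32_t V, unsigned Bits) {
      const unsigned ShAmt = 32 - Bits;
      return (int32_t)((uint32_t)V << ShAmt) >> ShAmt;
    }

    // Zero-extend the low Bits in register: a mask keeping only those bits.
    static uint32_t zeroExtendInReg(uint32_t V, unsigned Bits) {
      return V & ((Bits == 32) ? ~0u : ((1u << Bits) - 1));
    }

    int main() {
      assert(signExtendInReg(0x000000FF, 8) == -1);    // i8 0xFF -> i32 -1
      assert(signExtendInReg(0x0000007F, 8) == 127);   // sign bit clear
      assert(zeroExtendInReg(0xFFFFFFFF, 8) == 0xFFu); // i8 0xFF -> i32 255
      return 0;
    }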
+ SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, + Node->getValueType(0), Chain, Ptr, SrcVT, LD->getMemOperand()); SDValue ValRes; @@ -1126,7 +1128,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Result.getValueType(), Result, DAG.getValueType(SrcVT)); else - ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); + ValRes = DAG.getZeroExtendInReg(Result, dl, + SrcVT.getScalarType()); Value = ValRes; Chain = Result.getValue(1); break; @@ -1530,9 +1533,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { // the pointer so that the loaded integer will contain the sign bit. unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; - LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), - LoadPtr, - DAG.getConstant(ByteOffset, LoadPtr.getValueType())); + LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr, + DAG.getConstant(ByteOffset, LoadPtr.getValueType())); // Load a legal integer containing the sign bit. SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), false, false, false, 0); @@ -1555,8 +1557,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { // Select between the nabs and abs value based on the sign bit of // the input. return DAG.getSelect(dl, AbsVal.getValueType(), SignBit, - DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), - AbsVal); + DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), + AbsVal); } void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, @@ -3099,7 +3101,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); // BUILD_VECTOR operands are allowed to be wider than the element type. - // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept it + // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept + // it. if (NewEltVT.bitsLT(EltVT)) { // Convert shuffle node. @@ -3107,8 +3110,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // cast operands to v8i32 and re-build the mask. // Calculate new VT, the size of the new VT should be equal to original. 
- EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, - VT.getSizeInBits()/NewEltVT.getSizeInBits()); + EVT NewVT = + EVT::getVectorVT(*DAG.getContext(), NewEltVT, + VT.getSizeInBits() / NewEltVT.getSizeInBits()); assert(NewVT.bitsEq(VT)); // cast operands to new VT @@ -3116,7 +3120,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1); // Convert the shuffle mask - unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements(); + unsigned int factor = + NewVT.getVectorNumElements()/VT.getVectorNumElements(); // EltVT gets smaller assert(factor > 0); @@ -3782,8 +3787,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } else { Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); CC = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, - Tmp3, Tmp4, CC); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, + Tmp2, Tmp3, Tmp4, CC); } } Results.push_back(Tmp1); @@ -3813,8 +3818,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } else { Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); Tmp4 = DAG.getCondCode(ISD::SETNE); - Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, - Tmp3, Node->getOperand(4)); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, + Tmp2, Tmp3, Node->getOperand(4)); } Results.push_back(Tmp1); break; diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 13bb08f08c09..d6a2bd85b83b 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -670,13 +670,13 @@ private: LoadSDNode *LD, ISD::LoadExtType ExtType); /// Helper genWidenVectorStores - Helper function to generate a set of - /// stores to store a widen vector into non widen memory + /// stores to store a widen vector into non-widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value void GenWidenVectorStores(SmallVectorImpl &StChain, StoreSDNode *ST); /// Helper genWidenVectorTruncStores - Helper function to generate a set of - /// stores to store a truncate widen vector into non widen memory + /// stores to store a truncate widen vector into non-widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value void GenWidenVectorTruncStores(SmallVectorImpl &StChain, diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index f7a3e3d25067..2af0c55d962d 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2251,7 +2251,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { SDValue InOp1 = N->getOperand(0); EVT InVT = InOp1.getValueType(); - assert(InVT.isVector() && "can not widen non vector type"); + assert(InVT.isVector() && "can not widen non-vector type"); EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts); InOp1 = GetWidenedVector(InOp1); diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 45d5a4fa69e8..f163f6bdf2cb 100644 --- 
a/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -179,6 +179,22 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { return true; } +/// \brief Return true if the specified node is a BUILD_VECTOR node of +/// all ConstantSDNode or undef. +bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + if (Op.getOpcode() == ISD::UNDEF) + continue; + if (!isa(Op)) + return false; + } + return true; +} + /// isScalarToVector - Return true if the specified node is a /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low /// element is not an undef. diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index b9605448c1c5..e40a7c44adee 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -851,12 +851,20 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); Ops.push_back(Res); + unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); MVT RegisterVT = RegVTs[Value]; for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); - Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); + unsigned TheReg = Regs[Reg++]; + Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); + + // Notice if we clobbered the stack pointer. Yes, inline asm can do this. + if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setHasInlineAsmWithSPAdjust(true); + } } } } @@ -2351,7 +2359,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, volatile double RDensity = (double)RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); - double Metric = Range.logBase2()*(LDensity+RDensity); + volatile double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place DEBUG(dbgs() <<"=>Step\n" << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' @@ -3372,7 +3380,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Inform the Frame Information that we have just allocated a variable-sized // object. - FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1); + FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, &I); } void SelectionDAGBuilder::visitLoad(const LoadInst &I) { @@ -3400,7 +3408,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Root; bool ConstantMemory = false; - if (I.isVolatile() || NumValues > MaxParallelChains) + if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. 
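A subtle change in the switch-lowering hunk above: Metric in handleBTSplitSwitchCase becomes volatile double, matching the two density values it is computed from. On x87-style targets a plain double can be kept in a wider floating-point register, so the same expression may compare differently depending on whether an intermediate was spilled; forcing each value through a volatile rounds it to 64 bits and keeps the splitting decision reproducible. A sketch of the pattern (plain C++; whether the non-volatile version actually misbehaves is target- and flag-dependent):

    #include <cstdio>

    // Density-style computation: each intermediate is stored through a
    // volatile double, so it is rounded to 64 bits before any comparison
    // instead of living in a wider floating-point register.
    static bool shouldSplitAt(double LSize, double LRange,
                              double RSize, double RRange, double Range) {
      volatile double LDensity = LSize / LRange;
      volatile double RDensity = RSize / RRange;
      volatile double Metric = Range * (LDensity + RDensity);
      return Metric > 1.0;
    }

    int main() {
      std::printf("split: %d\n", shouldSplitAt(8, 16, 4, 64, 3.0));
      return 0;
    }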
    Root = getRoot();
   else if (AA->pointsToConstantMemory(
@@ -3413,6 +3421,10 @@
     Root = DAG.getRoot();
   }
 
+  const TargetLowering *TLI = TM.getTargetLowering();
+  if (isVolatile)
+    Root = TLI->prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG);
+
   SmallVector<SDValue, 4> Values(NumValues);
   SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
                                           NumValues));
@@ -3637,6 +3649,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
   if (I.getAlignment() < VT.getSizeInBits() / 8)
     report_fatal_error("Cannot generate unaligned atomic load");
 
+  InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG);
   SDValue L =
     DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT,
                   InChain,
                   getValue(I.getPointerOperand()),
@@ -4290,7 +4303,7 @@ static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
 static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS,
                          SelectionDAG &DAG, const TargetLowering &TLI) {
   bool IsExp10 = false;
-  if (LHS.getValueType() == MVT::f32 && LHS.getValueType() == MVT::f32 &&
+  if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
     if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
       APFloat Ten(10.0f);
@@ -6781,6 +6794,42 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
   return TLI->LowerCallTo(CLI);
 }
 
+/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
+/// or patchpoint target node's operand list.
+///
+/// Constants are converted to TargetConstants purely as an optimization to
+/// avoid constant materialization and register allocation.
+///
+/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
+/// generate address computation nodes, and so ExpandISelPseudo can convert the
+/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
+/// address materialization and register allocation, but may also be required
+/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
+/// alloca in the entry block, then the runtime may assume that the alloca's
+/// StackMap location can be read immediately after compilation and that the
+/// location is valid at any point during execution (this is similar to the
+/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
+/// only available in a register, then the runtime would need to trap when
+/// execution reaches the StackMap in order to read the alloca's location.
+static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx,
+                                SmallVectorImpl<SDValue> &Ops,
+                                SelectionDAGBuilder &Builder) {
+  for (unsigned i = StartIdx, e = CI.getNumArgOperands(); i != e; ++i) {
+    SDValue OpVal = Builder.getValue(CI.getArgOperand(i));
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
+      Ops.push_back(
+        Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
+      Ops.push_back(
+        Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64));
+    } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
+      const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
+      Ops.push_back(
+        Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy()));
+    } else
+      Ops.push_back(OpVal);
+  }
+}
+
+/// \brief Lower llvm.experimental.stackmap directly to its target opcode.
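The helper above gives every live variable one of three encodings in the flattened operand list of the STACKMAP or PATCHPOINT node: a two-slot [ConstantOp marker, value] pair, a target frame index (which later becomes a DirectMemRefOp location), or the SDValue itself (which ends up as a register location). A standalone sketch of the same tagging scheme over a plain struct (the enum and booleans are illustrative, not LLVM's actual encodings):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // One operand slot in the flattened list handed to the STACKMAP node.
    struct OpSlot {
      enum Kind { ConstantMarker, ConstantVal, FrameIndex, Register } K;
      int64_t V;
    };

    // Mirrors addStackMapLiveVars: constants become a marker + payload pair,
    // frame indices stay symbolic, everything else is passed through.
    static void addLiveVar(std::vector<OpSlot> &Ops, bool IsConst, bool IsFI,
                           int64_t Payload) {
      if (IsConst) {
        Ops.push_back({OpSlot::ConstantMarker, 0});
        Ops.push_back({OpSlot::ConstantVal, Payload});
      } else if (IsFI) {
        Ops.push_back({OpSlot::FrameIndex, Payload}); // direct memory reference
      } else {
        Ops.push_back({OpSlot::Register, Payload});   // lives in a register
      }
    }

    int main() {
      std::vector<OpSlot> Ops;
      addLiveVar(Ops, true, false, 42); // literal 42: no register needed
      addLiveVar(Ops, false, true, 3);  // alloca in frame slot #3
      addLiveVar(Ops, false, false, 7); // value only available in a register
      std::printf("%zu operand slots emitted\n", Ops.size()); // prints 4
      return 0;
    }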
void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // void @llvm.experimental.stackmap(i32 , i32 , @@ -6814,8 +6863,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { cast(tmp)->getZExtValue(), MVT::i32)); } // Push live variables for the stack map. - for (unsigned i = 2, e = CI.getNumArgOperands(); i != e; ++i) - Ops.push_back(getValue(CI.getArgOperand(i))); + addStackMapLiveVars(CI, 2, Ops, *this); // Push the chain (this is originally the first operand of the call, but // becomes now the last or second to last operand). @@ -6838,11 +6886,14 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { DAG.ReplaceAllUsesWith(Call, MN); DAG.DeleteNode(Call); + + // Inform the Frame Information that we have a stackmap in this function. + FuncInfo.MF->getFrameInfo()->setHasStackMap(); } /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { - // void|i64 @llvm.experimental.patchpoint.void|i64(i32 , + // void|i64 @llvm.experimental.patchpoint.void|i64(i64 , // i32 , // i8* , // i32 , @@ -6855,17 +6906,19 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { SDValue Callee = getValue(CI.getOperand(2)); // // Get the real number of arguments participating in the call - unsigned NumArgs = - cast(getValue(CI.getArgOperand(3)))->getZExtValue(); + SDValue NArgVal = getValue(CI.getArgOperand(PatchPointOpers::NArgPos)); + unsigned NumArgs = cast(NArgVal)->getZExtValue(); // Skip the four meta args: , , , - assert(CI.getNumArgOperands() >= NumArgs + 4 && + // Intrinsics include all meta-operands up to but not including CC. + unsigned NumMetaOpers = PatchPointOpers::CCPos; + assert(CI.getNumArgOperands() >= NumMetaOpers + NumArgs && "Not enough arguments provided to the patchpoint intrinsic"); // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs; std::pair Result = - LowerCallOperands(CI, 4, NumCallArgs, Callee, isAnyRegCC); + LowerCallOperands(CI, NumMetaOpers, NumCallArgs, Callee, isAnyRegCC); // Set the root to the target-lowered call chain. SDValue Chain = Result.second; @@ -6885,13 +6938,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Replace the target specific call node with the patchable intrinsic. SmallVector Ops; - // Add the and constants. - for (unsigned i = 0; i < 2; ++i) { - SDValue tmp = getValue(CI.getOperand(i)); - Ops.push_back(DAG.getTargetConstant( - cast(tmp)->getZExtValue(), MVT::i32)); - } + // Add the and constants. + SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); + Ops.push_back(DAG.getTargetConstant( + cast(IDVal)->getZExtValue(), MVT::i64)); + SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(DAG.getTargetConstant( + cast(NBytesVal)->getZExtValue(), MVT::i32)); + // Assume that the Callee is a constant address. + // FIXME: handle function symbols in the future. Ops.push_back( DAG.getIntPtrConstant(cast(Callee)->getZExtValue(), /*isTarget=*/true)); @@ -6909,25 +6965,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Add the arguments we omitted previously. The register allocator should // place these in any free register. if (isAnyRegCC) - for (unsigned i = 4, e = NumArgs + 4; i != e; ++i) + for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) Ops.push_back(getValue(CI.getArgOperand(i))); - // Push the arguments from the call instruction. 
+ // Push the arguments from the call instruction up to the register mask. SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1; for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) Ops.push_back(*i); // Push live variables for the stack map. - for (unsigned i = NumArgs + 4, e = CI.getNumArgOperands(); i != e; ++i) { - SDValue OpVal = getValue(CI.getArgOperand(i)); - if (ConstantSDNode *C = dyn_cast(OpVal)) { - Ops.push_back( - DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); - Ops.push_back( - DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); - } else - Ops.push_back(OpVal); - } + addStackMapLiveVars(CI, NumMetaOpers + NumArgs, Ops, *this); // Push the register mask info. if (hasGlue) @@ -6981,6 +7028,9 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { } else DAG.ReplaceAllUsesWith(Call, MN); DAG.DeleteNode(Call); + + // Inform the Frame Information that we have a patchpoint in this function. + FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); } /// TargetLowering::LowerCallTo - This is the default LowerCallTo diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 3a0cfa16aee3..2697a0cc9cae 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -428,7 +428,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { SDB->init(GFI, *AA, LibInfo); - MF->setHasMSInlineAsm(false); + MF->setHasInlineAsm(false); + MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(false); + SelectAllBasicBlocks(Fn); // If the first basic block in the function has live ins that need to be @@ -448,7 +450,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; bool hasFI = MI->getOperand(0).isFI(); - unsigned Reg = hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); + unsigned Reg = + hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) EntryMBB->insert(EntryMBB->begin(), MI); else { @@ -511,7 +514,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; ++I) { - if (MFI->hasCalls() && MF->hasMSInlineAsm()) + if (MFI->hasCalls() && MF->hasInlineAsm()) break; const MachineBasicBlock *MBB = I; @@ -522,8 +525,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { II->isStackAligningInlineAsm()) { MFI->setHasCalls(true); } - if (II->isMSInlineAsm()) { - MF->setHasMSInlineAsm(true); + if (II->isInlineAsm()) { + MF->setHasInlineAsm(true); } } } @@ -563,6 +566,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // at this point. 
FuncInfo->clear(); + DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n"); + DEBUG(MF->print(dbgs())); + return true; } diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/SpillPlacement.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/SpillPlacement.cpp index 10a93b7fa4db..02c2035f4f6b 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/SpillPlacement.cpp @@ -188,10 +188,10 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { // Compute total ingoing and outgoing block frequencies for all bundles. BlockFrequencies.resize(mf.getNumBlockIDs()); - MachineBlockFrequencyInfo &MBFI = getAnalysis(); + MBFI = &getAnalysis(); for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) { unsigned Num = I->getNumber(); - BlockFrequencies[Num] = MBFI.getBlockFreq(I); + BlockFrequencies[Num] = MBFI->getBlockFreq(I); } // We never change the function. @@ -221,7 +221,7 @@ void SpillPlacement::activate(unsigned n) { // Hopfield network. if (bundles->getBlocks(n).size() > 100) { nodes[n].BiasP = 0; - nodes[n].BiasN = (BlockFrequency::getEntryFrequency() / 16); + nodes[n].BiasN = (MBFI->getEntryFreq() / 16); } } diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/SpillPlacement.h b/external/bsd/llvm/dist/llvm/lib/CodeGen/SpillPlacement.h index 105516bb62b1..9161195e7168 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/SpillPlacement.h +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/SpillPlacement.h @@ -38,12 +38,14 @@ class BitVector; class EdgeBundles; class MachineBasicBlock; class MachineLoopInfo; +class MachineBlockFrequencyInfo; class SpillPlacement : public MachineFunctionPass { struct Node; const MachineFunction *MF; const EdgeBundles *bundles; const MachineLoopInfo *loops; + const MachineBlockFrequencyInfo *MBFI; Node *nodes; // Nodes that are active in the current computation. Owned by the prepare() diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/StackColoring.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/StackColoring.cpp index 3dbc0508aa54..e31777735bde 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/StackColoring.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/StackColoring.cpp @@ -452,7 +452,7 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { // We have a single consecutive region. Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum)); } else { - // We have two non consecutive regions. This happens when + // We have two non-consecutive regions. This happens when // LIFETIME_START appears after the LIFETIME_END marker. SlotIndex NewStart = Indexes->getMBBStartIdx(MBB); SlotIndex NewFin = Indexes->getMBBEndIdx(MBB); diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp new file mode 100644 index 000000000000..a374417a90e7 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -0,0 +1,128 @@ +//===-- StackMapLivenessAnalysis.cpp - StackMap live Out Analysis ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the StackMap Liveness analysis pass. 
The pass calculates
+// the liveness for each basic block in a function and attaches the register
+// live-out information to a stackmap or patchpoint intrinsic if present.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackmaps"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/StackMapLivenessAnalysis.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+
+using namespace llvm;
+
+namespace llvm {
+cl::opt<bool> EnableStackMapLiveness("enable-stackmap-liveness",
+  cl::Hidden, cl::desc("Enable StackMap Liveness Analysis Pass"));
+cl::opt<bool> EnablePatchPointLiveness("enable-patchpoint-liveness",
+  cl::Hidden, cl::desc("Enable PatchPoint Liveness Analysis Pass"));
+}
+
+STATISTIC(NumStackMapFuncVisited, "Number of functions visited");
+STATISTIC(NumStackMapFuncSkipped, "Number of functions skipped");
+STATISTIC(NumBBsVisited, "Number of basic blocks visited");
+STATISTIC(NumBBsHaveNoStackmap, "Number of basic blocks with no stackmap");
+STATISTIC(NumStackMaps, "Number of StackMaps visited");
+
+char StackMapLiveness::ID = 0;
+char &llvm::StackMapLivenessID = StackMapLiveness::ID;
+INITIALIZE_PASS(StackMapLiveness, "stackmap-liveness",
+                "StackMap Liveness Analysis", false, false)
+
+/// Default construct and initialize the pass.
+StackMapLiveness::StackMapLiveness() : MachineFunctionPass(ID) {
+  initializeStackMapLivenessPass(*PassRegistry::getPassRegistry());
+}
+
+/// Tell the pass manager which passes we depend on and what information we
+/// preserve.
+void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const {
+  // We preserve all information.
+  AU.setPreservesAll();
+  AU.setPreservesCFG();
+  // Default dependency for all MachineFunction passes.
+  AU.addRequired<MachineFunctionAnalysis>();
+}
+
+/// Calculate the liveness information for the given machine function.
+bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) {
+  DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: "
+               << _MF.getName() << " **********\n");
+  MF = &_MF;
+  TRI = MF->getTarget().getRegisterInfo();
+  ++NumStackMapFuncVisited;
+
+  // Skip this function if there are no stackmaps or patchpoints to process.
+  if (!((MF->getFrameInfo()->hasStackMap() && EnableStackMapLiveness) ||
+        (MF->getFrameInfo()->hasPatchPoint() && EnablePatchPointLiveness))) {
+    ++NumStackMapFuncSkipped;
+    return false;
+  }
+  return calculateLiveness();
+}
+
+/// Performs the actual liveness calculation for the function.
+bool StackMapLiveness::calculateLiveness() {
+  bool HasChanged = false;
+  // For all basic blocks in the function.
+  for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+       MBBI != MBBE; ++MBBI) {
+    DEBUG(dbgs() << "****** BB " << MBBI->getName() << " ******\n");
+    LiveRegs.init(TRI);
+    LiveRegs.addLiveOuts(MBBI);
+    bool HasStackMap = false;
+    // Reverse iterate over all instructions and add the current live register
+    // set to an instruction if we encounter a stackmap or patchpoint
+    // instruction.
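The loop that follows walks each block bottom-up: it seeds LiveRegs with the block's live-outs, snapshots the current set at every stackmap or patchpoint, and then steps the liveness backward across the instruction (defs leave the set, uses re-enter it); each snapshot is stored as the usual 32-registers-per-word bitmask. A small standalone model of the scan and of createRegisterMask's packing (plain C++ with toy instructions; LivePhysRegs does the real stepping in the pass):

    #include <cstdint>
    #include <cstdio>
    #include <set>
    #include <vector>

    struct Instr {
      bool IsStackMap;
      std::vector<unsigned> Defs, Uses;
    };

    // Pack a live set the way createRegisterMask does: bit (Reg % 32) of
    // word (Reg / 32).
    static std::vector<uint32_t> packMask(const std::set<unsigned> &Live,
                                          unsigned NumRegs) {
      std::vector<uint32_t> Mask((NumRegs + 31) / 32, 0);
      for (unsigned Reg : Live)
        Mask[Reg / 32] |= 1u << (Reg % 32);
      return Mask;
    }

    int main() {
      // r1 = ...; STACKMAP; r2 = use(r1)   with block live-outs = {r2}
      std::vector<Instr> Block = {
        {false, {1}, {}}, // def r1
        {true, {}, {}},   // stackmap: record registers live across it
        {false, {2}, {1}} // def r2, use r1
      };
      std::set<unsigned> Live = {2}; // addLiveOuts()

      for (auto I = Block.rbegin(); I != Block.rend(); ++I) {
        if (I->IsStackMap) {
          std::vector<uint32_t> Mask = packMask(Live, 64);
          std::printf("stackmap mask word0 = 0x%x\n", Mask[0]); // 0x2: r1 live
        }
        // stepBackward: clear defs, then add uses.
        for (unsigned D : I->Defs) Live.erase(D);
        for (unsigned U : I->Uses) Live.insert(U);
      }
      return 0;
    }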
+ for (MachineBasicBlock::reverse_iterator I = MBBI->rbegin(), + E = MBBI->rend(); I != E; ++I) { + int Opc = I->getOpcode(); + if ((EnableStackMapLiveness && (Opc == TargetOpcode::STACKMAP)) || + (EnablePatchPointLiveness && (Opc == TargetOpcode::PATCHPOINT))) { + addLiveOutSetToMI(*I); + HasChanged = true; + HasStackMap = true; + ++NumStackMaps; + } + DEBUG(dbgs() << " " << *I << " " << LiveRegs); + LiveRegs.stepBackward(*I); + } + ++NumBBsVisited; + if (!HasStackMap) + ++NumBBsHaveNoStackmap; + } + return HasChanged; +} + +/// Add the current register live set to the instruction. +void StackMapLiveness::addLiveOutSetToMI(MachineInstr &MI) { + uint32_t *Mask = createRegisterMask(); + MachineOperand MO = MachineOperand::CreateRegLiveOut(Mask); + MI.addOperand(*MF, MO); +} + +/// Create a register mask and initialize it with the registers from the +/// register live set. +uint32_t *StackMapLiveness::createRegisterMask() const { + // The mask is owned and cleaned up by the Machine Function. + uint32_t *Mask = MF->allocateRegisterMask(TRI->getNumRegs()); + for (LivePhysRegs::const_iterator RI = LiveRegs.begin(), RE = LiveRegs.end(); + RI != RE; ++RI) + Mask[*RI / 32] |= 1U << (*RI % 32); + return Mask; +} diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/StackMaps.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/StackMaps.cpp index 40893ea247f1..19abcfd36ef7 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/StackMaps.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/StackMaps.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectFileInfo.h" @@ -28,14 +29,13 @@ using namespace llvm; -PatchPointOpers::PatchPointOpers(const MachineInstr *MI): - MI(MI), - HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && - !MI->getOperand(0).isImplicit()), - IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == CallingConv::AnyReg) { - +PatchPointOpers::PatchPointOpers(const MachineInstr *MI) + : MI(MI), + HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && + !MI->getOperand(0).isImplicit()), + IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == CallingConv::AnyReg) +{ #ifndef NDEBUG - { unsigned CheckStartIdx = 0, e = MI->getNumOperands(); while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() && MI->getOperand(CheckStartIdx).isDef() && @@ -44,7 +44,6 @@ PatchPointOpers::PatchPointOpers(const MachineInstr *MI): assert(getMetaIdx() == CheckStartIdx && "Unexpected additonal definition in Patchpoint intrinsic."); - } #endif } @@ -65,7 +64,126 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const { return ScratchIdx; } -void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint32_t ID, +MachineInstr::const_mop_iterator +StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, + MachineInstr::const_mop_iterator MOE, + LocationVec &Locs, LiveOutVec &LiveOuts) const { + if (MOI->isImm()) { + switch (MOI->getImm()) { + default: llvm_unreachable("Unrecognized operand type."); + case StackMaps::DirectMemRefOp: { + unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits(); + assert((Size % 8) == 0 && "Need pointer size in bytes."); + Size /= 8; + unsigned Reg = (++MOI)->getReg(); + int64_t Imm = (++MOI)->getImm(); + Locs.push_back(Location(StackMaps::Location::Direct, Size, Reg, Imm)); + break; + } + case StackMaps::IndirectMemRefOp: { + int64_t Size = (++MOI)->getImm(); + assert(Size 
> 0 && "Need a valid size for indirect memory locations."); + unsigned Reg = (++MOI)->getReg(); + int64_t Imm = (++MOI)->getImm(); + Locs.push_back(Location(StackMaps::Location::Indirect, Size, Reg, Imm)); + break; + } + case StackMaps::ConstantOp: { + ++MOI; + assert(MOI->isImm() && "Expected constant operand."); + int64_t Imm = MOI->getImm(); + Locs.push_back(Location(Location::Constant, sizeof(int64_t), 0, Imm)); + break; + } + } + return ++MOI; + } + + // The physical register number will ultimately be encoded as a DWARF regno. + // The stack map also records the size of a spill slot that can hold the + // register content. (The runtime can track the actual size of the data type + // if it needs to.) + if (MOI->isReg()) { + // Skip implicit registers (this includes our scratch registers) + if (MOI->isImplicit()) + return ++MOI; + + assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) && + "Virtreg operands should have been rewritten before now."); + const TargetRegisterClass *RC = + AP.TM.getRegisterInfo()->getMinimalPhysRegClass(MOI->getReg()); + assert(!MOI->getSubReg() && "Physical subreg still around."); + Locs.push_back( + Location(Location::Register, RC->getSize(), MOI->getReg(), 0)); + return ++MOI; + } + + if (MOI->isRegLiveOut()) + LiveOuts = parseRegisterLiveOutMask(MOI->getRegLiveOut()); + + return ++MOI; +} + +/// Go up the super-register chain until we hit a valid dwarf register number. +static unsigned short getDwarfRegNum(unsigned Reg, const MCRegisterInfo &MCRI, + const TargetRegisterInfo *TRI) { + int RegNo = MCRI.getDwarfRegNum(Reg, false); + for (MCSuperRegIterator SR(Reg, TRI); + SR.isValid() && RegNo < 0; ++SR) + RegNo = TRI->getDwarfRegNum(*SR, false); + + assert(RegNo >= 0 && "Invalid Dwarf register number."); + return (unsigned short) RegNo; +} + +/// Create a live-out register record for the given register Reg. +StackMaps::LiveOutReg +StackMaps::createLiveOutReg(unsigned Reg, const MCRegisterInfo &MCRI, + const TargetRegisterInfo *TRI) const { + unsigned RegNo = getDwarfRegNum(Reg, MCRI, TRI); + unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); + return LiveOutReg(Reg, RegNo, Size); +} + +/// Parse the register live-out mask and return a vector of live-out registers +/// that need to be recorded in the stackmap. +StackMaps::LiveOutVec +StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { + assert(Mask && "No register mask specified"); + const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo(); + MCContext &OutContext = AP.OutStreamer.getContext(); + const MCRegisterInfo &MCRI = *OutContext.getRegisterInfo(); + LiveOutVec LiveOuts; + + // Create a LiveOutReg for each bit that is set in the register mask. + for (unsigned Reg = 0, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) + if ((Mask[Reg / 32] >> Reg % 32) & 1) + LiveOuts.push_back(createLiveOutReg(Reg, MCRI, TRI)); + + // We don't need to keep track of a register if its super-register is already + // in the list. Merge entries that refer to the same dwarf register and use + // the maximum size that needs to be spilled. + std::sort(LiveOuts.begin(), LiveOuts.end()); + for (LiveOutVec::iterator I = LiveOuts.begin(), E = LiveOuts.end(); + I != E; ++I) { + for (LiveOutVec::iterator II = next(I); II != E; ++II) { + if (I->RegNo != II->RegNo) { + // Skip all the now invalid entries. 
+ I = --II; + break; + } + I->Size = std::max(I->Size, II->Size); + if (TRI->isSuperRegister(I->Reg, II->Reg)) + I->Reg = II->Reg; + II->MarkInvalid(); + } + } + LiveOuts.erase(std::remove_if(LiveOuts.begin(), LiveOuts.end(), + LiveOutReg::IsInvalid), LiveOuts.end()); + return LiveOuts; +} + +void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, MachineInstr::const_mop_iterator MOI, MachineInstr::const_mop_iterator MOE, bool recordResult) { @@ -74,32 +192,27 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint32_t ID, MCSymbol *MILabel = OutContext.CreateTempSymbol(); AP.OutStreamer.EmitLabel(MILabel); - LocationVec CallsiteLocs; + LocationVec Locations; + LiveOutVec LiveOuts; if (recordResult) { - std::pair<Location, MachineInstr::const_mop_iterator> ParseResult = - OpParser(MI.operands_begin(), llvm::next(MI.operands_begin()), AP.TM); - - Location &Loc = ParseResult.first; - assert(Loc.LocType == Location::Register && - "Stackmap return location must be a register."); - CallsiteLocs.push_back(Loc); + assert(PatchPointOpers(&MI).hasDef() && "Stackmap has no return value."); + parseOperand(MI.operands_begin(), llvm::next(MI.operands_begin()), + Locations, LiveOuts); } + // Parse operands. while (MOI != MOE) { - std::pair<Location, MachineInstr::const_mop_iterator> ParseResult = - OpParser(MOI, MOE, AP.TM); + MOI = parseOperand(MOI, MOE, Locations, LiveOuts); + } - Location &Loc = ParseResult.first; - - // Move large constants into the constant pool. - if (Loc.LocType == Location::Constant && (Loc.Offset & ~0xFFFFFFFFULL)) { - Loc.LocType = Location::ConstantIndex; - Loc.Offset = ConstPool.getConstantIndex(Loc.Offset); + // Move large constants into the constant pool. + for (LocationVec::iterator I = Locations.begin(), E = Locations.end(); + I != E; ++I) { + if (I->LocType == Location::Constant && (I->Offset & ~0xFFFFFFFFULL)) { + I->LocType = Location::ConstantIndex; + I->Offset = ConstPool.getConstantIndex(I->Offset); } - - CallsiteLocs.push_back(Loc); - MOI = ParseResult.second; } const MCExpr *CSOffsetExpr = MCBinaryExpr::CreateSub( @@ -107,38 +220,26 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint32_t ID, MCSymbolRefExpr::Create(AP.CurrentFnSym, OutContext), OutContext); - CSInfos.push_back(CallsiteInfo(CSOffsetExpr, ID, CallsiteLocs)); -} - -static MachineInstr::const_mop_iterator -getStackMapEndMOP(MachineInstr::const_mop_iterator MOI, - MachineInstr::const_mop_iterator MOE) { - for (; MOI != MOE; ++MOI) - if (MOI->isRegMask() || (MOI->isReg() && MOI->isImplicit())) - break; - - return MOI; + CSInfos.push_back(CallsiteInfo(CSOffsetExpr, ID, Locations, LiveOuts)); } void StackMaps::recordStackMap(const MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::STACKMAP && "exected stackmap"); + assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap"); int64_t ID = MI.getOperand(0).getImm(); - assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs"); recordStackMapOpers(MI, ID, llvm::next(MI.operands_begin(), 2), - getStackMapEndMOP(MI.operands_begin(), - MI.operands_end())); + MI.operands_end()); } void StackMaps::recordPatchPoint(const MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "exected stackmap"); + assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint"); PatchPointOpers opers(&MI); int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm(); - assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs"); + MachineInstr::const_mop_iterator MOI = llvm::next(MI.operands_begin(), opers.getStackMapStartIdx()); - recordStackMapOpers(MI, ID, MOI,
getStackMapEndMOP(MOI, MI.operands_end()), + recordStackMapOpers(MI, ID, MOI, MI.operands_end(), opers.isAnyReg() && opers.hasDef()); #ifndef NDEBUG @@ -160,7 +261,7 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { /// int64 : Constants[NumConstants] /// uint32 : NumRecords /// StkMapRecord[NumRecords] { -/// uint32 : PatchPoint ID +/// uint64 : PatchPoint ID /// uint32 : Instruction Offset /// uint16 : Reserved (record flags) /// uint16 : NumLocations @@ -170,6 +271,11 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { /// uint16 : Dwarf RegNum /// int32 : Offset /// } +/// uint16 : NumLiveOuts +/// LiveOuts[NumLiveOuts] +/// uint16 : Dwarf RegNum +/// uint8 : Reserved +/// uint8 : Size in Bytes /// } /// /// Location Encoding, Type, Value: @@ -220,8 +326,9 @@ void StackMaps::serializeToStackMapSection() { CSIE = CSInfos.end(); CSII != CSIE; ++CSII) { - unsigned CallsiteID = CSII->ID; + uint64_t CallsiteID = CSII->ID; const LocationVec &CSLocs = CSII->Locations; + const LiveOutVec &LiveOuts = CSII->LiveOuts; DEBUG(dbgs() << WSMP << "callsite " << CallsiteID << "\n"); @@ -229,15 +336,16 @@ void StackMaps::serializeToStackMapSection() { // runtime than crash in case of in-process compilation. Currently, we do // simple overflow checks, but we may eventually communicate other // compilation errors this way. - if (CSLocs.size() > UINT16_MAX) { - AP.OutStreamer.EmitIntValue(UINT32_MAX, 4); // Invalid ID. + if (CSLocs.size() > UINT16_MAX || LiveOuts.size() > UINT16_MAX) { + AP.OutStreamer.EmitIntValue(UINT64_MAX, 8); // Invalid ID. AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4); AP.OutStreamer.EmitIntValue(0, 2); // Reserved. AP.OutStreamer.EmitIntValue(0, 2); // 0 locations. + AP.OutStreamer.EmitIntValue(0, 2); // 0 live-out registers. continue; } - AP.OutStreamer.EmitIntValue(CallsiteID, 4); + AP.OutStreamer.EmitIntValue(CallsiteID, 8); AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4); // Reserved for flags. @@ -251,6 +359,29 @@ void StackMaps::serializeToStackMapSection() { for (LocationVec::const_iterator LocI = CSLocs.begin(), LocE = CSLocs.end(); LocI != LocE; ++LocI, ++operIdx) { const Location &Loc = *LocI; + unsigned RegNo = 0; + int Offset = Loc.Offset; + if(Loc.Reg) { + RegNo = MCRI.getDwarfRegNum(Loc.Reg, false); + for (MCSuperRegIterator SR(Loc.Reg, TRI); + SR.isValid() && (int)RegNo < 0; ++SR) { + RegNo = TRI->getDwarfRegNum(*SR, false); + } + // If this is a register location, put the subregister byte offset in + // the location offset. 
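Each location entry serialized per the record layout documented above occupies eight bytes. A hypothetical reader-side struct for one entry (field names invented for illustration; a real consumer must also respect the section's byte order):

  #include <cstdint>

  // One stack map location record: .byte type, .byte size,
  // .short Dwarf regnum, .int offset.
  struct StackMapLocation {
    uint8_t  Type;       // Register, Direct, Indirect, Constant, ConstantIndex
    uint8_t  Size;       // value or spill-slot size in bytes
    uint16_t DwarfRegNo; // Dwarf register number, 0 if unused
    int32_t  Offset;     // frame offset, constant value, or constant-pool index
  };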
+ if (Loc.LocType == Location::Register) { + assert(!Loc.Offset && "Register location should have zero offset"); + unsigned LLVMRegNo = MCRI.getLLVMRegNum(RegNo, false); + unsigned SubRegIdx = MCRI.getSubRegIndex(LLVMRegNo, Loc.Reg); + if (SubRegIdx) + Offset = MCRI.getSubRegIdxOffset(SubRegIdx); + } + } + else { + assert(Loc.LocType != Location::Register && + "Missing location register"); + } + DEBUG( dbgs() << WSMP << " Loc " << operIdx << ": "; switch (Loc.LocType) { @@ -276,36 +407,35 @@ void StackMaps::serializeToStackMapSection() { dbgs() << "Constant Index " << Loc.Offset; break; } - dbgs() << "\n"; + dbgs() << " [encoding: .byte " << Loc.LocType + << ", .byte " << Loc.Size + << ", .short " << RegNo + << ", .int " << Offset << "]\n"; ); - unsigned RegNo = 0; - int Offset = Loc.Offset; - if(Loc.Reg) { - RegNo = MCRI.getDwarfRegNum(Loc.Reg, false); - for (MCSuperRegIterator SR(Loc.Reg, TRI); - SR.isValid() && (int)RegNo < 0; ++SR) { - RegNo = TRI->getDwarfRegNum(*SR, false); - } - // If this is a register location, put the subregister byte offset in - // the location offset. - if (Loc.LocType == Location::Register) { - assert(!Loc.Offset && "Register location should have zero offset"); - unsigned LLVMRegNo = MCRI.getLLVMRegNum(RegNo, false); - unsigned SubRegIdx = MCRI.getSubRegIndex(LLVMRegNo, Loc.Reg); - if (SubRegIdx) - Offset = MCRI.getSubRegIdxOffset(SubRegIdx); - } - } - else { - assert(Loc.LocType != Location::Register && - "Missing location register"); - } AP.OutStreamer.EmitIntValue(Loc.LocType, 1); AP.OutStreamer.EmitIntValue(Loc.Size, 1); AP.OutStreamer.EmitIntValue(RegNo, 2); AP.OutStreamer.EmitIntValue(Offset, 4); } + + DEBUG(dbgs() << WSMP << " has " << LiveOuts.size() + << " live-out registers\n"); + + AP.OutStreamer.EmitIntValue(LiveOuts.size(), 2); + + operIdx = 0; + for (LiveOutVec::const_iterator LI = LiveOuts.begin(), LE = LiveOuts.end(); + LI != LE; ++LI, ++operIdx) { + DEBUG(dbgs() << WSMP << " LO " << operIdx << ": " + << MCRI.getName(LI->Reg) + << " [encoding: .short " << LI->RegNo + << ", .byte 0, .byte " << LI->Size << "]\n"); + + AP.OutStreamer.EmitIntValue(LI->RegNo, 2); + AP.OutStreamer.EmitIntValue(0, 1); + AP.OutStreamer.EmitIntValue(LI->Size, 1); + } } AP.OutStreamer.AddBlankLine(); diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/StackProtector.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/StackProtector.cpp index 902044997151..19fb0edcc3cd 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/StackProtector.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/StackProtector.cpp @@ -20,7 +20,6 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/StackSlotColoring.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/StackSlotColoring.cpp index 9f44df8f4308..77c7047cde55 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -142,7 +142,6 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = &*MBBI; - BlockFrequency Freq = MBFI->getBlockFreq(MBB); for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end(); MII != EE; ++MII) { MachineInstr *MI = &*MII; @@ -157,7 +156,7 @@ void 
StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { continue; LiveInterval &li = LS->getInterval(FI); if (!MI->isDebugValue()) - li.weight += LiveIntervals::getSpillWeight(false, true, Freq); + li.weight += LiveIntervals::getSpillWeight(false, true, MBFI, MI); } for (MachineInstr::mmo_iterator MMOI = MI->memoperands_begin(), EE = MI->memoperands_end(); MMOI != EE; ++MMOI) { diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/TargetInstrInfo.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/TargetInstrInfo.cpp index bf4fd6587ef6..29249cfc8f6b 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -13,10 +13,12 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrItineraries.h" @@ -372,6 +374,65 @@ canFoldMemoryOperand(const MachineInstr *MI, return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]); } +static MachineInstr* foldPatchpoint(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex, + const TargetInstrInfo &TII) { + unsigned StartIdx = 0; + switch (MI->getOpcode()) { + case TargetOpcode::STACKMAP: + StartIdx = 2; // Skip ID, nShadowBytes. + break; + case TargetOpcode::PATCHPOINT: { + // For PatchPoint, the call args are not foldable. + PatchPointOpers opers(MI); + StartIdx = opers.getVarIdx(); + break; + } + default: + llvm_unreachable("unexpected stackmap opcode"); + } + + // Return false if any operands requested for folding are not foldable (not + // part of the stackmap's live values). + for (SmallVectorImpl<unsigned>::const_iterator I = Ops.begin(), E = Ops.end(); + I != E; ++I) { + if (*I < StartIdx) + return 0; + } + + MachineInstr *NewMI = + MF.CreateMachineInstr(TII.get(MI->getOpcode()), MI->getDebugLoc(), true); + MachineInstrBuilder MIB(MF, NewMI); + + // No need to fold return, the meta data, and function arguments + for (unsigned i = 0; i < StartIdx; ++i) + MIB.addOperand(MI->getOperand(i)); + + for (unsigned i = StartIdx; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) { + unsigned SpillSize; + unsigned SpillOffset; + // Compute the spill slot size and offset. + const TargetRegisterClass *RC = + MF.getRegInfo().getRegClass(MO.getReg()); + bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, + SpillOffset, &MF.getTarget()); + if (!Valid) + report_fatal_error("cannot spill patchpoint subregister operand"); + MIB.addImm(StackMaps::IndirectMemRefOp); + MIB.addImm(SpillSize); + MIB.addFrameIndex(FrameIndex); + MIB.addImm(SpillOffset); + } + else + MIB.addOperand(MO); + } + return NewMI; +} + /// foldMemoryOperand - Attempt to fold a load or store of the specified stack /// slot into the specified machine instruction for the specified operand(s). /// If this is possible, a new instruction is returned with the specified @@ -393,8 +454,18 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, assert(MBB && "foldMemoryOperand needs an inserted instruction"); MachineFunction &MF = *MBB->getParent(); - // Ask the target to do the actual folding.
- if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) { + MachineInstr *NewMI = 0; + + if (MI->getOpcode() == TargetOpcode::STACKMAP || + MI->getOpcode() == TargetOpcode::PATCHPOINT) { + // Fold stackmap/patchpoint. + NewMI = foldPatchpoint(MF, MI, Ops, FI, *this); + } else { + // Ask the target to do the actual folding. + NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI); + } + + if (NewMI) { NewMI->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); // Add a memory operand, foldMemoryOperandImpl doesn't do that. assert((!(Flags & MachineMemOperand::MOStore) || @@ -450,7 +521,19 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, MachineFunction &MF = *MBB.getParent(); // Ask the target to do the actual folding. - MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI); + MachineInstr *NewMI = 0; + int FrameIndex = 0; + + if ((MI->getOpcode() == TargetOpcode::STACKMAP || + MI->getOpcode() == TargetOpcode::PATCHPOINT) && + isLoadFromStackSlot(LoadMI, FrameIndex)) { + // Fold stackmap/patchpoint. + NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this); + } else { + // Ask the target to do the actual folding. + NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI); + } + if (!NewMI) return 0; NewMI = MBB.insert(MI, NewMI); diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/TargetLoweringBase.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/TargetLoweringBase.cpp index 30305af21152..beaa9b9f7405 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -18,7 +18,9 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" @@ -670,6 +672,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, UseUnderscoreSetJmp = false; UseUnderscoreLongJmp = false; SelectIsExpensive = false; + HasMultipleConditionRegisters = false; IntDivIsCheap = false; Pow2DivIsCheap = false; JumpIsExpensive = false; @@ -894,6 +897,59 @@ bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const { return false; } +/// Replace/modify any TargetFrameIndex operands with a target-dependent +/// sequence of memory operands that is recognized by PrologEpilogInserter. +MachineBasicBlock* +TargetLoweringBase::emitPatchPoint(MachineInstr *MI, + MachineBasicBlock *MBB) const { + const TargetMachine &TM = getTargetMachine(); + MachineFunction &MF = *MI->getParent()->getParent(); + + // MI changes inside this loop as we grow operands. + for(unsigned OperIdx = 0; OperIdx != MI->getNumOperands(); ++OperIdx) { + MachineOperand &MO = MI->getOperand(OperIdx); + if (!MO.isFI()) + continue; + + // foldMemoryOperand builds a new MI after replacing a single FI operand + // with the canonical set of five x86 addressing-mode operands. + int FI = MO.getIndex(); + MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc()); + + // Copy operands before the frame-index. + for (unsigned i = 0; i < OperIdx; ++i) + MIB.addOperand(MI->getOperand(i)); + // Add frame index operands: direct-mem-ref tag, #FI, offset. + MIB.addImm(StackMaps::DirectMemRefOp); + MIB.addOperand(MI->getOperand(OperIdx)); + MIB.addImm(0); + // Copy the operands after the frame index.
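Schematically, both rewrites here replace one operand with a small tagged group that parseOperand() above decodes: foldPatchpoint() folds a spilled register to [IndirectMemRefOp, size, frame index, offset], and emitPatchPoint() expands a raw TargetFrameIndex to [DirectMemRefOp, frame index, 0]. A toy sketch over plain integers (tag values are placeholders, not LLVM's):

  #include <cstdint>
  #include <vector>

  enum Tag { kDirectMemRef, kIndirectMemRef }; // placeholder values

  // Indirect group, as emitted by foldPatchpoint() for a spilled operand.
  static void appendIndirect(std::vector<int64_t> &Ops, int64_t SpillSize,
                             int64_t FrameIndex, int64_t SpillOffset) {
    Ops.push_back(kIndirectMemRef);
    Ops.push_back(SpillSize);
    Ops.push_back(FrameIndex);
    Ops.push_back(SpillOffset);
  }

  // Direct group, as emitted by emitPatchPoint() for a frame-index operand.
  static void appendDirect(std::vector<int64_t> &Ops, int64_t FrameIndex) {
    Ops.push_back(kDirectMemRef);
    Ops.push_back(FrameIndex);
    Ops.push_back(0); // offset
  }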
+ for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i) + MIB.addOperand(MI->getOperand(i)); + + // Inherit previous memory operands. + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!"); + + // Add a new memory operand for this FI. + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + assert(MFI.getObjectOffset(FI) != -1); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + MachineMemOperand::MOLoad, + TM.getDataLayout()->getPointerSize(), + MFI.getObjectAlignment(FI)); + MIB->addMemOperand(MF, MMO); + + // Replace the instruction and update the operand index. + MBB->insert(MachineBasicBlock::iterator(MI), MIB); + OperIdx += (MIB->getNumOperands() - MI->getNumOperands()) - 1; + MI->eraseFromParent(); + MI = MIB; + } + return MBB; +} + /// findRepresentativeClass - Return the largest legal super-reg register class /// of the register class for the specified type and its associated "cost". std::pair<const TargetRegisterClass*, uint8_t> @@ -1230,7 +1286,7 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const { - return TD->getCallFrameTypeAlignment(Ty); + return TD->getABITypeAlignment(Ty); } //===----------------------------------------------------------------------===// diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 59d7b570375d..9dbba7c1f768 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -95,13 +95,10 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, if (Encoding & dwarf::DW_EH_PE_indirect) { MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>(); - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += ".DW.stub"; + MCSymbol *SSym = getSymbolWithGlobalValueBase(*Mang, GV, ".DW.stub"); // Add information about the stub reference to ELFMMI so that the stub // gets emitted by the asmprinter. - MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { MCSymbol *Sym = getSymbol(*Mang, GV); @@ -632,13 +629,10 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, MachineModuleInfoMachO &MachOMMI = MMI->getObjFileInfo<MachineModuleInfoMachO>(); - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += "$non_lazy_ptr"; + MCSymbol *SSym = getSymbolWithGlobalValueBase(*Mang, GV, "$non_lazy_ptr"); // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. - MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) : MachOMMI.getGVStubEntry(SSym); @@ -663,13 +657,10 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, MachineModuleInfoMachO &MachOMMI = MMI->getObjFileInfo<MachineModuleInfoMachO>(); - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += "$non_lazy_ptr"; + MCSymbol *SSym = getSymbolWithGlobalValueBase(*Mang, GV, "$non_lazy_ptr"); // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter.
- MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { MCSymbol *Sym = getSymbol(*Mang, GV); @@ -723,33 +714,31 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { int Selection = 0; unsigned Characteristics = getCOFFSectionFlags(Kind); - SmallString<128> Name(GV->getSection().c_str()); + StringRef Name = GV->getSection(); + StringRef COMDATSymName = ""; if (GV->isWeakForLinker()) { Selection = COFF::IMAGE_COMDAT_SELECT_ANY; Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; - Name.append("$"); - Mang->getNameWithPrefix(Name, GV, false, false); + MCSymbol *Sym = getSymbol(*Mang, GV); + COMDATSymName = Sym->getName(); } return getContext().getCOFFSection(Name, Characteristics, Kind, - "", + COMDATSymName, Selection); } -static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { +static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) { if (Kind.isText()) - return ".text$"; + return ".text"; if (Kind.isBSS ()) - return ".bss$"; - if (Kind.isThreadLocal()) { - // 'LLVM' is just an arbitary string to ensure that the section name gets - // sorted in between '.tls$AAA' and '.tls$ZZZ' by the linker. - return ".tls$LLVM"; - } + return ".bss"; + if (Kind.isThreadLocal()) + return ".tls$"; if (Kind.isWriteable()) - return ".data$"; - return ".rdata$"; + return ".data"; + return ".rdata"; } @@ -760,16 +749,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // If this global is linkonce/weak and the target handles this by emitting it // into a 'uniqued' section name, create and return the section now. if (GV->isWeakForLinker()) { - const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); - SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); - Mang->getNameWithPrefix(Name, GV, false, false); - + const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); unsigned Characteristics = getCOFFSectionFlags(Kind); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; - - return getContext().getCOFFSection(Name.str(), Characteristics, - Kind, "", COFF::IMAGE_COMDAT_SELECT_ANY); + MCSymbol *Sym = getSymbol(*Mang, GV); + return getContext().getCOFFSection(Name, Characteristics, + Kind, Sym->getName(), + COFF::IMAGE_COMDAT_SELECT_ANY); } if (Kind.isText()) diff --git a/external/bsd/llvm/dist/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/external/bsd/llvm/dist/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index b9a6b479c358..15105d4b3f7c 100644 --- a/external/bsd/llvm/dist/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/external/bsd/llvm/dist/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1317,13 +1317,14 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { assert(SrcReg && SrcMO.isUse() && "two address instruction invalid"); // Deal with uses immediately - simply rewrite the src operand. - if (SrcMO.isUndef()) { + if (SrcMO.isUndef() && !DstMO.getSubReg()) { // Constrain the DstReg register class if required. 
if (TargetRegisterInfo::isVirtualRegister(DstReg)) if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, TRI, *MF)) MRI->constrainRegClass(DstReg, RC); SrcMO.setReg(DstReg); + SrcMO.setSubReg(0); DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI); continue; } @@ -1349,6 +1350,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, unsigned LastCopiedReg = 0; SlotIndex LastCopyIdx; unsigned RegB = 0; + unsigned SubRegB = 0; for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { unsigned SrcIdx = TiedPairs[tpi].first; unsigned DstIdx = TiedPairs[tpi].second; @@ -1359,6 +1361,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, // Grab RegB from the instruction because it may have changed if the // instruction was commuted. RegB = MI->getOperand(SrcIdx).getReg(); + SubRegB = MI->getOperand(SrcIdx).getSubReg(); if (RegA == RegB) { // The register is tied to multiple destinations (or else we would @@ -1383,8 +1386,25 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, #endif // Emit a copy. - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), RegA).addReg(RegB); + MachineInstrBuilder MIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), RegA); + // If this operand is folding a truncation, the truncation now moves to the + // copy so that the register classes remain valid for the operands. + MIB.addReg(RegB, 0, SubRegB); + const TargetRegisterClass *RC = MRI->getRegClass(RegB); + if (SubRegB) { + if (TargetRegisterInfo::isVirtualRegister(RegA)) { + assert(TRI->getMatchingSuperRegClass(RC, MRI->getRegClass(RegA), + SubRegB) && + "tied subregister must be a truncation"); + // The superreg class will not be used to constrain the subreg class. + RC = 0; + } + else { + assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB)) + && "tied subregister must be a truncation"); + } + } // Update DistanceMap. MachineBasicBlock::iterator PrevMI = MI; @@ -1404,7 +1424,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } } - DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI); + DEBUG(dbgs() << "\t\tprepend:\t" << *MIB); MachineOperand &MO = MI->getOperand(SrcIdx); assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() && @@ -1417,9 +1437,12 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, // Make sure regA is a legal regclass for the SrcIdx operand. if (TargetRegisterInfo::isVirtualRegister(RegA) && TargetRegisterInfo::isVirtualRegister(RegB)) - MRI->constrainRegClass(RegA, MRI->getRegClass(RegB)); - + MRI->constrainRegClass(RegA, RC); MO.setReg(RegA); + // The getMatchingSuper asserts guarantee that the register class projected + // by SubRegB is compatible with RegA with no subregister. So regardless of + // whether the dest oper writes a subreg, the source oper should not. + MO.setSubReg(0); // Propagate SrcRegMap. SrcRegMap[RegA] = RegB; @@ -1431,12 +1454,14 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, // Replace other (un-tied) uses of regB with LastCopiedReg. 
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB && + MO.isUse()) { if (MO.isKill()) { MO.setIsKill(false); RemovedKillFlag = true; } MO.setReg(LastCopiedReg); + MO.setSubReg(0); } } } diff --git a/external/bsd/llvm/dist/llvm/lib/DebugInfo/DWARFContext.cpp b/external/bsd/llvm/dist/llvm/lib/DebugInfo/DWARFContext.cpp index e47719025c80..d10c4b4c3101 100644 --- a/external/bsd/llvm/dist/llvm/lib/DebugInfo/DWARFContext.cpp +++ b/external/bsd/llvm/dist/llvm/lib/DebugInfo/DWARFContext.cpp @@ -34,27 +34,29 @@ static void dumpPubSection(raw_ostream &OS, StringRef Name, StringRef Data, OS << "\n." << Name << " contents:\n"; DataExtractor pubNames(Data, LittleEndian, 0); uint32_t offset = 0; - OS << "length = " << format("0x%08x", pubNames.getU32(&offset)); - OS << " version = " << format("0x%04x", pubNames.getU16(&offset)); - OS << " unit_offset = " << format("0x%08x", pubNames.getU32(&offset)); - OS << " unit_size = " << format("0x%08x", pubNames.getU32(&offset)) << '\n'; - if (GnuStyle) - OS << "Offset Linkage Kind Name\n"; - else - OS << "Offset Name\n"; + while (pubNames.isValidOffset(offset)) { + OS << "length = " << format("0x%08x", pubNames.getU32(&offset)); + OS << " version = " << format("0x%04x", pubNames.getU16(&offset)); + OS << " unit_offset = " << format("0x%08x", pubNames.getU32(&offset)); + OS << " unit_size = " << format("0x%08x", pubNames.getU32(&offset)) << '\n'; + if (GnuStyle) + OS << "Offset Linkage Kind Name\n"; + else + OS << "Offset Name\n"; - while (offset < Data.size()) { - uint32_t dieRef = pubNames.getU32(&offset); - if (dieRef == 0) - break; - OS << format("0x%8.8x ", dieRef); - if (GnuStyle) { - PubIndexEntryDescriptor desc(pubNames.getU8(&offset)); - OS << format("%-8s", dwarf::GDBIndexEntryLinkageString(desc.Linkage)) - << ' ' << format("%-8s", dwarf::GDBIndexEntryKindString(desc.Kind)) - << ' '; + while (offset < Data.size()) { + uint32_t dieRef = pubNames.getU32(&offset); + if (dieRef == 0) + break; + OS << format("0x%8.8x ", dieRef); + if (GnuStyle) { + PubIndexEntryDescriptor desc(pubNames.getU8(&offset)); + OS << format("%-8s", dwarf::GDBIndexEntryLinkageString(desc.Linkage)) + << ' ' << format("%-8s", dwarf::GDBIndexEntryKindString(desc.Kind)) + << ' '; + } + OS << '\"' << pubNames.getCStr(&offset) << "\"\n"; } - OS << '\"' << pubNames.getCStr(&offset) << "\"\n"; } } @@ -295,10 +297,8 @@ void DWARFContext::parseCompileUnits() { } void DWARFContext::parseTypeUnits() { - const std::map<object::SectionRef, Section> &Sections = getTypesSections(); - for (std::map<object::SectionRef, Section>::const_iterator - I = Sections.begin(), - E = Sections.end(); + const TypeSectionMap &Sections = getTypesSections(); + for (TypeSectionMap::const_iterator I = Sections.begin(), E = Sections.end(); I != E; ++I) { uint32_t offset = 0; const DataExtractor &DIData = diff --git a/external/bsd/llvm/dist/llvm/lib/DebugInfo/DWARFContext.h b/external/bsd/llvm/dist/llvm/lib/DebugInfo/DWARFContext.h index 03863ab8b1e2..08006d0d5e5b 100644 --- a/external/bsd/llvm/dist/llvm/lib/DebugInfo/DWARFContext.h +++ b/external/bsd/llvm/dist/llvm/lib/DebugInfo/DWARFContext.h @@ -19,6 +19,7 @@ #include "DWARFTypeUnit.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/MapVector.h" #include "llvm/DebugInfo/DIContext.h" namespace llvm { @@ -138,7 +139,9 @@ public: virtual bool isLittleEndian() const = 0; virtual uint8_t getAddressSize() const = 0;
virtual const Section &getInfoSection() = 0; - virtual const std::map<object::SectionRef, Section> &getTypesSections() = 0; + typedef MapVector<object::SectionRef, Section, std::map<object::SectionRef, unsigned> > TypeSectionMap; + virtual const TypeSectionMap &getTypesSections() = 0; virtual StringRef getAbbrevSection() = 0; virtual const Section &getLocSection() = 0; virtual StringRef getARangeSection() = 0; @@ -179,7 +182,7 @@ class DWARFContextInMemory : public DWARFContext { bool IsLittleEndian; uint8_t AddressSize; Section InfoSection; - std::map<object::SectionRef, Section> TypesSections; + TypeSectionMap TypesSections; StringRef AbbrevSection; Section LocSection; StringRef ARangeSection; @@ -208,9 +211,7 @@ public: virtual bool isLittleEndian() const { return IsLittleEndian; } virtual uint8_t getAddressSize() const { return AddressSize; } virtual const Section &getInfoSection() { return InfoSection; } - virtual const std::map<object::SectionRef, Section> &getTypesSections() { - return TypesSections; - } + virtual const TypeSectionMap &getTypesSections() { return TypesSections; } virtual StringRef getAbbrevSection() { return AbbrevSection; } virtual const Section &getLocSection() { return LocSection; } virtual StringRef getARangeSection() { return ARangeSection; } diff --git a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/Interpreter/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/Interpreter/LLVMBuild.txt index 327b320afe2b..5af77e547253 100644 --- a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/Interpreter/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/Interpreter/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Interpreter parent = ExecutionEngine -required_libraries = CodeGen Core ExecutionEngine Support Target +required_libraries = CodeGen Core ExecutionEngine Support diff --git a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/JIT/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/JIT/LLVMBuild.txt index ca2a56537aab..dd22f1b464a7 100644 --- a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/JIT/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/JIT/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = JIT parent = ExecutionEngine -required_libraries = CodeGen Core ExecutionEngine MC RuntimeDyld Support Target +required_libraries = CodeGen Core ExecutionEngine Support diff --git a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/LLVMBuild.txt index 1f94a4fb9ecd..6dc75af2ec96 100644 --- a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/LLVMBuild.txt @@ -22,4 +22,4 @@ subdirectories = Interpreter JIT MCJIT RuntimeDyld IntelJITEvents OProfileJIT type = Library name = ExecutionEngine parent = Libraries -required_libraries = Core MC Support Target +required_libraries = Core MC Support diff --git a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/MCJIT/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/MCJIT/LLVMBuild.txt index 900460bf1cb4..90f4d2f75e24 100644 --- a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/MCJIT/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/MCJIT/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = MCJIT parent = ExecutionEngine -required_libraries = Core ExecutionEngine RuntimeDyld Support Target JIT +required_libraries = Core ExecutionEngine RuntimeDyld Support Target diff --git a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 195c45850c6a..10fb62d63acf 100644 ---
a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/MutexGuard.h" +#include "llvm/Target/Mangler.h" using namespace llvm; @@ -231,11 +232,10 @@ void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { } uint64_t MCJIT::getExistingSymbolAddress(const std::string &Name) { - // Check with the RuntimeDyld to see if we already have this symbol. - if (Name[0] == '\1') - return Dyld.getSymbolLoadAddress(Name.substr(1)); - return Dyld.getSymbolLoadAddress((TM->getMCAsmInfo()->getGlobalPrefix() - + Name)); + Mangler Mang(TM->getDataLayout()); + SmallString<128> FullName; + Mang.getNameWithPrefix(FullName, Name); + return Dyld.getSymbolLoadAddress(FullName); } Module *MCJIT::findModuleForSymbol(const std::string &Name, @@ -320,15 +320,13 @@ void *MCJIT::getPointerToFunction(Function *F) { return NULL; // FIXME: Should the Dyld be retaining module information? Probably not. - // FIXME: Should we be using the mangler for this? Probably. // // This is the accessor for the target address, so make sure to check the // load address of the symbol, not the local address. - StringRef BaseName = F->getName(); - if (BaseName[0] == '\1') - return (void*)Dyld.getSymbolLoadAddress(BaseName.substr(1)); - return (void*)Dyld.getSymbolLoadAddress((TM->getMCAsmInfo()->getGlobalPrefix() - + BaseName).str()); + Mangler Mang(TM->getDataLayout()); + SmallString<128> Name; + Mang.getNameWithPrefix(Name, F); + return (void*)Dyld.getSymbolLoadAddress(Name); } void *MCJIT::recompileAndRelinkFunction(Function *F) { diff --git a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h index 86b478bff56f..58381c3c3436 100644 --- a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -322,7 +322,7 @@ protected: /// emitObject -- Generate a JITed object in memory from the specified module /// Currently, MCJIT only supports a single module and the module passed to /// this function call is expected to be the contained module. The module - /// is passed as a parameter here to prepare for multiple module support in + /// is passed as a parameter here to prepare for multiple module support in /// the future. ObjectBufferStream* emitObject(Module *M); diff --git a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RTDyldMemoryManager.cpp b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RTDyldMemoryManager.cpp index 58a64609b9bc..26e1fddd7695 100644 --- a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RTDyldMemoryManager.cpp +++ b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RTDyldMemoryManager.cpp @@ -34,7 +34,7 @@ RTDyldMemoryManager::~RTDyldMemoryManager() {} // Determine whether we can register EH tables. 
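For reference, the hand-rolled lookup-name logic that the two MCJIT hunks above replace with Mangler::getNameWithPrefix can be summarized in a few lines; this standalone sketch takes the global prefix as a parameter rather than querying MCAsmInfo (illustrative only):

  #include <string>

  // '\1' marks "do not mangle"; otherwise the target's global prefix
  // (for example '_' on Mach-O, none on ELF) is prepended.
  static std::string lookupName(const std::string &Name, char GlobalPrefix) {
    if (!Name.empty() && Name[0] == '\1')
      return Name.substr(1);
    return GlobalPrefix ? GlobalPrefix + Name : Name;
  }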
#if (defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__ia64__) && \ - !defined(__USING_SJLJ_EXCEPTIONS__)) + !defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__)) #define HAVE_EHTABLE_SUPPORT 1 #else #define HAVE_EHTABLE_SUPPORT 0 #endif diff --git a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h index 9cbde5daede5..28680044a365 100644 --- a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h +++ b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h @@ -1,6 +1,6 @@ //===-- ObjectImageCommon.h - Format independent executable object image -===// // -// The LLVM Compiler Infrastructure +// The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. @@ -45,17 +45,17 @@ public: virtual ~ObjectImageCommon() { delete ObjFile; } virtual object::symbol_iterator begin_symbols() const - { return ObjFile->begin_symbols(); } + { return ObjFile->begin_symbols(); } virtual object::symbol_iterator end_symbols() const - { return ObjFile->end_symbols(); } + { return ObjFile->end_symbols(); } virtual object::section_iterator begin_sections() const - { return ObjFile->begin_sections(); } + { return ObjFile->begin_sections(); } virtual object::section_iterator end_sections() const - { return ObjFile->end_sections(); } + { return ObjFile->end_sections(); } virtual /* Triple::ArchType */ unsigned getArch() const - { return ObjFile->getArch(); } + { return ObjFile->getArch(); } virtual StringRef getData() const { return ObjFile->getData(); } @@ -64,9 +64,9 @@ public: // Subclasses can override these methods to update the image with loaded // addresses for sections and common symbols virtual void updateSectionAddress(const object::SectionRef &Sec, - uint64_t Addr) {} + uint64_t Addr) {} virtual void updateSymbolAddress(const object::SymbolRef &Sym, uint64_t Addr) - {} + {} // Subclasses can override these methods to provide JIT debugging support virtual void registerWithDebugger() {} @@ -76,4 +76,3 @@ public: } // end namespace llvm #endif // LLVM_RUNTIMEDYLD_OBJECT_IMAGE_H - diff --git a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 161135a4f8c0..f872ef98bcd0 100644 --- a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -182,7 +182,7 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) { } processRelocationRef(SectionID, *i, *obj, LocalSections, LocalSymbols, - Stubs); + Stubs); } } diff --git a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 3014b30773ac..634f6cebe1ce 100644 --- a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -149,7 +149,7 @@ protected: SectionList Sections; typedef unsigned SID; // Type for SectionIDs - #define RTDYLD_INVALID_SECTION_ID ((SID)(-1)) + #define RTDYLD_INVALID_SECTION_ID ((SID)(-1)) // Keep a map of sections from object file to the SectionID which // references it.
@@ -364,5 +364,4 @@ public: } // end namespace llvm - #endif diff --git a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 5b92867b4778..66ee50ebe9e3 100644 --- a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -148,7 +148,7 @@ void RuntimeDyldMachO::resolveRelocation(const SectionEntry &Section, unsigned MachoType = Type; unsigned Size = 1 << LogSize; - DEBUG(dbgs() << "resolveRelocation LocalAddress: " + DEBUG(dbgs() << "resolveRelocation LocalAddress: " << format("%p", LocalAddress) << " FinalAddress: " << format("%p", FinalAddress) << " Value: " << format("%p", Value) diff --git a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index bbf6aa9f6506..3e0870da023b 100644 --- a/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/external/bsd/llvm/dist/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -72,7 +72,7 @@ class RuntimeDyldMachO : public RuntimeDyldImpl { EHFrameRelatedSections() : EHFrameSID(RTDYLD_INVALID_SECTION_ID), TextSID(RTDYLD_INVALID_SECTION_ID), ExceptTabSID(RTDYLD_INVALID_SECTION_ID) {} - EHFrameRelatedSections(SID EH, SID T, SID Ex) + EHFrameRelatedSections(SID EH, SID T, SID Ex) : EHFrameSID(EH), TextSID(T), ExceptTabSID(Ex) {} SID EHFrameSID; SID TextSID; diff --git a/external/bsd/llvm/dist/llvm/lib/IR/AsmWriter.cpp b/external/bsd/llvm/dist/llvm/lib/IR/AsmWriter.cpp index 7decffd0effb..7215fc2a4c1a 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/AsmWriter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/IR/AsmWriter.cpp @@ -88,6 +88,8 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::PTX_Device: Out << "ptx_device"; break; case CallingConv::X86_64_SysV: Out << "x86_64_sysvcc"; break; case CallingConv::X86_64_Win64: Out << "x86_64_win64cc"; break; + case CallingConv::SPIR_FUNC: Out << "spir_func"; break; + case CallingConv::SPIR_KERNEL: Out << "spir_kernel"; break; } } @@ -192,16 +194,16 @@ void TypePrinting::incorporateTypes(const Module &M) { /// use of type names or up references to shorten the type name where possible. 
void TypePrinting::print(Type *Ty, raw_ostream &OS) { switch (Ty->getTypeID()) { - case Type::VoidTyID: OS << "void"; break; - case Type::HalfTyID: OS << "half"; break; - case Type::FloatTyID: OS << "float"; break; - case Type::DoubleTyID: OS << "double"; break; - case Type::X86_FP80TyID: OS << "x86_fp80"; break; - case Type::FP128TyID: OS << "fp128"; break; - case Type::PPC_FP128TyID: OS << "ppc_fp128"; break; - case Type::LabelTyID: OS << "label"; break; - case Type::MetadataTyID: OS << "metadata"; break; - case Type::X86_MMXTyID: OS << "x86_mmx"; break; + case Type::VoidTyID: OS << "void"; return; + case Type::HalfTyID: OS << "half"; return; + case Type::FloatTyID: OS << "float"; return; + case Type::DoubleTyID: OS << "double"; return; + case Type::X86_FP80TyID: OS << "x86_fp80"; return; + case Type::FP128TyID: OS << "fp128"; return; + case Type::PPC_FP128TyID: OS << "ppc_fp128"; return; + case Type::LabelTyID: OS << "label"; return; + case Type::MetadataTyID: OS << "metadata"; return; + case Type::X86_MMXTyID: OS << "x86_mmx"; return; case Type::IntegerTyID: OS << 'i' << cast<IntegerType>(Ty)->getBitWidth(); return; @@ -261,10 +263,8 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) { OS << '>'; return; } - default: - OS << ""; - return; } + llvm_unreachable("Invalid TypeID"); } void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) { @@ -525,7 +525,7 @@ void SlotTracker::processFunction() { // optimizer. if (const CallInst *CI = dyn_cast<CallInst>(I)) { if (Function *F = CI->getCalledFunction()) - if (F->getName().startswith("llvm.")) + if (F->isIntrinsic()) for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (MDNode *N = dyn_cast_or_null<MDNode>(I->getOperand(i))) CreateMetadataSlot(N); diff --git a/external/bsd/llvm/dist/llvm/lib/IR/Attributes.cpp b/external/bsd/llvm/dist/llvm/lib/IR/Attributes.cpp index 0f2b7a0ebb4f..9d9d948527cf 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/Attributes.cpp +++ b/external/bsd/llvm/dist/llvm/lib/IR/Attributes.cpp @@ -166,6 +166,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return "builtin"; if (hasAttribute(Attribute::ByVal)) return "byval"; + if (hasAttribute(Attribute::InAlloca)) + return "inalloca"; if (hasAttribute(Attribute::InlineHint)) return "inlinehint"; if (hasAttribute(Attribute::InReg)) @@ -388,6 +390,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { case Attribute::Cold: return 1ULL << 40; case Attribute::Builtin: return 1ULL << 41; case Attribute::OptimizeNone: return 1ULL << 42; + case Attribute::InAlloca: return 1ULL << 43; } llvm_unreachable("Unsupported attribute type"); } @@ -1174,7 +1177,8 @@ AttributeSet AttributeFuncs::typeIncompatible(Type *Ty, uint64_t Index) { .addAttribute(Attribute::NoCapture) .addAttribute(Attribute::ReadNone) .addAttribute(Attribute::ReadOnly) - .addAttribute(Attribute::StructRet); + .addAttribute(Attribute::StructRet) + .addAttribute(Attribute::InAlloca); return AttributeSet::get(Ty->getContext(), Index, Incompatible); } diff --git a/external/bsd/llvm/dist/llvm/lib/IR/AutoUpgrade.cpp b/external/bsd/llvm/dist/llvm/lib/IR/AutoUpgrade.cpp index 6f79919f241d..d12bf7b9e3c9 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/AutoUpgrade.cpp +++ b/external/bsd/llvm/dist/llvm/lib/IR/AutoUpgrade.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/AutoUpgrade.h" +#include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -489,3 +490,12 @@ Value
*llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { return 0; } + +/// Check the debug info version number, if it is out-dated, drop the debug +/// info. Return true if module is modified. +bool llvm::UpgradeDebugInfo(Module &M) { + if (getDebugMetadataVersionFromModule(M) == DEBUG_METADATA_VERSION) + return false; + + return StripDebugInfo(M); +} diff --git a/external/bsd/llvm/dist/llvm/lib/IR/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/IR/CMakeLists.txt index 581946c5486f..b825583bf370 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/IR/CMakeLists.txt @@ -6,6 +6,8 @@ add_llvm_library(LLVMCore ConstantFold.cpp Constants.cpp Core.cpp + DiagnosticInfo.cpp + DiagnosticPrinter.cpp DIBuilder.cpp DataLayout.cpp DebugInfo.cpp diff --git a/external/bsd/llvm/dist/llvm/lib/IR/ConstantFold.cpp b/external/bsd/llvm/dist/llvm/lib/IR/ConstantFold.cpp index f5e225cffc14..e3f8954ad89a 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/ConstantFold.cpp +++ b/external/bsd/llvm/dist/llvm/lib/IR/ConstantFold.cpp @@ -705,12 +705,21 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond, SmallVector<Constant*, 32> Result; Type *Ty = IntegerType::get(CondV->getContext(), 32); for (unsigned i = 0, e = V1->getType()->getVectorNumElements(); i != e;++i){ - ConstantInt *Cond = dyn_cast<ConstantInt>(CondV->getOperand(i)); - if (Cond == 0) break; - - Constant *V = Cond->isNullValue() ? V2 : V1; - Constant *Res = ConstantExpr::getExtractElement(V, ConstantInt::get(Ty, i)); - Result.push_back(Res); + Constant *V; + Constant *V1Element = ConstantExpr::getExtractElement(V1, + ConstantInt::get(Ty, i)); + Constant *V2Element = ConstantExpr::getExtractElement(V2, + ConstantInt::get(Ty, i)); + Constant *Cond = dyn_cast<Constant>(CondV->getOperand(i)); + if (V1Element == V2Element) { + V = V1Element; + } else if (isa<UndefValue>(Cond)) { + V = isa<UndefValue>(V1Element) ? V1Element : V2Element; + } else { + if (!isa<ConstantInt>(Cond)) break; + V = Cond->isNullValue() ? V2Element : V1Element; + } + Result.push_back(V); } // If we were able to build the vector, return it.
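The new ConstantFoldSelectInstruction loop above folds a vector select lane by lane even when some condition lanes are undef. The same decision logic over a toy lane type (a sketch under these simplified assumptions, not LLVM's Constant hierarchy):

  // A lane is either undef or a concrete integer, standing in for
  // UndefValue vs. ConstantInt.
  struct Lane { bool IsUndef; long Val; };

  static bool sameLane(const Lane &A, const Lane &B) {
    return A.IsUndef == B.IsUndef && (A.IsUndef || A.Val == B.Val);
  }

  // Equal operands fold unconditionally; an undef condition may legally pick
  // either side, so prefer an undef operand; a concrete condition selects
  // V1 (nonzero) or V2 (zero), mirroring the new loop body.
  static Lane foldSelectLane(const Lane &Cond, const Lane &V1, const Lane &V2) {
    if (sameLane(V1, V2))
      return V1;
    if (Cond.IsUndef)
      return V1.IsUndef ? V1 : V2;
    return Cond.Val != 0 ? V1 : V2;
  }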
diff --git a/external/bsd/llvm/dist/llvm/lib/IR/Constants.cpp b/external/bsd/llvm/dist/llvm/lib/IR/Constants.cpp index a8a325ae27b7..690ac597b062 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/Constants.cpp +++ b/external/bsd/llvm/dist/llvm/lib/IR/Constants.cpp @@ -1499,7 +1499,18 @@ Constant *ConstantExpr::getPointerCast(Constant *S, Type *Ty) { return getBitCast(S, Ty); } -Constant *ConstantExpr::getIntegerCast(Constant *C, Type *Ty, +Constant *ConstantExpr::getPointerBitCastOrAddrSpaceCast(Constant *S, + Type *Ty) { + assert(S->getType()->isPtrOrPtrVectorTy() && "Invalid cast"); + assert(Ty->isPtrOrPtrVectorTy() && "Invalid cast"); + + if (S->getType()->getPointerAddressSpace() != Ty->getPointerAddressSpace()) + return getAddrSpaceCast(S, Ty); + + return getBitCast(S, Ty); +} + +Constant *ConstantExpr::getIntegerCast(Constant *C, Type *Ty, bool isSigned) { assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() && "Invalid cast"); diff --git a/external/bsd/llvm/dist/llvm/lib/IR/Core.cpp b/external/bsd/llvm/dist/llvm/lib/IR/Core.cpp index c70f4591186f..324780fc2f55 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/Core.cpp +++ b/external/bsd/llvm/dist/llvm/lib/IR/Core.cpp @@ -175,7 +175,6 @@ LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M) { LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) { switch (unwrap(Ty)->getTypeID()) { - default: llvm_unreachable("Unhandled TypeID."); case Type::VoidTyID: return LLVMVoidTypeKind; case Type::HalfTyID: @@ -209,6 +208,7 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) { case Type::X86_MMXTyID: return LLVMX86_MMXTypeKind; } + llvm_unreachable("Unhandled TypeID."); } LLVMBool LLVMTypeIsSized(LLVMTypeRef Ty) @@ -2219,6 +2219,29 @@ LLVMValueRef LLVMBuildStore(LLVMBuilderRef B, LLVMValueRef Val, return wrap(unwrap(B)->CreateStore(unwrap(Val), unwrap(PointerVal))); } +static AtomicOrdering mapFromLLVMOrdering(LLVMAtomicOrdering Ordering) { + switch (Ordering) { + case LLVMAtomicOrderingNotAtomic: return NotAtomic; + case LLVMAtomicOrderingUnordered: return Unordered; + case LLVMAtomicOrderingMonotonic: return Monotonic; + case LLVMAtomicOrderingAcquire: return Acquire; + case LLVMAtomicOrderingRelease: return Release; + case LLVMAtomicOrderingAcquireRelease: return AcquireRelease; + case LLVMAtomicOrderingSequentiallyConsistent: + return SequentiallyConsistent; + } + + llvm_unreachable("Invalid LLVMAtomicOrdering value!"); +} + +LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering Ordering, + LLVMBool isSingleThread, const char *Name) { + return wrap( + unwrap(B)->CreateFence(mapFromLLVMOrdering(Ordering), + isSingleThread ? 
SingleThread : CrossThread, + Name)); +} + LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer, LLVMValueRef *Indices, unsigned NumIndices, const char *Name) { @@ -2476,22 +2499,8 @@ LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op, case LLVMAtomicRMWBinOpUMax: intop = AtomicRMWInst::UMax; break; case LLVMAtomicRMWBinOpUMin: intop = AtomicRMWInst::UMin; break; } - AtomicOrdering intordering; - switch (ordering) { - case LLVMAtomicOrderingNotAtomic: intordering = NotAtomic; break; - case LLVMAtomicOrderingUnordered: intordering = Unordered; break; - case LLVMAtomicOrderingMonotonic: intordering = Monotonic; break; - case LLVMAtomicOrderingAcquire: intordering = Acquire; break; - case LLVMAtomicOrderingRelease: intordering = Release; break; - case LLVMAtomicOrderingAcquireRelease: - intordering = AcquireRelease; - break; - case LLVMAtomicOrderingSequentiallyConsistent: - intordering = SequentiallyConsistent; - break; - } return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), unwrap(Val), - intordering, singleThread ? SingleThread : CrossThread)); + mapFromLLVMOrdering(ordering), singleThread ? SingleThread : CrossThread)); } diff --git a/external/bsd/llvm/dist/llvm/lib/IR/DIBuilder.cpp b/external/bsd/llvm/dist/llvm/lib/IR/DIBuilder.cpp index c4a9f4113060..c7e75849005f 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/DIBuilder.cpp +++ b/external/bsd/llvm/dist/llvm/lib/IR/DIBuilder.cpp @@ -706,7 +706,8 @@ DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, /// createSubroutineType - Create subroutine type. DICompositeType DIBuilder::createSubroutineType(DIFile File, - DIArray ParameterTypes) { + DIArray ParameterTypes, + unsigned Flags) { // TAG_subroutine_type is encoded in DICompositeType format. 
Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type), @@ -717,7 +718,7 @@ DICompositeType DIBuilder::createSubroutineType(DIFile File, ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset - ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), // Flags NULL, ParameterTypes, ConstantInt::get(Type::getInt32Ty(VMContext), 0), diff --git a/external/bsd/llvm/dist/llvm/lib/IR/DataLayout.cpp b/external/bsd/llvm/dist/llvm/lib/IR/DataLayout.cpp index 6bdc09eaee44..ee2b4bc4e7bc 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/DataLayout.cpp +++ b/external/bsd/llvm/dist/llvm/lib/IR/DataLayout.cpp @@ -18,6 +18,8 @@ #include "llvm/IR/DataLayout.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" @@ -118,21 +120,21 @@ LayoutAlignElem::operator==(const LayoutAlignElem &rhs) const { } const LayoutAlignElem -DataLayout::InvalidAlignmentElem = LayoutAlignElem::get(INVALID_ALIGN, 0, 0, 0); +DataLayout::InvalidAlignmentElem = { INVALID_ALIGN, 0, 0, 0 }; //===----------------------------------------------------------------------===// // PointerAlignElem, PointerAlign support //===----------------------------------------------------------------------===// PointerAlignElem -PointerAlignElem::get(uint32_t addr_space, unsigned abi_align, - unsigned pref_align, uint32_t bit_width) { - assert(abi_align <= pref_align && "Preferred alignment worse than ABI!"); +PointerAlignElem::get(uint32_t AddressSpace, unsigned ABIAlign, + unsigned PrefAlign, uint32_t TypeByteWidth) { + assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!"); PointerAlignElem retval; - retval.AddressSpace = addr_space; - retval.ABIAlign = abi_align; - retval.PrefAlign = pref_align; - retval.TypeBitWidth = bit_width; + retval.AddressSpace = AddressSpace; + retval.ABIAlign = ABIAlign; + retval.PrefAlign = PrefAlign; + retval.TypeByteWidth = TypeByteWidth; return retval; } @@ -141,36 +143,54 @@ PointerAlignElem::operator==(const PointerAlignElem &rhs) const { return (ABIAlign == rhs.ABIAlign && AddressSpace == rhs.AddressSpace && PrefAlign == rhs.PrefAlign - && TypeBitWidth == rhs.TypeBitWidth); + && TypeByteWidth == rhs.TypeByteWidth); } const PointerAlignElem -DataLayout::InvalidPointerElem = PointerAlignElem::get(~0U, 0U, 0U, 0U); +DataLayout::InvalidPointerElem = { 0U, 0U, 0U, ~0U }; //===----------------------------------------------------------------------===// // DataLayout Class Implementation //===----------------------------------------------------------------------===// +const char *DataLayout::getManglingComponent(const Triple &T) { + if (T.isOSBinFormatMachO()) + return "-m:o"; + if (T.isOSBinFormatELF() || T.isArch64Bit()) + return "-m:e"; + assert(T.isOSBinFormatCOFF()); + return "-m:c"; +} + +static const LayoutAlignElem DefaultAlignments[] = { + { INTEGER_ALIGN, 1, 1, 1 }, // i1 + { INTEGER_ALIGN, 8, 1, 1 }, // i8 + { INTEGER_ALIGN, 16, 2, 2 }, // i16 + { INTEGER_ALIGN, 32, 4, 4 }, // i32 + { INTEGER_ALIGN, 64, 4, 8 }, // i64 + { FLOAT_ALIGN, 16, 2, 2 }, // half + { FLOAT_ALIGN, 32, 4, 4 }, // float + { FLOAT_ALIGN, 64, 8, 8 }, // double + { FLOAT_ALIGN, 128, 16, 16 }, // ppcf128, quad, ... + { VECTOR_ALIGN, 64, 8, 8 }, // v2i32, v1i64, ... 
+ { VECTOR_ALIGN, 128, 16, 16 }, // v16i8, v8i16, v4i32, ... + { AGGREGATE_ALIGN, 0, 0, 8 } // struct +}; + void DataLayout::init(StringRef Desc) { initializeDataLayoutPass(*PassRegistry::getPassRegistry()); LayoutMap = 0; LittleEndian = false; StackNaturalAlign = 0; + ManglingMode = MM_None; // Default alignments - setAlignment(INTEGER_ALIGN, 1, 1, 1); // i1 - setAlignment(INTEGER_ALIGN, 1, 1, 8); // i8 - setAlignment(INTEGER_ALIGN, 2, 2, 16); // i16 - setAlignment(INTEGER_ALIGN, 4, 4, 32); // i32 - setAlignment(INTEGER_ALIGN, 4, 8, 64); // i64 - setAlignment(FLOAT_ALIGN, 2, 2, 16); // half - setAlignment(FLOAT_ALIGN, 4, 4, 32); // float - setAlignment(FLOAT_ALIGN, 8, 8, 64); // double - setAlignment(FLOAT_ALIGN, 16, 16, 128); // ppcf128, quad, ... - setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32, v1i64, ... - setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ... - setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct + for (int I = 0, N = array_lengthof(DefaultAlignments); I < N; ++I) { + const LayoutAlignElem &E = DefaultAlignments[I]; + setAlignment((AlignTypeEnum)E.AlignType, E.ABIAlign, E.PrefAlign, + E.TypeBitWidth); + } setPointerAlignment(0, 8, 8, 8); parseSpecifier(Desc); @@ -216,6 +236,10 @@ void DataLayout::parseSpecifier(StringRef Desc) { Tok = Tok.substr(1); switch (Specifier) { + case 's': + // Ignored for backward compatibility. + // FIXME: remove this on LLVM 4.0. + break; case 'E': LittleEndian = false; break; @@ -250,8 +274,7 @@ void DataLayout::parseSpecifier(StringRef Desc) { case 'i': case 'v': case 'f': - case 'a': - case 's': { + case 'a': { AlignTypeEnum AlignType; switch (Specifier) { default: @@ -259,7 +282,6 @@ void DataLayout::parseSpecifier(StringRef Desc) { case 'v': AlignType = VECTOR_ALIGN; break; case 'f': AlignType = FLOAT_ALIGN; break; case 'a': AlignType = AGGREGATE_ALIGN; break; - case 's': AlignType = STACK_ALIGN; break; } // Bit size. 
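With the table-driven defaults above, the 's' (stack alignment-type) specifier is now parsed but ignored, so older module strings keep loading. A small sketch of the observable behavior, using only the public DataLayout constructor and accessors (the exact legacy spelling of the 's' component is an assumption):

#include "llvm/IR/DataLayout.h"
#include <cassert>

// Hedged sketch: an old-style "s" component no longer aborts parsing.
void acceptLegacyStackSpecifier() {
  llvm::DataLayout DL("e-s0:64:64");
  assert(DL.isLittleEndian()); // the rest of the string still took effect
}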
@@ -294,6 +316,26 @@ void DataLayout::parseSpecifier(StringRef Desc) { StackNaturalAlign = inBytes(getInt(Tok)); break; } + case 'm': + assert(Tok.empty()); + assert(Rest.size() == 1); + switch(Rest[0]) { + default: + llvm_unreachable("Unknown mangling in datalayout string"); + case 'e': + ManglingMode = MM_ELF; + break; + case 'o': + ManglingMode = MM_MachO; + break; + case 'm': + ManglingMode = MM_Mips; + break; + case 'c': + ManglingMode = MM_COFF; + break; + } + break; default: llvm_unreachable("Unknown specifier in datalayout string"); break; @@ -335,18 +377,18 @@ DataLayout::setAlignment(AlignTypeEnum align_type, unsigned abi_align, pref_align, bit_width)); } -void -DataLayout::setPointerAlignment(uint32_t addr_space, unsigned abi_align, - unsigned pref_align, uint32_t bit_width) { - assert(abi_align <= pref_align && "Preferred alignment worse than ABI!"); - DenseMap::iterator val = Pointers.find(addr_space); +void DataLayout::setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign, + unsigned PrefAlign, + uint32_t TypeByteWidth) { + assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!"); + DenseMap::iterator val = Pointers.find(AddrSpace); if (val == Pointers.end()) { - Pointers[addr_space] = PointerAlignElem::get(addr_space, - abi_align, pref_align, bit_width); + Pointers[AddrSpace] = + PointerAlignElem::get(AddrSpace, ABIAlign, PrefAlign, TypeByteWidth); } else { - val->second.ABIAlign = abi_align; - val->second.PrefAlign = pref_align; - val->second.TypeBitWidth = bit_width; + val->second.ABIAlign = ABIAlign; + val->second.PrefAlign = PrefAlign; + val->second.TypeByteWidth = TypeByteWidth; } } @@ -470,6 +512,24 @@ std::string DataLayout::getStringRepresentation() const { raw_string_ostream OS(Result); OS << (LittleEndian ? "e" : "E"); + + switch (ManglingMode) { + case MM_None: + break; + case MM_ELF: + OS << "-m:e"; + break; + case MM_MachO: + OS << "-m:o"; + break; + case MM_COFF: + OS << "-m:c"; + break; + case MM_Mips: + OS << "-m:m"; + break; + } + SmallVector addrSpaces; // Lets get all of the known address spaces and sort them // into increasing order so that we can emit the string @@ -483,19 +543,34 @@ std::string DataLayout::getStringRepresentation() const { for (SmallVectorImpl::iterator asb = addrSpaces.begin(), ase = addrSpaces.end(); asb != ase; ++asb) { const PointerAlignElem &PI = Pointers.find(*asb)->second; + + // Skip default. 
+ if (PI.AddressSpace == 0 && PI.ABIAlign == 8 && PI.PrefAlign == 8 && + PI.TypeByteWidth == 8) + continue; + OS << "-p"; if (PI.AddressSpace) { OS << PI.AddressSpace; } - OS << ":" << PI.TypeBitWidth*8 << ':' << PI.ABIAlign*8 - << ':' << PI.PrefAlign*8; + OS << ":" << PI.TypeByteWidth*8 << ':' << PI.ABIAlign*8; + if (PI.PrefAlign != PI.ABIAlign) + OS << ':' << PI.PrefAlign*8; } - OS << "-S" << StackNaturalAlign*8; + const LayoutAlignElem *DefaultStart = DefaultAlignments; + const LayoutAlignElem *DefaultEnd = + DefaultStart + array_lengthof(DefaultAlignments); for (unsigned i = 0, e = Alignments.size(); i != e; ++i) { const LayoutAlignElem &AI = Alignments[i]; - OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':' - << AI.ABIAlign*8 << ':' << AI.PrefAlign*8; + if (std::find(DefaultStart, DefaultEnd, AI) != DefaultEnd) + continue; + OS << '-' << (char)AI.AlignType; + if (AI.TypeBitWidth) + OS << AI.TypeBitWidth; + OS << ':' << AI.ABIAlign*8; + if (AI.ABIAlign != AI.PrefAlign) + OS << ':' << AI.PrefAlign*8; } if (!LegalIntWidths.empty()) { @@ -504,6 +579,10 @@ std::string DataLayout::getStringRepresentation() const { for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i) OS << ':' << (unsigned)LegalIntWidths[i]; } + + if (StackNaturalAlign) + OS << "-S" << StackNaturalAlign*8; + return OS.str(); } @@ -589,14 +668,6 @@ unsigned DataLayout::getABIIntegerTypeAlignment(unsigned BitWidth) const { return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0); } -unsigned DataLayout::getCallFrameTypeAlignment(Type *Ty) const { - for (unsigned i = 0, e = Alignments.size(); i != e; ++i) - if (Alignments[i].AlignType == STACK_ALIGN) - return Alignments[i].ABIAlign; - - return getABITypeAlignment(Ty); -} - unsigned DataLayout::getPrefTypeAlignment(Type *Ty) const { return getAlignment(Ty, false); } diff --git a/external/bsd/llvm/dist/llvm/lib/IR/DebugInfo.cpp b/external/bsd/llvm/dist/llvm/lib/IR/DebugInfo.cpp index 692e236b9316..f7215dc51e1e 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/DebugInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/IR/DebugInfo.cpp @@ -461,14 +461,13 @@ bool DIType::Verify() const { // DIType is abstract, it should be a BasicType, a DerivedType or // a CompositeType. if (isBasicType()) - DIBasicType(DbgNode).Verify(); + return DIBasicType(DbgNode).Verify(); else if (isCompositeType()) - DICompositeType(DbgNode).Verify(); + return DICompositeType(DbgNode).Verify(); else if (isDerivedType()) - DIDerivedType(DbgNode).Verify(); + return DIDerivedType(DbgNode).Verify(); else return false; - return true; } /// Verify - Verify that a basic type descriptor is well formed. @@ -505,6 +504,10 @@ bool DICompositeType::Verify() const { if (!fieldIsMDString(DbgNode, 14)) return false; + // A subroutine type can't be both & and &&. + if (isLValueReference() && isRValueReference()) + return false; + return DbgNode->getNumOperands() == 15; } @@ -521,6 +524,11 @@ bool DISubprogram::Verify() const { // Containing type @ field 12. if (!fieldIsTypeRef(DbgNode, 12)) return false; + + // A subprogram can't be both & and &&. 
+ if (isLValueReference() && isRValueReference()) + return false; + return DbgNode->getNumOperands() == 20; } @@ -658,19 +666,6 @@ void DICompositeType::setTypeArray(DIArray Elements, DIArray TParams) { DbgNode = N; } -void DICompositeType::addMember(DIDescriptor D) { - SmallVector M; - DIArray OrigM = getTypeArray(); - unsigned Elements = OrigM.getNumElements(); - if (Elements == 1 && !OrigM.getElement(0)) - Elements = 0; - M.reserve(Elements + 1); - for (unsigned i = 0; i != Elements; ++i) - M.push_back(OrigM.getElement(i)); - M.push_back(D); - setTypeArray(DIArray(MDNode::get(DbgNode->getContext(), M))); -} - /// Generate a reference to this DIType. Uses the type identifier instead /// of the actual MDNode if possible, to help type uniquing. DIScopeRef DIScope::getRef() const { @@ -1298,6 +1293,12 @@ void DIType::printInternal(raw_ostream &OS) const { OS << " [vector]"; if (isStaticMember()) OS << " [static]"; + + if (isLValueReference()) + OS << " [reference]"; + + if (isRValueReference()) + OS << " [rvalue reference]"; } void DIDerivedType::printInternal(raw_ostream &OS) const { @@ -1337,6 +1338,12 @@ void DISubprogram::printInternal(raw_ostream &OS) const { else if (isProtected()) OS << " [protected]"; + if (isLValueReference()) + OS << " [reference]"; + + if (isRValueReference()) + OS << " [rvalue reference]"; + StringRef Res = getName(); if (!Res.empty()) OS << " [" << Res << ']'; @@ -1426,3 +1433,63 @@ DIScopeRef DIDescriptor::getFieldAs(unsigned Elt) const { template <> DITypeRef DIDescriptor::getFieldAs(unsigned Elt) const { return DITypeRef(getField(DbgNode, Elt)); } + +/// Strip debug info in the module if it exists. +/// To do this, we remove all calls to the debugger intrinsics and any named +/// metadata for debugging. We also remove debug locations for instructions. +/// Return true if module is modified. +bool llvm::StripDebugInfo(Module &M) { + + bool Changed = false; + + // Remove all of the calls to the debugger intrinsics, and remove them from + // the module. + if (Function *Declare = M.getFunction("llvm.dbg.declare")) { + while (!Declare->use_empty()) { + CallInst *CI = cast(Declare->use_back()); + CI->eraseFromParent(); + } + Declare->eraseFromParent(); + Changed = true; + } + + if (Function *DbgVal = M.getFunction("llvm.dbg.value")) { + while (!DbgVal->use_empty()) { + CallInst *CI = cast(DbgVal->use_back()); + CI->eraseFromParent(); + } + DbgVal->eraseFromParent(); + Changed = true; + } + + for (Module::named_metadata_iterator NMI = M.named_metadata_begin(), + NME = M.named_metadata_end(); NMI != NME;) { + NamedMDNode *NMD = NMI; + ++NMI; + if (NMD->getName().startswith("llvm.dbg.")) { + NMD->eraseFromParent(); + Changed = true; + } + } + + for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) + for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; + ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; + ++BI) { + if (!BI->getDebugLoc().isUnknown()) { + Changed = true; + BI->setDebugLoc(DebugLoc()); + } + } + + return Changed; +} + +/// Return Debug Info Metadata Version by checking module flags. 
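StripDebugInfo() above and getDebugMetadataVersionFromModule() below are designed to be used together when a consumer meets debug info it does not understand. A sketch, assuming the header location of this era (llvm/DebugInfo.h) and a caller-supplied supported version:

#include "llvm/DebugInfo.h" // assumed header location for these helpers
#include "llvm/IR/Module.h"

// Hedged sketch: drop debug info whose version this consumer cannot handle.
bool upgradeOrStripDebugInfo(llvm::Module &M, unsigned SupportedVersion) {
  if (llvm::getDebugMetadataVersionFromModule(M) == SupportedVersion)
    return false;                 // nothing to do
  return llvm::StripDebugInfo(M); // true if anything was removed
}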
+unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) {
+  Value *Val = M.getModuleFlag("Debug Info Version");
+  if (!Val)
+    return 0;
+  return cast<ConstantInt>(Val)->getZExtValue();
+}
diff --git a/external/bsd/llvm/dist/llvm/lib/IR/DiagnosticInfo.cpp b/external/bsd/llvm/dist/llvm/lib/IR/DiagnosticInfo.cpp
new file mode 100644
index 000000000000..7791050d6a12
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/lib/IR/DiagnosticInfo.cpp
@@ -0,0 +1,54 @@
+//===- llvm/Support/DiagnosticInfo.cpp - Diagnostic Definitions -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the different classes involved in low level diagnostics.
+//
+// Diagnostics reporting is still done as part of the LLVMContext.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Atomic.h"
+
+#include <string>
+
+using namespace llvm;
+
+int llvm::getNextAvailablePluginDiagnosticKind() {
+  static sys::cas_flag PluginKindID = DK_FirstPluginKind;
+  return (int)sys::AtomicIncrement(&PluginKindID);
+}
+
+DiagnosticInfoInlineAsm::DiagnosticInfoInlineAsm(const Instruction &I,
+                                                 const Twine &MsgStr,
+                                                 DiagnosticSeverity Severity)
+    : DiagnosticInfo(DK_InlineAsm, Severity), LocCookie(0), MsgStr(MsgStr),
+      Instr(&I) {
+  if (const MDNode *SrcLoc = I.getMetadata("srcloc")) {
+    if (SrcLoc->getNumOperands() != 0)
+      if (const ConstantInt *CI = dyn_cast<ConstantInt>(SrcLoc->getOperand(0)))
+        LocCookie = CI->getZExtValue();
+  }
+}
+
+void DiagnosticInfoInlineAsm::print(DiagnosticPrinter &DP) const {
+  DP << getMsgStr();
+  if (getLocCookie())
+    DP << " at line " << getLocCookie();
+}
+
+void DiagnosticInfoStackSize::print(DiagnosticPrinter &DP) const {
+  DP << "stack size limit exceeded (" << getStackSize() << ") in "
+     << getFunction();
+}
diff --git a/external/bsd/llvm/dist/llvm/lib/IR/DiagnosticPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/IR/DiagnosticPrinter.cpp
new file mode 100644
index 000000000000..d76f9f586a9e
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/lib/IR/DiagnosticPrinter.cpp
@@ -0,0 +1,101 @@
+//===- llvm/Support/DiagnosticInfo.cpp - Diagnostic Definitions -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a diagnostic printer relying on raw_ostream.
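The printer declared by this file is normally driven through DiagnosticInfo::print(); a sketch of rendering any diagnostic to a string, mirroring the default handling added to LLVMContext::diagnose() later in this patch:

#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

// Hedged sketch: render any DiagnosticInfo into a std::string.
std::string renderDiagnostic(const llvm::DiagnosticInfo &DI) {
  std::string Msg;
  llvm::raw_string_ostream Stream(Msg);
  llvm::DiagnosticPrinterRawOStream DP(Stream);
  DI.print(DP);        // dispatches to the concrete diagnostic's print()
  return Stream.str(); // flushes the buffered text
}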
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<(char C) { + Stream << C; + return *this; +} + +DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<(unsigned char C) { + Stream << C; + return *this; +} + +DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<(signed char C) { + Stream << C; + return *this; +} + +DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<(StringRef Str) { + Stream << Str; + return *this; +} + +DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<(const char *Str) { + Stream << Str; + return *this; +} + +DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<( + const std::string &Str) { + Stream << Str; + return *this; +} + +DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<(unsigned long N) { + Stream << N; + return *this; +} +DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<(long N) { + Stream << N; + return *this; +} + +DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<( + unsigned long long N) { + Stream << N; + return *this; +} + +DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<(long long N) { + Stream << N; + return *this; +} + +DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<(const void *P) { + Stream << P; + return *this; +} + +DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<(unsigned int N) { + Stream << N; + return *this; +} + +DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<(int N) { + Stream << N; + return *this; +} + +DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<(double N) { + Stream << N; + return *this; +} + +DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<(const Twine &Str) { + Str.print(Stream); + return *this; +} + +// IR related types. +DiagnosticPrinter &DiagnosticPrinterRawOStream::operator<<(const Value &V) { + Stream << V.getName(); + return *this; +} diff --git a/external/bsd/llvm/dist/llvm/lib/IR/Function.cpp b/external/bsd/llvm/dist/llvm/lib/IR/Function.cpp index e8a2402b3d95..970bbaeed8b5 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/Function.cpp +++ b/external/bsd/llvm/dist/llvm/lib/IR/Function.cpp @@ -84,6 +84,14 @@ bool Argument::hasByValAttr() const { hasAttribute(getArgNo()+1, Attribute::ByVal); } +/// \brief Return true if this argument has the inalloca attribute on it in +/// its containing function. +bool Argument::hasInAllocaAttr() const { + if (!getType()->isPointerTy()) return false; + return getParent()->getAttributes(). 
+ hasAttribute(getArgNo()+1, Attribute::InAlloca); +} + unsigned Argument::getParamAlignment() const { assert(getType()->isPointerTy() && "Only pointers have alignments"); return getParent()->getParamAlignment(getArgNo()+1); diff --git a/external/bsd/llvm/dist/llvm/lib/IR/GCOV.cpp b/external/bsd/llvm/dist/llvm/lib/IR/GCOV.cpp index f0f8c7d74bb9..e39842c2fddb 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/GCOV.cpp +++ b/external/bsd/llvm/dist/llvm/lib/IR/GCOV.cpp @@ -1,4 +1,4 @@ -//===- GCOVr.cpp - LLVM coverage tool -------------------------------------===// +//===- GCOV.cpp - LLVM coverage tool --------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -19,6 +19,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/system_error.h" +#include using namespace llvm; //===----------------------------------------------------------------------===// @@ -29,63 +30,73 @@ GCOVFile::~GCOVFile() { DeleteContainerPointers(Functions); } -/// isGCDAFile - Return true if Format identifies a .gcda file. -static bool isGCDAFile(GCOV::GCOVFormat Format) { - return Format == GCOV::GCDA_402 || Format == GCOV::GCDA_404; -} +/// readGCNO - Read GCNO buffer. +bool GCOVFile::readGCNO(GCOVBuffer &Buffer) { + if (!Buffer.readGCNOFormat()) return false; + if (!Buffer.readGCOVVersion(Version)) return false; -/// isGCNOFile - Return true if Format identifies a .gcno file. -static bool isGCNOFile(GCOV::GCOVFormat Format) { - return Format == GCOV::GCNO_402 || Format == GCOV::GCNO_404; -} - -/// read - Read GCOV buffer. -bool GCOVFile::read(GCOVBuffer &Buffer) { - GCOV::GCOVFormat Format = Buffer.readGCOVFormat(); - if (Format == GCOV::InvalidGCOV) - return false; - - if (isGCNOFile(Format)) { - while (true) { - if (!Buffer.readFunctionTag()) break; - GCOVFunction *GFun = new GCOVFunction(); - if (!GFun->read(Buffer, Format)) - return false; - Functions.push_back(GFun); - } + if (!Buffer.readInt(Checksum)) return false; + while (true) { + if (!Buffer.readFunctionTag()) break; + GCOVFunction *GFun = new GCOVFunction(*this); + if (!GFun->readGCNO(Buffer, Version)) + return false; + Functions.push_back(GFun); } - else if (isGCDAFile(Format)) { - for (size_t i = 0, e = Functions.size(); i < e; ++i) { - if (!Buffer.readFunctionTag()) { - errs() << "Unexpected number of functions.\n"; - return false; - } - if (!Functions[i]->read(Buffer, Format)) - return false; - } - if (Buffer.readObjectTag()) { - uint32_t Length; - uint32_t Dummy; - if (!Buffer.readInt(Length)) return false; - if (!Buffer.readInt(Dummy)) return false; // checksum - if (!Buffer.readInt(Dummy)) return false; // num - if (!Buffer.readInt(RunCount)) return false;; - Buffer.advanceCursor(Length-3); - } - while (Buffer.readProgramTag()) { - uint32_t Length; - if (!Buffer.readInt(Length)) return false; - Buffer.advanceCursor(Length); - ++ProgramCount; + + GCNOInitialized = true; + return true; +} + +/// readGCDA - Read GCDA buffer. It is required that readGCDA() can only be +/// called after readGCNO(). 
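The contract spelled out above (notes first, data second) implies a driver shaped roughly like this; how the two GCOVBuffer objects are produced from files is outside this hunk and assumed:

// Hedged sketch of the required call order: .gcno first, then .gcda.
bool loadCoverage(llvm::GCOVFile &GF, llvm::GCOVBuffer &GCNOBuf,
                  llvm::GCOVBuffer &GCDABuf) {
  if (!GF.readGCNO(GCNOBuf))   // builds the function/block graph
    return false;
  return GF.readGCDA(GCDABuf); // asserts if readGCNO() has not run
}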
+bool GCOVFile::readGCDA(GCOVBuffer &Buffer) { + assert(GCNOInitialized && "readGCDA() can only be called after readGCNO()"); + if (!Buffer.readGCDAFormat()) return false; + GCOV::GCOVVersion GCDAVersion; + if (!Buffer.readGCOVVersion(GCDAVersion)) return false; + if (Version != GCDAVersion) { + errs() << "GCOV versions do not match.\n"; + return false; + } + + uint32_t GCDAChecksum; + if (!Buffer.readInt(GCDAChecksum)) return false; + if (Checksum != GCDAChecksum) { + errs() << "File checksums do not match: " << Checksum << " != " + << GCDAChecksum << ".\n"; + return false; + } + for (size_t i = 0, e = Functions.size(); i < e; ++i) { + if (!Buffer.readFunctionTag()) { + errs() << "Unexpected number of functions.\n"; + return false; } + if (!Functions[i]->readGCDA(Buffer, Version)) + return false; + } + if (Buffer.readObjectTag()) { + uint32_t Length; + uint32_t Dummy; + if (!Buffer.readInt(Length)) return false; + if (!Buffer.readInt(Dummy)) return false; // checksum + if (!Buffer.readInt(Dummy)) return false; // num + if (!Buffer.readInt(RunCount)) return false; + Buffer.advanceCursor(Length-3); + } + while (Buffer.readProgramTag()) { + uint32_t Length; + if (!Buffer.readInt(Length)) return false; + Buffer.advanceCursor(Length); + ++ProgramCount; } return true; } /// dump - Dump GCOVFile content to dbgs() for debugging purposes. -void GCOVFile::dump() { - for (SmallVectorImpl::iterator I = Functions.begin(), +void GCOVFile::dump() const { + for (SmallVectorImpl::const_iterator I = Functions.begin(), E = Functions.end(); I != E; ++I) (*I)->dump(); } @@ -106,54 +117,27 @@ void GCOVFile::collectLineCounts(FileInfo &FI) { /// ~GCOVFunction - Delete GCOVFunction and its content. GCOVFunction::~GCOVFunction() { DeleteContainerPointers(Blocks); + DeleteContainerPointers(Edges); } -/// read - Read a function from the buffer. Return false if buffer cursor -/// does not point to a function tag. -bool GCOVFunction::read(GCOVBuffer &Buff, GCOV::GCOVFormat Format) { +/// readGCNO - Read a function from the GCNO buffer. Return false if an error +/// occurs. +bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) { uint32_t Dummy; if (!Buff.readInt(Dummy)) return false; // Function header length if (!Buff.readInt(Ident)) return false; - if (!Buff.readInt(Dummy)) return false; // Checksum #1 - if (Format != GCOV::GCNO_402 && Format != GCOV::GCDA_402) - if (!Buff.readInt(Dummy)) return false; // Checksum #2 - - if (!Buff.readString(Name)) return false; - - if (Format == GCOV::GCNO_402 || Format == GCOV::GCNO_404) - if (!Buff.readString(Filename)) return false; - - if (Format == GCOV::GCDA_402 || Format == GCOV::GCDA_404) { - if (!Buff.readArcTag()) { - errs() << "Arc tag not found.\n"; + if (!Buff.readInt(Checksum)) return false; + if (Version != GCOV::V402) { + uint32_t CfgChecksum; + if (!Buff.readInt(CfgChecksum)) return false; + if (Parent.getChecksum() != CfgChecksum) { + errs() << "File checksums do not match: " << Parent.getChecksum() + << " != " << CfgChecksum << " in (" << Name << ").\n"; return false; } - uint32_t Count; - if (!Buff.readInt(Count)) return false; - Count /= 2; - - // This for loop adds the counts for each block. A second nested loop is - // required to combine the edge counts that are contained in the GCDA file. 
- for (uint32_t Line = 0; Count > 0; ++Line) { - if (Line >= Blocks.size()) { - errs() << "Unexpected number of edges.\n"; - return false; - } - GCOVBlock &Block = *Blocks[Line]; - for (size_t Edge = 0, End = Block.getNumEdges(); Edge < End; ++Edge) { - if (Count == 0) { - errs() << "Unexpected number of edges.\n"; - return false; - } - uint64_t ArcCount; - if (!Buff.readInt64(ArcCount)) return false; - Block.addCount(ArcCount); - --Count; - } - } - return true; } - + if (!Buff.readString(Name)) return false; + if (!Buff.readString(Filename)) return false; if (!Buff.readInt(LineNumber)) return false; // read blocks. @@ -176,13 +160,17 @@ bool GCOVFunction::read(GCOVBuffer &Buff, GCOV::GCOVFormat Format) { uint32_t BlockNo; if (!Buff.readInt(BlockNo)) return false; if (BlockNo >= BlockCount) { - errs() << "Unexpected block number.\n"; + errs() << "Unexpected block number: " << BlockNo << " (in " << Name + << ").\n"; return false; } for (uint32_t i = 0, e = EdgeCount; i != e; ++i) { uint32_t Dst; if (!Buff.readInt(Dst)) return false; - Blocks[BlockNo]->addEdge(Dst); + GCOVEdge *Edge = new GCOVEdge(Blocks[BlockNo], Blocks[Dst]); + Edges.push_back(Edge); + Blocks[BlockNo]->addDstEdge(Edge); + Blocks[Dst]->addSrcEdge(Edge); if (!Buff.readInt(Dummy)) return false; // Edge flag } } @@ -195,7 +183,8 @@ bool GCOVFunction::read(GCOVBuffer &Buff, GCOV::GCOVFormat Format) { uint32_t BlockNo; if (!Buff.readInt(BlockNo)) return false; if (BlockNo >= BlockCount) { - errs() << "Unexpected block number.\n"; + errs() << "Unexpected block number: " << BlockNo << " (in " << Name + << ").\n"; return false; } GCOVBlock *Block = Blocks[BlockNo]; @@ -203,8 +192,9 @@ bool GCOVFunction::read(GCOVBuffer &Buff, GCOV::GCOVFormat Format) { while (Buff.getCursor() != (EndPos - 4)) { StringRef F; if (!Buff.readString(F)) return false; - if (F != Filename) { - errs() << "Multiple sources for a single basic block.\n"; + if (Filename != F) { + errs() << "Multiple sources for a single basic block: " << Filename + << " != " << F << " (in " << Name << ").\n"; return false; } if (Buff.getCursor() == (EndPos - 4)) break; @@ -220,10 +210,96 @@ bool GCOVFunction::read(GCOVBuffer &Buff, GCOV::GCOVFormat Format) { return true; } +/// readGCDA - Read a function from the GCDA buffer. Return false if an error +/// occurs. 
+bool GCOVFunction::readGCDA(GCOVBuffer &Buff, GCOV::GCOVVersion Version) { + uint32_t Dummy; + if (!Buff.readInt(Dummy)) return false; // Function header length + + uint32_t GCDAIdent; + if (!Buff.readInt(GCDAIdent)) return false; + if (Ident != GCDAIdent) { + errs() << "Function identifiers do not match: " << Ident << " != " + << GCDAIdent << " (in " << Name << ").\n"; + return false; + } + + uint32_t GCDAChecksum; + if (!Buff.readInt(GCDAChecksum)) return false; + if (Checksum != GCDAChecksum) { + errs() << "Function checksums do not match: " << Checksum << " != " + << GCDAChecksum << " (in " << Name << ").\n"; + return false; + } + + uint32_t CfgChecksum; + if (Version != GCOV::V402) { + if (!Buff.readInt(CfgChecksum)) return false; + if (Parent.getChecksum() != CfgChecksum) { + errs() << "File checksums do not match: " << Parent.getChecksum() + << " != " << CfgChecksum << " (in " << Name << ").\n"; + return false; + } + } + + StringRef GCDAName; + if (!Buff.readString(GCDAName)) return false; + if (Name != GCDAName) { + errs() << "Function names do not match: " << Name << " != " << GCDAName + << ".\n"; + return false; + } + + if (!Buff.readArcTag()) { + errs() << "Arc tag not found (in " << Name << ").\n"; + return false; + } + + uint32_t Count; + if (!Buff.readInt(Count)) return false; + Count /= 2; + + // This for loop adds the counts for each block. A second nested loop is + // required to combine the edge counts that are contained in the GCDA file. + for (uint32_t BlockNo = 0; Count > 0; ++BlockNo) { + // The last block is always reserved for exit block + if (BlockNo >= Blocks.size()-1) { + errs() << "Unexpected number of edges (in " << Name << ").\n"; + return false; + } + GCOVBlock &Block = *Blocks[BlockNo]; + for (size_t EdgeNo = 0, End = Block.getNumDstEdges(); EdgeNo < End; + ++EdgeNo) { + if (Count == 0) { + errs() << "Unexpected number of edges (in " << Name << ").\n"; + return false; + } + uint64_t ArcCount; + if (!Buff.readInt64(ArcCount)) return false; + Block.addCount(EdgeNo, ArcCount); + --Count; + } + Block.sortDstEdges(); + } + return true; +} + +/// getEntryCount - Get the number of times the function was called by +/// retrieving the entry block's count. +uint64_t GCOVFunction::getEntryCount() const { + return Blocks.front()->getCount(); +} + +/// getExitCount - Get the number of times the function returned by retrieving +/// the exit block's count. +uint64_t GCOVFunction::getExitCount() const { + return Blocks.back()->getCount(); +} + /// dump - Dump GCOVFunction content to dbgs() for debugging purposes. -void GCOVFunction::dump() { +void GCOVFunction::dump() const { dbgs() << "===== " << Name << " @ " << Filename << ":" << LineNumber << "\n"; - for (SmallVectorImpl::iterator I = Blocks.begin(), + for (SmallVectorImpl::const_iterator I = Blocks.begin(), E = Blocks.end(); I != E; ++I) (*I)->dump(); } @@ -234,6 +310,7 @@ void GCOVFunction::collectLineCounts(FileInfo &FI) { for (SmallVectorImpl::iterator I = Blocks.begin(), E = Blocks.end(); I != E; ++I) (*I)->collectLineCounts(FI); + FI.addFunctionLine(Filename, LineNumber, this); } //===----------------------------------------------------------------------===// @@ -241,31 +318,60 @@ void GCOVFunction::collectLineCounts(FileInfo &FI) { /// ~GCOVBlock - Delete GCOVBlock and its content. GCOVBlock::~GCOVBlock() { - Edges.clear(); + SrcEdges.clear(); + DstEdges.clear(); Lines.clear(); } +/// addCount - Add to block counter while storing the edge count. 
If the
+/// destination has no outgoing edges, also update that block's count too.
+void GCOVBlock::addCount(size_t DstEdgeNo, uint64_t N) {
+  assert(DstEdgeNo < DstEdges.size()); // up to caller to ensure EdgeNo is valid
+  DstEdges[DstEdgeNo]->Count = N;
+  Counter += N;
+  if (!DstEdges[DstEdgeNo]->Dst->getNumDstEdges())
+    DstEdges[DstEdgeNo]->Dst->Counter += N;
+}
+
+/// sortDstEdges - Sort destination edges by block number, nop if already
+/// sorted. This is required for printing branch info in the correct order.
+void GCOVBlock::sortDstEdges() {
+  if (!DstEdgesAreSorted) {
+    SortDstEdgesFunctor SortEdges;
+    std::stable_sort(DstEdges.begin(), DstEdges.end(), SortEdges);
+  }
+}
+
 /// collectLineCounts - Collect line counts. This must be used after
 /// reading .gcno and .gcda files.
 void GCOVBlock::collectLineCounts(FileInfo &FI) {
   for (SmallVectorImpl<uint32_t>::iterator I = Lines.begin(),
          E = Lines.end(); I != E; ++I)
-    FI.addLineCount(Parent.getFilename(), *I, Counter);
+    FI.addBlockLine(Parent.getFilename(), *I, this);
 }
 
 /// dump - Dump GCOVBlock content to dbgs() for debugging purposes.
-void GCOVBlock::dump() {
+void GCOVBlock::dump() const {
   dbgs() << "Block : " << Number << " Counter : " << Counter << "\n";
-  if (!Edges.empty()) {
-    dbgs() << "\tEdges : ";
-    for (SmallVectorImpl<uint32_t>::iterator I = Edges.begin(), E = Edges.end();
-         I != E; ++I)
-      dbgs() << (*I) << ",";
+  if (!SrcEdges.empty()) {
+    dbgs() << "\tSource Edges : ";
+    for (EdgeIterator I = SrcEdges.begin(), E = SrcEdges.end(); I != E; ++I) {
+      const GCOVEdge *Edge = *I;
+      dbgs() << Edge->Src->Number << " (" << Edge->Count << "), ";
+    }
+    dbgs() << "\n";
+  }
+  if (!DstEdges.empty()) {
+    dbgs() << "\tDestination Edges : ";
+    for (EdgeIterator I = DstEdges.begin(), E = DstEdges.end(); I != E; ++I) {
+      const GCOVEdge *Edge = *I;
+      dbgs() << Edge->Dst->Number << " (" << Edge->Count << "), ";
+    }
     dbgs() << "\n";
   }
   if (!Lines.empty()) {
     dbgs() << "\tLines : ";
-    for (SmallVectorImpl<uint32_t>::iterator I = Lines.begin(),
+    for (SmallVectorImpl<uint32_t>::const_iterator I = Lines.begin(),
            E = Lines.end(); I != E; ++I)
       dbgs() << (*I) << ",";
     dbgs() << "\n";
@@ -275,40 +381,298 @@ void GCOVBlock::dump() {
 //===----------------------------------------------------------------------===//
 // FileInfo implementation.
+// Safe integer division, returns 0 if numerator is 0.
+static uint32_t safeDiv(uint64_t Numerator, uint64_t Divisor) {
+  if (!Numerator)
+    return 0;
+  return Numerator/Divisor;
+}
+
+// This custom division function mimics gcov's branch outputs:
+//   - Round to closest whole number
+//   - Only output 0% or 100% if it's exactly that value
+static uint32_t branchDiv(uint64_t Numerator, uint64_t Divisor) {
+  if (!Numerator)
+    return 0;
+  if (Numerator == Divisor)
+    return 100;
+
+  uint8_t Res = (Numerator*100+Divisor/2) / Divisor;
+  if (Res == 0)
+    return 1;
+  if (Res == 100)
+    return 99;
+  return Res;
+}
+
+struct formatBranchInfo {
+  formatBranchInfo(const GCOVOptions &Options, uint64_t Count,
+                   uint64_t Total) :
+    Options(Options), Count(Count), Total(Total) {}
+
+  void print(raw_ostream &OS) const {
+    if (!Total)
+      OS << "never executed";
+    else if (Options.BranchCount)
+      OS << "taken " << Count;
+    else
+      OS << "taken " << branchDiv(Count, Total) << "%";
+  }
+
+  const GCOVOptions &Options;
+  uint64_t Count;
+  uint64_t Total;
+};
+
+static raw_ostream &operator<<(raw_ostream &OS, const formatBranchInfo &FBI) {
+  FBI.print(OS);
+  return OS;
+}
+
 /// print - Print source files with collected line count information.
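branchDiv() above deliberately reserves the 0% and 100% labels for exact values; everything else is clamped into 1..99 after rounding. A hedged in-file check of that behavior (branchDiv is static, so this would have to live in the same translation unit):

#include <cassert>

// Worked examples of branchDiv()'s clamping, derived from the code above.
static void checkBranchDivClamping() {
  assert(branchDiv(0, 8) == 0);       // exactly never taken -> 0%
  assert(branchDiv(8, 8) == 100);     // exactly always taken -> 100%
  assert(branchDiv(1, 1000) == 1);    // rounds to 0%, clamped up to 1%
  assert(branchDiv(999, 1000) == 99); // rounds to 100%, clamped down to 99%
}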
-void FileInfo::print(raw_fd_ostream &OS, StringRef gcnoFile, - StringRef gcdaFile) { - for (StringMap::iterator I = LineInfo.begin(), E = LineInfo.end(); - I != E; ++I) { +void FileInfo::print(StringRef GCNOFile, StringRef GCDAFile) { + for (StringMap::const_iterator I = LineInfo.begin(), + E = LineInfo.end(); I != E; ++I) { StringRef Filename = I->first(); - OS << " -: 0:Source:" << Filename << "\n"; - OS << " -: 0:Graph:" << gcnoFile << "\n"; - OS << " -: 0:Data:" << gcdaFile << "\n"; - OS << " -: 0:Runs:" << RunCount << "\n"; - OS << " -: 0:Programs:" << ProgramCount << "\n"; - LineCounts &L = LineInfo[Filename]; OwningPtr Buff; if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) { errs() << Filename << ": " << ec.message() << "\n"; return; } StringRef AllLines = Buff->getBuffer(); - uint32_t i = 0; - while (!AllLines.empty()) { - if (L.find(i) != L.end()) { - if (L[i] == 0) - OS << " #####:"; - else - OS << format("%9" PRIu64 ":", L[i]); - } else { - OS << " -:"; + + std::string CovFilename = Filename.str() + ".gcov"; + std::string ErrorInfo; + raw_fd_ostream OS(CovFilename.c_str(), ErrorInfo); + if (!ErrorInfo.empty()) + errs() << ErrorInfo << "\n"; + + OS << " -: 0:Source:" << Filename << "\n"; + OS << " -: 0:Graph:" << GCNOFile << "\n"; + OS << " -: 0:Data:" << GCDAFile << "\n"; + OS << " -: 0:Runs:" << RunCount << "\n"; + OS << " -: 0:Programs:" << ProgramCount << "\n"; + + const LineData &Line = I->second; + GCOVCoverage FileCoverage(Filename); + for (uint32_t LineIndex = 0; !AllLines.empty(); ++LineIndex) { + if (Options.BranchInfo) { + FunctionLines::const_iterator FuncsIt = Line.Functions.find(LineIndex); + if (FuncsIt != Line.Functions.end()) + printFunctionSummary(OS, FuncsIt->second); + } + + BlockLines::const_iterator BlocksIt = Line.Blocks.find(LineIndex); + if (BlocksIt == Line.Blocks.end()) { + // No basic blocks are on this line. Not an executable line of code. + OS << " -:"; + std::pair P = AllLines.split('\n'); + OS << format("%5u:", LineIndex+1) << P.first << "\n"; + AllLines = P.second; + } else { + const BlockVector &Blocks = BlocksIt->second; + + // Add up the block counts to form line counts. + DenseMap LineExecs; + uint64_t LineCount = 0; + for (BlockVector::const_iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) { + const GCOVBlock *Block = *I; + if (Options.AllBlocks) { + // Only take the highest block count for that line. + uint64_t BlockCount = Block->getCount(); + LineCount = LineCount > BlockCount ? LineCount : BlockCount; + } else { + // Sum up all of the block counts. + LineCount += Block->getCount(); + } + + if (Options.FuncCoverage) { + // This is a slightly convoluted way to most accurately gather line + // statistics for functions. Basically what is happening is that we + // don't want to count a single line with multiple blocks more than + // once. However, we also don't simply want to give the total line + // count to every function that starts on the line. Thus, what is + // happening here are two things: + // 1) Ensure that the number of logical lines is only incremented + // once per function. + // 2) If there are multiple blocks on the same line, ensure that the + // number of lines executed is incremented as long as at least + // one of the blocks are executed. 
+ const GCOVFunction *Function = &Block->getParent(); + if (FuncCoverages.find(Function) == FuncCoverages.end()) { + std::pair + KeyValue(Function, GCOVCoverage(Function->getName())); + FuncCoverages.insert(KeyValue); + } + GCOVCoverage &FuncCoverage = FuncCoverages.find(Function)->second; + + if (LineExecs.find(Function) == LineExecs.end()) { + if (Block->getCount()) { + ++FuncCoverage.LinesExec; + LineExecs[Function] = true; + } else { + LineExecs[Function] = false; + } + ++FuncCoverage.LogicalLines; + } else if (!LineExecs[Function] && Block->getCount()) { + ++FuncCoverage.LinesExec; + LineExecs[Function] = true; + } + } + } + + if (LineCount == 0) + OS << " #####:"; + else { + OS << format("%9" PRIu64 ":", LineCount); + ++FileCoverage.LinesExec; + } + ++FileCoverage.LogicalLines; + + std::pair P = AllLines.split('\n'); + OS << format("%5u:", LineIndex+1) << P.first << "\n"; + AllLines = P.second; + + uint32_t BlockNo = 0; + uint32_t EdgeNo = 0; + for (BlockVector::const_iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) { + const GCOVBlock *Block = *I; + + // Only print block and branch information at the end of the block. + if (Block->getLastLine() != LineIndex+1) + continue; + if (Options.AllBlocks) + printBlockInfo(OS, *Block, LineIndex, BlockNo); + if (Options.BranchInfo) { + size_t NumEdges = Block->getNumDstEdges(); + if (NumEdges > 1) + printBranchInfo(OS, *Block, FileCoverage, EdgeNo); + else if (Options.UncondBranch && NumEdges == 1) + printUncondBranchInfo(OS, EdgeNo, (*Block->dst_begin())->Count); + } + } } - std::pair P = AllLines.split('\n'); - if (AllLines != P.first) - OS << format("%5u:", i+1) << P.first; - OS << "\n"; - AllLines = P.second; - ++i; } + FileCoverages.push_back(FileCoverage); + } + + // FIXME: There is no way to detect calls given current instrumentation. + if (Options.FuncCoverage) + printFuncCoverage(); + printFileCoverage(); +} + +/// printFunctionSummary - Print function and block summary. +void FileInfo::printFunctionSummary(raw_fd_ostream &OS, + const FunctionVector &Funcs) const { + for (FunctionVector::const_iterator I = Funcs.begin(), E = Funcs.end(); + I != E; ++I) { + const GCOVFunction *Func = *I; + uint64_t EntryCount = Func->getEntryCount(); + uint32_t BlocksExec = 0; + for (GCOVFunction::BlockIterator I = Func->block_begin(), + E = Func->block_end(); I != E; ++I) { + const GCOVBlock *Block = *I; + if (Block->getNumDstEdges() && Block->getCount()) + ++BlocksExec; + } + + OS << "function " << Func->getName() << " called " << EntryCount + << " returned " << safeDiv(Func->getExitCount()*100, EntryCount) + << "% blocks executed " + << safeDiv(BlocksExec*100, Func->getNumBlocks()-1) << "%\n"; + } +} + +/// printBlockInfo - Output counts for each block. +void FileInfo::printBlockInfo(raw_fd_ostream &OS, const GCOVBlock &Block, + uint32_t LineIndex, uint32_t &BlockNo) const { + if (Block.getCount() == 0) + OS << " $$$$$:"; + else + OS << format("%9" PRIu64 ":", Block.getCount()); + OS << format("%5u-block %2u\n", LineIndex+1, BlockNo++); +} + +/// printBranchInfo - Print conditional branch probabilities. 
+void FileInfo::printBranchInfo(raw_fd_ostream &OS, const GCOVBlock &Block, + GCOVCoverage &Coverage, uint32_t &EdgeNo) { + SmallVector BranchCounts; + uint64_t TotalCounts = 0; + for (GCOVBlock::EdgeIterator I = Block.dst_begin(), E = Block.dst_end(); + I != E; ++I) { + const GCOVEdge *Edge = *I; + BranchCounts.push_back(Edge->Count); + TotalCounts += Edge->Count; + if (Block.getCount()) ++Coverage.BranchesExec; + if (Edge->Count) ++Coverage.BranchesTaken; + ++Coverage.Branches; + + if (Options.FuncCoverage) { + const GCOVFunction *Function = &Block.getParent(); + GCOVCoverage &FuncCoverage = FuncCoverages.find(Function)->second; + if (Block.getCount()) ++FuncCoverage.BranchesExec; + if (Edge->Count) ++FuncCoverage.BranchesTaken; + ++FuncCoverage.Branches; + } + } + + for (SmallVectorImpl::const_iterator I = BranchCounts.begin(), + E = BranchCounts.end(); I != E; ++I) { + OS << format("branch %2u ", EdgeNo++) + << formatBranchInfo(Options, *I, TotalCounts) << "\n"; + } +} + +/// printUncondBranchInfo - Print unconditional branch probabilities. +void FileInfo::printUncondBranchInfo(raw_fd_ostream &OS, uint32_t &EdgeNo, + uint64_t Count) const { + OS << format("unconditional %2u ", EdgeNo++) + << formatBranchInfo(Options, Count, Count) << "\n"; +} + +// printCoverage - Print generic coverage info used by both printFuncCoverage +// and printFileCoverage. +void FileInfo::printCoverage(const GCOVCoverage &Coverage) const { + outs() << format("Lines executed:%.2f%% of %u\n", + double(Coverage.LinesExec)*100/Coverage.LogicalLines, + Coverage.LogicalLines); + if (Options.BranchInfo) { + if (Coverage.Branches) { + outs() << format("Branches executed:%.2f%% of %u\n", + double(Coverage.BranchesExec)*100/Coverage.Branches, + Coverage.Branches); + outs() << format("Taken at least once:%.2f%% of %u\n", + double(Coverage.BranchesTaken)*100/Coverage.Branches, + Coverage.Branches); + } else { + outs() << "No branches\n"; + } + outs() << "No calls\n"; // to be consistent with gcov + } +} + +// printFuncCoverage - Print per-function coverage info. +void FileInfo::printFuncCoverage() const { + for (MapVector::const_iterator I = + FuncCoverages.begin(), E = FuncCoverages.end(); I != E; ++I) { + const GCOVCoverage &Coverage = I->second; + outs() << "Function '" << Coverage.Name << "'\n"; + printCoverage(Coverage); + outs() << "\n"; + } +} + +// printFileCoverage - Print per-file coverage info. 
+void FileInfo::printFileCoverage() const { + for (SmallVectorImpl::const_iterator I = + FileCoverages.begin(), E = FileCoverages.end(); I != E; ++I) { + const GCOVCoverage &Coverage = *I; + outs() << "File '" << Coverage.Name << "'\n"; + printCoverage(Coverage); + outs() << Coverage.Name << ":creating '" << Coverage.Name + << ".gcov'\n\n"; } } diff --git a/external/bsd/llvm/dist/llvm/lib/IR/Globals.cpp b/external/bsd/llvm/dist/llvm/lib/IR/Globals.cpp index da3b02a0fa6d..cc42351f1009 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/Globals.cpp +++ b/external/bsd/llvm/dist/llvm/lib/IR/Globals.cpp @@ -237,7 +237,8 @@ GlobalValue *GlobalAlias::getAliasedGlobal() { return GV; ConstantExpr *CE = cast(C); - assert((CE->getOpcode() == Instruction::BitCast || + assert((CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::AddrSpaceCast || CE->getOpcode() == Instruction::GetElementPtr) && "Unsupported aliasee"); diff --git a/external/bsd/llvm/dist/llvm/lib/IR/Instructions.cpp b/external/bsd/llvm/dist/llvm/lib/IR/Instructions.cpp index 8a6b77ba37de..761f60063fab 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/Instructions.cpp +++ b/external/bsd/llvm/dist/llvm/lib/IR/Instructions.cpp @@ -1083,7 +1083,7 @@ void StoreInst::AssertOK() { cast(getOperand(1)->getType())->getElementType() && "Ptr must be a pointer to Val type!"); assert(!(isAtomic() && getAlignment() == 0) && - "Alignment required for atomic load"); + "Alignment required for atomic store"); } @@ -2206,7 +2206,7 @@ unsigned CastInst::isEliminableCastPair( case 3: // No-op cast in second op implies firstOp as long as the DestTy // is integer and we are not converting between a vector and a - // non vector type. + // non-vector type. if (!SrcTy->isVectorTy() && DstTy->isIntegerTy()) return firstOp; return 0; @@ -2823,7 +2823,7 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) { if (SrcTy->isPtrOrPtrVectorTy() != DstTy->isPtrOrPtrVectorTy()) return false; - // For non pointer cases, the cast is okay if the source and destination bit + // For non-pointer cases, the cast is okay if the source and destination bit // widths are identical. 
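Concretely, the non-pointer rule above admits any same-width reinterpretation and nothing else. A sketch using the public castIsValid() helper, where I32Val is assumed to be some value of type i32:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include <cassert>

// Hedged sketch: bitcast validity is purely a bit-width check for
// non-pointer types.
void checkBitcastWidthRule(llvm::LLVMContext &C, llvm::Value *I32Val) {
  assert(llvm::CastInst::castIsValid(llvm::Instruction::BitCast, I32Val,
                                     llvm::Type::getFloatTy(C)));  // 32 == 32
  assert(!llvm::CastInst::castIsValid(llvm::Instruction::BitCast, I32Val,
                                      llvm::Type::getInt64Ty(C))); // 32 != 64
}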
if (!SrcTy->isPtrOrPtrVectorTy()) return SrcTy->getPrimitiveSizeInBits() == DstTy->getPrimitiveSizeInBits(); diff --git a/external/bsd/llvm/dist/llvm/lib/IR/LLVMContext.cpp b/external/bsd/llvm/dist/llvm/lib/IR/LLVMContext.cpp index 883bb9878fa5..bae83dd30155 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/LLVMContext.cpp +++ b/external/bsd/llvm/dist/llvm/lib/IR/LLVMContext.cpp @@ -15,6 +15,8 @@ #include "llvm/IR/LLVMContext.h" #include "LLVMContextImpl.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/ManagedStatic.h" @@ -98,6 +100,20 @@ void *LLVMContext::getInlineAsmDiagnosticContext() const { return pImpl->InlineAsmDiagContext; } +void LLVMContext::setDiagnosticHandler(DiagnosticHandlerTy DiagnosticHandler, + void *DiagnosticContext) { + pImpl->DiagnosticHandler = DiagnosticHandler; + pImpl->DiagnosticContext = DiagnosticContext; +} + +LLVMContext::DiagnosticHandlerTy LLVMContext::getDiagnosticHandler() const { + return pImpl->DiagnosticHandler; +} + +void *LLVMContext::getDiagnosticContext() const { + return pImpl->DiagnosticContext; +} + void LLVMContext::emitError(const Twine &ErrorStr) { emitError(0U, ErrorStr); } @@ -112,6 +128,31 @@ void LLVMContext::emitError(const Instruction *I, const Twine &ErrorStr) { return emitError(LocCookie, ErrorStr); } +void LLVMContext::diagnose(const DiagnosticInfo &DI) { + // If there is a report handler, use it. + if (pImpl->DiagnosticHandler != 0) { + pImpl->DiagnosticHandler(DI, pImpl->DiagnosticContext); + return; + } + // Otherwise, print the message with a prefix based on the severity. + std::string MsgStorage; + raw_string_ostream Stream(MsgStorage); + DiagnosticPrinterRawOStream DP(Stream); + DI.print(DP); + Stream.flush(); + switch (DI.getSeverity()) { + case DS_Error: + errs() << "error: " << MsgStorage << "\n"; + exit(1); + case DS_Warning: + errs() << "warning: " << MsgStorage << "\n"; + break; + case DS_Note: + errs() << "note: " << MsgStorage << "\n"; + break; + } +} + void LLVMContext::emitError(unsigned LocCookie, const Twine &ErrorStr) { // If there is no error handler installed, just print the error and exit. if (pImpl->InlineAsmDiagHandler == 0) { diff --git a/external/bsd/llvm/dist/llvm/lib/IR/LLVMContextImpl.cpp b/external/bsd/llvm/dist/llvm/lib/IR/LLVMContextImpl.cpp index 6a6a4d6801f0..ebff9d3a51f6 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/LLVMContextImpl.cpp +++ b/external/bsd/llvm/dist/llvm/lib/IR/LLVMContextImpl.cpp @@ -37,6 +37,8 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C) Int64Ty(C, 64) { InlineAsmDiagHandler = 0; InlineAsmDiagContext = 0; + DiagnosticHandler = 0; + DiagnosticContext = 0; NamedStructTypesUniqueID = 0; } diff --git a/external/bsd/llvm/dist/llvm/lib/IR/LLVMContextImpl.h b/external/bsd/llvm/dist/llvm/lib/IR/LLVMContextImpl.h index 407b9856892a..39e5d778ed68 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/LLVMContextImpl.h +++ b/external/bsd/llvm/dist/llvm/lib/IR/LLVMContextImpl.h @@ -238,9 +238,12 @@ public: LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler; void *InlineAsmDiagContext; - - typedef DenseMap IntMapTy; + + LLVMContext::DiagnosticHandlerTy DiagnosticHandler; + void *DiagnosticContext; + + typedef DenseMap IntMapTy; IntMapTy IntConstants; typedef DenseMap P = getToken(temp, "-"); @@ -84,7 +84,7 @@ Module::Endianness Module::getEndianness() const { /// Target Pointer Size information. 
Module::PointerSize Module::getPointerSize() const { StringRef temp = DataLayout; - Module::PointerSize ret = AnyPointerSize; + Module::PointerSize ret = Pointer64; while (!temp.empty()) { std::pair TmpP = getToken(temp, "-"); @@ -318,11 +318,16 @@ getModuleFlagsMetadata(SmallVectorImpl &Flags) const { for (unsigned i = 0, e = ModFlags->getNumOperands(); i != e; ++i) { MDNode *Flag = ModFlags->getOperand(i); - ConstantInt *Behavior = cast(Flag->getOperand(0)); - MDString *Key = cast(Flag->getOperand(1)); - Value *Val = Flag->getOperand(2); - Flags.push_back(ModuleFlagEntry(ModFlagBehavior(Behavior->getZExtValue()), - Key, Val)); + if (Flag->getNumOperands() >= 3 && isa(Flag->getOperand(0)) && + isa(Flag->getOperand(1))) { + // Check the operands of the MDNode before accessing the operands. + // The verifier will actually catch these failures. + ConstantInt *Behavior = cast(Flag->getOperand(0)); + MDString *Key = cast(Flag->getOperand(1)); + Value *Val = Flag->getOperand(2); + Flags.push_back(ModuleFlagEntry(ModFlagBehavior(Behavior->getZExtValue()), + Key, Val)); + } } } diff --git a/external/bsd/llvm/dist/llvm/lib/IR/PassManager.cpp b/external/bsd/llvm/dist/llvm/lib/IR/PassManager.cpp index 966af7debc77..30b46b01c1de 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/PassManager.cpp +++ b/external/bsd/llvm/dist/llvm/lib/IR/PassManager.cpp @@ -12,43 +12,121 @@ using namespace llvm; -void ModulePassManager::run() { - for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) - if (Passes[Idx]->run(M)) - if (AM) AM->invalidateAll(M); +PreservedAnalyses ModulePassManager::run(Module *M, ModuleAnalysisManager *AM) { + PreservedAnalyses PA = PreservedAnalyses::all(); + for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) { + PreservedAnalyses PassPA = Passes[Idx]->run(M, AM); + if (AM) + AM->invalidate(M, PassPA); + PA.intersect(llvm_move(PassPA)); + } + return PA; } -bool FunctionPassManager::run(Module *M) { - bool Changed = false; - for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) - for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) - if (Passes[Idx]->run(I)) { - Changed = true; - if (AM) AM->invalidateAll(I); - } - return Changed; +const ModuleAnalysisManager::ResultConceptT & +ModuleAnalysisManager::getResultImpl(void *PassID, Module *M) { + ModuleAnalysisResultMapT::iterator RI; + bool Inserted; + llvm::tie(RI, Inserted) = ModuleAnalysisResults.insert(std::make_pair( + PassID, polymorphic_ptr >())); + + // If we don't have a cached result for this module, look up the pass and run + // it to produce a result, which we then add to the cache. + if (Inserted) + RI->second = lookupPass(PassID).run(M, this); + + return *RI->second; } -void AnalysisManager::invalidateAll(Function *F) { - assert(F->getParent() == M && "Invalidating a function from another module!"); +const ModuleAnalysisManager::ResultConceptT * +ModuleAnalysisManager::getCachedResultImpl(void *PassID, Module *M) const { + ModuleAnalysisResultMapT::const_iterator RI = ModuleAnalysisResults.find(PassID); + return RI == ModuleAnalysisResults.end() ? 0 : &*RI->second; +} - // First invalidate any module results we still have laying about. +void ModuleAnalysisManager::invalidateImpl(void *PassID, Module *M) { + ModuleAnalysisResults.erase(PassID); +} + +void ModuleAnalysisManager::invalidateImpl(Module *M, + const PreservedAnalyses &PA) { // FIXME: This is a total hack based on the fact that erasure doesn't // invalidate iteration for DenseMap. 
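The run() loops above define the new manager contract: each pass reports what it preserved, the manager invalidates the rest, and the pipeline's overall answer is the intersection across passes. A sketch of the same flow from a caller's point of view:

#include "llvm/IR/PassManager.h"

// Hedged sketch: composing two module pipelines by hand. Each run() may
// invalidate analyses through AM; the combined result is the intersection.
llvm::PreservedAnalyses runBoth(llvm::Module *M,
                                llvm::ModuleAnalysisManager *AM,
                                llvm::ModulePassManager &First,
                                llvm::ModulePassManager &Second) {
  llvm::PreservedAnalyses PA = First.run(M, AM);
  PA.intersect(Second.run(M, AM)); // keep only what both pipelines preserved
  return PA;
}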
for (ModuleAnalysisResultMapT::iterator I = ModuleAnalysisResults.begin(), E = ModuleAnalysisResults.end(); I != E; ++I) - if (I->second->invalidate(M)) + if (I->second->invalidate(M, PA)) ModuleAnalysisResults.erase(I); +} - // Now clear all the invalidated results associated specifically with this +PreservedAnalyses FunctionPassManager::run(Function *F, FunctionAnalysisManager *AM) { + PreservedAnalyses PA = PreservedAnalyses::all(); + for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) { + PreservedAnalyses PassPA = Passes[Idx]->run(F, AM); + if (AM) + AM->invalidate(F, PassPA); + PA.intersect(llvm_move(PassPA)); + } + return PA; +} + +bool FunctionAnalysisManager::empty() const { + assert(FunctionAnalysisResults.empty() == + FunctionAnalysisResultLists.empty() && + "The storage and index of analysis results disagree on how many there " + "are!"); + return FunctionAnalysisResults.empty(); +} + +void FunctionAnalysisManager::clear() { + FunctionAnalysisResults.clear(); + FunctionAnalysisResultLists.clear(); +} + +const FunctionAnalysisManager::ResultConceptT & +FunctionAnalysisManager::getResultImpl(void *PassID, Function *F) { + FunctionAnalysisResultMapT::iterator RI; + bool Inserted; + llvm::tie(RI, Inserted) = FunctionAnalysisResults.insert(std::make_pair( + std::make_pair(PassID, F), FunctionAnalysisResultListT::iterator())); + + // If we don't have a cached result for this function, look up the pass and + // run it to produce a result, which we then add to the cache. + if (Inserted) { + FunctionAnalysisResultListT &ResultList = FunctionAnalysisResultLists[F]; + ResultList.push_back(std::make_pair(PassID, lookupPass(PassID).run(F, this))); + RI->second = llvm::prior(ResultList.end()); + } + + return *RI->second->second; +} + +const FunctionAnalysisManager::ResultConceptT * +FunctionAnalysisManager::getCachedResultImpl(void *PassID, Function *F) const { + FunctionAnalysisResultMapT::const_iterator RI = + FunctionAnalysisResults.find(std::make_pair(PassID, F)); + return RI == FunctionAnalysisResults.end() ? 0 : &*RI->second->second; +} + +void FunctionAnalysisManager::invalidateImpl(void *PassID, Function *F) { + FunctionAnalysisResultMapT::iterator RI = + FunctionAnalysisResults.find(std::make_pair(PassID, F)); + if (RI == FunctionAnalysisResults.end()) + return; + + FunctionAnalysisResultLists[F].erase(RI->second); +} + +void FunctionAnalysisManager::invalidateImpl(Function *F, + const PreservedAnalyses &PA) { + // Clear all the invalidated results associated specifically with this // function. SmallVector InvalidatedPassIDs; FunctionAnalysisResultListT &ResultsList = FunctionAnalysisResultLists[F]; for (FunctionAnalysisResultListT::iterator I = ResultsList.begin(), E = ResultsList.end(); I != E;) - if (I->second->invalidate(F)) { + if (I->second->invalidate(F, PA)) { InvalidatedPassIDs.push_back(I->first); I = ResultsList.erase(I); } else { @@ -59,99 +137,31 @@ void AnalysisManager::invalidateAll(Function *F) { std::make_pair(InvalidatedPassIDs.pop_back_val(), F)); } -void AnalysisManager::invalidateAll(Module *M) { - // First invalidate any module results we still have laying about. - // FIXME: This is a total hack based on the fact that erasure doesn't - // invalidate iteration for DenseMap. 
- for (ModuleAnalysisResultMapT::iterator I = ModuleAnalysisResults.begin(), - E = ModuleAnalysisResults.end(); - I != E; ++I) - if (I->second->invalidate(M)) - ModuleAnalysisResults.erase(I); +char FunctionAnalysisManagerModuleProxy::PassID; - // Now walk all of the functions for which there are cached results, and - // attempt to invalidate each of those as the entire module may have changed. - // FIXME: How do we handle functions which have been deleted or RAUWed? - SmallVector InvalidatedPassIDs; - for (FunctionAnalysisResultListMapT::iterator - FI = FunctionAnalysisResultLists.begin(), - FE = FunctionAnalysisResultLists.end(); - FI != FE; ++FI) { - Function *F = FI->first; - FunctionAnalysisResultListT &ResultsList = FI->second; - for (FunctionAnalysisResultListT::iterator I = ResultsList.begin(), - E = ResultsList.end(); - I != E;) - if (I->second->invalidate(F)) { - InvalidatedPassIDs.push_back(I->first); - I = ResultsList.erase(I); - } else { - ++I; - } - while (!InvalidatedPassIDs.empty()) - FunctionAnalysisResults.erase( - std::make_pair(InvalidatedPassIDs.pop_back_val(), F)); - } +FunctionAnalysisManagerModuleProxy::Result +FunctionAnalysisManagerModuleProxy::run(Module *M) { + assert(FAM.empty() && "Function analyses ran prior to the module proxy!"); + return Result(FAM); } -const AnalysisManager::AnalysisResultConcept & -AnalysisManager::getResultImpl(void *PassID, Module *M) { - assert(M == this->M && "Wrong module used when querying the AnalysisManager"); - ModuleAnalysisResultMapT::iterator RI; - bool Inserted; - llvm::tie(RI, Inserted) = ModuleAnalysisResults.insert(std::make_pair( - PassID, polymorphic_ptr >())); - - if (Inserted) { - // We don't have a cached result for this result. Look up the pass and run - // it to produce a result, which we then add to the cache. - ModuleAnalysisPassMapT::const_iterator PI = - ModuleAnalysisPasses.find(PassID); - assert(PI != ModuleAnalysisPasses.end() && - "Analysis passes must be registered prior to being queried!"); - RI->second = PI->second->run(M); - } - - return *RI->second; +FunctionAnalysisManagerModuleProxy::Result::~Result() { + // Clear out the analysis manager if we're being destroyed -- it means we + // didn't even see an invalidate call when we got invalidated. + FAM.clear(); } -const AnalysisManager::AnalysisResultConcept & -AnalysisManager::getResultImpl(void *PassID, Function *F) { - assert(F->getParent() == M && "Analyzing a function from another module!"); +bool FunctionAnalysisManagerModuleProxy::Result::invalidate( + Module *M, const PreservedAnalyses &PA) { + // If this proxy isn't marked as preserved, then we can't even invalidate + // individual function analyses, there may be an invalid set of Function + // objects in the cache making it impossible to incrementally preserve them. + // Just clear the entire manager. + if (!PA.preserved(ID())) + FAM.clear(); - FunctionAnalysisResultMapT::iterator RI; - bool Inserted; - llvm::tie(RI, Inserted) = FunctionAnalysisResults.insert(std::make_pair( - std::make_pair(PassID, F), FunctionAnalysisResultListT::iterator())); - - if (Inserted) { - // We don't have a cached result for this result. Look up the pass and run - // it to produce a result, which we then add to the cache. 
- FunctionAnalysisPassMapT::const_iterator PI = - FunctionAnalysisPasses.find(PassID); - assert(PI != FunctionAnalysisPasses.end() && - "Analysis passes must be registered prior to being queried!"); - FunctionAnalysisResultListT &ResultList = FunctionAnalysisResultLists[F]; - ResultList.push_back(std::make_pair(PassID, PI->second->run(F))); - RI->second = llvm::prior(ResultList.end()); - } - - return *RI->second->second; + // Return false to indicate that this result is still a valid proxy. + return false; } -void AnalysisManager::invalidateImpl(void *PassID, Module *M) { - assert(M == this->M && "Invalidating a pass over a different module!"); - ModuleAnalysisResults.erase(PassID); -} - -void AnalysisManager::invalidateImpl(void *PassID, Function *F) { - assert(F->getParent() == M && - "Invalidating a pass over a function from another module!"); - - FunctionAnalysisResultMapT::iterator RI = - FunctionAnalysisResults.find(std::make_pair(PassID, F)); - if (RI == FunctionAnalysisResults.end()) - return; - - FunctionAnalysisResultLists[F].erase(RI->second); -} +char ModuleAnalysisManagerFunctionProxy::PassID; diff --git a/external/bsd/llvm/dist/llvm/lib/IR/Type.cpp b/external/bsd/llvm/dist/llvm/lib/IR/Type.cpp index 432cbc99f5e6..b02509fcf35e 100644 --- a/external/bsd/llvm/dist/llvm/lib/IR/Type.cpp +++ b/external/bsd/llvm/dist/llvm/lib/IR/Type.cpp @@ -132,7 +132,7 @@ unsigned Type::getPrimitiveSizeInBits() const { /// getScalarSizeInBits - If this is a vector type, return the /// getPrimitiveSizeInBits value for the element type. Otherwise return the /// getPrimitiveSizeInBits value for this type. -unsigned Type::getScalarSizeInBits() { +unsigned Type::getScalarSizeInBits() const { return getScalarType()->getPrimitiveSizeInBits(); } @@ -155,20 +155,14 @@ int Type::getFPMantissaWidth() const { /// isSizedDerivedType - Derived types like structures and arrays are sized /// iff all of the members of the type are sized as well. Since asking for /// their size is relatively uncommon, move this operation out of line. -bool Type::isSizedDerivedType() const { - if (this->isIntegerTy()) - return true; - +bool Type::isSizedDerivedType(SmallPtrSet *Visited) const { if (const ArrayType *ATy = dyn_cast(this)) - return ATy->getElementType()->isSized(); + return ATy->getElementType()->isSized(Visited); if (const VectorType *VTy = dyn_cast(this)) - return VTy->getElementType()->isSized(); + return VTy->getElementType()->isSized(Visited); - if (!this->isStructTy()) - return false; - - return cast(this)->isSized(); + return cast(this)->isSized(Visited); } //===----------------------------------------------------------------------===// @@ -556,17 +550,20 @@ StructType *StructType::create(StringRef Name, Type *type, ...) { return llvm::StructType::create(Ctx, StructFields, Name); } -bool StructType::isSized() const { +bool StructType::isSized(SmallPtrSet *Visited) const { if ((getSubclassData() & SCDB_IsSized) != 0) return true; if (isOpaque()) return false; + if (Visited && !Visited->insert(this)) + return false; + // Okay, our struct is sized if all of the elements are, but if one of the // elements is opaque, the struct isn't sized *yet*, but may become sized in // the future, so just bail out without caching. for (element_iterator I = element_begin(), E = element_end(); I != E; ++I) - if (!(*I)->isSized()) + if (!(*I)->isSized(Visited)) return false; // Here we cheat a bit and cast away const-ness. 
diff --git a/external/bsd/llvm/dist/llvm/lib/IR/Value.cpp b/external/bsd/llvm/dist/llvm/lib/IR/Value.cpp
index 62a3b31c18b2..2c90383edd8c 100644
--- a/external/bsd/llvm/dist/llvm/lib/IR/Value.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/IR/Value.cpp
@@ -182,6 +182,8 @@ void Value::setName(const Twine &NewName) {
   SmallString<256> NameData;
   StringRef NameRef = NewName.toStringRef(NameData);
+  assert(NameRef.find_first_of(0) == StringRef::npos &&
+         "Null bytes are not allowed in names");
 
   // Name isn't changing?
   if (getName() == NameRef)
diff --git a/external/bsd/llvm/dist/llvm/lib/IR/Verifier.cpp b/external/bsd/llvm/dist/llvm/lib/IR/Verifier.cpp
index da6b573a0c3c..44c66c9062f0 100644
--- a/external/bsd/llvm/dist/llvm/lib/IR/Verifier.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/IR/Verifier.cpp
@@ -550,9 +550,11 @@ void Verifier::visitGlobalAlias(GlobalAlias &GA) {
   ConstantExpr *CE = dyn_cast<ConstantExpr>(Aliasee);
   Assert1(CE &&
           (CE->getOpcode() == Instruction::BitCast ||
+           CE->getOpcode() == Instruction::AddrSpaceCast ||
            CE->getOpcode() == Instruction::GetElementPtr) &&
           isa<GlobalValue>(CE->getOperand(0)),
-          "Aliasee should be either GlobalValue or bitcast of GlobalValue",
+          "Aliasee should be either GlobalValue, bitcast or "
+          "addrspacecast of GlobalValue",
           &GA);
 
   if (CE->getOpcode() == Instruction::BitCast) {
@@ -813,26 +815,25 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty,
           !Attrs.hasAttribute(Idx, Attribute::Nest) &&
           !Attrs.hasAttribute(Idx, Attribute::StructRet) &&
           !Attrs.hasAttribute(Idx, Attribute::NoCapture) &&
-          !Attrs.hasAttribute(Idx, Attribute::Returned),
-          "Attribute 'byval', 'nest', 'sret', 'nocapture', and 'returned' "
-          "do not apply to return values!", V);
+          !Attrs.hasAttribute(Idx, Attribute::Returned) &&
+          !Attrs.hasAttribute(Idx, Attribute::InAlloca),
+          "Attributes 'byval', 'inalloca', 'nest', 'sret', 'nocapture', and "
+          "'returned' do not apply to return values!", V);
 
-  // Check for mutually incompatible attributes.
-  Assert1(!((Attrs.hasAttribute(Idx, Attribute::ByVal) &&
-             Attrs.hasAttribute(Idx, Attribute::Nest)) ||
-            (Attrs.hasAttribute(Idx, Attribute::ByVal) &&
-             Attrs.hasAttribute(Idx, Attribute::StructRet)) ||
-            (Attrs.hasAttribute(Idx, Attribute::Nest) &&
-             Attrs.hasAttribute(Idx, Attribute::StructRet))), "Attributes "
-          "'byval, nest, and sret' are incompatible!", V);
+  // Check for mutually incompatible attributes.  Only inreg is compatible with
+  // sret.
+  unsigned AttrCount = 0;
+  AttrCount += Attrs.hasAttribute(Idx, Attribute::ByVal);
+  AttrCount += Attrs.hasAttribute(Idx, Attribute::InAlloca);
+  AttrCount += Attrs.hasAttribute(Idx, Attribute::StructRet) ||
+               Attrs.hasAttribute(Idx, Attribute::InReg);
+  AttrCount += Attrs.hasAttribute(Idx, Attribute::Nest);
+  Assert1(AttrCount <= 1, "Attributes 'byval', 'inalloca', 'inreg', 'nest', "
+          "and 'sret' are incompatible!", V);
 
-  Assert1(!((Attrs.hasAttribute(Idx, Attribute::ByVal) &&
-             Attrs.hasAttribute(Idx, Attribute::Nest)) ||
-            (Attrs.hasAttribute(Idx, Attribute::ByVal) &&
-             Attrs.hasAttribute(Idx, Attribute::InReg)) ||
-            (Attrs.hasAttribute(Idx, Attribute::Nest) &&
-             Attrs.hasAttribute(Idx, Attribute::InReg))), "Attributes "
-          "'byval, nest, and inreg' are incompatible!", V);
+  Assert1(!(Attrs.hasAttribute(Idx, Attribute::InAlloca) &&
+            Attrs.hasAttribute(Idx, Attribute::ReadOnly)), "Attributes "
+          "'inalloca and readonly' are incompatible!", V);
 
   Assert1(!(Attrs.hasAttribute(Idx, Attribute::StructRet) &&
             Attrs.hasAttribute(Idx, Attribute::Returned)), "Attributes "
@@ -855,14 +856,18 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty,
           "Wrong types for attribute: " +
           AttributeFuncs::typeIncompatible(Ty, Idx).getAsString(Idx), V);
 
-  if (PointerType *PTy = dyn_cast<PointerType>(Ty))
-    Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) ||
-            PTy->getElementType()->isSized(),
-            "Attribute 'byval' does not support unsized types!", V);
-  else
+  if (PointerType *PTy = dyn_cast<PointerType>(Ty)) {
+    if (!PTy->getElementType()->isSized()) {
+      Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
+              !Attrs.hasAttribute(Idx, Attribute::InAlloca),
+              "Attributes 'byval' and 'inalloca' do not support unsized types!",
+              V);
+    }
+  } else {
     Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal),
             "Attribute 'byval' only applies to parameters with pointer type!",
             V);
+  }
 }
 
 // VerifyFunctionAttrs - Check parameter attributes against a function type.
@@ -1533,6 +1538,15 @@ void Verifier::VerifyCallSite(CallSite CS) {
   // Verify call attributes.
   VerifyFunctionAttrs(FTy, Attrs, I);
 
+  // Verify that values used for inalloca parameters are in fact allocas.
+  for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) {
+    if (!Attrs.hasAttribute(1 + i, Attribute::InAlloca))
+      continue;
+    Value *Arg = CS.getArgument(i);
+    Assert2(isa<AllocaInst>(Arg), "Inalloca argument is not an alloca!", I,
+            Arg);
+  }
+
   if (FTy->isVarArg()) {
     // FIXME? is 'nest' even legal here?
     bool SawNest = false;
@@ -1861,14 +1875,31 @@ void Verifier::visitStoreInst(StoreInst &SI) {
 }
 
 void Verifier::visitAllocaInst(AllocaInst &AI) {
+  SmallPtrSet<const Type*, 4> Visited;
   PointerType *PTy = AI.getType();
   Assert1(PTy->getAddressSpace() == 0,
           "Allocation instruction pointer not in the generic address space!",
           &AI);
-  Assert1(PTy->getElementType()->isSized(), "Cannot allocate unsized type",
+  Assert1(PTy->getElementType()->isSized(&Visited), "Cannot allocate unsized type",
           &AI);
   Assert1(AI.getArraySize()->getType()->isIntegerTy(),
           "Alloca array size must have integer type", &AI);
+
+  // Verify that an alloca instruction is not used with inalloca more than once.
+  unsigned InAllocaUses = 0;
+  for (User::use_iterator UI = AI.use_begin(), UE = AI.use_end(); UI != UE;
+       ++UI) {
+    CallSite CS(*UI);
+    if (!CS)
+      continue;
+    unsigned ArgNo = CS.getArgumentNo(UI);
+    if (CS.isInAllocaArgument(ArgNo)) {
+      InAllocaUses++;
+      Assert1(InAllocaUses <= 1,
+              "Allocas can be used at most once with inalloca!", &AI);
+    }
+  }
+
   visitInstruction(AI);
 }
diff --git a/external/bsd/llvm/dist/llvm/lib/LTO/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/LTO/LLVMBuild.txt
index 38c1170b9e20..c9b5212da164 100644
--- a/external/bsd/llvm/dist/llvm/lib/LTO/LLVMBuild.txt
+++ b/external/bsd/llvm/dist/llvm/lib/LTO/LLVMBuild.txt
@@ -19,4 +19,4 @@ type = Library
 name = LTO
 parent = Libraries
-required_libraries = Analysis BitReader BitWriter Core IPO Linker MC MCParser Scalar Support Target Vectorize
\ No newline at end of file
+required_libraries = BitReader BitWriter Core IPA IPO InstCombine Linker MC MCParser ObjCARC Scalar Support Target TransformUtils
diff --git a/external/bsd/llvm/dist/llvm/lib/LTO/LTOCodeGenerator.cpp b/external/bsd/llvm/dist/llvm/lib/LTO/LTOCodeGenerator.cpp
index 2b3648e1f3bf..586b526d6094 100644
--- a/external/bsd/llvm/dist/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -320,7 +320,7 @@ applyRestriction(GlobalValue &GV,
                  SmallPtrSet<GlobalValue*, 8> &AsmUsed,
                  Mangler &Mangler) {
   SmallString<64> Buffer;
-  Mangler.getNameWithPrefix(Buffer, &GV, false);
+  Mangler.getNameWithPrefix(Buffer, &GV);
 
   if (GV.isDeclaration())
     return;
@@ -387,7 +387,7 @@ void LTOCodeGenerator::applyScopeRestrictions() {
   passes.add(createVerifierPass());
 
   // mark which symbols can not be internalized
-  Mangler Mangler(TargetMach);
+  Mangler Mangler(TargetMach->getDataLayout());
   std::vector<const char*> MustPreserveList;
   SmallPtrSet<GlobalValue*, 8> AsmUsed;
   std::vector<StringRef> Libcalls;
@@ -460,6 +460,10 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
 
   // Add an appropriate DataLayout instance for this module...
   passes.add(new DataLayout(*TargetMach->getDataLayout()));
+
+  // Add appropriate TargetLibraryInfo for this module.
+  passes.add(new TargetLibraryInfo(Triple(TargetMach->getTargetTriple())));
+
   TargetMach->addAnalysisPasses(passes);
 
   // Enabling internalize here would use its AllButMain variant. It
diff --git a/external/bsd/llvm/dist/llvm/lib/LTO/LTOModule.cpp b/external/bsd/llvm/dist/llvm/lib/LTO/LTOModule.cpp
index 65416bed9277..e4deb336c221 100644
--- a/external/bsd/llvm/dist/llvm/lib/LTO/LTOModule.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/LTO/LTOModule.cpp
@@ -43,8 +43,12 @@ using namespace llvm;
 
 LTOModule::LTOModule(llvm::Module *m, llvm::TargetMachine *t)
   : _module(m), _target(t),
-    _context(_target->getMCAsmInfo(), _target->getRegisterInfo(), NULL),
-    _mangler(t) {}
+    _context(_target->getMCAsmInfo(), _target->getRegisterInfo(), &ObjFileInfo),
+    _mangler(t->getDataLayout()) {
+  ObjFileInfo.InitMCObjectFileInfo(t->getTargetTriple(),
+                                   t->getRelocationModel(), t->getCodeModel(),
+                                   _context);
+}
 
 /// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM
 /// bitcode.
@@ -360,7 +364,7 @@ void LTOModule::addDefinedSymbol(const GlobalValue *def, bool isFunction) { // string is owned by _defines SmallString<64> Buffer; - _mangler.getNameWithPrefix(Buffer, def, false); + _mangler.getNameWithPrefix(Buffer, def); // set alignment part log2() can have rounding errors uint32_t align = def->getAlignment(); @@ -496,7 +500,7 @@ LTOModule::addPotentialUndefinedSymbol(const GlobalValue *decl, bool isFunc) { return; SmallString<64> name; - _mangler.getNameWithPrefix(name, decl, false); + _mangler.getNameWithPrefix(name, decl); StringMap::value_type &entry = _undefines.GetOrCreateValue(name); diff --git a/external/bsd/llvm/dist/llvm/lib/MC/ELFObjectWriter.cpp b/external/bsd/llvm/dist/llvm/lib/MC/ELFObjectWriter.cpp index 9899bb2eac22..972c64cc5657 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/ELFObjectWriter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/ELFObjectWriter.cpp @@ -551,8 +551,7 @@ void ELFObjectWriter::WriteSymbol(MCDataFragment *SymtabF, // Other and Visibility share the same byte with Visibility using the lower // 2 bits uint8_t Visibility = MCELF::GetVisibility(OrigData); - uint8_t Other = MCELF::getOther(OrigData) << - (ELF_Other_Shift - ELF_STV_Shift); + uint8_t Other = MCELF::getOther(OrigData) << (ELF_STO_Shift - ELF_STV_Shift); Other |= Visibility; uint64_t Value = SymbolValue(Data, Layout); diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCAsmInfo.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MCAsmInfo.cpp index 28f1c951641c..466a94d8e870 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCAsmInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCAsmInfo.cpp @@ -41,9 +41,7 @@ MCAsmInfo::MCAsmInfo() { CommentString = "#"; LabelSuffix = ":"; DebugLabelSuffix = ":"; - GlobalPrefix = ""; - PrivateGlobalPrefix = "."; - LinkerPrivateGlobalPrefix = ""; + PrivateGlobalPrefix = "L"; InlineAsmStart = "APP"; InlineAsmEnd = "NO_APP"; Code16Directive = ".code16"; @@ -76,8 +74,9 @@ MCAsmInfo::MCAsmInfo() { HasIdentDirective = false; HasNoDeadStrip = false; WeakRefDirective = 0; - WeakDefDirective = 0; - LinkOnceDirective = 0; + HasWeakDefDirective = false; + HasWeakDefCanBeHiddenDirective = false; + HasLinkOnceDirective = false; HiddenVisibilityAttr = MCSA_Hidden; HiddenDeclarationVisibilityAttr = MCSA_Hidden; ProtectedVisibilityAttr = MCSA_Protected; @@ -86,8 +85,8 @@ MCAsmInfo::MCAsmInfo() { ExceptionsType = ExceptionHandling::None; DwarfUsesRelocationsAcrossSections = true; DwarfRegNumForCFI = false; - HasMicrosoftFastStdCallMangling = false; NeedsDwarfSectionOffsetDirective = false; + UseParensForSymbolVariant = false; } MCAsmInfo::~MCAsmInfo() { diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCAsmInfoCOFF.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MCAsmInfoCOFF.cpp index 9d9f98e72b96..f11227c6474c 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCAsmInfoCOFF.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCAsmInfoCOFF.cpp @@ -18,16 +18,14 @@ using namespace llvm; void MCAsmInfoCOFF::anchor() { } MCAsmInfoCOFF::MCAsmInfoCOFF() { - GlobalPrefix = "_"; // MingW 4.5 and later support .comm with log2 alignment, but .lcomm uses byte // alignment. 
COMMDirectiveAlignmentIsInBytes = false; LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment; HasDotTypeDotSizeDirective = false; HasSingleParameterDotFile = false; - PrivateGlobalPrefix = "L"; // Prefix for private global symbols WeakRefDirective = "\t.weak\t"; - LinkOnceDirective = "\t.linkonce discard\n"; + HasLinkOnceDirective = true; // Doesn't support visibility: HiddenVisibilityAttr = HiddenDeclarationVisibilityAttr = MCSA_Invalid; @@ -36,7 +34,6 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() { // Set up DWARF directives HasLEB128 = true; // Target asm supports leb128 directives (little-endian) SupportsDebugInformation = true; - HasMicrosoftFastStdCallMangling = true; NeedsDwarfSectionOffsetDirective = true; } diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCAsmInfoDarwin.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MCAsmInfoDarwin.cpp index 704c8161f880..d5382e695384 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCAsmInfoDarwin.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCAsmInfoDarwin.cpp @@ -23,9 +23,6 @@ void MCAsmInfoDarwin::anchor() { } MCAsmInfoDarwin::MCAsmInfoDarwin() { // Common settings for all Darwin targets. // Syntax: - GlobalPrefix = "_"; - PrivateGlobalPrefix = "L"; - LinkerPrivateGlobalPrefix = "l"; HasSingleParameterDotFile = false; HasSubsectionsViaSymbols = true; @@ -36,7 +33,8 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { InlineAsmEnd = " InlineAsm End"; // Directives: - WeakDefDirective = "\t.weak_definition "; + HasWeakDefDirective = true; + HasWeakDefCanBeHiddenDirective = true; WeakRefDirective = "\t.weak_reference "; ZeroDirective = "\t.space\t"; // ".space N" emits N zeros. HasMachoZeroFillDirective = true; // Uses .zerofill diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCAsmInfoELF.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MCAsmInfoELF.cpp index 8cf4e4fea233..ccb3dc3c6eda 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCAsmInfoELF.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCAsmInfoELF.cpp @@ -20,4 +20,5 @@ void MCAsmInfoELF::anchor() { } MCAsmInfoELF::MCAsmInfoELF() { HasIdentDirective = true; WeakRefDirective = "\t.weak\t"; + PrivateGlobalPrefix = ".L"; } diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCAsmStreamer.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MCAsmStreamer.cpp index ca49f8f5908e..099ce3048db4 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCAsmStreamer.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCAsmStreamer.cpp @@ -160,6 +160,7 @@ public: virtual void EmitCOFFSymbolStorageClass(int StorageClass); virtual void EmitCOFFSymbolType(int Type); virtual void EndCOFFSymbolDef(); + virtual void EmitCOFFSectionIndex(MCSymbol const *Symbol); virtual void EmitCOFFSecRel32(MCSymbol const *Symbol); virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, @@ -505,8 +506,13 @@ void MCAsmStreamer::EndCOFFSymbolDef() { EmitEOL(); } +void MCAsmStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) { + OS << "\t.secidx\t" << *Symbol; + EmitEOL(); +} + void MCAsmStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) { - OS << "\t.secrel32\t" << *Symbol << '\n'; + OS << "\t.secrel32\t" << *Symbol; EmitEOL(); } diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCContext.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MCContext.cpp index 3b45d1670d2c..42056c951ca5 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCContext.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCContext.cpp @@ -42,13 +42,13 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri, SrcMgr(mgr), MAI(mai), MRI(mri), MOFI(mofi), 
Allocator(), Symbols(Allocator), UsedNames(Allocator), NextUniqueID(0), - CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0), + CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0), DwarfLocSeen(false), GenDwarfForAssembly(false), GenDwarfFileNumber(0), AllowTemporaryLabels(true), DwarfCompileUnitID(0), AutoReset(DoAutoReset) { error_code EC = llvm::sys::fs::current_path(CompilationDir); - assert(!EC && "Could not determine the current directory"); - (void)EC; + if (EC) + CompilationDir.clear(); MachOUniquingMap = 0; ELFUniquingMap = 0; @@ -71,7 +71,7 @@ MCContext::~MCContext() { // NOTE: The symbols are all allocated out of a bump pointer allocator, // we don't need to free them here. - + // If the stream for the .secure_log_unique directive was created free it. delete (raw_ostream*)SecureLog; } @@ -138,7 +138,7 @@ MCSymbol *MCContext::CreateSymbol(StringRef Name) { StringMapEntry *NameEntry = &UsedNames.GetOrCreateValue(Name); if (NameEntry->getValue()) { - assert(isTemporary && "Cannot rename non temporary symbols"); + assert(isTemporary && "Cannot rename non-temporary symbols"); SmallString<128> NewName = Name; do { NewName.resize(Name.size()); @@ -157,8 +157,7 @@ MCSymbol *MCContext::CreateSymbol(StringRef Name) { MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) { SmallString<128> NameSV; - Name.toVector(NameSV); - return GetOrCreateSymbol(NameSV.str()); + return GetOrCreateSymbol(Name.toStringRef(NameSV)); } MCSymbol *MCContext::CreateTempSymbol() { diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCDisassembler/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/MC/MCDisassembler/LLVMBuild.txt index d73c6adcbb47..e4876cd18fff 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCDisassembler/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCDisassembler/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = MCDisassembler parent = MC -required_libraries = MC MCParser Support +required_libraries = MC Support diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCDwarf.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MCDwarf.cpp index 1e5c2e34c488..479f4452d4a3 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCDwarf.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCDwarf.cpp @@ -467,7 +467,8 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) { EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr); EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr); EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string); - EmitAbbrev(MCOS, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string); + if (!context.getCompilationDir().empty()) + EmitAbbrev(MCOS, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string); StringRef DwarfDebugFlags = context.getDwarfDebugFlags(); if (!DwarfDebugFlags.empty()) EmitAbbrev(MCOS, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string); @@ -643,8 +644,10 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. // AT_comp_dir, the working directory the assembly was done in. - MCOS->EmitBytes(context.getCompilationDir()); - MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. + if (!context.getCompilationDir().empty()) { + MCOS->EmitBytes(context.getCompilationDir()); + MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. + } // AT_APPLE_flags, the command line arguments of the assembler tool. 
StringRef DwarfDebugFlags = context.getDwarfDebugFlags(); diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCELF.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MCELF.cpp index ebb189e5439e..0a9cd31dda00 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCELF.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCELF.cpp @@ -72,13 +72,13 @@ unsigned MCELF::GetVisibility(MCSymbolData &SD) { // Other is stored in the last six bits of st_other // st_other values are stored in the second byte of get/setFlags void MCELF::setOther(MCSymbolData &SD, unsigned Other) { - uint32_t OtherFlags = SD.getFlags() & ~(0x3f << ELF_Other_Shift); - SD.setFlags(OtherFlags | (Other << ELF_Other_Shift)); + uint32_t OtherFlags = SD.getFlags() & ~(0x3f << ELF_STO_Shift); + SD.setFlags(OtherFlags | (Other << ELF_STO_Shift)); } unsigned MCELF::getOther(MCSymbolData &SD) { unsigned Other = - (SD.getFlags() & (0x3f << ELF_Other_Shift)) >> ELF_Other_Shift; + (SD.getFlags() & (0x3f << ELF_STO_Shift)) >> ELF_STO_Shift; return Other; } diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCELFStreamer.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MCELFStreamer.cpp index e806cb9f1c94..b6017859480f 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCELFStreamer.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCELFStreamer.cpp @@ -371,9 +371,6 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { case MCSymbolRefExpr::VK_TLSLDM: case MCSymbolRefExpr::VK_TPOFF: case MCSymbolRefExpr::VK_DTPOFF: - case MCSymbolRefExpr::VK_ARM_TLSGD: - case MCSymbolRefExpr::VK_ARM_TPOFF: - case MCSymbolRefExpr::VK_ARM_GOTTPOFF: case MCSymbolRefExpr::VK_Mips_TLSGD: case MCSymbolRefExpr::VK_Mips_GOTTPREL: case MCSymbolRefExpr::VK_Mips_TPREL_HI: diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCExpr.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MCExpr.cpp index c777e648bdc6..69cdfa354401 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCExpr.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCExpr.cpp @@ -11,6 +11,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -47,19 +48,12 @@ void MCExpr::print(raw_ostream &OS) const { else OS << Sym; - if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_NONE || - SRE.getKind() == MCSymbolRefExpr::VK_ARM_PLT || - SRE.getKind() == MCSymbolRefExpr::VK_ARM_TLSGD || - SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOT || - SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTOFF || - SRE.getKind() == MCSymbolRefExpr::VK_ARM_TPOFF || - SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF || - SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET1 || - SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET2 || - SRE.getKind() == MCSymbolRefExpr::VK_ARM_PREL31) - OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); - else if (SRE.getKind() != MCSymbolRefExpr::VK_None) - OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); + if (SRE.getKind() != MCSymbolRefExpr::VK_None) { + if (SRE.getMCAsmInfo().useParensForSymbolVariant()) + OS << '(' << MCSymbolRefExpr::getVariantKindName(SRE.getKind()) << ')'; + else + OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); + } return; } @@ -158,7 +152,7 @@ const MCConstantExpr *MCConstantExpr::Create(int64_t Value, MCContext &Ctx) { const MCSymbolRefExpr *MCSymbolRefExpr::Create(const MCSymbol *Sym, VariantKind Kind, MCContext &Ctx) { - return new (Ctx) MCSymbolRefExpr(Sym, Kind); + return new (Ctx) MCSymbolRefExpr(Sym, Kind, 
Ctx.getAsmInfo()); } const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, VariantKind Kind, @@ -186,16 +180,10 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_DTPOFF: return "DTPOFF"; case VK_TLVP: return "TLVP"; case VK_SECREL: return "SECREL32"; - case VK_ARM_NONE: return "(NONE)"; - case VK_ARM_PLT: return "(PLT)"; - case VK_ARM_GOT: return "(GOT)"; - case VK_ARM_GOTOFF: return "(GOTOFF)"; - case VK_ARM_TPOFF: return "(tpoff)"; - case VK_ARM_GOTTPOFF: return "(gottpoff)"; - case VK_ARM_TLSGD: return "(tlsgd)"; - case VK_ARM_TARGET1: return "(target1)"; - case VK_ARM_TARGET2: return "(target2)"; - case VK_ARM_PREL31: return "(prel31)"; + case VK_ARM_NONE: return "none"; + case VK_ARM_TARGET1: return "target1"; + case VK_ARM_TARGET2: return "target2"; + case VK_ARM_PREL31: return "prel31"; case VK_PPC_LO: return "l"; case VK_PPC_HI: return "h"; case VK_PPC_HA: return "ha"; @@ -409,6 +397,14 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) { .Case("got@tlsld@h", VK_PPC_GOT_TLSLD_HI) .Case("GOT@TLSLD@HA", VK_PPC_GOT_TLSLD_HA) .Case("got@tlsld@ha", VK_PPC_GOT_TLSLD_HA) + .Case("NONE", VK_ARM_NONE) + .Case("none", VK_ARM_NONE) + .Case("TARGET1", VK_ARM_TARGET1) + .Case("target1", VK_ARM_TARGET1) + .Case("TARGET2", VK_ARM_TARGET2) + .Case("target2", VK_ARM_TARGET2) + .Case("PREL31", VK_ARM_PREL31) + .Case("prel31", VK_ARM_PREL31) .Default(VK_Invalid); } diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCObjectFileInfo.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MCObjectFileInfo.cpp index 8ef4a0a6d7b1..da7072824792 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCObjectFileInfo.cpp @@ -9,6 +9,7 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionCOFF.h" @@ -718,6 +719,17 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm, } } +const MCSection *MCObjectFileInfo::getDwarfTypesSection(uint64_t Hash) const { + return Ctx->getELFSection(".debug_types", ELF::SHT_PROGBITS, ELF::SHF_GROUP, + SectionKind::getMetadata(), 0, utostr(Hash)); +} + +const MCSection * +MCObjectFileInfo::getDwarfTypesDWOSection(uint64_t Hash) const { + return Ctx->getELFSection(".debug_types.dwo", ELF::SHT_GROUP, 0, + SectionKind::getMetadata(), 0, utostr(Hash)); +} + void MCObjectFileInfo::InitEHFrameSection() { if (Env == IsMachO) EHFrameSection = diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCParser/AsmLexer.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MCParser/AsmLexer.cpp index b49dd0104793..ed98f93758e8 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -25,6 +25,7 @@ AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { CurBuf = NULL; CurPtr = NULL; isAtStartOfLine = true; + AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); } AsmLexer::~AsmLexer() { @@ -139,8 +140,9 @@ AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { } /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]* -static bool IsIdentifierChar(char c) { - return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@' || c == '?'; +static bool IsIdentifierChar(char c, bool AllowAt) { + return isalnum(c) || c == '_' || c == '$' || c == '.' || + (c == '@' && AllowAt) || c == '?'; } AsmToken AsmLexer::LexIdentifier() { // Check for floating point literals. 
@@ -148,11 +150,12 @@ AsmToken AsmLexer::LexIdentifier() { // Disambiguate a .1243foo identifier from a floating literal. while (isdigit(*CurPtr)) ++CurPtr; - if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr)) + if (*CurPtr == 'e' || *CurPtr == 'E' || + !IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) return LexFloatLiteral(); } - while (IsIdentifierChar(*CurPtr)) + while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) ++CurPtr; // Handle . as a special case. diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCParser/AsmParser.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MCParser/AsmParser.cpp index a91bd93105b6..4b83144f3bd2 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCParser/AsmParser.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/AsmCond.h" #include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" @@ -357,7 +358,8 @@ private: DK_CFI_RESTORE, DK_CFI_ESCAPE, DK_CFI_SIGNAL_FRAME, DK_CFI_UNDEFINED, DK_CFI_REGISTER, DK_CFI_WINDOW_SAVE, DK_MACROS_ON, DK_MACROS_OFF, DK_MACRO, DK_ENDM, DK_ENDMACRO, DK_PURGEM, - DK_SLEB128, DK_ULEB128 + DK_SLEB128, DK_ULEB128, + DK_END }; /// \brief Maps directive name --> DirectiveKind enum, for @@ -451,7 +453,7 @@ private: MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc); void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc, raw_svector_ostream &OS); - bool parseDirectiveRept(SMLoc DirectiveLoc); // ".rept" + bool parseDirectiveRept(SMLoc DirectiveLoc, StringRef Directive); bool parseDirectiveIrp(SMLoc DirectiveLoc); // ".irp" bool parseDirectiveIrpc(SMLoc DirectiveLoc); // ".irpc" bool parseDirectiveEndr(SMLoc DirectiveLoc); // ".endr" @@ -463,6 +465,9 @@ private: // "align" bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info); + // "end" + bool parseDirectiveEnd(SMLoc DirectiveLoc); + void initializeDirectiveKindMap(); }; } @@ -491,19 +496,20 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); // Initialize the platform / file format parser. - // - // FIXME: This is a hack, we need to (majorly) cleanup how these objects are - // created. - if (_MAI.hasMicrosoftFastStdCallMangling()) { - PlatformParser = createCOFFAsmParser(); - PlatformParser->Initialize(*this); - } else if (_MAI.hasSubsectionsViaSymbols()) { - PlatformParser = createDarwinAsmParser(); - PlatformParser->Initialize(*this); - IsDarwin = true; - } else { - PlatformParser = createELFAsmParser(); - PlatformParser->Initialize(*this); + switch (_Ctx.getObjectFileInfo()->getObjectFileType()) { + case MCObjectFileInfo::IsCOFF: + PlatformParser = createCOFFAsmParser(); + PlatformParser->Initialize(*this); + break; + case MCObjectFileInfo::IsMachO: + PlatformParser = createDarwinAsmParser(); + PlatformParser->Initialize(*this); + IsDarwin = true; + break; + case MCObjectFileInfo::IsELF: + PlatformParser = createELFAsmParser(); + PlatformParser->Initialize(*this); + break; } initializeDirectiveKindMap(); @@ -671,6 +677,10 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { } } + // Callback to the target parser in case it needs to do anything. + if (!HadError) + getTargetParser().finishParse(); + // Finalize the output stream if there are no errors and if the client wants // us to. 
   if (!HadError && !NoFinalize)
@@ -789,20 +799,34 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
         return true;
       }
     }
+    // Parse symbol variant
+    std::pair<StringRef, StringRef> Split;
+    if (!MAI.useParensForSymbolVariant()) {
+      Split = Identifier.split('@');
+    } else if (Lexer.is(AsmToken::LParen)) {
+      Lexer.Lex(); // eat (
+      StringRef VName;
+      parseIdentifier(VName);
+      if (Lexer.isNot(AsmToken::RParen)) {
+        return Error(Lexer.getTok().getLoc(),
+                     "unexpected token in variant, expected ')'");
+      }
+      Lexer.Lex(); // eat )
+      Split = std::make_pair(Identifier, VName);
+    }
 
     EndLoc = SMLoc::getFromPointer(Identifier.end());
 
     // This is a symbol reference.
     StringRef SymbolName = Identifier;
     MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
-    std::pair<StringRef, StringRef> Split = Identifier.split('@');
 
     // Lookup the symbol variant if used.
-    if (Split.first.size() != Identifier.size()) {
+    if (Split.second.size()) {
       Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
       if (Variant != MCSymbolRefExpr::VK_Invalid) {
        SymbolName = Split.first;
-      } else if (MAI.doesAllowAtInName()) {
+      } else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
         Variant = MCSymbolRefExpr::VK_None;
       } else {
         Variant = MCSymbolRefExpr::VK_None;
@@ -1414,7 +1438,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info) {
   case DK_CODE16GCC:
     return TokError(Twine(IDVal) + " not supported yet");
   case DK_REPT:
-    return parseDirectiveRept(IDLoc);
+    return parseDirectiveRept(IDLoc, IDVal);
   case DK_IRP:
     return parseDirectiveIrp(IDLoc);
   case DK_IRPC:
@@ -1492,6 +1516,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info) {
     return parseDirectiveEndMacro(IDVal);
   case DK_PURGEM:
     return parseDirectivePurgeMacro(IDLoc);
+  case DK_END:
+    return parseDirectiveEnd(IDLoc);
   }
 
   return Error(IDLoc, "unknown directive");
@@ -3727,6 +3753,20 @@ bool AsmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
   return false;
 }
 
+/// parseDirectiveEnd
+/// ::= .end
+bool AsmParser::parseDirectiveEnd(SMLoc DirectiveLoc) {
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.end' directive");
+
+  Lex();
+
+  while (Lexer.isNot(AsmToken::Eof))
+    Lex();
+
+  return false;
+}
+
 /// parseDirectiveEndIf
 /// ::= .endif
 bool AsmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) {
@@ -3796,6 +3836,7 @@ void AsmParser::initializeDirectiveKindMap() {
   DirectiveKindMap[".code16"] = DK_CODE16;
   DirectiveKindMap[".code16gcc"] = DK_CODE16GCC;
   DirectiveKindMap[".rept"] = DK_REPT;
+  DirectiveKindMap[".rep"] = DK_REPT;
   DirectiveKindMap[".irp"] = DK_IRP;
   DirectiveKindMap[".irpc"] = DK_IRPC;
   DirectiveKindMap[".endr"] = DK_ENDR;
@@ -3812,6 +3853,7 @@ void AsmParser::initializeDirectiveKindMap() {
   DirectiveKindMap[".ifnotdef"] = DK_IFNOTDEF;
   DirectiveKindMap[".elseif"] = DK_ELSEIF;
   DirectiveKindMap[".else"] = DK_ELSE;
+  DirectiveKindMap[".end"] = DK_END;
   DirectiveKindMap[".endif"] = DK_ENDIF;
   DirectiveKindMap[".skip"] = DK_SKIP;
   DirectiveKindMap[".space"] = DK_SPACE;
@@ -3913,16 +3955,25 @@ void AsmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
   Lex();
 }
 
-bool AsmParser::parseDirectiveRept(SMLoc DirectiveLoc) {
+/// parseDirectiveRept
+///   ::= .rep | .rept count
+bool AsmParser::parseDirectiveRept(SMLoc DirectiveLoc, StringRef Dir) {
+  const MCExpr *CountExpr;
+  SMLoc CountLoc = getTok().getLoc();
+  if (parseExpression(CountExpr))
+    return true;
+
   int64_t Count;
-  if (parseAbsoluteExpression(Count))
-    return TokError("unexpected token in '.rept' directive");
+  if (!CountExpr->EvaluateAsAbsolute(Count)) {
+ eatToEndOfStatement(); + return Error(CountLoc, "unexpected token in '" + Dir + "' directive"); + } if (Count < 0) - return TokError("Count is negative"); + return Error(CountLoc, "Count is negative"); if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.rept' directive"); + return TokError("unexpected token in '" + Dir + "' directive"); // Eat the end of statement. Lex(); @@ -4192,6 +4243,11 @@ bool AsmParser::parseMSInlineAsm( AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Start, SymName.size())); } } + + // Consider implicit defs to be clobbers. Think of cpuid and push. + const uint16_t *ImpDefs = Desc.getImplicitDefs(); + for (unsigned I = 0, E = Desc.getNumImplicitDefs(); I != E; ++I) + ClobberRegs.push_back(ImpDefs[I]); } // Set the number of Outputs and Inputs. diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MCParser/COFFAsmParser.cpp index d8343a3eea1b..cc356c7a1748 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCParser/COFFAsmParser.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCParser/COFFAsmParser.cpp @@ -55,6 +55,7 @@ class COFFAsmParser : public MCAsmParserExtension { addDirectiveHandler<&COFFAsmParser::ParseDirectiveType>(".type"); addDirectiveHandler<&COFFAsmParser::ParseDirectiveEndef>(".endef"); addDirectiveHandler<&COFFAsmParser::ParseDirectiveSecRel32>(".secrel32"); + addDirectiveHandler<&COFFAsmParser::ParseDirectiveSecIdx>(".secidx"); addDirectiveHandler<&COFFAsmParser::ParseDirectiveLinkOnce>(".linkonce"); // Win64 EH directives. @@ -115,6 +116,7 @@ class COFFAsmParser : public MCAsmParserExtension { bool ParseDirectiveType(StringRef, SMLoc); bool ParseDirectiveEndef(StringRef, SMLoc); bool ParseDirectiveSecRel32(StringRef, SMLoc); + bool ParseDirectiveSecIdx(StringRef, SMLoc); bool parseCOMDATTypeAndAssoc(COFF::COMDATType &Type, const MCSectionCOFF *&Assoc); bool ParseDirectiveLinkOnce(StringRef, SMLoc); @@ -432,7 +434,7 @@ bool COFFAsmParser::ParseDirectiveEndef(StringRef, SMLoc) { bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) { StringRef SymbolID; if (getParser().parseIdentifier(SymbolID)) - return true; + return TokError("expected identifier in directive"); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); @@ -444,6 +446,21 @@ bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) { return false; } +bool COFFAsmParser::ParseDirectiveSecIdx(StringRef, SMLoc) { + StringRef SymbolID; + if (getParser().parseIdentifier(SymbolID)) + return TokError("expected identifier in directive"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + MCSymbol *Symbol = getContext().GetOrCreateSymbol(SymbolID); + + Lex(); + getStreamer().EmitCOFFSectionIndex(Symbol); + return false; +} + /// ::= [ identifier [ identifier ] ] bool COFFAsmParser::parseCOMDATTypeAndAssoc(COFF::COMDATType &Type, const MCSectionCOFF *&Assoc) { diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCSectionCOFF.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MCSectionCOFF.cpp index 64aa2c5c49ea..ad9ca8840f4f 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCSectionCOFF.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCSectionCOFF.cpp @@ -20,6 +20,8 @@ MCSectionCOFF::~MCSectionCOFF() {} // anchor. 
// should be printed before the section name bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const { + if (COMDATSymbol) + return false; // FIXME: Does .section .bss/.data/.text work everywhere?? if (Name == ".text" || Name == ".data" || Name == ".bss") @@ -52,42 +54,49 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, OS << "\t.section\t" << getSectionName() << ",\""; if (getKind().isText()) OS << 'x'; + else if (getKind().isBSS()) + OS << 'b'; if (getKind().isWriteable()) OS << 'w'; else OS << 'r'; if (getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE) OS << 'n'; - OS << "\"\n"; + + OS << '"'; if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) { + OS << ","; switch (Selection) { case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES: - OS << "\t.linkonce one_only\n"; + OS << "one_only,"; break; case COFF::IMAGE_COMDAT_SELECT_ANY: - OS << "\t.linkonce discard\n"; + OS << "discard,"; break; case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE: - OS << "\t.linkonce same_size\n"; + OS << "same_size,"; break; case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH: - OS << "\t.linkonce same_contents\n"; + OS << "same_contents,"; break; case COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE: - OS << "\t.linkonce associative " << Assoc->getSectionName() << "\n"; + OS << "associative " << Assoc->getSectionName() << ","; break; case COFF::IMAGE_COMDAT_SELECT_LARGEST: - OS << "\t.linkonce largest\n"; + OS << "largest,"; break; case COFF::IMAGE_COMDAT_SELECT_NEWEST: - OS << "\t.linkonce newest\n"; + OS << "newest,"; break; default: assert (0 && "unsupported COFF selection type"); break; } + assert(COMDATSymbol); + OS << *COMDATSymbol; } + OS << '\n'; } bool MCSectionCOFF::UseCodeAlign() const { diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCSectionMachO.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MCSectionMachO.cpp index 870451313bb1..d91bfe25a9ab 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCSectionMachO.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCSectionMachO.cpp @@ -68,7 +68,7 @@ ENTRY(0 /*FIXME*/, S_ATTR_EXT_RELOC) ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC) #undef ENTRY { 0, "none", 0 }, // used if section has no attributes but has a stub size -#define AttrFlagEnd 0xffffffff // non legal value, multiple attribute bits set +#define AttrFlagEnd 0xffffffff // non-legal value, multiple attribute bits set { AttrFlagEnd, 0, 0 } }; diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MCStreamer.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MCStreamer.cpp index 2e1d69b77f6f..22e1d47b0000 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MCStreamer.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MCStreamer.cpp @@ -566,6 +566,10 @@ void MCStreamer::EmitWin64EHEndProlog() { EmitLabel(CurFrame->PrologEnd); } +void MCStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) { + llvm_unreachable("This file format doesn't support this directive"); +} + void MCStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) { llvm_unreachable("This file format doesn't support this directive"); } diff --git a/external/bsd/llvm/dist/llvm/lib/MC/MachObjectWriter.cpp b/external/bsd/llvm/dist/llvm/lib/MC/MachObjectWriter.cpp index 8234affc54da..4143d783e29a 100644 --- a/external/bsd/llvm/dist/llvm/lib/MC/MachObjectWriter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/MC/MachObjectWriter.cpp @@ -446,7 +446,7 @@ void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) { } } - // Bind non lazy symbol pointers first. + // Bind non-lazy symbol pointers first. 
   unsigned IndirectIndex = 0;
   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
          ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
@@ -917,7 +917,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
   for (MCAssembler::const_indirect_symbol_iterator
          it = Asm.indirect_symbol_begin(),
          ie = Asm.indirect_symbol_end(); it != ie; ++it) {
-    // Indirect symbols in the non lazy symbol pointer section have some
+    // Indirect symbols in the non-lazy symbol pointer section have some
     // special handling.
     const MCSectionMachO &Section =
       static_cast<const MCSectionMachO &>(it->SectionData->getSection());
diff --git a/external/bsd/llvm/dist/llvm/lib/MC/WinCOFFObjectWriter.cpp b/external/bsd/llvm/dist/llvm/lib/MC/WinCOFFObjectWriter.cpp
index d9ca86d8af93..6d270209b744 100644
--- a/external/bsd/llvm/dist/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -351,7 +351,7 @@ object_t *WinCOFFObjectWriter::createCOFFEntity(StringRef Name,
 /// and creates the associated COFF section staging object.
 void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
   assert(SectionData.getSection().getVariant() == MCSection::SV_COFF
-    && "Got non COFF section in the COFF backend!");
+    && "Got non-COFF section in the COFF backend!");
   // FIXME: Not sure how to verify this (at least in a debug build).
   MCSectionCOFF const &Sec =
     static_cast<MCSectionCOFF const &>(SectionData.getSection());
@@ -845,7 +845,8 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
 
   Header.PointerToSymbolTable = offset;
 
-  Header.TimeDateStamp = sys::TimeValue::now().toEpochTime();
+  // We want a deterministic output. It looks like GNU as also writes 0 in here.
+  Header.TimeDateStamp = 0;
 
   // Write it all to disk...
   WriteFileHeader(Header);
diff --git a/external/bsd/llvm/dist/llvm/lib/MC/WinCOFFStreamer.cpp b/external/bsd/llvm/dist/llvm/lib/MC/WinCOFFStreamer.cpp
index 5b5aad7a79ea..cb4e9b1c37b9 100644
--- a/external/bsd/llvm/dist/llvm/lib/MC/WinCOFFStreamer.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/MC/WinCOFFStreamer.cpp
@@ -61,6 +61,7 @@ public:
   virtual void EmitCOFFSymbolStorageClass(int StorageClass);
   virtual void EmitCOFFSymbolType(int Type);
   virtual void EndCOFFSymbolDef();
+  virtual void EmitCOFFSectionIndex(MCSymbol const *Symbol);
   virtual void EmitCOFFSecRel32(MCSymbol const *Symbol);
   virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
   virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -94,36 +95,39 @@ private:
     DF->getContents().append(Code.begin(), Code.end());
   }
 
-  void SetSection(StringRef Section,
-                  unsigned Characteristics,
-                  SectionKind Kind) {
-    SwitchSection(getContext().getCOFFSection(Section, Characteristics, Kind));
+  const MCSectionCOFF *getSectionText() {
+    return getContext().getCOFFSection(
+        ".text", COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_MEM_EXECUTE |
+                     COFF::IMAGE_SCN_MEM_READ,
+        SectionKind::getText());
+  }
+
+  const MCSectionCOFF *getSectionData() {
+    return getContext().getCOFFSection(
+        ".data", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                     COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
+        SectionKind::getDataRel());
+  }
+
+  const MCSectionCOFF *getSectionBSS() {
+    return getContext().getCOFFSection(
+        ".bss", COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+                    COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
+        SectionKind::getBSS());
   }
 
   void SetSectionText() {
-    SetSection(".text",
-               COFF::IMAGE_SCN_CNT_CODE
-             | COFF::IMAGE_SCN_MEM_EXECUTE
-             | COFF::IMAGE_SCN_MEM_READ,
-               SectionKind::getText());
+    SwitchSection(getSectionText());
     EmitCodeAlignment(4, 0);
   }
 
   void SetSectionData() {
-    SetSection(".data",
-               COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
-             | COFF::IMAGE_SCN_MEM_READ
-             | COFF::IMAGE_SCN_MEM_WRITE,
-               SectionKind::getDataRel());
+    SwitchSection(getSectionData());
     EmitCodeAlignment(4, 0);
   }
 
   void SetSectionBSS() {
-    SetSection(".bss",
-               COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA
-             | COFF::IMAGE_SCN_MEM_READ
-             | COFF::IMAGE_SCN_MEM_WRITE,
-               SectionKind::getBSS());
+    SwitchSection(getSectionBSS());
     EmitCodeAlignment(4, 0);
   }
 };
@@ -137,28 +141,12 @@ void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                                       unsigned ByteAlignment, bool External) {
   assert(!Symbol->isInSection() && "Symbol must not already have a section!");
 
-  std::string SectionName(".bss$linkonce");
-  SectionName.append(Symbol->getName().begin(), Symbol->getName().end());
-
-  MCSymbolData &SymbolData = getAssembler().getOrCreateSymbolData(*Symbol);
-
-  unsigned Characteristics =
-    COFF::IMAGE_SCN_LNK_COMDAT |
-    COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
-    COFF::IMAGE_SCN_MEM_READ |
-    COFF::IMAGE_SCN_MEM_WRITE;
-
-  int Selection = COFF::IMAGE_COMDAT_SELECT_LARGEST;
-
-  const MCSection *Section = MCStreamer::getContext().getCOFFSection(
-      SectionName, Characteristics, SectionKind::getBSS(), Symbol->getName(),
-      Selection);
-
+  const MCSectionCOFF *Section = getSectionBSS();
   MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section);
-
   if (SectionData.getAlignment() < ByteAlignment)
     SectionData.setAlignment(ByteAlignment);
 
+  MCSymbolData &SymbolData = getAssembler().getOrCreateSymbolData(*Symbol);
   SymbolData.setExternal(External);
 
   AssignSection(Symbol, Section);
@@ -203,7 +191,7 @@ bool WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
   assert(Symbol && "Symbol must be non-null!");
   assert((Symbol->isInSection()
          ? Symbol->getSection().getVariant() == MCSection::SV_COFF
-         : true) && "Got non COFF section in the COFF backend!");
+         : true) && "Got non-COFF section in the COFF backend!");
   switch (Attribute) {
   case MCSA_WeakReference:
   case MCSA_Weak: {
@@ -231,7 +219,7 @@ void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
 
 void WinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) {
   assert((Symbol->isInSection() ?
Symbol->getSection().getVariant() == MCSection::SV_COFF - : true) && "Got non COFF section in the COFF backend!"); + : true) && "Got non-COFF section in the COFF backend!"); assert(CurSymbol == NULL && "EndCOFFSymbolDef must be called between calls " "to BeginCOFFSymbolDef!"); CurSymbol = Symbol; @@ -262,14 +250,19 @@ void WinCOFFStreamer::EndCOFFSymbolDef() { CurSymbol = NULL; } -void WinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) -{ +void WinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) { MCDataFragment *DF = getOrCreateDataFragment(); + DF->getFixups().push_back(MCFixup::Create( + DF->getContents().size(), MCSymbolRefExpr::Create(Symbol, getContext()), + FK_SecRel_2)); + DF->getContents().resize(DF->getContents().size() + 4, 0); +} - DF->getFixups().push_back( - MCFixup::Create(DF->getContents().size(), - MCSymbolRefExpr::Create (Symbol, getContext ()), - FK_SecRel_4)); +void WinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) { + MCDataFragment *DF = getOrCreateDataFragment(); + DF->getFixups().push_back(MCFixup::Create( + DF->getContents().size(), MCSymbolRefExpr::Create(Symbol, getContext()), + FK_SecRel_4)); DF->getContents().resize(DF->getContents().size() + 4, 0); } @@ -281,7 +274,7 @@ void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { assert((Symbol->isInSection() ? Symbol->getSection().getVariant() == MCSection::SV_COFF - : true) && "Got non COFF section in the COFF backend!"); + : true) && "Got non-COFF section in the COFF backend!"); AddCommonSymbol(Symbol, Size, ByteAlignment, true); } @@ -289,7 +282,7 @@ void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { assert((Symbol->isInSection() ? Symbol->getSection().getVariant() == MCSection::SV_COFF - : true) && "Got non COFF section in the COFF backend!"); + : true) && "Got non-COFF section in the COFF backend!"); AddCommonSymbol(Symbol, Size, ByteAlignment, false); } diff --git a/external/bsd/llvm/dist/llvm/lib/Object/ELF.cpp b/external/bsd/llvm/dist/llvm/lib/Object/ELF.cpp index 7c80d41942f9..c39e1e1805cc 100644 --- a/external/bsd/llvm/dist/llvm/lib/Object/ELF.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Object/ELF.cpp @@ -170,6 +170,8 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_GOT_DISP); LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_GOT_PAGE); LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_GOT_OFST); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_TLS_GD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_TLS_LDM); LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_TLS_DTPREL_HI16); LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_TLS_DTPREL_LO16); LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_TLS_TPREL_HI16); diff --git a/external/bsd/llvm/dist/llvm/lib/Object/MachOObjectFile.cpp b/external/bsd/llvm/dist/llvm/lib/Object/MachOObjectFile.cpp index d2cb8bde6d24..dc0f9ff6b463 100644 --- a/external/bsd/llvm/dist/llvm/lib/Object/MachOObjectFile.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Object/MachOObjectFile.cpp @@ -899,7 +899,7 @@ MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, "GENERIC_RELOC_LOCAL_SECTDIFF", "GENERIC_RELOC_TLV" }; - if (RType > 6) + if (RType > 5) res = "Unknown"; else res = Table[RType]; @@ -962,7 +962,10 @@ MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, "PPC_RELOC_LO14_SECTDIFF", "PPC_RELOC_LOCAL_SECTDIFF" }; - res = Table[RType]; + if (RType > 15) + res = "Unknown"; + else + res = Table[RType]; break; } case 
Triple::UnknownArch: diff --git a/external/bsd/llvm/dist/llvm/lib/Option/ArgList.cpp b/external/bsd/llvm/dist/llvm/lib/Option/ArgList.cpp index 15f7e8bf4b82..fecd23717359 100644 --- a/external/bsd/llvm/dist/llvm/lib/Option/ArgList.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Option/ArgList.cpp @@ -291,8 +291,7 @@ void ArgList::ClaimAllArgs() const { const char *ArgList::MakeArgString(const Twine &T) const { SmallString<256> Str; - T.toVector(Str); - return MakeArgString(Str.str()); + return MakeArgString(T.toStringRef(Str)); } const char *ArgList::GetOrMakeJoinedArgString(unsigned Index, diff --git a/external/bsd/llvm/dist/llvm/lib/Support/APFloat.cpp b/external/bsd/llvm/dist/llvm/lib/Support/APFloat.cpp index 676e2d4ba007..802233c10999 100644 --- a/external/bsd/llvm/dist/llvm/lib/Support/APFloat.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Support/APFloat.cpp @@ -3816,7 +3816,7 @@ APFloat::opStatus APFloat::next(bool nextDown) { // Decrement the significand. // // We always do this since: - // 1. If we are dealing with a non binade decrement, by definition we + // 1. If we are dealing with a non-binade decrement, by definition we // just decrement the significand. // 2. If we are dealing with a normal -> normal binade decrement, since // we have an explicit integral bit the fact that all bits but the diff --git a/external/bsd/llvm/dist/llvm/lib/Support/BlockFrequency.cpp b/external/bsd/llvm/dist/llvm/lib/Support/BlockFrequency.cpp index 00efe90a2607..00cf75bd5cf3 100644 --- a/external/bsd/llvm/dist/llvm/lib/Support/BlockFrequency.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Support/BlockFrequency.cpp @@ -145,28 +145,19 @@ BlockFrequency::operator+(const BlockFrequency &Prob) const { return Freq; } +BlockFrequency &BlockFrequency::operator>>=(const unsigned count) { + // Frequency can never be 0 by design. + assert(Frequency != 0); + + // Shift right by count. + Frequency >>= count; + + // Saturate to 1 if we are 0. + Frequency |= Frequency == 0; + return *this; +} + uint32_t BlockFrequency::scale(const BranchProbability &Prob) { return scale(Prob.getNumerator(), Prob.getDenominator()); } -void BlockFrequency::print(raw_ostream &OS) const { - // Convert fixed-point number to decimal. 
- OS << Frequency / getEntryFrequency() << "."; - uint64_t Rem = Frequency % getEntryFrequency(); - uint64_t Eps = 1; - do { - Rem *= 10; - Eps *= 10; - OS << Rem / getEntryFrequency(); - Rem = Rem % getEntryFrequency(); - } while (Rem >= Eps/2); -} - -namespace llvm { - -raw_ostream &operator<<(raw_ostream &OS, const BlockFrequency &Freq) { - Freq.print(OS); - return OS; -} - -} diff --git a/external/bsd/llvm/dist/llvm/lib/Support/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Support/CMakeLists.txt index 3aecf3ffa4ea..cf4edff632ea 100644 --- a/external/bsd/llvm/dist/llvm/lib/Support/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Support/CMakeLists.txt @@ -30,6 +30,7 @@ add_llvm_library(LLVMSupport IntrusiveRefCntPtr.cpp IsInf.cpp IsNAN.cpp + LineIterator.cpp Locale.cpp LockFileManager.cpp ManagedStatic.cpp diff --git a/external/bsd/llvm/dist/llvm/lib/Support/CommandLine.cpp b/external/bsd/llvm/dist/llvm/lib/Support/CommandLine.cpp index 44a88d81e3a0..7ed4dead041e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Support/CommandLine.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Support/CommandLine.cpp @@ -634,7 +634,7 @@ static bool ExpandResponseFile(const char *FName, StringSaver &Saver, bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, SmallVectorImpl &Argv) { unsigned RspFiles = 0; - bool AllExpanded = false; + bool AllExpanded = true; // Don't cache Argv.size() because it can change. for (unsigned I = 0; I != Argv.size(); ) { @@ -655,7 +655,10 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, // the cwd of the process or the response file? SmallVector ExpandedArgv; if (!ExpandResponseFile(Arg + 1, Saver, Tokenizer, ExpandedArgv)) { + // We couldn't read this file, so we leave it in the argument stream and + // move on. 
AllExpanded = false; + ++I; continue; } Argv.erase(Argv.begin() + I); diff --git a/external/bsd/llvm/dist/llvm/lib/Support/Dwarf.cpp b/external/bsd/llvm/dist/llvm/lib/Support/Dwarf.cpp index c000b636cc11..6604cc73fae4 100644 --- a/external/bsd/llvm/dist/llvm/lib/Support/Dwarf.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Support/Dwarf.cpp @@ -84,6 +84,9 @@ const char *llvm::dwarf::TagString(unsigned Tag) { case DW_TAG_arg_variable: return "DW_TAG_arg_variable"; case DW_TAG_rvalue_reference_type: return "DW_TAG_rvalue_reference_type"; case DW_TAG_template_alias: return "DW_TAG_template_alias"; + case DW_TAG_coarray_type: return "DW_TAG_coarray_type"; + case DW_TAG_generic_subrange: return "DW_TAG_generic_subrange"; + case DW_TAG_dynamic_type: return "DW_TAG_dynamic_type"; case DW_TAG_MIPS_loop: return "DW_TAG_MIPS_loop"; case DW_TAG_type_unit: return "DW_TAG_type_unit"; case DW_TAG_format_label: return "DW_TAG_format_label"; @@ -206,6 +209,16 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) { case DW_AT_const_expr: return "DW_AT_const_expr"; case DW_AT_enum_class: return "DW_AT_enum_class"; case DW_AT_linkage_name: return "DW_AT_linkage_name"; + case DW_AT_string_length_bit_size: return "DW_AT_string_length_bit_size"; + case DW_AT_string_length_byte_size: return "DW_AT_string_length_byte_size"; + case DW_AT_rank: return "DW_AT_rank"; + case DW_AT_str_offsets_base: return "DW_AT_str_offsets_base"; + case DW_AT_addr_base: return "DW_AT_addr_base"; + case DW_AT_ranges_base: return "DW_AT_ranges_base"; + case DW_AT_dwo_id: return "DW_AT_dwo_id"; + case DW_AT_dwo_name: return "DW_AT_dwo_name"; + case DW_AT_reference: return "DW_AT_reference"; + case DW_AT_rvalue_reference: return "DW_AT_rvalue_reference"; case DW_AT_MIPS_loop_begin: return "DW_AT_MIPS_loop_begin"; case DW_AT_MIPS_tail_loop_begin: return "DW_AT_MIPS_tail_loop_begin"; case DW_AT_MIPS_epilog_begin: return "DW_AT_MIPS_epilog_begin"; @@ -576,6 +589,14 @@ const char *llvm::dwarf::LanguageString(unsigned Language) { case DW_LANG_ObjC_plus_plus: return "DW_LANG_ObjC_plus_plus"; case DW_LANG_UPC: return "DW_LANG_UPC"; case DW_LANG_D: return "DW_LANG_D"; + case DW_LANG_Python: return "DW_LANG_Python"; + case DW_LANG_OpenCL: return "DW_LANG_OpenCL"; + case DW_LANG_Go: return "DW_LANG_Go"; + case DW_LANG_Modula3: return "DW_LANG_Modula3"; + case DW_LANG_Haskell: return "DW_LANG_Haskell"; + case DW_LANG_C_plus_plus_03: return "DW_LANG_C_plus_plus_03"; + case DW_LANG_C_plus_plus_11: return "DW_LANG_C_plus_plus_11"; + case DW_LANG_OCaml: return "DW_LANG_OCaml"; case DW_LANG_lo_user: return "DW_LANG_lo_user"; case DW_LANG_hi_user: return "DW_LANG_hi_user"; } diff --git a/external/bsd/llvm/dist/llvm/lib/Support/Host.cpp b/external/bsd/llvm/dist/llvm/lib/Support/Host.cpp index 6e9a5c9f592f..b6e2cb164d4d 100644 --- a/external/bsd/llvm/dist/llvm/lib/Support/Host.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Support/Host.cpp @@ -98,8 +98,9 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, /// GetX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return the /// 4 values in the specified arguments. If we can't run cpuid on the host, /// return true. 
-bool GetX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX, - unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { +static bool GetX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, + unsigned *rEAX, unsigned *rEBX, unsigned *rECX, + unsigned *rEDX) { #if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) #if defined(__GNUC__) // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually. @@ -192,7 +193,7 @@ static void DetectX86FamilyModel(unsigned EAX, unsigned &Family, } } -std::string sys::getHostCPUName() { +StringRef sys::getHostCPUName() { unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; if (GetX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) return "generic"; @@ -448,7 +449,7 @@ std::string sys::getHostCPUName() { return "generic"; } #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) -std::string sys::getHostCPUName() { +StringRef sys::getHostCPUName() { host_basic_info_data_t hostInfo; mach_msg_type_number_t infoCount; @@ -477,7 +478,7 @@ std::string sys::getHostCPUName() { return "generic"; } #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__)) -std::string sys::getHostCPUName() { +StringRef sys::getHostCPUName() { // Access to the Processor Version Register (PVR) on PowerPC is privileged, // and so we must use an operating-system interface to determine the current // processor type. On Linux, this is exposed through the /proc/cpuinfo file. @@ -567,7 +568,7 @@ std::string sys::getHostCPUName() { .Default(generic); } #elif defined(__linux__) && defined(__arm__) -std::string sys::getHostCPUName() { +StringRef sys::getHostCPUName() { // The cpuid register on arm is not accessible from user space. On Linux, // it is exposed through the /proc/cpuinfo file. // Note: We cannot mmap /proc/cpuinfo here and then process the resulting @@ -619,10 +620,21 @@ std::string sys::getHostCPUName() { .Case("0xc24", "cortex-m4") .Default("generic"); + if (Implementer == "0x51") // Qualcomm Technologies, Inc. + // Look for the CPU part line. + for (unsigned I = 0, E = Lines.size(); I != E; ++I) + if (Lines[I].startswith("CPU part")) + // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The + // values correspond to the "Part number" in the CP15/c0 register. The + // contents are specified in the various processor manuals. + return StringSwitch(Lines[I].substr(8).ltrim("\t :")) + .Case("0x06f", "krait") // APQ8064 + .Default("generic"); + return "generic"; } #elif defined(__linux__) && defined(__s390x__) -std::string sys::getHostCPUName() { +StringRef sys::getHostCPUName() { // STIDP is a privileged operation, so use /proc/cpuinfo instead. // Note: We cannot mmap /proc/cpuinfo here and then process the resulting // memory buffer because the 'file' has 0 size (it can be read from only @@ -664,7 +676,7 @@ std::string sys::getHostCPUName() { return "generic"; } #else -std::string sys::getHostCPUName() { +StringRef sys::getHostCPUName() { return "generic"; } #endif diff --git a/external/bsd/llvm/dist/llvm/lib/Support/LineIterator.cpp b/external/bsd/llvm/dist/llvm/lib/Support/LineIterator.cpp new file mode 100644 index 000000000000..056d817526cb --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Support/LineIterator.cpp @@ -0,0 +1,68 @@ +//===- LineIterator.cpp - Implementation of line iteration ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
diff --git a/external/bsd/llvm/dist/llvm/lib/Support/LineIterator.cpp b/external/bsd/llvm/dist/llvm/lib/Support/LineIterator.cpp
new file mode 100644
index 000000000000..056d817526cb
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/lib/Support/LineIterator.cpp
@@ -0,0 +1,68 @@
+//===- LineIterator.cpp - Implementation of line iteration ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+
+line_iterator::line_iterator(const MemoryBuffer &Buffer, char CommentMarker)
+    : Buffer(Buffer.getBufferSize() ? &Buffer : 0),
+      CommentMarker(CommentMarker), LineNumber(1),
+      CurrentLine(Buffer.getBufferSize() ? Buffer.getBufferStart() : 0, 0) {
+  // Ensure that if we are constructed on a non-empty memory buffer that it is
+  // a null terminated buffer.
+  if (Buffer.getBufferSize()) {
+    assert(Buffer.getBufferEnd()[0] == '\0');
+    advance();
+  }
+}
+
+void line_iterator::advance() {
+  assert(Buffer && "Cannot advance past the end!");
+
+  const char *Pos = CurrentLine.end();
+  assert(Pos == Buffer->getBufferStart() || *Pos == '\n' || *Pos == '\0');
+
+  if (CommentMarker == '\0') {
+    // If we're not stripping comments, this is simpler.
+    size_t Blanks = 0;
+    while (Pos[Blanks] == '\n')
+      ++Blanks;
+    Pos += Blanks;
+    LineNumber += Blanks;
+  } else {
+    // Skip comments and count line numbers, which is a bit more complex.
+    for (;;) {
+      if (*Pos == CommentMarker)
+        do {
+          ++Pos;
+        } while (*Pos != '\0' && *Pos != '\n');
+      if (*Pos != '\n')
+        break;
+      ++Pos;
+      ++LineNumber;
+    }
+  }
+
+  if (*Pos == '\0') {
+    // We've hit the end of the buffer, reset ourselves to the end state.
+    Buffer = 0;
+    CurrentLine = StringRef();
+    return;
+  }
+
+  // Measure the line.
+  size_t Length = 0;
+  do {
+    ++Length;
+  } while (Pos[Length] != '\0' && Pos[Length] != '\n');
+
+  CurrentLine = StringRef(Pos, Length);
+}
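line_iterator (implemented above) walks a MemoryBuffer line by line, skipping blank lines, optionally skipping lines that begin with a comment marker, and tracking a running line number. A small consumer sketch; it assumes the companion header's interface (a default-constructed iterator as the end sentinel, operator* yielding a StringRef, and a line_number() accessor) and a hypothetical input file name:

#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"

using namespace llvm;

int main() {
  OwningPtr<MemoryBuffer> Buf;
  if (error_code EC = MemoryBuffer::getFile("input.txt", Buf)) {
    errs() << "input.txt: " << EC.message() << "\n";
    return 1;
  }
  // '#' marks comment lines; they are skipped but still counted, so the
  // reported numbers match the file's real layout.
  for (line_iterator I(*Buf, '#'), E; I != E; ++I)
    outs() << I.line_number() << ": " << *I << "\n";
  return 0;
}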
diff --git a/external/bsd/llvm/dist/llvm/lib/Support/MemoryBuffer.cpp b/external/bsd/llvm/dist/llvm/lib/Support/MemoryBuffer.cpp
index dcd55299213b..33beba7792c8 100644
--- a/external/bsd/llvm/dist/llvm/lib/Support/MemoryBuffer.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Support/MemoryBuffer.cpp
@@ -131,9 +131,10 @@ MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size,
                                                   StringRef BufferName) {
   // Allocate space for the MemoryBuffer, the data and the name. It is important
   // that MemoryBuffer and data are aligned so PointerIntPair works with them.
+  // TODO: Is 16-byte alignment enough?  We copy small object files with large
+  // alignment expectations into this buffer.
   size_t AlignedStringLen =
-    RoundUpToAlignment(sizeof(MemoryBufferMem) + BufferName.size() + 1,
-                       sizeof(void*)); // TODO: Is sizeof(void*) enough?
+      RoundUpToAlignment(sizeof(MemoryBufferMem) + BufferName.size() + 1, 16);
   size_t RealLen = AlignedStringLen + Size + 1;
   char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow));
   if (!Mem) return 0;
diff --git a/external/bsd/llvm/dist/llvm/lib/Support/Regex.cpp b/external/bsd/llvm/dist/llvm/lib/Support/Regex.cpp
index 541364184073..1115534427cf 100644
--- a/external/bsd/llvm/dist/llvm/lib/Support/Regex.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Support/Regex.cpp
@@ -33,8 +33,10 @@ Regex::Regex(StringRef regex, unsigned Flags) {
 }
 
 Regex::~Regex() {
-  llvm_regfree(preg);
-  delete preg;
+  if (preg) {
+    llvm_regfree(preg);
+    delete preg;
+  }
 }
 
 bool Regex::isValid(std::string &Error) {
@@ -169,9 +171,23 @@ std::string Regex::sub(StringRef Repl, StringRef String,
   return Res;
 }
 
+// These are the special characters matched in functions like "p_ere_exp".
+static const char RegexMetachars[] = "()^$|*+?.[]\\{}";
+
 bool Regex::isLiteralERE(StringRef Str) {
   // Check for regex metacharacters.  This list was derived from our regex
   // implementation in regcomp.c and double checked against the POSIX extended
   // regular expression specification.
-  return Str.find_first_of("()^$|*+?.[]\\{}") == StringRef::npos;
+  return Str.find_first_of(RegexMetachars) == StringRef::npos;
+}
+
+std::string Regex::escape(StringRef String) {
+  std::string RegexStr;
+  for (unsigned i = 0, e = String.size(); i != e; ++i) {
+    if (strchr(RegexMetachars, String[i]))
+      RegexStr += '\\';
+    RegexStr += String[i];
+  }
+
+  return RegexStr;
 }
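Regex::escape (added above) prefixes every ERE metacharacter in its argument with a backslash, so untrusted or user-supplied text can be spliced into a pattern and matched literally. A sketch, assuming only the Regex interface visible in the hunk:

#include "llvm/Support/Regex.h"
#include <cassert>

int main() {
  // Unescaped, "a+b" would match "ab", "aab", ...; escaping makes the '+'
  // an ordinary character.
  llvm::Regex R("^" + llvm::Regex::escape("a+b") + "$");
  assert(R.match("a+b"));
  assert(!R.match("aab"));
  return 0;
}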
diff --git a/external/bsd/llvm/dist/llvm/lib/Support/SmallPtrSet.cpp b/external/bsd/llvm/dist/llvm/lib/Support/SmallPtrSet.cpp
index dd417b453ef0..f873d91d6e0c 100644
--- a/external/bsd/llvm/dist/llvm/lib/Support/SmallPtrSet.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Support/SmallPtrSet.cpp
@@ -186,9 +186,40 @@ SmallPtrSetImpl::SmallPtrSetImpl(const void **SmallStorage,
   NumTombstones = that.NumTombstones;
 }
 
+#if LLVM_HAS_RVALUE_REFERENCES
+SmallPtrSetImpl::SmallPtrSetImpl(const void **SmallStorage, unsigned SmallSize,
+                                 SmallPtrSetImpl &&that) {
+  SmallArray = SmallStorage;
+
+  // Copy over the basic members.
+  CurArraySize = that.CurArraySize;
+  NumElements = that.NumElements;
+  NumTombstones = that.NumTombstones;
+
+  // When small, just copy into our small buffer.
+  if (that.isSmall()) {
+    CurArray = SmallArray;
+    memcpy(CurArray, that.CurArray, sizeof(void *) * CurArraySize);
+    return;
+  }
+
+  // Otherwise, we steal the large memory allocation and no copy is needed.
+  CurArray = that.CurArray;
+  that.CurArray = that.SmallArray;
+
+  // Make the "that" object small and empty.
+  that.CurArraySize = SmallSize;
+  assert(that.CurArray == that.SmallArray);
+  that.NumElements = 0;
+  that.NumTombstones = 0;
+}
+#endif
+
 /// CopyFrom - implement operator= from a smallptrset that has the same pointer
 /// type, but may have a different small size.
 void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
+  assert(&RHS != this && "Self-copy should be handled by the caller.");
+
   if (isSmall() && RHS.isSmall())
     assert(CurArraySize == RHS.CurArraySize &&
            "Cannot assign sets with different small sizes");
@@ -222,6 +253,35 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
   NumTombstones = RHS.NumTombstones;
 }
 
+#if LLVM_HAS_RVALUE_REFERENCES
+void SmallPtrSetImpl::MoveFrom(unsigned SmallSize, SmallPtrSetImpl &&RHS) {
+  assert(&RHS != this && "Self-move should be handled by the caller.");
+
+  if (!isSmall())
+    free(CurArray);
+
+  if (RHS.isSmall()) {
+    // Copy a small RHS rather than moving.
+    CurArray = SmallArray;
+    memcpy(CurArray, RHS.CurArray, sizeof(void*)*RHS.CurArraySize);
+  } else {
+    CurArray = RHS.CurArray;
+    RHS.CurArray = RHS.SmallArray;
+  }
+
+  // Copy the rest of the trivial members.
+  CurArraySize = RHS.CurArraySize;
+  NumElements = RHS.NumElements;
+  NumTombstones = RHS.NumTombstones;
+
+  // Make the RHS small and empty.
+  RHS.CurArraySize = SmallSize;
+  assert(RHS.CurArray == RHS.SmallArray);
+  RHS.NumElements = 0;
+  RHS.NumTombstones = 0;
+}
+#endif
+
 void SmallPtrSetImpl::swap(SmallPtrSetImpl &RHS) {
   if (this == &RHS) return;
diff --git a/external/bsd/llvm/dist/llvm/lib/Support/TargetRegistry.cpp b/external/bsd/llvm/dist/llvm/lib/Support/TargetRegistry.cpp
index 0c90c17fefb1..8d91a53c2264 100644
--- a/external/bsd/llvm/dist/llvm/lib/Support/TargetRegistry.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Support/TargetRegistry.cpp
@@ -71,42 +71,34 @@ const Target *TargetRegistry::lookupTarget(const std::string &TT,
     Error = "Unable to find target for this triple (no targets are registered)";
     return 0;
   }
-  const Target *Best = 0, *EquallyBest = 0;
-  unsigned BestQuality = 0;
+  const Target *Matching = 0;
+  Triple::ArchType Arch = Triple(TT).getArch();
   for (iterator it = begin(), ie = end(); it != ie; ++it) {
-    if (unsigned Qual = it->TripleMatchQualityFn(TT)) {
-      if (!Best || Qual > BestQuality) {
-        Best = &*it;
-        EquallyBest = 0;
-        BestQuality = Qual;
-      } else if (Qual == BestQuality)
-        EquallyBest = &*it;
+    if (it->ArchMatchFn(Arch)) {
+      if (Matching) {
+        Error = std::string("Cannot choose between targets \"") +
+                Matching->Name + "\" and \"" + it->Name + "\"";
+        return 0;
+      }
+      Matching = &*it;
     }
   }
 
-  if (!Best) {
+  if (!Matching) {
     Error = "No available targets are compatible with this triple, "
             "see -version for the available targets.";
     return 0;
   }
 
-  // Otherwise, take the best target, but make sure we don't have two equally
-  // good best targets.
-  if (EquallyBest) {
-    Error = std::string("Cannot choose between targets \"") +
-            Best->Name + "\" and \"" + EquallyBest->Name + "\"";
-    return 0;
-  }
-
-  return Best;
+  return Matching;
 }
 
 void TargetRegistry::RegisterTarget(Target &T,
                                     const char *Name,
                                     const char *ShortDesc,
-                                    Target::TripleMatchQualityFnTy TQualityFn,
+                                    Target::ArchMatchFnTy ArchMatchFn,
                                     bool HasJIT) {
-  assert(Name && ShortDesc && TQualityFn &&
+  assert(Name && ShortDesc && ArchMatchFn &&
          "Missing required target information!");
 
   // Check if this target has already been initialized, we allow this as a
@@ -120,7 +112,7 @@ void TargetRegistry::RegisterTarget(Target &T,
 
   T.Name = Name;
   T.ShortDesc = ShortDesc;
-  T.TripleMatchQualityFn = TQualityFn;
+  T.ArchMatchFn = ArchMatchFn;
   T.HasJIT = HasJIT;
 }
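The SmallPtrSet hunks above add C++11 move semantics, guarded by LLVM_HAS_RVALUE_REFERENCES: a set that has spilled onto the heap donates its allocation to the destination and is reset to small-and-empty, while a still-small set is simply memcpy'd into the destination's inline buffer. A sketch of what that buys, under the assumption that the build enables rvalue references:

#include "llvm/ADT/SmallPtrSet.h"

static llvm::SmallPtrSet<int *, 4> makeSet(int **Vals, unsigned N) {
  llvm::SmallPtrSet<int *, 4> S;
  for (unsigned i = 0; i != N; ++i)
    S.insert(Vals[i]);
  // Returned by value: once N > 4 has forced a heap array, the array is
  // stolen rather than copied, and the source is left small and empty.
  return S;
}

int main() {
  int A, B;
  int *Vals[] = { &A, &B };
  llvm::SmallPtrSet<int *, 4> S(makeSet(Vals, 2));
  return S.count(&A) ? 0 : 1;
}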
diff --git a/external/bsd/llvm/dist/llvm/lib/Support/ThreadLocal.cpp b/external/bsd/llvm/dist/llvm/lib/Support/ThreadLocal.cpp
index 0587aaec7e68..38ab29b7ffef 100644
--- a/external/bsd/llvm/dist/llvm/lib/Support/ThreadLocal.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Support/ThreadLocal.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Config/config.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/ThreadLocal.h"
 
 //===----------------------------------------------------------------------===//
@@ -23,10 +24,10 @@
 // Define all methods as no-ops if threading is explicitly disabled
 namespace llvm {
 using namespace sys;
-ThreadLocalImpl::ThreadLocalImpl() { }
+ThreadLocalImpl::ThreadLocalImpl() : data() { }
 ThreadLocalImpl::~ThreadLocalImpl() { }
 void ThreadLocalImpl::setInstance(const void* d) {
-  typedef int SIZE_TOO_BIG[sizeof(d) <= sizeof(data) ? 1 : -1];
+  LLVM_STATIC_ASSERT(sizeof(d) <= sizeof(data), "size too big");
   void **pd = reinterpret_cast<void**>(&data);
   *pd = const_cast<void*>(d);
 }
@@ -50,7 +51,7 @@ namespace llvm {
 using namespace sys;
 
 ThreadLocalImpl::ThreadLocalImpl() : data() {
-  typedef int SIZE_TOO_BIG[sizeof(pthread_key_t) <= sizeof(data) ? 1 : -1];
+  LLVM_STATIC_ASSERT(sizeof(pthread_key_t) <= sizeof(data), "size too big");
   pthread_key_t* key = reinterpret_cast<pthread_key_t*>(&data);
   int errorcode = pthread_key_create(key, NULL);
   assert(errorcode == 0);
diff --git a/external/bsd/llvm/dist/llvm/lib/Support/Triple.cpp b/external/bsd/llvm/dist/llvm/lib/Support/Triple.cpp
index 6c978a0244bc..273316a0a6f9 100644
--- a/external/bsd/llvm/dist/llvm/lib/Support/Triple.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Support/Triple.cpp
@@ -150,6 +150,7 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
   case GNUEABI: return "gnueabi";
   case GNUX32: return "gnux32";
   case EABI: return "eabi";
+  case EABIHF: return "eabihf";
   case MachO: return "macho";
   case Android: return "android";
   case ELF: return "elf";
@@ -297,6 +298,7 @@ static Triple::OSType parseOS(StringRef OSName) {
 
 static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
   return StringSwitch<Triple::EnvironmentType>(EnvironmentName)
+    .StartsWith("eabihf", Triple::EABIHF)
     .StartsWith("eabi", Triple::EABI)
     .StartsWith("gnueabihf", Triple::GNUEABIHF)
     .StartsWith("gnueabi", Triple::GNUEABI)
@@ -600,15 +602,15 @@ void Triple::getiOSVersion(unsigned &Major, unsigned &Minor,
     // the clang driver combines OS X and IOS support into a common Darwin
     // toolchain that wants to know the iOS version number even when targeting
     // OS X.
-    Major = 3;
+    Major = 5;
     Minor = 0;
     Micro = 0;
     break;
   case IOS:
     getOSVersion(Major, Minor, Micro);
-    // Default to 3.0.
+    // Default to 5.0.
     if (Major == 0)
-      Major = 3;
+      Major = 5;
     break;
   }
 }
diff --git a/external/bsd/llvm/dist/llvm/lib/Support/Unix/Path.inc b/external/bsd/llvm/dist/llvm/lib/Support/Unix/Path.inc
index c9dc87167146..d52604246169 100644
--- a/external/bsd/llvm/dist/llvm/lib/Support/Unix/Path.inc
+++ b/external/bsd/llvm/dist/llvm/lib/Support/Unix/Path.inc
@@ -184,15 +184,15 @@ namespace fs {
     defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) || \
     defined(__linux__) || defined(__CYGWIN__) || defined(__DragonFly__)
 static int
-test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
-    const char *dir, const char *bin)
-{
+test_dir(char ret[PATH_MAX], const char *dir, const char *bin)
+{
   struct stat sb;
+  char fullpath[PATH_MAX];
 
-  snprintf(buf, PATH_MAX, "%s/%s", dir, bin);
-  if (realpath(buf, ret) == NULL)
+  snprintf(fullpath, PATH_MAX, "%s/%s", dir, bin);
+  if (realpath(fullpath, ret) == NULL)
     return (1);
-  if (stat(buf, &sb) != 0)
+  if (stat(fullpath, &sb) != 0)
     return (1);
 
   return (0);
@@ -201,20 +201,21 @@ test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
 static char *
 getprogpath(char ret[PATH_MAX], const char *bin)
 {
-  char *pv, *s, *t, buf[PATH_MAX];
+  char *pv, *s, *t;
 
   /* First approach: absolute path. */
   if (bin[0] == '/') {
-    if (test_dir(buf, ret, "/", bin) == 0)
+    if (test_dir(ret, "/", bin) == 0)
       return (ret);
     return (NULL);
   }
 
   /* Second approach: relative path.
*/ if (strchr(bin, '/') != NULL) { - if (getcwd(buf, PATH_MAX) == NULL) + char cwd[PATH_MAX]; + if (getcwd(cwd, PATH_MAX) == NULL) return (NULL); - if (test_dir(buf, ret, buf, bin) == 0) + if (test_dir(ret, cwd, bin) == 0) return (ret); return (NULL); } @@ -226,7 +227,7 @@ getprogpath(char ret[PATH_MAX], const char *bin) if (pv == NULL) return (NULL); while ((t = strsep(&s, ":")) != NULL) { - if (test_dir(buf, ret, t, bin) == 0) { + if (test_dir(ret, t, bin) == 0) { free(pv); return (ret); } @@ -530,17 +531,20 @@ error_code setLastModificationAndAccessTime(int FD, TimeValue Time) { Times[0].tv_nsec = 0; Times[1] = Times[0]; if (::futimens(FD, Times)) + return error_code(errno, system_category()); + return error_code::success(); #elif defined(HAVE_FUTIMES) timeval Times[2]; Times[0].tv_sec = Time.toPosixTime(); Times[0].tv_usec = 0; Times[1] = Times[0]; if (::futimes(FD, Times)) -#else -#error Missing futimes() and futimens() -#endif return error_code(errno, system_category()); return error_code::success(); +#else +#warning Missing futimes() and futimens() + return make_error_code(errc::not_supported); +#endif } error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { @@ -645,7 +649,7 @@ uint64_t mapped_file_region::size() const { char *mapped_file_region::data() const { assert(Mapping && "Mapping failed but used anyway!"); - assert(Mode != readonly && "Cannot get non const data for readonly mapping!"); + assert(Mode != readonly && "Cannot get non-const data for readonly mapping!"); return reinterpret_cast(Mapping); } diff --git a/external/bsd/llvm/dist/llvm/lib/Support/Unix/ThreadLocal.inc b/external/bsd/llvm/dist/llvm/lib/Support/Unix/ThreadLocal.inc index 2b4c9017cd91..f14d0fa3d522 100644 --- a/external/bsd/llvm/dist/llvm/lib/Support/Unix/ThreadLocal.inc +++ b/external/bsd/llvm/dist/llvm/lib/Support/Unix/ThreadLocal.inc @@ -18,7 +18,7 @@ namespace llvm { using namespace sys; -ThreadLocalImpl::ThreadLocalImpl() { } +ThreadLocalImpl::ThreadLocalImpl() : data() { } ThreadLocalImpl::~ThreadLocalImpl() { } void ThreadLocalImpl::setInstance(const void* d) { data = const_cast(d);} const void* ThreadLocalImpl::getInstance() { return data; } diff --git a/external/bsd/llvm/dist/llvm/lib/Support/Windows/Path.inc b/external/bsd/llvm/dist/llvm/lib/Support/Windows/Path.inc index 0b39198e6b35..b7926e408061 100644 --- a/external/bsd/llvm/dist/llvm/lib/Support/Windows/Path.inc +++ b/external/bsd/llvm/dist/llvm/lib/Support/Windows/Path.inc @@ -859,7 +859,7 @@ uint64_t mapped_file_region::size() const { } char *mapped_file_region::data() const { - assert(Mode != readonly && "Cannot get non const data for readonly mapping!"); + assert(Mode != readonly && "Cannot get non-const data for readonly mapping!"); assert(Mapping && "Mapping failed but used anyway!"); return reinterpret_cast(Mapping); } diff --git a/external/bsd/llvm/dist/llvm/lib/Support/YAMLTraits.cpp b/external/bsd/llvm/dist/llvm/lib/Support/YAMLTraits.cpp index 42bff96f73bc..c32cbda83a8c 100644 --- a/external/bsd/llvm/dist/llvm/lib/Support/YAMLTraits.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Support/YAMLTraits.cpp @@ -65,7 +65,7 @@ void Input::HNode::anchor() {} void Input::EmptyHNode::anchor() {} void Input::ScalarHNode::anchor() {} -bool Input::outputting() const { +bool Input::outputting() { return false; } @@ -406,7 +406,7 @@ Output::Output(raw_ostream &yout, void *context) Output::~Output() { } -bool Output::outputting() const { +bool Output::outputting() { return true; } @@ -689,6 +689,17 @@ StringRef 
ScalarTraits::input(StringRef Scalar, void *, Val = Scalar; return StringRef(); } + +void ScalarTraits::output(const std::string &Val, void *, + raw_ostream &Out) { + Out << Val; +} + +StringRef ScalarTraits::input(StringRef Scalar, void *, + std::string &Val) { + Val = Scalar.str(); + return StringRef(); +} void ScalarTraits::output(const uint8_t &Val, void *, raw_ostream &Out) { diff --git a/external/bsd/llvm/dist/llvm/lib/TableGen/TGParser.cpp b/external/bsd/llvm/dist/llvm/lib/TableGen/TGParser.cpp index daac5747d3e3..2a6b3f40492a 100644 --- a/external/bsd/llvm/dist/llvm/lib/TableGen/TGParser.cpp +++ b/external/bsd/llvm/dist/llvm/lib/TableGen/TGParser.cpp @@ -380,10 +380,11 @@ static bool isObjectStart(tgtok::TokKind K) { K == tgtok::MultiClass || K == tgtok::Foreach; } -static std::string GetNewAnonymousName() { - static unsigned AnonCounter = 0; +/// GetNewAnonymousName - Generate a unique anonymous name that can be used as +/// an identifier. +std::string TGParser::GetNewAnonymousName() { unsigned Tmp = AnonCounter++; // MSVC2012 ICEs without this. - return "anonymous." + utostr(Tmp); + return "anonymous_" + utostr(Tmp); } /// ParseObjectName - If an object name is specified, return it. Otherwise, @@ -1215,10 +1216,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, SMLoc EndLoc = Lex.getLoc(); // Create the new record, set it as CurRec temporarily. - static unsigned AnonCounter = 0; - Record *NewRec = new Record("anonymous.val."+utostr(AnonCounter++), - NameLoc, - Records, + Record *NewRec = new Record(GetNewAnonymousName(), NameLoc, Records, /*IsAnonymous=*/true); SubClassReference SCRef; SCRef.RefRange = SMRange(NameLoc, EndLoc); @@ -1227,8 +1225,36 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, // Add info about the subclass to NewRec. if (AddSubClass(NewRec, SCRef)) return 0; - NewRec->resolveReferences(); - Records.addDef(NewRec); + if (!CurMultiClass) { + NewRec->resolveReferences(); + Records.addDef(NewRec); + } else { + // Otherwise, we're inside a multiclass, add it to the multiclass. + CurMultiClass->DefPrototypes.push_back(NewRec); + + // Copy the template arguments for the multiclass into the def. + const std::vector &TArgs = + CurMultiClass->Rec.getTemplateArgs(); + + for (unsigned i = 0, e = TArgs.size(); i != e; ++i) { + const RecordVal *RV = CurMultiClass->Rec.getValue(TArgs[i]); + assert(RV && "Template arg doesn't exist?"); + NewRec->addValue(*RV); + } + + // We can't return the prototype def here, instead return: + // !cast(!strconcat(NAME, AnonName)). + const RecordVal *MCNameRV = CurMultiClass->Rec.getValue("NAME"); + assert(MCNameRV && "multiclass record must have a NAME"); + + return UnOpInit::get(UnOpInit::CAST, + BinOpInit::get(BinOpInit::STRCONCAT, + VarInit::get(MCNameRV->getName(), + MCNameRV->getType()), + NewRec->getNameInit(), + StringRecTy::get()), + Class->getDefInit()->getType()); + } // The result of the expression is a reference to the new record. return DefInit::get(NewRec); @@ -1964,7 +1990,18 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) { return true; } Records.addDef(CurRec); + + if (ParseObjectBody(CurRec)) + return true; } else if (CurMultiClass) { + // Parse the body before adding this prototype to the DefPrototypes vector. + // That way implicit definitions will be added to the DefPrototypes vector + // before this object, instantiated prior to defs derived from this object, + // and this available for indirect name resolution when defs derived from + // this object are instantiated. 
+ if (ParseObjectBody(CurRec)) + return true; + // Otherwise, a def inside a multiclass, add it to the multiclass. for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size(); i != e; ++i) if (CurMultiClass->DefPrototypes[i]->getNameInit() @@ -1974,9 +2011,7 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) { return true; } CurMultiClass->DefPrototypes.push_back(CurRec); - } - - if (ParseObjectBody(CurRec)) + } else if (ParseObjectBody(CurRec)) return true; if (CurMultiClass == 0) // Def's in multiclasses aren't really defs. @@ -2271,7 +2306,7 @@ bool TGParser::ParseMultiClass() { Record *TGParser:: InstantiateMulticlassDef(MultiClass &MC, Record *DefProto, - Init *DefmPrefix, + Init *&DefmPrefix, SMRange DefmPrefixRange) { // We need to preserve DefProto so it can be reused for later // instantiations, so create a new Record to inherit from it. diff --git a/external/bsd/llvm/dist/llvm/lib/TableGen/TGParser.h b/external/bsd/llvm/dist/llvm/lib/TableGen/TGParser.h index 044e3a02ba4b..e5559a33e386 100644 --- a/external/bsd/llvm/dist/llvm/lib/TableGen/TGParser.h +++ b/external/bsd/llvm/dist/llvm/lib/TableGen/TGParser.h @@ -69,6 +69,8 @@ class TGParser { // Record tracker RecordKeeper &Records; + unsigned AnonCounter; + // A "named boolean" indicating how to parse identifiers. Usually // identifiers map to some existing object but in special cases // (e.g. parsing def names) no such object exists yet because we are @@ -82,8 +84,8 @@ class TGParser { }; public: - TGParser(SourceMgr &SrcMgr, RecordKeeper &records) : - Lex(SrcMgr), CurMultiClass(0), Records(records) {} + TGParser(SourceMgr &SrcMgr, RecordKeeper &records) + : Lex(SrcMgr), CurMultiClass(0), Records(records), AnonCounter(0) {} /// ParseFile - Main entrypoint for parsing a tblgen file. These parser /// routines return true on error, or false on success. @@ -112,6 +114,8 @@ private: // Semantic analysis methods. bool AddSubMultiClass(MultiClass *CurMC, SubMultiClassReference &SubMultiClass); + std::string GetNewAnonymousName(); + // IterRecord: Map an iterator name to a value. struct IterRecord { VarInit *IterVar; @@ -133,7 +137,7 @@ private: // Parser methods. 
bool ParseMultiClass(); Record *InstantiateMulticlassDef(MultiClass &MC, Record *DefProto, - Init *DefmPrefix, + Init *&DefmPrefix, SMRange DefmPrefixRange); bool ResolveMulticlassDefArgs(MultiClass &MC, Record *DefProto, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64.td b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64.td index 9c2c69a65935..6139d147a617 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64.td @@ -54,20 +54,10 @@ include "AArch64InstrInfo.td" def AArch64InstrInfo : InstrInfo; -//===----------------------------------------------------------------------===// -// Assembly printer -//===----------------------------------------------------------------------===// - -def A64InstPrinter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - bit isMCAsmWriter = 1; -} - //===----------------------------------------------------------------------===// // Declare the target which we are implementing //===----------------------------------------------------------------------===// def AArch64 : Target { let InstructionSet = AArch64InstrInfo; - let AssemblyWriters = [A64InstPrinter]; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index d59ca56ba998..fbf38cdc1cb8 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -114,9 +114,6 @@ bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO, case MachineOperand::MO_BlockAddress: Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName(); break; - case MachineOperand::MO_ExternalSymbol: - Name = MO.getSymbolName(); - break; case MachineOperand::MO_ConstantPoolIndex: Name = GetCPISymbol(MO.getIndex())->getName(); break; @@ -238,7 +235,6 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, case MachineOperand::MO_BlockAddress: case MachineOperand::MO_ConstantPoolIndex: case MachineOperand::MO_GlobalAddress: - case MachineOperand::MO_ExternalSymbol: return printSymbolicAddress(MO, false, "", O); } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64CallingConv.td b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64CallingConv.td index a2a9f3f67455..9fe6aae2e32f 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64CallingConv.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64CallingConv.td @@ -60,7 +60,7 @@ def CC_A64_APCS : CallingConv<[ // registers. This makes sense because the PCS does not distinguish Short // Vectors and Floating-point types. 
CCIfType<[v1i16, v2i8], CCBitConvertToType>, - CCIfType<[v1i32, v4i8, v2i16, v1f32], CCBitConvertToType>, + CCIfType<[v1i32, v4i8, v2i16], CCBitConvertToType>, CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64, v1f64], CCBitConvertToType>, CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCBitConvertToType>, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index ef99541c1700..dac4b32cfecd 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -1113,15 +1113,6 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy, TFI, CurDAG->getTargetConstant(0, PtrTy)); } - case ISD::ConstantPool: { - // Constant pools are fine, just create a Target entry. - ConstantPoolSDNode *CN = cast(Node); - const Constant *C = CN->getConstVal(); - SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0)); - - ReplaceUses(SDValue(Node, 0), CP); - return NULL; - } case ISD::Constant: { SDNode *ResNode = 0; if (cast(Node)->getZExtValue() == 0) { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 6ea4b483eb4c..882b5280fa14 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -64,9 +64,8 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass); addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass); addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v1f32, &AArch64::FPR32RegClass); addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass); addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass); addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass); addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass); @@ -141,6 +140,7 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setOperationAction(ISD::VAARG, MVT::Other, Expand); setOperationAction(ISD::BlockAddress, MVT::i64, Custom); + setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); @@ -296,7 +296,6 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom); @@ -333,7 +332,6 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setOperationAction(ISD::SETCC, MVT::v4i32, Custom); setOperationAction(ISD::SETCC, MVT::v1i64, Custom); setOperationAction(ISD::SETCC, MVT::v2i64, Custom); - setOperationAction(ISD::SETCC, MVT::v1f32, Custom); setOperationAction(ISD::SETCC, MVT::v2f32, Custom); setOperationAction(ISD::SETCC, MVT::v4f32, Custom); setOperationAction(ISD::SETCC, MVT::v1f64, Custom); @@ -341,27 +339,64 @@ 
AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal); setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::v1f64, Legal); setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::FCEIL, MVT::v2f32, Legal); setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); + setOperationAction(ISD::FCEIL, MVT::v1f64, Legal); setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal); setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::v1f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::FRINT, MVT::v2f32, Legal); setOperationAction(ISD::FRINT, MVT::v4f32, Legal); + setOperationAction(ISD::FRINT, MVT::v1f64, Legal); setOperationAction(ISD::FRINT, MVT::v2f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::v2f32, Legal); setOperationAction(ISD::FROUND, MVT::v4f32, Legal); + setOperationAction(ISD::FROUND, MVT::v1f64, Legal); setOperationAction(ISD::FROUND, MVT::v2f64, Legal); + + // Vector ExtLoad and TruncStore are expanded. + for (unsigned I = MVT::FIRST_VECTOR_VALUETYPE; + I <= MVT::LAST_VECTOR_VALUETYPE; ++I) { + MVT VT = (MVT::SimpleValueType) I; + setLoadExtAction(ISD::SEXTLOAD, VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, Expand); + for (unsigned II = MVT::FIRST_VECTOR_VALUETYPE; + II <= MVT::LAST_VECTOR_VALUETYPE; ++II) { + MVT VT1 = (MVT::SimpleValueType) II; + // A TruncStore has two vector types of the same number of elements + // and different element sizes. + if (VT.getVectorNumElements() == VT1.getVectorNumElements() && + VT.getVectorElementType().getSizeInBits() + > VT1.getVectorElementType().getSizeInBits()) + setTruncStoreAction(VT, VT1, Expand); + } + } + + // There is no v1i64/v2i64 multiply, expand v1i64/v2i64 to GPR i64 multiply. + // FIXME: For a v2i64 multiply, we copy VPR to GPR and do 2 i64 multiplies, + // and then copy back to VPR. This solution may be optimized by Following 3 + // NEON instructions: + // pmull v2.1q, v0.1d, v1.1d + // pmull2 v3.1q, v0.2d, v1.2d + // ins v2.d[1], v3.d[0] + // As currently we can't verify the correctness of such assumption, we can + // do such optimization in the future. + setOperationAction(ISD::MUL, MVT::v1i64, Expand); + setOperationAction(ISD::MUL, MVT::v2i64, Expand); } } @@ -403,6 +438,29 @@ static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, StrOpc = StoreOps[Log2_32(Size)]; } +// FIXME: AArch64::DTripleRegClass and AArch64::QTripleRegClass don't really +// have value type mapped, and they are both being defined as MVT::untyped. +// Without knowing the MVT type, MachineLICM::getRegisterClassIDAndCost +// would fail to figure out the register pressure correctly. 
+std::pair +AArch64TargetLowering::findRepresentativeClass(MVT VT) const{ + const TargetRegisterClass *RRC = 0; + uint8_t Cost = 1; + switch (VT.SimpleTy) { + default: + return TargetLowering::findRepresentativeClass(VT); + case MVT::v4i64: + RRC = &AArch64::QPairRegClass; + Cost = 2; + break; + case MVT::v8i64: + RRC = &AArch64::QQuadRegClass; + Cost = 4; + break; + } + return std::make_pair(RRC, Cost); +} + MachineBasicBlock * AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, @@ -893,8 +951,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge"; case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall"; - case AArch64ISD::NEON_BSL: - return "AArch64ISD::NEON_BSL"; case AArch64ISD::NEON_MOVIMM: return "AArch64ISD::NEON_MOVIMM"; case AArch64ISD::NEON_MVNIMM: @@ -1290,6 +1346,12 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, &RetOps[0], RetOps.size()); } +unsigned AArch64TargetLowering::getByValTypeAlignment(Type *Ty) const { + // This is a new backend. For anything more precise than this a FE should + // set an explicit alignment. + return 4; +} + SDValue AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { @@ -2225,6 +2287,36 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, } } +SDValue +AArch64TargetLowering::LowerConstantPool(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT PtrVT = getPointerTy(); + ConstantPoolSDNode *CN = cast(Op); + const Constant *C = CN->getConstVal(); + + switch(getTargetMachine().getCodeModel()) { + case CodeModel::Small: + // The most efficient code is PC-relative anyway for the small memory model, + // so we don't need to worry about relocation model. + return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, + DAG.getTargetConstantPool(C, PtrVT, 0, 0, + AArch64II::MO_NO_FLAG), + DAG.getTargetConstantPool(C, PtrVT, 0, 0, + AArch64II::MO_LO12), + DAG.getConstant(CN->getAlignment(), MVT::i32)); + case CodeModel::Large: + return DAG.getNode( + AArch64ISD::WrapperLarge, DL, PtrVT, + DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G3), + DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC), + DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC), + DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC)); + default: + llvm_unreachable("Only small and large code models supported now"); + } +} + SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL, @@ -2855,6 +2947,7 @@ AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); @@ -3386,12 +3479,9 @@ static SDValue PerformORCombine(SDNode *N, if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, HasAnyUndefs) && !HasAnyUndefs && SplatBits0 == ~SplatBits1) { - // Canonicalize the vector type to make instruction selection simpler. - EVT CanonicalVT = VT.is128BitVector() ? 
MVT::v16i8 : MVT::v8i8; - SDValue Result = DAG.getNode(AArch64ISD::NEON_BSL, DL, CanonicalVT, - N0->getOperand(1), N0->getOperand(0), - N1->getOperand(0)); - return DAG.getNode(ISD::BITCAST, DL, VT, Result); + + return DAG.getNode(ISD::VSELECT, DL, VT, N0->getOperand(1), + N0->getOperand(0), N1->getOperand(0)); } } } @@ -3885,7 +3975,10 @@ bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, if (V1.getNode() && NumElts == V0NumElts && V0NumElts == V1.getValueType().getVectorNumElements()) { SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask); - Res = LowerVECTOR_SHUFFLE(Shuffle, DAG); + if(Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) + Res = Shuffle; + else + Res = LowerVECTOR_SHUFFLE(Shuffle, DAG); return true; } else return false; @@ -3998,14 +4091,12 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (ValueCounts.size() == 0) return DAG.getUNDEF(VT); - // Loads are better lowered with insert_vector_elt. - // Keep going if we are hitting this case. - if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode())) + if (isOnlyLowElement) return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); unsigned EltSize = VT.getVectorElementType().getSizeInBits(); - // Use VDUP for non-constant splats. if (hasDominantValue && EltSize <= 64) { + // Use VDUP for non-constant splats. if (!isConstant) { SDValue N; @@ -4013,8 +4104,12 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // just use DUPLANE. We can only do this if the lane being extracted // is at a constant index, as the DUP from lane instructions only have // constant-index forms. + // FIXME: for now we have v1i8, v1i16, v1i32 legal vector types, if they + // are not legal any more, no need to check the type size in bits should + // be large than 64. if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && - isa(Value->getOperand(1))) { + isa(Value->getOperand(1)) && + Value->getOperand(0).getValueType().getSizeInBits() >= 64) { N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, Value->getOperand(0), Value->getOperand(1)); } else diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 4cc213514358..f9ed6310d7e8 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -113,9 +113,6 @@ namespace AArch64ISD { // get selected. 
WrapperSmall, - // Vector bitwise select - NEON_BSL, - // Vector move immediate NEON_MOVIMM, @@ -224,6 +221,8 @@ public: const SmallVectorImpl &OutVals, SDLoc dl, SelectionDAG &DAG) const; + virtual unsigned getByValTypeAlignment(Type *Ty) const LLVM_OVERRIDE; + SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const; @@ -309,6 +308,8 @@ public: SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; @@ -343,6 +344,10 @@ public: virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const LLVM_OVERRIDE; +protected: + std::pair + findRepresentativeClass(MVT VT) const; + private: const InstrItineraryData *Itins; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 180110a84dd6..1e19eb0c7417 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -418,10 +418,8 @@ AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, default: llvm_unreachable("Unknown size for regclass"); } - } else { - assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) || - RC->hasType(MVT::f128)) - && "Expected integer or floating type for store"); + } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) || + RC->hasType(MVT::f128)) { switch (RC->getSize()) { case 4: StoreOp = AArch64::LSFP32_STR; break; case 8: StoreOp = AArch64::LSFP64_STR; break; @@ -429,6 +427,22 @@ AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, default: llvm_unreachable("Unknown size for regclass"); } + } else { // The spill of D tuples is implemented by Q tuples + if (RC == &AArch64::QPairRegClass) + StoreOp = AArch64::ST1x2_16B; + else if (RC == &AArch64::QTripleRegClass) + StoreOp = AArch64::ST1x3_16B; + else if (RC == &AArch64::QQuadRegClass) + StoreOp = AArch64::ST1x4_16B; + else + llvm_unreachable("Unknown reg class"); + + MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp)); + // Vector store has different operands from other store instructions. 
+ NewMI.addFrameIndex(FrameIdx) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO); + return; } MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp)); @@ -464,10 +478,8 @@ AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, default: llvm_unreachable("Unknown size for regclass"); } - } else { - assert((RC->hasType(MVT::f32) || RC->hasType(MVT::f64) - || RC->hasType(MVT::f128)) - && "Expected integer or floating type for store"); + } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) || + RC->hasType(MVT::f128)) { switch (RC->getSize()) { case 4: LoadOp = AArch64::LSFP32_LDR; break; case 8: LoadOp = AArch64::LSFP64_LDR; break; @@ -475,6 +487,21 @@ AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, default: llvm_unreachable("Unknown size for regclass"); } + } else { // The spill of D tuples is implemented by Q tuples + if (RC == &AArch64::QPairRegClass) + LoadOp = AArch64::LD1x2_16B; + else if (RC == &AArch64::QTripleRegClass) + LoadOp = AArch64::LD1x3_16B; + else if (RC == &AArch64::QQuadRegClass) + LoadOp = AArch64::LD1x4_16B; + else + llvm_unreachable("Unknown reg class"); + + MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg); + // Vector load has different operands from other load instructions. + NewMI.addFrameIndex(FrameIdx) + .addMemOperand(MMO); + return; } MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg); @@ -572,6 +599,21 @@ void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI, MinOffset = -0x40 * AccessScale; MaxOffset = 0x3f * AccessScale; return; + case AArch64::LD1x2_16B: case AArch64::ST1x2_16B: + AccessScale = 32; + MinOffset = 0; + MaxOffset = 0xfff * AccessScale; + return; + case AArch64::LD1x3_16B: case AArch64::ST1x3_16B: + AccessScale = 48; + MinOffset = 0; + MaxOffset = 0xfff * AccessScale; + return; + case AArch64::LD1x4_16B: case AArch64::ST1x4_16B: + AccessScale = 64; + MinOffset = 0; + MaxOffset = 0xfff * AccessScale; + return; } } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 23d81fc478e8..c961fb2c5d2b 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -26,6 +26,15 @@ def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" " FPOpFusion::Fast)">; include "AArch64InstrFormats.td" +//===----------------------------------------------------------------------===// +// AArch64 specific pattern fragments. +// +// An 'fmul' node with a single use. 
+def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{ + return N->hasOneUse(); +}]>; + + //===----------------------------------------------------------------------===// // Target-specific ISD nodes and profiles //===----------------------------------------------------------------------===// @@ -2172,9 +2181,9 @@ defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>; def fmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra), (fma (fneg node:$Rn), node:$Rm, node:$Ra)>; -def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra), - (fma node:$Rn, node:$Rm, (fneg node:$Ra))>; def fnmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra), + (fma node:$Rn, node:$Rm, (fneg node:$Ra))>; +def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra), (fma (fneg node:$Rn), node:$Rm, (fneg node:$Ra))>; class A64I_fpdp3Impl; // Extra patterns for when we're allowed to optimise separate multiplication and // addition. let Predicates = [HasFPARMv8, UseFusedMAC] in { -def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))), +def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))), (FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))), +def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))), (FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(f32 (fsub (f32 (fmul FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)), +def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))), (FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul FPR32:$Rn, FPR32:$Rm)))), +def : Pat<(f32 (fsub (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)), (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; -def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))), +def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))), (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))), +def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))), (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(f64 (fsub (f64 (fmul FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)), +def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))), (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; -def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul FPR64:$Rn, FPR64:$Rm)))), +def : Pat<(f64 (fsub (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)), (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; } @@ -4539,6 +4548,7 @@ def : ADRP_ADD; def : ADRP_ADD; def : ADRP_ADD; def : ADRP_ADD; +def : ADRP_ADD; //===----------------------------------------------------------------------===// // GOT access patterns diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64InstrNEON.td b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64InstrNEON.td index 581ebae2481e..43dfb197054d 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64InstrNEON.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64InstrNEON.td @@ -14,9 +14,6 @@ //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. 
//===----------------------------------------------------------------------===// -def Neon_bsl : SDNode<"AArch64ISD::NEON_BSL", SDTypeProfile<1, 3, - [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>]>>; // (outs Result), (ins Imm, OpCmode) def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>; @@ -67,6 +64,55 @@ def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>; +def SDT_assertext : SDTypeProfile<1, 1, + [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>; +def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>; +def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>; + +//===----------------------------------------------------------------------===// +// Addressing-mode instantiations +//===----------------------------------------------------------------------===// + +multiclass ls_64_pats { +defm : ls_neutral_pats; +} + +multiclass ls_128_pats { +defm : ls_neutral_pats; +} + +multiclass uimm12_neon_pats { + defm : ls_64_pats; + defm : ls_64_pats; + defm : ls_64_pats; + defm : ls_64_pats; + defm : ls_64_pats; + defm : ls_64_pats; + + defm : ls_128_pats; + defm : ls_128_pats; + defm : ls_128_pats; + defm : ls_128_pats; + defm : ls_128_pats; + defm : ls_128_pats; +} + +defm : uimm12_neon_pats<(A64WrapperSmall + tconstpool:$Hi, tconstpool:$Lo12, ALIGN), + (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>; + //===----------------------------------------------------------------------===// // Multiclasses //===----------------------------------------------------------------------===// @@ -227,7 +273,7 @@ defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul", // class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and // two operands constraints. 
class NeonI_3VSame_Constraint_impl size, + RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size, bits<5> opcode, SDPatternOperator opnode> : NeonI_3VSame; + (fadd node:$Ra, (fmul_su node:$Rn, node:$Rm))>; def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (fsub node:$Ra, (fmul node:$Rn, node:$Rm))>; + (fsub node:$Ra, (fmul_su node:$Rn, node:$Rm))>; let Predicates = [HasNEON, UseFusedMAC] in { def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64, v2f32, @@ -402,26 +448,30 @@ defm : Neon_bitwise2V_patterns; // Vector Bitwise Select def BSLvvv_8B : NeonI_3VSame_Constraint_impl<"bsl", ".8b", VPR64, v8i8, - 0b0, 0b1, 0b01, 0b00011, Neon_bsl>; + 0b0, 0b1, 0b01, 0b00011, vselect>; def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8, - 0b1, 0b1, 0b01, 0b00011, Neon_bsl>; + 0b1, 0b1, 0b01, 0b00011, vselect>; multiclass Neon_bitwise3V_patterns { // Disassociate type from instruction definition - def : Pat<(v2i32 (opnode VPR64:$src,VPR64:$Rn, VPR64:$Rm)), + def : Pat<(v8i8 (opnode (v8i8 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v4i16 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)), + def : Pat<(v2i32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1i64 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)), + def : Pat<(v4i16 (opnode (v4i16 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v4i32 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)), + def : Pat<(v1i64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), + (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; + def : Pat<(v16i8 (opnode (v16i8 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v8i16 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)), + def : Pat<(v4i32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2i64 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)), + def : Pat<(v8i16 (opnode (v8i16 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), + (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; + def : Pat<(v2i64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; // Allow to match BSL instruction pattern with non-constant operand @@ -490,10 +540,10 @@ multiclass Neon_bitwise3V_patterns; +defm: Neon_bitwise3V_patterns; def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm), - (Neon_bsl node:$src, node:$Rn, node:$Rm), + (vselect node:$src, node:$Rn, node:$Rm), [{ (void)N; return false; }]>; // Vector Bitwise Insert if True @@ -771,21 +821,21 @@ multiclass NeonI_fpcmpz_sizes opcode, (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm), asmop # "\t$Rd.2s, $Rn.2s, $FPImm", [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpimm:$FPImm), CC)))], + (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpz32:$FPImm), CC)))], NoItinerary>; def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm), asmop # "\t$Rd.4s, $Rn.4s, $FPImm", [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))], + (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpz32:$FPImm), CC)))], NoItinerary>; def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm), asmop # "\t$Rd.2d, $Rn.2d, $FPImm", [(set (v2i64 VPR128:$Rd), - (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))], + (v2i64 (Neon_cmpz 
(v2f64 VPR128:$Rn), (f32 fpz32:$FPImm), CC)))], NoItinerary>; } @@ -973,6 +1023,20 @@ defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx", int_aarch64_neon_vmulx, v2f32, v4f32, v2f64, 1>; +// Patterns to match llvm.aarch64.* intrinsic for +// ADDP, SMINP, UMINP, SMAXP, UMAXP having i32 as output +class Neon_VectorPair_v2i32_pattern + : Pat<(v1i32 (opnode (v2i32 VPR64:$Rn))), + (EXTRACT_SUBREG + (v2i32 (INST (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rn))), + sub_32)>; + +def : Neon_VectorPair_v2i32_pattern; +def : Neon_VectorPair_v2i32_pattern; +def : Neon_VectorPair_v2i32_pattern; +def : Neon_VectorPair_v2i32_pattern; +def : Neon_VectorPair_v2i32_pattern; + // Vector Immediate Instructions multiclass neon_mov_imm_shift_asmoperands @@ -1418,9 +1482,8 @@ let isReMaterializable = 1 in { def MOVIdi : NeonI_1VModImm<0b0, 0b1, (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm), "movi\t $Rd, $Imm", - [(set (f64 FPR64:$Rd), - (f64 (bitconvert - (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))))], + [(set (v1i64 FPR64:$Rd), + (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))], NoItinerary> { let cmode = 0b1110; } @@ -1445,7 +1508,7 @@ def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>; def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>; } -// Vector Shift (Immediate) +// Vector Shift (Immediate) // Immediate in [0, 63] def imm0_63 : Operand { let ParserMatchClass = uimm6_asmoperand; @@ -1473,7 +1536,7 @@ class shr_imm_asmoperands : AsmOperandClass { class shr_imm : Operand { let EncoderMethod = "getShiftRightImm" # OFFSET; let DecoderMethod = "DecodeShiftRightImm" # OFFSET; - let ParserMatchClass = + let ParserMatchClass = !cast("shr_imm" # OFFSET # "_asmoperand"); } @@ -1496,7 +1559,7 @@ class shl_imm_asmoperands : AsmOperandClass { class shl_imm : Operand { let EncoderMethod = "getShiftLeftImm" # OFFSET; let DecoderMethod = "DecodeShiftLeftImm" # OFFSET; - let ParserMatchClass = + let ParserMatchClass = !cast("shl_imm" # OFFSET # "_asmoperand"); } @@ -1712,6 +1775,23 @@ multiclass NeonI_N2VShLL opcode, string asmop, defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>; defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>; +// Signed integer lengthen (vector) is alias for SSHLL Vd, Vn, #0 +// Signed integer lengthen (vector, second part) is alias for SSHLL2 Vd, Vn, #0 +// FIXME: This is actually the preferred syntax but TableGen can't deal with +// custom printing of aliases. 
+class NeonI_sxtl_alias + : NeonInstAlias; + +def SXTLvv_8B : NeonI_sxtl_alias<"sxtl", ".8h", ".8b", SSHLLvvi_8B, VPR128, VPR64>; +def SXTLvv_4H : NeonI_sxtl_alias<"sxtl", ".4s", ".4h", SSHLLvvi_4H, VPR128, VPR64>; +def SXTLvv_2S : NeonI_sxtl_alias<"sxtl", ".2d", ".2s", SSHLLvvi_2S, VPR128, VPR64>; +def SXTL2vv_16B : NeonI_sxtl_alias<"sxtl2", ".8h", ".16b", SSHLLvvi_16B, VPR128, VPR128>; +def SXTL2vv_8H : NeonI_sxtl_alias<"sxtl2", ".4s", ".8h", SSHLLvvi_8H, VPR128, VPR128>; +def SXTL2vv_4S : NeonI_sxtl_alias<"sxtl2", ".2d", ".4s", SSHLLvvi_4S, VPR128, VPR128>; + // Rounding/Saturating shift class N2VShift_RQ opcode, string asmop, string T, RegisterOperand VPRC, ValueType Ty, Operand ImmTy, @@ -2366,8 +2446,8 @@ multiclass NeonI_2VAcross_3 opcode, bits<2> size, def _1s4s: NeonI_2VAcross<0b1, u, size, opcode, (outs FPR32:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd, $Rn.4s", - [(set (v1f32 FPR32:$Rd), - (v1f32 (opnode (v4f32 VPR128:$Rn))))], + [(set (f32 FPR32:$Rd), + (f32 (opnode (v4f32 VPR128:$Rn))))], NoItinerary>; } @@ -2779,16 +2859,16 @@ defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>; // For pattern that need two operators being chained. class NeonI_3VDL_Aba size, bits<4> opcode, - string asmop, string ResS, string OpS, + string asmop, string ResS, string OpS, SDPatternOperator opnode, SDPatternOperator subop, RegisterOperand OpVPR, ValueType ResTy, ValueType OpTy, ValueType OpSTy> : NeonI_3VDiff { @@ -2813,13 +2893,13 @@ defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal", multiclass NeonI_3VDL2_Aba_v1 opcode, string asmop, SDPatternOperator opnode, string subop> { def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b", - opnode, !cast(subop # "_16B"), + opnode, !cast(subop # "_16B"), VPR128, v8i16, v16i8, v8i8>; def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, !cast(subop # "_8H"), + opnode, !cast(subop # "_8H"), VPR128, v4i32, v8i16, v4i16>; def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, !cast(subop # "_4S"), + opnode, !cast(subop # "_4S"), VPR128, v2i64, v4i32, v2i32>; } @@ -2939,13 +3019,13 @@ class NeonI_3VDL2_3Op_mlas size, bits<4> opcode, let Constraints = "$src = $Rd"; } -multiclass NeonI_3VDL2_3Op_mlas_v1 opcode, string asmop, +multiclass NeonI_3VDL2_3Op_mlas_v1 opcode, string asmop, SDPatternOperator subop, string opnode> { def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b", subop, !cast(opnode # "_16B"), VPR128, v8i16, v16i8>; def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h", - subop, !cast(opnode # "_8H"), + subop, !cast(opnode # "_8H"), VPR128, v4i32, v8i16>; def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s", subop, !cast(opnode # "_4S"), @@ -2990,7 +3070,7 @@ multiclass NeonI_3VDL_v2 opcode, string asmop, defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull", int_arm_neon_vqdmull, 1>; -multiclass NeonI_3VDL2_2Op_mull_v2 opcode, string asmop, +multiclass NeonI_3VDL2_2Op_mull_v2 opcode, string asmop, string opnode, bit Commutable = 0> { let isCommutable = Commutable in { def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h", @@ -3002,10 +3082,10 @@ multiclass NeonI_3VDL2_2Op_mull_v2 opcode, string asmop, } } -defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2", +defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2", "NI_qdmull_hi", 1>; -multiclass NeonI_3VDL2_3Op_qdmlal_v2 opcode, string asmop, +multiclass NeonI_3VDL2_3Op_qdmlal_v2 opcode, string asmop, 
SDPatternOperator opnode> { def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h", opnode, NI_qdmull_hi_8H, @@ -3021,31 +3101,38 @@ defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2", int_arm_neon_vqsubs>; multiclass NeonI_3VDL_v3 opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { + SDPatternOperator opnode_8h8b, + SDPatternOperator opnode_1q1d, bit Commutable = 0> { let isCommutable = Commutable in { def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, VPR128, VPR64, v8i16, v8i8>; - - def _1q1d : NeonI_3VDiff<0b0, u, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR64:$Rn, VPR64:$Rm), - asmop # "\t$Rd.1q, $Rn.1d, $Rm.1d", - [], NoItinerary>; + opnode_8h8b, VPR128, VPR64, v8i16, v8i8>; + + def _1q1d : NeonI_3VD_2Op<0b0, u, 0b11, opcode, asmop, "1q", "1d", + opnode_1q1d, VPR128, VPR64, v16i8, v1i64>; } } -defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>; +defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, + int_aarch64_neon_vmull_p64, 1>; -multiclass NeonI_3VDL2_2Op_mull_v3 opcode, string asmop, +multiclass NeonI_3VDL2_2Op_mull_v3 opcode, string asmop, string opnode, bit Commutable = 0> { let isCommutable = Commutable in { def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b", !cast(opnode # "_16B"), v8i16, v16i8>; - - def _1q2d : NeonI_3VDiff<0b1, u, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d", - [], NoItinerary>; + + def _1q2d : + NeonI_3VDiff<0b1, u, 0b11, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), + asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d", + [(set (v16i8 VPR128:$Rd), + (v16i8 (int_aarch64_neon_vmull_p64 + (v1i64 (scalar_to_vector + (i64 (vector_extract (v2i64 VPR128:$Rn), 1)))), + (v1i64 (scalar_to_vector + (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))], + NoItinerary>; } } @@ -3062,7 +3149,7 @@ defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi", // The structure consists of a sequence of sets of N values. // The first element of the structure is placed in the first lane // of the first first vector, the second element in the first lane -// of the second vector, and so on. +// of the second vector, and so on. // E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into // the three 64-bit vectors list {BA, DC, FE}. // E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three @@ -3127,9 +3214,9 @@ def LD1x4_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">; class NeonI_STVList opcode, bits<2> size, RegisterOperand VecList, string asmop> : NeonI_LdStMult { let mayStore = 1; let neverHasSideEffects = 1; @@ -3226,6 +3313,21 @@ def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr), def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr), (ST1_8B GPR64xsp:$addr, VPR64:$value)>; +// Match load/store of v1i8/v1i16/v1i32 type to FPR8/FPR16/FPR32 load/store. +// FIXME: for now we have v1i8, v1i16, v1i32 legal types, if they are illegal, +// these patterns are not needed any more. 
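[Editor's note: returning to the PMULL/PMULL2 changes earlier in this hunk, the new int_aarch64_neon_vmull_p64 patterns correspond to the ACLE carry-less multiply intrinsics. A hedged sketch, assuming a toolchain and target with the crypto extension (e.g. -march=armv8-a+crypto); vmull_high_p64 is exactly the lane-1 vector_extract encoded in the _1q2d pattern.

#include <arm_neon.h>

/* Carry-less (polynomial) 64x64 -> 128 multiply, as used by GCM. */
poly128_t clmul_lo(poly64x2_t a, poly64x2_t b) {
    /* PMULL Vd.1q, Vn.1d, Vm.1d on the low doublewords. */
    return vmull_p64(vgetq_lane_p64(a, 0), vgetq_lane_p64(b, 0));
}

poly128_t clmul_hi(poly64x2_t a, poly64x2_t b) {
    /* PMULL2 Vd.1q, Vn.2d, Vm.2d multiplies the high doublewords,
     * matching the lane-1 extracts in the pattern above. */
    return vmull_high_p64(a, b);
}
]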
+def : Pat<(v1i8 (load GPR64xsp:$addr)), (LSFP8_LDR $addr, 0)>; +def : Pat<(v1i16 (load GPR64xsp:$addr)), (LSFP16_LDR $addr, 0)>; +def : Pat<(v1i32 (load GPR64xsp:$addr)), (LSFP32_LDR $addr, 0)>; + +def : Pat<(store (v1i8 FPR8:$value), GPR64xsp:$addr), + (LSFP8_STR $value, $addr, 0)>; +def : Pat<(store (v1i16 FPR16:$value), GPR64xsp:$addr), + (LSFP16_STR $value, $addr, 0)>; +def : Pat<(store (v1i32 FPR32:$value), GPR64xsp:$addr), + (LSFP32_STR $value, $addr, 0)>; + + // End of vector load/store multiple N-element structure(class SIMD lselem) // The followings are post-index vector load/store multiple N-element @@ -3341,11 +3443,11 @@ def uimm_exact64 : Operand, ImmLeaf { multiclass NeonI_LDWB_VList opcode, bits<2> size, RegisterOperand VecList, Operand ImmTy, string asmop> { - let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1, + let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1, DecoderMethod = "DecodeVLDSTPostInstruction" in { def _fixed : NeonI_LdStMult_Post { @@ -3354,7 +3456,7 @@ multiclass NeonI_LDWB_VList opcode, bits<2> size, def _register : NeonI_LdStMult_Post; @@ -3437,7 +3539,7 @@ multiclass NeonI_STWB_VList opcode, bits<2> size, def _register : NeonI_LdStMult_Post; @@ -3578,7 +3680,7 @@ multiclass LDN_Dup_BHSD opcode, string List, string asmop> { // Load single 1-element structure to all lanes of 1 register defm LD1R : LDN_Dup_BHSD<0b0, 0b110, "VOne", "ld1r">; -// Load single N-element structure to all lanes of N consecutive +// Load single N-element structure to all lanes of N consecutive // registers (N = 2,3,4) defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">; defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">; @@ -3605,12 +3707,16 @@ def : LD1R_pattern; def : LD1R_pattern; def : LD1R_pattern; -def : LD1R_pattern; -def : LD1R_pattern; - def : LD1R_pattern; def : LD1R_pattern; +class LD1R_pattern_v1 + : Pat<(VTy (scalar_to_vector (DTy (LoadOp GPR64xsp:$Rn)))), + (VTy (INST GPR64xsp:$Rn))>; + +def : LD1R_pattern_v1; +def : LD1R_pattern_v1; multiclass VectorList_Bare_BHSD { @@ -3662,7 +3768,7 @@ multiclass LDN_Lane_BHSD { let Inst{12-10} = {lane{0}, 0b0, 0b0}; let Inst{30} = lane{1}; } - + def _D : NeonI_LDN_Lane(List # "D_operand"), neon_uimm1_bare, asmop> { @@ -3685,8 +3791,8 @@ multiclass LD1LN_patterns { def : Pat<(VTy (vector_insert (VTy VPR64:$src), (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))), - (VTy (EXTRACT_SUBREG - (INST GPR64xsp:$Rn, + (VTy (EXTRACT_SUBREG + (INST GPR64xsp:$Rn, (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), ImmOp:$lane), sub_64))>; @@ -3746,7 +3852,7 @@ multiclass STN_Lane_BHSD { let Inst{12-10} = {lane{0}, 0b0, 0b0}; let Inst{30} = lane{1}; } - + def _D : NeonI_STN_Lane(List # "D_operand"), neon_uimm1_bare, asmop>{ @@ -3864,7 +3970,7 @@ multiclass LDWB_Dup_BHSD opcode, string List, string asmop, defm LD1R_WB : LDWB_Dup_BHSD<0b0, 0b110, "VOne", "ld1r", uimm_exact1, uimm_exact2, uimm_exact4, uimm_exact8>; -// Post-index load single N-element structure to all lanes of N consecutive +// Post-index load single N-element structure to all lanes of N consecutive // registers (N = 2,3,4) defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2, uimm_exact4, uimm_exact8, uimm_exact16>; @@ -3873,7 +3979,7 @@ defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3, defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4, uimm_exact8, uimm_exact16, uimm_exact32>; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1, +let mayLoad = 1, neverHasSideEffects = 1, 
hasExtraDefRegAllocReq = 1, Constraints = "$Rn = $wb, $Rt = $src", DecoderMethod = "DecodeVLDSTLanePostInstruction" in { class LDN_WBFx_Lane op2_1, bit op0, RegisterOperand VList, @@ -3915,14 +4021,14 @@ multiclass LD_Lane_WB_BHSD(List # "H_operand"), uimm_h, neon_uimm3_bare, asmop> { let Inst{12-10} = {lane{1}, lane{0}, 0b0}; let Inst{30} = lane{2}; } - + def _H_register : LDN_WBReg_Lane(List # "H_operand"), uimm_h, neon_uimm3_bare, asmop> { @@ -3943,7 +4049,7 @@ multiclass LD_Lane_WB_BHSD(List # "D_operand"), uimm_d, neon_uimm1_bare, asmop> { @@ -4015,14 +4121,14 @@ multiclass ST_Lane_WB_BHSD(List # "H_operand"), uimm_h, neon_uimm3_bare, asmop> { let Inst{12-10} = {lane{1}, lane{0}, 0b0}; let Inst{30} = lane{2}; } - + def _H_register : STN_WBReg_Lane(List # "H_operand"), uimm_h, neon_uimm3_bare, asmop> { @@ -4043,7 +4149,7 @@ multiclass ST_Lane_WB_BHSD(List # "D_operand"), uimm_d, neon_uimm1_bare, asmop> { @@ -4118,7 +4224,7 @@ multiclass NeonI_Scalar3Same_BHSD_sizes opcode, multiclass Neon_Scalar3Same_D_size_patterns { def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), - (INSTD FPR64:$Rn, FPR64:$Rm)>; + (INSTD FPR64:$Rn, FPR64:$Rm)>; } multiclass Neon_Scalar3Same_BHSD_size_patterns { def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), (INSTB FPR8:$Rn, FPR8:$Rm)>; - def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), (INSTH FPR16:$Rn, FPR16:$Rm)>; - def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), (INSTS FPR32:$Rn, FPR32:$Rm)>; } -class Neon_Scalar3Same_cmp_D_size_patterns - : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), - (INSTD FPR64:$Rn, FPR64:$Rm)>; - multiclass Neon_Scalar3Same_HS_size_patterns { @@ -4152,22 +4251,19 @@ multiclass Neon_Scalar3Same_HS_size_patterns { - def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))), + ValueType SResTy, ValueType STy, + Instruction INSTS, ValueType DResTy, + ValueType DTy, Instruction INSTD> { + def : Pat<(SResTy (opnode (STy FPR32:$Rn), (STy FPR32:$Rm))), (INSTS FPR32:$Rn, FPR32:$Rm)>; - def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + def : Pat<(DResTy (opnode (DTy FPR64:$Rn), (DTy FPR64:$Rm))), (INSTD FPR64:$Rn, FPR64:$Rm)>; } -multiclass Neon_Scalar3Same_cmp_SD_size_patterns { - def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))), - (INSTS FPR32:$Rn, FPR32:$Rm)>; - def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (INSTD FPR64:$Rn, FPR64:$Rm)>; -} +class Neon_Scalar3Same_cmp_V1_D_size_patterns + : Pat<(v1i64 (Neon_cmp (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), CC)), + (INSTD FPR64:$Rn, FPR64:$Rm)>; // Scalar Three Different @@ -4277,37 +4373,46 @@ multiclass NeonI_Scalar2SameMisc_accum_BHSD_size opcode, class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns - : Pat<(v1f32 (opnode (v1f64 FPR64:$Rn))), + : Pat<(f32 (opnode (f64 FPR64:$Rn))), (INSTD FPR64:$Rn)>; multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns { - def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn))), + def : Pat<(v1i32 (opnode (f32 FPR32:$Rn))), (INSTS FPR32:$Rn)>; - def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), + def : Pat<(v1i64 (opnode (f64 FPR64:$Rn))), (INSTD FPR64:$Rn)>; } -multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns + : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), + (INSTD FPR64:$Rn)>; + +multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns { - def : Pat<(f32 (Sopnode (v1i32 FPR32:$Rn))), + def : Pat<(f32 (opnode (v1i32 FPR32:$Rn))), (INSTS FPR32:$Rn)>; - def : Pat<(f64 (Dopnode (v1i64 FPR64:$Rn))), + def : Pat<(f64 (opnode (v1i64 FPR64:$Rn))), (INSTD 
FPR64:$Rn)>; } multiclass Neon_Scalar2SameMisc_SD_size_patterns { - def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn))), + def : Pat<(f32 (opnode (f32 FPR32:$Rn))), (INSTS FPR32:$Rn)>; - def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), + def : Pat<(f64 (opnode (f64 FPR64:$Rn))), (INSTD FPR64:$Rn)>; } +class Neon_Scalar2SameMisc_V1_D_size_patterns + : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), + (INSTD FPR64:$Rn)>; + class NeonI_Scalar2SameMisc_cmpz_D_size opcode, string asmop> : NeonI_Scalar2SameMisc opcode, [], NoItinerary>; def ddi : NeonI_Scalar2SameMisc; @@ -4335,15 +4440,22 @@ class Neon_Scalar2SameMisc_cmpz_D_size_patterns; +class Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns + : Pat<(v1i64 (Neon_cmpz (v1i64 FPR64:$Rn), + (i32 neon_uimm0:$Imm), CC)), + (INSTD FPR64:$Rn, neon_uimm0:$Imm)>; + multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns { - def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), - (v1f32 (scalar_to_vector (f32 fpimm:$FPImm))))), - (INSTS FPR32:$Rn, fpimm:$FPImm)>; - def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), - (v1f64 (bitconvert (v8i8 Neon_AllZero))))), - (INSTD FPR64:$Rn, 0)>; + def : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (f32 fpz32:$FPImm))), + (INSTS FPR32:$Rn, fpz32:$FPImm)>; + def : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (f32 fpz32:$FPImm))), + (INSTD FPR64:$Rn, fpz32:$FPImm)>; + def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpz32:$FPImm), CC)), + (INSTD FPR64:$Rn, fpz32:$FPImm)>; } multiclass Neon_Scalar2SameMisc_D_size_patterns; } -class Neon_ScalarShiftImm_arm_D_size_patterns +class Neon_ScalarShiftLImm_V1_D_size_patterns + : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), + (v1i64 (Neon_vdup (i32 shl_imm64:$Imm))))), + (INSTD FPR64:$Rn, imm:$Imm)>; + +class Neon_ScalarShiftRImm_V1_D_size_patterns : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 (Neon_vdup (i32 shr_imm64:$Imm))))), (INSTD FPR64:$Rn, imm:$Imm)>; @@ -4587,23 +4705,21 @@ multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns< (INSTD FPR64:$Rn, imm:$Imm)>; } -multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns { - def ssi : Pat<(f32 (Sopnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))), + def ssi : Pat<(f32 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))), (INSTS FPR32:$Rn, imm:$Imm)>; - def ddi : Pat<(f64 (Dopnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), + def ddi : Pat<(f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), (INSTD FPR64:$Rn, imm:$Imm)>; } -multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns { - def ssi : Pat<(v1i32 (Sopnode (v1f32 FPR32:$Rn), (i32 shr_imm32:$Imm))), + def ssi : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (i32 shr_imm32:$Imm))), (INSTS FPR32:$Rn, imm:$Imm)>; - def ddi : Pat<(v1i64 (Dopnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))), + def ddi : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (i32 shr_imm64:$Imm))), (INSTD FPR64:$Rn, imm:$Imm)>; } @@ -4611,13 +4727,13 @@ multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns; defm : Neon_ScalarShiftRImm_D_size_patterns; // Pattern to match llvm.arm.* intrinsic. -def : Neon_ScalarShiftImm_arm_D_size_patterns; +def : Neon_ScalarShiftRImm_V1_D_size_patterns; // Scalar Unsigned Shift Right (Immediate) defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">; defm : Neon_ScalarShiftRImm_D_size_patterns; // Pattern to match llvm.arm.* intrinsic. 
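[Editor's note: the fixed-point convert and shift-immediate patterns being reworked in this hunk have direct ACLE counterparts. A minimal sketch, assuming an AArch64 toolchain; the Q32.32 layout is only an example choice of immediate, and the compiler is free to select these instructions some other way.

#include <arm_neon.h>
#include <stdint.h>

/* SCVTF d, d, #32: interpret q as a signed Q32.32 fixed-point value. */
double q32_to_double(int64_t q) {
    return vcvtd_n_f64_s64(q, 32);
}

/* FCVTZS d, d, #32: back to Q32.32, rounding toward zero. */
int64_t double_to_q32(double d) {
    return vcvtd_n_s64_f64(d, 32);
}

/* SSHR d, d, #3: the llvm.arm.* shift pattern below matches the same
 * operation via a Neon_vdup of the immediate. */
int64_t sra3(int64_t x) {
    return vshrd_n_s64(x, 3);
}
]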
-def : Neon_ScalarShiftImm_arm_D_size_patterns; +def : Neon_ScalarShiftRImm_V1_D_size_patterns; // Scalar Signed Rounding Shift Right (Immediate) defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">; @@ -4651,7 +4767,7 @@ def : Neon_ScalarShiftRImm_accum_D_size_patterns defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">; defm : Neon_ScalarShiftLImm_D_size_patterns; // Pattern to match llvm.arm.* intrinsic. -def : Neon_ScalarShiftImm_arm_D_size_patterns; +def : Neon_ScalarShiftLImm_V1_D_size_patterns; // Signed Saturating Shift Left (Immediate) defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">; @@ -4723,28 +4839,47 @@ defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; -defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns; // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate) defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">; -defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns; // Scalar Floating-point Convert To Signed Fixed-point (Immediate) defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">; -defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns; // Scalar Floating-point Convert To Unsigned Fixed-point (Immediate) defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">; -defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns; +// Patterns For Convert Instructions Between v1f64 and v1i64 +class Neon_ScalarShiftImm_cvtf_v1f64_pattern + : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), + (INST FPR64:$Rn, imm:$Imm)>; + +class Neon_ScalarShiftImm_fcvt_v1f64_pattern + : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))), + (INST FPR64:$Rn, imm:$Imm)>; + +def : Neon_ScalarShiftImm_cvtf_v1f64_pattern; + +def : Neon_ScalarShiftImm_cvtf_v1f64_pattern; + +def : Neon_ScalarShiftImm_fcvt_v1f64_pattern; + +def : Neon_ScalarShiftImm_fcvt_v1f64_pattern; + // Scalar Integer Add let isCommutable = 1 in { def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">; @@ -4802,17 +4937,18 @@ defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>; // Scalar Floating-point Reciprocal Step defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>; +defm : Neon_Scalar3Same_SD_size_patterns; +def : Pat<(v1f64 (int_arm_neon_vrecps (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (FRECPSddd FPR64:$Rn, FPR64:$Rm)>; // Scalar Floating-point Reciprocal Square Root Step defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>; - -// Patterns to match llvm.arm.* intrinsic for -// Scalar Floating-point Reciprocal Step and -// Scalar Floating-point Reciprocal Square Root Step -defm : Neon_Scalar3Same_SD_size_patterns; -defm : Neon_Scalar3Same_SD_size_patterns; +defm : Neon_Scalar3Same_SD_size_patterns; +def : Pat<(v1f64 (int_arm_neon_vrsqrts (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (FRSQRTSddd FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>; // Patterns to match llvm.aarch64.* intrinsic for // Scalar Floating-point Multiply Extended, @@ -4826,7 +4962,9 @@ multiclass Neon_Scalar3Same_MULX_SD_size_patterns; + FMULXsss, FMULXddd>; +def : Pat<(v1f64 (int_aarch64_neon_vmulx (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (FMULXddd FPR64:$Rn, FPR64:$Rm)>; // Scalar Integer Shift Left (Signed, Unsigned) def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">; @@ -4905,14 +5043,12 @@ defm : Neon_Scalar3Diff_HS_size_patterns; -defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns; // Scalar Unsigned Integer Convert To 
Floating-point defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">; -defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns; // Scalar Floating-point Converts @@ -4923,47 +5059,76 @@ def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns; defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">; defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">; defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">; defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">; defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">; defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">; defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">; defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">; defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">; defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; +def : Neon_Scalar2SameMisc_vcvt_D_size_patterns; + +// Patterns For Convert Instructions Between v1f64 and v1i64 +class Neon_Scalar2SameMisc_cvtf_v1f64_pattern + : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>; + +class Neon_Scalar2SameMisc_fcvt_v1f64_pattern + : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>; + +def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern; +def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern; + +def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern; +def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern; // Scalar Floating-point Reciprocal Estimate defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">; -defm : Neon_Scalar2SameMisc_SD_size_patterns; +def : Neon_Scalar2SameMisc_V1_D_size_patterns; // Scalar Floating-point Reciprocal Exponent defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">; @@ -4972,14 +5137,28 @@ defm : Neon_Scalar2SameMisc_SD_size_patterns; -defm : Neon_Scalar2SameMisc_SD_size_patterns; +defm : Neon_Scalar2SameMisc_SD_size_patterns; +def : Neon_Scalar2SameMisc_V1_D_size_patterns; + +// Scalar Floating-point Round +class Neon_ScalarFloatRound_pattern + : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>; + +def : Neon_ScalarFloatRound_pattern; +def : Neon_ScalarFloatRound_pattern; +def : Neon_ScalarFloatRound_pattern; +def : Neon_ScalarFloatRound_pattern; +def : Neon_ScalarFloatRound_pattern; +def : Neon_ScalarFloatRound_pattern; +def : Neon_ScalarFloatRound_pattern; // Scalar Integer Compare // Scalar Compare Bitwise Equal def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">; -def : Neon_Scalar3Same_cmp_D_size_patterns; +defm : 
Neon_Scalar3Same_D_size_patterns; class Neon_Scalar3Same_cmp_D_size_v1_patterns; // Scalar Compare Signed Greather Than Or Equal def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">; -def : Neon_Scalar3Same_cmp_D_size_patterns; +defm : Neon_Scalar3Same_D_size_patterns; +def : Neon_Scalar3Same_cmp_D_size_v1_patterns; // Scalar Compare Unsigned Higher Or Same def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">; -def : Neon_Scalar3Same_cmp_D_size_patterns; +defm : Neon_Scalar3Same_D_size_patterns; +def : Neon_Scalar3Same_cmp_D_size_v1_patterns; // Scalar Compare Unsigned Higher def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">; -def : Neon_Scalar3Same_cmp_D_size_patterns; +defm : Neon_Scalar3Same_D_size_patterns; +def : Neon_Scalar3Same_cmp_D_size_v1_patterns; // Scalar Compare Signed Greater Than def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">; -def : Neon_Scalar3Same_cmp_D_size_patterns; +defm : Neon_Scalar3Same_D_size_patterns; +def : Neon_Scalar3Same_cmp_D_size_v1_patterns; // Scalar Compare Bitwise Test Bits def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">; -def : Neon_Scalar3Same_cmp_D_size_patterns; -def : Neon_Scalar3Same_cmp_D_size_patterns; +defm : Neon_Scalar3Same_D_size_patterns; +defm : Neon_Scalar3Same_D_size_patterns; // Scalar Compare Bitwise Equal To Zero def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">; def : Neon_Scalar2SameMisc_cmpz_D_size_patterns; +def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns; // Scalar Compare Signed Greather Than Or Equal To Zero def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">; def : Neon_Scalar2SameMisc_cmpz_D_size_patterns; +def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns; // Scalar Compare Signed Greater Than Zero def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">; def : Neon_Scalar2SameMisc_cmpz_D_size_patterns; +def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns; // Scalar Compare Signed Less Than Or Equal To Zero def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">; def : Neon_Scalar2SameMisc_cmpz_D_size_patterns; +def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns; // Scalar Compare Less Than Zero def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">; def : Neon_Scalar2SameMisc_cmpz_D_size_patterns; +def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns; // Scalar Floating-point Compare // Scalar Floating-point Compare Mask Equal defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">; -defm : Neon_Scalar3Same_cmp_SD_size_patterns; +defm : Neon_Scalar3Same_SD_size_patterns; +def : Neon_Scalar3Same_cmp_V1_D_size_patterns; // Scalar Floating-point Compare Mask Equal To Zero defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">; -defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns; // Scalar Floating-point Compare Mask Greater Than Or Equal defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">; -defm : Neon_Scalar3Same_cmp_SD_size_patterns; +defm : Neon_Scalar3Same_SD_size_patterns; +def : Neon_Scalar3Same_cmp_V1_D_size_patterns; // Scalar Floating-point Compare Mask Greater Than Or Equal To Zero defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">; -defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns; // Scalar Floating-point Compare Mask Greather Than defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">; -defm : Neon_Scalar3Same_cmp_SD_size_patterns; +defm : Neon_Scalar3Same_SD_size_patterns; +def : 
Neon_Scalar3Same_cmp_V1_D_size_patterns; // Scalar Floating-point Compare Mask Greather Than Zero defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">; -defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns; // Scalar Floating-point Compare Mask Less Than Or Equal To Zero defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">; -defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns; // Scalar Floating-point Compare Mask Less Than Zero defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">; -defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns; // Scalar Floating-point Absolute Compare Mask Greater Than Or Equal defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">; -defm : Neon_Scalar3Same_cmp_SD_size_patterns; +defm : Neon_Scalar3Same_SD_size_patterns; +def : Pat<(v1i64 (int_aarch64_neon_vcage (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (FACGEddd FPR64:$Rn, FPR64:$Rm)>; // Scalar Floating-point Absolute Compare Mask Greater Than defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">; -defm : Neon_Scalar3Same_cmp_SD_size_patterns; +defm : Neon_Scalar3Same_SD_size_patterns; +def : Pat<(v1i64 (int_aarch64_neon_vcagt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (FACGTddd FPR64:$Rn, FPR64:$Rm)>; -// Scakar Floating-point Absolute Difference +// Scalar Floating-point Absolute Difference defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">; -defm : Neon_Scalar3Same_SD_size_patterns; +defm : Neon_Scalar3Same_SD_size_patterns; // Scalar Absolute Value defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">; @@ -5122,6 +5317,27 @@ defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns; +def : Pat<(v1i64 (int_aarch64_neon_suqadd (v1i64 FPR64:$Src), + (v1i64 FPR64:$Rn))), + (SUQADDdd FPR64:$Src, FPR64:$Rn)>; + +def : Pat<(v1i64 (int_aarch64_neon_usqadd (v1i64 FPR64:$Src), + (v1i64 FPR64:$Rn))), + (USQADDdd FPR64:$Src, FPR64:$Rn)>; + +def : Pat<(v1i64 (int_arm_neon_vabs (v1i64 FPR64:$Rn))), + (ABSdd FPR64:$Rn)>; + +def : Pat<(v1i64 (int_arm_neon_vqabs (v1i64 FPR64:$Rn))), + (SQABSdd FPR64:$Rn)>; + +def : Pat<(v1i64 (int_arm_neon_vqneg (v1i64 FPR64:$Rn))), + (SQNEGdd FPR64:$Rn)>; + +def : Pat<(v1i64 (sub (v1i64 (bitconvert (v8i8 Neon_AllZero))), + (v1i64 FPR64:$Rn))), + (NEGdd FPR64:$Rn)>; + // Scalar Signed Saturating Extract Unsigned Narrow defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">; defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns; // Scalar Reduce Addition Pairwise (Integer) def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))), (ADDPvv_D_2D VPR128:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_vaddv (v2i64 VPR128:$Rn))), + (ADDPvv_D_2D VPR128:$Rn)>; // Scalar Reduce Addition Pairwise (Floating Point) defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>; @@ -5189,33 +5407,37 @@ defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>; // Scalar Reduce minNum Pairwise (Floating Point) defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>; -multiclass Neon_ScalarPair_SD_size_patterns { - def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))), + def : Pat<(f32 (opnode (v2f32 VPR64:$Rn))), (INSTS VPR64:$Rn)>; - def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))), + def : Pat<(f64 (opnode (v2f64 VPR128:$Rn))), (INSTD VPR128:$Rn)>; } // Patterns to match llvm.aarch64.* intrinsic for // Scalar Reduce Add, Max, Min, MaxiNum, MinNum Pairwise (Floating Point) defm : Neon_ScalarPair_SD_size_patterns; + FADDPvv_S_2S, FADDPvv_D_2D>; defm : 
Neon_ScalarPair_SD_size_patterns; + FMAXPvv_S_2S, FMAXPvv_D_2D>; defm : Neon_ScalarPair_SD_size_patterns; + FMINPvv_S_2S, FMINPvv_D_2D>; defm : Neon_ScalarPair_SD_size_patterns; + FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>; -defm : Neon_ScalarPair_SD_size_patterns; +defm : Neon_ScalarPair_SD_size_patterns; +def : Pat<(f32 (int_aarch64_neon_vpfadd (v4f32 VPR128:$Rn))), + (FADDPvv_S_2S (v2f32 + (EXTRACT_SUBREG + (v4f32 (FADDP_4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rn))), + sub_64)))>; // Scalar by element Arithmetic @@ -5323,7 +5545,6 @@ defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns; - // Scalar Floating Point fused multiply-add (scalar, by element) def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla", 0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { @@ -5608,7 +5829,6 @@ defm : Neon_ScalarXIndexedElem_MLAL_Patterns; - // Scalar Signed saturating doubling multiply returning // high half (scalar, by element) def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh", @@ -5695,6 +5915,38 @@ defm : Neon_ScalarXIndexedElem_MUL_Patterns; +// Scalar general arithmetic operation +class Neon_Scalar_GeneralMath2D_pattern + : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>; + +class Neon_Scalar_GeneralMath3D_pattern + : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (INST FPR64:$Rn, FPR64:$Rm)>; + +class Neon_Scalar_GeneralMath4D_pattern + : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), + (v1f64 FPR64:$Ra))), + (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; + +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; + +def : Neon_Scalar_GeneralMath2D_pattern; +def : Neon_Scalar_GeneralMath2D_pattern; + +def : Neon_Scalar_GeneralMath4D_pattern; +def : Neon_Scalar_GeneralMath4D_pattern; + // Scalar Copy - DUP element to scalar class NeonI_Scalar_DUP { let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; } -multiclass NeonI_Scalar_DUP_Elt_pattern { - def : Pat<(ResTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)), - (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>; +def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 0)), + (f32 (EXTRACT_SUBREG (v4f32 VPR128:$Rn), sub_32))>; +def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 1)), + (f32 (DUPsv_S (v4f32 VPR128:$Rn), 1))>; +def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 2)), + (f32 (DUPsv_S (v4f32 VPR128:$Rn), 2))>; +def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 3)), + (f32 (DUPsv_S (v4f32 VPR128:$Rn), 3))>; - def : Pat<(ResTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)), - (ResTy (DUPI - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - OpNImm:$Imm))>; -} +def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 0)), + (f64 (EXTRACT_SUBREG (v2f64 VPR128:$Rn), sub_64))>; +def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 1)), + (f64 (DUPdv_D (v2f64 VPR128:$Rn), 1))>; -// Patterns for vector extract of FP data using scalar DUP instructions -defm : NeonI_Scalar_DUP_Elt_pattern; -defm : NeonI_Scalar_DUP_Elt_pattern; +def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 0)), + (f32 (EXTRACT_SUBREG (v2f32 VPR64:$Rn), sub_32))>; +def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 1)), + (f32 (DUPsv_S (v4f32 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + 1))>; + +def : Pat<(f64 (vector_extract 
(v1f64 VPR64:$Rn), 0)), + (f64 (EXTRACT_SUBREG (v1f64 VPR64:$Rn), sub_64))>; multiclass NeonI_Scalar_DUP_Ext_Vec_pattern; -defm : NeonI_Scalar_DUP_Copy_pattern1; -defm : NeonI_Scalar_DUP_Copy_pattern1; defm : NeonI_Scalar_DUP_Copy_pattern2; @@ -5824,12 +6075,6 @@ defm : NeonI_Scalar_DUP_Copy_pattern2; -defm : NeonI_Scalar_DUP_Copy_pattern2; -defm : NeonI_Scalar_DUP_Copy_pattern2; multiclass NeonI_Scalar_DUP_alias; def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>; def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>; - // ...and scalar bitcasts... def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>; def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>; def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>; def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>; @@ -5967,7 +6210,6 @@ def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>; def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>; def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>; def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>; def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>; @@ -6011,7 +6253,7 @@ class NeonI_Extract op2, string asmop, string OpS, RegisterOperand OpVPR, Operand OpImm> : NeonI_BitExtract{ @@ -6029,7 +6271,7 @@ def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b", } class NI_Extract + Operand OpImm> : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm), (i64 OpImm:$Imm))), (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>; @@ -6097,669 +6339,6 @@ defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">; defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">; defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">; -// The followings are for instruction class (3V Elem) - -// Variant 1 - -class NI_2VE size, bits<4> opcode, - string asmop, string ResS, string OpS, string EleOpS, - Operand OpImm, RegisterOperand ResVPR, - RegisterOperand OpVPR, RegisterOperand EleOpVPR> - : NeonI_2VElem { - bits<3> Index; - bits<5> Re; - - let Constraints = "$src = $Rd"; -} - -multiclass NI_2VE_v1 opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", - neon_uimm2_bare, VPR64, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // Index operations on 16-bit(H) elements are restricted to using v0-v15. 
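[Editor's note: the FP lane-extract rewrite earlier in this hunk (lane 0 as a plain EXTRACT_SUBREG, higher lanes through DUP element) is visible from C via vgetq_lane. A sketch, assuming an AArch64 target; which lanes are free subregister reads is a property of the register file, not of the intrinsic.

#include <arm_neon.h>

float lane0(float32x4_t v) {
    /* s0 is a subregister of v0: no instruction is needed. */
    return vgetq_lane_f32(v, 0);
}

float lane3(float32x4_t v) {
    /* Higher lanes use DUP s0, v0.s[3], per the DUPsv_S patterns above. */
    return vgetq_lane_f32(v, 3);
}
]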
- def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h", - neon_uimm3_bare, VPR64, VPR64, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } - - def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h", - neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } -} - -defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">; -defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">; - -// Pattern for lane in 128-bit vector -class NI_2VE_laneq - : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn), - (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VE_lane - : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn), - (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST ResVPR:$src, OpVPR:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; - -multiclass NI_2VE_v1_pat -{ - def : NI_2VE_laneq(subop # "_2s4s"), neon_uimm2_bare, - op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32>; - - def : NI_2VE_laneq(subop # "_4s4s"), neon_uimm2_bare, - op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32>; - - def : NI_2VE_laneq(subop # "_4h8h"), neon_uimm3_bare, - op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16>; - - def : NI_2VE_laneq(subop # "_8h8h"), neon_uimm3_bare, - op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_lane(subop # "_2s4s"), neon_uimm1_bare, - op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32>; - - def : NI_2VE_lane(subop # "_4h8h"), neon_uimm2_bare, - op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16>; -} - -defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>; -defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>; - -class NI_2VE_2op size, bits<4> opcode, - string asmop, string ResS, string OpS, string EleOpS, - Operand OpImm, RegisterOperand ResVPR, - RegisterOperand OpVPR, RegisterOperand EleOpVPR> - : NeonI_2VElem { - bits<3> Index; - bits<5> Re; -} - -multiclass NI_2VE_v1_2op opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", - neon_uimm2_bare, VPR64, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // Index operations on 16-bit(H) elements are restricted to using v0-v15. 
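[Editor's note: the MLA/MLS by-element patterns in this block (which is being moved to the end of the file, not dropped) match what ACLE exposes as the *_lane multiply-accumulate intrinsics. A minimal sketch, assuming an AArch64 or ARMv7 NEON target.

#include <arm_neon.h>

/* MLA Vd.4s, Vn.4s, Vm.s[1]: acc + a * b[1], with the lane value
 * broadcast exactly as the Neon_vduplane in the patterns above. */
int32x4_t mla_lane(int32x4_t acc, int32x4_t a, int32x2_t b) {
    return vmlaq_lane_s32(acc, a, b, 1);
}
]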
- def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h", - neon_uimm3_bare, VPR64, VPR64, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } - - def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h", - neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } -} - -defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">; -defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">; -defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">; - -// Pattern for lane in 128-bit vector -class NI_2VE_mul_laneq - : Pat<(ResTy (op (OpTy OpVPR:$Rn), - (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VE_mul_lane - : Pat<(ResTy (op (OpTy OpVPR:$Rn), - (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST OpVPR:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; - -multiclass NI_2VE_mul_v1_pat { - def : NI_2VE_mul_laneq(subop # "_2s4s"), neon_uimm2_bare, - op, VPR64, VPR128, v2i32, v2i32, v4i32>; - - def : NI_2VE_mul_laneq(subop # "_4s4s"), neon_uimm2_bare, - op, VPR128, VPR128, v4i32, v4i32, v4i32>; - - def : NI_2VE_mul_laneq(subop # "_4h8h"), neon_uimm3_bare, - op, VPR64, VPR128Lo, v4i16, v4i16, v8i16>; - - def : NI_2VE_mul_laneq(subop # "_8h8h"), neon_uimm3_bare, - op, VPR128, VPR128Lo, v8i16, v8i16, v8i16>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_mul_lane(subop # "_2s4s"), neon_uimm1_bare, - op, VPR64, VPR64, v2i32, v2i32, v2i32>; - - def : NI_2VE_mul_lane(subop # "_4h8h"), neon_uimm2_bare, - op, VPR64, VPR64Lo, v4i16, v4i16, v4i16>; -} - -defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>; -defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>; -defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>; - -// Variant 2 - -multiclass NI_2VE_v2_2op opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", - neon_uimm2_bare, VPR64, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // _1d2d doesn't exist! 
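[Editor's note: the two-operand NI_2VE_2op forms above (no tied $src accumulator) cover plain and saturating by-element multiplies. A sketch, assuming an AArch64 target; the laneq form can address all four lanes of a 128-bit register, which is why the element register class in these definitions is always 128-bit.

#include <arm_neon.h>

/* MUL Vd.4s, Vn.4s, Vm.s[3]. */
int32x4_t mul_lane(int32x4_t a, int32x4_t b) {
    return vmulq_laneq_s32(a, b, 3);
}

/* SQDMULH Vd.8h, Vn.8h, Vm.h[2]: (2*a*b[2]) >> 16 with saturation;
 * 16-bit elements carry the v0-v15 restriction noted above. */
int16x8_t qdmulh_lane(int16x8_t a, int16x4_t b) {
    return vqdmulhq_lane_s16(a, b, 2);
}
]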
- - def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d", - neon_uimm1_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{0}}; - let Inst{21} = 0b0; - let Inst{20-16} = Re; - } -} - -defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">; -defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">; - -class NI_2VE_mul_lane_2d - : Pat<(ResTy (op (OpTy OpVPR:$Rn), - (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))), - (INST OpVPR:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>; - -multiclass NI_2VE_mul_v2_pat { - def : NI_2VE_mul_laneq(subop # "_2s4s"), neon_uimm2_bare, - op, VPR64, VPR128, v2f32, v2f32, v4f32>; - - def : NI_2VE_mul_laneq(subop # "_4s4s"), neon_uimm2_bare, - op, VPR128, VPR128, v4f32, v4f32, v4f32>; - - def : NI_2VE_mul_laneq(subop # "_2d2d"), neon_uimm1_bare, - op, VPR128, VPR128, v2f64, v2f64, v2f64>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_mul_lane(subop # "_2s4s"), neon_uimm1_bare, - op, VPR64, VPR64, v2f32, v2f32, v2f32>; - - def : NI_2VE_mul_lane_2d(subop # "_2d2d"), neon_uimm1_bare, - op, VPR128, VPR64, v2f64, v2f64, v1f64, - BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>; -} - -defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>; -defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>; - -// The followings are patterns using fma -// -ffp-contract=fast generates fma - -multiclass NI_2VE_v2 opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", - neon_uimm2_bare, VPR64, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // _1d2d doesn't exist! 
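[Editor's note: the Variant 2 forms above are the floating-point by-element multiplies (FMUL, and FMULX via int_aarch64_neon_vmulx). A sketch of the FMUL case, assuming an AArch64 target; the 2d-by-1d pattern with Neon_combine_2d handles the 64-bit element operand the same way.

#include <arm_neon.h>

/* FMUL Vd.2d, Vn.2d, Vm.d[1]. */
float64x2_t fmul_lane(float64x2_t a, float64x2_t b) {
    return vmulq_laneq_f64(a, b, 1);
}
]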
- - def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d", - neon_uimm1_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{0}}; - let Inst{21} = 0b0; - let Inst{20-16} = Re; - } -} - -defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">; -defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">; - -// Pattern for lane in 128-bit vector -class NI_2VEswap_laneq - : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))), - (ResTy ResVPR:$src), (ResTy ResVPR:$Rn))), - (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VEswap_lane - : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))), - (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), - (INST ResVPR:$src, ResVPR:$Rn, - (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VEswap_lane_2d2d - : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))), - (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), - (INST ResVPR:$src, ResVPR:$Rn, - (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>; - - -multiclass NI_2VE_fma_v2_pat { - def : NI_2VEswap_laneq(subop # "_2s4s"), - neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, - BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; - - def : NI_2VEswap_laneq(subop # "_4s4s"), - neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, - BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; - - def : NI_2VEswap_laneq(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, - BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VEswap_lane(subop # "_2s4s"), - neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, - BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; - - def : NI_2VEswap_lane_2d2d(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, - BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>; -} - -defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>; - -multiclass NI_2VE_fms_v2_pat -{ - def : NI_2VEswap_laneq(subop # "_2s4s"), - neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, - BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_laneq(subop # "_2s4s"), - neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, - BinOpFrag<(Neon_vduplane - (fneg node:$LHS), node:$RHS)>>; - - def : NI_2VEswap_laneq(subop # "_4s4s"), - neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, - BinOpFrag<(fneg (Neon_vduplane - node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_laneq(subop # "_4s4s"), - neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, - BinOpFrag<(Neon_vduplane - (fneg node:$LHS), node:$RHS)>>; - - def : NI_2VEswap_laneq(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, - BinOpFrag<(fneg (Neon_vduplane - node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_laneq(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, - BinOpFrag<(Neon_vduplane - (fneg node:$LHS), node:$RHS)>>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VEswap_lane(subop # "_2s4s"), - neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, - BinOpFrag<(fneg (Neon_vduplane - node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_lane(subop # "_2s4s"), - neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, - BinOpFrag<(Neon_vduplane - (fneg node:$LHS), node:$RHS)>>; - - def : NI_2VEswap_lane(subop # "_4s4s"), - neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32, - BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>; - - def : 
NI_2VEswap_lane(subop # "_4s4s"), - neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32, - BinOpFrag<(Neon_vduplane (fneg node:$LHS), node:$RHS)>>; - - def : NI_2VEswap_lane_2d2d(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, - BinOpFrag<(fneg (Neon_combine_2d - node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_lane_2d2d(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, - BinOpFrag<(Neon_combine_2d - (fneg node:$LHS), (fneg node:$RHS))>>; -} - -defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>; - -// Variant 3: Long type -// E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S -// SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S - -multiclass NI_2VE_v3 opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s", - neon_uimm2_bare, VPR128, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // Index operations on 16-bit(H) elements are restricted to using v0-v15. - def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h", - neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } - - def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h", - neon_uimm3_bare, VPR128, VPR64, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } -} - -defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">; -defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">; -defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">; -defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">; -defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">; -defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">; - -multiclass NI_2VE_v3_2op opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s", - neon_uimm2_bare, VPR128, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // Index operations on 16-bit(H) elements are restricted to using v0-v15. 
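[Editor's note: the FMLA/FMLS lane patterns above (including the several legal placements of fneg that the FMLS variants enumerate) correspond to the fused ACLE intrinsics; as the comment in this block notes, -ffp-contract=fast is what lets the compiler form the fma nodes these patterns match. A sketch, assuming an AArch64 target.

#include <arm_neon.h>

/* FMLA Vd.4s, Vn.4s, Vm.s[3]: acc + a*b[3] in one rounding step. */
float32x4_t fma_lane(float32x4_t acc, float32x4_t a, float32x4_t b) {
    return vfmaq_laneq_f32(acc, a, b, 3);
}

/* FMLS Vd.4s, Vn.4s, Vm.s[3]: acc - a*b[3]; the fneg may sit on
 * either side of the vduplane, hence the pattern variants above. */
float32x4_t fms_lane(float32x4_t acc, float32x4_t a, float32x4_t b) {
    return vfmsq_laneq_f32(acc, a, b, 3);
}
]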
- def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h", - neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } - - def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h", - neon_uimm3_bare, VPR128, VPR64, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } -} - -defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">; -defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">; -defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">; - -// Pattern for lane in 128-bit vector -class NI_2VEL2_laneq - : Pat<(ResTy (op (ResTy VPR128:$src), - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vduplane - (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VEL2_lane - : Pat<(ResTy (op (ResTy VPR128:$src), - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vduplane - (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST VPR128:$src, VPR128:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; - -multiclass NI_2VEL_v3_pat { - def : NI_2VE_laneq(subop # "_4s4h"), neon_uimm3_bare, - op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16>; - - def : NI_2VE_laneq(subop # "_2d2s"), neon_uimm2_bare, - op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32>; - - def : NI_2VEL2_laneq(subop # "_4s8h"), neon_uimm3_bare, - op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_laneq(subop # "_2d4s"), neon_uimm2_bare, - op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_lane(subop # "_4s4h"), neon_uimm2_bare, - op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16>; - - def : NI_2VE_lane(subop # "_2d2s"), neon_uimm1_bare, - op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32>; - - def : NI_2VEL2_lane(subop # "_4s8h"), neon_uimm2_bare, - op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_lane(subop # "_2d4s"), neon_uimm1_bare, - op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>; -} - -defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>; -defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>; -defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>; -defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>; - -// Pattern for lane in 128-bit vector -class NI_2VEL2_mul_laneq - : Pat<(ResTy (op - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vduplane - (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VEL2_mul_lane - : Pat<(ResTy (op - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vduplane - (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST VPR128:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; - -multiclass NI_2VEL_mul_v3_pat { - def : NI_2VE_mul_laneq(subop # "_4s4h"), neon_uimm3_bare, - op, VPR64, VPR128Lo, v4i32, v4i16, v8i16>; - - def : NI_2VE_mul_laneq(subop # "_2d2s"), neon_uimm2_bare, - op, VPR64, VPR128, v2i64, v2i32, v4i32>; - - def : NI_2VEL2_mul_laneq(subop # "_4s8h"), neon_uimm3_bare, - op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_mul_laneq(subop # "_2d4s"), neon_uimm2_bare, - op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>; - - // Index can only be 
half of the max value for lane in 64-bit vector - - def : NI_2VE_mul_lane(subop # "_4s4h"), neon_uimm2_bare, - op, VPR64, VPR64Lo, v4i32, v4i16, v4i16>; - - def : NI_2VE_mul_lane(subop # "_2d2s"), neon_uimm1_bare, - op, VPR64, VPR64, v2i64, v2i32, v2i32>; - - def : NI_2VEL2_mul_lane(subop # "_4s8h"), neon_uimm2_bare, - op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_mul_lane(subop # "_2d4s"), neon_uimm1_bare, - op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>; -} - -defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>; -defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>; -defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>; - -multiclass NI_qdma { - def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (op node:$Ra, - (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>; - - def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (op node:$Ra, - (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>; -} - -defm Neon_qdmlal : NI_qdma; -defm Neon_qdmlsl : NI_qdma; - -multiclass NI_2VEL_v3_qdma_pat { - def : NI_2VE_laneq(subop # "_4s4h"), neon_uimm3_bare, - !cast(op # "_4s"), VPR128, VPR64, VPR128Lo, - v4i32, v4i16, v8i16>; - - def : NI_2VE_laneq(subop # "_2d2s"), neon_uimm2_bare, - !cast(op # "_2d"), VPR128, VPR64, VPR128, - v2i64, v2i32, v4i32>; - - def : NI_2VEL2_laneq(subop # "_4s8h"), neon_uimm3_bare, - !cast(op # "_4s"), VPR128Lo, - v4i32, v8i16, v8i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_laneq(subop # "_2d4s"), neon_uimm2_bare, - !cast(op # "_2d"), VPR128, - v2i64, v4i32, v4i32, v2i32, Neon_High4S>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_lane(subop # "_4s4h"), neon_uimm2_bare, - !cast(op # "_4s"), VPR128, VPR64, VPR64Lo, - v4i32, v4i16, v4i16>; - - def : NI_2VE_lane(subop # "_2d2s"), neon_uimm1_bare, - !cast(op # "_2d"), VPR128, VPR64, VPR64, - v2i64, v2i32, v2i32>; - - def : NI_2VEL2_lane(subop # "_4s8h"), neon_uimm2_bare, - !cast(op # "_4s"), VPR64Lo, - v4i32, v8i16, v4i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_lane(subop # "_2d4s"), neon_uimm1_bare, - !cast(op # "_2d"), VPR64, - v2i64, v4i32, v2i32, v2i32, Neon_High4S>; -} - -defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">; -defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">; - -// End of implementation for instruction class (3V Elem) - class NeonI_INS_main : NeonI_copy<0b1, 0b0, 0b0011, @@ -6803,13 +6382,13 @@ def : NeonInstAlias<"mov $Rd.d[$Imm], $Rn", (INSdx VPR128:$Rd, GPR64:$Rn, neon_uimm1_bare:$Imm), 0>; class Neon_INS_main_pattern + RegisterClass OpGPR, ValueType OpTy, + Operand OpImm, Instruction INS> : Pat<(ResTy (vector_insert (ResTy VPR64:$src), (OpTy OpGPR:$Rn), (OpImm:$Imm))), - (ResTy (EXTRACT_SUBREG + (ResTy (EXTRACT_SUBREG (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), OpGPR:$Rn, OpImm:$Imm)), sub_64))>; @@ -6824,7 +6403,7 @@ def INSdx_pattern : Neon_INS_main_pattern : NeonI_insert<0b1, 0b1, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, ResImm:$Immd, ResImm:$Immn), asmop # "\t$Rd." # Res # "[$Immd], $Rn." 
# Res # "[$Immn]", [], @@ -6947,8 +6526,8 @@ def : Pat <(NaTy (vector_insert (NaTy VPR64:$src), (MidTy OpFPR:$Rn), (ResImm:$Imm))), - (NaTy (EXTRACT_SUBREG - (ResTy (INS + (NaTy (EXTRACT_SUBREG + (ResTy (INS (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)), (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)), ResImm:$Imm, @@ -7007,19 +6586,19 @@ multiclass Neon_SMOVx_pattern ; - + def : Pat<(i64 (sext (i32 (vector_extract (StTy VPR128:$Rn), (StImm:$Imm))))), (SMOVI VPR128:$Rn, StImm:$Imm)>; - + def : Pat<(i64 (sext_inreg (i64 (vector_extract (NaTy VPR64:$Rn), (NaImm:$Imm))), eleTy)), (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), NaImm:$Imm)>; - + def : Pat<(i64 (sext_inreg (i64 (anyext (i32 (vector_extract @@ -7027,12 +6606,12 @@ multiclass Neon_SMOVx_pattern ; - + def : Pat<(i64 (sext (i32 (vector_extract (NaTy VPR64:$Rn), (NaImm:$Imm))))), (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - NaImm:$Imm)>; + NaImm:$Imm)>; } defm : Neon_SMOVx_pattern; def : Neon_UMOV_pattern; + neon_uimm2_bare, UMOVwh>; def : Neon_UMOV_pattern; @@ -7159,13 +6738,10 @@ def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))), def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))), (FMOVxd FPR64:$Rn)>; - + def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))), (f64 FPR64:$Rn)>; -def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))), - (f32 FPR32:$Rn)>; - def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)), (v1i8 (EXTRACT_SUBREG (v16i8 (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))), @@ -7182,10 +6758,45 @@ def : Pat<(v1i32 (scalar_to_vector GPR32:$src)), def : Pat<(v1i64 (scalar_to_vector GPR64:$src)), (FMOVdx $src)>; -def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))), - (FMOVdd $src)>; -def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$src))), - (FMOVss $src)>; +def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)), + (v8i8 (EXTRACT_SUBREG (v16i8 + (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))), + sub_64))>; + +def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)), + (v4i16 (EXTRACT_SUBREG (v8i16 + (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))), + sub_64))>; + +def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)), + (v2i32 (EXTRACT_SUBREG (v16i8 + (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))), + sub_64))>; + +def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)), + (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))>; + +def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)), + (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))>; + +def : Pat<(v4i32 (scalar_to_vector GPR32:$Rn)), + (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))>; + +def : Pat<(v2i64 (scalar_to_vector GPR64:$Rn)), + (INSdx (v2i64 (IMPLICIT_DEF)), $Rn, (i64 0))>; + +def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)), + (v2i32 (EXTRACT_SUBREG (v16i8 + (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))), + sub_64))>; + +def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)), + (v2i32 (EXTRACT_SUBREG (v16i8 + (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))), + sub_64))>; + +def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))), + (v1f64 FPR64:$Rn)>; def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))), (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), @@ -7270,15 +6881,15 @@ defm : NeonI_DUP_Elt_pattern; def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))), - (v2f32 (DUPELT2s + (v2f32 (DUPELT2s (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), (i64 0)))>; def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))), - (v4f32 (DUPELT4s + (v4f32 (DUPELT4s (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), (i64 0)))>; def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))), - (v2f64 (DUPELT2d + (v2f64 (DUPELT2d (SUBREG_TO_REG 
(i64 0), FPR64:$Rn, sub_64), (i64 0)))>; @@ -7287,7 +6898,7 @@ class NeonI_DUP : NeonI_copy; @@ -7331,13 +6942,13 @@ multiclass Concat_Vector_Pattern { def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)), (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>; def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))), - (INSELd + (INSELd (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)), (i64 1), (i64 0))>; def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))), - (DUPELT2d + (DUPELT2d (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), (i64 0))> ; } @@ -7363,6 +6974,751 @@ def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))), def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))), (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; +// The followings are for instruction class (3V Elem) + +// Variant 1 + +class NI_2VE size, bits<4> opcode, + string asmop, string ResS, string OpS, string EleOpS, + Operand OpImm, RegisterOperand ResVPR, + RegisterOperand OpVPR, RegisterOperand EleOpVPR> + : NeonI_2VElem { + bits<3> Index; + bits<5> Re; + + let Constraints = "$src = $Rd"; +} + +multiclass NI_2VE_v1 opcode, string asmop> { + // vector register class for element is always 128-bit to cover the max index + def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", + neon_uimm2_bare, VPR64, VPR64, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", + neon_uimm2_bare, VPR128, VPR128, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + // Index operations on 16-bit(H) elements are restricted to using v0-v15. 
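+  // (Only four bits, Inst{19-16} = Re{3-0}, remain for the element register
+  // in this encoding, which is why these defs use the VPR128Lo class.)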
+ def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h", + neon_uimm3_bare, VPR64, VPR64, VPR128Lo> { + let Inst{11} = {Index{2}}; + let Inst{21} = {Index{1}}; + let Inst{20} = {Index{0}}; + let Inst{19-16} = Re{3-0}; + } + + def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h", + neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { + let Inst{11} = {Index{2}}; + let Inst{21} = {Index{1}}; + let Inst{20} = {Index{0}}; + let Inst{19-16} = Re{3-0}; + } +} + +defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">; +defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">; + +// Pattern for lane in 128-bit vector +class NI_2VE_laneq + : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn), + (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), + (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>; + +// Pattern for lane in 64-bit vector +class NI_2VE_lane + : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn), + (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), + (INST ResVPR:$src, OpVPR:$Rn, + (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; + +multiclass NI_2VE_v1_pat +{ + def : NI_2VE_laneq(subop # "_2s4s"), neon_uimm2_bare, + op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32>; + + def : NI_2VE_laneq(subop # "_4s4s"), neon_uimm2_bare, + op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32>; + + def : NI_2VE_laneq(subop # "_4h8h"), neon_uimm3_bare, + op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16>; + + def : NI_2VE_laneq(subop # "_8h8h"), neon_uimm3_bare, + op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16>; + + // Index can only be half of the max value for lane in 64-bit vector + + def : NI_2VE_lane(subop # "_2s4s"), neon_uimm1_bare, + op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32>; + + def : NI_2VE_lane(subop # "_4h8h"), neon_uimm2_bare, + op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16>; +} + +defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>; +defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>; + +class NI_2VE_2op size, bits<4> opcode, + string asmop, string ResS, string OpS, string EleOpS, + Operand OpImm, RegisterOperand ResVPR, + RegisterOperand OpVPR, RegisterOperand EleOpVPR> + : NeonI_2VElem { + bits<3> Index; + bits<5> Re; +} + +multiclass NI_2VE_v1_2op opcode, string asmop> { + // vector register class for element is always 128-bit to cover the max index + def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", + neon_uimm2_bare, VPR64, VPR64, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", + neon_uimm2_bare, VPR128, VPR128, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + // Index operations on 16-bit(H) elements are restricted to using v0-v15. 
+ def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h", + neon_uimm3_bare, VPR64, VPR64, VPR128Lo> { + let Inst{11} = {Index{2}}; + let Inst{21} = {Index{1}}; + let Inst{20} = {Index{0}}; + let Inst{19-16} = Re{3-0}; + } + + def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h", + neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { + let Inst{11} = {Index{2}}; + let Inst{21} = {Index{1}}; + let Inst{20} = {Index{0}}; + let Inst{19-16} = Re{3-0}; + } +} + +defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">; +defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">; +defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">; + +// Pattern for lane in 128-bit vector +class NI_2VE_mul_laneq + : Pat<(ResTy (op (OpTy OpVPR:$Rn), + (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), + (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>; + +// Pattern for lane in 64-bit vector +class NI_2VE_mul_lane + : Pat<(ResTy (op (OpTy OpVPR:$Rn), + (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), + (INST OpVPR:$Rn, + (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; + +multiclass NI_2VE_mul_v1_pat { + def : NI_2VE_mul_laneq(subop # "_2s4s"), neon_uimm2_bare, + op, VPR64, VPR128, v2i32, v2i32, v4i32>; + + def : NI_2VE_mul_laneq(subop # "_4s4s"), neon_uimm2_bare, + op, VPR128, VPR128, v4i32, v4i32, v4i32>; + + def : NI_2VE_mul_laneq(subop # "_4h8h"), neon_uimm3_bare, + op, VPR64, VPR128Lo, v4i16, v4i16, v8i16>; + + def : NI_2VE_mul_laneq(subop # "_8h8h"), neon_uimm3_bare, + op, VPR128, VPR128Lo, v8i16, v8i16, v8i16>; + + // Index can only be half of the max value for lane in 64-bit vector + + def : NI_2VE_mul_lane(subop # "_2s4s"), neon_uimm1_bare, + op, VPR64, VPR64, v2i32, v2i32, v2i32>; + + def : NI_2VE_mul_lane(subop # "_4h8h"), neon_uimm2_bare, + op, VPR64, VPR64Lo, v4i16, v4i16, v4i16>; +} + +defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>; +defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>; +defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>; + +// Variant 2 + +multiclass NI_2VE_v2_2op opcode, string asmop> { + // vector register class for element is always 128-bit to cover the max index + def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", + neon_uimm2_bare, VPR64, VPR64, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", + neon_uimm2_bare, VPR128, VPR128, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + // _1d2d doesn't exist! 
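+  // (There is no .1d vector arrangement for these by-element multiplies;
+  // the scalar by-element form covers the single-lane case.)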
+ + def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d", + neon_uimm1_bare, VPR128, VPR128, VPR128> { + let Inst{11} = {Index{0}}; + let Inst{21} = 0b0; + let Inst{20-16} = Re; + } +} + +defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">; +defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">; + +class NI_2VE_mul_lane_2d + : Pat<(ResTy (op (OpTy OpVPR:$Rn), + (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))), + (INST OpVPR:$Rn, + (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>; + +multiclass NI_2VE_mul_v2_pat { + def : NI_2VE_mul_laneq(subop # "_2s4s"), neon_uimm2_bare, + op, VPR64, VPR128, v2f32, v2f32, v4f32>; + + def : NI_2VE_mul_laneq(subop # "_4s4s"), neon_uimm2_bare, + op, VPR128, VPR128, v4f32, v4f32, v4f32>; + + def : NI_2VE_mul_laneq(subop # "_2d2d"), neon_uimm1_bare, + op, VPR128, VPR128, v2f64, v2f64, v2f64>; + + // Index can only be half of the max value for lane in 64-bit vector + + def : NI_2VE_mul_lane(subop # "_2s4s"), neon_uimm1_bare, + op, VPR64, VPR64, v2f32, v2f32, v2f32>; + + def : NI_2VE_mul_lane_2d(subop # "_2d2d"), neon_uimm1_bare, + op, VPR128, VPR64, v2f64, v2f64, v1f64, + BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>; +} + +defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>; +defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>; + +def : Pat<(v2f32 (fmul (v2f32 (Neon_vdup (f32 FPR32:$Re))), + (v2f32 VPR64:$Rn))), + (FMULve_2s4s VPR64:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; + +def : Pat<(v4f32 (fmul (v4f32 (Neon_vdup (f32 FPR32:$Re))), + (v4f32 VPR128:$Rn))), + (FMULve_4s4s VPR128:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; + +def : Pat<(v2f64 (fmul (v2f64 (Neon_vdup (f64 FPR64:$Re))), + (v2f64 VPR128:$Rn))), + (FMULve_2d2d VPR128:$Rn, (SUBREG_TO_REG (i64 0), $Re, sub_64), 0)>; + +// The followings are patterns using fma +// -ffp-contract=fast generates fma + +multiclass NI_2VE_v2 opcode, string asmop> { + // vector register class for element is always 128-bit to cover the max index + def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", + neon_uimm2_bare, VPR64, VPR64, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", + neon_uimm2_bare, VPR128, VPR128, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + // _1d2d doesn't exist! 
+ + def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d", + neon_uimm1_bare, VPR128, VPR128, VPR128> { + let Inst{11} = {Index{0}}; + let Inst{21} = 0b0; + let Inst{20-16} = Re; + } +} + +defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">; +defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">; + +// Pattern for lane in 128-bit vector +class NI_2VEswap_laneq + : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))), + (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), + (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>; + +// Pattern for lane 0 +class NI_2VEfma_lane0 + : Pat<(ResTy (op (ResTy ResVPR:$Rn), + (ResTy (Neon_vdup (f32 FPR32:$Re))), + (ResTy ResVPR:$src))), + (INST ResVPR:$src, ResVPR:$Rn, + (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; + +// Pattern for lane in 64-bit vector +class NI_2VEswap_lane + : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))), + (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), + (INST ResVPR:$src, ResVPR:$Rn, + (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>; + +// Pattern for lane in 64-bit vector +class NI_2VEswap_lane_2d2d + : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))), + (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), + (INST ResVPR:$src, ResVPR:$Rn, + (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>; + + +multiclass NI_2VE_fma_v2_pat { + def : NI_2VEswap_laneq(subop # "_2s4s"), + neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, + BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; + + def : NI_2VEfma_lane0(subop # "_2s4s"), + op, VPR64, v2f32>; + + def : NI_2VEswap_laneq(subop # "_4s4s"), + neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, + BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; + + def : NI_2VEfma_lane0(subop # "_4s4s"), + op, VPR128, v4f32>; + + def : NI_2VEswap_laneq(subop # "_2d2d"), + neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, + BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; + + // Index can only be half of the max value for lane in 64-bit vector + + def : NI_2VEswap_lane(subop # "_2s4s"), + neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, + BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; + + def : NI_2VEswap_lane_2d2d(subop # "_2d2d"), + neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, + BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>; +} + +defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>; + +// Pattern for lane 0 +class NI_2VEfms_lane0 + : Pat<(ResTy (op (ResTy (fneg ResVPR:$Rn)), + (ResTy (Neon_vdup (f32 FPR32:$Re))), + (ResTy ResVPR:$src))), + (INST ResVPR:$src, ResVPR:$Rn, + (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; + +multiclass NI_2VE_fms_v2_pat +{ + def : NI_2VEswap_laneq(subop # "_2s4s"), + neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, + BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>; + + def : NI_2VEswap_laneq(subop # "_2s4s"), + neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, + BinOpFrag<(Neon_vduplane + (fneg node:$LHS), node:$RHS)>>; + + def : NI_2VEfms_lane0(subop # "_2s4s"), + op, VPR64, v2f32>; + + def : NI_2VEswap_laneq(subop # "_4s4s"), + neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, + BinOpFrag<(fneg (Neon_vduplane + node:$LHS, node:$RHS))>>; + + def : NI_2VEswap_laneq(subop # "_4s4s"), + neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, + BinOpFrag<(Neon_vduplane + (fneg node:$LHS), node:$RHS)>>; + + def : NI_2VEfms_lane0(subop # "_4s4s"), + op, VPR128, v4f32>; + + def : NI_2VEswap_laneq(subop # "_2d2d"), + neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, + BinOpFrag<(fneg (Neon_vduplane + node:$LHS, 
node:$RHS))>>; + + def : NI_2VEswap_laneq(subop # "_2d2d"), + neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, + BinOpFrag<(Neon_vduplane + (fneg node:$LHS), node:$RHS)>>; + + // Index can only be half of the max value for lane in 64-bit vector + + def : NI_2VEswap_lane(subop # "_2s4s"), + neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, + BinOpFrag<(fneg (Neon_vduplane + node:$LHS, node:$RHS))>>; + + def : NI_2VEswap_lane(subop # "_2s4s"), + neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, + BinOpFrag<(Neon_vduplane + (fneg node:$LHS), node:$RHS)>>; + + def : NI_2VEswap_lane(subop # "_4s4s"), + neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32, + BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>; + + def : NI_2VEswap_lane(subop # "_4s4s"), + neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32, + BinOpFrag<(Neon_vduplane (fneg node:$LHS), node:$RHS)>>; + + def : NI_2VEswap_lane_2d2d(subop # "_2d2d"), + neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, + BinOpFrag<(fneg (Neon_combine_2d + node:$LHS, node:$RHS))>>; + + def : NI_2VEswap_lane_2d2d(subop # "_2d2d"), + neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, + BinOpFrag<(Neon_combine_2d + (fneg node:$LHS), (fneg node:$RHS))>>; +} + +defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>; + +// Variant 3: Long type +// E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S +// SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S + +multiclass NI_2VE_v3 opcode, string asmop> { + // vector register class for element is always 128-bit to cover the max index + def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s", + neon_uimm2_bare, VPR128, VPR64, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s", + neon_uimm2_bare, VPR128, VPR128, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + // Index operations on 16-bit(H) elements are restricted to using v0-v15. + def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h", + neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { + let Inst{11} = {Index{2}}; + let Inst{21} = {Index{1}}; + let Inst{20} = {Index{0}}; + let Inst{19-16} = Re{3-0}; + } + + def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h", + neon_uimm3_bare, VPR128, VPR64, VPR128Lo> { + let Inst{11} = {Index{2}}; + let Inst{21} = {Index{1}}; + let Inst{20} = {Index{0}}; + let Inst{19-16} = Re{3-0}; + } +} + +defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">; +defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">; +defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">; +defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">; +defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">; +defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">; + +multiclass NI_2VE_v3_2op opcode, string asmop> { + // vector register class for element is always 128-bit to cover the max index + def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s", + neon_uimm2_bare, VPR128, VPR64, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s", + neon_uimm2_bare, VPR128, VPR128, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + // Index operations on 16-bit(H) elements are restricted to using v0-v15. 
+ def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h", + neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { + let Inst{11} = {Index{2}}; + let Inst{21} = {Index{1}}; + let Inst{20} = {Index{0}}; + let Inst{19-16} = Re{3-0}; + } + + def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h", + neon_uimm3_bare, VPR128, VPR64, VPR128Lo> { + let Inst{11} = {Index{2}}; + let Inst{21} = {Index{1}}; + let Inst{20} = {Index{0}}; + let Inst{19-16} = Re{3-0}; + } +} + +defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">; +defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">; +defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">; + +def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))), + (FMOVdd $src)>; + +// Pattern for lane in 128-bit vector +class NI_2VEL2_laneq + : Pat<(ResTy (op (ResTy VPR128:$src), + (HalfOpTy (hiop (OpTy VPR128:$Rn))), + (HalfOpTy (Neon_vduplane + (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), + (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>; + +// Pattern for lane in 64-bit vector +class NI_2VEL2_lane + : Pat<(ResTy (op (ResTy VPR128:$src), + (HalfOpTy (hiop (OpTy VPR128:$Rn))), + (HalfOpTy (Neon_vduplane + (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), + (INST VPR128:$src, VPR128:$Rn, + (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; + +class NI_2VEL2_lane0 + : Pat<(ResTy (op (ResTy VPR128:$src), + (HalfOpTy (hiop (OpTy VPR128:$Rn))), + (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))), + (INST VPR128:$src, VPR128:$Rn, (DupInst $Re), 0)>; + +multiclass NI_2VEL_v3_pat { + def : NI_2VE_laneq(subop # "_4s4h"), neon_uimm3_bare, + op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16>; + + def : NI_2VE_laneq(subop # "_2d2s"), neon_uimm2_bare, + op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32>; + + def : NI_2VEL2_laneq(subop # "_4s8h"), neon_uimm3_bare, + op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>; + + def : NI_2VEL2_laneq(subop # "_2d4s"), neon_uimm2_bare, + op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>; + + def : NI_2VEL2_lane0(subop # "_4s8h"), + op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>; + + def : NI_2VEL2_lane0(subop # "_2d4s"), + op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>; + + // Index can only be half of the max value for lane in 64-bit vector + + def : NI_2VE_lane(subop # "_4s4h"), neon_uimm2_bare, + op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16>; + + def : NI_2VE_lane(subop # "_2d2s"), neon_uimm1_bare, + op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32>; + + def : NI_2VEL2_lane(subop # "_4s8h"), neon_uimm2_bare, + op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>; + + def : NI_2VEL2_lane(subop # "_2d4s"), neon_uimm1_bare, + op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>; +} + +defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>; +defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>; +defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>; +defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>; + +// Pattern for lane in 128-bit vector +class NI_2VEL2_mul_laneq + : Pat<(ResTy (op + (HalfOpTy (hiop (OpTy VPR128:$Rn))), + (HalfOpTy (Neon_vduplane + (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), + (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>; + +// Pattern for lane in 64-bit vector +class NI_2VEL2_mul_lane + : Pat<(ResTy (op + (HalfOpTy (hiop (OpTy VPR128:$Rn))), + (HalfOpTy (Neon_vduplane + (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), + (INST VPR128:$Rn, + (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; + +// Pattern for fixed lane 0 +class NI_2VEL2_mul_lane0 
+ : Pat<(ResTy (op + (HalfOpTy (hiop (OpTy VPR128:$Rn))), + (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))), + (INST VPR128:$Rn, (DupInst $Re), 0)>; + +multiclass NI_2VEL_mul_v3_pat { + def : NI_2VE_mul_laneq(subop # "_4s4h"), neon_uimm3_bare, + op, VPR64, VPR128Lo, v4i32, v4i16, v8i16>; + + def : NI_2VE_mul_laneq(subop # "_2d2s"), neon_uimm2_bare, + op, VPR64, VPR128, v2i64, v2i32, v4i32>; + + def : NI_2VEL2_mul_laneq(subop # "_4s8h"), neon_uimm3_bare, + op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>; + + def : NI_2VEL2_mul_laneq(subop # "_2d4s"), neon_uimm2_bare, + op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>; + + def : NI_2VEL2_mul_lane0(subop # "_4s8h"), + op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>; + + def : NI_2VEL2_mul_lane0(subop # "_2d4s"), + op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>; + + // Index can only be half of the max value for lane in 64-bit vector + + def : NI_2VE_mul_lane(subop # "_4s4h"), neon_uimm2_bare, + op, VPR64, VPR64Lo, v4i32, v4i16, v4i16>; + + def : NI_2VE_mul_lane(subop # "_2d2s"), neon_uimm1_bare, + op, VPR64, VPR64, v2i64, v2i32, v2i32>; + + def : NI_2VEL2_mul_lane(subop # "_4s8h"), neon_uimm2_bare, + op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>; + + def : NI_2VEL2_mul_lane(subop # "_2d4s"), neon_uimm1_bare, + op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>; +} + +defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>; +defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>; +defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>; + +multiclass NI_qdma { + def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), + (op node:$Ra, + (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>; + + def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), + (op node:$Ra, + (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>; +} + +defm Neon_qdmlal : NI_qdma; +defm Neon_qdmlsl : NI_qdma; + +multiclass NI_2VEL_v3_qdma_pat { + def : NI_2VE_laneq(subop # "_4s4h"), neon_uimm3_bare, + !cast(op # "_4s"), VPR128, VPR64, VPR128Lo, + v4i32, v4i16, v8i16>; + + def : NI_2VE_laneq(subop # "_2d2s"), neon_uimm2_bare, + !cast(op # "_2d"), VPR128, VPR64, VPR128, + v2i64, v2i32, v4i32>; + + def : NI_2VEL2_laneq(subop # "_4s8h"), neon_uimm3_bare, + !cast(op # "_4s"), VPR128Lo, + v4i32, v8i16, v8i16, v4i16, Neon_High8H>; + + def : NI_2VEL2_laneq(subop # "_2d4s"), neon_uimm2_bare, + !cast(op # "_2d"), VPR128, + v2i64, v4i32, v4i32, v2i32, Neon_High4S>; + + def : NI_2VEL2_lane0(subop # "_4s8h"), + !cast(op # "_4s"), + v4i32, v8i16, v4i16, Neon_High8H, DUP8h>; + + def : NI_2VEL2_lane0(subop # "_2d4s"), + !cast(op # "_2d"), + v2i64, v4i32, v2i32, Neon_High4S, DUP4s>; + + // Index can only be half of the max value for lane in 64-bit vector + + def : NI_2VE_lane(subop # "_4s4h"), neon_uimm2_bare, + !cast(op # "_4s"), VPR128, VPR64, VPR64Lo, + v4i32, v4i16, v4i16>; + + def : NI_2VE_lane(subop # "_2d2s"), neon_uimm1_bare, + !cast(op # "_2d"), VPR128, VPR64, VPR64, + v2i64, v2i32, v2i32>; + + def : NI_2VEL2_lane(subop # "_4s8h"), neon_uimm2_bare, + !cast(op # "_4s"), VPR64Lo, + v4i32, v8i16, v4i16, v4i16, Neon_High8H>; + + def : NI_2VEL2_lane(subop # "_2d4s"), neon_uimm1_bare, + !cast(op # "_2d"), VPR64, + v2i64, v4i32, v2i32, v2i32, Neon_High4S>; +} + +defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">; +defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">; + +// End of implementation for instruction class (3V Elem) + class NeonI_REV size, bit Q, bit U, bits<5> opcode, RegisterOperand 
ResVPR, ValueType ResTy, SDPatternOperator Neon_Rev> @@ -7411,35 +7767,35 @@ multiclass NeonI_PairwiseAdd opcode, [(set (v8i16 VPR128:$Rd), (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))], NoItinerary>; - + def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.4h, $Rn.8b", [(set (v4i16 VPR64:$Rd), (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))], NoItinerary>; - + def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.4s, $Rn.8h", [(set (v4i32 VPR128:$Rd), (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))], NoItinerary>; - + def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.4h", [(set (v2i32 VPR64:$Rd), (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))], NoItinerary>; - + def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.2d, $Rn.4s", [(set (v2i64 VPR128:$Rd), (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))], NoItinerary>; - + def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.1d, $Rn.2s", @@ -7453,6 +7809,11 @@ defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010, defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010, int_arm_neon_vpaddlu>; +def : Pat<(v1i64 (int_aarch64_neon_saddlv (v2i32 VPR64:$Rn))), + (SADDLP2s1d $Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_uaddlv (v2i32 VPR64:$Rn))), + (UADDLP2s1d $Rn)>; + multiclass NeonI_PairwiseAddAcc opcode, SDPatternOperator Neon_Padd> { let Constraints = "$src = $Rd" in { @@ -7460,18 +7821,18 @@ multiclass NeonI_PairwiseAddAcc opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "\t$Rd.8h, $Rn.16b", [(set (v8i16 VPR128:$Rd), - (v8i16 (Neon_Padd + (v8i16 (Neon_Padd (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))], NoItinerary>; - + def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), asmop # "\t$Rd.4h, $Rn.8b", [(set (v4i16 VPR64:$Rd), - (v4i16 (Neon_Padd + (v4i16 (Neon_Padd (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))], NoItinerary>; - + def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "\t$Rd.4s, $Rn.8h", @@ -7479,7 +7840,7 @@ multiclass NeonI_PairwiseAddAcc opcode, (v4i32 (Neon_Padd (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))], NoItinerary>; - + def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.4h", @@ -7487,7 +7848,7 @@ multiclass NeonI_PairwiseAddAcc opcode, (v2i32 (Neon_Padd (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))], NoItinerary>; - + def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "\t$Rd.2d, $Rn.4s", @@ -7495,7 +7856,7 @@ multiclass NeonI_PairwiseAddAcc opcode, (v2i64 (Neon_Padd (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))], NoItinerary>; - + def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), asmop # "\t$Rd.1d, $Rn.2s", @@ -7516,32 +7877,32 @@ multiclass NeonI_2VMisc_BHSDsize_1Arg opcode> { (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.16b, $Rn.16b", [], NoItinerary>; - + def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.8h, $Rn.8h", [], NoItinerary>; - + def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.4s, $Rn.4s", [], NoItinerary>; - + def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.2d, $Rn.2d", [], NoItinerary>; - + def 8b : NeonI_2VMisc<0b0, U, 0b00, 
opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.8b, $Rn.8b", [], NoItinerary>; - + def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.4h, $Rn.4h", [], NoItinerary>; - + def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", @@ -7581,31 +7942,31 @@ defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>; defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>; defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>; -def : Pat<(v16i8 (sub +def : Pat<(v16i8 (sub (v16i8 Neon_AllZero), (v16i8 VPR128:$Rn))), (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>; -def : Pat<(v8i8 (sub +def : Pat<(v8i8 (sub (v8i8 Neon_AllZero), (v8i8 VPR64:$Rn))), (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>; -def : Pat<(v8i16 (sub +def : Pat<(v8i16 (sub (v8i16 (bitconvert (v16i8 Neon_AllZero))), (v8i16 VPR128:$Rn))), (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>; -def : Pat<(v4i16 (sub +def : Pat<(v4i16 (sub (v4i16 (bitconvert (v8i8 Neon_AllZero))), (v4i16 VPR64:$Rn))), (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>; -def : Pat<(v4i32 (sub +def : Pat<(v4i32 (sub (v4i32 (bitconvert (v16i8 Neon_AllZero))), (v4i32 VPR128:$Rn))), (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>; -def : Pat<(v2i32 (sub +def : Pat<(v2i32 (sub (v2i32 (bitconvert (v8i8 Neon_AllZero))), (v2i32 VPR64:$Rn))), (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>; -def : Pat<(v2i64 (sub +def : Pat<(v2i64 (sub (v2i64 (bitconvert (v16i8 Neon_AllZero))), (v2i64 VPR128:$Rn))), (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>; @@ -7616,32 +7977,32 @@ multiclass NeonI_2VMisc_BHSDsize_2Args opcode> { (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "\t$Rd.16b, $Rn.16b", [], NoItinerary>; - + def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "\t$Rd.8h, $Rn.8h", [], NoItinerary>; - + def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "\t$Rd.4s, $Rn.4s", [], NoItinerary>; - + def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "\t$Rd.2d, $Rn.2d", [], NoItinerary>; - + def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), asmop # "\t$Rd.8b, $Rn.8b", [], NoItinerary>; - + def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), asmop # "\t$Rd.4h, $Rn.4h", [], NoItinerary>; - + def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", @@ -7694,35 +8055,35 @@ multiclass NeonI_2VMisc_BHSsizes; - + def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.8h, $Rn.8h", [(set (v8i16 VPR128:$Rd), (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))], NoItinerary>; - + def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.4s, $Rn.4s", [(set (v4i32 VPR128:$Rd), (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))], NoItinerary>; - + def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.8b, $Rn.8b", [(set (v8i8 VPR64:$Rd), (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))], NoItinerary>; - + def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.4h, $Rn.4h", [(set (v4i16 VPR64:$Rd), (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))], NoItinerary>; - + def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", @@ -7740,7 +8101,7 @@ multiclass NeonI_2VMisc_Bsize size, (outs VPR128:$Rd), 
(ins VPR128:$Rn), asmop # "\t$Rd.16b, $Rn.16b", [], NoItinerary>; - + def 8b : NeonI_2VMisc<0b0, U, size, Opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.8b, $Rn.8b", @@ -7761,31 +8122,31 @@ def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))), def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))), (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>; -def : Pat<(v16i8 (xor +def : Pat<(v16i8 (xor (v16i8 VPR128:$Rn), (v16i8 Neon_AllOne))), (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>; -def : Pat<(v8i8 (xor +def : Pat<(v8i8 (xor (v8i8 VPR64:$Rn), (v8i8 Neon_AllOne))), (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>; -def : Pat<(v8i16 (xor +def : Pat<(v8i16 (xor (v8i16 VPR128:$Rn), (v8i16 (bitconvert (v16i8 Neon_AllOne))))), (NOT16b VPR128:$Rn)>; -def : Pat<(v4i16 (xor +def : Pat<(v4i16 (xor (v4i16 VPR64:$Rn), (v4i16 (bitconvert (v8i8 Neon_AllOne))))), (NOT8b VPR64:$Rn)>; -def : Pat<(v4i32 (xor +def : Pat<(v4i32 (xor (v4i32 VPR128:$Rn), (v4i32 (bitconvert (v16i8 Neon_AllOne))))), (NOT16b VPR128:$Rn)>; -def : Pat<(v2i32 (xor +def : Pat<(v2i32 (xor (v2i32 VPR64:$Rn), (v2i32 (bitconvert (v8i8 Neon_AllOne))))), (NOT8b VPR64:$Rn)>; -def : Pat<(v2i64 (xor +def : Pat<(v2i64 (xor (v2i64 VPR128:$Rn), (v2i64 (bitconvert (v16i8 Neon_AllOne))))), (NOT16b VPR128:$Rn)>; @@ -7803,14 +8164,14 @@ multiclass NeonI_2VMisc_SDsizes opcode, [(set (v4f32 VPR128:$Rd), (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))], NoItinerary>; - + def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.2d, $Rn.2d", [(set (v2f64 VPR128:$Rd), (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))], NoItinerary>; - + def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", @@ -7843,12 +8204,12 @@ multiclass NeonI_2VMisc_HSD_Narrow opcode> { (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "2\t$Rd.16b, $Rn.8h", [], NoItinerary>; - + def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "2\t$Rd.8h, $Rn.4s", [], NoItinerary>; - + def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "2\t$Rd.4s, $Rn.2d", @@ -7861,7 +8222,7 @@ defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>; defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>; defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>; -multiclass NeonI_2VMisc_Narrow_Patterns { def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))), (v8i8 (!cast(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>; @@ -7871,11 +8232,11 @@ multiclass NeonI_2VMisc_Narrow_Patterns(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>; - + def : Pat<(v16i8 (concat_vectors (v8i8 VPR64:$src), (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))), - (!cast(Prefix # 8h16b) + (!cast(Prefix # 8h16b) (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64), VPR128:$Rn)>; @@ -7906,31 +8267,31 @@ multiclass NeonI_2VMisc_SHIFT opcode> { (ins VPR64:$Rn, uimm_exact8:$Imm), asmop # "\t$Rd.8h, $Rn.8b, $Imm", [], NoItinerary>; - + def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR64:$Rn, uimm_exact16:$Imm), asmop # "\t$Rd.4s, $Rn.4h, $Imm", [], NoItinerary>; - + def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR64:$Rn, uimm_exact32:$Imm), asmop # "\t$Rd.2d, $Rn.2s, $Imm", [], NoItinerary>; - + def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, uimm_exact8:$Imm), asmop # "2\t$Rd.8h, $Rn.16b, $Imm", [], NoItinerary>; - + def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, uimm_exact16:$Imm), asmop # "2\t$Rd.4s, $Rn.8h, $Imm", [], NoItinerary>; - + 
def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn, uimm_exact32:$Imm), @@ -7943,16 +8304,16 @@ defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>; class NeonI_SHLL_Patterns + string suffix> : Pat<(DesTy (shl (DesTy (ExtOp (OpTy VPR64:$Rn))), (DesTy (Neon_vdup (i32 Neon_Imm:$Imm))))), (!cast("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>; - + class NeonI_SHLL_High_Patterns + string suffix, PatFrag GetHigh> : Pat<(DesTy (shl (DesTy (ExtOp (OpTy (GetHigh VPR128:$Rn)))), @@ -7989,13 +8350,13 @@ multiclass NeonI_2VMisc_SD_Narrow opcode> { (outs VPR64:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.2s, $Rn.2d", [], NoItinerary>; - + let Constraints = "$src = $Rd" in { def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "2\t$Rd.8h, $Rn.4s", [], NoItinerary>; - + def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode, (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), asmop # "2\t$Rd.4s, $Rn.2d", @@ -8008,20 +8369,20 @@ defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>; multiclass NeonI_2VMisc_Narrow_Pattern { - + def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))), (!cast(prefix # "4s4h") (v4f32 VPR128:$Rn))>; - + def : Pat<(v8i16 (concat_vectors (v4i16 VPR64:$src), (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))), (!cast(prefix # "4s8h") (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), - (v4f32 VPR128:$Rn))>; - + (v4f32 VPR128:$Rn))>; + def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))), (!cast(prefix # "2d2s") (v2f64 VPR128:$Rn))>; - + def : Pat<(v4f32 (concat_vectors (v2f32 VPR64:$src), (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))), @@ -8045,13 +8406,13 @@ multiclass NeonI_2VMisc_D_Narrow { let Constraints = "$src = $Rd"; } - - def : Pat<(v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))), + + def : Pat<(v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))), (!cast(prefix # "2d2s") VPR128:$Rn)>; def : Pat<(v4f32 (concat_vectors (v2f32 VPR64:$src), - (v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))))), + (v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))))), (!cast(prefix # "2d4s") (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), VPR128:$Rn)>; @@ -8089,15 +8450,15 @@ defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>; multiclass NeonI_2VMisc_Extend_Pattern { def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))), (!cast(prefix # "4h4s") VPR64:$Rn)>; - + def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 (Neon_High8H (v8i16 VPR128:$Rn))))), (!cast(prefix # "8h4s") VPR128:$Rn)>; - + def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))), (!cast(prefix # "2s2d") VPR64:$Rn)>; - + def : Pat<(v2f64 (fextend (v2f32 (Neon_High4Float (v4f32 VPR128:$Rn))))), @@ -8111,7 +8472,7 @@ multiclass NeonI_2VMisc_SD_Conv opcode, ValueType ResTy2d, ValueType OpTy2d, ValueType ResTy2s, ValueType OpTy2s, SDPatternOperator Neon_Op> { - + def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode, (outs VPR128:$Rd), (ins VPR128:$Rn), asmop # "\t$Rd.4s, $Rn.4s", @@ -8125,7 +8486,7 @@ multiclass NeonI_2VMisc_SD_Conv opcode, [(set (ResTy2d VPR128:$Rd), (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))], NoItinerary>; - + def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", @@ -8141,23 +8502,23 @@ multiclass NeonI_2VMisc_fp_to_int; + int_arm_neon_vcvtns>; defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010, - int_aarch64_neon_fcvtnu>; + int_arm_neon_vcvtnu>; defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010, - int_aarch64_neon_fcvtps>; + int_arm_neon_vcvtps>; defm FCVTPU : 
NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010, - int_aarch64_neon_fcvtpu>; + int_arm_neon_vcvtpu>; defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011, - int_aarch64_neon_fcvtms>; + int_arm_neon_vcvtms>; defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011, - int_aarch64_neon_fcvtmu>; + int_arm_neon_vcvtmu>; defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>; defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>; defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100, - int_aarch64_neon_fcvtas>; + int_arm_neon_vcvtas>; defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100, - int_aarch64_neon_fcvtau>; + int_arm_neon_vcvtau>; multiclass NeonI_2VMisc_int_to_fp opcode, SDPatternOperator Neon_Op> { @@ -8186,8 +8547,7 @@ defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101, int_arm_neon_vrecpe>; defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101, int_arm_neon_vrsqrte>; -defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, - int_aarch64_neon_fsqrt>; +defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>; multiclass NeonI_2VMisc_S_Conv opcode, SDPatternOperator Neon_Op> { @@ -8197,7 +8557,7 @@ multiclass NeonI_2VMisc_S_Conv; - + def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode, (outs VPR64:$Rd), (ins VPR64:$Rn), asmop # "\t$Rd.2s, $Rn.2s", @@ -8330,3 +8690,157 @@ def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c", int_aarch64_neon_sha1c>; def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p", int_aarch64_neon_sha1p>; def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m", int_aarch64_neon_sha1m>; +// Additional patterns to match shl to USHL. +def : Pat<(v8i8 (shl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), + (USHLvvv_8B $Rn, $Rm)>; +def : Pat<(v4i16 (shl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), + (USHLvvv_4H $Rn, $Rm)>; +def : Pat<(v2i32 (shl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), + (USHLvvv_2S $Rn, $Rm)>; +def : Pat<(v1i64 (shl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), + (USHLddd $Rn, $Rm)>; +def : Pat<(v16i8 (shl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), + (USHLvvv_16B $Rn, $Rm)>; +def : Pat<(v8i16 (shl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), + (USHLvvv_8H $Rn, $Rm)>; +def : Pat<(v4i32 (shl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), + (USHLvvv_4S $Rn, $Rm)>; +def : Pat<(v2i64 (shl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), + (USHLvvv_2D $Rn, $Rm)>; + +// Additional patterns to match sra, srl. +// For a vector right shift by vector, the shift amounts of SSHL/USHL are +// negative. Negate the vector of shift amount first. 
+def : Pat<(v8i8 (srl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), + (USHLvvv_8B $Rn, (NEG8b $Rm))>; +def : Pat<(v4i16 (srl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), + (USHLvvv_4H $Rn, (NEG4h $Rm))>; +def : Pat<(v2i32 (srl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), + (USHLvvv_2S $Rn, (NEG2s $Rm))>; +def : Pat<(v1i64 (srl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), + (USHLddd $Rn, (NEGdd $Rm))>; +def : Pat<(v16i8 (srl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), + (USHLvvv_16B $Rn, (NEG16b $Rm))>; +def : Pat<(v8i16 (srl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), + (USHLvvv_8H $Rn, (NEG8h $Rm))>; +def : Pat<(v4i32 (srl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), + (USHLvvv_4S $Rn, (NEG4s $Rm))>; +def : Pat<(v2i64 (srl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), + (USHLvvv_2D $Rn, (NEG2d $Rm))>; + +def : Pat<(v8i8 (sra (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), + (SSHLvvv_8B $Rn, (NEG8b $Rm))>; +def : Pat<(v4i16 (sra (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), + (SSHLvvv_4H $Rn, (NEG4h $Rm))>; +def : Pat<(v2i32 (sra (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), + (SSHLvvv_2S $Rn, (NEG2s $Rm))>; +def : Pat<(v1i64 (sra (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), + (SSHLddd $Rn, (NEGdd $Rm))>; +def : Pat<(v16i8 (sra (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), + (SSHLvvv_16B $Rn, (NEG16b $Rm))>; +def : Pat<(v8i16 (sra (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), + (SSHLvvv_8H $Rn, (NEG8h $Rm))>; +def : Pat<(v4i32 (sra (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), + (SSHLvvv_4S $Rn, (NEG4s $Rm))>; +def : Pat<(v2i64 (sra (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), + (SSHLvvv_2D $Rn, (NEG2d $Rm))>; + +// +// Patterns for handling half-precision values +// + +// Convert f16 value coming in as i16 value to f32 +def : Pat<(f32 (f16_to_f32 (i32 (and (i32 GPR32:$Rn), 65535)))), + (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>; +def : Pat<(f32 (f16_to_f32 (i32 (assertzext GPR32:$Rn)))), + (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>; + +def : Pat<(f32 (f16_to_f32 (i32 (assertzext (i32 ( + f32_to_f16 (f32 FPR32:$Rn))))))), + (f32 FPR32:$Rn)>; + +// Patterns for vector extract of half-precision FP value in i16 storage type +def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract + (v4i16 VPR64:$Rn), neon_uimm2_bare:$Imm)), 65535)))), + (FCVTsh (f16 (DUPhv_H + (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + neon_uimm2_bare:$Imm)))>; + +def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract + (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)), 65535)))), + (FCVTsh (f16 (DUPhv_H (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)))>; + +// Patterns for vector insert of half-precision FP value 0 in i16 storage type +def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), + (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))), + (neon_uimm3_bare:$Imm))), + (v8i16 (INSELh (v8i16 VPR128:$Rn), + (v8i16 (SUBREG_TO_REG (i64 0), + (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)), + sub_16)), + neon_uimm3_bare:$Imm, 0))>; + +def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn), + (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))), + (neon_uimm2_bare:$Imm))), + (v4i16 (EXTRACT_SUBREG + (v8i16 (INSELh + (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + (v8i16 (SUBREG_TO_REG (i64 0), + (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)), + sub_16)), + neon_uimm2_bare:$Imm, 0)), + sub_64))>; + +// Patterns for vector insert of half-precision FP value in i16 storage type +def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), + (i32 (assertsext (i32 (fp_to_sint + (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))), + 
(neon_uimm3_bare:$Imm))), + (v8i16 (INSELh (v8i16 VPR128:$Rn), + (v8i16 (SUBREG_TO_REG (i64 0), + (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)), + sub_16)), + neon_uimm3_bare:$Imm, 0))>; + +def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn), + (i32 (assertsext (i32 (fp_to_sint + (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))), + (neon_uimm2_bare:$Imm))), + (v4i16 (EXTRACT_SUBREG + (v8i16 (INSELh + (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + (v8i16 (SUBREG_TO_REG (i64 0), + (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)), + sub_16)), + neon_uimm2_bare:$Imm, 0)), + sub_64))>; + +def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), + (i32 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)), + (neon_uimm3_bare:$Imm1))), + (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src), + neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>; + +// Patterns for vector copy of half-precision FP value in i16 storage type +def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), + (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32 + (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)), + 65535)))))))), + (neon_uimm3_bare:$Imm1))), + (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src), + neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>; + +def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn), + (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32 + (vector_extract (v4i16 VPR64:$src), neon_uimm3_bare:$Imm2)), + 65535)))))))), + (neon_uimm3_bare:$Imm1))), + (v4i16 (EXTRACT_SUBREG + (v8i16 (INSELh + (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), + neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2)), + sub_64))>; + + diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 75ec44f3fecb..618f6fb9289b 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -76,6 +76,12 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } +static bool hasFrameOffset(int opcode) { + return opcode != AArch64::LD1x2_16B && opcode != AArch64::LD1x3_16B && + opcode != AArch64::LD1x4_16B && opcode != AArch64::ST1x2_16B && + opcode != AArch64::ST1x3_16B && opcode != AArch64::ST1x4_16B; +} + void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI, int SPAdj, @@ -110,8 +116,10 @@ AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI, int64_t Offset; Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj, IsCalleeSaveOp); - - Offset += MI.getOperand(FIOperandNum + 1).getImm(); + // A vector load/store instruction doesn't have an offset operand. + bool HasOffsetOp = hasFrameOffset(MI.getOpcode()); + if (HasOffsetOp) + Offset += MI.getOperand(FIOperandNum + 1).getImm(); // DBG_VALUE instructions have no real restrictions so they can be handled // easily. 
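Taken together, the hunks around this point implement one rule: the resolved frame offset may be folded into the instruction only when the instruction actually has an offset operand and the value is encodable; otherwise FrameReg + Offset is materialized into a scratch register. A condensed C++ sketch of that test (illustrative names, not the LLVM API):

#include <cstdint>

// Condensed form of the patched check in eliminateFrameIndex. The
// LD1x*/ST1x* vector forms carry no offset operand, so for them only a
// zero offset can be used directly.
static bool canFoldFrameOffset(bool HasOffsetOp, int64_t Offset,
                               int OffsetScale, int MinOffset, int MaxOffset) {
  if (!HasOffsetOp && Offset != 0)
    return false;
  return Offset % OffsetScale == 0 && Offset >= MinOffset &&
         Offset <= MaxOffset;
}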
@@ -124,7 +132,7 @@ AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI, const AArch64InstrInfo &TII = *static_cast(MF.getTarget().getInstrInfo()); int MinOffset, MaxOffset, OffsetScale; - if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s) { + if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s || !HasOffsetOp) { MinOffset = 0; MaxOffset = 0xfff; OffsetScale = 1; @@ -133,10 +141,12 @@ AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI, TII.getAddressConstraints(MI, OffsetScale, MinOffset, MaxOffset); } - // The frame lowering has told us a base and offset it thinks we should use to - // access this variable, but it's still up to us to make sure the values are - // legal for the instruction in question. - if (Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) { + // There are two situations we don't use frame + offset directly in the + // instruction: + // (1) The offset can't really be scaled + // (2) Can't encode offset as it doesn't have an offset operand + if ((Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) || + (!HasOffsetOp && Offset != 0)) { unsigned BaseReg = MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); emitRegUpdate(MBB, MBBI, MBBI->getDebugLoc(), TII, @@ -150,7 +160,8 @@ AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI, assert(Offset >= 0 && "Unexpected negative offset from SP"); MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, true); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset / OffsetScale); + if (HasOffsetOp) + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset / OffsetScale); } unsigned diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index 4e2022c06165..8b1a9cb90740 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -155,7 +155,7 @@ def FPR16 : RegisterClass<"AArch64", [f16, v1i16], 16, (sequence "H%u", 0, 31)> { } -def FPR32 : RegisterClass<"AArch64", [f32, v1i32, v1f32], 32, +def FPR32 : RegisterClass<"AArch64", [f32, v1i32], 32, (sequence "S%u", 0, 31)> { } @@ -288,4 +288,4 @@ multiclass VectorList_BHSD; defm VPair : VectorList_BHSD<"VPair", 2, DPair, QPair>; defm VTriple : VectorList_BHSD<"VTriple", 3, DTriple, QTriple>; -defm VQuad : VectorList_BHSD<"VQuad", 4, DQuad, QQuad>; \ No newline at end of file +defm VQuad : VectorList_BHSD<"VQuad", 4, DQuad, QQuad>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index f1695e2ce207..8bb23b123c93 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -34,7 +34,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT, : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS), InstrInfo(Subtarget), - DL("e-p:64:64-i64:64:64-i128:128:128-s0:32:32-f128:128:128-n32:64-S128"), + DL("e-m:e-i64:64-i128:128-n32:64-S128"), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AsmParser/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AsmParser/CMakeLists.txt index a018a0aa7b36..e81ec70437a4 100644 --- 
a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AsmParser/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AsmParser/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMAArch64AsmParser AArch64AsmParser.cpp ) - -add_dependencies(LLVMAArch64AsmParser AArch64CommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AsmParser/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AsmParser/LLVMBuild.txt index bd1fcaf1ffe8..2d8f63212378 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AsmParser/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/AsmParser/LLVMBuild.txt @@ -19,6 +19,5 @@ type = Library name = AArch64AsmParser parent = AArch64 -required_libraries = AArch64Desc AArch64Info MC MCParser Support +required_libraries = AArch64Desc AArch64Info AArch64Utils MC MCParser Support add_to_library_groups = AArch64 - diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/CMakeLists.txt index 0f2e81693198..1eb83f5d03b3 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/CMakeLists.txt @@ -28,11 +28,9 @@ add_llvm_target(AArch64CodeGen AArch64TargetObjectFile.cpp ) -add_dependencies(LLVMAArch64CodeGen AArch64CommonTableGen) - add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) add_subdirectory(MCTargetDesc) add_subdirectory(TargetInfo) -add_subdirectory(Utils) \ No newline at end of file +add_subdirectory(Utils) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/Disassembler/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/Disassembler/CMakeLists.txt index d4bd163dad60..21baf250af86 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/Disassembler/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/Disassembler/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMAArch64Disassembler AArch64Disassembler.cpp ) - -add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/Disassembler/LLVMBuild.txt index a93e343886d0..05c4ed1646b9 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/Disassembler/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/Disassembler/LLVMBuild.txt @@ -19,6 +19,5 @@ type = Library name = AArch64Disassembler parent = AArch64 -required_libraries = AArch64CodeGen AArch64Desc AArch64Info AArch64Utils MC Support +required_libraries = AArch64Info AArch64Utils MC Support add_to_library_groups = AArch64 - diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/InstPrinter/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/InstPrinter/CMakeLists.txt index d4b980a94d9b..3db56e4733f5 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/InstPrinter/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/InstPrinter/CMakeLists.txt @@ -1,8 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - add_llvm_library(LLVMAArch64AsmPrinter AArch64InstPrinter.cpp ) - -add_dependencies(LLVMAArch64AsmPrinter AArch64CommonTableGen) - diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/LLVMBuild.txt index 6e4ce8bea8a7..4c8f1018c1a5 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/LLVMBuild.txt @@ -31,6 +31,5 @@ has_jit = 1 type = Library name = AArch64CodeGen parent = AArch64 -required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AsmPrinter CodeGen Core MC SelectionDAG Support Target +required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AArch64Utils AsmPrinter CodeGen Core MC SelectionDAG Support Target add_to_library_groups = AArch64 - diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index add874c12019..e9747d686688 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -22,7 +22,6 @@ AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo() { AlignmentIsInBytes = false; CommentString = "//"; - PrivateGlobalPrefix = ".L"; Code32Directive = ".code\t32"; Data16bitsDirective = "\t.hword\t"; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt index 44c66a224e30..54c4465b60d7 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt @@ -7,7 +7,3 @@ add_llvm_library(LLVMAArch64Desc AArch64MCExpr.cpp AArch64MCTargetDesc.cpp ) -add_dependencies(LLVMAArch64Desc AArch64CommonTableGen) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/TargetInfo/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/TargetInfo/CMakeLists.txt index e236eed00be1..ee734c647261 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/TargetInfo/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/TargetInfo/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - add_llvm_library(LLVMAArch64Info AArch64TargetInfo.cpp ) - -add_dependencies(LLVMAArch64Info AArch64CommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/TargetInfo/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/TargetInfo/LLVMBuild.txt index 5b003f012218..642917239810 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/TargetInfo/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/TargetInfo/LLVMBuild.txt @@ -19,6 +19,5 @@ type = Library name = AArch64Info parent = AArch64 -required_libraries = MC Support Target +required_libraries = Support add_to_library_groups = AArch64 - diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/Utils/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/Utils/CMakeLists.txt index 2348e44f850b..8ee03a7571b4 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/Utils/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/Utils/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMAArch64Utils AArch64BaseInfo.cpp ) - -add_dependencies(LLVMAArch64Utils AArch64CommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/Utils/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/Utils/LLVMBuild.txt index 1be537598ae5..4acecc935e2a 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/AArch64/Utils/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/AArch64/Utils/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = AArch64Utils parent = AArch64 -required_libraries = Core Support +required_libraries = Support add_to_library_groups = AArch64 diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/A15SDOptimizer.cpp index ff585b41a2aa..8ea2073aad12 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/A15SDOptimizer.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/A15SDOptimizer.cpp @@ -165,7 +165,7 @@ unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) { if (!MI) return ARM::ssub_0; MachineOperand *MO = MI->findRegisterDefOperand(SReg); - assert(MO->isReg() && "Non register operand found!"); + assert(MO->isReg() && "Non-register operand found!"); if (!MO) return ARM::ssub_0; if (MI->isCopy() && usesRegClass(MI->getOperand(1), diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARM.td b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARM.td index 36e5680ca4e0..27bbcc22b640 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARM.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARM.td @@ -179,7 +179,14 @@ def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", "Cortex-A5 ARM processors", [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, FeatureVMLxForwarding, FeatureT2XtPk, - FeatureTrustZone]>; + FeatureTrustZone, FeatureMP]>; +def ProcA7 : SubtargetFeature<"a7", "ARMProcFamily", "CortexA7", + "Cortex-A7 ARM processors", + [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, FeatureT2XtPk, + FeatureVFP4, FeatureMP, + FeatureHWDiv, FeatureHWDivARM, + FeatureTrustZone, FeatureVirtualization]>; def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", "Cortex-A8 ARM processors", [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, @@ -198,6 +205,15 @@ def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureAvoidMOVsShOp, FeatureHasSlowFPVMLx, FeatureTrustZone]>; +def ProcA12 : SubtargetFeature<"a12", 
"ARMProcFamily", "CortexA12", + "Cortex-A12 ARM processors", + [FeatureVMLxForwarding, + FeatureT2XtPk, FeatureVFP4, + FeatureHWDiv, FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureVirtualization, + FeatureTrustZone]>; + // FIXME: It has not been determined if A15 has these features. def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", @@ -227,6 +243,26 @@ def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", FeatureAvoidPartialCPSR, FeatureT2XtPk]>; +// FIXME: krait has currently the same features as A9 +// plus VFP4 and hardware division features. +def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", + "Qualcomm ARM processors", + [FeatureVMLxForwarding, + FeatureT2XtPk, FeatureFP16, + FeatureAvoidPartialCPSR, + FeatureTrustZone, + FeatureVFP4, + FeatureHWDiv, + FeatureHWDivARM]>; + + +def FeatureAPCS : SubtargetFeature<"apcs", "TargetABI", "ARM_ABI_APCS", + "Use the APCS ABI">; + +def FeatureAAPCS : SubtargetFeature<"aapcs", "TargetABI", "ARM_ABI_AAPCS", + "Use the AAPCS ABI">; + + class ProcNoItin Features> : Processor; @@ -296,6 +332,10 @@ def : ProcessorModel<"cortex-a5", CortexA8Model, [ProcA5, HasV7Ops, FeatureNEON, FeatureDB, FeatureVFP4, FeatureDSPThumb2, FeatureHasRAS, FeatureAClass]>; +def : ProcessorModel<"cortex-a7", CortexA8Model, + [ProcA7, HasV7Ops, FeatureNEON, FeatureDB, + FeatureDSPThumb2, FeatureHasRAS, + FeatureAClass]>; def : ProcessorModel<"cortex-a8", CortexA8Model, [ProcA8, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS, @@ -308,11 +348,19 @@ def : ProcessorModel<"cortex-a9-mp", CortexA9Model, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureMP, FeatureHasRAS, FeatureAClass]>; + +// FIXME: A12 has currently the same Schedule model as A9 +def : ProcessorModel<"cortex-a12", CortexA9Model, + [ProcA12, HasV7Ops, FeatureNEON, FeatureDB, + FeatureDSPThumb2, FeatureMP, + FeatureHasRAS, FeatureAClass]>; + // FIXME: A15 has currently the same ProcessorModel as A9. def : ProcessorModel<"cortex-a15", CortexA9Model, [ProcA15, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS, FeatureAClass]>; + // FIXME: R5 has currently the same ProcessorModel as A8. def : ProcessorModel<"cortex-r5", CortexA8Model, [ProcR5, HasV7Ops, FeatureDB, @@ -347,6 +395,13 @@ def : ProcNoItin<"cortex-a57", [ProcA57, HasV8Ops, FeatureAClass, FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2]>; +// FIXME: krait has currently the same Schedule model as A9 +def : ProcessorModel<"krait", CortexA9Model, + [ProcKrait, HasV7Ops, + FeatureNEON, FeatureDB, + FeatureDSPThumb2, FeatureHasRAS, + FeatureAClass]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -363,17 +418,6 @@ include "ARMInstrInfo.td" def ARMInstrInfo : InstrInfo; - -//===----------------------------------------------------------------------===// -// Assembly printer -//===----------------------------------------------------------------------===// -// ARM Uses the MC printer for asm output, so make sure the TableGen -// AsmWriter bits get associated with the correct class. 
-def ARMAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - bit isMCAsmWriter = 1; -} - //===----------------------------------------------------------------------===// // Declare the target which we are implementing //===----------------------------------------------------------------------===// @@ -381,6 +425,4 @@ def ARMAsmWriter : AsmWriter { def ARM : Target { // Pull in Instruction Info: let InstructionSet = ARMInstrInfo; - - let AssemblyWriters = [ARMAsmWriter]; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index e79f88d4b6f1..21efe10b1f0b 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -213,18 +213,9 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, O << "(PLT)"; break; } - case MachineOperand::MO_ExternalSymbol: { - O << *GetExternalSymbolSymbol(MO.getSymbolName()); - if (TF == ARMII::MO_PLT) - O << "(PLT)"; - break; - } case MachineOperand::MO_ConstantPoolIndex: O << *GetCPISymbol(MO.getIndex()); break; - case MachineOperand::MO_JumpTableIndex: - O << *GetJTISymbol(MO.getIndex()); - break; } } @@ -232,16 +223,18 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, MCSymbol *ARMAsmPrinter:: GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { + const DataLayout *DL = TM.getDataLayout(); SmallString<60> Name; - raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "JTI" + raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid << '_' << uid2; return OutContext.GetOrCreateSymbol(Name.str()); } MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const { + const DataLayout *DL = TM.getDataLayout(); SmallString<60> Name; - raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "SJLJEH" + raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH" << getFunctionNumber(); return OutContext.GetOrCreateSymbol(Name.str()); } @@ -497,6 +490,19 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { SectionKind::getText()); OutStreamer.SwitchSection(StaticInitSect); } + + // Compiling with debug info should not affect the code + // generation! Since some of the data sections are first switched + // to only in ASMPrinter::doFinalization(), the debug info + // sections would come before the data sections in the object + // file. This is problematic, since PC-relative loads have to use + // different instruction sequences in order to reach global data + // in the same object file. + OutStreamer.SwitchSection(getObjFileLowering().getCStringSection()); + OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); + OutStreamer.SwitchSection(getObjFileLowering().getDataCommonSection()); + OutStreamer.SwitchSection(getObjFileLowering().getDataBSSSection()); + OutStreamer.SwitchSection(getObjFileLowering().getNonLazySymbolPointerSection()); } // Use unified assembler syntax. 
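(Illustrative aside, not part of the patch.) The two AsmPrinter hunks above only change where the private-global prefix is looked up (the DataLayout instead of the MCAsmInfo); the way the jump-table and SJLJ labels are composed is unchanged. A minimal stand-alone C++ sketch of that composition, with the common ELF ".L" prefix and the numbers assumed for the example:

    #include <iostream>
    #include <sstream>
    #include <string>

    // Same shape as GetARMJTIPICJumpTableLabel2 above:
    //   <prefix>JTI<function number>_<uid>_<uid2>
    std::string jumpTableLabel(const std::string &PrivatePrefix, unsigned FnNum,
                               unsigned UId, unsigned UId2) {
      std::ostringstream OS;
      OS << PrivatePrefix << "JTI" << FnNum << '_' << UId << '_' << UId2;
      return OS.str();
    }

    int main() {
      std::cout << jumpTableLabel(".L", 3, 0, 1) << "\n"; // prints .LJTI3_0_1
    }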
@@ -618,7 +624,8 @@ void ARMAsmPrinter::emitAttributes() { std::string CPUString = Subtarget->getCPUString(); - if (CPUString != "generic") + // FIXME: remove krait check when GNU tools support krait cpu + if (CPUString != "generic" && CPUString != "krait") ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString); ATS.emitAttribute(ARMBuildAttrs::CPU_arch, @@ -765,23 +772,25 @@ static MCSymbolRefExpr::VariantKind getModifierVariantKind(ARMCP::ARMCPModifier Modifier) { switch (Modifier) { case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None; - case ARMCP::TLSGD: return MCSymbolRefExpr::VK_ARM_TLSGD; - case ARMCP::TPOFF: return MCSymbolRefExpr::VK_ARM_TPOFF; - case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_ARM_GOTTPOFF; - case ARMCP::GOT: return MCSymbolRefExpr::VK_ARM_GOT; - case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_ARM_GOTOFF; + case ARMCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD; + case ARMCP::TPOFF: return MCSymbolRefExpr::VK_TPOFF; + case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_GOTTPOFF; + case ARMCP::GOT: return MCSymbolRefExpr::VK_GOT; + case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_GOTOFF; } llvm_unreachable("Invalid ARMCPModifier!"); } -MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) { +MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV, + unsigned char TargetFlags) { bool isIndirect = Subtarget->isTargetDarwin() && + (TargetFlags & ARMII::MO_NONLAZY) && Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()); if (!isIndirect) return getSymbol(GV); // FIXME: Remove this when Darwin transition to @GOT like syntax. - MCSymbol *MCSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + MCSymbol *MCSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); MachineModuleInfoMachO &MMIMachO = MMI->getObjFileInfo(); MachineModuleInfoImpl::StubValueTy &StubSym = @@ -795,6 +804,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) { void ARMAsmPrinter:: EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { + const DataLayout *DL = TM.getDataLayout(); int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType()); ARMConstantPoolValue *ACPV = static_cast(MCPV); @@ -803,7 +813,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { if (ACPV->isLSDA()) { SmallString<128> Str; raw_svector_ostream OS(Str); - OS << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber(); + OS << DL->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber(); MCSym = OutContext.GetOrCreateSymbol(OS.str()); } else if (ACPV->isBlockAddress()) { const BlockAddress *BA = @@ -811,7 +821,11 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { MCSym = GetBlockAddressSymbol(BA); } else if (ACPV->isGlobalValue()) { const GlobalValue *GV = cast(ACPV)->getGV(); - MCSym = GetARMGVSymbol(GV); + + // On Darwin, const-pool entries may get the "FOO$non_lazy_ptr" mangling, so + // flag the global as MO_NONLAZY. + unsigned char TF = Subtarget->isTargetDarwin() ? 
ARMII::MO_NONLAZY : 0; + MCSym = GetARMGVSymbol(GV, TF); } else if (ACPV->isMachineBasicBlock()) { const MachineBasicBlock *MBB = cast(ACPV)->getMBB(); MCSym = MBB->getSymbol(); @@ -827,7 +841,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { OutContext); if (ACPV->getPCAdjustment()) { - MCSymbol *PCLabel = getPICLabel(MAI->getPrivateGlobalPrefix(), + MCSymbol *PCLabel = getPICLabel(DL->getPrivateGlobalPrefix(), getFunctionNumber(), ACPV->getLabelId(), OutContext); @@ -1106,6 +1120,8 @@ extern cl::opt EnableARMEHABI; #include "ARMGenMCPseudoLowering.inc" void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { + const DataLayout *DL = TM.getDataLayout(); + // If we just ended a constant pool, mark it as such. if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) { OutStreamer.EmitDataRegion(MCDR_DataRegionEnd); @@ -1239,26 +1255,21 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); unsigned TF = MI->getOperand(1).getTargetFlags(); - bool isPIC = TF == ARMII::MO_LO16_NONLAZY_PIC; const GlobalValue *GV = MI->getOperand(1).getGlobal(); - MCSymbol *GVSym = GetARMGVSymbol(GV); + MCSymbol *GVSym = GetARMGVSymbol(GV, TF); const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); - if (isPIC) { - MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(), - getFunctionNumber(), - MI->getOperand(2).getImm(), OutContext); - const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); - unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4; - const MCExpr *PCRelExpr = - ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr, - MCBinaryExpr::CreateAdd(LabelSymExpr, + + MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(), + getFunctionNumber(), + MI->getOperand(2).getImm(), OutContext); + const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); + unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4; + const MCExpr *PCRelExpr = + ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr, + MCBinaryExpr::CreateAdd(LabelSymExpr, MCConstantExpr::Create(PCAdj, OutContext), - OutContext), OutContext), OutContext); + OutContext), OutContext), OutContext); TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr)); - } else { - const MCExpr *RefExpr= ARMMCExpr::CreateLower16(GVSymExpr, OutContext); - TmpInst.addOperand(MCOperand::CreateExpr(RefExpr)); - } // Add predicate operands. TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); @@ -1277,26 +1288,21 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); unsigned TF = MI->getOperand(2).getTargetFlags(); - bool isPIC = TF == ARMII::MO_HI16_NONLAZY_PIC; const GlobalValue *GV = MI->getOperand(2).getGlobal(); - MCSymbol *GVSym = GetARMGVSymbol(GV); + MCSymbol *GVSym = GetARMGVSymbol(GV, TF); const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); - if (isPIC) { - MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(), - getFunctionNumber(), - MI->getOperand(3).getImm(), OutContext); - const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); - unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 
8 : 4; - const MCExpr *PCRelExpr = + + MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(), + getFunctionNumber(), + MI->getOperand(3).getImm(), OutContext); + const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); + unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4; + const MCExpr *PCRelExpr = ARMMCExpr::CreateUpper16(MCBinaryExpr::CreateSub(GVSymExpr, MCBinaryExpr::CreateAdd(LabelSymExpr, MCConstantExpr::Create(PCAdj, OutContext), OutContext), OutContext), OutContext); TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr)); - } else { - const MCExpr *RefExpr= ARMMCExpr::CreateUpper16(GVSymExpr, OutContext); - TmpInst.addOperand(MCOperand::CreateExpr(RefExpr)); - } // Add predicate operands. TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); TmpInst.addOperand(MCOperand::CreateReg(0)); @@ -1312,7 +1318,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // This adds the address of LPC0 to r0. // Emit the label. - OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(), + OutStreamer.EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(), getFunctionNumber(), MI->getOperand(2).getImm(), OutContext)); @@ -1333,7 +1339,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // This adds the address of LPC0 to r0. // Emit the label. - OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(), + OutStreamer.EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(), getFunctionNumber(), MI->getOperand(2).getImm(), OutContext)); @@ -1364,7 +1370,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // a PC-relative address at the ldr instruction. // Emit the label. - OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(), + OutStreamer.EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(), getFunctionNumber(), MI->getOperand(2).getImm(), OutContext)); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMAsmPrinter.h b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMAsmPrinter.h index de72e063e0d5..700640010712 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMAsmPrinter.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMAsmPrinter.h @@ -115,7 +115,7 @@ private: MCSymbol *GetARMSJLJEHLabel() const; - MCSymbol *GetARMGVSymbol(const GlobalValue *GV); + MCSymbol *GetARMGVSymbol(const GlobalValue *GV, unsigned char TargetFlags); public: /// EmitMachineConstantPoolValue - Print a machine constantpool value to diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index df8c017515d9..c0478260f51c 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -283,7 +283,7 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, // Walk backwards from the end of the basic block until the branch is // analyzed or we give up. - while (isPredicated(I) || I->isTerminator()) { + while (isPredicated(I) || I->isTerminator() || I->isDebugValue()) { // Flag to be raised on unanalyzeable instructions. 
This is useful in cases // where we want to clean up on the end of the basic block before we bail @@ -1325,10 +1325,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic || - Opcode == ARM::MOV_ga_dyn || + Opcode == ARM::LDRLIT_ga_pcrel || + Opcode == ARM::LDRLIT_ga_pcrel_ldr || + Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || - Opcode == ARM::t2MOV_ga_dyn || Opcode == ARM::t2MOV_ga_pcrel) { if (MI1->getOpcode() != Opcode) return false; @@ -1340,10 +1341,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, if (MO0.getOffset() != MO1.getOffset()) return false; - if (Opcode == ARM::MOV_ga_dyn || + if (Opcode == ARM::LDRLIT_ga_pcrel || + Opcode == ARM::LDRLIT_ga_pcrel_ldr || + Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || - Opcode == ARM::t2MOV_ga_dyn || Opcode == ARM::t2MOV_ga_pcrel) // Ignore the PC labels. return MO0.getGlobal() == MO1.getGlobal(); @@ -1857,12 +1859,12 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, } } -bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF, - MachineInstr *MI, +bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, + MachineFunction &MF, MachineInstr *MI, unsigned NumBytes) { // This optimisation potentially adds lots of load and store // micro-operations, it's only really a great benefit to code-size. - if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize)) + if (!Subtarget.isMinSize()) return false; // If only one register is pushed/popped, LLVM can use an LDR/STR @@ -2372,8 +2374,32 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, isSafe = true; break; } - // Condition code is after the operand before CPSR. - ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm(); + // Condition code is after the operand before CPSR except for VSELs. + ARMCC::CondCodes CC; + bool IsInstrVSel = true; + switch (Instr.getOpcode()) { + default: + IsInstrVSel = false; + CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm(); + break; + case ARM::VSELEQD: + case ARM::VSELEQS: + CC = ARMCC::EQ; + break; + case ARM::VSELGTD: + case ARM::VSELGTS: + CC = ARMCC::GT; + break; + case ARM::VSELGED: + case ARM::VSELGES: + CC = ARMCC::GE; + break; + case ARM::VSELVSS: + case ARM::VSELVSD: + CC = ARMCC::VS; + break; + } + if (Sub) { ARMCC::CondCodes NewCC = getSwappedCondition(CC); if (NewCC == ARMCC::AL) @@ -2384,11 +2410,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // If it is safe to remove CmpInstr, the condition code of these // operands will be modified. if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && - Sub->getOperand(2).getReg() == SrcReg) - OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)), - NewCC)); - } - else + Sub->getOperand(2).getReg() == SrcReg) { + // VSel doesn't support condition code update. + if (IsInstrVSel) + return false; + OperandsToUpdate.push_back( + std::make_pair(&((*I).getOperand(IO - 1)), NewCC)); + } + } else switch (CC) { default: // CPSR can be used multiple times, we should continue. 
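(Illustrative aside, not part of the patch.) The optimizeCompareInstr hunk above does two things: it reads the condition code out of the VSEL opcode itself rather than from an operand, and it refuses to rewrite the condition when the compared subtraction has its operands reversed, because a VSEL's condition is fixed by its opcode. The rewrite it would otherwise apply via getSwappedCondition obeys the usual identity: a condition that holds for cmp(a, b) must be mirrored when the flags instead come from sub(b, a). A tiny self-contained check of that identity:

    #include <cassert>
    #include <initializer_list>

    enum Cond { EQ, NE, GT, GE, LT, LE };

    // Stand-in for the role getSwappedCondition() plays above.
    Cond swapped(Cond CC) {
      switch (CC) {
      case GT: return LT;
      case GE: return LE;
      case LT: return GT;
      case LE: return GE;
      default: return CC; // EQ and NE are symmetric
      }
    }

    bool holds(Cond CC, int L, int R) {
      switch (CC) {
      case EQ: return L == R;
      case NE: return L != R;
      case GT: return L > R;
      case GE: return L >= R;
      case LT: return L < R;
      case LE: return L <= R;
      }
      return false;
    }

    int main() {
      for (int a = -2; a <= 2; ++a)
        for (int b = -2; b <= 2; ++b)
          for (Cond CC : {EQ, NE, GT, GE, LT, LE})
            assert(holds(CC, a, b) == holds(swapped(CC), b, a));
    }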
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 93e59647d220..98b4c09a36b2 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -417,7 +417,8 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, /// NumBytes. This can save a few bytes per function in code-size, but /// obviously generates more memory traffic. As such, it only takes /// effect in functions being optimised for size. -bool tryFoldSPUpdateIntoPushPop(MachineFunction &MF, MachineInstr *MI, +bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, + MachineFunction &MF, MachineInstr *MI, unsigned NumBytes); /// rewriteARMFrameIndex / rewriteT2FrameIndex - diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMBuildAttrs.h b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMBuildAttrs.h index b16d4ef54b6d..c80659f86d8e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMBuildAttrs.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMBuildAttrs.h @@ -35,7 +35,7 @@ namespace ARMBuildAttrs { CPU_arch_profile = 7, ARM_ISA_use = 8, THUMB_ISA_use = 9, - VFP_arch = 10, + FP_arch = 10, WMMX_arch = 11, Advanced_SIMD_arch = 12, PCS_config = 13, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index e6f7f86c5587..f695a8e4e506 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -18,11 +18,13 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" +#include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove! #include "llvm/Target/TargetFrameLowering.h" @@ -898,10 +900,61 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return true; } - case ARM::MOV_ga_dyn: + case ARM::LDRLIT_ga_abs: + case ARM::LDRLIT_ga_pcrel: + case ARM::LDRLIT_ga_pcrel_ldr: + case ARM::tLDRLIT_ga_abs: + case ARM::tLDRLIT_ga_pcrel: { + unsigned DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + const MachineOperand &MO1 = MI.getOperand(1); + const GlobalValue *GV = MO1.getGlobal(); + bool IsARM = + Opcode != ARM::tLDRLIT_ga_pcrel && Opcode != ARM::tLDRLIT_ga_abs; + bool IsPIC = + Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs; + unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci; + unsigned PICAddOpc = + IsARM + ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICADD : ARM::PICLDR) + : ARM::tPICADD; + + // We need a new const-pool entry to load from. + MachineConstantPool *MCP = MBB.getParent()->getConstantPool(); + unsigned ARMPCLabelIndex = 0; + MachineConstantPoolValue *CPV; + + if (IsPIC) { + unsigned PCAdj = IsARM ? 
8 : 4; + ARMPCLabelIndex = AFI->createPICLabelUId(); + CPV = ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, + ARMCP::CPValue, PCAdj); + } else + CPV = ARMConstantPoolConstant::Create(GV, ARMCP::no_modifier); + + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LDRLITOpc), DstReg) + .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4)); + if (IsARM) + MIB.addImm(0); + AddDefaultPred(MIB); + + if (IsPIC) { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg) + .addImm(ARMPCLabelIndex); + + if (IsARM) + AddDefaultPred(MIB); + } + + MI.eraseFromParent(); + return true; + } case ARM::MOV_ga_pcrel: case ARM::MOV_ga_pcrel_ldr: - case ARM::t2MOV_ga_dyn: case ARM::t2MOV_ga_pcrel: { // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode. unsigned LabelId = AFI->createPICLabelUId(); @@ -910,14 +963,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, const MachineOperand &MO1 = MI.getOperand(1); const GlobalValue *GV = MO1.getGlobal(); unsigned TF = MO1.getTargetFlags(); - bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode!=ARM::t2MOV_ga_dyn); - bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn); + bool isARM = Opcode != ARM::t2MOV_ga_pcrel; unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel; unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel; - unsigned LO16TF = isPIC - ? ARMII::MO_LO16_NONLAZY_PIC : ARMII::MO_LO16_NONLAZY; - unsigned HI16TF = isPIC - ? ARMII::MO_HI16_NONLAZY_PIC : ARMII::MO_HI16_NONLAZY; + unsigned LO16TF = TF | ARMII::MO_LO16; + unsigned HI16TF = TF | ARMII::MO_HI16; unsigned PICAddOpc = isARM ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD) : ARM::tPICADD; @@ -925,16 +975,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, TII->get(LO16Opc), DstReg) .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF) .addImm(LabelId); - MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(HI16Opc), DstReg) + + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg) .addReg(DstReg) .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF) .addImm(LabelId); - if (!isPIC) { - TransferImpOps(MI, MIB1, MIB2); - MI.eraseFromParent(); - return true; - } MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc)) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMFPUName.def b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMFPUName.def index 9a1bbe703d99..1fef3b3bc5e2 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMFPUName.def +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMFPUName.def @@ -28,5 +28,6 @@ ARM_FPU_NAME("neon", NEON) ARM_FPU_NAME("neon-vfpv4", NEON_VFPV4) ARM_FPU_NAME("neon-fp-armv8", NEON_FP_ARMV8) ARM_FPU_NAME("crypto-neon-fp-armv8", CRYPTO_NEON_FP_ARMV8) +ARM_FPU_NAME("softvfp", SOFTVFP) #undef ARM_FPU_NAME diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMFastISel.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMFastISel.cpp index a4004f32db37..5bb4ba9bd2d2 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -78,6 +78,7 @@ class ARMFastISel : public FastISel { /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. 
const ARMSubtarget *Subtarget; + Module &M; const TargetMachine &TM; const TargetInstrInfo &TII; const TargetLowering &TLI; @@ -91,6 +92,7 @@ class ARMFastISel : public FastISel { explicit ARMFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) : FastISel(funcInfo, libInfo), + M(const_cast(*funcInfo.Fn->getParent())), TM(funcInfo.MF->getTarget()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()) { @@ -679,25 +681,24 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { if (!Subtarget->isTargetDarwin() && IsThreadLocal) return 0; // Use movw+movt when possible, it avoids constant pool entries. - // Darwin targets don't support movt with Reloc::Static, see - // ARMTargetLowering::LowerGlobalAddressDarwin. Other targets only support - // static movt relocations. + // Non-darwin targets only support static movt relocations in FastISel. if (Subtarget->useMovt() && - Subtarget->isTargetDarwin() == (RelocM != Reloc::Static)) { + (Subtarget->isTargetDarwin() || RelocM == Reloc::Static)) { unsigned Opc; + unsigned char TF = 0; + if (Subtarget->isTargetDarwin()) + TF = ARMII::MO_NONLAZY; + switch (RelocM) { case Reloc::PIC_: Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel; break; - case Reloc::DynamicNoPIC: - Opc = isThumb2 ? ARM::t2MOV_ga_dyn : ARM::MOV_ga_dyn; - break; default: Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm; break; } AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), - DestReg).addGlobalAddress(GV)); + DestReg).addGlobalAddress(GV, 0, TF)); } else { // MachineConstantPool wants an explicit alignment. unsigned Align = TD.getPrefTypeAlignment(GV->getType()); @@ -802,9 +803,11 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { // This will get lowered later into the correct offsets and registers // via rewriteXFrameIndex. if (SI != FuncInfo.StaticAllocaMap.end()) { + unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; const TargetRegisterClass* RC = TLI.getRegClassFor(VT); unsigned ResultReg = createResultReg(RC); - unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; + ResultReg = constrainOperandRegClass(TII.get(Opc), ResultReg, 0); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addFrameIndex(SI->second) @@ -2243,7 +2246,7 @@ unsigned ARMFastISel::getLibcallReg(const Twine &Name) { EVT LCREVT = TLI.getValueType(GVTy); if (!LCREVT.isSimple()) return 0; - GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, + GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false, GlobalValue::ExternalLinkage, 0, Name); assert(GV->getType() == GVTy && "We miscomputed the type for the global!"); return ARMMaterializeGV(GV, LCREVT.getSimpleVT()); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 3e72d3690aa5..ceff79068521 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -256,9 +256,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (NumBytes) { // Adjust SP after all the callee-save spills. 
- if (tryFoldSPUpdateIntoPushPop(MF, LastPush, NumBytes)) - FramePtrOffsetInPush += NumBytes; - else + if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) { + if (LastPush == FramePtrPush) + FramePtrOffsetInPush += NumBytes; + } else emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, MachineInstr::FrameSetup); @@ -380,15 +381,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, if (NumBytes != 0) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); } else { - MachineBasicBlock::iterator FirstPop = MBBI; - // Unwind MBBI to point to first LDR / VLDRD. const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); if (MBBI != MBB.begin()) { do { - if (isPopOpcode(MBBI->getOpcode())) - FirstPop = MBBI; - --MBBI; } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs)); if (!isCSRestore(MBBI, TII, CSRegs)) @@ -434,7 +430,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, ARM::SP) .addReg(FramePtr)); } - } else if (NumBytes && !tryFoldSPUpdateIntoPushPop(MF, FirstPop, NumBytes)) + } else if (NumBytes && + !tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes)) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); // Increment past our save areas. diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 87d15226947a..44e9dd15b139 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -534,8 +534,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, } if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); } else Base = N; @@ -702,8 +701,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -963,8 +961,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); } Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), @@ -1141,8 +1138,7 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, if (!CurDAG->isBaseWithConstantOffset(N)) { if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); } else { Base = N; @@ -1278,8 +1274,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, } if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); if (Base.getOpcode() == ISD::TargetConstantPool) return false; // We want to select t2LDRpci instead. 
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMISelLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMISelLowering.cpp index 76a0a831f695..017c86667ed2 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1009,7 +1009,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return 0; case ARMISD::Wrapper: return "ARMISD::Wrapper"; - case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN"; case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; case ARMISD::CALL: return "ARMISD::CALL"; @@ -1701,19 +1700,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // ARM call to a local ARM function is predicable. isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); // tBX takes a register source operand. - if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { - unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = - ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4); - SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); - Callee = DAG.getNode(ARMISD::PIC_ADD, dl, - getPointerTy(), Callee, PICLabel); + if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { + assert(Subtarget->isTargetDarwin() && "WrapperPIC use on non-Darwin?"); + Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(), + DAG.getTargetGlobalAddress(GV, dl, getPointerTy())); } else { // On ELF targets for PIC code, direct calls should go through the PLT unsigned OpFlags = 0; @@ -1755,8 +1745,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // FIXME: handle tail calls differently. unsigned CallOpc; - bool HasMinSizeAttr = MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + bool HasMinSizeAttr = Subtarget->isMinSize(); if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; @@ -2538,56 +2527,20 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, const GlobalValue *GV = cast(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); - // FIXME: Enable this for static codegen when tool issues are fixed. Also - // update ARMFastISel::ARMMaterializeGV. - if (Subtarget->useMovt() && RelocM != Reloc::Static) { + if (Subtarget->useMovt()) ++NumMovwMovt; - // FIXME: Once remat is capable of dealing with instructions with register - // operands, expand this into two nodes. - if (RelocM == Reloc::Static) - return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, - DAG.getTargetGlobalAddress(GV, dl, PtrVT)); - unsigned Wrapper = (RelocM == Reloc::PIC_) - ? 
ARMISD::WrapperPIC : ARMISD::WrapperDYN; - SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, - DAG.getTargetGlobalAddress(GV, dl, PtrVT)); - if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) - Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), - false, false, false, 0); - return Result; - } + // FIXME: Once remat is capable of dealing with instructions with register + // operands, expand this into multiple nodes + unsigned Wrapper = + RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper; - unsigned ARMPCLabelIndex = 0; - SDValue CPAddr; - if (RelocM == Reloc::Static) { - CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); - } else { - ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo(); - ARMPCLabelIndex = AFI->createPICLabelUId(); - unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); - ARMConstantPoolValue *CPV = - ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, - PCAdj); - CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); - } - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - - SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); - SDValue Chain = Result.getValue(1); - - if (RelocM == Reloc::PIC_) { - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); - Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); - } + SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY); + SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G); if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) - Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), - false, false, false, 0); - + Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, + MachinePointerInfo::getGOT(), false, false, false, 0); return Result; } @@ -2849,7 +2802,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, bool ForceMutable) const { // Currently, two use-cases possible: - // Case #1. Non var-args function, and we meet first byval parameter. + // Case #1. Non-var-args function, and we meet first byval parameter. // Setup first unallocated register as first byval register; // eat all remained registers // (these two actions are performed by HandleByVal method). @@ -3279,7 +3232,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) { if (CC == ISD::SETNE) return ISD::SETEQ; - return ISD::getSetCCSwappedOperands(CC); + return ISD::getSetCCInverse(CC, true); } static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMISelLowering.h b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMISelLowering.h index f195381538be..bcd605930811 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMISelLowering.h @@ -35,8 +35,6 @@ namespace llvm { Wrapper, // Wrapper - A wrapper node for TargetConstantPool, // TargetExternalSymbol, and TargetGlobalAddress. - WrapperDYN, // WrapperDYN - A wrapper node for TargetGlobalAddress in - // DYN mode. WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in // PIC mode. WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable @@ -363,6 +361,12 @@ namespace llvm { /// be used for loads / stores from the global. 
virtual unsigned getMaximalGlobalOffset() const; + /// Returns true if a cast between SrcAS and DestAS is a noop. + virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { + // Addrspacecasts are always noops. + return true; + } + /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMInstrInfo.td b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMInstrInfo.td index 2042c0460932..e778bc8f4fae 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -95,7 +95,6 @@ def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>; // Node definitions. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; -def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>; def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; @@ -276,7 +275,8 @@ def UseMulOps : Predicate<"Subtarget->useMulOps()">; // But only select them if more precision in FP computation is allowed. // Do not use them for Darwin platforms. def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" - " FPOpFusion::Fast) && " + " FPOpFusion::Fast && " + " Subtarget->hasVFP4()) && " "!Subtarget->isTargetDarwin()">; def DontUseFusedMAC : Predicate<"!(TM.Options.AllowFPOpFusion ==" " FPOpFusion::Fast &&" @@ -1725,6 +1725,8 @@ def BKPT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, let Inst{31-28} = 0xe; // AL let Inst{7-4} = 0b0111; } +// default immediate for breakpoint mnemonic +def : InstAlias<"bkpt", (BKPT 0)>, Requires<[IsARM]>; def HLT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, "hlt", "\t$val", []>, Requires<[IsARM, HasV8]> { @@ -5183,6 +5185,10 @@ def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2, [(set GPR:$dst, (arm_i32imm:$src))]>, Requires<[IsARM]>; +def LDRLIT_ga_abs : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iLoad_i, + [(set GPR:$dst, (ARMWrapper tglobaladdr:$src))]>, + Requires<[IsARM, DontUseMovt]>; + // Pseudo instruction that combines movw + movt + add pc (if PIC). // It also makes it possible to rematerialize the instructions. 
// FIXME: Remove this when we can do generalized remat and when machine licm @@ -5193,10 +5199,17 @@ def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, Requires<[IsARM, UseMovt]>; -def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), - IIC_iMOVix2, - [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>, - Requires<[IsARM, UseMovt]>; +def LDRLIT_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + IIC_iLoadiALU, + [(set GPR:$dst, + (ARMWrapperPIC tglobaladdr:$addr))]>, + Requires<[IsARM, DontUseMovt]>; + +def LDRLIT_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + NoItinerary, + [(set GPR:$dst, + (load (ARMWrapperPIC tglobaladdr:$addr)))]>, + Requires<[IsARM, DontUseMovt]>; let AddedComplexity = 10 in def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), @@ -5206,8 +5219,6 @@ def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), } // isReMaterializable // ConstantPool, GlobalAddress, and JumpTable -def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>, - Requires<[IsARM, DontUseMovt]>; def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>, Requires<[IsARM, UseMovt]>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMInstrThumb.td b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMInstrThumb.td index af5ef537b536..f6d233a39c5a 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -300,6 +300,8 @@ def tBKPT : T1I<(outs), (ins imm0_255:$val), NoItinerary, "bkpt\t$val", bits<8> val; let Inst{7-0} = val; } +// default immediate for breakpoint mnemonic +def : InstAlias<"bkpt", (tBKPT 0)>, Requires<[IsThumb]>; def tHLT : T1I<(outs), (ins imm0_63:$val), NoItinerary, "hlt\t$val", []>, T1Encoding<0b101110>, Requires<[IsThumb, HasV8]> { @@ -1306,10 +1308,22 @@ def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs), def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs), (tSUBrr tGPR:$lhs, tGPR:$rhs)>; -// ConstantPool, GlobalAddress -def : T1Pat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>; +// ConstantPool def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>; +// GlobalAddress +def tLDRLIT_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + IIC_iLoadiALU, + [(set GPR:$dst, + (ARMWrapperPIC tglobaladdr:$addr))]>, + Requires<[IsThumb, DontUseMovt]>; + +def tLDRLIT_ga_abs : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iLoad_i, + [(set GPR:$dst, + (ARMWrapper tglobaladdr:$src))]>, + Requires<[IsThumb, DontUseMovt]>; + + // JumpTable def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), (tLEApcrelJT tjumptable:$dst, imm:$id)>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMInstrThumb2.td b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMInstrThumb2.td index 48acffd3a64e..10ba894329e7 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -3793,15 +3793,9 @@ def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, Requires<[IsThumb2, UseMovt]>; -def t2MOV_ga_dyn : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), - IIC_iMOVix2, - [(set rGPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>, - Requires<[IsThumb2, UseMovt]>; } // ConstantPool, GlobalAddress, and JumpTable -def : 
T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>, - Requires<[IsThumb2, DontUseMovt]>; def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>, Requires<[IsThumb2, UseMovt]>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMInstrVFP.td b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMInstrVFP.td index a8cdc5ca0637..a5494819d166 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -200,13 +200,34 @@ let mayLoad = 1, hasExtraDefRegAllocReq = 1 in defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>; let mayStore = 1, hasExtraSrcRegAllocReq = 1 in -defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpLoad_m, IIC_fpLoad_mu>; +defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpStore_m, IIC_fpStore_mu>; } // neverHasSideEffects def : MnemonicAlias<"vldm", "vldmia">; def : MnemonicAlias<"vstm", "vstmia">; +// FLDM/FSTM - Load / Store multiple single / double precision registers for +// pre-ARMv6 cores. +// These instructions are deprecated! +def : VFP2MnemonicAlias<"fldmias", "vldmia">; +def : VFP2MnemonicAlias<"fldmdbs", "vldmdb">; +def : VFP2MnemonicAlias<"fldmeas", "vldmdb">; +def : VFP2MnemonicAlias<"fldmfds", "vldmia">; +def : VFP2MnemonicAlias<"fldmiad", "vldmia">; +def : VFP2MnemonicAlias<"fldmdbd", "vldmdb">; +def : VFP2MnemonicAlias<"fldmead", "vldmdb">; +def : VFP2MnemonicAlias<"fldmfdd", "vldmia">; + +def : VFP2MnemonicAlias<"fstmias", "vstmia">; +def : VFP2MnemonicAlias<"fstmdbs", "vstmdb">; +def : VFP2MnemonicAlias<"fstmeas", "vstmia">; +def : VFP2MnemonicAlias<"fstmfds", "vstmdb">; +def : VFP2MnemonicAlias<"fstmiad", "vstmia">; +def : VFP2MnemonicAlias<"fstmdbd", "vstmdb">; +def : VFP2MnemonicAlias<"fstmead", "vstmia">; +def : VFP2MnemonicAlias<"fstmfdd", "vstmdb">; + def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>, Requires<[HasVFP2]>; def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>, @@ -247,7 +268,7 @@ multiclass vfp_ldstx_mult<string asm, bit L_bit> { AXXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), IndexModeUpd, !strconcat(asm, "dbx${p}\t$Rn!, $regs"), "$Rn = $wb", []> { let Inst{24-23} = 0b10; // Decrement Before - let Inst{21} = 1; + let Inst{21} = 1; // Writeback let Inst{20} = L_bit; } } @@ -255,6 +276,12 @@ multiclass vfp_ldstx_mult<string asm, bit L_bit> { defm FLDM : vfp_ldstx_mult<"fldm", 1>; defm FSTM : vfp_ldstx_mult<"fstm", 0>; +def : VFP2MnemonicAlias<"fldmeax", "fldmdbx">; +def : VFP2MnemonicAlias<"fldmfdx", "fldmiax">; + +def : VFP2MnemonicAlias<"fstmeax", "fstmiax">; +def : VFP2MnemonicAlias<"fstmfdx", "fstmdbx">; + //===----------------------------------------------------------------------===// // FP Binary Operations. // @@ -1639,7 +1666,7 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), //===----------------------------------------------------------------------===// // Assembler aliases. // -// A few mnemnoic aliases for pre-unifixed syntax. We don't guarantee to +// A few mnemonic aliases for pre-unified syntax. We don't guarantee to // support them all, but supporting at least some of the basics is // good to be friendly.
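(Illustrative aside, not part of the patch.) The VFP2MnemonicAlias block above maps the deprecated pre-UAL FLDM/FSTM spellings onto unified mnemonics; the simpler aliases the closing comment mentions continue just below. In the real assembler these are resolved via TableGen; the mapping itself is plain data, as this stand-alone sketch (table copied from the alias list above) shows:

    #include <iostream>
    #include <string>
    #include <unordered_map>

    // Deprecated pre-UAL mnemonics and their unified replacements.
    static const std::unordered_map<std::string, std::string> PreUALAliases = {
        {"fldmias", "vldmia"}, {"fldmdbs", "vldmdb"}, {"fldmeas", "vldmdb"},
        {"fldmfds", "vldmia"}, {"fldmiad", "vldmia"}, {"fldmdbd", "vldmdb"},
        {"fldmead", "vldmdb"}, {"fldmfdd", "vldmia"}, {"fstmias", "vstmia"},
        {"fstmdbs", "vstmdb"}, {"fstmeas", "vstmia"}, {"fstmfds", "vstmdb"},
        {"fstmiad", "vstmia"}, {"fstmdbd", "vstmdb"}, {"fstmead", "vstmia"},
        {"fstmfdd", "vstmdb"}};

    int main() {
      std::cout << PreUALAliases.at("fldmfdd") << "\n"; // prints vldmia
    }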
def : VFP2MnemonicAlias<"flds", "vldr">; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMMCInstLower.cpp index e12c9c61ab14..c83062775908 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMMCInstLower.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMMCInstLower.cpp @@ -26,11 +26,12 @@ using namespace llvm; MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol) { const MCExpr *Expr; - switch (MO.getTargetFlags()) { + unsigned Option = MO.getTargetFlags() & ARMII::MO_OPTION_MASK; + switch (Option) { default: { Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext); - switch (MO.getTargetFlags()) { + switch (Option) { default: llvm_unreachable("Unknown target flag on symbol operand"); case 0: break; @@ -49,7 +50,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, } case ARMII::MO_PLT: - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT, + Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_PLT, OutContext); break; } @@ -81,9 +82,11 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( MO.getMBB()->getSymbol(), OutContext)); break; - case MachineOperand::MO_GlobalAddress: - MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal())); + case MachineOperand::MO_GlobalAddress: { + MCOp = GetSymbolRef(MO, + GetARMGVSymbol(MO.getGlobal(), MO.getTargetFlags())); break; + } case MachineOperand::MO_ExternalSymbol: MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMScheduleA9.td b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMScheduleA9.td index 603e775d351d..9a1d22275646 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMScheduleA9.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMScheduleA9.td @@ -1894,16 +1894,26 @@ def CortexA9Model : SchedMachineModel { let MispredictPenalty = 8; // Based on estimate of pipeline depth. let Itineraries = CortexA9Itineraries; + + // FIXME: Many vector operations were never given an itinerary. We + // haven't mapped these to the new model either. + let CompleteModel = 0; } //===----------------------------------------------------------------------===// // Define each kind of processor resource and number available. +// +// The AGU unit has BufferSize=1 so that the latency between operations +// that use it are considered to stall other operations. +// +// The FP unit has BufferSize=0 so that it is a hard dispatch +// hazard. No instruction may be dispatched while the unit is reserved. let SchedModel = CortexA9Model in { def A9UnitALU : ProcResource<2>; def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; } -def A9UnitAGU : ProcResource<1>; +def A9UnitAGU : ProcResource<1> { let BufferSize = 1; } def A9UnitLS : ProcResource<1>; def A9UnitFP : ProcResource<1> { let BufferSize = 0; } def A9UnitB : ProcResource<1>; @@ -2217,7 +2227,7 @@ def A9WriteLMfp : SchedWriteVariant<[ SchedVar]>; //===----------------------------------------------------------------------===// -// Resources for other (non LDM/VLDM) Variants. +// Resources for other (non-LDM/VLDM) Variants. // These mov immediate writers are unconditionally expanded with // additive latency. @@ -2397,6 +2407,7 @@ def :ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>; // ... 
// VHADD/VRHADD/VQADD/VTST/VADH/VRADH def :ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>; + // VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL def :ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>; // VQNEG/VQABS @@ -2431,7 +2442,7 @@ def :ItinRW<[A9WriteV3], [IIC_VSHLiD, IIC_VSHLiQ]>; def :ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>; // NEON permute -def :ItinRW<[A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>; +def :ItinRW<[A9WriteV2, A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>; def :ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2], [IIC_VPERMQ3, IIC_VEXTQ]>; def :ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 93add6ee33cf..00e44f5273fe 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -145,7 +145,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { - // Use default for non AAPCS (or Darwin) subtargets + // Use default for non-AAPCS (or Darwin) subtargets if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetDarwin()) return SDValue(); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMSubtarget.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMSubtarget.cpp index a11629852ad7..81fdbfd226c3 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -83,7 +83,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , CPUString(CPU) , TargetTriple(TT) , Options(Options) - , TargetABI(ARM_ABI_APCS) { + , TargetABI(ARM_ABI_UNKNOWN) { initializeEnvironment(); resetSubtargetFeatures(CPU, FS); } @@ -102,6 +102,7 @@ void ARMSubtarget::initializeEnvironment() { HasVFPv4 = false; HasFPARMv8 = false; HasNEON = false; + MinSize = false; UseNEONForSinglePrecisionFP = false; UseMulOps = UseFusedMulOps; SlowFPVMLx = false; @@ -151,6 +152,9 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { initializeEnvironment(); resetSubtargetFeatures(CPU, FS); } + + MinSize = + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); } void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { @@ -175,10 +179,9 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { } ParseSubtargetFeatures(CPUString, ArchFS); - // Thumb2 implies at least V6T2. FIXME: Fix tests to explicitly specify a - // ARM version or CPU and then remove this. - if (!HasV6T2Ops && hasThumb2()) - HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6MOps = HasV6T2Ops = true; + // FIXME: This used to enable V6T2 support implicitly for Thumb2 mode. + // Assert this for now to make the change obvious. + assert(hasV6T2Ops() || !hasThumb2()); // Keep a pointer to static instruction cost data for the specified CPU. SchedModel = getSchedModelForCPU(CPUString); @@ -186,11 +189,23 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); - if ((TargetTriple.getTriple().find("eabi") != std::string::npos) || - (isTargetIOS() && isMClass()) - // FIXME: We might want to separate AAPCS and EABI. Some systems, e.g.
- // Darwin-EABI conforms to AACPS but not the rest of EABI. - TargetABI = ARM_ABI_AAPCS; + if (TargetABI == ARM_ABI_UNKNOWN) { + switch (TargetTriple.getEnvironment()) { + case Triple::Android: + case Triple::EABI: + case Triple::EABIHF: + case Triple::GNUEABI: + case Triple::GNUEABIHF: + TargetABI = ARM_ABI_AAPCS; + break; + default: + if (isTargetIOS() && isMClass()) + TargetABI = ARM_ABI_AAPCS; + else + TargetABI = ARM_ABI_APCS; + break; + } + } if (isAAPCS_ABI()) stackAlignment = 8; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMSubtarget.h b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMSubtarget.h index 5276901bbb92..8c471dcfd1ac 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMSubtarget.h @@ -31,7 +31,8 @@ class TargetOptions; class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { - Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift, CortexA53, CortexA57 + Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, + CortexR5, Swift, CortexA53, CortexA57, Krait }; enum ARMProcClassEnum { None, AClass, RClass, MClass @@ -63,6 +64,10 @@ protected: bool HasFPARMv8; bool HasNEON; + /// MinSize - True if the function being compiled has the "minsize" attribute + /// and should be optimised for size at the expense of speed. + bool MinSize; + /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been /// specified. Use the method useNEONForSinglePrecisionFP() to /// determine if NEON should actually be used. @@ -212,6 +217,7 @@ protected: public: enum { + ARM_ABI_UNKNOWN, ARM_ABI_APCS, ARM_ABI_AAPCS // ARM EABI } TargetABI; @@ -256,8 +262,9 @@ public: bool isCortexA15() const { return ARMProcFamily == CortexA15; } bool isSwift() const { return ARMProcFamily == Swift; } bool isCortexM3() const { return CPUString == "cortex-m3"; } - bool isLikeA9() const { return isCortexA9() || isCortexA15(); } + bool isLikeA9() const { return isCortexA9() || isCortexA15() || isKrait(); } bool isCortexR5() const { return ARMProcFamily == CortexR5; } + bool isKrait() const { return ARMProcFamily == Krait; } bool hasARMOps() const { return !NoARM; } @@ -269,6 +276,7 @@ public: bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } bool hasVirtualization() const { return HasVirtualization; } + bool isMinSize() const { return MinSize; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } @@ -303,18 +311,30 @@ public: bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } - bool isTargetELF() const { return !isTargetDarwin(); } + bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } // ARM EABI is the bare-metal EABI described in ARM ABI documents and // can be accessed via -target arm-none-eabi. This is NOT GNUEABI. // FIXME: Add a flag for bare-metal for that target and set Triple::EABI // even for GNUEABI, so we can make a distinction here and still conform to // the EABI on GNU (and Android) mode. This requires change in Clang, too. 
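(Illustrative aside, not part of the patch.) The ARMSubtarget.cpp hunk above replaces the old substring test on the triple with an explicit switch over the triple's environment, and ARM.td earlier in this patch adds FeatureAPCS/FeatureAAPCS so the ABI can also be forced by a subtarget feature; ARM_ABI_UNKNOWN marks "not decided yet". A rough sketch of the resulting selection order, with the enum and function names invented for the example:

    #include <cassert>

    // The real code switches on Triple::EnvironmentType inside
    // resetSubtargetFeatures(); these names are stand-ins.
    enum class Env { Android, EABI, EABIHF, GNUEABI, GNUEABIHF, Other };
    enum class ABI { Unknown, APCS, AAPCS };

    ABI selectABI(ABI FromFeatures, Env E, bool IsIOSMClass) {
      // An explicitly requested ABI (FeatureAPCS / FeatureAAPCS) wins.
      if (FromFeatures != ABI::Unknown)
        return FromFeatures;
      switch (E) {
      case Env::Android:
      case Env::EABI:
      case Env::EABIHF:
      case Env::GNUEABI:
      case Env::GNUEABIHF:
        return ABI::AAPCS;
      default:
        return IsIOSMClass ? ABI::AAPCS : ABI::APCS;
      }
    }

    int main() {
      assert(selectABI(ABI::Unknown, Env::GNUEABIHF, false) == ABI::AAPCS);
      assert(selectABI(ABI::Unknown, Env::Other, false) == ABI::APCS);
      assert(selectABI(ABI::APCS, Env::GNUEABI, false) == ABI::APCS);
    }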
bool isTargetAEABI() const { - return TargetTriple.getEnvironment() == Triple::EABI; + return TargetTriple.getEnvironment() == Triple::EABI || + TargetTriple.getEnvironment() == Triple::EABIHF; } - bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; } - bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; } + bool isTargetHardFloat() const { + return TargetTriple.getEnvironment() == Triple::GNUEABIHF || + TargetTriple.getEnvironment() == Triple::EABIHF; + } + + bool isAPCS_ABI() const { + assert(TargetABI != ARM_ABI_UNKNOWN); + return TargetABI == ARM_ABI_APCS; + } + bool isAAPCS_ABI() const { + assert(TargetABI != ARM_ABI_UNKNOWN); + return TargetABI == ARM_ABI_AAPCS; + } bool isThumb() const { return InThumbMode; } bool isThumb1Only() const { return InThumbMode && !HasThumb2; } @@ -326,7 +346,7 @@ public: bool isR9Reserved() const { return IsR9Reserved; } - bool useMovt() const { return UseMovt && hasV6T2Ops(); } + bool useMovt() const { return UseMovt && !isMinSize(); } bool supportsTailCall() const { return SupportsTailCall; } bool allowsUnalignedMem() const { return AllowsUnalignedMem; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMTargetMachine.cpp index c2bf78877875..718311ba1dec 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -51,9 +51,11 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, Subtarget(TT, CPU, FS, Options), JITInfo(), InstrItins(Subtarget.getInstrItineraryData()) { - // Default to soft float ABI + + // Default to triple-appropriate float ABI if (Options.FloatABIType == FloatABI::Default) - this->Options.FloatABIType = FloatABI::Soft; + this->Options.FloatABIType = + Subtarget.isTargetHardFloat() ? FloatABI::Hard : FloatABI::Soft; } void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) { @@ -67,6 +69,56 @@ void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) { void ARMTargetMachine::anchor() { } +static std::string computeDataLayout(ARMSubtarget &ST) { + // Little endian. + std::string Ret = "e"; + + Ret += DataLayout::getManglingComponent(ST.getTargetTriple()); + + // Pointers are 32 bits and aligned to 32 bits. + Ret += "-p:32:32"; + + // On thumb, i16, i8 and i1 have natural alignment requirements, but we try to + // align to 32. + if (ST.isThumb()) + Ret += "-i1:8:32-i8:8:32-i16:16:32"; + + // ABIs other than APCS have 64 bit integers with natural alignment. + if (!ST.isAPCS_ABI()) + Ret += "-i64:64"; + + // We have 64 bit floats. The APCS ABI requires them to be aligned to 32 + // bits, others to 64 bits. We always try to align to 64 bits. + if (ST.isAPCS_ABI()) + Ret += "-f64:32:64"; + + // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others + // to 64. We always try to give them natural alignment. + if (ST.isAPCS_ABI()) + Ret += "-v64:32:64-v128:32:128"; + else + Ret += "-v128:64:128"; + + // On thumb and APCS, only try to align aggregates to 32 bits (the default is + // 64 bits). + if (ST.isThumb() || ST.isAPCS_ABI()) + Ret += "-a:0:32"; + + // Integer registers are 32 bits. + Ret += "-n32"; + + // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit + // aligned everywhere else.
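// Editorial illustration (not part of the patch): with the rules above, an
// AAPCS Thumb target ends up with roughly
//   "e" + mangling + "-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
// while an APCS ARM target gets
//   "e" + mangling + "-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32".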
+ if (ST.isTargetNaCl()) + Ret += "-S128"; + else if (ST.isAAPCS_ABI()) + Ret += "-S64"; + else + Ret += "-S32"; + + return Ret; +} + ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -74,14 +126,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL) : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), InstrInfo(Subtarget), - DL(Subtarget.isAPCS_ABI() ? - std::string("e-p:32:32-f64:32:64-i64:32:64-" - "v128:32:128-v64:32:64-n32-S32") : - Subtarget.isAAPCS_ABI() ? - std::string("e-p:32:32-f64:64:64-i64:64:64-" - "v128:64:128-v64:64:64-n32-S64") : - std::string("e-p:32:32-f64:64:64-i64:64:64-" - "v128:64:128-v64:64:64-n32-S32")), + DL(computeDataLayout(Subtarget)), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { @@ -102,17 +147,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, InstrInfo(Subtarget.hasThumb2() ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget)) : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), - DL(Subtarget.isAPCS_ABI() ? - std::string("e-p:32:32-f64:32:64-i64:32:64-" - "i16:16:32-i8:8:32-i1:8:32-" - "v128:32:128-v64:32:64-a:0:32-n32-S32") : - Subtarget.isAAPCS_ABI() ? - std::string("e-p:32:32-f64:64:64-i64:64:64-" - "i16:16:32-i8:8:32-i1:8:32-" - "v128:64:128-v64:64:64-a:0:32-n32-S64") : - std::string("e-p:32:32-f64:64:64-i64:64:64-" - "i16:16:32-i8:8:32-i1:8:32-" - "v128:64:128-v64:64:64-a:0:32-n32-S32")), + DL(computeDataLayout(Subtarget)), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget.hasThumb2() diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index e3f9e0dc609a..33274bfd0fae 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -12,12 +12,15 @@ #include "ARMFeatures.h" #include "llvm/MC/MCTargetAsmParser.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMArchName.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCExpr.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmInfo.h" @@ -28,12 +31,14 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" @@ -48,11 +53,83 @@ class ARMOperand; enum VectorLaneTy { NoLanes, AllLanes, IndexedLane }; +// A class to keep track of assembler-generated constant pools that are used to +// implement the ldr-pseudo. +class ConstantPool { + typedef SmallVector<std::pair<MCSymbol *, const MCExpr *>, 4> EntryVecTy; + EntryVecTy Entries; + +public: + // Initialize a new empty constant pool + ConstantPool() { } + + // Add a new entry to the constant pool in the next slot. + // \param Value is the new entry to put in the constant pool.
+ // + // \returns an MCExpr that references the newly inserted value + const MCExpr *addEntry(const MCExpr *Value, MCContext &Context) { + MCSymbol *CPEntryLabel = Context.CreateTempSymbol(); + + Entries.push_back(std::make_pair(CPEntryLabel, Value)); + return MCSymbolRefExpr::Create(CPEntryLabel, Context); + } + + // Emit the contents of the constant pool using the provided streamer. + void emitEntries(MCStreamer &Streamer) { + if (Entries.empty()) + return; + Streamer.EmitCodeAlignment(4); // align to 4-byte address + Streamer.EmitDataRegion(MCDR_DataRegion); + for (EntryVecTy::const_iterator I = Entries.begin(), E = Entries.end(); + I != E; ++I) { + Streamer.EmitLabel(I->first); + Streamer.EmitValue(I->second, 4); + } + Streamer.EmitDataRegion(MCDR_DataRegionEnd); + Entries.clear(); + } + + // Return true if the constant pool is empty + bool empty() { + return Entries.empty(); + } +}; + +// Map type used to keep track of per-Section constant pools used by the +// ldr-pseudo opcode. The map associates a section to its constant pool. The +// constant pool is a vector of (label, value) pairs. When the ldr +// pseudo is parsed we insert a new (label, value) pair into the constant pool +// for the current section and add an MCSymbolRefExpr to the new label as +// an operand to the ldr. After we have parsed all the user input we +// output the (label, value) pairs in each constant pool at the end of the +// section. +// +// We use the MapVector for the map type to ensure stable iteration of +// the sections at the end of the parse. We need to iterate over the +// sections in a stable order to ensure that we print the +// constant pools in a deterministic order when printing an assembly +// file. +typedef MapVector<const MCSection *, ConstantPool> ConstantPoolMapTy; + class ARMAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; const MCInstrInfo &MII; const MCRegisterInfo *MRI; + ConstantPoolMapTy ConstantPools; + + // Assembler created constant pools for ldr pseudo + ConstantPool *getConstantPool(const MCSection *Section) { + ConstantPoolMapTy::iterator CP = ConstantPools.find(Section); + if (CP == ConstantPools.end()) + return 0; + + return &CP->second; + } + + ConstantPool &getOrCreateConstantPool(const MCSection *Section) { + return ConstantPools[Section]; + } ARMTargetStreamer &getTargetStreamer() { MCTargetStreamer &TS = getParser().getStreamer().getTargetStreamer(); @@ -149,6 +226,9 @@ class ARMAsmParser : public MCTargetAsmParser { bool parseDirectiveSetFP(SMLoc L); bool parseDirectivePad(SMLoc L); bool parseDirectiveRegSave(SMLoc L, bool IsVector); + bool parseDirectiveInst(SMLoc L, char Suffix = '\0'); + bool parseDirectiveLtorg(SMLoc L); + bool parseDirectiveEven(SMLoc L); StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode, bool &CarrySetting, unsigned &ProcessorIMod, @@ -293,7 +373,7 @@ public: MCStreamer &Out, unsigned &ErrorInfo, bool MatchingInlineAsm); void onLabelParsed(MCSymbol *Symbol); - + void finishParse(); }; } // end anonymous namespace @@ -1580,7 +1660,7 @@ public: void addRegShiftedRegOperands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands!"); assert(isRegShiftedReg() && - "addRegShiftedRegOperands() on non RegShiftedReg!"); + "addRegShiftedRegOperands() on non-RegShiftedReg!"); Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.SrcReg)); Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.ShiftReg)); Inst.addOperand(MCOperand::CreateImm( @@ -1590,7 +1670,7 @@ public: void addRegShiftedImmOperands(MCInst &Inst, unsigned
N) const { assert(N == 2 && "Invalid number of operands!"); assert(isRegShiftedImm() && - "addRegShiftedImmOperands() on non RegShiftedImm!"); + "addRegShiftedImmOperands() on non-RegShiftedImm!"); Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg)); // Shift of #32 is encoded as 0 where permitted unsigned Imm = (RegShiftedImm.ShiftImm == 32 ? 0 : RegShiftedImm.ShiftImm); @@ -4623,7 +4703,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); // There can be a trailing '!' on operands that we want as a separate - // '!' Token operand. Handle that here. For example, the compatibilty + // '!' Token operand. Handle that here. For example, the compatibility // alias for 'srsdb sp!, #imm' is 'srsdb #imm!'. if (Parser.getTok().is(AsmToken::Exclaim)) { Operands.push_back(ARMOperand::CreateToken(Parser.getTok().getString(), @@ -4653,6 +4733,24 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E)); return false; } + case AsmToken::Equal: { + if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. ldr r0, =val) + return Error(Parser.getTok().getLoc(), "unexpected token in operand"); + + const MCSection *Section = + getParser().getStreamer().getCurrentSection().first; + assert(Section); + Parser.Lex(); // Eat '=' + const MCExpr *SubExprVal; + if (getParser().parseExpression(SubExprVal)) + return true; + E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + const MCExpr *CPLoc = + getOrCreateConstantPool(Section).addEntry(SubExprVal, getContext()); + Operands.push_back(ARMOperand::CreateImm(CPLoc, S, E)); + return false; + } } } @@ -5009,12 +5107,40 @@ static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) { } static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features, unsigned VariantID); + +static bool RequiresVFPRegListValidation(StringRef Inst, + bool &AcceptSinglePrecisionOnly, + bool &AcceptDoublePrecisionOnly) { + if (Inst.size() < 7) + return false; + + if (Inst.startswith("fldm") || Inst.startswith("fstm")) { + StringRef AddressingMode = Inst.substr(4, 2); + if (AddressingMode == "ia" || AddressingMode == "db" || + AddressingMode == "ea" || AddressingMode == "fd") { + AcceptSinglePrecisionOnly = Inst[6] == 's'; + AcceptDoublePrecisionOnly = Inst[6] == 'd' || Inst[6] == 'x'; + return true; + } + } + + return false; +} + /// Parse an arm instruction mnemonic followed by its operands. bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // FIXME: Can this be done via tablegen in some fashion? + bool RequireVFPRegisterListCheck; + bool AcceptSinglePrecisionOnly; + bool AcceptDoublePrecisionOnly; + RequireVFPRegisterListCheck = + RequiresVFPRegListValidation(Name, AcceptSinglePrecisionOnly, + AcceptDoublePrecisionOnly); + // Apply mnemonic aliases before doing anything else, as the destination - // mnemnonic may include suffices and we want to handle them normally. + // mnemonic may include suffixes and we want to handle them normally. // The generic tblgen'erated code does this later, at the start of // MatchInstructionImpl(), but that's too late for aliases that include // any sort of suffix.
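To make the ldr-pseudo flow above concrete, here is a reduced standalone analogue of the bookkeeping (hypothetical names, not the LLVM API): each "=expr" operand is swapped for a fresh label, and the accumulated (label, value) pairs are flushed at .ltorg/.pool or at the end of the section.

#include <string>
#include <utility>
#include <vector>

struct MiniPool {
  // (label, value) pairs in insertion order, as in ConstantPool above.
  std::vector<std::pair<std::string, std::string> > Entries;

  // Record a value; return the label the ldr should reference instead.
  std::string addEntry(const std::string &Value) {
    std::string Label = ".Lcp" + std::to_string(Entries.size());
    Entries.push_back(std::make_pair(Label, Value));
    return Label;
  }

  // Flush at .ltorg/.pool or end of parse: one "Label: .word Value" each.
  void emitEntries(std::string &Out) {
    for (std::size_t I = 0; I != Entries.size(); ++I)
      Out += Entries[I].first + ":\n\t.word " + Entries[I].second + "\n";
    Entries.clear();
  }
};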
@@ -5181,6 +5307,16 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, Parser.Lex(); // Consume the EndOfStatement + if (RequireVFPRegisterListCheck) { + ARMOperand *Op = static_cast<ARMOperand*>(Operands.back()); + if (AcceptSinglePrecisionOnly && !Op->isSPRRegList()) + return Error(Op->getStartLoc(), + "VFP/Neon single precision register expected"); + if (AcceptDoublePrecisionOnly && !Op->isDPRRegList()) + return Error(Op->getStartLoc(), + "VFP/Neon double precision register expected"); + } + // Some instructions, mostly Thumb, have forms for the same mnemonic that // do and don't have a cc_out optional-def operand. With some spot-checks // of the operand list, we can figure out which variant we're trying to @@ -7808,6 +7944,16 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveRegSave(DirectiveID.getLoc(), false); else if (IDVal == ".vsave") return parseDirectiveRegSave(DirectiveID.getLoc(), true); + else if (IDVal == ".inst") + return parseDirectiveInst(DirectiveID.getLoc()); + else if (IDVal == ".inst.n") + return parseDirectiveInst(DirectiveID.getLoc(), 'n'); + else if (IDVal == ".inst.w") + return parseDirectiveInst(DirectiveID.getLoc(), 'w'); + else if (IDVal == ".ltorg" || IDVal == ".pool") + return parseDirectiveLtorg(DirectiveID.getLoc()); + else if (IDVal == ".even") + return parseDirectiveEven(DirectiveID.getLoc()); return true; } @@ -7918,8 +8064,10 @@ bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) { else return Error(L, "unrecognized syntax mode in .syntax directive"); - if (getLexer().isNot(AsmToken::EndOfStatement)) - return Error(Parser.getTok().getLoc(), "unexpected token in directive"); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Error(Parser.getTok().getLoc(), "unexpected token in directive"); + return false; + } Parser.Lex(); // TODO tell the MC streamer the mode @@ -7931,30 +8079,37 @@ bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) { /// ::= .code 16 | 32 bool ARMAsmParser::parseDirectiveCode(SMLoc L) { const AsmToken &Tok = Parser.getTok(); - if (Tok.isNot(AsmToken::Integer)) - return Error(L, "unexpected token in .code directive"); + if (Tok.isNot(AsmToken::Integer)) { + Error(L, "unexpected token in .code directive"); + return false; + } int64_t Val = Parser.getTok().getIntVal(); - if (Val == 16) - Parser.Lex(); - else if (Val == 32) - Parser.Lex(); - else - return Error(L, "invalid operand to .code directive"); + if (Val != 16 && Val != 32) { + Error(L, "invalid operand to .code directive"); + return false; + } + Parser.Lex(); - if (getLexer().isNot(AsmToken::EndOfStatement)) - return Error(Parser.getTok().getLoc(), "unexpected token in directive"); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Error(Parser.getTok().getLoc(), "unexpected token in directive"); + return false; + } Parser.Lex(); if (Val == 16) { - if (!hasThumb()) - return Error(L, "target does not support Thumb mode"); + if (!hasThumb()) { + Error(L, "target does not support Thumb mode"); + return false; + } if (!isThumb()) SwitchMode(); getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); } else { - if (!hasARM()) - return Error(L, "target does not support ARM mode"); + if (!hasARM()) { + Error(L, "target does not support ARM mode"); + return false; + } if (isThumb()) SwitchMode(); @@ -7972,21 +8127,23 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { SMLoc SRegLoc, ERegLoc; if (ParseRegister(Reg, SRegLoc, ERegLoc)) { Parser.eatToEndOfStatement(); - return Error(SRegLoc, "register name expected"); +
Error(SRegLoc, "register name expected"); + return false; } // Shouldn't be anything else. if (Parser.getTok().isNot(AsmToken::EndOfStatement)) { Parser.eatToEndOfStatement(); - return Error(Parser.getTok().getLoc(), - "unexpected input in .req directive."); + Error(Parser.getTok().getLoc(), "unexpected input in .req directive."); + return false; } Parser.Lex(); // Consume the EndOfStatement - if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg) - return Error(SRegLoc, "redefinition of '" + Name + - "' does not match original."); + if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg) { + Error(SRegLoc, "redefinition of '" + Name + "' does not match original."); + return false; + } return false; } @@ -7996,7 +8153,8 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { if (Parser.getTok().isNot(AsmToken::Identifier)) { Parser.eatToEndOfStatement(); - return Error(L, "unexpected input in .unreq directive."); + Error(L, "unexpected input in .unreq directive."); + return false; } RegisterReqs.erase(Parser.getTok().getIdentifier()); Parser.Lex(); // Eat the identifier. @@ -8006,24 +8164,46 @@ /// parseDirectiveArch /// ::= .arch token bool ARMAsmParser::parseDirectiveArch(SMLoc L) { - return true; + StringRef Arch = getParser().parseStringToEndOfStatement().trim(); + + unsigned ID = StringSwitch<unsigned>(Arch) +#define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) \ + .Case(NAME, ARM::ID) +#define ARM_ARCH_ALIAS(NAME, ID) \ + .Case(NAME, ARM::ID) +#include "MCTargetDesc/ARMArchName.def" + .Default(ARM::INVALID_ARCH); + + if (ID == ARM::INVALID_ARCH) { + Error(L, "Unknown arch name"); + return false; + } + + getTargetStreamer().emitArch(ID); + return false; } /// parseDirectiveEabiAttr /// ::= .eabi_attribute int, int bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { - if (Parser.getTok().isNot(AsmToken::Integer)) - return Error(L, "integer expected"); + if (Parser.getTok().isNot(AsmToken::Integer)) { + Error(L, "integer expected"); + return false; + } int64_t Tag = Parser.getTok().getIntVal(); Parser.Lex(); // eat tag integer - if (Parser.getTok().isNot(AsmToken::Comma)) - return Error(L, "comma expected"); + if (Parser.getTok().isNot(AsmToken::Comma)) { + Error(L, "comma expected"); + return false; + } Parser.Lex(); // skip comma L = Parser.getTok().getLoc(); - if (Parser.getTok().isNot(AsmToken::Integer)) - return Error(L, "integer expected"); + if (Parser.getTok().isNot(AsmToken::Integer)) { + Error(L, "integer expected"); + return false; + } int64_t Value = Parser.getTok().getIntVal(); Parser.Lex(); // eat value integer @@ -8049,8 +8229,10 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { #include "ARMFPUName.def" .Default(ARM::INVALID_FPU); - if (ID == ARM::INVALID_FPU) - return Error(L, "Unknown FPU name"); + if (ID == ARM::INVALID_FPU) { + Error(L, "Unknown FPU name"); + return false; + } getTargetStreamer().emitFPU(ID); return false; @@ -8062,7 +8244,7 @@ bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) { if (FnStartLoc.isValid()) { Error(L, ".fnstart starts before the end of previous one"); Error(FnStartLoc, "previous .fnstart starts here"); - return true; + return false; } FnStartLoc = L; @@ -8074,8 +8256,10 @@ /// ::= .fnend bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) { // Check the ordering of unwind directives - if (!FnStartLoc.isValid()) - return Error(L, ".fnstart must precede
.fnend directive"); + if (!FnStartLoc.isValid()) { + Error(L, ".fnstart must precede .fnend directive"); + return false; + } // Reset the unwind directives parser state resetUnwindDirectiveParserState(); @@ -8088,17 +8272,19 @@ bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) { bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) { // Check the ordering of unwind directives CantUnwindLoc = L; - if (!FnStartLoc.isValid()) - return Error(L, ".fnstart must precede .cantunwind directive"); + if (!FnStartLoc.isValid()) { + Error(L, ".fnstart must precede .cantunwind directive"); + return false; + } if (HandlerDataLoc.isValid()) { Error(L, ".cantunwind can't be used with .handlerdata directive"); Error(HandlerDataLoc, ".handlerdata was specified here"); - return true; + return false; } if (PersonalityLoc.isValid()) { Error(L, ".cantunwind can't be used with .personality directive"); Error(PersonalityLoc, ".personality was specified here"); - return true; + return false; } getTargetStreamer().emitCantUnwind(); @@ -8110,23 +8296,26 @@ bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) { bool ARMAsmParser::parseDirectivePersonality(SMLoc L) { // Check the ordering of unwind directives PersonalityLoc = L; - if (!FnStartLoc.isValid()) - return Error(L, ".fnstart must precede .personality directive"); + if (!FnStartLoc.isValid()) { + Error(L, ".fnstart must precede .personality directive"); + return false; + } if (CantUnwindLoc.isValid()) { Error(L, ".personality can't be used with .cantunwind directive"); Error(CantUnwindLoc, ".cantunwind was specified here"); - return true; + return false; } if (HandlerDataLoc.isValid()) { Error(L, ".personality must precede .handlerdata directive"); Error(HandlerDataLoc, ".handlerdata was specified here"); - return true; + return false; } // Parse the name of the personality routine if (Parser.getTok().isNot(AsmToken::Identifier)) { Parser.eatToEndOfStatement(); - return Error(L, "unexpected input in .personality directive."); + Error(L, "unexpected input in .personality directive."); + return false; } StringRef Name(Parser.getTok().getIdentifier()); Parser.Lex(); @@ -8141,12 +8330,14 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) { bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) { // Check the ordering of unwind directives HandlerDataLoc = L; - if (!FnStartLoc.isValid()) - return Error(L, ".fnstart must precede .personality directive"); + if (!FnStartLoc.isValid()) { + Error(L, ".fnstart must precede .personality directive"); + return false; + } if (CantUnwindLoc.isValid()) { Error(L, ".handlerdata can't be used with .cantunwind directive"); Error(CantUnwindLoc, ".cantunwind was specified here"); - return true; + return false; } getTargetStreamer().emitHandlerData(); @@ -8157,31 +8348,43 @@ bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) { /// ::= .setfp fpreg, spreg [, offset] bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) { // Check the ordering of unwind directives - if (!FnStartLoc.isValid()) - return Error(L, ".fnstart must precede .setfp directive"); - if (HandlerDataLoc.isValid()) - return Error(L, ".setfp must precede .handlerdata directive"); + if (!FnStartLoc.isValid()) { + Error(L, ".fnstart must precede .setfp directive"); + return false; + } + if (HandlerDataLoc.isValid()) { + Error(L, ".setfp must precede .handlerdata directive"); + return false; + } // Parse fpreg SMLoc NewFPRegLoc = Parser.getTok().getLoc(); int NewFPReg = tryParseRegister(); - if (NewFPReg == -1) - return Error(NewFPRegLoc, "frame pointer register 
expected"); + if (NewFPReg == -1) { + Error(NewFPRegLoc, "frame pointer register expected"); + return false; + } // Consume comma - if (!Parser.getTok().is(AsmToken::Comma)) - return Error(Parser.getTok().getLoc(), "comma expected"); + if (!Parser.getTok().is(AsmToken::Comma)) { + Error(Parser.getTok().getLoc(), "comma expected"); + return false; + } Parser.Lex(); // skip comma // Parse spreg SMLoc NewSPRegLoc = Parser.getTok().getLoc(); int NewSPReg = tryParseRegister(); - if (NewSPReg == -1) - return Error(NewSPRegLoc, "stack pointer register expected"); + if (NewSPReg == -1) { + Error(NewSPRegLoc, "stack pointer register expected"); + return false; + } - if (NewSPReg != ARM::SP && NewSPReg != FPReg) - return Error(NewSPRegLoc, - "register should be either $sp or the latest fp register"); + if (NewSPReg != ARM::SP && NewSPReg != FPReg) { + Error(NewSPRegLoc, + "register should be either $sp or the latest fp register"); + return false; + } // Update the frame pointer register FPReg = NewFPReg; @@ -8193,18 +8396,23 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) { if (Parser.getTok().isNot(AsmToken::Hash) && Parser.getTok().isNot(AsmToken::Dollar)) { - return Error(Parser.getTok().getLoc(), "'#' expected"); + Error(Parser.getTok().getLoc(), "'#' expected"); + return false; } Parser.Lex(); // skip hash token. const MCExpr *OffsetExpr; SMLoc ExLoc = Parser.getTok().getLoc(); SMLoc EndLoc; - if (getParser().parseExpression(OffsetExpr, EndLoc)) - return Error(ExLoc, "malformed setfp offset"); + if (getParser().parseExpression(OffsetExpr, EndLoc)) { + Error(ExLoc, "malformed setfp offset"); + return false; + } const MCConstantExpr *CE = dyn_cast(OffsetExpr); - if (!CE) - return Error(ExLoc, "setfp offset must be an immediate"); + if (!CE) { + Error(ExLoc, "setfp offset must be an immediate"); + return false; + } Offset = CE->getValue(); } @@ -8218,26 +8426,35 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) { /// ::= .pad offset bool ARMAsmParser::parseDirectivePad(SMLoc L) { // Check the ordering of unwind directives - if (!FnStartLoc.isValid()) - return Error(L, ".fnstart must precede .pad directive"); - if (HandlerDataLoc.isValid()) - return Error(L, ".pad must precede .handlerdata directive"); + if (!FnStartLoc.isValid()) { + Error(L, ".fnstart must precede .pad directive"); + return false; + } + if (HandlerDataLoc.isValid()) { + Error(L, ".pad must precede .handlerdata directive"); + return false; + } // Parse the offset if (Parser.getTok().isNot(AsmToken::Hash) && Parser.getTok().isNot(AsmToken::Dollar)) { - return Error(Parser.getTok().getLoc(), "'#' expected"); + Error(Parser.getTok().getLoc(), "'#' expected"); + return false; } Parser.Lex(); // skip hash token. 
const MCExpr *OffsetExpr; SMLoc ExLoc = Parser.getTok().getLoc(); SMLoc EndLoc; - if (getParser().parseExpression(OffsetExpr, EndLoc)) - return Error(ExLoc, "malformed pad offset"); + if (getParser().parseExpression(OffsetExpr, EndLoc)) { + Error(ExLoc, "malformed pad offset"); + return false; + } const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr); - if (!CE) - return Error(ExLoc, "pad offset must be an immediate"); + if (!CE) { + Error(ExLoc, "pad offset must be an immediate"); + return false; + } getTargetStreamer().emitPad(CE->getValue()); return false; @@ -8248,10 +8465,14 @@ bool ARMAsmParser::parseDirectivePad(SMLoc L) { /// ::= .vsave { registers } bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) { // Check the ordering of unwind directives - if (!FnStartLoc.isValid()) - return Error(L, ".fnstart must precede .save or .vsave directives"); - if (HandlerDataLoc.isValid()) - return Error(L, ".save or .vsave must precede .handlerdata directive"); + if (!FnStartLoc.isValid()) { + Error(L, ".fnstart must precede .save or .vsave directives"); + return false; + } + if (HandlerDataLoc.isValid()) { + Error(L, ".save or .vsave must precede .handlerdata directive"); + return false; + } // RAII object to make sure parsed operands are deleted. struct CleanupObject { @@ -8266,15 +8487,138 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) { if (parseRegisterList(CO.Operands)) return true; ARMOperand *Op = (ARMOperand*)CO.Operands[0]; - if (!IsVector && !Op->isRegList()) - return Error(L, ".save expects GPR registers"); - if (IsVector && !Op->isDPRRegList()) - return Error(L, ".vsave expects DPR registers"); + if (!IsVector && !Op->isRegList()) { + Error(L, ".save expects GPR registers"); + return false; + } + if (IsVector && !Op->isDPRRegList()) { + Error(L, ".vsave expects DPR registers"); + return false; + } getTargetStreamer().emitRegSave(Op->getRegList(), IsVector); return false; } +/// parseDirectiveInst /// ::= .inst opcode [, ...] /// ::= .inst.n opcode [, ...] /// ::= .inst.w opcode [, ...] bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) { + int Width; + + if (isThumb()) { + switch (Suffix) { + case 'n': + Width = 2; + break; + case 'w': + Width = 4; + break; + default: + Parser.eatToEndOfStatement(); + Error(Loc, "cannot determine Thumb instruction size, " + "use inst.n/inst.w instead"); + return false; + } + } else { + if (Suffix) { + Parser.eatToEndOfStatement(); + Error(Loc, "width suffixes are invalid in ARM mode"); + return false; + } + Width = 4; + } + + if (getLexer().is(AsmToken::EndOfStatement)) { + Parser.eatToEndOfStatement(); + Error(Loc, "expected expression following directive"); + return false; + } + + for (;;) { + const MCExpr *Expr; + + if (getParser().parseExpression(Expr)) { + Error(Loc, "expected expression"); + return false; + } + + const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr); + if (!Value) { + Error(Loc, "expected constant expression"); + return false; + } + + switch (Width) { + case 2: + if (Value->getValue() > 0xffff) { + Error(Loc, "inst.n operand is too big, use inst.w instead"); + return false; + } + break; + case 4: + if (Value->getValue() > 0xffffffff) { + Error(Loc, + StringRef(Suffix ?
"inst.w" : "inst") + " operand is too big"); + return false; + } + break; + default: + llvm_unreachable("only supported widths are 2 and 4"); + } + + getTargetStreamer().emitInst(Value->getValue(), Suffix); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + if (getLexer().isNot(AsmToken::Comma)) { + Error(Loc, "unexpected token in directive"); + return false; + } + + Parser.Lex(); + } + + Parser.Lex(); + return false; +} + +/// parseDirectiveLtorg +/// ::= .ltorg | .pool +bool ARMAsmParser::parseDirectiveLtorg(SMLoc L) { + MCStreamer &Streamer = getParser().getStreamer(); + const MCSection *Section = Streamer.getCurrentSection().first; + + if (ConstantPool *CP = getConstantPool(Section)) { + if (!CP->empty()) + CP->emitEntries(Streamer); + } + return false; +} + +bool ARMAsmParser::parseDirectiveEven(SMLoc L) { + const MCSection *Section = getStreamer().getCurrentSection().first; + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + TokError("unexpected token in directive"); + return false; + } + + if (!Section) { + getStreamer().InitToTextSection(); + Section = getStreamer().getCurrentSection().first; + } + + if (Section->UseCodeAlign()) + getStreamer().EmitCodeAlignment(2, 0); + else + getStreamer().EmitValueToAlignment(2, 0, 1, 0); + + return false; +} + /// Force static initialization. extern "C" void LLVMInitializeARMAsmParser() { RegisterMCAsmParser X(TheARMTarget); @@ -8303,3 +8647,20 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, } return Match_InvalidOperand; } + +void ARMAsmParser::finishParse() { + // Dump contents of assembler constant pools. + MCStreamer &Streamer = getParser().getStreamer(); + for (ConstantPoolMapTy::iterator CPI = ConstantPools.begin(), + CPE = ConstantPools.end(); + CPI != CPE; ++CPI) { + const MCSection *Section = CPI->first; + ConstantPool &CP = CPI->second; + + // Dump non-empty assembler constant pools at the end of the section. + if (!CP.empty()) { + Streamer.SwitchSection(Section); + CP.emitEntries(Streamer); + } + } +} diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/AsmParser/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/ARM/AsmParser/CMakeLists.txt index d2012c387cda..66ed1df61a21 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/AsmParser/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/AsmParser/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - add_llvm_library(LLVMARMAsmParser ARMAsmParser.cpp ) - -add_dependencies(LLVMARMAsmParser ARMCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/ARM/CMakeLists.txt index f271a932b540..66d015e2b3cb 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/CMakeLists.txt @@ -49,8 +49,6 @@ add_llvm_target(ARMCodeGen Thumb2SizeReduction.cpp ) -add_dependencies(LLVMARMCodeGen ARMCommonTableGen intrinsics_gen) - # workaround for hanging compilation on MSVC9, 10 if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 ) set_property( diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/Disassembler/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/ARM/Disassembler/CMakeLists.txt index 9de6e5c511bd..92bc709ecb24 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/Disassembler/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/Disassembler/CMakeLists.txt @@ -1,5 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMARMDisassembler ARMDisassembler.cpp ) @@ -10,4 +8,3 @@ set_property( PROPERTY COMPILE_FLAGS "/Od" ) endif() -add_dependencies(LLVMARMDisassembler ARMCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt index e2d4819b4b4a..e59ec4bb77fa 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/InstPrinter/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMARMAsmPrinter ARMInstPrinter.cpp ) - -add_dependencies(LLVMARMAsmPrinter ARMCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/ARM/LLVMBuild.txt index fd4b3a33de1a..9ed51dfda0f1 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/LLVMBuild.txt @@ -31,5 +31,5 @@ has_jit = 1 type = Library name = ARMCodeGen parent = ARM -required_libraries = ARMAsmPrinter ARMDesc ARMInfo Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target +required_libraries = ARMAsmPrinter ARMDesc ARMInfo Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target add_to_library_groups = ARM diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.def b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.def new file mode 100644 index 000000000000..9f007a035a88 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.def @@ -0,0 +1,50 @@ +//===-- ARMArchName.def - List of the ARM arch names ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the list of the supported ARM architecture names, +// i.e. the supported values for the -march= option. +// +//===----------------------------------------------------------------------===// + +// NOTE: NO INCLUDE GUARD DESIRED!
+ +#ifndef ARM_ARCH_NAME +#error "You must define ARM_ARCH_NAME before including ARMArchName.def" +#endif + +// ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) +ARM_ARCH_NAME("armv2", ARMV2, "2", v4) +ARM_ARCH_NAME("armv2a", ARMV2A, "2A", v4) +ARM_ARCH_NAME("armv3", ARMV3, "3", v4) +ARM_ARCH_NAME("armv3m", ARMV3M, "3M", v4) +ARM_ARCH_NAME("armv4", ARMV4, "4", v4) +ARM_ARCH_NAME("armv4t", ARMV4T, "4T", v4T) +ARM_ARCH_NAME("armv5", ARMV5, "5", v5T) +ARM_ARCH_NAME("armv5t", ARMV5T, "5T", v5T) +ARM_ARCH_NAME("armv5te", ARMV5TE, "5TE", v5TE) +ARM_ARCH_NAME("armv6", ARMV6, "6", v6) +ARM_ARCH_NAME("armv6j", ARMV6J, "6J", v6) +ARM_ARCH_NAME("armv6t2", ARMV6T2, "6T2", v6T2) +ARM_ARCH_NAME("armv6z", ARMV6Z, "6Z", v6KZ) +ARM_ARCH_NAME("armv6zk", ARMV6ZK, "6ZK", v6KZ) +ARM_ARCH_NAME("armv6-m", ARMV6M, "6-M", v6_M) +ARM_ARCH_NAME("armv7", ARMV7, "7", v7) +ARM_ARCH_NAME("armv7-a", ARMV7A, "7-A", v7) +ARM_ARCH_ALIAS("armv7a", ARMV7A) +ARM_ARCH_NAME("armv7-r", ARMV7R, "7-R", v7) +ARM_ARCH_ALIAS("armv7r", ARMV7R) +ARM_ARCH_NAME("armv7-m", ARMV7M, "7-M", v7) +ARM_ARCH_ALIAS("armv7m", ARMV7M) +ARM_ARCH_NAME("armv8-a", ARMV8A, "8-A", v8) +ARM_ARCH_ALIAS("armv8a", ARMV8A) +ARM_ARCH_NAME("iwmmxt", IWMMXT, "iwmmxt", v5TE) +ARM_ARCH_NAME("iwmmxt2", IWMMXT2, "iwmmxt2", v5TE) + +#undef ARM_ARCH_NAME +#undef ARM_ARCH_ALIAS diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.h b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.h new file mode 100644 index 000000000000..34b9fc126ff1 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMArchName.h @@ -0,0 +1,27 @@ +//===-- ARMArchName.h - List of the ARM arch names --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMARCHNAME_H +#define ARMARCHNAME_H + +namespace llvm { +namespace ARM { + +enum ArchKind { + INVALID_ARCH = 0 + +#define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) , ID +#define ARM_ARCH_ALIAS(NAME, ID) /* empty */ +#include "ARMArchName.def" +}; + +} // namespace ARM +} // namespace llvm + +#endif // ARMARCHNAME_H diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index af939fc19129..bb781ecece08 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -278,42 +278,36 @@ namespace ARMII { //===------------------------------------------------------------------===// // ARM Specific MachineOperand flags. - MO_NO_FLAG, + MO_NO_FLAG = 0, /// MO_LO16 - On a symbol operand, this represents a relocation containing /// lower 16 bit of the address. Used only via movw instruction. - MO_LO16, + MO_LO16 = 0x1, /// MO_HI16 - On a symbol operand, this represents a relocation containing /// higher 16 bit of the address. Used only via movt instruction. - MO_HI16, - - /// MO_LO16_NONLAZY - On a symbol operand "FOO", this represents a - /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol, - /// i.e. "FOO$non_lazy_ptr". - /// Used only via movw instruction. 
- MO_LO16_NONLAZY, - - /// MO_HI16_NONLAZY - On a symbol operand "FOO", this represents a - /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol, - /// i.e. "FOO$non_lazy_ptr". Used only via movt instruction. - MO_HI16_NONLAZY, - - /// MO_LO16_NONLAZY_PIC - On a symbol operand "FOO", this represents a - /// relocation containing lower 16 bit of the PC relative address of the - /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL". - /// Used only via movw instruction. - MO_LO16_NONLAZY_PIC, - - /// MO_HI16_NONLAZY_PIC - On a symbol operand "FOO", this represents a - /// relocation containing lower 16 bit of the PC relative address of the - /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL". - /// Used only via movt instruction. - MO_HI16_NONLAZY_PIC, + MO_HI16 = 0x2, /// MO_PLT - On a symbol operand, this represents an ELF PLT reference on a /// call operand. - MO_PLT + MO_PLT = 0x3, + + /// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects + /// just that part of the flag set. + MO_OPTION_MASK = 0x7f, + + /// MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it + /// represents a symbol which, if indirect, will get special Darwin mangling + /// as a non-lazy-ptr indirect symbol (i.e. "L_FOO$non_lazy_ptr"). Can be + /// combined with MO_LO16, MO_HI16 or MO_NO_FLAG (in a constant-pool, for + /// example). + MO_NONLAZY = 0x80, + + // It's undefined behaviour if an enum overflows the range between its + // smallest and largest values, but since these are |ed together, it can + // happen. Put a sentinel in (values of this enum are stored as "unsigned + // char"). + MO_UNUSED_MAXIMUM = 0xff }; enum { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index f98bbd204c7a..72ac6e22e8dd 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -166,9 +166,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_None: Type = ELF::R_ARM_REL32; break; - case MCSymbolRefExpr::VK_ARM_TLSGD: + case MCSymbolRefExpr::VK_TLSGD: llvm_unreachable("unimplemented"); - case MCSymbolRefExpr::VK_ARM_GOTTPOFF: + case MCSymbolRefExpr::VK_GOTTPOFF: Type = ELF::R_ARM_TLS_IE32; break; } @@ -176,7 +176,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case ARM::fixup_arm_blx: case ARM::fixup_arm_uncondbl: switch (Modifier) { - case MCSymbolRefExpr::VK_ARM_PLT: + case MCSymbolRefExpr::VK_PLT: Type = ELF::R_ARM_PLT32; break; default: @@ -223,22 +223,22 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_ARM_NONE: Type = ELF::R_ARM_NONE; break; - case MCSymbolRefExpr::VK_ARM_GOT: + case MCSymbolRefExpr::VK_GOT: Type = ELF::R_ARM_GOT_BREL; break; - case MCSymbolRefExpr::VK_ARM_TLSGD: + case MCSymbolRefExpr::VK_TLSGD: Type = ELF::R_ARM_TLS_GD32; break; - case MCSymbolRefExpr::VK_ARM_TPOFF: + case MCSymbolRefExpr::VK_TPOFF: Type = ELF::R_ARM_TLS_LE32; break; - case MCSymbolRefExpr::VK_ARM_GOTTPOFF: + case MCSymbolRefExpr::VK_GOTTPOFF: Type = ELF::R_ARM_TLS_IE32; break; case MCSymbolRefExpr::VK_None: Type = ELF::R_ARM_ABS32; break; - case MCSymbolRefExpr::VK_ARM_GOTOFF: + case MCSymbolRefExpr::VK_GOTOFF: Type = ELF::R_ARM_GOTOFF32; break; case MCSymbolRefExpr::VK_ARM_TARGET1: diff --git 
a/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 471897de5c1c..05b4e2e22a8e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -14,13 +14,16 @@ //===----------------------------------------------------------------------===// #include "ARMBuildAttrs.h" +#include "ARMArchName.h" #include "ARMFPUName.h" #include "ARMRegisterInfo.h" #include "ARMUnwindOp.h" #include "ARMUnwindOpAsm.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" @@ -61,6 +64,45 @@ static const char *GetFPUName(unsigned ID) { return NULL; } +static const char *GetArchName(unsigned ID) { + switch (ID) { + default: + llvm_unreachable("Unknown ARCH kind"); + break; +#define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) \ + case ARM::ID: return NAME; +#define ARM_ARCH_ALIAS(NAME, ID) /* empty */ +#include "ARMArchName.def" + } + return NULL; +} + +static const char *GetArchDefaultCPUName(unsigned ID) { + switch (ID) { + default: + llvm_unreachable("Unknown ARCH kind"); + break; +#define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) \ + case ARM::ID: return DEFAULT_CPU_NAME; +#define ARM_ARCH_ALIAS(NAME, ID) /* empty */ +#include "ARMArchName.def" + } + return NULL; +} + +static unsigned GetArchDefaultCPUArch(unsigned ID) { + switch (ID) { + default: + llvm_unreachable("Unknown ARCH kind"); + break; +#define ARM_ARCH_NAME(NAME, ID, DEFAULT_CPU_NAME, DEFAULT_CPU_ARCH) \ + case ARM::ID: return ARMBuildAttrs::DEFAULT_CPU_ARCH; +#define ARM_ARCH_ALIAS(NAME, ID) /* empty */ +#include "ARMArchName.def" + } + return 0; +} + namespace { class ARMELFStreamer; @@ -82,7 +124,9 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer { virtual void switchVendor(StringRef Vendor); virtual void emitAttribute(unsigned Attribute, unsigned Value); virtual void emitTextAttribute(unsigned Attribute, StringRef String); + virtual void emitArch(unsigned Arch); virtual void emitFPU(unsigned FPU); + virtual void emitInst(uint32_t Inst, char Suffix = '\0'); virtual void finishAttributeSection(); public: @@ -143,12 +187,22 @@ void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute, break; } } +void ARMTargetAsmStreamer::emitArch(unsigned Arch) { + OS << "\t.arch\t" << GetArchName(Arch) << "\n"; +} void ARMTargetAsmStreamer::emitFPU(unsigned FPU) { OS << "\t.fpu\t" << GetFPUName(FPU) << "\n"; } void ARMTargetAsmStreamer::finishAttributeSection() { } +void ARMTargetAsmStreamer::emitInst(uint32_t Inst, char Suffix) { + OS << "\t.inst"; + if (Suffix) + OS << "." 
<< Suffix; + OS << "\t0x" << utohexstr(Inst) << "\n"; +} + class ARMTargetELFStreamer : public ARMTargetStreamer { private: // This structure holds all attributes, accounting for @@ -171,6 +225,7 @@ private: StringRef CurrentVendor; unsigned FPU; + unsigned Arch; SmallVector<AttributeItem, 64> Contents; const MCSection *AttributeSection; @@ -233,6 +288,7 @@ private: Contents.push_back(Item); } + void emitArchDefaultAttributes(); void emitFPUDefaultAttributes(); ARMELFStreamer &getStreamer(); @@ -250,7 +306,9 @@ private: virtual void switchVendor(StringRef Vendor); virtual void emitAttribute(unsigned Attribute, unsigned Value); virtual void emitTextAttribute(unsigned Attribute, StringRef String); + virtual void emitArch(unsigned Arch); virtual void emitFPU(unsigned FPU); + virtual void emitInst(uint32_t Inst, char Suffix = '\0'); virtual void finishAttributeSection(); size_t calculateContentSize() const; @@ -258,7 +316,7 @@ public: ARMTargetELFStreamer() : ARMTargetStreamer(), CurrentVendor("aeabi"), FPU(ARM::INVALID_FPU), - AttributeSection(0) { + Arch(ARM::INVALID_ARCH), AttributeSection(0) { } }; @@ -323,6 +381,44 @@ public: MCELFStreamer::EmitInstruction(Inst); } + virtual void emitInst(uint32_t Inst, char Suffix) { + unsigned Size; + char Buffer[4]; + const bool LittleEndian = getContext().getAsmInfo()->isLittleEndian(); + + switch (Suffix) { + case '\0': + Size = 4; + + assert(!IsThumb); + EmitARMMappingSymbol(); + for (unsigned II = 0, IE = Size; II != IE; II++) { + const unsigned I = LittleEndian ? (Size - II - 1) : II; + Buffer[Size - II - 1] = uint8_t(Inst >> I * CHAR_BIT); + } + + break; + case 'n': + case 'w': + Size = (Suffix == 'n' ? 2 : 4); + + assert(IsThumb); + EmitThumbMappingSymbol(); + for (unsigned II = 0, IE = Size; II != IE; II = II + 2) { + const unsigned I0 = LittleEndian ? II + 0 : (Size - II - 1); + const unsigned I1 = LittleEndian ? II + 1 : (Size - II - 2); + Buffer[Size - II - 2] = uint8_t(Inst >> I0 * CHAR_BIT); + Buffer[Size - II - 1] = uint8_t(Inst >> I1 * CHAR_BIT); + } + + break; + default: + llvm_unreachable("Invalid Suffix"); + } + + MCELFStreamer::EmitBytes(StringRef(Buffer, Size)); + } + /// This is one of the functions used to emit data into an ELF section, so the /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if /// necessary.
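A standalone sketch of the byte ordering emitInst() above produces, assuming little-endian output (the helper names are illustrative, not LLVM API): an ARM instruction is one 32-bit word emitted least-significant byte first, while a Thumb wide instruction is two 16-bit halfwords, high halfword first, each halfword least-significant byte first.

#include <cstdint>
#include <vector>

static void encodeARMWord(uint32_t Inst, std::vector<uint8_t> &Out) {
  for (unsigned I = 0; I != 4; ++I)
    Out.push_back(uint8_t(Inst >> (8 * I))); // b0 b1 b2 b3, LSB first
}

static void encodeThumbWide(uint32_t Inst, std::vector<uint8_t> &Out) {
  const uint16_t Halves[2] = { uint16_t(Inst >> 16), uint16_t(Inst) };
  for (unsigned H = 0; H != 2; ++H) {  // high halfword first
    Out.push_back(uint8_t(Halves[H])); // each halfword LSB first
    Out.push_back(uint8_t(Halves[H] >> 8));
  }
}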
@@ -491,6 +587,96 @@ void ARMTargetELFStreamer::emitTextAttribute(unsigned Attribute, StringRef Value) { setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true); } +void ARMTargetELFStreamer::emitArch(unsigned Value) { + Arch = Value; +} +void ARMTargetELFStreamer::emitArchDefaultAttributes() { + using namespace ARMBuildAttrs; + setAttributeItem(CPU_name, GetArchDefaultCPUName(Arch), false); + setAttributeItem(CPU_arch, GetArchDefaultCPUArch(Arch), false); + + switch (Arch) { + case ARM::ARMV2: + case ARM::ARMV2A: + case ARM::ARMV3: + case ARM::ARMV3M: + case ARM::ARMV4: + case ARM::ARMV5: + setAttributeItem(ARM_ISA_use, Allowed, false); + break; + + case ARM::ARMV4T: + case ARM::ARMV5T: + case ARM::ARMV5TE: + case ARM::ARMV6: + case ARM::ARMV6J: + setAttributeItem(ARM_ISA_use, Allowed, false); + setAttributeItem(THUMB_ISA_use, Allowed, false); + break; + + case ARM::ARMV6T2: + setAttributeItem(ARM_ISA_use, Allowed, false); + setAttributeItem(THUMB_ISA_use, AllowThumb32, false); + break; + + case ARM::ARMV6Z: + case ARM::ARMV6ZK: + setAttributeItem(ARM_ISA_use, Allowed, false); + setAttributeItem(THUMB_ISA_use, Allowed, false); + setAttributeItem(Virtualization_use, AllowTZ, false); + break; + + case ARM::ARMV6M: + setAttributeItem(CPU_arch_profile, MicroControllerProfile, false); + setAttributeItem(THUMB_ISA_use, Allowed, false); + break; + + case ARM::ARMV7: + setAttributeItem(THUMB_ISA_use, AllowThumb32, false); + break; + + case ARM::ARMV7A: + setAttributeItem(CPU_arch_profile, ApplicationProfile, false); + setAttributeItem(ARM_ISA_use, Allowed, false); + setAttributeItem(THUMB_ISA_use, AllowThumb32, false); + break; + + case ARM::ARMV7R: + setAttributeItem(CPU_arch_profile, RealTimeProfile, false); + setAttributeItem(ARM_ISA_use, Allowed, false); + setAttributeItem(THUMB_ISA_use, AllowThumb32, false); + break; + + case ARM::ARMV7M: + setAttributeItem(CPU_arch_profile, MicroControllerProfile, false); + setAttributeItem(THUMB_ISA_use, AllowThumb32, false); + break; + + case ARM::ARMV8A: + setAttributeItem(CPU_arch_profile, ApplicationProfile, false); + setAttributeItem(ARM_ISA_use, Allowed, false); + setAttributeItem(THUMB_ISA_use, AllowThumb32, false); + setAttributeItem(MPextension_use, Allowed, false); + setAttributeItem(Virtualization_use, AllowTZVirtualization, false); + break; + + case ARM::IWMMXT: + setAttributeItem(ARM_ISA_use, Allowed, false); + setAttributeItem(THUMB_ISA_use, Allowed, false); + setAttributeItem(WMMX_arch, AllowWMMXv1, false); + break; + + case ARM::IWMMXT2: + setAttributeItem(ARM_ISA_use, Allowed, false); + setAttributeItem(THUMB_ISA_use, Allowed, false); + setAttributeItem(WMMX_arch, AllowWMMXv2, false); + break; + + default: + report_fatal_error("Unknown Arch: " + Twine(Arch)); + break; + } +} void ARMTargetELFStreamer::emitFPU(unsigned Value) { FPU = Value; } @@ -498,43 +684,43 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { switch (FPU) { case ARM::VFP: case ARM::VFPV2: - setAttributeItem(ARMBuildAttrs::VFP_arch, + setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv2, /* OverwriteExisting= */ false); break; case ARM::VFPV3: - setAttributeItem(ARMBuildAttrs::VFP_arch, + setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv3A, /* OverwriteExisting= */ false); break; case ARM::VFPV3_D16: - setAttributeItem(ARMBuildAttrs::VFP_arch, + setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv3B, /* OverwriteExisting= */ false); break; case ARM::VFPV4: - setAttributeItem(ARMBuildAttrs::VFP_arch, + 
setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv4A, /* OverwriteExisting= */ false); break; case ARM::VFPV4_D16: - setAttributeItem(ARMBuildAttrs::VFP_arch, + setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv4B, /* OverwriteExisting= */ false); break; case ARM::FP_ARMV8: - setAttributeItem(ARMBuildAttrs::VFP_arch, + setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPARMv8A, /* OverwriteExisting= */ false); break; case ARM::NEON: - setAttributeItem(ARMBuildAttrs::VFP_arch, + setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv3A, /* OverwriteExisting= */ false); setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch, @@ -543,7 +729,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { break; case ARM::NEON_VFPV4: - setAttributeItem(ARMBuildAttrs::VFP_arch, + setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv4A, /* OverwriteExisting= */ false); setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch, @@ -553,7 +739,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { case ARM::NEON_FP_ARMV8: case ARM::CRYPTO_NEON_FP_ARMV8: - setAttributeItem(ARMBuildAttrs::VFP_arch, + setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPARMv8A, /* OverwriteExisting= */ false); setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch, @@ -561,6 +747,9 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { /* OverwriteExisting= */ false); break; + case ARM::SOFTVFP: + break; + default: report_fatal_error("Unknown FPU: " + Twine(FPU)); break; @@ -597,6 +786,9 @@ void ARMTargetELFStreamer::finishAttributeSection() { if (FPU != ARM::INVALID_FPU) emitFPUDefaultAttributes(); + if (Arch != ARM::INVALID_ARCH) + emitArchDefaultAttributes(); + if (Contents.empty()) return; @@ -654,6 +846,9 @@ void ARMTargetELFStreamer::finishAttributeSection() { Contents.clear(); FPU = ARM::INVALID_FPU; } +void ARMTargetELFStreamer::emitInst(uint32_t Inst, char Suffix) { + getStreamer().emitInst(Inst, Suffix); +} void ARMELFStreamer::FinishImpl() { MCTargetStreamer &TS = getTargetStreamer(); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index ad796e660e96..b2a71a87061a 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -45,7 +45,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() { Data64bitsDirective = 0; CommentString = "@"; - PrivateGlobalPrefix = ".L"; Code16Directive = ".code\t16"; Code32Directive = ".code\t32"; @@ -55,4 +54,7 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() { // Exceptions handling if (EnableARMEHABI) ExceptionsType = ExceptionHandling::ARM; + + // foo(plt) instead of foo@plt + UseParensForSymbolVariant = true; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index a99de0e78230..400dab71486f 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -89,14 +89,11 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { unsigned Idx = 0; // FIXME: Enhance Triple helper class to extract ARM version. 
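// Editorial illustration: for TT = "armv7-linux-gnueabi" the version
// characters start at index 4 ("7..."); for TT = "thumbv6t2-none-eabi" they
// start at index 6 ("6t2..."), and isThumb is now derived from the parsed
// Triple arch rather than re-scanned from the string.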
- bool isThumb = false; + bool isThumb = triple.getArch() == Triple::thumb; if (Len >= 5 && TT.substr(0, 4) == "armv") Idx = 4; - else if (Len >= 6 && TT.substr(0, 5) == "thumb") { - isThumb = true; - if (Len >= 7 && TT[5] == 'v') - Idx = 6; - } + else if (Len >= 7 && TT.substr(0, 6) == "thumbv") + Idx = 6; bool NoCPU = CPU == "generic" || CPU.empty(); std::string ARMArchFeature; @@ -268,7 +265,7 @@ static MCInstPrinter *createARMMCInstPrinter(const Target &T, static MCRelocationInfo *createARMMCRelocationInfo(StringRef TT, MCContext &Ctx) { Triple TheTriple(TT); - if (TheTriple.isEnvironmentMachO()) + if (TheTriple.isOSBinFormatMachO()) return createARMMachORelocationInfo(Ctx); // Default to the stock relocation info. return llvm::createMCRelocationInfo(TT, Ctx); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 1f681bac2242..4ef18b4298b3 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -82,10 +82,14 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, Log2Size = llvm::Log2_32(8); return true; - // Handle 24-bit branch kinds. + // These fixups are expected to always be resolvable at assembly time and + // have no relocations supported. case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: case ARM::fixup_arm_adr_pcrel_12: + return false; + + // Handle 24-bit branch kinds. case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: case ARM::fixup_arm_uncondbl: diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt index bab59f41c989..162de7d21e21 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -11,7 +11,3 @@ add_llvm_library(LLVMARMDesc ARMUnwindOpAsm.cpp ARMMachORelocationInfo.cpp ) -add_dependencies(LLVMARMDesc ARMCommonTableGen) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/TargetInfo/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/ARM/TargetInfo/CMakeLists.txt index 533e747894ca..03393a0102c1 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/TargetInfo/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/TargetInfo/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - add_llvm_library(LLVMARMInfo ARMTargetInfo.cpp ) - -add_dependencies(LLVMARMInfo ARMCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/TargetInfo/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/ARM/TargetInfo/LLVMBuild.txt index a07a94047d4e..cce6cc769559 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/TargetInfo/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/TargetInfo/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = ARMInfo parent = ARM -required_libraries = MC Support Target +required_libraries = Support add_to_library_groups = ARM diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index cfb33f5b8212..0a05c0b84953 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -165,7 +165,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { NumBytes = DPRCSOffset; int FramePtrOffsetInBlock = 0; - if (tryFoldSPUpdateIntoPushPop(MF, prior(MBBI), NumBytes)) { + if (tryFoldSPUpdateIntoPushPop(STI, MF, prior(MBBI), NumBytes)) { FramePtrOffsetInBlock = NumBytes; NumBytes = 0; } @@ -291,9 +291,9 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, &MBB.front() != MBBI && prior(MBBI)->getOpcode() == ARM::tPOP) { MachineBasicBlock::iterator PMBBI = prior(MBBI); - if (!tryFoldSPUpdateIntoPushPop(MF, PMBBI, NumBytes)) + if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes)) emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); - } else if (!tryFoldSPUpdateIntoPushPop(MF, MBBI, NumBytes)) + } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, MBBI, NumBytes)) emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); } } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/external/bsd/llvm/dist/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp index 4795aae12fd6..3ef822d94fc2 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -1012,8 +1012,7 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { AttributeSet FnAttrs = MF.getFunction()->getAttributes(); OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); - MinimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::MinSize); + MinimizeSize = STI->isMinSize(); BlockInfo.clear(); BlockInfo.resize(MF.getNumBlockIDs()); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/CppBackend/CPPBackend.cpp b/external/bsd/llvm/dist/llvm/lib/Target/CppBackend/CPPBackend.cpp index ddc7a66c9f39..c290f70e9e4c 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/CppBackend/CPPBackend.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/CppBackend/CPPBackend.cpp @@ -361,25 +361,25 @@ void CppWriter::printEscapedString(const std::string &Str) { } std::string CppWriter::getCppName(Type* Ty) { - // First, handle the primitive types .. 
easy
- if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
- switch (Ty->getTypeID()) {
- case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())";
- case Type::IntegerTyID: {
- unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
- return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")";
- }
- case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())";
- case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())";
- case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())";
- case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())";
- case Type::X86_MMXTyID: return "Type::getX86_MMXTy(mod->getContext())";
- default:
- error("Invalid primitive type");
- break;
- }
- // shouldn't be returned, but make it sensible
+ switch (Ty->getTypeID()) {
+ default:
+ break;
+ case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())";
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+ return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")";
+ }
+ case Type::X86_FP80TyID:
+ return "Type::getX86_FP80Ty(mod->getContext())";
+ case Type::FloatTyID:
+ return "Type::getFloatTy(mod->getContext())";
+ case Type::DoubleTyID:
+ return "Type::getDoubleTy(mod->getContext())";
+ case Type::LabelTyID:
+ return "Type::getLabelTy(mod->getContext())";
+ case Type::X86_MMXTyID:
+ return "Type::getX86_MMXTy(mod->getContext())";
}
// Now, see if we've seen the type before and return that
@@ -491,6 +491,7 @@ void CppWriter::printAttributes(const AttributeSet &PAL,
HANDLE_ATTR(NoUnwind);
HANDLE_ATTR(NoAlias);
HANDLE_ATTR(ByVal);
+ HANDLE_ATTR(InAlloca);
HANDLE_ATTR(Nest);
HANDLE_ATTR(ReadNone);
HANDLE_ATTR(ReadOnly);
@@ -537,7 +538,8 @@ void CppWriter::printAttributes(const AttributeSet &PAL,
void CppWriter::printType(Type* Ty) {
// We don't print definitions for primitive types
- if (Ty->isPrimitiveType() || Ty->isIntegerTy())
+ if (Ty->isFloatingPointTy() || Ty->isX86_MMXTy() || Ty->isIntegerTy() ||
+ Ty->isLabelTy() || Ty->isMetadataTy() || Ty->isVoidTy())
return;
// If we already defined this type, we don't need to define it again.
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/CppBackend/TargetInfo/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
index f82d72e378cb..d86446f6bc02 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
+++ b/external/bsd/llvm/dist/llvm/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
@@ -1,5 +1,3 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
add_llvm_library(LLVMCppBackendInfo
CppBackendTargetInfo.cpp
)
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
index 1ca74a4895c4..096dc7350db0 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp
@@ -14,9 +14,10 @@ using namespace llvm;
Target llvm::TheCppBackendTarget;
-static unsigned CppBackend_TripleMatchQuality(const std::string &TT) {
- // This class always works, but shouldn't be the default in most cases.
- return 1;
+static bool CppBackend_TripleMatchQuality(Triple::ArchType Arch) {
+ // This backend doesn't correspond to any architecture. It must be explicitly
+ // selected with -march.
+ return false; } extern "C" void LLVMInitializeCppBackendTargetInfo() { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt index d4dfc3ef0406..9c186a52f4fa 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = CppBackendInfo parent = CppBackend -required_libraries = MC Support Target +required_libraries = Support add_to_library_groups = CppBackend diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/CMakeLists.txt index ae3c9ebc2555..81b0e5680547 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/CMakeLists.txt @@ -37,8 +37,6 @@ add_llvm_target(HexagonCodeGen HexagonCopyToCombine.cpp ) -add_dependencies(LLVMHexagonCodeGen HexagonCommonTableGen intrinsics_gen) - add_subdirectory(TargetInfo) add_subdirectory(InstPrinter) add_subdirectory(MCTargetDesc) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/Hexagon.td b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/Hexagon.td index 568798c3a412..c1b6d45ce899 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/Hexagon.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/Hexagon.td @@ -205,14 +205,6 @@ def : Proc<"hexagonv3", HexagonModel, [ArchV2, ArchV3]>; def : Proc<"hexagonv4", HexagonModelV4, [ArchV2, ArchV3, ArchV4]>; def : Proc<"hexagonv5", HexagonModelV4, [ArchV2, ArchV3, ArchV4, ArchV5]>; - -// Hexagon Uses the MC printer for assembler output, so make sure the TableGen -// AsmWriter bits get associated with the correct class. -def HexagonAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - bit isMCAsmWriter = 1; -} - //===----------------------------------------------------------------------===// // Declare the target which we are implementing //===----------------------------------------------------------------------===// @@ -220,6 +212,4 @@ def HexagonAsmWriter : AsmWriter { def Hexagon : Target { // Pull in Instruction Info: let InstructionSet = HexagonInstrInfo; - - let AssemblyWriters = [HexagonAsmWriter]; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp index a2e04baea76a..df59ec719024 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -61,17 +61,6 @@ static cl::opt AlignCalls( "hexagon-align-calls", cl::Hidden, cl::init(true), cl::desc("Insert falign after call instruction for Hexagon target")); -void HexagonAsmPrinter::EmitAlignment(unsigned NumBits, - const GlobalValue *GV) const { - // For basic block level alignment, use ".falign". 
- if (!GV) { - OutStreamer.EmitRawText(StringRef("\t.falign")); - return; - } - - AsmPrinter::EmitAlignment(NumBits, GV); -} - void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNo); @@ -87,16 +76,9 @@ void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, case MachineOperand::MO_MachineBasicBlock: O << *MO.getMBB()->getSymbol(); return; - case MachineOperand::MO_JumpTableIndex: - O << *GetJTISymbol(MO.getIndex()); - // FIXME: PIC relocation model. - return; case MachineOperand::MO_ConstantPoolIndex: O << *GetCPISymbol(MO.getIndex()); return; - case MachineOperand::MO_ExternalSymbol: - O << *GetExternalSymbolSymbol(MO.getSymbolName()); - return; case MachineOperand::MO_GlobalAddress: // Computing the address of a global symbol, not calling it. O << *getSymbol(MO.getGlobal()); @@ -186,12 +168,6 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -void HexagonAsmPrinter::printPredicateOperand(const MachineInstr *MI, - unsigned OpNo, - raw_ostream &O) { - llvm_unreachable("Unimplemented"); -} - /// printMachineInstruction -- Print out a single Hexagon MI in Darwin syntax to /// the current output stream. @@ -240,60 +216,6 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } -/// PrintUnmangledNameSafely - Print out the printable characters in the name. -/// Don't print things like \n or \0. -// static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) { -// for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen(); -// Name != E; ++Name) -// if (isprint(*Name)) -// OS << *Name; -// } - - -void HexagonAsmPrinter::printAddrModeBasePlusOffset(const MachineInstr *MI, - int OpNo, raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(OpNo); - const MachineOperand &MO2 = MI->getOperand(OpNo+1); - - O << HexagonInstPrinter::getRegisterName(MO1.getReg()) - << " + #" - << MO2.getImm(); -} - - -void HexagonAsmPrinter::printGlobalOperand(const MachineInstr *MI, int OpNo, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNo); - assert( (MO.getType() == MachineOperand::MO_GlobalAddress) && - "Expecting global address"); - - O << *getSymbol(MO.getGlobal()); - if (MO.getOffset() != 0) { - O << " + "; - O << MO.getOffset(); - } -} - -void HexagonAsmPrinter::printJumpTable(const MachineInstr *MI, int OpNo, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNo); - assert( (MO.getType() == MachineOperand::MO_JumpTableIndex) && - "Expecting jump table index"); - - // Hexagon_TODO: Do we need name mangling? - O << *GetJTISymbol(MO.getIndex()); -} - -void HexagonAsmPrinter::printConstantPool(const MachineInstr *MI, int OpNo, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNo); - assert( (MO.getType() == MachineOperand::MO_ConstantPoolIndex) && - "Expecting constant pool index"); - - // Hexagon_TODO: Do we need name mangling? 
- O << *GetCPISymbol(MO.getIndex()); -} - static MCInstPrinter *createHexagonMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h index bc2af636124c..a186dc9c733e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h @@ -37,8 +37,6 @@ namespace llvm { bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; virtual void EmitInstruction(const MachineInstr *MI); - virtual void EmitAlignment(unsigned NumBits, - const GlobalValue *GV = 0) const; void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -48,116 +46,7 @@ namespace llvm { unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS); - /// printInstruction - This method is automatically generated by tablegen - /// from the instruction set description. This method returns true if the - /// machine instruction was sufficiently described to print it, otherwise it - /// returns false. - void printInstruction(const MachineInstr *MI, raw_ostream &O); - - // void printMachineInstruction(const MachineInstr *MI); - void printOp(const MachineOperand &MO, raw_ostream &O); - - /// printRegister - Print register according to target requirements. - /// - void printRegister(const MachineOperand &MO, bool R0AsZero, - raw_ostream &O) { - unsigned RegNo = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??"); - O << getRegisterName(RegNo); - } - - void printImmOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - int value = MI->getOperand(OpNo).getImm(); - O << value; - } - - void printNegImmOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - int value = MI->getOperand(OpNo).getImm(); - O << -value; - } - - void printMEMriOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(OpNo); - const MachineOperand &MO2 = MI->getOperand(OpNo+1); - - O << getRegisterName(MO1.getReg()) - << " + #" - << (int) MO2.getImm(); - } - - void printFrameIndexOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(OpNo); - const MachineOperand &MO2 = MI->getOperand(OpNo+1); - - O << getRegisterName(MO1.getReg()) - << ", #" - << MO2.getImm(); - } - - void printBranchOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - // Branches can take an immediate operand. This is used by the branch - // selection pass to print $+8, an eight byte displacement from the PC. 
- if (MI->getOperand(OpNo).isImm()) { - O << "$+" << MI->getOperand(OpNo).getImm()*4; - } else { - printOp(MI->getOperand(OpNo), O); - } - } - - void printCallOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - } - - void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - } - - void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - O << "#HI("; - if (MI->getOperand(OpNo).isImm()) { - printImmOperand(MI, OpNo, O); - } - else { - printOp(MI->getOperand(OpNo), O); - } - O << ")"; - } - - void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - O << "#HI("; - if (MI->getOperand(OpNo).isImm()) { - printImmOperand(MI, OpNo, O); - } - else { - printOp(MI->getOperand(OpNo), O); - } - O << ")"; - } - - void printPredicateOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O); - -#if 0 - void printModuleLevelGV(const GlobalVariable* GVar, raw_ostream &O); -#endif - - void printAddrModeBasePlusOffset(const MachineInstr *MI, int OpNo, - raw_ostream &O); - - void printGlobalOperand(const MachineInstr *MI, int OpNo, raw_ostream &O); - void printJumpTable(const MachineInstr *MI, int OpNo, raw_ostream &O); - void printConstantPool(const MachineInstr *MI, int OpNo, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo); - -#if 0 - void EmitStartOfAsmFile(Module &M); -#endif }; } // end of llvm namespace diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 5ae93284269b..7a345b6937e9 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -1639,7 +1639,7 @@ bool HexagonDAGToDAGISel::hasNumUsesBelowThresGA(SDNode *N) const { } //===--------------------------------------------------------------------===// -// Return true if the non GP-relative global address can be folded. +// Return true if the non-GP-relative global address can be folded. 
//===--------------------------------------------------------------------===// inline bool HexagonDAGToDAGISel::foldGlobalAddress(SDValue &N, SDValue &R) { return foldGlobalAddressImpl(N, R, false); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 6b97609415a3..f9be3192f1fa 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1539,7 +1539,7 @@ int HexagonInstrInfo::GetDotOldOp(const int opc) const { assert(0 && "Couldn't change predicate new instruction to its old form."); } - if (isNewValueStore(NewOp)) { // Convert into non new-value format + if (isNewValueStore(NewOp)) { // Convert into non-new-value format NewOp = Hexagon::getNonNVStore(NewOp); if (NewOp < 0) assert(0 && "Couldn't change new-value store to its old form."); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td index 475c23d98bf7..d2600dffb08a 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -1016,7 +1016,7 @@ class NVJrr_template majOp, bit NvOpNum, bits<5> src1; bits<5> src2; bits<3> Ns; // New-Value Operand - bits<5> RegOp; // Non New-Value Operand + bits<5> RegOp; // Non-New-Value Operand bits<11> offset; let isBrTaken = !if(isTaken, "true", "false"); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp index c94f081ab13b..98aeabb800be 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -186,6 +186,9 @@ void VLIWMachineScheduler::schedule() { scheduleMI(SU, IsTopNode); updateQueues(SU, IsTopNode); + + // Notify the scheduling strategy after updating the DAG. + SchedImpl->schedNode(SU, IsTopNode); } assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); @@ -266,7 +269,7 @@ void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { /// can dispatch per cycle. /// /// TODO: Also check whether the SU must start a new group. -bool ConvergingVLIWScheduler::SchedBoundary::checkHazard(SUnit *SU) { +bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) { if (HazardRec->isEnabled()) return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; @@ -277,7 +280,7 @@ bool ConvergingVLIWScheduler::SchedBoundary::checkHazard(SUnit *SU) { return false; } -void ConvergingVLIWScheduler::SchedBoundary::releaseNode(SUnit *SU, +void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; @@ -292,7 +295,7 @@ void ConvergingVLIWScheduler::SchedBoundary::releaseNode(SUnit *SU, } /// Move the boundary of scheduled code by one cycle. -void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() { +void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { unsigned Width = SchedModel->getIssueWidth(); IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; @@ -318,7 +321,7 @@ void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() { } /// Move the boundary of scheduled code by one SUnit. 
-void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) { +void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) { bool startNewCycle = false; // Update the reservation table. @@ -348,7 +351,7 @@ void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) { /// Release pending ready nodes in to the available queue. This makes them /// visible to heuristics. -void ConvergingVLIWScheduler::SchedBoundary::releasePending() { +void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() { // If the available queue is empty, it is safe to reset MinReadyCycle. if (Available.empty()) MinReadyCycle = UINT_MAX; @@ -376,7 +379,7 @@ void ConvergingVLIWScheduler::SchedBoundary::releasePending() { } /// Remove SU from the ready set for this boundary. -void ConvergingVLIWScheduler::SchedBoundary::removeReady(SUnit *SU) { +void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) { if (Available.isInQueue(SU)) Available.remove(Available.find(SU)); else { @@ -388,7 +391,7 @@ void ConvergingVLIWScheduler::SchedBoundary::removeReady(SUnit *SU) { /// If this queue only has one ready candidate, return it. As a side effect, /// advance the cycle until at least one node is ready. If multiple instructions /// are ready, return NULL. -SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() { +SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() { if (CheckPending) releasePending(); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h index 8ac333fa7db3..8106a205a490 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -92,14 +92,14 @@ VLIWResourceModel(const TargetMachine &TM, const TargetSchedModel *SM) : /// Extend the standard ScheduleDAGMI to provide more context and override the /// top-level schedule() driver. -class VLIWMachineScheduler : public ScheduleDAGMI { +class VLIWMachineScheduler : public ScheduleDAGMILive { public: VLIWMachineScheduler(MachineSchedContext *C, MachineSchedStrategy *S): - ScheduleDAGMI(C, S) {} + ScheduleDAGMILive(C, S) {} /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's /// time to do some work. - virtual void schedule(); + virtual void schedule() LLVM_OVERRIDE; /// Perform platform specific DAG postprocessing. void postprocessDAG(); }; @@ -130,7 +130,7 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy { /// Each Scheduling boundary is associated with ready queues. It tracks the /// current cycle in whichever direction at has moved, and maintains the state /// of "hazards" and other interlocks at the current cycle. - struct SchedBoundary { + struct VLIWSchedBoundary { VLIWMachineScheduler *DAG; const TargetSchedModel *SchedModel; @@ -152,14 +152,14 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy { /// Pending queues extend the ready queues with the same ID and the /// PendingFlag set. 
- SchedBoundary(unsigned ID, const Twine &Name):
+ VLIWSchedBoundary(unsigned ID, const Twine &Name):
DAG(0), SchedModel(0), Available(ID, Name+".A"),
Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P"),
CheckPending(false), HazardRec(0), ResourceModel(0),
CurrCycle(0), IssueCount(0),
MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
- ~SchedBoundary() {
+ ~VLIWSchedBoundary() {
delete ResourceModel;
delete HazardRec;
}
@@ -192,8 +192,8 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy {
const TargetSchedModel *SchedModel;
// State of the top and bottom scheduled instruction boundaries.
- SchedBoundary Top;
- SchedBoundary Bot;
+ VLIWSchedBoundary Top;
+ VLIWSchedBoundary Bot;
public:
/// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
@@ -206,15 +206,15 @@ public:
ConvergingVLIWScheduler():
DAG(0), SchedModel(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
- virtual void initialize(ScheduleDAGMI *dag);
+ virtual void initialize(ScheduleDAGMI *dag) LLVM_OVERRIDE;
- virtual SUnit *pickNode(bool &IsTopNode);
+ virtual SUnit *pickNode(bool &IsTopNode) LLVM_OVERRIDE;
- virtual void schedNode(SUnit *SU, bool IsTopNode);
+ virtual void schedNode(SUnit *SU, bool IsTopNode) LLVM_OVERRIDE;
- virtual void releaseTopNode(SUnit *SU);
+ virtual void releaseTopNode(SUnit *SU) LLVM_OVERRIDE;
- virtual void releaseBottomNode(SUnit *SU);
+ virtual void releaseBottomNode(SUnit *SU) LLVM_OVERRIDE;
unsigned ReportPackets() {
return Top.ResourceModel->getTotalPackets() +
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
index 44234e85dc69..552cac22eefb 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -42,6 +42,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineFunctionAnalysis>();
AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addPreserved("stack-protector");
FunctionPass::getAnalysisUsage(AU);
}
};
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index bb950a0ea75a..09e6e1afad20 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -71,9 +71,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- DL("e-p:32:32:32-"
- "i64:64:64-i32:32:32-i16:16:16-i1:32:32-"
- "f64:64:64-f32:32:32-a0:0-n32") ,
+ DL("e-m:e-p:32:32-i1:32-i64:64-a:0-n32") ,
Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget),
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 41e382dc072a..697419be6e43 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -681,7 +681,7 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
}
}
- // Make sure that for non POST_INC stores:
+ // Make sure that for non-POST_INC stores:
// 1. The only use of reg is DepReg and no other registers.
// This handles V4 base+index registers.
// The following store can not be dot new.
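The Hexagon datalayout rewrite above is part of a wider LLVM 3.5-era migration: the string gains an explicit mangling component ("m:e" for ELF, which supplies the ".L" private-symbol prefix that the MCAsmInfo hunks earlier in this patch stop setting), and entries that match the built-in defaults are dropped. A minimal sketch, assuming the 3.5-era DataLayout API; the equivalence of the old and new strings is implied by the patch rather than verified here:

  #include "llvm/IR/DataLayout.h"

  // Minimal sketch (assumes the LLVM 3.5-era API and headers).
  void datalayoutExample() {
    llvm::DataLayout DL("e-m:e-p:32:32-i1:32-i64:64-a:0-n32");
    // 'e'       -> little-endian:   DL.isLittleEndian() returns true.
    // 'p:32:32' -> 32-bit pointers: DL.getPointerSizeInBits() returns 32.
    // 'm:e'     -> ELF mangling, so DL.getPrivateGlobalPrefix() (the accessor
    //              the Mangler.cpp hunk below switches to) yields ".L".
    // The i16/i32/f32/f64 entries the old string spelled out all match the
    // built-in defaults, which is why they could be dropped.
  }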
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h index c607b5d35649..668ca98402b2 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/HexagonVarargsCallingConvention.h @@ -41,7 +41,7 @@ static bool CC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT, } - // Only assign registers for named (non varargs) arguments + // Only assign registers for named (non-varargs) arguments if ( !ForceMem && ((NonVarArgsParams == -1) || (CurrentParam <= NonVarArgsParams))) { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/InstPrinter/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/InstPrinter/CMakeLists.txt index cb106a884432..1ddaf9bac203 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/InstPrinter/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/InstPrinter/CMakeLists.txt @@ -1,5 +1,3 @@ add_llvm_library(LLVMHexagonAsmPrinter HexagonInstPrinter.cpp ) - -add_dependencies(LLVMHexagonAsmPrinter HexagonCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/LLVMBuild.txt index c6d419a91058..0cf9a062b659 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/LLVMBuild.txt @@ -28,5 +28,5 @@ has_asmprinter = 1 type = Library name = HexagonCodeGen parent = Hexagon -required_libraries = AsmPrinter CodeGen Core HexagonAsmPrinter HexagonDesc HexagonInfo MC SelectionDAG Support Target +required_libraries = Analysis AsmPrinter CodeGen Core HexagonAsmPrinter HexagonDesc HexagonInfo MC Scalar SelectionDAG Support Target TransformUtils add_to_library_groups = Hexagon diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt index 62b9b600ce8e..eeef3ef8c200 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt @@ -3,5 +3,3 @@ add_llvm_library(LLVMHexagonDesc HexagonMCInst.cpp HexagonMCTargetDesc.cpp ) - -add_dependencies(LLVMHexagonDesc HexagonCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp index 3f9415b94df9..f1a65c3f5063 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp @@ -26,7 +26,6 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(StringRef TT) { CommentString = "//"; HasLEB128 = true; - PrivateGlobalPrefix = ".L"; LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment; InlineAsmStart = "# InlineAsm Start"; InlineAsmEnd = "# InlineAsm End"; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/TargetInfo/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/TargetInfo/CMakeLists.txt index 5b04a30d26c2..b9411f6925d2 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/TargetInfo/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/TargetInfo/CMakeLists.txt @@ -1,8 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - add_llvm_library(LLVMHexagonInfo HexagonTargetInfo.cpp ) - -add_dependencies(LLVMHexagonInfo HexagonCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt index 7b87be3e05a8..095a4b003264 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = HexagonInfo parent = Hexagon -required_libraries = MC Support +required_libraries = Support add_to_library_groups = Hexagon diff --git a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/CMakeLists.txt index c9b3c3d0c8e3..a8f9b52746ad 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/CMakeLists.txt @@ -23,8 +23,6 @@ add_llvm_target(MSP430CodeGen MSP430MCInstLower.cpp ) -add_dependencies(LLVMMSP430CodeGen MSP430CommonTableGen intrinsics_gen) - add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt index 64ac994b7f47..580a9ce71d9d 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/InstPrinter/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMMSP430AsmPrinter MSP430InstPrinter.cpp ) - -add_dependencies(LLVMMSP430AsmPrinter MSP430CommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt index adc95c52014e..0f3ebd303924 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt @@ -2,5 +2,3 @@ add_llvm_library(LLVMMSP430Desc MSP430MCTargetDesc.cpp MSP430MCAsmInfo.cpp ) - -add_dependencies(LLVMMSP430Desc MSP430CommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt index 3319d9363e16..b8f3d02ab4d8 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = MSP430Desc parent = MSP430 -required_libraries = MC MSP430AsmPrinter MSP430Info Support Target +required_libraries = MC MSP430AsmPrinter MSP430Info add_to_library_groups = MSP430 diff --git a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp index acf2ab869d59..df1aa1a41f19 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp @@ -20,7 +20,6 @@ void MSP430MCAsmInfo::anchor() { } MSP430MCAsmInfo::MSP430MCAsmInfo(StringRef TT) { PointerSize = CalleeSaveStackSlotSize = 2; - PrivateGlobalPrefix = ".L"; CommentString = ";"; AlignmentIsInBytes = false; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MSP430.td 
b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MSP430.td index c6796b3789ad..dfea669f3ba1 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MSP430.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MSP430.td @@ -50,17 +50,11 @@ include "MSP430InstrInfo.td" def MSP430InstrInfo : InstrInfo; -def MSP430InstPrinter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - bit isMCAsmWriter = 1; -} - //===----------------------------------------------------------------------===// // Target Declaration //===----------------------------------------------------------------------===// def MSP430 : Target { let InstructionSet = MSP430InstrInfo; - let AssemblyWriters = [MSP430InstPrinter]; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp index 18311c3f5522..95be0881ed04 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -99,12 +99,6 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, return; } - case MachineOperand::MO_ExternalSymbol: { - bool isMemOp = Modifier && !strcmp(Modifier, "mem"); - O << (isMemOp ? '&' : '#'); - O << MAI->getGlobalPrefix() << MO.getSymbolName(); - return; - } } } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp index 52f9ee57e2be..f9b7a3ec3d4e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -24,6 +25,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; MCSymbol *MSP430MCInstLower:: @@ -48,8 +50,9 @@ GetExternalSymbolSymbol(const MachineOperand &MO) const { MCSymbol *MSP430MCInstLower:: GetJumpTableSymbol(const MachineOperand &MO) const { + const DataLayout *DL = Printer.TM.getDataLayout(); SmallString<256> Name; - raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI" + raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI" << Printer.getFunctionNumber() << '_' << MO.getIndex(); @@ -64,8 +67,9 @@ GetJumpTableSymbol(const MachineOperand &MO) const { MCSymbol *MSP430MCInstLower:: GetConstantPoolIndexSymbol(const MachineOperand &MO) const { + const DataLayout *DL = Printer.TM.getDataLayout(); SmallString<256> Name; - raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI" + raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "CPI" << Printer.getFunctionNumber() << '_' << MO.getIndex(); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp index 6710a097075b..98a6003fe65b 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -34,7 +34,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS), // 
FIXME: Check DataLayout string.
- DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
+ DL("e-m:e-p:16:16-i32:16:32-n8:16"),
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
FrameLowering(Subtarget) {
initAsmInfo();
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/TargetInfo/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/TargetInfo/CMakeLists.txt
index f6b40eab31b6..fee5f434e79b 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/TargetInfo/CMakeLists.txt
+++ b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/TargetInfo/CMakeLists.txt
@@ -1,7 +1,3 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
add_llvm_library(LLVMMSP430Info
MSP430TargetInfo.cpp
)
-
-add_dependencies(LLVMMSP430Info MSP430CommonTableGen)
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/TargetInfo/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/TargetInfo/LLVMBuild.txt
index deafc2d2f558..ee41ae466744 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/MSP430/TargetInfo/LLVMBuild.txt
+++ b/external/bsd/llvm/dist/llvm/lib/Target/MSP430/TargetInfo/LLVMBuild.txt
@@ -19,5 +19,5 @@ type = Library
name = MSP430Info
parent = MSP430
-required_libraries = MC Support Target
+required_libraries = Support
add_to_library_groups = MSP430
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mangler.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mangler.cpp
index 38be25c330ab..ccff5c839696 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/Mangler.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Target/Mangler.cpp
@@ -17,9 +17,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -27,36 +25,26 @@ using namespace llvm;
/// and the specified name as the global variable name. GVName must not be
/// empty.
void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
- const Twine &GVName, ManglerPrefixTy PrefixTy,
- bool UseGlobalPrefix) {
+ const Twine &GVName, ManglerPrefixTy PrefixTy) {
SmallString<256> TmpData;
StringRef Name = GVName.toStringRef(TmpData);
assert(!Name.empty() && "getNameWithPrefix requires non-empty name");
-
- const MCAsmInfo *MAI = TM->getMCAsmInfo();
-
+
// If the global name is not led with \1, add the appropriate prefixes.
if (Name[0] == '\1') {
Name = Name.substr(1);
} else {
if (PrefixTy == Mangler::Private) {
- const char *Prefix = MAI->getPrivateGlobalPrefix();
+ const char *Prefix = DL->getPrivateGlobalPrefix();
OutName.append(Prefix, Prefix+strlen(Prefix));
} else if (PrefixTy == Mangler::LinkerPrivate) {
- const char *Prefix = MAI->getLinkerPrivateGlobalPrefix();
+ const char *Prefix = DL->getLinkerPrivateGlobalPrefix();
OutName.append(Prefix, Prefix+strlen(Prefix));
}
- if (UseGlobalPrefix) {
- const char *Prefix = MAI->getGlobalPrefix();
- if (Prefix[0] == 0)
- ; // Common noop, no prefix.
- else if (Prefix[1] == 0)
- OutName.push_back(Prefix[0]); // Common, one character prefix.
- else
- // Arbitrary length prefix.
- OutName.append(Prefix, Prefix+strlen(Prefix));
- }
+ char Prefix = DL->getGlobalPrefix();
+ if (Prefix != '\0')
+ OutName.push_back(Prefix);
}
// If this is a simple string that doesn't need escaping, just append it.
@@ -88,10 +76,9 @@ static void AddFastCallStdCallSuffix(SmallVectorImpl<char> &OutName,
/// and the specified global variable's name. If the global variable doesn't
/// have a name, this fills in a unique name for the global.
void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
- const GlobalValue *GV, bool isImplicitlyPrivate,
- bool UseGlobalPrefix) {
+ const GlobalValue *GV) {
ManglerPrefixTy PrefixTy = Mangler::Default;
- if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
+ if (GV->hasPrivateLinkage())
PrefixTy = Mangler::Private;
else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage())
PrefixTy = Mangler::LinkerPrivate;
@@ -99,7 +86,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
// If this global has a name, handle it simply.
if (GV->hasName()) {
StringRef Name = GV->getName();
- getNameWithPrefix(OutName, Name, PrefixTy, UseGlobalPrefix);
+ getNameWithPrefix(OutName, Name, PrefixTy);
// No need to do anything else if the global has the special "do not mangle"
// flag in the name.
if (Name[0] == 1)
@@ -111,13 +98,12 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
if (ID == 0) ID = NextAnonGlobalID++;
// Must mangle the global into a unique ID.
- getNameWithPrefix(OutName, "__unnamed_" + Twine(ID), PrefixTy,
- UseGlobalPrefix);
+ getNameWithPrefix(OutName, "__unnamed_" + Twine(ID), PrefixTy);
}
-
+
// If we are supposed to add a microsoft-style suffix for stdcall/fastcall,
// add it.
- if (TM->getMCAsmInfo()->hasMicrosoftFastStdCallMangling()) {
+ if (DL->hasMicrosoftFastStdCallMangling()) {
if (const Function *F = dyn_cast<Function>(GV)) {
CallingConv::ID CC = F->getCallingConv();
@@ -137,7 +123,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
// "Pure" variadic functions do not receive @0 suffix.
(!FT->isVarArg() || FT->getNumParams() == 0 ||
(FT->getNumParams() == 1 && F->hasStructRetAttr())))
- AddFastCallStdCallSuffix(OutName, F, *TM->getDataLayout());
+ AddFastCallStdCallSuffix(OutName, F, *DL);
}
}
}
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/AsmParser/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/Mips/AsmParser/CMakeLists.txt
index 28f521910901..f1675560853a 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/AsmParser/CMakeLists.txt
+++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/AsmParser/CMakeLists.txt
@@ -1,6 +1,3 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/..
) add_llvm_library(LLVMMipsAsmParser MipsAsmParser.cpp ) - -add_dependencies(LLVMMipsAsmParser MipsCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index cdae6c2f37e5..cae3999c12bc 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/ADT/APInt.h" +#include "llvm/Support/MathExtras.h" using namespace llvm; @@ -220,6 +221,10 @@ class MipsAsmParser : public MCTargetAsmParser { bool isN64() const { return STI.getFeatureBits() & Mips::FeatureN64; } + bool isMicroMips() const { + return STI.getFeatureBits() & Mips::FeatureMicroMips; + } + int matchRegisterName(StringRef Symbol, bool is64BitReg); int matchCPURegisterName(StringRef Symbol); @@ -563,6 +568,45 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); Inst.setLoc(IDLoc); + + if (MCID.isBranch() || MCID.isCall()) { + const unsigned Opcode = Inst.getOpcode(); + MCOperand Offset; + + switch (Opcode) { + default: + break; + case Mips::BEQ: + case Mips::BNE: + assert (MCID.getNumOperands() == 3 && "unexpected number of operands"); + Offset = Inst.getOperand(2); + if (!Offset.isImm()) + break; // We'll deal with this situation later on when applying fixups. + if (!isIntN(isMicroMips() ? 17 : 18, Offset.getImm())) + return Error(IDLoc, "branch target out of range"); + if (OffsetToAlignment (Offset.getImm(), 1LL << (isMicroMips() ? 1 : 2))) + return Error(IDLoc, "branch to misaligned address"); + break; + case Mips::BGEZ: + case Mips::BGTZ: + case Mips::BLEZ: + case Mips::BLTZ: + case Mips::BGEZAL: + case Mips::BLTZAL: + case Mips::BC1F: + case Mips::BC1T: + assert (MCID.getNumOperands() == 2 && "unexpected number of operands"); + Offset = Inst.getOperand(1); + if (!Offset.isImm()) + break; // We'll deal with this situation later on when applying fixups. + if (!isIntN(isMicroMips() ? 17 : 18, Offset.getImm())) + return Error(IDLoc, "branch target out of range"); + if (OffsetToAlignment (Offset.getImm(), 1LL << (isMicroMips() ? 1 : 2))) + return Error(IDLoc, "branch to misaligned address"); + break; + } + } + if (MCID.hasDelaySlot() && Options.isReorder()) { // If this instruction has a delay slot and .set reorder is active, // emit a NOP after it. @@ -2308,22 +2352,7 @@ bool MipsAsmParser::parseSetAssignment() { return reportParseError("unexpected token in .set directive"); Lex(); // Eat comma - if (getLexer().is(AsmToken::Dollar)) { - MCSymbol *Symbol; - SMLoc DollarLoc = getLexer().getLoc(); - // Consume the dollar sign, and check for a following identifier. - Parser.Lex(); - // We have a '$' followed by something, make sure they are adjacent. - if (DollarLoc.getPointer() + 1 != getTok().getLoc().getPointer()) - return true; - StringRef Res = - StringRef(DollarLoc.getPointer(), - getTok().getEndLoc().getPointer() - DollarLoc.getPointer()); - Symbol = getContext().GetOrCreateSymbol(Res); - Parser.Lex(); - Value = - MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, getContext()); - } else if (Parser.parseExpression(Value)) + if (Parser.parseExpression(Value)) return reportParseError("expected valid expression after comma"); // Check if the Name already exists as a symbol. 
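The branch checks added to processInstruction above encode one fact worth spelling out: a MIPS branch immediate is a signed 16-bit count of 4-byte words, i.e. an 18-bit signed byte offset that must be 4-byte aligned, and microMIPS halves both (17 bits, 2-byte units). A standalone sketch of the same test, with a hypothetical helper name; OffsetToAlignment(Off, Align) returning non-zero in the patch is the same misalignment test as Off % Align != 0 here:

  #include "llvm/Support/MathExtras.h"

  static bool isValidMipsBranchOffset(int64_t Off, bool MicroMips) {
    unsigned RangeBits = MicroMips ? 17 : 18; // signed range of the byte offset
    int64_t Align = MicroMips ? 2 : 4;        // instruction alignment
    return llvm::isIntN(RangeBits, Off) && (Off % Align == 0);
  }

  // isValidMipsBranchOffset(131068, false) -> true  (fits, multiple of 4)
  // isValidMipsBranchOffset(131072, false) -> false (max is 2^17 - 1 bytes)
  // isValidMipsBranchOffset(-6, false)     -> false (misaligned)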
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/Mips/CMakeLists.txt index 6acc9a88c06e..a225a00c023b 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/CMakeLists.txt @@ -34,6 +34,7 @@ add_llvm_target(MipsCodeGen MipsMCInstLower.cpp MipsMachineFunction.cpp MipsModuleISelDAGToDAG.cpp + MipsOptimizePICCall.cpp MipsOs16.cpp MipsRegisterInfo.cpp MipsSEFrameLowering.cpp @@ -47,8 +48,6 @@ add_llvm_target(MipsCodeGen MipsSelectionDAGInfo.cpp ) -add_dependencies(LLVMMipsCodeGen MipsCommonTableGen intrinsics_gen) - add_subdirectory(InstPrinter) add_subdirectory(Disassembler) add_subdirectory(TargetInfo) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Disassembler/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Disassembler/CMakeLists.txt index fe1dc75776f1..35b0999d0e5d 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Disassembler/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Disassembler/CMakeLists.txt @@ -1,5 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMMipsDisassembler MipsDisassembler.cpp ) @@ -11,5 +9,3 @@ set_property( PROPERTY COMPILE_FLAGS "/Od" ) endif() - -add_dependencies(LLVMMipsDisassembler MipsCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 60508a8c4fcb..c574f549a18e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -565,7 +565,37 @@ static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn, Inst.addOperand(MCOperand::CreateReg(Reg)); Inst.addOperand(MCOperand::CreateReg(Base)); - Inst.addOperand(MCOperand::CreateImm(Offset)); + + // The immediate field of an LD/ST instruction is scaled which means it must + // be multiplied (when decoding) by the size (in bytes) of the instructions' + // data format. + // .b - 1 byte + // .h - 2 bytes + // .w - 4 bytes + // .d - 8 bytes + switch(Inst.getOpcode()) + { + default: + assert (0 && "Unexpected instruction"); + return MCDisassembler::Fail; + break; + case Mips::LD_B: + case Mips::ST_B: + Inst.addOperand(MCOperand::CreateImm(Offset)); + break; + case Mips::LD_H: + case Mips::ST_H: + Inst.addOperand(MCOperand::CreateImm(Offset << 1)); + break; + case Mips::LD_W: + case Mips::ST_W: + Inst.addOperand(MCOperand::CreateImm(Offset << 2)); + break; + case Mips::LD_D: + case Mips::ST_D: + Inst.addOperand(MCOperand::CreateImm(Offset << 3)); + break; + } return MCDisassembler::Success; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt index 3e9fbf1c5566..2a67fba796a6 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/InstPrinter/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - add_llvm_library(LLVMMipsAsmPrinter MipsInstPrinter.cpp ) - -add_dependencies(LLVMMipsAsmPrinter MipsCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 78845898997c..82deec1a1e58 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -83,6 +83,27 @@ void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O, case Mips::RDHWR64: O << "\t.set\tpush\n"; O << "\t.set\tmips32r2\n"; + break; + case Mips::Save16: + O << "\tsave\t"; + printSaveRestore(MI, O); + O << " # 16 bit inst\n"; + return; + case Mips::SaveX16: + O << "\tsave\t"; + printSaveRestore(MI, O); + O << "\n"; + return; + case Mips::Restore16: + O << "\trestore\t"; + printSaveRestore(MI, O); + O << " # 16 bit inst\n"; + return; + case Mips::RestoreX16: + O << "\trestore\t"; + printSaveRestore(MI, O); + O << "\n"; + return; } // Try to print any aliases first. @@ -286,3 +307,14 @@ bool MipsInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) { default: return false; } } + +void MipsInstPrinter::printSaveRestore(const MCInst *MI, raw_ostream &O) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (i != 0) O << ", "; + if (MI->getOperand(i).isReg()) + printRegName(O, MI->getOperand(i).getReg()); + else + printUnsignedImm(MI, i, O); + } +} + diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/external/bsd/llvm/dist/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index f75ae249c3ee..2b745f028e14 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -104,6 +104,7 @@ private: bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo0, unsigned OpNo1, raw_ostream &OS); bool printAlias(const MCInst &MI, raw_ostream &OS); + void printSaveRestore(const MCInst *MI, raw_ostream &O); }; } // end namespace llvm diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/Mips/LLVMBuild.txt index a95d6bc1352a..e6d3a426b2ea 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/LLVMBuild.txt @@ -31,5 +31,5 @@ has_jit = 1 type = Library name = MipsCodeGen parent = Mips -required_libraries = AsmPrinter CodeGen Core MC MipsAsmPrinter MipsDesc MipsInfo SelectionDAG Support Target +required_libraries = Analysis AsmPrinter CodeGen Core MC MipsAsmPrinter MipsDesc MipsInfo Scalar SelectionDAG Support Target add_to_library_groups = Mips diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt index 911674890c73..dc20647da20f 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt @@ -7,5 +7,3 @@ add_llvm_library(LLVMMipsDesc MipsReginfo.cpp MipsTargetStreamer.cpp ) - -add_dependencies(LLVMMipsDesc MipsCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 3e70b23dccc6..e8c40048387d 100644 --- 
a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -16,6 +16,7 @@ #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixupKindInfo.h" @@ -23,11 +24,15 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/MathExtras.h" using namespace llvm; // Prepare value for the target space for it -static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { +static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, + MCContext *Ctx = NULL) { + + unsigned Kind = Fixup.getKind(); // Add/subtract and shift switch (Kind) { @@ -56,8 +61,11 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { // so the displacement will be one instruction size less. Value -= 4; // The displacement is then divided by 4 to give us an 18 bit - // address range. - Value >>= 2; + // address range. Forcing a signed division because Value can be negative. + Value = (int64_t)Value / 4; + // We now check if Value can be encoded as a 16-bit signed immediate. + if (!isIntN(16, Value) && Ctx) + Ctx->FatalError(Fixup.getLoc(), "out of range PC16 fixup"); break; case Mips::fixup_Mips_26: // So far we are only using this type for jumps. @@ -86,7 +94,11 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { break; case Mips::fixup_MICROMIPS_PC16_S1: Value -= 4; - Value >>= 1; + // Forcing a signed division because Value can be negative. + Value = (int64_t)Value / 2; + // We now check if Value can be encoded as a 16-bit signed immediate. + if (!isIntN(16, Value) && Ctx) + Ctx->FatalError(Fixup.getLoc(), "out of range PC16 fixup"); break; } @@ -115,7 +127,7 @@ public: void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { MCFixupKind Kind = Fixup.getKind(); - Value = adjustFixupValue((unsigned)Kind, Value); + Value = adjustFixupValue(Fixup, Value); if (!Value) return; // Doesn't change encoding. @@ -210,6 +222,8 @@ public: { "fixup_MICROMIPS_GOT_DISP", 0, 16, 0 }, { "fixup_MICROMIPS_GOT_PAGE", 0, 16, 0 }, { "fixup_MICROMIPS_GOT_OFST", 0, 16, 0 }, + { "fixup_MICROMIPS_TLS_GD", 0, 16, 0 }, + { "fixup_MICROMIPS_TLS_LDM", 0, 16, 0 }, { "fixup_MICROMIPS_TLS_DTPREL_HI16", 0, 16, 0 }, { "fixup_MICROMIPS_TLS_DTPREL_LO16", 0, 16, 0 }, { "fixup_MICROMIPS_TLS_TPREL_HI16", 0, 16, 0 }, @@ -273,6 +287,20 @@ public: OW->Write32(0); return true; } + + /// processFixupValue - Target hook to process the literal value of a fixup + /// if necessary. + void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + MCValue &Target, uint64_t &Value, + bool &IsResolved) { + // At this point we'll ignore the value returned by adjustFixupValue as + // we are only checking if the fixup can be applied correctly. We have + // access to MCContext from here which allows us to report a fatal error + // with *possibly* a source code location. 
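+ // Worked example with an illustrative value: for fixup_Mips_PC16 and
+ // Value = 0x20004, adjustFixupValue computes (0x20004 - 4) / 4 = 0x8000,
+ // isIntN(16, 0x8000) is false (the signed 16-bit maximum is 0x7FFF), and
+ // Ctx->FatalError() reports "out of range PC16 fixup" at Fixup.getLoc().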
+ (void)adjustFixupValue(Fixup, Value, &Asm.getContext()); + } + }; // class MipsAsmBackend } // namespace diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 83c7d4bcc3c6..aa9b42337cb1 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -210,6 +210,12 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, case Mips::fixup_MICROMIPS_GOT_OFST: Type = ELF::R_MICROMIPS_GOT_OFST; break; + case Mips::fixup_MICROMIPS_TLS_GD: + Type = ELF::R_MICROMIPS_TLS_GD; + break; + case Mips::fixup_MICROMIPS_TLS_LDM: + Type = ELF::R_MICROMIPS_TLS_LDM; + break; case Mips::fixup_MICROMIPS_TLS_DTPREL_HI16: Type = ELF::R_MICROMIPS_TLS_DTPREL_HI16; break; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h index 6ed44b74cc4b..dc6192c20506 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -155,6 +155,12 @@ namespace Mips { // resulting in - R_MICROMIPS_GOT_OFST fixup_MICROMIPS_GOT_OFST, + // resulting in - R_MICROMIPS_TLS_GD + fixup_MICROMIPS_TLS_GD, + + // resulting in - R_MICROMIPS_TLS_LDM + fixup_MICROMIPS_TLS_LDM, + // resulting in - R_MICROMIPS_TLS_DTPREL_HI16 fixup_MICROMIPS_TLS_DTPREL_HI16, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 66428bdfa747..aad777dd832a 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -81,8 +81,8 @@ public: // getBranchJumpOpValue - Return binary encoding of the jump // target operand. If the machine operand requires relocation, // record the relocation and return zero. - unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups) const; + unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const; // getBranchJumpOpValueMM - Return binary encoding of the microMIPS jump // target operand. If the machine operand requires relocation, @@ -107,6 +107,9 @@ public: unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, SmallVectorImpl &Fixups) const; + unsigned getMSAMemEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const; + unsigned getMemEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups) const; unsigned getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo, @@ -363,96 +366,98 @@ getExprOpValue(const MCExpr *Expr,SmallVectorImpl &Fixups) const { return Res; } if (Kind == MCExpr::SymbolRef) { - Mips::Fixups FixupKind = Mips::Fixups(0); + Mips::Fixups FixupKind = Mips::Fixups(0); - switch(cast(Expr)->getKind()) { - default: llvm_unreachable("Unknown fixup kind!"); - break; - case MCSymbolRefExpr::VK_Mips_GPOFF_HI : - FixupKind = Mips::fixup_Mips_GPOFF_HI; - break; - case MCSymbolRefExpr::VK_Mips_GPOFF_LO : - FixupKind = Mips::fixup_Mips_GPOFF_LO; - break; - case MCSymbolRefExpr::VK_Mips_GOT_PAGE : - FixupKind = IsMicroMips ? 
Mips::fixup_MICROMIPS_GOT_PAGE - : Mips::fixup_Mips_GOT_PAGE; - break; - case MCSymbolRefExpr::VK_Mips_GOT_OFST : - FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT_OFST - : Mips::fixup_Mips_GOT_OFST; - break; - case MCSymbolRefExpr::VK_Mips_GOT_DISP : - FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT_DISP - : Mips::fixup_Mips_GOT_DISP; - break; - case MCSymbolRefExpr::VK_Mips_GPREL: - FixupKind = Mips::fixup_Mips_GPREL16; - break; - case MCSymbolRefExpr::VK_Mips_GOT_CALL: - FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_CALL16 - : Mips::fixup_Mips_CALL16; - break; - case MCSymbolRefExpr::VK_Mips_GOT16: - FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT16 - : Mips::fixup_Mips_GOT_Global; - break; - case MCSymbolRefExpr::VK_Mips_GOT: - FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT16 - : Mips::fixup_Mips_GOT_Local; - break; - case MCSymbolRefExpr::VK_Mips_ABS_HI: - FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_HI16 - : Mips::fixup_Mips_HI16; - break; - case MCSymbolRefExpr::VK_Mips_ABS_LO: - FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_LO16 - : Mips::fixup_Mips_LO16; - break; - case MCSymbolRefExpr::VK_Mips_TLSGD: - FixupKind = Mips::fixup_Mips_TLSGD; - break; - case MCSymbolRefExpr::VK_Mips_TLSLDM: - FixupKind = Mips::fixup_Mips_TLSLDM; - break; - case MCSymbolRefExpr::VK_Mips_DTPREL_HI: - FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_DTPREL_HI16 - : Mips::fixup_Mips_DTPREL_HI; - break; - case MCSymbolRefExpr::VK_Mips_DTPREL_LO: - FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_DTPREL_LO16 - : Mips::fixup_Mips_DTPREL_LO; - break; - case MCSymbolRefExpr::VK_Mips_GOTTPREL: - FixupKind = Mips::fixup_Mips_GOTTPREL; - break; - case MCSymbolRefExpr::VK_Mips_TPREL_HI: - FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_TPREL_HI16 - : Mips::fixup_Mips_TPREL_HI; - break; - case MCSymbolRefExpr::VK_Mips_TPREL_LO: - FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_TPREL_LO16 - : Mips::fixup_Mips_TPREL_LO; - break; - case MCSymbolRefExpr::VK_Mips_HIGHER: - FixupKind = Mips::fixup_Mips_HIGHER; - break; - case MCSymbolRefExpr::VK_Mips_HIGHEST: - FixupKind = Mips::fixup_Mips_HIGHEST; - break; - case MCSymbolRefExpr::VK_Mips_GOT_HI16: - FixupKind = Mips::fixup_Mips_GOT_HI16; - break; - case MCSymbolRefExpr::VK_Mips_GOT_LO16: - FixupKind = Mips::fixup_Mips_GOT_LO16; - break; - case MCSymbolRefExpr::VK_Mips_CALL_HI16: - FixupKind = Mips::fixup_Mips_CALL_HI16; - break; - case MCSymbolRefExpr::VK_Mips_CALL_LO16: - FixupKind = Mips::fixup_Mips_CALL_LO16; - break; - } // switch + switch(cast(Expr)->getKind()) { + default: llvm_unreachable("Unknown fixup kind!"); + break; + case MCSymbolRefExpr::VK_Mips_GPOFF_HI : + FixupKind = Mips::fixup_Mips_GPOFF_HI; + break; + case MCSymbolRefExpr::VK_Mips_GPOFF_LO : + FixupKind = Mips::fixup_Mips_GPOFF_LO; + break; + case MCSymbolRefExpr::VK_Mips_GOT_PAGE : + FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT_PAGE + : Mips::fixup_Mips_GOT_PAGE; + break; + case MCSymbolRefExpr::VK_Mips_GOT_OFST : + FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT_OFST + : Mips::fixup_Mips_GOT_OFST; + break; + case MCSymbolRefExpr::VK_Mips_GOT_DISP : + FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT_DISP + : Mips::fixup_Mips_GOT_DISP; + break; + case MCSymbolRefExpr::VK_Mips_GPREL: + FixupKind = Mips::fixup_Mips_GPREL16; + break; + case MCSymbolRefExpr::VK_Mips_GOT_CALL: + FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_CALL16 + : Mips::fixup_Mips_CALL16; + break; + case MCSymbolRefExpr::VK_Mips_GOT16: + FixupKind = IsMicroMips ? 
Mips::fixup_MICROMIPS_GOT16 + : Mips::fixup_Mips_GOT_Global; + break; + case MCSymbolRefExpr::VK_Mips_GOT: + FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT16 + : Mips::fixup_Mips_GOT_Local; + break; + case MCSymbolRefExpr::VK_Mips_ABS_HI: + FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_HI16 + : Mips::fixup_Mips_HI16; + break; + case MCSymbolRefExpr::VK_Mips_ABS_LO: + FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_LO16 + : Mips::fixup_Mips_LO16; + break; + case MCSymbolRefExpr::VK_Mips_TLSGD: + FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_GD + : Mips::fixup_Mips_TLSGD; + break; + case MCSymbolRefExpr::VK_Mips_TLSLDM: + FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_LDM + : Mips::fixup_Mips_TLSLDM; + break; + case MCSymbolRefExpr::VK_Mips_DTPREL_HI: + FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_DTPREL_HI16 + : Mips::fixup_Mips_DTPREL_HI; + break; + case MCSymbolRefExpr::VK_Mips_DTPREL_LO: + FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_DTPREL_LO16 + : Mips::fixup_Mips_DTPREL_LO; + break; + case MCSymbolRefExpr::VK_Mips_GOTTPREL: + FixupKind = Mips::fixup_Mips_GOTTPREL; + break; + case MCSymbolRefExpr::VK_Mips_TPREL_HI: + FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_TPREL_HI16 + : Mips::fixup_Mips_TPREL_HI; + break; + case MCSymbolRefExpr::VK_Mips_TPREL_LO: + FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_TPREL_LO16 + : Mips::fixup_Mips_TPREL_LO; + break; + case MCSymbolRefExpr::VK_Mips_HIGHER: + FixupKind = Mips::fixup_Mips_HIGHER; + break; + case MCSymbolRefExpr::VK_Mips_HIGHEST: + FixupKind = Mips::fixup_Mips_HIGHEST; + break; + case MCSymbolRefExpr::VK_Mips_GOT_HI16: + FixupKind = Mips::fixup_Mips_GOT_HI16; + break; + case MCSymbolRefExpr::VK_Mips_GOT_LO16: + FixupKind = Mips::fixup_Mips_GOT_LO16; + break; + case MCSymbolRefExpr::VK_Mips_CALL_HI16: + FixupKind = Mips::fixup_Mips_CALL_HI16; + break; + case MCSymbolRefExpr::VK_Mips_CALL_LO16: + FixupKind = Mips::fixup_Mips_CALL_LO16; + break; + } // switch Fixups.push_back(MCFixup::Create(0, Expr, MCFixupKind(FixupKind))); return 0; @@ -480,6 +485,49 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, return getExprOpValue(MO.getExpr(),Fixups); } +/// getMSAMemEncoding - Return binary encoding of memory operand for LD/ST +/// instructions. +unsigned +MipsMCCodeEmitter::getMSAMemEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const { + // Base register is encoded in bits 20-16, offset is encoded in bits 15-0. + assert(MI.getOperand(OpNo).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups) << 16; + unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups); + + // The immediate field of an LD/ST instruction is scaled which means it must + // be divided (when encoding) by the size (in bytes) of the instructions' + // data format. + // .b - 1 byte + // .h - 2 bytes + // .w - 4 bytes + // .d - 8 bytes + switch(MI.getOpcode()) + { + default: + assert (0 && "Unexpected instruction"); + break; + case Mips::LD_B: + case Mips::ST_B: + // We don't need to scale the offset in this case + break; + case Mips::LD_H: + case Mips::ST_H: + OffBits >>= 1; + break; + case Mips::LD_W: + case Mips::ST_W: + OffBits >>= 2; + break; + case Mips::LD_D: + case Mips::ST_D: + OffBits >>= 3; + break; + } + + return (OffBits & 0xFFFF) | RegBits; +} + /// getMemEncoding - Return binary encoding of memory related operand. /// If the offset operand requires relocation, record the relocation. 
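The scaling rule documented in getMSAMemEncoding above reduces to a right shift by log2 of the element size. A standalone sketch of just that step; scaleMSAOffset is a hypothetical helper, where the real routine switches on the LD_*/ST_* opcodes directly:

    #include <cassert>
    #include <cstdio>

    // Sketch of the MSA load/store offset scaling: the 16-bit immediate
    // field holds the offset in units of the element size, so the byte
    // offset is shifted right before being encoded.
    static unsigned scaleMSAOffset(unsigned OffBits, unsigned ElemBytes) {
      switch (ElemBytes) {
      case 1: return OffBits;      // ld.b / st.b: no scaling needed
      case 2: return OffBits >> 1; // ld.h / st.h
      case 4: return OffBits >> 2; // ld.w / st.w
      case 8: return OffBits >> 3; // ld.d / st.d
      }
      assert(0 && "Unexpected element size");
      return 0;
    }

    int main() {
      // A byte offset of 64 in an ld.w encodes as 16 (64 / 4).
      std::printf("%u\n", scaleMSAOffset(64, 4) & 0xFFFF); // prints 16
      return 0;
    }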
unsigned diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MicroMipsInstrFPU.td b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MicroMipsInstrFPU.td new file mode 100644 index 000000000000..f8dc5042e0db --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MicroMipsInstrFPU.td @@ -0,0 +1,148 @@ +let isCodeGenOnly = 1, Predicates = [InMicroMips] in { +def FADD_S_MM : MMRel, ADDS_FT<"add.s", FGR32Opnd, IIFadd, 1, fadd>, + ADDS_FM_MM<0, 0x30>; +def FDIV_S_MM : MMRel, ADDS_FT<"div.s", FGR32Opnd, IIFdivSingle, 0, fdiv>, + ADDS_FM_MM<0, 0xf0>; +def FMUL_S_MM : MMRel, ADDS_FT<"mul.s", FGR32Opnd, IIFmulSingle, 1, fmul>, + ADDS_FM_MM<0, 0xb0>; +def FSUB_S_MM : MMRel, ADDS_FT<"sub.s", FGR32Opnd, IIFadd, 0, fsub>, + ADDS_FM_MM<0, 0x70>; + +def FADD_MM : MMRel, ADDS_FT<"add.d", AFGR64Opnd, IIFadd, 1, fadd>, + ADDS_FM_MM<1, 0x30>; +def FDIV_MM : MMRel, ADDS_FT<"div.d", AFGR64Opnd, IIFdivDouble, 0, fdiv>, + ADDS_FM_MM<1, 0xf0>; +def FMUL_MM : MMRel, ADDS_FT<"mul.d", AFGR64Opnd, IIFmulDouble, 1, fmul>, + ADDS_FM_MM<1, 0xb0>; +def FSUB_MM : MMRel, ADDS_FT<"sub.d", AFGR64Opnd, IIFadd, 0, fsub>, + ADDS_FM_MM<1, 0x70>; + +def LWC1_MM : MMRel, LW_FT<"lwc1", FGR32Opnd, IIFLoad, load>, LW_FM_MM<0x27>; +def SWC1_MM : MMRel, SW_FT<"swc1", FGR32Opnd, IIFStore, store>, + LW_FM_MM<0x26>; +def LDC1_MM : MMRel, LW_FT<"ldc1", AFGR64Opnd, IIFLoad, load>, LW_FM_MM<0x2f>; +def SDC1_MM : MMRel, SW_FT<"sdc1", AFGR64Opnd, IIFStore, store>, + LW_FM_MM<0x2e>; +def LWXC1_MM : MMRel, LWXC1_FT<"lwxc1", FGR32Opnd, IIFLoad, load>, + LWXC1_FM_MM<0x48>; +def SWXC1_MM : MMRel, SWXC1_FT<"swxc1", FGR32Opnd, IIFStore, store>, + SWXC1_FM_MM<0x88>; +def LUXC1_MM : MMRel, LWXC1_FT<"luxc1", AFGR64Opnd, IIFLoad>, + LWXC1_FM_MM<0x148>; +def SUXC1_MM : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, IIFStore>, + SWXC1_FM_MM<0x188>; + +def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, IIFcmp, MipsFPCmp>, + CEQS_FM_MM<0>; +def FCMP_D32_MM : MMRel, CEQS_FT<"d", AFGR64, IIFcmp, MipsFPCmp>, + CEQS_FM_MM<1>; + +def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, IIBranch, MIPS_BRANCH_F>, + BC1F_FM_MM<0x1c>; +def BC1T_MM : MMRel, BC1F_FT<"bc1t", brtarget_mm, IIBranch, MIPS_BRANCH_T>, + BC1F_FM_MM<0x1d>; + +def CEIL_W_S_MM : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, + ROUND_W_FM_MM<0, 0x6c>; +def CVT_W_S_MM : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, + ROUND_W_FM_MM<0, 0x24>; +def FLOOR_W_S_MM : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, + ROUND_W_FM_MM<0, 0x2c>; +def ROUND_W_S_MM : MMRel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, + ROUND_W_FM_MM<0, 0xec>; +def TRUNC_W_S_MM : MMRel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, + ROUND_W_FM_MM<0, 0xac>; +def FSQRT_S_MM : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd,IIFsqrtSingle, + fsqrt>, ROUND_W_FM_MM<0, 0x28>; + +def CEIL_W_MM : MMRel, ABSS_FT<"ceil.w.d", FGR32Opnd, AFGR64Opnd, IIFcvt>, + ROUND_W_FM_MM<1, 0x6c>; +def CVT_W_MM : MMRel, ABSS_FT<"cvt.w.d", FGR32Opnd, AFGR64Opnd, IIFcvt>, + ROUND_W_FM_MM<1, 0x24>; +def FLOOR_W_MM : MMRel, ABSS_FT<"floor.w.d", FGR32Opnd, AFGR64Opnd, IIFcvt>, + ROUND_W_FM_MM<1, 0x2c>; +def ROUND_W_MM : MMRel, ABSS_FT<"round.w.d", FGR32Opnd, AFGR64Opnd, IIFcvt>, + ROUND_W_FM_MM<1, 0xec>; +def TRUNC_W_MM : MMRel, ABSS_FT<"trunc.w.d", FGR32Opnd, AFGR64Opnd, IIFcvt>, + ROUND_W_FM_MM<1, 0xac>; + +def FSQRT_MM : MMRel, ABSS_FT<"sqrt.d", AFGR64Opnd, AFGR64Opnd, + IIFsqrtDouble, fsqrt>, ROUND_W_FM_MM<1, 0x28>; + +def CVT_L_S_MM : MMRel, ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>, + ROUND_W_FM_MM<0, 0x4>; +def 
CVT_L_D64_MM : MMRel, ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>, + ROUND_W_FM_MM<1, 0x4>; + +def FABS_S_MM : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, IIFcvt, fabs>, + ABS_FM_MM<0, 0xd>; +def FMOV_S_MM : MMRel, ABSS_FT<"mov.s", FGR32Opnd, FGR32Opnd, IIFmove>, + ABS_FM_MM<0, 0x1>; +def FNEG_S_MM : MMRel, ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, IIFcvt, fneg>, + ABS_FM_MM<0, 0x2d>; +def CVT_D_S_MM : MMRel, ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd, IIFcvt>, + ABS_FM_MM<0, 0x4d>; +def CVT_D32_W_MM : MMRel, ABSS_FT<"cvt.d.w", AFGR64Opnd, FGR32Opnd, IIFcvt>, + ABS_FM_MM<1, 0x4d>; +def CVT_S_D32_MM : MMRel, ABSS_FT<"cvt.s.d", FGR32Opnd, AFGR64Opnd, IIFcvt>, + ABS_FM_MM<0, 0x6d>; +def CVT_S_W_MM : MMRel, ABSS_FT<"cvt.s.w", FGR32Opnd, FGR32Opnd, IIFcvt>, + ABS_FM_MM<1, 0x6d>; + +def FABS_MM : MMRel, ABSS_FT<"abs.d", AFGR64Opnd, AFGR64Opnd, IIFcvt, fabs>, + ABS_FM_MM<1, 0xd>; +def FNEG_MM : MMRel, ABSS_FT<"neg.d", AFGR64Opnd, AFGR64Opnd, IIFcvt, fneg>, + ABS_FM_MM<1, 0x2d>; + +def FMOV_D32_MM : MMRel, ABSS_FT<"mov.d", AFGR64Opnd, AFGR64Opnd, IIFmove>, + ABS_FM_MM<1, 0x1>, Requires<[NotFP64bit, HasStdEnc]>; + +def MOVZ_I_S_MM : MMRel, CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd, IIFmove>, + CMov_I_F_FM_MM<0x78, 0>; +def MOVN_I_S_MM : MMRel, CMov_I_F_FT<"movn.s", GPR32Opnd, FGR32Opnd, IIFmove>, + CMov_I_F_FM_MM<0x38, 0>; +def MOVZ_I_D32_MM : MMRel, CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64Opnd, + IIFmove>, CMov_I_F_FM_MM<0x78, 1>; +def MOVN_I_D32_MM : MMRel, CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64Opnd, + IIFmove>, CMov_I_F_FM_MM<0x38, 1>; + +def MOVT_S_MM : MMRel, CMov_F_F_FT<"movt.s", FGR32Opnd, IIFmove, MipsCMovFP_T>, + CMov_F_F_FM_MM<0x60, 0>; +def MOVF_S_MM : MMRel, CMov_F_F_FT<"movf.s", FGR32Opnd, IIFmove, MipsCMovFP_F>, + CMov_F_F_FM_MM<0x20, 0>; +def MOVT_D32_MM : MMRel, CMov_F_F_FT<"movt.d", AFGR64Opnd, + IIFmove, MipsCMovFP_T>, CMov_F_F_FM_MM<0x60, 1>; +def MOVF_D32_MM : MMRel, CMov_F_F_FT<"movf.d", AFGR64Opnd, + IIFmove, MipsCMovFP_F>, CMov_F_F_FM_MM<0x20, 1>; + +def CFC1_MM : MMRel, MFC1_FT<"cfc1", GPR32Opnd, CCROpnd, IIFmove>, + MFC1_FM_MM<0x40>; +def CTC1_MM : MMRel, MTC1_FT<"ctc1", CCROpnd, GPR32Opnd, IIFmove>, + MFC1_FM_MM<0x60>; +def MFC1_MM : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, + IIFmoveC1, bitconvert>, MFC1_FM_MM<0x80>; +def MTC1_MM : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, + IIFmoveC1, bitconvert>, MFC1_FM_MM<0xa0>; +def MFHC1_MM : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, FGRH32Opnd, IIFmoveC1>, + MFC1_FM_MM<3>; +def MTHC1_MM : MMRel, MTC1_FT<"mthc1", FGRH32Opnd, GPR32Opnd, IIFmoveC1>, + MFC1_FM_MM<7>; + +def MADD_S_MM : MMRel, MADDS_FT<"madd.s", FGR32Opnd, IIFmulSingle, fadd>, + MADDS_FM_MM<0x1>; +def MSUB_S_MM : MMRel, MADDS_FT<"msub.s", FGR32Opnd, IIFmulSingle, fsub>, + MADDS_FM_MM<0x21>; +def NMADD_S_MM : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, IIFmulSingle, fadd>, + MADDS_FM_MM<0x2>; +def NMSUB_S_MM : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, IIFmulSingle, fsub>, + MADDS_FM_MM<0x22>; + +def MADD_D32_MM : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, IIFmulDouble, fadd>, + MADDS_FM_MM<0x9>; +def MSUB_D32_MM : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, IIFmulDouble, fsub>, + MADDS_FM_MM<0x29>; +def NMADD_D32_MM : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, IIFmulDouble, + fadd>, MADDS_FM_MM<0xa>; +def NMSUB_D32_MM : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, IIFmulDouble, + fsub>, MADDS_FM_MM<0x2a>; +} diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MicroMipsInstrFormats.td b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MicroMipsInstrFormats.td index c12a32e3d803..21e5707e91e7 
100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MicroMipsInstrFormats.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MicroMipsInstrFormats.td @@ -276,6 +276,66 @@ class BGEZAL_FM_MM funct> : MMArch { let Inst{15-0} = offset; } +class SYNC_FM_MM : MMArch { + bits<5> stype; + + bits<32> Inst; + + let Inst{31-26} = 0x00; + let Inst{25-21} = 0x0; + let Inst{20-16} = stype; + let Inst{15-6} = 0x1ad; + let Inst{5-0} = 0x3c; +} + +class BRK_FM_MM : MMArch { + bits<10> code_1; + bits<10> code_2; + bits<32> Inst; + let Inst{31-26} = 0x0; + let Inst{25-16} = code_1; + let Inst{15-6} = code_2; + let Inst{5-0} = 0x07; +} + +class SYS_FM_MM : MMArch { + bits<10> code_; + bits<32> Inst; + let Inst{31-26} = 0x0; + let Inst{25-16} = code_; + let Inst{15-6} = 0x22b; + let Inst{5-0} = 0x3c; +} + +class WAIT_FM_MM : MMArch { + bits<32> Inst; + + let Inst{31-26} = 0x00; + let Inst{25-16} = 0x00; + let Inst{15-6} = 0x24d; + let Inst{5-0} = 0x3c; +} + +class ER_FM_MM funct> : MMArch { + bits<32> Inst; + + let Inst{31-26} = 0x00; + let Inst{25-16} = 0x00; + let Inst{15-6} = funct; + let Inst{5-0} = 0x3c; +} + +class EI_FM_MM funct> : MMArch { + bits<32> Inst; + bits<5> rt; + + let Inst{31-26} = 0x00; + let Inst{25-21} = 0x00; + let Inst{20-16} = rt; + let Inst{15-6} = funct; + let Inst{5-0} = 0x3c; +} + class TEQ_FM_MM funct> : MMArch { bits<5> rs; bits<5> rt; @@ -302,3 +362,183 @@ class TEQI_FM_MM funct> : MMArch { let Inst{20-16} = rs; let Inst{15-0} = imm16; } + +class LL_FM_MM funct> { + bits<5> rt; + bits<21> addr; + + bits<32> Inst; + + let Inst{31-26} = 0x18; + let Inst{25-21} = rt; + let Inst{20-16} = addr{20-16}; + let Inst{15-12} = funct; + let Inst{11-0} = addr{11-0}; +} + +class ADDS_FM_MM fmt, bits<8> funct> : MMArch { + bits<5> ft; + bits<5> fs; + bits<5> fd; + + bits<32> Inst; + + let Inst{31-26} = 0x15; + let Inst{25-21} = ft; + let Inst{20-16} = fs; + let Inst{15-11} = fd; + let Inst{10} = 0; + let Inst{9-8} = fmt; + let Inst{7-0} = funct; + + list Pattern = []; +} + +class LWXC1_FM_MM funct> : MMArch { + bits<5> fd; + bits<5> base; + bits<5> index; + + bits<32> Inst; + + let Inst{31-26} = 0x15; + let Inst{25-21} = index; + let Inst{20-16} = base; + let Inst{15-11} = fd; + let Inst{10-9} = 0x0; + let Inst{8-0} = funct; +} + +class SWXC1_FM_MM funct> : MMArch { + bits<5> fs; + bits<5> base; + bits<5> index; + + bits<32> Inst; + + let Inst{31-26} = 0x15; + let Inst{25-21} = index; + let Inst{20-16} = base; + let Inst{15-11} = fs; + let Inst{10-9} = 0x0; + let Inst{8-0} = funct; +} + +class CEQS_FM_MM fmt> : MMArch { + bits<5> fs; + bits<5> ft; + bits<4> cond; + + bits<32> Inst; + + let Inst{31-26} = 0x15; + let Inst{25-21} = ft; + let Inst{20-16} = fs; + let Inst{15-13} = 0x0; // cc + let Inst{12} = 0; + let Inst{11-10} = fmt; + let Inst{9-6} = cond; + let Inst{5-0} = 0x3c; +} + +class BC1F_FM_MM tf> : MMArch { + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = 0x10; + let Inst{25-21} = tf; + let Inst{20-18} = 0x0; // cc + let Inst{17-16} = 0x0; + let Inst{15-0} = offset; +} + +class ROUND_W_FM_MM fmt, bits<8> funct> : MMArch { + bits<5> fd; + bits<5> fs; + + bits<32> Inst; + + let Inst{31-26} = 0x15; + let Inst{25-21} = fd; + let Inst{20-16} = fs; + let Inst{15} = 0; + let Inst{14} = fmt; + let Inst{13-6} = funct; + let Inst{5-0} = 0x3b; +} + +class ABS_FM_MM fmt, bits<7> funct> : MMArch { + bits<5> fd; + bits<5> fs; + + bits<32> Inst; + + let Inst{31-26} = 0x15; + let Inst{25-21} = fd; + let Inst{20-16} = fs; + let Inst{15} = 0; + let Inst{14-13} = fmt; + let Inst{12-6} = 
funct; + let Inst{5-0} = 0x3b; +} + +class CMov_F_F_FM_MM func, bits<2> fmt> : MMArch { + bits<5> fd; + bits<5> fs; + + bits<32> Inst; + + let Inst{31-26} = 0x15; + let Inst{25-21} = fd; + let Inst{20-16} = fs; + let Inst{15-13} = 0x0; //cc + let Inst{12-11} = 0x0; + let Inst{10-9} = fmt; + let Inst{8-0} = func; +} + +class CMov_I_F_FM_MM funct, bits<2> fmt> : MMArch { + bits<5> fd; + bits<5> fs; + bits<5> rt; + + bits<32> Inst; + + let Inst{31-26} = 0x15; + let Inst{25-21} = rt; + let Inst{20-16} = fs; + let Inst{15-11} = fd; + let Inst{9-8} = fmt; + let Inst{7-0} = funct; +} + +class MFC1_FM_MM funct> : MMArch { + bits<5> rt; + bits<5> fs; + + bits<32> Inst; + + let Inst{31-26} = 0x15; + let Inst{25-21} = rt; + let Inst{20-16} = fs; + let Inst{15-14} = 0x0; + let Inst{13-6} = funct; + let Inst{5-0} = 0x3b; +} + +class MADDS_FM_MM funct>: MMArch { + bits<5> ft; + bits<5> fs; + bits<5> fd; + bits<5> fr; + + bits<32> Inst; + + let Inst{31-26} = 0x15; + let Inst{25-21} = ft; + let Inst{20-16} = fs; + let Inst{15-11} = fd; + let Inst{10-6} = fr; + let Inst{5-0} = funct; +} diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MicroMipsInstrInfo.td index d9507fa88ebc..84cba5fb20e7 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MicroMipsInstrInfo.td @@ -45,6 +45,21 @@ class StoreLeftRightMM : + InstSE<(outs RO:$rt), (ins mem_mm_12:$addr), + !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> { + let DecoderMethod = "DecodeMem"; + let mayLoad = 1; +} + +class SCBaseMM : + InstSE<(outs RO:$dst), (ins RO:$rt, mem_mm_12:$addr), + !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> { + let DecoderMethod = "DecodeMem"; + let mayStore = 1; + let Constraints = "$rt = $dst"; +} + let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { /// Arithmetic Instructions (ALU Immediate) def ADDiu_MM : MMRel, ArithLogicI<"addiu", simm16, GPR32Opnd>, @@ -63,6 +78,9 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { ADDI_FM_MM<0x1c>; def LUi_MM : MMRel, LoadUpper<"lui", GPR32Opnd, uimm16>, LUI_FM_MM; + def LEA_ADDiu_MM : MMRel, EffectiveAddress<"addiu", GPR32Opnd>, + LW_FM_MM<0xc>; + /// Arithmetic Instructions (3-Operand, R-Type) def ADDu_MM : MMRel, ArithLogicR<"addu", GPR32Opnd>, ADD_FM_MM<0, 0x150>; def SUBu_MM : MMRel, ArithLogicR<"subu", GPR32Opnd>, ADD_FM_MM<0, 0x1d0>; @@ -175,13 +193,9 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { def J_MM : MMRel, JumpFJ, J_FM_MM<0x35>; def JAL_MM : MMRel, JumpLink<"jal", calltarget_mm>, J_FM_MM<0x3d>; - def TAILCALL_MM : MMRel, JumpFJ, J_FM_MM<0x3d>, IsTailCall; } def JR_MM : MMRel, IndirectBranch<"jr", GPR32Opnd>, JR_FM_MM<0x3c>; def JALR_MM : MMRel, JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM_MM<0x03c>; - def TAILCALL_R_MM : MMRel, JumpFR<"tcallr", GPR32Opnd, MipsTailCall>, - JR_FM_MM<0x3c>, IsTailCall; def RET_MM : MMRel, RetBase<"ret", GPR32Opnd>, JR_FM_MM<0x3c>; /// Branch Instructions @@ -202,6 +216,16 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { def BLTZAL_MM : MMRel, BGEZAL_FT<"bltzal", brtarget_mm, GPR32Opnd>, BGEZAL_FM_MM<0x01>; + /// Control Instructions + def SYNC_MM : MMRel, SYNC_FT<"sync">, SYNC_FM_MM; + def BREAK_MM : MMRel, BRK_FT<"break">, BRK_FM_MM; + def SYSCALL_MM : MMRel, SYS_FT<"syscall">, SYS_FM_MM; + def WAIT_MM : MMRel, WAIT_FT<"wait">, WAIT_FM_MM; + def ERET_MM : MMRel, ER_FT<"eret">, ER_FM_MM<0x3cd>; + def 
DERET_MM : MMRel, ER_FT<"deret">, ER_FM_MM<0x38d>; + def EI_MM : MMRel, DEI_FT<"ei", GPR32Opnd>, EI_FM_MM<0x15d>; + def DI_MM : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM_MM<0x11d>; + /// Trap Instructions def TEQ_MM : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM_MM<0x0>; def TGE_MM : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM_MM<0x08>; @@ -216,4 +240,8 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { def TLTI_MM : MMRel, TEQI_FT<"tlti", GPR32Opnd>, TEQI_FM_MM<0x08>; def TLTIU_MM : MMRel, TEQI_FT<"tltiu", GPR32Opnd>, TEQI_FM_MM<0x0a>; def TNEI_MM : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM_MM<0x0c>; + + /// Load-linked, Store-conditional + def LL_MM : LLBaseMM<"ll", GPR32Opnd>, LL_FM_MM<0x3>; + def SC_MM : SCBaseMM<"sc", GPR32Opnd>, LL_FM_MM<0xb>; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips.h b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips.h index e796debd79b6..d512d6589c40 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips.h @@ -23,6 +23,7 @@ namespace llvm { class FunctionPass; FunctionPass *createMipsISelDag(MipsTargetMachine &TM); + FunctionPass *createMipsOptimizePICCallPass(MipsTargetMachine &TM); FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM); FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips.td b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips.td index b8e3f39256da..c7ebdac1c05b 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips.td @@ -96,11 +96,6 @@ def : Proc<"mips64", [FeatureMips64]>; def : Proc<"mips64r2", [FeatureMips64r2]>; def : Proc<"mips16", [FeatureMips16]>; -def MipsAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - bit isMCAsmWriter = 1; -} - def MipsAsmParser : AsmParser { let ShouldEmitMatchRegisterName = 0; let MnemonicContainsDot = 1; @@ -116,6 +111,5 @@ def MipsAsmParserVariant : AsmParserVariant { def Mips : Target { let InstructionSet = MipsInstrInfo; let AssemblyParsers = [MipsAsmParser]; - let AssemblyWriters = [MipsAsmWriter]; let AssemblyParserVariants = [MipsAsmParserVariant]; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16FrameLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16FrameLowering.cpp index 6655ff98e033..fa629476e9f4 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16FrameLowering.cpp @@ -15,6 +15,7 @@ #include "MCTargetDesc/MipsBaseInfo.h" #include "Mips16InstrInfo.h" #include "MipsInstrInfo.h" +#include "MipsRegisterInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -53,21 +54,24 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const { MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(AdjustSPLabel, -StackSize)); - MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, - TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel); - unsigned S2 = MRI->getDwarfRegNum(Mips::S2, true); - MMI.addFrameInst(MCCFIInstruction::createOffset(CSLabel, S2, -8)); + const std::vector &CSI = MFI->getCalleeSavedInfo(); - unsigned S1 = MRI->getDwarfRegNum(Mips::S1, true); - MMI.addFrameInst(MCCFIInstruction::createOffset(CSLabel, S1, -12)); + if (CSI.size()) { + MCSymbol 
*CSLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel); - unsigned S0 = MRI->getDwarfRegNum(Mips::S0, true); - MMI.addFrameInst(MCCFIInstruction::createOffset(CSLabel, S0, -16)); - unsigned RA = MRI->getDwarfRegNum(Mips::RA, true); - MMI.addFrameInst(MCCFIInstruction::createOffset(CSLabel, RA, -4)); + const std::vector &CSI = MFI->getCalleeSavedInfo(); + for (std::vector::const_iterator I = CSI.begin(), + E = CSI.end(); I != E; ++I) { + int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); + unsigned Reg = I->getReg(); + unsigned DReg = MRI->getDwarfRegNum(Reg, true); + MMI.addFrameInst(MCCFIInstruction::createOffset(CSLabel, DReg, Offset)); + } + } if (hasFP(MF)) BuildMI(MBB, MBBI, dl, TII.get(Mips::MoveR3216), Mips::S0) .addReg(Mips::SP); @@ -168,10 +172,15 @@ Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { void Mips16FrameLowering:: processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { - MF.getRegInfo().setPhysRegUsed(Mips::RA); - MF.getRegInfo().setPhysRegUsed(Mips::S0); - MF.getRegInfo().setPhysRegUsed(Mips::S1); - MF.getRegInfo().setPhysRegUsed(Mips::S2); + const Mips16InstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + const MipsRegisterInfo &RI = TII.getRegisterInfo(); + const BitVector Reserved = RI.getReservedRegs(MF); + bool SaveS2 = Reserved[Mips::S2]; + if (SaveS2) + MF.getRegInfo().setPhysRegUsed(Mips::S2); + if (hasFP(MF)) + MF.getRegInfo().setPhysRegUsed(Mips::S0); } const MipsFrameLowering * diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16HardFloat.cpp index 81bf18cd09d9..cb0ea869887c 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16HardFloat.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16HardFloat.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "mips16-hard-float" #include "Mips16HardFloat.h" +#include "llvm/IR/Value.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -167,6 +168,11 @@ static bool needsFPReturnHelper(Function &F) { return whichFPReturnVariant(RetType) != NoFPRet; } +static bool needsFPReturnHelper(const FunctionType &FT) { + Type* RetType = FT.getReturnType(); + return whichFPReturnVariant(RetType) != NoFPRet; +} + static bool needsFPHelperFromSig(Function &F) { return needsFPStubFromParams(F) || needsFPReturnHelper(F); } @@ -400,13 +406,31 @@ static bool fixupFPReturnAndCall Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T, NULL)); CallInst::Create(F, Params, "", &Inst ); } else if (const CallInst *CI = dyn_cast(I)) { + const Value* V = CI->getCalledValue(); + const Type* T = 0; + if (V) T = V->getType(); + const PointerType *PFT=0; + if (T) PFT = dyn_cast(T); + const FunctionType *FT=0; + if (PFT) FT = dyn_cast(PFT->getElementType()); + Function *F_ = CI->getCalledFunction(); + if (FT && needsFPReturnHelper(*FT) && + !(F_ && isIntrinsicInline(F_))) { + Modified=true; + F.addFnAttr("saveS2"); + } + if (F_ && !isIntrinsicInline(F_)) { // pic mode calls are handled by already defined // helper functions - if (Subtarget.getRelocationModel() != Reloc::PIC_ ) { - Function *F_ = CI->getCalledFunction(); - if (F_ && !isIntrinsicInline(F_) && needsFPHelperFromSig(*F_)) { - assureFPCallStub(*F_, M, Subtarget); + if (needsFPReturnHelper(*F_)) { Modified=true; + F.addFnAttr("saveS2"); + } + if (Subtarget.getRelocationModel() != Reloc::PIC_ ) { + if 
(needsFPHelperFromSig(*F_)) { + assureFPCallStub(*F_, M, Subtarget); + Modified=true; + } } } } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16ISelLowering.cpp index 61d8bb8e5582..c4fcdf8dba37 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16ISelLowering.cpp @@ -429,8 +429,7 @@ getOpndList(SmallVectorImpl &Ops, const char* Mips16HelperFunction = 0; bool NeedMips16Helper = false; - if (getTargetMachine().Options.UseSoftFloat && - Subtarget->inMips16HardFloat()) { + if (Subtarget->inMips16HardFloat()) { // // currently we don't have symbols tagged with the mips16 or mips32 // qualifier so we will assume that we don't know what kind it is. diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16InstrInfo.cpp index 000ea2897f43..1b6b89985189 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16InstrInfo.cpp @@ -1,3 +1,4 @@ + //===-- Mips16InstrInfo.cpp - Mips16 Instruction Information --------------===// // // The LLVM Compiler Infrastructure @@ -28,13 +29,6 @@ using namespace llvm; -static cl::opt NeverUseSaveRestore( - "mips16-never-use-save-restore", - cl::init(false), - cl::desc("For testing ability to adjust stack pointer " - "without save/restore instruction"), - cl::Hidden); - Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm) : MipsInstrInfo(tm, Mips::Bimm16), @@ -175,45 +169,56 @@ unsigned Mips16InstrInfo::getOppositeBranchOpc(unsigned Opc) const { return 0; } +static void addSaveRestoreRegs(MachineInstrBuilder &MIB, + const std::vector &CSI, unsigned Flags=0) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Add the callee-saved register as live-in. Do not add if the register is + // RA and return address is taken, because it has already been added in + // method MipsTargetLowering::LowerRETURNADDR. + // It's killed at the spill, unless the register is RA and return address + // is taken. + unsigned Reg = CSI[e-i-1].getReg(); + switch (Reg) { + case Mips::RA: + case Mips::S0: + case Mips::S1: + MIB.addReg(Reg, Flags); + break; + case Mips::S2: + break; + default: + llvm_unreachable("unexpected mips16 callee saved register"); + + } + } +} // Adjust SP by FrameSize bytes. Save RA, S0, S1 void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); - if (!NeverUseSaveRestore) { - if (isUInt<11>(FrameSize)) - BuildMI(MBB, I, DL, get(Mips::SaveRaF16)).addImm(FrameSize); - else { - int Base = 2040; // should create template function like isUInt that - // returns largest possible n bit unsigned integer - int64_t Remainder = FrameSize - Base; - BuildMI(MBB, I, DL, get(Mips::SaveRaF16)). addImm(Base); - if (isInt<16>(-Remainder)) - BuildAddiuSpImm(MBB, I, -Remainder); - else - adjustStackPtrBig(SP, -Remainder, MBB, I, Mips::V0, Mips::V1); - } - - } + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const BitVector Reserved = RI.getReservedRegs(MF); + bool SaveS2 = Reserved[Mips::S2]; + MachineInstrBuilder MIB; + unsigned Opc = ((FrameSize <= 128) && !SaveS2)? 
Mips::Save16:Mips::SaveX16; + MIB = BuildMI(MBB, I, DL, get(Opc)); + const std::vector &CSI = MFI->getCalleeSavedInfo(); + addSaveRestoreRegs(MIB, CSI); + if (SaveS2) + MIB.addReg(Mips::S2); + if (isUInt<11>(FrameSize)) + MIB.addImm(FrameSize); else { - // - // sw ra, -4[sp] - // sw s1, -8[sp] - // sw s0, -12[sp] - - MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), - Mips::RA); - MIB1.addReg(Mips::SP); - MIB1.addImm(-4); - MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), - Mips::S1); - MIB2.addReg(Mips::SP); - MIB2.addImm(-8); - MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), - Mips::S0); - MIB3.addReg(Mips::SP); - MIB3.addImm(-12); - adjustStackPtrBig(SP, -FrameSize, MBB, I, Mips::V0, Mips::V1); + int Base = 2040; // should create template function like isUInt that + // returns largest possible n bit unsigned integer + int64_t Remainder = FrameSize - Base; + MIB.addImm(Base); + if (isInt<16>(-Remainder)) + BuildAddiuSpImm(MBB, I, -Remainder); + else + adjustStackPtrBig(SP, -Remainder, MBB, I, Mips::V0, Mips::V1); } } @@ -222,42 +227,31 @@ void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); - if (!NeverUseSaveRestore) { - if (isUInt<11>(FrameSize)) - BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)).addImm(FrameSize); - else { - int Base = 2040; // should create template function like isUInt that - // returns largest possible n bit unsigned integer - int64_t Remainder = FrameSize - Base; - if (isInt<16>(Remainder)) - BuildAddiuSpImm(MBB, I, Remainder); - else - adjustStackPtrBig(SP, Remainder, MBB, I, Mips::A0, Mips::A1); - BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)). addImm(Base); - } - } - else { - adjustStackPtrBig(SP, FrameSize, MBB, I, Mips::A0, Mips::A1); - // lw ra, -4[sp] - // lw s1, -8[sp] - // lw s0, -12[sp] - MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), - Mips::A0); - MIB1.addReg(Mips::SP); - MIB1.addImm(-4); - MachineInstrBuilder MIB0 = BuildMI(MBB, I, DL, get(Mips::Move32R16), - Mips::RA); - MIB0.addReg(Mips::A0); - MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), - Mips::S1); - MIB2.addReg(Mips::SP); - MIB2.addImm(-8); - MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), - Mips::S0); - MIB3.addReg(Mips::SP); - MIB3.addImm(-12); - } + MachineFunction *MF = MBB.getParent(); + MachineFrameInfo *MFI = MF->getFrameInfo(); + const BitVector Reserved = RI.getReservedRegs(*MF); + bool SaveS2 = Reserved[Mips::S2]; + MachineInstrBuilder MIB; + unsigned Opc = ((FrameSize <= 128) && !SaveS2)? + Mips::Restore16:Mips::RestoreX16; + if (!isUInt<11>(FrameSize)) { + unsigned Base = 2040; + int64_t Remainder = FrameSize - Base; + FrameSize = Base; // should create template function like isUInt that + // returns largest possible n bit unsigned integer + + if (isInt<16>(Remainder)) + BuildAddiuSpImm(MBB, I, Remainder); + else + adjustStackPtrBig(SP, Remainder, MBB, I, Mips::A0, Mips::A1); + } + MIB = BuildMI(MBB, I, DL, get(Opc)); + const std::vector &CSI = MFI->getCalleeSavedInfo(); + addSaveRestoreRegs(MIB, CSI, RegState::Define); + if (SaveS2) + MIB.addReg(Mips::S2, RegState::Define); + MIB.addImm(FrameSize); } // Adjust SP by Amount bytes where bytes can be up to 32bit number. 
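The save/restore rewrite above selects the 16-bit Save16/Restore16 form only when the frame fits in 128 bytes and $s2 need not be saved, and splits oversized frames around a fixed base of 2040, since the save/restore immediate is an 11-bit unsigned field. A sketch of that split under those assumptions; splitFrameSize is a hypothetical helper, and the remainder is applied upstream with BuildAddiuSpImm or adjustStackPtrBig:

    #include <cstdint>
    #include <cstdio>

    // Sketch of the frame-size handling in makeFrame/restoreFrame above:
    // frames that do not fit the 11-bit save/restore immediate are split
    // into a fixed base (2040) plus a separately applied remainder.
    static void splitFrameSize(int64_t FrameSize, int64_t &SaveImm,
                               int64_t &Remainder) {
      if (FrameSize < (1 << 11)) { // isUInt<11>(FrameSize)
        SaveImm = FrameSize;
        Remainder = 0;
      } else {
        SaveImm = 2040;            // base carried by save/restore itself
        Remainder = FrameSize - 2040;
      }
    }

    int main() {
      int64_t Imm, Rem;
      splitFrameSize(5000, Imm, Rem);
      std::printf("save %lld, adjust sp by %lld\n",
                  (long long)Imm, (long long)Rem); // save 2040, adjust sp by 2960
      return 0;
    }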
@@ -281,7 +275,7 @@ void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount, // // MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwConstant32), Reg1); - MIB1.addImm(Amount); + MIB1.addImm(Amount).addImm(-1); MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::MoveR3216), Reg2); MIB2.addReg(Mips::SP, RegState::Kill); MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::AdduRxRyRz16), Reg1); @@ -393,7 +387,7 @@ Mips16InstrInfo::loadImmediate(unsigned FrameReg, } else Available.reset(Reg); - BuildMI(MBB, II, DL, get(Mips::LwConstant32), Reg).addImm(Imm); + BuildMI(MBB, II, DL, get(Mips::LwConstant32), Reg).addImm(Imm).addImm(-1); NewImm = 0; if (FrameReg == Mips::SP) { SpReg = Available.find_first(); @@ -426,22 +420,6 @@ Mips16InstrInfo::loadImmediate(unsigned FrameReg, return Reg; } -/// This function generates the sequence of instructions needed to get the -/// result of adding register REG and immediate IMM. -unsigned -Mips16InstrInfo::basicLoadImmediate( - unsigned FrameReg, - int64_t Imm, MachineBasicBlock &MBB, - MachineBasicBlock::iterator II, DebugLoc DL, - unsigned &NewImm) const { - const TargetRegisterClass *RC = &Mips::CPU16RegsRegClass; - MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo(); - unsigned Reg = RegInfo.createVirtualRegister(RC); - BuildMI(MBB, II, DL, get(Mips::LwConstant32), Reg).addImm(Imm); - NewImm = 0; - return Reg; -} - unsigned Mips16InstrInfo::getAnalyzableBrOpc(unsigned Opc) const { return (Opc == Mips::BeqzRxImmX16 || Opc == Mips::BimmX16 || Opc == Mips::Bimm16 || diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16InstrInfo.h b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16InstrInfo.h index d9a594b537a2..e93925c88621 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16InstrInfo.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16InstrInfo.h @@ -88,11 +88,6 @@ public: MachineBasicBlock::iterator II, DebugLoc DL, unsigned &NewImm) const; - unsigned basicLoadImmediate(unsigned FrameReg, - int64_t Imm, MachineBasicBlock &MBB, - MachineBasicBlock::iterator II, DebugLoc DL, - unsigned &NewImm) const; - static bool validImmediate(unsigned Opcode, unsigned Reg, int64_t Amount); static bool validSpImm8(int offset) { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16InstrInfo.td b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16InstrInfo.td index 7441c78a0330..840dfde650a1 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16InstrInfo.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips16InstrInfo.td @@ -120,6 +120,15 @@ class FJAL16_ins _X, string asmstr, itin> { let isCodeGenOnly=1; } + +class FJALB16_ins _X, string asmstr, + InstrItinClass itin>: + FJAL16<_X, (outs), (ins simm20:$imm), + !strconcat(asmstr, "\t$imm\t# branch\n\tnop"),[], + itin> { + let isCodeGenOnly=1; +} + // // EXT-I instruction format // @@ -289,7 +298,7 @@ class FI8_MOV32R16_ins: // // This are pseudo formats for multiply -// This first one can be changed to non pseudo now. +// This first one can be changed to non-pseudo now. 
// // MULT // @@ -734,6 +743,13 @@ def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> { def Jal16 : FJAL16_ins<0b0, "jal", IIAlu> { let hasDelaySlot = 0; // not true, but we add the nop for now let isCall=1; + let Defs = [RA]; +} + +def JalB16 : FJALB16_ins<0b0, "jal", IIAlu>, branch16 { + let hasDelaySlot = 0; // not true, but we add the nop for now + let isBranch=1; + let Defs = [RA]; } // @@ -941,26 +957,18 @@ def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>; // stack // -// fixed form for restoring RA and the frame -// for direct object emitter, encoding needs to be adjusted for the -// frame size -// -let ra=1, s=0,s0=1,s1=1 in -def RestoreRaF16: - FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size), - "restore\t$$ra, $$s0, $$s1, $$s2, $frame_size", [], IILoad >, MayLoad { +def Restore16: + FI8_SVRS16<0b1, (outs), (ins variable_ops), + "", [], IILoad >, MayLoad { let isCodeGenOnly = 1; - let Defs = [S0, S1, S2, RA, SP]; + let Defs = [SP]; let Uses = [SP]; } -// Use Restore to increment SP since SP is not a Mip 16 register, this -// is an easy way to do that which does not require a register. -// -let ra=0, s=0,s0=0,s1=0 in -def RestoreIncSpF16: - FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size), - "restore\t$frame_size", [], IILoad >, MayLoad { + +def RestoreX16: + FI8_SVRS16<0b1, (outs), (ins variable_ops), + "", [], IILoad >, MayLoad { let isCodeGenOnly = 1; let Defs = [SP]; let Uses = [SP]; @@ -973,23 +981,17 @@ def RestoreIncSpF16: // To set up a stack frame on entry to a subroutine, // saving return address and static registers, and adjusting stack // -let ra=1, s=1,s0=1,s1=1 in -def SaveRaF16: - FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size), - "save\t$$ra, $$s0, $$s1, $$s2, $frame_size", [], IIStore >, MayStore { +def Save16: + FI8_SVRS16<0b1, (outs), (ins variable_ops), + "", [], IIStore >, MayStore { let isCodeGenOnly = 1; - let Uses = [RA, SP, S0, S1, S2]; + let Uses = [SP]; let Defs = [SP]; } -// -// Use Save to decrement the SP by a constant since SP is not -// a Mips16 register. -// -let ra=0, s=0,s0=0,s1=0 in -def SaveDecSpF16: - FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size), - "save\t$frame_size", [], IIStore >, MayStore { +def SaveX16: + FI8_SVRS16<0b1, (outs), (ins variable_ops), + "", [], IIStore >, MayStore { let isCodeGenOnly = 1; let Uses = [SP]; let Defs = [SP]; @@ -1374,7 +1376,9 @@ def: Mips16Pat< let isCall=1, hasDelaySlot=0 in def JumpLinkReg16: FRR16_JALRC<0, 0, 0, (outs), (ins CPU16Regs:$rs), - "jalrc \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch>; + "jalrc \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch> { + let Defs = [RA]; +} // Mips16 pseudos let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1, @@ -1890,7 +1894,7 @@ def GotPrologue16: MipsPseudo16< (outs CPU16Regs:$rh, CPU16Regs:$rl), (ins simm16:$immHi, simm16:$immLo), - ".align 2\n\tli\t$rh, $immHi\n\taddiu\t$rl, $$pc, $immLo\n ",[]> ; + "li\t$rh, $immHi\n\taddiu\t$rl, $$pc, $immLo\n ",[]> ; // An operand for the CONSTPOOL_ENTRY pseudo-instruction. 
def cpinst_operand : Operand { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips64InstrInfo.td b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips64InstrInfo.td index 15ef654555d6..0fb83839ddcf 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips64InstrInfo.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/Mips64InstrInfo.td @@ -159,8 +159,7 @@ def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>; def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>; def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM; def JALR64Pseudo : JumpLinkRegPseudo; -def TAILCALL64_R : JumpFR<"tcallr", GPR64Opnd, MipsTailCall>, - MTLO_FM<8>, IsTailCall; +def TAILCALL64_R : TailCallReg; } /// Multiply and Divide Instructions. diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsAsmPrinter.cpp index 45c439826422..284e51c59788 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -495,6 +495,7 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { + const DataLayout *DL = TM.getDataLayout(); const MachineOperand &MO = MI->getOperand(opNum); bool closeP = false; @@ -542,17 +543,8 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, break; } - case MachineOperand::MO_ExternalSymbol: - O << *GetExternalSymbolSymbol(MO.getSymbolName()); - break; - - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << MO.getIndex(); - break; - case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" + O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" << MO.getIndex(); if (MO.getOffset()) O << "+" << MO.getOffset(); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsAsmPrinter.h b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsAsmPrinter.h index 11c6acd208d1..b3060ad93067 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsAsmPrinter.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsAsmPrinter.h @@ -50,7 +50,6 @@ private: /// pool entries so we can properly mark them as data regions. bool InConstantPool; - bool UsingConstantPools; public: @@ -62,8 +61,6 @@ public: : AsmPrinter(TM, Streamer), MCP(0), InConstantPool(false), MCInstLowering(*this) { Subtarget = &TM.getSubtarget(); - UsingConstantPools = - (Subtarget->inMips16Mode() && Subtarget->useConstantIslands()); } virtual const char *getPassName() const { @@ -73,6 +70,8 @@ public: virtual bool runOnMachineFunction(MachineFunction &MF); virtual void EmitConstantPool() LLVM_OVERRIDE { + bool UsingConstantPools = + (Subtarget->inMips16Mode() && Subtarget->useConstantIslands()); if (!UsingConstantPools) AsmPrinter::EmitConstantPool(); // we emit constant pools customly! 
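For reference, the constant-pool operand printed by the rewritten printOperand above is assembled from DataLayout::getPrivateGlobalPrefix(), the function number, and the pool index. A sketch of the resulting label, assuming the Mips private prefix is "$"; cpiLabel is a hypothetical helper, not the printer's actual interface:

    #include <cstdio>
    #include <string>

    // Sketch: build the "$CPI<function>_<index>[+offset]" label that
    // printOperand emits for MachineOperand::MO_ConstantPoolIndex.
    static std::string cpiLabel(unsigned FnNum, unsigned Idx, long Offset) {
      std::string S = "$CPI" + std::to_string(FnNum) + "_" + std::to_string(Idx);
      if (Offset)
        S += "+" + std::to_string(Offset);
      return S;
    }

    int main() {
      std::printf("%s\n", cpiLabel(0, 1, 0).c_str()); // $CPI0_1
      std::printf("%s\n", cpiLabel(2, 3, 8).c_str()); // $CPI2_3+8
      return 0;
    }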
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsCallingConv.td b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsCallingConv.td index 66391cb9cb1e..bf7162f224bd 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsCallingConv.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsCallingConv.td @@ -246,4 +246,6 @@ def CSR_N64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 24), RA_64, FP_64, GP_64, (sequence "S%u_64", 7, 0))>; def CSR_Mips16RetHelper : - CalleeSavedRegs<(add V0, V1, (sequence "A%u", 3, 0), S0, S1)>; + CalleeSavedRegs<(add V0, V1, FP, + (sequence "A%u", 3, 0), (sequence "S%u", 7, 0), + (sequence "D%u", 15, 10))>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsCodeEmitter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsCodeEmitter.cpp index ca4163d4e58c..76f604d66034 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsCodeEmitter.cpp @@ -112,6 +112,7 @@ private: unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemEncodingMMImm12(const MachineInstr &MI, unsigned OpNo) const; + unsigned getMSAMemEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getLSAImmEncoding(const MachineInstr &MI, unsigned OpNo) const; @@ -224,6 +225,12 @@ unsigned MipsCodeEmitter::getMemEncodingMMImm12(const MachineInstr &MI, return 0; } +unsigned MipsCodeEmitter::getMSAMemEncoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + unsigned MipsCodeEmitter::getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const { // size is encoded as size-1. 
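The CSR_Mips16RetHelper change just above leans on TableGen's sequence operator: (sequence "S%u", 7, 0) enumerates S7 down to S0. A quick standalone illustration of that expansion, as I read the operator's semantics:

    #include <cstdio>

    // Sketch of how (sequence "S%u", 7, 0) expands in the callee-saved
    // register list above: the format string is instantiated for each
    // value from 7 down to 0, inclusive.
    int main() {
      for (int i = 7; i >= 0; --i)
        std::printf("S%d%s", i, i ? ", " : "\n"); // S7, S6, ..., S0
      return 0;
    }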
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsCondMov.td b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsCondMov.td index 2de1430a395f..6b618830e596 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsCondMov.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsCondMov.td @@ -27,7 +27,7 @@ class CMov_I_I_FT : InstSE<(outs DRC:$fd), (ins DRC:$fs, CRC:$rt, DRC:$F), - !strconcat(opstr, "\t$fd, $fs, $rt"), [], Itin, FrmFR> { + !strconcat(opstr, "\t$fd, $fs, $rt"), [], Itin, FrmFR, opstr> { let Constraints = "$F = $fd"; } @@ -47,7 +47,7 @@ class CMov_F_F_FT { + Itin, FrmFR, opstr> { let Constraints = "$F = $fd"; } @@ -127,14 +127,14 @@ let Predicates = [HasStdEnc], isCodeGenOnly = 1 in { ADD_FM<0, 0xb>; } -def MOVZ_I_S : CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd, IIFmove>, +def MOVZ_I_S : MMRel, CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd, IIFmove>, CMov_I_F_FM<18, 16>; let isCodeGenOnly = 1 in def MOVZ_I64_S : CMov_I_F_FT<"movz.s", GPR64Opnd, FGR32Opnd, IIFmove>, CMov_I_F_FM<18, 16>, Requires<[HasMips64, HasStdEnc]>; -def MOVN_I_S : CMov_I_F_FT<"movn.s", GPR32Opnd, FGR32Opnd, IIFmove>, +def MOVN_I_S : MMRel, CMov_I_F_FT<"movn.s", GPR32Opnd, FGR32Opnd, IIFmove>, CMov_I_F_FM<19, 16>; let isCodeGenOnly = 1 in @@ -142,10 +142,10 @@ def MOVN_I64_S : CMov_I_F_FT<"movn.s", GPR64Opnd, FGR32Opnd, IIFmove>, CMov_I_F_FM<19, 16>, Requires<[HasMips64, HasStdEnc]>; let Predicates = [NotFP64bit, HasStdEnc] in { - def MOVZ_I_D32 : CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64Opnd, IIFmove>, - CMov_I_F_FM<18, 17>; - def MOVN_I_D32 : CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64Opnd, IIFmove>, - CMov_I_F_FM<19, 17>; + def MOVZ_I_D32 : MMRel, CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64Opnd, + IIFmove>, CMov_I_F_FM<18, 17>; + def MOVN_I_D32 : MMRel, CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64Opnd, + IIFmove>, CMov_I_F_FM<19, 17>; } let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { @@ -175,16 +175,16 @@ let isCodeGenOnly = 1 in def MOVF_I64 : CMov_F_I_FT<"movf", GPR64Opnd, IIArith, MipsCMovFP_F>, CMov_F_I_FM<0>, Requires<[HasMips64, HasStdEnc]>; -def MOVT_S : CMov_F_F_FT<"movt.s", FGR32Opnd, IIFmove, MipsCMovFP_T>, +def MOVT_S : MMRel, CMov_F_F_FT<"movt.s", FGR32Opnd, IIFmove, MipsCMovFP_T>, CMov_F_F_FM<16, 1>; -def MOVF_S : CMov_F_F_FT<"movf.s", FGR32Opnd, IIFmove, MipsCMovFP_F>, +def MOVF_S : MMRel, CMov_F_F_FT<"movf.s", FGR32Opnd, IIFmove, MipsCMovFP_F>, CMov_F_F_FM<16, 0>; let Predicates = [NotFP64bit, HasStdEnc] in { - def MOVT_D32 : CMov_F_F_FT<"movt.d", AFGR64Opnd, IIFmove, MipsCMovFP_T>, - CMov_F_F_FM<17, 1>; - def MOVF_D32 : CMov_F_F_FT<"movf.d", AFGR64Opnd, IIFmove, MipsCMovFP_F>, - CMov_F_F_FM<17, 0>; + def MOVT_D32 : MMRel, CMov_F_F_FT<"movt.d", AFGR64Opnd, IIFmove, + MipsCMovFP_T>, CMov_F_F_FM<17, 1>; + def MOVF_D32 : MMRel, CMov_F_F_FT<"movf.d", AFGR64Opnd, IIFmove, + MipsCMovFP_F>, CMov_F_F_FM<17, 0>; } let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp index c46bbacf6585..c99640516785 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -17,7 +17,7 @@ // // The constants can be not just numbers but addresses of functions and labels. // This can be particularly helpful in static relocation mode for embedded -// non linux targets. +// non-linux targets. 
// // @@ -77,6 +77,113 @@ static cl::opt NoLoadRelaxation( cl::desc("Don't relax loads to long loads - for testing purposes"), cl::Hidden); +static unsigned int branchTargetOperand(MachineInstr *MI) { + switch (MI->getOpcode()) { + case Mips::Bimm16: + case Mips::BimmX16: + case Mips::Bteqz16: + case Mips::BteqzX16: + case Mips::Btnez16: + case Mips::BtnezX16: + case Mips::JalB16: + return 0; + case Mips::BeqzRxImm16: + case Mips::BeqzRxImmX16: + case Mips::BnezRxImm16: + case Mips::BnezRxImmX16: + return 1; + } + llvm_unreachable("Unknown branch type"); +} + +static bool isUnconditionalBranch(unsigned int Opcode) { + switch (Opcode) { + default: return false; + case Mips::Bimm16: + case Mips::BimmX16: + case Mips::JalB16: + return true; + } +} + +static unsigned int longformBranchOpcode(unsigned int Opcode) { + switch (Opcode) { + case Mips::Bimm16: + case Mips::BimmX16: + return Mips::BimmX16; + case Mips::Bteqz16: + case Mips::BteqzX16: + return Mips::BteqzX16; + case Mips::Btnez16: + case Mips::BtnezX16: + return Mips::BtnezX16; + case Mips::JalB16: + return Mips::JalB16; + case Mips::BeqzRxImm16: + case Mips::BeqzRxImmX16: + return Mips::BeqzRxImmX16; + case Mips::BnezRxImm16: + case Mips::BnezRxImmX16: + return Mips::BnezRxImmX16; + } + llvm_unreachable("Unknown branch type"); +} + +// +// FIXME: need to go through this whole constant islands port and check the math +// for branch ranges and clean this up and make some functions to calculate things +// that are done many times identically. +// Need to refactor some of the code to call this routine. +// +static unsigned int branchMaxOffsets(unsigned int Opcode) { + unsigned Bits, Scale; + switch (Opcode) { + case Mips::Bimm16: + Bits = 11; + Scale = 2; + break; + case Mips::BimmX16: + Bits = 16; + Scale = 2; + break; + case Mips::BeqzRxImm16: + Bits = 8; + Scale = 2; + break; + case Mips::BeqzRxImmX16: + Bits = 16; + Scale = 2; + break; + case Mips::BnezRxImm16: + Bits = 8; + Scale = 2; + break; + case Mips::BnezRxImmX16: + Bits = 16; + Scale = 2; + break; + case Mips::Bteqz16: + Bits = 8; + Scale = 2; + break; + case Mips::BteqzX16: + Bits = 16; + Scale = 2; + break; + case Mips::Btnez16: + Bits = 8; + Scale = 2; + break; + case Mips::BtnezX16: + Bits = 16; + Scale = 2; + break; + default: + llvm_unreachable("Unknown branch type"); + } + unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; + return MaxOffs; +} namespace { @@ -603,6 +710,55 @@ initializeFunctionInfo(const std::vector &CPEMIs) { Bits = 16; Scale = 2; isCond = false; + break; + case Mips::BeqzRxImm16: + UOpc=Mips::Bimm16; + Bits = 8; + Scale = 2; + isCond = true; + break; + case Mips::BeqzRxImmX16: + UOpc=Mips::Bimm16; + Bits = 16; + Scale = 2; + isCond = true; + break; + case Mips::BnezRxImm16: + UOpc=Mips::Bimm16; + Bits = 8; + Scale = 2; + isCond = true; + break; + case Mips::BnezRxImmX16: + UOpc=Mips::Bimm16; + Bits = 16; + Scale = 2; + isCond = true; + break; + case Mips::Bteqz16: + UOpc=Mips::Bimm16; + Bits = 8; + Scale = 2; + isCond = true; + break; + case Mips::BteqzX16: + UOpc=Mips::Bimm16; + Bits = 16; + Scale = 2; + isCond = true; + break; + case Mips::Btnez16: + UOpc=Mips::Bimm16; + Bits = 8; + Scale = 2; + isCond = true; + break; + case Mips::BtnezX16: + UOpc=Mips::Bimm16; + Bits = 16; + Scale = 2; + isCond = true; + break; } // Record this immediate branch. 
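The MaxOffs computation that follows uses the same formula as branchMaxOffsets above: an N-bit signed offset with a granularity of Scale bytes reaches ((2^(N-1)) - 1) * Scale bytes forward. A standalone check of a few of the tabulated cases; maxBranchOffset is a hypothetical mirror of that helper:

    #include <cstdio>

    // Sketch of the branch-range formula used throughout this pass.
    static unsigned maxBranchOffset(unsigned Bits, unsigned Scale) {
      return ((1u << (Bits - 1)) - 1) * Scale;
    }

    int main() {
      std::printf("%u\n", maxBranchOffset(11, 2)); // Bimm16:  2046 bytes
      std::printf("%u\n", maxBranchOffset(16, 2)); // BimmX16: 65534 bytes
      std::printf("%u\n", maxBranchOffset(8, 2));  // Bteqz16: 254 bytes
      return 0;
    }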
unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; @@ -1275,6 +1431,10 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { // Decrement the old entry, and remove it if refcount becomes 0. decrementCPEReferenceCount(CPI, CPEMI); + // No existing clone of this CPE is within range. + // We will be generating a new clone. Get a UID for it. + unsigned ID = createPICLabelUId(); + // Now that we have an island to add the CPE to, clone the original CPE and // add it to the island. U.HighWaterMark = NewIsland; @@ -1290,9 +1450,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { BBInfo[NewIsland->getNumber()].Size += Size; adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland))); - // No existing clone of this CPE is within range. - // We will be generating a new clone. Get a UID for it. - unsigned ID = createPICLabelUId(); + // Finally, change the CPI in the instruction operand to be ID. for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) @@ -1380,7 +1538,8 @@ unsigned PCAdj = 4; /// away to fit in its displacement field. bool MipsConstantIslands::fixupImmediateBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; - MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); + unsigned TargetOperand = branchTargetOperand(MI); + MachineBasicBlock *DestBB = MI->getOperand(TargetOperand).getMBB(); // Check to see if the DestBB is already in-range. if (isBBInRange(MI, DestBB, Br.MaxDisp)) @@ -1399,9 +1558,29 @@ bool MipsConstantIslands::fixupUnconditionalBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *MBB = MI->getParent(); + MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); // Use BL to implement far jump. - Br.MaxDisp = ((1 << 16)-1) * 2; - MI->setDesc(TII->get(Mips::BimmX16)); + unsigned BimmX16MaxDisp = ((1 << 16)-1) * 2; + if (isBBInRange(MI, DestBB, BimmX16MaxDisp)) { + Br.MaxDisp = BimmX16MaxDisp; + MI->setDesc(TII->get(Mips::BimmX16)); + } + else { + // need to give the math a more careful look here + // this is really a segment address and not + // a PC relative address. FIXME. But I think that + // just reducing the bits by 1 as I've done is correct. + // The basic block we are branching to must be longword aligned. + // we know that RA is saved because we always save it right now. + // this requirement will be relaxed later; we also have an alternate + // way to implement this, which does not need jal, that I will implement later. + // We should have a way to back out this alignment restriction if we "can" later, + // but it is not harmful. + // + DestBB->setAlignment(2); + Br.MaxDisp = ((1<<24)-1) * 2; + MI->setDesc(TII->get(Mips::JalB16)); + } BBInfo[MBB->getNumber()].Size += 2; adjustBBOffsetsAfter(MBB); HasFarJump = true; @@ -1412,23 +1591,33 @@ MipsConstantIslands::fixupUnconditionalBr(ImmBranch &Br) { return true; } + /// fixupConditionalBr - Fix up a conditional branch whose destination is too /// far away to fit in its displacement field. It is converted to an inverse /// conditional branch + an unconditional branch to the destination.
bool MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; - MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); + unsigned TargetOperand = branchTargetOperand(MI); + MachineBasicBlock *DestBB = MI->getOperand(TargetOperand).getMBB(); + unsigned Opcode = MI->getOpcode(); + unsigned LongFormOpcode = longformBranchOpcode(Opcode); + unsigned LongFormMaxOff = branchMaxOffsets(LongFormOpcode); + + // Check to see if the DestBB is already in-range. + if (isBBInRange(MI, DestBB, LongFormMaxOff)) { + Br.MaxDisp = LongFormMaxOff; + MI->setDesc(TII->get(LongFormOpcode)); + return true; + } // Add an unconditional branch to the destination and invert the branch // condition to jump over it: - // blt L1 + // bteqz L1 // => - // bge L2 + // bnez L2 // b L1 // L2: - unsigned CCReg = 0; // FIXME - unsigned CC=0; //FIXME // If the branch is at the end of its MBB and that has a fall-through block, // direct the updated conditional branch to the fall-through block. Otherwise, @@ -1436,29 +1625,34 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) { MachineBasicBlock *MBB = MI->getParent(); MachineInstr *BMI = &MBB->back(); bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); - + unsigned OppositeBranchOpcode = TII->getOppositeBranchOpc(Opcode); + ++NumCBrFixed; if (BMI != MI) { if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && - BMI->getOpcode() == Br.UncondBr) { + isUnconditionalBranch(BMI->getOpcode())) { // Last MI in the BB is an unconditional branch. Can we simply invert the // condition and swap destinations: - // beq L1 + // beqz L1 // b L2 // => - // bne L2 + // bnez L2 // b L1 - MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); + unsigned BMITargetOperand = branchTargetOperand(BMI); + MachineBasicBlock *NewDest = + BMI->getOperand(BMITargetOperand).getMBB(); if (isBBInRange(MI, NewDest, Br.MaxDisp)) { DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " << *BMI); - BMI->getOperand(0).setMBB(DestBB); - MI->getOperand(0).setMBB(NewDest); + MI->setDesc(TII->get(OppositeBranchOpcode)); + BMI->getOperand(BMITargetOperand).setMBB(DestBB); + MI->getOperand(TargetOperand).setMBB(NewDest); return true; } } } + if (NeedSplit) { splitBlockBeforeInstr(MI); // No need for the branch to the next block. We're adding an unconditional @@ -1476,8 +1670,14 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) { // Insert a new conditional branch and a new unconditional branch. // Also update the ImmBranch as well as adding a new entry for the new branch. 
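  // Editor's note (an illustration, not in the original patch): the rebuilt
  // conditional branch below must respect the operand layouts encoded by
  // branchTargetOperand(). Register forms such as
  //   beqz $rx, L     # two explicit operands: register, then target
  // keep the branch target in operand 1, while the condition-flag forms
  //   bteqz L         # one explicit operand: the target
  // keep it in operand 0, which is why the BuildMI call is split on
  // MI->getNumExplicitOperands() == 2.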
- BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode())) - .addMBB(NextBB).addImm(CC).addReg(CCReg); + if (MI->getNumExplicitOperands() == 2) { + BuildMI(MBB, DebugLoc(), TII->get(OppositeBranchOpcode)) + .addReg(MI->getOperand(0).getReg()) + .addMBB(NextBB); + } else { + BuildMI(MBB, DebugLoc(), TII->get(OppositeBranchOpcode)) + .addMBB(NextBB); + } Br.MI = &MBB->back(); BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); @@ -1496,13 +1696,13 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) { void MipsConstantIslands::prescanForConstants() { unsigned J = 0; (void)J; - PrescannedForConstants = true; for (MachineFunction::iterator B = MF->begin(), E = MF->end(); B != E; ++B) { for (MachineBasicBlock::instr_iterator I = B->instr_begin(), EB = B->instr_end(); I != EB; ++I) { switch(I->getDesc().getOpcode()) { case Mips::LwConstant32: { + PrescannedForConstants = true; DEBUG(dbgs() << "constant island constant " << *I << "\n"); J = I->getNumOperands(); DEBUG(dbgs() << "num operands " << J << "\n"); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsISelLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsISelLowering.cpp index 1e8250c847fe..9c74ae4ce2b1 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -392,6 +392,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1); MaxStoresPerMemcpy = 16; + + isMicroMips = Subtarget->inMicroMipsMode(); } const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM) { @@ -535,19 +537,65 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, if (!FalseTy.isInteger()) return SDValue(); - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(False); + ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(False); - if (!CN || CN->getZExtValue()) + // If the RHS (False) is 0, we swap the order of the operands + // of ISD::SELECT (obviously also inverting the condition) so that we can + // take advantage of conditional moves using the $0 register. + // Example: + // return (a != 0) ? x : 0; + // load $reg, x + // movz $reg, $0, a + if (!FalseC) return SDValue(); const SDLoc DL(N); - ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get(); + + if (!FalseC->getZExtValue()) { + ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get(); + SDValue True = N->getOperand(1); + + SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0), + SetCC.getOperand(1), ISD::getSetCCInverse(CC, true)); + + return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True); + } + + // If both operands are integer constants there's a possibility that we + // can do some interesting optimizations. SDValue True = N->getOperand(1); + ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(True); - SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0), - SetCC.getOperand(1), ISD::getSetCCInverse(CC, true)); + if (!TrueC || !True.getValueType().isInteger()) + return SDValue(); - return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True); + // We'll also ignore MVT::i64 operands as this optimization proves + // to be ineffective because of the required sign extensions as the result + // of a SETCC operator is always MVT::i32 for non-vector types. + if (True.getValueType() == MVT::i64) + return SDValue(); + + int64_t Diff = TrueC->getSExtValue() - FalseC->getSExtValue(); + + // 1) (a < x) ?
y : y-1 + // slti $reg1, a, x + // addiu $reg2, $reg1, y-1 + if (Diff == 1) + return DAG.getNode(ISD::ADD, DL, SetCC.getValueType(), SetCC, False); + + // 2) (a < x) ? y-1 : y + // slti $reg1, a, x + // xor $reg1, $reg1, 1 + // addiu $reg2, $reg1, y-1 + if (Diff == -1) { + ISD::CondCode CC = cast(SetCC.getOperand(2))->get(); + SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0), + SetCC.getOperand(1), ISD::getSetCCInverse(CC, true)); + return DAG.getNode(ISD::ADD, DL, SetCC.getValueType(), SetCC, True); + } + + // Couldn't optimize. + return SDValue(); } static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, @@ -885,8 +933,8 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned LL, SC, AND, NOR, ZERO, BEQ; if (Size == 4) { - LL = Mips::LL; - SC = Mips::SC; + LL = isMicroMips ? Mips::LL_MM : Mips::LL; + SC = isMicroMips ? Mips::SC_MM : Mips::SC; AND = Mips::AND; NOR = Mips::NOR; ZERO = Mips::ZERO; @@ -1128,8 +1176,8 @@ MachineBasicBlock * MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI, unsigned LL, SC, ZERO, BNE, BEQ; if (Size == 4) { - LL = Mips::LL; - SC = Mips::SC; + LL = isMicroMips ? Mips::LL_MM : Mips::LL; + SC = isMicroMips ? Mips::SC_MM : Mips::SC; ZERO = Mips::ZERO; BNE = Mips::BNE; BEQ = Mips::BEQ; @@ -2285,7 +2333,7 @@ getOpndList(SmallVectorImpl &Ops, if (GlobalAddressSDNode *G = dyn_cast(CLI.Callee)) { llvm::StringRef Sym = G->getGlobal()->getName(); Function *F = G->getGlobal()->getParent()->getFunction(Sym); - if (F->hasFnAttribute("__Mips16RetHelper")) { + if (F && F->hasFnAttribute("__Mips16RetHelper")) { Mask = MipsRegisterInfo::getMips16RetHelperMask(); } } @@ -3216,7 +3264,7 @@ MipsTargetLowering::MipsCC::SpecialCallingConvType if (GlobalAddressSDNode *G = dyn_cast(Callee)) { llvm::StringRef Sym = G->getGlobal()->getName(); Function *F = G->getGlobal()->getParent()->getFunction(Sym); - if (F->hasFnAttribute("__Mips16RetHelper")) { + if (F && F->hasFnAttribute("__Mips16RetHelper")) { SpecialCallingConv = MipsCC::Mips16RetHelperConv; } } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsISelLowering.h b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsISelLowering.h index 65f68f04315d..2214fd18fe2d 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsISelLowering.h @@ -209,6 +209,7 @@ namespace llvm { class MipsFunctionInfo; class MipsTargetLowering : public TargetLowering { + bool isMicroMips; public: explicit MipsTargetLowering(MipsTargetMachine &TM); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsInstrFPU.td b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsInstrFPU.td index 9f7ce9aa72b0..98e9f792972f 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsInstrFPU.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsInstrFPU.td @@ -93,15 +93,16 @@ class ADDS_FT : InstSE<(outs RC:$fd), (ins RC:$fs, RC:$ft), !strconcat(opstr, "\t$fd, $fs, $ft"), - [(set RC:$fd, (OpNode RC:$fs, RC:$ft))], Itin, FrmFR> { + [(set RC:$fd, (OpNode RC:$fs, RC:$ft))], Itin, FrmFR, opstr> { let isCommutable = IsComm; } multiclass ADDS_M { - def _D32 : ADDS_FT, + def _D32 : MMRel, ADDS_FT, Requires<[NotFP64bit, HasStdEnc]>; - def _D64 : ADDS_FT, + def _D64 : ADDS_FT, Requires<[IsFP64bit, HasStdEnc]> { string DecoderNamespace = "Mips64"; } @@ -110,12 +111,12 @@ multiclass ADDS_M : InstSE<(outs DstRC:$fd), (ins SrcRC:$fs), !strconcat(opstr, "\t$fd, $fs"), - [(set DstRC:$fd, (OpNode SrcRC:$fs))], Itin, FrmFR>, + [(set 
DstRC:$fd, (OpNode SrcRC:$fs))], Itin, FrmFR, opstr>, NeverHasSideEffects; multiclass ABSS_M { - def _D32 : ABSS_FT, + def _D32 : MMRel, ABSS_FT, Requires<[NotFP64bit, HasStdEnc]>; def _D64 : ABSS_FT, Requires<[IsFP64bit, HasStdEnc]> { @@ -124,7 +125,7 @@ multiclass ABSS_M { - def _D32 : ABSS_FT, + def _D32 : MMRel, ABSS_FT, Requires<[NotFP64bit, HasStdEnc]>; def _D64 : ABSS_FT, Requires<[IsFP64bit, HasStdEnc]> { @@ -135,17 +136,17 @@ multiclass ROUND_M { class MFC1_FT : InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"), - [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR>; + [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR, opstr>; class MTC1_FT : InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"), - [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>; + [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR, opstr>; class LW_FT : InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI> { + [(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI, opstr> { let DecoderMethod = "DecodeFMem"; let mayLoad = 1; } @@ -153,7 +154,7 @@ class LW_FT : InstSE<(outs), (ins RC:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI> { + [(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI, opstr> { let DecoderMethod = "DecodeFMem"; let mayStore = 1; } @@ -162,20 +163,22 @@ class MADDS_FT : InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft), !strconcat(opstr, "\t$fd, $fr, $fs, $ft"), - [(set RC:$fd, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr))], Itin, FrmFR>; + [(set RC:$fd, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr))], Itin, + FrmFR, opstr>; class NMADDS_FT : InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft), !strconcat(opstr, "\t$fd, $fr, $fs, $ft"), [(set RC:$fd, (fsub fpimm0, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr)))], - Itin, FrmFR>; + Itin, FrmFR, opstr>; class LWXC1_FT : InstSE<(outs DRC:$fd), (ins PtrRC:$base, PtrRC:$index), !strconcat(opstr, "\t$fd, ${index}(${base})"), - [(set DRC:$fd, (OpNode (add iPTR:$base, iPTR:$index)))], Itin, FrmFI> { + [(set DRC:$fd, (OpNode (add iPTR:$base, iPTR:$index)))], Itin, + FrmFI, opstr> { let AddedComplexity = 20; } @@ -183,15 +186,17 @@ class SWXC1_FT : InstSE<(outs), (ins DRC:$fs, PtrRC:$base, PtrRC:$index), !strconcat(opstr, "\t$fs, ${index}(${base})"), - [(OpNode DRC:$fs, (add iPTR:$base, iPTR:$index))], Itin, FrmFI> { + [(OpNode DRC:$fs, (add iPTR:$base, iPTR:$index))], Itin, + FrmFI, opstr> { let AddedComplexity = 20; } -class BC1F_FT : - InstSE<(outs), (ins FCCRegsOpnd:$fcc, brtarget:$offset), + InstSE<(outs), (ins FCCRegsOpnd:$fcc, opnd:$offset), !strconcat(opstr, "\t$fcc, $offset"), - [(MipsFPBrcond Op, FCCRegsOpnd:$fcc, bb:$offset)], Itin, FrmFI> { + [(MipsFPBrcond Op, FCCRegsOpnd:$fcc, bb:$offset)], Itin, + FrmFI, opstr> { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; @@ -202,7 +207,8 @@ class CEQS_FT : InstSE<(outs), (ins RC:$fs, RC:$ft, condcode:$cond), !strconcat("c.$cond.", typestr, "\t$fs, $ft"), - [(OpNode RC:$fs, RC:$ft, imm:$cond)], Itin, FrmFR> { + [(OpNode RC:$fs, RC:$ft, imm:$cond)], Itin, FrmFR, + !strconcat("c.$cond.", typestr)> { let Defs = [FCC0]; let isCodeGenOnly = 1; } @@ -240,15 +246,15 @@ defm D64 : C_COND_M<"d", FGR64Opnd, 17>, Requires<[IsFP64bit, HasStdEnc]>; //===----------------------------------------------------------------------===// // Floating Point Instructions //===----------------------------------------------------------------------===// -def 
ROUND_W_S : ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, +def ROUND_W_S : MMRel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0xc, 16>; -def TRUNC_W_S : ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, +def TRUNC_W_S : MMRel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0xd, 16>; -def CEIL_W_S : ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, +def CEIL_W_S : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0xe, 16>; -def FLOOR_W_S : ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, +def FLOOR_W_S : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0xf, 16>; -def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, +def CVT_W_S : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0x24, 16>; defm ROUND_W : ROUND_M<"round.w.d", IIFcvt>, ABSS_FM<0xc, 17>; @@ -276,19 +282,19 @@ let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { ABSS_FM<0xb, 17>; } -def CVT_S_W : ABSS_FT<"cvt.s.w", FGR32Opnd, FGR32Opnd, IIFcvt>, +def CVT_S_W : MMRel, ABSS_FT<"cvt.s.w", FGR32Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0x20, 20>; -def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>, +def CVT_L_S : MMRel, ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0x25, 16>; -def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>, +def CVT_L_D64: MMRel, ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>, ABSS_FM<0x25, 17>; let Predicates = [NotFP64bit, HasStdEnc] in { - def CVT_S_D32 : ABSS_FT<"cvt.s.d", FGR32Opnd, AFGR64Opnd, IIFcvt>, + def CVT_S_D32 : MMRel, ABSS_FT<"cvt.s.d", FGR32Opnd, AFGR64Opnd, IIFcvt>, ABSS_FM<0x20, 17>; - def CVT_D32_W : ABSS_FT<"cvt.d.w", AFGR64Opnd, FGR32Opnd, IIFcvt>, + def CVT_D32_W : MMRel, ABSS_FT<"cvt.d.w", AFGR64Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0x21, 20>; - def CVT_D32_S : ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd, IIFcvt>, + def CVT_D32_S : MMRel, ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd, IIFcvt>, ABSS_FM<0x21, 16>; } @@ -314,15 +320,15 @@ let isPseudo = 1, isCodeGenOnly = 1 in { } let Predicates = [NoNaNsFPMath, HasStdEnc] in { - def FABS_S : ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, IIFcvt, fabs>, + def FABS_S : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, IIFcvt, fabs>, ABSS_FM<0x5, 16>; - def FNEG_S : ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, IIFcvt, fneg>, + def FNEG_S : MMRel, ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, IIFcvt, fneg>, ABSS_FM<0x7, 16>; defm FABS : ABSS_M<"abs.d", IIFcvt, fabs>, ABSS_FM<0x5, 17>; defm FNEG : ABSS_M<"neg.d", IIFcvt, fneg>, ABSS_FM<0x7, 17>; } -def FSQRT_S : ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, IIFsqrtSingle, +def FSQRT_S : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, IIFsqrtSingle, fsqrt>, ABSS_FM<0x4, 16>; defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>; @@ -332,24 +338,24 @@ defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>; // regardless of register aliasing. 
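The change repeated throughout this hunk - threading the assembly string (opstr) into InstSE and prefixing definitions with MMRel - lets TableGen's instruction-mapping machinery pair each standard-encoding instruction with its microMIPS counterpart through their shared base mnemonic. A hand-written miniature of the resulting lookup, with purely hypothetical names (the real table is generated by TableGen, not written like this), assuming C++11:

#include <cassert>
#include <map>
#include <string>

// Hypothetical stand-in for the generated standard -> microMIPS relation:
// both definitions of an instruction carry the same base mnemonic (the
// opstr threaded into InstSE above), and the relation is keyed on it.
enum Opcode { FADD_S, FADD_S_MM, LWC1, LWC1_MM };

static Opcode microMipsTwin(const std::string &Mnemonic) {
  // Generated by TableGen in reality; sketched by hand here.
  static const std::map<std::string, Opcode> Rel = {
      {"add.s", FADD_S_MM}, {"lwc1", LWC1_MM}};
  return Rel.at(Mnemonic); // throws std::out_of_range if no twin exists
}

int main() {
  assert(microMipsTwin("add.s") == FADD_S_MM);
  assert(microMipsTwin("lwc1") == LWC1_MM);
  return 0;
}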
/// Move Control Registers From/To CPU Registers -def CFC1 : MFC1_FT<"cfc1", GPR32Opnd, CCROpnd, IIFmove>, MFC1_FM<2>; -def CTC1 : MTC1_FT<"ctc1", CCROpnd, GPR32Opnd, IIFmove>, MFC1_FM<6>; -def MFC1 : MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, IIFmoveC1, bitconvert>, - MFC1_FM<0>; -def MTC1 : MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, IIFmoveC1, bitconvert>, - MFC1_FM<4>; -def MFHC1 : MFC1_FT<"mfhc1", GPR32Opnd, FGRH32Opnd, IIFmoveC1>, +def CFC1 : MMRel, MFC1_FT<"cfc1", GPR32Opnd, CCROpnd, IIFmove>, MFC1_FM<2>; +def CTC1 : MMRel, MTC1_FT<"ctc1", CCROpnd, GPR32Opnd, IIFmove>, MFC1_FM<6>; +def MFC1 : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, IIFmoveC1, + bitconvert>, MFC1_FM<0>; +def MTC1 : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, IIFmoveC1, + bitconvert>, MFC1_FM<4>; +def MFHC1 : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, FGRH32Opnd, IIFmoveC1>, MFC1_FM<3>; -def MTHC1 : MTC1_FT<"mthc1", FGRH32Opnd, GPR32Opnd, IIFmoveC1>, +def MTHC1 : MMRel, MTC1_FT<"mthc1", FGRH32Opnd, GPR32Opnd, IIFmoveC1>, MFC1_FM<7>; def DMFC1 : MFC1_FT<"dmfc1", GPR64Opnd, FGR64Opnd, IIFmoveC1, bitconvert>, MFC1_FM<1>; def DMTC1 : MTC1_FT<"dmtc1", FGR64Opnd, GPR64Opnd, IIFmoveC1, bitconvert>, MFC1_FM<5>; -def FMOV_S : ABSS_FT<"mov.s", FGR32Opnd, FGR32Opnd, IIFmove>, +def FMOV_S : MMRel, ABSS_FT<"mov.s", FGR32Opnd, FGR32Opnd, IIFmove>, ABSS_FM<0x6, 16>; -def FMOV_D32 : ABSS_FT<"mov.d", AFGR64Opnd, AFGR64Opnd, IIFmove>, +def FMOV_D32 : MMRel, ABSS_FT<"mov.d", AFGR64Opnd, AFGR64Opnd, IIFmove>, ABSS_FM<0x6, 17>, Requires<[NotFP64bit, HasStdEnc]>; def FMOV_D64 : ABSS_FT<"mov.d", FGR64Opnd, FGR64Opnd, IIFmove>, ABSS_FM<0x6, 17>, Requires<[IsFP64bit, HasStdEnc]> { @@ -358,8 +364,8 @@ def FMOV_D64 : ABSS_FT<"mov.d", FGR64Opnd, FGR64Opnd, IIFmove>, /// Floating Point Memory Instructions let Predicates = [HasStdEnc] in { - def LWC1 : LW_FT<"lwc1", FGR32Opnd, IIFLoad, load>, LW_FM<0x31>; - def SWC1 : SW_FT<"swc1", FGR32Opnd, IIFStore, store>, LW_FM<0x39>; + def LWC1 : MMRel, LW_FT<"lwc1", FGR32Opnd, IIFLoad, load>, LW_FM<0x31>; + def SWC1 : MMRel, SW_FT<"swc1", FGR32Opnd, IIFStore, store>, LW_FM<0x39>; } let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { @@ -368,8 +374,8 @@ let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { } let Predicates = [NotFP64bit, HasStdEnc] in { - def LDC1 : LW_FT<"ldc1", AFGR64Opnd, IIFLoad, load>, LW_FM<0x35>; - def SDC1 : SW_FT<"sdc1", AFGR64Opnd, IIFStore, store>, LW_FM<0x3d>; + def LDC1 : MMRel, LW_FT<"ldc1", AFGR64Opnd, IIFLoad, load>, LW_FM<0x35>; + def SDC1 : MMRel, SW_FT<"sdc1", AFGR64Opnd, IIFStore, store>, LW_FM<0x3d>; } /// Cop2 Memory Instructions @@ -382,11 +388,12 @@ let Predicates = [HasStdEnc] in { // Indexed loads and stores. let Predicates = [HasFPIdx, HasStdEnc] in { - def LWXC1 : LWXC1_FT<"lwxc1", FGR32Opnd, IIFLoad, load>, LWXC1_FM<0>; - def SWXC1 : SWXC1_FT<"swxc1", FGR32Opnd, IIFStore, store>, SWXC1_FM<8>; + def LWXC1 : MMRel, LWXC1_FT<"lwxc1", FGR32Opnd, IIFLoad, load>, LWXC1_FM<0>; + def SWXC1 : MMRel, SWXC1_FT<"swxc1", FGR32Opnd, IIFStore, store>, + SWXC1_FM<8>; } -let Predicates = [HasFPIdx, NotFP64bit, HasStdEnc] in { +let Predicates = [HasFPIdx, NotFP64bit, HasStdEnc, NotInMicroMips] in { def LDXC1 : LWXC1_FT<"ldxc1", AFGR64Opnd, IIFLoad, load>, LWXC1_FM<1>; def SDXC1 : SWXC1_FT<"sdxc1", AFGR64Opnd, IIFStore, store>, SWXC1_FM<9>; } @@ -399,8 +406,8 @@ let Predicates = [HasFPIdx, IsFP64bit, HasStdEnc], // Load/store doubleword indexed unaligned. 
let Predicates = [NotFP64bit, HasStdEnc] in { - def LUXC1 : LWXC1_FT<"luxc1", AFGR64Opnd, IIFLoad>, LWXC1_FM<0x5>; - def SUXC1 : SWXC1_FT<"suxc1", AFGR64Opnd, IIFStore>, SWXC1_FM<0xd>; + def LUXC1 : MMRel, LWXC1_FT<"luxc1", AFGR64Opnd, IIFLoad>, LWXC1_FM<0x5>; + def SUXC1 : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, IIFStore>, SWXC1_FM<0xd>; } let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace="Mips64" in { @@ -409,44 +416,44 @@ let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace="Mips64" in { } /// Floating-point Aritmetic -def FADD_S : ADDS_FT<"add.s", FGR32Opnd, IIFadd, 1, fadd>, +def FADD_S : MMRel, ADDS_FT<"add.s", FGR32Opnd, IIFadd, 1, fadd>, ADDS_FM<0x00, 16>; defm FADD : ADDS_M<"add.d", IIFadd, 1, fadd>, ADDS_FM<0x00, 17>; -def FDIV_S : ADDS_FT<"div.s", FGR32Opnd, IIFdivSingle, 0, fdiv>, +def FDIV_S : MMRel, ADDS_FT<"div.s", FGR32Opnd, IIFdivSingle, 0, fdiv>, ADDS_FM<0x03, 16>; defm FDIV : ADDS_M<"div.d", IIFdivDouble, 0, fdiv>, ADDS_FM<0x03, 17>; -def FMUL_S : ADDS_FT<"mul.s", FGR32Opnd, IIFmulSingle, 1, fmul>, +def FMUL_S : MMRel, ADDS_FT<"mul.s", FGR32Opnd, IIFmulSingle, 1, fmul>, ADDS_FM<0x02, 16>; defm FMUL : ADDS_M<"mul.d", IIFmulDouble, 1, fmul>, ADDS_FM<0x02, 17>; -def FSUB_S : ADDS_FT<"sub.s", FGR32Opnd, IIFadd, 0, fsub>, +def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, IIFadd, 0, fsub>, ADDS_FM<0x01, 16>; defm FSUB : ADDS_M<"sub.d", IIFadd, 0, fsub>, ADDS_FM<0x01, 17>; let Predicates = [HasMips32r2, HasStdEnc] in { - def MADD_S : MADDS_FT<"madd.s", FGR32Opnd, IIFmulSingle, fadd>, + def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, IIFmulSingle, fadd>, MADDS_FM<4, 0>; - def MSUB_S : MADDS_FT<"msub.s", FGR32Opnd, IIFmulSingle, fsub>, + def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, IIFmulSingle, fsub>, MADDS_FM<5, 0>; } let Predicates = [HasMips32r2, NoNaNsFPMath, HasStdEnc] in { - def NMADD_S : NMADDS_FT<"nmadd.s", FGR32Opnd, IIFmulSingle, fadd>, + def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, IIFmulSingle, fadd>, MADDS_FM<6, 0>; - def NMSUB_S : NMADDS_FT<"nmsub.s", FGR32Opnd, IIFmulSingle, fsub>, + def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, IIFmulSingle, fsub>, MADDS_FM<7, 0>; } let Predicates = [HasMips32r2, NotFP64bit, HasStdEnc] in { - def MADD_D32 : MADDS_FT<"madd.d", AFGR64Opnd, IIFmulDouble, fadd>, + def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, IIFmulDouble, fadd>, MADDS_FM<4, 1>; - def MSUB_D32 : MADDS_FT<"msub.d", AFGR64Opnd, IIFmulDouble, fsub>, + def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, IIFmulDouble, fsub>, MADDS_FM<5, 1>; } let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStdEnc] in { - def NMADD_D32 : NMADDS_FT<"nmadd.d", AFGR64Opnd, IIFmulDouble, fadd>, + def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, IIFmulDouble, fadd>, MADDS_FM<6, 1>; - def NMSUB_D32 : NMADDS_FT<"nmsub.d", AFGR64Opnd, IIFmulDouble, fsub>, + def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, IIFmulDouble, fsub>, MADDS_FM<7, 1>; } @@ -473,8 +480,10 @@ let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStdEnc], def MIPS_BRANCH_F : PatLeaf<(i32 0)>; def MIPS_BRANCH_T : PatLeaf<(i32 1)>; -def BC1F : BC1F_FT<"bc1f", IIBranch, MIPS_BRANCH_F>, BC1F_FM<0, 0>; -def BC1T : BC1F_FT<"bc1t", IIBranch, MIPS_BRANCH_T>, BC1F_FM<0, 1>; +def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, IIBranch, MIPS_BRANCH_F>, + BC1F_FM<0, 0>; +def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, IIBranch, MIPS_BRANCH_T>, + BC1F_FM<0, 1>; //===----------------------------------------------------------------------===// // Floating Point Flag Conditions @@ 
-499,8 +508,8 @@ def MIPS_FCOND_LE : PatLeaf<(i32 14)>; def MIPS_FCOND_NGT : PatLeaf<(i32 15)>; /// Floating Point Compare -def FCMP_S32 : CEQS_FT<"s", FGR32, IIFcmp, MipsFPCmp>, CEQS_FM<16>; -def FCMP_D32 : CEQS_FT<"d", AFGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>, +def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, IIFcmp, MipsFPCmp>, CEQS_FM<16>; +def FCMP_D32 : MMRel, CEQS_FT<"d", AFGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>, Requires<[NotFP64bit, HasStdEnc]>; let DecoderNamespace = "Mips64" in def FCMP_D64 : CEQS_FT<"d", FGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsInstrFormats.td b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsInstrFormats.td index 737a018c67af..8926264594ef 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsInstrFormats.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsInstrFormats.td @@ -401,7 +401,7 @@ class BGEZAL_FM funct> : StdArch { let Inst{15-0} = offset; } -class SYNC_FM { +class SYNC_FM : StdArch { bits<5> stype; bits<32> Inst; @@ -479,11 +479,21 @@ class TEQI_FM funct> : StdArch { let Inst{20-16} = funct; let Inst{15-0} = imm16; } + +class WAIT_FM : StdArch { + bits<32> Inst; + + let Inst{31-26} = 0x10; + let Inst{25} = 1; + let Inst{24-6} = 0; + let Inst{5-0} = 0x20; +} + //===----------------------------------------------------------------------===// // System calls format //===----------------------------------------------------------------------===// -class SYS_FM funct> +class SYS_FM funct> : StdArch { bits<20> code_; bits<32> Inst; @@ -496,7 +506,7 @@ class SYS_FM funct> // Break instruction format //===----------------------------------------------------------------------===// -class BRK_FM funct> +class BRK_FM funct> : StdArch { bits<10> code_1; bits<10> code_2; @@ -511,7 +521,7 @@ class BRK_FM funct> // Exception return format //===----------------------------------------------------------------------===// -class ER_FM funct> +class ER_FM funct> : StdArch { bits<32> Inst; let Inst{31-26} = 0x10; @@ -525,7 +535,7 @@ class ER_FM funct> // Enable/disable interrupt instruction format //===----------------------------------------------------------------------===// -class EI_FM sc> +class EI_FM sc> : StdArch { bits<32> Inst; bits<5> rt; @@ -569,7 +579,7 @@ class FFI op, dag outs, dag ins, string asmstr, list pattern>: let Inst{15-0} = imm16; } -class ADDS_FM funct, bits<5> fmt> { +class ADDS_FM funct, bits<5> fmt> : StdArch { bits<5> fd; bits<5> fs; bits<5> ft; @@ -584,7 +594,7 @@ class ADDS_FM funct, bits<5> fmt> { let Inst{5-0} = funct; } -class ABSS_FM funct, bits<5> fmt> { +class ABSS_FM funct, bits<5> fmt> : StdArch { bits<5> fd; bits<5> fs; @@ -598,7 +608,7 @@ class ABSS_FM funct, bits<5> fmt> { let Inst{5-0} = funct; } -class MFC1_FM funct> { +class MFC1_FM funct> : StdArch { bits<5> rt; bits<5> fs; @@ -623,7 +633,7 @@ class LW_FM op> : StdArch { let Inst{15-0} = addr{15-0}; } -class MADDS_FM funct, bits<3> fmt> { +class MADDS_FM funct, bits<3> fmt> : StdArch { bits<5> fd; bits<5> fr; bits<5> fs; @@ -640,7 +650,7 @@ class MADDS_FM funct, bits<3> fmt> { let Inst{2-0} = fmt; } -class LWXC1_FM funct> { +class LWXC1_FM funct> : StdArch { bits<5> fd; bits<5> base; bits<5> index; @@ -655,7 +665,7 @@ class LWXC1_FM funct> { let Inst{5-0} = funct; } -class SWXC1_FM funct> { +class SWXC1_FM funct> : StdArch { bits<5> fs; bits<5> base; bits<5> index; @@ -670,7 +680,7 @@ class SWXC1_FM funct> { let Inst{5-0} = funct; } -class BC1F_FM { +class BC1F_FM : StdArch { bits<3> fcc; bits<16> offset; @@ -684,7 
+694,7 @@ class BC1F_FM { let Inst{15-0} = offset; } -class CEQS_FM fmt> { +class CEQS_FM fmt> : StdArch { bits<5> fs; bits<5> ft; bits<4> cond; @@ -704,7 +714,7 @@ class C_COND_FM fmt, bits<4> c> : CEQS_FM { let cond = c; } -class CMov_I_F_FM funct, bits<5> fmt> { +class CMov_I_F_FM funct, bits<5> fmt> : StdArch { bits<5> fd; bits<5> fs; bits<5> rt; @@ -736,7 +746,7 @@ class CMov_F_I_FM : StdArch { let Inst{5-0} = 1; } -class CMov_F_F_FM fmt, bit tf> { +class CMov_F_F_FM fmt, bit tf> : StdArch { bits<5> fd; bits<5> fs; bits<3> fcc; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsInstrInfo.td b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsInstrInfo.td index ebdbaa416fcc..65e2b7a6a05f 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -302,8 +302,7 @@ def InvertedImOperand : Operand { let ParserMatchClass = MipsInvertedImmoperand; } -// Address operand -def mem : Operand { +class mem_generic : Operand { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops ptr_rc, simm16); let EncoderMethod = "getMemEncoding"; @@ -311,6 +310,14 @@ def mem : Operand { let OperandType = "OPERAND_MEMORY"; } +// Address operand +def mem : mem_generic; + +// MSA specific address operand +def mem_msa : mem_generic { + let EncoderMethod = "getMSAMemEncoding"; +} + def mem_ea : Operand { let PrintMethod = "printMemOperandEA"; let MIOperandInfo = (ops ptr_rc, simm16); @@ -611,6 +618,18 @@ let isCall=1, hasDelaySlot=1, Defs = [RA] in { } +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1, + hasExtraSrcRegAllocReq = 1, Defs = [AT] in { + class TailCall : + PseudoSE<(outs), (ins calltarget:$target), [], IIBranch>, + PseudoInstExpansion<(JumpInst jmptarget:$target)>; + + class TailCallReg : + PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], IIBranch>, + PseudoInstExpansion<(JRInst ResRO:$rs)>; +} + class BAL_BR_Pseudo : PseudoSE<(outs), (ins brtarget:$offset), [], IIBranch>, PseudoInstExpansion<(RealInst ZERO, brtarget:$offset)> { @@ -624,36 +643,32 @@ class BAL_BR_Pseudo : // Syscall class SYS_FT : InstSE<(outs), (ins uimm20:$code_), - !strconcat(opstr, "\t$code_"), [], NoItinerary, FrmI>; + !strconcat(opstr, "\t$code_"), [], NoItinerary, FrmI, opstr>; // Break class BRK_FT : InstSE<(outs), (ins uimm10:$code_1, uimm10:$code_2), - !strconcat(opstr, "\t$code_1, $code_2"), [], NoItinerary, FrmOther>; + !strconcat(opstr, "\t$code_1, $code_2"), [], NoItinerary, + FrmOther, opstr>; // (D)Eret class ER_FT : InstSE<(outs), (ins), - opstr, [], NoItinerary, FrmOther>; + opstr, [], NoItinerary, FrmOther, opstr>; // Interrupts class DEI_FT : InstSE<(outs RO:$rt), (ins), - !strconcat(opstr, "\t$rt"), [], NoItinerary, FrmOther>; + !strconcat(opstr, "\t$rt"), [], NoItinerary, FrmOther, opstr>; // Wait class WAIT_FT : - InstSE<(outs), (ins), opstr, [], NoItinerary, FrmOther> { - let Inst{31-26} = 0x10; - let Inst{25} = 1; - let Inst{24-6} = 0; - let Inst{5-0} = 0x20; -} + InstSE<(outs), (ins), opstr, [], NoItinerary, FrmOther, opstr>; // Sync let hasSideEffects = 1 in -class SYNC_FT : +class SYNC_FT : InstSE<(outs), (ins i32imm:$stype), "sync $stype", [(MipsSync imm:$stype)], - NoItinerary, FrmOther>; + NoItinerary, FrmOther, opstr>; let hasSideEffects = 1 in class TEQ_FT : @@ -732,7 +747,8 @@ class MoveToLOHI DefRegs>: class EffectiveAddress : InstSE<(outs RO:$rt), (ins mem_ea:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(set RO:$rt, addr:$addr)], NoItinerary, FrmI> { + [(set 
RO:$rt, addr:$addr)], NoItinerary, FrmI, + !strconcat(opstr, "_lea")> { let isCodeGenOnly = 1; let DecoderMethod = "DecodeMem"; } @@ -965,7 +981,7 @@ def SWL : StoreLeftRight<"swl", MipsSWL, GPR32Opnd, IIStore>, LW_FM<0x2a>; def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, IIStore>, LW_FM<0x2e>; } -def SYNC : SYNC_FT, SYNC_FM; +def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM; def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>; def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>; def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>; @@ -980,21 +996,23 @@ def TLTI : MMRel, TEQI_FT<"tlti", GPR32Opnd>, TEQI_FM<0xa>; def TTLTIU : MMRel, TEQI_FT<"tltiu", GPR32Opnd>, TEQI_FM<0xb>; def TNEI : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>; -def BREAK : BRK_FT<"break">, BRK_FM<0xd>; -def SYSCALL : SYS_FT<"syscall">, SYS_FM<0xc>; +def BREAK : MMRel, BRK_FT<"break">, BRK_FM<0xd>; +def SYSCALL : MMRel, SYS_FT<"syscall">, SYS_FM<0xc>; def TRAP : TrapBase; -def ERET : ER_FT<"eret">, ER_FM<0x18>; -def DERET : ER_FT<"deret">, ER_FM<0x1f>; +def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>; +def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>; -def EI : DEI_FT<"ei", GPR32Opnd>, EI_FM<1>; -def DI : DEI_FT<"di", GPR32Opnd>, EI_FM<0>; +def EI : MMRel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>; +def DI : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM<0>; -def WAIT : WAIT_FT<"wait">; +def WAIT : MMRel, WAIT_FT<"wait">, WAIT_FM; /// Load-linked, Store-conditional +let Predicates = [NotInMicroMips] in { def LL : LLBase<"ll", GPR32Opnd>, LW_FM<0x30>; def SC : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>; +} /// Jump and Branch Instructions def J : MMRel, JumpFJ, FJ<2>, @@ -1018,10 +1036,8 @@ def JALRPseudo : JumpLinkRegPseudo; def BGEZAL : MMRel, BGEZAL_FT<"bgezal", brtarget, GPR32Opnd>, BGEZAL_FM<0x11>; def BLTZAL : MMRel, BGEZAL_FT<"bltzal", brtarget, GPR32Opnd>, BGEZAL_FM<0x10>; def BAL_BR : BAL_BR_Pseudo; -def TAILCALL : MMRel, JumpFJ, - FJ<2>, IsTailCall; -def TAILCALL_R : MMRel, JumpFR<"tcallr", GPR32Opnd, MipsTailCall>, MTLO_FM<8>, - IsTailCall; +def TAILCALL : TailCall; +def TAILCALL_R : TailCallReg; def RET : MMRel, RetBase<"ret", GPR32Opnd>, MTLO_FM<8>; @@ -1079,7 +1095,7 @@ def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>; // instructions. The same not happens for stack address copies, so an // add op with mem ComplexPattern is used and the stack address copy // can be matched. 
It's similar to Sparc LEA_ADDRi -def LEA_ADDiu : EffectiveAddress<"addiu", GPR32Opnd>, LW_FM<9>; +def LEA_ADDiu : MMRel, EffectiveAddress<"addiu", GPR32Opnd>, LW_FM<9>; // MADD*/MSUB* def MADD : MMRel, MArithR<"madd", 1>, MULT_FM<0x1c, 0>; @@ -1419,3 +1435,4 @@ include "MipsMSAInstrInfo.td" // Micromips include "MicroMipsInstrFormats.td" include "MicroMipsInstrInfo.td" +include "MicroMipsInstrFPU.td" diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsMSAInstrFormats.td b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsMSAInstrFormats.td index 875dc0b4034d..27f0bde766c8 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsMSAInstrFormats.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsMSAInstrFormats.td @@ -23,7 +23,7 @@ class MSASpecial : MSAInst { let Inst{31-26} = 0b000000; } -class PseudoMSA pattern, +class MSAPseudo pattern, InstrItinClass itin = IIPseudo>: MipsPseudo { let Predicates = [HasMSA]; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsMSAInstrInfo.td index 82c51a6473da..59b771f70b26 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsMSAInstrInfo.td @@ -1261,20 +1261,22 @@ class MSA_COPY_DESC_BASE { +class MSA_ELM_SLD_DESC_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm4:$n); + dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, uimm4:$n); string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$n]"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt4:$n))]; + list Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in, ROWS:$ws, + immZExt4:$n))]; + string Constraints = "$wd = $wd_in"; InstrItinClass Itinerary = itin; } class MSA_COPY_PSEUDO_BASE : - MipsPseudo<(outs RCD:$wd), (ins RCWS:$ws, uimm4:$n), - [(set RCD:$wd, (OpNode (VecTy RCWS:$ws), immZExt4:$n))]> { + MSAPseudo<(outs RCD:$wd), (ins RCWS:$ws, uimm4:$n), + [(set RCD:$wd, (OpNode (VecTy RCWS:$ws), immZExt4:$n))]> { bit usesCustomInserter = 1; } @@ -1300,17 +1302,6 @@ class MSA_I8_DESC_BASE { - dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm8:$u8); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt8:$u8))]; - InstrItinClass Itinerary = itin; -} - class MSA_I8_SHF_DESC_BASE { @@ -1355,8 +1346,8 @@ class MSA_2R_FILL_DESC_BASE : - MipsPseudo<(outs RCWD:$wd), (ins RCWS:$fs), - [(set RCWD:$wd, (OpNode RCWS:$fs))]> { + MSAPseudo<(outs RCWD:$wd), (ins RCWS:$fs), + [(set RCWD:$wd, (OpNode RCWS:$fs))]> { let usesCustomInserter = 1; } @@ -1421,10 +1412,12 @@ class MSA_3R_SLD_DESC_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, GPR32:$rt); + dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, GPR32:$rt); string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$rt]"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, GPR32:$rt))]; + list Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in, ROWS:$ws, + GPR32:$rt))]; InstrItinClass Itinerary = itin; + string Constraints = "$wd = $wd_in"; } class MSA_3R_4R_DESC_BASE Pattern = [(set ROWD:$wd, - (OpNode ROWD:$wd_in, ROWS:$ws, ROWT:$wt))]; + list Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in, ROWS:$ws, + ROWT:$wt))]; InstrItinClass Itinerary = itin; string Constraints = "$wd = $wd_in"; } @@ -1479,8 +1472,8 @@ class MSA_INSERT_DESC_BASE : - MipsPseudo<(outs ROWD:$wd), (ins ROWD:$wd_in, uimm6:$n, ROFS:$fs), - [(set ROWD:$wd, (OpNode (Ty ROWD:$wd_in), 
ROFS:$fs, + MSAPseudo<(outs ROWD:$wd), (ins ROWD:$wd_in, uimm6:$n, ROFS:$fs), + [(set ROWD:$wd, (OpNode (Ty ROWD:$wd_in), ROFS:$fs, immZExt6:$n))]> { bit usesCustomInserter = 1; string Constraints = "$wd = $wd_in"; @@ -1525,8 +1518,8 @@ class MSA_ELM_SPLAT_DESC_BASE : - MipsPseudo<(outs ROWD:$wd), (ins ROWS:$ws, ROWT:$wt), - [(set ROWD:$wd, (OpNode ROWS:$ws, ROWT:$wt))]>; + MSAPseudo<(outs ROWD:$wd), (ins ROWS:$ws, ROWT:$wt), + [(set ROWD:$wd, (OpNode ROWS:$ws, ROWT:$wt))]>; class ADD_A_B_DESC : MSA_3R_DESC_BASE<"add_a.b", int_mips_add_a_b, MSA128BOpnd>, IsCommutable; @@ -1735,10 +1728,14 @@ class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", vbneg_h, MSA128HOpnd>; class BNEG_W_DESC : MSA_3R_DESC_BASE<"bneg.w", vbneg_w, MSA128WOpnd>; class BNEG_D_DESC : MSA_3R_DESC_BASE<"bneg.d", vbneg_d, MSA128DOpnd>; -class BNEGI_B_DESC : MSA_BIT_B_DESC_BASE<"bnegi.b", xor, vsplat_uimm_pow2, MSA128BOpnd>; -class BNEGI_H_DESC : MSA_BIT_H_DESC_BASE<"bnegi.h", xor, vsplat_uimm_pow2, MSA128HOpnd>; -class BNEGI_W_DESC : MSA_BIT_W_DESC_BASE<"bnegi.w", xor, vsplat_uimm_pow2, MSA128WOpnd>; -class BNEGI_D_DESC : MSA_BIT_D_DESC_BASE<"bnegi.d", xor, vsplat_uimm_pow2, MSA128DOpnd>; +class BNEGI_B_DESC : MSA_BIT_B_DESC_BASE<"bnegi.b", xor, vsplat_uimm_pow2, + MSA128BOpnd>; +class BNEGI_H_DESC : MSA_BIT_H_DESC_BASE<"bnegi.h", xor, vsplat_uimm_pow2, + MSA128HOpnd>; +class BNEGI_W_DESC : MSA_BIT_W_DESC_BASE<"bnegi.w", xor, vsplat_uimm_pow2, + MSA128WOpnd>; +class BNEGI_D_DESC : MSA_BIT_D_DESC_BASE<"bnegi.d", xor, vsplat_uimm_pow2, + MSA128DOpnd>; class BNZ_B_DESC : MSA_CBRANCH_DESC_BASE<"bnz.b", MSA128BOpnd>; class BNZ_H_DESC : MSA_CBRANCH_DESC_BASE<"bnz.h", MSA128HOpnd>; @@ -2047,11 +2044,11 @@ class FEXP2_W_DESC : MSA_3RF_DESC_BASE<"fexp2.w", mul_fexp2, MSA128WOpnd>; class FEXP2_D_DESC : MSA_3RF_DESC_BASE<"fexp2.d", mul_fexp2, MSA128DOpnd>; let usesCustomInserter = 1 in { class FEXP2_W_1_PSEUDO_DESC : - MipsPseudo<(outs MSA128W:$wd), (ins MSA128W:$ws), - [(set MSA128W:$wd, (fexp2 MSA128W:$ws))]>; + MSAPseudo<(outs MSA128W:$wd), (ins MSA128W:$ws), + [(set MSA128W:$wd, (fexp2 MSA128W:$ws))]>; class FEXP2_D_1_PSEUDO_DESC : - MipsPseudo<(outs MSA128D:$wd), (ins MSA128D:$ws), - [(set MSA128D:$wd, (fexp2 MSA128D:$ws))]>; + MSAPseudo<(outs MSA128D:$wd), (ins MSA128D:$ws), + [(set MSA128D:$wd, (fexp2 MSA128D:$ws))]>; } class FEXUPL_W_DESC : MSA_2RF_DESC_BASE<"fexupl.w", int_mips_fexupl_w, @@ -2276,7 +2273,7 @@ class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", int_mips_insve_d, class LD_DESC_BASE { dag OutOperandList = (outs ROWD:$wd); dag InOperandList = (ins MemOpnd:$addr); @@ -2502,10 +2499,14 @@ class SLD_H_DESC : MSA_3R_SLD_DESC_BASE<"sld.h", int_mips_sld_h, MSA128HOpnd>; class SLD_W_DESC : MSA_3R_SLD_DESC_BASE<"sld.w", int_mips_sld_w, MSA128WOpnd>; class SLD_D_DESC : MSA_3R_SLD_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd>; -class SLDI_B_DESC : MSA_ELM_DESC_BASE<"sldi.b", int_mips_sldi_b, MSA128BOpnd>; -class SLDI_H_DESC : MSA_ELM_DESC_BASE<"sldi.h", int_mips_sldi_h, MSA128HOpnd>; -class SLDI_W_DESC : MSA_ELM_DESC_BASE<"sldi.w", int_mips_sldi_w, MSA128WOpnd>; -class SLDI_D_DESC : MSA_ELM_DESC_BASE<"sldi.d", int_mips_sldi_d, MSA128DOpnd>; +class SLDI_B_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.b", int_mips_sldi_b, + MSA128BOpnd>; +class SLDI_H_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.h", int_mips_sldi_h, + MSA128HOpnd>; +class SLDI_W_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.w", int_mips_sldi_w, + MSA128WOpnd>; +class SLDI_D_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.d", int_mips_sldi_d, + MSA128DOpnd>; class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", 
shl, MSA128BOpnd>; class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128HOpnd>; @@ -2597,7 +2598,7 @@ class SRLRI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srlri.d", int_mips_srlri_d, class ST_DESC_BASE { dag OutOperandList = (outs); dag InOperandList = (ins ROWD:$wd, MemOpnd:$addr); @@ -2810,8 +2811,8 @@ def BNZ_V : BNZ_V_ENC, BNZ_V_DESC; def BSEL_V : BSEL_V_ENC, BSEL_V_DESC; class MSA_BSEL_PSEUDO_BASE : - MipsPseudo<(outs RO:$wd), (ins RO:$wd_in, RO:$ws, RO:$wt), - [(set RO:$wd, (Ty (vselect RO:$wd_in, RO:$ws, RO:$wt)))]>, + MSAPseudo<(outs RO:$wd), (ins RO:$wd_in, RO:$ws, RO:$wt), + [(set RO:$wd, (Ty (vselect RO:$wd_in, RO:$ws, RO:$wt)))]>, PseudoInstExpansion<(BSEL_V MSA128BOpnd:$wd, MSA128BOpnd:$wd_in, MSA128BOpnd:$ws, MSA128BOpnd:$wt)> { let Constraints = "$wd_in = $wd"; @@ -3501,9 +3502,9 @@ def ST_FD : MSAPat<(store (v2f64 MSA128D:$ws), addrRegImm:$addr), class MSA_FABS_PSEUDO_DESC_BASE : - MipsPseudo<(outs ROWD:$wd), - (ins ROWS:$ws), - [(set ROWD:$wd, (fabs ROWS:$ws))]> { + MSAPseudo<(outs ROWD:$wd), + (ins ROWS:$ws), + [(set ROWD:$wd, (fabs ROWS:$ws))]> { InstrItinClass Itinerary = itin; } def FABS_W : MSA_FABS_PSEUDO_DESC_BASE, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp new file mode 100644 index 000000000000..8718e0470550 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp @@ -0,0 +1,297 @@ +//===--------- MipsOptimizePICCall.cpp - Optimize PIC Calls ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass eliminates unnecessary instructions that set up $gp and replaces +// instructions that load target function addresses with copy instructions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "optimize-mips-pic-call" + +#include "Mips.h" +#include "MipsTargetMachine.h" +#include "MipsMachineFunction.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/ADT/ScopedHashTable.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +static cl::opt<bool> LoadTargetFromGOT("mips-load-target-from-got", + cl::init(true), + cl::desc("Load target address from GOT"), + cl::Hidden); + +static cl::opt<bool> EraseGPOpnd("mips-erase-gp-opnd", + cl::init(true), cl::desc("Erase GP Operand"), + cl::Hidden); + +namespace { +typedef std::pair<unsigned, unsigned> CntRegP; +typedef RecyclingAllocator<BumpPtrAllocator, ScopedHashTableVal<const Value *, CntRegP> > +AllocatorTy; +typedef ScopedHashTable<const Value *, CntRegP, DenseMapInfo<const Value *>, + AllocatorTy> ScopedHTType; + +class MBBInfo { +public: + MBBInfo(MachineDomTreeNode *N); + const MachineDomTreeNode *getNode() const; + bool isVisited() const; + void preVisit(ScopedHTType &ScopedHT); + void postVisit(); + +private: + MachineDomTreeNode *Node; + ScopedHTType::ScopeTy *HTScope; +}; + +class OptimizePICCall : public MachineFunctionPass { +public: + OptimizePICCall(TargetMachine &tm) : MachineFunctionPass(ID) {} + + virtual const char *getPassName() const { return "Mips OptimizePICCall"; } + + bool runOnMachineFunction(MachineFunction &F); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + /// \brief Visit MBB.
+ bool visitNode(MBBInfo &MBBI); + + /// \brief Test if MI jumps to a function via a register. + /// + /// Also, return the virtual register containing the target function's address + /// and the underlying object in Reg and Val respectively, if the function's + /// address can be resolved lazily. + bool isCallViaRegister(MachineInstr &MI, unsigned &Reg, + const Value *&Val) const; + + /// \brief Return the number of instructions that dominate the current + /// instruction and load the function address from object Entry. + unsigned getCount(const Value *Entry); + + /// \brief Return the destination virtual register of the last instruction + /// that loads from object Entry. + unsigned getReg(const Value *Entry); + + /// \brief Update ScopedHT. + void incCntAndSetReg(const Value *Entry, unsigned Reg); + + ScopedHTType ScopedHT; + static char ID; +}; + +char OptimizePICCall::ID = 0; +} // end of anonymous namespace + +/// Return the first MachineOperand of MI if it is a used virtual register. +static MachineOperand *getCallTargetRegOpnd(MachineInstr &MI) { + if (MI.getNumOperands() == 0) + return 0; + + MachineOperand &MO = MI.getOperand(0); + + if (!MO.isReg() || !MO.isUse() || + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + return 0; + + return &MO; +} + +/// Return type of register Reg. +static MVT::SimpleValueType getRegTy(unsigned Reg, MachineFunction &MF) { + const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); + assert(RC->vt_end() - RC->vt_begin() == 1); + return *RC->vt_begin(); +} + +/// Do the following transformation: +/// +/// jalr $vreg +/// => +/// copy $t9, $vreg +/// jalr $t9 +static void setCallTargetReg(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I) { + MachineFunction &MF = *MBB->getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + unsigned SrcReg = I->getOperand(0).getReg(); + unsigned DstReg = getRegTy(SrcReg, MF) == MVT::i32 ? Mips::T9 : Mips::T9_64; + BuildMI(*MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), DstReg) + .addReg(SrcReg); + I->getOperand(0).setReg(DstReg); +} + +/// Search MI's operands for register GP and erase it. +static void eraseGPOpnd(MachineInstr &MI) { + if (!EraseGPOpnd) + return; + + MachineFunction &MF = *MI.getParent()->getParent(); + MVT::SimpleValueType Ty = getRegTy(MI.getOperand(0).getReg(), MF); + unsigned Reg = Ty == MVT::i32 ? Mips::GP : Mips::GP_64; + + for (unsigned I = 0; I < MI.getNumOperands(); ++I) { + MachineOperand &MO = MI.getOperand(I); + if (MO.isReg() && MO.getReg() == Reg) { + MI.RemoveOperand(I); + return; + } + } + + llvm_unreachable(0); +} + +MBBInfo::MBBInfo(MachineDomTreeNode *N) : Node(N), HTScope(0) {} + +const MachineDomTreeNode *MBBInfo::getNode() const { return Node; } + +bool MBBInfo::isVisited() const { return HTScope; } + +void MBBInfo::preVisit(ScopedHTType &ScopedHT) { + HTScope = new ScopedHTType::ScopeTy(ScopedHT); +} + +void MBBInfo::postVisit() { + delete HTScope; +} + +// OptimizePICCall methods. +bool OptimizePICCall::runOnMachineFunction(MachineFunction &F) { + if (F.getTarget().getSubtarget().inMips16Mode()) + return false; + + // Do a pre-order traversal of the dominator tree. + MachineDominatorTree *MDT = &getAnalysis(); + bool Changed = false; + + SmallVector WorkList(1, MBBInfo(MDT->getRootNode())); + + while (!WorkList.empty()) { + MBBInfo &MBBI = WorkList.back(); + + // If this MBB has already been visited, destroy the scope for the MBB and + // pop it from the work list. 
+ if (MBBI.isVisited()) { + MBBI.postVisit(); + WorkList.pop_back(); + continue; + } + + // Visit the MBB and add its children to the work list. + MBBI.preVisit(ScopedHT); + Changed |= visitNode(MBBI); + const MachineDomTreeNode *Node = MBBI.getNode(); + const std::vector &Children = Node->getChildren(); + WorkList.append(Children.begin(), Children.end()); + } + + return Changed; +} + +bool OptimizePICCall::visitNode(MBBInfo &MBBI) { + bool Changed = false; + MachineBasicBlock *MBB = MBBI.getNode()->getBlock(); + + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { + unsigned Reg; + const Value *Entry; + + // Skip instructions that are not call instructions via registers. + if (!isCallViaRegister(*I, Reg, Entry)) + continue; + + Changed = true; + unsigned N = getCount(Entry); + + if (N != 0) { + // If a function has been called more than twice, we do not have to emit a + // load instruction to get the function address from the GOT, but can + // instead reuse the address that has been loaded before. + if (N >= 2 && !LoadTargetFromGOT) + getCallTargetRegOpnd(*I)->setReg(getReg(Entry)); + + // Erase the $gp operand if this isn't the first time a function has + // been called. $gp needs to be set up only if the function call can go + // through a lazy binding stub. + eraseGPOpnd(*I); + } + + if (Entry) + incCntAndSetReg(Entry, Reg); + + setCallTargetReg(MBB, I); + } + + return Changed; +} + +bool OptimizePICCall::isCallViaRegister(MachineInstr &MI, unsigned &Reg, + const Value *&Val) const { + if (!MI.isCall()) + return false; + + MachineOperand *MO = getCallTargetRegOpnd(MI); + + // Return if MI is not a function call via a register. + if (!MO) + return false; + + // Get the instruction that loads the function address from the GOT. + Reg = MO->getReg(); + Val = 0; + MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + MachineInstr *DefMI = MRI.getVRegDef(Reg); + + assert(DefMI); + + // See if DefMI is an instruction that loads from a GOT entry that holds the + // address of a lazy binding stub. + if (!DefMI->mayLoad() || DefMI->getNumOperands() < 3) + return true; + + unsigned Flags = DefMI->getOperand(2).getTargetFlags(); + + if (Flags != MipsII::MO_GOT_CALL && Flags != MipsII::MO_CALL_LO16) + return true; + + // Return the underlying object for the GOT entry in Val. + assert(DefMI->hasOneMemOperand()); + Val = (*DefMI->memoperands_begin())->getValue(); + return true; +} + +unsigned OptimizePICCall::getCount(const Value *Entry) { + return ScopedHT.lookup(Entry).first; +} + +unsigned OptimizePICCall::getReg(const Value *Entry) { + unsigned Reg = ScopedHT.lookup(Entry).second; + assert(Reg); + return Reg; +} + +void OptimizePICCall::incCntAndSetReg(const Value *Entry, unsigned Reg) { + CntRegP P = ScopedHT.lookup(Entry); + ScopedHT.insert(Entry, std::make_pair(P.first + 1, Reg)); +} + +/// Return an OptimizeCall object. 
+FunctionPass *llvm::createMipsOptimizePICCallPass(MipsTargetMachine &TM) { + return new OptimizePICCall(TM); +} diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsRegisterInfo.cpp index 3105b0208451..65b1f8cf2d10 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsRegisterInfo.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/ValueTypes.h" #include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -183,6 +184,8 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(Mips::RA_64); Reserved.set(Mips::T0); Reserved.set(Mips::T1); + if (MF.getFunction()->hasFnAttribute("saveS2")) + Reserved.set(Mips::S2); } // Reserve GP if small section is used. diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsSEISelLowering.cpp index 809adc03b151..cc6411fd885d 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1077,14 +1077,7 @@ getOpndList(SmallVectorImpl &Ops, std::deque< std::pair > &RegsToPass, bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { - // T9 should contain the address of the callee function if - // -reloction-model=pic or it is an indirect call. - if (IsPICCall || !GlobalOrExternal) { - unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; - RegsToPass.push_front(std::make_pair(T9Reg, Callee)); - } else - Ops.push_back(Callee); - + Ops.push_back(Callee); MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage, CLI, Callee, Chain); } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp index 2d440840aaff..fcf6d0b06c78 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -62,21 +62,42 @@ MipsSERegisterInfo::intRegClass(unsigned Size) const { return &Mips::GPR64RegClass; } -/// Determine whether a given opcode is an MSA load/store (supporting 10-bit -/// offsets) or a non-MSA load/store (supporting 16-bit offsets). -static inline bool isMSALoadOrStore(const unsigned Opcode) { +/// Get the size of the offset supported by the given load/store. +/// The result includes the effects of any scale factors applied to the +/// instruction immediate. +static inline unsigned getLoadStoreOffsetSizeInBits(const unsigned Opcode) { switch (Opcode) { case Mips::LD_B: - case Mips::LD_H: - case Mips::LD_W: - case Mips::LD_D: case Mips::ST_B: + return 10; + case Mips::LD_H: case Mips::ST_H: + return 10 + 1 /* scale factor */; + case Mips::LD_W: case Mips::ST_W: + return 10 + 2 /* scale factor */; + case Mips::LD_D: case Mips::ST_D: - return true; + return 10 + 3 /* scale factor */; default: - return false; + return 16; + } +} + +/// Get the scale factor applied to the immediate in the given load/store. 
+static inline unsigned getLoadStoreOffsetAlign(const unsigned Opcode) { + switch (Opcode) { + case Mips::LD_H: + case Mips::ST_H: + return 2; + case Mips::LD_W: + case Mips::ST_W: + return 4; + case Mips::LD_D: + case Mips::ST_D: + return 8; + default: + return 1; } } @@ -131,13 +152,16 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, if (!MI.isDebugValue()) { // Make sure Offset fits within the field available. - // For MSA instructions, this is a 10-bit signed immediate, otherwise it is - // a 16-bit signed immediate. - unsigned OffsetBitSize = isMSALoadOrStore(MI.getOpcode()) ? 10 : 16; + // For MSA instructions, this is a 10-bit signed immediate (scaled by + // element size), otherwise it is a 16-bit signed immediate. + unsigned OffsetBitSize = getLoadStoreOffsetSizeInBits(MI.getOpcode()); + unsigned OffsetAlign = getLoadStoreOffsetAlign(MI.getOpcode()); - if (OffsetBitSize == 10 && !isInt<10>(Offset) && isInt<16>(Offset)) { - // If we have an offset that needs to fit into a signed 10-bit immediate - // and doesn't, but does fit into 16-bits then use an ADDiu + if (OffsetBitSize < 16 && isInt<16>(Offset) && + (!isIntN(OffsetBitSize, Offset) || + OffsetToAlignment(Offset, OffsetAlign) != 0)) { + // If we have an offset that needs to fit into a signed n-bit immediate + // (where n < 16) and doesn't, but does fit into 16-bits then use an ADDiu MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = II->getDebugLoc(); unsigned ADDiu = Subtarget.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsSubtarget.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsSubtarget.cpp index 0a81072b0858..5103084db5b3 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsSubtarget.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsSubtarget.cpp @@ -55,9 +55,9 @@ Mips16HardFloat("mips16-hard-float", cl::NotHidden, static cl::opt Mips16ConstantIslands( - "mips16-constant-islands", cl::Hidden, - cl::desc("MIPS: mips16 constant islands enable. experimental feature"), - cl::init(false)); + "mips16-constant-islands", cl::NotHidden, + cl::desc("MIPS: mips16 constant islands enable."), + cl::init(true)); void MipsSubtarget::anchor() { } @@ -81,6 +81,16 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, // Parse features string. ParseSubtargetFeatures(CPUName, FS); + if (InMips16Mode && !TM->Options.UseSoftFloat) { + // Hard float for mips16 means essentially to compile as soft float + // but to use a runtime library for soft float that is written with + // native mips32 floating point instructions (those runtime routines + // run in mips32 hard float mode). + TM->Options.UseSoftFloat = true; + TM->Options.FloatABIType = FloatABI::Soft; + InMips16HardFloat = true; + } + PreviousInMips16Mode = InMips16Mode; // Initialize scheduling itinerary for the specified CPU. 
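For reference, here is a minimal, self-contained sketch (an editor's addition, not part of the patch) of the fit test that the MipsSERegisterInfo.cpp hunk above builds out of getLoadStoreOffsetSizeInBits and getLoadStoreOffsetAlign. llvm::isIntN and llvm::OffsetToAlignment are re-implemented locally (for non-negative offsets) so the example compiles without LLVM headers:

#include <cassert>
#include <cstdint>

// Local stand-ins for llvm::isIntN and llvm::OffsetToAlignment.
static bool isIntN(unsigned N, int64_t X) {
  return X >= -(INT64_C(1) << (N - 1)) && X < (INT64_C(1) << (N - 1));
}
static uint64_t offsetToAlignment(uint64_t Value, uint64_t Align) {
  return (Align - Value % Align) % Align; // bytes needed to round Value up
}

// The patched check in eliminateFI: small offsets must both fit in the
// scaled immediate field and be a multiple of the scale factor.
static bool needsAddiu(int64_t Offset, unsigned OffsetBitSize,
                       unsigned OffsetAlign) {
  return OffsetBitSize < 16 && isIntN(16, Offset) &&
         (!isIntN(OffsetBitSize, Offset) ||
          offsetToAlignment(Offset, OffsetAlign) != 0);
}

int main() {
  // MSA st.d: a 10-bit immediate scaled by 8, i.e. 13 bits of byte offset,
  // multiples of 8 only (see getLoadStoreOffsetSizeInBits/Align above).
  assert(!needsAddiu(512, 13, 8));   // encodable directly
  assert(needsAddiu(516, 13, 8));    // in range but misaligned
  assert(needsAddiu(16384, 13, 8));  // out of the 13-bit range
  return 0;
}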
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsTargetMachine.cpp index 5046c1b782f6..47f8bad906dd 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsTargetMachine.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/MipsTargetMachine.cpp @@ -45,8 +45,36 @@ extern "C" void LLVMInitializeMipsTarget() { RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget); } -// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment -// The stack is always 8 byte aligned +static std::string computeDataLayout(const MipsSubtarget &ST) { + std::string Ret = ""; + + // There are both little and big endian mips. + if (ST.isLittle()) + Ret += "e"; + else + Ret += "E"; + + Ret += "-m:m"; + + // Pointers are 32 bit on some ABIs. + if (!ST.isABI_N64()) + Ret += "-p:32:32"; + + // 8 and 16 bit integers only need to have natural alignment, but try to + // align them to 32 bits. 64 bit integers have natural alignment. + Ret += "-i8:8:32-i16:16:32-i64:64"; + + // 32 bit registers are always available and the stack is at least 64 bit + // aligned. On N64 64 bit registers are also available and the stack is + // 128 bit aligned. + if (ST.isABI_N64() || ST.isABI_N32()) + Ret += "-n32:64-S128"; + else + Ret += "-n32-S64"; + + return Ret; +} + // On function prologue, the stack is created by decrementing // its pointer. Once decremented, all references are done with positive // offset from the stack/frame pointer, using StackGrowsUp enables @@ -60,15 +88,7 @@ MipsTargetMachine(const Target &T, StringRef TT, bool isLittle) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, isLittle, RM, this), - DL(isLittle ? - (Subtarget.isABI_N64() ? - "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-" "n32:64-S128" : - "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64") : - (Subtarget.isABI_N64() ? - "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-" "n32:64-S128" : - "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")), + DL(computeDataLayout(Subtarget)), InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this, Subtarget)), TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), @@ -153,6 +173,7 @@ public: virtual void addIRPasses(); virtual bool addInstSelector(); + virtual void addMachineSSAOptimization(); virtual bool addPreEmitPass(); }; } // namespace @@ -182,6 +203,11 @@ bool MipsPassConfig::addInstSelector() { return false; } +void MipsPassConfig::addMachineSSAOptimization() { + addPass(createMipsOptimizePICCallPass(getMipsTargetMachine())); + TargetPassConfig::addMachineSSAOptimization(); +} + void MipsTargetMachine::addAnalysisPasses(PassManagerBase &PM) { if (Subtarget.allowMixed16_32()) { DEBUG(errs() << "No "); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/TargetInfo/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/Mips/TargetInfo/CMakeLists.txt index 4172d00a33f0..3347a99a4321 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/TargetInfo/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/TargetInfo/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/..
) - add_llvm_library(LLVMMipsInfo MipsTargetInfo.cpp ) - -add_dependencies(LLVMMipsInfo MipsCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Mips/TargetInfo/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/Mips/TargetInfo/LLVMBuild.txt index 2d425686227f..6235bfc3fd18 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Mips/TargetInfo/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Mips/TargetInfo/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = MipsInfo parent = Mips -required_libraries = MC Support Target +required_libraries = Support add_to_library_groups = Mips diff --git a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/CMakeLists.txt index 4f1324c6d5a5..8d25077a87e7 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/CMakeLists.txt @@ -30,8 +30,6 @@ set(NVPTXCodeGen_sources add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) -add_dependencies(LLVMNVPTXCodeGen NVPTXCommonTableGen intrinsics_gen) - add_subdirectory(TargetInfo) add_subdirectory(InstPrinter) add_subdirectory(MCTargetDesc) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/InstPrinter/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/InstPrinter/CMakeLists.txt index ae4c75119687..bb6c8abc961f 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/InstPrinter/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/InstPrinter/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMNVPTXAsmPrinter NVPTXInstPrinter.cpp ) - -add_dependencies(LLVMNVPTXAsmPrinter NVPTXCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/LLVMBuild.txt index e2d6ed2b89ed..e805aba58aab 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/LLVMBuild.txt @@ -28,5 +28,5 @@ has_asmprinter = 1 type = Library name = NVPTXCodeGen parent = NVPTX -required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXDesc NVPTXInfo SelectionDAG Support Target TransformUtils +required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXAsmPrinter NVPTXDesc NVPTXInfo SelectionDAG Support Target add_to_library_groups = NVPTX diff --git a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt index a030d9f8f1f1..dbbf23554aa9 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt @@ -2,8 +2,3 @@ add_llvm_library(LLVMNVPTXDesc NVPTXMCAsmInfo.cpp NVPTXMCTargetDesc.cpp ) - -add_dependencies(LLVMNVPTXDesc NVPTXCommonTableGen) - -# Hack: we need to include 'main' target directory to grab private headers -#include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) 
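The Mips computeDataLayout above (and the NVPTX one later in this change) replaces per-ABI data layout string literals with a helper that builds the string piecewise. For illustration, the Mips variant rendered as a standalone function; the three bools stand in for the MipsSubtarget::isLittle/isABI_N64/isABI_N32 queries, so this is a sketch of the strings produced, not the LLVM function itself:

    #include <iostream>
    #include <string>

    static std::string mipsDataLayout(bool Little, bool N64, bool N32) {
      std::string Ret = Little ? "e" : "E";   // endianness
      Ret += "-m:m";                          // Mips name-mangling mode
      if (!N64)
        Ret += "-p:32:32";                    // 32-bit pointers except on N64
      Ret += "-i8:8:32-i16:16:32-i64:64";     // prefer 32-bit align for i8/i16
      if (N64 || N32)
        Ret += "-n32:64-S128";                // 64-bit regs, 128-bit stack
      else
        Ret += "-n32-S64";
      return Ret;
    }

    int main() {
      std::cout << mipsDataLayout(true, false, false) << "\n"; // little O32
      std::cout << mipsDataLayout(false, true, false) << "\n"; // big N64
      return 0;
    }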
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index f2784b836b70..366341afe1b8 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -33,8 +33,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) { CommentString = "//"; - PrivateGlobalPrefix = "$L__"; - HasSetDirective = false; HasSingleParameterDotFile = false; @@ -49,7 +47,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) { Data16bitsDirective = " .b16 "; Data32bitsDirective = " .b32 "; Data64bitsDirective = " .b64 "; - PrivateGlobalPrefix = ""; ZeroDirective = " .b8"; AsciiDirective = " .b8"; AscizDirective = " .b8"; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTX.td b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTX.td index 6183a755c320..d78b4e81a3e5 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTX.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTX.td @@ -57,12 +57,6 @@ def : Proc<"sm_35", [SM35]>; def NVPTXInstrInfo : InstrInfo { } -def NVPTXAsmWriter : AsmWriter { - bit isMCAsmWriter = 1; - string AsmWriterClassName = "InstPrinter"; -} - def NVPTX : Target { let InstructionSet = NVPTXInstrInfo; - let AssemblyWriters = [NVPTXAsmWriter]; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h index 19d73c5783cb..d3305701a5b7 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h @@ -32,6 +32,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); + AU.addPreserved("stack-protector"); AU.addPreserved(); } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 7552fe704115..0a62b0855c21 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -430,7 +430,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { O << " ("; if (isABI) { - if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { + if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) { unsigned size = 0; if (const IntegerType *ITy = dyn_cast(Ty)) { size = ITy->getBitWidth(); @@ -895,7 +895,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { const_cast(getObjFileLowering()) .Initialize(OutContext, TM); - Mang = new Mangler(&TM); + Mang = new Mangler(TM.getDataLayout()); // Emit header before any dwarf directives are emitted below. 
emitHeader(M, OS1); @@ -1207,7 +1207,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, else O << " .align " << GVar->getAlignment(); - if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa(ETy)) { + if (ETy->isSingleValueType()) { O << " ."; // Special case: ABI requires that we use .u8 for predicates if (ETy->isIntegerTy(1)) @@ -1378,7 +1378,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, else O << " .align " << GVar->getAlignment(); - if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa(ETy)) { + if (ETy->isSingleValueType()) { O << " ."; O << getPTXFundamentalTypeStr(ETy); O << " "; @@ -1410,7 +1410,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, } static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { - if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa(Ty)) + if (Ty->isSingleValueType()) return TD->getPrefTypeAlignment(Ty); const ArrayType *ATy = dyn_cast(Ty); @@ -1580,7 +1580,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { continue; } // Non-kernel function, just print .param .b for ABI - // and .reg .b for non ABY + // and .reg .b for non-ABI unsigned sz = 0; if (isa(Ty)) { sz = cast(Ty)->getBitWidth(); @@ -2087,21 +2087,6 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, O << *getSymbol(MO.getGlobal()); break; - case MachineOperand::MO_ExternalSymbol: { - const char *symbname = MO.getSymbolName(); - if (strstr(symbname, ".PARAM") == symbname) { - unsigned index; - sscanf(symbname + 6, "%u[];", &index); - printParamName(index, O); - } else if (strstr(symbname, ".HLPPARAM") == symbname) { - unsigned index; - sscanf(symbname + 9, "%u[];", &index); - O << *CurrentFnSym << "_param_" << index << "_offset"; - } else - O << symbname; - break; - } - case MachineOperand::MO_MachineBasicBlock: O << *MO.getMBB()->getSymbol(); return; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h index 3abe5d166826..04183dff5abb 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -204,8 +204,6 @@ private: void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier = 0); void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const; - // definition autogenerated. 
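Several NVPTX call sites in the hunks above replace the open-coded test (isPrimitiveType() || isIntegerTy() || isa of PointerType) with Type::isSingleValueType(). In the LLVM 3.x-era API assumed here, isSingleValueType is true exactly for the register-sized value types (integer, floating point, pointer, vector), while isPrimitiveType also admits non-value types such as label and metadata, so the new form is both shorter and stricter. A small probe of the two predicates (requires LLVM 3.x headers and libraries; isPrimitiveType was dropped from later releases):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Type.h"
    #include <cstdio>

    using namespace llvm;

    int main() {
      LLVMContext C;
      Type *Tys[] = {
        Type::getFloatTy(C),                     // FP: both predicates true
        Type::getInt32PtrTy(C),                  // pointer: single-value only
        VectorType::get(Type::getInt32Ty(C), 4), // vector: single-value only
        Type::getLabelTy(C),                     // label: primitive, not a value
      };
      for (unsigned i = 0; i != 4; ++i)
        printf("primitive=%d singleValue=%d\n",
               (int)Tys[i]->isPrimitiveType(),
               (int)Tys[i]->isSingleValueType());
      return 0;
    }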
- void printInstruction(const MachineInstr *MI, raw_ostream &O); void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O, bool = false); void printParamName(int paramIndex, raw_ostream &O); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 4b8b306a705a..fc0c84d205bd 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -2014,7 +2014,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) { VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue); } else { EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue }; - VTs = CurDAG->getVTList(&EVTs[0], 5); + VTs = CurDAG->getVTList(&EVTs[0], array_lengthof(EVTs)); } unsigned OffsetVal = cast(Offset)->getZExtValue(); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 6a8be753c87c..d8151761e057 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -361,7 +361,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, O << "()"; } else { O << "("; - if (retTy->isPrimitiveType() || retTy->isIntegerTy()) { + if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) { unsigned size = 0; if (const IntegerType *ITy = dyn_cast(retTy)) { size = ITy->getBitWidth(); @@ -856,8 +856,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // .param .align 16 .b8 retval0[], or // .param .b retval0 unsigned resultsz = TD->getTypeAllocSizeInBits(retTy); - if (retTy->isPrimitiveType() || retTy->isIntegerTy() || - retTy->isPointerTy()) { + if (retTy->isSingleValueType()) { // Scalar needs to be at least 32bit wide if (resultsz < 32) resultsz = 32; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h index 286e753fa92b..780ed4974b2f 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h @@ -29,6 +29,7 @@ struct NVPTXLowerAggrCopies : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); + AU.addPreserved("stack-protector"); AU.addPreserved(); } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.h b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.h index bdafba9075a0..03391feb3b4e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.h @@ -26,6 +26,7 @@ struct NVPTXSplitBBatBar : public FunctionPass { NVPTXSplitBBatBar() : FunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved("stack-protector"); AU.addPreserved(); } virtual bool runOnFunction(Function &F); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index 004be116a96c..f99bebd6ce12 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -74,21 +74,6 @@ public: unsigned getPTXVersion() const { return PTXVersion; } void ParseSubtargetFeatures(StringRef CPU, StringRef 
FS); - - std::string getDataLayout() const { - const char *p; - if (is64Bit()) - p = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-" - "f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-" - "n16:32:64"; - else - p = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-" - "f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-" - "n16:32:64"; - - return std::string(p); - } - }; } // End llvm namespace diff --git a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 46edd6d83f65..2c93abec6e14 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -63,12 +63,23 @@ extern "C" void LLVMInitializeNVPTXTarget() { initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); } +static std::string computeDataLayout(const NVPTXSubtarget &ST) { + std::string Ret = "e"; + + if (!ST.is64Bit()) + Ret += "-p:32:32"; + + Ret += "-i64:64-v16:16-v32:32-n16:32:64"; + + return Ret; +} + NVPTXTargetMachine::NVPTXTargetMachine( const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64bit) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()), + Subtarget(TT, CPU, FS, is64bit), DL(computeDataLayout(Subtarget)), InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering( *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/TargetInfo/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/TargetInfo/CMakeLists.txt index 0bf13346d2ef..1beb40ea58e4 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/TargetInfo/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/TargetInfo/CMakeLists.txt @@ -1,7 +1,3 @@ -#include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMNVPTXInfo NVPTXTargetInfo.cpp ) - -add_dependencies(LLVMNVPTXInfo NVPTXCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/TargetInfo/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/TargetInfo/LLVMBuild.txt index ef12b0e64700..af7ec27278bc 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/TargetInfo/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/NVPTX/TargetInfo/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = NVPTXInfo parent = NVPTX -required_libraries = MC Support Target +required_libraries = Support add_to_library_groups = NVPTX diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/AsmParser/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/AsmParser/CMakeLists.txt index 3aa59c00c369..408858e424d5 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/AsmParser/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/AsmParser/CMakeLists.txt @@ -1,8 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - add_llvm_library(LLVMPowerPCAsmParser PPCAsmParser.cpp ) - -add_dependencies(LLVMPowerPCAsmParser PowerPCCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index fe83fe1438ce..b297cd89cb90 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -177,6 +177,7 @@ class PPCAsmParser : public MCTargetAsmParser { MCAsmParser &Parser; const MCInstrInfo &MII; bool IsPPC64; + bool IsDarwin; MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } @@ -185,6 +186,7 @@ class PPCAsmParser : public MCTargetAsmParser { bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } bool isPPC64() const { return IsPPC64; } + bool isDarwin() const { return IsDarwin; } bool MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal); @@ -195,12 +197,14 @@ class PPCAsmParser : public MCTargetAsmParser { PPCMCExpr::VariantKind &Variant); const MCExpr *FixupVariantKind(const MCExpr *E); bool ParseExpression(const MCExpr *&EVal); + bool ParseDarwinExpression(const MCExpr *&EVal); bool ParseOperand(SmallVectorImpl &Operands); bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveTC(unsigned Size, SMLoc L); bool ParseDirectiveMachine(SMLoc L); + bool ParseDarwinDirectiveMachine(SMLoc L); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl &Operands, @@ -227,6 +231,7 @@ public: Triple TheTriple(STI.getTargetTriple()); IsPPC64 = (TheTriple.getArch() == Triple::ppc64 || TheTriple.getArch() == Triple::ppc64le); + IsDarwin = TheTriple.isMacOSX(); // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } @@ -1081,10 +1086,16 @@ FixupVariantKind(const MCExpr *E) { llvm_unreachable("Invalid expression kind!"); } -/// Parse an expression. This differs from the default "parseExpression" -/// in that it handles complex \code @l/@ha \endcode modifiers. +/// ParseExpression. This differs from the default "parseExpression" in that +/// it handles modifiers. bool PPCAsmParser:: ParseExpression(const MCExpr *&EVal) { + + if (isDarwin()) + return ParseDarwinExpression(EVal); + + // (ELF Platforms) + // Handle \code @l/@ha \endcode if (getParser().parseExpression(EVal)) return true; @@ -1098,6 +1109,55 @@ ParseExpression(const MCExpr *&EVal) { return false; } +/// ParseDarwinExpression. (MachO Platforms) +/// This differs from the default "parseExpression" in that it handles detection +/// of the \code hi16(), ha16() and lo16() \endcode modifiers. At present, +/// parseExpression() doesn't recognise the modifiers when in the Darwin/MachO +/// syntax form so it is done here. TODO: Determine if there is merit in arranging +/// for this to be done at a higher level. +bool PPCAsmParser:: +ParseDarwinExpression(const MCExpr *&EVal) { + PPCMCExpr::VariantKind Variant = PPCMCExpr::VK_PPC_None; + switch (getLexer().getKind()) { + default: + break; + case AsmToken::Identifier: + // Compiler-generated Darwin identifiers begin with L,l,_ or "; thus + // something starting with any other char should be part of the + // asm syntax. If handwritten asm includes an identifier like lo16, + // then all bets are off - but no-one would do that, right? 
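ParseDarwinExpression only peels the lo16(/hi16(/ha16( wrapper and records the variant; the arithmetic those modifiers stand for, splitting a 32-bit value across an addis/addi pair with ha16 pre-compensating for the sign extension of the low half, can be sketched without any MC machinery. A hand-rolled illustration of the assumed semantics, not the PPCMCExpr evaluator itself:

    #include <cstdint>
    #include <cstdio>

    static uint16_t lo16(uint32_t V) { return V & 0xFFFF; }
    static uint16_t hi16(uint32_t V) { return V >> 16; }
    // ha16 compensates for the sign extension addi applies to lo16.
    static uint16_t ha16(uint32_t V) { return (V + 0x8000) >> 16; }

    int main() {
      uint32_t Addr = 0x12348765;
      // addis rD, 0, ha16(Addr); addi rD, rD, lo16(Addr) rebuilds Addr:
      uint32_t Rebuilt = (uint32_t(ha16(Addr)) << 16) + int16_t(lo16(Addr));
      printf("hi16=%x ha16=%x lo16=%x rebuilt=%x\n", hi16(Addr), ha16(Addr),
             lo16(Addr), Rebuilt); // rebuilt == 12348765
      return 0;
    }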
+ StringRef poss = Parser.getTok().getString(); + if (poss.equals_lower("lo16")) { + Variant = PPCMCExpr::VK_PPC_LO; + } else if (poss.equals_lower("hi16")) { + Variant = PPCMCExpr::VK_PPC_HI; + } else if (poss.equals_lower("ha16")) { + Variant = PPCMCExpr::VK_PPC_HA; + } + if (Variant != PPCMCExpr::VK_PPC_None) { + Parser.Lex(); // Eat the xx16 + if (getLexer().isNot(AsmToken::LParen)) + return Error(Parser.getTok().getLoc(), "expected '('"); + Parser.Lex(); // Eat the '(' + } + break; + } + + if (getParser().parseExpression(EVal)) + return true; + + if (Variant != PPCMCExpr::VK_PPC_None) { + if (getLexer().isNot(AsmToken::RParen)) + return Error(Parser.getTok().getLoc(), "expected ')'"); + Parser.Lex(); // Eat the ')' + EVal = PPCMCExpr::Create(Variant, EVal, false, getParser().getContext()); + } + return false; +} + +/// ParseOperand +/// This handles registers in the form 'NN', '%rNN' for ELF platforms and +/// rNN for MachO. bool PPCAsmParser:: ParseOperand(SmallVectorImpl &Operands) { SMLoc S = Parser.getTok().getLoc(); @@ -1121,12 +1181,27 @@ ParseOperand(SmallVectorImpl &Operands) { } return Error(S, "invalid register name"); + case AsmToken::Identifier: + // Note that non-register-name identifiers from the compiler will begin + // with '_', 'L'/'l' or '"'. Of course, handwritten asm could include + // identifiers like r31foo - so we fall through in the event that parsing + // a register name fails. + if (isDarwin()) { + unsigned RegNo; + int64_t IntVal; + if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) { + Parser.Lex(); // Eat the identifier token. + Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64()); + Operands.push_back(Op); + return false; + } + } + // Fall-through to process non-register-name identifiers as expression. // All other expressions case AsmToken::LParen: case AsmToken::Plus: case AsmToken::Minus: case AsmToken::Integer: - case AsmToken::Identifier: case AsmToken::Dot: case AsmToken::Dollar: if (!ParseExpression(EVal)) @@ -1177,11 +1252,25 @@ ParseOperand(SmallVectorImpl &Operands) { break; case AsmToken::Integer: - if (getParser().parseAbsoluteExpression(IntVal) || + if (!isDarwin()) { + if (getParser().parseAbsoluteExpression(IntVal) || IntVal < 0 || IntVal > 31) return Error(S, "invalid register number"); + } else { + return Error(S, "unexpected integer value"); + } break; + case AsmToken::Identifier: + if (isDarwin()) { + unsigned RegNo; + if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) { + Parser.Lex(); // Eat the identifier token. + break; + } + } + // Fall-through.. + default: return Error(S, "invalid memory operand"); } @@ -1261,14 +1350,19 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, /// ParseDirective parses the PPC specific directives bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); - if (IDVal == ".word") - return ParseDirectiveWord(2, DirectiveID.getLoc()); - if (IDVal == ".llong") - return ParseDirectiveWord(8, DirectiveID.getLoc()); - if (IDVal == ".tc") - return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc()); - if (IDVal == ".machine") - return ParseDirectiveMachine(DirectiveID.getLoc()); + if (!isDarwin()) { + if (IDVal == ".word") + return ParseDirectiveWord(2, DirectiveID.getLoc()); + if (IDVal == ".llong") + return ParseDirectiveWord(8, DirectiveID.getLoc()); + if (IDVal == ".tc") + return ParseDirectiveTC(isPPC64()? 
8 : 4, DirectiveID.getLoc()); + if (IDVal == ".machine") + return ParseDirectiveMachine(DirectiveID.getLoc()); + } else { + if (IDVal == ".machine") + return ParseDarwinDirectiveMachine(DirectiveID.getLoc()); + } return true; } @@ -1314,7 +1408,7 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) { return ParseDirectiveWord(Size, L); } -/// ParseDirectiveMachine +/// ParseDirectiveMachine (ELF platforms) /// ::= .machine [ cpu | "push" | "pop" ] bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) { if (getLexer().isNot(AsmToken::Identifier) && @@ -1338,6 +1432,33 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) { return false; } +/// ParseDarwinDirectiveMachine (Mach-O platforms) +/// ::= .machine cpu-identifier +bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) { + if (getLexer().isNot(AsmToken::Identifier) && + getLexer().isNot(AsmToken::String)) + return Error(L, "unexpected token in directive"); + + StringRef CPU = Parser.getTok().getIdentifier(); + Parser.Lex(); + + // FIXME: this is only the 'default' set of cpu variants. + // However we don't act on this information at present; this simply + // allows parsing to proceed with minimal sanity checking. + if (CPU != "ppc7400" && CPU != "ppc" && CPU != "ppc64") + return Error(L, "unrecognized cpu type"); + + if (isPPC64() && (CPU == "ppc7400" || CPU == "ppc")) + return Error(L, "wrong cpu type specified for 64-bit"); + if (!isPPC64() && CPU == "ppc64") + return Error(L, "wrong cpu type specified for 32-bit"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(L, "unexpected token in directive"); + + return false; +} + /// Force static initialization. extern "C" void LLVMInitializePowerPCAsmParser() { RegisterMCAsmParser<PPCAsmParser> A(ThePPC32Target); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/CMakeLists.txt index 9a763f53a2d1..ea4de63a2448 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_TARGET_DEFINITIONS PPC.td) tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter) +tablegen(LLVM PPCGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info) tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info) @@ -34,9 +35,8 @@ add_llvm_target(PowerPCCodeGen PPCSelectionDAGInfo.cpp ) -add_dependencies(LLVMPowerPCCodeGen PowerPCCommonTableGen intrinsics_gen) - add_subdirectory(AsmParser) +add_subdirectory(Disassembler) add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/Disassembler/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/Disassembler/CMakeLists.txt new file mode 100644 index 000000000000..ca457df88d3e --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/Disassembler/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_library(LLVMPowerPCDisassembler + PPCDisassembler.cpp + ) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/Disassembler/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/Disassembler/LLVMBuild.txt new file mode 100644 index 000000000000..7f29040eb6db --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/Disassembler/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===--
./lib/Target/PowerPC/Disassembler/LLVMBuild.txt ---------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = PowerPCDisassembler +parent = PowerPC +required_libraries = MC Support PowerPCDesc PowerPCInfo +add_to_library_groups = PowerPC diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/Disassembler/Makefile b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/Disassembler/Makefile new file mode 100644 index 000000000000..86e3b4752207 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===-- lib/Target/PowerPC/Disassembler/Makefile -----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMPowerPCDisassembler + +# Hack: we need to include 'main' PPC target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp new file mode 100644 index 000000000000..1dab14b2ca86 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -0,0 +1,293 @@ +//===------ PPCDisassembler.cpp - Disassembler for PowerPC ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { +class PPCDisassembler : public MCDisassembler { +public: + PPCDisassembler(const MCSubtargetInfo &STI) + : MCDisassembler(STI) {} + virtual ~PPCDisassembler() {} + + // Override MCDisassembler. + virtual DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const LLVM_OVERRIDE; +}; +} // end anonymous namespace + +static MCDisassembler *createPPCDisassembler(const Target &T, + const MCSubtargetInfo &STI) { + return new PPCDisassembler(STI); +} + +extern "C" void LLVMInitializePowerPCDisassembler() { + // Register the disassembler for each target. 
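The register-table decoders in the new PPCDisassembler.cpp below all funnel into one template whose table length N is deduced from an array-reference parameter, so an oversized register number trips the assert instead of indexing out of bounds. The idiom in isolation, as a sketch rather than the LLVM code itself:

    #include <cassert>
    #include <cstddef>
    #include <cstdio>

    // N is deduced from the array bound of the argument, as in the
    // decodeRegisterClass template applied to CRRegs, FRegs, GPRegs, etc.
    template <std::size_t N>
    static unsigned lookupReg(unsigned RegNo, const unsigned (&Regs)[N]) {
      assert(RegNo < N && "Invalid register number");
      return Regs[RegNo];
    }

    int main() {
      static const unsigned Demo[4] = {100, 101, 102, 103};
      printf("%u\n", lookupReg(2, Demo)); // N = 4 is deduced; prints 102
      return 0;
    }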
+ TargetRegistry::RegisterMCDisassembler(ThePPC32Target, + createPPCDisassembler); + TargetRegistry::RegisterMCDisassembler(ThePPC64Target, + createPPCDisassembler); + TargetRegistry::RegisterMCDisassembler(ThePPC64LETarget, + createPPCDisassembler); +} + +// FIXME: These can be generated by TableGen from the existing register +// encoding values! + +static const unsigned CRRegs[] = { + PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, + PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7 +}; + +static const unsigned CRBITRegs[] = { + PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, + PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, + PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, + PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, + PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, + PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, + PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN +}; + +static const unsigned FRegs[] = { + PPC::F0, PPC::F1, PPC::F2, PPC::F3, + PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8, PPC::F9, PPC::F10, PPC::F11, + PPC::F12, PPC::F13, PPC::F14, PPC::F15, + PPC::F16, PPC::F17, PPC::F18, PPC::F19, + PPC::F20, PPC::F21, PPC::F22, PPC::F23, + PPC::F24, PPC::F25, PPC::F26, PPC::F27, + PPC::F28, PPC::F29, PPC::F30, PPC::F31 +}; + +static const unsigned VRegs[] = { + PPC::V0, PPC::V1, PPC::V2, PPC::V3, + PPC::V4, PPC::V5, PPC::V6, PPC::V7, + PPC::V8, PPC::V9, PPC::V10, PPC::V11, + PPC::V12, PPC::V13, PPC::V14, PPC::V15, + PPC::V16, PPC::V17, PPC::V18, PPC::V19, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31 +}; + +static const unsigned GPRegs[] = { + PPC::R0, PPC::R1, PPC::R2, PPC::R3, + PPC::R4, PPC::R5, PPC::R6, PPC::R7, + PPC::R8, PPC::R9, PPC::R10, PPC::R11, + PPC::R12, PPC::R13, PPC::R14, PPC::R15, + PPC::R16, PPC::R17, PPC::R18, PPC::R19, + PPC::R20, PPC::R21, PPC::R22, PPC::R23, + PPC::R24, PPC::R25, PPC::R26, PPC::R27, + PPC::R28, PPC::R29, PPC::R30, PPC::R31 +}; + +static const unsigned GP0Regs[] = { + PPC::ZERO, PPC::R1, PPC::R2, PPC::R3, + PPC::R4, PPC::R5, PPC::R6, PPC::R7, + PPC::R8, PPC::R9, PPC::R10, PPC::R11, + PPC::R12, PPC::R13, PPC::R14, PPC::R15, + PPC::R16, PPC::R17, PPC::R18, PPC::R19, + PPC::R20, PPC::R21, PPC::R22, PPC::R23, + PPC::R24, PPC::R25, PPC::R26, PPC::R27, + PPC::R28, PPC::R29, PPC::R30, PPC::R31 +}; + +static const unsigned G8Regs[] = { + PPC::X0, PPC::X1, PPC::X2, PPC::X3, + PPC::X4, PPC::X5, PPC::X6, PPC::X7, + PPC::X8, PPC::X9, PPC::X10, PPC::X11, + PPC::X12, PPC::X13, PPC::X14, PPC::X15, + PPC::X16, PPC::X17, PPC::X18, PPC::X19, + PPC::X20, PPC::X21, PPC::X22, PPC::X23, + PPC::X24, PPC::X25, PPC::X26, PPC::X27, + PPC::X28, PPC::X29, PPC::X30, PPC::X31 +}; + +template +static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, + const unsigned (&Regs)[N]) { + assert(RegNo < N && "Invalid register number"); + Inst.addOperand(MCOperand::CreateReg(Regs[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, CRRegs); +} + +static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, CRBITRegs); +} + +static DecodeStatus DecodeF4RCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, FRegs); +} + +static 
DecodeStatus DecodeF8RCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, FRegs); +} + +static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, VRegs); +} + +static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, GPRegs); +} + +static DecodeStatus DecodeGPRC_NOR0RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, GP0Regs); +} + +static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, G8Regs); +} + +#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass +#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass + +template +static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + assert(isUInt(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::CreateImm(Imm)); + return MCDisassembler::Success; +} + +template +static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + assert(isUInt(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::CreateImm(SignExtend64(Imm))); + return MCDisassembler::Success; +} + +static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + // Decode the memri field (imm, reg), which has the low 16-bits as the + // displacement and the next 5 bits as the register #. + + uint64_t Base = Imm >> 16; + uint64_t Disp = Imm & 0xFFFF; + + assert(Base < 32 && "Invalid base register"); + + switch (Inst.getOpcode()) { + default: break; + case PPC::LBZU: + case PPC::LHAU: + case PPC::LHZU: + case PPC::LWZU: + case PPC::LFSU: + case PPC::LFDU: + // Add the tied output operand. + Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base])); + break; + case PPC::STBU: + case PPC::STHU: + case PPC::STWU: + case PPC::STFSU: + case PPC::STFDU: + Inst.insert(Inst.begin(), MCOperand::CreateReg(GP0Regs[Base])); + break; + } + + Inst.addOperand(MCOperand::CreateImm(SignExtend64<16>(Disp))); + Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + // Decode the memrix field (imm, reg), which has the low 14-bits as the + // displacement and the next 5 bits as the register #. + + uint64_t Base = Imm >> 14; + uint64_t Disp = Imm & 0x3FFF; + + assert(Base < 32 && "Invalid base register"); + + if (Inst.getOpcode() == PPC::LDU) + // Add the tied output operand. + Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base])); + else if (Inst.getOpcode() == PPC::STDU) + Inst.insert(Inst.begin(), MCOperand::CreateReg(GP0Regs[Base])); + + Inst.addOperand(MCOperand::CreateImm(SignExtend64<16>(Disp << 2))); + Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + // The cr bit encoding is 0x80 >> cr_reg_num. 
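decodeMemRIOperands above unpacks a combined immediate in which the base register number sits above a 16-bit signed displacement (decodeMemRIXOperands does the same with a 14-bit field that is then scaled by four). A minimal sketch of that unpacking, with SignExtend64<16> replaced by an int16_t cast:

    #include <cstdint>
    #include <cstdio>

    // Mirror of the memri field split: low 16 bits are the displacement,
    // the next 5 bits the base register number.
    static void decodeMemRI(uint64_t Imm, unsigned &Base, int64_t &Disp) {
      Base = unsigned(Imm >> 16);             // base register #
      Disp = int64_t(int16_t(Imm & 0xFFFF));  // SignExtend64<16>(...)
    }

    int main() {
      unsigned Base;
      int64_t Disp;
      decodeMemRI((3u << 16) | 0xFFFC, Base, Disp); // encodes -4(r3)
      printf("disp=%lld base=r%u\n", (long long)Disp, Base);
      return 0;
    }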
+ + unsigned Zeros = countTrailingZeros(Imm); + assert(Zeros < 8 && "Invalid CR bit value"); + + Inst.addOperand(MCOperand::CreateReg(CRRegs[7 - Zeros])); + return MCDisassembler::Success; +} + +#include "PPCGenDisassemblerTables.inc" + +DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { + // Get the four bytes of the instruction. + uint8_t Bytes[4]; + Size = 4; + if (Region.readBytes(Address, Size, Bytes) == -1) { + Size = 0; + return MCDisassembler::Fail; + } + + // The instruction is big-endian encoded. + uint32_t Inst = (Bytes[0] << 24) | + (Bytes[1] << 16) | + (Bytes[2] << 8) | + (Bytes[3] << 0); + + return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI); +} + diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt index a605cc4b5f27..ab30a110f40e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/InstPrinter/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMPowerPCAsmPrinter PPCInstPrinter.cpp ) - -add_dependencies(LLVMPowerPCAsmPrinter PowerPCCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/LLVMBuild.txt index 7b3e843507a8..9d173d64b944 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/LLVMBuild.txt @@ -16,18 +16,20 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo [component_0] type = TargetGroup name = PowerPC parent = Target +has_asmparser = 1 has_asmprinter = 1 +has_disassembler = 1 has_jit = 1 [component_1] type = Library name = PowerPCCodeGen parent = PowerPC -required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target +required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target TransformUtils add_to_library_groups = PowerPC diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt index 3efa5ecf9096..3cea65ee4de6 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt @@ -8,5 +8,3 @@ add_llvm_library(LLVMPowerPCDesc PPCMachObjectWriter.cpp PPCELFObjectWriter.cpp ) - -add_dependencies(LLVMPowerPCDesc PowerPCCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index f3dddce30120..fcd22fa5dafa 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -12,11 +12,13 @@ //===----------------------------------------------------------------------===// #include "PPCMCAsmInfo.h" +#include "llvm/ADT/Triple.h" + using namespace llvm; void 
PPCMCAsmInfoDarwin::anchor() { } -PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { +PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) { if (is64Bit) { PointerSize = CalleeSaveStackSlotSize = 8; } @@ -30,6 +32,12 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { AssemblerDialect = 1; // New-Style mnemonics. SupportsDebugInformation= true; // Debug information. + + // The installed assembler for OSX < 10.6 lacks some directives. + // FIXME: this should really be a check on the assembler characteristics + // rather than OS version + if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6)) + HasWeakDefCanBeHiddenDirective = false; } void PPCLinuxMCAsmInfo::anchor() { } @@ -44,8 +52,6 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { AlignmentIsInBytes = false; CommentString = "#"; - GlobalPrefix = ""; - PrivateGlobalPrefix = ".L"; // Uses '.section' before '.bss' directive UsesELFSectionDirectiveForBSS = true; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h index 1530e774cfc7..6e6152eab398 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h @@ -18,11 +18,12 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { +class Triple; class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin { virtual void anchor(); public: - explicit PPCMCAsmInfoDarwin(bool is64Bit); + explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&); }; class PPCLinuxMCAsmInfo : public MCAsmInfoELF { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 346a9beada90..66ebfd23817a 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -217,7 +217,8 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, // Return the thread-pointer register's encoding. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_nofixup)); - return CTX.getRegisterInfo()->getEncodingValue(PPC::X13); + bool isPPC64 = TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le; + return CTX.getRegisterInfo()->getEncodingValue(isPPC64 ? 
PPC::X13 : PPC::R2); } unsigned PPCMCCodeEmitter::getTLSCallEncoding(const MCInst &MI, unsigned OpNo, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index f18d095c6d02..6a5051840181 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -72,7 +72,7 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { MCAsmInfo *MAI; if (TheTriple.isOSDarwin()) - MAI = new PPCMCAsmInfoDarwin(isPPC64); + MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple); else MAI = new PPCLinuxMCAsmInfo(isPPC64); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/Makefile b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/Makefile index 21fdcd9350e1..c96674809b01 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/Makefile +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/Makefile @@ -16,8 +16,9 @@ BUILT_SOURCES = PPCGenRegisterInfo.inc PPCGenAsmMatcher.inc \ PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \ PPCGenInstrInfo.inc PPCGenDAGISel.inc \ PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \ - PPCGenMCCodeEmitter.inc PPCGenFastISel.inc + PPCGenMCCodeEmitter.inc PPCGenFastISel.inc \ + PPCGenDisassemblerTables.inc -DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc +DIRS = AsmParser Disassembler InstPrinter TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPC.td b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPC.td index 54e3d400a9d9..661149837808 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPC.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPC.td @@ -153,12 +153,12 @@ include "PPCInstrInfo.td" // def : Processor<"generic", G3Itineraries, [Directive32]>; -def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL, - FeatureFRES, FeatureFRSQRTE, - FeatureBookE, DeprecatedMFTB]>; -def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL, - FeatureFRES, FeatureFRSQRTE, - FeatureBookE, DeprecatedMFTB]>; +def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL, + FeatureFRES, FeatureFRSQRTE, + FeatureBookE, DeprecatedMFTB]>; +def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL, + FeatureFRES, FeatureFRSQRTE, + FeatureBookE, DeprecatedMFTB]>; def : Processor<"601", G3Itineraries, [Directive601]>; def : Processor<"602", G3Itineraries, [Directive602]>; def : Processor<"603", G3Itineraries, [Directive603, @@ -254,7 +254,7 @@ def : ProcessorModel<"pwr6x", G5Model, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, Feature64Bit, DeprecatedMFTB, DeprecatedDST]>; -def : ProcessorModel<"pwr7", G5Model, +def : ProcessorModel<"pwr7", P7Model, [DirectivePwr7, FeatureAltivec, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, @@ -283,11 +283,9 @@ include "PPCCallingConv.td" def PPCInstrInfo : InstrInfo { let isLittleEndianEncoding = 1; -} -def PPCAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - bit isMCAsmWriter = 1; + // FIXME: Unset this when no longer needed! + let decodePositionallyEncodedOperands = 1; } def PPCAsmParser : AsmParser { @@ -306,8 +304,7 @@ def PPCAsmParserVariant : AsmParserVariant { def PPC : Target { // Information about the instructions. 
let InstructionSet = PPCInstrInfo; - - let AssemblyWriters = [PPCAsmWriter]; + let AssemblyParsers = [PPCAsmParser]; let AssemblyParserVariants = [PPCAsmParserVariant]; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index ada34ed9e18a..72971684ac67 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -139,6 +139,7 @@ static const char *stripRegisterPrefix(const char *RegName) { void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + const DataLayout *DL = TM.getDataLayout(); const MachineOperand &MO = MI->getOperand(OpNo); switch (MO.getType()) { @@ -157,37 +158,13 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, case MachineOperand::MO_MachineBasicBlock: O << *MO.getMBB()->getSymbol(); return; - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << MO.getIndex(); - // FIXME: PIC relocation model - return; case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' << MO.getIndex(); return; case MachineOperand::MO_BlockAddress: O << *GetBlockAddressSymbol(MO.getBlockAddress()); return; - case MachineOperand::MO_ExternalSymbol: { - // Computing the address of an external symbol, not calling it. - if (TM.getRelocationModel() == Reloc::Static) { - O << *GetExternalSymbolSymbol(MO.getSymbolName()); - return; - } - - MCSymbol *NLPSym = - OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+ - MO.getSymbolName()+"$non_lazy_ptr"); - MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo().getGVStubEntry(NLPSym); - if (StubSym.getPointer() == 0) - StubSym = MachineModuleInfoImpl:: - StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true); - - O << *NLPSym; - return; - } case MachineOperand::MO_GlobalAddress: { // Computing the address of a global symbol, not calling it. const GlobalValue *GV = MO.getGlobal(); @@ -197,7 +174,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, if (TM.getRelocationModel() != Reloc::Static && (GV->isDeclaration() || GV->isWeakForLinker())) { if (!GV->hasHiddenVisibility()) { - SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = MMI->getObjFileInfo() .getGVStubEntry(SymToPrint); @@ -206,7 +183,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); } else if (GV->isDeclaration() || GV->hasCommonLinkage() || GV->hasAvailableExternallyLinkage()) { - SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = MMI->getObjFileInfo(). @@ -305,12 +282,12 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, /// exists for it. If not, create one. Then return a symbol that references /// the TOC entry. MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { - + const DataLayout *DL = TM.getDataLayout(); MCSymbol *&TOCEntry = TOC[Sym]; // To avoid name clash check if the name already exists. 
while (TOCEntry == 0) { - if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) + + if (OutContext.LookupSymbol(Twine(DL->getPrivateGlobalPrefix()) + "C" + Twine(TOCLabelID++)) == 0) { TOCEntry = GetTempSymbol("C", TOCLabelID); } @@ -325,6 +302,7 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { /// void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; + bool isPPC64 = Subtarget.isPPC64(); // Lower multi-instruction pseudo operations. switch (MI->getOpcode()) { @@ -518,12 +496,13 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addExpr(SymGotTprel)); return; } - case PPC::LDgotTprelL: { + case PPC::LDgotTprelL: + case PPC::LDgotTprelL32: { // Transform %Xd = LDgotTprelL , %Xs LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); // Change the opcode to LD. - TmpInst.setOpcode(PPC::LD); + TmpInst.setOpcode(isPPC64 ? PPC::LD : PPC::LWZ); const MachineOperand &MO = MI->getOperand(1); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -534,6 +513,24 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(TmpInst); return; } + + case PPC::PPC32GOT: { + MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); + const MCExpr *SymGotTlsL = + MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO, + OutContext); + const MCExpr *SymGotTlsHA = + MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::LI) + .addReg(MI->getOperand(0).getReg()) + .addExpr(SymGotTlsL)); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(0).getReg()) + .addExpr(SymGotTlsHA)); + return; + } case PPC::ADDIStlsgdHA: { // Transform: %Xd = ADDIStlsgdHA %X2, // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha @@ -1058,7 +1055,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { for (std::vector::const_iterator I = Personalities.begin(), E = Personalities.end(); I != E; ++I) { if (*I) { - MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); + MCSymbol *NLPSym = getSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = MMIMacho.getGVStubEntry(NLPSym); StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(*I), true); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 0df50e17dd9d..37c85b37351d 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -15,34 +15,220 @@ #include "PPCHazardRecognizers.h" #include "PPC.h" #include "PPCInstrInfo.h" +#include "PPCTargetMachine.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -//===----------------------------------------------------------------------===// -// PowerPC Scoreboard Hazard Recognizer -void PPCScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { +bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) { + // FIXME: Move this. + if (isBCTRAfterSet(SU)) + return true; + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); if (!MCID) - // This is a PPC pseudo-instruction. 
- return; + return false; - ScoreboardHazardRecognizer::EmitInstruction(SU); + if (!MCID->mayLoad()) + return false; + + // SU is a load; if any predecessor in this dispatch group is a store with + // which we have an ordering dependency, return true. + for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { + const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); + if (!PredMCID || !PredMCID->mayStore()) + continue; + + if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier()) + continue; + + for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) + if (SU->Preds[i].getSUnit() == CurGroup[j]) + return true; + } + + return false; +} + +bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) { + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + if (!MCID) + return false; + + if (!MCID->isBranch()) + return false; + + // SU is a branch; if any predecessor in this dispatch group sets the + // counter register and has a data dependence with SU, return true. + for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { + const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); + if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR) + continue; + + if (SU->Preds[i].isCtrl()) + continue; + + for (unsigned j = 0, je = CurGroup.size(); j != je; ++j) + if (SU->Preds[i].getSUnit() == CurGroup[j]) + return true; + } + + return false; +} + +// FIXME: Remove this when we don't need this: +namespace llvm { namespace PPC { extern int getNonRecordFormOpcode(uint16_t); } } + +// FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific. + +bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID, + unsigned &NSlots) { + // FIXME: Indirectly, this information is contained in the itinerary, and + // we should derive it from there instead of separately specifying it + // here. + unsigned IIC = MCID->getSchedClass(); + switch (IIC) { + default: + NSlots = 1; + break; + case PPC::Sched::IIC_IntDivW: + case PPC::Sched::IIC_IntDivD: + case PPC::Sched::IIC_LdStLoadUpd: + case PPC::Sched::IIC_LdStLDU: + case PPC::Sched::IIC_LdStLFDU: + case PPC::Sched::IIC_LdStLFDUX: + case PPC::Sched::IIC_LdStLHA: + case PPC::Sched::IIC_LdStLHAU: + case PPC::Sched::IIC_LdStLWA: + case PPC::Sched::IIC_LdStSTDU: + case PPC::Sched::IIC_LdStSTFDU: + NSlots = 2; + break; + case PPC::Sched::IIC_LdStLoadUpdX: + case PPC::Sched::IIC_LdStLDUX: + case PPC::Sched::IIC_LdStLHAUX: + case PPC::Sched::IIC_LdStLWARX: + case PPC::Sched::IIC_LdStLDARX: + case PPC::Sched::IIC_LdStSTDUX: + case PPC::Sched::IIC_LdStSTDCX: + case PPC::Sched::IIC_LdStSTWCX: + case PPC::Sched::IIC_BrMCRX: // mtcr + // FIXME: Add sync/isync (here and in the itinerary). + NSlots = 4; + break; + } + + // FIXME: record-form instructions need a different itinerary class. + if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1) + NSlots = 2; + + switch (IIC) { + default: + // All multi-slot instructions must come first.
+ return NSlots > 1; + case PPC::Sched::IIC_BrCR: // cr logicals + case PPC::Sched::IIC_SprMFCR: + case PPC::Sched::IIC_SprMFCRF: + case PPC::Sched::IIC_SprMTSPR: + return true; + } } ScheduleHazardRecognizer::HazardType -PPCScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { +PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + if (Stalls == 0 && isLoadAfterStore(SU)) + return NoopHazard; + return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); } -void PPCScoreboardHazardRecognizer::AdvanceCycle() { - ScoreboardHazardRecognizer::AdvanceCycle(); +bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) { + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + unsigned NSlots; + if (MCID && mustComeFirst(MCID, NSlots) && CurSlots) + return true; + + return ScoreboardHazardRecognizer::ShouldPreferAnother(SU); } -void PPCScoreboardHazardRecognizer::Reset() { - ScoreboardHazardRecognizer::Reset(); +unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) { + // We only need to fill out a maximum of 5 slots here: The 6th slot could + // only be a second branch, and otherwise the next instruction will start a + // new group. + if (isLoadAfterStore(SU) && CurSlots < 6) { + unsigned Directive = + DAG->TM.getSubtarget().getDarwinDirective(); + // If we're using a special group-terminating nop, then we need only one. + if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7) + return 1; + + return 5 - CurSlots; + } + + return ScoreboardHazardRecognizer::PreEmitNoops(SU); +} + +void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) { + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + if (MCID) { + if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) { + CurGroup.clear(); + CurSlots = CurBranches = 0; + } else { + DEBUG(dbgs() << "**** Adding to dispatch group: SU(" << + SU->NodeNum << "): "); + DEBUG(DAG->dumpNode(SU)); + + unsigned NSlots; + bool MustBeFirst = mustComeFirst(MCID, NSlots); + + // If this instruction must come first, but does not, then it starts a + // new group. + if (MustBeFirst && CurSlots) { + CurSlots = CurBranches = 0; + CurGroup.clear(); + } + + CurSlots += NSlots; + CurGroup.push_back(SU); + + if (MCID->isBranch()) + ++CurBranches; + } + } + + return ScoreboardHazardRecognizer::EmitInstruction(SU); +} + +void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() { + return ScoreboardHazardRecognizer::AdvanceCycle(); +} + +void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() { + llvm_unreachable("Bottom-up scheduling not supported"); +} + +void PPCDispatchGroupSBHazardRecognizer::Reset() { + CurGroup.clear(); + CurSlots = CurBranches = 0; + return ScoreboardHazardRecognizer::Reset(); +} + +void PPCDispatchGroupSBHazardRecognizer::EmitNoop() { + unsigned Directive = + DAG->TM.getSubtarget().getDarwinDirective(); + // If the group has now filled all of its slots, or if we're using a special + // group-terminating nop, the group is complete. 
+ if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || + CurSlots == 6) { + CurGroup.clear(); + CurSlots = CurBranches = 0; + } else { + CurGroup.push_back(0); + ++CurSlots; + } } //===----------------------------------------------------------------------===// diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h index 84b8e6de4579..6b7fe41e5749 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -21,19 +21,30 @@ namespace llvm { -/// PPCScoreboardHazardRecognizer - This class implements a scoreboard-based -/// hazard recognizer for generic PPC processors. -class PPCScoreboardHazardRecognizer : public ScoreboardHazardRecognizer { +/// PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based +/// hazard recognizer for PPC ooo processors with dispatch-group hazards. +class PPCDispatchGroupSBHazardRecognizer : public ScoreboardHazardRecognizer { const ScheduleDAG *DAG; + SmallVector CurGroup; + unsigned CurSlots, CurBranches; + + bool isLoadAfterStore(SUnit *SU); + bool isBCTRAfterSet(SUnit *SU); + bool mustComeFirst(const MCInstrDesc *MCID, unsigned &NSlots); public: - PPCScoreboardHazardRecognizer(const InstrItineraryData *ItinData, + PPCDispatchGroupSBHazardRecognizer(const InstrItineraryData *ItinData, const ScheduleDAG *DAG_) : - ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {} + ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_), + CurSlots(0), CurBranches(0) {} virtual HazardType getHazardType(SUnit *SU, int Stalls); + virtual bool ShouldPreferAnother(SUnit* SU); + virtual unsigned PreEmitNoops(SUnit *SU); virtual void EmitInstruction(SUnit *SU); virtual void AdvanceCycle(); + virtual void RecedeCycle(); virtual void Reset(); + virtual void EmitNoop(); }; /// PPCHazardRecognizer970 - This class defines a finite state automata that diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 6ba6af6446e5..e714281bdfa9 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1122,7 +1122,21 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { isMask_64(Imm64)) { SDValue Val = N->getOperand(0); MB = 64 - CountTrailingOnes_64(Imm64); - SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB) }; + SH = 0; + + // If the operand is a logical right shift, we can fold it into this + // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) + // for n <= mb. The right shift is really a left rotate followed by a + // mask, and this mask is a more-restrictive sub-mask of the mask implied + // by the shift. + if (Val.getOpcode() == ISD::SRL && + isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) { + assert(Imm < 64 && "Illegal shift amount"); + Val = Val.getOperand(0); + SH = 64 - Imm; + } + + SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB) }; return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops, 3); } // AND X, 0 -> 0, not "rlwinm 32". @@ -1382,7 +1396,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } -/// PostProcessISelDAG - Perform some late peephole optimizations +/// PostprocessISelDAG - Perform some late peephole optimizations /// on the DAG representation. 
void PPCDAGToDAGISel::PostprocessISelDAG() { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 8da5f0563c6a..a00e37ab988c 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -670,6 +670,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA"; case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L"; case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L"; + case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; @@ -1431,18 +1432,19 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); } - if (!is64bit) - llvm_unreachable("only local-exec is currently supported for ppc32"); - if (Model == TLSModel::InitialExec) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLS); - SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); - SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, - PtrVT, GOTReg, TGA); + SDValue GOTPtr; + if (is64bit) { + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, + PtrVT, GOTReg, TGA); + } else + GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, - PtrVT, TGA, TPOffsetHi); + PtrVT, TGA, GOTPtr); return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); } @@ -2968,7 +2970,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (Flags.isByVal()) return false; } - // Non PIC/GOT tail calls are supported. + // Non-PIC/GOT tail calls are supported. if (getTargetMachine().getRelocationModel() != Reloc::PIC_) return true; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCISelLowering.h b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCISelLowering.h index df3af35761ee..7ae43fa789a6 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -177,6 +177,10 @@ namespace llvm { CR6SET, CR6UNSET, + /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS + /// on PPC32. + PPC32_GOT, + /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec /// TLS model, produces an ADDIS8 instruction that adds the GOT /// base to sym\@got\@tprel\@ha. 
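A note on the rotate-and-mask fold introduced in the PPCISelDAGToDAG.cpp hunk
above: the identity rldicl(rldicl(x, 64-n, n), 0, mb) == rldicl(x, 64-n, mb)
for n <= mb holds because a 64-bit logical right shift by n is a left rotate
by 64-n followed by clearing the high n bits, and for n <= mb that clear is
subsumed by the final clear of the high mb bits. The following self-contained
C++ sketch brute-forces the identity; it is illustrative only (rotl64 and
rldicl are local helpers modeling the instruction semantics, not LLVM code):

#include <cassert>
#include <cstdint>

// Rotate X left by SH bits (SH in [0, 63]).
static uint64_t rotl64(uint64_t X, unsigned SH) {
  return SH ? (X << SH) | (X >> (64 - SH)) : X;
}

// Model of rldicl X, SH, MB: rotate left by SH, then clear the high MB bits.
static uint64_t rldicl(uint64_t X, unsigned SH, unsigned MB) {
  return rotl64(X, SH) & (~0ULL >> MB);
}

int main() {
  const uint64_t X = 0x123456789abcdef0ULL;
  for (unsigned MB = 0; MB < 64; ++MB)
    for (unsigned N = 0; N <= MB; ++N)  // (X >> N) == rldicl(X, 64-N, N)
      assert(rldicl(rldicl(X, (64 - N) % 64, N), 0, MB) ==
             rldicl(X, (64 - N) % 64, MB));
  return 0;
}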
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 46db4fe91308..c5919fdaa3e6 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -19,11 +19,13 @@ def s16imm64 : Operand { let PrintMethod = "printS16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS16ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; } def u16imm64 : Operand { let PrintMethod = "printU16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCU16ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<16>"; } def s17imm64 : Operand { // This operand type is used for addis/lis to allow the assembler parser @@ -32,14 +34,11 @@ def s17imm64 : Operand { let PrintMethod = "printS16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS17ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; } def tocentry : Operand { let MIOperandInfo = (ops i64imm:$imm); } -def PPCTLSRegOperand : AsmOperandClass { - let Name = "TLSReg"; let PredicateMethod = "isTLSReg"; - let RenderMethod = "addTLSRegOperands"; -} def tlsreg : Operand { let EncoderMethod = "getTLSRegEncoding"; let ParserMatchClass = PPCTLSRegOperand; @@ -80,15 +79,15 @@ def HI48_64 : SDNodeXForm, + def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, + []>, Requires<[In64BitMode]>; - - let isCodeGenOnly = 1 in def BCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), - "b${cond:cc}ctr${cond:pm} ${cond:reg}", BrB, []>, + "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB, + []>, Requires<[In64BitMode]>; } } @@ -107,9 +106,9 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in { def BDZLR8 : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), - "bdzlr", BrB, []>; + "bdzlr", IIC_BrB, []>; def BDNZLR8 : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins), - "bdnzlr", BrB, []>; + "bdnzlr", IIC_BrB, []>; } } @@ -119,41 +118,50 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. + "bl $func", IIC_BrB, []>; // See Pat patterns below. 
def BL8_TLS : IForm<18, 0, 1, (outs), (ins tlscall:$func), - "bl $func", BrB, []>; + "bl $func", IIC_BrB, []>; def BLA8 : IForm<18, 1, 1, (outs), (ins abscalltarget:$func), - "bla $func", BrB, [(PPCcall (i64 imm:$func))]>; + "bla $func", IIC_BrB, [(PPCcall (i64 imm:$func))]>; } let Uses = [RM], isCodeGenOnly = 1 in { def BL8_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs), (ins calltarget:$func), - "bl $func\n\tnop", BrB, []>; + "bl $func\n\tnop", IIC_BrB, []>; def BL8_NOP_TLS : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs), (ins tlscall:$func), - "bl $func\n\tnop", BrB, []>; + "bl $func\n\tnop", IIC_BrB, []>; def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24, (outs), (ins abscalltarget:$func), - "bla $func\n\tnop", BrB, + "bla $func\n\tnop", IIC_BrB, [(PPCcall_nop (i64 imm:$func))]>; } let Uses = [CTR8, RM] in { def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), - "bctrl", BrB, [(PPCbctrl)]>, + "bctrl", IIC_BrB, [(PPCbctrl)]>, Requires<[In64BitMode]>; let isCodeGenOnly = 1 in def BCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), - "b${cond:cc}ctrl${cond:pm} ${cond:reg}", BrB, []>, + "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB, + []>, Requires<[In64BitMode]>; } } } // Interpretation64Bit +// FIXME: Duplicating this for the asm parser should be unnecessary, but the +// previous definition must be marked as CodeGen only to prevent decoding +// conflicts. +let Interpretation64Bit = 1, isAsmParserOnly = 1 in +let isCall = 1, PPC970_Unit = 7, Defs = [LR8], Uses = [RM] in +def BL8_TLS_ : IForm<18, 0, 1, (outs), (ins tlscall:$func), + "bl $func", IIC_BrB, []>; + // Calls def : Pat<(PPCcall (i64 tglobaladdr:$dst)), (BL8 tglobaladdr:$dst)>; @@ -199,16 +207,16 @@ let usesCustomInserter = 1 in { // Instructions to support atomic operations def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr), - "ldarx $rD, $ptr", LdStLDARX, + "ldarx $rD, $ptr", IIC_LdStLDARX, [(set i64:$rD, (PPClarx xoaddr:$ptr))]>; let Defs = [CR0] in def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst), - "stdcx. $rS, $dst", LdStSTDCX, + "stdcx. 
$rS, $dst", IIC_LdStSTDCX, [(PPCstcx i64:$rS, xoaddr:$dst)]>, isDOT; -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in def TCRETURNdi8 :Pseudo< (outs), (ins calltarget:$dst, i32imm:$offset), @@ -225,28 +233,23 @@ def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset), "#TC_RETURNr8 $dst $offset", []>; -let isCodeGenOnly = 1 in { - let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in -def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, +def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, + []>, Requires<[In64BitMode]>; - let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst), - "b $dst", BrB, + "b $dst", IIC_BrB, []>; - let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in def TAILBA8 : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst), - "ba $dst", BrB, + "ba $dst", IIC_BrB, []>; - -} } // Interpretation64Bit def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm), @@ -260,23 +263,23 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), // 64-bit CR instructions -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { let neverHasSideEffects = 1 in { def MTOCRF8: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins g8rc:$ST), - "mtocrf $FXM, $ST", BrMCRX>, + "mtocrf $FXM, $ST", IIC_BrMCRX>, PPC970_DGroup_First, PPC970_Unit_CRU; def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS), - "mtcrf $FXM, $rS", BrMCRX>, + "mtcrf $FXM, $rS", IIC_BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking. def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM), - "mfocrf $rT, $FXM", SprMFCR>, + "mfocrf $rT, $FXM", IIC_SprMFCRF>, PPC970_DGroup_First, PPC970_Unit_CRU; def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins), - "mfcr $rT", SprMFCR>, + "mfcr $rT", IIC_SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; } // neverHasSideEffects = 1 @@ -298,24 +301,24 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { let Uses = [CTR8] in { def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs g8rc:$rT), (ins), - "mfctr $rT", SprMFSPR>, + "mfctr $rT", IIC_SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in { def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), - "mtctr $rS", SprMTSPR>, + "mtctr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } -let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR8] in { +let hasSideEffects = 1, Defs = [CTR8] in { let Pattern = [(int_ppc_mtctr i64:$rS)] in def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS), - "mtctr $rS", SprMTSPR>, + "mtctr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } -let isCodeGenOnly = 1, Pattern = [(set i64:$rT, readcyclecounter)] in +let Pattern = [(set i64:$rT, readcyclecounter)] in def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins), - "mfspr $rT, 268", SprMFTB>, + "mfspr $rT, 268", IIC_SprMFTB>, PPC970_DGroup_First, PPC970_Unit_FXU; // Note that encoding mftb using mfspr is now the preferred form, // and has been since at least ISA v2.03. 
The mftb instruction has @@ -329,12 +332,12 @@ def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#D let Defs = [LR8] in { def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS), - "mtlr $rS", SprMTSPR>, + "mtlr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Uses = [LR8] in { def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins), - "mflr $rT", SprMFSPR>, + "mflr $rT", IIC_SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } } // Interpretation64Bit @@ -346,213 +349,223 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins), let PPC970_Unit = 1 in { // FXU Operations. let Interpretation64Bit = 1 in { let neverHasSideEffects = 1 in { +let isCodeGenOnly = 1 in { let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI8 : DForm_2_r0<14, (outs g8rc:$rD), (ins s16imm64:$imm), - "li $rD, $imm", IntSimple, + "li $rD, $imm", IIC_IntSimple, [(set i64:$rD, imm64SExt16:$imm)]>; def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins s17imm64:$imm), - "lis $rD, $imm", IntSimple, + "lis $rD, $imm", IIC_IntSimple, [(set i64:$rD, imm16ShiftedSExt:$imm)]>; } // Logical ops. defm NAND8: XForm_6r<31, 476, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "nand", "$rA, $rS, $rB", IntSimple, + "nand", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>; defm AND8 : XForm_6r<31, 28, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "and", "$rA, $rS, $rB", IntSimple, + "and", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (and i64:$rS, i64:$rB))]>; defm ANDC8: XForm_6r<31, 60, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "andc", "$rA, $rS, $rB", IntSimple, + "andc", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>; defm OR8 : XForm_6r<31, 444, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "or", "$rA, $rS, $rB", IntSimple, + "or", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (or i64:$rS, i64:$rB))]>; defm NOR8 : XForm_6r<31, 124, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "nor", "$rA, $rS, $rB", IntSimple, + "nor", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>; defm ORC8 : XForm_6r<31, 412, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "orc", "$rA, $rS, $rB", IntSimple, + "orc", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>; defm EQV8 : XForm_6r<31, 284, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "eqv", "$rA, $rS, $rB", IntSimple, + "eqv", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>; defm XOR8 : XForm_6r<31, 316, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), - "xor", "$rA, $rS, $rB", IntSimple, + "xor", "$rA, $rS, $rB", IIC_IntSimple, [(set i64:$rA, (xor i64:$rS, i64:$rB))]>; // Logical ops with immediate. let Defs = [CR0] in { -def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "andi. $dst, $src1, $src2", IntGeneral, +def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "andi. $dst, $src1, $src2", IIC_IntGeneral, [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>, isDOT; -def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "andis. $dst, $src1, $src2", IntGeneral, +def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "andis. 
$dst, $src1, $src2", IIC_IntGeneral, [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>, isDOT; } -def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "ori $dst, $src1, $src2", IntSimple, +def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "ori $dst, $src1, $src2", IIC_IntSimple, [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>; -def ORIS8 : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "oris $dst, $src1, $src2", IntSimple, +def ORIS8 : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "oris $dst, $src1, $src2", IIC_IntSimple, [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>; -def XORI8 : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "xori $dst, $src1, $src2", IntSimple, +def XORI8 : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "xori $dst, $src1, $src2", IIC_IntSimple, [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>; -def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2), - "xoris $dst, $src1, $src2", IntSimple, +def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "xoris $dst, $src1, $src2", IIC_IntSimple, [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>; defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "add", "$rT, $rA, $rB", IntSimple, + "add", "$rT, $rA, $rB", IIC_IntSimple, [(set i64:$rT, (add i64:$rA, i64:$rB))]>; // ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the // initial-exec thread-local storage model. def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB), - "add $rT, $rA, $rB", IntSimple, + "add $rT, $rA, $rB", IIC_IntSimple, [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>; defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "addc", "$rT, $rA, $rB", IntGeneral, + "addc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (addc i64:$rA, i64:$rB))]>, PPC970_DGroup_Cracked; let Defs = [CARRY] in def ADDIC8 : DForm_2<12, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), - "addic $rD, $rA, $imm", IntGeneral, + "addic $rD, $rA, $imm", IIC_IntGeneral, [(set i64:$rD, (addc i64:$rA, imm64SExt16:$imm))]>; def ADDI8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$imm), - "addi $rD, $rA, $imm", IntSimple, + "addi $rD, $rA, $imm", IIC_IntSimple, [(set i64:$rD, (add i64:$rA, imm64SExt16:$imm))]>; def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s17imm64:$imm), - "addis $rD, $rA, $imm", IntSimple, + "addis $rD, $rA, $imm", IIC_IntSimple, [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>; let Defs = [CARRY] in { def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), - "subfic $rD, $rA, $imm", IntGeneral, + "subfic $rD, $rA, $imm", IIC_IntGeneral, [(set i64:$rD, (subc imm64SExt16:$imm, i64:$rA))]>; defm SUBFC8 : XOForm_1r<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "subfc", "$rT, $rA, $rB", IntGeneral, + "subfc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (subc i64:$rB, i64:$rA))]>, PPC970_DGroup_Cracked; } defm SUBF8 : XOForm_1r<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "subf", "$rT, $rA, $rB", IntGeneral, + "subf", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (sub i64:$rB, i64:$rA))]>; defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA), - "neg", "$rT, $rA", IntSimple, + "neg", "$rT, $rA", IIC_IntSimple, [(set i64:$rT, (ineg i64:$rA))]>; let Uses = [CARRY] in { defm ADDE8 : XOForm_1rc<31, 138, 0, (outs 
g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "adde", "$rT, $rA, $rB", IntGeneral, + "adde", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (adde i64:$rA, i64:$rB))]>; defm ADDME8 : XOForm_3rc<31, 234, 0, (outs g8rc:$rT), (ins g8rc:$rA), - "addme", "$rT, $rA", IntGeneral, + "addme", "$rT, $rA", IIC_IntGeneral, [(set i64:$rT, (adde i64:$rA, -1))]>; defm ADDZE8 : XOForm_3rc<31, 202, 0, (outs g8rc:$rT), (ins g8rc:$rA), - "addze", "$rT, $rA", IntGeneral, + "addze", "$rT, $rA", IIC_IntGeneral, [(set i64:$rT, (adde i64:$rA, 0))]>; defm SUBFE8 : XOForm_1rc<31, 136, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "subfe", "$rT, $rA, $rB", IntGeneral, + "subfe", "$rT, $rA, $rB", IIC_IntGeneral, [(set i64:$rT, (sube i64:$rB, i64:$rA))]>; defm SUBFME8 : XOForm_3rc<31, 232, 0, (outs g8rc:$rT), (ins g8rc:$rA), - "subfme", "$rT, $rA", IntGeneral, + "subfme", "$rT, $rA", IIC_IntGeneral, [(set i64:$rT, (sube -1, i64:$rA))]>; defm SUBFZE8 : XOForm_3rc<31, 200, 0, (outs g8rc:$rT), (ins g8rc:$rA), - "subfze", "$rT, $rA", IntGeneral, + "subfze", "$rT, $rA", IIC_IntGeneral, [(set i64:$rT, (sube 0, i64:$rA))]>; } +} // isCodeGenOnly +// FIXME: Duplicating this for the asm parser should be unnecessary, but the +// previous definition must be marked as CodeGen only to prevent decoding +// conflicts. +let isAsmParserOnly = 1 in +def ADD8TLS_ : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB), + "add $rT, $rA, $rB", IIC_IntSimple, []>; defm MULHD : XOForm_1r<31, 73, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "mulhd", "$rT, $rA, $rB", IntMulHW, + "mulhd", "$rT, $rA, $rB", IIC_IntMulHW, [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>; defm MULHDU : XOForm_1r<31, 9, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "mulhdu", "$rT, $rA, $rB", IntMulHWU, + "mulhdu", "$rT, $rA, $rB", IIC_IntMulHWU, [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>; } } // Interpretation64Bit let isCompare = 1, neverHasSideEffects = 1 in { def CMPD : XForm_16_ext<31, 0, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB), - "cmpd $crD, $rA, $rB", IntCompare>, isPPC64; + "cmpd $crD, $rA, $rB", IIC_IntCompare>, isPPC64; def CMPLD : XForm_16_ext<31, 32, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB), - "cmpld $crD, $rA, $rB", IntCompare>, isPPC64; - def CMPDI : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm:$imm), - "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64; - def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm:$src2), - "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64; + "cmpld $crD, $rA, $rB", IIC_IntCompare>, isPPC64; + def CMPDI : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm64:$imm), + "cmpdi $crD, $rA, $imm", IIC_IntCompare>, isPPC64; + def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm64:$src2), + "cmpldi $dst, $src1, $src2", + IIC_IntCompare>, isPPC64; } let neverHasSideEffects = 1 in { defm SLD : XForm_6r<31, 27, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), - "sld", "$rA, $rS, $rB", IntRotateD, + "sld", "$rA, $rS, $rB", IIC_IntRotateD, [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64; defm SRD : XForm_6r<31, 539, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), - "srd", "$rA, $rS, $rB", IntRotateD, + "srd", "$rA, $rS, $rB", IIC_IntRotateD, [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64; defm SRAD : XForm_6rc<31, 794, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB), - "srad", "$rA, $rS, $rB", IntRotateD, + "srad", "$rA, $rS, $rB", IIC_IntRotateD, [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64; -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { 
defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS), - "extsb", "$rA, $rS", IntSimple, + "extsb", "$rA, $rS", IIC_IntSimple, [(set i64:$rA, (sext_inreg i64:$rS, i8))]>; defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS), - "extsh", "$rA, $rS", IntSimple, + "extsh", "$rA, $rS", IIC_IntSimple, [(set i64:$rA, (sext_inreg i64:$rS, i16))]>; } // Interpretation64Bit // For fast-isel: let isCodeGenOnly = 1 in { def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS), - "extsb $rA, $rS", IntSimple, []>, isPPC64; + "extsb $rA, $rS", IIC_IntSimple, []>, isPPC64; def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS), - "extsh $rA, $rS", IntSimple, []>, isPPC64; + "extsh $rA, $rS", IIC_IntSimple, []>, isPPC64; } // isCodeGenOnly for fast-isel defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS), - "extsw", "$rA, $rS", IntSimple, + "extsw", "$rA, $rS", IIC_IntSimple, [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$rA), (ins gprc:$rS), - "extsw", "$rA, $rS", IntSimple, + "extsw", "$rA, $rS", IIC_IntSimple, [(set i64:$rA, (sext i32:$rS))]>, isPPC64; defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH), - "sradi", "$rA, $rS, $SH", IntRotateDI, + "sradi", "$rA, $rS, $SH", IIC_IntRotateDI, [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64; defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS), - "cntlzd", "$rA, $rS", IntGeneral, + "cntlzd", "$rA, $rS", IIC_IntGeneral, [(set i64:$rA, (ctlz i64:$rS))]>; def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS), - "popcntd $rA, $rS", IntGeneral, + "popcntd $rA, $rS", IIC_IntGeneral, [(set i64:$rA, (ctpop i64:$rS))]>; // popcntw also does a population count on the high 32 bits (storing the // results in the high 32-bits of the output). We'll ignore that here (which is // safe because we never separately use the high part of the 64-bit registers). 
def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS), - "popcntw $rA, $rS", IntGeneral, + "popcntw $rA, $rS", IIC_IntGeneral, [(set i32:$rA, (ctpop i32:$rS))]>; defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divd", "$rT, $rA, $rB", IntDivD, + "divd", "$rT, $rA, $rB", IIC_IntDivD, [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64, PPC970_DGroup_First, PPC970_DGroup_Cracked; defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divdu", "$rT, $rA, $rB", IntDivD, + "divdu", "$rT, $rA, $rB", IIC_IntDivD, [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64, PPC970_DGroup_First, PPC970_DGroup_Cracked; defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "mulld", "$rT, $rA, $rB", IntMulHD, + "mulld", "$rT, $rA, $rB", IIC_IntMulHD, [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64; +let Interpretation64Bit = 1, isCodeGenOnly = 1 in def MULLI8 : DForm_2<7, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm), - "mulli $rD, $rA, $imm", IntMulLI, + "mulli $rD, $rA, $imm", IIC_IntMulLI, [(set i64:$rD, (mul i64:$rA, imm64SExt16:$imm))]>; } @@ -560,7 +573,7 @@ let neverHasSideEffects = 1 in { let isCommutable = 1 in { defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA), (ins g8rc:$rSi, g8rc:$rS, u6imm:$SH, u6imm:$MBE), - "rldimi", "$rA, $rS, $SH, $MBE", IntRotateDI, + "rldimi", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64, RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">; } @@ -568,43 +581,43 @@ defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA), // Rotate instructions. defm RLDCL : MDSForm_1r<30, 8, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE), - "rldcl", "$rA, $rS, $rB, $MBE", IntRotateD, + "rldcl", "$rA, $rS, $rB, $MBE", IIC_IntRotateD, []>, isPPC64; defm RLDCR : MDSForm_1r<30, 9, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE), - "rldcr", "$rA, $rS, $rB, $MBE", IntRotateD, + "rldcr", "$rA, $rS, $rB, $MBE", IIC_IntRotateD, []>, isPPC64; defm RLDICL : MDForm_1r<30, 0, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), - "rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI, + "rldicl", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64; // For fast-isel: let isCodeGenOnly = 1 in def RLDICL_32_64 : MDForm_1<30, 0, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH, u6imm:$MBE), - "rldicl $rA, $rS, $SH, $MBE", IntRotateDI, + "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64; // End fast-isel. defm RLDICR : MDForm_1r<30, 1, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), - "rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI, + "rldicr", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64; defm RLDIC : MDForm_1r<30, 2, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), - "rldic", "$rA, $rS, $SH, $MBE", IntRotateDI, + "rldic", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64; -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { defm RLWINM8 : MForm_2r<21, (outs g8rc:$rA), (ins g8rc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), - "rlwinm", "$rA, $rS, $SH, $MB, $ME", IntGeneral, + "rlwinm", "$rA, $rS, $SH, $MB, $ME", IIC_IntGeneral, []>; let isSelect = 1 in def ISEL8 : AForm_4<31, 15, (outs g8rc:$rT), (ins g8rc_nox0:$rA, g8rc:$rB, crbitrc:$cond), - "isel $rT, $rA, $rB, $cond", IntGeneral, + "isel $rT, $rA, $rB, $cond", IIC_IntGeneral, []>; } // Interpretation64Bit } // neverHasSideEffects = 1 @@ -618,111 +631,111 @@ def ISEL8 : AForm_4<31, 15, // Sign extending loads. 
let canFoldAsLoad = 1, PPC970_Unit = 2 in { -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src), - "lha $rD, $src", LdStLHA, + "lha $rD, $src", IIC_LdStLHA, [(set i64:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src), - "lwa $rD, $src", LdStLWA, + "lwa $rD, $src", IIC_LdStLWA, [(set i64:$rD, (aligned4sextloadi32 ixaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHAX8: XForm_1<31, 343, (outs g8rc:$rD), (ins memrr:$src), - "lhax $rD, $src", LdStLHA, + "lhax $rD, $src", IIC_LdStLHA, [(set i64:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; def LWAX : XForm_1<31, 341, (outs g8rc:$rD), (ins memrr:$src), - "lwax $rD, $src", LdStLHA, + "lwax $rD, $src", IIC_LdStLHA, [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; // For fast-isel: let isCodeGenOnly = 1, mayLoad = 1 in { def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src), - "lwa $rD, $src", LdStLWA, []>, isPPC64, + "lwa $rD, $src", IIC_LdStLWA, []>, isPPC64, PPC970_DGroup_Cracked; def LWAX_32 : XForm_1<31, 341, (outs gprc:$rD), (ins memrr:$src), - "lwax $rD, $src", LdStLHA, []>, isPPC64, + "lwax $rD, $src", IIC_LdStLHA, []>, isPPC64, PPC970_DGroup_Cracked; } // end fast-isel isCodeGenOnly // Update forms. let mayLoad = 1, neverHasSideEffects = 1 in { -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHAU8 : DForm_1<43, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lhau $rD, $addr", LdStLHAU, + "lhau $rD, $addr", IIC_LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHAUX8 : XForm_1<31, 375, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lhaux $rD, $addr", LdStLHAU, + "lhaux $rD, $addr", IIC_LdStLHAUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lwaux $rD, $addr", LdStLHAU, + "lwaux $rD, $addr", IIC_LdStLHAUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } } -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { // Zero extending loads. 
let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src), - "lbz $rD, $src", LdStLoad, + "lbz $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi8 iaddr:$src))]>; def LHZ8 : DForm_1<40, (outs g8rc:$rD), (ins memri:$src), - "lhz $rD, $src", LdStLoad, + "lhz $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi16 iaddr:$src))]>; def LWZ8 : DForm_1<32, (outs g8rc:$rD), (ins memri:$src), - "lwz $rD, $src", LdStLoad, + "lwz $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; def LBZX8 : XForm_1<31, 87, (outs g8rc:$rD), (ins memrr:$src), - "lbzx $rD, $src", LdStLoad, + "lbzx $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi8 xaddr:$src))]>; def LHZX8 : XForm_1<31, 279, (outs g8rc:$rD), (ins memrr:$src), - "lhzx $rD, $src", LdStLoad, + "lhzx $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi16 xaddr:$src))]>; def LWZX8 : XForm_1<31, 23, (outs g8rc:$rD), (ins memrr:$src), - "lwzx $rD, $src", LdStLoad, + "lwzx $rD, $src", IIC_LdStLoad, [(set i64:$rD, (zextloadi32 xaddr:$src))]>; // Update forms. let mayLoad = 1, neverHasSideEffects = 1 in { def LBZU8 : DForm_1<35, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStLoadUpd, + "lbzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU8 : DForm_1<41, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStLoadUpd, + "lhzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU8 : DForm_1<33, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStLoadUpd, + "lwzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LBZUX8 : XForm_1<31, 119, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lbzux $rD, $addr", LdStLoadUpd, + "lbzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LHZUX8 : XForm_1<31, 311, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lhzux $rD, $addr", LdStLoadUpd, + "lhzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lwzux $rD, $addr", LdStLoadUpd, + "lwzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; } @@ -733,7 +746,7 @@ def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), // Full 8-byte loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src), - "ld $rD, $src", LdStLD, + "ld $rD, $src", IIC_LdStLD, [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64; // The following three definitions are selected for small code model only. 
// Otherwise, we need to create two instructions to form a 32-bit offset, @@ -754,30 +767,30 @@ def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), let hasSideEffects = 1, isCodeGenOnly = 1 in { let RST = 2, DS = 2 in def LDinto_toc: DSForm_1a<58, 0, (outs), (ins g8rc:$reg), - "ld 2, 8($reg)", LdStLD, + "ld 2, 8($reg)", IIC_LdStLD, [(PPCload_toc i64:$reg)]>, isPPC64; let RST = 2, DS = 10, RA = 1 in def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), - "ld 2, 40(1)", LdStLD, + "ld 2, 40(1)", IIC_LdStLD, [(PPCtoc_restore)]>, isPPC64; } def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src), - "ldx $rD, $src", LdStLD, + "ldx $rD, $src", IIC_LdStLD, [(set i64:$rD, (load xaddr:$src))]>, isPPC64; def LDBRX : XForm_1<31, 532, (outs g8rc:$rD), (ins memrr:$src), - "ldbrx $rD, $src", LdStLoad, + "ldbrx $rD, $src", IIC_LdStLoad, [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64; let mayLoad = 1, neverHasSideEffects = 1 in { def LDU : DSForm_1<58, 1, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr), - "ldu $rD, $addr", LdStLDU, + "ldu $rD, $addr", IIC_LdStLDU, []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; def LDUX : XForm_1<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "ldux $rD, $addr", LdStLDU, + "ldux $rD, $addr", IIC_LdStLDUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } @@ -860,78 +873,79 @@ def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), isPPC64; let PPC970_Unit = 2 in { -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { // Truncating stores. def STB8 : DForm_1<38, (outs), (ins g8rc:$rS, memri:$src), - "stb $rS, $src", LdStStore, + "stb $rS, $src", IIC_LdStStore, [(truncstorei8 i64:$rS, iaddr:$src)]>; def STH8 : DForm_1<44, (outs), (ins g8rc:$rS, memri:$src), - "sth $rS, $src", LdStStore, + "sth $rS, $src", IIC_LdStStore, [(truncstorei16 i64:$rS, iaddr:$src)]>; def STW8 : DForm_1<36, (outs), (ins g8rc:$rS, memri:$src), - "stw $rS, $src", LdStStore, + "stw $rS, $src", IIC_LdStStore, [(truncstorei32 i64:$rS, iaddr:$src)]>; def STBX8 : XForm_8<31, 215, (outs), (ins g8rc:$rS, memrr:$dst), - "stbx $rS, $dst", LdStStore, + "stbx $rS, $dst", IIC_LdStStore, [(truncstorei8 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX8 : XForm_8<31, 407, (outs), (ins g8rc:$rS, memrr:$dst), - "sthx $rS, $dst", LdStStore, + "sthx $rS, $dst", IIC_LdStStore, [(truncstorei16 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX8 : XForm_8<31, 151, (outs), (ins g8rc:$rS, memrr:$dst), - "stwx $rS, $dst", LdStStore, + "stwx $rS, $dst", IIC_LdStStore, [(truncstorei32 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; } // Interpretation64Bit // Normal 8-byte stores. def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst), - "std $rS, $dst", LdStSTD, + "std $rS, $dst", IIC_LdStSTD, [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64; def STDX : XForm_8<31, 149, (outs), (ins g8rc:$rS, memrr:$dst), - "stdx $rS, $dst", LdStSTD, + "stdx $rS, $dst", IIC_LdStSTD, [(store i64:$rS, xaddr:$dst)]>, isPPC64, PPC970_DGroup_Cracked; def STDBRX: XForm_8<31, 660, (outs), (ins g8rc:$rS, memrr:$dst), - "stdbrx $rS, $dst", LdStStore, + "stdbrx $rS, $dst", IIC_LdStStore, [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64, PPC970_DGroup_Cracked; } // Stores with Update (pre-inc). 
let PPC970_Unit = 2, mayStore = 1 in { -let Interpretation64Bit = 1 in { +let Interpretation64Bit = 1, isCodeGenOnly = 1 in { def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), - "stbu $rS, $dst", LdStStoreUpd, []>, + "stbu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), - "sthu $rS, $dst", LdStStoreUpd, []>, + "sthu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst), - "stwu $rS, $dst", LdStStoreUpd, []>, + "stwu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; -def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrix:$dst), - "stdu $rS, $dst", LdStSTDU, []>, - RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">, - isPPC64; def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), - "stbux $rS, $dst", LdStStoreUpd, []>, + "stbux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), - "sthux $rS, $dst", LdStStoreUpd, []>, + "sthux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), - "stwux $rS, $dst", LdStStoreUpd, []>, + "stwux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; } // Interpretation64Bit +def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrix:$dst), + "stdu $rS, $dst", IIC_LdStSTDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">, + isPPC64; + def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst), - "stdux $rS, $dst", LdStSTDU, []>, + "stdux $rS, $dst", IIC_LdStSTDUX, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked, isPPC64; } @@ -966,29 +980,29 @@ def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), let PPC970_Unit = 3, neverHasSideEffects = 1, Uses = [RM] in { // FPU Operations. 
defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB), - "fcfid", "$frD, $frB", FPGeneral, + "fcfid", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64; defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB), - "fctid", "$frD, $frB", FPGeneral, + "fctid", "$frD, $frB", IIC_FPGeneral, []>, isPPC64; defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB), - "fctidz", "$frD, $frB", FPGeneral, + "fctidz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64; defm FCFIDU : XForm_26r<63, 974, (outs f8rc:$frD), (ins f8rc:$frB), - "fcfidu", "$frD, $frB", FPGeneral, + "fcfidu", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64; defm FCFIDS : XForm_26r<59, 846, (outs f4rc:$frD), (ins f8rc:$frB), - "fcfids", "$frD, $frB", FPGeneral, + "fcfids", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64; defm FCFIDUS : XForm_26r<59, 974, (outs f4rc:$frD), (ins f8rc:$frB), - "fcfidus", "$frD, $frB", FPGeneral, + "fcfidus", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64; defm FCTIDUZ : XForm_26r<63, 943, (outs f8rc:$frD), (ins f8rc:$frB), - "fctiduz", "$frD, $frB", FPGeneral, + "fctiduz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64; defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB), - "fctiwuz", "$frD, $frB", FPGeneral, + "fctiwuz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index a55abe373556..b67837394ae7 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -164,7 +164,7 @@ def vecspltisw : PatLeaf<(build_vector), [{ // VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type. class VA1a_Int_Ty xo, string opc, Intrinsic IntID, ValueType Ty> : VAForm_1a; // VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the @@ -172,7 +172,7 @@ class VA1a_Int_Ty xo, string opc, Intrinsic IntID, ValueType Ty> class VA1a_Int_Ty2 xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType InTy> : VAForm_1a; // VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two @@ -180,14 +180,14 @@ class VA1a_Int_Ty2 xo, string opc, Intrinsic IntID, ValueType OutTy, class VA1a_Int_Ty3 xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType In1Ty, ValueType In2Ty> : VAForm_1a; // VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type. class VX1_Int_Ty xo, string opc, Intrinsic IntID, ValueType Ty> : VXForm_1; // VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the @@ -195,7 +195,7 @@ class VX1_Int_Ty xo, string opc, Intrinsic IntID, ValueType Ty> class VX1_Int_Ty2 xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType InTy> : VXForm_1; // VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two @@ -203,13 +203,13 @@ class VX1_Int_Ty2 xo, string opc, Intrinsic IntID, ValueType OutTy, class VX1_Int_Ty3 xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType In1Ty, ValueType In2Ty> : VXForm_1; // VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type. 
class VX2_Int_SP xo, string opc, Intrinsic IntID> : VXForm_2; // VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the @@ -217,7 +217,7 @@ class VX2_Int_SP xo, string opc, Intrinsic IntID> class VX2_Int_Ty2 xo, string opc, Intrinsic IntID, ValueType OutTy, ValueType InTy> : VXForm_2; //===----------------------------------------------------------------------===// @@ -229,109 +229,109 @@ let Predicates = [HasAltivec] in { let isCodeGenOnly = 1 in { def DSS : DSS_Form<822, (outs), (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), - "dss $STRM", LdStLoad /*FIXME*/, []>, + "dss $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DSSALL : DSS_Form<822, (outs), (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2), - "dssall", LdStLoad /*FIXME*/, []>, + "dssall", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DST : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DSTT : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DSTST : DSS_Form<374, (outs), (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DSTSTT : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB), - "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DST64 : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DSTT64 : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DSTST64 : DSS_Form<374, (outs), (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; def DSTSTT64 : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB), - "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>, + "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>, Deprecated; } def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins), - "mfvscr $vD", LdStStore, + "mfvscr $vD", IIC_LdStStore, [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB), - "mtvscr $vB", LdStLoad, + "mtvscr $vB", IIC_LdStLoad, [(int_ppc_altivec_mtvscr v4i32:$vB)]>; let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads. 
def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src), - "lvebx $vD, $src", LdStLoad, + "lvebx $vD, $src", IIC_LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; def LVEHX: XForm_1<31, 39, (outs vrrc:$vD), (ins memrr:$src), - "lvehx $vD, $src", LdStLoad, + "lvehx $vD, $src", IIC_LdStLoad, [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; def LVEWX: XForm_1<31, 71, (outs vrrc:$vD), (ins memrr:$src), - "lvewx $vD, $src", LdStLoad, + "lvewx $vD, $src", IIC_LdStLoad, [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; def LVX : XForm_1<31, 103, (outs vrrc:$vD), (ins memrr:$src), - "lvx $vD, $src", LdStLoad, + "lvx $vD, $src", IIC_LdStLoad, [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; def LVXL : XForm_1<31, 359, (outs vrrc:$vD), (ins memrr:$src), - "lvxl $vD, $src", LdStLoad, + "lvxl $vD, $src", IIC_LdStLoad, [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; } def LVSL : XForm_1<31, 6, (outs vrrc:$vD), (ins memrr:$src), - "lvsl $vD, $src", LdStLoad, + "lvsl $vD, $src", IIC_LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, PPC970_Unit_LSU; def LVSR : XForm_1<31, 38, (outs vrrc:$vD), (ins memrr:$src), - "lvsr $vD, $src", LdStLoad, + "lvsr $vD, $src", IIC_LdStLoad, [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, PPC970_Unit_LSU; let PPC970_Unit = 2 in { // Stores. def STVEBX: XForm_8<31, 135, (outs), (ins vrrc:$rS, memrr:$dst), - "stvebx $rS, $dst", LdStStore, + "stvebx $rS, $dst", IIC_LdStStore, [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>; def STVEHX: XForm_8<31, 167, (outs), (ins vrrc:$rS, memrr:$dst), - "stvehx $rS, $dst", LdStStore, + "stvehx $rS, $dst", IIC_LdStStore, [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>; def STVEWX: XForm_8<31, 199, (outs), (ins vrrc:$rS, memrr:$dst), - "stvewx $rS, $dst", LdStStore, + "stvewx $rS, $dst", IIC_LdStStore, [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>; def STVX : XForm_8<31, 231, (outs), (ins vrrc:$rS, memrr:$dst), - "stvx $rS, $dst", LdStStore, + "stvx $rS, $dst", IIC_LdStStore, [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>; def STVXL : XForm_8<31, 487, (outs), (ins vrrc:$rS, memrr:$dst), - "stvxl $rS, $dst", LdStStore, + "stvxl $rS, $dst", IIC_LdStStore, [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>; } let PPC970_Unit = 5 in { // VALU Operations. // VA-Form instructions. 3-input AltiVec ops. def VMADDFP : VAForm_1<46, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB), - "vmaddfp $vD, $vA, $vC, $vB", VecFP, + "vmaddfp $vD, $vA, $vC, $vB", IIC_VecFP, [(set v4f32:$vD, (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>; // FIXME: The fma+fneg pattern won't match because fneg is not legal. def VNMSUBFP: VAForm_1<47, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB), - "vnmsubfp $vD, $vA, $vC, $vB", VecFP, + "vnmsubfp $vD, $vA, $vC, $vB", IIC_VecFP, [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC, (fneg v4f32:$vB))))]>; @@ -346,23 +346,23 @@ def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>; // Shuffles. def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u5imm:$SH), - "vsldoi $vD, $vA, $vB, $SH", VecFP, + "vsldoi $vD, $vA, $vB, $SH", IIC_VecFP, [(set v16i8:$vD, (vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>; // VX-Form instructions. AltiVec arithmetic ops. 
def VADDFP : VXForm_1<10, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vaddfp $vD, $vA, $vB", VecFP, + "vaddfp $vD, $vA, $vB", IIC_VecFP, [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>; def VADDUBM : VXForm_1<0, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vaddubm $vD, $vA, $vB", VecGeneral, + "vaddubm $vD, $vA, $vB", IIC_VecGeneral, [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>; def VADDUHM : VXForm_1<64, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vadduhm $vD, $vA, $vB", VecGeneral, + "vadduhm $vD, $vA, $vB", IIC_VecGeneral, [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>; def VADDUWM : VXForm_1<128, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vadduwm $vD, $vA, $vB", VecGeneral, + "vadduwm $vD, $vA, $vB", IIC_VecGeneral, [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>; def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>; @@ -375,27 +375,27 @@ def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>; def VAND : VXForm_1<1028, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vand $vD, $vA, $vB", VecFP, + "vand $vD, $vA, $vB", IIC_VecFP, [(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>; def VANDC : VXForm_1<1092, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vandc $vD, $vA, $vB", VecFP, + "vandc $vD, $vA, $vB", IIC_VecFP, [(set v4i32:$vD, (and v4i32:$vA, (vnot_ppc v4i32:$vB)))]>; def VCFSX : VXForm_1<842, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vcfsx $vD, $vB, $UIMM", VecFP, + "vcfsx $vD, $vB, $UIMM", IIC_VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>; def VCFUX : VXForm_1<778, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vcfux $vD, $vB, $UIMM", VecFP, + "vcfux $vD, $vB, $UIMM", IIC_VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>; def VCTSXS : VXForm_1<970, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vctsxs $vD, $vB, $UIMM", VecFP, + "vctsxs $vD, $vB, $UIMM", IIC_VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>; def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vctuxs $vD, $vB, $UIMM", VecFP, + "vctuxs $vD, $vB, $UIMM", IIC_VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>; @@ -404,19 +404,19 @@ def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), // to floating-point (sint_to_fp/uint_to_fp) conversions. 
let isCodeGenOnly = 1, VA = 0 in { def VCFSX_0 : VXForm_1<842, (outs vrrc:$vD), (ins vrrc:$vB), - "vcfsx $vD, $vB, 0", VecFP, + "vcfsx $vD, $vB, 0", IIC_VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>; def VCTUXS_0 : VXForm_1<906, (outs vrrc:$vD), (ins vrrc:$vB), - "vctuxs $vD, $vB, 0", VecFP, + "vctuxs $vD, $vB, 0", IIC_VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>; def VCFUX_0 : VXForm_1<778, (outs vrrc:$vD), (ins vrrc:$vB), - "vcfux $vD, $vB, 0", VecFP, + "vcfux $vD, $vB, 0", IIC_VecFP, [(set v4f32:$vD, (int_ppc_altivec_vcfux v4i32:$vB, 0))]>; def VCTSXS_0 : VXForm_1<970, (outs vrrc:$vD), (ins vrrc:$vB), - "vctsxs $vD, $vB, 0", VecFP, + "vctsxs $vD, $vB, 0", IIC_VecFP, [(set v4i32:$vD, (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>; } @@ -446,22 +446,22 @@ def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>; def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>; def VMRGHB : VXForm_1< 12, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrghb $vD, $vA, $vB", VecFP, + "vmrghb $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGHH : VXForm_1< 76, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrghh $vD, $vA, $vB", VecFP, + "vmrghh $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGHW : VXForm_1<140, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrghw $vD, $vA, $vB", VecFP, + "vmrghw $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGLB : VXForm_1<268, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrglb $vD, $vA, $vB", VecFP, + "vmrglb $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGLH : VXForm_1<332, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrglh $vD, $vA, $vB", VecFP, + "vmrglh $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGLW : VXForm_1<396, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vmrglw $vD, $vA, $vB", VecFP, + "vmrglw $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>; def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm, @@ -504,16 +504,16 @@ def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>; def VSUBCUW : VX1_Int_Ty<1408, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>; def VSUBFP : VXForm_1<74, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vsubfp $vD, $vA, $vB", VecGeneral, + "vsubfp $vD, $vA, $vB", IIC_VecGeneral, [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>; def VSUBUBM : VXForm_1<1024, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vsububm $vD, $vA, $vB", VecGeneral, + "vsububm $vD, $vA, $vB", IIC_VecGeneral, [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>; def VSUBUHM : VXForm_1<1088, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vsubuhm $vD, $vA, $vB", VecGeneral, + "vsubuhm $vD, $vA, $vB", IIC_VecGeneral, [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>; def VSUBUWM : VXForm_1<1152, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vsubuwm $vD, $vA, $vB", VecGeneral, + "vsubuwm $vD, $vA, $vB", IIC_VecGeneral, [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>; def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>; @@ -534,14 +534,14 @@ def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs, v4i32, v16i8, v4i32>; def VNOR : VXForm_1<1284, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vnor $vD, $vA, $vB", VecFP, + "vnor $vD, $vA, $vB", IIC_VecFP, [(set v4i32:$vD, (vnot_ppc (or v4i32:$vA, 
v4i32:$vB)))]>; def VOR : VXForm_1<1156, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vor $vD, $vA, $vB", VecFP, + "vor $vD, $vA, $vB", IIC_VecFP, [(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>; def VXOR : VXForm_1<1220, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vxor $vD, $vA, $vB", VecFP, + "vxor $vD, $vA, $vB", IIC_VecFP, [(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>; def VRLB : VX1_Int_Ty< 4, "vrlb", int_ppc_altivec_vrlb, v16i8>; @@ -556,15 +556,15 @@ def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>; def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>; def VSPLTB : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vspltb $vD, $vB, $UIMM", VecPerm, + "vspltb $vD, $vB, $UIMM", IIC_VecPerm, [(set v16i8:$vD, (vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>; def VSPLTH : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vsplth $vD, $vB, $UIMM", VecPerm, + "vsplth $vD, $vB, $UIMM", IIC_VecPerm, [(set v16i8:$vD, (vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>; def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB), - "vspltw $vD, $vB, $UIMM", VecPerm, + "vspltw $vD, $vB, $UIMM", IIC_VecPerm, [(set v16i8:$vD, (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>; @@ -580,13 +580,13 @@ def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>; def VSPLTISB : VXForm_3<780, (outs vrrc:$vD), (ins s5imm:$SIMM), - "vspltisb $vD, $SIMM", VecPerm, + "vspltisb $vD, $SIMM", IIC_VecPerm, [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>; def VSPLTISH : VXForm_3<844, (outs vrrc:$vD), (ins s5imm:$SIMM), - "vspltish $vD, $SIMM", VecPerm, + "vspltish $vD, $SIMM", IIC_VecPerm, [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>; def VSPLTISW : VXForm_3<908, (outs vrrc:$vD), (ins s5imm:$SIMM), - "vspltisw $vD, $SIMM", VecPerm, + "vspltisw $vD, $SIMM", IIC_VecPerm, [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>; // Vector Pack. @@ -601,13 +601,13 @@ def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss, def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus, v8i16, v4i32>; def VPKUHUM : VXForm_1<14, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vpkuhum $vD, $vA, $vB", VecFP, + "vpkuhum $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>; def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus, v16i8, v8i16>; def VPKUWUM : VXForm_1<78, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vpkuwum $vD, $vA, $vB", VecFP, + "vpkuwum $vD, $vA, $vB", IIC_VecFP, [(set v16i8:$vD, (vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>; def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus, @@ -631,10 +631,12 @@ def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh, // Altivec Comparisons. class VCMP xo, string asmstr, ValueType Ty> - : VXRForm_1; class VCMPo xo, string asmstr, ValueType Ty> - : VXRForm_1 { let Defs = [CR6]; let RC = 1; @@ -676,24 +678,24 @@ def VCMPGTUWo : VCMPo<646, "vcmpgtuw. 
$vD, $vA, $vB", v4i32>; let isCodeGenOnly = 1 in { def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins), - "vxor $vD, $vD, $vD", VecFP, + "vxor $vD, $vD, $vD", IIC_VecFP, [(set v16i8:$vD, (v16i8 immAllZerosV))]>; def V_SET0H : VXForm_setzero<1220, (outs vrrc:$vD), (ins), - "vxor $vD, $vD, $vD", VecFP, + "vxor $vD, $vD, $vD", IIC_VecFP, [(set v8i16:$vD, (v8i16 immAllZerosV))]>; def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins), - "vxor $vD, $vD, $vD", VecFP, + "vxor $vD, $vD, $vD", IIC_VecFP, [(set v4i32:$vD, (v4i32 immAllZerosV))]>; let IMM=-1 in { def V_SETALLONESB : VXForm_3<908, (outs vrrc:$vD), (ins), - "vspltisw $vD, -1", VecFP, + "vspltisw $vD, -1", IIC_VecFP, [(set v16i8:$vD, (v16i8 immAllOnesV))]>; def V_SETALLONESH : VXForm_3<908, (outs vrrc:$vD), (ins), - "vspltisw $vD, -1", VecFP, + "vspltisw $vD, -1", IIC_VecFP, [(set v8i16:$vD, (v8i16 immAllOnesV))]>; def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins), - "vspltisw $vD, -1", VecFP, + "vspltisw $vD, -1", IIC_VecFP, [(set v4i32:$vD, (v4i32 immAllOnesV))]>; } } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCInstrFormats.td index 29233d49148d..dae40c0cc26b 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -14,6 +14,8 @@ class I opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : Instruction { field bits<32> Inst; + field bits<32> SoftFail = 0; + let Size = 4; bit PPC64 = 0; // Default value, override with isPPC64 @@ -67,6 +69,8 @@ class I2 opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : Instruction { field bits<64> Inst; + field bits<64> SoftFail = 0; + let Size = 8; bit PPC64 = 0; // Default value, override with isPPC64 @@ -109,7 +113,7 @@ class IForm opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr, // 1.7.2 B-Form class BForm opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr> - : I { + : I { bits<7> BIBO; // 2 bits of BI and 5 bits of BO. 
   bits<3>  CR;
   bits<14> BD;
@@ -135,7 +139,7 @@ class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
 
 class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
               dag OOL, dag IOL, string asmstr>
-  : I<opcode, OOL, IOL, asmstr, BrB> {
+  : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
   bits<14> BD;
 
   let Inst{6-10} = bo;
@@ -147,7 +151,7 @@ class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
 
 class BForm_3<bits<6> opcode, bit aa, bit lk,
              dag OOL, dag IOL, string asmstr>
-  : I<opcode, OOL, IOL, asmstr, BrB> {
+  : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
   bits<5> BO;
   bits<5> BI;
   bits<14> BD;
@@ -258,6 +262,15 @@ class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
   let Addr = 0;
 }
 
+class DForm_4_fixedreg_zero<bits<6> opcode, bits<5> R, dag OOL, dag IOL,
+                            string asmstr, InstrItinClass itin,
+                            list<dag> pattern>
+  : DForm_4<opcode, OOL, IOL, asmstr, itin, pattern> {
+  let A = R;
+  let B = R;
+  let C = 0;
+}
+
 class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
                         dag OOL, dag IOL, string asmstr,
                         InstrItinClass itin, list<dag> pattern>
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 87e13ef9eb11..2bbaf4626b05 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -61,7 +61,7 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
   if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
       Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
     const InstrItineraryData *II = TM->getInstrItineraryData();
-    return new PPCScoreboardHazardRecognizer(II, DAG);
+    return new ScoreboardHazardRecognizer(II, DAG);
   }
 
   return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
@@ -74,6 +74,9 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
   const ScheduleDAG *DAG) const {
   unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
 
+  if (Directive == PPC::DIR_PWR7)
+    return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
+
   // Most subtargets use a PPC970 recognizer.
   if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
       Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
@@ -82,7 +85,57 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
     return new PPCHazardRecognizer970(TM);
   }
 
-  return new PPCScoreboardHazardRecognizer(II, DAG);
+  return new ScoreboardHazardRecognizer(II, DAG);
+}
+
+
+int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+                                    const MachineInstr *DefMI, unsigned DefIdx,
+                                    const MachineInstr *UseMI,
+                                    unsigned UseIdx) const {
+  int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx,
+                                                   UseMI, UseIdx);
+
+  const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+  unsigned Reg = DefMO.getReg();
+
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  bool IsRegCR;
+  if (TRI->isVirtualRegister(Reg)) {
+    const MachineRegisterInfo *MRI =
+      &DefMI->getParent()->getParent()->getRegInfo();
+    IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
+              MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
+  } else {
+    IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
+              PPC::CRBITRCRegClass.contains(Reg);
+  }
+
+  if (UseMI->isBranch() && IsRegCR) {
+    if (Latency < 0)
+      Latency = getInstrLatency(ItinData, DefMI);
+
+    // On some cores, there is an additional delay between writing to a
+    // condition register and using it from a branch.
+    unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+    switch (Directive) {
+    default: break;
+    case PPC::DIR_7400:
+    case PPC::DIR_750:
+    case PPC::DIR_970:
+    case PPC::DIR_E5500:
+    case PPC::DIR_PWR4:
+    case PPC::DIR_PWR5:
+    case PPC::DIR_PWR5X:
+    case PPC::DIR_PWR6:
+    case PPC::DIR_PWR6X:
+    case PPC::DIR_PWR7:
+      Latency += 2;
+      break;
+    }
+  }
+
+  return Latency;
 }
 
 // Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
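The override above folds a microarchitectural detail into operand latency: on the listed cores, a branch that consumes a freshly written condition register sees roughly two extra cycles beyond the itinerary's number. A minimal standalone sketch of the same adjustment, using hypothetical stand-ins rather than the LLVM API (not part of the patch):

// Sketch only: models the effect of the hook above with toy types.
#include <cstdio>

enum Core { OTHER, PWR6, PWR7 };  // stand-ins for a few PPC::DIR_* values

int operandLatency(Core C, int BaseLatency, bool UseIsBranch, bool DefWritesCR) {
  int Latency = BaseLatency;
  // CR-write to dependent-branch bubble on the affected cores.
  if (UseIsBranch && DefWritesCR && (C == PWR6 || C == PWR7))
    Latency += 2;
  return Latency;
}

int main() {
  // cmpw (writes cr0) feeding a dependent conditional branch:
  printf("%d\n", operandLatency(PWR7, 1, true, true));   // prints 3
  printf("%d\n", operandLatency(OTHER, 1, true, true));  // prints 1
}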
@@ -174,6 +227,8 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
   unsigned Reg0 = MI->getOperand(0).getReg();
   unsigned Reg1 = MI->getOperand(1).getReg();
   unsigned Reg2 = MI->getOperand(2).getReg();
+  unsigned SubReg1 = MI->getOperand(1).getSubReg();
+  unsigned SubReg2 = MI->getOperand(2).getSubReg();
   bool Reg1IsKill = MI->getOperand(1).isKill();
   bool Reg2IsKill = MI->getOperand(2).isKill();
   bool ChangeReg0 = false;
@@ -183,6 +238,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
     // Must be two address instruction!
     assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
            "Expecting a two-address instruction!");
+    assert(MI->getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
     Reg2IsKill = false;
     ChangeReg0 = true;
   }
@@ -203,10 +259,14 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
       .addImm((MB-1) & 31);
   }
 
-  if (ChangeReg0)
+  if (ChangeReg0) {
     MI->getOperand(0).setReg(Reg2);
+    MI->getOperand(0).setSubReg(SubReg2);
+  }
   MI->getOperand(2).setReg(Reg1);
   MI->getOperand(1).setReg(Reg2);
+  MI->getOperand(2).setSubReg(SubReg1);
+  MI->getOperand(1).setSubReg(SubReg2);
   MI->getOperand(2).setIsKill(Reg1IsKill);
   MI->getOperand(1).setIsKill(Reg2IsKill);
 
@@ -218,10 +278,19 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
 
 void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI) const {
-  DebugLoc DL;
-  BuildMI(MBB, MI, DL, get(PPC::NOP));
-}
+  // This function is used for scheduling, and the nop wanted here is the type
+  // that terminates dispatch groups on the POWER cores.
+  unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+  unsigned Opcode;
+  switch (Directive) {
+  default:            Opcode = PPC::NOP; break;
+  case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
+  case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
+  }
 
+  DebugLoc DL;
+  BuildMI(MBB, MI, DL, get(Opcode));
+}
 
 // Branch analysis.
 // Note: If the condition register is set to CTR or CTR8 then this is a
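insertNoop() now selects the group-terminating nops that this patch defines later (NOP_GT_PWR6 and NOP_GT_PWR7, printed as "ori 1, 1, 0" and "ori 2, 2, 0"). As a sanity check on what those pseudo-instructions assemble to, here is a standalone sketch of the D-form "ori" encoding; the encoder helper is hypothetical, only the instruction names come from the patch:

// Sketch only: D-form "ori rA, rS, UI" packs primary opcode 24 into the top
// six bits, then the S and A register fields, then the 16-bit immediate.
#include <cstdint>
#include <cstdio>

uint32_t encodeOri(uint32_t S, uint32_t A, uint32_t UI) {
  return (24u << 26) | (S << 21) | (A << 16) | (UI & 0xffffu);
}

int main() {
  printf("nop (ori 0,0,0)         = 0x%08x\n", encodeOri(0, 0, 0)); // 0x60000000
  printf("NOP_GT_PWR6 (ori 1,1,0) = 0x%08x\n", encodeOri(1, 1, 0)); // 0x60210000
  printf("NOP_GT_PWR7 (ori 2,2,0) = 0x%08x\n", encodeOri(2, 2, 0)); // 0x60420000
}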
@@ -988,6 +1057,10 @@ bool PPCInstrInfo::SubsumesPredicate(
   if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
     return false;
 
+  // P1 can only subsume P2 if they test the same condition register.
+  if (Pred1[1].getReg() != Pred2[1].getReg())
+    return false;
+
   PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
   PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
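The added early-out closes a real hole: one predicate can only imply another if both test the same CR field, so the register check must come before any comparison of the predicate codes. A toy illustration with hypothetical types, not the LLVM predicate machinery:

#include <cassert>

struct Pred { int Code; unsigned CRReg; };  // hypothetical stand-ins

// Simplified subsumption: require matching condition registers first, then
// compare codes. (LLVM's real rule also relates codes such as LT and LE;
// that part is elided here.)
bool subsumes(Pred P1, Pred P2) {
  if (P1.CRReg != P2.CRReg)
    return false;  // the new check from the patch
  return P1.Code == P2.Code;
}

int main() {
  assert(subsumes({1, 0}, {1, 0}));   // same code, same CR field
  assert(!subsumes({1, 0}, {1, 1}));  // same code, different CR field
}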
"getImm16Encoding"; let ParserMatchClass = PPCU16ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<16>"; } def PPCS17ImmAsmOperand : AsmOperandClass { let Name = "S17Imm"; let PredicateMethod = "isS17Imm"; @@ -457,6 +464,7 @@ def s17imm : Operand { let PrintMethod = "printS16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS17ImmAsmOperand; + let DecoderMethod = "decodeSImmOperand<16>"; } def PPCDirectBrAsmOperand : AsmOperandClass { let Name = "DirectBr"; let PredicateMethod = "isDirectBr"; @@ -502,6 +510,7 @@ def PPCCRBitMaskOperand : AsmOperandClass { def crbitm: Operand { let PrintMethod = "printcrbitm"; let EncoderMethod = "get_crbitm_encoding"; + let DecoderMethod = "decodeCRBitMOperand"; let ParserMatchClass = PPCCRBitMaskOperand; } // Address operands @@ -539,6 +548,7 @@ def memri : Operand { let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIEncoding"; + let DecoderMethod = "decodeMemRIOperands"; } def memrr : Operand { let PrintMethod = "printMemRegReg"; @@ -548,6 +558,7 @@ def memrix : Operand { // memri where the imm is 4-aligned. let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIXEncoding"; + let DecoderMethod = "decodeMemRIXOperands"; } // A single-register address. This is used with the SjLj @@ -555,6 +566,14 @@ def memrix : Operand { // memri where the imm is 4-aligned. def memr : Operand { let MIOperandInfo = (ops ptr_rc:$ptrreg); } +def PPCTLSRegOperand : AsmOperandClass { + let Name = "TLSReg"; let PredicateMethod = "isTLSReg"; + let RenderMethod = "addTLSRegOperands"; +} +def tlsreg32 : Operand { + let EncoderMethod = "getTLSRegEncoding"; + let ParserMatchClass = PPCTLSRegOperand; +} // PowerPC Predicate operand. def pred : Operand { @@ -613,20 +632,6 @@ multiclass XForm_6rc opcode, bits<10> xo, dag OOL, dag IOL, } } -multiclass XForm_10r opcode, bits<10> xo, dag OOL, dag IOL, - string asmbase, string asmstr, InstrItinClass itin, - list pattern> { - let BaseName = asmbase in { - def NAME : XForm_10, RecFormRel; - let Defs = [CR0] in - def o : XForm_10, isDOT, RecFormRel; - } -} - multiclass XForm_10rc opcode, bits<10> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { @@ -903,14 +908,16 @@ def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F), let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { let isReturn = 1, Uses = [LR, RM] in - def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB, + def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB, [(retflag)]>; let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in { - def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>; + def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, + []>; let isCodeGenOnly = 1 in def BCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), - "b${cond:cc}ctr${cond:pm} ${cond:reg}", BrB, []>; + "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB, + []>; } } @@ -921,10 +928,10 @@ let Defs = [LR] in let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { let isBarrier = 1 in { def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst), - "b $dst", BrB, + "b $dst", IIC_BrB, [(br bb:$dst)]>; def BA : IForm<18, 1, 0, (outs), (ins absdirectbrtarget:$dst), - "ba $dst", BrB, []>; + "ba $dst", IIC_BrB, []>; } // BCC represents an arbitrary conditional branch on a predicate. 
@@ -939,22 +946,22 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { let isReturn = 1, Uses = [LR, RM] in def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond), - "b${cond:cc}lr${cond:pm} ${cond:reg}", BrB, []>; + "b${cond:cc}lr${cond:pm} ${cond:reg}", IIC_BrB, []>; } let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in { def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), - "bdzlr", BrB, []>; + "bdzlr", IIC_BrB, []>; def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins), - "bdnzlr", BrB, []>; + "bdnzlr", IIC_BrB, []>; def BDZLRp : XLForm_2_ext<19, 16, 27, 0, 0, (outs), (ins), - "bdzlr+", BrB, []>; + "bdzlr+", IIC_BrB, []>; def BDNZLRp: XLForm_2_ext<19, 16, 25, 0, 0, (outs), (ins), - "bdnzlr+", BrB, []>; + "bdnzlr+", IIC_BrB, []>; def BDZLRm : XLForm_2_ext<19, 16, 26, 0, 0, (outs), (ins), - "bdzlr-", BrB, []>; + "bdzlr-", IIC_BrB, []>; def BDNZLRm: XLForm_2_ext<19, 16, 24, 0, 0, (outs), (ins), - "bdnzlr-", BrB, []>; + "bdnzlr-", IIC_BrB, []>; } let Defs = [CTR], Uses = [CTR] in { @@ -997,9 +1004,9 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. + "bl $func", IIC_BrB, []>; // See Pat patterns below. def BLA : IForm<18, 1, 1, (outs), (ins abscalltarget:$func), - "bla $func", BrB, [(PPCcall (i32 imm:$func))]>; + "bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>; let isCodeGenOnly = 1 in { def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst), @@ -1010,20 +1017,22 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { } let Uses = [CTR, RM] in { def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), - "bctrl", BrB, [(PPCbctrl)]>, + "bctrl", IIC_BrB, [(PPCbctrl)]>, Requires<[In32BitMode]>; let isCodeGenOnly = 1 in def BCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), - "b${cond:cc}ctrl${cond:pm} ${cond:reg}", BrB, []>; + "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB, + []>; } let Uses = [LR, RM] in { def BLRL : XLForm_2_ext<19, 16, 20, 0, 1, (outs), (ins), - "blrl", BrB, []>; + "blrl", IIC_BrB, []>; let isCodeGenOnly = 1 in def BCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond), - "b${cond:cc}lrl${cond:pm} ${cond:reg}", BrB, []>; + "b${cond:cc}lrl${cond:pm} ${cond:reg}", IIC_BrB, + []>; } let Defs = [CTR], Uses = [CTR, RM] in { def BDZL : BForm_1<16, 18, 0, 1, (outs), (ins condbrtarget:$dst), @@ -1053,17 +1062,17 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { } let Defs = [CTR], Uses = [CTR, LR, RM] in { def BDZLRL : XLForm_2_ext<19, 16, 18, 0, 1, (outs), (ins), - "bdzlrl", BrB, []>; + "bdzlrl", IIC_BrB, []>; def BDNZLRL : XLForm_2_ext<19, 16, 16, 0, 1, (outs), (ins), - "bdnzlrl", BrB, []>; + "bdnzlrl", IIC_BrB, []>; def BDZLRLp : XLForm_2_ext<19, 16, 27, 0, 1, (outs), (ins), - "bdzlrl+", BrB, []>; + "bdzlrl+", IIC_BrB, []>; def BDNZLRLp: XLForm_2_ext<19, 16, 25, 0, 1, (outs), (ins), - "bdnzlrl+", BrB, []>; + "bdnzlrl+", IIC_BrB, []>; def BDZLRLm : XLForm_2_ext<19, 16, 26, 0, 1, (outs), (ins), - "bdzlrl-", BrB, []>; + "bdzlrl-", IIC_BrB, []>; def BDNZLRLm: XLForm_2_ext<19, 16, 24, 0, 1, (outs), (ins), - "bdnzlrl-", BrB, []>; + "bdnzlrl-", IIC_BrB, []>; } } @@ -1089,19 +1098,19 @@ let isCodeGenOnly = 1 in { let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in -def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, - 
Requires<[In32BitMode]>; +def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, + []>, Requires<[In32BitMode]>; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst), - "b $dst", BrB, + "b $dst", IIC_BrB, []>; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst), - "ba $dst", BrB, + "ba $dst", IIC_BrB, []>; } @@ -1127,33 +1136,33 @@ let isBranch = 1, isTerminator = 1 in { // System call. let PPC970_Unit = 7 in { def SC : SCForm<17, 1, (outs), (ins i32imm:$lev), - "sc $lev", BrB, [(PPCsc (i32 imm:$lev))]>; + "sc $lev", IIC_BrB, [(PPCsc (i32 imm:$lev))]>; } // DCB* instructions. -def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), - "dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>, +def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst", + IIC_LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst), - "dcbf $dst", LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>, +def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst), "dcbf $dst", + IIC_LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst), - "dcbi $dst", LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>, +def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst), "dcbi $dst", + IIC_LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst), - "dcbst $dst", LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>, +def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst), "dcbst $dst", + IIC_LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst), - "dcbt $dst", LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>, +def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst), "dcbt $dst", + IIC_LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst), - "dcbtst $dst", LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>, +def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst), "dcbtst $dst", + IIC_LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst), - "dcbz $dst", LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>, +def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst), "dcbz $dst", + IIC_LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>, PPC970_DGroup_Single; -def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), - "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>, +def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst", + IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>, PPC970_DGroup_Single; def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)), @@ -1241,26 +1250,26 @@ let usesCustomInserter = 1 in { // Instructions to support atomic operations def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src), - "lwarx $rD, $src", LdStLWARX, + "lwarx $rD, $src", IIC_LdStLWARX, [(set i32:$rD, (PPClarx xoaddr:$src))]>; let Defs = [CR0] in def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst), - "stwcx. $rS, $dst", LdStSTWCX, + "stwcx. 
$rS, $dst", IIC_LdStSTWCX, [(PPCstcx i32:$rS, xoaddr:$dst)]>, isDOT; let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in -def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>; +def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>; def TWI : DForm_base<3, (outs), (ins u5imm:$to, gprc:$rA, s16imm:$imm), - "twi $to, $rA, $imm", IntTrapW, []>; + "twi $to, $rA, $imm", IIC_IntTrapW, []>; def TW : XForm_1<31, 4, (outs), (ins u5imm:$to, gprc:$rA, gprc:$rB), - "tw $to, $rA, $rB", IntTrapW, []>; + "tw $to, $rA, $rB", IIC_IntTrapW, []>; def TDI : DForm_base<2, (outs), (ins u5imm:$to, g8rc:$rA, s16imm:$imm), - "tdi $to, $rA, $imm", IntTrapD, []>; + "tdi $to, $rA, $imm", IIC_IntTrapD, []>; def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB), - "td $to, $rA, $rB", IntTrapD, []>; + "td $to, $rA, $rB", IIC_IntTrapD, []>; //===----------------------------------------------------------------------===// // PPC32 Load Instructions. @@ -1269,56 +1278,56 @@ def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB), // Unindexed (r+i) Loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src), - "lbz $rD, $src", LdStLoad, + "lbz $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi8 iaddr:$src))]>; def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src), - "lha $rD, $src", LdStLHA, + "lha $rD, $src", IIC_LdStLHA, [(set i32:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src), - "lhz $rD, $src", LdStLoad, + "lhz $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi16 iaddr:$src))]>; def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src), - "lwz $rD, $src", LdStLoad, + "lwz $rD, $src", IIC_LdStLoad, [(set i32:$rD, (load iaddr:$src))]>; def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src), - "lfs $rD, $src", LdStLFD, + "lfs $rD, $src", IIC_LdStLFD, [(set f32:$rD, (load iaddr:$src))]>; def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src), - "lfd $rD, $src", LdStLFD, + "lfd $rD, $src", IIC_LdStLFD, [(set f64:$rD, (load iaddr:$src))]>; // Unindexed (r+i) Loads with Update (preinc). 
let mayLoad = 1, neverHasSideEffects = 1 in { def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStLoadUpd, + "lbzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHAU : DForm_1<43, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lhau $rD, $addr", LdStLHAU, + "lhau $rD, $addr", IIC_LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU : DForm_1<41, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStLoadUpd, + "lhzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU : DForm_1<33, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStLoadUpd, + "lwzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LFSU : DForm_1<49, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lfsu $rD, $addr", LdStLFDU, + "lfsu $rD, $addr", IIC_LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), - "lfdu $rD, $addr", LdStLFDU, + "lfdu $rD, $addr", IIC_LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; @@ -1326,37 +1335,37 @@ def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr // Indexed (r+r) Loads with Update (preinc). def LBZUX : XForm_1<31, 119, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lbzux $rD, $addr", LdStLoadUpd, + "lbzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LHAUX : XForm_1<31, 375, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lhaux $rD, $addr", LdStLHAU, + "lhaux $rD, $addr", IIC_LdStLHAUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LHZUX : XForm_1<31, 311, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lhzux $rD, $addr", LdStLoadUpd, + "lhzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LWZUX : XForm_1<31, 55, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lwzux $rD, $addr", LdStLoadUpd, + "lwzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LFSUX : XForm_1<31, 567, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lfsux $rD, $addr", LdStLFDU, + "lfsux $rD, $addr", IIC_LdStLFDUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), - "lfdux $rD, $addr", LdStLFDU, + "lfdux $rD, $addr", IIC_LdStLFDUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; } @@ -1366,45 +1375,45 @@ def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), // let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src), - "lbzx $rD, $src", LdStLoad, + "lbzx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi8 xaddr:$src))]>; def LHAX : XForm_1<31, 343, (outs gprc:$rD), (ins memrr:$src), - "lhax $rD, $src", LdStLHA, + "lhax $rD, $src", IIC_LdStLHA, [(set i32:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; def LHZX : XForm_1<31, 279, (outs gprc:$rD), (ins memrr:$src), - "lhzx $rD, $src", LdStLoad, + "lhzx $rD, $src", 
IIC_LdStLoad, [(set i32:$rD, (zextloadi16 xaddr:$src))]>; def LWZX : XForm_1<31, 23, (outs gprc:$rD), (ins memrr:$src), - "lwzx $rD, $src", LdStLoad, + "lwzx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (load xaddr:$src))]>; def LHBRX : XForm_1<31, 790, (outs gprc:$rD), (ins memrr:$src), - "lhbrx $rD, $src", LdStLoad, + "lhbrx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>; def LWBRX : XForm_1<31, 534, (outs gprc:$rD), (ins memrr:$src), - "lwbrx $rD, $src", LdStLoad, + "lwbrx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>; def LFSX : XForm_25<31, 535, (outs f4rc:$frD), (ins memrr:$src), - "lfsx $frD, $src", LdStLFD, + "lfsx $frD, $src", IIC_LdStLFD, [(set f32:$frD, (load xaddr:$src))]>; def LFDX : XForm_25<31, 599, (outs f8rc:$frD), (ins memrr:$src), - "lfdx $frD, $src", LdStLFD, + "lfdx $frD, $src", IIC_LdStLFD, [(set f64:$frD, (load xaddr:$src))]>; def LFIWAX : XForm_25<31, 855, (outs f8rc:$frD), (ins memrr:$src), - "lfiwax $frD, $src", LdStLFD, + "lfiwax $frD, $src", IIC_LdStLFD, [(set f64:$frD, (PPClfiwax xoaddr:$src))]>; def LFIWZX : XForm_25<31, 887, (outs f8rc:$frD), (ins memrr:$src), - "lfiwzx $frD, $src", LdStLFD, + "lfiwzx $frD, $src", IIC_LdStLFD, [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>; } // Load Multiple def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src), - "lmw $rD, $src", LdStLMW, []>; + "lmw $rD, $src", IIC_LdStLMW, []>; //===----------------------------------------------------------------------===// // PPC32 Store Instructions. @@ -1413,38 +1422,38 @@ def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src), // Unindexed (r+i) Stores. let PPC970_Unit = 2 in { def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src), - "stb $rS, $src", LdStStore, + "stb $rS, $src", IIC_LdStStore, [(truncstorei8 i32:$rS, iaddr:$src)]>; def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$src), - "sth $rS, $src", LdStStore, + "sth $rS, $src", IIC_LdStStore, [(truncstorei16 i32:$rS, iaddr:$src)]>; def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src), - "stw $rS, $src", LdStStore, + "stw $rS, $src", IIC_LdStStore, [(store i32:$rS, iaddr:$src)]>; def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst), - "stfs $rS, $dst", LdStSTFD, + "stfs $rS, $dst", IIC_LdStSTFD, [(store f32:$rS, iaddr:$dst)]>; def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst), - "stfd $rS, $dst", LdStSTFD, + "stfd $rS, $dst", IIC_LdStSTFD, [(store f64:$rS, iaddr:$dst)]>; } // Unindexed (r+i) Stores with Update (preinc). 
let PPC970_Unit = 2, mayStore = 1 in { def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), - "stbu $rS, $dst", LdStStoreUpd, []>, + "stbu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), - "sthu $rS, $dst", LdStStoreUpd, []>, + "sthu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), - "stwu $rS, $dst", LdStStoreUpd, []>, + "stwu $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst), - "stfsu $rS, $dst", LdStSTFDU, []>, + "stfsu $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memri:$dst), - "stfdu $rS, $dst", LdStSTFDU, []>, + "stfdu $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; } @@ -1465,59 +1474,59 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), // Indexed (r+r) Stores. let PPC970_Unit = 2 in { def STBX : XForm_8<31, 215, (outs), (ins gprc:$rS, memrr:$dst), - "stbx $rS, $dst", LdStStore, + "stbx $rS, $dst", IIC_LdStStore, [(truncstorei8 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX : XForm_8<31, 407, (outs), (ins gprc:$rS, memrr:$dst), - "sthx $rS, $dst", LdStStore, + "sthx $rS, $dst", IIC_LdStStore, [(truncstorei16 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX : XForm_8<31, 151, (outs), (ins gprc:$rS, memrr:$dst), - "stwx $rS, $dst", LdStStore, + "stwx $rS, $dst", IIC_LdStStore, [(store i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHBRX: XForm_8<31, 918, (outs), (ins gprc:$rS, memrr:$dst), - "sthbrx $rS, $dst", LdStStore, + "sthbrx $rS, $dst", IIC_LdStStore, [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>, PPC970_DGroup_Cracked; def STWBRX: XForm_8<31, 662, (outs), (ins gprc:$rS, memrr:$dst), - "stwbrx $rS, $dst", LdStStore, + "stwbrx $rS, $dst", IIC_LdStStore, [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>, PPC970_DGroup_Cracked; def STFIWX: XForm_28<31, 983, (outs), (ins f8rc:$frS, memrr:$dst), - "stfiwx $frS, $dst", LdStSTFD, + "stfiwx $frS, $dst", IIC_LdStSTFD, [(PPCstfiwx f64:$frS, xoaddr:$dst)]>; def STFSX : XForm_28<31, 663, (outs), (ins f4rc:$frS, memrr:$dst), - "stfsx $frS, $dst", LdStSTFD, + "stfsx $frS, $dst", IIC_LdStSTFD, [(store f32:$frS, xaddr:$dst)]>; def STFDX : XForm_28<31, 727, (outs), (ins f8rc:$frS, memrr:$dst), - "stfdx $frS, $dst", LdStSTFD, + "stfdx $frS, $dst", IIC_LdStSTFD, [(store f64:$frS, xaddr:$dst)]>; } // Indexed (r+r) Stores with Update (preinc). 
let PPC970_Unit = 2, mayStore = 1 in { def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), - "stbux $rS, $dst", LdStStoreUpd, []>, + "stbux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), - "sthux $rS, $dst", LdStStoreUpd, []>, + "sthux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), - "stwux $rS, $dst", LdStStoreUpd, []>, + "stwux $rS, $dst", IIC_LdStStoreUpd, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memrr:$dst), - "stfsux $rS, $dst", LdStSTFDU, []>, + "stfsux $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memrr:$dst), - "stfdux $rS, $dst", LdStSTFDU, []>, + "stfdux $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; } @@ -1538,10 +1547,10 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), // Store Multiple def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst), - "stmw $rS, $dst", LdStLMW, []>; + "stmw $rS, $dst", IIC_LdStLMW, []>; def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L), - "sync $L", LdStSync, []>; + "sync $L", IIC_LdStSync, []>; def : Pat<(int_ppc_sync), (SYNC 0)>; //===----------------------------------------------------------------------===// @@ -1550,41 +1559,41 @@ def : Pat<(int_ppc_sync), (SYNC 0)>; let PPC970_Unit = 1 in { // FXU Operations. def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$imm), - "addi $rD, $rA, $imm", IntSimple, + "addi $rD, $rA, $imm", IIC_IntSimple, [(set i32:$rD, (add i32:$rA, imm32SExt16:$imm))]>; let BaseName = "addic" in { let Defs = [CARRY] in def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), - "addic $rD, $rA, $imm", IntGeneral, + "addic $rD, $rA, $imm", IIC_IntGeneral, [(set i32:$rD, (addc i32:$rA, imm32SExt16:$imm))]>, RecFormRel, PPC970_DGroup_Cracked; let Defs = [CARRY, CR0] in def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), - "addic. $rD, $rA, $imm", IntGeneral, + "addic. 
$rD, $rA, $imm", IIC_IntGeneral, []>, isDOT, RecFormRel; } def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s17imm:$imm), - "addis $rD, $rA, $imm", IntSimple, + "addis $rD, $rA, $imm", IIC_IntSimple, [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>; let isCodeGenOnly = 1 in def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$sym), - "la $rD, $sym($rA)", IntGeneral, + "la $rD, $sym($rA)", IIC_IntGeneral, [(set i32:$rD, (add i32:$rA, (PPClo tglobaladdr:$sym, 0)))]>; def MULLI : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), - "mulli $rD, $rA, $imm", IntMulLI, + "mulli $rD, $rA, $imm", IIC_IntMulLI, [(set i32:$rD, (mul i32:$rA, imm32SExt16:$imm))]>; let Defs = [CARRY] in def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), - "subfic $rD, $rA, $imm", IntGeneral, + "subfic $rD, $rA, $imm", IIC_IntGeneral, [(set i32:$rD, (subc imm32SExt16:$imm, i32:$rA))]>; let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI : DForm_2_r0<14, (outs gprc:$rD), (ins s16imm:$imm), - "li $rD, $imm", IntSimple, + "li $rD, $imm", IIC_IntSimple, [(set i32:$rD, imm32SExt16:$imm)]>; def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins s17imm:$imm), - "lis $rD, $imm", IntSimple, + "lis $rD, $imm", IIC_IntSimple, [(set i32:$rD, imm16ShiftedSExt:$imm)]>; } } @@ -1592,154 +1601,164 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { let PPC970_Unit = 1 in { // FXU Operations. let Defs = [CR0] in { def ANDIo : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "andi. $dst, $src1, $src2", IntGeneral, + "andi. $dst, $src1, $src2", IIC_IntGeneral, [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>, isDOT; def ANDISo : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "andis. $dst, $src1, $src2", IntGeneral, + "andis. $dst, $src1, $src2", IIC_IntGeneral, [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>, isDOT; } def ORI : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "ori $dst, $src1, $src2", IntSimple, + "ori $dst, $src1, $src2", IIC_IntSimple, [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>; def ORIS : DForm_4<25, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "oris $dst, $src1, $src2", IntSimple, + "oris $dst, $src1, $src2", IIC_IntSimple, [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>; def XORI : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "xori $dst, $src1, $src2", IntSimple, + "xori $dst, $src1, $src2", IIC_IntSimple, [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>; def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), - "xoris $dst, $src1, $src2", IntSimple, + "xoris $dst, $src1, $src2", IIC_IntSimple, [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>; -def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple, + +def NOP : DForm_4_zero<24, (outs), (ins), "nop", IIC_IntSimple, []>; +let isCodeGenOnly = 1 in { +// The POWER6 and POWER7 have special group-terminating nops. 
+def NOP_GT_PWR6 : DForm_4_fixedreg_zero<24, 1, (outs), (ins), + "ori 1, 1, 0", IIC_IntSimple, []>; +def NOP_GT_PWR7 : DForm_4_fixedreg_zero<24, 2, (outs), (ins), + "ori 2, 2, 0", IIC_IntSimple, []>; +} + let isCompare = 1, neverHasSideEffects = 1 in { def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm), - "cmpwi $crD, $rA, $imm", IntCompare>; + "cmpwi $crD, $rA, $imm", IIC_IntCompare>; def CMPLWI : DForm_6_ext<10, (outs crrc:$dst), (ins gprc:$src1, u16imm:$src2), - "cmplwi $dst, $src1, $src2", IntCompare>; + "cmplwi $dst, $src1, $src2", IIC_IntCompare>; } } let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations. defm NAND : XForm_6r<31, 476, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "nand", "$rA, $rS, $rB", IntSimple, + "nand", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>; defm AND : XForm_6r<31, 28, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "and", "$rA, $rS, $rB", IntSimple, + "and", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (and i32:$rS, i32:$rB))]>; defm ANDC : XForm_6r<31, 60, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "andc", "$rA, $rS, $rB", IntSimple, + "andc", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>; defm OR : XForm_6r<31, 444, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "or", "$rA, $rS, $rB", IntSimple, + "or", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (or i32:$rS, i32:$rB))]>; defm NOR : XForm_6r<31, 124, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "nor", "$rA, $rS, $rB", IntSimple, + "nor", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>; defm ORC : XForm_6r<31, 412, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "orc", "$rA, $rS, $rB", IntSimple, + "orc", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>; defm EQV : XForm_6r<31, 284, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "eqv", "$rA, $rS, $rB", IntSimple, + "eqv", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>; defm XOR : XForm_6r<31, 316, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "xor", "$rA, $rS, $rB", IntSimple, + "xor", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (xor i32:$rS, i32:$rB))]>; defm SLW : XForm_6r<31, 24, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "slw", "$rA, $rS, $rB", IntGeneral, + "slw", "$rA, $rS, $rB", IIC_IntGeneral, [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>; defm SRW : XForm_6r<31, 536, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "srw", "$rA, $rS, $rB", IntGeneral, + "srw", "$rA, $rS, $rB", IIC_IntGeneral, [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>; defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), - "sraw", "$rA, $rS, $rB", IntShift, + "sraw", "$rA, $rS, $rB", IIC_IntShift, [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>; } let PPC970_Unit = 1 in { // FXU Operations. 
let neverHasSideEffects = 1 in { defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH), - "srawi", "$rA, $rS, $SH", IntShift, + "srawi", "$rA, $rS, $SH", IIC_IntShift, [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>; defm CNTLZW : XForm_11r<31, 26, (outs gprc:$rA), (ins gprc:$rS), - "cntlzw", "$rA, $rS", IntGeneral, + "cntlzw", "$rA, $rS", IIC_IntGeneral, [(set i32:$rA, (ctlz i32:$rS))]>; defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS), - "extsb", "$rA, $rS", IntSimple, + "extsb", "$rA, $rS", IIC_IntSimple, [(set i32:$rA, (sext_inreg i32:$rS, i8))]>; defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS), - "extsh", "$rA, $rS", IntSimple, + "extsh", "$rA, $rS", IIC_IntSimple, [(set i32:$rA, (sext_inreg i32:$rS, i16))]>; } let isCompare = 1, neverHasSideEffects = 1 in { def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB), - "cmpw $crD, $rA, $rB", IntCompare>; + "cmpw $crD, $rA, $rB", IIC_IntCompare>; def CMPLW : XForm_16_ext<31, 32, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB), - "cmplw $crD, $rA, $rB", IntCompare>; + "cmplw $crD, $rA, $rB", IIC_IntCompare>; } } let PPC970_Unit = 3 in { // FPU Operations. //def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB), -// "fcmpo $crD, $fA, $fB", FPCompare>; +// "fcmpo $crD, $fA, $fB", IIC_FPCompare>; let isCompare = 1, neverHasSideEffects = 1 in { def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB), - "fcmpu $crD, $fA, $fB", FPCompare>; + "fcmpu $crD, $fA, $fB", IIC_FPCompare>; + let Interpretation64Bit = 1, isCodeGenOnly = 1 in def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB), - "fcmpu $crD, $fA, $fB", FPCompare>; + "fcmpu $crD, $fA, $fB", IIC_FPCompare>; } let Uses = [RM] in { let neverHasSideEffects = 1 in { defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB), - "fctiw", "$frD, $frB", FPGeneral, + "fctiw", "$frD, $frB", IIC_FPGeneral, []>; defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB), - "fctiwz", "$frD, $frB", FPGeneral, + "fctiwz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfctiwz f64:$frB))]>; defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB), - "frsp", "$frD, $frB", FPGeneral, + "frsp", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fround f64:$frB))]>; - let Interpretation64Bit = 1 in + let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB), - "frin", "$frD, $frB", FPGeneral, + "frin", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (frnd f64:$frB))]>; defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB), - "frin", "$frD, $frB", FPGeneral, + "frin", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (frnd f32:$frB))]>; } let neverHasSideEffects = 1 in { - let Interpretation64Bit = 1 in + let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB), - "frip", "$frD, $frB", FPGeneral, + "frip", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (fceil f64:$frB))]>; defm FRIPS : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB), - "frip", "$frD, $frB", FPGeneral, + "frip", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fceil f32:$frB))]>; - let Interpretation64Bit = 1 in + let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIZD : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB), - "friz", "$frD, $frB", FPGeneral, + "friz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (ftrunc f64:$frB))]>; defm FRIZS : XForm_26r<63, 424, (outs f4rc:$frD), (ins 
f4rc:$frB), - "friz", "$frD, $frB", FPGeneral, + "friz", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (ftrunc f32:$frB))]>; - let Interpretation64Bit = 1 in + let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIMD : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB), - "frim", "$frD, $frB", FPGeneral, + "frim", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (ffloor f64:$frB))]>; defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB), - "frim", "$frD, $frB", FPGeneral, + "frim", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (ffloor f32:$frB))]>; defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB), - "fsqrt", "$frD, $frB", FPSqrt, + "fsqrt", "$frD, $frB", IIC_FPSqrtD, [(set f64:$frD, (fsqrt f64:$frB))]>; defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB), - "fsqrts", "$frD, $frB", FPSqrt, + "fsqrts", "$frD, $frB", IIC_FPSqrtS, [(set f32:$frD, (fsqrt f32:$frB))]>; } } @@ -1751,54 +1770,54 @@ let Uses = [RM] in { /// sneak into a d-group with a store). let neverHasSideEffects = 1 in defm FMR : XForm_26r<63, 72, (outs f4rc:$frD), (ins f4rc:$frB), - "fmr", "$frD, $frB", FPGeneral, + "fmr", "$frD, $frB", IIC_FPGeneral, []>, // (set f32:$frD, f32:$frB) PPC970_Unit_Pseudo; let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations. // These are artificially split into two different forms, for 4/8 byte FP. defm FABSS : XForm_26r<63, 264, (outs f4rc:$frD), (ins f4rc:$frB), - "fabs", "$frD, $frB", FPGeneral, + "fabs", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fabs f32:$frB))]>; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FABSD : XForm_26r<63, 264, (outs f8rc:$frD), (ins f8rc:$frB), - "fabs", "$frD, $frB", FPGeneral, + "fabs", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (fabs f64:$frB))]>; defm FNABSS : XForm_26r<63, 136, (outs f4rc:$frD), (ins f4rc:$frB), - "fnabs", "$frD, $frB", FPGeneral, + "fnabs", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fneg (fabs f32:$frB)))]>; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FNABSD : XForm_26r<63, 136, (outs f8rc:$frD), (ins f8rc:$frB), - "fnabs", "$frD, $frB", FPGeneral, + "fnabs", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (fneg (fabs f64:$frB)))]>; defm FNEGS : XForm_26r<63, 40, (outs f4rc:$frD), (ins f4rc:$frB), - "fneg", "$frD, $frB", FPGeneral, + "fneg", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fneg f32:$frB))]>; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB), - "fneg", "$frD, $frB", FPGeneral, + "fneg", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (fneg f64:$frB))]>; defm FCPSGNS : XForm_28r<63, 8, (outs f4rc:$frD), (ins f4rc:$frA, f4rc:$frB), - "fcpsgn", "$frD, $frA, $frB", FPGeneral, + "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral, [(set f32:$frD, (fcopysign f32:$frB, f32:$frA))]>; -let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB), - "fcpsgn", "$frD, $frA, $frB", FPGeneral, + "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral, [(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>; // Reciprocal estimates. 
defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB), - "fre", "$frD, $frB", FPGeneral, + "fre", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfre f64:$frB))]>; defm FRES : XForm_26r<59, 24, (outs f4rc:$frD), (ins f4rc:$frB), - "fres", "$frD, $frB", FPGeneral, + "fres", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (PPCfre f32:$frB))]>; defm FRSQRTE : XForm_26r<63, 26, (outs f8rc:$frD), (ins f8rc:$frB), - "frsqrte", "$frD, $frB", FPGeneral, + "frsqrte", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfrsqrte f64:$frB))]>; defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB), - "frsqrtes", "$frD, $frB", FPGeneral, + "frsqrtes", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (PPCfrsqrte f32:$frB))]>; } @@ -1806,57 +1825,57 @@ defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB), // let neverHasSideEffects = 1 in def MCRF : XLForm_3<19, 0, (outs crrc:$BF), (ins crrc:$BFA), - "mcrf $BF, $BFA", BrMCR>, + "mcrf $BF, $BFA", IIC_BrMCR>, PPC970_DGroup_First, PPC970_Unit_CRU; def CRAND : XLForm_1<19, 257, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crand $CRD, $CRA, $CRB", BrCR, []>; + "crand $CRD, $CRA, $CRB", IIC_BrCR, []>; def CRNAND : XLForm_1<19, 225, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crnand $CRD, $CRA, $CRB", BrCR, []>; + "crnand $CRD, $CRA, $CRB", IIC_BrCR, []>; def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "cror $CRD, $CRA, $CRB", BrCR, []>; + "cror $CRD, $CRA, $CRB", IIC_BrCR, []>; def CRXOR : XLForm_1<19, 193, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crxor $CRD, $CRA, $CRB", BrCR, []>; + "crxor $CRD, $CRA, $CRB", IIC_BrCR, []>; def CRNOR : XLForm_1<19, 33, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crnor $CRD, $CRA, $CRB", BrCR, []>; + "crnor $CRD, $CRA, $CRB", IIC_BrCR, []>; def CREQV : XLForm_1<19, 289, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "creqv $CRD, $CRA, $CRB", BrCR, []>; + "creqv $CRD, $CRA, $CRB", IIC_BrCR, []>; def CRANDC : XLForm_1<19, 129, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crandc $CRD, $CRA, $CRB", BrCR, []>; + "crandc $CRD, $CRA, $CRB", IIC_BrCR, []>; def CRORC : XLForm_1<19, 417, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), - "crorc $CRD, $CRA, $CRB", BrCR, []>; + "crorc $CRD, $CRA, $CRB", IIC_BrCR, []>; let isCodeGenOnly = 1 in { def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins), - "creqv $dst, $dst, $dst", BrCR, + "creqv $dst, $dst, $dst", IIC_BrCR, []>; def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins), - "crxor $dst, $dst, $dst", BrCR, + "crxor $dst, $dst, $dst", IIC_BrCR, []>; let Defs = [CR1EQ], CRD = 6 in { def CR6SET : XLForm_1_ext<19, 289, (outs), (ins), - "creqv 6, 6, 6", BrCR, + "creqv 6, 6, 6", IIC_BrCR, [(PPCcr6set)]>; def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), - "crxor 6, 6, 6", BrCR, + "crxor 6, 6, 6", IIC_BrCR, [(PPCcr6unset)]>; } } @@ -1865,38 +1884,38 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), // def MFSPR : XFXForm_1<31, 339, (outs gprc:$RT), (ins i32imm:$SPR), - "mfspr $RT, $SPR", SprMFSPR>; + "mfspr $RT, $SPR", IIC_SprMFSPR>; def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT), - "mtspr $SPR, $RT", SprMTSPR>; + "mtspr $SPR, $RT", IIC_SprMTSPR>; def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR), - "mftb $RT, $SPR", SprMFTB>, Deprecated; + "mftb $RT, $SPR", IIC_SprMFTB>, Deprecated; let Uses = [CTR] in { def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins), - "mfctr $rT", 
SprMFSPR>, + "mfctr $rT", IIC_SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in { def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS), - "mtctr $rS", SprMTSPR>, + "mtctr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in { let Pattern = [(int_ppc_mtctr i32:$rS)] in def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS), - "mtctr $rS", SprMTSPR>, + "mtctr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Defs = [LR] in { def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS), - "mtlr $rS", SprMTSPR>, + "mtlr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Uses = [LR] in { def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins), - "mflr $rT", SprMFSPR>, + "mflr $rT", IIC_SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } @@ -1905,19 +1924,19 @@ let isCodeGenOnly = 1 in { // like a GPR on the PPC970. As such, copies in and out have the same // performance characteristics as an OR instruction. def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS), - "mtspr 256, $rS", IntGeneral>, + "mtspr 256, $rS", IIC_IntGeneral>, PPC970_DGroup_Single, PPC970_Unit_FXU; def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins), - "mfspr $rT, 256", IntGeneral>, + "mfspr $rT, 256", IIC_IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; def MTVRSAVEv : XFXForm_7_ext<31, 467, 256, (outs VRSAVERC:$reg), (ins gprc:$rS), - "mtspr 256, $rS", IntGeneral>, + "mtspr 256, $rS", IIC_IntGeneral>, PPC970_DGroup_Single, PPC970_Unit_FXU; def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins VRSAVERC:$reg), - "mfspr $rT, 256", IntGeneral>, + "mfspr $rT, 256", IIC_IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; } @@ -1935,20 +1954,20 @@ def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), let neverHasSideEffects = 1 in { def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST), - "mtocrf $FXM, $ST", BrMCRX>, + "mtocrf $FXM, $ST", IIC_BrMCRX>, PPC970_DGroup_First, PPC970_Unit_CRU; def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS), - "mtcrf $FXM, $rS", BrMCRX>, + "mtcrf $FXM, $rS", IIC_BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking. def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM), - "mfocrf $rT, $FXM", SprMFCR>, + "mfocrf $rT, $FXM", IIC_SprMFCRF>, PPC970_DGroup_First, PPC970_Unit_CRU; def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins), - "mfcr $rT", SprMFCR>, + "mfcr $rT", IIC_SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; } // neverHasSideEffects = 1 @@ -1962,18 +1981,18 @@ let usesCustomInserter = 1, Uses = [RM] in { // to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level. 
let Uses = [RM], Defs = [RM] in { def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM), - "mtfsb0 $FM", IntMTFSB0, []>, + "mtfsb0 $FM", IIC_IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM), - "mtfsb1 $FM", IntMTFSB0, []>, + "mtfsb1 $FM", IIC_IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT), - "mtfsf $FM, $rT", IntMTFSB0, []>, + "mtfsf $FM, $rT", IIC_IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; } let Uses = [RM] in { def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins), - "mffs $rT", IntMFFS, + "mffs $rT", IIC_IntMFFS, [(set f64:$rT, (PPCmffs))]>, PPC970_DGroup_Single, PPC970_Unit_FPU; } @@ -1983,57 +2002,61 @@ let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations. // XO-Form instructions. Arithmetic instructions that can set overflow bit // defm ADD4 : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "add", "$rT, $rA, $rB", IntSimple, + "add", "$rT, $rA, $rB", IIC_IntSimple, [(set i32:$rT, (add i32:$rA, i32:$rB))]>; +let isCodeGenOnly = 1 in +def ADD4TLS : XOForm_1<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, tlsreg32:$rB), + "add $rT, $rA, $rB", IIC_IntSimple, + [(set i32:$rT, (add i32:$rA, tglobaltlsaddr:$rB))]>; defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "addc", "$rT, $rA, $rB", IntGeneral, + "addc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (addc i32:$rA, i32:$rB))]>, PPC970_DGroup_Cracked; defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divw", "$rT, $rA, $rB", IntDivW, + "divw", "$rT, $rA, $rB", IIC_IntDivW, [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>, PPC970_DGroup_First, PPC970_DGroup_Cracked; defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divwu", "$rT, $rA, $rB", IntDivW, + "divwu", "$rT, $rA, $rB", IIC_IntDivW, [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>, PPC970_DGroup_First, PPC970_DGroup_Cracked; defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "mulhw", "$rT, $rA, $rB", IntMulHW, + "mulhw", "$rT, $rA, $rB", IIC_IntMulHW, [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>; defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "mulhwu", "$rT, $rA, $rB", IntMulHWU, + "mulhwu", "$rT, $rA, $rB", IIC_IntMulHWU, [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>; defm MULLW : XOForm_1r<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "mullw", "$rT, $rA, $rB", IntMulHW, + "mullw", "$rT, $rA, $rB", IIC_IntMulHW, [(set i32:$rT, (mul i32:$rA, i32:$rB))]>; defm SUBF : XOForm_1r<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "subf", "$rT, $rA, $rB", IntGeneral, + "subf", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (sub i32:$rB, i32:$rA))]>; defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "subfc", "$rT, $rA, $rB", IntGeneral, + "subfc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (subc i32:$rB, i32:$rA))]>, PPC970_DGroup_Cracked; defm NEG : XOForm_3r<31, 104, 0, (outs gprc:$rT), (ins gprc:$rA), - "neg", "$rT, $rA", IntSimple, + "neg", "$rT, $rA", IIC_IntSimple, [(set i32:$rT, (ineg i32:$rA))]>; let Uses = [CARRY] in { defm ADDE : XOForm_1rc<31, 138, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "adde", "$rT, $rA, $rB", IntGeneral, + "adde", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (adde i32:$rA, i32:$rB))]>; defm ADDME : XOForm_3rc<31, 234, 0, (outs gprc:$rT), (ins gprc:$rA), - "addme", "$rT, $rA", IntGeneral, + 
"addme", "$rT, $rA", IIC_IntGeneral, [(set i32:$rT, (adde i32:$rA, -1))]>; defm ADDZE : XOForm_3rc<31, 202, 0, (outs gprc:$rT), (ins gprc:$rA), - "addze", "$rT, $rA", IntGeneral, + "addze", "$rT, $rA", IIC_IntGeneral, [(set i32:$rT, (adde i32:$rA, 0))]>; defm SUBFE : XOForm_1rc<31, 136, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "subfe", "$rT, $rA, $rB", IntGeneral, + "subfe", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (sube i32:$rB, i32:$rA))]>; defm SUBFME : XOForm_3rc<31, 232, 0, (outs gprc:$rT), (ins gprc:$rA), - "subfme", "$rT, $rA", IntGeneral, + "subfme", "$rT, $rA", IIC_IntGeneral, [(set i32:$rT, (sube -1, i32:$rA))]>; defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA), - "subfze", "$rT, $rA", IntGeneral, + "subfze", "$rT, $rA", IIC_IntGeneral, [(set i32:$rT, (sube 0, i32:$rA))]>; } } @@ -2045,40 +2068,40 @@ let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations. let Uses = [RM] in { defm FMADD : AForm_1r<63, 29, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), - "fmadd", "$FRT, $FRA, $FRC, $FRB", FPFused, + "fmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused, [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>; defm FMADDS : AForm_1r<59, 29, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), - "fmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>; defm FMSUB : AForm_1r<63, 28, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), - "fmsub", "$FRT, $FRA, $FRC, $FRB", FPFused, + "fmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused, [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>; defm FMSUBS : AForm_1r<59, 28, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), - "fmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>; defm FNMADD : AForm_1r<63, 31, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), - "fnmadd", "$FRT, $FRA, $FRC, $FRB", FPFused, + "fnmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused, [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>; defm FNMADDS : AForm_1r<59, 31, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), - "fnmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fnmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>; defm FNMSUB : AForm_1r<63, 30, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), - "fnmsub", "$FRT, $FRA, $FRC, $FRB", FPFused, + "fnmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused, [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB))))]>; defm FNMSUBS : AForm_1r<59, 30, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), - "fnmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fnmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB))))]>; } @@ -2086,47 +2109,47 @@ let Uses = [RM] in { // having 4 of these, force the comparison to always be an 8-byte double (code // should use an FMRSD if the input comparison value really wants to be a float) // and 4/8 byte forms for the result and operand type.. 
-let Interpretation64Bit = 1 in +let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FSELD : AForm_1r<63, 23, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), - "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>; defm FSELS : AForm_1r<63, 23, (outs f4rc:$FRT), (ins f8rc:$FRA, f4rc:$FRC, f4rc:$FRB), - "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral, + "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>; let Uses = [RM] in { defm FADD : AForm_2r<63, 21, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), - "fadd", "$FRT, $FRA, $FRB", FPAddSub, + "fadd", "$FRT, $FRA, $FRB", IIC_FPAddSub, [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>; defm FADDS : AForm_2r<59, 21, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), - "fadds", "$FRT, $FRA, $FRB", FPGeneral, + "fadds", "$FRT, $FRA, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>; defm FDIV : AForm_2r<63, 18, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), - "fdiv", "$FRT, $FRA, $FRB", FPDivD, + "fdiv", "$FRT, $FRA, $FRB", IIC_FPDivD, [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>; defm FDIVS : AForm_2r<59, 18, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), - "fdivs", "$FRT, $FRA, $FRB", FPDivS, + "fdivs", "$FRT, $FRA, $FRB", IIC_FPDivS, [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>; defm FMUL : AForm_3r<63, 25, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC), - "fmul", "$FRT, $FRA, $FRC", FPFused, + "fmul", "$FRT, $FRA, $FRC", IIC_FPFused, [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>; defm FMULS : AForm_3r<59, 25, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC), - "fmuls", "$FRT, $FRA, $FRC", FPGeneral, + "fmuls", "$FRT, $FRA, $FRC", IIC_FPGeneral, [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>; defm FSUB : AForm_2r<63, 20, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), - "fsub", "$FRT, $FRA, $FRB", FPAddSub, + "fsub", "$FRT, $FRA, $FRB", IIC_FPAddSub, [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>; defm FSUBS : AForm_2r<59, 20, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), - "fsubs", "$FRT, $FRA, $FRB", FPGeneral, + "fsubs", "$FRT, $FRA, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>; } } @@ -2136,7 +2159,7 @@ let PPC970_Unit = 1 in { // FXU Operations. let isSelect = 1 in def ISEL : AForm_4<31, 15, (outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond), - "isel $rT, $rA, $rB, $cond", IntGeneral, + "isel $rT, $rA, $rB, $cond", IIC_IntGeneral, []>; } @@ -2147,24 +2170,24 @@ let isCommutable = 1 in { // RLWIMI can be commuted if the rotate amount is zero. defm RLWIMI : MForm_2r<20, (outs gprc:$rA), (ins gprc:$rSi, gprc:$rS, u5imm:$SH, u5imm:$MB, - u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", IntRotate, - []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">, - NoEncode<"$rSi">; + u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", + IIC_IntRotate, []>, PPC970_DGroup_Cracked, + RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">; } let BaseName = "rlwinm" in { def RLWINM : MForm_2<21, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), - "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral, + "rlwinm $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral, []>, RecFormRel; let Defs = [CR0] in def RLWINMo : MForm_2<21, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), - "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral, + "rlwinm. 
$rA, $rS, $SH, $MB, $ME", IIC_IntGeneral, []>, isDOT, RecFormRel, PPC970_DGroup_Cracked; } defm RLWNM : MForm_2r<23, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME), - "rlwnm", "$rA, $rS, $rB, $MB, $ME", IntGeneral, + "rlwnm", "$rA, $rS, $rB, $MB, $ME", IIC_IntGeneral, []>; } } // neverHasSideEffects = 1 @@ -2250,6 +2273,17 @@ def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)), def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)), (ADDIS $in, tblockaddress:$g)>; +// Support for thread-local storage. +def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT", + [(set i32:$rD, (PPCppc32GOT))]>; + +def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg), + "#LDgotTprelL32", + [(set i32:$rD, + (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>; +def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g), + (ADD4TLS $in, tglobaltlsaddr:$g)>; + // Standard shifts. These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 5-bit and 6-bit shift // amounts. @@ -2311,45 +2345,45 @@ include "PPCInstr64Bit.td" // def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins), - "isync", SprISYNC, []>; + "isync", IIC_SprISYNC, []>; def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src), - "icbi $src", LdStICBI, []>; + "icbi $src", IIC_LdStICBI, []>; def EIEIO : XForm_24_eieio<31, 854, (outs), (ins), - "eieio", LdStLoad, []>; + "eieio", IIC_LdStLoad, []>; def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L), - "wait $L", LdStLoad, []>; + "wait $L", IIC_LdStLoad, []>; def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L), - "mtmsr $RS, $L", SprMTMSR>; + "mtmsr $RS, $L", IIC_SprMTMSR>; def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins), - "mfmsr $RT", SprMFMSR, []>; + "mfmsr $RT", IIC_SprMFMSR, []>; def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L), - "mtmsrd $RS, $L", SprMTMSRD>; + "mtmsrd $RS, $L", IIC_SprMTMSRD>; def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB), - "slbie $RB", SprSLBIE, []>; + "slbie $RB", IIC_SprSLBIE, []>; def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RS, gprc:$RB), - "slbmte $RS, $RB", SprSLBMTE, []>; + "slbmte $RS, $RB", IIC_SprSLBMTE, []>; def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB), - "slbmfee $RT, $RB", SprSLBMFEE, []>; + "slbmfee $RT, $RB", IIC_SprSLBMFEE, []>; -def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", SprSLBIA, []>; +def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>; def TLBSYNC : XForm_0<31, 566, (outs), (ins), - "tlbsync", SprTLBSYNC, []>; + "tlbsync", IIC_SprTLBSYNC, []>; def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB), - "tlbiel $RB", SprTLBIEL, []>; + "tlbiel $RB", IIC_SprTLBIEL, []>; def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB), - "tlbie $RB,$RS", SprTLBIE, []>; + "tlbie $RB,$RS", IIC_SprTLBIE, []>; //===----------------------------------------------------------------------===// // PowerPC Assembler Instruction Aliases @@ -2565,19 +2599,19 @@ let PPC970_Unit = 7 in { let Defs = [CTR], Uses = [CTR, LR, RM] in def gBCLR : XLForm_2<19, 16, 0, (outs), (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), - "bclr $bo, $bi, $bh", BrB, []>; + "bclr $bo, $bi, $bh", IIC_BrB, []>; let Defs = [LR, CTR], Uses = [CTR, LR, RM] in def gBCLRL : XLForm_2<19, 16, 1, (outs), (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), - "bclrl $bo, $bi, $bh", BrB, []>; + "bclrl $bo, $bi, $bh", IIC_BrB, []>; let Defs = [CTR], Uses = [CTR, LR, RM] in def gBCCTR : XLForm_2<19, 528, 0, (outs), (ins u5imm:$bo, crbitrc:$bi, 
i32imm:$bh), - "bcctr $bo, $bi, $bh", BrB, []>; + "bcctr $bo, $bi, $bh", IIC_BrB, []>; let Defs = [LR, CTR], Uses = [CTR, LR, RM] in def gBCCTRL : XLForm_2<19, 528, 1, (outs), (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), - "bcctrl $bo, $bi, $bh", BrB, []>; + "bcctrl $bo, $bi, $bh", IIC_BrB, []>; } def : InstAlias<"bclr $bo, $bi", (gBCLR u5imm:$bo, crbitrc:$bi, 0)>; def : InstAlias<"bclrl $bo, $bi", (gBCLRL u5imm:$bo, crbitrc:$bi, 0)>; @@ -2671,18 +2705,18 @@ def : InstAlias<"cmpwi $rA, $imm", (CMPWI CR0, gprc:$rA, s16imm:$imm)>; def : InstAlias<"cmpw $rA, $rB", (CMPW CR0, gprc:$rA, gprc:$rB)>; def : InstAlias<"cmplwi $rA, $imm", (CMPLWI CR0, gprc:$rA, u16imm:$imm)>; def : InstAlias<"cmplw $rA, $rB", (CMPLW CR0, gprc:$rA, gprc:$rB)>; -def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm:$imm)>; +def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm64:$imm)>; def : InstAlias<"cmpd $rA, $rB", (CMPD CR0, g8rc:$rA, g8rc:$rB)>; -def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm:$imm)>; +def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm64:$imm)>; def : InstAlias<"cmpld $rA, $rB", (CMPLD CR0, g8rc:$rA, g8rc:$rB)>; def : InstAlias<"cmpi $bf, 0, $rA, $imm", (CMPWI crrc:$bf, gprc:$rA, s16imm:$imm)>; def : InstAlias<"cmp $bf, 0, $rA, $rB", (CMPW crrc:$bf, gprc:$rA, gprc:$rB)>; def : InstAlias<"cmpli $bf, 0, $rA, $imm", (CMPLWI crrc:$bf, gprc:$rA, u16imm:$imm)>; def : InstAlias<"cmpl $bf, 0, $rA, $rB", (CMPLW crrc:$bf, gprc:$rA, gprc:$rB)>; -def : InstAlias<"cmpi $bf, 1, $rA, $imm", (CMPDI crrc:$bf, g8rc:$rA, s16imm:$imm)>; +def : InstAlias<"cmpi $bf, 1, $rA, $imm", (CMPDI crrc:$bf, g8rc:$rA, s16imm64:$imm)>; def : InstAlias<"cmp $bf, 1, $rA, $rB", (CMPD crrc:$bf, g8rc:$rA, g8rc:$rB)>; -def : InstAlias<"cmpli $bf, 1, $rA, $imm", (CMPLDI crrc:$bf, g8rc:$rA, u16imm:$imm)>; +def : InstAlias<"cmpli $bf, 1, $rA, $imm", (CMPLDI crrc:$bf, g8rc:$rA, u16imm64:$imm)>; def : InstAlias<"cmpl $bf, 1, $rA, $rB", (CMPLD crrc:$bf, g8rc:$rA, g8rc:$rB)>; multiclass TrapExtendedMnemonic { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index f61c8bf0216e..20e971856385 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -19,11 +19,13 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) { @@ -32,35 +34,38 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) { static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ + const DataLayout *DL = AP.TM.getDataLayout(); MCContext &Ctx = AP.OutContext; SmallString<128> Name; + StringRef Suffix; + if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) + Suffix = "$stub"; + else if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) + Suffix = "$non_lazy_ptr"; + + if (!Suffix.empty()) + Name += DL->getPrivateGlobalPrefix(); + + unsigned PrefixLen = Name.size(); + if (!MO.isGlobal()) { assert(MO.isSymbol() && "Isn't a symbol reference"); - Name += AP.MAI->getGlobalPrefix(); - Name += MO.getSymbolName(); - } else { + 
AP.Mang->getNameWithPrefix(Name, MO.getSymbolName()); + } else { const GlobalValue *GV = MO.getGlobal(); - bool isImplicitlyPrivate = false; - if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB || - (MO.getTargetFlags() & PPCII::MO_NLP_FLAG)) - isImplicitlyPrivate = true; - - AP.Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate); + AP.Mang->getNameWithPrefix(Name, GV); } - + + unsigned OrigLen = Name.size() - PrefixLen; + + Name += Suffix; + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen); + // If the target flags on the operand changes the name of the symbol, do that // before we return the symbol. if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) { - Name += "$stub"; - const char *PGP = AP.MAI->getPrivateGlobalPrefix(); - const char *Prefix = ""; - if (!Name.startswith(PGP)) { - // http://llvm.org/bugs/show_bug.cgi?id=15763 - // all stubs and lazy_ptrs should be local symbols, which need leading 'L' - Prefix = PGP; - } - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Twine(Prefix) + Twine(Name)); MachineModuleInfoImpl::StubValueTy &StubSym = getMachOMMI(AP).getFnStubEntry(Sym); if (StubSym.getPointer()) @@ -72,10 +77,9 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ StubValueTy(AP.getSymbol(MO.getGlobal()), !MO.getGlobal()->hasInternalLinkage()); } else { - Name.erase(Name.end()-5, Name.end()); StubSym = MachineModuleInfoImpl:: - StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false); + StubValueTy(Ctx.GetOrCreateSymbol(OrigName), false); } return Sym; } @@ -83,9 +87,6 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ // If the symbol reference is actually to a non_lazy_ptr, not to the symbol, // then add the suffix. if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) { - Name += "$non_lazy_ptr"; - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); - MachineModuleInfoMachO &MachO = getMachOMMI(AP); MachineModuleInfoImpl::StubValueTy &StubSym = @@ -101,7 +102,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ return Sym; } - return Ctx.GetOrCreateSymbol(Name.str()); + return Sym; } static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCSchedule.td b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCSchedule.td index 92ba69c2c6b8..1221d4149996 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCSchedule.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCSchedule.td @@ -7,115 +7,107 @@ // //===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// Functional units across PowerPC chips sets -// -def BPU : FuncUnit; // Branch unit -def SLU : FuncUnit; // Store/load unit -def SRU : FuncUnit; // special register unit -def IU1 : FuncUnit; // integer unit 1 (simple) -def IU2 : FuncUnit; // integer unit 2 (complex) -def FPU1 : FuncUnit; // floating point unit 1 -def FPU2 : FuncUnit; // floating point unit 2 -def VPU : FuncUnit; // vector permutation unit -def VIU1 : FuncUnit; // vector integer unit 1 (simple) -def VIU2 : FuncUnit; // vector integer unit 2 (complex) -def VFPU : FuncUnit; // vector floating point unit - //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for PowerPC // -def IntSimple : InstrItinClass; -def IntGeneral : InstrItinClass; -def IntCompare 
: InstrItinClass; -def IntDivD : InstrItinClass; -def IntDivW : InstrItinClass; -def IntMFFS : InstrItinClass; -def IntMFVSCR : InstrItinClass; -def IntMTFSB0 : InstrItinClass; -def IntMTSRD : InstrItinClass; -def IntMulHD : InstrItinClass; -def IntMulHW : InstrItinClass; -def IntMulHWU : InstrItinClass; -def IntMulLI : InstrItinClass; -def IntRFID : InstrItinClass; -def IntRotateD : InstrItinClass; -def IntRotateDI : InstrItinClass; -def IntRotate : InstrItinClass; -def IntShift : InstrItinClass; -def IntTrapD : InstrItinClass; -def IntTrapW : InstrItinClass; -def BrB : InstrItinClass; -def BrCR : InstrItinClass; -def BrMCR : InstrItinClass; -def BrMCRX : InstrItinClass; -def LdStDCBA : InstrItinClass; -def LdStDCBF : InstrItinClass; -def LdStDCBI : InstrItinClass; -def LdStLoad : InstrItinClass; -def LdStLoadUpd : InstrItinClass; -def LdStStore : InstrItinClass; -def LdStStoreUpd : InstrItinClass; -def LdStDSS : InstrItinClass; -def LdStICBI : InstrItinClass; -def LdStLD : InstrItinClass; -def LdStLDU : InstrItinClass; -def LdStLDARX : InstrItinClass; -def LdStLFD : InstrItinClass; -def LdStLFDU : InstrItinClass; -def LdStLHA : InstrItinClass; -def LdStLHAU : InstrItinClass; -def LdStLMW : InstrItinClass; -def LdStLVecX : InstrItinClass; -def LdStLWA : InstrItinClass; -def LdStLWARX : InstrItinClass; -def LdStSLBIA : InstrItinClass; -def LdStSLBIE : InstrItinClass; -def LdStSTD : InstrItinClass; -def LdStSTDCX : InstrItinClass; -def LdStSTDU : InstrItinClass; -def LdStSTFD : InstrItinClass; -def LdStSTFDU : InstrItinClass; -def LdStSTVEBX : InstrItinClass; -def LdStSTWCX : InstrItinClass; -def LdStSync : InstrItinClass; -def SprISYNC : InstrItinClass; -def SprMFSR : InstrItinClass; -def SprMTMSR : InstrItinClass; -def SprMTSR : InstrItinClass; -def SprTLBSYNC : InstrItinClass; -def SprMFCR : InstrItinClass; -def SprMFMSR : InstrItinClass; -def SprMFSPR : InstrItinClass; -def SprMFTB : InstrItinClass; -def SprMTSPR : InstrItinClass; -def SprMTSRIN : InstrItinClass; -def SprRFI : InstrItinClass; -def SprSC : InstrItinClass; -def FPGeneral : InstrItinClass; -def FPAddSub : InstrItinClass; -def FPCompare : InstrItinClass; -def FPDivD : InstrItinClass; -def FPDivS : InstrItinClass; -def FPFused : InstrItinClass; -def FPRes : InstrItinClass; -def FPSqrt : InstrItinClass; -def VecGeneral : InstrItinClass; -def VecFP : InstrItinClass; -def VecFPCompare : InstrItinClass; -def VecComplex : InstrItinClass; -def VecPerm : InstrItinClass; -def VecFPRound : InstrItinClass; -def VecVSL : InstrItinClass; -def VecVSR : InstrItinClass; -def SprMTMSRD : InstrItinClass; -def SprSLIE : InstrItinClass; -def SprSLBIE : InstrItinClass; -def SprSLBMTE : InstrItinClass; -def SprSLBMFEE : InstrItinClass; -def SprSLBIA : InstrItinClass; -def SprTLBIEL : InstrItinClass; -def SprTLBIE : InstrItinClass; +def IIC_IntSimple : InstrItinClass; +def IIC_IntGeneral : InstrItinClass; +def IIC_IntCompare : InstrItinClass; +def IIC_IntDivD : InstrItinClass; +def IIC_IntDivW : InstrItinClass; +def IIC_IntMFFS : InstrItinClass; +def IIC_IntMFVSCR : InstrItinClass; +def IIC_IntMTFSB0 : InstrItinClass; +def IIC_IntMTSRD : InstrItinClass; +def IIC_IntMulHD : InstrItinClass; +def IIC_IntMulHW : InstrItinClass; +def IIC_IntMulHWU : InstrItinClass; +def IIC_IntMulLI : InstrItinClass; +def IIC_IntRFID : InstrItinClass; +def IIC_IntRotateD : InstrItinClass; +def IIC_IntRotateDI : InstrItinClass; +def IIC_IntRotate : InstrItinClass; +def IIC_IntShift : InstrItinClass; +def IIC_IntTrapD : InstrItinClass; +def IIC_IntTrapW : 
InstrItinClass; +def IIC_BrB : InstrItinClass; +def IIC_BrCR : InstrItinClass; +def IIC_BrMCR : InstrItinClass; +def IIC_BrMCRX : InstrItinClass; +def IIC_LdStDCBA : InstrItinClass; +def IIC_LdStDCBF : InstrItinClass; +def IIC_LdStDCBI : InstrItinClass; +def IIC_LdStLoad : InstrItinClass; +def IIC_LdStLoadUpd : InstrItinClass; +def IIC_LdStLoadUpdX : InstrItinClass; +def IIC_LdStStore : InstrItinClass; +def IIC_LdStStoreUpd : InstrItinClass; +def IIC_LdStDSS : InstrItinClass; +def IIC_LdStICBI : InstrItinClass; +def IIC_LdStLD : InstrItinClass; +def IIC_LdStLDU : InstrItinClass; +def IIC_LdStLDUX : InstrItinClass; +def IIC_LdStLDARX : InstrItinClass; +def IIC_LdStLFD : InstrItinClass; +def IIC_LdStLFDU : InstrItinClass; +def IIC_LdStLFDUX : InstrItinClass; +def IIC_LdStLHA : InstrItinClass; +def IIC_LdStLHAU : InstrItinClass; +def IIC_LdStLHAUX : InstrItinClass; +def IIC_LdStLMW : InstrItinClass; +def IIC_LdStLVecX : InstrItinClass; +def IIC_LdStLWA : InstrItinClass; +def IIC_LdStLWARX : InstrItinClass; +def IIC_LdStSLBIA : InstrItinClass; +def IIC_LdStSLBIE : InstrItinClass; +def IIC_LdStSTD : InstrItinClass; +def IIC_LdStSTDCX : InstrItinClass; +def IIC_LdStSTDU : InstrItinClass; +def IIC_LdStSTDUX : InstrItinClass; +def IIC_LdStSTFD : InstrItinClass; +def IIC_LdStSTFDU : InstrItinClass; +def IIC_LdStSTVEBX : InstrItinClass; +def IIC_LdStSTWCX : InstrItinClass; +def IIC_LdStSync : InstrItinClass; +def IIC_SprISYNC : InstrItinClass; +def IIC_SprMFSR : InstrItinClass; +def IIC_SprMTMSR : InstrItinClass; +def IIC_SprMTSR : InstrItinClass; +def IIC_SprTLBSYNC : InstrItinClass; +def IIC_SprMFCR : InstrItinClass; +def IIC_SprMFCRF : InstrItinClass; +def IIC_SprMFMSR : InstrItinClass; +def IIC_SprMFSPR : InstrItinClass; +def IIC_SprMFTB : InstrItinClass; +def IIC_SprMTSPR : InstrItinClass; +def IIC_SprMTSRIN : InstrItinClass; +def IIC_SprRFI : InstrItinClass; +def IIC_SprSC : InstrItinClass; +def IIC_FPGeneral : InstrItinClass; +def IIC_FPAddSub : InstrItinClass; +def IIC_FPCompare : InstrItinClass; +def IIC_FPDivD : InstrItinClass; +def IIC_FPDivS : InstrItinClass; +def IIC_FPFused : InstrItinClass; +def IIC_FPRes : InstrItinClass; +def IIC_FPSqrtD : InstrItinClass; +def IIC_FPSqrtS : InstrItinClass; +def IIC_VecGeneral : InstrItinClass; +def IIC_VecFP : InstrItinClass; +def IIC_VecFPCompare : InstrItinClass; +def IIC_VecComplex : InstrItinClass; +def IIC_VecPerm : InstrItinClass; +def IIC_VecFPRound : InstrItinClass; +def IIC_VecVSL : InstrItinClass; +def IIC_VecVSR : InstrItinClass; +def IIC_SprMTMSRD : InstrItinClass; +def IIC_SprSLIE : InstrItinClass; +def IIC_SprSLBIE : InstrItinClass; +def IIC_SprSLBMTE : InstrItinClass; +def IIC_SprSLBMFEE : InstrItinClass; +def IIC_SprSLBIA : InstrItinClass; +def IIC_SprTLBIEL : InstrItinClass; +def IIC_SprTLBIE : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. @@ -125,6 +117,7 @@ include "PPCSchedule440.td" include "PPCScheduleG4.td" include "PPCScheduleG4Plus.td" include "PPCScheduleG5.td" +include "PPCScheduleP7.td" include "PPCScheduleA2.td" include "PPCScheduleE500mc.td" include "PPCScheduleE5500.td" @@ -136,392 +129,392 @@ include "PPCScheduleE5500.td" // // opcode itinerary class // ====== =============== -// add IntSimple -// addc IntGeneral -// adde IntGeneral -// addi IntSimple -// addic IntGeneral -// addic. IntGeneral -// addis IntSimple -// addme IntGeneral -// addze IntGeneral -// and IntSimple -// andc IntSimple -// andi. IntGeneral -// andis. 
IntGeneral -// b BrB -// bc BrB -// bcctr BrB -// bclr BrB -// cmp IntCompare -// cmpi IntCompare -// cmpl IntCompare -// cmpli IntCompare -// cntlzd IntRotateD -// cntlzw IntGeneral -// crand BrCR -// crandc BrCR -// creqv BrCR -// crnand BrCR -// crnor BrCR -// cror BrCR -// crorc BrCR -// crxor BrCR -// dcba LdStDCBA -// dcbf LdStDCBF -// dcbi LdStDCBI -// dcbst LdStDCBF -// dcbt LdStLoad -// dcbtst LdStLoad -// dcbz LdStDCBF -// divd IntDivD -// divdu IntDivD -// divw IntDivW -// divwu IntDivW -// dss LdStDSS -// dst LdStDSS -// dstst LdStDSS -// eciwx LdStLoad -// ecowx LdStLoad -// eieio LdStLoad -// eqv IntSimple -// extsb IntSimple -// extsh IntSimple -// extsw IntSimple -// fabs FPGeneral -// fadd FPAddSub -// fadds FPGeneral -// fcfid FPGeneral -// fcmpo FPCompare -// fcmpu FPCompare -// fctid FPGeneral -// fctidz FPGeneral -// fctiw FPGeneral -// fctiwz FPGeneral -// fdiv FPDivD -// fdivs FPDivS -// fmadd FPFused -// fmadds FPGeneral -// fmr FPGeneral -// fmsub FPFused -// fmsubs FPGeneral -// fmul FPFused -// fmuls FPGeneral -// fnabs FPGeneral -// fneg FPGeneral -// fnmadd FPFused -// fnmadds FPGeneral -// fnmsub FPFused -// fnmsubs FPGeneral -// fres FPRes -// frsp FPGeneral -// frsqrte FPGeneral -// fsel FPGeneral -// fsqrt FPSqrt -// fsqrts FPSqrt -// fsub FPAddSub -// fsubs FPGeneral -// icbi LdStICBI -// isync SprISYNC -// lbz LdStLoad -// lbzu LdStLoadUpd -// lbzux LdStLoadUpd -// lbzx LdStLoad -// ld LdStLD -// ldarx LdStLDARX -// ldu LdStLDU -// ldux LdStLDU -// ldx LdStLD -// lfd LdStLFD -// lfdu LdStLFDU -// lfdux LdStLFDU -// lfdx LdStLFD -// lfs LdStLFD -// lfsu LdStLFDU -// lfsux LdStLFDU -// lfsx LdStLFD -// lha LdStLHA -// lhau LdStLHAU -// lhaux LdStLHAU -// lhax LdStLHA -// lhbrx LdStLoad -// lhz LdStLoad -// lhzu LdStLoadUpd -// lhzux LdStLoadUpd -// lhzx LdStLoad -// lmw LdStLMW -// lswi LdStLMW -// lswx LdStLMW -// lvebx LdStLVecX -// lvehx LdStLVecX -// lvewx LdStLVecX -// lvsl LdStLVecX -// lvsr LdStLVecX -// lvx LdStLVecX -// lvxl LdStLVecX -// lwa LdStLWA -// lwarx LdStLWARX -// lwaux LdStLHAU -// lwax LdStLHA -// lwbrx LdStLoad -// lwz LdStLoad -// lwzu LdStLoadUpd -// lwzux LdStLoadUpd -// lwzx LdStLoad -// mcrf BrMCR -// mcrfs FPGeneral -// mcrxr BrMCRX -// mfcr SprMFCR -// mffs IntMFFS -// mfmsr SprMFMSR -// mfspr SprMFSPR -// mfsr SprMFSR -// mfsrin SprMFSR -// mftb SprMFTB -// mfvscr IntMFVSCR -// mtcrf BrMCRX -// mtfsb0 IntMTFSB0 -// mtfsb1 IntMTFSB0 -// mtfsf IntMTFSB0 -// mtfsfi IntMTFSB0 -// mtmsr SprMTMSR -// mtmsrd LdStLD -// mtspr SprMTSPR -// mtsr SprMTSR -// mtsrd IntMTSRD -// mtsrdin IntMTSRD -// mtsrin SprMTSRIN -// mtvscr IntMFVSCR -// mulhd IntMulHD -// mulhdu IntMulHD -// mulhw IntMulHW -// mulhwu IntMulHWU -// mulld IntMulHD -// mulli IntMulLI -// mullw IntMulHW -// nand IntSimple -// neg IntSimple -// nor IntSimple -// or IntSimple -// orc IntSimple -// ori IntSimple -// oris IntSimple -// rfi SprRFI -// rfid IntRFID -// rldcl IntRotateD -// rldcr IntRotateD -// rldic IntRotateDI -// rldicl IntRotateDI -// rldicr IntRotateDI -// rldimi IntRotateDI -// rlwimi IntRotate -// rlwinm IntGeneral -// rlwnm IntGeneral -// sc SprSC -// slbia LdStSLBIA -// slbie LdStSLBIE -// sld IntRotateD -// slw IntGeneral -// srad IntRotateD -// sradi IntRotateDI -// sraw IntShift -// srawi IntShift -// srd IntRotateD -// srw IntGeneral -// stb LdStStore -// stbu LdStStoreUpd -// stbux LdStStoreUpd -// stbx LdStStore -// std LdStSTD -// stdcx. 
LdStSTDCX -// stdu LdStSTDU -// stdux LdStSTDU -// stdx LdStSTD -// stfd LdStSTFD -// stfdu LdStSTFDU -// stfdux LdStSTFDU -// stfdx LdStSTFD -// stfiwx LdStSTFD -// stfs LdStSTFD -// stfsu LdStSTFDU -// stfsux LdStSTFDU -// stfsx LdStSTFD -// sth LdStStore -// sthbrx LdStStore -// sthu LdStStoreUpd -// sthux LdStStoreUpd -// sthx LdStStore -// stmw LdStLMW -// stswi LdStLMW -// stswx LdStLMW -// stvebx LdStSTVEBX -// stvehx LdStSTVEBX -// stvewx LdStSTVEBX -// stvx LdStSTVEBX -// stvxl LdStSTVEBX -// stw LdStStore -// stwbrx LdStStore -// stwcx. LdStSTWCX -// stwu LdStStoreUpd -// stwux LdStStoreUpd -// stwx LdStStore -// subf IntGeneral -// subfc IntGeneral -// subfe IntGeneral -// subfic IntGeneral -// subfme IntGeneral -// subfze IntGeneral -// sync LdStSync -// td IntTrapD -// tdi IntTrapD -// tlbia LdStSLBIA -// tlbie LdStDCBF -// tlbsync SprTLBSYNC -// tw IntTrapW -// twi IntTrapW -// vaddcuw VecGeneral -// vaddfp VecFP -// vaddsbs VecGeneral -// vaddshs VecGeneral -// vaddsws VecGeneral -// vaddubm VecGeneral -// vaddubs VecGeneral -// vadduhm VecGeneral -// vadduhs VecGeneral -// vadduwm VecGeneral -// vadduws VecGeneral -// vand VecGeneral -// vandc VecGeneral -// vavgsb VecGeneral -// vavgsh VecGeneral -// vavgsw VecGeneral -// vavgub VecGeneral -// vavguh VecGeneral -// vavguw VecGeneral -// vcfsx VecFP -// vcfux VecFP -// vcmpbfp VecFPCompare -// vcmpeqfp VecFPCompare -// vcmpequb VecGeneral -// vcmpequh VecGeneral -// vcmpequw VecGeneral -// vcmpgefp VecFPCompare -// vcmpgtfp VecFPCompare -// vcmpgtsb VecGeneral -// vcmpgtsh VecGeneral -// vcmpgtsw VecGeneral -// vcmpgtub VecGeneral -// vcmpgtuh VecGeneral -// vcmpgtuw VecGeneral -// vctsxs VecFP -// vctuxs VecFP -// vexptefp VecFP -// vlogefp VecFP -// vmaddfp VecFP -// vmaxfp VecFPCompare -// vmaxsb VecGeneral -// vmaxsh VecGeneral -// vmaxsw VecGeneral -// vmaxub VecGeneral -// vmaxuh VecGeneral -// vmaxuw VecGeneral -// vmhaddshs VecComplex -// vmhraddshs VecComplex -// vminfp VecFPCompare -// vminsb VecGeneral -// vminsh VecGeneral -// vminsw VecGeneral -// vminub VecGeneral -// vminuh VecGeneral -// vminuw VecGeneral -// vmladduhm VecComplex -// vmrghb VecPerm -// vmrghh VecPerm -// vmrghw VecPerm -// vmrglb VecPerm -// vmrglh VecPerm -// vmrglw VecPerm -// vmsubfp VecFP -// vmsummbm VecComplex -// vmsumshm VecComplex -// vmsumshs VecComplex -// vmsumubm VecComplex -// vmsumuhm VecComplex -// vmsumuhs VecComplex -// vmulesb VecComplex -// vmulesh VecComplex -// vmuleub VecComplex -// vmuleuh VecComplex -// vmulosb VecComplex -// vmulosh VecComplex -// vmuloub VecComplex -// vmulouh VecComplex -// vnor VecGeneral -// vor VecGeneral -// vperm VecPerm -// vpkpx VecPerm -// vpkshss VecPerm -// vpkshus VecPerm -// vpkswss VecPerm -// vpkswus VecPerm -// vpkuhum VecPerm -// vpkuhus VecPerm -// vpkuwum VecPerm -// vpkuwus VecPerm -// vrefp VecFPRound -// vrfim VecFPRound -// vrfin VecFPRound -// vrfip VecFPRound -// vrfiz VecFPRound -// vrlb VecGeneral -// vrlh VecGeneral -// vrlw VecGeneral -// vrsqrtefp VecFP -// vsel VecGeneral -// vsl VecVSL -// vslb VecGeneral -// vsldoi VecPerm -// vslh VecGeneral -// vslo VecPerm -// vslw VecGeneral -// vspltb VecPerm -// vsplth VecPerm -// vspltisb VecPerm -// vspltish VecPerm -// vspltisw VecPerm -// vspltw VecPerm -// vsr VecVSR -// vsrab VecGeneral -// vsrah VecGeneral -// vsraw VecGeneral -// vsrb VecGeneral -// vsrh VecGeneral -// vsro VecPerm -// vsrw VecGeneral -// vsubcuw VecGeneral -// vsubfp VecFP -// vsubsbs VecGeneral -// vsubshs VecGeneral -// vsubsws VecGeneral -// 
vsububm VecGeneral -// vsububs VecGeneral -// vsubuhm VecGeneral -// vsubuhs VecGeneral -// vsubuwm VecGeneral -// vsubuws VecGeneral -// vsum2sws VecComplex -// vsum4sbs VecComplex -// vsum4shs VecComplex -// vsum4ubs VecComplex -// vsumsws VecComplex -// vupkhpx VecPerm -// vupkhsb VecPerm -// vupkhsh VecPerm -// vupklpx VecPerm -// vupklsb VecPerm -// vupklsh VecPerm -// vxor VecGeneral -// xor IntSimple -// xori IntSimple -// xoris IntSimple +// add IIC_IntSimple +// addc IIC_IntGeneral +// adde IIC_IntGeneral +// addi IIC_IntSimple +// addic IIC_IntGeneral +// addic. IIC_IntGeneral +// addis IIC_IntSimple +// addme IIC_IntGeneral +// addze IIC_IntGeneral +// and IIC_IntSimple +// andc IIC_IntSimple +// andi. IIC_IntGeneral +// andis. IIC_IntGeneral +// b IIC_BrB +// bc IIC_BrB +// bcctr IIC_BrB +// bclr IIC_BrB +// cmp IIC_IntCompare +// cmpi IIC_IntCompare +// cmpl IIC_IntCompare +// cmpli IIC_IntCompare +// cntlzd IIC_IntRotateD +// cntlzw IIC_IntGeneral +// crand IIC_BrCR +// crandc IIC_BrCR +// creqv IIC_BrCR +// crnand IIC_BrCR +// crnor IIC_BrCR +// cror IIC_BrCR +// crorc IIC_BrCR +// crxor IIC_BrCR +// dcba IIC_LdStDCBA +// dcbf IIC_LdStDCBF +// dcbi IIC_LdStDCBI +// dcbst IIC_LdStDCBF +// dcbt IIC_LdStLoad +// dcbtst IIC_LdStLoad +// dcbz IIC_LdStDCBF +// divd IIC_IntDivD +// divdu IIC_IntDivD +// divw IIC_IntDivW +// divwu IIC_IntDivW +// dss IIC_LdStDSS +// dst IIC_LdStDSS +// dstst IIC_LdStDSS +// eciwx IIC_LdStLoad +// ecowx IIC_LdStLoad +// eieio IIC_LdStLoad +// eqv IIC_IntSimple +// extsb IIC_IntSimple +// extsh IIC_IntSimple +// extsw IIC_IntSimple +// fabs IIC_FPGeneral +// fadd IIC_FPAddSub +// fadds IIC_FPGeneral +// fcfid IIC_FPGeneral +// fcmpo IIC_FPCompare +// fcmpu IIC_FPCompare +// fctid IIC_FPGeneral +// fctidz IIC_FPGeneral +// fctiw IIC_FPGeneral +// fctiwz IIC_FPGeneral +// fdiv IIC_FPDivD +// fdivs IIC_FPDivS +// fmadd IIC_FPFused +// fmadds IIC_FPGeneral +// fmr IIC_FPGeneral +// fmsub IIC_FPFused +// fmsubs IIC_FPGeneral +// fmul IIC_FPFused +// fmuls IIC_FPGeneral +// fnabs IIC_FPGeneral +// fneg IIC_FPGeneral +// fnmadd IIC_FPFused +// fnmadds IIC_FPGeneral +// fnmsub IIC_FPFused +// fnmsubs IIC_FPGeneral +// fres IIC_FPRes +// frsp IIC_FPGeneral +// frsqrte IIC_FPGeneral +// fsel IIC_FPGeneral +// fsqrt IIC_FPSqrtD +// fsqrts IIC_FPSqrtS +// fsub IIC_FPAddSub +// fsubs IIC_FPGeneral +// icbi IIC_LdStICBI +// isync IIC_SprISYNC +// lbz IIC_LdStLoad +// lbzu IIC_LdStLoadUpd +// lbzux IIC_LdStLoadUpdX +// lbzx IIC_LdStLoad +// ld IIC_LdStLD +// ldarx IIC_LdStLDARX +// ldu IIC_LdStLDU +// ldux IIC_LdStLDUX +// ldx IIC_LdStLD +// lfd IIC_LdStLFD +// lfdu IIC_LdStLFDU +// lfdux IIC_LdStLFDUX +// lfdx IIC_LdStLFD +// lfs IIC_LdStLFD +// lfsu IIC_LdStLFDU +// lfsux IIC_LdStLFDUX +// lfsx IIC_LdStLFD +// lha IIC_LdStLHA +// lhau IIC_LdStLHAU +// lhaux IIC_LdStLHAUX +// lhax IIC_LdStLHA +// lhbrx IIC_LdStLoad +// lhz IIC_LdStLoad +// lhzu IIC_LdStLoadUpd +// lhzux IIC_LdStLoadUpdX +// lhzx IIC_LdStLoad +// lmw IIC_LdStLMW +// lswi IIC_LdStLMW +// lswx IIC_LdStLMW +// lvebx IIC_LdStLVecX +// lvehx IIC_LdStLVecX +// lvewx IIC_LdStLVecX +// lvsl IIC_LdStLVecX +// lvsr IIC_LdStLVecX +// lvx IIC_LdStLVecX +// lvxl IIC_LdStLVecX +// lwa IIC_LdStLWA +// lwarx IIC_LdStLWARX +// lwaux IIC_LdStLHAUX +// lwax IIC_LdStLHA +// lwbrx IIC_LdStLoad +// lwz IIC_LdStLoad +// lwzu IIC_LdStLoadUpd +// lwzux IIC_LdStLoadUpdX +// lwzx IIC_LdStLoad +// mcrf IIC_BrMCR +// mcrfs IIC_FPGeneral +// mcrxr IIC_BrMCRX +// mfcr IIC_SprMFCR +// mffs IIC_IntMFFS +// mfmsr IIC_SprMFMSR +// 
mfspr IIC_SprMFSPR +// mfsr IIC_SprMFSR +// mfsrin IIC_SprMFSR +// mftb IIC_SprMFTB +// mfvscr IIC_IntMFVSCR +// mtcrf IIC_BrMCRX +// mtfsb0 IIC_IntMTFSB0 +// mtfsb1 IIC_IntMTFSB0 +// mtfsf IIC_IntMTFSB0 +// mtfsfi IIC_IntMTFSB0 +// mtmsr IIC_SprMTMSR +// mtmsrd IIC_LdStLD +// mtspr IIC_SprMTSPR +// mtsr IIC_SprMTSR +// mtsrd IIC_IntMTSRD +// mtsrdin IIC_IntMTSRD +// mtsrin IIC_SprMTSRIN +// mtvscr IIC_IntMFVSCR +// mulhd IIC_IntMulHD +// mulhdu IIC_IntMulHD +// mulhw IIC_IntMulHW +// mulhwu IIC_IntMulHWU +// mulld IIC_IntMulHD +// mulli IIC_IntMulLI +// mullw IIC_IntMulHW +// nand IIC_IntSimple +// neg IIC_IntSimple +// nor IIC_IntSimple +// or IIC_IntSimple +// orc IIC_IntSimple +// ori IIC_IntSimple +// oris IIC_IntSimple +// rfi IIC_SprRFI +// rfid IIC_IntRFID +// rldcl IIC_IntRotateD +// rldcr IIC_IntRotateD +// rldic IIC_IntRotateDI +// rldicl IIC_IntRotateDI +// rldicr IIC_IntRotateDI +// rldimi IIC_IntRotateDI +// rlwimi IIC_IntRotate +// rlwinm IIC_IntGeneral +// rlwnm IIC_IntGeneral +// sc IIC_SprSC +// slbia IIC_LdStSLBIA +// slbie IIC_LdStSLBIE +// sld IIC_IntRotateD +// slw IIC_IntGeneral +// srad IIC_IntRotateD +// sradi IIC_IntRotateDI +// sraw IIC_IntShift +// srawi IIC_IntShift +// srd IIC_IntRotateD +// srw IIC_IntGeneral +// stb IIC_LdStStore +// stbu IIC_LdStStoreUpd +// stbux IIC_LdStStoreUpd +// stbx IIC_LdStStore +// std IIC_LdStSTD +// stdcx. IIC_LdStSTDCX +// stdu IIC_LdStSTDU +// stdux IIC_LdStSTDUX +// stdx IIC_LdStSTD +// stfd IIC_LdStSTFD +// stfdu IIC_LdStSTFDU +// stfdux IIC_LdStSTFDU +// stfdx IIC_LdStSTFD +// stfiwx IIC_LdStSTFD +// stfs IIC_LdStSTFD +// stfsu IIC_LdStSTFDU +// stfsux IIC_LdStSTFDU +// stfsx IIC_LdStSTFD +// sth IIC_LdStStore +// sthbrx IIC_LdStStore +// sthu IIC_LdStStoreUpd +// sthux IIC_LdStStoreUpd +// sthx IIC_LdStStore +// stmw IIC_LdStLMW +// stswi IIC_LdStLMW +// stswx IIC_LdStLMW +// stvebx IIC_LdStSTVEBX +// stvehx IIC_LdStSTVEBX +// stvewx IIC_LdStSTVEBX +// stvx IIC_LdStSTVEBX +// stvxl IIC_LdStSTVEBX +// stw IIC_LdStStore +// stwbrx IIC_LdStStore +// stwcx. 
IIC_LdStSTWCX +// stwu IIC_LdStStoreUpd +// stwux IIC_LdStStoreUpd +// stwx IIC_LdStStore +// subf IIC_IntGeneral +// subfc IIC_IntGeneral +// subfe IIC_IntGeneral +// subfic IIC_IntGeneral +// subfme IIC_IntGeneral +// subfze IIC_IntGeneral +// sync IIC_LdStSync +// td IIC_IntTrapD +// tdi IIC_IntTrapD +// tlbia IIC_LdStSLBIA +// tlbie IIC_LdStDCBF +// tlbsync IIC_SprTLBSYNC +// tw IIC_IntTrapW +// twi IIC_IntTrapW +// vaddcuw IIC_VecGeneral +// vaddfp IIC_VecFP +// vaddsbs IIC_VecGeneral +// vaddshs IIC_VecGeneral +// vaddsws IIC_VecGeneral +// vaddubm IIC_VecGeneral +// vaddubs IIC_VecGeneral +// vadduhm IIC_VecGeneral +// vadduhs IIC_VecGeneral +// vadduwm IIC_VecGeneral +// vadduws IIC_VecGeneral +// vand IIC_VecGeneral +// vandc IIC_VecGeneral +// vavgsb IIC_VecGeneral +// vavgsh IIC_VecGeneral +// vavgsw IIC_VecGeneral +// vavgub IIC_VecGeneral +// vavguh IIC_VecGeneral +// vavguw IIC_VecGeneral +// vcfsx IIC_VecFP +// vcfux IIC_VecFP +// vcmpbfp IIC_VecFPCompare +// vcmpeqfp IIC_VecFPCompare +// vcmpequb IIC_VecGeneral +// vcmpequh IIC_VecGeneral +// vcmpequw IIC_VecGeneral +// vcmpgefp IIC_VecFPCompare +// vcmpgtfp IIC_VecFPCompare +// vcmpgtsb IIC_VecGeneral +// vcmpgtsh IIC_VecGeneral +// vcmpgtsw IIC_VecGeneral +// vcmpgtub IIC_VecGeneral +// vcmpgtuh IIC_VecGeneral +// vcmpgtuw IIC_VecGeneral +// vctsxs IIC_VecFP +// vctuxs IIC_VecFP +// vexptefp IIC_VecFP +// vlogefp IIC_VecFP +// vmaddfp IIC_VecFP +// vmaxfp IIC_VecFPCompare +// vmaxsb IIC_VecGeneral +// vmaxsh IIC_VecGeneral +// vmaxsw IIC_VecGeneral +// vmaxub IIC_VecGeneral +// vmaxuh IIC_VecGeneral +// vmaxuw IIC_VecGeneral +// vmhaddshs IIC_VecComplex +// vmhraddshs IIC_VecComplex +// vminfp IIC_VecFPCompare +// vminsb IIC_VecGeneral +// vminsh IIC_VecGeneral +// vminsw IIC_VecGeneral +// vminub IIC_VecGeneral +// vminuh IIC_VecGeneral +// vminuw IIC_VecGeneral +// vmladduhm IIC_VecComplex +// vmrghb IIC_VecPerm +// vmrghh IIC_VecPerm +// vmrghw IIC_VecPerm +// vmrglb IIC_VecPerm +// vmrglh IIC_VecPerm +// vmrglw IIC_VecPerm +// vmsubfp IIC_VecFP +// vmsummbm IIC_VecComplex +// vmsumshm IIC_VecComplex +// vmsumshs IIC_VecComplex +// vmsumubm IIC_VecComplex +// vmsumuhm IIC_VecComplex +// vmsumuhs IIC_VecComplex +// vmulesb IIC_VecComplex +// vmulesh IIC_VecComplex +// vmuleub IIC_VecComplex +// vmuleuh IIC_VecComplex +// vmulosb IIC_VecComplex +// vmulosh IIC_VecComplex +// vmuloub IIC_VecComplex +// vmulouh IIC_VecComplex +// vnor IIC_VecGeneral +// vor IIC_VecGeneral +// vperm IIC_VecPerm +// vpkpx IIC_VecPerm +// vpkshss IIC_VecPerm +// vpkshus IIC_VecPerm +// vpkswss IIC_VecPerm +// vpkswus IIC_VecPerm +// vpkuhum IIC_VecPerm +// vpkuhus IIC_VecPerm +// vpkuwum IIC_VecPerm +// vpkuwus IIC_VecPerm +// vrefp IIC_VecFPRound +// vrfim IIC_VecFPRound +// vrfin IIC_VecFPRound +// vrfip IIC_VecFPRound +// vrfiz IIC_VecFPRound +// vrlb IIC_VecGeneral +// vrlh IIC_VecGeneral +// vrlw IIC_VecGeneral +// vrsqrtefp IIC_VecFP +// vsel IIC_VecGeneral +// vsl IIC_VecVSL +// vslb IIC_VecGeneral +// vsldoi IIC_VecPerm +// vslh IIC_VecGeneral +// vslo IIC_VecPerm +// vslw IIC_VecGeneral +// vspltb IIC_VecPerm +// vsplth IIC_VecPerm +// vspltisb IIC_VecPerm +// vspltish IIC_VecPerm +// vspltisw IIC_VecPerm +// vspltw IIC_VecPerm +// vsr IIC_VecVSR +// vsrab IIC_VecGeneral +// vsrah IIC_VecGeneral +// vsraw IIC_VecGeneral +// vsrb IIC_VecGeneral +// vsrh IIC_VecGeneral +// vsro IIC_VecPerm +// vsrw IIC_VecGeneral +// vsubcuw IIC_VecGeneral +// vsubfp IIC_VecFP +// vsubsbs IIC_VecGeneral +// vsubshs IIC_VecGeneral +// vsubsws 
IIC_VecGeneral +// vsububm IIC_VecGeneral +// vsububs IIC_VecGeneral +// vsubuhm IIC_VecGeneral +// vsubuhs IIC_VecGeneral +// vsubuwm IIC_VecGeneral +// vsubuws IIC_VecGeneral +// vsum2sws IIC_VecComplex +// vsum4sbs IIC_VecComplex +// vsum4shs IIC_VecComplex +// vsum4ubs IIC_VecComplex +// vsumsws IIC_VecComplex +// vupkhpx IIC_VecPerm +// vupkhsb IIC_VecPerm +// vupkhsh IIC_VecPerm +// vupklpx IIC_VecPerm +// vupklsb IIC_VecPerm +// vupklsh IIC_VecPerm +// vxor IIC_VecGeneral +// xor IIC_IntSimple +// xori IIC_IntSimple +// xoris IIC_IntSimple // diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCSchedule440.td b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCSchedule440.td index 37b6eac10cfe..218fed248a31 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCSchedule440.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCSchedule440.td @@ -26,43 +26,39 @@ //===----------------------------------------------------------------------===// // Functional units on the PowerPC 440/450 chip sets // -def IFTH1 : FuncUnit; // Fetch unit 1 -def IFTH2 : FuncUnit; // Fetch unit 2 -def PDCD1 : FuncUnit; // Decode unit 1 -def PDCD2 : FuncUnit; // Decode unit 2 -def DISS1 : FuncUnit; // Issue unit 1 -def DISS2 : FuncUnit; // Issue unit 2 -def LRACC : FuncUnit; // Register access and dispatch for - // the simple integer (J-pipe) and - // load/store (L-pipe) pipelines -def IRACC : FuncUnit; // Register access and dispatch for - // the complex integer (I-pipe) pipeline -def FRACC : FuncUnit; // Register access and dispatch for - // the floating-point execution (F-pipe) pipeline -def IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline -def IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline -def IWB : FuncUnit; // Write-back unit for the I pipeline -def JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline -def JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline -def JWB : FuncUnit; // Write-back unit for the J pipeline -def AGEN : FuncUnit; // Address generation for the L pipeline -def CRD : FuncUnit; // D-cache access for the L pipeline -def LWB : FuncUnit; // Write-back unit for the L pipeline -def FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline -def FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline -def FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline -def FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline -def FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline -def FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline -def FWB : FuncUnit; // Write-back unit for the F pipeline +def P440_DISS1 : FuncUnit; // Issue unit 1 +def P440_DISS2 : FuncUnit; // Issue unit 2 +def P440_LRACC : FuncUnit; // Register access and dispatch for + // the simple integer (J-pipe) and + // load/store (L-pipe) pipelines +def P440_IRACC : FuncUnit; // Register access and dispatch for + // the complex integer (I-pipe) pipeline +def P440_FRACC : FuncUnit; // Register access and dispatch for + // the floating-point execution (F-pipe) pipeline +def P440_IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline +def P440_IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline +def P440_IWB : FuncUnit; // Write-back unit for the I pipeline +def P440_JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline +def P440_JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline +def P440_JWB : FuncUnit; // Write-back unit for the J pipeline +def P440_AGEN : FuncUnit; // Address generation for the L pipeline +def P440_CRD : FuncUnit; // 
D-cache access for the L pipeline +def P440_LWB : FuncUnit; // Write-back unit for the L pipeline +def P440_FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline +def P440_FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline +def P440_FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline +def P440_FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline +def P440_FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline +def P440_FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline +def P440_FWB : FuncUnit; // Write-back unit for the F pipeline -def LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used - // to make sure that no lwarx/stwcx. - // instructions are issued while another - // lwarx/stwcx. is in the L pipe. +def P440_LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used + // to make sure that no lwarx/stwcx. + // instructions are issued while another + // lwarx/stwcx. is in the L pipe. -def GPR_Bypass : Bypass; // The bypass for general-purpose regs. -def FPR_Bypass : Bypass; // The bypass for floating-point regs. +def P440_GPR_Bypass : Bypass; // The bypass for general-purpose regs. +def P440_FPR_Bypass : Bypass; // The bypass for floating-point regs. // Notes: // Instructions are held in the FRACC, LRACC and IRACC pipeline @@ -104,560 +100,500 @@ def FPR_Bypass : Bypass; // The bypass for floating-point regs. def PPC440Itineraries : ProcessorItineraries< - [IFTH1, IFTH2, PDCD1, PDCD2, DISS1, DISS2, FRACC, - IRACC, IEXE1, IEXE2, IWB, LRACC, JEXE1, JEXE2, JWB, AGEN, CRD, LWB, - FEXE1, FEXE2, FEXE3, FEXE4, FEXE5, FEXE6, FWB, LWARX_Hold], - [GPR_Bypass, FPR_Bypass], [ - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC, LRACC]>, - InstrStage<1, [IEXE1, JEXE1]>, - InstrStage<1, [IEXE2, JEXE2]>, - InstrStage<1, [IWB, JWB]>], - [6, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC, LRACC]>, - InstrStage<1, [IEXE1, JEXE1]>, - InstrStage<1, [IEXE2, JEXE2]>, - InstrStage<1, [IWB, JWB]>], - [6, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC, LRACC]>, - InstrStage<1, [IEXE1, JEXE1]>, - InstrStage<1, [IEXE2, JEXE2]>, - InstrStage<1, [IWB, JWB]>], - [6, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<33, [IWB]>], - [40, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [7, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [7, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, 
- InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC, LRACC]>, - InstrStage<1, [IEXE1, JEXE1]>, - InstrStage<1, [IEXE2, JEXE2]>, - InstrStage<1, [IWB, JWB]>], - [6, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC, LRACC]>, - InstrStage<1, [IEXE1, JEXE1]>, - InstrStage<1, [IEXE2, JEXE2]>, - InstrStage<1, [IWB, JWB]>], - [6, 4, 4], - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [6, 4], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4, 4], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [9, 5], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [9, 5], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - 
InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5, 5], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5, 5], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [9, 5, 5], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [9, 5, 5], - [NoBypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1]>, - InstrStage<1, [IRACC], 0>, - InstrStage<4, [LWARX_Hold], 0>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1]>, - InstrStage<1, [IRACC], 0>, - InstrStage<4, [LWARX_Hold], 0>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1]>, - InstrStage<1, [IRACC], 0>, - InstrStage<4, [LWARX_Hold], 0>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<3, [AGEN], 1>, - InstrStage<2, [CRD], 1>, - InstrStage<1, [LWB]>]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC], 0>, - InstrStage<1, [LRACC], 0>, - InstrStage<1, [IRACC]>, - InstrStage<1, [FEXE1], 0>, - InstrStage<1, [AGEN], 0>, - InstrStage<1, [JEXE1], 0>, - InstrStage<1, [IEXE1]>, 
- InstrStage<1, [FEXE2], 0>, - InstrStage<1, [CRD], 0>, - InstrStage<1, [JEXE2], 0>, - InstrStage<1, [IEXE2]>, - InstrStage<6, [FEXE3], 0>, - InstrStage<6, [LWB], 0>, - InstrStage<6, [JWB], 0>, - InstrStage<6, [IWB]>]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [6, 4], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [6, 4], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<3, [IWB]>], - [9, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [7, 4], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<3, [IWB]>], - [10, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<3, [IWB]>], - [10, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<3, [IWB]>], - [10, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<3, [IWB]>], - [10, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [IRACC]>, - InstrStage<1, [IEXE1]>, - InstrStage<1, [IEXE2]>, - InstrStage<1, [IWB]>], - [8, 4], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<1, [FWB]>], - [10, 4, 4], - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<1, [FWB]>], - [10, 4, 4], - [FPR_Bypass, FPR_Bypass, 
FPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<1, [FWB]>], - [10, 4, 4], - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<25, [FWB]>], - [35, 4, 4], - [NoBypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<13, [FWB]>], - [23, 4, 4], - [NoBypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<1, [FWB]>], - [10, 4, 4, 4], - [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [FRACC]>, - InstrStage<1, [FEXE1]>, - InstrStage<1, [FEXE2]>, - InstrStage<1, [FEXE3]>, - InstrStage<1, [FEXE4]>, - InstrStage<1, [FEXE5]>, - InstrStage<1, [FEXE6]>, - InstrStage<1, [FWB]>], - [10, 4], - [FPR_Bypass, FPR_Bypass]> + [P440_DISS1, P440_DISS2, P440_FRACC, P440_IRACC, P440_IEXE1, P440_IEXE2, + P440_IWB, P440_LRACC, P440_JEXE1, P440_JEXE2, P440_JWB, P440_AGEN, P440_CRD, + P440_LWB, P440_FEXE1, P440_FEXE2, P440_FEXE3, P440_FEXE4, P440_FEXE5, + P440_FEXE6, P440_FWB, P440_LWARX_Hold], + [P440_GPR_Bypass, P440_FPR_Bypass], [ + InstrItinData, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<33, [P440_IWB]>], + [36, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [3, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [3, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + 
InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC, P440_LRACC]>, + InstrStage<1, [P440_IEXE1, P440_JEXE1]>, + InstrStage<1, [P440_IEXE2, P440_JEXE2]>, + InstrStage<1, [P440_IWB, P440_JWB]>], + [2, 0, 0], + [P440_GPR_Bypass, + P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [2, 0], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0, 0], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [5, 1, 1], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [5, 2, 1, 1], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [5, 2, 1, 1], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [1, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [2, 1, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + 
InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [1, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [2, 1, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [5, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [5, 2, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [5, 2, 1, 1], + [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC], 0>, + InstrStage<4, [P440_LWARX_Hold], 0>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [2, 1, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<2, [P440_LWB]>], + [2, 1, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC], 0>, + InstrStage<4, [P440_LWARX_Hold], 0>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC], 0>, + InstrStage<4, [P440_LWARX_Hold], 0>, + InstrStage<1, [P440_LRACC]>, + InstrStage<1, [P440_AGEN]>, + InstrStage<1, [P440_CRD]>, + InstrStage<1, [P440_LWB]>], + [4, 1, 1], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_LRACC]>, + InstrStage<3, [P440_AGEN], 1>, + InstrStage<2, [P440_CRD], 1>, + InstrStage<1, [P440_LWB]>]>, + InstrItinData, + InstrStage<1, [P440_FRACC], 0>, + InstrStage<1, [P440_LRACC], 0>, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_FEXE1], 0>, + InstrStage<1, [P440_AGEN], 0>, + InstrStage<1, [P440_JEXE1], 0>, + InstrStage<1, [P440_IEXE1]>, + 
InstrStage<1, [P440_FEXE2], 0>, + InstrStage<1, [P440_CRD], 0>, + InstrStage<1, [P440_JEXE2], 0>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<6, [P440_FEXE3], 0>, + InstrStage<6, [P440_LWB], 0>, + InstrStage<6, [P440_JWB], 0>, + InstrStage<6, [P440_IWB]>]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [2, 0], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [2, 0], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [5, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [3, 0], + [P440_GPR_Bypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [6, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [6, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [6, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<3, [P440_IWB]>], + [6, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_IRACC]>, + InstrStage<1, [P440_IEXE1]>, + InstrStage<1, [P440_IEXE2]>, + InstrStage<1, [P440_IWB]>], + [4, 0], + [NoBypass, P440_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0, 0], + [P440_FPR_Bypass, + P440_FPR_Bypass, P440_FPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0, 0], + [P440_FPR_Bypass, + P440_FPR_Bypass, P440_FPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, [P440_FEXE3]>, + InstrStage<1, [P440_FEXE4]>, + InstrStage<1, [P440_FEXE5]>, + InstrStage<1, [P440_FEXE6]>, + InstrStage<1, [P440_FWB]>], + [6, 0, 0], + [P440_FPR_Bypass, P440_FPR_Bypass, + P440_FPR_Bypass]>, + InstrItinData, + InstrStage<1, [P440_FRACC]>, + InstrStage<1, [P440_FEXE1]>, + InstrStage<1, [P440_FEXE2]>, + InstrStage<1, 
[P440_FEXE3]>,
+                      InstrStage<1, [P440_FEXE4]>,
+                      InstrStage<1, [P440_FEXE5]>,
+                      InstrStage<1, [P440_FEXE6]>,
+                      InstrStage<25, [P440_FWB]>],
+                     [31, 0, 0],
+                     [NoBypass, P440_FPR_Bypass, P440_FPR_Bypass]>,
+  InstrItinData,
+                      InstrStage<1, [P440_FRACC]>,
+                      InstrStage<1, [P440_FEXE1]>,
+                      InstrStage<1, [P440_FEXE2]>,
+                      InstrStage<1, [P440_FEXE3]>,
+                      InstrStage<1, [P440_FEXE4]>,
+                      InstrStage<1, [P440_FEXE5]>,
+                      InstrStage<1, [P440_FEXE6]>,
+                      InstrStage<13, [P440_FWB]>],
+                     [19, 0, 0],
+                     [NoBypass, P440_FPR_Bypass, P440_FPR_Bypass]>,
+  InstrItinData,
+                      InstrStage<1, [P440_FRACC]>,
+                      InstrStage<1, [P440_FEXE1]>,
+                      InstrStage<1, [P440_FEXE2]>,
+                      InstrStage<1, [P440_FEXE3]>,
+                      InstrStage<1, [P440_FEXE4]>,
+                      InstrStage<1, [P440_FEXE5]>,
+                      InstrStage<1, [P440_FEXE6]>,
+                      InstrStage<1, [P440_FWB]>],
+                     [6, 0, 0, 0],
+                     [P440_FPR_Bypass,
+                      P440_FPR_Bypass, P440_FPR_Bypass,
+                      P440_FPR_Bypass]>,
+  InstrItinData,
+                      InstrStage<1, [P440_FRACC]>,
+                      InstrStage<1, [P440_FEXE1]>,
+                      InstrStage<1, [P440_FEXE2]>,
+                      InstrStage<1, [P440_FEXE3]>,
+                      InstrStage<1, [P440_FEXE4]>,
+                      InstrStage<1, [P440_FEXE5]>,
+                      InstrStage<1, [P440_FEXE6]>,
+                      InstrStage<1, [P440_FWB]>],
+                     [6, 0],
+                     [P440_FPR_Bypass, P440_FPR_Bypass]>
 ]>;
+
+// ===---------------------------------------------------------------------===//
+// PPC440 machine model for scheduling and other instruction cost heuristics.
+
+def PPC440Model : SchedMachineModel {
+  let IssueWidth = 2;  // 2 instructions are dispatched per cycle.
+  let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+  let LoadLatency = 5; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+                       // Itineraries are queried instead.
+
+  let Itineraries = PPC440Itineraries;
+}
+
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleA2.td
index 1612cd2a0b84..14476963bad0 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleA2.td
@@ -14,8 +14,8 @@
 //===----------------------------------------------------------------------===//
 // Functional units on the PowerPC A2 chip sets
 //
-def XU : FuncUnit; // XU pipeline
-def FU : FuncUnit; // FI pipeline
+def A2_XU : FuncUnit; // A2_XU pipeline
+def A2_FU : FuncUnit; // FI pipeline
 //
 // This file defines the itinerary class data for the PPC A2 processor.
@@ -24,126 +24,140 @@ def FU : FuncUnit; // FI pipeline
 
 def PPCA2Itineraries : ProcessorItineraries<
-  [XU, FU], [], [
-  InstrItinData],
-                [1, 1, 1]>,
-  InstrItinData],
-                [2, 1, 1]>,
-  InstrItinData],
-                [2, 1, 1]>,
-  InstrItinData],
-                [39, 1, 1]>,
-  InstrItinData],
-                [71, 1, 1]>,
-  InstrItinData],
-                [5, 1, 1]>,
-  InstrItinData],
-                [5, 1, 1]>,
-  InstrItinData],
-                [6, 1, 1]>,
-  InstrItinData],
-                [2, 1, 1]>,
-  InstrItinData],
-                [2, 1, 1]>,
-  InstrItinData],
-                [2, 1, 1]>,
-  InstrItinData],
-                [2, 1, 1]>,
-  InstrItinData],
-                [2, 1]>,
-  InstrItinData],
-                [2, 1]>,
-  InstrItinData],
-                [6, 1, 1]>,
-  InstrItinData],
-                [1, 1, 1]>,
-  InstrItinData],
-                [5, 1, 1]>,
-  InstrItinData],
-                [1, 1, 1]>,
-  InstrItinData],
-                [1, 1, 1]>,
-  InstrItinData],
-                [1, 1, 1]>,
-  InstrItinData],
-                [1, 1, 1]>,
-  InstrItinData],
-                [6, 1, 1]>,
-  InstrItinData],
-                [6, 8, 1, 1]>,
-  InstrItinData],
-                [6, 1, 1]>,
-  InstrItinData],
-                [1, 1, 1]>,
-  InstrItinData],
-                [2, 1, 1, 1]>,
-  InstrItinData],
-                [16, 1, 1]>,
-  InstrItinData],
-                [1, 1, 1]>,
-  InstrItinData],
-                [2, 1, 1, 1]>,
-  InstrItinData],
-                [7, 1, 1]>,
-  InstrItinData],
-                [7, 9, 1, 1]>,
-  InstrItinData],
-                [6, 1, 1]>,
-  InstrItinData],
-                [6, 8, 1, 1]>,
-  InstrItinData],
-                [82, 1, 1]>, // L2 latency
-  InstrItinData],
-                [1, 1, 1]>,
-  InstrItinData],
-                [2, 1, 1, 1]>,
-  InstrItinData],
-                [82, 1, 1]>, // L2 latency
-  InstrItinData],
-                [82, 1, 1]>, // L2 latency
-  InstrItinData],
-                [6]>,
-  InstrItinData],
-                [16]>,
-  InstrItinData],
-                [16, 1]>,
-  InstrItinData],
-                [6, 1]>,
-  InstrItinData],
-                [4, 1]>,
-  InstrItinData],
-                [6, 1]>,
-  InstrItinData],
-                [4, 1]>,
-  InstrItinData],
-                [6, 1]>,
-  InstrItinData],
-                [16]>,
-  InstrItinData],
-                [16]>,
-  InstrItinData],
-                [6, 1, 1]>,
-  InstrItinData],
-                [6, 1, 1]>,
-  InstrItinData],
-                [5, 1, 1]>,
-  InstrItinData],
-                [72, 1, 1]>,
-  InstrItinData],
-                [59, 1, 1]>,
-  InstrItinData],
-                [69, 1, 1]>,
-  InstrItinData],
-                [6, 1, 1, 1]>,
-  InstrItinData],
-                [6, 1]>
+  [A2_XU, A2_FU], [], [
+  InstrItinData],
+                [1, 0, 0]>,
+  InstrItinData],
+                [2, 0, 0]>,
+  InstrItinData],
+                [2, 0, 0]>,
+  InstrItinData],
+                [39, 0, 0]>,
+  InstrItinData],
+                [71, 0, 0]>,
+  InstrItinData],
+                [5, 0, 0]>,
+  InstrItinData],
+                [5, 0, 0]>,
+  InstrItinData],
+                [6, 0, 0]>,
+  InstrItinData],
+                [2, 0, 0]>,
+  InstrItinData],
+                [2, 0, 0]>,
+  InstrItinData],
+                [2, 0, 0]>,
+  InstrItinData],
+                [2, 0, 0]>,
+  InstrItinData],
+                [2, 0]>,
+  InstrItinData],
+                [2, 0]>,
+  InstrItinData],
+                [6, 0, 0]>,
+  InstrItinData],
+                [1, 0, 0]>,
+  InstrItinData],
+                [5, 0, 0]>,
+  InstrItinData],
+                [1, 0, 0]>,
+  InstrItinData],
+                [1, 0, 0]>,
+  InstrItinData],
+                [1, 0, 0]>,
+  InstrItinData],
+                [1, 0, 0]>,
+  InstrItinData],
+                [6, 0, 0]>,
+  InstrItinData],
+                [6, 8, 0, 0]>,
+  InstrItinData],
+                [6, 8, 0, 0]>,
+  InstrItinData],
+                [6, 0, 0]>,
+  InstrItinData],
+                [6, 0, 0]>,
+  InstrItinData],
+                [0, 0, 0]>,
+  InstrItinData],
+                [2, 0, 0, 0]>,
+  InstrItinData],
+                [16, 0, 0]>,
+  InstrItinData],
+                [0, 0, 0]>,
+  InstrItinData],
+                [2, 0, 0, 0]>,
+  InstrItinData],
+                [7, 0, 0]>,
+  InstrItinData],
+                [7, 9, 0, 0]>,
+  InstrItinData],
+                [7, 9, 0, 0]>,
+  InstrItinData],
+                [6, 0, 0]>,
+  InstrItinData],
+                [6, 8, 0, 0]>,
+  InstrItinData],
+                [6, 8, 0, 0]>,
+  InstrItinData],
+                [82, 0, 0]>, // L2 latency
+  InstrItinData],
+                [0, 0, 0]>,
+  InstrItinData],
+                [2, 0, 0, 0]>,
+  InstrItinData],
+                [2, 0, 0, 0]>,
+  InstrItinData],
+                [82, 0, 0]>, // L2 latency
+  InstrItinData],
+                [82, 0, 0]>, // L2 latency
+  InstrItinData],
+                [6]>,
+  InstrItinData],
+                [16]>,
+  InstrItinData],
+                [16, 0]>,
+  InstrItinData],
+                [6, 0]>,
+  InstrItinData],
+                [1, 0]>,
+  InstrItinData],
+                [4, 0]>,
+  InstrItinData],
+                [6, 0]>,
+  InstrItinData],
+                [4, 0]>,
+  InstrItinData],
+                [6, 0]>,
+  InstrItinData],
+                [16]>,
+  InstrItinData],
+                [16]>,
+  InstrItinData],
+                [6, 0, 0]>,
+  InstrItinData],
+                [6, 0, 0]>,
+  InstrItinData],
+                [5, 0, 0]>,
+  InstrItinData],
+                [72, 0, 0]>,
+  InstrItinData],
+                [59, 0, 0]>,
+  InstrItinData],
+                [69, 0, 0]>,
+  InstrItinData],
+                [65, 0, 0]>,
+  InstrItinData],
+                [6, 0, 0, 0]>,
+  InstrItinData],
+                [6, 0]>
 ]>;
 
 // ===---------------------------------------------------------------------===//
 // A2 machine model for scheduling and other instruction cost heuristics.
 
 def PPCA2Model : SchedMachineModel {
-  let IssueWidth = 1;  // 2 micro-ops are dispatched per cycle.
+  let IssueWidth = 1;  // 1 instruction is dispatched per cycle.
   let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
   let LoadLatency = 6; // Optimistic load latency assuming bypass.
                        // This is overridden by OperandCycles if the
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
index c189b9ed9a6c..dab89e3db353 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -19,238 +19,285 @@
 // * Decode & Dispatch
 // Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
 //   queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
-def DIS0 : FuncUnit; // Dispatch stage - insn 1
-def DIS1 : FuncUnit; // Dispatch stage - insn 2
+def E500_DIS0 : FuncUnit; // Dispatch stage - insn 1
+def E500_DIS1 : FuncUnit; // Dispatch stage - insn 2
 
 // * Execute
 //   6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
 //   Some instructions can only execute in SFX0 but not SFX1.
 //   The CFX has a bypass path, allowing non-divide instructions to execute
 //   while a divide instruction is executed.
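// A minimal sketch (not part of this patch) of the shape shared by every
// InstrItinData entry in these tables: an itinerary class, the pipeline
// stages it occupies, the per-operand def/use cycles, and the bypasses.
// All names here (HYPO_*, IIC_Hypothetical) are invented for illustration,
// and the FuncUnit/Bypass/InstrStage/InstrItinClass definitions from
// llvm/Target/TargetItinerary.td are assumed to be included.
def HYPO_IU  : FuncUnit;      // hypothetical integer pipeline
def HYPO_LSU : FuncUnit;      // hypothetical load/store pipeline
def HYPO_GPR_Bypass : Bypass; // hypothetical GPR bypass network
def IIC_Hypothetical : InstrItinClass;

def HypotheticalItineraries : ProcessorItineraries<
  [HYPO_IU, HYPO_LSU],   // functional units
  [HYPO_GPR_Bypass], [   // bypass networks
  InstrItinData<IIC_Hypothetical,
                [InstrStage<1, [HYPO_IU]>,    // one cycle in the integer unit
                 InstrStage<2, [HYPO_LSU]>],  // then two cycles in the LSU
                [3, 1, 1],                    // result ready at cycle 3;
                                              // both sources read at cycle 1
                [HYPO_GPR_Bypass,             // the def can be forwarded,
                 HYPO_GPR_Bypass, HYPO_GPR_Bypass]>
]>;
// Entries using NoBypass in the tables above mark operands whose values
// cannot be forwarded through a bypass network and must wait for writeback.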
-def SFX0 : FuncUnit; // Simple unit 0
-def SFX1 : FuncUnit; // Simple unit 1
-def BU : FuncUnit; // Branch unit
-def CFX_DivBypass
-    : FuncUnit; // CFX divide bypass path
-def CFX_0 : FuncUnit; // CFX pipeline
-def LSU_0 : FuncUnit; // LSU pipeline
-def FPU_0 : FuncUnit; // FPU pipeline
+def E500_SFX0 : FuncUnit; // Simple unit 0
+def E500_SFX1 : FuncUnit; // Simple unit 1
+def E500_BU : FuncUnit; // Branch unit
+def E500_CFX_DivBypass
+    : FuncUnit; // CFX divide bypass path
+def E500_CFX_0 : FuncUnit; // CFX pipeline
+def E500_LSU_0 : FuncUnit; // LSU pipeline
+def E500_FPU_0 : FuncUnit; // FPU pipeline
 
-def CR_Bypass : Bypass;
+def E500_GPR_Bypass : Bypass;
+def E500_FPR_Bypass : Bypass;
+def E500_CR_Bypass : Bypass;
 
 def PPCE500mcItineraries : ProcessorItineraries<
-  [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0],
-  [CR_Bypass, GPR_Bypass, FPR_Bypass], [
-  InstrItinData,
-                  InstrStage<1, [SFX0, SFX1]>],
-                 [4, 1, 1], // Latency = 1
-                 [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [SFX0, SFX1]>],
-                 [4, 1, 1], // Latency = 1
-                 [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [SFX0, SFX1]>],
-                 [5, 1, 1], // Latency = 1 or 2
-                 [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [CFX_0], 0>,
-                  InstrStage<14, [CFX_DivBypass]>],
-                 [17, 1, 1], // Latency=4..35, Repeat= 4..35
-                 [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<8, [FPU_0]>],
-                 [11], // Latency = 8
-                 [FPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<8, [FPU_0]>],
-                 [11, 1, 1], // Latency = 8
-                 [NoBypass, NoBypass, NoBypass]>,
-  InstrItinData,
-                  InstrStage<1, [CFX_0]>],
-                 [7, 1, 1], // Latency = 4, Repeat rate = 1
-                 [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [CFX_0]>],
-                 [7, 1, 1], // Latency = 4, Repeat rate = 1
-                 [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [CFX_0]>],
-                 [7, 1, 1], // Latency = 4, Repeat rate = 1
-                 [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [SFX0, SFX1]>],
-                 [4, 1, 1], // Latency = 1
-                 [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [SFX0, SFX1]>],
-                 [4, 1, 1], // Latency = 1
-                 [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<2, [SFX0]>],
-                 [5, 1], // Latency = 2, Repeat rate = 2
-                 [GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [BU]>],
-                 [4, 1], // Latency = 1
-                 [NoBypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [BU]>],
-                 [4, 1, 1], // Latency = 1
-                 [CR_Bypass, CR_Bypass, CR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [BU]>],
-                 [4, 1], // Latency = 1
-                 [CR_Bypass, CR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [SFX0, SFX1]>],
-                 [4, 1, 1], // Latency = 1
-                 [CR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [LSU_0]>],
-                 [6, 1], // Latency = 3, Repeat rate = 1
-                 [GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [LSU_0]>],
-                 [6, 1], // Latency = 3
-                 [GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [LSU_0]>],
-                 [6, 1], // Latency = 3
-                 [GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [LSU_0]>],
-                 [6, 1], // Latency = 3
-                 [GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [SFX0, SFX1], 0>,
-                  InstrStage<1, [LSU_0]>],
-                 [6, 1], // Latency = 3
-                 [GPR_Bypass, GPR_Bypass],
-                 2>, // 2 micro-ops
-  InstrItinData,
-                  InstrStage<1, [LSU_0]>],
-                 [6, 1], // Latency = 3
-                 [NoBypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [SFX0, SFX1], 0>,
-                  InstrStage<1, [LSU_0]>],
-                 [6, 1], // Latency = 3
-                 [NoBypass, GPR_Bypass],
-                 2>, // 2 micro-ops
-
InstrItinData, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [6, 1, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 1, 1], // Latency = 4 - [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 1, 1], // Latency = 4 - [FPR_Bypass, GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 1], // Latency = r+3 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<3, [LSU_0]>], - [6, 1, 1], // Latency = 3, Repeat rate = 3 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [6, 1], // Latency = 3 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>]>, - InstrItinData, - InstrStage<4, [SFX0]>], - [7, 1], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<2, [SFX0, SFX1]>], - [5, 1], // Latency = 2, Repeat rate = 4 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0]>], - [5, 1], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0], 0>]>, - InstrItinData, - InstrStage<5, [SFX0]>], - [8, 1], - [GPR_Bypass, CR_Bypass]>, - InstrItinData, - InstrStage<4, [SFX0]>], - [7, 1], // Latency = 4, Repeat rate = 4 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [4, 1], // Latency = 1, Repeat rate = 1 - [GPR_Bypass, CR_Bypass]>, - InstrItinData, - InstrStage<4, [SFX0]>], - [7, 1], // Latency = 4, Repeat rate = 4 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [4, 1], // Latency = 1, Repeat rate = 1 - [CR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0]>], - [4, 1], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<2, [FPU_0]>], - [11, 1, 1], // Latency = 8, Repeat rate = 2 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<4, [FPU_0]>], - [13, 1, 1], // Latency = 10, Repeat rate = 4 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<2, [FPU_0]>], - [11, 1, 1], // Latency = 8, Repeat rate = 2 - [CR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<68, [FPU_0]>], - [71, 1, 1], // Latency = 68, Repeat rate = 68 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<38, [FPU_0]>], - [41, 1, 1], // Latency = 38, Repeat rate = 38 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<4, [FPU_0]>], - [13, 1, 1, 1], // Latency = 10, Repeat rate = 4 - [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<38, [FPU_0]>], - [41, 1], // Latency = 38, Repeat rate = 38 - [FPR_Bypass, FPR_Bypass]> + [E500_DIS0, E500_DIS1, E500_SFX0, E500_SFX1, E500_BU, E500_CFX_DivBypass, + E500_CFX_0, E500_LSU_0, E500_FPU_0], + [E500_CR_Bypass, E500_GPR_Bypass, E500_FPR_Bypass], [ + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1, 1], // Latency = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1, 1], // Latency = 1 + 
[E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [5, 1, 1], // Latency = 1 or 2 + [E500_CR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_CFX_0], 0>, + InstrStage<14, [E500_CFX_DivBypass]>], + [17, 1, 1], // Latency=4..35, Repeat= 4..35 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<8, [E500_FPU_0]>], + [11], // Latency = 8 + [E500_FPR_Bypass]>, + InstrItinData, + InstrStage<8, [E500_FPU_0]>], + [11, 1, 1], // Latency = 8 + [NoBypass, NoBypass, NoBypass]>, + InstrItinData, + InstrStage<1, [E500_CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_CFX_0]>], + [7, 1, 1], // Latency = 4, Repeat rate = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1, 1], // Latency = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1, 1], // Latency = 1 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<2, [E500_SFX0]>], + [5, 1], // Latency = 2, Repeat rate = 2 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_BU]>], + [4, 1], // Latency = 1 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_BU]>], + [4, 1, 1], // Latency = 1 + [E500_CR_Bypass, + E500_CR_Bypass, E500_CR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_BU]>], + [4, 1], // Latency = 1 + [E500_CR_Bypass, E500_CR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1, 1], // Latency = 1 + [E500_CR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3, Repeat rate = 1 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1, 1], // Latency = 3 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1, 1], // Latency = 3 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [7, 1, 1], // 
Latency = 4 + [E500_FPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [7, 1, 1], // Latency = 4 + [E500_FPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [7, 1, 1], // Latency = 4 + [E500_FPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1], 0>, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [7, 1], // Latency = r+3 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<3, [E500_LSU_0]>], + [6, 1, 1], // Latency = 3, Repeat rate = 3 + [E500_GPR_Bypass, + E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0]>], + [6, 1], // Latency = 3 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0]>]>, + InstrItinData, + InstrStage<4, [E500_SFX0]>], + [7, 1], + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<2, [E500_SFX0, E500_SFX1]>], + [5, 1], // Latency = 2, Repeat rate = 4 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0]>], + [5, 1], + [NoBypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_LSU_0], 0>]>, + InstrItinData, + InstrStage<5, [E500_SFX0]>], + [8, 1], + [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData, + InstrStage<5, [E500_SFX0]>], + [8, 1], + [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData, + InstrStage<4, [E500_SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [E500_GPR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [E500_GPR_Bypass, E500_CR_Bypass]>, + InstrItinData, + InstrStage<4, [E500_SFX0]>], + [7, 1], // Latency = 4, Repeat rate = 4 + [NoBypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0, E500_SFX1]>], + [4, 1], // Latency = 1, Repeat rate = 1 + [E500_CR_Bypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E500_SFX0]>], + [4, 1], + [NoBypass, E500_GPR_Bypass]>, + InstrItinData, + InstrStage<2, [E500_FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [E500_FPR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass]>, + InstrItinData, + InstrStage<4, [E500_FPU_0]>], + [13, 1, 1], // Latency = 10, Repeat rate = 4 + [E500_FPR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass]>, + InstrItinData, + InstrStage<2, [E500_FPU_0]>], + [11, 1, 1], // Latency = 8, Repeat rate = 2 + [E500_CR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass]>, + InstrItinData, + InstrStage<68, [E500_FPU_0]>], + [71, 1, 1], // Latency = 68, Repeat rate = 68 + [E500_FPR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass]>, + InstrItinData, + InstrStage<38, [E500_FPU_0]>], + [41, 1, 1], // Latency = 38, Repeat rate = 38 + [E500_FPR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass]>, + InstrItinData, + InstrStage<4, [E500_FPU_0]>], + [13, 1, 1, 1], // Latency = 10, Repeat rate = 4 + [E500_FPR_Bypass, + E500_FPR_Bypass, E500_FPR_Bypass, + E500_FPR_Bypass]>, + InstrItinData, + InstrStage<38, [E500_FPU_0]>], + [41, 1], // Latency = 38, Repeat rate = 38 + 
[E500_FPR_Bypass, E500_FPR_Bypass]>
 ]>;
 
 // ===---------------------------------------------------------------------===//
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
index 7a24d20323da..de097d9d8cf5 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -20,280 +20,344 @@
 // * Decode & Dispatch
 // Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
 //   queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
-// def DIS0 : FuncUnit;
-// def DIS1 : FuncUnit;
+def E5500_DIS0 : FuncUnit;
+def E5500_DIS1 : FuncUnit;
 
 // * Execute
 //   6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
 //   The CFX has a bypass path, allowing non-divide instructions to execute
 //   while a divide instruction is being executed.
-// def SFX0 : FuncUnit; // Simple unit 0
-// def SFX1 : FuncUnit; // Simple unit 1
-// def BU : FuncUnit; // Branch unit
-// def CFX_DivBypass
-//     : FuncUnit; // CFX divide bypass path
-// def CFX_0 : FuncUnit; // CFX pipeline stage 0
+def E5500_SFX0 : FuncUnit; // Simple unit 0
+def E5500_SFX1 : FuncUnit; // Simple unit 1
+def E5500_BU : FuncUnit; // Branch unit
+def E5500_CFX_DivBypass
+    : FuncUnit; // CFX divide bypass path
+def E5500_CFX_0 : FuncUnit; // CFX pipeline stage 0
 
-def CFX_1 : FuncUnit; // CFX pipeline stage 1
+def E5500_CFX_1 : FuncUnit; // CFX pipeline stage 1
 
-// def LSU_0 : FuncUnit; // LSU pipeline
-// def FPU_0 : FuncUnit; // FPU pipeline
+def E5500_LSU_0 : FuncUnit; // LSU pipeline
+def E5500_FPU_0 : FuncUnit; // FPU pipeline
 
-// def CR_Bypass : Bypass;
+def E5500_GPR_Bypass : Bypass;
+def E5500_FPR_Bypass : Bypass;
+def E5500_CR_Bypass : Bypass;
 
 def PPCE5500Itineraries : ProcessorItineraries<
-  [DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1,
-   LSU_0, FPU_0],
-  [CR_Bypass, GPR_Bypass, FPR_Bypass], [
-  InstrItinData,
-                  InstrStage<1, [SFX0, SFX1]>],
-                 [5, 2, 2], // Latency = 1
-                 [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [SFX0, SFX1]>],
-                 [5, 2, 2], // Latency = 1
-                 [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [SFX0, SFX1]>],
-                 [6, 2, 2], // Latency = 1 or 2
-                 [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [CFX_0], 0>,
-                  InstrStage<26, [CFX_DivBypass]>],
-                 [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26
-                 [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [CFX_0], 0>,
-                  InstrStage<16, [CFX_DivBypass]>],
-                 [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16
-                 [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [FPU_0]>],
-                 [11], // Latency = 7, Repeat rate = 1
-                 [FPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<7, [FPU_0]>],
-                 [11, 2, 2], // Latency = 7, Repeat rate = 7
-                 [NoBypass, NoBypass, NoBypass]>,
-  InstrItinData,
-                  InstrStage<1, [CFX_0], 0>,
-                  InstrStage<2, [CFX_1]>],
-                 [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4
-                 [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [CFX_0], 0>,
-                  InstrStage<1, [CFX_1]>],
-                 [8, 2, 2], // Latency = 4, Repeat rate = 1
-                 [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [CFX_0], 0>,
-                  InstrStage<1, [CFX_1]>],
-                 [8, 2, 2], // Latency = 4, Repeat rate = 1
-                 [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
-  InstrItinData,
-                  InstrStage<1, [CFX_0], 0>,
-                  InstrStage<2, [CFX_1]>],
-                 [8, 2, 2], // Latency = 4 or 5, Repeat = 2
-                 [GPR_Bypass,
GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [5, 2, 2], // Latency = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<2, [SFX0, SFX1]>], - [6, 2, 2], // Latency = 2, Repeat rate = 2 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [5, 2, 2], // Latency = 1, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<2, [SFX0, SFX1]>], - [6, 2, 2], // Latency = 2, Repeat rate = 2 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<2, [SFX0]>], - [6, 2], // Latency = 2, Repeat rate = 2 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [BU]>], - [5, 2], // Latency = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [BU]>], - [5, 2, 2], // Latency = 1 - [CR_Bypass, CR_Bypass, CR_Bypass]>, - InstrItinData, - InstrStage<1, [BU]>], - [5, 2], // Latency = 1 - [CR_Bypass, CR_Bypass]>, - InstrItinData, - InstrStage<1, [CFX_0]>], - [5, 2, 2], // Latency = 1 - [CR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<3, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [8, 2, 2], // Latency = 4, Repeat rate = 1 - [FPR_Bypass, GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [8, 2, 2], // Latency = 4, Repeat rate = 1 - [FPR_Bypass, GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [GPR_Bypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<4, [LSU_0]>], - [8, 2], // Latency = r+3, Repeat rate = r+3 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<3, [LSU_0]>], - [7, 2, 2], // 
Latency = 3, Repeat rate = 3 - [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1], 0>, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass], - 2>, // 2 micro-ops - InstrItinData, - InstrStage<1, [LSU_0]>], - [7, 2], // Latency = 3, Repeat rate = 1 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0]>]>, - InstrItinData, - InstrStage<2, [CFX_0]>], - [6, 2], // Latency = 2, Repeat rate = 4 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [LSU_0], 0>]>, - InstrItinData, - InstrStage<5, [CFX_0]>], - [9, 2], // Latency = 5, Repeat rate = 5 - [GPR_Bypass, CR_Bypass]>, - InstrItinData, - InstrStage<4, [SFX0]>], - [8, 2], // Latency = 4, Repeat rate = 4 - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [CFX_0]>], - [5], // Latency = 1, Repeat rate = 1 - [GPR_Bypass]>, - InstrItinData, - InstrStage<4, [CFX_0]>], - [8, 2], // Latency = 4, Repeat rate = 4 - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [SFX0, SFX1]>], - [5], // Latency = 1, Repeat rate = 1 - [GPR_Bypass]>, - InstrItinData, - InstrStage<1, [FPU_0]>], - [11, 2, 2], // Latency = 7, Repeat rate = 1 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [FPU_0]>], - [11, 2, 2], // Latency = 7, Repeat rate = 1 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [FPU_0]>], - [11, 2, 2], // Latency = 7, Repeat rate = 1 - [CR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<31, [FPU_0]>], - [39, 2, 2], // Latency = 35, Repeat rate = 31 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<16, [FPU_0]>], - [24, 2, 2], // Latency = 20, Repeat rate = 16 - [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<1, [FPU_0]>], - [11, 2, 2, 2], // Latency = 7, Repeat rate = 1 - [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, - InstrItinData, - InstrStage<2, [FPU_0]>], - [12, 2], // Latency = 8, Repeat rate = 2 - [FPR_Bypass, FPR_Bypass]> + [E5500_DIS0, E5500_DIS1, E5500_SFX0, E5500_SFX1, E5500_BU, + E5500_CFX_DivBypass, E5500_CFX_0, E5500_CFX_1, + E5500_LSU_0, E5500_FPU_0], + [E5500_CR_Bypass, E5500_GPR_Bypass, E5500_FPR_Bypass], [ + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5, 2, 2], // Latency = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5, 2, 2], // Latency = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [6, 2, 2], // Latency = 1 or 2 + [E5500_CR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<26, [E5500_CFX_DivBypass]>], + [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<16, [E5500_CFX_DivBypass]>], + [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_FPU_0]>], + [11], // Latency = 7, Repeat rate = 1 + [E5500_FPR_Bypass]>, + InstrItinData, + InstrStage<7, [E5500_FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 7 + [NoBypass, NoBypass, 
NoBypass]>, + InstrItinData, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<2, [E5500_CFX_1]>], + [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<1, [E5500_CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<1, [E5500_CFX_1]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_CFX_0], 0>, + InstrStage<2, [E5500_CFX_1]>], + [8, 2, 2], // Latency = 4 or 5, Repeat = 2 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5, 2, 2], // Latency = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<2, [E5500_SFX0, E5500_SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5, 2, 2], // Latency = 1, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<2, [E5500_SFX0, E5500_SFX1]>], + [6, 2, 2], // Latency = 2, Repeat rate = 2 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<2, [E5500_SFX0]>], + [6, 2], // Latency = 2, Repeat rate = 2 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_BU]>], + [5, 2], // Latency = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_BU]>], + [5, 2, 2], // Latency = 1 + [E5500_CR_Bypass, + E5500_CR_Bypass, E5500_CR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_BU]>], + [5, 2], // Latency = 1 + [E5500_CR_Bypass, E5500_CR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_CFX_0]>], + [5, 2, 2], // Latency = 1 + [E5500_CR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<3, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 3 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + 
[E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [8, 2, 2], // Latency = 4, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [E5500_GPR_Bypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<4, [E5500_LSU_0]>], + [8, 2], // Latency = r+3, Repeat rate = r+3 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<3, [E5500_LSU_0]>], + [7, 2, 2], // Latency = 3, Repeat rate = 3 + [E5500_GPR_Bypass, + E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass], + 2>, // 2 micro-ops + InstrItinData, + InstrStage<1, [E5500_LSU_0]>], + [7, 2], // Latency = 3, Repeat rate = 1 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0]>]>, + InstrItinData, + InstrStage<2, [E5500_CFX_0]>], + [6, 2], // Latency = 2, Repeat rate = 4 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_LSU_0], 0>]>, + InstrItinData, + InstrStage<5, [E5500_CFX_0]>], + [9, 2], // Latency = 5, Repeat rate = 5 + [E5500_GPR_Bypass, E5500_CR_Bypass]>, + InstrItinData, + InstrStage<5, [E5500_CFX_0]>], + [9, 2], // Latency = 5, Repeat rate = 5 + [E5500_GPR_Bypass, 
E5500_CR_Bypass]>, + InstrItinData, + InstrStage<4, [E5500_SFX0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [E5500_GPR_Bypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_CFX_0]>], + [5], // Latency = 1, Repeat rate = 1 + [E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<4, [E5500_CFX_0]>], + [8, 2], // Latency = 4, Repeat rate = 4 + [NoBypass, E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_SFX0, E5500_SFX1]>], + [5], // Latency = 1, Repeat rate = 1 + [E5500_GPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_FPU_0]>], + [11, 2, 2], // Latency = 7, Repeat rate = 1 + [E5500_CR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData, + InstrStage<31, [E5500_FPU_0]>], + [39, 2, 2], // Latency = 35, Repeat rate = 31 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData, + InstrStage<16, [E5500_FPU_0]>], + [24, 2, 2], // Latency = 20, Repeat rate = 16 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass]>, + InstrItinData, + InstrStage<1, [E5500_FPU_0]>], + [11, 2, 2, 2], // Latency = 7, Repeat rate = 1 + [E5500_FPR_Bypass, + E5500_FPR_Bypass, E5500_FPR_Bypass, + E5500_FPR_Bypass]>, + InstrItinData, + InstrStage<2, [E5500_FPU_0]>], + [12, 2], // Latency = 8, Repeat rate = 2 + [E5500_FPR_Bypass, E5500_FPR_Bypass]> ]>; // ===---------------------------------------------------------------------===// diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleG3.td index 72a0a392631a..21efd8f8f6c9 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleG3.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleG3.td @@ -11,61 +11,70 @@ // //===----------------------------------------------------------------------===// +def G3_BPU : FuncUnit; // Branch unit +def G3_SLU : FuncUnit; // Store/load unit +def G3_SRU : FuncUnit; // special register unit +def G3_IU1 : FuncUnit; // integer unit 1 (simple) +def G3_IU2 : FuncUnit; // integer unit 2 (complex) +def G3_FPU1 : FuncUnit; // floating point unit 1 def G3Itineraries : ProcessorItineraries< - [IU1, IU2, FPU1, BPU, SRU, SLU], [], [ - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]> + [G3_IU1, G3_IU2, G3_FPU1, G3_BPU, 
G3_SRU, G3_SLU], [], [ + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> ]>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleG4.td index fc9120dfa290..340773ef7876 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleG4.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleG4.td @@ -11,71 +11,86 @@ // //===----------------------------------------------------------------------===// +def G4_BPU : FuncUnit; // Branch unit +def G4_SLU : FuncUnit; // Store/load unit +def G4_SRU : FuncUnit; // special register unit +def G4_IU1 : FuncUnit; // integer unit 1 (simple) +def G4_IU2 : FuncUnit; // integer unit 2 (complex) +def G4_FPU1 : FuncUnit; // floating point unit 1 +def G4_VPU : FuncUnit; // vector permutation unit +def G4_VIU1 : FuncUnit; // vector integer unit 1 (simple) +def G4_VIU2 : FuncUnit; // vector integer unit 2 (complex) +def G4_VFPU : FuncUnit; // vector floating point unit + def G4Itineraries : ProcessorItineraries< - [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [], [ - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]> + [G4_IU1, G4_IU2, G4_SLU, G4_SRU, G4_BPU, G4_FPU1, + G4_VIU1, G4_VIU2, G4_VPU, G4_VFPU], [], [ + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + 
InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> ]>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td index a4e82ce23e6f..1d9f13fcb850 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -11,78 +11,102 @@ // //===----------------------------------------------------------------------===// -def IU3 : FuncUnit; // integer unit 3 (7450 simple) -def IU4 : FuncUnit; // integer unit 4 (7450 simple) +def G4P_BPU : FuncUnit; // Branch unit +def G4P_SLU : FuncUnit; // Store/load unit +def G4P_SRU : FuncUnit; // special register unit +def G4P_IU1 : FuncUnit; // integer unit 1 (simple) +def G4P_IU2 : FuncUnit; // integer unit 2 (complex) +def G4P_IU3 : FuncUnit; // integer unit 3 (simple) +def G4P_IU4 : FuncUnit; // integer unit 4 (simple) +def G4P_FPU1 : FuncUnit; // floating point unit 1 +def G4P_VPU : FuncUnit; // vector permutation unit +def G4P_VIU1 : FuncUnit; // vector integer unit 1 (simple) +def G4P_VIU2 : FuncUnit; // vector integer unit 2 (complex) +def G4P_VFPU : FuncUnit; // vector floating point unit def G4PlusItineraries : ProcessorItineraries< - [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [], [ - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - 
InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]> + [G4P_IU1, G4P_IU2, G4P_IU3, G4P_IU4, G4P_BPU, G4P_SLU, G4P_FPU1, + G4P_VFPU, G4P_VIU1, G4P_VIU2, G4P_VPU], [], [ + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> ]>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleG5.td index c64998d52a0c..a3b73ab4454f 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleG5.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleG5.td @@ -11,90 +11,110 @@ // //===----------------------------------------------------------------------===// +def G5_BPU : FuncUnit; // Branch unit +def G5_SLU : FuncUnit; // Store/load unit +def G5_SRU : FuncUnit; // special register unit +def G5_IU1 : FuncUnit; // integer unit 1 (simple) +def G5_IU2 : FuncUnit; // integer unit 2 (complex) +def G5_FPU1 : FuncUnit; // floating point unit 1 +def G5_FPU2 : FuncUnit; // floating point unit 2 +def G5_VPU : FuncUnit; // vector permutation unit +def G5_VIU1 : FuncUnit; // vector integer unit 1 (simple) +def G5_VIU2 : FuncUnit; // vector integer unit 2 (complex) +def G5_VFPU : FuncUnit; // vector floating point unit + def G5Itineraries : ProcessorItineraries< - [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [], [ - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - 
InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, // needs work - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, // needs work - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]> + [G5_IU1, G5_IU2, G5_SLU, G5_BPU, G5_FPU1, G5_FPU2, + G5_VFPU, G5_VIU1, G5_VIU2, G5_VPU], [], [ + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, // needs work + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, // needs work + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> ]>; // ===---------------------------------------------------------------------===// -// e5500 machine model for scheduling and other instruction cost heuristics. +// G5 machine model for scheduling and other instruction cost heuristics. def G5Model : SchedMachineModel { let IssueWidth = 4; // 4 (non-branch) instructions are dispatched per cycle. diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleP7.td new file mode 100644 index 000000000000..a3670a55a049 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCScheduleP7.td @@ -0,0 +1,393 @@ +//===-- PPCScheduleP7.td - PPC P7 Scheduling Definitions ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the POWER7 processor. +// +//===----------------------------------------------------------------------===// + +// Primary reference: +// IBM POWER7 multicore server processor +// B. Sinharoy, et al. +// IBM J. Res. & Dev. (55) 3. May/June 2011. + +// Scheduling for the P7 involves tracking two types of resources: +// 1. The dispatch bundle slots +// 2. The functional unit resources + +// Dispatch units: +def P7_DU1 : FuncUnit; +def P7_DU2 : FuncUnit; +def P7_DU3 : FuncUnit; +def P7_DU4 : FuncUnit; +def P7_DU5 : FuncUnit; +def P7_DU6 : FuncUnit; + +def P7_LS1 : FuncUnit; // Load/Store pipeline 1 +def P7_LS2 : FuncUnit; // Load/Store pipeline 2 + +def P7_FX1 : FuncUnit; // FX pipeline 1 +def P7_FX2 : FuncUnit; // FX pipeline 2 + +// VS pipeline 1 (vector integer ops. always here) +def P7_VS1 : FuncUnit; // VS pipeline 1 +// VS pipeline 2 (128-bit stores and perms. here) +def P7_VS2 : FuncUnit; // VS pipeline 2 + +def P7_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs) +def P7_BRU : FuncUnit; // BR unit + +// Notes: +// Each LSU pipeline can also execute FX add and logical instructions. +// Each LSU pipeline can complete a load or store in one cycle. +// +// Each store is broken into two parts, AGEN goes to the LSU while a +// "data steering" op. goes to the FXU or VSU. +// +// FX loads have a two cycle load-to-use latency (so one "bubble" cycle). +// VSU loads have a three cycle load-to-use latency (so two "bubble" cycles). +// +// Frequent FX ops. take only one cycle and results can be used again in the +// next cycle (there is a self-bypass). Getting results from the other FX +// pipeline takes an additional cycle. +// +// The VSU XS is similar to the POWER6, but with a pipeline length of 2 cycles +// (instead of 3 cycles on the POWER6). VSU XS handles vector FX-style ops. +// Dispatch of an instruction to VS1 that uses four single prec. inputs +// (either to a float or XC op) prevents dispatch in that cycle to VS2 of any +// floating point instruction. +// +// The VSU PM is similar to the POWER6, but with a pipeline length of 3 cycles +// (instead of 4 cycles on the POWER6). vsel is handled by the PM pipeline +// (unlike on the POWER6). +// +// FMA from the VSUs can forward results in 6 cycles. VS1 XS and vector FP +// share the same write-back, and have a 5-cycle latency difference, so the +// IFU/IDU will not dispatch an XS instruction 5 cycles after a vector FP +// op. has been dispatched to VS1. +// +// Three cycles after an L1 cache hit, a dependent VSU instruction can issue. +// +// Instruction dispatch groups have (at most) four non-branch instructions, and +// two branches. Unlike on the POWER4/5, a branch does not automatically +// end the dispatch group, but a second branch must be the last in the group. + +def P7Itineraries : ProcessorItineraries< + [P7_DU1, P7_DU2, P7_DU3, P7_DU4, P7_DU5, P7_DU6, + P7_LS1, P7_LS2, P7_FX1, P7_FX2, P7_VS1, P7_VS2, P7_CRU, P7_BRU], [], [ + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2, + P7_LS1, P7_LS2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + // FIXME: Add record-form itinerary data.
+ InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<36, [P7_FX1, P7_FX2]>], + [36, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<68, [P7_FX1, P7_FX2]>], + [68, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1]>, + InstrItinData, + InstrStage<1, [P7_BRU]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_CRU]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_BRU]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_BRU]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_LS1, P7_LS2]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [2, 2, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_LS1, P7_LS2]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [2, 2, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_LS1, P7_LS2]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_LS1, P7_LS2]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 4, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [4, 4, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_LS1, P7_LS2]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [1, 1, 1]>, + 
InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [2, 1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [2, 1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_FX1, P7_FX2], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [2, 1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_LS1, P7_LS2], 0>, + InstrStage<1, [P7_VS2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_LS1, P7_LS2]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_DU2], 0>, + InstrStage<1, [P7_DU3], 0>, + InstrStage<1, [P7_DU4], 0>, + InstrStage<1, [P7_CRU]>, + InstrStage<1, [P7_FX1, P7_FX2]>], + [3, 1]>, // mtcr + InstrItinData, + InstrStage<1, [P7_CRU]>], + [6, 1]>, + InstrItinData, + InstrStage<1, [P7_CRU]>], + [3, 1]>, + InstrItinData, + InstrStage<1, [P7_FX1]>], + [4, 1]>, // mtctr + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [8, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [33, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [27, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [44, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [32, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [6, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [6, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1, P7_VS2]>], + [6, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS1]>], + [7, 1, 1]>, + InstrItinData, + InstrStage<1, [P7_VS2]>], + [2, 1, 1]> +]>; + +// ===---------------------------------------------------------------------===// +// P7 machine model for scheduling and other instruction cost heuristics. + +def P7Model : SchedMachineModel { + let IssueWidth = 6; // 4 (non-branch) instructions are dispatched per cycle. + // Note that the dispatch bundle size is 6 (including + // branches), but the total internal issue bandwidth per + // cycle (from all queues) is 8. + + let MinLatency = 0; // Out-of-order dispatch. + let LoadLatency = 3; // Optimistic load latency assuming bypass. + // This is overriden by OperandCycles if the + // Itineraries are queried instead. 
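As the LoadLatency note above says, the optimistic model defaults are overridden by the per-operand cycle lists whenever a scheduler queries the itineraries directly. A minimal sketch of that query path, assuming `Itins` comes from the subtarget's getInstrItineraryData() and `SchedClass` from an instruction's MCInstrDesc; the helper name is illustrative and not part of this patch:

#include "llvm/MC/MCInstrItineraries.h"

// Cycle, relative to issue, at which an instruction's first def (operand 0)
// becomes available. Falls back to the summed InstrStage latency when no
// OperandCycles entry exists for this itinerary class.
unsigned defReadyCycle(const llvm::InstrItineraryData &Itins,
                       unsigned SchedClass) {
  int Cycle = Itins.getOperandCycle(SchedClass, 0); // e.g. 2 for a P7 FX load
  if (Cycle < 0)
    return Itins.getStageLatency(SchedClass);
  return unsigned(Cycle);
}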
+ let MispredictPenalty = 16; + + let Itineraries = P7Itineraries; +} + diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 7231ab101a26..4de558377b62 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -179,7 +179,7 @@ bool PPCSubtarget::enablePostRAScheduler( return OptLevel >= CodeGenOpt::Default; } -// Embedded cores need aggressive scheduling. +// Embedded cores need aggressive scheduling (and some others also benefit). static bool needsAggressiveScheduling(unsigned Directive) { switch (Directive) { default: return false; @@ -187,6 +187,7 @@ static bool needsAggressiveScheduling(unsigned Directive) { case PPC::DIR_A2: case PPC::DIR_E500mc: case PPC::DIR_E5500: + case PPC::DIR_PWR7: return true; } } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCSubtarget.h b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCSubtarget.h index c863a6ecc777..ec8c82ad521c 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -126,22 +126,6 @@ public: /// selection. const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } - /// getDataLayoutString - Return the pointer size and type alignment - /// properties of this subtarget. - const char *getDataLayoutString() const { - // Note, the alignment values for f64 and i64 on ppc64 in Darwin - // documentation are wrong; these are correct (i.e. "what gcc does"). - if (isPPC64() && isSVR4ABI()) { - if (TargetTriple.getOS() == llvm::Triple::FreeBSD) - return "E-p:64:64-f64:64:64-i64:64:64-v128:128:128-n32:64"; - else - return "E-p:64:64-f64:64:64-i64:64:64-f128:128:128-v128:128:128-n32:64"; - } - - return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64" - : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32"; - } - /// \brief Reset the features for the PowerPC target. virtual void resetSubtargetFeatures(const MachineFunction *MF); private: diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index 9acefe53ce4a..2e8d2d67fddc 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -33,6 +33,36 @@ extern "C" void LLVMInitializePowerPCTarget() { RegisterTargetMachine C(ThePPC64LETarget); } +/// Return the datalayout string of a subtarget. +static std::string getDataLayoutString(const PPCSubtarget &ST) { + const Triple &T = ST.getTargetTriple(); + + // PPC is big endian. + std::string Ret = "E"; + + Ret += DataLayout::getManglingComponent(T); + + // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit + // pointers. + if (!ST.isPPC64() || T.getOS() == Triple::Lv2) + Ret += "-p:32:32"; + + // Note, the alignment values for f64 and i64 on ppc64 in Darwin + // documentation are wrong; these are correct (i.e. "what gcc does"). + if (ST.isPPC64() || ST.isSVR4ABI()) + Ret += "-i64:64"; + else + Ret += "-f64:32:64"; + + // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. 
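For concreteness, here is a small self-contained check of the strings this helper ultimately returns for two common configurations (an illustration, not part of the patch; it assumes DataLayout::getManglingComponent() contributes "-m:e" for ELF triples and "-m:o" for Mach-O ones):

#include <cassert>
#include <string>

int main() {
  // 64-bit SVR4/ELF: big endian, 64-bit pointers implied by default,
  // i64 aligned to 64, native 32- and 64-bit registers.
  const std::string PPC64ELF = "E-m:e-i64:64-n32:64";
  // 32-bit Darwin: explicit 32-bit pointers, f64 ABI-aligned to 32 but
  // preferably to 64, and only 32-bit native registers.
  const std::string PPC32Darwin = "E-m:o-p:32:32-f64:32:64-n32";
  assert(PPC64ELF.find("-p:") == std::string::npos);      // no 32-bit override
  assert(PPC32Darwin.find("-n32:64") == std::string::npos); // no 64-bit regs
  return 0;
}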
+ if (ST.isPPC64()) + Ret += "-n32:64"; + else + Ret += "-n32"; + + return Ret; +} + PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -41,7 +71,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, bool is64Bit) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, is64Bit), - DL(Subtarget.getDataLayoutString()), InstrInfo(*this), + DL(getDataLayoutString(Subtarget)), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/TargetInfo/CMakeLists.txt index fdb8a62b9d24..c9548c7fe0cd 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/TargetInfo/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/TargetInfo/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMPowerPCInfo PowerPCTargetInfo.cpp ) - -add_dependencies(LLVMPowerPCInfo PowerPCCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt index f77d85b15ab9..410234686400 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = PowerPCInfo parent = PowerPC -required_libraries = MC Support Target +required_libraries = Support add_to_library_groups = PowerPC diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPU.h b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPU.h index 025b28e32bfe..8eb1b695d76a 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPU.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPU.h @@ -28,11 +28,11 @@ class TargetMachine; FunctionPass *createR600VectorRegMerger(TargetMachine &tm); FunctionPass *createR600TextureIntrinsicsReplacer(); FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); -FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm); +FunctionPass *createR600EmitClauseMarkers(); FunctionPass *createR600ClauseMergePass(TargetMachine &tm); FunctionPass *createR600Packetizer(TargetMachine &tm); FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm); -FunctionPass *createAMDGPUCFGStructurizerPass(TargetMachine &tm); +FunctionPass *createAMDGPUCFGStructurizerPass(); // SI Passes FunctionPass *createSITypeRewriter(); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPU.td b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPU.td index 182235b27c48..36c415609152 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPU.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPU.td @@ -100,19 +100,9 @@ def AMDGPUInstrInfo : InstrInfo { let guessInstructionProperties = 1; } -//===----------------------------------------------------------------------===// -// Declare the target which we are implementing -//===----------------------------------------------------------------------===// -def AMDGPUAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - int Variant = 0; - bit isMCAsmWriter = 1; -} - def AMDGPU : Target { // Pull in Instruction Info: let InstructionSet = AMDGPUInstrInfo; - let AssemblyWriters = [AMDGPUAsmWriter]; 
} // Include AMDGPU TD files diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp index 67bdba28787a..8aca57a6b479 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -46,8 +46,7 @@ extern "C" void LLVMInitializeR600AsmPrinter() { } AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) -{ + : AsmPrinter(TM, Streamer) { DisasmEnabled = TM.getSubtarget().dumpCode() && ! Streamer.hasRawTextSupport(); } @@ -56,6 +55,7 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) /// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle. bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); + if (OutStreamer.hasRawTextSupport()) { OutStreamer.EmitRawText("@" + MF.getName() + ":"); } @@ -65,9 +65,12 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { ELF::SHT_PROGBITS, 0, SectionKind::getReadOnly()); OutStreamer.SwitchSection(ConfigSection); + const AMDGPUSubtarget &STM = TM.getSubtarget(); + SIProgramInfo KernelInfo; if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { - EmitProgramInfoSI(MF); + findNumUsedRegistersSI(MF, KernelInfo.NumSGPR, KernelInfo.NumVGPR); + EmitProgramInfoSI(MF, KernelInfo); } else { EmitProgramInfoR600(MF); } @@ -79,6 +82,19 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); EmitFunctionBody(); + if (isVerbose() && OutStreamer.hasRawTextSupport()) { + const MCSectionELF *CommentSection + = Context.getELFSection(".AMDGPU.csdata", + ELF::SHT_PROGBITS, 0, + SectionKind::getReadOnly()); + OutStreamer.SwitchSection(CommentSection); + + OutStreamer.EmitRawText( + Twine("; Kernel info:\n") + + "; NumSgprs: " + Twine(KernelInfo.NumSGPR) + "\n" + + "; NumVgprs: " + Twine(KernelInfo.NumVGPR) + "\n"); + } + if (STM.dumpCode()) { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) MF.dump(); @@ -166,8 +182,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) { } } -void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) { - const AMDGPUSubtarget &STM = TM.getSubtarget(); +void AMDGPUAsmPrinter::findNumUsedRegistersSI(MachineFunction &MF, + unsigned &NumSGPR, + unsigned &NumVGPR) const { unsigned MaxSGPR = 0; unsigned MaxVGPR = 0; bool VCCUsed = false; @@ -240,7 +257,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) { isSGPR = false; width = 16; } else { - assert(!"Unknown register class"); + llvm_unreachable("Unknown register class"); } hwReg = RI->getEncodingValue(reg) & 0xff; maxUsed = hwReg + width - 1; @@ -252,10 +269,24 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) { } } } - if (VCCUsed) { + + if (VCCUsed) MaxSGPR += 2; - } - SIMachineFunctionInfo * MFI = MF.getInfo(); + + NumSGPR = MaxSGPR; + NumVGPR = MaxVGPR; +} + +void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &Out, + MachineFunction &MF) const { + findNumUsedRegistersSI(MF, Out.NumSGPR, Out.NumVGPR); +} + +void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF, + const SIProgramInfo &KernelInfo) { + const AMDGPUSubtarget &STM = TM.getSubtarget(); + + SIMachineFunctionInfo *MFI = MF.getInfo(); unsigned RsrcReg; switch (MFI->ShaderType) { default: // Fall through @@ -266,7 +297,8 @@ void 
AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) { } OutStreamer.EmitIntValue(RsrcReg, 4); - OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4); + OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) | + S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4); unsigned LDSAlignShift; if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUAsmPrinter.h b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUAsmPrinter.h index 05dc9bb672d7..a2b833707609 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUAsmPrinter.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUAsmPrinter.h @@ -22,6 +22,22 @@ namespace llvm { class AMDGPUAsmPrinter : public AsmPrinter { +private: + struct SIProgramInfo { + SIProgramInfo() : NumSGPR(0), NumVGPR(0) {} + unsigned NumSGPR; + unsigned NumVGPR; + }; + + void getSIProgramInfo(SIProgramInfo &Out, MachineFunction &MF) const; + void findNumUsedRegistersSI(MachineFunction &MF, + unsigned &NumSGPR, + unsigned &NumVGPR) const; + + /// \brief Emit register usage information so that the GPU driver + /// can correctly setup the GPU state. + void EmitProgramInfoR600(MachineFunction &MF); + void EmitProgramInfoSI(MachineFunction &MF, const SIProgramInfo &KernelInfo); public: explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer); @@ -32,11 +48,6 @@ public: return "AMDGPU Assembly Printer"; } - /// \brief Emit register usage information so that the GPU driver - /// can correctly setup the GPU state. - void EmitProgramInfoR600(MachineFunction &MF); - void EmitProgramInfoSI(MachineFunction &MF); - /// Implemented in AMDGPUMCInstLower.cpp virtual void EmitInstruction(const MachineInstr *MI); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUISelLowering.cpp index c4d75ffa0d06..c59be7ce2430 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUISelLowering.cpp @@ -32,8 +32,9 @@ using namespace llvm; static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign()); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + unsigned Offset = State.AllocateStack(ValVT.getStoreSize(), + ArgFlags.getOrigAlign()); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return true; } @@ -59,6 +60,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FRINT, MVT::f32, Legal); setOperationAction(ISD::FROUND, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); // The hardware supports ROTR, but not ROTL setOperationAction(ISD::ROTL, MVT::i32, Expand); @@ -183,6 +185,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FADD, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); + setOperationAction(ISD::FTRUNC, VT, Expand); setOperationAction(ISD::FMUL, VT, Expand); setOperationAction(ISD::FRINT, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); @@ -254,8 +257,8 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) switch (Op.getOpcode()) { default: Op.getNode()->dump(); - 
assert(0 && "Custom lowering code for this" - "instruction is not implemented yet!"); + llvm_unreachable("Custom lowering code for this" + "instruction is not implemented yet!"); break; // AMDIL DAG lowering case ISD::SDIV: return LowerSDIV(Op, DAG); @@ -455,7 +458,7 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, case ISD::SETTRUE2: case ISD::SETUO: case ISD::SETO: - assert(0 && "Operation should already be optimised !"); + llvm_unreachable("Operation should already be optimised!"); case ISD::SETULE: case ISD::SETULT: case ISD::SETOLE: @@ -479,7 +482,7 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); } case ISD::SETCC_INVALID: - assert(0 && "Invalid setcc condcode !"); + llvm_unreachable("Invalid setcc condcode!"); } return Op; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp index 4f7084b9202f..4bc90c0404e5 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp @@ -110,7 +110,7 @@ AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert(!"Not Implemented"); + llvm_unreachable("Not Implemented"); } void @@ -119,7 +119,7 @@ AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert(!"Not Implemented"); + llvm_unreachable("Not Implemented"); } bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUInstrInfo.h b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUInstrInfo.h index ce5b58c6923f..426910c8fc21 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUInstrInfo.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUInstrInfo.h @@ -78,18 +78,18 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const = 0; - void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; protected: MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp index 47617a72990d..8fbec4ec3783 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPURegisterInfo.cpp @@ -38,7 +38,7 @@ void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger 
*RS) const { - assert(!"Subroutines not supported yet"); + llvm_unreachable("Subroutines not supported yet"); } unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUSubtarget.cpp index 061793a68b9d..51d9eadbafe6 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUSubtarget.cpp @@ -80,40 +80,13 @@ AMDGPUSubtarget::isTargetELF() const { } size_t AMDGPUSubtarget::getDefaultSize(uint32_t dim) const { - if (dim > 3) { + if (dim > 2) { return 1; } else { return DefaultSize[dim]; } } -std::string -AMDGPUSubtarget::getDataLayout() const { - std::string DataLayout = std::string( - "e" - "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32" - "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128" - "-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048" - "-n32:64" - ); - - if (hasHWFP64()) { - DataLayout.append("-f64:64:64"); - } - - if (is64bit()) { - DataLayout.append("-p:64:64:64"); - } else { - DataLayout.append("-p:32:32:32"); - } - - if (Gen >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { - DataLayout.append("-p3:32:32:32"); - } - - return DataLayout; -} - std::string AMDGPUSubtarget::getDeviceName() const { return DevName; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUSubtarget.h b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUSubtarget.h index 4288d275c99e..060571e26b92 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUSubtarget.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUSubtarget.h @@ -75,7 +75,6 @@ public: // Helper functions to simplify if statements bool isTargetELF() const; - std::string getDataLayout() const; std::string getDeviceName() const; virtual size_t getDefaultSize(uint32_t dim) const; bool dumpCode() const { return DumpCode; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp index bc4f5d720ae2..1279665f16a8 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -42,13 +42,27 @@ extern "C" void LLVMInitializeR600Target() { } static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { - return new ScheduleDAGMI(C, new R600SchedStrategy()); + return new ScheduleDAGMILive(C, new R600SchedStrategy()); } static MachineSchedRegistry SchedCustomRegistry("r600", "Run R600's custom scheduler", createR600MachineScheduler); +static std::string computeDataLayout(const AMDGPUSubtarget &ST) { + std::string Ret = "e-p:32:32"; + + if (ST.is64bit()) { + // 32-bit private, local, and region pointers. 64-bit global and constant. 
+ Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64"; + } + + Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256" + "-v512:512-v1024:1024-v2048:2048-n32:64"; + + return Ret; +} + AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, TargetOptions Options, @@ -58,7 +72,7 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel), Subtarget(TT, CPU, FS), - Layout(Subtarget.getDataLayout()), + Layout(computeDataLayout(Subtarget)), FrameLowering(TargetFrameLowering::StackGrowsUp, 64 * 16 // Maximum stack alignment (long16) , 0), @@ -72,6 +86,7 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, InstrInfo.reset(new SIInstrInfo(*this)); TLInfo.reset(new SITargetLowering(*this)); } + setRequiresStructuredCFG(true); initAsmInfo(); } @@ -167,7 +182,7 @@ bool AMDGPUPassConfig::addPreSched2() { const AMDGPUSubtarget &ST = TM->getSubtarget(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) - addPass(createR600EmitClauseMarkers(*TM)); + addPass(createR600EmitClauseMarkers()); if (ST.isIfCvtEnabled()) addPass(&IfConverterID); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) @@ -178,7 +193,7 @@ bool AMDGPUPassConfig::addPreSched2() { bool AMDGPUPassConfig::addPreEmitPass() { const AMDGPUSubtarget &ST = TM->getSubtarget(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { - addPass(createAMDGPUCFGStructurizerPass(*TM)); + addPass(createAMDGPUCFGStructurizerPass()); addPass(createR600ExpandSpecialInstrsPass(*TM)); addPass(&FinalizeMachineBundlesID); addPass(createR600Packetizer(*TM)); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp index 507570fdcaa2..92ce82fc227d 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/AMDILCFGStructurizer.cpp @@ -54,6 +54,10 @@ STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue " STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); +namespace llvm { + void initializeAMDGPUCFGStructurizerPass(PassRegistry&); +} + //===----------------------------------------------------------------------===// // // Miscellaneous utility for CFGStructurizer. 
@@ -131,13 +135,13 @@ public: static char ID; - AMDGPUCFGStructurizer(TargetMachine &tm) : - MachineFunctionPass(ID), TM(tm), - TII(static_cast(tm.getInstrInfo())), - TRI(&TII->getRegisterInfo()) { } + AMDGPUCFGStructurizer() : + MachineFunctionPass(ID), TII(NULL), TRI(NULL) { + initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry()); + } const char *getPassName() const { - return "AMD IL Control Flow Graph structurizer Pass"; + return "AMDGPU Control Flow Graph structurizer Pass"; } void getAnalysisUsage(AnalysisUsage &AU) const { @@ -157,6 +161,8 @@ public: bool prepare(); bool runOnMachineFunction(MachineFunction &MF) { + TII = static_cast(MF.getTarget().getInstrInfo()); + TRI = &TII->getRegisterInfo(); DEBUG(MF.dump();); OrderedBlks.clear(); FuncRep = &MF; @@ -173,7 +179,6 @@ public: } protected: - TargetMachine &TM; MachineDominatorTree *MDT; MachinePostDominatorTree *PDT; MachineLoopInfo *MLI; @@ -1899,6 +1904,14 @@ char AMDGPUCFGStructurizer::ID = 0; } // end anonymous namespace -FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm) { - return new AMDGPUCFGStructurizer(tm); +INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer", + "AMDGPU CFG Structurizer", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer", + "AMDGPU CFG Structurizer", false, false) + +FunctionPass *llvm::createAMDGPUCFGStructurizerPass() { + return new AMDGPUCFGStructurizer(); } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/R600/CMakeLists.txt index 9f8f6a83e459..93a51179754f 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/CMakeLists.txt @@ -50,8 +50,6 @@ add_llvm_target(R600CodeGen SITypeRewriter.cpp ) -add_dependencies(LLVMR600CodeGen AMDGPUCommonTableGen intrinsics_gen) - add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/InstPrinter/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/R600/InstPrinter/CMakeLists.txt index 069c55ba948e..dcd87037fabb 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/InstPrinter/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/InstPrinter/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - add_llvm_library(LLVMR600AsmPrinter AMDGPUInstPrinter.cpp ) - -add_dependencies(LLVMR600AsmPrinter AMDGPUCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/R600/LLVMBuild.txt index f2a7554e5269..408ed758dbed 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/LLVMBuild.txt @@ -28,5 +28,5 @@ has_asmprinter = 1 type = Library name = R600CodeGen parent = R600 -required_libraries = AsmPrinter CodeGen Core SelectionDAG Support Target MC R600AsmPrinter R600Desc R600Info +required_libraries = Analysis AsmPrinter CodeGen Core MC R600AsmPrinter R600Desc R600Info Scalar SelectionDAG Support Target TransformUtils add_to_library_groups = R600 diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 4a8e1b0b2d86..227da41ebdc5 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -13,7 +13,6 @@ using namespace llvm; AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() { HasSingleParameterDotFile = false; - WeakDefDirective = 0; //===------------------------------------------------------------------===// HasSubsectionsViaSymbols = true; HasMachoZeroFillDirective = false; @@ -25,9 +24,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() { CommentColumn = 40; CommentString = ";"; LabelSuffix = ":"; - GlobalPrefix = "@"; - PrivateGlobalPrefix = ";."; - LinkerPrivateGlobalPrefix = "!"; InlineAsmStart = ";#ASMSTART"; InlineAsmEnd = ";#ASMEND"; AssemblerDialect = 0; @@ -43,7 +39,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() { GPRel32Directive = 0; SunStyleELFSectionSwitchSyntax = true; UsesELFSectionDirectiveForBSS = true; - HasMicrosoftFastStdCallMangling = false; //===--- Alignment Information ----------------------------------------===// AlignDirective = ".align\t"; @@ -58,7 +53,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() { HasDotTypeDotSizeDirective = false; HasNoDeadStrip = true; WeakRefDirective = ".weakref\t"; - LinkOnceDirective = 0; //===--- Dwarf Emission Directives -----------------------------------===// HasLEB128 = true; SupportsDebugInformation = true; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/MCTargetDesc/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/R600/MCTargetDesc/CMakeLists.txt index 98f6925d9fb2..801c9054937d 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/MCTargetDesc/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/MCTargetDesc/CMakeLists.txt @@ -8,5 +8,3 @@ add_llvm_library(LLVMR600Desc R600MCCodeEmitter.cpp SIMCCodeEmitter.cpp ) - -add_dependencies(LLVMR600Desc AMDGPUCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/Processors.td b/external/bsd/llvm/dist/llvm/lib/Target/R600/Processors.td index ee190e4aed7b..5499a20dfc02 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/Processors.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/Processors.td @@ -9,46 +9,94 @@ class Proc Features> : Processor; + +//===----------------------------------------------------------------------===// +// R600 +//===----------------------------------------------------------------------===// def : Proc<"", R600_VLIW5_Itin, [FeatureR600, FeatureVertexCache]>; + def : Proc<"r600", 
R600_VLIW5_Itin, [FeatureR600 , FeatureVertexCache]>; + def : Proc<"rs880", R600_VLIW5_Itin, [FeatureR600]>; + def : Proc<"rv670", R600_VLIW5_Itin, [FeatureR600, FeatureFP64, FeatureVertexCache]>; + +//===----------------------------------------------------------------------===// +// R700 +//===----------------------------------------------------------------------===// + def : Proc<"rv710", R600_VLIW5_Itin, [FeatureR700, FeatureVertexCache]>; + def : Proc<"rv730", R600_VLIW5_Itin, [FeatureR700, FeatureVertexCache]>; + def : Proc<"rv770", R600_VLIW5_Itin, [FeatureR700, FeatureFP64, FeatureVertexCache]>; + +//===----------------------------------------------------------------------===// +// Evergreen +//===----------------------------------------------------------------------===// + def : Proc<"cedar", R600_VLIW5_Itin, [FeatureEvergreen, FeatureVertexCache]>; + def : Proc<"redwood", R600_VLIW5_Itin, [FeatureEvergreen, FeatureVertexCache]>; + def : Proc<"sumo", R600_VLIW5_Itin, [FeatureEvergreen]>; + def : Proc<"juniper", R600_VLIW5_Itin, [FeatureEvergreen, FeatureVertexCache]>; + def : Proc<"cypress", R600_VLIW5_Itin, [FeatureEvergreen, FeatureFP64, FeatureVertexCache]>; + +//===----------------------------------------------------------------------===// +// Northern Islands +//===----------------------------------------------------------------------===// + def : Proc<"barts", R600_VLIW5_Itin, [FeatureNorthernIslands, FeatureVertexCache]>; + def : Proc<"turks", R600_VLIW5_Itin, [FeatureNorthernIslands, FeatureVertexCache]>; + def : Proc<"caicos", R600_VLIW5_Itin, [FeatureNorthernIslands]>; + def : Proc<"cayman", R600_VLIW4_Itin, [FeatureNorthernIslands, FeatureFP64, FeatureCaymanISA]>; +//===----------------------------------------------------------------------===// +// Southern Islands +//===----------------------------------------------------------------------===// + def : Proc<"SI", SI_Itin, [FeatureSouthernIslands]>; + def : Proc<"tahiti", SI_Itin, [FeatureSouthernIslands]>; + def : Proc<"pitcairn", SI_Itin, [FeatureSouthernIslands]>; + def : Proc<"verde", SI_Itin, [FeatureSouthernIslands]>; + def : Proc<"oland", SI_Itin, [FeatureSouthernIslands]>; + def : Proc<"hainan", SI_Itin, [FeatureSouthernIslands]>; + +//===----------------------------------------------------------------------===// +// Sea Islands +//===----------------------------------------------------------------------===// + def : Proc<"bonaire", SI_Itin, [FeatureSeaIslands]>; + def : Proc<"kabini", SI_Itin, [FeatureSeaIslands]>; + def : Proc<"kaveri", SI_Itin, [FeatureSeaIslands]>; + def : Proc<"hawaii", SI_Itin, [FeatureSeaIslands]>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp index ac3d8f63d57f..ec39e097ba95 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -332,6 +332,7 @@ public: unsigned MaxStack = 0; unsigned CurrentStack = 0; + unsigned CurrentLoopDepth = 0; bool HasPush = false; for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; ++MB) { @@ -370,6 +371,13 @@ public: CurrentStack++; MaxStack = std::max(MaxStack, CurrentStack); HasPush = true; + if (ST.hasCaymanISA() && CurrentLoopDepth > 1) { + BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_CM)) + .addImm(CfCount + 1) + .addImm(1); + MI->setDesc(TII->get(AMDGPU::CF_ALU)); + CfCount++; + } case 
AMDGPU::CF_ALU: I = MI; AluClauses.push_back(MakeALUClause(MBB, I)); @@ -378,6 +386,7 @@ break; case AMDGPU::WHILELOOP: { CurrentStack+=4; + CurrentLoopDepth++; MaxStack = std::max(MaxStack, CurrentStack); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_WHILE_LOOP)) @@ -392,6 +401,7 @@ } case AMDGPU::ENDLOOP: { CurrentStack-=4; + CurrentLoopDepth--; std::pair > Pair = LoopStack.back(); LoopStack.pop_back(); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/R600EmitClauseMarkers.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/R600EmitClauseMarkers.cpp index 1bbfd2b68f3d..5bd793a3d35d 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/R600EmitClauseMarkers.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/R600EmitClauseMarkers.cpp @@ -25,12 +25,15 @@ using namespace llvm; +namespace llvm { + void initializeR600EmitClauseMarkersPass(PassRegistry&); +} + namespace { -class R600EmitClauseMarkersPass : public MachineFunctionPass { +class R600EmitClauseMarkers : public MachineFunctionPass { private: - static char ID; const R600InstrInfo *TII; int Address; @@ -287,8 +290,11 @@ private: } public: - R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID), - TII(0), Address(0) { } + static char ID; + R600EmitClauseMarkers() : MachineFunctionPass(ID), TII(0), Address(0) { + + initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF) { TII = static_cast(MF.getTarget().getInstrInfo()); @@ -314,12 +320,16 @@ public: } }; -char R600EmitClauseMarkersPass::ID = 0; +char R600EmitClauseMarkers::ID = 0; } // end anonymous namespace +INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers", + "R600 Emit Clause Markers", false, false) +INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers", + "R600 Emit Clause Markers", false, false) -llvm::FunctionPass *llvm::createR600EmitClauseMarkers(TargetMachine &TM) { - return new R600EmitClauseMarkersPass(TM); +llvm::FunctionPass *llvm::createR600EmitClauseMarkers() { + return new R600EmitClauseMarkers(); } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/R600ExpandSpecialInstrs.cpp index aeee4aa89562..0be491c30491 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/R600ExpandSpecialInstrs.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/R600ExpandSpecialInstrs.cpp @@ -33,8 +33,8 @@ private: static char ID; const R600InstrInfo *TII; - bool ExpandInputPerspective(MachineInstr& MI); - bool ExpandInputConstant(MachineInstr& MI); + void SetFlagInNewMI(MachineInstr *NewMI, const MachineInstr *OldMI, + unsigned Op); public: R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), @@ -55,6 +55,15 @@ FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { return new R600ExpandSpecialInstrsPass(TM); } +void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr *NewMI, + const MachineInstr *OldMI, unsigned Op) { + int OpIdx = TII->getOperandIdx(*OldMI, Op); + if (OpIdx > -1) { + uint64_t Val = OldMI->getOperand(OpIdx).getImm(); + TII->setImmOperand(NewMI, Op, Val); + } +} + bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { TII = static_cast(MF.getTarget().getInstrInfo()); @@ -325,6 +334,12 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { if (NotLast) { TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); } + SetFlagInNewMI(NewMI, &MI,
AMDGPU::OpName::clamp); + SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::literal); + SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_abs); + SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_abs); + SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_neg); + SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_neg); } MI.eraseFromParent(); } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/R600ISelLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/R600ISelLowering.cpp index 0fcb488672f8..8d71919704df 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/R600ISelLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/R600ISelLowering.cpp @@ -977,7 +977,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const HWFalse = DAG.getConstant(0, CompareVT); } else { - assert(!"Unhandled value type in LowerSELECT_CC"); + llvm_unreachable("Unhandled value type in LowerSELECT_CC"); } // Lower this unsupported SELECT_CC into a combination of two supported @@ -1099,7 +1099,7 @@ Ptr, DAG.getConstant(2, MVT::i32))); if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) { - assert(!"Truncated and indexed stores not supported yet"); + llvm_unreachable("Truncated and indexed stores not supported yet"); } else { Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand()); } @@ -1239,7 +1239,7 @@ } Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements); } else { - // non constant ptr cant be folded, keeps it as a v4f32 load + // non-constant ptr can't be folded, keeps it as a v4f32 load Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32, DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)), DAG.getConstant(LoadNode->getAddressSpace() - @@ -1442,17 +1442,20 @@ VectorEntry.getOperand(3) }; bool isUnmovable[4] = { false, false, false, false }; - for (unsigned i = 0; i < 4; i++) + for (unsigned i = 0; i < 4; i++) { RemapSwizzle[i] = i; + if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1)) + ->getZExtValue(); + if (i == Idx) + isUnmovable[Idx] = true; + } + } for (unsigned i = 0; i < 4; i++) { if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1)) ->getZExtValue(); - if (i == Idx) { - isUnmovable[Idx] = true; - continue; - } if (isUnmovable[Idx]) continue; // Swap i and Idx diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/R600Instructions.td b/external/bsd/llvm/dist/llvm/lib/Target/R600/R600Instructions.td index 0346e24ab771..34bbdd9fa5b4 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/R600Instructions.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/R600Instructions.td @@ -733,6 +733,9 @@ def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>; def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>; def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>; +// Also add a pattern for the ftrunc intrinsic +def : Pat<(ftrunc f32:$src0), (TRUNC $src0)>; + def MOV : R600_1OP <0x19, "MOV", []>; let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { @@ -1867,6 +1870,10 @@ def : Pat < let COUNT = 0; } + def CF_PUSH_CM : CF_CLAUSE_EG<11, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "PUSH @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; + } + def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, 
(RECIPSQRT_CLAMPED_cm $src))>; class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> : diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/R600MachineScheduler.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/R600MachineScheduler.cpp index da2a4d862e7d..d3ffb506f1ba 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/R600MachineScheduler.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/R600MachineScheduler.cpp @@ -24,8 +24,8 @@ using namespace llvm; void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { - - DAG = dag; + assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness"); + DAG = static_cast<ScheduleDAGMILive*>(dag); TII = static_cast<const R600InstrInfo*>(DAG->TII); TRI = static_cast<const R600RegisterInfo*>(DAG->TRI); VLIW5 = !DAG->MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA(); @@ -72,7 +72,7 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { // OpenCL Programming Guide : // The approx. number of WF that allows TEX inst to hide ALU inst is : // 500 (cycles for TEX) / (AluFetchRatio * 8 (cycles for ALU)) - float ALUFetchRationEstimate = + float ALUFetchRationEstimate = (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) / (FetchInstCount + Available[IDFetch].size()); unsigned NeededWF = 62.5f / ALUFetchRationEstimate; @@ -464,4 +464,3 @@ } return SU; } - diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/R600MachineScheduler.h b/external/bsd/llvm/dist/llvm/lib/Target/R600/R600MachineScheduler.h index 97c8cdec0aae..b909ff71a692 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/R600MachineScheduler.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/R600MachineScheduler.h @@ -26,7 +26,7 @@ namespace llvm { class R600SchedStrategy : public MachineSchedStrategy { - const ScheduleDAGMI *DAG; + const ScheduleDAGMILive *DAG; const R600InstrInfo *TII; const R600RegisterInfo *TRI; MachineRegisterInfo *MRI; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp index cf719c0b9fe1..8e0946ed2528 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp @@ -63,7 +63,7 @@ public: DenseMap<unsigned, unsigned> RegToChan; std::vector<unsigned> UndefReg; RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { - assert (MI->getOpcode() == AMDGPU::REG_SEQUENCE); + assert(MI->getOpcode() == AMDGPU::REG_SEQUENCE); for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { MachineOperand &MO = Instr->getOperand(i); unsigned Chan = Instr->getOperand(i + 1).getImm(); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp index 6bbdf59d559b..5240c48dd166 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/SIAnnotateControlFlow.cpp @@ -205,7 +205,7 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) { void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) { if (PHINode *Phi = dyn_cast<PHINode>(Cond)) { - // Handle all non constant incoming values first + // Handle all non-constant incoming values first for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) { Value *Incoming = Phi->getIncomingValue(i); if (isa<ConstantInt>(Incoming)) @@ -253,7 +253,7 @@ PhiInserter.AddAvailableValue(Parent, Ret); } else { - assert(0 && "Unhandled loop 
condition!"); + llvm_unreachable("Unhandled loop condition!"); } } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/SIISelLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/SIISelLowering.cpp index d5d2b68caf0a..4fb844439aba 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/SIISelLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/SIISelLowering.cpp @@ -137,7 +137,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::FrameIndex, MVT::i64, Custom); + setOperationAction(ISD::FrameIndex, MVT::i32, Custom); setTargetDAGCombine(ISD::SELECT_CC); @@ -704,9 +704,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) return SDValue(); - SDValue TruncPtr = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, - Load->getBasePtr(), DAG.getConstant(0, MVT::i32)); - SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr, + SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(), DAG.getConstant(2, MVT::i32)); SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(), @@ -793,8 +791,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { if (Store->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) return SDValue(); - SDValue TruncPtr = DAG.getZExtOrTrunc(Store->getBasePtr(), DL, MVT::i32); - SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr, + SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Store->getBasePtr(), DAG.getConstant(2, MVT::i32)); SDValue Chain = Store->getChain(); SmallVector Values; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/SIInstrInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/SIInstrInfo.cpp index ab55c1b173ce..68292afc9a47 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/SIInstrInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/SIInstrInfo.cpp @@ -16,6 +16,7 @@ #include "SIInstrInfo.h" #include "AMDGPUTargetMachine.h" #include "SIDefines.h" +#include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrDesc.h" @@ -185,6 +186,67 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const { return Opcode; } +void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, + int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo(); + DebugLoc DL = MBB.findDebugLoc(MI); + unsigned KillFlag = isKill ? 
RegState::Kill : 0; + + if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) { + unsigned Lane = MFI->SpillTracker.getNextLane(MRI); + BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), + MFI->SpillTracker.LaneVGPR) + .addReg(SrcReg, KillFlag) + .addImm(Lane); + MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, + Lane); + } else { + for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) { + unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(MBB, MI, MBB.findDebugLoc(MI), get(AMDGPU::COPY), SubReg) + .addReg(SrcReg, 0, RI.getSubRegFromChannel(i)); + storeRegToStackSlot(MBB, MI, SubReg, isKill, FrameIndex + i, + &AMDGPU::SReg_32RegClass, TRI); + } + } +} + +void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>(); + DebugLoc DL = MBB.findDebugLoc(MI); + if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) { + SIMachineFunctionInfo::SpilledReg Spill = + MFI->SpillTracker.getSpilledReg(FrameIndex); + assert(Spill.VGPR); + BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg) + .addReg(Spill.VGPR) + .addImm(Spill.Lane); + } else { + for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) { + unsigned Flags = RegState::Define; + if (i == 0) { + Flags |= RegState::Undef; + } + unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + loadRegFromStackSlot(MBB, MI, SubReg, FrameIndex + i, + &AMDGPU::SReg_32RegClass, TRI); + BuildMI(MBB, MI, DL, get(AMDGPU::COPY)) + .addReg(DestReg, Flags, RI.getSubRegFromChannel(i)) + .addReg(SubReg); + } + } +} + MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { @@ -213,8 +275,10 @@ return 0; unsigned Reg = MI->getOperand(1).getReg(); + unsigned SubReg = MI->getOperand(1).getSubReg(); MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm()); MI->getOperand(2).ChangeToRegister(Reg, false); + MI->getOperand(2).setSubReg(SubReg); } else { MI = TargetInstrInfo::commuteInstruction(MI, NewMI); } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/SIInstrInfo.h b/external/bsd/llvm/dist/llvm/lib/Target/R600/SIInstrInfo.h index 4af63481e3af..19bcf0cd946f 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/SIInstrInfo.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/SIInstrInfo.h @@ -43,12 +43,27 @@ unsigned DestReg, unsigned SrcReg, bool KillSrc) const; + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + unsigned commuteOpcode(unsigned Opcode) const; virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI=false) const; - virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;} + virtual unsigned getIEQOpcode() const { + llvm_unreachable("Unimplemented"); + } + MachineInstr *buildMovInstr(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, unsigned DstReg, unsigned SrcReg) const;
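The storeRegToStackSlot/loadRegFromStackSlot pair added above spills an SGPR by parking its value in a lane of a dedicated VGPR with V_WRITELANE_B32 and recovering it with V_READLANE_B32; wider register classes are split into 32-bit sub-registers and spilled recursively. The sketch below is a self-contained toy model of just the frame-index-to-(VGPR, lane) bookkeeping; every name in it is illustrative and none comes from the patch.

// Toy model of the lane-spill bookkeeping (illustrative only).
#include <map>
#include <utility>

struct LaneSpillModel {
  static const int MaxLanes = 64;             // a VGPR has 64 lanes on SI
  int CurrentLane;                            // last lane handed out
  int LaneVGPR;                               // stands in for the spill VGPR
  std::map<int, std::pair<int, int> > Slots;  // frame index -> (VGPR, lane)

  LaneSpillModel() : CurrentLane(0), LaneVGPR(0) {}

  // Loosely mirrors RegSpillTracker::getNextLane(): hand lanes out in order
  // and roll over to a fresh VGPR once all 64 are in use.
  int nextLane() {
    if (++CurrentLane == MaxLanes) {
      CurrentLane = 0;
      ++LaneVGPR;  // the real tracker allocates a new virtual register here
    }
    return CurrentLane;
  }

  void spill(int FrameIndex) {
    int Lane = nextLane();                    // V_WRITELANE_B32 target lane
    Slots[FrameIndex] = std::make_pair(LaneVGPR, Lane);
  }

  std::pair<int, int> reload(int FrameIndex) {
    return Slots[FrameIndex];                 // V_READLANE_B32 source
  }
};

diff --git 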
a/external/bsd/llvm/dist/llvm/lib/Target/R600/SIInstrInfo.td b/external/bsd/llvm/dist/llvm/lib/Target/R600/SIInstrInfo.td index 4cd0daa55c5f..d0cc7ce78611 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/SIInstrInfo.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/SIInstrInfo.td @@ -121,7 +121,7 @@ class SGPRImm <dag frag> : PatLeaf<frag>; -def FRAMEri64 : Operand<iPTR> { +def FRAMEri32 : Operand<iPTR> { let MIOperandInfo = (ops SReg_32:$ptr, i32imm:$index); } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/SIInstructions.td b/external/bsd/llvm/dist/llvm/lib/Target/R600/SIInstructions.td index 76f05eb49655..3baa4cd33a06 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/SIInstructions.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/SIInstructions.td @@ -876,8 +876,21 @@ def : Pat < $src2), sub1) >; -defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>; -defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>; +def V_READLANE_B32 : VOP2 < + 0x00000001, + (outs SReg_32:$vdst), + (ins VReg_32:$src0, SSrc_32:$vsrc1), + "V_READLANE_B32 $vdst, $src0, $vsrc1", + [] +>; + +def V_WRITELANE_B32 : VOP2 < + 0x00000002, + (outs VReg_32:$vdst), + (ins SReg_32:$src0, SSrc_32:$vsrc1), + "V_WRITELANE_B32 $vdst, $src0, $vsrc1", + [] +>; let isCommutable = 1 in { defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", @@ -1306,7 +1319,7 @@ def SI_END_CF : InstSI < def SI_KILL : InstSI < (outs), (ins VReg_32:$src), - "SI_KIL $src", + "SI_KILL $src", [(int_AMDGPU_kill f32:$src)] >; @@ -1315,13 +1328,13 @@ def SI_KILL : InstSI < let Uses = [EXEC], Defs = [EXEC,VCC,M0] in { -//defm SI_ : RegisterLoadStore <VReg_32, FRAMEri64, ADDRIndirect>; +//defm SI_ : RegisterLoadStore <VReg_32, FRAMEri32, ADDRIndirect>; let UseNamedOperandTable = 1 in { def SI_RegisterLoad : AMDGPUShaderInst < (outs VReg_32:$dst, SReg_64:$temp), - (ins FRAMEri64:$addr, i32imm:$chan), + (ins FRAMEri32:$addr, i32imm:$chan), "", [] > { let isRegisterLoad = 1; @@ -1330,7 +1343,7 @@ def SI_RegisterLoad : AMDGPUShaderInst < class SIRegStore <dag outs> : AMDGPUShaderInst < outs, - (ins VReg_32:$val, FRAMEri64:$addr, i32imm:$chan), + (ins VReg_32:$val, FRAMEri32:$addr, i32imm:$chan), "", [] > { let isRegisterStore = 1; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/SILowerControlFlow.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/SILowerControlFlow.cpp index 958763dffc22..8c12e1357f03 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/SILowerControlFlow.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/SILowerControlFlow.cpp @@ -283,16 +283,11 @@ void SILowerControlFlowPass::EndCf(MachineInstr &MI) { } void SILowerControlFlowPass::Branch(MachineInstr &MI) { - MachineBasicBlock *Next = MI.getParent()->getNextNode(); - MachineBasicBlock *Target = MI.getOperand(0).getMBB(); - if (Target == Next) - MI.eraseFromParent(); - else - assert(0); + assert(MI.getOperand(0).getMBB() == MI.getParent()->getNextNode()); + MI.eraseFromParent(); } void SILowerControlFlowPass::Kill(MachineInstr &MI) { - MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp index 071f9fa43a16..ea04346e5097 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp @@ -10,6 +10,10 @@ #include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +#define MAX_LANES 64 using namespace llvm; @@ -19,4 
+23,33 @@ void SIMachineFunctionInfo::anchor() {} SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), - PSInputAddr(0) { } + PSInputAddr(0), + SpillTracker() { } + +static unsigned createLaneVGPR(MachineRegisterInfo &MRI) { + return MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); +} + +unsigned SIMachineFunctionInfo::RegSpillTracker::getNextLane(MachineRegisterInfo &MRI) { + if (!LaneVGPR) { + LaneVGPR = createLaneVGPR(MRI); + } else { + CurrentLane++; + if (CurrentLane == MAX_LANES) { + CurrentLane = 0; + LaneVGPR = createLaneVGPR(MRI); + } + } + return CurrentLane; +} + +void SIMachineFunctionInfo::RegSpillTracker::addSpilledReg(unsigned FrameIndex, + unsigned Reg, + int Lane) { + SpilledRegisters[FrameIndex] = SpilledReg(Reg, Lane); +} + +const SIMachineFunctionInfo::SpilledReg& +SIMachineFunctionInfo::RegSpillTracker::getSpilledReg(unsigned FrameIndex) { + return SpilledRegisters[FrameIndex]; +} diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/SIMachineFunctionInfo.h b/external/bsd/llvm/dist/llvm/lib/Target/R600/SIMachineFunctionInfo.h index 2f1961cafdb4..8dc82a0b4253 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/SIMachineFunctionInfo.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/SIMachineFunctionInfo.h @@ -16,16 +16,44 @@ #define SIMACHINEFUNCTIONINFO_H_ #include "AMDGPUMachineFunction.h" +#include <map> namespace llvm { +class MachineRegisterInfo; + /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which /// tells the hardware which interpolation parameters to load. class SIMachineFunctionInfo : public AMDGPUMachineFunction { virtual void anchor(); public: + + struct SpilledReg { + unsigned VGPR; + int Lane; + SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { } + SpilledReg() : VGPR(0), Lane(-1) { } + bool hasLane() { return Lane != -1;} + }; + + struct RegSpillTracker { + private: + unsigned CurrentLane; + std::map<unsigned, SpilledReg> SpilledRegisters; + public: + unsigned LaneVGPR; + RegSpillTracker() : CurrentLane(0), SpilledRegisters(), LaneVGPR(0) { } + unsigned getNextLane(MachineRegisterInfo &MRI); + void addSpilledReg(unsigned FrameIndex, unsigned Reg, int Lane = -1); + const SpilledReg& getSpilledReg(unsigned FrameIndex); + bool programSpillsRegisters() { return !SpilledRegisters.empty(); } + }; + + // SIMachineFunctionInfo definition + + SIMachineFunctionInfo(const MachineFunction &MF); unsigned PSInputAddr; + struct RegSpillTracker SpillTracker; }; } // End namespace llvm diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/TargetInfo/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/R600/TargetInfo/CMakeLists.txt index 3d1584eba346..c3bd26c7a893 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/TargetInfo/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/TargetInfo/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - add_llvm_library(LLVMR600Info AMDGPUTargetInfo.cpp ) - -add_dependencies(LLVMR600Info AMDGPUCommonTableGen intrinsics_gen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/R600/TargetInfo/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/R600/TargetInfo/LLVMBuild.txt index 4c6fea4aa08c..c3d3cf51cc8e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/R600/TargetInfo/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/R600/TargetInfo/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = R600Info parent = R600 -required_libraries = MC Support +required_libraries = Support add_to_library_groups = R600 diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/CMakeLists.txt index 6339394eab65..3aad2723abaa 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/CMakeLists.txt @@ -23,9 +23,9 @@ add_llvm_target(SparcCodeGen SparcSelectionDAGInfo.cpp SparcJITInfo.cpp SparcCodeEmitter.cpp + SparcMCInstLower.cpp ) -add_dependencies(LLVMSparcCodeGen SparcCommonTableGen intrinsics_gen) - add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) +add_subdirectory(InstPrinter) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/InstPrinter/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/InstPrinter/CMakeLists.txt new file mode 100644 index 000000000000..a285a8337a81 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/InstPrinter/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_library(LLVMSparcAsmPrinter + SparcInstPrinter.cpp + ) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/InstPrinter/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/InstPrinter/LLVMBuild.txt new file mode 100644 index 000000000000..b4c8802b6a38 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/InstPrinter/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/Sparc/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = SparcAsmPrinter +parent = Sparc +required_libraries = MC Support +add_to_library_groups = Sparc diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/InstPrinter/Makefile b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/InstPrinter/Makefile new file mode 100644 index 000000000000..2dabd82965f4 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/InstPrinter/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/Sparc/InstPrinter/Makefile ---------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMSparcAsmPrinter + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp new file mode 100644 index 000000000000..d7bee72b747c --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp @@ -0,0 +1,94 @@ +//===-- SparcInstPrinter.cpp - Convert Sparc MCInst to assembly syntax -----==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints a Sparc MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "SparcInstPrinter.h" + +#include "Sparc.h" +#include "MCTargetDesc/SparcBaseInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define GET_INSTRUCTION_NAME +#include "SparcGenAsmWriter.inc" + +void SparcInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const +{ + OS << '%' << StringRef(getRegisterName(RegNo)).lower(); +} + +void SparcInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) +{ + printInstruction(MI, O); + printAnnotation(O, Annot); +} + +void SparcInstPrinter::printOperand(const MCInst *MI, int opNum, + raw_ostream &O) +{ + const MCOperand &MO = MI->getOperand (opNum); + + if (MO.isReg()) { + printRegName(O, MO.getReg()); + return; + } + + if (MO.isImm()) { + O << (int)MO.getImm(); + return; + } + + assert(MO.isExpr() && "Unknown operand kind in printOperand"); + MO.getExpr()->print(O); +} + +void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum, + raw_ostream &O, const char *Modifier) +{ + printOperand(MI, opNum, O); + + // If this is an ADD operand, emit it like normal operands. + if (Modifier && !strcmp(Modifier, "arith")) { + O << ", "; + printOperand(MI, opNum+1, O); + return; + } + const MCOperand &MO = MI->getOperand(opNum+1); + + if (MO.isReg() && MO.getReg() == SP::G0) + return; // don't print "+%g0" + if (MO.isImm() && MO.getImm() == 0) + return; // don't print "+0" + + O << "+"; + + printOperand(MI, opNum+1, O); +} + +void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum, + raw_ostream &O) +{ + int CC = (int)MI->getOperand(opNum).getImm(); + O << SPARCCondCodeToString((SPCC::CondCodes)CC); +} + +bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum, + raw_ostream &O) +{ + assert(0 && "FIXME: Implement SparcInstPrinter::printGetPCX."); + return true; +} diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h new file mode 100644 index 000000000000..c6b57f826b29 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h @@ -0,0 +1,46 @@ +//===-- SparcInstPrinter.h - Convert Sparc MCInst to assembly syntax ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints a Sparc MCInst to a .s file. 
+// +//===----------------------------------------------------------------------===// + +#ifndef SparcINSTPRINTER_H +#define SparcINSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + +class MCOperand; + +class SparcInstPrinter : public MCInstPrinter { +public: + SparcInstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + + virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + + // Autogenerated by tblgen. + void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + void printOperand(const MCInst *MI, int opNum, raw_ostream &OS); + void printMemOperand(const MCInst *MI, int opNum, raw_ostream &OS, + const char *Modifier = 0); + void printCCOperand(const MCInst *MI, int opNum, raw_ostream &OS); + bool printGetPCX(const MCInst *MI, unsigned OpNo, raw_ostream &OS); + +}; +} // end namespace llvm + +#endif diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/LLVMBuild.txt index fd8e5d9fd8fe..1ac02b3058c8 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = MCTargetDesc TargetInfo +subdirectories = InstPrinter MCTargetDesc TargetInfo [component_0] type = TargetGroup @@ -29,6 +29,6 @@ has_jit = 1 type = Library name = SparcCodeGen parent = Sparc -required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SparcDesc - SparcInfo Support Target +required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SparcAsmPrinter + SparcDesc SparcInfo Support Target add_to_library_groups = Sparc diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt index 9d4db4d25ef7..3a40fcaba47e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt @@ -1,6 +1,6 @@ add_llvm_library(LLVMSparcDesc SparcMCTargetDesc.cpp SparcMCAsmInfo.cpp + SparcMCExpr.cpp + SparcTargetStreamer.cpp ) - -add_dependencies(LLVMSparcDesc SparcCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt index 97f8f162c27f..22515e6d6dc1 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = SparcDesc parent = Sparc -required_libraries = MC SparcInfo Support +required_libraries = MC SparcAsmPrinter SparcInfo Support add_to_library_groups = Sparc diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index baac36b1db64..e3f957fc4b2e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -40,8 +40,6 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) { SunStyleELFSectionSwitchSyntax = true; UsesELFSectionDirectiveForBSS = true; - - 
PrivateGlobalPrefix = ".L"; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp new file mode 100644 index 000000000000..c38a3a6eb24c --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp @@ -0,0 +1,84 @@ +//===-- SparcMCExpr.cpp - Sparc specific MC expression classes --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the assembly expression modifiers +// accepted by the Sparc architecture (e.g. "%hi", "%lo", ...). +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sparcmcexpr" +#include "SparcMCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELF.h" +#include "llvm/Object/ELF.h" + + +using namespace llvm; + +const SparcMCExpr* +SparcMCExpr::Create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx) { + return new (Ctx) SparcMCExpr(Kind, Expr); +} + + +void SparcMCExpr::PrintImpl(raw_ostream &OS) const +{ + bool closeParen = true; + switch (Kind) { + case VK_Sparc_None: closeParen = false; break; + case VK_Sparc_LO: OS << "%lo("; break; + case VK_Sparc_HI: OS << "%hi("; break; + case VK_Sparc_H44: OS << "%h44("; break; + case VK_Sparc_M44: OS << "%m44("; break; + case VK_Sparc_L44: OS << "%l44("; break; + case VK_Sparc_HH: OS << "%hh("; break; + case VK_Sparc_HM: OS << "%hm("; break; + case VK_Sparc_TLS_GD_HI22: OS << "%tgd_hi22("; break; + case VK_Sparc_TLS_GD_LO10: OS << "%tgd_lo10("; break; + case VK_Sparc_TLS_GD_ADD: OS << "%tgd_add("; break; + case VK_Sparc_TLS_GD_CALL: OS << "%tgd_call("; break; + case VK_Sparc_TLS_LDM_HI22: OS << "%tldm_hi22("; break; + case VK_Sparc_TLS_LDM_LO10: OS << "%tldm_lo10("; break; + case VK_Sparc_TLS_LDM_ADD: OS << "%tldm_add("; break; + case VK_Sparc_TLS_LDM_CALL: OS << "%tldm_call("; break; + case VK_Sparc_TLS_LDO_HIX22: OS << "%tldo_hix22("; break; + case VK_Sparc_TLS_LDO_LOX10: OS << "%tldo_lox10("; break; + case VK_Sparc_TLS_LDO_ADD: OS << "%tldo_add("; break; + case VK_Sparc_TLS_IE_HI22: OS << "%tie_hi22("; break; + case VK_Sparc_TLS_IE_LO10: OS << "%tie_lo10("; break; + case VK_Sparc_TLS_IE_LD: OS << "%tie_ld("; break; + case VK_Sparc_TLS_IE_LDX: OS << "%tie_ldx("; break; + case VK_Sparc_TLS_IE_ADD: OS << "%tie_add("; break; + case VK_Sparc_TLS_LE_HIX22: OS << "%tle_hix22("; break; + case VK_Sparc_TLS_LE_LOX10: OS << "%tle_lox10("; break; + } + + const MCExpr *Expr = getSubExpr(); + Expr->print(OS); + if (closeParen) + OS << ')'; +} + +bool +SparcMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout) const { + assert(0 && "FIXME: Implement SparcMCExpr::EvaluateAsRelocatableImpl"); + return getSubExpr()->EvaluateAsRelocatable(Res, *Layout); +} + + +void SparcMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { + assert(0 && "FIXME: Implement SparcMCExpr::fixELFSymbolsInTLSFixups"); +} + +void SparcMCExpr::AddValueSymbols(MCAssembler *Asm) const { + assert(0 && "FIXME: Implement SparcMCExpr::AddValueSymbols"); +} diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h new file mode 100644 index 
000000000000..244c14395e75 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h @@ -0,0 +1,98 @@ +//====- SparcMCExpr.h - Sparc specific MC expression classes --*- C++ -*-=====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes Sparc-specific MCExprs, used for modifiers like +// "%hi" or "%lo" etc., +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SPARCMCEXPR_H +#define LLVM_SPARCMCEXPR_H + +#include "llvm/MC/MCExpr.h" + +namespace llvm { + +class SparcMCExpr : public MCTargetExpr { +public: + enum VariantKind { + VK_Sparc_None, + VK_Sparc_LO, + VK_Sparc_HI, + VK_Sparc_H44, + VK_Sparc_M44, + VK_Sparc_L44, + VK_Sparc_HH, + VK_Sparc_HM, + VK_Sparc_TLS_GD_HI22, + VK_Sparc_TLS_GD_LO10, + VK_Sparc_TLS_GD_ADD, + VK_Sparc_TLS_GD_CALL, + VK_Sparc_TLS_LDM_HI22, + VK_Sparc_TLS_LDM_LO10, + VK_Sparc_TLS_LDM_ADD, + VK_Sparc_TLS_LDM_CALL, + VK_Sparc_TLS_LDO_HIX22, + VK_Sparc_TLS_LDO_LOX10, + VK_Sparc_TLS_LDO_ADD, + VK_Sparc_TLS_IE_HI22, + VK_Sparc_TLS_IE_LO10, + VK_Sparc_TLS_IE_LD, + VK_Sparc_TLS_IE_LDX, + VK_Sparc_TLS_IE_ADD, + VK_Sparc_TLS_LE_HIX22, + VK_Sparc_TLS_LE_LOX10 + }; + +private: + const VariantKind Kind; + const MCExpr *Expr; + + explicit SparcMCExpr(VariantKind _Kind, const MCExpr *_Expr) + : Kind(_Kind), Expr(_Expr) {} + +public: + /// @name Construction + /// @{ + + static const SparcMCExpr *Create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx); + /// @} + /// @name Accessors + /// @{ + + /// getOpcode - Get the kind of this expression. + VariantKind getKind() const { return Kind; } + + /// getSubExpr - Get the child of this expression. + const MCExpr *getSubExpr() const { return Expr; } + + /// @} + void PrintImpl(raw_ostream &OS) const; + bool EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout) const; + void AddValueSymbols(MCAssembler *) const; + const MCSection *FindAssociatedSection() const { + return getSubExpr()->FindAssociatedSection(); + } + + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const; + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } + + static bool classof(const SparcMCExpr *) { return true; } + + +}; + +} // end namespace llvm. 
+ +#endif diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index 1c64e1b0c4c0..45e1d353695b 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -13,6 +13,8 @@ #include "SparcMCTargetDesc.h" #include "SparcMCAsmInfo.h" +#include "SparcTargetStreamer.h" +#include "InstPrinter/SparcInstPrinter.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" @@ -86,6 +88,28 @@ static MCCodeGenInfo *createSparcV9MCCodeGenInfo(StringRef TT, Reloc::Model RM, X->InitMCCodeGenInfo(RM, CM, OL); return X; } + +static MCStreamer * +createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, + bool isVerboseAsm, bool useLoc, bool useCFI, + bool useDwarfDirectory, MCInstPrinter *InstPrint, + MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) { + SparcTargetAsmStreamer *S = new SparcTargetAsmStreamer(OS); + + return llvm::createAsmStreamer(Ctx, S, OS, isVerboseAsm, useLoc, useCFI, + useDwarfDirectory, InstPrint, CE, TAB, + ShowInst); +} + +static MCInstPrinter *createSparcMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) { + return new SparcInstPrinter(MAI, MII, MRI); +} + extern "C" void LLVMInitializeSparcTargetMC() { // Register the MC asm info. RegisterMCAsmInfo X(TheSparcTarget); @@ -106,4 +130,15 @@ extern "C" void LLVMInitializeSparcTargetMC() { // Register the MC subtarget info. TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget, createSparcMCSubtargetInfo); + + TargetRegistry::RegisterAsmStreamer(TheSparcTarget, + createMCAsmStreamer); + TargetRegistry::RegisterAsmStreamer(TheSparcV9Target, + createMCAsmStreamer); + + // Register the MCInstPrinter + TargetRegistry::RegisterMCInstPrinter(TheSparcTarget, + createSparcMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheSparcV9Target, + createSparcMCInstPrinter); } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.cpp new file mode 100644 index 000000000000..01043aed5eee --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.cpp @@ -0,0 +1,40 @@ +//===-- SparcTargetStreamer.cpp - Sparc Target Streamer Methods -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Sparc specific target streamer methods. 
+// +//===----------------------------------------------------------------------===// + +#include "SparcTargetStreamer.h" +#include "InstPrinter/SparcInstPrinter.h" +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; + +// pin vtable to this file +void SparcTargetStreamer::anchor() {} + +SparcTargetAsmStreamer::SparcTargetAsmStreamer(formatted_raw_ostream &OS) + : OS(OS) {} + +void SparcTargetAsmStreamer::emitSparcRegisterIgnore(unsigned reg) { + OS << "\t.register " + << "%" << StringRef(SparcInstPrinter::getRegisterName(reg)).lower() + << ", #ignore\n"; +} + +void SparcTargetAsmStreamer::emitSparcRegisterScratch(unsigned reg) { + OS << "\t.register " + << "%" << StringRef(SparcInstPrinter::getRegisterName(reg)).lower() + << ", #scratch\n"; +} + +MCELFStreamer &SparcTargetELFStreamer::getStreamer() { + return static_cast<MCELFStreamer &>(*Streamer); +} diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/Makefile b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/Makefile index c171db77cf38..702b5cbfccae 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/Makefile +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/Makefile @@ -17,7 +17,7 @@ BUILT_SOURCES = SparcGenRegisterInfo.inc SparcGenInstrInfo.inc \ SparcGenSubtargetInfo.inc SparcGenCallingConv.inc \ SparcGenCodeEmitter.inc -DIRS = TargetInfo MCTargetDesc +DIRS = InstPrinter TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/Sparc.h b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/Sparc.h index f44b60420d06..8d46c60255e9 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/Sparc.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/Sparc.h @@ -23,12 +23,18 @@ namespace llvm { class FunctionPass; class SparcTargetMachine; class formatted_raw_ostream; + class AsmPrinter; + class MCInst; + class MachineInstr; FunctionPass *createSparcISelDag(SparcTargetMachine &TM); FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM); FunctionPass *createSparcJITCodeEmitterPass(SparcTargetMachine &TM, JITCodeEmitter &JCE); + void LowerSparcMachineInstrToMCInst(const MachineInstr *MI, + MCInst &OutMI, + AsmPrinter &AP); } // end namespace llvm; namespace llvm { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp index d06c894c7e05..e2115a7c4ac8 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -16,12 +16,17 @@ #include "Sparc.h" #include "SparcInstrInfo.h" #include "SparcTargetMachine.h" +#include "SparcTargetStreamer.h" +#include "InstPrinter/SparcInstPrinter.h" #include "MCTargetDesc/SparcBaseInfo.h" +#include "MCTargetDesc/SparcMCExpr.h" #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/TargetRegistry.h" @@ -31,6 +36,9 @@ using namespace llvm; namespace { class SparcAsmPrinter : public AsmPrinter { + SparcTargetStreamer &getTargetStreamer() { + return static_cast<SparcTargetStreamer &>(OutStreamer.getTargetStreamer()); + } public: explicit SparcAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : AsmPrinter(TM, Streamer) {} @@ -45,14 +53,11 @@ void printCCOperand(const MachineInstr *MI, int opNum, raw_ostream 
&OS); virtual void EmitFunctionBodyStart(); - virtual void EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - printInstruction(MI, OS); - OutStreamer.EmitRawText(OS.str()); + virtual void EmitInstruction(const MachineInstr *MI); + + static const char *getRegisterName(unsigned RegNo) { + return SparcInstPrinter::getRegisterName(RegNo); } - void printInstruction(const MachineInstr *MI, raw_ostream &OS);// autogen'd. - static const char *getRegisterName(unsigned RegNo); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, @@ -61,24 +66,138 @@ namespace { unsigned AsmVariant, const char *ExtraCode, raw_ostream &O); - bool printGetPCX(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS); - virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; - void EmitGlobalRegisterDecl(unsigned reg) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - OS << "\t.register " - << "%" << StringRef(getRegisterName(reg)).lower() - << ", " - << ((reg == SP::G6 || reg == SP::G7)? "#ignore" : "#scratch"); - OutStreamer.EmitRawText(OS.str()); - } }; } // end of anonymous namespace -#include "SparcGenAsmWriter.inc" +static MCOperand createPCXCallOP(MCSymbol *Label, + MCContext &OutContext) +{ + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Label, + OutContext); + const SparcMCExpr *expr = SparcMCExpr::Create(SparcMCExpr::VK_Sparc_None, + MCSym, OutContext); + return MCOperand::CreateExpr(expr); +} + +static MCOperand createPCXRelExprOp(SparcMCExpr::VariantKind Kind, + MCSymbol *GOTLabel, MCSymbol *StartLabel, + MCSymbol *CurLabel, + MCContext &OutContext) +{ + const MCSymbolRefExpr *GOT = MCSymbolRefExpr::Create(GOTLabel, OutContext); + const MCSymbolRefExpr *Start = MCSymbolRefExpr::Create(StartLabel, + OutContext); + const MCSymbolRefExpr *Cur = MCSymbolRefExpr::Create(CurLabel, + OutContext); + + const MCBinaryExpr *Sub = MCBinaryExpr::CreateSub(Cur, Start, OutContext); + const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(GOT, Sub, OutContext); + const SparcMCExpr *expr = SparcMCExpr::Create(Kind, + Add, OutContext); + return MCOperand::CreateExpr(expr); +} + +static void EmitCall(MCStreamer &OutStreamer, + MCOperand &Callee) +{ + MCInst CallInst; + CallInst.setOpcode(SP::CALL); + CallInst.addOperand(Callee); + OutStreamer.EmitInstruction(CallInst); +} + +static void EmitSETHI(MCStreamer &OutStreamer, + MCOperand &Imm, MCOperand &RD) +{ + MCInst SETHIInst; + SETHIInst.setOpcode(SP::SETHIi); + SETHIInst.addOperand(RD); + SETHIInst.addOperand(Imm); + OutStreamer.EmitInstruction(SETHIInst); +} + +static void EmitOR(MCStreamer &OutStreamer, MCOperand &RS1, + MCOperand &Imm, MCOperand &RD) +{ + MCInst ORInst; + ORInst.setOpcode(SP::ORri); + ORInst.addOperand(RD); + ORInst.addOperand(RS1); + ORInst.addOperand(Imm); + OutStreamer.EmitInstruction(ORInst); +} + +void EmitADD(MCStreamer &OutStreamer, + MCOperand &RS1, MCOperand &RS2, MCOperand &RD) +{ + MCInst ADDInst; + ADDInst.setOpcode(SP::ADDrr); + ADDInst.addOperand(RD); + ADDInst.addOperand(RS1); + ADDInst.addOperand(RS2); + OutStreamer.EmitInstruction(ADDInst); +} + +static void LowerGETPCXAndEmitMCInsts(const MachineInstr *MI, + MCStreamer &OutStreamer, + MCContext &OutContext) +{ + const MachineOperand &MO = MI->getOperand(0); + MCSymbol *StartLabel = OutContext.CreateTempSymbol(); + MCSymbol *EndLabel = OutContext.CreateTempSymbol(); + MCSymbol *SethiLabel = OutContext.CreateTempSymbol(); + MCSymbol *GOTLabel = + 
OutContext.GetOrCreateSymbol(Twine("_GLOBAL_OFFSET_TABLE_")); + + assert(MO.getReg() != SP::O7 && + "%o7 is assigned as destination for getpcx!"); + + MCOperand MCRegOP = MCOperand::CreateReg(MO.getReg()); + MCOperand RegO7 = MCOperand::CreateReg(SP::O7); + + // <StartLabel>: + // call <EndLabel> + // <SethiLabel>: + // sethi %hi(_GLOBAL_OFFSET_TABLE_+(<SethiLabel>-<StartLabel>)), <MO> + // <EndLabel>: + // or <MO>, %lo(_GLOBAL_OFFSET_TABLE_+(<EndLabel>-<StartLabel>)), <MO> + // add <MO>, %o7, <MO> + + OutStreamer.EmitLabel(StartLabel); + MCOperand Callee = createPCXCallOP(EndLabel, OutContext); + EmitCall(OutStreamer, Callee); + OutStreamer.EmitLabel(SethiLabel); + MCOperand hiImm = createPCXRelExprOp(SparcMCExpr::VK_Sparc_HI, + GOTLabel, StartLabel, SethiLabel, + OutContext); + EmitSETHI(OutStreamer, hiImm, MCRegOP); + OutStreamer.EmitLabel(EndLabel); + MCOperand loImm = createPCXRelExprOp(SparcMCExpr::VK_Sparc_LO, + GOTLabel, StartLabel, EndLabel, + OutContext); + EmitOR(OutStreamer, MCRegOP, loImm, MCRegOP); + EmitADD(OutStreamer, MCRegOP, RegO7, MCRegOP); +}
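+ + // Editorial illustration, not part of the original patch: with <MO> = %l7 + // the streamer calls above print roughly as + // .Ltmp0: + // call .Ltmp2 + // .Ltmp1: + // sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %l7 + // .Ltmp2: + // or %l7, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %l7 + // add %l7, %o7, %l7 + // which leaves the runtime address of _GLOBAL_OFFSET_TABLE_ in %l7: the + // call writes the address of .Ltmp0 into %o7, and the GOT-relative + // expressions supply the remaining displacement. + +void SparcAsmPrinter::EmitInstruction(const MachineInstr *MI) +{ + MCInst TmpInst; + + switch (MI->getOpcode()) { + default: break; + case TargetOpcode::DBG_VALUE: + // FIXME: Debug Value. + return; + case SP::GETPCX: + LowerGETPCXAndEmitMCInsts(MI, OutStreamer, OutContext); + return; + } + LowerSparcMachineInstrToMCInst(MI, TmpInst, *this); + OutStreamer.EmitInstruction(TmpInst); +} void SparcAsmPrinter::EmitFunctionBodyStart() { if (!TM.getSubtarget<SparcSubtarget>().is64Bit()) @@ -90,12 +209,17 @@ unsigned reg = globalRegs[i]; if (MRI.use_empty(reg)) continue; - EmitGlobalRegisterDecl(reg); + + if (reg == SP::G6 || reg == SP::G7) + getTargetStreamer().emitSparcRegisterIgnore(reg); + else + getTargetStreamer().emitSparcRegisterScratch(reg); } } void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { + const DataLayout *DL = TM.getDataLayout(); const MachineOperand &MO = MI->getOperand (opNum); unsigned TF = MO.getTargetFlags(); #ifndef NDEBUG @@ -104,7 +228,7 @@ if (MI->getOpcode() == SP::CALL) assert(TF == SPII::MO_NO_FLAG && "Cannot handle target flags on call address"); - else if (MI->getOpcode() == SP::SETHIi) + else if (MI->getOpcode() == SP::SETHIi || MI->getOpcode() == SP::SETHIXi) assert((TF == SPII::MO_HI || TF == SPII::MO_H44 || TF == SPII::MO_HH || TF == SPII::MO_TLS_GD_HI22 || TF == SPII::MO_TLS_LDM_HI22 @@ -127,7 +251,7 @@ else if (MI->getOpcode() == SP::TLS_LDXrr) assert(TF == SPII::MO_TLS_IE_LDX && "Cannot handle target flags on ldx for TLS"); - else if (MI->getOpcode() == SP::XORri) + else if (MI->getOpcode() == SP::XORri || MI->getOpcode() == SP::XORXri) assert((TF == SPII::MO_TLS_LDO_LOX10 || TF == SPII::MO_TLS_LE_LOX10) && "Cannot handle target flags on xor for TLS"); else @@ -195,7 +319,7 @@ O << MO.getSymbolName(); break; case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" + O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" << MO.getIndex(); break; default: @@ -226,46 +350,6 @@ printOperand(MI, opNum+1, O); } -bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum, - raw_ostream &O) { - std::string operand = ""; - const MachineOperand &MO = MI->getOperand(opNum); - 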
switch (MO.getType()) { - default: llvm_unreachable("Operand is not a register"); - case MachineOperand::MO_Register: - assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && - "Operand is not a physical register "); - assert(MO.getReg() != SP::O7 && - "%o7 is assigned as destination for getpcx!"); - operand = "%" + StringRef(getRegisterName(MO.getReg())).lower(); - break; - } - - unsigned mfNum = MI->getParent()->getParent()->getFunctionNumber(); - unsigned bbNum = MI->getParent()->getNumber(); - - O << '\n' << ".LLGETPCH" << mfNum << '_' << bbNum << ":\n"; - O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ; - - O << "\t sethi\t" - << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum - << ")), " << operand << '\n' ; - - O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ; - O << "\tor\t" << operand - << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum - << ")), " << operand << '\n'; - O << "\tadd\t" << operand << ", %o7, " << operand << '\n'; - - return true; -} - -void SparcAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum, - raw_ostream &O) { - int CC = (int)MI->getOperand(opNum).getImm(); - O << SPARCCondCodeToString((SPCC::CondCodes)CC); -} - /// PrintAsmOperand - Print out an operand for an inline asm expression. /// bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 707fe7b4a605..5a34f8651f13 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -80,11 +80,14 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT, static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - assert((LocVT == MVT::f32 || LocVT.getSizeInBits() == 64) && + assert((LocVT == MVT::f32 || LocVT == MVT::f128 + || LocVT.getSizeInBits() == 64) && "Can't handle non-64 bits locations"); // Stack space is allocated for all arguments starting from [%fp+BIAS+128]. - unsigned Offset = State.AllocateStack(8, 8); + unsigned size = (LocVT == MVT::f128) ? 16 : 8; + unsigned alignment = (LocVT == MVT::f128) ? 16 : 8; + unsigned Offset = State.AllocateStack(size, alignment); unsigned Reg = 0; if (LocVT == MVT::i64 && Offset < 6*8) @@ -96,6 +99,9 @@ else if (LocVT == MVT::f32 && Offset < 16*8) // Promote floats to %f1, %f3, ... Reg = SP::F1 + Offset/4; + else if (LocVT == MVT::f128 && Offset < 16*8) + // Promote long doubles to %q0-%q28. (Which LLVM calls Q0-Q7). + Reg = SP::Q0 + Offset/16; // Promote to register when possible, otherwise use the stack slot. if (Reg) { @@ -998,9 +1004,10 @@ static void fixupVariableFloatArgs(SmallVectorImpl<CCValAssign> &ArgLocs, ArrayRef<ISD::OutputArg> Outs) { for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { const CCValAssign &VA = ArgLocs[i]; + MVT ValTy = VA.getLocVT(); // FIXME: What about f32 arguments? C promotes them to f64 when calling // varargs functions. - if (!VA.isRegLoc() || VA.getLocVT() != MVT::f64) + if (!VA.isRegLoc() || (ValTy != MVT::f64 && ValTy != MVT::f128)) continue; // The fixed arguments to a varargs function still go in FP registers. 
if (Outs[VA.getValNo()].IsFixed) @@ -1010,15 +1017,25 @@ CCValAssign NewVA; // Determine the offset into the argument array. - unsigned Offset = 8 * (VA.getLocReg() - SP::D0); + unsigned firstReg = (ValTy == MVT::f64) ? SP::D0 : SP::Q0; + unsigned argSize = (ValTy == MVT::f64) ? 8 : 16; + unsigned Offset = argSize * (VA.getLocReg() - firstReg);
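+ // Worked example (editorial note, not in the original patch): a variadic + // f64 assigned to %d2 gives Offset = 8 * 2 = 16 and is redirected to %i2 + // below; an f128 assigned to %q1 gives Offset = 16 * 1 = 16 as well and is + // redirected to the pair %i2/%i3 as two i64 halves in LowerCall_64. assert(Offset < 16*8 && "Offset out of range, bad register enum?"); if (Offset < 6*8) { // This argument should go in %i0-%i5. unsigned IReg = SP::I0 + Offset/8; - // Full register, just bitconvert into i64. - NewVA = CCValAssign::getReg(VA.getValNo(), VA.getValVT(), - IReg, MVT::i64, CCValAssign::BCvt); + if (ValTy == MVT::f64) + // Full register, just bitconvert into i64. + NewVA = CCValAssign::getReg(VA.getValNo(), VA.getValVT(), + IReg, MVT::i64, CCValAssign::BCvt); + else { + assert(ValTy == MVT::f128 && "Unexpected type!"); + // Full register, just bitconvert into i128 -- We will lower this into + // two i64s in LowerCall_64. + NewVA = CCValAssign::getCustomReg(VA.getValNo(), VA.getValVT(), + IReg, MVT::i128, CCValAssign::BCvt); + } } else { // This needs to go to memory, we're out of integer registers. NewVA = CCValAssign::getMem(VA.getValNo(), VA.getValVT(), @@ -1094,11 +1111,46 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); break; case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); + // fixupVariableFloatArgs() may create bitcasts from f128 to i128. But + // SPARC does not support i128 natively. Lower it into two i64, see below. + if (!VA.needsCustom() || VA.getValVT() != MVT::f128 + || VA.getLocVT() != MVT::i128) + Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); break; } if (VA.isRegLoc()) { + if (VA.needsCustom() && VA.getValVT() == MVT::f128 + && VA.getLocVT() == MVT::i128) { + // Store and reload into the integer register reg and reg+1. + unsigned Offset = 8 * (VA.getLocReg() - SP::I0); + unsigned StackOffset = Offset + Subtarget->getStackPointerBias() + 128; + SDValue StackPtr = DAG.getRegister(SP::O6, getPointerTy()); + SDValue HiPtrOff = DAG.getIntPtrConstant(StackOffset); + HiPtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, + HiPtrOff); + SDValue LoPtrOff = DAG.getIntPtrConstant(StackOffset + 8); + LoPtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, + LoPtrOff); + + // Store to %sp+BIAS+128+Offset + SDValue Store = DAG.getStore(Chain, DL, Arg, HiPtrOff, + MachinePointerInfo(), + false, false, 0); + // Load into Reg and Reg+1 + SDValue Hi64 = DAG.getLoad(MVT::i64, DL, Store, HiPtrOff, + MachinePointerInfo(), + false, false, false, 0); + SDValue Lo64 = DAG.getLoad(MVT::i64, DL, Store, LoPtrOff, + MachinePointerInfo(), + false, false, false, 0); + RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()), + Hi64)); + RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()+1), + Lo64)); + continue; + } + // The custom bit on an i32 return value indicates that it should be // passed in the high bits of the register. if (VA.getValVT() == MVT::i32 && VA.needsCustom()) { @@ -1200,6 +1252,12 @@ SmallVector<CCValAssign, 16> RVLocs; CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), DAG.getTarget(), RVLocs, *DAG.getContext()); + + // Set inreg flag manually for codegen generated library calls that + // return float. 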
+  if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && CLI.CS == 0)
+    CLI.Ins[0].Flags.setInReg();
+
   RVInfo.AnalyzeCallResult(CLI.Ins, CC_Sparc64);
 
   // Copy all of the result registers out of their specified physreg.
@@ -1414,9 +1472,29 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
   }
 
-  // FIXME: There are instructions available for ATOMIC_FENCE
-  // on SparcV8 and later.
-  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+  // ATOMICs.
+  // FIXME: We insert a fence for each atomic operation and generate
+  // sub-optimal code for PSO/TSO. Also, implement other atomicrmw operations.
+
+  setInsertFencesForAtomic(true);
+
+  setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Legal);
+  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32,
+                     (Subtarget->isV9() ? Legal: Expand));
+
+  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Legal);
+
+  // Custom Lower Atomic LOAD/STORE
+  setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
+
+  if (Subtarget->is64Bit()) {
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Legal);
+    setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Custom);
+  }
 
   if (!Subtarget->isV9()) {
     // SparcV8 does not have FNEGD and FABSD.
@@ -1462,6 +1540,16 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 
+  if (Subtarget->is64Bit()) {
+    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+    setOperationAction(ISD::MULHU, MVT::i64, Expand);
+    setOperationAction(ISD::MULHS, MVT::i64, Expand);
+
+    setOperationAction(ISD::UMULO, MVT::i64, Custom);
+    setOperationAction(ISD::SMULO, MVT::i64, Custom);
+  }
+
   // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
   setOperationAction(ISD::VASTART , MVT::Other, Custom);
   // VAARG needs to be lowered to not do unaligned accesses for doubles.
@@ -1600,6 +1688,12 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
   }
 }
 
+EVT SparcTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+  if (!VT.isVector())
+    return MVT::i32;
+  return VT.changeVectorElementTypeToInteger();
+}
+
 /// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
 /// be zero. Op is expected to be a target specific node. Used by DAG
 /// combiner.
@@ -2291,7 +2385,7 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
 }
 
 static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
-                                       bool is64Bit) {
+                                       const SparcSubtarget *Subtarget) {
   SDValue Chain = Op.getOperand(0);  // Legalize the chain.
   SDValue Size  = Op.getOperand(1);  // Legalize the size.
   EVT VT = Size->getValueType(0);
@@ -2304,7 +2398,9 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
 
   // The resultant pointer is actually 16 words from the bottom of the stack,
   // to provide a register spill area.
-  unsigned regSpillArea = (is64Bit) ? 128 : 96;
+  unsigned regSpillArea = Subtarget->is64Bit() ? 128 : 96;
+  regSpillArea += Subtarget->getStackPointerBias();
+
   SDValue NewVal = DAG.getNode(ISD::ADD, dl, VT, NewSP,
                                DAG.getConstant(regSpillArea, VT));
   SDValue Ops[2] = { NewVal, Chain };
@@ -2600,6 +2696,63 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
   return DAG.getMergeValues(Ops, 2, dl);
 }
 
+// Custom lower UMULO/SMULO for SPARC. This code is similar to ExpandNode()
+// in LegalizeDAG.cpp, except for the order of arguments to the library
+// function.
+static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG,
+                                const SparcTargetLowering &TLI)
+{
+  unsigned opcode = Op.getOpcode();
+  assert((opcode == ISD::UMULO || opcode == ISD::SMULO) && "Invalid Opcode.");
+
+  bool isSigned = (opcode == ISD::SMULO);
+  EVT VT = MVT::i64;
+  EVT WideVT = MVT::i128;
+  SDLoc dl(Op);
+  SDValue LHS = Op.getOperand(0);
+
+  if (LHS.getValueType() != VT)
+    return Op;
+
+  SDValue ShiftAmt = DAG.getConstant(63, VT);
+
+  SDValue RHS = Op.getOperand(1);
+  SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, ShiftAmt);
+  SDValue HiRHS = DAG.getNode(ISD::SRA, dl, MVT::i64, RHS, ShiftAmt);
+  SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
+
+  SDValue MulResult = TLI.makeLibCall(DAG,
+                                      RTLIB::MUL_I128, WideVT,
+                                      Args, 4, isSigned, dl).first;
+  SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT,
+                                   MulResult, DAG.getIntPtrConstant(0));
+  SDValue TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT,
+                                MulResult, DAG.getIntPtrConstant(1));
+  if (isSigned) {
+    SDValue Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
+    TopHalf = DAG.getSetCC(dl, MVT::i32, TopHalf, Tmp1, ISD::SETNE);
+  } else {
+    TopHalf = DAG.getSetCC(dl, MVT::i32, TopHalf, DAG.getConstant(0, VT),
+                           ISD::SETNE);
+  }
+  // MulResult is a node with an illegal type. Because such things are not
+  // generally permitted during this phase of legalization, delete the
+  // node. The above EXTRACT_ELEMENT nodes should have been folded.
+  DAG.DeleteNode(MulResult.getNode());
+
+  SDValue Ops[2] = { BottomHalf, TopHalf };
+  return DAG.getMergeValues(Ops, 2, dl);
+}
+
+static SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) {
+  // Monotonic loads/stores are legal.
+  if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
+    return Op;
+
+  // Otherwise, expand with a fence.
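+  // (A null SDValue from a Custom lowering hook makes the DAG legalizer
+  // fall back to its generic expansion for these stronger orderings.)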
+ return SDValue(); +} + + SDValue SparcTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -2631,7 +2784,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VASTART: return LowerVASTART(Op, DAG, *this); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, - is64Bit); + Subtarget); case ISD::LOAD: return LowerF128Load(Op, DAG); case ISD::STORE: return LowerF128Store(Op, DAG); @@ -2653,6 +2806,10 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::UMULO: + case ISD::SMULO: return LowerUMULO_SMULO(Op, DAG, *this); + case ISD::ATOMIC_LOAD: + case ISD::ATOMIC_STORE: return LowerATOMIC_LOAD_STORE(Op, DAG); } } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcISelLowering.h b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcISelLowering.h index 8d27caaf159b..2659fc89501d 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcISelLowering.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcISelLowering.h @@ -79,6 +79,9 @@ namespace llvm { virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + /// getSetCCResultType - Return the ISD::SETCC ValueType + virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const; + virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcInstr64Bit.td index 8656de5c8ba9..7c443978e4b4 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcInstr64Bit.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcInstr64Bit.td @@ -141,32 +141,36 @@ def : Pat<(i64 imm:$val), let Predicates = [Is64Bit] in { // Register-register instructions. 
+defm ANDX : F3_12<"and", 0b000001, and, I64Regs, i64, i64imm>; +defm ORX : F3_12<"or", 0b000010, or, I64Regs, i64, i64imm>; +defm XORX : F3_12<"xor", 0b000011, xor, I64Regs, i64, i64imm>; -def : Pat<(and i64:$a, i64:$b), (ANDrr $a, $b)>; -def : Pat<(or i64:$a, i64:$b), (ORrr $a, $b)>; -def : Pat<(xor i64:$a, i64:$b), (XORrr $a, $b)>; +def ANDXNrr : F3_1<2, 0b000101, + (outs I64Regs:$dst), (ins I64Regs:$b, I64Regs:$c), + "andn $b, $c, $dst", + [(set i64:$dst, (and i64:$b, (not i64:$c)))]>; +def ORXNrr : F3_1<2, 0b000110, + (outs I64Regs:$dst), (ins I64Regs:$b, I64Regs:$c), + "orn $b, $c, $dst", + [(set i64:$dst, (or i64:$b, (not i64:$c)))]>; +def XNORXrr : F3_1<2, 0b000111, + (outs I64Regs:$dst), (ins I64Regs:$b, I64Regs:$c), + "xnor $b, $c, $dst", + [(set i64:$dst, (not (xor i64:$b, i64:$c)))]>; -def : Pat<(and i64:$a, (not i64:$b)), (ANDNrr $a, $b)>; -def : Pat<(or i64:$a, (not i64:$b)), (ORNrr $a, $b)>; -def : Pat<(xor i64:$a, (not i64:$b)), (XNORrr $a, $b)>; - -def : Pat<(add i64:$a, i64:$b), (ADDrr $a, $b)>; -def : Pat<(sub i64:$a, i64:$b), (SUBrr $a, $b)>; +defm ADDX : F3_12<"add", 0b000000, add, I64Regs, i64, i64imm>; +defm SUBX : F3_12<"sub", 0b000100, sub, I64Regs, i64, i64imm>; def : Pat<(SPcmpicc i64:$a, i64:$b), (CMPrr $a, $b)>; -def : Pat<(tlsadd i64:$a, i64:$b, tglobaltlsaddr:$sym), - (TLS_ADDrr $a, $b, $sym)>; +def TLS_ADDXrr : F3_1<2, 0b000000, (outs I64Regs:$rd), + (ins I64Regs:$rs1, I64Regs:$rs2, TLSSym:$sym), + "add $rs1, $rs2, $rd, $sym", + [(set i64:$rd, + (tlsadd i64:$rs1, i64:$rs2, tglobaltlsaddr:$sym))]>; // Register-immediate instructions. -def : Pat<(and i64:$a, (i64 simm13:$b)), (ANDri $a, (as_i32imm $b))>; -def : Pat<(or i64:$a, (i64 simm13:$b)), (ORri $a, (as_i32imm $b))>; -def : Pat<(xor i64:$a, (i64 simm13:$b)), (XORri $a, (as_i32imm $b))>; - -def : Pat<(add i64:$a, (i64 simm13:$b)), (ADDri $a, (as_i32imm $b))>; -def : Pat<(sub i64:$a, (i64 simm13:$b)), (SUBri $a, (as_i32imm $b))>; - def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (CMPri $a, (as_i32imm $b))>; def : Pat<(ctpop i64:$src), (POPCrr $src)>; @@ -402,3 +406,64 @@ def : Pat<(SPselectfcc (i64 simm11:$t), i64:$f, imm:$cond), (MOVFCCri (as_i32imm $t), $f, imm:$cond)>; } // Predicates = [Is64Bit] + + +// 64 bit SETHI +let Predicates = [Is64Bit] in { +def SETHIXi : F2_1<0b100, + (outs IntRegs:$rd), (ins i64imm:$imm22), + "sethi $imm22, $rd", + [(set i64:$rd, SETHIimm:$imm22)]>; +} + +// ATOMICS. +let Predicates = [Is64Bit], Constraints = "$swap = $rd" in { + def CASXrr: F3_1<3, 0b111110, + (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2, + I64Regs:$swap), + "casx [$rs1], $rs2, $rd", + [(set i64:$rd, + (atomic_cmp_swap i64:$rs1, i64:$rs2, i64:$swap))]>; + +} // Predicates = [Is64Bit], Constraints = ... 
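+// (For reference, the casx semantics this pattern relies on: the doubleword
+// at [rs1] is compared with rs2; on a match rd is stored to memory, and rd
+// always receives the old memory value, which is why $swap is tied to $rd.)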
+
+let Predicates = [Is64Bit] in {
+
+def : Pat<(atomic_fence imm, imm), (MEMBARi 0xf)>;
+
+// atomic_load_64 addr -> load addr
+def : Pat<(i64 (atomic_load ADDRrr:$src)), (LDXrr ADDRrr:$src)>;
+def : Pat<(i64 (atomic_load ADDRri:$src)), (LDXri ADDRri:$src)>;
+
+// atomic_store_64 val, addr -> store val, addr
+def : Pat<(atomic_store ADDRrr:$dst, i64:$val), (STXrr ADDRrr:$dst, $val)>;
+def : Pat<(atomic_store ADDRri:$dst, i64:$val), (STXri ADDRri:$dst, $val)>;
+
+} // Predicates = [Is64Bit]
+
+// Global addresses, constant pool entries
+let Predicates = [Is64Bit] in {
+
+def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>;
+def : Pat<(SPlo tglobaladdr:$in), (ORXri (i64 G0), tglobaladdr:$in)>;
+def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
+def : Pat<(SPlo tconstpool:$in), (ORXri (i64 G0), tconstpool:$in)>;
+
+// GlobalTLS addresses
+def : Pat<(SPhi tglobaltlsaddr:$in), (SETHIi tglobaltlsaddr:$in)>;
+def : Pat<(SPlo tglobaltlsaddr:$in), (ORXri (i64 G0), tglobaltlsaddr:$in)>;
+def : Pat<(add (SPhi tglobaltlsaddr:$in1), (SPlo tglobaltlsaddr:$in2)),
+          (ADDXri (SETHIXi tglobaltlsaddr:$in1), (tglobaltlsaddr:$in2))>;
+def : Pat<(xor (SPhi tglobaltlsaddr:$in1), (SPlo tglobaltlsaddr:$in2)),
+          (XORXri (SETHIXi tglobaltlsaddr:$in1), (tglobaltlsaddr:$in2))>;
+
+// Blockaddress
+def : Pat<(SPhi tblockaddress:$in), (SETHIi tblockaddress:$in)>;
+def : Pat<(SPlo tblockaddress:$in), (ORXri (i64 G0), tblockaddress:$in)>;
+
+// Add reg, lo. This is used when taking the addr of a global/constpool entry.
+def : Pat<(add iPTR:$r, (SPlo tglobaladdr:$in)), (ADDXri $r, tglobaladdr:$in)>;
+def : Pat<(add iPTR:$r, (SPlo tconstpool:$in)), (ADDXri $r, tconstpool:$in)>;
+def : Pat<(add iPTR:$r, (SPlo tblockaddress:$in)),
+          (ADDXri $r, tblockaddress:$in)>;
+}
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcInstrInfo.td b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcInstrInfo.td
index ef7a11457071..ba945976ff1f 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -210,15 +210,16 @@ def FCC_O   : FCC_VAL<29>;  // Ordered
 //===----------------------------------------------------------------------===//
 
 /// F3_12 multiclass - Define a normal F3_1/F3_2 pattern in one shot.
-multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> {
+multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode,
+                 RegisterClass RC, ValueType Ty, Operand immOp> {
   def rr  : F3_1<2, Op3Val,
-                 (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+                 (outs RC:$dst), (ins RC:$b, RC:$c),
                  !strconcat(OpcStr, " $b, $c, $dst"),
-                 [(set i32:$dst, (OpNode i32:$b, i32:$c))]>;
+                 [(set Ty:$dst, (OpNode Ty:$b, Ty:$c))]>;
   def ri  : F3_2<2, Op3Val,
-                 (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+                 (outs RC:$dst), (ins RC:$b, immOp:$c),
                  !strconcat(OpcStr, " $b, $c, $dst"),
-                 [(set i32:$dst, (OpNode i32:$b, (i32 simm13:$c)))]>;
+                 [(set Ty:$dst, (OpNode Ty:$b, (Ty simm13:$c)))]>;
 }
 
 /// F3_12np multiclass - Define a normal F3_1/F3_2 pattern in one shot, with no
@@ -464,7 +465,7 @@ let rd = 0, imm22 = 0 in
   def NOP : F2_1<0b100, (outs), (ins), "nop", []>;
 
 // Section B.11 - Logical Instructions, p.
106 -defm AND : F3_12<"and", 0b000001, and>; +defm AND : F3_12<"and", 0b000001, and, IntRegs, i32, i32imm>; def ANDNrr : F3_1<2, 0b000101, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), @@ -474,7 +475,7 @@ def ANDNri : F3_2<2, 0b000101, (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), "andn $b, $c, $dst", []>; -defm OR : F3_12<"or", 0b000010, or>; +defm OR : F3_12<"or", 0b000010, or, IntRegs, i32, i32imm>; def ORNrr : F3_1<2, 0b000110, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), @@ -483,7 +484,7 @@ def ORNrr : F3_1<2, 0b000110, def ORNri : F3_2<2, 0b000110, (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), "orn $b, $c, $dst", []>; -defm XOR : F3_12<"xor", 0b000011, xor>; +defm XOR : F3_12<"xor", 0b000011, xor, IntRegs, i32, i32imm>; def XNORrr : F3_1<2, 0b000111, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), @@ -494,12 +495,12 @@ def XNORri : F3_2<2, 0b000111, "xnor $b, $c, $dst", []>; // Section B.12 - Shift Instructions, p. 107 -defm SLL : F3_12<"sll", 0b100101, shl>; -defm SRL : F3_12<"srl", 0b100110, srl>; -defm SRA : F3_12<"sra", 0b100111, sra>; +defm SLL : F3_12<"sll", 0b100101, shl, IntRegs, i32, i32imm>; +defm SRL : F3_12<"srl", 0b100110, srl, IntRegs, i32, i32imm>; +defm SRA : F3_12<"sra", 0b100111, sra, IntRegs, i32, i32imm>; // Section B.13 - Add Instructions, p. 108 -defm ADD : F3_12<"add", 0b000000, add>; +defm ADD : F3_12<"add", 0b000000, add, IntRegs, i32, i32imm>; // "LEA" forms of add (patterns to make tblgen happy) let Predicates = [Is32Bit] in @@ -509,18 +510,18 @@ let Predicates = [Is32Bit] in [(set iPTR:$dst, ADDRri:$addr)]>; let Defs = [ICC] in - defm ADDCC : F3_12<"addcc", 0b010000, addc>; + defm ADDCC : F3_12<"addcc", 0b010000, addc, IntRegs, i32, i32imm>; let Uses = [ICC], Defs = [ICC] in - defm ADDX : F3_12<"addxcc", 0b011000, adde>; + defm ADDE : F3_12<"addxcc", 0b011000, adde, IntRegs, i32, i32imm>; // Section B.15 - Subtract Instructions, p. 110 -defm SUB : F3_12 <"sub" , 0b000100, sub>; +defm SUB : F3_12 <"sub" , 0b000100, sub, IntRegs, i32, i32imm>; let Uses = [ICC], Defs = [ICC] in - defm SUBX : F3_12 <"subxcc" , 0b011100, sube>; + defm SUBE : F3_12 <"subxcc" , 0b011100, sube, IntRegs, i32, i32imm>; let Defs = [ICC] in - defm SUBCC : F3_12 <"subcc", 0b010100, subc>; + defm SUBCC : F3_12 <"subcc", 0b010100, subc, IntRegs, i32, i32imm>; let Defs = [ICC], rd = 0 in { def CMPrr : F3_1<2, 0b010100, @@ -542,7 +543,7 @@ let Uses = [ICC], Defs = [ICC] in // Section B.18 - Multiply Instructions, p. 113 let Defs = [Y] in { defm UMUL : F3_12np<"umul", 0b001010>; - defm SMUL : F3_12 <"smul", 0b001011, mul>; + defm SMUL : F3_12 <"smul", 0b001011, mul, IntRegs, i32, i32imm>; } // Section B.19 - Divide Instructions, p. 115 @@ -966,13 +967,40 @@ let Predicates = [HasV9] in { } // POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear -// the top 32-bits before using it. To do this clearing, we use a SLLri X,0. +// the top 32-bits before using it. To do this clearing, we use a SRLri X,0. let rs1 = 0 in def POPCrr : F3_1<2, 0b101110, (outs IntRegs:$dst), (ins IntRegs:$src), "popc $src, $dst", []>, Requires<[HasV9]>; def : Pat<(ctpop i32:$src), - (POPCrr (SLLri $src, 0))>; + (POPCrr (SRLri $src, 0))>; + +// Atomic swap. 
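+// (Note on the barriers defined below: stbar is the V8 store barrier, and
+// membar with mmask 0xf orders all four load/store combinations on V9.)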
+let hasSideEffects =1, rd = 0, rs1 = 0b01111, rs2 = 0 in + def STBAR : F3_1<2, 0b101000, (outs), (ins), "stbar", []>; + +let Predicates = [HasV9], hasSideEffects = 1, rd = 0, rs1 = 0b01111 in + def MEMBARi : F3_2<2, 0b101000, (outs), (ins i32imm:$simm13), + "membar $simm13", []>; + +let Constraints = "$val = $rd" in { + def SWAPrr : F3_1<3, 0b001111, + (outs IntRegs:$rd), (ins IntRegs:$val, MEMrr:$addr), + "swap [$addr], $rd", + [(set i32:$rd, (atomic_swap_32 ADDRrr:$addr, i32:$val))]>; + def SWAPri : F3_2<3, 0b001111, + (outs IntRegs:$rd), (ins IntRegs:$val, MEMri:$addr), + "swap [$addr], $rd", + [(set i32:$rd, (atomic_swap_32 ADDRri:$addr, i32:$val))]>; +} + +let Predicates = [HasV9], Constraints = "$swap = $rd" in + def CASrr: F3_1<3, 0b111100, + (outs IntRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs2, + IntRegs:$swap), + "cas [$rs1], $rs2, $rd", + [(set i32:$rd, + (atomic_cmp_swap iPTR:$rs1, i32:$rs2, i32:$swap))]>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -987,6 +1015,8 @@ def : Pat<(i32 imm:$val), // Global addresses, constant pool entries +let Predicates = [Is32Bit] in { + def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>; def : Pat<(SPlo tglobaladdr:$in), (ORri (i32 G0), tglobaladdr:$in)>; def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>; @@ -1009,6 +1039,7 @@ def : Pat<(add iPTR:$r, (SPlo tglobaladdr:$in)), (ADDri $r, tglobaladdr:$in)>; def : Pat<(add iPTR:$r, (SPlo tconstpool:$in)), (ADDri $r, tconstpool:$in)>; def : Pat<(add iPTR:$r, (SPlo tblockaddress:$in)), (ADDri $r, tblockaddress:$in)>; +} // Calls: def : Pat<(call tglobaladdr:$dst), @@ -1032,4 +1063,17 @@ def : Pat<(i32 (zextloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>; def : Pat<(store (i32 0), ADDRrr:$dst), (STrr ADDRrr:$dst, (i32 G0))>; def : Pat<(store (i32 0), ADDRri:$dst), (STri ADDRri:$dst, (i32 G0))>; +// store bar for all atomic_fence in V8. +let Predicates = [HasNoV9] in + def : Pat<(atomic_fence imm, imm), (STBAR)>; + +// atomic_load_32 addr -> load addr +def : Pat<(i32 (atomic_load ADDRrr:$src)), (LDrr ADDRrr:$src)>; +def : Pat<(i32 (atomic_load ADDRri:$src)), (LDri ADDRri:$src)>; + +// atomic_store_32 val, addr -> store val, addr +def : Pat<(atomic_store ADDRrr:$dst, i32:$val), (STrr ADDRrr:$dst, $val)>; +def : Pat<(atomic_store ADDRri:$dst, i32:$val), (STri ADDRri:$dst, $val)>; + + include "SparcInstr64Bit.td" diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcMCInstLower.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcMCInstLower.cpp new file mode 100644 index 000000000000..f58a83c125ef --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcMCInstLower.cpp @@ -0,0 +1,141 @@ +//===-- SparcMCInstLower.cpp - Convert Sparc MachineInstr to MCInst -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower Sparc MachineInstrs to their corresponding +// MCInst records. 
+// +//===----------------------------------------------------------------------===// + +#include "Sparc.h" +#include "MCTargetDesc/SparcBaseInfo.h" +#include "MCTargetDesc/SparcMCExpr.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Target/Mangler.h" +#include "llvm/ADT/SmallString.h" + +using namespace llvm; + + +static MCOperand LowerSymbolOperand(const MachineInstr *MI, + const MachineOperand &MO, + AsmPrinter &AP) { + + SparcMCExpr::VariantKind Kind; + const MCSymbol *Symbol = 0; + + unsigned TF = MO.getTargetFlags(); + + switch(TF) { + default: llvm_unreachable("Unknown target flags on operand"); + case SPII::MO_NO_FLAG: Kind = SparcMCExpr::VK_Sparc_None; break; + case SPII::MO_LO: Kind = SparcMCExpr::VK_Sparc_LO; break; + case SPII::MO_HI: Kind = SparcMCExpr::VK_Sparc_HI; break; + case SPII::MO_H44: Kind = SparcMCExpr::VK_Sparc_H44; break; + case SPII::MO_M44: Kind = SparcMCExpr::VK_Sparc_M44; break; + case SPII::MO_L44: Kind = SparcMCExpr::VK_Sparc_L44; break; + case SPII::MO_HH: Kind = SparcMCExpr::VK_Sparc_HH; break; + case SPII::MO_HM: Kind = SparcMCExpr::VK_Sparc_HM; break; + case SPII::MO_TLS_GD_HI22: Kind = SparcMCExpr::VK_Sparc_TLS_GD_HI22; break; + case SPII::MO_TLS_GD_LO10: Kind = SparcMCExpr::VK_Sparc_TLS_GD_LO10; break; + case SPII::MO_TLS_GD_ADD: Kind = SparcMCExpr::VK_Sparc_TLS_GD_ADD; break; + case SPII::MO_TLS_GD_CALL: Kind = SparcMCExpr::VK_Sparc_TLS_GD_CALL; break; + case SPII::MO_TLS_LDM_HI22: Kind = SparcMCExpr::VK_Sparc_TLS_LDM_HI22; break; + case SPII::MO_TLS_LDM_LO10: Kind = SparcMCExpr::VK_Sparc_TLS_LDM_LO10; break; + case SPII::MO_TLS_LDM_ADD: Kind = SparcMCExpr::VK_Sparc_TLS_LDM_ADD; break; + case SPII::MO_TLS_LDM_CALL: Kind = SparcMCExpr::VK_Sparc_TLS_LDM_CALL; break; + case SPII::MO_TLS_LDO_HIX22:Kind = SparcMCExpr::VK_Sparc_TLS_LDO_HIX22; break; + case SPII::MO_TLS_LDO_LOX10:Kind = SparcMCExpr::VK_Sparc_TLS_LDO_LOX10; break; + case SPII::MO_TLS_LDO_ADD: Kind = SparcMCExpr::VK_Sparc_TLS_LDO_ADD; break; + case SPII::MO_TLS_IE_HI22: Kind = SparcMCExpr::VK_Sparc_TLS_IE_HI22; break; + case SPII::MO_TLS_IE_LO10: Kind = SparcMCExpr::VK_Sparc_TLS_IE_LO10; break; + case SPII::MO_TLS_IE_LD: Kind = SparcMCExpr::VK_Sparc_TLS_IE_LD; break; + case SPII::MO_TLS_IE_LDX: Kind = SparcMCExpr::VK_Sparc_TLS_IE_LDX; break; + case SPII::MO_TLS_IE_ADD: Kind = SparcMCExpr::VK_Sparc_TLS_IE_ADD; break; + case SPII::MO_TLS_LE_HIX22: Kind = SparcMCExpr::VK_Sparc_TLS_LE_HIX22; break; + case SPII::MO_TLS_LE_LOX10: Kind = SparcMCExpr::VK_Sparc_TLS_LE_LOX10; break; + } + + switch(MO.getType()) { + default: llvm_unreachable("Unknown type in LowerSymbolOperand"); + case MachineOperand::MO_MachineBasicBlock: + Symbol = MO.getMBB()->getSymbol(); + break; + + case MachineOperand::MO_GlobalAddress: + Symbol = AP.getSymbol(MO.getGlobal()); + break; + + case MachineOperand::MO_BlockAddress: + Symbol = AP.GetBlockAddressSymbol(MO.getBlockAddress()); + break; + + case MachineOperand::MO_ExternalSymbol: + Symbol = AP.GetExternalSymbolSymbol(MO.getSymbolName()); + break; + + case MachineOperand::MO_ConstantPoolIndex: + Symbol = AP.GetCPISymbol(MO.getIndex()); + break; + } + + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, + AP.OutContext); + const SparcMCExpr *expr = SparcMCExpr::Create(Kind, MCSym, + AP.OutContext); + return 
MCOperand::CreateExpr(expr);
+}
+
+static MCOperand LowerOperand(const MachineInstr *MI,
+                              const MachineOperand &MO,
+                              AsmPrinter &AP) {
+  switch(MO.getType()) {
+  default: llvm_unreachable("unknown operand type"); break;
+  case MachineOperand::MO_Register:
+    if (MO.isImplicit())
+      break;
+    return MCOperand::CreateReg(MO.getReg());
+
+  case MachineOperand::MO_Immediate:
+    return MCOperand::CreateImm(MO.getImm());
+
+  case MachineOperand::MO_MachineBasicBlock:
+  case MachineOperand::MO_GlobalAddress:
+  case MachineOperand::MO_BlockAddress:
+  case MachineOperand::MO_ExternalSymbol:
+  case MachineOperand::MO_ConstantPoolIndex:
+    return LowerSymbolOperand(MI, MO, AP);
+
+  case MachineOperand::MO_RegisterMask: break;
+
+  }
+  return MCOperand();
+}
+
+void llvm::LowerSparcMachineInstrToMCInst(const MachineInstr *MI,
+                                          MCInst &OutMI,
+                                          AsmPrinter &AP)
+{
+
+  OutMI.setOpcode(MI->getOpcode());
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    MCOperand MCOp = LowerOperand(MI, MO, AP);
+
+    if (MCOp.isValid())
+      OutMI.addOperand(MCOp);
+  }
+}
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcSubtarget.h b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcSubtarget.h
index 0f81cc960f82..012aca7d923f 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcSubtarget.h
+++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -45,15 +45,6 @@ public:
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
   bool is64Bit() const { return Is64Bit; }
-  std::string getDataLayout() const {
-    const char *p;
-    if (is64Bit()) {
-      p = "E-p:64:64:64-i64:64:64-f64:64:64-f128:128:128-n32:64";
-    } else {
-      p = "E-p:32:32:32-i64:64:64-f64:64:64-f128:64:64-n32";
-    }
-    return std::string(p);
-  }
 
   /// The 64-bit ABI uses biased stack and frame pointers, so the stack frame
   /// of the current function is the area from [%sp+BIAS] to [%fp+BIAS].
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index 0f936747cfed..83f3474759b6 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -23,6 +23,32 @@ extern "C" void LLVMInitializeSparcTarget() {
   RegisterTargetMachine<SparcV9TargetMachine> Y(TheSparcV9Target);
 }
 
+static std::string computeDataLayout(const SparcSubtarget &ST) {
+  // Sparc is big endian.
+  std::string Ret = "E-m:e";
+
+  // Some ABIs have 32-bit pointers.
+  if (!ST.is64Bit())
+    Ret += "-p:32:32";
+
+  // Alignments for 64-bit integers.
+  Ret += "-i64:64";
+
+  // On SparcV9 128-bit floats are aligned to 128 bits, on others only to 64.
+  // On SparcV9 registers can hold 64 or 32 bits, on others only 32.
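+  // (For illustration, the string built here ends up as
+  // "E-m:e-i64:64-n32:64-S128" for 64-bit subtargets and
+  // "E-m:e-p:32:32-i64:64-f128:64-n32-S64" for 32-bit ones.)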
+  if (ST.is64Bit())
+    Ret += "-n32:64";
+  else
+    Ret += "-f128:64-n32";
+
+  if (ST.is64Bit())
+    Ret += "-S128";
+  else
+    Ret += "-S64";
+
+  return Ret;
+}
+
 /// SparcTargetMachine ctor - Create an ILP32 architecture model
 ///
 SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
@@ -33,7 +59,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
                                        bool is64bit)
   : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
     Subtarget(TT, CPU, FS, is64bit),
-    DL(Subtarget.getDataLayout()),
+    DL(computeDataLayout(Subtarget)),
     InstrInfo(Subtarget),
     TLInfo(*this), TSInfo(*this),
     FrameLowering(Subtarget) {
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcTargetStreamer.h b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcTargetStreamer.h
new file mode 100644
index 000000000000..73339acf8dfa
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/SparcTargetStreamer.h
@@ -0,0 +1,47 @@
+//===-- SparcTargetStreamer.h - Sparc Target Streamer ----------*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCTARGETSTREAMER_H
+#define SPARCTARGETSTREAMER_H
+
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+class SparcTargetStreamer : public MCTargetStreamer {
+  virtual void anchor();
+
+public:
+  /// Emit ".register <reg>, #ignore".
+  virtual void emitSparcRegisterIgnore(unsigned reg) = 0;
+  /// Emit ".register <reg>, #scratch".
+  virtual void emitSparcRegisterScratch(unsigned reg) = 0;
+};
+
+// This part is for ascii assembly output
+class SparcTargetAsmStreamer : public SparcTargetStreamer {
+  formatted_raw_ostream &OS;
+
+public:
+  SparcTargetAsmStreamer(formatted_raw_ostream &OS);
+  virtual void emitSparcRegisterIgnore(unsigned reg);
+  virtual void emitSparcRegisterScratch(unsigned reg);
+
+};
+
+// This part is for ELF object output
+class SparcTargetELFStreamer : public SparcTargetStreamer {
+public:
+  MCELFStreamer &getStreamer();
+  virtual void emitSparcRegisterIgnore(unsigned reg) {}
+  virtual void emitSparcRegisterScratch(unsigned reg) {}
+};
+} // end namespace llvm
+
+#endif
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/TargetInfo/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/TargetInfo/CMakeLists.txt
index b0d031e0c2be..9633e03a3847 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/TargetInfo/CMakeLists.txt
+++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/TargetInfo/CMakeLists.txt
@@ -1,7 +1,3 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/..
) - add_llvm_library(LLVMSparcInfo SparcTargetInfo.cpp ) - -add_dependencies(LLVMSparcInfo SparcCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/TargetInfo/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/TargetInfo/LLVMBuild.txt index b5c320f92553..e992d3e81f18 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Sparc/TargetInfo/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/Sparc/TargetInfo/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = SparcInfo parent = Sparc -required_libraries = MC Support Target +required_libraries = Support add_to_library_groups = Sparc diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/AsmParser/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/AsmParser/CMakeLists.txt index 78a57146e1cb..ad19a565030b 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/AsmParser/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/AsmParser/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMSystemZAsmParser SystemZAsmParser.cpp ) - -add_dependencies(LLVMSystemZAsmParser SystemZCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/CMakeLists.txt index d21c0a8086fb..4da2d0f2dd59 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/CMakeLists.txt @@ -30,8 +30,6 @@ add_llvm_target(SystemZCodeGen SystemZTargetMachine.cpp ) -add_dependencies(LLVMSystemZCodeGen SystemZCommonTableGen intrinsics_gen) - add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/Disassembler/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/Disassembler/CMakeLists.txt index 5bc1859816fa..4b94bcd1f5f1 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/Disassembler/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/Disassembler/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMSystemZDisassembler SystemZDisassembler.cpp ) - -add_dependencies(LLVMSystemZDisassembler SystemZCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/InstPrinter/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/InstPrinter/CMakeLists.txt index ddbf82fe16f6..21b023ce1f7b 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/InstPrinter/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/InstPrinter/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - add_llvm_library(LLVMSystemZAsmPrinter SystemZInstPrinter.cpp ) - -add_dependencies(LLVMSystemZAsmPrinter SystemZCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/LLVMBuild.txt index 95e657f7bd15..7781318cc164 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/LLVMBuild.txt @@ -31,5 +31,5 @@ has_jit = 1 type = Library name = SystemZCodeGen parent = SystemZ -required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SystemZDesc SystemZInfo Support Target +required_libraries = AsmPrinter CodeGen Core MC Scalar SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target add_to_library_groups = SystemZ diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt index 3d131288c764..1aa8c76e8bc6 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt @@ -5,5 +5,3 @@ add_llvm_library(LLVMSystemZDesc SystemZMCObjectWriter.cpp SystemZMCTargetDesc.cpp ) - -add_dependencies(LLVMSystemZDesc SystemZCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp index 965c41e2d151..c46a36bdd23d 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp @@ -19,8 +19,6 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(StringRef TT) { IsLittleEndian = false; CommentString = "#"; - GlobalPrefix = ""; - PrivateGlobalPrefix = ".L"; ZeroDirective = "\t.space\t"; Data64bitsDirective = "\t.quad\t"; UsesELFSectionDirectiveForBSS = true; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/README.txt b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/README.txt index afa6cf090d07..e089047d013e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/README.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/README.txt @@ -166,13 +166,6 @@ See CodeGen/SystemZ/alloca-01.ll for an example. -- -Atomic loads and stores use the default compare-and-swap based implementation. -This is much too conservative in practice, since the architecture guarantees -that 1-, 2-, 4- and 8-byte loads and stores to aligned addresses are -inherently atomic. - --- - If needed, we can support 16-byte atomics using LPQ, STPQ and CSDG. 
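 (For reference: LPQ and STPQ transfer the 16 bytes atomically through an
 even/odd register pair, while the quadword compare-and-swap would cover the
 read-modify-write cases.)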
-- diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZ.td b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZ.td index abf5c8eb320c..5f829034902f 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZ.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZ.td @@ -52,15 +52,6 @@ def SystemZAsmParser : AsmParser { let ShouldEmitMatchRegisterName = 0; } -//===----------------------------------------------------------------------===// -// Assembly writer -//===----------------------------------------------------------------------===// - -def SystemZAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - bit isMCAsmWriter = 1; -} - //===----------------------------------------------------------------------===// // Top-level target declaration //===----------------------------------------------------------------------===// @@ -68,5 +59,4 @@ def SystemZAsmWriter : AsmWriter { def SystemZ : Target { let InstructionSet = SystemZInstrInfo; let AssemblyParsers = [SystemZAsmParser]; - let AssemblyWriters = [SystemZAsmWriter]; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 75cbda4958a2..0960b0dbe7af 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -151,6 +151,15 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { #undef LOWER_HIGH + case SystemZ::Serialize: + if (Subtarget->hasFastSerialization()) + LoweredMI = MCInstBuilder(SystemZ::AsmBCR) + .addImm(14).addReg(SystemZ::R0D); + else + LoweredMI = MCInstBuilder(SystemZ::AsmBCR) + .addImm(15).addReg(SystemZ::R0D); + break; + default: Lower.lower(MI, LoweredMI); break; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index f4a27733ce0e..d95361eed159 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -764,9 +764,22 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { return true; } - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: { + case ISD::ANY_EXTEND: + // Bits above the extended operand are don't-care. + RxSBG.Input = N.getOperand(0); + return true; + + case ISD::ZERO_EXTEND: { + // Restrict the mask to the extended operand. + unsigned InnerBitSize = N.getOperand(0).getValueType().getSizeInBits(); + if (!refineRxSBGMask(RxSBG, allOnes(InnerBitSize))) + return false; + + RxSBG.Input = N.getOperand(0); + return true; + } + + case ISD::SIGN_EXTEND: { // Check that the extension bits are don't-care (i.e. are masked out // by the final mask). unsigned InnerBitSize = N.getOperand(0).getValueType().getSizeInBits(); @@ -1064,6 +1077,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { case ISD::ROTL: case ISD::SHL: case ISD::SRL: + case ISD::ZERO_EXTEND: if (!ResNode) ResNode = tryRISBGZero(Node); break; @@ -1079,20 +1093,6 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { } break; - case ISD::ATOMIC_LOAD_SUB: - // Try to convert subtractions of constants to additions. 
-    if (ConstantSDNode *Op2 = dyn_cast<ConstantSDNode>(Node->getOperand(2))) {
-      uint64_t Value = -Op2->getZExtValue();
-      EVT VT = Node->getValueType(0);
-      if (VT == MVT::i32 || isInt<32>(Value)) {
-        SDValue Ops[] = { Node->getOperand(0), Node->getOperand(1),
-                          CurDAG->getConstant(int32_t(Value), VT) };
-        Node = CurDAG->MorphNodeTo(Node, ISD::ATOMIC_LOAD_ADD,
-                                   Node->getVTList(), Ops, array_lengthof(Ops));
-      }
-    }
-    break;
-
   case SystemZISD::SELECT_CCMASK: {
     SDValue Op0 = Node->getOperand(0);
     SDValue Op1 = Node->getOperand(1);
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index f6e18530f4a5..c22089d91911 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -38,6 +38,27 @@ struct IPMConversion {
   int64_t AddValue;
   unsigned Bit;
 };
+
+// Represents information about a comparison.
+struct Comparison {
+  Comparison(SDValue Op0In, SDValue Op1In)
+    : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
+
+  // The operands to the comparison.
+  SDValue Op0, Op1;
+
+  // The opcode that should be used to compare Op0 and Op1.
+  unsigned Opcode;
+
+  // A SystemZICMP value.  Only used for integer comparisons.
+  unsigned ICmpType;
+
+  // The mask of CC values that Opcode can produce.
+  unsigned CCValid;
+
+  // The mask of CC values for which the original condition is true.
+  unsigned CCMask;
+};
 }
 
 // Classify VT as either 32 or 64 bit.
@@ -134,10 +155,14 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
       setOperationAction(ISD::SDIVREM, VT, Custom);
       setOperationAction(ISD::UDIVREM, VT, Custom);
 
-      // Expand ATOMIC_LOAD and ATOMIC_STORE using ATOMIC_CMP_SWAP.
-      // FIXME: probably much too conservative.
-      setOperationAction(ISD::ATOMIC_LOAD, VT, Expand);
-      setOperationAction(ISD::ATOMIC_STORE, VT, Expand);
+      // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
+      // stores, putting a serialization instruction after the stores.
+      setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
+      setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
+
+      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
+      // available, or if the operand is constant.
+      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
 
       // No special instructions for these.
       setOperationAction(ISD::CTPOP, VT, Expand);
@@ -970,6 +995,11 @@ SystemZTargetLowering::LowerReturn(SDValue Chain,
                      RetOps.data(), RetOps.size());
 }
 
+SDValue SystemZTargetLowering::
+prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const {
+  return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
+}
+
 // CC is a comparison that will be implemented using an integer or
 // floating-point comparison.  Return the condition code mask for
 // a branch on true.  In the integer case, CCMASK_CMP_UO is set for
@@ -1065,103 +1095,94 @@ static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
   llvm_unreachable("Unexpected CC combination");
 }
 
-// If a comparison described by IsUnsigned, CCMask, CmpOp0 and CmpOp1
-// can be converted to a comparison against zero, adjust the operands
+// If C can be converted to a comparison against zero, adjust the operands
 // as necessary.
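+// (For instance, "x > -1" becomes "x >= 0" and "x < 1" becomes "x <= 0",
+// so later code can use the compare-with-zero forms such as LOAD AND TEST.)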
-static void adjustZeroCmp(SelectionDAG &DAG, bool &IsUnsigned,
-                          SDValue &CmpOp0, SDValue &CmpOp1,
-                          unsigned &CCMask) {
-  if (IsUnsigned)
+static void adjustZeroCmp(SelectionDAG &DAG, Comparison &C) {
+  if (C.ICmpType == SystemZICMP::UnsignedOnly)
     return;
 
-  ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(CmpOp1.getNode());
+  ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
   if (!ConstOp1)
     return;
 
   int64_t Value = ConstOp1->getSExtValue();
-  if ((Value == -1 && CCMask == SystemZ::CCMASK_CMP_GT) ||
-      (Value == -1 && CCMask == SystemZ::CCMASK_CMP_LE) ||
-      (Value == 1 && CCMask == SystemZ::CCMASK_CMP_LT) ||
-      (Value == 1 && CCMask == SystemZ::CCMASK_CMP_GE)) {
-    CCMask ^= SystemZ::CCMASK_CMP_EQ;
-    CmpOp1 = DAG.getConstant(0, CmpOp1.getValueType());
+  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
+      (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
+      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
+      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
+    C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
+    C.Op1 = DAG.getConstant(0, C.Op1.getValueType());
   }
 }
 
-// If a comparison described by IsUnsigned, CCMask, CmpOp0 and CmpOp1
-// is suitable for CLI(Y), CHHSI or CLHHSI, adjust the operands as necessary.
-static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned,
-                             SDValue &CmpOp0, SDValue &CmpOp1,
-                             unsigned &CCMask) {
+// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
+// adjust the operands as necessary.
+static void adjustSubwordCmp(SelectionDAG &DAG, Comparison &C) {
   // For us to make any changes, it must be a comparison between a single-use
   // load and a constant.
-  if (!CmpOp0.hasOneUse() ||
-      CmpOp0.getOpcode() != ISD::LOAD ||
-      CmpOp1.getOpcode() != ISD::Constant)
+  if (!C.Op0.hasOneUse() ||
+      C.Op0.getOpcode() != ISD::LOAD ||
+      C.Op1.getOpcode() != ISD::Constant)
     return;
 
   // We must have an 8- or 16-bit load.
-  LoadSDNode *Load = cast<LoadSDNode>(CmpOp0);
+  LoadSDNode *Load = cast<LoadSDNode>(C.Op0);
   unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
   if (NumBits != 8 && NumBits != 16)
     return;
 
   // The load must be an extending one and the constant must be within the
   // range of the unextended value.
-  ConstantSDNode *Constant = cast<ConstantSDNode>(CmpOp1);
-  uint64_t Value = Constant->getZExtValue();
+  ConstantSDNode *ConstOp1 = cast<ConstantSDNode>(C.Op1);
+  uint64_t Value = ConstOp1->getZExtValue();
   uint64_t Mask = (1 << NumBits) - 1;
   if (Load->getExtensionType() == ISD::SEXTLOAD) {
-    int64_t SignedValue = Constant->getSExtValue();
-    if (uint64_t(SignedValue) + (1ULL << (NumBits - 1)) > Mask)
+    // Make sure that ConstOp1 is in range of C.Op0.
+    int64_t SignedValue = ConstOp1->getSExtValue();
+    if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
       return;
-    // Unsigned comparison between two sign-extended values is equivalent
-    // to unsigned comparison between two zero-extended values.
-    if (IsUnsigned)
+    if (C.ICmpType != SystemZICMP::SignedOnly) {
+      // Unsigned comparison between two sign-extended values is equivalent
+      // to unsigned comparison between two zero-extended values.
       Value &= Mask;
-    else if (CCMask == SystemZ::CCMASK_CMP_EQ ||
-             CCMask == SystemZ::CCMASK_CMP_NE)
-      // Any choice of IsUnsigned is OK for equality comparisons.
-      // We could use either CHHSI or CLHHSI for 16-bit comparisons,
-      // but since we use CLHHSI for zero extensions, it seems better
-      // to be consistent and do the same here.
-      Value &= Mask, IsUnsigned = true;
-    else if (NumBits == 8) {
+    } else if (NumBits == 8) {
       // Try to treat the comparison as unsigned, so that we can use CLI.
      // Adjust CCMask and Value as necessary.
-      if (Value == 0 && CCMask == SystemZ::CCMASK_CMP_LT)
+      if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
         // Test whether the high bit of the byte is set.
-        Value = 127, CCMask = SystemZ::CCMASK_CMP_GT, IsUnsigned = true;
-      else if (Value == 0 && CCMask == SystemZ::CCMASK_CMP_GE)
+        Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
+      else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
         // Test whether the high bit of the byte is clear.
-        Value = 128, CCMask = SystemZ::CCMASK_CMP_LT, IsUnsigned = true;
+        Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
       else
         // No instruction exists for this combination.
         return;
+      C.ICmpType = SystemZICMP::UnsignedOnly;
     }
   } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
     if (Value > Mask)
       return;
-    // Signed comparison between two zero-extended values is equivalent
-    // to unsigned comparison.
-    IsUnsigned = true;
+    assert(C.ICmpType == SystemZICMP::Any &&
+           "Signedness shouldn't matter here.");
   } else
     return;
 
   // Make sure that the first operand is an i32 of the right extension type.
-  ISD::LoadExtType ExtType = IsUnsigned ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
-  if (CmpOp0.getValueType() != MVT::i32 ||
+  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
+                              ISD::SEXTLOAD :
+                              ISD::ZEXTLOAD);
+  if (C.Op0.getValueType() != MVT::i32 ||
       Load->getExtensionType() != ExtType)
-    CmpOp0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32,
-                            Load->getChain(), Load->getBasePtr(),
-                            Load->getPointerInfo(), Load->getMemoryVT(),
-                            Load->isVolatile(), Load->isNonTemporal(),
-                            Load->getAlignment());
+    C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32,
+                           Load->getChain(), Load->getBasePtr(),
+                           Load->getPointerInfo(), Load->getMemoryVT(),
+                           Load->isVolatile(), Load->isNonTemporal(),
+                           Load->getAlignment());
 
   // Make sure that the second operand is an i32 with the right value.
-  if (CmpOp1.getValueType() != MVT::i32 ||
-      Value != Constant->getZExtValue())
-    CmpOp1 = DAG.getConstant(Value, MVT::i32);
+  if (C.Op1.getValueType() != MVT::i32 ||
+      Value != ConstOp1->getZExtValue())
+    C.Op1 = DAG.getConstant(Value, MVT::i32);
 }
 
 // Return true if Op is either an unextended load, or a load suitable
@@ -1187,49 +1208,162 @@ static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
   return false;
 }
 
-// Return true if it is better to swap comparison operands Op0 and Op1.
-// ICmpType is the type of an integer comparison.
-static bool shouldSwapCmpOperands(SDValue Op0, SDValue Op1,
-                                  unsigned ICmpType) {
+// Return true if it is better to swap the operands of C.
+static bool shouldSwapCmpOperands(const Comparison &C) {
   // Leave f128 comparisons alone, since they have no memory forms.
-  if (Op0.getValueType() == MVT::f128)
+  if (C.Op0.getValueType() == MVT::f128)
     return false;
 
   // Always keep a floating-point constant second, since comparisons with
   // zero can use LOAD TEST and comparisons with other constants make a
   // natural memory operand.
-  if (isa<ConstantFPSDNode>(Op1))
+  if (isa<ConstantFPSDNode>(C.Op1))
     return false;
 
   // Never swap comparisons with zero since there are many ways to optimize
   // those later.
-  ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
-  if (COp1 && COp1->getZExtValue() == 0)
+  ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
+  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
+    return false;
+
+  // Also keep natural memory operands second if the loaded value is
+  // only used here.  Several comparisons have memory forms.
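+  // (e.g. C, CH and CL can take their second operand directly from memory,
+  // so keeping a one-use load second lets it fold into the comparison.)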
+  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
     return false;
 
   // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
   // In that case we generally prefer the memory to be second.
-  if ((isNaturalMemoryOperand(Op0, ICmpType) && Op0.hasOneUse()) &&
-      !(isNaturalMemoryOperand(Op1, ICmpType) && Op1.hasOneUse())) {
+  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
     // The only exceptions are when the second operand is a constant and
     // we can use things like CHHSI.
-    if (!COp1)
+    if (!ConstOp1)
       return true;
     // The unsigned memory-immediate instructions can handle 16-bit
     // unsigned integers.
-    if (ICmpType != SystemZICMP::SignedOnly &&
-        isUInt<16>(COp1->getZExtValue()))
+    if (C.ICmpType != SystemZICMP::SignedOnly &&
+        isUInt<16>(ConstOp1->getZExtValue()))
       return false;
     // The signed memory-immediate instructions can handle 16-bit
     // signed integers.
-    if (ICmpType != SystemZICMP::UnsignedOnly &&
-        isInt<16>(COp1->getSExtValue()))
+    if (C.ICmpType != SystemZICMP::UnsignedOnly &&
+        isInt<16>(ConstOp1->getSExtValue()))
       return false;
     return true;
   }
+
+  // Try to promote the use of CGFR and CLGFR.
+  unsigned Opcode0 = C.Op0.getOpcode();
+  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
+    return true;
+  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
+    return true;
+  if (C.ICmpType != SystemZICMP::SignedOnly &&
+      Opcode0 == ISD::AND &&
+      C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
+    return true;
+
   return false;
 }
 
+// Return a version of comparison CC mask CCMask in which the LT and GT
+// actions are swapped.
+static unsigned reverseCCMask(unsigned CCMask) {
+  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
+          (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
+          (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
+          (CCMask & SystemZ::CCMASK_CMP_UO));
+}
+
+// Check whether C tests for equality between X and Y and whether X - Y
+// or Y - X is also computed.  In that case it's better to compare the
+// result of the subtraction against zero.
+static void adjustForSubtraction(SelectionDAG &DAG, Comparison &C) {
+  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
+      C.CCMask == SystemZ::CCMASK_CMP_NE) {
+    for (SDNode::use_iterator I = C.Op0->use_begin(), E = C.Op0->use_end();
+         I != E; ++I) {
+      SDNode *N = *I;
+      if (N->getOpcode() == ISD::SUB &&
+          ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
+           (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
+        C.Op0 = SDValue(N, 0);
+        C.Op1 = DAG.getConstant(0, N->getValueType(0));
+        return;
+      }
+    }
+  }
+}
+
+// Check whether C compares a floating-point value with zero and if that
+// floating-point value is also negated.  In this case we can use the
+// negation to set CC, avoiding separate LOAD AND TEST and
+// LOAD (NEGATIVE/COMPLEMENT) instructions.
+static void adjustForFNeg(Comparison &C) {
+  ConstantFPSDNode *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
+  if (C1 && C1->isZero()) {
+    for (SDNode::use_iterator I = C.Op0->use_begin(), E = C.Op0->use_end();
+         I != E; ++I) {
+      SDNode *N = *I;
+      if (N->getOpcode() == ISD::FNEG) {
+        C.Op0 = SDValue(N, 0);
+        C.CCMask = reverseCCMask(C.CCMask);
+        return;
+      }
+    }
+  }
+}
+
+// Check whether C compares (shl X, 32) with 0 and whether X is
+// also sign-extended.  In that case it is better to test the result
+// of the sign extension using LTGFR.
+//
+// This case is important because InstCombine transforms a comparison
+// with (sext (trunc X)) into a comparison with (shl X, 32).
+static void adjustForLTGFR(Comparison &C) {
+  // Check for a comparison between (shl X, 32) and 0.
+  if (C.Op0.getOpcode() == ISD::SHL &&
+      C.Op0.getValueType() == MVT::i64 &&
+      C.Op1.getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
+    ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
+    if (C1 && C1->getZExtValue() == 32) {
+      SDValue ShlOp0 = C.Op0.getOperand(0);
+      // See whether X has any SIGN_EXTEND_INREG uses.
+      for (SDNode::use_iterator I = ShlOp0->use_begin(), E = ShlOp0->use_end();
+           I != E; ++I) {
+        SDNode *N = *I;
+        if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
+            cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
+          C.Op0 = SDValue(N, 0);
+          return;
+        }
+      }
+    }
+  }
+}
+
+// If C compares the truncation of an extending load, try to compare
+// the untruncated value instead.  This exposes more opportunities to
+// reuse CC.
+static void adjustICmpTruncate(SelectionDAG &DAG, Comparison &C) {
+  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
+      C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
+      C.Op1.getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
+    LoadSDNode *L = cast<LoadSDNode>(C.Op0.getOperand(0));
+    if (L->getMemoryVT().getStoreSizeInBits()
+        <= C.Op0.getValueType().getSizeInBits()) {
+      unsigned Type = L->getExtensionType();
+      if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
+          (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
+        C.Op0 = C.Op0.getOperand(0);
+        C.Op1 = DAG.getConstant(0, C.Op0.getValueType());
+      }
+    }
+  }
+}
+
 // Return true if shift operation N has an in-range constant shift value.
 // Store it in ShiftVal if so.
 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
@@ -1341,118 +1475,143 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
     return 0;
 }
 
-// See whether the comparison (Opcode CmpOp0, CmpOp1, ICmpType) can be
-// implemented as a TEST UNDER MASK instruction when the condition being
-// tested is as described by CCValid and CCMask.  Update the arguments
-// with the TM version if so.
-static void adjustForTestUnderMask(SelectionDAG &DAG, unsigned &Opcode,
-                                   SDValue &CmpOp0, SDValue &CmpOp1,
-                                   unsigned &CCValid, unsigned &CCMask,
-                                   unsigned &ICmpType) {
+// See whether C can be implemented as a TEST UNDER MASK instruction.
+// Update the arguments with the TM version if so.
+static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) {
   // Check that we have a comparison with a constant.
-  ConstantSDNode *ConstCmpOp1 = dyn_cast<ConstantSDNode>(CmpOp1);
-  if (!ConstCmpOp1)
+  ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
+  if (!ConstOp1)
     return;
-  uint64_t CmpVal = ConstCmpOp1->getZExtValue();
+  uint64_t CmpVal = ConstOp1->getZExtValue();
 
   // Check whether the nonconstant input is an AND with a constant mask.
-  if (CmpOp0.getOpcode() != ISD::AND)
-    return;
-  SDValue AndOp0 = CmpOp0.getOperand(0);
-  SDValue AndOp1 = CmpOp0.getOperand(1);
-  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(AndOp1.getNode());
-  if (!Mask)
-    return;
-  uint64_t MaskVal = Mask->getZExtValue();
+  Comparison NewC(C);
+  uint64_t MaskVal;
+  ConstantSDNode *Mask = 0;
+  if (C.Op0.getOpcode() == ISD::AND) {
+    NewC.Op0 = C.Op0.getOperand(0);
+    NewC.Op1 = C.Op0.getOperand(1);
+    Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
+    if (!Mask)
+      return;
+    MaskVal = Mask->getZExtValue();
+  } else {
+    // There is no instruction to compare with a 64-bit immediate
+    // so use TMHH instead if possible.  We need an unsigned ordered
+    // comparison with an i64 immediate.
+    if (NewC.Op0.getValueType() != MVT::i64 ||
+        NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
+        NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
+        NewC.ICmpType == SystemZICMP::SignedOnly)
+      return;
+    // Convert LE and GT comparisons into LT and GE.
+    if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
+        NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
+      if (CmpVal == uint64_t(-1))
+        return;
+      CmpVal += 1;
+      NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
+    }
+    // If the low N bits of Op1 are zero then the low N bits of Op0 can
+    // be masked off without changing the result.
+    MaskVal = -(CmpVal & -CmpVal);
+    NewC.ICmpType = SystemZICMP::UnsignedOnly;
+  }
 
   // Check whether the combination of mask, comparison value and comparison
   // type are suitable.
-  unsigned BitSize = CmpOp0.getValueType().getSizeInBits();
+  unsigned BitSize = NewC.Op0.getValueType().getSizeInBits();
   unsigned NewCCMask, ShiftVal;
-  if (ICmpType != SystemZICMP::SignedOnly &&
-      AndOp0.getOpcode() == ISD::SHL &&
-      isSimpleShift(AndOp0, ShiftVal) &&
-      (NewCCMask = getTestUnderMaskCond(BitSize, CCMask, MaskVal >> ShiftVal,
+  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
+      NewC.Op0.getOpcode() == ISD::SHL &&
+      isSimpleShift(NewC.Op0, ShiftVal) &&
+      (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
+                                        MaskVal >> ShiftVal,
                                         CmpVal >> ShiftVal,
                                         SystemZICMP::Any))) {
-    AndOp0 = AndOp0.getOperand(0);
-    AndOp1 = DAG.getConstant(MaskVal >> ShiftVal, AndOp0.getValueType());
-  } else if (ICmpType != SystemZICMP::SignedOnly &&
-             AndOp0.getOpcode() == ISD::SRL &&
-             isSimpleShift(AndOp0, ShiftVal) &&
-             (NewCCMask = getTestUnderMaskCond(BitSize, CCMask,
+    NewC.Op0 = NewC.Op0.getOperand(0);
+    MaskVal >>= ShiftVal;
+  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
+             NewC.Op0.getOpcode() == ISD::SRL &&
+             isSimpleShift(NewC.Op0, ShiftVal) &&
+             (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
                                                MaskVal << ShiftVal,
                                                CmpVal << ShiftVal,
                                                SystemZICMP::UnsignedOnly))) {
-    AndOp0 = AndOp0.getOperand(0);
-    AndOp1 = DAG.getConstant(MaskVal << ShiftVal, AndOp0.getValueType());
+    NewC.Op0 = NewC.Op0.getOperand(0);
+    MaskVal <<= ShiftVal;
   } else {
-    NewCCMask = getTestUnderMaskCond(BitSize, CCMask, MaskVal, CmpVal,
-                                     ICmpType);
+    NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
+                                     NewC.ICmpType);
     if (!NewCCMask)
       return;
   }
 
   // Go ahead and make the change.
-  Opcode = SystemZISD::TM;
-  CmpOp0 = AndOp0;
-  CmpOp1 = AndOp1;
-  ICmpType = (bool(NewCCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
-              bool(NewCCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
-  CCValid = SystemZ::CCMASK_TM;
-  CCMask = NewCCMask;
+  C.Opcode = SystemZISD::TM;
+  C.Op0 = NewC.Op0;
+  if (Mask && Mask->getZExtValue() == MaskVal)
+    C.Op1 = SDValue(Mask, 0);
+  else
+    C.Op1 = DAG.getConstant(MaskVal, C.Op0.getValueType());
+  C.CCValid = SystemZ::CCMASK_TM;
+  C.CCMask = NewCCMask;
 }
 
-// Return a target node that compares CmpOp0 with CmpOp1 and stores a
-// 2-bit result in CC.  Set CCValid to the CCMASK_* of all possible
-// 2-bit results and CCMask to the subset of those results that are
-// associated with Cond.
-static SDValue emitCmp(const SystemZTargetMachine &TM, SelectionDAG &DAG,
-                       SDLoc DL, SDValue CmpOp0, SDValue CmpOp1,
-                       ISD::CondCode Cond, unsigned &CCValid,
-                       unsigned &CCMask) {
-  bool IsUnsigned = false;
-  CCMask = CCMaskForCondCode(Cond);
-  unsigned Opcode, ICmpType = 0;
-  if (CmpOp0.getValueType().isFloatingPoint()) {
-    CCValid = SystemZ::CCMASK_FCMP;
-    Opcode = SystemZISD::FCMP;
+// Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
+static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
+                         ISD::CondCode Cond) {
+  Comparison C(CmpOp0, CmpOp1);
+  C.CCMask = CCMaskForCondCode(Cond);
+  if (C.Op0.getValueType().isFloatingPoint()) {
+    C.CCValid = SystemZ::CCMASK_FCMP;
+    C.Opcode = SystemZISD::FCMP;
+    adjustForFNeg(C);
   } else {
-    IsUnsigned = CCMask & SystemZ::CCMASK_CMP_UO;
-    CCValid = SystemZ::CCMASK_ICMP;
-    CCMask &= CCValid;
-    adjustZeroCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask);
-    adjustSubwordCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask);
-    Opcode = SystemZISD::ICMP;
+    C.CCValid = SystemZ::CCMASK_ICMP;
+    C.Opcode = SystemZISD::ICMP;
     // Choose the type of comparison.  Equality and inequality tests can
     // use either signed or unsigned comparisons.  The choice also doesn't
     // matter if both sign bits are known to be clear.  In those cases we
     // want to give the main isel code the freedom to choose whichever
     // form fits best.
-    if (CCMask == SystemZ::CCMASK_CMP_EQ ||
-        CCMask == SystemZ::CCMASK_CMP_NE ||
-        (DAG.SignBitIsZero(CmpOp0) && DAG.SignBitIsZero(CmpOp1)))
-      ICmpType = SystemZICMP::Any;
-    else if (IsUnsigned)
-      ICmpType = SystemZICMP::UnsignedOnly;
+    if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
+        C.CCMask == SystemZ::CCMASK_CMP_NE ||
+        (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
+      C.ICmpType = SystemZICMP::Any;
+    else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
+      C.ICmpType = SystemZICMP::UnsignedOnly;
    else
-      ICmpType = SystemZICMP::SignedOnly;
+      C.ICmpType = SystemZICMP::SignedOnly;
+    C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
+    adjustZeroCmp(DAG, C);
+    adjustSubwordCmp(DAG, C);
+    adjustForSubtraction(DAG, C);
+    adjustForLTGFR(C);
+    adjustICmpTruncate(DAG, C);
  }

-  if (shouldSwapCmpOperands(CmpOp0, CmpOp1, ICmpType)) {
-    std::swap(CmpOp0, CmpOp1);
-    CCMask = ((CCMask & SystemZ::CCMASK_CMP_EQ) |
-              (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
-              (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
-              (CCMask & SystemZ::CCMASK_CMP_UO));
+  if (shouldSwapCmpOperands(C)) {
+    std::swap(C.Op0, C.Op1);
+    C.CCMask = reverseCCMask(C.CCMask);
  }
-  adjustForTestUnderMask(DAG, Opcode, CmpOp0, CmpOp1, CCValid, CCMask,
-                         ICmpType);
-  if (Opcode == SystemZISD::ICMP || Opcode == SystemZISD::TM)
-    return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1,
-                       DAG.getConstant(ICmpType, MVT::i32));
-  return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1);
+  adjustForTestUnderMask(DAG, C);
+  return C;
+}
+
+// Emit the comparison instruction described by C.
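// An aside, not part of the patch: getCmp above now calls a reverseCCMask
// helper that this hunk does not show.  A minimal sketch of its expected
// behavior, assuming it keeps the EQ and UO bits and swaps LT with GT
// exactly as the deleted inline computation did:
static unsigned reverseCCMask(unsigned CCMask) {
  // Preserve equality and unordered bits; exchange less-than/greater-than.
  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
          (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
          (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
          (CCMask & SystemZ::CCMASK_CMP_UO));
}
// The emitter for the resulting Comparison follows.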
+static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { + if (C.Opcode == SystemZISD::ICMP) + return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1, + DAG.getConstant(C.ICmpType, MVT::i32)); + if (C.Opcode == SystemZISD::TM) { + bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != + bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); + return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1, + DAG.getConstant(RegisterOnly, MVT::i32)); + } + return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1); } // Implement a 32-bit *MUL_LOHI operation by extending both operands to @@ -1486,16 +1645,11 @@ static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT, Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); } -SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, - SelectionDAG &DAG) const { - SDValue CmpOp0 = Op.getOperand(0); - SDValue CmpOp1 = Op.getOperand(1); - ISD::CondCode CC = cast(Op.getOperand(2))->get(); - SDLoc DL(Op); - - unsigned CCValid, CCMask; - SDValue Glue = emitCmp(TM, DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask); - +// Return an i32 value that is 1 if the CC value produced by Glue is +// in the mask CCMask and 0 otherwise. CC is known to have a value +// in CCValid, so other values can be ignored. +static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue, + unsigned CCValid, unsigned CCMask) { IPMConversion Conversion = getIPMConversion(CCValid, CCMask); SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); @@ -1516,6 +1670,18 @@ SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, return Result; } +SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, + SelectionDAG &DAG) const { + SDValue CmpOp0 = Op.getOperand(0); + SDValue CmpOp1 = Op.getOperand(1); + ISD::CondCode CC = cast(Op.getOperand(2))->get(); + SDLoc DL(Op); + + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); + SDValue Glue = emitCmp(DAG, DL, C); + return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask); +} + SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); @@ -1524,11 +1690,33 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(4); SDLoc DL(Op); - unsigned CCValid, CCMask; - SDValue Flags = emitCmp(TM, DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask); + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); + SDValue Glue = emitCmp(DAG, DL, C); return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), - Chain, DAG.getConstant(CCValid, MVT::i32), - DAG.getConstant(CCMask, MVT::i32), Dest, Flags); + Chain, DAG.getConstant(C.CCValid, MVT::i32), + DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue); +} + +// Return true if Pos is CmpOp and Neg is the negative of CmpOp, +// allowing Pos and Neg to be wider than CmpOp. +static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) { + return (Neg.getOpcode() == ISD::SUB && + Neg.getOperand(0).getOpcode() == ISD::Constant && + cast(Neg.getOperand(0))->getZExtValue() == 0 && + Neg.getOperand(1) == Pos && + (Pos == CmpOp || + (Pos.getOpcode() == ISD::SIGN_EXTEND && + Pos.getOperand(0) == CmpOp))); +} + +// Return the absolute or negative absolute of Op; IsNegative decides which. 
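// (Illustration, not from the patch: the shape matched by isAbsolute above
// corresponds to a select of the form (x < 0 ? 0 - x : x), where Pos may
// also be (sext x); the sign-extended case is what lowerSELECT_CC later
// maps onto LPGFR/LNGFR.  A hypothetical source-level trigger:
//
//   long abs_ext(int x) { return x < 0 ? -(long)x : (long)x; }
//
// The helper below then materializes the absolute value, negating it when
// the select actually wanted the negative absolute.)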
+static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op, + bool IsNegative) { + Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op); + if (IsNegative) + Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(), + DAG.getConstant(0, Op.getValueType()), Op); + return Op; } SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, @@ -1540,15 +1728,53 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, ISD::CondCode CC = cast(Op.getOperand(4))->get(); SDLoc DL(Op); - unsigned CCValid, CCMask; - SDValue Flags = emitCmp(TM, DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask); + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); + + // Check for absolute and negative-absolute selections, including those + // where the comparison value is sign-extended (for LPGFR and LNGFR). + // This check supplements the one in DAGCombiner. + if (C.Opcode == SystemZISD::ICMP && + C.CCMask != SystemZ::CCMASK_CMP_EQ && + C.CCMask != SystemZ::CCMASK_CMP_NE && + C.Op1.getOpcode() == ISD::Constant && + cast(C.Op1)->getZExtValue() == 0) { + if (isAbsolute(C.Op0, TrueOp, FalseOp)) + return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT); + if (isAbsolute(C.Op0, FalseOp, TrueOp)) + return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT); + } + + SDValue Glue = emitCmp(DAG, DL, C); + + // Special case for handling -1/0 results. The shifts we use here + // should get optimized with the IPM conversion sequence. + ConstantSDNode *TrueC = dyn_cast(TrueOp); + ConstantSDNode *FalseC = dyn_cast(FalseOp); + if (TrueC && FalseC) { + int64_t TrueVal = TrueC->getSExtValue(); + int64_t FalseVal = FalseC->getSExtValue(); + if ((TrueVal == -1 && FalseVal == 0) || (TrueVal == 0 && FalseVal == -1)) { + // Invert the condition if we want -1 on false. + if (TrueVal == 0) + C.CCMask ^= C.CCValid; + SDValue Result = emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask); + EVT VT = Op.getValueType(); + // Extend the result to VT. Upper bits are ignored. + if (!is32Bit(VT)) + Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result); + // Sign-extend from the low bit. + SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, MVT::i32); + SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt); + return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt); + } + } SmallVector Ops; Ops.push_back(TrueOp); Ops.push_back(FalseOp); - Ops.push_back(DAG.getConstant(CCValid, MVT::i32)); - Ops.push_back(DAG.getConstant(CCMask, MVT::i32)); - Ops.push_back(Flags); + Ops.push_back(DAG.getConstant(C.CCValid, MVT::i32)); + Ops.push_back(DAG.getConstant(C.CCMask, MVT::i32)); + Ops.push_back(Glue); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size()); @@ -1949,11 +2175,32 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { MVT::i64, HighOp, Low32); } +// Op is an atomic load. Lower it into a normal volatile load. +SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, + SelectionDAG &DAG) const { + AtomicSDNode *Node = cast(Op.getNode()); + return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(), + Node->getChain(), Node->getBasePtr(), + Node->getMemoryVT(), Node->getMemOperand()); +} + +// Op is an atomic store. Lower it into a normal volatile store followed +// by a serialization. 
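// (Sketch of the intended effect, not part of the patch: IR such as
//
//   store atomic i32 %v, i32* %p seq_cst, align 4
//
// becomes an ordinary store followed by the new SERIALIZE node which,
// per the comment added to SystemZISelLowering.h below, is a BCR 15,0,
// or the cheaper BCR 14,0 where the fast-serialization facility exists.)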
+SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op, + SelectionDAG &DAG) const { + AtomicSDNode *Node = cast(Op.getNode()); + SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(), + Node->getBasePtr(), Node->getMemoryVT(), + Node->getMemOperand()); + return SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), MVT::Other, + Chain), 0); +} + // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first // two into the fullword ATOMIC_LOADW_* operation given by Opcode. -SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, - SelectionDAG &DAG, - unsigned Opcode) const { +SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, + SelectionDAG &DAG, + unsigned Opcode) const { AtomicSDNode *Node = cast(Op.getNode()); // 32-bit operations need no code outside the main loop. @@ -2023,6 +2270,44 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, return DAG.getMergeValues(RetOps, 2, DL); } +// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations +// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit +// operations into additions. +SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, + SelectionDAG &DAG) const { + AtomicSDNode *Node = cast(Op.getNode()); + EVT MemVT = Node->getMemoryVT(); + if (MemVT == MVT::i32 || MemVT == MVT::i64) { + // A full-width operation. + assert(Op.getValueType() == MemVT && "Mismatched VTs"); + SDValue Src2 = Node->getVal(); + SDValue NegSrc2; + SDLoc DL(Src2); + + if (ConstantSDNode *Op2 = dyn_cast(Src2)) { + // Use an addition if the operand is constant and either LAA(G) is + // available or the negative value is in the range of A(G)FHI. + int64_t Value = (-Op2->getAPIntValue()).getSExtValue(); + if (isInt<32>(Value) || TM.getSubtargetImpl()->hasInterlockedAccess1()) + NegSrc2 = DAG.getConstant(Value, MemVT); + } else if (TM.getSubtargetImpl()->hasInterlockedAccess1()) + // Use LAA(G) if available. + NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, MemVT), + Src2); + + if (NegSrc2.getNode()) + return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT, + Node->getChain(), Node->getBasePtr(), NegSrc2, + Node->getMemOperand(), Node->getOrdering(), + Node->getSynchScope()); + + // Use the node as-is. + return Op; + } + + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB); +} + // Node is an 8- or 16-bit ATOMIC_CMP_SWAP operation. Lower the first two // into a fullword ATOMIC_CMP_SWAPW operation. 
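// (Worked example for lowerATOMIC_LOAD_SUB above, not part of the patch:
//
//   atomicrmw sub i64* %p, i64 16 seq_cst
//
// is rewritten into an ATOMIC_LOAD_ADD of -16, which can then be selected
// as an immediate add inside the usual compare-and-swap loop or, with
// interlocked-access facility 1, as a single LAAG.  A non-constant operand
// is negated with (0 - x) only when LAA(G) can consume the result.)
// The 8- and 16-bit compare-and-swap lowering follows.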
SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, @@ -2143,27 +2428,31 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, case ISD::OR: return lowerOR(Op, DAG); case ISD::ATOMIC_SWAP: - return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_SWAPW); + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); + case ISD::ATOMIC_STORE: + return lowerATOMIC_STORE(Op, DAG); + case ISD::ATOMIC_LOAD: + return lowerATOMIC_LOAD(Op, DAG); case ISD::ATOMIC_LOAD_ADD: - return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD); + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD); case ISD::ATOMIC_LOAD_SUB: - return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB); + return lowerATOMIC_LOAD_SUB(Op, DAG); case ISD::ATOMIC_LOAD_AND: - return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_AND); + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND); case ISD::ATOMIC_LOAD_OR: - return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_OR); + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR); case ISD::ATOMIC_LOAD_XOR: - return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR); + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR); case ISD::ATOMIC_LOAD_NAND: - return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND); + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND); case ISD::ATOMIC_LOAD_MIN: - return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN); + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN); case ISD::ATOMIC_LOAD_MAX: - return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX); + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX); case ISD::ATOMIC_LOAD_UMIN: - return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN); + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN); case ISD::ATOMIC_LOAD_UMAX: - return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX); + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX); case ISD::ATOMIC_CMP_SWAP: return lowerATOMIC_CMP_SWAP(Op, DAG); case ISD::STACKSAVE: @@ -2185,6 +2474,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(SIBCALL); OPCODE(PCREL_WRAPPER); OPCODE(PCREL_OFFSET); + OPCODE(IABS); OPCODE(ICMP); OPCODE(FCMP); OPCODE(TM); @@ -2210,6 +2500,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(STPCPY); OPCODE(SEARCH_STRING); OPCODE(IPM); + OPCODE(SERIALIZE); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZISelLowering.h index c6dcca6982a6..2caa0bcb6dff 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -45,6 +45,9 @@ namespace SystemZISD { // as a register base. PCREL_OFFSET, + // Integer absolute. + IABS, + // Integer comparisons. There are three operands: the two values // to compare, and an integer of type SystemZICMP. ICMP, @@ -132,6 +135,9 @@ namespace SystemZISD { // Store the CC value in bits 29 and 28 of an integer. IPM, + // Perform a serialization operation. (BCR 15,0 or BCR 14,0.) + SERIALIZE, + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or // ATOMIC_LOAD_. 
// @@ -244,6 +250,9 @@ public: const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, SDLoc DL, SelectionDAG &DAG) const LLVM_OVERRIDE; + virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, + SelectionDAG &DAG) const + LLVM_OVERRIDE; private: const SystemZSubtarget &Subtarget; @@ -270,9 +279,13 @@ private: SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG, - unsigned Opcode) const; + SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG, + unsigned Opcode) const; + SDValue lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerLOAD_SEQUENCE_POINT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZInstrFP.td index 60800460fca7..07f253d8cdcf 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -46,9 +46,9 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>; defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>; } -def : CompareZeroFP; -def : CompareZeroFP; -def : CompareZeroFP; +defm : CompareZeroFP; +defm : CompareZeroFP; +defm : CompareZeroFP; // Moves between 64-bit integer and floating-point registers. def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index a8efe165e36f..50badf82bc70 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -531,6 +531,10 @@ class InstSS op, dag outs, dag ins, string asmstr, list pattern> // Ternary: // One register output operand and three register input operands. // +// LoadAndOp: +// One output operand and two input operands. The first input operand +// is a register and the second is an address. +// // CmpSwap: // One output operand and three input operands. The first two // operands are registers and the third is an address. 
The instruction @@ -1267,6 +1271,15 @@ class TernaryRXF opcode, SDPatternOperator operator, let AccessBytes = bytes; } +class LoadAndOpRSY opcode, SDPatternOperator operator, + RegisterOperand cls, AddressingMode mode = bdaddr20only> + : InstRSY { + let mayLoad = 1; + let mayStore = 1; +} + class CmpSwapRS opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdaddr12only> : InstRS; - def LPGR : UnaryRRE<"lpg", 0xB900, z_iabs64, GR64, GR64>; + def LPR : UnaryRR <"lp", 0x10, z_iabs, GR32, GR32>; + def LPGR : UnaryRRE<"lpg", 0xB900, z_iabs, GR64, GR64>; } let CCValues = 0xE, CompareZeroCCMask = 0xE in def LPGFR : UnaryRRE<"lpgf", 0xB910, null_frag, GR64, GR32>; } +def : Pat<(z_iabs32 GR32:$src), (LPR GR32:$src)>; +def : Pat<(z_iabs64 GR64:$src), (LPGR GR64:$src)>; +defm : SXU; defm : SXU; let Defs = [CC] in { let CCValues = 0xF, CompareZeroCCMask = 0x8 in { - def LNR : UnaryRR <"ln", 0x11, z_inegabs32, GR32, GR32>; - def LNGR : UnaryRRE<"lng", 0xB901, z_inegabs64, GR64, GR64>; + def LNR : UnaryRR <"ln", 0x11, z_inegabs, GR32, GR32>; + def LNGR : UnaryRRE<"lng", 0xB901, z_inegabs, GR64, GR64>; } let CCValues = 0xE, CompareZeroCCMask = 0xE in def LNGFR : UnaryRRE<"lngf", 0xB911, null_frag, GR64, GR32>; } +def : Pat<(z_inegabs32 GR32:$src), (LNR GR32:$src)>; +def : Pat<(z_inegabs64 GR64:$src), (LNGR GR64:$src)>; +defm : SXU; defm : SXU; let Defs = [CC] in { @@ -1195,58 +1201,89 @@ def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>; // Atomic operations //===----------------------------------------------------------------------===// -def ATOMIC_SWAPW : AtomicLoadWBinaryReg; -def ATOMIC_SWAP_32 : AtomicLoadBinaryReg32; -def ATOMIC_SWAP_64 : AtomicLoadBinaryReg64; +def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>; -def ATOMIC_LOADW_AR : AtomicLoadWBinaryReg; -def ATOMIC_LOADW_AFI : AtomicLoadWBinaryImm; -def ATOMIC_LOAD_AR : AtomicLoadBinaryReg32; -def ATOMIC_LOAD_AHI : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_AFI : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_AGR : AtomicLoadBinaryReg64; -def ATOMIC_LOAD_AGHI : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_AGFI : AtomicLoadBinaryImm64; +let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in { + def LAA : LoadAndOpRSY<"laa", 0xEBF8, atomic_load_add_32, GR32>; + def LAAG : LoadAndOpRSY<"laag", 0xEBE8, atomic_load_add_64, GR64>; + def LAAL : LoadAndOpRSY<"laal", 0xEBFA, null_frag, GR32>; + def LAALG : LoadAndOpRSY<"laalg", 0xEBEA, null_frag, GR64>; + def LAN : LoadAndOpRSY<"lan", 0xEBF4, atomic_load_and_32, GR32>; + def LANG : LoadAndOpRSY<"lang", 0xEBE4, atomic_load_and_64, GR64>; + def LAO : LoadAndOpRSY<"lao", 0xEBF6, atomic_load_or_32, GR32>; + def LAOG : LoadAndOpRSY<"laog", 0xEBE6, atomic_load_or_64, GR64>; + def LAX : LoadAndOpRSY<"lax", 0xEBF7, atomic_load_xor_32, GR32>; + def LAXG : LoadAndOpRSY<"laxg", 0xEBE7, atomic_load_xor_64, GR64>; +} -def ATOMIC_LOADW_SR : AtomicLoadWBinaryReg; -def ATOMIC_LOAD_SR : AtomicLoadBinaryReg32; -def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64; +def ATOMIC_SWAPW : AtomicLoadWBinaryReg; +def ATOMIC_SWAP_32 : AtomicLoadBinaryReg32; +def ATOMIC_SWAP_64 : AtomicLoadBinaryReg64; -def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg; -def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm; -def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32; -def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64; -def ATOMIC_LOAD_NILL64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_NILH64 : 
AtomicLoadBinaryImm64; -def ATOMIC_LOAD_NIHL64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_NIHH64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_NILF64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_NIHF64 : AtomicLoadBinaryImm64; +def ATOMIC_LOADW_AR : AtomicLoadWBinaryReg; +def ATOMIC_LOADW_AFI : AtomicLoadWBinaryImm; +let Predicates = [FeatureNoInterlockedAccess1] in { + def ATOMIC_LOAD_AR : AtomicLoadBinaryReg32; + def ATOMIC_LOAD_AHI : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_AFI : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_AGR : AtomicLoadBinaryReg64; + def ATOMIC_LOAD_AGHI : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_AGFI : AtomicLoadBinaryImm64; +} + +def ATOMIC_LOADW_SR : AtomicLoadWBinaryReg; +def ATOMIC_LOAD_SR : AtomicLoadBinaryReg32; +def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64; + +def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg; +def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm; +let Predicates = [FeatureNoInterlockedAccess1] in { + def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32; + def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64; + def ATOMIC_LOAD_NILL64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_NILH64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_NIHL64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_NIHH64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_NILF64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_NIHF64 : AtomicLoadBinaryImm64; +} def ATOMIC_LOADW_OR : AtomicLoadWBinaryReg; def ATOMIC_LOADW_OILH : AtomicLoadWBinaryImm; -def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32; -def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64; -def ATOMIC_LOAD_OILL64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_OILH64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_OIHL64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_OIHH64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_OILF64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_OIHF64 : AtomicLoadBinaryImm64; +let Predicates = [FeatureNoInterlockedAccess1] in { + def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32; + def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64; + def ATOMIC_LOAD_OILL64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_OILH64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_OIHL64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_OIHH64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_OILF64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_OIHF64 : AtomicLoadBinaryImm64; +} def ATOMIC_LOADW_XR : AtomicLoadWBinaryReg; def ATOMIC_LOADW_XILF : AtomicLoadWBinaryImm; -def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32; -def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm32; -def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64; -def ATOMIC_LOAD_XILF64 : AtomicLoadBinaryImm64; -def ATOMIC_LOAD_XIHF64 : AtomicLoadBinaryImm64; +let Predicates = [FeatureNoInterlockedAccess1] in { + def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32; + def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm32; + def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64; + def ATOMIC_LOAD_XILF64 : AtomicLoadBinaryImm64; + def ATOMIC_LOAD_XIHF64 : AtomicLoadBinaryImm64; +} def ATOMIC_LOADW_NRi : AtomicLoadWBinaryReg; def ATOMIC_LOADW_NILHi : AtomicLoadWBinaryImm; def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET", SDT_ZWrapOffset, []>; +def z_iabs : SDNode<"SystemZISD::IABS", SDTIntUnaryOp, []>; def z_icmp : 
SDNode<"SystemZISD::ICMP", SDT_ZICmp, [SDNPOutGlue]>; def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp, [SDNPOutGlue]>; def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp, [SDNPOutGlue]>; @@ -119,6 +120,9 @@ def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>; def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>; def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>; +def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone, + [SDNPHasChain, SDNPMayStore]>; + class AtomicWOp : SDNode<"SystemZISD::"##name, profile, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; @@ -346,6 +350,9 @@ def or_as_revinserti8 : PatFrag<(ops node:$src1, node:$src2), APInt::getLowBitsSet(BitWidth, 8)); }]>; +// Negative integer absolute. +def z_inegabs : PatFrag<(ops node:$src), (ineg (z_iabs node:$src))>; + // Integer absolute, matching the canonical form generated by DAGCombiner. def z_iabs32 : PatFrag<(ops node:$src), (xor (add node:$src, (sra node:$src, (i32 31))), diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZPatterns.td b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZPatterns.td index 7706351e54b3..c0f94ecbe2c9 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZPatterns.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZPatterns.td @@ -148,5 +148,8 @@ multiclass BlockLoadStore - : Pat<(z_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>; +multiclass CompareZeroFP { + def : Pat<(z_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>; + // The sign of the zero makes no difference. + def : Pat<(z_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>; +} diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZProcessors.td b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZProcessors.td index f241fb0c2222..e6b58f17b0e6 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZProcessors.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZProcessors.td @@ -16,6 +16,9 @@ class SystemZFeature AssemblerPredicate<"Feature"##intname, extname>, SubtargetFeature; +class SystemZMissingFeature + : Predicate<"!Subtarget.has"##intname##"()">; + def FeatureDistinctOps : SystemZFeature< "distinct-ops", "DistinctOps", "Assume that the distinct-operands facility is installed" @@ -36,11 +39,24 @@ def FeatureFPExtension : SystemZFeature< "Assume that the floating-point extension facility is installed" >; +def FeatureFastSerialization : SystemZFeature< + "fast-serialization", "FastSerialization", + "Assume that the fast-serialization facility is installed" +>; + +def FeatureInterlockedAccess1 : SystemZFeature< + "interlocked-access1", "InterlockedAccess1", + "Assume that interlocked-access facility 1 is installed" +>; +def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">; + def : Processor<"generic", NoItineraries, []>; def : Processor<"z10", NoItineraries, []>; def : Processor<"z196", NoItineraries, [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, - FeatureFPExtension]>; + FeatureFPExtension, FeatureFastSerialization, + FeatureInterlockedAccess1]>; def : Processor<"zEC12", NoItineraries, [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, - FeatureFPExtension]>; + FeatureFPExtension, FeatureFastSerialization, + FeatureInterlockedAccess1]>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp index 3971d5e2a5fa..5a85a043be35 100644 
--- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -26,6 +26,7 @@ SystemZSubtarget::SystemZSubtarget(const std::string &TT, const std::string &FS) : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false), HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false), + HasFastSerialization(false), HasInterlockedAccess1(false), TargetTriple(TT) { std::string CPUName = CPU; if (CPUName.empty()) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZSubtarget.h index 5817491d4585..f7c8f96c0449 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZSubtarget.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZSubtarget.h @@ -32,6 +32,8 @@ protected: bool HasLoadStoreOnCond; bool HasHighWord; bool HasFPExtension; + bool HasFastSerialization; + bool HasInterlockedAccess1; private: Triple TargetTriple; @@ -58,6 +60,12 @@ public: // Return true if the target has the floating-point extension facility. bool hasFPExtension() const { return HasFPExtension; } + // Return true if the target has the fast-serialization facility. + bool hasFastSerialization() const { return HasFastSerialization; } + + // Return true if the target has interlocked-access facility 1. + bool hasInterlockedAccess1() const { return HasInterlockedAccess1; } + // Return true if GV can be accessed using LARL for reloc model RM // and code model CM. bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp index dee92e960c54..769bee51e38a 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -30,8 +30,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT, // Make sure that global data has at least 16 bits of alignment by default, // so that we can refer to it using LARL. We don't have any special // requirements for stack variables though. - DL("E-p:64:64:64-i1:8:16-i8:8:16-i16:16-i32:32-i64:64" - "-f32:32-f64:64-f128:64-a0:8:16-n32:64"), + DL("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this, Subtarget) { initAsmInfo(); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/TargetInfo/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/TargetInfo/CMakeLists.txt index b6051d3bf600..1ebc669e59af 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/TargetInfo/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/TargetInfo/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - add_llvm_library(LLVMSystemZInfo SystemZTargetInfo.cpp ) - -add_dependencies(LLVMSystemZInfo SystemZCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/TargetInfo/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/TargetInfo/LLVMBuild.txt index ea43736a686f..a5547e68a4ae 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/TargetInfo/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/SystemZ/TargetInfo/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = SystemZInfo parent = SystemZ -required_libraries = MC Support Target +required_libraries = Support add_to_library_groups = SystemZ diff --git a/external/bsd/llvm/dist/llvm/lib/Target/Target.cpp b/external/bsd/llvm/dist/llvm/lib/Target/Target.cpp index 2190198d8c90..f6cd258d81a9 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/Target.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/Target.cpp @@ -113,7 +113,7 @@ unsigned LLVMABIAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) { } unsigned LLVMCallFrameAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) { - return unwrap(TD)->getCallFrameTypeAlignment(unwrap(Ty)); + return unwrap(TD)->getABITypeAlignment(unwrap(Ty)); } unsigned LLVMPreferredAlignmentOfType(LLVMTargetDataRef TD, LLVMTypeRef Ty) { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/TargetLibraryInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/TargetLibraryInfo.cpp index 3e68fe16d4a4..93c008af3503 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/TargetLibraryInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/TargetLibraryInfo.cpp @@ -140,6 +140,12 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "floor", "floorf", "floorl", + "fmax", + "fmaxf", + "fmaxl", + "fmin", + "fminf", + "fminl", "fmod", "fmodf", "fmodl", @@ -422,6 +428,8 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, TLI.setUnavailable(LibFunc::fabsf); // Win32 and Win64 both lack fabsf TLI.setUnavailable(LibFunc::fabsl); TLI.setUnavailable(LibFunc::floorl); + TLI.setUnavailable(LibFunc::fmaxl); + TLI.setUnavailable(LibFunc::fminl); TLI.setUnavailable(LibFunc::fmodl); TLI.setUnavailable(LibFunc::frexpl); TLI.setUnavailable(LibFunc::logl); @@ -446,9 +454,6 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, TLI.setUnavailable(LibFunc::cbrt); TLI.setUnavailable(LibFunc::cbrtf); TLI.setUnavailable(LibFunc::cbrtl); - TLI.setUnavailable(LibFunc::exp10); - TLI.setUnavailable(LibFunc::exp10f); - TLI.setUnavailable(LibFunc::exp10l); TLI.setUnavailable(LibFunc::exp2); TLI.setUnavailable(LibFunc::exp2f); TLI.setUnavailable(LibFunc::exp2l); @@ -492,6 +497,8 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, TLI.setUnavailable(LibFunc::coshf); TLI.setUnavailable(LibFunc::expf); TLI.setUnavailable(LibFunc::floorf); + TLI.setUnavailable(LibFunc::fminf); + TLI.setUnavailable(LibFunc::fmaxf); TLI.setUnavailable(LibFunc::fmodf); TLI.setUnavailable(LibFunc::logf); TLI.setUnavailable(LibFunc::powf); @@ -567,6 +574,43 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, TLI.setUnavailable(LibFunc::llabs); } + switch (T.getOS()) { + case Triple::MacOSX: + // exp10 and exp10f are not available on OS X until 10.9 and iOS until 7.0 + // and their names are __exp10 and __exp10f. exp10l is not available on + // OS X or iOS. 
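// (For example, an aside not in the patch: when targeting
//  x86_64-apple-macosx10.9, exp10 and exp10f remain available under the
//  names __exp10 and __exp10f, so the optimizer may introduce calls to
//  them; on 10.8 and earlier they are marked unavailable instead.)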
+ TLI.setUnavailable(LibFunc::exp10l); + if (T.isMacOSXVersionLT(10, 9)) { + TLI.setUnavailable(LibFunc::exp10); + TLI.setUnavailable(LibFunc::exp10f); + } else { + TLI.setAvailableWithName(LibFunc::exp10, "__exp10"); + TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f"); + } + break; + case Triple::IOS: + TLI.setUnavailable(LibFunc::exp10l); + if (T.isOSVersionLT(7, 0)) { + TLI.setUnavailable(LibFunc::exp10); + TLI.setUnavailable(LibFunc::exp10f); + } else { + TLI.setAvailableWithName(LibFunc::exp10, "__exp10"); + TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f"); + } + break; + case Triple::Linux: + // exp10, exp10f, exp10l is available on Linux (GLIBC) but are extremely + // buggy prior to glibc version 2.18. Until this version is widely deployed + // or we have a reasonable detection strategy, we cannot use exp10 reliably + // on Linux. + // + // Fall through to disable all of them. + default: + TLI.setUnavailable(LibFunc::exp10); + TLI.setUnavailable(LibFunc::exp10f); + TLI.setUnavailable(LibFunc::exp10l); + } + // ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and // Linux (GLIBC): // http://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/ffsl.3.html diff --git a/external/bsd/llvm/dist/llvm/lib/Target/TargetLoweringObjectFile.cpp b/external/bsd/llvm/dist/llvm/lib/Target/TargetLoweringObjectFile.cpp index 7b8d1108f1bf..a895dda059f2 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/TargetLoweringObjectFile.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/TargetLoweringObjectFile.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" @@ -40,6 +41,7 @@ using namespace llvm; void TargetLoweringObjectFile::Initialize(MCContext &ctx, const TargetMachine &TM) { Ctx = &ctx; + DL = TM.getDataLayout(); InitMCObjectFileInfo(TM.getTargetTriple(), TM.getRelocationModel(), TM.getCodeModel(), *Ctx); } @@ -102,10 +104,23 @@ static bool IsNullTerminatedString(const Constant *C) { MCSymbol *TargetLoweringObjectFile::getSymbol(Mangler &M, const GlobalValue *GV) const { SmallString<60> NameStr; - M.getNameWithPrefix(NameStr, GV, false); + M.getNameWithPrefix(NameStr, GV); return Ctx->GetOrCreateSymbol(NameStr.str()); } +MCSymbol *TargetLoweringObjectFile::getSymbolWithGlobalValueBase( + Mangler &M, const GlobalValue *GV, StringRef Suffix) const { + assert(!Suffix.empty()); + assert(!GV->hasPrivateLinkage()); + assert(!GV->hasLinkerPrivateLinkage()); + assert(!GV->hasLinkerPrivateWeakLinkage()); + + SmallString<60> NameStr; + NameStr += DL->getPrivateGlobalPrefix(); + M.getNameWithPrefix(NameStr, GV); + NameStr.append(Suffix.begin(), Suffix.end()); + return Ctx->GetOrCreateSymbol(NameStr.str()); +} MCSymbol *TargetLoweringObjectFile:: getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/TargetMachine.cpp b/external/bsd/llvm/dist/llvm/lib/Target/TargetMachine.cpp index cb42e8311b87..a2350352071e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/TargetMachine.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/TargetMachine.cpp @@ -55,6 +55,7 @@ TargetMachine::TargetMachine(const Target &T, MCUseLoc(true), MCUseCFI(true), MCUseDwarfDirectory(false), + RequireStructuredCFG(false), Options(Options) { } @@ -67,7 +68,7 @@ TargetMachine::~TargetMachine() { void TargetMachine::resetTargetOptions(const 
MachineFunction *MF) const { const Function *F = MF->getFunction(); TargetOptions &TO = MF->getTarget().Options; - + #define RESET_OPTION(X, Y) \ do { \ if (F->hasFnAttribute(Y)) \ diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/AsmParser/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/X86/AsmParser/CMakeLists.txt index 54204d4b6390..14544267bf5f 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/AsmParser/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/AsmParser/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMX86AsmParser X86AsmParser.cpp ) - -add_dependencies(LLVMX86AsmParser X86CommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index bc8f367e9255..0f8e9f7b4ad3 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -50,6 +50,12 @@ class X86AsmParser : public MCTargetAsmParser { MCAsmParser &Parser; ParseInstructionInfo *InstInfo; private: + SMLoc consumeToken() { + SMLoc Result = Parser.getTok().getLoc(); + Parser.Lex(); + return Result; + } + enum InfixCalculatorTok { IC_PLUS = 0, IC_MINUS, @@ -1073,7 +1079,7 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, RegNo = MatchRegisterName(Tok.getString().lower()); if (!is64BitMode()) { - // FIXME: This should be done using Requires and + // FIXME: This should be done using Requires and // Requires so "eiz" usage in 64-bit instructions can be also // checked. // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a @@ -1323,12 +1329,37 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { } return Error(Tok.getLoc(), "Unexpected identifier!"); } - case AsmToken::Integer: + case AsmToken::Integer: { if (isParsingInlineAsm() && SM.getAddImmPrefix()) InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Tok.getLoc())); - SM.onInteger(Tok.getIntVal()); + // Look for 'b' or 'f' following an Integer as a directional label + SMLoc Loc = getTok().getLoc(); + int64_t IntVal = getTok().getIntVal(); + End = consumeToken(); + UpdateLocLex = false; + if (getLexer().getKind() == AsmToken::Identifier) { + StringRef IDVal = getTok().getString(); + if (IDVal == "f" || IDVal == "b") { + MCSymbol *Sym = + getContext().GetDirectionalLocalSymbol(IntVal, + IDVal == "f" ? 1 : 0); + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + const MCExpr *Val = + MCSymbolRefExpr::Create(Sym, Variant, getContext()); + if (IDVal == "b" && Sym->isUndefined()) + return Error(Loc, "invalid reference to undefined symbol"); + StringRef Identifier = Sym->getName(); + SM.onIdentifierExpr(Val, Identifier); + End = consumeToken(); + } else { + SM.onInteger(IntVal); + } + } else { + SM.onInteger(IntVal); + } break; + } case AsmToken::Plus: SM.onPlus(); break; case AsmToken::Minus: SM.onMinus(); break; case AsmToken::Star: SM.onStar(); break; @@ -1341,10 +1372,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { if (SM.hadError()) return Error(Tok.getLoc(), "unknown token in expression"); - if (!Done && UpdateLocLex) { - End = Tok.getLoc(); - Parser.Lex(); // Consume the token. 
- } + if (!Done && UpdateLocLex) + End = consumeToken(); } return false; } @@ -1680,6 +1709,13 @@ X86Operand *X86AsmParser::ParseIntelOperand() { } if (getLexer().isNot(AsmToken::LBrac)) { + // If a directional label (ie. 1f or 2b) was parsed above from + // ParseIntelExpression() then SM.getSym() was set to a pointer to + // to the MCExpr with the directional local symbol and this is a + // memory operand not an immediate operand. + if (SM.getSym()) + return X86Operand::CreateMem(SM.getSym(), Start, End, Size); + const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext()); return X86Operand::CreateImm(ImmExpr, Start, End); } @@ -1991,11 +2027,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { // Parse '*' modifier. - if (getLexer().is(AsmToken::Star)) { - SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(X86Operand::CreateToken("*", Loc)); - Parser.Lex(); // Eat the star. - } + if (getLexer().is(AsmToken::Star)) + Operands.push_back(X86Operand::CreateToken("*", consumeToken())); // Read the first operand. if (X86Operand *Op = ParseOperand()) @@ -2020,9 +2053,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, if (STI.getFeatureBits() & X86::FeatureAVX512) { // Parse mask register {%k1} if (getLexer().is(AsmToken::LCurly)) { - SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(X86Operand::CreateToken("{", Loc)); - Parser.Lex(); // Eat the { + Operands.push_back(X86Operand::CreateToken("{", consumeToken())); if (X86Operand *Op = ParseOperand()) { Operands.push_back(Op); if (!getLexer().is(AsmToken::RCurly)) { @@ -2030,19 +2061,16 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, Parser.eatToEndOfStatement(); return Error(Loc, "Expected } at this point"); } - Loc = Parser.getTok().getLoc(); - Operands.push_back(X86Operand::CreateToken("}", Loc)); - Parser.Lex(); // Eat the } + Operands.push_back(X86Operand::CreateToken("}", consumeToken())); } else { Parser.eatToEndOfStatement(); return true; } } + // TODO: add parsing of broadcasts {1to8}, {1to16} // Parse "zeroing non-masked" semantic {z} if (getLexer().is(AsmToken::LCurly)) { - SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(X86Operand::CreateToken("{z}", Loc)); - Parser.Lex(); // Eat the { + Operands.push_back(X86Operand::CreateToken("{z}", consumeToken())); if (!getLexer().is(AsmToken::Identifier) || getLexer().getTok().getIdentifier() != "z") { SMLoc Loc = getLexer().getLoc(); Parser.eatToEndOfStatement(); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/X86/CMakeLists.txt index 7e20151a19fc..77606ba36ee4 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/CMakeLists.txt @@ -53,8 +53,6 @@ endif() add_llvm_target(X86CodeGen ${sources}) -add_dependencies(LLVMX86CodeGen X86CommonTableGen intrinsics_gen) - add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/CMakeLists.txt index 0cd6db96dabe..34d36cb09571 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/CMakeLists.txt @@ -1,5 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. 
${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMX86Disassembler X86Disassembler.cpp X86DisassemblerDecoder.c @@ -12,5 +10,3 @@ set_property( PROPERTY COMPILE_FLAGS "/Od" ) endif() - -add_dependencies(LLVMX86Disassembler X86CommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/LLVMBuild.txt index 0609f3c28de3..cac7adff4922 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = X86Disassembler parent = X86 -required_libraries = MC Support X86Desc X86Info +required_libraries = MC Support X86Info add_to_library_groups = X86 diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 903e36cfe6ce..240d8ce9404e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -418,13 +418,22 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm || Opcode == X86::VGATHERDPSYrm || Opcode == X86::VGATHERQPSYrm || + Opcode == X86::VGATHERDPDZrm || + Opcode == X86::VPGATHERDQZrm || Opcode == X86::VPGATHERQQYrm || Opcode == X86::VPGATHERDDYrm || Opcode == X86::VPGATHERQDYrm); - if (IndexIs128 || IndexIs256) { + bool IndexIs512 = (Opcode == X86::VGATHERQPDZrm || + Opcode == X86::VGATHERDPSZrm || + Opcode == X86::VGATHERQPSZrm || + Opcode == X86::VPGATHERQQZrm || + Opcode == X86::VPGATHERDDZrm || + Opcode == X86::VPGATHERQDZrm); + if (IndexIs128 || IndexIs256 || IndexIs512) { unsigned IndexOffset = insn.sibIndex - (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); - SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; + SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 : + IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; insn.sibIndex = (SIBIndex)(IndexBase + (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset)); } @@ -565,6 +574,9 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, case TYPE_XMM128: case TYPE_XMM256: case TYPE_XMM512: + case TYPE_VK1: + case TYPE_VK8: + case TYPE_VK16: case TYPE_DEBUGREG: case TYPE_CONTROLREG: return translateRMRegister(mcInst, insn); @@ -596,16 +608,25 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, /// /// @param mcInst - The MCInst to append to. /// @param stackPos - The stack position to translate. -/// @return - 0 on success; nonzero otherwise. -static bool translateFPRegister(MCInst &mcInst, - uint8_t stackPos) { - if (stackPos >= 8) { - debug("Invalid FP stack position"); +static void translateFPRegister(MCInst &mcInst, + uint8_t stackPos) { + mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos)); +} + +/// translateMaskRegister - Translates a 3-bit mask register number to +/// LLVM form, and appends it to an MCInst. +/// +/// @param mcInst - The MCInst to append to. +/// @param maskRegNum - Number of mask register from 0 to 7. +/// @return - false on success; true otherwise. 
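// (Aside, not part of the patch: the 3-bit number comes from the EVEX.aaa
//  writemask field that getID() inspects further down via
//  aaaFromEVEX4of4(); the mapping below is a direct offset into the
//  K-register file, e.g. maskRegNum == 2 yields X86::K2, while values of
//  8 or more are rejected as a decoder error.)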
+static bool translateMaskRegister(MCInst &mcInst, + uint8_t maskRegNum) { + if (maskRegNum >= 8) { + debug("Invalid mask register number"); return true; } - - mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos)); + mcInst.addOperand(MCOperand::CreateReg(X86::K0 + maskRegNum)); return false; } @@ -626,6 +647,8 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, case ENCODING_REG: translateRegister(mcInst, insn.reg); return false; + case ENCODING_WRITEMASK: + return translateMaskRegister(mcInst, insn.writemask); case ENCODING_RM: return translateRM(mcInst, operand, insn, Dis); case ENCODING_CB: @@ -652,13 +675,12 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, case ENCODING_RW: case ENCODING_RD: case ENCODING_RO: - translateRegister(mcInst, insn.opcodeRegister); - return false; - case ENCODING_I: - return translateFPRegister(mcInst, insn.opcodeModifier); case ENCODING_Rv: translateRegister(mcInst, insn.opcodeRegister); return false; + case ENCODING_FP: + translateFPRegister(mcInst, insn.modRM & 7); + return false; case ENCODING_VVVV: translateRegister(mcInst, insn.vvvv); return false; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index c81a85755f82..52631bcbf2db 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -40,7 +40,7 @@ * @return - The InstructionContext to use when looking up an * an instruction with these attributes. */ -static InstructionContext contextForAttrs(uint8_t attrMask) { +static InstructionContext contextForAttrs(uint16_t attrMask) { return CONTEXTS_SYM[attrMask]; } @@ -57,7 +57,7 @@ static InstructionContext contextForAttrs(uint8_t attrMask) { */ static int modRMRequired(OpcodeType type, InstructionContext insnContext, - uint8_t opcode) { + uint16_t opcode) { const struct ContextDecision* decision = 0; switch (type) { @@ -444,9 +444,60 @@ static int readPrefixes(struct InternalInstruction* insn) { dbgprintf(insn, "Found prefix 0x%hhx", byte); } - insn->vexXopType = TYPE_NO_VEX_XOP; + insn->vectorExtensionType = TYPE_NO_VEX_XOP; - if (byte == 0xc4) { + if (byte == 0x62) { + uint8_t byte1, byte2; + + if (consumeByte(insn, &byte1)) { + dbgprintf(insn, "Couldn't read second byte of EVEX prefix"); + return -1; + } + + if (lookAtByte(insn, &byte2)) { + dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); + return -1; + } + + if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) && + ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) { + insn->vectorExtensionType = TYPE_EVEX; + } + else { + unconsumeByte(insn); /* unconsume byte1 */ + unconsumeByte(insn); /* unconsume byte */ + insn->necessaryPrefixLocation = insn->readerCursor - 2; + } + + if (insn->vectorExtensionType == TYPE_EVEX) { + insn->vectorExtensionPrefix[0] = byte; + insn->vectorExtensionPrefix[1] = byte1; + if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) { + dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); + return -1; + } + if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) { + dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix"); + return -1; + } + + /* We simulate the REX prefix for simplicity's sake */ + if (insn->mode == MODE_64BIT) { + insn->rexPrefix = 0x40 + | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) + | 
(rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) + | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) + | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); + } + + hasOpSize = (VEX_PREFIX_66 == ppFromEVEX3of4(insn->vectorExtensionPrefix[2])); + + dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx", + insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], + insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]); + } + } + else if (byte == 0xc4) { uint8_t byte1; if (lookAtByte(insn, &byte1)) { @@ -455,7 +506,7 @@ static int readPrefixes(struct InternalInstruction* insn) { } if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { - insn->vexXopType = TYPE_VEX_3B; + insn->vectorExtensionType = TYPE_VEX_3B; insn->necessaryPrefixLocation = insn->readerCursor - 1; } else { @@ -463,22 +514,22 @@ static int readPrefixes(struct InternalInstruction* insn) { insn->necessaryPrefixLocation = insn->readerCursor - 1; } - if (insn->vexXopType == TYPE_VEX_3B) { - insn->vexXopPrefix[0] = byte; - consumeByte(insn, &insn->vexXopPrefix[1]); - consumeByte(insn, &insn->vexXopPrefix[2]); + if (insn->vectorExtensionType == TYPE_VEX_3B) { + insn->vectorExtensionPrefix[0] = byte; + consumeByte(insn, &insn->vectorExtensionPrefix[1]); + consumeByte(insn, &insn->vectorExtensionPrefix[2]); /* We simulate the REX prefix for simplicity's sake */ if (insn->mode == MODE_64BIT) { insn->rexPrefix = 0x40 - | (wFromVEX3of3(insn->vexXopPrefix[2]) << 3) - | (rFromVEX2of3(insn->vexXopPrefix[1]) << 2) - | (xFromVEX2of3(insn->vexXopPrefix[1]) << 1) - | (bFromVEX2of3(insn->vexXopPrefix[1]) << 0); + | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) + | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) + | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) + | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); } - switch (ppFromVEX3of3(insn->vexXopPrefix[2])) + switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) { default: break; @@ -488,8 +539,8 @@ static int readPrefixes(struct InternalInstruction* insn) { } dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", - insn->vexXopPrefix[0], insn->vexXopPrefix[1], - insn->vexXopPrefix[2]); + insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], + insn->vectorExtensionPrefix[2]); } } else if (byte == 0xc5) { @@ -501,22 +552,22 @@ static int readPrefixes(struct InternalInstruction* insn) { } if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { - insn->vexXopType = TYPE_VEX_2B; + insn->vectorExtensionType = TYPE_VEX_2B; } else { unconsumeByte(insn); } - if (insn->vexXopType == TYPE_VEX_2B) { - insn->vexXopPrefix[0] = byte; - consumeByte(insn, &insn->vexXopPrefix[1]); + if (insn->vectorExtensionType == TYPE_VEX_2B) { + insn->vectorExtensionPrefix[0] = byte; + consumeByte(insn, &insn->vectorExtensionPrefix[1]); if (insn->mode == MODE_64BIT) { insn->rexPrefix = 0x40 - | (rFromVEX2of2(insn->vexXopPrefix[1]) << 2); + | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); } - switch (ppFromVEX2of2(insn->vexXopPrefix[1])) + switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { default: break; @@ -525,7 +576,9 @@ static int readPrefixes(struct InternalInstruction* insn) { break; } - dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexXopPrefix[0], insn->vexXopPrefix[1]); + dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", + insn->vectorExtensionPrefix[0], + insn->vectorExtensionPrefix[1]); } } else if (byte == 0x8f) { @@ -537,7 +590,7 @@ static int readPrefixes(struct InternalInstruction* insn) { } if ((byte1 & 0x38) 
!= 0x0) { /* 0 in these 3 bits is a POP instruction. */ - insn->vexXopType = TYPE_XOP; + insn->vectorExtensionType = TYPE_XOP; insn->necessaryPrefixLocation = insn->readerCursor - 1; } else { @@ -545,22 +598,22 @@ static int readPrefixes(struct InternalInstruction* insn) { insn->necessaryPrefixLocation = insn->readerCursor - 1; } - if (insn->vexXopType == TYPE_XOP) { - insn->vexXopPrefix[0] = byte; - consumeByte(insn, &insn->vexXopPrefix[1]); - consumeByte(insn, &insn->vexXopPrefix[2]); + if (insn->vectorExtensionType == TYPE_XOP) { + insn->vectorExtensionPrefix[0] = byte; + consumeByte(insn, &insn->vectorExtensionPrefix[1]); + consumeByte(insn, &insn->vectorExtensionPrefix[2]); /* We simulate the REX prefix for simplicity's sake */ if (insn->mode == MODE_64BIT) { insn->rexPrefix = 0x40 - | (wFromXOP3of3(insn->vexXopPrefix[2]) << 3) - | (rFromXOP2of3(insn->vexXopPrefix[1]) << 2) - | (xFromXOP2of3(insn->vexXopPrefix[1]) << 1) - | (bFromXOP2of3(insn->vexXopPrefix[1]) << 0); + | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) + | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) + | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) + | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); } - switch (ppFromXOP3of3(insn->vexXopPrefix[2])) + switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { default: break; @@ -570,8 +623,8 @@ static int readPrefixes(struct InternalInstruction* insn) { } dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx", - insn->vexXopPrefix[0], insn->vexXopPrefix[1], - insn->vexXopPrefix[2]); + insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], + insn->vectorExtensionPrefix[2]); } } else { @@ -646,13 +699,12 @@ static int readOpcode(struct InternalInstruction* insn) { insn->opcodeType = ONEBYTE; - if (insn->vexXopType == TYPE_VEX_3B) + if (insn->vectorExtensionType == TYPE_EVEX) { - switch (mmmmmFromVEX2of3(insn->vexXopPrefix[1])) - { + switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) { default: - dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", - mmmmmFromVEX2of3(insn->vexXopPrefix[1])); + dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)", + mmFromEVEX2of4(insn->vectorExtensionPrefix[1])); return -1; case VEX_LOB_0F: insn->opcodeType = TWOBYTE; @@ -665,18 +717,32 @@ static int readOpcode(struct InternalInstruction* insn) { return consumeByte(insn, &insn->opcode); } } - else if (insn->vexXopType == TYPE_VEX_2B) - { + else if (insn->vectorExtensionType == TYPE_VEX_3B) { + switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) { + default: + dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", + mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); + return -1; + case VEX_LOB_0F: + insn->opcodeType = TWOBYTE; + return consumeByte(insn, &insn->opcode); + case VEX_LOB_0F38: + insn->opcodeType = THREEBYTE_38; + return consumeByte(insn, &insn->opcode); + case VEX_LOB_0F3A: + insn->opcodeType = THREEBYTE_3A; + return consumeByte(insn, &insn->opcode); + } + } + else if (insn->vectorExtensionType == TYPE_VEX_2B) { insn->opcodeType = TWOBYTE; return consumeByte(insn, &insn->opcode); } - else if (insn->vexXopType == TYPE_XOP) - { - switch (mmmmmFromXOP2of3(insn->vexXopPrefix[1])) - { + else if (insn->vectorExtensionType == TYPE_XOP) { + switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) { default: dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", - mmmmmFromVEX2of3(insn->vexXopPrefix[1])); + mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); return -1; case XOP_MAP_SELECT_8: 
insn->opcodeType = XOP8_MAP; @@ -760,10 +826,10 @@ static int readModRM(struct InternalInstruction* insn); */ static int getIDWithAttrMask(uint16_t* instructionID, struct InternalInstruction* insn, - uint8_t attrMask) { + uint16_t attrMask) { BOOL hasModRMExtension; - uint8_t instructionClass; + uint16_t instructionClass; instructionClass = contextForAttrs(attrMask); @@ -826,7 +892,7 @@ static BOOL is16BitEquivalent(const char* orig, const char* equiv) { * nonzero otherwise. */ static int getID(struct InternalInstruction* insn, const void *miiArg) { - uint8_t attrMask; + uint16_t attrMask; uint16_t instructionID; dbgprintf(insn, "getID()"); @@ -836,11 +902,11 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { if (insn->mode == MODE_64BIT) attrMask |= ATTR_64BIT; - if (insn->vexXopType != TYPE_NO_VEX_XOP) { - attrMask |= ATTR_VEX; + if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) { + attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX; - if (insn->vexXopType == TYPE_VEX_3B) { - switch (ppFromVEX3of3(insn->vexXopPrefix[2])) { + if (insn->vectorExtensionType == TYPE_EVEX) { + switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) { case VEX_PREFIX_66: attrMask |= ATTR_OPSIZE; break; @@ -852,11 +918,35 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { break; } - if (lFromVEX3of3(insn->vexXopPrefix[2])) + if (zFromEVEX4of4(insn->vectorExtensionPrefix[3])) + attrMask |= ATTR_EVEXKZ; + if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) + attrMask |= ATTR_EVEXB; + if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])) + attrMask |= ATTR_EVEXK; + if (lFromEVEX4of4(insn->vectorExtensionPrefix[3])) + attrMask |= ATTR_EVEXL; + if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3])) + attrMask |= ATTR_EVEXL2; + } + else if (insn->vectorExtensionType == TYPE_VEX_3B) { + switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) { + case VEX_PREFIX_66: + attrMask |= ATTR_OPSIZE; + break; + case VEX_PREFIX_F3: + attrMask |= ATTR_XS; + break; + case VEX_PREFIX_F2: + attrMask |= ATTR_XD; + break; + } + + if (lFromVEX3of3(insn->vectorExtensionPrefix[2])) attrMask |= ATTR_VEXL; } - else if (insn->vexXopType == TYPE_VEX_2B) { - switch (ppFromVEX2of2(insn->vexXopPrefix[1])) { + else if (insn->vectorExtensionType == TYPE_VEX_2B) { + switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { case VEX_PREFIX_66: attrMask |= ATTR_OPSIZE; break; @@ -868,11 +958,11 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { break; } - if (lFromVEX2of2(insn->vexXopPrefix[1])) + if (lFromVEX2of2(insn->vectorExtensionPrefix[1])) attrMask |= ATTR_VEXL; } - else if (insn->vexXopType == TYPE_XOP) { - switch (ppFromXOP3of3(insn->vexXopPrefix[2])) { + else if (insn->vectorExtensionType == TYPE_XOP) { + switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { case VEX_PREFIX_66: attrMask |= ATTR_OPSIZE; break; @@ -884,7 +974,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { break; } - if (lFromXOP3of3(insn->vexXopPrefix[2])) + if (lFromXOP3of3(insn->vectorExtensionPrefix[2])) attrMask |= ATTR_VEXL; } else { @@ -1033,6 +1123,8 @@ static int readSIB(struct InternalInstruction* insn) { return -1; index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); + if (insn->vectorExtensionType == TYPE_EVEX) + index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4; switch (index) { case 0x4: @@ -1183,6 +1275,10 @@ static int readModRM(struct InternalInstruction* insn) { reg |= rFromREX(insn->rexPrefix) << 3; rm |= 
bFromREX(insn->rexPrefix) << 3; + if (insn->vectorExtensionType == TYPE_EVEX) { + reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; + rm |= xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; + } insn->reg = (Reg)(insn->regBase + reg); @@ -1229,6 +1325,7 @@ static int readModRM(struct InternalInstruction* insn) { case 0x0: insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ switch (rm) { + case 0x14: case 0x4: case 0xc: /* in case REXW.b is set */ insn->eaBase = (insn->addressSize == 4 ? @@ -1252,6 +1349,7 @@ static int readModRM(struct InternalInstruction* insn) { case 0x2: insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); switch (rm) { + case 0x14: case 0x4: case 0xc: /* in case REXW.b is set */ insn->eaBase = EA_BASE_sib; @@ -1312,6 +1410,10 @@ static int readModRM(struct InternalInstruction* insn) { case TYPE_XMM32: \ case TYPE_XMM: \ return prefix##_XMM0 + index; \ + case TYPE_VK1: \ + case TYPE_VK8: \ + case TYPE_VK16: \ + return prefix##_K0 + index; \ case TYPE_MM64: \ case TYPE_MM32: \ case TYPE_MM: \ @@ -1399,45 +1501,12 @@ static int fixupReg(struct InternalInstruction *insn, return 0; } -/* - * readOpcodeModifier - Reads an operand from the opcode field of an - * instruction. Handles AddRegFrm instructions. - * - * @param insn - The instruction whose opcode field is to be read. - * @param inModRM - Indicates that the opcode field is to be read from the - * ModR/M extension; useful for escape opcodes - * @return - 0 on success; nonzero otherwise. - */ -static int readOpcodeModifier(struct InternalInstruction* insn) { - dbgprintf(insn, "readOpcodeModifier()"); - - if (insn->consumedOpcodeModifier) - return 0; - - insn->consumedOpcodeModifier = TRUE; - - switch (insn->spec->modifierType) { - default: - debug("Unknown modifier type."); - return -1; - case MODIFIER_NONE: - debug("No modifier but an operand expects one."); - return -1; - case MODIFIER_OPCODE: - insn->opcodeModifier = insn->opcode - insn->spec->modifierBase; - return 0; - case MODIFIER_MODRM: - insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; - return 0; - } -} - /* * readOpcodeRegister - Reads an operand from the opcode field of an * instruction and interprets it appropriately given the operand width. * Handles AddRegFrm instructions. * - * @param insn - See readOpcodeModifier(). + * @param insn - the instruction whose opcode field is to be read. * @param size - The width (in bytes) of the register being specified. * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means * RAX. 
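Aside: the hunks around this point delete the readOpcodeModifier() machinery and compute the register index directly from the opcode byte. A minimal C sketch of that AddRegFrm rule, assuming only the bFromREX() semantics defined later in X86DisassemblerDecoder.h (the helper name here is hypothetical, not part of the patch):

    #include <stdint.h>

    /* Illustrative only: for AddRegFrm instructions (e.g. 0xB8+rd, mov r32,imm32)
     * the register index is the low 3 bits of the opcode byte, widened to 4 bits
     * by REX.B in 64-bit mode -- the same "(insn->opcode & 7)" expression the
     * patch substitutes for the removed opcodeModifier field. */
    static unsigned opcodeRegisterIndex(uint8_t opcode, uint8_t rexPrefix) {
      unsigned rexB = rexPrefix & 0x1;   /* the field bFromREX() extracts */
      return (rexB << 3) | (opcode & 7); /* 0..15, e.g. EAX..R15D at size 4 */
    }

For the byte sequence 41 B8 (REX.B followed by 0xB8) this yields index 8, i.e. the r8d destination the decoder would report.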
@@ -1446,16 +1515,13 @@ static int readOpcodeModifier(struct InternalInstruction* insn) { static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { dbgprintf(insn, "readOpcodeRegister()"); - if (readOpcodeModifier(insn)) - return -1; - if (size == 0) size = insn->registerSize; switch (size) { case 1: insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) - | insn->opcodeModifier)); + | (insn->opcode & 7))); if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 && insn->opcodeRegister < MODRM_REG_AL + 0x8) { @@ -1467,17 +1533,17 @@ static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { case 2: insn->opcodeRegister = (Reg)(MODRM_REG_AX + ((bFromREX(insn->rexPrefix) << 3) - | insn->opcodeModifier)); + | (insn->opcode & 7))); break; case 4: insn->opcodeRegister = (Reg)(MODRM_REG_EAX + ((bFromREX(insn->rexPrefix) << 3) - | insn->opcodeModifier)); + | (insn->opcode & 7))); break; case 8: insn->opcodeRegister = (Reg)(MODRM_REG_RAX + ((bFromREX(insn->rexPrefix) << 3) - | insn->opcodeModifier)); + | (insn->opcode & 7))); break; } @@ -1550,12 +1616,14 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) { static int readVVVV(struct InternalInstruction* insn) { dbgprintf(insn, "readVVVV()"); - if (insn->vexXopType == TYPE_VEX_3B) - insn->vvvv = vvvvFromVEX3of3(insn->vexXopPrefix[2]); - else if (insn->vexXopType == TYPE_VEX_2B) - insn->vvvv = vvvvFromVEX2of2(insn->vexXopPrefix[1]); - else if (insn->vexXopType == TYPE_XOP) - insn->vvvv = vvvvFromXOP3of3(insn->vexXopPrefix[2]); + if (insn->vectorExtensionType == TYPE_EVEX) + insn->vvvv = vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]); + else if (insn->vectorExtensionType == TYPE_VEX_3B) + insn->vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]); + else if (insn->vectorExtensionType == TYPE_VEX_2B) + insn->vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]); + else if (insn->vectorExtensionType == TYPE_XOP) + insn->vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]); else return -1; @@ -1565,6 +1633,23 @@ static int readVVVV(struct InternalInstruction* insn) { return 0; } +/* + * readMaskRegister - Reads a mask register from the EVEX prefix of an + * instruction. + * + * @param insn - The instruction whose EVEX prefix is to be read. + * @return - 0 on success; nonzero otherwise. + */ +static int readMaskRegister(struct InternalInstruction* insn) { + dbgprintf(insn, "readMaskRegister()"); + + if (insn->vectorExtensionType != TYPE_EVEX) + return -1; + + insn->writemask = aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]); + return 0; +} + /* * readOperands - Consults the specifier for an instruction and consumes all * operands for that instruction, interpreting them as it goes. @@ -1664,9 +1749,7 @@ static int readOperands(struct InternalInstruction* insn) { if (readOpcodeRegister(insn, 0)) return -1; break; - case ENCODING_I: - if (readOpcodeModifier(insn)) - return -1; + case ENCODING_FP: break; case ENCODING_VVVV: needVVVV = 0; /* Mark that we have found a VVVV operand.
*/ @@ -1675,6 +1758,10 @@ if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) return -1; break; + case ENCODING_WRITEMASK: + if (readMaskRegister(insn)) + return -1; + break; case ENCODING_DUP: break; default: diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 6d03d5ca5f36..c4c86ada3fa6 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -45,6 +45,21 @@ extern "C" { #define xFromREX(rex) (((rex) & 0x2) >> 1) #define bFromREX(rex) ((rex) & 0x1) +#define rFromEVEX2of4(evex) (((~(evex)) & 0x80) >> 7) +#define xFromEVEX2of4(evex) (((~(evex)) & 0x40) >> 6) +#define bFromEVEX2of4(evex) (((~(evex)) & 0x20) >> 5) +#define r2FromEVEX2of4(evex) (((~(evex)) & 0x10) >> 4) +#define mmFromEVEX2of4(evex) ((evex) & 0x3) +#define wFromEVEX3of4(evex) (((evex) & 0x80) >> 7) +#define vvvvFromEVEX3of4(evex) (((~(evex)) & 0x78) >> 3) +#define ppFromEVEX3of4(evex) ((evex) & 0x3) +#define zFromEVEX4of4(evex) (((evex) & 0x80) >> 7) +#define l2FromEVEX4of4(evex) (((evex) & 0x40) >> 6) +#define lFromEVEX4of4(evex) (((evex) & 0x20) >> 5) +#define bFromEVEX4of4(evex) (((evex) & 0x10) >> 4) +#define v2FromEVEX4of4(evex) (((~evex) & 0x8) >> 3) +#define aaaFromEVEX4of4(evex) ((evex) & 0x7) + #define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7) #define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6) #define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5) @@ -314,6 +329,16 @@ extern "C" { ENTRY(ZMM30) \ ENTRY(ZMM31) +#define REGS_MASKS \ + ENTRY(K0) \ + ENTRY(K1) \ + ENTRY(K2) \ + ENTRY(K3) \ + ENTRY(K4) \ + ENTRY(K5) \ + ENTRY(K6) \ + ENTRY(K7) + #define REGS_SEGMENT \ ENTRY(ES) \ ENTRY(CS) \ @@ -361,6 +386,7 @@ extern "C" { REGS_XMM \ REGS_YMM \ REGS_ZMM \ + REGS_MASKS \ REGS_SEGMENT \ REGS_DEBUG \ REGS_CONTROL \ @@ -463,7 +489,7 @@ typedef enum { } XOPMapSelect; /* - * VEXPrefixCode - Possible values for the VEX.pp field + * VEXPrefixCode - Possible values for the VEX.pp/EVEX.pp field */ typedef enum { @@ -474,11 +500,12 @@ typedef enum { } VEXPrefixCode; typedef enum { - TYPE_NO_VEX_XOP = 0x0, - TYPE_VEX_2B = 0x1, - TYPE_VEX_3B = 0x2, - TYPE_XOP = 0x3 -} VEXXOPType; + TYPE_NO_VEX_XOP = 0x0, + TYPE_VEX_2B = 0x1, + TYPE_VEX_3B = 0x2, + TYPE_EVEX = 0x3, + TYPE_XOP = 0x4 +} VectorExtensionType; typedef uint8_t BOOL; @@ -536,10 +563,10 @@ struct InternalInstruction { uint8_t prefixPresent[0x100]; /* contains the location (for use with the reader) of the prefix byte */ uint64_t prefixLocations[0x100]; - /* The value of the VEX/XOP prefix, if present */ - uint8_t vexXopPrefix[3]; - /* The length of the VEX prefix (0 if not present) */ - VEXXOPType vexXopType; + /* The value of the vector extension prefix (EVEX/VEX/XOP), if present */ + uint8_t vectorExtensionPrefix[4]; + /* The type of the vector extension prefix */ + VectorExtensionType vectorExtensionType; /* The value of the REX prefix, if present */ uint8_t rexPrefix; /* The location where a mandatory prefix would have to be (i.e., right before @@ -585,6 +612,9 @@ struct InternalInstruction { instructions */ Reg vvvv; + /* The writemask for AVX-512 instructions, which is contained in EVEX.aaa */ + Reg writemask; + /* The ModR/M byte, which contains most register operands and some portion of all memory operands */ BOOL consumedModRM; @@ -604,8 +634,6 @@
struct InternalInstruction { uint64_t immediates[2]; /* A register or immediate operand encoded into the opcode */ - BOOL consumedOpcodeModifier; - uint8_t opcodeModifier; Reg opcodeRegister; /* Portions of the ModR/M byte */ diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index dd1719c64d76..1acaef1b9472 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -53,16 +53,22 @@ * processed correctly. Most of these indicate the presence of particular * prefixes, but ATTR_64BIT is simply an attribute of the decoding context. */ -#define ATTRIBUTE_BITS \ - ENUM_ENTRY(ATTR_NONE, 0x00) \ - ENUM_ENTRY(ATTR_64BIT, 0x01) \ - ENUM_ENTRY(ATTR_XS, 0x02) \ - ENUM_ENTRY(ATTR_XD, 0x04) \ - ENUM_ENTRY(ATTR_REXW, 0x08) \ - ENUM_ENTRY(ATTR_OPSIZE, 0x10) \ - ENUM_ENTRY(ATTR_ADSIZE, 0x20) \ - ENUM_ENTRY(ATTR_VEX, 0x40) \ - ENUM_ENTRY(ATTR_VEXL, 0x80) +#define ATTRIBUTE_BITS \ + ENUM_ENTRY(ATTR_NONE, 0x00) \ + ENUM_ENTRY(ATTR_64BIT, (0x1 << 0)) \ + ENUM_ENTRY(ATTR_XS, (0x1 << 1)) \ + ENUM_ENTRY(ATTR_XD, (0x1 << 2)) \ + ENUM_ENTRY(ATTR_REXW, (0x1 << 3)) \ + ENUM_ENTRY(ATTR_OPSIZE, (0x1 << 4)) \ + ENUM_ENTRY(ATTR_ADSIZE, (0x1 << 5)) \ + ENUM_ENTRY(ATTR_VEX, (0x1 << 6)) \ + ENUM_ENTRY(ATTR_VEXL, (0x1 << 7)) \ + ENUM_ENTRY(ATTR_EVEX, (0x1 << 8)) \ + ENUM_ENTRY(ATTR_EVEXL, (0x1 << 9)) \ + ENUM_ENTRY(ATTR_EVEXL2, (0x1 << 10)) \ + ENUM_ENTRY(ATTR_EVEXK, (0x1 << 11)) \ + ENUM_ENTRY(ATTR_EVEXKZ, (0x1 << 12)) \ + ENUM_ENTRY(ATTR_EVEXB, (0x1 << 13)) #define ENUM_ENTRY(n, v) n = v, enum attributeBits { @@ -73,7 +79,7 @@ enum attributeBits { /* * Combinations of the above attributes that are relevant to instruction - * decode. Although other combinations are possible, they can be reduced to + * decode. Although other combinations are possible, they can be reduced to * these without affecting the ultimately decoded instruction. 
*/ @@ -198,38 +204,38 @@ enum attributeBits { ENUM_ENTRY(IC_EVEX_L2_W_XS_B, 4, "requires EVEX_B, L2, W and XS prefix") \ ENUM_ENTRY(IC_EVEX_L2_W_XD_B, 4, "requires EVEX_B, L2, W and XD prefix") \ ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_B, 4, "requires EVEX_B, L2, W and OpSize") \ - ENUM_ENTRY(IC_EVEX_K_B, 1, "requires EVEX_B and EVEX_K prefix") \ - ENUM_ENTRY(IC_EVEX_XS_K_B, 2, "requires EVEX_B, EVEX_K and the XS prefix") \ - ENUM_ENTRY(IC_EVEX_XD_K_B, 2, "requires EVEX_B, EVEX_K and the XD prefix") \ - ENUM_ENTRY(IC_EVEX_OPSIZE_K_B, 2, "requires EVEX_B, EVEX_K and the OpSize prefix") \ - ENUM_ENTRY(IC_EVEX_W_K_B, 3, "requires EVEX_B, EVEX_K and the W prefix") \ - ENUM_ENTRY(IC_EVEX_W_XS_K_B, 4, "requires EVEX_B, EVEX_K, W, and XS prefix") \ - ENUM_ENTRY(IC_EVEX_W_XD_K_B, 4, "requires EVEX_B, EVEX_K, W, and XD prefix") \ - ENUM_ENTRY(IC_EVEX_W_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, W, and OpSize") \ - ENUM_ENTRY(IC_EVEX_L_K_B, 3, "requires EVEX_B, EVEX_K and the L prefix") \ - ENUM_ENTRY(IC_EVEX_L_XS_K_B, 4, "requires EVEX_B, EVEX_K and the L and XS prefix")\ - ENUM_ENTRY(IC_EVEX_L_XD_K_B, 4, "requires EVEX_B, EVEX_K and the L and XD prefix")\ - ENUM_ENTRY(IC_EVEX_L_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L, and OpSize") \ - ENUM_ENTRY(IC_EVEX_L_W_K_B, 3, "requires EVEX_B, EVEX_K, L and W") \ - ENUM_ENTRY(IC_EVEX_L_W_XS_K_B, 4, "requires EVEX_B, EVEX_K, L, W and XS prefix") \ - ENUM_ENTRY(IC_EVEX_L_W_XD_K_B, 4, "requires EVEX_B, EVEX_K, L, W and XD prefix") \ - ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L, W and OpSize") \ - ENUM_ENTRY(IC_EVEX_L2_K_B, 3, "requires EVEX_B, EVEX_K and the L2 prefix") \ - ENUM_ENTRY(IC_EVEX_L2_XS_K_B, 4, "requires EVEX_B, EVEX_K and the L2 and XS prefix")\ - ENUM_ENTRY(IC_EVEX_L2_XD_K_B, 4, "requires EVEX_B, EVEX_K and the L2 and XD prefix")\ - ENUM_ENTRY(IC_EVEX_L2_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L2, and OpSize") \ - ENUM_ENTRY(IC_EVEX_L2_W_K_B, 3, "requires EVEX_B, EVEX_K, L2 and W") \ - ENUM_ENTRY(IC_EVEX_L2_W_XS_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and XS prefix") \ - ENUM_ENTRY(IC_EVEX_L2_W_XD_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and XD prefix") \ - ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and OpSize") \ - ENUM_ENTRY(IC_EVEX_KZ_B, 1, "requires EVEX_B and EVEX_KZ prefix") \ - ENUM_ENTRY(IC_EVEX_XS_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the XS prefix") \ - ENUM_ENTRY(IC_EVEX_XD_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the XD prefix") \ - ENUM_ENTRY(IC_EVEX_OPSIZE_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the OpSize prefix") \ - ENUM_ENTRY(IC_EVEX_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the W prefix") \ - ENUM_ENTRY(IC_EVEX_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and XS prefix") \ - ENUM_ENTRY(IC_EVEX_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and XD prefix") \ - ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and OpSize") \ + ENUM_ENTRY(IC_EVEX_K_B, 1, "requires EVEX_B and EVEX_K prefix") \ + ENUM_ENTRY(IC_EVEX_XS_K_B, 2, "requires EVEX_B, EVEX_K and the XS prefix") \ + ENUM_ENTRY(IC_EVEX_XD_K_B, 2, "requires EVEX_B, EVEX_K and the XD prefix") \ + ENUM_ENTRY(IC_EVEX_OPSIZE_K_B, 2, "requires EVEX_B, EVEX_K and the OpSize prefix") \ + ENUM_ENTRY(IC_EVEX_W_K_B, 3, "requires EVEX_B, EVEX_K and the W prefix") \ + ENUM_ENTRY(IC_EVEX_W_XS_K_B, 4, "requires EVEX_B, EVEX_K, W, and XS prefix") \ + ENUM_ENTRY(IC_EVEX_W_XD_K_B, 4, "requires EVEX_B, EVEX_K, W, and XD prefix") \ + ENUM_ENTRY(IC_EVEX_W_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, W, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L_K_B, 3, 
"requires EVEX_B, EVEX_K and the L prefix") \ + ENUM_ENTRY(IC_EVEX_L_XS_K_B, 4, "requires EVEX_B, EVEX_K and the L and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L_XD_K_B, 4, "requires EVEX_B, EVEX_K and the L and XD prefix")\ + ENUM_ENTRY(IC_EVEX_L_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L_W_K_B, 3, "requires EVEX_B, EVEX_K, L and W") \ + ENUM_ENTRY(IC_EVEX_L_W_XS_K_B, 4, "requires EVEX_B, EVEX_K, L, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_XD_K_B, 4, "requires EVEX_B, EVEX_K, L, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_K_B,4, "requires EVEX_B, EVEX_K, L, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2_K_B, 3, "requires EVEX_B, EVEX_K and the L2 prefix") \ + ENUM_ENTRY(IC_EVEX_L2_XS_K_B, 4, "requires EVEX_B, EVEX_K and the L2 and XS prefix")\ + ENUM_ENTRY(IC_EVEX_L2_XD_K_B, 4, "requires EVEX_B, EVEX_K and the L2 and XD prefix")\ + ENUM_ENTRY(IC_EVEX_L2_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L2, and OpSize") \ + ENUM_ENTRY(IC_EVEX_L2_W_K_B, 3, "requires EVEX_B, EVEX_K, L2 and W") \ + ENUM_ENTRY(IC_EVEX_L2_W_XS_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and XS prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_XD_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and XD prefix") \ + ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K_B,4, "requires EVEX_B, EVEX_K, L2, W and OpSize") \ + ENUM_ENTRY(IC_EVEX_KZ_B, 1, "requires EVEX_B and EVEX_KZ prefix") \ + ENUM_ENTRY(IC_EVEX_XS_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the XS prefix") \ + ENUM_ENTRY(IC_EVEX_XD_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the XD prefix") \ + ENUM_ENTRY(IC_EVEX_OPSIZE_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the OpSize prefix") \ + ENUM_ENTRY(IC_EVEX_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the W prefix") \ + ENUM_ENTRY(IC_EVEX_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and XS prefix") \ + ENUM_ENTRY(IC_EVEX_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and XD prefix") \ + ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and OpSize") \ ENUM_ENTRY(IC_EVEX_L_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the L prefix") \ ENUM_ENTRY(IC_EVEX_L_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L and XS prefix")\ ENUM_ENTRY(IC_EVEX_L_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L and XD prefix")\ @@ -395,8 +401,8 @@ struct ContextDecision { ENUM_ENTRY(ENCODING_RW, "(AX..DI, R8W..R15W)") \ ENUM_ENTRY(ENCODING_RD, "(EAX..EDI, R8D..R15D)") \ ENUM_ENTRY(ENCODING_RO, "(RAX..RDI, R8..R15)") \ - ENUM_ENTRY(ENCODING_I, "Position on floating-point stack added to the " \ - "opcode byte") \ + ENUM_ENTRY(ENCODING_FP, "Position on floating-point stack in ModR/M " \ + "byte.") \ \ ENUM_ENTRY(ENCODING_Iv, "Immediate of operand size") \ ENUM_ENTRY(ENCODING_Ia, "Immediate of address size") \ @@ -478,6 +484,7 @@ struct ContextDecision { ENUM_ENTRY(TYPE_XMM128, "16-byte") \ ENUM_ENTRY(TYPE_XMM256, "32-byte") \ ENUM_ENTRY(TYPE_XMM512, "64-byte") \ + ENUM_ENTRY(TYPE_VK1, "1-bit") \ ENUM_ENTRY(TYPE_VK8, "8-bit") \ ENUM_ENTRY(TYPE_VK16, "16-bit") \ ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \ @@ -518,9 +525,7 @@ struct OperandSpecifier { */ #define MODIFIER_TYPES \ - ENUM_ENTRY(MODIFIER_NONE) \ - ENUM_ENTRY(MODIFIER_OPCODE) \ - ENUM_ENTRY(MODIFIER_MODRM) + ENUM_ENTRY(MODIFIER_NONE) #define ENUM_ENTRY(n) n, typedef enum { @@ -536,9 +541,6 @@ typedef enum { * its operands. */ struct InstructionSpecifier { - uint8_t modifierType; - uint8_t modifierBase; - /* The macro below must be defined wherever this file is included. 
*/ INSTRUCTION_SPECIFIER_FIELDS }; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt index 28e2460d8233..686a37e61498 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/CMakeLists.txt @@ -1,9 +1,5 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMX86AsmPrinter X86ATTInstPrinter.cpp X86IntelInstPrinter.cpp X86InstComments.cpp ) - -add_dependencies(LLVMX86AsmPrinter X86CommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 44393115cc46..e214e9b5879a 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -123,6 +123,18 @@ void X86ATTInstPrinter::printAVXCC(const MCInst *MI, unsigned Op, } } +void X86ATTInstPrinter::printRoundingControl(const MCInst *MI, unsigned Op, + raw_ostream &O) { + int64_t Imm = MI->getOperand(Op).getImm() & 0x1f; + switch (Imm) { + case 0: O << "{rn-sae}"; break; + case 1: O << "{rd-sae}"; break; + case 2: O << "{ru-sae}"; break; + case 3: O << "{rz-sae}"; break; + + default: llvm_unreachable("Invalid AVX-512 rounding control argument!"); + } +} /// printPCRelImm - This is used to print an immediate value that ends up /// being encoded as a pc-relative value (e.g. for jumps and calls). These /// print slightly differently than normal immediates. For example, a $ is not diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index a8fab72bc0d6..4dc4fe6eabd7 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -43,6 +43,7 @@ public: void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &OS); void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &OS); void printMemOffset(const MCInst *MI, unsigned OpNo, raw_ostream &OS); + void printRoundingControl(const MCInst *MI, unsigned Op, raw_ostream &OS); void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printMemReference(MI, OpNo, O); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index e7e7b151c3bd..320ac5addbbb 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -113,6 +113,19 @@ void X86IntelInstPrinter::printAVXCC(const MCInst *MI, unsigned Op, } } +void X86IntelInstPrinter::printRoundingControl(const MCInst *MI, unsigned Op, + raw_ostream &O) { + int64_t Imm = MI->getOperand(Op).getImm() & 0x1f; + switch (Imm) { + case 0: O << "{rn-sae}"; break; + case 1: O << "{rd-sae}"; break; + case 2: O << "{ru-sae}"; break; + case 3: O << "{rz-sae}"; break; + + default: llvm_unreachable("Invalid AVX-512 rounding control argument!"); + } +} + /// printPCRelImm - This is used to print an immediate value that ends up /// being encoded as a pc-relative value. 
void X86IntelInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index 590bf6812417..90d4e3978491 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -40,6 +40,7 @@ public: void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &O); void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printMemOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printRoundingControl(const MCInst *MI, unsigned Op, raw_ostream &OS); void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { O << "opaque ptr "; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt index 2eb5f25ffd44..3f5a0e2bafb7 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt @@ -9,8 +9,3 @@ add_llvm_library(LLVMX86Desc X86MachORelocationInfo.cpp X86ELFRelocationInfo.cpp ) - -add_dependencies(LLVMX86Desc X86CommonTableGen) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index f8e359b160f3..ab95eb6d3324 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -314,7 +314,7 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, }; - // This CPU doesnt support long nops. If needed add more. + // This CPU doesn't support long nops. If needed add more. // FIXME: Can we get this from the subtarget somehow? // FIXME: We could generated something better than plain 0x90. if (!HasNopl) { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 1ef98141f82b..9c1ff155511b 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -268,10 +268,6 @@ namespace X86II { MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3 MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7 - // MRMInitReg - This form is used for instructions whose source and - // destinations are the same register. - MRMInitReg = 32, - //// MRM_XX - A mod/rm byte of exactly 0xXX. MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36, MRM_C8 = 37, MRM_C9 = 38, MRM_CA = 39, MRM_CB = 40, @@ -596,9 +592,6 @@ namespace X86II { /// inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) { switch (TSFlags & X86II::FormMask) { - case X86II::MRMInitReg: - // FIXME: Remove this form. 
- return -1; default: llvm_unreachable("Unknown FormMask value in getMemoryOperandNo!"); case X86II::Pseudo: case X86II::RawFrm: diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 3861e1ce290a..e450f5dc5fd2 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -65,6 +65,12 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { // Exceptions handling ExceptionsType = ExceptionHandling::DwarfCFI; + + // old assembler lacks some directives + // FIXME: this should really be a check on the assembler characteristics + // rather than OS version + if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6)) + HasWeakDefCanBeHiddenDirective = false; } X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple) @@ -89,8 +95,6 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { TextAlignFillValue = 0x90; - PrivateGlobalPrefix = ".L"; - // Set up DWARF directives HasLEB128 = true; // Target asm supports leb128 directives (little-endian) @@ -127,10 +131,8 @@ getNonexecutableStackSection(MCContext &Ctx) const { void X86MCAsmInfoMicrosoft::anchor() { } X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { - if (Triple.getArch() == Triple::x86_64) { - GlobalPrefix = ""; + if (Triple.getArch() == Triple::x86_64) PrivateGlobalPrefix = ".L"; - } AssemblerDialect = AsmWriterFlavor; @@ -142,10 +144,8 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { void X86MCAsmInfoGNUCOFF::anchor() { } X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) { - if (Triple.getArch() == Triple::x86_64) { - GlobalPrefix = ""; + if (Triple.getArch() == Triple::x86_64) PrivateGlobalPrefix = ".L"; - } AssemblerDialect = AsmWriterFlavor; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 7952607aca0e..54a90f13a80a 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -535,6 +535,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; + bool HasEVEX_RC = false; // VEX_R: opcode extension equivalent to REX.R in // 1's complement (inverted) form @@ -610,6 +611,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // EVEX_b unsigned char EVEX_b = 0; + // EVEX_rc + unsigned char EVEX_rc = 0; + // EVEX_aaa unsigned char EVEX_aaa = 0; @@ -676,6 +680,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // Classify VEX_B, VEX_4V, VEX_R, VEX_X unsigned NumOps = Desc.getNumOperands(); + unsigned RcOperand = NumOps-1; unsigned CurOp = 0; if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0) ++CurOp; @@ -694,7 +699,6 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, ++CurOp; switch (TSFlags & X86II::FormMask) { - case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!"); case X86II::MRMDestMem: { // MRMDestMem instructions forms: // MemAddr, src1(ModR/M) @@ -835,7 +839,12 @@ void
X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_X = 0x0; CurOp++; if (HasVEX_4VOp3) - VEX_4V = getVEXRegisterEncoding(MI, CurOp); + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + if (EVEX_b) { + assert(RcOperand >= CurOp); + EVEX_rc = MI.getOperand(RcOperand).getImm() & 0x3; + HasEVEX_RC = true; + } break; case X86II::MRMDestReg: // MRMDestReg instructions forms: @@ -935,12 +944,19 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, (VEX_4V << 3) | (EVEX_U << 2) | VEX_PP, CurByte, OS); - EmitByte((EVEX_z << 7) | - (EVEX_L2 << 6) | - (VEX_L << 5) | - (EVEX_b << 4) | - (EVEX_V2 << 3) | - EVEX_aaa, CurByte, OS); + if (HasEVEX_RC) + EmitByte((EVEX_z << 7) | + (EVEX_rc << 5) | + (EVEX_b << 4) | + (EVEX_V2 << 3) | + EVEX_aaa, CurByte, OS); + else + EmitByte((EVEX_z << 7) | + (EVEX_L2 << 6) | + (VEX_L << 5) | + (EVEX_b << 4) | + (EVEX_V2 << 3) | + EVEX_aaa, CurByte, OS); } } @@ -974,7 +990,6 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, } switch (TSFlags & X86II::FormMask) { - case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!"); case X86II::MRMSrcReg: if (MI.getOperand(0).isReg() && X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) @@ -1124,34 +1139,28 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::A7: // 0F A7 Need0FPrefix = true; break; + case X86II::XS: // F3 0F case X86II::T8XS: // F3 0F 38 EmitByte(0xF3, CurByte, OS); Need0FPrefix = true; break; + case X86II::XD: // F2 0F case X86II::T8XD: // F2 0F 38 - EmitByte(0xF2, CurByte, OS); - Need0FPrefix = true; - break; case X86II::TAXD: // F2 0F 3A EmitByte(0xF2, CurByte, OS); Need0FPrefix = true; break; - case X86II::XS: // F3 0F - EmitByte(0xF3, CurByte, OS); - Need0FPrefix = true; + case X86II::D8: + case X86II::D9: + case X86II::DA: + case X86II::DB: + case X86II::DC: + case X86II::DD: + case X86II::DE: + case X86II::DF: + EmitByte(0xD8+(((TSFlags & X86II::Op0Mask) - X86II::D8) >> X86II::Op0Shift), + CurByte, OS); break; - case X86II::XD: // F2 0F - EmitByte(0xF2, CurByte, OS); - Need0FPrefix = true; - break; - case X86II::D8: EmitByte(0xD8, CurByte, OS); break; - case X86II::D9: EmitByte(0xD9, CurByte, OS); break; - case X86II::DA: EmitByte(0xDA, CurByte, OS); break; - case X86II::DB: EmitByte(0xDB, CurByte, OS); break; - case X86II::DC: EmitByte(0xDC, CurByte, OS); break; - case X86II::DD: EmitByte(0xDD, CurByte, OS); break; - case X86II::DE: EmitByte(0xDE, CurByte, OS); break; - case X86II::DF: EmitByte(0xDF, CurByte, OS); break; } // Handle REX prefix. @@ -1214,7 +1223,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // It uses the EVEX.aaa field? bool HasEVEX = (TSFlags >> X86II::VEXShift) & X86II::EVEX; bool HasEVEX_K = HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K); - + bool HasEVEX_B = HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_B); + // Determine where the memory operand starts, if present. 
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode); if (MemoryOperand != -1) MemoryOperand += CurOp; @@ -1231,8 +1241,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, unsigned SrcRegNum = 0; switch (TSFlags & X86II::FormMask) { - case X86II::MRMInitReg: - llvm_unreachable("FIXME: Remove this form when the JIT moves to MCCodeEmitter!"); default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n"; llvm_unreachable("Unknown FormMask value in X86MCCodeEmitter!"); case X86II::Pseudo: @@ -1312,6 +1320,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1; if (HasVEX_4VOp3) ++CurOp; + // do not count the rounding control operand + if (HasEVEX_B) + NumOps--; break; case X86II::MRMSrcMem: { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 1cbdafdf151d..403e50dbf8ab 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -387,7 +387,7 @@ static MCInstPrinter *createX86MCInstPrinter(const Target &T, static MCRelocationInfo *createX86MCRelocationInfo(StringRef TT, MCContext &Ctx) { Triple TheTriple(TT); - if (TheTriple.isEnvironmentMachO() && TheTriple.getArch() == Triple::x86_64) + if (TheTriple.isOSBinFormatMachO() && TheTriple.getArch() == Triple::x86_64) return createX86_64MachORelocationInfo(Ctx); else if (TheTriple.isOSBinFormatELF()) return createX86_64ELFRelocationInfo(Ctx); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index eb7c0b1a9965..0f16621b852f 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -362,6 +362,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, MCValue Target, unsigned Log2Size, uint64_t &FixedValue) { + uint64_t OriginalFixedValue = FixedValue; uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); unsigned Type = MachO::GENERIC_RELOC_VANILLA; @@ -431,8 +432,10 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer, // symbol, things can go badly. // // Required for 'as' compatibility. - if (FixupOffset > 0xffffff) + if (FixupOffset > 0xffffff) { + FixedValue = OriginalFixedValue; return false; + } } MachO::any_relocation_info MRE; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index 6da414287cfc..31dc1107eccf 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -65,6 +65,9 @@ unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target, if (Is64Bit) return COFF::IMAGE_REL_AMD64_ADDR64; llvm_unreachable("unsupported relocation type"); + case FK_SecRel_2: + return Is64Bit ? COFF::IMAGE_REL_AMD64_SECTION + : COFF::IMAGE_REL_I386_SECTION; case FK_SecRel_4: return Is64Bit ? 
COFF::IMAGE_REL_AMD64_SECREL : COFF::IMAGE_REL_I386_SECREL; default: diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/README.txt b/external/bsd/llvm/dist/llvm/lib/Target/X86/README.txt index b4285a071879..a30f8c7d9852 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/README.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/README.txt @@ -123,7 +123,7 @@ flags. The instruction selector sometimes misses folding a load into a compare. The pattern is written as (cmp reg, (load p)). Because the compare isn't commutative, it is not matched with the load on both sides. The dag combiner -should be made smart enough to cannonicalize the load into the RHS of a compare +should be made smart enough to canonicalize the load into the RHS of a compare when it can invert the result of the compare for free. //===---------------------------------------------------------------------===// diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/TargetInfo/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/X86/TargetInfo/CMakeLists.txt index b1d0b9f9f9bd..1d8a8c1c118e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/TargetInfo/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/TargetInfo/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMX86Info X86TargetInfo.cpp ) - -add_dependencies(LLVMX86Info X86CommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/TargetInfo/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/X86/TargetInfo/LLVMBuild.txt index 3c64a2255302..6a52ea61d87e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/TargetInfo/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/TargetInfo/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = X86Info parent = X86 -required_libraries = MC Support Target +required_libraries = Support add_to_library_groups = X86 diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/Utils/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/X86/Utils/CMakeLists.txt index 2e72c344d99c..b2697467f26a 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/Utils/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/Utils/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - add_llvm_library(LLVMX86Utils X86ShuffleDecode.cpp ) - -add_dependencies(LLVMX86Utils X86CommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/Utils/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/X86/Utils/LLVMBuild.txt index de0a30fa19c8..fdb886f53a08 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/Utils/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/Utils/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = X86Utils parent = X86 -required_libraries = Core Support +required_libraries = Support add_to_library_groups = X86 diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86.td b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86.td index 65c5552de2af..d55178ea12d8 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86.td @@ -73,6 +73,8 @@ def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true", [Feature64Bit]>; def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true", "Bit testing of memory is slow">; +def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", + "SHLD instruction is slow">; def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem", "IsUAMemFast", "true", "Fast unaligned memory access">; @@ -268,46 +270,53 @@ def : ProcessorModel<"knl", HaswellModel, def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [Feature3DNow]>; def : Proc<"k6-3", [Feature3DNow]>; -def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem]>; -def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem]>; -def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>; -def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>; -def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>; +def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem, + FeatureSlowSHLD]>; +def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem, + FeatureSlowSHLD]>; +def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, + FeatureSlowSHLD]>; +def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, + FeatureSlowSHLD]>; +def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, + FeatureSlowSHLD]>; def : Proc<"k8", [FeatureSSE2, Feature3DNowA, Feature64Bit, - FeatureSlowBTMem]>; + FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"opteron", [FeatureSSE2, Feature3DNowA, Feature64Bit, - FeatureSlowBTMem]>; + FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit, - FeatureSlowBTMem]>; + FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit, - FeatureSlowBTMem]>; + FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, - FeatureSlowBTMem]>; + FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, - FeatureSlowBTMem]>; + FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, - FeatureSlowBTMem]>; + FeatureSlowBTMem, FeatureSlowSHLD]>; def : Proc<"amdfam10", [FeatureSSE4A, Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT, - FeaturePOPCNT, FeatureSlowBTMem]>; + FeaturePOPCNT, FeatureSlowBTMem, + FeatureSlowSHLD]>; // Bobcat def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B, - FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT]>; + FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, + FeatureSlowSHLD]>; // Jaguar def : Proc<"btver2", 
[FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B, FeaturePRFCHW, FeatureAES, FeaturePCLMUL, FeatureBMI, FeatureF16C, FeatureMOVBE, - FeatureLZCNT, FeaturePOPCNT]>; + FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD]>; // Bulldozer def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, - FeatureLZCNT, FeaturePOPCNT]>; + FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD]>; // Piledriver def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, FeaturePCLMUL, FeatureF16C, FeatureLZCNT, - FeaturePOPCNT, FeatureBMI, FeatureTBM, - FeatureFMA]>; + FeaturePOPCNT, FeatureBMI, FeatureTBM, + FeatureFMA, FeatureSlowSHLD]>; // Steamroller def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, @@ -387,12 +396,10 @@ def IntelAsmParserVariant : AsmParserVariant { def ATTAsmWriter : AsmWriter { string AsmWriterClassName = "ATTInstPrinter"; int Variant = 0; - bit isMCAsmWriter = 1; } def IntelAsmWriter : AsmWriter { string AsmWriterClassName = "IntelInstPrinter"; int Variant = 1; - bit isMCAsmWriter = 1; } def X86 : Target { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86AsmPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86AsmPrinter.cpp index 12584411509d..69a69e9e8453 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -51,7 +51,7 @@ using namespace llvm; bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); - if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) { + if (Subtarget->isTargetCOFF()) { bool Intrn = MF.getFunction()->hasInternalLinkage(); OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC @@ -74,56 +74,55 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { /// printSymbolOperand - Print a raw symbol reference operand. This handles /// jump tables, constant pools, global address and external symbols, all of /// which print to a label with various suffixes for relocation types etc. -void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, - raw_ostream &O) { +static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO, + raw_ostream &O) { switch (MO.getType()) { default: llvm_unreachable("unknown symbol type!"); - case MachineOperand::MO_JumpTableIndex: - O << *GetJTISymbol(MO.getIndex()); - break; case MachineOperand::MO_ConstantPoolIndex: - O << *GetCPISymbol(MO.getIndex()); - printOffset(MO.getOffset(), O); + O << *P.GetCPISymbol(MO.getIndex()); + P.printOffset(MO.getOffset(), O); break; case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); MCSymbol *GVSym; if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) - GVSym = GetSymbolWithGlobalValueBase(GV, "$stub"); + GVSym = P.getSymbolWithGlobalValueBase(GV, "$stub"); else if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE || MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE) - GVSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + GVSym = P.getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); else - GVSym = getSymbol(GV); + GVSym = P.getSymbol(GV); // Handle dllimport linkage. 
if (MO.getTargetFlags() == X86II::MO_DLLIMPORT) - GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName()); + GVSym = + P.OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName()); if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) { - MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + MCSymbol *Sym = P.getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo().getGVStubEntry(Sym); + P.MMI->getObjFileInfo().getGVStubEntry(Sym); if (StubSym.getPointer() == 0) StubSym = MachineModuleInfoImpl:: - StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); + StubValueTy(P.getSymbol(GV), !GV->hasInternalLinkage()); } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){ - MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + MCSymbol *Sym = P.getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo().getHiddenGVStubEntry(Sym); + P.MMI->getObjFileInfo().getHiddenGVStubEntry( + Sym); if (StubSym.getPointer() == 0) StubSym = MachineModuleInfoImpl:: - StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); + StubValueTy(P.getSymbol(GV), !GV->hasInternalLinkage()); } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { - MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub"); + MCSymbol *Sym = P.getSymbolWithGlobalValueBase(GV, "$stub"); MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo().getFnStubEntry(Sym); + P.MMI->getObjFileInfo().getFnStubEntry(Sym); if (StubSym.getPointer() == 0) StubSym = MachineModuleInfoImpl:: - StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); + StubValueTy(P.getSymbol(GV), !GV->hasInternalLinkage()); } // If the name begins with a dollar-sign, enclose it in parens. We do this @@ -132,36 +131,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, O << *GVSym; else O << '(' << *GVSym << ')'; - printOffset(MO.getOffset(), O); - break; - } - case MachineOperand::MO_ExternalSymbol: { - const MCSymbol *SymToPrint; - if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { - SmallString<128> TempNameStr; - TempNameStr += StringRef(MO.getSymbolName()); - TempNameStr += StringRef("$stub"); - - MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str()); - MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo().getFnStubEntry(Sym); - if (StubSym.getPointer() == 0) { - TempNameStr.erase(TempNameStr.end()-5, TempNameStr.end()); - StubSym = MachineModuleInfoImpl:: - StubValueTy(OutContext.GetOrCreateSymbol(TempNameStr.str()), - true); - } - SymToPrint = StubSym.getPointer(); - } else { - SymToPrint = GetExternalSymbolSymbol(MO.getSymbolName()); - } - - // If the name begins with a dollar-sign, enclose it in parens. We do this - // to avoid having it look like an integer immediate to the assembler. - if (SymToPrint->getName()[0] != '$') - O << *SymToPrint; - else - O << '(' << *SymToPrint << '('; + P.printOffset(MO.getOffset(), O); break; } } @@ -177,12 +147,12 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, // These affect the name of the symbol, not any suffix. 
break; case X86II::MO_GOT_ABSOLUTE_ADDRESS: - O << " + [.-" << *MF->getPICBaseSymbol() << ']'; + O << " + [.-" << *P.MF->getPICBaseSymbol() << ']'; break; case X86II::MO_PIC_BASE_OFFSET: case X86II::MO_DARWIN_NONLAZY_PIC_BASE: case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: - O << '-' << *MF->getPICBaseSymbol(); + O << '-' << *P.MF->getPICBaseSymbol(); break; case X86II::MO_TLSGD: O << "@TLSGD"; break; case X86II::MO_TLSLD: O << "@TLSLD"; break; @@ -199,41 +169,40 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, case X86II::MO_PLT: O << "@PLT"; break; case X86II::MO_TLVP: O << "@TLVP"; break; case X86II::MO_TLVP_PIC_BASE: - O << "@TLVP" << '-' << *MF->getPICBaseSymbol(); + O << "@TLVP" << '-' << *P.MF->getPICBaseSymbol(); break; case X86II::MO_SECREL: O << "@SECREL32"; break; } } +static void printOperand(X86AsmPrinter &P, const MachineInstr *MI, + unsigned OpNo, raw_ostream &O, + const char *Modifier = 0, unsigned AsmVariant = 0); + /// printPCRelImm - This is used to print an immediate value that ends up /// being encoded as a pc-relative value. These print slightly differently, for /// example, a $ is not emitted. -void X86AsmPrinter::printPCRelImm(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { +static void printPCRelImm(X86AsmPrinter &P, const MachineInstr *MI, + unsigned OpNo, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNo); switch (MO.getType()) { default: llvm_unreachable("Unknown pcrel immediate operand"); case MachineOperand::MO_Register: // pc-relativeness was handled when computing the value in the reg. - printOperand(MI, OpNo, O); + printOperand(P, MI, OpNo, O); return; case MachineOperand::MO_Immediate: O << MO.getImm(); return; - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; case MachineOperand::MO_GlobalAddress: - case MachineOperand::MO_ExternalSymbol: - printSymbolOperand(MO, O); + printSymbolOperand(P, MO, O); return; } } - -void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O, const char *Modifier, - unsigned AsmVariant) { +static void printOperand(X86AsmPrinter &P, const MachineInstr *MI, + unsigned OpNo, raw_ostream &O, const char *Modifier, + unsigned AsmVariant) { const MachineOperand &MO = MI->getOperand(OpNo); switch (MO.getType()) { default: llvm_unreachable("unknown operand type!"); @@ -256,19 +225,17 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, O << MO.getImm(); return; - case MachineOperand::MO_JumpTableIndex: - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_GlobalAddress: - case MachineOperand::MO_ExternalSymbol: { + case MachineOperand::MO_GlobalAddress: { if (AsmVariant == 0) O << '$'; - printSymbolOperand(MO, O); + printSymbolOperand(P, MO, O); break; } } } -void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op, - raw_ostream &O, const char *Modifier) { +static void printLeaMemReference(X86AsmPrinter &P, const MachineInstr *MI, + unsigned Op, raw_ostream &O, + const char *Modifier = NULL) { const MachineOperand &BaseReg = MI->getOperand(Op); const MachineOperand &IndexReg = MI->getOperand(Op+2); const MachineOperand &DispSpec = MI->getOperand(Op+3); @@ -282,14 +249,18 @@ void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op, // HasParenPart - True if we will print out the () part of the mem ref. 
bool HasParenPart = IndexReg.getReg() || HasBaseReg; - if (DispSpec.isImm()) { + switch (DispSpec.getType()) { + default: + llvm_unreachable("unknown operand type!"); + case MachineOperand::MO_Immediate: { int DispVal = DispSpec.getImm(); if (DispVal || !HasParenPart) O << DispVal; - } else { - assert(DispSpec.isGlobal() || DispSpec.isCPI() || - DispSpec.isJTI() || DispSpec.isSymbol()); - printSymbolOperand(MI->getOperand(Op+3), O); + break; + } + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ConstantPoolIndex: + printSymbolOperand(P, MI->getOperand(Op + 3), O); } if (Modifier && strcmp(Modifier, "H") == 0) @@ -301,11 +272,11 @@ void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op, O << '('; if (HasBaseReg) - printOperand(MI, Op, O, Modifier); + printOperand(P, MI, Op, O, Modifier); if (IndexReg.getReg()) { O << ','; - printOperand(MI, Op+2, O, Modifier); + printOperand(P, MI, Op+2, O, Modifier); unsigned ScaleVal = MI->getOperand(Op+1).getImm(); if (ScaleVal != 1) O << ',' << ScaleVal; @@ -314,20 +285,22 @@ void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op, } } -void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op, - raw_ostream &O, const char *Modifier) { +static void printMemReference(X86AsmPrinter &P, const MachineInstr *MI, + unsigned Op, raw_ostream &O, + const char *Modifier = NULL) { assert(isMem(MI, Op) && "Invalid memory reference!"); const MachineOperand &Segment = MI->getOperand(Op+4); if (Segment.getReg()) { - printOperand(MI, Op+4, O, Modifier); + printOperand(P, MI, Op+4, O, Modifier); O << ':'; } - printLeaMemReference(MI, Op, O, Modifier); + printLeaMemReference(P, MI, Op, O, Modifier); } -void X86AsmPrinter::printIntelMemReference(const MachineInstr *MI, unsigned Op, - raw_ostream &O, const char *Modifier, - unsigned AsmVariant){ +static void printIntelMemReference(X86AsmPrinter &P, const MachineInstr *MI, + unsigned Op, raw_ostream &O, + const char *Modifier = NULL, + unsigned AsmVariant = 1) { const MachineOperand &BaseReg = MI->getOperand(Op); unsigned ScaleVal = MI->getOperand(Op+1).getImm(); const MachineOperand &IndexReg = MI->getOperand(Op+2); @@ -336,7 +309,7 @@ void X86AsmPrinter::printIntelMemReference(const MachineInstr *MI, unsigned Op, // If this has a segment register, print it. 
if (SegReg.getReg()) { - printOperand(MI, Op+4, O, Modifier, AsmVariant); + printOperand(P, MI, Op+4, O, Modifier, AsmVariant); O << ':'; } @@ -344,7 +317,7 @@ void X86AsmPrinter::printIntelMemReference(const MachineInstr *MI, unsigned Op, bool NeedPlus = false; if (BaseReg.getReg()) { - printOperand(MI, Op, O, Modifier, AsmVariant); + printOperand(P, MI, Op, O, Modifier, AsmVariant); NeedPlus = true; } @@ -352,13 +325,13 @@ void X86AsmPrinter::printIntelMemReference(const MachineInstr *MI, unsigned Op, if (NeedPlus) O << " + "; if (ScaleVal != 1) O << ScaleVal << '*'; - printOperand(MI, Op+2, O, Modifier, AsmVariant); + printOperand(P, MI, Op+2, O, Modifier, AsmVariant); NeedPlus = true; } if (!DispSpec.isImm()) { if (NeedPlus) O << " + "; - printOperand(MI, Op+3, O, Modifier, AsmVariant); + printOperand(P, MI, Op+3, O, Modifier, AsmVariant); } else { int64_t DispVal = DispSpec.getImm(); if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) { @@ -376,8 +349,8 @@ void X86AsmPrinter::printIntelMemReference(const MachineInstr *MI, unsigned Op, O << ']'; } -bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, - raw_ostream &O) { +static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO, + char Mode, raw_ostream &O) { unsigned Reg = MO.getReg(); switch (Mode) { default: return true; // Unknown mode. @@ -419,37 +392,50 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, // See if this is a generic print operand return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); case 'a': // This is an address. Currently only 'i' and 'r' are expected. - if (MO.isImm()) { + switch (MO.getType()) { + default: + return true; + case MachineOperand::MO_Immediate: O << MO.getImm(); return false; - } - if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) { - printSymbolOperand(MO, O); + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ExternalSymbol: + llvm_unreachable("unexpected operand type!"); + case MachineOperand::MO_GlobalAddress: + printSymbolOperand(*this, MO, O); if (Subtarget->isPICStyleRIPRel()) O << "(%rip)"; return false; - } - if (MO.isReg()) { + case MachineOperand::MO_Register: O << '('; - printOperand(MI, OpNo, O); + printOperand(*this, MI, OpNo, O); O << ')'; return false; } - return true; case 'c': // Don't print "$" before a global var name or constant. 
- if (MO.isImm()) + switch (MO.getType()) { + default: + printOperand(*this, MI, OpNo, O); + break; + case MachineOperand::MO_Immediate: O << MO.getImm(); - else if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) - printSymbolOperand(MO, O); - else - printOperand(MI, OpNo, O); + break; + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ExternalSymbol: + llvm_unreachable("unexpected operand type!"); + case MachineOperand::MO_GlobalAddress: + printSymbolOperand(*this, MO, O); + break; + } return false; case 'A': // Print '*' before a register (it must be a register) if (MO.isReg()) { O << '*'; - printOperand(MI, OpNo, O); + printOperand(*this, MI, OpNo, O); return false; } return true; @@ -460,12 +446,12 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, case 'k': // Print SImode register case 'q': // Print DImode register if (MO.isReg()) - return printAsmMRegister(MO, ExtraCode[0], O); - printOperand(MI, OpNo, O); + return printAsmMRegister(*this, MO, ExtraCode[0], O); + printOperand(*this, MI, OpNo, O); return false; case 'P': // This is the operand of a call, treat specially. - printPCRelImm(MI, OpNo, O); + printPCRelImm(*this, MI, OpNo, O); return false; case 'n': // Negate the immediate or print a '-' before the operand. @@ -479,7 +465,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, } } - printOperand(MI, OpNo, O, /*Modifier*/ 0, AsmVariant); + printOperand(*this, MI, OpNo, O, /*Modifier*/ 0, AsmVariant); return false; } @@ -488,7 +474,7 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, const char *ExtraCode, raw_ostream &O) { if (AsmVariant) { - printIntelMemReference(MI, OpNo, O); + printIntelMemReference(*this, MI, OpNo, O); return false; } @@ -505,19 +491,19 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, // These only apply to registers, ignore on mem. break; case 'H': - printMemReference(MI, OpNo, O, "H"); + printMemReference(*this, MI, OpNo, O, "H"); return false; case 'P': // Don't print @PLT, but do print as memory. - printMemReference(MI, OpNo, O, "no-rip"); + printMemReference(*this, MI, OpNo, O, "no-rip"); return false; } } - printMemReference(MI, OpNo, O); + printMemReference(*this, MI, OpNo, O); return false; } void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { - if (Subtarget->isTargetEnvMacho()) + if (Subtarget->isTargetMacho()) OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); if (Subtarget->isTargetCOFF()) { @@ -544,7 +530,7 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { - if (Subtarget->isTargetEnvMacho()) { + if (Subtarget->isTargetMacho()) { // All darwin targets use mach-o. 
MachineModuleInfoMachO &MMIMacho = MMI->getObjFileInfo(); @@ -645,7 +631,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.EmitSymbolAttribute(S, MCSA_Global); } - if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) { + if (Subtarget->isTargetCOFF()) { X86COFFMachineModuleInfo &COFFMMI = MMI->getObjFileInfo(); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86AsmPrinter.h b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86AsmPrinter.h index 24a768b933bd..cf47dc48bc8b 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86AsmPrinter.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86AsmPrinter.h @@ -27,19 +27,9 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { const X86Subtarget *Subtarget; StackMaps SM; - // Parses operands of PATCHPOINT and STACKMAP to produce stack map Location - // structures. Returns a result location and an iterator to the operand - // immediately following the operands consumed. - // - // This method is implemented in X86MCInstLower.cpp. - static std::pair - stackmapOperandParser(MachineInstr::const_mop_iterator MOI, - MachineInstr::const_mop_iterator MOE, - const TargetMachine &TM); - public: explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), SM(*this, stackmapOperandParser) { + : AsmPrinter(TM, Streamer), SM(*this) { Subtarget = &TM.getSubtarget(); } @@ -55,14 +45,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE; - void printSymbolOperand(const MachineOperand &MO, raw_ostream &O); - - // These methods are used by the tablegen'erated instruction printer. - void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O, - const char *Modifier = 0, unsigned AsmVariant = 0); - void printPCRelImm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); - - bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O); virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS) LLVM_OVERRIDE; @@ -70,15 +52,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS) LLVM_OVERRIDE; - void printMemReference(const MachineInstr *MI, unsigned Op, raw_ostream &O, - const char *Modifier=NULL); - void printLeaMemReference(const MachineInstr *MI, unsigned Op, raw_ostream &O, - const char *Modifier=NULL); - - void printIntelMemReference(const MachineInstr *MI, unsigned Op, - raw_ostream &O, const char *Modifier=NULL, - unsigned AsmVariant = 1); - virtual bool runOnMachineFunction(MachineFunction &F) LLVM_OVERRIDE; }; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86CallingConv.td b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86CallingConv.td index a78b5c0a7967..6185c1cd3873 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86CallingConv.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86CallingConv.td @@ -357,9 +357,16 @@ def CC_X86_64_WebKit_JS : CallingConv<[ // Promote i8/i16 arguments to i32. CCIfType<[i8, i16], CCPromoteToType>, - // Integer/FP values are always stored in stack slots that are 8 bytes in size - // and 8-byte aligned. - CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>> + // Only the first integer argument is passed in a register. + CCIfType<[i32], CCAssignToReg<[EAX]>>, + CCIfType<[i64], CCAssignToReg<[RAX]>>, + + // The remaining integer arguments are passed on the stack.
32-bit integer and + // floating-point arguments are aligned to 4 bytes and stored in 4-byte slots. + // 64-bit integer and floating-point arguments are aligned to 8 bytes and stored + // in 8-byte stack slots. + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64, f64], CCAssignToStack<8, 8>> ]>; // No explicit register is specified for the AnyReg calling convention. The @@ -453,18 +460,34 @@ def CC_X86_32_FastCall : CallingConv<[ CCDelegateTo ]>; -def CC_X86_32_ThisCall : CallingConv<[ +def CC_X86_32_ThisCall_Common : CallingConv<[ + // The first integer argument is passed in ECX + CCIfType<[i32], CCAssignToReg<[ECX]>>, + + // Otherwise, same as everything else. + CCDelegateTo +]>; + +def CC_X86_32_ThisCall_Mingw : CallingConv<[ + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType>, + + CCDelegateTo +]>; + +def CC_X86_32_ThisCall_Win : CallingConv<[ // Promote i8/i16 arguments to i32. CCIfType<[i8, i16], CCPromoteToType>, // Pass sret arguments indirectly through stack. CCIfSRet>, - // The first integer argument is passed in ECX - CCIfType<[i32], CCAssignToReg<[ECX]>>, + CCDelegateTo +]>; - // Otherwise, same as everything else. - CCDelegateTo +def CC_X86_32_ThisCall : CallingConv<[ + CCIfSubtarget<"isTargetCygMing()", CCDelegateTo>, + CCDelegateTo ]>; def CC_X86_32_FastCC : CallingConv<[ @@ -601,6 +624,11 @@ def CSR_MostRegs_64 : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15, RBP, (sequence "XMM%u", 0, 15))>; +def CSR_AllRegs_64 : CalleeSavedRegs<(add CSR_MostRegs_64, RAX, RSP, + (sequence "XMM%u", 16, 31), + (sequence "YMM%u", 0, 31), + (sequence "ZMM%u", 0, 31))>; + // Standard C + YMM6-15 def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86CodeEmitter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86CodeEmitter.cpp index 14385edb1474..0d3989d85102 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86CodeEmitter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86CodeEmitter.cpp @@ -186,10 +186,6 @@ static unsigned determineREX(const MachineInstr &MI) { } switch (Desc.TSFlags & X86II::FormMask) { - case X86II::MRMInitReg: - if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) - REX |= (1 << 0) | (1 << 2); - break; case X86II::MRMSrcReg: { if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) REX |= 1 << 2; @@ -941,18 +937,6 @@ void Emitter::emitVEXOpcodePrefix(uint64_t TSFlags, } switch (TSFlags & X86II::FormMask) { - case X86II::MRMInitReg: - // Duplicate register.
- if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) - VEX_R = 0x0; - - if (HasVEX_4V) - VEX_4V = getVEXRegisterEncoding(MI, CurOp); - if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) - VEX_B = 0x0; - if (HasVEX_4VOp3) - VEX_4V = getVEXRegisterEncoding(MI, CurOp); - break; case X86II::MRMDestMem: { // MRMDestMem instructions forms: // MemAddr, src1(ModR/M) @@ -1432,41 +1416,46 @@ void Emitter::emitInstruction(MachineInstr &MI, break; } - case X86II::MRMInitReg: + case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3: + case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9: + case X86II::MRM_CA: case X86II::MRM_CB: case X86II::MRM_D0: + case X86II::MRM_D1: case X86II::MRM_D4: case X86II::MRM_D5: + case X86II::MRM_D6: case X86II::MRM_D8: case X86II::MRM_D9: + case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC: + case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF: + case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8: MCE.emitByte(BaseOpcode); - // Duplicate register, used by things like MOV8r0 (aka xor reg,reg). - emitRegModRMByte(MI.getOperand(CurOp).getReg(), - getX86RegNum(MI.getOperand(CurOp).getReg())); - ++CurOp; - break; - case X86II::MRM_C1: - MCE.emitByte(BaseOpcode); - MCE.emitByte(0xC1); - break; - case X86II::MRM_C8: - MCE.emitByte(BaseOpcode); - MCE.emitByte(0xC8); - break; - case X86II::MRM_C9: - MCE.emitByte(BaseOpcode); - MCE.emitByte(0xC9); - break; - case X86II::MRM_CA: - MCE.emitByte(BaseOpcode); - MCE.emitByte(0xCA); - break; - case X86II::MRM_CB: - MCE.emitByte(BaseOpcode); - MCE.emitByte(0xCB); - break; - case X86II::MRM_E8: - MCE.emitByte(BaseOpcode); - MCE.emitByte(0xE8); - break; - case X86II::MRM_F0: - MCE.emitByte(BaseOpcode); - MCE.emitByte(0xF0); + unsigned char MRM; + switch (TSFlags & X86II::FormMask) { + default: llvm_unreachable("Invalid Form"); + case X86II::MRM_C1: MRM = 0xC1; break; + case X86II::MRM_C2: MRM = 0xC2; break; + case X86II::MRM_C3: MRM = 0xC3; break; + case X86II::MRM_C4: MRM = 0xC4; break; + case X86II::MRM_C8: MRM = 0xC8; break; + case X86II::MRM_C9: MRM = 0xC9; break; + case X86II::MRM_CA: MRM = 0xCA; break; + case X86II::MRM_CB: MRM = 0xCB; break; + case X86II::MRM_D0: MRM = 0xD0; break; + case X86II::MRM_D1: MRM = 0xD1; break; + case X86II::MRM_D4: MRM = 0xD4; break; + case X86II::MRM_D5: MRM = 0xD5; break; + case X86II::MRM_D6: MRM = 0xD6; break; + case X86II::MRM_D8: MRM = 0xD8; break; + case X86II::MRM_D9: MRM = 0xD9; break; + case X86II::MRM_DA: MRM = 0xDA; break; + case X86II::MRM_DB: MRM = 0xDB; break; + case X86II::MRM_DC: MRM = 0xDC; break; + case X86II::MRM_DD: MRM = 0xDD; break; + case X86II::MRM_DE: MRM = 0xDE; break; + case X86II::MRM_DF: MRM = 0xDF; break; + case X86II::MRM_E8: MRM = 0xE8; break; + case X86II::MRM_F0: MRM = 0xF0; break; + case X86II::MRM_F8: MRM = 0xF8; break; + case X86II::MRM_F9: MRM = 0xF9; break; + } + MCE.emitByte(MRM); break; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86FastISel.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86FastISel.cpp index 97f96ab72c24..7be2a14a44f2 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86FastISel.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86FastISel.cpp @@ -1508,8 +1508,13 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) { unsigned Op2Reg = getRegForValue(I->getOperand(2)); if (Op2Reg == 0) return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr)) - .addReg(Op0Reg).addReg(Op0Reg); + // Selects operate on i1, however, Op0Reg is 8 bits 
wide and may contain + // garbage. Indeed, only the least significant bit is supposed to be accurate. + // If we read more than the lsb, we may see non-zero values whereas the lsb + // is zero. Therefore, we have to truncate Op0Reg to i1 for the select. + // This is achieved by performing TEST against 1. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8ri)) + .addReg(Op0Reg).addImm(1); unsigned ResultReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addReg(Op1Reg).addReg(Op2Reg); @@ -1696,6 +1701,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { const Value *Op1 = I.getArgOperand(0); // The guard's value. const AllocaInst *Slot = cast(I.getArgOperand(1)); + MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]); + // Grab the frame index. X86AddressMode AM; if (!X86SelectAddress(Slot, AM)) return false; @@ -1863,7 +1870,7 @@ static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget, const ImmutableCallSite &CS) { if (Subtarget.is64Bit()) return 0; - if (Subtarget.isTargetWindows()) + if (Subtarget.getTargetTriple().isOSMSVCRT()) return 0; CallingConv::ID CC = CS.getCallingConv(); if (CC == CallingConv::Fast || CC == CallingConv::GHC) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86FrameLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86FrameLowering.cpp index a06ba9d750a9..0c5209cf3eed 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -50,7 +50,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { return (MF.getTarget().Options.DisableFramePointerElim(MF) || RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken() || MF.hasMSInlineAsm() || + MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() || MF.getInfo()->getForceFramePointer() || MMI.callsUnwindInit() || MMI.callsEHReturn()); } @@ -606,16 +606,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // responsible for adjusting the stack pointer. Touching the stack at 4K // increments is necessary to ensure that the guard pages used by the OS // virtual memory manager are allocated in correct sequence. - if (NumBytes >= 4096 && STI.isOSWindows() && !STI.isTargetEnvMacho()) { + if (NumBytes >= 4096 && STI.isOSWindows() && !STI.isTargetMacho()) { const char *StackProbeSymbol; - bool isSPUpdateNeeded = false; if (Is64Bit) { - if (STI.isTargetCygMing()) - StackProbeSymbol = "___chkstk"; - else { + if (STI.isTargetCygMing()) { + StackProbeSymbol = "___chkstk_ms"; + } else { StackProbeSymbol = "__chkstk"; - isSPUpdateNeeded = true; } } else if (STI.isTargetCygMing()) StackProbeSymbol = "_alloca"; @@ -657,15 +655,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit) .setMIFlag(MachineInstr::FrameSetup); - // MSVC x64's __chkstk does not adjust %rsp itself. - // It also does not clobber %rax so we can reuse it when adjusting %rsp. - if (isSPUpdateNeeded) { + if (Is64Bit) { + // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp + // themselves. They also do not clobber %rax, so we can reuse it when + // adjusting %rsp.
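// Aside (illustration, not part of the patch): the contract described in the
// comment above, sketched at the assembly level. On Win64 the probe helper
// only walks the guard pages; the SUB64rr emitted below is the compiler's own
// stack-pointer adjustment:
//
//   movl  $NumBytes, %eax    # requested allocation
//   callq ___chkstk_ms       # probes each 4K page, leaves %rsp alone
//   subq  %rax, %rsp         # this hunk: adjust %rsp ourselves
//
// As a standalone C++ sketch (hypothetical helper, not the emitter), the
// probing obligation is just to touch every page in descending order:
static void probePages(volatile char *SP, unsigned long long NumBytes) {
  const unsigned long long PageSize = 4096;
  for (unsigned long long Off = PageSize; Off <= NumBytes; Off += PageSize)
    SP[-static_cast<long long>(Off)] = 0; // fault in guard pages in sequence
}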
BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), StackPtr) .addReg(StackPtr) .addReg(X86::RAX) .setMIFlag(MachineInstr::FrameSetup); } - if (isEAXAlive) { // Restore EAX MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 36d16907bfef..bc751d37db69 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -344,7 +344,7 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { // addl %gs:0, %eax // if the block also has an access to a second TLS address this will save // a load. - // FIXME: This is probably also true for non TLS addresses. + // FIXME: This is probably also true for non-TLS addresses. if (Op1.getOpcode() == X86ISD::Wrapper) { SDValue Val = Op1.getOperand(0); if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86ISelLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86ISelLowering.cpp index 12d40c488ada..5e19f2802fec 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -180,7 +180,7 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { const X86Subtarget *Subtarget = &TM.getSubtarget(); bool is64Bit = Subtarget->is64Bit(); - if (Subtarget->isTargetEnvMacho()) { + if (Subtarget->isTargetMacho()) { if (is64Bit) return new X86_64MachoTargetObjectFile(); return new TargetLoweringObjectFileMachO(); @@ -190,7 +190,7 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { return new X86LinuxTargetObjectFile(); if (Subtarget->isTargetELF()) return new TargetLoweringObjectFileELF(); - if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) + if (Subtarget->isTargetCOFF()) return new TargetLoweringObjectFileCOFF(); llvm_unreachable("unknown subtarget type"); } @@ -632,7 +632,7 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - if (Subtarget->isOSWindows() && !Subtarget->isTargetEnvMacho()) + if (Subtarget->isOSWindows() && !Subtarget->isTargetMacho()) setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32, Custom); else if (TM.Options.EnableSegmentedStacks) @@ -1306,9 +1306,15 @@ void X86TargetLowering::resetOperationActions() { addRegisterClass(MVT::v8i64, &X86::VR512RegClass); addRegisterClass(MVT::v8f64, &X86::VR512RegClass); + addRegisterClass(MVT::i1, &X86::VK1RegClass); addRegisterClass(MVT::v8i1, &X86::VK8RegClass); addRegisterClass(MVT::v16i1, &X86::VK16RegClass); + setOperationAction(ISD::BR_CC, MVT::i1, Expand); + setOperationAction(ISD::SETCC, MVT::i1, Custom); + setOperationAction(ISD::XOR, MVT::i1, Legal); + setOperationAction(ISD::OR, MVT::i1, Legal); + setOperationAction(ISD::AND, MVT::i1, Legal); setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, Legal); setOperationAction(ISD::LOAD, MVT::v16f32, Legal); setOperationAction(ISD::LOAD, MVT::v8f64, Legal); @@ -1352,7 +1358,7 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal); setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal); - setOperationAction(ISD::TRUNCATE, MVT::i1, Legal); + setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom); @@ -1370,12 +1376,15 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Legal); setOperationAction(ISD::SETCC, MVT::v16i1, Custom); setOperationAction(ISD::SETCC, MVT::v8i1, Custom); setOperationAction(ISD::MUL, MVT::v8i64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom); setOperationAction(ISD::SELECT, MVT::v8f64, Custom); @@ -1548,14 +1557,13 @@ void X86TargetLowering::resetOperationActions() { EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { if (!VT.isVector()) - return MVT::i8; + return Subtarget->hasAVX512() ? MVT::i1: MVT::i8; - const TargetMachine &TM = getTargetMachine(); - if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512()) + if (Subtarget->hasAVX512()) switch(VT.getVectorNumElements()) { case 8: return MVT::v8i1; case 16: return MVT::v16i1; - } + } return VT.changeVectorElementTypeToInteger(); } @@ -2175,7 +2183,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, MachineFrameInfo *MFI = MF.getFrameInfo(); bool Is64Bit = Subtarget->is64Bit(); - bool IsWindows = Subtarget->isTargetWindows(); bool IsWin64 = Subtarget->isCallingConvWin64(CallConv); assert(!(isVarArg && IsTailCallConvention(CallConv)) && @@ -2222,6 +2229,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, RC = &X86::VR128RegClass; else if (RegVT == MVT::x86mmx) RC = &X86::VR64RegClass; + else if (RegVT == MVT::i1) + RC = &X86::VK1RegClass; else if (RegVT == MVT::v8i1) RC = &X86::VK8RegClass; else if (RegVT == MVT::v16i1) @@ -2420,7 +2429,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, } else { FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. 
- if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows && + if (!Is64Bit && !IsTailCallConvention(CallConv) && + !Subtarget->getTargetTriple().isOSMSVCRT() && argsAreStructReturn(Ins) == StackStructReturn) FuncInfo->setBytesToPopOnReturn(4); } @@ -2509,7 +2519,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, MachineFunction &MF = DAG.getMachineFunction(); bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isCallingConvWin64(CallConv); - bool IsWindows = Subtarget->isTargetWindows(); StructReturnType SR = callIsStructReturn(Outs); bool IsSibcall = false; @@ -2903,7 +2912,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, getTargetMachine().Options.GuaranteedTailCallOpt)) NumBytesForCalleeToPush = NumBytes; // Callee pops everything - else if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows && + else if (!Is64Bit && !IsTailCallConvention(CallConv) && + !Subtarget->getTargetTriple().isOSMSVCRT() && SR == StackStructReturn) // If this is a call to a struct-return function, the callee // pops the hidden struct pointer, so we have to push it back. @@ -3092,9 +3102,13 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (isCalleeStructRet || isCallerStructRet) return false; - // An stdcall caller is expected to clean up its arguments; the callee - // isn't going to do that. - if (!CCMatch && CallerCC == CallingConv::X86_StdCall) + // An stdcall/thiscall caller is expected to clean up its arguments; the + // callee isn't going to do that. + // FIXME: this is more restrictive than needed. We could produce a tailcall + // when the stack adjustment matches. For example, with a thiscall that takes + // only one argument. + if (!CCMatch && (CallerCC == CallingConv::X86_StdCall || + CallerCC == CallingConv::X86_ThisCall)) return false; // Do not sibcall optimize vararg calls unless all arguments are passed via @@ -3415,6 +3429,24 @@ bool X86::isCalleePop(CallingConv::ID CallingConv, } } +/// \brief Return true if the condition is an unsigned comparison operation. +static bool isX86CCUnsigned(unsigned X86CC) { + switch (X86CC) { + default: llvm_unreachable("Invalid integer condition!"); + case X86::COND_E: return true; + case X86::COND_G: return false; + case X86::COND_GE: return false; + case X86::COND_L: return false; + case X86::COND_LE: return false; + case X86::COND_NE: return true; + case X86::COND_B: return true; + case X86::COND_A: return true; + case X86::COND_BE: return true; + case X86::COND_AE: return true; + } + llvm_unreachable("covered switch fell through?!"); +} + /// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86 /// specific condition code, returning the condition code and the LHS/RHS of the /// comparison to make. @@ -4208,7 +4240,7 @@ static bool isVPERMILPMask(ArrayRef Mask, MVT VT) { unsigned NumLanes = VT.getSizeInBits()/128; unsigned LaneSize = NumElts/NumLanes; // 2 or 4 elements in one lane - + SmallVector ExpectedMaskVal(LaneSize, -1); for (unsigned l = 0; l != NumElts; l += LaneSize) { for (unsigned i = 0; i != LaneSize; ++i) { @@ -5395,7 +5427,8 @@ LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) { /// rather than undef via VZEXT_LOAD, but we do not detect that case today. /// There's even a handy isZeroNode for that purpose. 
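// Illustrative aside (standalone sketch, not the DAG code): the fold that
// EltsFromConsecutiveLoads performs replaces N adjacent element loads with a
// single wide load. With memcpy standing in for the one 128-bit vector load,
// the shape is:
#include <array>
#include <cstdint>
#include <cstring>
static std::array<uint32_t, 4> loadAsV4i32(const uint32_t *Base) {
  std::array<uint32_t, 4> V;
  std::memcpy(V.data(), Base, sizeof(V)); // one 16-byte access, not four loads
  return V;
}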
static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl &Elts, - SDLoc &DL, SelectionDAG &DAG) { + SDLoc &DL, SelectionDAG &DAG, + bool isAfterLegalize) { EVT EltVT = VT.getVectorElementType(); unsigned NumElems = Elts.size(); @@ -5431,7 +5464,13 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl &Elts, // load of the entire vector width starting at the base pointer. If we found // consecutive loads for the low half, generate a vzext_load node. if (LastLoadedElt == NumElems - 1) { + + if (isAfterLegalize && + !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT)) + return SDValue(); + SDValue NewLd = SDValue(); + if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16) NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), LDBase->getPointerInfo(), @@ -6075,7 +6114,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { V[i] = Op.getOperand(i); // Check for elements which are consecutive loads. - SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG); + SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false); if (LD.getNode()) return LD; @@ -6150,14 +6189,27 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { if(ResVT.is256BitVector()) return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl); + if (Op.getNumOperands() == 4) { + MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(), + ResVT.getVectorNumElements()/2); + SDValue V3 = Op.getOperand(2); + SDValue V4 = Op.getOperand(3); + return Concat256BitVectors(Concat128BitVectors(V1, V2, HalfVT, NumElems/2, DAG, dl), + Concat128BitVectors(V3, V4, HalfVT, NumElems/2, DAG, dl), ResVT, NumElems, DAG, dl); + } return Concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl); } static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { - assert(Op.getNumOperands() == 2); + MVT LLVM_ATTRIBUTE_UNUSED VT = Op.getSimpleValueType(); + assert((VT.is256BitVector() && Op.getNumOperands() == 2) || + (VT.is512BitVector() && (Op.getNumOperands() == 2 || + Op.getNumOperands() == 4))); - // AVX/AVX-512 can use the vinsertf128 instruction to create 256-bit vectors + // AVX can use the vinsertf128 instruction to create 256-bit vectors // from two other 128-bit ones. + + // A 512-bit vector may contain 2 256-bit vectors or 4 128-bit vectors return LowerAVXCONCAT_VECTORS(Op, DAG); } @@ -7634,6 +7686,39 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { return SDValue(); } +/// Extract one bit from a mask vector, like v16i1 or v8i1. +/// AVX-512 feature. +static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) { + SDValue Vec = Op.getOperand(0); + SDLoc dl(Vec); + MVT VecVT = Vec.getSimpleValueType(); + SDValue Idx = Op.getOperand(1); + MVT EltVT = Op.getSimpleValueType(); + + assert((EltVT == MVT::i1) && "Unexpected operands in ExtractBitFromMaskVector"); + + // A variable index can't be handled in mask registers; + // extend the vector to VR512 + if (!isa(Idx)) { + MVT ExtVT = (VecVT == MVT::v8i1 ?
MVT::v8i64 : MVT::v16i32); + SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Vec); + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + ExtVT.getVectorElementType(), Ext, Idx); + return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); + } + + unsigned IdxVal = cast(Idx)->getZExtValue(); + if (IdxVal) { + unsigned MaxSift = VecVT.getSizeInBits() - 1; + Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec, + DAG.getConstant(MaxSift - IdxVal, MVT::i8)); + Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec, + DAG.getConstant(MaxSift, MVT::i8)); + } + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i1, Vec, + DAG.getIntPtrConstant(0)); +} + SDValue X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { @@ -7641,6 +7726,10 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SDValue Vec = Op.getOperand(0); MVT VecVT = Vec.getSimpleValueType(); SDValue Idx = Op.getOperand(1); + + if (Op.getSimpleValueType() == MVT::i1) + return ExtractBitFromMaskVector(Op, DAG); + if (!isa(Idx)) { if (VecVT.is512BitVector() || (VecVT.is256BitVector() && Subtarget->hasInt256() && @@ -7650,7 +7739,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, MVT::getIntegerVT(VecVT.getVectorElementType().getSizeInBits()); MVT MaskVT = MVT::getVectorVT(MaskEltVT, VecVT.getSizeInBits() / MaskEltVT.getSizeInBits()); - + Idx = DAG.getZExtOrTrunc(Idx, dl, MaskEltVT); SDValue Mask = DAG.getNode(X86ISD::VINSERT, dl, MaskVT, getZeroVector(MaskVT, Subtarget, DAG, dl), @@ -8415,15 +8504,20 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { /// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values /// and take a 2 x i32 value to shift plus a shift amount. -SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{ +static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); unsigned VTBits = VT.getSizeInBits(); SDLoc dl(Op); bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); + // X86ISD::SHLD and X86ISD::SHRD have defined overflow behavior but the + // generic ISD nodes haven't. Insert an AND to be safe; it's optimized away + // during isel. + SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, + DAG.getConstant(VTBits - 1, MVT::i8)); SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi, DAG.getConstant(VTBits - 1, MVT::i8)) : DAG.getConstant(0, VT); @@ -8431,12 +8525,15 @@ SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{ SDValue Tmp2, Tmp3; if (Op.getOpcode() == ISD::SHL_PARTS) { Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt); - Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt); } else { Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt); - Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, ShAmt); + Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt); } + // If the shift amount is larger than or equal to the width of a part, we + // can't rely on the results of shld/shrd. Insert a test and select the + // appropriate values for large shift amounts.
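// Standalone illustration (plain C++, assuming 32-bit parts and Amt < 64; not
// the DAG lowering itself) of why both the AND and the test-and-select are
// needed: shld/shrd mask the amount modulo the part width, so the out-of-range
// case has to be patched up with an explicit select.
#include <cstdint>
static void shl64Parts(uint32_t Lo, uint32_t Hi, unsigned Amt,
                       uint32_t &OutLo, uint32_t &OutHi) {
  unsigned Safe = Amt & 31;                        // the SafeShAmt AND above
  uint32_t Tmp2 = Safe ? (Hi << Safe) | (Lo >> (32 - Safe)) : Hi; // shld
  uint32_t Tmp3 = Lo << Safe;                      // plain shl on the low part
  if (Amt & 32) { OutHi = Tmp3; OutLo = 0; }       // Amt >= 32: select fixup
  else          { OutHi = Tmp2; OutLo = Tmp3; }    // Amt < 32: shld is valid
}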
SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, DAG.getConstant(VTBits, MVT::i8)); SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32, @@ -8461,12 +8558,12 @@ SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { - EVT SrcVT = Op.getOperand(0).getValueType(); + MVT SrcVT = Op.getOperand(0).getSimpleValueType(); if (SrcVT.isVector()) return SDValue(); - assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 && + assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 && "Unknown SINT_TO_FP to lower!"); // These are really Legal; return the operand so the caller accepts it as @@ -8670,15 +8767,14 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const { SDValue N0 = Op.getOperand(0); - EVT SVT = N0.getValueType(); + MVT SVT = N0.getSimpleValueType(); SDLoc dl(Op); assert((SVT == MVT::v4i8 || SVT == MVT::v4i16 || SVT == MVT::v8i8 || SVT == MVT::v8i16) && "Custom UINT_TO_FP is not supported!"); - EVT NVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, - SVT.getVectorNumElements()); + MVT NVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements()); return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0)); } @@ -8697,8 +8793,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0); - EVT SrcVT = N0.getValueType(); - EVT DstVT = Op.getValueType(); + MVT SrcVT = N0.getSimpleValueType(); + MVT DstVT = Op.getSimpleValueType(); if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64) return LowerUINT_TO_FP_i64(Op, DAG); if (SrcVT == MVT::i32 && X86ScalarSSEf64) @@ -8912,9 +9008,9 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, static SDValue LowerZERO_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op->getValueType(0).getSimpleVT(); + MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); - MVT InVT = In.getValueType().getSimpleVT(); + MVT InVT = In.getSimpleValueType(); SDLoc DL(Op); unsigned int NumElts = VT.getVectorNumElements(); if (NumElts != 8 && NumElts != 16) @@ -8975,9 +9071,21 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget, SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - MVT VT = Op.getSimpleValueType(); + MVT VT = Op.getSimpleValueType(); SDValue In = Op.getOperand(0); MVT InVT = In.getSimpleValueType(); + + if (VT == MVT::i1) { + assert((InVT.isInteger() && (InVT.getSizeInBits() <= 64)) && + "Invalid scalar TRUNCATE operation"); + if (InVT == MVT::i32) + return SDValue(); + if (InVT.getSizeInBits() == 64) + In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::i32, In); + else if (InVT.getSizeInBits() < 32) + In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In); + return DAG.getNode(ISD::TRUNCATE, DL, VT, In); + } assert(VT.getVectorNumElements() == InVT.getVectorNumElements() && "Invalid TRUNCATE operation"); @@ -9100,8 +9208,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->hasFp256() && "256-bit vector without AVX!"); unsigned NumElems = VT.getVectorNumElements(); - EVT NVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - NumElems * 2); + MVT NVT = MVT::getVectorVT(VT.getVectorElementType(), NumElems * 2); SmallVector MaskVec(NumElems * 2, -1); 
// Prepare truncation shuffle mask @@ -9171,7 +9278,7 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) { In, DAG.getUNDEF(SVT))); } -SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) { LLVMContext *Context = DAG.getContext(); SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); @@ -9189,7 +9296,8 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle, APInt(32, ~(1U << 31)))); C = ConstantVector::getSplat(NumElts, C); - SDValue CPIdx = DAG.getConstantPool(C, getPointerTy()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy()); unsigned Alignment = cast(CPIdx)->getAlignment(); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), @@ -9205,7 +9313,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask); } -SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) { LLVMContext *Context = DAG.getContext(); SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); @@ -9223,7 +9331,8 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle, APInt(32, 1U << 31))); C = ConstantVector::getSplat(NumElts, C); - SDValue CPIdx = DAG.getConstantPool(C, getPointerTy()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy()); unsigned Alignment = cast(CPIdx)->getAlignment(); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), @@ -9240,7 +9349,8 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask); } -SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); LLVMContext *Context = DAG.getContext(); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -9276,7 +9386,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); } Constant *C = ConstantVector::get(CV); - SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); + SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16); SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, false, false, 16); @@ -9309,7 +9419,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { CV.push_back(ConstantFP::get(*Context, APFloat(Sem, APInt(32, 0)))); } C = ConstantVector::get(CV); - CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); + CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16); SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, false, false, 16); @@ -9449,11 +9559,14 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, // See if we can use the EFLAGS value from the operand instead of // doing a separate TEST. TEST always sets OF and CF to 0, so unless // we prove that the arithmetic won't overflow, we can't use OF or CF. 
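// Aside: a concrete instance of the EFLAGS reuse described above. For code
// like
//   int S = A + B; if (S != 0) ...
// the ADD already sets ZF according to the sum, so no separate TEST is
// needed; but a consumer that needs the OF or CF semantics of a compare with
// zero cannot reuse the ADD's flags, since only TEST guarantees OF = CF = 0.
// Minimal sketch:
static bool sumIsNonZero(int A, int B) {
  int S = A + B;   // on x86 the ADD sets ZF when the result is zero
  return S != 0;   // SETNE/JNE can consume the ADD's flags; no TEST emitted
}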
- if (Op.getResNo() != 0 || NeedOF || NeedCF) + if (Op.getResNo() != 0 || NeedOF || NeedCF) { // Emit a CMP with 0, which is the TEST pattern. + if (Op.getValueType() == MVT::i1) + return DAG.getNode(X86ISD::CMP, dl, MVT::i1, Op, + DAG.getConstant(0, MVT::i1)); return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, Op.getValueType())); - + } unsigned Opcode = 0; unsigned NumOperands = 0; @@ -9638,13 +9751,32 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, /// equivalent. SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, SelectionDAG &DAG) const { - if (ConstantSDNode *C = dyn_cast(Op1)) + SDLoc dl(Op0); + if (ConstantSDNode *C = dyn_cast(Op1)) { if (C->getAPIntValue() == 0) return EmitTest(Op0, X86CC, DAG); - SDLoc dl(Op0); + if (Op0.getValueType() == MVT::i1) { + Op0 = DAG.getNode(ISD::XOR, dl, MVT::i1, Op0, + DAG.getConstant(-1, MVT::i1)); + return DAG.getNode(X86ISD::CMP, dl, MVT::i1, Op0, + DAG.getConstant(0, MVT::i1)); + } + } + if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 || Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) { + // Do the comparison at i32 if it's smaller. This avoids subregister + // aliasing issues. Keep the smaller reference if we're optimizing for + // size, however, as that'll allow better folding of memory operations. + if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 && + !DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::MinSize)) { + unsigned ExtendOp = + isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; + Op0 = DAG.getNode(ExtendOp, dl, MVT::i32, Op0); + Op1 = DAG.getNode(ExtendOp, dl, MVT::i32, Op1); + } // Use SUB instead of CMP to enable CSE between SUB and CMP. SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32); SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, @@ -9934,7 +10066,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // operations may be required for some comparisons. unsigned Opc; bool Swap = false, Invert = false, FlipSigns = false, MinMax = false; - + switch (SetCCOpcode) { default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETNE: Invert = true; @@ -9951,23 +10083,23 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, case ISD::SETULE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT; FlipSigns = true; Invert = true; break; } - + // Special case: Use min/max operations for SETULE/SETUGE MVT VET = VT.getVectorElementType(); bool hasMinMax = (Subtarget->hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) || (Subtarget->hasSSE2() && (VET == MVT::i8)); - + if (hasMinMax) { switch (SetCCOpcode) { default: break; case ISD::SETULE: Opc = X86ISD::UMIN; MinMax = true; break; case ISD::SETUGE: Opc = X86ISD::UMAX; MinMax = true; break; } - + if (MinMax) { Swap = false; Invert = false; FlipSigns = false; } } - + if (Swap) std::swap(Op0, Op1); @@ -10054,7 +10186,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // If the logical-not of the result is required, perform that now. 
if (Invert) Result = DAG.getNOT(dl, Result, VT); - + if (MinMax) Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result); @@ -10067,7 +10199,8 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG); - assert(VT == MVT::i8 && "SetCC type must be 8-bit integer"); + assert(((!Subtarget->hasAVX512() && VT == MVT::i8) || (VT == MVT::i1)) + && "SetCC type must be 8-bit or 1-bit integer"); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDLoc dl(Op); @@ -10102,7 +10235,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (!Invert) return Op0; CCode = X86::GetOppositeBranchCondition(CCode); - return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + return DAG.getNode(X86ISD::SETCC, dl, VT, DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); } } @@ -10114,8 +10247,8 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG); EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); - return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), EFLAGS); + return DAG.getNode(X86ISD::SETCC, dl, VT, + DAG.getConstant(X86CC, MVT::i8), EFLAGS); } // isX86LogicalCmp - Return true if opcode is a X86 logical comparison. @@ -10180,8 +10313,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { cast(Cond.getOperand(2))->get(), CondOp0, CondOp1); if (SSECC != 8) { - unsigned Opcode = VT == MVT::f32 ? X86ISD::FSETCCss : X86ISD::FSETCCsd; - SDValue Cmp = DAG.getNode(Opcode, DL, VT, CondOp0, CondOp1, + if (Subtarget->hasAVX512()) { + SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CondOp0, CondOp1, + DAG.getConstant(SSECC, MVT::i8)); + return DAG.getNode(X86ISD::SELECT, DL, VT, Cmp, Op1, Op2); + } + SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1, DAG.getConstant(SSECC, MVT::i8)); SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2); SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1); @@ -10753,7 +10890,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, getTargetMachine().Options.EnableSegmentedStacks) && "This should be used only on Windows targets or when segmented stacks " "are being used"); - assert(!Subtarget->isTargetEnvMacho() && "Not implemented"); + assert(!Subtarget->isTargetMacho() && "Not implemented"); SDLoc dl(Op); // Get the inputs. @@ -10964,14 +11101,15 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget, // getTargetVShiftByConstNode - Handle vector element shifts where the shift // amount is a constant. Takes immediate version of shift as input. 
-static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, EVT VT, +static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT, SDValue SrcOp, uint64_t ShiftAmt, SelectionDAG &DAG) { + MVT ElementType = VT.getVectorElementType(); // Check for ShiftAmt >= element width - if (ShiftAmt >= VT.getVectorElementType().getSizeInBits()) { + if (ShiftAmt >= ElementType.getSizeInBits()) { if (Opc == X86ISD::VSRAI) - ShiftAmt = VT.getVectorElementType().getSizeInBits() - 1; + ShiftAmt = ElementType.getSizeInBits() - 1; else return DAG.getConstant(0, VT); } @@ -10979,12 +11117,62 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, EVT VT, assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI) && "Unknown target vector shift-by-constant node"); + // Fold this packed vector shift into a build vector if SrcOp is a + // vector of ConstantSDNodes or UNDEFs. + if (ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) { + SmallVector Elts; + unsigned NumElts = SrcOp->getNumOperands(); + ConstantSDNode *ND; + + switch(Opc) { + default: llvm_unreachable(0); + case X86ISD::VSHLI: + for (unsigned i=0; i!=NumElts; ++i) { + SDValue CurrentOp = SrcOp->getOperand(i); + if (CurrentOp->getOpcode() == ISD::UNDEF) { + Elts.push_back(CurrentOp); + continue; + } + ND = cast(CurrentOp); + const APInt &C = ND->getAPIntValue(); + Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), ElementType)); + } + break; + case X86ISD::VSRLI: + for (unsigned i=0; i!=NumElts; ++i) { + SDValue CurrentOp = SrcOp->getOperand(i); + if (CurrentOp->getOpcode() == ISD::UNDEF) { + Elts.push_back(CurrentOp); + continue; + } + ND = cast(CurrentOp); + const APInt &C = ND->getAPIntValue(); + Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), ElementType)); + } + break; + case X86ISD::VSRAI: + for (unsigned i=0; i!=NumElts; ++i) { + SDValue CurrentOp = SrcOp->getOperand(i); + if (CurrentOp->getOpcode() == ISD::UNDEF) { + Elts.push_back(CurrentOp); + continue; + } + ND = cast(CurrentOp); + const APInt &C = ND->getAPIntValue(); + Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), ElementType)); + } + break; + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Elts[0], NumElts); + } + return DAG.getNode(Opc, dl, VT, SrcOp, DAG.getConstant(ShiftAmt, MVT::i8)); } // getTargetVShiftNode - Handle vector element shifts where the shift amount // may or may not be a constant. Takes immediate version of shift as input. -static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, EVT VT, +static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, SDValue SrcOp, SDValue ShAmt, SelectionDAG &DAG) { assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32"); @@ -11012,7 +11200,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, EVT VT, // The return type has to be a 128-bit type with the same element // type as the input type. 
- MVT EltVT = VT.getVectorElementType().getSimpleVT(); + MVT EltVT = VT.getVectorElementType(); EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits()); ShAmt = DAG.getNode(ISD::BITCAST, dl, ShVT, ShAmt); @@ -11218,7 +11406,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { case Intrinsic::x86_sse41_pminsd: case Intrinsic::x86_avx2_pmins_b: case Intrinsic::x86_avx2_pmins_w: - case Intrinsic::x86_avx2_pmins_d: + case Intrinsic::x86_avx2_pmins_d: case Intrinsic::x86_avx512_pmins_d: case Intrinsic::x86_avx512_pmins_q: { unsigned Opcode; @@ -11444,14 +11632,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } - case Intrinsic::x86_avx512_kortestz: - case Intrinsic::x86_avx512_kortestc: { - unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz)? X86::COND_E: X86::COND_B; + case Intrinsic::x86_avx512_kortestz_w: + case Intrinsic::x86_avx512_kortestc_w: { + unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz_w)? X86::COND_E: X86::COND_B; SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1)); SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2)); SDValue CC = DAG.getConstant(X86CC, MVT::i8); SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS); - SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i1, CC, Test); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } @@ -11545,7 +11733,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { Opcode = X86ISD::VSRAI; break; } - return getTargetVShiftNode(Opcode, dl, Op.getValueType(), + return getTargetVShiftNode(Opcode, dl, Op.getSimpleValueType(), Op.getOperand(1), Op.getOperand(2), DAG); } @@ -11660,7 +11848,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { case Intrinsic::x86_fma_vfmaddsub_ps_512: case Intrinsic::x86_fma_vfmaddsub_pd_512: case Intrinsic::x86_fma_vfmsubadd_ps_512: - case Intrinsic::x86_fma_vfmsubadd_pd_512: { + case Intrinsic::x86_fma_vfmsubadd_pd_512: { unsigned Opc; switch (IntNo) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
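// Aside on the build-vector fold added to getTargetVShiftByConstNode above
// (standalone sketch: scalar uint32_t stands in for APInt, and UNDEF elements
// are omitted): an immediate shift of an all-constant vector can be evaluated
// per element at compile time. The early-out above already guarantees that
// ShiftAmt is less than the element width.
#include <array>
#include <cstdint>
static std::array<uint32_t, 4> foldVSHLI(std::array<uint32_t, 4> Elts,
                                         unsigned ShiftAmt) {
  for (uint32_t &E : Elts)
    E <<= ShiftAmt;          // mirrors C.shl(ShiftAmt) on each element
  return Elts;
}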
@@ -11728,9 +11916,9 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, ConstantSDNode *C = dyn_cast(ScaleOp); assert(C && "Invalid scale type"); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8); - SDValue Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl); - EVT MaskVT = MVT::getVectorVT(MVT::i1, - Index.getValueType().getVectorNumElements()); + SDValue Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl); + EVT MaskVT = MVT::getVectorVT(MVT::i1, + Index.getSimpleValueType().getVectorNumElements()); SDValue MaskInReg = DAG.getConstant(~0, MaskVT); SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); SDValue Disp = DAG.getTargetConstant(0, MVT::i32); @@ -11750,13 +11938,13 @@ static SDValue getMGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, assert(C && "Invalid scale type"); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8); EVT MaskVT = MVT::getVectorVT(MVT::i1, - Index.getValueType().getVectorNumElements()); + Index.getSimpleValueType().getVectorNumElements()); SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask); SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); SDValue Disp = DAG.getTargetConstant(0, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); if (Src.getOpcode() == ISD::UNDEF) - Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl); + Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl); SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) }; @@ -11773,7 +11961,7 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Disp = DAG.getTargetConstant(0, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); EVT MaskVT = MVT::getVectorVT(MVT::i1, - Index.getValueType().getVectorNumElements()); + Index.getSimpleValueType().getVectorNumElements()); SDValue MaskInReg = DAG.getConstant(~0, MaskVT); SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other); SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain}; @@ -11791,7 +11979,7 @@ static SDValue getMScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Disp = DAG.getTargetConstant(0, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); EVT MaskVT = MVT::getVectorVT(MVT::i1, - Index.getValueType().getVectorNumElements()); + Index.getSimpleValueType().getVectorNumElements()); SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask); SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other); SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain}; @@ -11846,15 +12034,15 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, case Intrinsic::x86_avx512_gather_dpi_512: { unsigned Opc; switch (IntNo) { - default: llvm_unreachable("Unexpected intrinsic!"); - case Intrinsic::x86_avx512_gather_qps_512: Opc = X86::VGATHERQPSZrm; break; - case Intrinsic::x86_avx512_gather_qpd_512: Opc = X86::VGATHERQPDZrm; break; - case Intrinsic::x86_avx512_gather_dpd_512: Opc = X86::VGATHERDPDZrm; break; - case Intrinsic::x86_avx512_gather_dps_512: Opc = X86::VGATHERDPSZrm; break; - case Intrinsic::x86_avx512_gather_qpi_512: Opc = X86::VPGATHERQDZrm; break; - case Intrinsic::x86_avx512_gather_qpq_512: Opc = X86::VPGATHERQQZrm; break; - case Intrinsic::x86_avx512_gather_dpi_512: Opc = X86::VPGATHERDDZrm; break; - case Intrinsic::x86_avx512_gather_dpq_512: Opc = X86::VPGATHERDQZrm; 
break; + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_avx512_gather_qps_512: Opc = X86::VGATHERQPSZrm; break; + case Intrinsic::x86_avx512_gather_qpd_512: Opc = X86::VGATHERQPDZrm; break; + case Intrinsic::x86_avx512_gather_dpd_512: Opc = X86::VGATHERDPDZrm; break; + case Intrinsic::x86_avx512_gather_dps_512: Opc = X86::VGATHERDPSZrm; break; + case Intrinsic::x86_avx512_gather_qpi_512: Opc = X86::VPGATHERQDZrm; break; + case Intrinsic::x86_avx512_gather_qpq_512: Opc = X86::VPGATHERQQZrm; break; + case Intrinsic::x86_avx512_gather_dpi_512: Opc = X86::VPGATHERDDZrm; break; + case Intrinsic::x86_avx512_gather_dpq_512: Opc = X86::VPGATHERDQZrm; break; } SDValue Chain = Op.getOperand(0); SDValue Index = Op.getOperand(2); @@ -11873,23 +12061,23 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, case Intrinsic::x86_avx512_gather_dpq_mask_512: { unsigned Opc; switch (IntNo) { - default: llvm_unreachable("Unexpected intrinsic!"); - case Intrinsic::x86_avx512_gather_qps_mask_512: - Opc = X86::VGATHERQPSZrm; break; - case Intrinsic::x86_avx512_gather_qpd_mask_512: - Opc = X86::VGATHERQPDZrm; break; - case Intrinsic::x86_avx512_gather_dpd_mask_512: - Opc = X86::VGATHERDPDZrm; break; - case Intrinsic::x86_avx512_gather_dps_mask_512: - Opc = X86::VGATHERDPSZrm; break; - case Intrinsic::x86_avx512_gather_qpi_mask_512: - Opc = X86::VPGATHERQDZrm; break; - case Intrinsic::x86_avx512_gather_qpq_mask_512: - Opc = X86::VPGATHERQQZrm; break; - case Intrinsic::x86_avx512_gather_dpi_mask_512: - Opc = X86::VPGATHERDDZrm; break; - case Intrinsic::x86_avx512_gather_dpq_mask_512: - Opc = X86::VPGATHERDQZrm; break; + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_avx512_gather_qps_mask_512: + Opc = X86::VGATHERQPSZrm; break; + case Intrinsic::x86_avx512_gather_qpd_mask_512: + Opc = X86::VGATHERQPDZrm; break; + case Intrinsic::x86_avx512_gather_dpd_mask_512: + Opc = X86::VGATHERDPDZrm; break; + case Intrinsic::x86_avx512_gather_dps_mask_512: + Opc = X86::VGATHERDPSZrm; break; + case Intrinsic::x86_avx512_gather_qpi_mask_512: + Opc = X86::VPGATHERQDZrm; break; + case Intrinsic::x86_avx512_gather_qpq_mask_512: + Opc = X86::VPGATHERQQZrm; break; + case Intrinsic::x86_avx512_gather_dpi_mask_512: + Opc = X86::VPGATHERDDZrm; break; + case Intrinsic::x86_avx512_gather_dpq_mask_512: + Opc = X86::VPGATHERDQZrm; break; } SDValue Chain = Op.getOperand(0); SDValue Src = Op.getOperand(2); @@ -11911,23 +12099,23 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, case Intrinsic::x86_avx512_scatter_dpi_512: { unsigned Opc; switch (IntNo) { - default: llvm_unreachable("Unexpected intrinsic!"); - case Intrinsic::x86_avx512_scatter_qpd_512: - Opc = X86::VSCATTERQPDZmr; break; - case Intrinsic::x86_avx512_scatter_qps_512: - Opc = X86::VSCATTERQPSZmr; break; - case Intrinsic::x86_avx512_scatter_dpd_512: - Opc = X86::VSCATTERDPDZmr; break; - case Intrinsic::x86_avx512_scatter_dps_512: - Opc = X86::VSCATTERDPSZmr; break; - case Intrinsic::x86_avx512_scatter_qpi_512: - Opc = X86::VPSCATTERQDZmr; break; - case Intrinsic::x86_avx512_scatter_qpq_512: - Opc = X86::VPSCATTERQQZmr; break; - case Intrinsic::x86_avx512_scatter_dpq_512: - Opc = X86::VPSCATTERDQZmr; break; - case Intrinsic::x86_avx512_scatter_dpi_512: - Opc = X86::VPSCATTERDDZmr; break; + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+ case Intrinsic::x86_avx512_scatter_qpd_512: + Opc = X86::VSCATTERQPDZmr; break; + case Intrinsic::x86_avx512_scatter_qps_512: + Opc = X86::VSCATTERQPSZmr; break; + case Intrinsic::x86_avx512_scatter_dpd_512: + Opc = X86::VSCATTERDPDZmr; break; + case Intrinsic::x86_avx512_scatter_dps_512: + Opc = X86::VSCATTERDPSZmr; break; + case Intrinsic::x86_avx512_scatter_qpi_512: + Opc = X86::VPSCATTERQDZmr; break; + case Intrinsic::x86_avx512_scatter_qpq_512: + Opc = X86::VPSCATTERQQZmr; break; + case Intrinsic::x86_avx512_scatter_dpq_512: + Opc = X86::VPSCATTERDQZmr; break; + case Intrinsic::x86_avx512_scatter_dpi_512: + Opc = X86::VPSCATTERDDZmr; break; } SDValue Chain = Op.getOperand(0); SDValue Base = Op.getOperand(2); @@ -11947,23 +12135,23 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, case Intrinsic::x86_avx512_scatter_dpq_mask_512: { unsigned Opc; switch (IntNo) { - default: llvm_unreachable("Unexpected intrinsic!"); - case Intrinsic::x86_avx512_scatter_qpd_mask_512: - Opc = X86::VSCATTERQPDZmr; break; - case Intrinsic::x86_avx512_scatter_qps_mask_512: - Opc = X86::VSCATTERQPSZmr; break; - case Intrinsic::x86_avx512_scatter_dpd_mask_512: - Opc = X86::VSCATTERDPDZmr; break; - case Intrinsic::x86_avx512_scatter_dps_mask_512: - Opc = X86::VSCATTERDPSZmr; break; - case Intrinsic::x86_avx512_scatter_qpi_mask_512: - Opc = X86::VPSCATTERQDZmr; break; - case Intrinsic::x86_avx512_scatter_qpq_mask_512: - Opc = X86::VPSCATTERQQZmr; break; - case Intrinsic::x86_avx512_scatter_dpq_mask_512: - Opc = X86::VPSCATTERDQZmr; break; - case Intrinsic::x86_avx512_scatter_dpi_mask_512: - Opc = X86::VPSCATTERDDZmr; break; + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_avx512_scatter_qpd_mask_512: + Opc = X86::VSCATTERQPDZmr; break; + case Intrinsic::x86_avx512_scatter_qps_mask_512: + Opc = X86::VSCATTERQPSZmr; break; + case Intrinsic::x86_avx512_scatter_dpd_mask_512: + Opc = X86::VSCATTERDPDZmr; break; + case Intrinsic::x86_avx512_scatter_dps_mask_512: + Opc = X86::VSCATTERDPSZmr; break; + case Intrinsic::x86_avx512_scatter_qpi_mask_512: + Opc = X86::VPSCATTERQDZmr; break; + case Intrinsic::x86_avx512_scatter_qpq_mask_512: + Opc = X86::VPSCATTERQQZmr; break; + case Intrinsic::x86_avx512_scatter_dpq_mask_512: + Opc = X86::VPSCATTERDQZmr; break; + case Intrinsic::x86_avx512_scatter_dpi_mask_512: + Opc = X86::VPSCATTERDDZmr; break; } SDValue Chain = Op.getOperand(0); SDValue Base = Op.getOperand(2); @@ -12262,7 +12450,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, const TargetMachine &TM = MF.getTarget(); const TargetFrameLowering &TFI = *TM.getFrameLowering(); unsigned StackAlignment = TFI.getStackAlignment(); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); SDLoc DL(Op); // Save FP Control Word to stack slot @@ -12307,7 +12495,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, } static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); SDLoc dl(Op); @@ -12341,7 +12529,7 @@ static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); SDLoc dl(Op); @@ -12366,7 +12554,7 @@ static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerCTTZ(SDValue Op, 
SelectionDAG &DAG) { - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); unsigned NumBits = VT.getSizeInBits(); SDLoc dl(Op); Op = Op.getOperand(0); @@ -12388,7 +12576,7 @@ static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) { // Lower256IntArith - Break a 256-bit integer operation into two new 128-bit // ones, and then concatenate the result back. static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); assert(VT.is256BitVector() && VT.isInteger() && "Unsupported value type for operation"); @@ -12406,8 +12594,8 @@ static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) { SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, dl); SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl); - MVT EltVT = VT.getVectorElementType().getSimpleVT(); - EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + MVT EltVT = VT.getVectorElementType(); + MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1), @@ -12415,15 +12603,15 @@ static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerADD(SDValue Op, SelectionDAG &DAG) { - assert(Op.getValueType().is256BitVector() && - Op.getValueType().isInteger() && + assert(Op.getSimpleValueType().is256BitVector() && + Op.getSimpleValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); return Lower256IntArith(Op, DAG); } static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) { - assert(Op.getValueType().is256BitVector() && - Op.getValueType().isInteger() && + assert(Op.getSimpleValueType().is256BitVector() && + Op.getSimpleValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); return Lower256IntArith(Op, DAG); } @@ -12431,7 +12619,7 @@ static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) { static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); // Decompose 256-bit ops into smaller 128-bit ops. if (VT.is256BitVector() && !Subtarget->hasInt256()) @@ -12501,8 +12689,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, } static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - EVT EltTy = VT.getVectorElementType(); + MVT VT = Op.getSimpleValueType(); + MVT EltTy = VT.getVectorElementType(); unsigned NumElts = VT.getVectorNumElements(); SDValue N0 = Op.getOperand(0); SDLoc dl(Op); @@ -12557,7 +12745,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget) { - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); @@ -12589,7 +12777,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, // Make a large shift. SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, R, ShiftAmt, - DAG); + DAG); SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL); // Zero out the rightmost bits. 
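// Annotation (reviewer note, not part of the upstream patch): Lower256IntArith
// above shows the standard split-and-reassemble idiom used when a 256-bit
// integer operation is not legal on the target: extract the two 128-bit
// halves, redo the operation at the narrower type, and glue the results back
// together. The shape, using the names from that function:
//   SDValue Lo = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, RHS1);
//   SDValue Hi = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2);
//   return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);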
SmallVector<SDValue, 16> V(16, @@ -12684,7 +12872,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, Amt.getOpcode() == ISD::BITCAST && Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { Amt = Amt.getOperand(0); - unsigned Ratio = Amt.getValueType().getVectorNumElements() / + unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() / VT.getVectorNumElements(); unsigned RatioInLog2 = Log2_32_Ceil(Ratio); uint64_t ShiftAmt = 0; @@ -12729,7 +12917,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, const X86Subtarget* Subtarget) { - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); @@ -12799,7 +12987,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, default: llvm_unreachable("Unknown shift opcode!"); case ISD::SHL: - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { default: return SDValue(); case MVT::v2i64: case MVT::v4i32: @@ -12812,7 +13000,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG); } case ISD::SRA: - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { default: return SDValue(); case MVT::v4i32: case MVT::v8i16: @@ -12823,7 +13011,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG); } case ISD::SRL: - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { default: return SDValue(); case MVT::v2i64: case MVT::v4i32: @@ -12846,7 +13034,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, Amt.getOpcode() == ISD::BITCAST && Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { Amt = Amt.getOperand(0); - unsigned Ratio = Amt.getValueType().getVectorNumElements() / + unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() / VT.getVectorNumElements(); std::vector<SDValue> Vals(Ratio); for (unsigned i = 0; i != Ratio; ++i) @@ -12874,7 +13062,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); @@ -12962,7 +13150,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, // Decompose 256-bit shifts into smaller 128-bit shifts.
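// Annotation (reviewer note, not part of the upstream patch): the repeated
// EVT -> MVT conversions in these hunks are mechanical. EVT can describe
// extended IR types that have no MachineValueType equivalent, while MVT is a
// plain enum; once lowering only ever sees legal types, getSimpleValueType()
// both asserts that invariant and allows switching on the enum directly:
//   MVT VT = Op.getSimpleValueType();  // asserts Op's type is simple
//   switch (VT.SimpleTy) { ... }       // no VT.getSimpleVT() hop needed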
if (VT.is256BitVector()) { unsigned NumElems = VT.getVectorNumElements(); - MVT EltVT = VT.getVectorElementType().getSimpleVT(); + MVT EltVT = VT.getVectorElementType(); EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); // Extract the two vectors @@ -13080,7 +13268,7 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); if (!Subtarget->hasSSE2() || !VT.isVector()) return SDValue(); @@ -13088,7 +13276,7 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, unsigned BitsDiff = VT.getScalarType().getSizeInBits() - ExtraVT.getScalarType().getSizeInBits(); - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { default: return SDValue(); case MVT::v8i32: case MVT::v16i16: @@ -13103,7 +13291,7 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl); SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl); - MVT EltVT = VT.getVectorElementType().getSimpleVT(); + MVT EltVT = VT.getVectorElementType(); EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); EVT ExtraEltVT = ExtraVT.getVectorElementType(); @@ -13190,11 +13378,11 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - EVT T = Op.getValueType(); + MVT T = Op.getSimpleValueType(); SDLoc DL(Op); unsigned Reg = 0; unsigned size = 0; - switch(T.getSimpleVT().SimpleTy) { + switch(T.SimpleTy) { default: llvm_unreachable("Invalid value type!"); case MVT::i8: Reg = X86::AL; size = 1; break; case MVT::i16: Reg = X86::AX; size = 2; break; @@ -13302,7 +13490,7 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getNode()->getValueType(0); + EVT VT = Op.getNode()->getSimpleValueType(0); // Let legalize expand this if it isn't a legal type yet. if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) @@ -13720,8 +13908,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::CMPMU: return "X86ISD::CMPMU"; case X86ISD::SETCC: return "X86ISD::SETCC"; case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY"; - case X86ISD::FSETCCsd: return "X86ISD::FSETCCsd"; - case X86ISD::FSETCCss: return "X86ISD::FSETCCss"; + case X86ISD::FSETCC: return "X86ISD::FSETCC"; case X86ISD::CMOV: return "X86ISD::CMOV"; case X86ISD::BRCOND: return "X86ISD::BRCOND"; case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; @@ -13816,7 +14003,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::TESTP: return "X86ISD::TESTP"; case X86ISD::TESTM: return "X86ISD::TESTM"; case X86ISD::KORTEST: return "X86ISD::KORTEST"; - case X86ISD::KTEST: return "X86ISD::KTEST"; case X86ISD::PALIGNR: return "X86ISD::PALIGNR"; case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; @@ -15211,9 +15397,15 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( MBB->addSuccessor(EndMBB); } + // Make sure the last operand is EFLAGS, which gets clobbered by the branch + // that was just emitted, but clearly shouldn't be "saved".
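// Annotation (reviewer note, not part of the upstream patch): the pseudo's
// operand list here is <frame operands> <xmm argument regs...> <implicit
// EFLAGS>, so the save loop below now stops one operand short; storing EFLAGS
// as if it were an XMM argument register would be meaningless. Hence the
// paired change:
//   for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i)  // skip EFLAGS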
+ assert((MI->getNumOperands() <= 3 || + !MI->getOperand(MI->getNumOperands() - 1).isReg() || + MI->getOperand(MI->getNumOperands() - 1).getReg() == X86::EFLAGS) + && "Expected last argument to be EFLAGS"); unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr; // In the XMM save block, save all the XMM argument registers. - for (int i = 3, e = MI->getNumOperands(); i != e; ++i) { + for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) { int64_t Offset = (i - 3) * 16 + VarArgsFPOffset; MachineMemOperand *MMO = F->getMachineMemOperand( @@ -15466,7 +15658,7 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); - assert(!Subtarget->isTargetEnvMacho()); + assert(!Subtarget->isTargetMacho()); // The lowering is pretty easy: we're just emitting the call to _alloca. The // non-trivial part is impdef of ESP. @@ -16000,6 +16192,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::EH_SjLj_LongJmp32: case X86::EH_SjLj_LongJmp64: return emitEHSjLjLongJmp(MI, BB); + + case TargetOpcode::STACKMAP: + case TargetOpcode::PATCHPOINT: + return emitPatchPoint(MI, BB); } } @@ -16255,7 +16451,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) Elts.push_back(getShuffleScalarElt(N, i, DAG, 0)); - return EltsFromConsecutiveLoads(VT, Elts, dl, DAG); + return EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true); } /// PerformTruncateCombine - Converts truncate operation to @@ -16362,44 +16558,6 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, EltNo); } -/// Extract one bit from mask vector, like v16i1 or v8i1. -/// AVX-512 feature. -static SDValue ExtractBitFromMaskVector(SDNode *N, SelectionDAG &DAG) { - SDValue Vec = N->getOperand(0); - SDLoc dl(Vec); - MVT VecVT = Vec.getSimpleValueType(); - SDValue Idx = N->getOperand(1); - MVT EltVT = N->getSimpleValueType(0); - - assert((VecVT.getVectorElementType() == MVT::i1 && EltVT == MVT::i8) || - "Unexpected operands in ExtractBitFromMaskVector"); - - // variable index - if (!isa<ConstantSDNode>(Idx)) { - MVT ExtVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32); - SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Vec); - SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - ExtVT.getVectorElementType(), Ext); - return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); - } - - unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); - - MVT ScalarVT = MVT::getIntegerVT(VecVT.getSizeInBits()); - unsigned MaxShift = VecVT.getSizeInBits() - 1; - Vec = DAG.getNode(ISD::BITCAST, dl, ScalarVT, Vec); - Vec = DAG.getNode(ISD::SHL, dl, ScalarVT, Vec, - DAG.getConstant(MaxShift - IdxVal, ScalarVT)); - Vec = DAG.getNode(ISD::SRL, dl, ScalarVT, Vec, - DAG.getConstant(MaxShift, ScalarVT)); - - if (VecVT == MVT::v16i1) { - Vec = DAG.getNode(ISD::BITCAST, dl, MVT::i16, Vec); - return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Vec); - } - return DAG.getNode(ISD::BITCAST, dl, MVT::i8, Vec); -} - /// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index /// generation and convert it from being a bunch of shuffles and extracts /// to a simple store and scalar loads to extract the elements.
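Reviewer note, not part of the upstream patch: the ExtractBitFromMaskVector combine deleted above isolated bit IdxVal of a k-mask by bitcasting the mask to a scalar integer and applying a shl/srl pair. A standalone scalar model of the same trick; the 16-bit width, the helper name, and the bounds assumption (idx < 16) are illustrative choices, not code from the patch:

  #include <cstdint>

  // Return bit `idx` of `mask` as 0 or 1, mirroring the removed DAG code:
  // shift the wanted bit up to the MSB, then logically shift it back down.
  inline uint16_t extractMaskBit(uint16_t mask, unsigned idx) {
    const unsigned MaxShift = 15; // bit width - 1, i.e. VecVT.getSizeInBits() - 1
    uint16_t Hi = static_cast<uint16_t>(mask << (MaxShift - idx));
    return static_cast<uint16_t>(Hi >> MaxShift);
  }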
@@ -16411,10 +16569,6 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, SDValue InputVector = N->getOperand(0); - if (InputVector.getValueType().getVectorElementType() == MVT::i1 && - !DCI.isBeforeLegalize()) - return ExtractBitFromMaskVector(N, DAG); - // Detect whether we are trying to convert from mmx to i32 and the bitcast // from mmx to v2i32 has a single usage. if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST && @@ -16960,12 +17114,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Simplify vector selection if the selector will be produced by CMPP*/PCMP*. if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC && // Check if SETCC has already been promoted - TLI.getSetCCResultType(*DAG.getContext(), VT) == Cond.getValueType()) { + TLI.getSetCCResultType(*DAG.getContext(), VT) == CondVT && + // Check that condition value type matches vselect operand type + CondVT == VT) { assert(Cond.getValueType().isVector() && "vector select expects a vector selector!"); - EVT IntVT = Cond.getValueType(); bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode()); bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); @@ -16980,7 +17135,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, ISD::CondCode NewCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType().isInteger()); - Cond = DAG.getSetCC(DL, IntVT, Cond.getOperand(0), Cond.getOperand(1), NewCC); + Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1), NewCC); std::swap(LHS, RHS); TValIsAllOnes = FValIsAllOnes; FValIsAllZeros = TValIsAllZeros; @@ -16993,11 +17148,11 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (TValIsAllOnes && FValIsAllZeros) Ret = Cond; else if (TValIsAllOnes) - Ret = DAG.getNode(ISD::OR, DL, IntVT, Cond, - DAG.getNode(ISD::BITCAST, DL, IntVT, RHS)); + Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond, + DAG.getNode(ISD::BITCAST, DL, CondVT, RHS)); else if (FValIsAllZeros) - Ret = DAG.getNode(ISD::AND, DL, IntVT, Cond, - DAG.getNode(ISD::BITCAST, DL, IntVT, LHS)); + Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond, + DAG.getNode(ISD::BITCAST, DL, CondVT, LHS)); return DAG.getNode(ISD::BITCAST, DL, VT, Ret); } @@ -17451,7 +17606,7 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { } /// \brief Returns a vector of 0s if the node in input is a vector logical -/// shift by a constant amount which is known to be bigger than or equal +/// shift by a constant amount which is known to be bigger than or equal /// to the vector element size in bits. static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { @@ -17471,7 +17626,7 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG, unsigned MaxAmount = VT.getVectorElementType().getSizeInBits(); // SSE2/AVX2 logical shifts always return a vector of 0s - // if the shift amount is bigger than or equal to + // if the shift amount is bigger than or equal to // the element size. The constant shift amount will be // encoded as a 8-bit immediate. if (ShiftAmt.trunc(8).uge(MaxAmount)) @@ -17557,17 +17712,22 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) || (cc0 == X86::COND_NE && cc1 == X86::COND_P)) { bool is64BitFP = (CMP00.getValueType() == MVT::f64); - X86ISD::NodeType NTOperator = is64BitFP ? - X86ISD::FSETCCsd : X86ISD::FSETCCss; // FIXME: need symbolic constants for these magic numbers.
// See X86ATTInstPrinter.cpp:printSSECC(). unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4; - SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP00, CMP01, + if (Subtarget->hasAVX512()) { + // SETCC type in AVX-512 is MVT::i1 + assert(N->getValueType(0) == MVT::i1 && "Unexpected AND node type"); + return DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CMP00, CMP01, + DAG.getConstant(x86cc, MVT::i8)); + } + SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, CMP01, DAG.getConstant(x86cc, MVT::i8)); - SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32, + MVT IntVT = (is64BitFP ? MVT::i64 : MVT::i32); + SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT, OnesOrZeroesF); - SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI, - DAG.getConstant(1, MVT::i32)); + SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI, + DAG.getConstant(1, IntVT)); SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed); return OneBitOfTruth; } @@ -17902,6 +18062,18 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) + MachineFunction &MF = DAG.getMachineFunction(); + bool OptForSize = MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + + // SHLD/SHRD instructions have lower register pressure, but on some + // platforms they have higher latency than the equivalent + // series of shifts/or that would otherwise be generated. + // Don't fold (or (x << c) | (y >> (64 - c))) if SHLD/SHRD instructions + // have higher latencies and we are not optimizing for size. + if (!OptForSize && Subtarget->isSHLDSlow()) + return SDValue(); + if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) std::swap(N0, N1); if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) @@ -18830,6 +19002,17 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, } } + if (N0.getOpcode() == ISD::TRUNCATE && + N0.hasOneUse() && + N0.getOperand(0).hasOneUse()) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == X86ISD::SETCC_CARRY) { + return DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, + N00.getOperand(0), N00.getOperand(1)), + DAG.getConstant(1, VT)); + } + } if (VT.is256BitVector()) { SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget); if (R.getNode()) @@ -18868,11 +19051,17 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) { // Helper function of PerformSETCCCombine. It is to materialize "setb reg" // as "sbb reg,reg", since it can be extended without zext and produces // an all-ones bit which is more useful than 0/1 in some cases. 
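// Annotation (reviewer note, not part of the upstream patch): SETCC_CARRY
// models the classic `sbb %reg, %reg` idiom -- subtracting a register from
// itself with borrow leaves 0 or all-ones depending on the carry flag. A
// scalar model of why MaterializeSETB then masks with 1 (illustrative only):
//   uint32_t setcc_carry(bool CF) { return CF ? ~0u : 0u; } // sbb eax, eax
//   uint32_t setb(bool CF)        { return setcc_carry(CF) & 1u; }
// The all-ones form is kept where possible because it extends without a zext.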
-static SDValue MaterializeSETB(SDLoc DL, SDValue EFLAGS, SelectionDAG &DAG) { - return DAG.getNode(ISD::AND, DL, MVT::i8, +static SDValue MaterializeSETB(SDLoc DL, SDValue EFLAGS, SelectionDAG &DAG, + MVT VT) { + if (VT == MVT::i8) + return DAG.getNode(ISD::AND, DL, VT, + DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, + DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS), + DAG.getConstant(1, VT)); + assert (VT == MVT::i1 && "Unexpected type for SECCC node"); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, - DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS), - DAG.getConstant(1, MVT::i8)); + DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS)); } // Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT @@ -18897,7 +19086,7 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG, EFLAGS.getNode()->getVTList(), EFLAGS.getOperand(1), EFLAGS.getOperand(0)); SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); - return MaterializeSETB(DL, NewEFLAGS, DAG); + return MaterializeSETB(DL, NewEFLAGS, DAG, N->getSimpleValueType(0)); } } @@ -18905,7 +19094,7 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG, // a zext and produces an all-ones bit which is more useful than 0/1 in some // cases. if (CC == X86::COND_B) - return MaterializeSETB(DL, EFLAGS, DAG); + return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0)); SDValue Flags; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86ISelLowering.h b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86ISelLowering.h index bc3dd608da52..e14c78f5add0 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86ISelLowering.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86ISelLowering.h @@ -94,6 +94,9 @@ namespace llvm { /// operand, usually produced by a CMP instruction. SETCC, + /// X86 Select + SELECT, + // Same as SETCC except it's materialized with a sbb and the value is all // one's or all zero's. SETCC_CARRY, // R = carry_bit ? ~0 : 0 @@ -101,7 +104,7 @@ namespace llvm { /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. /// Operands are two FP values to compare; result is a mask of /// 0s or 1s. Generally DTRT for C/C++ with NaNs. - FSETCCss, FSETCCsd, + FSETCC, /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values, /// result in an integer GPR. Needs masking for scalar result. @@ -314,7 +317,6 @@ namespace llvm { // OR/AND test for masks KORTEST, - KTEST, // Several flavors of instructions with vector shuffle behaviors. 
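// Annotation (reviewer note, not part of the upstream patch): folding
// FSETCCss/FSETCCsd into the single FSETCC opcode above works because the
// operand value types (f32 vs f64) already encode the width, so keeping two
// opcodes only duplicated patterns. The combine code now derives the integer
// result width from the compare type instead, e.g.:
//   MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;   // see the CMPEQCombine hunk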
PALIGNR, @@ -865,7 +867,6 @@ namespace llvm { SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; @@ -874,9 +875,6 @@ namespace llvm { SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerToBT(SDValue And, ISD::CondCode CC, SDLoc dl, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrAVX512.td b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrAVX512.td index cb19fbd5638e..b0c1424f7a6e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrAVX512.td @@ -207,12 +207,12 @@ def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), // vinsertps - insert f32 to XMM def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3), - "vinsertps{z}\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128X:$dst, (X86insrtps VR128X:$src1, VR128X:$src2, imm:$src3))]>, EVEX_4V; def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3), - "vinsertps{z}\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128X:$dst, (X86insrtps VR128X:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>; @@ -352,13 +352,13 @@ def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)), // vextractps - extract 32 bits from XMM def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src1, u32u8imm:$src2), - "vextractps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, EVEX; def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs), (ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2), - "vextractps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2), addr:$dst)]>, EVEX; @@ -375,13 +375,13 @@ multiclass avx512_fp_broadcast opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),[]>, EVEX; } let ExeDomain = SSEPackedSingle in { - defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss{z}", VR512, + defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512, VR128X, f32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; } let ExeDomain = SSEPackedDouble in { - defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd{z}", VR512, + defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512, VR128X, f64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, 
CD8VT1>; } @@ -420,6 +420,8 @@ def : Pat <(v8i64 (X86vzext VK8WM:$mask)), def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))), (VPBROADCASTDrZrr GR32:$src)>; +def : Pat<(v16i32 (X86VBroadcastm VK16WM:$mask, (i32 GR32:$src))), + (VPBROADCASTDrZkrr VK16WM:$mask, GR32:$src)>; def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))), (VPBROADCASTQrZrr GR64:$src)>; def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))), @@ -616,6 +618,7 @@ multiclass avx512_blendmask opc, string OpcodeStr, Intrinsic Int, "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), [(set RC:$dst, (OpNode KRC:$mask, (vt RC:$src2), (vt RC:$src1)))]>, EVEX_4V, EVEX_K; + let isCodeGenOnly = 1 in def rr_Int : AVX5128I opc, string OpcodeStr, Intrinsic Int, def rm : AVX5128I, EVEX_4V, EVEX_K; + let isCodeGenOnly = 1 in def rm_Int : AVX5128I, EVEX_4V, EVEX_K; @@ -643,25 +647,25 @@ multiclass avx512_blendmask opc, string OpcodeStr, Intrinsic Int, let ExeDomain = SSEPackedSingle in defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps", - int_x86_avx512_mskblend_ps_512, + int_x86_avx512_mask_blend_ps_512, VK16WM, VR512, f512mem, memopv16f32, vselect, v16f32>, EVEX_CD8<32, CD8VF>, EVEX_V512; let ExeDomain = SSEPackedDouble in defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd", - int_x86_avx512_mskblend_pd_512, + int_x86_avx512_mask_blend_pd_512, VK8WM, VR512, f512mem, memopv8f64, vselect, v8f64>, VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512; defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd", - int_x86_avx512_mskblend_d_512, + int_x86_avx512_mask_blend_d_512, VK16WM, VR512, f512mem, memopv16i32, vselect, v16i32>, EVEX_CD8<32, CD8VF>, EVEX_V512; defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq", - int_x86_avx512_mskblend_q_512, + int_x86_avx512_mask_blend_q_512, VK8WM, VR512, f512mem, memopv8i64, vselect, v8i64>, VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512; @@ -681,6 +685,42 @@ def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; } +//===----------------------------------------------------------------------===// +// Compare Instructions +//===----------------------------------------------------------------------===// + +// avx512_cmp_scalar - AVX512 CMPSS and CMPSD +multiclass avx512_cmp_scalar { + def rr : AVX512Ii8<0xC2, MRMSrcReg, + (outs VK1:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, + [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))], + IIC_SSE_ALU_F32S_RR>, EVEX_4V; + def rm : AVX512Ii8<0xC2, MRMSrcMem, + (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, + [(set VK1:$dst, (OpNode (VT RC:$src1), + (ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + let neverHasSideEffects = 1 in { + def rri_alt : AVX512Ii8<0xC2, MRMSrcReg, + (outs VK1:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), + asm_alt, [], IIC_SSE_ALU_F32S_RR>, EVEX_4V; + def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem, + (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), + asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + } +} + +let Predicates = [HasAVX512] in { +defm VCMPSSZ : avx512_cmp_scalar, + XS; +defm VCMPSDZ : avx512_cmp_scalar, + XD, VEX_W; +} multiclass avx512_icmp_packed opc, string OpcodeStr, RegisterClass KRC, RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, @@ -732,10 +772,10 @@ multiclass avx512_icmp_cc opc, RegisterClass KRC, // Accept explicit immediate argument form instead of comparison code. 
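// Annotation (reviewer note, not part of the upstream patch): the new
// avx512_cmp_scalar forms above give VCMPSS/VCMPSD a VK1 result, i.e. the
// EVEX encodings write a single-bit mask register instead of an XMM lane of
// all-ones/all-zeros as the VEX forms do. A C-level model of the rr variant,
// with helper names assumed purely for illustration:
//   bool vcmpss(float a, float b, int cc) {    // cc: predicate immediate
//     return evaluate_fp_predicate(cc, a, b);  // result lands in a k-register
//   }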
let neverHasSideEffects = 1 in { def rri_alt : AVX512AIi8, EVEX_4V; def rmi_alt : AVX512AIi8, EVEX_4V; } } @@ -764,36 +804,42 @@ defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8, VR512, i512mem, memopv8i64, // avx512_cmp_packed - sse 1 & 2 compare packed instructions multiclass avx512_cmp_packed { + X86MemOperand x86memop, ValueType vt, + string suffix, Domain d> { def rri : AVX512PIi8<0xC2, MRMSrcReg, - (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, - [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>; + (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc), + !strconcat("vcmp${cc}", suffix, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set KRC:$dst, (X86cmpm (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>; + def rrib: AVX512PIi8<0xC2, MRMSrcReg, + (outs KRC:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc, i32imm:$sae), + !strconcat("vcmp${cc}", suffix, + "\t{{sae}, $src2, $src1, $dst|$dst, $src1, $src2, {sae}}"), + [], d>, EVEX_B; def rmi : AVX512PIi8<0xC2, MRMSrcMem, - (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, + (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc), + !strconcat("vcmp", suffix, + "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [(set KRC:$dst, - (OpNode (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>; + (X86cmpm (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { def rri_alt : AVX512PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), - asm_alt, [], d>; + !strconcat("vcmp", suffix, + "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), - asm_alt, [], d>; + !strconcat("vcmp", suffix, + "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; } } -defm VCMPPSZ : avx512_cmp_packed, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VCMPPDZ : avx512_cmp_packed, OpSize, EVEX_4V, VEX_W, EVEX_V512, +defm VCMPPSZ : avx512_cmp_packed, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VCMPPDZ : avx512_cmp_packed, OpSize, EVEX_4V, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)), @@ -811,7 +857,31 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), imm:$cc), VK8)>; - + +def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1), + (v16f32 VR512:$src2), imm:$cc, (i16 -1), + FROUND_NO_EXC)), + (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2, + (I8Imm imm:$cc), (i32 0)), GR16)>; + +def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1), + (v8f64 VR512:$src2), imm:$cc, (i8 -1), + FROUND_NO_EXC)), + (COPY_TO_REGCLASS (VCMPPDZrrib VR512:$src1, VR512:$src2, + (I8Imm imm:$cc), (i32 0)), GR8)>; + +def : Pat<(i16 (int_x86_avx512_mask_cmp_ps_512 (v16f32 VR512:$src1), + (v16f32 VR512:$src2), imm:$cc, (i16 -1), + FROUND_CURRENT)), + (COPY_TO_REGCLASS (VCMPPSZrri VR512:$src1, VR512:$src2, + (I8Imm imm:$cc)), GR16)>; + +def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1), + (v8f64 VR512:$src2), imm:$cc, (i8 -1), + FROUND_CURRENT)), + (COPY_TO_REGCLASS (VCMPPDZrri VR512:$src1, VR512:$src2, + (I8Imm imm:$cc)), GR8)>; + // Mask register copy, including // - copy between mask registers // - load/store mask registers @@ -862,8 +932,26 @@ let Predicates = [HasAVX512] 
in { def : Pat<(store (v16i1 VK16:$src), addr:$dst), (KMOVWmk addr:$dst, VK16:$src)>; - def : Pat<(store (v8i1 VK8:$src), addr:$dst), - (KMOVWmk addr:$dst, (v16i1 (COPY_TO_REGCLASS VK8:$src, VK16)))>; + def : Pat<(store VK8:$src, addr:$dst), + (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>; + + def : Pat<(i1 (load addr:$src)), + (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>; + + def : Pat<(v8i1 (load addr:$src)), + (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>; + + def : Pat<(i1 (trunc (i32 GR32:$src))), + (COPY_TO_REGCLASS (KMOVWkr $src), VK1)>; + + def : Pat<(i1 (trunc (i8 GR8:$src))), + (COPY_TO_REGCLASS + (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)), VK1)>; + + def : Pat<(i32 (zext VK1:$src)), (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16))>; + def : Pat<(i8 (zext VK1:$src)), + (EXTRACT_SUBREG + (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_8bit)>; } // With AVX-512 only, 8-bit mask is promoted to 16-bit mask. let Predicates = [HasAVX512] in { @@ -876,6 +964,12 @@ let Predicates = [HasAVX512] in { (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)), sub_8bit)>; + + def : Pat<(i1 (extractelt VK16:$src, (iPTR 0))), + (COPY_TO_REGCLASS VK16:$src, VK1)>; + def : Pat<(i1 (extractelt VK8:$src, (iPTR 0))), + (COPY_TO_REGCLASS VK8:$src, VK1)>; + } // Mask unary operation @@ -896,6 +990,15 @@ multiclass avx512_mask_unop_w opc, string OpcodeStr, defm KNOT : avx512_mask_unop_w<0x44, "knot", not>; +multiclass avx512_mask_unop_int { + let Predicates = [HasAVX512] in + def : Pat<(!cast("int_x86_avx512_"##IntName##"_w") + (i16 GR16:$src)), + (COPY_TO_REGCLASS (!cast(InstName##"Wrr") + (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))), GR16)>; +} +defm : avx512_mask_unop_int<"knot", "KNOT">; + def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>; def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>; @@ -906,7 +1009,7 @@ def : Pat<(not VK8:$src), (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>; // Mask binary operation -// - KADD, KAND, KANDN, KOR, KXNOR, KXOR +// - KAND, KANDN, KOR, KXNOR, KXOR multiclass avx512_mask_binop opc, string OpcodeStr, RegisterClass KRC, SDPatternOperator OpNode> { let Predicates = [HasAVX512] in @@ -926,7 +1029,6 @@ def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>; def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>; let isCommutable = 1 in { - defm KADD : avx512_mask_binop_w<0x4a, "kadd", add>; defm KAND : avx512_mask_binop_w<0x41, "kand", and>; let isCommutable = 0 in defm KANDN : avx512_mask_binop_w<0x42, "kandn", andn>; @@ -935,19 +1037,38 @@ let isCommutable = 1 in { defm KXOR : avx512_mask_binop_w<0x47, "kxor", xor>; } +def : Pat<(xor VK1:$src1, VK1:$src2), + (COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src1, VK16), + (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; + +def : Pat<(or VK1:$src1, VK1:$src2), + (COPY_TO_REGCLASS (KORWrr (COPY_TO_REGCLASS VK1:$src1, VK16), + (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; + +def : Pat<(not VK1:$src), + (COPY_TO_REGCLASS (KXORWrr (COPY_TO_REGCLASS VK1:$src, VK16), + (COPY_TO_REGCLASS (VCMPSSZrr (f32 (IMPLICIT_DEF)), + (f32 (IMPLICIT_DEF)), (i8 0)), VK16)), VK1)>; + +def : Pat<(and VK1:$src1, VK1:$src2), + (COPY_TO_REGCLASS (KANDWrr (COPY_TO_REGCLASS VK1:$src1, VK16), + (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; + multiclass avx512_mask_binop_int { let Predicates = [HasAVX512] in - def : Pat<(!cast("int_x86_"##IntName##"_v16i1") - VK16:$src1, VK16:$src2), - 
(!cast(InstName##"Wrr") VK16:$src1, VK16:$src2)>; + def : Pat<(!cast("int_x86_avx512_"##IntName##"_w") + (i16 GR16:$src1), (i16 GR16:$src2)), + (COPY_TO_REGCLASS (!cast(InstName##"Wrr") + (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)), + (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>; } -defm : avx512_mask_binop_int<"kadd", "KADD">; defm : avx512_mask_binop_int<"kand", "KAND">; defm : avx512_mask_binop_int<"kandn", "KANDN">; defm : avx512_mask_binop_int<"kor", "KOR">; defm : avx512_mask_binop_int<"kxnor", "KXNOR">; defm : avx512_mask_binop_int<"kxor", "KXOR">; + // With AVX-512, 8-bit mask is promoted to 16-bit mask. multiclass avx512_binop_pat { let Predicates = [HasAVX512] in @@ -965,28 +1086,34 @@ defm : avx512_binop_pat; // Mask unpacking multiclass avx512_mask_unpck opc, string OpcodeStr, - RegisterClass KRC1, RegisterClass KRC2> { + RegisterClass KRC> { let Predicates = [HasAVX512] in - def rr : I; } multiclass avx512_mask_unpck_bw opc, string OpcodeStr> { - defm BW : avx512_mask_unpck, + defm BW : avx512_mask_unpck, VEX_4V, VEX_L, OpSize, TB; } defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">; +def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))), + (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16), + (COPY_TO_REGCLASS VK8:$src1, VK16))>; + multiclass avx512_mask_unpck_int { let Predicates = [HasAVX512] in - def : Pat<(!cast("int_x86_"##IntName##"_v16i1") - VK8:$src1, VK8:$src2), - (!cast(InstName##"BWrr") VK8:$src1, VK8:$src2)>; + def : Pat<(!cast("int_x86_avx512_"##IntName##"_bw") + (i16 GR16:$src1), (i16 GR16:$src2)), + (COPY_TO_REGCLASS (!cast(InstName##"BWrr") + (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)), + (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>; } +defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">; -defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">; // Mask bit testing multiclass avx512_mask_testop opc, string OpcodeStr, RegisterClass KRC, SDNode OpNode> { @@ -1002,7 +1129,10 @@ multiclass avx512_mask_testop_w opc, string OpcodeStr, SDNode OpNode> { } defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>; -defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest>; + +def : Pat<(X86cmp VK1:$src1, (i1 0)), + (KORTESTWrr (COPY_TO_REGCLASS VK1:$src1, VK16), + (COPY_TO_REGCLASS VK1:$src1, VK16))>; // Mask shift multiclass avx512_mask_shiftop opc, string OpcodeStr, RegisterClass KRC, @@ -1020,8 +1150,8 @@ multiclass avx512_mask_shiftop_w opc1, bits<8> opc2, string OpcodeStr, VEX, OpSize, TA, VEX_W; } -defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", shl>; -defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", srl>; +defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>; +defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>; // Mask setting all 0s or 1s multiclass avx512_mask_setop { @@ -1032,7 +1162,7 @@ multiclass avx512_mask_setop { } multiclass avx512_mask_setop_w { - defm B : avx512_mask_setop; + defm B : avx512_mask_setop; defm W : avx512_mask_setop; } @@ -1216,32 +1346,32 @@ def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src1), // Move Int Doubleword to Packed Double Int // def VMOVDI2PDIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), - "vmovd{z}\t{$src, $dst|$dst, $src}", + "vmovd\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, EVEX, VEX_LIG; def VMOVDI2PDIZrm : AVX512SI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), - "vmovd{z}\t{$src, $dst|$dst, $src}", + "vmovd\t{$src, $dst|$dst, $src}", 
[(set VR128X:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; def VMOV64toPQIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (scalar_to_vector GR64:$src)))], IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG; let isCodeGenOnly = 1 in { def VMOV64toSDZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>; def VMOVSDto64Zrr : AVX512SI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>; } def VMOVSDto64Zmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>, EVEX_CD8<64, CD8VT1>; @@ -1250,12 +1380,12 @@ def VMOVSDto64Zmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$s // let isCodeGenOnly = 1 in { def VMOVDI2SSZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), - "vmovd{z}\t{$src, $dst|$dst, $src}", + "vmovd\t{$src, $dst|$dst, $src}", [(set FR32X:$dst, (bitconvert GR32:$src))], IIC_SSE_MOVDQ>, EVEX, VEX_LIG; def VMOVDI2SSZrm : AVX512SI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src), - "vmovd{z}\t{$src, $dst|$dst, $src}", + "vmovd\t{$src, $dst|$dst, $src}", [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))], IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; } @@ -1263,13 +1393,13 @@ def VMOVDI2SSZrm : AVX512SI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$sr // Move Packed Doubleword Int to Packed Double Int // def VMOVPDI2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), - "vmovd{z}\t{$src, $dst|$dst, $src}", + "vmovd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG; def VMOVPDI2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128X:$src), - "vmovd{z}\t{$src, $dst|$dst, $src}", + "vmovd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128X:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; @@ -1277,7 +1407,7 @@ def VMOVPDI2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs), // Move Packed Doubleword Int first element to Doubleword Int // def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, TB, OpSize, EVEX, VEX_LIG, VEX_W, @@ -1285,7 +1415,7 @@ def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), addr:$dst)], IIC_SSE_MOVDQ>, EVEX, OpSize, VEX_LIG, VEX_W, TB, EVEX_CD8<64, CD8VT1>, @@ -1296,12 +1426,12 @@ def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs), let isCodeGenOnly = 1 in { def VMOVSS2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32X:$src), - "vmovd{z}\t{$src, $dst|$dst, $src}", 
+ "vmovd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32X:$src))], IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG; def VMOVSS2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32X:$src), - "vmovd{z}\t{$src, $dst|$dst, $src}", + "vmovd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32X:$src)), addr:$dst)], IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; } @@ -1310,7 +1440,7 @@ def VMOVSS2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs), // def VMOVQI2PQIZrm : AVX512SI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i64mem:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; @@ -1327,6 +1457,12 @@ multiclass avx512_move_scalar , EVEX_4V, VEX_LIG; + let Constraints = "$src1 = $dst" in + def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst), + (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3), + !strconcat(asm, + "\t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"), + [], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K; def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>, @@ -1338,24 +1474,31 @@ multiclass avx512_move_scalar , XS, EVEX_CD8<32, CD8VT1>; let ExeDomain = SSEPackedDouble in -defm VMOVSDZ : avx512_move_scalar<"movsd{z}", FR64X, X86Movsd, v2f64, f64mem, +defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem, loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>; +def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), + (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X), + VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>; + +def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), + (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X), + VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>; // For the disassembler let isCodeGenOnly = 1 in { def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, FR32X:$src2), - "movss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], + "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_MOV_S_RR>, XS, EVEX_4V, VEX_LIG; def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, FR64X:$src2), - "movsd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], + "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_MOV_S_RR>, XD, EVEX_4V, VEX_LIG, VEX_W; } @@ -1504,7 +1647,7 @@ let Predicates = [HasAVX512] in { let AddedComplexity = 15 in def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (X86vzmovl (v2i64 VR128X:$src))))], IIC_SSE_MOVQ_RR>, EVEX, VEX_W; @@ -1512,7 +1655,7 @@ def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), let AddedComplexity = 20 in def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), (ins i128mem:$src), - "vmovq{z}\t{$src, $dst|$dst, $src}", + "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (X86vzmovl (loadv2i64 addr:$src))))], IIC_SSE_MOVDQ>, EVEX, VEX_W, @@ -1536,6 +1679,8 @@ let Predicates = [HasAVX512] in { (VMOVZPQILo2PQIZrm addr:$src)>; def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))), (VMOVZPQILo2PQIZrr VR128X:$src)>; + def : Pat<(v2i64 (X86vzload addr:$src)), + (VMOVZPQILo2PQIZrm addr:$src)>; } // Use regular 
128-bit instructions to match 256-bit scalar_to_vec+zext. @@ -1797,10 +1942,10 @@ defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VR512, memopv8 multiclass avx512_binop_s opc, string OpcodeStr, SDNode OpNode, SizeItins itins> { - defm SSZ : sse12_fp_scalar, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; - defm SDZ : sse12_fp_scalar, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } @@ -2109,12 +2254,12 @@ def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))), //===----------------------------------------------------------------------===// def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), - "vmovlhps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))], IIC_SSE_MOV_LH>, EVEX_4V; def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), - "vmovhlps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))], IIC_SSE_MOV_LH>, EVEX_4V; @@ -2309,21 +2454,21 @@ multiclass avx512_fma3s_rm opc, string OpcodeStr, SDNode OpNode, } // Constraints = "$src1 = $dst" -defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss{z}", X86Fmadd, FR32X, +defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X, f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd{z}", X86Fmadd, FR64X, +defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd", X86Fmadd, FR64X, f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss{z}", X86Fmsub, FR32X, +defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss", X86Fmsub, FR32X, f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd{z}", X86Fmsub, FR64X, +defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd", X86Fmsub, FR64X, f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss{z}", X86Fnmadd, FR32X, +defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss", X86Fnmadd, FR32X, f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd{z}", X86Fnmadd, FR64X, +defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd", X86Fnmadd, FR64X, f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss{z}", X86Fnmsub, FR32X, +defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss", X86Fnmsub, FR32X, f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>; -defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd{z}", X86Fnmsub, FR64X, +defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd", X86Fnmsub, FR64X, f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>; //===----------------------------------------------------------------------===// @@ -2344,13 +2489,13 @@ let neverHasSideEffects = 1 in { } // neverHasSideEffects = 1 } let Predicates = [HasAVX512] in { -defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}{z}">, +defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">, +defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}">, XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; -defm 
VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}{z}">, +defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}">, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">, +defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}">, XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), @@ -2371,13 +2516,13 @@ def : Pat<(f64 (sint_to_fp GR32:$src)), def : Pat<(f64 (sint_to_fp GR64:$src)), (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; -defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}{z}">, +defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}">, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}{z}">, +defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}">, XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; -defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}{z}">, +defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}">, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}{z}">, +defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}">, XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))), @@ -2408,98 +2553,103 @@ multiclass avx512_cvt_s_int opc, RegisterClass SrcRC, RegisterClass DstR let neverHasSideEffects = 1 in { def rr : SI, EVEX, VEX_LIG; + [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG, + Requires<[HasAVX512]>; let mayLoad = 1 in def rm : SI, EVEX, VEX_LIG; + !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG, + Requires<[HasAVX512]>; } // neverHasSideEffects = 1 } let Predicates = [HasAVX512] in { // Convert float/double to signed/unsigned int 32/64 defm VCVTSS2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si, - ssmem, sse_load_f32, "cvtss2si{z}">, + ssmem, sse_load_f32, "cvtss2si">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSS2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64, - ssmem, sse_load_f32, "cvtss2si{z}">, + ssmem, sse_load_f32, "cvtss2si">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTSS2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi, - ssmem, sse_load_f32, "cvtss2usi{z}">, + ssmem, sse_load_f32, "cvtss2usi">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64, int_x86_avx512_cvtss2usi64, ssmem, - sse_load_f32, "cvtss2usi{z}">, XS, VEX_W, + sse_load_f32, "cvtss2usi">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTSD2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si, - sdmem, sse_load_f64, "cvtsd2si{z}">, + sdmem, sse_load_f64, "cvtsd2si">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTSD2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64, - sdmem, sse_load_f64, "cvtsd2si{z}">, + sdmem, sse_load_f64, "cvtsd2si">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCVTSD2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi, - sdmem, sse_load_f64, "cvtsd2usi{z}">, + sdmem, sse_load_f64, "cvtsd2usi">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64, int_x86_avx512_cvtsd2usi64, sdmem, - sse_load_f64, "cvtsd2usi{z}">, XD, VEX_W, + sse_load_f64, "cvtsd2usi">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; -defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, - int_x86_sse_cvtsi2ss, i32mem, loadi32, 
"cvtsi2ss{l}{z}", - SSE_CVT_Scalar, 0>, XS, EVEX_4V; -defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, - int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}{z}", - SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W; -defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, - int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}{z}", - SSE_CVT_Scalar, 0>, XD, EVEX_4V; -defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, - int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}{z}", - SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W; +let isCodeGenOnly = 1 in { + defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, + int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}", + SSE_CVT_Scalar, 0>, XS, EVEX_4V; + defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, + int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}", + SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W; + defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, + int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}", + SSE_CVT_Scalar, 0>, XD, EVEX_4V; + defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, + int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", + SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W; -defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, - int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}{z}", - SSE_CVT_Scalar, 0>, XS, EVEX_4V; -defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, - int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}{z}", - SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W; -defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, - int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}{z}", - SSE_CVT_Scalar, 0>, XD, EVEX_4V; -defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, - int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}{z}", - SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W; + defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, + int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}", + SSE_CVT_Scalar, 0>, XS, EVEX_4V; + defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, + int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}", + SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W; + defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, + int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}", + SSE_CVT_Scalar, 0>, XD, EVEX_4V; + defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X, + int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}", + SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W; +} // isCodeGenOnly = 1 // Convert float/double to signed/unsigned int 32/64 with truncation -defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si, - ssmem, sse_load_f32, "cvttss2si{z}">, - XS, EVEX_CD8<32, CD8VT1>; -defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64, - int_x86_sse_cvttss2si64, ssmem, sse_load_f32, - "cvttss2si{z}">, XS, VEX_W, - EVEX_CD8<32, CD8VT1>; -defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si, - sdmem, sse_load_f64, "cvttsd2si{z}">, XD, - EVEX_CD8<64, CD8VT1>; -defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64, - int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, - "cvttsd2si{z}">, XD, VEX_W, - EVEX_CD8<64, CD8VT1>; -defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32, - int_x86_avx512_cvttss2usi, ssmem, sse_load_f32, - "cvttss2si{z}">, XS, EVEX_CD8<32, CD8VT1>; -defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64, - 
int_x86_avx512_cvttss2usi64, ssmem, - sse_load_f32, "cvttss2usi{z}">, XS, VEX_W, - EVEX_CD8<32, CD8VT1>; -defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32, - int_x86_avx512_cvttsd2usi, - sdmem, sse_load_f64, "cvttsd2usi{z}">, XD, - EVEX_CD8<64, CD8VT1>; -defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64, - int_x86_avx512_cvttsd2usi64, sdmem, - sse_load_f64, "cvttsd2usi{z}">, XD, VEX_W, - EVEX_CD8<64, CD8VT1>; -} +let isCodeGenOnly = 1 in { + defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si, + ssmem, sse_load_f32, "cvttss2si">, + XS, EVEX_CD8<32, CD8VT1>; + defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64, + int_x86_sse_cvttss2si64, ssmem, sse_load_f32, + "cvttss2si">, XS, VEX_W, + EVEX_CD8<32, CD8VT1>; + defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si, + sdmem, sse_load_f64, "cvttsd2si">, XD, + EVEX_CD8<64, CD8VT1>; + defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64, + int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, + "cvttsd2si">, XD, VEX_W, + EVEX_CD8<64, CD8VT1>; + defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32, + int_x86_avx512_cvttss2usi, ssmem, sse_load_f32, + "cvttss2usi">, XS, EVEX_CD8<32, CD8VT1>; + defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64, + int_x86_avx512_cvttss2usi64, ssmem, + sse_load_f32, "cvttss2usi">, XS, VEX_W, + EVEX_CD8<32, CD8VT1>; + defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32, + int_x86_avx512_cvttsd2usi, + sdmem, sse_load_f64, "cvttsd2usi">, XD, + EVEX_CD8<64, CD8VT1>; + defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64, + int_x86_avx512_cvttsd2usi64, sdmem, + sse_load_f64, "cvttsd2usi">, XD, VEX_W, + EVEX_CD8<64, CD8VT1>; +} // isCodeGenOnly = 1 multiclass avx512_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC, SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, @@ -2513,53 +2663,54 @@ multiclass avx512_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC, } defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem, - loadf32, "cvttss2si{z}">, XS, + loadf32, "cvttss2si">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem, - loadf32, "cvttss2usi{z}">, XS, + loadf32, "cvttss2usi">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem, - loadf32, "cvttss2si{z}">, XS, VEX_W, + loadf32, "cvttss2si">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem, - loadf32, "cvttss2usi{z}">, XS, VEX_W, + loadf32, "cvttss2usi">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem, - loadf64, "cvttsd2si{z}">, XD, + loadf64, "cvttsd2si">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem, - loadf64, "cvttsd2usi{z}">, XD, + loadf64, "cvttsd2usi">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem, - loadf64, "cvttsd2si{z}">, XD, VEX_W, + loadf64, "cvttsd2si">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem, - loadf64, "cvttsd2usi{z}">, XD, VEX_W, + loadf64, "cvttsd2usi">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; +} // HasAVX512 //===----------------------------------------------------------------------===// // AVX-512 Convert form float to double and back //===----------------------------------------------------------------------===// let neverHasSideEffects 
= 1 in { def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst), (ins FR32X:$src1, FR32X:$src2), - "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>; let mayLoad = 1 in def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst), (ins FR32X:$src1, f32mem:$src2), - "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<32, CD8VT1>; // Convert scalar double to scalar single def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst), (ins FR64X:$src1, FR64X:$src2), - "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>; let mayLoad = 1 in def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst), (ins FR64X:$src1, f64mem:$src2), - "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", + "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>; } @@ -2589,6 +2740,9 @@ let neverHasSideEffects = 1 in { !strconcat(asm,"\t{$src, $dst|$dst, $src}"), [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX; + def rrb : AVX512PI, EVEX, EVEX_B; let mayLoad = 1 in def rm : AVX512PI opc, string asm, RegisterClass SrcRC, + RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, + X86MemOperand x86memop, ValueType OpVT, ValueType InVT, + Domain d> { +let neverHasSideEffects = 1 in { + def rr : AVX512PI, EVEX; + let mayLoad = 1 in + def rm : AVX512PI, EVEX; +} // neverHasSideEffects = 1 +} + + defm VCVTPD2PSZ : avx512_vcvt_fp<0x5A, "vcvtpd2ps", VR512, VR256X, fround, memopv8f64, f512mem, v8f32, v8f64, SSEPackedSingle>, EVEX_V512, VEX_W, OpSize, @@ -2621,26 +2793,36 @@ defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp, SSEPackedDouble>, EVEX_V512, XS, EVEX_CD8<32, CD8VH>; -defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint, +defm VCVTTPS2DQZ : avx512_vcvtt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint, memopv16f32, f512mem, v16i32, v16f32, SSEPackedSingle>, EVEX_V512, XS, EVEX_CD8<32, CD8VF>; -defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint, +defm VCVTTPD2DQZ : avx512_vcvtt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint, memopv8f64, f512mem, v8i32, v8f64, SSEPackedDouble>, EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; -defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint, +defm VCVTTPS2UDQZ : avx512_vcvtt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint, memopv16f32, f512mem, v16i32, v16f32, SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint, +// cvttps2udq (src, 0, mask-all-ones, sae-current) +def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512 (v16f32 VR512:$src), + (v16i32 immAllZerosV), (i16 -1), FROUND_CURRENT)), + (VCVTTPS2UDQZrr VR512:$src)>; + +defm VCVTTPD2UDQZ : avx512_vcvtt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint, memopv8f64, f512mem, v8i32, v8f64, SSEPackedDouble>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +// cvttpd2udq (src, 0, mask-all-ones, sae-current) +def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src), + (v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)), + (VCVTTPD2UDQZrr VR512:$src)>; + defm VCVTUDQ2PDZ : 
avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp, memopv4i64, f256mem, v8f64, v8i32, SSEPackedDouble>, EVEX_V512, XS, @@ -2656,22 +2838,57 @@ def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; -def : Pat<(int_x86_avx512_cvtdq2_ps_512 VR512:$src), - (VCVTDQ2PSZrr VR512:$src)>; -def : Pat<(int_x86_avx512_cvtdq2_ps_512 (bitconvert (memopv8i64 addr:$src))), - (VCVTDQ2PSZrm addr:$src)>; +def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src), + (v16f32 immAllZerosV), (i16 -1), imm:$rc)), + (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>; -def VCVTPS2DQZrr : AVX512BI<0x5B, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src), - "vcvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR512:$dst, - (int_x86_avx512_cvt_ps2dq_512 VR512:$src))], - IIC_SSE_CVT_PS_RR>, EVEX, EVEX_V512; -def VCVTPS2DQZrm : AVX512BI<0x5B, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src), - "vcvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR512:$dst, - (int_x86_avx512_cvt_ps2dq_512 (memopv16f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; +multiclass avx512_vcvt_fp2int opc, string asm, RegisterClass SrcRC, + RegisterClass DstRC, PatFrag mem_frag, + X86MemOperand x86memop, Domain d> { +let neverHasSideEffects = 1 in { + def rr : AVX512PI, EVEX; + def rrb : AVX512PI, EVEX, EVEX_B; + let mayLoad = 1 in + def rm : AVX512PI, EVEX; +} // neverHasSideEffects = 1 +} + +defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512, + memopv16f32, f512mem, SSEPackedSingle>, OpSize, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X, + memopv8f64, f512mem, SSEPackedDouble>, XD, VEX_W, + EVEX_V512, EVEX_CD8<64, CD8VF>; + +def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2dq_512 (v16f32 VR512:$src), + (v16i32 immAllZerosV), (i16 -1), imm:$rc)), + (VCVTPS2DQZrrb VR512:$src, imm:$rc)>; + +def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2dq_512 (v8f64 VR512:$src), + (v8i32 immAllZerosV), (i8 -1), imm:$rc)), + (VCVTPD2DQZrrb VR512:$src, imm:$rc)>; + +defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512, + memopv16f32, f512mem, SSEPackedSingle>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X, + memopv8f64, f512mem, SSEPackedDouble>, VEX_W, + EVEX_V512, EVEX_CD8<64, CD8VF>; + +def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src), + (v16i32 immAllZerosV), (i16 -1), imm:$rc)), + (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>; + +def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2udq_512 (v8f64 VR512:$src), + (v8i32 immAllZerosV), (i8 -1), imm:$rc)), + (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>; let Predicates = [HasAVX512] in { def : Pat<(v8f32 (fround (loadv8f64 addr:$src))), @@ -2714,32 +2931,34 @@ defm VCVTPS2PHZ : avx512_f16c_ps2ph, TB, EVEX, VEX_LIG, + "ucomiss">, TB, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, - "ucomisd{z}">, TB, OpSize, EVEX, + "ucomisd">, TB, OpSize, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; let Pattern = [] in { defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load, - "comiss{z}">, TB, EVEX, VEX_LIG, + "comiss">, TB, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load, - "comisd{z}">, TB, OpSize, EVEX, + "comisd">, TB, OpSize, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } - defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, 
f128mem, - load, "ucomiss">, TB, EVEX, VEX_LIG, - EVEX_CD8<32, CD8VT1>; - defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem, - load, "ucomisd">, TB, OpSize, EVEX, - VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; + let isCodeGenOnly = 1 in { + defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem, + load, "ucomiss">, TB, EVEX, VEX_LIG, + EVEX_CD8<32, CD8VT1>; + defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem, + load, "ucomisd">, TB, OpSize, EVEX, + VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; - defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem, - load, "comiss">, TB, EVEX, VEX_LIG, - EVEX_CD8<32, CD8VT1>; - defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem, - load, "comisd">, TB, OpSize, EVEX, - VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; + defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem, + load, "comiss">, TB, EVEX, VEX_LIG, + EVEX_CD8<32, CD8VT1>; + defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem, + load, "comisd">, TB, OpSize, EVEX, + VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; + } } /// avx512_unop_p - AVX-512 unops in packed form. @@ -2769,6 +2988,7 @@ multiclass avx512_fp_unop_p opc, string OpcodeStr, SDNode OpNode> { /// avx512_fp_unop_p_int - AVX-512 intrinsics unops in packed forms. multiclass avx512_fp_unop_p_int opc, string OpcodeStr, Intrinsic V16F32Int, Intrinsic V8F64Int> { +let isCodeGenOnly = 1 in { def PSZr_Int : AVX5128I opc, string OpcodeStr, [(set VR512:$dst, (V8F64Int (memopv8f64 addr:$src)))]>, EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +} // isCodeGenOnly = 1 } /// avx512_fp_unop_s - AVX-512 unops in scalar form. @@ -2807,6 +3028,7 @@ multiclass avx512_fp_unop_s opc, string OpcodeStr> { !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V, EVEX_CD8<32, CD8VT1>; + let isCodeGenOnly = 1 in def SSZm_Int : AVX5128I opc, string OpcodeStr> { !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; + let isCodeGenOnly = 1 in def SDZm_Int : AVX5128I opc, string OpcodeStr, SDNode OpNode, (v8f64 (bitconvert (memopv16f32 addr:$src)))))], itins_d.rm>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; +let isCodeGenOnly = 1 in { def PSZr_Int : AVX512PSI opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR512:$dst, (V8F64Int (memopv8f64 addr:$src)))]>, - EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +} // isCodeGenOnly = 1 } multiclass avx512_sqrt_scalar opc, string OpcodeStr, @@ -2938,12 +3163,13 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, def SSZr : SI, XS, EVEX_4V; + let isCodeGenOnly = 1 in def SSZr_Int : SIi8, XS, EVEX_4V; @@ -2951,12 +3177,13 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, def SSZm : SI, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; + let isCodeGenOnly = 1 in def SSZm_Int : SIi8, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; @@ -2964,12 +3191,13 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, def SDZr : SI, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, XD, EVEX_4V, VEX_W; + let isCodeGenOnly = 1 in def SDZr_Int : SIi8, XD, EVEX_4V, VEX_W; @@ -2977,12 +3205,13 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr, def SDZm : SI, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; + let isCodeGenOnly = 1 in def SDZm_Int : SIi8, XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; @@ -3094,6 +3323,7 @@ let 
ExeDomain = GenericDomain in { []>; // Intrinsic operation, reg. + let isCodeGenOnly = 1 in def SSr_Int : AVX512AIi8, VEX_W; // Intrinsic operation, reg. + let isCodeGenOnly = 1 in def SDr_Int : AVX512AIi8, EVEX_4V; +multiclass avx512_rndscale opc, string OpcodeStr, + X86MemOperand x86memop, RegisterClass RC, + PatFrag mem_frag, Domain d> { +let ExeDomain = d in { + // Intrinsic operation, reg. + // Vector intrinsic operation, reg + def r : AVX512AIi8, EVEX; - defm VRNDSCALEZ : avx512_fp_unop_rm<0x08, 0x09, "vrndscale", f256mem, VR512, - memopv16f32, memopv8f64, - int_x86_avx512_rndscale_ps_512, - int_x86_avx512_rndscale_pd_512, CD8VF>, - EVEX, EVEX_V512; + // Vector intrinsic operation, mem + def m : AVX512AIi8, EVEX; +} // ExeDomain } + +defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512, + memopv16f32, SSEPackedSingle>, EVEX_V512, + EVEX_CD8<32, CD8VF>; + +def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1), + imm:$src2, (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), + FROUND_CURRENT)), + (VRNDSCALEPSZr VR512:$src1, imm:$src2)>; + + +defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512, + memopv8f64, SSEPackedDouble>, EVEX_V512, + VEX_W, EVEX_CD8<64, CD8VF>; + +def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1), + imm:$src2, (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), + FROUND_CURRENT)), + (VRNDSCALEPDZr VR512:$src1, imm:$src2)>; + +multiclass avx512_rndscale_scalar opc, string OpcodeStr, + Operand x86memop, RegisterClass RC, Domain d> { +let ExeDomain = d in { + def r : AVX512AIi8, EVEX_4V; + + def m : AVX512AIi8, EVEX_4V; +} // ExeDomain +} + +defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X, + SSEPackedSingle>, EVEX_CD8<32, CD8VT1>; + +defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X, + SSEPackedDouble>, EVEX_CD8<64, CD8VT1>; + def : Pat<(ffloor FR32X:$src), (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>; def : Pat<(f64 (ffloor FR64X:$src)), @@ -3170,26 +3453,26 @@ def : Pat<(f64 (ftrunc FR64X:$src)), (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>; def : Pat<(v16f32 (ffloor VR512:$src)), - (VRNDSCALEZPSr VR512:$src, (i32 0x1))>; + (VRNDSCALEPSZr VR512:$src, (i32 0x1))>; def : Pat<(v16f32 (fnearbyint VR512:$src)), - (VRNDSCALEZPSr VR512:$src, (i32 0xC))>; + (VRNDSCALEPSZr VR512:$src, (i32 0xC))>; def : Pat<(v16f32 (fceil VR512:$src)), - (VRNDSCALEZPSr VR512:$src, (i32 0x2))>; + (VRNDSCALEPSZr VR512:$src, (i32 0x2))>; def : Pat<(v16f32 (frint VR512:$src)), - (VRNDSCALEZPSr VR512:$src, (i32 0x4))>; + (VRNDSCALEPSZr VR512:$src, (i32 0x4))>; def : Pat<(v16f32 (ftrunc VR512:$src)), - (VRNDSCALEZPSr VR512:$src, (i32 0x3))>; + (VRNDSCALEPSZr VR512:$src, (i32 0x3))>; def : Pat<(v8f64 (ffloor VR512:$src)), - (VRNDSCALEZPDr VR512:$src, (i32 0x1))>; + (VRNDSCALEPDZr VR512:$src, (i32 0x1))>; def : Pat<(v8f64 (fnearbyint VR512:$src)), - (VRNDSCALEZPDr VR512:$src, (i32 0xC))>; + (VRNDSCALEPDZr VR512:$src, (i32 0xC))>; def : Pat<(v8f64 (fceil VR512:$src)), - (VRNDSCALEZPDr VR512:$src, (i32 0x2))>; + (VRNDSCALEPDZr VR512:$src, (i32 0x2))>; def : Pat<(v8f64 (frint VR512:$src)), - (VRNDSCALEZPDr VR512:$src, (i32 0x4))>; + (VRNDSCALEPDZr VR512:$src, (i32 0x4))>; def : Pat<(v8f64 (ftrunc VR512:$src)), - (VRNDSCALEZPDr VR512:$src, (i32 0x3))>; + (VRNDSCALEPDZr VR512:$src, (i32 0x3))>; //------------------------------------------------- // Integer truncate and extend operations @@ -3455,18 +3738,17 @@ defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", 
VR512, i512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; multiclass avx512_conflict opc, string OpcodeStr, - RegisterClass RC, RegisterClass KRC, PatFrag memop_frag, - X86MemOperand x86memop, PatFrag scalar_mfrag, - X86MemOperand x86scalar_mop, string BrdcstStr, - Intrinsic Int, Intrinsic maskInt, Intrinsic maskzInt> { + RegisterClass RC, RegisterClass KRC, + X86MemOperand x86memop, + X86MemOperand x86scalar_mop, string BrdcstStr> { def rr : AVX5128I, EVEX; + []>, EVEX; def rm : AVX5128I, EVEX; + []>, EVEX; def rmb : AVX5128I opc, string OpcodeStr, (ins KRC:$mask, RC:$src), !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"), - [(set RC:$dst, (maskzInt KRC:$mask, RC:$src))]>, EVEX, EVEX_KZ; + []>, EVEX, EVEX_KZ; def rmkz : AVX5128I, - EVEX, EVEX_KZ; + []>, EVEX, EVEX_KZ; def rmbkz : AVX5128I opc, string OpcodeStr, (ins RC:$src1, KRC:$mask, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), - [(set RC:$dst, (maskInt RC:$src1, KRC:$mask, RC:$src2))]>, EVEX, EVEX_K; + []>, EVEX, EVEX_K; def rmk : AVX5128I, EVEX, EVEX_K; + []>, EVEX, EVEX_K; def rmbk : AVX5128I opc, string OpcodeStr, let Predicates = [HasCDI] in { defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM, - memopv16i32, i512mem, loadi32, i32mem, "{1to16}", - int_x86_avx512_conflict_d_512, - int_x86_avx512_conflict_d_mask_512, - int_x86_avx512_conflict_d_maskz_512>, + i512mem, i32mem, "{1to16}">, EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM, - memopv8i64, i512mem, loadi64, i64mem, "{1to8}", - int_x86_avx512_conflict_q_512, - int_x86_avx512_conflict_q_mask_512, - int_x86_avx512_conflict_q_maskz_512>, + i512mem, i64mem, "{1to8}">, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + } + +def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1, + GR16:$mask), + (VPCONFLICTDrrk VR512:$src1, + (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>; + +def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1, + GR8:$mask), + (VPCONFLICTQrrk VR512:$src1, + (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrArithmetic.td b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrArithmetic.td index 7fc9c443373d..b8d7f9a3e71a 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -24,7 +24,7 @@ def LEA32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "lea{l}\t{$src|$dst}, {$dst|$src}", [(set GR32:$dst, lea32addr:$src)], IIC_LEA>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def LEA64_32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_32mem:$src), @@ -460,12 +460,12 @@ let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. 
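The recurring change in this file swaps Requires<[In32BitMode]> for Requires<[Not64BitMode]>: the predicate still tests !Subtarget->is64Bit() (see the X86InstrInfo.td hunk further down), but the new name also covers 16-bit mode, which the old name misleadingly excluded. A minimal standalone C++ sketch of the distinction, using the INC/DEC definitions that follow as the motivating case (the enum and helper are illustrative, not LLVM API):

#include <cassert>

enum class Mode { Bits16, Bits32, Bits64 };

// The renamed predicate: true in 16- and 32-bit mode, false only in
// 64-bit mode, matching !Subtarget->is64Bit().
bool not64BitMode(Mode m) { return m != Mode::Bits64; }

int main() {
  // The short INC/DEC forms below use opcodes 0x40-0x4F, which encode
  // inc/dec reg in 16- and 32-bit mode but are reinterpreted as REX
  // prefixes in 64-bit mode, so "valid unless 64-bit" is the property
  // the predicate really guards.
  assert(not64BitMode(Mode::Bits16));
  assert(not64BitMode(Mode::Bits32));
  assert(!not64BitMode(Mode::Bits64));
  return 0;
}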
def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), "inc{w}\t$dst", [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))], IIC_UNARY_REG>, - OpSize, Requires<[In32BitMode]>; + OpSize, Requires<[Not64BitMode]>; def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), "inc{l}\t$dst", [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))], IIC_UNARY_REG>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))], IIC_UNARY_REG>; @@ -500,16 +500,16 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), let isCodeGenOnly = 1, CodeSize = 2 in { def INC32_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1), "inc{w}\t$dst", [], IIC_UNARY_REG>, - OpSize, Requires<[In32BitMode]>; + OpSize, Requires<[Not64BitMode]>; def INC32_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1), "inc{l}\t$dst", [], IIC_UNARY_REG>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def DEC32_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1), "dec{w}\t$dst", [], IIC_UNARY_REG>, - OpSize, Requires<[In32BitMode]>; + OpSize, Requires<[Not64BitMode]>; def DEC32_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), "dec{l}\t$dst", [], IIC_UNARY_REG>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; } // isCodeGenOnly = 1, CodeSize = 2 } // Constraints = "$src1 = $dst", SchedRW @@ -521,11 +521,11 @@ let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", [(store (add (loadi16 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>, - OpSize, Requires<[In32BitMode]>; + OpSize, Requires<[Not64BitMode]>; def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", [(store (add (loadi32 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst", [(store (add (loadi64 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -562,12 +562,12 @@ def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), "dec{w}\t$dst", [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))], IIC_UNARY_REG>, - OpSize, Requires<[In32BitMode]>; + OpSize, Requires<[Not64BitMode]>; def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), "dec{l}\t$dst", [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))], IIC_UNARY_REG>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))], IIC_UNARY_REG>; @@ -582,11 +582,11 @@ let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", [(store (add (loadi16 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>, - OpSize, Requires<[In32BitMode]>; + OpSize, Requires<[Not64BitMode]>; def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", [(store (add (loadi32 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", [(store (add (loadi64 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrCompiler.td 
b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrCompiler.td index 7d10b67bfe6d..6a6c31fbaf5c 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrCompiler.td @@ -46,11 +46,11 @@ let Defs = [ESP, EFLAGS], Uses = [ESP] in { def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt), "#ADJCALLSTACKDOWN", [(X86callseq_start timm:$amt)]>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), "#ADJCALLSTACKUP", [(X86callseq_end timm:$amt1, timm:$amt2)]>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; } // ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into @@ -72,7 +72,7 @@ def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), // x86-64 va_start lowering magic. -let usesCustomInserter = 1 in { +let usesCustomInserter = 1, Defs = [EFLAGS] in { def VASTART_SAVE_XMM_REGS : I<0, Pseudo, (outs), (ins GR8:$al, @@ -81,7 +81,8 @@ def VASTART_SAVE_XMM_REGS : I<0, Pseudo, "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset", [(X86vastart_save_xmm_regs GR8:$al, imm:$regsavefi, - imm:$offset)]>; + imm:$offset), + (implicit EFLAGS)]>; // The VAARG_64 pseudo-instruction takes the address of the va_list, // and places the address of the next argument into a register. @@ -117,7 +118,7 @@ def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size), "# variable sized alloca for segmented stacks", [(set GR32:$dst, (X86SegAlloca GR32:$size))]>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size), @@ -139,12 +140,12 @@ let Defs = [EAX, EDX, ECX, EFLAGS], FPForm = SpecialFP in { def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src), "# win32 fptoui", [(X86WinFTOL RFP32:$src)]>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def WIN_FTOL_64 : I<0, Pseudo, (outs), (ins RFP64:$src), "# win32 fptoui", [(X86WinFTOL RFP64:$src)]>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; } //===----------------------------------------------------------------------===// @@ -172,7 +173,7 @@ let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf), "#EH_SJLJ_SETJMP32", [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf), "#EH_SJLJ_SETJMP64", [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>, @@ -181,7 +182,7 @@ let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf), "#EH_SJLJ_LONGJMP32", [(X86eh_sjlj_longjmp addr:$buf)]>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf), "#EH_SJLJ_LONGJMP64", [(X86eh_sjlj_longjmp addr:$buf)]>, @@ -221,8 +222,8 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), // FIXME: remove when we can teach regalloc that xor reg, reg is ok. // FIXME: Set encoding to pseudo. 
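The hunk below resolves the second FIXME above: MOV32r0 stops posing as a real 0x31 instruction via the MRMInitReg hack (that format is deleted from X86InstrFormats.td later in this patch) and becomes a true pseudo, expanded after register allocation by the new MOV32r0 case added to expandPostRAPseudo in the X86InstrInfo.cpp hunk further down. A toy model of that expansion, not the real MachineInstr API:

#include <iostream>
#include <string>

// Toy instruction record; the real code rewrites a MachineInstr in place.
struct Inst {
  std::string Opcode;
  std::string Dst;
};

// Mirrors the new expandPostRAPseudo case: MOV32r0 becomes xor dst,dst,
// which zeroes in 2 bytes (31 /r) instead of 5 (B8 id) but clobbers
// EFLAGS, hence the Defs = [EFLAGS] kept on the pseudo.
bool expandMov32r0(Inst &MI) {
  if (MI.Opcode != "MOV32r0")
    return false;
  MI.Opcode = "XOR32rr"; // both source operands are the destination register
  return true;
}

int main() {
  Inst MI{"MOV32r0", "eax"};
  if (expandMov32r0(MI))
    std::cout << "xorl %" << MI.Dst << ", %" << MI.Dst << "\n";
  return 0;
}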
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, - isCodeGenOnly = 1 in -def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", + isPseudo = 1 in +def MOV32r0 : I<0x31, Pseudo, (outs GR32:$dst), (ins), "", [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; // Other widths can also make use of the 32-bit xor, which may have a smaller @@ -318,13 +319,13 @@ let SchedRW = [WriteMicrocoded] in { let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in { def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", [(X86rep_movs i8)], IIC_REP_MOVS>, REP, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", [(X86rep_movs i32)], IIC_REP_MOVS>, REP, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; } let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in { @@ -347,15 +348,15 @@ let Defs = [ECX,EDI], isCodeGenOnly = 1 in { let Uses = [AL,ECX,EDI] in def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", [(X86rep_stos i8)], IIC_REP_STOS>, REP, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; let Uses = [AX,ECX,EDI] in def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}", [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; let Uses = [EAX,ECX,EDI] in def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", [(X86rep_stos i32)], IIC_REP_STOS>, REP, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; } let Defs = [RCX,RDI], isCodeGenOnly = 1 in { @@ -395,11 +396,11 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_addr32", [(X86tlsaddr tls32addr:$sym)]>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_base_addr32", [(X86tlsbaseaddr tls32baseaddr:$sym)]>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; } // All calls clobber the non-callee saved registers. RSP is marked as @@ -431,7 +432,7 @@ let Defs = [EAX, ECX, EFLAGS], def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLSCall_32", [(X86TLSCall addr:$sym)]>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; // For x86_64, the address of the thunk is passed in %rdi, on return // the address of the variable is in %rax. All other registers are preserved. @@ -590,7 +591,7 @@ defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">; let isCodeGenOnly = 1, Defs = [EFLAGS] in def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), "or{l}\t{$zero, $dst|$dst, $zero}", - [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK, + [], IIC_ALU_MEM>, Requires<[Not64BitMode]>, LOCK, Sched<[WriteALULd, WriteRMW]>; let hasSideEffects = 1 in @@ -1020,22 +1021,22 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off), def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; // FIXME: This is disabled for 32-bit PIC mode because the global base // register which is part of the address mode may be assigned a // callee-saved register. 
def : Pat<(X86tcret (load addr:$dst), imm:$off), (TCRETURNmi addr:$dst, imm:$off)>, - Requires<[In32BitMode, IsNotPIC]>; + Requires<[Not64BitMode, IsNotPIC]>; def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), (TCRETURNdi texternalsym:$dst, imm:$off)>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off), (TCRETURNdi texternalsym:$dst, imm:$off)>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>, @@ -1304,13 +1305,13 @@ def : Pat<(and GR32:$src1, 0xff), (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1, GR32_ABCD)), sub_8bit))>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; // r & (2^8-1) ==> movz def : Pat<(and GR16:$src1, 0xff), (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), sub_8bit)), sub_16bit)>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; // r & (2^32-1) ==> movz def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), @@ -1345,13 +1346,13 @@ def : Pat<(sext_inreg GR32:$src, i8), (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), sub_8bit))>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def : Pat<(sext_inreg GR16:$src, i8), (EXTRACT_SUBREG (i32 (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit))), sub_16bit)>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def : Pat<(sext_inreg GR64:$src, i32), (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; @@ -1383,11 +1384,11 @@ def : Pat<(i16 (trunc GR32:$src)), def : Pat<(i8 (trunc GR32:$src)), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), sub_8bit)>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def : Pat<(i8 (trunc GR16:$src)), (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit)>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def : Pat<(i32 (trunc GR64:$src)), (EXTRACT_SUBREG GR64:$src, sub_32bit)>; def : Pat<(i16 (trunc GR64:$src)), @@ -1405,38 +1406,38 @@ def : Pat<(i8 (trunc GR16:$src)), def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit_hi)>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), sub_8bit_hi)>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit_hi)), sub_16bit)>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit_hi))>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit_hi))>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), sub_8bit_hi))>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), sub_8bit_hi))>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; // h-register tricks. 
// For now, be conservative on x86-64 and use an h-register extract only if the @@ -1725,17 +1726,17 @@ def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2), // Increment reg. def : Pat<(add GR8 :$src, 1), (INC8r GR8 :$src)>; -def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[Not64BitMode]>; def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>; -def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[Not64BitMode]>; def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>; def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>; // Decrement reg. def : Pat<(add GR8 :$src, -1), (DEC8r GR8 :$src)>; -def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[Not64BitMode]>; def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>; -def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[Not64BitMode]>; def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>; def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrControl.td b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrControl.td index e4ccc06feb89..7d610e6ca45e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrControl.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrControl.td @@ -94,10 +94,10 @@ let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in // jecxz. let Uses = [CX] in def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), - "jcxz\t$dst", [], IIC_JCXZ>, AdSize, Requires<[In32BitMode]>; + "jcxz\t$dst", [], IIC_JCXZ>, AdSize, Requires<[Not64BitMode]>; let Uses = [ECX] in def JECXZ_32 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), - "jecxz\t$dst", [], IIC_JCXZ>, Requires<[In32BitMode]>; + "jecxz\t$dst", [], IIC_JCXZ>, Requires<[Not64BitMode]>; // J*CXZ instruction: 64-bit versions of this instruction for the asmparser. 
// In 64-bit mode, the address size prefix is jecxz and the unprefixed version @@ -113,11 +113,11 @@ let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", - [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>, + [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[Not64BitMode]>, Sched<[WriteJump]>; def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst", [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, - Requires<[In32BitMode]>, Sched<[WriteJumpLd]>; + Requires<[Not64BitMode]>, Sched<[WriteJumpLd]>; def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst", [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>, @@ -166,14 +166,14 @@ let isCall = 1 in def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i32imm_pcrel:$dst), "call{l}\t$dst", [], IIC_CALL_RI>, - Requires<[In32BitMode]>, Sched<[WriteJump]>; + Requires<[Not64BitMode]>, Sched<[WriteJump]>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst), "call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>, - Requires<[In32BitMode]>, Sched<[WriteJump]>; + Requires<[Not64BitMode]>, Sched<[WriteJump]>; def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst), "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>, - Requires<[In32BitMode,FavorMemIndirectCall]>, + Requires<[Not64BitMode,FavorMemIndirectCall]>, Sched<[WriteJumpLd]>; def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs), diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrFMA.td b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrFMA.td index 69cd5a568ba8..1d32040dc958 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrFMA.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrFMA.td @@ -138,19 +138,21 @@ multiclass fma3s_rm opc, string OpcodeStr, X86MemOperand x86memop, multiclass fma3s_rm_int opc, string OpcodeStr, Operand memop, ComplexPattern mem_cpat, Intrinsic IntId, RegisterClass RC> { - let isCommutable = 1 in - def r_Int : FMA3; - def m_Int : FMA3; + let isCodeGenOnly = 1 in { + let isCommutable = 1 in + def r_Int : FMA3; + def m_Int : FMA3; + } // isCodeGenOnly } } // Constraints = "$src1 = $dst" @@ -230,6 +232,7 @@ let isCodeGenOnly = 1, hasSideEffects = 0 in multiclass fma4s_int opc, string OpcodeStr, Operand memop, ComplexPattern mem_cpat, Intrinsic Int> { +let isCodeGenOnly = 1 in { let isCommutable = 1 in def rr_Int : FMA4 opc, string OpcodeStr, Operand memop, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>, VEX_LIG; +} // isCodeGenOnly = 1 } multiclass fma4p opc, string OpcodeStr, SDNode OpNode, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrFPStack.td b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrFPStack.td index 7c3788865c1e..ded44eea06a3 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrFPStack.td @@ -218,37 +218,37 @@ defm DIV : FPBinary; defm DIVR: FPBinary; } -class FPST0rInst o, string asm> - : FPI, D8; -class FPrST0Inst o, string asm> - : FPI, DC; -class FPrST0PInst o, string asm> - : FPI, DE; +class FPST0rInst + : FPI<0xD8, fp, (outs), (ins RST:$op), asm>; +class FPrST0Inst + : FPI<0xDC, fp, (outs), (ins RST:$op), asm>; +class FPrST0PInst + : FPI<0xDE, fp, (outs), (ins RST:$op), asm>; // NOTE: GAS and 
apparently all other AT&T style assemblers have a broken notion // of some of the 'reverse' forms of the fsub and fdiv instructions. As such, // we have to put some 'r's in and take them out of weird places. -def ADD_FST0r : FPST0rInst <0xC0, "fadd\t$op">; -def ADD_FrST0 : FPrST0Inst <0xC0, "fadd\t{%st(0), $op|$op, st(0)}">; -def ADD_FPrST0 : FPrST0PInst<0xC0, "faddp\t$op">; -def SUBR_FST0r : FPST0rInst <0xE8, "fsubr\t$op">; -def SUB_FrST0 : FPrST0Inst <0xE8, "fsub{r}\t{%st(0), $op|$op, st(0)}">; -def SUB_FPrST0 : FPrST0PInst<0xE8, "fsub{r}p\t$op">; -def SUB_FST0r : FPST0rInst <0xE0, "fsub\t$op">; -def SUBR_FrST0 : FPrST0Inst <0xE0, "fsub{|r}\t{%st(0), $op|$op, st(0)}">; -def SUBR_FPrST0 : FPrST0PInst<0xE0, "fsub{|r}p\t$op">; -def MUL_FST0r : FPST0rInst <0xC8, "fmul\t$op">; -def MUL_FrST0 : FPrST0Inst <0xC8, "fmul\t{%st(0), $op|$op, st(0)}">; -def MUL_FPrST0 : FPrST0PInst<0xC8, "fmulp\t$op">; -def DIVR_FST0r : FPST0rInst <0xF8, "fdivr\t$op">; -def DIV_FrST0 : FPrST0Inst <0xF8, "fdiv{r}\t{%st(0), $op|$op, st(0)}">; -def DIV_FPrST0 : FPrST0PInst<0xF8, "fdiv{r}p\t$op">; -def DIV_FST0r : FPST0rInst <0xF0, "fdiv\t$op">; -def DIVR_FrST0 : FPrST0Inst <0xF0, "fdiv{|r}\t{%st(0), $op|$op, st(0)}">; -def DIVR_FPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p\t$op">; +def ADD_FST0r : FPST0rInst ; +def ADD_FrST0 : FPrST0Inst ; +def ADD_FPrST0 : FPrST0PInst; +def SUBR_FST0r : FPST0rInst ; +def SUB_FrST0 : FPrST0Inst ; +def SUB_FPrST0 : FPrST0PInst; +def SUB_FST0r : FPST0rInst ; +def SUBR_FrST0 : FPrST0Inst ; +def SUBR_FPrST0 : FPrST0PInst; +def MUL_FST0r : FPST0rInst ; +def MUL_FrST0 : FPrST0Inst ; +def MUL_FPrST0 : FPrST0PInst; +def DIVR_FST0r : FPST0rInst ; +def DIV_FrST0 : FPrST0Inst ; +def DIV_FPrST0 : FPrST0PInst; +def DIV_FST0r : FPST0rInst ; +def DIVR_FrST0 : FPrST0Inst ; +def DIVR_FPrST0 : FPrST0PInst; -def COM_FST0r : FPST0rInst <0xD0, "fcom\t$op">; -def COMP_FST0r : FPST0rInst <0xD8, "fcomp\t$op">; +def COM_FST0r : FPST0rInst ; +def COMP_FST0r : FPST0rInst ; // Unary operations. multiclass FPUnary opcode, string asmstring> { @@ -336,22 +336,22 @@ defm CMOVNP : FPCMov; let Predicates = [HasCMov] in { // These are not factored because there's no clean way to pass DA/DB. 
-def CMOVB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins), - "fcmovb\t{$op, %st(0)|st(0), $op}">, DA; -def CMOVBE_F : FPI<0xD0, AddRegFrm, (outs RST:$op), (ins), - "fcmovbe\t{$op, %st(0)|st(0), $op}">, DA; -def CMOVE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins), - "fcmove\t{$op, %st(0)|st(0), $op}">, DA; -def CMOVP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins), - "fcmovu\t{$op, %st(0)|st(0), $op}">, DA; -def CMOVNB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins), - "fcmovnb\t{$op, %st(0)|st(0), $op}">, DB; -def CMOVNBE_F: FPI<0xD0, AddRegFrm, (outs RST:$op), (ins), - "fcmovnbe\t{$op, %st(0)|st(0), $op}">, DB; -def CMOVNE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins), - "fcmovne\t{$op, %st(0)|st(0), $op}">, DB; -def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins), - "fcmovnu\t{$op, %st(0)|st(0), $op}">, DB; +def CMOVB_F : FPI<0xDA, MRM0r, (outs RST:$op), (ins), + "fcmovb\t{$op, %st(0)|st(0), $op}">; +def CMOVBE_F : FPI<0xDA, MRM2r, (outs RST:$op), (ins), + "fcmovbe\t{$op, %st(0)|st(0), $op}">; +def CMOVE_F : FPI<0xDA, MRM1r, (outs RST:$op), (ins), + "fcmove\t{$op, %st(0)|st(0), $op}">; +def CMOVP_F : FPI<0xDA, MRM3r, (outs RST:$op), (ins), + "fcmovu\t{$op, %st(0)|st(0), $op}">; +def CMOVNB_F : FPI<0xDB, MRM0r, (outs RST:$op), (ins), + "fcmovnb\t{$op, %st(0)|st(0), $op}">; +def CMOVNBE_F: FPI<0xDB, MRM2r, (outs RST:$op), (ins), + "fcmovnbe\t{$op, %st(0)|st(0), $op}">; +def CMOVNE_F : FPI<0xDB, MRM1r, (outs RST:$op), (ins), + "fcmovne\t{$op, %st(0)|st(0), $op}">; +def CMOVNP_F : FPI<0xDB, MRM3r, (outs RST:$op), (ins), + "fcmovnu\t{$op, %st(0)|st(0), $op}">; } // Predicates = [HasCMov] // Floating point loads & stores. @@ -492,14 +492,10 @@ def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), // FP Stack manipulation instructions. let SchedRW = [WriteMove] in { -def LD_Frr : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op", - IIC_FLD>, D9; -def ST_Frr : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op", - IIC_FST>, DD; -def ST_FPrr : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op", - IIC_FST>, DD; -def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op", - IIC_FXCH>, D9; +def LD_Frr : FPI<0xD9, MRM0r, (outs), (ins RST:$op), "fld\t$op", IIC_FLD>; +def ST_Frr : FPI<0xDD, MRM2r, (outs), (ins RST:$op), "fst\t$op", IIC_FST>; +def ST_FPrr : FPI<0xDD, MRM3r, (outs), (ins RST:$op), "fstp\t$op", IIC_FST>; +def XCH_F : FPI<0xD9, MRM1r, (outs), (ins RST:$op), "fxch\t$op", IIC_FXCH>; } // Floating point constant loads. 
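The x87 hunks above replace the old AddRegFrm encoding (a base opcode byte with the stack-register index added in, qualified by a separate D9/DA/DB/DD map class) with an explicit two-byte form: the escape byte becomes the opcode and the variant moves into the ModRM reg field via MRM0r-MRM7r. A short sketch of the byte pattern this describes, assuming only what is visible in the hunks:

#include <cassert>
#include <cstdint>

// New encoding shape: escape byte (0xD9/0xDA/0xDB/0xDD) as the opcode,
// then a register-form ModRM byte with mod = 11, reg = the MRMnr form
// number, rm = the st(i) index.
uint8_t modRMByte(uint8_t form, uint8_t stIdx) {
  return uint8_t(0xC0 | (form << 3) | (stIdx & 7));
}

int main() {
  assert(modRMByte(0, 2) == 0xC2); // fcmovb ST(2): FPI<0xDA, MRM0r> -> DA C2
  assert(modRMByte(2, 1) == 0xD1); // fcmovnbe ST(1): FPI<0xDB, MRM2r> -> DB D1
  assert(modRMByte(0, 3) == 0xC3); // fld ST(3): FPI<0xD9, MRM0r> -> D9 C3
  return 0;
}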
@@ -546,31 +542,26 @@ def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, } let Defs = [FPSW], Uses = [ST0] in { -def UCOM_Fr : FPI<0xE0, AddRegFrm, // FPSW = cmp ST(0) with ST(i) - (outs), (ins RST:$reg), - "fucom\t$reg", IIC_FUCOM>, DD; -def UCOM_FPr : FPI<0xE8, AddRegFrm, // FPSW = cmp ST(0) with ST(i), pop - (outs), (ins RST:$reg), - "fucomp\t$reg", IIC_FUCOM>, DD; +def UCOM_Fr : FPI<0xDD, MRM4r, // FPSW = cmp ST(0) with ST(i) + (outs), (ins RST:$reg), "fucom\t$reg", IIC_FUCOM>; +def UCOM_FPr : FPI<0xDD, MRM5r, // FPSW = cmp ST(0) with ST(i), pop + (outs), (ins RST:$reg), "fucomp\t$reg", IIC_FUCOM>; def UCOM_FPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop - (outs), (ins), - "fucompp", IIC_FUCOM>, DA; + (outs), (ins), "fucompp", IIC_FUCOM>, DA; } let Defs = [EFLAGS, FPSW], Uses = [ST0] in { -def UCOM_FIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i) - (outs), (ins RST:$reg), - "fucomi\t$reg", IIC_FUCOMI>, DB; -def UCOM_FIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop - (outs), (ins RST:$reg), - "fucompi\t$reg", IIC_FUCOMI>, DF; +def UCOM_FIr : FPI<0xDB, MRM5r, // CC = cmp ST(0) with ST(i) + (outs), (ins RST:$reg), "fucomi\t$reg", IIC_FUCOMI>; +def UCOM_FIPr : FPI<0xDF, MRM5r, // CC = cmp ST(0) with ST(i), pop + (outs), (ins RST:$reg), "fucompi\t$reg", IIC_FUCOMI>; } let Defs = [EFLAGS, FPSW] in { -def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg), - "fcomi\t$reg", IIC_FCOMI>, DB; -def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg), - "fcompi\t$reg", IIC_FCOMI>, DF; +def COM_FIr : FPI<0xDB, MRM6r, (outs), (ins RST:$reg), + "fcomi\t$reg", IIC_FCOMI>; +def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RST:$reg), + "fcompi\t$reg", IIC_FCOMI>; } } // SchedRW @@ -594,8 +585,8 @@ def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16] let SchedRW = [WriteMicrocoded] in { let Defs = [FPSW] in def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", [], IIC_FNINIT>, DB; -def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg), - "ffree\t$reg", IIC_FFREE>, DD; +def FFREE : FPI<0xDD, MRM0r, (outs), (ins RST:$reg), + "ffree\t$reg", IIC_FFREE>; // Clear exceptions let Defs = [FPSW] in diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrFormats.td b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrFormats.td index 0fd9011338b4..b67948254141 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrFormats.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrFormats.td @@ -28,7 +28,6 @@ def MRM6r : Format<22>; def MRM7r : Format<23>; def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>; def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>; def MRM6m : Format<30>; def MRM7m : Format<31>; -def MRMInitReg : Format<32>; def MRM_C1 : Format<33>; def MRM_C2 : Format<34>; def MRM_C3 : Format<35>; @@ -784,7 +783,7 @@ class MMXI o, Format F, dag outs, dag ins, string asm, : I, TB, Requires<[HasMMX]>; class MMXI32 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> - : I, TB, Requires<[HasMMX,In32BitMode]>; + : I, TB, Requires<[HasMMX,Not64BitMode]>; class MMXI64 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : I, TB, Requires<[HasMMX,In64BitMode]>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 1fed424fd6e0..28e2cd1f4875 100644 --- 
a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -59,8 +59,8 @@ def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>; def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>; def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; -def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>; -def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>; +def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>; +//def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>; def X86pshufb : SDNode<"X86ISD::PSHUFB", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; @@ -108,6 +108,9 @@ def X86vsext : SDNode<"X86ISD::VSEXT", def X86vtrunc : SDNode<"X86ISD::VTRUNC", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCisInt<1>]>>; +def X86trunc : SDNode<"X86ISD::TRUNC", + SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>>; + def X86vtruncm : SDNode<"X86ISD::VTRUNCM", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCisInt<1>, @@ -130,9 +133,14 @@ def X86IntCmpMask : SDTypeProfile<1, 2, def X86pcmpeqm : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>; def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>; -def X86CmpMaskCC : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; +def X86CmpMaskCC : + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; +def X86CmpMaskCCScalar : + SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; + def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>; def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>; +def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>; def X86vshl : SDNode<"X86ISD::VSHL", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, @@ -155,10 +163,10 @@ def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>; def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>; -def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>; -def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>, +def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>>; +def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>; def X86pmuludq : SDNode<"X86ISD::PMULUDQ", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, @@ -462,6 +470,8 @@ def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>; // 512-bit bitconvert pattern fragments def bc_v16i32 : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>; def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>; +def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>; +def bc_v16f32 : PatFrag<(ops node:$in), (v16f32 (bitconvert node:$in))>; def vzmovl_v2i64 : PatFrag<(ops node:$src), (bitconvert (v2i64 (X86vzmovl @@ -478,6 +488,14 @@ def fp32imm0 : PatLeaf<(f32 fpimm), [{ return N->isExactlyValue(+0.0); }]>; +def I8Imm : SDNodeXForm<imm, [{ + return getI8Imm(N->getZExtValue()); +}]>; + +def FROUND_NO_EXC : ImmLeaf<i32, [{ return Imm == 8; }]>; +def FROUND_CURRENT : ImmLeaf<i32, [{ return Imm == 4; }]>; + // BYTE_imm - Transform bit immediates into byte immediates.
def BYTE_imm : SDNodeXForm> 3 diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrInfo.cpp index 24617737420b..2e136ad80572 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3015,6 +3015,11 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, return 0; } +inline static bool MaskRegClassContains(unsigned Reg) { + return X86::VK8RegClass.contains(Reg) || + X86::VK16RegClass.contains(Reg) || + X86::VK1RegClass.contains(Reg); +} static unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) { if (X86::VR128XRegClass.contains(DestReg, SrcReg) || @@ -3024,11 +3029,23 @@ unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) { SrcReg = get512BitSuperRegister(SrcReg); return X86::VMOVAPSZrr; } - if ((X86::VK8RegClass.contains(DestReg) || - X86::VK16RegClass.contains(DestReg)) && - (X86::VK8RegClass.contains(SrcReg) || - X86::VK16RegClass.contains(SrcReg))) + if (MaskRegClassContains(DestReg) && + MaskRegClassContains(SrcReg)) return X86::KMOVWkk; + if (MaskRegClassContains(DestReg) && + (X86::GR32RegClass.contains(SrcReg) || + X86::GR16RegClass.contains(SrcReg) || + X86::GR8RegClass.contains(SrcReg))) { + SrcReg = getX86SubSuperRegister(SrcReg, MVT::i32); + return X86::KMOVWkr; + } + if ((X86::GR32RegClass.contains(DestReg) || + X86::GR16RegClass.contains(DestReg) || + X86::GR8RegClass.contains(DestReg)) && + MaskRegClassContains(SrcReg)) { + DestReg = getX86SubSuperRegister(DestReg, MVT::i32); + return X86::KMOVWrk; + } return 0; } @@ -3837,6 +3854,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { bool HasAVX = TM.getSubtarget().hasAVX(); MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); switch (MI->getOpcode()) { + case X86::MOV32r0: + return Expand2AddrUndef(MIB, get(X86::XOR32rr)); case X86::SETB_C8r: return Expand2AddrUndef(MIB, get(X86::SBB8rr)); case X86::SETB_C16r: @@ -4198,75 +4217,10 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, MI->addRegisterKilled(Reg, TRI, true); } -static MachineInstr* foldPatchpoint(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl &Ops, - int FrameIndex, - const TargetInstrInfo &TII) { - unsigned StartIdx = 0; - switch (MI->getOpcode()) { - case TargetOpcode::STACKMAP: - StartIdx = 2; // Skip ID, nShadowBytes. - break; - case TargetOpcode::PATCHPOINT: { - // For PatchPoint, the call args are not foldable. - PatchPointOpers opers(MI); - StartIdx = opers.getVarIdx(); - break; - } - default: - llvm_unreachable("unexpected stackmap opcode"); - } - - // Return false if any operands requested for folding are not foldable (not - // part of the stackmap's live values). - for (SmallVectorImpl::const_iterator I = Ops.begin(), E = Ops.end(); - I != E; ++I) { - if (*I < StartIdx) - return 0; - } - - MachineInstr *NewMI = - MF.CreateMachineInstr(TII.get(MI->getOpcode()), MI->getDebugLoc(), true); - MachineInstrBuilder MIB(MF, NewMI); - - // No need to fold return, the meta data, and function arguments - for (unsigned i = 0; i < StartIdx; ++i) - MIB.addOperand(MI->getOperand(i)); - - for (unsigned i = StartIdx; i < MI->getNumOperands(); ++i) { - MachineOperand &MO = MI->getOperand(i); - if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) { - assert(MO.getReg() && "patchpoint can only fold a vreg operand"); - // Compute the spill slot size and offset. 
- const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(MO.getReg()); - unsigned SpillSize; - unsigned SpillOffset; - bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, - SpillOffset, &MF.getTarget()); - if (!Valid) - report_fatal_error("cannot spill patchpoint subregister operand"); - - MIB.addOperand(MachineOperand::CreateImm(StackMaps::IndirectMemRefOp)); - MIB.addOperand(MachineOperand::CreateImm(SpillSize)); - MIB.addOperand(MachineOperand::CreateFI(FrameIndex)); - addOffset(MIB, SpillOffset); - } - else - MIB.addOperand(MO); - } - return NewMI; -} - MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl &Ops, int FrameIndex) const { - // Special case stack map and patch point intrinsics. - if (MI->getOpcode() == TargetOpcode::STACKMAP - || MI->getOpcode() == TargetOpcode::PATCHPOINT) { - return foldPatchpoint(MF, MI, Ops, FrameIndex, *this); - } // Check switch flag if (NoFusing) return NULL; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrInfo.td b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrInfo.td index 6e5d54349faa..5e03a59adc12 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrInfo.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrInfo.td @@ -23,8 +23,8 @@ def SDTIntShiftDOp: SDTypeProfile<1, 3, def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>; -def SDTX86Cmpsd : SDTypeProfile<1, 3, [SDTCisVT<0, f64>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; -def SDTX86Cmpss : SDTypeProfile<1, 3, [SDTCisVT<0, f32>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; +def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; +//def SDTX86Cmpss : SDTypeProfile<1, 3, [SDTCisVT<0, f32>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; def SDTX86Cmov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, @@ -510,6 +510,10 @@ def GR32orGR64 : RegisterOperand { let ParserMatchClass = X86GR32orGR64AsmOperand; } +def AVX512RC : Operand { + let PrintMethod = "printRoundingControl"; + let OperandType = "OPERAND_IMMEDIATE"; +} // Sign-extended immediate classes. We don't need to define the full lattice // here because there is no instruction with an ambiguity between ImmSExti64i32 // and ImmSExti32i8. 
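The copyPhysReg changes in the X86InstrInfo.cpp hunk above let the register allocator copy between AVX-512 k-mask registers and general-purpose registers: sub-32-bit GPRs are first widened to their 32-bit super-register (the real code calls getX86SubSuperRegister(Reg, MVT::i32)) and the copy is emitted as KMOVWkr or KMOVWrk. A compact model of the opcode selection (illustrative types, not the LLVM API):

#include <cassert>
#include <string>

enum class RC { Mask, GR8, GR16, GR32 };

// Mirrors the new cases in copyPhysRegOpcode_AVX512: GPR<->mask copies go
// through the 32-bit register and use the KMOVW forms, the only mask/GPR
// moves the base AVX-512 feature set defines at this point.
std::string copyOpcode(RC Dst, RC Src) {
  auto isGPR = [](RC C) { return C == RC::GR8 || C == RC::GR16 || C == RC::GR32; };
  if (Dst == RC::Mask && Src == RC::Mask) return "KMOVWkk";
  if (Dst == RC::Mask && isGPR(Src))      return "KMOVWkr"; // GPR -> mask
  if (isGPR(Dst) && Src == RC::Mask)      return "KMOVWrk"; // mask -> GPR
  return "";                                                // not handled here
}

int main() {
  assert(copyOpcode(RC::Mask, RC::GR16) == "KMOVWkr");
  assert(copyOpcode(RC::GR32, RC::Mask) == "KMOVWrk");
  assert(copyOpcode(RC::Mask, RC::Mask) == "KMOVWkk");
  return 0;
}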
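The new AVX512RC operand at the end of the hunk above carries the static rounding override used by the EVEX.B "rrb" instruction variants introduced earlier in the patch, and its PrintMethod renders the immediate as an assembly suffix. A standalone sketch under the standard x86 rounding-control encoding (0 nearest, 1 down, 2 up, 3 toward zero); the exact printed spellings are an assumption here, not taken from the patch:

#include <cstdio>

// Two low bits of the AVX512RC immediate select the overriding rounding
// mode; embedded rounding also implies suppress-all-exceptions (sae).
const char *roundingControl(unsigned Imm) {
  switch (Imm & 3) {
  case 0:  return "{rn-sae}"; // round to nearest even
  case 1:  return "{rd-sae}"; // round down (toward -infinity)
  case 2:  return "{ru-sae}"; // round up (toward +infinity)
  default: return "{rz-sae}"; // round toward zero (truncate)
  }
}

int main() {
  for (unsigned Imm = 0; Imm < 4; ++Imm)
    std::printf("rc immediate %u prints as %s\n", Imm, roundingControl(Imm));
  return 0;
}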
@@ -657,7 +661,8 @@ def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">; -def HasAVX512 : Predicate<"Subtarget->hasAVX512()">; +def HasAVX512 : Predicate<"Subtarget->hasAVX512()">, + AssemblerPredicate<"FeatureAVX512", "AVX-512 ISA">; def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">; def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">; def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">; @@ -691,8 +696,8 @@ def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; -def In32BitMode : Predicate<"!Subtarget->is64Bit()">, - AssemblerPredicate<"!Mode64Bit", "32-bit mode">; +def Not64BitMode : Predicate<"!Subtarget->is64Bit()">, + AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">; def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate<"Mode64Bit", "64-bit mode">; def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; @@ -860,7 +865,7 @@ let SchedRW = [WriteALU] in { let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in def LEAVE : I<0xC9, RawFrm, (outs), (ins), "leave", [], IIC_LEAVE>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in def LEAVE64 : I<0xC9, RawFrm, @@ -889,7 +894,7 @@ def POP32rmm: I<0x8F, MRM0m, (outs), (ins i32mem:$dst), "pop{l}\t$dst", [], def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize; def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; } // mayLoad, SchedRW let mayStore = 1, SchedRW = [WriteStore] in { @@ -917,7 +922,7 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>, OpSize; def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", [], IIC_PUSH_F>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; } // mayStore, SchedRW } @@ -961,12 +966,12 @@ def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>, let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP], mayLoad = 1, neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", [], IIC_POP_A>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; } let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in { def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", [], IIC_PUSH_A>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; } let Constraints = "$src = $dst", SchedRW = [WriteALU] in { @@ -1125,24 +1130,24 @@ let SchedRW = [WriteALU] in { let mayLoad = 1 in { def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), "mov{b}\t{$src, %al|al, $src}", [], IIC_MOV_MEM>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src), "mov{w}\t{$src, %ax|ax, $src}", [], IIC_MOV_MEM>, OpSize, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src), "mov{l}\t{$src, %eax|eax, $src}", [], IIC_MOV_MEM>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; } let 
mayStore = 1 in { def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins), "mov{b}\t{%al, $dst|$dst, al}", [], IIC_MOV_MEM>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins), "mov{w}\t{%ax, $dst|$dst, ax}", [], IIC_MOV_MEM>, OpSize, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), "mov{l}\t{%eax, $dst|$dst, eax}", [], IIC_MOV_MEM>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; } } @@ -1513,7 +1518,7 @@ def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src), "xchg{w}\t{$src, %ax|ax, $src}", [], IIC_XCHG_REG>, OpSize; def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src), "xchg{l}\t{$src, %eax|eax, $src}", [], IIC_XCHG_REG>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; // Uses GR32_NOAX in 64-bit mode to prevent encoding using the 0x90 NOP encoding. // xchg %eax, %eax needs to clear upper 32-bits of RAX so is not a NOP. def XCHG32ar64 : I<0x90, AddRegFrm, (outs), (ins GR32_NOAX:$src), @@ -1594,7 +1599,8 @@ def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), def LOCK_PREFIX : I<0xF0, RawFrm, (outs), (ins), "lock", []>; // Rex64 instruction prefix -def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>; +def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>, + Requires<[In64BitMode]>; // Data16 instruction prefix def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>; @@ -1644,50 +1650,50 @@ let SchedRW = [WriteMicrocoded] in { // ASCII Adjust After Addition // sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; // ASCII Adjust AX Before Division // sets AL, AH and EFLAGS and uses AL and AH def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src), - "aad\t$src", [], IIC_AAD>, Requires<[In32BitMode]>; + "aad\t$src", [], IIC_AAD>, Requires<[Not64BitMode]>; // ASCII Adjust AX After Multiply // sets AL, AH and EFLAGS and uses AL def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src), - "aam\t$src", [], IIC_AAM>, Requires<[In32BitMode]>; + "aam\t$src", [], IIC_AAM>, Requires<[Not64BitMode]>; // ASCII Adjust AL After Subtraction - sets // sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", [], IIC_AAS>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; // Decimal Adjust AL after Addition // sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; // Decimal Adjust AL after Subtraction // sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; } // SchedRW let SchedRW = [WriteSystem] in { // Check Array Index Against Bounds def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, OpSize, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; // Adjust RPL Field of Segment Selector def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_REG>, - 
Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; } // SchedRW //===----------------------------------------------------------------------===// @@ -2088,7 +2094,7 @@ include "X86InstrCompiler.td" // Assembler Mnemonic Aliases //===----------------------------------------------------------------------===// -def : MnemonicAlias<"call", "calll", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"call", "calll", "att">, Requires<[Not64BitMode]>; def : MnemonicAlias<"call", "callq", "att">, Requires<[In64BitMode]>; def : MnemonicAlias<"cbw", "cbtw", "att">; @@ -2101,35 +2107,35 @@ def : MnemonicAlias<"cqo", "cqto", "att">; // lret maps to lretl, it is not ambiguous with lretq. def : MnemonicAlias<"lret", "lretl", "att">; -def : MnemonicAlias<"leavel", "leave", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"leavel", "leave", "att">, Requires<[Not64BitMode]>; def : MnemonicAlias<"leaveq", "leave", "att">, Requires<[In64BitMode]>; def : MnemonicAlias<"loopz", "loope", "att">; def : MnemonicAlias<"loopnz", "loopne", "att">; -def : MnemonicAlias<"pop", "popl", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"pop", "popl", "att">, Requires<[Not64BitMode]>; def : MnemonicAlias<"pop", "popq", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"popf", "popfl", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"popf", "popfl", "att">, Requires<[Not64BitMode]>; def : MnemonicAlias<"popf", "popfq", "att">, Requires<[In64BitMode]>; def : MnemonicAlias<"popfd", "popfl", "att">; // FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in // all modes. However: "push (addr)" and "push $42" should default to // pushl/pushq depending on the current mode. 
Similar for "pop %bx" -def : MnemonicAlias<"push", "pushl", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"push", "pushl", "att">, Requires<[Not64BitMode]>; def : MnemonicAlias<"push", "pushq", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"pushf", "pushfl", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"pushf", "pushfl", "att">, Requires<[Not64BitMode]>; def : MnemonicAlias<"pushf", "pushfq", "att">, Requires<[In64BitMode]>; def : MnemonicAlias<"pushfd", "pushfl", "att">; -def : MnemonicAlias<"popad", "popa", "intel">, Requires<[In32BitMode]>; -def : MnemonicAlias<"pushad", "pusha", "intel">, Requires<[In32BitMode]>; +def : MnemonicAlias<"popad", "popa", "intel">, Requires<[Not64BitMode]>; +def : MnemonicAlias<"pushad", "pusha", "intel">, Requires<[Not64BitMode]>; def : MnemonicAlias<"repe", "rep", "att">; def : MnemonicAlias<"repz", "rep", "att">; def : MnemonicAlias<"repnz", "repne", "att">; -def : MnemonicAlias<"retl", "ret", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"retl", "ret", "att">, Requires<[Not64BitMode]>; def : MnemonicAlias<"retq", "ret", "att">, Requires<[In64BitMode]>; def : MnemonicAlias<"salb", "shlb", "att">; @@ -2150,13 +2156,13 @@ def : MnemonicAlias<"iret", "iretl", "att">; def : MnemonicAlias<"sysret", "sysretl", "att">; def : MnemonicAlias<"sysexit", "sysexitl", "att">; -def : MnemonicAlias<"lgdtl", "lgdt", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"lgdtl", "lgdt", "att">, Requires<[Not64BitMode]>; def : MnemonicAlias<"lgdtq", "lgdt", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"lidtl", "lidt", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"lidtl", "lidt", "att">, Requires<[Not64BitMode]>; def : MnemonicAlias<"lidtq", "lidt", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"sgdtl", "sgdt", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"sgdtl", "sgdt", "att">, Requires<[Not64BitMode]>; def : MnemonicAlias<"sgdtq", "sgdt", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"sidtl", "sidt", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"sidtl", "sidt", "att">, Requires<[Not64BitMode]>; def : MnemonicAlias<"sidtq", "sidt", "att">, Requires<[In64BitMode]>; @@ -2462,6 +2468,6 @@ def : InstAlias<"xchg{q}\t{$mem, $val|$val, $mem}", (XCHG64rm GR64:$val, i64mem: // xchg: We accept "xchgX , %eax" and "xchgX %eax, " as synonyms. 
def : InstAlias<"xchg{w}\t{%ax, $src|$src, ax}", (XCHG16ar GR16:$src)>; -def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", (XCHG32ar GR32:$src)>, Requires<[In32BitMode]>; +def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", (XCHG32ar GR32:$src)>, Requires<[Not64BitMode]>; def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", (XCHG32ar64 GR32_NOAX:$src)>, Requires<[In64BitMode]>; def : InstAlias<"xchg{q}\t{%rax, $src|$src, rax}", (XCHG64ar GR64:$src)>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrSSE.td b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrSSE.td index a5debc025690..40504afe8053 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrSSE.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrSSE.td @@ -210,6 +210,7 @@ multiclass sse12_fp_scalar_int opc, string OpcodeStr, RegisterClass RC, Operand memopr, ComplexPattern mem_cpat, OpndItins itins, bit Is2Addr = 1> { +let isCodeGenOnly = 1 in { def rr_Int : SI opc, string OpcodeStr, RegisterClass RC, RC:$src1, mem_cpat:$src2))], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } +} /// sse12_fp_packed - SSE 1 & 2 packed instructions class multiclass sse12_fp_packed opc, string OpcodeStr, SDNode OpNode, @@ -1632,40 +1634,43 @@ defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W; -let Predicates = [UseAVX] in { -defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}", - SSE_CVT_Scalar, 0>, XS, VEX_4V; -defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}", - SSE_CVT_Scalar, 0>, XS, VEX_4V, - VEX_W; -defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}", - SSE_CVT_Scalar, 0>, XD, VEX_4V; -defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", - SSE_CVT_Scalar, 0>, XD, - VEX_4V, VEX_W; -} -let Constraints = "$src1 = $dst" in { - defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - int_x86_sse_cvtsi2ss, i32mem, loadi32, - "cvtsi2ss{l}", SSE_CVT_Scalar>, XS; - defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse_cvtsi642ss, i64mem, loadi64, - "cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W; - defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - int_x86_sse2_cvtsi2sd, i32mem, loadi32, - "cvtsi2sd{l}", SSE_CVT_Scalar>, XD; - defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse2_cvtsi642sd, i64mem, loadi64, - "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W; -} +let isCodeGenOnly = 1 in { + let Predicates = [UseAVX] in { + defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, + int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}", + SSE_CVT_Scalar, 0>, XS, VEX_4V; + defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, + int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}", + SSE_CVT_Scalar, 0>, XS, VEX_4V, + VEX_W; + defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, + int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}", + SSE_CVT_Scalar, 0>, XD, VEX_4V; + defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, + int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", + SSE_CVT_Scalar, 0>, XD, + VEX_4V, VEX_W; + } + let Constraints = "$src1 = $dst" in { + defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, + int_x86_sse_cvtsi2ss, i32mem, loadi32, + "cvtsi2ss{l}", 
SSE_CVT_Scalar>, XS; + defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, + int_x86_sse_cvtsi642ss, i64mem, loadi64, + "cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W; + defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, + int_x86_sse2_cvtsi2sd, i32mem, loadi32, + "cvtsi2sd{l}", SSE_CVT_Scalar>, XD; + defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, + int_x86_sse2_cvtsi642sd, i64mem, loadi64, + "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W; + } +} // isCodeGenOnly = 1 /// SSE 1 Only // Aliases for intrinsics +let isCodeGenOnly = 1 in { let Predicates = [UseAVX] in { defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, ssmem, sse_load_f32, "cvttss2si", @@ -1694,6 +1699,7 @@ defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, "cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W; +} // isCodeGenOnly = 1 let Predicates = [UseAVX] in { defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, @@ -1792,6 +1798,7 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), XD, Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>; +let isCodeGenOnly = 1 in { def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1823,6 +1830,7 @@ def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg, IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>; } +} // isCodeGenOnly = 1 // Convert scalar single to scalar double // SSE2 instructions with XS prefix @@ -1875,6 +1883,7 @@ def : Pat<(fextend (loadf32 addr:$src)), def : Pat<(extloadf32 addr:$src), (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>; +let isCodeGenOnly = 1 in { def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1905,6 +1914,7 @@ def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>; } +} // isCodeGenOnly = 1 // Convert packed single/double fp to doubleword def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -2299,23 +2309,23 @@ multiclass sse12_cmp_scalar, XS, VEX_4V, VEX_LIG; -defm VCMPSD : sse12_cmp_scalar, // same latency as 32 bit compare XD, VEX_4V, VEX_LIG; let Constraints = "$src1 = $dst" in { - defm CMPSS : sse12_cmp_scalar, XS; - defm CMPSD : sse12_cmp_scalar, @@ -2338,23 +2348,25 @@ multiclass sse12_cmp_scalar_int; } -// Aliases to match intrinsics which expect XMM operand(s). -defm Int_VCMPSS : sse12_cmp_scalar_int, - XS, VEX_4V; -defm Int_VCMPSD : sse12_cmp_scalar_int, // same latency as f32 - XD, VEX_4V; -let Constraints = "$src1 = $dst" in { - defm Int_CMPSS : sse12_cmp_scalar_int, XS; - defm Int_CMPSD : sse12_cmp_scalar_int, - XD; +let isCodeGenOnly = 1 in { + // Aliases to match intrinsics which expect XMM operand(s). 
+ defm Int_VCMPSS : sse12_cmp_scalar_int, + XS, VEX_4V; + defm Int_VCMPSD : sse12_cmp_scalar_int, // same latency as f32 + XD, VEX_4V; + let Constraints = "$src1 = $dst" in { + defm Int_CMPSS : sse12_cmp_scalar_int, XS; + defm Int_CMPSD : sse12_cmp_scalar_int, + XD; +} } @@ -2387,15 +2399,17 @@ let Defs = [EFLAGS] in { "comisd">, TB, OpSize, VEX, VEX_LIG; } - defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, - load, "ucomiss">, TB, VEX; - defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, - load, "ucomisd">, TB, OpSize, VEX; + let isCodeGenOnly = 1 in { + defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, + load, "ucomiss">, TB, VEX; + defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, + load, "ucomisd">, TB, OpSize, VEX; - defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, - load, "comiss">, TB, VEX; - defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, - load, "comisd">, TB, OpSize, VEX; + defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, + load, "comiss">, TB, VEX; + defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, + load, "comisd">, TB, OpSize, VEX; + } defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, "ucomiss">, TB; defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, @@ -2408,15 +2422,17 @@ let Defs = [EFLAGS] in { "comisd">, TB, OpSize; } - defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, - load, "ucomiss">, TB; - defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, - load, "ucomisd">, TB, OpSize; + let isCodeGenOnly = 1 in { + defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, + load, "ucomiss">, TB; + defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, + load, "ucomisd">, TB, OpSize; - defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load, - "comiss">, TB; - defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load, - "comisd">, TB, OpSize; + defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load, + "comiss">, TB; + defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load, + "comisd">, TB, OpSize; + } } // Defs = [EFLAGS] // sse12_cmp_packed - sse 1 & 2 compare packed instructions @@ -3017,6 +3033,214 @@ let isCodeGenOnly = 1 in { basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>; } +// Patterns used to select SSE scalar fp arithmetic instructions from +// a scalar fp operation followed by a blend. +// +// These patterns know, for example, how to select an ADDSS from a +// float add plus vector insert. +// +// The effect is that the backend no longer emits unnecessary vector +// insert instructions immediately after SSE scalar fp instructions +// like addss or mulss. 
+// +// For example, given the following code: +// __m128 foo(__m128 A, __m128 B) { +// A[0] += B[0]; +// return A; +// } +// +// previously we generated: +// addss %xmm0, %xmm1 +// movss %xmm1, %xmm0 +// +// we now generate: +// addss %xmm1, %xmm0 + +def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fadd + (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + FR32:$src))))), + (ADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; +def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fsub + (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + FR32:$src))))), + (SUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; +def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fmul + (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + FR32:$src))))), + (MULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; +def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fdiv + (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + FR32:$src))))), + (DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; + +let Predicates = [HasSSE2] in { + // SSE2 patterns to select scalar double-precision fp arithmetic instructions + + def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd + (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), + FR64:$src))))), + (ADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>; + def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fsub + (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), + FR64:$src))))), + (SUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>; + def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fmul + (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), + FR64:$src))))), + (MULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>; + def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fdiv + (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), + FR64:$src))))), + (DIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>; +} + +let Predicates = [UseSSE41] in { + // If the subtarget has SSE4.1 but not AVX, the vector insert + // instruction is lowered into a X86insrtps rather than a X86Movss. + // When selecting SSE scalar single-precision fp arithmetic instructions, + // make sure that we correctly match the X86insrtps. + + def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector + (fadd (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + FR32:$src))), (iPTR 0))), + (ADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; + def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector + (fsub (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + FR32:$src))), (iPTR 0))), + (SUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; + def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector + (fmul (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + FR32:$src))), (iPTR 0))), + (MULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; + def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector + (fdiv (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + FR32:$src))), (iPTR 0))), + (DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; +} + +let AddedComplexity = 20, Predicates = [HasAVX] in { + // The following patterns select AVX Scalar single/double precision fp + // arithmetic instructions. 
+ // The 'AddedComplexity' is required to give them higher priority over + // the equivalent SSE/SSE2 patterns. + + def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fadd + (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), + FR64:$src))))), + (VADDSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>; + def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fsub + (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), + FR64:$src))))), + (VSUBSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>; + def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fmul + (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), + FR64:$src))))), + (VMULSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>; + def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (fdiv + (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), + FR64:$src))))), + (VDIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>; + def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector + (fadd (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + FR32:$src))), (iPTR 0))), + (VADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; + def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector + (fsub (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + FR32:$src))), (iPTR 0))), + (VSUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; + def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector + (fmul (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + FR32:$src))), (iPTR 0))), + (VMULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; + def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector + (fdiv (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + FR32:$src))), (iPTR 0))), + (VDIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; +} + +// Patterns used to select SSE scalar fp arithmetic instructions from +// a vector packed single/double fp operation followed by a vector insert. +// +// The effect is that the backend converts the packed fp instruction +// followed by a vector insert into a single SSE scalar fp instruction. +// +// For example, given the following code: +// __m128 foo(__m128 A, __m128 B) { +// __m128 C = A + B; +// return (__m128) {c[0], a[1], a[2], a[3]}; +// } +// +// previously we generated: +// addps %xmm0, %xmm1 +// movss %xmm1, %xmm0 +// +// we now generate: +// addss %xmm1, %xmm0 + +def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), + (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)))), + (ADDSSrr_Int v4f32:$dst, v4f32:$src)>; +def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), + (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)))), + (SUBSSrr_Int v4f32:$dst, v4f32:$src)>; +def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), + (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)))), + (MULSSrr_Int v4f32:$dst, v4f32:$src)>; +def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), + (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)))), + (DIVSSrr_Int v4f32:$dst, v4f32:$src)>; + +let Predicates = [HasSSE2] in { + // SSE2 patterns to select scalar double-precision fp arithmetic instructions + // from a packed double-precision fp instruction plus movsd. 
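The in-tree comment walks through the float case only; the double-precision patterns that follow admit the same shape. A user-level illustration (function name hypothetical; intrinsics per <emmintrin.h>, and whether the frontend produces the exact X86Movsd node these patterns expect depends on its lowering):

#include <emmintrin.h>

// addpd computes both lanes; _mm_move_sd keeps only lane 0 of the sum and
// lane 1 of A -- exactly what a single addsd does in place.
__m128d add_low(__m128d A, __m128d B) {
  __m128d C = _mm_add_pd(A, B);   // previously: addpd + movsd
  return _mm_move_sd(A, C);       // with these patterns: one addsd
}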
+ + def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), + (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)))), + (ADDSDrr_Int v2f64:$dst, v2f64:$src)>; + def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), + (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)))), + (SUBSDrr_Int v2f64:$dst, v2f64:$src)>; + def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), + (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)))), + (MULSDrr_Int v2f64:$dst, v2f64:$src)>; + def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), + (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)))), + (DIVSDrr_Int v2f64:$dst, v2f64:$src)>; +} + +let AddedComplexity = 20, Predicates = [HasAVX] in { + // The following patterns select AVX Scalar single/double precision fp + // arithmetic instructions from a packed single precision fp instruction + // plus movss/movsd. + // The 'AddedComplexity' is required to give them higher priority over + // the equivalent SSE/SSE2 patterns. + + def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), + (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)))), + (VADDSSrr_Int v4f32:$dst, v4f32:$src)>; + def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), + (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)))), + (VSUBSSrr_Int v4f32:$dst, v4f32:$src)>; + def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), + (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)))), + (VMULSSrr_Int v4f32:$dst, v4f32:$src)>; + def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), + (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)))), + (VDIVSSrr_Int v4f32:$dst, v4f32:$src)>; + def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), + (fadd (v2f64 VR128:$dst), (v2f64 VR128:$src)))), + (VADDSDrr_Int v2f64:$dst, v2f64:$src)>; + def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), + (fsub (v2f64 VR128:$dst), (v2f64 VR128:$src)))), + (VSUBSDrr_Int v2f64:$dst, v2f64:$src)>; + def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), + (fmul (v2f64 VR128:$dst), (v2f64 VR128:$src)))), + (VMULSDrr_Int v2f64:$dst, v2f64:$src)>; + def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), + (fdiv (v2f64 VR128:$dst), (v2f64 VR128:$src)))), + (VDIVSDrr_Int v2f64:$dst, v2f64:$src)>; +} + /// Unop Arithmetic /// In addition, we also have a special variant of the scalar form here to /// represent the associated intrinsic operation. This form is unlike the @@ -3069,6 +3293,7 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, VEX_4V, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>; + let isCodeGenOnly = 1 in def V#NAME#SSm_Int : SSI, XS, Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; +let isCodeGenOnly = 1 in { def SSr_Int : SSI, @@ -3098,6 +3324,7 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>, Sched<[itins.Sched.Folded]>; } +} /// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. 
multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, @@ -3115,6 +3342,7 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, VEX_4V, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>; + let isCodeGenOnly = 1 in def V#NAME#SSm_Int : SSI, XS, Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; - let Constraints = "$src1 = $dst" in { + let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in { def SSr_Int : SSI opc, string OpcodeStr, Intrinsic V4F32Int, Intrinsic V8F32Int, OpndItins itins> { +let isCodeGenOnly = 1 in { let Predicates = [HasAVX] in { def V#NAME#PSr_Int : PSI, Sched<[itins.Sched.Folded]>; +} // isCodeGenOnly = 1 } /// sse2_fp_unop_s - SSE2 unops in scalar form. @@ -3238,6 +3468,7 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, VEX_4V, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>; + let isCodeGenOnly = 1 in def V#NAME#SDm_Int : SDI, XD, Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>; +let isCodeGenOnly = 1 in { def SDr_Int : SDI, @@ -3265,6 +3497,7 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>, Sched<[itins.Sched.Folded]>; } +} /// sse2_fp_unop_p - SSE2 unops in vector forms. multiclass sse2_fp_unop_p opc, string OpcodeStr, @@ -3499,7 +3732,9 @@ def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), // Pause. This "instruction" is encoded as "rep; nop", so even though it // was introduced with SSE2, it's backward compatible. -def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", [], IIC_SSE_PAUSE>, REP; +def PAUSE : I<0x90, RawFrm, (outs), (ins), + "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>, + REP, Requires<[HasSSE2]>; // Load, store, and memory fence def SFENCE : I<0xAE, MRM_F8, (outs), (ins), @@ -4320,7 +4555,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src), let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in { -let Uses = [EDI], Predicates = [HasAVX,In32BitMode] in +let Uses = [EDI], Predicates = [HasAVX,Not64BitMode] in def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), "maskmovdqu\t{$mask, $src|$src, $mask}", @@ -4333,7 +4568,7 @@ def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs), [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)], IIC_SSE_MASKMOV>, VEX; -let Uses = [EDI], Predicates = [UseSSE2,In32BitMode] in +let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), "maskmovdqu\t{$mask, $src|$src, $mask}", [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)], @@ -5426,11 +5661,11 @@ def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", TB, Requires<[HasSSE3]>; } // SchedRW -def : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[In32BitMode]>; +def : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>; def : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>; def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORrrr)>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORrrr)>, Requires<[In64BitMode]>; @@ -6311,6 +6546,7 @@ let ExeDomain = GenericDomain in { []>, OpSize; // Intrinsic operation, reg. + let isCodeGenOnly = 1 in def SSr_Int : SS4AIi8, OpSize; // Intrinsic operation, reg. 
+ let isCodeGenOnly = 1 in def SDr_Int : SS4AIi8, TB; // 0F 01 D8 let Uses = [EAX] in def VMRUN32 : I<0x01, MRM_D8, (outs), (ins), - "vmrun\t{%eax|eax}", []>, TB, Requires<[In32BitMode]>; + "vmrun\t{%eax|eax}", []>, TB, Requires<[Not64BitMode]>; let Uses = [RAX] in def VMRUN64 : I<0x01, MRM_D8, (outs), (ins), "vmrun\t{%rax|rax}", []>, TB, Requires<[In64BitMode]>; @@ -39,7 +39,7 @@ def VMRUN64 : I<0x01, MRM_D8, (outs), (ins), // 0F 01 DA let Uses = [EAX] in def VMLOAD32 : I<0x01, MRM_DA, (outs), (ins), - "vmload\t{%eax|eax}", []>, TB, Requires<[In32BitMode]>; + "vmload\t{%eax|eax}", []>, TB, Requires<[Not64BitMode]>; let Uses = [RAX] in def VMLOAD64 : I<0x01, MRM_DA, (outs), (ins), "vmload\t{%rax|rax}", []>, TB, Requires<[In64BitMode]>; @@ -47,7 +47,7 @@ def VMLOAD64 : I<0x01, MRM_DA, (outs), (ins), // 0F 01 DB let Uses = [EAX] in def VMSAVE32 : I<0x01, MRM_DB, (outs), (ins), - "vmsave\t{%eax|eax}", []>, TB, Requires<[In32BitMode]>; + "vmsave\t{%eax|eax}", []>, TB, Requires<[Not64BitMode]>; let Uses = [RAX] in def VMSAVE64 : I<0x01, MRM_DB, (outs), (ins), "vmsave\t{%rax|rax}", []>, TB, Requires<[In64BitMode]>; @@ -55,7 +55,7 @@ def VMSAVE64 : I<0x01, MRM_DB, (outs), (ins), // 0F 01 DF let Uses = [EAX, ECX] in def INVLPGA32 : I<0x01, MRM_DF, (outs), (ins), - "invlpga\t{%ecx, %eax|eax, ecx}", []>, TB, Requires<[In32BitMode]>; + "invlpga\t{%ecx, %eax|eax, ecx}", []>, TB, Requires<[Not64BitMode]>; let Uses = [RAX, ECX] in def INVLPGA64 : I<0x01, MRM_DF, (outs), (ins), "invlpga\t{%ecx, %rax|rax, ecx}", []>, TB, Requires<[In64BitMode]>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrSystem.td b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrSystem.td index 2196dc32e7a2..dced6410de6e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrSystem.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrSystem.td @@ -248,75 +248,75 @@ def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), "ltr{w}\t$src", [], IIC_LTR>, TB; def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins), - "push{w}\t{%cs|cs}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>, + "push{w}\t{%cs|cs}", [], IIC_PUSH_SR>, Requires<[Not64BitMode]>, OpSize; def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins), - "push{l}\t{%cs|cs}", [], IIC_PUSH_CS>, Requires<[In32BitMode]>; + "push{l}\t{%cs|cs}", [], IIC_PUSH_CS>, Requires<[Not64BitMode]>; def PUSHSS16 : I<0x16, RawFrm, (outs), (ins), - "push{w}\t{%ss|ss}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>, + "push{w}\t{%ss|ss}", [], IIC_PUSH_SR>, Requires<[Not64BitMode]>, OpSize; def PUSHSS32 : I<0x16, RawFrm, (outs), (ins), - "push{l}\t{%ss|ss}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>; + "push{l}\t{%ss|ss}", [], IIC_PUSH_SR>, Requires<[Not64BitMode]>; def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins), - "push{w}\t{%ds|ds}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>, + "push{w}\t{%ds|ds}", [], IIC_PUSH_SR>, Requires<[Not64BitMode]>, OpSize; def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins), - "push{l}\t{%ds|ds}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>; + "push{l}\t{%ds|ds}", [], IIC_PUSH_SR>, Requires<[Not64BitMode]>; def PUSHES16 : I<0x06, RawFrm, (outs), (ins), - "push{w}\t{%es|es}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>, + "push{w}\t{%es|es}", [], IIC_PUSH_SR>, Requires<[Not64BitMode]>, OpSize; def PUSHES32 : I<0x06, RawFrm, (outs), (ins), - "push{l}\t{%es|es}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>; + "push{l}\t{%es|es}", [], IIC_PUSH_SR>, Requires<[Not64BitMode]>; def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins), "push{w}\t{%fs|fs}", [], IIC_PUSH_SR>, OpSize, TB; def PUSHFS32 : 
I<0xa0, RawFrm, (outs), (ins), - "push{l}\t{%fs|fs}", [], IIC_PUSH_SR>, TB, Requires<[In32BitMode]>; + "push{l}\t{%fs|fs}", [], IIC_PUSH_SR>, TB, Requires<[Not64BitMode]>; def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins), "push{w}\t{%gs|gs}", [], IIC_PUSH_SR>, OpSize, TB; def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins), - "push{l}\t{%gs|gs}", [], IIC_PUSH_SR>, TB, Requires<[In32BitMode]>; + "push{l}\t{%gs|gs}", [], IIC_PUSH_SR>, TB, Requires<[Not64BitMode]>; def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), - "push{q}\t{%fs|fs}", [], IIC_PUSH_SR>, TB; + "push{q}\t{%fs|fs}", [], IIC_PUSH_SR>, TB, Requires<[In64BitMode]>; def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins), - "push{q}\t{%gs|gs}", [], IIC_PUSH_SR>, TB; + "push{q}\t{%gs|gs}", [], IIC_PUSH_SR>, TB, Requires<[In64BitMode]>; // No "pop cs" instruction. def POPSS16 : I<0x17, RawFrm, (outs), (ins), "pop{w}\t{%ss|ss}", [], IIC_POP_SR_SS>, - OpSize, Requires<[In32BitMode]>; + OpSize, Requires<[Not64BitMode]>; def POPSS32 : I<0x17, RawFrm, (outs), (ins), "pop{l}\t{%ss|ss}", [], IIC_POP_SR_SS>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def POPDS16 : I<0x1F, RawFrm, (outs), (ins), "pop{w}\t{%ds|ds}", [], IIC_POP_SR>, - OpSize, Requires<[In32BitMode]>; + OpSize, Requires<[Not64BitMode]>; def POPDS32 : I<0x1F, RawFrm, (outs), (ins), "pop{l}\t{%ds|ds}", [], IIC_POP_SR>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def POPES16 : I<0x07, RawFrm, (outs), (ins), "pop{w}\t{%es|es}", [], IIC_POP_SR>, - OpSize, Requires<[In32BitMode]>; + OpSize, Requires<[Not64BitMode]>; def POPES32 : I<0x07, RawFrm, (outs), (ins), "pop{l}\t{%es|es}", [], IIC_POP_SR>, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def POPFS16 : I<0xa1, RawFrm, (outs), (ins), "pop{w}\t{%fs|fs}", [], IIC_POP_SR>, OpSize, TB; def POPFS32 : I<0xa1, RawFrm, (outs), (ins), - "pop{l}\t{%fs|fs}", [], IIC_POP_SR>, TB, Requires<[In32BitMode]>; + "pop{l}\t{%fs|fs}", [], IIC_POP_SR>, TB, Requires<[Not64BitMode]>; def POPFS64 : I<0xa1, RawFrm, (outs), (ins), - "pop{q}\t{%fs|fs}", [], IIC_POP_SR>, TB; + "pop{q}\t{%fs|fs}", [], IIC_POP_SR>, TB, Requires<[In64BitMode]>; def POPGS16 : I<0xa9, RawFrm, (outs), (ins), "pop{w}\t{%gs|gs}", [], IIC_POP_SR>, OpSize, TB; def POPGS32 : I<0xa9, RawFrm, (outs), (ins), - "pop{l}\t{%gs|gs}", [], IIC_POP_SR>, TB, Requires<[In32BitMode]>; + "pop{l}\t{%gs|gs}", [], IIC_POP_SR>, TB, Requires<[Not64BitMode]>; def POPGS64 : I<0xa9, RawFrm, (outs), (ins), - "pop{q}\t{%gs|gs}", [], IIC_POP_SR>, TB; + "pop{q}\t{%gs|gs}", [], IIC_POP_SR>, TB, Requires<[In64BitMode]>; def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), @@ -367,11 +367,11 @@ def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), let SchedRW = [WriteSystem] in { def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), - "sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>; + "sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[Not64BitMode]>; def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), "sgdt\t$dst", [], IIC_SGDT>, TB; def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins), - "sidt{w}\t$dst", [], IIC_SIDT>, TB, OpSize, Requires<[In32BitMode]>; + "sidt{w}\t$dst", [], IIC_SIDT>, TB, OpSize, Requires<[Not64BitMode]>; def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins), "sidt\t$dst", []>, TB; def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins), @@ -389,11 +389,11 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins), "sldt{q}\t$dst", [], IIC_SLDT>, TB; def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), - 
"lgdt{w}\t$src", [], IIC_LGDT>, TB, OpSize, Requires<[In32BitMode]>; + "lgdt{w}\t$src", [], IIC_LGDT>, TB, OpSize, Requires<[Not64BitMode]>; def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), "lgdt\t$src", [], IIC_LGDT>, TB; def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), - "lidt{w}\t$src", [], IIC_LIDT>, TB, OpSize, Requires<[In32BitMode]>; + "lidt{w}\t$src", [], IIC_LIDT>, TB, OpSize, Requires<[Not64BitMode]>; def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), "lidt\t$src", [], IIC_LIDT>, TB; def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), @@ -516,7 +516,7 @@ let Predicates = [HasFSGSBase, In64BitMode] in { // INVPCID Instruction def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In64BitMode]>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrVMX.td b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrVMX.td index 6d3548f09398..5bf46d146512 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrVMX.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86InstrVMX.td @@ -18,14 +18,14 @@ // 66 0F 38 80 def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In64BitMode]>; // 66 0F 38 81 def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, - Requires<[In32BitMode]>; + Requires<[Not64BitMode]>; def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In64BitMode]>; @@ -48,17 +48,17 @@ def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src), def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>; def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src), - "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In32BitMode]>; + "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[Not64BitMode]>; def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), - "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In32BitMode]>; + "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[Not64BitMode]>; def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>; def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>; def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In32BitMode]>; + "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[Not64BitMode]>; def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In32BitMode]>; + "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[Not64BitMode]>; // 0F 01 C4 def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB; 
def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86MCInstLower.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86MCInstLower.cpp index 6649c825b6c9..c700dd2d545e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -70,42 +70,53 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { /// operand to an MCSymbol. MCSymbol *X86MCInstLower:: GetSymbolFromOperand(const MachineOperand &MO) const { + const DataLayout *DL = TM.getDataLayout(); assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference"); SmallString<128> Name; + StringRef Suffix; + + switch (MO.getTargetFlags()) { + case X86II::MO_DLLIMPORT: + // Handle dllimport linkage. + Name += "__imp_"; + break; + case X86II::MO_DARWIN_STUB: + Suffix = "$stub"; + break; + case X86II::MO_DARWIN_NONLAZY: + case X86II::MO_DARWIN_NONLAZY_PIC_BASE: + case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: + Suffix = "$non_lazy_ptr"; + break; + } + + if (!Suffix.empty()) + Name += DL->getPrivateGlobalPrefix(); + + unsigned PrefixLen = Name.size(); if (MO.isGlobal()) { const GlobalValue *GV = MO.getGlobal(); - bool isImplicitlyPrivate = false; - if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB || - MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || - MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE || - MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE) - isImplicitlyPrivate = true; - - getMang()->getNameWithPrefix(Name, GV, isImplicitlyPrivate); + getMang()->getNameWithPrefix(Name, GV); } else if (MO.isSymbol()) { - Name += MAI.getGlobalPrefix(); - Name += MO.getSymbolName(); + getMang()->getNameWithPrefix(Name, MO.getSymbolName()); } else if (MO.isMBB()) { Name += MO.getMBB()->getSymbol()->getName(); } + unsigned OrigLen = Name.size() - PrefixLen; + + Name += Suffix; + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); + + StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen); // If the target flags on the operand changes the name of the symbol, do that // before we return the symbol. switch (MO.getTargetFlags()) { default: break; - case X86II::MO_DLLIMPORT: { - // Handle dllimport linkage. 
- const char *Prefix = "__imp_"; - Name.insert(Name.begin(), Prefix, Prefix+strlen(Prefix)); - break; - } case X86II::MO_DARWIN_NONLAZY: case X86II::MO_DARWIN_NONLAZY_PIC_BASE: { - Name += "$non_lazy_ptr"; - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); - MachineModuleInfoImpl::StubValueTy &StubSym = getMachOMMI().getGVStubEntry(Sym); if (StubSym.getPointer() == 0) { @@ -115,11 +126,9 @@ GetSymbolFromOperand(const MachineOperand &MO) const { StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()), !MO.getGlobal()->hasInternalLinkage()); } - return Sym; + break; } case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: { - Name += "$non_lazy_ptr"; - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = getMachOMMI().getHiddenGVStubEntry(Sym); if (StubSym.getPointer() == 0) { @@ -129,11 +138,9 @@ GetSymbolFromOperand(const MachineOperand &MO) const { StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()), !MO.getGlobal()->hasInternalLinkage()); } - return Sym; + break; } case X86II::MO_DARWIN_STUB: { - Name += "$stub"; - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); MachineModuleInfoImpl::StubValueTy &StubSym = getMachOMMI().getFnStubEntry(Sym); if (StubSym.getPointer()) @@ -145,16 +152,15 @@ GetSymbolFromOperand(const MachineOperand &MO) const { StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()), !MO.getGlobal()->hasInternalLinkage()); } else { - Name.erase(Name.end()-5, Name.end()); StubSym = MachineModuleInfoImpl:: - StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false); + StubValueTy(Ctx.GetOrCreateSymbol(OrigName), false); } - return Sym; + break; } } - return Ctx.GetOrCreateSymbol(Name.str()); + return Sym; } MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, @@ -227,13 +233,6 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, } -/// LowerUnaryToTwoAddr - R = setb -> R = sbb R, R -static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) { - OutMI.setOpcode(NewOpc); - OutMI.addOperand(OutMI.getOperand(0)); - OutMI.addOperand(OutMI.getOperand(0)); -} - /// \brief Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with /// a short fixed-register form. static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) { @@ -390,7 +389,6 @@ ReSimplify: assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 && "LEA has segment specified!"); break; - case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break; case X86::MOV32ri64: OutMI.setOpcode(X86::MOV32ri); @@ -675,111 +673,76 @@ static void LowerTlsAddr(MCStreamer &OutStreamer, .addExpr(tlsRef)); } -static std::pair -parseMemoryOperand(StackMaps::Location::LocationType LocTy, unsigned Size, - MachineInstr::const_mop_iterator MOI, - MachineInstr::const_mop_iterator MOE) { - - typedef StackMaps::Location Location; - - assert(std::distance(MOI, MOE) >= 5 && "Too few operands to encode mem op."); - - const MachineOperand &Base = *MOI; - const MachineOperand &Scale = *(++MOI); - const MachineOperand &Index = *(++MOI); - const MachineOperand &Disp = *(++MOI); - const MachineOperand &ZeroReg = *(++MOI); - - // Sanity check for supported operand format. 
- assert(Base.isReg() && - Scale.isImm() && Scale.getImm() == 1 && - Index.isReg() && Index.getReg() == 0 && - Disp.isImm() && ZeroReg.isReg() && (ZeroReg.getReg() == 0) && - "Unsupported x86 memory operand sequence."); - (void)Scale; - (void)Index; - (void)ZeroReg; - - return std::make_pair( - Location(LocTy, Size, Base.getReg(), Disp.getImm()), ++MOI); -} - -std::pair -X86AsmPrinter::stackmapOperandParser(MachineInstr::const_mop_iterator MOI, - MachineInstr::const_mop_iterator MOE, - const TargetMachine &TM) { - - typedef StackMaps::Location Location; - - const MachineOperand &MOP = *MOI; - assert(!MOP.isRegMask() && (!MOP.isReg() || !MOP.isImplicit()) && - "Register mask and implicit operands should not be processed."); - - if (MOP.isImm()) { - // Verify anyregcc - // [], , , , , , ... - - switch (MOP.getImm()) { - default: llvm_unreachable("Unrecognized operand type."); - case StackMaps::DirectMemRefOp: { - unsigned Size = TM.getDataLayout()->getPointerSizeInBits(); - assert((Size % 8) == 0 && "Need pointer size in bytes."); - Size /= 8; - return parseMemoryOperand(StackMaps::Location::Direct, Size, - llvm::next(MOI), MOE); +/// \brief Emit the optimal amount of multi-byte nops on X86. +static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit) { + // This works only for 64bit. For 32bit we have to do additional checking if + // the CPU supports multi-byte nops. + assert(Is64Bit && "EmitNops only supports X86-64"); + while (NumBytes) { + unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg; + Opc = IndexReg = Displacement = SegmentReg = 0; + BaseReg = X86::RAX; ScaleVal = 1; + switch (NumBytes) { + case 0: llvm_unreachable("Zero nops?"); break; + case 1: NumBytes -= 1; Opc = X86::NOOP; break; + case 2: NumBytes -= 2; Opc = X86::XCHG16ar; break; + case 3: NumBytes -= 3; Opc = X86::NOOPL; break; + case 4: NumBytes -= 4; Opc = X86::NOOPL; Displacement = 8; break; + case 5: NumBytes -= 5; Opc = X86::NOOPL; Displacement = 8; + IndexReg = X86::RAX; break; + case 6: NumBytes -= 6; Opc = X86::NOOPW; Displacement = 8; + IndexReg = X86::RAX; break; + case 7: NumBytes -= 7; Opc = X86::NOOPL; Displacement = 512; break; + case 8: NumBytes -= 8; Opc = X86::NOOPL; Displacement = 512; + IndexReg = X86::RAX; break; + case 9: NumBytes -= 9; Opc = X86::NOOPW; Displacement = 512; + IndexReg = X86::RAX; break; + default: NumBytes -= 10; Opc = X86::NOOPW; Displacement = 512; + IndexReg = X86::RAX; SegmentReg = X86::CS; break; } - case StackMaps::IndirectMemRefOp: { - ++MOI; - int64_t Size = MOI->getImm(); - assert(Size > 0 && "Need a valid size for indirect memory locations."); - return parseMemoryOperand(StackMaps::Location::Indirect, Size, - llvm::next(MOI), MOE); - } - case StackMaps::ConstantOp: { - ++MOI; - assert(MOI->isImm() && "Expected constant operand."); - int64_t Imm = MOI->getImm(); - return std::make_pair( - Location(Location::Constant, sizeof(int64_t), 0, Imm), ++MOI); - } - } - } - // Otherwise this is a reg operand. The physical register number will - // ultimately be encoded as a DWARF regno. The stack map also records the size - // of a spill slot that can hold the register content. (The runtime can - // track the actual size of the data type if it needs to.) 
- assert(MOP.isReg() && "Expected register operand here."); - assert(TargetRegisterInfo::isPhysicalRegister(MOP.getReg()) && - "Virtreg operands should have been rewritten before now."); - const TargetRegisterClass *RC = - TM.getRegisterInfo()->getMinimalPhysRegClass(MOP.getReg()); - assert(!MOP.getSubReg() && "Physical subreg still around."); - return std::make_pair( - Location(Location::Register, RC->getSize(), MOP.getReg(), 0), ++MOI); + unsigned NumPrefixes = std::min(NumBytes, 5U); + NumBytes -= NumPrefixes; + for (unsigned i = 0; i != NumPrefixes; ++i) + OS.EmitBytes("\x66"); + + switch (Opc) { + default: llvm_unreachable("Unexpected opcode"); break; + case X86::NOOP: + OS.EmitInstruction(MCInstBuilder(Opc)); + break; + case X86::XCHG16ar: + OS.EmitInstruction(MCInstBuilder(Opc).addReg(X86::AX)); + break; + case X86::NOOPL: + case X86::NOOPW: + OS.EmitInstruction(MCInstBuilder(Opc).addReg(BaseReg).addImm(ScaleVal) + .addReg(IndexReg) + .addImm(Displacement) + .addReg(SegmentReg)); + break; + } + } // while (NumBytes) } // Lower a stackmap of the form: // , , ... -static void LowerSTACKMAP(MCStreamer &OutStreamer, - StackMaps &SM, - const MachineInstr &MI) -{ - unsigned NumNOPBytes = MI.getOperand(1).getImm(); +static void LowerSTACKMAP(MCStreamer &OS, StackMaps &SM, + const MachineInstr &MI, bool Is64Bit) { + unsigned NumBytes = MI.getOperand(1).getImm(); SM.recordStackMap(MI); // Emit padding. // FIXME: These nops ensure that the stackmap's shadow is covered by // instructions from the same basic block, but the nops should not be // necessary if instructions from the same block follow the stackmap. - for (unsigned i = 0; i < NumNOPBytes; ++i) - OutStreamer.EmitInstruction(MCInstBuilder(X86::NOOP)); + EmitNops(OS, NumBytes, Is64Bit); } // Lower a patchpoint of the form: // [], , , , , , ... -static void LowerPATCHPOINT(MCStreamer &OutStreamer, - StackMaps &SM, - const MachineInstr &MI) { +static void LowerPATCHPOINT(MCStreamer &OS, StackMaps &SM, + const MachineInstr &MI, bool Is64Bit) { + assert(Is64Bit && "Patchpoint currently only supports X86-64"); SM.recordPatchPoint(MI); PatchPointOpers opers(&MI); @@ -789,22 +752,21 @@ static void LowerPATCHPOINT(MCStreamer &OutStreamer, if (CallTarget) { // Emit MOV to materialize the target address and the CALL to target. // This is encoded with 12-13 bytes, depending on which register is used. - // We conservatively assume that it is 12 bytes and emit in worst case one - // extra NOP byte. - EncodedBytes = 12; - OutStreamer.EmitInstruction(MCInstBuilder(X86::MOV64ri) - .addReg(MI.getOperand(ScratchIdx).getReg()) - .addImm(CallTarget)); - OutStreamer.EmitInstruction(MCInstBuilder(X86::CALL64r) - .addReg(MI.getOperand(ScratchIdx).getReg())); + unsigned ScratchReg = MI.getOperand(ScratchIdx).getReg(); + if (X86II::isX86_64ExtendedReg(ScratchReg)) + EncodedBytes = 13; + else + EncodedBytes = 12; + OS.EmitInstruction(MCInstBuilder(X86::MOV64ri).addReg(ScratchReg) + .addImm(CallTarget)); + OS.EmitInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg)); } // Emit padding. 
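EmitNops above sizes each emitted NOP greedily: canonical NOPs cover 1-10 bytes, and up to five 0x66 prefixes stretch the 10-byte form to 15, so any shadow is covered in a handful of instructions. A standalone re-trace of that byte accounting (a sketch, not the MC emitter itself):

#include <algorithm>
#include <cstdio>
#include <vector>

static std::vector<unsigned> nopChunks(unsigned NumBytes) {
  std::vector<unsigned> Chunks;
  while (NumBytes) {
    unsigned Core = std::min(NumBytes, 10u);  // longest canonical NOP
    NumBytes -= Core;
    // Only the 10-byte form is ever extended with 0x66 prefixes, since
    // the shorter cases already consume every remaining byte.
    unsigned Prefixes = (Core == 10) ? std::min(NumBytes, 5u) : 0;
    NumBytes -= Prefixes;
    Chunks.push_back(Core + Prefixes);
  }
  return Chunks;
}

int main() {
  for (unsigned Len : nopChunks(23))  // a 23-byte shadow -> 15 + 8
    std::printf("%u-byte nop\n", Len);
}

For the patchpoint below, the same helper pads NumBytes - EncodedBytes, where EncodedBytes is 13 rather than 12 whenever the scratch register needs a REX extension.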
unsigned NumBytes = opers.getMetaOper(PatchPointOpers::NBytesPos).getImm(); assert(NumBytes >= EncodedBytes && "Patchpoint can't request size less than the length of a call."); - for (unsigned i = EncodedBytes; i < NumBytes; ++i) - OutStreamer.EmitInstruction(MCInstBuilder(X86::NOOP)); + EmitNops(OS, NumBytes - EncodedBytes, Is64Bit); } void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { @@ -898,10 +860,10 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { } case TargetOpcode::STACKMAP: - return LowerSTACKMAP(OutStreamer, SM, *MI); + return LowerSTACKMAP(OutStreamer, SM, *MI, Subtarget->is64Bit()); case TargetOpcode::PATCHPOINT: - return LowerPATCHPOINT(OutStreamer, SM, *MI); + return LowerPATCHPOINT(OutStreamer, SM, *MI, Subtarget->is64Bit()); case X86::MORESTACK_RET: OutStreamer.EmitInstruction(MCInstBuilder(X86::RET)); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86RegisterInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86RegisterInfo.cpp index dbda556b1b5e..bdaf4ef58fc0 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -239,10 +239,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { case CallingConv::HiPE: return CSR_NoRegs_SaveList; - case CallingConv::WebKit_JS: - return CSR_64_SaveList; case CallingConv::AnyReg: - return CSR_MostRegs_64_SaveList; + return CSR_AllRegs_64_SaveList; case CallingConv::Intel_OCL_BI: { bool HasAVX = TM.getSubtarget().hasAVX(); @@ -301,8 +299,8 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { } if (CC == CallingConv::GHC || CC == CallingConv::HiPE) return CSR_NoRegs_RegMask; - if (CC == CallingConv::WebKit_JS || CC == CallingConv::AnyReg) - return CSR_MostRegs_64_RegMask; + if (CC == CallingConv::AnyReg) + return CSR_AllRegs_64_RegMask; if (!Is64Bit) return CSR_32_RegMask; if (CC == CallingConv::Cold) @@ -403,18 +401,15 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { if (!EnableBasePointer) return false; - // When we need stack realignment and there are dynamic allocas, we can't - // reference off of the stack pointer, so we reserve a base pointer. - // - // This is also true if the function contain MS-style inline assembly. We - // do this because if any stack changes occur in the inline assembly, e.g., - // "pusha", then any C local variable or C argument references in the - // inline assembly will be wrong because the SP is not properly tracked. - if ((needsStackRealignment(MF) && MFI->hasVarSizedObjects()) || - MF.hasMSInlineAsm()) - return true; - - return false; + // When we need stack realignment, we can't address the stack from the frame + // pointer. When we have dynamic allocas or stack-adjusting inline asm, we + // can't address variables from the stack pointer. MS inline asm can + // reference locals while also adjusting the stack pointer. When we can't + // use both the SP and the FP, we need a separate base pointer register. 
+ bool CantUseFP = needsStackRealignment(MF); + bool CantUseSP = + MFI->hasVarSizedObjects() || MFI->hasInlineAsmWithSPAdjust(); + return CantUseFP && CantUseSP; } bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86RegisterInfo.td b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86RegisterInfo.td index b8027283cc1f..8d79e13b1db8 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86RegisterInfo.td @@ -463,9 +463,11 @@ def VR128X : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], 256, (sequence "YMM%u", 0, 31)>; +def VK1 : RegisterClass<"X86", [i1], 1, (sequence "K%u", 0, 7)>; def VK8 : RegisterClass<"X86", [v8i1], 8, (sequence "K%u", 0, 7)>; def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)>; +def VK1WM : RegisterClass<"X86", [i1], 1, (sub VK1, K0)>; def VK8WM : RegisterClass<"X86", [v8i1], 8, (sub VK8, K0)>; def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86Subtarget.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86Subtarget.cpp index 01353b2acb07..597fccb9fdb2 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86Subtarget.cpp @@ -263,6 +263,15 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { ToggleFeature(X86::FeatureSlowBTMem); } + // Determine if SHLD/SHRD instructions have higher latency than the + // equivalent series of shifts/or instructions. + // FIXME: Add Intel's processors that have SHLD instructions with very + // poor latency. + if (IsAMD) { + IsSHLDSlow = true; + ToggleFeature(X86::FeatureSlowSHLD); + } + // If it's an Intel chip since Nehalem and not an Atom chip, unaligned // memory access is fast. We hard code model numbers here because they // aren't strictly increasing for Intel chips it seems. @@ -519,6 +528,7 @@ void X86Subtarget::initializeEnvironment() { HasPRFCHW = false; HasRDSEED = false; IsBTMemSlow = false; + IsSHLDSlow = false; IsUAMemFast = false; HasVectorUAMem = false; HasCmpxchg16b = false; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86Subtarget.h b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86Subtarget.h index dd8c0811ce51..93d251a67949 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86Subtarget.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86Subtarget.h @@ -142,6 +142,9 @@ protected: /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; + /// IsSHLDSlow - True if SHLD instructions are slow. + bool IsSHLDSlow; + /// IsUAMemFast - True if unaligned memory access is fast. bool IsUAMemFast; @@ -243,7 +246,8 @@ public: /// Is this x86_64 with the ILP32 programming model (x32 ABI)? bool isTarget64BitILP32() const { - return In64BitMode && (TargetTriple.getEnvironment() == Triple::GNUX32); + return In64BitMode && (TargetTriple.getEnvironment() == Triple::GNUX32 || + TargetTriple.getOS() == Triple::NaCl); } /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
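The rewritten hasBasePointer above makes the policy a pure conjunction: the frame pointer drops out under stack realignment, the stack pointer drops out under dynamic allocas or SP-adjusting inline asm, and only when both are unusable does the target dedicate a callee-saved register as a base pointer. Enumerated as a quick sketch:

#include <cstdio>

int main() {
  for (int CantUseFP = 0; CantUseFP <= 1; ++CantUseFP)    // realignment
    for (int CantUseSP = 0; CantUseSP <= 1; ++CantUseSP)  // dynamic SP use
      std::printf("CantUseFP=%d CantUseSP=%d -> base pointer: %s\n",
                  CantUseFP, CantUseSP,
                  CantUseFP && CantUseSP ? "yes" : "no");
}

Note the behavioral change: the old code reserved a base pointer for any function containing MS inline asm, even when the frame pointer alone would have sufficed; now such asm merely rules out the stack-pointer side of the conjunction.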
@@ -292,6 +296,7 @@ public: bool hasPRFCHW() const { return HasPRFCHW; } bool hasRDSEED() const { return HasRDSEED; } bool isBTMemSlow() const { return IsBTMemSlow; } + bool isSHLDSlow() const { return IsSHLDSlow; } bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } bool hasCmpxchg16b() const { return HasCmpxchg16b; } @@ -315,10 +320,11 @@ public: bool isTargetSolaris() const { return TargetTriple.getOS() == Triple::Solaris; } - bool isTargetELF() const { - return (TargetTriple.getEnvironment() == Triple::ELF || - TargetTriple.isOSBinFormatELF()); - } + + bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } + bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } + bool isTargetMacho() const { return TargetTriple.isOSBinFormatMachO(); } + bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); } @@ -327,11 +333,6 @@ public: bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; } bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; } bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); } - bool isTargetCOFF() const { - return (TargetTriple.getEnvironment() != Triple::ELF && - TargetTriple.isOSBinFormatCOFF()); - } - bool isTargetEnvMacho() const { return TargetTriple.isEnvironmentMachO(); } bool isOSWindows() const { return TargetTriple.isOSWindows(); } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86TargetMachine.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86TargetMachine.cpp index ddf580f73145..9fa2481a25ee 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -30,21 +30,52 @@ extern "C" void LLVMInitializeX86Target() { void X86_32TargetMachine::anchor() { } +static std::string computeDataLayout(const X86Subtarget &ST) { + // X86 is little endian + std::string Ret = "e"; + + Ret += DataLayout::getManglingComponent(ST.getTargetTriple()); + // X86 and x32 have 32 bit pointers. + if (ST.isTarget64BitILP32() || !ST.is64Bit()) + Ret += "-p:32:32"; + + // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32. + if (ST.is64Bit() || ST.isTargetCygMing() || ST.isTargetWindows() || + ST.isTargetNaCl()) + Ret += "-i64:64"; + else + Ret += "-f64:32:64"; + + // Some ABIs align long double to 128 bits, others to 32. + if (ST.isTargetNaCl()) + ; // No f80 + else if (ST.is64Bit() || ST.isTargetDarwin()) + Ret += "-f80:128"; + else + Ret += "-f80:32"; + + // The registers can hold 8, 16, 32 or, in x86-64, 64 bits. + if (ST.is64Bit()) + Ret += "-n8:16:32:64"; + else + Ret += "-n8:16:32"; + + // The stack is aligned to 32 bits on some ABIs and 128 bits on others. + if (!ST.is64Bit() && (ST.isTargetCygMing() || ST.isTargetWindows())) + Ret += "-S32"; + else + Ret += "-S128"; + + return Ret; +} + X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false), - DL(getSubtargetImpl()->isTargetDarwin() ? - "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-" - "n8:16:32-S128" : - (getSubtargetImpl()->isTargetCygMing() || - getSubtargetImpl()->isTargetWindows()) ? 
- "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-" - "n8:16:32-S32" : - "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-" - "n8:16:32-S128"), + DL(computeDataLayout(*getSubtargetImpl())), InstrInfo(*this), TLInfo(*this), TSInfo(*this), @@ -61,11 +92,7 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL) : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true), // The x32 ABI dictates the ILP32 programming model for x64. - DL(getSubtargetImpl()->isTarget64BitILP32() ? - "e-p:32:32-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" - "n8:16:32:64-S128" : - "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" - "n8:16:32:64-S128"), + DL(computeDataLayout(*getSubtargetImpl())), InstrInfo(*this), TLInfo(*this), TSInfo(*this), diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index f88a666092bc..cb6af0d22cba 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -555,7 +555,7 @@ unsigned X86TTI::getScalarizationOverhead(Type *Ty, bool Insert, unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { - // Handle non power of two vectors such as <3 x float> + // Handle non-power-of-two vectors such as <3 x float> if (VectorType *VTy = dyn_cast(Src)) { unsigned NumElem = VTy->getVectorNumElements(); @@ -570,7 +570,7 @@ unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, // Cost = 128 bit store + unpack + 64 bit store. return 3; - // Assume that all other non power-of-two numbers are scalarized. + // Assume that all other non-power-of-two numbers are scalarized. if (!isPowerOf2_32(NumElem)) { unsigned Cost = TargetTransformInfo::getMemoryOpCost(Opcode, VTy->getScalarType(), diff --git a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86VZeroUpper.cpp b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86VZeroUpper.cpp index 0d37a7d0e674..66ae9c2d7f9d 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/X86/X86VZeroUpper.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/X86/X86VZeroUpper.cpp @@ -122,11 +122,11 @@ static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) { } static bool clobbersAllYmmRegs(const MachineOperand &MO) { - for (unsigned reg = X86::YMM0; reg < X86::YMM31; ++reg) { + for (unsigned reg = X86::YMM0; reg <= X86::YMM31; ++reg) { if (!MO.clobbersPhysReg(reg)) return false; } - for (unsigned reg = X86::ZMM0; reg < X86::ZMM31; ++reg) { + for (unsigned reg = X86::ZMM0; reg <= X86::ZMM31; ++reg) { if (!MO.clobbersPhysReg(reg)) return false; } @@ -148,6 +148,25 @@ static bool hasYmmReg(MachineInstr *MI) { return false; } +/// clobbersAnyYmmReg() - Check if any YMM register will be clobbered by this +/// instruction. +static bool clobbersAnyYmmReg(MachineInstr *MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isRegMask()) + continue; + for (unsigned reg = X86::YMM0; reg <= X86::YMM31; ++reg) { + if (MO.clobbersPhysReg(reg)) + return true; + } + for (unsigned reg = X86::ZMM0; reg <= X86::ZMM31; ++reg) { + if (MO.clobbersPhysReg(reg)) + return true; + } + } + return false; +} + /// runOnMachineFunction - Loop over all of the basic blocks, inserting /// vzero upper instructions before function calls. 
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { @@ -234,14 +253,6 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, DebugLoc dl = I->getDebugLoc(); MachineInstr *MI = I; - // Don't need to check instructions added in prolog. - // In prolog, special function calls may be added for specific targets - // (e.g. on Windows, a prolog helper '_chkstk' is called when the local - // variables exceed 4K bytes on stack.) These helpers won't use/def YMM/XMM - // registers. - if (MI->getFlag(MachineInstr::FrameSetup)) - continue; - bool isControlFlow = MI->isCall() || MI->isReturn(); // Shortcut: don't need to check regular instructions in dirty state. @@ -260,6 +271,14 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, if (!isControlFlow) continue; + // If the call won't clobber any YMM register, skip it as well. This usually + // happens with helper function calls (such as '_chkstk', '_ftol2') where the + // standard calling convention is not used (no RegMask is used to mark the + // registers clobbered, and register usage (def/imp-def/use) is well-defined + // and explicitly specified). + if (MI->isCall() && !clobbersAnyYmmReg(MI)) + continue; + BBHasCall = true; // The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/XCore/CMakeLists.txt index 3fa3b34245d7..72d06ebaff57 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/CMakeLists.txt @@ -26,8 +26,6 @@ add_llvm_target(XCoreCodeGen XCoreSelectionDAGInfo.cpp ) -add_dependencies(LLVMXCoreCodeGen XCoreCommonTableGen intrinsics_gen) - add_subdirectory(Disassembler) add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/Disassembler/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/XCore/Disassembler/CMakeLists.txt index cdc5d993b8bf..1ed10c02c356 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/Disassembler/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/Disassembler/CMakeLists.txt @@ -1,5 +1,3 @@ add_llvm_library(LLVMXCoreDisassembler XCoreDisassembler.cpp ) - -add_dependencies(LLVMXCoreDisassembler XCoreCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/InstPrinter/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/XCore/InstPrinter/CMakeLists.txt index 930e733cd7f1..53cf84da7be9 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/InstPrinter/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/InstPrinter/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/..
) - add_llvm_library(LLVMXCoreAsmPrinter XCoreInstPrinter.cpp ) - -add_dependencies(LLVMXCoreAsmPrinter XCoreCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/XCore/LLVMBuild.txt index 59e64ad0855c..0504e8ab8f0c 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/LLVMBuild.txt @@ -29,5 +29,5 @@ has_disassembler = 1 type = Library name = XCoreCodeGen parent = XCore -required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support Target XCoreDesc XCoreInfo +required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target TransformUtils XCoreAsmPrinter XCoreDesc XCoreInfo add_to_library_groups = XCore diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt index 3a3f5b4cc63e..a14cf5ce2ac1 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt @@ -2,8 +2,3 @@ add_llvm_library(LLVMXCoreDesc XCoreMCTargetDesc.cpp XCoreMCAsmInfo.cpp ) - -add_dependencies(LLVMXCoreDesc XCoreCommonTableGen) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt index 8213f9e42883..6d390d272748 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = XCoreDesc parent = XCore -required_libraries = MC XCoreAsmPrinter XCoreInfo +required_libraries = MC Support XCoreAsmPrinter XCoreInfo add_to_library_groups = XCore diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp index 3d1c4745d6ac..f788c5957bb8 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp @@ -20,8 +20,7 @@ XCoreMCAsmInfo::XCoreMCAsmInfo(StringRef TT) { Data64bitsDirective = 0; ZeroDirective = "\t.space\t"; CommentString = "#"; - - PrivateGlobalPrefix = ".L"; + AscizDirective = ".asciiz"; HiddenVisibilityAttr = MCSA_Invalid; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/external/bsd/llvm/dist/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index 10bb6dfa928a..c4347357d517 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -69,6 +69,12 @@ static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM, if (RM == Reloc::Default) { RM = Reloc::Static; } + if (CM == CodeModel::Default) { + CM = CodeModel::Small; + } + if (CM != CodeModel::Small && CM != CodeModel::Large) + report_fatal_error("Target only supports CodeModel Small or Large"); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/TargetInfo/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Target/XCore/TargetInfo/CMakeLists.txt 
index 2c34b8730c85..462f2d405304 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/TargetInfo/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/TargetInfo/CMakeLists.txt @@ -1,7 +1,3 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - add_llvm_library(LLVMXCoreInfo XCoreTargetInfo.cpp ) - -add_dependencies(LLVMXCoreInfo XCoreCommonTableGen) diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/TargetInfo/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Target/XCore/TargetInfo/LLVMBuild.txt index 770ba87e4a39..45ff75f14359 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/TargetInfo/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/TargetInfo/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = XCoreInfo parent = XCore -required_libraries = MC Support Target +required_libraries = Support add_to_library_groups = XCore diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCore.td b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCore.td index e9a6d88fd68e..04a1dd5e95be 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCore.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCore.td @@ -41,13 +41,7 @@ def : Proc<"xs1b-generic", []>; // Declare the target which we are implementing //===----------------------------------------------------------------------===// -def XCoreAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - bit isMCAsmWriter = 1; -} - def XCore : Target { // Pull in Instruction Info: let InstructionSet = XCoreInstrInfo; - let AssemblyWriters = [XCoreAsmWriter]; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp index c03dfe61504e..eb58598cc262 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -204,6 +204,7 @@ printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O, void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { + const DataLayout *DL = TM.getDataLayout(); const MachineOperand &MO = MI->getOperand(opNum); switch (MO.getType()) { case MachineOperand::MO_Register: @@ -218,15 +219,8 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum, case MachineOperand::MO_GlobalAddress: O << *getSymbol(MO.getGlobal()); break; - case MachineOperand::MO_ExternalSymbol: - O << MO.getSymbolName(); - break; case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() - << '_' << MO.getIndex(); - break; - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' << MO.getIndex(); break; case MachineOperand::MO_BlockAddress: diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreFrameLowering.cpp index c34b35c140f9..238a59aaa142 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreFrameLowering.cpp @@ -29,6 +29,9 @@ using namespace llvm; +static const unsigned FramePtr = XCore::R10; +static const int MaxImmU16 = (1<<16) - 1; + // helper functions. FIXME: Eliminate. 
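// Editor's note: the two helpers below check the unsigned 6-bit (0..63) and
// 16-bit (0..65535) immediate ranges that select between the short u6 and
// long lu6/lru6 XCore encodings; MaxImmU16 above is therefore the largest
// SP adjustment a single instruction can encode, which is why the prologue
// and epilogue code later in this file moves the SP in stages.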
static inline bool isImmU6(unsigned val) { return val < (1 << 6); @@ -38,34 +41,93 @@ static inline bool isImmU16(unsigned val) { return val < (1 << 16); } -static void loadFromStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DstReg, int Offset, DebugLoc dl, - const TargetInstrInfo &TII) { - assert(Offset%4 == 0 && "Misaligned stack offset"); - Offset/=4; - bool isU6 = isImmU6(Offset); - if (!isU6 && !isImmU16(Offset)) - report_fatal_error("loadFromStack offset too big " + Twine(Offset)); - int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6; - BuildMI(MBB, I, dl, TII.get(Opcode), DstReg) - .addImm(Offset); +static void EmitDefCfaRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc dl, + const TargetInstrInfo &TII, + MachineModuleInfo *MMI, unsigned DRegNum) { + MCSymbol *Label = MMI->getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(Label); + MMI->addFrameInst(MCCFIInstruction::createDefCfaRegister(Label, DRegNum)); } +static void EmitDefCfaOffset(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc dl, + const TargetInstrInfo &TII, + MachineModuleInfo *MMI, int Offset) { + MCSymbol *Label = MMI->getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(Label); + MMI->addFrameInst(MCCFIInstruction::createDefCfaOffset(Label, -Offset)); +} -static void storeToStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned SrcReg, int Offset, DebugLoc dl, - const TargetInstrInfo &TII) { - assert(Offset%4 == 0 && "Misaligned stack offset"); - Offset/=4; - bool isU6 = isImmU6(Offset); - if (!isU6 && !isImmU16(Offset)) - report_fatal_error("storeToStack offset too big " + Twine(Offset)); - int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6; - BuildMI(MBB, I, dl, TII.get(Opcode)) - .addReg(SrcReg) - .addImm(Offset); +static void EmitCfiOffset(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc dl, + const TargetInstrInfo &TII, MachineModuleInfo *MMI, + unsigned DRegNum, int Offset, MCSymbol *Label) { + if (!Label) { + Label = MMI->getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(Label); + } + MMI->addFrameInst(MCCFIInstruction::createOffset(Label, DRegNum, Offset)); +} + +/// The SP register is moved in steps of 'MaxImmU16' towards the bottom of the +/// frame. During these steps, it may be necessary to spill registers. +/// IfNeededExtSP emits the necessary EXTSP instructions to move the SP only +/// as far as to make 'OffsetFromBottom' reachable using an STWSP_lru6. +/// \param OffsetFromTop the spill offset from the top of the frame. +/// \param [in,out] Adjusted the current SP offset from the top of the frame. +static void IfNeededExtSP(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc dl, + const TargetInstrInfo &TII, MachineModuleInfo *MMI, + int OffsetFromTop, int &Adjusted, int FrameSize, + bool emitFrameMoves) { + while (OffsetFromTop > Adjusted) { + assert(Adjusted < FrameSize && "OffsetFromTop is beyond FrameSize"); + int remaining = FrameSize - Adjusted; + int OpImm = (remaining > MaxImmU16) ? MaxImmU16 : remaining; + int Opcode = isImmU6(OpImm) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6; + BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(OpImm); + Adjusted += OpImm; + if (emitFrameMoves) + EmitDefCfaOffset(MBB, MBBI, dl, TII, MMI, Adjusted*4); + } +} + +/// The SP register is moved in steps of 'MaxImmU16' towards the top of the +/// frame. 
During these steps, it may be necessary to re-load registers. +/// IfNeededLDAWSP emits the necessary LDAWSP instructions to move the SP only +/// as far as to make 'OffsetFromTop' reachable using an LDAWSP_lru6. +/// \param OffsetFromTop the spill offset from the top of the frame. +/// \param [in,out] RemainingAdj the current SP offset from the top of the frame. +static void IfNeededLDAWSP(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc dl, + const TargetInstrInfo &TII, int OffsetFromTop, + int &RemainingAdj) { + while (OffsetFromTop < RemainingAdj - MaxImmU16) { + assert(RemainingAdj && "OffsetFromTop is beyond FrameSize"); + int OpImm = (RemainingAdj > MaxImmU16) ? MaxImmU16 : RemainingAdj; + int Opcode = isImmU6(OpImm) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6; + BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(OpImm); + RemainingAdj -= OpImm; + } +} + +/// Creates an ordered list of registers that are spilled +/// during the emitPrologue/emitEpilogue. +/// Registers are ordered according to their frame offset. +static void GetSpillList(SmallVectorImpl > &SpillList, + MachineFrameInfo *MFI, XCoreFunctionInfo *XFI, + bool fetchLR, bool fetchFP) { + int LRSpillOffset = fetchLR? MFI->getObjectOffset(XFI->getLRSpillSlot()) : 0; + int FPSpillOffset = fetchFP? MFI->getObjectOffset(XFI->getFPSpillSlot()) : 0; + if (fetchLR && fetchFP && LRSpillOffset > FPSpillOffset) { + SpillList.push_back(std::pair(XCore::LR, LRSpillOffset)); + fetchLR = false; + } + if (fetchFP) + SpillList.push_back(std::pair(FramePtr, FPSpillOffset)); + if (fetchLR) + SpillList.push_back(std::pair(XCore::LR, LRSpillOffset)); } @@ -80,7 +142,7 @@ XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti) bool XCoreFrameLowering::hasFP(const MachineFunction &MF) const { return MF.getTarget().Options.DisableFramePointerElim(MF) || - MF.getFrameInfo()->hasVarSizedObjects(); + MF.getFrameInfo()->hasVarSizedObjects(); } void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const { @@ -98,90 +160,69 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const { report_fatal_error("emitPrologue unsupported alignment: " + Twine(MFI->getMaxAlignment())); - bool FP = hasFP(MF); const AttributeSet &PAL = MF.getFunction()->getAttributes(); - if (PAL.hasAttrSomewhere(Attribute::Nest)) - loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII); + BuildMI(MBB, MBBI, dl, TII.get(XCore::LDWSP_ru6), XCore::R11).addImm(0); // Work out frame sizes. - int FrameSize = MFI->getStackSize(); - assert(FrameSize%4 == 0 && "Misaligned frame size"); - FrameSize/=4; - - bool isU6 = isImmU6(FrameSize); - - if (!isU6 && !isImmU16(FrameSize)) { - // FIXME could emit multiple instructions. - report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize)); - } - bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(MF); + // We will adjust the SP in stages towards the final FrameSize. + assert(MFI->getStackSize()%4 == 0 && "Misaligned frame size"); + const int FrameSize = MFI->getStackSize() / 4; + int Adjusted = 0; bool saveLR = XFI->getUsesLR(); - // Do we need to allocate space on the stack? - if (FrameSize) { - bool LRSavedOnEntry = false; - int Opcode; - if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) { - Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6; - MBB.addLiveIn(XCore::LR); - saveLR = false; - LRSavedOnEntry = true; - } else { - Opcode = (isU6) ? 
XCore::EXTSP_u6 : XCore::EXTSP_lu6; - } - BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize); + bool UseENTSP = saveLR && FrameSize + && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0); + if (UseENTSP) + saveLR = false; + bool FP = hasFP(MF); + bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(MF); - if (emitFrameMoves) { - // Show update of SP. - MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel); - MMI->addFrameInst(MCCFIInstruction::createDefCfaOffset(FrameLabel, - -FrameSize*4)); - if (LRSavedOnEntry) { - unsigned Reg = MRI->getDwarfRegNum(XCore::LR, true); - MMI->addFrameInst(MCCFIInstruction::createOffset(FrameLabel, Reg, 0)); - } - } - } - if (saveLR) { - int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot()); - storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl, TII); + if (UseENTSP) { + // Allocate space on the stack at the same time as saving LR. + Adjusted = (FrameSize > MaxImmU16) ? MaxImmU16 : FrameSize; + int Opcode = isImmU6(Adjusted) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6; + BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(Adjusted); MBB.addLiveIn(XCore::LR); - if (emitFrameMoves) { - MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel); - unsigned Reg = MRI->getDwarfRegNum(XCore::LR, true); - MMI->addFrameInst(MCCFIInstruction::createOffset(SaveLRLabel, Reg, - LRSpillOffset)); + EmitDefCfaOffset(MBB, MBBI, dl, TII, MMI, Adjusted*4); + unsigned DRegNum = MRI->getDwarfRegNum(XCore::LR, true); + EmitCfiOffset(MBB, MBBI, dl, TII, MMI, DRegNum, 0, NULL); } } + // If necessary, save LR and FP to the stack, as we EXTSP. + SmallVector,2> SpillList; + GetSpillList(SpillList, MFI, XFI, saveLR, FP); + for (unsigned i = 0, e = SpillList.size(); i != e; ++i) { + unsigned SpillReg = SpillList[i].first; + int SpillOffset = SpillList[i].second; + assert(SpillOffset % 4 == 0 && "Misaligned stack offset"); + assert(SpillOffset <= 0 && "Unexpected positive stack offset"); + int OffsetFromTop = - SpillOffset/4; + IfNeededExtSP(MBB, MBBI, dl, TII, MMI, OffsetFromTop, Adjusted, FrameSize, + emitFrameMoves); + int Offset = Adjusted - OffsetFromTop; + int Opcode = isImmU6(Offset) ? XCore::STWSP_ru6 : XCore::STWSP_lru6; + BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addReg(SpillReg).addImm(Offset); + MBB.addLiveIn(SpillReg); + if (emitFrameMoves) { + unsigned DRegNum = MRI->getDwarfRegNum(SpillReg, true); + EmitCfiOffset(MBB, MBBI, dl, TII, MMI, DRegNum, SpillOffset, NULL); + } + } + + // Complete any remaining Stack adjustment. + IfNeededExtSP(MBB, MBBI, dl, TII, MMI, FrameSize, Adjusted, FrameSize, + emitFrameMoves); + assert(Adjusted==FrameSize && "IfNeededExtSP has not completed adjustment"); + if (FP) { - // Save R10 to the stack. - int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot()); - storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl, TII); - // R10 is live-in. It is killed at the spill. - MBB.addLiveIn(XCore::R10); - if (emitFrameMoves) { - MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label); - unsigned Reg = MRI->getDwarfRegNum(XCore::R10, true); - MMI->addFrameInst(MCCFIInstruction::createOffset(SaveR10Label, Reg, - FPSpillOffset)); - } // Set the FP from the SP. 
- unsigned FramePtr = XCore::R10; BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr).addImm(0); - if (emitFrameMoves) { - // Show FP is now valid. - MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel); - unsigned Reg = MRI->getDwarfRegNum(FramePtr, true); - MMI->addFrameInst(MCCFIInstruction::createDefCfaRegister(FrameLabel, - Reg)); - } + if (emitFrameMoves) + EmitDefCfaRegister(MBB, MBBI, dl, TII, MMI, + MRI->getDwarfRegNum(FramePtr, true)); } if (emitFrameMoves) { @@ -192,9 +233,8 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const { MCSymbol *SpillLabel = SpillLabels[I].first; CalleeSavedInfo &CSI = SpillLabels[I].second; int Offset = MFI->getObjectOffset(CSI.getFrameIdx()); - unsigned Reg = MRI->getDwarfRegNum(CSI.getReg(), true); - MMI->addFrameInst(MCCFIInstruction::createOffset(SpillLabel, Reg, - Offset)); + unsigned DRegNum = MRI->getDwarfRegNum(CSI.getReg(), true); + EmitCfiOffset(MBB, MBBI, dl, TII, MMI, DRegNum, Offset, SpillLabel); } } } @@ -208,66 +248,65 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF, XCoreFunctionInfo *XFI = MF.getInfo(); DebugLoc dl = MBBI->getDebugLoc(); - bool FP = hasFP(MF); - if (FP) { - // Restore the stack pointer. - unsigned FramePtr = XCore::R10; - BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r)) - .addReg(FramePtr); - } - // Work out frame sizes. - int FrameSize = MFI->getStackSize(); - - assert(FrameSize%4 == 0 && "Misaligned frame size"); - - FrameSize/=4; - - bool isU6 = isImmU6(FrameSize); - - if (!isU6 && !isImmU16(FrameSize)) { - // FIXME could emit multiple instructions. - report_fatal_error("emitEpilogue Frame size too big: " + Twine(FrameSize)); - } - - if (FP) { - // Restore R10 - int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot()); - FPSpillOffset += FrameSize*4; - loadFromStack(MBB, MBBI, XCore::R10, FPSpillOffset, dl, TII); - } + // We will adjust the SP in stages towards the final FrameSize. + int RemainingAdj = MFI->getStackSize(); + assert(RemainingAdj%4 == 0 && "Misaligned frame size"); + RemainingAdj /= 4; bool restoreLR = XFI->getUsesLR(); - if (restoreLR && - (FrameSize == 0 || MFI->getObjectOffset(XFI->getLRSpillSlot()) != 0)) { - int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot()); - LRSpillOffset += FrameSize*4; - loadFromStack(MBB, MBBI, XCore::LR, LRSpillOffset, dl, TII); + bool UseRETSP = restoreLR && RemainingAdj + && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0); + if (UseRETSP) restoreLR = false; + bool FP = hasFP(MF); + + if (FP) // Restore the stack pointer. + BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r)).addReg(FramePtr); + + // If necessary, restore LR and FP from the stack, as we EXTSP. + SmallVector,2> SpillList; + GetSpillList(SpillList, MFI, XFI, restoreLR, FP); + unsigned i = SpillList.size(); + while (i--) { + unsigned SpilledReg = SpillList[i].first; + int SpillOffset = SpillList[i].second; + assert(SpillOffset % 4 == 0 && "Misaligned stack offset"); + assert(SpillOffset <= 0 && "Unexpected positive stack offset"); + int OffsetFromTop = - SpillOffset/4; + IfNeededLDAWSP(MBB, MBBI, dl, TII, OffsetFromTop, RemainingAdj); + int Offset = RemainingAdj - OffsetFromTop; + int Opcode = isImmU6(Offset) ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6; + BuildMI(MBB, MBBI, dl, TII.get(Opcode), SpilledReg).addImm(Offset); } - if (FrameSize) { - if (restoreLR) { + if (RemainingAdj) { + // Complete all but one of the remaining Stack adjustments. 
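      // Editor's note: "all but one" because the IfNeededLDAWSP call below,
      // invoked with OffsetFromTop = 0, only shrinks RemainingAdj while it
      // still exceeds MaxImmU16; the final adjustment of at most MaxImmU16
      // words is then folded into the RETSP, or emitted as the single LDAWSP
      // in the else branch.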
+ IfNeededLDAWSP(MBB, MBBI, dl, TII, 0, RemainingAdj); + if (UseRETSP) { // Fold prologue into return instruction - assert(MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0); assert(MBBI->getOpcode() == XCore::RETSP_u6 - || MBBI->getOpcode() == XCore::RETSP_lu6); - int Opcode = (isU6) ? XCore::RETSP_u6 : XCore::RETSP_lu6; - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize); + || MBBI->getOpcode() == XCore::RETSP_lu6); + int Opcode = isImmU6(RemainingAdj) ? XCore::RETSP_u6 : XCore::RETSP_lu6; + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opcode)) + .addImm(RemainingAdj); for (unsigned i = 3, e = MBBI->getNumOperands(); i < e; ++i) MIB->addOperand(MBBI->getOperand(i)); // copy any variadic operands - MBB.erase(MBBI); + MBB.erase(MBBI); // Erase the previous return instruction. } else { - int Opcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6; - BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(FrameSize); + int Opcode = isImmU6(RemainingAdj) ? XCore::LDAWSP_ru6 : + XCore::LDAWSP_lru6; + BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(RemainingAdj); + // Don't erase the return instruction. } - } + } // else Don't erase the return instruction. } -bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const { +bool XCoreFrameLowering:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return true; @@ -278,7 +317,8 @@ bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF); DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); + if (MI != MBB.end()) + DL = MI->getDebugLoc(); for (std::vector::const_iterator it = CSI.begin(); it != CSI.end(); ++it) { @@ -298,10 +338,11 @@ bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } -bool XCoreFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const{ +bool XCoreFrameLowering:: +restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const{ MachineFunction *MF = MBB.getParent(); const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); @@ -381,11 +422,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -void -XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +void XCoreFrameLowering:: +processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR); const TargetRegisterClass *RC = &XCore::GRRegsRegClass; XCoreFunctionInfo *XFI = MF.getInfo(); @@ -395,7 +435,7 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, bool isVarArg = MF.getFunction()->isVarArg(); int FrameIdx; if (! isVarArg) { - // A fixed offset of 0 allows us to save / restore LR using entsp / retsp. + // A fixed offset of 0 allows us to save/restore LR using entsp/retsp. 
FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true); } else { FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), @@ -404,17 +444,32 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, XFI->setUsesLR(FrameIdx); XFI->setLRSpillSlot(FrameIdx); } - if (RegInfo->requiresRegisterScavenging(MF)) { - // Reserve a slot close to SP or frame pointer. - RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } - if (hasFP(MF)) { - // A callee save register is used to hold the FP. - // This needs saving / restoring in the epilogue / prologue. + + // A callee save register is used to hold the FP. + // This needs saving / restoring in the epilogue / prologue. + if (hasFP(MF)) XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); - } +} + +void XCoreFrameLowering:: +processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const { + assert(RS && "requiresRegisterScavenging failed"); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterClass *RC = &XCore::GRRegsRegClass; + XCoreFunctionInfo *XFI = MF.getInfo(); + // Reserve slots close to SP or frame pointer for Scavenging spills. + // When using SP for small frames, we don't need any scratch registers. + // When using SP for large frames, we may need 2 scratch registers. + // When using FP, for large or small frames, we may need 1 scratch register. + if (XFI->isLargeFrame(MF) || hasFP(MF)) + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + if (XFI->isLargeFrame(MF) && !hasFP(MF)) + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreFrameLowering.h b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreFrameLowering.h index ebad62f2fa53..6cd90c96e7f0 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreFrameLowering.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreFrameLowering.h @@ -48,6 +48,9 @@ namespace llvm { void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; + //! Stack slot size (4 bytes) static int stackSlotSize() { return 4; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreISelLowering.cpp index 89ad27daec13..668a24863961 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" @@ -270,20 +271,35 @@ getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV, SDValue XCoreTargetLowering:: LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); const GlobalAddressSDNode *GN = cast(Op); const GlobalValue *GV = GN->getGlobal(); + SDLoc DL(GN); int64_t Offset = GN->getOffset(); - // We can only fold positive offsets that are a multiple of the word size. 
- int64_t FoldedOffset = std::max(Offset & ~3, (int64_t)0); - SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, FoldedOffset); - GA = getGlobalAddressWrapper(GA, GV, DAG); - // Handle the rest of the offset. - if (Offset != FoldedOffset) { - SDValue Remaining = DAG.getConstant(Offset - FoldedOffset, MVT::i32); - GA = DAG.getNode(ISD::ADD, DL, MVT::i32, GA, Remaining); + Type *ObjType = GV->getType()->getPointerElementType(); + if (getTargetMachine().getCodeModel() == CodeModel::Small || + !ObjType->isSized() || + getDataLayout()->getTypeAllocSize(ObjType) < CodeModelLargeSize) { + // We can only fold positive offsets that are a multiple of the word size. + int64_t FoldedOffset = std::max(Offset & ~3, (int64_t)0); + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, FoldedOffset); + GA = getGlobalAddressWrapper(GA, GV, DAG); + // Handle the rest of the offset. + if (Offset != FoldedOffset) { + SDValue Remaining = DAG.getConstant(Offset - FoldedOffset, MVT::i32); + GA = DAG.getNode(ISD::ADD, DL, MVT::i32, GA, Remaining); + } + return GA; + } else { + // Ideally we would not fold in offset with an index <= 11. + Type *Ty = Type::getInt8PtrTy(*DAG.getContext()); + Constant *GA = ConstantExpr::getBitCast(const_cast(GV), Ty); + Ty = Type::getInt32Ty(*DAG.getContext()); + Constant *Idx = ConstantInt::get(Ty, Offset); + Constant *GAI = ConstantExpr::getGetElementPtr(GA, Idx); + SDValue CP = DAG.getConstantPool(GAI, MVT::i32); + return DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), CP, + MachinePointerInfo(), false, false, false, 0); } - return GA; } SDValue XCoreTargetLowering:: @@ -307,10 +323,10 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const SDValue Res; if (CP->isMachineConstantPoolEntry()) { Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, - CP->getAlignment()); + CP->getAlignment(), CP->getOffset()); } else { Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, - CP->getAlignment()); + CP->getAlignment(), CP->getOffset()); } return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res); } @@ -1230,7 +1246,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, unsigned Size = ArgDI->Flags.getByValSize(); unsigned Align = std::max(StackSlotSize, ArgDI->Flags.getByValAlign()); // Create a new object on the stack and copy the pointee into it. 
- int FI = MFI->CreateStackObject(Size, Align, false, false); + int FI = MFI->CreateStackObject(Size, Align, false); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); InVals.push_back(FIN); MemOps.push_back(DAG.getMemcpy(Chain, dl, FIN, ArgDI->SDV, diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreInstrInfo.td b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreInstrInfo.td index 934a707e785b..5974842383ce 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreInstrInfo.td +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreInstrInfo.td @@ -1286,3 +1286,9 @@ def : Pat<(setgt GRRegs:$lhs, -1), def : Pat<(sra (shl GRRegs:$src, immBpwSubBitp:$imm), immBpwSubBitp:$imm), (SEXT_rus GRRegs:$src, (bpwsub_xform immBpwSubBitp:$imm))>; + +def : Pat<(load (cprelwrapper tconstpool:$b)), + (LDWCP_lru6 tconstpool:$b)>; + +def : Pat<(cprelwrapper tconstpool:$b), + (LDAWCP_lu6 tconstpool:$b)>; diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp index 7ca06729120e..91b297600807 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp @@ -12,3 +12,19 @@ using namespace llvm; void XCoreFunctionInfo::anchor() { } + +bool XCoreFunctionInfo::isLargeFrame(const MachineFunction &MF) const { + if (CachedEStackSize == -1) { + CachedEStackSize = MF.getFrameInfo()->estimateStackSize(MF); + } + // isLargeFrame() is used when deciding if spill slots should be added to + // allow eliminateFrameIndex() to scavenge registers. + // This is only required when there is no FP and offsets are greater than + // ~256KB (~64Kwords). Thus only for code run on the emulator! + // + // The arbitrary value of 0xf000 allows frames of up to ~240KB before spill + // slots are added for the use of eliminateFrameIndex() register scavenging. + // For frames less than 240KB, it is assumed that there will be less than + // 16KB of function arguments. 
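  // Editor's note, making the arithmetic above explicit: a u16 scaled offset
  // reaches 65535 words, i.e. 65535 * 4 bytes ~ 256KB of frame; the chosen
  // threshold 0xf000 = 61440 corresponds to 61440 words * 4 bytes ~ 240KB,
  // so the remaining 256KB - 240KB = 16KB of reachable frame is what is left
  // for the function arguments mentioned above.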
+ return CachedEStackSize > 0xf000; +} diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h index 69d5de3e03ad..4fa4ee5d9c07 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h @@ -31,6 +31,7 @@ class XCoreFunctionInfo : public MachineFunctionInfo { int LRSpillSlot; int FPSpillSlot; int VarArgsFrameIndex; + mutable int CachedEStackSize; std::vector > SpillLabels; public: @@ -38,13 +39,15 @@ public: UsesLR(false), LRSpillSlot(0), FPSpillSlot(0), - VarArgsFrameIndex(0) {} + VarArgsFrameIndex(0), + CachedEStackSize(-1) {} explicit XCoreFunctionInfo(MachineFunction &MF) : UsesLR(false), LRSpillSlot(0), FPSpillSlot(0), - VarArgsFrameIndex(0) {} + VarArgsFrameIndex(0), + CachedEStackSize(-1) {} ~XCoreFunctionInfo() {} @@ -60,6 +63,8 @@ public: void setFPSpillSlot(int off) { FPSpillSlot = off; } int getFPSpillSlot() const { return FPSpillSlot; } + bool isLargeFrame(const MachineFunction &MF) const; + std::vector > &getSpillLabels() { return SpillLabels; } diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp index dbd2f52a5b1a..7c2d842e774e 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -16,16 +16,19 @@ #include "XCoreMachineFunctionInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" @@ -54,6 +57,165 @@ static inline bool isImmU16(unsigned val) { return val < (1 << 16); } +static void loadConstant(MachineBasicBlock::iterator II, + const TargetInstrInfo &TII, + unsigned DstReg, int64_t Value) { + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + if (isMask_32(Value)) { + int N = Log2_32(Value) + 1; + BuildMI(MBB, II, dl, TII.get(XCore::MKMSK_rus), DstReg).addImm(N); + } else if (isImmU16(Value)) { + int Opcode = isImmU6(Value) ? 
XCore::LDC_ru6 : XCore::LDC_lru6; + BuildMI(MBB, II, dl, TII.get(Opcode), DstReg).addImm(Value); + } else { + MachineConstantPool *ConstantPool = MBB.getParent()->getConstantPool(); + const Constant *C = ConstantInt::get( + Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Value); + unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); + BuildMI(MBB, II, dl, TII.get(XCore::LDWCP_lru6), DstReg) + .addConstantPoolIndex(Idx); + } +} + +static void InsertFPImmInst(MachineBasicBlock::iterator II, + const TargetInstrInfo &TII, + unsigned Reg, unsigned FrameReg, int Offset ) { + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + switch (MI.getOpcode()) { + case XCore::LDWFI: + BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg) + .addReg(FrameReg) + .addImm(Offset); + break; + case XCore::STWFI: + BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus)) + .addReg(Reg, getKillRegState(MI.getOperand(0).isKill())) + .addReg(FrameReg) + .addImm(Offset); + break; + case XCore::LDAWFI: + BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg) + .addReg(FrameReg) + .addImm(Offset); + break; + default: + llvm_unreachable("Unexpected Opcode"); + } +} + +static void InsertFPConstInst(MachineBasicBlock::iterator II, + const TargetInstrInfo &TII, + unsigned Reg, unsigned FrameReg, + int Offset, RegScavenger *RS ) { + assert(RS && "requiresRegisterScavenging failed"); + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + unsigned ScratchOffset = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0); + RS->setUsed(ScratchOffset); + loadConstant(II, TII, ScratchOffset, Offset); + + switch (MI.getOpcode()) { + case XCore::LDWFI: + BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg) + .addReg(FrameReg) + .addReg(ScratchOffset, RegState::Kill); + break; + case XCore::STWFI: + BuildMI(MBB, II, dl, TII.get(XCore::STW_l3r)) + .addReg(Reg, getKillRegState(MI.getOperand(0).isKill())) + .addReg(FrameReg) + .addReg(ScratchOffset, RegState::Kill); + break; + case XCore::LDAWFI: + BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg) + .addReg(FrameReg) + .addReg(ScratchOffset, RegState::Kill); + break; + default: + llvm_unreachable("Unexpected Opcode"); + } +} + +static void InsertSPImmInst(MachineBasicBlock::iterator II, + const TargetInstrInfo &TII, + unsigned Reg, int Offset) { + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + bool isU6 = isImmU6(Offset); + switch (MI.getOpcode()) { + int NewOpcode; + case XCore::LDWFI: + NewOpcode = (isU6) ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6; + BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg) + .addImm(Offset); + break; + case XCore::STWFI: + NewOpcode = (isU6) ? XCore::STWSP_ru6 : XCore::STWSP_lru6; + BuildMI(MBB, II, dl, TII.get(NewOpcode)) + .addReg(Reg, getKillRegState(MI.getOperand(0).isKill())) + .addImm(Offset); + break; + case XCore::LDAWFI: + NewOpcode = (isU6) ? 
XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6; + BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg) + .addImm(Offset); + break; + default: + llvm_unreachable("Unexpected Opcode"); + } +} + +static void InsertSPConstInst(MachineBasicBlock::iterator II, + const TargetInstrInfo &TII, + unsigned Reg, int Offset, RegScavenger *RS ) { + assert(RS && "requiresRegisterScavenging failed"); + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + unsigned OpCode = MI.getOpcode(); + + unsigned ScratchBase; + if (OpCode==XCore::STWFI) { + ScratchBase = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0); + RS->setUsed(ScratchBase); + } else + ScratchBase = Reg; + BuildMI(MBB, II, dl, TII.get(XCore::LDAWSP_ru6), ScratchBase).addImm(0); + unsigned ScratchOffset = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0); + RS->setUsed(ScratchOffset); + loadConstant(II, TII, ScratchOffset, Offset); + + switch (OpCode) { + case XCore::LDWFI: + BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg) + .addReg(ScratchBase, RegState::Kill) + .addReg(ScratchOffset, RegState::Kill); + break; + case XCore::STWFI: + BuildMI(MBB, II, dl, TII.get(XCore::STW_l3r)) + .addReg(Reg, getKillRegState(MI.getOperand(0).isKill())) + .addReg(ScratchBase, RegState::Kill) + .addReg(ScratchOffset, RegState::Kill); + break; + case XCore::LDAWFI: + BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg) + .addReg(ScratchBase, RegState::Kill) + .addReg(ScratchOffset, RegState::Kill); + break; + default: + llvm_unreachable("Unexpected Opcode"); + } +} + bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) { return MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry(); @@ -85,15 +247,12 @@ BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const { bool XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - // TODO can we estimate stack size? 
- return TFI->hasFP(MF); + return true; } bool XCoreRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { - return requiresRegisterScavenging(MF); + return true; } bool @@ -107,7 +266,6 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; - DebugLoc dl = MI.getDebugLoc(); MachineOperand &FrameOp = MI.getOperand(FIOperandNum); int FrameIndex = FrameOp.getIndex(); @@ -143,116 +301,28 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0); assert(Offset%4 == 0 && "Misaligned stack offset"); - DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); - Offset/=4; - bool FP = TFI->hasFP(MF); - unsigned Reg = MI.getOperand(0).getReg(); - bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill(); - assert(XCore::GRRegsRegClass.contains(Reg) && "Unexpected register operand"); - - MachineBasicBlock &MBB = *MI.getParent(); - - if (FP) { - bool isUs = isImmUs(Offset); - - if (!isUs) { - if (!RS) - report_fatal_error("eliminateFrameIndex Frame size too big: " + - Twine(Offset)); - unsigned ScratchReg = RS->scavengeRegister(&XCore::GRRegsRegClass, II, - SPAdj); - loadConstant(MBB, II, ScratchReg, Offset, dl); - switch (MI.getOpcode()) { - case XCore::LDWFI: - BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg) - .addReg(FrameReg) - .addReg(ScratchReg, RegState::Kill); - break; - case XCore::STWFI: - BuildMI(MBB, II, dl, TII.get(XCore::STW_l3r)) - .addReg(Reg, getKillRegState(isKill)) - .addReg(FrameReg) - .addReg(ScratchReg, RegState::Kill); - break; - case XCore::LDAWFI: - BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg) - .addReg(FrameReg) - .addReg(ScratchReg, RegState::Kill); - break; - default: - llvm_unreachable("Unexpected Opcode"); - } - } else { - switch (MI.getOpcode()) { - case XCore::LDWFI: - BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg) - .addReg(FrameReg) - .addImm(Offset); - break; - case XCore::STWFI: - BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus)) - .addReg(Reg, getKillRegState(isKill)) - .addReg(FrameReg) - .addImm(Offset); - break; - case XCore::LDAWFI: - BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg) - .addReg(FrameReg) - .addImm(Offset); - break; - default: - llvm_unreachable("Unexpected Opcode"); - } - } - } else { - bool isU6 = isImmU6(Offset); - if (!isU6 && !isImmU16(Offset)) - report_fatal_error("eliminateFrameIndex Frame size too big: " + - Twine(Offset)); - switch (MI.getOpcode()) { - int NewOpcode; - case XCore::LDWFI: - NewOpcode = (isU6) ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6; - BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg) - .addImm(Offset); - break; - case XCore::STWFI: - NewOpcode = (isU6) ? XCore::STWSP_ru6 : XCore::STWSP_lru6; - BuildMI(MBB, II, dl, TII.get(NewOpcode)) - .addReg(Reg, getKillRegState(isKill)) - .addImm(Offset); - break; - case XCore::LDAWFI: - NewOpcode = (isU6) ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6; - BuildMI(MBB, II, dl, TII.get(NewOpcode), Reg) - .addImm(Offset); - break; - default: - llvm_unreachable("Unexpected Opcode"); - } + if (TFI->hasFP(MF)) { + if (isImmUs(Offset)) + InsertFPImmInst(II, TII, Reg, FrameReg, Offset); + else + InsertFPConstInst(II, TII, Reg, FrameReg, Offset, RS); + } else { + if (isImmU16(Offset)) + InsertSPImmInst(II, TII, Reg, Offset); + else + InsertSPConstInst(II, TII, Reg, Offset, RS); } // Erase old instruction. 
+ MachineBasicBlock &MBB = *MI.getParent(); MBB.erase(II); } -void XCoreRegisterInfo:: -loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DstReg, int64_t Value, DebugLoc dl) const { - // TODO use mkmsk if possible. - if (!isImmU16(Value)) { - // TODO use constant pool. - report_fatal_error("loadConstant value too big " + Twine(Value)); - } - int Opcode = isImmU6(Value) ? XCore::LDC_ru6 : XCore::LDC_lru6; - const TargetInstrInfo &TII = *MBB.getParent()->getTarget().getInstrInfo(); - BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value); -} unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreRegisterInfo.h index 2370c6280f2e..36ba7b46e5ed 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreRegisterInfo.h +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreRegisterInfo.h @@ -24,19 +24,6 @@ namespace llvm { class TargetInstrInfo; struct XCoreRegisterInfo : public XCoreGenRegisterInfo { -private: - void loadConstant(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DstReg, int64_t Value, DebugLoc dl) const; - - void storeToStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned SrcReg, int Offset, DebugLoc dl) const; - - void loadFromStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DstReg, int Offset, DebugLoc dl) const; - public: XCoreRegisterInfo(); diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreTargetMachine.cpp index 9ae0b860dff9..21027270911d 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreTargetMachine.cpp @@ -27,8 +27,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS), - DL("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-" - "i16:16:32-i32:32:32-i64:32:32-n32"), + DL("e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32"), InstrInfo(), FrameLowering(Subtarget), TLInfo(*this), diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp index 88e3bfd7b81c..61eb2b51f3eb 100644 --- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp @@ -9,27 +9,52 @@ #include "XCoreTargetObjectFile.h" #include "XCoreSubtarget.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" + using namespace llvm; void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ TargetLoweringObjectFileELF::Initialize(Ctx, TM); - DataSection = - Ctx.getELFSection(".dp.data", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_WRITE | - ELF::XCORE_SHF_DP_SECTION, - SectionKind::getDataRel()); BSSSection = Ctx.getELFSection(".dp.bss", ELF::SHT_NOBITS, ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::XCORE_SHF_DP_SECTION, SectionKind::getBSS()); - + BSSSectionLarge = + Ctx.getELFSection(".dp.bss.large", ELF::SHT_NOBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE | + 
ELF::XCORE_SHF_DP_SECTION, + SectionKind::getBSS()); + DataSection = + Ctx.getELFSection(".dp.data", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE | + ELF::XCORE_SHF_DP_SECTION, + SectionKind::getDataRel()); + DataSectionLarge = + Ctx.getELFSection(".dp.data.large", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE | + ELF::XCORE_SHF_DP_SECTION, + SectionKind::getDataRel()); + // This is the wrong place to decide if const data should be placed + // in the .cp or .dp section. + // Ideally we should set up DataRelROSection to use the '.dp.' prefix and + // use this for const data, unless the front end explicitly states a '.cp.' + // section. + ReadOnlySection = + Ctx.getELFSection(".cp.rodata", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | + ELF::XCORE_SHF_CP_SECTION, + SectionKind::getReadOnlyWithRel()); + ReadOnlySectionLarge = + Ctx.getELFSection(".cp.rodata.large", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | + ELF::XCORE_SHF_CP_SECTION, + SectionKind::getReadOnlyWithRel()); MergeableConst4Section = Ctx.getELFSection(".cp.rodata.cst4", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_MERGE | @@ -45,16 +70,100 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::XCORE_SHF_CP_SECTION, SectionKind::getMergeableConst16()); - - // TLS globals are lowered in the backend to arrays indexed by the current - // thread id. After lowering they require no special handling by the linker - // and can be placed in the standard data / bss sections. - TLSDataSection = DataSection; - TLSBSSSection = BSSSection; - - ReadOnlySection = - Ctx.getELFSection(".cp.rodata", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | + CStringSection = + Ctx.getELFSection(".cp.rodata.string", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::SHF_STRINGS | ELF::XCORE_SHF_CP_SECTION, SectionKind::getReadOnlyWithRel()); + // TextSection - see MCObjectFileInfo.cpp + // StaticCtorSection - see MCObjectFileInfo.cpp + // StaticDtorSection - see MCObjectFileInfo.cpp + } + +static SectionKind getXCoreKindForNamedSection(StringRef Name, SectionKind K) { + if (Name.startswith(".cp.")) + return SectionKind::getReadOnly(); + return K; +} + +static unsigned getXCoreSectionType(SectionKind K) { + if (K.isBSS()) + return ELF::SHT_NOBITS; + return ELF::SHT_PROGBITS; +} + +static unsigned getXCoreSectionFlags(SectionKind K) { + unsigned Flags = 0; + + if (!K.isMetadata()) + Flags |= ELF::SHF_ALLOC; + + if (K.isText()) + Flags |= ELF::SHF_EXECINSTR; + else if (K.isReadOnly()) + Flags |= ELF::XCORE_SHF_CP_SECTION; + else + Flags |= ELF::XCORE_SHF_DP_SECTION; + + if (K.isWriteable()) + Flags |= ELF::SHF_WRITE; + + if (K.isMergeableCString() || K.isMergeableConst4() || + K.isMergeableConst8() || K.isMergeableConst16()) + Flags |= ELF::SHF_MERGE; + + if (K.isMergeableCString()) + Flags |= ELF::SHF_STRINGS; + + return Flags; +} + +const MCSection *XCoreTargetObjectFile:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + StringRef SectionName = GV->getSection(); + // Infer section flags from the section name if we can.
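  // Editor's note, an illustrative trace of the helpers above (the section
  // name is hypothetical): a global placed in an explicit section ".cp.const"
  // is reclassified as ReadOnly by getXCoreKindForNamedSection, after which
  // getXCoreSectionType yields SHT_PROGBITS and getXCoreSectionFlags yields
  // SHF_ALLOC | XCORE_SHF_CP_SECTION.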
+  Kind = getXCoreKindForNamedSection(SectionName, Kind);
+  return getContext().getELFSection(SectionName, getXCoreSectionType(Kind),
+                                    getXCoreSectionFlags(Kind), Kind);
+}
+
+const MCSection *XCoreTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang,
+                       const TargetMachine &TM) const {
+  if (Kind.isText()) return TextSection;
+  if (Kind.isMergeable1ByteCString()) return CStringSection;
+  if (Kind.isMergeableConst4()) return MergeableConst4Section;
+  if (Kind.isMergeableConst8()) return MergeableConst8Section;
+  if (Kind.isMergeableConst16()) return MergeableConst16Section;
+
+  Type *ObjType = GV->getType()->getPointerElementType();
+  if (TM.getCodeModel() == CodeModel::Small ||
+      !ObjType->isSized() ||
+      TM.getDataLayout()->getTypeAllocSize(ObjType) < CodeModelLargeSize) {
+    if (Kind.isReadOnly()) return ReadOnlySection;
+    if (Kind.isBSS()) return BSSSection;
+    if (Kind.isDataRel()) return DataSection;
+    if (Kind.isReadOnlyWithRel()) return ReadOnlySection;
+  } else {
+    if (Kind.isReadOnly()) return ReadOnlySectionLarge;
+    if (Kind.isBSS()) return BSSSectionLarge;
+    if (Kind.isDataRel()) return DataSectionLarge;
+    if (Kind.isReadOnlyWithRel()) return ReadOnlySectionLarge;
+  }
+
+  assert((Kind.isThreadLocal() || Kind.isCommon()) && "Unknown section kind");
+  report_fatal_error("Target does not support TLS or Common sections");
+}
+
+const MCSection *XCoreTargetObjectFile::
+getSectionForConstant(SectionKind Kind) const {
+  if (Kind.isMergeableConst4()) return MergeableConst4Section;
+  if (Kind.isMergeableConst8()) return MergeableConst8Section;
+  if (Kind.isMergeableConst16()) return MergeableConst16Section;
+  assert((Kind.isReadOnly() || Kind.isReadOnlyWithRel()) &&
+         "Unknown section kind");
+  // We assume the size of the object is never greater than CodeModelLargeSize.
+  // To handle CodeModelLargeSize, changes to AsmPrinter would be required.
+  return ReadOnlySection;
+}
diff --git a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreTargetObjectFile.h b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
index 27875e783b33..bf9798d5085d 100644
--- a/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
+++ b/external/bsd/llvm/dist/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -14,11 +14,24 @@
 namespace llvm {
 
+static const unsigned CodeModelLargeSize = 256;
+
 class XCoreTargetObjectFile : public TargetLoweringObjectFileELF {
+  const MCSection *BSSSectionLarge;
+  const MCSection *DataSectionLarge;
+  const MCSection *ReadOnlySectionLarge;
 public:
   void Initialize(MCContext &Ctx, const TargetMachine &TM);
 
-  // TODO: Classify globals as xcore wishes.
+  virtual const MCSection *
+  getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+                           Mangler *Mang, const TargetMachine &TM) const;
+
+  virtual const MCSection *
+  SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+                         Mangler *Mang, const TargetMachine &TM) const;
+
+  virtual const MCSection *getSectionForConstant(SectionKind Kind) const;
 };
 } // end namespace llvm
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Hello/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Transforms/Hello/CMakeLists.txt
index 917b745628de..e724dbc8be89 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/Hello/CMakeLists.txt
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Hello/CMakeLists.txt
@@ -1,3 +1,11 @@
+# If we don't need RTTI or EH, there's no reason to export anything
+# from the hello plugin.
+if( NOT LLVM_REQUIRES_RTTI )
+  if( NOT LLVM_REQUIRES_EH )
+    set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/Hello.exports)
+  endif()
+endif()
+
 add_llvm_loadable_module( LLVMHello
   Hello.cpp
   )
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index df08091ac5bf..49a67211db6c 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -88,7 +88,7 @@ char ArgPromotion::ID = 0;
 INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
                 "Promote 'by reference' arguments to scalars", false, false)
 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
 INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
                 "Promote 'by reference' arguments to scalars", false, false)
 
@@ -621,8 +621,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
 
   // Get the callgraph information that we need to update to reflect our
   // changes.
-  CallGraph &CG = getAnalysis<CallGraph>();
-
+  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+
   // Get a new callgraph node for NF.
   CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
 
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 60e5f0676393..6e41f262fa6f 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -137,7 +137,7 @@ char FunctionAttrs::ID = 0;
 INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
                 "Deduce function attributes", false, false)
 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
 INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
                 "Deduce function attributes", false, false)
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 7e918979ecae..611d81063c41 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -37,6 +37,7 @@
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/GlobalStatus.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -267,9 +268,17 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
 static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
                                        DataLayout *TD, TargetLibraryInfo *TLI) {
   bool Changed = false;
-  SmallVector<User*, 8> WorkList(V->use_begin(), V->use_end());
+  // Note that we need to use a weak value handle for the worklist items. When
+  // we delete a constant array, we may also be holding a pointer to one of its
+  // elements (or an element of one of its elements if we're dealing with an
+  // array of arrays) in the worklist.
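(Editorial illustration, not part of the patch.) The rationale in the comment above is worth a standalone sketch before the new worklist code below; this assumes LLVM 3.4-era headers and mirrors the pattern the hunk adopts:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ValueHandle.h"

// A worklist of WeakVH entries survives deletion of queued users: a deleted
// value's handle nulls itself, where a raw User* would be left dangling.
void visitUsersSafely(llvm::Value *V) {
  llvm::SmallVector<llvm::WeakVH, 8> WorkList(V->use_begin(), V->use_end());
  while (!WorkList.empty()) {
    llvm::Value *UV = WorkList.pop_back_val();
    if (!UV)
      continue; // this user was deleted while it sat in the queue
    llvm::User *U = llvm::cast<llvm::User>(UV);
    (void)U; // ... rewrite or erase U; erasing may null later entries ...
  }
}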
+  SmallVector<WeakVH, 8> WorkList(V->use_begin(), V->use_end());
   while (!WorkList.empty()) {
-    User *U = WorkList.pop_back_val();
+    Value *UV = WorkList.pop_back_val();
+    if (!UV)
+      continue;
+
+    User *U = cast<User>(UV);
 
     if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
       if (Init) {
@@ -288,8 +297,9 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
         if (Init)
           SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
         Changed |= CleanupConstantGlobalUsers(CE, SubInit, TD, TLI);
-      } else if (CE->getOpcode() == Instruction::BitCast &&
-                 CE->getType()->isPointerTy()) {
+      } else if ((CE->getOpcode() == Instruction::BitCast &&
+                  CE->getType()->isPointerTy()) ||
+                 CE->getOpcode() == Instruction::AddrSpaceCast) {
         // Pointer cast, delete any stores and memsets to the global.
         Changed |= CleanupConstantGlobalUsers(CE, 0, TD, TLI);
       }
@@ -1737,7 +1747,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
   // and this function is main (which we know is not recursive), we replace
   // the global with a local alloca in this function.
   //
-  // NOTE: It doesn't make sense to promote non single-value types since we
+  // NOTE: It doesn't make sense to promote non-single-value types since we
   // are just replacing static memory to stack memory.
   //
   // If the global is in different address space, don't bring it to stack.
@@ -2571,7 +2581,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
         // We don't insert an entry into Values, as it doesn't have a
         // meaningful return value.
         if (!II->use_empty()) {
-          DEBUG(dbgs() << "Found unused invariant_start. Cant evaluate.\n");
+          DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n");
           return false;
         }
         ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
@@ -2847,12 +2857,14 @@ static void setUsedInitializer(GlobalVariable &V,
     return;
   }
 
-  SmallVector<llvm::Constant *, 8> UsedArray;
-  PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext());
+  // Type of pointer to the array of pointers.
+  PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);
+
+  SmallVector<llvm::Constant *, 8> UsedArray;
   for (SmallPtrSet<GlobalValue *, 8>::iterator I = Init.begin(), E = Init.end();
        I != E; ++I) {
-    Constant *Cast = llvm::ConstantExpr::getBitCast(*I, Int8PtrTy);
+    Constant *Cast
+      = ConstantExpr::getPointerBitCastOrAddrSpaceCast(*I, Int8PtrTy);
     UsedArray.push_back(Cast);
   }
   // Sort to get deterministic order.
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
index 4ac1dfc09682..8b816e556e34 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -210,7 +210,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {
         // Different or no known return value? Don't propagate this return
         // value.
         RetVals[i] = 0;
-        // All values non constant? Stop looking.
+        // All values non-constant? Stop looking.
        if (++NumNonConstant == RetVals.size())
          return false;
      }
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/IPO.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/IPO.cpp
index 5d563d8bbf51..b4d31d8d6fc2 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/IPO.cpp
@@ -44,6 +44,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
   initializeStripDebugDeclarePass(Registry);
   initializeStripDeadDebugInfoPass(Registry);
   initializeStripNonDebugSymbolsPass(Registry);
+  initializeBarrierNoopPass(Registry);
 }
 
 void LLVMInitializeIPO(LLVMPassRegistryRef R) {
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/InlineAlways.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/InlineAlways.cpp
index 437597ec037b..be1fe9a117ea 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/InlineAlways.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/InlineAlways.cpp
@@ -63,7 +63,7 @@ public:
 char AlwaysInliner::ID = 0;
 INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
                 "Inliner for always_inline functions", false, false)
-INITIALIZE_PASS_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
 INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
                 "Inliner for always_inline functions", false, false)
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/InlineSimple.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/InlineSimple.cpp
index 57379a334f2d..29292288e1b0 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/InlineSimple.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -61,7 +61,7 @@ public:
 char SimpleInliner::ID = 0;
 INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
                 "Function Integration/Inlining", false, false)
-INITIALIZE_PASS_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
 INITIALIZE_PASS_END(SimpleInliner, "inline",
                 "Function Integration/Inlining", false, false)
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/Inliner.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/Inliner.cpp
index d75d6ca92b3c..dc710d14157b 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -395,7 +395,7 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
 }
 
 bool Inliner::runOnSCC(CallGraphSCC &SCC) {
-  CallGraph &CG = getAnalysis<CallGraph>();
+  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
   const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
   const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
 
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/Internalize.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/Internalize.cpp
index 64e2cedfb13a..dae69ce09156 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -63,7 +63,7 @@ namespace {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
-      AU.addPreserved<CallGraph>();
+      AU.addPreserved<CallGraphWrapperPass>();
     }
   };
 } // end anonymous namespace
@@ -115,6 +115,10 @@ static bool shouldInternalize(const GlobalValue &GV,
   if (GV.hasAvailableExternallyLinkage())
     return false;
 
+  // Assume that dllexported symbols are referenced elsewhere
+  if (GV.hasDLLExportLinkage())
+    return false;
+
   // Already has internal linkage
   if (GV.hasLocalLinkage())
     return false;
@@ -127,7 +131,8 @@ static bool shouldInternalize(const
GlobalValue &GV,
 }
 
 bool InternalizePass::runOnModule(Module &M) {
-  CallGraph *CG = getAnalysisIfAvailable<CallGraph>();
+  CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>();
+  CallGraph *CG = CGPass ? &CGPass->getCallGraph() : 0;
   CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
   bool Changed = false;
 
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/LLVMBuild.txt
index 124cbb6f0549..77e0b22086fd 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/LLVMBuild.txt
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/LLVMBuild.txt
@@ -20,4 +20,4 @@ type = Library
 name = IPO
 parent = Transforms
 library_name = ipo
-required_libraries = Analysis Core IPA InstCombine Scalar Vectorize Support Target TransformUtils ObjCARC
+required_libraries = Analysis Core IPA InstCombine Scalar Support Target TransformUtils Vectorize
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 24c5018d5421..cd46c7998298 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -32,11 +32,6 @@
 static cl::opt<bool>
 RunLoopVectorization("vectorize-loops", cl::Hidden,
                      cl::desc("Run the Loop vectorization passes"));
 
-static cl::opt<bool>
-LateVectorization("late-vectorize", cl::init(true), cl::Hidden,
-                  cl::desc("Run the vectorization pasess late in the pass "
-                           "pipeline (after the inliner)"));
-
 static cl::opt<bool>
 RunSLPVectorization("vectorize-slp", cl::Hidden,
                     cl::desc("Run the SLP vectorization passes"));
 
@@ -68,7 +63,6 @@ PassManagerBuilder::PassManagerBuilder() {
   BBVectorize = RunBBVectorization;
   SLPVectorize = RunSLPVectorization;
   LoopVectorize = RunLoopVectorization;
-  LateVectorize = LateVectorization;
   RerollLoops = RunLoopRerolling;
 }
 
@@ -200,9 +194,6 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
   MPM.add(createLoopIdiomPass());             // Recognize idioms like memset.
   MPM.add(createLoopDeletionPass());          // Delete dead loops
 
-  if (!LateVectorize && LoopVectorize)
-    MPM.add(createLoopVectorizePass(DisableUnrollLoops));
-
   if (!DisableUnrollLoops)
     MPM.add(createLoopUnrollPass());          // Unroll small loops
   addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
@@ -243,21 +234,18 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
   MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
   MPM.add(createInstructionCombiningPass());  // Clean up after everything.
 
-  // As an experimental mode, run any vectorization passes in a separate
-  // pipeline from the CGSCC pass manager that runs iteratively with the
-  // inliner.
-  if (LateVectorize && LoopVectorize) {
-    // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
-    // pass manager that we are specifically trying to avoid. To prevent this
-    // we must insert a no-op module pass to reset the pass manager.
-    MPM.add(createBarrierNoopPass());
-
-    // Add the various vectorization passes and relevant cleanup passes for
-    // them since we are no longer in the middle of the main scalar pipeline.
-    MPM.add(createLoopVectorizePass(DisableUnrollLoops));
-    MPM.add(createInstructionCombiningPass());
-    MPM.add(createCFGSimplificationPass());
-  }
+  // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
+  // pass manager that we are specifically trying to avoid. To prevent this
+  // we must insert a no-op module pass to reset the pass manager.
+  MPM.add(createBarrierNoopPass());
+  MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
+  // FIXME: Because of #pragma vectorize enable, the passes below are always
+  // inserted in the pipeline, even when the vectorizer doesn't run (ex. when
+  // on -O1 and no #pragma is found). Would be good to have these two passes
+  // as function calls, so that we can only pass them when the vectorizer
+  // changed the code.
+  MPM.add(createInstructionCombiningPass());
+  MPM.add(createCFGSimplificationPass());
 
   if (!DisableUnitAtATime) {
     // FIXME: We shouldn't bother with this anymore.
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/PruneEH.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/PruneEH.cpp
index b16091312148..cba6ae052b82 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -51,7 +51,7 @@ namespace {
 char PruneEH::ID = 0;
 INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh",
                 "Remove unused exception handling info", false, false)
-INITIALIZE_PASS_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
 INITIALIZE_PASS_END(PruneEH, "prune-eh",
                 "Remove unused exception handling info", false, false)
 
@@ -60,7 +60,7 @@ Pass *llvm::createPruneEHPass() { return new PruneEH(); }
 
 bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
   SmallPtrSet<CallGraphNode *, 8> SCCNodes;
-  CallGraph &CG = getAnalysis<CallGraph>();
+  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
   bool MadeChange = false;
 
   // Fill SCCNodes with the elements of the SCC. Used for quickly
@@ -234,7 +234,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
 /// exist in the BB.
 void PruneEH::DeleteBasicBlock(BasicBlock *BB) {
   assert(pred_begin(BB) == pred_end(BB) && "BB is not dead!");
-  CallGraph &CG = getAnalysis<CallGraph>();
+  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
   CallGraphNode *CGN = CG[BB->getParent()];
   for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; ) {
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/StripSymbols.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 1792aa866d2e..b4c8b3726e16 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -147,7 +147,7 @@ static void RemoveDeadConstant(Constant *C) {
     if (OnlyUsedBy(C->getOperand(i), C))
       Operands.insert(cast<Constant>(C->getOperand(i)));
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
-    if (!GV->hasLocalLinkage()) return;   // Don't delete non static globals.
+    if (!GV->hasLocalLinkage()) return;   // Don't delete non-static globals.
     GV->eraseFromParent();
   }
   else if (!isa<Function>(C))
@@ -231,57 +231,6 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
   return true;
 }
 
-// StripDebugInfo - Strip debug info in the module if it exists.
-// To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and
-// llvm.dbg.region.end calls, and any globals they point to if now dead.
-static bool StripDebugInfo(Module &M) {
-
-  bool Changed = false;
-
-  // Remove all of the calls to the debugger intrinsics, and remove them from
-  // the module.
-  if (Function *Declare = M.getFunction("llvm.dbg.declare")) {
-    while (!Declare->use_empty()) {
-      CallInst *CI = cast<CallInst>(Declare->use_back());
-      CI->eraseFromParent();
-    }
-    Declare->eraseFromParent();
-    Changed = true;
-  }
-
-  if (Function *DbgVal = M.getFunction("llvm.dbg.value")) {
-    while (!DbgVal->use_empty()) {
-      CallInst *CI = cast<CallInst>(DbgVal->use_back());
-      CI->eraseFromParent();
-    }
-    DbgVal->eraseFromParent();
-    Changed = true;
-  }
-
-  for (Module::named_metadata_iterator NMI = M.named_metadata_begin(),
-         NME = M.named_metadata_end(); NMI != NME;) {
-    NamedMDNode *NMD = NMI;
-    ++NMI;
-    if (NMD->getName().startswith("llvm.dbg.")) {
-      NMD->eraseFromParent();
-      Changed = true;
-    }
-  }
-
-  for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
-    for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
-         ++FI)
-      for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
-           ++BI) {
-        if (!BI->getDebugLoc().isUnknown()) {
-          Changed = true;
-          BI->setDebugLoc(DebugLoc());
-        }
-      }
-
-  return Changed;
-}
-
 bool StripSymbols::runOnModule(Module &M) {
   bool Changed = false;
   Changed |= StripDebugInfo(M);
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 72377dc0adca..44e60b775cc7 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1189,36 +1189,92 @@ static Value *LookThroughFPExtensions(Value *V) {
 Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
   if (Instruction *I = commonCastTransforms(CI))
     return I;
-
-  // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
-  // smaller than the destination type, we can eliminate the truncate by doing
-  // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well
-  // as many builtins (sqrt, etc).
+  // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
+  // simplify this expression to avoid one or more of the trunc/extend
+  // operations if we can do so without changing the numerical results.
+  //
+  // The exact manner in which the widths of the operands interact to limit
+  // what we can and cannot do safely varies from operation to operation, and
+  // is explained below in the various case statements.
   BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
   if (OpI && OpI->hasOneUse()) {
+    Value *LHSOrig = LookThroughFPExtensions(OpI->getOperand(0));
+    Value *RHSOrig = LookThroughFPExtensions(OpI->getOperand(1));
+    unsigned OpWidth = OpI->getType()->getFPMantissaWidth();
+    unsigned LHSWidth = LHSOrig->getType()->getFPMantissaWidth();
+    unsigned RHSWidth = RHSOrig->getType()->getFPMantissaWidth();
+    unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
+    unsigned DstWidth = CI.getType()->getFPMantissaWidth();
     switch (OpI->getOpcode()) {
-    default: break;
-    case Instruction::FAdd:
-    case Instruction::FSub:
-    case Instruction::FMul:
-    case Instruction::FDiv:
-    case Instruction::FRem:
-      Type *SrcTy = OpI->getType();
-      Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
-      Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
-      if (LHSTrunc->getType() != SrcTy &&
-          RHSTrunc->getType() != SrcTy) {
-        unsigned DstSize = CI.getType()->getScalarSizeInBits();
-        // If the source types were both smaller than the destination type of
-        // the cast, do this xform.
-        if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
-            RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
-          LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType());
-          RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType());
-          return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
+      default: break;
+      case Instruction::FAdd:
+      case Instruction::FSub:
+        // For addition and subtraction, the infinitely precise result can
+        // essentially be arbitrarily wide; proving that double rounding
+        // will not occur because the result of OpI is exact (as we will for
+        // FMul, for example) is hopeless. However, we *can* nonetheless
+        // frequently know that double rounding cannot occur (or that it is
+        // innocuous) by taking advantage of the specific structure of
+        // infinitely-precise results that admit double rounding.
+        //
+        // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
+        // to represent both sources, we can guarantee that the double
+        // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
+        // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
+        // for proof of this fact).
+        //
+        // Note: Figueroa does not consider the case where DstFormat !=
+        // SrcFormat. It's possible (likely even!) that this analysis
+        // could be tightened for those cases, but they are rare (the main
+        // case of interest here is (float)((double)float + float)).
+        if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
+          if (LHSOrig->getType() != CI.getType())
+            LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType());
+          if (RHSOrig->getType() != CI.getType())
+            RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType());
+          return BinaryOperator::Create(OpI->getOpcode(), LHSOrig, RHSOrig);
         }
-      }
-      break;
+        break;
+      case Instruction::FMul:
+        // For multiplication, the infinitely precise result has at most
+        // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
+        // that such a value can be exactly represented, then no double
+        // rounding can possibly occur; we can safely perform the operation
+        // in the destination format if it can represent both sources.
+        if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
+          if (LHSOrig->getType() != CI.getType())
+            LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType());
+          if (RHSOrig->getType() != CI.getType())
+            RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType());
+          return BinaryOperator::CreateFMul(LHSOrig, RHSOrig);
+        }
+        break;
+      case Instruction::FDiv:
+        // For division, we again use the bound from Figueroa's
+        // dissertation. I am entirely certain that this bound can be
+        // tightened in the unbalanced operand case by an analysis based on
+        // the diophantine rational approximation bound, but the well-known
+        // condition used here is a good conservative first pass.
+        // TODO: Tighten bound via rigorous analysis of the unbalanced case.
+        if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
+          if (LHSOrig->getType() != CI.getType())
+            LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType());
+          if (RHSOrig->getType() != CI.getType())
+            RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType());
+          return BinaryOperator::CreateFDiv(LHSOrig, RHSOrig);
+        }
+        break;
+      case Instruction::FRem:
+        // Remainder is straightforward. Remainder is always exact, so the
+        // type of OpI doesn't enter into things at all. We simply evaluate
+        // in whichever source type is larger, then convert to the
+        // destination type.
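(Editorial worked example, before the FRem code resumes below; it assumes IEEE-754 binary32/binary64.) For (float)((double)a + (double)b): OpWidth = 53, and DstWidth = SrcWidth = 24, so the FAdd test OpWidth >= 2*DstWidth+1 becomes 53 >= 2*24+1 = 49 and the fold fires; the whole pattern collapses to a single fadd on float with a bit-identical result, because rounding the exact sum to 53 bits and then to 24 bits equals rounding it to 24 bits directly. By contrast, truncating an x87 80-bit sum of two doubles fails the test (OpWidth = 64 < 2*53+1 = 107), and there double rounding really can change the result.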
+ if (LHSWidth < SrcWidth) + LHSOrig = Builder->CreateFPExt(LHSOrig, RHSOrig->getType()); + else if (RHSWidth <= SrcWidth) + RHSOrig = Builder->CreateFPExt(RHSOrig, LHSOrig->getType()); + Value *ExactResult = Builder->CreateFRem(LHSOrig, RHSOrig); + return CastInst::CreateFPCast(ExactResult, CI.getType()); } // (fptrunc (fneg x)) -> (fneg (fptrunc x)) diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 9bb65efbd619..b6cdf640b009 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1078,17 +1078,17 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } break; - case Instruction::Xor: // (icmp pred (xor X, XorCST), CI) - if (ConstantInt *XorCST = dyn_cast(LHSI->getOperand(1))) { + case Instruction::Xor: // (icmp pred (xor X, XorCst), CI) + if (ConstantInt *XorCst = dyn_cast(LHSI->getOperand(1))) { // If this is a comparison that tests the signbit (X < 0) or (x > -1), // fold the xor. if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) || (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) { Value *CompareVal = LHSI->getOperand(0); - // If the sign bit of the XorCST is not set, there is no change to + // If the sign bit of the XorCst is not set, there is no change to // the operation, just stop using the Xor. - if (!XorCST->isNegative()) { + if (!XorCst->isNegative()) { ICI.setOperand(0, CompareVal); Worklist.Add(LHSI); return &ICI; @@ -1110,8 +1110,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (LHSI->hasOneUse()) { // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit)) - if (!ICI.isEquality() && XorCST->getValue().isSignBit()) { - const APInt &SignBit = XorCST->getValue(); + if (!ICI.isEquality() && XorCst->getValue().isSignBit()) { + const APInt &SignBit = XorCst->getValue(); ICmpInst::Predicate Pred = ICI.isSigned() ? ICI.getUnsignedPredicate() : ICI.getSignedPredicate(); @@ -1120,8 +1120,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) - if (!ICI.isEquality() && XorCST->isMaxValue(true)) { - const APInt &NotSignBit = XorCST->getValue(); + if (!ICI.isEquality() && XorCst->isMaxValue(true)) { + const APInt &NotSignBit = XorCst->getValue(); ICmpInst::Predicate Pred = ICI.isSigned() ? 
ICI.getUnsignedPredicate() : ICI.getSignedPredicate(); @@ -1134,20 +1134,20 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C) // iff -C is a power of 2 if (ICI.getPredicate() == ICmpInst::ICMP_UGT && - XorCST->getValue() == ~RHSV && (RHSV + 1).isPowerOf2()) - return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), XorCST); + XorCst->getValue() == ~RHSV && (RHSV + 1).isPowerOf2()) + return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), XorCst); // (icmp ult (xor X, C), -C) -> (icmp uge X, C) // iff -C is a power of 2 if (ICI.getPredicate() == ICmpInst::ICMP_ULT && - XorCST->getValue() == -RHSV && RHSV.isPowerOf2()) - return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), XorCST); + XorCst->getValue() == -RHSV && RHSV.isPowerOf2()) + return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), XorCst); } break; - case Instruction::And: // (icmp pred (and X, AndCST), RHS) + case Instruction::And: // (icmp pred (and X, AndCst), RHS) if (LHSI->hasOneUse() && isa(LHSI->getOperand(1)) && LHSI->getOperand(0)->hasOneUse()) { - ConstantInt *AndCST = cast(LHSI->getOperand(1)); + ConstantInt *AndCst = cast(LHSI->getOperand(1)); // If the LHS is an AND of a truncating cast, we can widen the // and/compare to be the input width without changing the value @@ -1158,10 +1158,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // Extending a relational comparison when we're checking the sign // bit would not work. if (ICI.isEquality() || - (!AndCST->isNegative() && RHSV.isNonNegative())) { + (!AndCst->isNegative() && RHSV.isNonNegative())) { Value *NewAnd = Builder->CreateAnd(Cast->getOperand(0), - ConstantExpr::getZExt(AndCST, Cast->getSrcTy())); + ConstantExpr::getZExt(AndCst, Cast->getSrcTy())); NewAnd->takeName(LHSI); return new ICmpInst(ICI.getPredicate(), NewAnd, ConstantExpr::getZExt(RHS, Cast->getSrcTy())); @@ -1177,7 +1177,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) { Value *NewAnd = Builder->CreateAnd(Cast->getOperand(0), - ConstantExpr::getTrunc(AndCST, Ty)); + ConstantExpr::getTrunc(AndCst, Ty)); NewAnd->takeName(LHSI); return new ICmpInst(ICI.getPredicate(), NewAnd, ConstantExpr::getTrunc(RHS, Ty)); @@ -1194,45 +1194,54 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, ConstantInt *ShAmt; ShAmt = Shift ? dyn_cast(Shift->getOperand(1)) : 0; - Type *Ty = Shift ? Shift->getType() : 0; // Type of the shift. - Type *AndTy = AndCST->getType(); // Type of the and. - // We can fold this as long as we can't shift unknown bits - // into the mask. This can happen with signed shift - // rights, as they sign-extend. With logical shifts, - // we must still make sure the comparison is not signed - // because we are effectively changing the - // position of the sign bit (PR17827). - // TODO: We can relax these constraints a bit more. + // This seemingly simple opportunity to fold away a shift turns out to + // be rather complicated. See PR17827 + // ( http://llvm.org/bugs/show_bug.cgi?id=17827 ) for details. if (ShAmt) { bool CanFold = false; unsigned ShiftOpcode = Shift->getOpcode(); if (ShiftOpcode == Instruction::AShr) { - // To test for the bad case of the signed shr, see if any - // of the bits shifted in could be tested after the mask. 
- uint32_t TyBits = Ty->getPrimitiveSizeInBits(); - int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits); - - uint32_t BitWidth = AndTy->getPrimitiveSizeInBits(); - if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & - AndCST->getValue()) == 0) + // There may be some constraints that make this possible, + // but nothing simple has been discovered yet. + CanFold = false; + } else if (ShiftOpcode == Instruction::Shl) { + // For a left shift, we can fold if the comparison is not signed. + // We can also fold a signed comparison if the mask value and + // comparison value are not negative. These constraints may not be + // obvious, but we can prove that they are correct using an SMT + // solver. + if (!ICI.isSigned() || (!AndCst->isNegative() && !RHS->isNegative())) CanFold = true; - } else if (ShiftOpcode == Instruction::Shl || - ShiftOpcode == Instruction::LShr) { - CanFold = !ICI.isSigned(); + } else if (ShiftOpcode == Instruction::LShr) { + // For a logical right shift, we can fold if the comparison is not + // signed. We can also fold a signed comparison if the shifted mask + // value and the shifted comparison value are not negative. + // These constraints may not be obvious, but we can prove that they + // are correct using an SMT solver. + if (!ICI.isSigned()) + CanFold = true; + else { + ConstantInt *ShiftedAndCst = + cast(ConstantExpr::getShl(AndCst, ShAmt)); + ConstantInt *ShiftedRHSCst = + cast(ConstantExpr::getShl(RHS, ShAmt)); + + if (!ShiftedAndCst->isNegative() && !ShiftedRHSCst->isNegative()) + CanFold = true; + } } if (CanFold) { Constant *NewCst; - if (Shift->getOpcode() == Instruction::Shl) + if (ShiftOpcode == Instruction::Shl) NewCst = ConstantExpr::getLShr(RHS, ShAmt); else NewCst = ConstantExpr::getShl(RHS, ShAmt); // Check to see if we are shifting out any of the bits being // compared. - if (ConstantExpr::get(Shift->getOpcode(), - NewCst, ShAmt) != RHS) { + if (ConstantExpr::get(ShiftOpcode, NewCst, ShAmt) != RHS) { // If we shifted bits out, the fold is not going to work out. // As a special case, check to see if this means that the // result is always true or false now. @@ -1242,12 +1251,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return ReplaceInstUsesWith(ICI, Builder->getTrue()); } else { ICI.setOperand(1, NewCst); - Constant *NewAndCST; - if (Shift->getOpcode() == Instruction::Shl) - NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt); + Constant *NewAndCst; + if (ShiftOpcode == Instruction::Shl) + NewAndCst = ConstantExpr::getLShr(AndCst, ShAmt); else - NewAndCST = ConstantExpr::getShl(AndCST, ShAmt); - LHSI->setOperand(1, NewAndCST); + NewAndCst = ConstantExpr::getShl(AndCst, ShAmt); + LHSI->setOperand(1, NewAndCst); LHSI->setOperand(0, Shift->getOperand(0)); Worklist.Add(Shift); // Shift is dead. return &ICI; @@ -1264,10 +1273,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // Compute C << Y. Value *NS; if (Shift->getOpcode() == Instruction::LShr) { - NS = Builder->CreateShl(AndCST, Shift->getOperand(1)); + NS = Builder->CreateShl(AndCst, Shift->getOperand(1)); } else { // Insert a logical shift. - NS = Builder->CreateLShr(AndCST, Shift->getOperand(1)); + NS = Builder->CreateLShr(AndCst, Shift->getOperand(1)); } // Compute X & (C << Y). @@ -1278,12 +1287,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return &ICI; } - // Replace ((X & AndCST) > RHSV) with ((X & AndCST) != 0), if any - // bit set in (X & AndCST) will produce a result greater than RHSV. 
+ // Replace ((X & AndCst) > RHSV) with ((X & AndCst) != 0), if any + // bit set in (X & AndCst) will produce a result greater than RHSV. if (ICI.getPredicate() == ICmpInst::ICMP_UGT) { - unsigned NTZ = AndCST->getValue().countTrailingZeros(); - if ((NTZ < AndCST->getBitWidth()) && - APInt::getOneBitSet(AndCST->getBitWidth(), NTZ).ugt(RHSV)) + unsigned NTZ = AndCst->getValue().countTrailingZeros(); + if ((NTZ < AndCst->getBitWidth()) && + APInt::getOneBitSet(AndCst->getBitWidth(), NTZ).ugt(RHSV)) return new ICmpInst(ICmpInst::ICMP_NE, LHSI, Constant::getNullValue(RHS->getType())); } diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 0a0727e0fa34..4c861b3fd095 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -263,9 +263,9 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) EraseInstFromFunction(*ToDelete[i]); Constant *TheSrc = cast(Copy->getSource()); - Instruction *NewI - = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc, - AI.getType())); + Constant *Cast + = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType()); + Instruction *NewI = ReplaceInstUsesWith(AI, Cast); EraseInstFromFunction(*Copy); ++NumGlobalCopies; return NewI; diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 27f1a3eb699a..191a101e0a30 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -699,7 +699,10 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); Value *InV = 0; - if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) + // Beware of ConstantExpr: it may eventually evaluate to getNullValue, + // even if currently isNullValue gives false. + Constant *InC = dyn_cast(PN->getIncomingValue(i)); + if (InC && !isa(InC)) InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; else InV = Builder->CreateSelect(PN->getIncomingValue(i), diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index d731ec5499ea..511c63b5eb96 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -41,8 +41,8 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" +#include "llvm/Transforms/Utils/ASanStackFrameLayout.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -93,11 +93,6 @@ static const char *const kAsanUnpoisonStackMemoryName = static const char *const kAsanOptionDetectUAR = "__asan_option_detect_stack_use_after_return"; -// These constants must match the definitions in the run-time library. 
-static const int kAsanStackLeftRedzoneMagic = 0xf1; -static const int kAsanStackMidRedzoneMagic = 0xf2; -static const int kAsanStackRightRedzoneMagic = 0xf3; -static const int kAsanStackPartialRedzoneMagic = 0xf4; #ifndef NDEBUG static const int kAsanStackAfterReturnMagic = 0xf5; #endif @@ -141,8 +136,9 @@ static cl::opt ClInitializers("asan-initialization-order", cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false)); static cl::opt ClMemIntrin("asan-memintrin", cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true)); -static cl::opt ClRealignStack("asan-realign-stack", - cl::desc("Realign stack to 32"), cl::Hidden, cl::init(true)); +static cl::opt ClRealignStack("asan-realign-stack", + cl::desc("Realign stack to the value of this flag (power of two)"), + cl::Hidden, cl::init(32)); static cl::opt ClBlacklistFile("asan-blacklist", cl::desc("File containing the list of objects to ignore " "during instrumentation"), cl::Hidden); @@ -376,7 +372,7 @@ class AddressSanitizerModule : public ModulePass { bool ShouldInstrumentGlobal(GlobalVariable *G); void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName); - size_t RedzoneSize() const { + size_t MinRedzoneSizeForGlobal() const { return RedzoneSizeForScale(Mapping.Scale); } @@ -416,7 +412,6 @@ struct FunctionStackPoisoner : public InstVisitor { SmallVector AllocaVec; SmallVector RetVec; - uint64_t TotalStackSize; unsigned StackAlignment; Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1], @@ -440,7 +435,7 @@ struct FunctionStackPoisoner : public InstVisitor { : F(F), ASan(ASan), DIB(*F.getParent()), C(ASan.C), IntptrTy(ASan.IntptrTy), IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping), - TotalStackSize(0), StackAlignment(1 << Mapping.Scale) {} + StackAlignment(1 << Mapping.Scale) {} bool runOnFunction() { if (!ClStack) return false; @@ -479,8 +474,6 @@ struct FunctionStackPoisoner : public InstVisitor { StackAlignment = std::max(StackAlignment, AI.getAlignment()); AllocaVec.push_back(&AI); - uint64_t AlignedSize = getAlignedAllocaSize(&AI); - TotalStackSize += AlignedSize; } /// \brief Collect lifetime intrinsic calls to check for use-after-scope @@ -514,31 +507,20 @@ struct FunctionStackPoisoner : public InstVisitor { // Check if we want (and can) handle this alloca. bool isInterestingAlloca(AllocaInst &AI) const { - return (!AI.isArrayAllocation() && - AI.isStaticAlloca() && - AI.getAlignment() <= RedzoneSize() && - AI.getAllocatedType()->isSized()); + return (!AI.isArrayAllocation() && AI.isStaticAlloca() && + AI.getAllocatedType()->isSized() && + // alloca() may be called with 0 size, ignore it. + getAllocaSizeInBytes(&AI) > 0); } - size_t RedzoneSize() const { - return RedzoneSizeForScale(Mapping.Scale); - } uint64_t getAllocaSizeInBytes(AllocaInst *AI) const { Type *Ty = AI->getAllocatedType(); uint64_t SizeInBytes = ASan.TD->getTypeAllocSize(Ty); return SizeInBytes; } - uint64_t getAlignedSize(uint64_t SizeInBytes) const { - size_t RZ = RedzoneSize(); - return ((SizeInBytes + RZ - 1) / RZ) * RZ; - } - uint64_t getAlignedAllocaSize(AllocaInst *AI) const { - uint64_t SizeInBytes = getAllocaSizeInBytes(AI); - return getAlignedSize(SizeInBytes); - } /// Finds alloca where the value comes from. 
AllocaInst *findAllocaForValue(Value *V); - void poisonRedZones(const ArrayRef &AllocaVec, IRBuilder<> &IRB, + void poisonRedZones(const ArrayRef ShadowBytes, IRBuilder<> &IRB, Value *ShadowBase, bool DoPoison); void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> &IRB, bool DoPoison); @@ -576,12 +558,22 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { } // \brief Create a constant for Str so that we can pass it to the run-time lib. -static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { +static GlobalVariable *createPrivateGlobalForString( + Module &M, StringRef Str, bool AllowMerging) { Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); - GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true, - GlobalValue::InternalLinkage, StrConst, - kAsanGenPrefix); - GV->setUnnamedAddr(true); // Ok to merge these. + // For module-local strings that can be merged with another one we set the + // private linkage and the unnamed_addr attribute. + // Non-mergeable strings are made linker_private to remove them from the + // symbol table. "private" linkage doesn't work for Darwin, where the + // "L"-prefixed globals end up in __TEXT,__const section + // (see http://llvm.org/bugs/show_bug.cgi?id=17976 for more info). + GlobalValue::LinkageTypes linkage = + AllowMerging ? GlobalValue::PrivateLinkage + : GlobalValue::LinkerPrivateLinkage; + GlobalVariable *GV = + new GlobalVariable(M, StrConst->getType(), true, + linkage, StrConst, kAsanGenPrefix); + if (AllowMerging) GV->setUnnamedAddr(true); GV->setAlignment(1); // Strings may not be merged w/o setting align 1. return GV; } @@ -635,7 +627,7 @@ bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { Value *Cmp = IRB.CreateICmpNE(Length, Constant::getNullValue(Length->getType())); - InsertBefore = SplitBlockAndInsertIfThen(cast(Cmp), false); + InsertBefore = SplitBlockAndInsertIfThen(Cmp, InsertBefore, false); } instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true); @@ -798,7 +790,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) { TerminatorInst *CheckTerm = - SplitBlockAndInsertIfThen(cast(Cmp), false); + SplitBlockAndInsertIfThen(Cmp, InsertBefore, false); assert(dyn_cast(CheckTerm)->isUnconditional()); BasicBlock *NextBB = CheckTerm->getSuccessor(0); IRB.SetInsertPoint(CheckTerm); @@ -809,7 +801,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2); ReplaceInstWithInst(CheckTerm, NewTerm); } else { - CrashTerm = SplitBlockAndInsertIfThen(cast(Cmp), true); + CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, true); } Instruction *Crash = generateCrashCode( @@ -861,8 +853,8 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { // - Need to poison all copies, not just the main thread's one. if (G->isThreadLocal()) return false; - // For now, just ignore this Alloca if the alignment is large. - if (G->getAlignment() > RedzoneSize()) return false; + // For now, just ignore this Global if the alignment is large. + if (G->getAlignment() > MinRedzoneSizeForGlobal()) return false; // Ignore all the globals with the names starting with "\01L_OBJC_". // Many of those are put into the .cstring section. 
The linker compresses @@ -968,11 +960,10 @@ bool AddressSanitizerModule::runOnModule(Module &M) { bool HasDynamicallyInitializedGlobals = false; - GlobalVariable *ModuleName = createPrivateGlobalForString( - M, M.getModuleIdentifier()); // We shouldn't merge same module names, as this string serves as unique // module ID in runtime. - ModuleName->setUnnamedAddr(false); + GlobalVariable *ModuleName = createPrivateGlobalForString( + M, M.getModuleIdentifier(), /*AllowMerging*/false); for (size_t i = 0; i < n; i++) { static const uint64_t kMaxGlobalRedzone = 1 << 18; @@ -980,7 +971,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { PointerType *PtrTy = cast(G->getType()); Type *Ty = PtrTy->getElementType(); uint64_t SizeInBytes = TD->getTypeAllocSize(Ty); - uint64_t MinRZ = RedzoneSize(); + uint64_t MinRZ = MinRedzoneSizeForGlobal(); // MinRZ <= RZ <= kMaxGlobalRedzone // and trying to make RZ to be ~ 1/4 of SizeInBytes. uint64_t RZ = std::max(MinRZ, @@ -1003,7 +994,8 @@ bool AddressSanitizerModule::runOnModule(Module &M) { NewTy, G->getInitializer(), Constant::getNullValue(RightRedZoneTy), NULL); - GlobalVariable *Name = createPrivateGlobalForString(M, G->getName()); + GlobalVariable *Name = + createPrivateGlobalForString(M, G->getName(), /*AllowMerging*/true); // Create a new global variable with enough space for a redzone. GlobalValue::LinkageTypes Linkage = G->getLinkage(); @@ -1185,7 +1177,19 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { // b) collect usage statistics to help improve Clang coverage design. bool AddressSanitizer::InjectCoverage(Function &F) { if (!ClCoverage) return false; - IRBuilder<> IRB(F.getEntryBlock().getFirstInsertionPt()); + + // Skip static allocas at the top of the entry block so they don't become + // dynamic when we split the block. If we used our optimized stack layout, + // then there will only be one alloca and it will come first. + BasicBlock &Entry = F.getEntryBlock(); + BasicBlock::iterator IP = Entry.getFirstInsertionPt(), BE = Entry.end(); + for (; IP != BE; ++IP) { + AllocaInst *AI = dyn_cast(IP); + if (!AI || !AI->isStaticAlloca()) + break; + } + + IRBuilder<> IRB(IP); Type *Int8Ty = IRB.getInt8Ty(); GlobalVariable *Guard = new GlobalVariable( *F.getParent(), Int8Ty, false, GlobalValue::PrivateLinkage, @@ -1194,7 +1198,7 @@ bool AddressSanitizer::InjectCoverage(Function &F) { Load->setAtomic(Monotonic); Load->setAlignment(1); Value *Cmp = IRB.CreateICmpEQ(Constant::getNullValue(Int8Ty), Load); - Instruction *Ins = SplitBlockAndInsertIfThen(cast(Cmp), false); + Instruction *Ins = SplitBlockAndInsertIfThen(Cmp, IP, false); IRB.SetInsertPoint(Ins); // We pass &F to __sanitizer_cov. We could avoid this and rely on // GET_CALLER_PC, but having the PC of the first instruction is just nice. 
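(Editorial sketch, not part of the patch.) The shape InjectCoverage gives the entry block, rendered as roughly equivalent C++. This is simplified: the emitted IR uses a monotonic atomic i8 load/store and passes &F to the hook, and all names below are illustrative:

extern "C" void __sanitizer_cov(void *FunctionPc); // runtime hook named above
static unsigned char Guard; // one private i8 guard global per function

void instrumentedFunction() {
  if (Guard == 0) {                                 // atomic load in the IR
    __sanitizer_cov((void *)&instrumentedFunction); // report this PC once
    Guard = 1;                                      // later calls skip the hook
  }
  // ... original function body ...
}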
@@ -1323,32 +1327,6 @@ bool AddressSanitizer::runOnFunction(Function &F) {
   return res;
 }
 
-static uint64_t ValueForPoison(uint64_t PoisonByte, size_t ShadowRedzoneSize) {
-  if (ShadowRedzoneSize == 1) return PoisonByte;
-  if (ShadowRedzoneSize == 2) return (PoisonByte << 8) + PoisonByte;
-  if (ShadowRedzoneSize == 4)
-    return (PoisonByte << 24) + (PoisonByte << 16) +
-           (PoisonByte << 8) + (PoisonByte);
-  llvm_unreachable("ShadowRedzoneSize is either 1, 2 or 4");
-}
-
-static void PoisonShadowPartialRightRedzone(uint8_t *Shadow,
-                                            size_t Size,
-                                            size_t RZSize,
-                                            size_t ShadowGranularity,
-                                            uint8_t Magic) {
-  for (size_t i = 0; i < RZSize;
-       i+= ShadowGranularity, Shadow++) {
-    if (i + ShadowGranularity <= Size) {
-      *Shadow = 0;  // fully addressable
-    } else if (i >= Size) {
-      *Shadow = Magic;  // unaddressable
-    } else {
-      *Shadow = Size - i;  // first Size-i bytes are addressable
-    }
-  }
-}
-
 // Workaround for bug 11395: we don't want to instrument stack in functions
 // with large assembly blobs (32-bit only), otherwise reg alloc may crash.
 // FIXME: remove once the bug 11395 is fixed.
@@ -1378,65 +1356,31 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
       kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
 }
 
-void FunctionStackPoisoner::poisonRedZones(
-    const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> &IRB, Value *ShadowBase,
-    bool DoPoison) {
-  size_t ShadowRZSize = RedzoneSize() >> Mapping.Scale;
-  assert(ShadowRZSize >= 1 && ShadowRZSize <= 4);
-  Type *RZTy = Type::getIntNTy(*C, ShadowRZSize * 8);
-  Type *RZPtrTy = PointerType::get(RZTy, 0);
-
-  Value *PoisonLeft = ConstantInt::get(RZTy,
-    ValueForPoison(DoPoison ? kAsanStackLeftRedzoneMagic : 0LL, ShadowRZSize));
-  Value *PoisonMid = ConstantInt::get(RZTy,
-    ValueForPoison(DoPoison ? kAsanStackMidRedzoneMagic : 0LL, ShadowRZSize));
-  Value *PoisonRight = ConstantInt::get(RZTy,
-    ValueForPoison(DoPoison ? kAsanStackRightRedzoneMagic : 0LL, ShadowRZSize));
-
-  // poison the first red zone.
-  IRB.CreateStore(PoisonLeft, IRB.CreateIntToPtr(ShadowBase, RZPtrTy));
-
-  // poison all other red zones.
-  uint64_t Pos = RedzoneSize();
-  for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
-    AllocaInst *AI = AllocaVec[i];
-    uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
-    uint64_t AlignedSize = getAlignedAllocaSize(AI);
-    assert(AlignedSize - SizeInBytes < RedzoneSize());
-    Value *Ptr = NULL;
-
-    Pos += AlignedSize;
-
-    assert(ShadowBase->getType() == IntptrTy);
-    if (SizeInBytes < AlignedSize) {
-      // Poison the partial redzone at right
-      Ptr = IRB.CreateAdd(
-          ShadowBase, ConstantInt::get(IntptrTy,
-                                       (Pos >> Mapping.Scale) - ShadowRZSize));
-      size_t AddressableBytes = RedzoneSize() - (AlignedSize - SizeInBytes);
-      uint32_t Poison = 0;
-      if (DoPoison) {
-        PoisonShadowPartialRightRedzone((uint8_t*)&Poison, AddressableBytes,
-                                        RedzoneSize(),
-                                        1ULL << Mapping.Scale,
-                                        kAsanStackPartialRedzoneMagic);
-        Poison =
-            ASan.TD->isLittleEndian()
-                ? support::endian::byte_swap<uint32_t, support::little>(Poison)
-                : support::endian::byte_swap<uint32_t, support::big>(Poison);
+void
+FunctionStackPoisoner::poisonRedZones(const ArrayRef<uint8_t> ShadowBytes,
+                                      IRBuilder<> &IRB, Value *ShadowBase,
+                                      bool DoPoison) {
+  size_t n = ShadowBytes.size();
+  size_t i = 0;
+  // We need to (un)poison n bytes of stack shadow. Poison as many as we can
+  // using 64-bit stores (if we are on 64-bit arch), then poison the rest
+  // with 32-bit stores, then with 16-bit stores, then with 8-bit stores.
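(Editorial aside before the loop itself, which follows below.) The byte-packing step of that loop can be rendered as a standalone, runnable sketch; on big-endian targets the loop instead shifts the accumulator left and ORs in each byte:

#include <stddef.h>
#include <stdint.h>

// Little-endian packing: shadow byte j must land at bit offset 8*j so that
// a single wide store writes the bytes to memory in their original order.
static uint64_t packShadowLE(const uint8_t *Bytes, size_t N) {
  uint64_t Val = 0;
  for (size_t j = 0; j < N; ++j)
    Val |= (uint64_t)Bytes[j] << (8 * j);
  return Val;
}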
+  for (size_t LargeStoreSizeInBytes = ASan.LongSize / 8;
+       LargeStoreSizeInBytes != 0; LargeStoreSizeInBytes /= 2) {
+    for (; i + LargeStoreSizeInBytes - 1 < n; i += LargeStoreSizeInBytes) {
+      uint64_t Val = 0;
+      for (size_t j = 0; j < LargeStoreSizeInBytes; j++) {
+        if (ASan.TD->isLittleEndian())
+          Val |= (uint64_t)ShadowBytes[i + j] << (8 * j);
+        else
+          Val = (Val << 8) | ShadowBytes[i + j];
       }
-      Value *PartialPoison = ConstantInt::get(RZTy, Poison);
-      IRB.CreateStore(PartialPoison, IRB.CreateIntToPtr(Ptr, RZPtrTy));
+      if (!Val) continue;
+      Value *Ptr = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i));
+      Type *StoreTy = Type::getIntNTy(*C, LargeStoreSizeInBytes * 8);
+      Value *Poison = ConstantInt::get(StoreTy, DoPoison ? Val : 0);
+      IRB.CreateStore(Poison, IRB.CreateIntToPtr(Ptr, StoreTy->getPointerTo()));
     }
-
-    // Poison the full redzone at right.
-    Ptr = IRB.CreateAdd(ShadowBase,
-                        ConstantInt::get(IntptrTy, Pos >> Mapping.Scale));
-    bool LastAlloca = (i == AllocaVec.size() - 1);
-    Value *Poison = LastAlloca ? PoisonRight : PoisonMid;
-    IRB.CreateStore(Poison, IRB.CreateIntToPtr(Ptr, RZPtrTy));
-
-    Pos += RedzoneSize();
   }
 }
 
@@ -1468,24 +1412,37 @@ void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined(
 }
 
 void FunctionStackPoisoner::poisonStack() {
-  uint64_t LocalStackSize = TotalStackSize +
-                            (AllocaVec.size() + 1) * RedzoneSize();
-
-  bool DoStackMalloc = ASan.CheckUseAfterReturn
-      && LocalStackSize <= kMaxStackMallocSize;
   int StackMallocIdx = -1;
 
   assert(AllocaVec.size() > 0);
   Instruction *InsBefore = AllocaVec[0];
   IRBuilder<> IRB(InsBefore);
 
+  SmallVector<ASanStackVariableDescription, 16> SVD;
+  SVD.reserve(AllocaVec.size());
+  for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
+    AllocaInst *AI = AllocaVec[i];
+    ASanStackVariableDescription D = { AI->getName().data(),
+                                       getAllocaSizeInBytes(AI),
+                                       AI->getAlignment(), AI, 0};
+    SVD.push_back(D);
+  }
+  // Minimal header size (left redzone) is 4 pointers,
+  // i.e. 32 bytes on 64-bit platforms and 16 bytes on 32-bit platforms.
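(Editorial check of the arithmetic in the comment above against the expression that follows.) A pointer occupies LongSize/8 bytes, so four pointers occupy 4 * (LongSize/8) = LongSize/2 bytes; ASan.LongSize / 2 therefore yields 32 on a 64-bit target and 16 on a 32-bit one, matching the figures quoted.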
+ size_t MinHeaderSize = ASan.LongSize / 2; + ASanStackFrameLayout L; + ComputeASanStackFrameLayout(SVD, 1UL << Mapping.Scale, MinHeaderSize, &L); + DEBUG(dbgs() << L.DescriptionString << " --- " << L.FrameSize << "\n"); + uint64_t LocalStackSize = L.FrameSize; + bool DoStackMalloc = + ASan.CheckUseAfterReturn && LocalStackSize <= kMaxStackMallocSize; Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize); AllocaInst *MyAlloca = new AllocaInst(ByteArrayTy, "MyAlloca", InsBefore); - if (ClRealignStack && StackAlignment < RedzoneSize()) - StackAlignment = RedzoneSize(); - MyAlloca->setAlignment(StackAlignment); + assert((ClRealignStack & (ClRealignStack - 1)) == 0); + size_t FrameAlignment = std::max(L.FrameAlignment, (size_t)ClRealignStack); + MyAlloca->setAlignment(FrameAlignment); assert(MyAlloca->isStaticAlloca()); Value *OrigStackBase = IRB.CreatePointerCast(MyAlloca, IntptrTy); Value *LocalStackBase = OrigStackBase; @@ -1500,8 +1457,7 @@ void FunctionStackPoisoner::poisonStack() { kAsanOptionDetectUAR, IRB.getInt32Ty()); Value *Cmp = IRB.CreateICmpNE(IRB.CreateLoad(OptionDetectUAR), Constant::getNullValue(IRB.getInt32Ty())); - Instruction *Term = - SplitBlockAndInsertIfThen(cast(Cmp), false); + Instruction *Term = SplitBlockAndInsertIfThen(Cmp, InsBefore, false); BasicBlock *CmpBlock = cast(Cmp)->getParent(); IRBuilder<> IRBIf(Term); LocalStackBase = IRBIf.CreateCall2( @@ -1515,11 +1471,6 @@ void FunctionStackPoisoner::poisonStack() { LocalStackBase = Phi; } - // This string will be parsed by the run-time (DescribeAddressIfStack). - SmallString<2048> StackDescriptionStorage; - raw_svector_ostream StackDescription(StackDescriptionStorage); - StackDescription << AllocaVec.size() << " "; - // Insert poison calls for lifetime intrinsics for alloca. bool HavePoisonedAllocas = false; for (size_t i = 0, n = AllocaPoisonCallVec.size(); i < n; i++) { @@ -1531,24 +1482,16 @@ void FunctionStackPoisoner::poisonStack() { HavePoisonedAllocas |= APC.DoPoison; } - uint64_t Pos = RedzoneSize(); // Replace Alloca instructions with base+offset. - for (size_t i = 0, n = AllocaVec.size(); i < n; i++) { - AllocaInst *AI = AllocaVec[i]; - uint64_t SizeInBytes = getAllocaSizeInBytes(AI); - StringRef Name = AI->getName(); - StackDescription << Pos << " " << SizeInBytes << " " - << Name.size() << " " << Name << " "; - uint64_t AlignedSize = getAlignedAllocaSize(AI); - assert((AlignedSize % RedzoneSize()) == 0); + for (size_t i = 0, n = SVD.size(); i < n; i++) { + AllocaInst *AI = SVD[i].AI; Value *NewAllocaPtr = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Pos)), - AI->getType()); + IRB.CreateAdd(LocalStackBase, + ConstantInt::get(IntptrTy, SVD[i].Offset)), + AI->getType()); replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB); AI->replaceAllUsesWith(NewAllocaPtr); - Pos += AlignedSize + RedzoneSize(); } - assert(Pos == LocalStackSize); // The left-most redzone has enough space for at least 4 pointers. // Write the Magic value to redzone[0]. 
@@ -1560,7 +1503,8 @@ void FunctionStackPoisoner::poisonStack() { IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize/8)), IntptrPtrTy); GlobalVariable *StackDescriptionGlobal = - createPrivateGlobalForString(*F.getParent(), StackDescription.str()); + createPrivateGlobalForString(*F.getParent(), L.DescriptionString, + /*AllowMerging*/true); Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); IRB.CreateStore(Description, BasePlus1); @@ -1573,30 +1517,33 @@ void FunctionStackPoisoner::poisonStack() { // Poison the stack redzones at the entry. Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB); - poisonRedZones(AllocaVec, IRB, ShadowBase, true); + poisonRedZones(L.ShadowBytes, IRB, ShadowBase, true); - // Unpoison the stack before all ret instructions. + // (Un)poison the stack before all ret instructions. for (size_t i = 0, n = RetVec.size(); i < n; i++) { Instruction *Ret = RetVec[i]; IRBuilder<> IRBRet(Ret); // Mark the current frame as retired. IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic), BasePlus0); - // Unpoison the stack. - poisonRedZones(AllocaVec, IRBRet, ShadowBase, false); if (DoStackMalloc) { assert(StackMallocIdx >= 0); - // In use-after-return mode, mark the whole stack frame unaddressable. + // if LocalStackBase != OrigStackBase: + // // In use-after-return mode, poison the whole stack frame. + // if StackMallocIdx <= 4 + // // For small sizes inline the whole thing: + // memset(ShadowBase, kAsanStackAfterReturnMagic, ShadowSize); + // **SavedFlagPtr(LocalStackBase) = 0 + // else + // __asan_stack_free_N(LocalStackBase, OrigStackBase) + // else + // + Value *Cmp = IRBRet.CreateICmpNE(LocalStackBase, OrigStackBase); + TerminatorInst *ThenTerm, *ElseTerm; + SplitBlockAndInsertIfThenElse(Cmp, Ret, &ThenTerm, &ElseTerm); + + IRBuilder<> IRBPoison(ThenTerm); if (StackMallocIdx <= 4) { - // For small sizes inline the whole thing: - // if LocalStackBase != OrigStackBase: - // memset(ShadowBase, kAsanStackAfterReturnMagic, ShadowSize); - // **SavedFlagPtr(LocalStackBase) = 0 - // FIXME: if LocalStackBase != OrigStackBase don't call poisonRedZones. - Value *Cmp = IRBRet.CreateICmpNE(LocalStackBase, OrigStackBase); - TerminatorInst *PoisonTerm = - SplitBlockAndInsertIfThen(cast(Cmp), false); - IRBuilder<> IRBPoison(PoisonTerm); int ClassSize = kMinStackMallocSize << StackMallocIdx; SetShadowToStackAfterReturnInlined(IRBPoison, ShadowBase, ClassSize >> Mapping.Scale); @@ -1610,15 +1557,20 @@ void FunctionStackPoisoner::poisonStack() { IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getInt8PtrTy())); } else { // For larger frames call __asan_stack_free_*. - IRBRet.CreateCall3(AsanStackFreeFunc[StackMallocIdx], LocalStackBase, - ConstantInt::get(IntptrTy, LocalStackSize), - OrigStackBase); + IRBPoison.CreateCall3(AsanStackFreeFunc[StackMallocIdx], LocalStackBase, + ConstantInt::get(IntptrTy, LocalStackSize), + OrigStackBase); } + + IRBuilder<> IRBElse(ElseTerm); + poisonRedZones(L.ShadowBytes, IRBElse, ShadowBase, false); } else if (HavePoisonedAllocas) { // If we poisoned some allocas in llvm.lifetime analysis, // unpoison whole stack frame now. 
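The pseudocode comment in the hunk above describes what the instrumented epilogue does at run time. A rough C++ rendering of that control flow follows; names are hypothetical and offsets are simplified (the real code locates the saved flag pointer at a fixed offset inside the fake frame and calls the __asan_stack_free_N runtime entry points):

#include <cstddef>
#include <cstdint>
#include <cstring>

// localBase != origBase means the frame actually lives on a fake-stack
// frame obtained from __asan_stack_malloc_N.
static void epilogue(uintptr_t localBase, uintptr_t origBase, uint8_t *shadow,
                     size_t shadowSize, bool smallFrame) {
  if (localBase != origBase) {
    if (smallFrame) {
      // Small size classes: inline-poison the whole frame and clear the
      // flag that marks the fake frame as live.
      memset(shadow, 0xf5 /* after-return magic */, shadowSize);
      uint8_t **savedFlagPtr = reinterpret_cast<uint8_t **>(localBase);
      **savedFlagPtr = 0;
    }
    // else: __asan_stack_free_N(localBase, frameSize, origBase);
  } else {
    // Not a fake frame: just unpoison the redzones, matching the
    // poisonRedZones(..., /*DoPoison=*/false) call in the else branch.
    memset(shadow, 0, shadowSize);
  }
}

int main() {
  uint8_t shadow[8] = {0xf1, 0, 0, 0, 0, 0, 0xf3, 0xf3};
  uintptr_t base = reinterpret_cast<uintptr_t>(shadow); // stand-in frame base
  epilogue(base, base, shadow, sizeof(shadow), /*smallFrame=*/false);
  return shadow[0]; // 0 after unpoisoning
}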
assert(LocalStackBase == OrigStackBase); poisonAlloca(LocalStackBase, LocalStackSize, IRBRet, false); + } else { + poisonRedZones(L.ShadowBytes, IRBRet, ShadowBase, false); } } diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 9b9e725cde81..bc089ec09390 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -96,6 +96,22 @@ static cl::opt ClArgsABI( cl::desc("Use the argument ABI rather than the TLS ABI"), cl::Hidden); +// Controls whether the pass includes or ignores the labels of pointers in load +// instructions. +static cl::opt ClCombinePointerLabelsOnLoad( + "dfsan-combine-pointer-labels-on-load", + cl::desc("Combine the label of the pointer with the label of the data when " + "loading from memory."), + cl::Hidden, cl::init(true)); + +// Controls whether the pass includes or ignores the labels of pointers in +// stores instructions. +static cl::opt ClCombinePointerLabelsOnStore( + "dfsan-combine-pointer-labels-on-store", + cl::desc("Combine the label of the pointer with the label of the data when " + "storing in memory."), + cl::Hidden, cl::init(false)); + static cl::opt ClDebugNonzeroLabels( "dfsan-debug-nonzero-labels", cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, " @@ -505,6 +521,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { DFSanUnionLoadFn = Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy); if (Function *F = dyn_cast(DFSanUnionLoadFn)) { + F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly); F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); } DFSanUnimplementedFn = @@ -718,10 +735,9 @@ bool DataFlowSanitizer::runOnModule(Module &M) { while (isa(Pos) || isa(Pos)) Pos = Pos->getNextNode(); IRBuilder<> IRB(Pos); - Instruction *NeInst = cast( - IRB.CreateICmpNE(*i, DFSF.DFS.ZeroShadow)); + Value *Ne = IRB.CreateICmpNE(*i, DFSF.DFS.ZeroShadow); BranchInst *BI = cast(SplitBlockAndInsertIfThen( - NeInst, /*Unreachable=*/ false, ColdCallWeights)); + Ne, Pos, /*Unreachable=*/false, ColdCallWeights)); IRBuilder<> ThenIRB(BI); ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn); } @@ -821,26 +837,20 @@ Value *DataFlowSanitizer::combineShadows(Value *V1, Value *V2, IRBuilder<> IRB(Pos); BasicBlock *Head = Pos->getParent(); Value *Ne = IRB.CreateICmpNE(V1, V2); - Instruction *NeInst = dyn_cast(Ne); - if (NeInst) { - BranchInst *BI = cast(SplitBlockAndInsertIfThen( - NeInst, /*Unreachable=*/ false, ColdCallWeights)); - IRBuilder<> ThenIRB(BI); - CallInst *Call = ThenIRB.CreateCall2(DFSanUnionFn, V1, V2); - Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); - Call->addAttribute(1, Attribute::ZExt); - Call->addAttribute(2, Attribute::ZExt); + BranchInst *BI = cast(SplitBlockAndInsertIfThen( + Ne, Pos, /*Unreachable=*/false, ColdCallWeights)); + IRBuilder<> ThenIRB(BI); + CallInst *Call = ThenIRB.CreateCall2(DFSanUnionFn, V1, V2); + Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + Call->addAttribute(1, Attribute::ZExt); + Call->addAttribute(2, Attribute::ZExt); - BasicBlock *Tail = BI->getSuccessor(0); - PHINode *Phi = PHINode::Create(ShadowTy, 2, "", Tail->begin()); - Phi->addIncoming(Call, Call->getParent()); - Phi->addIncoming(V1, Head); - Pos = Phi; - return Phi; - } else { - assert(0 && "todo"); - return 0; - } + BasicBlock *Tail 
= BI->getSuccessor(0); + PHINode *Phi = PHINode::Create(ShadowTy, 2, "", Tail->begin()); + Phi->addIncoming(Call, Call->getParent()); + Phi->addIncoming(V1, Head); + Pos = Phi; + return Phi; } // A convenience function which folds the shadows of each of the operands @@ -978,14 +988,15 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) { Align = 1; } IRBuilder<> IRB(&LI); - Value *LoadedShadow = - DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI); - Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand()); - Value *CombinedShadow = DFSF.DFS.combineShadows(LoadedShadow, PtrShadow, &LI); - if (CombinedShadow != DFSF.DFS.ZeroShadow) - DFSF.NonZeroChecks.insert(CombinedShadow); + Value *Shadow = DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI); + if (ClCombinePointerLabelsOnLoad) { + Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand()); + Shadow = DFSF.DFS.combineShadows(Shadow, PtrShadow, &LI); + } + if (Shadow != DFSF.DFS.ZeroShadow) + DFSF.NonZeroChecks.insert(Shadow); - DFSF.setShadow(&LI, CombinedShadow); + DFSF.setShadow(&LI, Shadow); } void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align, @@ -1050,8 +1061,13 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) { } else { Align = 1; } - DFSF.storeShadow(SI.getPointerOperand(), Size, Align, - DFSF.getShadow(SI.getValueOperand()), &SI); + + Value* Shadow = DFSF.getShadow(SI.getValueOperand()); + if (ClCombinePointerLabelsOnStore) { + Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand()); + Shadow = DFSF.DFS.combineShadows(Shadow, PtrShadow, &SI); + } + DFSF.storeShadow(SI.getPointerOperand(), Size, Align, Shadow, &SI); } void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) { diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 206bffbb274e..8d5a83ca23af 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -18,6 +18,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" @@ -62,32 +63,35 @@ GCOVOptions GCOVOptions::getDefault() { } namespace { + class GCOVFunction; + class GCOVProfiler : public ModulePass { public: static char ID; GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) { - ReversedVersion[0] = Options.Version[3]; - ReversedVersion[1] = Options.Version[2]; - ReversedVersion[2] = Options.Version[1]; - ReversedVersion[3] = Options.Version[0]; - ReversedVersion[4] = '\0'; - initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); + init(); } GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){ assert((Options.EmitNotes || Options.EmitData) && "GCOVProfiler asked to do nothing?"); - ReversedVersion[0] = Options.Version[3]; - ReversedVersion[1] = Options.Version[2]; - ReversedVersion[2] = Options.Version[1]; - ReversedVersion[3] = Options.Version[0]; - ReversedVersion[4] = '\0'; - initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); + init(); + } + ~GCOVProfiler() { + DeleteContainerPointers(Funcs); } virtual const char *getPassName() const { return "GCOV Profiler"; } private: + void init() { + ReversedVersion[0] = Options.Version[3]; + ReversedVersion[1] = Options.Version[2]; + ReversedVersion[2] = Options.Version[1]; + 
ReversedVersion[3] = Options.Version[0]; + ReversedVersion[4] = '\0'; + initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); + } bool runOnModule(Module &M); // Create the .gcno files for the Module based on DebugInfo. @@ -131,9 +135,12 @@ namespace { // Reversed, NUL-terminated copy of Options.Version. char ReversedVersion[5]; + // Checksum, produced by hash of EdgeDestinations + SmallVector FileChecksums; Module *M; LLVMContext *Ctx; + SmallVector Funcs; }; } @@ -145,7 +152,7 @@ ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) { return new GCOVProfiler(Options); } -static std::string getFunctionName(DISubprogram SP) { +static StringRef getFunctionName(DISubprogram SP) { if (!SP.getLinkageName().empty()) return SP.getLinkageName(); return SP.getName(); @@ -302,30 +309,23 @@ namespace { class GCOVFunction : public GCOVRecord { public: GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident, - bool UseCfgChecksum) { + bool UseCfgChecksum) : + SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0) { this->os = os; Function *F = SP.getFunction(); - DEBUG(dbgs() << "Function: " << F->getName() << "\n"); + DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n"); uint32_t i = 0; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { Blocks[BB] = new GCOVBlock(i++, os); } ReturnBlock = new GCOVBlock(i++, os); - writeBytes(FunctionTag, 4); - uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) + - 1 + lengthOfGCOVString(SP.getFilename()) + 1; - if (UseCfgChecksum) - ++BlockLen; - write(BlockLen); - write(Ident); - write(0); // lineno checksum - if (UseCfgChecksum) - write(0); // cfg checksum - writeGCOVString(getFunctionName(SP)); - writeGCOVString(SP.getFilename()); - write(SP.getLineNumber()); + std::string FunctionNameAndLine; + raw_string_ostream FNLOS(FunctionNameAndLine); + FNLOS << getFunctionName(SP) << SP.getLineNumber(); + FNLOS.flush(); + FuncChecksum = hash_value(FunctionNameAndLine); } ~GCOVFunction() { @@ -341,7 +341,41 @@ namespace { return *ReturnBlock; } + std::string getEdgeDestinations() { + std::string EdgeDestinations; + raw_string_ostream EDOS(EdgeDestinations); + Function *F = Blocks.begin()->first->getParent(); + for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { + GCOVBlock &Block = *Blocks[I]; + for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) + EDOS << Block.OutEdges[i]->Number; + } + return EdgeDestinations; + } + + uint32_t getFuncChecksum() { + return FuncChecksum; + } + + void setCfgChecksum(uint32_t Checksum) { + CfgChecksum = Checksum; + } + void writeOut() { + writeBytes(FunctionTag, 4); + uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) + + 1 + lengthOfGCOVString(SP.getFilename()) + 1; + if (UseCfgChecksum) + ++BlockLen; + write(BlockLen); + write(Ident); + write(FuncChecksum); + if (UseCfgChecksum) + write(CfgChecksum); + writeGCOVString(getFunctionName(SP)); + writeGCOVString(SP.getFilename()); + write(SP.getLineNumber()); + // Emit count of blocks. 
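The new function checksum above is simply a hash of the function's name concatenated with its starting line, so renaming or moving a function invalidates stale coverage data. A sketch of the construction, with std::hash standing in for llvm::hash_value (so the concrete bit patterns differ from what the pass emits):

#include <cstdint>
#include <cstdio>
#include <functional>
#include <string>

// Mirror of the FuncChecksum construction: concatenate name and line,
// then hash the resulting string.
static uint32_t funcChecksum(const std::string &Name, unsigned Line) {
  std::string NameAndLine = Name + std::to_string(Line);
  return static_cast<uint32_t>(std::hash<std::string>()(NameAndLine));
}

int main() {
  // Two definitions with the same name but different lines get different
  // checksums, which is the property gcov uses to match .gcno and .gcda.
  printf("%08x\n", funcChecksum("_Z3foov", 12));
  printf("%08x\n", funcChecksum("_Z3foov", 40));
}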
writeBytes(BlockTag, 4); write(Blocks.size() + 1); @@ -375,6 +409,11 @@ namespace { } private: + DISubprogram SP; + uint32_t Ident; + uint32_t FuncChecksum; + bool UseCfgChecksum; + uint32_t CfgChecksum; DenseMap Blocks; GCOVBlock *ReturnBlock; }; @@ -427,9 +466,7 @@ void GCOVProfiler::emitProfileNotes() { std::string ErrorInfo; raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo, sys::fs::F_Binary); - out.write("oncg", 4); - out.write(ReversedVersion, 4); - out.write("MVLL", 4); + std::string EdgeDestinations; DIArray SPs = CU.getSubprograms(); for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { @@ -441,17 +478,22 @@ void GCOVProfiler::emitProfileNotes() { Function *F = SP.getFunction(); if (!F) continue; - GCOVFunction Func(SP, &out, i, Options.UseCfgChecksum); + BasicBlock &EntryBlock = F->getEntryBlock(); + EntryBlock.splitBasicBlock(EntryBlock.begin()); + + GCOVFunction *Func = + new GCOVFunction(SP, &out, i, Options.UseCfgChecksum); + Funcs.push_back(Func); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - GCOVBlock &Block = Func.getBlock(BB); + GCOVBlock &Block = Func->getBlock(BB); TerminatorInst *TI = BB->getTerminator(); if (int successors = TI->getNumSuccessors()) { for (int i = 0; i != successors; ++i) { - Block.addEdge(Func.getBlock(TI->getSuccessor(i))); + Block.addEdge(Func->getBlock(TI->getSuccessor(i))); } } else if (isa(TI)) { - Block.addEdge(Func.getReturnBlock()); + Block.addEdge(Func->getReturnBlock()); } uint32_t Line = 0; @@ -467,8 +509,21 @@ void GCOVProfiler::emitProfileNotes() { Lines.addLine(Loc.getLine()); } } - Func.writeOut(); + EdgeDestinations += Func->getEdgeDestinations(); } + + FileChecksums.push_back(hash_value(EdgeDestinations)); + out.write("oncg", 4); + out.write(ReversedVersion, 4); + out.write(reinterpret_cast(&FileChecksums.back()), 4); + + for (SmallVectorImpl::iterator I = Funcs.begin(), + E = Funcs.end(); I != E; ++I) { + GCOVFunction *Func = *I; + Func->setCfgChecksum(FileChecksums.back()); + Func->writeOut(); + } + out.write("\0\0\0\0\0\0\0\0", 8); // EOF out.close(); } @@ -666,6 +721,7 @@ Constant *GCOVProfiler::getStartFileFunc() { Type *Args[] = { Type::getInt8PtrTy(*Ctx), // const char *orig_filename Type::getInt8PtrTy(*Ctx), // const char version[4] + Type::getInt32Ty(*Ctx), // uint32_t checksum }; FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_start_file", FTy); @@ -683,10 +739,12 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() { } Constant *GCOVProfiler::getEmitFunctionFunc() { - Type *Args[3] = { + Type *Args[] = { Type::getInt32Ty(*Ctx), // uint32_t ident Type::getInt8PtrTy(*Ctx), // const char *function_name + Type::getInt32Ty(*Ctx), // uint32_t func_checksum Type::getInt8Ty(*Ctx), // uint8_t use_extra_checksum + Type::getInt32Ty(*Ctx), // uint32_t cfg_checksum }; FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_emit_function", FTy); @@ -760,17 +818,22 @@ Function *GCOVProfiler::insertCounterWriteout( for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { DICompileUnit CU(CU_Nodes->getOperand(i)); std::string FilenameGcda = mangleName(CU, "gcda"); - Builder.CreateCall2(StartFile, + uint32_t CfgChecksum = FileChecksums.empty() ? 
0 : FileChecksums[i]; + Builder.CreateCall3(StartFile, Builder.CreateGlobalStringPtr(FilenameGcda), - Builder.CreateGlobalStringPtr(ReversedVersion)); + Builder.CreateGlobalStringPtr(ReversedVersion), + Builder.getInt32(CfgChecksum)); for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) { DISubprogram SP(CountersBySP[j].second); - Builder.CreateCall3( + uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum(); + Builder.CreateCall5( EmitFunction, Builder.getInt32(j), Options.FunctionNamesInData ? Builder.CreateGlobalStringPtr(getFunctionName(SP)) : Constant::getNullValue(Builder.getInt8PtrTy()), - Builder.getInt8(Options.UseCfgChecksum)); + Builder.getInt32(FuncChecksum), + Builder.getInt8(Options.UseCfgChecksum), + Builder.getInt32(CfgChecksum)); GlobalVariable *GV = CountersBySP[j].first; unsigned Arcs = diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/LLVMBuild.txt index d36ad540ee80..99e95dfa375a 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Instrumentation parent = Transforms -required_libraries = Analysis Core Support TransformUtils +required_libraries = Analysis Core Support Target TransformUtils diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index d547adc86e1b..8a52a4444be2 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -565,8 +565,7 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *Cmp = IRB.CreateICmpNE(ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp"); Instruction *CheckTerm = - SplitBlockAndInsertIfThen(cast(Cmp), false, - MS.OriginStoreWeights); + SplitBlockAndInsertIfThen(Cmp, &I, false, MS.OriginStoreWeights); IRBuilder<> IRBNew(CheckTerm); IRBNew.CreateAlignedStore(getOrigin(Val), getOriginPtr(Addr, IRBNew), Alignment); @@ -588,10 +587,9 @@ struct MemorySanitizerVisitor : public InstVisitor { continue; Value *Cmp = IRB.CreateICmpNE(ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp"); - Instruction *CheckTerm = - SplitBlockAndInsertIfThen(cast(Cmp), - /* Unreachable */ !ClKeepGoing, - MS.ColdCallWeights); + Instruction *CheckTerm = SplitBlockAndInsertIfThen( + Cmp, OrigIns, + /* Unreachable */ !ClKeepGoing, MS.ColdCallWeights); IRB.SetInsertPoint(CheckTerm); if (MS.TrackOrigins) { @@ -629,7 +627,7 @@ struct MemorySanitizerVisitor : public InstVisitor { IRB.CreatePHI(Fn0->getType(), 2, "msandr.indirect_target"); Instruction *CheckTerm = SplitBlockAndInsertIfThen( - cast(NotInThisModule), + NotInThisModule, NewFnPhi, /* Unreachable */ false, MS.ColdCallWeights); IRB.SetInsertPoint(CheckTerm); @@ -2082,13 +2080,20 @@ struct MemorySanitizerVisitor : public InstVisitor { // Origins are always i32, so any vector conditions must be flattened. // FIXME: consider tracking vector origins for app vectors? 
     Value *Cond = I.getCondition();
+    Value *CondShadow = getShadow(Cond);
     if (Cond->getType()->isVectorTy()) {
-      Value *ConvertedShadow = convertToShadowTyNoVec(Cond, IRB);
-      Cond = IRB.CreateICmpNE(ConvertedShadow,
-                              getCleanShadow(ConvertedShadow), "_mso_select");
+      Type *FlatTy = getShadowTyNoVec(Cond->getType());
+      Cond = IRB.CreateICmpNE(IRB.CreateBitCast(Cond, FlatTy),
+                              ConstantInt::getNullValue(FlatTy));
+      CondShadow = IRB.CreateICmpNE(IRB.CreateBitCast(CondShadow, FlatTy),
+                                    ConstantInt::getNullValue(FlatTy));
     }
-    setOrigin(&I, IRB.CreateSelect(Cond,
-              getOrigin(I.getTrueValue()), getOrigin(I.getFalseValue())));
+    // a = select b, c, d
+    // Oa = Sb ? Ob : (b ? Oc : Od)
+    setOrigin(&I, IRB.CreateSelect(
+                      CondShadow, getOrigin(I.getCondition()),
+                      IRB.CreateSelect(Cond, getOrigin(I.getTrueValue()),
+                                       getOrigin(I.getFalseValue()))));
   }
 }
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 89fb746a5c47..5c1881782008 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -402,8 +402,13 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
   if (IsWrite && isVtableAccess(I)) {
     DEBUG(dbgs() << "  VPTR : " << *I << "\n");
     Value *StoredValue = cast<StoreInst>(I)->getValueOperand();
-    // StoredValue does not necessarily have a pointer type.
-    if (isa<IntegerType>(StoredValue->getType()))
+    // StoredValue may be a vector type if we are storing several vptrs at once.
+    // In this case, just take the first element of the vector since this is
+    // enough to find vptr races.
+    if (isa<VectorType>(StoredValue->getType()))
+      StoredValue = IRB.CreateExtractElement(
+          StoredValue, ConstantInt::get(IRB.getInt32Ty(), 0));
+    if (StoredValue->getType()->isIntegerTy())
       StoredValue = IRB.CreateIntToPtr(StoredValue, IRB.getInt8PtrTy());
     // Call TsanVptrUpdate.
     IRB.CreateCall2(TsanVptrUpdate,
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 2976df6b9de1..f8b6f15850f8 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -538,8 +538,7 @@ namespace {
 
 void
 PtrState::Merge(const PtrState &Other, bool TopDown) {
-  Seq = MergeSeqs(static_cast<Sequence>(Seq), static_cast<Sequence>(Other.Seq),
-                  TopDown);
+  Seq = MergeSeqs(GetSeq(), Other.GetSeq(), TopDown);
   KnownPositiveRefCount &= Other.KnownPositiveRefCount;
 
   // If we're not in a sequence (anymore), drop all associated state.
@@ -1006,7 +1005,7 @@ static void GenerateARCAnnotation(unsigned InstMDId,
   // llvm-arc-annotation-processor tool to cross reference where the source
   // pointer is in the LLVM IR since the LLVM IR parser does not submit such
   // information via debug info for backends to use (since why would anyone
-  // need such a thing from LLVM IR besides in non standard cases
+  // need such a thing from LLVM IR besides in non-standard cases
   // [i.e. this]).
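The comment `Oa = Sb ? Ob : (b ? Oc : Od)` is the whole story of the new origin rule for selects: if the condition's shadow Sb says the condition is poisoned, report the condition's origin; otherwise follow whichever arm was actually chosen. A scalar model of the rule (standalone sketch, not MSan code):

#include <cassert>
#include <cstdint>

// a = select b, c, d
// Oa = Sb ? Ob : (b ? Oc : Od)
// Sb is the condition's shadow; nonzero means "uninitialized".
static uint32_t selectOrigin(bool b, uint32_t Sb,
                             uint32_t Ob, uint32_t Oc, uint32_t Od) {
  return Sb ? Ob : (b ? Oc : Od);
}

int main() {
  // Poisoned condition: report where the condition became uninitialized.
  assert(selectOrigin(true, /*Sb=*/1, 100, 200, 300) == 100);
  // Clean condition: the origin comes from the selected operand.
  assert(selectOrigin(true, 0, 100, 200, 300) == 200);
  assert(selectOrigin(false, 0, 100, 200, 300) == 300);
  return 0;
}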
   MDString *SourcePtrMDNode = AppendMDNodeToSourcePtr(PtrMDId, Ptr);
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/CMakeLists.txt
index 626c810a50fb..0b2928677c30 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_library(LLVMScalarOpts
   CorrelatedValuePropagation.cpp
   DCE.cpp
   DeadStoreElimination.cpp
+  Scalarizer.cpp
   EarlyCSE.cpp
   GlobalMerge.cpp
   GVN.cpp
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 007e9b79e20a..79d92ce5a534 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -1916,7 +1916,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
   }
 
   if (CmpInst *CI = dyn_cast<CmpInst>(I))
-    return OptimizeCmpExpression(CI);
+    if (!TLI || !TLI->hasMultipleConditionRegisters())
+      return OptimizeCmpExpression(CI);
 
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     if (TLI)
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 5266894bc34c..8a363286c06c 100644
--- a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -26,7 +26,7 @@
 #include "llvm/Support/RecyclingAllocator.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include <deque>
+#include <vector>
 using namespace llvm;
 
 STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
@@ -552,7 +552,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
 
 
 bool EarlyCSE::runOnFunction(Function &F) {
-  std::deque<StackNode *> nodesToProcess;
+  std::vector<StackNode *> nodesToProcess;
 
   TD = getAnalysisIfAvailable<DataLayout>();
   TLI = &getAnalysis<TargetLibraryInfo>();
@@ -570,7 +570,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
   bool Changed = false;
 
   // Process the root node.
-  nodesToProcess.push_front(
+  nodesToProcess.push_back(
       new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
                     CurrentGeneration, DT->getRootNode(),
                     DT->getRootNode()->begin(),
@@ -583,7 +583,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
   while (!nodesToProcess.empty()) {
     // Grab the first item off the stack. Set the current generation, remove
     // the node from the stack, and process it.
-    StackNode *NodeToProcess = nodesToProcess.front();
+    StackNode *NodeToProcess = nodesToProcess.back();
 
     // Initialize class members.
     CurrentGeneration = NodeToProcess->currentGeneration();
@@ -597,7 +597,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
     } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
       // Push the next child onto the stack.
       DomTreeNode *child = NodeToProcess->nextChild();
-      nodesToProcess.push_front(
+      nodesToProcess.push_back(
          new StackNode(AvailableValues,
                        AvailableLoads,
                        AvailableCalls,
@@ -607,7 +607,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
       // It has been processed, and there are no more children to process,
       // so delete it and pop it off the stack.
       delete NodeToProcess;
-      nodesToProcess.pop_front();
+      nodesToProcess.pop_back();
     }
   } // while (!nodes...)
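The EarlyCSE change works because the non-recursive dominator-tree walk only ever pushes and pops at one end of the container, so a std::vector used as a stack is a drop-in replacement for std::deque with fewer allocations. The pattern, reduced to its skeleton (standalone sketch, not the pass):

#include <vector>

struct Node {
  std::vector<Node *> Children;
};

// The same push/visit/pop shape as EarlyCSE::runOnFunction above: a
// std::vector used purely as a LIFO stack. push_front/front/pop_front on a
// deque become push_back/back/pop_back on a vector.
static void walk(Node *Root) {
  std::vector<Node *> Stack;
  Stack.push_back(Root);
  while (!Stack.empty()) {
    Node *N = Stack.back();
    Stack.pop_back();
    for (Node *C : N->Children) // visit N here, then queue its children
      Stack.push_back(C);
  }
}

int main() {
  Node Leaf, Root;
  Root.Children.push_back(&Leaf);
  walk(&Root);
}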
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/GVN.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/GVN.cpp index 6af269dfed32..d49f3d09d81a 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1789,7 +1789,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) { ReplInst->setMetadata(Kind, MDNode::getMostGenericRange(IMD, ReplMD)); break; case LLVMContext::MD_prof: - llvm_unreachable("MD_prof in a non terminator instruction"); + llvm_unreachable("MD_prof in a non-terminator instruction"); break; case LLVMContext::MD_fpmath: ReplInst->setMetadata(Kind, MDNode::getMostGenericFPMath(IMD, ReplMD)); diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 235aaaa6f801..6e141bd2d6ac 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -63,6 +63,9 @@ static cl::opt VerifyIndvars( "verify-indvars", cl::Hidden, cl::desc("Verify the ScalarEvolution result after running indvars")); +static cl::opt ReduceLiveIVs("liv-reduce", cl::Hidden, + cl::desc("Reduce live induction variables.")); + namespace { class IndVarSimplify : public LoopPass { LoopInfo *LI; @@ -634,27 +637,13 @@ namespace { WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {} }; - - class WideIVVisitor : public IVVisitor { - ScalarEvolution *SE; - const DataLayout *TD; - - public: - WideIVInfo WI; - - WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV, - const DataLayout *TData) : - SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; } - - // Implement the interface used by simplifyUsersOfIV. - virtual void visitCast(CastInst *Cast); - }; } /// visitCast - Update information about the induction variable that is /// extended by this sign or zero extend operation. This is used to determine /// the final width of the IV before actually widening it. -void WideIVVisitor::visitCast(CastInst *Cast) { +static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, + const DataLayout *TD) { bool IsSigned = Cast->getOpcode() == Instruction::SExt; if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) return; @@ -891,15 +880,24 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { return AddRec; } +/// This IV user cannot be widen. Replace this use of the original narrow IV +/// with a truncation of the new wide IV to isolate and eliminate the narrow IV. +static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) { + IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); + Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); + DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); +} + /// WidenIVUse - Determine whether an individual user of the narrow IV can be /// widened. If so, return the wide clone of the user. Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // Stop traversing the def-use chain at inner-loop phis or post-loop phis. if (isa(DU.NarrowUse) && - LI->getLoopFor(DU.NarrowUse->getParent()) != L) + LI->getLoopFor(DU.NarrowUse->getParent()) != L) { + truncateIVUse(DU, DT); return 0; - + } // Our raison d'etre! Eliminate sign and zero extension. if (IsSigned ? 
isa(DU.NarrowUse) : isa(DU.NarrowUse)) { Value *NewDef = DU.WideDef; @@ -947,9 +945,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // This user does not evaluate to a recurence after widening, so don't // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. - IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); - Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); - DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); + truncateIVUse(DU, DT); return 0; } // Assume block terminators cannot evaluate to a recurrence. We can't to @@ -1079,10 +1075,37 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { return WidePhi; } +//===----------------------------------------------------------------------===// +// Live IV Reduction - Minimize IVs live across the loop. +//===----------------------------------------------------------------------===// + + //===----------------------------------------------------------------------===// // Simplification of IV users based on SCEV evaluation. //===----------------------------------------------------------------------===// +namespace { + class IndVarSimplifyVisitor : public IVVisitor { + ScalarEvolution *SE; + const DataLayout *TD; + PHINode *IVPhi; + + public: + WideIVInfo WI; + + IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, + const DataLayout *TData, const DominatorTree *DTree): + SE(SCEV), TD(TData), IVPhi(IV) { + DT = DTree; + WI.NarrowIV = IVPhi; + if (ReduceLiveIVs) + setSplitOverflowIntrinsics(); + } + + // Implement the interface used by simplifyUsersOfIV. + virtual void visitCast(CastInst *Cast) { visitIVCast(Cast, WI, SE, TD); } + }; +} /// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV /// users. Each successive simplification may push more users which may @@ -1114,12 +1137,12 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, PHINode *CurrIV = LoopPhis.pop_back_val(); // Information about sign/zero extensions of CurrIV. 
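truncateIVUse isolates the narrow IV: any use that cannot be widened now receives an explicit trunc of the wide IV, so the original narrow PHI can eventually be removed. Modeled at the C++ level (a hypothetical loop, since the pass itself rewrites IR):

#include <cstdint>
#include <cstdio>

// A consumer that genuinely wants 32 bits and so cannot be widened.
static void use32(uint32_t v) { printf("%u\n", v); }

// After widening: the loop runs on a 64-bit IV, and the one non-widenable
// use receives an explicit truncation, the equivalent of the trunc
// instruction truncateIVUse inserts.
static void afterWidening(uint64_t n) {
  for (uint64_t i = 0; i < n; ++i)       // wide IV
    use32(static_cast<uint32_t>(i));     // trunc at the non-widenable use
}

int main() { afterWidening(3); }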
- WideIVVisitor WIV(CurrIV, SE, TD); + IndVarSimplifyVisitor Visitor(CurrIV, SE, TD, DT); - Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV); + Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor); - if (WIV.WI.WidestNativeType) { - WideIVs.push_back(WIV.WI); + if (Visitor.WI.WidestNativeType) { + WideIVs.push_back(Visitor.WI); } } while(!LoopPhis.empty()); diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LLVMBuild.txt index cee911976804..1f6df7dac7ff 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LLVMBuild.txt @@ -20,4 +20,4 @@ type = Library name = Scalar parent = Transforms library_name = ScalarOpts -required_libraries = Analysis Core InstCombine Support Target TransformUtils +required_libraries = Analysis Core IPA InstCombine Support Target TransformUtils diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 952b76b822cf..39ac14e326a3 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -45,6 +45,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp index 335af81b957a..7186e8b6fda8 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" @@ -1088,9 +1089,8 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, L, SCEV::FlagAnyWrap)); { // Limit the lifetime of SCEVExpander. 
SCEVExpander Expander(*SE, "reroll"); - PHINode *NewIV = - cast(Expander.expandCodeFor(H, IV->getType(), - Header->begin())); + Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin()); + for (DenseSet::iterator J = BaseUseSet.begin(), JE = BaseUseSet.end(); J != JE; ++J) (*J)->replaceUsesOfWith(IV, NewIV); @@ -1101,20 +1101,23 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, if (Inc == 1) ICSCEV = SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale)); - Value *IC; - if (isa(ICSCEV)) { - IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(), BI); + // Iteration count SCEV minus 1 + const SCEV *ICMinus1SCEV = + SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1)); + + Value *ICMinus1; // Iteration count minus 1 + if (isa(ICMinus1SCEV)) { + ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI); } else { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) Preheader = InsertPreheaderForLoop(L, this); - IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(), - Preheader->getTerminator()); + ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), + Preheader->getTerminator()); } - Value *NewIVNext = NewIV->getIncomingValueForBlock(Header); - Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIVNext, IC, + Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1, "exitcond"); BI->setCondition(Cond); diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LoopRotation.cpp index 14c5655f0838..808c21b438b1 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -15,6 +15,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -301,7 +302,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { CodeMetrics Metrics; Metrics.analyzeBasicBlock(OrigHeader, *TTI); if (Metrics.notDuplicatable) { - DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non duplicatable" + DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable" << " instructions: "; L->dump()); return false; } diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 08ac38dec5dd..1d43efcc70fa 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -213,7 +214,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { notDuplicatable, TTI); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); if (notDuplicatable) { - DEBUG(dbgs() << " Not unrolling loop which contains non duplicatable" + DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable" << " instructions.\n"); return false; } diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/SampleProfile.cpp 
b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/SampleProfile.cpp index 9bcd702a9137..9856d7fbb28f 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/SampleProfile.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/SampleProfile.cpp @@ -54,6 +54,55 @@ static cl::opt SampleProfileFile( cl::desc("Profile file loaded by -sample-profile"), cl::Hidden); namespace { + +typedef DenseMap BodySampleMap; +typedef DenseMap BlockWeightMap; + +/// \brief Representation of the runtime profile for a function. +/// +/// This data structure contains the runtime profile for a given +/// function. It contains the total number of samples collected +/// in the function and a map of samples collected in every statement. +class SampleFunctionProfile { +public: + SampleFunctionProfile() : TotalSamples(0), TotalHeadSamples(0) {} + + bool emitAnnotations(Function &F); + uint32_t getInstWeight(Instruction &I, unsigned FirstLineno, + BodySampleMap &BodySamples); + uint32_t computeBlockWeight(BasicBlock *B, unsigned FirstLineno, + BodySampleMap &BodySamples); + void addTotalSamples(unsigned Num) { TotalSamples += Num; } + void addHeadSamples(unsigned Num) { TotalHeadSamples += Num; } + void addBodySamples(unsigned LineOffset, unsigned Num) { + BodySamples[LineOffset] += Num; + } + void print(raw_ostream &OS); + +protected: + /// \brief Total number of samples collected inside this function. + /// + /// Samples are cumulative, they include all the samples collected + /// inside this function and all its inlined callees. + unsigned TotalSamples; + + // \brief Total number of samples collected at the head of the function. + unsigned TotalHeadSamples; + + /// \brief Map line offsets to collected samples. + /// + /// Each entry in this map contains the number of samples + /// collected at the corresponding line offset. All line locations + /// are an offset from the start of the function. + BodySampleMap BodySamples; + + /// \brief Map basic blocks to their computed weights. + /// + /// The weight of a basic block is defined to be the maximum + /// of all the instruction weights in that block. + BlockWeightMap BlockWeights; +}; + /// \brief Sample-based profile reader. /// /// Each profile contains sample counts for all the functions @@ -77,61 +126,26 @@ namespace { /// 2. The samples collected at each line in F. To provide some /// protection against source code shuffling, line numbers should /// be relative to the start of the function. -class SampleProfile { +class SampleModuleProfile { public: - SampleProfile(StringRef F) : Profiles(0), Filename(F) {} + SampleModuleProfile(StringRef F) : Profiles(0), Filename(F) {} void dump(); void loadText(); void loadNative() { llvm_unreachable("not implemented"); } - bool emitAnnotations(Function &F); void printFunctionProfile(raw_ostream &OS, StringRef FName); void dumpFunctionProfile(StringRef FName); + SampleFunctionProfile &getProfile(const Function &F) { + return Profiles[F.getName()]; + } protected: - typedef DenseMap BodySampleMap; - typedef DenseMap BlockWeightMap; - - /// \brief Representation of the runtime profile for a function. - /// - /// This data structure contains the runtime profile for a given - /// function. It contains the total number of samples collected - /// in the function and a map of samples collected in every statement. - struct FunctionProfile { - /// \brief Total number of samples collected inside this function. 
- /// - /// Samples are cumulative, they include all the samples collected - /// inside this function and all its inlined callees. - unsigned TotalSamples; - - // \brief Total number of samples collected at the head of the function. - unsigned TotalHeadSamples; - - /// \brief Map line offsets to collected samples. - /// - /// Each entry in this map contains the number of samples - /// collected at the corresponding line offset. All line locations - /// are an offset from the start of the function. - BodySampleMap BodySamples; - - /// \brief Map basic blocks to their computed weights. - /// - /// The weight of a basic block is defined to be the maximum - /// of all the instruction weights in that block. - BlockWeightMap BlockWeights; - }; - - uint32_t getInstWeight(Instruction &I, unsigned FirstLineno, - BodySampleMap &BodySamples); - uint32_t computeBlockWeight(BasicBlock *B, unsigned FirstLineno, - BodySampleMap &BodySamples); - /// \brief Map every function to its associated profile. /// /// The profile of every function executed at runtime is collected - /// in the structure FunctionProfile. This maps function objects + /// in the structure SampleFunctionProfile. This maps function objects /// to their corresponding profiles. - StringMap Profiles; + StringMap Profiles; /// \brief Path name to the file holding the profile data. /// @@ -228,41 +242,48 @@ public: protected: /// \brief Profile reader object. - OwningPtr Profiler; + OwningPtr Profiler; /// \brief Name of the profile file to load. StringRef Filename; }; } -/// \brief Print the function profile for \p FName on stream \p OS. +/// \brief Print this function profile on stream \p OS. /// /// \param OS Stream to emit the output to. -/// \param FName Name of the function to print. -void SampleProfile::printFunctionProfile(raw_ostream &OS, StringRef FName) { - FunctionProfile FProfile = Profiles[FName]; - OS << "Function: " << FName << ", " << FProfile.TotalSamples << ", " - << FProfile.TotalHeadSamples << ", " << FProfile.BodySamples.size() +void SampleFunctionProfile::print(raw_ostream &OS) { + OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size() << " sampled lines\n"; - for (BodySampleMap::const_iterator SI = FProfile.BodySamples.begin(), - SE = FProfile.BodySamples.end(); + for (BodySampleMap::const_iterator SI = BodySamples.begin(), + SE = BodySamples.end(); SI != SE; ++SI) OS << "\tline offset: " << SI->first << ", number of samples: " << SI->second << "\n"; OS << "\n"; } +/// \brief Print the function profile for \p FName on stream \p OS. +/// +/// \param OS Stream to emit the output to. +/// \param FName Name of the function to print. +void SampleModuleProfile::printFunctionProfile(raw_ostream &OS, + StringRef FName) { + OS << "Function: " << FName << ":\n"; + Profiles[FName].print(OS); +} + /// \brief Dump the function profile for \p FName. /// /// \param FName Name of the function to print. -void SampleProfile::dumpFunctionProfile(StringRef FName) { +void SampleModuleProfile::dumpFunctionProfile(StringRef FName) { printFunctionProfile(dbgs(), FName); } /// \brief Dump all the function profiles found. 
-void SampleProfile::dump() { - for (StringMap::const_iterator I = Profiles.begin(), - E = Profiles.end(); +void SampleModuleProfile::dump() { + for (StringMap::const_iterator I = Profiles.begin(), + E = Profiles.end(); I != E; ++I) dumpFunctionProfile(I->getKey()); } @@ -297,7 +318,7 @@ void SampleProfile::dump() { /// for debugging purposes, but it should not be used to generate /// profiles for large programs, as the representation is extremely /// inefficient. -void SampleProfile::loadText() { +void SampleModuleProfile::loadText() { ExternalProfileTextLoader Loader(Filename); // Read the symbol table. @@ -308,13 +329,8 @@ void SampleProfile::loadText() { Line = Loader.readLine(); if (Line.getAsInteger(10, NumSymbols)) Loader.reportParseError("Expected a number, found " + Line); - for (int I = 0; I < NumSymbols; I++) { - StringRef FName = Loader.readLine(); - FunctionProfile &FProfile = Profiles[FName]; - FProfile.BodySamples.clear(); - FProfile.TotalSamples = 0; - FProfile.TotalHeadSamples = 0; - } + for (int I = 0; I < NumSymbols; I++) + Profiles[Loader.readLine()] = SampleFunctionProfile(); // Read the profile of each function. Since each function may be // mentioned more than once, and we are collecting flat profiles, @@ -333,10 +349,9 @@ void SampleProfile::loadText() { Matches[2].getAsInteger(10, NumSamples); Matches[3].getAsInteger(10, NumHeadSamples); Matches[4].getAsInteger(10, NumSampledLines); - FunctionProfile &FProfile = Profiles[FName]; - FProfile.TotalSamples += NumSamples; - FProfile.TotalHeadSamples += NumHeadSamples; - BodySampleMap &SampleMap = FProfile.BodySamples; + SampleFunctionProfile &FProfile = Profiles[FName]; + FProfile.addTotalSamples(NumSamples); + FProfile.addHeadSamples(NumHeadSamples); unsigned I; for (I = 0; I < NumSampledLines && !Loader.atEOF(); I++) { Line = Loader.readLine(); @@ -346,7 +361,7 @@ void SampleProfile::loadText() { unsigned LineOffset, NumSamples; Matches[1].getAsInteger(10, LineOffset); Matches[2].getAsInteger(10, NumSamples); - SampleMap[LineOffset] += NumSamples; + FProfile.addBodySamples(LineOffset, NumSamples); } if (I < NumSampledLines) @@ -354,6 +369,24 @@ void SampleProfile::loadText() { } } +char SampleProfileLoader::ID = 0; +INITIALIZE_PASS(SampleProfileLoader, "sample-profile", "Sample Profile loader", + false, false) + +bool SampleProfileLoader::doInitialization(Module &M) { + Profiler.reset(new SampleModuleProfile(Filename)); + Profiler->loadText(); + return true; +} + +FunctionPass *llvm::createSampleProfileLoaderPass() { + return new SampleProfileLoader(SampleProfileFile); +} + +FunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) { + return new SampleProfileLoader(Name); +} + /// \brief Get the weight for an instruction. /// /// The "weight" of an instruction \p Inst is the number of samples @@ -367,8 +400,9 @@ void SampleProfile::loadText() { /// \param BodySamples Map of relative source line locations to samples. /// /// \returns The profiled weight of I. -uint32_t SampleProfile::getInstWeight(Instruction &Inst, unsigned FirstLineno, - BodySampleMap &BodySamples) { +uint32_t SampleFunctionProfile::getInstWeight(Instruction &Inst, + unsigned FirstLineno, + BodySampleMap &BodySamples) { unsigned LOffset = Inst.getDebugLoc().getLine() - FirstLineno + 1; return BodySamples.lookup(LOffset); } @@ -385,13 +419,12 @@ uint32_t SampleProfile::getInstWeight(Instruction &Inst, unsigned FirstLineno, /// function. /// /// \returns The computed weight of B. 
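Per the comments above, an instruction's weight is the sample count recorded at its line offset from the function start, and a block's weight is the maximum over its instructions' weights. A standalone model of that computation (names are illustrative, not the pass's API):

#include <algorithm>
#include <cstdint>
#include <map>
#include <vector>

typedef std::map<unsigned, unsigned> BodySampleMap; // line offset -> samples

// Model of getInstWeight/computeBlockWeight: look up each instruction's
// line offset in the sample map and keep the maximum.
static uint32_t blockWeight(const std::vector<unsigned> &InstLines,
                            unsigned FirstLineno,
                            const BodySampleMap &BodySamples) {
  uint32_t Weight = 0;
  for (unsigned Line : InstLines) {
    unsigned LOffset = Line - FirstLineno + 1;
    BodySampleMap::const_iterator It = BodySamples.find(LOffset);
    if (It != BodySamples.end())
      Weight = std::max(Weight, (uint32_t)It->second);
  }
  return Weight;
}

int main() {
  BodySampleMap Samples;
  Samples[1] = 10; // 10 samples at the function's first line
  Samples[3] = 70;
  std::vector<unsigned> Block;
  Block.push_back(100); // function starts at source line 100
  Block.push_back(102);
  return blockWeight(Block, 100, Samples) == 70 ? 0 : 1;
}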
-uint32_t SampleProfile::computeBlockWeight(BasicBlock *B, unsigned FirstLineno, - BodySampleMap &BodySamples) { +uint32_t SampleFunctionProfile::computeBlockWeight(BasicBlock *B, + unsigned FirstLineno, + BodySampleMap &BodySamples) { // If we've computed B's weight before, return it. - Function *F = B->getParent(); - FunctionProfile &FProfile = Profiles[F->getName()]; std::pair Entry = - FProfile.BlockWeights.insert(std::make_pair(B, 0)); + BlockWeights.insert(std::make_pair(B, 0)); if (!Entry.second) return Entry.first->second; @@ -420,14 +453,13 @@ uint32_t SampleProfile::computeBlockWeight(BasicBlock *B, unsigned FirstLineno, /// metadata on B using the computed values. /// /// \param F The function to query. -bool SampleProfile::emitAnnotations(Function &F) { +bool SampleFunctionProfile::emitAnnotations(Function &F) { bool Changed = false; - FunctionProfile &FProfile = Profiles[F.getName()]; unsigned FirstLineno = inst_begin(F)->getDebugLoc().getLine(); MDBuilder MDB(F.getContext()); // Clear the block weights cache. - FProfile.BlockWeights.clear(); + BlockWeights.clear(); // When we find a branch instruction: For each edge E out of the branch, // the weight of E is the weight of the target block. @@ -443,8 +475,7 @@ bool SampleProfile::emitAnnotations(Function &F) { unsigned NSuccs = TI->getNumSuccessors(); for (unsigned I = 0; I < NSuccs; ++I) { BasicBlock *Succ = TI->getSuccessor(I); - uint32_t Weight = - computeBlockWeight(Succ, FirstLineno, FProfile.BodySamples); + uint32_t Weight = computeBlockWeight(Succ, FirstLineno, BodySamples); Weights.push_back(Weight); } @@ -456,24 +487,6 @@ bool SampleProfile::emitAnnotations(Function &F) { return Changed; } -char SampleProfileLoader::ID = 0; -INITIALIZE_PASS(SampleProfileLoader, "sample-profile", "Sample Profile loader", - false, false) - bool SampleProfileLoader::runOnFunction(Function &F) { - return Profiler->emitAnnotations(F); -} - -bool SampleProfileLoader::doInitialization(Module &M) { - Profiler.reset(new SampleProfile(Filename)); - Profiler->loadText(); - return true; -} - -FunctionPass *llvm::createSampleProfileLoaderPass() { - return new SampleProfileLoader(SampleProfileFile); -} - -FunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) { - return new SampleProfileLoader(Name); + return Profiler->getProfile(F).emitAnnotations(F); } diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/Scalar.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/Scalar.cpp index 857597e47462..c616179ef3f4 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -34,6 +34,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeCorrelatedValuePropagationPass(Registry); initializeDCEPass(Registry); initializeDeadInstEliminationPass(Registry); + initializeScalarizerPass(Registry); initializeDSEPass(Registry); initializeGVNPass(Registry); initializeEarlyCSEPass(Registry); @@ -81,6 +82,10 @@ void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createDeadStoreEliminationPass()); } +void LLVMAddScalarizerPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createScalarizerPass()); +} + void LLVMAddGVNPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createGVNPass()); } diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 57b290e14b13..394274d86d33 100644 --- 
a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -1731,7 +1731,7 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, // Compute the offset due to this GEP and check if the alloca has a // component element at that offset. SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); - // If this GEP is non constant then the last operand must have been a + // If this GEP is non-constant then the last operand must have been a // dynamic index into a vector. Pop this now as it has no impact on the // constant part of the offset. if (NonConstant) diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/Scalarizer.cpp new file mode 100644 index 000000000000..e614ecee53a0 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -0,0 +1,661 @@ +//===--- Scalarizer.cpp - Scalarize vector operations ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass converts vector operations into scalar operations, in order +// to expose optimization opportunities on the individual scalar operations. +// It is mainly intended for targets that do not have vector units, but it +// may also be useful for revectorizing code to different vector widths. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "scalarizer" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/InstVisitor.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +namespace { +// Used to store the scattered form of a vector. +typedef SmallVector ValueVector; + +// Used to map a vector Value to its scattered form. We use std::map +// because we want iterators to persist across insertion and because the +// values are relatively large. +typedef std::map ScatterMap; + +// Lists Instructions that have been replaced with scalar implementations, +// along with a pointer to their scattered forms. +typedef SmallVector, 16> GatherList; + +// Provides a very limited vector-like interface for lazily accessing one +// component of a scattered vector or vector pointer. +class Scatterer { +public: + Scatterer() {} + + // Scatter V into Size components. If new instructions are needed, + // insert them before BBI in BB. If Cache is nonnull, use it to cache + // the results. + Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, + ValueVector *cachePtr = 0); + + // Return component I, creating a new Value for it if necessary. + Value *operator[](unsigned I); + + // Return the number of components. + unsigned size() const { return Size; } + +private: + BasicBlock *BB; + BasicBlock::iterator BBI; + Value *V; + ValueVector *CachePtr; + PointerType *PtrTy; + ValueVector Tmp; + unsigned Size; +}; + +// FCmpSpliiter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp +// called Name that compares X and Y in the same way as FCI. 
+struct FCmpSplitter {
+  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
+  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
+                    const Twine &Name) const {
+    return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
+  }
+  FCmpInst &FCI;
+};
+
+// ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
+// called Name that compares X and Y in the same way as ICI.
+struct ICmpSplitter {
+  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
+  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
+                    const Twine &Name) const {
+    return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
+  }
+  ICmpInst &ICI;
+};
+
+// BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
+// a binary operator like BO called Name with operands X and Y.
+struct BinarySplitter {
+  BinarySplitter(BinaryOperator &bo) : BO(bo) {}
+  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
+                    const Twine &Name) const {
+    return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
+  }
+  BinaryOperator &BO;
+};
+
+// Information about a load or store that we're scalarizing.
+struct VectorLayout {
+  VectorLayout() : VecTy(0), ElemTy(0), VecAlign(0), ElemSize(0) {}
+
+  // Return the alignment of element I.
+  uint64_t getElemAlign(unsigned I) {
+    return MinAlign(VecAlign, I * ElemSize);
+  }
+
+  // The type of the vector.
+  VectorType *VecTy;
+
+  // The type of each element.
+  Type *ElemTy;
+
+  // The alignment of the vector.
+  uint64_t VecAlign;
+
+  // The size of each element.
+  uint64_t ElemSize;
+};
+
+class Scalarizer : public FunctionPass,
+                   public InstVisitor<Scalarizer, bool> {
+public:
+  static char ID;
+
+  Scalarizer() :
+    FunctionPass(ID) {
+    initializeScalarizerPass(*PassRegistry::getPassRegistry());
+  }
+
+  virtual bool doInitialization(Module &M);
+  virtual bool runOnFunction(Function &F);
+
+  // InstVisitor methods. They return true if the instruction was scalarized,
+  // false if nothing changed.
+  bool visitInstruction(Instruction &) { return false; }
+  bool visitSelectInst(SelectInst &SI);
+  bool visitICmpInst(ICmpInst &);
+  bool visitFCmpInst(FCmpInst &);
+  bool visitBinaryOperator(BinaryOperator &);
+  bool visitGetElementPtrInst(GetElementPtrInst &);
+  bool visitCastInst(CastInst &);
+  bool visitBitCastInst(BitCastInst &);
+  bool visitShuffleVectorInst(ShuffleVectorInst &);
+  bool visitPHINode(PHINode &);
+  bool visitLoadInst(LoadInst &);
+  bool visitStoreInst(StoreInst &);
+
+private:
+  Scatterer scatter(Instruction *, Value *);
+  void gather(Instruction *, const ValueVector &);
+  bool canTransferMetadata(unsigned Kind);
+  void transferMetadata(Instruction *, const ValueVector &);
+  bool getVectorLayout(Type *, unsigned, VectorLayout &);
+  bool finish();
+
+  template<typename T> bool splitBinary(Instruction &, const T &);
+
+  ScatterMap Scattered;
+  GatherList Gathered;
+  unsigned ParallelLoopAccessMDKind;
+  const DataLayout *TDL;
+};
+
+char Scalarizer::ID = 0;
+} // end anonymous namespace
+
+// This is disabled by default because having separate loads and stores makes
+// it more likely that the -combiner-alias-analysis limits will be reached.
+static cl::opt<bool> ScalarizeLoadStore + ("scalarize-load-store", cl::Hidden, cl::init(false), + cl::desc("Allow the scalarizer pass to scalarize loads and stores")); + +INITIALIZE_PASS(Scalarizer, "scalarizer", "Scalarize vector operations", + false, false) + +Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, + ValueVector *cachePtr) + : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) { + Type *Ty = V->getType(); + PtrTy = dyn_cast<PointerType>(Ty); + if (PtrTy) + Ty = PtrTy->getElementType(); + Size = Ty->getVectorNumElements(); + if (!CachePtr) + Tmp.resize(Size, 0); + else if (CachePtr->empty()) + CachePtr->resize(Size, 0); + else + assert(Size == CachePtr->size() && "Inconsistent vector sizes"); +} + +// Return component I, creating a new Value for it if necessary. +Value *Scatterer::operator[](unsigned I) { + ValueVector &CV = (CachePtr ? *CachePtr : Tmp); + // Try to reuse a previous value. + if (CV[I]) + return CV[I]; + IRBuilder<> Builder(BB, BBI); + if (PtrTy) { + if (!CV[0]) { + Type *Ty = + PointerType::get(PtrTy->getElementType()->getVectorElementType(), + PtrTy->getAddressSpace()); + CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0"); + } + if (I != 0) + CV[I] = Builder.CreateConstGEP1_32(CV[0], I, + V->getName() + ".i" + Twine(I)); + } else { + // Search through a chain of InsertElementInsts looking for element I. + // Record other elements in the cache. The new V is still suitable + // for all uncached indices. + for (;;) { + InsertElementInst *Insert = dyn_cast<InsertElementInst>(V); + if (!Insert) + break; + ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2)); + if (!Idx) + break; + unsigned J = Idx->getZExtValue(); + CV[J] = Insert->getOperand(1); + V = Insert->getOperand(0); + if (I == J) + return CV[J]; + } + CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I), + V->getName() + ".i" + Twine(I)); + } + return CV[I]; +} + +bool Scalarizer::doInitialization(Module &M) { + ParallelLoopAccessMDKind = + M.getContext().getMDKindID("llvm.mem.parallel_loop_access"); + return false; +} + +bool Scalarizer::runOnFunction(Function &F) { + TDL = getAnalysisIfAvailable<DataLayout>(); + for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { + BasicBlock *BB = BBI; + for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { + Instruction *I = II; + bool Done = visit(I); + ++II; + if (Done && I->getType()->isVoidTy()) + I->eraseFromParent(); + } + } + return finish(); +} + +// Return a scattered form of V that can be accessed by Point. V must be a +// vector or a pointer to a vector. +Scatterer Scalarizer::scatter(Instruction *Point, Value *V) { + if (Argument *VArg = dyn_cast<Argument>(V)) { + // Put the scattered form of arguments in the entry block, + // so that it can be used everywhere. + Function *F = VArg->getParent(); + BasicBlock *BB = &F->getEntryBlock(); + return Scatterer(BB, BB->begin(), V, &Scattered[V]); + } + if (Instruction *VOp = dyn_cast<Instruction>(V)) { + // Put the scattered form of an instruction directly after the + // instruction. + BasicBlock *BB = VOp->getParent(); + return Scatterer(BB, llvm::next(BasicBlock::iterator(VOp)), + V, &Scattered[V]); + } + // In the fallback case, just put the scattered form before Point and + // keep the result local to Point. + return Scatterer(Point->getParent(), Point, V); +} + +// Replace Op with the gathered form of the components in CV. Defer the +// deletion of Op and creation of the gathered form to the end of the pass, +// so that we can avoid creating the gathered form if all uses of Op are +// replaced with uses of CV.
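The deferral described above is easiest to see at the IR level. A hedged illustration (hypothetical values; IR shown in comments): if %v = add <2 x i32> ... is scalarized, gather() merely records the mapping from %v to its lanes, and only finish() decides whether the vector has to be rebuilt:

  // If %v still has vector uses at finish() time, the vector is
  // recreated with an insertelement chain (the ".upto" names below
  // match the scheme used in finish()):
  //   %v.upto0 = insertelement <2 x i32> undef, i32 %v.i0, i32 0
  //   %v.new   = insertelement <2 x i32> %v.upto0, i32 %v.i1, i32 1
  // Otherwise no insertelement chain is ever created.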
+void Scalarizer::gather(Instruction *Op, const ValueVector &CV) { + // Since we're not deleting Op yet, stub out its operands, so that it + // doesn't make anything live unnecessarily. + for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) + Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType())); + + transferMetadata(Op, CV); + + // If we already have a scattered form of Op (created from ExtractElements + // of Op itself), replace them with the new form. + ValueVector &SV = Scattered[Op]; + if (!SV.empty()) { + for (unsigned I = 0, E = SV.size(); I != E; ++I) { + Instruction *Old = cast<Instruction>(SV[I]); + CV[I]->takeName(Old); + Old->replaceAllUsesWith(CV[I]); + Old->eraseFromParent(); + } + } + SV = CV; + Gathered.push_back(GatherList::value_type(Op, &SV)); +} + +// Return true if it is safe to transfer the given metadata tag from +// vector to scalar instructions. +bool Scalarizer::canTransferMetadata(unsigned Tag) { + return (Tag == LLVMContext::MD_tbaa + || Tag == LLVMContext::MD_fpmath + || Tag == LLVMContext::MD_tbaa_struct + || Tag == LLVMContext::MD_invariant_load + || Tag == ParallelLoopAccessMDKind); +} + +// Transfer metadata from Op to the instructions in CV if it is known +// to be safe to do so. +void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) { + SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; + Op->getAllMetadataOtherThanDebugLoc(MDs); + for (unsigned I = 0, E = CV.size(); I != E; ++I) { + if (Instruction *New = dyn_cast<Instruction>(CV[I])) { + for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator + MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) + if (canTransferMetadata(MI->first)) + New->setMetadata(MI->first, MI->second); + New->setDebugLoc(Op->getDebugLoc()); + } + } +} + +// Try to fill in Layout from Ty, returning true on success. Alignment is +// the alignment of the vector, or 0 if the ABI default should be used. +bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment, + VectorLayout &Layout) { + if (!TDL) + return false; + + // Make sure we're dealing with a vector. + Layout.VecTy = dyn_cast<VectorType>(Ty); + if (!Layout.VecTy) + return false; + + // Check that we're dealing with full-byte elements. + Layout.ElemTy = Layout.VecTy->getElementType(); + if (TDL->getTypeSizeInBits(Layout.ElemTy) != + TDL->getTypeStoreSizeInBits(Layout.ElemTy)) + return false; + + if (Alignment) + Layout.VecAlign = Alignment; + else + Layout.VecAlign = TDL->getABITypeAlignment(Layout.VecTy); + Layout.ElemSize = TDL->getTypeStoreSize(Layout.ElemTy); + return true; +} + +// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name) +// to create an instruction like I with operands X and Y and name Name.
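A condensed sketch of the template defined next (hypothetical free function, not part of the patch; the real member function also scatters the operands and gathers the results):

  // Sketch: the lane-by-lane core of splitBinary. Split is any functor
  // shaped like the *Splitter structs above.
  template <typename SplitterT>
  static void splitLanes(IRBuilder<> &Builder, ArrayRef<Value *> Op0,
                         ArrayRef<Value *> Op1,
                         SmallVectorImpl<Value *> &Res,
                         const SplitterT &Split, const Twine &BaseName) {
    for (unsigned Elem = 0, E = (unsigned) Op0.size(); Elem != E; ++Elem)
      Res.push_back(Split(Builder, Op0[Elem], Op1[Elem],
                          BaseName + ".i" + Twine(Elem)));
  }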
+template<typename Splitter> +bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) { + VectorType *VT = dyn_cast<VectorType>(I.getType()); + if (!VT) + return false; + + unsigned NumElems = VT->getNumElements(); + IRBuilder<> Builder(I.getParent(), &I); + Scatterer Op0 = scatter(&I, I.getOperand(0)); + Scatterer Op1 = scatter(&I, I.getOperand(1)); + assert(Op0.size() == NumElems && "Mismatched binary operation"); + assert(Op1.size() == NumElems && "Mismatched binary operation"); + ValueVector Res; + Res.resize(NumElems); + for (unsigned Elem = 0; Elem < NumElems; ++Elem) + Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem], + I.getName() + ".i" + Twine(Elem)); + gather(&I, Res); + return true; +} + +bool Scalarizer::visitSelectInst(SelectInst &SI) { + VectorType *VT = dyn_cast<VectorType>(SI.getType()); + if (!VT) + return false; + + unsigned NumElems = VT->getNumElements(); + IRBuilder<> Builder(SI.getParent(), &SI); + Scatterer Op1 = scatter(&SI, SI.getOperand(1)); + Scatterer Op2 = scatter(&SI, SI.getOperand(2)); + assert(Op1.size() == NumElems && "Mismatched select"); + assert(Op2.size() == NumElems && "Mismatched select"); + ValueVector Res; + Res.resize(NumElems); + + if (SI.getOperand(0)->getType()->isVectorTy()) { + Scatterer Op0 = scatter(&SI, SI.getOperand(0)); + assert(Op0.size() == NumElems && "Mismatched select"); + for (unsigned I = 0; I < NumElems; ++I) + Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I], + SI.getName() + ".i" + Twine(I)); + } else { + Value *Op0 = SI.getOperand(0); + for (unsigned I = 0; I < NumElems; ++I) + Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I], + SI.getName() + ".i" + Twine(I)); + } + gather(&SI, Res); + return true; +} + +bool Scalarizer::visitICmpInst(ICmpInst &ICI) { + return splitBinary(ICI, ICmpSplitter(ICI)); +} + +bool Scalarizer::visitFCmpInst(FCmpInst &FCI) { + return splitBinary(FCI, FCmpSplitter(FCI)); +} + +bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) { + return splitBinary(BO, BinarySplitter(BO)); +} + +bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) { + VectorType *VT = dyn_cast<VectorType>(GEPI.getType()); + if (!VT) + return false; + + IRBuilder<> Builder(GEPI.getParent(), &GEPI); + unsigned NumElems = VT->getNumElements(); + unsigned NumIndices = GEPI.getNumIndices(); + + Scatterer Base = scatter(&GEPI, GEPI.getOperand(0)); + + SmallVector<Scatterer, 8> Ops; + Ops.resize(NumIndices); + for (unsigned I = 0; I < NumIndices; ++I) + Ops[I] = scatter(&GEPI, GEPI.getOperand(I + 1)); + + ValueVector Res; + Res.resize(NumElems); + for (unsigned I = 0; I < NumElems; ++I) { + SmallVector<Value *, 8> Indices; + Indices.resize(NumIndices); + for (unsigned J = 0; J < NumIndices; ++J) + Indices[J] = Ops[J][I]; + Res[I] = Builder.CreateGEP(Base[I], Indices, + GEPI.getName() + ".i" + Twine(I)); + if (GEPI.isInBounds()) + if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I])) + NewGEPI->setIsInBounds(); + } + gather(&GEPI, Res); + return true; +} + +bool Scalarizer::visitCastInst(CastInst &CI) { + VectorType *VT = dyn_cast<VectorType>(CI.getDestTy()); + if (!VT) + return false; + + unsigned NumElems = VT->getNumElements(); + IRBuilder<> Builder(CI.getParent(), &CI); + Scatterer Op0 = scatter(&CI, CI.getOperand(0)); + assert(Op0.size() == NumElems && "Mismatched cast"); + ValueVector Res; + Res.resize(NumElems); + for (unsigned I = 0; I < NumElems; ++I) + Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(), + CI.getName() + ".i" + Twine(I)); + gather(&CI, Res); + return true; +} + +bool Scalarizer::visitBitCastInst(BitCastInst &BCI) { + VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy()); + VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy()); + if (!DstVT || !SrcVT) + return false; + + unsigned DstNumElems = DstVT->getNumElements(); + unsigned SrcNumElems = SrcVT->getNumElements(); + IRBuilder<> Builder(BCI.getParent(), &BCI); + Scatterer Op0 = scatter(&BCI, BCI.getOperand(0)); + ValueVector Res; + Res.resize(DstNumElems); + + if (DstNumElems == SrcNumElems) { + for (unsigned I = 0; I < DstNumElems; ++I) + Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(), + BCI.getName() + ".i" + Twine(I)); + } else if (DstNumElems > SrcNumElems) { + // <M x t1> -> <N*M x t2>. Convert each t1 to <N x t2> and copy the + // individual elements to the destination. + unsigned FanOut = DstNumElems / SrcNumElems; + Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut); + unsigned ResI = 0; + for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) { + Value *V = Op0[Op0I]; + Instruction *VI; + // Look through any existing bitcasts before converting to <N x t2>. + // In the best case, the resulting conversion might be a no-op. + while ((VI = dyn_cast<Instruction>(V)) && + VI->getOpcode() == Instruction::BitCast) + V = VI->getOperand(0); + V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast"); + Scatterer Mid = scatter(&BCI, V); + for (unsigned MidI = 0; MidI < FanOut; ++MidI) + Res[ResI++] = Mid[MidI]; + } + } else { + // <N*M x t1> -> <M x t2>. Convert each group of <N x t1> into a t2. + unsigned FanIn = SrcNumElems / DstNumElems; + Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn); + unsigned Op0I = 0; + for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) { + Value *V = UndefValue::get(MidTy); + for (unsigned MidI = 0; MidI < FanIn; ++MidI) + V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI), + BCI.getName() + ".i" + Twine(ResI) + + ".upto" + Twine(MidI)); + Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(), + BCI.getName() + ".i" + Twine(ResI)); + } + } + gather(&BCI, Res); + return true; +} + +bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) { + VectorType *VT = dyn_cast<VectorType>(SVI.getType()); + if (!VT) + return false; + + unsigned NumElems = VT->getNumElements(); + Scatterer Op0 = scatter(&SVI, SVI.getOperand(0)); + Scatterer Op1 = scatter(&SVI, SVI.getOperand(1)); + ValueVector Res; + Res.resize(NumElems); + + for (unsigned I = 0; I < NumElems; ++I) { + int Selector = SVI.getMaskValue(I); + if (Selector < 0) + Res[I] = UndefValue::get(VT->getElementType()); + else if (unsigned(Selector) < Op0.size()) + Res[I] = Op0[Selector]; + else + Res[I] = Op1[Selector - Op0.size()]; + } + gather(&SVI, Res); + return true; +} + +bool Scalarizer::visitPHINode(PHINode &PHI) { + VectorType *VT = dyn_cast<VectorType>(PHI.getType()); + if (!VT) + return false; + + unsigned NumElems = VT->getNumElements(); + IRBuilder<> Builder(PHI.getParent(), &PHI); + ValueVector Res; + Res.resize(NumElems); + + unsigned NumOps = PHI.getNumOperands(); + for (unsigned I = 0; I < NumElems; ++I) + Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps, + PHI.getName() + ".i" + Twine(I)); + + for (unsigned I = 0; I < NumOps; ++I) { + Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I)); + BasicBlock *IncomingBlock = PHI.getIncomingBlock(I); + for (unsigned J = 0; J < NumElems; ++J) + cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock); + } + gather(&PHI, Res); + return true; +} + +bool Scalarizer::visitLoadInst(LoadInst &LI) { + if (!ScalarizeLoadStore) + return false; + if (!LI.isSimple()) + return false; + + VectorLayout Layout; + if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout)) + return false; + + unsigned
NumElems = Layout.VecTy->getNumElements(); + IRBuilder<> Builder(LI.getParent(), &LI); + Scatterer Ptr = scatter(&LI, LI.getPointerOperand()); + ValueVector Res; + Res.resize(NumElems); + + for (unsigned I = 0; I < NumElems; ++I) + Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I), + LI.getName() + ".i" + Twine(I)); + gather(&LI, Res); + return true; +} + +bool Scalarizer::visitStoreInst(StoreInst &SI) { + if (!ScalarizeLoadStore) + return false; + if (!SI.isSimple()) + return false; + + VectorLayout Layout; + Value *FullValue = SI.getValueOperand(); + if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout)) + return false; + + unsigned NumElems = Layout.VecTy->getNumElements(); + IRBuilder<> Builder(SI.getParent(), &SI); + Scatterer Ptr = scatter(&SI, SI.getPointerOperand()); + Scatterer Val = scatter(&SI, FullValue); + + ValueVector Stores; + Stores.resize(NumElems); + for (unsigned I = 0; I < NumElems; ++I) { + unsigned Align = Layout.getElemAlign(I); + Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align); + } + transferMetadata(&SI, Stores); + return true; +} + +// Delete the instructions that we scalarized. If a full vector result +// is still needed, recreate it using InsertElements. +bool Scalarizer::finish() { + if (Gathered.empty()) + return false; + for (GatherList::iterator GMI = Gathered.begin(), GME = Gathered.end(); + GMI != GME; ++GMI) { + Instruction *Op = GMI->first; + ValueVector &CV = *GMI->second; + if (!Op->use_empty()) { + // The value is still needed, so recreate it using a series of + // InsertElements. + Type *Ty = Op->getType(); + Value *Res = UndefValue::get(Ty); + BasicBlock *BB = Op->getParent(); + unsigned Count = Ty->getVectorNumElements(); + IRBuilder<> Builder(BB, Op); + if (isa<PHINode>(Op)) + Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); + for (unsigned I = 0; I < Count; ++I) + Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I), + Op->getName() + ".upto" + Twine(I)); + Res->takeName(Op); + Op->replaceAllUsesWith(Res); + } + Op->eraseFromParent(); + } + Gathered.clear(); + Scattered.clear(); + return true; +} + +FunctionPass *llvm::createScalarizerPass() { + return new Scalarizer(); +} diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp new file mode 100644 index 000000000000..cce016aafdda --- /dev/null +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp @@ -0,0 +1,114 @@ +//===-- ASanStackFrameLayout.cpp - helper for AddressSanitizer ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Definition of ComputeASanStackFrameLayout (see ASanStackFrameLayout.h). +// +//===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/ASanStackFrameLayout.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +namespace llvm { + +// We sort the stack variables by alignment (largest first) to minimize +// unnecessary large gaps due to alignment. +// It is tempting to also sort variables by size so that larger variables +// have larger redzones at both ends. But reordering will make report analysis +// harder, especially when temporary unnamed variables are present.
+// So, until we can provide more information (type, line number, etc) +// for the stack variables we avoid reordering them too much. +static inline bool CompareVars(const ASanStackVariableDescription &a, + const ASanStackVariableDescription &b) { + return a.Alignment > b.Alignment; +} + +// We also force minimal alignment for all vars to kMinAlignment so that vars +// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars. +static const size_t kMinAlignment = 16; + +static size_t RoundUpTo(size_t X, size_t RoundTo) { + assert((RoundTo & (RoundTo - 1)) == 0); + return (X + RoundTo - 1) & ~(RoundTo - 1); +} + +// The larger the variable Size, the larger the redzone. +// The resulting frame size is a multiple of Alignment. +static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) { + size_t Res = 0; + if (Size <= 4) Res = 16; + else if (Size <= 16) Res = 32; + else if (Size <= 128) Res = Size + 32; + else if (Size <= 512) Res = Size + 64; + else if (Size <= 4096) Res = Size + 128; + else Res = Size + 256; + return RoundUpTo(Res, Alignment); +} + +void +ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars, + size_t Granularity, size_t MinHeaderSize, + ASanStackFrameLayout *Layout) { + assert(Granularity >= 8 && Granularity <= 64 && + (Granularity & (Granularity - 1)) == 0); + assert(MinHeaderSize >= 16 && (MinHeaderSize & (MinHeaderSize - 1)) == 0 && + MinHeaderSize >= Granularity); + size_t NumVars = Vars.size(); + assert(NumVars > 0); + for (size_t i = 0; i < NumVars; i++) + Vars[i].Alignment = std::max(Vars[i].Alignment, kMinAlignment); + + std::stable_sort(Vars.begin(), Vars.end(), CompareVars); + SmallString<2048> StackDescriptionStorage; + raw_svector_ostream StackDescription(StackDescriptionStorage); + StackDescription << NumVars; + Layout->FrameAlignment = std::max(Granularity, Vars[0].Alignment); + SmallVector<uint8_t, 64> &SB(Layout->ShadowBytes); + SB.clear(); + size_t Offset = std::max(std::max(MinHeaderSize, Granularity), + Vars[0].Alignment); + assert((Offset % Granularity) == 0); + SB.insert(SB.end(), Offset / Granularity, kAsanStackLeftRedzoneMagic); + for (size_t i = 0; i < NumVars; i++) { + bool IsLast = i == NumVars - 1; + size_t Alignment = std::max(Granularity, Vars[i].Alignment); + (void)Alignment; // Used only in asserts. + size_t Size = Vars[i].Size; + const char *Name = Vars[i].Name; + assert((Alignment & (Alignment - 1)) == 0); + assert(Layout->FrameAlignment >= Alignment); + assert((Offset % Alignment) == 0); + assert(Size > 0); + StackDescription << " " << Offset << " " << Size << " " << strlen(Name) + << " " << Name; + size_t NextAlignment = IsLast ? Granularity + : std::max(Granularity, Vars[i + 1].Alignment); + size_t SizeWithRedzone = VarAndRedzoneSize(Vars[i].Size, NextAlignment); + SB.insert(SB.end(), Size / Granularity, 0); + if (Size % Granularity) + SB.insert(SB.end(), Size % Granularity); + SB.insert(SB.end(), (SizeWithRedzone - Size) / Granularity, + IsLast ?
kAsanStackRightRedzoneMagic + : kAsanStackMidRedzoneMagic); + Vars[i].Offset = Offset; + Offset += SizeWithRedzone; + } + if (Offset % MinHeaderSize) { + size_t ExtraRedzone = MinHeaderSize - (Offset % MinHeaderSize); + SB.insert(SB.end(), ExtraRedzone / Granularity, + kAsanStackRightRedzoneMagic); + Offset += ExtraRedzone; + } + Layout->DescriptionString = StackDescription.str(); + Layout->FrameSize = Offset; + assert((Layout->FrameSize % MinHeaderSize) == 0); + assert(Layout->FrameSize / Granularity == Layout->ShadowBytes.size()); +} + +} // llvm namespace diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 12de9eed4b85..214a3aa538e5 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -630,28 +630,29 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, } /// SplitBlockAndInsertIfThen - Split the containing block at the -/// specified instruction - everything before and including Cmp stays -/// in the old basic block, and everything after Cmp is moved to a +/// specified instruction - everything before and including SplitBefore stays +/// in the old basic block, and everything after SplitBefore is moved to a /// new block. The two blocks are connected by a conditional branch /// (with value of Cmp being the condition). /// Before: /// Head -/// Cmp +/// SplitBefore /// Tail /// After: /// Head -/// Cmp -/// if (Cmp) +/// if (Cond) /// ThenBlock +/// SplitBefore /// Tail /// /// If Unreachable is true, then ThenBlock ends with /// UnreachableInst, otherwise it branches to Tail. /// Returns the NewBasicBlock's terminator. -TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp, - bool Unreachable, MDNode *BranchWeights) { - Instruction *SplitBefore = Cmp->getNextNode(); +TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond, + Instruction *SplitBefore, + bool Unreachable, + MDNode *BranchWeights) { BasicBlock *Head = SplitBefore->getParent(); BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); TerminatorInst *HeadOldTerm = Head->getTerminator(); @@ -663,12 +664,45 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp, else CheckTerm = BranchInst::Create(Tail, ThenBlock); BranchInst *HeadNewTerm = - BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp); + BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond); HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); return CheckTerm; } +/// SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, +/// but also creates the ElseBlock. 
+/// Before: +/// Head +/// SplitBefore +/// Tail +/// After: +/// Head +/// if (Cond) +/// ThenBlock +/// else +/// ElseBlock +/// SplitBefore +/// Tail +void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, + TerminatorInst **ThenTerm, + TerminatorInst **ElseTerm, + MDNode *BranchWeights) { + BasicBlock *Head = SplitBefore->getParent(); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); + TerminatorInst *HeadOldTerm = Head->getTerminator(); + LLVMContext &C = Head->getContext(); + BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + BasicBlock *ElseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + *ThenTerm = BranchInst::Create(Tail, ThenBlock); + *ElseTerm = BranchInst::Create(Tail, ElseBlock); + BranchInst *HeadNewTerm = + BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/ElseBlock, Cond); + HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); + ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); +} + + /// GetIfCondition - Given a basic block (BB) with two predecessors, /// check to see if the merge at this block is due /// to an "if condition". If so, return the boolean condition that determines diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 6d13217df55d..82384a1edf52 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -286,6 +286,21 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, return CI; } +/// Append a suffix to the function name according to the type of 'Op'. +static void AppendTypeSuffix(Value *Op, StringRef &Name, SmallString<20> &NameBuffer) { + if (!Op->getType()->isDoubleTy()) { + NameBuffer += Name; + + if (Op->getType()->isFloatTy()) + NameBuffer += 'f'; + else + NameBuffer += 'l'; + + Name = NameBuffer; + } + return; +} + /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g. /// 'floor'). This function is known to take a single of type matching 'Op' and /// returns one value with the same type. If 'Op' is a long double, 'l' is @@ -293,15 +308,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, const AttributeSet &Attrs) { SmallString<20> NameBuffer; - if (!Op->getType()->isDoubleTy()) { - // If we need to add a suffix, copy into NameBuffer. - NameBuffer += Name; - if (Op->getType()->isFloatTy()) - NameBuffer += 'f'; // floorf - else - NameBuffer += 'l'; // floorl - Name = NameBuffer; - } + AppendTypeSuffix(Op, Name, NameBuffer); Module *M = B.GetInsertBlock()->getParent()->getParent(); Value *Callee = M->getOrInsertFunction(Name, Op->getType(), @@ -314,6 +321,27 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, return CI; } +/// EmitBinaryFloatFnCall - Emit a call to the binary function named 'Name' +/// (e.g. 'fmin'). This function is known to take type matching 'Op1' and 'Op2' +/// and return one value with the same type. If 'Op1/Op2' are long double, 'l' +/// is added as the suffix of name, if 'Op1/Op2' is a float, we add a 'f' +/// suffix. 
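A hedged usage sketch of the helper documented above (hypothetical caller, not part of the patch; assumes V1 and V2 already have float type, so AppendTypeSuffix rewrites "fmin" to "fminf"):

  // Sketch: emit a call to fminf(%a, %b) through the new helper.
  static Value *emitFMin(Value *V1, Value *V2, IRBuilder<> &B) {
    return EmitBinaryFloatFnCall(V1, V2, "fmin", B, AttributeSet());
  }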
+Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, + IRBuilder<> &B, const AttributeSet &Attrs) { + SmallString<20> NameBuffer; + AppendTypeSuffix(Op1, Name, NameBuffer); + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Value *Callee = M->getOrInsertFunction(Name, Op1->getType(), + Op1->getType(), Op2->getType(), NULL); + CallInst *CI = B.CreateCall2(Callee, Op1, Op2, Name); + CI->setAttributes(Attrs); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/CMakeLists.txt b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/CMakeLists.txt index 5afd6b8369a7..2858540c8c35 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMTransformUtils + ASanStackFrameLayout.cpp BasicBlockUtils.cpp BreakCriticalEdges.cpp BuildLibCalls.cpp diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/InlineFunction.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/InlineFunction.cpp index d021bcef4027..e35a1d0b006d 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -144,7 +144,6 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { void InvokeInliningInfo::forwardResume(ResumeInst *RI, SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) { BasicBlock *Dest = getInnerResumeDest(); - LandingPadInst *OuterLPad = getLandingPadInst(); BasicBlock *Src = RI->getParent(); BranchInst::Create(Dest, Src); @@ -155,16 +154,6 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI, InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); RI->eraseFromParent(); - - // Append the clauses from the outer landing pad instruction into the inlined - // landing pad instructions. - for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(), - E = InlinedLPads.end(); I != E; ++I) { - LandingPadInst *InlinedLPad = *I; - for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses(); - OuterIdx != OuterNum; ++OuterIdx) - InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); - } } /// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into /// an invoke site, we have to turn all of the calls that can throw into /// invokes. This function analyzes BB to see if there are any calls, and if so, /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI /// nodes in that block with the values specified in InvokeDestPHIValues. -/// -/// Returns true to indicate that the next block should be skipped. -static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, +static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, InvokeInliningInfo &Invoke) { - LandingPadInst *LPI = Invoke.getLandingPadInst(); - for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { Instruction *I = BBI++; - if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) { - unsigned NumClauses = LPI->getNumClauses(); - L->reserveClauses(NumClauses); - for (unsigned i = 0; i != NumClauses; ++i) - L->addClause(LPI->getClause(i)); - } - // We only need to check for function calls: inlined invoke // instructions require no special handling.
CallInst *CI = dyn_cast<CallInst>(I); @@ -223,10 +201,8 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, // Update any PHI nodes in the exceptional block to indicate that there is // now a new entry in them. Invoke.addIncomingPHIValuesFor(BB); - return false; + return; } - - return false; } /// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls @@ -252,13 +228,23 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) InlinedLPads.insert(II->getLandingPadInst()); + // Append the clauses from the outer landing pad instruction into the inlined + // landing pad instructions. + LandingPadInst *OuterLPad = Invoke.getLandingPadInst(); + for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(), + E = InlinedLPads.end(); I != E; ++I) { + LandingPadInst *InlinedLPad = *I; + unsigned OuterNum = OuterLPad->getNumClauses(); + InlinedLPad->reserveClauses(OuterNum); + for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) + InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); + if (OuterLPad->isCleanup()) + InlinedLPad->setCleanup(true); + } + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ if (InlinedCodeInfo.ContainsCalls) - if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) { - // Honor a request to skip the next block. - ++BB; - continue; - } + HandleCallsInBlockInlinedThroughInvoke(BB, Invoke); // Forward any resumes that are remaining here. if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/IntegerDivision.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/IntegerDivision.cpp index 3cb8ded8506a..e73a5433cca7 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/IntegerDivision.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/IntegerDivision.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// // -// This file contains an implementation of 32bit scalar integer division for -// targets that don't have native support. It's largely derived from -// compiler-rt's implementation of __udivsi3, but hand-tuned to reduce the -// amount of control flow +// This file contains an implementation of 32bit and 64bit scalar integer +// division for targets that don't have native support. It's largely derived +// from compiler-rt's implementations of __udivsi3 and __udivmoddi4, +// but hand-tuned for targets that prefer less control flow. // //===----------------------------------------------------------------------===// @@ -20,6 +20,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include <utility> using namespace llvm; @@ -31,7 +32,18 @@ using namespace llvm; /// be expanded if the user wishes static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { - ConstantInt *ThirtyOne = Builder.getInt32(31); + unsigned BitWidth = Dividend->getType()->getIntegerBitWidth(); + ConstantInt *Shift; + + if (BitWidth == 64) { + Shift = Builder.getInt64(63); + } else { + assert(BitWidth == 32 && "Unexpected bit width"); + Shift = Builder.getInt32(31); + } + + // Following instructions are generated for both i32 (shift 31) and + // i64 (shift 63).
// ; %dividend_sgn = ashr i32 %dividend, 31 // ; %divisor_sgn = ashr i32 %divisor, 31 @@ -42,8 +54,8 @@ static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, // ; %urem = urem i32 %dividend, %divisor // ; %xored = xor i32 %urem, %dividend_sgn // ; %srem = sub i32 %xored, %dividend_sgn - Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne); - Value *DivisorSign = Builder.CreateAShr(Divisor, ThirtyOne); + Value *DividendSign = Builder.CreateAShr(Dividend, Shift); + Value *DivisorSign = Builder.CreateAShr(Divisor, Shift); Value *DvdXor = Builder.CreateXor(Dividend, DividendSign); Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign); Value *UDividend = Builder.CreateSub(DvdXor, DividendSign); @@ -68,6 +80,8 @@ static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { // Remainder = Dividend - Quotient*Divisor + // Following instructions are generated for both i32 and i64 + // ; %quotient = udiv i32 %dividend, %divisor // ; %product = mul i32 %divisor, %quotient // ; %remainder = sub i32 %dividend, %product @@ -88,9 +102,20 @@ static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor, /// present, i.e. not folded), ready to be expanded if the user wishes. static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { - // Implementation taken from compiler-rt's __divsi3 + // Implementation taken from compiler-rt's __divsi3 and __divdi3 - ConstantInt *ThirtyOne = Builder.getInt32(31); + unsigned BitWidth = Dividend->getType()->getIntegerBitWidth(); + ConstantInt *Shift; + + if (BitWidth == 64) { + Shift = Builder.getInt64(63); + } else { + assert(BitWidth == 32 && "Unexpected bit width"); + Shift = Builder.getInt32(31); + } + + // Following instructions are generated for both i32 (shift 31) and + // i64 (shift 63). // ; %tmp = ashr i32 %dividend, 31 // ; %tmp1 = ashr i32 %divisor, 31 @@ -102,8 +127,8 @@ static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, // ; %q_mag = udiv i32 %u_dvnd, %u_dvsr // ; %tmp4 = xor i32 %q_mag, %q_sgn // ; %q = sub i32 %tmp4, %q_sgn - Value *Tmp = Builder.CreateAShr(Dividend, ThirtyOne); - Value *Tmp1 = Builder.CreateAShr(Divisor, ThirtyOne); + Value *Tmp = Builder.CreateAShr(Dividend, Shift); + Value *Tmp1 = Builder.CreateAShr(Divisor, Shift); Value *Tmp2 = Builder.CreateXor(Tmp, Dividend); Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp); Value *Tmp3 = Builder.CreateXor(Tmp1, Divisor); @@ -119,9 +144,9 @@ static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, return Q; } -/// Generates code to divide two unsigned scalar 32-bit integers. Returns the -/// quotient, rounded towards 0. Builder's insert point should be pointing where -/// the caller wants code generated, e.g. at the udiv instruction. +/// Generates code to divide two unsigned scalar 32-bit or 64-bit integers. +/// Returns the quotient, rounded towards 0. Builder's insert point should +/// point where the caller wants code generated, e.g. at the udiv instruction. static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { // The basic algorithm can be found in the compiler-rt project's @@ -129,18 +154,33 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // that's been hand-tuned to lessen the amount of control flow involved. 
// Some helper values - IntegerType *I32Ty = Builder.getInt32Ty(); + IntegerType *DivTy = cast(Dividend->getType()); + unsigned BitWidth = DivTy->getBitWidth(); - ConstantInt *Zero = Builder.getInt32(0); - ConstantInt *One = Builder.getInt32(1); - ConstantInt *ThirtyOne = Builder.getInt32(31); - ConstantInt *NegOne = ConstantInt::getSigned(I32Ty, -1); - ConstantInt *True = Builder.getTrue(); + ConstantInt *Zero; + ConstantInt *One; + ConstantInt *NegOne; + ConstantInt *MSB; + + if (BitWidth == 64) { + Zero = Builder.getInt64(0); + One = Builder.getInt64(1); + NegOne = ConstantInt::getSigned(DivTy, -1); + MSB = Builder.getInt64(63); + } else { + assert(BitWidth == 32 && "Unexpected bit width"); + Zero = Builder.getInt32(0); + One = Builder.getInt32(1); + NegOne = ConstantInt::getSigned(DivTy, -1); + MSB = Builder.getInt32(31); + } + + ConstantInt *True = Builder.getTrue(); BasicBlock *IBB = Builder.GetInsertBlock(); Function *F = IBB->getParent(); - Function *CTLZi32 = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, - I32Ty); + Function *CTLZ = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, + DivTy); // Our CFG is going to look like: // +---------------------+ @@ -190,6 +230,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // We'll be overwriting the terminator to insert our extra blocks SpecialCases->getTerminator()->eraseFromParent(); + // Same instructions are generated for both i32 (msb 31) and i64 (msb 63). + // First off, check for special cases: dividend or divisor is zero, divisor // is greater than dividend, and divisor is 1. // ; special-cases: @@ -209,12 +251,12 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero); Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero); Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2); - Value *Tmp0 = Builder.CreateCall2(CTLZi32, Divisor, True); - Value *Tmp1 = Builder.CreateCall2(CTLZi32, Dividend, True); + Value *Tmp0 = Builder.CreateCall2(CTLZ, Divisor, True); + Value *Tmp1 = Builder.CreateCall2(CTLZ, Dividend, True); Value *SR = Builder.CreateSub(Tmp0, Tmp1); - Value *Ret0_4 = Builder.CreateICmpUGT(SR, ThirtyOne); + Value *Ret0_4 = Builder.CreateICmpUGT(SR, MSB); Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4); - Value *RetDividend = Builder.CreateICmpEQ(SR, ThirtyOne); + Value *RetDividend = Builder.CreateICmpEQ(SR, MSB); Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend); Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend); Builder.CreateCondBr(EarlyRet, End, BB1); @@ -227,7 +269,7 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // ; br i1 %skipLoop, label %loop-exit, label %preheader Builder.SetInsertPoint(BB1); Value *SR_1 = Builder.CreateAdd(SR, One); - Value *Tmp2 = Builder.CreateSub(ThirtyOne, SR); + Value *Tmp2 = Builder.CreateSub(MSB, SR); Value *Q = Builder.CreateShl(Dividend, Tmp2); Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero); Builder.CreateCondBr(SkipLoop, LoopExit, Preheader); @@ -260,17 +302,17 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // ; %tmp12 = icmp eq i32 %sr_2, 0 // ; br i1 %tmp12, label %loop-exit, label %do-while Builder.SetInsertPoint(DoWhile); - PHINode *Carry_1 = Builder.CreatePHI(I32Ty, 2); - PHINode *SR_3 = Builder.CreatePHI(I32Ty, 2); - PHINode *R_1 = Builder.CreatePHI(I32Ty, 2); - PHINode *Q_2 = Builder.CreatePHI(I32Ty, 2); + PHINode *Carry_1 = Builder.CreatePHI(DivTy, 2); + PHINode *SR_3 = 
Builder.CreatePHI(DivTy, 2); + PHINode *R_1 = Builder.CreatePHI(DivTy, 2); + PHINode *Q_2 = Builder.CreatePHI(DivTy, 2); Value *Tmp5 = Builder.CreateShl(R_1, One); - Value *Tmp6 = Builder.CreateLShr(Q_2, ThirtyOne); + Value *Tmp6 = Builder.CreateLShr(Q_2, MSB); Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6); Value *Tmp8 = Builder.CreateShl(Q_2, One); Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8); Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7); - Value *Tmp10 = Builder.CreateAShr(Tmp9, 31); + Value *Tmp10 = Builder.CreateAShr(Tmp9, MSB); Value *Carry = Builder.CreateAnd(Tmp10, One); Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor); Value *R = Builder.CreateSub(Tmp7, Tmp11); @@ -285,8 +327,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // ; %q_4 = or i32 %carry_2, %tmp13 // ; br label %end Builder.SetInsertPoint(LoopExit); - PHINode *Carry_2 = Builder.CreatePHI(I32Ty, 2); - PHINode *Q_3 = Builder.CreatePHI(I32Ty, 2); + PHINode *Carry_2 = Builder.CreatePHI(DivTy, 2); + PHINode *Q_3 = Builder.CreatePHI(DivTy, 2); Value *Tmp13 = Builder.CreateShl(Q_3, One); Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13); Builder.CreateBr(End); @@ -295,7 +337,7 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ] // ; ret i32 %q_5 Builder.SetInsertPoint(End, End->begin()); - PHINode *Q_5 = Builder.CreatePHI(I32Ty, 2); + PHINode *Q_5 = Builder.CreatePHI(DivTy, 2); // Populate the Phis, since all values have now been created. Our Phis were: // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ] @@ -326,9 +368,8 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, /// Generate code to calculate the remainder of two integers, replacing Rem with /// the generated code. This currently generates code using the udiv expansion, /// but future work includes generating more specialized code, e.g. when more -/// information about the operands are known. Currently only implements 32bit -/// scalar division (due to udiv's limitation), but future work is removing this -/// limitation. +/// information about the operands are known. Implements both 32bit and 64bit +/// scalar division. /// /// @brief Replace Rem with generated code. bool llvm::expandRemainder(BinaryOperator *Rem) { @@ -338,6 +379,15 @@ bool llvm::expandRemainder(BinaryOperator *Rem) { IRBuilder<> Builder(Rem); + Type *RemTy = Rem->getType(); + if (RemTy->isVectorTy()) + llvm_unreachable("Div over vectors not supported"); + + unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); + + if (RemTyBitWidth != 32 && RemTyBitWidth != 64) + llvm_unreachable("Div of bitwidth other than 32 or 64 not supported"); + // First prepare the sign if it's a signed remainder if (Rem->getOpcode() == Instruction::SRem) { Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0), @@ -376,9 +426,8 @@ bool llvm::expandRemainder(BinaryOperator *Rem) { /// Generate code to divide two integers, replacing Div with the generated /// code. This currently generates code similarly to compiler-rt's /// implementations, but future work includes generating more specialized code -/// when more information about the operands are known. Currently only -/// implements 32bit scalar division, but future work is removing this -/// limitation. +/// when more information about the operands are known. Implements both +/// 32bit and 64bit scalar division. /// /// @brief Replace Div with generated code. 
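A hedged sketch of a caller driving the expansion entry point defined next (hypothetical helper, not part of the patch):

  // Sketch: expand the first scalar sdiv/udiv found in a block.
  // expandDivision rewrites the division in place (introducing new
  // blocks), so the caller stops iterating after a successful expansion.
  static bool expandFirstDivision(BasicBlock &BB) {
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
      Instruction *Inst = &*I;
      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Inst))
        if (BO->getOpcode() == Instruction::SDiv ||
            BO->getOpcode() == Instruction::UDiv)
          return expandDivision(BO);
    }
    return false;
  }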
bool llvm::expandDivision(BinaryOperator *Div) { @@ -388,9 +437,15 @@ bool llvm::expandDivision(BinaryOperator *Div) { IRBuilder<> Builder(Div); - if (Div->getType()->isVectorTy()) + Type *DivTy = Div->getType(); + if (DivTy->isVectorTy()) llvm_unreachable("Div over vectors not supported"); + unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); + + if (DivTyBitWidth != 32 && DivTyBitWidth != 64) + llvm_unreachable("Div of bitwidth other than 32 or 64 not supported"); + // First prepare the sign if it's a signed division if (Div->getOpcode() == Instruction::SDiv) { // Lower the code to unsigned division, and reset Div to point to the udiv. @@ -443,7 +498,7 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { if (RemTyBitWidth == 32) return expandRemainder(Rem); - // If bitwidth smaller than 32 extend inputs, truncate output and proceed + // If bitwidth smaller than 32 extend inputs, extend output and proceed // with 32 bit division. IRBuilder<> Builder(Rem); @@ -471,6 +526,55 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { return expandRemainder(cast(ExtRem)); } +/// Generate code to compute the remainder of two integers of bitwidth up to +/// 64 bits. Uses the above routines and extends the inputs/truncates the +/// outputs to operate in 64 bits. +/// +/// @brief Replace Rem with emulation code. +bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + Type *RemTy = Rem->getType(); + if (RemTy->isVectorTy()) + llvm_unreachable("Div over vectors not supported"); + + unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); + + if (RemTyBitWidth > 64) + llvm_unreachable("Div of bitwidth greater than 64 not supported"); + + if (RemTyBitWidth == 64) + return expandRemainder(Rem); + + // If bitwidth smaller than 64 extend inputs, extend output and proceed + // with 64 bit division. + IRBuilder<> Builder(Rem); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtRem; + Value *Trunc; + Type *Int64Ty = Builder.getInt64Ty(); + + if (Rem->getOpcode() == Instruction::SRem) { + ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int64Ty); + ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int64Ty); + ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtRem, RemTy); + + Rem->replaceAllUsesWith(Trunc); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + return expandRemainder(cast(ExtRem)); +} /// Generate code to divide two integers of bitwidth up to 32 bits. Uses the /// above routines and extends the inputs/truncates the outputs to operate @@ -495,7 +599,7 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { if (DivTyBitWidth == 32) return expandDivision(Div); - // If bitwidth smaller than 32 extend inputs, truncate output and proceed + // If bitwidth smaller than 32 extend inputs, extend output and proceed // with 32 bit division. IRBuilder<> Builder(Div); @@ -522,3 +626,53 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { return expandDivision(cast(ExtDiv)); } + +/// Generate code to divide two integers of bitwidth up to 64 bits. Uses the +/// above routines and extends the inputs/truncates the outputs to operate +/// in 64 bits. 
+/// +/// @brief Replace Div with emulation code. +bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) { + assert((Div->getOpcode() == Instruction::SDiv || + Div->getOpcode() == Instruction::UDiv) && + "Trying to expand division from a non-division function"); + + Type *DivTy = Div->getType(); + if (DivTy->isVectorTy()) + llvm_unreachable("Div over vectors not supported"); + + unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); + + if (DivTyBitWidth > 64) + llvm_unreachable("Div of bitwidth greater than 64 not supported"); + + if (DivTyBitWidth == 64) + return expandDivision(Div); + + // If bitwidth smaller than 64 extend inputs, extend output and proceed + // with 64 bit division. + IRBuilder<> Builder(Div); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtDiv; + Value *Trunc; + Type *Int64Ty = Builder.getInt64Ty(); + + if (Div->getOpcode() == Instruction::SDiv) { + ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int64Ty); + ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int64Ty); + ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int64Ty); + ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtDiv, DivTy); + + Div->replaceAllUsesWith(Trunc); + Div->dropAllReferences(); + Div->eraseFromParent(); + + return expandDivision(cast(ExtDiv)); +} diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/Local.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/Local.cpp index 2768041fb2b9..8a15c7c9a1b7 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/Local.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/Local.cpp @@ -127,8 +127,10 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // dest. If so, eliminate it as an explicit compare. if (i.getCaseSuccessor() == DefaultDest) { MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); - // MD should have 2 + NumCases operands. - if (MD && MD->getNumOperands() == 2 + SI->getNumCases()) { + unsigned NCases = SI->getNumCases(); + // Fold the case metadata into the default if there will be any branches + // left, unless the metadata doesn't match the switch. + if (NCases > 1 && MD && MD->getNumOperands() == 2 + NCases) { // Collect branch weights into a vector. 
SmallVector Weights; for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e; diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 162807d03c62..e2da1f40b27f 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -19,6 +19,7 @@ #define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 8f6eee3510d1..839bd208cf81 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -679,8 +679,8 @@ void PromoteMem2Reg::run() { // Iterating over NewPhiNodes is deterministic, so it is safe to try to // simplify and RAUW them as we go. If it was not, we could add uses to - // the values we replace with in a non deterministic order, thus creating - // non deterministic def->use chains. + // the values we replace with in a non-deterministic order, thus creating + // non-deterministic def->use chains. for (DenseMap, PHINode *>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index ff50b12cdb3f..0a4589796153 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -3222,7 +3222,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) { Case.getCaseSuccessor()->removePredecessor(SI->getParent()); SI->removeCase(Case); } - if (HasWeight) { + if (HasWeight && Weights.size() >= 2) { SmallVector MDWeights(Weights.begin(), Weights.end()); SI->setMetadata(LLVMContext::MD_prof, MDBuilder(SI->getParent()->getContext()). 
diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index bf3442aeaaad..d1f6c5c62a2f 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -18,12 +18,16 @@ #include "llvm/Transforms/Utils/SimplifyIndVar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -75,6 +79,9 @@ namespace { void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, bool IsSigned); + + Instruction *splitOverflowIntrinsic(Instruction *IVUser, + const DominatorTree *DT); }; } @@ -263,6 +270,71 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, return true; } +/// \brief Split sadd.with.overflow into add + sadd.with.overflow to allow +/// analysis and optimization. +/// +/// \return A new value representing the non-overflowing add if possible, +/// otherwise return the original value. +Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser, + const DominatorTree *DT) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(IVUser); + if (!II || II->getIntrinsicID() != Intrinsic::sadd_with_overflow) + return IVUser; + + // Find a branch guarded by the overflow check. + BranchInst *Branch = 0; + Instruction *AddVal = 0; + for (Value::use_iterator UI = II->use_begin(), E = II->use_end(); + UI != E; ++UI) { + if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(*UI)) { + if (ExtractInst->getNumIndices() != 1) + continue; + if (ExtractInst->getIndices()[0] == 0) + AddVal = ExtractInst; + else if (ExtractInst->getIndices()[0] == 1 && ExtractInst->hasOneUse()) + Branch = dyn_cast<BranchInst>(ExtractInst->use_back()); + } + } + if (!AddVal || !Branch) + return IVUser; + + BasicBlock *ContinueBB = Branch->getSuccessor(1); + if (llvm::next(pred_begin(ContinueBB)) != pred_end(ContinueBB)) + return IVUser; + + // Check if all users of the add are provably NSW. + bool AllNSW = true; + for (Value::use_iterator UI = AddVal->use_begin(), E = AddVal->use_end(); + UI != E; ++UI) { + if (Instruction *UseInst = dyn_cast<Instruction>(*UI)) { + BasicBlock *UseBB = UseInst->getParent(); + if (PHINode *PHI = dyn_cast<PHINode>(UseInst)) + UseBB = PHI->getIncomingBlock(UI); + if (!DT->dominates(ContinueBB, UseBB)) { + AllNSW = false; + break; + } + } + } + if (!AllNSW) + return IVUser; + + // Go for it... + IRBuilder<> Builder(IVUser); + Instruction *AddInst = dyn_cast<Instruction>( + Builder.CreateNSWAdd(II->getOperand(0), II->getOperand(1))); + + // The caller expects the new add to have the same form as the intrinsic. The + // IV operand position must be the same. + assert((AddInst->getOpcode() == Instruction::Add && + AddInst->getOperand(0) == II->getOperand(0)) && + "Bad add instruction created from overflow intrinsic."); + + AddVal->replaceAllUsesWith(AddInst); + DeadInsts.push_back(AddVal); + return AddInst; +} + /// pushIVUsers - Add all uses of Def to the current IV's worklist.
/// static void pushIVUsers( @@ -334,8 +406,16 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { while (!SimpleIVUsers.empty()) { std::pair<Instruction*, Instruction*> UseOper = SimpleIVUsers.pop_back_val(); + Instruction *UseInst = UseOper.first; + // Bypass back edges to avoid extra work. - if (UseOper.first == CurrIV) continue; + if (UseInst == CurrIV) continue; + + if (V && V->shouldSplitOverflowInstrinsics()) { + UseInst = splitOverflowIntrinsic(UseInst, V->getDomTree()); + if (!UseInst) + continue; + } Instruction *IVOperand = UseOper.second; for (unsigned N = 0; IVOperand; ++N) { diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 15b3e66f94ad..36d24624f946 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1100,6 +1100,49 @@ struct UnaryDoubleFPOpt : public LibCallOptimization { } }; +// Double -> Float Shrinking Optimizations for Binary Functions like 'fmin/fmax' +struct BinaryDoubleFPOpt : public LibCallOptimization { + bool CheckRetType; + BinaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {} + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 2 arguments of the same FP type, which match the + // result type. + if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + !FT->getParamType(0)->isFloatingPointTy()) + return 0; + + if (CheckRetType) { + // Check if all the uses for function like 'fmin/fmax' are converted to + // float. + for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end(); + ++UseI) { + FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI); + if (Cast == 0 || !Cast->getType()->isFloatTy()) + return 0; + } + } + + // If this is something like 'fmin((double)floatval1, (double)floatval2)', + // we convert it to fminf. + FPExtInst *Cast1 = dyn_cast<FPExtInst>(CI->getArgOperand(0)); + FPExtInst *Cast2 = dyn_cast<FPExtInst>(CI->getArgOperand(1)); + if (Cast1 == 0 || !Cast1->getOperand(0)->getType()->isFloatTy() || + Cast2 == 0 || !Cast2->getOperand(0)->getType()->isFloatTy()) + return 0; + + // fmin((double)floatval1, (double)floatval2) + // -> (double)fmin(floatval1, floatval2) + Value *V = NULL; + Value *V1 = Cast1->getOperand(0); + Value *V2 = Cast2->getOperand(0); + V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B, + Callee->getAttributes()); + return B.CreateFPExt(V, B.getDoubleTy()); + } +}; + struct UnsafeFPLibCallOptimization : public LibCallOptimization { bool UnsafeFPShrink; UnsafeFPLibCallOptimization(bool UnsafeFPShrink) { @@ -1162,6 +1205,12 @@ struct PowOpt : public UnsafeFPLibCallOptimization { hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f, LibFunc::exp2l)) return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes()); + // pow(10.0, x) -> exp10(x) + if (Op1C->isExactlyValue(10.0) && + hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f, + LibFunc::exp10l)) + return EmitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp10), B, + Callee->getAttributes()); } ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); @@ -1975,6 +2024,7 @@ static MemSetOpt MemSet; // Math library call optimizations.
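The BinaryDoubleFP optimization defined above performs the double-to-float shrinking for fmin/fmax; a hedged illustration of the net rewrite (hypothetical values, IR shown in comments):

  // Before: both operands are widened floats and the call is double.
  //   %x = fpext float %a to double
  //   %y = fpext float %b to double
  //   %d = call double @fmin(double %x, double %y)
  //   %t = fptrunc double %d to float
  // After the rewrite (plus trivial fpext/fptrunc folding):
  //   %t = call float @fminf(float %a, float %b)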
static UnaryDoubleFPOpt UnaryDoubleFP(false); +static BinaryDoubleFPOpt BinaryDoubleFP(false); static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); static SinCosPiOpt SinCosPi; @@ -2144,6 +2194,11 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { if (UnsafeFPShrink && hasFloatVersion(FuncName)) return &UnsafeUnaryDoubleFP; return 0; + case LibFunc::fmin: + case LibFunc::fmax: + if (hasFloatVersion(FuncName)) + return &BinaryDoubleFP; + return 0; case LibFunc::memcpy_chk: return &MemCpyChk; default: diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Vectorize/LLVMBuild.txt b/external/bsd/llvm/dist/llvm/lib/Transforms/Vectorize/LLVMBuild.txt index 7167d273ae50..b57ce6c5aa09 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Vectorize/LLVMBuild.txt +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Vectorize/LLVMBuild.txt @@ -20,5 +20,4 @@ type = Library name = Vectorize parent = Transforms library_name = Vectorize -required_libraries = Analysis Core InstCombine Support Target TransformUtils - +required_libraries = Analysis Core Support Target TransformUtils diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 874db9ff1526..d83aa7642e6c 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -564,7 +564,7 @@ public: /// pointer itself is an induction variable. /// This check allows us to vectorize A[idx] into a wide load/store. /// Returns: - /// 0 - Stride is unknown or non consecutive. + /// 0 - Stride is unknown or non-consecutive. /// 1 - Address is consecutive. /// -1 - Address is consecutive, and decreasing. int isConsecutivePtr(Value *Ptr); @@ -763,10 +763,13 @@ struct LoopVectorizeHints { unsigned Width; /// Vectorization unroll factor. unsigned Unroll; + /// Vectorization forced (-1 not selected, 0 force disabled, 1 force enabled) + int Force; LoopVectorizeHints(const Loop *L, bool DisableUnrolling) : Width(VectorizationFactor) , Unroll(DisableUnrolling ? 1 : VectorizationUnroll) + , Force(-1) , LoopID(L->getLoopID()) { getHints(L); // The command line options override any loop metadata except for when @@ -877,6 +880,11 @@ private: Unroll = Val; else DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n"); + } else if (Hint == "enable") { + if (C->getBitWidth() == 1) + Force = Val; + else + DEBUG(dbgs() << "LV: ignoring invalid enable hint metadata\n"); } else { DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint << '\n'); } @@ -888,8 +896,10 @@ struct LoopVectorize : public LoopPass { /// Pass identification, replacement for typeid static char ID; - explicit LoopVectorize(bool NoUnrolling = false) - : LoopPass(ID), DisableUnrolling(NoUnrolling) { + explicit LoopVectorize(bool NoUnrolling = false, bool AlwaysVectorize = true) + : LoopPass(ID), + DisableUnrolling(NoUnrolling), + AlwaysVectorize(AlwaysVectorize) { initializeLoopVectorizePass(*PassRegistry::getPassRegistry()); } @@ -900,6 +910,7 @@ struct LoopVectorize : public LoopPass { DominatorTree *DT; TargetLibraryInfo *TLI; bool DisableUnrolling; + bool AlwaysVectorize; virtual bool runOnLoop(Loop *L, LPPassManager &LPM) { // We only vectorize innermost loops. 
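// Illustrative sketch, not part of the patch: with the new Force field, a
// loop whose latch branch carries metadata along these lines (the full hint
// name is an assumption based on the llvm.vectorizer.* namespace this
// release strips before comparing against "enable"):
//
//   br i1 %exitcond, label %exit, label %body, !llvm.loop !0
//   !0 = metadata !{metadata !0, metadata !1}
//   !1 = metadata !{metadata !"llvm.vectorizer.enable", i1 1}
//
// is parsed as Force == 1 (force-enable); an i1 0 operand yields Force == 0
// (force-disable), and loops without the hint keep the default Force == -1.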
@@ -919,7 +930,7 @@ struct LoopVectorize : public LoopPass { return false; if (DL == NULL) { - DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout\n"); + DEBUG(dbgs() << "LV: Not vectorizing: Missing data layout\n"); return false; } @@ -928,15 +939,25 @@ struct LoopVectorize : public LoopPass { LoopVectorizeHints Hints(L, DisableUnrolling); + if (Hints.Force == 0) { + DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); + return false; + } + + if (!AlwaysVectorize && Hints.Force != 1) { + DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); + return false; + } + if (Hints.Width == 1 && Hints.Unroll == 1) { - DEBUG(dbgs() << "LV: Not vectorizing.\n"); + DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); return false; } // Check if it is legal to vectorize the loop. LoopVectorizationLegality LVL(L, SE, DL, DT, TLI); if (!LVL.canVectorize()) { - DEBUG(dbgs() << "LV: Not vectorizing.\n"); + DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); return false; } @@ -949,7 +970,8 @@ struct LoopVectorize : public LoopPass { Attribute::AttrKind SzAttr = Attribute::OptimizeForSize; Attribute::AttrKind FlAttr = Attribute::NoImplicitFloat; unsigned FnIndex = AttributeSet::FunctionIndex; - bool OptForSize = F->getAttributes().hasAttribute(FnIndex, SzAttr); + bool OptForSize = Hints.Force != 1 && + F->getAttributes().hasAttribute(FnIndex, SzAttr); bool NoFloat = F->getAttributes().hasAttribute(FnIndex, FlAttr); if (NoFloat) { @@ -973,6 +995,7 @@ struct LoopVectorize : public LoopPass { DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); if (UF == 1) return false; + DEBUG(dbgs() << "LV: Trying to at least unroll the loops.\n"); // We decided not to vectorize, but we may want to unroll. InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF); Unroller.vectorize(&LVL); @@ -1093,7 +1116,7 @@ static unsigned getGEPInductionOperand(DataLayout *DL, } int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { - assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr"); + assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr"); // Make sure that the pointer does not point to structs. if (Ptr->getType()->getPointerElementType()->isAggregateType()) return 0; @@ -1216,7 +1239,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, if (ScalarAllocatedSize != VectorElementSize) return scalarizeInstruction(Instr); - // If the pointer is loop invariant or if it is non consecutive, + // If the pointer is loop invariant or if it is non-consecutive, // scalarize the load. int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); bool Reverse = ConsecutiveStride < 0; @@ -2430,7 +2453,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, setDebugLocFromInst(Builder, P); // Check for PHI nodes that are lowered to vector selects. if (P->getParent() != OrigLoop->getHeader()) { - // We know that all PHIs in non header blocks are converted into + // We know that all PHIs in non-header blocks are converted into // selects, so we don't have to worry about the insertion order and we // can just use the builder. // At this point we generate the predication tree. There may be @@ -2781,6 +2804,23 @@ void InnerLoopVectorizer::updateAnalysis() { DEBUG(DT->verifyAnalysis()); } +/// \brief Check whether it is safe to if-convert this phi node. +/// +/// Phi nodes with constant expressions that can trap are not safe to if +/// convert. 
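// Illustrative sketch, not part of the patch: a phi that must block
// if-conversion because one incoming value is a trapping constant
// expression (@g is a hypothetical external global, so the divisor is not
// known to be non-zero):
//
//   %p = phi i32 [ sdiv (i32 7, i32 ptrtoint (i32* @g to i32)), %then ],
//                [ 0, %else ]
//
// Flattening the CFG turns such a phi into a select, which would evaluate
// the division unconditionally, hence the canTrap() rejection below.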
+static bool canIfConvertPHINodes(BasicBlock *BB) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + PHINode *Phi = dyn_cast<PHINode>(I); + if (!Phi) + return true; + for (unsigned p = 0, e = Phi->getNumIncomingValues(); p != e; ++p) + if (Constant *C = dyn_cast<Constant>(Phi->getIncomingValue(p))) + if (C->canTrap()) + return false; + } + return true; +} + bool LoopVectorizationLegality::canVectorizeWithIfConvert() { if (!EnableIfConversion) return false; @@ -2807,6 +2847,7 @@ } // Collect the blocks that need predication. + BasicBlock *Header = TheLoop->getHeader(); for (Loop::block_iterator BI = TheLoop->block_begin(), BE = TheLoop->block_end(); BI != BE; ++BI) { BasicBlock *BB = *BI; @@ -2816,8 +2857,12 @@ return false; // We must be able to predicate all blocks that need to be predicated. - if (blockNeedsPredication(BB) && !blockCanBePredicated(BB, SafePointes)) + if (blockNeedsPredication(BB)) { + if (!blockCanBePredicated(BB, SafePointes)) + return false; + } else if (BB != Header && !canIfConvertPHINodes(BB)) return false; + } // We can if-convert this loop. @@ -2846,7 +2891,7 @@ bool LoopVectorizationLegality::canVectorize() { DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() << '\n'); - // Check if we can if-convert non single-bb loops. + // Check if we can if-convert non-single-bb loops. unsigned NumBlocks = TheLoop->getNumBlocks(); if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); @@ -3499,7 +3544,7 @@ private: // We can access this many bytes in parallel safely. unsigned MaxSafeDepDistBytes; - /// \brief If we see a non constant dependence distance we can still try to + /// \brief If we see a non-constant dependence distance we can still try to /// vectorize this loop with runtime checks. bool ShouldRetryWithRuntimeCheck; @@ -3535,7 +3580,7 @@ static bool isInBoundsGep(Value *Ptr) { static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr, const Loop *Lp) { const Type *Ty = Ptr->getType(); - assert(Ty->isPointerTy() && "Unexpected non ptr"); + assert(Ty->isPointerTy() && "Unexpected non-ptr"); // Make sure that the pointer does not point to aggregate types. const PointerType *PtrTy = cast<PointerType>(Ty); @@ -3699,7 +3744,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist); if (!C) { - DEBUG(dbgs() << "LV: Dependence because of non constant distance\n"); + DEBUG(dbgs() << "LV: Dependence because of non-constant distance\n"); ShouldRetryWithRuntimeCheck = true; return true; } @@ -4140,7 +4185,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // Check whether we found a reduction operator. FoundReduxOp |= !IsAPhi; - // Process users of current instruction. Push non PHI nodes after PHI nodes + // Process users of current instruction. Push non-PHI nodes after PHI nodes // onto the stack. This way we are going to have seen all inputs to PHI // nodes once we get to them. SmallVector<Instruction *, 8> NonPHIs; @@ -4371,6 +4416,14 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB, if (it->mayWriteToMemory() || it->mayThrow()) return false; + // Check that we don't have a constant expression that can trap as operand.
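// Illustrative sketch, not part of the patch: the same canTrap() concern,
// here for an operand of an instruction in a to-be-predicated block, e.g.
//
//   %x = add i32 %a, sdiv (i32 7, i32 ptrtoint (i32* @g to i32))
//
// Predication masks the instruction itself, but the constant-expression
// operand is still evaluated once the flattened block executes
// unconditionally, so such blocks are rejected here as well.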
+ for (Instruction::op_iterator OI = it->op_begin(), OE = it->op_end(); + OI != OE; ++OI) { + if (Constant *C = dyn_cast<Constant>(*OI)) + if (C->canTrap()) + return false; + } + // The instructions below can trap. switch (it->getOpcode()) { default: continue; @@ -5016,8 +5069,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) namespace llvm { - Pass *createLoopVectorizePass(bool NoUnrolling) { - return new LoopVectorize(NoUnrolling); + Pass *createLoopVectorizePass(bool NoUnrolling, bool AlwaysVectorize) { + return new LoopVectorize(NoUnrolling, AlwaysVectorize); } } diff --git a/external/bsd/llvm/dist/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/external/bsd/llvm/dist/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index c72b51fc4444..f735c9e74702 100644 --- a/external/bsd/llvm/dist/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/external/bsd/llvm/dist/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -442,7 +443,7 @@ private: /// \returns whether the VectorizableTree is fully vectoriable and will /// be beneficial even the tree height is tiny. - bool isFullyVectorizableTinyTree(); + bool isFullyVectorizableTinyTree(); struct TreeEntry { TreeEntry() : Scalars(), VectorizedValue(0), LastScalarIndex(0), @@ -521,7 +522,7 @@ private: /// Holds all of the instructions that we gathered. SetVector<Instruction *> GatherSeq; /// A list of blocks that we are going to CSE. - SmallSet<BasicBlock *, 8> CSEBlocks; + SetVector<BasicBlock *> CSEBlocks; /// Numbers instructions in different blocks. DenseMap<BasicBlock *, BlockNumbering> BlocksNumbers; @@ -930,7 +931,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) { for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) if (!isConsecutiveAccess(VL[i], VL[i + 1])) { newTreeEntry(VL, false); - DEBUG(dbgs() << "SLP: Non consecutive store.\n"); + DEBUG(dbgs() << "SLP: Non-consecutive store.\n"); return; } @@ -1113,16 +1114,19 @@ int BoUpSLP::getTreeCost() { Cost += C; } + SmallSet<Value *, 16> ExtractCostCalculated; int ExtractCost = 0; for (UserList::iterator I = ExternalUses.begin(), E = ExternalUses.end(); I != E; ++I) { + // We only add extract cost once for the same scalar.
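// Illustrative sketch, not part of the patch: if one vectorized scalar has
// several users outside the tree, e.g.
//
//   %s = fadd float %a, %b      ; becomes one lane of a <4 x float> bundle
//   %u1 = fmul float %s, %c     ; external user
//   %u2 = fsub float %s, %d     ; external user
//
// a single extractelement rematerializes %s for both users, so charging one
// getVectorInstrCost() per external use would overstate the extract cost.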
+ if (!ExtractCostCalculated.insert(I->Scalar)) + continue; VectorType *VecTy = VectorType::get(I->Scalar->getType(), BundleWidth); ExtractCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, I->Lane); } - DEBUG(dbgs() << "SLP: Total Cost " << Cost + ExtractCost<< ".\n"); return Cost + ExtractCost; } diff --git a/external/bsd/llvm/dist/llvm/test/Analysis/BasicAA/phi-aa.ll b/external/bsd/llvm/dist/llvm/test/Analysis/BasicAA/phi-aa.ll index 6aa26c185e0f..74279e1c4c93 100644 --- a/external/bsd/llvm/dist/llvm/test/Analysis/BasicAA/phi-aa.ll +++ b/external/bsd/llvm/dist/llvm/test/Analysis/BasicAA/phi-aa.ll @@ -1,10 +1,14 @@ ; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + ; rdar://7282591 @X = common global i32 0 @Y = common global i32 0 @Z = common global i32 0 +; CHECK-LABEL: foo ; CHECK: NoAlias: i32* %P, i32* @Z define void @foo(i32 %cond) nounwind { @@ -29,3 +33,46 @@ bb2: return: ret void } + +; Pointers can vary in between iterations of loops. +; PR18068 + +; CHECK-LABEL: pr18068 +; CHECK: MayAlias: i32* %0, i32* %arrayidx5 + +define i32 @pr18068(i32* %jj7, i32* %j) { +entry: + %oa5 = alloca [100 x i32], align 16 + br label %codeRepl + +codeRepl: + %0 = phi i32* [ %arrayidx13, %for.body ], [ %j, %entry ] + %targetBlock = call i1 @cond(i32* %jj7) + br i1 %targetBlock, label %for.body, label %bye + +for.body: + %1 = load i32* %jj7, align 4 + %idxprom4 = zext i32 %1 to i64 + %arrayidx5 = getelementptr inbounds [100 x i32]* %oa5, i64 0, i64 %idxprom4 + %2 = load i32* %arrayidx5, align 4 + %sub6 = sub i32 %2, 6 + store i32 %sub6, i32* %arrayidx5, align 4 + ; %0 and %arrayidx5 can alias! It is not safe to DSE the above store. 
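; Illustrative trace, not part of the test: on iteration N+1 the phi %0 is
; the %arrayidx13 of iteration N, i.e. &%oa5[zext(%1_N - 1)], while
; %arrayidx5 is &%oa5[zext(%1_{N+1})]. If @inc updates *%jj7 so that
; %1_{N+1} == %1_N - 1, the two pointers address the same element, which is
; why BasicAA must answer MayAlias across iterations instead of deriving
; NoAlias from a single iteration's values.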
+ %3 = load i32* %0, align 4 + store i32 %3, i32* %arrayidx5, align 4 + %sub11 = add i32 %1, -1 + %idxprom12 = zext i32 %sub11 to i64 + %arrayidx13 = getelementptr inbounds [100 x i32]* %oa5, i64 0, i64 %idxprom12 + call void @inc(i32* %jj7) + br label %codeRepl + +bye: + %.reload = load i32* %jj7, align 4 + ret i32 %.reload +} + +declare i1 @cond(i32*) + +declare void @inc(i32*) + + diff --git a/external/bsd/llvm/dist/llvm/test/Assembler/ConstantExprFoldSelect.ll b/external/bsd/llvm/dist/llvm/test/Assembler/ConstantExprFoldSelect.ll new file mode 100644 index 000000000000..b000e02653c6 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/Assembler/ConstantExprFoldSelect.ll @@ -0,0 +1,8 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s +; PR18319 + +define void @function() { + %c = trunc <4 x i16> select (<4 x i1> , <4 x i16> , <4 x i16> ) to <4 x i8> +; CHECK: + ret void +} diff --git a/external/bsd/llvm/dist/llvm/test/Assembler/addrspacecast-alias.ll b/external/bsd/llvm/dist/llvm/test/Assembler/addrspacecast-alias.ll new file mode 100644 index 000000000000..6623a25d1849 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/Assembler/addrspacecast-alias.ll @@ -0,0 +1,6 @@ +; RUN: llvm-as -disable-output %s + +; Test that global aliases are allowed to be constant addrspacecast + +@i = internal addrspace(1) global i8 42 +@ia = alias internal i8 addrspace(2)* addrspacecast (i8 addrspace(1)* @i to i8 addrspace(2)*) diff --git a/external/bsd/llvm/dist/llvm/test/Assembler/functionlocal-metadata.ll b/external/bsd/llvm/dist/llvm/test/Assembler/functionlocal-metadata.ll index 0f0ab4c1ba09..0d93bfdb275d 100644 --- a/external/bsd/llvm/dist/llvm/test/Assembler/functionlocal-metadata.ll +++ b/external/bsd/llvm/dist/llvm/test/Assembler/functionlocal-metadata.ll @@ -3,7 +3,7 @@ define void @Foo(i32 %a, i32 %b) { entry: call void @llvm.dbg.value(metadata !{ i32* %1 }, i64 16, metadata !2) -; CHECK: call void @llvm.dbg.value(metadata !{i32* %1}, i64 16, metadata !2) +; CHECK: call void @llvm.dbg.value(metadata !{i32* %1}, i64 16, metadata ![[ID2:[0-9]+]]) %0 = add i32 %a, 1 ; [#uses=1] %two = add i32 %b, %0 ; [#uses=0] %1 = alloca i32 ; [#uses=1] @@ -19,28 +19,38 @@ entry: call void @llvm.dbg.declare(metadata !{i32 %a}, metadata !{i32 %a, metadata !"foo"}) ; CHECK: metadata !{i32 %a}, metadata !{i32 %a, metadata !"foo"} call void @llvm.dbg.declare(metadata !{i32 %b}, metadata !{metadata !0, i32 %two}) -; CHECK: metadata !{i32 %b}, metadata !{metadata !0, i32 %two} +; CHECK: metadata !{i32 %b}, metadata !{metadata ![[ID0:[0-9]+]], i32 %two} call void @llvm.dbg.value(metadata !{ i32 %a }, i64 0, metadata !1) -; CHECK: metadata !{i32 %a}, i64 0, metadata !1 +; CHECK: metadata !{i32 %a}, i64 0, metadata ![[ID1:[0-9]+]] call void @llvm.dbg.value(metadata !{ i32 %0 }, i64 25, metadata !0) -; CHECK: metadata !{i32 %0}, i64 25, metadata !0 +; CHECK: metadata !{i32 %0}, i64 25, metadata ![[ID0]] call void @llvm.dbg.value(metadata !{ i32* %1 }, i64 16, metadata !3) -; CHECK: call void @llvm.dbg.value(metadata !{i32* %1}, i64 16, metadata !3) +; CHECK: call void @llvm.dbg.value(metadata !{i32* %1}, i64 16, metadata ![[ID3:[0-9]+]]) call void @llvm.dbg.value(metadata !3, i64 12, metadata !2) -; CHECK: metadata !3, i64 12, metadata !2 +; CHECK: metadata ![[ID3]], i64 12, metadata ![[ID2]] ret void, !foo !0, !bar !1 -; CHECK: ret void, !foo !0, !bar !1 +; CHECK: ret void, !foo ![[FOO:[0-9]+]], !bar ![[BAR:[0-9]+]] } +!llvm.module.flags = !{!4} + !0 = metadata !{i32 662302, i32 26, metadata !1, null} !1 = metadata !{i32 
4, metadata !"foo"} !2 = metadata !{metadata !"bar"} !3 = metadata !{metadata !"foo"} +!4 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !foo = !{ !0 } !bar = !{ !1 } + +; CHECK: !foo = !{![[FOO]]} +; CHECK: !bar = !{![[BAR]]} +; CHECK: ![[ID0]] = metadata !{i32 662302, i32 26, metadata ![[ID1]], null} +; CHECK: ![[ID1]] = metadata !{i32 4, metadata !"foo"} +; CHECK: ![[ID2]] = metadata !{metadata !"bar"} +; CHECK; ![[ID3]] = metadata !{metadata !"foo"} diff --git a/external/bsd/llvm/dist/llvm/test/Assembler/invalid-name.ll b/external/bsd/llvm/dist/llvm/test/Assembler/invalid-name.ll new file mode 100644 index 000000000000..d9d7a1108808 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/Assembler/invalid-name.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as %s 2>&1 | FileCheck %s + +; CHECK: expected function name +define void @"zed\00bar"() { + ret void +} diff --git a/external/bsd/llvm/dist/llvm/test/Bindings/Ocaml/target.ml b/external/bsd/llvm/dist/llvm/test/Bindings/Ocaml/target.ml index d69fb0e664fd..26cd12939d1e 100644 --- a/external/bsd/llvm/dist/llvm/test/Bindings/Ocaml/target.ml +++ b/external/bsd/llvm/dist/llvm/test/Bindings/Ocaml/target.ml @@ -43,9 +43,7 @@ let machine = TargetMachine.create (Target.default_triple ()) target let test_target_data () = let module DL = DataLayout in - let layout = "e-p:32:32:32-S32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-" ^ - "f16:16:16-f32:32:32-f64:32:64-f128:128:128-v64:32:64-v128:32:128-" ^ - "a0:0:64-n32" in + let layout = "e-p:32:32-f64:32:64-v64:32:64-v128:32:128-n32-S32" in let dl = DL.of_string layout in let sty = struct_type context [| i32_type; i64_type |] in diff --git a/external/bsd/llvm/dist/llvm/test/Bindings/Ocaml/vmcore.ml b/external/bsd/llvm/dist/llvm/test/Bindings/Ocaml/vmcore.ml index 12ac62b25519..167efce0b2b1 100644 --- a/external/bsd/llvm/dist/llvm/test/Bindings/Ocaml/vmcore.ml +++ b/external/bsd/llvm/dist/llvm/test/Bindings/Ocaml/vmcore.ml @@ -1031,8 +1031,8 @@ let test_builder () = end; group "metadata"; begin - (* CHECK: %metadata = add i32 %P1, %P2, !test !0 - * !0 is metadata emitted at EOF. + (* CHECK: %metadata = add i32 %P1, %P2, !test !1 + * !1 is metadata emitted at EOF. *) let i = build_add p1 p2 "metadata" atentry in insist ((has_metadata i) = false); @@ -1056,18 +1056,18 @@ let test_builder () = end; group "named metadata"; begin - (* !md is emitted at EOF. *) + (* !llvm.module.flags is emitted at EOF. *) let n1 = const_int i32_type 1 in - let n2 = mdstring context "metadata test" in - let md = mdnode context [| n1; n2 |] in - add_named_metadata_operand m "md" md; + let n2 = mdstring context "Debug Info Version" in + let md = mdnode context [| n1; n2; n1 |] in + add_named_metadata_operand m "llvm.module.flags" md; - insist ((get_named_metadata m "md") = [| md |]) + insist ((get_named_metadata m "llvm.module.flags") = [| md |]) end; group "dbg"; begin - (* CHECK: %dbg = add i32 %P1, %P2, !dbg !1 - * !1 is metadata emitted at EOF. + (* CHECK: %dbg = add i32 %P1, %P2, !dbg !2 + * !2 is metadata emitted at EOF. *) insist ((current_debug_location atentry) = None); @@ -1346,9 +1346,10 @@ let test_builder () = (* End-of-file checks for things like metdata and attributes. 
* CHECK: attributes #0 = {{.*}}uwtable{{.*}} - * CHECK: !md = !{!0} - * CHECK: !0 = metadata !{i32 1, metadata !"metadata test"} - * CHECK: !1 = metadata !{i32 2, i32 3, metadata !2, metadata !2} + * CHECK: !llvm.module.flags = !{!0} + * CHECK: !0 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} + * CHECK: !1 = metadata !{i32 1, metadata !"metadata test"} + * CHECK: !2 = metadata !{i32 2, i32 3, metadata !3, metadata !3} *) (*===-- Pass Managers -----------------------------------------------------===*) diff --git a/external/bsd/llvm/dist/llvm/test/Bitcode/attributes.ll b/external/bsd/llvm/dist/llvm/test/Bitcode/attributes.ll index 1789878e9f50..545f1cbb28c5 100644 --- a/external/bsd/llvm/dist/llvm/test/Bitcode/attributes.ll +++ b/external/bsd/llvm/dist/llvm/test/Bitcode/attributes.ll @@ -213,6 +213,11 @@ define void @f35() optnone noinline ret void; } +define void @f36(i8* inalloca) { +; CHECK: define void @f36(i8* inalloca) { + ret void +} + ; CHECK: attributes #0 = { noreturn } ; CHECK: attributes #1 = { nounwind } ; CHECK: attributes #2 = { readnone } diff --git a/external/bsd/llvm/dist/llvm/test/Bitcode/calling-conventions.3.2.ll b/external/bsd/llvm/dist/llvm/test/Bitcode/calling-conventions.3.2.ll new file mode 100644 index 000000000000..aca9efd0892b --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/Bitcode/calling-conventions.3.2.ll @@ -0,0 +1,150 @@ +; RUN: llvm-dis < %s.bc| FileCheck %s + +; calling-conventions.3.2.ll.bc was generated by passing this file to llvm-as-3.2. +; The test checks that LLVM does not silently misread calling conventions of +; older bitcode files. + +declare ccc void @ccc() +; CHECK: declare void @ccc + +declare fastcc void @fastcc() +; CHECK: declare fastcc void @fastcc + +declare coldcc void @coldcc() +; CHECK: declare coldcc void @coldcc + +declare cc10 void @cc10() +; CHECK: declare cc10 void @cc10 + +declare spir_kernel void @spir_kernel() +; CHECK: declare spir_kernel void @spir_kernel + +declare spir_func void @spir_func() +; CHECK: declare spir_func void @spir_func + +declare intel_ocl_bicc void @intel_ocl_bicc() +; CHECK: declare intel_ocl_bicc void @intel_ocl_bicc + +declare x86_stdcallcc void @x86_stdcallcc() +; CHECK: declare x86_stdcallcc void @x86_stdcallcc + +declare x86_fastcallcc void @x86_fastcallcc() +; CHECK: declare x86_fastcallcc void @x86_fastcallcc + +declare x86_thiscallcc void @x86_thiscallcc() +; CHECK: declare x86_thiscallcc void @x86_thiscallcc + +declare arm_apcscc void @arm_apcscc() +; CHECK: declare arm_apcscc void @arm_apcscc + +declare arm_aapcscc void @arm_aapcscc() +; CHECK: declare arm_aapcscc void @arm_aapcscc + +declare arm_aapcs_vfpcc void @arm_aapcs_vfpcc() +; CHECK: declare arm_aapcs_vfpcc void @arm_aapcs_vfpcc + +declare msp430_intrcc void @msp430_intrcc() +; CHECK: declare msp430_intrcc void @msp430_intrcc + +declare ptx_kernel void @ptx_kernel() +; CHECK: declare ptx_kernel void @ptx_kernel + +declare ptx_device void @ptx_device() +; CHECK: declare ptx_device void @ptx_device + +define void @call_ccc() { +; CHECK: call void @ccc + call ccc void @ccc() + ret void +} + +define void @call_fastcc() { +; CHECK: call fastcc void @fastcc + call fastcc void @fastcc() + ret void +} + +define void @call_coldcc() { +; CHECK: call coldcc void @coldcc + call coldcc void @coldcc() + ret void +} + +define void @call_cc10 () { +; CHECK: call cc10 void @cc10 + call cc10 void @cc10 () + ret void +} + +define void @call_spir_kernel() { +; CHECK: call spir_kernel void @spir_kernel + call spir_kernel void @spir_kernel() 
+ ret void +} + +define void @call_spir_func() { +; CHECK: call spir_func void @spir_func + call spir_func void @spir_func() + ret void +} + +define void @call_intel_ocl_bicc() { +; CHECK: call intel_ocl_bicc void @intel_ocl_bicc + call intel_ocl_bicc void @intel_ocl_bicc() + ret void +} + +define void @call_x86_stdcallcc() { +; CHECK: call x86_stdcallcc void @x86_stdcallcc + call x86_stdcallcc void @x86_stdcallcc() + ret void +} + +define void @call_x86_fastcallcc() { +; CHECK: call x86_fastcallcc void @x86_fastcallcc + call x86_fastcallcc void @x86_fastcallcc() + ret void +} + +define void @call_x86_thiscallcc() { +; CHECK: call x86_thiscallcc void @x86_thiscallcc + call x86_thiscallcc void @x86_thiscallcc() + ret void +} + +define void @call_arm_apcscc() { +; CHECK: call arm_apcscc void @arm_apcscc + call arm_apcscc void @arm_apcscc() + ret void +} + +define void @call_arm_aapcscc() { +; CHECK: call arm_aapcscc void @arm_aapcscc + call arm_aapcscc void @arm_aapcscc() + ret void +} + +define void @call_arm_aapcs_vfpcc() { +; CHECK: call arm_aapcs_vfpcc void @arm_aapcs_vfpcc + call arm_aapcs_vfpcc void @arm_aapcs_vfpcc() + ret void +} + +define void @call_msp430_intrcc() { +; CHECK: call msp430_intrcc void @msp430_intrcc + call msp430_intrcc void @msp430_intrcc() + ret void +} + +define void @call_ptx_kernel() { +; CHECK: call ptx_kernel void @ptx_kernel + call ptx_kernel void @ptx_kernel() + ret void +} + +define void @call_ptx_device() { +; CHECK: call ptx_device void @ptx_device + call ptx_device void @ptx_device() + ret void +} + diff --git a/external/bsd/llvm/dist/llvm/test/Bitcode/calling-conventions.3.2.ll.bc b/external/bsd/llvm/dist/llvm/test/Bitcode/calling-conventions.3.2.ll.bc new file mode 100644 index 000000000000..b3fad967db0e Binary files /dev/null and b/external/bsd/llvm/dist/llvm/test/Bitcode/calling-conventions.3.2.ll.bc differ diff --git a/external/bsd/llvm/dist/llvm/test/Bitcode/drop-debug-info.ll b/external/bsd/llvm/dist/llvm/test/Bitcode/drop-debug-info.ll new file mode 100644 index 000000000000..da4ae0c541eb --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/Bitcode/drop-debug-info.ll @@ -0,0 +1,26 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +define i32 @main() { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + ret i32 0, !dbg !12 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 (trunk 195495) (llvm/trunk 195495:195504M)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/llvm_gmail/release/../llvm/tools/clang/test/CodeGen/debug-info-version.c] [DW_LANG_C99] +!1 = metadata !{metadata !"../llvm/tools/clang/test/CodeGen/debug-info-version.c", metadata !"/Users/manmanren/llvm_gmail/release"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [main] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/Users/manmanren/llvm_gmail/release/../llvm/tools/clang/test/CodeGen/debug-info-version.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, 
align 0, offset 0] [from ] +!7 = metadata !{metadata !8} +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!12 = metadata !{i32 4, i32 0, metadata !4, null} + +; CHECK-NOT: !dbg +; CHECK-NOT: !llvm.dbg.cu diff --git a/external/bsd/llvm/dist/llvm/test/Bitcode/global-variables.3.2.ll b/external/bsd/llvm/dist/llvm/test/Bitcode/global-variables.3.2.ll new file mode 100644 index 000000000000..549d025549be --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/Bitcode/global-variables.3.2.ll @@ -0,0 +1,41 @@ +; RUN: llvm-dis < %s.bc| FileCheck %s + +; global-variables.3.2.ll.bc was generated by passing this file to llvm-as-3.2. +; The test checks that LLVM does not silently misread global variables attributes of +; older bitcode files. + +@global.var = global i32 1 +; CHECK: @global.var = global i32 1 + +@constant.var = constant i32 1 +; CHECK: @constant.var = constant i32 1 + +@noinit.var = global float undef +; CHECK: @noinit.var = global float undef + +@section.var = global i32 1, section "foo" +; CHECK: @section.var = global i32 1, section "foo" + +@align.var = global i64 undef, align 8 +; CHECK: @align.var = global i64 undef, align 8 + +@unnamed_addr.var = unnamed_addr global i8 1 +; CHECK: @unnamed_addr.var = unnamed_addr global i8 1 + +@default_addrspace.var = addrspace(0) global i8 1 +; CHECK: @default_addrspace.var = global i8 1 + +@non_default_addrspace.var = addrspace(1) global i8* undef +; CHECK: @non_default_addrspace.var = addrspace(1) global i8* undef + +@initialexec.var = thread_local(initialexec) global i32 0, align 4 +; CHECK: @initialexec.var = thread_local(initialexec) global i32 0, align 4 + +@localdynamic.var = thread_local(localdynamic) constant i32 0, align 4 +; CHECK: @localdynamic.var = thread_local(localdynamic) constant i32 0, align 4 + +@localexec.var = thread_local(localexec) constant i32 0, align 4 +; CHECK: @localexec.var = thread_local(localexec) constant i32 0, align 4 + +@string.var = private unnamed_addr constant [13 x i8] c"hello world\0A\00" +; CHECK: @string.var = private unnamed_addr constant [13 x i8] c"hello world\0A\00" diff --git a/external/bsd/llvm/dist/llvm/test/Bitcode/global-variables.3.2.ll.bc b/external/bsd/llvm/dist/llvm/test/Bitcode/global-variables.3.2.ll.bc new file mode 100644 index 000000000000..c105f2fb1831 Binary files /dev/null and b/external/bsd/llvm/dist/llvm/test/Bitcode/global-variables.3.2.ll.bc differ diff --git a/external/bsd/llvm/dist/llvm/test/Bitcode/linkage-types-3.2.ll b/external/bsd/llvm/dist/llvm/test/Bitcode/linkage-types-3.2.ll new file mode 100644 index 000000000000..65c31dd12877 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/Bitcode/linkage-types-3.2.ll @@ -0,0 +1,128 @@ +; RUN: llvm-dis < %s.bc| FileCheck %s + +; linkage-types-3.2.ll.bc was generated by passing this file to llvm-as-3.2 +; The test checks that LLVM does not silently misread linkage types of +; older bitcode files. 
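; Illustrative note, not part of the test: each companion .ll.bc file in this
; directory was produced once with the older release's assembler, e.g.
;   llvm-as-3.2 linkage-types-3.2.ll -o linkage-types-3.2.ll.bc
; so the RUN line exercises only the current llvm-dis reader on old bitcode.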
+ +@common.var = common global i32 0 +; CHECK: @common.var = common global i32 0 + +@appending.var = appending global [8 x i32] undef +; CHECK: @appending.var = appending global [8 x i32] undef + +@extern_weak.var = extern_weak global i32 +; CHECK: @extern_weak.var = extern_weak global i32 + +@private.var = private constant i32 0 +; CHECK: @private.var = private constant i32 0 + +@linker_private.var = linker_private constant i32 0 +; CHECK: @linker_private.var = linker_private constant i32 0 + +@linker_private_weak.var = linker_private_weak constant i32 0 +; CHECK: @linker_private_weak.var = linker_private_weak constant i32 0 + +@linker_private_weak_def_auto.var = linker_private_weak_def_auto constant i32 0 +; CHECK: @linker_private_weak_def_auto.var = constant i32 0 + +@internal.var = internal constant i32 0 +; CHECK: @internal.var = internal constant i32 0 + +@available_externally.var = available_externally constant i32 0 +; CHECK: @available_externally.var = available_externally constant i32 0 + +@linkonce.var = linkonce constant i32 0 +; CHECK: @linkonce.var = linkonce constant i32 0 + +@weak.var = weak constant i32 0 +; CHECK: @weak.var = weak constant i32 0 + +@linkonce_odr.var = linkonce_odr constant i32 0 +; CHECK: @linkonce_odr.var = linkonce_odr constant i32 0 + +@linkonce_odr_auto_hide.var = linkonce_odr_auto_hide constant i32 0 +; CHECK: @linkonce_odr_auto_hide.var = constant i32 0 + +@external.var = external constant i32 +; CHECK: @external.var = external constant i32 + +@dllexport.var = dllexport global i32 0 +; CHECK: @dllexport.var = dllexport global i32 0 + +@dllimport.var = dllimport global i32 +; CHECK: @dllimport.var = dllimport global i32 + +define private void @private() +; CHECK: define private void @private +{ + ret void; +} + +define linker_private void @linker_private() +; CHECK: define linker_private void @linker_private +{ + ret void; +} + +define linker_private_weak void @linker_private_weak() +; CHECK: define linker_private_weak void @linker_private_weak +{ + ret void; +} + +define linker_private_weak_def_auto void @linker_private_weak_def_auto() +; CHECK: define void @linker_private_weak_def_auto +{ + ret void; +} + +define internal void @internal() +; CHECK: define internal void @internal +{ + ret void; +} + +define available_externally void @available_externally() +; CHECK: define available_externally void @available_externally +{ + ret void; +} + +define linkonce void @linkonce() +; CHECK: define linkonce void @linkonce +{ + ret void; +} + +define weak void @weak() +; CHECK: define weak void @weak +{ + ret void; +} + +define linkonce_odr void @linkonce_odr() +; CHECK: define linkonce_odr void @linkonce_odr +{ + ret void; +} + +define linkonce_odr_auto_hide void @linkonce_odr_auto_hide() +; CHECK: define void @linkonce_odr_auto_hide +{ + ret void; +} + +define external void @external() +; CHECK: define void @external +{ + ret void; +} + +declare dllimport void @dllimport() +; CHECK: declare dllimport void @dllimport + +define dllexport void @dllexport() +; CHECK: define dllexport void @dllexport() +{ + ret void; +} diff --git a/external/bsd/llvm/dist/llvm/test/Bitcode/linkage-types-3.2.ll.bc b/external/bsd/llvm/dist/llvm/test/Bitcode/linkage-types-3.2.ll.bc new file mode 100644 index 000000000000..c856ddf7aa7b Binary files /dev/null and b/external/bsd/llvm/dist/llvm/test/Bitcode/linkage-types-3.2.ll.bc differ diff --git a/external/bsd/llvm/dist/llvm/test/Bitcode/select.ll b/external/bsd/llvm/dist/llvm/test/Bitcode/select.ll index 71e669a90cdc..08a3061394db 
100644 --- a/external/bsd/llvm/dist/llvm/test/Bitcode/select.ll +++ b/external/bsd/llvm/dist/llvm/test/Bitcode/select.ll @@ -5,5 +5,5 @@ define <2 x i32> @main() { } ; CHECK: define <2 x i32> @main() { -; CHECK: ret <2 x i32> select (<2 x i1> , <2 x i32> zeroinitializer, <2 x i32> ) +; CHECK: ret <2 x i32> ; CHECK: } diff --git a/external/bsd/llvm/dist/llvm/test/Bitcode/visibility-styles.3.2.ll b/external/bsd/llvm/dist/llvm/test/Bitcode/visibility-styles.3.2.ll new file mode 100644 index 000000000000..ec2ee6832063 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/Bitcode/visibility-styles.3.2.ll @@ -0,0 +1,23 @@ +; RUN: llvm-dis < %s.bc| FileCheck %s + +; visibility-styles.3.2.ll.bc was generated by passing this file to llvm-as-3.2. +; The test checks that LLVM does not silently misread visibility styles of +; older bitcode files. + +@default.var = default global i32 0 +; CHECK: @default.var = global i32 0 + +@hidden.var = hidden global i32 0 +; CHECK: @hidden.var = hidden global i32 0 + +@protected.var = protected global i32 0 +; CHECK: @protected.var = protected global i32 0 + +declare default void @default() +; CHECK: declare void @default + +declare hidden void @hidden() +; CHECK: declare hidden void @hidden + +declare protected void @protected() +; CHECK: declare protected void @protected diff --git a/external/bsd/llvm/dist/llvm/test/Bitcode/visibility-styles.3.2.ll.bc b/external/bsd/llvm/dist/llvm/test/Bitcode/visibility-styles.3.2.ll.bc new file mode 100644 index 000000000000..e2f0b058cce6 Binary files /dev/null and b/external/bsd/llvm/dist/llvm/test/Bitcode/visibility-styles.3.2.ll.bc differ diff --git a/external/bsd/llvm/dist/llvm/test/BugPoint/metadata.ll b/external/bsd/llvm/dist/llvm/test/BugPoint/metadata.ll index 2ba1a9f210ed..cc043f084feb 100644 --- a/external/bsd/llvm/dist/llvm/test/BugPoint/metadata.ll +++ b/external/bsd/llvm/dist/llvm/test/BugPoint/metadata.ll @@ -4,12 +4,12 @@ ; Bugpoint should keep the call's metadata attached to the call. 
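; Illustrative note, not part of the test: the rewritten checks below capture
; metadata numbers with FileCheck patterns such as ![[LOC:[0-9]+]] instead of
; hard-coding !0, !1, ... because inserting the llvm.module.flags node shifts
; every subsequent metadata index; the captured variables keep the test
; stable under renumbering.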
-; CHECK: call void @foo(), !dbg !0, !attach !4 -; CHECK: !0 = metadata !{i32 104, i32 105, metadata !1, metadata !1} -; CHECK: !1 = metadata !{i32 458769, metadata !2, i32 0, metadata !"me", i1 true, metadata !"", i32 0, metadata !3, metadata !3, null, null, null, metadata !""} -; CHECK: !2 = metadata !{metadata !"source.c", metadata !"/dir"} -; CHECK: !3 = metadata !{i32 0} -; CHECK: !4 = metadata !{metadata !"the call to foo"} +; CHECK: call void @foo(), !dbg ![[LOC:[0-9]+]], !attach ![[CALL:[0-9]+]] +; CHECK: ![[LOC]] = metadata !{i32 104, i32 105, metadata ![[SCOPE:[0-9]+]], metadata ![[SCOPE]]} +; CHECK: ![[SCOPE]] = metadata !{i32 458769, metadata ![[FILE:[0-9]+]], i32 0, metadata !"me", i1 true, metadata !"", i32 0, metadata ![[LIST:[0-9]+]], metadata ![[LIST]], null, null, null, metadata !""} +; CHECK: ![[FILE]] = metadata !{metadata !"source.c", metadata !"/dir"} +; CHECK: ![[LIST]] = metadata !{i32 0} +; CHECK: ![[CALL]] = metadata !{metadata !"the call to foo"} %rust_task = type {} define void @test(i32* %a, i8* %b) { @@ -23,6 +23,8 @@ define void @test(i32* %a, i8* %b) { declare void @foo() +!llvm.module.flags = !{!17} + !0 = metadata !{metadata !"boring"} !1 = metadata !{metadata !"uninteresting"} !2 = metadata !{metadata !"the call to foo"} @@ -37,3 +39,4 @@ declare void @foo() !14 = metadata !{i32 108, i32 109, metadata !9, metadata !9} !15 = metadata !{metadata !"source.c", metadata !"/dir"} !16 = metadata !{i32 0} +!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CMakeLists.txt b/external/bsd/llvm/dist/llvm/test/CMakeLists.txt index d6f7dab1287e..373b1eec7c25 100644 --- a/external/bsd/llvm/dist/llvm/test/CMakeLists.txt +++ b/external/bsd/llvm/dist/llvm/test/CMakeLists.txt @@ -15,6 +15,7 @@ endif() # Set the depends list as a variable so that it can grow conditionally. # NOTE: Sync the substitutions in test/lit.cfg when adding to this list. 
set(LLVM_TEST_DEPENDS + llvm-config UnitTests BugpointPasses LLVMHello diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/128bit_load_store.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/128bit_load_store.ll new file mode 100644 index 000000000000..502fd70791b2 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/128bit_load_store.ll @@ -0,0 +1,53 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=neon | FileCheck %s + +define void @test_store_f128(fp128* %ptr, fp128 %val) #0 { +; CHECK: test_store_f128 +; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}] +entry: + store fp128 %val, fp128* %ptr, align 16 + ret void +} + +define fp128 @test_load_f128(fp128* readonly %ptr) #2 { +; CHECK: test_load_f128 +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}] +entry: + %0 = load fp128* %ptr, align 16 + ret fp128 %0 +} + +define void @test_vstrq_p128(i128* %ptr, i128 %val) #0 { +; CHECK: test_vstrq_p128 +; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #8] +; CHECK-NEXT: str {{x[0-9]+}}, [{{x[0-9]+}}] +entry: + %0 = bitcast i128* %ptr to fp128* + %1 = bitcast i128 %val to fp128 + store fp128 %1, fp128* %0, align 16 + ret void +} + +define i128 @test_vldrq_p128(i128* readonly %ptr) #2 { +; CHECK: test_vldrq_p128 +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}] +; CHECK-NEXT: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8] +entry: + %0 = bitcast i128* %ptr to fp128* + %1 = load fp128* %0, align 16 + %2 = bitcast fp128 %1 to i128 + ret i128 %2 +} + +define void @test_ld_st_p128(i128* nocapture %ptr) #0 { +; CHECK: test_ld_st_p128 +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}] +; CHECK-NEXT: str {{q[0-9]+}}, [{{x[0-9]+}}, #16] +entry: + %0 = bitcast i128* %ptr to fp128* + %1 = load fp128* %0, align 16 + %add.ptr = getelementptr inbounds i128* %ptr, i64 1 + %2 = bitcast i128* %add.ptr to fp128* + store fp128 %1, fp128* %2, align 16 + ret void +} + diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/assertion-rc-mismatch.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/assertion-rc-mismatch.ll new file mode 100644 index 000000000000..02b0c0e786e1 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/assertion-rc-mismatch.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; Test case related to . 
+ +; CHECK-LABEL: small +define i64 @small(i64 %encodedBase) { +cmp: + %lnot.i.i = icmp eq i64 %encodedBase, 0 + br i1 %lnot.i.i, label %if, label %else +if: + %tmp1 = call i8* @llvm.returnaddress(i32 0) + br label %end +else: + %tmp3 = call i8* @llvm.returnaddress(i32 0) + %ptr = getelementptr inbounds i8* %tmp3, i64 -16 + %ld = load i8* %ptr, align 4 + %tmp2 = inttoptr i8 %ld to i8* + br label %end +end: + %tmp = phi i8* [ %tmp1, %if ], [ %tmp2, %else ] + %coerce.val.pi56 = ptrtoint i8* %tmp to i64 + ret i64 %coerce.val.pi56 +} + +declare i8* @llvm.returnaddress(i32) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/fp-dp3.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/fp-dp3.ll index 590557f1e8ed..2a6790e99c6c 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/fp-dp3.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/fp-dp3.ll @@ -26,8 +26,9 @@ define float @test_fmsub(float %a, float %b, float %c) { define float @test_fnmadd(float %a, float %b, float %c) { ; CHECK-LABEL: test_fnmadd: ; CHECK-NOFAST-LABEL: test_fnmadd: + %nega = fsub float -0.0, %a %negc = fsub float -0.0, %c - %val = call float @llvm.fma.f32(float %a, float %b, float %negc) + %val = call float @llvm.fma.f32(float %nega, float %b, float %negc) ; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ; CHECK-NOFAST: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret float %val @@ -36,9 +37,8 @@ define float @test_fnmadd(float %a, float %b, float %c) { define float @test_fnmsub(float %a, float %b, float %c) { ; CHECK-LABEL: test_fnmsub: ; CHECK-NOFAST-LABEL: test_fnmsub: - %nega = fsub float -0.0, %a %negc = fsub float -0.0, %c - %val = call float @llvm.fma.f32(float %nega, float %b, float %negc) + %val = call float @llvm.fma.f32(float %a, float %b, float %negc) ; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ; CHECK-NOFAST: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret float %val @@ -66,8 +66,9 @@ define double @testd_fmsub(double %a, double %b, double %c) { define double @testd_fnmadd(double %a, double %b, double %c) { ; CHECK-LABEL: testd_fnmadd: ; CHECK-NOFAST-LABEL: testd_fnmadd: + %nega = fsub double -0.0, %a %negc = fsub double -0.0, %c - %val = call double @llvm.fma.f64(double %a, double %b, double %negc) + %val = call double @llvm.fma.f64(double %nega, double %b, double %negc) ; CHECK: fnmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ; CHECK-NOFAST: fnmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret double %val @@ -76,9 +77,8 @@ define double @testd_fnmadd(double %a, double %b, double %c) { define double @testd_fnmsub(double %a, double %b, double %c) { ; CHECK-LABEL: testd_fnmsub: ; CHECK-NOFAST-LABEL: testd_fnmsub: - %nega = fsub double -0.0, %a %negc = fsub double -0.0, %c - %val = call double @llvm.fma.f64(double %nega, double %b, double %negc) + %val = call double @llvm.fma.f64(double %a, double %b, double %negc) ; CHECK: fnmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ; CHECK-NOFAST: fnmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret double %val @@ -113,12 +113,13 @@ define float @test_fnmadd_unfused(float %a, float %b, float %c) { ; CHECK-NOFAST-LABEL: test_fnmadd_unfused: %nega = fsub float -0.0, %a %prod = fmul float %b, %c - %sum = fadd float %nega, %prod + %diff = fsub float %nega, %prod ; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ; CHECK-NOFAST-NOT: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ; CHECK-NOFAST: fmul {{s[0-9]+}}, 
{{s[0-9]+}}, {{s[0-9]+}} ; CHECK-NOFAST: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - ret float %sum +; CHECK-NOFAST: ret + ret float %diff } define float @test_fnmsub_unfused(float %a, float %b, float %c) { @@ -126,12 +127,37 @@ define float @test_fnmsub_unfused(float %a, float %b, float %c) { %nega = fsub float -0.0, %a %prod = fmul float %b, %c - %diff = fsub float %nega, %prod + %sum = fadd float %nega, %prod ; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ; CHECK-NOFAST-NOT: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -; CHECK-NOFAST-DAG: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -; CHECK-NOFAST-DAG: fneg {{s[0-9]+}}, {{s[0-9]+}} -; CHECK-NOFAST-DAG: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} -; CHECK-NOFAST: ret - ret float %diff +; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK-NOFAST: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %sum } + +; Another set of tests that check for multiply single use + +define float @test_fmadd_unfused_su(float %a, float %b, float %c) { +; CHECK-LABEL: test_fmadd_unfused_su: + %prod = fmul float %b, %c + %sum = fadd float %a, %prod + %res = fadd float %sum, %prod +; CHECK-NOT: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %res +} + +define float @test_fmsub_unfused_su(float %a, float %b, float %c) { +; CHECK-LABEL: test_fmsub_unfused_su: + %prod = fmul float %b, %c + %diff = fsub float %a, %prod + %res = fsub float %diff, %prod +; CHECK-NOT: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %res +} + diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-2velem-high.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-2velem-high.ll new file mode 100644 index 000000000000..97031d98b7c0 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-2velem-high.ll @@ -0,0 +1,331 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s + +declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) + +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) + +declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) + +declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) + +declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) + +declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) + +declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) + +declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) + +declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) + +declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) + +declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) + +define <4 x i32> @test_vmull_high_n_s16(<8 x i16> %a, i16 %b) { +; CHECK: test_vmull_high_n_s16: +; CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 + %vecinit1.i.i =
insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 + %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 + %vmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + ret <4 x i32> %vmull15.i.i +} + +define <2 x i64> @test_vmull_high_n_s32(<4 x i32> %a, i32 %b) { +; CHECK: test_vmull_high_n_s32: +; CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 + %vmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + ret <2 x i64> %vmull9.i.i +} + +define <4 x i32> @test_vmull_high_n_u16(<8 x i16> %a, i16 %b) { +; CHECK: test_vmull_high_n_u16: +; CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 + %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 + %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 + %vmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + ret <4 x i32> %vmull15.i.i +} + +define <2 x i64> @test_vmull_high_n_u32(<4 x i32> %a, i32 %b) { +; CHECK: test_vmull_high_n_u32: +; CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 + %vmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + ret <2 x i64> %vmull9.i.i +} + +define <4 x i32> @test_vqdmull_high_n_s16(<8 x i16> %a, i16 %b) { +; CHECK: test_vqdmull_high_n_s16: +; CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +entry: + %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i = insertelement <4 x i16> undef, i16 %b, i32 0 + %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %b, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %b, i32 2 + %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %b, i32 3 + %vqdmull15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + ret <4 x i32> %vqdmull15.i.i +} + +define <2 x i64> @test_vqdmull_high_n_s32(<4 x i32> %a, i32 %b) { +; CHECK: test_vqdmull_high_n_s32: +; CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %shuffle.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %b, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %b, i32 1 + %vqdmull9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + ret <2 x i64> %vqdmull9.i.i +} + +define <4 x i32> @test_vmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +; CHECK: test_vmlal_high_n_s16: +; CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i =
insertelement <4 x i16> undef, i16 %c, i32 0 + %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 + %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 + %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %add.i.i = add <4 x i32> %vmull2.i.i.i, %a + ret <4 x i32> %add.i.i +} + +define <2 x i64> @test_vmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +; CHECK: test_vmlal_high_n_s32: +; CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 + %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %add.i.i = add <2 x i64> %vmull2.i.i.i, %a + ret <2 x i64> %add.i.i +} + +define <4 x i32> @test_vmlal_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +; CHECK: test_vmlal_high_n_u16: +; CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 + %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 + %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 + %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %add.i.i = add <4 x i32> %vmull2.i.i.i, %a + ret <4 x i32> %add.i.i +} + +define <2 x i64> @test_vmlal_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +; CHECK: test_vmlal_high_n_u32: +; CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 + %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %add.i.i = add <2 x i64> %vmull2.i.i.i, %a + ret <2 x i64> %add.i.i +} + +define <4 x i32> @test_vqdmlal_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +; CHECK: test_vqdmlal_high_n_s16: +; CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 + %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 + %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 + %vqdmlal15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %vqdmlal17.i.i = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %vqdmlal15.i.i) + ret <4 x i32> %vqdmlal17.i.i +} + +define <2 x i64> @test_vqdmlal_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +; CHECK: test_vqdmlal_high_n_s32: +; CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i,
i32 %c, i32 1 + %vqdmlal9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vqdmlal11.i.i = tail call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %vqdmlal9.i.i) + ret <2 x i64> %vqdmlal11.i.i +} + +define <4 x i32> @test_vmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +; CHECK: test_vmlsl_high_n_s16: +; CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 + %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 + %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 + %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i + ret <4 x i32> %sub.i.i +} + +define <2 x i64> @test_vmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +; CHECK: test_vmlsl_high_n_s32: +; CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 + %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i + ret <2 x i64> %sub.i.i +} + +define <4 x i32> @test_vmlsl_high_n_u16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +; CHECK: test_vmlsl_high_n_u16: +; CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 + %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 + %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 + %vmull2.i.i.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %shuffle.i.i, <4 x i16> %vecinit3.i.i) + %sub.i.i = sub <4 x i32> %a, %vmull2.i.i.i + ret <4 x i32> %sub.i.i +} + +define <2 x i64> @test_vmlsl_high_n_u32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +; CHECK: test_vmlsl_high_n_u32: +; CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 + %vmull2.i.i.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %sub.i.i = sub <2 x i64> %a, %vmull2.i.i.i + ret <2 x i64> %sub.i.i +} + +define <4 x i32> @test_vqdmlsl_high_n_s16(<4 x i32> %a, <8 x i16> %b, i16 %c) { +; CHECK: test_vqdmlsl_high_n_s16: +; CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %vecinit.i.i = insertelement <4 x i16> undef, i16 %c, i32 0 + %vecinit1.i.i = insertelement <4 x i16> %vecinit.i.i, i16 %c, i32 1 + %vecinit2.i.i = insertelement <4 x i16> %vecinit1.i.i, i16 %c, i32 2 + %vecinit3.i.i = insertelement <4 x i16> %vecinit2.i.i, i16 %c, i32 3 + %vqdmlsl15.i.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %shuffle.i.i, <4
x i16> %vecinit3.i.i) + %vqdmlsl17.i.i = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %vqdmlsl15.i.i) + ret <4 x i32> %vqdmlsl17.i.i +} + +define <2 x i64> @test_vqdmlsl_high_n_s32(<2 x i64> %a, <4 x i32> %b, i32 %c) { +; CHECK: test_vqdmlsl_high_n_s32: +; CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %shuffle.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> + %vecinit.i.i = insertelement <2 x i32> undef, i32 %c, i32 0 + %vecinit1.i.i = insertelement <2 x i32> %vecinit.i.i, i32 %c, i32 1 + %vqdmlsl9.i.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %shuffle.i.i, <2 x i32> %vecinit1.i.i) + %vqdmlsl11.i.i = tail call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %vqdmlsl9.i.i) + ret <2 x i64> %vqdmlsl11.i.i +} + +define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) { +; CHECK: test_vmul_n_f32: +; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +entry: + %vecinit.i = insertelement <2 x float> undef, float %b, i32 0 + %vecinit1.i = insertelement <2 x float> %vecinit.i, float %b, i32 1 + %mul.i = fmul <2 x float> %vecinit1.i, %a + ret <2 x float> %mul.i +} + +define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) { +; CHECK: test_vmulq_n_f32: +; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +entry: + %vecinit.i = insertelement <4 x float> undef, float %b, i32 0 + %vecinit1.i = insertelement <4 x float> %vecinit.i, float %b, i32 1 + %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %b, i32 2 + %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %b, i32 3 + %mul.i = fmul <4 x float> %vecinit3.i, %a + ret <4 x float> %mul.i +} + +define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) { +; CHECK: test_vmulq_n_f64: +; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +entry: + %vecinit.i = insertelement <2 x double> undef, double %b, i32 0 + %vecinit1.i = insertelement <2 x double> %vecinit.i, double %b, i32 1 + %mul.i = fmul <2 x double> %vecinit1.i, %a + ret <2 x double> %mul.i +} + +define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) { +; CHECK: test_vfma_n_f32: +; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %vecinit.i = insertelement <2 x float> undef, float %n, i32 0 + %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1 + %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %vecinit1.i, <2 x float> %a) + ret <2 x float> %0 +} + +define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) { +; CHECK: test_vfmaq_n_f32: +; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %vecinit.i = insertelement <4 x float> undef, float %n, i32 0 + %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1 + %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2 + %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3 + %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %vecinit3.i, <4 x float> %a) + ret <4 x float> %0 +} + +define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) { +; CHECK: test_vfms_n_f32: +; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %vecinit.i = insertelement <2 x float> undef, float %n, i32 0 + %vecinit1.i = insertelement <2 x float> %vecinit.i, float %n, i32 1 + %0 = fsub <2 x float> , %b + %1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> 
%0, <2 x float> %vecinit1.i, <2 x float> %a) + ret <2 x float> %1 +} + +define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) { +; CHECK: test_vfmsq_n_f32: +; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[{{[0-9]+}}] +entry: + %vecinit.i = insertelement <4 x float> undef, float %n, i32 0 + %vecinit1.i = insertelement <4 x float> %vecinit.i, float %n, i32 1 + %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %n, i32 2 + %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %n, i32 3 + %0 = fsub <4 x float> , %b + %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %vecinit3.i, <4 x float> %a) + ret <4 x float> %1 +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-2velem.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-2velem.ll index 9d6184243713..acffb14edf5d 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-2velem.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-2velem.ll @@ -45,6 +45,7 @@ declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmla_lane_s16: ; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %b @@ -55,6 +56,7 @@ entry: define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlaq_lane_s16: ; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %b @@ -65,6 +67,7 @@ entry: define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmla_lane_s32: ; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %b @@ -75,6 +78,7 @@ entry: define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlaq_lane_s32: ; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %b @@ -85,6 +89,7 @@ entry: define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmla_laneq_s16: ; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %b @@ -95,6 +100,7 @@ entry: define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlaq_laneq_s16: ; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %b @@ -105,6 +111,7 @@ entry: define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmla_laneq_s32: ; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %b @@ -115,6 +122,7 @@ entry: define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlaq_laneq_s32: ; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, 
{{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %b @@ -125,6 +133,7 @@ entry: define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmls_lane_s16: ; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %b @@ -135,6 +144,7 @@ entry: define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK: test_vmlsq_lane_s16: ; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %b @@ -145,6 +155,7 @@ entry: define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmls_lane_s32: ; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %b @@ -155,6 +166,7 @@ entry: define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK: test_vmlsq_lane_s32: ; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %b @@ -165,6 +177,7 @@ entry: define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmls_laneq_s16: ; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %b @@ -175,6 +188,7 @@ entry: define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK: test_vmlsq_laneq_s16: ; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %b @@ -185,6 +199,7 @@ entry: define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmls_laneq_s32: ; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %b @@ -195,6 +210,7 @@ entry: define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK: test_vmlsq_laneq_s32: ; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %b @@ -205,6 +221,7 @@ entry: define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmul_lane_s16: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %a @@ -214,6 +231,7 @@ entry: define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmulq_lane_s16: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %a @@ -223,6 +241,7 @@ entry: define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmul_lane_s32: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, 
{{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %a @@ -232,6 +251,7 @@ entry: define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmulq_lane_s32: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %a @@ -241,6 +261,7 @@ entry: define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmul_lane_u16: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %a @@ -250,6 +271,7 @@ entry: define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) { ; CHECK: test_vmulq_lane_u16: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %a @@ -259,6 +281,7 @@ entry: define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmul_lane_u32: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %a @@ -268,6 +291,7 @@ entry: define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) { ; CHECK: test_vmulq_lane_u32: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %a @@ -277,6 +301,7 @@ entry: define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmul_laneq_s16: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %a @@ -286,6 +311,7 @@ entry: define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmulq_laneq_s16: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %a @@ -295,6 +321,7 @@ entry: define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) { ; CHECK: test_vmul_laneq_s32: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %a @@ -304,6 +331,7 @@ entry: define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) { ; CHECK: test_vmulq_laneq_s32: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %a @@ -313,6 +341,7 @@ entry: define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmul_laneq_u16: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %a @@ -322,6 +351,7 @@ entry: define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) { ; CHECK: test_vmulq_laneq_u16: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> 
   %mul = mul <8 x i16> %shuffle, %a
@@ -331,6 +361,7 @@ entry:
 define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vmul_laneq_u32:
 ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
   %mul = mul <2 x i32> %shuffle, %a
@@ -340,6 +371,7 @@ entry:
 define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vmulq_laneq_u32:
 ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %mul = mul <4 x i32> %shuffle, %a
@@ -349,6 +381,7 @@ entry:
 define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
 ; CHECK: test_vfma_lane_f32:
 ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
   %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
@@ -360,6 +393,7 @@ declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
 define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
 ; CHECK: test_vfmaq_lane_f32:
 ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
@@ -371,6 +405,7 @@ declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
 define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
 ; CHECK: test_vfma_laneq_f32:
 ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
   %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
@@ -380,6 +415,7 @@ entry:
 define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
 ; CHECK: test_vfmaq_laneq_f32:
 ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
@@ -389,6 +425,7 @@ entry:
 define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
 ; CHECK: test_vfms_lane_f32:
 ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
   %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1>
@@ -399,6 +436,7 @@ entry:
 define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
 ; CHECK: test_vfmsq_lane_f32:
 ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
   %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -409,6 +447,7 @@ entry:
 define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
 ; CHECK: test_vfms_laneq_f32:
 ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
   %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3>
@@ -419,6 +458,7 @@ entry:
 define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
 ; CHECK: test_vfmsq_laneq_f32:
 ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
   %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -429,6 +469,7 @@ entry:
 define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
 ; CHECK: test_vfmaq_lane_f64:
 ; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
 entry:
   %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
   %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
@@ -440,6 +481,7 @@ declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
 define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
 ; CHECK: test_vfmaq_laneq_f64:
 ; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: ret
 entry:
   %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
   %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
@@ -449,6 +491,7 @@ entry:
 define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
 ; CHECK: test_vfmsq_lane_f64:
 ; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
 entry:
   %sub = fsub <1 x double> <double -0.000000e+00>, %v
   %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer
@@ -459,6 +502,7 @@ entry:
 define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
 ; CHECK: test_vfmsq_laneq_f64:
 ; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: ret
 entry:
   %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
   %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1>
@@ -466,9 +510,57 @@ entry:
   ret <2 x double> %0
 }
 
+define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfmas_laneq_f32
+; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+  %extract = extractelement <4 x float> %v, i32 3
+  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
+  ret float %0
+}
+
+declare float @llvm.fma.f32(float, float, float)
+
+define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) {
+; CHECK-LABEL: test_vfmsd_lane_f64
+; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
+entry:
+  %extract.rhs = extractelement <1 x double> %v, i32 0
+  %extract = fsub double -0.000000e+00, %extract.rhs
+  %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
+  ret double %0
+}
+
+declare double @llvm.fma.f64(double, double, double)
+
+define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) {
+; CHECK: test_vfmss_laneq_f32
+; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+  %extract.rhs = extractelement <4 x float> %v, i32 3
+  %extract = fsub float -0.000000e+00, %extract.rhs
+  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
+  ret float %0
+}
+
+define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) {
+; CHECK-LABEL: test_vfmsd_laneq_f64
+; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: ret
+entry:
+  %extract.rhs = extractelement <2 x double> %v, i32 1
+  %extract = fsub double -0.000000e+00, %extract.rhs
+  %0 = tail call double @llvm.fma.f64(double %b, double %extract, double %a)
+  ret double %0
+}
+
 define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlal_lane_s16:
 ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -479,6 +571,7 @@ entry:
 define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlal_lane_s32:
 ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -489,6 +582,7 @@ entry:
 define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlal_laneq_s16:
 ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -499,6 +593,7 @@ entry:
 define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlal_laneq_s32:
 ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -509,6 +604,7 @@ entry:
 define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlal_high_lane_s16:
 ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -520,6 +616,7 @@ entry:
 define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlal_high_lane_s32:
 ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -531,6 +628,7 @@ entry:
 define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlal_high_laneq_s16:
 ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -542,6 +640,7 @@ entry:
 define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlal_high_laneq_s32:
 ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
@@ -553,6 +652,7 @@ entry:
 define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlsl_lane_s16:
 ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -563,6 +663,7 @@ entry:
 define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlsl_lane_s32:
 ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -573,6 +674,7 @@ entry:
 define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlsl_laneq_s16:
 ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -583,6 +685,7 @@ entry:
 define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlsl_laneq_s32:
 ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -593,6 +696,7 @@ entry:
 define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlsl_high_lane_s16:
 ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -604,6 +708,7 @@ entry:
 define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlsl_high_lane_s32:
 ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -615,6 +720,7 @@ entry:
 define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlsl_high_laneq_s16:
 ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -626,6 +732,7 @@ entry:
 define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlsl_high_laneq_s32:
 ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
@@ -637,6 +744,7 @@ entry:
 define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlal_lane_u16:
 ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -647,6 +755,7 @@ entry:
 define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlal_lane_u32:
 ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -657,6 +766,7 @@ entry:
 define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlal_laneq_u16:
 ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -667,6 +777,7 @@ entry:
 define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlal_laneq_u32:
 ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -677,6 +788,7 @@ entry:
 define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlal_high_lane_u16:
 ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -688,6 +800,7 @@ entry:
 define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlal_high_lane_u32:
 ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -699,6 +812,7 @@ entry:
 define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlal_high_laneq_u16:
 ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -710,6 +824,7 @@ entry:
 define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlal_high_laneq_u32:
 ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
@@ -721,6 +836,7 @@ entry:
 define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlsl_lane_u16:
 ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -731,6 +847,7 @@ entry:
 define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlsl_lane_u32:
 ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -741,6 +858,7 @@ entry:
 define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlsl_laneq_u16:
 ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -751,6 +869,7 @@ entry:
 define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlsl_laneq_u32:
 ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -761,6 +880,7 @@ entry:
 define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlsl_high_lane_u16:
 ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -772,6 +892,7 @@ entry:
 define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlsl_high_lane_u32:
 ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -783,6 +904,7 @@ entry:
 define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlsl_high_laneq_u16:
 ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -794,6 +916,7 @@ entry:
 define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlsl_high_laneq_u32:
 ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
@@ -805,6 +928,7 @@ entry:
 define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vmull_lane_s16:
 ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -814,6 +938,7 @@ entry:
 define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vmull_lane_s32:
 ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -823,6 +948,7 @@ entry:
 define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vmull_lane_u16:
 ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -832,6 +958,7 @@ entry:
 define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vmull_lane_u32:
 ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -841,6 +968,7 @@ entry:
 define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vmull_high_lane_s16:
 ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -851,6 +979,7 @@ entry:
 define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vmull_high_lane_s32:
 ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -861,6 +990,7 @@ entry:
 define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vmull_high_lane_u16:
 ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -871,6 +1001,7 @@ entry:
 define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vmull_high_lane_u32:
 ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -881,6 +1012,7 @@ entry:
 define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
 ; CHECK: test_vmull_laneq_s16:
 ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -890,6 +1022,7 @@ entry:
 define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vmull_laneq_s32:
 ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -899,6 +1032,7 @@ entry:
 define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) {
 ; CHECK: test_vmull_laneq_u16:
 ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -908,6 +1042,7 @@ entry:
 define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vmull_laneq_u32:
 ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -917,6 +1052,7 @@ entry:
 define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
 ; CHECK: test_vmull_high_laneq_s16:
 ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -927,6 +1063,7 @@ entry:
 define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vmull_high_laneq_s32:
 ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
@@ -937,6 +1074,7 @@ entry:
 define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) {
 ; CHECK: test_vmull_high_laneq_u16:
 ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -947,6 +1085,7 @@ entry:
 define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vmull_high_laneq_u32:
 ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
@@ -957,6 +1096,7 @@ entry:
 define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vqdmlal_lane_s16:
 ; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -967,6 +1107,7 @@ entry:
 define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vqdmlal_lane_s32:
 ; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
   %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -977,6 +1118,7 @@ entry:
 define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vqdmlal_high_lane_s16:
 ; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -988,6 +1130,7 @@ entry:
 define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vqdmlal_high_lane_s32:
 ; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -999,6 +1142,7 @@ entry:
 define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vqdmlsl_lane_s16:
 ; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -1009,6 +1153,7 @@ entry:
 define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vqdmlsl_lane_s32:
 ; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
   %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -1019,6 +1164,7 @@ entry:
 define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vqdmlsl_high_lane_s16:
 ; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -1030,6 +1176,7 @@ entry:
 define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vqdmlsl_high_lane_s32:
 ; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -1041,6 +1188,7 @@ entry:
 define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vqdmull_lane_s16:
 ; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -1050,6 +1198,7 @@ entry:
 define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vqdmull_lane_s32:
 ; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
   %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -1059,6 +1208,7 @@ entry:
 define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) {
 ; CHECK: test_vqdmull_laneq_s16:
 ; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -1068,6 +1218,7 @@ entry:
 define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vqdmull_laneq_s32:
 ; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
   %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -1077,6 +1228,7 @@ entry:
 define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vqdmull_high_lane_s16:
 ; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -1087,6 +1239,7 @@ entry:
 define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vqdmull_high_lane_s32:
 ; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -1097,6 +1250,7 @@ entry:
 define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) {
 ; CHECK: test_vqdmull_high_laneq_s16:
 ; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -1107,6 +1261,7 @@ entry:
 define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vqdmull_high_laneq_s32:
 ; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
@@ -1117,6 +1272,7 @@ entry:
 define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vqdmulh_lane_s16:
 ; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %vqdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
@@ -1126,6 +1282,7 @@ entry:
 define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vqdmulhq_lane_s16:
 ; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   %vqdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
@@ -1135,6 +1292,7 @@ entry:
 define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vqdmulh_lane_s32:
 ; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
   %vqdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
@@ -1144,6 +1302,7 @@ entry:
 define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vqdmulhq_lane_s32:
 ; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %vqdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
@@ -1153,6 +1312,7 @@ entry:
 define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vqrdmulh_lane_s16:
 ; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %vqrdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
@@ -1162,6 +1322,7 @@ entry:
 define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vqrdmulhq_lane_s16:
 ; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   %vqrdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
@@ -1171,6 +1332,7 @@ entry:
 define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vqrdmulh_lane_s32:
 ; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
   %vqrdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
@@ -1180,6 +1342,7 @@ entry:
 define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vqrdmulhq_lane_s32:
 ; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %vqrdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
@@ -1189,6 +1352,7 @@ entry:
 define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
 ; CHECK: test_vmul_lane_f32:
 ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
   %mul = fmul <2 x float> %shuffle, %a
@@ -1198,6 +1362,7 @@ entry:
 define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) {
 ; CHECK: test_vmul_lane_f64:
 ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
 entry:
   %0 = bitcast <1 x double> %a to <8 x i8>
   %1 = bitcast <8 x i8> %0 to double
@@ -1210,6 +1375,7 @@ entry:
 define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
 ; CHECK: test_vmulq_lane_f32:
 ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %mul = fmul <4 x float> %shuffle, %a
@@ -1219,6 +1385,7 @@ entry:
 define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
 ; CHECK: test_vmulq_lane_f64:
 ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
   %mul = fmul <2 x double> %shuffle, %a
@@ -1228,6 +1395,7 @@ entry:
 define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) {
 ; CHECK: test_vmul_laneq_f32:
 ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
   %mul = fmul <2 x float> %shuffle, %a
@@ -1237,6 +1405,7 @@ entry:
 define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) {
 ; CHECK: test_vmul_laneq_f64:
 ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: ret
 entry:
   %0 = bitcast <1 x double> %a to <8 x i8>
   %1 = bitcast <8 x i8> %0 to double
@@ -1249,6 +1418,7 @@ entry:
 define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) {
 ; CHECK: test_vmulq_laneq_f32:
 ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %mul = fmul <4 x float> %shuffle, %a
@@ -1258,6 +1428,7 @@ entry:
 define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) {
 ; CHECK: test_vmulq_laneq_f64:
 ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
   %mul = fmul <2 x double> %shuffle, %a
@@ -1267,6 +1438,7 @@ entry:
 define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
 ; CHECK: test_vmulx_lane_f32:
 ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
   %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
@@ -1276,6 +1448,7 @@ entry:
 define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) {
 ; CHECK: test_vmulxq_lane_f32:
 ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
@@ -1285,6 +1458,7 @@ entry:
 define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
 ; CHECK: test_vmulxq_lane_f64:
 ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
   %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
@@ -1294,6 +1468,7 @@ entry:
 define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) {
 ; CHECK: test_vmulx_laneq_f32:
 ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
   %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
@@ -1303,6 +1478,7 @@ entry:
 define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) {
 ; CHECK: test_vmulxq_laneq_f32:
 ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
@@ -1312,6 +1488,7 @@ entry:
 define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) {
 ; CHECK: test_vmulxq_laneq_f64:
 ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1>
   %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
@@ -1321,6 +1498,7 @@ entry:
 define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmla_lane_s16_0:
 ; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
   %mul = mul <4 x i16> %shuffle, %b
@@ -1331,6 +1509,7 @@ entry:
 define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlaq_lane_s16_0:
 ; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
   %mul = mul <8 x i16> %shuffle, %b
@@ -1341,6 +1520,7 @@ entry:
 define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmla_lane_s32_0:
 ; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
   %mul = mul <2 x i32> %shuffle, %b
@@ -1351,6 +1531,7 @@ entry:
 define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlaq_lane_s32_0:
 ; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
   %mul = mul <4 x i32> %shuffle, %b
@@ -1361,6 +1542,7 @@ entry:
 define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmla_laneq_s16_0:
 ; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
   %mul = mul <4 x i16> %shuffle, %b
@@ -1371,6 +1553,7 @@ entry:
 define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlaq_laneq_s16_0:
 ; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
   %mul = mul <8 x i16> %shuffle, %b
@@ -1381,6 +1564,7 @@ entry:
 define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmla_laneq_s32_0:
 ; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
   %mul = mul <2 x i32> %shuffle, %b
@@ -1391,6 +1575,7 @@ entry:
 define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlaq_laneq_s32_0:
 ; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
   %mul = mul <4 x i32> %shuffle, %b
@@ -1401,6 +1586,7 @@ entry:
 define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmls_lane_s16_0:
 ; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
   %mul = mul <4 x i16> %shuffle, %b
@@ -1411,6 +1597,7 @@ entry:
 define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlsq_lane_s16_0:
 ; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
   %mul = mul <8 x i16> %shuffle, %b
@@ -1421,6 +1608,7 @@ entry:
 define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmls_lane_s32_0:
 ; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
   %mul = mul <2 x i32> %shuffle, %b
@@ -1431,6 +1619,7 @@ entry:
 define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlsq_lane_s32_0:
 ; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
   %mul = mul <4 x i32> %shuffle, %b
@@ -1441,6 +1630,7 @@ entry:
 define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmls_laneq_s16_0:
 ; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
   %mul = mul <4 x i16> %shuffle, %b
@@ -1451,6 +1641,7 @@ entry:
 define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlsq_laneq_s16_0:
 ; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
   %mul = mul <8 x i16> %shuffle, %b
@@ -1461,6 +1652,7 @@ entry:
 define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmls_laneq_s32_0:
 ; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
   %mul = mul <2 x i32> %shuffle, %b
@@ -1471,6 +1663,7 @@ entry:
 define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlsq_laneq_s32_0:
 ; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
   %mul = mul <4 x i32> %shuffle, %b
@@ -1481,6 +1674,7 @@ entry:
 define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vmul_lane_s16_0:
 ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
   %mul = mul <4 x i16> %shuffle, %a
@@ -1490,6 +1684,7 @@ entry:
 define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vmulq_lane_s16_0:
 ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
   %mul = mul <8 x i16> %shuffle, %a
@@ -1499,6 +1694,7 @@ entry:
 define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vmul_lane_s32_0:
 ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
   %mul = mul <2 x i32> %shuffle, %a
@@ -1508,6 +1704,7 @@ entry:
 define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vmulq_lane_s32_0:
 ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
   %mul = mul <4 x i32> %shuffle, %a
@@ -1517,6 +1714,7 @@ entry:
 define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vmul_lane_u16_0:
 ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
   %mul = mul <4 x i16> %shuffle, %a
@@ -1526,6 +1724,7 @@ entry:
 define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vmulq_lane_u16_0:
 ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
   %mul = mul <8 x i16> %shuffle, %a
@@ -1535,6 +1734,7 @@ entry:
 define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vmul_lane_u32_0:
 ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
   %mul = mul <2 x i32> %shuffle, %a
@@ -1544,6 +1744,7 @@ entry:
 define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vmulq_lane_u32_0:
 ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
   %mul = mul <4 x i32> %shuffle, %a
@@ -1553,6 +1754,7 @@ entry:
 define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
 ; CHECK: test_vmul_laneq_s16_0:
 ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
   %mul = mul <4 x i16> %shuffle, %a
@@ -1562,6 +1764,7 @@ entry:
 define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
 ; CHECK: test_vmulq_laneq_s16_0:
 ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
   %mul = mul <8 x i16> %shuffle, %a
@@ -1571,6 +1774,7 @@ entry:
 define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vmul_laneq_s32_0:
 ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
   %mul = mul <2 x i32> %shuffle, %a
@@ -1580,6 +1784,7 @@ entry:
 define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vmulq_laneq_s32_0:
 ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
   %mul = mul <4 x i32> %shuffle, %a
@@ -1589,6 +1794,7 @@ entry:
 define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
 ; CHECK: test_vmul_laneq_u16_0:
 ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
   %mul = mul <4 x i16> %shuffle, %a
@@ -1598,6 +1804,7 @@ entry:
 define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
 ; CHECK: test_vmulq_laneq_u16_0:
 ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer
   %mul = mul <8 x i16> %shuffle, %a
@@ -1607,6 +1814,7 @@ entry:
 define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vmul_laneq_u32_0:
 ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
   %mul = mul <2 x i32> %shuffle, %a
@@ -1616,6 +1824,7 @@ entry:
 define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vmulq_laneq_u32_0:
 ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
   %mul = mul <4 x i32> %shuffle, %a
@@ -1625,6 +1834,7 @@ entry:
 define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
 ; CHECK: test_vfma_lane_f32_0:
 ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
   %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
@@ -1634,6 +1844,7 @@ entry:
 define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
 ; CHECK: test_vfmaq_lane_f32_0:
 ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
   %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
@@ -1643,6 +1854,7 @@ entry:
 define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
 ; CHECK: test_vfma_laneq_f32_0:
 ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
   %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
@@ -1652,6 +1864,7 @@ entry:
 define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
 ; CHECK: test_vfmaq_laneq_f32_0:
 ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
   %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
@@ -1661,6 +1874,7 @@ entry:
 define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
 ; CHECK: test_vfms_lane_f32_0:
 ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
   %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer
@@ -1671,6 +1885,7 @@ entry:
 define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
 ; CHECK: test_vfmsq_lane_f32_0:
 ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
   %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer
@@ -1681,6 +1896,7 @@ entry:
 define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) {
 ; CHECK: test_vfms_laneq_f32_0:
 ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
   %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer
@@ -1691,6 +1907,7 @@ entry:
 define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) {
 ; CHECK: test_vfmsq_laneq_f32_0:
 ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
   %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer
@@ -1701,6 +1918,7 @@ entry:
 define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
 ; CHECK: test_vfmaq_laneq_f64_0:
 ; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
 entry:
   %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
   %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a)
@@ -1710,6 +1928,7 @@ entry:
 define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) {
 ; CHECK: test_vfmsq_laneq_f64_0:
 ; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
 entry:
   %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v
   %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer
@@ -1720,6 +1939,7 @@ entry:
 define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlal_lane_s16_0:
 ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -1730,6 +1950,7 @@ entry:
 define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlal_lane_s32_0:
 ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -1740,6 +1961,7 @@ entry:
 define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlal_laneq_s16_0:
 ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -1750,6 +1972,7 @@ entry:
 define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlal_laneq_s32_0:
 ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -1760,6 +1983,7 @@ entry:
 define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlal_high_lane_s16_0:
 ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -1771,6 +1995,7 @@ entry:
 define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlal_high_lane_s32_0:
 ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -1782,6 +2007,7 @@ entry:
 define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlal_high_laneq_s16_0:
 ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
@@ -1793,6 +2019,7 @@ entry:
 define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlal_high_laneq_s32_0:
 ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
@@ -1804,6 +2031,7 @@ entry:
 define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlsl_lane_s16_0:
 ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -1814,6 +2042,7 @@ entry:
 define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlsl_lane_s32_0:
 ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -1824,6 +2053,7 @@ entry:
 define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlsl_laneq_s16_0:
 ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -1834,6 +2064,7 @@ entry:
 define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlsl_laneq_s32_0:
 ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -1844,6 +2075,7 @@ entry:
 define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlsl_high_lane_s16_0:
 ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -1855,6 +2087,7 @@ entry:
 define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlsl_high_lane_s32_0:
 ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -1866,6 +2099,7 @@ entry:
 define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlsl_high_laneq_s16_0:
 ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
@@ -1877,6 +2111,7 @@ entry:
 define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlsl_high_laneq_s32_0:
 ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
@@ -1888,6 +2123,7 @@ entry:
 define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlal_lane_u16_0:
 ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -1898,6 +2134,7 @@ entry:
 define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlal_lane_u32_0:
 ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -1908,6 +2145,7 @@ entry:
 define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlal_laneq_u16_0:
 ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -1918,6 +2156,7 @@ entry:
 define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlal_laneq_u32_0:
 ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -1928,6 +2167,7 @@ entry:
 define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlal_high_lane_u16_0:
 ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -1939,6 +2179,7 @@ entry:
 define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlal_high_lane_u32_0:
 ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -1950,6 +2191,7 @@ entry:
 define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlal_high_laneq_u16_0:
 ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
@@ -1961,6 +2203,7 @@ entry:
 define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlal_high_laneq_u32_0:
 ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
@@ -1972,6 +2215,7 @@ entry:
 define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlsl_lane_u16_0:
 ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -1982,6 +2226,7 @@ entry:
 define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlsl_lane_u32_0:
 ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -1992,6 +2237,7 @@ entry:
 define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlsl_laneq_u16_0:
 ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -2002,6 +2248,7 @@ entry:
 define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlsl_laneq_u32_0:
 ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -2012,6 +2259,7 @@ entry:
 define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vmlsl_high_lane_u16_0:
 ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -2023,6 +2271,7 @@ entry:
 define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vmlsl_high_lane_u32_0:
 ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -2034,6 +2283,7 @@ entry:
 define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) {
 ; CHECK: test_vmlsl_high_laneq_u16_0:
 ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
@@ -2045,6 +2295,7 @@ entry:
 define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) {
 ; CHECK: test_vmlsl_high_laneq_u32_0:
 ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
@@ -2056,6 +2307,7 @@ entry:
 define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vmull_lane_s16_0:
 ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -2065,6 +2317,7 @@ entry:
 define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vmull_lane_s32_0:
 ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -2074,6 +2327,7 @@ entry:
 define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vmull_lane_u16_0:
 ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -2083,6 +2337,7 @@ entry:
 define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vmull_lane_u32_0:
 ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -2092,6 +2347,7 @@ entry:
 define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vmull_high_lane_s16_0:
 ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -2102,6 +2358,7 @@ entry:
 define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vmull_high_lane_s32_0:
 ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -2112,6 +2369,7 @@ entry:
 define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vmull_high_lane_u16_0:
 ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -2122,6 +2380,7 @@ entry:
 define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vmull_high_lane_u32_0:
 ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -2132,6 +2391,7 @@ entry:
 define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
 ; CHECK: test_vmull_laneq_s16_0:
 ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -2141,6 +2401,7 @@ entry:
 define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vmull_laneq_s32_0:
 ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -2150,6 +2411,7 @@ entry:
 define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) {
 ; CHECK: test_vmull_laneq_u16_0:
 ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
   %vmull2.i = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -2159,6 +2421,7 @@ entry:
 define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vmull_laneq_u32_0:
 ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
   %vmull2.i = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -2168,6 +2431,7 @@ entry:
 define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
 ; CHECK: test_vmull_high_laneq_s16_0:
 ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
@@ -2178,6 +2442,7 @@ entry:
 define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vmull_high_laneq_s32_0:
 ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
@@ -2188,6 +2453,7 @@ entry:
 define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) {
 ; CHECK: test_vmull_high_laneq_u16_0:
 ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
@@ -2198,6 +2464,7 @@ entry:
 define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vmull_high_laneq_u32_0:
 ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
@@ -2208,6 +2475,7 @@ entry:
 define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vqdmlal_lane_s16_0:
 ; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
   %vqdmlal2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -2218,6 +2486,7 @@ entry:
 define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vqdmlal_lane_s32_0:
 ; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
   %vqdmlal2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -2228,6 +2497,7 @@ entry:
 define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vqdmlal_high_lane_s16_0:
 ; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -2239,6 +2509,7 @@ entry:
 define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vqdmlal_high_lane_s32_0:
 ; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -2250,6 +2521,7 @@ entry:
 define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vqdmlsl_lane_s16_0:
 ; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
   %vqdmlsl2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle)
@@ -2260,6 +2532,7 @@ entry:
 define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vqdmlsl_lane_s32_0:
 ; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
   %vqdmlsl2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle)
@@ -2270,6 +2543,7 @@ entry:
 define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK: test_vqdmlsl_high_lane_s16_0:
 ; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -2281,6 +2555,7 @@ entry:
 define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK: test_vqdmlsl_high_lane_s32_0:
 ; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -2292,6 +2567,7 @@ entry:
 define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vqdmull_lane_s16_0:
 ; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
   %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -2301,6 +2577,7 @@ entry:
 define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vqdmull_lane_s32_0:
 ; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
   %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -2310,6 +2587,7 @@ entry:
 define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) {
 ; CHECK: test_vqdmull_laneq_s16_0:
 ; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
   %vqdmull2.i = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle)
@@ -2319,6 +2597,7 @@ entry:
 define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vqdmull_laneq_s32_0:
 ; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
   %vqdmull2.i = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle)
@@ -2328,6 +2607,7 @@ entry:
 define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vqdmull_high_lane_s16_0:
 ; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -2338,6 +2618,7 @@ entry:
 define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vqdmull_high_lane_s32_0:
 ; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -2348,6 +2629,7 @@ entry:
 define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) {
 ; CHECK: test_vqdmull_high_laneq_s16_0:
 ; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer
@@ -2358,6 +2640,7 @@ entry:
 define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) {
 ; CHECK: test_vqdmull_high_laneq_s32_0:
 ; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer
@@ -2368,6 +2651,7 @@ entry:
 define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vqdmulh_lane_s16_0:
 ; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
   %vqdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
@@ -2377,6 +2661,7 @@ entry:
 define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vqdmulhq_lane_s16_0:
 ; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
   %vqdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
@@ -2386,6 +2671,7 @@ entry:
 define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vqdmulh_lane_s32_0:
 ; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
   %vqdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
@@ -2395,6 +2681,7 @@ entry:
 define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vqdmulhq_lane_s32_0:
 ; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
   %vqdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
@@ -2404,6 +2691,7 @@ entry:
 define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vqrdmulh_lane_s16_0:
 ; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer
   %vqrdmulh2.i = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
@@ -2413,6 +2701,7 @@ entry:
 define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK: test_vqrdmulhq_lane_s16_0:
 ; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer
   %vqrdmulh2.i = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
@@ -2422,6 +2711,7 @@ entry:
 define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vqrdmulh_lane_s32_0:
 ; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
   %vqrdmulh2.i = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
@@ -2431,6 +2721,7 @@ entry:
 define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK: test_vqrdmulhq_lane_s32_0:
 ; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
   %vqrdmulh2.i = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
@@ -2440,6 +2731,7 @@ entry:
 define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
 ; CHECK: test_vmul_lane_f32_0:
 ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
   %mul = fmul <2 x float> %shuffle, %a
@@ -2449,6 +2741,7 @@ entry:
 define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
 ; CHECK: test_vmulq_lane_f32_0:
 ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
   %mul = fmul <4 x float> %shuffle, %a
@@ -2458,6 +2751,7 @@ entry:
 define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
 ; CHECK: test_vmul_laneq_f32_0:
 ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
   %mul = fmul <2 x float> %shuffle, %a
@@ -2467,6 +2761,7 @@ entry:
 define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) {
 ; CHECK: test_vmul_laneq_f64_0:
 ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
 entry:
   %0 = bitcast <1 x double> %a to <8 x i8>
   %1 = bitcast <8 x i8> %0 to double
@@ -2479,6 +2774,7 @@ entry:
 define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
 ; CHECK: test_vmulq_laneq_f32_0:
 ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
   %mul = fmul <4 x float> %shuffle, %a
@@ -2488,6 +2784,7 @@ entry:
 define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
 ; CHECK: test_vmulq_laneq_f64_0:
 ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
   %mul = fmul <2 x double> %shuffle, %a
@@ -2497,6 +2794,7 @@ entry:
 define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
 ; CHECK: test_vmulx_lane_f32_0:
 ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer
   %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
@@ -2506,6 +2804,7 @@ entry:
 define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
 ; CHECK: test_vmulxq_lane_f32_0:
 ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer
   %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
@@ -2515,6 +2814,7 @@ entry:
 define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
 ; CHECK: test_vmulxq_lane_f64_0:
 ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer
   %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
@@ -2524,6 +2824,7 @@ entry:
 define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) {
 ; CHECK: test_vmulx_laneq_f32_0:
 ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer
   %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.vmulx.v2f32(<2 x float> %a, <2 x float> %shuffle)
@@ -2533,6 +2834,7 @@ entry:
 define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) {
 ; CHECK: test_vmulxq_laneq_f32_0:
 ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
   %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.vmulx.v4f32(<4 x float> %a, <4 x float> %shuffle)
@@ -2542,6 +2844,7 @@ entry:
 define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) {
 ; CHECK: test_vmulxq_laneq_f64_0:
 ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+; CHECK-NEXT: ret
 entry:
   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
   %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double> %a, <2 x double> %shuffle)
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-3vdiff.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-3vdiff.ll
index 171e2b2edad0..96400eb30390 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-3vdiff.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-3vdiff.ll
@@ -1804,3 +1804,30 @@ entry:
   ret <8 x i16> %vmull.i.i
 }
+define i128 @test_vmull_p64(i64 %a, i64 %b) #4 {
+; CHECK: test_vmull_p64
+; CHECK: pmull {{v[0-9]+}}.1q, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d
+entry:
+  %vmull.i = insertelement <1 x i64> undef, i64 %a, i32 0
+  %vmull1.i = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64> %vmull.i, <1 x i64> %vmull1.i) #1
+  %vmull3.i = bitcast <16 x i8> %vmull2.i to i128
+  ret i128 %vmull3.i
+}
+
+define i128 @test_vmull_high_p64(<2 x i64> %a, <2 x i64> %b) #4 {
+; CHECK: test_vmull_high_p64
+; CHECK: pmull2 {{v[0-9]+}}.1q, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+entry:
+  %0 = extractelement <2 x i64> %a, i32 1
+  %1 = extractelement <2 x i64> %b, i32 1
+  %vmull.i.i = insertelement <1 x i64> undef, i64 %0, i32 0
+  %vmull1.i.i = insertelement <1 x i64> undef, i64 %1, i32 0
+  %vmull2.i.i = tail call <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64> %vmull.i.i, <1 x i64> %vmull1.i.i) #1
+  %vmull3.i.i = bitcast <16 x i8> %vmull2.i.i to i128
+  ret i128 %vmull3.i.i
+}
+
+declare <16 x i8> @llvm.aarch64.neon.vmull.p64(<1 x i64>, <1 x i64>) #5
+
+
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-aba-abd.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-aba-abd.ll
index ee22a45c751b..54009849ef60 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-aba-abd.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-aba-abd.ll
@@ -157,6 +157,16 @@ define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   ret <2 x i32> %abd
 }
+define <2 x i32> @test_sabd_v2i32_const() {
+; CHECK: test_sabd_v2i32_const:
+; CHECK: movi d1, #0xffffffff0000
+; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s
+  %1 = tail call <2 x i32> @llvm.arm.neon.vabds.v2i32(
+    <2 x i32> ,
+    <2 x i32> )
+  ret <2 x i32> %1
+}
+
 define <2 x i32> @test_saba_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; CHECK: test_saba_v2i32:
   %abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-across.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-across.ll
index 733db970cf33..6d30c953022c 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-across.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-across.ll
@@ -1,12 +1,12 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
-declare <1 x float> @llvm.aarch64.neon.vminnmv.v1f32.v4f32(<4 x float>)
+declare float @llvm.aarch64.neon.vminnmv(<4 x float>)
-declare <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v4f32(<4 x float>)
+declare float @llvm.aarch64.neon.vmaxnmv(<4 x float>)
-declare <1 x float> @llvm.aarch64.neon.vminv.v1f32.v4f32(<4 x float>)
+declare float @llvm.aarch64.neon.vminv(<4 x float>)
-declare <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v4f32(<4 x float>)
+declare float @llvm.aarch64.neon.vmaxv(<4 x float>)
 declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v4i32(<4 x i32>)
@@ -442,8 +442,7 @@ define float @test_vmaxvq_f32(<4 x float> %a) {
 ; CHECK: test_vmaxvq_f32:
 ; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
 entry:
-  %vmaxv.i = tail call <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v4f32(<4 x float> %a)
-  %0 = extractelement <1 x float> %vmaxv.i, i32 0
+  %0 = call float @llvm.aarch64.neon.vmaxv(<4 x float> %a)
   ret float %0
 }
@@ -451,8 +450,7 @@ define float @test_vminvq_f32(<4 x float> %a) {
 ; CHECK: test_vminvq_f32:
 ; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s
 entry:
-  %vminv.i = tail call <1 x float> @llvm.aarch64.neon.vminv.v1f32.v4f32(<4 x float> %a)
-  %0 = extractelement <1 x float> %vminv.i, i32 0
+  %0 = call float @llvm.aarch64.neon.vminv(<4 x float> %a)
   ret float %0
 }
@@ -460,8 +458,7 @@ define float @test_vmaxnmvq_f32(<4 x float> %a) {
 ; CHECK: test_vmaxnmvq_f32:
 ; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
 entry:
-  %vmaxnmv.i = tail call <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v4f32(<4 x float> %a)
-  %0 = extractelement <1 x float> %vmaxnmv.i, i32 0
+  %0 = call float @llvm.aarch64.neon.vmaxnmv(<4 x float> %a)
   ret float %0
 }
@@ -469,8 +466,7 @@ define float @test_vminnmvq_f32(<4 x float> %a) {
 ; CHECK: test_vminnmvq_f32:
 ; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
 entry:
-  %vminnmv.i = tail call <1 x float> @llvm.aarch64.neon.vminnmv.v1f32.v4f32(<4 x float> %a)
-  %0 = extractelement <1 x float> %vminnmv.i, i32 0
+  %0 = call float @llvm.aarch64.neon.vminnmv(<4 x float> %a)
   ret float %0
 }
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-add-pairwise.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-add-pairwise.ll
index 1abfed31908c..32d8222ded29 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-add-pairwise.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-add-pairwise.ll
@@ -90,3 +90,12 @@ define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
   ret <2 x double> %val
 }
+define i32 @test_vaddv.v2i32(<2 x i32> %a) {
+; CHECK-LABEL: test_vaddv.v2i32
+; CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %1 = tail call <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v2i32(<2 x i32> %a)
+  %2 = extractelement <1 x i32> %1, i32 0
+  ret i32 %2
+}
+
+declare <1 x i32> @llvm.aarch64.neon.vaddv.v1i32.v2i32(<2 x i32>)
\ No newline at end of file
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-add-sub.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-add-sub.ll
index 566e02957616..5dc95e6f6e28 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-add-sub.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-add-sub.ll
@@ -1,120 +1,237 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 define <8 x i8> @add8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: add {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+;CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
   %tmp3 = add <8 x i8> %A, %B;
   ret <8 x i8> %tmp3
 }
 define <16 x i8> @add16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: add {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+;CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
   %tmp3 = add <16 x i8> %A, %B;
   ret <16 x i8> %tmp3
 }
 define <4 x i16> @add4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: add {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+;CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
   %tmp3 = add <4 x i16> %A, %B;
   ret <4 x i16> %tmp3
 }
 define <8 x i16> @add8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: add {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+;CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
   %tmp3 = add <8 x i16> %A, %B;
   ret <8 x i16> %tmp3
 }
 define <2 x i32> @add2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: add {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+;CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
   %tmp3 = add <2 x i32> %A, %B;
   ret <2 x i32> %tmp3
 }
 define <4 x i32> @add4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: add {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+;CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
   %tmp3 = add <4 x i32> %A, %B;
   ret <4 x i32> %tmp3
 }
 define <2 x i64> @add2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: add {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
   %tmp3 = add <2 x i64> %A, %B;
   ret <2 x i64> %tmp3
 }
 define <2 x float> @add2xfloat(<2 x float> %A, <2 x float> %B) {
-;CHECK: fadd {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+;CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
   %tmp3 = fadd <2 x float> %A, %B;
   ret <2 x float> %tmp3
 }
 define <4 x float> @add4xfloat(<4 x float> %A, <4 x float> %B) {
-;CHECK: fadd {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+;CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
   %tmp3 = fadd <4 x float> %A, %B;
   ret <4 x float> %tmp3
 }
 define <2 x double> @add2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: add {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+;CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
   %tmp3 = fadd <2 x double> %A, %B;
   ret <2 x double> %tmp3
 }
 define <8 x i8> @sub8xi8(<8 x i8> %A, <8 x i8> %B) {
-;CHECK: sub {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+;CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
   %tmp3 = sub <8 x i8> %A, %B;
   ret <8 x i8> %tmp3
 }
 define <16 x i8> @sub16xi8(<16 x i8> %A, <16 x i8> %B) {
-;CHECK: sub {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+;CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
   %tmp3 = sub <16 x i8> %A, %B;
   ret <16 x i8> %tmp3
 }
 define <4 x i16> @sub4xi16(<4 x i16> %A, <4 x i16> %B) {
-;CHECK: sub {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h
+;CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
   %tmp3 = sub <4 x i16> %A, %B;
   ret <4 x i16> %tmp3
 }
 define <8 x i16> @sub8xi16(<8 x i16> %A, <8 x i16> %B) {
-;CHECK: sub {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h
+;CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
   %tmp3 = sub <8 x i16> %A, %B;
   ret <8 x i16> %tmp3
 }
 define <2 x i32> @sub2xi32(<2 x i32> %A, <2 x i32> %B) {
-;CHECK: sub {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+;CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
   %tmp3 = sub <2 x i32> %A, %B;
   ret <2 x i32> %tmp3
 }
 define <4 x i32> @sub4x32(<4 x i32> %A, <4 x i32> %B) {
-;CHECK: sub {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+;CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
   %tmp3 = sub <4 x i32> %A, %B;
   ret <4 x i32> %tmp3
 }
 define <2 x i64> @sub2xi64(<2 x i64> %A, <2 x i64> %B) {
-;CHECK: sub {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+;CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
   %tmp3 = sub <2 x i64> %A, %B;
   ret <2 x i64> %tmp3
 }
 define <2 x float> @sub2xfloat(<2 x float> %A, <2 x float> %B) {
-;CHECK: fsub {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s
+;CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
   %tmp3 = fsub <2 x float> %A, %B;
   ret <2 x float> %tmp3
 }
 define <4 x float> @sub4xfloat(<4 x float> %A, <4 x float> %B) {
-;CHECK: fsub {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s
+;CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
   %tmp3 = fsub <4 x float> %A, %B;
   ret <4 x float> %tmp3
 }
 define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) {
-;CHECK: sub {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d
+;CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
   %tmp3 = fsub <2 x double> %A, %B;
   ret <2 x double> %tmp3
 }
+define <1 x double> @test_vadd_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vadd_f64
+; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fadd <1 x double> %a, %b
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vmul_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmul_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fmul <1 x double> %a, %b
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vdiv_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vdiv_f64
+; CHECK: fdiv d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fdiv <1 x double> %a, %b
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vmla_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vmla_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: fadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fmul <1 x double> %b, %c
+  %2 = fadd <1 x double> %1, %a
+  ret <1 x double> %2
+}
+
+define <1 x double> @test_vmls_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vmls_f64
+; CHECK: fmul d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fmul <1 x double> %b, %c
+  %2 = fsub <1 x double> %a, %1
+  ret <1 x double> %2
+}
+
+define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vfms_f64
+; CHECK: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fsub <1 x double> <double -0.000000e+00>, %b
+  %2 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %1, <1 x double> %c, <1 x double> %a)
+  ret <1 x double> %2
+}
+
+define <1 x double> @test_vfma_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) {
+; CHECK-LABEL: test_vfma_f64
+; CHECK: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vsub_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vsub_f64
+; CHECK: fsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fsub <1 x double> %a, %b
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vabd_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vabd_f64
+; CHECK: fabd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vmax_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmax_f64
+; CHECK: fmax d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vmin_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmin_f64
+; CHECK: fmin d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vmaxnm_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vmaxnm_f64
+; CHECK: fmaxnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vminnm_f64(<1 x double> %a, <1 x double> %b) {
+; CHECK-LABEL: test_vminnm_f64
+; CHECK: fminnm d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double> %a, <1 x double> %b)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vabs_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vabs_f64
+; CHECK: fabs d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = tail call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
+  ret <1 x double> %1
+}
+
+define <1 x double> @test_vneg_f64(<1 x double> %a) {
+; CHECK-LABEL: test_vneg_f64
+; CHECK: fneg d{{[0-9]+}}, d{{[0-9]+}}
+  %1 = fsub <1 x double> <double -0.000000e+00>, %a
+  ret <1 x double> %1
+}
+
+declare <1 x double> @llvm.fabs.v1f64(<1 x double>)
+declare <1 x double> @llvm.aarch64.neon.vminnm.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.aarch64.neon.vmaxnm.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vmins.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vmaxs.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.arm.neon.vabds.v1f64(<1 x double>, <1 x double>)
+declare <1 x double> @llvm.fma.v1f64(<1 x double>, <1 x double>, <1 x double>)
\ No newline at end of file
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-bitcast.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-bitcast.ll
index f9ec70484024..61099d48fdd2 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-bitcast.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-bitcast.ll
@@ -20,8 +20,8 @@ define <2 x i32> @test_v8i8_to_v2i32(<8 x i8> %in) nounwind {
   ret <2 x i32> %val
 }
-define <2 x float> @test_v8i8_to_v1f32(<8 x i8> %in) nounwind{
-; CHECK: test_v8i8_to_v1f32:
+define <2 x float> @test_v8i8_to_v2f32(<8 x i8> %in) nounwind{
+; CHECK: test_v8i8_to_v2f32:
 ; CHECK-NEXT: // BB#0:
 ; CHECK-NEXT: ret
@@ -67,8 +67,8 @@ define <2 x i32> @test_v4i16_to_v2i32(<4 x i16> %in) nounwind {
   ret <2 x i32> %val
 }
-define <2 x float> @test_v4i16_to_v1f32(<4 x i16> %in) nounwind{
-; CHECK: test_v4i16_to_v1f32:
+define <2 x float> @test_v4i16_to_v2f32(<4 x i16> %in) nounwind{
+; CHECK: test_v4i16_to_v2f32:
 ; CHECK-NEXT: // BB#0:
 ; CHECK-NEXT: ret
@@ -114,8 +114,8 @@ define <2 x i32> @test_v2i32_to_v2i32(<2 x i32> %in) nounwind {
   ret <2 x i32> %val
 }
-define <2 x float> @test_v2i32_to_v1f32(<2 x i32> %in) nounwind{
-; CHECK: test_v2i32_to_v1f32:
+define <2 x float> @test_v2i32_to_v2f32(<2 x i32> %in) nounwind{
+; CHECK: test_v2i32_to_v2f32:
 ; CHECK-NEXT: // BB#0:
 ; CHECK-NEXT: ret
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 1c43b979fc44..b0e515666775 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -2,45 +2,45 @@
 define <8 x i8> @and8xi8(<8 x i8> %a, <8 x i8> %b) {
-;CHECK: and {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+;CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
   %tmp1 = and <8 x i8> %a, %b;
   ret <8 x i8> %tmp1
 }
 define <16 x i8> @and16xi8(<16 x i8> %a, <16 x i8> %b) {
-;CHECK: and {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+;CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
   %tmp1 = and <16 x i8> %a, %b;
   ret <16 x i8> %tmp1
 }
 define <8 x i8> @orr8xi8(<8 x i8> %a, <8 x i8> %b) {
-;CHECK: orr {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+;CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
   %tmp1 = or <8 x i8> %a, %b;
   ret <8 x i8> %tmp1
 }
 define <16 x i8> @orr16xi8(<16 x i8> %a, <16 x i8> %b) {
-;CHECK: orr {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+;CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
   %tmp1 = or <16 x i8> %a, %b;
   ret <16 x i8> %tmp1
 }
 define <8 x i8> @xor8xi8(<8 x i8> %a, <8 x i8> %b) {
-;CHECK: eor {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+;CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
   %tmp1 = xor <8 x i8> %a, %b;
   ret <8 x i8> %tmp1
 }
 define <16 x i8> @xor16xi8(<16 x i8> %a, <16 x i8> %b) {
-;CHECK: eor {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b
+;CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
   %tmp1 = xor <16 x i8> %a, %b;
   ret <16 x i8> %tmp1
 }
 define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b) {
-;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b
+;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
   %tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
   %tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 >
   %tmp3 = or <8 x i8> %tmp1, %tmp2
i8> %b) { } define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) { -;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 > %tmp3 = or <16 x i8> %tmp1, %tmp2 @@ -56,397 +56,397 @@ define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) { } define <8 x i8> @orn8xi8(<8 x i8> %a, <8 x i8> %b) { -;CHECK: orn {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <8 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp2 = or <8 x i8> %a, %tmp1 ret <8 x i8> %tmp2 } define <16 x i8> @orn16xi8(<16 x i8> %a, <16 x i8> %b) { -;CHECK: orn {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <16 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp2 = or <16 x i8> %a, %tmp1 ret <16 x i8> %tmp2 } define <8 x i8> @bic8xi8(<8 x i8> %a, <8 x i8> %b) { -;CHECK: bic {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <8 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp2 = and <8 x i8> %a, %tmp1 ret <8 x i8> %tmp2 } define <16 x i8> @bic16xi8(<16 x i8> %a, <16 x i8> %b) { -;CHECK: bic {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <16 x i8> %b, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > %tmp2 = and <16 x i8> %a, %tmp1 ret <16 x i8> %tmp2 } define <2 x i32> @orrimm2s_lsl0(<2 x i32> %a) { -;CHECK: orr {{v[0-31]+}}.2s, #0xff +;CHECK: orr {{v[0-9]+}}.2s, #0xff %tmp1 = or <2 x i32> %a, < i32 255, i32 255> ret <2 x i32> %tmp1 } define <2 x i32> @orrimm2s_lsl8(<2 x i32> %a) { -;CHECK: orr {{v[0-31]+}}.2s, #0xff, lsl #8 +;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #8 %tmp1 = or <2 x i32> %a, < i32 65280, i32 65280> ret <2 x i32> %tmp1 } define <2 x i32> @orrimm2s_lsl16(<2 x i32> %a) { -;CHECK: orr {{v[0-31]+}}.2s, #0xff, lsl #16 +;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #16 %tmp1 = or <2 x i32> %a, < i32 16711680, i32 16711680> ret <2 x i32> %tmp1 } define <2 x i32> @orrimm2s_lsl24(<2 x i32> %a) { -;CHECK: orr {{v[0-31]+}}.2s, #0xff, lsl #24 +;CHECK: orr {{v[0-9]+}}.2s, #0xff, lsl #24 %tmp1 = or <2 x i32> %a, < i32 4278190080, i32 4278190080> ret <2 x i32> %tmp1 } define <4 x i32> @orrimm4s_lsl0(<4 x i32> %a) { -;CHECK: orr {{v[0-31]+}}.4s, #0xff +;CHECK: orr {{v[0-9]+}}.4s, #0xff %tmp1 = or <4 x i32> %a, < i32 255, i32 255, i32 255, i32 255> ret <4 x i32> %tmp1 } define <4 x i32> @orrimm4s_lsl8(<4 x i32> %a) { -;CHECK: orr {{v[0-31]+}}.4s, #0xff, lsl #8 +;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #8 %tmp1 = or <4 x i32> %a, < i32 65280, i32 65280, i32 65280, i32 65280> ret <4 x i32> %tmp1 } define <4 x i32> @orrimm4s_lsl16(<4 x i32> %a) { -;CHECK: orr {{v[0-31]+}}.4s, #0xff, lsl #16 +;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #16 %tmp1 = or <4 x i32> %a, < i32 16711680, i32 16711680, i32 16711680, i32 16711680> ret <4 x i32> %tmp1 } define <4 x i32> @orrimm4s_lsl24(<4 x i32> %a) { -;CHECK: orr {{v[0-31]+}}.4s, 
#0xff, lsl #24 +;CHECK: orr {{v[0-9]+}}.4s, #0xff, lsl #24 %tmp1 = or <4 x i32> %a, < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080> ret <4 x i32> %tmp1 } define <4 x i16> @orrimm4h_lsl0(<4 x i16> %a) { -;CHECK: orr {{v[0-31]+}}.4h, #0xff +;CHECK: orr {{v[0-9]+}}.4h, #0xff %tmp1 = or <4 x i16> %a, < i16 255, i16 255, i16 255, i16 255 > ret <4 x i16> %tmp1 } define <4 x i16> @orrimm4h_lsl8(<4 x i16> %a) { -;CHECK: orr {{v[0-31]+}}.4h, #0xff, lsl #8 +;CHECK: orr {{v[0-9]+}}.4h, #0xff, lsl #8 %tmp1 = or <4 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280 > ret <4 x i16> %tmp1 } define <8 x i16> @orrimm8h_lsl0(<8 x i16> %a) { -;CHECK: orr {{v[0-31]+}}.8h, #0xff +;CHECK: orr {{v[0-9]+}}.8h, #0xff %tmp1 = or <8 x i16> %a, < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 > ret <8 x i16> %tmp1 } define <8 x i16> @orrimm8h_lsl8(<8 x i16> %a) { -;CHECK: orr {{v[0-31]+}}.8h, #0xff, lsl #8 +;CHECK: orr {{v[0-9]+}}.8h, #0xff, lsl #8 %tmp1 = or <8 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 > ret <8 x i16> %tmp1 } define <2 x i32> @bicimm2s_lsl0(<2 x i32> %a) { -;CHECK: bic {{v[0-31]+}}.2s, #0x10 +;CHECK: bic {{v[0-9]+}}.2s, #0x10 %tmp1 = and <2 x i32> %a, < i32 4294967279, i32 4294967279 > ret <2 x i32> %tmp1 } define <2 x i32> @bicimm2s_lsl8(<2 x i32> %a) { -;CHECK: bic {{v[0-31]+}}.2s, #0x10, lsl #8 +;CHECK: bic {{v[0-9]+}}.2s, #0x10, lsl #8 %tmp1 = and <2 x i32> %a, < i32 18446744073709547519, i32 18446744073709547519 > ret <2 x i32> %tmp1 } define <2 x i32> @bicimm2s_lsl16(<2 x i32> %a) { -;CHECK: bic {{v[0-31]+}}.2s, #0x10, lsl #16 +;CHECK: bic {{v[0-9]+}}.2s, #0x10, lsl #16 %tmp1 = and <2 x i32> %a, < i32 18446744073708503039, i32 18446744073708503039 > ret <2 x i32> %tmp1 } define <2 x i32> @bicimm2s_lsl124(<2 x i32> %a) { -;CHECK: bic {{v[0-31]+}}.2s, #0x10, lsl #24 +;CHECK: bic {{v[0-9]+}}.2s, #0x10, lsl #24 %tmp1 = and <2 x i32> %a, < i32 18446744073441116159, i32 18446744073441116159> ret <2 x i32> %tmp1 } define <4 x i32> @bicimm4s_lsl0(<4 x i32> %a) { -;CHECK: bic {{v[0-31]+}}.4s, #0x10 +;CHECK: bic {{v[0-9]+}}.4s, #0x10 %tmp1 = and <4 x i32> %a, < i32 4294967279, i32 4294967279, i32 4294967279, i32 4294967279 > ret <4 x i32> %tmp1 } define <4 x i32> @bicimm4s_lsl8(<4 x i32> %a) { -;CHECK: bic {{v[0-31]+}}.4s, #0x10, lsl #8 +;CHECK: bic {{v[0-9]+}}.4s, #0x10, lsl #8 %tmp1 = and <4 x i32> %a, < i32 18446744073709547519, i32 18446744073709547519, i32 18446744073709547519, i32 18446744073709547519 > ret <4 x i32> %tmp1 } define <4 x i32> @bicimm4s_lsl16(<4 x i32> %a) { -;CHECK: bic {{v[0-31]+}}.4s, #0x10, lsl #16 +;CHECK: bic {{v[0-9]+}}.4s, #0x10, lsl #16 %tmp1 = and <4 x i32> %a, < i32 18446744073708503039, i32 18446744073708503039, i32 18446744073708503039, i32 18446744073708503039 > ret <4 x i32> %tmp1 } define <4 x i32> @bicimm4s_lsl124(<4 x i32> %a) { -;CHECK: bic {{v[0-31]+}}.4s, #0x10, lsl #24 +;CHECK: bic {{v[0-9]+}}.4s, #0x10, lsl #24 %tmp1 = and <4 x i32> %a, < i32 18446744073441116159, i32 18446744073441116159, i32 18446744073441116159, i32 18446744073441116159> ret <4 x i32> %tmp1 } define <4 x i16> @bicimm4h_lsl0_a(<4 x i16> %a) { -;CHECK: bic {{v[0-31]+}}.4h, #0x10 +;CHECK: bic {{v[0-9]+}}.4h, #0x10 %tmp1 = and <4 x i16> %a, < i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599 > ret <4 x i16> %tmp1 } define <4 x i16> @bicimm4h_lsl0_b(<4 x i16> %a) { -;CHECK: bic {{v[0-31]+}}.4h, #0x0 +;CHECK: bic {{v[0-9]+}}.4h, #0x0 %tmp1 = 
and <4 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280 > ret <4 x i16> %tmp1 } define <4 x i16> @bicimm4h_lsl8_a(<4 x i16> %a) { -;CHECK: bic {{v[0-31]+}}.4h, #0x10, lsl #8 +;CHECK: bic {{v[0-9]+}}.4h, #0x10, lsl #8 %tmp1 = and <4 x i16> %a, < i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519> ret <4 x i16> %tmp1 } define <4 x i16> @bicimm4h_lsl8_b(<4 x i16> %a) { -;CHECK: bic {{v[0-31]+}}.4h, #0x0, lsl #8 +;CHECK: bic {{v[0-9]+}}.4h, #0x0, lsl #8 %tmp1 = and <4 x i16> %a, < i16 255, i16 255, i16 255, i16 255> ret <4 x i16> %tmp1 } define <8 x i16> @bicimm8h_lsl0_a(<8 x i16> %a) { -;CHECK: bic {{v[0-31]+}}.8h, #0x10 +;CHECK: bic {{v[0-9]+}}.8h, #0x10 %tmp1 = and <8 x i16> %a, < i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599, i16 18446744073709551599 > ret <8 x i16> %tmp1 } define <8 x i16> @bicimm8h_lsl0_b(<8 x i16> %a) { -;CHECK: bic {{v[0-31]+}}.8h, #0x0 +;CHECK: bic {{v[0-9]+}}.8h, #0x0 %tmp1 = and <8 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 > ret <8 x i16> %tmp1 } define <8 x i16> @bicimm8h_lsl8_a(<8 x i16> %a) { -;CHECK: bic {{v[0-31]+}}.8h, #0x10, lsl #8 +;CHECK: bic {{v[0-9]+}}.8h, #0x10, lsl #8 %tmp1 = and <8 x i16> %a, < i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519, i16 18446744073709547519> ret <8 x i16> %tmp1 } define <8 x i16> @bicimm8h_lsl8_b(<8 x i16> %a) { -;CHECK: bic {{v[0-31]+}}.8h, #0x0, lsl #8 +;CHECK: bic {{v[0-9]+}}.8h, #0x0, lsl #8 %tmp1 = and <8 x i16> %a, < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255> ret <8 x i16> %tmp1 } define <2 x i32> @and2xi32(<2 x i32> %a, <2 x i32> %b) { -;CHECK: and {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = and <2 x i32> %a, %b; ret <2 x i32> %tmp1 } define <4 x i16> @and4xi16(<4 x i16> %a, <4 x i16> %b) { -;CHECK: and {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = and <4 x i16> %a, %b; ret <4 x i16> %tmp1 } define <1 x i64> @and1xi64(<1 x i64> %a, <1 x i64> %b) { -;CHECK: and {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = and <1 x i64> %a, %b; ret <1 x i64> %tmp1 } define <4 x i32> @and4xi32(<4 x i32> %a, <4 x i32> %b) { -;CHECK: and {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = and <4 x i32> %a, %b; ret <4 x i32> %tmp1 } define <8 x i16> @and8xi16(<8 x i16> %a, <8 x i16> %b) { -;CHECK: and {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = and <8 x i16> %a, %b; ret <8 x i16> %tmp1 } define <2 x i64> @and2xi64(<2 x i64> %a, <2 x i64> %b) { -;CHECK: and {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = and <2 x i64> %a, %b; ret <2 x i64> %tmp1 } define <2 x i32> @orr2xi32(<2 x i32> %a, <2 x i32> %b) { -;CHECK: orr {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = or <2 x i32> %a, %b; ret <2 x i32> %tmp1 } define <4 x i16> 
@orr4xi16(<4 x i16> %a, <4 x i16> %b) { -;CHECK: orr {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = or <4 x i16> %a, %b; ret <4 x i16> %tmp1 } define <1 x i64> @orr1xi64(<1 x i64> %a, <1 x i64> %b) { -;CHECK: orr {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = or <1 x i64> %a, %b; ret <1 x i64> %tmp1 } define <4 x i32> @orr4xi32(<4 x i32> %a, <4 x i32> %b) { -;CHECK: orr {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = or <4 x i32> %a, %b; ret <4 x i32> %tmp1 } define <8 x i16> @orr8xi16(<8 x i16> %a, <8 x i16> %b) { -;CHECK: orr {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = or <8 x i16> %a, %b; ret <8 x i16> %tmp1 } define <2 x i64> @orr2xi64(<2 x i64> %a, <2 x i64> %b) { -;CHECK: orr {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = or <2 x i64> %a, %b; ret <2 x i64> %tmp1 } define <2 x i32> @eor2xi32(<2 x i32> %a, <2 x i32> %b) { -;CHECK: eor {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <2 x i32> %a, %b; ret <2 x i32> %tmp1 } define <4 x i16> @eor4xi16(<4 x i16> %a, <4 x i16> %b) { -;CHECK: eor {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <4 x i16> %a, %b; ret <4 x i16> %tmp1 } define <1 x i64> @eor1xi64(<1 x i64> %a, <1 x i64> %b) { -;CHECK: eor {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <1 x i64> %a, %b; ret <1 x i64> %tmp1 } define <4 x i32> @eor4xi32(<4 x i32> %a, <4 x i32> %b) { -;CHECK: eor {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <4 x i32> %a, %b; ret <4 x i32> %tmp1 } define <8 x i16> @eor8xi16(<8 x i16> %a, <8 x i16> %b) { -;CHECK: eor {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <8 x i16> %a, %b; ret <8 x i16> %tmp1 } define <2 x i64> @eor2xi64(<2 x i64> %a, <2 x i64> %b) { -;CHECK: eor {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <2 x i64> %a, %b; ret <2 x i64> %tmp1 } define <2 x i32> @bic2xi32(<2 x i32> %a, <2 x i32> %b) { -;CHECK: bic {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <2 x i32> %b, < i32 -1, i32 -1 > %tmp2 = and <2 x i32> %a, %tmp1 ret <2 x i32> %tmp2 } define <4 x i16> @bic4xi16(<4 x i16> %a, <4 x i16> %b) { -;CHECK: bic {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <4 x i16> %b, < i16 -1, i16 -1, i16 -1, i16-1 > %tmp2 = and <4 x i16> %a, %tmp1 ret <4 x i16> %tmp2 } define <1 x i64> @bic1xi64(<1 x i64> %a, <1 x i64> %b) { -;CHECK: bic {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <1 x i64> %b, < i64 -1> %tmp2 = and <1 x i64> %a, %tmp1 ret <1 x i64> %tmp2 } define <4 x i32> @bic4xi32(<4 x i32> %a, <4 x i32> %b) { -;CHECK: bic {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bic 
{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <4 x i32> %b, < i32 -1, i32 -1, i32 -1, i32 -1> %tmp2 = and <4 x i32> %a, %tmp1 ret <4 x i32> %tmp2 } define <8 x i16> @bic8xi16(<8 x i16> %a, <8 x i16> %b) { -;CHECK: bic {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <8 x i16> %b, < i16 -1, i16 -1, i16 -1, i16-1, i16 -1, i16 -1, i16 -1, i16 -1 > %tmp2 = and <8 x i16> %a, %tmp1 ret <8 x i16> %tmp2 } define <2 x i64> @bic2xi64(<2 x i64> %a, <2 x i64> %b) { -;CHECK: bic {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <2 x i64> %b, < i64 -1, i64 -1> %tmp2 = and <2 x i64> %a, %tmp1 ret <2 x i64> %tmp2 } define <2 x i32> @orn2xi32(<2 x i32> %a, <2 x i32> %b) { -;CHECK: orn {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <2 x i32> %b, < i32 -1, i32 -1 > %tmp2 = or <2 x i32> %a, %tmp1 ret <2 x i32> %tmp2 } define <4 x i16> @orn4xi16(<4 x i16> %a, <4 x i16> %b) { -;CHECK: orn {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <4 x i16> %b, < i16 -1, i16 -1, i16 -1, i16-1 > %tmp2 = or <4 x i16> %a, %tmp1 ret <4 x i16> %tmp2 } define <1 x i64> @orn1xi64(<1 x i64> %a, <1 x i64> %b) { -;CHECK: orn {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = xor <1 x i64> %b, < i64 -1> %tmp2 = or <1 x i64> %a, %tmp1 ret <1 x i64> %tmp2 } define <4 x i32> @orn4xi32(<4 x i32> %a, <4 x i32> %b) { -;CHECK: orn {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <4 x i32> %b, < i32 -1, i32 -1, i32 -1, i32 -1> %tmp2 = or <4 x i32> %a, %tmp1 ret <4 x i32> %tmp2 } define <8 x i16> @orn8xi16(<8 x i16> %a, <8 x i16> %b) { -;CHECK: orn {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <8 x i16> %b, < i16 -1, i16 -1, i16 -1, i16-1, i16 -1, i16 -1, i16 -1, i16 -1 > %tmp2 = or <8 x i16> %a, %tmp1 ret <8 x i16> %tmp2 } define <2 x i64> @orn2xi64(<2 x i64> %a, <2 x i64> %b) { -;CHECK: orn {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = xor <2 x i64> %b, < i64 -1, i64 -1> %tmp2 = or <2 x i64> %a, %tmp1 ret <2 x i64> %tmp2 } define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) { -;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = and <2 x i32> %a, < i32 -1, i32 -1 > %tmp2 = and <2 x i32> %b, < i32 0, i32 0 > %tmp3 = or <2 x i32> %tmp1, %tmp2 @@ -455,7 +455,7 @@ define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) { define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) { -;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = and <4 x i16> %a, < i16 -1, i16 -1, i16 -1,i16 -1 > %tmp2 = and <4 x i16> %b, < i16 0, i16 0,i16 0, i16 0 > %tmp3 = or <4 x i16> %tmp1, %tmp2 @@ -463,7 +463,7 @@ define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) { } define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b) { -;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = and <1 x i64> 
%a, < i64 -1 > %tmp2 = and <1 x i64> %b, < i64 0 > %tmp3 = or <1 x i64> %tmp1, %tmp2 @@ -471,7 +471,7 @@ define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b) { } define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b) { -;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = and <4 x i32> %a, < i32 -1, i32 -1, i32 -1, i32 -1 > %tmp2 = and <4 x i32> %b, < i32 0, i32 0, i32 0, i32 0 > %tmp3 = or <4 x i32> %tmp1, %tmp2 @@ -479,7 +479,7 @@ define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b) { } define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) { -;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 -1,i16 -1, i16 -1, i16 -1, i16 -1,i16 -1 > %tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0 > %tmp3 = or <8 x i16> %tmp1, %tmp2 @@ -487,7 +487,7 @@ define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) { } define <2 x i64> @bsl2xi64_const(<2 x i64> %a, <2 x i64> %b) { -;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = and <2 x i64> %a, < i64 -1, i64 -1 > %tmp2 = and <2 x i64> %b, < i64 0, i64 0 > %tmp3 = or <2 x i64> %tmp1, %tmp2 @@ -496,7 +496,7 @@ define <2 x i64> @bsl2xi64_const(<2 x i64> %a, <2 x i64> %b) { define <8 x i8> @bsl8xi8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { -;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %1 = and <8 x i8> %v1, %v2 %2 = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> %3 = and <8 x i8> %2, %v3 @@ -505,7 +505,7 @@ define <8 x i8> @bsl8xi8(<8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { } define <4 x i16> @bsl4xi16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { -;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %1 = and <4 x i16> %v1, %v2 %2 = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1> %3 = and <4 x i16> %2, %v3 @@ -514,7 +514,7 @@ define <4 x i16> @bsl4xi16(<4 x i16> %v1, <4 x i16> %v2, <4 x i16> %v3) { } define <2 x i32> @bsl2xi32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { -;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %1 = and <2 x i32> %v1, %v2 %2 = xor <2 x i32> %v1, <i32 -1, i32 -1> %3 = and <2 x i32> %2, %v3 @@ -523,7 +523,7 @@ define <2 x i32> @bsl2xi32(<2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { } define <1 x i64> @bsl1xi64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) { -;CHECK: bsl {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %1 = and <1 x i64> %v1, %v2 %2 = xor <1 x i64> %v1, <i64 -1> %3 = and <1 x i64> %2, %v3 @@ -532,7 +532,7 @@ define <1 x i64> @bsl1xi64(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) { } define <16 x i8> @bsl16xi8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { -;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %1 = and <16 x i8> %v1, %v2 %2 = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> %3 = and <16 x i8> %2, %v3 @@ -541,7 +541,7 @@ define <16 x i8> @bsl16xi8(<16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) { } define <8 x i16> @bsl8xi16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { -;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %1 = and <8 x
i16> %v1, %v2 %2 = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> %3 = and <8 x i16> %2, %v3 @@ -550,7 +550,7 @@ define <8 x i16> @bsl8xi16(<8 x i16> %v1, <8 x i16> %v2, <8 x i16> %v3) { } define <4 x i32> @bsl4xi32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { -;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %1 = and <4 x i32> %v1, %v2 %2 = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1> %3 = and <4 x i32> %2, %v3 @@ -559,7 +559,7 @@ define <4 x i32> @bsl4xi32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { } define <2 x i64> @bsl2xi64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) { -;CHECK: bsl {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %1 = and <2 x i64> %v1, %v2 %2 = xor <2 x i64> %v1, <i64 -1, i64 -1> %3 = and <2 x i64> %2, %v3 @@ -568,25 +568,25 @@ define <2 x i64> @bsl2xi64(<2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3) { } define <8 x i8> @orrimm8b_as_orrimm4h_lsl0(<8 x i8> %a) { -;CHECK: orr {{v[0-31]+}}.4h, #0xff +;CHECK: orr {{v[0-9]+}}.4h, #0xff %val = or <8 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0> ret <8 x i8> %val } define <8 x i8> @orrimm8b_as_orimm4h_lsl8(<8 x i8> %a) { -;CHECK: orr {{v[0-31]+}}.4h, #0xff, lsl #8 +;CHECK: orr {{v[0-9]+}}.4h, #0xff, lsl #8 %val = or <8 x i8> %a, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255> ret <8 x i8> %val } define <16 x i8> @orimm16b_as_orrimm8h_lsl0(<16 x i8> %a) { -;CHECK: orr {{v[0-31]+}}.8h, #0xff +;CHECK: orr {{v[0-9]+}}.8h, #0xff %val = or <16 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0> ret <16 x i8> %val } define <16 x i8> @orimm16b_as_orrimm8h_lsl8(<16 x i8> %a) { -;CHECK: orr {{v[0-31]+}}.8h, #0xff, lsl #8 +;CHECK: orr {{v[0-9]+}}.8h, #0xff, lsl #8 %val = or <16 x i8> %a, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255> ret <16 x i8> %val } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-copy.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-copy.ll index e18530e6ff8e..881a858bc19c 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-copy.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-copy.ll @@ -2,269 +2,269 @@ define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) { -;CHECK: ins {{v[0-31]+}}.b[15], {{w[0-31]+}} +;CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}} %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15 ret <16 x i8> %tmp3 } define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) { -;CHECK: ins {{v[0-31]+}}.h[6], {{w[0-31]+}} +;CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}} %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6 ret <8 x i16> %tmp3 } define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[2], {{w[0-31]+}} +;CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}} %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2 ret <4 x i32> %tmp3 } define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[1], {{x[0-31]+}} +;CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}} %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1 ret <2 x i64> %tmp3 } define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) { -;CHECK: ins {{v[0-31]+}}.b[5], {{w[0-31]+}} +;CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}} %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5 ret <8 x i8> %tmp3 } define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) { -;CHECK: ins {{v[0-31]+}}.h[3], {{w[0-31]+}} +;CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}} %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3 ret <4 x i16> %tmp3 } define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{w[0-31]+}} +;CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}} %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1 ret <2 x i32> %tmp3 } define <16 x i8> @ins16b16(<16
x i8> %tmp1, <16 x i8> %tmp2) { -;CHECK: ins {{v[0-31]+}}.b[15], {{v[0-31]+}}.b[2] +;CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2] %tmp3 = extractelement <16 x i8> %tmp1, i32 2 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15 ret <16 x i8> %tmp4 } define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) { -;CHECK: ins {{v[0-31]+}}.h[7], {{v[0-31]+}}.h[2] +;CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2] %tmp3 = extractelement <8 x i16> %tmp1, i32 2 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7 ret <8 x i16> %tmp4 } define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[2] +;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2] %tmp3 = extractelement <4 x i32> %tmp1, i32 2 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1 ret <4 x i32> %tmp4 } define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[1], {{v[0-31]+}}.d[0] +;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] %tmp3 = extractelement <2 x i64> %tmp1, i32 0 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1 ret <2 x i64> %tmp4 } define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[2] +;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2] %tmp3 = extractelement <4 x float> %tmp1, i32 2 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1 ret <4 x float> %tmp4 } define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[1], {{v[0-31]+}}.d[0] +;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] %tmp3 = extractelement <2 x double> %tmp1, i32 0 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1 ret <2 x double> %tmp4 } define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) { -;CHECK: ins {{v[0-31]+}}.b[15], {{v[0-31]+}}.b[2] +;CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2] %tmp3 = extractelement <8 x i8> %tmp1, i32 2 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15 ret <16 x i8> %tmp4 } define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) { -;CHECK: ins {{v[0-31]+}}.h[7], {{v[0-31]+}}.h[2] +;CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2] %tmp3 = extractelement <4 x i16> %tmp1, i32 2 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7 ret <8 x i16> %tmp4 } define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[1] +;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1] %tmp3 = extractelement <2 x i32> %tmp1, i32 1 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1 ret <4 x i32> %tmp4 } define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[1], {{v[0-31]+}}.d[0] +;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] %tmp3 = extractelement <1 x i64> %tmp1, i32 0 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1 ret <2 x i64> %tmp4 } define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[1] +;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1] %tmp3 = extractelement <2 x float> %tmp1, i32 1 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1 ret <4 x float> %tmp4 } define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[1], {{v[0-31]+}}.d[0] +;CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] %tmp3 = extractelement <1 x double> %tmp1, i32 0 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1 ret <2 x double> %tmp4 } define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) { -;CHECK: ins {{v[0-31]+}}.b[7], {{v[0-31]+}}.b[2] 
+;CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2] %tmp3 = extractelement <16 x i8> %tmp1, i32 2 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7 ret <8 x i8> %tmp4 } define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) { -;CHECK: ins {{v[0-31]+}}.h[3], {{v[0-31]+}}.h[2] +;CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2] %tmp3 = extractelement <8 x i16> %tmp1, i32 2 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3 ret <4 x i16> %tmp4 } define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[2] +;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2] %tmp3 = extractelement <4 x i32> %tmp1, i32 2 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1 ret <2 x i32> %tmp4 } define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[0], {{v[0-31]+}}.d[0] +;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] %tmp3 = extractelement <2 x i64> %tmp1, i32 0 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0 ret <1 x i64> %tmp4 } define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[2] +;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2] %tmp3 = extractelement <4 x float> %tmp1, i32 2 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1 ret <2 x float> %tmp4 } define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[0], {{v[0-31]+}}.d[0] +;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] %tmp3 = extractelement <2 x double> %tmp1, i32 0 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0 ret <1 x double> %tmp4 } define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) { -;CHECK: ins {{v[0-31]+}}.b[4], {{v[0-31]+}}.b[2] +;CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2] %tmp3 = extractelement <8 x i8> %tmp1, i32 2 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4 ret <8 x i8> %tmp4 } define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) { -;CHECK: ins {{v[0-31]+}}.h[3], {{v[0-31]+}}.h[2] +;CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2] %tmp3 = extractelement <4 x i16> %tmp1, i32 2 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3 ret <4 x i16> %tmp4 } define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[0] +;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] %tmp3 = extractelement <2 x i32> %tmp1, i32 0 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1 ret <2 x i32> %tmp4 } define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[0], {{v[0-31]+}}.d[0] +;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] %tmp3 = extractelement <1 x i64> %tmp1, i32 0 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0 ret <1 x i64> %tmp4 } define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) { -;CHECK: ins {{v[0-31]+}}.s[1], {{v[0-31]+}}.s[0] +;CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] %tmp3 = extractelement <2 x float> %tmp1, i32 0 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1 ret <2 x float> %tmp4 } define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) { -;CHECK: ins {{v[0-31]+}}.d[0], {{v[0-31]+}}.d[0] +;CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] %tmp3 = extractelement <1 x double> %tmp1, i32 0 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0 ret <1 x double> %tmp4 } define i32 @umovw16b(<16 x i8> %tmp1) { -;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.b[8] +;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8] %tmp3 = extractelement <16 x i8> %tmp1, i32 8 %tmp4 = zext i8 %tmp3 to i32 ret 
i32 %tmp4 } define i32 @umovw8h(<8 x i16> %tmp1) { -;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.h[2] +;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2] %tmp3 = extractelement <8 x i16> %tmp1, i32 2 %tmp4 = zext i16 %tmp3 to i32 ret i32 %tmp4 } define i32 @umovw4s(<4 x i32> %tmp1) { -;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.s[2] +;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[2] %tmp3 = extractelement <4 x i32> %tmp1, i32 2 ret i32 %tmp3 } define i64 @umovx2d(<2 x i64> %tmp1) { -;CHECK: umov {{x[0-31]+}}, {{v[0-31]+}}.d[0] +;CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[0] %tmp3 = extractelement <2 x i64> %tmp1, i32 0 ret i64 %tmp3 } define i32 @umovw8b(<8 x i8> %tmp1) { -;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.b[7] +;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7] %tmp3 = extractelement <8 x i8> %tmp1, i32 7 %tmp4 = zext i8 %tmp3 to i32 ret i32 %tmp4 } define i32 @umovw4h(<4 x i16> %tmp1) { -;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.h[2] +;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2] %tmp3 = extractelement <4 x i16> %tmp1, i32 2 %tmp4 = zext i16 %tmp3 to i32 ret i32 %tmp4 } define i32 @umovw2s(<2 x i32> %tmp1) { -;CHECK: umov {{w[0-31]+}}, {{v[0-31]+}}.s[1] +;CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[1] %tmp3 = extractelement <2 x i32> %tmp1, i32 1 ret i32 %tmp3 } define i64 @umovx1d(<1 x i64> %tmp1) { -;CHECK: fmov {{x[0-31]+}}, {{d[0-31]+}} +;CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} %tmp3 = extractelement <1 x i64> %tmp1, i32 0 ret i64 %tmp3 } define i32 @smovw16b(<16 x i8> %tmp1) { -;CHECK: smov {{w[0-31]+}}, {{v[0-31]+}}.b[8] +;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8] %tmp3 = extractelement <16 x i8> %tmp1, i32 8 %tmp4 = sext i8 %tmp3 to i32 %tmp5 = add i32 5, %tmp4 @@ -272,7 +272,7 @@ define i32 @smovw16b(<16 x i8> %tmp1) { } define i32 @smovw8h(<8 x i16> %tmp1) { -;CHECK: smov {{w[0-31]+}}, {{v[0-31]+}}.h[2] +;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2] %tmp3 = extractelement <8 x i16> %tmp1, i32 2 %tmp4 = sext i16 %tmp3 to i32 %tmp5 = add i32 5, %tmp4 @@ -280,28 +280,28 @@ define i32 @smovw8h(<8 x i16> %tmp1) { } define i32 @smovx16b(<16 x i8> %tmp1) { -;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.b[8] +;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8] %tmp3 = extractelement <16 x i8> %tmp1, i32 8 %tmp4 = sext i8 %tmp3 to i32 ret i32 %tmp4 } define i32 @smovx8h(<8 x i16> %tmp1) { -;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.h[2] +;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2] %tmp3 = extractelement <8 x i16> %tmp1, i32 2 %tmp4 = sext i16 %tmp3 to i32 ret i32 %tmp4 } define i64 @smovx4s(<4 x i32> %tmp1) { -;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.s[2] +;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2] %tmp3 = extractelement <4 x i32> %tmp1, i32 2 %tmp4 = sext i32 %tmp3 to i64 ret i64 %tmp4 } define i32 @smovw8b(<8 x i8> %tmp1) { -;CHECK: smov {{w[0-31]+}}, {{v[0-31]+}}.b[4] +;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4] %tmp3 = extractelement <8 x i8> %tmp1, i32 4 %tmp4 = sext i8 %tmp3 to i32 %tmp5 = add i32 5, %tmp4 @@ -309,7 +309,7 @@ define i32 @smovw8b(<8 x i8> %tmp1) { } define i32 @smovw4h(<4 x i16> %tmp1) { -;CHECK: smov {{w[0-31]+}}, {{v[0-31]+}}.h[2] +;CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2] %tmp3 = extractelement <4 x i16> %tmp1, i32 2 %tmp4 = sext i16 %tmp3 to i32 %tmp5 = add i32 5, %tmp4 @@ -317,21 +317,21 @@ define i32 @smovw4h(<4 x i16> %tmp1) { } define i32 @smovx8b(<8 x i8> %tmp1) { -;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.b[6] +;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[6] %tmp3 = extractelement <8 x i8> %tmp1, i32 6 %tmp4 = sext i8 %tmp3 to i32 ret i32 %tmp4 } define i32 @smovx4h(<4 x i16> %tmp1) { -;CHECK: smov {{x[0-31]+}}, 
{{v[0-31]+}}.h[2] +;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2] %tmp3 = extractelement <4 x i16> %tmp1, i32 2 %tmp4 = sext i16 %tmp3 to i32 ret i32 %tmp4 } define i64 @smovx2s(<2 x i32> %tmp1) { -;CHECK: smov {{x[0-31]+}}, {{v[0-31]+}}.s[1] +;CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1] %tmp3 = extractelement <2 x i32> %tmp1, i32 1 %tmp4 = sext i32 %tmp3 to i64 ret i64 %tmp4 @@ -612,4 +612,117 @@ define <1 x double> @test_bitcasti64tov1f64(i64 %in) { %res = bitcast i64 %in to <1 x double> ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} ret <1 x double> %res -} \ No newline at end of file +} + +; Test insert element into an undef vector +define <8 x i8> @scalar_to_vector.v8i8(i8 %a) { +; CHECK-LABEL: scalar_to_vector.v8i8: +; CHECK: ins {{v[0-9]+}}.b[0], {{w[0-9]+}} + %b = insertelement <8 x i8> undef, i8 %a, i32 0 + ret <8 x i8> %b +} + +define <16 x i8> @scalar_to_vector.v16i8(i8 %a) { +; CHECK-LABEL: scalar_to_vector.v16i8: +; CHECK: ins {{v[0-9]+}}.b[0], {{w[0-9]+}} + %b = insertelement <16 x i8> undef, i8 %a, i32 0 + ret <16 x i8> %b +} + +define <4 x i16> @scalar_to_vector.v4i16(i16 %a) { +; CHECK-LABEL: scalar_to_vector.v4i16: +; CHECK: ins {{v[0-9]+}}.h[0], {{w[0-9]+}} + %b = insertelement <4 x i16> undef, i16 %a, i32 0 + ret <4 x i16> %b +} + +define <8 x i16> @scalar_to_vector.v8i16(i16 %a) { +; CHECK-LABEL: scalar_to_vector.v8i16: +; CHECK: ins {{v[0-9]+}}.h[0], {{w[0-9]+}} + %b = insertelement <8 x i16> undef, i16 %a, i32 0 + ret <8 x i16> %b +} + +define <2 x i32> @scalar_to_vector.v2i32(i32 %a) { +; CHECK-LABEL: scalar_to_vector.v2i32: +; CHECK: ins {{v[0-9]+}}.s[0], {{w[0-9]+}} + %b = insertelement <2 x i32> undef, i32 %a, i32 0 + ret <2 x i32> %b +} + +define <4 x i32> @scalar_to_vector.v4i32(i32 %a) { +; CHECK-LABEL: scalar_to_vector.v4i32: +; CHECK: ins {{v[0-9]+}}.s[0], {{w[0-9]+}} + %b = insertelement <4 x i32> undef, i32 %a, i32 0 + ret <4 x i32> %b +} + +define <2 x i64> @scalar_to_vector.v2i64(i64 %a) { +; CHECK-LABEL: scalar_to_vector.v2i64: +; CHECK: ins {{v[0-9]+}}.d[0], {{x[0-9]+}} + %b = insertelement <2 x i64> undef, i64 %a, i32 0 + ret <2 x i64> %b +} + +define <8 x i8> @testDUP.v1i8(<1 x i8> %a) { +; CHECK-LABEL: testDUP.v1i8: +; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} + %b = extractelement <1 x i8> %a, i32 0 + %c = insertelement <8 x i8> undef, i8 %b, i32 0 + %d = insertelement <8 x i8> %c, i8 %b, i32 1 + %e = insertelement <8 x i8> %d, i8 %b, i32 2 + %f = insertelement <8 x i8> %e, i8 %b, i32 3 + %g = insertelement <8 x i8> %f, i8 %b, i32 4 + %h = insertelement <8 x i8> %g, i8 %b, i32 5 + %i = insertelement <8 x i8> %h, i8 %b, i32 6 + %j = insertelement <8 x i8> %i, i8 %b, i32 7 + ret <8 x i8> %j +} + +define <8 x i16> @testDUP.v1i16(<1 x i16> %a) { +; CHECK-LABEL: testDUP.v1i16: +; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}} + %b = extractelement <1 x i16> %a, i32 0 + %c = insertelement <8 x i16> undef, i16 %b, i32 0 + %d = insertelement <8 x i16> %c, i16 %b, i32 1 + %e = insertelement <8 x i16> %d, i16 %b, i32 2 + %f = insertelement <8 x i16> %e, i16 %b, i32 3 + %g = insertelement <8 x i16> %f, i16 %b, i32 4 + %h = insertelement <8 x i16> %g, i16 %b, i32 5 + %i = insertelement <8 x i16> %h, i16 %b, i32 6 + %j = insertelement <8 x i16> %i, i16 %b, i32 7 + ret <8 x i16> %j +} + +define <4 x i32> @testDUP.v1i32(<1 x i32> %a) { +; CHECK-LABEL: testDUP.v1i32: +; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}} + %b = extractelement <1 x i32> %a, i32 0 + %c = insertelement <4 x i32> undef, i32 %b, i32 0 + %d = insertelement <4 x i32> %c, i32 %b, i32 1 + %e = insertelement <4 x i32> %d, i32 
%b, i32 2 + %f = insertelement <4 x i32> %e, i32 %b, i32 3 + ret <4 x i32> %f +} + +define <8 x i8> @getl(<16 x i8> %x) #0 { +; CHECK-LABEL: getl: +; CHECK: ret + %vecext = extractelement <16 x i8> %x, i32 0 + %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0 + %vecext1 = extractelement <16 x i8> %x, i32 1 + %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1 + %vecext3 = extractelement <16 x i8> %x, i32 2 + %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2 + %vecext5 = extractelement <16 x i8> %x, i32 3 + %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3 + %vecext7 = extractelement <16 x i8> %x, i32 4 + %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4 + %vecext9 = extractelement <16 x i8> %x, i32 5 + %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5 + %vecext11 = extractelement <16 x i8> %x, i32 6 + %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6 + %vecext13 = extractelement <16 x i8> %x, i32 7 + %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7 + ret <8 x i8> %vecinit14 +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-fma.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-fma.ll index dcf4e2878068..af70302ca939 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-fma.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-fma.ll @@ -1,21 +1,21 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) { -;CHECK: fmla {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp1 = fmul <2 x float> %A, %B; %tmp2 = fadd <2 x float> %C, %tmp1; ret <2 x float> %tmp2 } define <4 x float> @fmla4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) { -;CHECK: fmla {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp1 = fmul <4 x float> %A, %B; %tmp2 = fadd <4 x float> %C, %tmp1; ret <4 x float> %tmp2 } define <2 x double> @fmla2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) { -;CHECK: fmla {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %tmp1 = fmul <2 x double> %A, %B; %tmp2 = fadd <2 x double> %C, %tmp1; ret <2 x double> %tmp2 @@ -23,21 +23,21 @@ define <2 x double> @fmla2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> define <2 x float> @fmls2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) { -;CHECK: fmls {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp1 = fmul <2 x float> %A, %B; %tmp2 = fsub <2 x float> %C, %tmp1; ret <2 x float> %tmp2 } define <4 x float> @fmls4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) { -;CHECK: fmls {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp1 = fmul <4 x float> %A, %B; %tmp2 = fsub <4 x float> %C, %tmp1; ret <4 x float> %tmp2 } define <2 x double> @fmls2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) { -;CHECK: fmls {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %tmp1 = fmul <2 x double> %A, %B; %tmp2 = fsub <2 x double> %C, %tmp1; ret <2 x double> %tmp2 @@ -51,39 +51,39 @@ declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 
x float>) declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) define <2 x float> @fmla2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) { -;CHECK: fmla {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %val = call <2 x float> @llvm.fma.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C) ret <2 x float> %val } define <4 x float> @fmla4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) { -;CHECK: fmla {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %val = call <4 x float> @llvm.fma.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C) ret <4 x float> %val } define <2 x double> @fmla2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) { -;CHECK: fmla {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %val = call <2 x double> @llvm.fma.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C) ret <2 x double> %val } define <2 x float> @fmls2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) { -;CHECK: fmls {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %negA = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %A %val = call <2 x float> @llvm.fma.v2f32(<2 x float> %negA, <2 x float> %B, <2 x float> %C) ret <2 x float> %val } define <4 x float> @fmls4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) { -;CHECK: fmls {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %negA = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %A %val = call <4 x float> @llvm.fma.v4f32(<4 x float> %negA, <4 x float> %B, <4 x float> %C) ret <4 x float> %val } define <2 x double> @fmls2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) { -;CHECK: fmls {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %negA = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %A %val = call <2 x double> @llvm.fma.v2f64(<2 x double> %negA, <2 x double> %B, <2 x double> %C) ret <2 x double> %val @@ -94,19 +94,39 @@ declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>) define <2 x float> @fmuladd2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) { -;CHECK: fmla {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %val = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C) ret <2 x float> %val } define <4 x float> @fmuladd4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) { -;CHECK: fmla {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %val = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C) ret <4 x float> %val } define <2 x double> @fmuladd2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) { -;CHECK: fmla {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %val = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C) ret <2 x double> %val } + + +; Another set of tests that check for multiply single use + +define <2 x float> @fmla2xfloati_su(<2 x float> %A, <2 x float> %B, <2 x float> %C) { +;CHECK-NOT: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %tmp1
= fmul <2 x float> %A, %B; + %tmp2 = fadd <2 x float> %C, %tmp1; + %tmp3 = fadd <2 x float> %tmp2, %tmp1; + ret <2 x float> %tmp3 +} + +define <2 x double> @fmls2xdouble_su(<2 x double> %A, <2 x double> %B, <2 x double> %C) { +;CHECK-NOT: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d + %tmp1 = fmul <2 x double> %A, %B; + %tmp2 = fsub <2 x double> %C, %tmp1; + %tmp3 = fsub <2 x double> %tmp2, %tmp1; + ret <2 x double> %tmp3 +} + diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-max-min-pairwise.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-max-min-pairwise.ll index d757aca86a69..3e18077337d2 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-max-min-pairwise.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-max-min-pairwise.ll @@ -308,3 +308,39 @@ define <2 x double> @test_fminnmp_v2f64(<2 x double> %lhs, <2 x double> %rhs) { ret <2 x double> %val } +define i32 @test_vminv_s32(<2 x i32> %a) { +; CHECK-LABEL: test_vminv_s32 +; CHECK: sminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %1 = tail call <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v2i32(<2 x i32> %a) + %2 = extractelement <1 x i32> %1, i32 0 + ret i32 %2 +} + +define i32 @test_vminv_u32(<2 x i32> %a) { +; CHECK-LABEL: test_vminv_u32 +; CHECK: uminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %1 = tail call <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v2i32(<2 x i32> %a) + %2 = extractelement <1 x i32> %1, i32 0 + ret i32 %2 +} + +define i32 @test_vmaxv_s32(<2 x i32> %a) { +; CHECK-LABEL: test_vmaxv_s32 +; CHECK: smaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %1 = tail call <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v2i32(<2 x i32> %a) + %2 = extractelement <1 x i32> %1, i32 0 + ret i32 %2 +} + +define i32 @test_vmaxv_u32(<2 x i32> %a) { +; CHECK-LABEL: test_vmaxv_u32 +; CHECK: umaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + %1 = tail call <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v2i32(<2 x i32> %a) + %2 = extractelement <1 x i32> %1, i32 0 + ret i32 %2 +} + +declare <1 x i32> @llvm.aarch64.neon.uminv.v1i32.v2i32(<2 x i32>) +declare <1 x i32> @llvm.aarch64.neon.sminv.v1i32.v2i32(<2 x i32>) +declare <1 x i32> @llvm.aarch64.neon.umaxv.v1i32.v2i32(<2 x i32>) +declare <1 x i32> @llvm.aarch64.neon.smaxv.v1i32.v2i32(<2 x i32>) \ No newline at end of file diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-misc-scalar.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-misc-scalar.ll new file mode 100644 index 000000000000..cca8deb45cba --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-misc-scalar.ll @@ -0,0 +1,60 @@ +;RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +declare <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64>) + +declare <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64>) + +declare <1 x i64> @llvm.arm.neon.vabs.v1i64(<1 x i64>) + +declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) + +declare <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>) + +define <1 x i64> @test_vuqadd_s64(<1 x i64> %a, <1 x i64> %b) { +entry: + ; CHECK: test_vuqadd_s64 + %vuqadd2.i = tail call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b) + ; CHECK: suqadd d{{[0-9]+}}, d{{[0-9]+}} + ret <1 x i64> %vuqadd2.i +} + +define <1 x i64> @test_vsqadd_u64(<1 x i64> %a, <1 x i64> %b) { +entry: + ; CHECK: test_vsqadd_u64 + %vsqadd2.i = tail call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b) + ; CHECK: usqadd 
d{{[0-9]+}}, d{{[0-9]+}} + ret <1 x i64> %vsqadd2.i +} + +define <1 x i64> @test_vabs_s64(<1 x i64> %a) { + ; CHECK: test_vabs_s64 +entry: + %vabs1.i = tail call <1 x i64> @llvm.arm.neon.vabs.v1i64(<1 x i64> %a) + ; CHECK: abs d{{[0-9]+}}, d{{[0-9]+}} + ret <1 x i64> %vabs1.i +} + +define <1 x i64> @test_vqabs_s64(<1 x i64> %a) { + ; CHECK: test_vqabs_s64 +entry: + %vqabs1.i = tail call <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64> %a) + ; CHECK: sqabs d{{[0-9]+}}, d{{[0-9]+}} + ret <1 x i64> %vqabs1.i +} + +define <1 x i64> @test_vqneg_s64(<1 x i64> %a) { + ; CHECK: test_vqneg_s64 +entry: + %vqneg1.i = tail call <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64> %a) + ; CHECK: sqneg d{{[0-9]+}}, d{{[0-9]+}} + ret <1 x i64> %vqneg1.i +} + +define <1 x i64> @test_vneg_s64(<1 x i64> %a) { + ; CHECK: test_vneg_s64 +entry: + %sub.i = sub <1 x i64> zeroinitializer, %a + ; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}} + ret <1 x i64> %sub.i +} + diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-misc.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-misc.ll index 3fd9a500f48c..ed0cc0fc413a 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-misc.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-misc.ll @@ -894,13 +894,13 @@ define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 { define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 { ; CHECK: fcvtxn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d - %vcvtx_f32_f641.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %a) #4 + %vcvtx_f32_f641.i = call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %a) #4 ret <2 x float> %vcvtx_f32_f641.i } define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 { ; CHECK: fcvtxn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d - %vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %b) #4 + %vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double> %b) #4 %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvtx_f32_f641.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ret <4 x float> %shuffle.i } @@ -1080,147 +1080,171 @@ define <2 x i64> @test_vcvtq_u64_f64(<2 x double> %a) #0 { ret <2 x i64> %vcvt.i } -define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) #0 { +define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) { +; CHECK-LABEL: test_vcvtn_s32_f32 ; CHECK: fcvtns v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtns_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float> %a) #4 + %vcvtns_f321.i = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a) ret <2 x i32> %vcvtns_f321.i } -define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #0 { +define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) { +; CHECK-LABEL: test_vcvtnq_s32_f32 ; CHECK: fcvtns v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtns_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float> %a) #4 + %vcvtns_f321.i = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a) ret <4 x i32> %vcvtns_f321.i } -define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) #0 { +define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) { +; CHECK-LABEL: test_vcvtnq_s64_f64 ; CHECK: fcvtns v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtns_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double> %a) #4 + %vcvtns_f641.i = call <2 x i64> @llvm.arm.neon.vcvtns.v2i64.v2f64(<2 x double> %a) ret <2 x i64> %vcvtns_f641.i } -define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) #0
{ +define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) { +; CHECK-LABEL: test_vcvtn_u32_f32 ; CHECK: fcvtnu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtnu_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float> %a) #4 + %vcvtnu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a) ret <2 x i32> %vcvtnu_f321.i } -define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #0 { +define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) { +; CHECK-LABEL: test_vcvtnq_u32_f32 ; CHECK: fcvtnu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtnu_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float> %a) #4 + %vcvtnu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a) ret <4 x i32> %vcvtnu_f321.i } -define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) #0 { +define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) { +; CHECK-LABEL: test_vcvtnq_u64_f64 ; CHECK: fcvtnu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtnu_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double> %a) #4 + %vcvtnu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtnu.v2i64.v2f64(<2 x double> %a) ret <2 x i64> %vcvtnu_f641.i } -define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) #0 { +define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) { +; CHECK-LABEL: test_vcvtp_s32_f32 ; CHECK: fcvtps v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtps_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float> %a) #4 + %vcvtps_f321.i = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a) ret <2 x i32> %vcvtps_f321.i } -define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #0 { +define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) { +; CHECK-LABEL: test_vcvtpq_s32_f32 ; CHECK: fcvtps v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtps_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float> %a) #4 + %vcvtps_f321.i = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a) ret <4 x i32> %vcvtps_f321.i } -define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) #0 { +define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) { +; CHECK-LABEL: test_vcvtpq_s64_f64 ; CHECK: fcvtps v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtps_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double> %a) #4 + %vcvtps_f641.i = call <2 x i64> @llvm.arm.neon.vcvtps.v2i64.v2f64(<2 x double> %a) ret <2 x i64> %vcvtps_f641.i } -define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) #0 { +define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) { +; CHECK-LABEL: test_vcvtp_u32_f32 ; CHECK: fcvtpu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtpu_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float> %a) #4 + %vcvtpu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a) ret <2 x i32> %vcvtpu_f321.i } -define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #0 { +define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) { +; CHECK-LABEL: test_vcvtpq_u32_f32 ; CHECK: fcvtpu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtpu_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float> %a) #4 + %vcvtpu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a) ret <4 x i32> %vcvtpu_f321.i } -define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) #0 { +define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) { +; CHECK-LABEL: test_vcvtpq_u64_f64 ; CHECK: fcvtpu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtpu_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double> %a) #4 + %vcvtpu_f641.i = call 
<2 x i64> @llvm.arm.neon.vcvtpu.v2i64.v2f64(<2 x double> %a) ret <2 x i64> %vcvtpu_f641.i } -define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) #0 { +define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) { +; CHECK-LABEL: test_vcvtm_s32_f32 ; CHECK: fcvtms v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtms_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float> %a) #4 + %vcvtms_f321.i = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a) ret <2 x i32> %vcvtms_f321.i } -define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #0 { +define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) { +; CHECK-LABEL: test_vcvtmq_s32_f32 ; CHECK: fcvtms v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtms_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float> %a) #4 + %vcvtms_f321.i = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a) ret <4 x i32> %vcvtms_f321.i } -define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) #0 { +define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) { +; CHECK-LABEL: test_vcvtmq_s64_f64 ; CHECK: fcvtms v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtms_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double> %a) #4 + %vcvtms_f641.i = call <2 x i64> @llvm.arm.neon.vcvtms.v2i64.v2f64(<2 x double> %a) ret <2 x i64> %vcvtms_f641.i } -define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) #0 { +define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) { +; CHECK-LABEL: test_vcvtm_u32_f32 ; CHECK: fcvtmu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtmu_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float> %a) #4 + %vcvtmu_f321.i = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a) ret <2 x i32> %vcvtmu_f321.i } -define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #0 { +define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) { +; CHECK-LABEL: test_vcvtmq_u32_f32 ; CHECK: fcvtmu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtmu_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float> %a) #4 + %vcvtmu_f321.i = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a) ret <4 x i32> %vcvtmu_f321.i } -define <2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) #0 { +define <2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) { +; CHECK-LABEL: test_vcvtmq_u64_f64 ; CHECK: fcvtmu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtmu_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double> %a) #4 + %vcvtmu_f641.i = call <2 x i64> @llvm.arm.neon.vcvtmu.v2i64.v2f64(<2 x double> %a) ret <2 x i64> %vcvtmu_f641.i } -define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) #0 { +define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) { +; CHECK-LABEL: test_vcvta_s32_f32 ; CHECK: fcvtas v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtas_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float> %a) #4 + %vcvtas_f321.i = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a) ret <2 x i32> %vcvtas_f321.i } -define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #0 { +define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) { +; CHECK-LABEL: test_vcvtaq_s32_f32 ; CHECK: fcvtas v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtas_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float> %a) #4 + %vcvtas_f321.i = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a) ret <4 x i32> %vcvtas_f321.i } -define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) #0 { +define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) { +; CHECK-LABEL: test_vcvtaq_s64_f64 ; CHECK: fcvtas 
v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtas_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double> %a) #4 + %vcvtas_f641.i = call <2 x i64> @llvm.arm.neon.vcvtas.v2i64.v2f64(<2 x double> %a) ret <2 x i64> %vcvtas_f641.i } -define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) #0 { +define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) { +; CHECK-LABEL: test_vcvta_u32_f32 ; CHECK: fcvtau v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vcvtau_f321.i = tail call <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float> %a) #4 + %vcvtau_f321.i = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a) ret <2 x i32> %vcvtau_f321.i } -define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #0 { +define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) { +; CHECK-LABEL: test_vcvtaq_u32_f32 ; CHECK: fcvtau v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vcvtau_f321.i = tail call <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float> %a) #4 + %vcvtau_f321.i = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a) ret <4 x i32> %vcvtau_f321.i } -define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) #0 { +define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) { +; CHECK-LABEL: test_vcvtaq_u64_f64 ; CHECK: fcvtau v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vcvtau_f641.i = tail call <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double> %a) #4 + %vcvtau_f641.i = call <2 x i64> @llvm.arm.neon.vcvtau.v2i64.v2f64(<2 x double> %a) ret <2 x i64> %vcvtau_f641.i } @@ -1274,19 +1298,19 @@ define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 { define <2 x float> @test_vsqrt_f32(<2 x float> %a) #0 { ; CHECK: fsqrt v{{[0-9]+}}.2s, v{{[0-9]+}}.2s - %vsqrt1.i = tail call <2 x float> @llvm.aarch64.neon.fsqrt.v2f32(<2 x float> %a) #4 + %vsqrt1.i = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #4 ret <2 x float> %vsqrt1.i } define <4 x float> @test_vsqrtq_f32(<4 x float> %a) #0 { ; CHECK: fsqrt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s - %vsqrt1.i = tail call <4 x float> @llvm.aarch64.neon.fsqrt.v4f32(<4 x float> %a) #4 + %vsqrt1.i = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #4 ret <4 x float> %vsqrt1.i } define <2 x double> @test_vsqrtq_f64(<2 x double> %a) #0 { ; CHECK: fsqrt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d - %vsqrt1.i = tail call <2 x double> @llvm.aarch64.neon.fsqrt.v2f64(<2 x double> %a) #4 + %vsqrt1.i = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #4 ret <2 x double> %vsqrt1.i } @@ -1326,11 +1350,11 @@ define <2 x double> @test_vcvtq_f64_u64(<2 x i64> %a) #0 { ret <2 x double> %vcvt.i } -declare <2 x double> @llvm.aarch64.neon.fsqrt.v2f64(<2 x double>) #2 +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #2 -declare <4 x float> @llvm.aarch64.neon.fsqrt.v4f32(<4 x float>) #2 +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #2 -declare <2 x float> @llvm.aarch64.neon.fsqrt.v2f32(<2 x float>) #2 +declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #2 declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) #2 @@ -1348,53 +1372,53 @@ declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) #2 declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) #2 -declare <2 x i64> @llvm.aarch64.neon.fcvtau.v2i64.v2f64(<2 x double>) #2 +declare <2 x i64> @llvm.arm.neon.vcvtau.v2i64.v2f64(<2 x double>) -declare <4 x i32> @llvm.aarch64.neon.fcvtau.v4i32.v4f32(<4 x float>) #2 +declare <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float>) -declare <2 x i32> @llvm.aarch64.neon.fcvtau.v2i32.v2f32(<2 x float>) #2 +declare <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float>) -declare <2 
x i64> @llvm.aarch64.neon.fcvtas.v2i64.v2f64(<2 x double>) #2 +declare <2 x i64> @llvm.arm.neon.vcvtas.v2i64.v2f64(<2 x double>) -declare <4 x i32> @llvm.aarch64.neon.fcvtas.v4i32.v4f32(<4 x float>) #2 +declare <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float>) -declare <2 x i32> @llvm.aarch64.neon.fcvtas.v2i32.v2f32(<2 x float>) #2 +declare <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float>) -declare <2 x i64> @llvm.aarch64.neon.fcvtmu.v2i64.v2f64(<2 x double>) #2 +declare <2 x i64> @llvm.arm.neon.vcvtmu.v2i64.v2f64(<2 x double>) -declare <4 x i32> @llvm.aarch64.neon.fcvtmu.v4i32.v4f32(<4 x float>) #2 +declare <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float>) -declare <2 x i32> @llvm.aarch64.neon.fcvtmu.v2i32.v2f32(<2 x float>) #2 +declare <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float>) -declare <2 x i64> @llvm.aarch64.neon.fcvtms.v2i64.v2f64(<2 x double>) #2 +declare <2 x i64> @llvm.arm.neon.vcvtms.v2i64.v2f64(<2 x double>) -declare <4 x i32> @llvm.aarch64.neon.fcvtms.v4i32.v4f32(<4 x float>) #2 +declare <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float>) -declare <2 x i32> @llvm.aarch64.neon.fcvtms.v2i32.v2f32(<2 x float>) #2 +declare <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float>) -declare <2 x i64> @llvm.aarch64.neon.fcvtpu.v2i64.v2f64(<2 x double>) #2 +declare <2 x i64> @llvm.arm.neon.vcvtpu.v2i64.v2f64(<2 x double>) -declare <4 x i32> @llvm.aarch64.neon.fcvtpu.v4i32.v4f32(<4 x float>) #2 +declare <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float>) -declare <2 x i32> @llvm.aarch64.neon.fcvtpu.v2i32.v2f32(<2 x float>) #2 +declare <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float>) -declare <2 x i64> @llvm.aarch64.neon.fcvtps.v2i64.v2f64(<2 x double>) #2 +declare <2 x i64> @llvm.arm.neon.vcvtps.v2i64.v2f64(<2 x double>) -declare <4 x i32> @llvm.aarch64.neon.fcvtps.v4i32.v4f32(<4 x float>) #2 +declare <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float>) -declare <2 x i32> @llvm.aarch64.neon.fcvtps.v2i32.v2f32(<2 x float>) #2 +declare <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float>) -declare <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64(<2 x double>) #2 +declare <2 x i64> @llvm.arm.neon.vcvtnu.v2i64.v2f64(<2 x double>) -declare <4 x i32> @llvm.aarch64.neon.fcvtnu.v4i32.v4f32(<4 x float>) #2 +declare <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float>) -declare <2 x i32> @llvm.aarch64.neon.fcvtnu.v2i32.v2f32(<2 x float>) #2 +declare <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float>) -declare <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64(<2 x double>) #2 +declare <2 x i64> @llvm.arm.neon.vcvtns.v2i64.v2f64(<2 x double>) -declare <4 x i32> @llvm.aarch64.neon.fcvtns.v4i32.v4f32(<4 x float>) #2 +declare <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float>) -declare <2 x i32> @llvm.aarch64.neon.fcvtns.v2i32.v2f32(<2 x float>) #2 +declare <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float>) declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #3 @@ -1438,7 +1462,7 @@ declare <4 x float> @llvm.aarch64.neon.frintn.v4f32(<4 x float>) #2 declare <2 x float> @llvm.aarch64.neon.frintn.v2f32(<2 x float>) #2 -declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) #2 +declare <2 x float> @llvm.aarch64.neon.vcvtxn.v2f32.v2f64(<2 x double>) #2 declare <2 x float> @llvm.aarch64.neon.fcvtn.v2f32.v2f64(<2 x double>) #2 @@ -1607,3 +1631,212 @@ declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) #2 declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) #2 +define <1 x i64> @test_vcvt_s64_f64(<1 x 
double> %a) { +; CHECK-LABEL: test_vcvt_s64_f64 +; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}} + %1 = fptosi <1 x double> %a to <1 x i64> + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvt_u64_f64 +; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}} + %1 = fptoui <1 x double> %a to <1 x i64> + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvtn_s64_f64 +; CHECK: fcvtns d{{[0-9]+}}, d{{[0-9]+}} + %1 = call <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double> %a) + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvtn_u64_f64 +; CHECK: fcvtnu d{{[0-9]+}}, d{{[0-9]+}} + %1 = call <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double> %a) + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvtp_s64_f64 +; CHECK: fcvtps d{{[0-9]+}}, d{{[0-9]+}} + %1 = call <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double> %a) + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvtp_u64_f64 +; CHECK: fcvtpu d{{[0-9]+}}, d{{[0-9]+}} + %1 = call <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double> %a) + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvtm_s64_f64 +; CHECK: fcvtms d{{[0-9]+}}, d{{[0-9]+}} + %1 = call <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double> %a) + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvtm_u64_f64 +; CHECK: fcvtmu d{{[0-9]+}}, d{{[0-9]+}} + %1 = call <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double> %a) + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvta_s64_f64 +; CHECK: fcvtas d{{[0-9]+}}, d{{[0-9]+}} + %1 = call <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double> %a) + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvta_u64_f64 +; CHECK: fcvtau d{{[0-9]+}}, d{{[0-9]+}} + %1 = call <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double> %a) + ret <1 x i64> %1 +} + +define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) { +; CHECK-LABEL: test_vcvt_f64_s64 +; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}} + %1 = sitofp <1 x i64> %a to <1 x double> + ret <1 x double> %1 +} + +define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) { +; CHECK-LABEL: test_vcvt_f64_u64 +; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}} + %1 = uitofp <1 x i64> %a to <1 x double> + ret <1 x double> %1 +} + +declare <1 x i64> @llvm.arm.neon.vcvtau.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.arm.neon.vcvtas.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.arm.neon.vcvtmu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.arm.neon.vcvtms.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.arm.neon.vcvtpu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.arm.neon.vcvtps.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.arm.neon.vcvtnu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.arm.neon.vcvtns.v1i64.v1f64(<1 x double>) + +define <1 x double> @test_vrndn_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrndn_f64 +; CHECK: frintn d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vrnda_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrnda_f64 +; CHECK: frinta d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> 
@llvm.round.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vrndp_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrndp_f64 +; CHECK: frintp d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.ceil.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vrndm_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrndm_f64 +; CHECK: frintm d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.floor.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vrndx_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrndx_f64 +; CHECK: frintx d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.rint.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vrnd_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrnd_f64 +; CHECK: frintz d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.trunc.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vrndi_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrndi_f64 +; CHECK: frinti d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>) +declare <1 x double> @llvm.trunc.v1f64(<1 x double>) +declare <1 x double> @llvm.rint.v1f64(<1 x double>) +declare <1 x double> @llvm.floor.v1f64(<1 x double>) +declare <1 x double> @llvm.ceil.v1f64(<1 x double>) +declare <1 x double> @llvm.round.v1f64(<1 x double>) +declare <1 x double> @llvm.aarch64.neon.frintn.v1f64(<1 x double>) + +define <1 x double> @test_vrsqrte_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrsqrte_f64 +; CHECK: frsqrte d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vrecpe_f64(<1 x double> %a) { +; CHECK-LABEL: test_vrecpe_f64 +; CHECK: frecpe d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vsqrt_f64(<1 x double> %a) { +; CHECK-LABEL: test_vsqrt_f64 +; CHECK: fsqrt d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a) + ret <1 x double> %1 +} + +define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) { +; CHECK-LABEL: test_vrecps_f64 +; CHECK: frecps d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %a, <1 x double> %b) + ret <1 x double> %1 +} + +define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) { +; CHECK-LABEL: test_vrsqrts_f64 +; CHECK: frsqrts d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %1 = tail call <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double> %a, <1 x double> %b) + ret <1 x double> %1 +} + +declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>) +declare <1 x double> @llvm.sqrt.v1f64(<1 x double>) +declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>) +declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>) + +define i64 @test_vaddlv_s32(<2 x i32> %a) { +; CHECK-LABEL: test_vaddlv_s32 +; CHECK: saddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s + %1 = tail call <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32> %a) + %2 = extractelement <1 x i64> %1, i32 0 + ret i64 %2 +} + +define i64 @test_vaddlv_u32(<2 x i32> %a) { +; CHECK-LABEL: test_vaddlv_u32 +; CHECK: uaddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s + %1 = 
tail call <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32> %a) + %2 = extractelement <1 x i64> %1, i32 0 + ret i64 %2 +} + +declare <1 x i64> @llvm.aarch64.neon.saddlv.v1i64.v2i32(<2 x i32>) +declare <1 x i64> @llvm.aarch64.neon.uaddlv.v1i64.v2i32(<2 x i32>) \ No newline at end of file diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-mla-mls.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-mla-mls.ll index 23e9223a8b7b..71bb0e70abfa 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-mla-mls.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-mla-mls.ll @@ -2,84 +2,84 @@ define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) { -;CHECK: mla {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: mla {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = mul <8 x i8> %A, %B; %tmp2 = add <8 x i8> %C, %tmp1; ret <8 x i8> %tmp2 } define <16 x i8> @mla16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { -;CHECK: mla {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: mla {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = mul <16 x i8> %A, %B; %tmp2 = add <16 x i8> %C, %tmp1; ret <16 x i8> %tmp2 } define <4 x i16> @mla4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) { -;CHECK: mla {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +;CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h %tmp1 = mul <4 x i16> %A, %B; %tmp2 = add <4 x i16> %C, %tmp1; ret <4 x i16> %tmp2 } define <8 x i16> @mla8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) { -;CHECK: mla {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +;CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h %tmp1 = mul <8 x i16> %A, %B; %tmp2 = add <8 x i16> %C, %tmp1; ret <8 x i16> %tmp2 } define <2 x i32> @mla2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) { -;CHECK: mla {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp1 = mul <2 x i32> %A, %B; %tmp2 = add <2 x i32> %C, %tmp1; ret <2 x i32> %tmp2 } define <4 x i32> @mla4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) { -;CHECK: mla {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp1 = mul <4 x i32> %A, %B; %tmp2 = add <4 x i32> %C, %tmp1; ret <4 x i32> %tmp2 } define <8 x i8> @mls8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) { -;CHECK: mls {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: mls {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp1 = mul <8 x i8> %A, %B; %tmp2 = sub <8 x i8> %C, %tmp1; ret <8 x i8> %tmp2 } define <16 x i8> @mls16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { -;CHECK: mls {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: mls {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp1 = mul <16 x i8> %A, %B; %tmp2 = sub <16 x i8> %C, %tmp1; ret <16 x i8> %tmp2 } define <4 x i16> @mls4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) { -;CHECK: mls {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +;CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h %tmp1 = mul <4 x i16> %A, %B; %tmp2 = sub <4 x i16> %C, %tmp1; ret <4 x i16> %tmp2 } define <8 x i16> @mls8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) { -;CHECK: mls {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +;CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h %tmp1 = mul <8 x i16> %A, %B; %tmp2 = sub <8 x i16> %C, %tmp1; ret <8 x i16> %tmp2 } define <2 x i32> @mls2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) { -;CHECK: mls {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, 
{{v[0-31]+}}.2s +;CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp1 = mul <2 x i32> %A, %B; %tmp2 = sub <2 x i32> %C, %tmp1; ret <2 x i32> %tmp2 } define <4 x i32> @mls4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) { -;CHECK: mls {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp1 = mul <4 x i32> %A, %B; %tmp2 = sub <4 x i32> %C, %tmp1; ret <4 x i32> %tmp2 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-mov.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-mov.ll index 42f6a894da64..4035b914b569 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-mov.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-mov.ll @@ -1,205 +1,219 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <8 x i8> @movi8b() { -;CHECK: movi {{v[0-31]+}}.8b, #0x8 +;CHECK: movi {{v[0-9]+}}.8b, #0x8 ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > } define <16 x i8> @movi16b() { -;CHECK: movi {{v[0-31]+}}.16b, #0x8 +;CHECK: movi {{v[0-9]+}}.16b, #0x8 ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > } define <2 x i32> @movi2s_lsl0() { -;CHECK: movi {{v[0-31]+}}.2s, #0xff +;CHECK: movi {{v[0-9]+}}.2s, #0xff ret <2 x i32> < i32 255, i32 255 > } define <2 x i32> @movi2s_lsl8() { -;CHECK: movi {{v[0-31]+}}.2s, #0xff, lsl #8 +;CHECK: movi {{v[0-9]+}}.2s, #0xff, lsl #8 ret <2 x i32> < i32 65280, i32 65280 > } define <2 x i32> @movi2s_lsl16() { -;CHECK: movi {{v[0-31]+}}.2s, #0xff, lsl #16 +;CHECK: movi {{v[0-9]+}}.2s, #0xff, lsl #16 ret <2 x i32> < i32 16711680, i32 16711680 > } define <2 x i32> @movi2s_lsl24() { -;CHECK: movi {{v[0-31]+}}.2s, #0xff, lsl #24 +;CHECK: movi {{v[0-9]+}}.2s, #0xff, lsl #24 ret <2 x i32> < i32 4278190080, i32 4278190080 > } define <4 x i32> @movi4s_lsl0() { -;CHECK: movi {{v[0-31]+}}.4s, #0xff +;CHECK: movi {{v[0-9]+}}.4s, #0xff ret <4 x i32> < i32 255, i32 255, i32 255, i32 255 > } define <4 x i32> @movi4s_lsl8() { -;CHECK: movi {{v[0-31]+}}.4s, #0xff, lsl #8 +;CHECK: movi {{v[0-9]+}}.4s, #0xff, lsl #8 ret <4 x i32> < i32 65280, i32 65280, i32 65280, i32 65280 > } define <4 x i32> @movi4s_lsl16() { -;CHECK: movi {{v[0-31]+}}.4s, #0xff, lsl #16 +;CHECK: movi {{v[0-9]+}}.4s, #0xff, lsl #16 ret <4 x i32> < i32 16711680, i32 16711680, i32 16711680, i32 16711680 > } define <4 x i32> @movi4s_lsl24() { -;CHECK: movi {{v[0-31]+}}.4s, #0xff, lsl #24 +;CHECK: movi {{v[0-9]+}}.4s, #0xff, lsl #24 ret <4 x i32> < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080 > } define <4 x i16> @movi4h_lsl0() { -;CHECK: movi {{v[0-31]+}}.4h, #0xff +;CHECK: movi {{v[0-9]+}}.4h, #0xff ret <4 x i16> < i16 255, i16 255, i16 255, i16 255 > } define <4 x i16> @movi4h_lsl8() { -;CHECK: movi {{v[0-31]+}}.4h, #0xff, lsl #8 +;CHECK: movi {{v[0-9]+}}.4h, #0xff, lsl #8 ret <4 x i16> < i16 65280, i16 65280, i16 65280, i16 65280 > } define <8 x i16> @movi8h_lsl0() { -;CHECK: movi {{v[0-31]+}}.8h, #0xff +;CHECK: movi {{v[0-9]+}}.8h, #0xff ret <8 x i16> < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 > } define <8 x i16> @movi8h_lsl8() { -;CHECK: movi {{v[0-31]+}}.8h, #0xff, lsl #8 +;CHECK: movi {{v[0-9]+}}.8h, #0xff, lsl #8 ret <8 x i16> < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 > } define <2 x i32> @mvni2s_lsl0() { -;CHECK: mvni {{v[0-31]+}}.2s, #0x10 +;CHECK: mvni {{v[0-9]+}}.2s, #0x10 ret 
<2 x i32> < i32 4294967279, i32 4294967279 > } define <2 x i32> @mvni2s_lsl8() { -;CHECK: mvni {{v[0-31]+}}.2s, #0x10, lsl #8 +;CHECK: mvni {{v[0-9]+}}.2s, #0x10, lsl #8 ret <2 x i32> < i32 4294963199, i32 4294963199 > } define <2 x i32> @mvni2s_lsl16() { -;CHECK: mvni {{v[0-31]+}}.2s, #0x10, lsl #16 +;CHECK: mvni {{v[0-9]+}}.2s, #0x10, lsl #16 ret <2 x i32> < i32 4293918719, i32 4293918719 > } define <2 x i32> @mvni2s_lsl24() { -;CHECK: mvni {{v[0-31]+}}.2s, #0x10, lsl #24 +;CHECK: mvni {{v[0-9]+}}.2s, #0x10, lsl #24 ret <2 x i32> < i32 4026531839, i32 4026531839 > } define <4 x i32> @mvni4s_lsl0() { -;CHECK: mvni {{v[0-31]+}}.4s, #0x10 +;CHECK: mvni {{v[0-9]+}}.4s, #0x10 ret <4 x i32> < i32 4294967279, i32 4294967279, i32 4294967279, i32 4294967279 > } define <4 x i32> @mvni4s_lsl8() { -;CHECK: mvni {{v[0-31]+}}.4s, #0x10, lsl #8 +;CHECK: mvni {{v[0-9]+}}.4s, #0x10, lsl #8 ret <4 x i32> < i32 4294963199, i32 4294963199, i32 4294963199, i32 4294963199 > } define <4 x i32> @mvni4s_lsl16() { -;CHECK: mvni {{v[0-31]+}}.4s, #0x10, lsl #16 +;CHECK: mvni {{v[0-9]+}}.4s, #0x10, lsl #16 ret <4 x i32> < i32 4293918719, i32 4293918719, i32 4293918719, i32 4293918719 > } define <4 x i32> @mvni4s_lsl24() { -;CHECK: mvni {{v[0-31]+}}.4s, #0x10, lsl #24 +;CHECK: mvni {{v[0-9]+}}.4s, #0x10, lsl #24 ret <4 x i32> < i32 4026531839, i32 4026531839, i32 4026531839, i32 4026531839 > } define <4 x i16> @mvni4h_lsl0() { -;CHECK: mvni {{v[0-31]+}}.4h, #0x10 +;CHECK: mvni {{v[0-9]+}}.4h, #0x10 ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 > } define <4 x i16> @mvni4h_lsl8() { -;CHECK: mvni {{v[0-31]+}}.4h, #0x10, lsl #8 +;CHECK: mvni {{v[0-9]+}}.4h, #0x10, lsl #8 ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 > } define <8 x i16> @mvni8h_lsl0() { -;CHECK: mvni {{v[0-31]+}}.8h, #0x10 +;CHECK: mvni {{v[0-9]+}}.8h, #0x10 ret <8 x i16> < i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519, i16 65519 > } define <8 x i16> @mvni8h_lsl8() { -;CHECK: mvni {{v[0-31]+}}.8h, #0x10, lsl #8 +;CHECK: mvni {{v[0-9]+}}.8h, #0x10, lsl #8 ret <8 x i16> < i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439, i16 61439 > } define <2 x i32> @movi2s_msl8(<2 x i32> %a) { -;CHECK: movi {{v[0-31]+}}.2s, #0xff, msl #8 +;CHECK: movi {{v[0-9]+}}.2s, #0xff, msl #8 ret <2 x i32> < i32 65535, i32 65535 > } define <2 x i32> @movi2s_msl16() { -;CHECK: movi {{v[0-31]+}}.2s, #0xff, msl #16 +;CHECK: movi {{v[0-9]+}}.2s, #0xff, msl #16 ret <2 x i32> < i32 16777215, i32 16777215 > } define <4 x i32> @movi4s_msl8() { -;CHECK: movi {{v[0-31]+}}.4s, #0xff, msl #8 +;CHECK: movi {{v[0-9]+}}.4s, #0xff, msl #8 ret <4 x i32> < i32 65535, i32 65535, i32 65535, i32 65535 > } define <4 x i32> @movi4s_msl16() { -;CHECK: movi {{v[0-31]+}}.4s, #0xff, msl #16 +;CHECK: movi {{v[0-9]+}}.4s, #0xff, msl #16 ret <4 x i32> < i32 16777215, i32 16777215, i32 16777215, i32 16777215 > } define <2 x i32> @mvni2s_msl8() { -;CHECK: mvni {{v[0-31]+}}.2s, #0x10, msl #8 +;CHECK: mvni {{v[0-9]+}}.2s, #0x10, msl #8 ret <2 x i32> < i32 18446744073709547264, i32 18446744073709547264> } define <2 x i32> @mvni2s_msl16() { -;CHECK: mvni {{v[0-31]+}}.2s, #0x10, msl #16 +;CHECK: mvni {{v[0-9]+}}.2s, #0x10, msl #16 ret <2 x i32> < i32 18446744073708437504, i32 18446744073708437504> } define <4 x i32> @mvni4s_msl8() { -;CHECK: mvni {{v[0-31]+}}.4s, #0x10, msl #8 +;CHECK: mvni {{v[0-9]+}}.4s, #0x10, msl #8 ret <4 x i32> < i32 18446744073709547264, i32 18446744073709547264, i32 18446744073709547264, i32 
18446744073709547264> } define <4 x i32> @mvni4s_msl16() { -;CHECK: mvni {{v[0-31]+}}.4s, #0x10, msl #16 +;CHECK: mvni {{v[0-9]+}}.4s, #0x10, msl #16 ret <4 x i32> < i32 18446744073708437504, i32 18446744073708437504, i32 18446744073708437504, i32 18446744073708437504> } define <2 x i64> @movi2d() { -;CHECK: movi {{v[0-31]+}}.2d, #0xff0000ff0000ffff +;CHECK: movi {{v[0-9]+}}.2d, #0xff0000ff0000ffff ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 > } define <1 x i64> @movid() { -;CHECK: movi {{d[0-31]+}}, #0xff0000ff0000ffff +;CHECK: movi {{d[0-9]+}}, #0xff0000ff0000ffff ret <1 x i64> < i64 18374687574888349695 > } define <2 x float> @fmov2s() { -;CHECK: fmov {{v[0-31]+}}.2s, #-12.00000000 +;CHECK: fmov {{v[0-9]+}}.2s, #-12.00000000 ret <2 x float> < float -1.2e1, float -1.2e1> } define <4 x float> @fmov4s() { -;CHECK: fmov {{v[0-31]+}}.4s, #-12.00000000 +;CHECK: fmov {{v[0-9]+}}.4s, #-12.00000000 ret <4 x float> < float -1.2e1, float -1.2e1, float -1.2e1, float -1.2e1> } define <2 x double> @fmov2d() { -;CHECK: fmov {{v[0-31]+}}.2d, #-12.00000000 +;CHECK: fmov {{v[0-9]+}}.2d, #-12.00000000 ret <2 x double> < double -1.2e1, double -1.2e1> } +define <2 x i32> @movi1d_1() { +; CHECK: movi d0, #0xffffffff0000 + ret <2 x i32> < i32 -65536, i32 65535> +} + + +declare <2 x i32> @test_movi1d(<2 x i32>, <2 x i32>) +define <2 x i32> @movi1d() { +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +; CHECK-NEXT: movi d1, #0xffffffff0000 + %1 = tail call <2 x i32> @test_movi1d(<2 x i32> , <2 x i32> < i32 -65536, i32 65535>) + ret <2 x i32> %1 +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-mul-div.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-mul-div.ll index e1be31326638..09ba072f9f43 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-mul-div.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-mul-div.ll @@ -2,72 +2,87 @@ define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) { -;CHECK: mul {{v[0-31]+}}.8b, {{v[0-31]+}}.8b, {{v[0-31]+}}.8b +;CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b %tmp3 = mul <8 x i8> %A, %B; ret <8 x i8> %tmp3 } define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) { -;CHECK: mul {{v[0-31]+}}.16b, {{v[0-31]+}}.16b, {{v[0-31]+}}.16b +;CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b %tmp3 = mul <16 x i8> %A, %B; ret <16 x i8> %tmp3 } define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) { -;CHECK: mul {{v[0-31]+}}.4h, {{v[0-31]+}}.4h, {{v[0-31]+}}.4h +;CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h %tmp3 = mul <4 x i16> %A, %B; ret <4 x i16> %tmp3 } define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) { -;CHECK: mul {{v[0-31]+}}.8h, {{v[0-31]+}}.8h, {{v[0-31]+}}.8h +;CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h %tmp3 = mul <8 x i16> %A, %B; ret <8 x i16> %tmp3 } define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) { -;CHECK: mul {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp3 = mul <2 x i32> %A, %B; ret <2 x i32> %tmp3 } define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) { -;CHECK: mul {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp3 = mul <4 x i32> %A, %B; ret <4 x i32> %tmp3 } +define <1 x i64> @mul1xi64(<1 x i64> %A, <1 x i64> %B) { +;CHECK-LABEL: mul1xi64: +;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} + %tmp3 = mul <1 x i64> %A, %B; + ret <1 x i64> %tmp3 +} + +define
<2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) { +;CHECK-LABEL: mul2xi64: +;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} +;CHECK: mul x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}} + %tmp3 = mul <2 x i64> %A, %B; + ret <2 x i64> %tmp3 +} + define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) { -;CHECK: fmul {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp3 = fmul <2 x float> %A, %B; ret <2 x float> %tmp3 } define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) { -;CHECK: fmul {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp3 = fmul <4 x float> %A, %B; ret <4 x float> %tmp3 } define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) { -;CHECK: fmul {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %tmp3 = fmul <2 x double> %A, %B; ret <2 x double> %tmp3 } define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) { -;CHECK: fdiv {{v[0-31]+}}.2s, {{v[0-31]+}}.2s, {{v[0-31]+}}.2s +;CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s %tmp3 = fdiv <2 x float> %A, %B; ret <2 x float> %tmp3 } define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) { -;CHECK: fdiv {{v[0-31]+}}.4s, {{v[0-31]+}}.4s, {{v[0-31]+}}.4s +;CHECK: fdiv {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s %tmp3 = fdiv <4 x float> %A, %B; ret <4 x float> %tmp3 } define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) { -;CHECK: fdiv {{v[0-31]+}}.2d, {{v[0-31]+}}.2d, {{v[0-31]+}}.2d +;CHECK: fdiv {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %tmp3 = fdiv <2 x double> %A, %B; ret <2 x double> %tmp3 } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-add-sub.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-add-sub.ll index 09ca880c8053..4f322e081839 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-add-sub.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-add-sub.ll @@ -1,13 +1,13 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} %tmp3 = add <1 x i64> %A, %B; ret <1 x i64> %tmp3 } define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) { -;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} %tmp3 = sub <1 x i64> %A, %B; ret <1 x i64> %tmp3 } @@ -18,14 +18,14 @@ declare <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64>, <1 x i64>) define <1 x i64> @test_add_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_add_v1i64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +; CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> @test_uadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_uadd_v1i64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -35,14 +35,14 @@ declare <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64>, <1 x i64>) define <1 x i64> @test_sub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sub_v1i64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubds(<1 x 
i64> %lhs, <1 x i64> %rhs) -; CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +; CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> @test_usub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_usub_v1i64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll index 8ce42def409a..247514cd5bc4 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll @@ -5,7 +5,7 @@ declare double @llvm.fma.f64(double, double, double) define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) { ; CHECK: test_fmla_ss4S - ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[3] + ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a) ret float %tmp2 @@ -13,7 +13,7 @@ define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) { define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) { ; CHECK: test_fmla_ss4S_swap - ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[3] + ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 %tmp2 = call float @llvm.fma.f32(float %tmp1, float %a, float %a) ret float %tmp2 @@ -21,7 +21,7 @@ define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) { define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) { ; CHECK: test_fmla_ss2S - ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[1] + ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] %tmp1 = extractelement <2 x float> %v, i32 1 %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a) ret float %tmp2 @@ -29,7 +29,7 @@ define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) { define double @test_fmla_ddD(double %a, double %b, <1 x double> %v) { ; CHECK: test_fmla_ddD - ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[0] + ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] %tmp1 = extractelement <1 x double> %v, i32 0 %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a) ret double %tmp2 @@ -37,7 +37,7 @@ define double @test_fmla_ddD(double %a, double %b, <1 x double> %v) { define double @test_fmla_dd2D(double %a, double %b, <2 x double> %v) { ; CHECK: test_fmla_dd2D - ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[1] + ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a) ret double %tmp2 @@ -45,7 +45,7 @@ define double @test_fmla_dd2D(double %a, double %b, <2 x double> %v) { define double @test_fmla_dd2D_swap(double %a, double %b, <2 x double> %v) { ; CHECK: test_fmla_dd2D_swap - ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[1] + ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a) ret double %tmp2 @@ -53,7 +53,7 @@ define double @test_fmla_dd2D_swap(double %a, double %b, <2 x double> %v) { define float @test_fmls_ss4S(float %a, float %b, <4 x 
float> %v) { ; CHECK: test_fmls_ss4S - ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[3] + ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 %tmp2 = fsub float -0.0, %tmp1 %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a) @@ -62,7 +62,7 @@ define float @test_fmls_ss4S(float %a, float %b, <4 x float> %v) { define float @test_fmls_ss4S_swap(float %a, float %b, <4 x float> %v) { ; CHECK: test_fmls_ss4S_swap - ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[3] + ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 %tmp2 = fsub float -0.0, %tmp1 %tmp3 = call float @llvm.fma.f32(float %tmp1, float %tmp2, float %a) @@ -72,7 +72,7 @@ define float @test_fmls_ss4S_swap(float %a, float %b, <4 x float> %v) { define float @test_fmls_ss2S(float %a, float %b, <2 x float> %v) { ; CHECK: test_fmls_ss2S - ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-31]+}}.s[1] + ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] %tmp1 = extractelement <2 x float> %v, i32 1 %tmp2 = fsub float -0.0, %tmp1 %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a) @@ -81,7 +81,7 @@ define float @test_fmls_ss2S(float %a, float %b, <2 x float> %v) { define double @test_fmls_ddD(double %a, double %b, <1 x double> %v) { ; CHECK: test_fmls_ddD - ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[0] + ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] %tmp1 = extractelement <1 x double> %v, i32 0 %tmp2 = fsub double -0.0, %tmp1 %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a) @@ -90,7 +90,7 @@ define double @test_fmls_ddD(double %a, double %b, <1 x double> %v) { define double @test_fmls_dd2D(double %a, double %b, <2 x double> %v) { ; CHECK: test_fmls_dd2D - ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[1] + ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 %tmp2 = fsub double -0.0, %tmp1 %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a) @@ -99,7 +99,7 @@ define double @test_fmls_dd2D(double %a, double %b, <2 x double> %v) { define double @test_fmls_dd2D_swap(double %a, double %b, <2 x double> %v) { ; CHECK: test_fmls_dd2D_swap - ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-31]+}}.d[1] + ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 %tmp2 = fsub double -0.0, %tmp1 %tmp3 = call double @llvm.fma.f64(double %tmp1, double %tmp2, double %a) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll index 968ad3e8cf71..c9128e7e5351 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll @@ -2,7 +2,7 @@ define float @test_fmul_lane_ss2S(float %a, <2 x float> %v) { ; CHECK: test_fmul_lane_ss2S - ; CHECK: fmul {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[1] + ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] %tmp1 = extractelement <2 x float> %v, i32 1 %tmp2 = fmul float %a, %tmp1; ret float %tmp2; @@ -10,7 +10,7 @@ define float @test_fmul_lane_ss2S(float %a, <2 x float> %v) { define float @test_fmul_lane_ss2S_swap(float %a, <2 x float> %v) { ; CHECK: test_fmul_lane_ss2S_swap - ; CHECK: fmul {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[1] + ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] 
%tmp1 = extractelement <2 x float> %v, i32 1 %tmp2 = fmul float %tmp1, %a; ret float %tmp2; @@ -19,7 +19,7 @@ define float @test_fmul_lane_ss2S_swap(float %a, <2 x float> %v) { define float @test_fmul_lane_ss4S(float %a, <4 x float> %v) { ; CHECK: test_fmul_lane_ss4S - ; CHECK: fmul {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[3] + ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 %tmp2 = fmul float %a, %tmp1; ret float %tmp2; @@ -27,7 +27,7 @@ define float @test_fmul_lane_ss4S(float %a, <4 x float> %v) { define float @test_fmul_lane_ss4S_swap(float %a, <4 x float> %v) { ; CHECK: test_fmul_lane_ss4S_swap - ; CHECK: fmul {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[3] + ; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 %tmp2 = fmul float %tmp1, %a; ret float %tmp2; @@ -36,7 +36,7 @@ define float @test_fmul_lane_ss4S_swap(float %a, <4 x float> %v) { define double @test_fmul_lane_ddD(double %a, <1 x double> %v) { ; CHECK: test_fmul_lane_ddD - ; CHECK: fmul {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[0] + ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] %tmp1 = extractelement <1 x double> %v, i32 0 %tmp2 = fmul double %a, %tmp1; ret double %tmp2; @@ -46,7 +46,7 @@ define double @test_fmul_lane_ddD(double %a, <1 x double> %v) { define double @test_fmul_lane_dd2D(double %a, <2 x double> %v) { ; CHECK: test_fmul_lane_dd2D - ; CHECK: fmul {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[1] + ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 %tmp2 = fmul double %a, %tmp1; ret double %tmp2; @@ -55,7 +55,7 @@ define double @test_fmul_lane_dd2D(double %a, <2 x double> %v) { define double @test_fmul_lane_dd2D_swap(double %a, <2 x double> %v) { ; CHECK: test_fmul_lane_dd2D_swap - ; CHECK: fmul {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[1] + ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 %tmp2 = fmul double %tmp1, %a; ret double %tmp2; @@ -65,7 +65,7 @@ declare float @llvm.aarch64.neon.vmulx.f32(float, float) define float @test_fmulx_lane_f32(float %a, <2 x float> %v) { ; CHECK: test_fmulx_lane_f32 - ; CHECK: fmulx {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[1] + ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] %tmp1 = extractelement <2 x float> %v, i32 1 %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1) ret float %tmp2; @@ -73,7 +73,7 @@ define float @test_fmulx_lane_f32(float %a, <2 x float> %v) { define float @test_fmulx_laneq_f32(float %a, <4 x float> %v) { ; CHECK: test_fmulx_laneq_f32 - ; CHECK: fmulx {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[3] + ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %tmp1) ret float %tmp2; @@ -81,7 +81,7 @@ define float @test_fmulx_laneq_f32(float %a, <4 x float> %v) { define float @test_fmulx_laneq_f32_swap(float %a, <4 x float> %v) { ; CHECK: test_fmulx_laneq_f32_swap - ; CHECK: fmulx {{s[0-31]+}}, {{s[0-31]+}}, {{v[0-31]+}}.s[3] + ; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] %tmp1 = extractelement <4 x float> %v, i32 3 %tmp2 = call float @llvm.aarch64.neon.vmulx.f32(float %tmp1, float %a) ret float %tmp2; @@ -91,7 +91,7 @@ declare double @llvm.aarch64.neon.vmulx.f64(double, double) define double @test_fmulx_lane_f64(double %a, <1 x double> %v) { ; CHECK: test_fmulx_lane_f64 - ; CHECK: fmulx 
{{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[0] + ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] %tmp1 = extractelement <1 x double> %v, i32 0 %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1) ret double %tmp2; @@ -99,7 +99,7 @@ define double @test_fmulx_lane_f64(double %a, <1 x double> %v) { define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) { ; CHECK: test_fmulx_laneq_f64_0 - ; CHECK: fmulx {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[0] + ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] %tmp1 = extractelement <2 x double> %v, i32 0 %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1) ret double %tmp2; @@ -108,7 +108,7 @@ define double @test_fmulx_laneq_f64_0(double %a, <2 x double> %v) { define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) { ; CHECK: test_fmulx_laneq_f64_1 - ; CHECK: fmulx {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[1] + ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %a, double %tmp1) ret double %tmp2; @@ -116,7 +116,7 @@ define double @test_fmulx_laneq_f64_1(double %a, <2 x double> %v) { define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) { ; CHECK: test_fmulx_laneq_f64_1_swap - ; CHECK: fmulx {{d[0-31]+}}, {{d[0-31]+}}, {{v[0-31]+}}.d[1] + ; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] %tmp1 = extractelement <2 x double> %v, i32 1 %tmp2 = call double @llvm.aarch64.neon.vmulx.f64(double %tmp1, double %a) ret double %tmp2; diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-compare.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-compare.ll index a1cfdf0b5c71..d1b5f9c2546e 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-compare.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-compare.ll @@ -118,6 +118,221 @@ entry: ret i64 %0 } + +define <1 x i64> @test_vcage_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vcage_f64 +; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %vcage2.i = tail call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %a, <1 x double> %b) #2 + ret <1 x i64> %vcage2.i +} + +define <1 x i64> @test_vcagt_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vcagt_f64 +; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %vcagt2.i = tail call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %a, <1 x double> %b) #2 + ret <1 x i64> %vcagt2.i +} + +define <1 x i64> @test_vcale_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vcale_f64 +; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %vcage2.i = tail call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %b, <1 x double> %a) #2 + ret <1 x i64> %vcage2.i +} + +define <1 x i64> @test_vcalt_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vcalt_f64 +; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %vcagt2.i = tail call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %b, <1 x double> %a) #2 + ret <1 x i64> %vcagt2.i +} + +define <1 x i64> @test_vceq_s64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vceq_s64 +; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp eq <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vceq_u64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vceq_u64 +; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, 
{{d[0-9]}} + %cmp.i = icmp eq <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vceq_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vceq_f64 +; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = fcmp oeq <1 x double> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcge_s64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vcge_s64 +; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp sge <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcge_u64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vcge_u64 +; CHECK: cmhs {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp uge <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcge_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vcge_f64 +; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = fcmp oge <1 x double> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcle_s64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vcle_s64 +; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp sle <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcle_u64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vcle_u64 +; CHECK: cmhs {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp ule <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcle_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vcle_f64 +; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = fcmp ole <1 x double> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcgt_s64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vcgt_s64 +; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp sgt <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcgt_u64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vcgt_u64 +; CHECK: cmhi {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp ugt <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vcgt_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vcgt_f64 +; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = fcmp ogt <1 x double> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vclt_s64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vclt_s64 +; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp slt <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vclt_u64(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK: test_vclt_u64 +; CHECK: cmhi {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = icmp ult <1 x i64> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vclt_f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK: test_vclt_f64 +; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} + %cmp.i = fcmp olt <1 x double> %a, %b + %sext.i = sext <1 x i1> %cmp.i to <1 x i64> + ret <1 x i64> %sext.i +} + +define <1 x i64> @test_vceqz_s64(<1 x i64> %a) #0 { +; CHECK: test_vceqz_s64 +; CHECK: cmeq {{d[0-9]}}, 
{{d[0-9]}}, #0x0 + %1 = icmp eq <1 x i64> %a, zeroinitializer + %vceqz.i = sext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vceqz.i +} + +define <1 x i64> @test_vceqz_u64(<1 x i64> %a) #0 { +; CHECK: test_vceqz_u64 +; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0 + %1 = icmp eq <1 x i64> %a, zeroinitializer + %vceqz.i = sext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vceqz.i +} + +define <1 x i64> @test_vceqz_p64(<1 x i64> %a) #0 { +; CHECK: test_vceqz_p64 +; CHECK: cmeq {{d[0-9]}}, {{d[0-9]}}, #0x0 + %1 = icmp eq <1 x i64> %a, zeroinitializer + %vceqz.i = sext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vceqz.i +} + +define <2 x i64> @test_vceqzq_p64(<2 x i64> %a) #0 { +; CHECK: test_vceqzq_p64 +; CHECK: cmeq {{v[0-9]}}.2d, {{v[0-9]}}.2d, #0 + %1 = icmp eq <2 x i64> %a, zeroinitializer + %vceqz.i = sext <2 x i1> %1 to <2 x i64> + ret <2 x i64> %vceqz.i +} + +define <1 x i64> @test_vcgez_s64(<1 x i64> %a) #0 { +; CHECK: test_vcgez_s64 +; CHECK: cmge {{d[0-9]}}, {{d[0-9]}}, #0x0 + %1 = icmp sge <1 x i64> %a, zeroinitializer + %vcgez.i = sext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vcgez.i +} + +define <1 x i64> @test_vclez_s64(<1 x i64> %a) #0 { +; CHECK: test_vclez_s64 +; CHECK: cmle {{d[0-9]}}, {{d[0-9]}}, #0x0 + %1 = icmp sle <1 x i64> %a, zeroinitializer + %vclez.i = sext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vclez.i +} + +define <1 x i64> @test_vcgtz_s64(<1 x i64> %a) #0 { +; CHECK: test_vcgtz_s64 +; CHECK: cmgt {{d[0-9]}}, {{d[0-9]}}, #0x0 + %1 = icmp sgt <1 x i64> %a, zeroinitializer + %vcgtz.i = sext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vcgtz.i +} + +define <1 x i64> @test_vcltz_s64(<1 x i64> %a) #0 { +; CHECK: test_vcltz_s64 +; CHECK: cmlt {{d[0-9]}}, {{d[0-9]}}, #0 + %1 = icmp slt <1 x i64> %a, zeroinitializer + %vcltz.i = sext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vcltz.i +} + +declare <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) +declare <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) declare <1 x i64> @llvm.aarch64.neon.vtstd.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) declare <1 x i64> @llvm.aarch64.neon.vchs.v1i64.v1i64.v1i64(<1 x i64>, <1 x i64>) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-copy.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-copy.ll index d433ff595d1c..fadd73484e7b 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-copy.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-copy.ll @@ -2,21 +2,30 @@ define float @test_dup_sv2S(<2 x float> %v) { ;CHECK: test_dup_sv2S - ;CHECK: dup {{s[0-31]+}}, {{v[0-31]+}}.s[1] + ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1] %tmp1 = extractelement <2 x float> %v, i32 1 ret float %tmp1 } +define float @test_dup_sv2S_0(<2 x float> %v) { + ;CHECK-LABEL: test_dup_sv2S_0 + ;CHECK-NOT: dup {{s[0-9]+}}, {{v[0-9]+}}.s[0] + ;CHECK: ret + %tmp1 = extractelement <2 x float> %v, i32 0 + ret float %tmp1 +} + define float @test_dup_sv4S(<4 x float> %v) { - ;CHECK: test_dup_sv4S - ;CHECK: dup {{s[0-31]+}}, {{v[0-31]+}}.s[0] + ;CHECK-LABEL: test_dup_sv4S + ;CHECK-NOT: dup {{s[0-9]+}}, {{v[0-9]+}}.s[0] + ;CHECK: ret %tmp1 = extractelement <4 x float> %v, i32 0 ret float %tmp1 } define double @test_dup_dvD(<1 x double> %v) { ;CHECK: test_dup_dvD - ;CHECK-NOT: dup {{d[0-31]+}}, {{v[0-31]+}}.d[0] + ;CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0] ;CHECK: ret %tmp1 = extractelement <1 x double> %v, i32 0 ret double 
%tmp1 @@ -24,63 +33,71 @@ define double @test_dup_dvD(<1 x double> %v) { define double @test_dup_dv2D(<2 x double> %v) { ;CHECK: test_dup_dv2D - ;CHECK: dup {{d[0-31]+}}, {{v[0-31]+}}.d[1] + ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] + %tmp1 = extractelement <2 x double> %v, i32 1 + ret double %tmp1 +} + +define double @test_dup_dv2D_0(<2 x double> %v) { + ;CHECK: test_dup_dv2D_0 + ;CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0] + ;CHECK: ret %tmp1 = extractelement <2 x double> %v, i32 1 ret double %tmp1 } define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) { ;CHECK: test_vector_dup_bv16B - ;CHECK: dup {{b[0-31]+}}, {{v[0-31]+}}.b[14] + ;CHECK: dup {{b[0-9]+}}, {{v[0-9]+}}.b[14] %shuffle.i = shufflevector <16 x i8> %v1, <16 x i8> undef, <1 x i32> < i32 14> ret <1 x i8> %shuffle.i } define <1 x i8> @test_vector_dup_bv8B(<8 x i8> %v1) { ;CHECK: test_vector_dup_bv8B - ;CHECK: dup {{b[0-31]+}}, {{v[0-31]+}}.b[7] + ;CHECK: dup {{b[0-9]+}}, {{v[0-9]+}}.b[7] %shuffle.i = shufflevector <8 x i8> %v1, <8 x i8> undef, <1 x i32> < i32 7> ret <1 x i8> %shuffle.i } define <1 x i16> @test_vector_dup_hv8H(<8 x i16> %v1) { ;CHECK: test_vector_dup_hv8H - ;CHECK: dup {{h[0-31]+}}, {{v[0-31]+}}.h[7] + ;CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7] %shuffle.i = shufflevector <8 x i16> %v1, <8 x i16> undef, <1 x i32> < i32 7> ret <1 x i16> %shuffle.i } define <1 x i16> @test_vector_dup_hv4H(<4 x i16> %v1) { ;CHECK: test_vector_dup_hv4H - ;CHECK: dup {{h[0-31]+}}, {{v[0-31]+}}.h[3] + ;CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3] %shuffle.i = shufflevector <4 x i16> %v1, <4 x i16> undef, <1 x i32> < i32 3> ret <1 x i16> %shuffle.i } define <1 x i32> @test_vector_dup_sv4S(<4 x i32> %v1) { ;CHECK: test_vector_dup_sv4S - ;CHECK: dup {{s[0-31]+}}, {{v[0-31]+}}.s[3] + ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[3] %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <1 x i32> < i32 3> ret <1 x i32> %shuffle } define <1 x i32> @test_vector_dup_sv2S(<2 x i32> %v1) { ;CHECK: test_vector_dup_sv2S - ;CHECK: dup {{s[0-31]+}}, {{v[0-31]+}}.s[1] + ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1] %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <1 x i32> < i32 1> ret <1 x i32> %shuffle } define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) { ;CHECK: test_vector_dup_dv2D - ;CHECK: dup {{d[0-31]+}}, {{v[0-31]+}}.d[1] + ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] %shuffle.i = shufflevector <2 x i64> %v1, <2 x i64> undef, <1 x i32> < i32 1> ret <1 x i64> %shuffle.i } define <1 x i64> @test_vector_copy_dup_dv2D(<1 x i64> %a, <2 x i64> %c) { ;CHECK: test_vector_copy_dup_dv2D - ;CHECK: dup {{d[0-31]+}}, {{v[0-31]+}}.d[1] + ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] %vget_lane = extractelement <2 x i64> %c, i32 1 %vset_lane = insertelement <1 x i64> undef, i64 %vget_lane, i32 0 ret <1 x i64> %vset_lane diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-cvt.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-cvt.ll index a06d5d60a85b..3a19bed9f671 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-cvt.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-cvt.ll @@ -5,133 +5,129 @@ define float @test_vcvts_f32_s32(i32 %a) { ; CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}} entry: %vcvtf.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %0 = call float @llvm.aarch64.neon.vcvtf32.s32(<1 x i32> %vcvtf.i) + %0 = call float @llvm.aarch64.neon.vcvtint2fps.f32.v1i32(<1 x i32> %vcvtf.i) ret float %0 } -declare float @llvm.aarch64.neon.vcvtf32.s32(<1 x i32>) +declare float @llvm.aarch64.neon.vcvtint2fps.f32.v1i32(<1 x i32>) define double
@test_vcvtd_f64_s64(i64 %a) { ; CHECK: test_vcvtd_f64_s64 ; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}} entry: %vcvtf.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %0 = call double @llvm.aarch64.neon.vcvtf64.s64(<1 x i64> %vcvtf.i) + %0 = call double @llvm.aarch64.neon.vcvtint2fps.f64.v1i64(<1 x i64> %vcvtf.i) ret double %0 } -declare double @llvm.aarch64.neon.vcvtf64.s64(<1 x i64>) +declare double @llvm.aarch64.neon.vcvtint2fps.f64.v1i64(<1 x i64>) define float @test_vcvts_f32_u32(i32 %a) { ; CHECK: test_vcvts_f32_u32 ; CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}} entry: %vcvtf.i = insertelement <1 x i32> undef, i32 %a, i32 0 - %0 = call float @llvm.aarch64.neon.vcvtf32.u32(<1 x i32> %vcvtf.i) + %0 = call float @llvm.aarch64.neon.vcvtint2fpu.f32.v1i32(<1 x i32> %vcvtf.i) ret float %0 } -declare float @llvm.aarch64.neon.vcvtf32.u32(<1 x i32>) +declare float @llvm.aarch64.neon.vcvtint2fpu.f32.v1i32(<1 x i32>) define double @test_vcvtd_f64_u64(i64 %a) { ; CHECK: test_vcvtd_f64_u64 ; CHECK: ucvtf {{d[0-9]+}}, {{d[0-9]+}} entry: %vcvtf.i = insertelement <1 x i64> undef, i64 %a, i32 0 - %0 = call double @llvm.aarch64.neon.vcvtf64.u64(<1 x i64> %vcvtf.i) + %0 = call double @llvm.aarch64.neon.vcvtint2fpu.f64.v1i64(<1 x i64> %vcvtf.i) ret double %0 } -declare double @llvm.aarch64.neon.vcvtf64.u64(<1 x i64>) +declare double @llvm.aarch64.neon.vcvtint2fpu.f64.v1i64(<1 x i64>) define float @test_vcvts_n_f32_s32(i32 %a) { ; CHECK: test_vcvts_n_f32_s32 ; CHECK: scvtf {{s[0-9]+}}, {{s[0-9]+}}, #1 entry: %vcvtf = insertelement <1 x i32> undef, i32 %a, i32 0 - %0 = call float @llvm.aarch64.neon.vcvtf32.n.s32(<1 x i32> %vcvtf, i32 1) + %0 = call float @llvm.aarch64.neon.vcvtfxs2fp.n.f32.v1i32(<1 x i32> %vcvtf, i32 1) ret float %0 } -declare float @llvm.aarch64.neon.vcvtf32.n.s32(<1 x i32>, i32) +declare float @llvm.aarch64.neon.vcvtfxs2fp.n.f32.v1i32(<1 x i32>, i32) define double @test_vcvtd_n_f64_s64(i64 %a) { ; CHECK: test_vcvtd_n_f64_s64 ; CHECK: scvtf {{d[0-9]+}}, {{d[0-9]+}}, #1 entry: %vcvtf = insertelement <1 x i64> undef, i64 %a, i32 0 - %0 = call double @llvm.aarch64.neon.vcvtf64.n.s64(<1 x i64> %vcvtf, i32 1) + %0 = call double @llvm.aarch64.neon.vcvtfxs2fp.n.f64.v1i64(<1 x i64> %vcvtf, i32 1) ret double %0 } -declare double @llvm.aarch64.neon.vcvtf64.n.s64(<1 x i64>, i32) +declare double @llvm.aarch64.neon.vcvtfxs2fp.n.f64.v1i64(<1 x i64>, i32) define float @test_vcvts_n_f32_u32(i32 %a) { ; CHECK: test_vcvts_n_f32_u32 ; CHECK: ucvtf {{s[0-9]+}}, {{s[0-9]+}}, #1 entry: %vcvtf = insertelement <1 x i32> undef, i32 %a, i32 0 - %0 = call float @llvm.aarch64.neon.vcvtf32.n.u32(<1 x i32> %vcvtf, i32 1) + %0 = call float @llvm.aarch64.neon.vcvtfxu2fp.n.f32.v1i32(<1 x i32> %vcvtf, i32 1) ret float %0 } -declare float @llvm.aarch64.neon.vcvtf32.n.u32(<1 x i32>, i32) +declare float @llvm.aarch64.neon.vcvtfxu2fp.n.f32.v1i32(<1 x i32>, i32) define double @test_vcvtd_n_f64_u64(i64 %a) { ; CHECK: test_vcvtd_n_f64_u64 ; CHECK: ucvtf {{d[0-9]+}}, {{d[0-9]+}}, #1 entry: %vcvtf = insertelement <1 x i64> undef, i64 %a, i32 0 - %0 = call double @llvm.aarch64.neon.vcvtf64.n.u64(<1 x i64> %vcvtf, i32 1) + %0 = call double @llvm.aarch64.neon.vcvtfxu2fp.n.f64.v1i64(<1 x i64> %vcvtf, i32 1) ret double %0 } -declare double @llvm.aarch64.neon.vcvtf64.n.u64(<1 x i64>, i32) +declare double @llvm.aarch64.neon.vcvtfxu2fp.n.f64.v1i64(<1 x i64>, i32) define i32 @test_vcvts_n_s32_f32(float %a) { ; CHECK: test_vcvts_n_s32_f32 ; CHECK: fcvtzs {{s[0-9]+}}, {{s[0-9]+}}, #1 entry: - %fcvtzs = insertelement <1 x float> undef, float %a, i32 0 - 
%fcvtzs1 = call <1 x i32> @llvm.aarch64.neon.vcvts.n.s32.f32(<1 x float> %fcvtzs, i32 1) + %fcvtzs1 = call <1 x i32> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i32.f32(float %a, i32 1) %0 = extractelement <1 x i32> %fcvtzs1, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.vcvts.n.s32.f32(<1 x float>, i32) +declare <1 x i32> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i32.f32(float, i32) define i64 @test_vcvtd_n_s64_f64(double %a) { ; CHECK: test_vcvtd_n_s64_f64 ; CHECK: fcvtzs {{d[0-9]+}}, {{d[0-9]+}}, #1 entry: - %fcvtzs = insertelement <1 x double> undef, double %a, i32 0 - %fcvtzs1 = call <1 x i64> @llvm.aarch64.neon.vcvtd.n.s64.f64(<1 x double> %fcvtzs, i32 1) + %fcvtzs1 = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i64.f64(double %a, i32 1) %0 = extractelement <1 x i64> %fcvtzs1, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.vcvtd.n.s64.f64(<1 x double>, i32) +declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.n.v1i64.f64(double, i32) define i32 @test_vcvts_n_u32_f32(float %a) { ; CHECK: test_vcvts_n_u32_f32 ; CHECK: fcvtzu {{s[0-9]+}}, {{s[0-9]+}}, #32 entry: - %fcvtzu = insertelement <1 x float> undef, float %a, i32 0 - %fcvtzu1 = call <1 x i32> @llvm.aarch64.neon.vcvts.n.u32.f32(<1 x float> %fcvtzu, i32 32) + %fcvtzu1 = call <1 x i32> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i32.f32(float %a, i32 32) %0 = extractelement <1 x i32> %fcvtzu1, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.vcvts.n.u32.f32(<1 x float>, i32) +declare <1 x i32> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i32.f32(float, i32) define i64 @test_vcvtd_n_u64_f64(double %a) { ; CHECK: test_vcvtd_n_u64_f64 ; CHECK: fcvtzu {{d[0-9]+}}, {{d[0-9]+}}, #64 entry: - %fcvtzu = insertelement <1 x double> undef, double %a, i32 0 - %fcvtzu1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtd.n.u64.f64(<1 x double> %fcvtzu, i32 64) + %fcvtzu1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i64.f64(double %a, i32 64) %0 = extractelement <1 x i64> %fcvtzu1, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.vcvtd.n.u64.f64(<1 x double>, i32) +declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.n.v1i64.f64(double, i32) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-fabd.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-fabd.ll index 75686d32064b..6343310a3c02 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-fabd.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-fabd.ll @@ -4,10 +4,7 @@ define float @test_vabds_f32(float %a, float %b) { ; CHECK-LABEL: test_vabds_f32 ; CHECK: fabd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} entry: - %vabd.i = insertelement <1 x float> undef, float %a, i32 0 - %vabd1.i = insertelement <1 x float> undef, float %b, i32 0 - %vabd2.i = call <1 x float> @llvm.aarch64.neon.vabd.v1f32(<1 x float> %vabd.i, <1 x float> %vabd1.i) - %0 = extractelement <1 x float> %vabd2.i, i32 0 + %0 = call float @llvm.aarch64.neon.vabd.f32(float %a, float %b) ret float %0 } @@ -15,12 +12,9 @@ define double @test_vabdd_f64(double %a, double %b) { ; CHECK-LABEL: test_vabdd_f64 ; CHECK: fabd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} entry: - %vabd.i = insertelement <1 x double> undef, double %a, i32 0 - %vabd1.i = insertelement <1 x double> undef, double %b, i32 0 - %vabd2.i = call <1 x double> @llvm.aarch64.neon.vabd.v1f64(<1 x double> %vabd.i, <1 x double> %vabd1.i) - %0 = extractelement <1 x double> %vabd2.i, i32 0 + %0 = call double @llvm.aarch64.neon.vabd.f64(double %a, double %b) ret double %0 } -declare <1 x double>
@llvm.aarch64.neon.vabd.v1f64(<1 x double>, <1 x double>) -declare <1 x float> @llvm.aarch64.neon.vabd.v1f32(<1 x float>, <1 x float>) +declare double @llvm.aarch64.neon.vabd.f64(double, double) +declare float @llvm.aarch64.neon.vabd.f32(float, float) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-fcvt.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-fcvt.ll index d7b84fae7375..6cf30a7df3b8 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-fcvt.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-fcvt.ll @@ -6,250 +6,228 @@ define float @test_vcvtxn(double %a) { ; CHECK: test_vcvtxn ; CHECK: fcvtxn {{s[0-9]}}, {{d[0-9]}} entry: - %vcvtf.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtf1.i = tail call <1 x float> @llvm.aarch64.neon.fcvtxn.v1f32.v1f64(<1 x double> %vcvtf.i) - %0 = extractelement <1 x float> %vcvtf1.i, i32 0 - ret float %0 + %vcvtf = call float @llvm.aarch64.neon.fcvtxn(double %a) + ret float %vcvtf } -declare <1 x float> @llvm.aarch64.neon.fcvtxn.v1f32.v1f64(<1 x double>) +declare float @llvm.aarch64.neon.fcvtxn(double) define i32 @test_vcvtass(float %a) { ; CHECK: test_vcvtass ; CHECK: fcvtas {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtas.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtas1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.v1f32(<1 x float> %vcvtas.i) + %vcvtas1.i = call <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtas1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtas.v1i32.f32(float) define i64 @test_test_vcvtasd(double %a) { ; CHECK: test_test_vcvtasd ; CHECK: fcvtas {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtas.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtas1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %vcvtas.i) + %vcvtas1.i = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtas1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.f64(double) define i32 @test_vcvtaus(float %a) { ; CHECK: test_vcvtaus ; CHECK: fcvtau {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtau.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtau1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.v1f32(<1 x float> %vcvtau.i) + %vcvtau1.i = call <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtau1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtau.v1i32.f32(float) define i64 @test_vcvtaud(double %a) { ; CHECK: test_vcvtaud ; CHECK: fcvtau {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtau.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtau1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %vcvtau.i) + %vcvtau1.i = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtau1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.f64(double) define i32 @test_vcvtmss(float %a) { ; CHECK: test_vcvtmss ; CHECK: fcvtms {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtms.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtms1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.v1f32(<1 x 
float> %vcvtms.i) + %vcvtms1.i = call <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtms1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtms.v1i32.f32(float) define i64 @test_vcvtmd_s64_f64(double %a) { ; CHECK: test_vcvtmd_s64_f64 ; CHECK: fcvtms {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtms.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtms1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %vcvtms.i) + %vcvtms1.i = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtms1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.f64(double) define i32 @test_vcvtmus(float %a) { ; CHECK: test_vcvtmus ; CHECK: fcvtmu {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtmu.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtmu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.v1f32(<1 x float> %vcvtmu.i) + %vcvtmu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtmu1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtmu.v1i32.f32(float) define i64 @test_vcvtmud(double %a) { ; CHECK: test_vcvtmud ; CHECK: fcvtmu {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtmu.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtmu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %vcvtmu.i) + %vcvtmu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtmu1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.f64(double) define i32 @test_vcvtnss(float %a) { ; CHECK: test_vcvtnss ; CHECK: fcvtns {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtns.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtns1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.v1f32(<1 x float> %vcvtns.i) + %vcvtns1.i = call <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtns1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtns.v1i32.f32(float) define i64 @test_vcvtnd_s64_f64(double %a) { ; CHECK: test_vcvtnd_s64_f64 ; CHECK: fcvtns {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtns.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtns1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %vcvtns.i) + %vcvtns1.i = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtns1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.f64(double) define i32 @test_vcvtnus(float %a) { ; CHECK: test_vcvtnus ; CHECK: fcvtnu {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtnu.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtnu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.v1f32(<1 x float> %vcvtnu.i) + %vcvtnu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtnu1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtnu.v1i32.f32(float) define i64 
@test_vcvtnud(double %a) { ; CHECK: test_vcvtnud ; CHECK: fcvtnu {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtnu.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtnu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %vcvtnu.i) + %vcvtnu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtnu1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.f64(double) define i32 @test_vcvtpss(float %a) { ; CHECK: test_vcvtpss ; CHECK: fcvtps {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtps.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtps1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.v1f32(<1 x float> %vcvtps.i) + %vcvtps1.i = call <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtps1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtps.v1i32.f32(float) define i64 @test_vcvtpd_s64_f64(double %a) { ; CHECK: test_vcvtpd_s64_f64 ; CHECK: fcvtps {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtps.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtps1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %vcvtps.i) + %vcvtps1.i = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtps1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.f64(double) define i32 @test_vcvtpus(float %a) { ; CHECK: test_vcvtpus ; CHECK: fcvtpu {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtpu.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtpu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.v1f32(<1 x float> %vcvtpu.i) + %vcvtpu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtpu1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtpu.v1i32.f32(float) define i64 @test_vcvtpud(double %a) { ; CHECK: test_vcvtpud ; CHECK: fcvtpu {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtpu.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtpu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %vcvtpu.i) + %vcvtpu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtpu1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.f64(double) define i32 @test_vcvtss(float %a) { ; CHECK: test_vcvtss ; CHECK: fcvtzs {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtzs.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtzs1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.v1f32(<1 x float> %vcvtzs.i) + %vcvtzs1.i = call <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtzs1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtzs.v1i32.f32(float) define i64 @test_vcvtd_s64_f64(double %a) { ; CHECK: test_vcvtd_s64_f64 ; CHECK: fcvtzs {{d[0-9]}}, {{d[0-9]}} entry: - %vcvzs.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvzs1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %vcvzs.i) + %vcvzs1.i = call <1 x i64> 
@llvm.aarch64.neon.fcvtzs.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvzs1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.f64(double) define i32 @test_vcvtus(float %a) { ; CHECK: test_vcvtus ; CHECK: fcvtzu {{s[0-9]}}, {{s[0-9]}} entry: - %vcvtzu.i = insertelement <1 x float> undef, float %a, i32 0 - %vcvtzu1.i = tail call <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.v1f32(<1 x float> %vcvtzu.i) + %vcvtzu1.i = call <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.f32(float %a) %0 = extractelement <1 x i32> %vcvtzu1.i, i32 0 ret i32 %0 } -declare <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.v1f32(<1 x float>) +declare <1 x i32> @llvm.aarch64.neon.fcvtzu.v1i32.f32(float) define i64 @test_vcvtud(double %a) { ; CHECK: test_vcvtud ; CHECK: fcvtzu {{d[0-9]}}, {{d[0-9]}} entry: - %vcvtzu.i = insertelement <1 x double> undef, double %a, i32 0 - %vcvtzu1.i = tail call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %vcvtzu.i) + %vcvtzu1.i = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.f64(double %a) %0 = extractelement <1 x i64> %vcvtzu1.i, i32 0 ret i64 %0 } -declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double>) +declare <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.f64(double) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-fp-compare.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-fp-compare.ll index aa6348d11b24..e0dce1336d89 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-fp-compare.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-fp-compare.ll @@ -3,314 +3,280 @@ ;; Scalar Floating-point Compare define i32 @test_vceqs_f32(float %a, float %b) { -; CHECK: test_vceqs_f32 +; CHECK-LABEL: test_vceqs_f32 ; CHECK: fcmeq {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vceq.i = insertelement <1 x float> undef, float %a, i32 0 - %vceq1.i = insertelement <1 x float> undef, float %b, i32 0 - %vceq2.i = call <1 x i32> @llvm.aarch64.neon.vceq.v1i32.v1f32.v1f32(<1 x float> %vceq.i, <1 x float> %vceq1.i) - %0 = extractelement <1 x i32> %vceq2.i, i32 0 + %fceq2.i = call <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fceq2.i, i32 0 ret i32 %0 } define i64 @test_vceqd_f64(double %a, double %b) { -; CHECK: test_vceqd_f64 +; CHECK-LABEL: test_vceqd_f64 ; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vceq.i = insertelement <1 x double> undef, double %a, i32 0 - %vceq1.i = insertelement <1 x double> undef, double %b, i32 0 - %vceq2.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f64(<1 x double> %vceq.i, <1 x double> %vceq1.i) - %0 = extractelement <1 x i64> %vceq2.i, i32 0 + %fceq2.i = call <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x i64> %fceq2.i, i32 0 ret i64 %0 } +define <1 x i64> @test_vceqz_f64(<1 x double> %a) { +; CHECK-LABEL: test_vceqz_f64 +; CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, #0.0 +entry: + %0 = fcmp oeq <1 x double> %a, zeroinitializer + %vceqz.i = sext <1 x i1> %0 to <1 x i64> + ret <1 x i64> %vceqz.i +} + define i32 @test_vceqzs_f32(float %a) { -; CHECK: test_vceqzs_f32 +; CHECK-LABEL: test_vceqzs_f32 ; CHECK: fcmeq {{s[0-9]}}, {{s[0-9]}}, #0.0 entry: - %vceq.i = insertelement <1 x float> undef, float %a, i32 0 - %vceq1.i = call <1 x i32> @llvm.aarch64.neon.vceq.v1i32.v1f32.v1f32(<1 x float> %vceq.i, <1 x float> zeroinitializer) - %0 = extractelement <1 x i32> 
%vceq1.i, i32 0 + %fceq1.i = call <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float %a, float 0.0) + %0 = extractelement <1 x i32> %fceq1.i, i32 0 ret i32 %0 } define i64 @test_vceqzd_f64(double %a) { -; CHECK: test_vceqzd_f64 +; CHECK-LABEL: test_vceqzd_f64 ; CHECK: fcmeq {{d[0-9]}}, {{d[0-9]}}, #0.0 entry: - %vceq.i = insertelement <1 x double> undef, double %a, i32 0 - %vceq1.i = call <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f64(<1 x double> %vceq.i, <1 x double> zeroinitializer) - %0 = extractelement <1 x i64> %vceq1.i, i32 0 + %fceq1.i = call <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f32(double %a, float 0.0) + %0 = extractelement <1 x i64> %fceq1.i, i32 0 ret i64 %0 } define i32 @test_vcges_f32(float %a, float %b) { -; CHECK: test_vcges_f32 +; CHECK-LABEL: test_vcges_f32 ; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vcge.i = insertelement <1 x float> undef, float %a, i32 0 - %vcge1.i = insertelement <1 x float> undef, float %b, i32 0 - %vcge2.i = call <1 x i32> @llvm.aarch64.neon.vcge.v1i32.v1f32.v1f32(<1 x float> %vcge.i, <1 x float> %vcge1.i) - %0 = extractelement <1 x i32> %vcge2.i, i32 0 + %fcge2.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fcge2.i, i32 0 ret i32 %0 } define i64 @test_vcged_f64(double %a, double %b) { -; CHECK: test_vcged_f64 +; CHECK-LABEL: test_vcged_f64 ; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vcge.i = insertelement <1 x double> undef, double %a, i32 0 - %vcge1.i = insertelement <1 x double> undef, double %b, i32 0 - %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f64(<1 x double> %vcge.i, <1 x double> %vcge1.i) - %0 = extractelement <1 x i64> %vcge2.i, i32 0 + %fcge2.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x i64> %fcge2.i, i32 0 ret i64 %0 } define i32 @test_vcgezs_f32(float %a) { -; CHECK: test_vcgezs_f32 +; CHECK-LABEL: test_vcgezs_f32 ; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, #0.0 entry: - %vcge.i = insertelement <1 x float> undef, float %a, i32 0 - %vcge1.i = call <1 x i32> @llvm.aarch64.neon.vcge.v1i32.v1f32.v1f32(<1 x float> %vcge.i, <1 x float> zeroinitializer) - %0 = extractelement <1 x i32> %vcge1.i, i32 0 + %fcge1.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float 0.0) + %0 = extractelement <1 x i32> %fcge1.i, i32 0 ret i32 %0 } define i64 @test_vcgezd_f64(double %a) { -; CHECK: test_vcgezd_f64 +; CHECK-LABEL: test_vcgezd_f64 ; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, #0.0 entry: - %vcge.i = insertelement <1 x double> undef, double %a, i32 0 - %vcge1.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f64(<1 x double> %vcge.i, <1 x double> zeroinitializer) - %0 = extractelement <1 x i64> %vcge1.i, i32 0 + %fcge1.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f32(double %a, float 0.0) + %0 = extractelement <1 x i64> %fcge1.i, i32 0 ret i64 %0 } define i32 @test_vcgts_f32(float %a, float %b) { -; CHECK: test_vcgts_f32 +; CHECK-LABEL: test_vcgts_f32 ; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vcgt.i = insertelement <1 x float> undef, float %a, i32 0 - %vcgt1.i = insertelement <1 x float> undef, float %b, i32 0 - %vcgt2.i = call <1 x i32> @llvm.aarch64.neon.vcgt.v1i32.v1f32.v1f32(<1 x float> %vcgt.i, <1 x float> %vcgt1.i) - %0 = extractelement <1 x i32> %vcgt2.i, i32 0 + %fcgt2.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fcgt2.i, i32 0 ret i32 %0 } define i64 
@test_vcgtd_f64(double %a, double %b) { -; CHECK: test_vcgtd_f64 +; CHECK-LABEL: test_vcgtd_f64 ; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vcgt.i = insertelement <1 x double> undef, double %a, i32 0 - %vcgt1.i = insertelement <1 x double> undef, double %b, i32 0 - %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f64(<1 x double> %vcgt.i, <1 x double> %vcgt1.i) - %0 = extractelement <1 x i64> %vcgt2.i, i32 0 + %fcgt2.i = call <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x i64> %fcgt2.i, i32 0 ret i64 %0 } define i32 @test_vcgtzs_f32(float %a) { -; CHECK: test_vcgtzs_f32 +; CHECK-LABEL: test_vcgtzs_f32 ; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, #0.0 entry: - %vcgt.i = insertelement <1 x float> undef, float %a, i32 0 - %vcgt1.i = call <1 x i32> @llvm.aarch64.neon.vcgt.v1i32.v1f32.v1f32(<1 x float> %vcgt.i, <1 x float> zeroinitializer) - %0 = extractelement <1 x i32> %vcgt1.i, i32 0 + %fcgt1.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float 0.0) + %0 = extractelement <1 x i32> %fcgt1.i, i32 0 ret i32 %0 } define i64 @test_vcgtzd_f64(double %a) { -; CHECK: test_vcgtzd_f64 +; CHECK-LABEL: test_vcgtzd_f64 ; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, #0.0 entry: - %vcgt.i = insertelement <1 x double> undef, double %a, i32 0 - %vcgt1.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f64(<1 x double> %vcgt.i, <1 x double> zeroinitializer) - %0 = extractelement <1 x i64> %vcgt1.i, i32 0 + %fcgt1.i = call <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f32(double %a, float 0.0) + %0 = extractelement <1 x i64> %fcgt1.i, i32 0 ret i64 %0 } define i32 @test_vcles_f32(float %a, float %b) { -; CHECK: test_vcles_f32 +; CHECK-LABEL: test_vcles_f32 ; CHECK: fcmge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vcge.i = insertelement <1 x float> undef, float %a, i32 0 - %vcge1.i = insertelement <1 x float> undef, float %b, i32 0 - %vcge2.i = call <1 x i32> @llvm.aarch64.neon.vcge.v1i32.v1f32.v1f32(<1 x float> %vcge.i, <1 x float> %vcge1.i) - %0 = extractelement <1 x i32> %vcge2.i, i32 0 + %fcge2.i = call <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fcge2.i, i32 0 ret i32 %0 } define i64 @test_vcled_f64(double %a, double %b) { -; CHECK: test_vcled_f64 +; CHECK-LABEL: test_vcled_f64 ; CHECK: fcmge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vcge.i = insertelement <1 x double> undef, double %a, i32 0 - %vcge1.i = insertelement <1 x double> undef, double %b, i32 0 - %vcge2.i = call <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f64(<1 x double> %vcge.i, <1 x double> %vcge1.i) - %0 = extractelement <1 x i64> %vcge2.i, i32 0 + %fcge2.i = call <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x i64> %fcge2.i, i32 0 ret i64 %0 } define i32 @test_vclezs_f32(float %a) { -; CHECK: test_vclezs_f32 +; CHECK-LABEL: test_vclezs_f32 ; CHECK: fcmle {{s[0-9]}}, {{s[0-9]}}, #0.0 entry: - %vcle.i = insertelement <1 x float> undef, float %a, i32 0 - %vcle1.i = call <1 x i32> @llvm.aarch64.neon.vclez.v1i32.v1f32.v1f32(<1 x float> %vcle.i, <1 x float> zeroinitializer) - %0 = extractelement <1 x i32> %vcle1.i, i32 0 + %fcle1.i = call <1 x i32> @llvm.aarch64.neon.fclez.v1i32.f32.f32(float %a, float 0.0) + %0 = extractelement <1 x i32> %fcle1.i, i32 0 ret i32 %0 } define i64 @test_vclezd_f64(double %a) { -; CHECK: test_vclezd_f64 +; CHECK-LABEL: test_vclezd_f64 ; CHECK: fcmle {{d[0-9]}}, {{d[0-9]}}, #0.0 entry: - %vcle.i = insertelement <1 x 
double> undef, double %a, i32 0 - %vcle1.i = call <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1f64.v1f64(<1 x double> %vcle.i, <1 x double> zeroinitializer) - %0 = extractelement <1 x i64> %vcle1.i, i32 0 + %fcle1.i = call <1 x i64> @llvm.aarch64.neon.fclez.v1i64.f64.f32(double %a, float 0.0) + %0 = extractelement <1 x i64> %fcle1.i, i32 0 ret i64 %0 } define i32 @test_vclts_f32(float %a, float %b) { -; CHECK: test_vclts_f32 +; CHECK-LABEL: test_vclts_f32 ; CHECK: fcmgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vcgt.i = insertelement <1 x float> undef, float %b, i32 0 - %vcgt1.i = insertelement <1 x float> undef, float %a, i32 0 - %vcgt2.i = call <1 x i32> @llvm.aarch64.neon.vcgt.v1i32.v1f32.v1f32(<1 x float> %vcgt.i, <1 x float> %vcgt1.i) - %0 = extractelement <1 x i32> %vcgt2.i, i32 0 + %fcgt2.i = call <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fcgt2.i, i32 0 ret i32 %0 } define i64 @test_vcltd_f64(double %a, double %b) { -; CHECK: test_vcltd_f64 +; CHECK-LABEL: test_vcltd_f64 ; CHECK: fcmgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vcgt.i = insertelement <1 x double> undef, double %b, i32 0 - %vcgt1.i = insertelement <1 x double> undef, double %a, i32 0 - %vcgt2.i = call <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f64(<1 x double> %vcgt.i, <1 x double> %vcgt1.i) - %0 = extractelement <1 x i64> %vcgt2.i, i32 0 + %fcgt2.i = call <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x i64> %fcgt2.i, i32 0 ret i64 %0 } define i32 @test_vcltzs_f32(float %a) { -; CHECK: test_vcltzs_f32 +; CHECK-LABEL: test_vcltzs_f32 ; CHECK: fcmlt {{s[0-9]}}, {{s[0-9]}}, #0.0 entry: - %vclt.i = insertelement <1 x float> undef, float %a, i32 0 - %vclt1.i = call <1 x i32> @llvm.aarch64.neon.vcltz.v1i32.v1f32.v1f32(<1 x float> %vclt.i, <1 x float> zeroinitializer) - %0 = extractelement <1 x i32> %vclt1.i, i32 0 + %fclt1.i = call <1 x i32> @llvm.aarch64.neon.fcltz.v1i32.f32.f32(float %a, float 0.0) + %0 = extractelement <1 x i32> %fclt1.i, i32 0 ret i32 %0 } define i64 @test_vcltzd_f64(double %a) { -; CHECK: test_vcltzd_f64 +; CHECK-LABEL: test_vcltzd_f64 ; CHECK: fcmlt {{d[0-9]}}, {{d[0-9]}}, #0.0 entry: - %vclt.i = insertelement <1 x double> undef, double %a, i32 0 - %vclt1.i = call <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1f64.v1f64(<1 x double> %vclt.i, <1 x double> zeroinitializer) - %0 = extractelement <1 x i64> %vclt1.i, i32 0 + %fclt1.i = call <1 x i64> @llvm.aarch64.neon.fcltz.v1i64.f64.f32(double %a, float 0.0) + %0 = extractelement <1 x i64> %fclt1.i, i32 0 ret i64 %0 } define i32 @test_vcages_f32(float %a, float %b) { -; CHECK: test_vcages_f32 +; CHECK-LABEL: test_vcages_f32 ; CHECK: facge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vcage.i = insertelement <1 x float> undef, float %a, i32 0 - %vcage1.i = insertelement <1 x float> undef, float %b, i32 0 - %vcage2.i = call <1 x i32> @llvm.aarch64.neon.vcage.v1i32.v1f32.v1f32(<1 x float> %vcage.i, <1 x float> %vcage1.i) - %0 = extractelement <1 x i32> %vcage2.i, i32 0 + %fcage2.i = call <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fcage2.i, i32 0 ret i32 %0 } define i64 @test_vcaged_f64(double %a, double %b) { -; CHECK: test_vcaged_f64 +; CHECK-LABEL: test_vcaged_f64 ; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vcage.i = insertelement <1 x double> undef, double %a, i32 0 - %vcage1.i = insertelement <1 x double> undef, double %b, i32 0 - %vcage2.i = call <1 x i64> 
@llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %vcage.i, <1 x double> %vcage1.i) - %0 = extractelement <1 x i64> %vcage2.i, i32 0 + %fcage2.i = call <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x i64> %fcage2.i, i32 0 ret i64 %0 } define i32 @test_vcagts_f32(float %a, float %b) { -; CHECK: test_vcagts_f32 +; CHECK-LABEL: test_vcagts_f32 ; CHECK: facgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vcagt.i = insertelement <1 x float> undef, float %a, i32 0 - %vcagt1.i = insertelement <1 x float> undef, float %b, i32 0 - %vcagt2.i = call <1 x i32> @llvm.aarch64.neon.vcagt.v1i32.v1f32.v1f32(<1 x float> %vcagt.i, <1 x float> %vcagt1.i) - %0 = extractelement <1 x i32> %vcagt2.i, i32 0 + %fcagt2.i = call <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fcagt2.i, i32 0 ret i32 %0 } define i64 @test_vcagtd_f64(double %a, double %b) { -; CHECK: test_vcagtd_f64 +; CHECK-LABEL: test_vcagtd_f64 ; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vcagt.i = insertelement <1 x double> undef, double %a, i32 0 - %vcagt1.i = insertelement <1 x double> undef, double %b, i32 0 - %vcagt2.i = call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %vcagt.i, <1 x double> %vcagt1.i) - %0 = extractelement <1 x i64> %vcagt2.i, i32 0 + %fcagt2.i = call <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x i64> %fcagt2.i, i32 0 ret i64 %0 } define i32 @test_vcales_f32(float %a, float %b) { -; CHECK: test_vcales_f32 +; CHECK-LABEL: test_vcales_f32 ; CHECK: facge {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vcage.i = insertelement <1 x float> undef, float %b, i32 0 - %vcage1.i = insertelement <1 x float> undef, float %a, i32 0 - %vcage2.i = call <1 x i32> @llvm.aarch64.neon.vcage.v1i32.v1f32.v1f32(<1 x float> %vcage.i, <1 x float> %vcage1.i) - %0 = extractelement <1 x i32> %vcage2.i, i32 0 + %fcage2.i = call <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fcage2.i, i32 0 ret i32 %0 } define i64 @test_vcaled_f64(double %a, double %b) { -; CHECK: test_vcaled_f64 +; CHECK-LABEL: test_vcaled_f64 ; CHECK: facge {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vcage.i = insertelement <1 x double> undef, double %b, i32 0 - %vcage1.i = insertelement <1 x double> undef, double %a, i32 0 - %vcage2.i = call <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double> %vcage.i, <1 x double> %vcage1.i) - %0 = extractelement <1 x i64> %vcage2.i, i32 0 + %fcage2.i = call <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x i64> %fcage2.i, i32 0 ret i64 %0 } define i32 @test_vcalts_f32(float %a, float %b) { -; CHECK: test_vcalts_f32 +; CHECK-LABEL: test_vcalts_f32 ; CHECK: facgt {{s[0-9]}}, {{s[0-9]}}, {{s[0-9]}} entry: - %vcalt.i = insertelement <1 x float> undef, float %b, i32 0 - %vcalt1.i = insertelement <1 x float> undef, float %a, i32 0 - %vcalt2.i = call <1 x i32> @llvm.aarch64.neon.vcagt.v1i32.v1f32.v1f32(<1 x float> %vcalt.i, <1 x float> %vcalt1.i) - %0 = extractelement <1 x i32> %vcalt2.i, i32 0 + %fcalt2.i = call <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float %a, float %b) + %0 = extractelement <1 x i32> %fcalt2.i, i32 0 ret i32 %0 } define i64 @test_vcaltd_f64(double %a, double %b) { -; CHECK: test_vcaltd_f64 +; CHECK-LABEL: test_vcaltd_f64 ; CHECK: facgt {{d[0-9]}}, {{d[0-9]}}, {{d[0-9]}} entry: - %vcalt.i = insertelement <1 x double> undef, 
double %b, i32 0 - %vcalt1.i = insertelement <1 x double> undef, double %a, i32 0 - %vcalt2.i = call <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double> %vcalt.i, <1 x double> %vcalt1.i) - %0 = extractelement <1 x i64> %vcalt2.i, i32 0 + %fcalt2.i = call <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double %a, double %b) + %0 = extractelement <1 x i64> %fcalt2.i, i32 0 ret i64 %0 } -declare <1 x i32> @llvm.aarch64.neon.vceq.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) -declare <1 x i64> @llvm.aarch64.neon.vceq.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) -declare <1 x i32> @llvm.aarch64.neon.vcge.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) -declare <1 x i64> @llvm.aarch64.neon.vcge.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) -declare <1 x i32> @llvm.aarch64.neon.vclez.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) -declare <1 x i64> @llvm.aarch64.neon.vclez.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) -declare <1 x i32> @llvm.aarch64.neon.vcgt.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) -declare <1 x i64> @llvm.aarch64.neon.vcgt.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) -declare <1 x i32> @llvm.aarch64.neon.vcltz.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) -declare <1 x i64> @llvm.aarch64.neon.vcltz.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) -declare <1 x i32> @llvm.aarch64.neon.vcage.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) -declare <1 x i64> @llvm.aarch64.neon.vcage.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) -declare <1 x i32> @llvm.aarch64.neon.vcagt.v1i32.v1f32.v1f32(<1 x float>, <1 x float>) -declare <1 x i64> @llvm.aarch64.neon.vcagt.v1i64.v1f64.v1f64(<1 x double>, <1 x double>) +declare <1 x i32> @llvm.aarch64.neon.fceq.v1i32.f32.f32(float, float) +declare <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f32(double, float) +declare <1 x i64> @llvm.aarch64.neon.fceq.v1i64.f64.f64(double, double) +declare <1 x i32> @llvm.aarch64.neon.fcge.v1i32.f32.f32(float, float) +declare <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f32(double, float) +declare <1 x i64> @llvm.aarch64.neon.fcge.v1i64.f64.f64(double, double) +declare <1 x i32> @llvm.aarch64.neon.fclez.v1i32.f32.f32(float, float) +declare <1 x i64> @llvm.aarch64.neon.fclez.v1i64.f64.f32(double, float) +declare <1 x i32> @llvm.aarch64.neon.fcgt.v1i32.f32.f32(float, float) +declare <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f32(double, float) +declare <1 x i64> @llvm.aarch64.neon.fcgt.v1i64.f64.f64(double, double) +declare <1 x i32> @llvm.aarch64.neon.fcltz.v1i32.f32.f32(float, float) +declare <1 x i64> @llvm.aarch64.neon.fcltz.v1i64.f64.f32(double, float) +declare <1 x i32> @llvm.aarch64.neon.fcage.v1i32.f32.f32(float, float) +declare <1 x i64> @llvm.aarch64.neon.fcage.v1i64.f64.f64(double, double) +declare <1 x i32> @llvm.aarch64.neon.fcagt.v1i32.f32.f32(float, float) +declare <1 x i64> @llvm.aarch64.neon.fcagt.v1i64.f64.f64(double, double) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-recip.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-recip.ll index f21c27bee435..100839b14e67 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-recip.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-recip.ll @@ -3,56 +3,42 @@ define float @test_vrecpss_f32(float %a, float %b) { ; CHECK: test_vrecpss_f32 ; CHECK: frecps {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - %1 = insertelement <1 x float> undef, float %a, i32 0 - %2 = insertelement <1 x float> undef, float %b, i32 0 - %3 = call <1 x float> @llvm.arm.neon.vrecps.v1f32(<1 x float> %1, <1 x 
float> %2) - %4 = extractelement <1 x float> %3, i32 0 - ret float %4 + %1 = call float @llvm.aarch64.neon.vrecps.f32(float %a, float %b) + ret float %1 } define double @test_vrecpsd_f64(double %a, double %b) { ; CHECK: test_vrecpsd_f64 ; CHECK: frecps {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - %1 = insertelement <1 x double> undef, double %a, i32 0 - %2 = insertelement <1 x double> undef, double %b, i32 0 - %3 = call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %1, <1 x double> %2) - %4 = extractelement <1 x double> %3, i32 0 - ret double %4 + %1 = call double @llvm.aarch64.neon.vrecps.f64(double %a, double %b) + ret double %1 } -declare <1 x float> @llvm.arm.neon.vrecps.v1f32(<1 x float>, <1 x float>) -declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>) +declare float @llvm.aarch64.neon.vrecps.f32(float, float) +declare double @llvm.aarch64.neon.vrecps.f64(double, double) define float @test_vrsqrtss_f32(float %a, float %b) { ; CHECK: test_vrsqrtss_f32 ; CHECK: frsqrts {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} - %1 = insertelement <1 x float> undef, float %a, i32 0 - %2 = insertelement <1 x float> undef, float %b, i32 0 - %3 = call <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float> %1, <1 x float> %2) - %4 = extractelement <1 x float> %3, i32 0 - ret float %4 + %1 = call float @llvm.aarch64.neon.vrsqrts.f32(float %a, float %b) + ret float %1 } define double @test_vrsqrtsd_f64(double %a, double %b) { ; CHECK: test_vrsqrtsd_f64 ; CHECK: frsqrts {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} - %1 = insertelement <1 x double> undef, double %a, i32 0 - %2 = insertelement <1 x double> undef, double %b, i32 0 - %3 = call <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double> %1, <1 x double> %2) - %4 = extractelement <1 x double> %3, i32 0 - ret double %4 + %1 = call double @llvm.aarch64.neon.vrsqrts.f64(double %a, double %b) + ret double %1 } -declare <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float>, <1 x float>) -declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>) +declare float @llvm.aarch64.neon.vrsqrts.f32(float, float) +declare double @llvm.aarch64.neon.vrsqrts.f64(double, double) define float @test_vrecpes_f32(float %a) { ; CHECK: test_vrecpes_f32 ; CHECK: frecpe {{s[0-9]+}}, {{s[0-9]+}} entry: - %vrecpe.i = insertelement <1 x float> undef, float %a, i32 0 - %vrecpe1.i = tail call <1 x float> @llvm.arm.neon.vrecpe.v1f32(<1 x float> %vrecpe.i) - %0 = extractelement <1 x float> %vrecpe1.i, i32 0 + %0 = call float @llvm.aarch64.neon.vrecpe.f32(float %a) ret float %0 } @@ -60,22 +46,18 @@ define double @test_vrecped_f64(double %a) { ; CHECK: test_vrecped_f64 ; CHECK: frecpe {{d[0-9]+}}, {{d[0-9]+}} entry: - %vrecpe.i = insertelement <1 x double> undef, double %a, i32 0 - %vrecpe1.i = tail call <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double> %vrecpe.i) - %0 = extractelement <1 x double> %vrecpe1.i, i32 0 + %0 = call double @llvm.aarch64.neon.vrecpe.f64(double %a) ret double %0 } -declare <1 x float> @llvm.arm.neon.vrecpe.v1f32(<1 x float>) -declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>) +declare float @llvm.aarch64.neon.vrecpe.f32(float) +declare double @llvm.aarch64.neon.vrecpe.f64(double) define float @test_vrecpxs_f32(float %a) { ; CHECK: test_vrecpxs_f32 ; CHECK: frecpx {{s[0-9]+}}, {{s[0-9]+}} entry: - %vrecpx.i = insertelement <1 x float> undef, float %a, i32 0 - %vrecpx1.i = tail call <1 x float> @llvm.aarch64.neon.vrecpx.v1f32(<1 x float> %vrecpx.i) - %0 = extractelement <1 x float> %vrecpx1.i, i32 0 + %0 = call 
float @llvm.aarch64.neon.vrecpx.f32(float %a) ret float %0 } @@ -83,22 +65,18 @@ define double @test_vrecpxd_f64(double %a) { ; CHECK: test_vrecpxd_f64 ; CHECK: frecpx {{d[0-9]+}}, {{d[0-9]+}} entry: - %vrecpx.i = insertelement <1 x double> undef, double %a, i32 0 - %vrecpx1.i = tail call <1 x double> @llvm.aarch64.neon.vrecpx.v1f64(<1 x double> %vrecpx.i) - %0 = extractelement <1 x double> %vrecpx1.i, i32 0 + %0 = call double @llvm.aarch64.neon.vrecpx.f64(double %a) ret double %0 } -declare <1 x float> @llvm.aarch64.neon.vrecpx.v1f32(<1 x float>) -declare <1 x double> @llvm.aarch64.neon.vrecpx.v1f64(<1 x double>) +declare float @llvm.aarch64.neon.vrecpx.f32(float) +declare double @llvm.aarch64.neon.vrecpx.f64(double) define float @test_vrsqrtes_f32(float %a) { ; CHECK: test_vrsqrtes_f32 ; CHECK: frsqrte {{s[0-9]+}}, {{s[0-9]+}} entry: - %vrsqrte.i = insertelement <1 x float> undef, float %a, i32 0 - %vrsqrte1.i = tail call <1 x float> @llvm.arm.neon.vrsqrte.v1f32(<1 x float> %vrsqrte.i) - %0 = extractelement <1 x float> %vrsqrte1.i, i32 0 + %0 = call float @llvm.aarch64.neon.vrsqrte.f32(float %a) ret float %0 } @@ -106,11 +84,9 @@ define double @test_vrsqrted_f64(double %a) { ; CHECK: test_vrsqrted_f64 ; CHECK: frsqrte {{d[0-9]+}}, {{d[0-9]+}} entry: - %vrsqrte.i = insertelement <1 x double> undef, double %a, i32 0 - %vrsqrte1.i = tail call <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double> %vrsqrte.i) - %0 = extractelement <1 x double> %vrsqrte1.i, i32 0 + %0 = call double @llvm.aarch64.neon.vrsqrte.f64(double %a) ret double %0 } -declare <1 x float> @llvm.arm.neon.vrsqrte.v1f32(<1 x float>) -declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>) +declare float @llvm.aarch64.neon.vrsqrte.f32(float) +declare double @llvm.aarch64.neon.vrsqrte.f64(double) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll index 1bb3b40440a2..33ce5cf6ce60 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll @@ -4,100 +4,212 @@ declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>) define <1 x i64> @test_addp_v1i64(<2 x i64> %a) { ; CHECK: test_addp_v1i64: - %val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a) -; CHECK: addp d0, v0.2d - ret <1 x i64> %val +; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d + %val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a) + ret <1 x i64> %val } -declare <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float>) +declare float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float>) -define <1 x float> @test_faddp_v1f32(<2 x float> %a) { -; CHECK: test_faddp_v1f32: - %val = call <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float> %a) -; CHECK: faddp s0, v0.2s - ret <1 x float> %val +define float @test_faddp_f32(<2 x float> %a) { +; CHECK: test_faddp_f32: +; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s + %val = call float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float> %a) + ret float %val } -declare <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double>) +declare double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double>) -define <1 x double> @test_faddp_v1f64(<2 x double> %a) { -; CHECK: test_faddp_v1f64: - %val = call <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double> %a) -; CHECK: faddp d0, v0.2d - ret <1 x double> %val +define double @test_faddp_f64(<2 x double> %a) { +; CHECK: test_faddp_f64: +; CHECK: faddp {{d[0-9]+}}, 
{{v[0-9]+}}.2d + %val = call double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double> %a) + ret double %val } -declare <1 x float> @llvm.aarch64.neon.vpmax(<2 x float>) +declare float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float>) -define <1 x float> @test_fmaxp_v1f32(<2 x float> %a) { -; CHECK: test_fmaxp_v1f32: - %val = call <1 x float> @llvm.aarch64.neon.vpmax(<2 x float> %a) -; CHECK: fmaxp s0, v0.2s - ret <1 x float> %val +define float @test_fmaxp_f32(<2 x float> %a) { +; CHECK: test_fmaxp_f32: +; CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s + %val = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a) + ret float %val } -declare <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double>) +declare double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double>) -define <1 x double> @test_fmaxp_v1f64(<2 x double> %a) { -; CHECK: test_fmaxp_v1f64: - %val = call <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double> %a) -; CHECK: fmaxp d0, v0.2d - ret <1 x double> %val +define double @test_fmaxp_f64(<2 x double> %a) { +; CHECK: test_fmaxp_f64: +; CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d + %val = call double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double> %a) + ret double %val } +declare float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float>) -declare <1 x float> @llvm.aarch64.neon.vpmin(<2 x float>) - -define <1 x float> @test_fminp_v1f32(<2 x float> %a) { -; CHECK: test_fminp_v1f32: - %val = call <1 x float> @llvm.aarch64.neon.vpmin(<2 x float> %a) -; CHECK: fminp s0, v0.2s - ret <1 x float> %val +define float @test_fminp_f32(<2 x float> %a) { +; CHECK: test_fminp_f32: +; CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s + %val = call float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float> %a) + ret float %val } -declare <1 x double> @llvm.aarch64.neon.vpminq(<2 x double>) +declare double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double>) -define <1 x double> @test_fminp_v1f64(<2 x double> %a) { -; CHECK: test_fminp_v1f64: - %val = call <1 x double> @llvm.aarch64.neon.vpminq(<2 x double> %a) -; CHECK: fminp d0, v0.2d - ret <1 x double> %val +define double @test_fminp_f64(<2 x double> %a) { +; CHECK: test_fminp_f64: +; CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d + %val = call double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double> %a) + ret double %val } -declare <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float>) +declare float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float>) -define <1 x float> @test_fmaxnmp_v1f32(<2 x float> %a) { -; CHECK: test_fmaxnmp_v1f32: - %val = call <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float> %a) -; CHECK: fmaxnmp s0, v0.2s - ret <1 x float> %val +define float @test_fmaxnmp_f32(<2 x float> %a) { +; CHECK: test_fmaxnmp_f32: +; CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s + %val = call float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float> %a) + ret float %val } -declare <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double>) +declare double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double>) -define <1 x double> @test_fmaxnmp_v1f64(<2 x double> %a) { -; CHECK: test_fmaxnmp_v1f64: - %val = call <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double> %a) -; CHECK: fmaxnmp d0, v0.2d - ret <1 x double> %val +define double @test_fmaxnmp_f64(<2 x double> %a) { +; CHECK: test_fmaxnmp_f64: +; CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d + %val = call double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double> %a) + ret double %val } -declare <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float>) +declare float @llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float>) -define <1 x float> 
@test_fminnmp_v1f32(<2 x float> %a) { -; CHECK: test_fminnmp_v1f32: - %val = call <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float> %a) -; CHECK: fminnmp s0, v0.2s - ret <1 x float> %val +define float @test_fminnmp_f32(<2 x float> %a) { +; CHECK: test_fminnmp_f32: +; CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s + %val = call float @llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float> %a) + ret float %val } -declare <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double>) +declare double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double>) -define <1 x double> @test_fminnmp_v1f64(<2 x double> %a) { -; CHECK: test_fminnmp_v1f64: - %val = call <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double> %a) -; CHECK: fminnmp d0, v0.2d - ret <1 x double> %val +define double @test_fminnmp_f64(<2 x double> %a) { +; CHECK: test_fminnmp_f64: +; CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d + %val = call double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double> %a) + ret double %val } +define float @test_vaddv_f32(<2 x float> %a) { +; CHECK-LABEL: test_vaddv_f32 +; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s + %1 = call float @llvm.aarch64.neon.vpfadd.f32.v2f32(<2 x float> %a) + ret float %1 +} + +define float @test_vaddvq_f32(<4 x float> %a) { +; CHECK-LABEL: test_vaddvq_f32 +; CHECK: faddp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s + %1 = call float @llvm.aarch64.neon.vpfadd.f32.v4f32(<4 x float> %a) + ret float %1 +} + +define double @test_vaddvq_f64(<2 x double> %a) { +; CHECK-LABEL: test_vaddvq_f64 +; CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d + %1 = call double @llvm.aarch64.neon.vpfadd.f64.v2f64(<2 x double> %a) + ret double %1 +} + +define float @test_vmaxv_f32(<2 x float> %a) { +; CHECK-LABEL: test_vmaxv_f32 +; CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s + %1 = call float @llvm.aarch64.neon.vpmax.f32.v2f32(<2 x float> %a) + ret float %1 +} + +define double @test_vmaxvq_f64(<2 x double> %a) { +; CHECK-LABEL: test_vmaxvq_f64 +; CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d + %1 = call double @llvm.aarch64.neon.vpmax.f64.v2f64(<2 x double> %a) + ret double %1 +} + +define float @test_vminv_f32(<2 x float> %a) { +; CHECK-LABEL: test_vminv_f32 +; CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s + %1 = call float @llvm.aarch64.neon.vpmin.f32.v2f32(<2 x float> %a) + ret float %1 +} + +define double @test_vminvq_f64(<2 x double> %a) { +; CHECK-LABEL: test_vminvq_f64 +; CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d + %1 = call double @llvm.aarch64.neon.vpmin.f64.v2f64(<2 x double> %a) + ret double %1 +} + +define double @test_vmaxnmvq_f64(<2 x double> %a) { +; CHECK-LABEL: test_vmaxnmvq_f64 +; CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d + %1 = call double @llvm.aarch64.neon.vpfmaxnm.f64.v2f64(<2 x double> %a) + ret double %1 +} + +define float @test_vmaxnmv_f32(<2 x float> %a) { +; CHECK-LABEL: test_vmaxnmv_f32 +; CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s + %1 = call float @llvm.aarch64.neon.vpfmaxnm.f32.v2f32(<2 x float> %a) + ret float %1 +} + +define double @test_vminnmvq_f64(<2 x double> %a) { +; CHECK-LABEL: test_vminnmvq_f64 +; CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d + %1 = call double @llvm.aarch64.neon.vpfminnm.f64.v2f64(<2 x double> %a) + ret double %1 +} + +define float @test_vminnmv_f32(<2 x float> %a) { +; CHECK-LABEL: test_vminnmv_f32 +; CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s + %1 = call float @llvm.aarch64.neon.vpfminnm.f32.v2f32(<2 x float> %a) + ret float %1 +} + +define <2 x i64> @test_vpaddq_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vpaddq_s64 +; CHECK: 
addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d + %1 = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %1 +} + +define <2 x i64> @test_vpaddq_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vpaddq_u64 +; CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d + %1 = call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b) + ret <2 x i64> %1 +} + +define i64 @test_vaddvq_s64(<2 x i64> %a) { +; CHECK-LABEL: test_vaddvq_s64 +; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d + %1 = call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a) + %2 = extractelement <1 x i64> %1, i32 0 + ret i64 %2 +} + +define i64 @test_vaddvq_u64(<2 x i64> %a) { +; CHECK-LABEL: test_vaddvq_u64 +; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d + %1 = call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a) + %2 = extractelement <1 x i64> %1, i32 0 + ret i64 %2 +} + +declare <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64>) + +declare <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64>, <2 x i64>) + +declare float @llvm.aarch64.neon.vpfadd.f32.v4f32(<4 x float>) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll index 83ceb4ebdad5..7c9ffa0727b2 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-rounding-shift.ll @@ -7,14 +7,14 @@ declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_urshl_v1i64: %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: urshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_srshl_v1i64: %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: srshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -24,14 +24,14 @@ declare <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64>, <1 x i64>) define <1 x i64> @test_urshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_urshl_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: urshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> @test_srshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_srshl_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: srshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll index bd66f80cebb6..5c010ef0063e 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll @@ -6,14 +6,14 @@ declare <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8>, <1 x i8>) define <1 x i8> @test_uqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { ; CHECK: test_uqadd_v1i8_aarch64: %tmp1 = call 
<1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: uqadd {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} +;CHECK: uqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} ret <1 x i8> %tmp1 } define <1 x i8> @test_sqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { ; CHECK: test_sqadd_v1i8_aarch64: %tmp1 = call <1 x i8> @llvm.arm.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: sqadd {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} +;CHECK: sqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} ret <1 x i8> %tmp1 } @@ -23,14 +23,14 @@ declare <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>) define <1 x i8> @test_uqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { ; CHECK: test_uqsub_v1i8_aarch64: %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: uqsub {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} +;CHECK: uqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} ret <1 x i8> %tmp1 } define <1 x i8> @test_sqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { ; CHECK: test_sqsub_v1i8_aarch64: %tmp1 = call <1 x i8> @llvm.arm.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: sqsub {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} +;CHECK: sqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} ret <1 x i8> %tmp1 } @@ -40,14 +40,14 @@ declare <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16>, <1 x i16>) define <1 x i16> @test_uqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { ; CHECK: test_uqadd_v1i16_aarch64: %tmp1 = call <1 x i16> @llvm.arm.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: uqadd {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} +;CHECK: uqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} ret <1 x i16> %tmp1 } define <1 x i16> @test_sqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { ; CHECK: test_sqadd_v1i16_aarch64: %tmp1 = call <1 x i16> @llvm.arm.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: sqadd {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} +;CHECK: sqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} ret <1 x i16> %tmp1 } @@ -57,14 +57,14 @@ declare <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>) define <1 x i16> @test_uqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { ; CHECK: test_uqsub_v1i16_aarch64: %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: uqsub {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} +;CHECK: uqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} ret <1 x i16> %tmp1 } define <1 x i16> @test_sqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { ; CHECK: test_sqsub_v1i16_aarch64: %tmp1 = call <1 x i16> @llvm.arm.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: sqsub {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} +;CHECK: sqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} ret <1 x i16> %tmp1 } @@ -74,14 +74,14 @@ declare <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32>, <1 x i32>) define <1 x i32> @test_uqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { ; CHECK: test_uqadd_v1i32_aarch64: %tmp1 = call <1 x i32> @llvm.arm.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: uqadd {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} +;CHECK: uqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret <1 x i32> %tmp1 } define <1 x i32> @test_sqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { ; CHECK: test_sqadd_v1i32_aarch64: %tmp1 = call <1 x i32> @llvm.arm.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: sqadd {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} +;CHECK: sqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret <1 x i32> %tmp1 } @@ -91,7 +91,7 @@ declare <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32>, <1 x i32>) define <1 x i32> 
@test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { ; CHECK: test_uqsub_v1i32_aarch64: %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: uqsub {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} +;CHECK: uqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret <1 x i32> %tmp1 } @@ -99,7 +99,7 @@ define <1 x i32> @test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { define <1 x i32> @test_sqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { ; CHECK: test_sqsub_v1i32_aarch64: %tmp1 = call <1 x i32> @llvm.arm.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: sqsub {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} +;CHECK: sqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret <1 x i32> %tmp1 } @@ -109,14 +109,14 @@ declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) define <1 x i64> @test_uqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_uqadd_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqadd {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: uqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> @test_sqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sqadd_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqadd {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -126,14 +126,14 @@ declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) define <1 x i64> @test_uqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_uqsub_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: uqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sqsub_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqsub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: sqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll index 0fd67dfa901c..dbf9669202cb 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll @@ -6,7 +6,7 @@ declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_uqrshl_v1i64: %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -14,7 +14,7 @@ define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sqrshl_v1i64: %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -24,7 +24,7 @@ declare <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8>, <1 x i8>) define <1 x i8> 
@test_uqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { ; CHECK: test_uqrshl_v1i8_aarch64: %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: uqrshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} +;CHECK: uqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} ret <1 x i8> %tmp1 } @@ -32,7 +32,7 @@ define <1 x i8> @test_uqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { define <1 x i8> @test_sqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { ; CHECK: test_sqrshl_v1i8_aarch64: %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: sqrshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} +;CHECK: sqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} ret <1 x i8> %tmp1 } @@ -42,7 +42,7 @@ declare <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16>, <1 x i16>) define <1 x i16> @test_uqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { ; CHECK: test_uqrshl_v1i16_aarch64: %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: uqrshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} +;CHECK: uqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} ret <1 x i16> %tmp1 } @@ -50,7 +50,7 @@ define <1 x i16> @test_uqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { define <1 x i16> @test_sqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { ; CHECK: test_sqrshl_v1i16_aarch64: %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: sqrshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} +;CHECK: sqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} ret <1 x i16> %tmp1 } @@ -60,7 +60,7 @@ declare <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32>, <1 x i32>) define <1 x i32> @test_uqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { ; CHECK: test_uqrshl_v1i32_aarch64: %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: uqrshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} +;CHECK: uqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret <1 x i32> %tmp1 } @@ -68,7 +68,7 @@ define <1 x i32> @test_uqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { define <1 x i32> @test_sqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { ; CHECK: test_sqrshl_v1i32_aarch64: %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: sqrshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} +;CHECK: sqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret <1 x i32> %tmp1 } @@ -78,7 +78,7 @@ declare <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64>, <1 x i64>) define <1 x i64> @test_uqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_uqrshl_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -86,7 +86,7 @@ define <1 x i64> @test_uqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { define <1 x i64> @test_sqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sqrshl_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqrshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll index 8fdea24a36d7..0a1f4c9b3f58 100644 --- 
a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-saturating-shift.ll @@ -6,14 +6,14 @@ declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>) define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_uqshl_v1i64: %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sqshl_v1i64: %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -23,14 +23,14 @@ declare <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8>, <1 x i8>) define <1 x i8> @test_uqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { ; CHECK: test_uqshl_v1i8_aarch64: %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: uqshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} +;CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} ret <1 x i8> %tmp1 } define <1 x i8> @test_sqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) { ; CHECK: test_sqshl_v1i8_aarch64: %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs) -;CHECK: sqshl {{b[0-31]+}}, {{b[0-31]+}}, {{b[0-31]+}} +;CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}} ret <1 x i8> %tmp1 } @@ -40,14 +40,14 @@ declare <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16>, <1 x i16>) define <1 x i16> @test_uqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { ; CHECK: test_uqshl_v1i16_aarch64: %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: uqshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} +;CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} ret <1 x i16> %tmp1 } define <1 x i16> @test_sqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) { ; CHECK: test_sqshl_v1i16_aarch64: %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs) -;CHECK: sqshl {{h[0-31]+}}, {{h[0-31]+}}, {{h[0-31]+}} +;CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}} ret <1 x i16> %tmp1 } @@ -57,14 +57,14 @@ declare <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32>, <1 x i32>) define <1 x i32> @test_uqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { ; CHECK: test_uqshl_v1i32_aarch64: %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: uqshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} +;CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret <1 x i32> %tmp1 } define <1 x i32> @test_sqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) { ; CHECK: test_sqshl_v1i32_aarch64: %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs) -;CHECK: sqshl {{s[0-31]+}}, {{s[0-31]+}}, {{s[0-31]+}} +;CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} ret <1 x i32> %tmp1 } @@ -74,14 +74,14 @@ declare <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64>, <1 x i64>) define <1 x i64> @test_uqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_uqshl_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: uqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> 
@test_sqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sqshl_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -;CHECK: sqshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +;CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-shift.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-shift.ll index 1222be50cf4b..b712ea4d6092 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-shift.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-scalar-shift.ll @@ -6,7 +6,7 @@ declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>) define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_ushl_v1i64: %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: ushl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -14,7 +14,7 @@ define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sshl_v1i64: %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } @@ -24,15 +24,213 @@ declare <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64>, <1 x i64>) define <1 x i64> @test_ushl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_ushl_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: ushl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } define <1 x i64> @test_sshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) { ; CHECK: test_sshl_v1i64_aarch64: %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64> %lhs, <1 x i64> %rhs) -; CHECK: sshl {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}} +; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <1 x i64> %tmp1 } +define <1 x i64> @test_vtst_s64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: test_vtst_s64 +; CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +entry: + %0 = and <1 x i64> %a, %b + %1 = icmp ne <1 x i64> %0, zeroinitializer + %vtst.i = sext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vtst.i +} +define <1 x i64> @test_vtst_u64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: test_vtst_u64 +; CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +entry: + %0 = and <1 x i64> %a, %b + %1 = icmp ne <1 x i64> %0, zeroinitializer + %vtst.i = sext <1 x i1> %1 to <1 x i64> + ret <1 x i64> %vtst.i +} + +define <1 x i64> @test_vsli_n_p64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: test_vsli_n_p64 +; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #0 +entry: + %vsli_n2 = tail call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %a, <1 x i64> %b, i32 0) + ret <1 x i64> %vsli_n2 +} + +declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) + +define <2 x i64> @test_vsliq_n_p64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vsliq_n_p64 +; CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 +entry: + %vsli_n2 = tail call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %a, <2 x i64> %b, i32 0) + ret <2 x i64> %vsli_n2 +} + +declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) + +define <2 x i32> @test_vrsqrte_u32(<2 x i32> %a) { +; 
CHECK-LABEL: test_vrsqrte_u32 +; CHECK: ursqrte {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +entry: + %vrsqrte1.i = tail call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %a) + ret <2 x i32> %vrsqrte1.i +} + +define <4 x i32> @test_vrsqrteq_u32(<4 x i32> %a) { +; CHECK-LABEL: test_vrsqrteq_u32 +; CHECK: ursqrte {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vrsqrte1.i = tail call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %a) + ret <4 x i32> %vrsqrte1.i +} + +define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) { +; CHECK-LABEL: test_vqshl_n_s8 +; CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 +entry: + %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer) + ret <8 x i8> %vqshl_n +} + +declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>) + +define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) { +; CHECK-LABEL: test_vqshlq_n_s8 +; CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0 +entry: + %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer) + ret <16 x i8> %vqshl_n +} + +declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>) + +define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) { +; CHECK-LABEL: test_vqshl_n_s16 +; CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0 +entry: + %vqshl_n1 = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> zeroinitializer) + ret <4 x i16> %vqshl_n1 +} + +declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>) + +define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) { +; CHECK-LABEL: test_vqshlq_n_s16 +; CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0 +entry: + %vqshl_n1 = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> zeroinitializer) + ret <8 x i16> %vqshl_n1 +} + +declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>) + +define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) { +; CHECK-LABEL: test_vqshl_n_s32 +; CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0 +entry: + %vqshl_n1 = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> zeroinitializer) + ret <2 x i32> %vqshl_n1 +} + +declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>) + +define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) { +; CHECK-LABEL: test_vqshlq_n_s32 +; CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0 +entry: + %vqshl_n1 = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> zeroinitializer) + ret <4 x i32> %vqshl_n1 +} + +declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>) + +define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) { +; CHECK-LABEL: test_vqshlq_n_s64 +; CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 +entry: + %vqshl_n1 = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> zeroinitializer) + ret <2 x i64> %vqshl_n1 +} + +declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) + +define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) { +; CHECK-LABEL: test_vqshl_n_u8 +; CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 +entry: + %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer) + ret <8 x i8> %vqshl_n +} + +declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>) + +define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) { +; CHECK-LABEL: test_vqshlq_n_u8 +; CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0 +entry: + %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer) + ret <16 x i8> %vqshl_n +} + +declare <16 x i8> 
@llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>) + +define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) { +; CHECK-LABEL: test_vqshl_n_u16 +; CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0 +entry: + %vqshl_n1 = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> zeroinitializer) + ret <4 x i16> %vqshl_n1 +} + +declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>) + +define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) { +; CHECK-LABEL: test_vqshlq_n_u16 +; CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0 +entry: + %vqshl_n1 = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> zeroinitializer) + ret <8 x i16> %vqshl_n1 +} + +declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>) + +define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) { +; CHECK-LABEL: test_vqshl_n_u32 +; CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0 +entry: + %vqshl_n1 = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> zeroinitializer) + ret <2 x i32> %vqshl_n1 +} + +declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>) + +define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) { +; CHECK-LABEL: test_vqshlq_n_u32 +; CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0 +entry: + %vqshl_n1 = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> zeroinitializer) + ret <4 x i32> %vqshl_n1 +} + +declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>) + +define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) { +; CHECK-LABEL: test_vqshlq_n_u64 +; CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 +entry: + %vqshl_n1 = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> zeroinitializer) + ret <2 x i64> %vqshl_n1 +} + +declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) + +declare <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32>) + +declare <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32>) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll index d45c47685b0f..d10d551805a6 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-shift-left-long.ll @@ -191,3 +191,13 @@ define <2 x i64> @test_ushll2_shl0_v4i32(<4 x i32> %a) { %tmp = zext <2 x i32> %1 to <2 x i64> ret <2 x i64> %tmp } + +define <8 x i16> @test_ushll_cmp(<8 x i8> %a, <8 x i8> %b) #0 { +; CHECK: test_ushll_cmp: +; CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK-NEXT: ushll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #0 + %cmp.i = icmp eq <8 x i8> %a, %b + %vcgtz.i.i = sext <8 x i1> %cmp.i to <8 x i8> + %vmovl.i.i.i = zext <8 x i8> %vcgtz.i.i to <8 x i16> + ret <8 x i16> %vmovl.i.i.i +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll new file mode 100644 index 000000000000..af2ab4d4246c --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-shl-ashr-lshr.ll @@ -0,0 +1,199 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define <8 x i8> @shl.v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: shl.v8i8: +; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %c = shl <8 x i8> %a, %b + ret <8 x i8> %c +} + +define <4 x i16> @shl.v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: shl.v4i16: +; CHECK: ushl v{{[0-9]+}}.4h, 
v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %c = shl <4 x i16> %a, %b + ret <4 x i16> %c +} + +define <2 x i32> @shl.v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: shl.v2i32: +; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %c = shl <2 x i32> %a, %b + ret <2 x i32> %c +} + +define <1 x i64> @shl.v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: shl.v1i64: +; CHECK: ushl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %c = shl <1 x i64> %a, %b + ret <1 x i64> %c +} + +define <16 x i8> @shl.v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shl.v16i8: +; CHECK: ushl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %c = shl <16 x i8> %a, %b + ret <16 x i8> %c +} + +define <8 x i16> @shl.v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shl.v8i16: +; CHECK: ushl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h + %c = shl <8 x i16> %a, %b + ret <8 x i16> %c +} + +define <4 x i32> @shl.v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shl.v4i32: +; CHECK: ushl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %c = shl <4 x i32> %a, %b + ret <4 x i32> %c +} + +define <2 x i64> @shl.v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: shl.v2i64: +; CHECK: ushl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %c = shl <2 x i64> %a, %b + ret <2 x i64> %c +} + +define <8 x i8> @lshr.v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: lshr.v8i8: +; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +; CHECK: ushl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %c = lshr <8 x i8> %a, %b + ret <8 x i8> %c +} + +define <4 x i16> @lshr.v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: lshr.v4i16: +; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +; CHECK: ushl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %c = lshr <4 x i16> %a, %b + ret <4 x i16> %c +} + +define <2 x i32> @lshr.v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: lshr.v2i32: +; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +; CHECK: ushl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %c = lshr <2 x i32> %a, %b + ret <2 x i32> %c +} + +define <1 x i64> @lshr.v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: lshr.v1i64: +; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}} +; CHECK: ushl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %c = lshr <1 x i64> %a, %b + ret <1 x i64> %c +} + +define <16 x i8> @lshr.v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: lshr.v16i8: +; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +; CHECK: ushl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %c = lshr <16 x i8> %a, %b + ret <16 x i8> %c +} + +define <8 x i16> @lshr.v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: lshr.v8i16: +; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +; CHECK: ushl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h + %c = lshr <8 x i16> %a, %b + ret <8 x i16> %c +} + +define <4 x i32> @lshr.v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: lshr.v4i32: +; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +; CHECK: ushl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %c = lshr <4 x i32> %a, %b + ret <4 x i32> %c +} + +define <2 x i64> @lshr.v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: lshr.v2i64: +; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +; CHECK: ushl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %c = lshr <2 x i64> %a, %b + ret <2 x i64> %c +} + +define <8 x i8> @ashr.v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: ashr.v8i8: +; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b +; CHECK: sshl v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b + %c = ashr <8 x i8> %a, %b + ret <8 x i8> %c +} + +define <4 x i16> @ashr.v4i16(<4 x i16> %a, <4 x i16> %b) { +; 
CHECK-LABEL: ashr.v4i16: +; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h +; CHECK: sshl v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h + %c = ashr <4 x i16> %a, %b + ret <4 x i16> %c +} + +define <2 x i32> @ashr.v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: ashr.v2i32: +; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s +; CHECK: sshl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s + %c = ashr <2 x i32> %a, %b + ret <2 x i32> %c +} + +define <1 x i64> @ashr.v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: ashr.v1i64: +; CHECK: neg d{{[0-9]+}}, d{{[0-9]+}} +; CHECK: sshl d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}} + %c = ashr <1 x i64> %a, %b + ret <1 x i64> %c +} + +define <16 x i8> @ashr.v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: ashr.v16i8: +; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b +; CHECK: sshl v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b + %c = ashr <16 x i8> %a, %b + ret <16 x i8> %c +} + +define <8 x i16> @ashr.v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: ashr.v8i16: +; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h +; CHECK: sshl v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h + %c = ashr <8 x i16> %a, %b + ret <8 x i16> %c +} + +define <4 x i32> @ashr.v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: ashr.v4i32: +; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s +; CHECK: sshl v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s + %c = ashr <4 x i32> %a, %b + ret <4 x i32> %c +} + +define <2 x i64> @ashr.v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: ashr.v2i64: +; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d +; CHECK: sshl v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d + %c = ashr <2 x i64> %a, %b + ret <2 x i64> %c +} + +define <1 x i64> @shl.v1i64.0(<1 x i64> %a) { +; CHECK-LABEL: shl.v1i64.0: +; CHECK: shl d{{[0-9]+}}, d{{[0-9]+}}, #0 + %c = shl <1 x i64> %a, zeroinitializer + ret <1 x i64> %c +} + +define <2 x i32> @shl.v2i32.0(<2 x i32> %a) { +; CHECK-LABEL: shl.v2i32.0: +; CHECK: shl v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #0 + %c = shl <2 x i32> %a, zeroinitializer + ret <2 x i32> %c +} \ No newline at end of file diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-simd-ldst-one.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-simd-ldst-one.ll index 3f28320f23d5..927c93301b32 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-simd-ldst-one.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-simd-ldst-one.ll @@ -1,5 +1,8 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +%struct.uint8x16x2_t = type { [2 x <16 x i8>] } +%struct.poly8x16x2_t = type { [2 x <16 x i8>] } +%struct.uint8x16x3_t = type { [3 x <16 x i8>] } %struct.int8x16x2_t = type { [2 x <16 x i8>] } %struct.int16x8x2_t = type { [2 x <8 x i16>] } %struct.int32x4x2_t = type { [2 x <4 x i32>] } @@ -37,6 +40,87 @@ %struct.float32x2x4_t = type { [4 x <2 x float>] } %struct.float64x1x4_t = type { [4 x <1 x double>] } +define <16 x i8> @test_ld_from_poll_v16i8(<16 x i8> %a) { +; CHECK-LABEL: test_ld_from_poll_v16i8 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <16 x i8> %a, + ret <16 x i8> %b +} + +define <8 x i16> @test_ld_from_poll_v8i16(<8 x i16> %a) { +; CHECK-LABEL: test_ld_from_poll_v8i16 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <8 x i16> %a, + ret <8 x i16> %b +} + +define <4 x i32> @test_ld_from_poll_v4i32(<4 x i32> %a) { +; CHECK-LABEL: 
test_ld_from_poll_v4i32 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <4 x i32> %a, + ret <4 x i32> %b +} + +define <2 x i64> @test_ld_from_poll_v2i64(<2 x i64> %a) { +; CHECK-LABEL: test_ld_from_poll_v2i64 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <2 x i64> %a, + ret <2 x i64> %b +} + +define <4 x float> @test_ld_from_poll_v4f32(<4 x float> %a) { +; CHECK-LABEL: test_ld_from_poll_v4f32 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = fadd <4 x float> %a, + ret <4 x float> %b +} + +define <2 x double> @test_ld_from_poll_v2f64(<2 x double> %a) { +; CHECK-LABEL: test_ld_from_poll_v2f64 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = fadd <2 x double> %a, + ret <2 x double> %b +} + +define <8 x i8> @test_ld_from_poll_v8i8(<8 x i8> %a) { +; CHECK-LABEL: test_ld_from_poll_v8i8 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <8 x i8> %a, + ret <8 x i8> %b +} + +define <4 x i16> @test_ld_from_poll_v4i16(<4 x i16> %a) { +; CHECK-LABEL: test_ld_from_poll_v4i16 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <4 x i16> %a, + ret <4 x i16> %b +} + +define <2 x i32> @test_ld_from_poll_v2i32(<2 x i32> %a) { +; CHECK-LABEL: test_ld_from_poll_v2i32 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <2 x i32> %a, + ret <2 x i32> %b +} + define <16 x i8> @test_vld1q_dup_s8(i8* %a) { ; CHECK-LABEL: test_vld1q_dup_s8 ; CHECK: ld1r {{{v[0-9]+}}.16b}, [x0] @@ -155,6 +239,31 @@ entry: ret <1 x double> %1 } +define <1 x i64> @testDUP.v1i64(i64* %a, i64* %b) #0 { +; As there is a store operation depending on %1, LD1R pattern can't be selected. +; So LDR and FMOV should be emitted. +; CHECK-LABEL: testDUP.v1i64 +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}] +; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} +; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}] + %1 = load i64* %a, align 8 + store i64 %1, i64* %b, align 8 + %vecinit.i = insertelement <1 x i64> undef, i64 %1, i32 0 + ret <1 x i64> %vecinit.i +} + +define <1 x double> @testDUP.v1f64(double* %a, double* %b) #0 { +; As there is a store operation depending on %1, LD1R pattern can't be selected. +; So LDR and STR should be emitted. 
+; CHECK-LABEL: testDUP.v1f64 +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}] +; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}] + %1 = load double* %a, align 8 + store double %1, double* %b, align 8 + %vecinit.i = insertelement <1 x double> undef, double %1, i32 0 + ret <1 x double> %vecinit.i +} + define %struct.int8x16x2_t @test_vld2q_dup_s8(i8* %a) { ; CHECK-LABEL: test_vld2q_dup_s8 ; CHECK: ld2r {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, [x0] @@ -2110,4 +2219,81 @@ declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) declare void @llvm.arm.neon.vst4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32) declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) -declare void @llvm.arm.neon.vst4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32) \ No newline at end of file +declare void @llvm.arm.neon.vst4lane.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32, i32) + +define %struct.int8x16x2_t @test_vld2q_lane_s8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) { +; CHECK-LABEL: test_vld2q_lane_s8 +; CHECK: ld2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0] +entry: + %src.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %src.coerce, 0 + %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1 + %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1) + %vld2_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0 + %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1 + %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1 + ret %struct.int8x16x2_t %.fca.0.1.insert +} + +define %struct.uint8x16x2_t @test_vld2q_lane_u8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) { +; CHECK-LABEL: test_vld2q_lane_u8 +; CHECK: ld2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0] +entry: + %src.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %src.coerce, 0 + %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1 + %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1) + %vld2_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0 + %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1 + %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1 + ret %struct.uint8x16x2_t %.fca.0.1.insert +} + +define %struct.poly8x16x2_t @test_vld2q_lane_p8(i8* readonly %ptr, [2 x <16 x i8>] %src.coerce) { +; CHECK-LABEL: test_vld2q_lane_p8 +; CHECK: ld2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0] +entry: + %src.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %src.coerce, 0 + %src.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %src.coerce, 1 + %vld2_lane = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, i32 15, i32 1) + %vld2_lane.fca.0.extract = 
extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 0 + %vld2_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2_lane, 1 + %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vld2_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2_lane.fca.1.extract, 0, 1 + ret %struct.poly8x16x2_t %.fca.0.1.insert +} + +define %struct.int8x16x3_t @test_vld3q_lane_s8(i8* readonly %ptr, [3 x <16 x i8>] %src.coerce) { +; CHECK-LABEL: test_vld3q_lane_s8 +; CHECK: ld3 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0] +entry: + %src.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %src.coerce, 0 + %src.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %src.coerce, 1 + %src.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %src.coerce, 2 + %vld3_lane = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, <16 x i8> %src.coerce.fca.2.extract, i32 15, i32 1) + %vld3_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 2 + %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3_lane.fca.2.extract, 0, 2 + ret %struct.int8x16x3_t %.fca.0.2.insert +} + +define %struct.uint8x16x3_t @test_vld3q_lane_u8(i8* readonly %ptr, [3 x <16 x i8>] %src.coerce) { +; CHECK-LABEL: test_vld3q_lane_u8 +; CHECK: ld3 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0] +entry: + %src.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %src.coerce, 0 + %src.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %src.coerce, 1 + %src.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %src.coerce, 2 + %vld3_lane = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3lane.v16i8(i8* %ptr, <16 x i8> %src.coerce.fca.0.extract, <16 x i8> %src.coerce.fca.1.extract, <16 x i8> %src.coerce.fca.2.extract, i32 15, i32 1) + %vld3_lane.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 0 + %vld3_lane.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 1 + %vld3_lane.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3_lane, 2 + %.fca.0.0.insert = insertvalue %struct.uint8x16x3_t undef, <16 x i8> %vld3_lane.fca.0.extract, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3_lane.fca.1.extract, 0, 1 + %.fca.0.2.insert = insertvalue %struct.uint8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3_lane.fca.2.extract, 0, 2 + ret %struct.uint8x16x3_t %.fca.0.2.insert +} + diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-simd-ldst.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-simd-ldst.ll new file mode 100644 index 000000000000..afc0901bbc0b --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-simd-ldst.ll @@ -0,0 +1,164 @@ +; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +define void @test_ldstq_4v(i8* noalias %io, i32 %count) { +; CHECK-LABEL: test_ldstq_4v +; CHECK: ld4 {v0.16b, v1.16b, 
v2.16b, v3.16b}, [x0] +; CHECK: st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] +entry: + %tobool62 = icmp eq i32 %count, 0 + br i1 %tobool62, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %count.addr.063 = phi i32 [ %dec, %while.body ], [ %count, %entry ] + %dec = add i32 %count.addr.063, -1 + %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %io, i32 1) + %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0 + %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1 + %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2 + %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3 + tail call void @llvm.arm.neon.vst4.v16i8(i8* %io, <16 x i8> %vld4.fca.0.extract, <16 x i8> %vld4.fca.1.extract, <16 x i8> %vld4.fca.2.extract, <16 x i8> %vld4.fca.3.extract, i32 1) + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret void +} + +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32) + +declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) + +define void @test_ldstq_3v(i8* noalias %io, i32 %count) { +; CHECK-LABEL: test_ldstq_3v +; CHECK: ld3 {v0.16b, v1.16b, v2.16b}, [x0] +; CHECK: st3 {v0.16b, v1.16b, v2.16b}, [x0] +entry: + %tobool47 = icmp eq i32 %count, 0 + br i1 %tobool47, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %count.addr.048 = phi i32 [ %dec, %while.body ], [ %count, %entry ] + %dec = add i32 %count.addr.048, -1 + %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %io, i32 1) + %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0 + %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1 + %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2 + tail call void @llvm.arm.neon.vst3.v16i8(i8* %io, <16 x i8> %vld3.fca.0.extract, <16 x i8> %vld3.fca.1.extract, <16 x i8> %vld3.fca.2.extract, i32 1) + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret void +} + +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32) + +declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) + +define void @test_ldstq_2v(i8* noalias %io, i32 %count) { +; CHECK-LABEL: test_ldstq_2v +; CHECK: ld2 {v0.16b, v1.16b}, [x0] +; CHECK: st2 {v0.16b, v1.16b}, [x0] +entry: + %tobool22 = icmp eq i32 %count, 0 + br i1 %tobool22, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ] + %dec = add i32 %count.addr.023, -1 + %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %io, i32 1) + %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0 + %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1 + tail call void @llvm.arm.neon.vst2.v16i8(i8* %io, <16 x i8> %vld2.fca.0.extract, <16 x i8> %vld2.fca.1.extract, i32 1) + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret void +} + +declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32) + +declare 
void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) + +define void @test_ldst_4v(i8* noalias %io, i32 %count) { +; CHECK-LABEL: test_ldst_4v +; CHECK: ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] +; CHECK: st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] +entry: + %tobool42 = icmp eq i32 %count, 0 + br i1 %tobool42, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %count.addr.043 = phi i32 [ %dec, %while.body ], [ %count, %entry ] + %dec = add i32 %count.addr.043, -1 + %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %io, i32 1) + %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0 + %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1 + %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2 + %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3 + tail call void @llvm.arm.neon.vst4.v8i8(i8* %io, <8 x i8> %vld4.fca.0.extract, <8 x i8> %vld4.fca.1.extract, <8 x i8> %vld4.fca.2.extract, <8 x i8> %vld4.fca.3.extract, i32 1) + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret void +} + +declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32) + +declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) + +define void @test_ldst_3v(i8* noalias %io, i32 %count) { +; CHECK-LABEL: test_ldst_3v +; CHECK: ld3 {v0.8b, v1.8b, v2.8b}, [x0] +; CHECK: st3 {v0.8b, v1.8b, v2.8b}, [x0] +entry: + %tobool32 = icmp eq i32 %count, 0 + br i1 %tobool32, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %count.addr.033 = phi i32 [ %dec, %while.body ], [ %count, %entry ] + %dec = add i32 %count.addr.033, -1 + %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %io, i32 1) + %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0 + %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1 + %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2 + tail call void @llvm.arm.neon.vst3.v8i8(i8* %io, <8 x i8> %vld3.fca.0.extract, <8 x i8> %vld3.fca.1.extract, <8 x i8> %vld3.fca.2.extract, i32 1) + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret void +} + +declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32) + +declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) + +define void @test_ldst_2v(i8* noalias %io, i32 %count) { +; CHECK-LABEL: test_ldst_2v +; CHECK: ld2 {v0.8b, v1.8b}, [x0] +; CHECK: st2 {v0.8b, v1.8b}, [x0] +entry: + %tobool22 = icmp eq i32 %count, 0 + br i1 %tobool22, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ] + %dec = add i32 %count.addr.023, -1 + %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %io, i32 1) + %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0 + %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1 + tail call void @llvm.arm.neon.vst2.v8i8(i8* %io, <8 x i8> %vld2.fca.0.extract, <8 x i8> %vld2.fca.1.extract, i32 1) + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret 
void +} + +declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32) + +declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) + diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-simd-shift.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-simd-shift.ll index 19d1b219646c..fd762656e56e 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-simd-shift.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-simd-shift.ll @@ -1522,3 +1522,35 @@ declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) declare <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32) +define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvt_n_s64_f64 +; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64 + %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64) + ret <1 x i64> %1 +} + +define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) { +; CHECK-LABEL: test_vcvt_n_u64_f64 +; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64 + %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64) + ret <1 x i64> %1 +} + +define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) { +; CHECK-LABEL: test_vcvt_n_f64_s64 +; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64 + %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64) + ret <1 x double> %1 +} + +define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) { +; CHECK-LABEL: test_vcvt_n_f64_u64 +; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64 + %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64) + ret <1 x double> %1 +} + +declare <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32) +declare <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32) +declare <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32) +declare <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32) \ No newline at end of file diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-truncStore-extLoad.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-truncStore-extLoad.ll new file mode 100644 index 000000000000..e5b76942446c --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-truncStore-extLoad.ll @@ -0,0 +1,57 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +; A vector TruncStore can not be selected. +; Test a trunc IR and a vector store IR can be selected correctly. +define void @truncStore.v2i64(<2 x i64> %a, <2 x i32>* %result) { +; CHECK-LABEL: truncStore.v2i64: +; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d +; CHECK: st1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] + %b = trunc <2 x i64> %a to <2 x i32> + store <2 x i32> %b, <2 x i32>* %result + ret void +} + +define void @truncStore.v4i32(<4 x i32> %a, <4 x i16>* %result) { +; CHECK-LABEL: truncStore.v4i32: +; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s +; CHECK: st1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}] + %b = trunc <4 x i32> %a to <4 x i16> + store <4 x i16> %b, <4 x i16>* %result + ret void +} + +define void @truncStore.v8i16(<8 x i16> %a, <8 x i8>* %result) { +; CHECK-LABEL: truncStore.v8i16: +; CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h +; CHECK: st1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}] + %b = trunc <8 x i16> %a to <8 x i8> + store <8 x i8> %b, <8 x i8>* %result + ret void +} + +; A vector LoadExt can not be selected. +; Test a vector load IR and a sext/zext IR can be selected correctly. 
+define <4 x i32> @loadSExt.v4i8(<4 x i8>* %ref) { +; CHECK-LABEL: loadSExt.v4i8: +; CHECK: ldrsb + %a = load <4 x i8>* %ref + %conv = sext <4 x i8> %a to <4 x i32> + ret <4 x i32> %conv +} + +define <4 x i32> @loadZExt.v4i8(<4 x i8>* %ref) { +; CHECK-LABEL: loadZExt.v4i8: +; CHECK: ldrb + %a = load <4 x i8>* %ref + %conv = zext <4 x i8> %a to <4 x i32> + ret <4 x i32> %conv +} + +define i32 @loadExt.i32(<4 x i8>* %ref) { +; CHECK-LABEL: loadExt.i32: +; CHECK: ldrb + %a = load <4 x i8>* %ref + %vecext = extractelement <4 x i8> %a, i32 0 + %conv = zext i8 %vecext to i32 + ret i32 %conv +} \ No newline at end of file diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-vector-list-spill.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-vector-list-spill.ll new file mode 100644 index 000000000000..9ac2c05ebd0f --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/AArch64/neon-vector-list-spill.ll @@ -0,0 +1,134 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast + +; FIXME: We should not generate ld/st for such register spill/fill, because the +; test case seems very simple and the register pressure is not high. If the +; spill/fill algorithm is optimized, this test case may not be triggered. And +; then we can delete it. +define i32 @spill.DPairReg(i8* %arg1, i32 %arg2) { +; CHECK-LABEL: spill.DPairReg: +; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +entry: + %vld = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %arg1, i32 4) + %cmp = icmp eq i32 %arg2, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @foo() + br label %if.end + +if.end: + %vld.extract = extractvalue { <2 x i32>, <2 x i32> } %vld, 0 + %res = extractelement <2 x i32> %vld.extract, i32 1 + ret i32 %res +} + +define i16 @spill.DTripleReg(i8* %arg1, i32 %arg2) { +; CHECK-LABEL: spill.DTripleReg: +; CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +entry: + %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %arg1, i32 4) + %cmp = icmp eq i32 %arg2, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @foo() + br label %if.end + +if.end: + %vld.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld, 0 + %res = extractelement <4 x i16> %vld.extract, i32 1 + ret i16 %res +} + +define i16 @spill.DQuadReg(i8* %arg1, i32 %arg2) { +; CHECK-LABEL: spill.DQuadReg: +; CHECK: ld4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +entry: + %vld = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8* %arg1, i32 4) + %cmp = icmp eq i32 %arg2, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @foo() + br label %if.end + +if.end: + %vld.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld, 0 + %res = extractelement <4 x i16> %vld.extract, i32 0 + ret i16 %res +} + +define i32 @spill.QPairReg(i8* %arg1, i32 
%arg2) { +; CHECK-LABEL: spill.QPairReg: +; CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +entry: + %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8* %arg1, i32 4) + %cmp = icmp eq i32 %arg2, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @foo() + br label %if.end + +if.end: + %vld.extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0 + %res = extractelement <4 x i32> %vld.extract, i32 1 + ret i32 %res +} + +define float @spill.QTripleReg(i8* %arg1, i32 %arg2) { +; CHECK-LABEL: spill.QTripleReg: +; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +entry: + %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8* %arg1, i32 4) + %cmp = icmp eq i32 %arg2, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @foo() + br label %if.end + +if.end: + %vld3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0 + %res = extractelement <4 x float> %vld3.extract, i32 1 + ret float %res +} + +define i8 @spill.QQuadReg(i8* %arg1, i32 %arg2) { +; CHECK-LABEL: spill.QQuadReg: +; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] +; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] +entry: + %vld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %arg1, i32 4) + %cmp = icmp eq i32 %arg2, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @foo() + br label %if.end + +if.end: + %vld.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld, 0 + %res = extractelement <16 x i8> %vld.extract, i32 1 + ret i8 %res +} + +declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8*, i32) +declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32) +declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8*, i32) +declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8*, i32) +declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*, i32) +declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32) + +declare void @foo() diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll index 93fb74a7ccd8..35739d76eae0 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll @@ -13,6 +13,7 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!15} !0 = metadata !{i32 524545, metadata !1, metadata !"b", metadata !2, i32 93, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] !1 = metadata !{i32 524334, metadata !12, null, metadata !"__addvsi3", metadata !"__addvsi3", metadata !"__addvsi3", i32 94, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, 
i32 0} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 524329, metadata !12} ; [ DW_TAG_file_type ] @@ -28,3 +29,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !11 = metadata !{i32 100, i32 0, metadata !10, null} !13 = metadata !{i32 0} !14 = metadata !{metadata !1} +!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll index 128928a14e23..7aacd1aa70ca 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll @@ -75,6 +75,7 @@ return: ; preds = %entry declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!3} +!llvm.module.flags = !{!49} !0 = metadata !{i32 786478, metadata !48, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786451, metadata !48, null, metadata !"SVal", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [SVal] [line 1, size 128, align 64, offset 0] [def] [from ] @@ -125,3 +126,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !46 = metadata !{metadata !0, metadata !9, metadata !16, metadata !17, metadata !20} !47 = metadata !{i32 0} !48 = metadata !{metadata !"small.cc", metadata !"/Users/manav/R8248330"} +!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll index 38158b086aff..f57411bb2c56 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll @@ -76,6 +76,7 @@ entry: } !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!49} !0 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1, null, null, metadata !42, i32 4} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ] @@ -126,3 +127,4 @@ entry: !46 = metadata !{metadata !27, metadata !28} !47 = metadata !{metadata !"foo.c", metadata !"/tmp/"} !48 = metadata !{i32 0} +!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll index da30a7d14b12..bb7870764c50 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll @@ -73,6 +73,7 @@ define i32 @get5(i32 %a) nounwind optsize ssp { declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!49} !0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !48, metadata !48, metadata !40, metadata !41, metadata !41, null} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get1", metadata !"get1", metadata !"", i32 5, 
metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get1, null, null, metadata !42, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [get1] @@ -123,3 +124,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !46 = metadata !{metadata !27, metadata !28} !47 = metadata !{metadata !"ss3.c", metadata !"/private/tmp"} !48 = metadata !{i32 0} +!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll index a263c9c8d678..1d2be314851e 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s +; RUN: llc < %s -march=arm -float-abi=soft -mcpu=cortex-a9 | FileCheck %s @A = global <4 x float> diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/addrspacecast.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/addrspacecast.ll new file mode 100644 index 000000000000..2e98ba53c67a --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/addrspacecast.ll @@ -0,0 +1,7 @@ +; RUN: llc < %s -march=arm + +; Check that codegen for an addrspace cast succeeds without error. +define <4 x i32 addrspace(1)*> @f (<4 x i32*> %x) { + %1 = addrspacecast <4 x i32*> %x to <4 x i32 addrspace(1)*> + ret <4 x i32 addrspace(1)*> %1 +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/arm-abi-attr.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/arm-abi-attr.ll new file mode 100644 index 000000000000..f3923ae5cc82 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/arm-abi-attr.ll @@ -0,0 +1,28 @@ +; RUN: llc -mtriple=arm-linux < %s | FileCheck %s --check-prefix=APCS +; RUN: llc -mtriple=arm-linux -mattr=apcs < %s | \ +; RUN: FileCheck %s --check-prefix=APCS +; RUN: llc -mtriple=arm-linux-gnueabi -mattr=apcs < %s | \ +; RUN: FileCheck %s --check-prefix=APCS + +; RUN: llc -mtriple=arm-linux-gnueabi < %s | FileCheck %s --check-prefix=AAPCS +; RUN: llc -mtriple=arm-linux-gnueabi -mattr=aapcs < %s | \ +; RUN: FileCheck %s --check-prefix=AAPCS +; RUN: llc -mtriple=arm-linux-gnu -mattr=aapcs < %s | \ +; RUN: FileCheck %s --check-prefix=AAPCS + +; The stack is 8 byte aligned on AAPCS and 4 on APCS, so we should get a BIC +; only on APCS. + +define void @g() { +; APCS: sub sp, sp, #8 +; APCS: bic sp, sp, #7 + +; AAPCS: sub sp, sp, #8 +; AAPCS-NOT: bic + + %c = alloca i8, align 8 + call void @f(i8* %c) + ret void +} + +declare void @f(i8*) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll index 88d797e83648..3373455224e7 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -march=arm | FileCheck -check-prefix=ARM %s ; RUN: llc < %s -march=thumb | FileCheck -check-prefix=THUMB %s -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck -check-prefix=T2 %s +; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 \ +; RUN: | FileCheck -check-prefix=T2 %s ; RUN: llc < %s -mtriple=thumbv8 | FileCheck -check-prefix=V8 %s ; FIXME: The -march=thumb test doesn't change if -disable-peephole is specified. 
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/build-attributes.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/build-attributes.ll new file mode 100644 index 000000000000..faf89728a84b --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/build-attributes.ll @@ -0,0 +1,435 @@ +; This tests that MC/asm header conversion is smooth and that the +; build attributes are correct + +; RUN: llc < %s -mtriple=armv6-linux-gnueabi | FileCheck %s --check-prefix=V6 +; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi | FileCheck %s --check-prefix=V6M +; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s | FileCheck %s --check-prefix=ARM1156T2F-S +; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi | FileCheck %s --check-prefix=V7M +; RUN: llc < %s -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=V7 +; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8 +; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi | FileCheck %s --check-prefix=Vt8 +; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-neon,-crypto | FileCheck %s --check-prefix=V8-FPARMv8 +; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-fp-armv8,-crypto | FileCheck %s --check-prefix=V8-NEON +; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-crypto | FileCheck %s --check-prefix=V8-FPARMv8-NEON +; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8-FPARMv8-NEON-CRYPTO +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 | FileCheck %s --check-prefix=CORTEX-A5-DEFAULT +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-neon,+d16 | FileCheck %s --check-prefix=CORTEX-A5-NONEON +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-A5-NOFPU +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-A9-SOFT +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-A9-HARD +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 | FileCheck %s --check-prefix=CORTEX-A12-DEFAULT +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-A12-NOFPU +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9-mp | FileCheck %s --check-prefix=CORTEX-A9-MP +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 | FileCheck %s --check-prefix=CORTEX-A15 +; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=CORTEX-M0 +; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-M4-SOFT +; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-M4-HARD +; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=CORTEX-R5 +; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 | FileCheck %s --check-prefix=CORTEX-A53 +; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 | FileCheck %s --check-prefix=CORTEX-A57 +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 | FileCheck %s --check-prefix=CORTEX-A7-CHECK +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2,-vfp3,-vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-NOFPU +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4 +; RUN: llc < %s 
-mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,,+d16,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4 + +; V6: .eabi_attribute 6, 6 +; V6: .eabi_attribute 8, 1 +; V6: .eabi_attribute 24, 1 +; V6: .eabi_attribute 25, 1 +; V6-NOT: .eabi_attribute 27 +; V6-NOT: .eabi_attribute 28 +; V6-NOT: .eabi_attribute 36 +; V6-NOT: .eabi_attribute 42 +; V6-NOT: .eabi_attribute 68 + +; V6M: .eabi_attribute 6, 12 +; V6M: .eabi_attribute 7, 77 +; V6M: .eabi_attribute 8, 0 +; V6M: .eabi_attribute 9, 1 +; V6M: .eabi_attribute 24, 1 +; V6M: .eabi_attribute 25, 1 +; V6M-NOT: .eabi_attribute 27 +; V6M-NOT: .eabi_attribute 28 +; V6M-NOT: .eabi_attribute 36 +; V6M-NOT: .eabi_attribute 42 +; V6M-NOT: .eabi_attribute 68 + +; ARM1156T2F-S: .cpu arm1156t2f-s +; ARM1156T2F-S: .eabi_attribute 6, 8 +; ARM1156T2F-S: .eabi_attribute 8, 1 +; ARM1156T2F-S: .eabi_attribute 9, 2 +; ARM1156T2F-S: .fpu vfpv2 +; ARM1156T2F-S: .eabi_attribute 20, 1 +; ARM1156T2F-S: .eabi_attribute 21, 1 +; ARM1156T2F-S: .eabi_attribute 23, 3 +; ARM1156T2F-S: .eabi_attribute 24, 1 +; ARM1156T2F-S: .eabi_attribute 25, 1 +; ARM1156T2F-S-NOT: .eabi_attribute 27 +; ARM1156T2F-S-NOT: .eabi_attribute 28 +; ARM1156T2F-S-NOT: .eabi_attribute 36 +; ARM1156T2F-S-NOT: .eabi_attribute 42 +; ARM1156T2F-S-NOT: .eabi_attribute 68 + +; V7M: .eabi_attribute 6, 10 +; V7M: .eabi_attribute 7, 77 +; V7M: .eabi_attribute 8, 0 +; V7M: .eabi_attribute 9, 2 +; V7M: .eabi_attribute 24, 1 +; V7M: .eabi_attribute 25, 1 +; V7M-NOT: .eabi_attribute 27 +; V7M-NOT: .eabi_attribute 28 +; V7M-NOT: .eabi_attribute 36 +; V7M-NOT: .eabi_attribute 42 +; V7M: .eabi_attribute 44, 0 +; V7M-NOT: .eabi_attribute 68 + +; V7: .syntax unified +; V7: .eabi_attribute 6, 10 +; V7: .eabi_attribute 20, 1 +; V7: .eabi_attribute 21, 1 +; V7: .eabi_attribute 23, 3 +; V7: .eabi_attribute 24, 1 +; V7: .eabi_attribute 25, 1 +; V7-NOT: .eabi_attribute 27 +; V7-NOT: .eabi_attribute 28 +; V7-NOT: .eabi_attribute 36 +; V7-NOT: .eabi_attribute 42 +; V7-NOT: .eabi_attribute 68 + +; V8: .syntax unified +; V8: .eabi_attribute 6, 14 + +; Vt8: .syntax unified +; Vt8: .eabi_attribute 6, 14 + +; V8-FPARMv8: .syntax unified +; V8-FPARMv8: .eabi_attribute 6, 14 +; V8-FPARMv8: .fpu fp-armv8 + +; V8-NEON: .syntax unified +; V8-NEON: .eabi_attribute 6, 14 +; V8-NEON: .fpu neon +; V8-NEON: .eabi_attribute 12, 3 + +; V8-FPARMv8-NEON: .syntax unified +; V8-FPARMv8-NEON: .eabi_attribute 6, 14 +; V8-FPARMv8-NEON: .fpu neon-fp-armv8 +; V8-FPARMv8-NEON: .eabi_attribute 12, 3 + +; V8-FPARMv8-NEON-CRYPTO: .syntax unified +; V8-FPARMv8-NEON-CRYPTO: .eabi_attribute 6, 14 +; V8-FPARMv8-NEON-CRYPTO: .fpu crypto-neon-fp-armv8 +; V8-FPARMv8-NEON-CRYPTO: .eabi_attribute 12, 3 + +; Tag_CPU_arch 'ARMv7' +; CORTEX-A7-CHECK: .eabi_attribute 6, 10 +; CORTEX-A7-NOFPU: .eabi_attribute 6, 10 +; CORTEX-A7-FPUV4: .eabi_attribute 6, 10 + +; Tag_CPU_arch_profile 'A' +; CORTEX-A7-CHECK: .eabi_attribute 7, 65 +; CORTEX-A7-NOFPU: .eabi_attribute 7, 65 +; CORTEX-A7-FPUV4: .eabi_attribute 7, 65 + +; Tag_ARM_ISA_use +; CORTEX-A7-CHECK: .eabi_attribute 8, 1 +; CORTEX-A7-NOFPU: .eabi_attribute 8, 1 +; CORTEX-A7-FPUV4: .eabi_attribute 8, 1 + +; Tag_THUMB_ISA_use +; CORTEX-A7-CHECK: .eabi_attribute 9, 2 +; CORTEX-A7-NOFPU: .eabi_attribute 9, 2 +; CORTEX-A7-FPUV4: .eabi_attribute 9, 2 + +; CORTEX-A7-CHECK: .fpu neon-vfpv4 +; CORTEX-A7-NOFPU-NOT: .fpu +; CORTEX-A7-FPUV4: .fpu vfpv4 + +; Tag_ABI_FP_denormal +; CORTEX-A7-CHECK: .eabi_attribute 20, 1 +; CORTEX-A7-NOFPU: .eabi_attribute 20, 1 +; CORTEX-A7-FPUV4: .eabi_attribute 20, 1 + +; 
Tag_ABI_FP_exceptions +; CORTEX-A7-CHECK: .eabi_attribute 21, 1 +; CORTEX-A7-NOFPU: .eabi_attribute 21, 1 +; CORTEX-A7-FPUV4: .eabi_attribute 21, 1 + +; Tag_ABI_FP_number_model +; CORTEX-A7-CHECK: .eabi_attribute 23, 3 +; CORTEX-A7-NOFPU: .eabi_attribute 23, 3 +; CORTEX-A7-FPUV4: .eabi_attribute 23, 3 + +; Tag_ABI_align_needed +; CORTEX-A7-CHECK: .eabi_attribute 24, 1 +; CORTEX-A7-NOFPU: .eabi_attribute 24, 1 +; CORTEX-A7-FPUV4: .eabi_attribute 24, 1 + +; Tag_ABI_align8_preserved +; CORTEX-A7-CHECK: .eabi_attribute 25, 1 +; CORTEX-A7-NOFPU: .eabi_attribute 25, 1 +; CORTEX-A7-FPUV4: .eabi_attribute 25, 1 + +; Tag_FP_HP_extension +; CORTEX-A7-CHECK: .eabi_attribute 36, 1 +; CORTEX-A7-NOFPU: .eabi_attribute 36, 1 +; CORTEX-A7-FPUV4: .eabi_attribute 36, 1 + +; Tag_MPextension_use +; CORTEX-A7-CHECK: .eabi_attribute 42, 1 +; CORTEX-A7-NOFPU: .eabi_attribute 42, 1 +; CORTEX-A7-FPUV4: .eabi_attribute 42, 1 + +; Tag_DIV_use +; CORTEX-A7-CHECK: .eabi_attribute 44, 2 +; CORTEX-A7-NOFPU: .eabi_attribute 44, 2 +; CORTEX-A7-FPUV4: .eabi_attribute 44, 2 + +; Tag_Virtualization_use +; CORTEX-A7-CHECK: .eabi_attribute 68, 3 +; CORTEX-A7-NOFPU: .eabi_attribute 68, 3 +; CORTEX-A7-FPUV4: .eabi_attribute 68, 3 + +; CORTEX-A5-DEFAULT: .cpu cortex-a5 +; CORTEX-A5-DEFAULT: .eabi_attribute 6, 10 +; CORTEX-A5-DEFAULT: .eabi_attribute 7, 65 +; CORTEX-A5-DEFAULT: .eabi_attribute 8, 1 +; CORTEX-A5-DEFAULT: .eabi_attribute 9, 2 +; CORTEX-A5-DEFAULT: .fpu neon-vfpv4 +; CORTEX-A5-DEFAULT: .eabi_attribute 20, 1 +; CORTEX-A5-DEFAULT: .eabi_attribute 21, 1 +; CORTEX-A5-DEFAULT: .eabi_attribute 23, 3 +; CORTEX-A5-DEFAULT: .eabi_attribute 24, 1 +; CORTEX-A5-DEFAULT: .eabi_attribute 25, 1 +; CORTEX-A5-DEFAULT: .eabi_attribute 42, 1 +; CORTEX-A5-DEFAULT: .eabi_attribute 68, 1 + +; CORTEX-A5-NONEON: .cpu cortex-a5 +; CORTEX-A5-NONEON: .eabi_attribute 6, 10 +; CORTEX-A5-NONEON: .eabi_attribute 7, 65 +; CORTEX-A5-NONEON: .eabi_attribute 8, 1 +; CORTEX-A5-NONEON: .eabi_attribute 9, 2 +; CORTEX-A5-NONEON: .fpu vfpv4-d16 +; CORTEX-A5-NONEON: .eabi_attribute 20, 1 +; CORTEX-A5-NONEON: .eabi_attribute 21, 1 +; CORTEX-A5-NONEON: .eabi_attribute 23, 3 +; CORTEX-A5-NONEON: .eabi_attribute 24, 1 +; CORTEX-A5-NONEON: .eabi_attribute 25, 1 +; CORTEX-A5-NONEON: .eabi_attribute 42, 1 +; CORTEX-A5-NONEON: .eabi_attribute 68, 1 + +; CORTEX-A5-NOFPU: .cpu cortex-a5 +; CORTEX-A5-NOFPU: .eabi_attribute 6, 10 +; CORTEX-A5-NOFPU: .eabi_attribute 7, 65 +; CORTEX-A5-NOFPU: .eabi_attribute 8, 1 +; CORTEX-A5-NOFPU: .eabi_attribute 9, 2 +; CORTEX-A5-NOFPU-NOT: .fpu +; CORTEX-A5-NOFPU: .eabi_attribute 20, 1 +; CORTEX-A5-NOFPU: .eabi_attribute 21, 1 +; CORTEX-A5-NOFPU: .eabi_attribute 23, 3 +; CORTEX-A5-NOFPU: .eabi_attribute 24, 1 +; CORTEX-A5-NOFPU: .eabi_attribute 25, 1 +; CORTEX-A5-NOFPU: .eabi_attribute 42, 1 +; CORTEX-A5-NOFPU: .eabi_attribute 68, 1 + +; CORTEX-A9-SOFT: .cpu cortex-a9 +; CORTEX-A9-SOFT: .eabi_attribute 6, 10 +; CORTEX-A9-SOFT: .eabi_attribute 7, 65 +; CORTEX-A9-SOFT: .eabi_attribute 8, 1 +; CORTEX-A9-SOFT: .eabi_attribute 9, 2 +; CORTEX-A9-SOFT: .fpu neon +; CORTEX-A9-SOFT: .eabi_attribute 20, 1 +; CORTEX-A9-SOFT: .eabi_attribute 21, 1 +; CORTEX-A9-SOFT: .eabi_attribute 23, 3 +; CORTEX-A9-SOFT: .eabi_attribute 24, 1 +; CORTEX-A9-SOFT: .eabi_attribute 25, 1 +; CORTEX-A9-SOFT-NOT: .eabi_attribute 27 +; CORTEX-A9-SOFT-NOT: .eabi_attribute 28 +; CORTEX-A9-SOFT: .eabi_attribute 36, 1 +; CORTEX-A9-SOFT-NOT: .eabi_attribute 42 +; CORTEX-A9-SOFT: .eabi_attribute 68, 1 + +; CORTEX-A9-HARD: .cpu cortex-a9 +; CORTEX-A9-HARD: .eabi_attribute 6, 
10 +; CORTEX-A9-HARD: .eabi_attribute 7, 65 +; CORTEX-A9-HARD: .eabi_attribute 8, 1 +; CORTEX-A9-HARD: .eabi_attribute 9, 2 +; CORTEX-A9-HARD: .fpu neon +; CORTEX-A9-HARD: .eabi_attribute 20, 1 +; CORTEX-A9-HARD: .eabi_attribute 21, 1 +; CORTEX-A9-HARD: .eabi_attribute 23, 3 +; CORTEX-A9-HARD: .eabi_attribute 24, 1 +; CORTEX-A9-HARD: .eabi_attribute 25, 1 +; CORTEX-A9-HARD-NOT: .eabi_attribute 27 +; CORTEX-A9-HARD: .eabi_attribute 28, 1 +; CORTEX-A9-HARD: .eabi_attribute 36, 1 +; CORTEX-A9-HARD-NOT: .eabi_attribute 42 +; CORTEX-A9-HARD: .eabi_attribute 68, 1 + +; CORTEX-A9-MP: .cpu cortex-a9-mp +; CORTEX-A9-MP: .eabi_attribute 6, 10 +; CORTEX-A9-MP: .eabi_attribute 7, 65 +; CORTEX-A9-MP: .eabi_attribute 8, 1 +; CORTEX-A9-MP: .eabi_attribute 9, 2 +; CORTEX-A9-MP: .fpu neon +; CORTEX-A9-MP: .eabi_attribute 20, 1 +; CORTEX-A9-MP: .eabi_attribute 21, 1 +; CORTEX-A9-MP: .eabi_attribute 23, 3 +; CORTEX-A9-MP: .eabi_attribute 24, 1 +; CORTEX-A9-MP: .eabi_attribute 25, 1 +; CORTEX-A9-MP-NOT: .eabi_attribute 27 +; CORTEX-A9-MP-NOT: .eabi_attribute 28 +; CORTEX-A9-MP: .eabi_attribute 36, 1 +; CORTEX-A9-MP: .eabi_attribute 42, 1 +; CORTEX-A9-MP: .eabi_attribute 68, 1 + +; CORTEX-A12-DEFAULT: .cpu cortex-a12 +; CORTEX-A12-DEFAULT: .eabi_attribute 6, 10 +; CORTEX-A12-DEFAULT: .eabi_attribute 7, 65 +; CORTEX-A12-DEFAULT: .eabi_attribute 8, 1 +; CORTEX-A12-DEFAULT: .eabi_attribute 9, 2 +; CORTEX-A12-DEFAULT: .fpu neon-vfpv4 +; CORTEX-A12-DEFAULT: .eabi_attribute 20, 1 +; CORTEX-A12-DEFAULT: .eabi_attribute 21, 1 +; CORTEX-A12-DEFAULT: .eabi_attribute 23, 3 +; CORTEX-A12-DEFAULT: .eabi_attribute 24, 1 +; CORTEX-A12-DEFAULT: .eabi_attribute 25, 1 +; CORTEX-A12-DEFAULT: .eabi_attribute 42, 1 +; CORTEX-A12-DEFAULT: .eabi_attribute 44, 2 +; CORTEX-A12-DEFAULT: .eabi_attribute 68, 3 + +; CORTEX-A12-NOFPU: .cpu cortex-a12 +; CORTEX-A12-NOFPU: .eabi_attribute 6, 10 +; CORTEX-A12-NOFPU: .eabi_attribute 7, 65 +; CORTEX-A12-NOFPU: .eabi_attribute 8, 1 +; CORTEX-A12-NOFPU: .eabi_attribute 9, 2 +; CORTEX-A12-NOFPU-NOT: .fpu +; CORTEX-A12-NOFPU: .eabi_attribute 20, 1 +; CORTEX-A12-NOFPU: .eabi_attribute 21, 1 +; CORTEX-A12-NOFPU: .eabi_attribute 23, 3 +; CORTEX-A12-NOFPU: .eabi_attribute 24, 1 +; CORTEX-A12-NOFPU: .eabi_attribute 25, 1 +; CORTEX-A12-NOFPU: .eabi_attribute 42, 1 +; CORTEX-A12-NOFPU: .eabi_attribute 44, 2 +; CORTEX-A12-NOFPU: .eabi_attribute 68, 3 + +; CORTEX-A15: .cpu cortex-a15 +; CORTEX-A15: .eabi_attribute 6, 10 +; CORTEX-A15: .eabi_attribute 7, 65 +; CORTEX-A15: .eabi_attribute 8, 1 +; CORTEX-A15: .eabi_attribute 9, 2 +; CORTEX-A15: .fpu neon-vfpv4 +; CORTEX-A15: .eabi_attribute 20, 1 +; CORTEX-A15: .eabi_attribute 21, 1 +; CORTEX-A15: .eabi_attribute 23, 3 +; CORTEX-A15: .eabi_attribute 24, 1 +; CORTEX-A15: .eabi_attribute 25, 1 +; CORTEX-A15-NOT: .eabi_attribute 27 +; CORTEX-A15-NOT: .eabi_attribute 28 +; CORTEX-A15: .eabi_attribute 36, 1 +; CORTEX-A15: .eabi_attribute 42, 1 +; CORTEX-A15: .eabi_attribute 44, 2 +; CORTEX-A15: .eabi_attribute 68, 3 + +; CORTEX-M0: .cpu cortex-m0 +; CORTEX-M0: .eabi_attribute 6, 12 +; CORTEX-M0: .eabi_attribute 7, 77 +; CORTEX-M0: .eabi_attribute 8, 0 +; CORTEX-M0: .eabi_attribute 9, 1 +; CORTEX-M0: .eabi_attribute 24, 1 +; CORTEX-M0: .eabi_attribute 25, 1 +; CORTEX-M0-NOT: .eabi_attribute 27 +; CORTEX-M0-NOT: .eabi_attribute 28 +; CORTEX-M0-NOT: .eabi_attribute 36 +; CORTEX-M0-NOT: .eabi_attribute 42 +; CORTEX-M0-NOT: .eabi_attribute 68 + +; CORTEX-M4-SOFT: .cpu cortex-m4 +; CORTEX-M4-SOFT: .eabi_attribute 6, 13 +; CORTEX-M4-SOFT: .eabi_attribute 7, 77 +; 
CORTEX-M4-SOFT: .eabi_attribute 8, 0 +; CORTEX-M4-SOFT: .eabi_attribute 9, 2 +; CORTEX-M4-SOFT: .fpu vfpv4-d16 +; CORTEX-M4-SOFT: .eabi_attribute 20, 1 +; CORTEX-M4-SOFT: .eabi_attribute 21, 1 +; CORTEX-M4-SOFT: .eabi_attribute 23, 3 +; CORTEX-M4-SOFT: .eabi_attribute 24, 1 +; CORTEX-M4-SOFT: .eabi_attribute 25, 1 +; CORTEX-M4-SOFT: .eabi_attribute 27, 1 +; CORTEX-M4-SOFT-NOT: .eabi_attribute 28 +; CORTEX-M4-SOFT: .eabi_attribute 36, 1 +; CORTEX-M4-SOFT-NOT: .eabi_attribute 42 +; CORTEX-M4-SOFT: .eabi_attribute 44, 0 +; CORTEX-M4-SOFT-NOT: .eabi_attribute 68 + +; CORTEX-M4-HARD: .cpu cortex-m4 +; CORTEX-M4-HARD: .eabi_attribute 6, 13 +; CORTEX-M4-HARD: .eabi_attribute 7, 77 +; CORTEX-M4-HARD: .eabi_attribute 8, 0 +; CORTEX-M4-HARD: .eabi_attribute 9, 2 +; CORTEX-M4-HARD: .fpu vfpv4-d16 +; CORTEX-M4-HARD: .eabi_attribute 20, 1 +; CORTEX-M4-HARD: .eabi_attribute 21, 1 +; CORTEX-M4-HARD: .eabi_attribute 23, 3 +; CORTEX-M4-HARD: .eabi_attribute 24, 1 +; CORTEX-M4-HARD: .eabi_attribute 25, 1 +; CORTEX-M4-HARD: .eabi_attribute 27, 1 +; CORTEX-M4-HARD: .eabi_attribute 28, 1 +; CORTEX-M4-HARD: .eabi_attribute 36, 1 +; CORTEX-M4-HARD-NOT: .eabi_attribute 42 +; CORTEX-M4-HARD: .eabi_attribute 44, 0 +; CORTEX-M4-HARD-NOT: .eabi_attribute 68 + +; CORTEX-R5: .cpu cortex-r5 +; CORTEX-R5: .eabi_attribute 6, 10 +; CORTEX-R5: .eabi_attribute 7, 82 +; CORTEX-R5: .eabi_attribute 8, 1 +; CORTEX-R5: .eabi_attribute 9, 2 +; CORTEX-R5: .fpu vfpv3-d16 +; CORTEX-R5: .eabi_attribute 20, 1 +; CORTEX-R5: .eabi_attribute 21, 1 +; CORTEX-R5: .eabi_attribute 23, 3 +; CORTEX-R5: .eabi_attribute 24, 1 +; CORTEX-R5: .eabi_attribute 25, 1 +; CORTEX-R5: .eabi_attribute 27, 1 +; CORTEX-R5-NOT: .eabi_attribute 28 +; CORTEX-R5-NOT: .eabi_attribute 36 +; CORTEX-R5-NOT: .eabi_attribute 42 +; CORTEX-R5: .eabi_attribute 44, 2 +; CORTEX-R5-NOT: .eabi_attribute 68 + +; CORTEX-A53: .cpu cortex-a53 +; CORTEX-A53: .eabi_attribute 6, 14 +; CORTEX-A53: .eabi_attribute 7, 65 +; CORTEX-A53: .eabi_attribute 8, 1 +; CORTEX-A53: .eabi_attribute 9, 2 +; CORTEX-A53: .fpu crypto-neon-fp-armv8 +; CORTEX-A53: .eabi_attribute 12, 3 +; CORTEX-A53: .eabi_attribute 24, 1 +; CORTEX-A53: .eabi_attribute 25, 1 +; CORTEX-A53-NOT: .eabi_attribute 27 +; CORTEX-A53-NOT: .eabi_attribute 28 +; CORTEX-A53: .eabi_attribute 36, 1 +; CORTEX-A53: .eabi_attribute 42, 1 +; CORTEX-A53: .eabi_attribute 44, 2 +; CORTEX-A53: .eabi_attribute 68, 3 + +; CORTEX-A57: .cpu cortex-a57 +; CORTEX-A57: .eabi_attribute 6, 14 +; CORTEX-A57: .eabi_attribute 7, 65 +; CORTEX-A57: .eabi_attribute 8, 1 +; CORTEX-A57: .eabi_attribute 9, 2 +; CORTEX-A57: .fpu crypto-neon-fp-armv8 +; CORTEX-A57: .eabi_attribute 12, 3 +; CORTEX-A57: .eabi_attribute 24, 1 +; CORTEX-A57: .eabi_attribute 25, 1 +; CORTEX-A57-NOT: .eabi_attribute 27 +; CORTEX-A57-NOT: .eabi_attribute 28 +; CORTEX-A57: .eabi_attribute 36, 1 +; CORTEX-A57: .eabi_attribute 42, 1 +; CORTEX-A57: .eabi_attribute 44, 2 +; CORTEX-A57: .eabi_attribute 68, 3 + +define i32 @f(i64 %z) { + ret i32 0 +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/coalesce-dbgvalue.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/coalesce-dbgvalue.ll index f125e7722259..86106a045201 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/coalesce-dbgvalue.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/coalesce-dbgvalue.ll @@ -77,6 +77,7 @@ attributes #2 = { nounwind readnone } attributes #3 = { nounwind } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!33} !0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk
182024) (llvm/trunk 182023)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !15, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/d/b/pr16110.c] [DW_LANG_C99] !1 = metadata !{metadata !"pr16110.c", metadata !"/d/b"} @@ -107,3 +108,4 @@ attributes #3 = { nounwind } !29 = metadata !{i32 14, i32 0, metadata !12, null} !31 = metadata !{i32 16, i32 0, metadata !4, null} !32 = metadata !{i32 18, i32 0, metadata !4, null} +!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/cse-ldrlit.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/cse-ldrlit.ll new file mode 100644 index 000000000000..c59b4c06f0ec --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/cse-ldrlit.ll @@ -0,0 +1,61 @@ +; RUN: llc -mtriple=thumbv6m-apple-darwin-eabi -relocation-model=pic -o - %s | FileCheck %s --check-prefix=CHECK-THUMB-PIC +; RUN: llc -mtriple=arm-apple-darwin-eabi -relocation-model=pic -o - %s | FileCheck %s --check-prefix=CHECK-ARM-PIC +; RUN: llc -mtriple=thumbv6m-apple-darwin-eabi -relocation-model=dynamic-no-pic -o - %s | FileCheck %s --check-prefix=CHECK-DYNAMIC +; RUN: llc -mtriple=arm-apple-darwin-eabi -relocation-model=dynamic-no-pic -o - %s | FileCheck %s --check-prefix=CHECK-DYNAMIC +; RUN: llc -mtriple=thumbv6m-apple-darwin-eabi -relocation-model=static -o - %s | FileCheck %s --check-prefix=CHECK-STATIC +; RUN: llc -mtriple=arm-apple-darwin-eabi -relocation-model=static -o - %s | FileCheck %s --check-prefix=CHECK-STATIC +@var = global [16 x i32] zeroinitializer + +declare void @bar(i32*) + +define void @foo() { + %flag = load i32* getelementptr inbounds([16 x i32]* @var, i32 0, i32 1) + %tst = icmp eq i32 %flag, 0 + br i1 %tst, label %true, label %false +true: + tail call void @bar(i32* getelementptr inbounds([16 x i32]* @var, i32 0, i32 4)) + ret void +false: + ret void +} + +; CHECK-THUMB-PIC-LABEL: foo: +; CHECK-THUMB-PIC: ldr r0, LCPI0_0 +; CHECK-THUMB-PIC: LPC0_0: +; CHECK-THUMB-PIC-NEXT: add r0, pc +; CHECK-THUMB-PIC: ldr {{r[1-9][0-9]?}}, [r0, #4] + +; CHECK-THUMB-PIC: LCPI0_0: +; CHECK-THUMB-PIC-NEXT: .long _var-(LPC0_0+4) +; CHECK-THUMB-PIC-NOT: LCPI0_1 + + +; CHECK-ARM-PIC-LABEL: foo: +; CHECK-ARM-PIC: ldr [[VAR_OFFSET:r[0-9]+]], LCPI0_0 +; CHECK-ARM-PIC: LPC0_0: +; CHECK-ARM-PIC-NEXT: ldr r0, [pc, [[VAR_OFFSET]]] +; CHECK-ARM-PIC: ldr {{r[1-9][0-9]?}}, [r0, #4] + +; CHECK-ARM-PIC: LCPI0_0: +; CHECK-ARM-PIC-NEXT: .long _var-(LPC0_0+8) +; CHECK-ARM-PIC-NOT: LCPI0_1 + + +; CHECK-DYNAMIC-LABEL: foo: +; CHECK-DYNAMIC: ldr r0, LCPI0_0 +; CHECK-DYNAMIC: ldr {{r[1-9][0-9]?}}, [r0, #4] + +; CHECK-DYNAMIC: LCPI0_0: +; CHECK-DYNAMIC-NEXT: .long _var +; CHECK-DYNAMIC-NOT: LCPI0_1 + + +; CHECK-STATIC-LABEL: foo: +; CHECK-STATIC: ldr r0, LCPI0_0 +; CHECK-STATIC: ldr {{r[1-9][0-9]?}}, [r0, #4] + +; CHECK-STATIC: LCPI0_0: +; CHECK-STATIC-NEXT: .long _var{{$}} +; CHECK-STATIC-NOT: LCPI0_1 + + diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-arg.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-arg.ll index e4aad14d1781..e8bf3ba9d61f 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-arg.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-arg.ll @@ -30,6 +30,7 @@ declare void @foobar(i64, i64) declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!33} !0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based 
on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !30, null, null, null} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 786478, metadata !2, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null, metadata !31, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [foo] @@ -64,3 +65,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !30 = metadata !{metadata !1} !31 = metadata !{metadata !5, metadata !13, metadata !14, metadata !17, metadata !18, metadata!19} !32 = metadata !{metadata !"one.c", metadata !"/Volumes/Athwagate/R10048772"} +!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-blocks.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-blocks.ll index 6e2c0721f472..6cbe4b4727cd 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-blocks.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-blocks.ll @@ -93,6 +93,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load } !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!162} !0 = metadata !{i32 786449, metadata !153, i32 16, metadata !"Apple clang version 2.1", i1 false, metadata !"", i32 2, metadata !147, metadata !26, metadata !148, null, null, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 786436, metadata !160, metadata !0, metadata !"", i32 248, i64 32, i64 32, i32 0, i32 0, null, metadata !3, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 248, size 32, align 32, offset 0] [def] [from ] @@ -256,3 +257,4 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load !159 = metadata !{metadata !"header15.h", metadata !"/Volumes/Sandbox/llvm"} !160 = metadata !{metadata !"header.h", metadata !"/Volumes/Sandbox/llvm"} !161 = metadata !{metadata !"header2.h", metadata !"/Volumes/Sandbox/llvm"} +!162 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-branch-folding.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-branch-folding.ll index c77c13889a32..8505f5365567 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-branch-folding.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-branch-folding.ll @@ -38,6 +38,8 @@ declare i32 @printf(i8* nocapture, ...) 
nounwind declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone +!llvm.module.flags = !{!56} + !0 = metadata !{i32 786478, metadata !54, null, metadata !"test0001", metadata !"test0001", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 0} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ] !2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, metadata !17, metadata !17, metadata !50, null, null, null} ; [ DW_TAG_compile_unit ] @@ -94,3 +96,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !53 = metadata !{metadata !30} !54 = metadata !{metadata !"build2.c", metadata !"/private/tmp"} !55 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp"} +!56 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-d16-reg.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-d16-reg.ll index e68688974a8e..30a3e2dcdc2c 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-d16-reg.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-d16-reg.ll @@ -57,6 +57,7 @@ entry: declare i32 @puts(i8* nocapture) nounwind !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!48} !0 = metadata !{i32 786478, metadata !46, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer, null, null, metadata !43, i32 12} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ] @@ -106,3 +107,4 @@ declare i32 @puts(i8* nocapture) nounwind !45 = metadata !{metadata !22, metadata !23, metadata !24} !46 = metadata !{metadata !"a.c", metadata !"/tmp/"} !47 = metadata !{i32 0} +!48 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-qreg.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-qreg.ll index 1ad71ab2486b..ee515fd55c81 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-qreg.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-qreg.ll @@ -36,6 +36,7 @@ declare i32 @printf(i8* nocapture, ...) 
nounwind declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!56} !0 = metadata !{i32 786478, metadata !54, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [test0001] !1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ] @@ -93,3 +94,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !53 = metadata !{metadata !30} !54 = metadata !{metadata !"build2.c", metadata !"/private/tmp"} !55 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp"} +!56 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-s16-reg.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-s16-reg.ll index 9d5907c50324..e92d9776db8c 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-s16-reg.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-s16-reg.ll @@ -62,6 +62,7 @@ declare i32 @puts(i8* nocapture) nounwind optsize declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!53} !0 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null, null, metadata !48, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [inlineprinter] !1 = metadata !{i32 786473, metadata !51} ; [ DW_TAG_file_type ] @@ -116,3 +117,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !50 = metadata !{metadata !17, metadata !18, metadata !22} !51 = metadata !{metadata !"a.c", metadata !"/private/tmp"} !52 = metadata !{i32 0} +!53 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-sreg2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-sreg2.ll index c1646430d1ec..854fcabbae87 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-sreg2.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/debug-info-sreg2.ll @@ -40,6 +40,7 @@ declare float @_Z2f3f(float) optsize declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!20} !0 = metadata !{i32 786449, metadata !18, i32 4, metadata !"clang version 3.0 (trunk 130845)", i1 true, metadata !"", i32 0, metadata !19, metadata !19, metadata !16, null, null, null} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @_Z3foov, null, null, metadata !17, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [foo] @@ -61,3 +62,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !17 = metadata !{metadata !5, metadata !8} !18 = metadata !{metadata !"k.cc", metadata !"/private/tmp"} !19 = metadata !{i32 0} +!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/default-float-abi.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/default-float-abi.ll new file mode 
100644 index 000000000000..1b26bbdd9259 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/default-float-abi.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s --check-prefix=CHECK-HARD +; RUN: llc -mtriple=armv7-linux-eabihf %s -o - | FileCheck %s --check-prefix=CHECK-HARD +; RUN: llc -mtriple=armv7-linux-gnueabihf -float-abi=soft %s -o - | FileCheck %s --check-prefix=CHECK-SOFT +; RUN: llc -mtriple=armv7-linux-gnueabi %s -o - | FileCheck %s --check-prefix=CHECK-SOFT +; RUN: llc -mtriple=armv7-linux-eabi -float-abi=hard %s -o - | FileCheck %s --check-prefix=CHECK-HARD +; RUN: llc -mtriple=thumbv7-apple-ios6.0 %s -o - | FileCheck %s --check-prefix=CHECK-SOFT + +define float @test_abi(float %lhs, float %rhs) { + %sum = fadd float %lhs, %rhs + ret float %sum + +; CHECK-HARD-LABEL: test_abi: +; CHECK-HARD-NOT: vmov +; CHECK-HARD: vadd.f32 s0, s0, s1 +; CHECK-HARD-NOT: vmov + +; CHECK-SOFT-LABEL: test_abi: +; CHECK-SOFT-DAG: vmov [[LHS:s[0-9]+]], r0 +; CHECK-SOFT-DAG: vmov [[RHS:s[0-9]+]], r1 +; CHECK-SOFT: vadd.f32 [[DEST:s[0-9]+]], [[LHS]], [[RHS]] +; CHECK-SOFT: vmov r0, [[DEST]] +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/divmod-eabi.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/divmod-eabi.ll index 404cae0da2b2..64d093e768b5 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/divmod-eabi.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/divmod-eabi.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple armv7-none-eabi %s -o - | FileCheck %s --check-prefix=EABI +; RUN: llc -mtriple armv7-none-eabihf %s -o - | FileCheck %s --check-prefix=EABI ; RUN: llc -mtriple armv7-linux-gnueabi %s -o - | FileCheck %s --check-prefix=GNU ; RUN: llc -mtriple armv7-apple-darwin %s -o - | FileCheck %s --check-prefix=DARWIN diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fast-isel-call.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fast-isel-call.ll index 917a15d28bd7..2d7378e47f2f 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fast-isel-call.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fast-isel-call.ll @@ -8,8 +8,6 @@ ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP -; XFAIL: vg_leak - ; Note that some of these tests assume that relocations are either ; movw/movt or constant pool loads. Different platforms will select ; different approaches. diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll index b08b72baa61e..089209e45fc3 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -5,8 +5,6 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=ARM-LONG ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-LONG -; XFAIL: vg_leak - ; Note that some of these tests assume that relocations are either ; movw/movt or constant pool loads. Different platforms will select ; different approaches. 
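The fast-isel hunks that follow convert bare function-name checks such as "; ARM: t1" into "-LABEL" checks. This is a FileCheck robustness fix: a plain pattern may match the substring t1 anywhere in the assembly output, whereas CHECK-LABEL lines partition the input so that the checks between two labels can only match within that function's block. A minimal sketch of the two forms (the memset check is taken verbatim from the test below):

; Fragile: "t1" can match a longer symbol or a later function's body.
; ARM: t1
; Robust: matching is anchored at the t1: label and confined to the
; region before the next -LABEL match.
; ARM-LABEL: t1:
; ARM: bl {{_?}}memset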
@@ -15,7 +13,7 @@ @temp = common global [60 x i8] zeroinitializer, align 1 define void @t1() nounwind ssp { -; ARM: t1 +; ARM-LABEL: t1: ; ARM: {{(movw r0, :lower16:_?message1)|(ldr r0, .LCPI)}} ; ARM: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}} ; ARM: add r0, r0, #5 @@ -23,12 +21,12 @@ define void @t1() nounwind ssp { ; ARM: movw r2, #10 ; ARM: and r1, r1, #255 ; ARM: bl {{_?}}memset -; ARM-LONG: t1 +; ARM-LONG-LABEL: t1: ; ARM-LONG: {{(movw r3, :lower16:L_memset\$non_lazy_ptr)|(ldr r3, .LCPI)}} ; ARM-LONG: {{(movt r3, :upper16:L_memset\$non_lazy_ptr)?}} ; ARM-LONG: ldr r3, [r3] ; ARM-LONG: blx r3 -; THUMB: t1 +; THUMB-LABEL: t1: ; THUMB: {{(movw r0, :lower16:_?message1)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}} ; THUMB: adds r0, #5 @@ -38,7 +36,7 @@ define void @t1() nounwind ssp { ; THUMB: movt r2, #0 ; THUMB: and r1, r1, #255 ; THUMB: bl {{_?}}memset -; THUMB-LONG: t1 +; THUMB-LONG-LABEL: t1: ; THUMB-LONG: movw r3, :lower16:L_memset$non_lazy_ptr ; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] @@ -50,7 +48,7 @@ define void @t1() nounwind ssp { declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind define void @t2() nounwind ssp { -; ARM: t2 +; ARM-LABEL: t2: ; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] @@ -61,12 +59,12 @@ define void @t2() nounwind ssp { ; ARM: mov r0, r1 ; ARM: ldr r1, [sp[[SLOT]]] @ 4-byte Reload ; ARM: bl {{_?}}memcpy -; ARM-LONG: t2 +; ARM-LONG-LABEL: t2: ; ARM-LONG: {{(movw r3, :lower16:L_memcpy\$non_lazy_ptr)|(ldr r3, .LCPI)}} ; ARM-LONG: {{(movt r3, :upper16:L_memcpy\$non_lazy_ptr)?}} ; ARM-LONG: ldr r3, [r3] ; ARM-LONG: blx r3 -; THUMB: t2 +; THUMB-LABEL: t2: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] @@ -78,7 +76,7 @@ define void @t2() nounwind ssp { ; THUMB: mov r0, r1 ; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload ; THUMB: bl {{_?}}memcpy -; THUMB-LONG: t2 +; THUMB-LONG-LABEL: t2: ; THUMB-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr ; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] @@ -90,7 +88,7 @@ define void @t2() nounwind ssp { declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind define void @t3() nounwind ssp { -; ARM: t3 +; ARM-LABEL: t3: ; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] @@ -99,12 +97,12 @@ define void @t3() nounwind ssp { ; ARM: movw r2, #10 ; ARM: mov r0, r1 ; ARM: bl {{_?}}memmove -; ARM-LONG: t3 +; ARM-LONG-LABEL: t3: ; ARM-LONG: {{(movw r3, :lower16:L_memmove\$non_lazy_ptr)|(ldr r3, .LCPI)}} ; ARM-LONG: {{(movt r3, :upper16:L_memmove\$non_lazy_ptr)?}} ; ARM-LONG: ldr r3, [r3] ; ARM-LONG: blx r3 -; THUMB: t3 +; THUMB-LABEL: t3: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] @@ -116,7 +114,7 @@ define void @t3() nounwind ssp { ; THUMB: mov r0, r1 ; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload ; THUMB: bl {{_?}}memmove -; THUMB-LONG: t3 +; THUMB-LONG-LABEL: t3: ; THUMB-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr ; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] @@ -126,7 +124,7 @@ define void @t3() nounwind ssp { } define void @t4() nounwind 
ssp { -; ARM: t4 +; ARM-LABEL: t4: ; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] @@ -137,7 +135,7 @@ define void @t4() nounwind ssp { ; ARM: ldrh r1, [r0, #24] ; ARM: strh r1, [r0, #12] ; ARM: bx lr -; THUMB: t4 +; THUMB-LABEL: t4: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] @@ -155,7 +153,7 @@ define void @t4() nounwind ssp { declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind define void @t5() nounwind ssp { -; ARM: t5 +; ARM-LABEL: t5: ; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] @@ -170,7 +168,7 @@ define void @t5() nounwind ssp { ; ARM: ldrh r1, [r0, #24] ; ARM: strh r1, [r0, #12] ; ARM: bx lr -; THUMB: t5 +; THUMB-LABEL: t5: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] @@ -190,7 +188,7 @@ define void @t5() nounwind ssp { } define void @t6() nounwind ssp { -; ARM: t6 +; ARM-LABEL: t6: ; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] @@ -215,7 +213,7 @@ define void @t6() nounwind ssp { ; ARM: ldrb r1, [r0, #25] ; ARM: strb r1, [r0, #13] ; ARM: bx lr -; THUMB: t6 +; THUMB-LABEL: t6: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] @@ -253,9 +251,9 @@ define void @t7() nounwind ssp { define i32 @t8(i32 %x) nounwind { entry: -; ARM: t8 +; ARM-LABEL: t8: ; ARM-NOT: FastISel missed call: %expval = call i32 @llvm.expect.i32(i32 %x, i32 1) -; THUMB: t8 +; THUMB-LABEL: t8: ; THUMB-NOT: FastISel missed call: %expval = call i32 @llvm.expect.i32(i32 %x, i32 1) %expval = call i32 @llvm.expect.i32(i32 %x, i32 1) ret i32 %expval diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fast-isel-static.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fast-isel-static.ll index 93c14a09205e..9bd0a51e7120 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fast-isel-static.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fast-isel-static.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s -; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s -; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck 
-check-prefix=CHECK-NORM %s +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s define void @myadd(float* %sum, float* %addend) nounwind { entry: diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fold-stack-adjust.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fold-stack-adjust.ll index 8bda7683f902..67fd129fd1c9 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fold-stack-adjust.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fold-stack-adjust.ll @@ -124,3 +124,41 @@ define arm_aapcs_vfpcc double @check_vfp_no_return_clobber() minsize { ret double 1.0 } + +@dbl = global double 0.0 + +; PR18136: there was a bug determining where the first eligible pop in a +; basic-block was when the entire block was epilogue code. +define void @test_fold_point(i1 %tst) minsize { +; CHECK-LABEL: test_fold_point: + + ; Important to check for beginning of basic block, because if it gets + ; if-converted the test is probably no longer checking what it should. +; CHECK: {{LBB[0-9]+_2}}: +; CHECK-NEXT: vpop {d7, d8} +; CHECK-NEXT: pop {r4, pc} + + ; With a guaranteed frame-pointer, we want to make sure that its offset in the + ; push block is correct, even if a few registers have been tacked onto a later + ; vpush (PR18160). +; CHECK-IOS-LABEL: test_fold_point: +; CHECK-IOS: push {r4, r7, lr} +; CHECK-IOS-NEXT: add r7, sp, #4 +; CHECK-IOS-NEXT: vpush {d7, d8} + + ; We want some memory so there's a stack adjustment to fold... + %var = alloca i8, i32 8 + + ; We want a long-lived floating register so that a callee-saved dN is used and + ; there's both a vpop and a pop. + %live_val = load double* @dbl + br i1 %tst, label %true, label %end +true: + call void @bar(i8* %var) + store double %live_val, double* @dbl + br label %end +end: + ; We want the epilogue to be the only thing in a basic block so that we hit + ; the correct edge-case (first inst in block is correct one to adjust). + ret void +} \ No newline at end of file diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fp.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fp.ll index fbf3a4a56ad5..888cfa470a33 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fp.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fp.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s +; RUN: llc < %s -march=arm -float-abi=soft -mattr=+vfp2 | FileCheck %s define float @f(i32 %a) { ;CHECK-LABEL: f: diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fpmem.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fpmem.ll index 8fbd1d805840..966ffee053f4 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fpmem.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/fpmem.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s +; RUN: llc < %s -march=arm -float-abi=soft -mattr=+vfp2 | FileCheck %s define float @f1(float %a) { ; CHECK-LABEL: f1: diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/indirectbr.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/indirectbr.ll index 1aeeb916e489..7c49cb310f39 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/indirectbr.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/indirectbr.ll @@ -11,6 +11,11 @@ define internal i32 @foo(i32 %i) nounwind { ; THUMB-LABEL: foo: ; THUMB2-LABEL: foo: entry: + ; _nextaddr gets CSEed for use later on. 
+; THUMB: ldr r[[NEXTADDR_REG:[0-9]+]], [[NEXTADDR_CPI:LCPI0_[0-9]+]] +; THUMB: [[NEXTADDR_PCBASE:LPC0_[0-9]]]: +; THUMB: add r[[NEXTADDR_REG]], pc + %0 = load i8** @nextaddr, align 4 ; [#uses=2] %1 = icmp eq i8* %0, null ; [#uses=1] ; indirect branch gets duplicated here @@ -53,12 +58,11 @@ L1: ; preds = %L2, %bb2 ; ARM: ldr [[R1:r[0-9]+]], LCPI ; ARM: add [[R1b:r[0-9]+]], pc, [[R1]] ; ARM: str [[R1b]] + ; THUMB-LABEL: %L1 -; THUMB: ldr -; THUMB: add ; THUMB: ldr [[R2:r[0-9]+]], LCPI ; THUMB: add [[R2]], pc -; THUMB: str [[R2]] +; THUMB: str [[R2]], [r[[NEXTADDR_REG]]] ; THUMB2-LABEL: %L1 ; THUMB2: ldr [[R2:r[0-9]+]], LCPI ; THUMB2-NEXT: str{{(.w)?}} [[R2]] @@ -67,4 +71,5 @@ L1: ; preds = %L2, %bb2 } ; ARM: .long Ltmp0-(LPC{{.*}}+8) ; THUMB: .long Ltmp0-(LPC{{.*}}+4) +; THUMB: .long _nextaddr-([[NEXTADDR_PCBASE]]+4) ; THUMB2: .long Ltmp0 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/inlineasm3.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/inlineasm3.ll index 390a44e375b9..5ee3247209a7 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/inlineasm3.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/inlineasm3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon,+v6t2 | FileCheck %s +; RUN: llc < %s -march=arm -float-abi=soft -mattr=+neon,+v6t2 | FileCheck %s ; Radar 7449043 %struct.int32x4_t = type { <4 x i32> } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/machine-licm.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/machine-licm.ll index fc9b22614d6d..ca6550178f92 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/machine-licm.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/machine-licm.ll @@ -5,20 +5,12 @@ ; rdar://7354376 ; rdar://8887598 -; The generated code is no where near ideal. It's not recognizing the two -; constantpool entries being loaded can be merged into one. - @GV = external global i32 ; [#uses=2] define void @t(i32* nocapture %vals, i32 %c) nounwind { entry: ; ARM-LABEL: t: ; ARM: ldr [[REGISTER_1:r[0-9]+]], LCPI0_0 -; Unfortunately currently ARM codegen doesn't cse the ldr from constantpool. -; The issue is it can be read by an "add pc" or a "ldr [pc]" so it's messy -; to add the pseudo instructions to make sure they are CSE'ed at the same -; time as the "ldr cp". 
-; ARM: ldr r{{[0-9]+}}, LCPI0_1 ; ARM: LPC0_0: ; ARM: ldr r{{[0-9]+}}, [pc, [[REGISTER_1]]] ; ARM: ldr r{{[0-9]+}}, [r{{[0-9]+}}] @@ -36,7 +28,7 @@ entry: bb.nph: ; preds = %entry ; ARM: LCPI0_0: -; ARM: LCPI0_1: +; ARM-NOT: LCPI0_1: ; ARM: .section ; THUMB: BB#1 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/memfunc.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/memfunc.ll index fe0056c42a11..a724080b74ca 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/memfunc.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/memfunc.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios -o - | FileCheck %s ; RUN: llc < %s -mtriple=thumbv7m-darwin-eabi -o - | FileCheck %s --check-prefix=DARWIN ; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s +; RUN: llc < %s -mtriple=arm-none-eabihf -o - | FileCheck --check-prefix=EABI %s @from = common global [500 x i32] zeroinitializer, align 4 @to = common global [500 x i32] zeroinitializer, align 4 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/minsize-litpools.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/minsize-litpools.ll new file mode 100644 index 000000000000..d5cd2a9b72e1 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/minsize-litpools.ll @@ -0,0 +1,26 @@ +; RUN: llc -mtriple=thumbv7s %s -o - | FileCheck %s +; RUN: llc -mtriple=armv7s %s -o - | FileCheck %s + +; CodeGen should be able to set and reset the MinSize subtarget-feature, and +; make use of it in deciding whether to use MOVW/MOVT for global variables or a +; lit-pool load (saving roughly 2 bytes of code). + +@var = global i32 0 + +define i32 @small_global() minsize { +; CHECK-LABEL: small_global: +; CHECK: ldr r[[GLOBDEST:[0-9]+]], {{.?LCPI0_0}} +; CHECK: ldr r0, [r[[GLOBDEST]]] + + %val = load i32* @var + ret i32 %val +} + +define i32 @big_global() { +; CHECK-LABEL: big_global: +; CHECK: movw [[GLOBDEST:r[0-9]+]], :lower16:var +; CHECK: movt [[GLOBDEST]], :upper16:var + + %val = load i32* @var + ret i32 %val +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/movt-movw-global.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/movt-movw-global.ll index bbedea19d780..1e10af181f30 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/movt-movw-global.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/movt-movw-global.ll @@ -16,8 +16,8 @@ entry: ; IOS-PIC: movw r0, :lower16:(L_foo$non_lazy_ptr-(LPC0_0+8)) ; IOS-PIC-NEXT: movt r0, :upper16:(L_foo$non_lazy_ptr-(LPC0_0+8)) -; IOS-STATIC-NOT: movw r0, :lower16:_foo -; IOS-STATIC-NOT: movt r0, :upper16:_foo +; IOS-STATIC: movw r0, :lower16:_foo +; IOS-STATIC-NEXT: movt r0, :upper16:_foo ret i32* @foo } @@ -32,8 +32,8 @@ entry: ; IOS-PIC: movw r1, :lower16:(L_foo$non_lazy_ptr-(LPC1_0+8)) ; IOS-PIC-NEXT: movt r1, :upper16:(L_foo$non_lazy_ptr-(LPC1_0+8)) -; IOS-STATIC-NOT: movw r1, :lower16:_foo -; IOS-STATIC-NOT: movt r1, :upper16:_foo +; IOS-STATIC: movw r1, :lower16:_foo +; IOS-STATIC-NEXT: movt r1, :upper16:_foo store i32 %baz, i32* @foo, align 4 ret void } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/movt.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/movt.ll index 25c1bfe32044..735d949f755c 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/movt.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/movt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s ; rdar://7317664 define i32 @t(i32 %X) nounwind { diff --git 
a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/neon_ld1.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/neon_ld1.ll index b892d2db67d6..7172a4dfe1a9 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/neon_ld1.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/neon_ld1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -float-abi=soft -mattr=+neon | FileCheck %s ; CHECK: t1 ; CHECK: vldr d diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/neon_ld2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/neon_ld2.ll index 25a670b09778..8d215a6e3cdf 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/neon_ld2.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/neon_ld2.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s --check-prefix=SWIFT +; RUN: llc < %s -march=arm -float-abi=soft -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -float-abi=soft -mcpu=swift | FileCheck %s --check-prefix=SWIFT ; CHECK: t1 ; CHECK: vld1.64 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/saxpy10-a9.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/saxpy10-a9.ll new file mode 100644 index 000000000000..f8f5e18fcf5e --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/saxpy10-a9.ll @@ -0,0 +1,135 @@ +; RUN: llc < %s -march=arm -mtriple=thumbv7-apple-ios7.0.0 -float-abi=hard -mcpu=cortex-a9 -misched-postra -misched-bench -scheditins=false | FileCheck %s +; +; Test MI-Sched support for latency-based stalls on an in-order pipeline +; using the new machine model. + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" + +; Don't be too strict with the top of the schedule, but most of it +; should be nicely pipelined. +; +; CHECK: saxpy10: +; CHECK: vldr +; CHECK: vldr +; CHECK: vldr +; CHECK: vldr +; CHECK: vldr +; CHECK: vldr +; CHECK-NEXT: vadd +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vldr +; CHECK-NEXT: vldr +; CHECK-NEXT: vadd +; CHECK-NEXT: vmul +; CHECK-NEXT: vldr +; CHECK-NEXT: vadd +; CHECK-NEXT: vadd +; CHECK-NEXT: vmul +; CHECK-NEXT: vldr +; CHECK-NEXT: vadd +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vmul +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vmul +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vmul +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vmul +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vmul +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vmul +; CHECK-NEXT: vadd +; CHECK-NEXT: vldr +; CHECK-NEXT: vadd +; CHECK-NEXT: vadd +; CHECK-NEXT: vadd +; CHECK-NEXT: vmov +; CHECK-NEXT: bx +; +; This accumulates a sum rather than storing each result.
+define float @saxpy10(float* nocapture readonly %data1, float* nocapture readonly %data2, float %a) { +entry: + %0 = load float* %data1, align 4 + %mul = fmul float %0, %a + %1 = load float* %data2, align 4 + %add = fadd float %mul, %1 + %add2 = fadd float %add, 0.000000e+00 + %arrayidx.1 = getelementptr inbounds float* %data1, i32 1 + %2 = load float* %arrayidx.1, align 4 + %mul.1 = fmul float %2, %a + %arrayidx1.1 = getelementptr inbounds float* %data2, i32 1 + %3 = load float* %arrayidx1.1, align 4 + %add.1 = fadd float %mul.1, %3 + %add2.1 = fadd float %add2, %add.1 + %arrayidx.2 = getelementptr inbounds float* %data1, i32 2 + %4 = load float* %arrayidx.2, align 4 + %mul.2 = fmul float %4, %a + %arrayidx1.2 = getelementptr inbounds float* %data2, i32 2 + %5 = load float* %arrayidx1.2, align 4 + %add.2 = fadd float %mul.2, %5 + %add2.2 = fadd float %add2.1, %add.2 + %arrayidx.3 = getelementptr inbounds float* %data1, i32 3 + %6 = load float* %arrayidx.3, align 4 + %mul.3 = fmul float %6, %a + %arrayidx1.3 = getelementptr inbounds float* %data2, i32 3 + %7 = load float* %arrayidx1.3, align 4 + %add.3 = fadd float %mul.3, %7 + %add2.3 = fadd float %add2.2, %add.3 + %arrayidx.4 = getelementptr inbounds float* %data1, i32 4 + %8 = load float* %arrayidx.4, align 4 + %mul.4 = fmul float %8, %a + %arrayidx1.4 = getelementptr inbounds float* %data2, i32 4 + %9 = load float* %arrayidx1.4, align 4 + %add.4 = fadd float %mul.4, %9 + %add2.4 = fadd float %add2.3, %add.4 + %arrayidx.5 = getelementptr inbounds float* %data1, i32 5 + %10 = load float* %arrayidx.5, align 4 + %mul.5 = fmul float %10, %a + %arrayidx1.5 = getelementptr inbounds float* %data2, i32 5 + %11 = load float* %arrayidx1.5, align 4 + %add.5 = fadd float %mul.5, %11 + %add2.5 = fadd float %add2.4, %add.5 + %arrayidx.6 = getelementptr inbounds float* %data1, i32 6 + %12 = load float* %arrayidx.6, align 4 + %mul.6 = fmul float %12, %a + %arrayidx1.6 = getelementptr inbounds float* %data2, i32 6 + %13 = load float* %arrayidx1.6, align 4 + %add.6 = fadd float %mul.6, %13 + %add2.6 = fadd float %add2.5, %add.6 + %arrayidx.7 = getelementptr inbounds float* %data1, i32 7 + %14 = load float* %arrayidx.7, align 4 + %mul.7 = fmul float %14, %a + %arrayidx1.7 = getelementptr inbounds float* %data2, i32 7 + %15 = load float* %arrayidx1.7, align 4 + %add.7 = fadd float %mul.7, %15 + %add2.7 = fadd float %add2.6, %add.7 + %arrayidx.8 = getelementptr inbounds float* %data1, i32 8 + %16 = load float* %arrayidx.8, align 4 + %mul.8 = fmul float %16, %a + %arrayidx1.8 = getelementptr inbounds float* %data2, i32 8 + %17 = load float* %arrayidx1.8, align 4 + %add.8 = fadd float %mul.8, %17 + %add2.8 = fadd float %add2.7, %add.8 + %arrayidx.9 = getelementptr inbounds float* %data1, i32 9 + %18 = load float* %arrayidx.9, align 4 + %mul.9 = fmul float %18, %a + %arrayidx1.9 = getelementptr inbounds float* %data2, i32 9 + %19 = load float* %arrayidx1.9, align 4 + %add.9 = fadd float %mul.9, %19 + %add2.9 = fadd float %add2.8, %add.9 + ret float %add2.9 +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/select-imm.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/select-imm.ll index 6f4bfb81d51b..4dc297c7ec08 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/select-imm.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/select-imm.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -march=arm | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s --check-prefix=ARMT2 -; RUN: llc < %s -march=thumb -mattr=+thumb2 | 
FileCheck %s --check-prefix=THUMB2 define i32 @t1(i32 %c) nounwind readnone { entry: diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/ssp-data-layout.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/ssp-data-layout.ll new file mode 100644 index 000000000000..ad4195b5192f --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/ssp-data-layout.ll @@ -0,0 +1,207 @@ +; RUN: llc < %s -disable-fp-elim -march=arm -mcpu=cortex-a8 -mtriple arm-linux-gnu -o - | FileCheck %s +; This test is fairly fragile. The goal is to ensure that "large" stack +; objects are allocated closest to the stack protector (i.e., farthest away +; from the Stack Pointer.) In standard SSP mode this means that large (>= +; ssp-buffer-size) arrays and structures containing such arrays are +; closest to the protector. With sspstrong and sspreq this means large +; arrays/structures-with-arrays are closest, followed by small (< ssp-buffer-size) +; arrays/structures-with-arrays, and then addr-taken variables. +; +; Ideally, we only want to verify that the objects appear in the correct groups +; and that the groups have the correct relative stack offset. The ordering +; within a group is not relevant to this test. Unfortunately, there is not +; an elegant way to do this, so just match the offset for each object. + +%struct.struct_large_char = type { [8 x i8] } +%struct.struct_large_char2 = type { [2 x i8], [8 x i8] } +%struct.struct_small_char = type { [2 x i8] } +%struct.struct_large_nonchar = type { [8 x i32] } +%struct.struct_small_nonchar = type { [2 x i16] } + +define void @layout_ssp() ssp { +entry: +; Expected stack layout for ssp is +; 180 large_char . Group 1, nested arrays, arrays >= ssp-buffer-size +; 172 struct_large_char .
+; 168 scalar1 | Everything else +; 164 scalar2 +; 160 scalar3 +; 156 addr-of +; 152 small_nonchar (84+68) +; 112 large_nonchar +; 110 small_char +; 108 struct_small_char +; 72 struct_large_nonchar +; 68 struct_small_nonchar + +; CHECK: layout_ssp: +; r[[SP]] is used as an offset into the stack later +; CHECK: add r[[SP:[0-9]+]], sp, #68 + +; CHECK: bl get_scalar1 +; CHECK: str r0, [sp, #168] +; CHECK: bl end_scalar1 + +; CHECK: bl get_scalar2 +; CHECK: str r0, [sp, #164] +; CHECK: bl end_scalar2 + +; CHECK: bl get_scalar3 +; CHECK: str r0, [sp, #160] +; CHECK: bl end_scalar3 + +; CHECK: bl get_addrof +; CHECK: str r0, [sp, #156] +; CHECK: bl end_addrof + +; CHECK: get_small_nonchar +; CHECK: strh r0, [r[[SP]], #84] +; CHECK: bl end_small_nonchar + +; CHECK: bl get_large_nonchar +; CHECK: str r0, [sp, #112] +; CHECK: bl end_large_nonchar + +; CHECK: bl get_small_char +; CHECK: strb r0, [sp, #110] +; CHECK: bl end_small_char + +; CHECK: bl get_large_char +; CHECK: strb r0, [sp, #180] +; CHECK: bl end_large_char + +; CHECK: bl get_struct_large_char +; CHECK: strb r0, [sp, #172] +; CHECK: bl end_struct_large_char + +; CHECK: bl get_struct_small_char +; CHECK: strb r0, [sp, #108] +; CHECK: bl end_struct_small_char + +; CHECK: bl get_struct_large_nonchar +; CHECK: str r0, [sp, #72] +; CHECK: bl end_struct_large_nonchar + +; CHECK: bl get_struct_small_nonchar +; CHECK: strh r0, [r[[SP]]] +; CHECK: bl end_struct_small_nonchar + %x = alloca i32, align 4 + %y = alloca i32, align 4 + %z = alloca i32, align 4 + %ptr = alloca i32, align 4 + %small2 = alloca [2 x i16], align 2 + %large2 = alloca [8 x i32], align 16 + %small = alloca [2 x i8], align 1 + %large = alloca [8 x i8], align 1 + %a = alloca %struct.struct_large_char, align 1 + %b = alloca %struct.struct_small_char, align 1 + %c = alloca %struct.struct_large_nonchar, align 8 + %d = alloca %struct.struct_small_nonchar, align 2 + %call = call i32 @get_scalar1() + store i32 %call, i32* %x, align 4 + call void @end_scalar1() + %call1 = call i32 @get_scalar2() + store i32 %call1, i32* %y, align 4 + call void @end_scalar2() + %call2 = call i32 @get_scalar3() + store i32 %call2, i32* %z, align 4 + call void @end_scalar3() + %call3 = call i32 @get_addrof() + store i32 %call3, i32* %ptr, align 4 + call void @end_addrof() + %call4 = call signext i16 @get_small_nonchar() + %arrayidx = getelementptr inbounds [2 x i16]* %small2, i32 0, i64 0 + store i16 %call4, i16* %arrayidx, align 2 + call void @end_small_nonchar() + %call5 = call i32 @get_large_nonchar() + %arrayidx6 = getelementptr inbounds [8 x i32]* %large2, i32 0, i64 0 + store i32 %call5, i32* %arrayidx6, align 4 + call void @end_large_nonchar() + %call7 = call signext i8 @get_small_char() + %arrayidx8 = getelementptr inbounds [2 x i8]* %small, i32 0, i64 0 + store i8 %call7, i8* %arrayidx8, align 1 + call void @end_small_char() + %call9 = call signext i8 @get_large_char() + %arrayidx10 = getelementptr inbounds [8 x i8]* %large, i32 0, i64 0 + store i8 %call9, i8* %arrayidx10, align 1 + call void @end_large_char() + %call11 = call signext i8 @get_struct_large_char() + %foo = getelementptr inbounds %struct.struct_large_char* %a, i32 0, i32 0 + %arrayidx12 = getelementptr inbounds [8 x i8]* %foo, i32 0, i64 0 + store i8 %call11, i8* %arrayidx12, align 1 + call void @end_struct_large_char() + %call13 = call signext i8 @get_struct_small_char() + %foo14 = getelementptr inbounds %struct.struct_small_char* %b, i32 0, i32 0 + %arrayidx15 = getelementptr inbounds [2 x i8]* %foo14, i32 0, i64 0 + store i8 
%call13, i8* %arrayidx15, align 1 + call void @end_struct_small_char() + %call16 = call i32 @get_struct_large_nonchar() + %foo17 = getelementptr inbounds %struct.struct_large_nonchar* %c, i32 0, i32 0 + %arrayidx18 = getelementptr inbounds [8 x i32]* %foo17, i32 0, i64 0 + store i32 %call16, i32* %arrayidx18, align 4 + call void @end_struct_large_nonchar() + %call19 = call signext i16 @get_struct_small_nonchar() + %foo20 = getelementptr inbounds %struct.struct_small_nonchar* %d, i32 0, i32 0 + %arrayidx21 = getelementptr inbounds [2 x i16]* %foo20, i32 0, i64 0 + store i16 %call19, i16* %arrayidx21, align 2 + call void @end_struct_small_nonchar() + %arraydecay = getelementptr inbounds [8 x i8]* %large, i32 0, i32 0 + %arraydecay22 = getelementptr inbounds [2 x i8]* %small, i32 0, i32 0 + %arraydecay23 = getelementptr inbounds [8 x i32]* %large2, i32 0, i32 0 + %arraydecay24 = getelementptr inbounds [2 x i16]* %small2, i32 0, i32 0 + %0 = load i32* %x, align 4 + %1 = load i32* %y, align 4 + %2 = load i32* %z, align 4 + %coerce.dive = getelementptr %struct.struct_large_char* %a, i32 0, i32 0 + %3 = bitcast [8 x i8]* %coerce.dive to i64* + %4 = load i64* %3, align 1 + %coerce.dive25 = getelementptr %struct.struct_small_char* %b, i32 0, i32 0 + %5 = bitcast [2 x i8]* %coerce.dive25 to i16* + %6 = load i16* %5, align 1 + %coerce.dive26 = getelementptr %struct.struct_small_nonchar* %d, i32 0, i32 0 + %7 = bitcast [2 x i16]* %coerce.dive26 to i32* + %8 = load i32* %7, align 1 + call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2) + ret void +} + +declare i32 @get_scalar1() +declare void @end_scalar1() + +declare i32 @get_scalar2() +declare void @end_scalar2() + +declare i32 @get_scalar3() +declare void @end_scalar3() + +declare i32 @get_addrof() +declare void @end_addrof() + +declare signext i16 @get_small_nonchar() +declare void @end_small_nonchar() + +declare i32 @get_large_nonchar() +declare void @end_large_nonchar() + +declare signext i8 @get_small_char() +declare void @end_small_char() + +declare signext i8 @get_large_char() +declare void @end_large_char() + +declare signext i8 @get_struct_large_char() +declare void @end_struct_large_char() + +declare signext i8 @get_struct_large_char2() +declare void @end_struct_large_char2() + +declare signext i8 @get_struct_small_char() +declare void @end_struct_small_char() + +declare i32 @get_struct_large_nonchar() +declare void @end_struct_large_nonchar() + +declare signext i16 @get_struct_small_nonchar() +declare void @end_struct_small_nonchar() + +declare void @takes_all(i64, i16, %struct.struct_large_nonchar* byval align 8, i32, i8*, i8*, i32*, i16*, i32*, i32, i32, i32) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll index 1b411e3af1cd..19727dabf09e 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll @@ -1,4 +1,7 @@ ; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s --check-prefix=V7 +; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi | FileCheck %s -check-prefix=V8 + define i32 @f(i32 %a, i32 %b) nounwind ssp { entry: @@ -84,3 +87,60 @@ land.lhs.true: ; preds = %num2long.exit if.end11: ; preds = %num2long.exit ret i32 23 } + +define float 
@float_sel(i32 %a, i32 %b, float %x, float %y) { +entry: +; CHECK-LABEL: float_sel: +; CHECK-NOT: cmp +; V8-LABEL: float_sel: +; V8-NOT: cmp +; V8: vseleq.f32 + %sub = sub i32 %a, %b + %cmp = icmp eq i32 %sub, 0 + %ret = select i1 %cmp, float %x, float %y + ret float %ret +} + +define double @double_sel(i32 %a, i32 %b, double %x, double %y) { +entry: +; CHECK-LABEL: double_sel: +; CHECK-NOT: cmp +; V8-LABEL: double_sel: +; V8-NOT: cmp +; V8: vseleq.f64 + %sub = sub i32 %a, %b + %cmp = icmp eq i32 %sub, 0 + %ret = select i1 %cmp, double %x, double %y + ret double %ret +} + +@t = common global i32 0 +define double @double_sub(i32 %a, i32 %b, double %x, double %y) { +entry: +; CHECK-LABEL: double_sub: +; CHECK: subs +; CHECK-NOT: cmp +; V8-LABEL: double_sub: +; V8: vsel + %cmp = icmp sgt i32 %a, %b + %sub = sub i32 %a, %b + store i32 %sub, i32* @t + %ret = select i1 %cmp, double %x, double %y + ret double %ret +} + +define double @double_sub_swap(i32 %a, i32 %b, double %x, double %y) { +entry: +; V7-LABEL: double_sub_swap: +; V7-NOT: cmp +; V7: subs +; V8-LABEL: double_sub_swap: +; V8-NOT: subs +; V8: cmp +; V8: vsel + %cmp = icmp sgt i32 %a, %b + %sub = sub i32 %b, %a + %ret = select i1 %cmp, double %x, double %y + store i32 %sub, i32* @t + ret double %ret +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/t2-imm.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/t2-imm.ll index 8b4145914e7c..dd75cd1783ba 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/t2-imm.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/t2-imm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s define i32 @f6(i32 %a) { ; CHECK:f6 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/thumb2-it-block.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/thumb2-it-block.ll index 47c5dccd6fee..d95476017399 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/thumb2-it-block.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/thumb2-it-block.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s ; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s ; PR11107 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/tls-models.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/tls-models.ll index ccc9032313b8..42c1ba911028 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/tls-models.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/tls-models.ll @@ -22,9 +22,9 @@ entry: ; Non-PIC code can use initial-exec, PIC code has to use general dynamic. ; CHECK-NONPIC-LABEL: f1: - ; CHECK-NONPIC: external_gd(gottpoff) + ; CHECK-NONPIC: external_gd(GOTTPOFF) ; CHECK-PIC-LABEL: f1: - ; CHECK-PIC: external_gd(tlsgd) + ; CHECK-PIC: external_gd(TLSGD) } define i32* @f2() { @@ -34,9 +34,9 @@ entry: ; Non-PIC code can use local exec, PIC code can use local dynamic, ; but that is not implemented, so falls back to general dynamic. ; CHECK-NONPIC-LABEL: f2: - ; CHECK-NONPIC: internal_gd(tpoff) + ; CHECK-NONPIC: internal_gd(TPOFF) ; CHECK-PIC-LABEL: f2: - ; CHECK-PIC: internal_gd(tlsgd) + ; CHECK-PIC: internal_gd(TLSGD) } @@ -49,9 +49,9 @@ entry: ; Non-PIC code can use initial exec, PIC should use local dynamic, ; but that is not implemented, so falls back to general dynamic. 
; CHECK-NONPIC-LABEL: f3: - ; CHECK-NONPIC: external_ld(gottpoff) + ; CHECK-NONPIC: external_ld(GOTTPOFF) ; CHECK-PIC-LABEL: f3: - ; CHECK-PIC: external_ld(tlsgd) + ; CHECK-PIC: external_ld(TLSGD) } define i32* @f4() { @@ -61,9 +61,9 @@ entry: ; Non-PIC code can use local exec, PIC code can use local dynamic, ; but that is not implemented, so it falls back to general dynamic. ; CHECK-NONPIC-LABEL: f4: - ; CHECK-NONPIC: internal_ld(tpoff) + ; CHECK-NONPIC: internal_ld(TPOFF) ; CHECK-PIC-LABEL: f4: - ; CHECK-PIC: internal_ld(tlsgd) + ; CHECK-PIC: internal_ld(TLSGD) } @@ -75,9 +75,9 @@ entry: ; Non-PIC and PIC code will use initial exec as specified. ; CHECK-NONPIC-LABEL: f5: - ; CHECK-NONPIC: external_ie(gottpoff) + ; CHECK-NONPIC: external_ie(GOTTPOFF) ; CHECK-PIC-LABEL: f5: - ; CHECK-PIC: external_ie(gottpoff) + ; CHECK-PIC: external_ie(GOTTPOFF) } define i32* @f6() { @@ -86,9 +86,9 @@ entry: ; Non-PIC code can use local exec, PIC code use initial exec as specified. ; CHECK-NONPIC-LABEL: f6: - ; CHECK-NONPIC: internal_ie(tpoff) + ; CHECK-NONPIC: internal_ie(TPOFF) ; CHECK-PIC-LABEL: f6: - ; CHECK-PIC: internal_ie(gottpoff) + ; CHECK-PIC: internal_ie(GOTTPOFF) } @@ -100,9 +100,9 @@ entry: ; Non-PIC and PIC code will use local exec as specified. ; CHECK-NONPIC-LABEL: f7: - ; CHECK-NONPIC: external_le(tpoff) + ; CHECK-NONPIC: external_le(TPOFF) ; CHECK-PIC-LABEL: f7: - ; CHECK-PIC: external_le(tpoff) + ; CHECK-PIC: external_le(TPOFF) } define i32* @f8() { @@ -111,7 +111,7 @@ entry: ; Non-PIC and PIC code will use local exec as specified. ; CHECK-NONPIC-LABEL: f8: - ; CHECK-NONPIC: internal_le(tpoff) + ; CHECK-NONPIC: internal_le(TPOFF) ; CHECK-PIC-LABEL: f8: - ; CHECK-PIC: internal_le(tpoff) + ; CHECK-PIC: internal_le(TPOFF) } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/tls1.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/tls1.ll index ec4278ce72f6..a1ca0b758b45 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/tls1.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/tls1.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ -; RUN: grep "i(tpoff)" +; RUN: grep "i(TPOFF)" ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ ; RUN: grep "__aeabi_read_tp" ; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \ diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/tls2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/tls2.ll index f04812583114..24b4794b061a 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/tls2.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/tls2.ll @@ -8,7 +8,7 @@ define i32 @f() { ; CHECK-NONPIC-LABEL: f: ; CHECK-NONPIC: ldr {{r.}}, [pc, {{r.}}] -; CHECK-NONPIC: i(gottpoff) +; CHECK-NONPIC: i(GOTTPOFF) ; CHECK-PIC-LABEL: f: ; CHECK-PIC: __tls_get_addr entry: @@ -19,7 +19,7 @@ entry: define i32* @g() { ; CHECK-NONPIC-LABEL: g: ; CHECK-NONPIC: ldr {{r.}}, [pc, {{r.}}] -; CHECK-NONPIC: i(gottpoff) +; CHECK-NONPIC: i(GOTTPOFF) ; CHECK-PIC-LABEL: g: ; CHECK-PIC: __tls_get_addr entry: diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vcombine.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vcombine.ll index 527f93b6637c..7885ac6245f9 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vcombine.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vcombine.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -float-abi=soft -mattr=+neon | FileCheck %s define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK: vcombine8 diff --git 
a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vdup.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vdup.ll index b24be2654dfc..f46f39211117 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vdup.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vdup.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -float-abi=soft -mattr=+neon | FileCheck %s define <8 x i8> @v_dup8(i8 %A) nounwind { ;CHECK-LABEL: v_dup8: diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vld1.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vld1.ll index 444d0d5b5edc..49bc84df6e1e 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vld1.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vld1.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s +; RUN: llc < %s -march=arm -float-abi=soft -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -float-abi=soft -mattr=+neon -regalloc=basic | FileCheck %s define <8 x i8> @vld1i8(i8* %A) nounwind { ;CHECK-LABEL: vld1i8: diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vlddup.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vlddup.ll index 5509f3e0a0da..f5339f8495a7 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vlddup.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vlddup.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -float-abi=soft -mattr=+neon | FileCheck %s define <8 x i8> @vld1dupi8(i8* %A) nounwind { ;CHECK-LABEL: vld1dupi8: diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vldlane.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vldlane.ll index 7a83a4c0cac6..e16746ce937e 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vldlane.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vldlane.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s +; RUN: llc < %s -march=arm -float-abi=soft -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -float-abi=soft -mattr=+neon -regalloc=basic | FileCheck %s define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK-LABEL: vld1lanei8: diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vsel.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vsel.ll index 7e1f7146fd1c..746b1b000ef1 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vsel.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/vsel.ll @@ -61,7 +61,7 @@ define void @test_vsel32slt(i32 %lhs32, i32 %rhs32, float %a, float %b) { %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat ; CHECK: cmp r0, r1 -; CHECK: vselgt.f32 s0, s1, s0 +; CHECK: vselge.f32 s0, s1, s0 ret void } define void @test_vsel64slt(i32 %lhs32, i32 %rhs32, double %a, double %b) { @@ -70,7 +70,7 @@ define void @test_vsel64slt(i32 %lhs32, i32 %rhs32, double %a, double %b) { %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble ; CHECK: cmp r0, r1 -; CHECK: vselgt.f64 d16, d1, d0 +; CHECK: vselge.f64 d16, d1, d0 ret void } define void @test_vsel32sle(i32 %lhs32, i32 %rhs32, float %a, float %b) { @@ -79,7 +79,7 @@ define void @test_vsel32sle(i32 %lhs32, i32 %rhs32, float %a, float %b) { %val1 = select i1 %tst1, float %a, float %b store float %val1, float* @varfloat ; CHECK: cmp r0, r1 -; CHECK: vselge.f32 s0, s1, s0 +; CHECK: vselgt.f32 s0, s1, s0 ret void } define 
void @test_vsel64sle(i32 %lhs32, i32 %rhs32, double %a, double %b) { @@ -88,7 +88,7 @@ define void @test_vsel64sle(i32 %lhs32, i32 %rhs32, double %a, double %b) { %val1 = select i1 %tst1, double %a, double %b store double %val1, double* @vardouble ; CHECK: cmp r0, r1 -; CHECK: vselge.f64 d16, d1, d0 +; CHECK: vselgt.f64 d16, d1, d0 ret void } define void @test_vsel32ogt(float %lhs32, float %rhs32, float %a, float %b) { diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/warn-stack.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/warn-stack.ll index 9538bbf10488..90a3e1f798ed 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/warn-stack.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/ARM/warn-stack.ll @@ -12,7 +12,7 @@ entry: ret void } -; CHECK: warning: Stack size limit exceeded (96) in warn. +; CHECK: warning: stack size limit exceeded (96) in warn define void @warn() nounwind ssp { entry: %buffer = alloca [80 x i8], align 1 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/CPP/attributes.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/CPP/attributes.ll new file mode 100644 index 000000000000..3dab617d80b9 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/CPP/attributes.ll @@ -0,0 +1,7 @@ +; RUN: llc < %s -march=cpp | FileCheck %s + +define void @f1(i8* byval, i8* inalloca) { +; CHECK: ByVal +; CHECK: InAlloca + ret void +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Generic/no-target.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Generic/no-target.ll new file mode 100644 index 000000000000..4a4724fdf2de --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Generic/no-target.ll @@ -0,0 +1,3 @@ +; RUN: not llc -mtriple le32-unknown-nacl %s -o - 2>&1 | FileCheck %s + +; CHECK: error: unable to get target for 'le32-unknown-nacl' diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Hexagon/hwloop-dbg.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Hexagon/hwloop-dbg.ll index e5eb086181a5..bfdd8130d5bf 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Hexagon/hwloop-dbg.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Hexagon/hwloop-dbg.ll @@ -35,6 +35,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!29} !0 = metadata !{i32 786449, metadata !28, i32 12, metadata !"QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c] [DW_LANG_C99] !2 = metadata !{i32 0} @@ -60,3 +61,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !26 = metadata !{i32 3, i32 23, metadata !20, null} !27 = metadata !{i32 6, i32 1, metadata !16, null} !28 = metadata !{metadata !"hwloop-dbg.c", metadata !"/usr2/kparzysz/s.hex/t"} +!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Inputs/DbgValueOtherTargets.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Inputs/DbgValueOtherTargets.ll index d1a47fca787f..953e576af85c 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Inputs/DbgValueOtherTargets.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Inputs/DbgValueOtherTargets.ll @@ -12,6 +12,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone declare void 
@llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!13} !0 = metadata !{i32 786478, metadata !12, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ] @@ -26,3 +27,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !10 = metadata !{i32 4, i32 2, metadata !8, null} !11 = metadata !{metadata !0} !12 = metadata !{metadata !"/tmp/x.c", metadata !"/Users/manav"} +!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll index 3381143c761d..8807d750e499 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s -; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 -soft-float -mips16-hard-float < %s | FileCheck %s +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 < %s | FileCheck %s define signext i8 @A(i8 %e.0, i8 signext %sum) nounwind { entry: diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/addi.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/addi.ll index 8f70a469c44f..01d409e521d7 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/addi.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/addi.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=16 +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=16 @i = global i32 6, align 4 @j = global i32 12, align 4 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/align16.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/align16.ll index 267cff54291d..689ae8307f57 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/align16.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/align16.ll @@ -25,7 +25,7 @@ entry: call void @p(i32* %arrayidx1) ret void } -; 16: save $ra, $s0, $s1, $s2, 2040 -; 16: addiu $sp, -56 # 16 bit inst -; 16: addiu $sp, 56 # 16 bit inst -; 16: restore $ra, $s0, $s1, $s2, 2040 +; 16: save $ra, 2040 +; 16: addiu $sp, -40 # 16 bit inst +; 16: addiu $sp, 40 # 16 bit inst +; 16: restore $ra, 2040 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/alloca16.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/alloca16.ll index 017665f00bd4..4f6059878c3b 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/alloca16.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/alloca16.ll @@ -19,8 +19,8 @@ entry: define void @test() nounwind { entry: -; 16: .frame $sp,24,$ra -; 16: save $ra, $s0, $s1, $s2, 24 +; 16: .frame $sp,8,$ra +; 16: save 8 # 16 bit inst ; 16: move $16, $sp ; 16: move ${{[0-9]+}}, $sp ; 16: subu $[[REGISTER:[0-9]+]], ${{[0-9]+}}, ${{[0-9]+}} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/blockaddr.ll 
b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/blockaddr.ll index beab65f47196..41c5c8f75f5c 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/blockaddr.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/blockaddr.ll @@ -4,8 +4,8 @@ ; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32 ; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64 ; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64 -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-MIPS16-1 -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-MIPS16-2 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-MIPS16-1 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-MIPS16-2 @reg = common global i8* null, align 4 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/bswap.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/bswap.ll index f17b91aab802..4824388da109 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/bswap.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/bswap.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=MIPS32 ; RUN: llc < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=MIPS64 -; RUN: llc < %s -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 -soft-float -mips16-hard-float | FileCheck %s -check-prefix=mips16 +; RUN: llc < %s -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 | FileCheck %s -check-prefix=mips16 define i32 @bswap32(i32 %x) nounwind readnone { entry: diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/call-optimization.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/call-optimization.ll new file mode 100644 index 000000000000..bfa09eaae3cb --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/call-optimization.ll @@ -0,0 +1,91 @@ +; RUN: llc -march=mipsel -disable-mips-delay-filler < %s | \ +; RUN: FileCheck %s -check-prefix=O32 +; RUN: llc -march=mipsel -mips-load-target-from-got=false \ +; RUN: -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=O32-LOADTGT + +@gd1 = common global double 0.000000e+00, align 8 +@gd2 = common global double 0.000000e+00, align 8 + +; O32-LABEL: caller3: +; O32-DAG: lw $25, %call16(callee3) +; O32-DAG: move $gp +; O32: jalr $25 +; O32-NOT: move $gp +; O32: lw $25, %call16(callee3) +; O32-NOT: move $gp +; O32: jalr $25 +; O32-NOT: move $gp +; O32: lw $25, %call16(callee3) +; O32-NOT: move $gp +; O32: jalr $25 + +; O32-LOADTGT-LABEL: caller3: +; O32-LOADTGT-DAG: lw $25, %call16(callee3) +; O32-LOADTGT-DAG: move $gp +; O32-LOADTGT: jalr $25 +; O32-LOADTGT-NOT: move $gp +; O32-LOADTGT: move $25 +; O32-LOADTGT-NOT: move $gp +; O32-LOADTGT: jalr $25 +; O32-LOADTGT-NOT: move $gp +; O32-LOADTGT: move $25 +; O32-LOADTGT-NOT: move $gp +; O32-LOADTGT: jalr $25 + +define void @caller3(i32 %n) { +entry: + tail call void @callee3() + tail call void @callee3() + %tobool1 = icmp eq i32 %n, 0 + br i1 
%tobool1, label %while.end, label %while.body + +while.body: + %n.addr.02 = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %dec = add nsw i32 %n.addr.02, -1 + tail call void @callee3() + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: + ret void +} + +declare void @callee3() + +; O32-LABEL: caller4: +; O32-DAG: lw $25, %call16(ceil) +; O32-DAG: move $gp +; O32: jalr $25 +; O32-NOT: move $gp +; O32: lw $25, %call16(ceil) +; O32-NOT: move $gp +; O32: jalr $25 +; O32-NOT: move $gp +; O32: lw $25, %call16(ceil) +; O32-NOT: move $gp +; O32: jalr $25 + +; O32-LOADTGT-LABEL: caller4: +; O32-LOADTGT-DAG: lw $25, %call16(ceil) +; O32-LOADTGT-DAG: move $gp +; O32-LOADTGT: jalr $25 +; O32-LOADTGT-NOT: move $gp +; O32-LOADTGT: move $25 +; O32-LOADTGT-NOT: move $gp +; O32-LOADTGT: jalr $25 +; O32-LOADTGT-NOT: move $gp +; O32-LOADTGT: move $25 +; O32-LOADTGT-NOT: move $gp +; O32-LOADTGT: jalr $25 + +define void @caller4(double %d) { +entry: + %call = tail call double @ceil(double %d) + %call1 = tail call double @ceil(double %call) + store double %call1, double* @gd2, align 8 + %call2 = tail call double @ceil(double %call1) + store double %call2, double* @gd1, align 8 + ret void +} + +declare double @ceil(double) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/ci2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/ci2.ll new file mode 100644 index 000000000000..7187f0c75888 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/ci2.ll @@ -0,0 +1,39 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands < %s | FileCheck %s -check-prefix=constisle + +@i = common global i32 0, align 4 +@b = common global i32 0, align 4 +@l = common global i32 0, align 4 + +; Function Attrs: nounwind +define void @foo() #0 { +entry: + store i32 305419896, i32* @i, align 4 + %0 = load i32* @b, align 4 + %tobool = icmp ne i32 %0, 0 + br i1 %tobool, label %if.then, label %if.else + +if.then: ; preds = %entry + store i32 10, i32* @b, align 4 + br label %if.end + +if.else: ; preds = %entry + store i32 20, i32* @b, align 4 + br label %if.end + +if.end: ; preds = %if.else, %if.then + call void asm sideeffect ".space 100000", ""() #1, !srcloc !1 + store i32 305419896, i32* @l, align 4 + ret void +; constisle: $CPI0_1: +; constisle .4byte 305419896 # 0x12345678 +; constisle #APP +; constisle .space 100000 +; constisle #NO_APP +; constisle $CPI0_0: +; constisle .4byte 305419896 # 0x12345678 +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } + +!1 = metadata !{i32 103} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/cmov.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/cmov.ll index c24c5ac26ae1..f2009fa5afb9 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/cmov.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/cmov.ll @@ -74,7 +74,7 @@ entry: define i32 @slti0(i32 %a) { entry: %cmp = icmp sgt i32 %a, 32766 - %cond = select i1 %cmp, i32 3, i32 4 + %cond = select i1 %cmp, i32 3, i32 5 ret i32 %cond } @@ -84,7 +84,7 @@ entry: define i32 @slti1(i32 %a) { entry: %cmp = icmp sgt i32 %a, 32767 - %cond = select i1 %cmp, i32 3, i32 4 + %cond = select i1 %cmp, i32 3, i32 5 ret i32 %cond } @@ -95,7 +95,7 @@ entry: 
define i32 @slti2(i32 %a) { entry: %cmp = icmp sgt i32 %a, -32769 - %cond = select i1 %cmp, i32 3, i32 4 + %cond = select i1 %cmp, i32 3, i32 5 ret i32 %cond } @@ -105,7 +105,7 @@ entry: define i32 @slti3(i32 %a) { entry: %cmp = icmp sgt i32 %a, -32770 - %cond = select i1 %cmp, i32 3, i32 4 + %cond = select i1 %cmp, i32 3, i32 5 ret i32 %cond } @@ -162,7 +162,7 @@ entry: define i32 @sltiu0(i32 %a) { entry: %cmp = icmp ugt i32 %a, 32766 - %cond = select i1 %cmp, i32 3, i32 4 + %cond = select i1 %cmp, i32 3, i32 5 ret i32 %cond } @@ -172,7 +172,7 @@ entry: define i32 @sltiu1(i32 %a) { entry: %cmp = icmp ugt i32 %a, 32767 - %cond = select i1 %cmp, i32 3, i32 4 + %cond = select i1 %cmp, i32 3, i32 5 ret i32 %cond } @@ -183,7 +183,7 @@ entry: define i32 @sltiu2(i32 %a) { entry: %cmp = icmp ugt i32 %a, -32769 - %cond = select i1 %cmp, i32 3, i32 4 + %cond = select i1 %cmp, i32 3, i32 5 ret i32 %cond } @@ -193,6 +193,49 @@ entry: define i32 @sltiu3(i32 %a) { entry: %cmp = icmp ugt i32 %a, -32770 - %cond = select i1 %cmp, i32 3, i32 4 + %cond = select i1 %cmp, i32 3, i32 5 ret i32 %cond } + +; Check if +; (select (setxx a, N), x, x-1) or +; (select (setxx a, N), x-1, x) +; doesn't generate conditional moves +; for constant operands whose difference is |1| + +define i32 @slti4(i32 %a) nounwind readnone { + %1 = icmp slt i32 %a, 7 + %2 = select i1 %1, i32 4, i32 3 + ret i32 %2 +} + +; O32-LABEL: slti4: +; O32-DAG: slti [[R1:\$[0-9]+]], $4, 7 +; O32-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3 +; O32-NOT: movn +; O32:.size slti4 + +define i32 @slti5(i32 %a) nounwind readnone { + %1 = icmp slt i32 %a, 7 + %2 = select i1 %1, i32 -3, i32 -4 + ret i32 %2 +} + +; O32-LABEL: slti5: +; O32-DAG: slti [[R1:\$[0-9]+]], $4, 7 +; O32-DAG: addiu [[R3:\$[0-9]+]], [[R2:\$[a-z0-9]+]], -4 +; O32-NOT: movn +; O32:.size slti5 + +define i32 @slti6(i32 %a) nounwind readnone { + %1 = icmp slt i32 %a, 7 + %2 = select i1 %1, i32 3, i32 4 + ret i32 %2 +} + +; O32-LABEL: slti6: +; O32-DAG: slti [[R1:\$[0-9]+]], $4, 7 +; O32-DAG: xori [[R1]], [[R1]], 1 +; O32-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3 +; O32-NOT: movn +; O32:.size slti6 \ No newline at end of file diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/const4a.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/const4a.ll index 0332327cec69..bec61cfc5f49 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/const4a.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/const4a.ll @@ -22,7 +22,7 @@ entry: ; no-load-relax: lw ${{[0-9]+}}, %call16(foo)(${{[0-9]+}}) ; no-load-relax: b $BB0_4 ; no-load-relax: .align 2 -; no-load-relax: $CPI0_0: +; no-load-relax: $CPI0_1: ; no-load-relax: .4byte 3735943886 ; no-load-relax: $BB0_3: ; no-load-relax: lw ${{[0-9]+}}, %call16(goo)(${{[0-9]+}}) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/const6.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/const6.ll index 20cdc09f7be1..3f02ab907e1e 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/const6.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/const6.ll @@ -27,7 +27,7 @@ entry: ; no-load-relax: jalrc ${{[0-9]+}} ; no-load-relax: b $BB0_2 ; no-load-relax: .align 2 -; no-load-relax: $CPI0_0: +; no-load-relax: $CPI0_1: ; no-load-relax: .4byte 3735943886 ; no-load-relax: $BB0_2: diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/ctlz.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/ctlz.ll index 2ddb72755ac8..1f871664a6cf 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/ctlz.ll +++ 
b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/ctlz.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=static +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=static @x = global i32 28912, align 4 @y = common global i32 0, align 4 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/ex2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/ex2.ll index c5535e7661a7..6d024c209c26 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/ex2.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/ex2.ll @@ -6,12 +6,11 @@ define i32 @main() { ; 16-LABEL: main: ; 16: .cfi_startproc -; 16: save $ra, $s0, $s1, $s2, 40 -; 16: .cfi_def_cfa_offset 40 -; 16: .cfi_offset 18, -8 -; 16: .cfi_offset 17, -12 -; 16: .cfi_offset 16, -16 +; 16: save $16, $17, $ra, 32 # 16 bit inst +; 16: .cfi_def_cfa_offset 32 ; 16: .cfi_offset 31, -4 +; 16: .cfi_offset 17, -8 +; 16: .cfi_offset 16, -12 ; 16: .cfi_endproc entry: %retval = alloca i32, align 4 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/f16abs.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/f16abs.ll index 928914f067dd..0fba9c4fd08a 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/f16abs.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/f16abs.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=static +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=static @y = global double -1.450000e+00, align 8 @x = common global double 0.000000e+00, align 8 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fixdfsf.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fixdfsf.ll index b08eefd71235..4271ac222edb 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fixdfsf.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fixdfsf.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=pic1 -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=pic2 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=pic1 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=pic2 @x = common global double 0.000000e+00, align 8 @y = common global i32 0, align 4 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fp16instrinsmc.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fp16instrinsmc.ll index bb43d2711c26..7ced36c016f7 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fp16instrinsmc.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fp16instrinsmc.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=pic -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips32-function-mask=1010111 -mips-os16 < %s | FileCheck %s -check-prefix=fmask +; RUN: llc -mtriple=mipsel-linux-gnu 
-march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=pic +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -mips32-function-mask=1010111 -mips-os16 < %s | FileCheck %s -check-prefix=fmask @x = global float 1.500000e+00, align 4 @xn = global float -1.900000e+01, align 4 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fp16mix.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fp16mix.ll index 8d85099ba9f2..a94f838fb675 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fp16mix.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fp16mix.ll @@ -1,8 +1,8 @@ -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips32-function-mask=10 -mips-os16 < %s | FileCheck %s -check-prefix=fmask1 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -mips32-function-mask=10 -mips-os16 < %s | FileCheck %s -check-prefix=fmask1 -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips32-function-mask=01 -mips-os16 < %s | FileCheck %s -check-prefix=fmask2 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -mips32-function-mask=01 -mips-os16 < %s | FileCheck %s -check-prefix=fmask2 -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips32-function-mask=10. -mips-os16 < %s | FileCheck %s -check-prefix=fmask1nr +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -mips32-function-mask=10. -mips-os16 < %s | FileCheck %s -check-prefix=fmask1nr ; Function Attrs: nounwind optsize readnone define void @foo1() { @@ -17,7 +17,7 @@ entry: ; fmask1: .set reorder ; fmask1: .end foo1 ; fmask2: .ent foo1 -; fmask2: save {{.*}} +; fmask2: jrc $ra ; fmask2: .end foo1 ; fmask1nr: .ent foo1 ; fmask1nr: .set noreorder @@ -42,10 +42,10 @@ entry: ; fmask2: .set reorder ; fmask2: .end foo2 ; fmask1: .ent foo2 -; fmask1: save {{.*}} +; fmask1: jrc $ra ; fmask1: .end foo2 ; fmask1nr: .ent foo2 -; fmask1nr: save {{.*}} +; fmask1nr: jrc $ra ; fmask1nr: .end foo2 } @@ -62,10 +62,10 @@ entry: ; fmask1: .set reorder ; fmask1: .end foo3 ; fmask2: .ent foo3 -; fmask2: save {{.*}} +; fmask2: jrc $ra ; fmask2: .end foo3 ; fmask1r: .ent foo3 -; fmask1r: save {{.*}} +; fmask1r: jrc $ra ; fmask1r: .end foo3 } @@ -82,10 +82,10 @@ entry: ; fmask2: .set reorder ; fmask2: .end foo4 ; fmask1: .ent foo4 -; fmask1: save {{.*}} +; fmask1: jrc $ra ; fmask1: .end foo4 ; fmask1nr: .ent foo4 -; fmask1nr: save {{.*}} +; fmask1nr: jrc $ra ; fmask1nr: .end foo4 } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fp16static.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fp16static.ll index 240ec75a36b6..beb063db15ca 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fp16static.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fp16static.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16 +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16 @x = common global float 0.000000e+00, align 4 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fpnotneeded.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fpnotneeded.ll index b4fab6414223..6a4363c58136 100644 --- 
a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fpnotneeded.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fpnotneeded.ll @@ -1,4 +1,6 @@ -; RUN: llc -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-os16 | FileCheck %s -check-prefix=32 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-os16 | FileCheck %s -check-prefix=32 + +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -relocation-model=static -O3 -mips16-constant-islands < %s -mips-os16 | FileCheck %s -check-prefix=cisle @i = global i32 1, align 4 @f = global float 1.000000e+00, align 4 @@ -57,6 +59,8 @@ entry: ; 32: restore {{.+}} ; 32: .end foo +; cisle: .end foo + attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fptr2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fptr2.ll index 77028dbde9aa..c8b5e0d1771e 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fptr2.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/fptr2.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=static16 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=static16 ; Function Attrs: nounwind define double @my_mul(double %a, double %b) #0 { diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/helloworld.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/helloworld.ll index 058a041c16a9..709c12e1c67f 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/helloworld.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/helloworld.ll @@ -25,10 +25,9 @@ entry: ; SR32: .set noreorder ; SR32: .set nomacro ; SR32: .set noat -; SR: save $ra, $s0, $s1, $s2, [[FS:[0-9]+]] +; SR: save $ra, 24 # 16 bit inst ; PE: .ent main -; PE: .align 2 -; PE-NEXT: li $[[T1:[0-9]+]], %hi(_gp_disp) +; PE: li $[[T1:[0-9]+]], %hi(_gp_disp) ; PE-NEXT: addiu $[[T2:[0-9]+]], $pc, %lo(_gp_disp) ; PE: sll $[[T3:[0-9]+]], $[[T1]], 16 ; C1: lw ${{[0-9]+}}, %got($.str)(${{[0-9]+}}) @@ -37,7 +36,7 @@ entry: ; C2: move $25, ${{[0-9]+}} ; C1: move $gp, ${{[0-9]+}} ; C1: jalrc ${{[0-9]+}} -; SR: restore $ra, $s0, $s1, $s2, [[FS]] +; SR: restore $ra, 24 # 16 bit inst ; PE: li $2, 0 ; PE: jrc $ra diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hf16_1.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hf16_1.ll index c7454ee0a8dd..9879cd523af3 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hf16_1.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hf16_1.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -soft-float -mips16-hard-float -O3 < %s | FileCheck %s -check-prefix=1 -; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -soft-float -mips16-hard-float -O3 < %s | FileCheck %s -check-prefix=2 +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=1 +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=2 @x = common global float 0.000000e+00, align 4 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hf16call32.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hf16call32.ll index 461438e8bec0..aec9c71c485b 
100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hf16call32.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hf16call32.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=stel +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=stel @x = common global float 0.000000e+00, align 4 @y = common global float 0.000000e+00, align 4 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hf16call32_body.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hf16call32_body.ll index 34bae26f85f3..adac31460c44 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hf16call32_body.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hf16call32_body.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=stel +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=stel @x = external global float @xd = external global double diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hf1_body.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hf1_body.ll index b2cce92aa1a4..5acfe86373d9 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hf1_body.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hf1_body.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -soft-float -mips16-hard-float < %s | FileCheck %s -check-prefix=picfp16 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=picfp16 @x = external global float diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hfptrcall.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hfptrcall.ll index 25639dad63a8..9df8d900693c 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hfptrcall.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/hfptrcall.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=picel +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=picel @ptrsv = global float ()* @sv, align 4 @ptrdv = global double ()* @dv, align 4 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/i32k.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/i32k.ll index f4dd1eb78a1d..73f1302beec0 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/i32k.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/i32k.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -mips16-constant-islands=false -O3 < %s | FileCheck %s -check-prefix=16 @.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/lcb2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/lcb2.ll new file mode 100644 index 000000000000..715584b6797d --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/lcb2.ll @@ -0,0 +1,133 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float 
-mips16-hard-float -relocation-model=static -mips16-constant-islands=true < %s | FileCheck %s -check-prefix=lcb + +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands=true < %s | FileCheck %s -check-prefix=lcbn + +@i = global i32 0, align 4 +@j = common global i32 0, align 4 +@k = common global i32 0, align 4 + +; Function Attrs: nounwind optsize +define i32 @bnez() #0 { +entry: + %0 = load i32* @i, align 4, !tbaa !1 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void asm sideeffect ".space 10000", ""() #1, !srcloc !5 + store i32 0, i32* @i, align 4, !tbaa !1 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i32 0 +} +; lcb: .ent bnez +; lcbn: .ent bnez +; lcb: bnez ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}} +; lcbn-NOT: bnez ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst +; lcb: .end bnez +; lcbn: .end bnez + +; Function Attrs: nounwind optsize +define i32 @beqz() #0 { +entry: + %0 = load i32* @i, align 4, !tbaa !1 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + store i32 10, i32* @j, align 4, !tbaa !1 + tail call void asm sideeffect ".space 10000", ""() #1, !srcloc !6 + br label %if.end + +if.else: ; preds = %entry + store i32 55, i32* @j, align 4, !tbaa !1 + tail call void asm sideeffect ".space 10000", ""() #1, !srcloc !7 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret i32 0 +} + +; lcb: .ent beqz +; lcbn: .ent beqz +; lcb: beqz ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}} +; lcbn-NOT: beqz ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst +; lcb: .end beqz +; lcbn: .end beqz + + +; Function Attrs: nounwind optsize +define void @bteqz() #0 { +entry: + %0 = load i32* @i, align 4, !tbaa !1 + %1 = load i32* @j, align 4, !tbaa !1 + %cmp = icmp eq i32 %0, %1 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + store i32 1, i32* @k, align 4, !tbaa !1 + tail call void asm sideeffect ".space 1000", ""() #1, !srcloc !8 + br label %if.end + +if.else: ; preds = %entry + tail call void asm sideeffect ".space 1000", ""() #1, !srcloc !9 + store i32 2, i32* @k, align 4, !tbaa !1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +; lcb: .ent bteqz +; lcbn: .ent bteqz +; lcb: btnez $BB{{[0-9]+}}_{{[0-9]+}} +; lcbn-NOT: btnez $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst +; lcb: .end bteqz +; lcbn: .end bteqz + + +; Function Attrs: nounwind optsize +define void @btz() #0 { +entry: + %0 = load i32* @i, align 4, !tbaa !1 + %1 = load i32* @j, align 4, !tbaa !1 + %cmp1 = icmp sgt i32 %0, %1 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %entry, %if.then + tail call void asm sideeffect ".space 60000", ""() #1, !srcloc !10 + %2 = load i32* @i, align 4, !tbaa !1 + %3 = load i32* @j, align 4, !tbaa !1 + %cmp = icmp sgt i32 %2, %3 + br i1 %cmp, label %if.then, label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; lcb: .ent btz +; lcbn: .ent btz +; lcb: bteqz $BB{{[0-9]+}}_{{[0-9]+}} +; lcbn-NOT: bteqz $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst +; lcb: btnez $BB{{[0-9]+}}_{{[0-9]+}} +; lcbn-NOT: btnez $BB{{[0-9]+}}_{{[0-9]+}} # 16 bit inst +; lcb: .end btz +; lcbn: .end btz + +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } 
+attributes #1 = { nounwind } + +!llvm.ident = !{!0} + +!0 = metadata !{metadata !"clang version 3.5 (gitosis@dmz-portal.mips.com:clang.git ed197d08c90d82e1119774e10920e6f7a841c8ec) (gitosis@dmz-portal.mips.com:llvm.git b9235a363fa2dddb26ac01cbaed58efbc9eff392)"} +!1 = metadata !{metadata !2, metadata !2, i64 0} +!2 = metadata !{metadata !"int", metadata !3, i64 0} +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} +!4 = metadata !{metadata !"Simple C/C++ TBAA"} +!5 = metadata !{i32 59} +!6 = metadata !{i32 156} +!7 = metadata !{i32 210} +!8 = metadata !{i32 299} +!9 = metadata !{i32 340} +!10 = metadata !{i32 412} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/lcb3c.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/lcb3c.ll new file mode 100644 index 000000000000..72a0b8cf5cea --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/lcb3c.ll @@ -0,0 +1,59 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -O0 < %s | FileCheck %s -check-prefix=lcb + +@i = global i32 0, align 4 +@j = common global i32 0, align 4 +@k = common global i32 0, align 4 + +; Function Attrs: nounwind +define i32 @s() #0 { +entry: + %0 = load i32* @i, align 4 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + store i32 0, i32* @i, align 4 + call void asm sideeffect ".space 1000", ""() #1, !srcloc !1 + br label %if.end + +if.else: ; preds = %entry + store i32 1, i32* @i, align 4 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret i32 0 +; lcb: bnez $2, $BB0_2 +; lcb: b $BB0_1 # 16 bit inst +; lcb: $BB0_1: # %if.then +} + +; Function Attrs: nounwind +define i32 @b() #0 { +entry: + %0 = load i32* @i, align 4 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + store i32 0, i32* @i, align 4 + call void asm sideeffect ".space 1000000", ""() #1, !srcloc !2 + br label %if.end + +if.else: ; preds = %entry + store i32 1, i32* @i, align 4 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret i32 0 +} + +; lcb: beqz $2, $BB1_1 # 16 bit inst +; lcb: jal $BB1_2 # branch +; lcb: $BB1_1: # %if.then + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } + + +!1 = metadata !{i32 65} +!2 = metadata !{i32 167} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/lcb4a.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/lcb4a.ll new file mode 100644 index 000000000000..e37feca78179 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/lcb4a.ll @@ -0,0 +1,69 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=ci + +@i = global i32 0, align 4 +@j = common global i32 0, align 4 +@k = common global i32 0, align 4 + +; Function Attrs: nounwind optsize +define i32 @foo() #0 { +entry: + %0 = load i32* @i, align 4, !tbaa !1 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call void asm sideeffect ".space 1000", ""() #1, !srcloc !5 + br label %if.end + +if.else: ; preds = %entry + tail call void asm sideeffect ".space 1004", ""() #1, !srcloc !6 + br label %if.end + +if.end: ; preds = %if.else, %if.then 
+ %storemerge = phi i32 [ 1, %if.else ], [ 0, %if.then ] + store i32 %storemerge, i32* @i, align 4, !tbaa !1 + ret i32 0 +} + +; ci: beqz $3, $BB0_2 +; ci: # BB#1: # %if.else + + +; Function Attrs: nounwind optsize +define i32 @goo() #0 { +entry: + %0 = load i32* @i, align 4, !tbaa !1 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call void asm sideeffect ".space 1000000", ""() #1, !srcloc !7 + br label %if.end + +if.else: ; preds = %entry + tail call void asm sideeffect ".space 1000004", ""() #1, !srcloc !8 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %storemerge = phi i32 [ 1, %if.else ], [ 0, %if.then ] + store i32 %storemerge, i32* @i, align 4, !tbaa !1 + ret i32 0 +} + +; ci: bnez $3, $BB1_1 # 16 bit inst +; ci: jal $BB1_2 # branch +; ci: nop +; ci: $BB1_1: # %if.else + +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } + + +!1 = metadata !{metadata !2, metadata !2, i64 0} +!2 = metadata !{metadata !"int", metadata !3, i64 0} +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} +!4 = metadata !{metadata !"Simple C/C++ TBAA"} +!5 = metadata !{i32 58} +!6 = metadata !{i32 108} +!7 = metadata !{i32 190} +!8 = metadata !{i32 243} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/lcb5.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/lcb5.ll new file mode 100644 index 000000000000..0a89c804945f --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/lcb5.ll @@ -0,0 +1,240 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=ci + +@i = global i32 0, align 4 +@j = common global i32 0, align 4 +@k = common global i32 0, align 4 + +; Function Attrs: nounwind optsize +define i32 @x0() #0 { +entry: + %0 = load i32* @i, align 4, !tbaa !1 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call void asm sideeffect ".space 1000", ""() #1, !srcloc !5 + br label %if.end + +if.else: ; preds = %entry + tail call void asm sideeffect ".space 1004", ""() #1, !srcloc !6 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %storemerge = phi i32 [ 1, %if.else ], [ 0, %if.then ] + store i32 %storemerge, i32* @i, align 4, !tbaa !1 + ret i32 0 +} + +; ci: .ent x0 +; ci: beqz $3, $BB0_2 +; ci: $BB0_2: +; ci: .end x0 + +; Function Attrs: nounwind optsize +define i32 @x1() #0 { +entry: + %0 = load i32* @i, align 4, !tbaa !1 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call void asm sideeffect ".space 1000000", ""() #1, !srcloc !7 + br label %if.end + +if.else: ; preds = %entry + tail call void asm sideeffect ".space 1000004", ""() #1, !srcloc !8 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %storemerge = phi i32 [ 1, %if.else ], [ 0, %if.then ] + store i32 %storemerge, i32* @i, align 4, !tbaa !1 + ret i32 0 +} + +; ci: .ent x1 +; ci: bnez $3, $BB1_1 # 16 bit inst +; ci: jal $BB1_2 # branch +; ci: nop +; ci: $BB1_1: +; ci: .end x1 + +; Function Attrs: nounwind optsize +define i32 @y0() #0 { +entry: + %0 = load i32* @i, align 4, !tbaa !1 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + store i32 10, 
i32* @j, align 4, !tbaa !1 + tail call void asm sideeffect ".space 1000", ""() #1, !srcloc !9 + br label %if.end + +if.else: ; preds = %entry + store i32 55, i32* @j, align 4, !tbaa !1 + tail call void asm sideeffect ".space 1004", ""() #1, !srcloc !10 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret i32 0 +} + +; ci: .ent y0 +; ci: beqz $2, $BB2_2 +; ci: .end y0 + +; Function Attrs: nounwind optsize +define i32 @y1() #0 { +entry: + %0 = load i32* @i, align 4, !tbaa !1 + %cmp = icmp eq i32 %0, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + store i32 10, i32* @j, align 4, !tbaa !1 + tail call void asm sideeffect ".space 1000000", ""() #1, !srcloc !11 + br label %if.end + +if.else: ; preds = %entry + store i32 55, i32* @j, align 4, !tbaa !1 + tail call void asm sideeffect ".space 1000004", ""() #1, !srcloc !12 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret i32 0 +} + +; ci: .ent y1 +; ci: bnez $2, $BB3_1 # 16 bit inst +; ci: jal $BB3_2 # branch +; ci: nop +; ci: $BB3_1: +; ci: .end y1 + +; Function Attrs: nounwind optsize +define void @z0() #0 { +entry: + %0 = load i32* @i, align 4, !tbaa !1 + %1 = load i32* @j, align 4, !tbaa !1 + %cmp = icmp eq i32 %0, %1 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + store i32 1, i32* @k, align 4, !tbaa !1 + tail call void asm sideeffect ".space 10000", ""() #1, !srcloc !13 + br label %if.end + +if.else: ; preds = %entry + tail call void asm sideeffect ".space 10004", ""() #1, !srcloc !14 + store i32 2, i32* @k, align 4, !tbaa !1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +; ci: .ent z0 +; ci: btnez $BB4_2 +; ci: .end z0 + +; Function Attrs: nounwind optsize +define void @z1() #0 { +entry: + %0 = load i32* @i, align 4, !tbaa !1 + %1 = load i32* @j, align 4, !tbaa !1 + %cmp = icmp eq i32 %0, %1 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + store i32 1, i32* @k, align 4, !tbaa !1 + tail call void asm sideeffect ".space 10000000", ""() #1, !srcloc !15 + br label %if.end + +if.else: ; preds = %entry + tail call void asm sideeffect ".space 10000004", ""() #1, !srcloc !16 + store i32 2, i32* @k, align 4, !tbaa !1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +; ci: .ent z1 +; ci: bteqz $BB5_1 # 16 bit inst +; ci: jal $BB5_2 # branch +; ci: nop +; ci: $BB5_1: +; ci: .end z1 + +; Function Attrs: nounwind optsize +define void @z3() #0 { +entry: + %0 = load i32* @i, align 4, !tbaa !1 + %1 = load i32* @j, align 4, !tbaa !1 + %cmp1 = icmp sgt i32 %0, %1 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %entry, %if.then + tail call void asm sideeffect ".space 10000", ""() #1, !srcloc !17 + %2 = load i32* @i, align 4, !tbaa !1 + %3 = load i32* @j, align 4, !tbaa !1 + %cmp = icmp sgt i32 %2, %3 + br i1 %cmp, label %if.then, label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; ci: .ent z3 +; ci: bteqz $BB6_2 +; ci: .end z3 + +; Function Attrs: nounwind optsize +define void @z4() #0 { +entry: + %0 = load i32* @i, align 4, !tbaa !1 + %1 = load i32* @j, align 4, !tbaa !1 + %cmp1 = icmp sgt i32 %0, %1 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %entry, %if.then + tail call void asm sideeffect ".space 10000000", ""() #1, !srcloc !18 + %2 = load i32* @i, align 4, !tbaa !1 + %3 = load i32* @j, align 4, !tbaa !1 + %cmp = icmp sgt i32 %2, %3 + br i1 %cmp, label %if.then, label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; 
ci: .ent z4 +; ci: btnez $BB7_1 # 16 bit inst +; ci: jal $BB7_2 # branch +; ci: nop +; ci: .align 2 +; ci: $BB7_1: +; ci: .end z4 + +attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } + + +!1 = metadata !{metadata !2, metadata !2, i64 0} +!2 = metadata !{metadata !"int", metadata !3, i64 0} +!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0} +!4 = metadata !{metadata !"Simple C/C++ TBAA"} +!5 = metadata !{i32 57} +!6 = metadata !{i32 107} +!7 = metadata !{i32 188} +!8 = metadata !{i32 241} +!9 = metadata !{i32 338} +!10 = metadata !{i32 391} +!11 = metadata !{i32 477} +!12 = metadata !{i32 533} +!13 = metadata !{i32 621} +!14 = metadata !{i32 663} +!15 = metadata !{i32 747} +!16 = metadata !{i32 792} +!17 = metadata !{i32 867} +!18 = metadata !{i32 953} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mbrsize4a.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mbrsize4a.ll new file mode 100644 index 000000000000..c80299166ab4 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mbrsize4a.ll @@ -0,0 +1,37 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands < %s | FileCheck %s -check-prefix=jal16 + +@j = global i32 10, align 4 +@.str = private unnamed_addr constant [11 x i8] c"at bottom\0A\00", align 1 +@i = common global i32 0, align 4 + +; Function Attrs: nounwind +define i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + br label %z + +z: ; preds = %y, %entry + %call = call i32 bitcast (i32 (...)* @foo to i32 ()*)() + call void asm sideeffect ".space 10000000", ""() #2, !srcloc !1 + br label %y + +y: ; preds = %z + %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0)) + br label %z + +return: ; No predecessors! + %0 = load i32* %retval + ret i32 %0 +; jal16: jal $BB{{[0-9]+}}_{{[0-9]+}} +} + +declare i32 @foo(...) #1 + +declare i32 @printf(i8*, ...) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } + +!1 = metadata !{i32 68} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16-hf-attr.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16-hf-attr.ll new file mode 100644 index 000000000000..d9ad6295bef8 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16-hf-attr.ll @@ -0,0 +1,45 @@ +; Check that stubs generation for mips16 hard-float mode does not depend +; on the function 'use-soft-float' attribute's value. 
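+; Both bar_sf ("use-soft-float"="false") and bar_hf ("use-soft-float"="true")
+; below must reach the float-returning foo() through the same
+; __mips16_call_stub_sf_1 helper, as the CHECK lines verify.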
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel \ +; RUN: -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s + +define void @bar_sf() #0 { +; CHECK: bar_sf: +entry: + %call1 = call float @foo(float 1.000000e+00) +; CHECK: lw $2, %call16(foo)($3) +; CHECK: lw $5, %got(__mips16_call_stub_sf_1)($3) + ret void +} + +define void @bar_hf() #1 { +; CHECK: bar_hf: +entry: + %call1 = call float @foo(float 1.000000e+00) +; CHECK: lw $2, %call16(foo)($3) +; CHECK: lw $5, %got(__mips16_call_stub_sf_1)($3) + ret void +} + +declare float @foo(float) #2 + +attributes #0 = { + nounwind + "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" + "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" + "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" + "unsafe-fp-math"="false" "use-soft-float"="false" +} +attributes #1 = { + nounwind + "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" + "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" + "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" + "unsafe-fp-math"="false" "use-soft-float"="true" +} +attributes #2 = { + "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" + "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" + "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" + "unsafe-fp-math"="false" "use-soft-float"="true" +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_1.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_1.ll index e156641d4e50..a72cfc84875d 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_1.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_1.ll @@ -8,7 +8,6 @@ entry: ; CHECK: .set mips16 # @foo ; CHECK: .ent foo -; CHECK: save {{.+}} -; CHECK: restore {{.+}} +; CHECK: jrc $ra ; CHECK: .end foo attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_10.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_10.ll index 7c017b8e4b75..686faa718e08 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_10.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_10.ll @@ -24,8 +24,7 @@ entry: ; 16: .set mips16 # @nofoo ; 16: .ent nofoo -; 16: save {{.+}} -; 16: restore {{.+}} +; 16: jrc $ra ; 16: .end nofoo define i32 @main() #2 { diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_3.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_3.ll index dd94ec1ce80a..4ad427f0df7f 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_3.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_3.ll @@ -8,13 +8,11 @@ entry: ; 16: .set mips16 # @foo ; 16: .ent foo -; 16: save {{.+}} -; 16: restore {{.+}} +; 16: jrc $ra ; 16: .end foo ; 32: .set mips16 # @foo ; 32: .ent foo -; 32: save {{.+}} -; 32: restore {{.+}} +; 32: jrc $ra ; 32: .end foo define void @nofoo() #1 { entry: @@ -50,8 +48,7 @@ entry: ; 16: .set mips16 # @main ; 16: .ent main -; 16: save {{.+}} -; 16: restore {{.+}} +; 16: jrc $ra ; 16: .end main ; 32: .set nomips16 # @main ; 32: .ent main diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_4.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_4.ll index 5e4907139445..e8100e8237a1 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_4.ll +++ 
b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_4.ll @@ -8,13 +8,11 @@ entry: ; 16: .set mips16 # @foo ; 16: .ent foo -; 16: save {{.+}} -; 16: restore {{.+}} +; 16: jrc $ra ; 16: .end foo ; 32: .set mips16 # @foo ; 32: .ent foo -; 32: save {{.+}} -; 32: restore {{.+}} +; 32: jrc $ra ; 32: .end foo define void @nofoo() #1 { entry: @@ -50,13 +48,11 @@ entry: ; 16: .set mips16 # @main ; 16: .ent main -; 16: save {{.+}} -; 16: restore {{.+}} +; 16: jrc $ra ; 16: .end main ; 32: .set mips16 # @main ; 32: .ent main -; 32: save {{.+}} -; 32: restore {{.+}} +; 32: jrc $ra ; 32: .end main diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_5.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_5.ll index 17900a2dc75f..5bdeede5c66d 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_5.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_5.ll @@ -8,13 +8,11 @@ entry: ; 16: .set mips16 # @foo ; 16: .ent foo -; 16: save {{.+}} -; 16: restore {{.+}} +; 16: jrc $ra ; 16: .end foo ; 32: .set mips16 # @foo ; 32: .ent foo -; 32: save {{.+}} -; 32: restore {{.+}} +; 32: jrc $ra ; 32: .end foo define void @nofoo() #1 { entry: diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_6.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_6.ll index a77031af8be6..45f71f01ec65 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_6.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_6.ll @@ -8,8 +8,7 @@ entry: ; 16: .set mips16 # @foo ; 16: .ent foo -; 16: save {{.+}} -; 16: restore {{.+}} +; 16: jrc $ra ; 16: .end foo ; 32: .set nomips16 # @foo ; 32: .ent foo diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_7.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_7.ll index 895b5d4346a8..e171b367c47e 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_7.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_7.ll @@ -8,8 +8,7 @@ entry: ; 16: .set mips16 # @foo ; 16: .ent foo -; 16: save {{.+}} -; 16: restore {{.+}} +; 16: jrc $ra ; 16: .end foo ; 32: .set nomips16 # @foo ; 32: .ent foo @@ -56,14 +55,12 @@ entry: ; 16: .set mips16 # @main ; 16: .ent main -; 16: save {{.+}} -; 16: restore {{.+}} +; 16: jrc $ra ; 16: .end main ; 32: .set mips16 # @main ; 32: .ent main -; 32: save {{.+}} -; 32: restore {{.+}} +; 32: jrc $ra ; 32: .end main diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_8.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_8.ll index 4152d687093e..3c8cc5a4e281 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_8.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_8.ll @@ -16,8 +16,7 @@ entry: ; 32: .set mips16 # @foo ; 32: .ent foo -; 32: save {{.+}} -; 32: restore {{.+}} +; 32: jrc $ra ; 32: .end foo define void @nofoo() #1 { diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_9.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_9.ll index c9b494f2a890..585a4e8408b4 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_9.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_32_9.ll @@ -7,8 +7,7 @@ entry: ; 32: .set mips16 # @foo ; 32: .ent foo -; 32: save {{.+}} -; 32: restore {{.+}} +; 32: jrc $ra ; 32: .end foo define void @nofoo() #1 { entry: @@ -33,8 +32,7 @@ entry: ; 32: .set mips16 # @main ; 32: .ent main -; 32: save {{.+}} -; 32: restore {{.+}} +; 32: jrc $ra ; 32: .end main diff --git 
a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_fpret.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_fpret.ll index c132f63cfb01..fe87604d6107 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_fpret.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16_fpret.ll @@ -1,7 +1,7 @@ -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=1 -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=2 -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=3 -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=4 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=1 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=2 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=3 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=4 @x = global float 0x41F487E980000000, align 4 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16fpe.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16fpe.ll index 10c5163f7fd0..987980e080ff 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16fpe.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/mips16fpe.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 -soft-float -mips16-hard-float < %s | FileCheck %s -check-prefix=16hf +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16hf @x = global float 5.000000e+00, align 4 @y = global float 1.500000e+01, align 4 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/msa/3r-s.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/msa/3r-s.ll index 30cf265233e5..581c3bfd78af 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/msa/3r-s.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/msa/3r-s.ll @@ -5,98 +5,114 @@ ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s @llvm_mips_sld_b_ARG1 = global <16 x i8> , align 16 -@llvm_mips_sld_b_ARG2 = global i32 10, align 16 +@llvm_mips_sld_b_ARG2 = global <16 x i8> , align 16 +@llvm_mips_sld_b_ARG3 = global i32 10, align 16 @llvm_mips_sld_b_RES = global <16 x i8> , align 16 define void @llvm_mips_sld_b_test() nounwind { entry: %0 = load <16 x i8>* @llvm_mips_sld_b_ARG1 - %1 = load i32* @llvm_mips_sld_b_ARG2 - %2 = tail call <16 x i8> @llvm.mips.sld.b(<16 x i8> %0, i32 %1) - store <16 x i8> %2, <16 x i8>* @llvm_mips_sld_b_RES + %1 = load <16 x i8>* @llvm_mips_sld_b_ARG2 + %2 = load i32* @llvm_mips_sld_b_ARG3 + %3 = tail call <16 x i8> @llvm.mips.sld.b(<16 x i8> %0, <16 x i8> %1, i32 %2) + store <16 x i8> %3, <16 x i8>* @llvm_mips_sld_b_RES ret void } -declare <16 x i8> @llvm.mips.sld.b(<16 x i8>, i32) nounwind +declare <16 x i8> @llvm.mips.sld.b(<16 x i8>, <16 x i8>, i32) nounwind ; CHECK: llvm_mips_sld_b_test: ; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_b_ARG1) ; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_b_ARG2) -; CHECK-DAG: 
ld.b [[WS:\$w[0-9]+]], 0([[R1]]) -; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]]) -; CHECK-DAG: sld.b [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}} +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_sld_b_ARG3) +; CHECK-DAG: ld.b [[WD:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[WS:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R3]]) +; CHECK-DAG: sld.b [[WD]], [[WS]]{{\[}}[[RT]]{{\]}} ; CHECK-DAG: st.b [[WD]] ; CHECK: .size llvm_mips_sld_b_test ; @llvm_mips_sld_h_ARG1 = global <8 x i16> , align 16 -@llvm_mips_sld_h_ARG2 = global i32 10, align 16 +@llvm_mips_sld_h_ARG2 = global <8 x i16> , align 16 +@llvm_mips_sld_h_ARG3 = global i32 10, align 16 @llvm_mips_sld_h_RES = global <8 x i16> , align 16 define void @llvm_mips_sld_h_test() nounwind { entry: %0 = load <8 x i16>* @llvm_mips_sld_h_ARG1 - %1 = load i32* @llvm_mips_sld_h_ARG2 - %2 = tail call <8 x i16> @llvm.mips.sld.h(<8 x i16> %0, i32 %1) - store <8 x i16> %2, <8 x i16>* @llvm_mips_sld_h_RES + %1 = load <8 x i16>* @llvm_mips_sld_h_ARG2 + %2 = load i32* @llvm_mips_sld_h_ARG3 + %3 = tail call <8 x i16> @llvm.mips.sld.h(<8 x i16> %0, <8 x i16> %1, i32 %2) + store <8 x i16> %3, <8 x i16>* @llvm_mips_sld_h_RES ret void } -declare <8 x i16> @llvm.mips.sld.h(<8 x i16>, i32) nounwind +declare <8 x i16> @llvm.mips.sld.h(<8 x i16>, <8 x i16>, i32) nounwind ; CHECK: llvm_mips_sld_h_test: ; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_h_ARG1) -; CHECK-DAG: lw [[RT:\$[0-9]+]], %got(llvm_mips_sld_h_ARG2) -; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R1]]) -; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]]) -; CHECK-DAG: sld.h [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}} +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_h_ARG2) +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_sld_h_ARG3) +; CHECK-DAG: ld.h [[WD:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[WS:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R3]]) +; CHECK-DAG: sld.h [[WD]], [[WS]]{{\[}}[[RT]]{{\]}} ; CHECK-DAG: st.h [[WD]] ; CHECK: .size llvm_mips_sld_h_test ; @llvm_mips_sld_w_ARG1 = global <4 x i32> , align 16 -@llvm_mips_sld_w_ARG2 = global i32 10, align 16 +@llvm_mips_sld_w_ARG2 = global <4 x i32> , align 16 +@llvm_mips_sld_w_ARG3 = global i32 10, align 16 @llvm_mips_sld_w_RES = global <4 x i32> , align 16 define void @llvm_mips_sld_w_test() nounwind { entry: %0 = load <4 x i32>* @llvm_mips_sld_w_ARG1 - %1 = load i32* @llvm_mips_sld_w_ARG2 - %2 = tail call <4 x i32> @llvm.mips.sld.w(<4 x i32> %0, i32 %1) - store <4 x i32> %2, <4 x i32>* @llvm_mips_sld_w_RES + %1 = load <4 x i32>* @llvm_mips_sld_w_ARG2 + %2 = load i32* @llvm_mips_sld_w_ARG3 + %3 = tail call <4 x i32> @llvm.mips.sld.w(<4 x i32> %0, <4 x i32> %1, i32 %2) + store <4 x i32> %3, <4 x i32>* @llvm_mips_sld_w_RES ret void } -declare <4 x i32> @llvm.mips.sld.w(<4 x i32>, i32) nounwind +declare <4 x i32> @llvm.mips.sld.w(<4 x i32>, <4 x i32>, i32) nounwind ; CHECK: llvm_mips_sld_w_test: ; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_w_ARG1) -; CHECK-DAG: lw [[RT:\$[0-9]+]], %got(llvm_mips_sld_w_ARG2) -; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]]) -; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]]) -; CHECK-DAG: sld.w [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}} +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_w_ARG2) +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_sld_w_ARG3) +; CHECK-DAG: ld.w [[WD:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R3]]) +; CHECK-DAG: sld.w [[WD]], [[WS]]{{\[}}[[RT]]{{\]}} ; CHECK-DAG: st.w [[WD]] ; CHECK: .size 
llvm_mips_sld_w_test ; @llvm_mips_sld_d_ARG1 = global <2 x i64> , align 16 -@llvm_mips_sld_d_ARG2 = global i32 10, align 16 +@llvm_mips_sld_d_ARG2 = global <2 x i64> , align 16 +@llvm_mips_sld_d_ARG3 = global i32 10, align 16 @llvm_mips_sld_d_RES = global <2 x i64> , align 16 define void @llvm_mips_sld_d_test() nounwind { entry: %0 = load <2 x i64>* @llvm_mips_sld_d_ARG1 - %1 = load i32* @llvm_mips_sld_d_ARG2 - %2 = tail call <2 x i64> @llvm.mips.sld.d(<2 x i64> %0, i32 %1) - store <2 x i64> %2, <2 x i64>* @llvm_mips_sld_d_RES + %1 = load <2 x i64>* @llvm_mips_sld_d_ARG2 + %2 = load i32* @llvm_mips_sld_d_ARG3 + %3 = tail call <2 x i64> @llvm.mips.sld.d(<2 x i64> %0, <2 x i64> %1, i32 %2) + store <2 x i64> %3, <2 x i64>* @llvm_mips_sld_d_RES ret void } -declare <2 x i64> @llvm.mips.sld.d(<2 x i64>, i32) nounwind +declare <2 x i64> @llvm.mips.sld.d(<2 x i64>, <2 x i64>, i32) nounwind ; CHECK: llvm_mips_sld_d_test: ; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_sld_d_ARG1) -; CHECK-DAG: lw [[RT:\$[0-9]+]], %got(llvm_mips_sld_d_ARG2) -; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]]) -; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R2]]) -; CHECK-DAG: sld.d [[WD:\$w[0-9]+]], [[WS]]{{\[}}[[RT]]{{\]}} +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_sld_d_ARG2) +; CHECK-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_sld_d_ARG3) +; CHECK-DAG: ld.d [[WD:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: lw [[RT:\$[0-9]+]], 0([[R3]]) +; CHECK-DAG: sld.d [[WD]], [[WS]]{{\[}}[[RT]]{{\]}} ; CHECK-DAG: st.d [[WD]] ; CHECK: .size llvm_mips_sld_d_test ; diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/msa/elm_shift_slide.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/msa/elm_shift_slide.ll index 39d670dac841..00a6544b1207 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/msa/elm_shift_slide.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/msa/elm_shift_slide.ll @@ -5,17 +5,19 @@ ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s @llvm_mips_sldi_b_ARG1 = global <16 x i8> , align 16 +@llvm_mips_sldi_b_ARG2 = global <16 x i8> , align 16 @llvm_mips_sldi_b_RES = global <16 x i8> , align 16 define void @llvm_mips_sldi_b_test() nounwind { entry: %0 = load <16 x i8>* @llvm_mips_sldi_b_ARG1 - %1 = tail call <16 x i8> @llvm.mips.sldi.b(<16 x i8> %0, i32 1) - store <16 x i8> %1, <16 x i8>* @llvm_mips_sldi_b_RES + %1 = load <16 x i8>* @llvm_mips_sldi_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.sldi.b(<16 x i8> %0, <16 x i8> %1, i32 1) + store <16 x i8> %2, <16 x i8>* @llvm_mips_sldi_b_RES ret void } -declare <16 x i8> @llvm.mips.sldi.b(<16 x i8>, i32) nounwind +declare <16 x i8> @llvm.mips.sldi.b(<16 x i8>, <16 x i8>, i32) nounwind ; CHECK: llvm_mips_sldi_b_test: ; CHECK: ld.b @@ -24,17 +26,19 @@ declare <16 x i8> @llvm.mips.sldi.b(<16 x i8>, i32) nounwind ; CHECK: .size llvm_mips_sldi_b_test ; @llvm_mips_sldi_h_ARG1 = global <8 x i16> , align 16 +@llvm_mips_sldi_h_ARG2 = global <8 x i16> , align 16 @llvm_mips_sldi_h_RES = global <8 x i16> , align 16 define void @llvm_mips_sldi_h_test() nounwind { entry: %0 = load <8 x i16>* @llvm_mips_sldi_h_ARG1 - %1 = tail call <8 x i16> @llvm.mips.sldi.h(<8 x i16> %0, i32 1) - store <8 x i16> %1, <8 x i16>* @llvm_mips_sldi_h_RES + %1 = load <8 x i16>* @llvm_mips_sldi_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.sldi.h(<8 x i16> %0, <8 x i16> %1, i32 1) + store <8 x i16> %2, <8 x i16>* @llvm_mips_sldi_h_RES ret void } -declare <8 x i16> @llvm.mips.sldi.h(<8 x i16>, i32) nounwind +declare <8 x i16> @llvm.mips.sldi.h(<8 x i16>, 
<8 x i16>, i32) nounwind ; CHECK: llvm_mips_sldi_h_test: ; CHECK: ld.h @@ -43,17 +47,19 @@ declare <8 x i16> @llvm.mips.sldi.h(<8 x i16>, i32) nounwind ; CHECK: .size llvm_mips_sldi_h_test ; @llvm_mips_sldi_w_ARG1 = global <4 x i32> , align 16 +@llvm_mips_sldi_w_ARG2 = global <4 x i32> , align 16 @llvm_mips_sldi_w_RES = global <4 x i32> , align 16 define void @llvm_mips_sldi_w_test() nounwind { entry: %0 = load <4 x i32>* @llvm_mips_sldi_w_ARG1 - %1 = tail call <4 x i32> @llvm.mips.sldi.w(<4 x i32> %0, i32 1) - store <4 x i32> %1, <4 x i32>* @llvm_mips_sldi_w_RES + %1 = load <4 x i32>* @llvm_mips_sldi_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.sldi.w(<4 x i32> %0, <4 x i32> %1, i32 1) + store <4 x i32> %2, <4 x i32>* @llvm_mips_sldi_w_RES ret void } -declare <4 x i32> @llvm.mips.sldi.w(<4 x i32>, i32) nounwind +declare <4 x i32> @llvm.mips.sldi.w(<4 x i32>, <4 x i32>, i32) nounwind ; CHECK: llvm_mips_sldi_w_test: ; CHECK: ld.w @@ -62,17 +68,19 @@ declare <4 x i32> @llvm.mips.sldi.w(<4 x i32>, i32) nounwind ; CHECK: .size llvm_mips_sldi_w_test ; @llvm_mips_sldi_d_ARG1 = global <2 x i64> , align 16 +@llvm_mips_sldi_d_ARG2 = global <2 x i64> , align 16 @llvm_mips_sldi_d_RES = global <2 x i64> , align 16 define void @llvm_mips_sldi_d_test() nounwind { entry: %0 = load <2 x i64>* @llvm_mips_sldi_d_ARG1 - %1 = tail call <2 x i64> @llvm.mips.sldi.d(<2 x i64> %0, i32 1) - store <2 x i64> %1, <2 x i64>* @llvm_mips_sldi_d_RES + %1 = load <2 x i64>* @llvm_mips_sldi_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.sldi.d(<2 x i64> %0, <2 x i64> %1, i32 1) + store <2 x i64> %2, <2 x i64>* @llvm_mips_sldi_d_RES ret void } -declare <2 x i64> @llvm.mips.sldi.d(<2 x i64>, i32) nounwind +declare <2 x i64> @llvm.mips.sldi.d(<2 x i64>, <2 x i64>, i32) nounwind ; CHECK: llvm_mips_sldi_d_test: ; CHECK: ld.d diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/msa/frameindex.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/msa/frameindex.ll index 3088e1ba9893..07e67bf04287 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/msa/frameindex.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/msa/frameindex.ll @@ -83,3 +83,312 @@ define void @loadstore_v16i8_just_over_simm16() nounwind { ret void ; MIPS32-AE: .size loadstore_v16i8_just_over_simm16 } + +define void @loadstore_v8i16_near() nounwind { + ; MIPS32-AE: loadstore_v8i16_near: + + %1 = alloca <8 x i16> + %2 = load volatile <8 x i16>* %1 + ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0($sp) + store volatile <8 x i16> %2, <8 x i16>* %1 + ; MIPS32-AE: st.h [[R1]], 0($sp) + + ret void + ; MIPS32-AE: .size loadstore_v8i16_near +} + +define void @loadstore_v8i16_unaligned() nounwind { + ; MIPS32-AE: loadstore_v8i16_unaligned: + + %1 = alloca [2 x <8 x i16>] + %2 = bitcast [2 x <8 x i16>]* %1 to i8* + %3 = getelementptr i8* %2, i32 1 + %4 = bitcast i8* %3 to [2 x <8 x i16>]* + %5 = getelementptr [2 x <8 x i16>]* %4, i32 0, i32 0 + + %6 = load volatile <8 x i16>* %5 + ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1 + ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <8 x i16> %6, <8 x i16>* %5 + ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1 + ; MIPS32-AE: st.h [[R1]], 0([[BASE]]) + + ret void + ; MIPS32-AE: .size loadstore_v8i16_unaligned +} + +define void @loadstore_v8i16_just_under_simm10() nounwind { + ; MIPS32-AE: loadstore_v8i16_just_under_simm10: + + %1 = alloca <8 x i16> + %2 = alloca [1008 x i8] ; Push the frame right up to 1024 bytes + + %3 = load volatile <8 x i16>* %1 + ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 1008($sp) + store volatile <8 x 
i16> %3, <8 x i16>* %1 + ; MIPS32-AE: st.h [[R1]], 1008($sp) + + ret void + ; MIPS32-AE: .size loadstore_v8i16_just_under_simm10 +} + +define void @loadstore_v8i16_just_over_simm10() nounwind { + ; MIPS32-AE: loadstore_v8i16_just_over_simm10: + + %1 = alloca <8 x i16> + %2 = alloca [1009 x i8] ; Push the frame just over 1024 bytes + + %3 = load volatile <8 x i16>* %1 + ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1024 + ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <8 x i16> %3, <8 x i16>* %1 + ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1024 + ; MIPS32-AE: st.h [[R1]], 0([[BASE]]) + + ret void + ; MIPS32-AE: .size loadstore_v8i16_just_over_simm10 +} + +define void @loadstore_v8i16_just_under_simm16() nounwind { + ; MIPS32-AE: loadstore_v8i16_just_under_simm16: + + %1 = alloca <8 x i16> + %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes + + %3 = load volatile <8 x i16>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <8 x i16> %3, <8 x i16>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: st.h [[R1]], 0([[BASE]]) + + ret void + ; MIPS32-AE: .size loadstore_v8i16_just_under_simm16 +} + +define void @loadstore_v8i16_just_over_simm16() nounwind { + ; MIPS32-AE: loadstore_v8i16_just_over_simm16: + + %1 = alloca <8 x i16> + %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes + + %3 = load volatile <8 x i16>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <8 x i16> %3, <8 x i16>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: st.h [[R1]], 0([[BASE]]) + + ret void + ; MIPS32-AE: .size loadstore_v8i16_just_over_simm16 +} + +define void @loadstore_v4i32_near() nounwind { + ; MIPS32-AE: loadstore_v4i32_near: + + %1 = alloca <4 x i32> + %2 = load volatile <4 x i32>* %1 + ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0($sp) + store volatile <4 x i32> %2, <4 x i32>* %1 + ; MIPS32-AE: st.w [[R1]], 0($sp) + + ret void + ; MIPS32-AE: .size loadstore_v4i32_near +} + +define void @loadstore_v4i32_unaligned() nounwind { + ; MIPS32-AE: loadstore_v4i32_unaligned: + + %1 = alloca [2 x <4 x i32>] + %2 = bitcast [2 x <4 x i32>]* %1 to i8* + %3 = getelementptr i8* %2, i32 1 + %4 = bitcast i8* %3 to [2 x <4 x i32>]* + %5 = getelementptr [2 x <4 x i32>]* %4, i32 0, i32 0 + + %6 = load volatile <4 x i32>* %5 + ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1 + ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <4 x i32> %6, <4 x i32>* %5 + ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1 + ; MIPS32-AE: st.w [[R1]], 0([[BASE]]) + + ret void + ; MIPS32-AE: .size loadstore_v4i32_unaligned +} + +define void @loadstore_v4i32_just_under_simm10() nounwind { + ; MIPS32-AE: loadstore_v4i32_just_under_simm10: + + %1 = alloca <4 x i32> + %2 = alloca [2032 x i8] ; Push the frame right up to 2048 bytes + + %3 = load volatile <4 x i32>* %1 + ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 2032($sp) + store volatile <4 x i32> %3, <4 x i32>* %1 + ; MIPS32-AE: st.w [[R1]], 2032($sp) + + ret void + ; MIPS32-AE: .size loadstore_v4i32_just_under_simm10 +} + +define void @loadstore_v4i32_just_over_simm10() nounwind { + ; MIPS32-AE: loadstore_v4i32_just_over_simm10: + + %1 = alloca <4 x i32> + %2 = alloca [2033 x i8] ; Push the 
frame just over 2048 bytes + + %3 = load volatile <4 x i32>* %1 + ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 2048 + ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <4 x i32> %3, <4 x i32>* %1 + ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 2048 + ; MIPS32-AE: st.w [[R1]], 0([[BASE]]) + + ret void + ; MIPS32-AE: .size loadstore_v4i32_just_over_simm10 +} + +define void @loadstore_v4i32_just_under_simm16() nounwind { + ; MIPS32-AE: loadstore_v4i32_just_under_simm16: + + %1 = alloca <4 x i32> + %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes + + %3 = load volatile <4 x i32>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <4 x i32> %3, <4 x i32>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: st.w [[R1]], 0([[BASE]]) + + ret void + ; MIPS32-AE: .size loadstore_v4i32_just_under_simm16 +} + +define void @loadstore_v4i32_just_over_simm16() nounwind { + ; MIPS32-AE: loadstore_v4i32_just_over_simm16: + + %1 = alloca <4 x i32> + %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes + + %3 = load volatile <4 x i32>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <4 x i32> %3, <4 x i32>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: st.w [[R1]], 0([[BASE]]) + + ret void + ; MIPS32-AE: .size loadstore_v4i32_just_over_simm16 +} + +define void @loadstore_v2i64_near() nounwind { + ; MIPS32-AE: loadstore_v2i64_near: + + %1 = alloca <2 x i64> + %2 = load volatile <2 x i64>* %1 + ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0($sp) + store volatile <2 x i64> %2, <2 x i64>* %1 + ; MIPS32-AE: st.d [[R1]], 0($sp) + + ret void + ; MIPS32-AE: .size loadstore_v2i64_near +} + +define void @loadstore_v2i64_unaligned() nounwind { + ; MIPS32-AE: loadstore_v2i64_unaligned: + + %1 = alloca [2 x <2 x i64>] + %2 = bitcast [2 x <2 x i64>]* %1 to i8* + %3 = getelementptr i8* %2, i32 1 + %4 = bitcast i8* %3 to [2 x <2 x i64>]* + %5 = getelementptr [2 x <2 x i64>]* %4, i32 0, i32 0 + + %6 = load volatile <2 x i64>* %5 + ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1 + ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <2 x i64> %6, <2 x i64>* %5 + ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 1 + ; MIPS32-AE: st.d [[R1]], 0([[BASE]]) + + ret void + ; MIPS32-AE: .size loadstore_v2i64_unaligned +} + +define void @loadstore_v2i64_just_under_simm10() nounwind { + ; MIPS32-AE: loadstore_v2i64_just_under_simm10: + + %1 = alloca <2 x i64> + %2 = alloca [4080 x i8] ; Push the frame right up to 4096 bytes + + %3 = load volatile <2 x i64>* %1 + ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 4080($sp) + store volatile <2 x i64> %3, <2 x i64>* %1 + ; MIPS32-AE: st.d [[R1]], 4080($sp) + + ret void + ; MIPS32-AE: .size loadstore_v2i64_just_under_simm10 +} + +define void @loadstore_v2i64_just_over_simm10() nounwind { + ; MIPS32-AE: loadstore_v2i64_just_over_simm10: + + %1 = alloca <2 x i64> + %2 = alloca [4081 x i8] ; Push the frame just over 4096 bytes + + %3 = load volatile <2 x i64>* %1 + ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 4096 + ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <2 x i64> %3, <2 x i64>* %1 + ; MIPS32-AE: addiu [[BASE:\$[0-9]+]], $sp, 4096 + ; MIPS32-AE: st.d [[R1]], 0([[BASE]]) + + ret 
void + ; MIPS32-AE: .size loadstore_v2i64_just_over_simm10 +} + +define void @loadstore_v2i64_just_under_simm16() nounwind { + ; MIPS32-AE: loadstore_v2i64_just_under_simm16: + + %1 = alloca <2 x i64> + %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes + + %3 = load volatile <2 x i64>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <2 x i64> %3, <2 x i64>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: st.d [[R1]], 0([[BASE]]) + + ret void + ; MIPS32-AE: .size loadstore_v2i64_just_under_simm16 +} + +define void @loadstore_v2i64_just_over_simm16() nounwind { + ; MIPS32-AE: loadstore_v2i64_just_over_simm16: + + %1 = alloca <2 x i64> + %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes + + %3 = load volatile <2 x i64>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]]) + store volatile <2 x i64> %3, <2 x i64>* %1 + ; MIPS32-AE: ori [[R2:\$[0-9]+]], $zero, 32768 + ; MIPS32-AE: addu [[BASE:\$[0-9]+]], $sp, [[R2]] + ; MIPS32-AE: st.d [[R1]], 0([[BASE]]) + + ret void + ; MIPS32-AE: .size loadstore_v2i64_just_over_simm16 +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/nomips16.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/nomips16.ll index bf7c667d057f..0affb16ac7c2 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/nomips16.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/nomips16.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s @x = global float 0.000000e+00, align 4 @.str = private unnamed_addr constant [20 x i8] c"in main: mips16 %f\0A\00", align 1 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/powif64_16.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/powif64_16.ll index 35a7ca9201e2..48757276bb8c 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/powif64_16.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/powif64_16.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s declare float @llvm.powi.f32(float %Val, i32 %power) declare double @llvm.powi.f64(double %Val, i32 %power) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/rotate.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/rotate.ll index 813bbdf18bbd..70eff6e224d0 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/rotate.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/rotate.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 -soft-float -mips16-hard-float < %s | FileCheck %s -check-prefix=mips16 +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 < %s | FileCheck %s -check-prefix=mips16 ; CHECK: rotrv $2, $4 ; mips16: .ent rot0 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/s2rem.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/s2rem.ll new file mode 
100644 index 000000000000..9edb5be2771e --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/s2rem.ll @@ -0,0 +1,92 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC + +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC + + +@xi = common global i32 0, align 4 +@x = common global float 0.000000e+00, align 4 +@xd = common global double 0.000000e+00, align 8 + +; Function Attrs: nounwind +define void @it() #0 { +entry: + %call = call i32 @i(i32 1) + store i32 %call, i32* @xi, align 4 + ret void +; PIC: .ent it +; STATIC: .ent it +; PIC: save $16, $17, $ra, [[FS:[0-9]+]] +; STATIC: save $16, $ra, [[FS:[0-9]+]] +; PIC: restore $16, $17, $ra, [[FS]] +; STATIC: restore $16, $ra, [[FS]] +; PIC: .end it +; STATIC: .end it +} + +declare i32 @i(i32) #1 + +; Function Attrs: nounwind +define void @ft() #0 { +entry: + %call = call float @f() + store float %call, float* @x, align 4 + ret void +; PIC: .ent ft +; PIC: save $16, $17, $ra, $18, [[FS:[0-9]+]] +; PIC: restore $16, $17, $ra, $18, [[FS]] +; PIC: .end ft +} + +declare float @f() #1 + +; Function Attrs: nounwind +define void @dt() #0 { +entry: + %call = call double @d() + store double %call, double* @xd, align 8 + ret void +; PIC: .ent dt +; PIC: save $16, $17, $ra, $18, [[FS:[0-9]+]] +; PIC: restore $16, $17, $ra, $18, [[FS]] +; PIC: .end dt +} + +declare double @d() #1 + +; Function Attrs: nounwind +define void @fft() #0 { +entry: + %0 = load float* @x, align 4 + %call = call float @ff(float %0) + store float %call, float* @x, align 4 + ret void +; PIC: .ent fft +; PIC: save $16, $17, $ra, $18, [[FS:[0-9]+]] +; PIC: restore $16, $17, $ra, $18, [[FS]] +; PIC: .end fft +} + +declare float @ff(float) #1 + +; Function Attrs: nounwind +define void @vft() #0 { +entry: + %0 = load float* @x, align 4 + call void @vf(float %0) + ret void +; PIC: .ent vft +; STATIC: .ent vft +; PIC: save $16, $ra, [[FS:[0-9]+]] +; STATIC: save $16, $ra, [[FS:[0-9]+]] +; PIC: restore $16, $ra, [[FS]] +; STATIC: restore $16, $ra, [[FS]] +; PIC: .end vft +; STATIC: .end vft +} + +declare void @vf(float) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + + diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/sel1c.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/sel1c.ll index 4c4784de6aa8..edd2e3e43b79 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/sel1c.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/sel1c.ll @@ -10,7 +10,7 @@ entry: %0 = load i32* @i, align 4 %1 = load i32* @j, align 4 %cmp = icmp eq i32 %0, %1 - %cond = select i1 %cmp, i32 1, i32 2 + %cond = select i1 %cmp, i32 1, i32 3 store i32 %cond, i32* @k, align 4 ret void ; cond-b-short: bteqz $BB0_{{[0-9]+}} # 16 bit inst diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/sel2c.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/sel2c.ll index 25dfaa9ba87e..4b211245f46e 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/sel2c.ll +++ 
b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/sel2c.ll @@ -10,7 +10,7 @@ entry: %0 = load i32* @i, align 4 %1 = load i32* @j, align 4 %cmp = icmp ne i32 %0, %1 - %cond = select i1 %cmp, i32 1, i32 2 + %cond = select i1 %cmp, i32 1, i32 3 store i32 %cond, i32* @k, align 4 ; cond-b-short: btnez $BB0_{{[0-9]+}} # 16 bit inst ret void diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/sr1.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/sr1.ll new file mode 100644 index 000000000000..610693d58b3f --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/sr1.ll @@ -0,0 +1,60 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s + +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=NEG + +@f = common global float 0.000000e+00, align 4 + +; Function Attrs: nounwind +define void @foo1() #0 { +entry: + %c = alloca [10 x i8], align 1 + %arraydecay = getelementptr inbounds [10 x i8]* %c, i32 0, i32 0 + call void @x(i8* %arraydecay) + %arraydecay1 = getelementptr inbounds [10 x i8]* %c, i32 0, i32 0 + call void @x(i8* %arraydecay1) + ret void +; CHECK: .ent foo1 +; CHECK: save $16, $17, $ra, [[FS:[0-9]+]] # 16 bit inst +; CHECK: restore $16, $17, $ra, [[FS]] # 16 bit inst +; CHECK: .end foo1 +} + +declare void @x(i8*) #1 + +; Function Attrs: nounwind +define void @foo2() #0 { +entry: + %c = alloca [150 x i8], align 1 + %arraydecay = getelementptr inbounds [150 x i8]* %c, i32 0, i32 0 + call void @x(i8* %arraydecay) + %arraydecay1 = getelementptr inbounds [150 x i8]* %c, i32 0, i32 0 + call void @x(i8* %arraydecay1) + ret void +; CHECK: .ent foo2 +; CHECK: save $16, $17, $ra, [[FS:[0-9]+]] +; CHECK: restore $16, $17, $ra, [[FS]] +; CHECK: .end foo2 +} + +; Function Attrs: nounwind +define void @foo3() #0 { +entry: + %call = call float @xf() + store float %call, float* @f, align 4 + ret void +; CHECK: .ent foo3 +; CHECK: save $16, $17, $ra, $18, [[FS:[0-9]+]] +; CHECK: restore $16, $17, $ra, $18, [[FS]] +; CHECK: .end foo3 +; NEG: .ent foo3 +; NEG-NOT: save $16, $17, $ra, $18, [[FS:[0-9]+]] # 16 bit inst +; NEG-NOT: restore $16, $17, $ra, $18, [[FS]] # 16 bit inst +; NEG: .end foo3 +} + +declare float @xf() #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + + diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/tail16.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/tail16.ll new file mode 100644 index 000000000000..4e62e557478a --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/tail16.ll @@ -0,0 +1,20 @@ +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s + +; Function Attrs: nounwind optsize +define float @h() { +entry: + %call = tail call float bitcast (float (...)* @g to float ()*)() + ret float %call +; CHECK: .ent h +; CHECK: save $16, $ra, $18, 32 +; CHECK: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_0)(${{[0-9]+}}) +; CHECK: restore $16, $ra, $18, 32 +; CHECK: .end h +} + +; Function 
Attrs: optsize +declare float @g(...) + + + + diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/trap1.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/trap1.ll index bfcd7fed30d9..90755130e7c2 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/trap1.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Mips/trap1.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s -check-prefix=pic +; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=pic declare void @llvm.trap() diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/NVPTX/ldparam-v4.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/NVPTX/ldparam-v4.ll new file mode 100644 index 000000000000..ec306aafe854 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/NVPTX/ldparam-v4.ll @@ -0,0 +1,10 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +declare <4 x float> @bar() + +define void @foo(<4 x float>* %ptr) { +; CHECK: ld.param.v4.f32 + %val = tail call <4 x float> @bar() + store <4 x float> %val, <4 x float>* %ptr + ret void +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll index 3d3728dcde12..ccf52974a67b 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc -mcpu=g5 < %s | FileCheck %s ;; Formerly crashed, see PR 1508 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" target triple = "powerpc64-apple-darwin8" diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/anon_aggr.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/anon_aggr.ll index 1525e05501ee..3bae5c6516c9 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/anon_aggr.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/anon_aggr.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -fast-isel=false < %s | FileCheck %s +; RUN: llc -O0 -mcpu=ppc64 -mtriple=powerpc64-unknown-linux-gnu -fast-isel=false < %s | FileCheck %s ; RUN: llc -O0 -mcpu=g4 -mtriple=powerpc-apple-darwin8 < %s | FileCheck -check-prefix=DARWIN32 %s ; RUN: llc -O0 -mcpu=ppc970 -mtriple=powerpc64-apple-darwin8 < %s | FileCheck -check-prefix=DARWIN64 %s @@ -119,9 +119,9 @@ unequal: ; CHECK: ld 3, -[[OFFSET1]](1) ; DARWIN32: _func3: -; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 40 +; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 36 ; DARWIN32: addi r[[REG2:[0-9]+]], r[[REGSP]], 24 -; DARWIN32: lwz r[[REG3:[0-9]+]], 48(r[[REGSP]]) +; DARWIN32: lwz r[[REG3:[0-9]+]], 44(r[[REGSP]]) ; DARWIN32: lwz r[[REG4:[0-9]+]], 32(r[[REGSP]]) ; DARWIN32: cmplw cr{{[0-9]+}}, r[[REG4]], r[[REG3]] ; DARWIN32: stw r[[REG3]], -[[OFFSET1:[0-9]+]] diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/coalesce-ext.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/coalesce-ext.ll index f19175c9beaa..eb7cd261b564 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/coalesce-ext.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/coalesce-ext.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=ppc64 -mtriple=powerpc64-apple-darwin < %s | FileCheck %s +; RUN: llc 
-march=ppc64 -mcpu=g5 -mtriple=powerpc64-apple-darwin < %s | FileCheck %s ; Check that the peephole optimizer knows about sext and zext instructions. ; CHECK: test1sext define i32 @test1sext(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind { diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/complex-return.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/complex-return.ll index 3eb30e93fd31..5ac7524ff5c7 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/complex-return.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/complex-return.ll @@ -1,4 +1,4 @@ -; RUN: llc -mcpu=pwr7 -O0 < %s | FileCheck %s +; RUN: llc -mcpu=ppc64 -O0 < %s | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/crash.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/crash.ll new file mode 100644 index 000000000000..5cecca72fdbf --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/crash.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 + +define void @test1(i1 %x, i8 %x2, i8* %x3, i64 %x4) { +entry: + %tmp3 = and i64 %x4, 16 + %bf.shl = trunc i64 %tmp3 to i8 + %bf.clear = and i8 %x2, -17 + %bf.set = or i8 %bf.shl, %bf.clear + br i1 %x, label %if.then, label %if.end + +if.then: + ret void + +if.end: + store i8 %bf.set, i8* %x3, align 4 + ret void +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/dbg.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/dbg.ll index 1088f0ff3fff..cb93decac8e9 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/dbg.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/dbg.ll @@ -15,6 +15,7 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!22} !0 = metadata !{i32 720913, metadata !21, i32 12, metadata !"clang version 3.1", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !"", metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} @@ -36,3 +37,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !19 = metadata !{i32 2, i32 3, metadata !20, null} !20 = metadata !{i32 720907, metadata !21, metadata !5, i32 1, i32 34, i32 0} ; [ DW_TAG_lexical_block ] !21 = metadata !{metadata !"dbg.c", metadata !"/src"} +!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/hello-reloc.s b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/hello-reloc.s index 9bbfb3817890..1e3fb8fb0e71 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/hello-reloc.s +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/hello-reloc.s @@ -1,14 +1,10 @@ ; This tests for the basic implementation of PPCMachObjectWriter.cpp, ; which is responsible for writing mach-o relocation entries for (PIC) ; PowerPC objects. -; NOTE: Darwin PPC asm syntax is not yet supported by PPCAsmParser, -; so this test case uses ELF PPC asm syntax to produce a mach-o object. -; Once PPCAsmParser supports darwin asm syntax, this test case should -; be updated accordingly. 
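+; PPCAsmParser now handles the Darwin syntax used below: explicit register
+; names (r0, r31, ...) and the ha16()/lo16() relocation operators.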
; RUN: llvm-mc -filetype=obj -relocation-model=pic -mcpu=g4 -triple=powerpc-apple-darwin8 %s -o - | llvm-readobj -relocations | FileCheck -check-prefix=DARWIN-G4-DUMP %s -; .machine ppc7400 + .machine ppc7400 .section __TEXT,__textcoal_nt,coalesced,pure_instructions .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 .section __TEXT,__text,regular,pure_instructions @@ -16,40 +12,40 @@ .align 4 _main: ; @main ; BB#0: ; %entry - mflr 0 - stw 31, -4(1) - stw 0, 8(1) - stwu 1, -80(1) + mflr r0 + stw r31, -4(r1) + stw r0, 8(r1) + stwu r1, -80(r1) bl L0$pb L0$pb: - mr 31, 1 - li 5, 0 + mr r31, r1 + li r5, 0 mflr 2 - stw 3, 68(31) - stw 5, 72(31) - stw 4, 64(31) - addis 2, 2, (L_.str-L0$pb)@ha - la 3, (L_.str-L0$pb)@l(2) + stw r3, 68(r31) + stw r5, 72(r31) + stw r4, 64(r31) + addis r2, r2, ha16(L_.str-L0$pb) + la r3, lo16(L_.str-L0$pb)(r2) bl L_puts$stub - li 3, 0 - addi 1, 1, 80 - lwz 0, 8(1) - lwz 31, -4(1) - mtlr 0 + li r3, 0 + addi r1, r1, 80 + lwz r0, 8(r1) + lwz r31, -4(r1) + mtlr r0 blr .section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 .align 4 L_puts$stub: .indirect_symbol _puts - mflr 0 + mflr r0 bcl 20, 31, L_puts$stub$tmp L_puts$stub$tmp: - mflr 11 - addis 11, 11, (L_puts$lazy_ptr-L_puts$stub$tmp)@ha - mtlr 0 - lwzu 12, (L_puts$lazy_ptr-L_puts$stub$tmp)@l(11) - mtctr 12 + mflr r11 + addis r11, r11, ha16(L_puts$lazy_ptr-L_puts$stub$tmp) + mtlr r0 + lwzu r12, lo16(L_puts$lazy_ptr-L_puts$stub$tmp)(r11) + mtctr r12 bctr .section __DATA,__la_symbol_ptr,lazy_symbol_pointers L_puts$lazy_ptr: diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/jaggedstructs.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/jaggedstructs.ll index 82d4fef10cb3..9365e581529a 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/jaggedstructs.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/jaggedstructs.ll @@ -1,4 +1,4 @@ -; RUN: llc -mcpu=pwr7 -O0 -fast-isel=false < %s | FileCheck %s +; RUN: llc -mcpu=ppc64 -O0 -fast-isel=false < %s | FileCheck %s ; This tests receiving and re-passing parameters consisting of structures ; of size 3, 5, 6, and 7. 
They are to be found/placed right-adjusted in
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/lsa.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/lsa.ll
index 8a6338ef5a02..a892a4cf4140 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/lsa.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/lsa.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=ppc64 | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/ppc32-vacopy.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/ppc32-vacopy.ll
index bc394125f135..fa540452ac28 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/ppc32-vacopy.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/ppc32-vacopy.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple="powerpc-unknown-linux-gnu" < %s | FileCheck %s
+; RUN: llc -mtriple="powerpc-unknown-linux-gnu" -mcpu=ppc64 < %s | FileCheck %s
 ; PR15286
 %va_list = type {i8, i8, i16, i8*, i8*}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/pr17168.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/pr17168.ll
index 4c13a4d1086a..2848221e0764 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/pr17168.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/pr17168.ll
@@ -52,7 +52,7 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!438}
+!llvm.module.flags = !{!438, !464}
 !0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 190311)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !298, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] [DW_LANG_C99]
 !1 = metadata !{metadata !"bt.c", metadata !"/home/hfinkel/src/NPB2.3-omp-C/BT"}
@@ -518,3 +518,4 @@ attributes #1 = { nounwind readnone }
 !461 = metadata !{i32 786443, metadata !1, metadata !462, i32 2019, i32 0, i32 147} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
 !462 = metadata !{i32 786443, metadata !1, metadata !463, i32 2018, i32 0, i32 146} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
 !463 = metadata !{i32 786443, metadata !1, metadata !114, i32 2018, i32 0, i32 145} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!464 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/pwr7-gt-nop.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/pwr7-gt-nop.ll
new file mode 100644
index 000000000000..8c8545d60df7
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/pwr7-gt-nop.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c, float* nocapture %d) #0 {
+
+; CHECK-LABEL: @foo
+
+entry:
+  %0 = load float* %b, align 4
+  store float %0, float* %a, align 4
+  %1 = load float* %c, align 4
+  store float %1, float* %b, align 4
+  %2 = load float* %a, align 4
+  store float %2, float* %d, align 4
+  ret void
+
+; CHECK: lfs [[REG1:[0-9]+]], 0(4)
+; CHECK: stfs [[REG1]], 0(3)
+; CHECK: ori 2, 2, 0
+; CHECK: lfs [[REG2:[0-9]+]], 0(5)
+; CHECK: stfs [[REG2]], 0(4)
+; CHECK: ori 2, 2, 0
+; CHECK: lfs [[REG3:[0-9]+]], 0(3)
+; CHECK: stfs [[REG3]], 0(6)
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/sjlj.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/sjlj.ll
index 414640b2b7e3..f9f887af31f3 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/sjlj.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/sjlj.ll
@@ -134,8 +134,8 @@ return:                                           ; preds = %if.end, %if.then
 ; CHECK: @main2
 
 ; CHECK: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha
-; CHECK: std 31, env_sigill@toc@l([[REG]])
-; CHECK: addi [[REGB:[0-9]+]], [[REG]], env_sigill@toc@l
+; CHECK-DAG: std 31, env_sigill@toc@l([[REG]])
+; CHECK-DAG: addi [[REGB:[0-9]+]], [[REG]], env_sigill@toc@l
 ; CHECK-DAG: std [[REGB]], [[OFF:[0-9]+]](31) # 8-byte Folded Spill
 ; CHECK-DAG: std 1, 16([[REGB]])
 ; CHECK-DAG: std 2, 24([[REGB]])
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/srl-mask.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/srl-mask.ll
new file mode 100644
index 000000000000..2749df99fd4f
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/srl-mask.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i64 @foo(i64 %x) #0 {
+entry:
+; CHECK-LABEL: @foo
+  %a = lshr i64 %x, 35
+  %b = and i64 %a, 65535
+; CHECK: rldicl 3, 3, 29, 48
+  ret i64 %b
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/stfiwx.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/stfiwx.ll
index 1ad558c6abc9..588e44fb28d3 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/stfiwx.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/stfiwx.ll
@@ -1,18 +1,27 @@
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=stfiwx -o %t1
-; RUN: grep stfiwx %t1
-; RUN: not grep r1 %t1
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-stfiwx \
-; RUN:   -o %t2
-; RUN: not grep stfiwx %t2
-; RUN: grep r1 %t2
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=stfiwx | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-stfiwx | FileCheck -check-prefix=CHECK-LS %s
 
-define void @test(float %a, i32* %b) nounwind {
+define void @test1(float %a, i32* %b) nounwind {
+; CHECK-LABEL: @test1
+; CHECK-LS-LABEL: @test1
 %tmp.2 = fptosi float %a to i32 ; [#uses=1]
 store i32 %tmp.2, i32* %b
 ret void
+
+; CHECK-NOT: lwz
+; CHECK-NOT: stw
+; CHECK: stfiwx
+; CHECK: blr
+
+; CHECK-LS: lwz
+; CHECK-LS: stw
+; CHECK-LS-NOT: stfiwx
+; CHECK-LS: blr
 }
 
 define void @test2(float %a, i32* %b, i32 %i) nounwind {
+; CHECK-LABEL: @test2
+; CHECK-LS-LABEL: @test2
 %tmp.2 = getelementptr i32* %b, i32 1 ; [#uses=1]
 %tmp.5 = getelementptr i32* %b, i32 %i ; [#uses=1]
 %tmp.7 = fptosi float %a to i32 ; [#uses=3]
@@ -20,5 +29,15 @@ define void @test2(float %a, i32* %b, i32 %i) nounwind {
 store i32 %tmp.7, i32* %tmp.2
 store i32 %tmp.7, i32* %b
 ret void
+
+; CHECK-NOT: lwz
+; CHECK-NOT: stw
+; CHECK: stfiwx
+; CHECK: blr
+
+; CHECK-LS: lwz
+; CHECK-LS: stw
+; CHECK-LS-NOT: stfiwx
+; CHECK-LS: blr
 }
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/structsinmem.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/structsinmem.ll
index 5b8dead16893..b5552af0eb51 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/structsinmem.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/structsinmem.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim -fast-isel=false < %s | FileCheck %s
+; RUN: llc -mcpu=ppc64 -O0 -disable-fp-elim -fast-isel=false < %s | FileCheck %s
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/structsinregs.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/structsinregs.ll
index fb3bd7cd57e6..cfe32e9560ae 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/structsinregs.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/structsinregs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim -fast-isel=false < %s | FileCheck %s
+; RUN: llc -mcpu=ppc64 -O0 -disable-fp-elim -fast-isel=false < %s | FileCheck %s
 
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/subsumes-pred-regs.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/subsumes-pred-regs.ll
new file mode 100644
index 000000000000..97ac788164ab
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/subsumes-pred-regs.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -mcpu=ppc64 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define zeroext i1 @test1() unnamed_addr #0 align 2 {
+
+; CHECK-LABEL: @test1
+
+entry:
+  br i1 undef, label %lor.end, label %lor.rhs
+
+lor.rhs: ; preds = %entry
+  unreachable
+
+lor.end: ; preds = %entry
+  br i1 undef, label %land.rhs, label %if.then
+
+if.then: ; preds = %lor.end
+  br i1 undef, label %return, label %if.end.i24
+
+if.end.i24: ; preds = %if.then
+  %0 = load i32* undef, align 4
+  %lnot.i.i16.i23 = icmp eq i32 %0, 0
+  br i1 %lnot.i.i16.i23, label %if.end7.i37, label %test.exit27.i34
+
+test.exit27.i34: ; preds = %if.end.i24
+  br i1 undef, label %return, label %if.end7.i37
+
+if.end7.i37: ; preds = %test.exit27.i34, %if.end.i24
+  %tobool.i.i36 = icmp eq i8 undef, 0
+  br i1 %tobool.i.i36, label %return, label %if.then9.i39
+
+if.then9.i39: ; preds = %if.end7.i37
+  br i1 %lnot.i.i16.i23, label %return, label %lor.rhs.i.i49
+
+; CHECK: .LBB0_7:
+; CHECK: beq 1, .LBB0_10
+; CHECK: beq 0, .LBB0_10
+; CHECK: .LBB0_9:
+
+lor.rhs.i.i49: ; preds = %if.then9.i39
+  %cmp.i.i.i.i48 = icmp ne i64 undef, 0
+  br label %return
+
+land.rhs: ; preds = %lor.end
+  br i1 undef, label %return, label %if.end.i
+
+if.end.i: ; preds = %land.rhs
+  br i1 undef, label %return, label %if.then9.i
+
+if.then9.i: ; preds = %if.end.i
+  br i1 undef, label %return, label %lor.rhs.i.i
+
+lor.rhs.i.i: ; preds = %if.then9.i
+  %cmp.i.i.i.i = icmp ne i64 undef, 0
+  br label %return
+
+return: ; preds = %lor.rhs.i.i, %if.then9.i, %if.end.i, %land.rhs, %lor.rhs.i.i49, %if.then9.i39, %if.end7.i37, %test.exit27.i34, %if.then
+  %retval.0 = phi i1 [ false, %if.then ], [ false, %test.exit27.i34 ], [ true, %if.end7.i37 ], [ true, %if.then9.i39 ], [ %cmp.i.i.i.i48, %lor.rhs.i.i49 ], [ false, %land.rhs ], [ true, %if.end.i ], [ true, %if.then9.i ], [ %cmp.i.i.i.i, %lor.rhs.i.i ]
+  ret i1 %retval.0
+}
+
+attributes #0 = { nounwind }
+
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/tls-pic.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/tls-pic.ll
new file mode 100644
index 000000000000..9f3ab6e3b491
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/tls-pic.ll
@@ -0,0 +1,55 @@
+; RUN: llc -march=ppc64 -mcpu=pwr7 -O0 -relocation-model=pic < %s | FileCheck -check-prefix=OPT0 %s
+; RUN: llc -march=ppc64 -mcpu=pwr7 -O1 -relocation-model=pic < %s | FileCheck -check-prefix=OPT1 %s
+
+target triple = "powerpc64-unknown-linux-gnu"
+; Test correct assembly code generation for thread-local storage using
+; the local dynamic model.
+
+@a = hidden thread_local global i32 0, align 4
+
+define signext i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @a, align 4
+  ret i32 %0
+}
+
+; OPT0-LABEL: main:
+; OPT0: addis [[REG:[0-9]+]], 2, a@got@tlsld@ha
+; OPT0-NEXT: addi 3, [[REG]], a@got@tlsld@l
+; OPT0: bl __tls_get_addr(a@tlsld)
+; OPT0-NEXT: nop
+; OPT0: addis [[REG2:[0-9]+]], 3, a@dtprel@ha
+; OPT0-NEXT: addi {{[0-9]+}}, [[REG2]], a@dtprel@l
+
+; Test peephole optimization for thread-local storage using the
+; local dynamic model.
+
+; OPT1-LABEL: main:
+; OPT1: addis [[REG:[0-9]+]], 2, a@got@tlsld@ha
+; OPT1-NEXT: addi 3, [[REG]], a@got@tlsld@l
+; OPT1: bl __tls_get_addr(a@tlsld)
+; OPT1-NEXT: nop
+; OPT1: addis [[REG2:[0-9]+]], 3, a@dtprel@ha
+; OPT1-NEXT: lwa {{[0-9]+}}, a@dtprel@l([[REG2]])
+
+; Test correct assembly code generation for thread-local storage using
+; the general dynamic model.
+
+@a2 = thread_local global i32 0, align 4
+
+define signext i32 @main2() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @a2, align 4
+  ret i32 %0
+}
+
+; OPT1-LABEL: main2
+; OPT1: addis [[REG:[0-9]+]], 2, a2@got@tlsgd@ha
+; OPT1-NEXT: addi 3, [[REG]], a2@got@tlsgd@l
+; OPT1: bl __tls_get_addr(a2@tlsgd)
+; OPT1-NEXT: nop
+
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/tls.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/tls.ll
index 4e0a822399dd..59b4de755988 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/tls.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/tls.ll
@@ -1,7 +1,8 @@
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
-target triple = "powerpc64-unknown-freebsd10.0"
-; RUN: llc -O0 < %s -march=ppc64 | FileCheck -check-prefix=OPT0 %s
-; RUN: llc -O1 < %s -march=ppc64 | FileCheck -check-prefix=OPT1 %s
+; RUN: llc -O0 < %s -march=ppc64 -mcpu=ppc64 | FileCheck -check-prefix=OPT0 %s
+; RUN: llc -O1 < %s -march=ppc64 -mcpu=ppc64 | FileCheck -check-prefix=OPT1 %s
+; RUN: llc -O0 < %s -march=ppc32 -mcpu=ppc | FileCheck -check-prefix=OPT0-PPC32 %s
+
+target triple = "powerpc64-unknown-linux-gnu"
 
 @a = thread_local global i32 0, align 4
 
@@ -19,3 +20,27 @@ entry:
 store i32 42, i32* @a, align 4
 ret i32 0
 }
+
+; Test correct assembly code generation for thread-local storage
+; using the initial-exec model.
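+; (Sketch of the expected lowering, for orientation: with initial-exec,
+; the variable's offset from the thread pointer is fixed at static link
+; time and loaded from a GOT slot, so no __tls_get_addr call should be
+; needed; on ppc64 this is expected to come out as an addis/ld of the
+; a2@got@tprel slot followed by an add of the thread pointer, which is
+; what the OPT1 checks below look for.)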
+
+@a2 = external thread_local global i32
+
+define signext i32 @main2() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @a2, align 4
+  ret i32 %0
+}
+
+; OPT1-LABEL: main2:
+; OPT1: addis [[REG1:[0-9]+]], 2, a2@got@tprel@ha
+; OPT1: ld [[REG2:[0-9]+]], a2@got@tprel@l([[REG1]])
+; OPT1: add {{[0-9]+}}, [[REG2]], a2@tls
+
+;OPT0-PPC32-LABEL: main2:
+;OPT0-PPC32: li [[REG1:[0-9]+]], _GLOBAL_OFFSET_TABLE_@l
+;OPT0-PPC32: addis [[REG1]], [[REG1]], _GLOBAL_OFFSET_TABLE_@ha
+;OPT0-PPC32: lwz [[REG2:[0-9]+]], a2@got@tprel@l([[REG1]])
+;OPT0-PPC32: add 3, [[REG2]], a2@tls
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/unaligned.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/unaligned.ll
index d05080338f33..d469c62f2f05 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/unaligned.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/unaligned.ll
@@ -65,9 +65,9 @@ entry:
 ; These loads and stores are legalized into aligned loads and stores
 ; using aligned stack slots.
 ; CHECK: @foo6
-; CHECK: ld
-; CHECK: ld
-; CHECK: std
-; CHECK: std
+; CHECK-DAG: ld
+; CHECK-DAG: ld
+; CHECK-DAG: stdx
+; CHECK: stdx
 }
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/unwind-dw2-g.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/unwind-dw2-g.ll
index f4243464241e..260d03664295 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/unwind-dw2-g.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/unwind-dw2-g.ll
@@ -19,7 +19,7 @@ declare void @llvm.eh.unwind.init() #0
 attributes #0 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!8}
+!llvm.module.flags = !{!8, !11}
 !0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/unwind-dw2.c] [DW_LANG_C99]
 !1 = metadata !{metadata !"/tmp/unwind-dw2.c", metadata !"/tmp"}
@@ -32,3 +32,4 @@ attributes #0 = { nounwind }
 !8 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
 !9 = metadata !{i32 2, i32 0, metadata !4, null}
 !10 = metadata !{i32 3, i32 0, metadata !4, null}
+!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/vec_cmp.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/vec_cmp.ll
index 83e0e0263061..4bce8c80fc6a 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/vec_cmp.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/vec_cmp.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
 
-; Check vector comparisons using altivec. For non native types, just basic
+; Check vector comparisons using altivec. For non-native types, just basic
 ; comparison instruction check is done. For altivec supported type (16i8,
 ; 8i16, 4i32, and 4f32) all the comparisons operators (==, !=, >, >=, <, <=)
 ; are checked.
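For reference, a minimal sketch of the kind of test vec_cmp.ll runs for one
supported type; the function name and exact CHECK pattern here are
illustrative rather than taken from the test itself:

; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
define <4 x i32> @v4si32_cmp_eq(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
  ; An integer vector compare yields <4 x i1>; the sext widens it back to a
  ; full-width mask, which on AltiVec should select vcmpequw for v4i32 ==.
  %cmp = icmp eq <4 x i32> %x, %y
  %sext = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %sext
; CHECK-LABEL: v4si32_cmp_eq:
; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
}

The test repeats this shape for each operator (==, !=, >, >=, <, <=) and each
natively supported element type.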
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll
new file mode 100644
index 000000000000..130d8faaf8bc
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll
@@ -0,0 +1,38 @@
+; taken from X86 version of the same test
+; RUN: llc -mtriple=powerpc-apple-darwin10 -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc-apple-darwin9 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s
+; RUN: llc -mtriple=powerpc-apple-darwin8 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s
+
+@v1 = linkonce_odr global i32 32
+; CHECK: .globl _v1
+; CHECK: .weak_def_can_be_hidden _v1
+
+; CHECK-D89: .globl _v1
+; CHECK-D89: .weak_definition _v1
+
+define i32 @f1() {
+  %x = load i32 * @v1
+  ret i32 %x
+}
+
+@v2 = linkonce_odr global i32 32
+; CHECK: .globl _v2
+; CHECK: .weak_definition _v2
+
+; CHECK-D89: .globl _v2
+; CHECK-D89: .weak_definition _v2
+
+@v3 = linkonce_odr unnamed_addr global i32 32
+; CHECK: .globl _v3
+; CHECK: .weak_def_can_be_hidden _v3
+
+; CHECK-D89: .globl _v3
+; CHECK-D89: .weak_definition _v3
+
+define i32* @f2() {
+  ret i32* @v2
+}
+
+define i32* @f3() {
+  ret i32* @v3
+}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll
new file mode 100644
index 000000000000..cb2a1c80a160
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+
+declare i32 @llvm.SI.tid() nounwind readnone
+declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
+
+; The pointer calculation required for the alloca'd values actually
+; requires an add and won't be folded into the addressing, which fails
+; with a 64-bit pointer add. This should work since private pointers
+; should be 32-bits.
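+; (Concretely, for the GEP below the address works out to
+;    &%alloca[1][%b] = %alloca + 1*16 + %b*4
+; so an explicit 32-bit add (V_ADD_I32) of the scratch pointer is
+; expected instead of the offset being folded into the addressing mode;
+; this arithmetic is a sketch for orientation, not checked output.)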
+
+; SI-LABEL: @test_private_array_ptr_calc:
+; SI: V_ADD_I32_e32 [[PTRREG:v[0-9]+]]
+; SI: V_MOVRELD_B32_e32 {{v[0-9]+}}, [[PTRREG]]
+define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
+  %alloca = alloca [4 x i32], i32 4, align 16
+  %tid = call i32 @llvm.SI.tid() readnone
+  %a_ptr = getelementptr i32 addrspace(1)* %inA, i32 %tid
+  %b_ptr = getelementptr i32 addrspace(1)* %inB, i32 %tid
+  %a = load i32 addrspace(1)* %a_ptr
+  %b = load i32 addrspace(1)* %b_ptr
+  %result = add i32 %a, %b
+  %alloca_ptr = getelementptr [4 x i32]* %alloca, i32 1, i32 %b
+  store i32 %result, i32* %alloca_ptr, align 4
+  ; Dummy call
+  call void @llvm.AMDGPU.barrier.local() nounwind noduplicate
+  %reload = load i32* %alloca_ptr, align 4
+  %out_ptr = getelementptr i32 addrspace(1)* %out, i32 %tid
+  store i32 %reload, i32 addrspace(1)* %out_ptr, align 4
+  ret void
+}
+
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/cayman-loop-bug.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/cayman-loop-bug.ll
new file mode 100644
index 000000000000..a87352895eb3
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/cayman-loop-bug.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
+
+; CHECK-LABEL: @main
+; CHECK: LOOP_START_DX10
+; CHECK: ALU_PUSH_BEFORE
+; CHECK: LOOP_START_DX10
+; CHECK: PUSH
+; CHECK-NOT: ALU_PUSH_BEFORE
+; CHECK: END_LOOP
+; CHECK: END_LOOP
+define void @main (<4 x float> inreg %reg0) #0 {
+entry:
+  br label %outer_loop
+outer_loop:
+  %cnt = phi i32 [0, %entry], [%cnt_incr, %inner_loop]
+  %cond = icmp eq i32 %cnt, 16
+  br i1 %cond, label %outer_loop_body, label %exit
+outer_loop_body:
+  %cnt_incr = add i32 %cnt, 1
+  br label %inner_loop
+inner_loop:
+  %cnt2 = phi i32 [0, %outer_loop_body], [%cnt2_incr, %inner_loop_body]
+  %cond2 = icmp eq i32 %cnt2, 16
+  br i1 %cond, label %inner_loop_body, label %outer_loop
+inner_loop_body:
+  %cnt2_incr = add i32 %cnt2, 1
+  br label %inner_loop
+exit:
+  ret void
+}
+
+attributes #0 = { "ShaderType"="0" }
\ No newline at end of file
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/icmp64.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/icmp64.ll
new file mode 100644
index 000000000000..c9e62ff934ee
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/icmp64.ll
@@ -0,0 +1,92 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: @test_i64_eq:
+; SI: V_CMP_EQ_I64
+define void @test_i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+  %cmp = icmp eq i64 %a, %b
+  %result = sext i1 %cmp to i32
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_i64_ne:
+; SI: V_CMP_NE_I64
+define void @test_i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+  %cmp = icmp ne i64 %a, %b
+  %result = sext i1 %cmp to i32
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_i64_slt:
+; SI: V_CMP_LT_I64
+define void @test_i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+  %cmp = icmp slt i64 %a, %b
+  %result = sext i1 %cmp to i32
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_i64_ult:
+; SI: V_CMP_LT_U64
+define void @test_i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+  %cmp = icmp ult i64 %a, %b
+  %result = sext i1 %cmp to i32
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_i64_sle:
+; SI: V_CMP_LE_I64
+define void @test_i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+  %cmp = icmp sle i64 %a, %b
+  %result = sext i1 %cmp to i32
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_i64_ule:
+; SI: V_CMP_LE_U64
+define void @test_i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+  %cmp = icmp ule i64 %a, %b
+  %result = sext i1 %cmp to i32
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_i64_sgt:
+; SI: V_CMP_GT_I64
+define void @test_i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+  %cmp = icmp sgt i64 %a, %b
+  %result = sext i1 %cmp to i32
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_i64_ugt:
+; SI: V_CMP_GT_U64
+define void @test_i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+  %cmp = icmp ugt i64 %a, %b
+  %result = sext i1 %cmp to i32
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_i64_sge:
+; SI: V_CMP_GE_I64
+define void @test_i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+  %cmp = icmp sge i64 %a, %b
+  %result = sext i1 %cmp to i32
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @test_i64_uge:
+; SI: V_CMP_GE_U64
+define void @test_i64_uge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+  %cmp = icmp uge i64 %a, %b
+  %result = sext i1 %cmp to i32
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/insert_vector_elt.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/insert_vector_elt.ll
index 05aeccebac00..7e04a1f4bc87 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/insert_vector_elt.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/insert_vector_elt.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; XFAIL: *
 ; RUN: llc < %s -march=r600 -mcpu=redwood -o %t
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/jump-address.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/jump-address.ll
index ae9c8bba4fd6..a1cd3882443a 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/jump-address.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/jump-address.ll
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-; CHECK: JUMP @3
+; CHECK: JUMP @6
 ; CHECK: EXPORT
 ; CHECK-NOT: EXPORT
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/llvm.exp2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/llvm.exp2.ll
new file mode 100644
index 000000000000..13bfbab85695
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/llvm.exp2.ll
@@ -0,0 +1,26 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
+;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK
+
+;EG-CHECK-LABEL: @test
+;EG-CHECK: EXP_IEEE *
+;CM-CHECK-LABEL: @test
+;CM-CHECK: EXP_IEEE T{{[0-9]+}}.X, -|T{{[0-9]+}}.X|
+;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Y (MASKED), -|T{{[0-9]+}}.X|
+;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Z (MASKED), -|T{{[0-9]+}}.X|
+;CM-CHECK: EXP_IEEE * T{{[0-9]+}}.W (MASKED), -|T{{[0-9]+}}.X|
+
+define void @test(<4 x float> inreg %reg0) #0 {
+  %r0 = extractelement <4 x float> %reg0, i32 0
+  %r1 = call float @llvm.fabs.f32(float %r0)
+  %r2 = fsub float -0.000000e+00, %r1
+  %r3 = call float @llvm.exp2.f32(float %r2)
+  %vec = insertelement <4 x float> undef, float %r3, i32 0
+  call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
+  ret void
+}
+
+declare float @llvm.exp2.f32(float) readnone
+declare float @llvm.fabs.f32(float) readnone
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="0" }
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/llvm.trunc.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/llvm.trunc.ll
new file mode 100644
index 000000000000..fa6fb9906dde
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/llvm.trunc.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK-LABEL: @trunc_f32
+; CHECK: TRUNC
+
+define void @trunc_f32(float addrspace(1)* %out, float %in) {
+entry:
+  %0 = call float @llvm.trunc.f32(float %in)
+  store float %0, float addrspace(1)* %out
+  ret void
+}
+
+declare float @llvm.trunc.f32(float)
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/load64.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/load64.ll
index e351e4135a7d..a117557e98e0 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/load64.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/load64.ll
@@ -1,18 +1,28 @@
 ; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
 
 ; load a f64 value from the global address space.
-; CHECK: @load_f64
+; CHECK-LABEL: @load_f64:
 ; CHECK: BUFFER_LOAD_DWORDX2 v[{{[0-9]+:[0-9]+}}]
+; CHECK: BUFFER_STORE_DWORDX2 v[{{[0-9]+:[0-9]+}}]
 define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
-entry:
-  %0 = load double addrspace(1)* %in
-  store double %0, double addrspace(1)* %out
+  %1 = load double addrspace(1)* %in
+  store double %1, double addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: @load_i64:
+; CHECK: BUFFER_LOAD_DWORDX2 v[{{[0-9]+:[0-9]+}}]
+; CHECK: BUFFER_STORE_DWORDX2 v[{{[0-9]+:[0-9]+}}]
+define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+  %tmp = load i64 addrspace(1)* %in
+  store i64 %tmp, i64 addrspace(1)* %out, align 8
 ret void
 }
 
 ; Load a f64 value from the constant address space.
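+; (Constant-address-space loads are uniform across a wavefront, so they
+; are expected to lower to a scalar S_LOAD rather than a per-lane
+; BUFFER_LOAD; the checks below reflect that.)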
-; CHECK: @load_const_addrspace_f64 +; CHECK-LABEL: @load_const_addrspace_f64: ; CHECK: S_LOAD_DWORDX2 s[{{[0-9]+:[0-9]+}}] +; CHECK: BUFFER_STORE_DWORDX2 v[{{[0-9]+:[0-9]+}}] define void @load_const_addrspace_f64(double addrspace(1)* %out, double addrspace(2)* %in) { %1 = load double addrspace(2)* %in store double %1, double addrspace(1)* %out diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll new file mode 100644 index 000000000000..c89398f00662 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll @@ -0,0 +1,59 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman +;REQUIRES: asserts + +define void @main(<4 x float> inreg, <4 x float> inreg) #0 { +main_body: + %2 = extractelement <4 x float> %0, i32 0 + %3 = extractelement <4 x float> %0, i32 1 + %4 = extractelement <4 x float> %0, i32 2 + %5 = extractelement <4 x float> %0, i32 3 + %6 = insertelement <4 x float> undef, float %2, i32 0 + %7 = insertelement <4 x float> %6, float %3, i32 1 + %8 = insertelement <4 x float> %7, float %4, i32 2 + %9 = insertelement <4 x float> %8, float %5, i32 3 + %10 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %9) + %11 = extractelement <4 x float> %10, i32 0 + %12 = extractelement <4 x float> %10, i32 1 + %13 = extractelement <4 x float> %10, i32 2 + %14 = extractelement <4 x float> %10, i32 3 + %15 = call float @fabs(float %13) + %16 = fdiv float 1.000000e+00, %15 + %17 = fmul float %11, %16 + %18 = fadd float %17, 1.500000e+00 + %19 = fmul float %12, %16 + %20 = fadd float %19, 1.500000e+00 + %21 = insertelement <4 x float> undef, float %20, i32 0 + %22 = insertelement <4 x float> %21, float %18, i32 1 + %23 = insertelement <4 x float> %22, float %14, i32 2 + %24 = insertelement <4 x float> %23, float %5, i32 3 + %25 = extractelement <4 x float> %24, i32 0 + %26 = extractelement <4 x float> %24, i32 1 + %27 = extractelement <4 x float> %24, i32 2 + %28 = extractelement <4 x float> %24, i32 3 + %29 = insertelement <4 x float> undef, float %25, i32 0 + %30 = insertelement <4 x float> %29, float %26, i32 1 + %31 = insertelement <4 x float> %30, float %27, i32 2 + %32 = insertelement <4 x float> %31, float %28, i32 3 + %33 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %32, i32 16, i32 0, i32 13) + %34 = extractelement <4 x float> %33, i32 0 + %35 = insertelement <4 x float> undef, float %34, i32 0 + %36 = insertelement <4 x float> %35, float %34, i32 1 + %37 = insertelement <4 x float> %36, float %34, i32 2 + %38 = insertelement <4 x float> %37, float 1.000000e+00, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %38, i32 0, i32 0) + ret void +} + +; Function Attrs: readnone +declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #1 + +; Function Attrs: readnone +declare float @fabs(float) #1 + +; Function Attrs: readnone +declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { readnone } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/register-count-comments.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/register-count-comments.ll new file mode 100644 index 000000000000..a64b2804bde9 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/register-count-comments.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck 
-check-prefix=SI %s + +declare i32 @llvm.SI.tid() nounwind readnone + +; SI-LABEL: @foo: +; SI: .section .AMDGPU.csdata +; SI: ; Kernel info: +; SI: ; NumSgprs: {{[0-9]+}} +; SI: ; NumVgprs: {{[0-9]+}} +define void @foo(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %abase, i32 addrspace(1)* %bbase) nounwind { + %tid = call i32 @llvm.SI.tid() nounwind readnone + %aptr = getelementptr i32 addrspace(1)* %abase, i32 %tid + %bptr = getelementptr i32 addrspace(1)* %bbase, i32 %tid + %outptr = getelementptr i32 addrspace(1)* %out, i32 %tid + %a = load i32 addrspace(1)* %aptr, align 4 + %b = load i32 addrspace(1)* %bptr, align 4 + %result = add i32 %a, %b + store i32 %result, i32 addrspace(1)* %outptr, align 4 + ret void +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/si-annotate-cf-assertion.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/si-annotate-cf-assertion.ll index 9886fe9169bb..cd3ba2b222d0 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/si-annotate-cf-assertion.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/si-annotate-cf-assertion.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; XFAIL: * ; RUN: llc -march=r600 -mcpu=SI -asm-verbose=false < %s | FileCheck %s diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/si-sgpr-spill.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/si-sgpr-spill.ll index 05c5e31f3fad..b34a757d9b65 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/si-sgpr-spill.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/si-sgpr-spill.ll @@ -1,8 +1,5 @@ ; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s -; XXX: Enable when spilling is supported -; XFAIL: * - ; These tests check that the compiler won't crash when it needs to spill ; SGPRs. @@ -690,3 +687,880 @@ attributes #3 = { readonly } attributes #4 = { nounwind readonly } !0 = metadata !{metadata !"const", null, i32 1} + +; CHECK-LABEL: @main1 +; CHECK: S_ENDPGM +define void @main1([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { +main_body: + %21 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0 + %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 + %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0) + %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 4) + %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8) + %26 = call float @llvm.SI.load.const(<16 x i8> %22, i32 12) + %27 = call float @llvm.SI.load.const(<16 x i8> %22, i32 28) + %28 = call float @llvm.SI.load.const(<16 x i8> %22, i32 48) + %29 = call float @llvm.SI.load.const(<16 x i8> %22, i32 52) + %30 = call float @llvm.SI.load.const(<16 x i8> %22, i32 56) + %31 = call float @llvm.SI.load.const(<16 x i8> %22, i32 64) + %32 = call float @llvm.SI.load.const(<16 x i8> %22, i32 68) + %33 = call float @llvm.SI.load.const(<16 x i8> %22, i32 72) + %34 = call float @llvm.SI.load.const(<16 x i8> %22, i32 76) + %35 = call float @llvm.SI.load.const(<16 x i8> %22, i32 128) + %36 = call float @llvm.SI.load.const(<16 x i8> %22, i32 132) + %37 = call float @llvm.SI.load.const(<16 x i8> %22, i32 144) + %38 = call float @llvm.SI.load.const(<16 x i8> %22, i32 148) + %39 = call float @llvm.SI.load.const(<16 x i8> %22, i32 152) + %40 = call float @llvm.SI.load.const(<16 x i8> %22, i32 160) + %41 = call float @llvm.SI.load.const(<16 x i8> %22, i32 164) + %42 = call float 
@llvm.SI.load.const(<16 x i8> %22, i32 168) + %43 = call float @llvm.SI.load.const(<16 x i8> %22, i32 172) + %44 = call float @llvm.SI.load.const(<16 x i8> %22, i32 176) + %45 = call float @llvm.SI.load.const(<16 x i8> %22, i32 180) + %46 = call float @llvm.SI.load.const(<16 x i8> %22, i32 184) + %47 = call float @llvm.SI.load.const(<16 x i8> %22, i32 192) + %48 = call float @llvm.SI.load.const(<16 x i8> %22, i32 196) + %49 = call float @llvm.SI.load.const(<16 x i8> %22, i32 200) + %50 = call float @llvm.SI.load.const(<16 x i8> %22, i32 208) + %51 = call float @llvm.SI.load.const(<16 x i8> %22, i32 212) + %52 = call float @llvm.SI.load.const(<16 x i8> %22, i32 216) + %53 = call float @llvm.SI.load.const(<16 x i8> %22, i32 220) + %54 = call float @llvm.SI.load.const(<16 x i8> %22, i32 236) + %55 = call float @llvm.SI.load.const(<16 x i8> %22, i32 240) + %56 = call float @llvm.SI.load.const(<16 x i8> %22, i32 244) + %57 = call float @llvm.SI.load.const(<16 x i8> %22, i32 248) + %58 = call float @llvm.SI.load.const(<16 x i8> %22, i32 252) + %59 = call float @llvm.SI.load.const(<16 x i8> %22, i32 256) + %60 = call float @llvm.SI.load.const(<16 x i8> %22, i32 260) + %61 = call float @llvm.SI.load.const(<16 x i8> %22, i32 264) + %62 = call float @llvm.SI.load.const(<16 x i8> %22, i32 268) + %63 = call float @llvm.SI.load.const(<16 x i8> %22, i32 272) + %64 = call float @llvm.SI.load.const(<16 x i8> %22, i32 276) + %65 = call float @llvm.SI.load.const(<16 x i8> %22, i32 280) + %66 = call float @llvm.SI.load.const(<16 x i8> %22, i32 284) + %67 = call float @llvm.SI.load.const(<16 x i8> %22, i32 288) + %68 = call float @llvm.SI.load.const(<16 x i8> %22, i32 292) + %69 = call float @llvm.SI.load.const(<16 x i8> %22, i32 464) + %70 = call float @llvm.SI.load.const(<16 x i8> %22, i32 468) + %71 = call float @llvm.SI.load.const(<16 x i8> %22, i32 472) + %72 = call float @llvm.SI.load.const(<16 x i8> %22, i32 496) + %73 = call float @llvm.SI.load.const(<16 x i8> %22, i32 500) + %74 = call float @llvm.SI.load.const(<16 x i8> %22, i32 504) + %75 = call float @llvm.SI.load.const(<16 x i8> %22, i32 512) + %76 = call float @llvm.SI.load.const(<16 x i8> %22, i32 516) + %77 = call float @llvm.SI.load.const(<16 x i8> %22, i32 524) + %78 = call float @llvm.SI.load.const(<16 x i8> %22, i32 532) + %79 = call float @llvm.SI.load.const(<16 x i8> %22, i32 536) + %80 = call float @llvm.SI.load.const(<16 x i8> %22, i32 540) + %81 = call float @llvm.SI.load.const(<16 x i8> %22, i32 544) + %82 = call float @llvm.SI.load.const(<16 x i8> %22, i32 548) + %83 = call float @llvm.SI.load.const(<16 x i8> %22, i32 552) + %84 = call float @llvm.SI.load.const(<16 x i8> %22, i32 556) + %85 = call float @llvm.SI.load.const(<16 x i8> %22, i32 560) + %86 = call float @llvm.SI.load.const(<16 x i8> %22, i32 564) + %87 = call float @llvm.SI.load.const(<16 x i8> %22, i32 568) + %88 = call float @llvm.SI.load.const(<16 x i8> %22, i32 572) + %89 = call float @llvm.SI.load.const(<16 x i8> %22, i32 576) + %90 = call float @llvm.SI.load.const(<16 x i8> %22, i32 580) + %91 = call float @llvm.SI.load.const(<16 x i8> %22, i32 584) + %92 = call float @llvm.SI.load.const(<16 x i8> %22, i32 588) + %93 = call float @llvm.SI.load.const(<16 x i8> %22, i32 592) + %94 = call float @llvm.SI.load.const(<16 x i8> %22, i32 596) + %95 = call float @llvm.SI.load.const(<16 x i8> %22, i32 600) + %96 = call float @llvm.SI.load.const(<16 x i8> %22, i32 604) + %97 = call float @llvm.SI.load.const(<16 x i8> %22, i32 608) + %98 = call float @llvm.SI.load.const(<16 x 
i8> %22, i32 612) + %99 = call float @llvm.SI.load.const(<16 x i8> %22, i32 616) + %100 = call float @llvm.SI.load.const(<16 x i8> %22, i32 624) + %101 = call float @llvm.SI.load.const(<16 x i8> %22, i32 628) + %102 = call float @llvm.SI.load.const(<16 x i8> %22, i32 632) + %103 = call float @llvm.SI.load.const(<16 x i8> %22, i32 636) + %104 = call float @llvm.SI.load.const(<16 x i8> %22, i32 640) + %105 = call float @llvm.SI.load.const(<16 x i8> %22, i32 644) + %106 = call float @llvm.SI.load.const(<16 x i8> %22, i32 648) + %107 = call float @llvm.SI.load.const(<16 x i8> %22, i32 652) + %108 = call float @llvm.SI.load.const(<16 x i8> %22, i32 656) + %109 = call float @llvm.SI.load.const(<16 x i8> %22, i32 660) + %110 = call float @llvm.SI.load.const(<16 x i8> %22, i32 664) + %111 = call float @llvm.SI.load.const(<16 x i8> %22, i32 668) + %112 = call float @llvm.SI.load.const(<16 x i8> %22, i32 672) + %113 = call float @llvm.SI.load.const(<16 x i8> %22, i32 676) + %114 = call float @llvm.SI.load.const(<16 x i8> %22, i32 680) + %115 = call float @llvm.SI.load.const(<16 x i8> %22, i32 684) + %116 = call float @llvm.SI.load.const(<16 x i8> %22, i32 688) + %117 = call float @llvm.SI.load.const(<16 x i8> %22, i32 692) + %118 = call float @llvm.SI.load.const(<16 x i8> %22, i32 696) + %119 = call float @llvm.SI.load.const(<16 x i8> %22, i32 700) + %120 = call float @llvm.SI.load.const(<16 x i8> %22, i32 704) + %121 = call float @llvm.SI.load.const(<16 x i8> %22, i32 708) + %122 = call float @llvm.SI.load.const(<16 x i8> %22, i32 712) + %123 = call float @llvm.SI.load.const(<16 x i8> %22, i32 716) + %124 = call float @llvm.SI.load.const(<16 x i8> %22, i32 864) + %125 = call float @llvm.SI.load.const(<16 x i8> %22, i32 868) + %126 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0 + %127 = load <32 x i8> addrspace(2)* %126, !tbaa !0 + %128 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0 + %129 = load <16 x i8> addrspace(2)* %128, !tbaa !0 + %130 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1 + %131 = load <32 x i8> addrspace(2)* %130, !tbaa !0 + %132 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1 + %133 = load <16 x i8> addrspace(2)* %132, !tbaa !0 + %134 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2 + %135 = load <32 x i8> addrspace(2)* %134, !tbaa !0 + %136 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2 + %137 = load <16 x i8> addrspace(2)* %136, !tbaa !0 + %138 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3 + %139 = load <32 x i8> addrspace(2)* %138, !tbaa !0 + %140 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3 + %141 = load <16 x i8> addrspace(2)* %140, !tbaa !0 + %142 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4 + %143 = load <32 x i8> addrspace(2)* %142, !tbaa !0 + %144 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4 + %145 = load <16 x i8> addrspace(2)* %144, !tbaa !0 + %146 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5 + %147 = load <32 x i8> addrspace(2)* %146, !tbaa !0 + %148 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5 + %149 = load <16 x i8> addrspace(2)* %148, !tbaa !0 + %150 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6 + %151 = load <32 x i8> addrspace(2)* %150, !tbaa !0 + %152 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6 + %153 = load <16 x i8> addrspace(2)* %152, !tbaa !0 + %154 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7 + 
%155 = load <32 x i8> addrspace(2)* %154, !tbaa !0 + %156 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7 + %157 = load <16 x i8> addrspace(2)* %156, !tbaa !0 + %158 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 8 + %159 = load <32 x i8> addrspace(2)* %158, !tbaa !0 + %160 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 8 + %161 = load <16 x i8> addrspace(2)* %160, !tbaa !0 + %162 = fcmp ugt float %17, 0.000000e+00 + %163 = select i1 %162, float 1.000000e+00, float 0.000000e+00 + %164 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %4, <2 x i32> %6) + %165 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %4, <2 x i32> %6) + %166 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %4, <2 x i32> %6) + %167 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %4, <2 x i32> %6) + %168 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %4, <2 x i32> %6) + %169 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %4, <2 x i32> %6) + %170 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %4, <2 x i32> %6) + %171 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %4, <2 x i32> %6) + %172 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %4, <2 x i32> %6) + %173 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %4, <2 x i32> %6) + %174 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %4, <2 x i32> %6) + %175 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %4, <2 x i32> %6) + %176 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %4, <2 x i32> %6) + %177 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %4, <2 x i32> %6) + %178 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %4, <2 x i32> %6) + %179 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %4, <2 x i32> %6) + %180 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %4, <2 x i32> %6) + %181 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %4, <2 x i32> %6) + %182 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %4, <2 x i32> %6) + %183 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %4, <2 x i32> %6) + %184 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %4, <2 x i32> %6) + %185 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %4, <2 x i32> %6) + %186 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %4, <2 x i32> %6) + %187 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %4, <2 x i32> %6) + %188 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %4, <2 x i32> %6) + %189 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %4, <2 x i32> %6) + %190 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %4, <2 x i32> %6) + %191 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %4, <2 x i32> %6) + %192 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %4, <2 x i32> %6) + %193 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %4, <2 x i32> %6) + %194 = call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %4, <2 x i32> %6) + %195 = call float @llvm.SI.fs.interp(i32 3, i32 7, i32 %4, <2 x i32> %6) + %196 = fmul float %14, %124 + %197 = fadd float %196, %125 + %198 = call float @llvm.AMDIL.clamp.(float %163, float 0.000000e+00, float 1.000000e+00) + %199 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) + %200 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) + %201 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00) + %202 = bitcast float %198 to i32 + %203 = icmp ne i32 %202, 0 + %. 
= select i1 %203, float -1.000000e+00, float 1.000000e+00 + %204 = fsub float -0.000000e+00, %164 + %205 = fadd float %44, %204 + %206 = fsub float -0.000000e+00, %165 + %207 = fadd float %45, %206 + %208 = fsub float -0.000000e+00, %166 + %209 = fadd float %46, %208 + %210 = fmul float %205, %205 + %211 = fmul float %207, %207 + %212 = fadd float %211, %210 + %213 = fmul float %209, %209 + %214 = fadd float %212, %213 + %215 = call float @llvm.AMDGPU.rsq(float %214) + %216 = fmul float %205, %215 + %217 = fmul float %207, %215 + %218 = fmul float %209, %215 + %219 = fmul float %., %54 + %220 = fmul float %13, %47 + %221 = fmul float %197, %48 + %222 = bitcast float %174 to i32 + %223 = bitcast float %175 to i32 + %224 = insertelement <2 x i32> undef, i32 %222, i32 0 + %225 = insertelement <2 x i32> %224, i32 %223, i32 1 + %226 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %225, <32 x i8> %131, <16 x i8> %133, i32 2) + %227 = extractelement <4 x float> %226, i32 0 + %228 = extractelement <4 x float> %226, i32 1 + %229 = extractelement <4 x float> %226, i32 2 + %230 = extractelement <4 x float> %226, i32 3 + %231 = fmul float %227, 0x4012611180000000 + %232 = fmul float %228, 0x4012611180000000 + %233 = fmul float %229, 0x4012611180000000 + %234 = call float @llvm.AMDGPU.lrp(float %27, float %231, float 1.000000e+00) + %235 = call float @llvm.AMDGPU.lrp(float %27, float %232, float 1.000000e+00) + %236 = call float @llvm.AMDGPU.lrp(float %27, float %233, float 1.000000e+00) + %237 = fmul float %216, %184 + %238 = fmul float %217, %185 + %239 = fadd float %238, %237 + %240 = fmul float %218, %186 + %241 = fadd float %239, %240 + %242 = fmul float %216, %187 + %243 = fmul float %217, %188 + %244 = fadd float %243, %242 + %245 = fmul float %218, %189 + %246 = fadd float %244, %245 + %247 = fmul float %216, %190 + %248 = fmul float %217, %191 + %249 = fadd float %248, %247 + %250 = fmul float %218, %192 + %251 = fadd float %249, %250 + %252 = call float @llvm.AMDIL.clamp.(float %251, float 0.000000e+00, float 1.000000e+00) + %253 = fmul float %214, 0x3F5A36E2E0000000 + %254 = call float @llvm.AMDIL.clamp.(float %253, float 0.000000e+00, float 1.000000e+00) + %255 = fsub float -0.000000e+00, %254 + %256 = fadd float 1.000000e+00, %255 + %257 = call float @llvm.pow.f32(float %252, float 2.500000e-01) + %258 = fmul float %39, %257 + %259 = fmul float %241, %258 + %260 = fmul float %246, %258 + %261 = fmul float %259, %230 + %262 = fmul float %260, %230 + %263 = fadd float %252, 0x3EE4F8B580000000 + %264 = fsub float -0.000000e+00, %252 + %265 = fadd float 1.000000e+00, %264 + %266 = fmul float 1.200000e+01, %265 + %267 = fadd float %266, 4.000000e+00 + %268 = fsub float -0.000000e+00, %267 + %269 = fmul float %268, %263 + %270 = fsub float -0.000000e+00, %267 + %271 = fmul float %270, %263 + %272 = fsub float -0.000000e+00, %267 + %273 = fmul float %272, %263 + %274 = fdiv float 1.000000e+00, %269 + %275 = fdiv float 1.000000e+00, %271 + %276 = fdiv float 1.000000e+00, %273 + %277 = fmul float %261, %274 + %278 = fmul float %262, %275 + %279 = fmul float %263, %276 + br label %LOOP + +LOOP: ; preds = %LOOP, %main_body + %temp144.0 = phi float [ 1.000000e+00, %main_body ], [ %292, %LOOP ] + %temp168.0 = phi float [ %176, %main_body ], [ %288, %LOOP ] + %temp169.0 = phi float [ %177, %main_body ], [ %289, %LOOP ] + %temp170.0 = phi float [ %256, %main_body ], [ %290, %LOOP ] + %280 = bitcast float %temp168.0 to i32 + %281 = bitcast float %temp169.0 to i32 + %282 = insertelement <4 x i32> undef, 
i32 %280, i32 0 + %283 = insertelement <4 x i32> %282, i32 %281, i32 1 + %284 = insertelement <4 x i32> %283, i32 0, i32 2 + %285 = insertelement <4 x i32> %284, i32 undef, i32 3 + %286 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %285, <32 x i8> %147, <16 x i8> %149, i32 2) + %287 = extractelement <4 x float> %286, i32 3 + %288 = fadd float %temp168.0, %277 + %289 = fadd float %temp169.0, %278 + %290 = fadd float %temp170.0, %279 + %291 = fsub float -0.000000e+00, %287 + %292 = fadd float %290, %291 + %293 = fcmp oge float 0.000000e+00, %292 + %294 = sext i1 %293 to i32 + %295 = bitcast i32 %294 to float + %296 = bitcast float %295 to i32 + %297 = icmp ne i32 %296, 0 + br i1 %297, label %IF189, label %LOOP + +IF189: ; preds = %LOOP + %298 = extractelement <4 x float> %286, i32 0 + %299 = extractelement <4 x float> %286, i32 1 + %300 = extractelement <4 x float> %286, i32 2 + %301 = fsub float -0.000000e+00, %292 + %302 = fadd float %temp144.0, %301 + %303 = fdiv float 1.000000e+00, %302 + %304 = fmul float %292, %303 + %305 = fadd float %304, -1.000000e+00 + %306 = fmul float %305, %277 + %307 = fadd float %306, %288 + %308 = fmul float %305, %278 + %309 = fadd float %308, %289 + %310 = fsub float -0.000000e+00, %176 + %311 = fadd float %307, %310 + %312 = fsub float -0.000000e+00, %177 + %313 = fadd float %309, %312 + %314 = fadd float %176, %311 + %315 = fadd float %177, %313 + %316 = fmul float %311, %67 + %317 = fmul float %313, %68 + %318 = fmul float %316, %55 + %319 = fmul float %316, %56 + %320 = fmul float %317, %57 + %321 = fadd float %320, %318 + %322 = fmul float %317, %58 + %323 = fadd float %322, %319 + %324 = fadd float %178, %321 + %325 = fadd float %179, %323 + %326 = fmul float %316, %59 + %327 = fmul float %316, %60 + %328 = fmul float %316, %61 + %329 = fmul float %316, %62 + %330 = fmul float %317, %63 + %331 = fadd float %330, %326 + %332 = fmul float %317, %64 + %333 = fadd float %332, %327 + %334 = fmul float %317, %65 + %335 = fadd float %334, %328 + %336 = fmul float %317, %66 + %337 = fadd float %336, %329 + %338 = fadd float %168, %331 + %339 = fadd float %169, %333 + %340 = fadd float %170, %335 + %341 = fadd float %171, %337 + %342 = bitcast float %338 to i32 + %343 = bitcast float %339 to i32 + %344 = insertelement <2 x i32> undef, i32 %342, i32 0 + %345 = insertelement <2 x i32> %344, i32 %343, i32 1 + %346 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %345, <32 x i8> %135, <16 x i8> %137, i32 2) + %347 = extractelement <4 x float> %346, i32 0 + %348 = extractelement <4 x float> %346, i32 1 + %349 = extractelement <4 x float> %346, i32 2 + %350 = extractelement <4 x float> %346, i32 3 + %351 = fmul float %347, %23 + %352 = fmul float %348, %24 + %353 = fmul float %349, %25 + %354 = fmul float %350, %26 + %355 = fmul float %351, %180 + %356 = fmul float %352, %181 + %357 = fmul float %353, %182 + %358 = fmul float %354, %183 + %359 = fsub float -0.000000e+00, %350 + %360 = fadd float 1.000000e+00, %359 + %361 = fmul float %360, %49 + %362 = call float @llvm.AMDGPU.lrp(float %361, float %347, float %355) + %363 = call float @llvm.AMDGPU.lrp(float %361, float %348, float %356) + %364 = call float @llvm.AMDGPU.lrp(float %361, float %349, float %357) + %365 = bitcast float %340 to i32 + %366 = bitcast float %341 to i32 + %367 = insertelement <2 x i32> undef, i32 %365, i32 0 + %368 = insertelement <2 x i32> %367, i32 %366, i32 1 + %369 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %368, <32 x i8> %151, <16 x i8> %153, i32 2) + %370 = 
extractelement <4 x float> %369, i32 2 + %371 = fmul float %362, %234 + %372 = fmul float %363, %235 + %373 = fmul float %364, %236 + %374 = fmul float %358, %230 + %375 = bitcast float %314 to i32 + %376 = bitcast float %315 to i32 + %377 = insertelement <2 x i32> undef, i32 %375, i32 0 + %378 = insertelement <2 x i32> %377, i32 %376, i32 1 + %379 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %378, <32 x i8> %139, <16 x i8> %141, i32 2) + %380 = extractelement <4 x float> %379, i32 0 + %381 = extractelement <4 x float> %379, i32 1 + %382 = extractelement <4 x float> %379, i32 2 + %383 = extractelement <4 x float> %379, i32 3 + %384 = fcmp olt float 0.000000e+00, %382 + %385 = sext i1 %384 to i32 + %386 = bitcast i32 %385 to float + %387 = bitcast float %386 to i32 + %388 = icmp ne i32 %387, 0 + %.224 = select i1 %388, float %381, float %380 + %.225 = select i1 %388, float %383, float %381 + %389 = bitcast float %324 to i32 + %390 = bitcast float %325 to i32 + %391 = insertelement <2 x i32> undef, i32 %389, i32 0 + %392 = insertelement <2 x i32> %391, i32 %390, i32 1 + %393 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %392, <32 x i8> %143, <16 x i8> %145, i32 2) + %394 = extractelement <4 x float> %393, i32 0 + %395 = extractelement <4 x float> %393, i32 1 + %396 = extractelement <4 x float> %393, i32 2 + %397 = extractelement <4 x float> %393, i32 3 + %398 = fcmp olt float 0.000000e+00, %396 + %399 = sext i1 %398 to i32 + %400 = bitcast i32 %399 to float + %401 = bitcast float %400 to i32 + %402 = icmp ne i32 %401, 0 + %temp112.1 = select i1 %402, float %395, float %394 + %temp113.1 = select i1 %402, float %397, float %395 + %403 = fmul float %.224, 2.000000e+00 + %404 = fadd float %403, -1.000000e+00 + %405 = fmul float %.225, 2.000000e+00 + %406 = fadd float %405, -1.000000e+00 + %407 = fmul float %temp112.1, 2.000000e+00 + %408 = fadd float %407, -1.000000e+00 + %409 = fmul float %temp113.1, 2.000000e+00 + %410 = fadd float %409, -1.000000e+00 + %411 = fsub float -0.000000e+00, %404 + %412 = fmul float %411, %35 + %413 = fsub float -0.000000e+00, %406 + %414 = fmul float %413, %35 + %415 = fsub float -0.000000e+00, %408 + %416 = fmul float %415, %36 + %417 = fsub float -0.000000e+00, %410 + %418 = fmul float %417, %36 + %419 = fmul float %416, %370 + %420 = fmul float %418, %370 + %421 = call float @fabs(float %412) + %422 = call float @fabs(float %414) + %423 = fsub float -0.000000e+00, %421 + %424 = fadd float 1.000000e+00, %423 + %425 = fsub float -0.000000e+00, %422 + %426 = fadd float 1.000000e+00, %425 + %427 = fmul float %424, %419 + %428 = fadd float %427, %412 + %429 = fmul float %426, %420 + %430 = fadd float %429, %414 + %431 = fmul float %428, %428 + %432 = fmul float %430, %430 + %433 = fadd float %431, %432 + %434 = fsub float -0.000000e+00, %433 + %435 = fadd float 0x3FF00068E0000000, %434 + %436 = call float @llvm.AMDIL.clamp.(float %435, float 0.000000e+00, float 1.000000e+00) + %437 = call float @llvm.AMDGPU.rsq(float %436) + %438 = fmul float %437, %436 + %439 = fsub float -0.000000e+00, %436 + %440 = call float @llvm.AMDGPU.cndlt(float %439, float %438, float 0.000000e+00) + %441 = fmul float %184, %428 + %442 = fmul float %185, %428 + %443 = fmul float %186, %428 + %444 = fmul float %187, %430 + %445 = fadd float %444, %441 + %446 = fmul float %188, %430 + %447 = fadd float %446, %442 + %448 = fmul float %189, %430 + %449 = fadd float %448, %443 + %450 = fmul float %190, %440 + %451 = fadd float %450, %445 + %452 = fmul float %191, %440 + %453 = fadd 
float %452, %447 + %454 = fmul float %192, %440 + %455 = fadd float %454, %449 + %456 = fmul float %451, %451 + %457 = fmul float %453, %453 + %458 = fadd float %457, %456 + %459 = fmul float %455, %455 + %460 = fadd float %458, %459 + %461 = call float @llvm.AMDGPU.rsq(float %460) + %462 = fmul float %451, %461 + %463 = fmul float %453, %461 + %464 = fmul float %455, %461 + %465 = fcmp olt float 0.000000e+00, %219 + %466 = sext i1 %465 to i32 + %467 = bitcast i32 %466 to float + %468 = bitcast float %467 to i32 + %469 = icmp ne i32 %468, 0 + br i1 %469, label %IF198, label %ENDIF197 + +IF198: ; preds = %IF189 + %470 = fsub float -0.000000e+00, %462 + %471 = fsub float -0.000000e+00, %463 + %472 = fsub float -0.000000e+00, %464 + br label %ENDIF197 + +ENDIF197: ; preds = %IF189, %IF198 + %temp14.0 = phi float [ %472, %IF198 ], [ %464, %IF189 ] + %temp13.0 = phi float [ %471, %IF198 ], [ %463, %IF189 ] + %temp12.0 = phi float [ %470, %IF198 ], [ %462, %IF189 ] + %473 = bitcast float %220 to i32 + %474 = bitcast float %221 to i32 + %475 = insertelement <2 x i32> undef, i32 %473, i32 0 + %476 = insertelement <2 x i32> %475, i32 %474, i32 1 + %477 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %476, <32 x i8> %159, <16 x i8> %161, i32 2) + %478 = extractelement <4 x float> %477, i32 0 + %479 = extractelement <4 x float> %477, i32 1 + %480 = extractelement <4 x float> %477, i32 2 + %481 = extractelement <4 x float> %477, i32 3 + %482 = fmul float %478, %40 + %483 = fadd float %482, %41 + %484 = fmul float %479, %40 + %485 = fadd float %484, %41 + %486 = fmul float %480, %40 + %487 = fadd float %486, %41 + %488 = fmul float %481, %42 + %489 = fadd float %488, %43 + %490 = bitcast float %172 to i32 + %491 = bitcast float %173 to i32 + %492 = insertelement <2 x i32> undef, i32 %490, i32 0 + %493 = insertelement <2 x i32> %492, i32 %491, i32 1 + %494 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %493, <32 x i8> %155, <16 x i8> %157, i32 2) + %495 = extractelement <4 x float> %494, i32 0 + %496 = extractelement <4 x float> %494, i32 1 + %497 = extractelement <4 x float> %494, i32 2 + %498 = extractelement <4 x float> %494, i32 3 + %499 = fmul float %498, 3.200000e+01 + %500 = fadd float %499, -1.600000e+01 + %501 = call float @llvm.AMDIL.exp.(float %500) + %502 = fmul float %495, %501 + %503 = fmul float %496, %501 + %504 = fmul float %497, %501 + %505 = fmul float %28, %502 + %506 = fadd float %505, %193 + %507 = fmul float %29, %503 + %508 = fadd float %507, %194 + %509 = fmul float %30, %504 + %510 = fadd float %509, %195 + %511 = fmul float %506, %489 + %512 = fmul float %508, %489 + %513 = fmul float %510, %489 + %514 = fmul float %489, 5.000000e-01 + %515 = fadd float %514, 5.000000e-01 + %516 = fmul float %483, %515 + %517 = fadd float %516, %511 + %518 = fmul float %485, %515 + %519 = fadd float %518, %512 + %520 = fmul float %487, %515 + %521 = fadd float %520, %513 + %522 = fmul float %517, %371 + %523 = fmul float %519, %372 + %524 = fmul float %521, %373 + %525 = fmul float %428, 0x3FDB272440000000 + %526 = fmul float %430, 0xBFDB272440000000 + %527 = fadd float %526, %525 + %528 = fmul float %440, 0x3FE99999A0000000 + %529 = fadd float %527, %528 + %530 = fmul float %529, 5.000000e-01 + %531 = fadd float %530, 0x3FE3333340000000 + %532 = fmul float %531, %531 + %533 = fmul float %522, %532 + %534 = fmul float %523, %532 + %535 = fmul float %524, %532 + %536 = fsub float -0.000000e+00, %72 + %537 = fsub float -0.000000e+00, %73 + %538 = fsub float -0.000000e+00, %74 + %539 = 
fmul float %temp12.0, %536 + %540 = fmul float %temp13.0, %537 + %541 = fadd float %540, %539 + %542 = fmul float %temp14.0, %538 + %543 = fadd float %541, %542 + %544 = call float @llvm.AMDIL.clamp.(float %543, float 0.000000e+00, float 1.000000e+00) + %545 = fmul float %371, %544 + %546 = fmul float %372, %544 + %547 = fmul float %373, %544 + %548 = fmul float %545, %69 + %549 = fmul float %546, %70 + %550 = fmul float %547, %71 + %551 = fsub float -0.000000e+00, %164 + %552 = fadd float %97, %551 + %553 = fsub float -0.000000e+00, %165 + %554 = fadd float %98, %553 + %555 = fsub float -0.000000e+00, %166 + %556 = fadd float %99, %555 + %557 = fmul float %552, %552 + %558 = fmul float %554, %554 + %559 = fadd float %558, %557 + %560 = fmul float %556, %556 + %561 = fadd float %559, %560 + %562 = call float @llvm.AMDGPU.rsq(float %561) + %563 = fmul float %562, %561 + %564 = fsub float -0.000000e+00, %561 + %565 = call float @llvm.AMDGPU.cndlt(float %564, float %563, float 0.000000e+00) + %566 = fsub float -0.000000e+00, %84 + %567 = fadd float %565, %566 + %568 = fsub float -0.000000e+00, %83 + %569 = fadd float %565, %568 + %570 = fsub float -0.000000e+00, %82 + %571 = fadd float %565, %570 + %572 = fsub float -0.000000e+00, %84 + %573 = fadd float %83, %572 + %574 = fsub float -0.000000e+00, %83 + %575 = fadd float %82, %574 + %576 = fsub float -0.000000e+00, %82 + %577 = fadd float %81, %576 + %578 = fdiv float 1.000000e+00, %573 + %579 = fdiv float 1.000000e+00, %575 + %580 = fdiv float 1.000000e+00, %577 + %581 = fmul float %567, %578 + %582 = fmul float %569, %579 + %583 = fmul float %571, %580 + %584 = fcmp olt float %565, %83 + %585 = sext i1 %584 to i32 + %586 = bitcast i32 %585 to float + %587 = bitcast float %586 to i32 + %588 = icmp ne i32 %587, 0 + br i1 %588, label %ENDIF200, label %ELSE202 + +ELSE202: ; preds = %ENDIF197 + %589 = fcmp olt float %565, %82 + %590 = sext i1 %589 to i32 + %591 = bitcast i32 %590 to float + %592 = bitcast float %591 to i32 + %593 = icmp ne i32 %592, 0 + br i1 %593, label %ENDIF200, label %ELSE205 + +ENDIF200: ; preds = %ELSE205, %ELSE202, %ENDIF197 + %temp80.0 = phi float [ %581, %ENDIF197 ], [ %.226, %ELSE205 ], [ %582, %ELSE202 ] + %temp88.0 = phi float [ %122, %ENDIF197 ], [ %.227, %ELSE205 ], [ %120, %ELSE202 ] + %temp89.0 = phi float [ %123, %ENDIF197 ], [ %.228, %ELSE205 ], [ %121, %ELSE202 ] + %temp90.0 = phi float [ %120, %ENDIF197 ], [ %116, %ELSE205 ], [ %118, %ELSE202 ] + %temp91.0 = phi float [ %121, %ENDIF197 ], [ %117, %ELSE205 ], [ %119, %ELSE202 ] + %594 = fcmp olt float %565, %83 + %595 = sext i1 %594 to i32 + %596 = bitcast i32 %595 to float + %597 = bitcast float %596 to i32 + %598 = icmp ne i32 %597, 0 + br i1 %598, label %ENDIF209, label %ELSE211 + +ELSE205: ; preds = %ELSE202 + %599 = fcmp olt float %565, %81 + %600 = sext i1 %599 to i32 + %601 = bitcast i32 %600 to float + %602 = bitcast float %601 to i32 + %603 = icmp ne i32 %602, 0 + %.226 = select i1 %603, float %583, float 1.000000e+00 + %.227 = select i1 %603, float %118, float %116 + %.228 = select i1 %603, float %119, float %117 + br label %ENDIF200 + +ELSE211: ; preds = %ENDIF200 + %604 = fcmp olt float %565, %82 + %605 = sext i1 %604 to i32 + %606 = bitcast i32 %605 to float + %607 = bitcast float %606 to i32 + %608 = icmp ne i32 %607, 0 + br i1 %608, label %ENDIF209, label %ELSE214 + +ENDIF209: ; preds = %ELSE214, %ELSE211, %ENDIF200 + %temp52.0 = phi float [ %108, %ENDIF200 ], [ %100, %ELSE214 ], [ %104, %ELSE211 ] + %temp53.0 = phi float [ %109, %ENDIF200 ], [ 
%101, %ELSE214 ], [ %105, %ELSE211 ] + %temp54.0 = phi float [ %110, %ENDIF200 ], [ %102, %ELSE214 ], [ %106, %ELSE211 ] + %temp55.0 = phi float [ %111, %ENDIF200 ], [ %103, %ELSE214 ], [ %107, %ELSE211 ] + %temp68.0 = phi float [ %112, %ENDIF200 ], [ %.230, %ELSE214 ], [ %108, %ELSE211 ] + %temp69.0 = phi float [ %113, %ENDIF200 ], [ %.231, %ELSE214 ], [ %109, %ELSE211 ] + %temp70.0 = phi float [ %114, %ENDIF200 ], [ %.232, %ELSE214 ], [ %110, %ELSE211 ] + %temp71.0 = phi float [ %115, %ENDIF200 ], [ %.233, %ELSE214 ], [ %111, %ELSE211 ] + %609 = fmul float %164, %85 + %610 = fmul float %165, %86 + %611 = fadd float %609, %610 + %612 = fmul float %166, %87 + %613 = fadd float %611, %612 + %614 = fmul float %167, %88 + %615 = fadd float %613, %614 + %616 = fmul float %164, %89 + %617 = fmul float %165, %90 + %618 = fadd float %616, %617 + %619 = fmul float %166, %91 + %620 = fadd float %618, %619 + %621 = fmul float %167, %92 + %622 = fadd float %620, %621 + %623 = fmul float %164, %93 + %624 = fmul float %165, %94 + %625 = fadd float %623, %624 + %626 = fmul float %166, %95 + %627 = fadd float %625, %626 + %628 = fmul float %167, %96 + %629 = fadd float %627, %628 + %630 = fsub float -0.000000e+00, %78 + %631 = fadd float 1.000000e+00, %630 + %632 = call float @fabs(float %615) + %633 = call float @fabs(float %622) + %634 = fcmp oge float %631, %632 + %635 = sext i1 %634 to i32 + %636 = bitcast i32 %635 to float + %637 = bitcast float %636 to i32 + %638 = and i32 %637, 1065353216 + %639 = bitcast i32 %638 to float + %640 = fcmp oge float %631, %633 + %641 = sext i1 %640 to i32 + %642 = bitcast i32 %641 to float + %643 = bitcast float %642 to i32 + %644 = and i32 %643, 1065353216 + %645 = bitcast i32 %644 to float + %646 = fmul float %639, %645 + %647 = fmul float %629, %646 + %648 = fmul float %615, %temp68.0 + %649 = fadd float %648, %temp70.0 + %650 = fmul float %622, %temp69.0 + %651 = fadd float %650, %temp71.0 + %652 = fmul float %615, %temp52.0 + %653 = fadd float %652, %temp54.0 + %654 = fmul float %622, %temp53.0 + %655 = fadd float %654, %temp55.0 + %656 = fadd float %temp80.0, -1.000000e+00 + %657 = fmul float %656, %77 + %658 = fadd float %657, 1.000000e+00 + %659 = call float @llvm.AMDIL.clamp.(float %658, float 0.000000e+00, float 1.000000e+00) + %660 = bitcast float %649 to i32 + %661 = bitcast float %651 to i32 + %662 = bitcast float 0.000000e+00 to i32 + %663 = insertelement <4 x i32> undef, i32 %660, i32 0 + %664 = insertelement <4 x i32> %663, i32 %661, i32 1 + %665 = insertelement <4 x i32> %664, i32 %662, i32 2 + %666 = insertelement <4 x i32> %665, i32 undef, i32 3 + %667 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %666, <32 x i8> %127, <16 x i8> %129, i32 2) + %668 = extractelement <4 x float> %667, i32 0 + %669 = extractelement <4 x float> %667, i32 1 + %670 = bitcast float %653 to i32 + %671 = bitcast float %655 to i32 + %672 = bitcast float 0.000000e+00 to i32 + %673 = insertelement <4 x i32> undef, i32 %670, i32 0 + %674 = insertelement <4 x i32> %673, i32 %671, i32 1 + %675 = insertelement <4 x i32> %674, i32 %672, i32 2 + %676 = insertelement <4 x i32> %675, i32 undef, i32 3 + %677 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %676, <32 x i8> %127, <16 x i8> %129, i32 2) + %678 = extractelement <4 x float> %677, i32 0 + %679 = extractelement <4 x float> %677, i32 1 + %680 = fsub float -0.000000e+00, %669 + %681 = fadd float 1.000000e+00, %680 + %682 = fsub float -0.000000e+00, %679 + %683 = fadd float 1.000000e+00, %682 + %684 = fmul float %681, 
2.500000e-01 + %685 = fmul float %683, 2.500000e-01 + %686 = fsub float -0.000000e+00, %684 + %687 = fadd float %668, %686 + %688 = fsub float -0.000000e+00, %685 + %689 = fadd float %678, %688 + %690 = fmul float %647, %temp88.0 + %691 = fadd float %690, %temp89.0 + %692 = fmul float %647, %temp90.0 + %693 = fadd float %692, %temp91.0 + %694 = call float @llvm.AMDIL.clamp.(float %691, float 0.000000e+00, float 1.000000e+00) + %695 = call float @llvm.AMDIL.clamp.(float %693, float 0.000000e+00, float 1.000000e+00) + %696 = fsub float -0.000000e+00, %694 + %697 = fadd float %668, %696 + %698 = fsub float -0.000000e+00, %695 + %699 = fadd float %678, %698 + %700 = fmul float %668, %668 + %701 = fmul float %678, %678 + %702 = fsub float -0.000000e+00, %700 + %703 = fadd float %687, %702 + %704 = fsub float -0.000000e+00, %701 + %705 = fadd float %689, %704 + %706 = fcmp uge float %703, %75 + %707 = select i1 %706, float %703, float %75 + %708 = fcmp uge float %705, %75 + %709 = select i1 %708, float %705, float %75 + %710 = fmul float %697, %697 + %711 = fadd float %710, %707 + %712 = fmul float %699, %699 + %713 = fadd float %712, %709 + %714 = fdiv float 1.000000e+00, %711 + %715 = fdiv float 1.000000e+00, %713 + %716 = fmul float %707, %714 + %717 = fmul float %709, %715 + %718 = fcmp oge float %697, 0.000000e+00 + %719 = sext i1 %718 to i32 + %720 = bitcast i32 %719 to float + %721 = bitcast float %720 to i32 + %722 = icmp ne i32 %721, 0 + %.229 = select i1 %722, float 1.000000e+00, float %716 + %723 = fcmp oge float %699, 0.000000e+00 + %724 = sext i1 %723 to i32 + %725 = bitcast i32 %724 to float + %726 = bitcast float %725 to i32 + %727 = icmp ne i32 %726, 0 + %temp28.0 = select i1 %727, float 1.000000e+00, float %717 + %728 = call float @llvm.AMDGPU.lrp(float %659, float %temp28.0, float %.229) + %729 = call float @llvm.pow.f32(float %728, float %76) + %730 = fmul float %729, %79 + %731 = fadd float %730, %80 + %732 = call float @llvm.AMDIL.clamp.(float %731, float 0.000000e+00, float 1.000000e+00) + %733 = fmul float %732, %732 + %734 = fmul float 2.000000e+00, %732 + %735 = fsub float -0.000000e+00, %734 + %736 = fadd float 3.000000e+00, %735 + %737 = fmul float %733, %736 + %738 = fmul float %548, %737 + %739 = fmul float %549, %737 + %740 = fmul float %550, %737 + %741 = fmul float %738, %515 + %742 = fadd float %741, %533 + %743 = fmul float %739, %515 + %744 = fadd float %743, %534 + %745 = fmul float %740, %515 + %746 = fadd float %745, %535 + %747 = call float @llvm.AMDGPU.lrp(float %230, float %287, float 1.000000e+00) + %748 = call float @llvm.AMDGPU.lrp(float %37, float %298, float 1.000000e+00) + %749 = call float @llvm.AMDGPU.lrp(float %37, float %299, float 1.000000e+00) + %750 = call float @llvm.AMDGPU.lrp(float %37, float %300, float 1.000000e+00) + %751 = call float @llvm.AMDGPU.lrp(float %38, float %747, float 1.000000e+00) + %752 = fmul float %748, %751 + %753 = fmul float %749, %751 + %754 = fmul float %750, %751 + %755 = fmul float %742, %752 + %756 = fmul float %744, %753 + %757 = fmul float %746, %754 + %758 = fmul float %temp12.0, %216 + %759 = fmul float %temp13.0, %217 + %760 = fadd float %759, %758 + %761 = fmul float %temp14.0, %218 + %762 = fadd float %760, %761 + %763 = call float @fabs(float %762) + %764 = fmul float %763, %763 + %765 = fmul float %764, %50 + %766 = fadd float %765, %51 + %767 = call float @llvm.AMDIL.clamp.(float %766, float 0.000000e+00, float 1.000000e+00) + %768 = fsub float -0.000000e+00, %767 + %769 = fadd float 1.000000e+00, %768 
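+ ; (Reader's annotation, not part of the original compiler dump: the block
+ ; below appears to blend the shaded color via llvm.AMDGPU.lrp, clamp each
+ ; channel between a tiny positive epsilon and 65504.0, the largest
+ ; half-precision value, and pack channel pairs with llvm.SI.packf16
+ ; before the final llvm.SI.export.)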
+ %770 = fmul float %33, %769 + %771 = fmul float %33, %769 + %772 = fmul float %33, %769 + %773 = fmul float %34, %769 + %774 = call float @llvm.AMDGPU.lrp(float %770, float %31, float %755) + %775 = call float @llvm.AMDGPU.lrp(float %771, float %31, float %756) + %776 = call float @llvm.AMDGPU.lrp(float %772, float %31, float %757) + %777 = call float @llvm.AMDGPU.lrp(float %773, float %32, float %374) + %778 = fcmp uge float %774, 0x3E6FFFFE60000000 + %779 = select i1 %778, float %774, float 0x3E6FFFFE60000000 + %780 = fcmp uge float %775, 0x3E6FFFFE60000000 + %781 = select i1 %780, float %775, float 0x3E6FFFFE60000000 + %782 = fcmp uge float %776, 0x3E6FFFFE60000000 + %783 = select i1 %782, float %776, float 0x3E6FFFFE60000000 + %784 = fcmp uge float %779, 6.550400e+04 + %785 = select i1 %784, float 6.550400e+04, float %779 + %786 = fcmp uge float %781, 6.550400e+04 + %787 = select i1 %786, float 6.550400e+04, float %781 + %788 = fcmp uge float %783, 6.550400e+04 + %789 = select i1 %788, float 6.550400e+04, float %783 + %790 = fmul float %777, %52 + %791 = fadd float %790, %53 + %792 = call float @llvm.AMDIL.clamp.(float %791, float 0.000000e+00, float 1.000000e+00) + %793 = call i32 @llvm.SI.packf16(float %785, float %787) + %794 = bitcast i32 %793 to float + %795 = call i32 @llvm.SI.packf16(float %789, float %792) + %796 = bitcast i32 %795 to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %794, float %796, float %794, float %796) + ret void + +ELSE214: ; preds = %ELSE211 + %797 = fcmp olt float %565, %81 + %798 = sext i1 %797 to i32 + %799 = bitcast i32 %798 to float + %800 = bitcast float %799 to i32 + %801 = icmp ne i32 %800, 0 + %.230 = select i1 %801, float %104, float %100 + %.231 = select i1 %801, float %105, float %101 + %.232 = select i1 %801, float %106, float %102 + %.233 = select i1 %801, float %107, float %103 + br label %ENDIF209 +} + +; Function Attrs: readnone +declare float @llvm.AMDIL.clamp.(float, float, float) #2 + +; Function Attrs: nounwind readnone +declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 + +; Function Attrs: readnone +declare float @llvm.AMDGPU.lrp(float, float, float) #2 + +; Function Attrs: nounwind readnone +declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 + +; Function Attrs: readnone +declare float @llvm.AMDGPU.cndlt(float, float, float) #2 + +; Function Attrs: readnone +declare float @llvm.AMDIL.exp.(float) #2 + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } +attributes #2 = { readnone } +attributes #3 = { nounwind readonly } +attributes #4 = { readonly } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/store-vector-ptrs.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/store-vector-ptrs.ll index 01210ce1f944..3af7d919c6f4 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/R600/store-vector-ptrs.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/R600/store-vector-ptrs.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; XFAIL: * ; RUN: llc -march=r600 -mcpu=SI < %s diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll index 9c74aa0c7c35..81f586fe8a7c 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll @@ -10,7 +10,7 @@ ; SPARC64-LABEL: variable_alloca_with_adj_call_stack ; SPARC64: save %sp, -128, %sp -; 
SPARC64: add {{.+}}, 128, %o0 +; SPARC64: add {{.+}}, 2175, %o0 ; SPARC64: add %sp, -80, %sp ; SPARC64: call foo ; SPARC64: add %sp, 80, %sp diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/64abi.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/64abi.ll index 8b752a1a2c3c..7f9d216e52e6 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/64abi.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/64abi.ll @@ -411,3 +411,54 @@ entry: } declare i32 @use_buf(i32, i8*) + +; CHECK-LABEL: test_fp128_args +; CHECK-DAG: std %f0, [%fp+{{.+}}] +; CHECK-DAG: std %f2, [%fp+{{.+}}] +; CHECK-DAG: std %f6, [%fp+{{.+}}] +; CHECK-DAG: std %f4, [%fp+{{.+}}] +; CHECK: add %fp, [[Offset:[0-9]+]], %o0 +; CHECK: call _Qp_add +; CHECK: ldd [%fp+[[Offset]]], %f0 +define fp128 @test_fp128_args(fp128 %a, fp128 %b) { +entry: + %0 = fadd fp128 %a, %b + ret fp128 %0 +} + +declare i64 @receive_fp128(i64 %a, ...) + +; CHECK-LABEL: test_fp128_variable_args +; CHECK-DAG: std %f4, [%sp+[[Offset0:[0-9]+]]] +; CHECK-DAG: std %f6, [%sp+[[Offset1:[0-9]+]]] +; CHECK-DAG: ldx [%sp+[[Offset0]]], %o2 +; CHECK-DAG: ldx [%sp+[[Offset1]]], %o3 +; CHECK: call receive_fp128 +define i64 @test_fp128_variable_args(i64 %a, fp128 %b) { +entry: + %0 = call i64 (i64, ...)* @receive_fp128(i64 %a, fp128 %b) + ret i64 %0 +} + +; CHECK-LABEL: test_call_libfunc +; CHECK: st %f1, [%fp+[[Offset0:[0-9]+]]] +; CHECK: fmovs %f3, %f1 +; CHECK: call cosf +; CHECK: st %f0, [%fp+[[Offset1:[0-9]+]]] +; CHECK: ld [%fp+[[Offset0]]], %f1 +; CHECK: call sinf +; CHECK: ld [%fp+[[Offset1]]], %f1 +; CHECK: fmuls %f1, %f0, %f0 + +define inreg float @test_call_libfunc(float %arg0, float %arg1) { +entry: + %0 = tail call inreg float @cosf(float %arg1) + %1 = tail call inreg float @sinf(float %arg0) + %2 = fmul float %0, %1 + ret float %2 +} + +declare inreg float @cosf(float %arg) readnone nounwind +declare inreg float @sinf(float %arg) readnone nounwind + + diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/64cond.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/64cond.ll index bdc5e70a2de8..8c3f644382dd 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/64cond.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/64cond.ll @@ -109,3 +109,22 @@ entry: %rv = select i1 %tobool, i64 123, i64 0 ret i64 %rv } + +; CHECK-LABEL: setcc_resultty +; CHECK-DAG: srax %i0, 63, %o0 +; CHECK-DAG: or %g0, %i0, %o1 +; CHECK-DAG: or %g0, 0, %o2 +; CHECK-DAG: or %g0, 32, %o3 +; CHECK-DAG: call __multi3 +; CHECK: cmp +; CHECK: movne %xcc, 1, [[R:%[gilo][0-7]]] +; CHECK: or [[R]], %i1, %i0 + +define i1 @setcc_resultty(i64 %a, i1 %b) { + %a0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %a, i64 32) + %a1 = extractvalue { i64, i1 } %a0, 1 + %a4 = or i1 %a1, %b + ret i1 %a4 +} + +declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/64spill.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/64spill.ll new file mode 100644 index 000000000000..ab08d6b0570b --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/64spill.ll @@ -0,0 +1,116 @@ +; RUN: llc < %s -march=sparcv9 | FileCheck %s + +target datalayout = "E-i64:64-n32:64-S128" +target triple = "sparc64-sun-sparc" + +; CHECK-LABEL: test_and_spill +; CHECK: and %i0, %i1, [[R:%[gilo][0-7]]] +; CHECK: stx [[R]], [%fp+{{.+}}] +; CHECK: ldx [%fp+{{.+}}, %i0 +define i64 @test_and_spill(i64 %a, i64 %b) { +entry: + %r0 = and i64 %a, %b + %0 = tail call i64 asm sideeffect "#$0 $1", 
"=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0) + ret i64 %r0 +} + +; CHECK-LABEL: test_or_spill +; CHECK: or %i0, %i1, [[R:%[gilo][0-7]]] +; CHECK: stx [[R]], [%fp+{{.+}}] +; CHECK: ldx [%fp+{{.+}}, %i0 +define i64 @test_or_spill(i64 %a, i64 %b) { +entry: + %r0 = or i64 %a, %b + %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0) + ret i64 %r0 +} + +; CHECK-LABEL: test_xor_spill +; CHECK: xor %i0, %i1, [[R:%[gilo][0-7]]] +; CHECK: stx [[R]], [%fp+{{.+}}] +; CHECK: ldx [%fp+{{.+}}, %i0 +define i64 @test_xor_spill(i64 %a, i64 %b) { +entry: + %r0 = xor i64 %a, %b + %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0) + ret i64 %r0 +} + + +; CHECK-LABEL: test_add_spill +; CHECK: add %i0, %i1, [[R:%[gilo][0-7]]] +; CHECK: stx [[R]], [%fp+{{.+}}] +; CHECK: ldx [%fp+{{.+}}, %i0 +define i64 @test_add_spill(i64 %a, i64 %b) { +entry: + %r0 = add i64 %a, %b + %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0) + ret i64 %r0 +} + +; CHECK-LABEL: test_sub_spill +; CHECK: sub %i0, %i1, [[R:%[gilo][0-7]]] +; CHECK: stx [[R]], [%fp+{{.+}}] +; CHECK: ldx [%fp+{{.+}}, %i0 +define i64 @test_sub_spill(i64 %a, i64 %b) { +entry: + %r0 = sub i64 %a, %b + %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0) + ret i64 %r0 +} + +; CHECK-LABEL: test_andi_spill +; CHECK: and %i0, 1729, [[R:%[gilo][0-7]]] +; CHECK: stx [[R]], [%fp+{{.+}}] +; CHECK: ldx [%fp+{{.+}}, %i0 +define i64 @test_andi_spill(i64 %a) { +entry: + %r0 = and i64 %a, 1729 + %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0) + ret i64 %r0 +} + +; CHECK-LABEL: test_ori_spill +; CHECK: or %i0, 1729, [[R:%[gilo][0-7]]] +; CHECK: stx [[R]], [%fp+{{.+}}] +; CHECK: ldx [%fp+{{.+}}, %i0 +define i64 @test_ori_spill(i64 %a) { +entry: + %r0 = or i64 %a, 1729 + %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0) + ret i64 %r0 +} + +; CHECK-LABEL: test_xori_spill +; CHECK: xor %i0, 1729, [[R:%[gilo][0-7]]] +; CHECK: stx [[R]], [%fp+{{.+}}] +; CHECK: ldx [%fp+{{.+}}, %i0 +define i64 @test_xori_spill(i64 %a) { +entry: + %r0 = xor i64 %a, 1729 + %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0) + ret i64 %r0 +} + +; CHECK-LABEL: 
test_addi_spill
+; CHECK: add %i0, 1729, [[R:%[gilo][0-7]]]
+; CHECK: stx [[R]], [%fp+{{.+}}]
+; CHECK: ldx [%fp+{{.+}}, %i0
+define i64 @test_addi_spill(i64 %a) {
+entry:
+ %r0 = add i64 %a, 1729
+ %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0)
+ ret i64 %r0
+}
+
+; CHECK-LABEL: test_subi_spill
+; CHECK: add %i0, -1729, [[R:%[gilo][0-7]]]
+; CHECK: stx [[R]], [%fp+{{.+}}]
+; CHECK: ldx [%fp+{{.+}}, %i0
+define i64 @test_subi_spill(i64 %a) {
+entry:
+ %r0 = sub i64 %a, 1729
+ %0 = tail call i64 asm sideeffect "#$0 $1", "=r,r,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6}"(i64 %r0)
+ ret i64 %r0
+}
+
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/atomics.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/atomics.ll
new file mode 100644
index 000000000000..c4a9411f46a7
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/atomics.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s -march=sparcv9 | FileCheck %s
+
+; CHECK-LABEL: test_atomic_i32
+; CHECK: ld [%o0]
+; CHECK: membar
+; CHECK: ld [%o1]
+; CHECK: membar
+; CHECK: membar
+; CHECK: st {{.+}}, [%o2]
+define i32 @test_atomic_i32(i32* %ptr1, i32* %ptr2, i32* %ptr3) {
+entry:
+ %0 = load atomic i32* %ptr1 acquire, align 8
+ %1 = load atomic i32* %ptr2 acquire, align 8
+ %2 = add i32 %0, %1
+ store atomic i32 %2, i32* %ptr3 release, align 8
+ ret i32 %2
+}
+
+; CHECK-LABEL: test_atomic_i64
+; CHECK: ldx [%o0]
+; CHECK: membar
+; CHECK: ldx [%o1]
+; CHECK: membar
+; CHECK: membar
+; CHECK: stx {{.+}}, [%o2]
+define i64 @test_atomic_i64(i64* %ptr1, i64* %ptr2, i64* %ptr3) {
+entry:
+ %0 = load atomic i64* %ptr1 acquire, align 8
+ %1 = load atomic i64* %ptr2 acquire, align 8
+ %2 = add i64 %0, %1
+ store atomic i64 %2, i64* %ptr3 release, align 8
+ ret i64 %2
+}
+
+; CHECK-LABEL: test_cmpxchg_i32
+; CHECK: or %g0, 123, [[R:%[gilo][0-7]]]
+; CHECK: cas [%o1], %o0, [[R]]
+
+define i32 @test_cmpxchg_i32(i32 %a, i32* %ptr) {
+entry:
+ %b = cmpxchg i32* %ptr, i32 %a, i32 123 monotonic
+ ret i32 %b
+}
+
+; CHECK-LABEL: test_cmpxchg_i64
+; CHECK: or %g0, 123, [[R:%[gilo][0-7]]]
+; CHECK: casx [%o1], %o0, [[R]]
+
+define i64 @test_cmpxchg_i64(i64 %a, i64* %ptr) {
+entry:
+ %b = cmpxchg i64* %ptr, i64 %a, i64 123 monotonic
+ ret i64 %b
+}
+
+; CHECK-LABEL: test_swap_i32
+; CHECK: or %g0, 42, [[R:%[gilo][0-7]]]
+; CHECK: swap [%o1], [[R]]
+
+define i32 @test_swap_i32(i32 %a, i32* %ptr) {
+entry:
+ %b = atomicrmw xchg i32* %ptr, i32 42 monotonic
+ ret i32 %b
+}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/ctpop.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/ctpop.ll
index 916a41496e2a..eab1de6c952c 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/ctpop.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/ctpop.ll
@@ -1,8 +1,22 @@
-; RUN: llc < %s -march=sparc -mattr=-v9 | not grep popc
-; RUN: llc < %s -march=sparc -mattr=+v9 | grep popc
+; RUN: llc < %s -march=sparc -mattr=-v9 | FileCheck %s -check-prefix=V8
+; RUN: llc < %s -march=sparc -mattr=+v9 | FileCheck %s -check-prefix=V9
+; RUN: llc < %s -march=sparcv9 | FileCheck %s -check-prefix=SPARC64
 declare i32 @llvm.ctpop.i32(i32)
+; V8-LABEL: test
+; V8-NOT: popc
+
+; V9-LABEL: test
+; V9: srl %o0, 0, %o0
+; V9-NEXT: jmp %o7+8
+;
V9-NEXT: popc %o0, %o0 + +; SPARC64-LABEL: test +; SPARC64: srl %o0, 0, %o0 +; SPARC64: jmp %o7+8 +; SPARC64: popc %o0, %o0 + define i32 @test(i32 %X) { %Y = call i32 @llvm.ctpop.i32( i32 %X ) ; [#uses=1] ret i32 %Y diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/exception.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/exception.ll index cb5b6e5c1168..a1d6015b89a0 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/exception.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/exception.ll @@ -11,7 +11,7 @@ ; CHECK-LABEL: main: ; CHECK: .cfi_startproc -; CHECK: .cfi_def_cfa_register 30 +; CHECK: .cfi_def_cfa_register {{30|%fp}} ; CHECK: .cfi_window_save ; CHECK: .cfi_register 15, 31 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/rem.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/rem.ll index 71f62e4fc1c4..abef1fc112b4 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/rem.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SPARC/rem.ll @@ -21,3 +21,19 @@ define i64 @test2(i64 %X, i64 %Y) { %tmp1 = urem i64 %X, %Y ret i64 %tmp1 } + +; PR18150 +; CHECK-LABEL: test3 +; CHECK: sethi 2545, [[R0:%[gilo][0-7]]] +; CHECK: or [[R0]], 379, [[R1:%[gilo][0-7]]] +; CHECK: mulx %o0, [[R1]], [[R2:%[gilo][0-7]]] +; CHECK: udivx [[R2]], 1021, [[R3:%[gilo][0-7]]] +; CHECK: mulx [[R3]], 1021, [[R4:%[gilo][0-7]]] +; CHECK: sub [[R2]], [[R4]], %o0 + +define i64 @test3(i64 %b) { +entry: + %mul = mul i64 %b, 2606459 + %rem = urem i64 %mul, 1021 + ret i64 %rem +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-01.py b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-01.py index 552c9ca0ea85..edb631d8c6d5 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-01.py +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-01.py @@ -79,7 +79,7 @@ for i in xrange(branch_blocks): next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' print 'before%d:' % i print ' %%bstop%d = getelementptr i32 *%%stop, i64 %d' % (i, i) - print ' %%bcur%d = load volatile i32 *%%bstop%d' % (i, i) + print ' %%bcur%d = load i32 *%%bstop%d' % (i, i) print ' %%btest%d = icmp eq i32 %%limit, %%bcur%d' % (i, i) print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) print '' @@ -95,7 +95,7 @@ for i in xrange(0, main_size, 6): for i in xrange(branch_blocks): print ' %%astop%d = getelementptr i32 *%%stop, i64 %d' % (i, i + 25) - print ' %%acur%d = load volatile i32 *%%astop%d' % (i, i) + print ' %%acur%d = load i32 *%%astop%d' % (i, i) print ' %%atest%d = icmp eq i32 %%limit, %%acur%d' % (i, i) print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) print '' diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-02.py b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-02.py index 0b21ced99a1f..743e12de0f1f 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-02.py +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-02.py @@ -72,7 +72,7 @@ for i in xrange(blocks): print 'b%d:' % i print ' store volatile i8 %d, i8 *%%base' % value print ' %%astop%d = getelementptr i32 *%%stop, i64 %d' % (i, i) - print ' %%acur%d = load volatile i32 *%%astop%d' % (i, i) + print ' %%acur%d = load i32 *%%astop%d' % (i, i) print ' %%atest%d = icmp eq i32 %%limit, %%acur%d' % (i, i) print ' br i1 %%atest%d, label %%%s, label %%%s' % (i, other, next) diff --git 
a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-03.py b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-03.py index 75cdf247c6f3..5c9a93b87f73 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-03.py +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-03.py @@ -79,7 +79,7 @@ for i in xrange(branch_blocks): next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' print 'before%d:' % i print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i) - print ' %%bcur%d = load volatile i8 *%%bstop%d' % (i, i) + print ' %%bcur%d = load i8 *%%bstop%d' % (i, i) print ' %%bext%d = sext i8 %%bcur%d to i32' % (i, i) print ' %%btest%d = icmp eq i32 %%limit, %%bext%d' % (i, i) print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) @@ -96,7 +96,7 @@ for i in xrange(0, main_size, 6): for i in xrange(branch_blocks): print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25) - print ' %%acur%d = load volatile i8 *%%astop%d' % (i, i) + print ' %%acur%d = load i8 *%%astop%d' % (i, i) print ' %%aext%d = sext i8 %%acur%d to i32' % (i, i) print ' %%atest%d = icmp eq i32 %%limit, %%aext%d' % (i, i) print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-04.py b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-04.py index 3ae3ae9c37f7..2c9090fa2067 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-04.py +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-04.py @@ -83,7 +83,7 @@ for i in xrange(branch_blocks): next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' print 'before%d:' % i print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i) - print ' %%bcur%d = load volatile i8 *%%bstop%d' % (i, i) + print ' %%bcur%d = load i8 *%%bstop%d' % (i, i) print ' %%bext%d = sext i8 %%bcur%d to i64' % (i, i) print ' %%btest%d = icmp eq i64 %%limit, %%bext%d' % (i, i) print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) @@ -100,7 +100,7 @@ for i in xrange(0, main_size, 6): for i in xrange(branch_blocks): print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25) - print ' %%acur%d = load volatile i8 *%%astop%d' % (i, i) + print ' %%acur%d = load i8 *%%astop%d' % (i, i) print ' %%aext%d = sext i8 %%acur%d to i64' % (i, i) print ' %%atest%d = icmp eq i64 %%limit, %%aext%d' % (i, i) print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-05.py b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-05.py index 6928b8fc21d6..52f4a961c88f 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-05.py +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-05.py @@ -82,7 +82,7 @@ print '' for i in xrange(branch_blocks): next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' print 'before%d:' % i - print ' %%bcur%d = load volatile i8 *%%stop' % i + print ' %%bcur%d = load i8 *%%stop' % i print ' %%bext%d = sext i8 %%bcur%d to i32' % (i, i) print ' %%btest%d = icmp slt i32 %%bext%d, %d' % (i, i, i + 50) print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) @@ -98,7 +98,7 @@ for i in xrange(0, main_size, 6): print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) for i in xrange(branch_blocks): - print ' %%acur%d = load volatile i8 *%%stop' % i + print ' 
%%acur%d = load i8 *%%stop' % i print ' %%aext%d = sext i8 %%acur%d to i32' % (i, i) print ' %%atest%d = icmp slt i32 %%aext%d, %d' % (i, i, i + 100) print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-06.py b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-06.py index aabc72fa6ec8..c34ebac4ce36 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-06.py +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-06.py @@ -82,7 +82,7 @@ print '' for i in xrange(branch_blocks): next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' print 'before%d:' % i - print ' %%bcur%d = load volatile i8 *%%stop' % i + print ' %%bcur%d = load i8 *%%stop' % i print ' %%bext%d = sext i8 %%bcur%d to i64' % (i, i) print ' %%btest%d = icmp slt i64 %%bext%d, %d' % (i, i, i + 50) print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) @@ -98,7 +98,7 @@ for i in xrange(0, main_size, 6): print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) for i in xrange(branch_blocks): - print ' %%acur%d = load volatile i8 *%%stop' % i + print ' %%acur%d = load i8 *%%stop' % i print ' %%aext%d = sext i8 %%acur%d to i64' % (i, i) print ' %%atest%d = icmp slt i64 %%aext%d, %d' % (i, i, i + 100) print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-09.py b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-09.py index b3fd81324dab..bc712cb164ea 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-09.py +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-09.py @@ -79,7 +79,7 @@ for i in xrange(branch_blocks): next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' print 'before%d:' % i print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i) - print ' %%bcur%d = load volatile i8 *%%bstop%d' % (i, i) + print ' %%bcur%d = load i8 *%%bstop%d' % (i, i) print ' %%bext%d = sext i8 %%bcur%d to i32' % (i, i) print ' %%btest%d = icmp ult i32 %%limit, %%bext%d' % (i, i) print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) @@ -96,7 +96,7 @@ for i in xrange(0, main_size, 6): for i in xrange(branch_blocks): print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25) - print ' %%acur%d = load volatile i8 *%%astop%d' % (i, i) + print ' %%acur%d = load i8 *%%astop%d' % (i, i) print ' %%aext%d = sext i8 %%acur%d to i32' % (i, i) print ' %%atest%d = icmp ult i32 %%limit, %%aext%d' % (i, i) print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-10.py b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-10.py index 3aeea3ebccdf..8c483c33724c 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-10.py +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-10.py @@ -83,7 +83,7 @@ for i in xrange(branch_blocks): next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' print 'before%d:' % i print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i) - print ' %%bcur%d = load volatile i8 *%%bstop%d' % (i, i) + print ' %%bcur%d = load i8 *%%bstop%d' % (i, i) print ' %%bext%d = sext i8 %%bcur%d to i64' % (i, i) print ' %%btest%d = icmp ult i64 %%limit, %%bext%d' % (i, i) print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, 
next) @@ -100,7 +100,7 @@ for i in xrange(0, main_size, 6): for i in xrange(branch_blocks): print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25) - print ' %%acur%d = load volatile i8 *%%astop%d' % (i, i) + print ' %%acur%d = load i8 *%%astop%d' % (i, i) print ' %%aext%d = sext i8 %%acur%d to i64' % (i, i) print ' %%atest%d = icmp ult i64 %%limit, %%aext%d' % (i, i) print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-11.py b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-11.py index 034902c4a342..054610380e31 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-11.py +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-11.py @@ -98,8 +98,8 @@ print '' for i in xrange(branch_blocks): next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' print 'before%d:' % i - print ' %%bcur%da = load volatile i32 *%%stopa' % i - print ' %%bcur%db = load volatile i32 *%%stopb' % i + print ' %%bcur%da = load i32 *%%stopa' % i + print ' %%bcur%db = load i32 *%%stopb' % i print ' %%bsub%d = sub i32 %%bcur%da, %%bcur%db' % (i, i, i) print ' %%btest%d = icmp ult i32 %%bsub%d, %d' % (i, i, i + 50) print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) @@ -115,8 +115,8 @@ for i in xrange(0, main_size, 6): print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) for i in xrange(branch_blocks): - print ' %%acur%da = load volatile i32 *%%stopa' % i - print ' %%acur%db = load volatile i32 *%%stopb' % i + print ' %%acur%da = load i32 *%%stopa' % i + print ' %%acur%db = load i32 *%%stopb' % i print ' %%asub%d = sub i32 %%acur%da, %%acur%db' % (i, i, i) print ' %%atest%d = icmp ult i32 %%asub%d, %d' % (i, i, i + 100) print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-12.py b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-12.py index 007d477e2140..626c8998d5d4 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-12.py +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/Large/branch-range-12.py @@ -98,8 +98,8 @@ print '' for i in xrange(branch_blocks): next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main' print 'before%d:' % i - print ' %%bcur%da = load volatile i64 *%%stopa' % i - print ' %%bcur%db = load volatile i64 *%%stopb' % i + print ' %%bcur%da = load i64 *%%stopa' % i + print ' %%bcur%db = load i64 *%%stopb' % i print ' %%bsub%d = sub i64 %%bcur%da, %%bcur%db' % (i, i, i) print ' %%btest%d = icmp ult i64 %%bsub%d, %d' % (i, i, i + 50) print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next) @@ -115,8 +115,8 @@ for i in xrange(0, main_size, 6): print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i) for i in xrange(branch_blocks): - print ' %%acur%da = load volatile i64 *%%stopa' % i - print ' %%acur%db = load volatile i64 *%%stopb' % i + print ' %%acur%da = load i64 *%%stopa' % i + print ' %%acur%db = load i64 *%%stopb' % i print ' %%asub%d = sub i64 %%acur%da, %%acur%db' % (i, i, i) print ' %%atest%d = icmp ult i64 %%asub%d, %d' % (i, i, i + 100) print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-load-01.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-load-01.ll index a5bc8833e78a..f3acd605b012 100644 --- 
a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-load-01.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-load-01.ll @@ -2,11 +2,10 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; This is just a placeholder to make sure that loads are handled. -; The CS-based sequence is probably far too conservative. define i8 @f1(i8 *%src) { ; CHECK-LABEL: f1: -; CHECK: cs +; CHECK: bcr 1{{[45]}}, %r0 +; CHECK: lb %r2, 0(%r2) ; CHECK: br %r14 %val = load atomic i8 *%src seq_cst, align 1 ret i8 %val diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-load-02.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-load-02.ll index 2c9bbdb488a1..d9bec60f4c1b 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-load-02.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-load-02.ll @@ -2,11 +2,10 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; This is just a placeholder to make sure that loads are handled. -; The CS-based sequence is probably far too conservative. define i16 @f1(i16 *%src) { ; CHECK-LABEL: f1: -; CHECK: cs +; CHECK: bcr 1{{[45]}}, %r0 +; CHECK: lh %r2, 0(%r2) ; CHECK: br %r14 %val = load atomic i16 *%src seq_cst, align 2 ret i16 %val diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-load-03.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-load-03.ll index 1fb41f5e39aa..7e5eb9249a93 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-load-03.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-load-03.ll @@ -2,12 +2,10 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; This is just a placeholder to make sure that loads are handled. -; Using CS is probably too conservative. -define i32 @f1(i32 %dummy, i32 *%src) { +define i32 @f1(i32 *%src) { ; CHECK-LABEL: f1: -; CHECK: lhi %r2, 0 -; CHECK: cs %r2, %r2, 0(%r3) +; CHECK: bcr 1{{[45]}}, %r0 +; CHECK: l %r2, 0(%r2) ; CHECK: br %r14 %val = load atomic i32 *%src seq_cst, align 4 ret i32 %val diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-load-04.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-load-04.ll index 92cac406e200..c7a9a98a425d 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-load-04.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-load-04.ll @@ -2,12 +2,10 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; This is just a placeholder to make sure that loads are handled. -; Using CSG is probably too conservative. -define i64 @f1(i64 %dummy, i64 *%src) { +define i64 @f1(i64 *%src) { ; CHECK-LABEL: f1: -; CHECK: lghi %r2, 0 -; CHECK: csg %r2, %r2, 0(%r3) +; CHECK: bcr 1{{[45]}}, %r0 +; CHECK: lg %r2, 0(%r2) ; CHECK: br %r14 %val = load atomic i64 *%src seq_cst, align 8 ret i64 %val diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-store-01.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-store-01.ll index 53ed24f623cf..952e1a912168 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-store-01.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-store-01.ll @@ -2,11 +2,10 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; This is just a placeholder to make sure that stores are handled. -; The CS-based sequence is probably far too conservative. 
define void @f1(i8 %val, i8 *%src) { ; CHECK-LABEL: f1: -; CHECK: cs +; CHECK: stc %r2, 0(%r3) +; CHECK: bcr 1{{[45]}}, %r0 ; CHECK: br %r14 store atomic i8 %val, i8 *%src seq_cst, align 1 ret void diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-store-02.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-store-02.ll index 42d6695b51d9..c9576e556566 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-store-02.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-store-02.ll @@ -2,11 +2,10 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; This is just a placeholder to make sure that stores are handled. -; The CS-based sequence is probably far too conservative. define void @f1(i16 %val, i16 *%src) { ; CHECK-LABEL: f1: -; CHECK: cs +; CHECK: sth %r2, 0(%r3) +; CHECK: bcr 1{{[45]}}, %r0 ; CHECK: br %r14 store atomic i16 %val, i16 *%src seq_cst, align 2 ret void diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-store-03.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-store-03.ll index 846c86fd3662..459cb6a94e12 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-store-03.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-store-03.ll @@ -2,14 +2,10 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; This is just a placeholder to make sure that stores are handled. -; Using CS is probably too conservative. define void @f1(i32 %val, i32 *%src) { ; CHECK-LABEL: f1: -; CHECK: l %r0, 0(%r3) -; CHECK: [[LABEL:\.[^:]*]]: -; CHECK: cs %r0, %r2, 0(%r3) -; CHECK: jl [[LABEL]] +; CHECK: st %r2, 0(%r3) +; CHECK: bcr 1{{[45]}}, %r0 ; CHECK: br %r14 store atomic i32 %val, i32 *%src seq_cst, align 4 ret void diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-store-04.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-store-04.ll index 24615b115658..7f2406eb5468 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-store-04.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomic-store-04.ll @@ -2,14 +2,10 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -; This is just a placeholder to make sure that stores are handled. -; Using CS is probably too conservative. define void @f1(i64 %val, i64 *%src) { ; CHECK-LABEL: f1: -; CHECK: lg %r0, 0(%r3) -; CHECK: [[LABEL:\.[^:]*]]: -; CHECK: csg %r0, %r2, 0(%r3) -; CHECK: jl [[LABEL]] +; CHECK: stg %r2, 0(%r3) +; CHECK: bcr 1{{[45]}}, %r0 ; CHECK: br %r14 store atomic i64 %val, i64 *%src seq_cst, align 8 ret void diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-add-05.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-add-05.ll new file mode 100644 index 000000000000..956c0d9642cd --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-add-05.ll @@ -0,0 +1,64 @@ +; Test 32-bit atomic additions, z196 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check addition of a variable. +define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: laa %r2, %r4, 0(%r3) +; CHECK: br %r14 + %res = atomicrmw add i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check addition of 1, which needs a temporary. +define i32 @f2(i32 %dummy, i32 *%src) { +; CHECK-LABEL: f2: +; CHECK: lhi [[TMP:%r[0-5]]], 1 +; CHECK: laa %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw add i32 *%src, i32 1 seq_cst + ret i32 %res +} + +; Check the high end of the LAA range. 
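+; (Annotation, assuming the usual z196 encoding rather than quoting the
+; commit message: LAA takes a signed 20-bit displacement, so word offsets
+; from -131072 * 4 = -524288 up to 131071 * 4 = 524284 encode directly;
+; f3 through f6 below probe both edges of that range.)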
+define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: laa %r2, %r4, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131071 + %res = atomicrmw add i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word up, which needs separate address logic. +define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 524288 +; CHECK: laa %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131072 + %res = atomicrmw add i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the low end of the LAA range. +define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f5: +; CHECK: laa %r2, %r4, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131072 + %res = atomicrmw add i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word down, which needs separate address logic. +define i32 @f6(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f6: +; CHECK: agfi %r3, -524292 +; CHECK: laa %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131073 + %res = atomicrmw add i32 *%ptr, i32 %b seq_cst + ret i32 %res +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-add-06.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-add-06.ll new file mode 100644 index 000000000000..f508858d1562 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-add-06.ll @@ -0,0 +1,64 @@ +; Test 64-bit atomic additions, z196 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check addition of a variable. +define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f1: +; CHECK: laag %r2, %r4, 0(%r3) +; CHECK: br %r14 + %res = atomicrmw add i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check addition of 1, which needs a temporary. +define i64 @f2(i64 %dummy, i64 *%src) { +; CHECK-LABEL: f2: +; CHECK: lghi [[TMP:%r[0-5]]], 1 +; CHECK: laag %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw add i64 *%src, i64 1 seq_cst + ret i64 %res +} + +; Check the high end of the LAAG range. +define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f3: +; CHECK: laag %r2, %r4, 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %res = atomicrmw add i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword up, which needs separate address logic. +define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 524288 +; CHECK: laag %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %res = atomicrmw add i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the low end of the LAAG range. +define i64 @f5(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f5: +; CHECK: laag %r2, %r4, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %res = atomicrmw add i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword down, which needs separate address logic. 
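+; (Worked offset arithmetic: -65537 * 8 = -524296, one doubleword below
+; the -524288 displacement floor, so the base is adjusted with AGFI first
+; and LAAG then uses displacement 0.)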
+define i64 @f6(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f6: +; CHECK: agfi %r3, -524296 +; CHECK: laag %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %res = atomicrmw add i64 *%ptr, i64 %b seq_cst + ret i64 %res +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-and-05.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-and-05.ll new file mode 100644 index 000000000000..f0b999c60431 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-and-05.ll @@ -0,0 +1,64 @@ +; Test 32-bit atomic ANDs, z196 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check AND of a variable. +define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: lan %r2, %r4, 0(%r3) +; CHECK: br %r14 + %res = atomicrmw and i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check AND of 1, which needs a temporary. +define i32 @f2(i32 %dummy, i32 *%src) { +; CHECK-LABEL: f2: +; CHECK: lhi [[TMP:%r[0-5]]], 1 +; CHECK: lan %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw and i32 *%src, i32 1 seq_cst + ret i32 %res +} + +; Check the high end of the LAN range. +define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: lan %r2, %r4, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131071 + %res = atomicrmw and i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word up, which needs separate address logic. +define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 524288 +; CHECK: lan %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131072 + %res = atomicrmw and i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the low end of the LAN range. +define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f5: +; CHECK: lan %r2, %r4, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131072 + %res = atomicrmw and i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word down, which needs separate address logic. +define i32 @f6(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f6: +; CHECK: agfi %r3, -524292 +; CHECK: lan %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131073 + %res = atomicrmw and i32 *%ptr, i32 %b seq_cst + ret i32 %res +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-and-06.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-and-06.ll new file mode 100644 index 000000000000..e5b71945d57c --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-and-06.ll @@ -0,0 +1,64 @@ +; Test 64-bit atomic ANDs, z196 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check AND of a variable. +define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f1: +; CHECK: lang %r2, %r4, 0(%r3) +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check AND of -2, which needs a temporary. +define i64 @f2(i64 %dummy, i64 *%src) { +; CHECK-LABEL: f2: +; CHECK: lghi [[TMP:%r[0-5]]], -2 +; CHECK: lang %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw and i64 *%src, i64 -2 seq_cst + ret i64 %res +} + +; Check the high end of the LANG range. 
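+; (Offset arithmetic: 65535 * 8 = 524280 is the largest doubleword offset
+; that still fits the signed 20-bit displacement field.)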
+define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f3: +; CHECK: lang %r2, %r4, 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %res = atomicrmw and i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword up, which needs separate address logic. +define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 524288 +; CHECK: lang %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %res = atomicrmw and i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the low end of the LANG range. +define i64 @f5(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f5: +; CHECK: lang %r2, %r4, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %res = atomicrmw and i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword down, which needs separate address logic. +define i64 @f6(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f6: +; CHECK: agfi %r3, -524296 +; CHECK: lang %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %res = atomicrmw and i64 *%ptr, i64 %b seq_cst + ret i64 %res +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-or-05.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-or-05.ll new file mode 100644 index 000000000000..b38654ca6f07 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-or-05.ll @@ -0,0 +1,64 @@ +; Test 32-bit atomic ORs, z196 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check OR of a variable. +define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: lao %r2, %r4, 0(%r3) +; CHECK: br %r14 + %res = atomicrmw or i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check OR of 1, which needs a temporary. +define i32 @f2(i32 %dummy, i32 *%src) { +; CHECK-LABEL: f2: +; CHECK: lhi [[TMP:%r[0-5]]], 1 +; CHECK: lao %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw or i32 *%src, i32 1 seq_cst + ret i32 %res +} + +; Check the high end of the LAO range. +define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: lao %r2, %r4, 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131071 + %res = atomicrmw or i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word up, which needs separate address logic. +define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 524288 +; CHECK: lao %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131072 + %res = atomicrmw or i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the low end of the LAO range. +define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f5: +; CHECK: lao %r2, %r4, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131072 + %res = atomicrmw or i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word down, which needs separate address logic. 
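+; (Offset arithmetic: -131073 * 4 = -524292 lies one word below the
+; assumed -524288 displacement floor, hence the separate AGFI.)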
+define i32 @f6(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f6: +; CHECK: agfi %r3, -524292 +; CHECK: lao %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131073 + %res = atomicrmw or i32 *%ptr, i32 %b seq_cst + ret i32 %res +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-or-06.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-or-06.ll new file mode 100644 index 000000000000..30874abfe4a2 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-or-06.ll @@ -0,0 +1,64 @@ +; Test 64-bit atomic ORs, z196 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check OR of a variable. +define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f1: +; CHECK: laog %r2, %r4, 0(%r3) +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check OR of 1, which needs a temporary. +define i64 @f2(i64 %dummy, i64 *%src) { +; CHECK-LABEL: f2: +; CHECK: lghi [[TMP:%r[0-5]]], 1 +; CHECK: laog %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw or i64 *%src, i64 1 seq_cst + ret i64 %res +} + +; Check the high end of the LAOG range. +define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f3: +; CHECK: laog %r2, %r4, 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %res = atomicrmw or i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword up, which needs separate address logic. +define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 524288 +; CHECK: laog %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %res = atomicrmw or i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the low end of the LAOG range. +define i64 @f5(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f5: +; CHECK: laog %r2, %r4, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %res = atomicrmw or i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword down, which needs separate address logic. +define i64 @f6(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f6: +; CHECK: agfi %r3, -524296 +; CHECK: laog %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %res = atomicrmw or i64 *%ptr, i64 %b seq_cst + ret i64 %res +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-sub-05.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-sub-05.ll new file mode 100644 index 000000000000..7668f0e2a7ac --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-sub-05.ll @@ -0,0 +1,69 @@ +; Test 32-bit atomic subtractions, z196 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check addition of a variable. +define i32 @f1(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: lcr [[NEG:%r[0-5]]], %r4 +; CHECK: laa %r2, [[NEG]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw sub i32 *%src, i32 %b seq_cst + ret i32 %res +} + +; Check addition of 1, which needs a temporary. +define i32 @f2(i32 %dummy, i32 *%src) { +; CHECK-LABEL: f2: +; CHECK: lhi [[TMP:%r[0-5]]], -1 +; CHECK: laa %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw sub i32 *%src, i32 1 seq_cst + ret i32 %res +} + +; Check the high end of the LAA range. 
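+; (Annotation: z196 has no load-and-subtract companion to LAA, so the
+; "addition" comments above describe the assumed lowering, negate with LCR
+; and reuse LAA; the range checks mirror add-05, e.g. 131071 * 4 = 524284.)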
+define i32 @f3(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: lcr [[NEG:%r[0-5]]], %r4 +; CHECK: laa %r2, [[NEG]], 524284(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131071 + %res = atomicrmw sub i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word up, which needs separate address logic. +define i32 @f4(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f4: +; CHECK-DAG: lcr [[NEG:%r[0-5]]], %r4 +; CHECK-DAG: agfi %r3, 524288 +; CHECK: laa %r2, [[NEG]], 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 131072 + %res = atomicrmw sub i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the low end of the LAA range. +define i32 @f5(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f5: +; CHECK: lcr [[NEG:%r[0-5]]], %r4 +; CHECK: laa %r2, [[NEG]], -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131072 + %res = atomicrmw sub i32 *%ptr, i32 %b seq_cst + ret i32 %res +} + +; Check the next word down, which needs separate address logic. +define i32 @f6(i32 %dummy, i32 *%src, i32 %b) { +; CHECK-LABEL: f6: +; CHECK-DAG: lcr [[NEG:%r[0-5]]], %r4 +; CHECK-DAG: agfi %r3, -524292 +; CHECK: laa %r2, [[NEG]], 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i32 *%src, i32 -131073 + %res = atomicrmw sub i32 *%ptr, i32 %b seq_cst + ret i32 %res +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-sub-06.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-sub-06.ll new file mode 100644 index 000000000000..5d11bdf96cde --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-sub-06.ll @@ -0,0 +1,69 @@ +; Test 64-bit atomic subtractions, z196 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check addition of a variable. +define i64 @f1(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f1: +; CHECK: lcgr [[NEG:%r[0-5]]], %r4 +; CHECK: laag %r2, [[NEG]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw sub i64 *%src, i64 %b seq_cst + ret i64 %res +} + +; Check addition of 1, which needs a temporary. +define i64 @f2(i64 %dummy, i64 *%src) { +; CHECK-LABEL: f2: +; CHECK: lghi [[TMP:%r[0-5]]], -1 +; CHECK: laag %r2, [[TMP]], 0(%r3) +; CHECK: br %r14 + %res = atomicrmw sub i64 *%src, i64 1 seq_cst + ret i64 %res +} + +; Check the high end of the LAAG range. +define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f3: +; CHECK: lcgr [[NEG:%r[0-5]]], %r4 +; CHECK: laag %r2, [[NEG]], 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %res = atomicrmw sub i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword up, which needs separate address logic. +define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f4: +; CHECK-DAG: lcgr [[NEG:%r[0-5]]], %r4 +; CHECK-DAG: agfi %r3, 524288 +; CHECK: laag %r2, [[NEG]], 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %res = atomicrmw sub i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the low end of the LAAG range. +define i64 @f5(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f5: +; CHECK: lcgr [[NEG:%r[0-5]]], %r4 +; CHECK: laag %r2, [[NEG]], -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %res = atomicrmw sub i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword down, which needs separate address logic. 
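+; (Same assumed pattern as the 32-bit case, LCGR plus LAAG: -65537 * 8 =
+; -524296 overflows the displacement field, so AGFI adjusts the base first.)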
+define i64 @f6(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f6:
+; CHECK-DAG: lcgr [[NEG:%r[0-5]]], %r4
+; CHECK-DAG: agfi %r3, -524296
+; CHECK: laag %r2, [[NEG]], 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65537
+  %res = atomicrmw sub i64 *%ptr, i64 %b seq_cst
+  ret i64 %res
+}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-xor-05.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-xor-05.ll
new file mode 100644
index 000000000000..e9e7d30b3578
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-xor-05.ll
@@ -0,0 +1,64 @@
+; Test 32-bit atomic XORs, z196 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check XOR of a variable.
+define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: lax %r2, %r4, 0(%r3)
+; CHECK: br %r14
+  %res = atomicrmw xor i32 *%src, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check XOR of 1, which needs a temporary.
+define i32 @f2(i32 %dummy, i32 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: lhi [[TMP:%r[0-5]]], 1
+; CHECK: lax %r2, [[TMP]], 0(%r3)
+; CHECK: br %r14
+  %res = atomicrmw xor i32 *%src, i32 1 seq_cst
+  ret i32 %res
+}
+
+; Check the high end of the LAX range.
+define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: lax %r2, %r4, 524284(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i32 131071
+  %res = atomicrmw xor i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the next word up, which needs separate address logic.
+define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: lax %r2, %r4, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i32 131072
+  %res = atomicrmw xor i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the low end of the LAX range.
+define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f5:
+; CHECK: lax %r2, %r4, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i32 -131072
+  %res = atomicrmw xor i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the next word down, which needs separate address logic.
+define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK-LABEL: f6:
+; CHECK: agfi %r3, -524292
+; CHECK: lax %r2, %r4, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i32 -131073
+  %res = atomicrmw xor i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-xor-06.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-xor-06.ll
new file mode 100644
index 000000000000..0870c6476f61
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/atomicrmw-xor-06.ll
@@ -0,0 +1,64 @@
+; Test 64-bit atomic XORs, z196 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+; Check XOR of a variable.
+define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK-LABEL: f1:
+; CHECK: laxg %r2, %r4, 0(%r3)
+; CHECK: br %r14
+  %res = atomicrmw xor i64 *%src, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check XOR of 1, which needs a temporary.
+define i64 @f2(i64 %dummy, i64 *%src) {
+; CHECK-LABEL: f2:
+; CHECK: lghi [[TMP:%r[0-5]]], 1
+; CHECK: laxg %r2, [[TMP]], 0(%r3)
+; CHECK: br %r14
+  %res = atomicrmw xor i64 *%src, i64 1 seq_cst
+  ret i64 %res
+}
+
+; Check the high end of the LAXG range.
+define i64 @f3(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f3: +; CHECK: laxg %r2, %r4, 524280(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65535 + %res = atomicrmw xor i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword up, which needs separate address logic. +define i64 @f4(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 524288 +; CHECK: laxg %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 65536 + %res = atomicrmw xor i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the low end of the LAXG range. +define i64 @f5(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f5: +; CHECK: laxg %r2, %r4, -524288(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65536 + %res = atomicrmw xor i64 *%ptr, i64 %b seq_cst + ret i64 %res +} + +; Check the next doubleword down, which needs separate address logic. +define i64 @f6(i64 %dummy, i64 *%src, i64 %b) { +; CHECK-LABEL: f6: +; CHECK: agfi %r3, -524296 +; CHECK: laxg %r2, %r4, 0(%r3) +; CHECK: br %r14 + %ptr = getelementptr i64 *%src, i64 -65537 + %res = atomicrmw xor i64 *%ptr, i64 %b seq_cst + ret i64 %res +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/cond-store-01.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/cond-store-01.ll index d55ea2133e8f..62e9796fa21b 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/cond-store-01.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/cond-store-01.ll @@ -347,11 +347,10 @@ define void @f19(i8 *%ptr, i8 %alt, i32 %limit) { define void @f20(i8 *%ptr, i8 %alt, i32 %limit) { ; FIXME: should use a normal load instead of CS. ; CHECK-LABEL: f20: -; CHECK: cs {{%r[0-9]+}}, -; CHECK: jl +; CHECK: lb {{%r[0-9]+}}, 0(%r2) ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] ; CHECK: [[LABEL]]: -; CHECK: stc {{%r[0-9]+}}, +; CHECK: stc {{%r[0-9]+}}, 0(%r2) ; CHECK: br %r14 %cond = icmp ult i32 %limit, 420 %orig = load atomic i8 *%ptr unordered, align 1 @@ -367,7 +366,7 @@ define void @f21(i8 *%ptr, i8 %alt, i32 %limit) { ; CHECK: jhe [[LABEL:[^ ]*]] ; CHECK: lb %r3, 0(%r2) ; CHECK: [[LABEL]]: -; CHECK: cs {{%r[0-9]+}}, +; CHECK: stc %r3, 0(%r2) ; CHECK: br %r14 %cond = icmp ult i32 %limit, 420 %orig = load i8 *%ptr diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/cond-store-02.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/cond-store-02.ll index 91bc4860b384..4fbcdaba5103 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/cond-store-02.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/cond-store-02.ll @@ -347,11 +347,10 @@ define void @f19(i16 *%ptr, i16 %alt, i32 %limit) { define void @f20(i16 *%ptr, i16 %alt, i32 %limit) { ; FIXME: should use a normal load instead of CS. 
; CHECK-LABEL: f20: -; CHECK: cs {{%r[0-9]+}}, -; CHECK: jl +; CHECK: lh {{%r[0-9]+}}, 0(%r2) ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] ; CHECK: [[LABEL]]: -; CHECK: sth {{%r[0-9]+}}, +; CHECK: sth {{%r[0-9]+}}, 0(%r2) ; CHECK: br %r14 %cond = icmp ult i32 %limit, 420 %orig = load atomic i16 *%ptr unordered, align 2 @@ -367,7 +366,7 @@ define void @f21(i16 *%ptr, i16 %alt, i32 %limit) { ; CHECK: jhe [[LABEL:[^ ]*]] ; CHECK: lh %r3, 0(%r2) ; CHECK: [[LABEL]]: -; CHECK: cs {{%r[0-9]+}}, +; CHECK: sth %r3, 0(%r2) ; CHECK: br %r14 %cond = icmp ult i32 %limit, 420 %orig = load i16 *%ptr diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/cond-store-03.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/cond-store-03.ll index d4fd48d61324..4b22555d0d60 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/cond-store-03.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/cond-store-03.ll @@ -272,7 +272,7 @@ define void @f15(i32 *%ptr, i32 %alt, i32 %limit) { define void @f16(i32 *%ptr, i32 %alt, i32 %limit) { ; FIXME: should use a normal load instead of CS. ; CHECK-LABEL: f16: -; CHECK: cs {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2) +; CHECK: l {{%r[0-5]}}, 0(%r2) ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] ; CHECK: [[LABEL]]: ; CHECK: st {{%r[0-5]}}, 0(%r2) @@ -291,7 +291,7 @@ define void @f17(i32 *%ptr, i32 %alt, i32 %limit) { ; CHECK: jhe [[LABEL:[^ ]*]] ; CHECK: l %r3, 0(%r2) ; CHECK: [[LABEL]]: -; CHECK: cs {{%r[0-5]}}, %r3, 0(%r2) +; CHECK: st %r3, 0(%r2) ; CHECK: br %r14 %cond = icmp ult i32 %limit, 420 %orig = load i32 *%ptr diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/cond-store-04.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/cond-store-04.ll index fc565c432fff..346b51a17d78 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/cond-store-04.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/cond-store-04.ll @@ -164,7 +164,7 @@ define void @f9(i64 *%ptr, i64 %alt, i32 %limit) { define void @f10(i64 *%ptr, i64 %alt, i32 %limit) { ; FIXME: should use a normal load instead of CSG. ; CHECK-LABEL: f10: -; CHECK: csg {{%r[0-5]}}, {{%r[0-5]}}, 0(%r2) +; CHECK: lg {{%r[0-5]}}, 0(%r2) ; CHECK: {{jl|jnl}} [[LABEL:[^ ]*]] ; CHECK: [[LABEL]]: ; CHECK: stg {{%r[0-5]}}, 0(%r2) @@ -183,7 +183,7 @@ define void @f11(i64 *%ptr, i64 %alt, i32 %limit) { ; CHECK: jhe [[LABEL:[^ ]*]] ; CHECK: lg %r3, 0(%r2) ; CHECK: [[LABEL]]: -; CHECK: csg {{%r[0-5]}}, %r3, 0(%r2) +; CHECK: stg %r3, 0(%r2) ; CHECK: br %r14 %cond = icmp ult i32 %limit, 420 %orig = load i64 *%ptr diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll index 8d842164fa4f..94f4b7cb3709 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll @@ -346,3 +346,62 @@ store: exit: ret double %val } + +; Repeat f2 with a comparison against -0. +define float @f17(float %a, float %b, float *%dest) { +; CHECK-LABEL: f17: +; CHECK: aebr %f0, %f2 +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %res = fadd float %a, %b + %cmp = fcmp olt float %res, -0.0 + br i1 %cmp, label %exit, label %store + +store: + store float %b, float *%dest + br label %exit + +exit: + ret float %res +} + +; Test another form of f7 in which the condition is based on the unnegated +; result. This is what InstCombine would produce. 
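+; (LNEBR produces minus the absolute value of its operand and sets the
+; condition code, so the fabs, the negation and the compare can all
+; collapse into one instruction.)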
+define float @f18(float %dummy, float %a, float *%dest) { +; CHECK-LABEL: f18: +; CHECK: lnebr %f0, %f2 +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %abs = call float @llvm.fabs.f32(float %a) + %res = fsub float -0.0, %abs + %cmp = fcmp ogt float %abs, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store float %res, float *%dest + br label %exit + +exit: + ret float %res +} + +; Similarly for f8. +define float @f19(float %dummy, float %a, float *%dest) { +; CHECK-LABEL: f19: +; CHECK: lcebr %f0, %f2 +; CHECK-NEXT: jle .L{{.*}} +; CHECK: br %r14 +entry: + %res = fsub float -0.0, %a + %cmp = fcmp oge float %a, 0.0 + br i1 %cmp, label %exit, label %store + +store: + store float %res, float *%dest + br label %exit + +exit: + ret float %res +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/frame-13.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/frame-13.ll index 393850fbf617..58dee1da58b5 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/frame-13.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/frame-13.ll @@ -243,8 +243,8 @@ define void @f10(i32 *%vptr) { ; And again with maximum register pressure. The only spill slots that the ; NOFP case needs are the emergency ones, so the offsets are the same as for f2. -; However, the FP case uses %r11 as the frame pointer and must therefore -; spill a second register. This leads to an extra displacement of 8. +; The FP case needs to spill an extra register and is too dependent on +; register allocation heuristics for a stable test. define void @f11(i32 *%vptr) { ; CHECK-NOFP-LABEL: f11: ; CHECK-NOFP: stmg %r6, %r15, @@ -254,15 +254,6 @@ define void @f11(i32 *%vptr) { ; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15) ; CHECK-NOFP: lmg %r6, %r15, ; CHECK-NOFP: br %r14 -; -; CHECK-FP-LABEL: f11: -; CHECK-FP: stmg %r6, %r15, -; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11) -; CHECK-FP: lay [[REGISTER]], 4096(%r11) -; CHECK-FP: mvhi 8([[REGISTER]]), 42 -; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11) -; CHECK-FP: lmg %r6, %r15, -; CHECK-FP: br %r14 %i0 = load volatile i32 *%vptr %i1 = load volatile i32 *%vptr %i3 = load volatile i32 *%vptr diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/frame-14.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/frame-14.ll index 3b48179c40b6..24169cf61f00 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/frame-14.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/frame-14.ll @@ -266,8 +266,8 @@ define void @f10(i32 *%vptr) { ; And again with maximum register pressure. The only spill slots that the ; NOFP case needs are the emergency ones, so the offsets are the same as for f4. -; However, the FP case uses %r11 as the frame pointer and must therefore -; spill a second register. This leads to an extra displacement of 8. +; The FP case needs to spill an extra register and is too dependent on +; register allocation heuristics for a stable test. 
define void @f11(i32 *%vptr) { ; CHECK-NOFP-LABEL: f11: ; CHECK-NOFP: stmg %r6, %r15, @@ -278,16 +278,6 @@ define void @f11(i32 *%vptr) { ; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15) ; CHECK-NOFP: lmg %r6, %r15, ; CHECK-NOFP: br %r14 -; -; CHECK-FP-LABEL: f11: -; CHECK-FP: stmg %r6, %r15, -; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], [[OFFSET:160|168]](%r11) -; CHECK-FP: llilh [[REGISTER]], 8 -; CHECK-FP: agr [[REGISTER]], %r11 -; CHECK-FP: mvi 8([[REGISTER]]), 42 -; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11) -; CHECK-FP: lmg %r6, %r15, -; CHECK-FP: br %r14 %i0 = load volatile i32 *%vptr %i1 = load volatile i32 *%vptr %i3 = load volatile i32 *%vptr diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/insert-06.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/insert-06.ll index edcd0c5dccd2..81a9c8770708 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/insert-06.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/insert-06.ll @@ -178,3 +178,17 @@ define i64 @f14(i64 %a, i64 %b) { %ext = sext i1 %res to i64 ret i64 %ext } + +; Check another representation of f8. +define i64 @f15(i64 %a, i8 *%src) { +; CHECK-LABEL: f15: +; CHECK-NOT: {{%r[23]}} +; CHECK: lb %r2, 0(%r3) +; CHECK: br %r14 + %byte = load i8 *%src + %b = sext i8 %byte to i64 + %low = and i64 %b, 4294967295 + %high = and i64 %a, -4294967296 + %res = or i64 %high, %low + ret i64 %res +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-abs-01.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-abs-01.ll index 40fb61192c6e..053c347c0b75 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-abs-01.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-abs-01.ll @@ -81,3 +81,67 @@ define i64 @f7(i64 %val) { %res = select i1 %cmp, i64 %neg, i64 %val ret i64 %res } + +; Test another form of f6, which is that produced by InstCombine. +define i64 @f8(i64 %val) { +; CHECK-LABEL: f8: +; CHECK: lpgfr %r2, %r2 +; CHECK: br %r14 + %shl = shl i64 %val, 32 + %ashr = ashr i64 %shl, 32 + %neg = sub i64 0, %ashr + %cmp = icmp slt i64 %shl, 0 + %abs = select i1 %cmp, i64 %neg, i64 %ashr + ret i64 %abs +} + +; Try again with sle rather than slt. +define i64 @f9(i64 %val) { +; CHECK-LABEL: f9: +; CHECK: lpgfr %r2, %r2 +; CHECK: br %r14 + %shl = shl i64 %val, 32 + %ashr = ashr i64 %shl, 32 + %neg = sub i64 0, %ashr + %cmp = icmp sle i64 %shl, 0 + %abs = select i1 %cmp, i64 %neg, i64 %ashr + ret i64 %abs +} + +; Repeat f8 with the operands reversed. +define i64 @f10(i64 %val) { +; CHECK-LABEL: f10: +; CHECK: lpgfr %r2, %r2 +; CHECK: br %r14 + %shl = shl i64 %val, 32 + %ashr = ashr i64 %shl, 32 + %neg = sub i64 0, %ashr + %cmp = icmp sgt i64 %shl, 0 + %abs = select i1 %cmp, i64 %ashr, i64 %neg + ret i64 %abs +} + +; Try again with sge rather than sgt. +define i64 @f11(i64 %val) { +; CHECK-LABEL: f11: +; CHECK: lpgfr %r2, %r2 +; CHECK: br %r14 + %shl = shl i64 %val, 32 + %ashr = ashr i64 %shl, 32 + %neg = sub i64 0, %ashr + %cmp = icmp sge i64 %shl, 0 + %abs = select i1 %cmp, i64 %ashr, i64 %neg + ret i64 %abs +} + +; Repeat f5 with the comparison on the unextended value. 
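+; (LPGFR sign-extends a 32-bit value to 64 bits and takes its absolute
+; value in one instruction, setting the condition code as well.)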
+define i64 @f12(i32 %val) { +; CHECK-LABEL: f12: +; CHECK: lpgfr %r2, %r2 +; CHECK: br %r14 + %ext = sext i32 %val to i64 + %cmp = icmp slt i32 %val, 0 + %neg = sub i64 0, %ext + %abs = select i1 %cmp, i64 %neg, i64 %ext + ret i64 %abs +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-cmp-05.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-cmp-05.ll index f15b76bb87fe..0be43a3ef1bf 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-cmp-05.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-cmp-05.ll @@ -291,9 +291,22 @@ define i64 @f15(i32 *%ptr0) { ret i64 %sel9 } -; Check the comparison can be reversed if that allows CGF to be used. -define double @f16(double %a, double %b, i64 %i2, i32 *%ptr) { +; Check the comparison can be reversed if that allows CGFR to be used. +define double @f16(double %a, double %b, i64 %i1, i32 %unext) { ; CHECK-LABEL: f16: +; CHECK: cgfr %r2, %r3 +; CHECK-NEXT: jh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i2 = sext i32 %unext to i64 + %cond = icmp slt i64 %i2, %i1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Likewise CGF. +define double @f17(double %a, double %b, i64 %i2, i32 *%ptr) { +; CHECK-LABEL: f17: ; CHECK: cgf %r2, 0(%r3) ; CHECK-NEXT: jh {{\.L.*}} ; CHECK: ldr %f0, %f2 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-cmp-06.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-cmp-06.ll index 8ab62e89ec39..82007e221766 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-cmp-06.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-cmp-06.ll @@ -341,9 +341,35 @@ define i64 @f19(i32 *%ptr0) { ret i64 %sel9 } -; Check the comparison can be reversed if that allows CLGF to be used. -define double @f20(double %a, double %b, i64 %i2, i32 *%ptr) { +; Check the comparison can be reversed if that allows CLGFR to be used. +define double @f20(double %a, double %b, i64 %i1, i32 %unext) { ; CHECK-LABEL: f20: +; CHECK: clgfr %r2, %r3 +; CHECK-NEXT: jh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i2 = zext i32 %unext to i64 + %cond = icmp ult i64 %i2, %i1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; ...and again with the AND representation. +define double @f21(double %a, double %b, i64 %i1, i64 %unext) { +; CHECK-LABEL: f21: +; CHECK: clgfr %r2, %r3 +; CHECK-NEXT: jh +; CHECK: ldr %f0, %f2 +; CHECK: br %r14 + %i2 = and i64 %unext, 4294967295 + %cond = icmp ult i64 %i2, %i1 + %res = select i1 %cond, double %a, double %b + ret double %res +} + +; Check the comparison can be reversed if that allows CLGF to be used. +define double @f22(double %a, double %b, i64 %i2, i32 *%ptr) { +; CHECK-LABEL: f22: ; CHECK: clgf %r2, 0(%r3) ; CHECK-NEXT: jh {{\.L.*}} ; CHECK: ldr %f0, %f2 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-cmp-44.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-cmp-44.ll index ae0133f10860..822dcac3059f 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-cmp-44.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-cmp-44.ll @@ -797,3 +797,93 @@ store: exit: ret i32 %val } + +; Test f35 for in-register extensions. 
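+; (LTGFR sign-extends the low 32 bits of its source into a 64-bit result
+; and sets the condition code, so the extension and the compare against
+; zero become a single instruction.)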
+define i64 @f39(i64 %dummy, i64 %a, i64 *%dest) { +; CHECK-LABEL: f39: +; CHECK: ltgfr %r2, %r3 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jh .L{{.*}} +; CHECK: br %r14 +entry: + %val = trunc i64 %a to i32 + %ext = sext i32 %val to i64 + call void asm sideeffect "blah $0", "{r2}"(i64 %ext) + %cmp = icmp sgt i64 %ext, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %ext, i64 *%dest + br label %exit + +exit: + ret i64 %ext +} + +; ...and again with what InstCombine would produce for f40. +define i64 @f40(i64 %dummy, i64 %a, i64 *%dest) { +; CHECK-LABEL: f40: +; CHECK: ltgfr %r2, %r3 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jh .L{{.*}} +; CHECK: br %r14 +entry: + %shl = shl i64 %a, 32 + %ext = ashr i64 %shl, 32 + call void asm sideeffect "blah $0", "{r2}"(i64 %ext) + %cmp = icmp sgt i64 %shl, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %ext, i64 *%dest + br label %exit + +exit: + ret i64 %ext +} + +; Try a form of f7 in which the subtraction operands are compared directly. +define i32 @f41(i32 %a, i32 %b, i32 *%dest) { +; CHECK-LABEL: f41: +; CHECK: s %r2, 0(%r4) +; CHECK-NEXT: jne .L{{.*}} +; CHECK: br %r14 +entry: + %cur = load i32 *%dest + %res = sub i32 %a, %cur + %cmp = icmp ne i32 %a, %cur + br i1 %cmp, label %exit, label %store + +store: + store i32 %b, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; A version of f32 that tests the unextended value. +define i64 @f42(i64 %base, i64 %index, i64 *%dest) { +; CHECK-LABEL: f42: +; CHECK: ltgf %r2, 0({{%r2,%r3|%r3,%r2}}) +; CHECK-NEXT: jh .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i32 * + %val = load i32 *%ptr + %res = sext i32 %val to i64 + %cmp = icmp sgt i32 %val, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %res, i64 *%dest + br label %exit + +exit: + ret i64 %res +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-cmp-47.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-cmp-47.ll index 9ebcbfe525ba..038a25b2a6ed 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-cmp-47.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-cmp-47.ll @@ -232,3 +232,112 @@ store: exit: ret void } + +; Check a case where TMHH can be used to implement a ult comparison. +define void @f13(i64 %a) { +; CHECK-LABEL: f13: +; CHECK: tmhh %r2, 49152 +; CHECK: jno {{\.L.*}} +; CHECK: br %r14 +entry: + %cmp = icmp ult i64 %a, 13835058055282163712 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; And again with ule. +define void @f14(i64 %a) { +; CHECK-LABEL: f14: +; CHECK: tmhh %r2, 49152 +; CHECK: jno {{\.L.*}} +; CHECK: br %r14 +entry: + %cmp = icmp ule i64 %a, 13835058055282163711 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; And again with ugt. +define void @f15(i64 %a) { +; CHECK-LABEL: f15: +; CHECK: tmhh %r2, 49152 +; CHECK: jo {{\.L.*}} +; CHECK: br %r14 +entry: + %cmp = icmp ugt i64 %a, 13835058055282163711 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; And again with uge. 
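+; (13835058055282163712 is 0xC000000000000000; its high 16 bits are
+; 0xC000 = 49152, the TMHH mask, so these comparisons reduce to testing
+; the top two bits of the register.)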
+define void @f16(i64 %a) { +; CHECK-LABEL: f16: +; CHECK: tmhh %r2, 49152 +; CHECK: jo {{\.L.*}} +; CHECK: br %r14 +entry: + %cmp = icmp uge i64 %a, 13835058055282163712 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Decrease the constant from f13 to make TMHH invalid. +define void @f17(i64 %a) { +; CHECK-LABEL: f17: +; CHECK-NOT: tmhh +; CHECK: llihh {{%r[0-5]}}, 49151 +; CHECK-NOT: tmhh +; CHECK: br %r14 +entry: + %cmp = icmp ult i64 %a, 13834776580305453056 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} + +; Check that we don't use TMHH just to test the top bit. +define void @f18(i64 %a) { +; CHECK-LABEL: f18: +; CHECK-NOT: tmhh +; CHECK: cgijhe %r2, 0, +; CHECK: br %r14 +entry: + %cmp = icmp ult i64 %a, 9223372036854775808 + br i1 %cmp, label %exit, label %store + +store: + store i32 1, i32 *@g + br label %exit + +exit: + ret void +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-neg-02.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-neg-02.ll index e26194c162d4..7f3f6375129a 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-neg-02.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/int-neg-02.ll @@ -89,3 +89,136 @@ define i64 @f7(i64 %val) { %res = sub i64 0, %abs ret i64 %res } + +; Test another form of f6, which is that produced by InstCombine. +define i64 @f8(i64 %val) { +; CHECK-LABEL: f8: +; CHECK: lngfr %r2, %r2 +; CHECK: br %r14 + %shl = shl i64 %val, 32 + %ashr = ashr i64 %shl, 32 + %neg = sub i64 0, %ashr + %cmp = icmp slt i64 %shl, 0 + %abs = select i1 %cmp, i64 %neg, i64 %ashr + %res = sub i64 0, %abs + ret i64 %res +} + +; Try again with sle rather than slt. +define i64 @f9(i64 %val) { +; CHECK-LABEL: f9: +; CHECK: lngfr %r2, %r2 +; CHECK: br %r14 + %shl = shl i64 %val, 32 + %ashr = ashr i64 %shl, 32 + %neg = sub i64 0, %ashr + %cmp = icmp sle i64 %shl, 0 + %abs = select i1 %cmp, i64 %neg, i64 %ashr + %res = sub i64 0, %abs + ret i64 %res +} + +; Repeat f8 with the operands reversed. +define i64 @f10(i64 %val) { +; CHECK-LABEL: f10: +; CHECK: lngfr %r2, %r2 +; CHECK: br %r14 + %shl = shl i64 %val, 32 + %ashr = ashr i64 %shl, 32 + %neg = sub i64 0, %ashr + %cmp = icmp sgt i64 %shl, 0 + %abs = select i1 %cmp, i64 %ashr, i64 %neg + %res = sub i64 0, %abs + ret i64 %res +} + +; Try again with sge rather than sgt. +define i64 @f11(i64 %val) { +; CHECK-LABEL: f11: +; CHECK: lngfr %r2, %r2 +; CHECK: br %r14 + %shl = shl i64 %val, 32 + %ashr = ashr i64 %shl, 32 + %neg = sub i64 0, %ashr + %cmp = icmp sge i64 %shl, 0 + %abs = select i1 %cmp, i64 %ashr, i64 %neg + %res = sub i64 0, %abs + ret i64 %res +} + +; Repeat f8 with the negation coming from swapped operands. +define i64 @f12(i64 %val) { +; CHECK-LABEL: f12: +; CHECK: lngfr %r2, %r2 +; CHECK: br %r14 + %shl = shl i64 %val, 32 + %ashr = ashr i64 %shl, 32 + %neg = sub i64 0, %ashr + %cmp = icmp slt i64 %shl, 0 + %negabs = select i1 %cmp, i64 %ashr, i64 %neg + ret i64 %negabs +} + +; Likewise f9. +define i64 @f13(i64 %val) { +; CHECK-LABEL: f13: +; CHECK: lngfr %r2, %r2 +; CHECK: br %r14 + %shl = shl i64 %val, 32 + %ashr = ashr i64 %shl, 32 + %neg = sub i64 0, %ashr + %cmp = icmp sle i64 %shl, 0 + %negabs = select i1 %cmp, i64 %ashr, i64 %neg + ret i64 %negabs +} + +; Likewise f10. 
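+; (LNGFR is the matching load-negative form, producing minus the absolute
+; value of the sign-extended 32-bit operand.)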
+define i64 @f14(i64 %val) {
+; CHECK-LABEL: f14:
+; CHECK: lngfr %r2, %r2
+; CHECK: br %r14
+  %shl = shl i64 %val, 32
+  %ashr = ashr i64 %shl, 32
+  %neg = sub i64 0, %ashr
+  %cmp = icmp sgt i64 %shl, 0
+  %negabs = select i1 %cmp, i64 %neg, i64 %ashr
+  ret i64 %negabs
+}
+
+; Likewise f11.
+define i64 @f15(i64 %val) {
+; CHECK-LABEL: f15:
+; CHECK: lngfr %r2, %r2
+; CHECK: br %r14
+  %shl = shl i64 %val, 32
+  %ashr = ashr i64 %shl, 32
+  %neg = sub i64 0, %ashr
+  %cmp = icmp sge i64 %shl, 0
+  %negabs = select i1 %cmp, i64 %neg, i64 %ashr
+  ret i64 %negabs
+}
+
+; Repeat f5 with the comparison on the unextended value.
+define i64 @f16(i32 %val) {
+; CHECK-LABEL: f16:
+; CHECK: lngfr %r2, %r2
+; CHECK: br %r14
+  %ext = sext i32 %val to i64
+  %cmp = icmp slt i32 %val, 0
+  %neg = sub i64 0, %ext
+  %abs = select i1 %cmp, i64 %neg, i64 %ext
+  %res = sub i64 0, %abs
+  ret i64 %res
+}
+
+; And again with the negation coming from swapped operands.
+define i64 @f17(i32 %val) {
+; CHECK-LABEL: f17:
+; CHECK: lngfr %r2, %r2
+; CHECK: br %r14
+  %ext = sext i32 %val to i64
+  %cmp = icmp slt i32 %val, 0
+  %neg = sub i64 0, %ext
+  %abs = select i1 %cmp, i64 %ext, i64 %neg
+  ret i64 %abs
+}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/risbg-01.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/risbg-01.ll
index a4d11fdae5b9..e303067e539e 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/risbg-01.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/risbg-01.ll
@@ -457,11 +457,22 @@ define i64 @f40(i64 %foo, i64 *%dest) {
   ret i64 %and
 }
 
+; Check a case where the result is zero-extended.
+define i64 @f41(i32 %a) {
+; CHECK-LABEL: f41:
+; CHECK: risbg %r2, %r2, 36, 191, 62
+; CHECK: br %r14
+  %shl = shl i32 %a, 2
+  %shr = lshr i32 %shl, 4
+  %ext = zext i32 %shr to i64
+  ret i64 %ext
+}
+
 ; In this case the sign extension is converted to a pair of 32-bit shifts,
 ; which is then extended to 64 bits. We previously used the wrong bit size
 ; when testing whether the shifted-in bits of the shift right were significant.
-define i64 @f41(i1 %x) {
-; CHECK-LABEL: f41:
+define i64 @f42(i1 %x) {
+; CHECK-LABEL: f42:
 ; CHECK: sll %r2, 31
 ; CHECK: sra %r2, 31
 ; CHECK: llgcr %r2, %r2
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/selectcc-01.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/selectcc-01.ll
new file mode 100644
index 000000000000..a57444c831a5
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/selectcc-01.ll
@@ -0,0 +1,178 @@
+; Test an i32 0/-1 SELECTCC for every floating-point condition.
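+; (IPM inserts the condition code into bits 2-3 of its result, i.e. as
+; CC << 28; the XILF/AFI/SLL constants below then arrange for exactly the
+; wanted CC values to set the sign bit, which the final arithmetic shift
+; right broadcasts to give 0 or -1.)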
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test CC in { 0 } +define i32 @f1(float %a, float %b) { +; CHECK-LABEL: f1: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, -268435456 +; CHECK-NEXT: sra %r2, 31 +; CHECK: br %r14 + %cond = fcmp oeq float %a, %b + %res = select i1 %cond, i32 -1, i32 0 + ret i32 %res +} + +; Test CC in { 1 } +define i32 @f2(float %a, float %b) { +; CHECK-LABEL: f2: +; CHECK: ipm %r2 +; CHECK-NEXT: xilf %r2, 268435456 +; CHECK-NEXT: afi %r2, -268435456 +; CHECK-NEXT: sra %r2, 31 +; CHECK: br %r14 + %cond = fcmp olt float %a, %b + %res = select i1 %cond, i32 -1, i32 0 + ret i32 %res +} + +; Test CC in { 0, 1 } +define i32 @f3(float %a, float %b) { +; CHECK-LABEL: f3: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, -536870912 +; CHECK-NEXT: sra %r2, 31 +; CHECK: br %r14 + %cond = fcmp ole float %a, %b + %res = select i1 %cond, i32 -1, i32 0 + ret i32 %res +} + +; Test CC in { 2 } +define i32 @f4(float %a, float %b) { +; CHECK-LABEL: f4: +; CHECK: ipm %r2 +; CHECK-NEXT: xilf %r2, 268435456 +; CHECK-NEXT: afi %r2, 1342177280 +; CHECK-NEXT: sra %r2, 31 +; CHECK: br %r14 + %cond = fcmp ogt float %a, %b + %res = select i1 %cond, i32 -1, i32 0 + ret i32 %res +} + +; Test CC in { 0, 2 } +define i32 @f5(float %a, float %b) { +; CHECK-LABEL: f5: +; CHECK: ipm %r2 +; CHECK-NEXT: xilf %r2, 4294967295 +; CHECK-NEXT: sll %r2, 3 +; CHECK-NEXT: sra %r2, 31 +; CHECK: br %r14 + %cond = fcmp oge float %a, %b + %res = select i1 %cond, i32 -1, i32 0 + ret i32 %res +} + +; Test CC in { 1, 2 } +define i32 @f6(float %a, float %b) { +; CHECK-LABEL: f6: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, 268435456 +; CHECK-NEXT: sll %r2, 2 +; CHECK-NEXT: sra %r2, 31 +; CHECK: br %r14 + %cond = fcmp one float %a, %b + %res = select i1 %cond, i32 -1, i32 0 + ret i32 %res +} + +; Test CC in { 0, 1, 2 } +define i32 @f7(float %a, float %b) { +; CHECK-LABEL: f7: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, -805306368 +; CHECK-NEXT: sra %r2, 31 +; CHECK: br %r14 + %cond = fcmp ord float %a, %b + %res = select i1 %cond, i32 -1, i32 0 + ret i32 %res +} + +; Test CC in { 3 } +define i32 @f8(float %a, float %b) { +; CHECK-LABEL: f8: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, 1342177280 +; CHECK-NEXT: sra %r2, 31 +; CHECK: br %r14 + %cond = fcmp uno float %a, %b + %res = select i1 %cond, i32 -1, i32 0 + ret i32 %res +} + +; Test CC in { 0, 3 } +define i32 @f9(float %a, float %b) { +; CHECK-LABEL: f9: +; CHECK: ipm %r2 +; CHECK-NEXT: afi %r2, -268435456 +; CHECK-NEXT: sll %r2, 2 +; CHECK-NEXT: sra %r2, 31 +; CHECK: br %r14 + %cond = fcmp ueq float %a, %b + %res = select i1 %cond, i32 -1, i32 0 + ret i32 %res +} + +; Test CC in { 1, 3 } +define i32 @f10(float %a, float %b) { +; CHECK-LABEL: f10: +; CHECK: ipm %r2 +; CHECK-NEXT: sll %r2, 3 +; CHECK-NEXT: sra %r2, 31 +; CHECK: br %r14 + %cond = fcmp ult float %a, %b + %res = select i1 %cond, i32 -1, i32 0 + ret i32 %res +} + +; Test CC in { 0, 1, 3 } +define i32 @f11(float %a, float %b) { +; CHECK-LABEL: f11: +; CHECK: ipm %r2 +; CHECK-NEXT: xilf %r2, 268435456 +; CHECK-NEXT: afi %r2, -805306368 +; CHECK-NEXT: sra %r2, 31 +; CHECK: br %r14 + %cond = fcmp ule float %a, %b + %res = select i1 %cond, i32 -1, i32 0 + ret i32 %res +} + +; Test CC in { 2, 3 } +define i32 @f12(float %a, float %b) { +; CHECK-LABEL: f12: +; CHECK: ipm %r2 +; CHECK-NEXT: sll %r2, 2 +; CHECK-NEXT: sra %r2, 31 +; CHECK: br %r14 + %cond = fcmp ugt float %a, %b + %res = select i1 %cond, i32 -1, i32 0 + ret i32 %res +} + +; Test CC in { 0, 2, 3 } +define i32 @f13(float %a, float %b) { +; 
CHECK-LABEL: f13:
+; CHECK: ipm %r2
+; CHECK-NEXT: xilf %r2, 268435456
+; CHECK-NEXT: afi %r2, 1879048192
+; CHECK-NEXT: sra %r2, 31
+; CHECK: br %r14
+  %cond = fcmp uge float %a, %b
+  %res = select i1 %cond, i32 -1, i32 0
+  ret i32 %res
+}
+
+; Test CC in { 1, 2, 3 }
+define i32 @f14(float %a, float %b) {
+; CHECK-LABEL: f14:
+; CHECK: ipm %r2
+; CHECK-NEXT: afi %r2, 1879048192
+; CHECK-NEXT: sra %r2, 31
+; CHECK: br %r14
+  %cond = fcmp une float %a, %b
+  %res = select i1 %cond, i32 -1, i32 0
+  ret i32 %res
+}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/selectcc-02.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/selectcc-02.ll
new file mode 100644
index 000000000000..b1081a0621d6
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/selectcc-02.ll
@@ -0,0 +1,178 @@
+; Test an i32 0/-1 SELECTCC for every floating-point condition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test CC in { 1, 2, 3 }
+define i32 @f1(float %a, float %b) {
+; CHECK-LABEL: f1:
+; CHECK: ipm %r2
+; CHECK-NEXT: afi %r2, 1879048192
+; CHECK-NEXT: sra %r2, 31
+; CHECK: br %r14
+  %cond = fcmp oeq float %a, %b
+  %res = select i1 %cond, i32 0, i32 -1
+  ret i32 %res
+}
+
+; Test CC in { 0, 2, 3 }
+define i32 @f2(float %a, float %b) {
+; CHECK-LABEL: f2:
+; CHECK: ipm %r2
+; CHECK-NEXT: xilf %r2, 268435456
+; CHECK-NEXT: afi %r2, 1879048192
+; CHECK-NEXT: sra %r2, 31
+; CHECK: br %r14
+  %cond = fcmp olt float %a, %b
+  %res = select i1 %cond, i32 0, i32 -1
+  ret i32 %res
+}
+
+; Test CC in { 2, 3 }
+define i32 @f3(float %a, float %b) {
+; CHECK-LABEL: f3:
+; CHECK: ipm %r2
+; CHECK-NEXT: sll %r2, 2
+; CHECK-NEXT: sra %r2, 31
+; CHECK: br %r14
+  %cond = fcmp ole float %a, %b
+  %res = select i1 %cond, i32 0, i32 -1
+  ret i32 %res
+}
+
+; Test CC in { 0, 1, 3 }
+define i32 @f4(float %a, float %b) {
+; CHECK-LABEL: f4:
+; CHECK: ipm %r2
+; CHECK-NEXT: xilf %r2, 268435456
+; CHECK-NEXT: afi %r2, -805306368
+; CHECK-NEXT: sra %r2, 31
+; CHECK: br %r14
+  %cond = fcmp ogt float %a, %b
+  %res = select i1 %cond, i32 0, i32 -1
+  ret i32 %res
+}
+
+; Test CC in { 1, 3 }
+define i32 @f5(float %a, float %b) {
+; CHECK-LABEL: f5:
+; CHECK: ipm %r2
+; CHECK-NEXT: sll %r2, 3
+; CHECK-NEXT: sra %r2, 31
+; CHECK: br %r14
+  %cond = fcmp oge float %a, %b
+  %res = select i1 %cond, i32 0, i32 -1
+  ret i32 %res
+}
+
+; Test CC in { 0, 3 }
+define i32 @f6(float %a, float %b) {
+; CHECK-LABEL: f6:
+; CHECK: ipm %r2
+; CHECK-NEXT: afi %r2, -268435456
+; CHECK-NEXT: sll %r2, 2
+; CHECK-NEXT: sra %r2, 31
+; CHECK: br %r14
+  %cond = fcmp one float %a, %b
+  %res = select i1 %cond, i32 0, i32 -1
+  ret i32 %res
+}
+
+; Test CC in { 3 }
+define i32 @f7(float %a, float %b) {
+; CHECK-LABEL: f7:
+; CHECK: ipm %r2
+; CHECK-NEXT: afi %r2, 1342177280
+; CHECK-NEXT: sra %r2, 31
+; CHECK: br %r14
+  %cond = fcmp ord float %a, %b
+  %res = select i1 %cond, i32 0, i32 -1
+  ret i32 %res
+}
+
+; Test CC in { 0, 1, 2 }
+define i32 @f8(float %a, float %b) {
+; CHECK-LABEL: f8:
+; CHECK: ipm %r2
+; CHECK-NEXT: afi %r2, -805306368
+; CHECK-NEXT: sra %r2, 31
+; CHECK: br %r14
+  %cond = fcmp uno float %a, %b
+  %res = select i1 %cond, i32 0, i32 -1
+  ret i32 %res
+}
+
+; Test CC in { 1, 2 }
+define i32 @f9(float %a, float %b) {
+; CHECK-LABEL: f9:
+; CHECK: ipm %r2
+; CHECK-NEXT: afi %r2, 268435456
+; CHECK-NEXT: sll %r2, 2
+; CHECK-NEXT: sra %r2, 31
+; CHECK: br %r14
+  %cond = fcmp ueq float %a, %b
+  %res = select i1 %cond, i32 0, i32 -1
+  ret i32 %res
+}
+
+; Test CC in { 0, 2 }
+define i32 @f10(float %a, float %b)
{
+; CHECK-LABEL: f10:
+; CHECK: ipm %r2
+; CHECK-NEXT: xilf %r2, 4294967295
+; CHECK-NEXT: sll %r2, 3
+; CHECK-NEXT: sra %r2, 31
+; CHECK: br %r14
+  %cond = fcmp ult float %a, %b
+  %res = select i1 %cond, i32 0, i32 -1
+  ret i32 %res
+}
+
+; Test CC in { 2 }
+define i32 @f11(float %a, float %b) {
+; CHECK-LABEL: f11:
+; CHECK: ipm %r2
+; CHECK-NEXT: xilf %r2, 268435456
+; CHECK-NEXT: afi %r2, 1342177280
+; CHECK-NEXT: sra %r2, 31
+; CHECK: br %r14
+  %cond = fcmp ule float %a, %b
+  %res = select i1 %cond, i32 0, i32 -1
+  ret i32 %res
+}
+
+; Test CC in { 0, 1 }
+define i32 @f12(float %a, float %b) {
+; CHECK-LABEL: f12:
+; CHECK: ipm %r2
+; CHECK-NEXT: afi %r2, -536870912
+; CHECK-NEXT: sra %r2, 31
+; CHECK: br %r14
+  %cond = fcmp ugt float %a, %b
+  %res = select i1 %cond, i32 0, i32 -1
+  ret i32 %res
+}
+
+; Test CC in { 1 }
+define i32 @f13(float %a, float %b) {
+; CHECK-LABEL: f13:
+; CHECK: ipm %r2
+; CHECK-NEXT: xilf %r2, 268435456
+; CHECK-NEXT: afi %r2, -268435456
+; CHECK-NEXT: sra %r2, 31
+; CHECK: br %r14
+  %cond = fcmp uge float %a, %b
+  %res = select i1 %cond, i32 0, i32 -1
+  ret i32 %res
+}
+
+; Test CC in { 0 }
+define i32 @f14(float %a, float %b) {
+; CHECK-LABEL: f14:
+; CHECK: ipm %r2
+; CHECK-NEXT: afi %r2, -268435456
+; CHECK-NEXT: sra %r2, 31
+; CHECK: br %r14
+  %cond = fcmp une float %a, %b
+  %res = select i1 %cond, i32 0, i32 -1
+  ret i32 %res
+}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/selectcc-03.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/selectcc-03.ll
new file mode 100644
index 000000000000..cafb4a2f1842
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/selectcc-03.ll
@@ -0,0 +1,187 @@
+; Test an i64 0/-1 SELECTCC for every floating-point condition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test CC in { 0 }
+define i64 @f1(float %a, float %b) {
+; CHECK-LABEL: f1:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK-NEXT: afi [[REG]], -268435456
+; CHECK-NEXT: sllg [[REG]], [[REG]], 32
+; CHECK-NEXT: srag %r2, [[REG]], 63
+; CHECK: br %r14
+  %cond = fcmp oeq float %a, %b
+  %res = select i1 %cond, i64 -1, i64 0
+  ret i64 %res
+}
+
+; Test CC in { 1 }
+define i64 @f2(float %a, float %b) {
+; CHECK-LABEL: f2:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK-NEXT: xilf [[REG]], 268435456
+; CHECK-NEXT: afi [[REG]], -268435456
+; CHECK-NEXT: sllg [[REG]], [[REG]], 32
+; CHECK-NEXT: srag %r2, [[REG]], 63
+; CHECK: br %r14
+  %cond = fcmp olt float %a, %b
+  %res = select i1 %cond, i64 -1, i64 0
+  ret i64 %res
+}
+
+; Test CC in { 0, 1 }
+define i64 @f3(float %a, float %b) {
+; CHECK-LABEL: f3:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK-NEXT: afi [[REG]], -536870912
+; CHECK-NEXT: sllg [[REG]], [[REG]], 32
+; CHECK-NEXT: srag %r2, [[REG]], 63
+; CHECK: br %r14
+  %cond = fcmp ole float %a, %b
+  %res = select i1 %cond, i64 -1, i64 0
+  ret i64 %res
+}
+
+; Test CC in { 2 }
+define i64 @f4(float %a, float %b) {
+; CHECK-LABEL: f4:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK-NEXT: xilf [[REG]], 268435456
+; CHECK-NEXT: afi [[REG]], 1342177280
+; CHECK-NEXT: sllg [[REG]], [[REG]], 32
+; CHECK-NEXT: srag %r2, [[REG]], 63
+; CHECK: br %r14
+  %cond = fcmp ogt float %a, %b
+  %res = select i1 %cond, i64 -1, i64 0
+  ret i64 %res
+}
+
+; Test CC in { 0, 2 }
+define i64 @f5(float %a, float %b) {
+; CHECK-LABEL: f5:
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK-NEXT: xilf [[REG]], 4294967295
+; CHECK-NEXT: sllg [[REG]], [[REG]], 35
+; CHECK-NEXT: srag %r2, [[REG]], 63
+; CHECK: br %r14
+  %cond = fcmp oge float %a, %b
+  %res = select i1 %cond, i64 -1,
i64 0 + ret i64 %res +} + +; Test CC in { 1, 2 } +define i64 @f6(float %a, float %b) { +; CHECK-LABEL: f6: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: afi [[REG]], 268435456 +; CHECK-NEXT: sllg [[REG]], [[REG]], 34 +; CHECK-NEXT: srag %r2, [[REG]], 63 +; CHECK: br %r14 + %cond = fcmp one float %a, %b + %res = select i1 %cond, i64 -1, i64 0 + ret i64 %res +} + +; Test CC in { 0, 1, 2 } +define i64 @f7(float %a, float %b) { +; CHECK-LABEL: f7: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: afi [[REG]], -805306368 +; CHECK-NEXT: sllg [[REG]], [[REG]], 32 +; CHECK-NEXT: srag %r2, [[REG]], 63 +; CHECK: br %r14 + %cond = fcmp ord float %a, %b + %res = select i1 %cond, i64 -1, i64 0 + ret i64 %res +} + +; Test CC in { 3 } +define i64 @f8(float %a, float %b) { +; CHECK-LABEL: f8: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: afi [[REG]], 1342177280 +; CHECK-NEXT: sllg [[REG]], [[REG]], 32 +; CHECK-NEXT: srag %r2, [[REG]], 63 +; CHECK: br %r14 + %cond = fcmp uno float %a, %b + %res = select i1 %cond, i64 -1, i64 0 + ret i64 %res +} + +; Test CC in { 0, 3 } +define i64 @f9(float %a, float %b) { +; CHECK-LABEL: f9: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: afi [[REG]], -268435456 +; CHECK-NEXT: sllg [[REG]], [[REG]], 34 +; CHECK-NEXT: srag %r2, [[REG]], 63 +; CHECK: br %r14 + %cond = fcmp ueq float %a, %b + %res = select i1 %cond, i64 -1, i64 0 + ret i64 %res +} + +; Test CC in { 1, 3 } +define i64 @f10(float %a, float %b) { +; CHECK-LABEL: f10: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: sllg [[REG]], [[REG]], 35 +; CHECK-NEXT: srag %r2, [[REG]], 63 +; CHECK: br %r14 + %cond = fcmp ult float %a, %b + %res = select i1 %cond, i64 -1, i64 0 + ret i64 %res +} + +; Test CC in { 0, 1, 3 } +define i64 @f11(float %a, float %b) { +; CHECK-LABEL: f11: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: xilf [[REG]], 268435456 +; CHECK-NEXT: afi [[REG]], -805306368 +; CHECK-NEXT: sllg [[REG]], [[REG]], 32 +; CHECK-NEXT: srag %r2, [[REG]], 63 +; CHECK: br %r14 + %cond = fcmp ule float %a, %b + %res = select i1 %cond, i64 -1, i64 0 + ret i64 %res +} + +; Test CC in { 2, 3 } +define i64 @f12(float %a, float %b) { +; CHECK-LABEL: f12: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: sllg [[REG]], [[REG]], 34 +; CHECK-NEXT: srag %r2, [[REG]], 63 +; CHECK: br %r14 + %cond = fcmp ugt float %a, %b + %res = select i1 %cond, i64 -1, i64 0 + ret i64 %res +} + +; Test CC in { 0, 2, 3 } +define i64 @f13(float %a, float %b) { +; CHECK-LABEL: f13: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: xilf [[REG]], 268435456 +; CHECK-NEXT: afi [[REG]], 1879048192 +; CHECK-NEXT: sllg [[REG]], [[REG]], 32 +; CHECK-NEXT: srag %r2, [[REG]], 63 +; CHECK: br %r14 + %cond = fcmp uge float %a, %b + %res = select i1 %cond, i64 -1, i64 0 + ret i64 %res +} + +; Test CC in { 1, 2, 3 } +define i64 @f14(float %a, float %b) { +; CHECK-LABEL: f14: +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK-NEXT: afi [[REG]], 1879048192 +; CHECK-NEXT: sllg [[REG]], [[REG]], 32 +; CHECK-NEXT: srag %r2, [[REG]], 63 +; CHECK: br %r14 + %cond = fcmp une float %a, %b + %res = select i1 %cond, i64 -1, i64 0 + ret i64 %res +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/serialize-01.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/serialize-01.ll new file mode 100644 index 000000000000..7801fac8d472 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/serialize-01.ll @@ -0,0 +1,21 @@ +; Test serialization instructions. 
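+; (BCR 15,0 is the architected serialization point; the z196
+; fast-BCR-serialization facility permits the weaker BCR 14,0 form.)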
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
+; RUN:   FileCheck %s -check-prefix=CHECK-FULL
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | \
+; RUN:   FileCheck %s -check-prefix=CHECK-FAST
+
+; Check that volatile loads produce a serialization.
+define i32 @f1(i32 *%src) {
+; CHECK-FULL-LABEL: f1:
+; CHECK-FULL: bcr 15, %r0
+; CHECK-FULL: l %r2, 0(%r2)
+; CHECK-FULL: br %r14
+;
+; CHECK-FAST-LABEL: f1:
+; CHECK-FAST: bcr 14, %r0
+; CHECK-FAST: l %r2, 0(%r2)
+; CHECK-FAST: br %r14
+  %val = load volatile i32 *%src
+  ret i32 %val
+}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/spill-01.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/spill-01.ll
index ca64a88f2a0d..c1f780c55d3c 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/spill-01.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/SystemZ/spill-01.ll
@@ -400,6 +400,7 @@ define void @f10() {
 ; CHECK: stgrl [[REG]], h8
 ; CHECK: br %r14
 entry:
+  %val8 = load volatile i64 *@h8
   %val0 = load volatile i64 *@h0
   %val1 = load volatile i64 *@h1
   %val2 = load volatile i64 *@h2
@@ -408,7 +409,6 @@ entry:
   %val5 = load volatile i64 *@h5
   %val6 = load volatile i64 *@h6
   %val7 = load volatile i64 *@h7
-  %val8 = load volatile i64 *@h8
   %val9 = load volatile i64 *@h9
 
   call void @foo()
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
index 1c40ca44b155..b87bf24993a1 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
@@ -47,6 +47,7 @@ declare double @sqrt(double) nounwind readonly
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!5}
+!llvm.module.flags = !{!104}
 !0 = metadata !{i32 46, i32 0, metadata !1, null}
 !1 = metadata !{i32 524299, metadata !101, metadata !2, i32 44, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !2 = metadata !{i32 524299, metadata !101, metadata !3, i32 44, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
@@ -151,3 +152,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !101 = metadata !{metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src"}
 !102 = metadata !{i32 0}
 !103 = metadata !{metadata !3}
+!104 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb/triple.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb/triple.ll
new file mode 100644
index 000000000000..0a1759f081ba
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb/triple.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -mtriple=thumb | FileCheck %s
+
+; CHECK: .code 16
+
+define void @f() {
+  ret void
+}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb/unord.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb/unord.ll
index 39458ae7b7bc..41a002efd651 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb/unord.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb/unord.ll
@@ -1,13 +1,20 @@
-; RUN: llc < %s -march=thumb | grep bne | count 1
-; RUN: llc < %s -march=thumb | grep beq | count 1
+; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
 
 define i32 @f1(float %X, float %Y) {
+; CHECK-LABEL: _f1:
+; CHECK: bne
+; CHECK: .data_region
+; CHECK: .long ___unordsf2
   %tmp = fcmp uno float %X, %Y
   %retval = select i1 %tmp, i32 1, i32 -1
   ret i32 %retval
 }
 
 define i32 @f2(float %X, float %Y) {
+; CHECK-LABEL: _f2:
+; CHECK: beq
+; CHECK: .data_region
+; CHECK: .long ___unordsf2
   %tmp = fcmp ord float %X, %Y
   %retval = select i1 %tmp, i32 1, i32 -1
   ret i32 %retval
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/bfx.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/bfx.ll
index 489349d61552..e380b8f858aa 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/bfx.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/bfx.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
 
 define i32 @sbfx1(i32 %a) {
 ; CHECK: sbfx1
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/carry.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/carry.ll
index da1902b7e0f8..48fba4ed4a91 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/carry.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/carry.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
 
 define i64 @f1(i64 %a, i64 %b) {
 entry:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/div.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/div.ll
index 003d71797ab1..e783c8892397 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/div.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/div.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumb-apple-darwin -mattr=+thumb2 \
+; RUN: llc < %s -mtriple=thumb-apple-darwin -mcpu=arm1156t2-s -mattr=+thumb2 \
 ; RUN:   | FileCheck %s -check-prefix=CHECK-THUMB
 ; RUN: llc < %s -march=thumb -mcpu=cortex-m3 -mattr=+thumb2 \
 ; RUN:   | FileCheck %s -check-prefix=CHECK-THUMBV7M
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/large-stack.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/large-stack.ll
index 36f3ce2eaa88..8d79da7982b1 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/large-stack.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/large-stack.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 \
+; RUN:   -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 \
+; RUN:   -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=LINUX
 
 define void @test1() {
 ; DARWIN-LABEL: test1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/longMACt.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/longMACt.ll
index a457333d978f..abe65f2248be 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/longMACt.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/longMACt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
 
 ; Check generated signed and unsigned multiply accumulate long.
define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) { diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/mul_const.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/mul_const.ll index 488f4d13a0eb..41de47731da5 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/mul_const.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/mul_const.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s ; rdar://7069502 define i32 @t1(i32 %v) nounwind readnone { diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-adc.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-adc.ll index 7c34cfdef3f9..58e4c59c8f9a 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-adc.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-adc.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s ; 734439407618 = 0x000000ab00000002 define i64 @f1(i64 %a) { diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add.ll index c23c74a1682e..5e81fcfb49ef 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s define i32 @t2ADDrc_255(i32 %lhs) { ; CHECK-LABEL: t2ADDrc_255: diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add2.ll index 3bbc3bf812ad..ff0e0879e9da 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add2.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s ; 171 = 0x000000ab define i32 @f1(i32 %a) { diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add3.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add3.ll index 6cd818c03e11..bb7788f2b76c 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add3.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s define i32 @f1(i32 %a) { %tmp = add i32 %a, 4095 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add4.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add4.ll index 8b957114835d..ed68d62a9f40 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add4.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add4.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s +; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s ; 171 = 0x000000ab define i64 @f1(i64 %a) { diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add5.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add5.ll index beaa09e1e69e..7ef756fa18bd 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add5.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add5.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s 
-march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a, i32 %b) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add6.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add6.ll
index 0d2f12249956..c4a13be7ffef 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add6.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-add6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i64 @f1(i64 %a, i64 %b) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-and.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-and.ll
index c9578d9d7d21..3ffcfd706dab 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-and.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-and.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a, i32 %b) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-and2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-and2.ll
index c0501ab8ad37..3bfe9b265d09 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-and2.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-and2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 ; 171 = 0x000000ab
 define i32 @f1(i32 %a) {
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-asr.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-asr.ll
index ba782dde1034..fbe39718f5d8 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-asr.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-asr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a, i32 %b) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-asr2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-asr2.ll
index 3685badcafdf..321b3f5ffe38 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-asr2.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-asr2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-bcc.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-bcc.ll
index 81f7de9ae39c..61171acd0a04 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-bcc.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-bcc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
 ; If-conversion defeats the purpose of this test, which is to check CBZ
 ; generation, so use memory barrier instruction to make sure it doesn't
 ; happen and we get actual branches.
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-bfc.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-bfc.ll
index 327b6d1a503a..844fb4a1a637 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-bfc.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-bfc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 ; 4278190095 = 0xff00000f
 define i32 @f1(i32 %a) {
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-bic.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-bic.ll
index 5938fa19a3c4..fc57ec8c16a8 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-bic.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-bic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a, i32 %b) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-clz.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-clz.ll
index dbdaae29eaef..a5cd0747f153 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-clz.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-clz.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+v7 | FileCheck %s

 define i32 @f1(i32 %a) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-cmn.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-cmn.ll
index 8bcaa7e8209e..da7d4b1ec31f 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-cmn.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-cmn.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 ; These tests could be improved by 'movs r0, #0' being rematerialized below the
 ; test as 'mov.w r0, #0'.
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-cmn2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-cmn2.ll
index f5db728d46a4..a09a1493e980 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-cmn2.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-cmn2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 ; -0x000000bb = 4294967109
 define i1 @f1(i32 %a) {
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-cmp.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-cmp.ll
index 87413444ca3b..06c611da4295 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-cmp.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-cmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 ; These tests would be improved by 'movs r0, #0' being rematerialized below the
 ; test as 'mov.w r0, #0'.
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-cmp2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-cmp2.ll
index 5b880f16deb5..8ca3caf0a088 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-cmp2.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-cmp2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 ; These tests would be improved by 'movs r0, #0' being rematerialized below the
 ; test as 'mov.w r0, #0'.
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-eor.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-eor.ll
index b3e323c10d2e..6dfc5cd5de69 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-eor.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-eor.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a, i32 %b) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-eor2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-eor2.ll
index 5daa13df655d..cf27448e36cf 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-eor2.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-eor2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 ; 0x000000bb = 187
 define i32 @f1(i32 %a) {
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-jtb.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-jtb.ll
index 0748b9b32d9a..11620c2f1f92 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-jtb.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-jtb.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -arm-adjust-jump-tables=0 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 -arm-adjust-jump-tables=0 | FileCheck %s

 ; Do not use tbb / tbh if any destination is before the jumptable.
 ; rdar://7102917
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldr.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldr.ll
index 7f68f661fa9a..09212d34a29e 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldr.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32* %v) {
 entry:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldr_ext.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
index 9e6aef4e0974..b865cf428170 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrb | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrh | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrsb | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep ldrsh | count 1
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | grep ldrb | count 1
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | grep ldrh | count 1
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | grep ldrsb | count 1
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | grep ldrsh | count 1

 define i32 @test1(i8* %v.pntr.s0.u1) {
 %tmp.u = load i8* %v.pntr.s0.u1
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldr_post.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldr_post.ll
index bce847471beb..4f04647d7af7 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldr_post.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldr_post.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @test(i32 %a, i32 %b, i32 %c) {
 %tmp1 = mul i32 %a, %b ; [#uses=2]
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldr_pre.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
index 601c0b560800..4907dec19dfb 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | \
 ; RUN: grep "ldr.*\!" | count 3
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | \
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | \
 ; RUN: grep "ldrsb.*\!" | count 1

 define i32* @test1(i32* %X, i32* %dest) {
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldrb.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldrb.ll
index c135effd796b..c79f7327b150 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldrb.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldrb.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i8 @f1(i8* %v) {
 entry:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldrh.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldrh.ll
index 99f6aba65cf0..7ba9f2230d43 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldrh.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ldrh.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i16 @f1(i16* %v) {
 entry:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsl.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsl.ll
index 1b4853853a4e..015a9dd471a4 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsl.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsl.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsl2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsl2.ll
index bc0978e68241..c64897a11580 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsl2.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsl2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a, i32 %b) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsr.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsr.ll
index a3b207c1f90b..24973c7b5331 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsr.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsr2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsr2.ll
index ae55735fabbc..0b199bb46b27 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsr2.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsr2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a, i32 %b) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsr3.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsr3.ll
index e7ba782afe6a..c81412317ccb 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsr3.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-lsr3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i1 @test1(i64 %poscnt, i32 %work) {
 entry:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mla.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mla.ll
index 709fa13dd3a1..a99ffe7a7668 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mla.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mla.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -arm-use-mulops=false | FileCheck %s -check-prefix=NO_MULOPS
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 \
+; RUN: -arm-use-mulops=false | FileCheck %s -check-prefix=NO_MULOPS

 define i32 @f1(i32 %a, i32 %b, i32 %c) {
 %tmp1 = mul i32 %a, %b
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mls.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mls.ll
index 86e147b24018..45d6d135c6d8 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mls.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mls.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a, i32 %b, i32 %c) {
 %tmp1 = mul i32 %a, %b
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mov.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mov.ll
index 148bafec4014..7c0dc01ccf16 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mov.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mov.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 ; Test #

diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mul.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mul.ll
index a989989b43f7..5f6825072d4c 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mulhi.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mulhi.ll
index 9d4840a2deb8..e32bd26ae9b7 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mulhi.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mulhi.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2dsp | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+t2dsp | FileCheck %s

 define i32 @smulhi(i32 %x, i32 %y) {
 ; CHECK: smulhi
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mvn2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mvn2.ll
index bce54a352e80..cee6f235a2ac 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mvn2.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-mvn2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-neg.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-neg.ll
index 40e809862140..491e4de93157 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-neg.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-neg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-orn.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-orn.ll
index 5bbe653cd12e..08676b1e34e6 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-orn.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-orn.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s


 define i32 @f1(i32 %a, i32 %b) {
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-orn2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-orn2.ll
index eff3ae38a056..a8f4a84e65e1 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-orn2.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-orn2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 ; 0x000000bb = 187

diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-orr.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-orr.ll
index 13ed8620059b..776d7fe790ae 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-orr.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-orr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a, i32 %b) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-orr2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-orr2.ll
index 837bb1cb07c1..37885e277c8a 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-orr2.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-orr2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 ; 0x000000bb = 187

diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-pack.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-pack.ll
index 1052dd2a072e..9a0d8892ccea 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-pack.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-pack.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk | FileCheck %s

 ; CHECK: test1
 ; CHECK: pkhbt r0, r0, r1, lsl #16
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-rev.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-rev.ll
index 67cd62362fe9..d71011382a74 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-rev.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-rev.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7,+t2xtpk | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+v7,+t2xtpk | FileCheck %s

 define i32 @f1(i32 %a) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-rev16.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-rev16.ll
index 10cd5391a48d..3e2658741b6f 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-rev16.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-rev16.ll
@@ -1,7 +1,7 @@
 ; XFAIL: *
 ; fixme rev16 pattern is not matching

-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep "rev16\W*r[0-9]*,\W*r[0-9]*" | count 1
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | grep "rev16\W*r[0-9]*,\W*r[0-9]*" | count 1

 ; 0xff00ff00 = 4278255360
 ; 0x00ff00ff = 16711935
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ror.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ror.ll
index 2a218eae9752..3a21560ae8b1 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ror.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-ror.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s
 ; RUN: llc < %s -march=thumb | FileCheck %s -check-prefix=THUMB1

 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-rsb.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-rsb.ll
index 150a25f51b54..94a1fb0a4a1a 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-rsb.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-rsb.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a, i32 %b) {
 %tmp = shl i32 %b, 5
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-rsb2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-rsb2.ll
index 15aa8af3b83b..248ab165a492 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-rsb2.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-rsb2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 ; 171 = 0x000000ab
 define i32 @f1(i32 %a) {
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sbc.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sbc.ll
index 0c37984ba3e7..7c69451b1bd7 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sbc.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sbc.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=thumb -mattr=+thumb2 < %s | FileCheck %s
+; RUN: llc -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 < %s | FileCheck %s

 define i64 @f1(i64 %a, i64 %b) {
 ; CHECK: f1
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-select.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-select.ll
index 5f5fa1992516..949b6116ca38 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-select.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-select.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -show-mc-encoding | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 -show-mc-encoding | FileCheck %s

 define i32 @f1(i32 %a.s) {
 entry:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-select_xform.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-select_xform.ll
index ed4d26d746cb..f8ceba220aec 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-select_xform.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-select_xform.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK: t1
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-smla.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-smla.ll
index aaaedfa42e74..f96263ee9ce7 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-smla.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-smla.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk,+t2dsp | FileCheck %s
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk,+t2dsp -arm-use-mulops=false | FileCheck %s -check-prefix=NO_MULOPS
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk,+t2dsp | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk,+t2dsp -arm-use-mulops=false | FileCheck %s -check-prefix=NO_MULOPS

 define i32 @f3(i32 %a, i16 %x, i32 %y) {
 ; CHECK: f3
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-smul.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-smul.ll
index 7a13269615d4..742e7662b91c 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-smul.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-smul.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk,+t2dsp | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk,+t2dsp | FileCheck %s

 @x = weak global i16 0 ; [#uses=1]
 @y = weak global i16 0 ; [#uses=0]
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-str.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-str.ll
index fb5fa168e8b8..f800974ed991 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-str.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-str.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a, i32* %v) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-str_post.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-str_post.ll
index 2133d2807006..716c2d2e240e 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-str_post.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-str_post.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i16 @test1(i32* %X, i16* %A) {
 ; CHECK-LABEL: test1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-str_pre.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-str_pre.ll
index 1e6616a91cc5..83b3779c1d6e 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-str_pre.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-str_pre.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define void @test1(i32* %X, i32* %A, i32** %dest) {
 ; CHECK: test1
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-strb.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-strb.ll
index cc39b7d585c5..39e376d1a735 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-strb.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-strb.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i8 @f1(i8 %a, i8* %v) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-strh.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-strh.ll
index d68693830518..944438361138 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-strh.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-strh.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i16 @f1(i16 %a, i16* %v) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub.ll
index f83dfe2e00a4..ad5eda187769 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 ; 171 = 0x000000ab
 define i32 @f1(i32 %a) {
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub2.ll
index 47eb1e1a36cf..f11489259b9b 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub2.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a) {
 %tmp = sub i32 %a, 4095
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub3.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub3.ll
index 1dbda57f2369..ae12b28e93c3 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub3.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub3.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=thumb -mattr=+thumb2 < %s | FileCheck %s
+; RUN: llc -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 < %s | FileCheck %s

 ; 171 = 0x000000ab
 define i64 @f1(i64 %a) {
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub4.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub4.ll
index ff1441ac64dd..873080a2cad1 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub4.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub4.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 define i32 @f1(i32 %a, i32 %b) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub5.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub5.ll
index 5941dd6ec89f..02c83f6629d5 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub5.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sub5.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 -mattr=+32bit | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+32bit \
+; RUN: | FileCheck %s

 define i64 @f1(i64 %a, i64 %b) {
 ; CHECK-LABEL: f1:
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sxt_rot.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
index f3d0edf0c578..75bbd83558a0 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk | FileCheck %s

 define i32 @test0(i8 %A) {
 ; CHECK: test0
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-teq.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-teq.ll
index 5acda35b4948..6b34e703f3d3 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-teq.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-teq.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 ; These tests would be improved by 'movs r0, #0' being rematerialized below the
 ; test as 'mov.w r0, #0'.
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-teq2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-teq2.ll
index 27ecad839399..ea43e5606caf 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-teq2.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-teq2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 ; These tests would be improved by 'movs r0, #0' being rematerialized below the
 ; tst as 'mov.w r0, #0'.
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-tst.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-tst.ll
index 31eafea614de..c17510d42d54 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-tst.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-tst.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 ; These tests would be improved by 'movs r0, #0' being rematerialized below the
 ; tst as 'mov.w r0, #0'.
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-tst2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-tst2.ll
index f71e91d1e9de..764e3d4d3832 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-tst2.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/thumb2-tst2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mcpu=arm1156t2-s -mattr=+thumb2 | FileCheck %s

 ; These tests would be improved by 'movs r0, #0' being rematerialized below the
 ; tst as 'mov.w r0, #0'.
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/tls1.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/tls1.ll
index d91e3b32f9b7..40973562d2b9 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/tls1.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/tls1.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | \
-; RUN: grep "i(tpoff)"
+; RUN: grep "i(TPOFF)"
 ; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi | \
 ; RUN: grep "__aeabi_read_tp"
 ; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi \
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/tls2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/tls2.ll
index 6cb019ff00ec..e6bed2f65a49 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/tls2.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/Thumb2/tls2.ll
@@ -8,7 +8,7 @@ entry:
 ; CHECK-NOT-PIC-LABEL: f:
 ; CHECK-NOT-PIC: add r0, pc
 ; CHECK-NOT-PIC: ldr r1, [r0]
-; CHECK-NOT-PIC: i(gottpoff)
+; CHECK-NOT-PIC: i(GOTTPOFF)

 ; CHECK-PIC-LABEL: f:
 ; CHECK-PIC: bl __tls_get_addr(PLT)
@@ -21,7 +21,7 @@ entry:
 ; CHECK-NOT-PIC-LABEL: g:
 ; CHECK-NOT-PIC: add r0, pc
 ; CHECK-NOT-PIC: ldr r1, [r0]
-; CHECK-NOT-PIC: i(gottpoff)
+; CHECK-NOT-PIC: i(GOTTPOFF)

 ; CHECK-PIC-LABEL: g:
 ; CHECK-PIC: bl __tls_get_addr(PLT)
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
index d906da43fe11..1b3fc382e890 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | \
+; RUN: llc < %s -march=x86 -mcpu=generic | \
 ; RUN: grep shld | count 1
 ;
 ; Check that the isel does not fold the shld, which already folds a load
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
index 5f5d5cccf714..50c62dfb73b8 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 > %t1
+; RUN: llc < %s -march=x86 -mcpu=core2 > %t1
 ; RUN: grep movzwl %t1 | count 2
 ; RUN: grep movzbl %t1 | count 1
 ; RUN: grep movd %t1 | count 4
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-01-18-DbgValue.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-01-18-DbgValue.ll
index 0b5a2299cb7f..f99e68242811 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-01-18-DbgValue.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-01-18-DbgValue.ll
@@ -29,6 +29,7 @@ return: ; preds = %entry
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone

 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!21}

 !0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
 !1 = metadata !{i32 786478, metadata !19, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
@@ -51,3 +52,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !18 = metadata !{metadata !1}
 !19 = metadata !{metadata !"b2.c", metadata !"/tmp/"}
 !20 = metadata !{i32 0}
+!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index 1023744e0767..7faee993a7d1 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -199,6 +199,7 @@ declare float @copysignf(float, float) nounwind readnone
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone

 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!48}

 !0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
 !1 = metadata !{i32 786478, metadata !45, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, %0 (float, float, float, float)* @__divsc3, null, null, metadata !43, i32 1922} ; [ DW_TAG_subprogram ]
@@ -248,3 +249,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !45 = metadata !{metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"}
 !46 = metadata !{metadata !"libgcc2.h", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"}
 !47 = metadata !{i32 0}
+!48 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
index 43582bbb725e..c5736eb9b449 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -22,6 +22,7 @@ declare void @foo(i32) nounwind optsize noinline ssp
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone

 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!38}

 !0 = metadata !{i32 786484, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 7, metadata !3, i1 false, i1 true, null, null} ; [ DW_TAG_variable ]
 !1 = metadata !{i32 786473, metadata !36} ; [ DW_TAG_file_type ]
@@ -86,3 +87,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 ; CHECK-NEXT: Ltmp{{.*}}:
 ; CHECK-NEXT: .byte 83
 ; CHECK-NEXT: Ltmp{{.*}}:
+!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-05-28-Crash.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-05-28-Crash.ll
index 7eff37210ffe..1114c8dc87bb 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-05-28-Crash.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-05-28-Crash.ll
@@ -23,6 +23,7 @@ entry:
 }

 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!20}

 !0 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 2, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
 !1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, metadata !15, i32 2} ; [ DW_TAG_subprogram ]
@@ -48,3 +49,4 @@ entry:
 ;CHECK: DEBUG_VALUE: bar:x <- E
 ;CHECK: Ltmp
 ;CHECK: DEBUG_VALUE: foo:y <- 1{{$}}
+!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
index 302f433473b1..b45ac226a650 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
@@ -20,6 +20,7 @@ entry:
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone

 !llvm.dbg.cu = !{!4}
+!llvm.module.flags = !{!34}
 !llvm.dbg.lv = !{!0, !14, !15, !16, !17, !24, !25, !28}

 !0 = metadata !{i32 786689, metadata !1, metadata !"this", metadata !3, i32 11, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ]
@@ -56,3 +57,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !31 = metadata !{metadata !"foo.cp", metadata !"/tmp/"}
 !32 = metadata !{i32 0}
 !33 = metadata !{metadata !1, metadata !8, metadata !18}
+!34 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-08-04-StackVariable.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-08-04-StackVariable.ll
index d00155234646..91fec3beefcb 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-08-04-StackVariable.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-08-04-StackVariable.ll
@@ -75,6 +75,7 @@ return: ; preds = %entry
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone

 !llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!49}
 !46 = metadata !{metadata !0, metadata !9, metadata !16, metadata !17, metadata !20}

 !0 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
@@ -125,3 +126,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !45 = metadata !{i32 27, i32 0, metadata !39, null}
 !47 = metadata !{metadata !"small.cc", metadata !"/Users/manav/R8248330"}
 !48 = metadata !{i32 0}
+!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-09-16-EmptyFilename.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
index cbf357a0c109..9aa41c32c366 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
@@ -13,6 +13,7 @@ entry:
 }

 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!17}

 !0 = metadata !{i32 786478, metadata !14, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 53, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ]
@@ -31,3 +32,4 @@ entry:
 !14 = metadata !{metadata !"", metadata !"/private/tmp"}
 !15 = metadata !{metadata !"bug.c", metadata !"/private/tmp"}
 !16 = metadata !{i32 0}
+!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-11-02-DbgParameter.ll
index 5fd3fd31ee23..21ac7c9079e8 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-11-02-DbgParameter.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-11-02-DbgParameter.ll
@@ -16,6 +16,7 @@ entry:
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone

 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!19}

 !0 = metadata !{i32 786478, metadata !17, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo, null, null, metadata !16, i32 3} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !17} ; [ DW_TAG_file_type ]
@@ -36,3 +37,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !16 = metadata !{metadata !6}
 !17 = metadata !{metadata !"one.c", metadata !"/private/tmp"}
 !18 = metadata !{i32 0}
+!19 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-12-02-MC-Set.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-12-02-MC-Set.ll
index 255f7f5bbf80..5a407d3f9972 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-12-02-MC-Set.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2010-12-02-MC-Set.ll
@@ -7,6 +7,7 @@ entry:
 }

 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!10}
 !7 = metadata !{metadata !0}

 !0 = metadata !{i32 786478, metadata !9, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
@@ -23,3 +24,4 @@ entry:
 ; CHECK-NEXT: __debug_line
 ; CHECK-NEXT: Lline_table_start0
 ; CHECK-NEXT: Ltmp{{[0-9]}} = (Ltmp
+!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
index 4b33f0c7d403..d5340300df54 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -70,6 +70,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 declare i32 @puts(i8* nocapture) nounwind

 !llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!33}

 !0 = metadata !{i32 786478, metadata !31, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i64 (i64, i64)* @gcd, null, null, metadata !29, i32 0} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 0] [gcd]
 !1 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ]
@@ -104,3 +105,4 @@ declare i32 @puts(i8* nocapture) nounwind
 !30 = metadata !{metadata !14, metadata !17}
 !31 = metadata !{metadata !"rem_small.c", metadata !"/private/tmp"}
 !32 = metadata !{i32 0}
+!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2011-12-28-vselecti8.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2011-12-28-vselecti8.ll
index dbc122ac6e40..c91646640b8f 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2011-12-28-vselecti8.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2011-12-28-vselecti8.ll
@@ -3,10 +3,20 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-darwin11.2.0"

-; CHECK: @foo8
-; CHECK: psll
-; CHECK: psraw
-; CHECK: pblendvb
+; During legalization, the vselect mask is 'type legalized' into a
+; wider BUILD_VECTOR. This causes the introduction of a new
+; sign_extend_inreg in the DAG.
+;
+; A sign_extend_inreg of a vector of ConstantSDNode or undef can be
+; always folded into a simple build_vector.
+;
+; Make sure that the sign_extend_inreg is simplified and that we
+; don't generate psll, psraw and pblendvb from the vselect.
+
+; CHECK-LABEL: foo8
+; CHECK-NOT: psll
+; CHECK-NOT: psraw
+; CHECK-NOT: pblendvb
 ; CHECK: ret
 define void @foo8(float* nocapture %RET) nounwind {
 allocas:
@@ -17,4 +27,3 @@ allocas:

 ret void
 }
-
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
index 2ba0f08e9a2f..d41b43228b6c 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
@@ -36,6 +36,7 @@ return: ; preds = %for.cond.preheader,
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone

 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12}

 !0 = metadata !{i32 786449, metadata !11, i32 12, metadata !"clang version 3.3 (trunk 168918) (llvm/trunk 168920)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, null, metadata !""} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Olden/bh/newbh.c] [DW_LANG_C99]
 !1 = metadata !{metadata !2}
@@ -46,3 +47,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !6 = metadata !{i32 786454, metadata !11, null, metadata !"hgstruct", i32 492, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] [hgstruct] [line 492, size 0, align 0, offset 0] [from ]
 !7 = metadata !{i32 786451, metadata !11, null, metadata !"", i32 487, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, null, i32 0, null} ; [ DW_TAG_structure_type ] [line 487, size 512, align 64, offset 0] [def] [from ]
 !11 = metadata !{metadata !"MultiSource/Benchmarks/Olden/bh/newbh.c", metadata !"MultiSource/Benchmarks/Olden/bh"}
+!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2012-11-30-misched-dbg.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2012-11-30-misched-dbg.ll
index 1a639bc08386..7befa6b4757d 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2012-11-30-misched-dbg.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2012-11-30-misched-dbg.ll
@@ -63,6 +63,7 @@ if.else4114: ; preds = %if.then4073
 declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...)

 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!35}

 !0 = metadata !{i32 786449, metadata !19, i32 12, metadata !"clang version 3.3 (trunk 168918) (llvm/trunk 168920)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/MiBench/consumer-typeset/MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] [DW_LANG_C99]
 !1 = metadata !{metadata !2}
@@ -134,3 +135,4 @@ declare void @_Znwm()
 !32 = metadata !{i32 786454, metadata !34, null, metadata !"HM", i32 28, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_typedef ] [HM] [line 28, size 0, align 0, offset 0] [from ]
 !33 = metadata !{i32 786473, metadata !34} ; [ DW_TAG_file_type ]
 !34 = metadata !{metadata !"SingleSource/Benchmarks/Shootout-C++/hash.cpp", metadata !"SingleSource/Benchmarks/Shootout-C++"}
+!35 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2012-11-30-regpres-dbg.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
index babe432bd8ce..5aec3d92c70f 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
@@ -34,6 +34,7 @@ invoke.cont44: ; preds = %if.end
 }

 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8}

 !0 = metadata !{i32 786449, metadata !6, i32 4, metadata !"clang version 3.3 (trunk 168984) (llvm/trunk 168983)", i1 true, metadata !"", i32 0, metadata !2, metadata !7, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Bullet/MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp] [DW_LANG_C_plus_plus]
 !2 = metadata !{null}
@@ -42,3 +43,4 @@ invoke.cont44: ; preds = %if.end
 !5 = metadata !{i32 786473, metadata !6} ; [ DW_TAG_file_type ]
 !6 = metadata !{metadata !"MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", metadata !"MultiSource/Benchmarks/Bullet"}
 !7 = metadata !{i32 0}
+!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/3addr-16bit.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/3addr-16bit.ll
index fafdfdb74811..2d6a5e76657f 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/3addr-16bit.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/3addr-16bit.ll
@@ -34,7 +34,7 @@ entry:

 ; 64BIT-LABEL: t2:
 ; 64BIT-NOT: movw %si, %ax
-; 64BIT: decl %eax
+; 64BIT: leal -1(%rsi), %eax
 ; 64BIT: movzwl %ax
 %0 = icmp eq i16 %k, %c ; [#uses=1]
 %1 = add i16 %k, -1 ; [#uses=3]
@@ -59,7 +59,7 @@ entry:

 ; 64BIT-LABEL: t3:
 ; 64BIT-NOT: movw %si, %ax
-; 64BIT: addl $2, %eax
+; 64BIT: leal 2(%rsi), %eax
 %0 = add i16 %k, 2 ; [#uses=3]
 %1 = icmp eq i16 %k, %c ; [#uses=1]
 br i1 %1, label %bb, label %bb1
@@ -82,7 +82,7 @@ entry:

 ; 64BIT-LABEL: t4:
 ; 64BIT-NOT: movw %si, %ax
-; 64BIT: addl %edi, %eax
+; 64BIT: leal (%rsi,%rdi), %eax
 %0 = add i16 %k, %c ; [#uses=3]
 %1 = icmp eq i16 %k, %c ; [#uses=1]
 br i1 %1, label %bb, label %bb1
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/MachineSink-DbgValue.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/MachineSink-DbgValue.ll
index 9208bd5a78ca..584e644ed51f 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/MachineSink-DbgValue.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/MachineSink-DbgValue.ll
@@ -26,6 +26,7 @@ bb2:
 declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone

 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!22}

 !0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, metadata !18, null, null, null} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i32*)* @foo, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
@@ -49,3 +50,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !19 = metadata !{metadata !6, metadata !7, metadata !10}
 !20 = metadata !{metadata !"a.c", metadata !"/private/tmp"}
 !21 = metadata !{i32 0}
+!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/StackColoring-dbg.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/StackColoring-dbg.ll
index 84c3afb6d953..51d0d1775c67 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/StackColoring-dbg.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/StackColoring-dbg.ll
@@ -26,8 +26,10 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind

 !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!23}
 !0 = metadata !{i32 524305, metadata !1, i32 1, metadata !"clang", i1 true, metadata !"", i32 0, metadata !2, metadata !2, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{metadata !"t.c", metadata !""}
 !16 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6}
 !2 = metadata !{i32 0}
 !22 = metadata !{i32 786688, null, metadata !"x", metadata !2, i32 16, metadata !16, i32 0, i32 0}
+!23 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/anyregcc-crash.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/anyregcc-crash.ll
index cf6f6edb31a8..3abe3d149a11 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/anyregcc-crash.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/anyregcc-crash.ll
@@ -7,11 +7,11 @@
 define i64 @anyreglimit(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8,
                         i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16) {
 entry:
-  %result = tail call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 16,
+  %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 16,
                                                   i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8,
                                                   i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16)
   ret i64 %result
 }

-declare i64 @llvm.experimental.patchpoint.i64(i32, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/anyregcc.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/anyregcc.ll
index 8109f879f217..0677023174e7 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/anyregcc.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/anyregcc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck %s

 ; Stackmap Header: no constants - 6 callsites
 ; CHECK-LABEL: .section __LLVM_STACKMAPS,__llvm_stackmaps
@@ -11,7 +11,6 @@
 ; CHECK-NEXT: .long 8

 ; test
-; CHECK-NEXT: .long 0
 ; CHECK-LABEL: .long L{{.*}}-_test
 ; CHECK-NEXT: .short 0
 ; 3 locations
@@ -33,12 +32,11 @@
 ; CHECK-NEXT: .long 3
 define i64 @test() nounwind ssp uwtable {
 entry:
-  call anyregcc void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 0, i32 15, i8* null, i32 2, i32 1, i32 2, i64 3)
+  call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 0, i32 15, i8* null, i32 2, i32 1, i32 2, i64 3)
   ret i64 0
 }

 ; property access 1 - %obj is an anyreg call argument and should therefore be in a register
-; CHECK-NEXT: .long 1
 ; CHECK-LABEL: .long L{{.*}}-_property_access1
 ; CHECK-NEXT: .short 0
 ; 2 locations
@@ -56,12 +54,11 @@ entry:
 define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
 entry:
   %f = inttoptr i64 12297829382473034410 to i8*
-  %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 1, i32 15, i8* %f, i32 1, i8* %obj)
+  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 1, i32 15, i8* %f, i32 1, i8* %obj)
   ret i64 %ret
 }

 ; property access 2 - %obj is an anyreg call argument and should therefore be in a register
-; CHECK-NEXT: .long 2
 ; CHECK-LABEL: .long L{{.*}}-_property_access2
 ; CHECK-NEXT: .short 0
 ; 2 locations
@@ -80,12 +77,11 @@ define i64 @property_access2() nounwind ssp uwtable {
 entry:
   %obj = alloca i64, align 8
   %f = inttoptr i64 12297829382473034410 to i8*
-  %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 2, i32 15, i8* %f, i32 1, i64* %obj)
+  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %f, i32 1, i64* %obj)
   ret i64 %ret
 }

 ; property access 3 - %obj is a frame index
-; CHECK-NEXT: .long 3
 ; CHECK-LABEL: .long L{{.*}}-_property_access3
 ; CHECK-NEXT: .short 0
 ; 2 locations
@@ -95,21 +91,20 @@ entry:
 ; CHECK-NEXT: .byte 8
 ; CHECK-NEXT: .short {{[0-9]+}}
 ; CHECK-NEXT: .long 0
-; Loc 1: Register <-- this will be folded once folding for FI is implemented
-; CHECK-NEXT: .byte 1
+; Loc 1: Direct RBP - ofs
+; CHECK-NEXT: .byte 2
 ; CHECK-NEXT: .byte 8
-; CHECK-NEXT: .short {{[0-9]+}}
-; CHECK-NEXT: .long 0
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .long
 define i64 @property_access3() nounwind ssp uwtable {
 entry:
   %obj = alloca i64, align 8
   %f = inttoptr i64 12297829382473034410 to i8*
-  %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 3, i32 15, i8* %f, i32 0, i64* %obj)
+  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 3, i32 15, i8* %f, i32 0, i64* %obj)
   ret i64 %ret
 }

 ; anyreg_test1
-; CHECK-NEXT: .long 4
 ; CHECK-LABEL: .long L{{.*}}-_anyreg_test1
 ; CHECK-NEXT: .short 0
 ; 14 locations
@@ -187,12 +182,11 @@ entry:
 define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
 entry:
   %f = inttoptr i64 12297829382473034410 to i8*
-  %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 4, i32 15, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 4, i32 15, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
   ret i64 %ret
 }

 ; anyreg_test2
-; CHECK-NEXT: .long 5
 ; CHECK-LABEL: .long L{{.*}}-_anyreg_test2
 ; CHECK-NEXT: .short 0
 ; 14 locations
@@ -270,7 +264,7 @@ entry:
 define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
 entry:
   %f = inttoptr i64 12297829382473034410 to i8*
-  %ret = call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 15, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 15, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
   ret i64 %ret
 }

@@ -278,7 +272,6 @@ entry:
 ;
 ; [JS] Assertion: "Folded a def to a non-store!"
 ;
-; CHECK-LABEL: .long 12
 ; CHECK-LABEL: .long L{{.*}}-_patchpoint_spilldef
 ; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .short 3
@@ -299,7 +292,7 @@ entry:
 ; CHECK-NEXT: .long 0
 define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
 entry:
-  %result = tail call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
+  %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
   tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
   ret i64 %result
 }

@@ -308,7 +301,6 @@ entry:
 ;
 ; [JS] AnyRegCC argument ends up being spilled
 ;
-; CHECK-LABEL: .long 13
 ; CHECK-LABEL: .long L{{.*}}-_patchpoint_spillargs
 ; CHECK-NEXT: .short 0
 ; CHECK-NEXT: .short 5
@@ -330,19 +322,19 @@ entry:
 ; Loc 3: Arg2 spilled to RBP +
 ; CHECK-NEXT: .byte 3
 ; CHECK-NEXT: .byte 8
-; CHECK-NEXT: .short 7
-; CHECK-NEXT: .long {{[0-9]+}}
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .long
 ; Loc 4: Arg3 spilled to RBP +
 ; CHECK-NEXT: .byte 3
 ; CHECK-NEXT: .byte 8
-; CHECK-NEXT: .short 7
-; CHECK-NEXT: .long {{[0-9]+}}
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .long
 define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
 entry:
   tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
-  %result = tail call anyregcc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 13, i32 15, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+  %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 13, i32 15, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
   ret i64 %result
 }

-declare void @llvm.experimental.patchpoint.void(i32, i32, i8*, i32, ...)
-declare i64 @llvm.experimental.patchpoint.i64(i32, i32, i8*, i32, ...)
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-arith.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-arith.ll
index e27600ecd734..223c023a8a44 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-arith.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-arith.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s

 ; CHECK-LABEL: addpd512
 ; CHECK: vaddpd
@@ -196,7 +196,7 @@ define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
 }

 ; CHECK-LABEL: sqrtA
-; CHECK: vsqrtssz
+; CHECK: vsqrtss {{.*}} encoding: [0x62
 ; CHECK: ret
 declare float @sqrtf(float) readnone
 define float @sqrtA(float %a) nounwind uwtable readnone ssp {
@@ -206,7 +206,7 @@ entry:
 }

 ; CHECK-LABEL: sqrtB
-; CHECK: vsqrtsdz
+; CHECK: vsqrtsd {{.*}}## encoding: [0x62
 ; CHECK: ret
 declare double @sqrt(double) readnone
 define double @sqrtB(double %a) nounwind uwtable readnone ssp {
@@ -216,7 +216,7 @@ entry:
 }

 ; CHECK-LABEL: sqrtC
-; CHECK: vsqrtssz
+; CHECK: vsqrtss {{.*}}## encoding: [0x62
 ; CHECK: ret
 declare float @llvm.sqrt.f32(float)
 define float @sqrtC(float %a) nounwind {
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-cmp.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-cmp.ll
index ba52745e6c19..93aa8b09087d 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-cmp.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-cmp.ll
@@ -1,6 +1,7 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s

-; CHECK: vucomisdz
+; CHECK-LABEL: test1
+; CHECK: vucomisd {{.*}}encoding: [0x62
 define double @test1(double %a, double %b) nounwind {
 %tobool = fcmp une double %a, %b
 br i1 %tobool, label %l1, label %l2
@@ -13,7 +14,8 @@ l2:
 ret double %c1
 }

-; CHECK: vucomissz
+; CHECK-LABEL: test2
+; CHECK: vucomiss {{.*}}encoding: [0x62
 define float @test2(float %a, float %b) nounwind {
 %tobool = fcmp olt float %a, %b
 br i1 %tobool, label %l1, label %l2
@@ -25,3 +27,33 @@ l2:
 %c1 = fadd float %a, %b
 ret float %c1
 }
+
+; CHECK-LABEL: test3
+; CHECK: vcmpeqss
+; CHECK: kmov
+; CHECK: ret
+define i32 @test3(float %a, float %b) {
+
+  %cmp10.i = fcmp oeq float %a, %b
+  %conv11.i = zext i1 %cmp10.i to i32
+  ret i32 %conv11.i
+}
+
+; CHECK-LABEL: test4
+; CHECK: kortestw
+; CHECK: jne
+; CHECK: ret
+declare i32 @llvm.x86.avx512.kortestz.w(i16, i16)
+
+define i16 @test4(i16 %a, i16 %b) {
+  %kortz = call i32 @llvm.x86.avx512.kortestz.w(i16 %a, i16 %b)
+  %t1 = and i32 %kortz, 1
+  %res = icmp eq i32 %t1, 0
+  br i1 %res, label %A, label %B
+
+ A: ret i16 %a
+ B:
+  %b1 = add i16 %a, %b
+  ret i16 %b1
+
+}
\ No newline at end of file
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-cvt.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-cvt.ll
index ed68ff7bcbdb..89a69e7b9824 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s

 ; CHECK-LABEL:
sitof32 ; CHECK: vcvtdq2ps %zmm @@ -67,7 +67,7 @@ define <8 x double> @fpext00(<8 x float> %b) nounwind { } ; CHECK-LABEL: funcA -; CHECK: vcvtsi2sdqz (% +; CHECK: vcvtsi2sdq (%rdi){{.*}} encoding: [0x62 ; CHECK: ret define double @funcA(i64* nocapture %e) { entry: @@ -77,7 +77,7 @@ entry: } ; CHECK-LABEL: funcB -; CHECK: vcvtsi2sdlz (% +; CHECK: vcvtsi2sdl (%{{.*}} encoding: [0x62 ; CHECK: ret define double @funcB(i32* %e) { entry: @@ -87,7 +87,7 @@ entry: } ; CHECK-LABEL: funcC -; CHECK: vcvtsi2sslz (% +; CHECK: vcvtsi2ssl (%{{.*}} encoding: [0x62 ; CHECK: ret define float @funcC(i32* %e) { entry: @@ -97,7 +97,7 @@ entry: } ; CHECK-LABEL: i64tof32 -; CHECK: vcvtsi2ssqz (% +; CHECK: vcvtsi2ssq (%{{.*}} encoding: [0x62 ; CHECK: ret define float @i64tof32(i64* %e) { entry: @@ -107,7 +107,7 @@ entry: } ; CHECK-LABEL: fpext -; CHECK: vcvtss2sdz +; CHECK: vcvtss2sd {{.*}} encoding: [0x62 ; CHECK: ret define void @fpext() { entry: @@ -120,9 +120,9 @@ entry: } ; CHECK-LABEL: fpround_scalar -; CHECK: vmovsdz -; CHECK: vcvtsd2ssz -; CHECK: vmovssz +; CHECK: vmovsd {{.*}} encoding: [0x62 +; CHECK: vcvtsd2ss {{.*}} encoding: [0x62 +; CHECK: vmovss {{.*}} encoding: [0x62 ; CHECK: ret define void @fpround_scalar() nounwind uwtable { entry: @@ -135,7 +135,7 @@ entry: } ; CHECK-LABEL: long_to_double -; CHECK: vmovqz +; CHECK: vmovq {{.*}} encoding: [0x62 ; CHECK: ret define double @long_to_double(i64 %x) { %res = bitcast i64 %x to double @@ -143,7 +143,7 @@ define double @long_to_double(i64 %x) { } ; CHECK-LABEL: double_to_long -; CHECK: vmovqz +; CHECK: vmovq {{.*}} encoding: [0x62 ; CHECK: ret define i64 @double_to_long(double %x) { %res = bitcast double %x to i64 @@ -151,7 +151,7 @@ define i64 @double_to_long(double %x) { } ; CHECK-LABEL: int_to_float -; CHECK: vmovdz +; CHECK: vmovd {{.*}} encoding: [0x62 ; CHECK: ret define float @int_to_float(i32 %x) { %res = bitcast i32 %x to float @@ -159,7 +159,7 @@ define float @int_to_float(i32 %x) { } ; CHECK-LABEL: float_to_int -; CHECK: vmovdz +; CHECK: vmovd {{.*}} encoding: [0x62 ; CHECK: ret define i32 @float_to_int(float %x) { %res = bitcast float %x to i32 @@ -185,7 +185,7 @@ define <16 x float> @uitof32(<16 x i32> %a) nounwind { } ; CHECK-LABEL: @fptosi02 -; CHECK vcvttss2siz +; CHECK vcvttss2si {{.*}} encoding: [0x62 ; CHECK: ret define i32 @fptosi02(float %a) nounwind { %b = fptosi float %a to i32 @@ -193,7 +193,7 @@ define i32 @fptosi02(float %a) nounwind { } ; CHECK-LABEL: @fptoui02 -; CHECK vcvttss2usiz +; CHECK vcvttss2usi {{.*}} encoding: [0x62 ; CHECK: ret define i32 @fptoui02(float %a) nounwind { %b = fptoui float %a to i32 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-insert-extract.ll index 3f067401ed3f..64f2a197008f 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-insert-extract.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-insert-extract.ll @@ -44,7 +44,7 @@ define <8 x i64> @test4(<8 x i64> %x) nounwind { } ;CHECK-LABEL: test5: -;CHECK: vextractpsz +;CHECK: vextractps ;CHECK: ret define i32 @test5(<4 x float> %x) nounwind { %ef = extractelement <4 x float> %x, i32 3 @@ -53,7 +53,7 @@ define i32 @test5(<4 x float> %x) nounwind { } ;CHECK-LABEL: test6: -;CHECK: vextractpsz {{.*}}, (%rdi) +;CHECK: vextractps {{.*}}, (%rdi) ;CHECK: ret define void @test6(<4 x float> %x, float* %out) nounwind { %ef = extractelement <4 x float> %x, i32 3 @@ -62,7 +62,7 @@ define void @test6(<4 x float> %x, float* %out) nounwind { } 
;CHECK-LABEL: test7 -;CHECK: vmovdz +;CHECK: vmovd ;CHECK: vpermps %zmm ;CHECK: ret define float @test7(<16 x float> %x, i32 %ind) nounwind { @@ -71,7 +71,7 @@ define float @test7(<16 x float> %x, i32 %ind) nounwind { } ;CHECK-LABEL: test8 -;CHECK: vmovqz +;CHECK: vmovq ;CHECK: vpermpd %zmm ;CHECK: ret define double @test8(<8 x double> %x, i32 %ind) nounwind { @@ -89,7 +89,7 @@ define float @test9(<8 x float> %x, i32 %ind) nounwind { } ;CHECK-LABEL: test10 -;CHECK: vmovdz +;CHECK: vmovd ;CHECK: vpermd %zmm ;CHEKK: vmovdz %xmm0, %eax ;CHECK: ret @@ -99,27 +99,21 @@ define i32 @test10(<16 x i32> %x, i32 %ind) nounwind { } ;CHECK-LABEL: test11 -;CHECK: movl $260 -;CHECK: bextrl -;CHECK: movl $268 -;CHECK: bextrl +;CHECK: vpcmpltud +;CKECK: kshiftlw $11 +;CKECK: kshiftrw $15 +;CHECK: kxorw +;CHECK: kortestw +;CHECK: jne +;CHECK: ret ;CHECK: ret define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) { %cmp_res = icmp ult <16 x i32> %a, %b %ia = extractelement <16 x i1> %cmp_res, i32 4 - %ib = extractelement <16 x i1> %cmp_res, i32 12 - br i1 %ia, label %A, label %B - A: ret <16 x i32>%b B: %c = add <16 x i32>%b, %a - br i1 %ib, label %C, label %D - C: - %c1 = sub <16 x i32>%c, %a - ret <16 x i32>%c1 - D: - %c2 = mul <16 x i32>%c, %a - ret <16 x i32>%c2 + ret <16 x i32>%c } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-intrinsics.ll index 5bdabf234990..b43c00bf4880 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -1,23 +1,51 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s -declare i32 @llvm.x86.avx512.kortestz(i16, i16) nounwind readnone -; CHECK: test_kortestz +declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone +; CHECK-LABEL: test_kortestz ; CHECK: kortestw ; CHECK: sete define i32 @test_kortestz(i16 %a0, i16 %a1) { - %res = call i32 @llvm.x86.avx512.kortestz(i16 %a0, i16 %a1) + %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1) ret i32 %res } -declare i32 @llvm.x86.avx512.kortestc(i16, i16) nounwind readnone -; CHECK: test_kortestc +declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone +; CHECK-LABEL: test_kortestc ; CHECK: kortestw ; CHECK: sbbl define i32 @test_kortestc(i16 %a0, i16 %a1) { - %res = call i32 @llvm.x86.avx512.kortestc(i16 %a0, i16 %a1) + %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1) ret i32 %res } +declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone +; CHECK-LABEL: test_kand +; CHECK: kandw +; CHECK: kandw +define i16 @test_kand(i16 %a0, i16 %a1) { + %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8) + %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1) + ret i16 %t2 +} + +declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone +; CHECK-LABEL: test_knot +; CHECK: knotw +define i16 @test_knot(i16 %a0) { + %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0) + ret i16 %res +} + +declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone + +; CHECK-LABEL: unpckbw_test +; CHECK: kunpckbw +; CHECK:ret +define i16 @unpckbw_test(i16 %a0, i16 %a1) { + %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1) + ret i16 %res +} + define <16 x float> @test_rcp_ps_512(<16 x float> %a0) { ; CHECK: vrcp14ps %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0) ; <<16 x float>> 
[#uses=1] @@ -46,21 +74,21 @@ define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) { } declare <8 x double> @llvm.x86.avx512.rcp28.pd.512(<8 x double>) nounwind readnone -define <8 x double> @test_rndscale_pd_512(<8 x double> %a0) { - ; CHECK: vrndscale - %res = call <8 x double> @llvm.x86.avx512.rndscale.pd.512(<8 x double> %a0, i32 7) ; <<8 x double>> [#uses=1] - ret <8 x double> %res +declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32) + +define <8 x double> @test7(<8 x double> %a) { +; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b] + %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> zeroinitializer, i8 -1, i32 4) + ret <8 x double>%res } -declare <8 x double> @llvm.x86.avx512.rndscale.pd.512(<8 x double>, i32) nounwind readnone +declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32) -define <16 x float> @test_rndscale_ps_512(<16 x float> %a0) { - ; CHECK: vrndscale - %res = call <16 x float> @llvm.x86.avx512.rndscale.ps.512(<16 x float> %a0, i32 7) ; <<16 x float>> [#uses=1] - ret <16 x float> %res +define <16 x float> @test8(<16 x float> %a) { +; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b] + %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> zeroinitializer, i16 -1, i32 4) + ret <16 x float>%res } -declare <16 x float> @llvm.x86.avx512.rndscale.ps.512(<16 x float>, i32) nounwind readnone - define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) { ; CHECK: vrsqrt14ps @@ -119,42 +147,42 @@ define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) { declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>) nounwind readnone define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: vsqrtssz + ; CHECK: vsqrtss {{.*}}encoding: [0x62 %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: vsqrtsdz + ; CHECK: vsqrtsd {{.*}}encoding: [0x62 %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) { - ; CHECK: vcvtsd2siz + ; CHECK: vcvtsd2si {{.*}}encoding: [0x62 %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; [#uses=1] ret i64 %res } declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) { - ; CHECK: vcvtsi2sdqz + ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62 %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) { - ; CHECK: vcvtusi2sdqz + ; CHECK: vcvtusi2sdq {{.*}}encoding: [0x62 %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone define i64 
@test_x86_sse2_cvttsd2si64(<2 x double> %a0) { - ; CHECK: vcvttsd2siz + ; CHECK: vcvttsd2si {{.*}}encoding: [0x62 %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; [#uses=1] ret i64 %res } @@ -162,7 +190,7 @@ declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) { - ; CHECK: vcvtss2siz + ; CHECK: vcvtss2si {{.*}}encoding: [0x62 %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; [#uses=1] ret i64 %res } @@ -170,7 +198,7 @@ declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) { - ; CHECK: vcvtsi2ssqz + ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62 %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -178,14 +206,14 @@ declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) { - ; CHECK: vcvttss2siz + ; CHECK: vcvttss2si {{.*}}encoding: [0x62 %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; [#uses=1] ret i64 %res } declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) { - ; CHECK: vcvtsd2usiz + ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62 %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; [#uses=1] ret i64 %res } @@ -319,56 +347,104 @@ define <8 x i64> @test_x86_pmins_q(<8 x i64> %a0, <8 x i64> %a1) { declare <8 x i64> @llvm.x86.avx512.pmins.q(<8 x i64>, <8 x i64>) nounwind readonly define <16 x i32> @test_conflict_d(<16 x i32> %a) { + ; CHECK: movw $-1, %ax + ; CHECK: vpxor ; CHECK: vpconflictd - %res = call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %a) + %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1) ret <16 x i32> %res } -declare <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32>) nounwind readonly + +declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly + +define <8 x i64> @test_conflict_q(<8 x i64> %a) { + ; CHECK: movb $-1, %al + ; CHECK: vpxor + ; CHECK: vpconflictq + %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly + define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) { - ; CHECK: vpconflictd %zmm0, %zmm0 {%k1} {z} - %vmask = bitcast i16 %mask to <16 x i1> - %res = call <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1> %vmask, <16 x i32> %a) + ; CHECK: vpconflictd + %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask) ret <16 x i32> %res } -declare <16 x i32> @llvm.x86.avx512.conflict.d.maskz.512(<16 x i1>,<16 x i32>) nounwind readonly define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { - ; CHECK: vpconflictq {{.*}} {%k1} - %vmask = bitcast i8 %mask to <8 x i1> - %res = call <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64> %b, <8 x i1> %vmask, <8 x i64> %a) + ; CHECK: vpconflictq + %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) ret <8 x i64> %res } -declare <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64>, <8 x i1>,<8 x i64>) nounwind readonly -define <16 x float> 
@test_x86_mskblend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) { +define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK: vblendmps %m0 = bitcast i16 %a0 to <16 x i1> - %res = call <16 x float> @llvm.x86.avx512.mskblend.ps.512(<16 x i1> %m0, <16 x float> %a1, <16 x float> %a2) ; <<16 x float>> [#uses=1] + %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x i1> %m0, <16 x float> %a1, <16 x float> %a2) ; <<16 x float>> [#uses=1] ret <16 x float> %res } -declare <16 x float> @llvm.x86.avx512.mskblend.ps.512(<16 x i1> %a0, <16 x float> %a1, <16 x float> %a2) nounwind readonly -define <8 x double> @test_x86_mskblend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) { +declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x i1> %a0, <16 x float> %a1, <16 x float> %a2) nounwind readonly + +define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK: vblendmpd %m0 = bitcast i8 %a0 to <8 x i1> - %res = call <8 x double> @llvm.x86.avx512.mskblend.pd.512(<8 x i1> %m0, <8 x double> %a1, <8 x double> %a2) ; <<8 x double>> [#uses=1] + %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x i1> %m0, <8 x double> %a1, <8 x double> %a2) ; <<8 x double>> [#uses=1] ret <8 x double> %res } -declare <8 x double> @llvm.x86.avx512.mskblend.pd.512(<8 x i1> %a0, <8 x double> %a1, <8 x double> %a2) nounwind readonly -define <16 x i32> @test_x86_mskblend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) { +define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) { + ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop + ; CHECK: vblendmpd {{.*}}, {{%zmm[0-9]}}, {{%zmm[0-9]}} {%k1} + %vmask = bitcast i8 %mask to <8 x i1> + %b = load <8 x double>* %ptr + %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x i1> %vmask, <8 x double> %a, <8 x double> %b) ; <<8 x double>> [#uses=1] + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x i1> %a0, <8 x double> %a1, <8 x double> %a2) nounwind readonly + +define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) { ; CHECK: vpblendmd %m0 = bitcast i16 %a0 to <16 x i1> - %res = call <16 x i32> @llvm.x86.avx512.mskblend.d.512(<16 x i1> %m0, <16 x i32> %a1, <16 x i32> %a2) ; <<16 x i32>> [#uses=1] + %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i1> %m0, <16 x i32> %a1, <16 x i32> %a2) ; <<16 x i32>> [#uses=1] ret <16 x i32> %res } -declare <16 x i32> @llvm.x86.avx512.mskblend.d.512(<16 x i1> %a0, <16 x i32> %a1, <16 x i32> %a2) nounwind readonly +declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i1> %a0, <16 x i32> %a1, <16 x i32> %a2) nounwind readonly -define <8 x i64> @test_x86_mskblend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) { +define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) { ; CHECK: vpblendmq %m0 = bitcast i8 %a0 to <8 x i1> - %res = call <8 x i64> @llvm.x86.avx512.mskblend.q.512(<8 x i1> %m0, <8 x i64> %a1, <8 x i64> %a2) ; <<8 x i64>> [#uses=1] + %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i1> %m0, <8 x i64> %a1, <8 x i64> %a2) ; <<8 x i64>> [#uses=1] ret <8 x i64> %res } -declare <8 x i64> @llvm.x86.avx512.mskblend.q.512(<8 x i1> %a0, <8 x i64> %a1, <8 x i64> %a2) nounwind readonly +declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i1> %a0, <8 x i64> %a1, <8 x i64> %a2) nounwind readonly + + define <8 x i32> @test_cvtpd2udq(<8 x double> %a) { + 
;CHECK: vcvtpd2udq {ru-sae}{{.*}}encoding: [0x62,0xf1,0xfc,0x58,0x79,0xc0] + %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %a, <8 x i32>zeroinitializer, i8 -1, i32 2) + ret <8 x i32>%res + } + declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32) + + define <16 x i32> @test_cvtps2udq(<16 x float> %a) { + ;CHECK: vcvtps2udq {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x79,0xc0] + %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %a, <16 x i32>zeroinitializer, i16 -1, i32 1) + ret <16 x i32>%res + } + declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32) + + define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) { + ;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02] + %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8) + ret i16 %res + } + declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32) + + define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) { + ;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04] + %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4) + ret i8 %res + } + declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32) \ No newline at end of file diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-mask-op.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-mask-op.ll index ef5cb56d7284..923e72ec5f7e 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -33,19 +33,6 @@ define i16 @mand16(i16 %x, i16 %y) { ret i16 %ret } -; CHECK: unpckbw_test -; CHECK: kunpckbw -; CHECK:ret -declare <16 x i1> @llvm.x86.kunpck.v16i1(<8 x i1>, <8 x i1>) nounwind readnone - -define i16 @unpckbw_test(i8 %x, i8 %y) { - %m0 = bitcast i8 %x to <8 x i1> - %m1 = bitcast i8 %y to <8 x i1> - %k = tail call <16 x i1> @llvm.x86.kunpck.v16i1(<8 x i1> %m0, <8 x i1> %m1) - %r = bitcast <16 x i1> %k to i16 - ret i16 %r -} - ; CHECK: shuf_test1 ; CHECK: kshiftrw $8 ; CHECK:ret diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-mov.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-mov.ll index 91242b1cc125..13e684324470 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-mov.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-mov.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s ; CHECK-LABEL: @test1 -; CHECK: vmovdz %xmm0, %eax +; CHECK: vmovd %xmm0, %eax ## encoding: [0x62 ; CHECK: ret define i32 @test1(float %x) { %res = bitcast float %x to i32 @@ -9,7 +9,7 @@ define i32 @test1(float %x) { } ; CHECK-LABEL: @test2 -; CHECK: vmovdz %edi +; CHECK: vmovd %edi, %xmm0 ## encoding: [0x62 ; CHECK: ret define <4 x i32> @test2(i32 %x) { %res = insertelement <4 x i32>undef, i32 %x, i32 0 @@ -17,7 +17,7 @@ define <4 x i32> @test2(i32 %x) { } ; CHECK-LABEL: @test3 -; CHECK: vmovqz %rdi +; CHECK: vmovq %rdi, %xmm0 ## encoding: [0x62 ; CHECK: ret define <2 x i64> @test3(i64 %x) { %res = insertelement <2 x i64>undef, i64 %x, i32 0 @@ -25,7 +25,7 @@ define <2 x i64> @test3(i64 %x) { } ; CHECK-LABEL: @test4 -; CHECK: vmovdz (%rdi) +; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62 ; CHECK: 
ret define <4 x i32> @test4(i32* %x) { %y = load i32* %x @@ -34,7 +34,7 @@ define <4 x i32> @test4(i32* %x) { } ; CHECK-LABEL: @test5 -; CHECK: vmovssz %xmm0, (%rdi) +; CHECK: vmovss %xmm0, (%rdi) ## encoding: [0x62 ; CHECK: ret define void @test5(float %x, float* %y) { store float %x, float* %y, align 4 @@ -42,7 +42,7 @@ define void @test5(float %x, float* %y) { } ; CHECK-LABEL: @test6 -; CHECK: vmovsdz %xmm0, (%rdi) +; CHECK: vmovsd %xmm0, (%rdi) ## encoding: [0x62 ; CHECK: ret define void @test6(double %x, double* %y) { store double %x, double* %y, align 8 @@ -50,7 +50,7 @@ define void @test6(double %x, double* %y) { } ; CHECK-LABEL: @test7 -; CHECK: vmovssz (%rdi), %xmm0 +; CHECK: vmovss (%rdi), %xmm0 ## encoding: [0x62 ; CHECK: ret define float @test7(i32* %x) { %y = load i32* %x @@ -59,7 +59,7 @@ define float @test7(i32* %x) { } ; CHECK-LABEL: @test8 -; CHECK: vmovdz %xmm0, %eax +; CHECK: vmovd %xmm0, %eax ## encoding: [0x62 ; CHECK: ret define i32 @test8(<4 x i32> %x) { %res = extractelement <4 x i32> %x, i32 0 @@ -67,7 +67,7 @@ define i32 @test8(<4 x i32> %x) { } ; CHECK-LABEL: @test9 -; CHECK: vmovqz %xmm0, %rax +; CHECK: vmovq %xmm0, %rax ## encoding: [0x62 ; CHECK: ret define i64 @test9(<2 x i64> %x) { %res = extractelement <2 x i64> %x, i32 0 @@ -75,7 +75,7 @@ define i64 @test9(<2 x i64> %x) { } ; CHECK-LABEL: @test10 -; CHECK: vmovdz (%rdi) +; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62 ; CHECK: ret define <4 x i32> @test10(i32* %x) { %y = load i32* %x, align 4 @@ -84,7 +84,7 @@ define <4 x i32> @test10(i32* %x) { } ; CHECK-LABEL: @test11 -; CHECK: vmovssz (%rdi) +; CHECK: vmovss (%rdi), %xmm0 ## encoding: [0x62 ; CHECK: ret define <4 x float> @test11(float* %x) { %y = load float* %x, align 4 @@ -93,7 +93,7 @@ define <4 x float> @test11(float* %x) { } ; CHECK-LABEL: @test12 -; CHECK: vmovsdz (%rdi) +; CHECK: vmovsd (%rdi), %xmm0 ## encoding: [0x62 ; CHECK: ret define <2 x double> @test12(double* %x) { %y = load double* %x, align 8 @@ -102,7 +102,7 @@ define <2 x double> @test12(double* %x) { } ; CHECK-LABEL: @test13 -; CHECK: vmovqz %rdi +; CHECK: vmovq %rdi, %xmm0 ## encoding: [0x62 ; CHECK: ret define <2 x i64> @test13(i64 %x) { %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0 @@ -110,7 +110,7 @@ define <2 x i64> @test13(i64 %x) { } ; CHECK-LABEL: @test14 -; CHECK: vmovdz %edi +; CHECK: vmovd %edi, %xmm0 ## encoding: [0x62 ; CHECK: ret define <4 x i32> @test14(i32 %x) { %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0 @@ -118,7 +118,7 @@ define <4 x i32> @test14(i32 %x) { } ; CHECK-LABEL: @test15 -; CHECK: vmovdz (%rdi) +; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62 ; CHECK: ret define <4 x i32> @test15(i32* %x) { %y = load i32* %x, align 4 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-select.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-select.ll index d2d6681fb422..83f46984781f 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-select.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-select.ll @@ -20,3 +20,22 @@ define <8 x i64> @select01(i32 %a, <8 x i64> %b) nounwind { ret <8 x i64> %res } +; CHECK-LABEL: @select02 +; CHECK: cmpless %xmm0, %xmm3, %k1 +; CHECK-NEXT: vmovss %xmm2, {{.*}}%xmm1 {%k1} +; CHECK: ret +define float @select02(float %a, float %b, float %c, float %eps) { + %cmp = fcmp oge float %a, %eps + %cond = select i1 %cmp, float %c, float %b + ret float %cond +} + +; CHECK-LABEL: @select03 +; CHECK: cmplesd %xmm0, %xmm3, %k1 +; CHECK-NEXT: vmovsd %xmm2, {{.*}}%xmm1 {%k1} +; 
CHECK: ret +define double @select03(double %a, double %b, double %c, double %eps) { + %cmp = fcmp oge double %a, %eps + %cond = select i1 %cmp, double %c, double %b + ret double %cond +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-shuffle.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-shuffle.ll index c9e0c2b992d9..84a87e23f33c 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-shuffle.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-shuffle.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s ; CHECK: LCP ; CHECK: .long 2 ; CHECK: .long 5 @@ -107,7 +107,7 @@ define <16 x i32> @test11(<16 x i32> %a, <16 x i32>* %b) nounwind { } ; CHECK-LABEL: test12 -; CHECK: vmovlhpsz %xmm +; CHECK: vmovlhps {{.*}}## encoding: [0x62 ; CHECK: ret define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) nounwind { %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -186,7 +186,7 @@ define <16 x float> @test21(<16 x float> %a, <16 x float> %c) { } ; CHECK-LABEL: test22 -; CHECK: vmovhlpsz %xmm +; CHECK: vmovhlps {{.*}}## encoding: [0x62 ; CHECK: ret define <4 x i32> @test22(<4 x i32> %a, <4 x i32> %b) nounwind { %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -223,4 +223,11 @@ define <16 x i32> @test25(<16 x i32> %a, <16 x i32> %b) nounwind { define <16 x i32> @test26(<16 x i32> %a) nounwind { %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> ret <16 x i32> %c -} \ No newline at end of file +} + +; CHECK-LABEL: @test27 +; CHECK: ret +define <16 x i32> @test27(<4 x i32>%a) { + %res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> + ret <16 x i32> %res +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-vbroadcast.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-vbroadcast.ll index 6f89d6ce2342..9c6db11d8f45 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-vbroadcast.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-vbroadcast.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s ;CHECK-LABEL: _inreg16xi32: ;CHECK: vpbroadcastd {{.*}}, %zmm @@ -19,7 +19,7 @@ define <8 x i64> @_inreg8xi64(i64 %a) { } ;CHECK-LABEL: _inreg16xfloat: -;CHECK: vbroadcastssz {{.*}}, %zmm +;CHECK: vbroadcastss {{.*}}, %zmm ;CHECK: ret define <16 x float> @_inreg16xfloat(float %a) { %b = insertelement <16 x float> undef, float %a, i32 0 @@ -28,7 +28,7 @@ define <16 x float> @_inreg16xfloat(float %a) { } ;CHECK-LABEL: _inreg8xdouble: -;CHECK: vbroadcastsdz {{.*}}, %zmm +;CHECK: vbroadcastsd {{.*}}, %zmm ;CHECK: ret define <8 x double> @_inreg8xdouble(double %a) { %b = insertelement <8 x double> undef, double %a, i32 0 @@ -45,9 +45,20 @@ define <16 x i32> @_xmm16xi32(<16 x i32> %a) { } ;CHECK-LABEL: _xmm16xfloat -;CHECK: vbroadcastssz +;CHECK: vbroadcastss {{.*}}## encoding: [0x62 ;CHECK: ret define <16 x float> @_xmm16xfloat(<16 x float> %a) { %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer ret <16 x float> %b } + +define <16 x i32> @test_vbroadcast() { + ; CHECK: vpbroadcastd +entry: + %0 = sext <16 x i1> zeroinitializer to <16 x i32> + %1 = fcmp uno <16 x float> undef, zeroinitializer + %2 = sext <16 x i1> %1 to <16 x i32> + %3 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> %2 + ret <16 x i32> %3 +} + diff 
--git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-vec-cmp.ll index 6ca5bcc3b862..822809c62edd 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-vec-cmp.ll @@ -111,3 +111,14 @@ define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind { %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y ret <8 x i32> %max } + +; CHECK-LABEL: test12 +; CHECK: vpcmpeqq %zmm2, %zmm0, [[LO:%k[0-7]]] +; CHECK: vpcmpeqq %zmm3, %zmm1, [[HI:%k[0-7]]] +; CHECK: kunpckbw [[LO]], [[HI]], {{%k[0-7]}} + +define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind { + %res = icmp eq <16 x i64> %a, %b + %res1 = bitcast <16 x i1> %res to i16 + ret i16 %res1 +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-vselect-crash.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-vselect-crash.ll new file mode 100644 index 000000000000..7cca51d5e2dc --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-vselect-crash.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +; CHECK-LABEL: test +; CHECK: vmovdqu32 +; CHECK: ret +define <16 x i32> @test() { +entry: + %0 = icmp slt <16 x i32> undef, undef + %1 = select <16 x i1> %0, <16 x i32> undef, <16 x i32> zeroinitializer + ret <16 x i32> %1 +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-zext-load-crash.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-zext-load-crash.ll new file mode 100644 index 000000000000..07ded13a0e3c --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/avx512-zext-load-crash.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +define <8 x i16> @test_zext_load() { + ; CHECK: vmovq +entry: + %0 = load <2 x i16> ** undef, align 8 + %1 = getelementptr inbounds <2 x i16>* %0, i64 1 + %2 = load <2 x i16>* %0, align 1 + %3 = shufflevector <2 x i16> %2, <2 x i16> undef, <8 x i32> + %4 = load <2 x i16>* %1, align 1 + %5 = shufflevector <2 x i16> %4, <2 x i16> undef, <8 x i32> + %6 = shufflevector <8 x i16> %3, <8 x i16> %5, <8 x i32> + ret <8 x i16> %6 +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/blend-msb.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/blend-msb.ll index 4f2060f7012b..0485a42eb7e5 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/blend-msb.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/blend-msb.ll @@ -5,7 +5,7 @@ ; shifting the needed bit to the MSB, and not using shl+sra. ;CHECK-LABEL: vsel_float: -;CHECK: movl $-2147483648 +;CHECK: movl $-1 ;CHECK-NEXT: movd ;CHECK-NEXT: blendvps ;CHECK: ret @@ -15,7 +15,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) { } ;CHECK-LABEL: vsel_4xi8: -;CHECK: movl $-2147483648 +;CHECK: movl $-1 ;CHECK-NEXT: movd ;CHECK-NEXT: blendvps ;CHECK: ret @@ -26,12 +26,12 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) { ; We do not have native support for v8i16 blends and we have to use the -; blendvb instruction or a sequence of NAND/OR/AND. Make sure that we do not r +; blendvb instruction or a sequence of NAND/OR/AND. Make sure that we do not ; reduce the mask in this case. 
;CHECK-LABEL: vsel_8xi16: -;CHECK: psllw -;CHECK: psraw -;CHECK: pblendvb +;CHECK: andps +;CHECK: andps +;CHECK: orps ;CHECK: ret define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) { %vsel = select <8 x i1> , <8 x i16> %v1, <8 x i16> %v2 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/cmov.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/cmov.ll index 215b86267a47..d38d2b430ccb 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/cmov.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/cmov.ll @@ -41,8 +41,8 @@ declare void @bar(i64) nounwind define void @test3(i64 %a, i64 %b, i1 %p) nounwind { ; CHECK-LABEL: test3: -; CHECK: cmovnel %edi, %esi -; CHECK-NEXT: movl %esi, %edi +; CHECK: cmov{{n?}}el %[[R1:e..]], %[[R2:e..]] +; CHECK-NEXT: movl %[[R2]], %{{e..}} %c = trunc i64 %a to i32 %d = trunc i64 %b to i32 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/codegen-prepare-extload.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/codegen-prepare-extload.ll index 14df815663e3..9320706d9728 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/codegen-prepare-extload.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/codegen-prepare-extload.ll @@ -5,7 +5,7 @@ ; CodeGenPrepare should move the zext into the block with the load ; so that SelectionDAG can select it with the load. -; CHECK: movzbl ({{%rdi|%rcx}}), %eax +; CHECK: movsbl ({{%rdi|%rcx}}), %eax define void @foo(i8* %p, i32* %q) { entry: diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/cse-add-with-overflow.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/cse-add-with-overflow.ll new file mode 100644 index 000000000000..1fcc03f117d3 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/cse-add-with-overflow.ll @@ -0,0 +1,43 @@ +; RUN: llc < %s -mtriple=x86_64-darwin -mcpu=generic | FileCheck %s +; XFAIL: * +; rdar:15661073 simple example of redundant adds +; +; MachineCSE should coalesce trivial subregister copies. +; +; The extra movl+addl should be removed during MachineCSE. 
+; CHECK-LABEL: redundantadd +; CHECK: cmpq +; CHECK: movq +; CHECK-NOT: movl +; CHECK: addl +; CHECK-NOT: addl +; CHECK: ret + +define i64 @redundantadd(i64* %a0, i64* %a1) { +entry: + %tmp8 = load i64* %a0, align 8 + %tmp12 = load i64* %a1, align 8 + %tmp13 = icmp ult i64 %tmp12, -281474976710656 + br i1 %tmp13, label %exit1, label %body + +exit1: + unreachable + +body: + %tmp14 = trunc i64 %tmp8 to i32 + %tmp15 = trunc i64 %tmp12 to i32 + %tmp16 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %tmp14, i32 %tmp15) + %tmp17 = extractvalue { i32, i1 } %tmp16, 1 + br i1 %tmp17, label %exit2, label %return + +exit2: + unreachable + +return: + %tmp18 = add i64 %tmp12, %tmp8 + %tmp19 = and i64 %tmp18, 4294967295 + %tmp20 = or i64 %tmp19, -281474976710656 + ret i64 %tmp20 +} + +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/ctpop-combine.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/ctpop-combine.ll index 786f7f9b1cc8..463505bd95d9 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/ctpop-combine.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/ctpop-combine.ll @@ -35,6 +35,6 @@ define i32 @test3(i64 %x) nounwind readnone { %conv = zext i1 %cmp to i32 ret i32 %conv ; CHECK-LABEL: test3: -; CHECK: cmpb $2 +; CHECK: cmpl $2 ; CHECK: ret } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/dwarf-comp-dir.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/dwarf-comp-dir.ll index b746decdb814..3b4a8689060d 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/dwarf-comp-dir.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/dwarf-comp-dir.ll @@ -5,6 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-unknown-linux-gnu" !llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!5} !0 = metadata !{i32 720913, metadata !4, i32 12, metadata !"clang version 3.1 (trunk 143523)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ] !2 = metadata !{i32 0} @@ -15,3 +16,4 @@ target triple = "x86_64-unknown-linux-gnu" ; Dir Mod Time File Len File Name ; ---- ---------- ---------- --------------------------- ; CHECK: file_names[ 1] 0 0x00000000 0x00000000 empty.c +!5 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/fast-isel-select.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/fast-isel-select.ll new file mode 100644 index 000000000000..53158bc5396d --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/fast-isel-select.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple x86_64-apple-darwin -O0 -o - < %s | FileCheck %s +; Make sure we only use the less significant bit of the value that feeds the +; select. Otherwise, we may account for a non-zero value whereas the +; lsb is zero. 
+; + +; CHECK-LABEL: fastisel_select: +; CHECK: subb {{%[a-z0-9]+}}, [[RES:%[a-z0-9]+]] +; CHECK: testb $1, [[RES]] +; CHECK: cmovel +define i32 @fastisel_select(i1 %exchSub2211_, i1 %trunc_8766) { + %shuffleInternal15257_8932 = sub i1 %exchSub2211_, %trunc_8766 + %counter_diff1345 = select i1 %shuffleInternal15257_8932, i32 1204476887, i32 0 + ret i32 %counter_diff1345 +} + diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/fastcall-correct-mangling.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/fastcall-correct-mangling.ll index 3569d36541f7..15cffa4fbd06 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/fastcall-correct-mangling.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/fastcall-correct-mangling.ll @@ -1,14 +1,26 @@ -; RUN: llc < %s -mtriple=i386-unknown-mingw32 | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown-mingw32 | \ +; RUN: FileCheck --check-prefix=CHECK32 %s + +; RUN: llc < %s -mtriple=i386-unknown-win32 | \ +; RUN: FileCheck --check-prefix=CHECK32 %s + +; RUN: llc < %s -mtriple=x86_64-unknown-mingw32 | \ +; RUN: FileCheck --check-prefix=CHECK64 %s + +; RUN: llc < %s -mtriple=x86_64-unknown-mingw32 | \ +; RUN: FileCheck --check-prefix=CHECK64 %s ; Check that a fastcall function gets correct mangling define x86_fastcallcc void @func(i64 %X, i8 %Y, i8 %G, i16 %Z) { -; CHECK: @func@20: +; CHECK32-LABEL: {{^}}@func@20: +; CHECK64-LABEL: {{^}}func: ret void } define x86_fastcallcc i32 @"\01DoNotMangle"(i32 %a) { -; CHECK: DoNotMangle: +; CHECK32-LABEL: {{^}}DoNotMangle: +; CHECK64-LABEL: {{^}}DoNotMangle: entry: ret i32 %a } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64.ll index 7a1a9ae46147..494cb28677a4 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s ; VFMADD diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/fp-fast.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/fp-fast.ll index 07baca84804e..7b08ad67220b 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/fp-fast.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/fp-fast.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=x86-64 -mattr=+avx,-fma4 -mtriple=x86_64-apple-darwin -enable-unsafe-fp-math < %s | FileCheck %s +; RUN: llc -march=x86-64 -mcpu=corei7-avx -enable-unsafe-fp-math < %s | FileCheck %s ; CHECK-LABEL: test1 define float @test1(float %a) { diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/inline-asm-stack-realign.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/inline-asm-stack-realign.ll new file mode 100644 index 000000000000..f2ac0f451bb0 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/inline-asm-stack-realign.ll @@ -0,0 +1,16 @@ +; RUN: not llc -mtriple=i686-pc-win32 < %s 2>&1 | FileCheck %s + +; FIXME: This is miscompiled due to our unconditional use of ESI as the base +; pointer. 
+; XFAIL: * + +; CHECK: Stack realignment in presence of dynamic stack adjustments is not supported with inline assembly + +define i32 @foo() { +entry: + %r = alloca i32, align 16 + store i32 -1, i32* %r, align 16 + call void asm sideeffect inteldialect "push esi\0A\09xor esi, esi\0A\09mov dword ptr $0, esi\0A\09pop esi", "=*m,~{flags},~{esi},~{esp},~{dirflag},~{fpsr},~{flags}"(i32* %r) + %0 = load i32* %r, align 16 + ret i32 %0 +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/inline-asm-stack-realign2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/inline-asm-stack-realign2.ll new file mode 100644 index 000000000000..0e4e7e1a6776 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/inline-asm-stack-realign2.ll @@ -0,0 +1,16 @@ +; RUN: not llc -mtriple=i686-pc-win32 < %s 2>&1 | FileCheck %s + +; FIXME: This is miscompiled due to our unconditional use of ESI as the base +; pointer. +; XFAIL: * + +; CHECK: Stack realignment in presence of dynamic stack adjustments is not supported with inline assembly + +define i32 @foo() { +entry: + %r = alloca i32, align 16 + store i32 -1, i32* %r, align 16 + call void asm sideeffect "push %esi\0A\09xor %esi, %esi\0A\09mov %esi, $0\0A\09pop %esi", "=*m,~{flags},~{esi},~{esp},~{dirflag},~{fpsr},~{flags}"(i32* %r) + %0 = load i32* %r, align 16 + ret i32 %0 +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/inline-asm-stack-realign3.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/inline-asm-stack-realign3.ll new file mode 100644 index 000000000000..cdb77ca3ea30 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/inline-asm-stack-realign3.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=x86 < %s | FileCheck %s + +declare void @bar(i32* %junk) + +define i32 @foo(i1 %cond) { +entry: + %r = alloca i32, align 128 + store i32 -1, i32* %r, align 128 + br i1 %cond, label %doit, label %skip + +doit: + call void asm sideeffect "xor %ecx, %ecx\0A\09mov %ecx, $0", "=*m,~{ecx},~{flags}"(i32* %r) + %junk = alloca i32 + call void @bar(i32* %junk) + br label %skip + +skip: + %0 = load i32* %r, align 128 + ret i32 %0 +} + +; CHECK-LABEL: foo: +; CHECK: pushl %ebp +; CHECK: andl $-128, %esp +; CHECK: xor %ecx, %ecx +; CHECK-NEXT: mov %ecx, (%esi) +; CHECK: movl (%esi), %eax +; CHECK: popl %ebp +; CHECK: ret diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/inline-asm.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/inline-asm.ll index d201ebdc85d1..f12c2600fff3 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/inline-asm.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/inline-asm.ll @@ -59,3 +59,18 @@ entry: %asm = tail call i32 asm sideeffect "", "={ax},i,~{eax},~{flags},~{rax}"(i64 61) nounwind ret i32 %asm } + +@test8_v = global i32 42 + +define void @test8() { + call void asm sideeffect "${0:P}", "i"( i32* @test8_v ) + ret void +} + +define void @test9() { + call void asm sideeffect "${0:P}", "X"( i8* blockaddress(@test9, %bb) ) + br label %bb + +bb: + ret void +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/ins_split_regalloc.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/ins_split_regalloc.ll new file mode 100644 index 000000000000..f5c5254fcec3 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/ins_split_regalloc.ll @@ -0,0 +1,33 @@ +; RUN: llc -O1 -regalloc=greedy -mtriple=x86_64-apple-macosx -march x86-64 < %s -o - | FileCheck %s +; Check that last chance split (RAGreedy::tryInstructonSplit) just split +; when this is beneficial, otherwise we end up with uncoalesced copies. 
+; + +target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128" + +@f = external constant void (i32)* + +; CHECK-LABEL: test: +; Get the address of f in the GOT. +; CHECK: movq _f@{{[^,]+}}, [[F_ENTRY_ADDR:%[a-z0-9]+]] +; Read the actual address of f. +; CHECK: movq ([[F_ENTRY_ADDR]]), [[F_ADDR:%[a-z0-9]+]] +; Check that we do not have useless split points before each call. +; CHECK-NOT: movq +; CHECK: callq *[[F_ADDR]] +; Check that we do not have useless split points before each call. +; CHECK-NOT: movq +; CHECK: callq *[[F_ADDR]] +; Last call is a tail call, thus the address of the function cannot use +; a callee saved register. +; CHECK: movq [[F_ADDR]], [[F_ADDR_TC:%[a-z0-9]+]] +; CHECK: popq [[F_ADDR]] +; CHECK: jmpq *[[F_ADDR_TC]] +define void @test(i32 %a, i32 %b, i32 %c) { +entry: + %fct_f = load void (i32)** @f, align 8 + tail call void %fct_f(i32 %a) + tail call void %fct_f(i32 %b) + tail call void %fct_f(i32 %c) + ret void +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/isint.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/isint.ll index 4a98e63f38fc..96c89605b6ae 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/isint.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/isint.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn| FileCheck %s define i32 @isint_return(double %d) nounwind { ; CHECK-NOT: xor @@ -9,7 +9,7 @@ define i32 @isint_return(double %d) nounwind { ; CHECK: cmpeqsd %c = fcmp oeq double %d, %e ; CHECK-NEXT: movd -; CHECK-NEXT: andl +; CHECK-NEXT: andq %z = zext i1 %c to i32 ret i32 %z } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/legalize-shift-64.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/legalize-shift-64.ll index 77364688afe4..64460bb91186 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/legalize-shift-64.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/legalize-shift-64.ll @@ -64,3 +64,31 @@ define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) { ; CHECK: shl ; CHECK: shldl } + +; PR16108 +define i32 @test6() { + %x = alloca i32, align 4 + %t = alloca i64, align 8 + store i32 1, i32* %x, align 4 + store i64 1, i64* %t, align 8 ;; DEAD + %load = load i32* %x, align 4 + %shl = shl i32 %load, 8 + %add = add i32 %shl, -224 + %sh_prom = zext i32 %add to i64 + %shl1 = shl i64 1, %sh_prom + %cmp = icmp ne i64 %shl1, 4294967296 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + ret i32 1 + +if.end: ; preds = %entry + ret i32 0 + +; CHECK-LABEL: test6: +; CHECK-NOT: andb $31 +; CHECK: sete +; CHECK: movzbl +; CHECK: xorl $1 +; CHECK: orl +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/linker-private.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/linker-private.ll new file mode 100644 index 000000000000..ecea34235df0 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/linker-private.ll @@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=MACHO %s + +@foo = linker_private global i32 42 +;ELF: {{^}}.Lfoo: +;MACHO: {{^}}l_foo: + +define i32* @f() { + ret i32* @foo +} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/memcmp.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/memcmp.ll index cb0797d3eb33..0a534926c6cd 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/memcmp.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/memcmp.ll @@ -22,8 +22,9 @@ bb: ; 
preds = %entry return: ; preds = %entry ret void ; CHECK-LABEL: memcmp2: -; CHECK: movw ([[A0:%rdi|%rcx]]), %ax -; CHECK: cmpw ([[A1:%rsi|%rdx]]), %ax +; CHECK: movzwl +; CHECK-NEXT: movzwl +; CHECK-NEXT: cmpl ; NOBUILTIN-LABEL: memcmp2: ; NOBUILTIN: callq } @@ -41,7 +42,8 @@ bb: ; preds = %entry return: ; preds = %entry ret void ; CHECK-LABEL: memcmp2a: -; CHECK: cmpw $28527, ([[A0]]) +; CHECK: movzwl +; CHECK-NEXT: cmpl $28527, } @@ -58,8 +60,8 @@ bb: ; preds = %entry return: ; preds = %entry ret void ; CHECK-LABEL: memcmp4: -; CHECK: movl ([[A0]]), %eax -; CHECK: cmpl ([[A1]]), %eax +; CHECK: movl +; CHECK-NEXT: cmpl } define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind { @@ -75,7 +77,7 @@ bb: ; preds = %entry return: ; preds = %entry ret void ; CHECK-LABEL: memcmp4a: -; CHECK: cmpl $1869573999, ([[A0]]) +; CHECK: cmpl $1869573999, } define void @memcmp8(i8* %X, i8* %Y, i32* nocapture %P) nounwind { @@ -91,8 +93,8 @@ bb: ; preds = %entry return: ; preds = %entry ret void ; CHECK-LABEL: memcmp8: -; CHECK: movq ([[A0]]), %rax -; CHECK: cmpq ([[A1]]), %rax +; CHECK: movq +; CHECK: cmpq } define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind { @@ -108,7 +110,7 @@ bb: ; preds = %entry return: ; preds = %entry ret void ; CHECK-LABEL: memcmp8a: -; CHECK: movabsq $8029759185026510694, %rax -; CHECK: cmpq %rax, ([[A0]]) +; CHECK: movabsq $8029759185026510694, +; CHECK: cmpq } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/memset-2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/memset-2.ll index d0a3c7a74bce..a87ef2e15a5a 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/memset-2.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/memset-2.ll @@ -5,7 +5,7 @@ declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind define fastcc void @t1() nounwind { entry: ; CHECK-LABEL: t1: -; CHECK: calll _memset +; CHECK: calll L_memset$stub call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 188, i32 1, i1 false) unreachable } @@ -13,7 +13,7 @@ entry: define fastcc void @t2(i8 signext %c) nounwind { entry: ; CHECK-LABEL: t2: -; CHECK: calll _memset +; CHECK: calll L_memset$stub call void @llvm.memset.p0i8.i32(i8* undef, i8 %c, i32 76, i32 1, i1 false) unreachable } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/ms-inline-asm.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/ms-inline-asm.ll index 5e7ba37b39c0..436d34a11558 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/ms-inline-asm.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/ms-inline-asm.ll @@ -5,7 +5,6 @@ entry: %0 = tail call i32 asm sideeffect inteldialect "mov eax, $1\0A\09mov $0, eax", "=r,r,~{eax},~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind ret i32 %0 ; CHECK: t1 -; CHECK: movl %esp, %ebp ; CHECK: {{## InlineAsm Start|#APP}} ; CHECK: .intel_syntax ; CHECK: mov eax, ecx @@ -19,7 +18,6 @@ entry: call void asm sideeffect inteldialect "mov eax, $$1", "~{eax},~{dirflag},~{fpsr},~{flags}"() nounwind ret void ; CHECK: t2 -; CHECK: movl %esp, %ebp ; CHECK: {{## InlineAsm Start|#APP}} ; CHECK: .intel_syntax ; CHECK: mov eax, 1 @@ -34,7 +32,6 @@ entry: call void asm sideeffect inteldialect "mov eax, DWORD PTR [$0]", "*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i32* %V.addr) nounwind ret void ; CHECK: t3 -; CHECK: movl %esp, %ebp ; CHECK: {{## InlineAsm Start|#APP}} ; CHECK: .intel_syntax ; CHECK: mov eax, DWORD PTR {{[[esp]}} @@ -56,7 +53,6 @@ entry: %0 = load i32* %b1, align 4 ret i32 %0 ; CHECK: t18 -; CHECK: movl %esp, %ebp ; CHECK: {{## InlineAsm Start|#APP}} ; CHECK: .intel_syntax ; CHECK: lea 
ebx, foo @@ -76,7 +72,6 @@ entry: call void asm sideeffect inteldialect "call $0", "r,~{dirflag},~{fpsr},~{flags}"(void ()* @t19_helper) nounwind ret void ; CHECK-LABEL: t19: -; CHECK: movl %esp, %ebp ; CHECK: movl ${{_?}}t19_helper, %eax ; CHECK: {{## InlineAsm Start|#APP}} ; CHECK: .intel_syntax @@ -95,7 +90,6 @@ entry: %0 = load i32** %res, align 4 ret i32* %0 ; CHECK-LABEL: t30: -; CHECK: movl %esp, %ebp ; CHECK: {{## InlineAsm Start|#APP}} ; CHECK: .intel_syntax ; CHECK: lea edi, dword ptr [{{_?}}results] @@ -103,8 +97,31 @@ entry: ; CHECK: {{## InlineAsm End|#NO_APP}} ; CHECK: {{## InlineAsm Start|#APP}} ; CHECK: .intel_syntax -; CHECK: mov dword ptr [esi], edi +; CHECK: mov dword ptr [esp], edi ; CHECK: .att_syntax ; CHECK: {{## InlineAsm End|#NO_APP}} -; CHECK: movl (%esi), %eax +; CHECK: movl (%esp), %eax +} + +; Stack realignment plus MS inline asm that does *not* adjust the stack is no +; longer an error. + +define i32 @t31() { +entry: + %val = alloca i32, align 64 + store i32 -1, i32* %val, align 64 + call void asm sideeffect inteldialect "mov dword ptr $0, esp", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %val) #1 + %sp = load i32* %val, align 64 + ret i32 %sp +; CHECK-LABEL: t31: +; CHECK: pushl %ebp +; CHECK: movl %esp, %ebp +; CHECK: andl $-64, %esp +; CHECK: {{## InlineAsm Start|#APP}} +; CHECK: .intel_syntax +; CHECK: mov dword ptr [esp], esp +; CHECK: .att_syntax +; CHECK: {{## InlineAsm End|#NO_APP}} +; CHECK: movl (%esp), %eax +; CHECK: ret } diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/patchpoint.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/patchpoint.ll index d534639953b3..62b12732ded4 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/patchpoint.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/patchpoint.ll @@ -7,16 +7,16 @@ entry: ; CHECK-LABEL: trivial_patchpoint_codegen: ; CHECK: movabsq $-559038736, %r11 ; CHECK-NEXT: callq *%r11 -; CHECK-NEXT: nop +; CHECK-NEXT: xchgw %ax, %ax ; CHECK: movq %rax, %[[REG:r.+]] ; CHECK: callq *%r11 -; CHECK-NEXT: nop +; CHECK-NEXT: xchgw %ax, %ax ; CHECK: movq %[[REG]], %rax ; CHECK: ret %resolveCall2 = inttoptr i64 -559038736 to i8* - %result = tail call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 2, i32 15, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4) + %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4) %resolveCall3 = inttoptr i64 -559038737 to i8* - tail call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 3, i32 15, i8* %resolveCall3, i32 2, i64 %p1, i64 %result) + tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 3, i32 15, i8* %resolveCall3, i32 2, i64 %p1, i64 %result) ret i64 %result } @@ -34,31 +34,65 @@ entry: store i64 11, i64* %metadata store i64 12, i64* %metadata store i64 13, i64* %metadata - call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 4, i32 0, i64* %metadata) + call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata) ret void } ; Test the webkit_jscc calling convention. -; Two arguments will be pushed on the stack. +; One argument will be passed in register, the other will be pushed on the stack. ; Return value in $rax. 
define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { entry: ; CHECK-LABEL: jscall_patchpoint_codegen: ; CHECK: Ltmp -; CHECK: movq %r{{.+}}, 8(%rsp) ; CHECK: movq %r{{.+}}, (%rsp) +; CHECK: movq %r{{.+}}, %rax ; CHECK: Ltmp ; CHECK-NEXT: movabsq $-559038736, %r11 ; CHECK-NEXT: callq *%r11 -; CHECK: movq %rax, 8(%rsp) +; CHECK: movq %rax, (%rsp) ; CHECK: callq %resolveCall2 = inttoptr i64 -559038736 to i8* - %result = tail call webkit_jscc i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 15, i8* %resolveCall2, i32 2, i64 %p1, i64 %p2) + %result = tail call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 15, i8* %resolveCall2, i32 2, i64 %p4, i64 %p2) %resolveCall3 = inttoptr i64 -559038737 to i8* - tail call webkit_jscc void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 6, i32 15, i8* %resolveCall3, i32 2, i64 %p1, i64 %result) + tail call webkit_jscc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 15, i8* %resolveCall3, i32 2, i64 %p4, i64 %result) ret void } +; Test that the arguments are properly aligned and that we don't store undef arguments. +define i64 @jscall_patchpoint_codegen2(i64 %callee) { +entry: +; CHECK-LABEL: jscall_patchpoint_codegen2: +; CHECK: Ltmp +; CHECK: movq $6, 24(%rsp) +; CHECK-NEXT: movl $4, 16(%rsp) +; CHECK-NEXT: movq $2, (%rsp) +; CHECK: Ltmp +; CHECK-NEXT: movabsq $-559038736, %r11 +; CHECK-NEXT: callq *%r11 + %call = inttoptr i64 -559038736 to i8* + %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %call, i32 6, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6) + ret i64 %result +} + +; Test that the arguments are properly aligned and that we don't store undef arguments. +define i64 @jscall_patchpoint_codegen3(i64 %callee) { +entry: +; CHECK-LABEL: jscall_patchpoint_codegen3: +; CHECK: Ltmp +; CHECK: movq $10, 48(%rsp) +; CHECK-NEXT: movl $8, 36(%rsp) +; CHECK-NEXT: movq $6, 24(%rsp) +; CHECK-NEXT: movl $4, 16(%rsp) +; CHECK-NEXT: movq $2, (%rsp) +; CHECK: Ltmp +; CHECK-NEXT: movabsq $-559038736, %r11 +; CHECK-NEXT: callq *%r11 + %call = inttoptr i64 -559038736 to i8* + %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %call, i32 10, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6, i32 undef, i32 8, i32 undef, i64 10) + ret i64 %result +} + ; Test patchpoints reusing the same TargetConstant. ; Assertion failed: (CI.getNumArgOperands() >= NumArgs + 4) ; There is no way to verify this, since it depends on memory allocation.
@@ -68,14 +102,14 @@ entry: %tmp80 = add i64 %tmp79, -16 %tmp81 = inttoptr i64 %tmp80 to i64* %tmp82 = load i64* %tmp81, align 8 - tail call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 14, i32 5, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82) - tail call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 15, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 5, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82) + tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 15, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82) %tmp83 = load i64* %tmp33, align 8 %tmp84 = add i64 %tmp83, -24 %tmp85 = inttoptr i64 %tmp84 to i64* %tmp86 = load i64* %tmp85, align 8 - tail call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 17, i32 5, i64 %arg, i64 %tmp10, i64 %tmp86) - tail call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 18, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 5, i64 %arg, i64 %tmp10, i64 %tmp86) + tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 18, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86) ret i64 10 } @@ -84,17 +118,13 @@ define void @small_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { entry: ; CHECK-LABEL: small_patchpoint_codegen: ; CHECK: Ltmp -; CHECK: nop -; CHECK-NEXT: nop -; CHECK-NEXT: nop -; CHECK-NEXT: nop -; CHECK-NEXT: nop +; CHECK: nopl 8(%rax,%rax) ; CHECK-NEXT: popq ; CHECK-NEXT: ret - %result = tail call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 5, i8* null, i32 2, i64 %p1, i64 %p2) + %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 5, i8* null, i32 2, i64 %p1, i64 %p2) ret void } -declare void @llvm.experimental.stackmap(i32, i32, ...) -declare void @llvm.experimental.patchpoint.void(i32, i32, i8*, i32, ...) -declare i64 @llvm.experimental.patchpoint.i64(i32, i32, i8*, i32, ...) +declare void @llvm.experimental.stackmap(i64, i32, ...) +declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...) +declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...) 
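; Two annotations on the patchpoint changes above, for reference.
;
; First, the stack layout encoded by the jscall_patchpoint_codegen2/3 CHECK
; lines (inferred from the offsets themselves, not stated by the patch):
; under webkit_jscc the first argument travels in a register and each
; following argument gets its own stack slot, with i64 slots 8-byte aligned,
; i32 slots 4-byte aligned, and undef arguments allocated a slot but never
; stored to. For jscall_patchpoint_codegen3 that works out to:
;   (%rsp)   <- i64 2      8(%rsp)  <- i64 undef (no store)
;   16(%rsp) <- i32 4      20(%rsp) <- i32 undef (no store)
;   24(%rsp) <- i64 6      32(%rsp) <- i32 undef (no store)
;   36(%rsp) <- i32 8      40(%rsp) <- i32 undef (no store)
;   48(%rsp) <- i64 10 (8-byte realigned up from 44)
;
; Second, a minimal sketch of the widened intrinsic signatures this file now
; uses (the first operand, the call-site ID, is i64 rather than i32; the ID
; value 42 below is made up purely for illustration):
;
;   declare void @llvm.experimental.stackmap(i64, i32, ...)
;
;   define void @id_sketch(i64 %v) {
;     call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 42, i32 5, i64 %v)
;     ret void
;   }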
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/pic.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/pic.ll index 7bb127eae930..da1e2248065f 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/pic.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/pic.ll @@ -192,7 +192,8 @@ bb12: ; LINUX: .LJTI7_0@GOTOFF( ; LINUX: jmpl * -; LINUX: .LJTI7_0: +; LINUX: .align 4 +; LINUX-NEXT: .LJTI7_0: ; LINUX: .long .LBB7_2@GOTOFF ; LINUX: .long .LBB7_8@GOTOFF ; LINUX: .long .LBB7_14@GOTOFF diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/pr17631.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/pr17631.ll index a572ff2e3b3b..98f951f1b10c 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/pr17631.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/pr17631.ll @@ -1,16 +1,16 @@ ; RUN: llc < %s -mcpu=core-avx-i -mtriple=i386-pc-win32 | FileCheck %s - + %struct_type = type { [64 x <8 x float>], <8 x float> } - + ; Function Attrs: nounwind readnone declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) - + ; Function Attrs: nounwind define i32 @equal(<8 x i32> %A) { allocas: %first_alloc = alloca [64 x <8 x i32>] %second_alloc = alloca %struct_type - + %A1 = bitcast <8 x i32> %A to <8 x float> %A2 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %A1) ret i32 %A2 @@ -20,3 +20,15 @@ allocas: ; CHECK-NOT: vzeroupper ; CHECK: _chkstk ; CHECK: ret + +define <8 x float> @foo(<8 x float> %y, i64* %p, double %x) { + %i = fptoui double %x to i64 + store i64 %i, i64* %p + %ret = fadd <8 x float> %y, %y + ret <8 x float> %ret +} + +; CHECK: foo +; CHECK-NOT: vzeroupper +; CHECK: _ftol2 +; CHECK: ret diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/pr18162.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/pr18162.ll new file mode 100644 index 000000000000..523e47db5eee --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/pr18162.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s + +; Make sure we are not crashing on this one. 
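; (The pattern exercised below, for context: a first-class aggregate of type
; { i64, <2 x float> } is returned by value and also stored through a pointer
; obtained from a call; this is the combination that used to crash the
; backend.)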
+ +target triple = "x86_64-unknown-linux-gnu" + +%"Iterator" = type { i32* } + +declare { i64, <2 x float> } @Call() +declare { i64, <2 x float> }* @CallPtr() + +define { i64, <2 x float> } @Foo(%"Iterator"* %this) { +entry: + %retval = alloca i32 + %this.addr = alloca %"Iterator"* + %this1 = load %"Iterator"** %this.addr + %bundle_ = getelementptr inbounds %"Iterator"* %this1, i32 0, i32 0 + %0 = load i32** %bundle_ + %1 = call { i64, <2 x float> } @Call() + %2 = call { i64, <2 x float> }* @CallPtr() + %3 = getelementptr { i64, <2 x float> }* %2, i32 0, i32 1 + %4 = extractvalue { i64, <2 x float> } %1, 1 + store <2 x float> %4, <2 x float>* %3 + %5 = load { i64, <2 x float> }* %2 + ret { i64, <2 x float> } %5 +} + diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/rot16.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/rot16.ll index 0293f4e21123..6d7c702afc40 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/rot16.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/rot16.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=generic | FileCheck %s define i16 @foo(i16 %x, i16 %y, i16 %z) nounwind readnone { entry: diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/shift-double.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/shift-double.ll index 8d2b2907c5a7..fd4ba81d47c1 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/shift-double.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/shift-double.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ +; RUN: llc < %s -march=x86 -mcpu=generic -x86-asm-syntax=intel | \ ; RUN: grep "sh[lr]d" | count 5 define i64 @test1(i64 %X, i8 %C) { diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/shrink-compare.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/shrink-compare.ll index bb892011e2d6..fc7ee061f35d 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/shrink-compare.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/shrink-compare.ll @@ -2,7 +2,7 @@ declare void @bar() -define void @test1(i32* nocapture %X) nounwind { +define void @test1(i32* nocapture %X) nounwind minsize { entry: %tmp1 = load i32* %X, align 4 %and = and i32 %tmp1, 255 @@ -19,7 +19,7 @@ if.end: ; CHECK: cmpb $47, (%{{rdi|rcx}}) } -define void @test2(i32 %X) nounwind { +define void @test2(i32 %X) nounwind minsize { entry: %and = and i32 %X, 255 %cmp = icmp eq i32 %and, 47 @@ -35,7 +35,7 @@ if.end: ; CHECK: cmpb $47, %{{dil|cl}} } -define void @test3(i32 %X) nounwind { +define void @test3(i32 %X) nounwind minsize { entry: %and = and i32 %X, 255 %cmp = icmp eq i32 %and, 255 @@ -70,7 +70,7 @@ lor.end: ; preds = %lor.rhs, %entry @x = global { i8, i8, i8, i8, i8, i8, i8, i8 } { i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 1 }, align 4 ; PR16551 -define void @test5(i32 %X) nounwind { +define void @test5(i32 %X) nounwind minsize { entry: %bf.load = load i56* bitcast ({ i8, i8, i8, i8, i8, i8, i8, i8 }* @x to i56*), align 4 %bf.lshr = lshr i56 %bf.load, 32 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sibcall-5.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sibcall-5.ll index c479030508a9..c04af234b131 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sibcall-5.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sibcall-5.ll @@ -8,7 +8,7 @@ define double @foo(double %a) nounwind readonly ssp { entry: ; X32-LABEL: foo: -; X32: jmp _sin$stub +; X32: jmp L_sin$stub ; X64-LABEL: foo: ; X64: jmp _sin @@ -18,7 +18,7 @@ entry: define float @bar(float %a) 
nounwind readonly ssp { ; X32-LABEL: bar: -; X32: jmp _sinf$stub +; X32: jmp L_sinf$stub ; X64-LABEL: bar: ; X64: jmp _sinf @@ -27,6 +27,11 @@ entry: ret float %0 } +; X32-LABEL: L_sin$stub: +; X32-NEXT: .indirect_symbol _sin +; X32-LABEL: L_sinf$stub: +; X32-NEXT: .indirect_symbol _sinf + declare float @sinf(float) nounwind readonly declare double @sin(double) nounwind readonly diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sse-scalar-fp-arith-2.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sse-scalar-fp-arith-2.ll new file mode 100644 index 000000000000..59685993f5d4 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sse-scalar-fp-arith-2.ll @@ -0,0 +1,215 @@ +; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s +; RUN: llc -mtriple=x86_64-pc-linux -mattr=-sse4.1 -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s +; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7-avx < %s | FileCheck -check-prefix=CHECK -check-prefix=AVX %s + +; Ensure that the backend selects SSE/AVX scalar fp instructions +; from a packed fp instruction plus a vector insert. + + +define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) { + %1 = fadd <4 x float> %a, %b + %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7> + ret <4 x float> %2 +} + +; CHECK-LABEL: test_add_ss +; SSE2: addss %xmm1, %xmm0 +; AVX: vaddss %xmm1, %xmm0, %xmm0 +; CHECK-NOT: movss +; CHECK: ret + + +define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) { + %1 = fsub <4 x float> %a, %b + %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7> + ret <4 x float> %2 +} + +; CHECK-LABEL: test_sub_ss +; SSE2: subss %xmm1, %xmm0 +; AVX: vsubss %xmm1, %xmm0, %xmm0 +; CHECK-NOT: movss +; CHECK: ret + + +define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) { + %1 = fmul <4 x float> %a, %b + %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7> + ret <4 x float> %2 +} + +; CHECK-LABEL: test_mul_ss +; SSE2: mulss %xmm1, %xmm0 +; AVX: vmulss %xmm1, %xmm0, %xmm0 +; CHECK-NOT: movss +; CHECK: ret + + +define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) { + %1 = fdiv <4 x float> %a, %b + %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7> + ret <4 x float> %2 +} + +; CHECK-LABEL: test_div_ss +; SSE2: divss %xmm1, %xmm0 +; AVX: vdivss %xmm1, %xmm0, %xmm0 +; CHECK-NOT: movss +; CHECK: ret + + +define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) { + %1 = fadd <2 x double> %a, %b + %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3> + ret <2 x double> %2 +} + +; CHECK-LABEL: test_add_sd +; SSE2: addsd %xmm1, %xmm0 +; AVX: vaddsd %xmm1, %xmm0, %xmm0 +; CHECK-NOT: movsd +; CHECK: ret + + +define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) { + %1 = fsub <2 x double> %a, %b + %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3> + ret <2 x double> %2 +} + +; CHECK-LABEL: test_sub_sd +; SSE2: subsd %xmm1, %xmm0 +; AVX: vsubsd %xmm1, %xmm0, %xmm0 +; CHECK-NOT: movsd +; CHECK: ret + + +define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) { + %1 = fmul <2 x double> %a, %b + %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3> + ret <2 x double> %2 +} + +; CHECK-LABEL: test_mul_sd +; SSE2: mulsd %xmm1, %xmm0 +; AVX: vmulsd %xmm1, %xmm0, %xmm0 +; CHECK-NOT: movsd +; CHECK: ret + + +define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) { + %1 = fdiv <2 x double> %a, %b + %2 = shufflevector <2 x double> %1, <2 x double>
%a, <2 x i32> <i32 0, i32 3> + ret <2 x double> %2 +} + +; CHECK-LABEL: test_div_sd +; SSE2: divsd %xmm1, %xmm0 +; AVX: vdivsd %xmm1, %xmm0, %xmm0 +; CHECK-NOT: movsd +; CHECK: ret + + +define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) { + %1 = fadd <4 x float> %b, %a + %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7> + ret <4 x float> %2 +} + +; CHECK-LABEL: test2_add_ss +; SSE2: addss %xmm0, %xmm1 +; AVX: vaddss %xmm0, %xmm1, %xmm0 +; CHECK-NOT: movss +; CHECK: ret + + +define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) { + %1 = fsub <4 x float> %b, %a + %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7> + ret <4 x float> %2 +} + +; CHECK-LABEL: test2_sub_ss +; SSE2: subss %xmm0, %xmm1 +; AVX: vsubss %xmm0, %xmm1, %xmm0 +; CHECK-NOT: movss +; CHECK: ret + + +define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) { + %1 = fmul <4 x float> %b, %a + %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7> + ret <4 x float> %2 +} + +; CHECK-LABEL: test2_mul_ss +; SSE2: mulss %xmm0, %xmm1 +; AVX: vmulss %xmm0, %xmm1, %xmm0 +; CHECK-NOT: movss +; CHECK: ret + + +define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) { + %1 = fdiv <4 x float> %b, %a + %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7> + ret <4 x float> %2 +} + +; CHECK-LABEL: test2_div_ss +; SSE2: divss %xmm0, %xmm1 +; AVX: vdivss %xmm0, %xmm1, %xmm0 +; CHECK-NOT: movss +; CHECK: ret + + +define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) { + %1 = fadd <2 x double> %b, %a + %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3> + ret <2 x double> %2 +} + +; CHECK-LABEL: test2_add_sd +; SSE2: addsd %xmm0, %xmm1 +; AVX: vaddsd %xmm0, %xmm1, %xmm0 +; CHECK-NOT: movsd +; CHECK: ret + + +define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) { + %1 = fsub <2 x double> %b, %a + %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3> + ret <2 x double> %2 +} + +; CHECK-LABEL: test2_sub_sd +; SSE2: subsd %xmm0, %xmm1 +; AVX: vsubsd %xmm0, %xmm1, %xmm0 +; CHECK-NOT: movsd +; CHECK: ret + + +define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) { + %1 = fmul <2 x double> %b, %a + %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3> + ret <2 x double> %2 +} + +; CHECK-LABEL: test2_mul_sd +; SSE2: mulsd %xmm0, %xmm1 +; AVX: vmulsd %xmm0, %xmm1, %xmm0 +; CHECK-NOT: movsd +; CHECK: ret + + +define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) { + %1 = fdiv <2 x double> %b, %a + %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3> + ret <2 x double> %2 +} + +; CHECK-LABEL: test2_div_sd +; SSE2: divsd %xmm0, %xmm1 +; AVX: vdivsd %xmm0, %xmm1, %xmm0 +; CHECK-NOT: movsd +; CHECK: ret + diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll new file mode 100644 index 000000000000..3949a835e67a --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll @@ -0,0 +1,310 @@ +; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s +; RUN: llc -mtriple=x86_64-pc-linux -mattr=-sse4.1 -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s +; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7-avx < %s | FileCheck -check-prefix=CHECK -check-prefix=AVX %s + +; Ensure that the backend no longer emits unnecessary vector insert +; instructions immediately after SSE scalar fp instructions +; like addss or mulss.
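; Put differently, for the extract/op/insert idiom used by the tests below a
; single scalar instruction should survive instruction selection, with no
; movss/movsd re-insert following it. A sketch of the intended codegen for
; test_add_ss, mirroring the CHECK/CHECK-NOT pairs rather than adding new
; expectations:
;
;   addss %xmm1, %xmm0    ## the entire operation
;   ret                   ## no intervening movss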
+ + +define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) { + %1 = extractelement <4 x float> %b, i32 0 + %2 = extractelement <4 x float> %a, i32 0 + %add = fadd float %2, %1 + %3 = insertelement <4 x float> %a, float %add, i32 0 + ret <4 x float> %3 +} + +; CHECK-LABEL: test_add_ss +; SSE2: addss %xmm1, %xmm0 +; AVX: vaddss %xmm1, %xmm0, %xmm0 +; CHECK-NOT: movss +; CHECK: ret + + +define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) { + %1 = extractelement <4 x float> %b, i32 0 + %2 = extractelement <4 x float> %a, i32 0 + %sub = fsub float %2, %1 + %3 = insertelement <4 x float> %a, float %sub, i32 0 + ret <4 x float> %3 +} + +; CHECK-LABEL: test_sub_ss +; SSE2: subss %xmm1, %xmm0 +; AVX: vsubss %xmm1, %xmm0, %xmm0 +; CHECK-NOT: movss +; CHECK: ret + +define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) { + %1 = extractelement <4 x float> %b, i32 0 + %2 = extractelement <4 x float> %a, i32 0 + %mul = fmul float %2, %1 + %3 = insertelement <4 x float> %a, float %mul, i32 0 + ret <4 x float> %3 +} + +; CHECK-LABEL: test_mul_ss +; SSE2: mulss %xmm1, %xmm0 +; AVX: vmulss %xmm1, %xmm0, %xmm0 +; CHECK-NOT: movss +; CHECK: ret + + +define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) { + %1 = extractelement <4 x float> %b, i32 0 + %2 = extractelement <4 x float> %a, i32 0 + %div = fdiv float %2, %1 + %3 = insertelement <4 x float> %a, float %div, i32 0 + ret <4 x float> %3 +} + +; CHECK-LABEL: test_div_ss +; SSE2: divss %xmm1, %xmm0 +; AVX: vdivss %xmm1, %xmm0, %xmm0 +; CHECK-NOT: movss +; CHECK: ret + + +define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) { + %1 = extractelement <2 x double> %b, i32 0 + %2 = extractelement <2 x double> %a, i32 0 + %add = fadd double %2, %1 + %3 = insertelement <2 x double> %a, double %add, i32 0 + ret <2 x double> %3 +} + +; CHECK-LABEL: test_add_sd +; SSE2: addsd %xmm1, %xmm0 +; AVX: vaddsd %xmm1, %xmm0, %xmm0 +; CHECK-NOT: movsd +; CHECK: ret + + +define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) { + %1 = extractelement <2 x double> %b, i32 0 + %2 = extractelement <2 x double> %a, i32 0 + %sub = fsub double %2, %1 + %3 = insertelement <2 x double> %a, double %sub, i32 0 + ret <2 x double> %3 +} + +; CHECK-LABEL: test_sub_sd +; SSE2: subsd %xmm1, %xmm0 +; AVX: vsubsd %xmm1, %xmm0, %xmm0 +; CHECK-NOT: movsd +; CHECK: ret + + +define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) { + %1 = extractelement <2 x double> %b, i32 0 + %2 = extractelement <2 x double> %a, i32 0 + %mul = fmul double %2, %1 + %3 = insertelement <2 x double> %a, double %mul, i32 0 + ret <2 x double> %3 +} + +; CHECK-LABEL: test_mul_sd +; SSE2: mulsd %xmm1, %xmm0 +; AVX: vmulsd %xmm1, %xmm0, %xmm0 +; CHECK-NOT: movsd +; CHECK: ret + + +define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) { + %1 = extractelement <2 x double> %b, i32 0 + %2 = extractelement <2 x double> %a, i32 0 + %div = fdiv double %2, %1 + %3 = insertelement <2 x double> %a, double %div, i32 0 + ret <2 x double> %3 +} + +; CHECK-LABEL: test_div_sd +; SSE2: divsd %xmm1, %xmm0 +; AVX: vdivsd %xmm1, %xmm0, %xmm0 +; CHECK-NOT: movsd +; CHECK: ret + + +define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) { + %1 = extractelement <4 x float> %a, i32 0 + %2 = extractelement <4 x float> %b, i32 0 + %add = fadd float %1, %2 + %3 = insertelement <4 x float> %b, float %add, i32 0 + ret <4 x float> %3 +} + +; CHECK-LABEL: test2_add_ss +; SSE2: addss %xmm0, %xmm1 +; AVX: vaddss %xmm0, %xmm1, %xmm0 +; CHECK-NOT: movss +; 
CHECK: ret + + +define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) { + %1 = extractelement <4 x float> %a, i32 0 + %2 = extractelement <4 x float> %b, i32 0 + %sub = fsub float %2, %1 + %3 = insertelement <4 x float> %b, float %sub, i32 0 + ret <4 x float> %3 +} + +; CHECK-LABEL: test2_sub_ss +; SSE2: subss %xmm0, %xmm1 +; AVX: vsubss %xmm0, %xmm1, %xmm0 +; CHECK-NOT: movss +; CHECK: ret + + +define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) { + %1 = extractelement <4 x float> %a, i32 0 + %2 = extractelement <4 x float> %b, i32 0 + %mul = fmul float %1, %2 + %3 = insertelement <4 x float> %b, float %mul, i32 0 + ret <4 x float> %3 +} + +; CHECK-LABEL: test2_mul_ss +; SSE2: mulss %xmm0, %xmm1 +; AVX: vmulss %xmm0, %xmm1, %xmm0 +; CHECK-NOT: movss +; CHECK: ret + + +define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) { + %1 = extractelement <4 x float> %a, i32 0 + %2 = extractelement <4 x float> %b, i32 0 + %div = fdiv float %2, %1 + %3 = insertelement <4 x float> %b, float %div, i32 0 + ret <4 x float> %3 +} + +; CHECK-LABEL: test2_div_ss +; SSE2: divss %xmm0, %xmm1 +; AVX: vdivss %xmm0, %xmm1, %xmm0 +; CHECK-NOT: movss +; CHECK: ret + + +define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) { + %1 = extractelement <2 x double> %a, i32 0 + %2 = extractelement <2 x double> %b, i32 0 + %add = fadd double %1, %2 + %3 = insertelement <2 x double> %b, double %add, i32 0 + ret <2 x double> %3 +} + +; CHECK-LABEL: test2_add_sd +; SSE2: addsd %xmm0, %xmm1 +; AVX: vaddsd %xmm0, %xmm1, %xmm0 +; CHECK-NOT: movsd +; CHECK: ret + + +define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) { + %1 = extractelement <2 x double> %a, i32 0 + %2 = extractelement <2 x double> %b, i32 0 + %sub = fsub double %2, %1 + %3 = insertelement <2 x double> %b, double %sub, i32 0 + ret <2 x double> %3 +} + +; CHECK-LABEL: test2_sub_sd +; SSE2: subsd %xmm0, %xmm1 +; AVX: vsubsd %xmm0, %xmm1, %xmm0 +; CHECK-NOT: movsd +; CHECK: ret + + +define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) { + %1 = extractelement <2 x double> %a, i32 0 + %2 = extractelement <2 x double> %b, i32 0 + %mul = fmul double %1, %2 + %3 = insertelement <2 x double> %b, double %mul, i32 0 + ret <2 x double> %3 +} + +; CHECK-LABEL: test2_mul_sd +; SSE2: mulsd %xmm0, %xmm1 +; AVX: vmulsd %xmm0, %xmm1, %xmm0 +; CHECK-NOT: movsd +; CHECK: ret + + +define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) { + %1 = extractelement <2 x double> %a, i32 0 + %2 = extractelement <2 x double> %b, i32 0 + %div = fdiv double %2, %1 + %3 = insertelement <2 x double> %b, double %div, i32 0 + ret <2 x double> %3 +} + +; CHECK-LABEL: test2_div_sd +; SSE2: divsd %xmm0, %xmm1 +; AVX: vdivsd %xmm0, %xmm1, %xmm0 +; CHECK-NOT: movsd +; CHECK: ret + + +define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) { + %1 = extractelement <4 x float> %b, i32 0 + %2 = extractelement <4 x float> %a, i32 0 + %add = fadd float %2, %1 + %add2 = fadd float %2, %add + %3 = insertelement <4 x float> %a, float %add2, i32 0 + ret <4 x float> %3 +} + +; CHECK-LABEL: test_multiple_add_ss +; CHECK: addss +; CHECK: addss +; CHECK-NOT: movss +; CHECK: ret + + +define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) { + %1 = extractelement <4 x float> %b, i32 0 + %2 = extractelement <4 x float> %a, i32 0 + %sub = fsub float %2, %1 + %sub2 = fsub float %2, %sub + %3 = insertelement <4 x float> %a, float %sub2, i32 0 + ret <4 x float> %3 +} + +; CHECK-LABEL: 
test_multiple_sub_ss +; CHECK: subss +; CHECK: subss +; CHECK-NOT: movss +; CHECK: ret + + +define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) { + %1 = extractelement <4 x float> %b, i32 0 + %2 = extractelement <4 x float> %a, i32 0 + %mul = fmul float %2, %1 + %mul2 = fmul float %2, %mul + %3 = insertelement <4 x float> %a, float %mul2, i32 0 + ret <4 x float> %3 +} + +; CHECK-LABEL: test_multiple_mul_ss +; CHECK: mulss +; CHECK: mulss +; CHECK-NOT: movss +; CHECK: ret + +define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) { + %1 = extractelement <4 x float> %b, i32 0 + %2 = extractelement <4 x float> %a, i32 0 + %div = fdiv float %2, %1 + %div2 = fdiv float %2, %div + %3 = insertelement <4 x float> %a, float %div2, i32 0 + ret <4 x float> %3 +} + +; CHECK-LABEL: test_multiple_div_ss +; CHECK: divss +; CHECK: divss +; CHECK-NOT: movss +; CHECK: ret + diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sse2-blend.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sse2-blend.ll index 1ac983254eaf..968595c383af 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sse2-blend.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sse2-blend.ll @@ -1,9 +1,9 @@ ; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse4.1 | FileCheck %s ; CHECK: vsel_float -; CHECK: pandn -; CHECK: pand -; CHECK: por +; CHECK: xorps +; CHECK: movss +; CHECK: orps ; CHECK: ret define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) { %A = load <4 x float>* %v1 @@ -14,9 +14,9 @@ define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) { } ; CHECK: vsel_i32 -; CHECK: pandn -; CHECK: pand -; CHECK: por +; CHECK: xorps +; CHECK: movss +; CHECK: orps ; CHECK: ret define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) { %A = load <4 x i32>* %v1 diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll index ff6c10bfe5a8..cfc892daac21 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -710,3 +710,10 @@ define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) { ret i32 %res } declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone + +define void @test_x86_sse2_pause() { + ; CHECK: pause + tail call void @llvm.x86.sse2.pause() + ret void +} +declare void @llvm.x86.sse2.pause() nounwind diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/ssp-data-layout.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/ssp-data-layout.ll new file mode 100644 index 000000000000..72194af96f14 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/ssp-data-layout.ll @@ -0,0 +1,238 @@ +; RUN: llc < %s -disable-fp-elim -mtriple=x86_64-pc-linux-gnu -mcpu=corei7 -o - | FileCheck %s +; This test is fairly fragile. The goal is to ensure that "large" stack +; objects are allocated closest to the stack protector (i.e., farthest away +; from the Stack Pointer). In standard SSP mode this means that large (>= +; ssp-buffer-size) arrays and structures containing such arrays are +; closest to the protector. With sspstrong and sspreq this means large +; arrays/structures-with-arrays are closest, followed by small (< ssp-buffer-size) +; arrays/structures-with-arrays, and then addr-taken variables. +; +; Ideally, we only want to verify that the objects appear in the correct groups +; and that the groups have the correct relative stack offset.
The ordering +; within a group is not relevant to this test. Unfortunately, there is not +; an elegant way to do this, so just match the offset for each object. +; RUN: llc < %s -disable-fp-elim -mtriple=x86_64-unknown-unknown -O0 -mcpu=corei7 -o - \ +; RUN: | FileCheck --check-prefix=FAST-NON-LIN %s +; FastISel was not setting the StackProtectorIndex when lowering +; Intrinsic::stackprotector and as a result the stack re-arrangement code was +; never applied. This problem only shows up on non-Linux platforms because on +; Linux the stack protector cookie is loaded from a special address space which +; always triggers standard ISel. Run a basic test to ensure that at -O0 +; on a non-linux target the data layout rules are triggered. + +%struct.struct_large_char = type { [8 x i8] } +%struct.struct_large_char2 = type { [2 x i8], [8 x i8] } +%struct.struct_small_char = type { [2 x i8] } +%struct.struct_large_nonchar = type { [8 x i32] } +%struct.struct_small_nonchar = type { [2 x i16] } + +define void @layout_ssp() ssp { +entry: +; Expected stack layout for ssp is +; -16 large_char . Group 1, nested arrays, arrays >= ssp-buffer-size +; -24 struct_large_char . +; -28 scalar1 | Everything else +; -32 scalar2 +; -36 scalar3 +; -40 addr-of +; -44 small_nonchar +; -80 large_nonchar +; -82 small_char +; -88 struct_small_char +; -120 struct_large_nonchar +; -128 struct_small_nonchar + +; CHECK: layout_ssp: +; CHECK: call{{l|q}} get_scalar1 +; CHECK: movl %eax, -28( +; CHECK: call{{l|q}} end_scalar1 + +; CHECK: call{{l|q}} get_scalar2 +; CHECK: movl %eax, -32( +; CHECK: call{{l|q}} end_scalar2 + +; CHECK: call{{l|q}} get_scalar3 +; CHECK: movl %eax, -36( +; CHECK: call{{l|q}} end_scalar3 + +; CHECK: call{{l|q}} get_addrof +; CHECK: movl %eax, -40( +; CHECK: call{{l|q}} end_addrof + +; CHECK: get_small_nonchar +; CHECK: movw %ax, -44( +; CHECK: call{{l|q}} end_small_nonchar + +; CHECK: call{{l|q}} get_large_nonchar +; CHECK: movl %eax, -80( +; CHECK: call{{l|q}} end_large_nonchar + +; CHECK: call{{l|q}} get_small_char +; CHECK: movb %al, -82( +; CHECK: call{{l|q}} end_small_char + +; CHECK: call{{l|q}} get_large_char +; CHECK: movb %al, -16( +; CHECK: call{{l|q}} end_large_char + +; CHECK: call{{l|q}} get_struct_large_char +; CHECK: movb %al, -24( +; CHECK: call{{l|q}} end_struct_large_char + +; CHECK: call{{l|q}} get_struct_small_char +; CHECK: movb %al, -88( +; CHECK: call{{l|q}} end_struct_small_char + +; CHECK: call{{l|q}} get_struct_large_nonchar +; CHECK: movl %eax, -120( +; CHECK: call{{l|q}} end_struct_large_nonchar + +; CHECK: call{{l|q}} get_struct_small_nonchar +; CHECK: movw %ax, -128( +; CHECK: call{{l|q}} end_struct_small_nonchar + %x = alloca i32, align 4 + %y = alloca i32, align 4 + %z = alloca i32, align 4 + %ptr = alloca i32, align 4 + %small2 = alloca [2 x i16], align 2 + %large2 = alloca [8 x i32], align 16 + %small = alloca [2 x i8], align 1 + %large = alloca [8 x i8], align 1 + %a = alloca %struct.struct_large_char, align 1 + %b = alloca %struct.struct_small_char, align 1 + %c = alloca %struct.struct_large_nonchar, align 8 + %d = alloca %struct.struct_small_nonchar, align 2 + %call = call i32 @get_scalar1() + store i32 %call, i32* %x, align 4 + call void @end_scalar1() + %call1 = call i32 @get_scalar2() + store i32 %call1, i32* %y, align 4 + call void @end_scalar2() + %call2 = call i32 @get_scalar3() + store i32 %call2, i32* %z, align 4 + call void @end_scalar3() + %call3 = call i32 @get_addrof() + store i32 %call3, i32* %ptr, align 4 + call void @end_addrof() + %call4 = call 
signext i16 @get_small_nonchar() + %arrayidx = getelementptr inbounds [2 x i16]* %small2, i32 0, i64 0 + store i16 %call4, i16* %arrayidx, align 2 + call void @end_small_nonchar() + %call5 = call i32 @get_large_nonchar() + %arrayidx6 = getelementptr inbounds [8 x i32]* %large2, i32 0, i64 0 + store i32 %call5, i32* %arrayidx6, align 4 + call void @end_large_nonchar() + %call7 = call signext i8 @get_small_char() + %arrayidx8 = getelementptr inbounds [2 x i8]* %small, i32 0, i64 0 + store i8 %call7, i8* %arrayidx8, align 1 + call void @end_small_char() + %call9 = call signext i8 @get_large_char() + %arrayidx10 = getelementptr inbounds [8 x i8]* %large, i32 0, i64 0 + store i8 %call9, i8* %arrayidx10, align 1 + call void @end_large_char() + %call11 = call signext i8 @get_struct_large_char() + %foo = getelementptr inbounds %struct.struct_large_char* %a, i32 0, i32 0 + %arrayidx12 = getelementptr inbounds [8 x i8]* %foo, i32 0, i64 0 + store i8 %call11, i8* %arrayidx12, align 1 + call void @end_struct_large_char() + %call13 = call signext i8 @get_struct_small_char() + %foo14 = getelementptr inbounds %struct.struct_small_char* %b, i32 0, i32 0 + %arrayidx15 = getelementptr inbounds [2 x i8]* %foo14, i32 0, i64 0 + store i8 %call13, i8* %arrayidx15, align 1 + call void @end_struct_small_char() + %call16 = call i32 @get_struct_large_nonchar() + %foo17 = getelementptr inbounds %struct.struct_large_nonchar* %c, i32 0, i32 0 + %arrayidx18 = getelementptr inbounds [8 x i32]* %foo17, i32 0, i64 0 + store i32 %call16, i32* %arrayidx18, align 4 + call void @end_struct_large_nonchar() + %call19 = call signext i16 @get_struct_small_nonchar() + %foo20 = getelementptr inbounds %struct.struct_small_nonchar* %d, i32 0, i32 0 + %arrayidx21 = getelementptr inbounds [2 x i16]* %foo20, i32 0, i64 0 + store i16 %call19, i16* %arrayidx21, align 2 + call void @end_struct_small_nonchar() + %arraydecay = getelementptr inbounds [8 x i8]* %large, i32 0, i32 0 + %arraydecay22 = getelementptr inbounds [2 x i8]* %small, i32 0, i32 0 + %arraydecay23 = getelementptr inbounds [8 x i32]* %large2, i32 0, i32 0 + %arraydecay24 = getelementptr inbounds [2 x i16]* %small2, i32 0, i32 0 + %0 = load i32* %x, align 4 + %1 = load i32* %y, align 4 + %2 = load i32* %z, align 4 + %coerce.dive = getelementptr %struct.struct_large_char* %a, i32 0, i32 0 + %3 = bitcast [8 x i8]* %coerce.dive to i64* + %4 = load i64* %3, align 1 + %coerce.dive25 = getelementptr %struct.struct_small_char* %b, i32 0, i32 0 + %5 = bitcast [2 x i8]* %coerce.dive25 to i16* + %6 = load i16* %5, align 1 + %coerce.dive26 = getelementptr %struct.struct_small_nonchar* %d, i32 0, i32 0 + %7 = bitcast [2 x i16]* %coerce.dive26 to i32* + %8 = load i32* %7, align 1 + call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2) + ret void +} + +define void @fast_non_linux() ssp { +entry: +; FAST-NON-LIN: fast_non_linux: +; FAST-NON-LIN: call{{l|q}} get_scalar1 +; FAST-NON-LIN: movl %eax, -20( +; FAST-NON-LIN: call{{l|q}} end_scalar1 + +; FAST-NON-LIN: call{{l|q}} get_large_char +; FAST-NON-LIN: movb %al, -16( +; FAST-NON-LIN: call{{l|q}} end_large_char + %x = alloca i32, align 4 + %large = alloca [8 x i8], align 1 + %call = call i32 @get_scalar1() + store i32 %call, i32* %x, align 4 + call void @end_scalar1() + %call1 = call signext i8 @get_large_char() + %arrayidx = getelementptr inbounds [8 x i8]* %large, i32 0, i64 0 + store i8 
%call1, i8* %arrayidx, align 1 + call void @end_large_char() + %0 = load i32* %x, align 4 + %arraydecay = getelementptr inbounds [8 x i8]* %large, i32 0, i32 0 + call void @takes_two(i32 %0, i8* %arraydecay) + ret void +} + +declare i32 @get_scalar1() +declare void @end_scalar1() + +declare i32 @get_scalar2() +declare void @end_scalar2() + +declare i32 @get_scalar3() +declare void @end_scalar3() + +declare i32 @get_addrof() +declare void @end_addrof() + +declare signext i16 @get_small_nonchar() +declare void @end_small_nonchar() + +declare i32 @get_large_nonchar() +declare void @end_large_nonchar() + +declare signext i8 @get_small_char() +declare void @end_small_char() + +declare signext i8 @get_large_char() +declare void @end_large_char() + +declare signext i8 @get_struct_large_char() +declare void @end_struct_large_char() + +declare signext i8 @get_struct_large_char2() +declare void @end_struct_large_char2() + +declare signext i8 @get_struct_small_char() +declare void @end_struct_small_char() + +declare i32 @get_struct_large_nonchar() +declare void @end_struct_large_nonchar() + +declare signext i16 @get_struct_small_nonchar() +declare void @end_struct_small_nonchar() + +declare void @takes_all(i64, i16, %struct.struct_large_nonchar* byval align 8, i32, i8*, i8*, i32*, i16*, i32*, i32, i32, i32) +declare void @takes_two(i32, i8*) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stack-protector-dbginfo.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stack-protector-dbginfo.ll index 465f06796bf1..bd27ac347690 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stack-protector-dbginfo.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stack-protector-dbginfo.ll @@ -23,7 +23,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) attributes #0 = { sspreq } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!21} +!llvm.module.flags = !{!21, !72} !0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 true, metadata !"", i32 0, metadata !2, metadata !5, metadata !8, metadata !20, metadata !5, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/matt/ryan_bug/] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"", metadata !"/Users/matt/ryan_bug"} @@ -94,3 +94,4 @@ attributes #0 = { sspreq } !69 = metadata !{i32 786689, metadata !65, metadata !"p1", metadata !10, i32 33554433, metadata !50, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p1] [line 1] !70 = metadata !{i32 786689, metadata !65, metadata !"", metadata !10, i32 50331650, metadata !50, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 2] !71 = metadata !{i32 1, i32 0, metadata !65, metadata !40} +!72 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stackmap-liveness.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stackmap-liveness.ll new file mode 100644 index 000000000000..b1a931dd7c43 --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stackmap-liveness.ll @@ -0,0 +1,178 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -disable-fp-elim | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -disable-fp-elim -enable-stackmap-liveness| FileCheck -check-prefix=STACK %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -disable-fp-elim -enable-patchpoint-liveness| FileCheck -check-prefix=PATCH %s +; +; Note: Print verbose stackmaps using -debug-only=stackmaps. 
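; A quick map from the three RUN lines above to their check prefixes, spelled
; out for readability (this restates the RUN lines, nothing more): the default
; run (CHECK) emits no liveness information, -enable-stackmap-liveness (STACK)
; annotates stackmaps only, and -enable-patchpoint-liveness (PATCH) annotates
; patchpoints only.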
+ +; CHECK-LABEL: .section __LLVM_STACKMAPS,__llvm_stackmaps +; CHECK-NEXT: __LLVM_StackMaps: +; CHECK-NEXT: .long 0 +; Num LargeConstants +; CHECK-NEXT: .long 0 +; Num Callsites +; CHECK-NEXT: .long 5 +define void @stackmap_liveness() { +entry: + %a1 = call <2 x double> asm sideeffect "", "={xmm2}"() nounwind +; StackMap 1 (no liveness information available) +; CHECK-LABEL: .long L{{.*}}-_stackmap_liveness +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; Num LiveOut Entries: 0 +; CHECK-NEXT: .short 0 + +; StackMap 1 (stackmap liveness information enabled) +; STACK-LABEL: .long L{{.*}}-_stackmap_liveness +; STACK-NEXT: .short 0 +; STACK-NEXT: .short 0 +; Num LiveOut Entries: 2 +; STACK-NEXT: .short 2 +; LiveOut Entry 1: %RSP (8 bytes) +; STACK-NEXT: .short 7 +; STACK-NEXT: .byte 0 +; STACK-NEXT: .byte 8 +; LiveOut Entry 2: %YMM2 (16 bytes) --> %XMM2 +; STACK-NEXT: .short 19 +; STACK-NEXT: .byte 0 +; STACK-NEXT: .byte 16 + +; StackMap 1 (patchpoint liveness information enabled) +; PATCH-LABEL: .long L{{.*}}-_stackmap_liveness +; PATCH-NEXT: .short 0 +; PATCH-NEXT: .short 0 +; Num LiveOut Entries: 0 +; PATCH-NEXT: .short 0 + call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 5) + %a2 = call i64 asm sideeffect "", "={r8}"() nounwind + %a3 = call i8 asm sideeffect "", "={ah}"() nounwind + %a4 = call <4 x double> asm sideeffect "", "={ymm0}"() nounwind + %a5 = call <4 x double> asm sideeffect "", "={ymm1}"() nounwind + +; StackMap 2 (no liveness information available) +; CHECK-LABEL: .long L{{.*}}-_stackmap_liveness +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; Num LiveOut Entries: 0 +; CHECK-NEXT: .short 0 + +; StackMap 2 (stackmap liveness information enabled) +; STACK-LABEL: .long L{{.*}}-_stackmap_liveness +; STACK-NEXT: .short 0 +; STACK-NEXT: .short 0 +; Num LiveOut Entries: 6 +; STACK-NEXT: .short 6 +; LiveOut Entry 1: %RAX (1 byte) --> %AL or %AH +; STACK-NEXT: .short 0 +; STACK-NEXT: .byte 0 +; STACK-NEXT: .byte 1 +; LiveOut Entry 2: %RSP (8 bytes) +; STACK-NEXT: .short 7 +; STACK-NEXT: .byte 0 +; STACK-NEXT: .byte 8 +; LiveOut Entry 3: %R8 (8 bytes) +; STACK-NEXT: .short 8 +; STACK-NEXT: .byte 0 +; STACK-NEXT: .byte 8 +; LiveOut Entry 4: %YMM0 (32 bytes) +; STACK-NEXT: .short 17 +; STACK-NEXT: .byte 0 +; STACK-NEXT: .byte 32 +; LiveOut Entry 5: %YMM1 (32 bytes) +; STACK-NEXT: .short 18 +; STACK-NEXT: .byte 0 +; STACK-NEXT: .byte 32 +; LiveOut Entry 6: %YMM2 (16 bytes) --> %XMM2 +; STACK-NEXT: .short 19 +; STACK-NEXT: .byte 0 +; STACK-NEXT: .byte 16 + +; StackMap 2 (patchpoint liveness information enabled) +; PATCH-LABEL: .long L{{.*}}-_stackmap_liveness +; PATCH-NEXT: .short 0 +; PATCH-NEXT: .short 0 +; Num LiveOut Entries: 0 +; PATCH-NEXT: .short 0 + call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 2, i32 5) + call void asm sideeffect "", "{r8},{ah},{ymm0},{ymm1}"(i64 %a2, i8 %a3, <4 x double> %a4, <4 x double> %a5) nounwind + +; StackMap 3 (no liveness information available) +; CHECK-LABEL: .long L{{.*}}-_stackmap_liveness +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; Num LiveOut Entries: 0 +; CHECK-NEXT: .short 0 + +; StackMap 3 (stackmap liveness information enabled) +; STACK-LABEL: .long L{{.*}}-_stackmap_liveness +; STACK-NEXT: .short 0 +; STACK-NEXT: .short 0 +; Num LiveOut Entries: 2 +; STACK-NEXT: .short 2 +; LiveOut Entry 1: %RSP (8 bytes) +; STACK-NEXT: .short 7 +; STACK-NEXT: .byte 0 +; STACK-NEXT: .byte 8 +; LiveOut Entry 2: %YMM2 (16 bytes) --> %XMM2 +; STACK-NEXT: .short 19 +; STACK-NEXT: .byte 0 +; STACK-NEXT: .byte 16 + +;
StackMap 3 (patchpoint liveness information enabled) +; PATCH-LABEL: .long L{{.*}}-_stackmap_liveness +; PATCH-NEXT: .short 0 +; PATCH-NEXT: .short 0 +; Num LiveOut Entries: 0 +; PATCH-NEXT: .short 0 + call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 3, i32 5) + call void asm sideeffect "", "{xmm2}"(<2 x double> %a1) nounwind + ret void +} + +define void @mixed_liveness() { +entry: + %a1 = call <2 x double> asm sideeffect "", "={xmm2}"() nounwind +; StackMap 4 (stackmap liveness information enabled) +; STACK-LABEL: .long L{{.*}}-_mixed_liveness +; STACK-NEXT: .short 0 +; STACK-NEXT: .short 0 +; Num LiveOut Entries: 1 +; STACK-NEXT: .short 1 +; LiveOut Entry 1: %YMM2 (16 bytes) --> %XMM2 +; STACK-NEXT: .short 19 +; STACK-NEXT: .byte 0 +; STACK-NEXT: .byte 16 +; StackMap 5 (stackmap liveness information enabled) +; STACK-LABEL: .long L{{.*}}-_mixed_liveness +; STACK-NEXT: .short 0 +; STACK-NEXT: .short 0 +; Num LiveOut Entries: 0 +; STACK-NEXT: .short 0 + +; StackMap 4 (patchpoint liveness information enabled) +; PATCH-LABEL: .long L{{.*}}-_mixed_liveness +; PATCH-NEXT: .short 0 +; PATCH-NEXT: .short 0 +; Num LiveOut Entries: 0 +; PATCH-NEXT: .short 0 +; StackMap 5 (patchpoint liveness information enabled) +; PATCH-LABEL: .long L{{.*}}-_mixed_liveness +; PATCH-NEXT: .short 0 +; PATCH-NEXT: .short 0 +; Num LiveOut Entries: 2 +; PATCH-NEXT: .short 2 +; LiveOut Entry 1: %RSP (8 bytes) +; PATCH-NEXT: .short 7 +; PATCH-NEXT: .byte 0 +; PATCH-NEXT: .byte 8 +; LiveOut Entry 2: %YMM2 (16 bytes) --> %XMM2 +; PATCH-NEXT: .short 19 +; PATCH-NEXT: .byte 0 +; PATCH-NEXT: .byte 16 + call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 5) + call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 5, i32 0, i8* null, i32 0) + call void asm sideeffect "", "{xmm2}"(<2 x double> %a1) nounwind + ret void +} + +declare void @llvm.experimental.stackmap(i64, i32, ...) +declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
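; For reference, each LiveOut entry in the records above is encoded as a
; (register, reserved, size) triple; the layout below is read off the
; STACK/PATCH check lines themselves rather than any external spec:
;
;   .short <DWARF register number>   ## e.g. 7 for %RSP, 19 for %YMM2
;   .byte  0                         ## reserved
;   .byte  <size in bytes>           ## 8 for %RSP; 16 for the %XMM2 half of %YMM2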
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stackmap-nops.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stackmap-nops.ll new file mode 100644 index 000000000000..5a78f24d7b5e --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stackmap-nops.ll @@ -0,0 +1,230 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -disable-fp-elim | FileCheck %s + +define void @nop_test() { +entry: +; CHECK-LABEL: nop_test: +; CHECK: nop +; CHECK: xchgw %ax, %ax +; CHECK: nopl (%rax) +; CHECK: nopl 8(%rax) +; CHECK: nopl 8(%rax,%rax) +; CHECK: nopw 8(%rax,%rax) +; CHECK: nopl 512(%rax) +; CHECK: nopl 512(%rax,%rax) +; CHECK: nopw 512(%rax,%rax) +; CHECK: nopw %cs:512(%rax,%rax) + +; 11 +; CHECK: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) + +; 12 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) + +; 13 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) + +; 14 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) + +; 15 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) + +; 16 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) +; CHECK-NEXT: nop + +; 17 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) +; CHECK-NEXT: xchgw %ax, %ax + +; 18 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) +; CHECK-NEXT: nopl (%rax) + +; 19 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) +; CHECK-NEXT: nopl 8(%rax) + +; 20 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) +; CHECK-NEXT: nopl 8(%rax,%rax) + +; 21 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) +; CHECK-NEXT: nopw 8(%rax,%rax) + +; 22 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) +; CHECK-NEXT: nopl 512(%rax) + +; 23 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) +; CHECK-NEXT: nopl 512(%rax,%rax) + +; 24 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) +; CHECK-NEXT: nopw 512(%rax,%rax) + +; 25 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) +; CHECK-NEXT: nopw %cs:512(%rax,%rax) + +; 26 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) + +; 27 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 
+; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) + +; 28 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) + +;29 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) + +; 30 +; CHECK: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: .byte 102 +; CHECK-NEXT: nopw %cs:512(%rax,%rax) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 0) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 1) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 2, i32 2) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 3, i32 3) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 4) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 5, i32 5) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 6, i32 6) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 7, i32 7) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 8, i32 8) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 9, i32 9) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 10, i32 10) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 11, i32 11) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 12, i32 12) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 13, i32 13) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 14) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 15) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 16) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 17) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 18, i32 18) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 19, i32 19) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 20, i32 20) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 21, i32 21) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 22, i32 22) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 23, i32 23) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 24, i32 24) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 25, i32 25) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 26, i32 26) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 27, i32 27) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 28, i32 28) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 29, i32 29) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 30, i32 30) + ret void +} + +declare void 
@llvm.experimental.stackmap(i64, i32, ...) diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stackmap.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stackmap.ll index ed9558302848..e4194adfaa41 100644 --- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stackmap.ll +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stackmap.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -disable-fp-elim | FileCheck %s ; ; Note: Print verbose stackmaps using -debug-only=stackmaps. @@ -9,11 +9,11 @@ ; CHECK-NEXT: .long 1 ; CHECK-NEXT: .quad 4294967296 ; Num Callsites -; CHECK-NEXT: .long 11 +; CHECK-NEXT: .long 18 ; Constant arguments ; -; CHECK-NEXT: .long 1 +; CHECK-NEXT: .quad 1 ; CHECK-NEXT: .long L{{.*}}-_constantargs ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 4 @@ -41,14 +41,13 @@ define void @constantargs() { entry: %0 = inttoptr i64 12345 to i8* - tail call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 1, i32 15, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296) + tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 1, i32 15, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296) ret void } ; Inline OSR Exit ; -; CHECK-NEXT: .long 3 -; CHECK-NEXT: .long L{{.*}}-_osrinline +; CHECK-LABEL: .long L{{.*}}-_osrinline ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 2 ; CHECK-NEXT: .byte 1 @@ -64,7 +63,7 @@ entry: ; Runtime void->void call. call void inttoptr (i64 -559038737 to void ()*)() ; Followed by inline OSR patchpoint with 12-byte shadow and 2 live vars. - call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 3, i32 12, i64 %a, i64 %b) + call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b) ret void } @@ -72,8 +71,7 @@ entry: ; ; 2 live variables in register. ; -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long L{{.*}}-_osrcold +; CHECK-LABEL: .long L{{.*}}-_osrcold ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 2 ; CHECK-NEXT: .byte 1 @@ -83,7 +81,7 @@ entry: ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 8 ; CHECK-NEXT: .short {{[0-9]+}} -; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 0 define void @osrcold(i64 %a, i64 %b) { entry: %test = icmp slt i64 %a, %b @@ -91,40 +89,48 @@ entry: cold: ; OSR patchpoint with 12-byte nop-slide and 2 live vars. %thunk = inttoptr i64 -559038737 to i8* - call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 4, i32 15, i8* %thunk, i32 0, i64 %a, i64 %b) + call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 4, i32 15, i8* %thunk, i32 0, i64 %a, i64 %b) unreachable ret: ret void } ; Property Read -; CHECK-NEXT: .long 5 -; CHECK-NEXT: .long L{{.*}}-_propertyRead -; CHECK-NEXT: .short 0 -; CHECK-NEXT: .short 0 -; -; FIXME: There are currently no stackmap entries. After moving to -; AnyRegCC, we will have entries for the object and return value. 
+; CHECK-LABEL: .long L{{.*}}-_propertyRead +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 2 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 define i64 @propertyRead(i64* %obj) { entry: %resolveRead = inttoptr i64 -559038737 to i8* - %result = call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 5, i32 15, i8* %resolveRead, i32 1, i64* %obj) + %result = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 15, i8* %resolveRead, i32 1, i64* %obj) %add = add i64 %result, 3 ret i64 %add } ; Property Write -; CHECK-NEXT: .long 6 -; CHECK-NEXT: .long L{{.*}}-_propertyWrite -; CHECK-NEXT: .short 0 -; CHECK-NEXT: .short 0 -; -; FIXME: There are currently no stackmap entries. After moving to -; AnyRegCC, we will have entries for the object and return value. +; CHECK-LABEL: .long L{{.*}}-_propertyWrite +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 2 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .long 0 define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) { entry: %resolveWrite = inttoptr i64 -559038737 to i8* - call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 6, i32 15, i8* %resolveWrite, i32 2, i64* %obj, i64 %a) + call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 15, i8* %resolveWrite, i32 2, i64* %obj, i64 %a) ret void } @@ -132,8 +138,7 @@ entry: ; ; 2 live variables in registers. ; -; CHECK-NEXT: .long 7 -; CHECK-NEXT: .long L{{.*}}-_jsVoidCall +; CHECK-LABEL: .long L{{.*}}-_jsVoidCall ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 2 ; CHECK-NEXT: .byte 1 @@ -147,7 +152,7 @@ entry: define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) { entry: %resolveCall = inttoptr i64 -559038737 to i8* - call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 7, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2) + call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 7, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2) ret void } @@ -155,8 +160,7 @@ entry: ; ; 2 live variables in registers. ; -; CHECK: .long 8 -; CHECK-NEXT: .long L{{.*}}-_jsIntCall +; CHECK-LABEL: .long L{{.*}}-_jsIntCall ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 2 ; CHECK-NEXT: .byte 1 @@ -170,7 +174,7 @@ entry: define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) { entry: %resolveCall = inttoptr i64 -559038737 to i8* - %result = call i64 (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i32 8, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2) + %result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 8, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2) %add = add i64 %result, 3 ret i64 %add } @@ -179,19 +183,18 @@ entry: ; ; Verify 17 stack map entries. ; -; CHECK: .long 11 -; CHECK-NEXT: .long L{{.*}}-_spilledValue -; CHECK-NEXT: .short 0 -; CHECK-NEXT: .short 17 +; CHECK-LABEL: .long L{{.*}}-_spilledValue +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 17 ; ; Check that at least one is a spilled entry from RBP. ; Location: Indirect RBP + ... 
-; CHECK: .byte 3 -; CHECK-NEXT: .byte 8 -; CHECK-NEXT: .short 6 +; CHECK: .byte 3 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 6 define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) { entry: - call void (i32, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i32 11, i32 15, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) + call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 11, i32 15, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) ret void } @@ -199,35 +202,33 @@ entry: ; ; Verify 17 stack map entries. ; -; CHECK: .long 12 -; CHECK-LABEL: .long L{{.*}}-_spilledStackMapValue -; CHECK-NEXT: .short 0 -; CHECK-NEXT: .short 17 +; CHECK-LABEL: .long L{{.*}}-_spilledStackMapValue +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 17 ; ; Check that at least one is a spilled entry from RBP. ; Location: Indirect RBP + ... -; CHECK: .byte 3 -; CHECK-NEXT: .byte 8 -; CHECK-NEXT: .short 6 +; CHECK: .byte 3 +; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .short 6 define webkit_jscc void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) { entry: - call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 12, i32 15, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) + call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 12, i32 15, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) ret void } ; Spill a subregister stackmap operand. ; -; CHECK: .long 13 -; CHECK-LABEL: .long L{{.*}}-_spillSubReg -; CHECK-NEXT: .short 0 +; CHECK-LABEL: .long L{{.*}}-_spillSubReg +; CHECK-NEXT: .short 0 ; 4 locations -; CHECK-NEXT: .short 1 +; CHECK-NEXT: .short 1 ; ; Check that the subregister operand is a 4-byte spill. ; Location: Indirect, 4-byte, RBP + ... -; CHECK: .byte 3 -; CHECK-NEXT: .byte 4 -; CHECK-NEXT: .short 6 +; CHECK: .byte 3 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .short 6 define void @spillSubReg(i64 %arg) #0 { bb: br i1 undef, label %bb1, label %bb2 @@ -248,7 +249,7 @@ bb17: bb60: tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind - tail call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 13, i32 5, i32 %tmp32) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 13, i32 5, i32 %tmp32) unreachable bb61: @@ -258,24 +259,23 @@ bb61: ; Map a single byte subregister. There is no DWARF register number, so ; we expect the register to be encoded with the proper size and spill offset. 
;
-; CHECK: .long 14
-; CHECK-LABEL: .long L{{.*}}-_subRegOffset
-; CHECK-NEXT: .short 0
+; CHECK-LABEL: .long L{{.*}}-_subRegOffset
+; CHECK-NEXT: .short 0
; 2 locations
-; CHECK-NEXT: .short 2
+; CHECK-NEXT: .short 2
;
; Check that the subregister operands are 1-byte spills.
; Location 0: Register, 4-byte, AL
-; CHECK-NEXT: .byte 1
-; CHECK-NEXT: .byte 1
-; CHECK-NEXT: .short 0
-; CHECK-NEXT: .long 0
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .long 0
;
; Location 1: Register, 4-byte, BL
-; CHECK-NEXT: .byte 1
-; CHECK-NEXT: .byte 1
-; CHECK-NEXT: .short 3
-; CHECK-NEXT: .long 0
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .short 3
+; CHECK-NEXT: .long 0
define void @subRegOffset(i16 %arg) {
%v = mul i16 %arg, 5
%a0 = trunc i16 %v to i8
@@ -283,10 +283,87 @@ define void @subRegOffset(i16 %arg) {
%arghi = lshr i16 %v, 8
%a1 = trunc i16 %arghi to i8
tail call void asm sideeffect "nop", "~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
- tail call void (i32, i32, ...)* @llvm.experimental.stackmap(i32 14, i32 5, i8 %a0, i8 %a1)
+ tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 5, i8 %a0, i8 %a1)
ret void
}
-declare void @llvm.experimental.stackmap(i32, i32, ...)
-declare void @llvm.experimental.patchpoint.void(i32, i32, i8*, i32, ...)
-declare i64 @llvm.experimental.patchpoint.i64(i32, i32, i8*, i32, ...)
+; Map a constant value.
+;
+; CHECK-LABEL: .long L{{.*}}-_liveConstant
+; CHECK-NEXT: .short 0
+; 1 location
+; CHECK-NEXT: .short 1
+; Loc 0: SmallConstant
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .long 33
+
+define void @liveConstant() {
+ tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 5, i32 33)
+ ret void
+}
+
+; Directly map an alloca's address.
+;
+; Callsite 16
+; CHECK-LABEL: .long L{{.*}}-_directFrameIdx
+; CHECK-NEXT: .short 0
+; 1 location
+; CHECK-NEXT: .short 1
+; Loc 0: Direct RBP - ofs
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .long
+
+; Callsite 17
+; CHECK-LABEL: .long L{{.*}}-_directFrameIdx
+; CHECK-NEXT: .short 0
+; 2 locations
+; CHECK-NEXT: .short 2
+; Loc 0: Direct RBP - ofs
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .long
+; Loc 1: Direct RBP - ofs
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .long
+define void @directFrameIdx() {
+entry:
+ %metadata1 = alloca i64, i32 3, align 8
+ store i64 11, i64* %metadata1
+ store i64 12, i64* %metadata1
+ store i64 13, i64* %metadata1
+ call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 0, i64* %metadata1)
+ %metadata2 = alloca i8, i32 4, align 8
+ %metadata3 = alloca i16, i32 4, align 8
+ call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 17, i32 5, i8* null, i32 0, i8* %metadata2, i16* %metadata3)
+ ret void
+}
+
+; Test a 64-bit ID.
+;
+; CHECK: .quad 4294967295
+; CHECK-LABEL: .long L{{.*}}-_longid
+; CHECK: .quad 4294967296
+; CHECK-LABEL: .long L{{.*}}-_longid
+; CHECK: .quad 9223372036854775807
+; CHECK-LABEL: .long L{{.*}}-_longid
+; CHECK: .quad -1
+; CHECK-LABEL: .long L{{.*}}-_longid
+define void @longid() {
+entry:
+ tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 4294967295, i32 0, i8* null, i32 0)
+ tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 4294967296, i32 0, i8* null, i32 0)
+ tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 9223372036854775807, i32 0, i8* null, i32 0)
+ tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 -1, i32 0, i8* null, i32 0)
+ ret void
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stdcall-notailcall.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stdcall-notailcall.ll
index 8f522cda284a..c847ec7b6c08 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stdcall-notailcall.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/stdcall-notailcall.ll
@@ -10,4 +10,12 @@ entry:
ret void
}

+define x86_thiscallcc void @test2(%struct.I* %this, i32 %a) {
+; CHECK-LABEL: test2:
+; CHECK: calll _foo
+; CHECK: ret $4
+ tail call void @foo()
+ ret void
+}
+
declare void @foo()
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/unknown-location.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/unknown-location.ll
index 64e24feed9f2..d7ae46939035 100644
--- a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/unknown-location.ll
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/unknown-location.ll
@@ -19,6 +19,7 @@ entry:
}

!llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!12}
!0 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 1, metadata !6} ; [ DW_TAG_arg_variable ]
!1 = metadata !{i32 786478, metadata !10, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 1, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, i32, i32, i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
@@ -32,3 +33,4 @@ entry:
!9 = metadata !{metadata !1}
!10 = metadata !{metadata !"test.c", metadata !"/dir"}
!11 = metadata !{i32 0}
+!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/v4i32load-crash.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/v4i32load-crash.ll
new file mode 100644
index 000000000000..052c4c3c61b8
--- /dev/null
+++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/v4i32load-crash.ll
@@ -0,0 +1,27 @@
+; RUN: llc --mcpu=x86-64 --mattr=ssse3 < %s
+
+;PR18045:
+;Issue of selection for 'v4i32 load'.
+;This instruction is not legal for X86 CPUs with sse < 'sse4.1'.
+;This node was generated by the EltsFromConsecutiveLoads static function
+;in X86ISelLowering.cpp after the legalize stage.
+ +@e = external global [4 x i32], align 4 +@f = external global [4 x i32], align 4 + +; Function Attrs: nounwind +define void @fn3(i32 %el) { +entry: + %0 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 0) + %1 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 1) + %2 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 2) + %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 3) + %4 = insertelement <4 x i32> undef, i32 %0, i32 0 + %5 = insertelement <4 x i32> %4, i32 %1, i32 1 + %6 = insertelement <4 x i32> %5, i32 %2, i32 2 + %7 = insertelement <4 x i32> %6, i32 %3, i32 3 + %8 = add <4 x i32> %6, %7 + store <4 x i32> %8, <4 x i32>* bitcast ([4 x i32]* @f to <4 x i32>*) + ret void +} + diff --git a/external/bsd/llvm/dist/llvm/test/CodeGen/X86/vaargs.ll b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/vaargs.ll new file mode 100644 index 000000000000..ddeb7a336d4a --- /dev/null +++ b/external/bsd/llvm/dist/llvm/test/CodeGen/X86/vaargs.ll @@ -0,0 +1,67 @@ +; RUN: llc -mcpu=corei7-avx %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=NO-FLAGS +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +%struct.__va_list_tag = type { i32, i32, i8*, i8* } + +; Check that vastart gets the right thing. +define i32 @sum(i32 %count, ...) nounwind optsize ssp uwtable { +; CHECK: testb %al, %al +; CHECK-NEXT: je +; CHECK-NEXT: ## BB#{{[0-9]+}}: +; CHECK-NEXT: vmovaps %xmm0, 48(%rsp) +; CHECK-NEXT: vmovaps %xmm1, 64(%rsp) +; CHECK-NEXT: vmovaps %xmm2, 80(%rsp) +; CHECK-NEXT: vmovaps %xmm3, 96(%rsp) +; CHECK-NEXT: vmovaps %xmm4, 112(%rsp) +; CHECK-NEXT: vmovaps %xmm5, 128(%rsp) +; CHECK-NEXT: vmovaps %xmm6, 144(%rsp) +; CHECK-NEXT: vmovaps %xmm7, 160(%rsp) + +; Check that [EFLAGS] hasn't been pulled in. +; NO-FLAGS-NOT: %flags + + %ap = alloca [1 x %struct.__va_list_tag], align 16 + %1 = bitcast [1 x %struct.__va_list_tag]* %ap to i8* + call void @llvm.va_start(i8* %1) + %2 = icmp sgt i32 %count, 0 + br i1 %2, label %.lr.ph, label %._crit_edge + +.lr.ph: ; preds = %0 + %3 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 0 + %4 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2 + %.pre = load i32* %3, align 16 + br label %5 + +;