Import LLVM 3.5svn r198450.
This commit is contained in:
parent
d149ea39d0
commit
fdaf75aa90
|
@ -31,14 +31,14 @@ dnl===
|
|||
dnl===-----------------------------------------------------------------------===
|
||||
dnl Initialize autoconf and define the package name, version number and
|
||||
dnl address for reporting bugs.
|
||||
AC_INIT([LLVM],[3.4],[http://llvm.org/bugs/])
|
||||
AC_INIT([LLVM],[3.5svn],[http://llvm.org/bugs/])
|
||||
AC_DEFINE([LLVM_VERSION_MAJOR], [3], [Major version of the LLVM API])
|
||||
AC_DEFINE([LLVM_VERSION_MINOR], [4], [Minor version of the LLVM API])
|
||||
AC_DEFINE([LLVM_VERSION_MINOR], [5], [Minor version of the LLVM API])
|
||||
|
||||
dnl Provide a copyright substitution and ensure the copyright notice is included
|
||||
dnl in the output of --version option of the generated configure script.
|
||||
AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign."])
|
||||
AC_COPYRIGHT([Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign.])
|
||||
AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign."])
|
||||
AC_COPYRIGHT([Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign.])
|
||||
|
||||
dnl Indicate that we require autoconf 2.60 or later.
|
||||
AC_PREREQ(2.60)
|
||||
|
|
|
@ -37,7 +37,7 @@ OcamlDir := $(LibDir)/ocaml
|
|||
# Info from llvm-config and similar
|
||||
ifndef IS_CLEANING_TARGET
|
||||
ifdef UsedComponents
|
||||
UsedLibs = $(shell $(LLVM_CONFIG) --libs $(UsedComponents))
|
||||
UsedLibs = $(shell $(LLVM_CONFIG) --libs --system-libs $(UsedComponents))
|
||||
UsedLibNames = $(shell $(LLVM_CONFIG) --libnames $(UsedComponents))
|
||||
endif
|
||||
endif
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
This interface provides an OCaml API for LLVM scalar transforms, the
|
||||
classes in the [LLVMScalarOpts] library. *)
|
||||
|
||||
(** See the [llvm::createConstantPropogationPass] function. *)
|
||||
(** See the [llvm::createConstantPropagationPass] function. *)
|
||||
external add_constant_propagation : [<Llvm.PassManager.any] Llvm.PassManager.t
|
||||
-> unit
|
||||
= "llvm_add_constant_propagation"
|
||||
|
|
|
@ -16,9 +16,9 @@ class TestDisassembler(TestBase):
|
|||
self.assertEqual(count, 3)
|
||||
self.assertEqual(s, '\tjcxz\t-127')
|
||||
|
||||
def test_nonexistant_triple(self):
|
||||
def test_nonexistent_triple(self):
|
||||
with self.assertRaisesRegexp(Exception, "Could not obtain disassembler for triple"):
|
||||
Disassembler("nonexistant-triple-raises")
|
||||
Disassembler("nonexistent-triple-raises")
|
||||
|
||||
def test_get_instructions(self):
|
||||
sequence = '\x67\xe3\x81\x01\xc7' # jcxz -127; addl %eax, %edi
|
||||
|
|
|
@ -211,6 +211,9 @@ LLVM-specific variables
|
|||
**LLVM_ENABLE_THREADS**:BOOL
|
||||
Build with threads support, if available. Defaults to ON.
|
||||
|
||||
**LLVM_ENABLE_CXX11**:BOOL
|
||||
Build in C++11 mode, if available. Defaults to OFF.
|
||||
|
||||
**LLVM_ENABLE_ASSERTIONS**:BOOL
|
||||
Enables code assertions. Defaults to OFF if and only if ``CMAKE_BUILD_TYPE``
|
||||
is *Release*.
|
||||
|
|
|
@ -844,7 +844,7 @@ Here are more examples:
|
|||
|
||||
.. code-block:: c++
|
||||
|
||||
assert(Ty->isPointerType() && "Can't allocate a non pointer type!");
|
||||
assert(Ty->isPointerType() && "Can't allocate a non-pointer type!");
|
||||
|
||||
assert((Opcode == Shl || Opcode == Shr) && "ShiftInst Opcode invalid!");
|
||||
|
||||
|
|
|
@ -22,7 +22,6 @@ Basic Commands
|
|||
llvm-link
|
||||
llvm-ar
|
||||
llvm-nm
|
||||
llvm-prof
|
||||
llvm-config
|
||||
llvm-diff
|
||||
llvm-cov
|
||||
|
|
|
@ -10,7 +10,9 @@ DESCRIPTION
|
|||
-----------
|
||||
|
||||
:program:`llvm-symbolizer` reads object file names and addresses from standard
|
||||
input and prints corresponding source code locations to standard output. This
|
||||
input and prints corresponding source code locations to standard output.
|
||||
If an object file is specified on the command line, :program:`llvm-symbolizer` reads
|
||||
only addresses from standard input. This
|
||||
program uses debug info sections and symbol table in the object files.
|
||||
|
||||
EXAMPLE
|
||||
|
@ -45,10 +47,22 @@ EXAMPLE
|
|||
|
||||
_main
|
||||
/tmp/source_x86_64.cc:8
|
||||
$ cat addr2.txt
|
||||
0x4004f4
|
||||
0x401000
|
||||
$ llvm-symbolizer -obj=a.out < addr2.txt
|
||||
main
|
||||
/tmp/a.cc:4
|
||||
|
||||
foo(int)
|
||||
/tmp/a.cc:12
|
||||
|
||||
OPTIONS
|
||||
-------
|
||||
|
||||
.. option:: -obj
|
||||
Path to object file to be symbolized.
|
||||
|
||||
.. option:: -functions
|
||||
|
||||
Print function names as well as source file/line locations. Defaults to true.
|
||||
|
|
|
@ -1276,7 +1276,7 @@ The ``cl::getRegisteredOptions`` function
|
|||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The ``cl::getRegisteredOptions`` function is designed to give a programmer
|
||||
access to declared non positional command line options so that how they appear
|
||||
access to declared non-positional command line options so that how they appear
|
||||
in ``-help`` can be modified prior to calling `cl::ParseCommandLineOptions`_.
|
||||
Note this method should not be called during any static initialisation because
|
||||
it cannot be guaranteed that all options will have been initialised. Hence it
|
||||
|
|
|
@ -80,8 +80,9 @@ R600
|
|||
SPARC
|
||||
-----
|
||||
|
||||
* `SPARC resources <http://www.sparc.org/resource.htm>`_
|
||||
* `SPARC standards <http://www.sparc.org/standards.html>`_
|
||||
* `SPARC standards <http://sparc.org/standards>`_
|
||||
* `SPARC V9 ABI <http://sparc.org/standards/64.psabi.1.35.ps.Z>`_
|
||||
* `SPARC V8 ABI <http://sparc.org/standards/psABI3rd.pdf>`_
|
||||
|
||||
SystemZ
|
||||
-------
|
||||
|
|
|
@ -37,7 +37,7 @@ X86/COFF-Dependent
|
|||
Relocations
|
||||
^^^^^^^^^^^
|
||||
|
||||
The following additional relocation type is supported:
|
||||
The following additional relocation types are supported:
|
||||
|
||||
**@IMGREL** (AT&T syntax only) generates an image-relative relocation that
|
||||
corresponds to the COFF relocation types ``IMAGE_REL_I386_DIR32NB`` (32-bit) or
|
||||
|
@ -54,6 +54,22 @@ corresponds to the COFF relocation types ``IMAGE_REL_I386_DIR32NB`` (32-bit) or
|
|||
.long (fun@imgrel + 0x3F)
|
||||
.long $unwind$fun@imgrel
|
||||
|
||||
**.secrel32** generates a relocation that corresponds to the COFF relocation
|
||||
types ``IMAGE_REL_I386_SECREL`` (32-bit) or ``IMAGE_REL_AMD64_SECREL`` (64-bit).
|
||||
|
||||
**.secidx** relocation generates an index of the section that contains
|
||||
the target. It corresponds to the COFF relocation types
|
||||
``IMAGE_REL_I386_SECTION`` (32-bit) or ``IMAGE_REL_AMD64_SECTION`` (64-bit).
|
||||
|
||||
.. code-block:: gas
|
||||
|
||||
.section .debug$S,"rn"
|
||||
.long 4
|
||||
.long 242
|
||||
.long 40
|
||||
.secrel32 _function_name
|
||||
.secidx _function_name
|
||||
...
|
||||
|
||||
``.linkonce`` Directive
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
@ -127,7 +143,7 @@ MC supports passing the information in ``.linkonce`` at the end of
|
|||
Symbol1:
|
||||
.long 1
|
||||
|
||||
Note that in the combined form the COMDAT symbol is explict. This
|
||||
Note that in the combined form the COMDAT symbol is explicit. This
|
||||
extension exists to support multiple sections with the same name in
|
||||
different comdats:
|
||||
|
||||
|
|
|
@ -238,6 +238,8 @@ when qualifying the build of ``llvm``, ``clang``, and ``dragonegg``.
|
|||
+--------------+---------------+----------------------+
|
||||
| x86-64 | FreeBSD | gcc 4.2.X |
|
||||
+--------------+---------------+----------------------+
|
||||
| ARMv7 | Linux | gcc 4.6.X, gcc 4.7.X |
|
||||
+--------------+---------------+----------------------+
|
||||
|
||||
Release Qualification Criteria
|
||||
------------------------------
|
||||
|
@ -298,6 +300,10 @@ Specific Target Qualification Details
|
|||
| | | | clang regression tests, |
|
||||
| | | | test-suite |
|
||||
+--------------+-------------+----------------+-----------------------------+
|
||||
| ARMv7A | Linux | last release | llvm regression tests, |
|
||||
| | | | clang regression tests, |
|
||||
| | | | test-suite |
|
||||
+--------------+-------------+----------------+-----------------------------+
|
||||
|
||||
Community Testing
|
||||
-----------------
|
||||
|
|
|
@ -0,0 +1,140 @@
|
|||
==========================================
|
||||
Design and Usage of the InAlloca Attribute
|
||||
==========================================
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
.. Warning:: This feature is unstable and not fully implemented.
|
||||
|
||||
The :ref:`attr_inalloca` attribute is designed to allow taking the
|
||||
address of an aggregate argument that is being passed by value through
|
||||
memory. Primarily, this feature is required for compatibility with the
|
||||
Microsoft C++ ABI. Under that ABI, class instances that are passed by
|
||||
value are constructed directly into argument stack memory. Prior to the
|
||||
addition of inalloca, calls in LLVM were indivisible instructions.
|
||||
There was no way to perform intermediate work, such as object
|
||||
construction, between the first stack adjustment and the final control
|
||||
transfer. With inalloca, each argument is modelled as an alloca, which
|
||||
can be stored to independently of the call. Unfortunately, this
|
||||
complicated feature comes with a large set of restrictions designed to
|
||||
bound the lifetime of the argument memory around the call, which are
|
||||
explained in this document.
|
||||
|
||||
For now, it is recommended that frontends and optimizers avoid producing
|
||||
this construct, primarily because it forces the use of a base pointer.
|
||||
This feature may grow in the future to allow general mid-level
|
||||
optimization, but for now, it should be regarded as less efficient than
|
||||
passing by value with a copy.
|
||||
|
||||
Intended Usage
|
||||
==============
|
||||
|
||||
In the example below, ``f`` is attempting to pass a default-constructed
|
||||
``Foo`` object to ``g`` by value.
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
%Foo = type { i32, i32 }
|
||||
declare void @Foo_ctor(%Foo* %this)
|
||||
declare void @g(%Foo* inalloca %arg)
|
||||
|
||||
define void @f() {
|
||||
...
|
||||
|
||||
bb1:
|
||||
%base = call i8* @llvm.stacksave()
|
||||
%arg = alloca %Foo
|
||||
invoke void @Foo_ctor(%Foo* %arg)
|
||||
to label %invoke.cont unwind %invoke.unwind
|
||||
|
||||
invoke.cont:
|
||||
call void @g(%Foo* inalloca %arg)
|
||||
call void @llvm.stackrestore(i8* %base)
|
||||
...
|
||||
|
||||
invoke.unwind:
|
||||
call void @llvm.stackrestore(i8* %base)
|
||||
...
|
||||
}
|
||||
|
||||
The alloca in this example is dynamic, meaning it is not in the entry
|
||||
block, and it can be executed more than once. Due to the restrictions
|
||||
against allocas between an alloca used with inalloca and its associated
|
||||
call site, all allocas used with inalloca are considered dynamic.
|
||||
|
||||
To avoid any stack leakage, the frontend saves the current stack pointer
|
||||
with a call to :ref:`llvm.stacksave <int_stacksave>`. Then, it
|
||||
allocates the argument stack space with alloca and calls the default
|
||||
constructor. One important consideration is that the default
|
||||
constructor could throw an exception, so the frontend has to create a
|
||||
landing pad. At this point, if there were any other inalloca arguments,
|
||||
the frontend would have to destruct them before restoring the stack
|
||||
pointer. If the constructor does not unwind, ``g`` is called, and then
|
||||
the stack is restored.
|
||||
|
||||
Design Considerations
|
||||
=====================
|
||||
|
||||
Lifetime
|
||||
--------
|
||||
|
||||
The biggest design consideration for this feature is object lifetime.
|
||||
We cannot model the arguments as static allocas in the entry block,
|
||||
because all calls need to use the memory that is at the end of the call
|
||||
frame to pass arguments. We cannot vend pointers to that memory at
|
||||
function entry because after code generation they will alias. In the
|
||||
current design, the rule against allocas between the inalloca alloca
|
||||
values and the call site avoids this problem, but it creates a cleanup
|
||||
problem. Cleanup and lifetime are handled explicitly with stack save and
|
||||
restore calls. In the future, we may be able to avoid this by using
|
||||
:ref:`llvm.lifetime.start <int_lifestart>` and :ref:`llvm.lifetime.end
|
||||
<int_lifeend>` instead.
|
||||
|
||||
Nested Calls and Copy Elision
|
||||
-----------------------------
|
||||
|
||||
The next consideration is the ability for the frontend to perform copy
|
||||
elision in the face of nested calls. Consider the evaluation of
|
||||
``foo(foo(Bar()))``, where ``foo`` takes and returns a ``Bar`` object by
|
||||
value and ``Bar`` has non-trivial constructors. In this case, we want
|
||||
to be able to elide copies into ``foo``'s argument slots. That means we
|
||||
need to have more than one set of argument frames active at the same
|
||||
time. First, we need to allocate the frame for the outer call so we can
|
||||
pass it in as the hidden struct return pointer to the middle call. Then
|
||||
we do the same for the middle call, allocating a frame and passing its
|
||||
address to ``Bar``'s default constructor. By wrapping the evaluation of
|
||||
the inner ``foo`` with stack save and restore, we can have multiple
|
||||
overlapping active call frames.
|
||||
|
||||
Callee-cleanup Calling Conventions
|
||||
----------------------------------
|
||||
|
||||
Another wrinkle is the existence of callee-cleanup conventions. On
|
||||
Windows, all methods and many other functions adjust the stack to clear
|
||||
the memory used to pass their arguments. In some sense, this means that
|
||||
the allocas are automatically cleared by the call. However, LLVM
|
||||
models this as a write of undef to all of the inalloca values
|
||||
passed to the call instead of a stack adjustment. Frontends should
|
||||
still restore the stack pointer to avoid a stack leak.
|
||||
|
||||
Exceptions
|
||||
----------
|
||||
|
||||
There is also the possibility of an exception. If argument evaluation
|
||||
or copy construction throws an exception, the landing pad must do
|
||||
cleanup, which includes adjusting the stack pointer to avoid a stack
|
||||
leak. This means the cleanup of the stack memory cannot be tied to the
|
||||
call itself. There needs to be a separate IR-level instruction that can
|
||||
perform independent cleanup of arguments.
|
||||
|
||||
Efficiency
|
||||
----------
|
||||
|
||||
Eventually, it should be possible to generate efficient code for this
|
||||
construct. In particular, using inalloca should not require a base
|
||||
pointer. If the backend can prove that all points in the CFG only have
|
||||
one possible stack level, then it can address the stack directly from
|
||||
the stack pointer. While this is not yet implemented, the plan is that
|
||||
the inalloca attribute should not change much, but the frontend IR
|
||||
generation recommendations may change.
|
|
@ -315,7 +315,7 @@ the properties which are associated with that component.
|
|||
|
||||
``BuildTool`` components are like ``Tool`` components, except that the
|
||||
tool is supposed to be built for the platform where the build is running
|
||||
(instead of that platform being targetted). Build systems are expected
|
||||
(instead of that platform being targeted). Build systems are expected
|
||||
to handle the fact that required libraries may need to be built for
|
||||
multiple platforms in order to be able to link this tool.
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ LLVM Language Reference Manual
|
|||
|
||||
.. contents::
|
||||
:local:
|
||||
:depth: 3
|
||||
:depth: 4
|
||||
|
||||
Abstract
|
||||
========
|
||||
|
@ -289,13 +289,9 @@ symbols from (to) DLLs (Dynamic Link Libraries).
|
|||
pointer to a pointer in a DLL, so that it can be referenced with the
|
||||
``dllimport`` attribute. On Microsoft Windows targets, the pointer
|
||||
name is formed by combining ``__imp_`` and the function or variable
|
||||
name.
|
||||
|
||||
For example, since the "``.LC0``" variable is defined to be internal, if
|
||||
another module defined a "``.LC0``" variable and was linked with this
|
||||
one, one of the two would be renamed, preventing a collision. Since
|
||||
"``main``" and "``puts``" are external (i.e., lacking any linkage
|
||||
declarations), they are accessible outside of the current module.
|
||||
name. Since this linkage exists for defining a dll interface, the
|
||||
compiler, assembler and linker know it is externally referenced and
|
||||
must refrain from deleting the symbol.
|
||||
|
||||
It is illegal for a function *declaration* to have any linkage type
|
||||
other than ``external``, ``dllimport`` or ``extern_weak``.
|
||||
|
@ -370,6 +366,18 @@ added in the future:
|
|||
accessed runtime components pinned to specific hardware registers.
|
||||
At the moment only X86 supports this convention (both 32 and 64
|
||||
bit).
|
||||
"``webkit_jscc``" - WebKit's JavaScript calling convention
|
||||
This calling convention has been implemented for `WebKit FTL JIT
|
||||
<https://trac.webkit.org/wiki/FTLJIT>`_. It passes arguments on the
|
||||
stack right to left (as cdecl does), and returns a value in the
|
||||
platform's customary return register.
|
||||
"``anyregcc``" - Dynamic calling convention for code patching
|
||||
This is a special convention that supports patching an arbitrary code
|
||||
sequence in place of a call site. This convention forces the call
|
||||
arguments into registers but allows them to be dynamically
|
||||
allocated. This can currently only be used with calls to
|
||||
llvm.experimental.patchpoint because only this intrinsic records
|
||||
the location of its arguments in a side table. See :doc:`StackMaps`.
|
||||
"``cc <n>``" - Numbered convention
|
||||
Any calling convention may be specified by number, allowing
|
||||
target-specific calling conventions to be used. Target specific
|
||||
|
@ -507,8 +515,8 @@ variables defined within the module are not modified from their
|
|||
initial values before the start of the global initializer. This is
|
||||
true even for variables potentially accessible from outside the
|
||||
module, including those with external linkage or appearing in
|
||||
``@llvm.used``. This assumption may be suppressed by marking the
|
||||
variable with ``externally_initialized``.
|
||||
``@llvm.used`` or dllexported variables. This assumption may be suppressed
|
||||
by marking the variable with ``externally_initialized``.
|
||||
|
||||
An explicit alignment may be specified for a global, which must be a
|
||||
power of 2. If not present, or if the alignment is set to zero, the
|
||||
|
@ -618,7 +626,7 @@ Syntax::
|
|||
The linkage must be one of ``private``, ``linker_private``,
|
||||
``linker_private_weak``, ``internal``, ``linkonce``, ``weak``,
|
||||
``linkonce_odr``, ``weak_odr``, ``external``. Note that some system linkers
|
||||
might not correctly handle dropping a weak symbol that is aliased by a non weak
|
||||
might not correctly handle dropping a weak symbol that is aliased by a non-weak
|
||||
alias.
|
||||
|
||||
.. _namedmetadatastructure:
|
||||
|
@ -701,6 +709,39 @@ Currently, only the following parameter attributes are defined:
|
|||
site. If the alignment is not specified, then the code generator
|
||||
makes a target-specific assumption.
|
||||
|
||||
.. _attr_inalloca:
|
||||
|
||||
``inalloca``
|
||||
|
||||
.. Warning:: This feature is unstable and not fully implemented.
|
||||
|
||||
The ``inalloca`` argument attribute allows the caller to get the
|
||||
address of an outgoing argument to a ``call`` or ``invoke`` before
|
||||
it executes. It is similar to ``byval`` in that it is used to pass
|
||||
arguments by value, but it guarantees that the argument will not be
|
||||
copied.
|
||||
|
||||
To be :ref:`well formed <wellformed>`, the caller must pass an
|
||||
alloca value into an ``inalloca`` parameter, and an alloca may be
|
||||
used as an ``inalloca`` argument at most once. The attribute can
|
||||
only be applied to parameters that would be passed in memory and not
|
||||
registers. The ``inalloca`` attribute cannot be used in conjunction
|
||||
with other attributes that affect argument storage, like ``inreg``,
|
||||
``nest``, ``sret``, or ``byval``. The ``inalloca`` stack space is
|
||||
considered to be clobbered by any call that uses it, so any
|
||||
``inalloca`` parameters cannot be marked ``readonly``.
|
||||
|
||||
Allocas passed with ``inalloca`` to a call must be in the opposite
|
||||
order of the parameter list, meaning that the rightmost argument
|
||||
must be allocated first. If a call has inalloca arguments, no other
|
||||
allocas can occur between the first alloca used by the call and the
|
||||
call site, unless they are cleared by calls to
|
||||
:ref:`llvm.stackrestore <int_stackrestore>`. Violating these rules
|
||||
results in undefined behavior at runtime.
|
||||
|
||||
See :doc:`InAlloca` for more information on how to use this
|
||||
attribute.
|
||||
|
||||
``sret``
|
||||
This indicates that the pointer parameter specifies the address of a
|
||||
structure that is the return value of the function in the source
|
||||
|
@ -1119,9 +1160,15 @@ as follows:
|
|||
``a<size>:<abi>:<pref>``
|
||||
This specifies the alignment for an aggregate type of a given bit
|
||||
``<size>``.
|
||||
``s<size>:<abi>:<pref>``
|
||||
This specifies the alignment for a stack object of a given bit
|
||||
``<size>``.
|
||||
``m:<mangling>``
|
||||
If present, specifies that LLVM names are mangled in the output. The
|
||||
options are
|
||||
* ``e``: ELF mangling: Private symbols get a ``.L`` prefix.
|
||||
* ``m``: Mips mangling: Private symbols get a ``$`` prefix.
|
||||
* ``o``: Mach-O mangling: Private symbols get an ``L`` prefix. Other
|
||||
symbols get a ``_`` prefix.
|
||||
* ``c``: COFF prefix: Similar to Mach-O, but stdcall and fastcall
|
||||
functions also get a suffix based on the frame size.
|
||||
``n<size1>:<size2>:<size3>...``
|
||||
This specifies a set of native integer widths for the target CPU in
|
||||
bits. For example, it might contain ``n32`` for 32-bit PowerPC,
|
||||
|
@ -1151,7 +1198,7 @@ specifications are given in this list:
|
|||
- ``f128:128:128`` - quad is 128-bit aligned
|
||||
- ``v64:64:64`` - 64-bit vector is 64-bit aligned
|
||||
- ``v128:128:128`` - 128-bit vector is 128-bit aligned
|
||||
- ``a0:0:64`` - aggregates are 64-bit aligned
|
||||
- ``a:0:64`` - aggregates are 64-bit aligned
|
||||
|
||||
When LLVM is determining the alignment for a given type, it uses the
|
||||
following rules:
|
||||
|
@ -1480,80 +1527,90 @@ transformation. A strong type system makes it easier to read the
|
|||
generated code and enables novel analyses and transformations that are
|
||||
not feasible to perform on normal three address code representations.
|
||||
|
||||
.. _typeclassifications:
|
||||
.. _t_void:
|
||||
|
||||
Type Classifications
|
||||
--------------------
|
||||
Void Type
|
||||
---------
|
||||
|
||||
The types fall into a few useful classifications:
|
||||
:Overview:
|
||||
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
The void type does not represent any value and has no size.
|
||||
|
||||
* - Classification
|
||||
- Types
|
||||
|
||||
* - :ref:`integer <t_integer>`
|
||||
- ``i1``, ``i2``, ``i3``, ... ``i8``, ... ``i16``, ... ``i32``, ...
|
||||
``i64``, ...
|
||||
|
||||
* - :ref:`floating point <t_floating>`
|
||||
- ``half``, ``float``, ``double``, ``x86_fp80``, ``fp128``,
|
||||
``ppc_fp128``
|
||||
:Syntax:
|
||||
|
||||
|
||||
* - first class
|
||||
::
|
||||
|
||||
.. _t_firstclass:
|
||||
void
|
||||
|
||||
- :ref:`integer <t_integer>`, :ref:`floating point <t_floating>`,
|
||||
:ref:`pointer <t_pointer>`, :ref:`vector <t_vector>`,
|
||||
:ref:`structure <t_struct>`, :ref:`array <t_array>`,
|
||||
:ref:`label <t_label>`, :ref:`metadata <t_metadata>`.
|
||||
|
||||
* - :ref:`primitive <t_primitive>`
|
||||
- :ref:`label <t_label>`,
|
||||
:ref:`void <t_void>`,
|
||||
:ref:`integer <t_integer>`,
|
||||
:ref:`floating point <t_floating>`,
|
||||
:ref:`x86mmx <t_x86mmx>`,
|
||||
:ref:`metadata <t_metadata>`.
|
||||
.. _t_function:
|
||||
|
||||
* - :ref:`derived <t_derived>`
|
||||
- :ref:`array <t_array>`,
|
||||
:ref:`function <t_function>`,
|
||||
:ref:`pointer <t_pointer>`,
|
||||
:ref:`structure <t_struct>`,
|
||||
:ref:`vector <t_vector>`,
|
||||
:ref:`opaque <t_opaque>`.
|
||||
Function Type
|
||||
-------------
|
||||
|
||||
:Overview:
|
||||
|
||||
|
||||
The function type can be thought of as a function signature. It consists of a
|
||||
return type and a list of formal parameter types. The return type of a function
|
||||
type is a void type or first class type --- except for :ref:`label <t_label>`
|
||||
and :ref:`metadata <t_metadata>` types.
|
||||
|
||||
:Syntax:
|
||||
|
||||
::
|
||||
|
||||
<returntype> (<parameter list>)
|
||||
|
||||
...where '``<parameter list>``' is a comma-separated list of type
|
||||
specifiers. Optionally, the parameter list may include a type ``...``, which
|
||||
indicates that the function takes a variable number of arguments. Variable
|
||||
argument functions can access their arguments with the :ref:`variable argument
|
||||
handling intrinsic <int_varargs>` functions. '``<returntype>``' is any type
|
||||
except :ref:`label <t_label>` and :ref:`metadata <t_metadata>`.
|
||||
|
||||
:Examples:
|
||||
|
||||
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| ``i32 (i32)`` | function taking an ``i32``, returning an ``i32`` |
|
||||
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| ``float (i16, i32 *) *`` | :ref:`Pointer <t_pointer>` to a function that takes an ``i16`` and a :ref:`pointer <t_pointer>` to ``i32``, returning ``float``. |
|
||||
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| ``i32 (i8*, ...)`` | A vararg function that takes at least one :ref:`pointer <t_pointer>` to ``i8`` (char in C), which returns an integer. This is the signature for ``printf`` in LLVM. |
|
||||
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| ``{i32, i32} (i32)`` | A function taking an ``i32``, returning a :ref:`structure <t_struct>` containing two ``i32`` values |
|
||||
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
|
||||
.. _t_firstclass:
|
||||
|
||||
First Class Types
|
||||
-----------------
|
||||
|
||||
The :ref:`first class <t_firstclass>` types are perhaps the most important.
|
||||
Values of these types are the only ones which can be produced by
|
||||
instructions.
|
||||
|
||||
.. _t_primitive:
|
||||
.. _t_single_value:
|
||||
|
||||
Primitive Types
|
||||
---------------
|
||||
Single Value Types
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The primitive types are the fundamental building blocks of the LLVM
|
||||
system.
|
||||
These are the types that are valid in registers from CodeGen's perspective.
|
||||
|
||||
.. _t_integer:
|
||||
|
||||
Integer Type
|
||||
^^^^^^^^^^^^
|
||||
""""""""""""
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
:Overview:
|
||||
|
||||
The integer type is a very simple type that simply specifies an
|
||||
arbitrary bit width for the integer type desired. Any bit width from 1
|
||||
bit to 2\ :sup:`23`\ -1 (about 8 million) can be specified.
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
:Syntax:
|
||||
|
||||
::
|
||||
|
||||
|
@ -1563,7 +1620,7 @@ The number of bits the integer will occupy is specified by the ``N``
|
|||
value.
|
||||
|
||||
Examples:
|
||||
"""""""""
|
||||
*********
|
||||
|
||||
+----------------+------------------------------------------------+
|
||||
| ``i1`` | a single-bit integer. |
|
||||
|
@ -1576,7 +1633,7 @@ Examples:
|
|||
.. _t_floating:
|
||||
|
||||
Floating Point Types
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
""""""""""""""""""""
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
@ -1605,10 +1662,9 @@ Floating Point Types
|
|||
.. _t_x86mmx:
|
||||
|
||||
X86mmx Type
|
||||
^^^^^^^^^^^
|
||||
"""""""""""
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
:Overview:
|
||||
|
||||
The x86mmx type represents a value held in an MMX register on an x86
|
||||
machine. The operations allowed on it are quite limited: parameters and
|
||||
|
@ -1617,42 +1673,92 @@ instructions are represented as intrinsic or asm calls with arguments
|
|||
and/or results of this type. There are no arrays, vectors or constants
|
||||
of this type.
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
:Syntax:
|
||||
|
||||
::
|
||||
|
||||
x86mmx
|
||||
|
||||
.. _t_void:
|
||||
|
||||
Void Type
|
||||
^^^^^^^^^
|
||||
.. _t_pointer:
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
Pointer Type
|
||||
""""""""""""
|
||||
|
||||
The void type does not represent any value and has no size.
|
||||
:Overview:
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
The pointer type is used to specify memory locations. Pointers are
|
||||
commonly used to reference objects in memory.
|
||||
|
||||
Pointer types may have an optional address space attribute defining the
|
||||
numbered address space where the pointed-to object resides. The default
|
||||
address space is number zero. The semantics of non-zero address spaces
|
||||
are target-specific.
|
||||
|
||||
Note that LLVM does not permit pointers to void (``void*``) nor does it
|
||||
permit pointers to labels (``label*``). Use ``i8*`` instead.
|
||||
|
||||
:Syntax:
|
||||
|
||||
::
|
||||
|
||||
void
|
||||
<type> *
|
||||
|
||||
:Examples:
|
||||
|
||||
+-------------------------+--------------------------------------------------------------------------------------------------------------+
|
||||
| ``[4 x i32]*`` | A :ref:`pointer <t_pointer>` to :ref:`array <t_array>` of four ``i32`` values. |
|
||||
+-------------------------+--------------------------------------------------------------------------------------------------------------+
|
||||
| ``i32 (i32*) *`` | A :ref:`pointer <t_pointer>` to a :ref:`function <t_function>` that takes an ``i32*``, returning an ``i32``. |
|
||||
+-------------------------+--------------------------------------------------------------------------------------------------------------+
|
||||
| ``i32 addrspace(5)*`` | A :ref:`pointer <t_pointer>` to an ``i32`` value that resides in address space #5. |
|
||||
+-------------------------+--------------------------------------------------------------------------------------------------------------+
|
||||
|
||||
.. _t_vector:
|
||||
|
||||
Vector Type
|
||||
"""""""""""
|
||||
|
||||
:Overview:
|
||||
|
||||
A vector type is a simple derived type that represents a vector of
|
||||
elements. Vector types are used when multiple primitive data are
|
||||
operated on in parallel using a single instruction (SIMD). A vector type
|
||||
requires a size (number of elements) and an underlying primitive data
|
||||
type. Vector types are considered :ref:`first class <t_firstclass>`.
|
||||
|
||||
:Syntax:
|
||||
|
||||
::
|
||||
|
||||
< <# elements> x <elementtype> >
|
||||
|
||||
The number of elements is a constant integer value larger than 0;
|
||||
elementtype may be any integer or floating point type, or a pointer to
|
||||
these types. Vectors of size zero are not allowed.
|
||||
|
||||
:Examples:
|
||||
|
||||
+-------------------+--------------------------------------------------+
|
||||
| ``<4 x i32>`` | Vector of 4 32-bit integer values. |
|
||||
+-------------------+--------------------------------------------------+
|
||||
| ``<8 x float>`` | Vector of 8 32-bit floating-point values. |
|
||||
+-------------------+--------------------------------------------------+
|
||||
| ``<2 x i64>`` | Vector of 2 64-bit integer values. |
|
||||
+-------------------+--------------------------------------------------+
|
||||
| ``<4 x i64*>`` | Vector of 4 pointers to 64-bit integer values. |
|
||||
+-------------------+--------------------------------------------------+
|
||||
|
||||
.. _t_label:
|
||||
|
||||
Label Type
|
||||
^^^^^^^^^^
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
:Overview:
|
||||
|
||||
The label type represents code labels.
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
:Syntax:
|
||||
|
||||
::
|
||||
|
||||
|
@ -1663,31 +1769,17 @@ Syntax:
|
|||
Metadata Type
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
:Overview:
|
||||
|
||||
The metadata type represents embedded metadata. No derived types may be
|
||||
created from metadata except for :ref:`function <t_function>` arguments.
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
:Syntax:
|
||||
|
||||
::
|
||||
|
||||
metadata
|
||||
|
||||
.. _t_derived:
|
||||
|
||||
Derived Types
|
||||
-------------
|
||||
|
||||
The real power in LLVM comes from the derived types in the system. This
|
||||
is what allows a programmer to represent arrays, functions, pointers,
|
||||
and other useful types. Each of these types contains one or more element
|
||||
types which may be a primitive type, or another derived type. For
|
||||
example, it is possible to have a two dimensional array, using an array
|
||||
as the element type of another array.
|
||||
|
||||
.. _t_aggregate:
|
||||
|
||||
Aggregate Types
|
||||
|
@ -1701,17 +1793,15 @@ aggregate types.
|
|||
.. _t_array:
|
||||
|
||||
Array Type
|
||||
^^^^^^^^^^
|
||||
""""""""""
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
:Overview:
|
||||
|
||||
The array type is a very simple derived type that arranges elements
|
||||
sequentially in memory. The array type requires a size (number of
|
||||
elements) and an underlying data type.
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
:Syntax:
|
||||
|
||||
::
|
||||
|
||||
|
@ -1720,8 +1810,7 @@ Syntax:
|
|||
The number of elements is a constant integer value; ``elementtype`` may
|
||||
be any type with a size.
|
||||
|
||||
Examples:
|
||||
"""""""""
|
||||
:Examples:
|
||||
|
||||
+------------------+--------------------------------------+
|
||||
| ``[40 x i32]`` | Array of 40 32-bit integer values. |
|
||||
|
@ -1749,53 +1838,12 @@ LLVM with a zero length array type. An implementation of 'pascal style
|
|||
arrays' in LLVM could use the type "``{ i32, [0 x float]}``", for
|
||||
example.
|
||||
|
||||
.. _t_function:
|
||||
|
||||
Function Type
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The function type can be thought of as a function signature. It consists of a
|
||||
return type and a list of formal parameter types. The return type of a function
|
||||
type is a void type or first class type --- except for :ref:`label <t_label>`
|
||||
and :ref:`metadata <t_metadata>` types.
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
|
||||
::
|
||||
|
||||
<returntype> (<parameter list>)
|
||||
|
||||
...where '``<parameter list>``' is a comma-separated list of type
|
||||
specifiers. Optionally, the parameter list may include a type ``...``, which
|
||||
indicates that the function takes a variable number of arguments. Variable
|
||||
argument functions can access their arguments with the :ref:`variable argument
|
||||
handling intrinsic <int_varargs>` functions. '``<returntype>``' is any type
|
||||
except :ref:`label <t_label>` and :ref:`metadata <t_metadata>`.
|
||||
|
||||
Examples:
|
||||
"""""""""
|
||||
|
||||
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| ``i32 (i32)`` | function taking an ``i32``, returning an ``i32`` |
|
||||
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| ``float (i16, i32 *) *`` | :ref:`Pointer <t_pointer>` to a function that takes an ``i16`` and a :ref:`pointer <t_pointer>` to ``i32``, returning ``float``. |
|
||||
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| ``i32 (i8*, ...)`` | A vararg function that takes at least one :ref:`pointer <t_pointer>` to ``i8`` (char in C), which returns an integer. This is the signature for ``printf`` in LLVM. |
|
||||
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| ``{i32, i32} (i32)`` | A function taking an ``i32``, returning a :ref:`structure <t_struct>` containing two ``i32`` values |
|
||||
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
|
||||
.. _t_struct:
|
||||
|
||||
Structure Type
|
||||
^^^^^^^^^^^^^^
|
||||
""""""""""""""
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
:Overview:
|
||||
|
||||
The structure type is used to represent a collection of data members
|
||||
together in memory. The elements of a structure may be any type that has
|
||||
|
@ -1819,16 +1867,14 @@ Literal types are uniqued by their contents and can never be recursive
|
|||
or opaque since there is no way to write one. Identified types can be
|
||||
recursive, can be opaqued, and are never uniqued.
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
:Syntax:
|
||||
|
||||
::
|
||||
|
||||
%T1 = type { <type list> } ; Identified normal struct type
|
||||
%T2 = type <{ <type list> }> ; Identified packed struct type
|
||||
|
||||
Examples:
|
||||
"""""""""
|
||||
:Examples:
|
||||
|
||||
+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
||||
| ``{ i32, i32, i32 }`` | A triple of three ``i32`` values |
|
||||
|
@ -1841,105 +1887,27 @@ Examples:
|
|||
.. _t_opaque:
|
||||
|
||||
Opaque Structure Types
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
""""""""""""""""""""""
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
:Overview:
|
||||
|
||||
Opaque structure types are used to represent named structure types that
|
||||
do not have a body specified. This corresponds (for example) to the C
|
||||
notion of a forward declared structure.
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
:Syntax:
|
||||
|
||||
::
|
||||
|
||||
%X = type opaque
|
||||
%52 = type opaque
|
||||
|
||||
Examples:
|
||||
"""""""""
|
||||
:Examples:
|
||||
|
||||
+--------------+-------------------+
|
||||
| ``opaque`` | An opaque type. |
|
||||
+--------------+-------------------+
|
||||
|
||||
.. _t_pointer:
|
||||
|
||||
Pointer Type
|
||||
^^^^^^^^^^^^
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The pointer type is used to specify memory locations. Pointers are
|
||||
commonly used to reference objects in memory.
|
||||
|
||||
Pointer types may have an optional address space attribute defining the
|
||||
numbered address space where the pointed-to object resides. The default
|
||||
address space is number zero. The semantics of non-zero address spaces
|
||||
are target-specific.
|
||||
|
||||
Note that LLVM does not permit pointers to void (``void*``) nor does it
|
||||
permit pointers to labels (``label*``). Use ``i8*`` instead.
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
|
||||
::
|
||||
|
||||
<type> *
|
||||
|
||||
Examples:
|
||||
"""""""""
|
||||
|
||||
+-------------------------+--------------------------------------------------------------------------------------------------------------+
|
||||
| ``[4 x i32]*`` | A :ref:`pointer <t_pointer>` to :ref:`array <t_array>` of four ``i32`` values. |
|
||||
+-------------------------+--------------------------------------------------------------------------------------------------------------+
|
||||
| ``i32 (i32*) *`` | A :ref:`pointer <t_pointer>` to a :ref:`function <t_function>` that takes an ``i32*``, returning an ``i32``. |
|
||||
+-------------------------+--------------------------------------------------------------------------------------------------------------+
|
||||
| ``i32 addrspace(5)*`` | A :ref:`pointer <t_pointer>` to an ``i32`` value that resides in address space #5. |
|
||||
+-------------------------+--------------------------------------------------------------------------------------------------------------+
|
||||
|
||||
.. _t_vector:
|
||||
|
||||
Vector Type
|
||||
^^^^^^^^^^^
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
A vector type is a simple derived type that represents a vector of
|
||||
elements. Vector types are used when multiple primitive data are
|
||||
operated in parallel using a single instruction (SIMD). A vector type
|
||||
requires a size (number of elements) and an underlying primitive data
|
||||
type. Vector types are considered :ref:`first class <t_firstclass>`.
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
|
||||
::
|
||||
|
||||
< <# elements> x <elementtype> >
|
||||
|
||||
The number of elements is a constant integer value larger than 0;
|
||||
elementtype may be any integer or floating point type, or a pointer to
|
||||
these types. Vectors of size zero are not allowed.
|
||||
|
||||
Examples:
|
||||
"""""""""
|
||||
|
||||
+-------------------+--------------------------------------------------+
|
||||
| ``<4 x i32>`` | Vector of 4 32-bit integer values. |
|
||||
+-------------------+--------------------------------------------------+
|
||||
| ``<8 x float>`` | Vector of 8 32-bit floating-point values. |
|
||||
+-------------------+--------------------------------------------------+
|
||||
| ``<2 x i64>`` | Vector of 2 64-bit integer values. |
|
||||
+-------------------+--------------------------------------------------+
|
||||
| ``<4 x i64*>`` | Vector of 4 pointers to 64-bit integer values. |
|
||||
+-------------------+--------------------------------------------------+
|
||||
|
||||
Constants
|
||||
=========
|
||||
|
||||
|
@ -8502,6 +8470,8 @@ Memory Use Markers
|
|||
This class of intrinsics exists to provide information about the lifetime of
|
||||
memory objects and ranges where variables are immutable.
|
||||
|
||||
.. _int_lifestart:
|
||||
|
||||
'``llvm.lifetime.start``' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
@ -8533,6 +8503,8 @@ of the memory pointed to by ``ptr`` is dead. This means that it is known
|
|||
to never be used and has an undefined value. A load from the pointer
|
||||
that precedes this intrinsic can be replaced with ``'undef'``.
|
||||
|
||||
.. _int_lifeend:
|
||||
|
||||
'``llvm.lifetime.end``' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
@ -8958,3 +8930,10 @@ Semantics:
|
|||
|
||||
This intrinsic does nothing, and it's removed by optimizers and ignored
|
||||
by codegen.
|
||||
|
||||
Stack Map Intrinsics
|
||||
--------------------
|
||||
|
||||
LLVM provides experimental intrinsics to support runtime patching
|
||||
mechanisms commonly desired in dynamic language JITs. These intrinsics
|
||||
are described in :doc:`StackMaps`.
|
||||
|
|
|
@ -273,7 +273,7 @@ there is a separate version for each compute architecture.
|
|||
For a list of all math functions implemented in libdevice, see
|
||||
`libdevice Users Guide <http://docs.nvidia.com/cuda/libdevice-users-guide/index.html>`_.
|
||||
|
||||
To accomodate various math-related compiler flags that can affect code
|
||||
To accommodate various math-related compiler flags that can affect code
|
||||
generation of libdevice code, the library code depends on a special LLVM IR
|
||||
pass (``NVVMReflect``) to handle conditional compilation within LLVM IR. This
|
||||
pass looks for calls to the ``@__nvvm_reflect`` function and replaces them
|
||||
|
@ -839,7 +839,7 @@ Libdevice provides an ``__nv_powf`` function that we will use.
|
|||
%valB = load float addrspace(1)* %ptrB, align 4
|
||||
|
||||
; Compute C = pow(A, B)
|
||||
%valC = call float @__nv_exp2f(float %valA, float %valB)
|
||||
%valC = call float @__nv_powf(float %valA, float %valB)
|
||||
|
||||
; Store back to C
|
||||
store float %valC, float addrspace(1)* %ptrC, align 4
|
||||
|
@ -850,7 +850,7 @@ Libdevice provides an ``__nv_powf`` function that we will use.
|
|||
!nvvm.annotations = !{!0}
|
||||
!0 = metadata !{void (float addrspace(1)*,
|
||||
float addrspace(1)*,
|
||||
float addrspace(1)*)* @kernel, metadata !"kernel", i32 1}%
|
||||
float addrspace(1)*)* @kernel, metadata !"kernel", i32 1}
|
||||
|
||||
|
||||
To compile this kernel, we perform the following steps:
|
||||
|
|
|
@ -1,15 +1,21 @@
|
|||
======================
|
||||
LLVM 3.4 Release Notes
|
||||
LLVM 3.5 Release Notes
|
||||
======================
|
||||
|
||||
.. contents::
|
||||
:local:
|
||||
|
||||
.. warning::
|
||||
These are in-progress notes for the upcoming LLVM 3.5 release. You may
|
||||
prefer the `LLVM 3.4 Release Notes <http://llvm.org/releases/3.4/docs
|
||||
/ReleaseNotes.html>`_.
|
||||
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
This document contains the release notes for the LLVM Compiler Infrastructure,
|
||||
release 3.4. Here we describe the status of LLVM, including major improvements
|
||||
release 3.5. Here we describe the status of LLVM, including major improvements
|
||||
from the previous release, improvements in various subprojects of LLVM, and
|
||||
some of the current users of the code. All LLVM releases may be downloaded
|
||||
from the `LLVM releases web site <http://llvm.org/releases/>`_.
|
||||
|
@ -35,58 +41,6 @@ Non-comprehensive list of changes in this release
|
|||
functionality, or simply have a lot to talk about), see the `NOTE` below
|
||||
for adding a new subsection.
|
||||
|
||||
* This is expected to be the last release of LLVM which compiles using a C++98
|
||||
toolchain. We expect to start using some C++11 features in LLVM and other
|
||||
sub-projects starting after this release. That said, we are committed to
|
||||
supporting a reasonable set of modern C++ toolchains as the host compiler on
|
||||
all of the platforms. This will at least include Visual Studio 2012 on
|
||||
Windows, and Clang 3.1 or GCC 4.7.x on Mac and Linux. The final set of
|
||||
compilers (and the C++11 features they support) is not set in stone, but we
|
||||
wanted users of LLVM to have a heads up that the next release will involve
|
||||
a substantial change in the host toolchain requirements.
|
||||
|
||||
* The regression tests now fail if any command in a pipe fails. To disable it in
|
||||
a directory, just add ``config.pipefail = False`` to its ``lit.local.cfg``.
|
||||
See :doc:`Lit <CommandGuide/lit>` for the details.
|
||||
|
||||
* Support for exception handling has been removed from the old JIT. Use MCJIT
|
||||
if you need EH support.
|
||||
|
||||
* The R600 backend is not marked experimental anymore and is built by default.
|
||||
|
||||
* APFloat::isNormal() was renamed to APFloat::isFiniteNonZero() and
|
||||
APFloat::isIEEENormal() was renamed to APFloat::isNormal(). This ensures that
|
||||
APFloat::isNormal() conforms to IEEE-754R-2008.
|
||||
|
||||
* The library call simplification pass has been removed. Its functionality
|
||||
has been integrated into the instruction combiner and function attribute
|
||||
marking passes.
|
||||
|
||||
* Support for building using Visual Studio 2008 has been dropped. Use VS 2010
|
||||
or later instead. For more information, see the `Getting Started using Visual
|
||||
Studio <GettingStartedVS.html>`_ page.
|
||||
|
||||
* The Loop Vectorizer that was previously enabled for -O3 is now enabled for
|
||||
-Os and -O2.
|
||||
|
||||
* The new SLP Vectorizer is now enabled by default.
|
||||
|
||||
* llvm-ar now uses the new Object library and produces archives and
|
||||
symbol tables in the gnu format.
|
||||
|
||||
* FileCheck now allows specifying -check-prefix multiple times. This
|
||||
helps reduce duplicate check lines when using multiple RUN lines.
|
||||
|
||||
* The bitcast instruction no longer allows casting between pointers
|
||||
with different address spaces. To achieve this, use the new
|
||||
addrspacecast instruction.
|
||||
|
||||
* Different sized pointers for different address spaces should now
|
||||
generally work. This is primarily useful for GPU targets.
|
||||
|
||||
* OCaml bindings have been significantly extended to cover almost all of the
|
||||
LLVM libraries.
|
||||
|
||||
* ... next change ...
|
||||
|
||||
.. NOTE
|
||||
|
@ -99,126 +53,12 @@ Non-comprehensive list of changes in this release
|
|||
|
||||
Makes programs 10x faster by doing Special New Thing.
|
||||
|
||||
Mips Target
|
||||
-----------
|
||||
|
||||
Support for the MIPS SIMD Architecture (MSA) has been added. MSA is supported
|
||||
through inline assembly, intrinsics with the prefix '__builtin_msa', and normal
|
||||
code generation.
|
||||
|
||||
For more information on MSA (including documentation for the instruction set),
|
||||
see the `MIPS SIMD page at Imagination Technologies
|
||||
<http://imgtec.com/mips/mips-simd.asp>`_
|
||||
|
||||
SPARC Target
|
||||
------------
|
||||
|
||||
The SPARC backend got many improvements, namely
|
||||
|
||||
* experimental SPARC V9 backend
|
||||
* JIT support for SPARC
|
||||
* fp128 support
|
||||
* exception handling
|
||||
* TLS support
|
||||
* leaf functions optimization
|
||||
* bug fixes
|
||||
|
||||
External Open Source Projects Using LLVM 3.4
|
||||
External Open Source Projects Using LLVM 3.5
|
||||
============================================
|
||||
|
||||
An exciting aspect of LLVM is that it is used as an enabling technology for
|
||||
a lot of other language and tools projects. This section lists some of the
|
||||
projects that have already been updated to work with LLVM 3.4.
|
||||
|
||||
DXR
|
||||
---
|
||||
|
||||
`DXR <https://wiki.mozilla.org/DXR>`_ is Mozilla's code search and navigation
|
||||
tool, aimed at making sense of large projects like Firefox. It supports
|
||||
full-text and regex searches as well as structural queries like "Find all the
|
||||
callers of this function." Behind the scenes, it uses a custom trigram index,
|
||||
the re2 library, and structural data collected by a clang compiler plugin.
|
||||
|
||||
LDC - the LLVM-based D compiler
|
||||
-------------------------------
|
||||
|
||||
`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
|
||||
pragmatically combines efficiency, control, and modeling power, with safety and
|
||||
programmer productivity. D supports powerful concepts like Compile-Time Function
|
||||
Execution (CTFE) and Template Meta-Programming, provides an innovative approach
|
||||
to concurrency and offers many classical paradigms.
|
||||
|
||||
`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
|
||||
combined with LLVM as backend to produce efficient native code. LDC targets
|
||||
x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux/PPC64.
|
||||
Ports to other architectures like ARM and AArch64 are underway.
|
||||
|
||||
Likely
|
||||
------
|
||||
|
||||
`Likely <http://www.liblikely.org/>`_ is an open source domain specific
|
||||
language for image recognition. Algorithms are just-in-time compiled using
|
||||
LLVM's MCJIT infrastructure to execute on single or multi-threaded CPUs as well
|
||||
as OpenCL SPIR or CUDA enabled GPUs. Likely exploits the observation that while
|
||||
image processing and statistical learning kernels must be written generically
|
||||
to handle any matrix datatype, at runtime they tend to be executed repeatedly
|
||||
on the same type.
|
||||
|
||||
Portable Computing Language (pocl)
|
||||
----------------------------------
|
||||
|
||||
In addition to producing an easily portable open source OpenCL
|
||||
implementation, another major goal of `pocl <http://portablecl.org/>`_
|
||||
is improving performance portability of OpenCL programs with
|
||||
compiler optimizations, reducing the need for target-dependent manual
|
||||
optimizations. An important part of pocl is a set of LLVM passes used to
|
||||
statically parallelize multiple work-items with the kernel compiler, even in
|
||||
the presence of work-group barriers. This enables static parallelization of
|
||||
the fine-grained static concurrency in the work groups in multiple ways.
|
||||
|
||||
Portable Native Client (PNaCl)
|
||||
------------------------------
|
||||
|
||||
`Portable Native Client (PNaCl) <http://www.chromium.org/nativeclient/pnacl>`_
|
||||
is a Chrome initiative to bring the performance and low-level control of native
|
||||
code to modern web browsers, without sacrificing the security benefits and
|
||||
portability of web applications. PNaCl works by compiling native C and C++ code
|
||||
to an intermediate representation using the LLVM clang compiler. This
|
||||
intermediate representation is a subset of LLVM bytecode that is wrapped into a
|
||||
portable executable, which can be hosted on a web server like any other website
|
||||
asset. When the site is accessed, Chrome fetches and translates the portable
|
||||
executable into an architecture-specific machine code optimized directly for
|
||||
the underlying device. PNaCl lets developers compile their code once to run on
|
||||
any hardware platform and embed their PNaCl application in any website,
|
||||
enabling developers to directly leverage the power of the underlying CPU and
|
||||
GPU.
|
||||
|
||||
TTA-based Co-design Environment (TCE)
|
||||
-------------------------------------
|
||||
|
||||
`TCE <http://tce.cs.tut.fi/>`_ is a toolset for designing new
|
||||
exposed datapath processors based on the Transport triggered architecture (TTA).
|
||||
The toolset provides a complete co-design flow from C/C++
|
||||
programs down to synthesizable VHDL/Verilog and parallel program binaries.
|
||||
Processor customization points include the register files, function units,
|
||||
supported operations, and the interconnection network.
|
||||
|
||||
TCE uses Clang and LLVM for C/C++/OpenCL C language support, target independent
|
||||
optimizations and also for parts of code generation. It generates
|
||||
new LLVM-based code generators "on the fly" for the designed processors and
|
||||
loads them in to the compiler backend as runtime libraries to avoid
|
||||
per-target recompilation of larger parts of the compiler chain.
|
||||
|
||||
WebCL Validator
|
||||
---------------
|
||||
|
||||
`WebCL Validator <https://github.com/KhronosGroup/webcl-validator>`_ implements
|
||||
validation for WebCL C language which is a subset of OpenCL ES 1.1. Validator
|
||||
checks the correctness of WebCL C, and implements memory protection for it as a
|
||||
source-2-source transformation. The transformation converts WebCL to memory
|
||||
protected OpenCL. The protected OpenCL cannot access any memory ranges which
|
||||
were not allocated for it, and its memory is always initialized to prevent
|
||||
information leakage from other programs.
|
||||
projects that have already been updated to work with LLVM 3.5.
|
||||
|
||||
|
||||
Additional Information
|
||||
|
|
|
@ -52,18 +52,16 @@ The scripts are in the ``utils/release`` directory.
|
|||
test-release.sh
|
||||
---------------
|
||||
|
||||
This script will check-out, configure and compile LLVM+Clang (+ most add-ons,
|
||||
like ``compiler-rt``, ``libcxx`` and ``clang-extra-tools``) in three stages, and
|
||||
will test the final stage. It'll have installed the final binaries on the
|
||||
Phase3/Release(+Asserts) directory, and that's the one you should use for the
|
||||
test-suite and other external tests.
|
||||
This script will check-out, configure and compile LLVM+Clang (+ most add-ons, like ``compiler-rt``,
|
||||
``libcxx`` and ``clang-extra-tools``) in three stages, and will test the final stage.
|
||||
It'll have installed the final binaries on the Phase3/Release(+Asserts) directory, and
|
||||
that's the one you should use for the test-suite and other external tests.
|
||||
|
||||
To run the script on a specific release candidate run::
|
||||
|
||||
./test-release.sh \
|
||||
-release 3.4 \
|
||||
-release 3.3 \
|
||||
-rc 1 \
|
||||
-triple x86_64-apple-darwin \
|
||||
-no-64bit \
|
||||
-test-asserts \
|
||||
-no-compare-files
|
||||
|
|
|
@ -2306,7 +2306,7 @@ stringWithCString:]``") and the basename is the selector only
|
|||
Mach-O Changes
|
||||
""""""""""""""
|
||||
|
||||
The sections names for the apple hash tables are for non mach-o files. For
|
||||
The section names for the apple hash tables are for non-mach-o files. For
|
||||
mach-o files, the sections should be contained in the ``__DWARF`` segment with
|
||||
names as follows:
|
||||
|
||||
|
|
|
@ -0,0 +1,480 @@
|
|||
===================================
|
||||
Stack maps and patch points in LLVM
|
||||
===================================
|
||||
|
||||
.. contents::
|
||||
:local:
|
||||
:depth: 2
|
||||
|
||||
Definitions
|
||||
===========
|
||||
|
||||
In this document we refer to the "runtime" collectively as all
|
||||
components that serve as the LLVM client, including the LLVM IR
|
||||
generator, object code consumer, and code patcher.
|
||||
|
||||
A stack map records the location of ``live values`` at a particular
|
||||
instruction address. These ``live values`` do not refer to all the
|
||||
LLVM values live across the stack map. Instead, they are only the
|
||||
values that the runtime requires to be live at this point. For
|
||||
example, they may be the values the runtime will need to resume
|
||||
program execution at that point independent of the compiled function
|
||||
containing the stack map.
|
||||
|
||||
LLVM emits stack map data into the object code within a designated
|
||||
:ref:`stackmap-section`. This stack map data contains a record for
|
||||
each stack map. The record stores the stack map's instruction address
|
||||
and contains an entry for each mapped value. Each entry encodes a
|
||||
value's location as a register, stack offset, or constant.
|
||||
|
||||
A patch point is an instruction address at which space is reserved for
|
||||
patching a new instruction sequence at run time. Patch points look
|
||||
much like calls to LLVM. They take arguments that follow a calling
|
||||
convention and may return a value. They also imply stack map
|
||||
generation, which allows the runtime to locate the patchpoint and
|
||||
find the location of ``live values`` at that point.
|
||||
|
||||
Motivation
|
||||
==========
|
||||
|
||||
This functionality is currently experimental but is potentially useful
|
||||
in a variety of settings, the most obvious being a runtime (JIT)
|
||||
compiler. Example applications of the patchpoint intrinsics are
|
||||
implementing an inline call cache for polymorphic method dispatch or
|
||||
optimizing the retrieval of properties in dynamically typed languages
|
||||
such as JavaScript.
|
||||
|
||||
The intrinsics documented here are currently used by the JavaScript
|
||||
compiler within the open source WebKit project, see the `FTL JIT
|
||||
<https://trac.webkit.org/wiki/FTLJIT>`_, but they are designed to be
|
||||
used whenever stack maps or code patching are needed. Because the
|
||||
intrinsics have experimental status, compatibility across LLVM
|
||||
releases is not guaranteed.
|
||||
|
||||
The stack map functionality described in this document is separate
|
||||
from the functionality described in
|
||||
:ref:`stack-map`. `GCFunctionMetadata` provides the location of
|
||||
pointers into a collected heap captured by the `GCRoot` intrinsic,
|
||||
which can also be considered a "stack map". Unlike the stack maps
|
||||
defined above, the `GCFunctionMetadata` stack map interface does not
|
||||
provide a way to associate live register values of arbitrary type with
|
||||
an instruction address, nor does it specify a format for the resulting
|
||||
stack map. The stack maps described here could potentially provide
|
||||
richer information to a garbage collecting runtime, but that usage
|
||||
will not be discussed in this document.
|
||||
|
||||
Intrinsics
|
||||
==========
|
||||
|
||||
The following two kinds of intrinsics can be used to implement stack
|
||||
maps and patch points: ``llvm.experimental.stackmap`` and
|
||||
``llvm.experimental.patchpoint``. Both kinds of intrinsics generate a
|
||||
stack map record, and they both allow some form of code patching. They
|
||||
can be used independently (i.e. ``llvm.experimental.patchpoint``
|
||||
implicitly generates a stack map without the need for an additional
|
||||
call to ``llvm.experimental.stackmap``). The choice of which to use
|
||||
depends on whether it is necessary to reserve space for code patching
|
||||
and whether any of the intrinsic arguments should be lowered according
|
||||
to calling conventions. ``llvm.experimental.stackmap`` does not
|
||||
reserve any space, nor does it expect any call arguments. If the
|
||||
runtime patches code at the stack map's address, it will destructively
|
||||
overwrite the program text. This is unlike
|
||||
``llvm.experimental.patchpoint``, which reserves space for in-place
|
||||
patching without overwriting surrounding code. The
|
||||
``llvm.experimental.patchpoint`` intrinsic also lowers a specified
|
||||
number of arguments according to its calling convention. This allows
|
||||
patched code to make in-place function calls without marshaling.
|
||||
|
||||
Each instance of one of these intrinsics generates a stack map record
|
||||
in the :ref:`stackmap-section`. The record includes an ID, allowing
|
||||
the runtime to uniquely identify the stack map, and the offset within
|
||||
the code from the beginning of the enclosing function.
|
||||
|
||||
'``llvm.experimental.stackmap``' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
|
||||
::
|
||||
|
||||
declare void
|
||||
@llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>, ...)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.experimental.stackmap``' intrinsic records the location of
|
||||
specified values in the stack map without generating any code.
|
||||
|
||||
Operands:
|
||||
"""""""""
|
||||
|
||||
The first operand is an ID to be encoded within the stack map. The
|
||||
second operand is the number of shadow bytes following the
|
||||
intrinsic. The variable number of operands that follow are the ``live
|
||||
values`` for which locations will be recorded in the stack map.
|
||||
|
||||
To use this intrinsic as a bare-bones stack map, with no code patching
|
||||
support, the number of shadow bytes can be set to zero.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
The stack map intrinsic generates no code in place, unless nops are
|
||||
needed to cover its shadow (see below). However, its offset from
|
||||
function entry is stored in the stack map. This is the relative
|
||||
instruction address immediately following the instructions that
|
||||
precede the stack map.
|
||||
|
||||
The stack map ID allows a runtime to locate the desired stack map
|
||||
record. LLVM passes this ID through directly to the stack map
|
||||
record without checking uniqueness.
|
||||
|
||||
LLVM guarantees a shadow of instructions following the stack map's
|
||||
instruction offset during which neither the end of the basic block nor
|
||||
another call to ``llvm.experimental.stackmap`` or
|
||||
``llvm.experimental.patchpoint`` may occur. This allows the runtime to
|
||||
patch the code at this point in response to an event triggered from
|
||||
outside the code. The code for instructions following the stack map
|
||||
may be emitted in the stack map's shadow, and these instructions may
|
||||
be overwritten by destructive patching. Without shadow bytes, this
|
||||
destructive patching could overwrite program text or data outside the
|
||||
current function. We disallow overlapping stack map shadows so that
|
||||
the runtime does not need to consider this corner case.
|
||||
|
||||
For example, a stack map with 8 byte shadow:
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
call void @runtime()
|
||||
call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 77, i32 8,
|
||||
i64* %ptr)
|
||||
%val = load i64* %ptr
|
||||
%add = add i64 %val, 3
|
||||
ret i64 %add
|
||||
|
||||
May require one byte of nop-padding:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
0x00 callq _runtime
|
||||
0x05 nop <--- stack map address
|
||||
0x06 movq (%rdi), %rax
|
||||
0x07 addq $3, %rax
|
||||
0x0a popq %rdx
|
||||
0x0b ret <---- end of 8-byte shadow
|
||||
|
||||
Now, if the runtime needs to invalidate the compiled code, it may
|
||||
patch 8 bytes of code at the stack map's address as follows:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
0x00 callq _runtime
|
||||
0x05 movl $0xffff, %rax <--- patched code at stack map address
|
||||
0x0a callq *%rax <---- end of 8-byte shadow
|
||||
|
||||
This way, after the normal call to the runtime returns, the code will
|
||||
execute a patched call to a special entry point that can rebuild a
|
||||
stack frame from the values located by the stack map.
|
||||
|
||||
'``llvm.experimental.patchpoint.*``' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
|
||||
::
|
||||
|
||||
declare void
|
||||
@llvm.experimental.patchpoint.void(i64 <id>, i32 <numBytes>,
|
||||
i8* <target>, i32 <numArgs>, ...)
|
||||
declare i64
|
||||
@llvm.experimental.patchpoint.i64(i64 <id>, i32 <numBytes>,
|
||||
i8* <target>, i32 <numArgs>, ...)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.experimental.patchpoint.*``' intrinsics create a function
|
||||
call to the specified ``<target>`` and records the location of specified
|
||||
values in the stack map.
|
||||
|
||||
Operands:
|
||||
"""""""""
|
||||
|
||||
The first operand is an ID, the second operand is the number of bytes
|
||||
reserved for the patchable region, the third operand is the target
|
||||
address of a function (optionally null), and the fourth operand
|
||||
specifies how many of the following variable operands are considered
|
||||
function call arguments. The remaining variable number of operands are
|
||||
the ``live values`` for which locations will be recorded in the stack
|
||||
map.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
The patch point intrinsic generates a stack map. It also emits a
|
||||
function call to the address specified by ``<target>`` if the address
|
||||
is not a constant null. The function call and its arguments are
|
||||
lowered according to the calling convention specified at the
|
||||
intrinsic's callsite. Variants of the intrinsic with non-void return
|
||||
type also return a value according to calling convention.
|
||||
|
||||
Requesting zero patch point arguments is valid. In this case, all
|
||||
variable operands are handled just like
|
||||
``llvm.experimental.stackmap.*``. The difference is that space will
|
||||
still be reserved for patching, a call will be emitted, and a return
|
||||
value is allowed.
|
||||
|
||||
The locations of the arguments are not normally recorded in the stack
|
||||
map because they are already fixed by the calling convention. The
|
||||
remaining ``live values`` will have their location recorded, which
|
||||
could be a register, stack location, or constant. A special calling
|
||||
convention has been introduced for use with stack maps, anyregcc,
|
||||
which forces the arguments to be loaded into registers but allows
|
||||
those registers to be dynamically allocated. These argument registers
|
||||
will have their register locations recorded in the stack map in
|
||||
addition to the remaining ``live values``.
|
||||
|
||||
The patch point also emits nops to cover at least ``<numBytes>`` of
|
||||
instruction encoding space. Hence, the client must ensure that
|
||||
``<numBytes>`` is enough to encode a call to the target address on the
|
||||
supported targets. If the call target is constant null, then there is
|
||||
no minimum requirement. A zero-byte null target patchpoint is
|
||||
valid.
|
||||
|
||||
The runtime may patch the code emitted for the patch point, including
|
||||
the call sequence and nops. However, the runtime may not assume
|
||||
anything about the code LLVM emits within the reserved space. Partial
|
||||
patching is not allowed. The runtime must patch all reserved bytes,
|
||||
padding with nops if necessary.
|
||||
|
||||
This example shows a patch point reserving 15 bytes, with one argument
|
||||
in $rdi, and a return value in $rax per native calling convention:
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
%target = inttoptr i64 -281474976710654 to i8*
|
||||
%val = call i64 (i64, i32, ...)*
|
||||
@llvm.experimental.patchpoint.i64(i64 78, i32 15,
|
||||
i8* %target, i32 1, i64* %ptr)
|
||||
%add = add i64 %val, 3
|
||||
ret i64 %add
|
||||
|
||||
May generate:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
0x00 movabsq $0xffff000000000002, %r11 <--- patch point address
|
||||
0x0a callq *%r11
|
||||
0x0d nop
|
||||
0x0e nop <--- end of reserved 15-bytes
|
||||
0x0f addq $0x3, %rax
|
||||
0x10 movl %rax, 8(%rsp)
|
||||
|
||||
Note that no stack map locations will be recorded. If the patched code
|
||||
sequence does not need arguments fixed to specific calling convention
|
||||
registers, then the ``anyregcc`` convention may be used:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
%val = call anyregcc @llvm.experimental.patchpoint(i64 78, i32 15,
|
||||
i8* %target, i32 1,
|
||||
i64* %ptr)
|
||||
|
||||
The stack map now indicates the location of the %ptr argument and
|
||||
return value:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
Stack Map: ID=78, Loc0=%r9 Loc1=%r8
|
||||
|
||||
The patch code sequence may now use the argument that happened to be
|
||||
allocated in %r8 and return a value allocated in %r9:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
0x00 movslq 4(%r8) %r9 <--- patched code at patch point address
|
||||
0x03 nop
|
||||
...
|
||||
0x0e nop <--- end of reserved 15-bytes
|
||||
0x0f addq $0x3, %r9
|
||||
0x10 movl %r9, 8(%rsp)
|
||||
|
||||
.. _stackmap-format:
|
||||
|
||||
Stack Map Format
|
||||
================
|
||||
|
||||
The existence of a stack map or patch point intrinsic within an LLVM
|
||||
Module forces code emission to create a :ref:`stackmap-section`. The
|
||||
format of this section follows:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
uint32 : Reserved (header)
|
||||
uint32 : NumConstants
|
||||
Constants[NumConstants] {
|
||||
uint64 : LargeConstant
|
||||
}
|
||||
uint32 : NumRecords
|
||||
StkMapRecord[NumRecords] {
|
||||
uint64 : PatchPoint ID
|
||||
uint32 : Instruction Offset
|
||||
uint16 : Reserved (record flags)
|
||||
uint16 : NumLocations
|
||||
Location[NumLocations] {
|
||||
uint8 : Register | Direct | Indirect | Constant | ConstantIndex
|
||||
uint8 : Reserved (location flags)
|
||||
uint16 : Dwarf RegNum
|
||||
int32 : Offset or SmallConstant
|
||||
}
|
||||
uint16 : NumLiveOuts
|
||||
LiveOuts[NumLiveOuts] {
|
||||
uint16 : Dwarf RegNum
|
||||
uint8 : Reserved
|
||||
uint8 : Size in Bytes
|
||||
}
|
||||
}
|
||||
|
||||
The first byte of each location encodes a type that indicates how to
|
||||
interpret the ``RegNum`` and ``Offset`` fields as follows:
|
||||
|
||||
======== ========== =================== ===========================
|
||||
Encoding Type Value Description
|
||||
-------- ---------- ------------------- ---------------------------
|
||||
0x1 Register Reg Value in a register
|
||||
0x2 Direct Reg + Offset Frame index value
|
||||
0x3 Indirect [Reg + Offset] Spilled value
|
||||
0x4 Constant Offset Small constant
|
||||
0x5 ConstIndex Constants[Offset] Large constant
|
||||
======== ========== =================== ===========================
|
||||
|
||||
In the common case, a value is available in a register, and the
|
||||
``Offset`` field will be zero. Values spilled to the stack are encoded
|
||||
as ``Indirect`` locations. The runtime must load those values from a
|
||||
stack address, typically in the form ``[BP + Offset]``. If an
|
||||
``alloca`` value is passed directly to a stack map intrinsic, then
|
||||
LLVM may fold the frame index into the stack map as an optimization to
|
||||
avoid allocating a register or stack slot. These frame indices will be
|
||||
encoded as ``Direct`` locations in the form ``BP + Offset``. LLVM may
|
||||
also optimize constants by emitting them directly in the stack map,
|
||||
either in the ``Offset`` of a ``Constant`` location or in the constant
|
||||
pool, referred to by ``ConstantIndex`` locations.
|
||||
|
||||
At each callsite, a "liveout" register list is also recorded. These
|
||||
are the registers that are live across the stackmap and therefore must
|
||||
be saved by the runtime. This is an important optimization when the
|
||||
patchpoint intrinsic is used with a calling convention that by default
|
||||
preserves most registers as callee-save.
|
||||
|
||||
Each entry in the liveout register list contains a DWARF register
|
||||
number and size in bytes. The stackmap format deliberately omits
|
||||
specific subregister information. Instead the runtime must interpret
|
||||
this information conservatively. For example, if the stackmap reports
|
||||
one byte at ``%rax``, then the value may be in either ``%al`` or
|
||||
``%ah``. It doesn't matter in practice, because the runtime will
|
||||
simply save ``%rax``. However, if the stackmap reports 16 bytes at
|
||||
``%ymm0``, then the runtime can safely optimize by saving only
|
||||
``%xmm0``.
|
||||
|
||||
The stack map format is a contract between an LLVM SVN revision and
|
||||
the runtime. It is currently experimental and may change in the short
|
||||
term, but minimizing the need to update the runtime is
|
||||
important. Consequently, the stack map design is motivated by
|
||||
simplicity and extensibility. Compactness of the representation is
|
||||
secondary because the runtime is expected to parse the data
|
||||
immediately after compiling a module and encode the information in its
|
||||
own format. Since the runtime controls the allocation of sections, it
|
||||
can reuse the same stack map space for multiple modules.
|
||||
|
||||
.. _stackmap-section:
|
||||
|
||||
Stack Map Section
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
A JIT compiler can easily access this section by providing its own
|
||||
memory manager via the LLVM C API
|
||||
``LLVMCreateSimpleMCJITMemoryManager()``. When creating the memory
|
||||
manager, the JIT provides a callback:
|
||||
``LLVMMemoryManagerAllocateDataSectionCallback()``. When LLVM creates
|
||||
this section, it invokes the callback and passes the section name. The
|
||||
JIT can record the in-memory address of the section at this time and
|
||||
later parse it to recover the stack map data.
|
||||
|
||||
On Darwin, the stack map section name is "__llvm_stackmaps". The
|
||||
segment name is "__LLVM_STACKMAPS".
|
||||
|
||||
Stack Map Usage
|
||||
===============
|
||||
|
||||
The stack map support described in this document can be used to
|
||||
precisely determine the location of values at a specific position in
|
||||
the code. LLVM does not maintain any mapping between those values and
|
||||
any higher-level entity. The runtime must be able to interpret the
|
||||
stack map record given only the ID, offset, and the order of the
|
||||
locations, which LLVM preserves.
|
||||
|
||||
Note that this is quite different from the goal of debug information,
|
||||
which is a best-effort attempt to track the location of named
|
||||
variables at every instruction.
|
||||
|
||||
An important motivation for this design is to allow a runtime to
|
||||
commandeer a stack frame when execution reaches an instruction address
|
||||
associated with a stack map. The runtime must be able to rebuild a
|
||||
stack frame and resume program execution using the information
|
||||
provided by the stack map. For example, execution may resume in an
|
||||
interpreter or a recompiled version of the same function.
|
||||
|
||||
This usage restricts LLVM optimization. Clearly, LLVM must not move
|
||||
stores across a stack map. However, loads must also be handled
|
||||
conservatively. If the load may trigger an exception, hoisting it
|
||||
above a stack map could be invalid. For example, the runtime may
|
||||
determine that a load is safe to execute without a type check given
|
||||
the current state of the type system. If the type system changes while
|
||||
some activation of the load's function exists on the stack, the load
|
||||
becomes unsafe. The runtime can prevent subsequent execution of that
|
||||
load by immediately patching any stack map location that lies between
|
||||
the current call site and the load (typically, the runtime would
|
||||
simply patch all stack map locations to invalidate the function). If
|
||||
the compiler had hoisted the load above the stack map, then the
|
||||
program could crash before the runtime could take back control.
|
||||
|
||||
To enforce these semantics, stackmap and patchpoint intrinsics are
|
||||
considered to potentially read and write all memory. This may limit
|
||||
optimization more than some clients desire. To address this problem
|
||||
meta-data could be added to the intrinsic call to express aliasing,
|
||||
thereby allowing optimizations to hoist certain loads above stack
|
||||
maps.
|
||||
|
||||
Direct Stack Map Entries
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
As shown in :ref:`stackmap-section`, a Direct stack map location
|
||||
records the address of a frame index. This address is itself the value
|
||||
that the runtime requested. This differs from Indirect locations,
|
||||
which refer to stack locations from which the requested values must
|
||||
be loaded. Direct locations can communicate the address of an alloca,
|
||||
while Indirect locations handle register spills.
|
||||
|
||||
For example:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
entry:
|
||||
%a = alloca i64...
|
||||
llvm.experimental.stackmap(i64 <ID>, i32 <shadowBytes>, i64* %a)
|
||||
|
||||
The runtime can determine this alloca's relative location on the
|
||||
stack immediately after compilation, or at any time thereafter. This
|
||||
differs from Register and Indirect locations, because the runtime can
|
||||
only read the values in those locations when execution reaches the
|
||||
instruction address of the stack map.
|
||||
|
||||
This functionality requires LLVM to treat entry-block allocas
|
||||
specially when they are directly consumed by an intrinsic. (This is
|
||||
the same requirement imposed by the llvm.gcroot intrinsic.) LLVM
|
||||
transformations must not substitute the alloca with any intervening
|
||||
value. This can be verified by the runtime simply by checking that the
|
||||
stack map's location is a Direct location type.
|
|
@ -601,7 +601,7 @@ the classes multiple times yourself, e.g. by writing:
|
|||
...
|
||||
|
||||
A ``defm`` can also be used inside a multiclass providing several levels of
|
||||
multiclass instanciations.
|
||||
multiclass instantiations.
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
|
@ -727,7 +727,7 @@ opened, as in the case with the ``CALL*`` instructions above.
|
|||
|
||||
It's also possible to use "let" expressions inside multiclasses, providing more
|
||||
ways to factor out commonality from the records, especially if using several
|
||||
levels of multiclass instanciations. This also avoids the need of using "let"
|
||||
levels of multiclass instantiations. This also avoids the need of using "let"
|
||||
expressions within subsequent records inside a multiclass.
|
||||
|
||||
.. code-block:: llvm
|
||||
|
|
|
@ -238,6 +238,12 @@ For some targets, you also need to support the following methods:
|
|||
* ``getTargetLowering()``
|
||||
* ``getJITInfo()``
|
||||
|
||||
Some architectures, such as GPUs, do not support jumping to an arbitrary
|
||||
program location and implement branching using masked execution and loop using
|
||||
special instructions around the loop body. In order to avoid CFG modifications
|
||||
that introduce irreducible control flow not handled by such hardware, a target
|
||||
must call ``setRequiresStructuredCFG(true)`` when being initialized.
|
||||
|
||||
In addition, the ``XXXTargetMachine`` constructor should specify a
|
||||
``TargetDescription`` string that determines the data layout for the target
|
||||
machine, including characteristics such as pointer size, alignment, and
|
||||
|
|
|
@ -234,6 +234,7 @@ The following types have built-in support in YAML I/O:
|
|||
* float
|
||||
* double
|
||||
* StringRef
|
||||
* std::string
|
||||
* int64_t
|
||||
* int32_t
|
||||
* int16_t
|
||||
|
@ -640,12 +641,50 @@ The YAML syntax supports tags as a way to specify the type of a node before
|
|||
it is parsed. This allows dynamic types of nodes. But the YAML I/O model uses
|
||||
static typing, so there are limits to how you can use tags with the YAML I/O
|
||||
model. Recently, we added support to YAML I/O for checking/setting the optional
|
||||
tag on a map. Using this functionality it is even possbile to support differnt
|
||||
tag on a map. Using this functionality it is even possible to support different
|
||||
mappings, as long as they are convertible.
|
||||
|
||||
To check a tag, inside your mapping() method you can use io.mapTag() to specify
|
||||
what the tag should be. This will also add that tag when writing yaml.
|
||||
|
||||
Validation
|
||||
----------
|
||||
|
||||
Sometimes in a yaml map, each key/value pair is valid, but the combination is
|
||||
not. This is similar to something having no syntax errors, but still having
|
||||
semantic errors. To support semantic level checking, YAML I/O allows
|
||||
an optional ``validate()`` method in a MappingTraits template specialization.
|
||||
|
||||
When parsing yaml, the ``validate()`` method is called *after* all key/values in
|
||||
the map have been processed. Any error message returned by the ``validate()``
|
||||
method during input will be printed just like a syntax error would be printed.
|
||||
When writing yaml, the ``validate()`` method is called *before* the yaml
|
||||
key/values are written. Any error during output will trigger an ``assert()``
|
||||
because it is a programming error to have invalid struct values.
|
||||
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
using llvm::yaml::MappingTraits;
|
||||
using llvm::yaml::IO;
|
||||
|
||||
struct Stuff {
|
||||
...
|
||||
};
|
||||
|
||||
template <>
|
||||
struct MappingTraits<Stuff> {
|
||||
static void mapping(IO &io, Stuff &stuff) {
|
||||
...
|
||||
}
|
||||
static StringRef validate(IO &io, Stuff &stuff) {
|
||||
// Look at all fields in 'stuff' and if there
|
||||
// are any bad values return a string describing
|
||||
// the error. Otherwise return an empty string.
|
||||
return StringRef();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Sequence
|
||||
========
|
||||
|
|
|
@ -40,7 +40,7 @@ master_doc = 'index'
|
|||
|
||||
# General information about the project.
|
||||
project = u'LLVM'
|
||||
copyright = u'2003-2013, LLVM Project'
|
||||
copyright = u'2003-2014, LLVM Project'
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
|
|
|
@ -280,10 +280,10 @@ TYPEDEF_HIDES_STRUCT = NO
|
|||
# For small to medium size projects (<1000 input files) the default value is
|
||||
# probably good enough. For larger projects a too small cache size can cause
|
||||
# doxygen to be busy swapping symbols to and from disk most of the time
|
||||
# causing a significant performance penality.
|
||||
# causing a significant performance penalty.
|
||||
# If the system has enough physical memory increasing the cache will improve the
|
||||
# performance by keeping more symbols in memory. Note that the value works on
|
||||
# a logarithmic scale so increasing the size by one will rougly double the
|
||||
# a logarithmic scale so increasing the size by one will roughly double the
|
||||
# memory usage. The cache size is given by this formula:
|
||||
# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
|
||||
# corresponding to a cache size of 2^16 = 65536 symbols
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
Generated on $datetime for <a href="http://llvm.org/">$projectname</a> by
|
||||
<a href="http://www.doxygen.org"><img src="doxygen.png" alt="Doxygen"
|
||||
align="middle" border="0"/>$doxygenversion</a><br>
|
||||
Copyright © 2003-2013 University of Illinois at Urbana-Champaign.
|
||||
Copyright © 2003-2014 University of Illinois at Urbana-Champaign.
|
||||
All Rights Reserved.</p>
|
||||
|
||||
<hr>
|
||||
|
|
|
@ -234,6 +234,7 @@ For API clients and LLVM developers.
|
|||
TableGen/LangRef
|
||||
HowToUseAttributes
|
||||
NVPTXUsage
|
||||
StackMaps
|
||||
|
||||
:doc:`WritingAnLLVMPass`
|
||||
Information on how to write LLVM transformations and analyses.
|
||||
|
@ -308,6 +309,9 @@ For API clients and LLVM developers.
|
|||
:doc:`NVPTXUsage`
|
||||
This document describes using the NVPTX back-end to compile GPU kernels.
|
||||
|
||||
:doc:`StackMaps`
|
||||
LLVM support for mapping instruction addresses to the location of
|
||||
values and allowing code to be patched.
|
||||
|
||||
Development Process Documentation
|
||||
=================================
|
||||
|
|
|
@ -1,4 +1,11 @@
|
|||
set(LLVM_LINK_COMPONENTS jit bitwriter nativecodegen interpreter)
|
||||
set(LLVM_LINK_COMPONENTS
|
||||
BitWriter
|
||||
Core
|
||||
ExecutionEngine
|
||||
JIT
|
||||
Support
|
||||
nativecodegen
|
||||
)
|
||||
|
||||
add_llvm_example(BrainF
|
||||
BrainF.cpp
|
||||
|
|
|
@ -1,4 +1,11 @@
|
|||
set(LLVM_LINK_COMPONENTS jit mcjit nativecodegen)
|
||||
set(LLVM_LINK_COMPONENTS
|
||||
Core
|
||||
ExecutionEngine
|
||||
MCJIT
|
||||
Support
|
||||
nativecodegen
|
||||
)
|
||||
|
||||
set(LLVM_REQUIRES_EH 1)
|
||||
|
||||
add_llvm_example(ExceptionDemo
|
||||
|
|
|
@ -1,4 +1,11 @@
|
|||
set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen)
|
||||
set(LLVM_LINK_COMPONENTS
|
||||
Core
|
||||
ExecutionEngine
|
||||
Interpreter
|
||||
JIT
|
||||
Support
|
||||
nativecodegen
|
||||
)
|
||||
|
||||
add_llvm_example(Fibonacci
|
||||
fibonacci.cpp
|
||||
|
|
|
@ -1,4 +1,11 @@
|
|||
set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen)
|
||||
set(LLVM_LINK_COMPONENTS
|
||||
Core
|
||||
ExecutionEngine
|
||||
Interpreter
|
||||
JIT
|
||||
Support
|
||||
nativecodegen
|
||||
)
|
||||
|
||||
add_llvm_example(HowToUseJIT
|
||||
HowToUseJIT.cpp
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
set(LLVM_LINK_COMPONENTS core)
|
||||
set(LLVM_LINK_COMPONENTS
|
||||
Core
|
||||
Support
|
||||
)
|
||||
|
||||
add_llvm_example(Kaleidoscope-Ch3
|
||||
toy.cpp
|
||||
|
|
|
@ -1,4 +1,13 @@
|
|||
set(LLVM_LINK_COMPONENTS core jit interpreter native)
|
||||
set(LLVM_LINK_COMPONENTS
|
||||
Analysis
|
||||
Core
|
||||
ExecutionEngine
|
||||
InstCombine
|
||||
JIT
|
||||
ScalarOpts
|
||||
Support
|
||||
nativecodegen
|
||||
)
|
||||
|
||||
add_llvm_example(Kaleidoscope-Ch4
|
||||
toy.cpp
|
||||
|
|
|
@ -1,4 +1,13 @@
|
|||
set(LLVM_LINK_COMPONENTS core jit interpreter native)
|
||||
set(LLVM_LINK_COMPONENTS
|
||||
Analysis
|
||||
Core
|
||||
ExecutionEngine
|
||||
InstCombine
|
||||
JIT
|
||||
ScalarOpts
|
||||
Support
|
||||
nativecodegen
|
||||
)
|
||||
|
||||
add_llvm_example(Kaleidoscope-Ch5
|
||||
toy.cpp
|
||||
|
|
|
@ -1,4 +1,13 @@
|
|||
set(LLVM_LINK_COMPONENTS core jit interpreter native)
|
||||
set(LLVM_LINK_COMPONENTS
|
||||
Analysis
|
||||
Core
|
||||
ExecutionEngine
|
||||
InstCombine
|
||||
JIT
|
||||
ScalarOpts
|
||||
Support
|
||||
nativecodegen
|
||||
)
|
||||
|
||||
add_llvm_example(Kaleidoscope-Ch6
|
||||
toy.cpp
|
||||
|
|
|
@ -1,4 +1,15 @@
|
|||
set(LLVM_LINK_COMPONENTS core jit interpreter native)
|
||||
set(LLVM_LINK_COMPONENTS
|
||||
Analysis
|
||||
Core
|
||||
ExecutionEngine
|
||||
InstCombine
|
||||
JIT
|
||||
ScalarOpts
|
||||
Support
|
||||
TransformUtils
|
||||
nativecodegen
|
||||
)
|
||||
|
||||
set(LLVM_REQUIRES_RTTI 1)
|
||||
|
||||
add_llvm_example(Kaleidoscope-Ch7
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
set(LLVM_LINK_COMPONENTS bitwriter)
|
||||
set(LLVM_LINK_COMPONENTS
|
||||
BitWriter
|
||||
Core
|
||||
Support
|
||||
)
|
||||
|
||||
add_llvm_example(ModuleMaker
|
||||
ModuleMaker.cpp
|
||||
|
|
|
@ -1,4 +1,11 @@
|
|||
set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen)
|
||||
set(LLVM_LINK_COMPONENTS
|
||||
Core
|
||||
ExecutionEngine
|
||||
Interpreter
|
||||
JIT
|
||||
Support
|
||||
nativecodegen
|
||||
)
|
||||
|
||||
add_llvm_example(ParallelJIT
|
||||
ParallelJIT.cpp
|
||||
|
|
|
@ -167,7 +167,8 @@ typedef enum {
|
|||
LLVMAddressSafety = 1ULL << 32,
|
||||
LLVMStackProtectStrongAttribute = 1ULL<<33,
|
||||
LLVMCold = 1ULL << 34,
|
||||
LLVMOptimizeNone = 1ULL << 35
|
||||
LLVMOptimizeNone = 1ULL << 35,
|
||||
LLVMInAllocaAttribute = 1ULL << 36
|
||||
*/
|
||||
} LLVMAttribute;
|
||||
|
||||
|
@ -2663,7 +2664,9 @@ LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef, LLVMValueRef Val,
|
|||
const char *Name);
|
||||
LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef, LLVMValueRef LHS,
|
||||
LLVMValueRef RHS, const char *Name);
|
||||
LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op,
|
||||
LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering ordering,
|
||||
LLVMBool singleThread, const char *Name);
|
||||
LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B, LLVMAtomicRMWBinOp op,
|
||||
LLVMValueRef PTR, LLVMValueRef Val,
|
||||
LLVMAtomicOrdering ordering,
|
||||
LLVMBool singleThread);
|
||||
|
|
|
@ -41,6 +41,9 @@ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM);
|
|||
/** See llvm::createDeadStoreEliminationPass function. */
|
||||
void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM);
|
||||
|
||||
/** See llvm::createScalarizerPass function. */
|
||||
void LLVMAddScalarizerPass(LLVMPassManagerRef PM);
|
||||
|
||||
/** See llvm::createGVNPass function. */
|
||||
void LLVMAddGVNPass(LLVMPassManagerRef PM);
|
||||
|
||||
|
|
|
@ -48,7 +48,7 @@ class BlockFrequencyImpl {
|
|||
|
||||
typedef GraphTraits< Inverse<BlockT *> > GT;
|
||||
|
||||
const uint32_t EntryFreq;
|
||||
static const uint64_t EntryFreq = 1 << 14;
|
||||
|
||||
std::string getBlockName(BasicBlock *BB) const {
|
||||
return BB->getName().str();
|
||||
|
@ -67,7 +67,8 @@ class BlockFrequencyImpl {
|
|||
|
||||
void setBlockFreq(BlockT *BB, BlockFrequency Freq) {
|
||||
Freqs[BB] = Freq;
|
||||
DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") = " << Freq << "\n");
|
||||
DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") = ";
|
||||
printBlockFreq(dbgs(), Freq) << "\n");
|
||||
}
|
||||
|
||||
/// getEdgeFreq - Return edge frequency based on SRC frequency and Src -> Dst
|
||||
|
@ -81,8 +82,9 @@ class BlockFrequencyImpl {
|
|||
///
|
||||
void incBlockFreq(BlockT *BB, BlockFrequency Freq) {
|
||||
Freqs[BB] += Freq;
|
||||
DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") += " << Freq
|
||||
<< " --> " << Freqs[BB] << "\n");
|
||||
DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") += ";
|
||||
printBlockFreq(dbgs(), Freq) << " --> ";
|
||||
printBlockFreq(dbgs(), Freqs[BB]) << "\n");
|
||||
}
|
||||
|
||||
// All blocks in postorder.
|
||||
|
@ -194,7 +196,8 @@ class BlockFrequencyImpl {
|
|||
typename LoopExitProbMap::const_iterator I = LoopExitProb.find(BB);
|
||||
assert(I != LoopExitProb.end() && "Loop header missing from table");
|
||||
Freqs[BB] /= I->second;
|
||||
DEBUG(dbgs() << "Loop header scaled to " << Freqs[BB] << ".\n");
|
||||
DEBUG(dbgs() << "Loop header scaled to ";
|
||||
printBlockFreq(dbgs(), Freqs[BB]) << ".\n");
|
||||
}
|
||||
|
||||
/// doLoop - Propagate block frequency down through the loop.
|
||||
|
@ -256,14 +259,15 @@ class BlockFrequencyImpl {
|
|||
BranchProbability LEP = BranchProbability(N, D);
|
||||
LoopExitProb.insert(std::make_pair(Head, LEP));
|
||||
DEBUG(dbgs() << "LoopExitProb[" << getBlockName(Head) << "] = " << LEP
|
||||
<< " from 1 - " << BackFreq << " / " << getBlockFreq(Head)
|
||||
<< ".\n");
|
||||
<< " from 1 - ";
|
||||
printBlockFreq(dbgs(), BackFreq) << " / ";
|
||||
printBlockFreq(dbgs(), getBlockFreq(Head)) << ".\n");
|
||||
}
|
||||
|
||||
friend class BlockFrequencyInfo;
|
||||
friend class MachineBlockFrequencyInfo;
|
||||
|
||||
BlockFrequencyImpl() : EntryFreq(BlockFrequency::getEntryFrequency()) { }
|
||||
BlockFrequencyImpl() { }
|
||||
|
||||
void doFunction(FunctionT *fn, BlockProbInfoT *bpi) {
|
||||
Fn = fn;
|
||||
|
@ -312,6 +316,9 @@ class BlockFrequencyImpl {
|
|||
}
|
||||
|
||||
public:
|
||||
|
||||
uint64_t getEntryFreq() { return EntryFreq; }
|
||||
|
||||
/// getBlockFreq - Return block frequency. Return 0 if we don't have it.
|
||||
BlockFrequency getBlockFreq(const BlockT *BB) const {
|
||||
typename DenseMap<const BlockT *, BlockFrequency>::const_iterator
|
||||
|
@ -325,14 +332,15 @@ public:
|
|||
OS << "\n\n---- Block Freqs ----\n";
|
||||
for (typename FunctionT::iterator I = Fn->begin(), E = Fn->end(); I != E;) {
|
||||
BlockT *BB = I++;
|
||||
OS << " " << getBlockName(BB) << " = " << getBlockFreq(BB) << "\n";
|
||||
OS << " " << getBlockName(BB) << " = ";
|
||||
printBlockFreq(OS, getBlockFreq(BB)) << "\n";
|
||||
|
||||
for (typename GraphTraits<BlockT *>::ChildIteratorType
|
||||
SI = GraphTraits<BlockT *>::child_begin(BB),
|
||||
SE = GraphTraits<BlockT *>::child_end(BB); SI != SE; ++SI) {
|
||||
BlockT *Succ = *SI;
|
||||
OS << " " << getBlockName(BB) << " -> " << getBlockName(Succ)
|
||||
<< " = " << getEdgeFreq(BB, Succ) << "\n";
|
||||
<< " = "; printBlockFreq(OS, getEdgeFreq(BB, Succ)) << "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -340,6 +348,30 @@ public:
|
|||
void dump() const {
|
||||
print(dbgs());
|
||||
}
|
||||
|
||||
// Utility method that looks up the block frequency associated with BB and
|
||||
// prints it to OS.
|
||||
raw_ostream &printBlockFreq(raw_ostream &OS,
|
||||
const BlockT *BB) {
|
||||
return printBlockFreq(OS, getBlockFreq(BB));
|
||||
}
|
||||
|
||||
raw_ostream &printBlockFreq(raw_ostream &OS,
|
||||
const BlockFrequency &Freq) const {
|
||||
// Convert fixed-point number to decimal.
|
||||
uint64_t Frequency = Freq.getFrequency();
|
||||
OS << Frequency / EntryFreq << ".";
|
||||
uint64_t Rem = Frequency % EntryFreq;
|
||||
uint64_t Eps = 1;
|
||||
do {
|
||||
Rem *= 10;
|
||||
Eps *= 10;
|
||||
OS << Rem / EntryFreq;
|
||||
Rem = Rem % EntryFreq;
|
||||
} while (Rem >= Eps/2);
|
||||
return OS;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -50,6 +50,17 @@ public:
|
|||
/// comparison to the other block frequencies. We do this to avoid the use of
|
||||
/// floating points.
|
||||
BlockFrequency getBlockFreq(const BasicBlock *BB) const;
|
||||
|
||||
// Print the block frequency Freq to OS using the current function's entry
|
||||
// frequency to convert freq into a relative decimal form.
|
||||
raw_ostream &printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const;
|
||||
|
||||
// Convenience method that attempts to look up the frequency associated with
|
||||
// BB and print it to OS.
|
||||
raw_ostream &printBlockFreq(raw_ostream &OS, const BasicBlock *BB) const;
|
||||
|
||||
uint64_t getEntryFreq() const;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/Support/CFG.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/BranchProbability.h"
|
||||
|
@ -98,6 +99,9 @@ public:
|
|||
/// It is guaranteed to fall between 1 and UINT32_MAX.
|
||||
uint32_t getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const;
|
||||
|
||||
uint32_t getEdgeWeight(const BasicBlock *Src,
|
||||
succ_const_iterator Dst) const;
|
||||
|
||||
/// \brief Set the raw edge weight for a given edge.
|
||||
///
|
||||
/// This allows a pass to explicitly set the edge weight for an edge. It can
|
||||
|
|
|
@ -6,52 +6,54 @@
|
|||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This interface is used to build and manipulate a call graph, which is a very
|
||||
// useful tool for interprocedural optimization.
|
||||
//
|
||||
// Every function in a module is represented as a node in the call graph. The
|
||||
// callgraph node keeps track of which functions the are called by the function
|
||||
// corresponding to the node.
|
||||
//
|
||||
// A call graph may contain nodes where the function that they correspond to is
|
||||
// null. These 'external' nodes are used to represent control flow that is not
|
||||
// represented (or analyzable) in the module. In particular, this analysis
|
||||
// builds one external node such that:
|
||||
// 1. All functions in the module without internal linkage will have edges
|
||||
// from this external node, indicating that they could be called by
|
||||
// functions outside of the module.
|
||||
// 2. All functions whose address is used for something more than a direct
|
||||
// call, for example being stored into a memory location will also have an
|
||||
// edge from this external node. Since they may be called by an unknown
|
||||
// caller later, they must be tracked as such.
|
||||
//
|
||||
// There is a second external node added for calls that leave this module.
|
||||
// Functions have a call edge to the external node iff:
|
||||
// 1. The function is external, reflecting the fact that they could call
|
||||
// anything without internal linkage or that has its address taken.
|
||||
// 2. The function contains an indirect function call.
|
||||
//
|
||||
// As an extension in the future, there may be multiple nodes with a null
|
||||
// function. These will be used when we can prove (through pointer analysis)
|
||||
// that an indirect call site can call only a specific set of functions.
|
||||
//
|
||||
// Because of these properties, the CallGraph captures a conservative superset
|
||||
// of all of the caller-callee relationships, which is useful for
|
||||
// transformations.
|
||||
//
|
||||
// The CallGraph class also attempts to figure out what the root of the
|
||||
// CallGraph is, which it currently does by looking for a function named 'main'.
|
||||
// If no function named 'main' is found, the external node is used as the entry
|
||||
// node, reflecting the fact that any function without internal linkage could
|
||||
// be called into (which is common for libraries).
|
||||
//
|
||||
/// \file
|
||||
///
|
||||
/// This file provides interfaces used to build and manipulate a call graph,
|
||||
/// which is a very useful tool for interprocedural optimization.
|
||||
///
|
||||
/// Every function in a module is represented as a node in the call graph. The
|
||||
/// callgraph node keeps track of which functions are called by the
|
||||
/// function corresponding to the node.
|
||||
///
|
||||
/// A call graph may contain nodes where the function that they correspond to
|
||||
/// is null. These 'external' nodes are used to represent control flow that is
|
||||
/// not represented (or analyzable) in the module. In particular, this
|
||||
/// analysis builds one external node such that:
|
||||
/// 1. All functions in the module without internal linkage will have edges
|
||||
/// from this external node, indicating that they could be called by
|
||||
/// functions outside of the module.
|
||||
/// 2. All functions whose address is used for something more than a direct
|
||||
/// call, for example being stored into a memory location will also have
|
||||
/// an edge from this external node. Since they may be called by an
|
||||
/// unknown caller later, they must be tracked as such.
|
||||
///
|
||||
/// There is a second external node added for calls that leave this module.
|
||||
/// Functions have a call edge to the external node iff:
|
||||
/// 1. The function is external, reflecting the fact that they could call
|
||||
/// anything without internal linkage or that has its address taken.
|
||||
/// 2. The function contains an indirect function call.
|
||||
///
|
||||
/// As an extension in the future, there may be multiple nodes with a null
|
||||
/// function. These will be used when we can prove (through pointer analysis)
|
||||
/// that an indirect call site can call only a specific set of functions.
|
||||
///
|
||||
/// Because of these properties, the CallGraph captures a conservative superset
|
||||
/// of all of the caller-callee relationships, which is useful for
|
||||
/// transformations.
|
||||
///
|
||||
/// The CallGraph class also attempts to figure out what the root of the
|
||||
/// CallGraph is, which it currently does by looking for a function named
|
||||
/// 'main'. If no function named 'main' is found, the external node is used as
|
||||
/// the entry node, reflecting the fact that any function without internal
|
||||
/// linkage could be called into (which is common for libraries).
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_ANALYSIS_CALLGRAPH_H
|
||||
#define LLVM_ANALYSIS_CALLGRAPH_H
|
||||
|
||||
#include "llvm/ADT/GraphTraits.h"
|
||||
#include "llvm/ADT/OwningPtr.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/Pass.h"
|
||||
|
@ -66,171 +68,142 @@ class Function;
|
|||
class Module;
|
||||
class CallGraphNode;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CallGraph class definition
|
||||
//
|
||||
class CallGraph : public ModulePass {
|
||||
Module *Mod; // The module this call graph represents
|
||||
/// \brief The basic data container for the call graph of a \c Module of IR.
|
||||
///
|
||||
/// This class exposes the interface to the call graph for a module of IR.
|
||||
///
|
||||
/// The core call graph itself can also be updated to reflect changes to the IR.
|
||||
class CallGraph {
|
||||
Module &M;
|
||||
|
||||
typedef std::map<const Function *, CallGraphNode *> FunctionMapTy;
|
||||
FunctionMapTy FunctionMap; // Map from a function to its node
|
||||
|
||||
// Root is root of the call graph, or the external node if a 'main' function
|
||||
// couldn't be found.
|
||||
//
|
||||
/// \brief A map from \c Function* to \c CallGraphNode*.
|
||||
FunctionMapTy FunctionMap;
|
||||
|
||||
/// \brief Root is root of the call graph, or the external node if a 'main'
|
||||
/// function couldn't be found.
|
||||
CallGraphNode *Root;
|
||||
|
||||
// ExternalCallingNode - This node has edges to all external functions and
|
||||
// those internal functions that have their address taken.
|
||||
/// \brief This node has edges to all external functions and those internal
|
||||
/// functions that have their address taken.
|
||||
CallGraphNode *ExternalCallingNode;
|
||||
|
||||
// CallsExternalNode - This node has edges to it from all functions making
|
||||
// indirect calls or calling an external function.
|
||||
/// \brief This node has edges to it from all functions making indirect calls
|
||||
/// or calling an external function.
|
||||
CallGraphNode *CallsExternalNode;
|
||||
|
||||
/// Replace the function represented by this node by another.
|
||||
/// \brief Replace the function represented by this node by another.
|
||||
///
|
||||
/// This does not rescan the body of the function, so it is suitable when
|
||||
/// splicing the body of one function to another while also updating all
|
||||
/// callers from the old function to the new.
|
||||
///
|
||||
void spliceFunction(const Function *From, const Function *To);
|
||||
|
||||
// Add a function to the call graph, and link the node to all of the functions
|
||||
// that it calls.
|
||||
/// \brief Add a function to the call graph, and link the node to all of the
|
||||
/// functions that it calls.
|
||||
void addToCallGraph(Function *F);
|
||||
|
||||
public:
|
||||
static char ID; // Class identification, replacement for typeinfo
|
||||
//===---------------------------------------------------------------------
|
||||
// Accessors.
|
||||
//
|
||||
CallGraph(Module &M);
|
||||
~CallGraph();
|
||||
|
||||
void print(raw_ostream &OS) const;
|
||||
void dump() const;
|
||||
|
||||
typedef FunctionMapTy::iterator iterator;
|
||||
typedef FunctionMapTy::const_iterator const_iterator;
|
||||
|
||||
/// getModule - Return the module the call graph corresponds to.
|
||||
///
|
||||
Module &getModule() const { return *Mod; }
|
||||
/// \brief Returns the module the call graph corresponds to.
|
||||
Module &getModule() const { return M; }
|
||||
|
||||
inline iterator begin() { return FunctionMap.begin(); }
|
||||
inline iterator end() { return FunctionMap.end(); }
|
||||
inline iterator begin() { return FunctionMap.begin(); }
|
||||
inline iterator end() { return FunctionMap.end(); }
|
||||
inline const_iterator begin() const { return FunctionMap.begin(); }
|
||||
inline const_iterator end() const { return FunctionMap.end(); }
|
||||
inline const_iterator end() const { return FunctionMap.end(); }
|
||||
|
||||
// Subscripting operators, return the call graph node for the provided
|
||||
// function
|
||||
/// \brief Returns the call graph node for the provided function.
|
||||
inline const CallGraphNode *operator[](const Function *F) const {
|
||||
const_iterator I = FunctionMap.find(F);
|
||||
assert(I != FunctionMap.end() && "Function not in callgraph!");
|
||||
return I->second;
|
||||
}
|
||||
|
||||
/// \brief Returns the call graph node for the provided function.
|
||||
inline CallGraphNode *operator[](const Function *F) {
|
||||
const_iterator I = FunctionMap.find(F);
|
||||
assert(I != FunctionMap.end() && "Function not in callgraph!");
|
||||
return I->second;
|
||||
}
|
||||
|
||||
/// Returns the CallGraphNode which is used to represent undetermined calls
|
||||
/// into the callgraph.
|
||||
/// \brief Returns the \c CallGraphNode which is used to represent
|
||||
/// undetermined calls into the callgraph.
|
||||
CallGraphNode *getExternalCallingNode() const { return ExternalCallingNode; }
|
||||
CallGraphNode *getCallsExternalNode() const { return CallsExternalNode; }
|
||||
|
||||
/// Return the root/main method in the module, or some other root node, such
|
||||
/// as the externalcallingnode.
|
||||
CallGraphNode *getRoot() { return Root; }
|
||||
const CallGraphNode *getRoot() const { return Root; }
|
||||
CallGraphNode *getCallsExternalNode() const { return CallsExternalNode; }
|
||||
|
||||
//===---------------------------------------------------------------------
|
||||
// Functions to keep a call graph up to date with a function that has been
|
||||
// modified.
|
||||
//
|
||||
|
||||
/// removeFunctionFromModule - Unlink the function from this module, returning
|
||||
/// it. Because this removes the function from the module, the call graph
|
||||
/// node is destroyed. This is only valid if the function does not call any
|
||||
/// other functions (ie, there are no edges in it's CGN). The easiest way to
|
||||
/// do this is to dropAllReferences before calling this.
|
||||
/// \brief Unlink the function from this module, returning it.
|
||||
///
|
||||
/// Because this removes the function from the module, the call graph node is
|
||||
/// destroyed. This is only valid if the function does not call any other
|
||||
/// functions (i.e., there are no edges in its CGN). The easiest way to do
|
||||
/// this is to dropAllReferences before calling this.
|
||||
Function *removeFunctionFromModule(CallGraphNode *CGN);
|
||||
|
||||
/// getOrInsertFunction - This method is identical to calling operator[], but
|
||||
/// it will insert a new CallGraphNode for the specified function if one does
|
||||
/// not already exist.
|
||||
/// \brief Similar to operator[], but this will insert a new CallGraphNode for
|
||||
/// \c F if one does not already exist.
|
||||
CallGraphNode *getOrInsertFunction(const Function *F);
|
||||
|
||||
CallGraph();
|
||||
virtual ~CallGraph() { releaseMemory(); }
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
|
||||
virtual bool runOnModule(Module &M);
|
||||
virtual void releaseMemory();
|
||||
|
||||
void print(raw_ostream &o, const Module *) const;
|
||||
void dump() const;
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CallGraphNode class definition.
|
||||
//
|
||||
/// \brief A node in the call graph for a module.
|
||||
///
|
||||
/// Typically represents a function in the call graph. There are also special
|
||||
/// "null" nodes used to represent theoretical entries in the call graph.
|
||||
class CallGraphNode {
|
||||
friend class CallGraph;
|
||||
|
||||
AssertingVH<Function> F;
|
||||
|
||||
// CallRecord - This is a pair of the calling instruction (a call or invoke)
|
||||
// and the callgraph node being called.
|
||||
public:
|
||||
typedef std::pair<WeakVH, CallGraphNode*> CallRecord;
|
||||
private:
|
||||
std::vector<CallRecord> CalledFunctions;
|
||||
|
||||
/// NumReferences - This is the number of times that this CallGraphNode occurs
|
||||
/// in the CalledFunctions array of this or other CallGraphNodes.
|
||||
unsigned NumReferences;
|
||||
/// \brief A pair of the calling instruction (a call or invoke)
|
||||
/// and the call graph node being called.
|
||||
typedef std::pair<WeakVH, CallGraphNode *> CallRecord;
|
||||
|
||||
CallGraphNode(const CallGraphNode &) LLVM_DELETED_FUNCTION;
|
||||
void operator=(const CallGraphNode &) LLVM_DELETED_FUNCTION;
|
||||
|
||||
void DropRef() { --NumReferences; }
|
||||
void AddRef() { ++NumReferences; }
|
||||
public:
|
||||
typedef std::vector<CallRecord> CalledFunctionsVector;
|
||||
|
||||
|
||||
// CallGraphNode ctor - Create a node for the specified function.
|
||||
inline CallGraphNode(Function *f) : F(f), NumReferences(0) {}
|
||||
/// \brief Creates a node for the specified function.
|
||||
inline CallGraphNode(Function *F) : F(F), NumReferences(0) {}
|
||||
|
||||
~CallGraphNode() {
|
||||
assert(NumReferences == 0 && "Node deleted while references remain");
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------
|
||||
// Accessor methods.
|
||||
//
|
||||
|
||||
typedef std::vector<CallRecord>::iterator iterator;
|
||||
typedef std::vector<CallRecord>::const_iterator const_iterator;
|
||||
|
||||
// getFunction - Return the function that this call graph node represents.
|
||||
/// \brief Returns the function that this call graph node represents.
|
||||
Function *getFunction() const { return F; }
|
||||
|
||||
inline iterator begin() { return CalledFunctions.begin(); }
|
||||
inline iterator end() { return CalledFunctions.end(); }
|
||||
inline iterator end() { return CalledFunctions.end(); }
|
||||
inline const_iterator begin() const { return CalledFunctions.begin(); }
|
||||
inline const_iterator end() const { return CalledFunctions.end(); }
|
||||
inline const_iterator end() const { return CalledFunctions.end(); }
|
||||
inline bool empty() const { return CalledFunctions.empty(); }
|
||||
inline unsigned size() const { return (unsigned)CalledFunctions.size(); }
|
||||
|
||||
/// getNumReferences - Return the number of other CallGraphNodes in this
|
||||
/// CallGraph that reference this node in their callee list.
|
||||
/// \brief Returns the number of other CallGraphNodes in this CallGraph that
|
||||
/// reference this node in their callee list.
|
||||
unsigned getNumReferences() const { return NumReferences; }
|
||||
|
||||
// Subscripting operator - Return the i'th called function.
|
||||
//
|
||||
|
||||
/// \brief Returns the i'th called function.
|
||||
CallGraphNode *operator[](unsigned i) const {
|
||||
assert(i < CalledFunctions.size() && "Invalid index");
|
||||
return CalledFunctions[i].second;
|
||||
}
|
||||
|
||||
/// dump - Print out this call graph node.
|
||||
///
|
||||
/// \brief Print out this call graph node.
|
||||
void dump() const;
|
||||
void print(raw_ostream &OS) const;
|
||||
|
||||
|
@ -239,29 +212,25 @@ public:
|
|||
// modified
|
||||
//
|
||||
|
||||
/// removeAllCalledFunctions - As the name implies, this removes all edges
|
||||
/// from this CallGraphNode to any functions it calls.
|
||||
/// \brief Removes all edges from this CallGraphNode to any functions it
|
||||
/// calls.
|
||||
void removeAllCalledFunctions() {
|
||||
while (!CalledFunctions.empty()) {
|
||||
CalledFunctions.back().second->DropRef();
|
||||
CalledFunctions.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
/// stealCalledFunctionsFrom - Move all the callee information from N to this
|
||||
/// node.
|
||||
|
||||
/// \brief Moves all the callee information from N to this node.
|
||||
void stealCalledFunctionsFrom(CallGraphNode *N) {
|
||||
assert(CalledFunctions.empty() &&
|
||||
"Cannot steal callsite information if I already have some");
|
||||
std::swap(CalledFunctions, N->CalledFunctions);
|
||||
}
|
||||
|
||||
|
||||
/// addCalledFunction - Add a function to the list of functions called by this
|
||||
/// one.
|
||||
/// \brief Adds a function to the list of functions called by this one.
|
||||
void addCalledFunction(CallSite CS, CallGraphNode *M) {
|
||||
assert(!CS.getInstruction() ||
|
||||
!CS.getCalledFunction() ||
|
||||
assert(!CS.getInstruction() || !CS.getCalledFunction() ||
|
||||
!CS.getCalledFunction()->isIntrinsic());
|
||||
CalledFunctions.push_back(std::make_pair(CS.getInstruction(), M));
|
||||
M->AddRef();
|
||||
|
@ -272,32 +241,152 @@ public:
|
|||
*I = CalledFunctions.back();
|
||||
CalledFunctions.pop_back();
|
||||
}
|
||||
|
||||
|
||||
/// removeCallEdgeFor - This method removes the edge in the node for the
|
||||
/// specified call site. Note that this method takes linear time, so it
|
||||
/// should be used sparingly.
|
||||
|
||||
/// \brief Removes the edge in the node for the specified call site.
|
||||
///
|
||||
/// Note that this method takes linear time, so it should be used sparingly.
|
||||
void removeCallEdgeFor(CallSite CS);
|
||||
|
||||
/// removeAnyCallEdgeTo - This method removes all call edges from this node
|
||||
/// to the specified callee function. This takes more time to execute than
|
||||
/// removeCallEdgeTo, so it should not be used unless necessary.
|
||||
/// \brief Removes all call edges from this node to the specified callee
|
||||
/// function.
|
||||
///
|
||||
/// This takes more time to execute than removeCallEdgeTo, so it should not
|
||||
/// be used unless necessary.
|
||||
void removeAnyCallEdgeTo(CallGraphNode *Callee);
|
||||
|
||||
/// removeOneAbstractEdgeTo - Remove one edge associated with a null callsite
|
||||
/// from this node to the specified callee function.
|
||||
/// \brief Removes one edge associated with a null callsite from this node to
|
||||
/// the specified callee function.
|
||||
void removeOneAbstractEdgeTo(CallGraphNode *Callee);
|
||||
|
||||
/// replaceCallEdge - This method replaces the edge in the node for the
|
||||
/// specified call site with a new one. Note that this method takes linear
|
||||
/// time, so it should be used sparingly.
|
||||
|
||||
/// \brief Replaces the edge in the node for the specified call site with a
|
||||
/// new one.
|
||||
///
|
||||
/// Note that this method takes linear time, so it should be used sparingly.
|
||||
void replaceCallEdge(CallSite CS, CallSite NewCS, CallGraphNode *NewNode);
|
||||
|
||||
/// allReferencesDropped - This is a special function that should only be
|
||||
/// used by the CallGraph class.
|
||||
void allReferencesDropped() {
|
||||
NumReferences = 0;
|
||||
|
||||
private:
|
||||
friend class CallGraph;
|
||||
|
||||
AssertingVH<Function> F;
|
||||
|
||||
std::vector<CallRecord> CalledFunctions;
|
||||
|
||||
/// \brief The number of times that this CallGraphNode occurs in the
|
||||
/// CalledFunctions array of this or other CallGraphNodes.
|
||||
unsigned NumReferences;
|
||||
|
||||
CallGraphNode(const CallGraphNode &) LLVM_DELETED_FUNCTION;
|
||||
void operator=(const CallGraphNode &) LLVM_DELETED_FUNCTION;
|
||||
|
||||
void DropRef() { --NumReferences; }
|
||||
void AddRef() { ++NumReferences; }
|
||||
|
||||
/// \brief A special function that should only be used by the CallGraph class.
|
||||
void allReferencesDropped() { NumReferences = 0; }
|
||||
};
|
||||
|
||||
/// \brief An analysis pass to compute the \c CallGraph for a \c Module.
|
||||
///
|
||||
/// This class implements the concept of an analysis pass used by the \c
|
||||
/// ModuleAnalysisManager to run an analysis over a module and cache the
|
||||
/// resulting data.
|
||||
class CallGraphAnalysis {
|
||||
public:
|
||||
/// \brief A formulaic typedef to inform clients of the result type.
|
||||
typedef CallGraph Result;
|
||||
|
||||
static void *ID() { return (void *)&PassID; }
|
||||
|
||||
/// \brief Compute the \c CallGraph for the module \c M.
|
||||
///
|
||||
/// The real work here is done in the \c CallGraph constructor.
|
||||
CallGraph run(Module *M) { return CallGraph(*M); }
|
||||
|
||||
private:
|
||||
static char PassID;
|
||||
};
|
||||
|
||||
/// \brief The \c ModulePass which wraps up a \c CallGraph and the logic to
|
||||
/// build it.
|
||||
///
|
||||
/// This class exposes both the interface to the call graph container and the
|
||||
/// module pass which runs over a module of IR and produces the call graph. The
|
||||
/// call graph interface is entirelly a wrapper around a \c CallGraph object
|
||||
/// which is stored internally for each module.
|
||||
class CallGraphWrapperPass : public ModulePass {
|
||||
OwningPtr<CallGraph> G;
|
||||
|
||||
public:
|
||||
static char ID; // Class identification, replacement for typeinfo
|
||||
|
||||
CallGraphWrapperPass();
|
||||
virtual ~CallGraphWrapperPass();
|
||||
|
||||
/// \brief The internal \c CallGraph around which the rest of this interface
|
||||
/// is wrapped.
|
||||
const CallGraph &getCallGraph() const { return *G; }
|
||||
CallGraph &getCallGraph() { return *G; }
|
||||
|
||||
typedef CallGraph::iterator iterator;
|
||||
typedef CallGraph::const_iterator const_iterator;
|
||||
|
||||
/// \brief Returns the module the call graph corresponds to.
|
||||
Module &getModule() const { return G->getModule(); }
|
||||
|
||||
inline iterator begin() { return G->begin(); }
|
||||
inline iterator end() { return G->end(); }
|
||||
inline const_iterator begin() const { return G->begin(); }
|
||||
inline const_iterator end() const { return G->end(); }
|
||||
|
||||
/// \brief Returns the call graph node for the provided function.
|
||||
inline const CallGraphNode *operator[](const Function *F) const {
|
||||
return (*G)[F];
|
||||
}
|
||||
|
||||
/// \brief Returns the call graph node for the provided function.
|
||||
inline CallGraphNode *operator[](const Function *F) { return (*G)[F]; }
|
||||
|
||||
/// \brief Returns the \c CallGraphNode which is used to represent
|
||||
/// undetermined calls into the callgraph.
|
||||
CallGraphNode *getExternalCallingNode() const {
|
||||
return G->getExternalCallingNode();
|
||||
}
|
||||
|
||||
CallGraphNode *getCallsExternalNode() const {
|
||||
return G->getCallsExternalNode();
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------
|
||||
// Functions to keep a call graph up to date with a function that has been
|
||||
// modified.
|
||||
//
|
||||
|
||||
/// \brief Unlink the function from this module, returning it.
|
||||
///
|
||||
/// Because this removes the function from the module, the call graph node is
|
||||
/// destroyed. This is only valid if the function does not call any other
|
||||
/// functions (ie, there are no edges in it's CGN). The easiest way to do
|
||||
/// this is to dropAllReferences before calling this.
|
||||
Function *removeFunctionFromModule(CallGraphNode *CGN) {
|
||||
return G->removeFunctionFromModule(CGN);
|
||||
}
|
||||
|
||||
/// \brief Similar to operator[], but this will insert a new CallGraphNode for
|
||||
/// \c F if one does not already exist.
|
||||
CallGraphNode *getOrInsertFunction(const Function *F) {
|
||||
return G->getOrInsertFunction(F);
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------
|
||||
// Implementation of the ModulePass interface needed here.
|
||||
//
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
|
||||
virtual bool runOnModule(Module &M);
|
||||
virtual void releaseMemory();
|
||||
|
||||
void print(raw_ostream &o, const Module *) const;
|
||||
void dump() const;
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -307,11 +396,12 @@ public:
|
|||
|
||||
// Provide graph traits for traversing call graphs using standard graph
|
||||
// traversals.
|
||||
template <> struct GraphTraits<CallGraphNode*> {
|
||||
template <> struct GraphTraits<CallGraphNode *> {
|
||||
typedef CallGraphNode NodeType;
|
||||
|
||||
typedef CallGraphNode::CallRecord CGNPairTy;
|
||||
typedef std::pointer_to_unary_function<CGNPairTy, CallGraphNode*> CGNDerefFun;
|
||||
typedef std::pointer_to_unary_function<CGNPairTy, CallGraphNode *>
|
||||
CGNDerefFun;
|
||||
|
||||
static NodeType *getEntryNode(CallGraphNode *CGN) { return CGN; }
|
||||
|
||||
|
@ -320,55 +410,54 @@ template <> struct GraphTraits<CallGraphNode*> {
|
|||
static inline ChildIteratorType child_begin(NodeType *N) {
|
||||
return map_iterator(N->begin(), CGNDerefFun(CGNDeref));
|
||||
}
|
||||
static inline ChildIteratorType child_end (NodeType *N) {
|
||||
static inline ChildIteratorType child_end(NodeType *N) {
|
||||
return map_iterator(N->end(), CGNDerefFun(CGNDeref));
|
||||
}
|
||||
|
||||
static CallGraphNode *CGNDeref(CGNPairTy P) {
|
||||
return P.second;
|
||||
}
|
||||
|
||||
static CallGraphNode *CGNDeref(CGNPairTy P) { return P.second; }
|
||||
};
|
||||
|
||||
template <> struct GraphTraits<const CallGraphNode*> {
|
||||
template <> struct GraphTraits<const CallGraphNode *> {
|
||||
typedef const CallGraphNode NodeType;
|
||||
typedef NodeType::const_iterator ChildIteratorType;
|
||||
|
||||
static NodeType *getEntryNode(const CallGraphNode *CGN) { return CGN; }
|
||||
static inline ChildIteratorType child_begin(NodeType *N) { return N->begin();}
|
||||
static inline ChildIteratorType child_end (NodeType *N) { return N->end(); }
|
||||
static inline ChildIteratorType child_begin(NodeType *N) {
|
||||
return N->begin();
|
||||
}
|
||||
static inline ChildIteratorType child_end(NodeType *N) { return N->end(); }
|
||||
};
|
||||
|
||||
template<> struct GraphTraits<CallGraph*> : public GraphTraits<CallGraphNode*> {
|
||||
template <>
|
||||
struct GraphTraits<CallGraph *> : public GraphTraits<CallGraphNode *> {
|
||||
static NodeType *getEntryNode(CallGraph *CGN) {
|
||||
return CGN->getExternalCallingNode(); // Start at the external node!
|
||||
return CGN->getExternalCallingNode(); // Start at the external node!
|
||||
}
|
||||
typedef std::pair<const Function*, CallGraphNode*> PairTy;
|
||||
typedef std::pointer_to_unary_function<PairTy, CallGraphNode&> DerefFun;
|
||||
typedef std::pair<const Function *, CallGraphNode *> PairTy;
|
||||
typedef std::pointer_to_unary_function<PairTy, CallGraphNode &> DerefFun;
|
||||
|
||||
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
|
||||
typedef mapped_iterator<CallGraph::iterator, DerefFun> nodes_iterator;
|
||||
static nodes_iterator nodes_begin(CallGraph *CG) {
|
||||
return map_iterator(CG->begin(), DerefFun(CGdereference));
|
||||
}
|
||||
static nodes_iterator nodes_end (CallGraph *CG) {
|
||||
static nodes_iterator nodes_end(CallGraph *CG) {
|
||||
return map_iterator(CG->end(), DerefFun(CGdereference));
|
||||
}
|
||||
|
||||
static CallGraphNode &CGdereference(PairTy P) {
|
||||
return *P.second;
|
||||
}
|
||||
static CallGraphNode &CGdereference(PairTy P) { return *P.second; }
|
||||
};
|
||||
|
||||
template<> struct GraphTraits<const CallGraph*> :
|
||||
public GraphTraits<const CallGraphNode*> {
|
||||
template <>
|
||||
struct GraphTraits<const CallGraph *> : public GraphTraits<
|
||||
const CallGraphNode *> {
|
||||
static NodeType *getEntryNode(const CallGraph *CGN) {
|
||||
return CGN->getExternalCallingNode();
|
||||
}
|
||||
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
|
||||
typedef CallGraph::const_iterator nodes_iterator;
|
||||
static nodes_iterator nodes_begin(const CallGraph *CG) { return CG->begin(); }
|
||||
static nodes_iterator nodes_end (const CallGraph *CG) { return CG->end(); }
|
||||
static nodes_iterator nodes_end(const CallGraph *CG) { return CG->end(); }
|
||||
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
|
|
@ -19,50 +19,62 @@
|
|||
|
||||
namespace llvm {
|
||||
|
||||
template <class Analysis, bool Simple>
|
||||
/// \brief Default policy for obtaining the graph to render from an analysis
/// pass.
///
/// Covers the case where the graph handle type \c GraphT is simply
/// \c AnalysisT*: the analysis pointer is handed back unchanged.
template <typename AnalysisT, typename GraphT = AnalysisT *>
struct DefaultAnalysisGraphTraits {
  /// \brief Returns \c Pass itself, implicitly converted to \c GraphT.
  static GraphT getGraph(AnalysisT *Pass) {
    return Pass;
  }
};
|
||||
|
||||
template <
|
||||
typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
|
||||
typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT> >
|
||||
class DOTGraphTraitsViewer : public FunctionPass {
|
||||
public:
|
||||
DOTGraphTraitsViewer(StringRef GraphName, char &ID)
|
||||
: FunctionPass(ID), Name(GraphName) {}
|
||||
: FunctionPass(ID), Name(GraphName) {}
|
||||
|
||||
virtual bool runOnFunction(Function &F) {
|
||||
Analysis *Graph = &getAnalysis<Analysis>();
|
||||
std::string GraphName = DOTGraphTraits<Analysis*>::getGraphName(Graph);
|
||||
GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis<AnalysisT>());
|
||||
std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph);
|
||||
std::string Title = GraphName + " for '" + F.getName().str() + "' function";
|
||||
|
||||
ViewGraph(Graph, Name, Simple, Title);
|
||||
ViewGraph(Graph, Name, IsSimple, Title);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesAll();
|
||||
AU.addRequired<Analysis>();
|
||||
AU.addRequired<AnalysisT>();
|
||||
}
|
||||
|
||||
private:
|
||||
std::string Name;
|
||||
};
|
||||
|
||||
template <class Analysis, bool Simple>
|
||||
template <
|
||||
typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
|
||||
typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT> >
|
||||
class DOTGraphTraitsPrinter : public FunctionPass {
|
||||
public:
|
||||
DOTGraphTraitsPrinter(StringRef GraphName, char &ID)
|
||||
: FunctionPass(ID), Name(GraphName) {}
|
||||
: FunctionPass(ID), Name(GraphName) {}
|
||||
|
||||
virtual bool runOnFunction(Function &F) {
|
||||
Analysis *Graph = &getAnalysis<Analysis>();
|
||||
GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis<AnalysisT>());
|
||||
std::string Filename = Name + "." + F.getName().str() + ".dot";
|
||||
std::string ErrorInfo;
|
||||
|
||||
errs() << "Writing '" << Filename << "'...";
|
||||
|
||||
raw_fd_ostream File(Filename.c_str(), ErrorInfo);
|
||||
std::string GraphName = DOTGraphTraits<Analysis*>::getGraphName(Graph);
|
||||
std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph);
|
||||
std::string Title = GraphName + " for '" + F.getName().str() + "' function";
|
||||
|
||||
if (ErrorInfo.empty())
|
||||
WriteGraph(File, Graph, Simple, Title);
|
||||
WriteGraph(File, Graph, IsSimple, Title);
|
||||
else
|
||||
errs() << " error opening file for writing!";
|
||||
errs() << "\n";
|
||||
|
@ -72,55 +84,59 @@ public:
|
|||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesAll();
|
||||
AU.addRequired<Analysis>();
|
||||
AU.addRequired<AnalysisT>();
|
||||
}
|
||||
|
||||
private:
|
||||
std::string Name;
|
||||
};
|
||||
|
||||
template <class Analysis, bool Simple>
|
||||
template <
|
||||
typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
|
||||
typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT> >
|
||||
class DOTGraphTraitsModuleViewer : public ModulePass {
|
||||
public:
|
||||
DOTGraphTraitsModuleViewer(StringRef GraphName, char &ID)
|
||||
: ModulePass(ID), Name(GraphName) {}
|
||||
: ModulePass(ID), Name(GraphName) {}
|
||||
|
||||
virtual bool runOnModule(Module &M) {
|
||||
Analysis *Graph = &getAnalysis<Analysis>();
|
||||
std::string Title = DOTGraphTraits<Analysis*>::getGraphName(Graph);
|
||||
GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis<AnalysisT>());
|
||||
std::string Title = DOTGraphTraits<GraphT>::getGraphName(Graph);
|
||||
|
||||
ViewGraph(Graph, Name, Simple, Title);
|
||||
ViewGraph(Graph, Name, IsSimple, Title);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesAll();
|
||||
AU.addRequired<Analysis>();
|
||||
AU.addRequired<AnalysisT>();
|
||||
}
|
||||
|
||||
private:
|
||||
std::string Name;
|
||||
};
|
||||
|
||||
template <class Analysis, bool Simple>
|
||||
template <
|
||||
typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
|
||||
typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT> >
|
||||
class DOTGraphTraitsModulePrinter : public ModulePass {
|
||||
public:
|
||||
DOTGraphTraitsModulePrinter(StringRef GraphName, char &ID)
|
||||
: ModulePass(ID), Name(GraphName) {}
|
||||
: ModulePass(ID), Name(GraphName) {}
|
||||
|
||||
virtual bool runOnModule(Module &M) {
|
||||
Analysis *Graph = &getAnalysis<Analysis>();
|
||||
GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis<AnalysisT>());
|
||||
std::string Filename = Name + ".dot";
|
||||
std::string ErrorInfo;
|
||||
|
||||
errs() << "Writing '" << Filename << "'...";
|
||||
|
||||
raw_fd_ostream File(Filename.c_str(), ErrorInfo);
|
||||
std::string Title = DOTGraphTraits<Analysis*>::getGraphName(Graph);
|
||||
std::string Title = DOTGraphTraits<GraphT>::getGraphName(Graph);
|
||||
|
||||
if (ErrorInfo.empty())
|
||||
WriteGraph(File, Graph, Simple, Title);
|
||||
WriteGraph(File, Graph, IsSimple, Title);
|
||||
else
|
||||
errs() << " error opening file for writing!";
|
||||
errs() << "\n";
|
||||
|
@ -130,7 +146,7 @@ public:
|
|||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesAll();
|
||||
AU.addRequired<Analysis>();
|
||||
AU.addRequired<AnalysisT>();
|
||||
}
|
||||
|
||||
private:
|
||||
|
|
|
@ -346,12 +346,14 @@ public:
|
|||
DomTreeNodeBase<NodeT> *getRootNode() { return RootNode; }
|
||||
const DomTreeNodeBase<NodeT> *getRootNode() const { return RootNode; }
|
||||
|
||||
/// Get all nodes dominated by R, including R itself. Return true on success.
|
||||
/// Get all nodes dominated by R, including R itself.
|
||||
void getDescendants(NodeT *R, SmallVectorImpl<NodeT *> &Result) const {
|
||||
Result.clear();
|
||||
const DomTreeNodeBase<NodeT> *RN = getNode(R);
|
||||
if (RN == NULL)
|
||||
return; // If R is unreachable, it will not be present in the DOM tree.
|
||||
SmallVector<const DomTreeNodeBase<NodeT> *, 8> WL;
|
||||
WL.push_back(RN);
|
||||
Result.clear();
|
||||
|
||||
while (!WL.empty()) {
|
||||
const DomTreeNodeBase<NodeT> *N = WL.pop_back_val();
|
||||
|
@ -769,7 +771,7 @@ public:
|
|||
return DT->getRootNode();
|
||||
}
|
||||
|
||||
/// Get all nodes dominated by R, including R itself. Return true on success.
|
||||
/// Get all nodes dominated by R, including R itself.
|
||||
void getDescendants(BasicBlock *R,
|
||||
SmallVectorImpl<BasicBlock *> &Result) const {
|
||||
DT->getDescendants(R, Result);
|
||||
|
|
|
@ -34,7 +34,7 @@ namespace llvm {
|
|||
// IntervalPartition - This class builds and holds an "interval partition" for
|
||||
// a function. This partition divides the control flow graph into a set of
|
||||
// maximal intervals, as defined with the properties above. Intuitively, an
|
||||
// interval is a (possibly nonexistent) loop with a "tail" of non looping
|
||||
// interval is a (possibly nonexistent) loop with a "tail" of non-looping
|
||||
// nodes following it.
|
||||
//
|
||||
class IntervalPartition : public FunctionPass {
|
||||
|
|
|
@ -33,8 +33,10 @@
|
|||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/ADT/GraphTraits.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/Analysis/Dominators.h"
|
||||
#include "llvm/IR/Instruction.h"
|
||||
#include "llvm/Support/CFG.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include <algorithm>
|
||||
|
||||
|
@ -53,6 +55,7 @@ class Loop;
|
|||
class MDNode;
|
||||
class PHINode;
|
||||
class raw_ostream;
|
||||
template<class N> class DominatorTreeBase;
|
||||
template<class N, class M> class LoopInfoBase;
|
||||
template<class N, class M> class LoopBase;
|
||||
|
||||
|
@ -228,6 +231,18 @@ public:
|
|||
/// A latch block is a block that contains a branch back to the header.
|
||||
BlockT *getLoopLatch() const;
|
||||
|
||||
/// getLoopLatches - Return all loop latch blocks of this loop. A latch block
|
||||
/// is a block that contains a branch back to the header.
|
||||
void getLoopLatches(SmallVectorImpl<BlockT *> &LoopLatches) const {
|
||||
BlockT *H = getHeader();
|
||||
typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
|
||||
for (typename InvBlockTraits::ChildIteratorType I =
|
||||
InvBlockTraits::child_begin(H),
|
||||
E = InvBlockTraits::child_end(H); I != E; ++I)
|
||||
if (contains(*I))
|
||||
LoopLatches.push_back(*I);
|
||||
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// APIs for updating loop information after changing the CFG
|
||||
//
|
||||
|
|
|
@ -15,8 +15,10 @@
|
|||
#ifndef LLVM_ANALYSIS_LOOPINFOIMPL_H
|
||||
#define LLVM_ANALYSIS_LOOPINFOIMPL_H
|
||||
|
||||
#include "llvm/ADT/DepthFirstIterator.h"
|
||||
#include "llvm/ADT/PostOrderIterator.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/Analysis/Dominators.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
|
|
|
@ -190,6 +190,8 @@ public:
|
|||
return knownSize(SizeOffset) && knownOffset(SizeOffset);
|
||||
}
|
||||
|
||||
// These are "private", except they can't actually be made private. Only
|
||||
// compute() should be used by external users.
|
||||
SizeOffsetType visitAllocaInst(AllocaInst &I);
|
||||
SizeOffsetType visitArgument(Argument &A);
|
||||
SizeOffsetType visitCallSite(CallSite CS);
|
||||
|
@ -256,6 +258,7 @@ public:
|
|||
return knownSize(SizeOffset) && knownOffset(SizeOffset);
|
||||
}
|
||||
|
||||
// The individual instruction visitors should be treated as private.
|
||||
SizeOffsetEvalType visitAllocaInst(AllocaInst &I);
|
||||
SizeOffsetEvalType visitCallSite(CallSite CS);
|
||||
SizeOffsetEvalType visitExtractElementInst(ExtractElementInst &I);
|
||||
|
|
|
@ -79,6 +79,12 @@ struct PostDominatorTree : public FunctionPass {
|
|||
return DT->findNearestCommonDominator(A, B);
|
||||
}
|
||||
|
||||
/// Get all nodes post-dominated by R, including R itself.
|
||||
void getDescendants(BasicBlock *R,
|
||||
SmallVectorImpl<BasicBlock *> &Result) const {
|
||||
DT->getDescendants(R, Result);
|
||||
}
|
||||
|
||||
virtual void releaseMemory() {
|
||||
DT->releaseMemory();
|
||||
}
|
||||
|
|
|
@ -312,11 +312,11 @@ public:
|
|||
/// The toplevel region represents the whole function.
|
||||
bool isTopLevelRegion() const { return exit == NULL; }
|
||||
|
||||
/// @brief Return a new (non canonical) region, that is obtained by joining
|
||||
/// @brief Return a new (non-canonical) region, that is obtained by joining
|
||||
/// this region with its predecessors.
|
||||
///
|
||||
/// @return A region also starting at getEntry(), but reaching to the next
|
||||
/// basic block that forms with getEntry() a (non canonical) region.
|
||||
/// basic block that forms with getEntry() a (non-canonical) region.
|
||||
/// NULL if such a basic block does not exist.
|
||||
Region *getExpandedRegion() const;
|
||||
|
||||
|
|
|
@ -370,7 +370,8 @@ namespace bitc {
|
|||
ATTR_KIND_Z_EXT = 34,
|
||||
ATTR_KIND_BUILTIN = 35,
|
||||
ATTR_KIND_COLD = 36,
|
||||
ATTR_KIND_OPTIMIZE_NONE = 37
|
||||
ATTR_KIND_OPTIMIZE_NONE = 37,
|
||||
ATTR_KIND_IN_ALLOCA = 38
|
||||
};
|
||||
|
||||
} // End bitc namespace
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "llvm/Support/ErrorHandling.h"
|
||||
|
||||
namespace llvm {
|
||||
class AsmPrinterHandler;
|
||||
class BlockAddress;
|
||||
class GCStrategy;
|
||||
class Constant;
|
||||
|
@ -110,13 +111,21 @@ namespace llvm {
|
|||
/// function.
|
||||
MachineLoopInfo *LI;
|
||||
|
||||
struct HandlerInfo {
|
||||
AsmPrinterHandler *Handler;
|
||||
const char *TimerName, *TimerGroupName;
|
||||
HandlerInfo(AsmPrinterHandler *Handler, const char *TimerName,
|
||||
const char *TimerGroupName)
|
||||
: Handler(Handler), TimerName(TimerName),
|
||||
TimerGroupName(TimerGroupName) {}
|
||||
};
|
||||
/// Handlers - a vector of all debug/EH info emitters we should use.
|
||||
/// This vector maintains ownership of the emitters.
|
||||
SmallVector<HandlerInfo, 1> Handlers;
|
||||
|
||||
/// DD - If the target supports dwarf debug info, this pointer is non-null.
|
||||
DwarfDebug *DD;
|
||||
|
||||
/// DE - If the target supports dwarf exception info, this pointer is
|
||||
/// non-null.
|
||||
DwarfException *DE;
|
||||
|
||||
protected:
|
||||
explicit AsmPrinter(TargetMachine &TM, MCStreamer &Streamer);
|
||||
|
||||
|
@ -200,11 +209,6 @@ namespace llvm {
|
|||
|
||||
bool needsSEHMoves();
|
||||
|
||||
/// needsRelocationsForDwarfStringPool - Specifies whether the object format
|
||||
/// expects to use relocations to refer to debug entries. Alternatively we
|
||||
/// emit section offsets in bytes from the start of the string pool.
|
||||
bool needsRelocationsForDwarfStringPool() const;
|
||||
|
||||
/// EmitConstantPool - Print to the current output stream assembly
|
||||
/// representations of the constants in the constant pool MCP. This is
|
||||
/// used to print out constants which have been "spilled to memory" by
|
||||
|
@ -304,13 +308,10 @@ namespace llvm {
|
|||
/// stem.
|
||||
MCSymbol *GetTempSymbol(StringRef Name) const;
|
||||
|
||||
|
||||
/// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with
|
||||
/// global value name as its base, with the specified suffix, and where the
|
||||
/// symbol is forced to have private linkage if ForcePrivate is true.
|
||||
MCSymbol *GetSymbolWithGlobalValueBase(const GlobalValue *GV,
|
||||
StringRef Suffix,
|
||||
bool ForcePrivate = true) const;
|
||||
/// Return the MCSymbol for a private symbol with global value name as its
|
||||
/// base, with the specified suffix.
|
||||
MCSymbol *getSymbolWithGlobalValueBase(const GlobalValue *GV,
|
||||
StringRef Suffix) const;
|
||||
|
||||
/// GetExternalSymbolSymbol - Return the MCSymbol for the specified
|
||||
/// ExternalSymbol.
|
||||
|
|
|
@ -603,7 +603,7 @@ namespace ISD {
|
|||
/// This corresponds to "load atomic" instruction.
|
||||
ATOMIC_LOAD,
|
||||
|
||||
/// OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr, val)
|
||||
/// OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val)
|
||||
/// This corresponds to "store atomic" instruction.
|
||||
ATOMIC_STORE,
|
||||
|
||||
|
|
|
@ -44,32 +44,35 @@ typedef std::pair<const MachineInstr *, const MachineInstr *> InsnRange;
|
|||
///
|
||||
class LexicalScopes {
|
||||
public:
|
||||
LexicalScopes() : MF(NULL), CurrentFnLexicalScope(NULL) { }
|
||||
virtual ~LexicalScopes();
|
||||
LexicalScopes() : MF(NULL), CurrentFnLexicalScope(NULL) {}
|
||||
~LexicalScopes();
|
||||
|
||||
/// initialize - Scan machine function and constuct lexical scope nest.
|
||||
virtual void initialize(const MachineFunction &);
|
||||
/// initialize - Scan machine function and construct lexical scope nest, resets
|
||||
/// the instance if necessary.
|
||||
void initialize(const MachineFunction &);
|
||||
|
||||
/// releaseMemory - release memory.
|
||||
virtual void releaseMemory();
|
||||
|
||||
void reset();
|
||||
|
||||
/// empty - Return true if there is no lexical scope information available.
|
||||
bool empty() { return CurrentFnLexicalScope == NULL; }
|
||||
|
||||
/// isCurrentFunctionScope - Return true if given lexical scope represents
|
||||
/// isCurrentFunctionScope - Return true if given lexical scope represents
|
||||
/// current function.
|
||||
bool isCurrentFunctionScope(const LexicalScope *LS) {
|
||||
bool isCurrentFunctionScope(const LexicalScope *LS) {
|
||||
return LS == CurrentFnLexicalScope;
|
||||
}
|
||||
|
||||
/// getCurrentFunctionScope - Return lexical scope for the current function.
|
||||
LexicalScope *getCurrentFunctionScope() const { return CurrentFnLexicalScope;}
|
||||
LexicalScope *getCurrentFunctionScope() const {
|
||||
return CurrentFnLexicalScope;
|
||||
}
|
||||
|
||||
/// getMachineBasicBlocks - Populate given set using machine basic blocks
|
||||
/// which have machine instructions that belong to lexical scope identified by
|
||||
/// DebugLoc.
|
||||
void getMachineBasicBlocks(DebugLoc DL,
|
||||
SmallPtrSet<const MachineBasicBlock*, 4> &MBBs);
|
||||
SmallPtrSet<const MachineBasicBlock *, 4> &MBBs);
|
||||
|
||||
/// dominates - Return true if DebugLoc's lexical scope dominates at least one
|
||||
/// machine instruction's lexical scope in a given machine basic block.
|
||||
|
@ -104,7 +107,6 @@ public:
|
|||
void dump();
|
||||
|
||||
private:
|
||||
|
||||
/// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If
|
||||
/// not available then create new lexical scope.
|
||||
LexicalScope *getOrCreateLexicalScope(DebugLoc DL);
|
||||
|
@ -123,8 +125,9 @@ private:
|
|||
void extractLexicalScopes(SmallVectorImpl<InsnRange> &MIRanges,
|
||||
DenseMap<const MachineInstr *, LexicalScope *> &M);
|
||||
void constructScopeNest(LexicalScope *Scope);
|
||||
void assignInstructionRanges(SmallVectorImpl<InsnRange> &MIRanges,
|
||||
DenseMap<const MachineInstr *, LexicalScope *> &M);
|
||||
void
|
||||
assignInstructionRanges(SmallVectorImpl<InsnRange> &MIRanges,
|
||||
DenseMap<const MachineInstr *, LexicalScope *> &M);
|
||||
|
||||
private:
|
||||
const MachineFunction *MF;
|
||||
|
@ -133,10 +136,11 @@ private:
|
|||
/// contained LexicalScope*s.
|
||||
DenseMap<const MDNode *, LexicalScope *> LexicalScopeMap;
|
||||
|
||||
/// InlinedLexicalScopeMap - Tracks inlined function scopes in current function.
|
||||
/// InlinedLexicalScopeMap - Tracks inlined function scopes in current
|
||||
/// function.
|
||||
DenseMap<DebugLoc, LexicalScope *> InlinedLexicalScopeMap;
|
||||
|
||||
/// AbstractScopeMap - These scopes are not included LexicalScopeMap.
|
||||
/// AbstractScopeMap - These scopes are not included in LexicalScopeMap.
|
||||
/// AbstractScopes owns its LexicalScope*s.
|
||||
DenseMap<const MDNode *, LexicalScope *> AbstractScopeMap;
|
||||
|
||||
|
@ -153,26 +157,23 @@ private:
|
|||
/// LexicalScope - This class is used to track scope information.
|
||||
///
|
||||
class LexicalScope {
|
||||
virtual void anchor();
|
||||
|
||||
public:
|
||||
LexicalScope(LexicalScope *P, const MDNode *D, const MDNode *I, bool A)
|
||||
: Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(A),
|
||||
LastInsn(0), FirstInsn(0), DFSIn(0), DFSOut(0) {
|
||||
: Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(A), LastInsn(0),
|
||||
FirstInsn(0), DFSIn(0), DFSOut(0) {
|
||||
if (Parent)
|
||||
Parent->addChild(this);
|
||||
}
|
||||
|
||||
virtual ~LexicalScope() {}
|
||||
|
||||
// Accessors.
|
||||
LexicalScope *getParent() const { return Parent; }
|
||||
const MDNode *getDesc() const { return Desc; }
|
||||
const MDNode *getInlinedAt() const { return InlinedAtLocation; }
|
||||
const MDNode *getScopeNode() const { return Desc; }
|
||||
bool isAbstractScope() const { return AbstractScope; }
|
||||
LexicalScope *getParent() const { return Parent; }
|
||||
const MDNode *getDesc() const { return Desc; }
|
||||
const MDNode *getInlinedAt() const { return InlinedAtLocation; }
|
||||
const MDNode *getScopeNode() const { return Desc; }
|
||||
bool isAbstractScope() const { return AbstractScope; }
|
||||
SmallVectorImpl<LexicalScope *> &getChildren() { return Children; }
|
||||
SmallVectorImpl<InsnRange> &getRanges() { return Ranges; }
|
||||
SmallVectorImpl<InsnRange> &getRanges() { return Ranges; }
|
||||
|
||||
/// addChild - Add a child scope.
|
||||
void addChild(LexicalScope *S) { Children.push_back(S); }
|
||||
|
@ -189,7 +190,7 @@ public:
|
|||
/// extendInsnRange - Extend the current instruction range covered by
|
||||
/// this scope.
|
||||
void extendInsnRange(const MachineInstr *MI) {
|
||||
assert (FirstInsn && "MI Range is not open!");
|
||||
assert(FirstInsn && "MI Range is not open!");
|
||||
LastInsn = MI;
|
||||
if (Parent)
|
||||
Parent->extendInsnRange(MI);
|
||||
|
@ -199,7 +200,7 @@ public:
|
|||
/// until now. This is used when a new scope is encountered while walking
|
||||
/// machine instructions.
|
||||
void closeInsnRange(LexicalScope *NewScope = NULL) {
|
||||
assert (LastInsn && "Last insn missing!");
|
||||
assert(LastInsn && "Last insn missing!");
|
||||
Ranges.push_back(InsnRange(FirstInsn, LastInsn));
|
||||
FirstInsn = NULL;
|
||||
LastInsn = NULL;
|
||||
|
@ -219,28 +220,28 @@ public:
|
|||
}
|
||||
|
||||
// Depth First Search support to walk and manipulate LexicalScope hierarchy.
|
||||
unsigned getDFSOut() const { return DFSOut; }
|
||||
void setDFSOut(unsigned O) { DFSOut = O; }
|
||||
unsigned getDFSIn() const { return DFSIn; }
|
||||
void setDFSIn(unsigned I) { DFSIn = I; }
|
||||
unsigned getDFSOut() const { return DFSOut; }
|
||||
void setDFSOut(unsigned O) { DFSOut = O; }
|
||||
unsigned getDFSIn() const { return DFSIn; }
|
||||
void setDFSIn(unsigned I) { DFSIn = I; }
|
||||
|
||||
/// dump - print lexical scope.
|
||||
void dump(unsigned Indent = 0) const;
|
||||
|
||||
private:
|
||||
LexicalScope *Parent; // Parent to this scope.
|
||||
AssertingVH<const MDNode> Desc; // Debug info descriptor.
|
||||
AssertingVH<const MDNode> InlinedAtLocation; // Location at which this
|
||||
// scope is inlined.
|
||||
bool AbstractScope; // Abstract Scope
|
||||
SmallVector<LexicalScope *, 4> Children; // Scopes defined in scope.
|
||||
// Contents not owned.
|
||||
LexicalScope *Parent; // Parent to this scope.
|
||||
AssertingVH<const MDNode> Desc; // Debug info descriptor.
|
||||
AssertingVH<const MDNode> InlinedAtLocation; // Location at which this
|
||||
// scope is inlined.
|
||||
bool AbstractScope; // Abstract Scope
|
||||
SmallVector<LexicalScope *, 4> Children; // Scopes defined in scope.
|
||||
// Contents not owned.
|
||||
SmallVector<InsnRange, 4> Ranges;
|
||||
|
||||
const MachineInstr *LastInsn; // Last instruction of this scope.
|
||||
const MachineInstr *FirstInsn; // First instruction of this scope.
|
||||
unsigned DFSIn, DFSOut; // In & Out Depth use to determine
|
||||
// scope nesting.
|
||||
const MachineInstr *LastInsn; // Last instruction of this scope.
|
||||
const MachineInstr *FirstInsn; // First instruction of this scope.
|
||||
unsigned DFSIn, DFSOut; // In & Out Depth use to determine
|
||||
// scope nesting.
|
||||
};
|
||||
|
||||
} // end llvm namespace
|
||||
|
|
|
@ -45,6 +45,7 @@ namespace llvm {
|
|||
class TargetInstrInfo;
|
||||
class TargetRegisterClass;
|
||||
class VirtRegMap;
|
||||
class MachineBlockFrequencyInfo;
|
||||
|
||||
class LiveIntervals : public MachineFunctionPass {
|
||||
MachineFunction* MF;
|
||||
|
@ -100,7 +101,9 @@ namespace llvm {
|
|||
virtual ~LiveIntervals();
|
||||
|
||||
// Calculate the spill weight to assign to a single instruction.
|
||||
static float getSpillWeight(bool isDef, bool isUse, BlockFrequency freq);
|
||||
static float getSpillWeight(bool isDef, bool isUse,
|
||||
const MachineBlockFrequencyInfo *MBFI,
|
||||
const MachineInstr *Instr);
|
||||
|
||||
LiveInterval &getInterval(unsigned Reg) {
|
||||
if (hasInterval(Reg))
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
//===- llvm/CodeGen/LivePhysRegs.h - Live Physical Register Set -*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the LivePhysRegs utility for tracking liveness of
|
||||
// physical registers. This can be used for ad-hoc liveness tracking after
|
||||
// register allocation. You can start with the live-ins/live-outs at the
|
||||
// beginning/end of a block and update the information while walking the
|
||||
// instructions inside the block. This implementation tracks the liveness on a
|
||||
// sub-register granularity.
|
||||
//
|
||||
// We assume that the high bits of a physical super-register are not preserved
|
||||
// unless the instruction has an implicit-use operand reading the super-
|
||||
// register.
|
||||
//
|
||||
// X86 Example:
|
||||
// %YMM0<def> = ...
|
||||
// %XMM0<def> = ... (Kills %XMM0, all %XMM0's sub-registers, and %YMM0)
|
||||
//
|
||||
// %YMM0<def> = ...
|
||||
// %XMM0<def> = ..., %YMM0<imp-use> (%YMM0 and all its sub-registers are alive)
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_CODEGEN_LIVE_PHYS_REGS_H
|
||||
#define LLVM_CODEGEN_LIVE_PHYS_REGS_H
|
||||
|
||||
#include "llvm/ADT/SparseSet.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
#include <cassert>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class MachineInstr;
|
||||
|
||||
/// \brief A set of live physical registers with functions to track liveness
|
||||
/// when walking backward/forward through a basic block.
|
||||
class LivePhysRegs {
|
||||
const TargetRegisterInfo *TRI;
|
||||
SparseSet<unsigned> LiveRegs;
|
||||
|
||||
LivePhysRegs(const LivePhysRegs&) LLVM_DELETED_FUNCTION;
|
||||
LivePhysRegs &operator=(const LivePhysRegs&) LLVM_DELETED_FUNCTION;
|
||||
public:
|
||||
/// \brief Constructs a new empty LivePhysRegs set.
|
||||
LivePhysRegs() : TRI(0), LiveRegs() {}
|
||||
|
||||
/// \brief Constructs and initializes an empty LivePhysRegs set.
|
||||
LivePhysRegs(const TargetRegisterInfo *TRI) : TRI(TRI) {
|
||||
assert(TRI && "Invalid TargetRegisterInfo pointer.");
|
||||
LiveRegs.setUniverse(TRI->getNumRegs());
|
||||
}
|
||||
|
||||
/// \brief Clear and initialize the LivePhysRegs set.
|
||||
void init(const TargetRegisterInfo *_TRI) {
|
||||
assert(_TRI && "Invalid TargetRegisterInfo pointer.");
|
||||
TRI = _TRI;
|
||||
LiveRegs.clear();
|
||||
LiveRegs.setUniverse(TRI->getNumRegs());
|
||||
}
|
||||
|
||||
/// \brief Clears the LivePhysRegs set.
|
||||
void clear() { LiveRegs.clear(); }
|
||||
|
||||
/// \brief Returns true if the set is empty.
|
||||
bool empty() const { return LiveRegs.empty(); }
|
||||
|
||||
/// \brief Adds a physical register and all its sub-registers to the set.
|
||||
void addReg(unsigned Reg) {
|
||||
assert(TRI && "LivePhysRegs is not initialized.");
|
||||
assert(Reg <= TRI->getNumRegs() && "Expected a physical register.");
|
||||
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
|
||||
SubRegs.isValid(); ++SubRegs)
|
||||
LiveRegs.insert(*SubRegs);
|
||||
}
|
||||
|
||||
/// \brief Removes a physical register, all its sub-registers, and all its
|
||||
/// super-registers from the set.
|
||||
void removeReg(unsigned Reg) {
|
||||
assert(TRI && "LivePhysRegs is not initialized.");
|
||||
assert(Reg <= TRI->getNumRegs() && "Expected a physical register.");
|
||||
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
|
||||
SubRegs.isValid(); ++SubRegs)
|
||||
LiveRegs.erase(*SubRegs);
|
||||
for (MCSuperRegIterator SuperRegs(Reg, TRI, /*IncludeSelf=*/false);
|
||||
SuperRegs.isValid(); ++SuperRegs)
|
||||
LiveRegs.erase(*SuperRegs);
|
||||
}
|
||||
|
||||
/// \brief Removes physical registers clobbered by the regmask operand @p MO.
|
||||
void removeRegsInMask(const MachineOperand &MO);
|
||||
|
||||
/// \brief Returns true if register @p Reg is contained in the set. This also
|
||||
/// works if only the super register of @p Reg has been defined, because we
|
||||
/// always also add all sub-registers to the set.
|
||||
bool contains(unsigned Reg) const { return LiveRegs.count(Reg); }
|
||||
|
||||
/// \brief Simulates liveness when stepping backwards over an
|
||||
/// instruction(bundle): Remove Defs, add uses. This is the recommended way of
|
||||
/// calculating liveness.
|
||||
void stepBackward(const MachineInstr &MI);
|
||||
|
||||
/// \brief Simulates liveness when stepping forward over an
|
||||
/// instruction(bundle): Remove killed-uses, add defs. This is not the
|
||||
/// recommended way, because it depends on accurate kill flags. If possible
|
||||
/// use stepBackward() instead of this function.
|
||||
void stepForward(const MachineInstr &MI);
|
||||
|
||||
/// \brief Adds all live-in registers of basic block @p MBB.
|
||||
void addLiveIns(const MachineBasicBlock *MBB) {
|
||||
for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
|
||||
LE = MBB->livein_end(); LI != LE; ++LI)
|
||||
addReg(*LI);
|
||||
}
|
||||
|
||||
/// \brief Adds all live-out registers of basic block @p MBB.
|
||||
void addLiveOuts(const MachineBasicBlock *MBB) {
|
||||
for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
|
||||
SE = MBB->succ_end(); SI != SE; ++SI)
|
||||
addLiveIns(*SI);
|
||||
}
|
||||
|
||||
typedef SparseSet<unsigned>::const_iterator const_iterator;
|
||||
const_iterator begin() const { return LiveRegs.begin(); }
|
||||
const_iterator end() const { return LiveRegs.end(); }
|
||||
|
||||
/// \brief Prints the currently live registers to @p OS.
|
||||
void print(raw_ostream &OS) const;
|
||||
|
||||
/// \brief Dumps the currently live registers to the debug output.
|
||||
void dump() const;
|
||||
};
|
||||
|
||||
inline raw_ostream &operator<<(raw_ostream &OS, const LivePhysRegs& LR) {
|
||||
LR.print(OS);
|
||||
return OS;
|
||||
}
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
#endif // LLVM_CODEGEN_LIVE_PHYS_REGS_H
|
|
@ -1,4 +1,4 @@
|
|||
//====----- MachineBlockFrequencyInfo.h - MachineBlock Frequency Analysis ----====//
|
||||
//====-- MachineBlockFrequencyInfo.h - MBB Frequency Analysis -*- C++ -*--====//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
|
@ -49,6 +49,21 @@ public:
|
|||
/// the other block frequencies. We do this to avoid using floating point.
|
||||
///
|
||||
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const;
|
||||
|
||||
MachineFunction *getFunction() const;
|
||||
void view() const;
|
||||
|
||||
// Print the block frequency Freq to OS using the current function's entry
|
||||
// frequency to convert freq into a relative decimal form.
|
||||
raw_ostream &printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const;
|
||||
|
||||
// Convenience method that attempts to look up the frequency associated with
|
||||
// BB and print it to OS.
|
||||
raw_ostream &printBlockFreq(raw_ostream &OS,
|
||||
const MachineBasicBlock *MBB) const;
|
||||
|
||||
uint64_t getEntryFreq() const;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -101,11 +101,6 @@ class MachineFrameInfo {
|
|||
// cannot alias any other memory objects.
|
||||
bool isSpillSlot;
|
||||
|
||||
// MayNeedSP - If true the stack object triggered the creation of the stack
|
||||
// protector. We should allocate this object right after the stack
|
||||
// protector.
|
||||
bool MayNeedSP;
|
||||
|
||||
/// Alloca - If this stack object is originated from an Alloca instruction
|
||||
/// this value saves the original IR allocation. Can be NULL.
|
||||
const AllocaInst *Alloca;
|
||||
|
@ -115,9 +110,9 @@ class MachineFrameInfo {
|
|||
bool PreAllocated;
|
||||
|
||||
StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM,
|
||||
bool isSS, bool NSP, const AllocaInst *Val)
|
||||
bool isSS, const AllocaInst *Val)
|
||||
: SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM),
|
||||
isSpillSlot(isSS), MayNeedSP(NSP), Alloca(Val), PreAllocated(false) {}
|
||||
isSpillSlot(isSS), Alloca(Val), PreAllocated(false) {}
|
||||
};
|
||||
|
||||
const TargetMachine &TM;
|
||||
|
@ -145,6 +140,14 @@ class MachineFrameInfo {
|
|||
/// to builtin \@llvm.returnaddress.
|
||||
bool ReturnAddressTaken;
|
||||
|
||||
/// HasStackMap - This boolean keeps track of whether there is a call
|
||||
/// to builtin \@llvm.experimental.stackmap.
|
||||
bool HasStackMap;
|
||||
|
||||
/// HasPatchPoint - This boolean keeps track of whether there is a call
|
||||
/// to builtin \@llvm.experimental.patchpoint.
|
||||
bool HasPatchPoint;
|
||||
|
||||
/// StackSize - The prolog/epilog code inserter calculates the final stack
|
||||
/// offsets for all of the fixed size objects, updating the Objects list
|
||||
/// above. It then updates StackSize to contain the number of bytes that need
|
||||
|
@ -223,6 +226,10 @@ class MachineFrameInfo {
|
|||
/// Whether the "realign-stack" option is on.
|
||||
bool RealignOption;
|
||||
|
||||
/// True if the function includes inline assembly that adjusts the stack
|
||||
/// pointer.
|
||||
bool HasInlineAsmWithSPAdjust;
|
||||
|
||||
const TargetFrameLowering *getFrameLowering() const;
|
||||
public:
|
||||
explicit MachineFrameInfo(const TargetMachine &TM, bool RealignOpt)
|
||||
|
@ -231,6 +238,8 @@ public:
|
|||
HasVarSizedObjects = false;
|
||||
FrameAddressTaken = false;
|
||||
ReturnAddressTaken = false;
|
||||
HasStackMap = false;
|
||||
HasPatchPoint = false;
|
||||
AdjustsStack = false;
|
||||
HasCalls = false;
|
||||
StackProtectorIdx = -1;
|
||||
|
@ -276,6 +285,18 @@ public:
|
|||
bool isReturnAddressTaken() const { return ReturnAddressTaken; }
|
||||
void setReturnAddressIsTaken(bool s) { ReturnAddressTaken = s; }
|
||||
|
||||
/// hasStackMap - This method may be called any time after instruction
|
||||
/// selection is complete to determine if there is a call to builtin
|
||||
/// \@llvm.experimental.stackmap.
|
||||
bool hasStackMap() const { return HasStackMap; }
|
||||
void setHasStackMap(bool s = true) { HasStackMap = s; }
|
||||
|
||||
/// hasPatchPoint - This method may be called any time after instruction
|
||||
/// selection is complete to determine if there is a call to builtin
|
||||
/// \@llvm.experimental.patchpoint.
|
||||
bool hasPatchPoint() const { return HasPatchPoint; }
|
||||
void setHasPatchPoint(bool s = true) { HasPatchPoint = s; }
|
||||
|
||||
/// getObjectIndexBegin - Return the minimum frame object index.
|
||||
///
|
||||
int getObjectIndexBegin() const { return -NumFixedObjects; }
|
||||
|
@ -380,14 +401,6 @@ public:
|
|||
return Objects[ObjectIdx+NumFixedObjects].Alloca;
|
||||
}
|
||||
|
||||
/// NeedsStackProtector - Returns true if the object may need stack
|
||||
/// protectors.
|
||||
bool MayNeedStackProtector(int ObjectIdx) const {
|
||||
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
|
||||
"Invalid Object Idx!");
|
||||
return Objects[ObjectIdx+NumFixedObjects].MayNeedSP;
|
||||
}
|
||||
|
||||
/// getObjectOffset - Return the assigned stack offset of the specified object
|
||||
/// from the incoming stack pointer.
|
||||
///
|
||||
|
@ -451,6 +464,10 @@ public:
|
|||
bool hasCalls() const { return HasCalls; }
|
||||
void setHasCalls(bool V) { HasCalls = V; }
|
||||
|
||||
/// Returns true if the function contains any stack-adjusting inline assembly.
|
||||
bool hasInlineAsmWithSPAdjust() const { return HasInlineAsmWithSPAdjust; }
|
||||
void setHasInlineAsmWithSPAdjust(bool B) { HasInlineAsmWithSPAdjust = B; }
|
||||
|
||||
/// getMaxCallFrameSize - Return the maximum size of a call frame that must be
|
||||
/// allocated for an outgoing function call. This is only available if
|
||||
/// CallFrameSetup/Destroy pseudo instructions are used by the target, and
|
||||
|
@ -501,7 +518,7 @@ public:
|
|||
/// a nonnegative identifier to represent it.
|
||||
///
|
||||
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS,
|
||||
bool MayNeedSP = false, const AllocaInst *Alloca = 0);
|
||||
const AllocaInst *Alloca = 0);
|
||||
|
||||
/// CreateSpillStackObject - Create a new statically sized stack object that
|
||||
/// represents a spill slot, returning a nonnegative identifier to represent
|
||||
|
@ -521,7 +538,7 @@ public:
|
|||
/// variable sized object is created, whether or not the index returned is
|
||||
/// actually used.
|
||||
///
|
||||
int CreateVariableSizedObject(unsigned Alignment);
|
||||
int CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca);
|
||||
|
||||
/// getCalleeSavedInfo - Returns a reference to call saved info vector for the
|
||||
/// current function.
|
||||
|
|
|
@ -131,8 +131,8 @@ class MachineFunction {
|
|||
/// about the control flow of such functions.
|
||||
bool ExposesReturnsTwice;
|
||||
|
||||
/// True if the function includes MS-style inline assembly.
|
||||
bool HasMSInlineAsm;
|
||||
/// True if the function includes any inline assembly.
|
||||
bool HasInlineAsm;
|
||||
|
||||
MachineFunction(const MachineFunction &) LLVM_DELETED_FUNCTION;
|
||||
void operator=(const MachineFunction&) LLVM_DELETED_FUNCTION;
|
||||
|
@ -218,15 +218,14 @@ public:
|
|||
ExposesReturnsTwice = B;
|
||||
}
|
||||
|
||||
/// Returns true if the function contains any MS-style inline assembly.
|
||||
bool hasMSInlineAsm() const {
|
||||
return HasMSInlineAsm;
|
||||
/// Returns true if the function contains any inline assembly.
|
||||
bool hasInlineAsm() const {
|
||||
return HasInlineAsm;
|
||||
}
|
||||
|
||||
/// Set a flag that indicates that the function contains MS-style inline
|
||||
/// assembly.
|
||||
void setHasMSInlineAsm(bool B) {
|
||||
HasMSInlineAsm = B;
|
||||
/// Set a flag that indicates that the function contains inline assembly.
|
||||
void setHasInlineAsm(bool B) {
|
||||
HasInlineAsm = B;
|
||||
}
|
||||
|
||||
/// getInfo - Keep track of various per-function pieces of information for
|
||||
|
@ -427,6 +426,15 @@ public:
|
|||
OperandRecycler.deallocate(Cap, Array);
|
||||
}
|
||||
|
||||
/// \brief Allocate and initialize a register mask with @p NumRegister bits.
|
||||
uint32_t *allocateRegisterMask(unsigned NumRegister) {
|
||||
unsigned Size = (NumRegister + 31) / 32;
|
||||
uint32_t *Mask = Allocator.Allocate<uint32_t>(Size);
|
||||
for (unsigned i = 0; i != Size; ++i)
|
||||
Mask[i] = 0;
|
||||
return Mask;
|
||||
}
|
||||
|
||||
/// allocateMemRefsArray - Allocate an array to hold MachineMemOperand
|
||||
/// pointers. This array is owned by the MachineFunction.
|
||||
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num);
|
||||
|
|
|
@ -830,6 +830,37 @@ public:
|
|||
const TargetInstrInfo *TII,
|
||||
const TargetRegisterInfo *TRI) const;
|
||||
|
||||
/// \brief Applies the constraints (def/use) implied by this MI on \p Reg to
|
||||
/// the given \p CurRC.
|
||||
/// If \p ExploreBundle is set and MI is part of a bundle, all the
|
||||
/// instructions inside the bundle will be taken into account. In other words,
|
||||
/// this method accumulates all the constraints of the operand of this MI and
|
||||
/// the related bundle if MI is a bundle or inside a bundle.
|
||||
///
|
||||
/// Returns the register class that satisfies both \p CurRC and the
|
||||
/// constraints set by MI. Returns NULL if such a register class does not
|
||||
/// exist.
|
||||
///
|
||||
/// \pre CurRC must not be NULL.
|
||||
const TargetRegisterClass *getRegClassConstraintEffectForVReg(
|
||||
unsigned Reg, const TargetRegisterClass *CurRC,
|
||||
const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
|
||||
bool ExploreBundle = false) const;
|
||||
|
||||
/// \brief Applies the constraints (def/use) implied by the \p OpIdx operand
|
||||
/// to the given \p CurRC.
|
||||
///
|
||||
/// Returns the register class that satisfies both \p CurRC and the
|
||||
/// constraints set by \p OpIdx MI. Returns NULL if such a register class
|
||||
/// does not exist.
|
||||
///
|
||||
/// \pre CurRC must not be NULL.
|
||||
/// \pre The operand at \p OpIdx must be a register.
|
||||
const TargetRegisterClass *
|
||||
getRegClassConstraintEffect(unsigned OpIdx, const TargetRegisterClass *CurRC,
|
||||
const TargetInstrInfo *TII,
|
||||
const TargetRegisterInfo *TRI) const;
|
||||
|
||||
/// tieOperands - Add a tie between the register operands at DefIdx and
|
||||
/// UseIdx. The tie will cause the register allocator to ensure that the two
|
||||
/// operands are assigned the same physical register.
|
||||
|
@ -1038,6 +1069,13 @@ private:
|
|||
/// hasPropertyInBundle - Slow path for hasProperty when we're dealing with a
|
||||
/// bundle.
|
||||
bool hasPropertyInBundle(unsigned Mask, QueryType Type) const;
|
||||
|
||||
/// \brief Implements the logic of getRegClassConstraintEffectForVReg for
|
||||
/// this MI and the given operand index \p OpIdx.
|
||||
/// If the related operand does not constrain Reg, this returns CurRC.
|
||||
const TargetRegisterClass *getRegClassConstraintEffectForVRegImpl(
|
||||
unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC,
|
||||
const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const;
|
||||
};
|
||||
|
||||
/// MachineInstrExpressionTrait - Special DenseMapInfo traits to compare
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#define LLVM_CODEGEN_MACHINELOOPINFO_H
|
||||
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
|
||||
namespace llvm {
|
||||
|
|
|
@ -134,6 +134,8 @@ public:
|
|||
/// number.
|
||||
int64_t getOffset() const { return PtrInfo.Offset; }
|
||||
|
||||
unsigned getAddrSpace() const { return PtrInfo.getAddrSpace(); }
|
||||
|
||||
/// getSize - Return the size in bytes of the memory reference.
|
||||
uint64_t getSize() const { return Size; }
|
||||
|
||||
|
|
|
@ -56,6 +56,7 @@ public:
|
|||
MO_GlobalAddress, ///< Address of a global value
|
||||
MO_BlockAddress, ///< Address of a basic block
|
||||
MO_RegisterMask, ///< Mask of preserved registers.
|
||||
MO_RegisterLiveOut, ///< Mask of live-out registers.
|
||||
MO_Metadata, ///< Metadata reference (for debug info)
|
||||
MO_MCSymbol ///< MCSymbol reference (for debug/eh info)
|
||||
};
|
||||
|
@ -153,7 +154,7 @@ private:
|
|||
const ConstantFP *CFP; // For MO_FPImmediate.
|
||||
const ConstantInt *CI; // For MO_CImmediate. Integers > 64bit.
|
||||
int64_t ImmVal; // For MO_Immediate.
|
||||
const uint32_t *RegMask; // For MO_RegisterMask.
|
||||
const uint32_t *RegMask; // For MO_RegisterMask and MO_RegisterLiveOut.
|
||||
const MDNode *MD; // For MO_Metadata.
|
||||
MCSymbol *Sym; // For MO_MCSymbol
|
||||
|
||||
|
@ -246,6 +247,8 @@ public:
|
|||
bool isBlockAddress() const { return OpKind == MO_BlockAddress; }
|
||||
/// isRegMask - Tests if this is a MO_RegisterMask operand.
|
||||
bool isRegMask() const { return OpKind == MO_RegisterMask; }
|
||||
/// isRegLiveOut - Tests if this is a MO_RegisterLiveOut operand.
|
||||
bool isRegLiveOut() const { return OpKind == MO_RegisterLiveOut; }
|
||||
/// isMetadata - Tests if this is a MO_Metadata operand.
|
||||
bool isMetadata() const { return OpKind == MO_Metadata; }
|
||||
bool isMCSymbol() const { return OpKind == MO_MCSymbol; }
|
||||
|
@ -476,6 +479,12 @@ public:
|
|||
return Contents.RegMask;
|
||||
}
|
||||
|
||||
/// getRegLiveOut - Returns a bit mask of live-out registers.
|
||||
const uint32_t *getRegLiveOut() const {
|
||||
assert(isRegLiveOut() && "Wrong MachineOperand accessor");
|
||||
return Contents.RegMask;
|
||||
}
|
||||
|
||||
const MDNode *getMetadata() const {
|
||||
assert(isMetadata() && "Wrong MachineOperand accessor");
|
||||
return Contents.MD;
|
||||
|
@ -659,6 +668,12 @@ public:
|
|||
Op.Contents.RegMask = Mask;
|
||||
return Op;
|
||||
}
|
||||
static MachineOperand CreateRegLiveOut(const uint32_t *Mask) {
|
||||
assert(Mask && "Missing live-out register mask");
|
||||
MachineOperand Op(MachineOperand::MO_RegisterLiveOut);
|
||||
Op.Contents.RegMask = Mask;
|
||||
return Op;
|
||||
}
|
||||
static MachineOperand CreateMetadata(const MDNode *Meta) {
|
||||
MachineOperand Op(MachineOperand::MO_Metadata);
|
||||
Op.Contents.MD = Meta;
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
// return new CustomMachineScheduler(C);
|
||||
// }
|
||||
//
|
||||
// The default scheduler, ScheduleDAGMI, builds the DAG and drives list
|
||||
// The default scheduler, ScheduleDAGMILive, builds the DAG and drives list
|
||||
// scheduling while updating the instruction stream, register pressure, and live
|
||||
// intervals. Most targets don't need to override the DAG builder and list
|
||||
// scheduler, but subtargets that require custom scheduling heuristics may
|
||||
|
@ -93,6 +93,7 @@ class MachineLoopInfo;
|
|||
class RegisterClassInfo;
|
||||
class ScheduleDAGInstrs;
|
||||
class SchedDFSResult;
|
||||
class ScheduleHazardRecognizer;
|
||||
|
||||
/// MachineSchedContext provides enough context from the MachineScheduler pass
|
||||
/// for the target to instantiate a scheduler.
|
||||
|
@ -154,8 +155,8 @@ struct MachineSchedPolicy {
|
|||
bool OnlyTopDown;
|
||||
bool OnlyBottomUp;
|
||||
|
||||
MachineSchedPolicy():
|
||||
ShouldTrackPressure(false), OnlyTopDown(false), OnlyBottomUp(false) {}
|
||||
MachineSchedPolicy(): ShouldTrackPressure(false), OnlyTopDown(false),
|
||||
OnlyBottomUp(false) {}
|
||||
};
|
||||
|
||||
/// MachineSchedStrategy - Interface to the scheduling algorithm used by
|
||||
|
@ -204,6 +205,262 @@ public:
|
|||
virtual void releaseBottomNode(SUnit *SU) = 0;
|
||||
};
|
||||
|
||||
/// Mutate the DAG as a postpass after normal DAG building.
|
||||
class ScheduleDAGMutation {
|
||||
virtual void anchor();
|
||||
public:
|
||||
virtual ~ScheduleDAGMutation() {}
|
||||
|
||||
virtual void apply(ScheduleDAGMI *DAG) = 0;
|
||||
};
|
||||
|
||||
/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply
|
||||
/// schedules machine instructions according to the given MachineSchedStrategy
|
||||
/// without much extra book-keeping. This is the common functionality between
|
||||
/// PreRA and PostRA MachineScheduler.
|
||||
class ScheduleDAGMI : public ScheduleDAGInstrs {
|
||||
protected:
|
||||
AliasAnalysis *AA;
|
||||
MachineSchedStrategy *SchedImpl;
|
||||
|
||||
/// Topo - A topological ordering for SUnits which permits fast IsReachable
|
||||
/// and similar queries.
|
||||
ScheduleDAGTopologicalSort Topo;
|
||||
|
||||
/// Ordered list of DAG postprocessing steps.
|
||||
std::vector<ScheduleDAGMutation*> Mutations;
|
||||
|
||||
/// The top of the unscheduled zone.
|
||||
MachineBasicBlock::iterator CurrentTop;
|
||||
|
||||
/// The bottom of the unscheduled zone.
|
||||
MachineBasicBlock::iterator CurrentBottom;
|
||||
|
||||
/// Record the next node in a scheduled cluster.
|
||||
const SUnit *NextClusterPred;
|
||||
const SUnit *NextClusterSucc;
|
||||
|
||||
#ifndef NDEBUG
|
||||
/// The number of instructions scheduled so far. Used to cut off the
|
||||
/// scheduler at the point determined by misched-cutoff.
|
||||
unsigned NumInstrsScheduled;
|
||||
#endif
|
||||
public:
|
||||
ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S, bool IsPostRA):
|
||||
ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, IsPostRA,
|
||||
/*RemoveKillFlags=*/IsPostRA, C->LIS),
|
||||
AA(C->AA), SchedImpl(S), Topo(SUnits, &ExitSU), CurrentTop(),
|
||||
CurrentBottom(), NextClusterPred(NULL), NextClusterSucc(NULL) {
|
||||
#ifndef NDEBUG
|
||||
NumInstrsScheduled = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual ~ScheduleDAGMI();
|
||||
|
||||
/// Return true if this DAG supports VReg liveness and RegPressure.
|
||||
virtual bool hasVRegLiveness() const { return false; }
|
||||
|
||||
/// Add a postprocessing step to the DAG builder.
|
||||
/// Mutations are applied in the order that they are added after normal DAG
|
||||
/// building and before MachineSchedStrategy initialization.
|
||||
///
|
||||
/// ScheduleDAGMI takes ownership of the Mutation object.
|
||||
void addMutation(ScheduleDAGMutation *Mutation) {
|
||||
Mutations.push_back(Mutation);
|
||||
}
|
||||
|
||||
/// \brief True if an edge can be added from PredSU to SuccSU without creating
|
||||
/// a cycle.
|
||||
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
|
||||
|
||||
/// \brief Add a DAG edge to the given SU with the given predecessor
|
||||
/// dependence data.
|
||||
///
|
||||
/// \returns true if the edge may be added without creating a cycle OR if an
|
||||
/// equivalent edge already existed (false indicates failure).
|
||||
bool addEdge(SUnit *SuccSU, const SDep &PredDep);
|
||||
|
||||
MachineBasicBlock::iterator top() const { return CurrentTop; }
|
||||
MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
|
||||
|
||||
/// Implement the ScheduleDAGInstrs interface for handling the next scheduling
|
||||
/// region. This covers all instructions in a block, while schedule() may only
|
||||
/// cover a subset.
|
||||
void enterRegion(MachineBasicBlock *bb,
|
||||
MachineBasicBlock::iterator begin,
|
||||
MachineBasicBlock::iterator end,
|
||||
unsigned regioninstrs) LLVM_OVERRIDE;
|
||||
|
||||
/// Implement ScheduleDAGInstrs interface for scheduling a sequence of
|
||||
/// reorderable instructions.
|
||||
virtual void schedule();
|
||||
|
||||
/// Change the position of an instruction within the basic block and update
|
||||
/// live ranges and region boundary iterators.
|
||||
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
|
||||
|
||||
const SUnit *getNextClusterPred() const { return NextClusterPred; }
|
||||
|
||||
const SUnit *getNextClusterSucc() const { return NextClusterSucc; }
|
||||
|
||||
void viewGraph(const Twine &Name, const Twine &Title) LLVM_OVERRIDE;
|
||||
void viewGraph() LLVM_OVERRIDE;
|
||||
|
||||
protected:
|
||||
// Top-Level entry points for the schedule() driver...
|
||||
|
||||
/// Apply each ScheduleDAGMutation step in order. This allows different
|
||||
/// instances of ScheduleDAGMI to perform custom DAG postprocessing.
|
||||
void postprocessDAG();
|
||||
|
||||
/// Release ExitSU predecessors and setup scheduler queues.
|
||||
void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);
|
||||
|
||||
/// Update scheduler DAG and queues after scheduling an instruction.
|
||||
void updateQueues(SUnit *SU, bool IsTopNode);
|
||||
|
||||
/// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
|
||||
void placeDebugValues();
|
||||
|
||||
/// \brief dump the scheduled Sequence.
|
||||
void dumpSchedule() const;
|
||||
|
||||
// Lesser helpers...
|
||||
bool checkSchedLimit();
|
||||
|
||||
void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
|
||||
SmallVectorImpl<SUnit*> &BotRoots);
|
||||
|
||||
void releaseSucc(SUnit *SU, SDep *SuccEdge);
|
||||
void releaseSuccessors(SUnit *SU);
|
||||
void releasePred(SUnit *SU, SDep *PredEdge);
|
||||
void releasePredecessors(SUnit *SU);
|
||||
};
|
||||
|
||||
/// ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules
|
||||
/// machine instructions while updating LiveIntervals and tracking regpressure.
|
||||
class ScheduleDAGMILive : public ScheduleDAGMI {
|
||||
protected:
|
||||
RegisterClassInfo *RegClassInfo;
|
||||
|
||||
/// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees
|
||||
/// will be empty.
|
||||
SchedDFSResult *DFSResult;
|
||||
BitVector ScheduledTrees;
|
||||
|
||||
MachineBasicBlock::iterator LiveRegionEnd;
|
||||
|
||||
// Map each SU to its summary of pressure changes. This array is updated for
|
||||
// liveness during bottom-up scheduling. Top-down scheduling may proceed but
|
||||
// has no effect on the pressure diffs.
|
||||
PressureDiffs SUPressureDiffs;
|
||||
|
||||
/// Register pressure in this region computed by initRegPressure.
|
||||
bool ShouldTrackPressure;
|
||||
IntervalPressure RegPressure;
|
||||
RegPressureTracker RPTracker;
|
||||
|
||||
/// List of pressure sets that exceed the target's pressure limit before
|
||||
/// scheduling, listed in increasing set ID order. Each pressure set is paired
|
||||
/// with its max pressure in the currently scheduled regions.
|
||||
std::vector<PressureChange> RegionCriticalPSets;
|
||||
|
||||
/// The top of the unscheduled zone.
|
||||
IntervalPressure TopPressure;
|
||||
RegPressureTracker TopRPTracker;
|
||||
|
||||
/// The bottom of the unscheduled zone.
|
||||
IntervalPressure BotPressure;
|
||||
RegPressureTracker BotRPTracker;
|
||||
|
||||
public:
|
||||
ScheduleDAGMILive(MachineSchedContext *C, MachineSchedStrategy *S):
|
||||
ScheduleDAGMI(C, S, /*IsPostRA=*/false), RegClassInfo(C->RegClassInfo),
|
||||
DFSResult(0), ShouldTrackPressure(false), RPTracker(RegPressure),
|
||||
TopRPTracker(TopPressure), BotRPTracker(BotPressure)
|
||||
{}
|
||||
|
||||
virtual ~ScheduleDAGMILive();
|
||||
|
||||
/// Return true if this DAG supports VReg liveness and RegPressure.
|
||||
virtual bool hasVRegLiveness() const { return true; }
|
||||
|
||||
/// \brief Return true if register pressure tracking is enabled.
|
||||
bool isTrackingPressure() const { return ShouldTrackPressure; }
|
||||
|
||||
/// Get current register pressure for the top scheduled instructions.
|
||||
const IntervalPressure &getTopPressure() const { return TopPressure; }
|
||||
const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
|
||||
|
||||
/// Get current register pressure for the bottom scheduled instructions.
|
||||
const IntervalPressure &getBotPressure() const { return BotPressure; }
|
||||
const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }
|
||||
|
||||
/// Get register pressure for the entire scheduling region before scheduling.
|
||||
const IntervalPressure &getRegPressure() const { return RegPressure; }
|
||||
|
||||
const std::vector<PressureChange> &getRegionCriticalPSets() const {
|
||||
return RegionCriticalPSets;
|
||||
}
|
||||
|
||||
PressureDiff &getPressureDiff(const SUnit *SU) {
|
||||
return SUPressureDiffs[SU->NodeNum];
|
||||
}
|
||||
|
||||
/// Compute a DFSResult after DAG building is complete, and before any
|
||||
/// queue comparisons.
|
||||
void computeDFSResult();
|
||||
|
||||
/// Return a non-null DFS result if the scheduling strategy initialized it.
|
||||
const SchedDFSResult *getDFSResult() const { return DFSResult; }
|
||||
|
||||
BitVector &getScheduledTrees() { return ScheduledTrees; }
|
||||
|
||||
/// Implement the ScheduleDAGInstrs interface for handling the next scheduling
|
||||
/// region. This covers all instructions in a block, while schedule() may only
|
||||
/// cover a subset.
|
||||
void enterRegion(MachineBasicBlock *bb,
|
||||
MachineBasicBlock::iterator begin,
|
||||
MachineBasicBlock::iterator end,
|
||||
unsigned regioninstrs) LLVM_OVERRIDE;
|
||||
|
||||
/// Implement ScheduleDAGInstrs interface for scheduling a sequence of
|
||||
/// reorderable instructions.
|
||||
virtual void schedule();
|
||||
|
||||
/// Compute the cyclic critical path through the DAG.
|
||||
unsigned computeCyclicCriticalPath();
|
||||
|
||||
protected:
|
||||
// Top-Level entry points for the schedule() driver...
|
||||
|
||||
/// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking
|
||||
/// enabled. This sets up three trackers. RPTracker will cover the entire DAG
|
||||
/// region, TopRPTracker and BotRPTracker will be initialized to the top and
|
||||
/// bottom of the DAG region without covering any unscheduled instruction.
|
||||
void buildDAGWithRegPressure();
|
||||
|
||||
/// Move an instruction and update register pressure.
|
||||
void scheduleMI(SUnit *SU, bool IsTopNode);
|
||||
|
||||
// Lesser helpers...
|
||||
|
||||
void initRegPressure();
|
||||
|
||||
void updatePressureDiffs(ArrayRef<unsigned> LiveUses);
|
||||
|
||||
void updateScheduledPressure(const SUnit *SU,
|
||||
const std::vector<unsigned> &NewMaxPressure);
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// Helpers for implementing custom MachineSchedStrategy classes. These take
|
||||
/// care of the book-keeping associated with list scheduling heuristics.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience
|
||||
/// methods for pushing and removing nodes. ReadyQueue's are uniquely identified
|
||||
/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
|
||||
|
@ -261,213 +518,220 @@ public:
|
|||
#endif
|
||||
};
|
||||
|
||||
/// Mutate the DAG as a postpass after normal DAG building.
|
||||
class ScheduleDAGMutation {
|
||||
virtual void anchor();
|
||||
public:
|
||||
virtual ~ScheduleDAGMutation() {}
|
||||
/// Summarize the unscheduled region.
|
||||
struct SchedRemainder {
|
||||
// Critical path through the DAG in expected latency.
|
||||
unsigned CriticalPath;
|
||||
unsigned CyclicCritPath;
|
||||
|
||||
virtual void apply(ScheduleDAGMI *DAG) = 0;
|
||||
// Scaled count of micro-ops left to schedule.
|
||||
unsigned RemIssueCount;
|
||||
|
||||
bool IsAcyclicLatencyLimited;
|
||||
|
||||
// Unscheduled resources
|
||||
SmallVector<unsigned, 16> RemainingCounts;
|
||||
|
||||
void reset() {
|
||||
CriticalPath = 0;
|
||||
CyclicCritPath = 0;
|
||||
RemIssueCount = 0;
|
||||
IsAcyclicLatencyLimited = false;
|
||||
RemainingCounts.clear();
|
||||
}
|
||||
|
||||
SchedRemainder() { reset(); }
|
||||
|
||||
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
|
||||
};
|
||||
|
||||
/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules
|
||||
/// machine instructions while updating LiveIntervals and tracking regpressure.
|
||||
class ScheduleDAGMI : public ScheduleDAGInstrs {
|
||||
protected:
|
||||
AliasAnalysis *AA;
|
||||
RegisterClassInfo *RegClassInfo;
|
||||
MachineSchedStrategy *SchedImpl;
|
||||
/// Each Scheduling boundary is associated with ready queues. It tracks the
|
||||
/// current cycle in the direction of movement, and maintains the state
|
||||
/// of "hazards" and other interlocks at the current cycle.
|
||||
class SchedBoundary {
|
||||
public:
|
||||
/// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
|
||||
enum {
|
||||
TopQID = 1,
|
||||
BotQID = 2,
|
||||
LogMaxQID = 2
|
||||
};
|
||||
|
||||
/// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees
|
||||
/// will be empty.
|
||||
SchedDFSResult *DFSResult;
|
||||
BitVector ScheduledTrees;
|
||||
ScheduleDAGMI *DAG;
|
||||
const TargetSchedModel *SchedModel;
|
||||
SchedRemainder *Rem;
|
||||
|
||||
/// Topo - A topological ordering for SUnits which permits fast IsReachable
|
||||
/// and similar queries.
|
||||
ScheduleDAGTopologicalSort Topo;
|
||||
ReadyQueue Available;
|
||||
ReadyQueue Pending;
|
||||
|
||||
/// Ordered list of DAG postprocessing steps.
|
||||
std::vector<ScheduleDAGMutation*> Mutations;
|
||||
ScheduleHazardRecognizer *HazardRec;
|
||||
|
||||
MachineBasicBlock::iterator LiveRegionEnd;
|
||||
private:
|
||||
/// True if the pending Q should be checked/updated before scheduling another
|
||||
/// instruction.
|
||||
bool CheckPending;
|
||||
|
||||
// Map each SU to its summary of pressure changes. This array is updated for
|
||||
// liveness during bottom-up scheduling. Top-down scheduling may proceed but
|
||||
// has no effect on the pressure diffs.
|
||||
PressureDiffs SUPressureDiffs;
|
||||
// For heuristics, keep a list of the nodes that immediately depend on the
|
||||
// most recently scheduled node.
|
||||
SmallPtrSet<const SUnit*, 8> NextSUs;
|
||||
|
||||
/// Register pressure in this region computed by initRegPressure.
|
||||
bool ShouldTrackPressure;
|
||||
IntervalPressure RegPressure;
|
||||
RegPressureTracker RPTracker;
|
||||
/// Number of cycles it takes to issue the instructions scheduled in this
|
||||
/// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
|
||||
/// See getStalls().
|
||||
unsigned CurrCycle;
|
||||
|
||||
/// List of pressure sets that exceed the target's pressure limit before
|
||||
/// scheduling, listed in increasing set ID order. Each pressure set is paired
|
||||
/// with its max pressure in the currently scheduled regions.
|
||||
std::vector<PressureChange> RegionCriticalPSets;
|
||||
/// Micro-ops issued in the current cycle
|
||||
unsigned CurrMOps;
|
||||
|
||||
/// The top of the unscheduled zone.
|
||||
MachineBasicBlock::iterator CurrentTop;
|
||||
IntervalPressure TopPressure;
|
||||
RegPressureTracker TopRPTracker;
|
||||
/// MinReadyCycle - Cycle of the soonest available instruction.
|
||||
unsigned MinReadyCycle;
|
||||
|
||||
/// The bottom of the unscheduled zone.
|
||||
MachineBasicBlock::iterator CurrentBottom;
|
||||
IntervalPressure BotPressure;
|
||||
RegPressureTracker BotRPTracker;
|
||||
// The expected latency of the critical path in this scheduled zone.
|
||||
unsigned ExpectedLatency;
|
||||
|
||||
/// Record the next node in a scheduled cluster.
|
||||
const SUnit *NextClusterPred;
|
||||
const SUnit *NextClusterSucc;
|
||||
// The latency of dependence chains leading into this zone.
|
||||
// For each node scheduled bottom-up: DLat = max DLat, N.Depth.
|
||||
// For each cycle scheduled: DLat -= 1.
|
||||
unsigned DependentLatency;
|
||||
|
||||
/// Count the scheduled (issued) micro-ops that can be retired by
|
||||
/// time=CurrCycle assuming the first scheduled instr is retired at time=0.
|
||||
unsigned RetiredMOps;
|
||||
|
||||
// Count scheduled resources that have been executed. Resources are
|
||||
// considered executed if they become ready in the time that it takes to
|
||||
// saturate any resource including the one in question. Counts are scaled
|
||||
// for direct comparison with other resources. Counts can be compared with
|
||||
// MOps * getMicroOpFactor and Latency * getLatencyFactor.
|
||||
SmallVector<unsigned, 16> ExecutedResCounts;
|
||||
|
||||
/// Cache the max count for a single resource.
|
||||
unsigned MaxExecutedResCount;
|
||||
|
||||
// Cache the critical resources ID in this scheduled zone.
|
||||
unsigned ZoneCritResIdx;
|
||||
|
||||
// Is the scheduled region resource limited vs. latency limited.
|
||||
bool IsResourceLimited;
|
||||
|
||||
// Record the highest cycle at which each resource has been reserved by a
|
||||
// scheduled instruction.
|
||||
SmallVector<unsigned, 16> ReservedCycles;
|
||||
|
||||
#ifndef NDEBUG
|
||||
/// The number of instructions scheduled so far. Used to cut off the
|
||||
/// scheduler at the point determined by misched-cutoff.
|
||||
unsigned NumInstrsScheduled;
|
||||
// Remember the greatest operand latency as an upper bound on the number of
|
||||
// times we should retry the pending queue because of a hazard.
|
||||
unsigned MaxObservedLatency;
|
||||
#endif
|
||||
|
||||
public:
|
||||
ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S):
|
||||
ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
|
||||
AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S), DFSResult(0),
|
||||
Topo(SUnits, &ExitSU), ShouldTrackPressure(false),
|
||||
RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure),
|
||||
CurrentBottom(), BotRPTracker(BotPressure),
|
||||
NextClusterPred(NULL), NextClusterSucc(NULL) {
|
||||
/// Pending queues extend the ready queues with the same ID and the
|
||||
/// PendingFlag set.
|
||||
SchedBoundary(unsigned ID, const Twine &Name):
|
||||
DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
|
||||
Pending(ID << LogMaxQID, Name+".P"),
|
||||
HazardRec(0) {
|
||||
reset();
|
||||
}
|
||||
|
||||
~SchedBoundary();
|
||||
|
||||
void reset();
|
||||
|
||||
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
|
||||
SchedRemainder *rem);
|
||||
|
||||
bool isTop() const {
|
||||
return Available.getID() == TopQID;
|
||||
}
|
||||
|
||||
/// Number of cycles to issue the instructions scheduled in this zone.
|
||||
unsigned getCurrCycle() const { return CurrCycle; }
|
||||
|
||||
/// Micro-ops issued in the current cycle
|
||||
unsigned getCurrMOps() const { return CurrMOps; }
|
||||
|
||||
/// Return true if the given SU is used by the most recently scheduled
|
||||
/// instruction.
|
||||
bool isNextSU(const SUnit *SU) const { return NextSUs.count(SU); }
|
||||
|
||||
// The latency of dependence chains leading into this zone.
|
||||
unsigned getDependentLatency() const { return DependentLatency; }
|
||||
|
||||
/// Get the number of latency cycles "covered" by the scheduled
|
||||
/// instructions. This is the larger of the critical path within the zone
|
||||
/// and the number of cycles required to issue the instructions.
|
||||
unsigned getScheduledLatency() const {
|
||||
return std::max(ExpectedLatency, CurrCycle);
|
||||
}
|
||||
|
||||
unsigned getUnscheduledLatency(SUnit *SU) const {
|
||||
return isTop() ? SU->getHeight() : SU->getDepth();
|
||||
}
|
||||
|
||||
unsigned getResourceCount(unsigned ResIdx) const {
|
||||
return ExecutedResCounts[ResIdx];
|
||||
}
|
||||
|
||||
/// Get the scaled count of scheduled micro-ops and resources, including
|
||||
/// executed resources.
|
||||
unsigned getCriticalCount() const {
|
||||
if (!ZoneCritResIdx)
|
||||
return RetiredMOps * SchedModel->getMicroOpFactor();
|
||||
return getResourceCount(ZoneCritResIdx);
|
||||
}
|
||||
|
||||
/// Get a scaled count for the minimum execution time of the scheduled
|
||||
/// micro-ops that are ready to execute by getExecutedCount. Notice the
|
||||
/// feedback loop.
|
||||
unsigned getExecutedCount() const {
|
||||
return std::max(CurrCycle * SchedModel->getLatencyFactor(),
|
||||
MaxExecutedResCount);
|
||||
}
|
||||
|
||||
unsigned getZoneCritResIdx() const { return ZoneCritResIdx; }
|
||||
|
||||
// Is the scheduled region resource limited vs. latency limited.
|
||||
bool isResourceLimited() const { return IsResourceLimited; }
|
||||
|
||||
/// Get the difference between the given SUnit's ready time and the current
|
||||
/// cycle.
|
||||
unsigned getLatencyStallCycles(SUnit *SU);
|
||||
|
||||
unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles);
|
||||
|
||||
bool checkHazard(SUnit *SU);
|
||||
|
||||
unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);
|
||||
|
||||
unsigned getOtherResourceCount(unsigned &OtherCritIdx);
|
||||
|
||||
void releaseNode(SUnit *SU, unsigned ReadyCycle);
|
||||
|
||||
void releaseTopNode(SUnit *SU);
|
||||
|
||||
void releaseBottomNode(SUnit *SU);
|
||||
|
||||
void bumpCycle(unsigned NextCycle);
|
||||
|
||||
void incExecutedResources(unsigned PIdx, unsigned Count);
|
||||
|
||||
unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle);
|
||||
|
||||
void bumpNode(SUnit *SU);
|
||||
|
||||
void releasePending();
|
||||
|
||||
void removeReady(SUnit *SU);
|
||||
|
||||
/// Call this before applying any other heuristics to the Available queue.
|
||||
/// Updates the Available/Pending Q's if necessary and returns the single
|
||||
/// available instruction, or NULL if there are multiple candidates.
|
||||
SUnit *pickOnlyChoice();
|
||||
|
||||
#ifndef NDEBUG
|
||||
NumInstrsScheduled = 0;
|
||||
void dumpScheduledState();
|
||||
#endif
|
||||
}
|
||||
|
||||
virtual ~ScheduleDAGMI();
|
||||
|
||||
/// \brief Return true if register pressure tracking is enabled.
|
||||
bool isTrackingPressure() const { return ShouldTrackPressure; }
|
||||
|
||||
/// Add a postprocessing step to the DAG builder.
|
||||
/// Mutations are applied in the order that they are added after normal DAG
|
||||
/// building and before MachineSchedStrategy initialization.
|
||||
///
|
||||
/// ScheduleDAGMI takes ownership of the Mutation object.
|
||||
void addMutation(ScheduleDAGMutation *Mutation) {
|
||||
Mutations.push_back(Mutation);
|
||||
}
|
||||
|
||||
/// \brief True if an edge can be added from PredSU to SuccSU without creating
|
||||
/// a cycle.
|
||||
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
|
||||
|
||||
/// \brief Add a DAG edge to the given SU with the given predecessor
|
||||
/// dependence data.
|
||||
///
|
||||
/// \returns true if the edge may be added without creating a cycle OR if an
|
||||
/// equivalent edge already existed (false indicates failure).
|
||||
bool addEdge(SUnit *SuccSU, const SDep &PredDep);
|
||||
|
||||
MachineBasicBlock::iterator top() const { return CurrentTop; }
|
||||
MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
|
||||
|
||||
/// Implement the ScheduleDAGInstrs interface for handling the next scheduling
|
||||
/// region. This covers all instructions in a block, while schedule() may only
|
||||
/// cover a subset.
|
||||
void enterRegion(MachineBasicBlock *bb,
|
||||
MachineBasicBlock::iterator begin,
|
||||
MachineBasicBlock::iterator end,
|
||||
unsigned regioninstrs) LLVM_OVERRIDE;
|
||||
|
||||
/// Implement ScheduleDAGInstrs interface for scheduling a sequence of
|
||||
/// reorderable instructions.
|
||||
virtual void schedule();
|
||||
|
||||
/// Change the position of an instruction within the basic block and update
|
||||
/// live ranges and region boundary iterators.
|
||||
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
|
||||
|
||||
/// Get current register pressure for the top scheduled instructions.
|
||||
const IntervalPressure &getTopPressure() const { return TopPressure; }
|
||||
const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
|
||||
|
||||
/// Get current register pressure for the bottom scheduled instructions.
|
||||
const IntervalPressure &getBotPressure() const { return BotPressure; }
|
||||
const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }
|
||||
|
||||
/// Get register pressure for the entire scheduling region before scheduling.
|
||||
const IntervalPressure &getRegPressure() const { return RegPressure; }
|
||||
|
||||
/// Return a read-only reference to the RegionCriticalPSets vector.
const std::vector<PressureChange> &getRegionCriticalPSets() const {
|
||||
return RegionCriticalPSets;
|
||||
}
|
||||
|
||||
/// Return a mutable reference to the PressureDiff kept in SUPressureDiffs for
/// \p SU; the entry is looked up by SU->NodeNum.
PressureDiff &getPressureDiff(const SUnit *SU) {
|
||||
return SUPressureDiffs[SU->NodeNum];
|
||||
}
|
||||
|
||||
const SUnit *getNextClusterPred() const { return NextClusterPred; }
|
||||
|
||||
const SUnit *getNextClusterSucc() const { return NextClusterSucc; }
|
||||
|
||||
/// Compute a DFSResult after DAG building is complete, and before any
|
||||
/// queue comparisons.
|
||||
void computeDFSResult();
|
||||
|
||||
/// Return a non-null DFS result if the scheduling strategy initialized it.
|
||||
const SchedDFSResult *getDFSResult() const { return DFSResult; }
|
||||
|
||||
BitVector &getScheduledTrees() { return ScheduledTrees; }
|
||||
|
||||
/// Compute the cyclic critical path through the DAG.
|
||||
unsigned computeCyclicCriticalPath();
|
||||
|
||||
void viewGraph(const Twine &Name, const Twine &Title) LLVM_OVERRIDE;
|
||||
void viewGraph() LLVM_OVERRIDE;
|
||||
|
||||
protected:
|
||||
// Top-Level entry points for the schedule() driver...
|
||||
|
||||
/// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking
|
||||
/// enabled. This sets up three trackers. RPTracker will cover the entire DAG
|
||||
/// region, TopTracker and BottomTracker will be initialized to the top and
|
||||
/// bottom of the DAG region without covering any unscheduled instruction.
|
||||
void buildDAGWithRegPressure();
|
||||
|
||||
/// Apply each ScheduleDAGMutation step in order. This allows different
|
||||
/// instances of ScheduleDAGMI to perform custom DAG postprocessing.
|
||||
void postprocessDAG();
|
||||
|
||||
/// Release ExitSU predecessors and setup scheduler queues.
|
||||
void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);
|
||||
|
||||
/// Move an instruction and update register pressure.
|
||||
void scheduleMI(SUnit *SU, bool IsTopNode);
|
||||
|
||||
/// Update scheduler DAG and queues after scheduling an instruction.
|
||||
void updateQueues(SUnit *SU, bool IsTopNode);
|
||||
|
||||
/// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
|
||||
void placeDebugValues();
|
||||
|
||||
/// \brief dump the scheduled Sequence.
|
||||
void dumpSchedule() const;
|
||||
|
||||
// Lesser helpers...
|
||||
|
||||
void initRegPressure();
|
||||
|
||||
void updatePressureDiffs(ArrayRef<unsigned> LiveUses);
|
||||
|
||||
void updateScheduledPressure(const SUnit *SU,
|
||||
const std::vector<unsigned> &NewMaxPressure);
|
||||
|
||||
bool checkSchedLimit();
|
||||
|
||||
void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
|
||||
SmallVectorImpl<SUnit*> &BotRoots);
|
||||
|
||||
void releaseSucc(SUnit *SU, SDep *SuccEdge);
|
||||
void releaseSuccessors(SUnit *SU);
|
||||
void releasePred(SUnit *SU, SDep *PredEdge);
|
||||
void releasePredecessors(SUnit *SU);
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
|
|
|
@ -437,8 +437,8 @@ namespace PBQP {
|
|||
for (NodeItr nodeItr = nodesBegin(), nodeEnd = nodesEnd();
|
||||
nodeItr != nodeEnd; ++nodeItr) {
|
||||
|
||||
os << " node" << nodeItr << " [ label=\""
|
||||
<< nodeItr << ": " << getNodeCosts(*nodeItr) << "\" ]\n";
|
||||
os << " node" << *nodeItr << " [ label=\""
|
||||
<< *nodeItr << ": " << getNodeCosts(*nodeItr) << "\" ]\n";
|
||||
}
|
||||
|
||||
os << " edge [ len=" << getNumNodes() << " ]\n";
|
||||
|
|
|
@ -207,9 +207,9 @@ public:
|
|||
/// Fully developed targets will not generally override this.
|
||||
virtual void addMachinePasses();
|
||||
|
||||
/// createTargetScheduler - Create an instance of ScheduleDAGInstrs to be run
|
||||
/// within the standard MachineScheduler pass for this function and target at
|
||||
/// the current optimization level.
|
||||
/// Create an instance of ScheduleDAGInstrs to be run within the standard
|
||||
/// MachineScheduler pass for this function and target at the current
|
||||
/// optimization level.
|
||||
///
|
||||
/// This can also be used to plug a new MachineSchedStrategy into an instance
|
||||
/// of the standard ScheduleDAGMI:
|
||||
|
@ -221,6 +221,13 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
/// Similar to createMachineScheduler but used when postRA machine scheduling
|
||||
/// is enabled.
|
||||
virtual ScheduleDAGInstrs *
|
||||
createPostMachineScheduler(MachineSchedContext *C) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
protected:
|
||||
// Helper to verify the analysis is really immutable.
|
||||
void setOpt(bool &Opt, bool Val);
|
||||
|
@ -403,6 +410,9 @@ namespace llvm {
|
|||
/// MachineScheduler - This pass schedules machine instructions.
|
||||
extern char &MachineSchedulerID;
|
||||
|
||||
/// PostMachineScheduler - This pass schedules machine instructions postRA.
|
||||
extern char &PostMachineSchedulerID;
|
||||
|
||||
/// SpillPlacement analysis. Suggest optimal placement of spill code between
|
||||
/// basic blocks.
|
||||
extern char &SpillPlacementID;
|
||||
|
@ -568,6 +578,11 @@ namespace llvm {
|
|||
/// bundles (created earlier, e.g. during pre-RA scheduling).
|
||||
extern char &FinalizeMachineBundlesID;
|
||||
|
||||
/// StackMapLiveness - This pass analyses the register live-out set of
|
||||
/// stackmap/patchpoint intrinsics and attaches the calculated information to
|
||||
/// the intrinsic for later emission to the StackMap.
|
||||
extern char &StackMapLivenessID;
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif
|
||||
|
|
|
@ -184,6 +184,12 @@ namespace llvm {
|
|||
|| Contents.OrdKind == MustAliasMem);
|
||||
}
|
||||
|
||||
/// isBarrier - Test if this is an Order dependence that is marked
|
||||
/// as a barrier.
|
||||
bool isBarrier() const {
|
||||
return getKind() == Order && Contents.OrdKind == Barrier;
|
||||
}
|
||||
|
||||
/// isMustAlias - Test if this is an Order dependence that is marked
|
||||
/// as "must alias", meaning that the SUnits at either end of the edge
|
||||
/// have a memory dependence on a known memory location.
|
||||
|
@ -292,6 +298,8 @@ namespace llvm {
|
|||
bool isScheduleHigh : 1; // True if preferable to schedule high.
|
||||
bool isScheduleLow : 1; // True if preferable to schedule low.
|
||||
bool isCloned : 1; // True if this node has been cloned.
|
||||
bool isUnbuffered : 1; // Uses an unbuffered resource.
|
||||
bool hasReservedResource : 1; // Uses a reserved resource.
|
||||
Sched::Preference SchedulingPref; // Scheduling preference.
|
||||
|
||||
private:
|
||||
|
@ -316,7 +324,8 @@ namespace llvm {
|
|||
isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
|
||||
hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
|
||||
isAvailable(false), isScheduled(false), isScheduleHigh(false),
|
||||
isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
|
||||
isScheduleLow(false), isCloned(false), isUnbuffered(false),
|
||||
hasReservedResource(false), SchedulingPref(Sched::None),
|
||||
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
|
||||
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
|
||||
|
||||
|
@ -330,7 +339,8 @@ namespace llvm {
|
|||
isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
|
||||
hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
|
||||
isAvailable(false), isScheduled(false), isScheduleHigh(false),
|
||||
isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
|
||||
isScheduleLow(false), isCloned(false), isUnbuffered(false),
|
||||
hasReservedResource(false), SchedulingPref(Sched::None),
|
||||
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
|
||||
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
|
||||
|
||||
|
@ -343,7 +353,8 @@ namespace llvm {
|
|||
isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
|
||||
hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
|
||||
isAvailable(false), isScheduled(false), isScheduleHigh(false),
|
||||
isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
|
||||
isScheduleLow(false), isCloned(false), isUnbuffered(false),
|
||||
hasReservedResource(false), SchedulingPref(Sched::None),
|
||||
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
|
||||
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
|
||||
|
||||
|
|
|
@ -43,7 +43,7 @@ namespace llvm {
|
|||
};
|
||||
|
||||
/// Record a physical register access.
|
||||
/// For non data-dependent uses, OpIdx == -1.
|
||||
/// For non-data-dependent uses, OpIdx == -1.
|
||||
struct PhysRegSUOper {
|
||||
SUnit *SU;
|
||||
int OpIdx;
|
||||
|
@ -88,6 +88,10 @@ namespace llvm {
|
|||
/// isPostRA flag indicates vregs cannot be present.
|
||||
bool IsPostRA;
|
||||
|
||||
/// True if the DAG builder should remove kill flags (in preparation for
|
||||
/// rescheduling).
|
||||
bool RemoveKillFlags;
|
||||
|
||||
/// The standard DAG builder does not normally include terminators as DAG
|
||||
/// nodes because it does not create the necessary dependencies to prevent
|
||||
/// reordering. A specialized scheduler can override
|
||||
|
@ -145,15 +149,21 @@ namespace llvm {
|
|||
DbgValueVector DbgValues;
|
||||
MachineInstr *FirstDbgValue;
|
||||
|
||||
/// Set of live physical registers for updating kill flags.
|
||||
BitVector LiveRegs;
|
||||
|
||||
public:
|
||||
explicit ScheduleDAGInstrs(MachineFunction &mf,
|
||||
const MachineLoopInfo &mli,
|
||||
const MachineDominatorTree &mdt,
|
||||
bool IsPostRAFlag,
|
||||
bool RemoveKillFlags = false,
|
||||
LiveIntervals *LIS = 0);
|
||||
|
||||
virtual ~ScheduleDAGInstrs() {}
|
||||
|
||||
bool isPostRA() const { return IsPostRA; }
|
||||
|
||||
/// \brief Expose LiveIntervals for use in DAG mutators and such.
|
||||
LiveIntervals *getLIS() const { return LIS; }
|
||||
|
||||
|
@ -227,12 +237,23 @@ namespace llvm {
|
|||
/// Return a label for the region of code covered by the DAG.
|
||||
virtual std::string getDAGName() const;
|
||||
|
||||
/// \brief Fix register kill flags that scheduling has made invalid.
|
||||
void fixupKills(MachineBasicBlock *MBB);
|
||||
protected:
|
||||
void initSUnits();
|
||||
void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx);
|
||||
void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
|
||||
void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
|
||||
void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
|
||||
|
||||
/// \brief PostRA helper for rewriting kill flags.
|
||||
void startBlockForKills(MachineBasicBlock *BB);
|
||||
|
||||
/// \brief Toggle a register operand kill flag.
|
||||
///
|
||||
/// Other adjustments may be made to the instruction if necessary. Return
|
||||
/// true if the operand has been deleted, false if not.
|
||||
bool toggleKillFlag(MachineInstr *MI, MachineOperand &MO);
|
||||
};
|
||||
|
||||
/// newSUnit - Creates a new SUnit and return a ptr to it.
|
||||
|
|
|
@ -70,6 +70,22 @@ public:
|
|||
/// emitted, to advance the hazard state.
|
||||
virtual void EmitInstruction(SUnit *) {}
|
||||
|
||||
/// PreEmitNoops - This callback is invoked prior to emitting an instruction.
|
||||
/// It should return the number of noops to emit prior to the provided
|
||||
/// instruction.
|
||||
/// Note: This is only used during PostRA scheduling. EmitNoop is not called
|
||||
/// for these noops.
|
||||
virtual unsigned PreEmitNoops(SUnit *) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// ShouldPreferAnother - This callback may be invoked if getHazardType
|
||||
/// returns NoHazard. If, even though there is no hazard, it would be better to
|
||||
/// schedule another available instruction, this callback should return true.
|
||||
virtual bool ShouldPreferAnother(SUnit *) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// AdvanceCycle - This callback is invoked whenever the next top-down
|
||||
/// instruction to be scheduled cannot issue in the current cycle, either
|
||||
/// because of latency or resource conflicts. This should increment the
|
||||
|
|
|
@ -70,6 +70,10 @@ namespace ISD {
|
|||
/// BUILD_VECTOR where all of the elements are 0 or undef.
|
||||
bool isBuildVectorAllZeros(const SDNode *N);
|
||||
|
||||
/// \brief Return true if the specified node is a BUILD_VECTOR node of
|
||||
/// all ConstantSDNode or undef.
|
||||
bool isBuildVectorOfConstantSDNodes(const SDNode *N);
|
||||
|
||||
/// isScalarToVector - Return true if the specified node is a
|
||||
/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
|
||||
/// element is not an undef.
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
//===--- StackMapLivenessAnalysis - StackMap Liveness Analysis --*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass calculates the liveness for each basic block in a function and
|
||||
// attaches the register live-out information to a stackmap or patchpoint
|
||||
// intrinsic if present.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_CODEGEN_STACKMAP_LIVENESS_ANALYSIS_H
|
||||
#define LLVM_CODEGEN_STACKMAP_LIVENESS_ANALYSIS_H
|
||||
|
||||
#include "llvm/CodeGen/LivePhysRegs.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// \brief This pass calculates the liveness information for each basic block in
|
||||
/// a function and attaches the register live-out information to a stackmap or
|
||||
/// patchpoint intrinsic if present.
|
||||
///
|
||||
/// This is an optional pass that has to be explicitly enabled via the
|
||||
/// -enable-stackmap-liveness and/or -enable-patchpoint-liveness flag. The pass
|
||||
/// skips functions that don't have any stackmap or patchpoint intrinsics. The
|
||||
/// information provided by this pass is optional and not required by the
|
||||
/// aforementioned intrinsics to function.
|
||||
class StackMapLiveness : public MachineFunctionPass {
|
||||
// NOTE(review): presumably the function currently being processed; where it
// is assigned is not visible in this header - confirm in the implementation.
MachineFunction *MF;
|
||||
// NOTE(review): target register info used by the pass; initialization is not
// visible in this header - confirm in the implementation.
const TargetRegisterInfo *TRI;
|
||||
// Register live set; the member-function comments below refer to it as "the
// register live set".
LivePhysRegs LiveRegs;
|
||||
public:
|
||||
// Pass identification member (definition lives outside this header).
static char ID;
|
||||
|
||||
/// \brief Default construct and initialize the pass.
|
||||
StackMapLiveness();
|
||||
|
||||
/// \brief Tell the pass manager which passes we depend on and what
|
||||
/// information we preserve.
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
|
||||
|
||||
/// \brief Calculate the liveness information for the given machine function.
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
private:
|
||||
/// \brief Performs the actual liveness calculation for the function.
|
||||
bool calculateLiveness();
|
||||
|
||||
/// \brief Add the current register live set to the instruction.
|
||||
void addLiveOutSetToMI(MachineInstr &MI);
|
||||
|
||||
/// \brief Create a register mask and initialize it with the registers from
|
||||
/// the register live set.
|
||||
uint32_t *createRegisterMask() const;
|
||||
};
|
||||
|
||||
} // llvm namespace
|
||||
|
||||
#endif // LLVM_CODEGEN_STACKMAP_LIVENESS_ANALYSIS_H
|
|
@ -1,4 +1,5 @@
|
|||
//===------------------- StackMaps.h - StackMaps ----------------*- C++ -*-===//
|
||||
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
|
@ -92,19 +93,28 @@ public:
|
|||
: LocType(LocType), Size(Size), Reg(Reg), Offset(Offset) {}
|
||||
};
|
||||
|
||||
// Typedef a function pointer for functions that parse sequences of operands
|
||||
// and return a Location, plus a new "next" operand iterator.
|
||||
typedef std::pair<Location, MachineInstr::const_mop_iterator>
|
||||
(*OperandParser)(MachineInstr::const_mop_iterator,
|
||||
MachineInstr::const_mop_iterator, const TargetMachine&);
|
||||
/// Record describing one live-out register.
///
/// A record whose Reg field is 0 is treated as invalid (see MarkInvalid and
/// IsInvalid). Ordering between records considers RegNo only.
struct LiveOutReg {
|
||||
// Register identifier; the value 0 marks the record as invalid.
unsigned short Reg;
|
||||
// Dwarf register number; the only key used by operator<.
unsigned short RegNo;
|
||||
// NOTE(review): presumably the register size; units are not visible here -
// confirm against the code that fills this in.
unsigned short Size;
|
||||
|
||||
// Default-constructed records have every field zero, so they are invalid
// according to IsInvalid.
LiveOutReg() : Reg(0), RegNo(0), Size(0) {}
|
||||
LiveOutReg(unsigned short Reg, unsigned short RegNo, unsigned short Size)
|
||||
: Reg(Reg), RegNo(RegNo), Size(Size) {}
|
||||
|
||||
// Flag this record as invalid by clearing Reg; RegNo and Size are untouched.
void MarkInvalid() { Reg = 0; }
|
||||
|
||||
// Only sort by the dwarf register number.
|
||||
bool operator< (const LiveOutReg &LO) const { return RegNo < LO.RegNo; }
|
||||
// Predicate form of the validity check: true when LO.Reg is 0.
static bool IsInvalid(const LiveOutReg &LO) { return LO.Reg == 0; }
|
||||
};
|
||||
|
||||
// OpTypes are used to encode information about the following logical
|
||||
// operand (which may consist of several MachineOperands) for the
|
||||
// OpParser.
|
||||
typedef enum { DirectMemRefOp, IndirectMemRefOp, ConstantOp } OpType;
|
||||
|
||||
StackMaps(AsmPrinter &AP, OperandParser OpParser)
|
||||
: AP(AP), OpParser(OpParser) {}
|
||||
StackMaps(AsmPrinter &AP) : AP(AP) {}
|
||||
|
||||
/// \brief Generate a stackmap record for a stackmap instruction.
|
||||
///
|
||||
|
@ -121,15 +131,18 @@ public:
|
|||
|
||||
private:
|
||||
typedef SmallVector<Location, 8> LocationVec;
|
||||
typedef SmallVector<LiveOutReg, 8> LiveOutVec;
|
||||
|
||||
struct CallsiteInfo {
|
||||
const MCExpr *CSOffsetExpr;
|
||||
unsigned ID;
|
||||
uint64_t ID;
|
||||
LocationVec Locations;
|
||||
LiveOutVec LiveOuts;
|
||||
CallsiteInfo() : CSOffsetExpr(0), ID(0) {}
|
||||
CallsiteInfo(const MCExpr *CSOffsetExpr, unsigned ID,
|
||||
LocationVec Locations)
|
||||
: CSOffsetExpr(CSOffsetExpr), ID(ID), Locations(Locations) {}
|
||||
CallsiteInfo(const MCExpr *CSOffsetExpr, uint64_t ID,
|
||||
LocationVec &Locations, LiveOutVec &LiveOuts)
|
||||
: CSOffsetExpr(CSOffsetExpr), ID(ID), Locations(Locations),
|
||||
LiveOuts(LiveOuts) {}
|
||||
};
|
||||
|
||||
typedef std::vector<CallsiteInfo> CallsiteInfoList;
|
||||
|
@ -155,16 +168,28 @@ private:
|
|||
};
|
||||
|
||||
AsmPrinter &AP;
|
||||
OperandParser OpParser;
|
||||
CallsiteInfoList CSInfos;
|
||||
ConstantPool ConstPool;
|
||||
|
||||
MachineInstr::const_mop_iterator
|
||||
parseOperand(MachineInstr::const_mop_iterator MOI,
|
||||
MachineInstr::const_mop_iterator MOE,
|
||||
LocationVec &Locs, LiveOutVec &LiveOuts) const;
|
||||
|
||||
/// \brief Create a live-out register record for the given register @p Reg.
|
||||
LiveOutReg createLiveOutReg(unsigned Reg, const MCRegisterInfo &MCRI,
|
||||
const TargetRegisterInfo *TRI) const;
|
||||
|
||||
/// \brief Parse the register live-out mask and return a vector of live-out
|
||||
/// registers that need to be recorded in the stackmap.
|
||||
LiveOutVec parseRegisterLiveOutMask(const uint32_t *Mask) const;
|
||||
|
||||
/// This should be called by the MC lowering code _immediately_ before
|
||||
/// lowering the MI to an MCInst. It records where the operands for the
|
||||
/// instruction are stored, and outputs a label to record the offset of
|
||||
/// the call from the start of the text section. In special cases (e.g. AnyReg
|
||||
/// calling convention) the return register is also recorded if requested.
|
||||
void recordStackMapOpers(const MachineInstr &MI, uint32_t ID,
|
||||
void recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
|
||||
MachineInstr::const_mop_iterator MOI,
|
||||
MachineInstr::const_mop_iterator MOE,
|
||||
bool recordResult = false);
|
||||
|
|
|
@ -20,11 +20,11 @@
|
|||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/ADT/ValueMap.h"
|
||||
#include "llvm/Analysis/Dominators.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
|
||||
namespace llvm {
|
||||
class DominatorTree;
|
||||
class Function;
|
||||
class Module;
|
||||
class PHINode;
|
||||
|
|
|
@ -98,6 +98,14 @@ public:
|
|||
return SchedModel.getProcResource(PIdx);
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
/// Return a printable name for the processor resource at index \p PIdx.
///
/// Index 0 yields the literal "MOps"; any other index yields the Name field
/// of the entry returned by SchedModel.getProcResource(PIdx). This member is
/// only declared when NDEBUG is not defined.
const char *getResourceName(unsigned PIdx) const {
|
||||
if (!PIdx)
|
||||
return "MOps";
|
||||
return SchedModel.getProcResource(PIdx)->Name;
|
||||
}
|
||||
#endif
|
||||
|
||||
typedef const MCWriteProcResEntry *ProcResIter;
|
||||
|
||||
// \brief Get an iterator into the processor resources consumed by this
|
||||
|
|
|
@ -880,18 +880,18 @@ namespace llvm {
|
|||
static EVT getExtendedIntegerVT(LLVMContext &C, unsigned BitWidth);
|
||||
static EVT getExtendedVectorVT(LLVMContext &C, EVT VT,
|
||||
unsigned NumElements);
|
||||
bool isExtendedFloatingPoint() const;
|
||||
bool isExtendedInteger() const;
|
||||
bool isExtendedVector() const;
|
||||
bool isExtended16BitVector() const;
|
||||
bool isExtended32BitVector() const;
|
||||
bool isExtended64BitVector() const;
|
||||
bool isExtended128BitVector() const;
|
||||
bool isExtended256BitVector() const;
|
||||
bool isExtended512BitVector() const;
|
||||
bool isExtended1024BitVector() const;
|
||||
bool isExtendedFloatingPoint() const LLVM_READONLY;
|
||||
bool isExtendedInteger() const LLVM_READONLY;
|
||||
bool isExtendedVector() const LLVM_READONLY;
|
||||
bool isExtended16BitVector() const LLVM_READONLY;
|
||||
bool isExtended32BitVector() const LLVM_READONLY;
|
||||
bool isExtended64BitVector() const LLVM_READONLY;
|
||||
bool isExtended128BitVector() const LLVM_READONLY;
|
||||
bool isExtended256BitVector() const LLVM_READONLY;
|
||||
bool isExtended512BitVector() const LLVM_READONLY;
|
||||
bool isExtended1024BitVector() const LLVM_READONLY;
|
||||
EVT getExtendedVectorElementType() const;
|
||||
unsigned getExtendedVectorNumElements() const;
|
||||
unsigned getExtendedVectorNumElements() const LLVM_READONLY;
|
||||
unsigned getExtendedSizeInBits() const;
|
||||
};
|
||||
|
||||
|
|
|
@ -232,7 +232,7 @@ public:
|
|||
///
|
||||
/// This function is deprecated for the MCJIT execution engine.
|
||||
///
|
||||
/// FIXME: the JIT and MCJIT interfaces should be disentangled or united
|
||||
/// FIXME: the JIT and MCJIT interfaces should be disentangled or united
|
||||
/// again, if possible.
|
||||
///
|
||||
virtual void *getPointerToNamedFunction(const std::string &Name,
|
||||
|
@ -550,7 +550,7 @@ public:
|
|||
WhichEngine = w;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
/// setMCJITMemoryManager - Sets the MCJIT memory manager to use. This allows
|
||||
/// clients to customize their memory allocation policies for the MCJIT. This
|
||||
/// is only appropriate for the MCJIT; setting this and configuring the builder
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "llvm/ADT/StringMap.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCObjectFileInfo.h"
|
||||
#include "llvm/Target/Mangler.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include <string>
|
||||
|
@ -49,6 +50,7 @@ private:
|
|||
|
||||
llvm::OwningPtr<llvm::Module> _module;
|
||||
llvm::OwningPtr<llvm::TargetMachine> _target;
|
||||
llvm::MCObjectFileInfo ObjFileInfo;
|
||||
std::vector<NameAndAttributes> _symbols;
|
||||
|
||||
// _defines and _undefines only needed to disambiguate tentative definitions
|
||||
|
|
|
@ -157,6 +157,28 @@ struct import_lookup_table_entry32 {
|
|||
}
|
||||
};
|
||||
|
||||
/// In-memory layout of a COFF export directory table entry.
///
/// All fields use the support::ulittle16_t / support::ulittle32_t wrapper
/// types, and the declaration order below defines the layout.
/// NOTE(review): field meanings follow the PE/COFF export directory table;
/// confirm against the Microsoft PE/COFF specification before relying on them.
struct export_directory_table_entry {
|
||||
support::ulittle32_t ExportFlags;
|
||||
support::ulittle32_t TimeDateStamp;
|
||||
support::ulittle16_t MajorVersion;
|
||||
support::ulittle16_t MinorVersion;
|
||||
support::ulittle32_t NameRVA;
|
||||
support::ulittle32_t OrdinalBase;
|
||||
support::ulittle32_t AddressTableEntries;
|
||||
support::ulittle32_t NumberOfNamePointers;
|
||||
support::ulittle32_t ExportAddressTableRVA;
|
||||
support::ulittle32_t NamePointerRVA;
|
||||
support::ulittle32_t OrdinalTableRVA;
|
||||
};
|
||||
|
||||
/// One slot of the export address table.
///
/// ExportRVA and ForwarderRVA are two names for the same 32-bit storage.
/// NOTE(review): which interpretation applies to a given slot is decided by
/// the reader of the table, not by this type - confirm against the PE/COFF
/// specification.
union export_address_table_entry {
|
||||
support::ulittle32_t ExportRVA;
|
||||
support::ulittle32_t ForwarderRVA;
|
||||
};
|
||||
|
||||
typedef support::ulittle32_t export_name_pointer_table_entry;
|
||||
typedef support::ulittle16_t export_ordinal_table_entry;
|
||||
|
||||
struct coff_symbol {
|
||||
struct StringTableOffset {
|
||||
support::ulittle32_t Zeroes;
|
||||
|
|
|
@ -18,10 +18,10 @@
|
|||
|
||||
namespace llvm {
|
||||
|
||||
class DataLayout;
|
||||
class GlobalValue;
|
||||
class MCContext;
|
||||
template <typename T> class SmallVectorImpl;
|
||||
class TargetMachine;
|
||||
class Twine;
|
||||
|
||||
class Mangler {
|
||||
|
@ -33,7 +33,7 @@ public:
|
|||
};
|
||||
|
||||
private:
|
||||
const TargetMachine *TM;
|
||||
const DataLayout *DL;
|
||||
|
||||
/// AnonGlobalIDs - We need to give global values the same name every time
|
||||
/// they are mangled. This keeps track of the number we give to anonymous
|
||||
|
@ -46,20 +46,18 @@ private:
|
|||
unsigned NextAnonGlobalID;
|
||||
|
||||
public:
|
||||
Mangler(const TargetMachine *TM) : TM(TM), NextAnonGlobalID(1) {}
|
||||
Mangler(const DataLayout *DL) : DL(DL), NextAnonGlobalID(1) {}
|
||||
|
||||
/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
|
||||
/// and the specified global variable's name. If the global variable doesn't
|
||||
/// have a name, this fills in a unique name for the global.
|
||||
void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV,
|
||||
bool isImplicitlyPrivate, bool UseGlobalPrefix = true);
|
||||
void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV);
|
||||
|
||||
/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
|
||||
/// and the specified name as the global variable name. GVName must not be
|
||||
/// empty.
|
||||
void getNameWithPrefix(SmallVectorImpl<char> &OutName, const Twine &GVName,
|
||||
ManglerPrefixTy PrefixTy = Mangler::Default,
|
||||
bool UseGlobalPrefix = true);
|
||||
ManglerPrefixTy PrefixTy = Mangler::Default);
|
||||
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
|
|
@ -685,6 +685,18 @@ class InstrInfo {
|
|||
//
|
||||
// This option is a temporary migration help. It will go away.
|
||||
bit guessInstructionProperties = 1;
|
||||
|
||||
// TableGen's instruction encoder generator has support for matching operands
|
||||
// to bit-field variables both by name and by position. While matching by
|
||||
// name is preferred, this is currently not possible for complex operands,
|
||||
// and some targets still rely on the positional encoding rules. When
|
||||
// generating a decoder for such targets, the positional encoding rules must
|
||||
// be used by the decoder generator as well.
|
||||
//
|
||||
// This option is temporary; it will go away once the TableGen decoder
|
||||
// generator has better support for complex operands and targets have
|
||||
// migrated away from using positionally encoded operands.
|
||||
bit decodePositionallyEncodedOperands = 0;
|
||||
}
|
||||
|
||||
// Standard Pseudo Instructions.
|
||||
|
@ -805,6 +817,7 @@ def STACKMAP : Instruction {
|
|||
let InOperandList = (ins i32imm:$id, i32imm:$nbytes, variable_ops);
|
||||
let isCall = 1;
|
||||
let mayLoad = 1;
|
||||
let usesCustomInserter = 1;
|
||||
}
|
||||
def PATCHPOINT : Instruction {
|
||||
let OutOperandList = (outs unknown:$dst);
|
||||
|
@ -812,6 +825,7 @@ def PATCHPOINT : Instruction {
|
|||
i32imm:$nargs, i32imm:$cc, variable_ops);
|
||||
let isCall = 1;
|
||||
let mayLoad = 1;
|
||||
let usesCustomInserter = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -947,7 +961,7 @@ class AsmWriter {
|
|||
// AsmWriterClassName - This specifies the suffix to use for the asmwriter
|
||||
// class. Generated AsmWriter classes are always prefixed with the target
|
||||
// name.
|
||||
string AsmWriterClassName = "AsmPrinter";
|
||||
string AsmWriterClassName = "InstPrinter";
|
||||
|
||||
// Variant - AsmWriters can be of multiple different variants. Variants are
|
||||
// used to support targets that need to emit assembly code in ways that are
|
||||
|
@ -957,21 +971,13 @@ class AsmWriter {
|
|||
// == 1, will expand to "y".
|
||||
int Variant = 0;
|
||||
|
||||
|
||||
// FirstOperandColumn/OperandSpacing - If the assembler syntax uses a columnar
|
||||
// layout, the asmwriter can actually generate output in these columns (in
|
||||
// verbose-asm mode). These two values indicate the width of the first column
|
||||
// (the "opcode" area) and the width to reserve for subsequent operands. When
|
||||
// verbose asm mode is enabled, operands will be indented to respect this.
|
||||
int FirstOperandColumn = -1;
|
||||
|
||||
// OperandSpacing - Space between operand columns.
|
||||
int OperandSpacing = -1;
|
||||
|
||||
// isMCAsmWriter - Is this assembly writer for an MC emitter? This controls
|
||||
// generation of the printInstruction() method. For MC printers, it takes
|
||||
// an MCInstr* operand, otherwise it takes a MachineInstr*.
|
||||
bit isMCAsmWriter = 0;
|
||||
bit isMCAsmWriter = 1;
|
||||
}
|
||||
def DefaultAsmWriter : AsmWriter;
|
||||
|
||||
|
|
|
@ -251,6 +251,18 @@ namespace llvm {
|
|||
floorf,
|
||||
/// long double floorl(long double x);
|
||||
floorl,
|
||||
/// double fmax(double x, double y);
|
||||
fmax,
|
||||
/// float fmaxf(float x, float y);
|
||||
fmaxf,
|
||||
/// long double fmaxl(long double x, long double y);
|
||||
fmaxl,
|
||||
/// double fmin(double x, double y);
|
||||
fmin,
|
||||
/// float fminf(float x, float y);
|
||||
fminf,
|
||||
/// long double fminl(long double x, long double y);
|
||||
fminl,
|
||||
/// double fmod(double x, double y);
|
||||
fmod,
|
||||
/// float fmodf(float x, float y);
|
||||
|
@ -703,6 +715,8 @@ public:
|
|||
case LibFunc::sqrt: case LibFunc::sqrtf: case LibFunc::sqrtl:
|
||||
case LibFunc::sqrt_finite: case LibFunc::sqrtf_finite:
|
||||
case LibFunc::sqrtl_finite:
|
||||
case LibFunc::fmax: case LibFunc::fmaxf: case LibFunc::fmaxl:
|
||||
case LibFunc::fmin: case LibFunc::fminf: case LibFunc::fminl:
|
||||
case LibFunc::floor: case LibFunc::floorf: case LibFunc::floorl:
|
||||
case LibFunc::nearbyint: case LibFunc::nearbyintf: case LibFunc::nearbyintl:
|
||||
case LibFunc::ceil: case LibFunc::ceilf: case LibFunc::ceill:
|
||||
|
|
|
@ -173,6 +173,11 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Return true if multiple condition registers are available.
|
||||
bool hasMultipleConditionRegisters() const {
|
||||
return HasMultipleConditionRegisters;
|
||||
}
|
||||
|
||||
/// Return true if a vector of the given type should be split
|
||||
/// (TypeSplitVector) instead of promoted (TypePromoteInteger) during type
|
||||
/// legalization.
|
||||
|
@ -880,13 +885,13 @@ protected:
|
|||
}
|
||||
|
||||
/// Indicate whether this target prefers to use _setjmp to implement
|
||||
/// llvm.setjmp or the non _ version. Defaults to false.
|
||||
/// llvm.setjmp or the version without _. Defaults to false.
|
||||
void setUseUnderscoreSetJmp(bool Val) {
|
||||
UseUnderscoreSetJmp = Val;
|
||||
}
|
||||
|
||||
/// Indicate whether this target prefers to use _longjmp to implement
|
||||
/// llvm.longjmp or the non _ version. Defaults to false.
|
||||
/// llvm.longjmp or the version without _. Defaults to false.
|
||||
void setUseUnderscoreLongJmp(bool Val) {
|
||||
UseUnderscoreLongJmp = Val;
|
||||
}
|
||||
|
@ -926,6 +931,15 @@ protected:
|
|||
SelectIsExpensive = isExpensive;
|
||||
}
|
||||
|
||||
/// Tells the code generator that the target has multiple (allocatable)
|
||||
/// condition registers that can be used to store the results of comparisons
|
||||
/// for use by selects and conditional branches. With multiple condition
|
||||
/// registers, the code generator will not aggressively sink comparisons into
|
||||
/// the blocks of their users.
|
||||
void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
|
||||
HasMultipleConditionRegisters = hasManyRegs;
|
||||
}
|
||||
|
||||
/// Tells the code generator not to expand a sequence of operations into
|
||||
/// separate sequences that increase the amount of flow control.
|
||||
void setJumpIsExpensive(bool isExpensive = true) {
|
||||
|
@ -1321,6 +1335,13 @@ private:
|
|||
/// the select operations if possible.
|
||||
bool SelectIsExpensive;
|
||||
|
||||
/// Tells the code generator that the target has multiple (allocatable)
|
||||
/// condition registers that can be used to store the results of comparisons
|
||||
/// for use by selects and conditional branches. With multiple condition
|
||||
/// registers, the code generator will not aggressively sink comparisons into
|
||||
/// the blocks of their users.
|
||||
bool HasMultipleConditionRegisters;
|
||||
|
||||
/// Tells the code generator not to expand integer divides by constants into a
|
||||
/// sequence of muls, adds, and shifts. This is a hack until a real cost
|
||||
/// model is in place. If we ever optimize for size, this will be set to true
|
||||
|
@ -1685,6 +1706,10 @@ protected:
|
|||
/// Return true if the value types that can be represented by the specified
|
||||
/// register class are all legal.
|
||||
bool isLegalRC(const TargetRegisterClass *RC) const;
|
||||
|
||||
/// Replace/modify any TargetFrameIndex operands with a target-dependent
|
||||
/// sequence of memory operands that is recognized by PrologEpilogInserter.
|
||||
MachineBasicBlock *emitPatchPoint(MachineInstr *MI, MachineBasicBlock *MBB) const;
|
||||
};
|
||||
|
||||
/// This class defines information used to lower LLVM code to legal SelectionDAG
|
||||
|
@ -2078,6 +2103,18 @@ public:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/// This callback is used to prepare for a volatile or atomic load.
|
||||
/// It takes a chain node as input and returns the chain for the load itself.
|
||||
///
|
||||
/// Having a callback like this is necessary for targets like SystemZ,
|
||||
/// which allows a CPU to reuse the result of a previous load indefinitely,
|
||||
/// even if a cache-coherent store is performed by another CPU. The default
|
||||
/// implementation does nothing.
|
||||
virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL,
|
||||
SelectionDAG &DAG) const {
|
||||
return Chain;
|
||||
}
|
||||
|
||||
/// This callback is invoked by the type legalizer to legalize nodes with an
|
||||
/// illegal operand type but legal result types. It replaces the
|
||||
/// LowerOperation callback in the type Legalizer. The reason we can not do
|
||||
|
|
|
@ -34,6 +34,7 @@ namespace llvm {
|
|||
|
||||
class TargetLoweringObjectFile : public MCObjectFileInfo {
|
||||
MCContext *Ctx;
|
||||
const DataLayout *DL;
|
||||
|
||||
TargetLoweringObjectFile(
|
||||
const TargetLoweringObjectFile&) LLVM_DELETED_FUNCTION;
|
||||
|
@ -42,7 +43,7 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
|
|||
public:
|
||||
MCContext &getContext() const { return *Ctx; }
|
||||
|
||||
TargetLoweringObjectFile() : MCObjectFileInfo(), Ctx(0) {}
|
||||
TargetLoweringObjectFile() : MCObjectFileInfo(), Ctx(0), DL(0) {}
|
||||
|
||||
virtual ~TargetLoweringObjectFile();
|
||||
|
||||
|
@ -121,6 +122,11 @@ public:
|
|||
/// main label that is the address of the global
|
||||
MCSymbol *getSymbol(Mangler &M, const GlobalValue *GV) const;
|
||||
|
||||
/// Return the MCSymbol for a private symbol with global value name as its
|
||||
/// base, with the specified suffix.
|
||||
MCSymbol *getSymbolWithGlobalValueBase(Mangler &M, const GlobalValue *GV,
|
||||
StringRef Suffix) const;
|
||||
|
||||
// getCFIPersonalitySymbol - The symbol that gets passed to .cfi_personality.
|
||||
virtual MCSymbol *
|
||||
getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
|
||||
|
|
|
@ -88,6 +88,7 @@ protected: // Can only create subclasses.
|
|||
unsigned MCUseLoc : 1;
|
||||
unsigned MCUseCFI : 1;
|
||||
unsigned MCUseDwarfDirectory : 1;
|
||||
unsigned RequireStructuredCFG : 1;
|
||||
|
||||
public:
|
||||
virtual ~TargetMachine();
|
||||
|
@ -108,7 +109,7 @@ public:
|
|||
void resetTargetOptions(const MachineFunction *MF) const;
|
||||
|
||||
// Interfaces to the major aspects of target machine information:
|
||||
//
|
||||
//
|
||||
// -- Instruction opcode and operand information
|
||||
// -- Pipelines and scheduling information
|
||||
// -- Stack frame information
|
||||
|
@ -156,6 +157,9 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
bool requiresStructuredCFG() const { return RequireStructuredCFG; }
|
||||
void setRequiresStructuredCFG(bool Value) { RequireStructuredCFG = Value; }
|
||||
|
||||
/// hasMCRelaxAll - Check whether all machine code instructions should be
|
||||
/// relaxed.
|
||||
bool hasMCRelaxAll() const { return MCRelaxAll; }
|
||||
|
|
|
@ -672,6 +672,17 @@ public:
|
|||
// Do nothing.
|
||||
}
|
||||
|
||||
/// Allow the target to reverse allocation order of local live ranges. This
|
||||
/// will generally allocate shorter local live ranges first. For targets with
|
||||
/// many registers, this could reduce regalloc compile time by a large
|
||||
/// factor. It should still achieve optimal coloring; however, it can change
|
||||
/// register eviction decisions. It is disabled by default for two reasons:
|
||||
/// (1) Top-down allocation is simpler and easier to debug for targets that
|
||||
/// don't benefit from reversing the order.
|
||||
/// (2) Bottom-up allocation could result in poor eviction decisions on some
|
||||
/// targets affecting the performance of compiled code.
|
||||
virtual bool reverseLocalAssignment() const { return false; }
|
||||
|
||||
/// requiresRegisterScavenging - returns true if the target requires (and can
|
||||
/// make use of) the register scavenger.
|
||||
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const {
|
||||
|
|
|
@ -114,14 +114,46 @@ class ProcResourceKind;
|
|||
// resources implies using one of the super resources.
|
||||
//
|
||||
// ProcResourceUnits normally model a few buffered resources within an
|
||||
// out-of-order engine that the compiler attempts to conserve.
|
||||
// Buffered resources may be held for multiple clock cycles, but the
|
||||
// scheduler does not pin them to a particular clock cycle relative to
|
||||
// instruction dispatch. Setting BufferSize=0 changes this to an
|
||||
// in-order resource. In this case, the scheduler counts down from the
|
||||
// cycle that the instruction issues in-order, forcing an interlock
|
||||
// with subsequent instructions that require the same resource until
|
||||
// the number of ResourceCycles specified in WriteRes expire.
|
||||
// out-of-order engine. Buffered resources may be held for multiple
|
||||
// clock cycles, but the scheduler does not pin them to a particular
|
||||
// clock cycle relative to instruction dispatch. Setting BufferSize=0
|
||||
// changes this to an in-order issue/dispatch resource. In this case,
|
||||
// the scheduler counts down from the cycle that the instruction
|
||||
// issues in-order, forcing a stall whenever a subsequent instruction
|
||||
// requires the same resource until the number of ResourceCycles
|
||||
// specified in WriteRes expire. Setting BufferSize=1 changes this to
|
||||
// an in-order latency resource. In this case, the scheduler models
|
||||
// producer/consumer stalls between instructions that use the
|
||||
// resource.
|
||||
//
|
||||
// Examples (all assume an out-of-order engine):
|
||||
//
|
||||
// Use BufferSize = -1 for "issue ports" fed by a unified reservation
|
||||
// station. Here the size of the reservation station is modeled by
|
||||
// MicroOpBufferSize, which should be the minimum size of either the
|
||||
// register rename pool, unified reservation station, or reorder
|
||||
// buffer.
|
||||
//
|
||||
// Use BufferSize = 0 for resources that force "dispatch/issue
|
||||
// groups". (Different processors define dispatch/issue
|
||||
// differently. Here we refer to stage between decoding into micro-ops
|
||||
// and moving them into a reservation station.) Normally NumMicroOps
|
||||
// is sufficient to limit dispatch/issue groups. However, some
|
||||
// processors can form groups with only certain combinations of
|
||||
// instruction types. e.g. POWER7.
|
||||
//
|
||||
// Use BufferSize = 1 for in-order execution units. This is used for
|
||||
// an in-order pipeline within an out-of-order core where scheduling
|
||||
// dependent operations back-to-back is guaranteed to cause a
|
||||
// bubble. e.g. Cortex-a9 floating-point.
|
||||
//
|
||||
// Use BufferSize > 1 for out-of-order execution units with a
|
||||
// separate reservation station. This simply models the size of the
|
||||
// reservation station.
|
||||
//
|
||||
// To model both dispatch/issue groups and in-order execution units,
|
||||
// create two types of units, one with BufferSize=0 and one with
|
||||
// BufferSize=1.
|
||||
//
|
||||
// SchedModel ties these units to a processor for any stand-alone defs
|
||||
// of this class. Instances of subclass ProcResource will be automatically
|
||||
|
|
|
@ -18,7 +18,10 @@
|
|||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/CaptureTracking.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/Analysis/Dominators.h"
|
||||
#include "llvm/Analysis/InstructionSimplify.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/MemoryBuiltins.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
|
@ -38,6 +41,12 @@
|
|||
#include <algorithm>
|
||||
using namespace llvm;
|
||||
|
||||
/// Cutoff after which to stop analysing a set of phi nodes potentially involved
|
||||
/// in a cycle. Because we are analysing 'through' phi nodes we need to be
|
||||
/// careful with value equivalence. We use reachability to make sure a value
|
||||
/// cannot be involved in a cycle.
|
||||
const unsigned MaxNumPhiBBsValueReachabilityCheck = 20;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Useful predicates
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -403,42 +412,6 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
|
|||
return V;
|
||||
}
|
||||
|
||||
/// GetIndexDifference - Dest and Src are the variable indices from two
|
||||
/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
|
||||
/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
|
||||
/// difference between the two pointers.
|
||||
static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
|
||||
const SmallVectorImpl<VariableGEPIndex> &Src) {
|
||||
if (Src.empty()) return;
|
||||
|
||||
for (unsigned i = 0, e = Src.size(); i != e; ++i) {
|
||||
const Value *V = Src[i].V;
|
||||
ExtensionKind Extension = Src[i].Extension;
|
||||
int64_t Scale = Src[i].Scale;
|
||||
|
||||
// Find V in Dest. This is N^2, but pointer indices almost never have more
|
||||
// than a few variable indexes.
|
||||
for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
|
||||
if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
|
||||
|
||||
// If we found it, subtract off Scale V's from the entry in Dest. If it
|
||||
// goes to zero, remove the entry.
|
||||
if (Dest[j].Scale != Scale)
|
||||
Dest[j].Scale -= Scale;
|
||||
else
|
||||
Dest.erase(Dest.begin()+j);
|
||||
Scale = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
// If we didn't consume this entry, add it to the end of the Dest list.
|
||||
if (Scale) {
|
||||
VariableGEPIndex Entry = { V, Extension, -Scale };
|
||||
Dest.push_back(Entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// BasicAliasAnalysis Pass
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -492,6 +465,7 @@ namespace {
|
|||
// SmallDenseMap if it ever grows larger.
|
||||
// FIXME: This should really be shrink_to_inline_capacity_and_clear().
|
||||
AliasCache.shrink_and_clear();
|
||||
VisitedPhiBBs.clear();
|
||||
return Alias;
|
||||
}
|
||||
|
||||
|
@ -532,9 +506,39 @@ namespace {
|
|||
typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy;
|
||||
AliasCacheTy AliasCache;
|
||||
|
||||
/// \brief Track phi nodes we have visited. When we interpret "Value" pointer
|
||||
/// equality as value equality we need to make sure that the "Value" is not
|
||||
/// part of a cycle. Otherwise, two uses could come from different
|
||||
/// "iterations" of a cycle and see different values for the same "Value"
|
||||
/// pointer.
|
||||
/// The following example shows the problem:
|
||||
/// %p = phi(%alloca1, %addr2)
|
||||
/// %l = load %ptr
|
||||
/// %addr1 = gep %alloca2, 0, %l
|
||||
/// %addr2 = gep %alloca2, 0, (%l + 1)
|
||||
/// alias(%p, %addr1) -> MayAlias !
|
||||
/// store %l, ...
|
||||
SmallPtrSet<const BasicBlock*, 8> VisitedPhiBBs;
|
||||
|
||||
// Visited - Track instructions visited by pointsToConstantMemory.
|
||||
SmallPtrSet<const Value*, 16> Visited;
|
||||
|
||||
/// \brief Check whether two Values can be considered equivalent.
|
||||
///
|
||||
/// In addition to pointer equivalence of \p V1 and \p V2 this checks
|
||||
/// whether they can not be part of a cycle in the value graph by looking at
|
||||
/// all visited phi nodes and making sure that the phis cannot reach the
|
||||
/// value. We have to do this because we are looking through phi nodes (That
|
||||
/// is we say noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB)).
|
||||
bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
|
||||
|
||||
/// \brief Dest and Src are the variable indices from two decomposed
|
||||
/// GetElementPtr instructions GEP1 and GEP2 which have common base
|
||||
/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
|
||||
/// difference between the two pointers.
|
||||
void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
|
||||
const SmallVectorImpl<VariableGEPIndex> &Src);
|
||||
|
||||
// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
|
||||
// instruction against another.
|
||||
AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
|
||||
|
@ -1094,6 +1098,10 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
|
|||
const MDNode *PNTBAAInfo,
|
||||
const Value *V2, uint64_t V2Size,
|
||||
const MDNode *V2TBAAInfo) {
|
||||
// Track phi nodes we have visited. We use this information when we determine
|
||||
// value equivalence.
|
||||
VisitedPhiBBs.insert(PN->getParent());
|
||||
|
||||
// If the values are PHIs in the same block, we can do a more precise
|
||||
// as well as efficient check: just check for aliases between the values
|
||||
// on corresponding edges.
|
||||
|
@ -1187,7 +1195,13 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
|
|||
V2 = V2->stripPointerCasts();
|
||||
|
||||
// Are we checking for alias of the same value?
|
||||
if (V1 == V2) return MustAlias;
|
||||
// Because we look 'through' phi nodes we could look at "Value" pointers from
|
||||
// different iterations. We must therefore make sure that this is not the
|
||||
// case. The function isValueEqualInPotentialCycles ensures that this cannot
|
||||
// happen by looking at the visited phi nodes and making sure they cannot
|
||||
// reach the value.
|
||||
if (isValueEqualInPotentialCycles(V1, V2))
|
||||
return MustAlias;
|
||||
|
||||
if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
|
||||
return NoAlias; // Scalars cannot alias each other
|
||||
|
@ -1307,3 +1321,71 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
|
|||
Location(V2, V2Size, V2TBAAInfo));
|
||||
return AliasCache[Locs] = Result;
|
||||
}
|
||||
|
||||
bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
|
||||
const Value *V2) {
|
||||
if (V != V2)
|
||||
return false;
|
||||
|
||||
const Instruction *Inst = dyn_cast<Instruction>(V);
|
||||
if (!Inst)
|
||||
return true;
|
||||
|
||||
if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)
|
||||
return false;
|
||||
|
||||
// Use dominance or loop info if available.
|
||||
DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
|
||||
LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>();
|
||||
|
||||
// Make sure that the visited phis cannot reach the Value. This ensures that
|
||||
// the Values cannot come from different iterations of a potential cycle the
|
||||
// phi nodes could be involved in.
|
||||
for (SmallPtrSet<const BasicBlock *, 8>::iterator PI = VisitedPhiBBs.begin(),
|
||||
PE = VisitedPhiBBs.end();
|
||||
PI != PE; ++PI)
|
||||
if (isPotentiallyReachable((*PI)->begin(), Inst, DT, LI))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// GetIndexDifference - Dest and Src are the variable indices from two
|
||||
/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
|
||||
/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
|
||||
/// difference between the two pointers.
|
||||
void BasicAliasAnalysis::GetIndexDifference(
|
||||
SmallVectorImpl<VariableGEPIndex> &Dest,
|
||||
const SmallVectorImpl<VariableGEPIndex> &Src) {
|
||||
if (Src.empty())
|
||||
return;
|
||||
|
||||
for (unsigned i = 0, e = Src.size(); i != e; ++i) {
|
||||
const Value *V = Src[i].V;
|
||||
ExtensionKind Extension = Src[i].Extension;
|
||||
int64_t Scale = Src[i].Scale;
|
||||
|
||||
// Find V in Dest. This is N^2, but pointer indices almost never have more
|
||||
// than a few variable indexes.
|
||||
for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
|
||||
if (!isValueEqualInPotentialCycles(Dest[j].V, V) ||
|
||||
Dest[j].Extension != Extension)
|
||||
continue;
|
||||
|
||||
// If we found it, subtract off Scale V's from the entry in Dest. If it
|
||||
// goes to zero, remove the entry.
|
||||
if (Dest[j].Scale != Scale)
|
||||
Dest[j].Scale -= Scale;
|
||||
else
|
||||
Dest.erase(Dest.begin() + j);
|
||||
Scale = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
// If we didn't consume this entry, add it to the end of the Dest list.
|
||||
if (Scale) {
|
||||
VariableGEPIndex Entry = { V, Extension, -Scale };
|
||||
Dest.push_back(Entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -86,7 +86,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
|
|||
OS << Node->getName().str() << ":";
|
||||
switch (ViewBlockFreqPropagationDAG) {
|
||||
case GVDT_Fraction:
|
||||
Graph->getBlockFreq(Node).print(OS);
|
||||
Graph->printBlockFreq(OS, Node);
|
||||
break;
|
||||
case GVDT_Integer:
|
||||
OS << Graph->getBlockFreq(Node).getFrequency();
|
||||
|
@ -159,3 +159,18 @@ void BlockFrequencyInfo::view() const {
|
|||
const Function *BlockFrequencyInfo::getFunction() const {
|
||||
return BFI->Fn;
|
||||
}
|
||||
|
||||
/// Print the block frequency \p Freq to \p OS by forwarding to the
/// implementation object BFI, and return whatever stream reference the
/// implementation returns.
raw_ostream &BlockFrequencyInfo::
|
||||
printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const {
|
||||
return BFI->printBlockFreq(OS, Freq);
|
||||
}
|
||||
|
||||
/// Print the frequency of basic block \p BB to \p OS by forwarding to the
/// implementation object BFI, and return whatever stream reference the
/// implementation returns.
raw_ostream &
|
||||
BlockFrequencyInfo::printBlockFreq(raw_ostream &OS,
|
||||
const BasicBlock *BB) const {
|
||||
return BFI->printBlockFreq(OS, BB);
|
||||
}
|
||||
|
||||
/// Return the entry frequency reported by the implementation object BFI.
uint64_t BlockFrequencyInfo::getEntryFreq() const {
|
||||
return BFI->getEntryFreq();
|
||||
}
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "branch-prob"
|
||||
#include "llvm/Analysis/BranchProbabilityInfo.h"
|
||||
#include "llvm/ADT/PostOrderIterator.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
|
@ -483,6 +484,8 @@ void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
|
|||
}
|
||||
|
||||
bool BranchProbabilityInfo::runOnFunction(Function &F) {
|
||||
DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
|
||||
<< " ----\n\n");
|
||||
LastF = &F; // Store the last function we ran on for printing.
|
||||
LI = &getAnalysis<LoopInfo>();
|
||||
assert(PostDominatedByUnreachable.empty());
|
||||
|
@ -591,6 +594,13 @@ getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const {
|
|||
return DEFAULT_WEIGHT;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
BranchProbabilityInfo::
|
||||
getEdgeWeight(const BasicBlock *Src, succ_const_iterator Dst) const {
|
||||
size_t index = std::distance(succ_begin(Src), Dst);
|
||||
return getEdgeWeight(Src, index);
|
||||
}
|
||||
|
||||
/// Get the raw edge weight calculated for the block pair. This returns the sum
|
||||
/// of all raw edge weights from Src to Dst.
|
||||
uint32_t BranchProbabilityInfo::
|
||||
|
|
|
@ -16,9 +16,37 @@
|
|||
#include "llvm/Support/raw_ostream.h"
|
||||
using namespace llvm;
|
||||
|
||||
CallGraph::CallGraph()
|
||||
: ModulePass(ID), Root(0), ExternalCallingNode(0), CallsExternalNode(0) {
|
||||
initializeCallGraphPass(*PassRegistry::getPassRegistry());
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Implementations of the CallGraph class methods.
|
||||
//
|
||||
|
||||
/// Build the call graph for module \p M. The node for external callers is
/// obtained through getOrInsertFunction(0), a separate node is allocated for
/// calls to external code, and every function in the module is then added.
CallGraph::CallGraph(Module &M)
    : M(M), Root(0), ExternalCallingNode(getOrInsertFunction(0)),
      CallsExternalNode(new CallGraphNode(0)) {
  // Register each function of the module with the graph.
  Module::iterator FnIt = M.begin();
  const Module::iterator FnEnd = M.end();
  while (FnIt != FnEnd) {
    addToCallGraph(FnIt);
    ++FnIt;
  }

  // If no main function was found while adding functions, Root is still
  // null; fall back to the external calling node.
  if (!Root)
    Root = ExternalCallingNode;
}
|
||||
|
||||
/// Tear down the graph: release the node used for calls to external code and
/// every node stored in FunctionMap.
CallGraph::~CallGraph() {
  // CallsExternalNode is not an entry of FunctionMap, so it has to be
  // released by hand.
  CallsExternalNode->allReferencesDropped();
  delete CallsExternalNode;

  typedef FunctionMapTy::iterator NodeIter;

#ifndef NDEBUG
  // Zero every node's use count before any node is deleted, so that no
  // assertion fires during the deletions below.
  for (NodeIter Cur = FunctionMap.begin(), Last = FunctionMap.end();
       Cur != Last; ++Cur)
    Cur->second->allReferencesDropped();
#endif

  NodeIter Cur = FunctionMap.begin();
  const NodeIter Last = FunctionMap.end();
  while (Cur != Last) {
    delete Cur->second;
    ++Cur;
  }
}
|
||||
|
||||
void CallGraph::addToCallGraph(Function *F) {
|
||||
|
@ -62,59 +90,7 @@ void CallGraph::addToCallGraph(Function *F) {
|
|||
}
|
||||
}
|
||||
|
||||
void CallGraph::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesAll();
|
||||
}
|
||||
|
||||
bool CallGraph::runOnModule(Module &M) {
|
||||
Mod = &M;
|
||||
|
||||
ExternalCallingNode = getOrInsertFunction(0);
|
||||
assert(!CallsExternalNode);
|
||||
CallsExternalNode = new CallGraphNode(0);
|
||||
Root = 0;
|
||||
|
||||
// Add every function to the call graph.
|
||||
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
|
||||
addToCallGraph(I);
|
||||
|
||||
// If we didn't find a main function, use the external call graph node
|
||||
if (Root == 0)
|
||||
Root = ExternalCallingNode;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
INITIALIZE_PASS(CallGraph, "basiccg", "CallGraph Construction", false, true)
|
||||
|
||||
char CallGraph::ID = 0;
|
||||
|
||||
void CallGraph::releaseMemory() {
|
||||
/// CallsExternalNode is not in the function map, delete it explicitly.
|
||||
if (CallsExternalNode) {
|
||||
CallsExternalNode->allReferencesDropped();
|
||||
delete CallsExternalNode;
|
||||
CallsExternalNode = 0;
|
||||
}
|
||||
|
||||
if (FunctionMap.empty())
|
||||
return;
|
||||
|
||||
// Reset all node's use counts to zero before deleting them to prevent an
|
||||
// assertion from firing.
|
||||
#ifndef NDEBUG
|
||||
for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
|
||||
I != E; ++I)
|
||||
I->second->allReferencesDropped();
|
||||
#endif
|
||||
|
||||
for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
|
||||
I != E; ++I)
|
||||
delete I->second;
|
||||
FunctionMap.clear();
|
||||
}
|
||||
|
||||
void CallGraph::print(raw_ostream &OS, const Module*) const {
|
||||
void CallGraph::print(raw_ostream &OS) const {
|
||||
OS << "CallGraph Root is: ";
|
||||
if (Function *F = Root->getFunction())
|
||||
OS << F->getName() << "\n";
|
||||
|
@ -125,15 +101,10 @@ void CallGraph::print(raw_ostream &OS, const Module*) const {
|
|||
for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
|
||||
I->second->print(OS);
|
||||
}
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
void CallGraph::dump() const {
|
||||
print(dbgs(), 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Implementations of public modification methods
|
||||
//
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
void CallGraph::dump() const { print(dbgs()); }
|
||||
#endif
|
||||
|
||||
// removeFunctionFromModule - Unlink the function from this module, returning
|
||||
// it. Because this removes the function from the module, the call graph node
|
||||
|
@ -148,7 +119,7 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
|
|||
delete CGN; // Delete the call graph node for this func
|
||||
FunctionMap.erase(F); // Remove the call graph node from the map
|
||||
|
||||
Mod->getFunctionList().remove(F);
|
||||
M.getFunctionList().remove(F);
|
||||
return F;
|
||||
}
|
||||
|
||||
|
@ -172,12 +143,17 @@ void CallGraph::spliceFunction(const Function *From, const Function *To) {
|
|||
// not already exist.
|
||||
CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
|
||||
CallGraphNode *&CGN = FunctionMap[F];
|
||||
if (CGN) return CGN;
|
||||
|
||||
assert((!F || F->getParent() == Mod) && "Function not in current module!");
|
||||
if (CGN)
|
||||
return CGN;
|
||||
|
||||
assert((!F || F->getParent() == &M) && "Function not in current module!");
|
||||
return CGN = new CallGraphNode(const_cast<Function*>(F));
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Implementations of the CallGraphNode class methods.
|
||||
//
|
||||
|
||||
void CallGraphNode::print(raw_ostream &OS) const {
|
||||
if (Function *F = getFunction())
|
||||
OS << "Call graph node for function: '" << F->getName() << "'";
|
||||
|
@ -260,5 +236,46 @@ void CallGraphNode::replaceCallEdge(CallSite CS,
|
|||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Implementations of the CallGraphWrapperPass class methods.
|
||||
//
|
||||
|
||||
CallGraphWrapperPass::CallGraphWrapperPass() : ModulePass(ID) {
|
||||
initializeCallGraphWrapperPassPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
CallGraphWrapperPass::~CallGraphWrapperPass() {}
|
||||
|
||||
void CallGraphWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesAll();
|
||||
}
|
||||
|
||||
/// Construct a fresh CallGraph for \p M and store it in G, replacing whatever
/// G held before. Always returns false.
bool CallGraphWrapperPass::runOnModule(Module &M) {
|
||||
// All the real work is done in the constructor for the CallGraph.
|
||||
G.reset(new CallGraph(M));
|
||||
return false;
|
||||
}
|
||||
|
||||
INITIALIZE_PASS(CallGraphWrapperPass, "basiccg", "CallGraph Construction",
|
||||
false, true)
|
||||
|
||||
char CallGraphWrapperPass::ID = 0;
|
||||
|
||||
void CallGraphWrapperPass::releaseMemory() { G.reset(0); }
|
||||
|
||||
/// Print the wrapped call graph to \p OS. When no graph is held, a short
/// notice is written instead. The module argument is unused.
void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const {
  if (!G)
    OS << "No call graph has been built!\n";
  else
    G->print(OS); // The graph does the actual printing.
}
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
void CallGraphWrapperPass::dump() const { print(dbgs(), 0); }
|
||||
#endif
|
||||
|
||||
// Ensure that users of CallGraph.h also link with this file
|
||||
DEFINING_FILE_FOR(CallGraph)
|
||||
|
|
|
@ -60,7 +60,7 @@ public:
|
|||
/// Pass Manager itself does not invalidate any analysis info.
|
||||
void getAnalysisUsage(AnalysisUsage &Info) const {
|
||||
// CGPassManager walks SCC and it needs CallGraph.
|
||||
Info.addRequired<CallGraph>();
|
||||
Info.addRequired<CallGraphWrapperPass>();
|
||||
Info.setPreservesAll();
|
||||
}
|
||||
|
||||
|
@ -424,7 +424,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
|
|||
/// run - Execute all of the passes scheduled for execution. Keep track of
|
||||
/// whether any of the passes modifies the module, and if so, return true.
|
||||
bool CGPassManager::runOnModule(Module &M) {
|
||||
CallGraph &CG = getAnalysis<CallGraph>();
|
||||
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
|
||||
bool Changed = doInitialization(CG);
|
||||
|
||||
// Walk the callgraph in bottom-up SCC order.
|
||||
|
@ -570,8 +570,8 @@ void CallGraphSCCPass::assignPassManager(PMStack &PMS,
|
|||
/// the call graph. If the derived class implements this method, it should
|
||||
/// always explicitly call the implementation here.
|
||||
void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.addRequired<CallGraph>();
|
||||
AU.addPreserved<CallGraph>();
|
||||
AU.addRequired<CallGraphWrapperPass>();
|
||||
AU.addPreserved<CallGraphWrapperPass>();
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -22,13 +22,10 @@ using namespace llvm;
|
|||
|
||||
namespace llvm {
|
||||
|
||||
template<>
|
||||
struct DOTGraphTraits<CallGraph*> : public DefaultDOTGraphTraits {
|
||||
DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
|
||||
template <> struct DOTGraphTraits<CallGraph *> : public DefaultDOTGraphTraits {
|
||||
DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
|
||||
|
||||
static std::string getGraphName(CallGraph *Graph) {
|
||||
return "Call graph";
|
||||
}
|
||||
static std::string getGraphName(CallGraph *Graph) { return "Call graph"; }
|
||||
|
||||
std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) {
|
||||
if (Function *Func = Node->getFunction())
|
||||
|
@ -38,49 +35,57 @@ struct DOTGraphTraits<CallGraph*> : public DefaultDOTGraphTraits {
|
|||
}
|
||||
};
|
||||
|
||||
struct AnalysisCallGraphWrapperPassTraits {
|
||||
static CallGraph *getGraph(CallGraphWrapperPass *P) {
|
||||
return &P->getCallGraph();
|
||||
}
|
||||
};
|
||||
|
||||
} // end llvm namespace
|
||||
|
||||
namespace {
|
||||
|
||||
struct CallGraphViewer
|
||||
: public DOTGraphTraitsModuleViewer<CallGraph, true> {
|
||||
: public DOTGraphTraitsModuleViewer<CallGraphWrapperPass, true, CallGraph *,
|
||||
AnalysisCallGraphWrapperPassTraits> {
|
||||
static char ID;
|
||||
|
||||
CallGraphViewer()
|
||||
: DOTGraphTraitsModuleViewer<CallGraph, true>("callgraph", ID) {
|
||||
: DOTGraphTraitsModuleViewer<CallGraphWrapperPass, true, CallGraph *,
|
||||
AnalysisCallGraphWrapperPassTraits>(
|
||||
"callgraph", ID) {
|
||||
initializeCallGraphViewerPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
};
|
||||
|
||||
struct CallGraphPrinter
|
||||
: public DOTGraphTraitsModulePrinter<CallGraph, true> {
|
||||
struct CallGraphPrinter : public DOTGraphTraitsModulePrinter<
|
||||
CallGraphWrapperPass, true, CallGraph *,
|
||||
AnalysisCallGraphWrapperPassTraits> {
|
||||
static char ID;
|
||||
|
||||
CallGraphPrinter()
|
||||
: DOTGraphTraitsModulePrinter<CallGraph, true>("callgraph", ID) {
|
||||
initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry());
|
||||
: DOTGraphTraitsModulePrinter<CallGraphWrapperPass, true, CallGraph *,
|
||||
AnalysisCallGraphWrapperPassTraits>(
|
||||
"callgraph", ID) {
|
||||
initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char CallGraphViewer::ID = 0;
|
||||
INITIALIZE_PASS(CallGraphViewer, "view-callgraph",
|
||||
"View call graph",
|
||||
false, false)
|
||||
INITIALIZE_PASS(CallGraphViewer, "view-callgraph", "View call graph", false,
|
||||
false)
|
||||
|
||||
char CallGraphPrinter::ID = 0;
|
||||
INITIALIZE_PASS(CallGraphPrinter, "dot-callgraph",
|
||||
"Print call graph to 'dot' file",
|
||||
false, false)
|
||||
"Print call graph to 'dot' file", false, false)
|
||||
|
||||
// Create methods available outside of this file, to use them in
|
||||
// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
|
||||
// the link time optimization.
|
||||
|
||||
ModulePass *llvm::createCallGraphViewerPass() {
|
||||
return new CallGraphViewer();
|
||||
}
|
||||
ModulePass *llvm::createCallGraphViewerPass() { return new CallGraphViewer(); }
|
||||
|
||||
ModulePass *llvm::createCallGraphPrinterPass() {
|
||||
return new CallGraphPrinter();
|
||||
|
|
|
@ -95,15 +95,19 @@ namespace {
|
|||
}
|
||||
|
||||
bool runOnModule(Module &M) {
|
||||
InitializeAliasAnalysis(this); // set up super class
|
||||
AnalyzeGlobals(M); // find non-addr taken globals
|
||||
AnalyzeCallGraph(getAnalysis<CallGraph>(), M); // Propagate on CG
|
||||
InitializeAliasAnalysis(this);
|
||||
|
||||
// Find non-addr taken globals.
|
||||
AnalyzeGlobals(M);
|
||||
|
||||
// Propagate on CG.
|
||||
AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M);
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AliasAnalysis::getAnalysisUsage(AU);
|
||||
AU.addRequired<CallGraph>();
|
||||
AU.addRequired<CallGraphWrapperPass>();
|
||||
AU.setPreservesAll(); // Does not transform code
|
||||
}
|
||||
|
||||
|
@ -189,7 +193,7 @@ char GlobalsModRef::ID = 0;
|
|||
INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis,
|
||||
"globalsmodref-aa", "Simple mod/ref analysis for globals",
|
||||
false, true, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(CallGraph)
|
||||
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
|
||||
INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis,
|
||||
"globalsmodref-aa", "Simple mod/ref analysis for globals",
|
||||
false, true, false)
|
||||
|
|
|
@ -19,7 +19,7 @@ using namespace llvm;
|
|||
|
||||
/// initializeIPA - Initialize all passes linked into the IPA library.
|
||||
void llvm::initializeIPA(PassRegistry &Registry) {
|
||||
initializeCallGraphPass(Registry);
|
||||
initializeCallGraphWrapperPassPass(Registry);
|
||||
initializeCallGraphPrinterPass(Registry);
|
||||
initializeCallGraphViewerPass(Registry);
|
||||
initializeFindUsedTypesPass(Registry);
|
||||
|
|
|
@ -59,6 +59,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
|
|||
bool ExposesReturnsTwice;
|
||||
bool HasDynamicAlloca;
|
||||
bool ContainsNoDuplicateCall;
|
||||
bool HasReturn;
|
||||
bool HasIndirectBr;
|
||||
|
||||
/// Number of bytes allocated statically by the callee.
|
||||
uint64_t AllocatedSize;
|
||||
|
@ -132,6 +134,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
|
|||
bool visitExtractValue(ExtractValueInst &I);
|
||||
bool visitInsertValue(InsertValueInst &I);
|
||||
bool visitCallSite(CallSite CS);
|
||||
bool visitReturnInst(ReturnInst &RI);
|
||||
bool visitBranchInst(BranchInst &BI);
|
||||
bool visitSwitchInst(SwitchInst &SI);
|
||||
bool visitIndirectBrInst(IndirectBrInst &IBI);
|
||||
bool visitResumeInst(ResumeInst &RI);
|
||||
bool visitUnreachableInst(UnreachableInst &I);
|
||||
|
||||
public:
|
||||
CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI,
|
||||
|
@ -139,12 +147,13 @@ public:
|
|||
: TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
|
||||
IsCallerRecursive(false), IsRecursiveCall(false),
|
||||
ExposesReturnsTwice(false), HasDynamicAlloca(false),
|
||||
ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0),
|
||||
NumVectorInstructions(0), FiftyPercentVectorBonus(0),
|
||||
TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
|
||||
NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
|
||||
NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
|
||||
SROACostSavings(0), SROACostSavingsLost(0) {}
|
||||
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
|
||||
AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
|
||||
FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
|
||||
NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
|
||||
NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
|
||||
NumInstructionsSimplified(0), SROACostSavings(0),
|
||||
SROACostSavingsLost(0) {}
|
||||
|
||||
bool analyzeCall(CallSite CS);
|
||||
|
||||
|
@ -704,7 +713,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
|
|||
}
|
||||
|
||||
bool CallAnalyzer::visitCallSite(CallSite CS) {
|
||||
if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
|
||||
if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
|
||||
!F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
|
||||
Attribute::ReturnsTwice)) {
|
||||
// This aborts the entire analysis.
|
||||
|
@ -785,6 +794,60 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
|
|||
return Base::visitCallSite(CS);
|
||||
}
|
||||
|
||||
bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
|
||||
// At least one return instruction will be free after inlining.
|
||||
bool Free = !HasReturn;
|
||||
HasReturn = true;
|
||||
return Free;
|
||||
}
|
||||
|
||||
bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
|
||||
// We model unconditional branches as essentially free -- they really
|
||||
// shouldn't exist at all, but handling them makes the behavior of the
|
||||
// inliner more regular and predictable. Interestingly, conditional branches
|
||||
// which will fold away are also free.
|
||||
return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
|
||||
dyn_cast_or_null<ConstantInt>(
|
||||
SimplifiedValues.lookup(BI.getCondition()));
|
||||
}
|
||||
|
||||
bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
|
||||
// We model unconditional switches as free, see the comments on handling
|
||||
// branches.
|
||||
return isa<ConstantInt>(SI.getCondition()) ||
|
||||
dyn_cast_or_null<ConstantInt>(
|
||||
SimplifiedValues.lookup(SI.getCondition()));
|
||||
}
|
||||
|
||||
bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
|
||||
// We never want to inline functions that contain an indirectbr. This is
|
||||
// incorrect because all the blockaddress's (in static global initializers
|
||||
// for example) would be referring to the original function, and this
|
||||
// indirect jump would jump from the inlined copy of the function into the
|
||||
// original function which is extremely undefined behavior.
|
||||
// FIXME: This logic isn't really right; we can safely inline functions with
|
||||
// indirectbr's as long as no other function or global references the
|
||||
// blockaddress of a block within the current function. And as a QOI issue,
|
||||
// if someone is using a blockaddress without an indirectbr, and that
|
||||
// reference somehow ends up in another function or global, we probably don't
|
||||
// want to inline this function.
|
||||
HasIndirectBr = true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
|
||||
// FIXME: It's not clear that a single instruction is an accurate model for
|
||||
// the inline cost of a resume instruction.
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
|
||||
// FIXME: It might be reasonable to discount the cost of instructions leading
|
||||
// to unreachable as they have the lowest possible impact on both runtime and
|
||||
// code size.
|
||||
return true; // No actual code is needed for unreachable.
|
||||
}
|
||||
|
||||
bool CallAnalyzer::visitInstruction(Instruction &I) {
|
||||
// Some instructions are free. All of the free intrinsics can also be
|
||||
// handled by SROA, etc.
|
||||
|
@ -808,8 +871,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
|
|||
/// construct has been detected. It returns false if inlining is no longer
|
||||
/// viable, and true if inlining remains viable.
|
||||
bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
|
||||
for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
|
||||
I != E; ++I) {
|
||||
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
|
||||
++NumInstructions;
|
||||
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
|
||||
++NumVectorInstructions;
|
||||
|
@ -825,7 +887,8 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
|
|||
Cost += InlineConstants::InstrCost;
|
||||
|
||||
// If visiting this instruction detected an uninlinable pattern, abort.
|
||||
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
|
||||
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
|
||||
HasIndirectBr)
|
||||
return false;
|
||||
|
||||
// If the caller is a recursive function then we don't want to inline
|
||||
|
@ -989,10 +1052,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
|
|||
}
|
||||
}
|
||||
|
||||
// Track whether we've seen a return instruction. The first return
|
||||
// instruction is free, as at least one will usually disappear in inlining.
|
||||
bool HasReturn = false;
|
||||
|
||||
// Populate our simplified values by mapping from function arguments to call
|
||||
// arguments with known important simplifications.
|
||||
CallSite::arg_iterator CAI = CS.arg_begin();
|
||||
|
@ -1039,33 +1098,11 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
|
|||
if (BB->empty())
|
||||
continue;
|
||||
|
||||
// Handle the terminator cost here where we can track returns and other
|
||||
// function-wide constructs.
|
||||
TerminatorInst *TI = BB->getTerminator();
|
||||
|
||||
// We never want to inline functions that contain an indirectbr. This is
|
||||
// incorrect because all the blockaddress's (in static global initializers
|
||||
// for example) would be referring to the original function, and this
|
||||
// indirect jump would jump from the inlined copy of the function into the
|
||||
// original function which is extremely undefined behavior.
|
||||
// FIXME: This logic isn't really right; we can safely inline functions
|
||||
// with indirectbr's as long as no other function or global references the
|
||||
// blockaddress of a block within the current function. And as a QOI issue,
|
||||
// if someone is using a blockaddress without an indirectbr, and that
|
||||
// reference somehow ends up in another function or global, we probably
|
||||
// don't want to inline this function.
|
||||
if (isa<IndirectBrInst>(TI))
|
||||
return false;
|
||||
|
||||
if (!HasReturn && isa<ReturnInst>(TI))
|
||||
HasReturn = true;
|
||||
else
|
||||
Cost += InlineConstants::InstrCost;
|
||||
|
||||
// Analyze the cost of this block. If we blow through the threshold, this
|
||||
// returns false, and we can bail on out.
|
||||
if (!analyzeBlock(BB)) {
|
||||
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
|
||||
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
|
||||
HasIndirectBr)
|
||||
return false;
|
||||
|
||||
// If the caller is a recursive function then we don't want to inline
|
||||
|
@ -1078,6 +1115,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
|
|||
break;
|
||||
}
|
||||
|
||||
TerminatorInst *TI = BB->getTerminator();
|
||||
|
||||
// Add in the live successors by first checking whether we have terminator
|
||||
// that may be simplified based on the values simplified by this call.
|
||||
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
|
||||
|
@ -1115,7 +1154,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
|
|||
}
|
||||
}
|
||||
|
||||
// If this is a noduplicate call, we can still inline as long as
|
||||
// If this is a noduplicate call, we can still inline as long as
|
||||
// inlining this would cause the removal of the caller (so the instruction
|
||||
// is not actually duplicated, just moved).
|
||||
if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
|
||||
|
|
|
@ -68,7 +68,7 @@ namespace {
|
|||
return InstTypePair(dep.getInst(), Def);
|
||||
if (dep.isNonFuncLocal())
|
||||
return InstTypePair(dep.getInst(), NonFuncLocal);
|
||||
assert(dep.isUnknown() && "unexptected dependence type");
|
||||
assert(dep.isUnknown() && "unexpected dependence type");
|
||||
return InstTypePair(dep.getInst(), Unknown);
|
||||
}
|
||||
static InstTypePair getInstTypePair(const Instruction* inst, DepType type) {
|
||||
|
|
|
@ -399,12 +399,14 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *DL,
|
|||
LLVMContext &Context,
|
||||
bool RoundToAlign)
|
||||
: DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) {
|
||||
IntegerType *IntTy = DL->getIntPtrType(Context);
|
||||
IntTyBits = IntTy->getBitWidth();
|
||||
Zero = APInt::getNullValue(IntTyBits);
|
||||
// Pointer size must be rechecked for each object visited since it could have
|
||||
// a different address space.
|
||||
}
|
||||
|
||||
SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
|
||||
IntTyBits = DL->getPointerTypeSizeInBits(V->getType());
|
||||
Zero = APInt::getNullValue(IntTyBits);
|
||||
|
||||
V = V->stripPointerCasts();
|
||||
if (Instruction *I = dyn_cast<Instruction>(V)) {
|
||||
// If we have already seen this instruction, bail out. Cycles can happen in
|
||||
|
@ -592,11 +594,15 @@ ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *DL,
|
|||
bool RoundToAlign)
|
||||
: DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
|
||||
RoundToAlign(RoundToAlign) {
|
||||
IntTy = DL->getIntPtrType(Context);
|
||||
Zero = ConstantInt::get(IntTy, 0);
|
||||
// IntTy and Zero must be set for each compute() since the address space may
|
||||
// be different for later objects.
|
||||
}
|
||||
|
||||
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
|
||||
// XXX - Are vectors of pointers possible here?
|
||||
IntTy = cast<IntegerType>(DL->getIntPtrType(V->getType()));
|
||||
Zero = ConstantInt::get(IntTy, 0);
|
||||
|
||||
SizeOffsetEvalType Result = compute_(V);
|
||||
|
||||
if (!bothKnown(Result)) {
|
||||
|
|
|
@ -72,7 +72,7 @@ static bool VerifySubExpr(Value *Expr,
|
|||
// If it isn't in the InstInputs list it is a subexpr incorporated into the
|
||||
// address. Sanity check that it is phi translatable.
|
||||
if (!CanPHITrans(I)) {
|
||||
errs() << "Non phi translatable instruction found in PHITransAddr:\n";
|
||||
errs() << "Instruction in PHITransAddr is not phi-translatable:\n";
|
||||
errs() << *I << '\n';
|
||||
llvm_unreachable("Either something is missing from InstInputs or "
|
||||
"CanPHITrans is wrong.");
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue