Import LLVM 3.5svn r198450.

This commit is contained in:
joerg 2014-01-05 16:12:50 +00:00
parent d149ea39d0
commit fdaf75aa90
1656 changed files with 57713 additions and 16014 deletions

View File

@ -31,14 +31,14 @@ dnl===
dnl===-----------------------------------------------------------------------===
dnl Initialize autoconf and define the package name, version number and
dnl address for reporting bugs.
AC_INIT([LLVM],[3.4],[http://llvm.org/bugs/])
AC_INIT([LLVM],[3.5svn],[http://llvm.org/bugs/])
AC_DEFINE([LLVM_VERSION_MAJOR], [3], [Major version of the LLVM API])
AC_DEFINE([LLVM_VERSION_MINOR], [4], [Minor version of the LLVM API])
AC_DEFINE([LLVM_VERSION_MINOR], [5], [Minor version of the LLVM API])
dnl Provide a copyright substitution and ensure the copyright notice is included
dnl in the output of --version option of the generated configure script.
AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign."])
AC_COPYRIGHT([Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign.])
AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign."])
AC_COPYRIGHT([Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign.])
dnl Indicate that we require autoconf 2.60 or later.
AC_PREREQ(2.60)

View File

@ -37,7 +37,7 @@ OcamlDir := $(LibDir)/ocaml
# Info from llvm-config and similar
ifndef IS_CLEANING_TARGET
ifdef UsedComponents
UsedLibs = $(shell $(LLVM_CONFIG) --libs $(UsedComponents))
UsedLibs = $(shell $(LLVM_CONFIG) --libs --system-libs $(UsedComponents))
UsedLibNames = $(shell $(LLVM_CONFIG) --libnames $(UsedComponents))
endif
endif

View File

@ -12,7 +12,7 @@
This interface provides an OCaml API for LLVM scalar transforms, the
classes in the [LLVMScalarOpts] library. *)
(** See the [llvm::createConstantPropogationPass] function. *)
(** See the [llvm::createConstantPropagationPass] function. *)
external add_constant_propagation : [<Llvm.PassManager.any] Llvm.PassManager.t
-> unit
= "llvm_add_constant_propagation"

View File

@ -16,9 +16,9 @@ class TestDisassembler(TestBase):
self.assertEqual(count, 3)
self.assertEqual(s, '\tjcxz\t-127')
def test_nonexistant_triple(self):
def test_nonexistent_triple(self):
with self.assertRaisesRegexp(Exception, "Could not obtain disassembler for triple"):
Disassembler("nonexistant-triple-raises")
Disassembler("nonexistent-triple-raises")
def test_get_instructions(self):
sequence = '\x67\xe3\x81\x01\xc7' # jcxz -127; addl %eax, %edi

View File

@ -211,6 +211,9 @@ LLVM-specific variables
**LLVM_ENABLE_THREADS**:BOOL
Build with threads support, if available. Defaults to ON.
**LLVM_ENABLE_CXX11**:BOOL
Build in C++11 mode, if available. Defaults to OFF.
**LLVM_ENABLE_ASSERTIONS**:BOOL
Enables code assertions. Defaults to OFF if and only if ``CMAKE_BUILD_TYPE``
is *Release*.

View File

@ -844,7 +844,7 @@ Here are more examples:
.. code-block:: c++
assert(Ty->isPointerType() && "Can't allocate a non pointer type!");
assert(Ty->isPointerType() && "Can't allocate a non-pointer type!");
assert((Opcode == Shl || Opcode == Shr) && "ShiftInst Opcode invalid!");

View File

@ -22,7 +22,6 @@ Basic Commands
llvm-link
llvm-ar
llvm-nm
llvm-prof
llvm-config
llvm-diff
llvm-cov

View File

@ -10,7 +10,9 @@ DESCRIPTION
-----------
:program:`llvm-symbolizer` reads object file names and addresses from standard
input and prints corresponding source code locations to standard output. This
input and prints corresponding source code locations to standard output.
If an object file is specified on the command line, :program:`llvm-symbolizer` reads
only addresses from standard input. This
program uses debug info sections and symbol table in the object files.
EXAMPLE
@ -45,10 +47,22 @@ EXAMPLE
_main
/tmp/source_x86_64.cc:8
$ cat addr2.txt
0x4004f4
0x401000
$ llvm-symbolizer -obj=a.out < addr2.txt
main
/tmp/a.cc:4
foo(int)
/tmp/a.cc:12
OPTIONS
-------
.. option:: -obj
Path to object file to be symbolized.
.. option:: -functions
Print function names as well as source file/line locations. Defaults to true.

View File

@ -1276,7 +1276,7 @@ The ``cl::getRegisteredOptions`` function
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The ``cl::getRegisteredOptions`` function is designed to give a programmer
access to declared non positional command line options so that how they appear
access to declared non-positional command line options so that how they appear
in ``-help`` can be modified prior to calling `cl::ParseCommandLineOptions`_.
Note this method should not be called during any static initialisation because
it cannot be guaranteed that all options will have been initialised. Hence it

View File

@ -80,8 +80,9 @@ R600
SPARC
-----
* `SPARC resources <http://www.sparc.org/resource.htm>`_
* `SPARC standards <http://www.sparc.org/standards.html>`_
* `SPARC standards <http://sparc.org/standards>`_
* `SPARC V9 ABI <http://sparc.org/standards/64.psabi.1.35.ps.Z>`_
* `SPARC V8 ABI <http://sparc.org/standards/psABI3rd.pdf>`_
SystemZ
-------

View File

@ -37,7 +37,7 @@ X86/COFF-Dependent
Relocations
^^^^^^^^^^^
The following additional relocation type is supported:
The following additional relocation types are supported:
**@IMGREL** (AT&T syntax only) generates an image-relative relocation that
corresponds to the COFF relocation types ``IMAGE_REL_I386_DIR32NB`` (32-bit) or
@ -54,6 +54,22 @@ corresponds to the COFF relocation types ``IMAGE_REL_I386_DIR32NB`` (32-bit) or
.long (fun@imgrel + 0x3F)
.long $unwind$fun@imgrel
**.secrel32** generates a relocation that corresponds to the COFF relocation
types ``IMAGE_REL_I386_SECREL`` (32-bit) or ``IMAGE_REL_AMD64_SECREL`` (64-bit).
**.secidx** relocation generates an index of the section that contains
the target. It corresponds to the COFF relocation types
``IMAGE_REL_I386_SECTION`` (32-bit) or ``IMAGE_REL_AMD64_SECTION`` (64-bit).
.. code-block:: gas
.section .debug$S,"rn"
.long 4
.long 242
.long 40
.secrel32 _function_name
.secidx _function_name
...
``.linkonce`` Directive
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -127,7 +143,7 @@ MC supports passing the information in ``.linkonce`` at the end of
Symbol1:
.long 1
Note that in the combined form the COMDAT symbol is explict. This
Note that in the combined form the COMDAT symbol is explicit. This
extension exists to support multiple sections with the same name in
different comdats:

View File

@ -238,6 +238,8 @@ when qualifying the build of ``llvm``, ``clang``, and ``dragonegg``.
+--------------+---------------+----------------------+
| x86-64 | FreeBSD | gcc 4.2.X |
+--------------+---------------+----------------------+
| ARMv7 | Linux | gcc 4.6.X, gcc 4.7.X |
+--------------+---------------+----------------------+
Release Qualification Criteria
------------------------------
@ -298,6 +300,10 @@ Specific Target Qualification Details
| | | | clang regression tests, |
| | | | test-suite |
+--------------+-------------+----------------+-----------------------------+
| ARMv7A | Linux | last release | llvm regression tests, |
| | | | clang regression tests, |
| | | | test-suite |
+--------------+-------------+----------------+-----------------------------+
Community Testing
-----------------

View File

@ -0,0 +1,140 @@
==========================================
Design and Usage of the InAlloca Attribute
==========================================
Introduction
============
.. Warning:: This feature is unstable and not fully implemented.
The :ref:`attr_inalloca` attribute is designed to allow taking the
address of an aggregate argument that is being passed by value through
memory. Primarily, this feature is required for compatibility with the
Microsoft C++ ABI. Under that ABI, class instances that are passed by
value are constructed directly into argument stack memory. Prior to the
addition of inalloca, calls in LLVM were indivisible instructions.
There was no way to perform intermediate work, such as object
construction, between the first stack adjustment and the final control
transfer. With inalloca, each argument is modelled as an alloca, which
can be stored to independently of the call. Unfortunately, this
complicated feature comes with a large set of restrictions designed to
bound the lifetime of the argument memory around the call, which are
explained in this document.
For now, it is recommended that frontends and optimizers avoid producing
this construct, primarily because it forces the use of a base pointer.
This feature may grow in the future to allow general mid-level
optimization, but for now, it should be regarded as less efficient than
passing by value with a copy.
Intended Usage
==============
In the example below, ``f`` is attempting to pass a default-constructed
``Foo`` object to ``g`` by value.
.. code-block:: llvm
%Foo = type { i32, i32 }
declare void @Foo_ctor(%Foo* %this)
declare void @g(%Foo* inalloca %arg)
define void @f() {
...
bb1:
%base = call i8* @llvm.stacksave()
%arg = alloca %Foo
invoke void @Foo_ctor(%Foo* %arg)
to label %invoke.cont unwind %invoke.unwind
invoke.cont:
call void @g(%Foo* inalloca %arg)
call void @llvm.stackrestore(i8* %base)
...
invoke.unwind:
call void @llvm.stackrestore(i8* %base)
...
}
The alloca in this example is dynamic, meaning it is not in the entry
block, and it can be executed more than once. Due to the restrictions
against allocas between an alloca used with inalloca and its associated
call site, all allocas used with inalloca are considered dynamic.
To avoid any stack leakage, the frontend saves the current stack pointer
with a call to :ref:`llvm.stacksave <int_stacksave>`. Then, it
allocates the argument stack space with alloca and calls the default
constructor. One important consideration is that the default
constructor could throw an exception, so the frontend has to create a
landing pad. At this point, if there were any other inalloca arguments,
the frontend would have to destruct them before restoring the stack
pointer. If the constructor does not unwind, ``g`` is called, and then
the stack is restored.
Design Considerations
=====================
Lifetime
--------
The biggest design consideration for this feature is object lifetime.
We cannot model the arguments as static allocas in the entry block,
because all calls need to use the memory that is at the end of the call
frame to pass arguments. We cannot vend pointers to that memory at
function entry because after code generation they will alias. In the
current design, the rule against allocas between the inalloca alloca
values and the call site avoids this problem, but it creates a cleanup
problem. Cleanup and lifetime are handled explicitly with stack save and
restore calls. In the future, we may be able to avoid this by using
:ref:`llvm.lifetime.start <int_lifestart>` and :ref:`llvm.lifetime.end
<int_lifeend>` instead.
Nested Calls and Copy Elision
-----------------------------
The next consideration is the ability for the frontend to perform copy
elision in the face of nested calls. Consider the evaluation of
``foo(foo(Bar()))``, where ``foo`` takes and returns a ``Bar`` object by
value and ``Bar`` has non-trivial constructors. In this case, we want
to be able to elide copies into ``foo``'s argument slots. That means we
need to have more than one set of argument frames active at the same
time. First, we need to allocate the frame for the outer call so we can
pass it in as the hidden struct return pointer to the middle call. Then
we do the same for the middle call, allocating a frame and passing its
address to ``Bar``'s default constructor. By wrapping the evaluation of
the inner ``foo`` with stack save and restore, we can have multiple
overlapping active call frames.
Callee-cleanup Calling Conventions
----------------------------------
Another wrinkle is the existence of callee-cleanup conventions. On
Windows, all methods and many other functions adjust the stack to clear
the memory used to pass their arguments. In some sense, this means that
the allocas are automatically cleared by the call. However, LLVM
models this as a write of undef to all of the inalloca values
passed to the call instead of a stack adjustment. Frontends should
still restore the stack pointer to avoid a stack leak.
Exceptions
----------
There is also the possibility of an exception. If argument evaluation
or copy construction throws an exception, the landing pad must do
cleanup, which includes adjusting the stack pointer to avoid a stack
leak. This means the cleanup of the stack memory cannot be tied to the
call itself. There needs to be a separate IR-level instruction that can
perform independent cleanup of arguments.
Efficiency
----------
Eventually, it should be possible to generate efficient code for this
construct. In particular, using inalloca should not require a base
pointer. If the backend can prove that all points in the CFG only have
one possible stack level, then it can address the stack directly from
the stack pointer. While this is not yet implemented, the plan is that
the inalloca attribute should not change much, but the frontend IR
generation recommendations may change.

View File

@ -315,7 +315,7 @@ the properties which are associated with that component.
``BuildTool`` components are like ``Tool`` components, except that the
tool is supposed to be built for the platform where the build is running
(instead of that platform being targetted). Build systems are expected
(instead of that platform being targeted). Build systems are expected
to handle the fact that required libraries may need to be built for
multiple platforms in order to be able to link this tool.

View File

@ -4,7 +4,7 @@ LLVM Language Reference Manual
.. contents::
:local:
:depth: 3
:depth: 4
Abstract
========
@ -289,13 +289,9 @@ symbols from (to) DLLs (Dynamic Link Libraries).
pointer to a pointer in a DLL, so that it can be referenced with the
``dllimport`` attribute. On Microsoft Windows targets, the pointer
name is formed by combining ``__imp_`` and the function or variable
name.
For example, since the "``.LC0``" variable is defined to be internal, if
another module defined a "``.LC0``" variable and was linked with this
one, one of the two would be renamed, preventing a collision. Since
"``main``" and "``puts``" are external (i.e., lacking any linkage
declarations), they are accessible outside of the current module.
name. Since this linkage exists for defining a dll interface, the
compiler, assembler and linker know it is externally referenced and
must refrain from deleting the symbol.
It is illegal for a function *declaration* to have any linkage type
other than ``external``, ``dllimport`` or ``extern_weak``.
@ -370,6 +366,18 @@ added in the future:
accessed runtime components pinned to specific hardware registers.
At the moment only X86 supports this convention (both 32 and 64
bit).
"``webkit_jscc``" - WebKit's JavaScript calling convention
This calling convention has been implemented for `WebKit FTL JIT
<https://trac.webkit.org/wiki/FTLJIT>`_. It passes arguments on the
stack right to left (as cdecl does), and returns a value in the
platform's customary return register.
"``anyregcc``" - Dynamic calling convention for code patching
This is a special convention that supports patching an arbitrary code
sequence in place of a call site. This convention forces the call
arguments into registers but allows them to be dynamically
allocated. This can currently only be used with calls to
llvm.experimental.patchpoint because only this intrinsic records
the location of its arguments in a side table. See :doc:`StackMaps`.
"``cc <n>``" - Numbered convention
Any calling convention may be specified by number, allowing
target-specific calling conventions to be used. Target specific
@ -507,8 +515,8 @@ variables defined within the module are not modified from their
initial values before the start of the global initializer. This is
true even for variables potentially accessible from outside the
module, including those with external linkage or appearing in
``@llvm.used``. This assumption may be suppressed by marking the
variable with ``externally_initialized``.
``@llvm.used`` or dllexported variables. This assumption may be suppressed
by marking the variable with ``externally_initialized``.
An explicit alignment may be specified for a global, which must be a
power of 2. If not present, or if the alignment is set to zero, the
@ -618,7 +626,7 @@ Syntax::
The linkage must be one of ``private``, ``linker_private``,
``linker_private_weak``, ``internal``, ``linkonce``, ``weak``,
``linkonce_odr``, ``weak_odr``, ``external``. Note that some system linkers
might not correctly handle dropping a weak symbol that is aliased by a non weak
might not correctly handle dropping a weak symbol that is aliased by a non-weak
alias.
.. _namedmetadatastructure:
@ -701,6 +709,39 @@ Currently, only the following parameter attributes are defined:
site. If the alignment is not specified, then the code generator
makes a target-specific assumption.
.. _attr_inalloca:
``inalloca``
.. Warning:: This feature is unstable and not fully implemented.
The ``inalloca`` argument attribute allows the caller to get the
address of an outgoing argument to a ``call`` or ``invoke`` before
it executes. It is similar to ``byval`` in that it is used to pass
arguments by value, but it guarantees that the argument will not be
copied.
To be :ref:`well formed <wellformed>`, the caller must pass an
alloca value into an ``inalloca`` parameter, and an alloca may be
used as an ``inalloca`` argument at most once. The attribute can
only be applied to parameters that would be passed in memory and not
registers. The ``inalloca`` attribute cannot be used in conjunction
with other attributes that affect argument storage, like ``inreg``,
``nest``, ``sret``, or ``byval``. The ``inalloca`` stack space is
considered to be clobbered by any call that uses it, so any
``inalloca`` parameters cannot be marked ``readonly``.
Allocas passed with ``inalloca`` to a call must be in the opposite
order of the parameter list, meaning that the rightmost argument
must be allocated first. If a call has inalloca arguments, no other
allocas can occur between the first alloca used by the call and the
call site, unless they are cleared by calls to
:ref:`llvm.stackrestore <int_stackrestore>`. Violating these rules
results in undefined behavior at runtime.
See :doc:`InAlloca` for more information on how to use this
attribute.
``sret``
This indicates that the pointer parameter specifies the address of a
structure that is the return value of the function in the source
@ -1119,9 +1160,15 @@ as follows:
``a<size>:<abi>:<pref>``
This specifies the alignment for an aggregate type of a given bit
``<size>``.
``s<size>:<abi>:<pref>``
This specifies the alignment for a stack object of a given bit
``<size>``.
``m:<mangling>``
If present, specifies that llvm names are mangled in the output. The
options are
* ``e``: ELF mangling: Private symbols get a ``.L`` prefix.
* ``m``: Mips mangling: Private symbols get a ``$`` prefix.
* ``o``: Mach-O mangling: Private symbols get an ``L`` prefix. Other
symbols get a ``_`` prefix.
* ``c``: COFF prefix: Similar to Mach-O, but stdcall and fastcall
functions also get a suffix based on the frame size.
``n<size1>:<size2>:<size3>...``
This specifies a set of native integer widths for the target CPU in
bits. For example, it might contain ``n32`` for 32-bit PowerPC,
@ -1151,7 +1198,7 @@ specifications are given in this list:
- ``f128:128:128`` - quad is 128-bit aligned
- ``v64:64:64`` - 64-bit vector is 64-bit aligned
- ``v128:128:128`` - 128-bit vector is 128-bit aligned
- ``a0:0:64`` - aggregates are 64-bit aligned
- ``a:0:64`` - aggregates are 64-bit aligned
When LLVM is determining the alignment for a given type, it uses the
following rules:
@ -1480,80 +1527,90 @@ transformation. A strong type system makes it easier to read the
generated code and enables novel analyses and transformations that are
not feasible to perform on normal three address code representations.
.. _typeclassifications:
.. _t_void:
Type Classifications
--------------------
Void Type
---------
The types fall into a few useful classifications:
:Overview:
.. list-table::
:header-rows: 1
The void type does not represent any value and has no size.
* - Classification
- Types
* - :ref:`integer <t_integer>`
- ``i1``, ``i2``, ``i3``, ... ``i8``, ... ``i16``, ... ``i32``, ...
``i64``, ...
* - :ref:`floating point <t_floating>`
- ``half``, ``float``, ``double``, ``x86_fp80``, ``fp128``,
``ppc_fp128``
:Syntax:
* - first class
::
.. _t_firstclass:
void
- :ref:`integer <t_integer>`, :ref:`floating point <t_floating>`,
:ref:`pointer <t_pointer>`, :ref:`vector <t_vector>`,
:ref:`structure <t_struct>`, :ref:`array <t_array>`,
:ref:`label <t_label>`, :ref:`metadata <t_metadata>`.
* - :ref:`primitive <t_primitive>`
- :ref:`label <t_label>`,
:ref:`void <t_void>`,
:ref:`integer <t_integer>`,
:ref:`floating point <t_floating>`,
:ref:`x86mmx <t_x86mmx>`,
:ref:`metadata <t_metadata>`.
.. _t_function:
* - :ref:`derived <t_derived>`
- :ref:`array <t_array>`,
:ref:`function <t_function>`,
:ref:`pointer <t_pointer>`,
:ref:`structure <t_struct>`,
:ref:`vector <t_vector>`,
:ref:`opaque <t_opaque>`.
Function Type
-------------
:Overview:
The function type can be thought of as a function signature. It consists of a
return type and a list of formal parameter types. The return type of a function
type is a void type or first class type --- except for :ref:`label <t_label>`
and :ref:`metadata <t_metadata>` types.
:Syntax:
::
<returntype> (<parameter list>)
...where '``<parameter list>``' is a comma-separated list of type
specifiers. Optionally, the parameter list may include a type ``...``, which
indicates that the function takes a variable number of arguments. Variable
argument functions can access their arguments with the :ref:`variable argument
handling intrinsic <int_varargs>` functions. '``<returntype>``' is any type
except :ref:`label <t_label>` and :ref:`metadata <t_metadata>`.
:Examples:
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| ``i32 (i32)`` | function taking an ``i32``, returning an ``i32`` |
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| ``float (i16, i32 *) *`` | :ref:`Pointer <t_pointer>` to a function that takes an ``i16`` and a :ref:`pointer <t_pointer>` to ``i32``, returning ``float``. |
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| ``i32 (i8*, ...)`` | A vararg function that takes at least one :ref:`pointer <t_pointer>` to ``i8`` (char in C), which returns an integer. This is the signature for ``printf`` in LLVM. |
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| ``{i32, i32} (i32)`` | A function taking an ``i32``, returning a :ref:`structure <t_struct>` containing two ``i32`` values |
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
.. _t_firstclass:
First Class Types
-----------------
The :ref:`first class <t_firstclass>` types are perhaps the most important.
Values of these types are the only ones which can be produced by
instructions.
.. _t_primitive:
.. _t_single_value:
Primitive Types
---------------
Single Value Types
^^^^^^^^^^^^^^^^^^
The primitive types are the fundamental building blocks of the LLVM
system.
These are the types that are valid in registers from CodeGen's perspective.
.. _t_integer:
Integer Type
^^^^^^^^^^^^
""""""""""""
Overview:
"""""""""
:Overview:
The integer type is a very simple type that simply specifies an
arbitrary bit width for the integer type desired. Any bit width from 1
bit to 2\ :sup:`23`\ -1 (about 8 million) can be specified.
Syntax:
"""""""
:Syntax:
::
@ -1563,7 +1620,7 @@ The number of bits the integer will occupy is specified by the ``N``
value.
Examples:
"""""""""
*********
+----------------+------------------------------------------------+
| ``i1`` | a single-bit integer. |
@ -1576,7 +1633,7 @@ Examples:
.. _t_floating:
Floating Point Types
^^^^^^^^^^^^^^^^^^^^
""""""""""""""""""""
.. list-table::
:header-rows: 1
@ -1605,10 +1662,9 @@ Floating Point Types
.. _t_x86mmx:
X86mmx Type
^^^^^^^^^^^
"""""""""""
Overview:
"""""""""
:Overview:
The x86mmx type represents a value held in an MMX register on an x86
machine. The operations allowed on it are quite limited: parameters and
@ -1617,42 +1673,92 @@ instructions are represented as intrinsic or asm calls with arguments
and/or results of this type. There are no arrays, vectors or constants
of this type.
Syntax:
"""""""
:Syntax:
::
x86mmx
.. _t_void:
Void Type
^^^^^^^^^
.. _t_pointer:
Overview:
"""""""""
Pointer Type
""""""""""""
The void type does not represent any value and has no size.
:Overview:
Syntax:
"""""""
The pointer type is used to specify memory locations. Pointers are
commonly used to reference objects in memory.
Pointer types may have an optional address space attribute defining the
numbered address space where the pointed-to object resides. The default
address space is number zero. The semantics of non-zero address spaces
are target-specific.
Note that LLVM does not permit pointers to void (``void*``) nor does it
permit pointers to labels (``label*``). Use ``i8*`` instead.
:Syntax:
::
void
<type> *
:Examples:
+-------------------------+--------------------------------------------------------------------------------------------------------------+
| ``[4 x i32]*`` | A :ref:`pointer <t_pointer>` to :ref:`array <t_array>` of four ``i32`` values. |
+-------------------------+--------------------------------------------------------------------------------------------------------------+
| ``i32 (i32*) *`` | A :ref:`pointer <t_pointer>` to a :ref:`function <t_function>` that takes an ``i32*``, returning an ``i32``. |
+-------------------------+--------------------------------------------------------------------------------------------------------------+
| ``i32 addrspace(5)*`` | A :ref:`pointer <t_pointer>` to an ``i32`` value that resides in address space #5. |
+-------------------------+--------------------------------------------------------------------------------------------------------------+
.. _t_vector:
Vector Type
"""""""""""
:Overview:
A vector type is a simple derived type that represents a vector of
elements. Vector types are used when multiple primitive data are
operated on in parallel using a single instruction (SIMD). A vector type
requires a size (number of elements) and an underlying primitive data
type. Vector types are considered :ref:`first class <t_firstclass>`.
:Syntax:
::
< <# elements> x <elementtype> >
The number of elements is a constant integer value larger than 0;
elementtype may be any integer or floating point type, or a pointer to
these types. Vectors of size zero are not allowed.
:Examples:
+-------------------+--------------------------------------------------+
| ``<4 x i32>`` | Vector of 4 32-bit integer values. |
+-------------------+--------------------------------------------------+
| ``<8 x float>`` | Vector of 8 32-bit floating-point values. |
+-------------------+--------------------------------------------------+
| ``<2 x i64>`` | Vector of 2 64-bit integer values. |
+-------------------+--------------------------------------------------+
| ``<4 x i64*>`` | Vector of 4 pointers to 64-bit integer values. |
+-------------------+--------------------------------------------------+
.. _t_label:
Label Type
^^^^^^^^^^
Overview:
"""""""""
:Overview:
The label type represents code labels.
Syntax:
"""""""
:Syntax:
::
@ -1663,31 +1769,17 @@ Syntax:
Metadata Type
^^^^^^^^^^^^^
Overview:
"""""""""
:Overview:
The metadata type represents embedded metadata. No derived types may be
created from metadata except for :ref:`function <t_function>` arguments.
Syntax:
"""""""
:Syntax:
::
metadata
.. _t_derived:
Derived Types
-------------
The real power in LLVM comes from the derived types in the system. This
is what allows a programmer to represent arrays, functions, pointers,
and other useful types. Each of these types contain one or more element
types which may be a primitive type, or another derived type. For
example, it is possible to have a two dimensional array, using an array
as the element type of another array.
.. _t_aggregate:
Aggregate Types
@ -1701,17 +1793,15 @@ aggregate types.
.. _t_array:
Array Type
^^^^^^^^^^
""""""""""
Overview:
"""""""""
:Overview:
The array type is a very simple derived type that arranges elements
sequentially in memory. The array type requires a size (number of
elements) and an underlying data type.
Syntax:
"""""""
:Syntax:
::
@ -1720,8 +1810,7 @@ Syntax:
The number of elements is a constant integer value; ``elementtype`` may
be any type with a size.
Examples:
"""""""""
:Examples:
+------------------+--------------------------------------+
| ``[40 x i32]`` | Array of 40 32-bit integer values. |
@ -1749,53 +1838,12 @@ LLVM with a zero length array type. An implementation of 'pascal style
arrays' in LLVM could use the type "``{ i32, [0 x float]}``", for
example.
.. _t_function:
Function Type
^^^^^^^^^^^^^
Overview:
"""""""""
The function type can be thought of as a function signature. It consists of a
return type and a list of formal parameter types. The return type of a function
type is a void type or first class type --- except for :ref:`label <t_label>`
and :ref:`metadata <t_metadata>` types.
Syntax:
"""""""
::
<returntype> (<parameter list>)
...where '``<parameter list>``' is a comma-separated list of type
specifiers. Optionally, the parameter list may include a type ``...``, which
indicates that the function takes a variable number of arguments. Variable
argument functions can access their arguments with the :ref:`variable argument
handling intrinsic <int_varargs>` functions. '``<returntype>``' is any type
except :ref:`label <t_label>` and :ref:`metadata <t_metadata>`.
Examples:
"""""""""
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| ``i32 (i32)`` | function taking an ``i32``, returning an ``i32`` |
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| ``float (i16, i32 *) *`` | :ref:`Pointer <t_pointer>` to a function that takes an ``i16`` and a :ref:`pointer <t_pointer>` to ``i32``, returning ``float``. |
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| ``i32 (i8*, ...)`` | A vararg function that takes at least one :ref:`pointer <t_pointer>` to ``i8`` (char in C), which returns an integer. This is the signature for ``printf`` in LLVM. |
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| ``{i32, i32} (i32)`` | A function taking an ``i32``, returning a :ref:`structure <t_struct>` containing two ``i32`` values |
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
.. _t_struct:
Structure Type
^^^^^^^^^^^^^^
""""""""""""""
Overview:
"""""""""
:Overview:
The structure type is used to represent a collection of data members
together in memory. The elements of a structure may be any type that has
@ -1819,16 +1867,14 @@ Literal types are uniqued by their contents and can never be recursive
or opaque since there is no way to write one. Identified types can be
recursive, can be opaqued, and are never uniqued.
Syntax:
"""""""
:Syntax:
::
%T1 = type { <type list> } ; Identified normal struct type
%T2 = type <{ <type list> }> ; Identified packed struct type
Examples:
"""""""""
:Examples:
+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| ``{ i32, i32, i32 }`` | A triple of three ``i32`` values |
@ -1841,105 +1887,27 @@ Examples:
.. _t_opaque:
Opaque Structure Types
^^^^^^^^^^^^^^^^^^^^^^
""""""""""""""""""""""
Overview:
"""""""""
:Overview:
Opaque structure types are used to represent named structure types that
do not have a body specified. This corresponds (for example) to the C
notion of a forward declared structure.
Syntax:
"""""""
:Syntax:
::
%X = type opaque
%52 = type opaque
Examples:
"""""""""
:Examples:
+--------------+-------------------+
| ``opaque`` | An opaque type. |
+--------------+-------------------+
.. _t_pointer:
Pointer Type
^^^^^^^^^^^^
Overview:
"""""""""
The pointer type is used to specify memory locations. Pointers are
commonly used to reference objects in memory.
Pointer types may have an optional address space attribute defining the
numbered address space where the pointed-to object resides. The default
address space is number zero. The semantics of non-zero address spaces
are target-specific.
Note that LLVM does not permit pointers to void (``void*``) nor does it
permit pointers to labels (``label*``). Use ``i8*`` instead.
Syntax:
"""""""
::
<type> *
Examples:
"""""""""
+-------------------------+--------------------------------------------------------------------------------------------------------------+
| ``[4 x i32]*`` | A :ref:`pointer <t_pointer>` to :ref:`array <t_array>` of four ``i32`` values. |
+-------------------------+--------------------------------------------------------------------------------------------------------------+
| ``i32 (i32*) *`` | A :ref:`pointer <t_pointer>` to a :ref:`function <t_function>` that takes an ``i32*``, returning an ``i32``. |
+-------------------------+--------------------------------------------------------------------------------------------------------------+
| ``i32 addrspace(5)*`` | A :ref:`pointer <t_pointer>` to an ``i32`` value that resides in address space #5. |
+-------------------------+--------------------------------------------------------------------------------------------------------------+
.. _t_vector:
Vector Type
^^^^^^^^^^^
Overview:
"""""""""
A vector type is a simple derived type that represents a vector of
elements. Vector types are used when multiple primitive data are
operated in parallel using a single instruction (SIMD). A vector type
requires a size (number of elements) and an underlying primitive data
type. Vector types are considered :ref:`first class <t_firstclass>`.
Syntax:
"""""""
::
< <# elements> x <elementtype> >
The number of elements is a constant integer value larger than 0;
elementtype may be any integer or floating point type, or a pointer to
these types. Vectors of size zero are not allowed.
Examples:
"""""""""
+-------------------+--------------------------------------------------+
| ``<4 x i32>`` | Vector of 4 32-bit integer values. |
+-------------------+--------------------------------------------------+
| ``<8 x float>`` | Vector of 8 32-bit floating-point values. |
+-------------------+--------------------------------------------------+
| ``<2 x i64>`` | Vector of 2 64-bit integer values. |
+-------------------+--------------------------------------------------+
| ``<4 x i64*>`` | Vector of 4 pointers to 64-bit integer values. |
+-------------------+--------------------------------------------------+
Constants
=========
@ -8502,6 +8470,8 @@ Memory Use Markers
This class of intrinsics exists to provide information about the lifetime of
memory objects and ranges where variables are immutable.
.. _int_lifestart:
'``llvm.lifetime.start``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -8533,6 +8503,8 @@ of the memory pointed to by ``ptr`` is dead. This means that it is known
to never be used and has an undefined value. A load from the pointer
that precedes this intrinsic can be replaced with ``'undef'``.
.. _int_lifeend:
'``llvm.lifetime.end``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -8958,3 +8930,10 @@ Semantics:
This intrinsic does nothing, and it's removed by optimizers and ignored
by codegen.
Stack Map Intrinsics
--------------------
LLVM provides experimental intrinsics to support runtime patching
mechanisms commonly desired in dynamic language JITs. These intrinsics
are described in :doc:`StackMaps`.

View File

@ -273,7 +273,7 @@ there is a separate version for each compute architecture.
For a list of all math functions implemented in libdevice, see
`libdevice Users Guide <http://docs.nvidia.com/cuda/libdevice-users-guide/index.html>`_.
To accomodate various math-related compiler flags that can affect code
To accommodate various math-related compiler flags that can affect code
generation of libdevice code, the library code depends on a special LLVM IR
pass (``NVVMReflect``) to handle conditional compilation within LLVM IR. This
pass looks for calls to the ``@__nvvm_reflect`` function and replaces them
@ -839,7 +839,7 @@ Libdevice provides an ``__nv_powf`` function that we will use.
%valB = load float addrspace(1)* %ptrB, align 4
; Compute C = pow(A, B)
%valC = call float @__nv_exp2f(float %valA, float %valB)
%valC = call float @__nv_powf(float %valA, float %valB)
; Store back to C
store float %valC, float addrspace(1)* %ptrC, align 4
@ -850,7 +850,7 @@ Libdevice provides an ``__nv_powf`` function that we will use.
!nvvm.annotations = !{!0}
!0 = metadata !{void (float addrspace(1)*,
float addrspace(1)*,
float addrspace(1)*)* @kernel, metadata !"kernel", i32 1}%
float addrspace(1)*)* @kernel, metadata !"kernel", i32 1}
To compile this kernel, we perform the following steps:

View File

@ -1,15 +1,21 @@
======================
LLVM 3.4 Release Notes
LLVM 3.5 Release Notes
======================
.. contents::
:local:
.. warning::
These are in-progress notes for the upcoming LLVM 3.5 release. You may
prefer the `LLVM 3.4 Release Notes <http://llvm.org/releases/3.4/docs
/ReleaseNotes.html>`_.
Introduction
============
This document contains the release notes for the LLVM Compiler Infrastructure,
release 3.4. Here we describe the status of LLVM, including major improvements
release 3.5. Here we describe the status of LLVM, including major improvements
from the previous release, improvements in various subprojects of LLVM, and
some of the current users of the code. All LLVM releases may be downloaded
from the `LLVM releases web site <http://llvm.org/releases/>`_.
@ -35,58 +41,6 @@ Non-comprehensive list of changes in this release
functionality, or simply have a lot to talk about), see the `NOTE` below
for adding a new subsection.
* This is expected to be the last release of LLVM which compiles using a C++98
toolchain. We expect to start using some C++11 features in LLVM and other
sub-projects starting after this release. That said, we are committed to
supporting a reasonable set of modern C++ toolchains as the host compiler on
all of the platforms. This will at least include Visual Studio 2012 on
Windows, and Clang 3.1 or GCC 4.7.x on Mac and Linux. The final set of
compilers (and the C++11 features they support) is not set in stone, but we
wanted users of LLVM to have a heads up that the next release will involve
a substantial change in the host toolchain requirements.
* The regression tests now fail if any command in a pipe fails. To disable it in
a directory, just add ``config.pipefail = False`` to its ``lit.local.cfg``.
See :doc:`Lit <CommandGuide/lit>` for the details.
* Support for exception handling has been removed from the old JIT. Use MCJIT
if you need EH support.
* The R600 backend is not marked experimental anymore and is built by default.
* APFloat::isNormal() was renamed to APFloat::isFiniteNonZero() and
APFloat::isIEEENormal() was renamed to APFloat::isNormal(). This ensures that
APFloat::isNormal() conforms to IEEE-754R-2008.
* The library call simplification pass has been removed. Its functionality
has been integrated into the instruction combiner and function attribute
marking passes.
* Support for building using Visual Studio 2008 has been dropped. Use VS 2010
or later instead. For more information, see the `Getting Started using Visual
Studio <GettingStartedVS.html>`_ page.
* The Loop Vectorizer that was previously enabled for -O3 is now enabled for
-Os and -O2.
* The new SLP Vectorizer is now enabled by default.
* llvm-ar now uses the new Object library and produces archives and
symbol tables in the gnu format.
* FileCheck now allows specifing -check-prefix multiple times. This
helps reduce duplicate check lines when using multiple RUN lines.
* The bitcast instruction no longer allows casting between pointers
with different address spaces. To achieve this, use the new
addrspacecast instruction.
* Different sized pointers for different address spaces should now
generally work. This is primarily useful for GPU targets.
* OCaml bindings have been significantly extended to cover almost all of the
LLVM libraries.
* ... next change ...
.. NOTE
@ -99,126 +53,12 @@ Non-comprehensive list of changes in this release
Makes programs 10x faster by doing Special New Thing.
Mips Target
-----------
Support for the MIPS SIMD Architecture (MSA) has been added. MSA is supported
through inline assembly, intrinsics with the prefix '__builtin_msa', and normal
code generation.
For more information on MSA (including documentation for the instruction set),
see the `MIPS SIMD page at Imagination Technologies
<http://imgtec.com/mips/mips-simd.asp>`_
SPARC Target
------------
The SPARC backend got many improvements, namely
* experimental SPARC V9 backend
* JIT support for SPARC
* fp128 support
* exception handling
* TLS support
* leaf functions optimization
* bug fixes
External Open Source Projects Using LLVM 3.4
External Open Source Projects Using LLVM 3.5
============================================
An exciting aspect of LLVM is that it is used as an enabling technology for
a lot of other language and tools projects. This section lists some of the
projects that have already been updated to work with LLVM 3.4.
DXR
---
`DXR <https://wiki.mozilla.org/DXR>`_ is Mozilla's code search and navigation
tool, aimed at making sense of large projects like Firefox. It supports
full-text and regex searches as well as structural queries like "Find all the
callers of this function." Behind the scenes, it uses a custom trigram index,
the re2 library, and structural data collected by a clang compiler plugin.
LDC - the LLVM-based D compiler
-------------------------------
`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
pragmatically combines efficiency, control, and modeling power, with safety and
programmer productivity. D supports powerful concepts like Compile-Time Function
Execution (CTFE) and Template Meta-Programming, provides an innovative approach
to concurrency and offers many classical paradigms.
`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
combined with LLVM as backend to produce efficient native code. LDC targets
x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux/PPC64.
Ports to other architectures like ARM and AArch64 are underway.
Likely
------
`Likely <http://www.liblikely.org/>`_ is an open source domain specific
language for image recognition. Algorithms are just-in-time compiled using
LLVM's MCJIT infrastructure to execute on single or multi-threaded CPUs as well
as OpenCL SPIR or CUDA enabled GPUs. Likely exploits the observation that while
image processing and statistical learning kernels must be written generically
to handle any matrix datatype, at runtime they tend to be executed repeatedly
on the same type.
Portable Computing Language (pocl)
----------------------------------
In addition to producing an easily portable open source OpenCL
implementation, another major goal of `pocl <http://portablecl.org/>`_
is improving performance portability of OpenCL programs with
compiler optimizations, reducing the need for target-dependent manual
optimizations. An important part of pocl is a set of LLVM passes used to
statically parallelize multiple work-items with the kernel compiler, even in
the presence of work-group barriers. This enables static parallelization of
the fine-grained static concurrency in the work groups in multiple ways.
Portable Native Client (PNaCl)
------------------------------
`Portable Native Client (PNaCl) <http://www.chromium.org/nativeclient/pnacl>`_
is a Chrome initiative to bring the performance and low-level control of native
code to modern web browsers, without sacrificing the security benefits and
portability of web applications. PNaCl works by compiling native C and C++ code
to an intermediate representation using the LLVM clang compiler. This
intermediate representation is a subset of LLVM bytecode that is wrapped into a
portable executable, which can be hosted on a web server like any other website
asset. When the site is accessed, Chrome fetches and translates the portable
executable into an architecture-specific machine code optimized directly for
the underlying device. PNaCl lets developers compile their code once to run on
any hardware platform and embed their PNaCl application in any website,
enabling developers to directly leverage the power of the underlying CPU and
GPU.
TTA-based Co-design Environment (TCE)
-------------------------------------
`TCE <http://tce.cs.tut.fi/>`_ is a toolset for designing new
exposed datapath processors based on the Transport triggered architecture (TTA).
The toolset provides a complete co-design flow from C/C++
programs down to synthesizable VHDL/Verilog and parallel program binaries.
Processor customization points include the register files, function units,
supported operations, and the interconnection network.
TCE uses Clang and LLVM for C/C++/OpenCL C language support, target independent
optimizations and also for parts of code generation. It generates
new LLVM-based code generators "on the fly" for the designed processors and
loads them in to the compiler backend as runtime libraries to avoid
per-target recompilation of larger parts of the compiler chain.
WebCL Validator
---------------
`WebCL Validator <https://github.com/KhronosGroup/webcl-validator>`_ implements
validation for WebCL C language which is a subset of OpenCL ES 1.1. Validator
checks the correctness of WebCL C, and implements memory protection for it as a
source-2-source transformation. The transformation converts WebCL to memory
protected OpenCL. The protected OpenCL cannot access any memory ranges which
were not allocated for it, and its memory is always initialized to prevent
information leakage from other programs.
projects that have already been updated to work with LLVM 3.5.
Additional Information

View File

@ -52,18 +52,16 @@ The scripts are in the ``utils/release`` directory.
test-release.sh
---------------
This script will check-out, configure and compile LLVM+Clang (+ most add-ons,
like ``compiler-rt``, ``libcxx`` and ``clang-extra-tools``) in three stages, and
will test the final stage. It'll have installed the final binaries on the
Phase3/Releasei(+Asserts) directory, and that's the one you should use for the
test-suite and other external tests.
This script will check-out, configure and compile LLVM+Clang (+ most add-ons, like ``compiler-rt``,
``libcxx`` and ``clang-extra-tools``) in three stages, and will test the final stage.
It'll have installed the final binaries on the Phase3/Release(+Asserts) directory, and
that's the one you should use for the test-suite and other external tests.
To run the script on a specific release candidate run::
./test-release.sh \
-release 3.4 \
-release 3.3 \
-rc 1 \
-triple x86_64-apple-darwin \
-no-64bit \
-test-asserts \
-no-compare-files

View File

@ -2306,7 +2306,7 @@ stringWithCString:]``") and the basename is the selector only
Mach-O Changes
""""""""""""""
The sections names for the apple hash tables are for non mach-o files. For
The section names for the apple hash tables are for non-mach-o files. For
mach-o files, the sections should be contained in the ``__DWARF`` segment with
names as follows:

View File

@ -0,0 +1,480 @@
===================================
Stack maps and patch points in LLVM
===================================
.. contents::
:local:
:depth: 2
Definitions
===========
In this document we refer to the "runtime" collectively as all
components that serve as the LLVM client, including the LLVM IR
generator, object code consumer, and code patcher.
A stack map records the location of ``live values`` at a particular
instruction address. These ``live values`` do not refer to all the
LLVM values live across the stack map. Instead, they are only the
values that the runtime requires to be live at this point. For
example, they may be the values the runtime will need to resume
program execution at that point independent of the compiled function
containing the stack map.
LLVM emits stack map data into the object code within a designated
:ref:`stackmap-section`. This stack map data contains a record for
each stack map. The record stores the stack map's instruction address
and contains an entry for each mapped value. Each entry encodes a
value's location as a register, stack offset, or constant.
A patch point is an instruction address at which space is reserved for
patching a new instruction sequence at run time. Patch points look
much like calls to LLVM. They take arguments that follow a calling
convention and may return a value. They also imply stack map
generation, which allows the runtime to locate the patchpoint and
find the location of ``live values`` at that point.
Motivation
==========
This functionality is currently experimental but is potentially useful
in a variety of settings, the most obvious being a runtime (JIT)
compiler. Example applications of the patchpoint intrinsics are
implementing an inline call cache for polymorphic method dispatch or
optimizing the retrieval of properties in dynamically typed languages
such as JavaScript.
The intrinsics documented here are currently used by the JavaScript
compiler within the open source WebKit project, see the `FTL JIT
<https://trac.webkit.org/wiki/FTLJIT>`_, but they are designed to be
used whenever stack maps or code patching are needed. Because the
intrinsics have experimental status, compatibility across LLVM
releases is not guaranteed.
The stack map functionality described in this document is separate
from the functionality described in
:ref:`stack-map`. `GCFunctionMetadata` provides the location of
pointers into a collected heap captured by the `GCRoot` intrinsic,
which can also be considered a "stack map". Unlike the stack maps
defined above, the `GCFunctionMetadata` stack map interface does not
provide a way to associate live register values of arbitrary type with
an instruction address, nor does it specify a format for the resulting
stack map. The stack maps described here could potentially provide
richer information to a garbage collecting runtime, but that usage
will not be discussed in this document.
Intrinsics
==========
The following two kinds of intrinsics can be used to implement stack
maps and patch points: ``llvm.experimental.stackmap`` and
``llvm.experimental.patchpoint``. Both kinds of intrinsics generate a
stack map record, and they both allow some form of code patching. They
can be used independently (i.e. ``llvm.experimental.patchpoint``
implicitly generates a stack map without the need for an additional
call to ``llvm.experimental.stackmap``). The choice of which to use
depends on whether it is necessary to reserve space for code patching
and whether any of the intrinsic arguments should be lowered according
to calling conventions. ``llvm.experimental.stackmap`` does not
reserve any space, nor does it expect any call arguments. If the
runtime patches code at the stack map's address, it will destructively
overwrite the program text. This is unlike
``llvm.experimental.patchpoint``, which reserves space for in-place
patching without overwriting surrounding code. The
``llvm.experimental.patchpoint`` intrinsic also lowers a specified
number of arguments according to its calling convention. This allows
patched code to make in-place function calls without marshaling.
Each instance of one of these intrinsics generates a stack map record
in the :ref:`stackmap-section`. The record includes an ID, allowing
the runtime to uniquely identify the stack map, and the offset within
the code from the beginning of the enclosing function.
'``llvm.experimental.stackmap``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare void
@llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>, ...)
Overview:
"""""""""
The '``llvm.experimental.stackmap``' intrinsic records the location of
specified values in the stack map without generating any code.
Operands:
"""""""""
The first operand is an ID to be encoded within the stack map. The
second operand is the number of shadow bytes following the
intrinsic. The variable number of operands that follow are the ``live
values`` for which locations will be recorded in the stack map.
To use this intrinsic as a bare-bones stack map, with no code patching
support, the number of shadow bytes can be set to zero.
Semantics:
""""""""""
The stack map intrinsic generates no code in place, unless nops are
needed to cover its shadow (see below). However, its offset from
function entry is stored in the stack map. This is the relative
instruction address immediately following the instructions that
precede the stack map.
The stack map ID allows a runtime to locate the desired stack map
record. LLVM passes this ID through directly to the stack map
record without checking uniqueness.
LLVM guarantees a shadow of instructions following the stack map's
instruction offset during which neither the end of the basic block nor
another call to ``llvm.experimental.stackmap`` or
``llvm.experimental.patchpoint`` may occur. This allows the runtime to
patch the code at this point in response to an event triggered from
outside the code. The code for instructions following the stack map
may be emitted in the stack map's shadow, and these instructions may
be overwritten by destructive patching. Without shadow bytes, this
destructive patching could overwrite program text or data outside the
current function. We disallow overlapping stack map shadows so that
the runtime does not need to consider this corner case.
For example, a stack map with an 8-byte shadow:
.. code-block:: llvm
call void @runtime()
call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 77, i32 8,
i64* %ptr)
%val = load i64* %ptr
%add = add i64 %val, 3
ret i64 %add
May require one byte of nop-padding:
.. code-block:: none
0x00 callq _runtime
0x05 nop <--- stack map address
0x06 movq (%rdi), %rax
0x07 addq $3, %rax
0x0a popq %rdx
0x0b ret <---- end of 8-byte shadow
Now, if the runtime needs to invalidate the compiled code, it may
patch 8 bytes of code at the stack map's address as follows:
.. code-block:: none
0x00 callq _runtime
0x05 movl $0xffff, %rax <--- patched code at stack map address
0x0a callq *%rax <---- end of 8-byte shadow
This way, after the normal call to the runtime returns, the code will
execute a patched call to a special entry point that can rebuild a
stack frame from the values located by the stack map.
'``llvm.experimental.patchpoint.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare void
@llvm.experimental.patchpoint.void(i64 <id>, i32 <numBytes>,
i8* <target>, i32 <numArgs>, ...)
declare i64
@llvm.experimental.patchpoint.i64(i64 <id>, i32 <numBytes>,
i8* <target>, i32 <numArgs>, ...)
Overview:
"""""""""
The '``llvm.experimental.patchpoint.*``' intrinsics create a function
call to the specified ``<target>`` and records the location of specified
values in the stack map.
Operands:
"""""""""
The first operand is an ID, the second operand is the number of bytes
reserved for the patchable region, the third operand is the target
address of a function (optionally null), and the fourth operand
specifies how many of the following variable operands are considered
function call arguments. The remaining variable number of operands are
the ``live values`` for which locations will be recorded in the stack
map.
Semantics:
""""""""""
The patch point intrinsic generates a stack map. It also emits a
function call to the address specified by ``<target>`` if the address
is not a constant null. The function call and its arguments are
lowered according to the calling convention specified at the
intrinsic's callsite. Variants of the intrinsic with non-void return
type also return a value according to calling convention.
Requesting zero patch point arguments is valid. In this case, all
variable operands are handled just like
``llvm.experimental.stackmap.*``. The difference is that space will
still be reserved for patching, a call will be emitted, and a return
value is allowed.
The locations of the arguments are not normally recorded in the stack
map because they are already fixed by the calling convention. The
remaining ``live values`` will have their location recorded, which
could be a register, stack location, or constant. A special calling
convention has been introduced for use with stack maps, anyregcc,
which forces the arguments to be loaded into registers but allows
those registers to be dynamically allocated. These argument registers
will have their register locations recorded in the stack map in
addition to the remaining ``live values``.
The patch point also emits nops to cover at least ``<numBytes>`` of
instruction encoding space. Hence, the client must ensure that
``<numBytes>`` is enough to encode a call to the target address on the
supported targets. If the call target is constant null, then there is
no minimum requirement. A zero-byte null target patchpoint is
valid.
The runtime may patch the code emitted for the patch point, including
the call sequence and nops. However, the runtime may not assume
anything about the code LLVM emits within the reserved space. Partial
patching is not allowed. The runtime must patch all reserved bytes,
padding with nops if necessary.
This example shows a patch point reserving 15 bytes, with one argument
in $rdi, and a return value in $rax per native calling convention:
.. code-block:: llvm
%target = inttoptr i64 -281474976710654 to i8*
%val = call i64 (i64, i32, ...)*
@llvm.experimental.patchpoint.i64(i64 78, i32 15,
i8* %target, i32 1, i64* %ptr)
%add = add i64 %val, 3
ret i64 %add
May generate:
.. code-block:: none
0x00 movabsq $0xffff000000000002, %r11 <--- patch point address
0x0a callq *%r11
0x0d nop
0x0e nop <--- end of reserved 15-bytes
0x0f addq $0x3, %rax
0x10 movl %rax, 8(%rsp)
Note that no stack map locations will be recorded. If the patched code
sequence does not need arguments fixed to specific calling convention
registers, then the ``anyregcc`` convention may be used:
.. code-block:: none
%val = call anyregcc @llvm.experimental.patchpoint(i64 78, i32 15,
i8* %target, i32 1,
i64* %ptr)
The stack map now indicates the location of the %ptr argument and
return value:
.. code-block:: none
Stack Map: ID=78, Loc0=%r9 Loc1=%r8
The patch code sequence may now use the argument that happened to be
allocated in %r8 and return a value allocated in %r9:
.. code-block:: none
0x00 movslq 4(%r8), %r9 <--- patched code at patch point address
0x03 nop
...
0x0e nop <--- end of reserved 15-bytes
0x0f addq $0x3, %r9
0x10 movl %r9, 8(%rsp)
.. _stackmap-format:
Stack Map Format
================
The existence of a stack map or patch point intrinsic within an LLVM
Module forces code emission to create a :ref:`stackmap-section`. The
format of this section follows:
.. code-block:: none
uint32 : Reserved (header)
uint32 : NumConstants
Constants[NumConstants] {
uint64 : LargeConstant
}
uint32 : NumRecords
StkMapRecord[NumRecords] {
uint64 : PatchPoint ID
uint32 : Instruction Offset
uint16 : Reserved (record flags)
uint16 : NumLocations
Location[NumLocations] {
uint8 : Register | Direct | Indirect | Constant | ConstantIndex
uint8 : Reserved (location flags)
uint16 : Dwarf RegNum
int32 : Offset or SmallConstant
}
uint16 : NumLiveOuts
LiveOuts[NumLiveOuts] {
uint16 : Dwarf RegNum
uint8 : Reserved
uint8 : Size in Bytes
}
}
The first byte of each location encodes a type that indicates how to
interpret the ``RegNum`` and ``Offset`` fields as follows:
======== ========== =================== ===========================
Encoding Type Value Description
-------- ---------- ------------------- ---------------------------
0x1 Register Reg Value in a register
0x2 Direct Reg + Offset Frame index value
0x3 Indirect [Reg + Offset] Spilled value
0x4 Constant Offset Small constant
0x5 ConstIndex Constants[Offset] Large constant
======== ========== =================== ===========================
In the common case, a value is available in a register, and the
``Offset`` field will be zero. Values spilled to the stack are encoded
as ``Indirect`` locations. The runtime must load those values from a
stack address, typically in the form ``[BP + Offset]``. If an
``alloca`` value is passed directly to a stack map intrinsic, then
LLVM may fold the frame index into the stack map as an optimization to
avoid allocating a register or stack slot. These frame indices will be
encoded as ``Direct`` locations in the form ``BP + Offset``. LLVM may
also optimize constants by emitting them directly in the stack map,
either in the ``Offset`` of a ``Constant`` location or in the constant
pool, referred to by ``ConstantIndex`` locations.
At each callsite, a "liveout" register list is also recorded. These
are the registers that are live across the stackmap and therefore must
be saved by the runtime. This is an important optimization when the
patchpoint intrinsic is used with a calling convention that by default
preserves most registers as callee-save.
Each entry in the liveout register list contains a DWARF register
number and size in bytes. The stackmap format deliberately omits
specific subregister information. Instead the runtime must interpret
this information conservatively. For example, if the stackmap reports
one byte at ``%rax``, then the value may be in either ``%al`` or
``%ah``. It doesn't matter in practice, because the runtime will
simply save ``%rax``. However, if the stackmap reports 16 bytes at
``%ymm0``, then the runtime can safely optimize by saving only
``%xmm0``.
The stack map format is a contract between an LLVM SVN revision and
the runtime. It is currently experimental and may change in the short
term, but minimizing the need to update the runtime is
important. Consequently, the stack map design is motivated by
simplicity and extensibility. Compactness of the representation is
secondary because the runtime is expected to parse the data
immediately after compiling a module and encode the information in its
own format. Since the runtime controls the allocation of sections, it
can reuse the same stack map space for multiple modules.
.. _stackmap-section:
Stack Map Section
^^^^^^^^^^^^^^^^^
A JIT compiler can easily access this section by providing its own
memory manager via the LLVM C API
``LLVMCreateSimpleMCJITMemoryManager()``. When creating the memory
manager, the JIT provides a callback:
``LLVMMemoryManagerAllocateDataSectionCallback()``. When LLVM creates
this section, it invokes the callback and passes the section name. The
JIT can record the in-memory address of the section at this time and
later parse it to recover the stack map data.
On Darwin, the stack map section name is "__llvm_stackmaps". The
segment name is "__LLVM_STACKMAPS".
Stack Map Usage
===============
The stack map support described in this document can be used to
precisely determine the location of values at a specific position in
the code. LLVM does not maintain any mapping between those values and
any higher-level entity. The runtime must be able to interpret the
stack map record given only the ID, offset, and the order of the
locations, which LLVM preserves.
Note that this is quite different from the goal of debug information,
which is a best-effort attempt to track the location of named
variables at every instruction.
An important motivation for this design is to allow a runtime to
commandeer a stack frame when execution reaches an instruction address
associated with a stack map. The runtime must be able to rebuild a
stack frame and resume program execution using the information
provided by the stack map. For example, execution may resume in an
interpreter or a recompiled version of the same function.
This usage restricts LLVM optimization. Clearly, LLVM must not move
stores across a stack map. However, loads must also be handled
conservatively. If the load may trigger an exception, hoisting it
above a stack map could be invalid. For example, the runtime may
determine that a load is safe to execute without a type check given
the current state of the type system. If the type system changes while
some activation of the load's function exists on the stack, the load
becomes unsafe. The runtime can prevent subsequent execution of that
load by immediately patching any stack map location that lies between
the current call site and the load (typically, the runtime would
simply patch all stack map locations to invalidate the function). If
the compiler had hoisted the load above the stack map, then the
program could crash before the runtime could take back control.
To enforce these semantics, stackmap and patchpoint intrinsics are
considered to potentially read and write all memory. This may limit
optimization more than some clients desire. To address this problem,
metadata could be added to the intrinsic call to express aliasing,
thereby allowing optimizations to hoist certain loads above stack
maps.
Direct Stack Map Entries
^^^^^^^^^^^^^^^^^^^^^^^^
As shown in :ref:`stackmap-section`, a Direct stack map location
records the address of a frame index. This address is itself the value
that the runtime requested. This differs from Indirect locations,
which refer to stack locations from which the requested values must
be loaded. Direct locations can communicate the address of an alloca,
while Indirect locations handle register spills.
For example:
.. code-block:: none
entry:
%a = alloca i64...
llvm.experimental.stackmap(i64 <ID>, i32 <shadowBytes>, i64* %a)
The runtime can determine this alloca's relative location on the
stack immediately after compilation, or at any time thereafter. This
differs from Register and Indirect locations, because the runtime can
only read the values in those locations when execution reaches the
instruction address of the stack map.
This functionality requires LLVM to treat entry-block allocas
specially when they are directly consumed by an intrinsic. (This is
the same requirement imposed by the llvm.gcroot intrinsic.) LLVM
transformations must not substitute the alloca with any intervening
value. This can be verified by the runtime simply by checking that the
stack map's location is a Direct location type.

View File

@ -601,7 +601,7 @@ the classes multiple times yourself, e.g. by writing:
...
A ``defm`` can also be used inside a multiclass providing several levels of
multiclass instanciations.
multiclass instantiations.
.. code-block:: llvm
@ -727,7 +727,7 @@ opened, as in the case with the ``CALL*`` instructions above.
It's also possible to use "let" expressions inside multiclasses, providing more
ways to factor out commonality from the records, especially if using several
levels of multiclass instanciations. This also avoids the need of using "let"
levels of multiclass instantiations. This also avoids the need of using "let"
expressions within subsequent records inside a multiclass.
.. code-block:: llvm

View File

@ -238,6 +238,12 @@ For some targets, you also need to support the following methods:
* ``getTargetLowering()``
* ``getJITInfo()``
Some architectures, such as GPUs, do not support jumping to an arbitrary
program location and implement branching using masked execution and looping using
special instructions around the loop body. In order to avoid CFG modifications
that introduce irreducible control flow not handled by such hardware, a target
must call ``setRequiresStructuredCFG(true)`` when being initialized.
In addition, the ``XXXTargetMachine`` constructor should specify a
``TargetDescription`` string that determines the data layout for the target
machine, including characteristics such as pointer size, alignment, and

View File

@ -234,6 +234,7 @@ The following types have built-in support in YAML I/O:
* float
* double
* StringRef
* std::string
* int64_t
* int32_t
* int16_t
@ -640,12 +641,50 @@ The YAML syntax supports tags as a way to specify the type of a node before
it is parsed. This allows dynamic types of nodes. But the YAML I/O model uses
static typing, so there are limits to how you can use tags with the YAML I/O
model. Recently, we added support to YAML I/O for checking/setting the optional
tag on a map. Using this functionality it is even possbile to support differnt
tag on a map. Using this functionality it is even possible to support different
mappings, as long as they are convertible.
To check a tag, inside your mapping() method you can use io.mapTag() to specify
what the tag should be. This will also add that tag when writing yaml.
Validation
----------
Sometimes in a yaml map, each key/value pair is valid, but the combination is
not. This is similar to something having no syntax errors, but still having
semantic errors. To support semantic level checking, YAML I/O allows
an optional ``validate()`` method in a MappingTraits template specialization.
When parsing yaml, the ``validate()`` method is called *after* all key/values in
the map have been processed. Any error message returned by the ``validate()``
method during input will be printed just like a syntax error would be printed.
When writing yaml, the ``validate()`` method is called *before* the yaml
key/values are written. Any error during output will trigger an ``assert()``
because it is a programming error to have invalid struct values.
.. code-block:: c++
using llvm::yaml::MappingTraits;
using llvm::yaml::IO;
struct Stuff {
...
};
template <>
struct MappingTraits<Stuff> {
static void mapping(IO &io, Stuff &stuff) {
...
}
static StringRef validate(IO &io, Stuff &stuff) {
// Look at all fields in 'stuff' and if there
// are any bad values return a string describing
// the error. Otherwise return an empty string.
return StringRef();
}
};
Sequence
========

View File

@ -40,7 +40,7 @@ master_doc = 'index'
# General information about the project.
project = u'LLVM'
copyright = u'2003-2013, LLVM Project'
copyright = u'2003-2014, LLVM Project'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the

View File

@ -280,10 +280,10 @@ TYPEDEF_HIDES_STRUCT = NO
# For small to medium size projects (<1000 input files) the default value is
# probably good enough. For larger projects a too small cache size can cause
# doxygen to be busy swapping symbols to and from disk most of the time
# causing a significant performance penality.
# causing a significant performance penalty.
# If the system has enough physical memory increasing the cache will improve the
# performance by keeping more symbols in memory. Note that the value works on
# a logarithmic scale so increasing the size by one will rougly double the
# a logarithmic scale so increasing the size by one will roughly double the
# memory usage. The cache size is given by this formula:
# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
# corresponding to a cache size of 2^16 = 65536 symbols

View File

@ -3,7 +3,7 @@
Generated on $datetime for <a href="http://llvm.org/">$projectname</a> by
<a href="http://www.doxygen.org"><img src="doxygen.png" alt="Doxygen"
align="middle" border="0"/>$doxygenversion</a><br>
Copyright &copy; 2003-2013 University of Illinois at Urbana-Champaign.
Copyright &copy; 2003-2014 University of Illinois at Urbana-Champaign.
All Rights Reserved.</p>
<hr>

View File

@ -234,6 +234,7 @@ For API clients and LLVM developers.
TableGen/LangRef
HowToUseAttributes
NVPTXUsage
StackMaps
:doc:`WritingAnLLVMPass`
Information on how to write LLVM transformations and analyses.
@ -308,6 +309,9 @@ For API clients and LLVM developers.
:doc:`NVPTXUsage`
This document describes using the NVPTX back-end to compile GPU kernels.
:doc:`StackMaps`
LLVM support for mapping instruction addresses to the location of
values and allowing code to be patched.
Development Process Documentation
=================================

View File

@ -1,4 +1,11 @@
set(LLVM_LINK_COMPONENTS jit bitwriter nativecodegen interpreter)
set(LLVM_LINK_COMPONENTS
BitWriter
Core
ExecutionEngine
JIT
Support
nativecodegen
)
add_llvm_example(BrainF
BrainF.cpp

View File

@ -1,4 +1,11 @@
set(LLVM_LINK_COMPONENTS jit mcjit nativecodegen)
set(LLVM_LINK_COMPONENTS
Core
ExecutionEngine
MCJIT
Support
nativecodegen
)
set(LLVM_REQUIRES_EH 1)
add_llvm_example(ExceptionDemo

View File

@ -1,4 +1,11 @@
set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen)
set(LLVM_LINK_COMPONENTS
Core
ExecutionEngine
Interpreter
JIT
Support
nativecodegen
)
add_llvm_example(Fibonacci
fibonacci.cpp

View File

@ -1,4 +1,11 @@
set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen)
set(LLVM_LINK_COMPONENTS
Core
ExecutionEngine
Interpreter
JIT
Support
nativecodegen
)
add_llvm_example(HowToUseJIT
HowToUseJIT.cpp

View File

@ -1,4 +1,7 @@
set(LLVM_LINK_COMPONENTS core)
set(LLVM_LINK_COMPONENTS
Core
Support
)
add_llvm_example(Kaleidoscope-Ch3
toy.cpp

View File

@ -1,4 +1,13 @@
set(LLVM_LINK_COMPONENTS core jit interpreter native)
set(LLVM_LINK_COMPONENTS
Analysis
Core
ExecutionEngine
InstCombine
JIT
ScalarOpts
Support
nativecodegen
)
add_llvm_example(Kaleidoscope-Ch4
toy.cpp

View File

@ -1,4 +1,13 @@
set(LLVM_LINK_COMPONENTS core jit interpreter native)
set(LLVM_LINK_COMPONENTS
Analysis
Core
ExecutionEngine
InstCombine
JIT
ScalarOpts
Support
nativecodegen
)
add_llvm_example(Kaleidoscope-Ch5
toy.cpp

View File

@ -1,4 +1,13 @@
set(LLVM_LINK_COMPONENTS core jit interpreter native)
set(LLVM_LINK_COMPONENTS
Analysis
Core
ExecutionEngine
InstCombine
JIT
ScalarOpts
Support
nativecodegen
)
add_llvm_example(Kaleidoscope-Ch6
toy.cpp

View File

@ -1,4 +1,15 @@
set(LLVM_LINK_COMPONENTS core jit interpreter native)
set(LLVM_LINK_COMPONENTS
Analysis
Core
ExecutionEngine
InstCombine
JIT
ScalarOpts
Support
TransformUtils
nativecodegen
)
set(LLVM_REQUIRES_RTTI 1)
add_llvm_example(Kaleidoscope-Ch7

View File

@ -1,4 +1,8 @@
set(LLVM_LINK_COMPONENTS bitwriter)
set(LLVM_LINK_COMPONENTS
BitWriter
Core
Support
)
add_llvm_example(ModuleMaker
ModuleMaker.cpp

View File

@ -1,4 +1,11 @@
set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen)
set(LLVM_LINK_COMPONENTS
Core
ExecutionEngine
Interpreter
JIT
Support
nativecodegen
)
add_llvm_example(ParallelJIT
ParallelJIT.cpp

View File

@ -167,7 +167,8 @@ typedef enum {
LLVMAddressSafety = 1ULL << 32,
LLVMStackProtectStrongAttribute = 1ULL<<33,
LLVMCold = 1ULL << 34,
LLVMOptimizeNone = 1ULL << 35
LLVMOptimizeNone = 1ULL << 35,
LLVMInAllocaAttribute = 1ULL << 36
*/
} LLVMAttribute;
@ -2663,7 +2664,9 @@ LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef, LLVMValueRef Val,
const char *Name);
LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef, LLVMValueRef LHS,
LLVMValueRef RHS, const char *Name);
LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op,
LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering ordering,
LLVMBool singleThread, const char *Name);
LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B, LLVMAtomicRMWBinOp op,
LLVMValueRef PTR, LLVMValueRef Val,
LLVMAtomicOrdering ordering,
LLVMBool singleThread);

View File

@ -41,6 +41,9 @@ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM);
/** See llvm::createDeadStoreEliminationPass function. */
void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM);
/** See llvm::createScalarizerPass function. */
void LLVMAddScalarizerPass(LLVMPassManagerRef PM);
/** See llvm::createGVNPass function. */
void LLVMAddGVNPass(LLVMPassManagerRef PM);

View File

@ -48,7 +48,7 @@ class BlockFrequencyImpl {
typedef GraphTraits< Inverse<BlockT *> > GT;
const uint32_t EntryFreq;
static const uint64_t EntryFreq = 1 << 14;
std::string getBlockName(BasicBlock *BB) const {
return BB->getName().str();
@ -67,7 +67,8 @@ class BlockFrequencyImpl {
void setBlockFreq(BlockT *BB, BlockFrequency Freq) {
Freqs[BB] = Freq;
DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") = " << Freq << "\n");
DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") = ";
printBlockFreq(dbgs(), Freq) << "\n");
}
/// getEdgeFreq - Return edge frequency based on SRC frequency and Src -> Dst
@ -81,8 +82,9 @@ class BlockFrequencyImpl {
///
void incBlockFreq(BlockT *BB, BlockFrequency Freq) {
Freqs[BB] += Freq;
DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") += " << Freq
<< " --> " << Freqs[BB] << "\n");
DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") += ";
printBlockFreq(dbgs(), Freq) << " --> ";
printBlockFreq(dbgs(), Freqs[BB]) << "\n");
}
// All blocks in postorder.
@ -194,7 +196,8 @@ class BlockFrequencyImpl {
typename LoopExitProbMap::const_iterator I = LoopExitProb.find(BB);
assert(I != LoopExitProb.end() && "Loop header missing from table");
Freqs[BB] /= I->second;
DEBUG(dbgs() << "Loop header scaled to " << Freqs[BB] << ".\n");
DEBUG(dbgs() << "Loop header scaled to ";
printBlockFreq(dbgs(), Freqs[BB]) << ".\n");
}
/// doLoop - Propagate block frequency down through the loop.
@ -256,14 +259,15 @@ class BlockFrequencyImpl {
BranchProbability LEP = BranchProbability(N, D);
LoopExitProb.insert(std::make_pair(Head, LEP));
DEBUG(dbgs() << "LoopExitProb[" << getBlockName(Head) << "] = " << LEP
<< " from 1 - " << BackFreq << " / " << getBlockFreq(Head)
<< ".\n");
<< " from 1 - ";
printBlockFreq(dbgs(), BackFreq) << " / ";
printBlockFreq(dbgs(), getBlockFreq(Head)) << ".\n");
}
friend class BlockFrequencyInfo;
friend class MachineBlockFrequencyInfo;
BlockFrequencyImpl() : EntryFreq(BlockFrequency::getEntryFrequency()) { }
BlockFrequencyImpl() { }
void doFunction(FunctionT *fn, BlockProbInfoT *bpi) {
Fn = fn;
@ -312,6 +316,9 @@ class BlockFrequencyImpl {
}
public:
uint64_t getEntryFreq() { return EntryFreq; }
/// getBlockFreq - Return block frequency. Return 0 if we don't have it.
BlockFrequency getBlockFreq(const BlockT *BB) const {
typename DenseMap<const BlockT *, BlockFrequency>::const_iterator
@ -325,14 +332,15 @@ public:
OS << "\n\n---- Block Freqs ----\n";
for (typename FunctionT::iterator I = Fn->begin(), E = Fn->end(); I != E;) {
BlockT *BB = I++;
OS << " " << getBlockName(BB) << " = " << getBlockFreq(BB) << "\n";
OS << " " << getBlockName(BB) << " = ";
printBlockFreq(OS, getBlockFreq(BB)) << "\n";
for (typename GraphTraits<BlockT *>::ChildIteratorType
SI = GraphTraits<BlockT *>::child_begin(BB),
SE = GraphTraits<BlockT *>::child_end(BB); SI != SE; ++SI) {
BlockT *Succ = *SI;
OS << " " << getBlockName(BB) << " -> " << getBlockName(Succ)
<< " = " << getEdgeFreq(BB, Succ) << "\n";
<< " = "; printBlockFreq(OS, getEdgeFreq(BB, Succ)) << "\n";
}
}
}
@ -340,6 +348,30 @@ public:
void dump() const {
print(dbgs());
}
// Utility method that looks up the block frequency associated with BB and
// prints it to OS.
raw_ostream &printBlockFreq(raw_ostream &OS,
                            const BlockT *BB) const {
  // Only const members are used here (getBlockFreq and the frequency
  // overload of printBlockFreq), so this overload is const as well and can
  // be called through a const reference to the implementation.
  return printBlockFreq(OS, getBlockFreq(BB));
}
raw_ostream &printBlockFreq(raw_ostream &OS,
                            const BlockFrequency &Freq) const {
  // The raw frequency is a fixed-point number scaled by EntryFreq: emit the
  // integer part first, followed by the decimal point.
  const uint64_t Raw = Freq.getFrequency();
  OS << Raw / EntryFreq << ".";

  // Emit fractional digits one at a time. At least one digit is always
  // written; emission stops once the remainder drops below half of the
  // running power of ten.
  uint64_t Remainder = Raw % EntryFreq;
  uint64_t Scale = 1;
  for (;;) {
    Remainder *= 10;
    Scale *= 10;
    OS << Remainder / EntryFreq;
    Remainder %= EntryFreq;
    if (Remainder < Scale / 2)
      break;
  }
  return OS;
}
};
}

View File

@ -50,6 +50,17 @@ public:
/// comparison to the other block frequencies. We do this to avoid using of
/// floating points.
BlockFrequency getBlockFreq(const BasicBlock *BB) const;
// Print the block frequency Freq to OS using the current function's entry
// frequency to convert freq into a relative decimal form.
raw_ostream &printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const;
// Convenience method that attempts to look up the frequency associated with
// BB and print it to OS.
raw_ostream &printBlockFreq(raw_ostream &OS, const BasicBlock *BB) const;
uint64_t getEntryFreq() const;
};
}

View File

@ -16,6 +16,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/CFG.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
@ -98,6 +99,9 @@ public:
/// It is guaranteed to fall between 1 and UINT32_MAX.
uint32_t getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const;
uint32_t getEdgeWeight(const BasicBlock *Src,
succ_const_iterator Dst) const;
/// \brief Set the raw edge weight for a given edge.
///
/// This allows a pass to explicitly set the edge weight for an edge. It can

View File

@ -6,52 +6,54 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This interface is used to build and manipulate a call graph, which is a very
// useful tool for interprocedural optimization.
//
// Every function in a module is represented as a node in the call graph. The
// callgraph node keeps track of which functions the are called by the function
// corresponding to the node.
//
// A call graph may contain nodes where the function that they correspond to is
// null. These 'external' nodes are used to represent control flow that is not
// represented (or analyzable) in the module. In particular, this analysis
// builds one external node such that:
// 1. All functions in the module without internal linkage will have edges
// from this external node, indicating that they could be called by
// functions outside of the module.
// 2. All functions whose address is used for something more than a direct
// call, for example being stored into a memory location will also have an
// edge from this external node. Since they may be called by an unknown
// caller later, they must be tracked as such.
//
// There is a second external node added for calls that leave this module.
// Functions have a call edge to the external node iff:
// 1. The function is external, reflecting the fact that they could call
// anything without internal linkage or that has its address taken.
// 2. The function contains an indirect function call.
//
// As an extension in the future, there may be multiple nodes with a null
// function. These will be used when we can prove (through pointer analysis)
// that an indirect call site can call only a specific set of functions.
//
// Because of these properties, the CallGraph captures a conservative superset
// of all of the caller-callee relationships, which is useful for
// transformations.
//
// The CallGraph class also attempts to figure out what the root of the
// CallGraph is, which it currently does by looking for a function named 'main'.
// If no function named 'main' is found, the external node is used as the entry
// node, reflecting the fact that any function without internal linkage could
// be called into (which is common for libraries).
//
/// \file
///
/// This file provides interfaces used to build and manipulate a call graph,
/// which is a very useful tool for interprocedural optimization.
///
/// Every function in a module is represented as a node in the call graph. The
/// callgraph node keeps track of which functions are called by the
/// function corresponding to the node.
///
/// A call graph may contain nodes where the function that they correspond to
/// is null. These 'external' nodes are used to represent control flow that is
/// not represented (or analyzable) in the module. In particular, this
/// analysis builds one external node such that:
/// 1. All functions in the module without internal linkage will have edges
/// from this external node, indicating that they could be called by
/// functions outside of the module.
/// 2. All functions whose address is used for something more than a direct
/// call, for example being stored into a memory location will also have
/// an edge from this external node. Since they may be called by an
/// unknown caller later, they must be tracked as such.
///
/// There is a second external node added for calls that leave this module.
/// Functions have a call edge to the external node iff:
/// 1. The function is external, reflecting the fact that they could call
/// anything without internal linkage or that has its address taken.
/// 2. The function contains an indirect function call.
///
/// As an extension in the future, there may be multiple nodes with a null
/// function. These will be used when we can prove (through pointer analysis)
/// that an indirect call site can call only a specific set of functions.
///
/// Because of these properties, the CallGraph captures a conservative superset
/// of all of the caller-callee relationships, which is useful for
/// transformations.
///
/// The CallGraph class also attempts to figure out what the root of the
/// CallGraph is, which it currently does by looking for a function named
/// 'main'. If no function named 'main' is found, the external node is used as
/// the entry node, reflecting the fact that any function without internal
/// linkage could be called into (which is common for libraries).
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_CALLGRAPH_H
#define LLVM_ANALYSIS_CALLGRAPH_H
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
@ -66,171 +68,142 @@ class Function;
class Module;
class CallGraphNode;
//===----------------------------------------------------------------------===//
// CallGraph class definition
//
class CallGraph : public ModulePass {
Module *Mod; // The module this call graph represents
/// \brief The basic data container for the call graph of a \c Module of IR.
///
/// This class exposes the interface to the call graph for a module of IR.
///
/// The core call graph itself can also be updated to reflect changes to the IR.
class CallGraph {
Module &M;
typedef std::map<const Function *, CallGraphNode *> FunctionMapTy;
FunctionMapTy FunctionMap; // Map from a function to its node
// Root is root of the call graph, or the external node if a 'main' function
// couldn't be found.
//
/// \brief A map from \c Function* to \c CallGraphNode*.
FunctionMapTy FunctionMap;
/// \brief Root is root of the call graph, or the external node if a 'main'
/// function couldn't be found.
CallGraphNode *Root;
// ExternalCallingNode - This node has edges to all external functions and
// those internal functions that have their address taken.
/// \brief This node has edges to all external functions and those internal
/// functions that have their address taken.
CallGraphNode *ExternalCallingNode;
// CallsExternalNode - This node has edges to it from all functions making
// indirect calls or calling an external function.
/// \brief This node has edges to it from all functions making indirect calls
/// or calling an external function.
CallGraphNode *CallsExternalNode;
/// Replace the function represented by this node by another.
/// \brief Replace the function represented by this node by another.
///
/// This does not rescan the body of the function, so it is suitable when
/// splicing the body of one function to another while also updating all
/// callers from the old function to the new.
///
void spliceFunction(const Function *From, const Function *To);
// Add a function to the call graph, and link the node to all of the functions
// that it calls.
/// \brief Add a function to the call graph, and link the node to all of the
/// functions that it calls.
void addToCallGraph(Function *F);
public:
static char ID; // Class identification, replacement for typeinfo
//===---------------------------------------------------------------------
// Accessors.
//
CallGraph(Module &M);
~CallGraph();
void print(raw_ostream &OS) const;
void dump() const;
typedef FunctionMapTy::iterator iterator;
typedef FunctionMapTy::const_iterator const_iterator;
/// getModule - Return the module the call graph corresponds to.
///
Module &getModule() const { return *Mod; }
/// \brief Returns the module the call graph corresponds to.
Module &getModule() const { return M; }
inline iterator begin() { return FunctionMap.begin(); }
inline iterator end() { return FunctionMap.end(); }
inline iterator begin() { return FunctionMap.begin(); }
inline iterator end() { return FunctionMap.end(); }
inline const_iterator begin() const { return FunctionMap.begin(); }
inline const_iterator end() const { return FunctionMap.end(); }
inline const_iterator end() const { return FunctionMap.end(); }
// Subscripting operators, return the call graph node for the provided
// function
/// \brief Returns the call graph node for the provided function.
inline const CallGraphNode *operator[](const Function *F) const {
  // Look the function up in the function-to-node map; asking for a function
  // that has no node is a programming error.
  const_iterator Entry = FunctionMap.find(F);
  assert(Entry != FunctionMap.end() && "Function not in callgraph!");
  return Entry->second;
}
/// \brief Returns the call graph node for the provided function.
inline CallGraphNode *operator[](const Function *F) {
  // Mutable counterpart of the const subscript: same lookup, but the node is
  // handed back as a non-const pointer.
  const_iterator Entry = FunctionMap.find(F);
  assert(Entry != FunctionMap.end() && "Function not in callgraph!");
  return Entry->second;
}
/// Returns the CallGraphNode which is used to represent undetermined calls
/// into the callgraph.
/// \brief Returns the \c CallGraphNode which is used to represent
/// undetermined calls into the callgraph.
CallGraphNode *getExternalCallingNode() const { return ExternalCallingNode; }
CallGraphNode *getCallsExternalNode() const { return CallsExternalNode; }
/// Return the root/main method in the module, or some other root node, such
/// as the externalcallingnode.
CallGraphNode *getRoot() { return Root; }
const CallGraphNode *getRoot() const { return Root; }
CallGraphNode *getCallsExternalNode() const { return CallsExternalNode; }
//===---------------------------------------------------------------------
// Functions to keep a call graph up to date with a function that has been
// modified.
//
/// removeFunctionFromModule - Unlink the function from this module, returning
/// it. Because this removes the function from the module, the call graph
/// node is destroyed. This is only valid if the function does not call any
/// other functions (ie, there are no edges in it's CGN). The easiest way to
/// do this is to dropAllReferences before calling this.
/// \brief Unlink the function from this module, returning it.
///
/// Because this removes the function from the module, the call graph node is
/// destroyed. This is only valid if the function does not call any other
/// functions (i.e., there are no edges in its CGN). The easiest way to do
/// this is to dropAllReferences before calling this.
Function *removeFunctionFromModule(CallGraphNode *CGN);
/// getOrInsertFunction - This method is identical to calling operator[], but
/// it will insert a new CallGraphNode for the specified function if one does
/// not already exist.
/// \brief Similar to operator[], but this will insert a new CallGraphNode for
/// \c F if one does not already exist.
CallGraphNode *getOrInsertFunction(const Function *F);
CallGraph();
virtual ~CallGraph() { releaseMemory(); }
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
virtual bool runOnModule(Module &M);
virtual void releaseMemory();
void print(raw_ostream &o, const Module *) const;
void dump() const;
};
//===----------------------------------------------------------------------===//
// CallGraphNode class definition.
//
/// \brief A node in the call graph for a module.
///
/// Typically represents a function in the call graph. There are also special
/// "null" nodes used to represent theoretical entries in the call graph.
class CallGraphNode {
friend class CallGraph;
AssertingVH<Function> F;
// CallRecord - This is a pair of the calling instruction (a call or invoke)
// and the callgraph node being called.
public:
typedef std::pair<WeakVH, CallGraphNode*> CallRecord;
private:
std::vector<CallRecord> CalledFunctions;
/// NumReferences - This is the number of times that this CallGraphNode occurs
/// in the CalledFunctions array of this or other CallGraphNodes.
unsigned NumReferences;
/// \brief A pair of the calling instruction (a call or invoke)
/// and the call graph node being called.
typedef std::pair<WeakVH, CallGraphNode *> CallRecord;
CallGraphNode(const CallGraphNode &) LLVM_DELETED_FUNCTION;
void operator=(const CallGraphNode &) LLVM_DELETED_FUNCTION;
void DropRef() { --NumReferences; }
void AddRef() { ++NumReferences; }
public:
typedef std::vector<CallRecord> CalledFunctionsVector;
// CallGraphNode ctor - Create a node for the specified function.
inline CallGraphNode(Function *f) : F(f), NumReferences(0) {}
/// \brief Creates a node for the specified function.
inline CallGraphNode(Function *F) : F(F), NumReferences(0) {}
~CallGraphNode() {
assert(NumReferences == 0 && "Node deleted while references remain");
}
//===---------------------------------------------------------------------
// Accessor methods.
//
typedef std::vector<CallRecord>::iterator iterator;
typedef std::vector<CallRecord>::const_iterator const_iterator;
// getFunction - Return the function that this call graph node represents.
/// \brief Returns the function that this call graph node represents.
Function *getFunction() const { return F; }
inline iterator begin() { return CalledFunctions.begin(); }
inline iterator end() { return CalledFunctions.end(); }
inline iterator end() { return CalledFunctions.end(); }
inline const_iterator begin() const { return CalledFunctions.begin(); }
inline const_iterator end() const { return CalledFunctions.end(); }
inline const_iterator end() const { return CalledFunctions.end(); }
inline bool empty() const { return CalledFunctions.empty(); }
inline unsigned size() const { return (unsigned)CalledFunctions.size(); }
/// getNumReferences - Return the number of other CallGraphNodes in this
/// CallGraph that reference this node in their callee list.
/// \brief Returns the number of other CallGraphNodes in this CallGraph that
/// reference this node in their callee list.
unsigned getNumReferences() const { return NumReferences; }
// Subscripting operator - Return the i'th called function.
//
/// \brief Returns the i'th called function.
CallGraphNode *operator[](unsigned i) const {
assert(i < CalledFunctions.size() && "Invalid index");
return CalledFunctions[i].second;
}
/// dump - Print out this call graph node.
///
/// \brief Print out this call graph node.
void dump() const;
void print(raw_ostream &OS) const;
@ -239,29 +212,25 @@ public:
// modified
//
/// removeAllCalledFunctions - As the name implies, this removes all edges
/// from this CallGraphNode to any functions it calls.
/// \brief Removes all edges from this CallGraphNode to any functions it
/// calls.
void removeAllCalledFunctions() {
  // Peel call records off the back one at a time, releasing the reference
  // held on each callee node before its record is discarded.
  for (; !CalledFunctions.empty(); CalledFunctions.pop_back())
    CalledFunctions.back().second->DropRef();
}
/// stealCalledFunctionsFrom - Move all the callee information from N to this
/// node.
/// \brief Moves all the callee information from N to this node.
void stealCalledFunctionsFrom(CallGraphNode *N) {
  // This node must not already hold call records; after the exchange N is
  // left with this node's (empty) list.
  assert(CalledFunctions.empty() &&
         "Cannot steal callsite information if I already have some");
  CalledFunctions.swap(N->CalledFunctions);
}
/// addCalledFunction - Add a function to the list of functions called by this
/// one.
/// \brief Adds a function to the list of functions called by this one.
void addCalledFunction(CallSite CS, CallGraphNode *M) {
assert(!CS.getInstruction() ||
!CS.getCalledFunction() ||
assert(!CS.getInstruction() || !CS.getCalledFunction() ||
!CS.getCalledFunction()->isIntrinsic());
CalledFunctions.push_back(std::make_pair(CS.getInstruction(), M));
M->AddRef();
@ -272,32 +241,152 @@ public:
*I = CalledFunctions.back();
CalledFunctions.pop_back();
}
/// removeCallEdgeFor - This method removes the edge in the node for the
/// specified call site. Note that this method takes linear time, so it
/// should be used sparingly.
/// \brief Removes the edge in the node for the specified call site.
///
/// Note that this method takes linear time, so it should be used sparingly.
void removeCallEdgeFor(CallSite CS);
/// removeAnyCallEdgeTo - This method removes all call edges from this node
/// to the specified callee function. This takes more time to execute than
/// removeCallEdgeTo, so it should not be used unless necessary.
/// \brief Removes all call edges from this node to the specified callee
/// function.
///
/// This takes more time to execute than removeCallEdgeTo, so it should not
/// be used unless necessary.
void removeAnyCallEdgeTo(CallGraphNode *Callee);
/// removeOneAbstractEdgeTo - Remove one edge associated with a null callsite
/// from this node to the specified callee function.
/// \brief Removes one edge associated with a null callsite from this node to
/// the specified callee function.
void removeOneAbstractEdgeTo(CallGraphNode *Callee);
/// replaceCallEdge - This method replaces the edge in the node for the
/// specified call site with a new one. Note that this method takes linear
/// time, so it should be used sparingly.
/// \brief Replaces the edge in the node for the specified call site with a
/// new one.
///
/// Note that this method takes linear time, so it should be used sparingly.
void replaceCallEdge(CallSite CS, CallSite NewCS, CallGraphNode *NewNode);
/// allReferencesDropped - This is a special function that should only be
/// used by the CallGraph class.
void allReferencesDropped() {
NumReferences = 0;
private:
friend class CallGraph;
AssertingVH<Function> F;
std::vector<CallRecord> CalledFunctions;
/// \brief The number of times that this CallGraphNode occurs in the
/// CalledFunctions array of this or other CallGraphNodes.
unsigned NumReferences;
CallGraphNode(const CallGraphNode &) LLVM_DELETED_FUNCTION;
void operator=(const CallGraphNode &) LLVM_DELETED_FUNCTION;
void DropRef() { --NumReferences; }
void AddRef() { ++NumReferences; }
/// \brief A special function that should only be used by the CallGraph class.
void allReferencesDropped() { NumReferences = 0; }
};
/// \brief An analysis pass to compute the \c CallGraph for a \c Module.
///
/// This class implements the concept of an analysis pass used by the \c
/// ModuleAnalysisManager to run an analysis over a module and cache the
/// resulting data.
class CallGraphAnalysis {
public:
/// \brief A formulaic typedef to inform clients of the result type.
typedef CallGraph Result;
/// \brief Returns an opaque identifier for this analysis: the address of the
/// private static \c PassID member.
static void *ID() { return (void *)&PassID; }
/// \brief Compute the \c CallGraph for the module \c M.
///
/// The real work here is done in the \c CallGraph constructor.
/// \c M is dereferenced unconditionally, so it must not be null.
CallGraph run(Module *M) { return CallGraph(*M); }
private:
/// Only the address of this member is used (see \c ID()); its value is never
/// read in this class.
static char PassID;
};
/// \brief The \c ModulePass that owns a \c CallGraph together with the logic
/// to build it.
///
/// This class offers two things at once: the interface of the call graph
/// container, and the module pass that runs over a module of IR to produce
/// that call graph. The call graph interface is entirely a wrapper around a
/// \c CallGraph object which is stored internally for each module.
class CallGraphWrapperPass : public ModulePass {
  /// The wrapped call graph; every accessor below forwards to it.
  OwningPtr<CallGraph> G;

public:
  static char ID; // Class identification, replacement for typeinfo

  CallGraphWrapperPass();
  virtual ~CallGraphWrapperPass();

  /// \brief The internal \c CallGraph that the rest of this interface wraps.
  const CallGraph &getCallGraph() const { return *G; }
  CallGraph &getCallGraph() { return *G; }

  typedef CallGraph::iterator iterator;
  typedef CallGraph::const_iterator const_iterator;

  /// \brief Returns the module the call graph corresponds to.
  Module &getModule() const { return G->getModule(); }

  // Iteration simply forwards to the wrapped graph.
  iterator begin() { return G->begin(); }
  iterator end() { return G->end(); }
  const_iterator begin() const { return G->begin(); }
  const_iterator end() const { return G->end(); }

  /// \brief Returns the call graph node for the provided function.
  const CallGraphNode *operator[](const Function *F) const {
    return (*G)[F];
  }

  /// \brief Returns the call graph node for the provided function.
  CallGraphNode *operator[](const Function *F) { return (*G)[F]; }

  /// \brief Returns the \c CallGraphNode which is used to represent
  /// undetermined calls into the callgraph.
  CallGraphNode *getExternalCallingNode() const {
    return G->getExternalCallingNode();
  }

  /// \brief Forwards to \c CallGraph::getCallsExternalNode() on the wrapped
  /// graph.
  CallGraphNode *getCallsExternalNode() const {
    return G->getCallsExternalNode();
  }

  //===---------------------------------------------------------------------
  // Functions to keep a call graph up to date with a function that has been
  // modified.
  //

  /// \brief Unlink the function from this module, returning it.
  ///
  /// Because this removes the function from the module, the call graph node
  /// is destroyed. This is only valid if the function does not call any other
  /// functions (i.e., there are no edges in its CGN). The easiest way to
  /// ensure that is to dropAllReferences before calling this.
  Function *removeFunctionFromModule(CallGraphNode *CGN) {
    return G->removeFunctionFromModule(CGN);
  }

  /// \brief Like operator[], except that a new CallGraphNode is inserted for
  /// \c F when none exists yet.
  CallGraphNode *getOrInsertFunction(const Function *F) {
    return G->getOrInsertFunction(F);
  }

  //===---------------------------------------------------------------------
  // Implementation of the ModulePass interface needed here.
  //

  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
  virtual bool runOnModule(Module &M);
  virtual void releaseMemory();

  void print(raw_ostream &o, const Module *) const;
  void dump() const;
};
//===----------------------------------------------------------------------===//
@ -307,11 +396,12 @@ public:
// Provide graph traits for traversing call graphs using standard graph
// traversals.
template <> struct GraphTraits<CallGraphNode*> {
template <> struct GraphTraits<CallGraphNode *> {
typedef CallGraphNode NodeType;
typedef CallGraphNode::CallRecord CGNPairTy;
typedef std::pointer_to_unary_function<CGNPairTy, CallGraphNode*> CGNDerefFun;
typedef std::pointer_to_unary_function<CGNPairTy, CallGraphNode *>
CGNDerefFun;
static NodeType *getEntryNode(CallGraphNode *CGN) { return CGN; }
@ -320,55 +410,54 @@ template <> struct GraphTraits<CallGraphNode*> {
static inline ChildIteratorType child_begin(NodeType *N) {
return map_iterator(N->begin(), CGNDerefFun(CGNDeref));
}
static inline ChildIteratorType child_end (NodeType *N) {
static inline ChildIteratorType child_end(NodeType *N) {
return map_iterator(N->end(), CGNDerefFun(CGNDeref));
}
static CallGraphNode *CGNDeref(CGNPairTy P) {
return P.second;
}
static CallGraphNode *CGNDeref(CGNPairTy P) { return P.second; }
};
template <> struct GraphTraits<const CallGraphNode*> {
template <> struct GraphTraits<const CallGraphNode *> {
typedef const CallGraphNode NodeType;
typedef NodeType::const_iterator ChildIteratorType;
static NodeType *getEntryNode(const CallGraphNode *CGN) { return CGN; }
static inline ChildIteratorType child_begin(NodeType *N) { return N->begin();}
static inline ChildIteratorType child_end (NodeType *N) { return N->end(); }
static inline ChildIteratorType child_begin(NodeType *N) {
return N->begin();
}
static inline ChildIteratorType child_end(NodeType *N) { return N->end(); }
};
template<> struct GraphTraits<CallGraph*> : public GraphTraits<CallGraphNode*> {
template <>
struct GraphTraits<CallGraph *> : public GraphTraits<CallGraphNode *> {
static NodeType *getEntryNode(CallGraph *CGN) {
return CGN->getExternalCallingNode(); // Start at the external node!
return CGN->getExternalCallingNode(); // Start at the external node!
}
typedef std::pair<const Function*, CallGraphNode*> PairTy;
typedef std::pointer_to_unary_function<PairTy, CallGraphNode&> DerefFun;
typedef std::pair<const Function *, CallGraphNode *> PairTy;
typedef std::pointer_to_unary_function<PairTy, CallGraphNode &> DerefFun;
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
typedef mapped_iterator<CallGraph::iterator, DerefFun> nodes_iterator;
static nodes_iterator nodes_begin(CallGraph *CG) {
return map_iterator(CG->begin(), DerefFun(CGdereference));
}
static nodes_iterator nodes_end (CallGraph *CG) {
static nodes_iterator nodes_end(CallGraph *CG) {
return map_iterator(CG->end(), DerefFun(CGdereference));
}
static CallGraphNode &CGdereference(PairTy P) {
return *P.second;
}
static CallGraphNode &CGdereference(PairTy P) { return *P.second; }
};
template<> struct GraphTraits<const CallGraph*> :
public GraphTraits<const CallGraphNode*> {
template <>
struct GraphTraits<const CallGraph *> : public GraphTraits<
const CallGraphNode *> {
static NodeType *getEntryNode(const CallGraph *CGN) {
return CGN->getExternalCallingNode();
}
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
typedef CallGraph::const_iterator nodes_iterator;
static nodes_iterator nodes_begin(const CallGraph *CG) { return CG->begin(); }
static nodes_iterator nodes_end (const CallGraph *CG) { return CG->end(); }
static nodes_iterator nodes_end(const CallGraph *CG) { return CG->end(); }
};
} // End llvm namespace

View File

@ -19,50 +19,62 @@
namespace llvm {
template <class Analysis, bool Simple>
/// \brief Fallback traits class used to obtain a graph from an analysis pass.
///
/// It covers the case where 'GraphT' is 'AnalysisT *': the analysis pointer
/// itself is handed back as the graph.
template <typename AnalysisT, typename GraphT = AnalysisT *>
struct DefaultAnalysisGraphTraits {
  /// Converts the analysis pointer \p Analysis to \c GraphT and returns it.
  static GraphT getGraph(AnalysisT *Analysis) {
    GraphT Graph = Analysis;
    return Graph;
  }
};
template <
typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT> >
class DOTGraphTraitsViewer : public FunctionPass {
public:
DOTGraphTraitsViewer(StringRef GraphName, char &ID)
: FunctionPass(ID), Name(GraphName) {}
: FunctionPass(ID), Name(GraphName) {}
virtual bool runOnFunction(Function &F) {
Analysis *Graph = &getAnalysis<Analysis>();
std::string GraphName = DOTGraphTraits<Analysis*>::getGraphName(Graph);
GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis<AnalysisT>());
std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph);
std::string Title = GraphName + " for '" + F.getName().str() + "' function";
ViewGraph(Graph, Name, Simple, Title);
ViewGraph(Graph, Name, IsSimple, Title);
return false;
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<Analysis>();
AU.addRequired<AnalysisT>();
}
private:
std::string Name;
};
template <class Analysis, bool Simple>
template <
typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT> >
class DOTGraphTraitsPrinter : public FunctionPass {
public:
DOTGraphTraitsPrinter(StringRef GraphName, char &ID)
: FunctionPass(ID), Name(GraphName) {}
: FunctionPass(ID), Name(GraphName) {}
virtual bool runOnFunction(Function &F) {
Analysis *Graph = &getAnalysis<Analysis>();
GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis<AnalysisT>());
std::string Filename = Name + "." + F.getName().str() + ".dot";
std::string ErrorInfo;
errs() << "Writing '" << Filename << "'...";
raw_fd_ostream File(Filename.c_str(), ErrorInfo);
std::string GraphName = DOTGraphTraits<Analysis*>::getGraphName(Graph);
std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph);
std::string Title = GraphName + " for '" + F.getName().str() + "' function";
if (ErrorInfo.empty())
WriteGraph(File, Graph, Simple, Title);
WriteGraph(File, Graph, IsSimple, Title);
else
errs() << " error opening file for writing!";
errs() << "\n";
@ -72,55 +84,59 @@ public:
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<Analysis>();
AU.addRequired<AnalysisT>();
}
private:
std::string Name;
};
template <class Analysis, bool Simple>
template <
typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT> >
class DOTGraphTraitsModuleViewer : public ModulePass {
public:
DOTGraphTraitsModuleViewer(StringRef GraphName, char &ID)
: ModulePass(ID), Name(GraphName) {}
: ModulePass(ID), Name(GraphName) {}
virtual bool runOnModule(Module &M) {
Analysis *Graph = &getAnalysis<Analysis>();
std::string Title = DOTGraphTraits<Analysis*>::getGraphName(Graph);
GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis<AnalysisT>());
std::string Title = DOTGraphTraits<GraphT>::getGraphName(Graph);
ViewGraph(Graph, Name, Simple, Title);
ViewGraph(Graph, Name, IsSimple, Title);
return false;
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<Analysis>();
AU.addRequired<AnalysisT>();
}
private:
std::string Name;
};
template <class Analysis, bool Simple>
template <
typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *,
typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT> >
class DOTGraphTraitsModulePrinter : public ModulePass {
public:
DOTGraphTraitsModulePrinter(StringRef GraphName, char &ID)
: ModulePass(ID), Name(GraphName) {}
: ModulePass(ID), Name(GraphName) {}
virtual bool runOnModule(Module &M) {
Analysis *Graph = &getAnalysis<Analysis>();
GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis<AnalysisT>());
std::string Filename = Name + ".dot";
std::string ErrorInfo;
errs() << "Writing '" << Filename << "'...";
raw_fd_ostream File(Filename.c_str(), ErrorInfo);
std::string Title = DOTGraphTraits<Analysis*>::getGraphName(Graph);
std::string Title = DOTGraphTraits<GraphT>::getGraphName(Graph);
if (ErrorInfo.empty())
WriteGraph(File, Graph, Simple, Title);
WriteGraph(File, Graph, IsSimple, Title);
else
errs() << " error opening file for writing!";
errs() << "\n";
@ -130,7 +146,7 @@ public:
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<Analysis>();
AU.addRequired<AnalysisT>();
}
private:

View File

@ -346,12 +346,14 @@ public:
DomTreeNodeBase<NodeT> *getRootNode() { return RootNode; }
const DomTreeNodeBase<NodeT> *getRootNode() const { return RootNode; }
/// Get all nodes dominated by R, including R itself. Return true on success.
/// Get all nodes dominated by R, including R itself.
void getDescendants(NodeT *R, SmallVectorImpl<NodeT *> &Result) const {
Result.clear();
const DomTreeNodeBase<NodeT> *RN = getNode(R);
if (RN == NULL)
return; // If R is unreachable, it will not be present in the DOM tree.
SmallVector<const DomTreeNodeBase<NodeT> *, 8> WL;
WL.push_back(RN);
Result.clear();
while (!WL.empty()) {
const DomTreeNodeBase<NodeT> *N = WL.pop_back_val();
@ -769,7 +771,7 @@ public:
return DT->getRootNode();
}
/// Get all nodes dominated by R, including R itself. Return true on success.
/// Get all nodes dominated by R, including R itself.
void getDescendants(BasicBlock *R,
SmallVectorImpl<BasicBlock *> &Result) const {
DT->getDescendants(R, Result);

View File

@ -34,7 +34,7 @@ namespace llvm {
// IntervalPartition - This class builds and holds an "interval partition" for
// a function. This partition divides the control flow graph into a set of
// maximal intervals, as defined with the properties above. Intuitively, an
// interval is a (possibly nonexistent) loop with a "tail" of non looping
// interval is a (possibly nonexistent) loop with a "tail" of non-looping
// nodes following it.
//
class IntervalPartition : public FunctionPass {

View File

@ -33,8 +33,10 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/CFG.h"
#include "llvm/Pass.h"
#include <algorithm>
@ -53,6 +55,7 @@ class Loop;
class MDNode;
class PHINode;
class raw_ostream;
template<class N> class DominatorTreeBase;
template<class N, class M> class LoopInfoBase;
template<class N, class M> class LoopBase;
@ -228,6 +231,18 @@ public:
/// A latch block is a block that contains a branch back to the header.
BlockT *getLoopLatch() const;
/// getLoopLatches - Append every latch block of this loop to LoopLatches. A
/// latch block is a block that contains a branch back to the header. Entries
/// already present in LoopLatches are left in place.
void getLoopLatches(SmallVectorImpl<BlockT *> &LoopLatches) const {
  typedef GraphTraits<Inverse<BlockT *> > InvBlockTraits;
  typedef typename InvBlockTraits::ChildIteratorType PredIterator;

  BlockT *Header = getHeader();
  PredIterator PI = InvBlockTraits::child_begin(Header);
  PredIterator PE = InvBlockTraits::child_end(Header);
  for (; PI != PE; ++PI) {
    // Only children of the header in the inverse graph that belong to this
    // loop are recorded.
    if (!contains(*PI))
      continue;
    LoopLatches.push_back(*PI);
  }
}
//===--------------------------------------------------------------------===//
// APIs for updating loop information after changing the CFG
//

View File

@ -15,8 +15,10 @@
#ifndef LLVM_ANALYSIS_LOOPINFOIMPL_H
#define LLVM_ANALYSIS_LOOPINFOIMPL_H
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
namespace llvm {

View File

@ -190,6 +190,8 @@ public:
return knownSize(SizeOffset) && knownOffset(SizeOffset);
}
// These are "private", except they can't actually be made private. Only
// compute() should be used by external users.
SizeOffsetType visitAllocaInst(AllocaInst &I);
SizeOffsetType visitArgument(Argument &A);
SizeOffsetType visitCallSite(CallSite CS);
@ -256,6 +258,7 @@ public:
return knownSize(SizeOffset) && knownOffset(SizeOffset);
}
// The individual instruction visitors should be treated as private.
SizeOffsetEvalType visitAllocaInst(AllocaInst &I);
SizeOffsetEvalType visitCallSite(CallSite CS);
SizeOffsetEvalType visitExtractElementInst(ExtractElementInst &I);

View File

@ -79,6 +79,12 @@ struct PostDominatorTree : public FunctionPass {
return DT->findNearestCommonDominator(A, B);
}
/// Collect all nodes post-dominated by R, R itself included.
///
/// The request is forwarded unchanged to the underlying tree DT, which
/// receives Result as its output vector.
void getDescendants(BasicBlock *R,
                    SmallVectorImpl<BasicBlock *> &Result) const {
  return DT->getDescendants(R, Result);
}
virtual void releaseMemory() {
DT->releaseMemory();
}

View File

@ -312,11 +312,11 @@ public:
/// The toplevel region represents the whole function.
bool isTopLevelRegion() const { return exit == NULL; }
/// @brief Return a new (non canonical) region, that is obtained by joining
/// @brief Return a new (non-canonical) region, that is obtained by joining
/// this region with its predecessors.
///
/// @return A region also starting at getEntry(), but reaching to the next
/// basic block that forms with getEntry() a (non canonical) region.
/// basic block that forms with getEntry() a (non-canonical) region.
/// NULL if such a basic block does not exist.
Region *getExpandedRegion() const;

View File

@ -370,7 +370,8 @@ namespace bitc {
ATTR_KIND_Z_EXT = 34,
ATTR_KIND_BUILTIN = 35,
ATTR_KIND_COLD = 36,
ATTR_KIND_OPTIMIZE_NONE = 37
ATTR_KIND_OPTIMIZE_NONE = 37,
ATTR_KIND_IN_ALLOCA = 38
};
} // End bitc namespace

View File

@ -22,6 +22,7 @@
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
class AsmPrinterHandler;
class BlockAddress;
class GCStrategy;
class Constant;
@ -110,13 +111,21 @@ namespace llvm {
/// function.
MachineLoopInfo *LI;
/// HandlerInfo - Bundles an AsmPrinterHandler with the timer name and timer
/// group name stored alongside it.
struct HandlerInfo {
  AsmPrinterHandler *Handler;
  const char *TimerName;
  const char *TimerGroupName;

  /// Stores the three arguments verbatim; nothing is copied or validated.
  HandlerInfo(AsmPrinterHandler *Handler, const char *TimerName,
              const char *TimerGroupName)
      : Handler(Handler),
        TimerName(TimerName),
        TimerGroupName(TimerGroupName) {}
};
/// Handlers - a vector of all debug/EH info emitters we should use.
/// This vector maintains ownership of the emitters.
SmallVector<HandlerInfo, 1> Handlers;
/// DD - If the target supports dwarf debug info, this pointer is non-null.
DwarfDebug *DD;
/// DE - If the target supports dwarf exception info, this pointer is
/// non-null.
DwarfException *DE;
protected:
explicit AsmPrinter(TargetMachine &TM, MCStreamer &Streamer);
@ -200,11 +209,6 @@ namespace llvm {
bool needsSEHMoves();
/// needsRelocationsForDwarfStringPool - Specifies whether the object format
/// expects to use relocations to refer to debug entries. Alternatively we
/// emit section offsets in bytes from the start of the string pool.
bool needsRelocationsForDwarfStringPool() const;
/// EmitConstantPool - Print to the current output stream assembly
/// representations of the constants in the constant pool MCP. This is
/// used to print out constants which have been "spilled to memory" by
@ -304,13 +308,10 @@ namespace llvm {
/// stem.
MCSymbol *GetTempSymbol(StringRef Name) const;
/// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with
/// global value name as its base, with the specified suffix, and where the
/// symbol is forced to have private linkage if ForcePrivate is true.
MCSymbol *GetSymbolWithGlobalValueBase(const GlobalValue *GV,
StringRef Suffix,
bool ForcePrivate = true) const;
/// Return the MCSymbol for a private symbol with global value name as its
/// base, with the specified suffix.
MCSymbol *getSymbolWithGlobalValueBase(const GlobalValue *GV,
StringRef Suffix) const;
/// GetExternalSymbolSymbol - Return the MCSymbol for the specified
/// ExternalSymbol.

View File

@ -603,7 +603,7 @@ namespace ISD {
/// This corresponds to "load atomic" instruction.
ATOMIC_LOAD,
/// OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr, val)
/// OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val)
/// This corresponds to "store atomic" instruction.
ATOMIC_STORE,

View File

@ -44,32 +44,35 @@ typedef std::pair<const MachineInstr *, const MachineInstr *> InsnRange;
///
class LexicalScopes {
public:
LexicalScopes() : MF(NULL), CurrentFnLexicalScope(NULL) { }
virtual ~LexicalScopes();
LexicalScopes() : MF(NULL), CurrentFnLexicalScope(NULL) {}
~LexicalScopes();
/// initialize - Scan machine function and constuct lexical scope nest.
virtual void initialize(const MachineFunction &);
/// initialize - Scan machine function and construct lexical scope nest, resets
/// the instance if necessary.
void initialize(const MachineFunction &);
/// releaseMemory - release memory.
virtual void releaseMemory();
void reset();
/// empty - Return true if there is no lexical scope information available.
bool empty() { return CurrentFnLexicalScope == NULL; }
/// isCurrentFunctionScope - Return true if given lexical scope represents
/// isCurrentFunctionScope - Return true if given lexical scope represents
/// current function.
bool isCurrentFunctionScope(const LexicalScope *LS) {
bool isCurrentFunctionScope(const LexicalScope *LS) {
return LS == CurrentFnLexicalScope;
}
/// getCurrentFunctionScope - Return lexical scope for the current function.
LexicalScope *getCurrentFunctionScope() const { return CurrentFnLexicalScope;}
LexicalScope *getCurrentFunctionScope() const {
return CurrentFnLexicalScope;
}
/// getMachineBasicBlocks - Populate given set using machine basic blocks
/// which have machine instructions that belong to lexical scope identified by
/// DebugLoc.
void getMachineBasicBlocks(DebugLoc DL,
SmallPtrSet<const MachineBasicBlock*, 4> &MBBs);
SmallPtrSet<const MachineBasicBlock *, 4> &MBBs);
/// dominates - Return true if DebugLoc's lexical scope dominates at least one
/// machine instruction's lexical scope in a given machine basic block.
@ -104,7 +107,6 @@ public:
void dump();
private:
/// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If
/// not available then create new lexical scope.
LexicalScope *getOrCreateLexicalScope(DebugLoc DL);
@ -123,8 +125,9 @@ private:
void extractLexicalScopes(SmallVectorImpl<InsnRange> &MIRanges,
DenseMap<const MachineInstr *, LexicalScope *> &M);
void constructScopeNest(LexicalScope *Scope);
void assignInstructionRanges(SmallVectorImpl<InsnRange> &MIRanges,
DenseMap<const MachineInstr *, LexicalScope *> &M);
void
assignInstructionRanges(SmallVectorImpl<InsnRange> &MIRanges,
DenseMap<const MachineInstr *, LexicalScope *> &M);
private:
const MachineFunction *MF;
@ -133,10 +136,11 @@ private:
/// contained LexicalScope*s.
DenseMap<const MDNode *, LexicalScope *> LexicalScopeMap;
/// InlinedLexicalScopeMap - Tracks inlined function scopes in current function.
/// InlinedLexicalScopeMap - Tracks inlined function scopes in current
/// function.
DenseMap<DebugLoc, LexicalScope *> InlinedLexicalScopeMap;
/// AbstractScopeMap - These scopes are not included LexicalScopeMap.
/// AbstractScopeMap - These scopes are not included in LexicalScopeMap.
/// AbstractScopes owns its LexicalScope*s.
DenseMap<const MDNode *, LexicalScope *> AbstractScopeMap;
@ -153,26 +157,23 @@ private:
/// LexicalScope - This class is used to track scope information.
///
class LexicalScope {
virtual void anchor();
public:
LexicalScope(LexicalScope *P, const MDNode *D, const MDNode *I, bool A)
: Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(A),
LastInsn(0), FirstInsn(0), DFSIn(0), DFSOut(0) {
: Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(A), LastInsn(0),
FirstInsn(0), DFSIn(0), DFSOut(0) {
if (Parent)
Parent->addChild(this);
}
virtual ~LexicalScope() {}
// Accessors.
LexicalScope *getParent() const { return Parent; }
const MDNode *getDesc() const { return Desc; }
const MDNode *getInlinedAt() const { return InlinedAtLocation; }
const MDNode *getScopeNode() const { return Desc; }
bool isAbstractScope() const { return AbstractScope; }
LexicalScope *getParent() const { return Parent; }
const MDNode *getDesc() const { return Desc; }
const MDNode *getInlinedAt() const { return InlinedAtLocation; }
const MDNode *getScopeNode() const { return Desc; }
bool isAbstractScope() const { return AbstractScope; }
SmallVectorImpl<LexicalScope *> &getChildren() { return Children; }
SmallVectorImpl<InsnRange> &getRanges() { return Ranges; }
SmallVectorImpl<InsnRange> &getRanges() { return Ranges; }
/// addChild - Add a child scope.
void addChild(LexicalScope *S) { Children.push_back(S); }
@ -189,7 +190,7 @@ public:
/// extendInsnRange - Extend the current instruction range covered by
/// this scope.
void extendInsnRange(const MachineInstr *MI) {
assert (FirstInsn && "MI Range is not open!");
assert(FirstInsn && "MI Range is not open!");
LastInsn = MI;
if (Parent)
Parent->extendInsnRange(MI);
@ -199,7 +200,7 @@ public:
/// until now. This is used when a new scope is encountered while walking
/// machine instructions.
void closeInsnRange(LexicalScope *NewScope = NULL) {
assert (LastInsn && "Last insn missing!");
assert(LastInsn && "Last insn missing!");
Ranges.push_back(InsnRange(FirstInsn, LastInsn));
FirstInsn = NULL;
LastInsn = NULL;
@ -219,28 +220,28 @@ public:
}
// Depth First Search support to walk and manipulate LexicalScope hierarchy.
unsigned getDFSOut() const { return DFSOut; }
void setDFSOut(unsigned O) { DFSOut = O; }
unsigned getDFSIn() const { return DFSIn; }
void setDFSIn(unsigned I) { DFSIn = I; }
unsigned getDFSOut() const { return DFSOut; }
void setDFSOut(unsigned O) { DFSOut = O; }
unsigned getDFSIn() const { return DFSIn; }
void setDFSIn(unsigned I) { DFSIn = I; }
/// dump - print lexical scope.
void dump(unsigned Indent = 0) const;
private:
LexicalScope *Parent; // Parent to this scope.
AssertingVH<const MDNode> Desc; // Debug info descriptor.
AssertingVH<const MDNode> InlinedAtLocation; // Location at which this
// scope is inlined.
bool AbstractScope; // Abstract Scope
SmallVector<LexicalScope *, 4> Children; // Scopes defined in scope.
// Contents not owned.
LexicalScope *Parent; // Parent to this scope.
AssertingVH<const MDNode> Desc; // Debug info descriptor.
AssertingVH<const MDNode> InlinedAtLocation; // Location at which this
// scope is inlined.
bool AbstractScope; // Abstract Scope
SmallVector<LexicalScope *, 4> Children; // Scopes defined in scope.
// Contents not owned.
SmallVector<InsnRange, 4> Ranges;
const MachineInstr *LastInsn; // Last instruction of this scope.
const MachineInstr *FirstInsn; // First instruction of this scope.
unsigned DFSIn, DFSOut; // In & Out Depth use to determine
// scope nesting.
const MachineInstr *LastInsn; // Last instruction of this scope.
const MachineInstr *FirstInsn; // First instruction of this scope.
unsigned DFSIn, DFSOut; // In & Out Depth use to determine
// scope nesting.
};
} // end llvm namespace

View File

@ -45,6 +45,7 @@ namespace llvm {
class TargetInstrInfo;
class TargetRegisterClass;
class VirtRegMap;
class MachineBlockFrequencyInfo;
class LiveIntervals : public MachineFunctionPass {
MachineFunction* MF;
@ -100,7 +101,9 @@ namespace llvm {
virtual ~LiveIntervals();
// Calculate the spill weight to assign to a single instruction.
static float getSpillWeight(bool isDef, bool isUse, BlockFrequency freq);
static float getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
const MachineInstr *Instr);
LiveInterval &getInterval(unsigned Reg) {
if (hasInterval(Reg))

View File

@ -0,0 +1,146 @@
//===- llvm/CodeGen/LivePhysRegs.h - Live Physical Register Set -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the LivePhysRegs utility for tracking liveness of
// physical registers. This can be used for ad-hoc liveness tracking after
// register allocation. You can start with the live-ins/live-outs at the
// beginning/end of a block and update the information while walking the
// instructions inside the block. This implementation tracks the liveness on a
// sub-register granularity.
//
// We assume that the high bits of a physical super-register are not preserved
// unless the instruction has an implicit-use operand reading the super-
// register.
//
// X86 Example:
// %YMM0<def> = ...
// %XMM0<def> = ... (Kills %XMM0, all %XMM0s sub-registers, and %YMM0)
//
// %YMM0<def> = ...
// %XMM0<def> = ..., %YMM0<imp-use> (%YMM0 and all its sub-registers are alive)
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_LIVE_PHYS_REGS_H
#define LLVM_CODEGEN_LIVE_PHYS_REGS_H
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
namespace llvm {
class MachineInstr;
/// \brief A set of live physical registers with functions to track liveness
/// when walking backward/forward through a basic block.
///
/// Liveness is tracked at sub-register granularity: adding a register also
/// adds all of its sub-registers, and removing a register also removes all of
/// its sub-registers and super-registers.
class LivePhysRegs {
  const TargetRegisterInfo *TRI;
  SparseSet<unsigned> LiveRegs;

  LivePhysRegs(const LivePhysRegs &) LLVM_DELETED_FUNCTION;
  LivePhysRegs &operator=(const LivePhysRegs &) LLVM_DELETED_FUNCTION;

public:
  /// \brief Constructs a new empty LivePhysRegs set.
  ///
  /// No TargetRegisterInfo is attached yet; call init() before addReg() or
  /// removeReg(), both of which assert that one is present.
  LivePhysRegs() : TRI(0), LiveRegs() {}

  /// \brief Constructs and initializes an empty LivePhysRegs set whose
  /// universe is the number of registers reported by @p TRI.
  LivePhysRegs(const TargetRegisterInfo *TRI) : TRI(TRI) {
    assert(TRI && "Invalid TargetRegisterInfo pointer.");
    LiveRegs.setUniverse(TRI->getNumRegs());
  }

  /// \brief Clears the set and (re)initializes it for @p RegInfo.
  void init(const TargetRegisterInfo *RegInfo) {
    assert(RegInfo && "Invalid TargetRegisterInfo pointer.");
    TRI = RegInfo;
    LiveRegs.clear();
    LiveRegs.setUniverse(TRI->getNumRegs());
  }

  /// \brief Clears the LivePhysRegs set.
  void clear() { LiveRegs.clear(); }

  /// \brief Returns true if the set is empty.
  bool empty() const { return LiveRegs.empty(); }

  /// \brief Adds a physical register and all its sub-registers to the set.
  void addReg(unsigned Reg) {
    assert(TRI && "LivePhysRegs is not initialized.");
    // The set's universe is getNumRegs(), so valid keys are strictly below it.
    assert(Reg < TRI->getNumRegs() && "Expected a physical register.");
    for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
         SubRegs.isValid(); ++SubRegs)
      LiveRegs.insert(*SubRegs);
  }

  /// \brief Removes a physical register, all its sub-registers, and all its
  /// super-registers from the set.
  void removeReg(unsigned Reg) {
    assert(TRI && "LivePhysRegs is not initialized.");
    // The set's universe is getNumRegs(), so valid keys are strictly below it.
    assert(Reg < TRI->getNumRegs() && "Expected a physical register.");
    for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
         SubRegs.isValid(); ++SubRegs)
      LiveRegs.erase(*SubRegs);
    // Reg itself was already erased above, hence IncludeSelf=false here.
    for (MCSuperRegIterator SuperRegs(Reg, TRI, /*IncludeSelf=*/false);
         SuperRegs.isValid(); ++SuperRegs)
      LiveRegs.erase(*SuperRegs);
  }

  /// \brief Removes physical registers clobbered by the regmask operand @p MO.
  void removeRegsInMask(const MachineOperand &MO);

  /// \brief Returns true if register @p Reg is contained in the set. This also
  /// works if only the super register of @p Reg has been defined, because
  /// addReg() always adds all sub-registers to the set as well.
  bool contains(unsigned Reg) const { return LiveRegs.count(Reg); }

  /// \brief Simulates liveness when stepping backwards over an
  /// instruction(bundle): Remove Defs, add uses. This is the recommended way
  /// of calculating liveness.
  void stepBackward(const MachineInstr &MI);

  /// \brief Simulates liveness when stepping forward over an
  /// instruction(bundle): Remove killed-uses, add defs. This is not the
  /// recommended way, because it depends on accurate kill flags. If possible
  /// use stepBackward() instead of this function.
  void stepForward(const MachineInstr &MI);

  /// \brief Adds all live-in registers of basic block @p MBB.
  void addLiveIns(const MachineBasicBlock *MBB) {
    for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
                                            LE = MBB->livein_end();
         LI != LE; ++LI)
      addReg(*LI);
  }

  /// \brief Adds all live-out registers of basic block @p MBB, i.e. the
  /// live-in registers of each of its successors.
  void addLiveOuts(const MachineBasicBlock *MBB) {
    for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
                                                SE = MBB->succ_end();
         SI != SE; ++SI)
      addLiveIns(*SI);
  }

  typedef SparseSet<unsigned>::const_iterator const_iterator;
  const_iterator begin() const { return LiveRegs.begin(); }
  const_iterator end() const { return LiveRegs.end(); }

  /// \brief Prints the currently live registers to @p OS.
  void print(raw_ostream &OS) const;

  /// \brief Dumps the currently live registers to the debug output.
  void dump() const;
};
/// \brief Streams the register set @p LR to @p OS through LivePhysRegs::print
/// and returns @p OS so that insertions can be chained.
inline raw_ostream &operator<<(raw_ostream &OS, const LivePhysRegs &LR) {
  LR.print(OS);
  return OS;
}
} // namespace llvm
#endif // LLVM_CODEGEN_LIVE_PHYS_REGS_H

View File

@ -1,4 +1,4 @@
//====----- MachineBlockFrequencyInfo.h - MachineBlock Frequency Analysis ----====//
//====-- MachineBlockFrequencyInfo.h - MBB Frequency Analysis -*- C++ -*--====//
//
// The LLVM Compiler Infrastructure
//
@ -49,6 +49,21 @@ public:
/// the other block frequencies. We do this to avoid using of floating points.
///
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const;
MachineFunction *getFunction() const;
void view() const;
// Print the block frequency Freq to OS using the current function's entry
// frequency to convert Freq into a relative decimal form.
raw_ostream &printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const;
// Convenience method that attempts to look up the frequency associated with
// BB and print it to OS.
raw_ostream &printBlockFreq(raw_ostream &OS,
const MachineBasicBlock *MBB) const;
uint64_t getEntryFreq() const;
};
}

View File

@ -101,11 +101,6 @@ class MachineFrameInfo {
// cannot alias any other memory objects.
bool isSpillSlot;
// MayNeedSP - If true the stack object triggered the creation of the stack
// protector. We should allocate this object right after the stack
// protector.
bool MayNeedSP;
/// Alloca - If this stack object is originated from an Alloca instruction
/// this value saves the original IR allocation. Can be NULL.
const AllocaInst *Alloca;
@ -115,9 +110,9 @@ class MachineFrameInfo {
bool PreAllocated;
StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM,
bool isSS, bool NSP, const AllocaInst *Val)
bool isSS, const AllocaInst *Val)
: SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM),
isSpillSlot(isSS), MayNeedSP(NSP), Alloca(Val), PreAllocated(false) {}
isSpillSlot(isSS), Alloca(Val), PreAllocated(false) {}
};
const TargetMachine &TM;
@ -145,6 +140,14 @@ class MachineFrameInfo {
/// to builtin \@llvm.returnaddress.
bool ReturnAddressTaken;
/// HasStackMap - This boolean keeps track of whether there is a call
/// to builtin \@llvm.experimental.stackmap.
bool HasStackMap;
/// HasPatchPoint - This boolean keeps track of whether there is a call
/// to builtin \@llvm.experimental.patchpoint.
bool HasPatchPoint;
/// StackSize - The prolog/epilog code inserter calculates the final stack
/// offsets for all of the fixed size objects, updating the Objects list
/// above. It then updates StackSize to contain the number of bytes that need
@ -223,6 +226,10 @@ class MachineFrameInfo {
/// Whether the "realign-stack" option is on.
bool RealignOption;
/// True if the function includes inline assembly that adjusts the stack
/// pointer.
bool HasInlineAsmWithSPAdjust;
const TargetFrameLowering *getFrameLowering() const;
public:
explicit MachineFrameInfo(const TargetMachine &TM, bool RealignOpt)
@ -231,6 +238,8 @@ public:
HasVarSizedObjects = false;
FrameAddressTaken = false;
ReturnAddressTaken = false;
HasStackMap = false;
HasPatchPoint = false;
AdjustsStack = false;
HasCalls = false;
StackProtectorIdx = -1;
@ -276,6 +285,18 @@ public:
bool isReturnAddressTaken() const { return ReturnAddressTaken; }
void setReturnAddressIsTaken(bool s) { ReturnAddressTaken = s; }
/// hasStackMap - This method may be called any time after instruction
/// selection is complete to determine if there is a call to builtin
/// \@llvm.experimental.stackmap.
bool hasStackMap() const { return HasStackMap; }
void setHasStackMap(bool s = true) { HasStackMap = s; }
/// hasPatchPoint - This method may be called any time after instruction
/// selection is complete to determine if there is a call to builtin
/// \@llvm.experimental.patchpoint.
bool hasPatchPoint() const { return HasPatchPoint; }
void setHasPatchPoint(bool s = true) { HasPatchPoint = s; }
/// getObjectIndexBegin - Return the minimum frame object index.
///
int getObjectIndexBegin() const { return -NumFixedObjects; }
@ -380,14 +401,6 @@ public:
return Objects[ObjectIdx+NumFixedObjects].Alloca;
}
/// NeedsStackProtector - Returns true if the object may need stack
/// protectors.
bool MayNeedStackProtector(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
return Objects[ObjectIdx+NumFixedObjects].MayNeedSP;
}
/// getObjectOffset - Return the assigned stack offset of the specified object
/// from the incoming stack pointer.
///
@ -451,6 +464,10 @@ public:
bool hasCalls() const { return HasCalls; }
void setHasCalls(bool V) { HasCalls = V; }
/// Returns true if the function contains any stack-adjusting inline assembly.
bool hasInlineAsmWithSPAdjust() const { return HasInlineAsmWithSPAdjust; }
void setHasInlineAsmWithSPAdjust(bool B) { HasInlineAsmWithSPAdjust = B; }
/// getMaxCallFrameSize - Return the maximum size of a call frame that must be
/// allocated for an outgoing function call. This is only available if
/// CallFrameSetup/Destroy pseudo instructions are used by the target, and
@ -501,7 +518,7 @@ public:
/// a nonnegative identifier to represent it.
///
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS,
bool MayNeedSP = false, const AllocaInst *Alloca = 0);
const AllocaInst *Alloca = 0);
/// CreateSpillStackObject - Create a new statically sized stack object that
/// represents a spill slot, returning a nonnegative identifier to represent
@ -521,7 +538,7 @@ public:
/// variable sized object is created, whether or not the index returned is
/// actually used.
///
int CreateVariableSizedObject(unsigned Alignment);
int CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca);
/// getCalleeSavedInfo - Returns a reference to call saved info vector for the
/// current function.

View File

@ -131,8 +131,8 @@ class MachineFunction {
/// about the control flow of such functions.
bool ExposesReturnsTwice;
/// True if the function includes MS-style inline assembly.
bool HasMSInlineAsm;
/// True if the function includes any inline assembly.
bool HasInlineAsm;
MachineFunction(const MachineFunction &) LLVM_DELETED_FUNCTION;
void operator=(const MachineFunction&) LLVM_DELETED_FUNCTION;
@ -218,15 +218,14 @@ public:
ExposesReturnsTwice = B;
}
/// Returns true if the function contains any MS-style inline assembly.
bool hasMSInlineAsm() const {
return HasMSInlineAsm;
/// Returns true if the function contains any inline assembly.
bool hasInlineAsm() const {
return HasInlineAsm;
}
/// Set a flag that indicates that the function contains MS-style inline
/// assembly.
void setHasMSInlineAsm(bool B) {
HasMSInlineAsm = B;
/// Set a flag that indicates that the function contains inline assembly.
void setHasInlineAsm(bool B) {
HasInlineAsm = B;
}
/// getInfo - Keep track of various per-function pieces of information for
@ -427,6 +426,15 @@ public:
OperandRecycler.deallocate(Cap, Array);
}
/// \brief Allocate and initialize a register mask with @p NumRegister bits.
uint32_t *allocateRegisterMask(unsigned NumRegister) {
  // One 32-bit word holds 32 register bits; round up to whole words.
  const unsigned NumWords = (NumRegister + 31) / 32;
  uint32_t *const Mask = Allocator.Allocate<uint32_t>(NumWords);
  // Hand the mask back with every bit cleared.
  for (unsigned Word = 0; Word < NumWords; ++Word)
    Mask[Word] = 0;
  return Mask;
}
/// allocateMemRefsArray - Allocate an array to hold MachineMemOperand
/// pointers. This array is owned by the MachineFunction.
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num);

View File

@ -830,6 +830,37 @@ public:
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) const;
/// \brief Applies the constraints (def/use) implied by this MI on \p Reg to
/// the given \p CurRC.
/// If \p ExploreBundle is set and MI is part of a bundle, all the
/// instructions inside the bundle will be taken into account. In other words,
/// this method accumulates all the constraints of the operands of this MI and
/// the related bundle if MI is a bundle or inside a bundle.
///
/// Returns the register class that satisfies both \p CurRC and the
/// constraints set by MI. Returns NULL if such a register class does not
/// exist.
///
/// \pre CurRC must not be NULL.
const TargetRegisterClass *getRegClassConstraintEffectForVReg(
unsigned Reg, const TargetRegisterClass *CurRC,
const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
bool ExploreBundle = false) const;
/// \brief Applies the constraints (def/use) implied by the \p OpIdx operand
/// to the given \p CurRC.
///
/// Returns the register class that satisfies both \p CurRC and the
/// constraints set by operand \p OpIdx of MI. Returns NULL if such a register class
/// does not exist.
///
/// \pre CurRC must not be NULL.
/// \pre The operand at \p OpIdx must be a register.
const TargetRegisterClass *
getRegClassConstraintEffect(unsigned OpIdx, const TargetRegisterClass *CurRC,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) const;
/// tieOperands - Add a tie between the register operands at DefIdx and
/// UseIdx. The tie will cause the register allocator to ensure that the two
/// operands are assigned the same physical register.
@ -1038,6 +1069,13 @@ private:
/// hasPropertyInBundle - Slow path for hasProperty when we're dealing with a
/// bundle.
bool hasPropertyInBundle(unsigned Mask, QueryType Type) const;
/// \brief Implements the logic of getRegClassConstraintEffectForVReg for
/// this MI and the given operand index \p OpIdx.
/// If the related operand does not constrain Reg, this returns CurRC.
const TargetRegisterClass *getRegClassConstraintEffectForVRegImpl(
unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC,
const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const;
};
/// MachineInstrExpressionTrait - Special DenseMapInfo traits to compare

View File

@ -31,6 +31,7 @@
#define LLVM_CODEGEN_MACHINELOOPINFO_H
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
namespace llvm {

View File

@ -134,6 +134,8 @@ public:
/// number.
int64_t getOffset() const { return PtrInfo.Offset; }
unsigned getAddrSpace() const { return PtrInfo.getAddrSpace(); }
/// getSize - Return the size in bytes of the memory reference.
uint64_t getSize() const { return Size; }

View File

@ -56,6 +56,7 @@ public:
MO_GlobalAddress, ///< Address of a global value
MO_BlockAddress, ///< Address of a basic block
MO_RegisterMask, ///< Mask of preserved registers.
MO_RegisterLiveOut, ///< Mask of live-out registers.
MO_Metadata, ///< Metadata reference (for debug info)
MO_MCSymbol ///< MCSymbol reference (for debug/eh info)
};
@ -153,7 +154,7 @@ private:
const ConstantFP *CFP; // For MO_FPImmediate.
const ConstantInt *CI; // For MO_CImmediate. Integers > 64bit.
int64_t ImmVal; // For MO_Immediate.
const uint32_t *RegMask; // For MO_RegisterMask.
const uint32_t *RegMask; // For MO_RegisterMask and MO_RegisterLiveOut.
const MDNode *MD; // For MO_Metadata.
MCSymbol *Sym; // For MO_MCSymbol
@ -246,6 +247,8 @@ public:
bool isBlockAddress() const { return OpKind == MO_BlockAddress; }
/// isRegMask - Tests if this is a MO_RegisterMask operand.
bool isRegMask() const { return OpKind == MO_RegisterMask; }
/// isRegLiveOut - Tests if this is a MO_RegisterLiveOut operand.
bool isRegLiveOut() const { return OpKind == MO_RegisterLiveOut; }
/// isMetadata - Tests if this is a MO_Metadata operand.
bool isMetadata() const { return OpKind == MO_Metadata; }
bool isMCSymbol() const { return OpKind == MO_MCSymbol; }
@ -476,6 +479,12 @@ public:
return Contents.RegMask;
}
/// getRegLiveOut - Returns a bit mask of live-out registers.
const uint32_t *getRegLiveOut() const {
  // Only valid on MO_RegisterLiveOut operands; the mask pointer is stored in
  // the RegMask member of Contents.
  assert(isRegLiveOut() && "Wrong MachineOperand accessor");
  return Contents.RegMask;
}
const MDNode *getMetadata() const {
assert(isMetadata() && "Wrong MachineOperand accessor");
return Contents.MD;
@ -659,6 +668,12 @@ public:
Op.Contents.RegMask = Mask;
return Op;
}
static MachineOperand CreateRegLiveOut(const uint32_t *Mask) {
  // A live-out operand without a mask is meaningless; reject it up front.
  assert(Mask && "Missing live-out register mask");
  MachineOperand LiveOutOp(MachineOperand::MO_RegisterLiveOut);
  // The caller's mask pointer is stored as-is (no copy is made here).
  LiveOutOp.Contents.RegMask = Mask;
  return LiveOutOp;
}
static MachineOperand CreateMetadata(const MDNode *Meta) {
MachineOperand Op(MachineOperand::MO_Metadata);
Op.Contents.MD = Meta;

View File

@ -23,7 +23,7 @@
// return new CustomMachineScheduler(C);
// }
//
// The default scheduler, ScheduleDAGMI, builds the DAG and drives list
// The default scheduler, ScheduleDAGMILive, builds the DAG and drives list
// scheduling while updating the instruction stream, register pressure, and live
// intervals. Most targets don't need to override the DAG builder and list
// scheduler, but subtargets that require custom scheduling heuristics may
@ -93,6 +93,7 @@ class MachineLoopInfo;
class RegisterClassInfo;
class ScheduleDAGInstrs;
class SchedDFSResult;
class ScheduleHazardRecognizer;
/// MachineSchedContext provides enough context from the MachineScheduler pass
/// for the target to instantiate a scheduler.
@ -154,8 +155,8 @@ struct MachineSchedPolicy {
bool OnlyTopDown;
bool OnlyBottomUp;
MachineSchedPolicy():
ShouldTrackPressure(false), OnlyTopDown(false), OnlyBottomUp(false) {}
MachineSchedPolicy(): ShouldTrackPressure(false), OnlyTopDown(false),
OnlyBottomUp(false) {}
};
/// MachineSchedStrategy - Interface to the scheduling algorithm used by
@ -204,6 +205,262 @@ public:
virtual void releaseBottomNode(SUnit *SU) = 0;
};
/// Mutate the DAG as a postpass after normal DAG building.
class ScheduleDAGMutation {
// Private virtual method that is only declared here.
// NOTE(review): presumably defined out of line to pin the vtable to a single
// translation unit -- confirm against the implementation file.
virtual void anchor();
public:
virtual ~ScheduleDAGMutation() {}
/// Apply this mutation to \p DAG. Pure virtual: every concrete mutation must
/// provide an implementation.
virtual void apply(ScheduleDAGMI *DAG) = 0;
};
/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply
/// schedules machine instructions according to the given MachineSchedStrategy
/// without much extra book-keeping. This is the common functionality between
/// PreRA and PostRA MachineScheduler.
class ScheduleDAGMI : public ScheduleDAGInstrs {
protected:
/// Alias analysis taken from the MachineSchedContext (C->AA).
AliasAnalysis *AA;
/// Scheduling strategy handed to the constructor.
MachineSchedStrategy *SchedImpl;
/// Topo - A topological ordering for SUnits which permits fast IsReachable
/// and similar queries.
ScheduleDAGTopologicalSort Topo;
/// Ordered list of DAG postprocessing steps.
std::vector<ScheduleDAGMutation*> Mutations;
/// The top of the unscheduled zone.
MachineBasicBlock::iterator CurrentTop;
/// The bottom of the unscheduled zone.
MachineBasicBlock::iterator CurrentBottom;
/// Record the next node in a scheduled cluster.
const SUnit *NextClusterPred;
const SUnit *NextClusterSucc;
#ifndef NDEBUG
/// The number of instructions scheduled so far. Used to cut off the
/// scheduler at the point determined by misched-cutoff.
unsigned NumInstrsScheduled;
#endif
public:
/// Construct the scheduler from the context \p C and the strategy \p S.
/// \p IsPostRA is forwarded to ScheduleDAGInstrs and is also used as its
/// RemoveKillFlags argument. The region iterators start default-constructed
/// and both cluster pointers start out NULL.
ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S, bool IsPostRA):
ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, IsPostRA,
/*RemoveKillFlags=*/IsPostRA, C->LIS),
AA(C->AA), SchedImpl(S), Topo(SUnits, &ExitSU), CurrentTop(),
CurrentBottom(), NextClusterPred(NULL), NextClusterSucc(NULL) {
#ifndef NDEBUG
NumInstrsScheduled = 0;
#endif
}
virtual ~ScheduleDAGMI();
/// Return true if this DAG supports VReg liveness and RegPressure.
virtual bool hasVRegLiveness() const { return false; }
/// Add a postprocessing step to the DAG builder.
/// Mutations are applied in the order that they are added after normal DAG
/// building and before MachineSchedStrategy initialization.
///
/// ScheduleDAGMI takes ownership of the Mutation object.
void addMutation(ScheduleDAGMutation *Mutation) {
Mutations.push_back(Mutation);
}
/// \brief True if an edge can be added from PredSU to SuccSU without creating
/// a cycle.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
/// \brief Add a DAG edge to the given SU with the given predecessor
/// dependence data.
///
/// \returns true if the edge may be added without creating a cycle OR if an
/// equivalent edge already existed (false indicates failure).
bool addEdge(SUnit *SuccSU, const SDep &PredDep);
/// Current top and bottom boundaries of the unscheduled zone.
MachineBasicBlock::iterator top() const { return CurrentTop; }
MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
/// Implement the ScheduleDAGInstrs interface for handling the next scheduling
/// region. This covers all instructions in a block, while schedule() may only
/// cover a subset.
void enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
unsigned regioninstrs) LLVM_OVERRIDE;
/// Implement ScheduleDAGInstrs interface for scheduling a sequence of
/// reorderable instructions.
virtual void schedule();
/// Change the position of an instruction within the basic block and update
/// live ranges and region boundary iterators.
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
/// Accessors for the recorded next node in a scheduled cluster.
const SUnit *getNextClusterPred() const { return NextClusterPred; }
const SUnit *getNextClusterSucc() const { return NextClusterSucc; }
/// Overrides of the base-class graph viewing entry points.
void viewGraph(const Twine &Name, const Twine &Title) LLVM_OVERRIDE;
void viewGraph() LLVM_OVERRIDE;
protected:
// Top-Level entry points for the schedule() driver...
/// Apply each ScheduleDAGMutation step in order. This allows different
/// instances of ScheduleDAGMI to perform custom DAG postprocessing.
void postprocessDAG();
/// Release ExitSU predecessors and setup scheduler queues.
void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);
/// Update scheduler DAG and queues after scheduling an instruction.
void updateQueues(SUnit *SU, bool IsTopNode);
/// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
void placeDebugValues();
/// \brief dump the scheduled Sequence.
void dumpSchedule() const;
// Lesser helpers...
bool checkSchedLimit();
void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
SmallVectorImpl<SUnit*> &BotRoots);
void releaseSucc(SUnit *SU, SDep *SuccEdge);
void releaseSuccessors(SUnit *SU);
void releasePred(SUnit *SU, SDep *PredEdge);
void releasePredecessors(SUnit *SU);
};
/// ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules
/// machine instructions while updating LiveIntervals and tracking regpressure.
class ScheduleDAGMILive : public ScheduleDAGMI {
protected:
/// Register class information taken from the MachineSchedContext
/// (C->RegClassInfo).
RegisterClassInfo *RegClassInfo;
/// Information about DAG subtrees. If DFSResult is NULL, then ScheduledTrees
/// will be empty.
SchedDFSResult *DFSResult;
BitVector ScheduledTrees;
MachineBasicBlock::iterator LiveRegionEnd;
// Map each SU to its summary of pressure changes. This array is updated for
// liveness during bottom-up scheduling. Top-down scheduling may proceed but
// has no effect on the pressure diffs.
PressureDiffs SUPressureDiffs;
/// True if register pressure tracking is enabled (see isTrackingPressure()).
bool ShouldTrackPressure;
/// Register pressure in this region computed by initRegPressure.
IntervalPressure RegPressure;
RegPressureTracker RPTracker;
/// List of pressure sets that exceed the target's pressure limit before
/// scheduling, listed in increasing set ID order. Each pressure set is paired
/// with its max pressure in the currently scheduled regions.
std::vector<PressureChange> RegionCriticalPSets;
/// Register pressure for the top scheduled instructions (see
/// getTopPressure()), together with its tracker.
IntervalPressure TopPressure;
RegPressureTracker TopRPTracker;
/// Register pressure for the bottom scheduled instructions (see
/// getBotPressure()), together with its tracker.
IntervalPressure BotPressure;
RegPressureTracker BotRPTracker;
public:
/// Construct the pressure-tracking scheduler. The ScheduleDAGMI base is always
/// built with IsPostRA=false; pressure tracking starts disabled, DFSResult
/// starts out null, and each tracker is bound to its matching pressure object.
ScheduleDAGMILive(MachineSchedContext *C, MachineSchedStrategy *S):
ScheduleDAGMI(C, S, /*IsPostRA=*/false), RegClassInfo(C->RegClassInfo),
DFSResult(0), ShouldTrackPressure(false), RPTracker(RegPressure),
TopRPTracker(TopPressure), BotRPTracker(BotPressure)
{}
virtual ~ScheduleDAGMILive();
/// Return true if this DAG supports VReg liveness and RegPressure.
virtual bool hasVRegLiveness() const { return true; }
/// \brief Return true if register pressure tracking is enabled.
bool isTrackingPressure() const { return ShouldTrackPressure; }
/// Get current register pressure for the top scheduled instructions.
const IntervalPressure &getTopPressure() const { return TopPressure; }
const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
/// Get current register pressure for the bottom scheduled instructions.
const IntervalPressure &getBotPressure() const { return BotPressure; }
const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }
/// Get register pressure for the entire scheduling region before scheduling.
const IntervalPressure &getRegPressure() const { return RegPressure; }
/// Get the list of critical pressure sets recorded for this region.
const std::vector<PressureChange> &getRegionCriticalPSets() const {
return RegionCriticalPSets;
}
/// Get the pressure diff recorded for \p SU, looked up by SU->NodeNum.
PressureDiff &getPressureDiff(const SUnit *SU) {
return SUPressureDiffs[SU->NodeNum];
}
/// Compute a DFSResult after DAG building is complete, and before any
/// queue comparisons.
void computeDFSResult();
/// Return a non-null DFS result if the scheduling strategy initialized it.
const SchedDFSResult *getDFSResult() const { return DFSResult; }
/// Mutable access to the ScheduledTrees bit vector.
BitVector &getScheduledTrees() { return ScheduledTrees; }
/// Implement the ScheduleDAGInstrs interface for handling the next scheduling
/// region. This covers all instructions in a block, while schedule() may only
/// cover a subset.
void enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
unsigned regioninstrs) LLVM_OVERRIDE;
/// Implement ScheduleDAGInstrs interface for scheduling a sequence of
/// reorderable instructions.
virtual void schedule();
/// Compute the cyclic critical path through the DAG.
unsigned computeCyclicCriticalPath();
protected:
// Top-Level entry points for the schedule() driver...
/// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking
/// enabled. This sets up three trackers. RPTracker will cover the entire DAG
/// region, TopRPTracker and BotRPTracker will be initialized to the top and
/// bottom of the DAG region without covering any unscheduled instruction.
void buildDAGWithRegPressure();
/// Move an instruction and update register pressure.
void scheduleMI(SUnit *SU, bool IsTopNode);
// Lesser helpers...
void initRegPressure();
void updatePressureDiffs(ArrayRef<unsigned> LiveUses);
void updateScheduledPressure(const SUnit *SU,
const std::vector<unsigned> &NewMaxPressure);
};
//===----------------------------------------------------------------------===//
///
/// Helpers for implementing custom MachineSchedStrategy classes. These take
/// care of the book-keeping associated with list scheduling heuristics.
///
//===----------------------------------------------------------------------===//
/// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience
/// methods for pushing and removing nodes. ReadyQueue's are uniquely identified
/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
@ -261,213 +518,220 @@ public:
#endif
};
/// Mutate the DAG as a postpass after normal DAG building.
class ScheduleDAGMutation {
virtual void anchor();
public:
virtual ~ScheduleDAGMutation() {}
/// Summarize the unscheduled region.
struct SchedRemainder {
// Critical path through the DAG in expected latency.
unsigned CriticalPath;
unsigned CyclicCritPath;
virtual void apply(ScheduleDAGMI *DAG) = 0;
// Scaled count of micro-ops left to schedule.
unsigned RemIssueCount;
bool IsAcyclicLatencyLimited;
// Unscheduled resources
SmallVector<unsigned, 16> RemainingCounts;
void reset() {
CriticalPath = 0;
CyclicCritPath = 0;
RemIssueCount = 0;
IsAcyclicLatencyLimited = false;
RemainingCounts.clear();
}
SchedRemainder() { reset(); }
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
};
/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules
/// machine instructions while updating LiveIntervals and tracking regpressure.
class ScheduleDAGMI : public ScheduleDAGInstrs {
protected:
AliasAnalysis *AA;
RegisterClassInfo *RegClassInfo;
MachineSchedStrategy *SchedImpl;
/// Each Scheduling boundary is associated with ready queues. It tracks the
/// current cycle in the direction of movement, and maintains the state
/// of "hazards" and other interlocks at the current cycle.
class SchedBoundary {
public:
/// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
enum {
TopQID = 1,
BotQID = 2,
LogMaxQID = 2
};
/// Information about DAG subtrees. If DFSResult is NULL, then ScheduledTrees
/// will be empty.
SchedDFSResult *DFSResult;
BitVector ScheduledTrees;
ScheduleDAGMI *DAG;
const TargetSchedModel *SchedModel;
SchedRemainder *Rem;
/// Topo - A topological ordering for SUnits which permits fast IsReachable
/// and similar queries.
ScheduleDAGTopologicalSort Topo;
ReadyQueue Available;
ReadyQueue Pending;
/// Ordered list of DAG postprocessing steps.
std::vector<ScheduleDAGMutation*> Mutations;
ScheduleHazardRecognizer *HazardRec;
MachineBasicBlock::iterator LiveRegionEnd;
private:
/// True if the pending Q should be checked/updated before scheduling another
/// instruction.
bool CheckPending;
// Map each SU to its summary of pressure changes. This array is updated for
// liveness during bottom-up scheduling. Top-down scheduling may proceed but
// has no effect on the pressure diffs.
PressureDiffs SUPressureDiffs;
// For heuristics, keep a list of the nodes that immediately depend on the
// most recently scheduled node.
SmallPtrSet<const SUnit*, 8> NextSUs;
/// Register pressure in this region computed by initRegPressure.
bool ShouldTrackPressure;
IntervalPressure RegPressure;
RegPressureTracker RPTracker;
/// Number of cycles it takes to issue the instructions scheduled in this
/// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
/// See getStalls().
unsigned CurrCycle;
/// List of pressure sets that exceed the target's pressure limit before
/// scheduling, listed in increasing set ID order. Each pressure set is paired
/// with its max pressure in the currently scheduled regions.
std::vector<PressureChange> RegionCriticalPSets;
/// Micro-ops issued in the current cycle
unsigned CurrMOps;
/// The top of the unscheduled zone.
MachineBasicBlock::iterator CurrentTop;
IntervalPressure TopPressure;
RegPressureTracker TopRPTracker;
/// MinReadyCycle - Cycle of the soonest available instruction.
unsigned MinReadyCycle;
/// The bottom of the unscheduled zone.
MachineBasicBlock::iterator CurrentBottom;
IntervalPressure BotPressure;
RegPressureTracker BotRPTracker;
// The expected latency of the critical path in this scheduled zone.
unsigned ExpectedLatency;
/// Record the next node in a scheduled cluster.
const SUnit *NextClusterPred;
const SUnit *NextClusterSucc;
// The latency of dependence chains leading into this zone.
// For each node scheduled bottom-up: DLat = max DLat, N.Depth.
// For each cycle scheduled: DLat -= 1.
unsigned DependentLatency;
/// Count the scheduled (issued) micro-ops that can be retired by
/// time=CurrCycle assuming the first scheduled instr is retired at time=0.
unsigned RetiredMOps;
// Count scheduled resources that have been executed. Resources are
// considered executed if they become ready in the time that it takes to
// saturate any resource including the one in question. Counts are scaled
// for direct comparison with other resources. Counts can be compared with
// MOps * getMicroOpFactor and Latency * getLatencyFactor.
SmallVector<unsigned, 16> ExecutedResCounts;
/// Cache the max count for a single resource.
unsigned MaxExecutedResCount;
// Cache the critical resources ID in this scheduled zone.
unsigned ZoneCritResIdx;
// Is the scheduled region resource limited vs. latency limited.
bool IsResourceLimited;
// Record the highest cycle at which each resource has been reserved by a
// scheduled instruction.
SmallVector<unsigned, 16> ReservedCycles;
#ifndef NDEBUG
/// The number of instructions scheduled so far. Used to cut off the
/// scheduler at the point determined by misched-cutoff.
unsigned NumInstrsScheduled;
// Remember the greatest operand latency as an upper bound on the number of
// times we should retry the pending queue because of a hazard.
unsigned MaxObservedLatency;
#endif
public:
ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S):
ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S), DFSResult(0),
Topo(SUnits, &ExitSU), ShouldTrackPressure(false),
RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure),
CurrentBottom(), BotRPTracker(BotPressure),
NextClusterPred(NULL), NextClusterSucc(NULL) {
/// Pending queues extend the ready queues with the same ID and the
/// PendingFlag set.
SchedBoundary(unsigned ID, const Twine &Name):
DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
Pending(ID << LogMaxQID, Name+".P"),
HazardRec(0) {
reset();
}
~SchedBoundary();
void reset();
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
SchedRemainder *rem);
bool isTop() const {
return Available.getID() == TopQID;
}
/// Number of cycles to issue the instructions scheduled in this zone.
unsigned getCurrCycle() const { return CurrCycle; }
/// Micro-ops issued in the current cycle
unsigned getCurrMOps() const { return CurrMOps; }
/// Return true if the given SU is used by the most recently scheduled
/// instruction.
bool isNextSU(const SUnit *SU) const { return NextSUs.count(SU); }
// The latency of dependence chains leading into this zone.
unsigned getDependentLatency() const { return DependentLatency; }
/// Get the number of latency cycles "covered" by the scheduled
/// instructions. This is the larger of the critical path within the zone
/// and the number of cycles required to issue the instructions.
unsigned getScheduledLatency() const {
return std::max(ExpectedLatency, CurrCycle);
}
unsigned getUnscheduledLatency(SUnit *SU) const {
return isTop() ? SU->getHeight() : SU->getDepth();
}
unsigned getResourceCount(unsigned ResIdx) const {
return ExecutedResCounts[ResIdx];
}
/// Get the scaled count of scheduled micro-ops and resources, including
/// executed resources.
unsigned getCriticalCount() const {
if (!ZoneCritResIdx)
return RetiredMOps * SchedModel->getMicroOpFactor();
return getResourceCount(ZoneCritResIdx);
}
/// Get a scaled count for the minimum execution time of the scheduled
/// micro-ops that are ready to execute by getExecutedCount. Notice the
/// feedback loop.
unsigned getExecutedCount() const {
return std::max(CurrCycle * SchedModel->getLatencyFactor(),
MaxExecutedResCount);
}
unsigned getZoneCritResIdx() const { return ZoneCritResIdx; }
// Is the scheduled region resource limited vs. latency limited.
bool isResourceLimited() const { return IsResourceLimited; }
/// Get the difference between the given SUnit's ready time and the current
/// cycle.
unsigned getLatencyStallCycles(SUnit *SU);
unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles);
bool checkHazard(SUnit *SU);
unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);
unsigned getOtherResourceCount(unsigned &OtherCritIdx);
void releaseNode(SUnit *SU, unsigned ReadyCycle);
void releaseTopNode(SUnit *SU);
void releaseBottomNode(SUnit *SU);
void bumpCycle(unsigned NextCycle);
void incExecutedResources(unsigned PIdx, unsigned Count);
unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle);
void bumpNode(SUnit *SU);
void releasePending();
void removeReady(SUnit *SU);
/// Call this before applying any other heuristics to the Available queue.
/// Updates the Available/Pending Q's if necessary and returns the single
/// available instruction, or NULL if there are multiple candidates.
SUnit *pickOnlyChoice();
#ifndef NDEBUG
NumInstrsScheduled = 0;
void dumpScheduledState();
#endif
}
virtual ~ScheduleDAGMI();
/// \brief Return true if register pressure tracking is enabled.
bool isTrackingPressure() const { return ShouldTrackPressure; }
/// Add a postprocessing step to the DAG builder.
/// Mutations are applied in the order that they are added after normal DAG
/// building and before MachineSchedStrategy initialization.
///
/// ScheduleDAGMI takes ownership of the Mutation object.
void addMutation(ScheduleDAGMutation *Mutation) {
Mutations.push_back(Mutation);
}
/// \brief True if an edge can be added from PredSU to SuccSU without creating
/// a cycle.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
/// \brief Add a DAG edge to the given SU with the given predecessor
/// dependence data.
///
/// \returns true if the edge may be added without creating a cycle OR if an
/// equivalent edge already existed (false indicates failure).
bool addEdge(SUnit *SuccSU, const SDep &PredDep);
MachineBasicBlock::iterator top() const { return CurrentTop; }
MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
/// Implement the ScheduleDAGInstrs interface for handling the next scheduling
/// region. This covers all instructions in a block, while schedule() may only
/// cover a subset.
void enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
unsigned regioninstrs) LLVM_OVERRIDE;
/// Implement ScheduleDAGInstrs interface for scheduling a sequence of
/// reorderable instructions.
virtual void schedule();
/// Change the position of an instruction within the basic block and update
/// live ranges and region boundary iterators.
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
/// Get current register pressure for the top scheduled instructions.
const IntervalPressure &getTopPressure() const { return TopPressure; }
const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
/// Get current register pressure for the bottom scheduled instructions.
const IntervalPressure &getBotPressure() const { return BotPressure; }
const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }
/// Get register pressure for the entire scheduling region before scheduling.
const IntervalPressure &getRegPressure() const { return RegPressure; }
/// Return the RegionCriticalPSets vector by const reference; callers get
/// read-only access and no copy is made.
const std::vector<PressureChange> &getRegionCriticalPSets() const {
return RegionCriticalPSets;
}
/// Return a mutable reference to the PressureDiff stored for \p SU. The entry
/// is selected by indexing SUPressureDiffs with SU->NodeNum; \p SU is
/// dereferenced unconditionally, so it must not be null.
PressureDiff &getPressureDiff(const SUnit *SU) {
return SUPressureDiffs[SU->NodeNum];
}
const SUnit *getNextClusterPred() const { return NextClusterPred; }
const SUnit *getNextClusterSucc() const { return NextClusterSucc; }
/// Compute a DFSResult after DAG building is complete, and before any
/// queue comparisons.
void computeDFSResult();
/// Return a non-null DFS result if the scheduling strategy initialized it.
const SchedDFSResult *getDFSResult() const { return DFSResult; }
BitVector &getScheduledTrees() { return ScheduledTrees; }
/// Compute the cyclic critical path through the DAG.
unsigned computeCyclicCriticalPath();
void viewGraph(const Twine &Name, const Twine &Title) LLVM_OVERRIDE;
void viewGraph() LLVM_OVERRIDE;
protected:
// Top-Level entry points for the schedule() driver...
/// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking
/// enabled. This sets up three trackers. RPTracker will cover the entire DAG
/// region, TopTracker and BottomTracker will be initialized to the top and
/// bottom of the DAG region without covering any unscheduled instruction.
void buildDAGWithRegPressure();
/// Apply each ScheduleDAGMutation step in order. This allows different
/// instances of ScheduleDAGMI to perform custom DAG postprocessing.
void postprocessDAG();
/// Release ExitSU predecessors and setup scheduler queues.
void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);
/// Move an instruction and update register pressure.
void scheduleMI(SUnit *SU, bool IsTopNode);
/// Update scheduler DAG and queues after scheduling an instruction.
void updateQueues(SUnit *SU, bool IsTopNode);
/// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
void placeDebugValues();
/// \brief dump the scheduled Sequence.
void dumpSchedule() const;
// Lesser helpers...
void initRegPressure();
void updatePressureDiffs(ArrayRef<unsigned> LiveUses);
void updateScheduledPressure(const SUnit *SU,
const std::vector<unsigned> &NewMaxPressure);
bool checkSchedLimit();
void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
SmallVectorImpl<SUnit*> &BotRoots);
void releaseSucc(SUnit *SU, SDep *SuccEdge);
void releaseSuccessors(SUnit *SU);
void releasePred(SUnit *SU, SDep *PredEdge);
void releasePredecessors(SUnit *SU);
};
} // namespace llvm

View File

@ -437,8 +437,8 @@ namespace PBQP {
for (NodeItr nodeItr = nodesBegin(), nodeEnd = nodesEnd();
nodeItr != nodeEnd; ++nodeItr) {
os << " node" << nodeItr << " [ label=\""
<< nodeItr << ": " << getNodeCosts(*nodeItr) << "\" ]\n";
os << " node" << *nodeItr << " [ label=\""
<< *nodeItr << ": " << getNodeCosts(*nodeItr) << "\" ]\n";
}
os << " edge [ len=" << getNumNodes() << " ]\n";

View File

@ -207,9 +207,9 @@ public:
/// Fully developed targets will not generally override this.
virtual void addMachinePasses();
/// createTargetScheduler - Create an instance of ScheduleDAGInstrs to be run
/// within the standard MachineScheduler pass for this function and target at
/// the current optimization level.
/// Create an instance of ScheduleDAGInstrs to be run within the standard
/// MachineScheduler pass for this function and target at the current
/// optimization level.
///
/// This can also be used to plug a new MachineSchedStrategy into an instance
/// of the standard ScheduleDAGMI:
@ -221,6 +221,13 @@ public:
return 0;
}
/// Similar to createMachineScheduler but used when postRA machine scheduling
/// is enabled.
virtual ScheduleDAGInstrs *
createPostMachineScheduler(MachineSchedContext *C) const {
return 0;
}
protected:
// Helper to verify the analysis is really immutable.
void setOpt(bool &Opt, bool Val);
@ -403,6 +410,9 @@ namespace llvm {
/// MachineScheduler - This pass schedules machine instructions.
extern char &MachineSchedulerID;
/// PostMachineScheduler - This pass schedules machine instructions postRA.
extern char &PostMachineSchedulerID;
/// SpillPlacement analysis. Suggest optimal placement of spill code between
/// basic blocks.
extern char &SpillPlacementID;
@ -568,6 +578,11 @@ namespace llvm {
/// bundles (created earlier, e.g. during pre-RA scheduling).
extern char &FinalizeMachineBundlesID;
/// StackMapLiveness - This pass analyses the register live-out set of
/// stackmap/patchpoint intrinsics and attaches the calculated information to
/// the intrinsic for later emission to the StackMap.
extern char &StackMapLivenessID;
} // End llvm namespace
#endif

View File

@ -184,6 +184,12 @@ namespace llvm {
|| Contents.OrdKind == MustAliasMem);
}
/// isBarrier - Return true only when this dependence has kind Order and its
/// ordering sub-kind is Barrier.
bool isBarrier() const {
  // The ordering sub-kind is consulted only for Order dependences.
  if (getKind() != Order)
    return false;
  return Contents.OrdKind == Barrier;
}
/// isMustAlias - Test if this is an Order dependence that is marked
/// as "must alias", meaning that the SUnits at either end of the edge
/// have a memory dependence on a known memory location.
@ -292,6 +298,8 @@ namespace llvm {
bool isScheduleHigh : 1; // True if preferable to schedule high.
bool isScheduleLow : 1; // True if preferable to schedule low.
bool isCloned : 1; // True if this node has been cloned.
bool isUnbuffered : 1; // Uses an unbuffered resource.
bool hasReservedResource : 1; // Uses a reserved resource.
Sched::Preference SchedulingPref; // Scheduling preference.
private:
@ -316,7 +324,8 @@ namespace llvm {
isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
isAvailable(false), isScheduled(false), isScheduleHigh(false),
isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
isScheduleLow(false), isCloned(false), isUnbuffered(false),
hasReservedResource(false), SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
@ -330,7 +339,8 @@ namespace llvm {
isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
isAvailable(false), isScheduled(false), isScheduleHigh(false),
isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
isScheduleLow(false), isCloned(false), isUnbuffered(false),
hasReservedResource(false), SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
@ -343,7 +353,8 @@ namespace llvm {
isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
isAvailable(false), isScheduled(false), isScheduleHigh(false),
isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
isScheduleLow(false), isCloned(false), isUnbuffered(false),
hasReservedResource(false), SchedulingPref(Sched::None),
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}

View File

@ -43,7 +43,7 @@ namespace llvm {
};
/// Record a physical register access.
/// For non data-dependent uses, OpIdx == -1.
/// For non-data-dependent uses, OpIdx == -1.
struct PhysRegSUOper {
SUnit *SU;
int OpIdx;
@ -88,6 +88,10 @@ namespace llvm {
/// isPostRA flag indicates vregs cannot be present.
bool IsPostRA;
/// True if the DAG builder should remove kill flags (in preparation for
/// rescheduling).
bool RemoveKillFlags;
/// The standard DAG builder does not normally include terminators as DAG
/// nodes because it does not create the necessary dependencies to prevent
/// reordering. A specialized scheduler can override
@ -145,15 +149,21 @@ namespace llvm {
DbgValueVector DbgValues;
MachineInstr *FirstDbgValue;
/// Set of live physical registers for updating kill flags.
BitVector LiveRegs;
public:
explicit ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo &mli,
const MachineDominatorTree &mdt,
bool IsPostRAFlag,
bool RemoveKillFlags = false,
LiveIntervals *LIS = 0);
virtual ~ScheduleDAGInstrs() {}
bool isPostRA() const { return IsPostRA; }
/// \brief Expose LiveIntervals for use in DAG mutators and such.
LiveIntervals *getLIS() const { return LIS; }
@ -227,12 +237,23 @@ namespace llvm {
/// Return a label for the region of code covered by the DAG.
virtual std::string getDAGName() const;
/// \brief Fix register kill flags that scheduling has made invalid.
void fixupKills(MachineBasicBlock *MBB);
protected:
void initSUnits();
void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx);
void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
/// \brief PostRA helper for rewriting kill flags.
void startBlockForKills(MachineBasicBlock *BB);
/// \brief Toggle a register operand kill flag.
///
/// Other adjustments may be made to the instruction if necessary. Return
/// true if the operand has been deleted, false if not.
bool toggleKillFlag(MachineInstr *MI, MachineOperand &MO);
};
/// newSUnit - Creates a new SUnit and returns a pointer to it.

View File

@ -70,6 +70,22 @@ public:
/// emitted, to advance the hazard state.
virtual void EmitInstruction(SUnit *) {}
/// PreEmitNoops - This callback is invoked prior to emitting an instruction.
/// It should return the number of noops to emit prior to the provided
/// instruction.
/// Note: This is only used during PostRA scheduling. EmitNoop is not called
/// for these noops.
virtual unsigned PreEmitNoops(SUnit *) {
return 0;
}
/// ShouldPreferAnother - This callback may be invoked if getHazardType
/// returns NoHazard. If, even though there is no hazard, it would be better to
/// schedule another available instruction, this callback should return true.
virtual bool ShouldPreferAnother(SUnit *) {
return false;
}
/// AdvanceCycle - This callback is invoked whenever the next top-down
/// instruction to be scheduled cannot issue in the current cycle, either
/// because of latency or resource conflicts. This should increment the

View File

@ -70,6 +70,10 @@ namespace ISD {
/// BUILD_VECTOR where all of the elements are 0 or undef.
bool isBuildVectorAllZeros(const SDNode *N);
/// \brief Return true if the specified node is a BUILD_VECTOR node of
/// all ConstantSDNode or undef.
bool isBuildVectorOfConstantSDNodes(const SDNode *N);
/// isScalarToVector - Return true if the specified node is a
/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
/// element is not an undef.

View File

@ -0,0 +1,65 @@
//===--- StackMapLivenessAnalysis - StackMap Liveness Analysis --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass calculates the liveness for each basic block in a function and
// attaches the register live-out information to a stackmap or patchpoint
// intrinsic if present.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_STACKMAP_LIVENESS_ANALYSIS_H
#define LLVM_CODEGEN_STACKMAP_LIVENESS_ANALYSIS_H
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
namespace llvm {
/// \brief This pass calculates the liveness information for each basic block in
/// a function and attaches the register live-out information to a stackmap or
/// patchpoint intrinsic if present.
///
/// This is an optional pass that has to be explicitly enabled via the
/// -enable-stackmap-liveness and/or -enable-patchpoint-liveness flag. The pass
/// skips functions that don't have any stackmap or patchpoint intrinsics. The
/// information provided by this pass is optional and not required by the
/// aforementioned intrinsics to function.
class StackMapLiveness : public MachineFunctionPass {
// NOTE(review): presumably the function most recently handed to
// runOnMachineFunction() -- confirm against the implementation file.
MachineFunction *MF;
// NOTE(review): presumably the register info of the current target -- confirm.
const TargetRegisterInfo *TRI;
// Set of live physical registers. NOTE(review): presumably the "register live
// set" that addLiveOutSetToMI() and createRegisterMask() refer to -- confirm.
LivePhysRegs LiveRegs;
public:
static char ID;
/// \brief Default construct and initialize the pass.
StackMapLiveness();
/// \brief Tell the pass manager which passes we depend on and what
/// information we preserve.
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
/// \brief Calculate the liveness information for the given machine function.
///
/// Note that the parameter is named MF, the same as the MF member above, so
/// inside the definition the plain name refers to the parameter.
virtual bool runOnMachineFunction(MachineFunction &MF);
private:
/// \brief Performs the actual liveness calculation for the function.
bool calculateLiveness();
/// \brief Add the current register live set to the instruction.
void addLiveOutSetToMI(MachineInstr &MI);
/// \brief Create a register mask and initialize it with the registers from
/// the register live set.
///
/// NOTE(review): a raw pointer is returned; who owns and frees the mask is
/// not visible from this declaration -- confirm at the call site.
uint32_t *createRegisterMask() const;
};
} // llvm namespace
#endif // LLVM_CODEGEN_STACKMAP_LIVENESS_ANALYSIS_H

View File

@ -1,4 +1,5 @@
//===------------------- StackMaps.h - StackMaps ----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -92,19 +93,28 @@ public:
: LocType(LocType), Size(Size), Reg(Reg), Offset(Offset) {}
};
// Typedef a function pointer for functions that parse sequences of operands
// and return a Location, plus a new "next" operand iterator.
typedef std::pair<Location, MachineInstr::const_mop_iterator>
(*OperandParser)(MachineInstr::const_mop_iterator,
MachineInstr::const_mop_iterator, const TargetMachine&);
/// One live-out register record. A record whose Reg field is zero is treated
/// as invalid (see MarkInvalid / IsInvalid).
struct LiveOutReg {
  unsigned short Reg;
  unsigned short RegNo;
  unsigned short Size;

  /// Build an all-zero record, which IsInvalid() reports as invalid.
  LiveOutReg() : Reg(0), RegNo(0), Size(0) {}

  /// Build a record from explicit field values.
  LiveOutReg(unsigned short Reg, unsigned short RegNo, unsigned short Size)
      : Reg(Reg), RegNo(RegNo), Size(Size) {}

  /// True when \p LO carries no register, i.e. its Reg field is zero.
  static bool IsInvalid(const LiveOutReg &LO) { return !LO.Reg; }

  /// Flag this record as invalid by clearing Reg; RegNo and Size are kept.
  void MarkInvalid() { Reg = 0; }

  /// Ordering looks at the dwarf register number (RegNo) only.
  bool operator<(const LiveOutReg &LO) const { return LO.RegNo > RegNo; }
};
// OpTypes are used to encode information about the following logical
// operand (which may consist of several MachineOperands) for the
// OpParser.
typedef enum { DirectMemRefOp, IndirectMemRefOp, ConstantOp } OpType;
StackMaps(AsmPrinter &AP, OperandParser OpParser)
: AP(AP), OpParser(OpParser) {}
StackMaps(AsmPrinter &AP) : AP(AP) {}
/// \brief Generate a stackmap record for a stackmap instruction.
///
@ -121,15 +131,18 @@ public:
private:
typedef SmallVector<Location, 8> LocationVec;
typedef SmallVector<LiveOutReg, 8> LiveOutVec;
struct CallsiteInfo {
const MCExpr *CSOffsetExpr;
unsigned ID;
uint64_t ID;
LocationVec Locations;
LiveOutVec LiveOuts;
CallsiteInfo() : CSOffsetExpr(0), ID(0) {}
CallsiteInfo(const MCExpr *CSOffsetExpr, unsigned ID,
LocationVec Locations)
: CSOffsetExpr(CSOffsetExpr), ID(ID), Locations(Locations) {}
CallsiteInfo(const MCExpr *CSOffsetExpr, uint64_t ID,
LocationVec &Locations, LiveOutVec &LiveOuts)
: CSOffsetExpr(CSOffsetExpr), ID(ID), Locations(Locations),
LiveOuts(LiveOuts) {}
};
typedef std::vector<CallsiteInfo> CallsiteInfoList;
@ -155,16 +168,28 @@ private:
};
AsmPrinter &AP;
OperandParser OpParser;
CallsiteInfoList CSInfos;
ConstantPool ConstPool;
MachineInstr::const_mop_iterator
parseOperand(MachineInstr::const_mop_iterator MOI,
MachineInstr::const_mop_iterator MOE,
LocationVec &Locs, LiveOutVec &LiveOuts) const;
/// \brief Create a live-out register record for the given register @p Reg.
LiveOutReg createLiveOutReg(unsigned Reg, const MCRegisterInfo &MCRI,
const TargetRegisterInfo *TRI) const;
/// \brief Parse the register live-out mask and return a vector of live-out
/// registers that need to be recorded in the stackmap.
LiveOutVec parseRegisterLiveOutMask(const uint32_t *Mask) const;
/// This should be called by the MC lowering code _immediately_ before
/// lowering the MI to an MCInst. It records where the operands for the
/// instruction are stored, and outputs a label to record the offset of
/// the call from the start of the text section. In special cases (e.g. AnyReg
/// calling convention) the return register is also recorded if requested.
void recordStackMapOpers(const MachineInstr &MI, uint32_t ID,
void recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
MachineInstr::const_mop_iterator MOI,
MachineInstr::const_mop_iterator MOE,
bool recordResult = false);

View File

@ -20,11 +20,11 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
class DominatorTree;
class Function;
class Module;
class PHINode;

View File

@ -98,6 +98,14 @@ public:
return SchedModel.getProcResource(PIdx);
}
#ifndef NDEBUG
/// Return a printable name for resource index \p PIdx. Index zero is reported
/// as "MOps"; any other index yields the Name field of the processor resource
/// returned by SchedModel.getProcResource(PIdx).
const char *getResourceName(unsigned PIdx) const {
  if (PIdx != 0)
    return SchedModel.getProcResource(PIdx)->Name;
  return "MOps";
}
#endif
typedef const MCWriteProcResEntry *ProcResIter;
// \brief Get an iterator into the processor resources consumed by this

View File

@ -880,18 +880,18 @@ namespace llvm {
static EVT getExtendedIntegerVT(LLVMContext &C, unsigned BitWidth);
static EVT getExtendedVectorVT(LLVMContext &C, EVT VT,
unsigned NumElements);
bool isExtendedFloatingPoint() const;
bool isExtendedInteger() const;
bool isExtendedVector() const;
bool isExtended16BitVector() const;
bool isExtended32BitVector() const;
bool isExtended64BitVector() const;
bool isExtended128BitVector() const;
bool isExtended256BitVector() const;
bool isExtended512BitVector() const;
bool isExtended1024BitVector() const;
bool isExtendedFloatingPoint() const LLVM_READONLY;
bool isExtendedInteger() const LLVM_READONLY;
bool isExtendedVector() const LLVM_READONLY;
bool isExtended16BitVector() const LLVM_READONLY;
bool isExtended32BitVector() const LLVM_READONLY;
bool isExtended64BitVector() const LLVM_READONLY;
bool isExtended128BitVector() const LLVM_READONLY;
bool isExtended256BitVector() const LLVM_READONLY;
bool isExtended512BitVector() const LLVM_READONLY;
bool isExtended1024BitVector() const LLVM_READONLY;
EVT getExtendedVectorElementType() const;
unsigned getExtendedVectorNumElements() const;
unsigned getExtendedVectorNumElements() const LLVM_READONLY;
unsigned getExtendedSizeInBits() const;
};

View File

@ -232,7 +232,7 @@ public:
///
/// This function is deprecated for the MCJIT execution engine.
///
/// FIXME: the JIT and MCJIT interfaces should be disentangled or united
/// FIXME: the JIT and MCJIT interfaces should be disentangled or united
/// again, if possible.
///
virtual void *getPointerToNamedFunction(const std::string &Name,
@ -550,7 +550,7 @@ public:
WhichEngine = w;
return *this;
}
/// setMCJITMemoryManager - Sets the MCJIT memory manager to use. This allows
/// clients to customize their memory allocation policies for the MCJIT. This
/// is only appropriate for the MCJIT; setting this and configuring the builder

View File

@ -19,6 +19,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetMachine.h"
#include <string>
@ -49,6 +50,7 @@ private:
llvm::OwningPtr<llvm::Module> _module;
llvm::OwningPtr<llvm::TargetMachine> _target;
llvm::MCObjectFileInfo ObjFileInfo;
std::vector<NameAndAttributes> _symbols;
// _defines and _undefines only needed to disambiguate tentative definitions

View File

@ -157,6 +157,28 @@ struct import_lookup_table_entry32 {
}
};
/// Fixed-layout record for an export directory table. Every field is a
/// support::ulittle16_t or support::ulittle32_t and the fields are laid out in
/// declaration order, so they must not be reordered or resized.
/// NOTE(review): the field names appear to follow the PE/COFF export directory
/// table -- confirm against the specification before relying on them.
struct export_directory_table_entry {
support::ulittle32_t ExportFlags;
support::ulittle32_t TimeDateStamp;
support::ulittle16_t MajorVersion;
support::ulittle16_t MinorVersion;
support::ulittle32_t NameRVA;
support::ulittle32_t OrdinalBase;
support::ulittle32_t AddressTableEntries;
support::ulittle32_t NumberOfNamePointers;
support::ulittle32_t ExportAddressTableRVA;
support::ulittle32_t NamePointerRVA;
support::ulittle32_t OrdinalTableRVA;
};
/// One export address table slot. Both members have the same type and, this
/// being a union, share storage: they are two names for a single value.
/// NOTE(review): which name applies to a given entry is decided elsewhere and
/// cannot be determined from this declaration.
union export_address_table_entry {
support::ulittle32_t ExportRVA;
support::ulittle32_t ForwarderRVA;
};
typedef support::ulittle32_t export_name_pointer_table_entry;
typedef support::ulittle16_t export_ordinal_table_entry;
struct coff_symbol {
struct StringTableOffset {
support::ulittle32_t Zeroes;

View File

@ -18,10 +18,10 @@
namespace llvm {
class DataLayout;
class GlobalValue;
class MCContext;
template <typename T> class SmallVectorImpl;
class TargetMachine;
class Twine;
class Mangler {
@ -33,7 +33,7 @@ public:
};
private:
const TargetMachine *TM;
const DataLayout *DL;
/// AnonGlobalIDs - We need to give global values the same name every time
/// they are mangled. This keeps track of the number we give to anonymous
@ -46,20 +46,18 @@ private:
unsigned NextAnonGlobalID;
public:
Mangler(const TargetMachine *TM) : TM(TM), NextAnonGlobalID(1) {}
Mangler(const DataLayout *DL) : DL(DL), NextAnonGlobalID(1) {}
/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
/// and the specified global variable's name. If the global variable doesn't
/// have a name, this fills in a unique name for the global.
void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV,
bool isImplicitlyPrivate, bool UseGlobalPrefix = true);
void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV);
/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
/// and the specified name as the global variable name. GVName must not be
/// empty.
void getNameWithPrefix(SmallVectorImpl<char> &OutName, const Twine &GVName,
ManglerPrefixTy PrefixTy = Mangler::Default,
bool UseGlobalPrefix = true);
ManglerPrefixTy PrefixTy = Mangler::Default);
};
} // End llvm namespace

View File

@ -685,6 +685,18 @@ class InstrInfo {
//
// This option is a temporary migration help. It will go away.
bit guessInstructionProperties = 1;
// TableGen's instruction encoder generator has support for matching operands
// to bit-field variables both by name and by position. While matching by
// name is preferred, this is currently not possible for complex operands,
// and some targets still rely on the positional encoding rules. When
// generating a decoder for such targets, the positional encoding rules must
// be used by the decoder generator as well.
//
// This option is temporary; it will go away once the TableGen decoder
// generator has better support for complex operands and targets have
// migrated away from using positionally encoded operands.
bit decodePositionallyEncodedOperands = 0;
}
// Standard Pseudo Instructions.
@ -805,6 +817,7 @@ def STACKMAP : Instruction {
let InOperandList = (ins i32imm:$id, i32imm:$nbytes, variable_ops);
let isCall = 1;
let mayLoad = 1;
let usesCustomInserter = 1;
}
def PATCHPOINT : Instruction {
let OutOperandList = (outs unknown:$dst);
@ -812,6 +825,7 @@ def PATCHPOINT : Instruction {
i32imm:$nargs, i32imm:$cc, variable_ops);
let isCall = 1;
let mayLoad = 1;
let usesCustomInserter = 1;
}
}
@ -947,7 +961,7 @@ class AsmWriter {
// AsmWriterClassName - This specifies the suffix to use for the asmwriter
// class. Generated AsmWriter classes are always prefixed with the target
// name.
string AsmWriterClassName = "AsmPrinter";
string AsmWriterClassName = "InstPrinter";
// Variant - AsmWriters can be of multiple different variants. Variants are
// used to support targets that need to emit assembly code in ways that are
@ -957,21 +971,13 @@ class AsmWriter {
// == 1, will expand to "y".
int Variant = 0;
// FirstOperandColumn/OperandSpacing - If the assembler syntax uses a columnar
// layout, the asmwriter can actually generate output in this columns (in
// verbose-asm mode). These two values indicate the width of the first column
// (the "opcode" area) and the width to reserve for subsequent operands. When
// verbose asm mode is enabled, operands will be indented to respect this.
int FirstOperandColumn = -1;
// OperandSpacing - Space between operand columns.
int OperandSpacing = -1;
// isMCAsmWriter - Is this assembly writer for an MC emitter? This controls
// generation of the printInstruction() method. For MC printers, it takes
// an MCInstr* operand, otherwise it takes a MachineInstr*.
bit isMCAsmWriter = 0;
bit isMCAsmWriter = 1;
}
def DefaultAsmWriter : AsmWriter;

View File

@ -251,6 +251,18 @@ namespace llvm {
floorf,
/// long double floorl(long double x);
floorl,
/// double fmax(double x, double y);
fmax,
/// float fmaxf(float x, float y);
fmaxf,
/// long double fmaxl(long double x, long double y);
fmaxl,
/// double fmin(double x, double y);
fmin,
/// float fminf(float x, float y);
fminf,
/// long double fminl(long double x, long double y);
fminl,
/// double fmod(double x, double y);
fmod,
/// float fmodf(float x, float y);
@ -703,6 +715,8 @@ public:
case LibFunc::sqrt: case LibFunc::sqrtf: case LibFunc::sqrtl:
case LibFunc::sqrt_finite: case LibFunc::sqrtf_finite:
case LibFunc::sqrtl_finite:
case LibFunc::fmax: case LibFunc::fmaxf: case LibFunc::fmaxl:
case LibFunc::fmin: case LibFunc::fminf: case LibFunc::fminl:
case LibFunc::floor: case LibFunc::floorf: case LibFunc::floorl:
case LibFunc::nearbyint: case LibFunc::nearbyintf: case LibFunc::nearbyintl:
case LibFunc::ceil: case LibFunc::ceilf: case LibFunc::ceill:

View File

@ -173,6 +173,11 @@ public:
return true;
}
/// Return true if multiple condition registers are available.
bool hasMultipleConditionRegisters() const {
return HasMultipleConditionRegisters;
}
/// Return true if a vector of the given type should be split
/// (TypeSplitVector) instead of promoted (TypePromoteInteger) during type
/// legalization.
@ -880,13 +885,13 @@ protected:
}
/// Indicate whether this target prefers to use _setjmp to implement
/// llvm.setjmp or the non _ version. Defaults to false.
/// llvm.setjmp or the version without _. Defaults to false.
void setUseUnderscoreSetJmp(bool Val) {
UseUnderscoreSetJmp = Val;
}
/// Indicate whether this target prefers to use _longjmp to implement
/// llvm.longjmp or the non _ version. Defaults to false.
/// llvm.longjmp or the version without _. Defaults to false.
void setUseUnderscoreLongJmp(bool Val) {
UseUnderscoreLongJmp = Val;
}
@ -926,6 +931,15 @@ protected:
SelectIsExpensive = isExpensive;
}
/// Tells the code generator that the target has multiple (allocatable)
/// condition registers that can be used to store the results of comparisons
/// for use by selects and conditional branches. With multiple condition
/// registers, the code generator will not aggressively sink comparisons into
/// the blocks of their users.
void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
HasMultipleConditionRegisters = hasManyRegs;
}
/// Tells the code generator not to expand a sequence of operations into
/// separate sequences that increase the amount of flow control.
void setJumpIsExpensive(bool isExpensive = true) {
@ -1321,6 +1335,13 @@ private:
/// the select operations if possible.
bool SelectIsExpensive;
/// Tells the code generator that the target has multiple (allocatable)
/// condition registers that can be used to store the results of comparisons
/// for use by selects and conditional branches. With multiple condition
/// registers, the code generator will not aggressively sink comparisons into
/// the blocks of their users.
bool HasMultipleConditionRegisters;
/// Tells the code generator not to expand integer divides by constants into a
/// sequence of muls, adds, and shifts. This is a hack until a real cost
/// model is in place. If we ever optimize for size, this will be set to true
@ -1685,6 +1706,10 @@ protected:
/// Return true if the value types that can be represented by the specified
/// register class are all legal.
bool isLegalRC(const TargetRegisterClass *RC) const;
/// Replace/modify any TargetFrameIndex operands with a target-dependent
/// sequence of memory operands that is recognized by PrologEpilogInserter.
MachineBasicBlock *emitPatchPoint(MachineInstr *MI, MachineBasicBlock *MBB) const;
};
/// This class defines information used to lower LLVM code to legal SelectionDAG
@ -2078,6 +2103,18 @@ public:
return NULL;
}
/// This callback is used to prepare for a volatile or atomic load.
/// It takes a chain node as input and returns the chain for the load itself.
///
/// Having a callback like this is necessary for targets like SystemZ,
/// which allows a CPU to reuse the result of a previous load indefinitely,
/// even if a cache-coherent store is performed by another CPU. The default
/// implementation does nothing.
virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL,
SelectionDAG &DAG) const {
return Chain;
}
/// This callback is invoked by the type legalizer to legalize nodes with an
/// illegal operand type but legal result types. It replaces the
/// LowerOperation callback in the type Legalizer. The reason we can not do

View File

@ -34,6 +34,7 @@ namespace llvm {
class TargetLoweringObjectFile : public MCObjectFileInfo {
MCContext *Ctx;
const DataLayout *DL;
TargetLoweringObjectFile(
const TargetLoweringObjectFile&) LLVM_DELETED_FUNCTION;
@ -42,7 +43,7 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
public:
MCContext &getContext() const { return *Ctx; }
TargetLoweringObjectFile() : MCObjectFileInfo(), Ctx(0) {}
TargetLoweringObjectFile() : MCObjectFileInfo(), Ctx(0), DL(0) {}
virtual ~TargetLoweringObjectFile();
@ -121,6 +122,11 @@ public:
/// main label that is the address of the global
MCSymbol *getSymbol(Mangler &M, const GlobalValue *GV) const;
/// Return the MCSymbol for a private symbol with global value name as its
/// base, with the specified suffix.
MCSymbol *getSymbolWithGlobalValueBase(Mangler &M, const GlobalValue *GV,
StringRef Suffix) const;
// getCFIPersonalitySymbol - The symbol that gets passed to .cfi_personality.
virtual MCSymbol *
getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,

View File

@ -88,6 +88,7 @@ protected: // Can only create subclasses.
unsigned MCUseLoc : 1;
unsigned MCUseCFI : 1;
unsigned MCUseDwarfDirectory : 1;
unsigned RequireStructuredCFG : 1;
public:
virtual ~TargetMachine();
@ -108,7 +109,7 @@ public:
void resetTargetOptions(const MachineFunction *MF) const;
// Interfaces to the major aspects of target machine information:
//
//
// -- Instruction opcode and operand information
// -- Pipelines and scheduling information
// -- Stack frame information
@ -156,6 +157,9 @@ public:
return 0;
}
bool requiresStructuredCFG() const { return RequireStructuredCFG; }
void setRequiresStructuredCFG(bool Value) { RequireStructuredCFG = Value; }
/// hasMCRelaxAll - Check whether all machine code instructions should be
/// relaxed.
bool hasMCRelaxAll() const { return MCRelaxAll; }

View File

@ -672,6 +672,17 @@ public:
// Do nothing.
}
/// Allow the target to reverse allocation order of local live ranges. This
/// will generally allocate shorter local live ranges first. For targets with
/// many registers, this could reduce regalloc compile time by a large
/// factor. It should still achieve optimal coloring; however, it can change
/// register eviction decisions. It is disabled by default for two reasons:
/// (1) Top-down allocation is simpler and easier to debug for targets that
/// don't benefit from reversing the order.
/// (2) Bottom-up allocation could result in poor eviction decisions on some
/// targets affecting the performance of compiled code.
virtual bool reverseLocalAssignment() const { return false; }
/// requiresRegisterScavenging - returns true if the target requires (and can
/// make use of) the register scavenger.
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const {

View File

@ -114,14 +114,46 @@ class ProcResourceKind;
// resources implies using one of the super resources.
//
// ProcResourceUnits normally model a few buffered resources within an
// out-of-order engine that the compiler attempts to conserve.
// Buffered resources may be held for multiple clock cycles, but the
// scheduler does not pin them to a particular clock cycle relative to
// instruction dispatch. Setting BufferSize=0 changes this to an
// in-order resource. In this case, the scheduler counts down from the
// cycle that the instruction issues in-order, forcing an interlock
// with subsequent instructions that require the same resource until
// the number of ResourceCycles specified in WriteRes expire.
// out-of-order engine. Buffered resources may be held for multiple
// clock cycles, but the scheduler does not pin them to a particular
// clock cycle relative to instruction dispatch. Setting BufferSize=0
// changes this to an in-order issue/dispatch resource. In this case,
// the scheduler counts down from the cycle that the instruction
// issues in-order, forcing a stall whenever a subsequent instruction
// requires the same resource until the number of ResourceCycles
// specified in WriteRes expire. Setting BufferSize=1 changes this to
// an in-order latency resource. In this case, the scheduler models
// producer/consumer stalls between instructions that use the
// resource.
//
// Examples (all assume an out-of-order engine):
//
// Use BufferSize = -1 for "issue ports" fed by a unified reservation
// station. Here the size of the reservation station is modeled by
// MicroOpBufferSize, which should be the minimum size of either the
// register rename pool, unified reservation station, or reorder
// buffer.
//
// Use BufferSize = 0 for resources that force "dispatch/issue
// groups". (Different processors define dispatch/issue
// differently. Here we refer to the stage between decoding into micro-ops
// and moving them into a reservation station.) Normally NumMicroOps
// is sufficient to limit dispatch/issue groups. However, some
// processors can form groups with only certain combinations of
// instruction types. e.g. POWER7.
//
// Use BufferSize = 1 for in-order execution units. This is used for
// an in-order pipeline within an out-of-order core where scheduling
// dependent operations back-to-back is guaranteed to cause a
// bubble. e.g. Cortex-a9 floating-point.
//
// Use BufferSize > 1 for out-of-order execution units with a
// separate reservation station. This simply models the size of the
// reservation station.
//
// To model both dispatch/issue groups and in-order execution units,
// create two types of units, one with BufferSize=0 and one with
// BufferSize=1.
//
// SchedModel ties these units to a processor for any stand-alone defs
// of this class. Instances of subclass ProcResource will be automatically

View File

@ -18,7 +18,10 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
@ -38,6 +41,12 @@
#include <algorithm>
using namespace llvm;
/// Cutoff after which to stop analysing a set of phi nodes potentially involved
/// in a cycle. Because we are analysing 'through' phi nodes we need to be
/// careful with value equivalence. We use reachability to make sure a value
/// cannot be involved in a cycle.
const unsigned MaxNumPhiBBsValueReachabilityCheck = 20;
//===----------------------------------------------------------------------===//
// Useful predicates
//===----------------------------------------------------------------------===//
@ -403,42 +412,6 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
return V;
}
/// GetIndexDifference - Dest and Src are the variable indices from two
/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
/// difference between the two pointers.
static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
const SmallVectorImpl<VariableGEPIndex> &Src) {
if (Src.empty()) return;
for (unsigned i = 0, e = Src.size(); i != e; ++i) {
const Value *V = Src[i].V;
ExtensionKind Extension = Src[i].Extension;
int64_t Scale = Src[i].Scale;
// Find V in Dest. This is N^2, but pointer indices almost never have more
// than a few variable indexes.
for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
// If we found it, subtract off Scale V's from the entry in Dest. If it
// goes to zero, remove the entry.
if (Dest[j].Scale != Scale)
Dest[j].Scale -= Scale;
else
Dest.erase(Dest.begin()+j);
Scale = 0;
break;
}
// If we didn't consume this entry, add it to the end of the Dest list.
if (Scale) {
VariableGEPIndex Entry = { V, Extension, -Scale };
Dest.push_back(Entry);
}
}
}
//===----------------------------------------------------------------------===//
// BasicAliasAnalysis Pass
//===----------------------------------------------------------------------===//
@ -492,6 +465,7 @@ namespace {
// SmallDenseMap if it ever grows larger.
// FIXME: This should really be shrink_to_inline_capacity_and_clear().
AliasCache.shrink_and_clear();
VisitedPhiBBs.clear();
return Alias;
}
@ -532,9 +506,39 @@ namespace {
typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy;
AliasCacheTy AliasCache;
/// \brief Track phi nodes we have visited. When we interpret "Value" pointer
/// equality as value equality we need to make sure that the "Value" is not
/// part of a cycle. Otherwise, two uses could come from different
/// "iterations" of a cycle and see different values for the same "Value"
/// pointer.
/// The following example shows the problem:
/// %p = phi(%alloca1, %addr2)
/// %l = load %ptr
/// %addr1 = gep, %alloca2, 0, %l
/// %addr2 = gep %alloca2, 0, (%l + 1)
/// alias(%p, %addr1) -> MayAlias !
/// store %l, ...
SmallPtrSet<const BasicBlock*, 8> VisitedPhiBBs;
// Visited - Track instructions visited by pointsToConstantMemory.
SmallPtrSet<const Value*, 16> Visited;
/// \brief Check whether two Values can be considered equivalent.
///
/// In addition to pointer equivalence of \p V1 and \p V2 this checks
/// whether they can not be part of a cycle in the value graph by looking at
/// all visited phi nodes and making sure that the phis cannot reach the
/// value. We have to do this because we are looking through phi nodes (that
/// is, we say noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB)).
bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
/// \brief Dest and Src are the variable indices from two decomposed
/// GetElementPtr instructions GEP1 and GEP2 which have common base
/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
/// difference between the two pointers.
void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
const SmallVectorImpl<VariableGEPIndex> &Src);
// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
// instruction against another.
AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
@ -1094,6 +1098,10 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
const MDNode *PNTBAAInfo,
const Value *V2, uint64_t V2Size,
const MDNode *V2TBAAInfo) {
// Track phi nodes we have visited. We use this information when we determine
// value equivalence.
VisitedPhiBBs.insert(PN->getParent());
// If the values are PHIs in the same block, we can do a more precise
// as well as efficient check: just check for aliases between the values
// on corresponding edges.
@ -1187,7 +1195,13 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
V2 = V2->stripPointerCasts();
// Are we checking for alias of the same value?
if (V1 == V2) return MustAlias;
// Because we look 'through' phi nodes we could look at "Value" pointers from
// different iterations. We must therefore make sure that this is not the
// case. The function isValueEqualInPotentialCycles ensures that this cannot
// happen by looking at the visited phi nodes and making sure they cannot
// reach the value.
if (isValueEqualInPotentialCycles(V1, V2))
return MustAlias;
if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
return NoAlias; // Scalars cannot alias each other
@ -1307,3 +1321,71 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
Location(V2, V2Size, V2TBAAInfo));
return AliasCache[Locs] = Result;
}
/// \brief Decide whether \p V and \p V2 may be treated as the same value.
///
/// Pointer inequality always yields false. Pointer-equal values that are not
/// instructions yield true. For pointer-equal instructions the answer is true
/// only if none of the blocks recorded in VisitedPhiBBs can potentially reach
/// the instruction; otherwise the two uses might observe the value from
/// different iterations of a cycle. When more than
/// MaxNumPhiBBsValueReachabilityCheck blocks have been recorded the check is
/// abandoned and false is returned.
bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
                                                       const Value *V2) {
  if (V != V2)
    return false;

  // Only instructions are subjected to the reachability check below.
  const Instruction *Inst = dyn_cast<Instruction>(V);
  if (!Inst)
    return true;

  // Nothing has been looked through yet, so no recorded block can reach Inst.
  // Answer immediately instead of paying for the analysis lookups below; the
  // loop would not execute and the result would be true anyway.
  if (VisitedPhiBBs.empty())
    return true;

  // Too many phi blocks to check cheaply: give up and answer conservatively.
  if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)
    return false;

  // Use dominance or loop info if available.
  DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
  LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>();

  // Make sure that the visited phis cannot reach the Value. This ensures that
  // the Values cannot come from different iterations of a potential cycle the
  // phi nodes could be involved in.
  for (SmallPtrSet<const BasicBlock *, 8>::iterator PI = VisitedPhiBBs.begin(),
                                                    PE = VisitedPhiBBs.end();
       PI != PE; ++PI)
    if (isPotentiallyReachable((*PI)->begin(), Inst, DT, LI))
      return false;

  return true;
}
/// GetIndexDifference - Subtract the variable indices in \p Src (from GEP2)
/// from those in \p Dest (from GEP1), leaving in \p Dest the symbolic
/// difference between two decomposed GetElementPtr expressions that share a
/// base pointer. Entries whose scales cancel are removed from \p Dest; Src
/// entries with no counterpart are appended with a negated scale.
void BasicAliasAnalysis::GetIndexDifference(
    SmallVectorImpl<VariableGEPIndex> &Dest,
    const SmallVectorImpl<VariableGEPIndex> &Src) {
  for (unsigned SrcIdx = 0, NumSrc = Src.size(); SrcIdx != NumSrc; ++SrcIdx) {
    // Copy the fields out up front rather than holding a reference into Src
    // while Dest is being modified.
    const Value *V = Src[SrcIdx].V;
    ExtensionKind Extension = Src[SrcIdx].Extension;
    int64_t Scale = Src[SrcIdx].Scale;

    // Look for a matching entry in Dest. This is quadratic, but pointer
    // indices almost never have more than a few variable indexes.
    bool Matched = false;
    for (unsigned DestIdx = 0, NumDest = Dest.size(); DestIdx != NumDest;
         ++DestIdx) {
      const bool SameIndex =
          isValueEqualInPotentialCycles(Dest[DestIdx].V, V) &&
          Dest[DestIdx].Extension == Extension;
      if (!SameIndex)
        continue;

      // Found it: take Scale V's away from the Dest entry, dropping the entry
      // altogether when the scales cancel exactly.
      if (Dest[DestIdx].Scale == Scale)
        Dest.erase(Dest.begin() + DestIdx);
      else
        Dest[DestIdx].Scale -= Scale;

      Matched = true;
      break;
    }

    // An unmatched entry with a non-zero scale is appended to Dest, negated.
    if (!Matched && Scale) {
      VariableGEPIndex Entry = { V, Extension, -Scale };
      Dest.push_back(Entry);
    }
  }
}

View File

@ -86,7 +86,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
OS << Node->getName().str() << ":";
switch (ViewBlockFreqPropagationDAG) {
case GVDT_Fraction:
Graph->getBlockFreq(Node).print(OS);
Graph->printBlockFreq(OS, Node);
break;
case GVDT_Integer:
OS << Graph->getBlockFreq(Node).getFrequency();
@ -159,3 +159,18 @@ void BlockFrequencyInfo::view() const {
const Function *BlockFrequencyInfo::getFunction() const {
return BFI->Fn;
}
/// Print the block frequency \p Freq to \p OS by forwarding to the
/// implementation object held in BFI, and return whatever stream reference
/// that call returns.
raw_ostream &BlockFrequencyInfo::
printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const {
return BFI->printBlockFreq(OS, Freq);
}
/// Print the frequency of basic block \p BB to \p OS by forwarding to the
/// implementation object held in BFI, and return whatever stream reference
/// that call returns.
raw_ostream &
BlockFrequencyInfo::printBlockFreq(raw_ostream &OS,
const BasicBlock *BB) const {
return BFI->printBlockFreq(OS, BB);
}
/// Return the entry frequency as reported by the implementation object held
/// in BFI.
uint64_t BlockFrequencyInfo::getEntryFreq() const {
return BFI->getEntryFreq();
}

View File

@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "branch-prob"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/LoopInfo.h"
@ -483,6 +484,8 @@ void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool BranchProbabilityInfo::runOnFunction(Function &F) {
DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
<< " ----\n\n");
LastF = &F; // Store the last function we ran on for printing.
LI = &getAnalysis<LoopInfo>();
assert(PostDominatedByUnreachable.empty());
@ -591,6 +594,13 @@ getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const {
return DEFAULT_WEIGHT;
}
/// Get the weight of the edge leaving \p Src through the successor that
/// \p Dst designates. The iterator is converted to its position in Src's
/// successor list and the index-based overload does the actual lookup.
uint32_t BranchProbabilityInfo::getEdgeWeight(const BasicBlock *Src,
                                              succ_const_iterator Dst) const {
  // Distance from the first successor gives the successor's ordinal position.
  const size_t SuccessorIndex = std::distance(succ_begin(Src), Dst);
  return getEdgeWeight(Src, SuccessorIndex);
}
/// Get the raw edge weight calculated for the block pair. This returns the sum
/// of all raw edge weights from Src to Dst.
uint32_t BranchProbabilityInfo::

View File

@ -16,9 +16,37 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
CallGraph::CallGraph()
: ModulePass(ID), Root(0), ExternalCallingNode(0), CallsExternalNode(0) {
initializeCallGraphPass(*PassRegistry::getPassRegistry());
//===----------------------------------------------------------------------===//
// Implementations of the CallGraph class methods.
//
/// Construct the call graph for module \p M.
///
/// The external calling node is obtained through getOrInsertFunction(0), and
/// a separate CallGraphNode(0) is allocated for CallsExternalNode. Every
/// function in the module is then passed to addToCallGraph. If Root is still
/// null afterwards, the external calling node becomes the root.
///
/// NOTE(review): getOrInsertFunction is invoked from the member initializer
/// list, so it presumably relies on M and FunctionMap being declared before
/// ExternalCallingNode in the class -- confirm against the header.
CallGraph::CallGraph(Module &M)
: M(M), Root(0), ExternalCallingNode(getOrInsertFunction(0)),
CallsExternalNode(new CallGraphNode(0)) {
// Add every function to the call graph.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
addToCallGraph(I);
// If we didn't find a main function, use the external call graph node
if (Root == 0)
Root = ExternalCallingNode;
}
/// Destroy the call graph, deleting CallsExternalNode and every node stored
/// in FunctionMap.
CallGraph::~CallGraph() {
// CallsExternalNode is not in the function map, delete it explicitly.
CallsExternalNode->allReferencesDropped();
delete CallsExternalNode;
// Reset every node's use count to zero before deleting any of them to
// prevent an assertion from firing. This is done as a separate pass over the
// whole map, and only in builds where assertions are enabled.
#ifndef NDEBUG
for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
I != E; ++I)
I->second->allReferencesDropped();
#endif
// Second pass: actually free the nodes owned by the map.
for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
I != E; ++I)
delete I->second;
}
void CallGraph::addToCallGraph(Function *F) {
@ -62,59 +90,7 @@ void CallGraph::addToCallGraph(Function *F) {
}
}
void CallGraph::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
bool CallGraph::runOnModule(Module &M) {
Mod = &M;
ExternalCallingNode = getOrInsertFunction(0);
assert(!CallsExternalNode);
CallsExternalNode = new CallGraphNode(0);
Root = 0;
// Add every function to the call graph.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
addToCallGraph(I);
// If we didn't find a main function, use the external call graph node
if (Root == 0)
Root = ExternalCallingNode;
return false;
}
INITIALIZE_PASS(CallGraph, "basiccg", "CallGraph Construction", false, true)
char CallGraph::ID = 0;
void CallGraph::releaseMemory() {
/// CallsExternalNode is not in the function map, delete it explicitly.
if (CallsExternalNode) {
CallsExternalNode->allReferencesDropped();
delete CallsExternalNode;
CallsExternalNode = 0;
}
if (FunctionMap.empty())
return;
// Reset all node's use counts to zero before deleting them to prevent an
// assertion from firing.
#ifndef NDEBUG
for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
I != E; ++I)
I->second->allReferencesDropped();
#endif
for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
I != E; ++I)
delete I->second;
FunctionMap.clear();
}
void CallGraph::print(raw_ostream &OS, const Module*) const {
void CallGraph::print(raw_ostream &OS) const {
OS << "CallGraph Root is: ";
if (Function *F = Root->getFunction())
OS << F->getName() << "\n";
@ -125,15 +101,10 @@ void CallGraph::print(raw_ostream &OS, const Module*) const {
for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
I->second->print(OS);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void CallGraph::dump() const {
print(dbgs(), 0);
}
#endif
//===----------------------------------------------------------------------===//
// Implementations of public modification methods
//
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void CallGraph::dump() const { print(dbgs()); }
#endif
// removeFunctionFromModule - Unlink the function from this module, returning
// it. Because this removes the function from the module, the call graph node
@ -148,7 +119,7 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
delete CGN; // Delete the call graph node for this func
FunctionMap.erase(F); // Remove the call graph node from the map
Mod->getFunctionList().remove(F);
M.getFunctionList().remove(F);
return F;
}
@ -172,12 +143,17 @@ void CallGraph::spliceFunction(const Function *From, const Function *To) {
// not already exist.
CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
CallGraphNode *&CGN = FunctionMap[F];
if (CGN) return CGN;
assert((!F || F->getParent() == Mod) && "Function not in current module!");
if (CGN)
return CGN;
assert((!F || F->getParent() == &M) && "Function not in current module!");
return CGN = new CallGraphNode(const_cast<Function*>(F));
}
//===----------------------------------------------------------------------===//
// Implementations of the CallGraphNode class methods.
//
void CallGraphNode::print(raw_ostream &OS) const {
if (Function *F = getFunction())
OS << "Call graph node for function: '" << F->getName() << "'";
@ -260,5 +236,46 @@ void CallGraphNode::replaceCallEdge(CallSite CS,
}
}
//===----------------------------------------------------------------------===//
// Implementations of the CallGraphWrapperPass class methods.
//
/// Construct the wrapper as a ModulePass identified by ID and run the pass
/// initialization routine against the global pass registry.
CallGraphWrapperPass::CallGraphWrapperPass() : ModulePass(ID) {
initializeCallGraphWrapperPassPass(*PassRegistry::getPassRegistry());
}
/// Out-of-line destructor with an empty body; member cleanup is left to the
/// members' own destructors.
CallGraphWrapperPass::~CallGraphWrapperPass() {}
/// Declare that this pass preserves all analyses; it registers no required
/// analyses of its own.
void CallGraphWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
/// Build a fresh CallGraph for \p M and store it in G, replacing any graph
/// held previously. Always returns false.
bool CallGraphWrapperPass::runOnModule(Module &M) {
// All the real work is done in the constructor for the CallGraph.
G.reset(new CallGraph(M));
return false;
}
INITIALIZE_PASS(CallGraphWrapperPass, "basiccg", "CallGraph Construction",
false, true)
char CallGraphWrapperPass::ID = 0;
/// Drop the call graph held in G by resetting it to null.
void CallGraphWrapperPass::releaseMemory() { G.reset(0); }
/// Print the wrapped call graph to \p OS. When no graph is currently held, a
/// fixed placeholder message is written instead. The Module argument is not
/// used.
void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const {
  if (!G)
    OS << "No call graph has been built!\n";
  else
    G->print(OS); // The graph knows how to print itself.
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this pass to dbgs() via print(), passing a null Module pointer.
void CallGraphWrapperPass::dump() const { print(dbgs(), 0); }
#endif
// Ensure that users of CallGraph.h also link with this file
DEFINING_FILE_FOR(CallGraph)

View File

@ -60,7 +60,7 @@ public:
/// Pass Manager itself does not invalidate any analysis info.
void getAnalysisUsage(AnalysisUsage &Info) const {
// CGPassManager walks SCC and it needs CallGraph.
Info.addRequired<CallGraph>();
Info.addRequired<CallGraphWrapperPass>();
Info.setPreservesAll();
}
@ -424,7 +424,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
/// run - Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
bool CGPassManager::runOnModule(Module &M) {
CallGraph &CG = getAnalysis<CallGraph>();
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
bool Changed = doInitialization(CG);
// Walk the callgraph in bottom-up SCC order.
@ -570,8 +570,8 @@ void CallGraphSCCPass::assignPassManager(PMStack &PMS,
/// the call graph. If the derived class implements this method, it should
/// always explicitly call the implementation here.
void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<CallGraph>();
AU.addPreserved<CallGraph>();
AU.addRequired<CallGraphWrapperPass>();
AU.addPreserved<CallGraphWrapperPass>();
}

View File

@ -22,13 +22,10 @@ using namespace llvm;
namespace llvm {
template<>
struct DOTGraphTraits<CallGraph*> : public DefaultDOTGraphTraits {
DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
template <> struct DOTGraphTraits<CallGraph *> : public DefaultDOTGraphTraits {
DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
static std::string getGraphName(CallGraph *Graph) {
return "Call graph";
}
static std::string getGraphName(CallGraph *Graph) { return "Call graph"; }
std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) {
if (Function *Func = Node->getFunction())
@ -38,49 +35,57 @@ struct DOTGraphTraits<CallGraph*> : public DefaultDOTGraphTraits {
}
};
/// Traits type that extracts the graph from a CallGraphWrapperPass: getGraph
/// returns the address of the CallGraph obtained from P->getCallGraph().
struct AnalysisCallGraphWrapperPassTraits {
static CallGraph *getGraph(CallGraphWrapperPass *P) {
return &P->getCallGraph();
}
};
} // end llvm namespace
namespace {
struct CallGraphViewer
: public DOTGraphTraitsModuleViewer<CallGraph, true> {
: public DOTGraphTraitsModuleViewer<CallGraphWrapperPass, true, CallGraph *,
AnalysisCallGraphWrapperPassTraits> {
static char ID;
CallGraphViewer()
: DOTGraphTraitsModuleViewer<CallGraph, true>("callgraph", ID) {
: DOTGraphTraitsModuleViewer<CallGraphWrapperPass, true, CallGraph *,
AnalysisCallGraphWrapperPassTraits>(
"callgraph", ID) {
initializeCallGraphViewerPass(*PassRegistry::getPassRegistry());
}
};
struct CallGraphPrinter
: public DOTGraphTraitsModulePrinter<CallGraph, true> {
struct CallGraphPrinter : public DOTGraphTraitsModulePrinter<
CallGraphWrapperPass, true, CallGraph *,
AnalysisCallGraphWrapperPassTraits> {
static char ID;
CallGraphPrinter()
: DOTGraphTraitsModulePrinter<CallGraph, true>("callgraph", ID) {
initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry());
: DOTGraphTraitsModulePrinter<CallGraphWrapperPass, true, CallGraph *,
AnalysisCallGraphWrapperPassTraits>(
"callgraph", ID) {
initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry());
}
};
} // end anonymous namespace
char CallGraphViewer::ID = 0;
INITIALIZE_PASS(CallGraphViewer, "view-callgraph",
"View call graph",
false, false)
INITIALIZE_PASS(CallGraphViewer, "view-callgraph", "View call graph", false,
false)
char CallGraphPrinter::ID = 0;
INITIALIZE_PASS(CallGraphPrinter, "dot-callgraph",
"Print call graph to 'dot' file",
false, false)
"Print call graph to 'dot' file", false, false)
// Create methods available outside of this file, to use them
// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
// the link time optimization.
ModulePass *llvm::createCallGraphViewerPass() {
return new CallGraphViewer();
}
ModulePass *llvm::createCallGraphViewerPass() { return new CallGraphViewer(); }
ModulePass *llvm::createCallGraphPrinterPass() {
return new CallGraphPrinter();

View File

@ -95,15 +95,19 @@ namespace {
}
bool runOnModule(Module &M) {
InitializeAliasAnalysis(this); // set up super class
AnalyzeGlobals(M); // find non-addr taken globals
AnalyzeCallGraph(getAnalysis<CallGraph>(), M); // Propagate on CG
InitializeAliasAnalysis(this);
// Find non-addr taken globals.
AnalyzeGlobals(M);
// Propagate on CG.
AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M);
return false;
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AliasAnalysis::getAnalysisUsage(AU);
AU.addRequired<CallGraph>();
AU.addRequired<CallGraphWrapperPass>();
AU.setPreservesAll(); // Does not transform code
}
@ -189,7 +193,7 @@ char GlobalsModRef::ID = 0;
INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis,
"globalsmodref-aa", "Simple mod/ref analysis for globals",
false, true, false)
INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis,
"globalsmodref-aa", "Simple mod/ref analysis for globals",
false, true, false)

View File

@ -19,7 +19,7 @@ using namespace llvm;
/// initializeIPA - Initialize all passes linked into the IPA library.
void llvm::initializeIPA(PassRegistry &Registry) {
initializeCallGraphPass(Registry);
initializeCallGraphWrapperPassPass(Registry);
initializeCallGraphPrinterPass(Registry);
initializeCallGraphViewerPass(Registry);
initializeFindUsedTypesPass(Registry);

View File

@ -59,6 +59,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool ExposesReturnsTwice;
bool HasDynamicAlloca;
bool ContainsNoDuplicateCall;
bool HasReturn;
bool HasIndirectBr;
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
@ -132,6 +134,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitExtractValue(ExtractValueInst &I);
bool visitInsertValue(InsertValueInst &I);
bool visitCallSite(CallSite CS);
bool visitReturnInst(ReturnInst &RI);
bool visitBranchInst(BranchInst &BI);
bool visitSwitchInst(SwitchInst &SI);
bool visitIndirectBrInst(IndirectBrInst &IBI);
bool visitResumeInst(ResumeInst &RI);
bool visitUnreachableInst(UnreachableInst &I);
public:
CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI,
@ -139,12 +147,13 @@ public:
: TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0),
NumVectorInstructions(0), FiftyPercentVectorBonus(0),
TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
SROACostSavings(0), SROACostSavingsLost(0) {}
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
NumInstructionsSimplified(0), SROACostSavings(0),
SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@ -704,7 +713,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
}
bool CallAnalyzer::visitCallSite(CallSite CS) {
if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
!F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::ReturnsTwice)) {
// This aborts the entire analysis.
@ -785,6 +794,60 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
return Base::visitCallSite(CS);
}
/// Visit a return instruction. Only the first return seen is reported as
/// free (true), since at least one return will be free after inlining; every
/// later return yields false. HasReturn records that a return has been seen.
bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
  if (HasReturn)
    return false;

  // First return encountered: remember it and treat it as free.
  HasReturn = true;
  return true;
}
/// Visit a branch instruction. Returns true (free) for unconditional
/// branches and for conditional branches whose condition is a ConstantInt,
/// either directly or via SimplifiedValues; returns false otherwise.
bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
  // Unconditional branches are modelled as essentially free -- they really
  // shouldn't exist at all, but handling them keeps the inliner's behavior
  // regular and predictable.
  if (BI.isUnconditional())
    return true;

  // A conditional branch that will fold away is free as well.
  if (isa<ConstantInt>(BI.getCondition()))
    return true;

  return dyn_cast_or_null<ConstantInt>(
             SimplifiedValues.lookup(BI.getCondition())) != 0;
}
/// Visit a switch instruction. Returns true (free) when the switch condition
/// is a ConstantInt, either directly or via SimplifiedValues; returns false
/// otherwise. The reasoning mirrors the handling of branches.
bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
  // A switch on a literal constant is effectively unconditional.
  if (isa<ConstantInt>(SI.getCondition()))
    return true;

  // Otherwise it is free only if the condition has been simplified to a
  // constant integer.
  return dyn_cast_or_null<ConstantInt>(
             SimplifiedValues.lookup(SI.getCondition())) != 0;
}
/// Visit an indirectbr instruction: set HasIndirectBr and return false.
bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
// We never want to inline functions that contain an indirectbr. This is
// incorrect because all the blockaddress's (in static global initializers
// for example) would be referring to the original function, and this
// indirect jump would jump from the inlined copy of the function into the
// original function which is extremely undefined behavior.
// FIXME: This logic isn't really right; we can safely inline functions with
// indirectbr's as long as no other function or global references the
// blockaddress of a block within the current function. And as a QOI issue,
// if someone is using a blockaddress without an indirectbr, and that
// reference somehow ends up in another function or global, we probably don't
// want to inline this function.
HasIndirectBr = true;
return false;
}
/// Visit a resume instruction. Always returns false, i.e. the instruction is
/// not treated as free.
bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
// FIXME: It's not clear that a single instruction is an accurate model for
// the inline cost of a resume instruction.
return false;
}
/// Visit an unreachable instruction. Always returns true, i.e. the
/// instruction is treated as free.
bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
// FIXME: It might be reasonable to discount the cost of instructions leading
// to unreachable as they have the lowest possible impact on both runtime and
// code size.
return true; // No actual code is needed for unreachable.
}
bool CallAnalyzer::visitInstruction(Instruction &I) {
// Some instructions are free. All of the free intrinsics can also be
// handled by SROA, etc.
@ -808,8 +871,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
I != E; ++I) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
++NumInstructions;
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
@ -825,7 +887,8 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
Cost += InlineConstants::InstrCost;
// If visiting this instruction detected an uninlinable pattern, abort.
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
HasIndirectBr)
return false;
// If the caller is a recursive function then we don't want to inline
@ -989,10 +1052,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
}
}
// Track whether we've seen a return instruction. The first return
// instruction is free, as at least one will usually disappear in inlining.
bool HasReturn = false;
// Populate our simplified values by mapping from function arguments to call
// arguments with known important simplifications.
CallSite::arg_iterator CAI = CS.arg_begin();
@ -1039,33 +1098,11 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
if (BB->empty())
continue;
// Handle the terminator cost here where we can track returns and other
// function-wide constructs.
TerminatorInst *TI = BB->getTerminator();
// We never want to inline functions that contain an indirectbr. This is
// incorrect because all the blockaddress's (in static global initializers
// for example) would be referring to the original function, and this
// indirect jump would jump from the inlined copy of the function into the
// original function which is extremely undefined behavior.
// FIXME: This logic isn't really right; we can safely inline functions
// with indirectbr's as long as no other function or global references the
// blockaddress of a block within the current function. And as a QOI issue,
// if someone is using a blockaddress without an indirectbr, and that
// reference somehow ends up in another function or global, we probably
// don't want to inline this function.
if (isa<IndirectBrInst>(TI))
return false;
if (!HasReturn && isa<ReturnInst>(TI))
HasReturn = true;
else
Cost += InlineConstants::InstrCost;
// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail on out.
if (!analyzeBlock(BB)) {
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
HasIndirectBr)
return false;
// If the caller is a recursive function then we don't want to inline
@ -1078,6 +1115,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
break;
}
TerminatorInst *TI = BB->getTerminator();
// Add in the live successors by first checking whether we have terminator
// that may be simplified based on the values simplified by this call.
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
@ -1115,7 +1154,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
}
}
// If this is a noduplicate call, we can still inline as long as
// If this is a noduplicate call, we can still inline as long as
// inlining this would cause the removal of the caller (so the instruction
// is not actually duplicated, just moved).
if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)

View File

@ -68,7 +68,7 @@ namespace {
return InstTypePair(dep.getInst(), Def);
if (dep.isNonFuncLocal())
return InstTypePair(dep.getInst(), NonFuncLocal);
assert(dep.isUnknown() && "unexptected dependence type");
assert(dep.isUnknown() && "unexpected dependence type");
return InstTypePair(dep.getInst(), Unknown);
}
static InstTypePair getInstTypePair(const Instruction* inst, DepType type) {

View File

@ -399,12 +399,14 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *DL,
LLVMContext &Context,
bool RoundToAlign)
: DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) {
IntegerType *IntTy = DL->getIntPtrType(Context);
IntTyBits = IntTy->getBitWidth();
Zero = APInt::getNullValue(IntTyBits);
// Pointer size must be rechecked for each object visited since it could have
// a different address space.
}
SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
IntTyBits = DL->getPointerTypeSizeInBits(V->getType());
Zero = APInt::getNullValue(IntTyBits);
V = V->stripPointerCasts();
if (Instruction *I = dyn_cast<Instruction>(V)) {
// If we have already seen this instruction, bail out. Cycles can happen in
@ -592,11 +594,15 @@ ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *DL,
bool RoundToAlign)
: DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
RoundToAlign(RoundToAlign) {
IntTy = DL->getIntPtrType(Context);
Zero = ConstantInt::get(IntTy, 0);
// IntTy and Zero must be set for each compute() since the address space may
// be different for later objects.
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
// XXX - Are vectors of pointers possible here?
IntTy = cast<IntegerType>(DL->getIntPtrType(V->getType()));
Zero = ConstantInt::get(IntTy, 0);
SizeOffsetEvalType Result = compute_(V);
if (!bothKnown(Result)) {

View File

@ -72,7 +72,7 @@ static bool VerifySubExpr(Value *Expr,
// If it isn't in the InstInputs list it is a subexpr incorporated into the
// address. Sanity check that it is phi translatable.
if (!CanPHITrans(I)) {
errs() << "Non phi translatable instruction found in PHITransAddr:\n";
errs() << "Instruction in PHITransAddr is not phi-translatable:\n";
errs() << *I << '\n';
llvm_unreachable("Either something is missing from InstInputs or "
"CanPHITrans is wrong.");

Some files were not shown because too many files have changed in this diff Show More