Import LLVM r309604 from branches/release_50

joerg 2017-08-01 19:24:44 +00:00
parent 3dd3c1d9cd
commit 51efcbbd66
8952 changed files with 1397932 additions and 191749 deletions


@ -21,6 +21,9 @@
#OS X specific files.
.DS_store
# Nested build directory
/build
#==============================================================================#
# Explicit files to ignore (only matches one).
#==============================================================================#
@ -62,8 +65,9 @@ tools/polly
tools/avrlit
# Sphinx build tree, if building in-source dir.
docs/_build
# VSCode config files.
# VS2017 and VSCode config files.
.vscode
.vs
#==============================================================================#
# Files created in tree by the Go bindings.


@ -20,7 +20,7 @@ if(POLICY CMP0057)
endif()
if(NOT DEFINED LLVM_VERSION_MAJOR)
set(LLVM_VERSION_MAJOR 4)
set(LLVM_VERSION_MAJOR 5)
endif()
if(NOT DEFINED LLVM_VERSION_MINOR)
set(LLVM_VERSION_MINOR 0)
@ -44,6 +44,13 @@ if (NOT PACKAGE_VERSION)
"${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}")
endif()
if ((CMAKE_GENERATOR MATCHES "Visual Studio") AND (CMAKE_GENERATOR_TOOLSET STREQUAL ""))
message(WARNING "Visual Studio generators use the x86 host compiler by "
"default, even for 64-bit targets. This can result in linker "
"instability and out of memory errors. To use the 64-bit "
"host compiler, pass -Thost=x64 on the CMake command line.")
endif()
project(LLVM
${cmake_3_0_PROJ_VERSION}
${cmake_3_0_LANGUAGES}
@ -56,17 +63,20 @@ endif()
# This should only apply if you are both on an Apple host, and targeting Apple.
if(CMAKE_HOST_APPLE AND APPLE)
if(NOT CMAKE_XCRUN)
find_program(CMAKE_XCRUN NAMES xcrun)
endif()
if(CMAKE_XCRUN)
execute_process(COMMAND ${CMAKE_XCRUN} -find libtool
OUTPUT_VARIABLE CMAKE_LIBTOOL
OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
# if CMAKE_LIBTOOL is not set, try and find it with xcrun or find_program
if(NOT CMAKE_LIBTOOL)
if(NOT CMAKE_XCRUN)
find_program(CMAKE_XCRUN NAMES xcrun)
endif()
if(CMAKE_XCRUN)
execute_process(COMMAND ${CMAKE_XCRUN} -find libtool
OUTPUT_VARIABLE CMAKE_LIBTOOL
OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
if(NOT CMAKE_LIBTOOL OR NOT EXISTS CMAKE_LIBTOOL)
find_program(CMAKE_LIBTOOL NAMES libtool)
if(NOT CMAKE_LIBTOOL OR NOT EXISTS CMAKE_LIBTOOL)
find_program(CMAKE_LIBTOOL NAMES libtool)
endif()
endif()
get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES)
@ -84,7 +94,7 @@ if(CMAKE_HOST_APPLE AND APPLE)
set(LIBTOOL_NO_WARNING_FLAG "-no_warning_for_no_symbols")
endif()
endif()
foreach(lang ${languages})
set(CMAKE_${lang}_CREATE_STATIC_LIBRARY
"${CMAKE_LIBTOOL} -static ${LIBTOOL_NO_WARNING_FLAG} -o <TARGET> \
@ -132,18 +142,6 @@ foreach(proj ${LLVM_ENABLE_PROJECTS})
endif()
endforeach()
# The following only works with the Ninja generator in CMake >= 3.0.
set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING
"Define the maximum number of concurrent compilation jobs.")
if(LLVM_PARALLEL_COMPILE_JOBS)
if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja")
message(WARNING "Job pooling is only available with Ninja generators.")
else()
set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${LLVM_PARALLEL_COMPILE_JOBS})
set(CMAKE_JOB_POOL_COMPILE compile_job_pool)
endif()
endif()
# Build llvm with ccache if the package is present
set(LLVM_CCACHE_BUILD OFF CACHE BOOL "Set to ON for a ccache enabled build")
if(LLVM_CCACHE_BUILD)
@ -178,21 +176,12 @@ if(LLVM_DEPENDENCY_DEBUGGING)
endif()
endif()
option(LLVM_BUILD_GLOBAL_ISEL "Experimental: Build GlobalISel" OFF)
option(LLVM_BUILD_GLOBAL_ISEL "Experimental: Build GlobalISel" ON)
if(LLVM_BUILD_GLOBAL_ISEL)
add_definitions(-DLLVM_BUILD_GLOBAL_ISEL)
endif()
set(LLVM_PARALLEL_LINK_JOBS "" CACHE STRING
"Define the maximum number of concurrent link jobs.")
if(LLVM_PARALLEL_LINK_JOBS)
if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja")
message(WARNING "Job pooling is only available with Ninja generators.")
else()
set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${LLVM_PARALLEL_LINK_JOBS})
set(CMAKE_JOB_POOL_LINK link_job_pool)
endif()
endif()
option(LLVM_ENABLE_DAGISEL_COV "Debug: Prints tablegen patterns that were used for selecting" OFF)
# Add path for custom modules
set(CMAKE_MODULE_PATH
@ -217,7 +206,7 @@ endif()
include(VersionFromVCS)
option(LLVM_APPEND_VC_REV
"Append the version control system revision id to LLVM version" OFF)
"Embed the version control system revision id in LLVM" ON)
if( LLVM_APPEND_VC_REV )
add_version_info_from_vcs(PACKAGE_VERSION)
@ -299,6 +288,10 @@ set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name
set(LLVM_TOOLS_INSTALL_DIR "bin" CACHE STRING "Path for binary subdirectory (defaults to 'bin')")
mark_as_advanced(LLVM_TOOLS_INSTALL_DIR)
set(LLVM_UTILS_INSTALL_DIR "bin" CACHE STRING
"Path to install LLVM utilities (enabled by LLVM_INSTALL_UTILS=ON) (defaults to LLVM_TOOLS_INSTALL_DIR)")
mark_as_advanced(LLVM_TOOLS_INSTALL_DIR)
# They are used as destination of target generators.
set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin)
set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX})
@ -385,8 +378,6 @@ set(LLVM_TARGETS_TO_BUILD
${LLVM_EXPERIMENTAL_TARGETS_TO_BUILD})
list(REMOVE_DUPLICATES LLVM_TARGETS_TO_BUILD)
include(AddLLVMDefinitions)
option(LLVM_ENABLE_PIC "Build Position-Independent Code" ON)
option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." ON)
option(LLVM_ENABLE_MODULES "Compile with C++ modules enabled." OFF)
@ -398,6 +389,7 @@ else()
option(LLVM_ENABLE_LOCAL_SUBMODULE_VISIBILITY "Compile with -fmodules-local-submodule-visibility." ON)
endif()
option(LLVM_ENABLE_CXX1Y "Compile with C++1y enabled." OFF)
option(LLVM_ENABLE_CXX1Z "Compile with C++1z enabled." OFF)
option(LLVM_ENABLE_LIBCXX "Use libc++ if available." OFF)
option(LLVM_ENABLE_LLD "Use lld as C and C++ linker." OFF)
option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
@ -414,9 +406,6 @@ option(LLVM_ENABLE_EXPENSIVE_CHECKS "Enable expensive checks" OFF)
set(LLVM_ABI_BREAKING_CHECKS "WITH_ASSERTS" CACHE STRING
"Enable abi-breaking checks. Can be WITH_ASSERTS, FORCE_ON or FORCE_OFF.")
option(LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
"Disable abi-breaking checks mismatch detection at link-tim." OFF)
option(LLVM_FORCE_USE_OLD_HOST_TOOLCHAIN
"Set to ON to force using an old, unsupported host toolchain." OFF)
@ -506,6 +495,10 @@ option(LLVM_INCLUDE_UTILS "Generate build targets for the LLVM utils." ON)
option(LLVM_BUILD_UTILS
"Build LLVM utility binaries. If OFF, just generate build targets." ON)
option(LLVM_INCLUDE_RUNTIMES "Generate build targets for the LLVM runtimes." ON)
option(LLVM_BUILD_RUNTIMES
"Build the LLVM runtimes. If OFF, just generate build targets." ON)
option(LLVM_BUILD_RUNTIME
"Build the LLVM runtime libraries." ON)
option(LLVM_BUILD_EXAMPLES
@ -531,6 +524,9 @@ set(LLVM_INSTALL_OCAMLDOC_HTML_DIR "share/doc/llvm/ocaml-html"
option (LLVM_BUILD_EXTERNAL_COMPILER_RT
"Build compiler-rt as an external project." OFF)
option (LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
"Show target and host info when tools are invoked with --version." ON)
# You can configure which libraries from LLVM you want to include in the
# shared library by setting LLVM_DYLIB_COMPONENTS to a semi-colon delimited
# list of LLVM components. All component names handled by llvm-config are valid.
@ -546,6 +542,8 @@ if(LLVM_LINK_LLVM_DYLIB OR LLVM_BUILD_LLVM_C_DYLIB)
endif()
option(LLVM_BUILD_LLVM_DYLIB "Build libllvm dynamic library" ${LLVM_BUILD_LLVM_DYLIB_default})
option(LLVM_DYLIB_SYMBOL_VERSIONING OFF)
option(LLVM_OPTIMIZED_TABLEGEN "Force TableGen to be built with optimization" OFF)
if(CMAKE_CROSSCOMPILING OR (LLVM_OPTIMIZED_TABLEGEN AND (LLVM_ENABLE_ASSERTIONS OR CMAKE_CONFIGURATION_TYPES)))
set(LLVM_USE_HOST_TOOLS ON)
@ -576,6 +574,10 @@ if (LLVM_BUILD_STATIC)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
endif()
# Override the default target with an environment variable named by LLVM_TARGET_TRIPLE_ENV.
set(LLVM_TARGET_TRIPLE_ENV CACHE STRING "The name of environment variable to override default target. Disabled by blank.")
mark_as_advanced(LLVM_TARGET_TRIPLE_ENV)
# All options referred to from HandleLLVMOptions have to be specified
# BEFORE this include, otherwise options will not be correctly set on
# first cmake run
@ -641,7 +643,7 @@ endif (LLVM_USE_OPROFILE)
message(STATUS "Constructing LLVMBuild project information")
execute_process(
COMMAND ${PYTHON_EXECUTABLE} ${LLVMBUILDTOOL}
COMMAND ${PYTHON_EXECUTABLE} -B ${LLVMBUILDTOOL}
--native-target "${LLVM_NATIVE_ARCH}"
--enable-targets "${LLVM_TARGETS_TO_BUILD}"
--enable-optional-components "${LLVMOPTIONALCOMPONENTS}"
@ -737,6 +739,30 @@ configure_file(
${LLVM_MAIN_INCLUDE_DIR}/llvm/Support/DataTypes.h.cmake
${LLVM_INCLUDE_DIR}/llvm/Support/DataTypes.h)
# Add target for generating source rpm package.
set(LLVM_SRPM_USER_BINARY_SPECFILE ${CMAKE_CURRENT_SOURCE_DIR}/llvm.spec.in
CACHE FILEPATH ".spec file to use for srpm generation")
set(LLVM_SRPM_BINARY_SPECFILE ${CMAKE_CURRENT_BINARY_DIR}/llvm.spec)
set(LLVM_SRPM_DIR "${CMAKE_CURRENT_BINARY_DIR}/srpm")
# SVN_REVISION and GIT_COMMIT get set by the call to add_version_info_from_vcs.
# DUMMY_VAR contains a version string which we don't care about.
add_version_info_from_vcs(DUMMY_VAR)
if ( SVN_REVISION )
set(LLVM_RPM_SPEC_REVISION "r${SVN_REVISION}")
elseif ( GIT_COMMIT )
set (LLVM_RPM_SPEC_REVISION "g${GIT_COMMIT}")
endif()
configure_file(
${LLVM_SRPM_USER_BINARY_SPECFILE}
${LLVM_SRPM_BINARY_SPECFILE} @ONLY)
add_custom_target(srpm
COMMAND cpack -G TGZ --config CPackSourceConfig.cmake -B ${LLVM_SRPM_DIR}/SOURCES
COMMAND rpmbuild -bs --define '_topdir ${LLVM_SRPM_DIR}' ${LLVM_SRPM_BINARY_SPECFILE})
# They are not referenced. See set_output_directory().
set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/bin )
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} )
@ -782,7 +808,8 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)")
endif(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)")
if( ${CMAKE_SYSTEM_NAME} MATCHES SunOS )
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include llvm/Support/Solaris.h")
# special hack for Solaris to handle crazy system sys/regset.h
include_directories("${LLVM_MAIN_INCLUDE_DIR}/llvm/Support/Solaris")
endif( ${CMAKE_SYSTEM_NAME} MATCHES SunOS )
# Make sure we don't get -rdynamic in every binary. For those that need it,
@ -836,7 +863,6 @@ if( LLVM_INCLUDE_UTILS )
add_subdirectory(utils/not)
add_subdirectory(utils/llvm-lit)
add_subdirectory(utils/yaml-bench)
add_subdirectory(utils/unittest)
else()
if ( LLVM_INCLUDE_TESTS )
message(FATAL_ERROR "Including tests when not building utils will not work.
@ -861,7 +887,9 @@ if( LLVM_INCLUDE_TOOLS )
add_subdirectory(tools)
endif()
add_subdirectory(runtimes)
if( LLVM_INCLUDE_RUNTIMES )
add_subdirectory(runtimes)
endif()
if( LLVM_INCLUDE_EXAMPLES )
add_subdirectory(examples)
@ -878,6 +906,10 @@ if( LLVM_INCLUDE_TESTS )
endif()
add_subdirectory(test)
add_subdirectory(unittests)
if( LLVM_INCLUDE_UTILS )
add_subdirectory(utils/unittest)
endif()
if (WIN32)
# This utility is used to prevent crashing tests from calling Dr. Watson on
# Windows.
@ -978,3 +1010,8 @@ if(LLVM_DISTRIBUTION_COMPONENTS)
endif()
endforeach()
endif()
# This allows us to deploy the Universal CRT DLLs by passing -DCMAKE_INSTALL_UCRT_LIBRARIES=ON to CMake
if (MSVC)
include(InstallRequiredSystemLibraries)
endif()
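
The CMakeLists.txt hunks above introduce several new user-facing configuration knobs: LLVM_INCLUDE_RUNTIMES, LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO, LLVM_UTILS_INSTALL_DIR, LLVM_TARGET_TRIPLE_ENV, an srpm packaging target, and optional UCRT deployment on MSVC. As a rough illustration only, a cache-initialization script preseeding some of them could look like the sketch below; the file name and the chosen values are hypothetical and are not part of this import.

# Hypothetical cache file, used as: cmake -C my-llvm-cache.cmake <path-to-llvm>
# Only the option names come from the hunks above; the values are examples.
set(LLVM_INCLUDE_RUNTIMES ON CACHE BOOL "")
set(LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO ON CACHE BOOL "")
set(LLVM_UTILS_INSTALL_DIR "libexec/llvm" CACHE STRING "")  # hypothetical non-default location
# Name of an environment variable allowed to override the default target triple in tests.
set(LLVM_TARGET_TRIPLE_ENV "LLVM_TARGET_TRIPLE" CACHE STRING "")
# On Visual Studio generators, the new warning above suggests adding -Thost=x64 on the command line.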


@ -5,12 +5,9 @@ what goes in or not.
The list is sorted by surname and formatted to allow easy grepping and
beautification by scripts. The fields are: name (N), email (E), web-address
(W), PGP key ID and fingerprint (P), description (D), and snail-mail address
(S). Each entry should contain at least the (N), (E) and (D) fields.
N: Joe Abbey
E: jabbey@arxan.com
D: LLVM Bitcode (lib/Bitcode/* include/llvm/Bitcode/*)
(W), PGP key ID and fingerprint (P), description (D), snail-mail address
(S) and (I) IRC handle. Each entry should contain at least the (N), (E) and
(D) fields.
N: Justin Bogner
E: mail@justinbogner.com
@ -21,6 +18,11 @@ N: Alex Bradbury
E: asb@lowrisc.org
D: RISC-V backend (lib/Target/RISCV/*)
N: Matthias Braun
E: matze@braunis.de
I: MatzeB
D: Instruction Scheduling
N: Chandler Carruth
E: chandlerc@gmail.com
E: chandlerc@google.com
@ -34,6 +36,10 @@ N: Eric Christopher
E: echristo@gmail.com
D: Debug Information, inline assembly
N: Andrey Churbanov
E: andrey.churbanov@intel.com
D: OpenMP runtime library
N: Greg Clayton
E: gclayton@apple.com
D: LLDB
@ -48,7 +54,7 @@ D: libc++
N: Peter Collingbourne
E: peter@pcc.me.uk
D: llgo, libLTO (lib/LTO/* tools/lto/*)
D: llgo, libLTO (lib/LTO/* tools/lto/*), LLVM Bitcode (lib/Bitcode/* include/llvm/Bitcode/*)
N: Quentin Colombet
E: qcolombet@apple.com
@ -64,7 +70,7 @@ D: Branch weights and BlockFrequencyInfo
N: Hal Finkel
E: hfinkel@anl.gov
D: BBVectorize, the loop reroller, alias analysis and the PowerPC target
D: The loop reroller, alias analysis and the PowerPC target
N: Dan Gohman
E: sunfish@mozilla.com
@ -96,7 +102,7 @@ D: MCJIT, RuntimeDyld and JIT event listeners, Orcish Warchief
N: Teresa Johnson
E: tejohnson@google.com
D: Gold plugin (tools/gold/*)
D: Gold plugin (tools/gold/*) and IR Linker
N: Galina Kistanova
E: gkistanova@gmail.com
@ -132,7 +138,7 @@ E: david.majnemer@gmail.com
D: IR Constant Folder, InstCombine
N: Dylan McKay
E: dylanmckay34@gmail.com
E: me@dylanmckay.io
D: AVR Backend
N: Tim Northover
@ -180,9 +186,8 @@ E: alexei.starovoitov@gmail.com
D: BPF backend
N: Tom Stellard
E: thomas.stellard@amd.com
E: mesa-dev@lists.freedesktop.org
D: Release manager for the 3.5 and 3.6 branches, R600 Backend, libclc
E: tstellar@redhat.com
D: Stable release management (x.y.[1-9] releases), AMDGPU Backend, libclc
N: Evgeniy Stepanov
E: eugenis@google.com
@ -190,20 +195,13 @@ D: MemorySanitizer (LLVM part)
N: Craig Topper
E: craig.topper@gmail.com
E: craig.topper@intel.com
D: X86 Backend
N: Andrew Trick
E: atrick@apple.com
D: Instruction Scheduling
N: Ulrich Weigand
E: uweigand@de.ibm.com
D: SystemZ Backend
N: Teresa Johnson
E: tejohnson@google.com
D: IR Linker
N: Hans Wennborg
E: hans@chromium.org
D: Release management (x.y.0 releases)
@ -211,7 +209,3 @@ D: Release management (x.y.0 releases)
N: whitequark
E: whitequark@whitequark.org
D: OCaml bindings
N: Andrey Churbanov
E: andrey.churbanov@intel.com
D: OpenMP runtime library


@ -220,7 +220,7 @@ W: http://randomhacks.net/
D: llvm-config script
N: Anton Korobeynikov
E: asl@math.spbu.ru
E: anton at korobeynikov dot info
D: Mingw32 fixes, cross-compiling support, stdcall/fastcall calling conv.
D: x86/linux PIC codegen, aliases, regparm/visibility attributes
D: Switch lowering refactoring
@ -265,7 +265,7 @@ D: Release manager (1.7+)
N: Sylvestre Ledru
E: sylvestre@debian.org
W: http://sylvestre.ledru.info/
W: http://llvm.org/apt/
W: http://apt.llvm.org/
D: Debian and Ubuntu packaging
D: Continuous integration with jenkins
@ -318,11 +318,12 @@ D: Support for implicit TLS model used with MS VC runtime
D: Dumping of Win64 EH structures
N: Takumi Nakamura
I: chapuni
E: geek4civic@gmail.com
E: chapuni@hf.rim.or.jp
D: Cygwin and MinGW support.
D: Win32 tweaks.
S: Yokohama, Japan
D: Maintaining the Git monorepo
W: https://github.com/llvm-project/
S: Ebina, Japan
N: Edward O'Callaghan
E: eocallaghan@auroraux.org
@ -457,6 +458,10 @@ N: Adam Treat
E: manyoso@yahoo.com
D: C++ bugs filed, and C++ front-end bug fixes.
N: Andrew Trick
E: atrick@apple.com
D: Instruction Scheduling, ...
N: Lauro Ramos Venancio
E: lauro.venancio@indt.org.br
D: ARM backend improvements


@ -15,3 +15,4 @@ documentation setup.
If you are writing a package for LLVM, see docs/Packaging.rst for our
suggestions.


@ -41,17 +41,12 @@ E: hans@chromium.org
T: x86
O: Windows
N: Renato Golin
E: renato.golin@linaro.org
T: ARM
O: Linux
N: Diana Picus
E: diana.picus@linaro.org
T: AArch64
T: ARM, AArch64
O: Linux
N: Vasileios Kalintiris
E: Vasileios.Kalintiris@imgtec.com
N: Simon Dardis
E: simon.dardis@imgtec.com
T: MIPS
O: Linux


@ -19,8 +19,6 @@
using namespace llvm;
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(DIBuilder, LLVMDIBuilderRef)
LLVMDIBuilderRef LLVMNewDIBuilder(LLVMModuleRef mref) {
Module *m = unwrap(mref);
return wrap(new DIBuilder(*m));
@ -119,7 +117,8 @@ LLVMMetadataRef LLVMDIBuilderCreatePointerType(LLVMDIBuilderRef Dref,
const char *Name) {
DIBuilder *D = unwrap(Dref);
return wrap(D->createPointerType(unwrap<DIType>(PointeeType), SizeInBits,
AlignInBits, Name));
AlignInBits, /* DWARFAddressSpace */ None,
Name));
}
LLVMMetadataRef


@ -14,6 +14,7 @@
#include "IRBindings.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
@ -71,6 +72,18 @@ void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Bref, unsigned Line,
InlinedAt ? unwrap<MDNode>(InlinedAt) : nullptr));
}
LLVMDebugLocMetadata LLVMGetCurrentDebugLocation2(LLVMBuilderRef Bref) {
const auto& Loc = unwrap(Bref)->getCurrentDebugLocation();
const auto* InlinedAt = Loc.getInlinedAt();
const LLVMDebugLocMetadata md{
Loc.getLine(),
Loc.getCol(),
wrap(Loc.getScope()),
InlinedAt == nullptr ? nullptr : wrap(InlinedAt->getRawInlinedAt()),
};
return md;
}
void LLVMSetSubprogram(LLVMValueRef Func, LLVMMetadataRef SP) {
unwrap<Function>(Func)->setSubprogram(unwrap<DISubprogram>(SP));
}


@ -26,7 +26,12 @@
extern "C" {
#endif
typedef struct LLVMOpaqueMetadata *LLVMMetadataRef;
struct LLVMDebugLocMetadata{
unsigned Line;
unsigned Col;
LLVMMetadataRef Scope;
LLVMMetadataRef InlinedAt;
};
LLVMMetadataRef LLVMConstantAsMetadata(LLVMValueRef Val);
@ -46,21 +51,13 @@ void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Bref, unsigned Line,
unsigned Col, LLVMMetadataRef Scope,
LLVMMetadataRef InlinedAt);
struct LLVMDebugLocMetadata LLVMGetCurrentDebugLocation2(LLVMBuilderRef Bref);
void LLVMSetSubprogram(LLVMValueRef Fn, LLVMMetadataRef SP);
#ifdef __cplusplus
}
namespace llvm {
DEFINE_ISA_CONVERSION_FUNCTIONS(Metadata, LLVMMetadataRef)
inline Metadata **unwrap(LLVMMetadataRef *Vals) {
return reinterpret_cast<Metadata**>(Vals);
}
}
#endif
#endif


@ -611,6 +611,12 @@ func (t Type) StructElementTypes() []Type {
}
// Operations on array, pointer, and vector types (sequence types)
func (t Type) Subtypes() (ret []Type) {
ret = make([]Type, C.LLVMGetNumContainedTypes(t.C))
C.LLVMGetSubtypes(t.C, llvmTypeRefPtr(&ret[0]))
return
}
func ArrayType(elementType Type, elementCount int) (t Type) {
t.C = C.LLVMArrayType(elementType.C, C.unsigned(elementCount))
return
@ -1226,9 +1232,23 @@ func (b Builder) InsertWithName(instr Value, name string) {
func (b Builder) Dispose() { C.LLVMDisposeBuilder(b.C) }
// Metadata
type DebugLoc struct {
Line, Col uint
Scope Metadata
InlinedAt Metadata
}
func (b Builder) SetCurrentDebugLocation(line, col uint, scope, inlinedAt Metadata) {
C.LLVMSetCurrentDebugLocation2(b.C, C.unsigned(line), C.unsigned(col), scope.C, inlinedAt.C)
}
// Get the current debug location. Do not call this function before a debug location has been set with SetCurrentDebugLocation().
func (b Builder) GetCurrentDebugLocation() (loc DebugLoc) {
md := C.LLVMGetCurrentDebugLocation2(b.C)
loc.Line = uint(md.Line)
loc.Col = uint(md.Col)
loc.Scope = Metadata{C: md.Scope}
loc.InlinedAt = Metadata{C: md.InlinedAt}
return
}
func (b Builder) SetInstDebugLocation(v Value) { C.LLVMSetInstDebugLocation(b.C, v.C) }
func (b Builder) InsertDeclare(module Module, storage Value, md Value) Value {
f := module.NamedFunction("llvm.dbg.declare")


@ -95,3 +95,68 @@ func TestAttributes(t *testing.T) {
testAttribute(t, name)
}
}
func TestDebugLoc(t *testing.T) {
mod := NewModule("")
defer mod.Dispose()
ctx := mod.Context()
b := ctx.NewBuilder()
defer b.Dispose()
d := NewDIBuilder(mod)
defer func() {
d.Destroy()
}()
file := d.CreateFile("dummy_file", "dummy_dir")
voidInfo := d.CreateBasicType(DIBasicType{Name: "void"})
typeInfo := d.CreateSubroutineType(DISubroutineType{file, []Metadata{voidInfo}})
scope := d.CreateFunction(file, DIFunction{
Name: "foo",
LinkageName: "foo",
Line: 10,
ScopeLine: 10,
Type: typeInfo,
File: file,
IsDefinition: true,
})
b.SetCurrentDebugLocation(10, 20, scope, Metadata{})
loc := b.GetCurrentDebugLocation()
if loc.Line != 10 {
t.Errorf("Got line %d, though wanted 10", loc.Line)
}
if loc.Col != 20 {
t.Errorf("Got column %d, though wanted 20", loc.Col)
}
if loc.Scope.C != scope.C {
t.Errorf("Got metadata %v as scope, though wanted %v", loc.Scope.C, scope.C)
}
}
func TestSubtypes(t *testing.T) {
cont := NewContext()
defer cont.Dispose()
int_pointer := PointerType(cont.Int32Type(), 0)
int_inner := int_pointer.Subtypes()
if len(int_inner) != 1 {
t.Errorf("Got size %d, though wanted 1")
}
if int_inner[0] != cont.Int32Type() {
t.Errorf("Expected int32 type")
}
st_pointer := cont.StructType([]Type{cont.Int32Type(), cont.Int8Type()}, false)
st_inner := st_pointer.Subtypes()
if len(st_inner) != 2 {
t.Errorf("Got size %d, though wanted 2")
}
if st_inner[0] != cont.Int32Type() {
t.Errorf("Expected first struct field to be int32")
}
if st_inner[1] != cont.Int8Type() {
t.Errorf("Expected second struct field to be int8")
}
}


@ -43,6 +43,26 @@ func (pmb PassManagerBuilder) PopulateFunc(pm PassManager) {
C.LLVMPassManagerBuilderPopulateFunctionPassManager(pmb.C, pm.C)
}
func (pmb PassManagerBuilder) PopulateLTOPassManager(pm PassManager, internalize bool, runInliner bool) {
C.LLVMPassManagerBuilderPopulateLTOPassManager(pmb.C, pm.C, boolToLLVMBool(internalize), boolToLLVMBool(runInliner))
}
func (pmb PassManagerBuilder) Dispose() {
C.LLVMPassManagerBuilderDispose(pmb.C)
}
func (pmb PassManagerBuilder) SetDisableUnitAtATime(val bool) {
C.LLVMPassManagerBuilderSetDisableUnitAtATime(pmb.C, boolToLLVMBool(val))
}
func (pmb PassManagerBuilder) SetDisableUnrollLoops(val bool) {
C.LLVMPassManagerBuilderSetDisableUnrollLoops(pmb.C, boolToLLVMBool(val))
}
func (pmb PassManagerBuilder) SetDisableSimplifyLibCalls(val bool) {
C.LLVMPassManagerBuilderSetDisableSimplifyLibCalls(pmb.C, boolToLLVMBool(val))
}
func (pmb PassManagerBuilder) UseInlinerWithThreshold(threshold uint) {
C.LLVMPassManagerBuilderUseInlinerWithThreshold(pmb.C, C.uint(threshold))
}


@ -20,6 +20,10 @@ type llattribute
type llmemorybuffer
type llmdkind
exception FeatureDisabled of string
let () = Callback.register_exception "Llvm.FeatureDisabled" (FeatureDisabled "")
module TypeKind = struct
type t =
| Void
@ -459,6 +463,8 @@ external is_packed : lltype -> bool = "llvm_is_packed"
external is_opaque : lltype -> bool = "llvm_is_opaque"
(*--... Operations on pointer, vector, and array types .....................--*)
external subtypes : lltype -> lltype array = "llvm_subtypes"
external array_type : lltype -> int -> lltype = "llvm_array_type"
external pointer_type : lltype -> lltype = "llvm_pointer_type"
external qualified_pointer_type : lltype -> int -> lltype


@ -371,6 +371,8 @@ type ('a, 'b) llrev_pos =
(** {6 Exceptions} *)
exception FeatureDisabled of string
exception IoError of string
@ -658,6 +660,9 @@ val is_opaque : lltype -> bool
(** {7 Operations on pointer, vector, and array types} *)
(** [subtypes ty] returns [ty]'s subtypes *)
val subtypes : lltype -> lltype array
(** [array_type ty n] returns the array type containing [n] elements of type
[ty]. See the method [llvm::ArrayType::get]. *)
val array_type : lltype -> int -> lltype


@ -336,7 +336,12 @@ CAMLprim LLVMContextRef llvm_type_context(LLVMTypeRef Ty) {
/* lltype -> unit */
CAMLprim value llvm_dump_type(LLVMTypeRef Val) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVMDumpType(Val);
#else
caml_raise_with_arg(*caml_named_value("Llvm.FeatureDisabled"),
caml_copy_string("dump"));
#endif
return Val_unit;
}
@ -506,6 +511,20 @@ CAMLprim value llvm_is_opaque(LLVMTypeRef StructTy) {
/*--... Operations on array, pointer, and vector types .....................--*/
/* lltype -> lltype array */
CAMLprim value llvm_subtypes(LLVMTypeRef Ty) {
CAMLparam0();
CAMLlocal1(Arr);
unsigned Size = LLVMGetNumContainedTypes(Ty);
Arr = caml_alloc(Size, 0);
LLVMGetSubtypes(Ty, (LLVMTypeRef *) Arr);
CAMLreturn(Arr);
}
/* lltype -> int -> lltype */
CAMLprim LLVMTypeRef llvm_array_type(LLVMTypeRef ElementTy, value Count) {
return LLVMArrayType(ElementTy, Int_val(Count));


@ -77,7 +77,7 @@ CAMLprim value llvm_datalayout_pointer_size(value DL) {
/* Llvm.llcontext -> DataLayout.t -> Llvm.lltype */
CAMLprim LLVMTypeRef llvm_datalayout_intptr_type(LLVMContextRef C, value DL) {
return LLVMIntPtrTypeInContext(C, DataLayout_val(DL));;
return LLVMIntPtrTypeInContext(C, DataLayout_val(DL));
}
/* int -> DataLayout.t -> int */


@ -46,7 +46,6 @@ endfunction()
check_include_file(dirent.h HAVE_DIRENT_H)
check_include_file(dlfcn.h HAVE_DLFCN_H)
check_include_file(errno.h HAVE_ERRNO_H)
check_include_file(execinfo.h HAVE_EXECINFO_H)
check_include_file(fcntl.h HAVE_FCNTL_H)
check_include_file(inttypes.h HAVE_INTTYPES_H)
check_include_file(link.h HAVE_LINK_H)
@ -88,6 +87,15 @@ if(APPLE)
HAVE_CRASHREPORTER_INFO)
endif()
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
check_include_file(linux/magic.h HAVE_LINUX_MAGIC_H)
if(NOT HAVE_LINUX_MAGIC_H)
# older kernels use split files
check_include_file(linux/nfs_fs.h HAVE_LINUX_NFS_FS_H)
check_include_file(linux/smb.h HAVE_LINUX_SMB_H)
endif()
endif()
# library checks
if( NOT PURE_WINDOWS )
check_library_exists(pthread pthread_create "" HAVE_LIBPTHREAD)
@ -115,7 +123,7 @@ if(HAVE_LIBPTHREAD)
set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
set(THREADS_HAVE_PTHREAD_ARG Off)
find_package(Threads REQUIRED)
set(PTHREAD_LIB ${CMAKE_THREAD_LIBS_INIT})
set(LLVM_PTHREAD_LIB ${CMAKE_THREAD_LIBS_INIT})
endif()
# Don't look for these libraries on Windows. Also don't look for them if we're
@ -156,7 +164,9 @@ endif()
# function checks
check_symbol_exists(arc4random "stdlib.h" HAVE_DECL_ARC4RANDOM)
check_symbol_exists(backtrace "execinfo.h" HAVE_BACKTRACE)
find_package(Backtrace)
set(HAVE_BACKTRACE ${Backtrace_FOUND})
set(BACKTRACE_HEADER ${Backtrace_HEADER})
check_symbol_exists(_Unwind_Backtrace "unwind.h" HAVE__UNWIND_BACKTRACE)
check_symbol_exists(getpagesize unistd.h HAVE_GETPAGESIZE)
check_symbol_exists(sysconf unistd.h HAVE_SYSCONF)
@ -227,6 +237,7 @@ if( HAVE_DLFCN_H )
list(APPEND CMAKE_REQUIRED_LIBRARIES dl)
endif()
check_symbol_exists(dlopen dlfcn.h HAVE_DLOPEN)
check_symbol_exists(dladdr dlfcn.h HAVE_DLADDR)
if( HAVE_LIBDL )
list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES dl)
endif()
@ -234,7 +245,15 @@ endif()
check_symbol_exists(__GLIBC__ stdio.h LLVM_USING_GLIBC)
if( LLVM_USING_GLIBC )
add_llvm_definitions( -D_GNU_SOURCE )
add_definitions( -D_GNU_SOURCE )
endif()
# This check requires _GNU_SOURCE
if(HAVE_LIBPTHREAD)
check_library_exists(pthread pthread_getname_np "" HAVE_PTHREAD_GETNAME_NP)
check_library_exists(pthread pthread_setname_np "" HAVE_PTHREAD_SETNAME_NP)
elseif(PTHREAD_IN_LIBC)
check_library_exists(c pthread_getname_np "" HAVE_PTHREAD_GETNAME_NP)
check_library_exists(c pthread_setname_np "" HAVE_PTHREAD_SETNAME_NP)
endif()
set(headers "sys/types.h")
@ -489,8 +508,6 @@ if (LLVM_ENABLE_ZLIB )
endif()
endif()
set(LLVM_PREFIX ${CMAKE_INSTALL_PREFIX})
if (LLVM_ENABLE_DOXYGEN)
message(STATUS "Doxygen enabled.")
find_package(Doxygen REQUIRED)
@ -513,16 +530,6 @@ else()
message(STATUS "Doxygen disabled.")
endif()
if (LLVM_ENABLE_SPHINX)
message(STATUS "Sphinx enabled.")
find_package(Sphinx REQUIRED)
if (LLVM_BUILD_DOCS)
add_custom_target(sphinx ALL)
endif()
else()
message(STATUS "Sphinx disabled.")
endif()
set(LLVM_BINDINGS "")
if(WIN32)
message(STATUS "Go bindings disabled.")
@ -547,6 +554,9 @@ set(LLVM_BINUTILS_INCDIR "" CACHE PATH
"PATH to binutils/include containing plugin-api.h for gold plugin.")
if(CMAKE_HOST_APPLE AND APPLE)
if(NOT CMAKE_XCRUN)
find_program(CMAKE_XCRUN NAMES xcrun)
endif()
if(CMAKE_XCRUN)
execute_process(COMMAND ${CMAKE_XCRUN} -find ld
OUTPUT_VARIABLE LD64_EXECUTABLE


@ -81,8 +81,9 @@ function(add_llvm_symbol_exports target_name export_file)
# Gold and BFD ld require a version script rather than a plain list.
set(native_export_file "${target_name}.exports")
# FIXME: Don't write the "local:" line on OpenBSD.
# in the export file, also add a linker script to version LLVM symbols (form: LLVM_N.M)
add_custom_command(OUTPUT ${native_export_file}
COMMAND echo "{" > ${native_export_file}
COMMAND echo "LLVM_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR} {" > ${native_export_file}
COMMAND grep -q "[[:alnum:]]" ${export_file} && echo " global:" >> ${native_export_file} || :
COMMAND sed -e "s/$/;/" -e "s/^/ /" < ${export_file} >> ${native_export_file}
COMMAND echo " local: *;" >> ${native_export_file}
@ -90,7 +91,7 @@ function(add_llvm_symbol_exports target_name export_file)
DEPENDS ${export_file}
VERBATIM
COMMENT "Creating export file for ${target_name}")
if (${CMAKE_SYSTEM_NAME} MATCHES "SunOS")
if (${LLVM_LINKER_IS_SOLARISLD})
set_property(TARGET ${target_name} APPEND_STRING PROPERTY
LINK_FLAGS " -Wl,-M,${CMAKE_CURRENT_BINARY_DIR}/${native_export_file}")
else()
@ -147,13 +148,28 @@ function(add_llvm_symbol_exports target_name export_file)
endfunction(add_llvm_symbol_exports)
if(NOT WIN32 AND NOT APPLE)
# Detect what linker we have here
execute_process(
COMMAND ${CMAKE_C_COMPILER} -Wl,--version
OUTPUT_VARIABLE stdout
ERROR_QUIET
ERROR_VARIABLE stderr
)
set(LLVM_LINKER_DETECTED ON)
if("${stdout}" MATCHES "GNU gold")
set(LLVM_LINKER_IS_GOLD ON)
message(STATUS "Linker detection: GNU Gold")
elseif("${stdout}" MATCHES "^LLD")
set(LLVM_LINKER_IS_LLD ON)
message(STATUS "Linker detection: LLD")
elseif("${stdout}" MATCHES "GNU ld")
set(LLVM_LINKER_IS_GNULD ON)
message(STATUS "Linker detection: GNU ld")
elseif("${stderr}" MATCHES "Solaris Link Editors")
set(LLVM_LINKER_IS_SOLARISLD ON)
message(STATUS "Linker detection: Solaris ld")
else()
set(LLVM_LINKER_DETECTED OFF)
message(STATUS "Linker detection: unknown")
endif()
endif()
@ -718,11 +734,11 @@ macro(add_llvm_executable name)
if(NOT ARG_IGNORE_EXTERNALIZE_DEBUGINFO)
llvm_externalize_debuginfo(${name})
endif()
if (PTHREAD_LIB)
if (LLVM_PTHREAD_LIB)
# libpthreads overrides some standard library symbols, so main
# executable must be linked with it in order to provide consistent
# API for all shared libraries loaded by this executable.
target_link_libraries(${name} ${PTHREAD_LIB})
target_link_libraries(${name} ${LLVM_PTHREAD_LIB})
endif()
endmacro(add_llvm_executable name)
@ -864,7 +880,7 @@ macro(add_llvm_utility name)
set_target_properties(${name} PROPERTIES FOLDER "Utils")
if( LLVM_INSTALL_UTILS AND LLVM_BUILD_UTILS )
install (TARGETS ${name}
RUNTIME DESTINATION bin
RUNTIME DESTINATION ${LLVM_UTILS_INSTALL_DIR}
COMPONENT ${name})
if (NOT CMAKE_CONFIGURATION_TYPES)
add_custom_target(install-${name}
@ -1027,7 +1043,7 @@ function(add_unittest test_suite test_name)
# libpthreads overrides some standard library symbols, so main
# executable must be linked with it in order to provide consistent
# API for all shared libraries loaded by this executable.
target_link_libraries(${test_name} gtest_main gtest ${PTHREAD_LIB})
target_link_libraries(${test_name} gtest_main gtest ${LLVM_PTHREAD_LIB})
add_dependencies(${test_suite} ${test_name})
get_target_property(test_suite_folder ${test_suite} FOLDER)
@ -1132,6 +1148,19 @@ function(configure_lit_site_cfg input output)
set(LIT_SITE_CFG_IN_HEADER "## Autogenerated from ${input}\n## Do not edit!")
# Override config_target_triple (and the env)
if(LLVM_TARGET_TRIPLE_ENV)
# This is expanded into the heading.
string(CONCAT LIT_SITE_CFG_IN_HEADER "${LIT_SITE_CFG_IN_HEADER}\n\n"
"import os\n"
"target_env = \"${LLVM_TARGET_TRIPLE_ENV}\"\n"
"config.target_triple = config.environment[target_env] = os.environ.get(target_env, \"${TARGET_TRIPLE}\")\n"
)
# This is expanded to; config.target_triple = ""+config.target_triple+""
set(TARGET_TRIPLE "\"+config.target_triple+\"")
endif()
configure_file(${input} ${output} @ONLY)
endfunction()
@ -1145,11 +1174,6 @@ function(add_lit_target target comment)
list(APPEND LIT_ARGS --param build_mode=${CMAKE_CFG_INTDIR})
endif ()
if (EXISTS ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py)
# reset cache after erraneous r283029
# TODO: remove this once all buildbots run
if (LIT_COMMAND STREQUAL "${PYTHON_EXECUTABLE} ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py")
unset(LIT_COMMAND CACHE)
endif()
set (LIT_COMMAND "${PYTHON_EXECUTABLE};${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py"
CACHE STRING "Command used to spawn llvm-lit")
else()
@ -1387,7 +1411,11 @@ function(llvm_externalize_debuginfo name)
endif()
if(NOT LLVM_EXTERNALIZE_DEBUGINFO_SKIP_STRIP)
set(strip_command COMMAND xcrun strip -Sxl $<TARGET_FILE:${name}>)
if(APPLE)
set(strip_command COMMAND xcrun strip -Sxl $<TARGET_FILE:${name}>)
else()
set(strip_command COMMAND strip -gx $<TARGET_FILE:${name}>)
endif()
endif()
if(APPLE)
@ -1403,7 +1431,11 @@ function(llvm_externalize_debuginfo name)
${strip_command}
)
else()
message(FATAL_ERROR "LLVM_EXTERNALIZE_DEBUGINFO isn't implemented for non-darwin platforms!")
add_custom_command(TARGET ${name} POST_BUILD
COMMAND objcopy --only-keep-debug $<TARGET_FILE:${name}> $<TARGET_FILE:${name}>.debug
${strip_command} -R .gnu_debuglink
COMMAND objcopy --add-gnu-debuglink=$<TARGET_FILE:${name}>.debug $<TARGET_FILE:${name}>
)
endif()
endfunction()
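
The linker-detection block added above records the result in LLVM_LINKER_DETECTED together with LLVM_LINKER_IS_GOLD, LLVM_LINKER_IS_LLD, LLVM_LINKER_IS_GNULD and LLVM_LINKER_IS_SOLARISLD, which add_llvm_symbol_exports now consults instead of matching on CMAKE_SYSTEM_NAME. A minimal sketch of how later CMake code could key off these variables; the gold flag shown is only an example and is not added by this commit.

# Assumes this runs after AddLLVM.cmake has performed linker detection.
if(LLVM_LINKER_DETECTED AND LLVM_LINKER_IS_GOLD)
  # Identical code folding is supported by gold but not by BFD ld of this era.
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--icf=all")
elseif(LLVM_LINKER_DETECTED AND LLVM_LINKER_IS_SOLARISLD)
  # Solaris ld takes its export map via -Wl,-M, as add_llvm_symbol_exports does above.
  message(STATUS "Relying on Solaris ld mapfile handling")
endif()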


@ -87,6 +87,11 @@ function(add_ocaml_library name)
foreach( include_dir ${LLVM_INCLUDE_DIR} ${LLVM_MAIN_INCLUDE_DIR} )
set(c_flags "${c_flags} -I${include_dir}")
endforeach()
# include -D/-UNDEBUG to match dump function visibility
# regex from HandleLLVMOptions.cmake
string(REGEX MATCH "(^| )[/-][UD] *NDEBUG($| )" flag_matches
"${CMAKE_C_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${CMAKE_C_FLAGS}")
set(c_flags "${c_flags} ${flag_matches}")
foreach( ocaml_file ${ARG_OCAML} )
list(APPEND sources "${ocaml_file}.mli" "${ocaml_file}.ml")
@ -199,7 +204,7 @@ function(add_ocaml_library name)
PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE
GROUP_READ GROUP_EXECUTE
WORLD_READ WORLD_EXECUTE
DESTINATION "${LLVM_OCAML_INSTALL_PATH}/llvm")
DESTINATION "${LLVM_OCAML_INSTALL_PATH}/stublibs")
foreach( install_file ${install_files} ${install_shlibs} )
get_filename_component(filename "${install_file}" NAME)


@ -1,3 +1,16 @@
# Create sphinx target
if (LLVM_ENABLE_SPHINX)
message(STATUS "Sphinx enabled.")
find_package(Sphinx REQUIRED)
if (LLVM_BUILD_DOCS AND NOT TARGET sphinx)
add_custom_target(sphinx ALL)
endif()
else()
message(STATUS "Sphinx disabled.")
endif()
# Handy function for creating the different Sphinx targets.
#
# ``builder`` should be one of the supported builders used by
@ -48,10 +61,15 @@ function (add_sphinx_target builder project)
# Handle installation
if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
if (builder STREQUAL man)
if (CMAKE_INSTALL_MANDIR)
set(INSTALL_MANDIR ${CMAKE_INSTALL_MANDIR}/)
else()
set(INSTALL_MANDIR share/man/)
endif()
# FIXME: We might not ship all the tools that these man pages describe
install(DIRECTORY "${SPHINX_BUILD_DIR}/" # Slash indicates contents of
COMPONENT "${project}-sphinx-man"
DESTINATION share/man/man1)
DESTINATION ${INSTALL_MANDIR}man1)
elseif (builder STREQUAL html)
string(TOUPPER "${project}" project_upper)


@ -8,12 +8,47 @@ string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)
include(CheckCompilerVersion)
include(HandleLLVMStdlib)
include(AddLLVMDefinitions)
include(CheckCCompilerFlag)
include(CheckCXXCompilerFlag)
if(CMAKE_LINKER MATCHES "lld-link.exe" OR (WIN32 AND LLVM_USE_LINKER STREQUAL "lld"))
set(LINKER_IS_LLD_LINK TRUE)
else()
set(LINKER_IS_LLD_LINK FALSE)
endif()
if (CMAKE_LINKER MATCHES "lld-link.exe")
set(LLVM_ENABLE_LTO OFF CACHE STRING "Build LLVM with LTO. May be specified as Thin or Full to use a particular kind of LTO")
string(TOUPPER "${LLVM_ENABLE_LTO}" uppercase_LLVM_ENABLE_LTO)
# Ninja Job Pool support
# The following only works with the Ninja generator in CMake >= 3.0.
set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING
"Define the maximum number of concurrent compilation jobs.")
if(LLVM_PARALLEL_COMPILE_JOBS)
if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja")
message(WARNING "Job pooling is only available with Ninja generators.")
else()
set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${LLVM_PARALLEL_COMPILE_JOBS})
set(CMAKE_JOB_POOL_COMPILE compile_job_pool)
endif()
endif()
set(LLVM_PARALLEL_LINK_JOBS "" CACHE STRING
"Define the maximum number of concurrent link jobs.")
if(CMAKE_MAKE_PROGRAM MATCHES "ninja")
if(NOT LLVM_PARALLEL_LINK_JOBS AND uppercase_LLVM_ENABLE_LTO STREQUAL "THIN")
message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.")
set(LLVM_PARALLEL_LINK_JOBS "2")
endif()
if(LLVM_PARALLEL_LINK_JOBS)
set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${LLVM_PARALLEL_LINK_JOBS})
set(CMAKE_JOB_POOL_LINK link_job_pool)
endif()
elseif(LLVM_PARALLEL_LINK_JOBS)
message(WARNING "Job pooling is only available with Ninja generators.")
endif()
if (LINKER_IS_LLD_LINK)
# Pass /MANIFEST:NO so that CMake doesn't run mt.exe on our binaries. Adding
# manifests with mt.exe breaks LLD's symbol tables and takes as much time as
# the link. See PR24476.
@ -66,6 +101,10 @@ else()
message(FATAL_ERROR "Unknown value for LLVM_ABI_BREAKING_CHECKS: \"${LLVM_ABI_BREAKING_CHECKS}\"!")
endif()
if( LLVM_REVERSE_ITERATION )
set( LLVM_ENABLE_REVERSE_ITERATION 1 )
endif()
if(WIN32)
set(LLVM_HAVE_LINK_VERSION_SCRIPT 0)
if(CYGWIN)
@ -147,9 +186,19 @@ function(add_flag_or_print_warning flag name)
endif()
endfunction()
if(LLVM_ENABLE_LLD)
check_cxx_compiler_flag("-fuse-ld=lld" CXX_SUPPORTS_LLD)
append_if(CXX_SUPPORTS_LLD "-fuse-ld=lld"
if( LLVM_ENABLE_LLD )
if ( LLVM_USE_LINKER )
message(FATAL_ERROR "LLVM_ENABLE_LLD and LLVM_USE_LINKER can't be set at the same time")
endif()
set(LLVM_USE_LINKER "lld")
endif()
if( LLVM_USE_LINKER )
check_cxx_compiler_flag("-fuse-ld=${LLVM_USE_LINKER}" CXX_SUPPORTS_CUSTOM_LINKER)
if ( NOT CXX_SUPPORTS_CUSTOM_LINKER )
message(FATAL_ERROR "Host compiler does not support '-fuse-ld=${LLVM_USE_LINKER}'")
endif()
append("-fuse-ld=${LLVM_USE_LINKER}"
CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
endif()
@ -183,6 +232,13 @@ if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
endif( LLVM_BUILD_32_BITS )
endif( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
# If building on a GNU specific 32-bit system, make sure off_t is 64 bits
# so that off_t can store offsets > 2GB
if( CMAKE_SIZEOF_VOID_P EQUAL 4 )
add_definitions( -D_LARGEFILE_SOURCE )
add_definitions( -D_FILE_OFFSET_BITS=64 )
endif()
if( XCODE )
# For Xcode enable several build settings that correspond to
# many warnings that are on by default in Clang but are
@ -213,10 +269,10 @@ if( MSVC_IDE )
"Number of parallel compiler jobs. 0 means use all processors. Default is 0.")
if( NOT LLVM_COMPILER_JOBS STREQUAL "1" )
if( LLVM_COMPILER_JOBS STREQUAL "0" )
add_llvm_definitions( /MP )
add_definitions( /MP )
else()
message(STATUS "Number of parallel compiler jobs set to " ${LLVM_COMPILER_JOBS})
add_llvm_definitions( /MP${LLVM_COMPILER_JOBS} )
add_definitions( /MP${LLVM_COMPILER_JOBS} )
endif()
else()
message(STATUS "Parallel compilation disabled")
@ -245,17 +301,17 @@ if( MSVC )
if( CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.0 )
# For MSVC 2013, disable iterator null pointer checking in debug mode,
# especially so std::equal(nullptr, nullptr, nullptr) will not assert.
add_llvm_definitions("-D_DEBUG_POINTER_IMPL=")
add_definitions("-D_DEBUG_POINTER_IMPL=")
endif()
include(ChooseMSVCCRT)
if( MSVC11 )
add_llvm_definitions(-D_VARIADIC_MAX=10)
add_definitions(-D_VARIADIC_MAX=10)
endif()
# Add definitions that make MSVC much less annoying.
add_llvm_definitions(
add_definitions(
# For some reason MS wants to deprecate a bunch of standard functions...
-D_CRT_SECURE_NO_DEPRECATE
-D_CRT_SECURE_NO_WARNINGS
@ -266,94 +322,15 @@ if( MSVC )
)
# Tell MSVC to use the Unicode version of the Win32 APIs instead of ANSI.
add_llvm_definitions(
add_definitions(
-DUNICODE
-D_UNICODE
)
set(msvc_warning_flags
# Disabled warnings.
-wd4141 # Suppress ''modifier' : used more than once' (because of __forceinline combined with inline)
-wd4146 # Suppress 'unary minus operator applied to unsigned type, result still unsigned'
-wd4180 # Suppress 'qualifier applied to function type has no meaning; ignored'
-wd4244 # Suppress ''argument' : conversion from 'type1' to 'type2', possible loss of data'
-wd4258 # Suppress ''var' : definition from the for loop is ignored; the definition from the enclosing scope is used'
-wd4267 # Suppress ''var' : conversion from 'size_t' to 'type', possible loss of data'
-wd4291 # Suppress ''declaration' : no matching operator delete found; memory will not be freed if initialization throws an exception'
-wd4345 # Suppress 'behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized'
-wd4351 # Suppress 'new behavior: elements of array 'array' will be default initialized'
-wd4355 # Suppress ''this' : used in base member initializer list'
-wd4456 # Suppress 'declaration of 'var' hides local variable'
-wd4457 # Suppress 'declaration of 'var' hides function parameter'
-wd4458 # Suppress 'declaration of 'var' hides class member'
-wd4459 # Suppress 'declaration of 'var' hides global declaration'
-wd4503 # Suppress ''identifier' : decorated name length exceeded, name was truncated'
-wd4624 # Suppress ''derived class' : destructor could not be generated because a base class destructor is inaccessible'
-wd4722 # Suppress 'function' : destructor never returns, potential memory leak
-wd4800 # Suppress ''type' : forcing value to bool 'true' or 'false' (performance warning)'
-wd4100 # Suppress 'unreferenced formal parameter'
-wd4127 # Suppress 'conditional expression is constant'
-wd4512 # Suppress 'assignment operator could not be generated'
-wd4505 # Suppress 'unreferenced local function has been removed'
-wd4610 # Suppress '<class> can never be instantiated'
-wd4510 # Suppress 'default constructor could not be generated'
-wd4702 # Suppress 'unreachable code'
-wd4245 # Suppress 'signed/unsigned mismatch'
-wd4706 # Suppress 'assignment within conditional expression'
-wd4310 # Suppress 'cast truncates constant value'
-wd4701 # Suppress 'potentially uninitialized local variable'
-wd4703 # Suppress 'potentially uninitialized local pointer variable'
-wd4389 # Suppress 'signed/unsigned mismatch'
-wd4611 # Suppress 'interaction between '_setjmp' and C++ object destruction is non-portable'
-wd4805 # Suppress 'unsafe mix of type <type> and type <type> in operation'
-wd4204 # Suppress 'nonstandard extension used : non-constant aggregate initializer'
-wd4577 # Suppress 'noexcept used with no exception handling mode specified; termination on exception is not guaranteed'
-wd4091 # Suppress 'typedef: ignored on left of '' when no variable is declared'
# C4592 is disabled because of false positives in Visual Studio 2015
# Update 1. Re-evaluate the usefulness of this diagnostic with Update 2.
-wd4592 # Suppress ''var': symbol will be dynamically initialized (implementation limitation)
-wd4319 # Suppress ''operator' : zero extending 'type' to 'type' of greater size'
# Ideally, we'd like this warning to be enabled, but MSVC 2013 doesn't
# support the 'aligned' attribute in the way that clang sources requires (for
# any code that uses the LLVM_ALIGNAS macro), so this must be disabled to
# avoid unwanted alignment warnings.
# When we switch to requiring a version of MSVC that supports the 'alignas'
# specifier (MSVC 2015?) this warning can be re-enabled.
-wd4324 # Suppress 'structure was padded due to __declspec(align())'
# Promoted warnings.
-w14062 # Promote 'enumerator in switch of enum is not handled' to level 1 warning.
# Promoted warnings to errors.
-we4238 # Promote 'nonstandard extension used : class rvalue used as lvalue' to error.
)
# Enable warnings
if (LLVM_ENABLE_WARNINGS)
# Put /W4 in front of all the -we flags. cl.exe doesn't care, but for
# clang-cl having /W4 after the -we flags will re-enable the warnings
# disabled by -we.
set(msvc_warning_flags "/W4 ${msvc_warning_flags}")
# CMake appends /W3 by default, and having /W3 followed by /W4 will result in
# cl : Command line warning D9025 : overriding '/W3' with '/W4'. Since this is
# a command line warning and not a compiler warning, it cannot be suppressed except
# by fixing the command line.
string(REGEX REPLACE " /W[0-4]" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
string(REGEX REPLACE " /W[0-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
if (LLVM_ENABLE_PEDANTIC)
# No MSVC equivalent available
endif (LLVM_ENABLE_PEDANTIC)
endif (LLVM_ENABLE_WARNINGS)
if (LLVM_ENABLE_WERROR)
append("/WX" msvc_warning_flags)
append("/WX" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif (LLVM_ENABLE_WERROR)
foreach(flag ${msvc_warning_flags})
append("${flag}" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endforeach(flag)
append("/Zc:inline" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
# /Zc:strictStrings is incompatible with VS12's (Visual Studio 2013's)
@ -373,11 +350,13 @@ if( MSVC )
# "Enforce type conversion rules".
append("/Zc:rvalueCast" CMAKE_CXX_FLAGS)
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT LLVM_ENABLE_LTO)
# clang-cl and cl by default produce non-deterministic binaries because
# link.exe /incremental requires a timestamp in the .obj file. clang-cl
# has the flag /Brepro to force deterministic binaries. We want to pass that
# whenever you're building with clang unless you're passing /incremental.
# whenever you're building with clang unless you're passing /incremental
# or using LTO (/Brepro with LTO would result in a warning about the flag
# being unused, because we're not generating object files).
# This checks CMAKE_CXX_COMPILER_ID in addition to check_cxx_compiler_flag()
# because cl.exe does not emit an error on flags it doesn't understand,
# letting check_cxx_compiler_flag() claim it understands all flags.
@ -401,68 +380,14 @@ if( MSVC )
endif()
elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE )
if (LLVM_ENABLE_WARNINGS)
append("-Wall -W -Wno-unused-parameter -Wwrite-strings" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
append("-Wcast-qual" CMAKE_CXX_FLAGS)
# Turn off missing field initializer warnings for gcc to avoid noise from
# false positives with empty {}. Turn them on otherwise (they're off by
# default for clang).
check_cxx_compiler_flag("-Wmissing-field-initializers" CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG)
if (CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG)
if (CMAKE_COMPILER_IS_GNUCXX)
append("-Wno-missing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
else()
append("-Wmissing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
endif()
append_if(LLVM_ENABLE_PEDANTIC "-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
append_if(LLVM_ENABLE_PEDANTIC "-Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
add_flag_if_supported("-Wcovered-switch-default" COVERED_SWITCH_DEFAULT_FLAG)
append_if(USE_NO_UNINITIALIZED "-Wno-uninitialized" CMAKE_CXX_FLAGS)
append_if(USE_NO_MAYBE_UNINITIALIZED "-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)
# Check if -Wnon-virtual-dtor warns even though the class is marked final.
# If it does, don't add it. So it won't be added on clang 3.4 and older.
# This also catches cases when -Wnon-virtual-dtor isn't supported by
# the compiler at all. This flag is not activated for gcc since it will
# incorrectly identify a protected non-virtual base when there is a friend
# declaration.
if (NOT CMAKE_COMPILER_IS_GNUCXX)
set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11 -Werror=non-virtual-dtor")
CHECK_CXX_SOURCE_COMPILES("class base {public: virtual void anchor();protected: ~base();};
class derived final : public base { public: ~derived();};
int main() { return 0; }"
CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR)
set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
append_if(CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR
"-Wnon-virtual-dtor" CMAKE_CXX_FLAGS)
endif()
# Enable -Wdelete-non-virtual-dtor if available.
add_flag_if_supported("-Wdelete-non-virtual-dtor" DELETE_NON_VIRTUAL_DTOR_FLAG)
# Check if -Wcomment is OK with an // comment ending with '\' if the next
# line is also a // comment.
set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror -Wcomment")
CHECK_C_SOURCE_COMPILES("// \\\\\\n//\\nint main() {return 0;}"
C_WCOMMENT_ALLOWS_LINE_WRAP)
set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
if (NOT C_WCOMMENT_ALLOWS_LINE_WRAP)
append("-Wno-comment" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
# Enable -Wstring-conversion to catch misuse of string literals.
add_flag_if_supported("-Wstring-conversion" STRING_CONVERSION_FLAG)
endif (LLVM_ENABLE_WARNINGS)
append_if(LLVM_ENABLE_WERROR "-Werror" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
add_flag_if_supported("-Werror=date-time" WERROR_DATE_TIME)
if (LLVM_ENABLE_CXX1Y)
check_cxx_compiler_flag("-std=c++1y" CXX_SUPPORTS_CXX1Y)
append_if(CXX_SUPPORTS_CXX1Y "-std=c++1y" CMAKE_CXX_FLAGS)
elseif(LLVM_ENABLE_CXX1Z)
check_cxx_compiler_flag("-std=c++1z" CXX_SUPPORTS_CXX1Z)
append_if(CXX_SUPPORTS_CXX1Z "-std=c++1z" CMAKE_CXX_FLAGS)
else()
check_cxx_compiler_flag("-std=c++11" CXX_SUPPORTS_CXX11)
if (CXX_SUPPORTS_CXX11)
@ -511,6 +436,155 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE )
endif(LLVM_ENABLE_MODULES)
endif( MSVC )
if (MSVC AND NOT CLANG_CL)
set(msvc_warning_flags
# Disabled warnings.
-wd4141 # Suppress ''modifier' : used more than once' (because of __forceinline combined with inline)
-wd4146 # Suppress 'unary minus operator applied to unsigned type, result still unsigned'
-wd4180 # Suppress 'qualifier applied to function type has no meaning; ignored'
-wd4244 # Suppress ''argument' : conversion from 'type1' to 'type2', possible loss of data'
-wd4258 # Suppress ''var' : definition from the for loop is ignored; the definition from the enclosing scope is used'
-wd4267 # Suppress ''var' : conversion from 'size_t' to 'type', possible loss of data'
-wd4291 # Suppress ''declaration' : no matching operator delete found; memory will not be freed if initialization throws an exception'
-wd4345 # Suppress 'behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized'
-wd4351 # Suppress 'new behavior: elements of array 'array' will be default initialized'
-wd4355 # Suppress ''this' : used in base member initializer list'
-wd4456 # Suppress 'declaration of 'var' hides local variable'
-wd4457 # Suppress 'declaration of 'var' hides function parameter'
-wd4458 # Suppress 'declaration of 'var' hides class member'
-wd4459 # Suppress 'declaration of 'var' hides global declaration'
-wd4503 # Suppress ''identifier' : decorated name length exceeded, name was truncated'
-wd4624 # Suppress ''derived class' : destructor could not be generated because a base class destructor is inaccessible'
-wd4722 # Suppress 'function' : destructor never returns, potential memory leak
-wd4800 # Suppress ''type' : forcing value to bool 'true' or 'false' (performance warning)'
-wd4100 # Suppress 'unreferenced formal parameter'
-wd4127 # Suppress 'conditional expression is constant'
-wd4512 # Suppress 'assignment operator could not be generated'
-wd4505 # Suppress 'unreferenced local function has been removed'
-wd4610 # Suppress '<class> can never be instantiated'
-wd4510 # Suppress 'default constructor could not be generated'
-wd4702 # Suppress 'unreachable code'
-wd4245 # Suppress 'signed/unsigned mismatch'
-wd4706 # Suppress 'assignment within conditional expression'
-wd4310 # Suppress 'cast truncates constant value'
-wd4701 # Suppress 'potentially uninitialized local variable'
-wd4703 # Suppress 'potentially uninitialized local pointer variable'
-wd4389 # Suppress 'signed/unsigned mismatch'
-wd4611 # Suppress 'interaction between '_setjmp' and C++ object destruction is non-portable'
-wd4805 # Suppress 'unsafe mix of type <type> and type <type> in operation'
-wd4204 # Suppress 'nonstandard extension used : non-constant aggregate initializer'
-wd4577 # Suppress 'noexcept used with no exception handling mode specified; termination on exception is not guaranteed'
-wd4091 # Suppress 'typedef: ignored on left of '' when no variable is declared'
# C4592 is disabled because of false positives in Visual Studio 2015
# Update 1. Re-evaluate the usefulness of this diagnostic with Update 2.
-wd4592 # Suppress ''var': symbol will be dynamically initialized (implementation limitation)
-wd4319 # Suppress ''operator' : zero extending 'type' to 'type' of greater size'
# Ideally, we'd like this warning to be enabled, but MSVC 2013 doesn't
# support the 'aligned' attribute in the way that clang sources requires (for
# any code that uses the LLVM_ALIGNAS macro), so this must be disabled to
# avoid unwanted alignment warnings.
# When we switch to requiring a version of MSVC that supports the 'alignas'
# specifier (MSVC 2015?) this warning can be re-enabled.
-wd4324 # Suppress 'structure was padded due to __declspec(align())'
# Promoted warnings.
-w14062 # Promote 'enumerator in switch of enum is not handled' to level 1 warning.
# Promoted warnings to errors.
-we4238 # Promote 'nonstandard extension used : class rvalue used as lvalue' to error.
)
# Enable warnings
if (LLVM_ENABLE_WARNINGS)
# Put /W4 in front of all the -we flags. cl.exe doesn't care, but for
# clang-cl having /W4 after the -we flags will re-enable the warnings
# disabled by -we.
set(msvc_warning_flags "/W4 ${msvc_warning_flags}")
# CMake appends /W3 by default, and having /W3 followed by /W4 will result in
# cl : Command line warning D9025 : overriding '/W3' with '/W4'. Since this is
# a command line warning and not a compiler warning, it cannot be suppressed except
# by fixing the command line.
string(REGEX REPLACE " /W[0-4]" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
string(REGEX REPLACE " /W[0-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
if (LLVM_ENABLE_PEDANTIC)
# No MSVC equivalent available
endif (LLVM_ENABLE_PEDANTIC)
endif (LLVM_ENABLE_WARNINGS)
foreach(flag ${msvc_warning_flags})
append("${flag}" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endforeach(flag)
endif (MSVC AND NOT CLANG_CL)
if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL))
append("-Wall -W -Wno-unused-parameter -Wwrite-strings" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
append("-Wcast-qual" CMAKE_CXX_FLAGS)
# Turn off missing field initializer warnings for gcc to avoid noise from
# false positives with empty {}. Turn them on otherwise (they're off by
# default for clang).
check_cxx_compiler_flag("-Wmissing-field-initializers" CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG)
if (CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG)
if (CMAKE_COMPILER_IS_GNUCXX)
append("-Wno-missing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
else()
append("-Wmissing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
endif()
if (LLVM_ENABLE_PEDANTIC AND LLVM_COMPILER_IS_GCC_COMPATIBLE)
append("-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
append("-Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
add_flag_if_supported("-Wcovered-switch-default" COVERED_SWITCH_DEFAULT_FLAG)
append_if(USE_NO_UNINITIALIZED "-Wno-uninitialized" CMAKE_CXX_FLAGS)
append_if(USE_NO_MAYBE_UNINITIALIZED "-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)
# Check if -Wnon-virtual-dtor warns even though the class is marked final.
# If it does, don't add it. So it won't be added on clang 3.4 and older.
# This also catches cases when -Wnon-virtual-dtor isn't supported by
# the compiler at all. This flag is not activated for gcc since it will
# incorrectly identify a protected non-virtual base when there is a friend
# declaration. Don't activate this in general on Windows as this warning has
# too many false positives on COM-style classes, which are destroyed with
# Release() (PR32286).
if (NOT CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11 -Werror=non-virtual-dtor")
CHECK_CXX_SOURCE_COMPILES("class base {public: virtual void anchor();protected: ~base();};
class derived final : public base { public: ~derived();};
int main() { return 0; }"
CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR)
set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
append_if(CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR
"-Wnon-virtual-dtor" CMAKE_CXX_FLAGS)
endif()
# Enable -Wdelete-non-virtual-dtor if available.
add_flag_if_supported("-Wdelete-non-virtual-dtor" DELETE_NON_VIRTUAL_DTOR_FLAG)
# Check if -Wcomment is OK with an // comment ending with '\' if the next
# line is also a // comment.
set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror -Wcomment")
CHECK_C_SOURCE_COMPILES("// \\\\\\n//\\nint main() {return 0;}"
C_WCOMMENT_ALLOWS_LINE_WRAP)
set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
if (NOT C_WCOMMENT_ALLOWS_LINE_WRAP)
append("-Wno-comment" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
# Enable -Wstring-conversion to catch misuse of string literals.
add_flag_if_supported("-Wstring-conversion" STRING_CONVERSION_FLAG)
endif (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL))
if (LLVM_COMPILER_IS_GCC_COMPATIBLE AND NOT LLVM_ENABLE_WARNINGS)
append("-w" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
macro(append_common_sanitizer_flags)
if (NOT MSVC)
# Append -fno-omit-frame-pointer and turn on debug info to get better
@ -527,7 +601,7 @@ macro(append_common_sanitizer_flags)
elseif (CLANG_CL)
# Keep frame pointers around.
append("/Oy-" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
if (CMAKE_LINKER MATCHES "lld-link.exe")
if (LINKER_IS_LLD_LINK)
# Use DWARF debug info with LLD.
append("-gdwarf" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
else()
@ -555,8 +629,11 @@ if(LLVM_USE_SANITIZER)
append_common_sanitizer_flags()
append("-fsanitize=undefined -fno-sanitize=vptr,function -fno-sanitize-recover=all"
CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
append("-fsanitize-blacklist=${CMAKE_SOURCE_DIR}/utils/sanitizers/ubsan_blacklist.txt"
CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
set(BLACKLIST_FILE "${CMAKE_SOURCE_DIR}/utils/sanitizers/ubsan_blacklist.txt")
if (EXISTS "${BLACKLIST_FILE}")
append("-fsanitize-blacklist=${BLACKLIST_FILE}"
CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
elseif (LLVM_USE_SANITIZER STREQUAL "Thread")
append_common_sanitizer_flags()
append("-fsanitize=thread" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
@ -565,6 +642,9 @@ if(LLVM_USE_SANITIZER)
append_common_sanitizer_flags()
append("-fsanitize=address,undefined -fno-sanitize=vptr,function -fno-sanitize-recover=all"
CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
elseif (LLVM_USE_SANITIZER STREQUAL "Leaks")
append_common_sanitizer_flags()
append("-fsanitize=leak" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
else()
message(FATAL_ERROR "Unsupported value of LLVM_USE_SANITIZER: ${LLVM_USE_SANITIZER}")
endif()
@ -578,6 +658,10 @@ if(LLVM_USE_SANITIZER)
else()
message(FATAL_ERROR "LLVM_USE_SANITIZER is not supported on this platform.")
endif()
if (LLVM_USE_SANITIZER MATCHES "(Undefined;)?Address(;Undefined)?")
add_flag_if_supported("-fsanitize-address-use-after-scope"
FSANITIZE_USE_AFTER_SCOPE_FLAG)
endif()
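# Illustrative sketch: a sanitized build is typically requested by seeding the
# cache variables handled above, for example from a cache file passed with -C
# or via -D on the cmake command line:
#
#   set(LLVM_USE_SANITIZER "Address;Undefined" CACHE STRING "")
#   set(LLVM_USE_SANITIZE_COVERAGE ON CACHE BOOL "")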
if (LLVM_USE_SANITIZE_COVERAGE)
append("-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
@ -588,9 +672,9 @@ if(LLVM_USE_SPLIT_DWARF)
add_definitions("-gsplit-dwarf")
endif()
add_llvm_definitions( -D__STDC_CONSTANT_MACROS )
add_llvm_definitions( -D__STDC_FORMAT_MACROS )
add_llvm_definitions( -D__STDC_LIMIT_MACROS )
add_definitions( -D__STDC_CONSTANT_MACROS )
add_definitions( -D__STDC_FORMAT_MACROS )
add_definitions( -D__STDC_LIMIT_MACROS )
# clang doesn't print colored diagnostics when invoked from Ninja
if (UNIX AND
@ -602,8 +686,8 @@ endif()
# lld doesn't print colored diagnostics when invoked from Ninja
if (UNIX AND CMAKE_GENERATOR STREQUAL "Ninja")
include(CheckLinkerFlag)
check_linker_flag("-Wl,-color-diagnostics" LINKER_SUPPORTS_COLOR_DIAGNOSTICS)
append_if(LINKER_SUPPORTS_COLOR_DIAGNOSTICS "-Wl,-color-diagnostics"
check_linker_flag("-Wl,--color-diagnostics" LINKER_SUPPORTS_COLOR_DIAGNOSTICS)
append_if(LINKER_SUPPORTS_COLOR_DIAGNOSTICS "-Wl,--color-diagnostics"
CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
endif()
@ -656,22 +740,38 @@ append_if(LLVM_BUILD_INSTRUMENTED_COVERAGE "-fprofile-instr-generate='${LLVM_PRO
CMAKE_EXE_LINKER_FLAGS
CMAKE_SHARED_LINKER_FLAGS)
set(LLVM_ENABLE_LTO OFF CACHE STRING "Build LLVM with LTO. May be specified as Thin or Full to use a particular kind of LTO")
string(TOUPPER "${LLVM_ENABLE_LTO}" uppercase_LLVM_ENABLE_LTO)
if(LLVM_ENABLE_LTO AND LLVM_ON_WIN32 AND NOT LINKER_IS_LLD_LINK)
message(FATAL_ERROR "When compiling for Windows, LLVM_ENABLE_LTO requires using lld as the linker (point CMAKE_LINKER at lld-link.exe)")
endif()
if(uppercase_LLVM_ENABLE_LTO STREQUAL "THIN")
append("-flto=thin" CMAKE_CXX_FLAGS CMAKE_C_FLAGS
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
# On darwin, enable the lto cache. This improves initial build time a little
# since we re-link a lot of the same objects, and significantly improves
# incremental build time.
append_if(APPLE "-Wl,-cache_path_lto,${PROJECT_BINARY_DIR}/lto.cache"
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
append("-flto=thin" CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
if(NOT LINKER_IS_LLD_LINK)
append("-flto=thin" CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
endif()
# If the linker supports it, enable the lto cache. This improves initial build
# time a little since we re-link a lot of the same objects, and significantly
# improves incremental build time.
# FIXME: We should move all this logic into the clang driver.
if(APPLE)
append("-Wl,-cache_path_lto,${PROJECT_BINARY_DIR}/lto.cache"
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
elseif(UNIX AND LLVM_USE_LINKER STREQUAL "lld")
append("-Wl,--thinlto-cache-dir=${PROJECT_BINARY_DIR}/lto.cache"
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
elseif(LLVM_USE_LINKER STREQUAL "gold")
append("-Wl,--plugin-opt,cache-dir=${PROJECT_BINARY_DIR}/lto.cache"
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
endif()
elseif(uppercase_LLVM_ENABLE_LTO STREQUAL "FULL")
append("-flto=full" CMAKE_CXX_FLAGS CMAKE_C_FLAGS
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
append("-flto=full" CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
if(NOT LINKER_IS_LLD_LINK)
append("-flto=full" CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
endif()
elseif(LLVM_ENABLE_LTO)
append("-flto" CMAKE_CXX_FLAGS CMAKE_C_FLAGS
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
append("-flto" CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
if(NOT LINKER_IS_LLD_LINK)
append("-flto" CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
endif()
endif()
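# Illustrative sketch: ThinLTO is typically requested through the cache variable
# consumed above; pairing it with lld lets the ThinLTO cache-directory logic
# above take effect on Linux (assumes lld is available on the host):
#
#   set(LLVM_ENABLE_LTO Thin CACHE STRING "")
#   set(LLVM_USE_LINKER lld CACHE STRING "")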
# This option makes utils/extract_symbols.py be used to determine the list of
@ -698,3 +798,16 @@ if(WIN32 OR CYGWIN)
else()
set(LLVM_ENABLE_PLUGINS ON)
endif()
function(get_compile_definitions)
get_directory_property(top_dir_definitions DIRECTORY ${CMAKE_SOURCE_DIR} COMPILE_DEFINITIONS)
foreach(definition ${top_dir_definitions})
if(DEFINED result)
string(APPEND result " -D${definition}")
else()
set(result "-D${definition}")
endif()
endforeach()
set(LLVM_DEFINITIONS "${result}" PARENT_SCOPE)
endfunction()
get_compile_definitions()
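# Illustrative sketch: after the call above, LLVM_DEFINITIONS holds the
# top-level directory's COMPILE_DEFINITIONS flattened into a single
# "-D<def> -D<def> ..." string (here that includes the __STDC_*_MACROS
# definitions added earlier), which can be inspected with:
#
#   message(STATUS "LLVM_DEFINITIONS: ${LLVM_DEFINITIONS}")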

View File

@ -8,27 +8,61 @@ function(link_system_libs target)
message(AUTHOR_WARNING "link_system_libs no longer needed")
endfunction()
# is_llvm_target_library(
# library
# Name of the LLVM library to check
# return_var
# Output variable name
# ALL_TARGETS;INCLUDED_TARGETS;OMITTED_TARGETS
# ALL_TARGETS - default looks at the full list of known targets
# INCLUDED_TARGETS - looks only at targets being configured
# OMITTED_TARGETS - looks only at targets that are not being configured
# )
function(is_llvm_target_library library return_var)
cmake_parse_arguments(ARG "ALL_TARGETS;INCLUDED_TARGETS;OMITTED_TARGETS" "" "" ${ARGN})
# Sets variable `return_var' to ON if `library' corresponds to a
# LLVM supported target. To OFF if it doesn't.
set(${return_var} OFF PARENT_SCOPE)
string(TOUPPER "${library}" capitalized_lib)
string(TOUPPER "${LLVM_ALL_TARGETS}" targets)
if(ARG_INCLUDED_TARGETS)
string(TOUPPER "${LLVM_TARGETS_TO_BUILD}" targets)
elseif(ARG_OMITTED_TARGETS)
set(omitted_targets ${LLVM_ALL_TARGETS})
list(REMOVE_ITEM omitted_targets ${LLVM_TARGETS_TO_BUILD})
string(TOUPPER "${omitted_targets}" targets)
else()
string(TOUPPER "${LLVM_ALL_TARGETS}" targets)
endif()
foreach(t ${targets})
if( capitalized_lib STREQUAL t OR
capitalized_lib STREQUAL "LLVM${t}" OR
capitalized_lib STREQUAL "LLVM${t}CODEGEN" OR
capitalized_lib STREQUAL "LLVM${t}ASMPARSER" OR
capitalized_lib STREQUAL "LLVM${t}ASMPRINTER" OR
capitalized_lib STREQUAL "LLVM${t}DISASSEMBLER" OR
capitalized_lib STREQUAL "LLVM${t}INFO" )
capitalized_lib STREQUAL "${t}" OR
capitalized_lib STREQUAL "${t}DESC" OR
capitalized_lib STREQUAL "${t}CODEGEN" OR
capitalized_lib STREQUAL "${t}ASMPARSER" OR
capitalized_lib STREQUAL "${t}ASMPRINTER" OR
capitalized_lib STREQUAL "${t}DISASSEMBLER" OR
capitalized_lib STREQUAL "${t}INFO" OR
capitalized_lib STREQUAL "${t}UTILS" )
set(${return_var} ON PARENT_SCOPE)
break()
endif()
endforeach()
endfunction(is_llvm_target_library)
function(is_llvm_target_specifier library return_var)
is_llvm_target_library(${library} ${return_var} ${ARGN})
string(TOUPPER "${library}" capitalized_lib)
if(NOT ${return_var})
if( capitalized_lib STREQUAL "ALLTARGETSASMPARSERS" OR
capitalized_lib STREQUAL "ALLTARGETSDESCS" OR
capitalized_lib STREQUAL "ALLTARGETSDISASSEMBLERS" OR
capitalized_lib STREQUAL "ALLTARGETSINFOS" OR
capitalized_lib STREQUAL "NATIVE" OR
capitalized_lib STREQUAL "NATIVECODEGEN" )
set(${return_var} ON PARENT_SCOPE)
endif()
endif()
endfunction()
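# Illustrative usage sketch (the component names are assumptions chosen only
# for the example):
#
#   is_llvm_target_library("X86CodeGen" x86_is_target_lib INCLUDED_TARGETS)
#   is_llvm_target_specifier("NativeCodeGen" is_target_spec ALL_TARGETS)
#   message(STATUS "target lib: ${x86_is_target_lib}, specifier: ${is_target_spec}")
#
# INCLUDED_TARGETS restricts the match to LLVM_TARGETS_TO_BUILD, so the first
# query is ON only when the X86 backend is actually being configured.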
macro(llvm_config executable)
cmake_parse_arguments(ARG "USE_SHARED" "" "" ${ARGN})
@ -93,6 +127,21 @@ function(llvm_map_components_to_libnames out_libs)
endif()
string(TOUPPER "${LLVM_AVAILABLE_LIBS}" capitalized_libs)
get_property(LLVM_TARGETS_CONFIGURED GLOBAL PROPERTY LLVM_TARGETS_CONFIGURED)
# Generally in our build system we avoid order-dependence. Unfortunately since
# not all targets create the same set of libraries we actually need to ensure
# that all build targets associated with a target are added before we can
# process target dependencies.
if(NOT LLVM_TARGETS_CONFIGURED)
foreach(c ${link_components})
is_llvm_target_specifier(${c} iltl_result ALL_TARGETS)
if(iltl_result)
message(FATAL_ERROR "Specified target library before target registration is complete.")
endif()
endforeach()
endif()
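# Illustrative usage sketch: a typical caller maps component names to library
# names and links against the result (the tool name is an assumption for the
# example):
#
#   llvm_map_components_to_libnames(llvm_libs support core irreader)
#   target_link_libraries(my_tool PRIVATE ${llvm_libs})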
# Expand some keywords:
list(FIND LLVM_TARGETS_TO_BUILD "${LLVM_NATIVE_ARCH}" have_native_backend)
list(FIND link_components "engine" engine_required)
@ -141,6 +190,12 @@ function(llvm_map_components_to_libnames out_libs)
if( TARGET LLVM${c}Disassembler )
list(APPEND expanded_components "LLVM${c}Disassembler")
endif()
if( TARGET LLVM${c}Info )
list(APPEND expanded_components "LLVM${c}Info")
endif()
if( TARGET LLVM${c}Utils )
list(APPEND expanded_components "LLVM${c}Utils")
endif()
elseif( c STREQUAL "native" )
# already processed
elseif( c STREQUAL "nativecodegen" )
@ -198,9 +253,16 @@ function(llvm_map_components_to_libnames out_libs)
list(FIND capitalized_libs LLVM${capitalized} lib_idx)
if( lib_idx LESS 0 )
# The component is unknown. Maybe is an omitted target?
is_llvm_target_library(${c} iltl_result)
if( NOT iltl_result )
message(FATAL_ERROR "Library `${c}' not found in list of llvm libraries.")
is_llvm_target_library(${c} iltl_result OMITTED_TARGETS)
if(iltl_result)
# A missing library to a directly referenced omitted target would be bad.
message(FATAL_ERROR "Library '${c}' is a direct reference to a target library for an omitted target.")
else()
# If it is not an omitted target we should assume it is a component
# that hasn't yet been processed by CMake. Missing components will
# cause errors later in the configuration, so we can safely assume
# that this is valid here.
list(APPEND expanded_components LLVM${c})
endif()
else( lib_idx LESS 0 )
list(GET LLVM_AVAILABLE_LIBS ${lib_idx} canonical_lib)

View File

@ -45,6 +45,10 @@ set(LLVM_ENABLE_PIC @LLVM_ENABLE_PIC@)
set(LLVM_BUILD_32_BITS @LLVM_BUILD_32_BITS@)
if (NOT "@LLVM_PTHREAD_LIB@" STREQUAL "")
set(LLVM_PTHREAD_LIB "@LLVM_PTHREAD_LIB@")
endif()
set(LLVM_ENABLE_PLUGINS @LLVM_ENABLE_PLUGINS@)
set(LLVM_EXPORT_SYMBOLS_FOR_PLUGINS @LLVM_EXPORT_SYMBOLS_FOR_PLUGINS@)
set(LLVM_PLUGIN_EXT @LLVM_PLUGIN_EXT@)
@ -75,4 +79,5 @@ if(NOT TARGET LLVMSupport)
@llvm_config_include_buildtree_only_exports@
endif()
set_property(GLOBAL PROPERTY LLVM_TARGETS_CONFIGURED On)
include(${LLVM_CMAKE_DIR}/LLVM-Config.cmake)

View File

@ -195,8 +195,16 @@ function(llvm_ExternalProject_Add name source_dir)
# Add top-level targets
foreach(target ${ARG_EXTRA_TARGETS})
string(REPLACE ":" ";" target_list ${target})
list(GET target_list 0 target)
list(LENGTH target_list target_list_len)
if(${target_list_len} GREATER 1)
list(GET target_list 1 target_name)
else()
set(target_name "${target}")
endif()
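# Illustrative sketch: with the parsing above, an entry of the form
#
#   EXTRA_TARGETS check-foo:check-foo-stage2
#
# builds the external project's "check-foo" target but registers it in the
# top-level build under the name "check-foo-stage2" (the names are assumptions
# chosen only for the example).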
llvm_ExternalProject_BuildCmd(build_runtime_cmd ${target} ${BINARY_DIR})
add_custom_target(${target}
add_custom_target(${target_name}
COMMAND ${build_runtime_cmd}
DEPENDS ${name}-configure
WORKING_DIRECTORY ${BINARY_DIR}

View File

@ -14,8 +14,31 @@ function(tablegen project ofn)
message(FATAL_ERROR "${project}_TABLEGEN_EXE not set")
endif()
file(GLOB local_tds "*.td")
file(GLOB_RECURSE global_tds "${LLVM_MAIN_INCLUDE_DIR}/llvm/*.td")
# Use depfile instead of globbing arbitrary *.td(s)
# DEPFILE is available for Ninja Generator with CMake>=3.7.
if(CMAKE_GENERATOR STREQUAL "Ninja" AND NOT CMAKE_VERSION VERSION_LESS 3.7)
# Make output path relative to build.ninja, assuming located on
# ${CMAKE_BINARY_DIR}.
# CMake emits build targets as relative paths but Ninja doesn't identify
# absolute path (in *.d) as relative path (in build.ninja)
# Note that tblgen is executed on ${CMAKE_BINARY_DIR} as working directory.
file(RELATIVE_PATH ofn_rel
${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/${ofn})
set(additional_cmdline
-o ${ofn_rel}.tmp
-d ${ofn_rel}.d
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
DEPFILE ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.d
)
set(local_tds)
set(global_tds)
else()
file(GLOB local_tds "*.td")
file(GLOB_RECURSE global_tds "${LLVM_MAIN_INCLUDE_DIR}/llvm/*.td")
set(additional_cmdline
-o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
)
endif()
if (IS_ABSOLUTE ${LLVM_TARGET_DEFINITIONS})
set(LLVM_TARGET_DEFINITIONS_ABSOLUTE ${LLVM_TARGET_DEFINITIONS})
@ -23,16 +46,33 @@ function(tablegen project ofn)
set(LLVM_TARGET_DEFINITIONS_ABSOLUTE
${CMAKE_CURRENT_SOURCE_DIR}/${LLVM_TARGET_DEFINITIONS})
endif()
if (LLVM_ENABLE_DAGISEL_COV)
list(FIND ARGN "-gen-dag-isel" idx)
if( NOT idx EQUAL -1 )
list(APPEND LLVM_TABLEGEN_FLAGS "-instrument-coverage")
endif()
endif()
# We need both _TABLEGEN_TARGET and _TABLEGEN_EXE in the DEPENDS list
# (both the target and the file) to have .inc files rebuilt on
# a tablegen change, as cmake does not propagate file-level dependencies
# of custom targets. See the following ticket for more information:
# https://cmake.org/Bug/view.php?id=15858
# The dependency on both, the target and the file, produces the same
# dependency twice in the result file when
# ("${${project}_TABLEGEN_TARGET}" STREQUAL "${${project}_TABLEGEN_EXE}")
# but lets us have smaller and cleaner code here.
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
# Generate tablegen output in a temporary file.
COMMAND ${${project}_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR}
${LLVM_TABLEGEN_FLAGS}
${LLVM_TABLEGEN_FLAGS}
${LLVM_TARGET_DEFINITIONS_ABSOLUTE}
-o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
${additional_cmdline}
# The file in LLVM_TARGET_DEFINITIONS may be not in the current
# directory and local_tds may not contain it, so we must
# explicitly list it here:
DEPENDS ${${project}_TABLEGEN_TARGET} ${local_tds} ${global_tds}
DEPENDS ${${project}_TABLEGEN_TARGET} ${${project}_TABLEGEN_EXE}
${local_tds} ${global_tds}
${LLVM_TARGET_DEFINITIONS_ABSOLUTE}
COMMENT "Building ${ofn}..."
)
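# Illustrative usage sketch: a caller points LLVM_TARGET_DEFINITIONS at a .td
# file and invokes this function through the project wrapper, e.g. (file and
# target names are assumptions for the example):
#
#   set(LLVM_TARGET_DEFINITIONS Options.td)
#   tablegen(LLVM Options.inc -gen-opt-parser-defs)
#   add_public_tablegen_target(FooOptionsTableGen)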
@ -87,12 +127,13 @@ macro(add_tablegen target project)
set(${target}_OLD_LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS})
set(LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS} TableGen)
if(NOT XCODE)
# CMake-3.9 doesn't let compilation units depend on their dependent libraries.
if(NOT (CMAKE_GENERATOR STREQUAL "Ninja" AND NOT CMAKE_VERSION VERSION_LESS 3.9) AND NOT XCODE)
# FIXME: It leaks to user, callee of add_tablegen.
set(LLVM_ENABLE_OBJLIB ON)
endif()
add_llvm_utility(${target} ${ARGN})
add_llvm_executable(${target} DISABLE_LLVM_LINK_LLVM_DYLIB ${ARGN})
set(LLVM_LINK_COMPONENTS ${${target}_OLD_LLVM_LINK_COMPONENTS})
set(${project}_TABLEGEN "${target}" CACHE

View File

@ -25,60 +25,66 @@ function(add_version_info_from_vcs VERS)
set(LLVM_REPOSITORY ${Project_WC_URL} PARENT_SCOPE)
endif()
endif()
elseif( EXISTS ${SOURCE_DIR}/.git )
set(result "${result}git")
# Try to get a ref-id
if( EXISTS ${SOURCE_DIR}/.git/svn )
find_program(git_executable NAMES git git.exe git.cmd)
if( git_executable )
set(is_git_svn_rev_exact false)
execute_process(COMMAND
${git_executable} svn info
WORKING_DIRECTORY ${SOURCE_DIR}
TIMEOUT 5
RESULT_VARIABLE git_result
OUTPUT_VARIABLE git_output)
if( git_result EQUAL 0 )
string(REGEX MATCH "URL: ([^ \n]*)" svn_url ${git_output})
if(svn_url)
set(LLVM_REPOSITORY ${CMAKE_MATCH_1} PARENT_SCOPE)
else()
find_program(git_executable NAMES git git.exe git.cmd)
if( git_executable )
# Run from a subdirectory to force git to print an absolute path.
execute_process(COMMAND ${git_executable} rev-parse --git-dir
WORKING_DIRECTORY ${SOURCE_DIR}/cmake
RESULT_VARIABLE git_result
OUTPUT_VARIABLE git_dir
ERROR_QUIET)
if(git_result EQUAL 0)
# Try to get a ref-id
string(STRIP "${git_dir}" git_dir)
set(result "${result}git")
if( EXISTS ${git_dir}/svn )
# Get the repository URL
execute_process(COMMAND
${git_executable} svn info
WORKING_DIRECTORY ${SOURCE_DIR}
TIMEOUT 5
RESULT_VARIABLE git_result
OUTPUT_VARIABLE git_output
ERROR_QUIET)
if( git_result EQUAL 0 )
string(REGEX MATCH "URL: ([^ \n]*)" svn_url ${git_output})
if(svn_url)
set(LLVM_REPOSITORY ${CMAKE_MATCH_1} PARENT_SCOPE)
endif()
endif()
string(REGEX REPLACE "^(.*\n)?Revision: ([^\n]+).*"
"\\2" git_svn_rev_number "${git_output}")
set(SVN_REVISION ${git_svn_rev_number} PARENT_SCOPE)
set(git_svn_rev "-svn-${git_svn_rev}")
# Determine if the HEAD points directly at a subversion revision.
# Get the svn revision number for this git commit if one exists.
execute_process(COMMAND ${git_executable} svn find-rev HEAD
WORKING_DIRECTORY ${SOURCE_DIR}
TIMEOUT 5
RESULT_VARIABLE git_result
OUTPUT_VARIABLE git_output)
if( git_result EQUAL 0 )
string(STRIP "${git_output}" git_head_svn_rev_number)
if( git_head_svn_rev_number EQUAL git_svn_rev_number )
set(is_git_svn_rev_exact true)
endif()
OUTPUT_VARIABLE git_head_svn_rev_number
OUTPUT_STRIP_TRAILING_WHITESPACE)
if( git_result EQUAL 0 AND git_output)
set(SVN_REVISION ${git_head_svn_rev_number} PARENT_SCOPE)
set(git_svn_rev "-svn-${git_head_svn_rev_number}")
else()
set(git_svn_rev "")
endif()
else()
set(git_svn_rev "")
endif()
# Get the git ref id
execute_process(COMMAND
${git_executable} rev-parse --short HEAD
WORKING_DIRECTORY ${SOURCE_DIR}
TIMEOUT 5
RESULT_VARIABLE git_result
OUTPUT_VARIABLE git_output)
OUTPUT_VARIABLE git_ref_id
OUTPUT_STRIP_TRAILING_WHITESPACE)
if( git_result EQUAL 0 AND NOT is_git_svn_rev_exact )
string(STRIP "${git_output}" git_ref_id)
if( git_result EQUAL 0 )
set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE)
set(result "${result}${git_svn_rev}-${git_ref_id}")
else()
set(result "${result}${git_svn_rev}")
endif()
endif()
endif()
endif()
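# Illustrative usage sketch: callers pass the name of a variable that already
# holds a base version string; the function appends VCS details (svn revision
# and/or git ref id) when a checkout is detected:
#
#   set(MY_PACKAGE_VERSION "5.0.0")
#   add_version_info_from_vcs(MY_PACKAGE_VERSION)
#
# (the variable name and base version above are assumptions for the example).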

View File

@ -4,6 +4,7 @@ SET(CMAKE_SYSTEM_NAME Darwin)
SET(CMAKE_SYSTEM_VERSION 13)
SET(CMAKE_CXX_COMPILER_WORKS True)
SET(CMAKE_C_COMPILER_WORKS True)
SET(IOS True)
if(NOT CMAKE_OSX_SYSROOT)
execute_process(COMMAND xcodebuild -version -sdk iphoneos Path

File diff suppressed because it is too large

View File

@ -132,11 +132,12 @@ The ``MayAlias`` response is used whenever the two pointers might refer to the
same object.
The ``PartialAlias`` response is used when the two memory objects are known to
be overlapping in some way, but do not start at the same address.
be overlapping in some way, regardless of whether they start at the same
address.
The ``MustAlias`` response may only be returned if the two memory objects are
guaranteed to always start at exactly the same location. A ``MustAlias``
response implies that the pointers compare equal.
response does not imply that the pointers compare equal.
The ``getModRefInfo`` methods
-----------------------------

View File

@ -0,0 +1,87 @@
==================================
Benchmarking tips
==================================
Introduction
============
When benchmarking a patch we want to reduce all sources of noise as much as
possible. How to do that is very OS dependent.
Note that low noise is required, but not sufficient. It does not
exclude measurement bias. See
https://www.cis.upenn.edu/~cis501/papers/producing-wrong-data.pdf for
an example.
General
================================
* Use a high resolution timer, e.g. perf under linux.
* Run the benchmark multiple times to be able to recognize noise.
* Disable as many processes or services as possible on the target system.
* Disable frequency scaling, turbo boost and address space
randomization (see OS specific section).
* Static link if the OS supports it. That avoids any variation that
might be introduced by loading dynamic libraries. This can be done
by passing ``-DLLVM_BUILD_STATIC=ON`` to cmake.
* Try to avoid storage. On some systems you can use tmpfs. Putting the
program, inputs and outputs on tmpfs avoids touching a real storage
system, which can have a pretty big variability.
To mount it (on linux and freebsd at least)::
mount -t tmpfs -o size=<XX>g none dir_to_mount
Linux
=====
* Disable address space randomization::
echo 0 > /proc/sys/kernel/randomize_va_space
* Set scaling_governor to performance::
for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
do
echo performance > $i
done
* Use https://github.com/lpechacek/cpuset to reserve cpus for just the
program you are benchmarking. If using perf, leave at least 2 cores
so that perf runs in one and your program in another::
cset shield -c N1,N2 -k on
This will move all threads out of N1 and N2. The ``-k on`` means
that even kernel threads are moved out.
* Disable the SMT pair of the cpus you will use for the benchmark. The
pair of cpu N can be found in
``/sys/devices/system/cpu/cpuN/topology/thread_siblings_list`` and
disabled with (where ``X`` is the sibling of cpu N found there)::
echo 0 > /sys/devices/system/cpu/cpuX/online
* Run the program with::
cset shield --exec -- perf stat -r 10 <cmd>
This will run the command after ``--`` in the isolated cpus. The
particular perf command runs the ``<cmd>`` 10 times and reports
statistics.
With these in place you can expect perf variations of less than 0.1%.
Linux Intel
-----------
* Disable turbo mode::
echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo

View File

@ -550,6 +550,8 @@ LLVM IR is defined with the following blocks:
* 17 --- `TYPE_BLOCK`_ --- This describes all of the types in the module.
* 23 --- `STRTAB_BLOCK`_ --- The bitcode file's string table.
.. _MODULE_BLOCK:
MODULE_BLOCK Contents
@ -577,7 +579,7 @@ MODULE_CODE_VERSION Record
``[VERSION, version#]``
The ``VERSION`` record (code 1) contains a single value indicating the format
version. Versions 0 and 1 are supported at this time. The difference between
version. Versions 0, 1 and 2 are supported at this time. The difference between
version 0 and 1 is in the encoding of instruction operands in
each `FUNCTION_BLOCK`_.
@ -620,6 +622,12 @@ as unsigned VBRs. However, forward references are rare, except in the
case of phi instructions. For phi instructions, operands are encoded as
`Signed VBRs`_ to deal with forward references.
In version 2, the meaning of module records ``FUNCTION``, ``GLOBALVAR``,
``ALIAS``, ``IFUNC`` and ``COMDAT`` changes such that the first two operands
specify an offset and size of a string in a string table (see `STRTAB_BLOCK
Contents`_), the function name is removed from the ``FNENTRY`` record in the
value symbol table, and the top-level ``VALUE_SYMTAB_BLOCK`` may only contain
``FNENTRY`` records.
MODULE_CODE_TRIPLE Record
^^^^^^^^^^^^^^^^^^^^^^^^^
@ -673,11 +681,14 @@ for each library name referenced.
MODULE_CODE_GLOBALVAR Record
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
``[GLOBALVAR, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal, unnamed_addr, externally_initialized, dllstorageclass, comdat]``
``[GLOBALVAR, strtab offset, strtab size, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal, unnamed_addr, externally_initialized, dllstorageclass, comdat]``
The ``GLOBALVAR`` record (code 7) marks the declaration or definition of a
global variable. The operand fields are:
* *strtab offset*, *strtab size*: Specifies the name of the global variable.
See `STRTAB_BLOCK Contents`_.
* *pointer type*: The type index of the pointer type used to point to this
global variable
@ -755,11 +766,14 @@ global variable. The operand fields are:
MODULE_CODE_FUNCTION Record
^^^^^^^^^^^^^^^^^^^^^^^^^^^
``[FUNCTION, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc, prologuedata, dllstorageclass, comdat, prefixdata, personalityfn]``
``[FUNCTION, strtab offset, strtab size, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc, prologuedata, dllstorageclass, comdat, prefixdata, personalityfn]``
The ``FUNCTION`` record (code 8) marks the declaration or definition of a
function. The operand fields are:
* *strtab offset*, *strtab size*: Specifies the name of the function.
See `STRTAB_BLOCK Contents`_.
* *type*: The type index of the function type describing this function
* *callingconv*: The calling convention number:
@ -817,11 +831,14 @@ function. The operand fields are:
MODULE_CODE_ALIAS Record
^^^^^^^^^^^^^^^^^^^^^^^^
``[ALIAS, alias type, aliasee val#, linkage, visibility, dllstorageclass, threadlocal, unnamed_addr]``
``[ALIAS, strtab offset, strtab size, alias type, aliasee val#, linkage, visibility, dllstorageclass, threadlocal, unnamed_addr]``
The ``ALIAS`` record (code 9) marks the definition of an alias. The operand
fields are
* *strtab offset*, *strtab size*: Specifies the name of the alias.
See `STRTAB_BLOCK Contents`_.
* *alias type*: The type index of the alias
* *aliasee val#*: The value index of the aliased value
@ -839,16 +856,6 @@ fields are
* *unnamed_addr*: If present, an encoding of the
:ref:`unnamed_addr<bcunnamedaddr>` attribute of this alias
MODULE_CODE_PURGEVALS Record
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
``[PURGEVALS, numvals]``
The ``PURGEVALS`` record (code 10) resets the module-level value list to the
size given by the single operand value. Module-level value list items are added
by ``GLOBALVAR``, ``FUNCTION``, and ``ALIAS`` records. After a ``PURGEVALS``
record is seen, new value indices will start from the given *numvals* value.
.. _MODULE_CODE_GCNAME:
MODULE_CODE_GCNAME Record
@ -1310,3 +1317,20 @@ METADATA_ATTACHMENT Contents
----------------------------
The ``METADATA_ATTACHMENT`` block (id 16) ...
.. _STRTAB_BLOCK:
STRTAB_BLOCK Contents
---------------------
The ``STRTAB`` block (id 23) contains a single record (``STRTAB_BLOB``, id 1)
with a single blob operand containing the bitcode file's string table.
Strings in the string table are not null terminated. A record's *strtab
offset* and *strtab size* operands specify the byte offset and size of a
string within the string table.
The string table is used by all preceding blocks in the bitcode file that are
not succeeded by another intervening ``STRTAB`` block. Normally a bitcode
file will have a single string table, but it may have more than one if it
was created by binary concatenation of multiple bitcode files.

View File

@ -64,6 +64,20 @@ Branch weights are assigned to every destination.
[ , i32 <LABEL_BRANCH_WEIGHT> ... ]
}
``CallInst``
^^^^^^^^^^^^^^^^^^
Calls may have branch weight metadata, containing the execution count of
the call. It is currently used in SamplePGO mode only, to augment the
block and entry counts which may not be accurate with sampling.
.. code-block:: none
!0 = metadata !{
metadata !"branch_weights",
i32 <CALL_BRANCH_WEIGHT>
}
Other
^^^^^
@ -123,11 +137,11 @@ To allow comparing different functions during inter-procedural analysis and
optimization, ``MD_prof`` nodes can also be assigned to a function definition.
The first operand is a string indicating the name of the associated counter.
Currently, one counter is supported: "function_entry_count". This is a 64-bit
counter that indicates the number of times that this function was invoked (in
the case of instrumentation-based profiles). In the case of sampling-based
profiles, this counter is an approximation of how many times the function was
invoked.
Currently, one counter is supported: "function_entry_count". The second operand
is a 64-bit counter that indicates the number of times that this function was
invoked (in the case of instrumentation-based profiles). In the case of
sampling-based profiles, this operand is an approximation of how many times
the function was invoked.
For example, in the code below, the instrumentation for function foo()
indicates that it was called 2,590 times at runtime.
@ -138,3 +152,13 @@ indicates that it was called 2,590 times at runtime.
ret i32 0
}
!1 = !{!"function_entry_count", i64 2590}
If "function_entry_count" has more than 2 operands, the later operands are
the GUID of the functions that needs to be imported by ThinLTO. This is only
set by sampling based profile. It is needed because the sampling based profile
was collected on a binary that had already imported and inlined these functions,
and we need to ensure the IR matches in the ThinLTO backends for profile
annotation. The reason why we cannot annotate this on the callsite is that it
can only goes down 1 level in the call chain. For the cases where
foo_in_a_cc()->bar_in_b_cc()->baz_in_c_cc(), we will need to go down 2 levels
in the call chain to import both bar_in_b_cc and baz_in_c_cc.

View File

@ -186,8 +186,8 @@ CMake manual, or execute ``cmake --help-variable VARIABLE_NAME``.
Sets the build type for ``make``-based generators. Possible values are
Release, Debug, RelWithDebInfo and MinSizeRel. If you are using an IDE such as
Visual Studio, you should use the IDE settings to set the build type.
Be aware that Release and RelWithDebInfo are not using the same optimization
level on most platform.
Be aware that Release and RelWithDebInfo use different optimization levels on
most platforms.
**CMAKE_INSTALL_PREFIX**:PATH
Path where LLVM will be installed if "make install" is invoked or the
@ -247,9 +247,10 @@ LLVM-specific variables
tests.
**LLVM_APPEND_VC_REV**:BOOL
Append version control revision info (svn revision number or Git revision id)
to LLVM version string (stored in the PACKAGE_VERSION macro). For this to work
cmake must be invoked before the build. Defaults to OFF.
Embed version control revision info (svn revision number or Git revision id).
This is used among other things in the LLVM version string (stored in the
PACKAGE_VERSION macro). For this to work cmake must be invoked before the
build. Defaults to ON.
**LLVM_ENABLE_THREADS**:BOOL
Build with threads support, if available. Defaults to ON.
@ -382,6 +383,18 @@ LLVM-specific variables
lines, enabling link-time optimization. Possible values are ``Off``,
``On``, ``Thin`` and ``Full``. Defaults to OFF.
**LLVM_USE_LINKER**:STRING
Add ``-fuse-ld={name}`` to the link invocation. The possible values depend on
your compiler; for clang the value can be an absolute path to your custom
linker, otherwise clang will prefix the name with ``ld.`` and apply its usual
search. For example, to link LLVM with the Gold linker, cmake can be invoked
with ``-DLLVM_USE_LINKER=gold``.
**LLVM_ENABLE_LLD**:BOOL
This option is equivalent to `-DLLVM_USE_LINKER=lld`, except during a 2-stage
build where a dependency is added from the first stage to the second ensuring
that lld is built before stage2 begins.
**LLVM_PARALLEL_COMPILE_JOBS**:STRING
Define the maximum number of concurrent compilation jobs.
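For example, these options can be seeded together from a CMake cache file
passed with ``-C`` (the particular values below are only illustrative):

.. code-block:: cmake

  # build-options.cmake -- use with: cmake -C build-options.cmake <path/to/llvm>
  set(LLVM_USE_LINKER gold CACHE STRING "")
  set(LLVM_PARALLEL_COMPILE_JOBS 8 CACHE STRING "")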
@ -457,6 +470,8 @@ LLVM-specific variables
**SPHINX_EXECUTABLE**:STRING
The path to the ``sphinx-build`` executable detected by CMake.
For installation instructions, see
http://www.sphinx-doc.org/en/latest/install.html
**SPHINX_OUTPUT_HTML**:BOOL
If enabled (and ``LLVM_ENABLE_SPHINX`` is enabled) then the targets for
@ -521,6 +536,11 @@ LLVM-specific variables
during the build. Enabling this option can significantly speed up build times
especially when building LLVM in Debug configurations.
**LLVM_REVERSE_ITERATION**:BOOL
If enabled, all supported unordered llvm containers will be iterated in
reverse order. This is useful for uncovering non-determinism caused by
iteration of unordered containers.
CMake Caches
============

View File

@ -1,8 +1,8 @@
if (DOXYGEN_FOUND)
if (LLVM_ENABLE_DOXYGEN)
set(abs_top_srcdir ${LLVM_MAIN_SRC_DIR})
set(abs_top_builddir ${LLVM_BINARY_DIR})
set(abs_top_srcdir ${CMAKE_CURRENT_SOURCE_DIR})
set(abs_top_builddir ${CMAKE_CURRENT_BINARY_DIR})
if (HAVE_DOT)
set(DOT ${LLVM_PATH_DOT})
@ -103,8 +103,8 @@ endif()
endif()
if (LLVM_ENABLE_SPHINX)
include(AddSphinxTarget)
if (SPHINX_FOUND)
include(AddSphinxTarget)
if (${SPHINX_OUTPUT_HTML})
add_sphinx_target(html llvm)
endif()

View File

@ -112,33 +112,6 @@ In this example the ``extra_sources`` variable is only defined if you're
targeting an Apple platform. For all other targets the ``extra_sources`` will be
evaluated as empty before add_executable is given its arguments.
One big "Gotcha" with variable dereferencing is that ``if`` commands implicitly
dereference values. This has some unexpected results. For example:
.. code-block:: cmake
if("${SOME_VAR}" STREQUAL "MSVC")
In this code sample MSVC will be implicitly dereferenced, which will result in
the if command comparing the value of the dereferenced variables ``SOME_VAR``
and ``MSVC``. A common workaround to this solution is to prepend strings being
compared with an ``x``.
.. code-block:: cmake
if("x${SOME_VAR}" STREQUAL "xMSVC")
This works because while ``MSVC`` is a defined variable, ``xMSVC`` is not. This
pattern is uncommon, but it does occur in LLVM's CMake scripts.
.. note::
Once the LLVM project upgrades its minimum CMake version to 3.1 or later we
can prevent this behavior by setting CMP0054 to new. For more information on
CMake policies please see the cmake-policies manpage or the `cmake-policies
online documentation
<https://cmake.org/cmake/help/v3.4/manual/cmake-policies.7.html>`_.
Lists
-----

View File

@ -1005,7 +1005,7 @@ The TableGen DAG instruction selector generator reads the instruction patterns
in the ``.td`` file and automatically builds parts of the pattern matching code
for your target. It has the following strengths:
* At compiler-compiler time, it analyzes your instruction patterns and tells you
* At compiler-compile time, it analyzes your instruction patterns and tells you
if your patterns make sense or not.
* It can handle arbitrary constraints on operands for the pattern match. In
@ -1026,7 +1026,7 @@ for your target. It has the following strengths:
* Targets can define their own (and rely on built-in) "pattern fragments".
Pattern fragments are chunks of reusable patterns that get inlined into your
patterns during compiler-compiler time. For example, the integer "``(not
patterns during compiler-compile time. For example, the integer "``(not
x)``" operation is actually defined as a pattern fragment that expands as
"``(xor x, -1)``", since the SelectionDAG does not have a native '``not``'
operation. Targets can define their own short-hand fragments as they see fit.
@ -2642,59 +2642,6 @@ to ensure valid register usage and operand types.
The AMDGPU backend
------------------
The AMDGPU code generator lives in the lib/Target/AMDGPU directory, and is an
open source native AMD GCN ISA code generator.
Target triples supported
^^^^^^^^^^^^^^^^^^^^^^^^
The following are the known target triples that are supported by the AMDGPU
backend.
* **amdgcn--** --- AMD GCN GPUs (AMDGPU.7.0.0+)
* **amdgcn--amdhsa** --- AMD GCN GPUs (AMDGPU.7.0.0+) with HSA support
* **r600--** --- AMD GPUs HD2XXX-HD6XXX
Relocations
^^^^^^^^^^^
Supported relocatable fields are:
* **word32** --- This specifies a 32-bit field occupying 4 bytes with arbitrary
byte alignment. These values use the same byte order as other word values in
the AMD GPU architecture
* **word64** --- This specifies a 64-bit field occupying 8 bytes with arbitrary
byte alignment. These values use the same byte order as other word values in
the AMD GPU architecture
Following notations are used for specifying relocation calculations:
* **A** --- Represents the addend used to compute the value of the relocatable
field
* **G** --- Represents the offset into the global offset table at which the
relocation entrys symbol will reside during execution.
* **GOT** --- Represents the address of the global offset table.
* **P** --- Represents the place (section offset or address) of the storage unit
being relocated (computed using ``r_offset``)
* **S** --- Represents the value of the symbol whose index resides in the
relocation entry
AMDGPU Backend generates *Elf64_Rela* relocation records with the following
supported relocation types:
========================== ===== ========== ==============================
Relocation type Value Field Calculation
========================== ===== ========== ==============================
``R_AMDGPU_NONE`` 0 ``none`` ``none``
``R_AMDGPU_ABS32_LO`` 1 ``word32`` (S + A) & 0xFFFFFFFF
``R_AMDGPU_ABS32_HI`` 2 ``word32`` (S + A) >> 32
``R_AMDGPU_ABS64`` 3 ``word64`` S + A
``R_AMDGPU_REL32`` 4 ``word32`` S + A - P
``R_AMDGPU_REL64`` 5 ``word64`` S + A - P
``R_AMDGPU_ABS32`` 6 ``word32`` S + A
``R_AMDGPU_GOTPCREL`` 7 ``word32`` G + GOT + A - P
``R_AMDGPU_GOTPCREL32_LO`` 8 ``word32`` (G + GOT + A - P) & 0xFFFFFFFF
``R_AMDGPU_GOTPCREL32_HI`` 9 ``word32`` (G + GOT + A - P) >> 32
``R_AMDGPU_REL32_LO`` 10 ``word32`` (S + A - P) & 0xFFFFFFFF
``R_AMDGPU_REL32_HI`` 11 ``word32`` (S + A - P) >> 32
========================== ===== ========== ==============================
The AMDGPU code generator lives in the ``lib/Target/AMDGPU``
directory. This code generator is capable of targeting a variety of
AMD GPU processors. Refer to :doc:`AMDGPUUsage` for more information.

View File

@ -34,10 +34,10 @@ There are some conventions that are not uniformly followed in the code base
(e.g. the naming convention). This is because they are relatively new, and a
lot of code was written before they were put in place. Our long term goal is
for the entire codebase to follow the convention, but we explicitly *do not*
want patches that do large-scale reformating of existing code. On the other
want patches that do large-scale reformatting of existing code. On the other
hand, it is reasonable to rename the methods of a class if you're about to
change it in some other way. Just do the reformating as a separate commit from
the functionality change.
change it in some other way. Just do the reformatting as a separate commit
from the functionality change.
The ultimate goal of these guidelines is to increase the readability and
maintainability of our common source base. If you have suggestions for topics to

View File

@ -77,6 +77,15 @@ OPTIONS
-verify``. With this option FileCheck will verify that input does not contain
warnings not covered by any ``CHECK:`` patterns.
.. option:: --enable-var-scope
Enables scope for regex variables.
Variables with names that start with ``$`` are considered global and
remain set throughout the file.
All other variables get undefined after each encountered ``CHECK-LABEL``.
.. option:: -version
Show the version number of this program.
@ -344,6 +353,9 @@ matched by the directive cannot also be matched by any other check present in
other unique identifiers. Conceptually, the presence of ``CHECK-LABEL`` divides
the input stream into separate blocks, each of which is processed independently,
preventing a ``CHECK:`` directive in one block matching a line in another block.
If ``--enable-var-scope`` is in effect, all local variables are cleared at the
beginning of the block.
For example,
.. code-block:: llvm
@ -436,6 +448,13 @@ were defined on. For example:
Can be useful if you want the operands of ``op`` to be the same register,
and don't care exactly which register it is.
If ``--enable-var-scope`` is in effect, variables with names that
start with ``$`` are considered to be global. All other variables are
local. All local variables get undefined at the beginning of each
CHECK-LABEL block. Global variables are not affected by CHECK-LABEL.
This makes it easier to ensure that individual tests are not affected
by variables set in preceding tests.
FileCheck Expressions
~~~~~~~~~~~~~~~~~~~~~

View File

@ -56,7 +56,7 @@ GENERAL OPTIONS
Search for :file:`{NAME}.cfg` and :file:`{NAME}.site.cfg` when searching for
test suites, instead of :file:`lit.cfg` and :file:`lit.site.cfg`.
.. option:: -D NAME, -D NAME=VALUE, --param NAME, --param NAME=VALUE
.. option:: -D NAME[=VALUE], --param NAME[=VALUE]
Add a user defined parameter ``NAME`` with the given ``VALUE`` (or the empty
string if not given). The meaning and use of these parameters is test suite
@ -80,6 +80,13 @@ OUTPUT OPTIONS
Show more information on test failures, for example the entire test output
instead of just the test result.
.. option:: -vv, --echo-all-commands
Echo all commands to stdout, as they are being executed.
This can be valuable for debugging test failures, as the last echoed command
will be the one which has failed.
This option implies ``--verbose``.
.. option:: -a, --show-all
Show more information about all tests, for example the entire test
@ -152,6 +159,30 @@ SELECTION OPTIONS
Run the tests in a random order.
.. option:: --num-shards=M
Divide the set of selected tests into ``M`` equal-sized subsets or
"shards", and run only one of them. Must be used with the
``--run-shard=N`` option, which selects the shard to run. The environment
variable ``LIT_NUM_SHARDS`` can also be used in place of this
option. These two options provide a coarse mechanism for partitioning large
testsuites, for parallel execution on separate machines (say in a large
testing farm).
.. option:: --run-shard=N
Select which shard to run, assuming the ``--num-shards=M`` option was
provided. The two options must be used together, and the value of ``N``
must be in the range ``1..M``. The environment variable
``LIT_RUN_SHARD`` can also be used in place of this option.
.. option:: --filter=REGEXP
Run only those tests whose name matches the regular expression specified in
``REGEXP``. The environment variable ``LIT_FILTER`` can also be used in place
of this option, which is especially useful in environments where the call
to ``lit`` is issued indirectly.
ADDITIONAL OPTIONS
------------------
@ -362,7 +393,7 @@ PRE-DEFINED SUBSTITUTIONS
~~~~~~~~~~~~~~~~~~~~~~~~~~
:program:`lit` provides various patterns that can be used with the RUN command.
These are defined in TestRunner.py.
These are defined in TestRunner.py. The base set of substitutions are:
========== ==============
Macro Substitution
@ -374,17 +405,13 @@ These are defined in TestRunner.py.
%t temporary file name unique to the test
%T temporary directory unique to the test
%% %
%/s same as %s but replace all / with \\
%/S same as %S but replace all / with \\
%/p same as %p but replace all / with \\
%/t same as %t but replace all / with \\
%/T same as %T but replace all / with \\
========== ==============
Further substitution patterns might be defined by each test module.
See the modules :ref:`local-configuration-files`.
Other substitutions are provided that are variations on this base set and
further substitution patterns can be defined by each test module. See the
modules :ref:`local-configuration-files`.
More information on the testing infrastucture can be found in the
More detailed information on substitutions can be found in the
:doc:`../TestingGuide`.
TEST RUN OUTPUT FORMAT

View File

@ -262,6 +262,12 @@ OPTIONS
The demangler is expected to read a newline-separated list of symbols from
stdin and write a newline-separated list of the same length to stdout.
.. option:: -num-threads=N, -j=N
Use N threads to write file reports (only applicable when -output-dir is
specified). When N=0, llvm-cov auto-detects an appropriate number of threads to
use. This is the default.
.. option:: -line-coverage-gt=<N>
Show code coverage only for functions with line coverage greater than the
@ -322,6 +328,10 @@ OPTIONS
universal binary or to use an architecture that does not match a
non-universal binary.
.. option:: -show-functions
Show coverage summaries for each function.
.. program:: llvm-cov export
.. _llvm-cov-export:

View File

@ -134,9 +134,6 @@ OPTIONS
BUGS
----
* :program:`llvm-nm` cannot demangle C++ mangled names, like GNU :program:`nm`
can.
* :program:`llvm-nm` does not support the full set of arguments that GNU
:program:`nm` does.

View File

@ -192,10 +192,20 @@ OPTIONS
information is dumped in a more human readable form (also in text) with
annotations.
.. option:: -topn=n
Instruct the profile dumper to show the top ``n`` functions with the
hottest basic blocks in the summary section. By default, the topn functions
are not dumped.
.. option:: -sample
Specify that the input profile is a sample-based profile.
.. option:: -memop-sizes
Show the profiled sizes of the memory intrinsic calls for shown functions.
EXIT STATUS
-----------

View File

@ -72,16 +72,7 @@ Other documents, collections, notes
AMDGPU
------
* `AMD R6xx shader ISA <http://developer.amd.com/wordpress/media/2012/10/R600_Instruction_Set_Architecture.pdf>`_
* `AMD R7xx shader ISA <http://developer.amd.com/wordpress/media/2012/10/R700-Family_Instruction_Set_Architecture.pdf>`_
* `AMD Evergreen shader ISA <http://developer.amd.com/wordpress/media/2012/10/AMD_Evergreen-Family_Instruction_Set_Architecture.pdf>`_
* `AMD Cayman/Trinity shader ISA <http://developer.amd.com/wordpress/media/2012/10/AMD_HD_6900_Series_Instruction_Set_Architecture.pdf>`_
* `AMD Southern Islands Series ISA <http://developer.amd.com/wordpress/media/2012/12/AMD_Southern_Islands_Instruction_Set_Architecture.pdf>`_
* `AMD Sea Islands Series ISA <http://developer.amd.com/wordpress/media/2013/07/AMD_Sea_Islands_Instruction_Set_Architecture.pdf>`_
* `AMD GCN3 Instruction Set Architecture <http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_GCN3_Instruction_Set_Architecture_rev1.1.pdf>`__
* `AMD GPU Programming Guide <http://developer.amd.com/download/AMD_Accelerated_Parallel_Processing_OpenCL_Programming_Guide.pdf>`_
* `AMD Compute Resources <http://developer.amd.com/tools/heterogeneous-computing/amd-accelerated-parallel-processing-app-sdk/documentation/>`_
* `AMDGPU Compute Application Binary Interface <https://github.com/RadeonOpenCompute/ROCm-ComputeABI-Doc/blob/master/AMDGPU-ABI.md>`__
Refer to :doc:`AMDGPUUsage` for additional documentation.
RISC-V
------

View File

@ -89,7 +89,7 @@ and 6 after which the coroutine will be destroyed.
The LLVM IR for this coroutine looks like this:
.. code-block:: none
.. code-block:: llvm
define i8* @f(i32 %n) {
entry:
@ -110,7 +110,7 @@ The LLVM IR for this coroutine looks like this:
call void @free(i8* %mem)
br label %suspend
suspend:
call void @llvm.coro.end(i8* %hdl, i1 false)
%unused = call i1 @llvm.coro.end(i8* %hdl, i1 false)
ret i8* %hdl
}
@ -156,7 +156,7 @@ We also store addresses of the resume and destroy functions so that the
when its identity cannot be determined statically at compile time. For our
example, the coroutine frame will be:
.. code-block:: text
.. code-block:: llvm
%f.frame = type { void (%f.frame*)*, void (%f.frame*)*, i32 }
@ -164,7 +164,7 @@ After resume and destroy parts are outlined, function `f` will contain only the
code responsible for creation and initialization of the coroutine frame and
execution of the coroutine until a suspend point is reached:
.. code-block:: none
.. code-block:: llvm
define i8* @f(i32 %n) {
entry:
@ -224,7 +224,7 @@ In the entry block, we will call `coro.alloc`_ intrinsic that will return `true`
when dynamic allocation is required, and `false` if dynamic allocation is
elided.
.. code-block:: none
.. code-block:: llvm
entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
@ -242,7 +242,7 @@ In the cleanup block, we will make freeing the coroutine frame conditional on
`coro.free`_ intrinsic. If allocation is elided, `coro.free`_ returns `null`
thus skipping the deallocation code:
.. code-block:: text
.. code-block:: llvm
cleanup:
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
@ -286,7 +286,7 @@ Let's consider the coroutine that has more than one suspend point:
Matching LLVM code would look like (with the rest of the code remaining the same
as the code in the previous section):
.. code-block:: text
.. code-block:: llvm
loop:
%n.addr = phi i32 [ %n, %entry ], [ %inc, %loop.resume ]
@ -383,17 +383,17 @@ point when coroutine should be ready for resumption (namely, when a resume index
should be stored in the coroutine frame, so that it can be resumed at the
correct resume point):
.. code-block:: text
.. code-block:: llvm
if.true:
%save1 = call token @llvm.coro.save(i8* %hdl)
call void async_op1(i8* %hdl)
call void @async_op1(i8* %hdl)
%suspend1 = call i1 @llvm.coro.suspend(token %save1, i1 false)
switch i8 %suspend1, label %suspend [i8 0, label %resume1
i8 1, label %cleanup]
if.false:
%save2 = call token @llvm.coro.save(i8* %hdl)
call void async_op2(i8* %hdl)
call void @async_op2(i8* %hdl)
%suspend2 = call i1 @llvm.coro.suspend(token %save2, i1 false)
switch i8 %suspend1, label %suspend [i8 0, label %resume2
i8 1, label %cleanup]
@ -411,7 +411,7 @@ be used to communicate with the coroutine. This distinguished alloca is called
The following coroutine designates a 32 bit integer `promise` and uses it to
store the current value produced by a coroutine.
.. code-block:: text
.. code-block:: llvm
define i8* @f(i32 %n) {
entry:
@ -440,7 +440,7 @@ store the current value produced by a coroutine.
call void @free(i8* %mem)
br label %suspend
suspend:
call void @llvm.coro.end(i8* %hdl, i1 false)
%unused = call i1 @llvm.coro.end(i8* %hdl, i1 false)
ret i8* %hdl
}
@ -692,7 +692,7 @@ a coroutine user are responsible to makes sure there is no data races.
Example:
""""""""
.. code-block:: text
.. code-block:: llvm
define i8* @f(i32 %n) {
entry:
@ -812,7 +812,7 @@ pointer that was returned by prior `coro.begin` call.
Example (custom deallocation function):
"""""""""""""""""""""""""""""""""""""""
.. code-block:: text
.. code-block:: llvm
cleanup:
%mem = call i8* @llvm.coro.free(token %id, i8* %frame)
@ -827,7 +827,7 @@ Example (custom deallocation function):
Example (standard deallocation functions):
""""""""""""""""""""""""""""""""""""""""""
.. code-block:: text
.. code-block:: llvm
cleanup:
%mem = call i8* @llvm.coro.free(token %id, i8* %frame)
@ -846,7 +846,7 @@ Overview:
"""""""""
The '``llvm.coro.alloc``' intrinsic returns `true` if dynamic allocation is
required to obtain a memory for the corutine frame and `false` otherwise.
required to obtain memory for the coroutine frame and `false` otherwise.
Arguments:
""""""""""
@ -864,7 +864,7 @@ when possible.
Example:
""""""""
.. code-block:: text
.. code-block:: llvm
entry:
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
@ -955,41 +955,90 @@ A frontend should emit exactly one `coro.id` intrinsic per coroutine.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
::
declare void @llvm.coro.end(i8* <handle>, i1 <unwind>)
declare i1 @llvm.coro.end(i8* <handle>, i1 <unwind>)
Overview:
"""""""""
The '``llvm.coro.end``' marks the point where execution of the resume part of
the coroutine should end and control returns back to the caller.
the coroutine should end and control should return to the caller.
Arguments:
""""""""""
The first argument should refer to the coroutine handle of the enclosing coroutine.
The first argument should refer to the coroutine handle of the enclosing
coroutine. A frontend is allowed to supply null as the first parameter, in this
case `coro-early` pass will replace the null with an appropriate coroutine
handle value.
The second argument should be `true` if this coro.end is in the block that is
part of the unwind sequence leaving the coroutine body due to exception prior to
the first reaching any suspend points, and `false` otherwise.
part of the unwind sequence leaving the coroutine body due to an exception and
`false` otherwise.
Semantics:
""""""""""
The `coro.end`_ intrinsic is a no-op during an initial invocation of the
coroutine. When the coroutine resumes, the intrinsic marks the point when
coroutine need to return control back to the caller.
The purpose of this intrinsic is to allow frontends to mark the cleanup and
other code that is only relevant during the initial invocation of the coroutine
and should not be present in resume and destroy parts.
This intrinsic is removed by the CoroSplit pass when a coroutine is split into
the start, resume and destroy parts. In start part, the intrinsic is removed,
in resume and destroy parts, it is replaced with `ret void` instructions and
This intrinsic is lowered when a coroutine is split into
the start, resume and destroy parts. In the start part, it is a no-op,
in resume and destroy parts, it is replaced with `ret void` instruction and
the rest of the block containing `coro.end` instruction is discarded.
In landing pads it is replaced with an appropriate instruction to unwind to
caller.
caller. The handling of coro.end differs depending on whether the target is
using landingpad or WinEH exception model.
A frontend is allowed to supply null as the first parameter, in this case
`coro-early` pass will replace the null with an appropriate coroutine handle
value.
For landingpad based exception model, it is expected that frontend uses the
`coro.end`_ intrinsic as follows:
.. code-block:: llvm
ehcleanup:
%InResumePart = call i1 @llvm.coro.end(i8* null, i1 true)
br i1 %InResumePart, label %eh.resume, label %cleanup.cont
cleanup.cont:
; rest of the cleanup
eh.resume:
%exn = load i8*, i8** %exn.slot, align 8
%sel = load i32, i32* %ehselector.slot, align 4
%lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
%lpad.val29 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
resume { i8*, i32 } %lpad.val29
The `CoroSplit` pass replaces `coro.end` with ``True`` in the resume functions,
thus leading to an immediate unwind to the caller, whereas in the start function
it is replaced with ``False``, allowing execution to proceed to the rest of the
cleanup code that is only needed during the initial invocation of the coroutine.
For Windows Exception handling model, a frontend should attach a funclet bundle
referring to an enclosing cleanuppad as follows:
.. code-block:: llvm
ehcleanup:
%tok = cleanuppad within none []
%unused = call i1 @llvm.coro.end(i8* null, i1 true) [ "funclet"(token %tok) ]
cleanupret from %tok unwind label %RestOfTheCleanup
The `CoroSplit` pass, if the funclet bundle is present, will insert
``cleanupret from %tok unwind to caller`` before
the `coro.end`_ intrinsic and will remove the rest of the block.
The following table summarizes the handling of `coro.end`_ intrinsic.
+--------------------------+-------------------+-------------------------------+
| | In Start Function | In Resume/Destroy Functions |
+--------------------------+-------------------+-------------------------------+
|unwind=false | nothing |``ret void`` |
+------------+-------------+-------------------+-------------------------------+
| | WinEH | nothing |``cleanupret unwind to caller``|
|unwind=true +-------------+-------------------+-------------------------------+
| | Landingpad | nothing | nothing |
+------------+-------------+-------------------+-------------------------------+
.. _coro.suspend:
.. _suspend points:
@ -1025,7 +1074,7 @@ basic blocks.
Example (normal suspend point):
"""""""""""""""""""""""""""""""
.. code-block:: text
.. code-block:: llvm
%0 = call i8 @llvm.coro.suspend(token none, i1 false)
switch i8 %0, label %suspend [i8 0, label %resume
@ -1034,7 +1083,7 @@ Example (normal suspend point):
Example (final suspend point):
""""""""""""""""""""""""""""""
.. code-block:: text
.. code-block:: llvm
while.end:
%s.final = call i8 @llvm.coro.suspend(token none, i1 true)
@ -1095,10 +1144,10 @@ In such a case, a coroutine should be ready for resumption prior to a call to
a different thread possibly prior to `async_op` call returning control back
to the coroutine:
.. code-block:: text
.. code-block:: llvm
%save1 = call token @llvm.coro.save(i8* %hdl)
call void async_op1(i8* %hdl)
call void @async_op1(i8* %hdl)
%suspend1 = call i1 @llvm.coro.suspend(token %save1, i1 false)
switch i8 %suspend1, label %suspend [i8 0, label %resume1
i8 1, label %cleanup]

View File

@ -21,7 +21,7 @@ to know how it works under the hood. A prior knowledge of how Clang's profile
guided optimization works is useful, but not required.
We start by showing how to use LLVM and Clang for code coverage analysis,
then we briefly desribe LLVM's code coverage mapping format and the
then we briefly describe LLVM's code coverage mapping format and the
way that Clang and LLVM's code coverage tool work with this format. After
the basics are down, more advanced features of the coverage mapping format
are discussed - such as the data structures, LLVM IR representation and

View File

@ -62,7 +62,7 @@ way to see what other people are interested in and watching the flow of the
project as a whole.
We recommend that active developers register an email account with `LLVM
Bugzilla <http://llvm.org/bugs/>`_ and preferably subscribe to the `llvm-bugs
Bugzilla <https://bugs.llvm.org/>`_ and preferably subscribe to the `llvm-bugs
<http://lists.llvm.org/mailman/listinfo/llvm-bugs>`_ email list to keep track
of bugs and enhancements occurring in LLVM. We really appreciate people who are
proactive at catching incoming bugs in their components and dealing with them
@ -261,7 +261,7 @@ the future that the change is responsible for. For example:
* The changes should not cause performance or correctness regressions in code
compiled by LLVM on all applicable targets.
* You are expected to address any `Bugzilla bugs <http://llvm.org/bugs/>`_ that
* You are expected to address any `Bugzilla bugs <https://bugs.llvm.org/>`_ that
result from your change.
We prefer for this to be handled before submission but understand that it isn't

View File

@ -0,0 +1,199 @@
=========================================
A guide to Dockerfiles for building LLVM
=========================================
Introduction
============
You can find a number of sources to build docker images with LLVM components in
``llvm/utils/docker``. They can be used by anyone who wants to build the docker
images for their own use, or as a starting point for someone who wants to write
their own Dockerfiles.
We currently provide Dockerfiles with ``debian8`` and ``nvidia-cuda`` base images.
We also provide an ``example`` image, which contains placeholders that one would need
to fill out in order to produce Dockerfiles for a new docker image.
Why?
----
Docker images provide a way to produce binary distributions of
software inside a controlled environment. Having Dockerfiles to build docker
images inside the LLVM repo makes them much more discoverable than putting them
in any other place.
Docker basics
-------------
If you've never heard about Docker before, you might find this section helpful
to get a very basic explanation of it.
`Docker <https://www.docker.com/>`_ is a popular solution for running programs in
an isolated and reproducible environment, especially to maintain releases for
software deployed to large distributed fleets.
It uses Linux kernel namespaces and cgroups to provide lightweight isolation
inside the currently running Linux kernel.
A single active instance of dockerized environment is called a *docker
container*.
A snapshot of a docker container filesystem is called a *docker image*.
One can start a container from a prebuilt docker image.
Docker images are built from a so-called *Dockerfile*, a source file written in
a specialized language that defines instructions to be used when building
the docker image (see the `official
documentation <https://docs.docker.com/engine/reference/builder/>`_ for more
details). A minimal Dockerfile typically contains a base image and a number
of RUN commands that have to be executed to build the image. When building a new
image, docker will first download your base image, mount its filesystem as
read-only and then add a writable overlay on top of it to keep track of all
filesystem modifications performed while building your image. When the build
process is finished, a diff between your image's final filesystem state and the
base image's filesystem is stored in the resulting image.
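For orientation only, here is a minimal sketch of building such an image by hand;
the image name, tag and Dockerfile used below are hypothetical placeholders, not
files shipped in the LLVM tree:

.. code-block:: bash

  # Build an image from the Dockerfile in the current directory and tag it.
  # "my-llvm-image" and "staging" are example names used only for this sketch.
  docker build -t my-llvm-image:staging -f Dockerfile .

The LLVM-specific scripts described below wrap this kind of invocation for you.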
Overview
========
The ``llvm/utils/docker`` folder contains Dockerfiles and simple bash scripts to
serve as a basis for anyone who wants to create their own Docker image with
LLVM components, compiled from sources. The sources are checked out from the
upstream svn repository when building the image.
Inside each subfolder we host Dockerfiles for two images:
- ``build/`` image is used to compile LLVM; it installs a system compiler and all
build dependencies of LLVM. After the build process is finished, the build
image will have an archive with compiled components at ``/tmp/clang.tar.gz``.
- ``release/`` image usually only contains LLVM components, compiled by the
``build/`` image, and also libstdc++ and binutils to make the image minimally
useful for C++ development. The assumption is that you usually want clang to
be one of the provided components.
To build both of those images, use the ``build_docker_image.sh`` script.
It will check out the LLVM sources and build clang in the ``build`` container,
copy the results of the build to the local filesystem and then build the
``release`` container using those. ``build_docker_image.sh`` accepts a list of
LLVM repositories to check out, and arguments for the CMake invocation.
If you want to write your own docker image, start with the ``example/`` subfolder.
It provides incomplete Dockerfiles with (very few) FIXMEs explaining the steps
you need to take in order to make your Dockerfiles functional.
Usage
=====
The ``llvm/utils/build_docker_image.sh`` script provides a rather high degree of
control over how to run the build. It allows you to specify the projects to
check out from svn and provide a list of CMake arguments to use when building
LLVM inside the docker container.
Here's a very simple example of getting a docker image with clang binary,
compiled by the system compiler in the debian8 image:
.. code-block:: bash
./llvm/utils/docker/build_docker_image.sh \
--source debian8 \
--docker-repository clang-debian8 --docker-tag "staging" \
-p clang -i install-clang -i install-clang-headers \
-- \
-DCMAKE_BUILD_TYPE=Release
Note that a build like that doesn't use the 2-stage build process that
you probably want for clang. Running a 2-stage build is a little more intricate;
this command will do that:
.. code-block:: bash
# Run a 2-stage build.
# LLVM_TARGETS_TO_BUILD=Native is to reduce stage1 compile time.
# Options starting with BOOTSTRAP_* are passed to the stage2 cmake invocation.
./build_docker_image.sh \
--source debian8 \
--docker-repository clang-debian8 --docker-tag "staging" \
-p clang -i stage2-install-clang -i stage2-install-clang-headers \
-- \
-DLLVM_TARGETS_TO_BUILD=Native -DCMAKE_BUILD_TYPE=Release \
-DBOOTSTRAP_CMAKE_BUILD_TYPE=Release \
-DCLANG_ENABLE_BOOTSTRAP=ON -DCLANG_BOOTSTRAP_TARGETS="install-clang;install-clang-headers"
This will produce two images, a release image ``clang-debian8:staging`` and a
build image ``clang-debian8-build:staging`` from the latest upstream revision.
After the image is built you can run bash inside a container based on your
image like this:
.. code-block:: bash
docker run -ti clang-debian8:staging bash
Now you can run bash commands as you normally would:
.. code-block:: bash
root@80f351b51825:/# clang -v
clang version 5.0.0 (trunk 305064)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /bin
Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.8
Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.8.4
Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9
Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9.2
Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9
Candidate multilib: .;@m64
Selected multilib: .;@m64
Which image should I choose?
============================
We currently provide two images: debian8-based and nvidia-cuda-based. They
differ in the base image that they use, i.e. they have a different set of
preinstalled binaries. Debian8 is very minimal, nvidia-cuda is larger, but has
preinstalled CUDA libraries and allows access to a GPU installed on your
machine.
If you need a minimal linux distribution with only clang and libstdc++ included,
you should try the debian8-based image.
If you want to use CUDA libraries and have access to a GPU on your machine,
you should choose the nvidia-cuda-based image and use `nvidia-docker
<https://github.com/NVIDIA/nvidia-docker>`_ to run your docker containers. Note
that you don't need nvidia-docker to build the images, but you do need it in
order to have access to the GPU from a docker container that is running the
built image.
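As a sketch, assuming you have already built the release image under the name
used in the examples above and have nvidia-docker installed, running it with GPU
access might look like:

.. code-block:: bash

  # Requires nvidia-docker and an NVIDIA GPU on the host; image name as built above.
  nvidia-docker run -ti clang-debian8:staging bash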
If you have a different use case, you could create your own image based on the
``example/`` folder.
Any docker image can be built and run using only the docker binary, i.e. you can
run debian8 build on Fedora or any other Linux distribution. You don't need to
install CMake, compilers or any other clang dependencies. It is all handled
during the build process inside Docker's isolated environment.
Stable build
============
If you want a somewhat recent and somewhat stable build, use the
``branches/google/stable`` branch, i.e. the following command will produce a
debian8-based image using the latest ``google/stable`` sources for you:
.. code-block:: bash
./llvm/utils/docker/build_docker_image.sh \
-s debian8 -d clang-debian8 -t "staging" \
--branch branches/google/stable \
-p clang -i install-clang -i install-clang-headers \
-- \
-DCMAKE_BUILD_TYPE=Release
Minimizing docker image size
============================
Due to Docker restrictions we use two images (i.e., build and release folders)
for the release image to be as small as possible. It's much easier to achieve
that using two images, because Docker would store a filesystem layer for each
command in the Dockerfile, i.e. if you install some packages in one command,
then remove them in a separate command, the size of the resulting image will
still be proportional to the size of an image with the packages installed.
Therefore, we strive to provide a very simple release image which only copies
compiled clang and does not do anything else.
Docker 1.13 added a ``--squash`` flag that allows flattening the layers of the
image, i.e. removing the parts that were actually deleted. That is an easier way
to produce the smallest possible images using just a single image. We do not
use it because, as of today, the flag is still in an experimental stage and not
everyone may have the latest docker version available. When the flag is out of
its experimental stage, we should investigate replacing the two-images approach
with a single image built using the ``--squash`` flag.
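For reference, a hedged sketch of what such a single-image build could look like;
it assumes a Docker daemon with experimental features enabled, and the Dockerfile
path is a hypothetical placeholder:

.. code-block:: bash

  # Experimental: --squash only works when the daemon runs in experimental mode.
  docker build --squash -t clang-debian8:staging -f Dockerfile .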

View File

@ -61,7 +61,7 @@ types ``IMAGE_REL_I386_SECREL`` (32-bit) or ``IMAGE_REL_AMD64_SECREL`` (64-bit).
the target. It corresponds to the COFF relocation types
``IMAGE_REL_I386_SECTION`` (32-bit) or ``IMAGE_REL_AMD64_SECTION`` (64-bit).
.. code-block:: gas
.. code-block:: none
.section .debug$S,"rn"
.long 4
@ -204,9 +204,49 @@ For example, the following code creates two sections named ``.text``.
The unique number is not present in the resulting object at all. It is just used
in the assembler to differentiate the sections.
The 'o' flag is mapped to SHF_LINK_ORDER. If it is present, a symbol
must be given that identifies the section to be placed in the
.sh_link.
.. code-block:: gas
.section .foo,"a",@progbits
.Ltmp:
.section .bar,"ao",@progbits,.Ltmp
which is equivalent to just
.. code-block:: gas
.section .foo,"a",@progbits
.section .bar,"ao",@progbits,.foo
Target Specific Behaviour
=========================
X86
---
Relocations
^^^^^^^^^^^
``@ABS8`` can be applied to symbols which appear as immediate operands to
instructions that have an 8-bit immediate form for that operand. It causes
the assembler to use the 8-bit form and an 8-bit relocation (e.g. ``R_386_8``
or ``R_X86_64_8``) for the symbol.
For example:
.. code-block:: gas
cmpq $foo@ABS8, %rdi
This causes the assembler to select the form of the 64-bit ``cmpq`` instruction
that takes an 8-bit immediate operand that is sign extended to 64 bits, as
opposed to ``cmpq $foo, %rdi`` which takes a 32-bit immediate operand. This
is also not the same as ``cmpb $foo, %dil``, which is an 8-bit comparison.
Windows on ARM
--------------

View File

@ -47,12 +47,18 @@ The format of this section is
uint32 : NumFaultingPCs
uint32 : Reserved (expected to be 0)
FunctionFaultInfo[NumFaultingPCs] {
uint32 : FaultKind = FaultMaps::FaultingLoad (only legal value currently)
uint32 : FaultKind
uint32 : FaultingPCOffset
uint32 : HandlerPCOffset
}
}
FaultKind describes the reason for the expected fault. Currently three kinds
of faults are supported:
1. ``FaultMaps::FaultingLoad`` - fault due to a load from memory.
2. ``FaultMaps::FaultingLoadStore`` - fault due to an instruction that both loads and stores.
3. ``FaultMaps::FaultingStore`` - fault due to a store to memory.
The ``ImplicitNullChecks`` pass
===============================

View File

@ -9,10 +9,11 @@ Introduction
============
This document seeks to dispel the mystery and confusion surrounding LLVM's
`GetElementPtr <LangRef.html#i_getelementptr>`_ (GEP) instruction. Questions
about the wily GEP instruction are probably the most frequently occurring
questions once a developer gets down to coding with LLVM. Here we lay out the
sources of confusion and show that the GEP instruction is really quite simple.
`GetElementPtr <LangRef.html#getelementptr-instruction>`_ (GEP) instruction.
Questions about the wily GEP instruction are probably the most frequently
occurring questions once a developer gets down to coding with LLVM. Here we lay
out the sources of confusion and show that the GEP instruction is really quite
simple.
Address Computation
===================
@ -26,7 +27,7 @@ questions.
What is the first index of the GEP instruction?
-----------------------------------------------
Quick answer: The index stepping through the first operand.
Quick answer: The index stepping through the second operand.
The confusion with the first index usually arises from thinking about the
GetElementPtr instruction as if it was a C index operator. They aren't the
@ -58,7 +59,7 @@ Sometimes this question gets rephrased as:
won't be dereferenced?*
The answer is simply because memory does not have to be accessed to perform the
computation. The first operand to the GEP instruction must be a value of a
computation. The second operand to the GEP instruction must be a value of a
pointer type. The value of the pointer is provided directly to the GEP
instruction as an operand without any need for accessing memory. It must,
therefore be indexed and requires an index operand. Consider this example:
@ -79,8 +80,8 @@ therefore be indexed and requires an index operand. Consider this example:
In this "C" example, the front end compiler (Clang) will generate three GEP
instructions for the three indices through "P" in the assignment statement. The
function argument ``P`` will be the first operand of each of these GEP
instructions. The second operand indexes through that pointer. The third
function argument ``P`` will be the second operand of each of these GEP
instructions. The third operand indexes through that pointer. The fourth
operand will be the field offset into the ``struct munger_struct`` type, for
either the ``f1`` or ``f2`` field. So, in LLVM assembly the ``munge`` function
looks like:
@ -99,8 +100,8 @@ looks like:
ret void
}
In each case the first operand is the pointer through which the GEP instruction
starts. The same is true whether the first operand is an argument, allocated
In each case the second operand is the pointer through which the GEP instruction
starts. The same is true whether the second operand is an argument, allocated
memory, or a global variable.
To make this clear, let's consider a more obtuse example:
@ -158,11 +159,11 @@ confusion:
i32 }*``. That is, ``%MyStruct`` is a pointer to a structure containing a
pointer to a ``float`` and an ``i32``.
#. Point #1 is evidenced by noticing the type of the first operand of the GEP
#. Point #1 is evidenced by noticing the type of the second operand of the GEP
instruction (``%MyStruct``) which is ``{ float*, i32 }*``.
#. The first index, ``i64 0`` is required to step over the global variable
``%MyStruct``. Since the first argument to the GEP instruction must always
``%MyStruct``. Since the second argument to the GEP instruction must always
be a value of pointer type, the first index steps through that pointer. A
value of 0 means 0 elements offset from that pointer.
@ -266,7 +267,7 @@ in the IR. In the future, it will probably be outright disallowed.
What effect do address spaces have on GEPs?
-------------------------------------------
None, except that the address space qualifier on the first operand pointer type
None, except that the address space qualifier on the second operand pointer type
always matches the address space qualifier on the result type.
How is GEP different from ``ptrtoint``, arithmetic, and ``inttoptr``?
@ -429,7 +430,8 @@ because LLVM has no restrictions on mixing types in addressing, loads or stores.
LLVM's type-based alias analysis pass uses metadata to describe a different type
system (such as the C type system), and performs type-based aliasing on top of
that. Further details are in the `language reference <LangRef.html#tbaa>`_.
that. Further details are in the
`language reference <LangRef.html#tbaa-metadata>`_.
What happens if a GEP computation overflows?
--------------------------------------------
@ -524,7 +526,7 @@ instruction:
#. The GEP instruction never accesses memory, it only provides pointer
computations.
#. The first operand to the GEP instruction is always a pointer and it must be
#. The second operand to the GEP instruction is always a pointer and it must be
indexed.
#. There are no superfluous indices for the GEP instruction.

View File

@ -52,6 +52,18 @@ Here's the short story for getting up and running quickly with LLVM:
* ``cd llvm/tools``
* ``svn co http://llvm.org/svn/llvm-project/cfe/trunk clang``
#. Checkout LLD linker **[Optional]**:
* ``cd where-you-want-llvm-to-live``
* ``cd llvm/tools``
* ``svn co http://llvm.org/svn/llvm-project/lld/trunk lld``
#. Checkout Polly Loop Optimizer **[Optional]**:
* ``cd where-you-want-llvm-to-live``
* ``cd llvm/tools``
* ``svn co http://llvm.org/svn/llvm-project/polly/trunk polly``
#. Checkout Compiler-RT (required to build the sanitizers) **[Optional]**:
* ``cd where-you-want-llvm-to-live``
@ -159,6 +171,8 @@ Linux PowerPC GCC, Clang
Solaris V9 (Ultrasparc) GCC
FreeBSD x86\ :sup:`1` GCC, Clang
FreeBSD amd64 GCC, Clang
NetBSD x86\ :sup:`1` GCC, Clang
NetBSD amd64 GCC, Clang
MacOS X\ :sup:`2` PowerPC GCC
MacOS X x86 GCC, Clang
Cygwin/Win32 x86\ :sup:`1, 3` GCC
@ -685,14 +699,14 @@ For developers to work with a git monorepo
.. note::
This set-up is using unofficial mirror hosted on GitHub, use with caution.
This set-up is using an unofficial mirror hosted on GitHub, use with caution.
To set up a clone of all the llvm projects using a unified repository:
.. code-block:: console
% export TOP_LEVEL_DIR=`pwd`
% git clone https://github.com/llvm-project/llvm-project/
% git clone https://github.com/llvm-project/llvm-project-20170507/ llvm-project
% cd llvm-project
% git config branch.master.rebase true
@ -719,10 +733,10 @@ Or a combination of multiple projects:
% cd $TOP_LEVEL_DIR
% mkdir clang-build && cd clang-build
% cmake -GNinja ../llvm-project/llvm -DLLVM_ENABLE_PROJECTS="clang;libcxx;compiler-rt"
% cmake -GNinja ../llvm-project/llvm -DLLVM_ENABLE_PROJECTS="clang;libcxx;libcxxabi"
A helper script is provided in `llvm/utils/git-svn/git-llvm`. After you add it
to your path, you can push committed changes upstream with `git llvm push`.
A helper script is provided in ``llvm/utils/git-svn/git-llvm``. After you add it
to your path, you can push committed changes upstream with ``git llvm push``.
.. code-block:: console
@ -731,10 +745,22 @@ to your path, you can push committed changes upstream with `git llvm push`.
While this is using SVN under the hood, it does not require any interaction from
you with git-svn.
After a few minutes, `git pull` should get back the changes as they were
commited. Note that a current limitation is that `git` does not directly record
file rename, and thus it is propagated to SVN as a combination of delete-add
instead of a file rename.
After a few minutes, ``git pull`` should get back the changes as they were
committed. Note that a current limitation is that ``git`` does not directly
record file rename, and thus it is propagated to SVN as a combination of
delete-add instead of a file rename.
The SVN revision of each monorepo commit can be found in the commit notes. git
does not fetch notes by default. The following commands will fetch the notes and
configure git to fetch future notes. Use ``git notes show $commit`` to look up
the SVN revision of a git commit. The notes show up in ``git log``, and searching
the log is currently the recommended way to look up the git commit for a given
SVN revision.
.. code-block:: console
% git config --add remote.origin.fetch +refs/notes/commits:refs/notes/commits
% git fetch
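For example, after fetching the notes you can inspect the note (and thus the SVN
revision) attached to the current commit; the output is omitted here:

.. code-block:: console

  % git notes show HEAD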
If you are using `arc` to interact with Phabricator, you need to manually put it
at the root of the checkout:
@ -793,7 +819,8 @@ used by people developing LLVM.
+-------------------------+----------------------------------------------------+
| LLVM_ENABLE_SPHINX | Build sphinx-based documentation from the source |
| | code. This is disabled by default because it is |
| | slow and generates a lot of output. |
| | slow and generates a lot of output. Sphinx version |
| | 1.5 or later recommended. |
+-------------------------+----------------------------------------------------+
| LLVM_BUILD_LLVM_DYLIB | Generate libLLVM.so. This library contains a |
| | default set of LLVM components that can be |
@ -1138,7 +1165,7 @@ the `Command Guide <CommandGuide/index.html>`_.
``llc``
``llc`` is the LLVM backend compiler, which translates LLVM bitcode to a
native code assembly file or to C code (with the ``-march=c`` option).
native code assembly file.
``opt``

View File

@ -100,6 +100,10 @@ Here's the short story for getting up and running quickly with LLVM:
* CMake generates project files for all build types. To select a specific
build type, use the Configuration manager from the VS IDE or the
``/property:Configuration`` command line option when using MSBuild.
* By default, the Visual Studio project files generated by CMake use the
32-bit toolset. If you are developing on a 64-bit version of Windows and
want to use the 64-bit toolset, pass the ``-Thost=x64`` flag when
generating the Visual Studio solution. This requires CMake 3.8.0 or later.
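As a sketch, assuming CMake 3.8.0 or later and a build directory created next to
the ``llvm`` source checkout, the flag is passed when generating the solution;
the generator name below corresponds to Visual Studio 2017 and depends on your
installed version:

.. code-block:: console

  cmake -G "Visual Studio 15 2017" -Thost=x64 ..\llvm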
6. Start Visual Studio

View File

@ -358,41 +358,6 @@ existing patterns (as any pattern we can select is by definition legal).
Expanding that to describe legalization actions is a much larger but
potentially useful project.
.. _milegalizer-scalar-narrow:
Scalar narrow types
^^^^^^^^^^^^^^^^^^^
In the AArch64 port, we currently mark as legal operations on narrow integer
types that have a legal equivalent in a wider type.
For example, this:
%2(GPR,s8) = G_ADD %0, %1
is selected to a 32-bit instruction:
%2(GPR32) = ADDWrr %0, %1
This avoids unnecessarily legalizing operations that can be seen as legal:
8-bit additions are supported, but happen to have a 32-bit result with the high
24 bits undefined.
``TODO``:
This has implications regarding vreg classes (as narrow values can now be
represented by wider vregs) and should be investigated further.
``TODO``:
In particular, s1 comparison results can be represented as wider values in
different ways.
SelectionDAG has the notion of BooleanContents, which allows targets to choose
what true and false are when in a larger register:
* ``ZeroOrOne`` --- if only 0 and 1 are valid bools, even in a larger register.
* ``ZeroOrMinusOne`` --- if -1 is true (common for vector instructions,
where compares produce -1).
* ``Undefined`` --- if only the low bit is relevant in determining truth.
.. _milegalizer-non-power-of-2:
Non-power of 2 types

View File

@ -7,7 +7,7 @@ Introduction
Building with link time optimization requires cooperation from
the system linker. LTO support on Linux systems requires that you use the
`gold linker`_ which supports LTO via plugins. This is the same mechanism
`gold linker`_ or ld.bfd from binutils >= 2.21.51.0.2, as they support LTO via plugins. This is the same mechanism
used by the `GCC LTO`_ project.
The LLVM gold plugin implements the gold plugin interface on top of
@ -23,24 +23,22 @@ The LLVM gold plugin implements the gold plugin interface on top of
How to build it
===============
You need to have gold with plugin support and build the LLVMgold plugin.
Check whether you have gold running ``/usr/bin/ld -v``. It will report "GNU
gold" or else "GNU ld" if not. If you have gold, check for plugin support
by running ``/usr/bin/ld -plugin``. If it complains "missing argument" then
you have plugin support. If not, such as an "unknown option" error then you
will either need to build gold or install a version with plugin support.
Check for plugin support by running ``/usr/bin/ld -plugin``. If it complains
"missing argument" then you have plugin support. If instead you get an "unknown
option" error, then you will either need to build gold or install a recent
version of ld.bfd with plugin support and then build the gold plugin.
* Download, configure and build gold with plugin support:
* Download, configure and build ld.bfd with plugin support:
.. code-block:: bash
$ git clone --depth 1 git://sourceware.org/git/binutils-gdb.git binutils
$ mkdir build
$ cd build
$ ../binutils/configure --enable-gold --enable-plugins --disable-werror
$ make all-gold
$ ../binutils/configure --disable-werror # ld.bfd includes plugin support by default
$ make all-ld
That should leave you with ``build/gold/ld-new`` which supports
That should leave you with ``build/ld/ld-new`` which supports
the ``-plugin`` option. Running ``make`` will additionally build
``build/binutils/ar`` and ``nm-new`` binaries supporting plugins.

View File

@ -6,9 +6,19 @@ Introduction
============
This document contains information about adding a build configuration and
buildslave to private slave builder to LLVM Buildbot Infrastructure
`<http://lab.llvm.org:8011>`_.
buildslave to a private slave builder in the LLVM Buildbot Infrastructure.
Buildmasters
============
There are two buildmasters running.
* The main buildmaster at `<http://lab.llvm.org:8011>`_. All builders attached
to this machine will notify commit authors every time they break the build.
* The staging buildbot at `<http://lab.llvm.org:8014>`_. All builders attached
to this machine will be completely silent by default when the build is broken.
Builders for experimental backends should generally be attached to this
buildmaster.
Steps To Add Builder To LLVM Buildbot
=====================================
@ -52,6 +62,9 @@ Here are the steps you can follow to do so:
lab.llvm.org:9990 \
<buildslave-access-name> <buildslave-access-password>
To point a slave to the silent master, please use lab.llvm.org:9994 instead
of lab.llvm.org:9990.
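Concretely, the only change from the command shown above is the master address;
a sketch of attaching a slave to the staging (silent) master, assuming the same
``buildslave create-slave`` invocation:

.. code-block:: console

  buildslave create-slave <buildslave-root-directory> \
      lab.llvm.org:9994 \
      <buildslave-access-name> <buildslave-access-password>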
#. Fill the buildslave description and admin name/e-mail. Here is an
example of the buildslave description::
@ -73,6 +86,13 @@ Here are the steps you can follow to do so:
* slaves are added to ``buildbot/osuosl/master/config/slaves.py``
* builders are added to ``buildbot/osuosl/master/config/builders.py``
Please make sure your builder name and its builddir are unique through the file.
It is possible to whitelist email addresses to unconditionally receive notifications
on build failure; for this you'll need to add an ``InformativeMailNotifier`` to
``buildbot/osuosl/master/config/status.py``. This is particularly useful for the
staging buildmaster, which is otherwise silent.
#. Send the buildslave access name and the access password directly to
`Galina Kistanova <mailto:gkistanova@gmail.com>`_, and wait until she
lets you know that your changes are applied and the buildmaster is

View File

@ -19,7 +19,7 @@ section to narrow down the bug so that the person who fixes it will be able
to find the problem more easily.
Once you have a reduced test-case, go to `the LLVM Bug Tracking System
<http://llvm.org/bugs/enter_bug.cgi>`_ and fill out the form with the
<https://bugs.llvm.org/enter_bug.cgi>`_ and fill out the form with the
necessary details (note that you don't need to pick a category, just use
the "new-bugs" category if you're not sure). The bug description should
contain the following information:

View File

@ -38,36 +38,35 @@ Because attributes are no longer represented as a bit mask, you will need to
convert any code which does treat them as a bit mask to use the new query
methods on the Attribute class.
``AttributeSet``
================
``AttributeList``
=================
The ``AttributeSet`` class replaces the old ``AttributeList`` class. The
``AttributeSet`` stores a collection of Attribute objects for each kind of
object that may have an attribute associated with it: the function as a
whole, the return type, or the function's parameters. A function's attributes
are at index ``AttributeSet::FunctionIndex``; the return type's attributes are
at index ``AttributeSet::ReturnIndex``; and the function's parameters'
attributes are at indices 1, ..., n (where 'n' is the number of parameters).
Most methods on the ``AttributeSet`` class take an index parameter.
The ``AttributeList`` stores a collection of Attribute objects for each kind of
object that may have an attribute associated with it: the function as a whole,
the return type, or the function's parameters. A function's attributes are at
index ``AttributeList::FunctionIndex``; the return type's attributes are at
index ``AttributeList::ReturnIndex``; and the function's parameters' attributes
are at indices 1, ..., n (where 'n' is the number of parameters). Most methods
on the ``AttributeList`` class take an index parameter.
An ``AttributeSet`` is also a uniqued and immutable object. You create an
``AttributeSet`` through the ``AttributeSet::get`` methods. You can add and
remove attributes, which result in the creation of a new ``AttributeSet``.
An ``AttributeList`` is also a uniqued and immutable object. You create an
``AttributeList`` through the ``AttributeList::get`` methods. You can add and
remove attributes, which result in the creation of a new ``AttributeList``.
An ``AttributeSet`` object is designed to be passed around by value.
An ``AttributeList`` object is designed to be passed around by value.
Note: It is advised that you do *not* use the ``AttributeSet`` "introspection"
Note: It is advised that you do *not* use the ``AttributeList`` "introspection"
methods (e.g. ``Raw``, ``getRawPointer``, etc.). These methods break
encapsulation, and may be removed in a future release (i.e. LLVM 4.0).
``AttrBuilder``
===============
Lastly, we have a "builder" class to help create the ``AttributeSet`` object
Lastly, we have a "builder" class to help create the ``AttributeList`` object
without having to create several different intermediate uniqued
``AttributeSet`` objects. The ``AttrBuilder`` class allows you to add and
``AttributeList`` objects. The ``AttrBuilder`` class allows you to add and
remove attributes at will. The attributes won't be uniqued until you call the
appropriate ``AttributeSet::get`` method.
appropriate ``AttributeList::get`` method.
An ``AttrBuilder`` object is *not* designed to be passed around by value. It
should be passed by reference.

View File

@ -54,7 +54,7 @@ handled by another build system (See: :doc:`CMake <CMake>`).
The build system implementation will load the relevant contents of the
LLVMBuild files and use that to drive the actual project build.
Typically, the build system will only need to load this information at
"configure" time, and use it to generative native information. Build
"configure" time, and use it to generate native information. Build
systems will also handle automatically reconfiguring their information
when the contents of the ``LLVMBuild.txt`` files change.

File diff suppressed because it is too large

View File

@ -38,6 +38,13 @@ B
**BB Vectorization**
Basic-Block Vectorization
**BDCE**
Bit-tracking dead code elimination. Some bit-wise instructions (shifts,
ands, ors, etc.) "kill" some of their input bits -- that is, they make it
such that those bits can be either zero or one without affecting control or
data flow of a program. The BDCE pass removes instructions that only
compute these dead bits.
**BURS**
Bottom Up Rewriting System --- A method of instruction selection for code
generation. An example is the `BURG
@ -102,6 +109,13 @@ G
Garbage Collection. The practice of using reachability analysis instead of
explicit memory management to reclaim unused memory.
**GVN**
Global Value Numbering. GVN is a pass that partitions values computed by a
function into congruence classes. Values ending up in the same congruence
class are guaranteed to be the same for every execution of the program.
In that respect, congruency is a compile-time approximation of equivalence
of values at runtime.
H
-
@ -182,7 +196,7 @@ P
**PR**
Problem report. A bug filed on `the LLVM Bug Tracking System
<http://llvm.org/bugs/enter_bug.cgi>`_.
<https://bugs.llvm.org/enter_bug.cgi>`_.
**PRE**
Partial Redundancy Elimination
@ -242,6 +256,14 @@ S
Superword-Level Parallelism, same as :ref:`Basic-Block Vectorization
<lexicon-bb-vectorization>`.
**Splat**
Splat refers to a vector of identical scalar elements.
The term is based on the PowerPC Altivec instructions that provided
this functionality in hardware. For example, "vsplth" and the corresponding
software intrinsic "vec_splat()". Examples of other hardware names for this
action include "duplicate" (ARM) and "broadcast" (x86).
**SRoA**
Scalar Replacement of Aggregates

View File

@ -87,10 +87,16 @@ Some important things to remember about fuzz targets:
* Usually, the narrower the target the better. E.g. if your target can parse several data formats, split it into several targets, one per format.
Building
--------
Fuzzer Usage
------------
Next, build the libFuzzer library as a static archive, without any sanitizer
Very recent versions of Clang (> April 20 2017) include libFuzzer,
and no installation is necessary.
In order to fuzz your binary, use the `-fsanitize=fuzzer` flag during the compilation::
clang -fsanitize=fuzzer,address mytarget.c
Otherwise, build the libFuzzer library as a static archive, without any sanitizer
options. Note that the libFuzzer library contains the ``main()`` function:
.. code-block:: console
@ -299,6 +305,10 @@ The most important command line options are:
- 1 : close ``stdout``
- 2 : close ``stderr``
- 3 : close both ``stdout`` and ``stderr``.
``-print_coverage``
If 1, print coverage information as text at exit.
``-dump_coverage``
If 1, dump coverage information as a .sancov file at exit.
For the full list of flags run the fuzzer binary with ``-help=1``.
@ -537,12 +547,19 @@ You can get the coverage for your corpus like this:
.. code-block:: console
ASAN_OPTIONS=coverage=1 ./fuzzer CORPUS_DIR -runs=0
./fuzzer CORPUS_DIR -runs=0 -print_coverage=1
This will run all tests in the CORPUS_DIR but will not perform any fuzzing.
At the end of the process it will dump a single ``.sancov`` file with coverage
information. See SanitizerCoverage_ for details on querying the file using the
``sancov`` tool.
At the end of the process it will print text describing what code has been covered and what hasn't.
Alternatively, use
.. code-block:: console
./fuzzer CORPUS_DIR -runs=0 -dump_coverage=1
which will dump a ``.sancov`` file with coverage information.
See SanitizerCoverage_ for details on querying the file using the ``sancov`` tool.
You may also use other ways to visualize coverage,
e.g. using `Clang coverage <http://clang.llvm.org/docs/SourceBasedCodeCoverage.html>`_,
@ -570,7 +587,7 @@ The simplest way is to have a statically initialized global object inside
Alternatively, you may define an optional init function and it will receive
the program arguments that you can read and modify. Do this **only** if you
realy need to access ``argv``/``argc``.
really need to access ``argv``/``argc``.
.. code-block:: c++
@ -728,6 +745,7 @@ to crash on invalid inputs.
Examples: regular expression matchers, text or binary format parsers, compression,
network, crypto.
Trophies
========
* GLIBC: https://sourceware.org/glibc/wiki/FuzzingLibc
@ -768,10 +786,12 @@ Trophies
* LLVM: `Clang <https://llvm.org/bugs/show_bug.cgi?id=23057>`_, `Clang-format <https://llvm.org/bugs/show_bug.cgi?id=23052>`_, `libc++ <https://llvm.org/bugs/show_bug.cgi?id=24411>`_, `llvm-as <https://llvm.org/bugs/show_bug.cgi?id=24639>`_, `Demangler <https://bugs.chromium.org/p/chromium/issues/detail?id=606626>`_, Disassembler: http://reviews.llvm.org/rL247405, http://reviews.llvm.org/rL247414, http://reviews.llvm.org/rL247416, http://reviews.llvm.org/rL247417, http://reviews.llvm.org/rL247420, http://reviews.llvm.org/rL247422.
* Tensorflow: `[1] <https://github.com/tensorflow/tensorflow/commit/7231d01fcb2cd9ef9ffbfea03b724892c8a4026e>`__
* Tensorflow: `[1] <https://da-data.blogspot.com/2017/01/finding-bugs-in-tensorflow-with.html>`__
* Ffmpeg: `[1] <https://github.com/FFmpeg/FFmpeg/commit/c92f55847a3d9cd12db60bfcd0831ff7f089c37c>`__ `[2] <https://github.com/FFmpeg/FFmpeg/commit/25ab1a65f3acb5ec67b53fb7a2463a7368f1ad16>`__ `[3] <https://github.com/FFmpeg/FFmpeg/commit/85d23e5cbc9ad6835eef870a5b4247de78febe56>`__ `[4] <https://github.com/FFmpeg/FFmpeg/commit/04bd1b38ee6b8df410d0ab8d4949546b6c4af26a>`__
* `Wireshark <https://bugs.wireshark.org/bugzilla/buglist.cgi?bug_status=UNCONFIRMED&bug_status=CONFIRMED&bug_status=IN_PROGRESS&bug_status=INCOMPLETE&bug_status=RESOLVED&bug_status=VERIFIED&f0=OP&f1=OP&f2=product&f3=component&f4=alias&f5=short_desc&f7=content&f8=CP&f9=CP&j1=OR&o2=substring&o3=substring&o4=substring&o5=substring&o6=substring&o7=matches&order=bug_id%20DESC&query_format=advanced&v2=libfuzzer&v3=libfuzzer&v4=libfuzzer&v5=libfuzzer&v6=libfuzzer&v7=%22libfuzzer%22>`_
.. _pcre2: http://www.pcre.org/
.. _AFL: http://lcamtuf.coredump.cx/afl/
.. _Radamsa: https://github.com/aoh/radamsa

View File

@ -39,37 +39,87 @@ MIR Testing Guide
You can use the MIR format for testing in two different ways:
- You can write MIR tests that invoke a single code generation pass using the
``run-pass`` option in llc.
``-run-pass`` option in llc.
- You can use llc's ``stop-after`` option with existing or new LLVM assembly
- You can use llc's ``-stop-after`` option with existing or new LLVM assembly
tests and check the MIR output of a specific code generation pass.
Testing Individual Code Generation Passes
-----------------------------------------
The ``run-pass`` option in llc allows you to create MIR tests that invoke
just a single code generation pass. When this option is used, llc will parse
an input MIR file, run the specified code generation pass, and print the
resulting MIR to the standard output stream.
The ``-run-pass`` option in llc allows you to create MIR tests that invoke just
a single code generation pass. When this option is used, llc will parse an
input MIR file, run the specified code generation pass(es), and output the
resulting MIR code.
You can generate an input MIR file for the test by using the ``stop-after``
option in llc. For example, if you would like to write a test for the
post register allocation pseudo instruction expansion pass, you can specify
the machine copy propagation pass in the ``stop-after`` option, as it runs
just before the pass that we are trying to test:
You can generate an input MIR file for the test by using the ``-stop-after`` or
``-stop-before`` option in llc. For example, if you would like to write a test
for the post register allocation pseudo instruction expansion pass, you can
specify the machine copy propagation pass in the ``-stop-after`` option, as it
runs just before the pass that we are trying to test:
``llc -stop-after machine-cp bug-trigger.ll > test.mir``
``llc -stop-after=machine-cp bug-trigger.ll > test.mir``
After generating the input MIR file, you'll have to add a run line that uses
the ``-run-pass`` option to it. In order to test the post register allocation
pseudo instruction expansion pass on X86-64, a run line like the one shown
below can be used:
``# RUN: llc -run-pass postrapseudos -march=x86-64 %s -o /dev/null | FileCheck %s``
``# RUN: llc -o - %s -mtriple=x86_64-- -run-pass=postrapseudos | FileCheck %s``
The MIR files are target dependent, so they have to be placed in the target
specific test directories. They also need to specify a target triple or a
target architecture either in the run line or in the embedded LLVM IR module.
specific test directories (``lib/CodeGen/TARGETNAME``). They also need to
specify a target triple or a target architecture either in the run line or in
the embedded LLVM IR module.
Simplifying MIR files
^^^^^^^^^^^^^^^^^^^^^
The MIR code coming out of ``-stop-after``/``-stop-before`` is very verbose;
tests are more accessible and future-proof when simplified:
- Use the ``-simplify-mir`` option with llc (see the example after this list).
- Machine function attributes often have default values or the test works just
as well with default values. Typical candidates for this are: `alignment:`,
`exposesReturnsTwice`, `legalized`, `regBankSelected`, `selected`.
The whole `frameInfo` section is often unnecessary if there is no special
frame usage in the function. `tracksRegLiveness` on the other hand is often
necessary for some passes that care about block livein lists.
- The (global) `liveins:` list is typically only interesting for early
instruction selection passes and can be removed when testing later passes.
The per-block `liveins:` on the other hand are necessary if
`tracksRegLiveness` is true.
- Branch probability data in block `successors:` lists can be dropped if the
test doesn't depend on it. Example:
`successors: %bb.1(0x40000000), %bb.2(0x40000000)` can be replaced with
`successors: %bb.1, %bb.2`.
- MIR code contains a whole IR module. This is necessary because there are
no equivalents in MIR for global variables, references to external functions,
function attributes, metadata, debug info. Instead some MIR data references
the IR constructs. You can often remove them if the test doesn't depend on
them.
- Alias Analysis is performed on IR values. These are referenced by memory
operands in MIR. Example: `:: (load 8 from %ir.foobar, !alias.scope !9)`.
If the test doesn't depend on (good) alias analysis the references can be
dropped: `:: (load 8)`
- MIR blocks can reference IR blocks for debug printing, profile information
or debug locations. Example: `bb.42.myblock` in MIR references the IR block
`myblock`. It is usually possible to drop the `.myblock` reference and simply
use `bb.42`.
- If there are no memory operands or blocks referencing the IR then the
IR function can be replaced by a parameterless dummy function like
`define void @func() { ret void }`.
- It is possible to drop the whole IR section of the MIR file if it only
contains dummy functions (see above). The .mir loader will create the
IR functions automatically in this case.
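Putting the first item of this list into practice, a typical way to produce an
already-simplified MIR test input is the following sketch, reusing the
``bug-trigger.ll`` input from the earlier example:

.. code-block:: console

  llc -stop-after=machine-cp -simplify-mir bug-trigger.ll > test.mir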
Limitations
-----------

View File

@ -289,7 +289,7 @@ code often follows a pattern:
return my_function_precise(a);
}
The default value for all unspecified reflection parameters is zero.
The default value for all unspecified reflection parameters is zero.
The ``NVVMReflect`` pass should be executed early in the optimization
pipeline, immediately after the link stage. The ``internalize`` pass is also
@ -326,6 +326,16 @@ often leave behind dead code of the form:
Therefore, it is recommended that ``NVVMReflect`` is executed early in the
optimization pipeline before dead-code elimination.
The NVPTX TargetMachine knows how to schedule ``NVVMReflect`` at the beginning
of your pass manager; just use the following code when setting up your pass
manager:
.. code-block:: c++
std::unique_ptr<TargetMachine> TM = ...;
PassManagerBuilder PMBuilder(...);
if (TM)
TM->adjustPassManager(PMBuilder);
Reflection Parameters
---------------------
@ -339,35 +349,17 @@ Flag Description
``__CUDA_FTZ=[0,1]`` Use optimized code paths that flush subnormals to zero
==================== ======================================================
The value of this flag is determined by the "nvvm-reflect-ftz" module flag.
The following sets the ftz flag to 1.
Invoking NVVMReflect
--------------------
To ensure that all dead code caused by the reflection pass is eliminated, it
is recommended that the reflection pass is executed early in the LLVM IR
optimization pipeline. The pass takes an optional mapping of reflection
parameter name to an integer value. This mapping can be specified as either a
command-line option to ``opt`` or as an LLVM ``StringMap<int>`` object when
programmatically creating a pass pipeline.
With ``opt``:
.. code-block:: text
# opt -nvvm-reflect -nvvm-reflect-list=<var>=<value>,<var>=<value> module.bc -o module.reflect.bc
With programmatic pass pipeline:
.. code-block:: c++
extern FunctionPass *llvm::createNVVMReflectPass(const StringMap<int>& Mapping);
StringMap<int> ReflectParams;
ReflectParams["__CUDA_FTZ"] = 1;
Passes.add(createNVVMReflectPass(ReflectParams));
.. code-block:: llvm
!llvm.module.flags = !{!0}
!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
(``i32 4`` indicates that the value set here overrides the value in another
module we link with. See the `LangRef <LangRef.html#module-flags-metadata>`_
for details.)
Executing PTX
=============

View File

@ -60,11 +60,14 @@ like this:
clang -O2 -mllvm -opt-bisect-limit=256 my_file.c
The -opt-bisect-limit option may also be applied to link-time optimizations by
using a prefix to indicate that this is a plug-in option for the linker. The
using a prefix to indicate that this is a plug-in option for the linker. The
following syntax will set a bisect limit for LTO transformations:
::
# When using lld, or ld64 (macOS)
clang -flto -Wl,-mllvm,-opt-bisect-limit=256 my_file.o my_other_file.o
# When using Gold
clang -flto -Wl,-plugin-opt,-opt-bisect-limit=256 my_file.o my_other_file.o
LTO passes are run by a library instance invoked by the linker. Therefore any
@ -186,12 +189,5 @@ Adding Finer Granularity
Once the pass in which an incorrect transformation is performed has been
determined, it may be useful to perform further analysis in order to determine
which specific transformation is causing the problem. Ideally all passes
would be instrumented to allow skipping of individual transformations. This
functionality is available through the OptBisect object but it is impractical
to proactively instrument every existing pass. It is hoped that as developers
find that they need a pass to be instrumented they will add the instrumentation
and contribute it back to the LLVM source base.
Helper functions will be added to simplify this level of instrumentation, but
this work is not yet completed. For more information, contact Andy Kaylor.
which specific transformation is causing the problem. Debug counters
can be used for this purpose.

View File

@ -54,7 +54,8 @@ reviewer understand your code.
To get a full diff, use one of the following commands (or just use Arcanist
to upload your patch):
* ``git diff -U999999 other-branch``
* ``git show HEAD -U999999 > mypatch.patch``
* ``git format-patch -U999999 @{u}``
* ``svn diff --diff-cmd=diff -x -U999999``
To upload a new patch:

View File

@ -32,7 +32,7 @@ to know when working in the LLVM infrastructure, and the second describes the
Core LLVM classes. In the future this manual will be extended with information
describing how to use extension libraries, such as dominator information, CFG
traversal routines, and useful utilities like the ``InstVisitor`` (`doxygen
<http://llvm.org/doxygen/InstVisitor_8h-source.html>`__) template.
<http://llvm.org/doxygen/InstVisitor_8h_source.html>`__) template.
.. _general:
@ -108,7 +108,7 @@ they don't have some drawbacks (primarily stemming from the fact that
``dynamic_cast<>`` only works on classes that have a v-table). Because they are
used so often, you must know what they do and how they work. All of these
templates are defined in the ``llvm/Support/Casting.h`` (`doxygen
<http://llvm.org/doxygen/Casting_8h-source.html>`__) file (note that you very
<http://llvm.org/doxygen/Casting_8h_source.html>`__) file (note that you very
rarely have to include this file directly).
``isa<>``:
@ -225,7 +225,7 @@ and clients can call it using any one of:
Similarly, APIs which need to return a string may return a ``StringRef``
instance, which can be used directly or converted to an ``std::string`` using
the ``str`` member function. See ``llvm/ADT/StringRef.h`` (`doxygen
<http://llvm.org/doxygen/classllvm_1_1StringRef_8h-source.html>`__) for more
<http://llvm.org/doxygen/StringRef_8h_source.html>`__) for more
information.
You should rarely use the ``StringRef`` class directly, because it contains
@ -482,7 +482,7 @@ that inherits from the ErrorInfo utility, E.g.:
}
};
char FileExists::ID; // This should be declared in the C++ file.
char BadFileFormat::ID; // This should be declared in the C++ file.
Error printFormattedFile(StringRef Path) {
if (<check for valid format>)
@ -564,18 +564,18 @@ the boolean conversion operator):
.. code-block:: c++
if (auto Err = canFail(...))
if (auto Err = mayFail(...))
return Err; // Failure value - move error to caller.
// Safe to continue: Err was checked.
In contrast, the following code will always cause an abort, even if ``canFail``
In contrast, the following code will always cause an abort, even if ``mayFail``
returns a success value:
.. code-block:: c++
canFail();
// Program will always abort here, even if canFail() returns Success, since
mayFail();
// Program will always abort here, even if mayFail() returns Success, since
// the value is not checked.
Failure values are considered checked once a handler for the error type has
@ -633,6 +633,12 @@ exiting with an error code, the :ref:`ExitOnError <err_exitonerr>` utility
may be a better choice than handleErrors, as it simplifies control flow when
calling fallible functions.
In situations where it is known that a particular call to a fallible function
will always succeed (for example, a call to a function that can only fail on a
subset of inputs with an input that is known to be safe) the
:ref:`cantFail <err_cantfail>` functions can be used to remove the error type,
simplifying control flow.
StringError
"""""""""""
@ -765,6 +771,42 @@ mapping can also be supplied from ``Error`` values to exit codes using the
Use ``ExitOnError`` in your tool code where possible as it can greatly improve
readability.
.. _err_cantfail:
Using cantFail to simplify safe callsites
"""""""""""""""""""""""""""""""""""""""""
Some functions may only fail for a subset of their inputs, so calls using known
safe inputs can be assumed to succeed.
The cantFail functions encapsulate this by wrapping an assertion that their
argument is a success value and, in the case of Expected<T>, unwrapping the
T value:
.. code-block:: c++
Error onlyFailsForSomeXValues(int X);
Expected<int> onlyFailsForSomeXValues2(int X);
void foo() {
cantFail(onlyFailsForSomeXValues(KnownSafeValue));
int Y = cantFail(onlyFailsForSomeXValues2(KnownSafeValue));
...
}
Like the ExitOnError utility, cantFail simplifies control flow. Their treatment
of error cases is very different however: Where ExitOnError is guaranteed to
terminate the program on an error input, cantFail simply asserts that the result
is success. In debug builds this will result in an assertion failure if an error
is encountered. In release builds the behavior of cantFail for failure values is
undefined. As such, care must be taken in the use of cantFail: clients must be
certain that a cantFail wrapped call really can not fail with the given
arguments.
Use of the cantFail functions should be rare in library code, but they are
likely to be of more use in tool and unit-test code where inputs and/or
mocked-up classes or functions may be known to be safe.
Fallible constructors
"""""""""""""""""""""
@ -864,7 +906,7 @@ completing the walk over the archive they could use the ``joinErrors`` utility:
The ``joinErrors`` routine builds a special error type called ``ErrorList``,
which holds a list of user defined errors. The ``handleErrors`` routine
recognizes this type and will attempt to handle each of the contained erorrs in
recognizes this type and will attempt to handle each of the contained errors in
order. If all contained errors can be handled, ``handleErrors`` will return
``Error::success()``, otherwise ``handleErrors`` will concatenate the remaining
errors and return the resulting ``ErrorList``.
@ -931,7 +973,7 @@ The ``function_ref`` class template
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The ``function_ref``
(`doxygen <http://llvm.org/docs/doxygen/html/classllvm_1_1function__ref_3_01Ret_07Params_8_8_8_08_4.html>`__) class
(`doxygen <http://llvm.org/doxygen/classllvm_1_1function__ref_3_01Ret_07Params_8_8_8_08_4.html>`__) class
template represents a reference to a callable object, templated over the type
of the callable. This is a good choice for passing a callback to a function,
if you don't need to hold onto the callback after the function returns. In this
@ -981,7 +1023,7 @@ you don't want them to always be noisy. A standard compromise is to comment
them out, allowing you to enable them if you need them in the future.
The ``llvm/Support/Debug.h`` (`doxygen
<http://llvm.org/doxygen/Debug_8h-source.html>`__) file provides a macro named
<http://llvm.org/doxygen/Debug_8h_source.html>`__) file provides a macro named
``DEBUG()`` that is a much nicer solution to this problem. Basically, you can
put arbitrary code into the argument of the ``DEBUG`` macro, and it is only
executed if '``opt``' (or any other tool) is run with the '``-debug``' command
@ -1078,7 +1120,7 @@ The ``Statistic`` class & ``-stats`` option
-------------------------------------------
The ``llvm/ADT/Statistic.h`` (`doxygen
<http://llvm.org/doxygen/Statistic_8h-source.html>`__) file provides a class
<http://llvm.org/doxygen/Statistic_8h_source.html>`__) file provides a class
named ``Statistic`` that is used as a unified way to keep track of what the LLVM
compiler is doing and how effective various optimizations are. It is useful to
see what optimizations are contributing to making a particular program run
@ -1094,23 +1136,23 @@ uniform manner with the rest of the passes being executed.
There are many examples of ``Statistic`` uses, but the basics of using it are as
follows:
#. Define your statistic like this:
Define your statistic like this:
.. code-block:: c++
.. code-block:: c++
#define DEBUG_TYPE "mypassname" // This goes before any #includes.
STATISTIC(NumXForms, "The # of times I did stuff");
#define DEBUG_TYPE "mypassname" // This goes before any #includes.
STATISTIC(NumXForms, "The # of times I did stuff");
The ``STATISTIC`` macro defines a static variable, whose name is specified by
the first argument. The pass name is taken from the ``DEBUG_TYPE`` macro, and
the description is taken from the second argument. The variable defined
("NumXForms" in this case) acts like an unsigned integer.
The ``STATISTIC`` macro defines a static variable, whose name is specified by
the first argument. The pass name is taken from the ``DEBUG_TYPE`` macro, and
the description is taken from the second argument. The variable defined
("NumXForms" in this case) acts like an unsigned integer.
#. Whenever you make a transformation, bump the counter:
Whenever you make a transformation, bump the counter:
.. code-block:: c++
.. code-block:: c++
++NumXForms; // I did stuff!
++NumXForms; // I did stuff!
That's all you have to do. To get '``opt``' to print out the statistics
gathered, use the '``-stats``' option:
@ -1158,6 +1200,71 @@ Obviously, with so many optimizations, having a unified framework for this stuff
is very nice. Making your pass fit well into the framework makes it more
maintainable and useful.
.. _DebugCounters:
Adding debug counters to aid in debugging your code
---------------------------------------------------
Sometimes, when writing new passes, or trying to track down bugs, it
is useful to be able to control whether certain things in your pass
happen or not. For example, there are times when the minimization tooling
can only easily give you large testcases. You would like to narrow
your bug down to a specific transformation happening or not happening,
automatically, using bisection. This is where debug counters help.
They provide a framework for making parts of your code only execute a
certain number of times.
The ``llvm/Support/DebugCounter.h`` (`doxygen
<http://llvm.org/doxygen/DebugCounter_8h_source.html>`__) file
provides a class named ``DebugCounter`` that can be used to create
command line counter options that control execution of parts of your code.
Define your DebugCounter like this:
.. code-block:: c++
DEBUG_COUNTER(DeleteAnInstruction, "passname-delete-instruction",
              "Controls which instructions get deleted");
The ``DEBUG_COUNTER`` macro defines a static variable, whose name
is specified by the first argument. The name of the counter
(which is used on the command line) is specified by the second
argument, and the description used in the help is specified by the
third argument.
Wherever you have code that you want to control, use ``DebugCounter::shouldExecute`` to control it.
.. code-block:: c++
if (DebugCounter::shouldExecute(DeleteAnInstruction))
I->eraseFromParent();
That's all you have to do. Now, using opt, you can control when this code triggers using
the '``--debug-counter``' option. There are two counters provided, ``skip`` and ``count``.
``skip`` is the number of times to skip execution of the codepath. ``count`` is the number
of times, once we are done skipping, to execute the codepath.
.. code-block:: none
$ opt --debug-counter=passname-delete-instruction-skip=1,passname-delete-instruction-count=2 -passname
This will skip the above code the first time we hit it, then execute it twice, then skip the rest of the executions.
So if executed on the following code:
.. code-block:: llvm
%1 = add i32 %a, %b
%2 = add i32 %a, %b
%3 = add i32 %a, %b
%4 = add i32 %a, %b
It would delete ``%2`` and ``%3``.
A utility is provided in `utils/bisect-skip-count` to binary search
skip and count arguments. It can be used to automatically minimize the
skip and count for a debug-counter variable.
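Putting the pieces together, here is a hedged sketch of how a pass might guard instruction deletion with a debug counter. The names ``DeleteDeadInst`` and ``removeDeadInsts`` are illustrative only and not part of any existing pass:

.. code-block:: c++

   #include "llvm/IR/Function.h"
   #include "llvm/Support/DebugCounter.h"
   #include "llvm/Transforms/Utils/Local.h"
   using namespace llvm;

   #define DEBUG_TYPE "mypass"

   // Illustrative counter controlling which dead instructions get erased.
   DEBUG_COUNTER(DeleteDeadInst, "mypass-delete-dead-inst",
                 "Controls which dead instructions get deleted");

   static bool removeDeadInsts(Function &F) {
     bool Changed = false;
     for (BasicBlock &BB : F)
       for (auto It = BB.begin(), End = BB.end(); It != End;) {
         Instruction &I = *It++; // Advance before a possible erase.
         if (isInstructionTriviallyDead(&I) &&
             DebugCounter::shouldExecute(DeleteDeadInst)) {
           I.eraseFromParent();
           Changed = true;
         }
       }
     return Changed;
   }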
.. _ViewGraph:
Viewing graphs while debugging code
@ -2257,18 +2364,12 @@ of a ``BasicBlock`` and the number of ``Instruction``\ s it contains:
.. code-block:: c++
// func is a pointer to a Function instance
for (Function::iterator i = func->begin(), e = func->end(); i != e; ++i)
Function &Func = ...
for (BasicBlock &BB : Func)
// Print out the name of the basic block if it has one, and then the
// number of instructions that it contains
errs() << "Basic block (name=" << i->getName() << ") has "
<< i->size() << " instructions.\n";
Note that i can be used as if it were a pointer for the purposes of invoking
member functions of the ``Instruction`` class. This is because the indirection
operator is overloaded for the iterator classes. In the above code, the
expression ``i->size()`` is exactly equivalent to ``(*i).size()`` just like
you'd expect.
errs() << "Basic block (name=" << BB.getName() << ") has "
<< BB.size() << " instructions.\n";
.. _iterate_basicblock:
@ -2281,17 +2382,17 @@ a code snippet that prints out each instruction in a ``BasicBlock``:
.. code-block:: c++
// blk is a pointer to a BasicBlock instance
for (BasicBlock::iterator i = blk->begin(), e = blk->end(); i != e; ++i)
BasicBlock& BB = ...
for (Instruction &I : BB)
// The next statement works since operator<<(ostream&,...)
// is overloaded for Instruction&
errs() << *i << "\n";
errs() << I << "\n";
However, this isn't really the best way to print out the contents of a
``BasicBlock``! Since the ostream operators are overloaded for virtually
anything you'll care about, you could have just invoked the print routine on the
basic block itself: ``errs() << *blk << "\n";``.
basic block itself: ``errs() << BB << "\n";``.
.. _iterate_insiter:
@ -2425,13 +2526,13 @@ method):
OurFunctionPass(): callCounter(0) { }
virtual bool runOnFunction(Function& F) {
for (Function::iterator b = F.begin(), be = F.end(); b != be; ++b) {
for (BasicBlock::iterator i = b->begin(), ie = b->end(); i != ie; ++i) {
if (CallInst* callInst = dyn_cast<CallInst>(&*i)) {
for (BasicBlock &B : F) {
for (Instruction &I: B) {
if (auto *CI = dyn_cast<CallInst>(&I)) {
// We know we've encountered a call instruction, so we
// need to determine if it's a call to the
// function pointed to by m_func or not.
if (callInst->getCalledFunction() == targetFunc)
if (CI->getCalledFunction() == targetFunc)
++callCounter;
}
}
@ -2524,12 +2625,11 @@ iterate over all predecessors of BB:
#include "llvm/IR/CFG.h"
BasicBlock *BB = ...;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *Pred = *PI;
for (BasicBlock *Pred : predecessors(BB)) {
// ...
}
Similarly, to iterate over successors use ``succ_iterator/succ_begin/succ_end``.
Similarly, to iterate over successors use ``successors``.
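For example, a minimal sketch mirroring the predecessor loop above:

.. code-block:: c++

   #include "llvm/IR/CFG.h"
   BasicBlock *BB = ...;

   for (BasicBlock *Succ : successors(BB)) {
     // ...
   }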
.. _simplechanges:
@ -2554,7 +2654,7 @@ For example, an ``AllocaInst`` only *requires* a (const-ptr-to) ``Type``. Thus:
.. code-block:: c++
AllocaInst* ai = new AllocaInst(Type::Int32Ty);
auto *ai = new AllocaInst(Type::Int32Ty);
will create an ``AllocaInst`` instance that represents the allocation of one
integer in the current stack frame, at run time. Each ``Instruction`` subclass
@ -2579,7 +2679,7 @@ intending to use it within the same ``Function``. I might do:
.. code-block:: c++
AllocaInst* pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc");
auto *pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc");
where ``indexLoc`` is now the logical name of the instruction's execution value,
which is a pointer to an integer on the run time stack.
@ -2599,7 +2699,7 @@ sequence of instructions that form a ``BasicBlock``:
BasicBlock *pb = ...;
Instruction *pi = ...;
Instruction *newInst = new Instruction(...);
auto *newInst = new Instruction(...);
pb->getInstList().insert(pi, newInst); // Inserts newInst before pi in pb
@ -2611,7 +2711,7 @@ sequence of instructions that form a ``BasicBlock``:
.. code-block:: c++
BasicBlock *pb = ...;
Instruction *newInst = new Instruction(...);
auto *newInst = new Instruction(...);
pb->getInstList().push_back(newInst); // Appends newInst to pb
@ -2620,7 +2720,7 @@ sequence of instructions that form a ``BasicBlock``:
.. code-block:: c++
BasicBlock *pb = ...;
Instruction *newInst = new Instruction(..., pb);
auto *newInst = new Instruction(..., pb);
which is much cleaner, especially if you are creating long instruction
streams.
@ -2635,7 +2735,7 @@ sequence of instructions that form a ``BasicBlock``:
.. code-block:: c++
Instruction *pi = ...;
Instruction *newInst = new Instruction(...);
auto *newInst = new Instruction(...);
pi->getParent()->getInstList().insert(pi, newInst);
@ -2651,7 +2751,7 @@ sequence of instructions that form a ``BasicBlock``:
.. code-block:: c++
Instruction* pi = ...;
Instruction* newInst = new Instruction(..., pi);
auto *newInst = new Instruction(..., pi);
which is much cleaner, especially if you're creating a lot of instructions and
adding them to ``BasicBlock``\ s.
@ -2718,7 +2818,7 @@ Replacing individual instructions
"""""""""""""""""""""""""""""""""
Including "`llvm/Transforms/Utils/BasicBlockUtils.h
<http://llvm.org/doxygen/BasicBlockUtils_8h-source.html>`_" permits use of two
<http://llvm.org/doxygen/BasicBlockUtils_8h_source.html>`_" permits use of two
very useful replace functions: ``ReplaceInstWithValue`` and
``ReplaceInstWithInst``.
@ -2814,7 +2914,7 @@ is easier to read and write than the equivalent
FunctionType *ft = FunctionType::get(Type::Int8Ty, params, false);
See the `class comment
<http://llvm.org/doxygen/TypeBuilder_8h-source.html#l00001>`_ for more details.
<http://llvm.org/doxygen/TypeBuilder_8h_source.html#l00001>`_ for more details.
.. _threading:
@ -2903,7 +3003,7 @@ Another way is to only call ``getPointerToFunction()`` from the
When the JIT is configured to compile lazily (using
``ExecutionEngine::DisableLazyCompilation(false)``), there is currently a `race
condition <http://llvm.org/bugs/show_bug.cgi?id=5184>`_ in updating call sites
condition <https://bugs.llvm.org/show_bug.cgi?id=5184>`_ in updating call sites
after a function is lazily-jitted. It's still possible to use the lazy JIT in a
threaded program if you ensure that only one thread at a time can call any
particular lazy stub and that the JIT lock guards any IR access, but we suggest
@ -3235,7 +3335,7 @@ The Core LLVM Class Hierarchy Reference
``#include "llvm/IR/Type.h"``
header source: `Type.h <http://llvm.org/doxygen/Type_8h-source.html>`_
header source: `Type.h <http://llvm.org/doxygen/Type_8h_source.html>`_
doxygen info: `Type Class <http://llvm.org/doxygen/classllvm_1_1Type.html>`_
@ -3339,7 +3439,7 @@ The ``Module`` class
``#include "llvm/IR/Module.h"``
header source: `Module.h <http://llvm.org/doxygen/Module_8h-source.html>`_
header source: `Module.h <http://llvm.org/doxygen/Module_8h_source.html>`_
doxygen info: `Module Class <http://llvm.org/doxygen/classllvm_1_1Module.html>`_
@ -3426,7 +3526,7 @@ The ``Value`` class
``#include "llvm/IR/Value.h"``
header source: `Value.h <http://llvm.org/doxygen/Value_8h-source.html>`_
header source: `Value.h <http://llvm.org/doxygen/Value_8h_source.html>`_
doxygen info: `Value Class <http://llvm.org/doxygen/classllvm_1_1Value.html>`_
@ -3517,7 +3617,7 @@ The ``User`` class
``#include "llvm/IR/User.h"``
header source: `User.h <http://llvm.org/doxygen/User_8h-source.html>`_
header source: `User.h <http://llvm.org/doxygen/User_8h_source.html>`_
doxygen info: `User Class <http://llvm.org/doxygen/classllvm_1_1User.html>`_
@ -3564,7 +3664,7 @@ The ``Instruction`` class
``#include "llvm/IR/Instruction.h"``
header source: `Instruction.h
<http://llvm.org/doxygen/Instruction_8h-source.html>`_
<http://llvm.org/doxygen/Instruction_8h_source.html>`_
doxygen info: `Instruction Class
<http://llvm.org/doxygen/classllvm_1_1Instruction.html>`_
@ -3712,7 +3812,7 @@ The ``GlobalValue`` class
``#include "llvm/IR/GlobalValue.h"``
header source: `GlobalValue.h
<http://llvm.org/doxygen/GlobalValue_8h-source.html>`_
<http://llvm.org/doxygen/GlobalValue_8h_source.html>`_
doxygen info: `GlobalValue Class
<http://llvm.org/doxygen/classllvm_1_1GlobalValue.html>`_
@ -3770,7 +3870,7 @@ The ``Function`` class
``#include "llvm/IR/Function.h"``
header source: `Function.h <http://llvm.org/doxygen/Function_8h-source.html>`_
header source: `Function.h <http://llvm.org/doxygen/Function_8h_source.html>`_
doxygen info: `Function Class
<http://llvm.org/doxygen/classllvm_1_1Function.html>`_
@ -3879,7 +3979,7 @@ The ``GlobalVariable`` class
``#include "llvm/IR/GlobalVariable.h"``
header source: `GlobalVariable.h
<http://llvm.org/doxygen/GlobalVariable_8h-source.html>`_
<http://llvm.org/doxygen/GlobalVariable_8h_source.html>`_
doxygen info: `GlobalVariable Class
<http://llvm.org/doxygen/classllvm_1_1GlobalVariable.html>`_
@ -3937,7 +4037,7 @@ The ``BasicBlock`` class
``#include "llvm/IR/BasicBlock.h"``
header source: `BasicBlock.h
<http://llvm.org/doxygen/BasicBlock_8h-source.html>`_
<http://llvm.org/doxygen/BasicBlock_8h_source.html>`_
doxygen info: `BasicBlock Class
<http://llvm.org/doxygen/classllvm_1_1BasicBlock.html>`_
View File
@ -30,7 +30,7 @@ This proposal relates only to moving the hosting of our source-code repository
from SVN hosted on our own servers to Git hosted on GitHub. We are not proposing
using GitHub's issue tracker, pull-requests, or code-review.
Contributers will continue to earn commit access on demand under the Developer
Contributors will continue to earn commit access on demand under the Developer
Policy, except that a GitHub account will be required instead of SVN
username/password-hash.
@ -433,7 +433,7 @@ Concerns
* Using the monolithic repository may add overhead for those *integrating* a
standalone sub-project, even if they aren't contributing to it, due to the
same disk space concern as the point above. The availability of the
sub-project Git mirror addesses this, even without SVN access.
sub-project Git mirror addresses this, even without SVN access.
* Preservation of the existing read/write SVN-based workflows relies on the
GitHub SVN bridge, which is an extra dependency. Maintaining this locks us
into GitHub and could restrict future workflow changes.
View File
@ -0,0 +1,182 @@
==================
Vectorization Plan
==================
.. contents::
:local:
Abstract
========
The vectorization transformation can be rather complicated, involving several
potential alternatives, especially for outer-loops [1]_ but also possibly for
innermost loops. These alternatives may have significant performance impact,
both positive and negative. A cost model is therefore employed to identify the
best alternative, including the alternative of avoiding any transformation
altogether.
The Vectorization Plan is an explicit model for describing vectorization
candidates. It serves for both optimizing candidates including estimating their
cost reliably, and for performing their final translation into IR. This
facilitates dealing with multiple vectorization candidates.
High-level Design
=================
Vectorization Workflow
----------------------
VPlan-based vectorization involves three major steps, taking a "scenario-based
approach" to vectorization planning:
1. Legal Step: check if a loop can be legally vectorized; encode constraints and
artifacts if so.
2. Plan Step:
a. Build initial VPlans following the constraints and decisions taken by
Legal Step 1, and compute their cost.
b. Apply optimizations to the VPlans, possibly forking additional VPlans.
Prune sub-optimal VPlans having relatively high cost.
3. Execute Step: materialize the best VPlan. Note that this is the only step
that modifies the IR.
Design Guidelines
-----------------
In what follows, the term "input IR" refers to code that is fed into the
vectorizer whereas the term "output IR" refers to code that is generated by the
vectorizer. The output IR contains code that has been vectorized or "widened"
according to a loop Vectorization Factor (VF), and/or loop unroll-and-jammed
according to an Unroll Factor (UF).
The design of VPlan follows several high-level guidelines:
1. Analysis-like: building and manipulating VPlans must not modify the input IR.
In particular, if the best option is not to vectorize at all, the
vectorization process terminates before reaching Step 3, and compilation
should proceed as if VPlans had not been built.
2. Align Cost & Execute: each VPlan must support both estimating the cost and
generating the output IR code, such that the cost estimation evaluates the
to-be-generated code reliably.
3. Support vectorizing additional constructs:
a. Outer-loop vectorization. In particular, VPlan must be able to model the
control-flow of the output IR which may include multiple basic-blocks and
nested loops.
b. SLP vectorization.
c. Combinations of the above, including nested vectorization: vectorizing
both an inner loop and an outer-loop at the same time (each with its own
VF and UF), mixed vectorization: vectorizing a loop with SLP patterns
inside [4]_, (re)vectorizing input IR containing vector code.
d. Function vectorization [2]_.
4. Support multiple candidates efficiently. In particular, similar candidates
related to a range of possible VF's and UF's must be represented efficiently.
Potential versioning needs to be supported efficiently.
5. Support vectorizing idioms, such as interleaved groups of strided loads or
stores. This is achieved by modeling a sequence of output instructions using
a "Recipe", which is responsible for computing its cost and generating its
code.
6. Encapsulate Single-Entry Single-Exit regions (SESE). During vectorization
such regions may need to be, for example, predicated and linearized, or
replicated VF*UF times to handle scalarized and predicated instructions.
Inner loops are also modelled as SESE regions.
Low-level Design
================
The low-level design of VPlan comprises the following classes.
:LoopVectorizationPlanner:
A LoopVectorizationPlanner is designed to handle the vectorization of a loop
or a loop nest. It can construct, optimize and discard one or more VPlans,
each VPlan modelling a distinct way to vectorize the loop or the loop nest.
Once the best VPlan is determined, including the best VF and UF, this VPlan
drives the generation of output IR.
:VPlan:
A model of a vectorized candidate for a given input IR loop or loop nest. This
candidate is represented using a Hierarchical CFG. VPlan supports estimating
the cost and driving the generation of the output IR code it represents.
:Hierarchical CFG:
A control-flow graph whose nodes are basic-blocks or Hierarchical CFG's. The
Hierarchical CFG data structure is similar to the Tile Tree [5]_, where
cross-Tile edges are lifted to connect Tiles instead of the original
basic-blocks as in Sharir [6]_, promoting the Tile encapsulation. The terms
Region and Block are used rather than Tile [5]_ to avoid confusion with loop
tiling.
:VPBlockBase:
The building block of the Hierarchical CFG. A pure-virtual base-class of
VPBasicBlock and VPRegionBlock, see below. VPBlockBase models the hierarchical
control-flow relations with other VPBlocks. Note that in contrast to the IR
BasicBlock, a VPBlockBase models its control-flow successors and predecessors
directly, rather than through a Terminator branch or through predecessor
branches that "use" the VPBlockBase.
:VPBasicBlock:
VPBasicBlock is a subclass of VPBlockBase, and serves as the leaves of the
Hierarchical CFG. It represents a sequence of output IR instructions that will
appear consecutively in an output IR basic-block. The instructions of this
basic-block originate from one or more VPBasicBlocks. VPBasicBlock holds a
sequence of zero or more VPRecipes that model the cost and generation of the
output IR instructions.
:VPRegionBlock:
VPRegionBlock is a subclass of VPBlockBase. It models a collection of
VPBasicBlocks and VPRegionBlocks which form a SESE subgraph of the output IR
CFG. A VPRegionBlock may indicate that its contents are to be replicated a
constant number of times when output IR is generated, effectively representing
a loop with constant trip-count that will be completely unrolled. This is used
to support scalarized and predicated instructions with a single model for
multiple candidate VF's and UF's.
:VPRecipeBase:
A pure-virtual base class modeling a sequence of one or more output IR
instructions, possibly based on one or more input IR instructions. These
input IR instructions are referred to as "Ingredients" of the Recipe. A Recipe
may specify how its ingredients are to be transformed to produce the output IR
instructions; e.g., cloned once, replicated multiple times or widened
according to selected VF.
:VPTransformState:
Stores information used for generating output IR, passed from
LoopVectorizationPlanner to its selected VPlan for execution, and used to pass
additional information down to VPBlocks and VPRecipes.
Related LLVM components
-----------------------
1. SLP Vectorizer: one can compare the VPlan model with LLVM's existing SLP
tree, where TSLP [3]_ adds Plan Step 2.b.
2. RegionInfo: one can compare VPlan's H-CFG with the Region Analysis as used by
Polly [7]_.
References
----------
.. [1] "Outer-loop vectorization: revisited for short SIMD architectures", Dorit
Nuzman and Ayal Zaks, PACT 2008.
.. [2] "Proposal for function vectorization and loop vectorization with function
calls", Xinmin Tian, [`cfe-dev
<http://lists.llvm.org/pipermail/cfe-dev/2016-March/047732.html>`_].,
March 2, 2016.
See also `review <https://reviews.llvm.org/D22792>`_.
.. [3] "Throttling Automatic Vectorization: When Less is More", Vasileios
Porpodas and Tim Jones, PACT 2015 and LLVM Developers' Meeting 2015.
.. [4] "Exploiting mixed SIMD parallelism by reducing data reorganization
overhead", Hao Zhou and Jingling Xue, CGO 2016.
.. [5] "Register Allocation via Hierarchical Graph Coloring", David Callahan and
Brian Koblenz, PLDI 1991
.. [6] "Structural analysis: A new approach to flow analysis in optimizing
compilers", M. Sharir, Journal of Computer Languages, Jan. 1980
.. [7] "Enabling Polyhedral Optimizations in LLVM", Tobias Grosser, Diploma
thesis, 2011.
.. [8] "Introducing VPlan to the Loop Vectorizer", Gil Rapaport and Ayal Zaks,
European LLVM Developers' Meeting 2017.
View File
@ -51,3 +51,18 @@ running:
cd docs/
make -f Makefile.sphinx linkcheck
Doxygen page Output
===================
Install doxygen <http://www.stack.nl/~dimitri/doxygen/download.html> and dot2tex <https://dot2tex.readthedocs.io/en/latest>.
cd <build-dir>
cmake -DLLVM_ENABLE_DOXYGEN=On <llvm-top-src-dir>
make doxygen-llvm # for LLVM docs
make doxygen-clang # for clang docs
It will generate html in
<build-dir>/docs/doxygen/html # for LLVM docs
<build-dir>/tools/clang/docs/doxygen/html # for clang docs
View File
@ -1,21 +1,21 @@
========================
LLVM 4.0.0 Release Notes
LLVM 5.0.0 Release Notes
========================
.. contents::
:local:
.. warning::
These are in-progress notes for the upcoming LLVM 4.0.0 release. You may
prefer the `LLVM 3.9 Release Notes <http://llvm.org/releases/3.9.0/docs
/ReleaseNotes.html>`_.
These are in-progress notes for the upcoming LLVM 5 release.
Release notes for previous releases can be found on
`the Download Page <http://releases.llvm.org/download.html>`_.
Introduction
============
This document contains the release notes for the LLVM Compiler Infrastructure,
release 4.0.0. Here we describe the status of LLVM, including major improvements
release 5.0.0. Here we describe the status of LLVM, including major improvements
from the previous release, improvements in various subprojects of LLVM, and
some of the current users of the code. All LLVM releases may be downloaded
from the `LLVM releases web site <http://llvm.org/releases/>`_.
@ -26,15 +26,13 @@ have questions or comments, the `LLVM Developer's Mailing List
<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ is a good place to send
them.
Note that if you are reading this file from a Subversion checkout or the main
LLVM web page, this document applies to the *next* release, not the current
one. To see the release notes for a specific release, please see the `releases
page <http://llvm.org/releases/>`_.
Non-comprehensive list of changes in this release
=================================================
* The C API functions LLVMAddFunctionAttr, LLVMGetFunctionAttr,
LLVMRemoveFunctionAttr, LLVMAddAttribute, LLVMRemoveAttribute,
LLVMGetAttribute, LLVMAddInstrAttribute and
LLVMRemoveInstrAttribute have been removed.
* The C API enum LLVMAttribute has been deleted.
.. NOTE
For small 1-3 sentence descriptions, just add an entry at the end of
this list. If your description won't fit comfortably in one bullet
@ -42,20 +40,23 @@ Non-comprehensive list of changes in this release
functionality, or simply have a lot to talk about), see the `NOTE` below
for adding a new subsection.
* The definition and uses of LLVM_ATRIBUTE_UNUSED_RESULT in the LLVM source
were replaced with LLVM_NODISCARD, which matches the C++17 [[nodiscard]]
semantics rather than gcc's __attribute__((warn_unused_result)).
* Minimum compiler version to build has been raised to GCC 4.8 and VS 2015.
* The Timer related APIs now expect a Name and Description. When upgrading code
the previously used names should become descriptions and a short name in the
style of a programming language identifier should be added.
* LLVM now handles invariant.group across different basic blocks, which makes
it possible to devirtualize virtual calls inside loops.
* ... next change ...
* LLVM's ``WeakVH`` has been renamed to ``WeakTrackingVH`` and a new ``WeakVH``
has been introduced. The new ``WeakVH`` nulls itself out on deletion, but
does not track values across RAUW.
* A new library named ``BinaryFormat`` has been created which holds a collection
of code which previously lived in ``Support``. This includes the
``file_magic`` structure and ``identify_magic`` functions, as well as all the
structure and type definitions for DWARF, ELF, COFF, WASM, and MachO file
formats.
* The tool ``llvm-pdbdump`` has been renamed ``llvm-pdbutil`` to better reflect
its nature as a general purpose PDB manipulation / diagnostics tool that does
more than just dumping contents.
* The ``BBVectorize`` pass has been removed. It was fully replaced and no
longer used back in 2014 but we didn't get around to removing it. Now it is
gone. The SLP vectorizer is the suggested non-loop vectorization pass.
.. NOTE
If you would like to document a larger change, then you can add a
@ -67,46 +68,19 @@ Non-comprehensive list of changes in this release
Makes programs 10x faster by doing Special New Thing.
Improvements to ThinLTO (-flto=thin)
------------------------------------
* Integration with profile data (PGO). When available, profile data
enables more accurate function importing decisions, as well as
cross-module indirect call promotion.
* Significant build-time and binary-size improvements when compiling with
debug info (-g).
Changes to the LLVM IR
----------------------
Changes to the ARM Targets
* The datalayout string may now indicate an address space to use for
the pointer type of alloca rather than the default of 0.
* Added speculatable attribute indicating a function which has no
side-effects which could inhibit hoisting of calls.
Changes to the ARM Backend
--------------------------
**During this release the AArch64 target has:**
* Gained support for ILP32 relocations.
* Gained support for XRay.
* Made even more progress on GlobalISel. There is still some work left before
it is production-ready though.
* Refined the support for Qualcomm's Falkor and Samsung's Exynos CPUs.
* Learned a few new tricks for lowering multiplications by constants, folding
spilled/refilled copies etc.
**During this release the ARM target has:**
* Gained support for ROPI (read-only position independence) and RWPI
(read-write position independence), which can be used to remove the need for
a dynamic linker.
* Gained support for execute-only code, which is placed in pages without read
permissions.
* Gained a machine scheduler for Cortex-R52.
* Gained support for XRay.
* Gained Thumb1 implementations for several compiler-rt builtins. It also
has some support for building the builtins for HF targets.
* Started using the generic bitreverse intrinsic instead of rbit.
* Gained very basic support for GlobalISel.
A lot of work has also been done in LLD for ARM, which now supports more
relocations and TLS.
During this release ...
Changes to the MIPS Target
@ -123,29 +97,68 @@ Changes to the PowerPC Target
Changes to the X86 Target
-------------------------
During this release ...
* Added initial AMD Ryzen (znver1) scheduler support.
* Added support for Intel Goldmont CPUs.
* Add support for avx512vpopcntdq instructions.
* Added heuristics to convert CMOV into branches when it may be profitable.
* More aggressive inlining of memcmp calls.
* Improve vXi64 shuffles on 32-bit targets.
* Improved use of PMOVMSKB for any_of/all_of comparison reductions.
* Improved Silvermont, Sandybridge, and Jaguar (btver2) schedulers.
* Improved support for AVX512 vector rotations.
* Added support for AMD Lightweight Profiling (LWP) instructions.
Changes to the AMDGPU Target
-----------------------------
During this release ...
* Initial gfx9 support
Changes to the AVR Target
-----------------------------
* The entire backend has been merged in-tree with all tests passing. All of
the instruction selection code and the machine code backend has landed
recently and is fully usable.
This release consists mainly of bugfixes and implementations of features
required for compiling basic Rust programs.
* Enable the branch relaxation pass so that we don't crash on large
stack load/stores
* Add support for lowering bit-rotations to the native `ror` and `rol`
instructions
* Fix bug where function pointers were treated as pointers to RAM and not
pointers to program memory
* Fix broken code generation for shift-by-variable expressions
* Support zero-sized types in argument lists; this is impossible in C,
but possible in Rust
Changes to the OCaml bindings
-----------------------------
* The attribute API was completely overhauled, following the changes
to the C API.
During this release ...
External Open Source Projects Using LLVM 4.0.0
==============================================
Changes to the C API
--------------------
* Deprecated the ``LLVMAddBBVectorizePass`` interface since the ``BBVectorize``
pass has been removed. It is now a no-op and will be removed in the next
release. Use ``LLVMAddSLPVectorizePass`` instead to get the supported SLP
vectorizer.
External Open Source Projects Using LLVM 5
==========================================
* A project...
View File
@ -13,6 +13,13 @@ The Scudo Hardened Allocator is a user-mode allocator based on LLVM Sanitizer's
CombinedAllocator, which aims at providing additional mitigations against heap
based vulnerabilities, while maintaining good performance.
Currently, the allocator supports (was tested on) the following architectures:
- i386 (& i686) (32-bit);
- x86_64 (64-bit);
- armhf (32-bit);
- AArch64 (64-bit).
The name "Scudo" has been retained from the initial implementation (Escudo
meaning Shield in Spanish and Portuguese).
@ -31,29 +38,25 @@ header is accessed, and the process terminated.
The following information is stored in the header:
- the 16-bit checksum;
- the user requested size for that chunk, which is necessary for reallocation
purposes;
- the unused bytes amount for that chunk, which is necessary for computing the
size of the chunk;
- the state of the chunk (available, allocated or quarantined);
- the allocation type (malloc, new, new[] or memalign), to detect potential
mismatches in the allocation APIs used;
- whether or not the chunk is offseted (ie: if the chunk beginning is different
than the backend allocation beginning, which is most often the case with some
aligned allocations);
- the associated offset;
- a 16-bit salt.
- the offset of the chunk, which is the distance in bytes from the beginning of
the returned chunk to the beginning of the backend allocation;
- an 8-bit salt.
On x64, which is currently the only architecture supported, the header fits
within 16-bytes, which works nicely with the minimum alignment requirements.
This header fits within 8 bytes on all supported platforms.
The checksum is computed as a CRC32 (requiring the SSE 4.2 instruction set)
of the global secret, the chunk pointer itself, and the 16 bytes of header with
The checksum is computed as a CRC32 (made faster with hardware support)
of the global secret, the chunk pointer itself, and the 8 bytes of header with
the checksum field zeroed out.
The header is atomically loaded and stored to prevent races (this requires
platform support such as the cmpxchg16b instruction). This is important as two
consecutive chunks could belong to different threads. We also want to avoid
any type of double fetches of information located in the header, and use local
copies of the header for this purpose.
The header is atomically loaded and stored to prevent races. This is important
as two consecutive chunks could belong to different threads. We also want to
avoid any type of double fetches of information located in the header, and use
local copies of the header for this purpose.
Delayed Freelist
-----------------
@ -94,9 +97,9 @@ You may also build Scudo like this:
.. code::
cd $LLVM/projects/compiler-rt/lib
clang++ -fPIC -std=c++11 -msse4.2 -mcx16 -O2 -I. scudo/*.cpp \
clang++ -fPIC -std=c++11 -msse4.2 -O2 -I. scudo/*.cpp \
$(\ls sanitizer_common/*.{cc,S} | grep -v "sanitizer_termination\|sanitizer_common_nolibc") \
-shared -o scudo-allocator.so -lpthread
-shared -o scudo-allocator.so -pthread
and then use it with existing binaries as follows:
@ -136,29 +139,29 @@ Or using the function:
The following options are available:
+-----------------------------+---------+------------------------------------------------+
| Option | Default | Description |
+-----------------------------+---------+------------------------------------------------+
| QuarantineSizeMb | 64 | The size (in Mb) of quarantine used to delay |
| | | the actual deallocation of chunks. Lower value |
| | | may reduce memory usage but decrease the |
| | | effectiveness of the mitigation; a negative |
| | | value will fallback to a default of 64Mb. |
+-----------------------------+---------+------------------------------------------------+
| ThreadLocalQuarantineSizeKb | 1024 | The size (in Kb) of per-thread cache use to |
| | | offload the global quarantine. Lower value may |
| | | reduce memory usage but might increase |
| | | contention on the global quarantine. |
+-----------------------------+---------+------------------------------------------------+
| DeallocationTypeMismatch | true | Whether or not we report errors on |
| | | malloc/delete, new/free, new/delete[], etc. |
+-----------------------------+---------+------------------------------------------------+
| DeleteSizeMismatch | true | Whether or not we report errors on mismatch |
| | | between sizes of new and delete. |
+-----------------------------+---------+------------------------------------------------+
| ZeroContents | false | Whether or not we zero chunk contents on |
| | | allocation and deallocation. |
+-----------------------------+---------+------------------------------------------------+
+-----------------------------+----------------+----------------+------------------------------------------------+
| Option | 64-bit default | 32-bit default | Description |
+-----------------------------+----------------+----------------+------------------------------------------------+
| QuarantineSizeMb | 64 | 16 | The size (in Mb) of quarantine used to delay |
| | | | the actual deallocation of chunks. Lower value |
| | | | may reduce memory usage but decrease the |
| | | | effectiveness of the mitigation; a negative |
| | | | value will fallback to a default of 64Mb. |
+-----------------------------+----------------+----------------+------------------------------------------------+
| ThreadLocalQuarantineSizeKb | 1024 | 256 | The size (in Kb) of per-thread cache use to |
| | | | offload the global quarantine. Lower value may |
| | | | reduce memory usage but might increase |
| | | | contention on the global quarantine. |
+-----------------------------+----------------+----------------+------------------------------------------------+
| DeallocationTypeMismatch | true | true | Whether or not we report errors on |
| | | | malloc/delete, new/free, new/delete[], etc. |
+-----------------------------+----------------+----------------+------------------------------------------------+
| DeleteSizeMismatch | true | true | Whether or not we report errors on mismatch |
| | | | between sizes of new and delete. |
+-----------------------------+----------------+----------------+------------------------------------------------+
| ZeroContents | false | false | Whether or not we zero chunk contents on |
| | | | allocation and deallocation. |
+-----------------------------+----------------+----------------+------------------------------------------------+
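As an illustration only (assuming, as with the other sanitizers, that runtime
options are passed through the ``SCUDO_OPTIONS`` environment variable as a
colon-separated list of ``option=value`` pairs), one might run:

.. code::

  SCUDO_OPTIONS="QuarantineSizeMb=16:DeallocationTypeMismatch=false" ./a.out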
Allocator related common Sanitizer options can also be passed through Scudo
options, such as ``allocator_may_return_null``. A detailed list including those
View File
@ -180,11 +180,27 @@ provide debug information at various points in generated code.
void @llvm.dbg.declare(metadata, metadata, metadata)
This intrinsic provides information about a local element (e.g., variable).
The first argument is metadata holding the alloca for the variable. The second
This intrinsic provides information about a local element (e.g., variable). The
first argument is metadata holding the alloca for the variable. The second
argument is a `local variable <LangRef.html#dilocalvariable>`_ containing a
description of the variable. The third argument is a `complex expression
<LangRef.html#diexpression>`_.
<LangRef.html#diexpression>`_. An ``llvm.dbg.declare`` intrinsic describes the
*location* of a source variable.
.. code-block:: llvm
%i.addr = alloca i32, align 4
call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !1, metadata !2), !dbg !3
!1 = !DILocalVariable(name: "i", ...) ; int i
!2 = !DIExpression()
!3 = !DILocation(...)
...
%buffer = alloca [256 x i8], align 8
; The address of i is buffer+64.
call void @llvm.dbg.declare(metadata [256 x i8]* %buffer, metadata !1, metadata !2)
!1 = !DILocalVariable(name: "i", ...) ; int i
!2 = !DIExpression(DW_OP_plus, 64)
``llvm.dbg.value``
^^^^^^^^^^^^^^^^^^
View File
@ -319,7 +319,7 @@ format of this section follows:
.. code-block:: none
Header {
uint8 : Stack Map Version (current version is 2)
uint8 : Stack Map Version (current version is 3)
uint8 : Reserved (expected to be 0)
uint16 : Reserved (expected to be 0)
}
@ -341,10 +341,13 @@ format of this section follows:
uint16 : NumLocations
Location[NumLocations] {
uint8 : Register | Direct | Indirect | Constant | ConstantIndex
uint8 : Reserved (location flags)
uint8 : Reserved (expected to be 0)
uint16 : Location Size
uint16 : Dwarf RegNum
uint16 : Reserved (expected to be 0)
int32 : Offset or SmallConstant
}
uint32 : Padding (only if required to align to 8 byte)
uint16 : Padding
uint16 : NumLiveOuts
LiveOuts[NumLiveOuts]
View File
@ -9,15 +9,22 @@ Garbage Collection Safepoints in LLVM
Status
=======
This document describes a set of experimental extensions to LLVM. Use
with caution. Because the intrinsics have experimental status,
compatibility across LLVM releases is not guaranteed.
This document describes a set of extensions to LLVM to support garbage
collection. By now, these mechanisms are well proven: a commercial Java
implementation with a fully relocating collector has shipped using them.
There are a couple of places where bugs might still linger; these are called out
below.
LLVM currently supports an alternate mechanism for conservative
garbage collection support using the ``gcroot`` intrinsic. The mechanism
described here shares little in common with the alternate ``gcroot``
implementation and it is hoped that this mechanism will eventually
replace the gc_root mechanism.
They are still listed as "experimental" to indicate that no forward or backward
compatibility guarantees are offered across versions. If your use case is such
that you need some form of forward compatibility guarantee, please raise the
issue on the llvm-dev mailing list.
LLVM still supports an alternate mechanism for conservative garbage collection
support using the ``gcroot`` intrinsic. The ``gcroot`` mechanism is mostly of
historical interest at this point with one exception - its implementation of
shadow stacks has been used successfully by a number of language frontends and
is still supported.
Overview
========
@ -86,9 +93,36 @@ the collector must be able to:
This document describes the mechanism by which an LLVM based compiler
can provide this information to a language runtime/collector, and
ensure that all pointers can be read and updated if desired. The
heart of the approach is to construct (or rewrite) the IR in a manner
where the possible updates performed by the garbage collector are
ensure that all pointers can be read and updated if desired.
At a high level, LLVM has been extended to support compiling to an abstract
machine which extends the actual target with a non-integral pointer type
suitable for representing a garbage collected reference to an object. In
particular, such non-integral pointer types have no defined mapping to an
integer representation. This semantic quirk allows the runtime to pick an
integer mapping for each point in the program, allowing relocation of objects
without visible effects.
Warning: Non-Integral Pointer Types are a newly added concept in LLVM IR.
It's possible that we've missed disabling some of the optimizations which
assume an integral value for pointers. If you find such a case, please
file a bug or share a patch.
Warning: There is one currently known semantic hole in the definition of
non-integral pointers which has not been addressed upstream. To work around
this, you need to disable speculation of loads unless the memory type
(non-integral pointer vs anything else) is known to be unchanged. That is, it is
not safe to speculate a load if doing so causes a non-integral pointer value to
be loaded as any other type or vice versa. In practice, this restriction is
well isolated to isSafeToSpeculate in ValueTracking.cpp.
This high level abstract machine model is used for most of the LLVM optimizer.
Before starting code generation, we switch representations to an explicit form.
In theory, a frontend could directly generate this low level explicit form, but
doing so is likely to inhibit optimization.
The heart of the explicit approach is to construct (or rewrite) the IR in a
manner where the possible updates performed by the garbage collector are
explicitly visible in the IR. Doing so requires that we:
#. create a new SSA value for each potentially relocated pointer, and
@ -104,7 +138,7 @@ explicitly visible in the IR. Doing so requires that we:
At the most abstract level, inserting a safepoint can be thought of as
replacing a call instruction with a call to a multiple return value
function which both calls the original target of the call, returns
it's result, and returns updated values for any live pointers to
its result, and returns updated values for any live pointers to
garbage collected objects.
Note that the task of identifying all live pointers to garbage
@ -200,7 +234,9 @@ The relevant parts of the StackMap section for our example are:
.short 7
.long 0
This example was taken from the tests for the :ref:`RewriteStatepointsForGC` utility pass. As such, it's full StackMap can be easily examined with the following command.
This example was taken from the tests for the :ref:`RewriteStatepointsForGC`
utility pass. As such, its full StackMap can be easily examined with the
following command.
.. code-block:: bash
@ -536,7 +572,7 @@ Semantics:
""""""""""
The return value of ``gc.relocate`` is the potentially relocated value
of the pointer specified by it's arguments. It is unspecified how the
of the pointer specified by its arguments. It is unspecified how the
value of the returned pointer relates to the argument to the
``gc.statepoint`` other than that a) it points to the same source
language object with the same offset, and b) the 'based-on'
@ -654,11 +690,15 @@ Utility Passes for Safepoint Insertion
RewriteStatepointsForGC
^^^^^^^^^^^^^^^^^^^^^^^^
The pass RewriteStatepointsForGC transforms a functions IR by replacing a
``gc.statepoint`` (with an optional ``gc.result``) with a full relocation
sequence, including all required ``gc.relocates``. To function, the pass
requires that the GC strategy specified for the function be able to reliably
distinguish between GC references and non-GC references in IR it is given.
The pass RewriteStatepointsForGC transforms a function's IR to lower from the
abstract machine model described above to the explicit statepoint model of
relocations. To do this, it replaces all calls or invokes of functions which
might contain a safepoint poll with a ``gc.statepoint`` and associated full
relocation sequence, including all required ``gc.relocates``.
Note that by default, this pass only runs for the "statepoint-example" or
"core-clr" gc strategies. You will need to add your custom strategy to this
whitelist or use one of the predefined ones.
As an example, given this code:
@ -666,7 +706,7 @@ As an example, given this code:
define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj)
gc "statepoint-example" {
call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
call void @foo()
ret i8 addrspace(1)* %obj
}
@ -683,7 +723,8 @@ The pass would produce this IR:
In the above examples, the addrspace(1) marker on the pointers is the mechanism
that the ``statepoint-example`` GC strategy uses to distinguish references from
non references. Address space 1 is not globally reserved for this purpose.
non references. The pass assumes that all addrspace(1) pointers are non-integral
pointer types. Address space 1 is not globally reserved for this purpose.
This pass can be used as a utility by a language frontend that doesn't
want to manually reason about liveness, base pointers, or relocation when
@ -701,23 +742,34 @@ can be relaxed to producing interior derived pointers provided the target
collector can find the associated allocation from an arbitrary interior
derived pointer.
In practice, RewriteStatepointsForGC can be run much later in the pass
By default RewriteStatepointsForGC passes in ``0xABCDEF00`` as the statepoint
ID and ``0`` as the number of patchable bytes to the newly constructed
``gc.statepoint``. These values can be configured on a per-callsite
basis using the attributes ``"statepoint-id"`` and
``"statepoint-num-patch-bytes"``. If a call site is marked with a
``"statepoint-id"`` function attribute and its value is a positive
integer (represented as a string), then that value is used as the ID
of the newly constructed ``gc.statepoint``. If a call site is marked
with a ``"statepoint-num-patch-bytes"`` function attribute and its
value is a positive integer, then that value is used as the 'num patch
bytes' parameter of the newly constructed ``gc.statepoint``. The
``"statepoint-id"`` and ``"statepoint-num-patch-bytes"`` attributes
are not propagated to the ``gc.statepoint`` call or invoke if they
could be successfully parsed.
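For illustration, a hedged sketch of what such a call site might look like
before the pass runs (the attribute values here are arbitrary):

.. code-block:: llvm

  call void @foo() "statepoint-id"="1234" "statepoint-num-patch-bytes"="3"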
In practice, RewriteStatepointsForGC should be run much later in the pass
pipeline, after most optimization is already done. This helps to improve
the quality of the generated code when compiled with garbage collection support.
In the long run, this is the intended usage model. At this time, a few details
have yet to be worked out about the semantic model required to guarantee this
is always correct. As such, please use with caution and report bugs.
.. _PlaceSafepoints:
PlaceSafepoints
^^^^^^^^^^^^^^^^
The pass PlaceSafepoints transforms a function's IR by replacing any call or
invoke instructions with appropriate ``gc.statepoint`` and ``gc.result`` pairs,
and inserting safepoint polls sufficient to ensure running code checks for a
safepoint request on a timely manner. This pass is expected to be run before
RewriteStatepointsForGC and thus does not produce full relocation sequences.
The pass PlaceSafepoints inserts safepoint polls sufficient to ensure running
code checks for a safepoint request in a timely manner. This pass is expected
to be run before RewriteStatepointsForGC and thus does not produce full
relocation sequences.
As an example, given input IR of the following:
@ -740,13 +792,16 @@ This pass would produce the following IR:
.. code-block:: text
define void @test() gc "statepoint-example" {
%safepoint_token = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
%safepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
call void @do_safepoint()
call void @foo()
ret void
}
In this case, we've added an (unconditional) entry safepoint poll and converted the call into a ``gc.statepoint``. Note that despite appearances, the entry poll is not necessarily redundant. We'd have to know that ``foo`` and ``test`` were not mutually recursive for the poll to be redundant. In practice, you'd probably want to your poll definition to contain a conditional branch of some form.
In this case, we've added an (unconditional) entry safepoint poll. Note that
despite appearances, the entry poll is not necessarily redundant. We'd have to
know that ``foo`` and ``test`` were not mutually recursive for the poll to be
redundant. In practice, you'd probably want your poll definition to contain
a conditional branch of some form.
At the moment, PlaceSafepoints can insert safepoint polls at method entry and
loop backedges locations. Extending this to work with return polls would be
@ -763,26 +818,13 @@ of this function is inserted at each poll site desired. While calls or invokes
inside this method are transformed to a ``gc.statepoints``, recursive poll
insertion is not performed.
By default PlaceSafepoints passes in ``0xABCDEF00`` as the statepoint
ID and ``0`` as the number of patchable bytes to the newly constructed
``gc.statepoint``. These values can be configured on a per-callsite
basis using the attributes ``"statepoint-id"`` and
``"statepoint-num-patch-bytes"``. If a call site is marked with a
``"statepoint-id"`` function attribute and its value is a positive
integer (represented as a string), then that value is used as the ID
of the newly constructed ``gc.statepoint``. If a call site is marked
with a ``"statepoint-num-patch-bytes"`` function attribute and its
value is a positive integer, then that value is used as the 'num patch
bytes' parameter of the newly constructed ``gc.statepoint``. The
``"statepoint-id"`` and ``"statepoint-num-patch-bytes"`` attributes
are not propagated to the ``gc.statepoint`` call or invoke if they
could be successfully parsed.
If you are scheduling the RewriteStatepointsForGC pass late in the pass order,
you should probably schedule this pass immediately before it. The exception
would be if you need to preserve abstract frame information (e.g. for
deoptimization or introspection) at safepoints. In that case, ask on the
llvm-dev mailing list for suggestions.
This pass is useful for any language frontend which only has to support
garbage collection semantics at safepoints. If you need other abstract
frame information at safepoints (e.g. for deoptimization or introspection),
you can insert safepoint polls in the frontend. If you have the latter case,
please ask on llvm-dev for suggestions. There's been a good amount of work
done on making such a scheme work well in practice which is not yet documented
here.
Supported Architectures
@ -794,13 +836,6 @@ Today, only X86_64 is supported.
Problem Areas and Active Work
=============================
#. As the existing users of the late rewriting model have matured, we've found
cases where the optimizer breaks the assumption that an SSA value of
gc-pointer type actually contains a gc-pointer and vice-versa. We need to
clarify our expectations and propose at least one small IR change. (Today,
the gc-pointer distinction is managed via address spaces. This turns out
not to be quite strong enough.)
#. Support for languages which allow unmanaged pointers to garbage collected
objects (i.e. pass a pointer to an object to a C routine) via pinning.
@ -831,7 +866,7 @@ Bugs and Enhancements
Currently known bugs and enhancements under consideration can be
tracked by performing a `bugzilla search
<http://llvm.org/bugs/buglist.cgi?cmdtype=runnamed&namedcmd=Statepoint%20Bugs&list_id=64342>`_
<https://bugs.llvm.org/buglist.cgi?cmdtype=runnamed&namedcmd=Statepoint%20Bugs&list_id=64342>`_
for [Statepoint] in the summary field. When filing new bugs, please
use this tag so that interested parties see the newly filed bug. As
with most LLVM features, design discussions take place on `llvm-dev
View File
@ -228,6 +228,12 @@ CTags
format. A helper script, utils/TableGen/tdtags, provides an easier-to-use
interface; run 'tdtags -H' for documentation.
X86EVEX2VEX
-----------
**Purpose**: This X86 specific tablegen backend emits tables that map EVEX
encoded instructions to their VEX encoded identical instruction.
Clang BackEnds
==============
View File
@ -58,6 +58,10 @@ types are:
The 'string' type represents an ordered sequence of characters of arbitrary
length.
``code``
The ``code`` type represents a code fragment, which can be a single- or
multi-line string literal.
``bits<n>``
A 'bits' type is an arbitrary, but fixed, size integer that is broken up
into individual bits. This type is useful because it can handle some bits
@ -105,7 +109,7 @@ supported include:
hexadecimal integer value
``"foo"``
string value
a single-line string value; it can be assigned to a ``string`` or ``code`` variable.
``[{ ... }]``
usually called a "code fragment", but is just a multiline string literal
@ -126,7 +130,8 @@ supported include:
access to one bit of a value
``value{15-17}``
access to multiple bits of a value
access to an ordered sequence of bits of a value, in particular ``value{15-17}``
produces an order that is the reverse of ``value{17-15}``.
``DEF``
reference to a record definition
View File
@ -313,7 +313,7 @@ default outputs a ``ModuleID``:
ret i32 0
}
``ModuleID`` can unexpetedly match against ``CHECK`` lines. For example:
``ModuleID`` can unexpectedly match against ``CHECK`` lines. For example:
.. code-block:: llvm
@ -387,23 +387,49 @@ depends on special features of sub-architectures, you must add the specific
triple, test with the specific FileCheck and put it into the specific
directory that will filter out all other architectures.
REQUIRES and REQUIRES-ANY directive
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Some tests can be enabled only in specific situation - like having
debug build. Use ``REQUIRES`` directive to specify those requirements.
Constraining test execution
---------------------------
Some tests can be run only in specific configurations, such as
with debug builds or on particular platforms. Use ``REQUIRES``
and ``UNSUPPORTED`` to control when the test is enabled.
Some tests are expected to fail. For example, there may be a known bug
that the test detects. Use ``XFAIL`` to mark a test as an expected failure.
An ``XFAIL`` test will be successful if its execution fails, and
will be a failure if its execution succeeds.
.. code-block:: llvm
; This test will be only enabled in the build with asserts
; This test will be only enabled in the build with asserts.
; REQUIRES: asserts
; This test is disabled on Linux.
; UNSUPPORTED: -linux-
; This test is expected to fail on PowerPC.
; XFAIL: powerpc
You can separate requirements by a comma.
``REQUIRES`` means all listed requirements must be satisfied.
``REQUIRES-ANY`` means at least one must be satisfied.
``REQUIRES``, ``UNSUPPORTED``, and ``XFAIL`` all accept a comma-separated
list of boolean expressions. The values in each expression may be:
- Features added to ``config.available_features`` by
configuration files such as ``lit.cfg``.
- Substrings of the target triple (``UNSUPPORTED`` and ``XFAIL`` only).
| ``REQUIRES`` enables the test if all expressions are true.
| ``UNSUPPORTED`` disables the test if any expression is true.
| ``XFAIL`` expects the test to fail if any expression is true.
As a special case, ``XFAIL: *`` is expected to fail everywhere.
.. code-block:: llvm
; This test is disabled on Windows,
; and is disabled on Linux, except for Android Linux.
; UNSUPPORTED: windows, linux && !android
; This test is expected to fail on both PowerPC and ARM.
; XFAIL: powerpc || arm
List of features that can be used in ``REQUIRES`` and ``REQUIRES-ANY`` can be
found in lit.cfg files.
Substitutions
-------------
@ -442,6 +468,25 @@ RUN lines:
Expands to the path separator, i.e. ``:`` (or ``;`` on Windows).
``%/s, %/S, %/t, %/T:``
Act like the corresponding substitution above but replace any ``\``
character with a ``/``. This is useful to normalize path separators.
Example: ``%s: C:\Desktop Files/foo_test.s.tmp``
Example: ``%/s: C:/Desktop Files/foo_test.s.tmp``
``%:s, %:S, %:t, %:T:``
Act like the corresponding substitution above but remove colons at
the beginning of Windows paths. This is useful to allow concatenation
of absolute paths on Windows to produce a legal path.
Example: ``%s: C:\Desktop Files\foo_test.s.tmp``
Example: ``%:s: C\Desktop Files\foo_test.s.tmp``
**LLVM-specific substitutions:**
@ -520,24 +565,6 @@ their name. For example:
This program runs its arguments and then inverts the result code from it.
Zero result codes become 1. Non-zero result codes become 0.
Sometimes it is necessary to mark a test case as "expected fail" or
XFAIL. You can easily mark a test as XFAIL just by including ``XFAIL:``
on a line near the top of the file. This signals that the test case
should succeed if the test fails. Such test cases are counted separately
by the testing tool. To specify an expected fail, use the XFAIL keyword
in the comments of the test program followed by a colon and one or more
failure patterns. Each failure pattern can be either ``*`` (to specify
fail everywhere), or a part of a target triple (indicating the test
should fail on that platform), or the name of a configurable feature
(for example, ``loadable_module``). If there is a match, the test is
expected to fail. If not, the test is expected to succeed. To XFAIL
everywhere just specify ``XFAIL: *``. Here is an example of an ``XFAIL``
line:
.. code-block:: llvm
; XFAIL: darwin,sun
To make the output more useful, :program:`lit` will scan
the lines of the test case for ones that contain a pattern that matches
``PR[0-9]+``. This is the syntax for specifying a PR (Problem Report) number
View File
@ -44,12 +44,12 @@ Users can control the vectorization SIMD width using the command line flag "-for
$ clang -mllvm -force-vector-width=8 ...
$ opt -loop-vectorize -force-vector-width=8 ...
Users can control the unroll factor using the command line flag "-force-vector-unroll"
Users can control the unroll factor using the command line flag "-force-vector-interleave"
.. code-block:: console
$ clang -mllvm -force-vector-unroll=2 ...
$ opt -loop-vectorize -force-vector-unroll=2 ...
$ clang -mllvm -force-vector-interleave=2 ...
$ opt -loop-vectorize -force-vector-interleave=2 ...
Pragma loop hint directives
^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -99,7 +99,9 @@ Optimization remarks are enabled using:
indicates if vectorization was specified.
``-Rpass-analysis=loop-vectorize`` identifies the statements that caused
vectorization to fail.
vectorization to fail. If in addition ``-fsave-optimization-record`` is
provided, multiple causes of vectorization failure may be listed (this behavior
might change in the future).
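As a sketch (the source file name ``foo.c`` is an assumption, not from this
document), vectorization remarks and a serialized optimization record can be
requested together:
.. code-block:: console
$ clang -O2 -c foo.c -Rpass=loop-vectorize -Rpass-analysis=loop-vectorize -fsave-optimization-record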
Consider the following loop:
@ -380,6 +382,17 @@ And Linpack-pc with the same configuration. Result is Mflops, higher is better.
.. image:: linpack-pc.png
Ongoing Development Directions
------------------------------
.. toctree::
:hidden:
Proposals/VectorizationPlan
:doc:`Proposals/VectorizationPlan`
Modeling the process and upgrading the infrastructure of LLVM's Loop Vectorizer.
.. _slp-vectorizer:
The SLP Vectorizer

View File

@ -593,12 +593,12 @@ the order in the definition of ``IntRegs`` in the target description file.
FPRegsClass FPRegsRegClass;
IntRegsClass IntRegsRegClass;
...
// IntRegs Sub-register Classess...
// IntRegs Sub-register Classes...
static const TargetRegisterClass* const IntRegsSubRegClasses [] = {
NULL
};
...
// IntRegs Super-register Classess...
// IntRegs Super-register Classes..
static const TargetRegisterClass* const IntRegsSuperRegClasses [] = {
NULL
};

View File

@ -28,8 +28,9 @@ XRay consists of three main parts:
- A runtime library for enabling/disabling tracing at runtime.
- A suite of tools for analysing the traces.
**NOTE:** As of the time of this writing, XRay is only available for x86_64
and arm7 32-bit (no-thumb) Linux.
**NOTE:** As of February 27, 2017, XRay is only available for the following
architectures running Linux: x86_64, arm7 (no thumb), aarch64, powerpc64le,
mips, mipsel, mips64, mips64el.
The compiler-inserted instrumentation points come in the form of nop-sleds in
the final generated binary, and an ELF section named ``xray_instr_map`` which
@ -84,7 +85,10 @@ GCC-style attributes or C++11-style attributes.
When linking a binary, you can either manually link in the `XRay Runtime
Library`_ or use ``clang`` to link it in automatically with the
``-fxray-instrument`` flag.
``-fxray-instrument`` flag. Alternatively, you can statically link in the XRay
runtime library from compiler-rt -- those archive files will take the name of
`libclang_rt.xray-{arch}` where `{arch}` is the mnemonic supported by clang
(x86_64, arm7, etc.).
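For instance, letting ``clang`` both instrument the code and link in the runtime
automatically might look like the following (the source and output file names
are hypothetical):
::
$ clang++ -fxray-instrument -o my-program my-program.cc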
LLVM Function Attribute
-----------------------
@ -135,7 +139,7 @@ variable, where we list down the options and their defaults below.
+-------------------+-----------------+---------------+------------------------+
| Option | Type | Default | Description |
+===================+=================+===============+========================+
| patch_premain | ``bool`` | ``true`` | Whether to patch |
| patch_premain | ``bool`` | ``false`` | Whether to patch |
| | | | instrumentation points |
| | | | before main. |
+-------------------+-----------------+---------------+------------------------+
@ -146,6 +150,11 @@ variable, where we list down the options and their defaults below.
| xray_logfile_base | ``const char*`` | ``xray-log.`` | Filename base for the |
| | | | XRay logfile. |
+-------------------+-----------------+---------------+------------------------+
| xray_fdr_log | ``bool`` | ``false`` | Whether to install the |
| | | | Flight Data Recorder |
| | | | (FDR) mode. |
+-------------------+-----------------+---------------+------------------------+
If you choose to not use the default logging implementation that comes with the
XRay runtime and/or control when/how the XRay instrumentation runs, you may use
@ -175,6 +184,64 @@ thread-safety of operations to be performed by the XRay runtime library:
XRay cannot guarantee that all threads that have ever gotten a copy of the
pointer will not invoke the function.
Flight Data Recorder Mode
-------------------------
XRay supports a logging mode which allows the application to capture only a
fixed amount of memory's worth of events. Flight Data Recorder (FDR) mode works
very much like a plane's "black box": it keeps recording data to memory in a
fixed-size circular queue of buffers, and makes the data available
programmatically until the buffers are finalized and flushed. To use FDR mode
in your application, you may set the ``xray_fdr_log`` option to ``true`` in the
``XRAY_OPTIONS`` environment variable (while also optionally setting
``xray_naive_log`` to ``false``).
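For example (with a hypothetical instrumented binary named ``my-program``), FDR
mode could be selected at startup like so:
::
$ XRAY_OPTIONS="patch_premain=true xray_naive_log=false xray_fdr_log=true" ./my-program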
When FDR mode is on, XRay will keep writing to and recycling memory buffers
until the logging implementation is finalized -- at which point the log can be
flushed and re-initialized later. To do this programmatically, we follow the
workflow provided below:
.. code-block:: c++
// Patch the sleds, if we haven't yet.
auto patch_status = __xray_patch();
// Maybe handle the patch_status errors.
// When we want to flush the log, we need to finalize it first, to give
// threads a chance to return buffers to the queue.
auto finalize_status = __xray_log_finalize();
if (finalize_status != XRAY_LOG_FINALIZED) {
// maybe retry, or bail out.
}
// At this point, we are sure that the log is finalized, so we may try
// flushing the log.
auto flush_status = __xray_log_flushLog();
if (flush_status != XRAY_LOG_FLUSHED) {
// maybe retry, or bail out.
}
The default settings for the FDR mode implementation will create logs named
similarly to the naive log implementation, but will have a different log
format. All the trace analysis tools (and the trace reading library) will
support all versions of the FDR mode format as we add more functionality and
record types in the future.
**NOTE:** We do not, however, promise perpetual support for older versions of
the log format as we move forward. Deprecation of the formats will be announced
and discussed on the developers' mailing list.
XRay allows for replacing the default FDR mode logging implementation using the
following API:
- ``__xray_set_log_impl(...)``: This function takes a struct of type
``XRayLogImpl``, which is defined in ``xray/xray_log_interface.h``, part of
the XRay compiler-rt installation.
- ``__xray_log_init(...)``: This function allows for initializing and
re-initializing an installed logging implementation. See
``xray/xray_log_interface.h`` for details, part of the XRay compiler-rt
installation.
Trace Analysis Tools
--------------------
@ -185,7 +252,26 @@ supports the following subcommands:
- ``extract``: Extract the instrumentation map from a binary, and return it as
YAML.
- ``account``: Performs basic function call accounting, with various options
for sorting and for output formats (CSV, YAML, and console-friendly TEXT are
supported).
- ``convert``: Converts an XRay log file from one format to another. Currently
only converts to YAML.
- ``graph``: Generates a DOT graph of the function call relationships between
functions found in an XRay trace.
These subcommands use various library components that are part of the XRay
libraries, distributed with LLVM. These are:
- ``llvm/XRay/Trace.h``: A trace reading library for loading an XRay trace in
any of the supported formats into a convenient in-memory representation. All
the analysis tools that deal with traces use this implementation.
- ``llvm/XRay/Graph.h``: A semi-generic graph type used by the graph
subcommand to conveniently represent a function call graph, with statistics
associated with edges and vertices.
- ``llvm/XRay/InstrumentationMap.h``: A convenient tool for analyzing the
instrumentation map in XRay-instrumented object files and binaries. The
``extract`` subcommand uses this particular library.
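As a quick illustration (a sketch: ``my-program`` is a hypothetical
XRay-instrumented binary, and the redirection assumes the YAML is written to
standard output), the instrumentation map of a binary could be dumped with the
``extract`` subcommand:
::
$ llvm-xray extract ./my-program > instr-map.yaml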
Future Work
===========
@ -193,38 +279,19 @@ Future Work
There are a number of ongoing efforts for expanding the toolset building around
the XRay instrumentation system.
Flight Data Recorder Mode
-------------------------
The `XRay whitepaper`_ mentions a mode for when events are kept in memory, and
have the traces be dumped on demand through a triggering API. This work is
currently ongoing.
Trace Analysis
--------------
There are a few more subcommands making its way to the ``llvm-xray`` tool, that
are currently under review:
- ``convert``: Turns an XRay trace from one format to another. Currently
supporting conversion from the binary XRay log to YAML.
- ``account``: Do function call accounting based on data in the XRay log.
We have more subcommands and modes that we're thinking of developing, in the
following forms:
- ``stack``: Reconstruct the function call stacks in a timeline.
- ``convert``: Converting from one version of the XRay log to another (higher)
version, and converting to other trace formats (i.e. Chrome Trace Viewer,
pprof, etc.).
- ``graph``: Generate a function call graph with relative timings and distributions.
More Platforms
--------------
Since XRay is only currently available in x86_64 and arm7 32-bit (no-thumb)
running Linux, we're looking to supporting more platforms (architectures and
operating systems).
We're looking forward to contributions to port XRay to more architectures and
operating systems.
.. References...

View File

@ -0,0 +1,273 @@
===================
Debugging with XRay
===================
This document shows an example of how you would go about analyzing applications
built with XRay instrumentation. Here we will attempt to debug ``llc``
compiling some sample LLVM IR generated by Clang.
.. contents::
:local:
Building with XRay
------------------
To debug an application with XRay instrumentation, we need to build it with a
Clang that supports the ``-fxray-instrument`` option. See `XRay <XRay.html>`_
for background information and more technical details of how XRay works.
In our example, we need to add ``-fxray-instrument`` to the list of flags
passed to Clang when building a binary. Note that we need to link with Clang as
well to get the XRay runtime linked in appropriately. For building ``llc`` with
XRay, we do something similar below for our LLVM build:
::
$ mkdir -p llvm-build && cd llvm-build
# Assume that the LLVM sources are at ../llvm
$ cmake -GNinja ../llvm -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_FLAGS_RELEASE="-fxray-instrument" -DCMAKE_CXX_FLAGS="-fxray-instrument"
# Once this finishes, we should build llc
$ ninja llc
To verify that we have an XRay instrumented binary, we can use ``objdump`` to
look for the ``xray_instr_map`` section.
::
$ objdump -h -j xray_instr_map ./bin/llc
./bin/llc: file format elf64-x86-64
Sections:
Idx Name Size VMA LMA File off Algn
14 xray_instr_map 00002fc0 00000000041516c6 00000000041516c6 03d516c6 2**0
CONTENTS, ALLOC, LOAD, READONLY, DATA
Getting Traces
--------------
By default, XRay neither writes out the trace files nor patches the application
before ``main`` starts. If we simply run ``llc`` it should behave just like a
normally built binary. However, if we want to get a full trace of the
application's operations (of the functions we do end up instrumenting with
XRay), then we need to enable XRay at application start. To do this, XRay
checks the ``XRAY_OPTIONS`` environment variable.
::
# The following doesn't create an XRay trace by default.
$ ./bin/llc input.ll
# We need to set the XRAY_OPTIONS to enable some features.
$ XRAY_OPTIONS="patch_premain=true" ./bin/llc input.ll
==69819==XRay: Log file in 'xray-log.llc.m35qPB'
At this point we now have an XRay trace we can start analysing.
The ``llvm-xray`` Tool
----------------------
Having a trace then allows us to do basic accounting of the functions that were
instrumented, and how much time we're spending in parts of the code. To make
sense of this data, we use the ``llvm-xray`` tool which has a few subcommands
to help us understand our trace.
One of the simplest things we can do is to get an accounting of the functions
that have been instrumented. We can see an example accounting with ``llvm-xray
account``:
::
$ llvm-xray account xray-log.llc.m35qPB -top=10 -sort=sum -sortorder=dsc -instr_map ./bin/llc
Functions with latencies: 29
funcid count [ min, med, 90p, 99p, max] sum function
187 360 [ 0.000000, 0.000001, 0.000014, 0.000032, 0.000075] 0.001596 LLLexer.cpp:446:0: llvm::LLLexer::LexIdentifier()
85 130 [ 0.000000, 0.000000, 0.000018, 0.000023, 0.000156] 0.000799 X86ISelDAGToDAG.cpp:1984:0: (anonymous namespace)::X86DAGToDAGISel::Select(llvm::SDNode*)
138 130 [ 0.000000, 0.000000, 0.000017, 0.000155, 0.000155] 0.000774 SelectionDAGISel.cpp:2963:0: llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int)
188 103 [ 0.000000, 0.000000, 0.000003, 0.000123, 0.000214] 0.000737 LLParser.cpp:2692:0: llvm::LLParser::ParseValID(llvm::ValID&, llvm::LLParser::PerFunctionState*)
88 1 [ 0.000562, 0.000562, 0.000562, 0.000562, 0.000562] 0.000562 X86ISelLowering.cpp:83:0: llvm::X86TargetLowering::X86TargetLowering(llvm::X86TargetMachine const&, llvm::X86Subtarget const&)
125 102 [ 0.000001, 0.000003, 0.000010, 0.000017, 0.000049] 0.000471 Verifier.cpp:3714:0: (anonymous namespace)::Verifier::visitInstruction(llvm::Instruction&)
90 8 [ 0.000023, 0.000035, 0.000106, 0.000106, 0.000106] 0.000342 X86ISelLowering.cpp:3363:0: llvm::X86TargetLowering::LowerCall(llvm::TargetLowering::CallLoweringInfo&, llvm::SmallVectorImpl<llvm::SDValue>&) const
124 32 [ 0.000003, 0.000007, 0.000016, 0.000041, 0.000041] 0.000310 Verifier.cpp:1967:0: (anonymous namespace)::Verifier::visitFunction(llvm::Function const&)
123 1 [ 0.000302, 0.000302, 0.000302, 0.000302, 0.000302] 0.000302 LLVMContextImpl.cpp:54:0: llvm::LLVMContextImpl::~LLVMContextImpl()
139 46 [ 0.000000, 0.000002, 0.000006, 0.000008, 0.000019] 0.000138 TargetLowering.cpp:506:0: llvm::TargetLowering::SimplifyDemandedBits(llvm::SDValue, llvm::APInt const&, llvm::APInt&, llvm::APInt&, llvm::TargetLowering::TargetLoweringOpt&, unsigned int, bool) const
This shows us that for our input file, ``llc`` spent the most cumulative time
in the lexer (a total of 1 millisecond). If we wanted, for example, to work
with this data in a spreadsheet, we could output the results as CSV using the
``-format=csv`` option to the command for further analysis.
If we want to get a textual representation of the raw trace, we can use the
``llvm-xray convert`` tool to get YAML output. The first few lines of that
output for an example trace would look like the following:
::
$ llvm-xray convert -f yaml -symbolize -instr_map=./bin/llc xray-log.llc.m35qPB
---
header:
version: 1
type: 0
constant-tsc: true
nonstop-tsc: true
cycle-frequency: 2601000000
records:
- { type: 0, func-id: 110, function: __cxx_global_var_init.8, cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426023268520 }
- { type: 0, func-id: 110, function: __cxx_global_var_init.8, cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426023523052 }
- { type: 0, func-id: 164, function: __cxx_global_var_init, cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426029925386 }
- { type: 0, func-id: 164, function: __cxx_global_var_init, cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426030031128 }
- { type: 0, func-id: 142, function: '(anonymous namespace)::CommandLineParser::ParseCommandLineOptions(int, char const* const*, llvm::StringRef, llvm::raw_ostream*)', cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426046951388 }
- { type: 0, func-id: 142, function: '(anonymous namespace)::CommandLineParser::ParseCommandLineOptions(int, char const* const*, llvm::StringRef, llvm::raw_ostream*)', cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426047282020 }
- { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426047857332 }
- { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426047984152 }
- { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426048036584 }
- { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426048042292 }
- { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426048055056 }
- { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426048067316 }
Controlling Fidelity
--------------------
So far in our examples, we haven't been getting full coverage of the functions
we have in the binary. To get that, we need to modify the compiler flags so
that we can instrument more (if not all) of the functions in the binary. We
have two options for doing that, and we explore both of these below.
Instruction Threshold
`````````````````````
The first "blunt" way of doing this is by setting the minimum threshold for
function bodies to 1. We can do that with the
``-fxray-instruction-threshold=N`` flag when building our binary. We rebuild
``llc`` with this option and observe the results:
::
$ rm CMakeCache.txt
$ cmake -GNinja ../llvm -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_FLAGS_RELEASE="-fxray-instrument -fxray-instruction-threshold=1" \
-DCMAKE_CXX_FLAGS="-fxray-instrument -fxray-instruction-threshold=1"
$ ninja llc
$ XRAY_OPTIONS="patch_premain=true" ./bin/llc input.ll
==69819==XRay: Log file in 'xray-log.llc.5rqxkU'
$ llvm-xray account xray-log.llc.5rqxkU -top=10 -sort=sum -sortorder=dsc -instr_map ./bin/llc
Functions with latencies: 36652
funcid count [ min, med, 90p, 99p, max] sum function
75 1 [ 0.672368, 0.672368, 0.672368, 0.672368, 0.672368] 0.672368 llc.cpp:271:0: main
78 1 [ 0.626455, 0.626455, 0.626455, 0.626455, 0.626455] 0.626455 llc.cpp:381:0: compileModule(char**, llvm::LLVMContext&)
139617 1 [ 0.472618, 0.472618, 0.472618, 0.472618, 0.472618] 0.472618 LegacyPassManager.cpp:1723:0: llvm::legacy::PassManager::run(llvm::Module&)
139610 1 [ 0.472618, 0.472618, 0.472618, 0.472618, 0.472618] 0.472618 LegacyPassManager.cpp:1681:0: llvm::legacy::PassManagerImpl::run(llvm::Module&)
139612 1 [ 0.470948, 0.470948, 0.470948, 0.470948, 0.470948] 0.470948 LegacyPassManager.cpp:1564:0: (anonymous namespace)::MPPassManager::runOnModule(llvm::Module&)
139607 2 [ 0.147345, 0.315994, 0.315994, 0.315994, 0.315994] 0.463340 LegacyPassManager.cpp:1530:0: llvm::FPPassManager::runOnModule(llvm::Module&)
139605 21 [ 0.000002, 0.000002, 0.102593, 0.213336, 0.213336] 0.463331 LegacyPassManager.cpp:1491:0: llvm::FPPassManager::runOnFunction(llvm::Function&)
139563 26096 [ 0.000002, 0.000002, 0.000037, 0.000063, 0.000215] 0.225708 LegacyPassManager.cpp:1083:0: llvm::PMDataManager::findAnalysisPass(void const*, bool)
108055 188 [ 0.000002, 0.000120, 0.001375, 0.004523, 0.062624] 0.159279 MachineFunctionPass.cpp:38:0: llvm::MachineFunctionPass::runOnFunction(llvm::Function&)
62635 22 [ 0.000041, 0.000046, 0.000050, 0.126744, 0.126744] 0.127715 X86TargetMachine.cpp:242:0: llvm::X86TargetMachine::getSubtargetImpl(llvm::Function const&) const
Instrumentation Attributes
``````````````````````````
The other way is to use configuration files for selecting which functions
should always be instrumented by the compiler. This gives us a way of ensuring
that certain functions are always (or never) instrumented without having to
add the attribute to the source.
To use this feature, you can define one file for the functions to always
instrument, and another for functions to never instrument. The format of these
files is exactly the same as that of the SanitizerLists files that control
similar things for the sanitizer implementations. For example, we can have two
different files like below:
::
# always-instrument.txt
# always instrument functions that match the following filters:
fun:main
# never-instrument.txt
# never instrument functions that match the following filters:
fun:__cxx_*
Given the above two files, we can rebuild by providing them to clang as
``-fxray-always-instrument=always-instrument.txt`` and
``-fxray-never-instrument=never-instrument.txt``, as shown in the example below.
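Putting this together, a rebuild of a hypothetical ``sample.cc`` might pass both
lists on the command line (a sketch, not a prescribed invocation):
::
$ clang++ -fxray-instrument \
-fxray-always-instrument=always-instrument.txt \
-fxray-never-instrument=never-instrument.txt \
-o sample sample.cc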
Further Exploration
-------------------
The ``llvm-xray`` tool has a few other subcommands that are in various stages
of being developed. One subcommand that can highlight a few interesting things
is the ``graph`` subcommand. Given, for example, the following toy program that
we build with XRay instrumentation, we can see how the generated graph may be a
helpful indicator of where the application is spending its time.
.. code-block:: c++
// sample.cc
#include <iostream>
#include <thread>
[[clang::xray_always_instrument]] void f() {
std::cerr << '.';
}
[[clang::xray_always_instrument]] void g() {
for (int i = 0; i < 1 << 10; ++i) {
std::cerr << '-';
}
}
int main(int argc, char* argv[]) {
std::thread t1([] {
for (int i = 0; i < 1 << 10; ++i)
f();
});
std::thread t2([] {
g();
});
t1.join();
t2.join();
std::cerr << '\n';
}
We then build the above with XRay instrumentation:
::
$ clang++ -o sample -O3 sample.cc -std=c++11 -fxray-instrument -fxray-instruction-threshold=1
$ XRAY_OPTIONS="patch_premain=true" ./sample
We can then explore the graph rendering of the trace generated by this sample
application. We assume you have the graphviz tools available on your system,
including both ``unflatten`` and ``dot``. If you prefer rendering or exploring
the graph using another tool, then that should be feasible as well. ``llvm-xray
graph`` will create DOT format graphs which should be usable in most graph
rendering applications. One example invocation of the ``llvm-xray graph``
command should yield some interesting insights into the workings of C++
applications:
::
$ llvm-xray graph xray-log.sample.* -m sample -color-edges=sum -edge-label=sum \
| unflatten -f -l10 | dot -Tsvg -o sample.svg
Next Steps
----------
If you have some interesting analyses you'd like to implement as part of the
llvm-xray tool, please feel free to propose them on the llvm-dev@ mailing list.
The following are some ideas to inspire you in getting involved and potentially
making things better.
- Implement a query/filtering library that allows for finding patterns in the
XRay traces.
- A conversion from the XRay trace into something that can be visualized
better by other tools (like the Chrome trace viewer, for example).
- Collecting function call stacks and how often they're encountered in the
XRay trace.

View File

@ -731,7 +731,7 @@ it is parsed. This allows dynamic types of nodes. But the YAML I/O model uses
static typing, so there are limits to how you can use tags with the YAML I/O
model. Recently, we added support to YAML I/O for checking/setting the optional
tag on a map. Using this functionality it is even possible to support different
mappings, as long as they are convertable.
mappings, as long as they are convertible.
To check a tag, inside your mapping() method you can use io.mapTag() to specify
what the tag should be. This will also add that tag when writing yaml.

View File

@ -47,10 +47,10 @@ copyright = u'2003-%d, LLVM Project' % date.today().year
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '4.0'
# The short version.
version = '5'
# The full version, including alpha/beta/rc tags.
release = '4.0'
release = '5'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
@ -251,3 +251,7 @@ for name in os.listdir(command_guide_path):
# FIXME: Define intersphinx configuration.
intersphinx_mapping = {}
# Pygments lexers are sometimes out of date (when parsing LLVM, for example) or
# wrong. Suppress the warning so the build doesn't abort.
suppress_warnings = [ 'misc.highlighting_failure' ]

View File

@ -58,7 +58,7 @@ PROJECT_LOGO =
# entered, it will be relative to the location where doxygen was started. If
# left blank the current directory will be used.
OUTPUT_DIRECTORY = @abs_top_builddir@/docs/doxygen
OUTPUT_DIRECTORY = @abs_top_builddir@/doxygen
# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub-
# directories (in 2 levels) under the output directory of each output format and
@ -132,7 +132,7 @@ INLINE_INHERITED_MEMB = NO
# shortest path that makes the file name unique will be used
# The default value is: YES.
FULL_PATH_NAMES = NO
FULL_PATH_NAMES = YES
# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
# Stripping is only done if one of the specified strings matches the left-hand
@ -144,7 +144,7 @@ FULL_PATH_NAMES = NO
# will be relative from the directory where doxygen is started.
# This tag requires that the tag FULL_PATH_NAMES is set to YES.
STRIP_FROM_PATH = ../..
STRIP_FROM_PATH = @abs_top_srcdir@/..
# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
# path mentioned in the documentation of a class, which tells the reader which
@ -153,7 +153,8 @@ STRIP_FROM_PATH = ../..
# specify the list of include paths that are normally passed to the compiler
# using the -I flag.
STRIP_FROM_INC_PATH =
STRIP_FROM_INC_PATH = @abs_top_srcdir@/../include
STRIP_FROM_INC_PATH += @abs_top_srcdir@/../lib
# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
# less readable) file names. This can be useful is your file systems doesn't
@ -513,7 +514,7 @@ SHOW_GROUPED_MEMB_INC = NO
# files with double quotes in the documentation rather than with sharp brackets.
# The default value is: NO.
FORCE_LOCAL_INCLUDES = NO
FORCE_LOCAL_INCLUDES = YES
# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
# documentation for inline members.
@ -743,9 +744,9 @@ WARN_LOGFILE =
# spaces.
# Note: If this tag is empty the current directory is searched.
INPUT = @abs_top_srcdir@/include \
@abs_top_srcdir@/lib \
@abs_top_srcdir@/docs/doxygen-mainpage.dox
INPUT = @abs_top_srcdir@/../include \
@abs_top_srcdir@/../lib \
@abs_top_srcdir@/doxygen-mainpage.dox
# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
@ -813,7 +814,7 @@ EXCLUDE_SYMBOLS =
# that contain example code fragments that are included (see the \include
# command).
EXAMPLE_PATH = @abs_top_srcdir@/examples
EXAMPLE_PATH = @abs_top_srcdir@/../examples
# If the value of the EXAMPLE_PATH tag contains directories, you can use the
# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
@ -833,7 +834,7 @@ EXAMPLE_RECURSIVE = YES
# that contain images that are to be included in the documentation (see the
# \image command).
IMAGE_PATH = @abs_top_srcdir@/docs/img
IMAGE_PATH = @abs_top_srcdir@/img
# The INPUT_FILTER tag can be used to specify a program that doxygen should
# invoke to filter for each input file. Doxygen will invoke the filter program
@ -1885,7 +1886,7 @@ ENABLE_PREPROCESSING = YES
# The default value is: NO.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
MACRO_EXPANSION = NO
MACRO_EXPANSION = YES
# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
# the macro expansion is limited to the macros specified with the PREDEFINED and
@ -1893,7 +1894,7 @@ MACRO_EXPANSION = NO
# The default value is: NO.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
EXPAND_ONLY_PREDEF = NO
EXPAND_ONLY_PREDEF = YES
# If the SEARCH_INCLUDES tag is set to YES the includes files in the
# INCLUDE_PATH will be searched if a #include is found.
@ -1925,7 +1926,7 @@ INCLUDE_FILE_PATTERNS =
# recursively expanded use the := operator instead of the = operator.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
PREDEFINED =
PREDEFINED = LLVM_ALIGNAS(x)=
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
# tag can be used to specify a list of macro names that should be expanded. The

View File

@ -1,6 +1,11 @@
Overview
========
.. warning::
If you are using a released version of LLVM, see `the download page
<http://llvm.org/releases/>`_ to find your documentation.
The LLVM compiler infrastructure supports a wide range of projects, from
industrial strength compilers to specialized JIT applications to small
research projects.
@ -85,6 +90,8 @@ representation.
CodeOfConduct
CompileCudaWithLLVM
ReportingGuide
Benchmarking
Docker
:doc:`GettingStarted`
Discusses how to get up and running quickly with the LLVM infrastructure.
@ -155,6 +162,9 @@ representation.
A collection of tips for frontend authors on how to generate IR
which LLVM is able to effectively optimize.
:doc:`Docker`
A reference for using Dockerfiles provided with LLVM.
Programming Documentation
=========================
@ -269,6 +279,7 @@ For API clients and LLVM developers.
Coroutines
GlobalISel
XRay
XRayExample
PDB/index
:doc:`WritingAnLLVMPass`
@ -353,10 +364,10 @@ For API clients and LLVM developers.
Answers some questions about the new Attributes infrastructure.
:doc:`NVPTXUsage`
This document describes using the NVPTX back-end to compile GPU kernels.
This document describes using the NVPTX backend to compile GPU kernels.
:doc:`AMDGPUUsage`
This document describes how to use the AMDGPU back-end.
This document describes using the AMDGPU backend to compile GPU kernels.
:doc:`StackMaps`
LLVM support for mapping instruction addresses to the location of
@ -394,6 +405,9 @@ For API clients and LLVM developers.
:doc:`XRay`
High-level documentation of how to use XRay in LLVM.
:doc:`XRayExample`
An example of how to debug an application with XRay.
:doc:`The Microsoft PDB File Format <PDB/index>`
A detailed description of the Microsoft PDB (Program Database) file format.
@ -518,6 +532,7 @@ can be better.
CodeOfConduct
Proposals/GitHubMove
Proposals/VectorizationPlan
:doc:`CodeOfConduct`
Proposal to adopt a code of conduct on the LLVM social spaces (lists, events,
@ -526,6 +541,8 @@ can be better.
:doc:`Proposals/GitHubMove`
Proposal to move from SVN/Git to GitHub.
:doc:`Proposals/VectorizationPlan`
Proposal to model the process and upgrade the infrastructure of LLVM's Loop Vectorizer.
Indices and tables
==================

View File

@ -12,7 +12,7 @@ Welcome to Chapter 1 of the "Building an ORC-based JIT in LLVM" tutorial. This
tutorial runs through the implementation of a JIT compiler using LLVM's
On-Request-Compilation (ORC) APIs. It begins with a simplified version of the
KaleidoscopeJIT class used in the
`Implementing a language with LLVM <LangImpl1.html>`_ tutorials and then
`Implementing a language with LLVM <LangImpl01.html>`_ tutorials and then
introduces new features like optimization, lazy compilation and remote
execution.
@ -41,7 +41,7 @@ The structure of the tutorial is:
a remote process with reduced privileges using the JIT Remote APIs.
To provide input for our JIT we will use the Kaleidoscope REPL from
`Chapter 7 <LangImpl7.html>`_ of the "Implementing a language in LLVM tutorial",
`Chapter 7 <LangImpl07.html>`_ of the "Implementing a language in LLVM tutorial",
with one minor modification: We will remove the FunctionPassManager from the
code for that chapter and replace it with optimization support in our JIT class
in Chapter #2.
@ -91,8 +91,8 @@ KaleidoscopeJIT
In the previous section we described our API, now we examine a simple
implementation of it: The KaleidoscopeJIT class [1]_ that was used in the
`Implementing a language with LLVM <LangImpl1.html>`_ tutorials. We will use
the REPL code from `Chapter 7 <LangImpl7.html>`_ of that tutorial to supply the
`Implementing a language with LLVM <LangImpl01.html>`_ tutorials. We will use
the REPL code from `Chapter 7 <LangImpl07.html>`_ of that tutorial to supply the
input for our JIT: Each time the user enters an expression the REPL will add a
new IR module containing the code for that expression to the JIT. If the
expression is a top-level expression like '1+1' or 'sin(x)', the REPL will also
@ -125,14 +125,12 @@ usual include guards and #includes [2]_, we get to the definition of our class:
class KaleidoscopeJIT {
private:
std::unique_ptr<TargetMachine> TM;
const DataLayout DL;
ObjectLinkingLayer<> ObjectLayer;
IRCompileLayer<decltype(ObjectLayer)> CompileLayer;
public:
typedef decltype(CompileLayer)::ModuleSetHandleT ModuleHandleT;
Our class begins with four members: A TargetMachine, TM, which will be used
@ -152,16 +150,16 @@ compiling it, and passing the resulting in-memory object files down to the
object linking layer below.
That's it for member variables; after that we have a single typedef:
ModuleHandle. This is the handle type that will be returned from our JIT's
ModuleHandleT. This is the handle type that will be returned from our JIT's
addModule method, and can be passed to the removeModule method to remove a
module. The IRCompileLayer class already provides a convenient handle type
(IRCompileLayer::ModuleSetHandleT), so we just alias our ModuleHandle to this.
(IRCompileLayer::ModuleSetHandleT), so we just alias our ModuleHandleT to this.
.. code-block:: c++
KaleidoscopeJIT()
: TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()),
CompileLayer(ObjectLayer, SimpleCompiler(*TM)) {
CompileLayer(ObjectLayer, SimpleCompiler(*TM)) {
llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr);
}
@ -200,7 +198,7 @@ available for execution.
return JITSymbol(nullptr);
});
// Build a singlton module set to hold our module.
// Build a singleton module set to hold our module.
std::vector<std::unique_ptr<Module>> Ms;
Ms.push_back(std::move(M));
@ -259,16 +257,16 @@ were linked into a single, ever-growing logical dylib. To implement this our
first lambda (the one defining findSymbolInLogicalDylib) will just search for
JIT'd code by calling the CompileLayer's findSymbol method. If we don't find a
symbol in the JIT itself we'll fall back to our second lambda, which implements
findSymbol. This will use the RTDyldMemoyrManager::getSymbolAddressInProcess
findSymbol. This will use the RTDyldMemoryManager::getSymbolAddressInProcess
method to search for the symbol within the program itself. If we can't find a
symbol definition via either of these paths the JIT will refuse to accept our
symbol definition via either of these paths, the JIT will refuse to accept our
module, returning a "symbol not found" error.
Now that we've built our symbol resolver we're ready to add our module to the
Now that we've built our symbol resolver, we're ready to add our module to the
JIT. We do this by calling the CompileLayer's addModuleSet method [4]_. Since
we only have a single Module and addModuleSet expects a collection, we will
create a vector of modules and add our module as the only member. Since we
have already typedef'd our ModuleHandle type to be the same as the
have already typedef'd our ModuleHandleT type to be the same as the
CompileLayer's handle type, we can return the handle from addModuleSet
directly from our addModule method.
@ -304,7 +302,7 @@ treated as a duplicate definition when the next top-level expression is
entered. It is generally good to free any module that you know you won't need
to call further, just to free up the resources dedicated to it. However, you
don't strictly need to do this: All resources will be cleaned up when your
JIT class is destructed, if the haven't been freed before then.
JIT class is destructed, if they haven't been freed before then.
This brings us to the end of Chapter 1 of Building a JIT. You now have a basic
but fully functioning JIT stack that you can use to take LLVM IR and make it

View File

@ -25,7 +25,7 @@ IRTransformLayer, to add IR optimization support to KaleidoscopeJIT.
Optimizing Modules using the IRTransformLayer
=============================================
In `Chapter 4 <LangImpl4.html>`_ of the "Implementing a language with LLVM"
In `Chapter 4 <LangImpl04.html>`_ of the "Implementing a language with LLVM"
tutorial series the llvm *FunctionPassManager* is introduced as a means for
optimizing LLVM IR. Interested readers may read that chapter for details, but
in short: to optimize a Module we create an llvm::FunctionPassManager
@ -148,7 +148,7 @@ At the bottom of our JIT we add a private method to do the actual optimization:
*optimizeModule*. This function sets up a FunctionPassManager, adds some passes
to it, runs it over every function in the module, and then returns the mutated
module. The specific optimizations are the same ones used in
`Chapter 4 <LangImpl4.html>`_ of the "Implementing a language with LLVM"
`Chapter 4 <LangImpl04.html>`_ of the "Implementing a language with LLVM"
tutorial series. Readers may visit that chapter for a more in-depth
discussion of these, and of IR optimization in general.

View File

@ -10,7 +10,7 @@ Chapter 2 Introduction
Welcome to Chapter 2 of the "`Implementing a language with
LLVM <index.html>`_" tutorial. This chapter shows you how to use the
lexer, built in `Chapter 1 <LangImpl1.html>`_, to build a full
lexer, built in `Chapter 1 <LangImpl01.html>`_, to build a full
`parser <http://en.wikipedia.org/wiki/Parsing>`_ for our Kaleidoscope
language. Once we have a parser, we'll define and build an `Abstract
Syntax Tree <http://en.wikipedia.org/wiki/Abstract_syntax_tree>`_ (AST).
@ -119,6 +119,8 @@ way to talk about functions themselves:
public:
PrototypeAST(const std::string &name, std::vector<std::string> Args)
: Name(name), Args(std::move(Args)) {}
const std::string &getName() const { return Name; }
};
/// FunctionAST - This class represents a function definition itself.

View File

@ -10,7 +10,7 @@ Chapter 3 Introduction
Welcome to Chapter 3 of the "`Implementing a language with
LLVM <index.html>`_" tutorial. This chapter shows you how to transform
the `Abstract Syntax Tree <LangImpl2.html>`_, built in Chapter 2, into
the `Abstract Syntax Tree <LangImpl02.html>`_, built in Chapter 2, into
LLVM IR. This will teach you a little bit about how LLVM does things, as
well as demonstrate how easy it is to use. It's much more work to build
a lexer and parser than it is to generate LLVM IR code. :)
@ -122,7 +122,7 @@ First we'll do numeric literals:
.. code-block:: c++
Value *NumberExprAST::codegen() {
return ConstantFP::get(LLVMContext, APFloat(Val));
return ConstantFP::get(TheContext, APFloat(Val));
}
In the LLVM IR, numeric constants are represented with the
@ -171,7 +171,7 @@ variables <LangImpl7.html#user-defined-local-variables>`_.
case '<':
L = Builder.CreateFCmpULT(L, R, "cmptmp");
// Convert bool 0/1 to double 0.0 or 1.0
return Builder.CreateUIToFP(L, Type::getDoubleTy(LLVMContext),
return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext),
"booltmp");
default:
return LogErrorV("invalid binary operator");
@ -270,9 +270,9 @@ with:
Function *PrototypeAST::codegen() {
// Make the function type: double(double,double) etc.
std::vector<Type*> Doubles(Args.size(),
Type::getDoubleTy(LLVMContext));
Type::getDoubleTy(TheContext));
FunctionType *FT =
FunctionType::get(Type::getDoubleTy(LLVMContext), Doubles, false);
FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false);
Function *F =
Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
@ -346,7 +346,7 @@ assert that the function is empty (i.e. has no body yet) before we start.
.. code-block:: c++
// Create a new basic block to start insertion into.
BasicBlock *BB = BasicBlock::Create(LLVMContext, "entry", TheFunction);
BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction);
Builder.SetInsertPoint(BB);
// Record the function arguments in the NamedValues map.
@ -362,7 +362,7 @@ end of the new basic block. Basic blocks in LLVM are an important part
of functions that define the `Control Flow
Graph <http://en.wikipedia.org/wiki/Control_flow_graph>`_. Since we
don't have any control flow, our functions will only contain one block
at this point. We'll fix this in `Chapter 5 <LangImpl5.html>`_ :).
at this point. We'll fix this in `Chapter 5 <LangImpl05.html>`_ :).
Next we add the function arguments to the NamedValues map (after first clearing
it out) so that they're accessible to ``VariableExprAST`` nodes.
@ -533,13 +533,14 @@ This shows an extern for the libm "cos" function, and a call to it.
ret double %calltmp
}
When you quit the current demo, it dumps out the IR for the entire
When you quit the current demo (by sending an EOF via CTRL+D on Linux
or CTRL+Z and ENTER on Windows), it dumps out the IR for the entire
module generated. Here you can see the big picture with all the
functions referencing each other.
This wraps up the third chapter of the Kaleidoscope tutorial. Up next,
we'll describe how to `add JIT codegen and optimizer
support <LangImpl4.html>`_ to this so we can actually start running
support <LangImpl04.html>`_ to this so we can actually start running
code!
Full Code Listing

View File

@ -131,33 +131,29 @@ for us:
void InitializeModuleAndPassManager(void) {
// Open a new module.
Context LLVMContext;
TheModule = llvm::make_unique<Module>("my cool jit", LLVMContext);
TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
TheModule = llvm::make_unique<Module>("my cool jit", TheContext);
// Create a new pass manager attached to it.
TheFPM = llvm::make_unique<FunctionPassManager>(TheModule.get());
// Provide basic AliasAnalysis support for GVN.
TheFPM.add(createBasicAliasAnalysisPass());
// Do simple "peephole" optimizations and bit-twiddling optzns.
TheFPM.add(createInstructionCombiningPass());
TheFPM->add(createInstructionCombiningPass());
// Reassociate expressions.
TheFPM.add(createReassociatePass());
TheFPM->add(createReassociatePass());
// Eliminate Common SubExpressions.
TheFPM.add(createGVNPass());
TheFPM->add(createGVNPass());
// Simplify the control flow graph (deleting unreachable blocks, etc).
TheFPM.add(createCFGSimplificationPass());
TheFPM->add(createCFGSimplificationPass());
TheFPM.doInitialization();
TheFPM->doInitialization();
}
This code initializes the global module ``TheModule``, and the function pass
manager ``TheFPM``, which is attached to ``TheModule``. Once the pass manager is
set up, we use a series of "add" calls to add a bunch of LLVM passes.
In this case, we choose to add five passes: one analysis pass (alias analysis),
and four optimization passes. The passes we choose here are a pretty standard set
In this case, we choose to add four optimization passes.
The passes we choose here are a pretty standard set
of "cleanup" optimizations that are useful for a wide variety of code. I won't
delve into what they do but, believe me, they are a good starting place :).
@ -227,8 +223,10 @@ expressions they type in. For example, if they type in "1 + 2;", we
should evaluate and print out 3. If they define a function, they should
be able to call it from the command line.
In order to do this, we first declare and initialize the JIT. This is
done by adding a global variable ``TheJIT``, and initializing it in
In order to do this, we first prepare the environment to create code for
the current native target and declare and initialize the JIT. This is
done by calling some ``InitializeNativeTarget\*`` functions and
adding a global variable ``TheJIT``, and initializing it in
``main``:
.. code-block:: c++
@ -236,7 +234,21 @@ done by adding a global variable ``TheJIT``, and initializing it in
static std::unique_ptr<KaleidoscopeJIT> TheJIT;
...
int main() {
..
InitializeNativeTarget();
InitializeNativeTargetAsmPrinter();
InitializeNativeTargetAsmParser();
// Install standard binary operators.
// 1 is lowest precedence.
BinopPrecedence['<'] = 10;
BinopPrecedence['+'] = 20;
BinopPrecedence['-'] = 20;
BinopPrecedence['*'] = 40; // highest.
// Prime the first token.
fprintf(stderr, "ready> ");
getNextToken();
TheJIT = llvm::make_unique<KaleidoscopeJIT>();
// Run the main "interpreter loop" now.
@ -245,9 +257,24 @@ done by adding a global variable ``TheJIT``, and initializing it in
return 0;
}
We also need to set up the data layout for the JIT:
.. code-block:: c++
void InitializeModuleAndPassManager(void) {
// Open a new module.
TheModule = llvm::make_unique<Module>("my cool jit", TheContext);
TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
// Create a new pass manager attached to it.
TheFPM = llvm::make_unique<FunctionPassManager>(TheModule.get());
...
The KaleidoscopeJIT class is a simple JIT built specifically for these
tutorials. In later chapters we will look at how it works and extend it with
new features, but for now we will take it as given. Its API is very simple::
tutorials, available inside the LLVM source code
at llvm-src/examples/Kaleidoscope/include/KaleidoscopeJIT.h.
In later chapters we will look at how it works and extend it with
new features, but for now we will take it as given. Its API is very simple:
``addModule`` adds an LLVM IR module to the JIT, making its functions
available for execution; ``removeModule`` removes a module, freeing any
memory associated with the code in that module; and ``findSymbol`` allows us
@ -458,7 +485,8 @@ We also need to update HandleDefinition and HandleExtern:
if (auto FnAST = ParseDefinition()) {
if (auto *FnIR = FnAST->codegen()) {
fprintf(stderr, "Read function definition:");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
TheJIT->addModule(std::move(TheModule));
InitializeModuleAndPassManager();
}
@ -472,7 +500,8 @@ We also need to update HandleDefinition and HandleExtern:
if (auto ProtoAST = ParseExtern()) {
if (auto *FnIR = ProtoAST->codegen()) {
fprintf(stderr, "Read extern: ");
FnIR->dump();
FnIR->print(errs());
fprintf(stderr, "\n");
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
}
} else {
@ -552,7 +581,10 @@ most recent to the oldest, to find the newest definition. If no definition is
found inside the JIT, it falls back to calling "``dlsym("sin")``" on the
Kaleidoscope process itself. Since "``sin``" is defined within the JIT's
address space, it simply patches up calls in the module to call the libm
version of ``sin`` directly.
version of ``sin`` directly. In some cases this even goes further: since ``sin``
and ``cos`` are names of standard math functions, the constant folder will
evaluate calls with constant arguments, like the "``sin(1.0)``" above, directly
to the correct result.
In the future we'll see how tweaking this symbol resolution rule can be used to
enable all sorts of useful features, from security (restricting the set of
@ -565,12 +597,21 @@ if we add:
.. code-block:: c++
#ifdef LLVM_ON_WIN32
#define DLLEXPORT __declspec(dllexport)
#else
#define DLLEXPORT
#endif
/// putchard - putchar that takes a double and returns 0.
extern "C" double putchard(double X) {
extern "C" DLLEXPORT double putchard(double X) {
fputc((char)X, stderr);
return 0;
}
Note that for Windows we need to actually export the functions because
the dynamic symbol loader will use GetProcAddress to find the symbols.
Now we can produce simple output to the console by using things like:
"``extern putchard(x); putchard(120);``", which prints a lowercase 'x'
on the console (120 is the ASCII code for 'x'). Similar code could be
@ -581,7 +622,7 @@ This completes the JIT and optimizer chapter of the Kaleidoscope
tutorial. At this point, we can compile a non-Turing-complete
programming language, optimize and JIT compile it in a user-driven way.
Next up we'll look into `extending the language with control flow
constructs <LangImpl5.html>`_, tackling some interesting LLVM IR issues
constructs <LangImpl05.html>`_, tackling some interesting LLVM IR issues
along the way.
Full Code Listing

View File

@ -103,7 +103,8 @@ To represent the new expression we add a new AST node for it:
IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
std::unique_ptr<ExprAST> Else)
: Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
virtual Value *codegen();
Value *codegen() override;
};
The AST node just has pointers to the various subexpressions.
@ -268,7 +269,7 @@ Phi nodes:
#. Values that are implicit in the structure of your AST, such as the
Phi node in this case.
In `Chapter 7 <LangImpl7.html>`_ of this tutorial ("mutable variables"),
In `Chapter 7 <LangImpl07.html>`_ of this tutorial ("mutable variables"),
we'll talk about #1 in depth. For now, just believe me that you don't
need SSA construction to handle this case. For #2, you have the choice
of using the techniques that we will describe for #1, or you can insert
@ -290,9 +291,9 @@ for ``IfExprAST``:
if (!CondV)
return nullptr;
// Convert condition to a bool by comparing equal to 0.0.
// Convert condition to a bool by comparing non-equal to 0.0.
CondV = Builder.CreateFCmpONE(
CondV, ConstantFP::get(LLVMContext, APFloat(0.0)), "ifcond");
CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond");
This code is straightforward and similar to what we saw before. We emit
the expression for the condition, then compare that value to zero to get
@ -305,9 +306,9 @@ a truth value as a 1-bit (bool) value.
// Create blocks for the then and else cases. Insert the 'then' block at the
// end of the function.
BasicBlock *ThenBB =
BasicBlock::Create(LLVMContext, "then", TheFunction);
BasicBlock *ElseBB = BasicBlock::Create(LLVMContext, "else");
BasicBlock *MergeBB = BasicBlock::Create(LLVMContext, "ifcont");
BasicBlock::Create(TheContext, "then", TheFunction);
BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else");
BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont");
Builder.CreateCondBr(CondV, ThenBB, ElseBB);
@ -400,7 +401,7 @@ code:
TheFunction->getBasicBlockList().push_back(MergeBB);
Builder.SetInsertPoint(MergeBB);
PHINode *PN =
Builder.CreatePHI(Type::getDoubleTy(LLVMContext), 2, "iftmp");
Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp");
PN->addIncoming(ThenV, ThenBB);
PN->addIncoming(ElseV, ElseBB);
@ -433,7 +434,7 @@ something more aggressive, a 'for' expression:
::
extern putchard(char)
extern putchard(char);
def printstar(n)
for i = 1, i < n, 1.0 in
putchard(42); # ascii 42 = '*'
@ -500,7 +501,8 @@ variable name and the constituent expressions in the node.
std::unique_ptr<ExprAST> Body)
: VarName(VarName), Start(std::move(Start)), End(std::move(End)),
Step(std::move(Step)), Body(std::move(Body)) {}
virtual Value *codegen();
Value *codegen() override;
};
Parser Extensions for the 'for' Loop
@ -561,6 +563,27 @@ value to null in the AST node:
std::move(Body));
}
And again we hook it up as a primary expression:
.. code-block:: c++
static std::unique_ptr<ExprAST> ParsePrimary() {
switch (CurTok) {
default:
return LogError("unknown token when expecting an expression");
case tok_identifier:
return ParseIdentifierExpr();
case tok_number:
return ParseNumberExpr();
case '(':
return ParseParenExpr();
case tok_if:
return ParseIfExpr();
case tok_for:
return ParseForExpr();
}
}
LLVM IR for the 'for' Loop
--------------------------
@ -610,7 +633,8 @@ expression for the loop value:
Value *ForExprAST::codegen() {
// Emit the start code first, without 'variable' in scope.
Value *StartVal = Start->codegen();
if (StartVal == 0) return 0;
if (!StartVal)
return nullptr;
With this out of the way, the next step is to set up the LLVM basic
block for the start of the loop body. In the case above, the whole loop
@ -625,7 +649,7 @@ expression).
Function *TheFunction = Builder.GetInsertBlock()->getParent();
BasicBlock *PreheaderBB = Builder.GetInsertBlock();
BasicBlock *LoopBB =
BasicBlock::Create(LLVMContext, "loop", TheFunction);
BasicBlock::Create(TheContext, "loop", TheFunction);
// Insert an explicit fall through from the current block to the LoopBB.
Builder.CreateBr(LoopBB);
@ -642,7 +666,7 @@ the two blocks.
Builder.SetInsertPoint(LoopBB);
// Start the PHI node with an entry for Start.
PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(LLVMContext),
PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(TheContext),
2, VarName.c_str());
Variable->addIncoming(StartVal, PreheaderBB);
@ -693,7 +717,7 @@ table.
return nullptr;
} else {
// If not specified, use 1.0.
StepVal = ConstantFP::get(LLVMContext, APFloat(1.0));
StepVal = ConstantFP::get(TheContext, APFloat(1.0));
}
Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
@ -710,9 +734,9 @@ iteration of the loop.
if (!EndCond)
return nullptr;
// Convert condition to a bool by comparing equal to 0.0.
// Convert condition to a bool by comparing non-equal to 0.0.
EndCond = Builder.CreateFCmpONE(
EndCond, ConstantFP::get(LLVMContext, APFloat(0.0)), "loopcond");
EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond");
Finally, we evaluate the exit value of the loop, to determine whether
the loop should exit. This mirrors the condition evaluation for the
@ -723,7 +747,7 @@ if/then/else statement.
// Create the "after loop" block and insert it.
BasicBlock *LoopEndBB = Builder.GetInsertBlock();
BasicBlock *AfterBB =
BasicBlock::Create(LLVMContext, "afterloop", TheFunction);
BasicBlock::Create(TheContext, "afterloop", TheFunction);
// Insert the conditional branch into the end of LoopEndBB.
Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
@ -751,7 +775,7 @@ insertion position to it.
NamedValues.erase(VarName);
// for expr always returns 0.0.
return Constant::getNullValue(Type::getDoubleTy(LLVMContext));
return Constant::getNullValue(Type::getDoubleTy(TheContext));
}
The final code handles various cleanups: now that we have the "NextVar"
@ -766,13 +790,13 @@ of the tutorial. In this chapter we added two control flow constructs,
and used them to motivate a couple of aspects of the LLVM IR that are
important for front-end implementors to know. In the next chapter of our
saga, we will get a bit crazier and add `user-defined
operators <LangImpl6.html>`_ to our poor innocent language.
operators <LangImpl06.html>`_ to our poor innocent language.
Full Code Listing
=================
Here is the complete code listing for our running example, enhanced with
the if/then/else and for expressions.. To build this example, use:
the if/then/else and for expressions. To build this example, use:
.. code-block:: bash

View File

@ -31,7 +31,7 @@ User-defined Operators: the Idea
================================
The "operator overloading" that we will add to Kaleidoscope is more
general than languages like C++. In C++, you are only allowed to
general than in languages like C++. In C++, you are only allowed to
redefine existing operators: you can't programmatically change the
grammar, introduce new operators, change precedence levels, etc. In this
chapter, we will add this capability to Kaleidoscope, which will let the
@ -41,8 +41,8 @@ The point of going into user-defined operators in a tutorial like this
is to show the power and flexibility of using a hand-written parser.
Thus far, the parser we have been implementing uses recursive descent
for most parts of the grammar and operator precedence parsing for the
expressions. See `Chapter 2 <LangImpl2.html>`_ for details. Without
using operator precedence parsing, it would be very difficult to allow
expressions. See `Chapter 2 <LangImpl02.html>`_ for details. By
using operator precedence parsing, it is very easy to allow
the programmer to introduce new operators into the grammar: the grammar
is dynamically extensible as the JIT runs.
@ -143,17 +143,18 @@ this:
: Name(name), Args(std::move(Args)), IsOperator(IsOperator),
Precedence(Prec) {}
Function *codegen();
const std::string &getName() const { return Name; }
bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
bool isBinaryOp() const { return IsOperator && Args.size() == 2; }
char getOperatorName() const {
assert(isUnaryOp() || isBinaryOp());
return Name[Name.size()-1];
return Name[Name.size() - 1];
}
unsigned getBinaryPrecedence() const { return Precedence; }
Function *codegen();
};
Basically, in addition to knowing a name for the prototype, we now keep
@ -194,7 +195,7 @@ user-defined operator, we need to parse it:
// Read the precedence if present.
if (CurTok == tok_number) {
if (NumVal < 1 || NumVal > 100)
return LogErrorP("Invalid precedecnce: must be 1..100");
return LogErrorP("Invalid precedence: must be 1..100");
BinaryPrecedence = (unsigned)NumVal;
getNextToken();
}
@ -225,7 +226,7 @@ This is all fairly straightforward parsing code, and we have already
seen a lot of similar code in the past. One interesting part about the
code above is the couple lines that set up ``FnName`` for binary
operators. This builds names like "binary@" for a newly defined "@"
operator. This then takes advantage of the fact that symbol names in the
operator. It then takes advantage of the fact that symbol names in the
LLVM symbol table are allowed to have any character in them, including
embedded nul characters.
@ -251,7 +252,7 @@ default case for our existing binary operator node:
case '<':
L = Builder.CreateFCmpULT(L, R, "cmptmp");
// Convert bool 0/1 to double 0.0 or 1.0
return Builder.CreateUIToFP(L, Type::getDoubleTy(LLVMContext),
return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext),
"booltmp");
default:
break;
@ -259,7 +260,7 @@ default case for our existing binary operator node:
// If it wasn't a builtin binary operator, it must be a user defined one. Emit
// a call to it.
Function *F = TheModule->getFunction(std::string("binary") + Op);
Function *F = getFunction(std::string("binary") + Op);
assert(F && "binary operator not found!");
Value *Ops[2] = { L, R };
@ -277,22 +278,21 @@ The final piece of code we are missing, is a bit of top-level magic:
.. code-block:: c++
Function *FunctionAST::codegen() {
NamedValues.clear();
Function *TheFunction = Proto->codegen();
// Transfer ownership of the prototype to the FunctionProtos map, but keep a
// reference to it for use below.
auto &P = *Proto;
FunctionProtos[Proto->getName()] = std::move(Proto);
Function *TheFunction = getFunction(P.getName());
if (!TheFunction)
return nullptr;
// If this is an operator, install it.
if (Proto->isBinaryOp())
BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
if (P.isBinaryOp())
BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
// Create a new basic block to start insertion into.
BasicBlock *BB = BasicBlock::Create(LLVMContext, "entry", TheFunction);
Builder.SetInsertPoint(BB);
if (Value *RetVal = Body->codegen()) {
...
BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction);
...
Basically, before codegening a function, if it is a user-defined
operator, we register it in the precedence table. This allows the binary
@ -323,7 +323,8 @@ that, we need an AST node:
public:
UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
: Opcode(Opcode), Operand(std::move(Operand)) {}
virtual Value *codegen();
Value *codegen() override;
};
This AST node is very simple and obvious by now. It directly mirrors the
@ -345,7 +346,7 @@ simple: we'll add a new function to do it:
int Opc = CurTok;
getNextToken();
if (auto Operand = ParseUnary())
return llvm::unique_ptr<UnaryExprAST>(Opc, std::move(Operand));
return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand));
return nullptr;
}
@ -433,7 +434,7 @@ unary operators. It looks like this:
if (!OperandV)
return nullptr;
Function *F = TheModule->getFunction(std::string("unary")+Opcode);
Function *F = getFunction(std::string("unary") + Opcode);
if (!F)
return LogErrorV("Unknown unary operator");
@ -461,7 +462,7 @@ newline):
declare double @printd(double)
ready> def binary : 1 (x y) 0; # Low-precedence operator that ignores operands.
..
...
ready> printd(123) : printd(456) : printd(789);
123.000000
456.000000
@ -518,10 +519,9 @@ denser the character:
::
ready>
extern putchard(char)
def printdensity(d)
ready> extern putchard(char);
...
ready> def printdensity(d)
if d > 8 then
putchard(32) # ' '
else if d > 4 then
@ -538,9 +538,9 @@ denser the character:
Evaluated to 0.000000
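For reference, here is the density-to-character mapping written as plain C++.
Only the first two branches are visible in the excerpt above; the remaining
thresholds and characters below are illustrative assumptions, not a transcript
of the tutorial listing:

.. code-block:: c++

   #include <iostream>

   // Lower values print denser characters; d > 8 prints a space, as in the
   // excerpt above. The '.', '+' and '*' branches are assumed for illustration.
   static void PrintDensity(double D) {
     if (D > 8)
       std::cout << ' ';
     else if (D > 4)
       std::cout << '.';
     else if (D > 2)
       std::cout << '+';
     else
       std::cout << '*';
   }

   int main() {
     for (double D = 1; D <= 10; ++D)
       PrintDensity(D);          // prints "**++....  "
     std::cout << "\n";
     return 0;
   }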
Based on these simple primitive operations, we can start to define more
interesting things. For example, here's a little function that solves
for the number of iterations it takes a function in the complex plane to
converge:
interesting things. For example, here's a little function that determines
the number of iterations it takes for a certain function in the complex
plane to diverge:
::
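The Kaleidoscope listing itself is elided in this excerpt. As a rough
illustration of what such a function computes, here is the same "count
iterations until the orbit escapes" loop in plain C++ (the escape radius of 2
and the cap of 255 iterations are assumptions for the sketch):

.. code-block:: c++

   #include <iostream>

   // Count how many iterations of z -> z*z + c it takes for the orbit to
   // escape the disk of radius 2, capped at 255 iterations.
   static int IterationsToDiverge(double CReal, double CImag) {
     double Real = CReal, Imag = CImag;
     int Iters = 0;
     while (Iters < 255 && Real * Real + Imag * Imag <= 4.0) {
       double NextReal = Real * Real - Imag * Imag + CReal;
       Imag = 2.0 * Real * Imag + CImag;
       Real = NextReal;
       ++Iters;
     }
     return Iters;
   }

   int main() {
     std::cout << IterationsToDiverge(0.0, 0.0) << "\n"; // 255: never escapes
     std::cout << IterationsToDiverge(2.0, 2.0) << "\n"; // 0: escapes immediately
     return 0;
   }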
@ -734,7 +734,7 @@ side-effects, but it can't actually define and mutate a variable itself.
Strikingly, variable mutation is an important feature of some languages,
and it is not at all obvious how to `add support for mutable
variables <LangImpl7.html>`_ without having to add an "SSA construction"
variables <LangImpl07.html>`_ without having to add an "SSA construction"
phase to your front-end. In the next chapter, we will describe how you
can add variable mutation without building SSA in your front-end.
@ -742,7 +742,7 @@ Full Code Listing
=================
Here is the complete code listing for our running example, enhanced with
the if/then/else and for expressions.. To build this example, use:
the support for user-defined operators. To build this example, use:
.. code-block:: bash

View File
@ -327,7 +327,7 @@ to update:
static std::map<std::string, AllocaInst*> NamedValues;
Also, since we will need to create these alloca's, we'll use a helper
Also, since we will need to create these allocas, we'll use a helper
function that ensures that the allocas are created in the entry block of
the function:
@ -339,7 +339,7 @@ the function:
const std::string &VarName) {
IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
TheFunction->getEntryBlock().begin());
return TmpB.CreateAlloca(Type::getDoubleTy(LLVMContext), 0,
return TmpB.CreateAlloca(Type::getDoubleTy(TheContext), 0,
VarName.c_str());
}
@ -348,7 +348,7 @@ the first instruction (.begin()) of the entry block. It then creates an
alloca with the expected name and returns it. Because all values in
Kaleidoscope are doubles, there is no need to pass in a type to use.
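To see these pieces outside the tutorial sources, here is a small
self-contained sketch using the LLVM C++ API (the function name, the trivial
``f(x) = x`` body, and the exact set of includes are mine, not the tutorial's).
It creates the alloca at the top of the entry block, stores the incoming
argument into it, and loads it back, which is exactly the load the next
paragraph talks about:

.. code-block:: c++

   #include "llvm/IR/IRBuilder.h"
   #include "llvm/IR/LLVMContext.h"
   #include "llvm/IR/Module.h"
   #include "llvm/Support/raw_ostream.h"

   using namespace llvm;

   int main() {
     LLVMContext Ctx;
     Module M("demo", Ctx);

     // double f(double x)
     FunctionType *FT = FunctionType::get(Type::getDoubleTy(Ctx),
                                          {Type::getDoubleTy(Ctx)}, false);
     Function *F = Function::Create(FT, Function::ExternalLinkage, "f", &M);
     Argument *X = &*F->arg_begin();
     X->setName("x");

     BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
     IRBuilder<> Builder(Entry);

     // Mimic CreateEntryBlockAlloca: emit the alloca at the start of the entry block.
     IRBuilder<> TmpB(Entry, Entry->begin());
     AllocaInst *Alloca = TmpB.CreateAlloca(Type::getDoubleTy(Ctx), 0, "x");

     // Stack-slot scheme: store the argument, then load it back wherever the
     // variable is referenced. mem2reg will later turn this into SSA form.
     Builder.CreateStore(X, Alloca);
     Value *Loaded = Builder.CreateLoad(Alloca, "x");
     Builder.CreateRet(Loaded);

     M.print(outs(), nullptr);
     return 0;
   }

Running ``f`` through the mem2reg pass discussed below removes the alloca, the
store, and the load entirely.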
With this in place, the first functionality change we want to make is to
With this in place, the first functionality change we want to make belongs to
variable references. In our new scheme, variables live on the stack, so
code generating a reference to them actually needs to produce a load
from the stack slot:
@ -377,7 +377,7 @@ the unabridged code):
// Create an alloca for the variable in the entry block.
AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
// Emit the start code first, without 'variable' in scope.
// Emit the start code first, without 'variable' in scope.
Value *StartVal = Start->codegen();
if (!StartVal)
return nullptr;
@ -408,21 +408,25 @@ them. The code for this is also pretty simple:
.. code-block:: c++
/// CreateArgumentAllocas - Create an alloca for each argument and register the
/// argument in the symbol table so that references to it will succeed.
void PrototypeAST::CreateArgumentAllocas(Function *F) {
Function::arg_iterator AI = F->arg_begin();
for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
Function *FunctionAST::codegen() {
...
Builder.SetInsertPoint(BB);
// Record the function arguments in the NamedValues map.
NamedValues.clear();
for (auto &Arg : TheFunction->args()) {
// Create an alloca for this variable.
AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
// Store the initial value into the alloca.
Builder.CreateStore(AI, Alloca);
Builder.CreateStore(&Arg, Alloca);
// Add arguments to variable symbol table.
NamedValues[Args[Idx]] = Alloca;
NamedValues[Arg.getName()] = Alloca;
}
}
if (Value *RetVal = Body->codegen()) {
...
For each argument, we make an alloca, store the input value to the
function into the alloca, and register the alloca as the memory location
@ -434,15 +438,13 @@ get good codegen once again:
.. code-block:: c++
// Set up the optimizer pipeline. Start with registering info about how the
// target lays out data structures.
OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
// Promote allocas to registers.
OurFPM.add(createPromoteMemoryToRegisterPass());
TheFPM->add(createPromoteMemoryToRegisterPass());
// Do simple "peephole" optimizations and bit-twiddling optzns.
OurFPM.add(createInstructionCombiningPass());
TheFPM->add(createInstructionCombiningPass());
// Reassociate expressions.
OurFPM.add(createReassociatePass());
TheFPM->add(createReassociatePass());
...
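For context, here is one way such a per-function pipeline is typically wired up
with the legacy pass manager (a self-contained sketch with my own module name,
not the tutorial's full pass-manager setup):

.. code-block:: c++

   #include "llvm/IR/LLVMContext.h"
   #include "llvm/IR/LegacyPassManager.h"
   #include "llvm/IR/Module.h"
   #include "llvm/Transforms/Scalar.h"

   using namespace llvm;

   int main() {
     LLVMContext Ctx;
     Module TheModule("demo", Ctx);

     // Per-function optimization pipeline, matching the passes listed above.
     legacy::FunctionPassManager TheFPM(&TheModule);
     TheFPM.add(createPromoteMemoryToRegisterPass()); // mem2reg: allocas -> SSA values
     TheFPM.add(createInstructionCombiningPass());    // simple "peephole" optimizations
     TheFPM.add(createReassociatePass());             // reassociate expressions
     TheFPM.doInitialization();

     // Each function F produced by codegen is then optimized with TheFPM.run(*F).
     return 0;
   }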
It is interesting to see what the code looks like before and after the
mem2reg optimization runs. For example, this is the before/after code
@ -454,7 +456,7 @@ for our recursive fib function. Before the optimization:
entry:
%x1 = alloca double
store double %x, double* %x1
%x2 = load double* %x1
%x2 = load double, double* %x1
%cmptmp = fcmp ult double %x2, 3.000000e+00
%booltmp = uitofp i1 %cmptmp to double
%ifcond = fcmp one double %booltmp, 0.000000e+00
@ -464,10 +466,10 @@ for our recursive fib function. Before the optimization:
br label %ifcont
else: ; preds = %entry
%x3 = load double* %x1
%x3 = load double, double* %x1
%subtmp = fsub double %x3, 1.000000e+00
%calltmp = call double @fib(double %subtmp)
%x4 = load double* %x1
%x4 = load double, double* %x1
%subtmp5 = fsub double %x4, 2.000000e+00
%calltmp6 = call double @fib(double %subtmp5)
%addtmp = fadd double %calltmp, %calltmp6
@ -677,10 +679,10 @@ var/in, it looks like this:
public:
VarExprAST(std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames,
std::unique_ptr<ExprAST> body)
: VarNames(std::move(VarNames)), Body(std::move(Body)) {}
std::unique_ptr<ExprAST> Body)
: VarNames(std::move(VarNames)), Body(std::move(Body)) {}
virtual Value *codegen();
Value *codegen() override;
};
var/in allows a list of names to be defined all at once, and each name
@ -812,7 +814,7 @@ previous value that we replace in OldBindings.
if (!InitVal)
return nullptr;
} else { // If not specified, use 0.0.
InitVal = ConstantFP::get(LLVMContext, APFloat(0.0));
InitVal = ConstantFP::get(TheContext, APFloat(0.0));
}
AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);

View File
@ -18,7 +18,7 @@ Source level debugging uses formatted data that helps a debugger
translate from binary and the state of the machine back to the
source that the programmer wrote. In LLVM we generally use a format
called `DWARF <http://dwarfstd.org>`_. DWARF is a compact encoding
that represents types, source locations, and variable locations.
that represents types, source locations, and variable locations.
The short summary of this chapter is that we'll go through the
various things you have to add to a programming language to
@ -94,14 +94,14 @@ Then we're going to remove the command line code wherever it exists:
return;
@@ -1184,7 +1183,6 @@ int main() {
BinopPrecedence['*'] = 40; // highest.
// Prime the first token.
- fprintf(stderr, "ready> ");
getNextToken();
Lastly we're going to disable all of the optimization passes and the JIT so
that the only thing that happens after we're done parsing and generating
code is that the llvm IR goes to standard error:
code is that the LLVM IR goes to standard error:
.. code-block:: udiff
@ -140,7 +140,7 @@ code is that the llvm IR goes to standard error:
-
+ #endif
OurFPM.doInitialization();
// Set the global so the code gen can use this.
This relatively small set of changes gets us to the point that we can compile
@ -166,8 +166,8 @@ DWARF Emission Setup
Similar to the ``IRBuilder`` class we have a
`DIBuilder <http://llvm.org/doxygen/classllvm_1_1DIBuilder.html>`_ class
that helps in constructing debug metadata for an llvm IR file. It
corresponds 1:1 similarly to ``IRBuilder`` and llvm IR, but with nicer names.
that helps in constructing debug metadata for an LLVM IR file. It
corresponds 1:1 similarly to ``IRBuilder`` and LLVM IR, but with nicer names.
Using it does require that you be more familiar with DWARF terminology than
you needed to be with ``IRBuilder`` and ``Instruction`` names, but if you
read through the general documentation on the
@ -194,7 +194,7 @@ expressions:
} KSDbgInfo;
DIType *DebugInfo::getDoubleTy() {
if (DblTy.isValid())
if (DblTy)
return DblTy;
DblTy = DBuilder->createBasicType("double", 64, 64, dwarf::DW_ATE_float);
@ -214,7 +214,7 @@ There are a couple of things to note here. First, while we're producing a
compile unit for a language called Kaleidoscope we used the language
constant for C. This is because a debugger wouldn't necessarily understand
the calling conventions or default ABI for a language it doesn't recognize
and we follow the C ABI in our llvm code generation so it's the closest
and we follow the C ABI in our LLVM code generation so it's the closest
thing to accurate. This ensures we can actually call functions from the
debugger and have them execute. Secondly, you'll see the "fib.ks" in the
call to ``createCompileUnit``. This is a default hard coded value since
@ -259,10 +259,11 @@ information) and construct our function definition:
unsigned LineNo = 0;
unsigned ScopeLine = 0;
DISubprogram *SP = DBuilder->createFunction(
FContext, Name, StringRef(), Unit, LineNo,
CreateFunctionType(Args.size(), Unit), false /* internal linkage */,
true /* definition */, ScopeLine, DINode::FlagPrototyped, false);
F->setSubprogram(SP);
FContext, P.getName(), StringRef(), Unit, LineNo,
CreateFunctionType(TheFunction->arg_size(), Unit),
false /* internal linkage */, true /* definition */, ScopeLine,
DINode::FlagPrototyped, false);
TheFunction->setSubprogram(SP);
and we now have a DISubprogram that contains a reference to all of our
metadata for the function.
@ -326,10 +327,9 @@ that we pass down through when we create a new expression:
giving us locations for each of our expressions and variables.
From this we can make sure to tell ``DIBuilder`` when we're at a new source
location so it can use that when we generate the rest of our code and make
sure that each instruction has source location information. We do this
by constructing another small function:
To make sure that every instruction gets proper source location information,
we have to tell ``Builder`` whenever we're at a new source location.
We use a small helper function for this:
.. code-block:: c++
@ -343,40 +343,23 @@ by constructing another small function:
DebugLoc::get(AST->getLine(), AST->getCol(), Scope));
}
that both tells the main ``IRBuilder`` where we are, but also what scope
we're in. Since we've just created a function above we can either be in
the main file scope (like when we created our function), or now we can be
in the function scope we just created. To represent this we create a stack
of scopes:
This tells the main ``IRBuilder`` not only where we are, but also what scope
we're in. The scope can either be the compile unit or the nearest
enclosing lexical block, such as the current function.
To represent this we create a stack of scopes:
.. code-block:: c++
std::vector<DIScope *> LexicalBlocks;
std::map<const PrototypeAST *, DIScope *> FnScopeMap;
and keep a map of each function to the scope that it represents (an
DISubprogram is also an DIScope).
Then we make sure to:
and push the scope (function) to the top of the stack when we start
generating the code for each function:
.. code-block:: c++
KSDbgInfo.emitLocation(this);
KSDbgInfo.LexicalBlocks.push_back(SP);
emit the location every time we start to generate code for a new AST, and
also:
.. code-block:: c++
KSDbgInfo.FnScopeMap[this] = SP;
store the scope (function) when we create it and use it:
KSDbgInfo.LexicalBlocks.push_back(&KSDbgInfo.FnScopeMap[Proto]);
when we start generating the code for each function.
also, don't forget to pop the scope back off of your scope stack at the
Also, we must not forget to pop the scope back off of the scope stack at the
end of the code generation for the function:
.. code-block:: c++
@ -385,6 +368,13 @@ end of the code generation for the function:
// unconditionally.
KSDbgInfo.LexicalBlocks.pop_back();
Then we make sure to emit the location every time we start to generate code
for a new AST object:
.. code-block:: c++
KSDbgInfo.emitLocation(this);
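Putting the pieces together, here is a minimal, standard-C++-only sketch of the
push/emit/pop discipline described above (strings stand in for ``DIScope*``
values; the names are illustrative, not the tutorial's):

.. code-block:: c++

   #include <iostream>
   #include <string>
   #include <vector>

   // Stand-in for KSDbgInfo.LexicalBlocks: the innermost scope is at the back.
   static std::vector<std::string> LexicalBlocks;

   // Stand-in for emitLocation: report the line together with the scope the
   // location would be attached to.
   static void EmitLocation(int Line) {
     std::string Scope =
         LexicalBlocks.empty() ? "<compile unit>" : LexicalBlocks.back();
     std::cout << "line " << Line << " in scope " << Scope << "\n";
   }

   int main() {
     EmitLocation(1);                 // before any function: compile-unit scope

     LexicalBlocks.push_back("fib");  // entering codegen for function "fib"
     EmitLocation(3);                 // body expressions use the function scope
     EmitLocation(4);
     LexicalBlocks.pop_back();        // must be popped at the end of codegen

     EmitLocation(10);                // back to compile-unit scope
     return 0;
   }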
Variables
=========
@ -392,25 +382,37 @@ Now that we have functions, we need to be able to print out the variables
we have in scope. Let's get our function arguments set up so we can get
decent backtraces and see how our functions are being called. It isn't
a lot of code, and we generally handle it when we're creating the
argument allocas in ``PrototypeAST::CreateArgumentAllocas``.
argument allocas in ``FunctionAST::codegen``.
.. code-block:: c++
DIScope *Scope = KSDbgInfo.LexicalBlocks.back();
DIFile *Unit = DBuilder->createFile(KSDbgInfo.TheCU.getFilename(),
KSDbgInfo.TheCU.getDirectory());
DILocalVariable D = DBuilder->createParameterVariable(
Scope, Args[Idx], Idx + 1, Unit, Line, KSDbgInfo.getDoubleTy(), true);
// Record the function arguments in the NamedValues map.
NamedValues.clear();
unsigned ArgIdx = 0;
for (auto &Arg : TheFunction->args()) {
// Create an alloca for this variable.
AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(),
DebugLoc::get(Line, 0, Scope),
Builder.GetInsertBlock());
// Create a debug descriptor for the variable.
DILocalVariable *D = DBuilder->createParameterVariable(
SP, Arg.getName(), ++ArgIdx, Unit, LineNo, KSDbgInfo.getDoubleTy(),
true);
Here we're doing a few things. First, we're grabbing our current scope
for the variable so we can say what range of code our variable is valid
through. Second, we're creating the variable, giving it the scope,
DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(),
DebugLoc::get(LineNo, 0, SP),
Builder.GetInsertBlock());
// Store the initial value into the alloca.
Builder.CreateStore(&Arg, Alloca);
// Add arguments to variable symbol table.
NamedValues[Arg.getName()] = Alloca;
}
Here we're first creating the variable, giving it the scope (``SP``),
the name, source location, type, and since it's an argument, the argument
index. Third, we create an ``lvm.dbg.declare`` call to indicate at the IR
index. Next, we create an ``llvm.dbg.declare`` call to indicate at the IR
level that we've got a variable in an alloca (and it gives a starting
location for the variable), and setting a source location for the
beginning of the scope on the declare.
@ -420,7 +422,7 @@ assumptions based on how code and debug information was generated for them
in the past. In this case we need to do a little bit of a hack to avoid
generating line information for the function prologue so that the debugger
knows to skip over those instructions when setting a breakpoint. So in
``FunctionAST::CodeGen`` we add a couple of lines:
``FunctionAST::CodeGen`` we add some more lines:
.. code-block:: c++
@ -434,7 +436,7 @@ body of the function:
.. code-block:: c++
KSDbgInfo.emitLocation(Body);
KSDbgInfo.emitLocation(Body.get());
With this we have enough debug information to set breakpoints in functions,
print out argument variables, and call functions. Not too bad for just a

View File
@ -103,19 +103,7 @@ Parser Extensions for If/Then/Else
Now that we have the relevant tokens coming from the lexer and we have
the AST node to build, our parsing logic is relatively straightforward.
First we define a new parsing function:
.. code-block:: ocaml
let rec parse_primary = parser
...
(* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
| [< 'Token.If; c=parse_expr;
'Token.Then ?? "expected 'then'"; t=parse_expr;
'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
Ast.If (c, t, e)
Next we hook it up as a primary expression:
Next we add a new case for parsing an if-expression as a primary expression:
.. code-block:: ocaml
@ -270,7 +258,7 @@ a truth value as a 1-bit (bool) value.
let then_bb = append_block context "then" the_function in
position_at_end then_bb builder;
As opposed to the `C++ tutorial <LangImpl5.html>`_, we have to build our
As opposed to the `C++ tutorial <LangImpl05.html>`_, we have to build our
basic blocks bottom up since we can't have dangling BasicBlocks. We
start off by saving a pointer to the first block (which might not be the
entry block), which we'll need to build a conditional branch later. We

View File
@ -74,18 +74,18 @@ void BrainF::header(LLVMContext& C) {
//declare i32 @getchar()
getchar_func = cast<Function>(module->
getOrInsertFunction("getchar", IntegerType::getInt32Ty(C), NULL));
getOrInsertFunction("getchar", IntegerType::getInt32Ty(C)));
//declare i32 @putchar(i32)
putchar_func = cast<Function>(module->
getOrInsertFunction("putchar", IntegerType::getInt32Ty(C),
IntegerType::getInt32Ty(C), NULL));
IntegerType::getInt32Ty(C)));
//Function header
//define void @brainf()
brainf_func = cast<Function>(module->
getOrInsertFunction("brainf", Type::getVoidTy(C), NULL));
getOrInsertFunction("brainf", Type::getVoidTy(C)));
builder = new IRBuilder<>(BasicBlock::Create(C, label, brainf_func));
@ -156,7 +156,7 @@ void BrainF::header(LLVMContext& C) {
//declare i32 @puts(i8 *)
Function *puts_func = cast<Function>(module->
getOrInsertFunction("puts", IntegerType::getInt32Ty(C),
PointerType::getUnqual(IntegerType::getInt8Ty(C)), NULL));
PointerType::getUnqual(IntegerType::getInt8Ty(C))));
//brainf.aberror:
aberrorbb = BasicBlock::Create(C, label, brainf_func);

View File
@ -77,7 +77,7 @@ void addMainFunction(Module *mod) {
getOrInsertFunction("main", IntegerType::getInt32Ty(mod->getContext()),
IntegerType::getInt32Ty(mod->getContext()),
PointerType::getUnqual(PointerType::getUnqual(
IntegerType::getInt8Ty(mod->getContext()))), NULL));
IntegerType::getInt8Ty(mod->getContext())))));
{
Function::arg_iterator args = main_func->arg_begin();
Value *arg_0 = &*args++;
@ -166,6 +166,10 @@ int main(int argc, char **argv) {
std::vector<GenericValue> args;
Function *brainf_func = M.getFunction("brainf");
GenericValue gv = ee->runFunction(brainf_func, args);
// Generated code calls putchar, and output is not guaranteed without fflush.
// The better place for fflush(stdout) call would be the generated code, but it
// is unmanageable because stdout linkage name depends on stdlib implementation.
fflush(stdout);
} else {
WriteBitcodeToFile(Mod.get(), *out);
}

View File
@ -49,7 +49,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Verifier.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/DataLayout.h"
@ -59,7 +59,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"

View File

@ -54,8 +54,7 @@ static Function *CreateFibFunction(Module *M, LLVMContext &Context) {
// to return an int and take an int parameter.
Function *FibF =
cast<Function>(M->getOrInsertFunction("fib", Type::getInt32Ty(Context),
Type::getInt32Ty(Context),
nullptr));
Type::getInt32Ty(Context)));
// Add a basic block to the function.
BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", FibF);

View File
@ -69,11 +69,9 @@ int main() {
// Create the add1 function entry and insert this entry into module M. The
// function will have a return type of "int" and take an argument of "int".
// The '0' terminates the list of argument types.
Function *Add1F =
cast<Function>(M->getOrInsertFunction("add1", Type::getInt32Ty(Context),
Type::getInt32Ty(Context),
nullptr));
Type::getInt32Ty(Context)));
// Add a basic block to the function. As before, it automatically inserts
// because of the last argument.
@ -102,8 +100,7 @@ int main() {
// Now we're going to create function `foo', which returns an int and takes no
// arguments.
Function *FooF =
cast<Function>(M->getOrInsertFunction("foo", Type::getInt32Ty(Context),
nullptr));
cast<Function>(M->getOrInsertFunction("foo", Type::getInt32Ty(Context)));
// Add a basic block to the FooF function.
BB = BasicBlock::Create(Context, "EntryBlock", FooF);
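The example hunks above all make the same mechanical update:
``Module::getOrInsertFunction`` no longer takes a trailing ``NULL``/``nullptr``
sentinel after the argument types. A minimal sketch of the new call style
(module and function names are mine):

.. code-block:: c++

   #include "llvm/IR/DerivedTypes.h"
   #include "llvm/IR/Function.h"
   #include "llvm/IR/LLVMContext.h"
   #include "llvm/IR/Module.h"
   #include "llvm/Support/Casting.h"

   using namespace llvm;

   int main() {
     LLVMContext Context;
     Module M("demo", Context);

     // New style: the return type followed by the parameter types, with no
     // terminating null argument.
     Function *FibF = cast<Function>(M.getOrInsertFunction(
         "fib", Type::getInt32Ty(Context), Type::getInt32Ty(Context)));

     (void)FibF; // a body would be attached next, as in the examples above
     return 0;
   }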
Some files were not shown because too many files have changed in this diff.