Updated spirv-tools.

Бранимир Караџић 2019-07-20 10:42:04 -07:00
parent 30ab473899
commit cfb5da28f0
29 changed files with 2240 additions and 2568 deletions

View File

@ -84,7 +84,6 @@ SPVTOOLS_OPT_SRC_FILES := \
source/opt/ccp_pass.cpp \
source/opt/code_sink.cpp \
source/opt/combine_access_chains.cpp \
source/opt/common_uniform_elim_pass.cpp \
source/opt/compact_ids_pass.cpp \
source/opt/composite.cpp \
source/opt/const_folding_rules.cpp \

View File

@ -469,8 +469,6 @@ static_library("spvtools_opt") {
"source/opt/code_sink.h",
"source/opt/combine_access_chains.cpp",
"source/opt/combine_access_chains.h",
"source/opt/common_uniform_elim_pass.cpp",
"source/opt/common_uniform_elim_pass.h",
"source/opt/compact_ids_pass.cpp",
"source/opt/compact_ids_pass.h",
"source/opt/composite.cpp",
@ -670,6 +668,7 @@ static_library("spvtools_link") {
]
deps = [
":spvtools",
":spvtools_opt",
":spvtools_val",
]
public_deps = [
@ -736,6 +735,8 @@ static_library("spvtools_reduce") {
"source/reduce/structured_loop_to_selection_reduction_opportunity.h",
"source/reduce/structured_loop_to_selection_reduction_opportunity_finder.cpp",
"source/reduce/structured_loop_to_selection_reduction_opportunity_finder.h",
"source/spirv_reducer_options.cpp",
"source/spirv_reducer_options.h",
]
deps = [
":spvtools",
@ -865,6 +866,7 @@ source_set("spvtools_software_version") {
]
deps = [
":spvtools_build_version",
":spvtools_headers",
]
configs += [ ":spvtools_internal_config" ]
}
@ -949,12 +951,11 @@ if (!is_ios) {
# iOS does not allow std::system calls which spirv-reduce requires
executable("spirv-reduce") {
sources = [
"source/spirv_reducer_options.cpp",
"source/spirv_reducer_options.h",
"tools/reduce/reduce.cpp",
]
deps = [
":spvtools",
":spvtools_opt",
":spvtools_reduce",
":spvtools_software_version",
":spvtools_util_cli_consumer",

View File

@ -1 +1 @@
"v2019.4-dev", "SPIRV-Tools v2019.4-dev v2019.3-82-g55adf4cf"
"v2019.4-dev", "SPIRV-Tools v2019.4-dev v2019.3-90-g76b75c40"

View File

@ -117,6 +117,11 @@ static const int kInstGeomOutPrimitiveId = kInstCommonOutCnt;
static const int kInstGeomOutInvocationId = kInstCommonOutCnt + 1;
static const int kInstGeomOutUnused = kInstCommonOutCnt + 2;
// Ray Tracing Shader Output Record Offsets
static const int kInstRayTracingOutLaunchIdX = kInstCommonOutCnt;
static const int kInstRayTracingOutLaunchIdY = kInstCommonOutCnt + 1;
static const int kInstRayTracingOutLaunchIdZ = kInstCommonOutCnt + 2;
// Size of Common and Stage-specific Members
static const int kInstStageOutCnt = kInstCommonOutCnt + 2;
static const int kInst2StageOutCnt = kInstCommonOutCnt + 3;
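A minimal sketch (not part of this commit) of how a consumer of the debug output buffer might index these new words, assuming the common members fill the first kInstCommonOutCnt words of each record; LaunchIdRead is a hypothetical helper:

// Hypothetical helper: read one LaunchIdNV component from a ray tracing
// debug output record (axis: 0 = x, 1 = y, 2 = z).
static uint32_t LaunchIdRead(const uint32_t* record, int axis) {
  // kInstRayTracingOutLaunchIdX equals kInstCommonOutCnt; Y and Z follow it.
  return record[kInstRayTracingOutLaunchIdX + axis];
}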

View File

@ -492,20 +492,6 @@ Optimizer::PassToken CreateInsertExtractElimPass();
// inserts created by that pass.
Optimizer::PassToken CreateDeadInsertElimPass();
// Creates a pass to consolidate uniform references.
// For each entry point function in the module, first change all constant index
// access chain loads into equivalent composite extracts. Then consolidate
// identical uniform loads into one uniform load. Finally, consolidate
// identical uniform extracts into one uniform extract. This may require
// moving a load or extract to a point which dominates all uses.
//
// This pass requires a module to have structured control flow, i.e. the
// Shader capability. It also requires logical addressing, i.e. the Addresses
// capability is not enabled. It also currently does not support any
// extensions.
//
// This pass currently only optimizes loads with a single index.
Optimizer::PassToken CreateCommonUniformElimPass();
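For context, a hedged before/after sketch of the consolidation this pass performed (SPIR-V shown as comments; all ids are invented):

// Before: two identical single-index uniform loads.
//   %a = OpAccessChain %_ptr_Uniform_float %ubo %int_0
//   %x = OpLoad %float %a
//   ...
//   %b = OpAccessChain %_ptr_Uniform_float %ubo %int_0
//   %y = OpLoad %float %b
// After: one load of the uniform composite and one shared extract, with
// uses of %y rewritten to %x.
//   %l = OpLoad %UboStruct %ubo
//   %x = OpCompositeExtract %float %l 0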
// Create aggressive dead code elimination pass
// This pass eliminates unused code from the module. In addition,
// it detects and eliminates code which may have spurious uses but which do

View File

@ -22,7 +22,6 @@ set(SPIRV_TOOLS_OPT_SOURCES
cfg.h
code_sink.h
combine_access_chains.h
common_uniform_elim_pass.h
compact_ids_pass.h
composite.h
const_folding_rules.h
@ -123,7 +122,6 @@ set(SPIRV_TOOLS_OPT_SOURCES
cfg.cpp
code_sink.cpp
combine_access_chains.cpp
common_uniform_elim_pass.cpp
compact_ids_pass.cpp
composite.cpp
const_folding_rules.cpp

View File

@ -1,596 +0,0 @@
// Copyright (c) 2017 The Khronos Group Inc.
// Copyright (c) 2017 Valve Corporation
// Copyright (c) 2017 LunarG Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "source/opt/common_uniform_elim_pass.h"
#include "source/cfa.h"
#include "source/opt/ir_context.h"
namespace spvtools {
namespace opt {
namespace {
const uint32_t kAccessChainPtrIdInIdx = 0;
const uint32_t kTypePointerStorageClassInIdx = 0;
const uint32_t kTypePointerTypeIdInIdx = 1;
const uint32_t kConstantValueInIdx = 0;
const uint32_t kExtractCompositeIdInIdx = 0;
const uint32_t kExtractIdx0InIdx = 1;
const uint32_t kStorePtrIdInIdx = 0;
const uint32_t kLoadPtrIdInIdx = 0;
const uint32_t kCopyObjectOperandInIdx = 0;
const uint32_t kTypeIntWidthInIdx = 0;
} // anonymous namespace
bool CommonUniformElimPass::IsNonPtrAccessChain(const SpvOp opcode) const {
return opcode == SpvOpAccessChain || opcode == SpvOpInBoundsAccessChain;
}
bool CommonUniformElimPass::IsSamplerOrImageType(
const Instruction* typeInst) const {
switch (typeInst->opcode()) {
case SpvOpTypeSampler:
case SpvOpTypeImage:
case SpvOpTypeSampledImage:
return true;
default:
break;
}
if (typeInst->opcode() != SpvOpTypeStruct) return false;
// Return true if any member is a sampler or image
return !typeInst->WhileEachInId([this](const uint32_t* tid) {
const Instruction* compTypeInst = get_def_use_mgr()->GetDef(*tid);
if (IsSamplerOrImageType(compTypeInst)) {
return false;
}
return true;
});
}
bool CommonUniformElimPass::IsSamplerOrImageVar(uint32_t varId) const {
const Instruction* varInst = get_def_use_mgr()->GetDef(varId);
assert(varInst->opcode() == SpvOpVariable);
const uint32_t varTypeId = varInst->type_id();
const Instruction* varTypeInst = get_def_use_mgr()->GetDef(varTypeId);
const uint32_t varPteTypeId =
varTypeInst->GetSingleWordInOperand(kTypePointerTypeIdInIdx);
Instruction* varPteTypeInst = get_def_use_mgr()->GetDef(varPteTypeId);
return IsSamplerOrImageType(varPteTypeInst);
}
Instruction* CommonUniformElimPass::GetPtr(Instruction* ip, uint32_t* objId) {
const SpvOp op = ip->opcode();
assert(op == SpvOpStore || op == SpvOpLoad);
*objId = ip->GetSingleWordInOperand(op == SpvOpStore ? kStorePtrIdInIdx
: kLoadPtrIdInIdx);
Instruction* ptrInst = get_def_use_mgr()->GetDef(*objId);
while (ptrInst->opcode() == SpvOpCopyObject) {
*objId = ptrInst->GetSingleWordInOperand(kCopyObjectOperandInIdx);
ptrInst = get_def_use_mgr()->GetDef(*objId);
}
Instruction* objInst = ptrInst;
while (objInst->opcode() != SpvOpVariable &&
objInst->opcode() != SpvOpFunctionParameter) {
if (IsNonPtrAccessChain(objInst->opcode())) {
*objId = objInst->GetSingleWordInOperand(kAccessChainPtrIdInIdx);
} else {
assert(objInst->opcode() == SpvOpCopyObject);
*objId = objInst->GetSingleWordInOperand(kCopyObjectOperandInIdx);
}
objInst = get_def_use_mgr()->GetDef(*objId);
}
return ptrInst;
}
bool CommonUniformElimPass::IsVolatileStruct(uint32_t type_id) {
assert(get_def_use_mgr()->GetDef(type_id)->opcode() == SpvOpTypeStruct);
return !get_decoration_mgr()->WhileEachDecoration(
type_id, SpvDecorationVolatile, [](const Instruction&) { return false; });
}
bool CommonUniformElimPass::IsAccessChainToVolatileStructType(
const Instruction& AccessChainInst) {
assert(AccessChainInst.opcode() == SpvOpAccessChain);
uint32_t ptr_id = AccessChainInst.GetSingleWordInOperand(0);
const Instruction* ptr_inst = get_def_use_mgr()->GetDef(ptr_id);
uint32_t pointee_type_id = GetPointeeTypeId(ptr_inst);
const uint32_t num_operands = AccessChainInst.NumOperands();
// walk the type tree:
for (uint32_t idx = 3; idx < num_operands; ++idx) {
Instruction* pointee_type = get_def_use_mgr()->GetDef(pointee_type_id);
switch (pointee_type->opcode()) {
case SpvOpTypeMatrix:
case SpvOpTypeVector:
case SpvOpTypeArray:
case SpvOpTypeRuntimeArray:
pointee_type_id = pointee_type->GetSingleWordOperand(1);
break;
case SpvOpTypeStruct:
// check for volatile decorations:
if (IsVolatileStruct(pointee_type_id)) return true;
if (idx < num_operands - 1) {
const uint32_t index_id = AccessChainInst.GetSingleWordOperand(idx);
const Instruction* index_inst = get_def_use_mgr()->GetDef(index_id);
uint32_t index_value = index_inst->GetSingleWordOperand(
2); // TODO: replace with GetUintValueFromConstant()
pointee_type_id = pointee_type->GetSingleWordInOperand(index_value);
}
break;
default:
assert(false && "Unhandled pointee type.");
}
}
return false;
}
bool CommonUniformElimPass::IsVolatileLoad(const Instruction& loadInst) {
assert(loadInst.opcode() == SpvOpLoad);
// Check if this Load instruction has Volatile Memory Access flag
if (loadInst.NumOperands() == 4) {
uint32_t memory_access_mask = loadInst.GetSingleWordOperand(3);
if (memory_access_mask & SpvMemoryAccessVolatileMask) return true;
}
// If we load a struct directly (result type is struct),
// check if the struct is decorated volatile
uint32_t type_id = loadInst.type_id();
if (get_def_use_mgr()->GetDef(type_id)->opcode() == SpvOpTypeStruct)
return IsVolatileStruct(type_id);
else
return false;
}
bool CommonUniformElimPass::IsUniformVar(uint32_t varId) {
const Instruction* varInst =
get_def_use_mgr()->id_to_defs().find(varId)->second;
if (varInst->opcode() != SpvOpVariable) return false;
const uint32_t varTypeId = varInst->type_id();
const Instruction* varTypeInst =
get_def_use_mgr()->id_to_defs().find(varTypeId)->second;
return varTypeInst->GetSingleWordInOperand(kTypePointerStorageClassInIdx) ==
SpvStorageClassUniform ||
varTypeInst->GetSingleWordInOperand(kTypePointerStorageClassInIdx) ==
SpvStorageClassUniformConstant;
}
bool CommonUniformElimPass::HasUnsupportedDecorates(uint32_t id) const {
return !get_def_use_mgr()->WhileEachUser(id, [this](Instruction* user) {
if (IsNonTypeDecorate(user->opcode())) return false;
return true;
});
}
bool CommonUniformElimPass::HasOnlyNamesAndDecorates(uint32_t id) const {
return get_def_use_mgr()->WhileEachUser(id, [this](Instruction* user) {
SpvOp op = user->opcode();
if (op != SpvOpName && !IsNonTypeDecorate(op)) return false;
return true;
});
}
void CommonUniformElimPass::DeleteIfUseless(Instruction* inst) {
const uint32_t resId = inst->result_id();
assert(resId != 0);
if (HasOnlyNamesAndDecorates(resId)) {
context()->KillInst(inst);
}
}
Instruction* CommonUniformElimPass::ReplaceAndDeleteLoad(Instruction* loadInst,
uint32_t replId,
Instruction* ptrInst) {
const uint32_t loadId = loadInst->result_id();
context()->KillNamesAndDecorates(loadId);
(void)context()->ReplaceAllUsesWith(loadId, replId);
// remove load instruction
Instruction* next_instruction = context()->KillInst(loadInst);
// if access chain, see if it can be removed as well
if (IsNonPtrAccessChain(ptrInst->opcode())) DeleteIfUseless(ptrInst);
return next_instruction;
}
void CommonUniformElimPass::GenACLoadRepl(
const Instruction* ptrInst,
std::vector<std::unique_ptr<Instruction>>* newInsts, uint32_t* resultId) {
// Build and append Load
const uint32_t ldResultId = TakeNextId();
const uint32_t varId =
ptrInst->GetSingleWordInOperand(kAccessChainPtrIdInIdx);
const Instruction* varInst = get_def_use_mgr()->GetDef(varId);
assert(varInst->opcode() == SpvOpVariable);
const uint32_t varPteTypeId = GetPointeeTypeId(varInst);
std::vector<Operand> load_in_operands;
load_in_operands.push_back(Operand(spv_operand_type_t::SPV_OPERAND_TYPE_ID,
std::initializer_list<uint32_t>{varId}));
std::unique_ptr<Instruction> newLoad(new Instruction(
context(), SpvOpLoad, varPteTypeId, ldResultId, load_in_operands));
get_def_use_mgr()->AnalyzeInstDefUse(&*newLoad);
newInsts->emplace_back(std::move(newLoad));
// Build and append Extract
const uint32_t extResultId = TakeNextId();
const uint32_t ptrPteTypeId = GetPointeeTypeId(ptrInst);
std::vector<Operand> ext_in_opnds;
ext_in_opnds.push_back(Operand(spv_operand_type_t::SPV_OPERAND_TYPE_ID,
std::initializer_list<uint32_t>{ldResultId}));
uint32_t iidIdx = 0;
ptrInst->ForEachInId([&iidIdx, &ext_in_opnds, this](const uint32_t* iid) {
if (iidIdx > 0) {
const Instruction* cInst = get_def_use_mgr()->GetDef(*iid);
uint32_t val = cInst->GetSingleWordInOperand(kConstantValueInIdx);
ext_in_opnds.push_back(
Operand(spv_operand_type_t::SPV_OPERAND_TYPE_LITERAL_INTEGER,
std::initializer_list<uint32_t>{val}));
}
++iidIdx;
});
std::unique_ptr<Instruction> newExt(
new Instruction(context(), SpvOpCompositeExtract, ptrPteTypeId,
extResultId, ext_in_opnds));
get_def_use_mgr()->AnalyzeInstDefUse(&*newExt);
newInsts->emplace_back(std::move(newExt));
*resultId = extResultId;
}
bool CommonUniformElimPass::IsConstantIndexAccessChain(Instruction* acp) {
uint32_t inIdx = 0;
return acp->WhileEachInId([&inIdx, this](uint32_t* tid) {
if (inIdx > 0) {
Instruction* opInst = get_def_use_mgr()->GetDef(*tid);
if (opInst->opcode() != SpvOpConstant) return false;
}
++inIdx;
return true;
});
}
bool CommonUniformElimPass::UniformAccessChainConvert(Function* func) {
bool modified = false;
for (auto bi = func->begin(); bi != func->end(); ++bi) {
for (Instruction* inst = &*bi->begin(); inst; inst = inst->NextNode()) {
if (inst->opcode() != SpvOpLoad) continue;
uint32_t varId;
Instruction* ptrInst = GetPtr(inst, &varId);
if (!IsNonPtrAccessChain(ptrInst->opcode())) continue;
// Do not convert nested access chains
if (ptrInst->GetSingleWordInOperand(kAccessChainPtrIdInIdx) != varId)
continue;
if (!IsUniformVar(varId)) continue;
if (!IsConstantIndexAccessChain(ptrInst)) continue;
if (HasUnsupportedDecorates(inst->result_id())) continue;
if (HasUnsupportedDecorates(ptrInst->result_id())) continue;
if (IsVolatileLoad(*inst)) continue;
if (IsAccessChainToVolatileStructType(*ptrInst)) continue;
std::vector<std::unique_ptr<Instruction>> newInsts;
uint32_t replId;
GenACLoadRepl(ptrInst, &newInsts, &replId);
inst = ReplaceAndDeleteLoad(inst, replId, ptrInst);
assert(inst->opcode() != SpvOpPhi);
inst = inst->InsertBefore(std::move(newInsts));
modified = true;
}
}
return modified;
}
void CommonUniformElimPass::ComputeStructuredSuccessors(Function* func) {
block2structured_succs_.clear();
for (auto& blk : *func) {
// If header, make merge block first successor.
uint32_t mbid = blk.MergeBlockIdIfAny();
if (mbid != 0) {
block2structured_succs_[&blk].push_back(cfg()->block(mbid));
uint32_t cbid = blk.ContinueBlockIdIfAny();
if (cbid != 0) {
block2structured_succs_[&blk].push_back(cfg()->block(cbid));
}
}
// add true successors
const auto& const_blk = blk;
const_blk.ForEachSuccessorLabel([&blk, this](const uint32_t sbid) {
block2structured_succs_[&blk].push_back(cfg()->block(sbid));
});
}
}
void CommonUniformElimPass::ComputeStructuredOrder(
Function* func, std::list<BasicBlock*>* order) {
// Compute structured successors and do DFS
ComputeStructuredSuccessors(func);
auto ignore_block = [](cbb_ptr) {};
auto ignore_edge = [](cbb_ptr, cbb_ptr) {};
auto get_structured_successors = [this](const BasicBlock* block) {
return &(block2structured_succs_[block]);
};
// TODO(greg-lunarg): Get rid of const_cast by moving const out of the
// cfa.h prototypes and into the invoking code.
auto post_order = [&](cbb_ptr b) {
order->push_front(const_cast<BasicBlock*>(b));
};
order->clear();
CFA<BasicBlock>::DepthFirstTraversal(&*func->begin(),
get_structured_successors, ignore_block,
post_order, ignore_edge);
}
bool CommonUniformElimPass::CommonUniformLoadElimination(Function* func) {
// Process all blocks in structured order. This is just one way (the
// simplest?) to keep track of the most recent block outside of control
// flow, used to copy common instructions, guaranteed to dominate all
// following load sites.
std::list<BasicBlock*> structuredOrder;
ComputeStructuredOrder(func, &structuredOrder);
uniform2load_id_.clear();
bool modified = false;
// Find insertion point in first block to copy non-dominating loads.
auto insertItr = func->begin()->begin();
while (insertItr->opcode() == SpvOpVariable ||
insertItr->opcode() == SpvOpNop)
++insertItr;
// Update insertItr until it will not be removed. Without this code,
// ReplaceAndDeleteLoad() can set |insertItr| as a dangling pointer.
while (IsUniformLoadToBeRemoved(&*insertItr)) ++insertItr;
uint32_t mergeBlockId = 0;
for (auto bi = structuredOrder.begin(); bi != structuredOrder.end(); ++bi) {
BasicBlock* bp = *bi;
// Check if we are exiting outermost control construct. If so, remember
// new load insertion point. Trying to keep register pressure down.
if (mergeBlockId == bp->id()) {
mergeBlockId = 0;
insertItr = bp->begin();
while (insertItr->opcode() == SpvOpPhi) {
++insertItr;
}
// Update insertItr until it will not be removed. Without this code,
// ReplaceAndDeleteLoad() can set |insertItr| as a dangling pointer.
while (IsUniformLoadToBeRemoved(&*insertItr)) ++insertItr;
}
for (Instruction* inst = &*bp->begin(); inst; inst = inst->NextNode()) {
if (inst->opcode() != SpvOpLoad) continue;
uint32_t varId;
Instruction* ptrInst = GetPtr(inst, &varId);
if (ptrInst->opcode() != SpvOpVariable) continue;
if (!IsUniformVar(varId)) continue;
if (IsSamplerOrImageVar(varId)) continue;
if (HasUnsupportedDecorates(inst->result_id())) continue;
if (IsVolatileLoad(*inst)) continue;
uint32_t replId;
const auto uItr = uniform2load_id_.find(varId);
if (uItr != uniform2load_id_.end()) {
replId = uItr->second;
} else {
if (mergeBlockId == 0) {
// Load is in dominating block; just remember it
uniform2load_id_[varId] = inst->result_id();
continue;
} else {
// Copy load into most recent dominating block and remember it
replId = TakeNextId();
std::unique_ptr<Instruction> newLoad(new Instruction(
context(), SpvOpLoad, inst->type_id(), replId,
{{spv_operand_type_t::SPV_OPERAND_TYPE_ID, {varId}}}));
get_def_use_mgr()->AnalyzeInstDefUse(&*newLoad);
insertItr = insertItr.InsertBefore(std::move(newLoad));
++insertItr;
uniform2load_id_[varId] = replId;
}
}
inst = ReplaceAndDeleteLoad(inst, replId, ptrInst);
modified = true;
}
// If we are outside of any control construct and entering one, remember
// the id of the merge block
if (mergeBlockId == 0) {
mergeBlockId = bp->MergeBlockIdIfAny();
}
}
return modified;
}
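A hedged illustration of the hoisting described above (ids invented):

// Before: the same uniform variable is loaded in both arms of a selection.
//   %then_bb: %a = OpLoad %S %ubo
//   %else_bb: %b = OpLoad %S %ubo
// After: a single copy of the load is inserted at the remembered insertion
// point in the nearest dominating block outside the construct, and all uses
// of %a and %b are replaced by %l.
//   %l = OpLoad %S %ubo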
bool CommonUniformElimPass::CommonUniformLoadElimBlock(Function* func) {
bool modified = false;
for (auto& blk : *func) {
uniform2load_id_.clear();
for (Instruction* inst = &*blk.begin(); inst; inst = inst->NextNode()) {
if (inst->opcode() != SpvOpLoad) continue;
uint32_t varId;
Instruction* ptrInst = GetPtr(inst, &varId);
if (ptrInst->opcode() != SpvOpVariable) continue;
if (!IsUniformVar(varId)) continue;
if (!IsSamplerOrImageVar(varId)) continue;
if (HasUnsupportedDecorates(inst->result_id())) continue;
if (IsVolatileLoad(*inst)) continue;
uint32_t replId;
const auto uItr = uniform2load_id_.find(varId);
if (uItr != uniform2load_id_.end()) {
replId = uItr->second;
} else {
uniform2load_id_[varId] = inst->result_id();
continue;
}
inst = ReplaceAndDeleteLoad(inst, replId, ptrInst);
modified = true;
}
}
return modified;
}
bool CommonUniformElimPass::CommonExtractElimination(Function* func) {
// Find all composite ids with duplicate extracts.
for (auto bi = func->begin(); bi != func->end(); ++bi) {
for (auto ii = bi->begin(); ii != bi->end(); ++ii) {
if (ii->opcode() != SpvOpCompositeExtract) continue;
// TODO(greg-lunarg): Support multiple indices
if (ii->NumInOperands() > 2) continue;
if (HasUnsupportedDecorates(ii->result_id())) continue;
uint32_t compId = ii->GetSingleWordInOperand(kExtractCompositeIdInIdx);
uint32_t idx = ii->GetSingleWordInOperand(kExtractIdx0InIdx);
comp2idx2inst_[compId][idx].push_back(&*ii);
}
}
// For all defs of ids with duplicate extracts, insert new extracts
// after def, and replace and delete old extracts
bool modified = false;
for (auto bi = func->begin(); bi != func->end(); ++bi) {
for (auto ii = bi->begin(); ii != bi->end(); ++ii) {
const auto cItr = comp2idx2inst_.find(ii->result_id());
if (cItr == comp2idx2inst_.end()) continue;
for (auto idxItr : cItr->second) {
if (idxItr.second.size() < 2) continue;
uint32_t replId = TakeNextId();
std::unique_ptr<Instruction> newExtract(
idxItr.second.front()->Clone(context()));
newExtract->SetResultId(replId);
get_def_use_mgr()->AnalyzeInstDefUse(&*newExtract);
++ii;
ii = ii.InsertBefore(std::move(newExtract));
for (auto instItr : idxItr.second) {
uint32_t resId = instItr->result_id();
context()->KillNamesAndDecorates(resId);
(void)context()->ReplaceAllUsesWith(resId, replId);
context()->KillInst(instItr);
}
modified = true;
}
}
}
return modified;
}
bool CommonUniformElimPass::EliminateCommonUniform(Function* func) {
bool modified = false;
modified |= UniformAccessChainConvert(func);
modified |= CommonUniformLoadElimination(func);
modified |= CommonExtractElimination(func);
modified |= CommonUniformLoadElimBlock(func);
return modified;
}
void CommonUniformElimPass::Initialize() {
// Clear collections.
comp2idx2inst_.clear();
// Initialize extension whitelist
InitExtensions();
}
bool CommonUniformElimPass::AllExtensionsSupported() const {
// If any extension not in whitelist, return false
for (auto& ei : get_module()->extensions()) {
const char* extName =
reinterpret_cast<const char*>(&ei.GetInOperand(0).words[0]);
if (extensions_whitelist_.find(extName) == extensions_whitelist_.end())
return false;
}
return true;
}
Pass::Status CommonUniformElimPass::ProcessImpl() {
// Assumes all control flow structured.
// TODO(greg-lunarg): Do SSA rewrite for non-structured control flow
if (!context()->get_feature_mgr()->HasCapability(SpvCapabilityShader))
return Status::SuccessWithoutChange;
// Assumes logical addressing only
// TODO(greg-lunarg): Add support for physical addressing
if (context()->get_feature_mgr()->HasCapability(SpvCapabilityAddresses))
return Status::SuccessWithoutChange;
if (context()->get_feature_mgr()->HasCapability(
SpvCapabilityVariablePointersStorageBuffer))
return Status::SuccessWithoutChange;
// Do not process if any disallowed extensions are enabled
if (!AllExtensionsSupported()) return Status::SuccessWithoutChange;
// Do not process if module contains OpGroupDecorate. Additional
// support required in KillNamesAndDecorates().
// TODO(greg-lunarg): Add support for OpGroupDecorate
for (auto& ai : get_module()->annotations())
if (ai.opcode() == SpvOpGroupDecorate) return Status::SuccessWithoutChange;
// If non-32-bit integer type in module, terminate processing
// TODO(): Handle non-32-bit integer constants in access chains
for (const Instruction& inst : get_module()->types_values())
if (inst.opcode() == SpvOpTypeInt &&
inst.GetSingleWordInOperand(kTypeIntWidthInIdx) != 32)
return Status::SuccessWithoutChange;
// Process entry point functions
ProcessFunction pfn = [this](Function* fp) {
return EliminateCommonUniform(fp);
};
bool modified = context()->ProcessEntryPointCallTree(pfn);
return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
}
CommonUniformElimPass::CommonUniformElimPass() = default;
Pass::Status CommonUniformElimPass::Process() {
Initialize();
return ProcessImpl();
}
void CommonUniformElimPass::InitExtensions() {
extensions_whitelist_.clear();
extensions_whitelist_.insert({
"SPV_AMD_shader_explicit_vertex_parameter",
"SPV_AMD_shader_trinary_minmax",
"SPV_AMD_gcn_shader",
"SPV_KHR_shader_ballot",
"SPV_AMD_shader_ballot",
"SPV_AMD_gpu_shader_half_float",
"SPV_KHR_shader_draw_parameters",
"SPV_KHR_subgroup_vote",
"SPV_KHR_16bit_storage",
"SPV_KHR_device_group",
"SPV_KHR_multiview",
"SPV_NVX_multiview_per_view_attributes",
"SPV_NV_viewport_array2",
"SPV_NV_stereo_view_rendering",
"SPV_NV_sample_mask_override_coverage",
"SPV_NV_geometry_shader_passthrough",
"SPV_AMD_texture_gather_bias_lod",
"SPV_KHR_storage_buffer_storage_class",
// SPV_KHR_variable_pointers
// Currently do not support extended pointer expressions
"SPV_AMD_gpu_shader_int16",
"SPV_KHR_post_depth_coverage",
"SPV_KHR_shader_atomic_counter_ops",
"SPV_EXT_shader_stencil_export",
"SPV_EXT_shader_viewport_index_layer",
"SPV_AMD_shader_image_load_store_lod",
"SPV_AMD_shader_fragment_mask",
"SPV_EXT_fragment_fully_covered",
"SPV_AMD_gpu_shader_half_float_fetch",
"SPV_GOOGLE_decorate_string",
"SPV_GOOGLE_hlsl_functionality1",
"SPV_GOOGLE_user_type",
"SPV_NV_shader_subgroup_partitioned",
"SPV_EXT_descriptor_indexing",
"SPV_NV_fragment_shader_barycentric",
"SPV_NV_compute_shader_derivatives",
"SPV_NV_shader_image_footprint",
"SPV_NV_shading_rate",
"SPV_NV_mesh_shader",
"SPV_NV_ray_tracing",
"SPV_EXT_fragment_invocation_density",
});
}
} // namespace opt
} // namespace spvtools

View File

@ -1,213 +0,0 @@
// Copyright (c) 2016 The Khronos Group Inc.
// Copyright (c) 2016 Valve Corporation
// Copyright (c) 2016 LunarG Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SOURCE_OPT_COMMON_UNIFORM_ELIM_PASS_H_
#define SOURCE_OPT_COMMON_UNIFORM_ELIM_PASS_H_
#include <algorithm>
#include <list>
#include <map>
#include <memory>
#include <queue>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "source/opt/basic_block.h"
#include "source/opt/decoration_manager.h"
#include "source/opt/def_use_manager.h"
#include "source/opt/ir_context.h"
#include "source/opt/module.h"
#include "source/opt/pass.h"
namespace spvtools {
namespace opt {
// See optimizer.hpp for documentation.
class CommonUniformElimPass : public Pass {
using cbb_ptr = const BasicBlock*;
public:
using GetBlocksFunction =
std::function<std::vector<BasicBlock*>*(const BasicBlock*)>;
CommonUniformElimPass();
const char* name() const override { return "eliminate-common-uniform"; }
Status Process() override;
private:
// Returns true if |opcode| is a non-ptr access chain op
bool IsNonPtrAccessChain(const SpvOp opcode) const;
// Returns true if |typeInst| is a sampler or image type or a struct
// containing one, recursively.
bool IsSamplerOrImageType(const Instruction* typeInst) const;
// Returns true if |varId| is a variable containing a sampler or image.
bool IsSamplerOrImageVar(uint32_t varId) const;
// Given a load or store pointed at by |ip|, return the top-most
// non-CopyObj in its pointer operand. Also return the base pointer
// in |objId|.
Instruction* GetPtr(Instruction* ip, uint32_t* objId);
// Return true if variable is uniform
bool IsUniformVar(uint32_t varId);
// Given the type id for a struct type, checks if the struct type
// or any struct member is volatile decorated
bool IsVolatileStruct(uint32_t type_id);
// Given an OpAccessChain instruction, return true
// if the accessed variable belongs to a volatile
// decorated object or member of a struct type
bool IsAccessChainToVolatileStructType(const Instruction& AccessChainInst);
// Given an OpLoad instruction, return true if
// OpLoad has a Volatile Memory Access flag or if
// the resulting type is a volatile decorated struct
bool IsVolatileLoad(const Instruction& loadInst);
// Return true if any uses of |id| are decorate ops.
bool HasUnsupportedDecorates(uint32_t id) const;
// Return true if all uses of |id| are only name or decorate ops.
bool HasOnlyNamesAndDecorates(uint32_t id) const;
// Delete inst if it has no uses. Assumes inst has a resultId.
void DeleteIfUseless(Instruction* inst);
// Replace all instances of load's id with replId and delete load
// and its access chain, if any
Instruction* ReplaceAndDeleteLoad(Instruction* loadInst, uint32_t replId,
Instruction* ptrInst);
// For the (constant index) access chain ptrInst, create an
// equivalent load and extract
void GenACLoadRepl(const Instruction* ptrInst,
std::vector<std::unique_ptr<Instruction>>* newInsts,
uint32_t* resultId);
// Return true if all indices are constant
bool IsConstantIndexAccessChain(Instruction* acp);
// Convert all uniform access chain loads into load/extract.
bool UniformAccessChainConvert(Function* func);
// Compute structured successors for function |func|.
// A block's structured successors are the blocks it branches to
// together with its declared merge block if it has one.
// When order matters, the merge block always appears first.
// This ensures a correct depth-first search in the presence of early
// returns and kills. If the successor vector contains duplicates of
// the merge block, they are safely ignored by DFS.
//
// TODO(dnovillo): This pass computes structured successors slightly
// differently than the implementation in class Pass. Can this be refactored?
void ComputeStructuredSuccessors(Function* func);
// Compute structured block order for |func| into |structuredOrder|. This
// order has the property that dominators come before all blocks they
// dominate and merge blocks come after all blocks that are in the control
// constructs of their header.
//
// TODO(dnovillo): This pass computes structured order slightly
// differently than the implementation in class Pass. Can this be refactored?
void ComputeStructuredOrder(Function* func, std::list<BasicBlock*>* order);
// Eliminate loads of uniform variables which have previously been loaded.
// If the first load is in control flow, move it to the first block of the
// function. Most effective if preceded by UniformAccessChainConvert().
bool CommonUniformLoadElimination(Function* func);
// Eliminate loads of uniform sampler and image variables which have
// previously been loaded in the same block, for types whose loads cannot
// cross blocks.
bool CommonUniformLoadElimBlock(Function* func);
// Eliminate duplicated extracts of same id. Extract may be moved to same
// block as the id definition. This is primarily intended for extracts
// from uniform loads. Most effective if preceded by
// CommonUniformLoadElimination().
bool CommonExtractElimination(Function* func);
// For function |func|, first change all uniform constant index
// access chain loads into equivalent composite extracts. Then consolidate
// identical uniform loads into one uniform load. Finally, consolidate
// identical uniform extracts into one uniform extract. This may require
// moving a load or extract to a point which dominates all uses.
// Return true if func is modified.
//
// This pass requires the function to have structured control flow, i.e. the
// Shader capability. It also requires logical addressing, i.e. the Addresses
// capability is not enabled. It also currently does not support any
// extensions.
//
// This function currently only optimizes loads with a single index.
bool EliminateCommonUniform(Function* func);
// Initialize extensions whitelist
void InitExtensions();
// Return true if all extensions in this module are allowed by this pass.
bool AllExtensionsSupported() const;
// Return true if |op| is a decorate for non-type instruction
inline bool IsNonTypeDecorate(uint32_t op) const {
return (op == SpvOpDecorate || op == SpvOpDecorateId);
}
// Return true if |inst| is an instruction that loads a uniform variable and
// can be replaced with another uniform load instruction.
bool IsUniformLoadToBeRemoved(Instruction* inst) {
if (inst->opcode() == SpvOpLoad) {
uint32_t varId;
Instruction* ptrInst = GetPtr(inst, &varId);
if (ptrInst->opcode() == SpvOpVariable && IsUniformVar(varId) &&
!IsSamplerOrImageVar(varId) &&
!HasUnsupportedDecorates(inst->result_id()) && !IsVolatileLoad(*inst))
return true;
}
return false;
}
void Initialize();
Pass::Status ProcessImpl();
// Map from uniform variable id to its common load id
std::unordered_map<uint32_t, uint32_t> uniform2load_id_;
// Map of extract composite ids to map of indices to insts
// TODO(greg-lunarg): Consider std::vector.
std::unordered_map<uint32_t,
std::unordered_map<uint32_t, std::list<Instruction*>>>
comp2idx2inst_;
// Extensions supported by this pass.
std::unordered_set<std::string> extensions_whitelist_;
// Map from block to its structured successor blocks. See
// ComputeStructuredSuccessors() for definition.
std::unordered_map<const BasicBlock*, std::vector<BasicBlock*>>
block2structured_succs_;
};
} // namespace opt
} // namespace spvtools
#endif // SOURCE_OPT_COMMON_UNIFORM_ELIM_PASS_H_

View File

@ -629,39 +629,12 @@ bool InlinePass::GenInlineCode(
return true;
}
bool InlinePass::IsInlinableFunctionCall(Instruction* inst) {
bool InlinePass::IsInlinableFunctionCall(const Instruction* inst) {
if (inst->opcode() != SpvOp::SpvOpFunctionCall) return false;
const uint32_t calleeFnId =
inst->GetSingleWordOperand(kSpvFunctionCallFunctionId);
const auto ci = inlinable_.find(calleeFnId);
if (ci == inlinable_.cend()) {
return false;
}
if (funcs_with_opkill_.count(calleeFnId) == 0) {
return true;
}
// We cannot inline into a continue construct if the function has an OpKill.
auto* cfg_analysis = context()->GetStructuredCFGAnalysis();
BasicBlock* bb = context()->get_instr_block(inst);
uint32_t loop_header_id = cfg_analysis->ContainingLoop(bb->id());
if (loop_header_id == 0) {
// Not in a loop, so we can inline.
return true;
}
BasicBlock* loop_header_bb = context()->get_instr_block(loop_header_id);
uint32_t loop_continue =
loop_header_bb->GetLoopMergeInst()->GetSingleWordOperand(1);
Function* caller_func = bb->GetParent();
DominatorAnalysis* dom = context()->GetDominatorAnalysis(caller_func);
if (dom->Dominates(loop_continue, bb->id())) {
// The function call is the continue construct and the callee contains an
// OpKill.
return false;
}
return true;
return ci != inlinable_.cend();
}
void InlinePass::UpdateSucceedingPhis(
@ -738,9 +711,6 @@ bool InlinePass::IsInlinableFunction(Function* func) {
// the returns as a branch to the loop's merge block. However, this can only
// done validly if the return was not in a loop in the original function.
// Also remember functions with multiple (early) returns.
// Do not inline functions with an OpKill because they may be inlined into a
// continue construct.
AnalyzeReturns(func);
if (no_return_in_loop_.find(func->result_id()) == no_return_in_loop_.cend()) {
return false;
@ -771,13 +741,6 @@ void InlinePass::InitializeInline() {
}
// Compute inlinability
if (IsInlinableFunction(&fn)) inlinable_.insert(fn.result_id());
bool has_opkill = !fn.WhileEachInst(
[](Instruction* inst) { return inst->opcode() != SpvOpKill; });
if (has_opkill) {
funcs_with_opkill_.insert(fn.result_id());
}
}
}

View File

@ -122,7 +122,7 @@ class InlinePass : public Pass {
UptrVectorIterator<BasicBlock> call_block_itr);
// Return true if |inst| is a function call that can be inlined.
bool IsInlinableFunctionCall(Instruction* inst);
bool IsInlinableFunctionCall(const Instruction* inst);
// Return true if |func| does not have a return that is
// nested in a structured if, switch or loop.
@ -159,9 +159,6 @@ class InlinePass : public Pass {
// Set of ids of functions with no returns in loop
std::set<uint32_t> no_return_in_loop_;
// Set of ids of functions that contain an OpKill
std::unordered_set<uint32_t> funcs_with_opkill_;
// Set of ids of inlinable functions
std::set<uint32_t> inlinable_;

View File

@ -372,6 +372,11 @@ uint32_t Instruction::GetTypeComponent(uint32_t element) const {
return subtype;
}
Instruction* Instruction::InsertBefore(std::unique_ptr<Instruction>&& i) {
i.get()->InsertBefore(this);
return i.release();
}
Instruction* Instruction::InsertBefore(
std::vector<std::unique_ptr<Instruction>>&& list) {
Instruction* first_node = list.front().get();
@ -382,11 +387,6 @@ Instruction* Instruction::InsertBefore(
return first_node;
}
Instruction* Instruction::InsertBefore(std::unique_ptr<Instruction>&& i) {
i.get()->InsertBefore(this);
return i.release();
}
bool Instruction::IsValidBasePointer() const {
uint32_t tid = type_id();
if (tid == 0) {

View File

@ -398,8 +398,13 @@ class Instruction : public utils::IntrusiveNodeBase<Instruction> {
inline bool operator!=(const Instruction&) const;
inline bool operator<(const Instruction&) const;
Instruction* InsertBefore(std::vector<std::unique_ptr<Instruction>>&& list);
// Takes ownership of the instruction owned by |i| and inserts it immediately
// before |this|. Returns the inserted instruction.
Instruction* InsertBefore(std::unique_ptr<Instruction>&& i);
// Takes ownership of the instructions in |list| and inserts them in order
// immediately before |this|. Returns the first inserted instruction.
// Assumes the list is non-empty.
Instruction* InsertBefore(std::vector<std::unique_ptr<Instruction>>&& list);
using utils::IntrusiveNodeBase<Instruction>::InsertBefore;
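A minimal usage sketch of the single-instruction overload; |inst|, |where|, and |ctx| are hypothetical names for a source instruction, the insertion anchor, and their IRContext:

std::unique_ptr<Instruction> clone(inst->Clone(ctx));
clone->SetResultId(ctx->TakeNextId());
// Ownership moves into the intrusive list; |inserted| is the raw pointer.
Instruction* inserted = where->InsertBefore(std::move(clone));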
// Returns true if |this| is an instruction defining a constant, but not a

View File

@ -264,6 +264,28 @@ void InstrumentPass::GenStageStreamWriteCode(uint32_t stage_idx,
GenFragCoordEltDebugOutputCode(
base_offset_id, uint_frag_coord_inst->result_id(), u, builder);
} break;
case SpvExecutionModelRayGenerationNV:
case SpvExecutionModelIntersectionNV:
case SpvExecutionModelAnyHitNV:
case SpvExecutionModelClosestHitNV:
case SpvExecutionModelMissNV:
case SpvExecutionModelCallableNV: {
// Load and store LaunchIdNV.
uint32_t launch_id = GenVarLoad(
context()->GetBuiltinInputVarId(SpvBuiltInLaunchIdNV), builder);
Instruction* x_launch_inst = builder->AddIdLiteralOp(
GetUintId(), SpvOpCompositeExtract, launch_id, 0);
Instruction* y_launch_inst = builder->AddIdLiteralOp(
GetUintId(), SpvOpCompositeExtract, launch_id, 1);
Instruction* z_launch_inst = builder->AddIdLiteralOp(
GetUintId(), SpvOpCompositeExtract, launch_id, 2);
GenDebugOutputFieldCode(base_offset_id, kInstRayTracingOutLaunchIdX,
x_launch_inst->result_id(), builder);
GenDebugOutputFieldCode(base_offset_id, kInstRayTracingOutLaunchIdY,
y_launch_inst->result_id(), builder);
GenDebugOutputFieldCode(base_offset_id, kInstRayTracingOutLaunchIdZ,
z_launch_inst->result_id(), builder);
} break;
default: { assert(false && "unsupported stage"); } break;
}
}
@ -843,7 +865,12 @@ bool InstrumentPass::InstProcessEntryPointCallTree(InstProcessFunction& pfn) {
stage != SpvExecutionModelGeometry &&
stage != SpvExecutionModelGLCompute &&
stage != SpvExecutionModelTessellationControl &&
stage != SpvExecutionModelTessellationEvaluation)
stage != SpvExecutionModelTessellationEvaluation &&
stage != SpvExecutionModelRayGenerationNV &&
stage != SpvExecutionModelIntersectionNV &&
stage != SpvExecutionModelAnyHitNV &&
stage != SpvExecutionModelClosestHitNV &&
stage != SpvExecutionModelMissNV && stage != SpvExecutionModelCallableNV)
return false;
// Add together the roots of all entry points
std::queue<uint32_t> roots;

View File

@ -683,7 +683,8 @@ uint32_t IRContext::GetBuiltinInputVarId(uint32_t builtin) {
reg_type = type_mgr->GetRegisteredType(&uint_ty);
break;
}
case SpvBuiltInGlobalInvocationId: {
case SpvBuiltInGlobalInvocationId:
case SpvBuiltInLaunchIdNV: {
analysis::Integer uint_ty(32, false);
analysis::Type* reg_uint_ty = type_mgr->GetRegisteredType(&uint_ty);
analysis::Vector v3uint_ty(reg_uint_ty, 3);
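In SPIR-V terms, the input variable registered for LaunchIdNV is effectively the following (a sketch; ids invented):

// %uint   = OpTypeInt 32 0
// %v3uint = OpTypeVector %uint 3
// %ptr    = OpTypePointer Input %v3uint
// %var    = OpVariable %ptr Input   ; decorated BuiltIn LaunchIdNV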

View File

@ -224,6 +224,7 @@ void MergeReturnPass::BranchToBlock(BasicBlock* block, uint32_t target) {
return_inst->SetOpcode(SpvOpBranch);
return_inst->ReplaceOperands({{SPV_OPERAND_TYPE_ID, {target}}});
context()->get_def_use_mgr()->AnalyzeInstDefUse(return_inst);
new_edges_[target_block].insert(block->id());
cfg()->AddEdge(block->id(), target);
}
@ -236,28 +237,18 @@ void MergeReturnPass::UpdatePhiNodes(BasicBlock* new_source,
context()->UpdateDefUse(inst);
});
const auto& target_pred = cfg()->preds(target->id());
if (target_pred.size() == 1) {
MarkForNewPhiNodes(target, context()->get_instr_block(target_pred[0]));
} else {
// If the loop contained a break and a return, OpPhi instructions may be
// required starting from the dominator of the loop merge.
DominatorAnalysis* dom_tree =
context()->GetDominatorAnalysis(target->GetParent());
auto idom = dom_tree->ImmediateDominator(target);
if (idom) {
MarkForNewPhiNodes(target, idom);
}
}
// Store the immediate dominator for this block in case new phi nodes will be
// needed later.
RecordImmediateDominator(target);
}
void MergeReturnPass::CreatePhiNodesForInst(BasicBlock* merge_block,
Instruction& inst) {
DominatorAnalysis* dom_tree =
context()->GetDominatorAnalysis(merge_block->GetParent());
BasicBlock* inst_bb = context()->get_instr_block(&inst);
if (inst.result_id() != 0) {
BasicBlock* inst_bb = context()->get_instr_block(&inst);
std::vector<Instruction*> users_to_update;
context()->get_def_use_mgr()->ForEachUser(
&inst,
@ -295,12 +286,13 @@ void MergeReturnPass::CreatePhiNodesForInst(BasicBlock* merge_block,
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
uint32_t undef_id = Type2Undef(inst.type_id());
std::vector<uint32_t> phi_operands;
const std::set<uint32_t>& new_edges = new_edges_[merge_block];
// Add the OpPhi operands. If the predecessor is a return block use undef,
// otherwise use |inst|'s id.
std::vector<uint32_t> preds = cfg()->preds(merge_block->id());
for (uint32_t pred_id : preds) {
if (return_blocks_.count(pred_id)) {
if (new_edges.count(pred_id)) {
phi_operands.push_back(undef_id);
} else {
phi_operands.push_back(inst.result_id());
@ -417,6 +409,8 @@ bool MergeReturnPass::BreakFromConstruct(
auto old_body_id = TakeNextId();
BasicBlock* old_body = block->SplitBasicBlock(context(), old_body_id, iter);
predicated->insert(old_body);
cfg()->AddEdges(old_body);
// If a return block is being split, mark the new body block also as a return
// block.
if (return_blocks_.count(block->id())) {
@ -456,14 +450,15 @@ bool MergeReturnPass::BreakFromConstruct(
builder.AddConditionalBranch(load_id, merge_block->id(), old_body->id(),
old_body->id());
// 3. Update OpPhi instructions in |merge_block|.
BasicBlock* merge_original_pred = MarkedSinglePred(merge_block);
if (merge_original_pred == nullptr) {
UpdatePhiNodes(block, merge_block);
} else if (merge_original_pred == block) {
MarkForNewPhiNodes(merge_block, old_body);
if (!new_edges_[merge_block].insert(block->id()).second) {
// It is possible that we already inserted a new edge to the merge block.
// If so, that edge now goes from |old_body| to |merge_block|.
new_edges_[merge_block].insert(old_body->id());
}
// 3. Update OpPhi instructions in |merge_block|.
UpdatePhiNodes(block, merge_block);
// 4. Update the CFG. We do this after updating the OpPhi instructions
// because |UpdatePhiNodes| assumes the edge from |block| has not been added
// to the CFG yet.
@ -659,26 +654,37 @@ void MergeReturnPass::MergeReturnBlocks(
}
void MergeReturnPass::AddNewPhiNodes() {
DominatorAnalysis* dom_tree = context()->GetDominatorAnalysis(function_);
std::list<BasicBlock*> order;
cfg()->ComputeStructuredOrder(function_, &*function_->begin(), &order);
for (BasicBlock* bb : order) {
BasicBlock* dominator = dom_tree->ImmediateDominator(bb);
if (dominator) {
AddNewPhiNodes(bb, new_merge_nodes_[bb], dominator->id());
}
AddNewPhiNodes(bb);
}
}
void MergeReturnPass::AddNewPhiNodes(BasicBlock* bb, BasicBlock* pred,
uint32_t header_id) {
DominatorAnalysis* dom_tree = context()->GetDominatorAnalysis(function_);
// Insert as a stopping point. We do not have to add anything in the block
// or above because the header dominates |bb|.
void MergeReturnPass::AddNewPhiNodes(BasicBlock* bb) {
// New phi nodes are needed for any id whose definition used to dominate |bb|,
// but no longer dominates |bb|. These are found by walking the dominator
// tree starting at the original immediate dominator of |bb| and ending at its
// current dominator.
BasicBlock* current_bb = pred;
while (current_bb != nullptr && current_bb->id() != header_id) {
// Because we are walking the updated dominator tree it is important that the
// new phi nodes for the original dominators of |bb| have already been added.
// Otherwise some ids might be missed. Consider the case where bb1 dominates
// bb2, and bb2 dominates bb3. Suppose there are changes such that bb1 no
// longer dominates bb2 and the same for bb2 and bb3. This algorithm will not
// look at the ids defined in bb1. However, calling |AddNewPhiNodes(bb2)|
// first will add a phi node in bb2 for that value. Then a call to
// |AddNewPhiNodes(bb3)| will process that value by processing the phi in bb2.
DominatorAnalysis* dom_tree = context()->GetDominatorAnalysis(function_);
BasicBlock* dominator = dom_tree->ImmediateDominator(bb);
if (dominator == nullptr) {
return;
}
BasicBlock* current_bb = new_merge_nodes_[bb];
while (current_bb != nullptr && current_bb != dominator) {
for (Instruction& inst : *current_bb) {
CreatePhiNodesForInst(bb, inst);
}
@ -686,9 +692,11 @@ void MergeReturnPass::AddNewPhiNodes(BasicBlock* bb, BasicBlock* pred,
}
}
void MergeReturnPass::MarkForNewPhiNodes(BasicBlock* block,
BasicBlock* single_original_pred) {
new_merge_nodes_[block] = single_original_pred;
void MergeReturnPass::RecordImmediateDominator(BasicBlock* block) {
DominatorAnalysis* dom_tree =
context()->GetDominatorAnalysis(block->GetParent());
auto idom = dom_tree->ImmediateDominator(block);
new_merge_nodes_[block] = idom;
}
void MergeReturnPass::InsertAfterElement(BasicBlock* element,

View File

@ -251,31 +251,19 @@ class MergeReturnPass : public MemPass {
// there are no unreachable blocks in the control flow graph.
void AddNewPhiNodes();
// Creates any new phi nodes that are needed in |bb| now that |pred| is no
// longer the only block that precedes |bb|. |header_id| is the id of the
// basic block for the loop or selection construct that merges at |bb|.
void AddNewPhiNodes(BasicBlock* bb, BasicBlock* pred, uint32_t header_id);
// Creates any new phi nodes that are needed in |bb|. |AddNewPhiNodes| must
// have already been called on the original dominators of |bb|.
void AddNewPhiNodes(BasicBlock* bb);
// Saves |block| to a list of basic blocks that will require OpPhi nodes to be
// added by calling |AddNewPhiNodes|. It is assumed that |block| used to have
// a single predecessor, |single_original_pred|, but now has more.
void MarkForNewPhiNodes(BasicBlock* block, BasicBlock* single_original_pred);
// Return the original single predecessor of |block| if it was flagged as
// having a single predecessor. |nullptr| is returned otherwise.
BasicBlock* MarkedSinglePred(BasicBlock* block) {
auto it = new_merge_nodes_.find(block);
if (it != new_merge_nodes_.end()) {
return it->second;
} else {
return nullptr;
}
}
void RecordImmediateDominator(BasicBlock* block);
// Modifies existing OpPhi instruction in |target| block to account for the
// new edge from |new_source|. The value for that edge will be an Undef. If
// |target| only had a single predecessor, then it is marked as needing new
// phi nodes. See |MarkForNewPhiNodes|.
// phi nodes. See |RecordImmediateDominator|.
//
// The CFG must not include the edge from |new_source| to |target| yet.
void UpdatePhiNodes(BasicBlock* new_source, BasicBlock* target);
@ -301,6 +289,11 @@ class MergeReturnPass : public MemPass {
// |merge_target| as the merge node.
void CreateDummyLoop(BasicBlock* merge_target);
// Returns true if |function| has an unreachable block that is neither a
// continue target that simply branches back to the header nor a merge block
// containing a single OpUnreachable instruction.
bool HasNontrivialUnreachableBlocks(Function* function);
// A stack used to keep track of the innermost contain loop and selection
// constructs.
std::vector<StructuredControlState> state_;
@ -324,12 +317,13 @@ class MergeReturnPass : public MemPass {
// after processing the current function.
BasicBlock* final_return_block_;
// This map contains the set of nodes that used to have a single predecessor,
// but now have more. They will need new OpPhi nodes. For each of the nodes,
// it is mapped to its original single predecessor. It is assumed there are no
// values that will need a phi on the new edges.
// This is a map from a node to its original immediate dominator. This is
// used to determine which values will require a new phi node.
std::unordered_map<BasicBlock*, BasicBlock*> new_merge_nodes_;
bool HasNontrivialUnreachableBlocks(Function* function);
// A map from a basic block, bb, to the set of basic blocks which represent
// the new edges that reach |bb|.
std::unordered_map<BasicBlock*, std::set<uint32_t>> new_edges_;
// Contains all return blocks that are merged. This set is populated while
// processing structured blocks and used to properly construct OpPhi

View File

@ -186,8 +186,6 @@ Optimizer& Optimizer::RegisterPerformancePasses() {
.RegisterPass(CreateDeadBranchElimPass())
.RegisterPass(CreateBlockMergePass())
.RegisterPass(CreateSimplificationPass());
// Currently exposing driver bugs resulting in crashes (#946)
// .RegisterPass(CreateCommonUniformElimPass())
}
Optimizer& Optimizer::RegisterSizePasses() {
@ -215,8 +213,6 @@ Optimizer& Optimizer::RegisterSizePasses() {
.RegisterPass(CreateDeadInsertElimPass())
.RegisterPass(CreateRedundancyEliminationPass())
.RegisterPass(CreateCFGCleanupPass())
// Currently exposing driver bugs resulting in crashes (#946)
// .RegisterPass(CreateCommonUniformElimPass())
.RegisterPass(CreateAggressiveDCEPass());
}
@ -340,8 +336,6 @@ bool Optimizer::RegisterPassFromFlag(const std::string& flag) {
RegisterPass(CreateEliminateDeadFunctionsPass());
} else if (pass_name == "eliminate-local-multi-store") {
RegisterPass(CreateLocalMultiStoreElimPass());
} else if (pass_name == "eliminate-common-uniform") {
RegisterPass(CreateCommonUniformElimPass());
} else if (pass_name == "eliminate-dead-const") {
RegisterPass(CreateEliminateDeadConstantPass());
} else if (pass_name == "eliminate-dead-inserts") {
@ -713,11 +707,6 @@ Optimizer::PassToken CreateRedundantLineInfoElimPass() {
MakeUnique<opt::ProcessLinesPass>(opt::kLinesEliminateDeadLines));
}
Optimizer::PassToken CreateCommonUniformElimPass() {
return MakeUnique<Optimizer::PassToken::Impl>(
MakeUnique<opt::CommonUniformElimPass>());
}
Optimizer::PassToken CreateCompactIdsPass() {
return MakeUnique<Optimizer::PassToken::Impl>(
MakeUnique<opt::CompactIdsPass>());

View File

@ -23,7 +23,6 @@
#include "source/opt/cfg_cleanup_pass.h"
#include "source/opt/code_sink.h"
#include "source/opt/combine_access_chains.h"
#include "source/opt/common_uniform_elim_pass.h"
#include "source/opt/compact_ids_pass.h"
#include "source/opt/copy_prop_arrays.h"
#include "source/opt/dead_branch_elim_pass.h"

View File

@ -190,6 +190,35 @@ spv_result_t ValidateTypeRuntimeArray(ValidationState_t& _,
return SPV_SUCCESS;
}
bool ContainsOpaqueType(ValidationState_t& _, const Instruction* str) {
const size_t elem_type_index = 1;
uint32_t elem_type_id;
Instruction* elem_type;
if (spvOpcodeIsBaseOpaqueType(str->opcode())) {
return true;
}
switch (str->opcode()) {
case SpvOpTypeArray:
case SpvOpTypeRuntimeArray:
elem_type_id = str->GetOperandAs<uint32_t>(elem_type_index);
elem_type = _.FindDef(elem_type_id);
return ContainsOpaqueType(_, elem_type);
case SpvOpTypeStruct:
for (size_t member_type_index = 1;
member_type_index < str->operands().size(); ++member_type_index) {
auto member_type_id = str->GetOperandAs<uint32_t>(member_type_index);
auto member_type = _.FindDef(member_type_id);
if (ContainsOpaqueType(_, member_type)) return true;
}
break;
default:
break;
}
return false;
}
spv_result_t ValidateTypeStruct(ValidationState_t& _, const Instruction* inst) {
const uint32_t struct_id = inst->GetOperandAs<uint32_t>(0);
for (size_t member_type_index = 1;
@ -289,6 +318,14 @@ spv_result_t ValidateTypeStruct(ValidationState_t& _, const Instruction* inst) {
if (num_builtin_members > 0) {
_.RegisterStructTypeWithBuiltInMember(struct_id);
}
if (spvIsVulkanEnv(_.context()->target_env) &&
!_.options()->before_hlsl_legalization && ContainsOpaqueType(_, inst)) {
return _.diag(SPV_ERROR_INVALID_ID, inst)
<< "In " << spvLogStringForEnv(_.context()->target_env)
<< ", OpTypeStruct must not contain an opaque type.";
}
return SPV_SUCCESS;
}
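A hedged example of a module fragment the new check rejects when validating for a Vulkan environment (and --before-hlsl-legalization is not set):

// %float   = OpTypeFloat 32
// %sampler = OpTypeSampler
// %str     = OpTypeStruct %float %sampler
// -> SPV_ERROR_INVALID_ID: "In <Vulkan env>, OpTypeStruct must not
//    contain an opaque type."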

View File

@ -24,7 +24,6 @@ add_spvtools_unittest(TARGET opt
cfg_test.cpp
code_sink_test.cpp
combine_access_chains_test.cpp
common_uniform_elim_test.cpp
compact_ids_test.cpp
constant_manager_test.cpp
copy_prop_array_test.cpp

File diff suppressed because it is too large.

View File

@ -3112,121 +3112,6 @@ OpFunctionEnd
SinglePassRunAndCheck<InlineExhaustivePass>(test, test, false, true);
}
TEST_F(InlineTest, DontInlineFuncWithOpKill) {
const std::string test =
R"(OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main"
OpExecutionMode %main OriginUpperLeft
OpSource GLSL 330
OpName %main "main"
OpName %kill_ "kill("
%void = OpTypeVoid
%3 = OpTypeFunction %void
%bool = OpTypeBool
%true = OpConstantTrue %bool
%main = OpFunction %void None %3
%5 = OpLabel
OpBranch %9
%9 = OpLabel
OpLoopMerge %11 %12 None
OpBranch %13
%13 = OpLabel
OpBranchConditional %true %10 %11
%10 = OpLabel
OpBranch %12
%12 = OpLabel
%16 = OpFunctionCall %void %kill_
OpBranch %9
%11 = OpLabel
OpReturn
OpFunctionEnd
%kill_ = OpFunction %void None %3
%7 = OpLabel
OpKill
OpFunctionEnd
)";
SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
SinglePassRunAndCheck<InlineExhaustivePass>(test, test, false, true);
}
TEST_F(InlineTest, InlineFuncWithOpKill) {
const std::string before =
R"(OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main"
OpExecutionMode %main OriginUpperLeft
OpSource GLSL 330
OpName %main "main"
OpName %kill_ "kill("
%void = OpTypeVoid
%3 = OpTypeFunction %void
%bool = OpTypeBool
%true = OpConstantTrue %bool
%main = OpFunction %void None %3
%5 = OpLabel
OpBranch %9
%9 = OpLabel
OpLoopMerge %11 %12 None
OpBranch %13
%13 = OpLabel
OpBranchConditional %true %10 %11
%10 = OpLabel
%16 = OpFunctionCall %void %kill_
OpBranch %12
%12 = OpLabel
OpBranch %9
%11 = OpLabel
OpReturn
OpFunctionEnd
%kill_ = OpFunction %void None %3
%7 = OpLabel
OpKill
OpFunctionEnd
)";
const std::string after =
R"(OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main"
OpExecutionMode %main OriginUpperLeft
OpSource GLSL 330
OpName %main "main"
OpName %kill_ "kill("
%void = OpTypeVoid
%3 = OpTypeFunction %void
%bool = OpTypeBool
%true = OpConstantTrue %bool
%main = OpFunction %void None %3
%5 = OpLabel
OpBranch %9
%9 = OpLabel
OpLoopMerge %11 %12 None
OpBranch %13
%13 = OpLabel
OpBranchConditional %true %10 %11
%10 = OpLabel
OpKill
%17 = OpLabel
OpBranch %12
%12 = OpLabel
OpBranch %9
%11 = OpLabel
OpReturn
OpFunctionEnd
%kill_ = OpFunction %void None %3
%7 = OpLabel
OpKill
OpFunctionEnd
)";
SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
SinglePassRunAndCheck<InlineExhaustivePass>(before, after, false, true);
}
// TODO(greg-lunarg): Add tests to verify handling of these cases:
//
// Empty modules

File diff suppressed because it is too large.

View File

@ -163,7 +163,6 @@ TEST(Optimizer, CanRegisterPassesFromFlags) {
"--eliminate-dead-branches",
"--eliminate-dead-functions",
"--eliminate-local-multi-store",
"--eliminate-common-uniform",
"--eliminate-dead-const",
"--eliminate-dead-inserts",
"--eliminate-dead-variables",

View File

@ -1724,6 +1724,62 @@ OpFunctionEnd
SinglePassRunAndMatch<MergeReturnPass>(predefs + caller + callee, true);
}
TEST_F(MergeReturnPassTest, MergeToMergeBranch) {
const std::string text =
R"(
; CHECK: [[new_undef:%\w+]] = OpUndef %uint
; CHECK: OpLoopMerge
; CHECK: OpLoopMerge [[merge1:%\w+]]
; CHECK: OpLoopMerge [[merge2:%\w+]]
; CHECK: [[merge1]] = OpLabel
; CHECK-NEXT: OpPhi %uint [[new_undef]] [[merge2]]
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %2 "main"
OpExecutionMode %2 LocalSize 100 1 1
OpSource ESSL 310
%void = OpTypeVoid
%4 = OpTypeFunction %void
%uint = OpTypeInt 32 0
%uint_1 = OpConstant %uint 1
%bool = OpTypeBool
%false = OpConstantFalse %bool
%uint_0 = OpConstant %uint 0
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%int_1 = OpConstant %int 1
%13 = OpUndef %bool
%2 = OpFunction %void None %4
%14 = OpLabel
OpBranch %15
%15 = OpLabel
OpLoopMerge %16 %17 None
OpBranch %18
%18 = OpLabel
OpLoopMerge %19 %20 None
OpBranchConditional %13 %21 %19
%21 = OpLabel
OpReturn
%20 = OpLabel
OpBranch %18
%19 = OpLabel
%22 = OpUndef %uint
OpBranch %23
%23 = OpLabel
OpBranch %16
%17 = OpLabel
OpBranch %15
%16 = OpLabel
%24 = OpCopyObject %uint %22
OpReturn
OpFunctionEnd
)";
SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
SinglePassRunAndMatch<MergeReturnPass>(text, true);
}
} // namespace
} // namespace opt
} // namespace spvtools

View File

@ -79,6 +79,15 @@ class ReturnCodeIsZero(SpirvTest):
return True, ''
class ReturnCodeIsNonZero(SpirvTest):
"""Mixin class for checking that the return code is not zero."""
def check_return_code_is_nonzero(self, status):
if not status.returncode:
return False, 'return code is 0'
return True, ''
class NoOutputOnStdout(SpirvTest):
"""Mixin class for checking that there is no output on stdout."""

View File

@ -59,7 +59,7 @@ class TestValidPassFlags(expect.ValidObjectFile1_4,
flags = [
'--ccp', '--cfg-cleanup', '--combine-access-chains', '--compact-ids',
'--convert-local-access-chains', '--copy-propagate-arrays',
'--eliminate-common-uniform', '--eliminate-dead-branches',
'--eliminate-dead-branches',
'--eliminate-dead-code-aggressive', '--eliminate-dead-const',
'--eliminate-dead-functions', '--eliminate-dead-inserts',
'--eliminate-dead-variables', '--eliminate-insert-extract',
@ -82,7 +82,6 @@ class TestValidPassFlags(expect.ValidObjectFile1_4,
'compact-ids',
'convert-local-access-chains',
'copy-propagate-arrays',
'eliminate-common-uniform',
'eliminate-dead-branches',
'eliminate-dead-code-aggressive',
'eliminate-dead-const',
@ -332,3 +331,45 @@ class TestLoopPeelingThresholdArgsInvalidNumber(expect.ErrorMessageSubstr):
spirv_args = ['--loop-peeling-threshold=a10f']
expected_error_substr = 'must have a positive integer argument'
@inside_spirv_testsuite('SpirvOptFlags')
class TestWebGPUToVulkanThenVulkanToWebGPUIsInvalid(expect.ReturnCodeIsNonZero, expect.ErrorMessageSubstr):
"""Tests Vulkan->WebGPU flag cannot be used after WebGPU->Vulkan flag."""
spirv_args = ['--webgpu-to-vulkan', '--vulkan-to-webgpu']
expected_error_substr = 'Cannot use both'
@inside_spirv_testsuite('SpirvOptFlags')
class TestVulkanToWebGPUThenWebGPUToVulkanIsInvalid(expect.ReturnCodeIsNonZero, expect.ErrorMessageSubstr):
"""Tests WebGPU->Vulkan flag cannot be used after Vulkan->WebGPU flag."""
spirv_args = ['--vulkan-to-webgpu', '--webgpu-to-vulkan']
expected_error_substr = 'Cannot use both'
@inside_spirv_testsuite('SpirvOptFlags')
class TestTargetEnvThenVulkanToWebGPUIsInvalid(expect.ReturnCodeIsNonZero, expect.ErrorMessageSubstr):
"""Tests Vulkan->WebGPU flag cannot be used after target env flag."""
spirv_args = ['--target-env=opengl4.0', '--vulkan-to-webgpu']
expected_error_substr = 'defines the target environment'
@inside_spirv_testsuite('SpirvOptFlags')
class TestVulkanToWebGPUThenTargetEnvIsInvalid(expect.ReturnCodeIsNonZero, expect.ErrorMessageSubstr):
"""Tests target env flag cannot be used after Vulkan->WebGPU flag."""
spirv_args = ['--vulkan-to-webgpu', '--target-env=opengl4.0']
expected_error_substr = 'defines the target environment'
@inside_spirv_testsuite('SpirvOptFlags')
class TestTargetEnvThenWebGPUToVulkanIsInvalid(expect.ReturnCodeIsNonZero, expect.ErrorMessageSubstr):
"""Tests WebGPU->Vulkan flag cannot be used after target env flag."""
spirv_args = ['--target-env=opengl4.0', '--webgpu-to-vulkan']
expected_error_substr = 'defines the target environment'
@inside_spirv_testsuite('SpirvOptFlags')
class TestWebGPUToVulkanThenTargetEnvIsInvalid(expect.ReturnCodeIsNonZero, expect.ErrorMessageSubstr):
"""Tests target env flag cannot be used after WebGPU->Vulkan flag."""
spirv_args = ['--webgpu-to-vulkan', '--target-env=opengl4.0']
expected_error_substr = 'defines the target environment'
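These ordering tests all exercise the same guard in the opt tool's flag parser (see the opt.cpp hunk below): each flag that pins the target environment records that it was seen, and any later flag that would also pin it is rejected. A stripped-down sketch of that pattern, with illustrative names rather than the tool's actual code:

#include <cstring>
#include <string>

struct EnvFlags {
  bool target_env_set = false;
  bool vulkan_to_webgpu_set = false;
  bool webgpu_to_vulkan_set = false;
};

// Returns an empty string when `arg` is accepted, otherwise an error
// message describing the conflict between environment-defining flags.
std::string CheckEnvFlag(EnvFlags* seen, const char* arg) {
  if (0 == strncmp(arg, "--target-env=", strlen("--target-env="))) {
    if (seen->vulkan_to_webgpu_set || seen->webgpu_to_vulkan_set)
      return "a conversion flag already defines the target environment";
    seen->target_env_set = true;
  } else if (0 == strcmp(arg, "--vulkan-to-webgpu")) {
    if (seen->target_env_set || seen->webgpu_to_vulkan_set)
      return "the target environment is already defined";
    seen->vulkan_to_webgpu_set = true;
  } else if (0 == strcmp(arg, "--webgpu-to-vulkan")) {
    if (seen->target_env_set || seen->vulkan_to_webgpu_set)
      return "the target environment is already defined";
    seen->webgpu_to_vulkan_set = true;
  }
  return "";
}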

View File

@ -937,6 +937,26 @@ TEST_F(ValidateIdWithMessage, OpTypeStructMemberTypeBad) {
"a type."));
}
TEST_F(ValidateIdWithMessage, OpTypeStructOpaqueTypeBad) {
std::string spirv = R"(
OpCapability Shader
OpMemoryModel Logical GLSL450
OpEntryPoint Vertex %main "main"
%1 = OpTypeSampler
%2 = OpTypeStruct %1
%void = OpTypeVoid
%3 = OpTypeFunction %void
%main = OpFunction %void None %3
%5 = OpLabel
OpReturn
OpFunctionEnd
)";
CompileSuccessfully(spirv.c_str(), SPV_ENV_VULKAN_1_0);
EXPECT_EQ(SPV_ERROR_INVALID_ID, ValidateInstructions(SPV_ENV_VULKAN_1_0));
EXPECT_THAT(getDiagnosticString(),
HasSubstr("OpTypeStruct must not contain an opaque type"));
}
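Outside the googletest fixture, the same rejection can be reproduced with the SPIRV-Tools C++ wrapper; a minimal sketch (the function name is an illustrative assumption):

#include <string>
#include <vector>
#include "spirv-tools/libspirv.hpp"

// Assembles SPIR-V text and validates it for Vulkan 1.0. For the module
// above, Validate() should fail because the struct contains an opaque
// type (the OpTypeSampler member).
bool AssembleAndValidate(const std::string& text) {
  spvtools::SpirvTools tools(SPV_ENV_VULKAN_1_0);
  std::vector<uint32_t> binary;
  if (!tools.Assemble(text, &binary)) return false;
  return tools.Validate(binary);
}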
TEST_F(ValidateIdWithMessage, OpTypePointerGood) {
std::string spirv = kGLSL450MemoryModel + R"(
%1 = OpTypeInt 32 0

View File

@ -147,13 +147,6 @@ Options (in lexicographical order):)",
around known issues with some Vulkan drivers when initializing
variables.)");
printf(R"(
--eliminate-common-uniform
Perform load/load elimination for duplicate uniform values.
Converts any constant index access chain uniform loads into
its equivalent load and extract. Some loads will be moved
to facilitate sharing. Performed only on entry point
call tree functions.)");
printf(R"(
--eliminate-dead-branches
Convert conditional branches with constant condition to the
indicated unconditional branch. Delete all resulting dead
@ -728,16 +721,17 @@ OptStatus ParseFlags(int argc, const char** argv,
max_id_bound);
} else if (0 == strncmp(cur_arg,
"--target-env=", sizeof("--target-env=") - 1)) {
target_env_set = true;
if (vulkan_to_webgpu_set) {
spvtools::Error(opt_diagnostic, nullptr, {},
"Cannot use both --vulkan-to-webgpu and --target-env "
"at the same time");
"--vulkan-to-webgpu defines the target environment, "
"so --target-env cannot be set at the same time");
return {OPT_STOP, 1};
}
if (webgpu_to_vulkan_set) {
spvtools::Error(opt_diagnostic, nullptr, {},
"Cannot use both --webgpu-to-vulkan and --target-env "
"at the same time");
"--webgpu-to-vulkan defines the target environment, "
"so --target-env cannot be set at the same time");
return {OPT_STOP, 1};
}
const auto split_flag = spvtools::utils::SplitFlagArgs(cur_arg);
@ -750,32 +744,36 @@ OptStatus ParseFlags(int argc, const char** argv,
}
optimizer->SetTargetEnv(target_env);
} else if (0 == strcmp(cur_arg, "--vulkan-to-webgpu")) {
vulkan_to_webgpu_set = true;
if (target_env_set) {
spvtools::Error(opt_diagnostic, nullptr, {},
"Cannot use both --vulkan-to-webgpu and --target-env "
"at the same time");
"--vulkan-to-webgpu defines the target environment, "
"so --target-env cannot be set at the same time");
return {OPT_STOP, 1};
}
if (webgpu_to_vulkan_set) {
spvtools::Error(opt_diagnostic, nullptr, {},
"Cannot use both --vulkan-to-webgpu and "
"--webgpu-to-vulkan at the same time");
"Cannot use both --webgpu-to-vulkan and "
"--vulkan-to-webgpu at the same time, invoke twice "
"if you are wanting to go to and from");
return {OPT_STOP, 1};
}
optimizer->SetTargetEnv(SPV_ENV_WEBGPU_0);
optimizer->RegisterVulkanToWebGPUPasses();
} else if (0 == strcmp(cur_arg, "--webgpu-to-vulkan")) {
webgpu_to_vulkan_set = true;
if (target_env_set) {
spvtools::Error(opt_diagnostic, nullptr, {},
"Cannot use both --webgpu-to-vulkan and --target-env "
"at the same time");
"--webgpu-to-vulkan defines the target environment, "
"so --target-env cannot be set at the same time");
return {OPT_STOP, 1};
}
if (vulkan_to_webgpu_set) {
spvtools::Error(opt_diagnostic, nullptr, {},
"Cannot use both --webgpu-to-vulkan and "
"--vulkan-to-webgpu at the same time");
"--vulkan-to-webgpu at the same time, invoke twice "
"if you are wanting to go to and from");
return {OPT_STOP, 1};
}