// Copyright (c) 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
#include "tools/stats/stats_analyzer.h"
|
|
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <cstring>
|
|
#include <iostream>
|
|
#include <map>
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <unordered_map>
|
|
#include <unordered_set>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
#include "source/comp/markv_model.h"
|
|
#include "source/enum_string_mapping.h"
|
|
#include "source/latest_version_spirv_header.h"
|
|
#include "source/opcode.h"
|
|
#include "source/operand.h"
|
|
#include "source/spirv_constant.h"
|
|
|
|
namespace spvtools {
|
|
namespace stats {
|
|
namespace {
|
|
|
|
// Signals that the value is not in the coding scheme and a fallback method
|
|
// needs to be used.
|
|
const uint64_t kMarkvNoneOfTheAbove =
|
|
comp::MarkvModel::GetMarkvNoneOfTheAbove();
|
|
|
|
// Formats the version word |word| as "Version <major>.<minor>", using the
// SPV_SPIRV_VERSION_*_PART macros to extract the two components.
std::string GetVersionString(uint32_t word) {
  std::ostringstream text;
  text << "Version ";
  text << SPV_SPIRV_VERSION_MAJOR_PART(word);
  text << ".";
  text << SPV_SPIRV_VERSION_MINOR_PART(word);
  return text.str();
}
|
|
|
|
std::string GetGeneratorString(uint32_t word) {
|
|
return spvGeneratorStr(SPV_GENERATOR_TOOL_PART(word));
|
|
}
|
|
|
|
std::string GetOpcodeString(uint32_t word) {
|
|
return spvOpcodeString(static_cast<SpvOp>(word));
|
|
}
|
|
|
|
std::string GetCapabilityString(uint32_t word) {
|
|
return CapabilityToString(static_cast<SpvCapability>(word));
|
|
}
|
|
|
|
// Label function for histograms whose keys are directly printable: returns
// whatever streaming |key| into an output stream produces.
template <class T>
std::string KeyIsLabel(T key) {
  std::ostringstream label;
  label << key;
  return label.str();
}
|
|
|
|
// Converts the occurrence counts in |hist| into fractions of |total|.
//
// Returns a map with the same keys as |hist| where each value is
// count / total. When |total| is zero every key maps to 0.0: dividing by zero
// would yield inf or NaN, and NaN values would violate the ordering
// requirements of the comparator used when the frequencies are later sorted.
template <class Key>
std::unordered_map<Key, double> GetRecall(
    const std::unordered_map<Key, uint32_t>& hist, uint64_t total) {
  std::unordered_map<Key, double> freq;
  freq.reserve(hist.size());

  const bool has_total = total != 0;
  const double denominator = static_cast<double>(total);
  for (const auto& pair : hist) {
    const double frequency =
        has_total ? static_cast<double>(pair.second) / denominator : 0.0;
    freq.emplace(pair.first, frequency);
  }
  return freq;
}
|
|
|
|
// Normalizes |hist| by the sum of all of its counts: each key's value in the
// result is its count divided by the histogram total (delegates to GetRecall).
template <class Key>
std::unordered_map<Key, double> GetPrevalence(
    const std::unordered_map<Key, uint32_t>& hist) {
  uint64_t sum_of_counts = 0;
  for (auto it = hist.begin(); it != hist.end(); ++it) {
    sum_of_counts += it->second;
  }
  return GetRecall(hist, sum_of_counts);
}
|
|
|
|
// Prints the entries of |freq| to |out|, highest frequency first, one entry
// per line with the frequency shown as a percentage:
//   LABEL3 70%
//   LABEL1 20%
//   LABEL2 10%
// Labels are produced by calling |label_from_key| on each key. Output stops at
// the first entry whose frequency is below 0.001 (0.1%). The relative order of
// entries with equal frequency is not specified.
template <class Key>
void WriteFreq(std::ostream& out, const std::unordered_map<Key, double>& freq,
               std::string (*label_from_key)(Key)) {
  typedef std::pair<Key, double> Entry;
  const double kMinPrintedFrequency = 0.001;

  std::vector<Entry> ranked(freq.begin(), freq.end());
  std::sort(ranked.begin(), ranked.end(),
            [](const Entry& lhs, const Entry& rhs) {
              return rhs.second < lhs.second;
            });

  auto it = ranked.begin();
  while (it != ranked.end() && !(it->second < kMinPrintedFrequency)) {
    out << label_from_key(it->first) << " " << it->second * 100.0 << "%"
        << std::endl;
    ++it;
  }
}
|
|
|
|
} // namespace
|
|
|
|
// Captures |stats| and precomputes the frequency tables used by the Write*
// methods. The module count is the sum of all counts in the version histogram
// (presumably one version word is recorded per module). Version, generator,
// capability and extension tables are normalized by that module count; the
// opcode table is normalized by the total number of counted opcodes.
StatsAnalyzer::StatsAnalyzer(const SpirvStats& stats) : stats_(stats) {
  num_modules_ = 0;
  for (auto it = stats_.version_hist.begin(); it != stats_.version_hist.end();
       ++it) {
    num_modules_ += it->second;
  }

  // Per-module tables.
  version_freq_ = GetRecall(stats_.version_hist, num_modules_);
  generator_freq_ = GetRecall(stats_.generator_hist, num_modules_);
  capability_freq_ = GetRecall(stats_.capability_hist, num_modules_);
  extension_freq_ = GetRecall(stats_.extension_hist, num_modules_);

  // Table normalized by its own total.
  opcode_freq_ = GetPrevalence(stats_.opcode_hist);
}
|
|
|
|
// Writes the precomputed version frequency table to |out|, labelling each
// entry with GetVersionString.
void StatsAnalyzer::WriteVersion(std::ostream& out) {
  std::string (*const to_label)(uint32_t) = &GetVersionString;
  WriteFreq(out, version_freq_, to_label);
}
|
|
|
|
// Writes the precomputed generator frequency table to |out|, labelling each
// entry with GetGeneratorString.
void StatsAnalyzer::WriteGenerator(std::ostream& out) {
  std::string (*const to_label)(uint32_t) = &GetGeneratorString;
  WriteFreq(out, generator_freq_, to_label);
}
|
|
|
|
// Writes the precomputed capability frequency table to |out|, labelling each
// entry with GetCapabilityString.
void StatsAnalyzer::WriteCapability(std::ostream& out) {
  std::string (*const to_label)(uint32_t) = &GetCapabilityString;
  WriteFreq(out, capability_freq_, to_label);
}
|
|
|
|
// Writes the precomputed extension frequency table to |out|. The keys are
// printed as-is: KeyIsLabel is instantiated for the table's key type.
void StatsAnalyzer::WriteExtension(std::ostream& out) {
  WriteFreq(out, extension_freq_, &KeyIsLabel);
}
|
|
|
|
// Writes the number of distinct opcodes in the opcode frequency table,
// followed by the table itself with each entry labelled by GetOpcodeString.
void StatsAnalyzer::WriteOpcode(std::ostream& out) {
  const auto unique_opcodes = opcode_freq_.size();
  out << "Total unique opcodes used: " << unique_opcodes << std::endl;

  std::string (*const to_label)(uint32_t) = &GetOpcodeString;
  WriteFreq(out, opcode_freq_, to_label);
}
|
|
|
|
// Writes one frequency table per constant-literal histogram in |stats_|
// (f32, f64, u16, s16, u32, s32, u64, s64), each under its own heading.
// Every table is normalized by the total count of its own histogram and the
// literal values themselves are used as labels. Sections after the first are
// separated by an empty line.
void StatsAnalyzer::WriteConstantLiterals(std::ostream& out) {
  out << "Constant literals" << std::endl;

  out << "Float 32" << std::endl;
  const auto f32_freq = GetPrevalence(stats_.f32_constant_hist);
  WriteFreq(out, f32_freq, KeyIsLabel);

  out << std::endl;
  out << "Float 64" << std::endl;
  const auto f64_freq = GetPrevalence(stats_.f64_constant_hist);
  WriteFreq(out, f64_freq, KeyIsLabel);

  out << std::endl;
  out << "Unsigned int 16" << std::endl;
  const auto u16_freq = GetPrevalence(stats_.u16_constant_hist);
  WriteFreq(out, u16_freq, KeyIsLabel);

  out << std::endl;
  out << "Signed int 16" << std::endl;
  const auto s16_freq = GetPrevalence(stats_.s16_constant_hist);
  WriteFreq(out, s16_freq, KeyIsLabel);

  out << std::endl;
  out << "Unsigned int 32" << std::endl;
  const auto u32_freq = GetPrevalence(stats_.u32_constant_hist);
  WriteFreq(out, u32_freq, KeyIsLabel);

  out << std::endl;
  out << "Signed int 32" << std::endl;
  const auto s32_freq = GetPrevalence(stats_.s32_constant_hist);
  WriteFreq(out, s32_freq, KeyIsLabel);

  out << std::endl;
  out << "Unsigned int 64" << std::endl;
  const auto u64_freq = GetPrevalence(stats_.u64_constant_hist);
  WriteFreq(out, u64_freq, KeyIsLabel);

  out << std::endl;
  out << "Signed int 64" << std::endl;
  const auto s64_freq = GetPrevalence(stats_.s64_constant_hist);
  WriteFreq(out, s64_freq, KeyIsLabel);
}
|
|
|
|
void StatsAnalyzer::WriteOpcodeMarkov(std::ostream& out) {
|
|
if (stats_.opcode_markov_hist.empty()) return;
|
|
|
|
const std::unordered_map<uint32_t, std::unordered_map<uint32_t, uint32_t>>&
|
|
cue_to_hist = stats_.opcode_markov_hist[0];
|
|
|
|
// Sort by prevalence of the opcodes in opcode_freq_ (descending).
|
|
std::vector<std::pair<uint32_t, std::unordered_map<uint32_t, uint32_t>>>
|
|
sorted_cue_to_hist(cue_to_hist.begin(), cue_to_hist.end());
|
|
std::sort(
|
|
sorted_cue_to_hist.begin(), sorted_cue_to_hist.end(),
|
|
[this](const std::pair<uint32_t, std::unordered_map<uint32_t, uint32_t>>&
|
|
left,
|
|
const std::pair<uint32_t, std::unordered_map<uint32_t, uint32_t>>&
|
|
right) {
|
|
const double lf = opcode_freq_[left.first];
|
|
const double rf = opcode_freq_[right.first];
|
|
if (lf == rf) return right.first > left.first;
|
|
return lf > rf;
|
|
});
|
|
|
|
for (const auto& kv : sorted_cue_to_hist) {
|
|
const uint32_t cue = kv.first;
|
|
const double kFrequentEnoughToAnalyze = 0.0001;
|
|
if (opcode_freq_[cue] < kFrequentEnoughToAnalyze) continue;
|
|
|
|
const std::unordered_map<uint32_t, uint32_t>& hist = kv.second;
|
|
|
|
uint32_t total = 0;
|
|
for (const auto& pair : hist) {
|
|
total += pair.second;
|
|
}
|
|
|
|
std::vector<std::pair<uint32_t, uint32_t>> sorted_hist(hist.begin(),
|
|
hist.end());
|
|
std::sort(sorted_hist.begin(), sorted_hist.end(),
|
|
[](const std::pair<uint32_t, uint32_t>& left,
|
|
const std::pair<uint32_t, uint32_t>& right) {
|
|
if (left.second == right.second)
|
|
return right.first > left.first;
|
|
return left.second > right.second;
|
|
});
|
|
|
|
for (const auto& pair : sorted_hist) {
|
|
const double prior = opcode_freq_[pair.first];
|
|
const double posterior =
|
|
static_cast<double>(pair.second) / static_cast<double>(total);
|
|
out << GetOpcodeString(cue) << " -> " << GetOpcodeString(pair.first)
|
|
<< " " << posterior * 100 << "% (base rate " << prior * 100
|
|
<< "%, pair occurrences " << pair.second << ")" << std::endl;
|
|
}
|
|
}
|
|
}
|
|
|
|
} // namespace stats
|
|
} // namespace spvtools
|