Mercurial > hg > CbC > CbC_llvm
view tools/llvm-exegesis/lib/Latency.cpp @ 148:63bd29f05246
merged
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 14 Aug 2019 19:46:37 +0900 |
parents | c2174574ed3a |
children |
line wrap: on
line source
//===-- Latency.cpp ---------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Latency.h" #include "Assembler.h" #include "BenchmarkRunner.h" #include "MCInstrDescView.h" #include "PerfHelper.h" #include "Target.h" #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/Support/FormatVariadic.h" namespace llvm { namespace exegesis { struct ExecutionClass { ExecutionMode Mask; const char *Description; } static const kExecutionClasses[] = { {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS | ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS, "Repeating a single implicitly serial instruction"}, {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS, "Repeating a single explicitly serial instruction"}, {ExecutionMode::SERIAL_VIA_MEMORY_INSTR | ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR, "Repeating two instructions"}, }; static constexpr size_t kMaxAliasingInstructions = 10; static std::vector<Instruction> computeAliasingInstructions(const LLVMState &State, const Instruction &Instr, size_t MaxAliasingInstructions) { // Randomly iterate the set of instructions. std::vector<unsigned> Opcodes; Opcodes.resize(State.getInstrInfo().getNumOpcodes()); std::iota(Opcodes.begin(), Opcodes.end(), 0U); std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator()); std::vector<Instruction> AliasingInstructions; for (const unsigned OtherOpcode : Opcodes) { if (OtherOpcode == Instr.Description->getOpcode()) continue; const Instruction &OtherInstr = State.getIC().getInstr(OtherOpcode); if (OtherInstr.hasMemoryOperands()) continue; if (Instr.hasAliasingRegistersThrough(OtherInstr)) AliasingInstructions.push_back(std::move(OtherInstr)); if (AliasingInstructions.size() >= MaxAliasingInstructions) break; } return AliasingInstructions; } static ExecutionMode getExecutionModes(const Instruction &Instr) { ExecutionMode EM = ExecutionMode::UNKNOWN; if (Instr.hasAliasingImplicitRegisters()) EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS; if (Instr.hasTiedRegisters()) EM |= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS; if (Instr.hasMemoryOperands()) EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR; else { if (Instr.hasAliasingRegisters()) EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS; if (Instr.hasOneUseOrOneDef()) EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR; } return EM; } static void appendCodeTemplates(const LLVMState &State, const Instruction &Instr, ExecutionMode ExecutionModeBit, llvm::StringRef ExecutionClassDescription, std::vector<CodeTemplate> &CodeTemplates) { assert(isEnumValue(ExecutionModeBit) && "Bit must be a power of two"); switch (ExecutionModeBit) { case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS: // Nothing to do, the instruction is always serial. LLVM_FALLTHROUGH; case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS: { // Picking whatever value for the tied variable will make the instruction // serial. CodeTemplate CT; CT.Execution = ExecutionModeBit; CT.Info = ExecutionClassDescription; CT.Instructions.push_back(Instr); CodeTemplates.push_back(std::move(CT)); return; } case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: { // Select back-to-back memory instruction. // TODO: Implement me. return; } case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: { // Making the execution of this instruction serial by selecting one def // register to alias with one use register. const AliasingConfigurations SelfAliasing(Instr, Instr); assert(!SelfAliasing.empty() && !SelfAliasing.hasImplicitAliasing() && "Instr must alias itself explicitly"); InstructionTemplate IT(Instr); // This is a self aliasing instruction so defs and uses are from the same // instance, hence twice IT in the following call. setRandomAliasing(SelfAliasing, IT, IT); CodeTemplate CT; CT.Execution = ExecutionModeBit; CT.Info = ExecutionClassDescription; CT.Instructions.push_back(std::move(IT)); CodeTemplates.push_back(std::move(CT)); return; } case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: { // Select back-to-back non-memory instruction. for (const auto OtherInstr : computeAliasingInstructions(State, Instr, kMaxAliasingInstructions)) { const AliasingConfigurations Forward(Instr, OtherInstr); const AliasingConfigurations Back(OtherInstr, Instr); InstructionTemplate ThisIT(Instr); InstructionTemplate OtherIT(OtherInstr); if (!Forward.hasImplicitAliasing()) setRandomAliasing(Forward, ThisIT, OtherIT); if (!Back.hasImplicitAliasing()) setRandomAliasing(Back, OtherIT, ThisIT); CodeTemplate CT; CT.Execution = ExecutionModeBit; CT.Info = ExecutionClassDescription; CT.Instructions.push_back(std::move(ThisIT)); CT.Instructions.push_back(std::move(OtherIT)); CodeTemplates.push_back(std::move(CT)); } return; } default: llvm_unreachable("Unhandled enum value"); } } LatencySnippetGenerator::~LatencySnippetGenerator() = default; llvm::Expected<std::vector<CodeTemplate>> LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const { std::vector<CodeTemplate> Results; const ExecutionMode EM = getExecutionModes(Instr); for (const auto EC : kExecutionClasses) { for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask)) appendCodeTemplates(State, Instr, ExecutionModeBit, EC.Description, Results); if (!Results.empty()) break; } if (Results.empty()) return llvm::make_error<BenchmarkFailure>( "No strategy found to make the execution serial"); return std::move(Results); } LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State, InstructionBenchmark::ModeE Mode) : BenchmarkRunner(State, Mode) { assert((Mode == InstructionBenchmark::Latency || Mode == InstructionBenchmark::InverseThroughput) && "invalid mode"); } LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default; llvm::Expected<std::vector<BenchmarkMeasure>> LatencyBenchmarkRunner::runMeasurements( const FunctionExecutor &Executor) const { // Cycle measurements include some overhead from the kernel. Repeat the // measure several times and take the minimum value. constexpr const int NumMeasurements = 30; int64_t MinValue = std::numeric_limits<int64_t>::max(); const char *CounterName = State.getPfmCounters().CycleCounter; if (!CounterName) llvm::report_fatal_error("sched model does not define a cycle counter"); for (size_t I = 0; I < NumMeasurements; ++I) { auto ExpectedCounterValue = Executor.runAndMeasure(CounterName); if (!ExpectedCounterValue) return ExpectedCounterValue.takeError(); if (*ExpectedCounterValue < MinValue) MinValue = *ExpectedCounterValue; } std::vector<BenchmarkMeasure> Result; switch (Mode) { case InstructionBenchmark::Latency: Result = {BenchmarkMeasure::Create("latency", MinValue)}; break; case InstructionBenchmark::InverseThroughput: Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)}; break; default: break; } return std::move(Result); } } // namespace exegesis } // namespace llvm