9916 lines
397 KiB
Diff
9916 lines
397 KiB
Diff
From a9863e2b6e6783aa9be0b9d1d187084fd4b32a3a Mon Sep 17 00:00:00 2001
|
|
From: Muhammad Asif Manzoor <muhammad.asif.manzoor1@huawei.com>
|
|
Date: Thu, 21 Mar 2024 12:50:38 -0400
|
|
Subject: Add BiSheng Autotuner support for LLVM compiler
|
|
|
|
Automatic tuning is an automatic iterative process that optimizes a given
|
|
program by manipulating compilation options for optimal performance.
|
|
BiSheng Autotuner provides a resumable interface for tuning process. BiSheng
|
|
Autotuner can tune 1) individual code segments/blocks (fine grain tuning) like
|
|
loops, callsites, instructions, etc. and 2) entire modules/programs (coarse
|
|
grain tuning) for compiler flags, pass ordering, etc.
|
|
This patch enables LLVM compiler to extract tuneable code regions and then apply
|
|
suggested configuration (by Autotuner) to find out the optimal configurations.
|
|
---
|
|
llvm/cmake/modules/CrossCompile.cmake | 1 +
|
|
llvm/cmake/modules/HandleLLVMOptions.cmake | 8 +
|
|
llvm/include/llvm/Analysis/AutotuningDump.h | 75 ++
|
|
llvm/include/llvm/Analysis/LoopInfo.h | 13 +
|
|
llvm/include/llvm/Analysis/Passes.h | 10 +
|
|
llvm/include/llvm/AutoTuner/AutoTuning.h | 486 ++++++++++++
|
|
.../llvm/AutoTuner/AutoTuningRemarkManager.h | 43 ++
|
|
.../llvm/AutoTuner/AutoTuningRemarkStreamer.h | 47 ++
|
|
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 13 +
|
|
llvm/include/llvm/IR/Function.h | 37 +
|
|
llvm/include/llvm/IR/InstrTypes.h | 24 +
|
|
llvm/include/llvm/IR/Instructions.h | 24 +
|
|
llvm/include/llvm/IR/Module.h | 3 +
|
|
llvm/include/llvm/IR/StructuralHash.h | 14 +
|
|
llvm/include/llvm/InitializePasses.h | 5 +
|
|
llvm/include/llvm/LinkAllPasses.h | 8 +
|
|
llvm/include/llvm/Remarks/Remark.h | 32 +
|
|
llvm/include/llvm/Support/CommandLine.h | 17 +
|
|
llvm/include/llvm/Transforms/Scalar.h | 17 +
|
|
.../Transforms/Scalar/AutoTuningCompile.h | 170 +++++
|
|
.../llvm/Transforms/Utils/UnrollLoop.h | 4 +
|
|
llvm/lib/Analysis/AutotuningDump.cpp | 265 +++++++
|
|
llvm/lib/Analysis/CMakeLists.txt | 2 +
|
|
llvm/lib/Analysis/InlineAdvisor.cpp | 18 +
|
|
llvm/lib/Analysis/InlineCost.cpp | 29 +
|
|
llvm/lib/Analysis/LoopInfo.cpp | 52 ++
|
|
llvm/lib/AutoTuner/AutoTuning.cpp | 705 ++++++++++++++++++
|
|
.../lib/AutoTuner/AutoTuningRemarkManager.cpp | 299 ++++++++
|
|
.../AutoTuner/AutoTuningRemarkStreamer.cpp | 55 ++
|
|
llvm/lib/AutoTuner/CMakeLists.txt | 11 +
|
|
llvm/lib/CMakeLists.txt | 1 +
|
|
llvm/lib/CodeGen/CMakeLists.txt | 1 +
|
|
llvm/lib/CodeGen/CalcSpillWeights.cpp | 30 +
|
|
llvm/lib/CodeGen/MachineBasicBlock.cpp | 36 +
|
|
llvm/lib/CodeGen/MachineScheduler.cpp | 44 ++
|
|
llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 19 +
|
|
llvm/lib/IR/AsmWriter.cpp | 151 ++++
|
|
llvm/lib/IR/CMakeLists.txt | 1 +
|
|
llvm/lib/IR/Function.cpp | 34 +
|
|
llvm/lib/IR/Instructions.cpp | 86 +++
|
|
llvm/lib/IR/StructuralHash.cpp | 114 +++
|
|
llvm/lib/Passes/PassBuilder.cpp | 5 +
|
|
llvm/lib/Passes/PassBuilderPipelines.cpp | 46 ++
|
|
llvm/lib/Passes/PassRegistry.def | 13 +
|
|
llvm/lib/Passes/StandardInstrumentations.cpp | 23 +
|
|
.../lib/Remarks/BitstreamRemarkSerializer.cpp | 8 +
|
|
llvm/lib/Remarks/RemarkStreamer.cpp | 4 +
|
|
llvm/lib/Remarks/YAMLRemarkParser.cpp | 122 +++
|
|
llvm/lib/Remarks/YAMLRemarkParser.h | 6 +
|
|
llvm/lib/Remarks/YAMLRemarkSerializer.cpp | 84 +++
|
|
llvm/lib/Support/CommandLine.cpp | 41 +
|
|
llvm/lib/Transforms/IPO/CMakeLists.txt | 1 +
|
|
llvm/lib/Transforms/IPO/Inliner.cpp | 36 +
|
|
llvm/lib/Transforms/IPO/SampleProfile.cpp | 14 +
|
|
.../Transforms/Instrumentation/CMakeLists.txt | 1 +
|
|
.../Instrumentation/PGOInstrumentation.cpp | 8 +
|
|
.../Transforms/Scalar/AutoTuningCompile.cpp | 334 +++++++++
|
|
llvm/lib/Transforms/Scalar/CMakeLists.txt | 2 +
|
|
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 187 +++++
|
|
llvm/lib/Transforms/Scalar/Scalar.cpp | 4 +
|
|
llvm/lib/Transforms/Scalar/Sink.cpp | 5 +
|
|
llvm/lib/Transforms/Utils/CMakeLists.txt | 1 +
|
|
llvm/lib/Transforms/Utils/LCSSA.cpp | 5 +
|
|
llvm/lib/Transforms/Utils/LoopSimplify.cpp | 8 +
|
|
llvm/lib/Transforms/Utils/LoopUnroll.cpp | 3 +
|
|
llvm/lib/Transforms/Vectorize/CMakeLists.txt | 1 +
|
|
.../Vectorize/LoopVectorizationLegality.cpp | 12 +
|
|
.../Transforms/Vectorize/LoopVectorize.cpp | 34 +
|
|
.../Inputs/unroll_template.yaml | 8 +
|
|
.../AutotuningDump/create-data-dir.ll | 65 ++
|
|
llvm/test/AutoTuning/AutotuningDump/unroll.ll | 35 +
|
|
.../autotune_datadir/baseline_config.yaml | 9 +
|
|
.../autotune_datadir/random_config.yaml | 9 +
|
|
.../AutoTuning/BaselineConfig/Inputs/test.ll | 117 +++
|
|
.../BaselineConfig/apply_baseline_config.ll | 11 +
|
|
llvm/test/AutoTuning/BaselineConfig/opp.ll | 67 ++
|
|
.../CodeRegionFilter/function-filtering.ll | 62 ++
|
|
.../Error/Inputs/invalid-format.yaml | 3 +
|
|
.../AutoTuning/Error/Inputs/template.yaml | 10 +
|
|
.../AutoTuning/Error/file-not-found-error.ll | 29 +
|
|
.../AutoTuning/Error/invalid-yaml-error.ll | 27 +
|
|
.../AutoTuning/Error/malformed-input-error.ll | 136 ++++
|
|
llvm/test/AutoTuning/Error/output-error.ll | 28 +
|
|
llvm/test/AutoTuning/Error/valid-input.ll | 27 +
|
|
.../Inputs/template.yaml | 9 +
|
|
.../inc-compile-parse-input.ll | 103 +++
|
|
.../AutoTuning/Inline/Inputs/template.yaml | 9 +
|
|
.../Inline/Inputs/template_no_metadata.yaml | 7 +
|
|
.../test/AutoTuning/Inline/duplicate-calls.ll | 96 +++
|
|
llvm/test/AutoTuning/Inline/force-inline.ll | 84 +++
|
|
.../AutoTuning/Inline/inline-attribute.ll | 85 +++
|
|
llvm/test/AutoTuning/Inline/opp.ll | 64 ++
|
|
.../LoopUnroll/Inputs/debug_loc_template.yaml | 10 +
|
|
.../LoopUnroll/Inputs/loop_nest.yaml | 10 +
|
|
.../LoopUnroll/Inputs/loop_peel.yaml | 9 +
|
|
.../Inputs/unroll_raw_template.yaml | 10 +
|
|
.../LoopUnroll/Inputs/unroll_template.yaml | 10 +
|
|
.../Inputs/unroll_template_no_metadata.yaml | 8 +
|
|
llvm/test/AutoTuning/LoopUnroll/debug_loc.ll | 161 ++++
|
|
.../AutoTuning/LoopUnroll/dynamic_config.ll | 56 ++
|
|
llvm/test/AutoTuning/LoopUnroll/loop_nest.ll | 136 ++++
|
|
llvm/test/AutoTuning/LoopUnroll/loop_peel.ll | 53 ++
|
|
.../AutoTuning/LoopUnroll/unroll-pragma.ll | 129 ++++
|
|
llvm/test/AutoTuning/LoopUnroll/unroll.ll | 101 +++
|
|
llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll | 113 +++
|
|
.../Inputs/vectorize_template.yaml | 9 +
|
|
.../vectorize_template_no_metadata.yaml | 7 +
|
|
.../LoopVectorize/force-vector-interleave.ll | 88 +++
|
|
.../Inputs/misched_x86_template.yaml | 10 +
|
|
.../misched_x86_bidirectional.ll | 73 ++
|
|
.../MachineScheduler/misched_x86_bottomup.ll | 72 ++
|
|
.../MachineScheduler/misched_x86_topdown.ll | 72 ++
|
|
.../AutoTuning/MetaData/structural_hash.ll | 234 ++++++
|
|
.../AutoTuning/MetaData/write_no_metadata.ll | 191 +++++
|
|
.../MetaData/write_with_metadata.ll | 204 +++++
|
|
.../AutoTuning/PGO/Inputs/pgo-instr.proftext | 17 +
|
|
.../PGO/Inputs/pgo-sample-cold.prof | 7 +
|
|
.../AutoTuning/PGO/Inputs/pgo-sample-hot.prof | 7 +
|
|
llvm/test/AutoTuning/PGO/pgo-instr-filters.ll | 61 ++
|
|
.../test/AutoTuning/PGO/pgo-sample-filters.ll | 138 ++++
|
|
.../Inputs/pass_invocation.yaml | 10 +
|
|
.../PassInvocation/pass_invocation_read.ll | 64 ++
|
|
.../PassInvocation/pass_invocation_write.ll | 67 ++
|
|
.../PhaseOrdering/Inputs/template.yaml | 8 +
|
|
.../AutoTuning/PhaseOrdering/pass-order.ll | 65 ++
|
|
.../AutoTuning/SwitchLowering/switch-opp.ll | 47 ++
|
|
llvm/test/AutoTuning/lit.local.cfg | 2 +
|
|
llvm/test/AutoTuning/opt-opp.ll | 315 ++++++++
|
|
llvm/test/lit.site.cfg.py.in | 1 +
|
|
llvm/tools/llc/llc.cpp | 19 +
|
|
llvm/tools/opt/NewPMDriver.cpp | 42 ++
|
|
llvm/tools/opt/opt.cpp | 53 ++
|
|
132 files changed, 7801 insertions(+)
|
|
create mode 100644 llvm/include/llvm/Analysis/AutotuningDump.h
|
|
create mode 100644 llvm/include/llvm/AutoTuner/AutoTuning.h
|
|
create mode 100644 llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h
|
|
create mode 100644 llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h
|
|
create mode 100644 llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h
|
|
create mode 100644 llvm/lib/Analysis/AutotuningDump.cpp
|
|
create mode 100644 llvm/lib/AutoTuner/AutoTuning.cpp
|
|
create mode 100644 llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp
|
|
create mode 100644 llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp
|
|
create mode 100644 llvm/lib/AutoTuner/CMakeLists.txt
|
|
create mode 100644 llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp
|
|
create mode 100644 llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml
|
|
create mode 100644 llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll
|
|
create mode 100644 llvm/test/AutoTuning/AutotuningDump/unroll.ll
|
|
create mode 100644 llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml
|
|
create mode 100644 llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml
|
|
create mode 100644 llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll
|
|
create mode 100644 llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll
|
|
create mode 100644 llvm/test/AutoTuning/BaselineConfig/opp.ll
|
|
create mode 100644 llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll
|
|
create mode 100644 llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml
|
|
create mode 100644 llvm/test/AutoTuning/Error/Inputs/template.yaml
|
|
create mode 100644 llvm/test/AutoTuning/Error/file-not-found-error.ll
|
|
create mode 100644 llvm/test/AutoTuning/Error/invalid-yaml-error.ll
|
|
create mode 100644 llvm/test/AutoTuning/Error/malformed-input-error.ll
|
|
create mode 100644 llvm/test/AutoTuning/Error/output-error.ll
|
|
create mode 100644 llvm/test/AutoTuning/Error/valid-input.ll
|
|
create mode 100644 llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml
|
|
create mode 100644 llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll
|
|
create mode 100644 llvm/test/AutoTuning/Inline/Inputs/template.yaml
|
|
create mode 100644 llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml
|
|
create mode 100644 llvm/test/AutoTuning/Inline/duplicate-calls.ll
|
|
create mode 100644 llvm/test/AutoTuning/Inline/force-inline.ll
|
|
create mode 100644 llvm/test/AutoTuning/Inline/inline-attribute.ll
|
|
create mode 100644 llvm/test/AutoTuning/Inline/opp.ll
|
|
create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml
|
|
create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml
|
|
create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml
|
|
create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml
|
|
create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml
|
|
create mode 100644 llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml
|
|
create mode 100644 llvm/test/AutoTuning/LoopUnroll/debug_loc.ll
|
|
create mode 100644 llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll
|
|
create mode 100644 llvm/test/AutoTuning/LoopUnroll/loop_nest.ll
|
|
create mode 100644 llvm/test/AutoTuning/LoopUnroll/loop_peel.ll
|
|
create mode 100644 llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll
|
|
create mode 100644 llvm/test/AutoTuning/LoopUnroll/unroll.ll
|
|
create mode 100644 llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll
|
|
create mode 100644 llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template.yaml
|
|
create mode 100644 llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template_no_metadata.yaml
|
|
create mode 100644 llvm/test/AutoTuning/LoopVectorize/force-vector-interleave.ll
|
|
create mode 100644 llvm/test/AutoTuning/MachineScheduler/Inputs/misched_x86_template.yaml
|
|
create mode 100644 llvm/test/AutoTuning/MachineScheduler/misched_x86_bidirectional.ll
|
|
create mode 100644 llvm/test/AutoTuning/MachineScheduler/misched_x86_bottomup.ll
|
|
create mode 100644 llvm/test/AutoTuning/MachineScheduler/misched_x86_topdown.ll
|
|
create mode 100644 llvm/test/AutoTuning/MetaData/structural_hash.ll
|
|
create mode 100644 llvm/test/AutoTuning/MetaData/write_no_metadata.ll
|
|
create mode 100644 llvm/test/AutoTuning/MetaData/write_with_metadata.ll
|
|
create mode 100644 llvm/test/AutoTuning/PGO/Inputs/pgo-instr.proftext
|
|
create mode 100644 llvm/test/AutoTuning/PGO/Inputs/pgo-sample-cold.prof
|
|
create mode 100644 llvm/test/AutoTuning/PGO/Inputs/pgo-sample-hot.prof
|
|
create mode 100644 llvm/test/AutoTuning/PGO/pgo-instr-filters.ll
|
|
create mode 100644 llvm/test/AutoTuning/PGO/pgo-sample-filters.ll
|
|
create mode 100644 llvm/test/AutoTuning/PassInvocation/Inputs/pass_invocation.yaml
|
|
create mode 100644 llvm/test/AutoTuning/PassInvocation/pass_invocation_read.ll
|
|
create mode 100644 llvm/test/AutoTuning/PassInvocation/pass_invocation_write.ll
|
|
create mode 100644 llvm/test/AutoTuning/PhaseOrdering/Inputs/template.yaml
|
|
create mode 100644 llvm/test/AutoTuning/PhaseOrdering/pass-order.ll
|
|
create mode 100644 llvm/test/AutoTuning/SwitchLowering/switch-opp.ll
|
|
create mode 100644 llvm/test/AutoTuning/lit.local.cfg
|
|
create mode 100644 llvm/test/AutoTuning/opt-opp.ll
|
|
|
|
diff --git a/llvm/cmake/modules/CrossCompile.cmake b/llvm/cmake/modules/CrossCompile.cmake
|
|
index 6af47b51d4c6..1a9fb4b2dddc 100644
|
|
--- a/llvm/cmake/modules/CrossCompile.cmake
|
|
+++ b/llvm/cmake/modules/CrossCompile.cmake
|
|
@@ -82,6 +82,7 @@ function(llvm_create_cross_target project_name target_name toolchain buildtype)
|
|
-DLLVM_ENABLE_PROJECTS="${llvm_enable_projects_arg}"
|
|
-DLLVM_EXTERNAL_PROJECTS="${llvm_external_projects_arg}"
|
|
-DLLVM_ENABLE_RUNTIMES="${llvm_enable_runtimes_arg}"
|
|
+ -DLLVM_ENABLE_AUTOTUNER="${LLVM_ENABLE_AUTOTUNER}"
|
|
${external_project_source_dirs}
|
|
-DLLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN="${LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN}"
|
|
-DLLVM_INCLUDE_BENCHMARKS=OFF
|
|
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
|
|
index 62a1a64d37d4..b8e9dbe29d88 100644
|
|
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
|
|
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
|
|
@@ -112,6 +112,14 @@ else()
|
|
set(BUILD_FOR_OPENEULER 0)
|
|
endif()
|
|
|
|
+option(LLVM_ENABLE_AUTOTUNER "Enable BiSheng Auto-Tuning features" OFF)
|
|
+if (LLVM_ENABLE_AUTOTUNER)
|
|
+ set(LLVM_ENABLE_AUTOTUNER 1)
|
|
+ add_definitions( -DENABLE_AUTOTUNER )
|
|
+else()
|
|
+ set(LLVM_ENABLE_AUTOTUNER 0)
|
|
+endif()
|
|
+
|
|
if(LLVM_ENABLE_EXPENSIVE_CHECKS)
|
|
add_compile_definitions(EXPENSIVE_CHECKS)
|
|
|
|
diff --git a/llvm/include/llvm/Analysis/AutotuningDump.h b/llvm/include/llvm/Analysis/AutotuningDump.h
|
|
new file mode 100644
|
|
index 000000000000..fb973f05323e
|
|
--- /dev/null
|
|
+++ b/llvm/include/llvm/Analysis/AutotuningDump.h
|
|
@@ -0,0 +1,75 @@
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+// ===-- AutotuningDump.h - Auto-Tuning-----------------------------------===//
|
|
+// The LLVM Compiler Infrastructure
|
|
+//
|
|
+// This file is distributed under the University of Illinois Open Source
|
|
+// License. See LICENSE.TXT for details.
|
|
+//
|
|
+// ===--------------------------------------------------------------------===//
|
|
+//
|
|
+// This file contains pass collecting IR of tuned regions and storing them into
|
|
+// predetermined locations, to be used later by autotuning ML guidance
|
|
+//
|
|
+// ===--------------------------------------------------------------------===//
|
|
+
|
|
+#include "llvm/Analysis/LoopInfo.h"
|
|
+#include "llvm/Analysis/LoopPass.h"
|
|
+#include "llvm/IR/PassManager.h"
|
|
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
|
|
+#include <string>
|
|
+
|
|
+namespace llvm {
|
|
+class AutotuningDump {
|
|
+public:
|
|
+ AutotuningDump(bool IncrementalCompilation = false);
|
|
+ bool run(Module &F, function_ref<LoopInfo &(Function &)> GetLI);
|
|
+
|
|
+private:
|
|
+ std::string AutoTuneDirPath;
|
|
+ std::unique_ptr<raw_ostream> createFile(const Twine &File);
|
|
+ int getConfigNumber();
|
|
+ void dumpToStream(llvm::raw_ostream &os, const Loop &L) const;
|
|
+ void dumpToStream(llvm::raw_ostream &os, const Function &F) const;
|
|
+ void dumpFunctions(llvm::Module &M);
|
|
+ void dumpLoops(llvm::Module &M, function_ref<LoopInfo &(Function &)> GetLI);
|
|
+ void dumpModule(llvm::Module &M);
|
|
+ std::string getDirectoryName(const std::string File) const;
|
|
+ std::string getFileName(std::string FilePath);
|
|
+
|
|
+ bool IsIncrementalCompilation;
|
|
+};
|
|
+
|
|
+class AutotuningDumpLegacy : public ModulePass {
|
|
+public:
|
|
+ static char ID;
|
|
+ AutotuningDumpLegacy(bool IncrementalCompilation = false);
|
|
+ StringRef getPassName() const override;
|
|
+ bool runOnModule(Module &M) override;
|
|
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
|
|
+
|
|
+private:
|
|
+ bool IsIncrementalCompilation;
|
|
+};
|
|
+
|
|
+class AutotuningDumpAnalysis
|
|
+ : public AnalysisInfoMixin<AutotuningDumpAnalysis> {
|
|
+ friend AnalysisInfoMixin<AutotuningDumpAnalysis>;
|
|
+ static AnalysisKey Key;
|
|
+
|
|
+public:
|
|
+ AutotuningDumpAnalysis(bool IncrementalCompilation = false) {
|
|
+ IsIncrementalCompilation = IncrementalCompilation;
|
|
+ }
|
|
+
|
|
+ // This pass only prints IRs of selected function or loops without doing any
|
|
+ // real analyses, thus the return value is meaningless. To avoid leaking data
|
|
+ // or memory, Result is aliased to std::optional<bool> to avoid returning an
|
|
+ // AutotuningDump object.
|
|
+ using Result = std::optional<bool>;
|
|
+ Result run(Module &M, ModuleAnalysisManager &AM);
|
|
+
|
|
+private:
|
|
+ bool IsIncrementalCompilation;
|
|
+};
|
|
+} // namespace llvm
|
|
+#endif
|
|
\ No newline at end of file
|
|
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
|
|
index 3434630c27cf..9be3e056cf76 100644
|
|
--- a/llvm/include/llvm/Analysis/LoopInfo.h
|
|
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
|
|
@@ -26,6 +26,9 @@
|
|
#include <algorithm>
|
|
#include <optional>
|
|
#include <utility>
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#endif
|
|
|
|
namespace llvm {
|
|
|
|
@@ -44,7 +47,12 @@ extern template class LoopBase<BasicBlock, Loop>;
|
|
|
|
/// Represents a single loop in the control flow graph. Note that not all SCCs
|
|
/// in the CFG are necessarily loops.
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase<BasicBlock, Loop>,
|
|
+ public autotuning::Container {
|
|
+#else
|
|
class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase<BasicBlock, Loop> {
|
|
+#endif
|
|
public:
|
|
/// A range representing the start and end location of a loop.
|
|
class LocRange {
|
|
@@ -395,6 +403,11 @@ public:
|
|
return "<unnamed loop>";
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ void initCodeRegion() override;
|
|
+ uint64_t computeStructuralHash() override;
|
|
+#endif
|
|
+
|
|
private:
|
|
Loop() = default;
|
|
|
|
diff --git a/llvm/include/llvm/Analysis/Passes.h b/llvm/include/llvm/Analysis/Passes.h
|
|
index ac1bc3549910..65f566cc75de 100644
|
|
--- a/llvm/include/llvm/Analysis/Passes.h
|
|
+++ b/llvm/include/llvm/Analysis/Passes.h
|
|
@@ -58,6 +58,16 @@ namespace llvm {
|
|
// in a function and builds the region hierarchy.
|
|
//
|
|
FunctionPass *createRegionInfoPass();
|
|
+
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ //===--------------------------------------------------------------------===//
|
|
+ //
|
|
+ // createAutotuningDumpPass - This pass collects IR of tuned regions
|
|
+ // and stores them into predetermined locations
|
|
+ // for the purpose of autotuning ML guidance
|
|
+ //
|
|
+ ModulePass *createAutotuningDumpPass();
|
|
+#endif
|
|
}
|
|
|
|
#endif
|
|
diff --git a/llvm/include/llvm/AutoTuner/AutoTuning.h b/llvm/include/llvm/AutoTuner/AutoTuning.h
|
|
new file mode 100644
|
|
index 000000000000..0f1f276306ec
|
|
--- /dev/null
|
|
+++ b/llvm/include/llvm/AutoTuner/AutoTuning.h
|
|
@@ -0,0 +1,486 @@
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+//===-- AutoTuning.h - Auto-Tuning-----------------------------------------===//
|
|
+// The LLVM Compiler Infrastructure
|
|
+//
|
|
+// This file is distributed under the University of Illinois Open Source
|
|
+// License. See LICENSE.TXT for details.
|
|
+//
|
|
+//===----------------------------------------------------------------------===//
|
|
+//
|
|
+// This file defines Auto Tuning related functions, models and interfaces.
|
|
+//
|
|
+//===----------------------------------------------------------------------===//
|
|
+
|
|
+#ifndef LLVM_AUTOTUNER_AUTOTUNING_H_
|
|
+#define LLVM_AUTOTUNER_AUTOTUNING_H_
|
|
+
|
|
+#include "llvm/ADT/DenseMapInfo.h"
|
|
+#include "llvm/ADT/Hashing.h"
|
|
+#include "llvm/ADT/SetVector.h"
|
|
+#include "llvm/ADT/SmallVector.h"
|
|
+#include "llvm/IR/DebugInfoMetadata.h"
|
|
+#include "llvm/IR/DebugLoc.h"
|
|
+#include "llvm/Support/Casting.h"
|
|
+#include <map>
|
|
+#include <memory>
|
|
+#include <string>
|
|
+#include <unordered_map>
|
|
+#include <unordered_set>
|
|
+
|
|
+// Options for AutoTuner incremental compilation.
|
|
+enum AutoTuningCompileOpt {
|
|
+ Inactive, // Disabled incremental compilation.
|
|
+ CoarseGrain, // For tuning LLVMParam.
|
|
+ FineGrain, // For tuning default code regions (Loop, CallSite, Function).
|
|
+ Basic // Same as CoarseGrain but can be applied for any code region.
|
|
+ // Can be used with ImpactRanker.
|
|
+};
|
|
+
|
|
+namespace autotuning {
|
|
+// Constant definitions for AutoTuner incremental compilation.
|
|
+const std::string CompileOptionStart = "start";
|
|
+const std::string CompileOptionEnd = "end";
|
|
+const std::string CompileOptionUnknow = "unknown";
|
|
+const std::string CompileOptionUnroll = "loop-unroll";
|
|
+const std::string CompileOptionVectorize = "loop-vectorize";
|
|
+const std::string CompileOptionInline = "inline";
|
|
+
|
|
+class ParameterBase {
|
|
+public:
|
|
+ virtual ~ParameterBase() = default;
|
|
+ enum ParameterKind {
|
|
+ PK_PARAMETER,
|
|
+ };
|
|
+ ParameterKind getKind() const { return Kind; }
|
|
+
|
|
+ explicit ParameterBase(ParameterKind K) : Kind(K) {}
|
|
+
|
|
+private:
|
|
+ const ParameterKind Kind;
|
|
+};
|
|
+
|
|
+template <typename T> class Parameter : public ParameterBase {
|
|
+public:
|
|
+ Parameter(const T &RHS) : ParameterBase(PK_PARAMETER), Value(RHS) {}
|
|
+ const T &getValue() const { return Value; }
|
|
+ void setValue(const T &RHS) { Value = RHS; }
|
|
+
|
|
+ static bool classof(const ParameterBase *P) {
|
|
+ return P->getKind() == PK_PARAMETER;
|
|
+ }
|
|
+
|
|
+private:
|
|
+ T Value;
|
|
+};
|
|
+
|
|
+/// This class manages parameters of one codeRegion.
|
|
+class ParameterManager {
|
|
+
|
|
+public:
|
|
+ // add a param into this ParameterManager
|
|
+ template <typename T>
|
|
+ void add(const std::string &ParamName, const T ParamValue) {
|
|
+ std::shared_ptr<ParameterBase> Param =
|
|
+ std::make_shared<Parameter<T>>(ParamValue);
|
|
+ this->Parameters[ParamName] = Param;
|
|
+ }
|
|
+
|
|
+ // Look up the value of a parameter by name in this ParameterManager.
|
|
+ // The found value will be assigned to the reference variable "Value".
|
|
+ // Return true if the parameter exists in this ParameterManager,
|
|
+ // and false otherwise.
|
|
+ template <typename T>
|
|
+ bool findByName(const std::string &ParamName, T &Value) const {
|
|
+ auto Iterator = Parameters.find(ParamName);
|
|
+ if (Iterator == Parameters.end()) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ auto ParamPtr = llvm::dyn_cast<Parameter<T>>(Iterator->second.get());
|
|
+ if (ParamPtr != nullptr) {
|
|
+ Value = ParamPtr->getValue();
|
|
+ return true;
|
|
+ } else {
|
|
+ return false;
|
|
+ }
|
|
+ }
|
|
+
|
|
+private:
|
|
+ std::unordered_map<std::string, std::shared_ptr<ParameterBase>> Parameters;
|
|
+};
|
|
+
|
|
+/// The debug location used to track a CodeRegion back to the source file.
|
|
+struct SourceLocation {
|
|
+ /// The source file corresponding to this CodeRegion.
|
|
+ std::string SourceFilePath;
|
|
+ unsigned SourceLine = 0;
|
|
+ unsigned SourceColumn = 0;
|
|
+
|
|
+ bool operator==(const SourceLocation &CR) const {
|
|
+ return (this->SourceFilePath == CR.SourceFilePath) &&
|
|
+ (this->SourceLine == CR.SourceLine) &&
|
|
+ (this->SourceColumn == CR.SourceColumn);
|
|
+ };
|
|
+
|
|
+ explicit operator bool() const {
|
|
+ return !(SourceFilePath.empty() && SourceLine == 0 && SourceColumn == 0);
|
|
+ }
|
|
+};
|
|
+
|
|
+enum CodeRegionType {
|
|
+ CallSite, // Code region for function inlining.
|
|
+ Function, // Used in AutoTuningDump pass for IR writing.
|
|
+ LLVMParam, // Compilation flags. Tuned individually for each module.
|
|
+ Loop, // Code region for loops.
|
|
+ MachineBasicBlock, // Instruction scheduling code region.
|
|
+ Other, // Pass ordering code region.
|
|
+ ProgramParam, // Compilation flags. Tuned collectively for program.
|
|
+ Switch, // Tuning MinJumpTableEntries parameter for switch inst.
|
|
+ Empty, // Empty CodeRegion.
|
|
+ Invalid // Invalid CodeRegion.
|
|
+};
|
|
+
|
|
+enum HotnessType {
|
|
+ Unknown,
|
|
+ Cold,
|
|
+ Hot,
|
|
+};
|
|
+
|
|
+/// DynamicOptions represent a map: Arg -> DynamicConfigs.
|
|
+/// Where Arg is a tuning parameter on the associated CodeRegion.
|
|
+/// And DynamicConfigs is the possible tuning values associated with Arg.
|
|
+typedef std::map<std::string, std::vector<unsigned int>> DynamicOptions;
|
|
+
|
|
+/// This class represents a region in source code including
|
|
+/// its name, function name, type, debug location, and associated pass name.
|
|
+class CodeRegion {
|
|
+
|
|
+public:
|
|
+ // Default constructor
|
|
+ CodeRegion(const CodeRegionType Type = CodeRegionType::Other);
|
|
+ ~CodeRegion() = default;
|
|
+ // Concrete constructors
|
|
+ CodeRegion(const std::string &Name, const std::string &FuncName,
|
|
+ const CodeRegionType &Type, const llvm::DebugLoc &DL,
|
|
+ const DynamicOptions DO = {});
|
|
+ CodeRegion(const std::string &Name, const std::string &FuncName,
|
|
+ const CodeRegionType &Type,
|
|
+ const SourceLocation &Location = SourceLocation(),
|
|
+ const DynamicOptions DO = {});
|
|
+ CodeRegion(const std::string &Name, const std::string &FuncName,
|
|
+ const std::string &PassName, const CodeRegionType &Type,
|
|
+ const SourceLocation &Location = SourceLocation(),
|
|
+ const unsigned int Invocation = 0);
|
|
+
|
|
+ bool operator==(const CodeRegion &CR) const;
|
|
+ inline bool operator!=(const CodeRegion &CR) const { return !(*this == CR); };
|
|
+
|
|
+ explicit operator bool() const {
|
|
+ return !(Name.empty() && FuncName.empty() && PassName.empty());
|
|
+ }
|
|
+
|
|
+ static std::string getTypeAsString(CodeRegionType CRType);
|
|
+ static std::string getHotnessAsString(HotnessType Hotness);
|
|
+ const std::string &getName() const { return Name; }
|
|
+ const std::string &getFuncName() const { return FuncName; }
|
|
+ const CodeRegionType &getType() const { return Type; }
|
|
+ const std::string &getFileName() const { return Location.SourceFilePath; }
|
|
+ const std::string &getTypeAsString() const { return StringType; }
|
|
+ const SourceLocation &getSourceLoc() const { return Location; }
|
|
+ const std::string &getPassName() const { return PassName; }
|
|
+ unsigned getSize() const { return Size; };
|
|
+ void setPassName(const std::string &NewPassName);
|
|
+ void setSize(unsigned Size) { this->Size = Size; };
|
|
+ void setHotness(HotnessType NewHotness) const { this->Hotness = NewHotness; }
|
|
+ HotnessType getHotness() const { return this->Hotness; }
|
|
+ std::string getHotnessAsString() const { return getHotnessAsString(Hotness); }
|
|
+ bool isCold() const { return this->Hotness == Cold; }
|
|
+ bool isHot() const { return this->Hotness == Hot; }
|
|
+ std::uint64_t getHash() const { return this->Hash; }
|
|
+ void setHash(std::uint64_t Hash) { this->Hash = Hash; }
|
|
+ DynamicOptions getAutoTunerOptions() const { return this->AutoTunerOptions; }
|
|
+ void setInvocation(unsigned int Invocation) { this->Invocation = Invocation; }
|
|
+ unsigned int getInvocation() const { return this->Invocation; }
|
|
+
|
|
+ /// Add dynamic config options with Code Region for AutoTuner to tune instead
|
|
+ /// of using static config options.
|
|
+ void addAutoTunerOptions(const std::string ParamName,
|
|
+ std::vector<unsigned int> Options) const {
|
|
+ this->AutoTunerOptions.insert(
|
|
+ std::pair<std::string, std::vector<unsigned int>>(ParamName, Options));
|
|
+ }
|
|
+ static CodeRegion getInvalidInstance();
|
|
+ static CodeRegion getEmptyInstance();
|
|
+ void setBaselineConfig(std::map<std::string, std::string> Value) const {
|
|
+ this->BaselineConfig = Value;
|
|
+ };
|
|
+ std::map<std::string, std::string> getBaselineConfig() const {
|
|
+ return this->BaselineConfig;
|
|
+ }
|
|
+
|
|
+private:
|
|
+ /// Name of the code region.
|
|
+ /// In most cases it is set to the name of a header basic block.
|
|
+ std::string Name;
|
|
+ /// Function name of this code region if any.
|
|
+ std::string FuncName;
|
|
+ /// Name of the pass which this code region is associated.
|
|
+ std::string PassName;
|
|
+ /// Type of this code region. Options are other, function, loop,
|
|
+ /// and machine basic block.
|
|
+ CodeRegionType Type;
|
|
+ /// Source Location.
|
|
+ SourceLocation Location;
|
|
+ std::string StringType;
|
|
+ /// Structural hash for the CodeRegion.
|
|
+ std::uint64_t Hash = 0;
|
|
+ /// Configs values passed to AutoTuner for dynamic setting of search space
|
|
+ /// for code regions.
|
|
+ mutable DynamicOptions AutoTunerOptions;
|
|
+ /// Configuration values passed to AutoTuner for generating the same binary
|
|
+ /// as the baseline.
|
|
+ mutable std::map<std::string, std::string> BaselineConfig;
|
|
+
|
|
+ /// Record the order of invocation of an optimization pass during the whole
|
|
+ /// compilation pipeline. It is used to differentiate multiple invocations of
|
|
+ /// the same optimization pass.
|
|
+ /// Currently, Loop Unroll pass is invoked twice during the compilation
|
|
+ /// pipeline. 'Invocation' helps to relate a code region with the invocation
|
|
+ /// of Loop Unroll pass where the code region is generated.
|
|
+ mutable unsigned int Invocation;
|
|
+
|
|
+ /// Size of this code region. Usually it refers to the number of instructions
|
|
+ /// but could be different based on implementations.
|
|
+ unsigned Size = 0;
|
|
+ mutable HotnessType Hotness = Unknown;
|
|
+
|
|
+ /// A boolean flag to record if a CR is initialized or not.
|
|
+ /// It should only be set to true by initContainer().
|
|
+ /// We only add initialized CR to TuningOpps.
|
|
+ bool Initialized = false;
|
|
+
|
|
+ friend class AutoTuningEngine;
|
|
+};
|
|
+
|
|
+/// This class is an interface for classes representing code regions in LLVM
|
|
+/// (eg. Loop, Function and MachineBasicBlock) to inherit
|
|
+/// so that auto-tuning can be enabled on them.
|
|
+/// A Container must contain a CodeRegion.
|
|
+class Container {
|
|
+
|
|
+public:
|
|
+ Container() {}
|
|
+ virtual ~Container(){};
|
|
+
|
|
+ /// Abstract methods for derived classes to override.
|
|
+ virtual void initCodeRegion() = 0;
|
|
+ virtual uint64_t computeStructuralHash() = 0;
|
|
+
|
|
+ /// Get the Container's CodeRegion.
|
|
+ const CodeRegion &getCodeRegion() const;
|
|
+ /// Set the Container's CodeRegion.
|
|
+ void setCodeRegion(const CodeRegion &NewCR);
|
|
+ /// This method is to look up the value of a parameter that corresponds to a
|
|
+ /// Container. The parameter being looked up is stored in a ParameterManager.
|
|
+ template <typename T>
|
|
+ bool lookUpParams(const std::string &ParamsName, T &Value) const;
|
|
+
|
|
+ /// Check if the code region is being tuned by config file.
|
|
+ bool requiresIRDump(bool IsFunctionIR = false) const;
|
|
+
|
|
+private:
|
|
+ CodeRegion CR;
|
|
+ friend class AutoTuningEngine;
|
|
+};
|
|
+} // end namespace autotuning
|
|
+
|
|
+namespace std {
|
|
+template <>
|
|
+// Implement hash for CodeRegion data type in std namespace. Only using common
|
|
+// attributes (with and without using 'OmitAutotuningMetadata' flag) of
|
|
+// CodeRegion. Remaining attributes are compared in overloaded == function.
|
|
+struct hash<autotuning::CodeRegion> {
|
|
+ std::size_t operator()(const autotuning::CodeRegion &CR) const {
|
|
+ return llvm::hash_combine(CR.getPassName(), CR.getType());
|
|
+ }
|
|
+};
|
|
+} // namespace std
|
|
+
|
|
+namespace llvm {
|
|
+// Forward Declaration.
|
|
+class CallBase;
|
|
+
|
|
+typedef autotuning::CodeRegion CodeRegion;
|
|
+template <> struct DenseMapInfo<CodeRegion> {
|
|
+ static bool isEqual(const CodeRegion &LHS, const CodeRegion &RHS) {
|
|
+ return LHS == RHS;
|
|
+ }
|
|
+ static inline CodeRegion getEmptyKey() {
|
|
+ return autotuning::CodeRegion::getEmptyInstance();
|
|
+ }
|
|
+ static inline CodeRegion getTombstoneKey() {
|
|
+ return autotuning::CodeRegion::getInvalidInstance();
|
|
+ }
|
|
+ // Implement hash for CodeRegion data type in llvm namespace. Only using
|
|
+ // common attributes (with and without using 'OmitAutotuningMetadata' flag)
|
|
+ // of CodeRegion. Remaining attributes are compared in overloaded ==
|
|
+ // function.
|
|
+ static unsigned getHashValue(const CodeRegion &CR) {
|
|
+ return llvm::hash_combine(CR.getPassName(), CR.getType());
|
|
+ }
|
|
+};
|
|
+} // namespace llvm
|
|
+
|
|
+namespace autotuning {
|
|
+using namespace llvm;
|
|
+typedef std::unordered_map<CodeRegion, ParameterManager> LookUpTable;
|
|
+typedef llvm::SetVector<CodeRegion> CodeRegions;
|
|
+
|
|
+/// Structure to store information of CallSite code regions which is used to
|
|
+/// get a different SourceLocation for multiple callsites (same callee) in a
|
|
+/// function when these callsites have same SourceLocation due to inlining.
|
|
+struct CallSiteLocation {
|
|
+ llvm::CallBase *CB;
|
|
+ llvm::Function *Caller;
|
|
+ llvm::Function *Callee;
|
|
+ SourceLocation SrcLoc;
|
|
+};
|
|
+
|
|
+class AutoTuningEngine {
|
|
+public:
|
|
+ AutoTuningEngine() : Enabled(false), MLEnabled(false), ParseInput(false), GenerateOutput(false) {}
|
|
+ ~AutoTuningEngine() {}
|
|
+
|
|
+ /// Initialize the Container for auto-tuning.
|
|
+ void initContainer(Container *Container, const std::string &PassName,
|
|
+ const StringRef FuncName = "", bool AddOpportunity = true,
|
|
+ unsigned int Invocation = 0);
|
|
+
|
|
+ /// Initialize auto-tuning. This method should only be called in the main
|
|
+ /// function.
|
|
+ /// \return Error::success() on success or the related Error otherwise.
|
|
+ llvm::Error init(const std::string &ModuleID);
|
|
+
|
|
+ /// Finalize auto-tuning. This method should only be called in the main
|
|
+ /// function.
|
|
+ /// \return Error::success() on success or the related Error otherwise.
|
|
+ llvm::Error finalize();
|
|
+
|
|
+ /// Return the number of tuning configuration used for this compilation.
|
|
+ llvm::Expected<int> getConfigNumber();
|
|
+
|
|
+ void enable() { Enabled = true; }
|
|
+ void disable() { Enabled = false; }
|
|
+ bool isEnabled() const { return Enabled; }
|
|
+ bool isMLEnabled() const { return MLEnabled; }
|
|
+ bool isDumpEnabled() const { return DumpEnabled; }
|
|
+ bool isGenerateOutput() const { return GenerateOutput; }
|
|
+ bool isParseInput() const { return ParseInput; }
|
|
+ bool isTuningAllowedForType(CodeRegionType CRType) const {
|
|
+ return (CodeRegionFilterTypes.count(CRType) > 0);
|
|
+ }
|
|
+ bool isThinLTOTuning() const;
|
|
+
|
|
+ /// Convert a pass-name to CodeRegionType.
|
|
+ CodeRegionType convertPassToType(std::string Pass);
|
|
+
|
|
+ /// First sets BaselineConfig value for the CR then
|
|
+ /// add a tuning opportunity into the TuningOpps list.
|
|
+ void addOpportunity(const CodeRegion &OppCR,
|
|
+ std::map<std::string, std::string> BaselineConfig = {});
|
|
+ bool hasOpportunities() const { return !TuningOpps.empty(); }
|
|
+
|
|
+ bool shouldRunOptPass(std::string FileName, std::string Pass);
|
|
+
|
|
+ /// Insert all of the callsites of a function in CallSiteLocs vector.
|
|
+ void insertCallSiteLoc(CallSiteLocation Loc);
|
|
+
|
|
+ /// Update CallSiteLocs vector with new callsites (if any) which get available
|
|
+ /// due to inlining.
|
|
+ void updateCallSiteLocs(llvm::CallBase *CB, llvm::CallBase *Ptr,
|
|
+ llvm::Function *F, unsigned int Line);
|
|
+
|
|
+ /// Clean up the CallSiteLocs vector by keeping the callsite if there are
|
|
+ /// multiple calls to same callee. This cleaning will be performed before
|
|
+ /// inlining any callsite.
|
|
+ void cleanCallSiteLoc();
|
|
+
|
|
+ /// clear the CallSiteLocs vector.
|
|
+ void clearCallSiteLocs();
|
|
+
|
|
+ /// Return the SourceLocation::SourceLine (if available).
|
|
+ std::optional<unsigned int> getCallSiteLoc(llvm::CallBase *CB);
|
|
+
|
|
+ template <typename T>
|
|
+ bool lookUpGlobalParams(const std::string &ParamsName, T &Value) const;
|
|
+ /// A map storing llvm parameters.
|
|
+ std::unordered_map<std::string, std::string> LLVMParams;
|
|
+ /// A map storing program parameters.
|
|
+ std::unordered_map<std::string, std::string> ProgramParams;
|
|
+
|
|
+private:
|
|
+ std::string ModuleID;
|
|
+ /// This boolean indicates if the auto-tuning mode is enabled.
|
|
+ /// It will be set to true if any of the following command line options
|
|
+ /// (auto-tuning-input, auto-tuning-result and auto-tuning-opp) is specified.
|
|
+ bool Enabled;
|
|
+ /// This boolean indicates if the ML guidance feature is enabled in
|
|
+ /// Autotuner. It will be set to true if -fautotune-rank is specified.
|
|
+ bool MLEnabled;
|
|
+ /// This boolean indicates if the IR dumping is enabled or not. IR dumping
|
|
+ /// is enabled for ML guidance feature. It can also be enabled with command
|
|
+ /// line compiler flag 'enable-autotuning-dump'.
|
|
+ bool DumpEnabled = false;
|
|
+ /// This boolean indicates if compiler is parsing/using 'config.yaml' file
|
|
+ /// generated by AutoTuner and use the configuration values instead of
|
|
+ /// determining with compiler heuristic.
|
|
+ bool ParseInput;
|
|
+ /// This boolean indicates if compiler is creating/generating opportunity
|
|
+ /// file(s) which will be consumed by AutoTuner to create the search space.
|
|
+ bool GenerateOutput;
|
|
+ /// A map of filename and set of optimization passes; an optimization pass
|
|
+ /// will be added to this set if a CodeRegion belongs to the optimization
|
|
+ /// pass.
|
|
+ std::unordered_map<std::string, std::unordered_set<std::string>> OppPassList;
|
|
+
|
|
+ /// Vector to store all of the duplicate calls in a function and the calls
|
|
+ /// which get available due to inlining.
|
|
+ SmallVector<CallSiteLocation, 10> CallSiteLocs;
|
|
+
|
|
+ /// A set to store the code region types that will be tuned in current
|
|
+ /// autotuning flow. This will be populated with code region types based on
|
|
+ /// 'auto-tuning-type-filter' for -fautotune-generate and the types will be
|
|
+ /// extracted from config.yaml in case of -fautotune.
|
|
+ /// This set is used to apply type-based filtering prior to creating/
|
|
+ /// initializing a code region.
|
|
+ std::unordered_set<CodeRegionType> CodeRegionFilterTypes;
|
|
+
|
|
+ // A statically initialized map used to convert 'pass-name' to
|
|
+ // 'CodeRegionType'.
|
|
+ std::unordered_map<std::string, CodeRegionType> PTTMap;
|
|
+
|
|
+ /// A map of CodeRegion and ParameterManager to keep track of all the
|
|
+ /// parameters of code regions loaded from input config file.
|
|
+ LookUpTable ParamTable;
|
|
+ /// A list of CodeRegions as tuning opportunities
|
|
+ CodeRegions TuningOpps;
|
|
+ /// A ParameterManager for global parameters.
|
|
+ ParameterManager GlobalParams;
|
|
+
|
|
+ /// Apply filters for CodeRegions.
|
|
+ void applyOppFilters(CodeRegions &CRs);
|
|
+
|
|
+ /// Apply function name filter for CodeRegions.
|
|
+ bool applyFunctionFilter(std::string FuncName);
|
|
+
|
|
+ friend class Container;
|
|
+ friend class CodeRegion;
|
|
+ friend class AutoTuningRemarkManager;
|
|
+};
|
|
+
|
|
+extern class AutoTuningEngine Engine; // AutoTuning Engine
|
|
+
|
|
+} // end namespace autotuning
|
|
+
|
|
+#endif /* LLVM_AUTOTUNER_AUTOTUNING_H_ */
|
|
+#endif
|
|
diff --git a/llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h b/llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h
|
|
new file mode 100644
|
|
index 000000000000..153a2c6246ad
|
|
--- /dev/null
|
|
+++ b/llvm/include/llvm/AutoTuner/AutoTuningRemarkManager.h
|
|
@@ -0,0 +1,43 @@
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+//===- llvm/AutoTuner/AutoTuningRemarkManager.h - Remark Manager ----------===//
|
|
+//
|
|
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
+// See https://llvm.org/LICENSE.txt for license information.
|
|
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
+//
|
|
+//===----------------------------------------------------------------------===//
|
|
+//
|
|
+// This file declares the main interface for inputting and outputting
|
|
+// remarks for AutoTuning.
|
|
+//
|
|
+//===----------------------------------------------------------------------===//
|
|
+
|
|
+#ifndef LLVM_AUTOTUNINGREMARKMANAGER_H
|
|
+#define LLVM_AUTOTUNINGREMARKMANAGER_H
|
|
+
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#include "llvm/Remarks/RemarkStreamer.h"
|
|
+#include "llvm/Support/Error.h"
|
|
+#include <string>
|
|
+#include <unordered_map>
|
|
+#include <vector>
|
|
+
|
|
+namespace autotuning {
|
|
+class AutoTuningRemarkManager {
|
|
+public:
|
|
+ /// Read a list of parameters from input file.
|
|
+ /// \return Error::success() on success or the related Error otherwise.
|
|
+ static llvm::Error read(autotuning::AutoTuningEngine &E,
|
|
+ const std::string &InputName,
|
|
+ const std::string &RemarksFormat);
|
|
+
|
|
+ /// Dump a list of CodeRegions as tuning opportunities into a file.
|
|
+ /// \return Error::success() on success or the related Error otherwise.
|
|
+ static llvm::Error dump(const autotuning::AutoTuningEngine &E,
|
|
+ const std::string &DirPath,
|
|
+ const std::string &RemarksFormat,
|
|
+ const std::string &RemarksPasses);
|
|
+};
|
|
+} // namespace autotuning
|
|
+#endif // LLVM_AUTOTUNINGREMARKMANAGER_H
|
|
+#endif
|
|
diff --git a/llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h b/llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h
|
|
new file mode 100644
|
|
index 000000000000..0096139b12e9
|
|
--- /dev/null
|
|
+++ b/llvm/include/llvm/AutoTuner/AutoTuningRemarkStreamer.h
|
|
@@ -0,0 +1,47 @@
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+// ===------------ llvm/AutoTuner/AutoTuningRemarkStreamer.h --------------===//
|
|
+//
|
|
+// The LLVM Compiler Infrastructure
|
|
+//
|
|
+// This file is distributed under the University of Illinois Open Source
|
|
+// License. See LICENSE.TXT for details.
|
|
+//
|
|
+// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+//
|
|
+// ===---------------------------------------------------------------------===//
|
|
+//
|
|
+// This file contains the implementation of the conversion between AutoTuner
|
|
+// CodeRegions and serializable remarks::Remark objects.
|
|
+//
|
|
+// ===---------------------------------------------------------------------===//
|
|
+
|
|
+#ifndef LLVM_AUTOTUNER_AUTOTUNINGREMARKSTREAMER_H
|
|
+#define LLVM_AUTOTUNER_AUTOTUNINGREMARKSTREAMER_H
|
|
+
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#include "llvm/Remarks/Remark.h"
|
|
+#include "llvm/Remarks/RemarkStreamer.h"
|
|
+#include "llvm/Support/Error.h"
|
|
+#include "llvm/Support/ToolOutputFile.h"
|
|
+#include <memory>
|
|
+#include <string>
|
|
+
|
|
+namespace llvm {
|
|
+/// Streamer for AutoTuner remarks which has logic for dealing with CodeRegions.
|
|
+class AutoTuningRemarkStreamer {
|
|
+ remarks::RemarkStreamer &RS;
|
|
+ /// Convert CodeRegion into remark objects.
|
|
+ remarks::Remark toRemark(const autotuning::CodeRegion &CR);
|
|
+
|
|
+public:
|
|
+ AutoTuningRemarkStreamer(remarks::RemarkStreamer &RS) : RS(RS) {}
|
|
+ /// Emit a CodeRegion through the streamer.
|
|
+ void emit(const autotuning::CodeRegion &CR);
|
|
+ /// Set a pass filter based on a regex \p Filter.
|
|
+ /// Returns an error if the regex is invalid.
|
|
+ Error setFilter(StringRef Filter);
|
|
+};
|
|
+} // end namespace llvm
|
|
+
|
|
+#endif // LLVM_AUTOTUNER_AUTOTUNINGREMARKSTREAMER_H
|
|
+#endif
|
|
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
|
|
index 52388692c196..95ac9acf4e5e 100644
|
|
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
|
|
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
|
|
@@ -27,6 +27,9 @@
|
|
#include <iterator>
|
|
#include <string>
|
|
#include <vector>
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#endif
|
|
|
|
namespace llvm {
|
|
|
|
@@ -91,9 +94,19 @@ public:
|
|
void deleteNode(MachineInstr *MI);
|
|
};
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+class MachineBasicBlock
|
|
+ : public ilist_node_with_parent<MachineBasicBlock, MachineFunction>,
|
|
+ public autotuning::Container {
|
|
+#else
|
|
class MachineBasicBlock
|
|
: public ilist_node_with_parent<MachineBasicBlock, MachineFunction> {
|
|
+#endif
|
|
public:
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ void initCodeRegion() override;
|
|
+ uint64_t computeStructuralHash() override;
|
|
+#endif
|
|
/// Pair of physical register and lane mask.
|
|
/// This is not simply a std::pair typedef because the members should be named
|
|
/// clearly as they both have an integer type.
|
|
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
|
|
index 93cf0d27e9a7..c0db48ae1789 100644
|
|
--- a/llvm/include/llvm/IR/Function.h
|
|
+++ b/llvm/include/llvm/IR/Function.h
|
|
@@ -37,6 +37,9 @@
|
|
#include <cstdint>
|
|
#include <memory>
|
|
#include <string>
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#endif
|
|
|
|
namespace llvm {
|
|
|
|
@@ -56,6 +59,24 @@ class User;
|
|
class BranchProbabilityInfo;
|
|
class BlockFrequencyInfo;
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+class AutoTuningEnabledFunction : public autotuning::Container {
|
|
+public:
|
|
+ AutoTuningEnabledFunction() = delete;
|
|
+ void initCodeRegion() override;
|
|
+ void setHot() { this->Hotness = autotuning::Hot; }
|
|
+ void setCold() { this->Hotness = autotuning::Cold; }
|
|
+ autotuning::HotnessType getHotness() const { return this->Hotness; }
|
|
+ uint64_t computeStructuralHash() override;
|
|
+
|
|
+private:
|
|
+ AutoTuningEnabledFunction(Function *F) { Func = F; };
|
|
+ Function *Func;
|
|
+ autotuning::HotnessType Hotness = autotuning::Unknown;
|
|
+ friend class Function;
|
|
+};
|
|
+#endif
|
|
+
|
|
class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject,
|
|
public ilist_node<Function> {
|
|
public:
|
|
@@ -68,6 +89,13 @@ public:
|
|
using arg_iterator = Argument *;
|
|
using const_arg_iterator = const Argument *;
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // There is one-to-one correspondence between ATEFunction and the current
|
|
+ // Function object to avoid messing up the LLVM User and owned Use classes'
|
|
+ // memory layout.
|
|
+ AutoTuningEnabledFunction ATEFunction = AutoTuningEnabledFunction(this);
|
|
+#endif
|
|
+
|
|
private:
|
|
// Important things that make up a function!
|
|
BasicBlockListType BasicBlocks; ///< The basic blocks
|
|
@@ -128,6 +156,11 @@ public:
|
|
void operator=(const Function&) = delete;
|
|
~Function();
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // Return the auto-tuning enabled version of this Function object.
|
|
+ AutoTuningEnabledFunction &getATEFunction() { return ATEFunction; }
|
|
+#endif
|
|
+
|
|
// This is here to help easily convert from FunctionT * (Function * or
|
|
// MachineFunction *) in BlockFrequencyInfoImpl to Function * by calling
|
|
// FunctionT->getFunction().
|
|
@@ -840,7 +873,11 @@ public:
|
|
/// AssemblyAnnotationWriter.
|
|
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW = nullptr,
|
|
bool ShouldPreserveUseListOrder = false,
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ bool IsForDebug = false, bool PrintCompleteIR = false) const;
|
|
+#else
|
|
bool IsForDebug = false) const;
|
|
+#endif
|
|
|
|
/// viewCFG - This function is meant for use from the debugger. You can just
|
|
/// say 'call F->viewCFG()' and a ghostview window should pop up from the
|
|
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
|
|
index 6095b0a1be69..dcc9bbee30fa 100644
|
|
--- a/llvm/include/llvm/IR/InstrTypes.h
|
|
+++ b/llvm/include/llvm/IR/InstrTypes.h
|
|
@@ -1169,6 +1169,23 @@ public:
|
|
using OperandBundleDef = OperandBundleDefT<Value *>;
|
|
using ConstOperandBundleDef = OperandBundleDefT<const Value *>;
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+//===----------------------------------------------------------------------===//
|
|
+// AutoTuningEnabledCallSite Class
|
|
+//===----------------------------------------------------------------------===//
|
|
+class CallBase;
|
|
+class AutoTuningEnabledCallSite : public autotuning::Container {
|
|
+public:
|
|
+ AutoTuningEnabledCallSite() = delete;
|
|
+ void initCodeRegion() override;
|
|
+ uint64_t computeStructuralHash() override;
|
|
+ AutoTuningEnabledCallSite(CallBase *CallBase) { CB = CallBase; }
|
|
+
|
|
+private:
|
|
+ CallBase *CB;
|
|
+};
|
|
+#endif
|
|
+
|
|
//===----------------------------------------------------------------------===//
|
|
// CallBase Class
|
|
//===----------------------------------------------------------------------===//
|
|
@@ -1229,6 +1246,13 @@ protected:
|
|
unsigned getNumSubclassExtraOperandsDynamic() const;
|
|
|
|
public:
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // There is one-to-one correspondence between ATECallSite and CallBase class
|
|
+ // to enable auto-tuning.
|
|
+ std::unique_ptr<AutoTuningEnabledCallSite> ATECallSite =
|
|
+ std::make_unique<AutoTuningEnabledCallSite>(this);
|
|
+#endif
|
|
+
|
|
using Instruction::getContext;
|
|
|
|
/// Create a clone of \p CB with a different set of operand bundles and
|
|
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
|
|
index 8d60384e1a32..9d638af6eeef 100644
|
|
--- a/llvm/include/llvm/IR/Instructions.h
|
|
+++ b/llvm/include/llvm/IR/Instructions.h
|
|
@@ -3287,6 +3287,23 @@ struct OperandTraits<BranchInst> : public VariadicOperandTraits<BranchInst, 1> {
|
|
|
|
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BranchInst, Value)
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+//===----------------------------------------------------------------------===//
|
|
+// AutoTuningEnabledSwitchInst Class
|
|
+//===----------------------------------------------------------------------===//
|
|
+class SwitchInst;
|
|
+
|
|
+class AutoTuningEnabledSwitchInst : public autotuning::Container {
|
|
+public:
|
|
+ AutoTuningEnabledSwitchInst() = delete;
|
|
+ void initCodeRegion() override;
|
|
+ uint64_t computeStructuralHash() override;
|
|
+ AutoTuningEnabledSwitchInst(SwitchInst *SwitchInst) { SI = SwitchInst; }
|
|
+
|
|
+private:
|
|
+ SwitchInst *SI;
|
|
+};
|
|
+#endif
|
|
//===----------------------------------------------------------------------===//
|
|
// SwitchInst Class
|
|
//===----------------------------------------------------------------------===//
|
|
@@ -3332,6 +3349,13 @@ protected:
|
|
public:
|
|
void operator delete(void *Ptr) { User::operator delete(Ptr); }
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // There is one-to-one correspondence between ATESwitchInst and
|
|
+ // SwitchInst class to enable AutoTuner.
|
|
+ std::unique_ptr<AutoTuningEnabledSwitchInst> ATESwitchInst =
|
|
+ std::make_unique<AutoTuningEnabledSwitchInst>(this);
|
|
+#endif
|
|
+
|
|
// -2
|
|
static const unsigned DefaultPseudoIndex = static_cast<unsigned>(~0L-1);
|
|
|
|
diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h
|
|
index 670a40b28eab..904a450a1888 100644
|
|
--- a/llvm/include/llvm/IR/Module.h
|
|
+++ b/llvm/include/llvm/IR/Module.h
|
|
@@ -38,6 +38,9 @@
|
|
#include <optional>
|
|
#include <string>
|
|
#include <vector>
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#endif
|
|
|
|
namespace llvm {
|
|
|
|
diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h
|
|
index 1bdeb85afa3c..c0bcc8153eb8 100644
|
|
--- a/llvm/include/llvm/IR/StructuralHash.h
|
|
+++ b/llvm/include/llvm/IR/StructuralHash.h
|
|
@@ -15,6 +15,9 @@
|
|
#define LLVM_IR_STRUCTURALHASH_H
|
|
|
|
#include <cstdint>
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include <vector>
|
|
+#endif
|
|
|
|
namespace llvm {
|
|
|
|
@@ -24,6 +27,17 @@ class Module;
|
|
uint64_t StructuralHash(const Function &F);
|
|
uint64_t StructuralHash(const Module &M);
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+class MachineBasicBlock;
|
|
+class BasicBlock;
|
|
+class CallBase;
|
|
+class SwitchInst;
|
|
+
|
|
+uint64_t StructuralHash(const std::vector<BasicBlock *> BBs);
|
|
+uint64_t StructuralHash(const MachineBasicBlock &MBB);
|
|
+uint64_t StructuralHash(const CallBase &CB);
|
|
+uint64_t StructuralHash(const SwitchInst &SI);
|
|
+#endif
|
|
} // end namespace llvm
|
|
|
|
#endif
|
|
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
|
|
index c6fee47b464b..80bec2d82e24 100644
|
|
--- a/llvm/include/llvm/InitializePasses.h
|
|
+++ b/llvm/include/llvm/InitializePasses.h
|
|
@@ -340,6 +340,11 @@ void initializeWasmEHPreparePass(PassRegistry&);
|
|
void initializeWinEHPreparePass(PassRegistry&);
|
|
void initializeWriteBitcodePassPass(PassRegistry&);
|
|
void initializeXRayInstrumentationPass(PassRegistry&);
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+void initializeAutotuningDumpLegacyPass(PassRegistry &);
|
|
+void initializeAutoTuningCompileFunctionLegacyPass(PassRegistry &);
|
|
+void initializeAutoTuningCompileModuleLegacyPass(PassRegistry &);
|
|
+#endif
|
|
|
|
} // end namespace llvm
|
|
|
|
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
|
|
index 7420ea64e954..3a8ecb1399f1 100644
|
|
--- a/llvm/include/llvm/LinkAllPasses.h
|
|
+++ b/llvm/include/llvm/LinkAllPasses.h
|
|
@@ -54,6 +54,9 @@
|
|
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
|
|
#include "llvm/Transforms/Vectorize.h"
|
|
#include <cstdlib>
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/Transforms/Scalar/AutoTuningCompile.h"
|
|
+#endif
|
|
|
|
namespace {
|
|
struct ForcePassLinking {
|
|
@@ -93,6 +96,11 @@ namespace {
|
|
(void) llvm::createInstSimplifyLegacyPass();
|
|
(void) llvm::createInstructionCombiningPass();
|
|
(void) llvm::createJMCInstrumenterPass();
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ (void) llvm::createAutotuningDumpPass();
|
|
+ (void) llvm::createAutoTuningCompileFunctionLegacyPass();
|
|
+ (void) llvm::createAutoTuningCompileModuleLegacyPass();
|
|
+#endif
|
|
(void) llvm::createKCFIPass();
|
|
(void) llvm::createLCSSAPass();
|
|
(void) llvm::createLICMPass();
|
|
diff --git a/llvm/include/llvm/Remarks/Remark.h b/llvm/include/llvm/Remarks/Remark.h
|
|
index a66f7ed73f2f..3bcc0c710498 100644
|
|
--- a/llvm/include/llvm/Remarks/Remark.h
|
|
+++ b/llvm/include/llvm/Remarks/Remark.h
|
|
@@ -20,6 +20,10 @@
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include <optional>
|
|
#include <string>
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include <map>
|
|
+#include <vector>
|
|
+#endif
|
|
|
|
namespace llvm {
|
|
namespace remarks {
|
|
@@ -47,6 +51,9 @@ struct Argument {
|
|
StringRef Key;
|
|
// FIXME: We might want to be able to store other types than strings here.
|
|
StringRef Val;
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ std::optional<std::vector<StringRef>> VectorVal;
|
|
+#endif
|
|
// If set, the debug location corresponding to the value.
|
|
std::optional<RemarkLocation> Loc;
|
|
|
|
@@ -65,6 +72,9 @@ enum class Type {
|
|
Analysis,
|
|
AnalysisFPCommute,
|
|
AnalysisAliasing,
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ AutoTuning,
|
|
+#endif
|
|
Failure,
|
|
First = Unknown,
|
|
Last = Failure
|
|
@@ -105,6 +115,28 @@ struct Remark {
|
|
/// Mangled name of the function that triggers the emssion of this remark.
|
|
StringRef FunctionName;
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ /// Type of the code region that the remark is associated with.
|
|
+ std::optional<StringRef> CodeRegionType;
|
|
+
|
|
+ /// Configuration value for generating the same baseline binary associated
|
|
+ /// with this remark.
|
|
+ std::optional<std::map<std::string, std::string>> BaselineConfig;
|
|
+
|
|
+ /// Hash of the code region that the remark is associated with.
|
|
+ std::optional<uint64_t> CodeRegionHash;
|
|
+
|
|
+ /// Configs values passed to AutoTuner for dynamic setting of search space
|
|
+ /// for code regions.
|
|
+ std::optional<std::map<std::string, std::vector<unsigned int>>>
|
|
+ AutoTunerOptions;
|
|
+
|
|
+ /// Invocation/Registering of Optimization Pass in the compilation pipeline.
|
|
+ /// It is used to differentiate between different invocations of same
|
|
+ /// optimization pass.
|
|
+ std::optional<unsigned int> Invocation;
|
|
+#endif
|
|
+
|
|
/// The location in the source file of the remark.
|
|
std::optional<RemarkLocation> Loc;
|
|
|
|
diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h
|
|
index d2079fead668..c59dba2749f0 100644
|
|
--- a/llvm/include/llvm/Support/CommandLine.h
|
|
+++ b/llvm/include/llvm/Support/CommandLine.h
|
|
@@ -40,6 +40,9 @@
|
|
#include <type_traits>
|
|
#include <vector>
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include <unordered_map>
|
|
+#endif
|
|
namespace llvm {
|
|
|
|
namespace vfs {
|
|
@@ -72,6 +75,20 @@ bool ParseCommandLineOptions(int argc, const char *const *argv,
|
|
const char *EnvVar = nullptr,
|
|
bool LongOptionsUseDoubleDash = false);
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+// It will parse AutoTuner options (LLVMParams & ProgramParams) and add them as
|
|
+// command line flags for the compilation process. These options are suggested
|
|
+// by AutoTuner during tuning flow. This function will always be called after
|
|
+// AutoTuner initialization.
|
|
+// Returns true on success. Otherwise, this will print the error message to
|
|
+// stderr and exit.
|
|
+bool ParseAutoTunerOptions(
|
|
+ std::unordered_map<std::string, std::string> LLVMParams,
|
|
+ std::unordered_map<std::string, std::string> ProgramParams,
|
|
+ StringRef Overview = "", raw_ostream *Errs = nullptr,
|
|
+ const char *EnvVar = nullptr, bool LongOptionsUseDoubleDash = false);
|
|
+#endif
|
|
+
|
|
// Function pointer type for printing version information.
|
|
using VersionPrinterTy = std::function<void(raw_ostream &)>;
|
|
|
|
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
|
|
index aaba710cfde6..e69beeade947 100644
|
|
--- a/llvm/include/llvm/Transforms/Scalar.h
|
|
+++ b/llvm/include/llvm/Transforms/Scalar.h
|
|
@@ -16,6 +16,10 @@
|
|
|
|
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
|
|
#include <functional>
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/Pass.h"
|
|
+#include <string>
|
|
+#endif
|
|
|
|
namespace llvm {
|
|
|
|
@@ -299,6 +303,19 @@ Pass *createLoopSimplifyCFGPass();
|
|
//
|
|
FunctionPass *createInstSimplifyLegacyPass();
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+//===--------------------------------------------------------------------===//
|
|
+//
|
|
+// createAutotuningCompilePass - It writes IR files with -fautotune-generate
|
|
+// for autotuning flow. It also enables/disables the execution of optimization
|
|
+// passes in subsequent compilations (with -fautotune) based on autotuning
|
|
+// methodology and available opportunities.
|
|
+//
|
|
+FunctionPass *
|
|
+createAutoTuningCompileFunctionLegacyPass(std::string Pass = "unknown");
|
|
+ModulePass *
|
|
+createAutoTuningCompileModuleLegacyPass(std::string Pass = "unknown");
|
|
+#endif
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
diff --git a/llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h b/llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h
|
|
new file mode 100644
|
|
index 000000000000..2cbb48f336ef
|
|
--- /dev/null
|
|
+++ b/llvm/include/llvm/Transforms/Scalar/AutoTuningCompile.h
|
|
@@ -0,0 +1,170 @@
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+//===---------------- AutoTuningCompile.h - Auto-Tuning -------------------===//
|
|
+//
|
|
+// The LLVM Compiler Infrastructure
|
|
+//
|
|
+// This file is distributed under the University of Illinois Open Source
|
|
+// License. See LICENSE.TXT for details.
|
|
+//
|
|
+// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+//
|
|
+//===----------------------------------------------------------------------===//
|
|
+//
|
|
+/// \file
|
|
+/// This file declares the interface for AutoTuning Incremental Compilation.
|
|
+/// Incremental compilation requires two passes 1) Module Pass and 2) Function
|
|
+/// Pass for legacy pass manager. It requires an additional Loop Pass for new
|
|
+/// pass manager.
|
|
+/// AutoTuningOptPassGate class is also defined here which is used to enable/
|
|
+/// disable the execution of optimization passes for the compilation pipeline.
|
|
+//
|
|
+//===----------------------------------------------------------------------===//
|
|
+
|
|
+#ifndef LLVM_AUTOTUNER_AUTOTUNING_COMPILE_H_
|
|
+#define LLVM_AUTOTUNER_AUTOTUNING_COMPILE_H_
|
|
+
|
|
+#include "llvm/Analysis/LoopAnalysisManager.h"
|
|
+#include "llvm/Analysis/LoopInfo.h"
|
|
+#include "llvm/Analysis/LoopPass.h"
|
|
+#include "llvm/IR/OptBisect.h"
|
|
+#include "llvm/IR/PassManager.h"
|
|
+#include "llvm/Pass.h"
|
|
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
|
|
+
|
|
+namespace llvm {
|
|
+
|
|
+class Pass;
|
|
+
|
|
+// Skips or runs optimization passes.
|
|
+class AutoTuningOptPassGate : public OptPassGate {
|
|
+public:
|
|
+ explicit AutoTuningOptPassGate(bool Skip = false) : Skip(Skip) {}
|
|
+
|
|
+ bool shouldRunPass(const StringRef PassName,
|
|
+ StringRef IRDescription) override;
|
|
+ bool isEnabled() const override { return true; }
|
|
+ bool checkPass(const StringRef PassName, const StringRef TargetDesc);
|
|
+ void setSkip(bool Skip) { this->Skip = Skip; }
|
|
+ bool getSkip() const { return Skip; }
|
|
+
|
|
+private:
|
|
+ bool Skip;
|
|
+};
|
|
+
|
|
+// Returns a static AutoTuningOptPassGate object which will be used to register
|
|
+// CallBack for OptBisect instrumentation.
|
|
+// It will also be used by AutoTuningCompile passes to enable/disable
|
|
+// optimization passes.
|
|
+AutoTuningOptPassGate &getAutoTuningOptPassGate();
|
|
+
|
|
+class AutoTuningCompileModule {
|
|
+public:
|
|
+ explicit AutoTuningCompileModule(std::string Pass = "unknown");
|
|
+ bool run(Module &M);
|
|
+ // Write IR files for each module to be re-used in subsequent compilations
|
|
+ // for autotuning cycles. It only works with -fautotune-generate.
|
|
+ void writeIRFiles(Module &M) const;
|
|
+ // Enable/Disable execution of optimization passes in subsequent compilations
|
|
+ // based on autotuning methodology and available opportunities. It only works
|
|
+ // with -fautotune
|
|
+ bool modifyCompilationPipeline(Module &M) const;
|
|
+
|
|
+ static void setSkipCompilation(bool Option) { SkipCompilation = Option; }
|
|
+ static bool getSkipCompilation() { return SkipCompilation; }
|
|
+
|
|
+private:
|
|
+ static bool SkipCompilation;
|
|
+ std::string Pass = "";
|
|
+};
|
|
+
|
|
+class AutoTuningCompileModuleLegacy : public ModulePass {
|
|
+public:
|
|
+ static char ID;
|
|
+ explicit AutoTuningCompileModuleLegacy(std::string Pass = "unknown");
|
|
+ bool runOnModule(Module &M) override;
|
|
+ StringRef getPassName() const override;
|
|
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
+ AU.setPreservesAll();
|
|
+ }
|
|
+
|
|
+private:
|
|
+ std::string Pass = "";
|
|
+};
|
|
+
|
|
+class AutoTuningCompileModulePass
|
|
+ : public PassInfoMixin<AutoTuningCompileModulePass> {
|
|
+public:
|
|
+ explicit AutoTuningCompileModulePass(std::string Pass = "unknown")
|
|
+ : Pass(Pass){};
|
|
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
|
|
+
|
|
+private:
|
|
+ std::string Pass = "";
|
|
+};
|
|
+
|
|
+class AutoTuningCompileFunction {
|
|
+public:
|
|
+ explicit AutoTuningCompileFunction(std::string Pass = "unknown");
|
|
+ bool run(Function &F);
|
|
+ // Write IR files for each module to be re-used in subsequent compilations
|
|
+ // for autotuning cycles. It only works with -fautotune-generate.
|
|
+ void writeIRFiles(Module &M);
|
|
+ // Enable/Disable execution of optimization passes in subsequent compilations
|
|
+ // based on autotuning methodology and available opportunities. It only works
|
|
+ // with -fautotune
|
|
+ bool modifyCompilationPipeline(Function &F);
|
|
+
|
|
+private:
|
|
+ // A module may have multiple functions; decision to enable/disable
|
|
+ // execution of an optimization pass will be made for the first function and
|
|
+ // will be used for all of the functions in the module.
|
|
+ // 'SkipDecision' will be set once the decision is made for a specific 'Pass'.
|
|
+ bool SkipDecision = false;
|
|
+
|
|
+ // A module may have multiple functions; IR file will be written once for the
|
|
+ // entire module for a specific 'Pass'.
|
|
+ bool IsModuleWritten = false;
|
|
+ std::string Pass = "";
|
|
+};
|
|
+
|
|
+class AutoTuningCompileFunctionLegacy : public FunctionPass {
|
|
+public:
|
|
+ static char ID;
|
|
+ explicit AutoTuningCompileFunctionLegacy(std::string Pass = "unknown");
|
|
+ bool runOnFunction(Function &F) override;
|
|
+ StringRef getPassName() const override;
|
|
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
+ AU.setPreservesAll();
|
|
+ }
|
|
+
|
|
+private:
|
|
+ std::string Pass = "";
|
|
+};
|
|
+
|
|
+class AutoTuningCompileFunctionPass
|
|
+ : public PassInfoMixin<AutoTuningCompileFunctionPass> {
|
|
+public:
|
|
+ explicit AutoTuningCompileFunctionPass(std::string Pass = "unknown")
|
|
+ : Pass(Pass){};
|
|
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
|
|
+
|
|
+private:
|
|
+ std::string Pass = "";
|
|
+};
|
|
+
|
|
+class AutoTuningCompileLoopPass
|
|
+ : public PassInfoMixin<AutoTuningCompileLoopPass> {
|
|
+public:
|
|
+ explicit AutoTuningCompileLoopPass(std::string Pass = "unknown")
|
|
+ : Pass(Pass){};
|
|
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
|
|
+ LoopStandardAnalysisResults &AR, LPMUpdater &U);
|
|
+
|
|
+private:
|
|
+ std::string Pass = "";
|
|
+};
|
|
+
|
|
+} // end namespace llvm
|
|
+
|
|
+#endif /* LLVM_AUTOTUNER_AUTOTUNING_COMPILE_H_ */
|
|
+#endif
|
|
diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
|
|
index 4f3010965b59..e1cccf417898 100644
|
|
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
|
|
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
|
|
@@ -108,7 +108,11 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
|
|
unsigned TripMultiple, unsigned LoopSize,
|
|
TargetTransformInfo::UnrollingPreferences &UP,
|
|
TargetTransformInfo::PeelingPreferences &PP,
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ bool &UseUpperBound, unsigned int Invocation = 0);
|
|
+#else
|
|
bool &UseUpperBound);
|
|
+#endif
|
|
|
|
void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
|
|
ScalarEvolution *SE, DominatorTree *DT,
|
|
diff --git a/llvm/lib/Analysis/AutotuningDump.cpp b/llvm/lib/Analysis/AutotuningDump.cpp
|
|
new file mode 100644
|
|
index 000000000000..81b2bbead70e
|
|
--- /dev/null
|
|
+++ b/llvm/lib/Analysis/AutotuningDump.cpp
|
|
@@ -0,0 +1,265 @@
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+// ===-- AutotuningDump.cpp - Auto-Tuning---------------------------------===//
|
|
+// The LLVM Compiler Infrastructure
|
|
+//
|
|
+// This file is distributed under the University of Illinois Open Source
|
|
+// License. See LICENSE.TXT for details.
|
|
+//
|
|
+// ===--------------------------------------------------------------------===//
|
|
+//
|
|
+// This file contains pass collecting IR of tuned regions and storing them into
|
|
+// predetermined locations, to be used later by autotuning ML guidance
|
|
+//
|
|
+// ===--------------------------------------------------------------------===//
|
|
+#include "llvm/Analysis/AutotuningDump.h"
|
|
+#include "llvm/Analysis/Passes.h"
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#include "llvm/IR/LegacyPassManager.h"
|
|
+#include "llvm/InitializePasses.h"
|
|
+#include "llvm/Pass.h"
|
|
+#include "llvm/Support/CommandLine.h"
|
|
+#include "llvm/Support/Path.h"
|
|
+#include "llvm/Support/Process.h"
|
|
+#include "llvm/Support/raw_ostream.h"
|
|
+#include <sys/stat.h>
|
|
+
|
|
+using namespace llvm;
|
|
+
|
|
+#define DEBUG_TYPE "autotuning-dump"
|
|
+
|
|
+// Granularity at which IR is dumped: one file per module, per function or per
+// loop.
+enum AutotuningDumpOpt { whole_modules, functions, loops };
+
+// -autotuning-dump-mode selects the dump granularity; whole modules are dumped
+// unless another mode is requested.
+cl::opt<AutotuningDumpOpt> AutotuningDumpMode(
+    "autotuning-dump-mode", cl::desc("Choose autotuning dump mode:"),
+    cl::init(whole_modules),
+    cl::values(clEnumVal(whole_modules, "dump each module in its own file"),
+               clEnumVal(functions, "dump each function in its own file"),
+               clEnumVal(loops, "dump each loop in its own file")));
|
|
+
|
|
+/// Records the compilation mode and resolves the dump directory: the value of
+/// the AUTOTUNE_DATADIR environment variable when it is set, otherwise the
+/// relative path "autotune_datadir".
+AutotuningDump::AutotuningDump(bool IncrementalCompilation) {
+  IsIncrementalCompilation = IncrementalCompilation;
+
+  std::optional<std::string> EnvDir =
+      llvm::sys::Process::GetEnv("AUTOTUNE_DATADIR");
+  AutoTuneDirPath = EnvDir ? *EnvDir : std::string("autotune_datadir");
+}
|
|
+
|
|
+/// Returns the configuration number reported by the AutoTuner engine. When the
+/// engine cannot provide one, compilation is terminated with a fatal error.
+int AutotuningDump::getConfigNumber() {
+  auto MaybeConfigNum = autotuning::Engine.getConfigNumber();
+  if (!MaybeConfigNum)
+    report_fatal_error("Invalid/missing Autotuner configuration ID");
+  return *MaybeConfigNum;
+}
|
|
+
|
|
+/// Writes the printed form of loop \p L to \p os.
+void AutotuningDump::dumpToStream(llvm::raw_ostream &os, const Loop &L) const {
+  const Loop &TheLoop = L;
+  TheLoop.print(os);
+}
|
|
+
|
|
+/// Writes the IR of function \p F to \p os. No annotation writer is used,
+/// use-list order is not preserved, debug formatting is off and the
+/// complete-IR flag is set.
+void AutotuningDump::dumpToStream(llvm::raw_ostream &os,
+                                  const Function &F) const {
+  const bool PreserveUseListOrder = false;
+  const bool ForDebug = false;
+  const bool CompleteIR = true;
+  F.print(os, /*AAW*/ nullptr, PreserveUseListOrder, ForDebug, CompleteIR);
+}
|
|
+
|
|
+/// Opens \p File (a directory path followed by a file name) for writing,
+/// creating it or truncating an existing file, and returns the stream.
+///
+/// The callers in this file dereference the result unconditionally, so a
+/// failed open must not hand back a raw_fd_ostream that has no valid
+/// descriptor. On failure the error is reported on stderr and a
+/// raw_null_ostream, which discards everything written to it, is returned.
+std::unique_ptr<raw_ostream> AutotuningDump::createFile(const Twine &File) {
+  const std::string Path = File.str();
+  std::error_code EC;
+  std::unique_ptr<raw_ostream> OS = std::make_unique<raw_fd_ostream>(
+      Path, EC, sys::fs::CD_CreateAlways, sys::fs::FA_Write, sys::fs::OF_None);
+  if (EC) {
+    errs() << "could not create file: " << Path << ": " << EC.message()
+           << "\n";
+    return std::make_unique<raw_null_ostream>();
+  }
+  return OS;
+}
|
|
+
|
|
+/// Builds the directory in which IR for \p File is stored and creates it on
+/// disk: <AutoTuneDirPath>[/IR_files]/<File>/, where the "IR_files" level is
+/// only used when ML guidance is disabled.
+///
+/// \returns the directory path with a trailing '/'. The path is returned even
+/// when the directory could not be created; that failure is reported on
+/// stderr.
+std::string AutotuningDump::getDirectoryName(const std::string File) const {
+  std::string DirectoryName = AutoTuneDirPath;
+  if (!autotuning::Engine.isMLEnabled())
+    DirectoryName += "/IR_files";
+  DirectoryName += "/" + File + "/";
+
+  // Terminate the diagnostic with a newline so it does not run into whatever
+  // is written to stderr next.
+  if (std::error_code EC = sys::fs::create_directories(DirectoryName))
+    errs() << "could not create directory: " << DirectoryName << ": "
+           << EC.message() << "\n";
+
+  return DirectoryName;
+}
|
|
+
|
|
+/// Returns the name of the .ll file for a module. With ML guidance enabled the
+/// name is "<config number>.ll"; otherwise it is \p FilePath with every '/'
+/// replaced by '_' and ".ll" appended.
+std::string AutotuningDump::getFileName(std::string FilePath) {
+  if (!autotuning::Engine.isMLEnabled()) {
+    for (char &C : FilePath)
+      if (C == '/')
+        C = '_';
+    return FilePath + ".ll";
+  }
+  return std::to_string(this->getConfigNumber()) + ".ll";
+}
|
|
+
|
|
+/// Writes the IR of module \p M to a .ll file.
+///
+/// For incremental compilation the file is placed next to the source file:
+/// the absolute source path with its extension replaced by ".ll". Otherwise
+/// the file goes into the directory returned by getDirectoryName() under the
+/// name returned by getFileName().
+void AutotuningDump::dumpModule(Module &M) {
+  LLVM_DEBUG(dbgs() << "AutotuningDump: Dump module IR files.\n");
+
+  std::unique_ptr<raw_ostream> fptr;
+  if (IsIncrementalCompilation) {
+    llvm::SmallString<128> IRPath = StringRef(M.getSourceFileName());
+    llvm::sys::fs::make_absolute(IRPath);
+    // Only the file-name component may lose its extension. Searching the
+    // whole path for the last '.' would cut the path at a dotted directory
+    // name, and a path with no '.' at all would be left pointing at the
+    // source file itself, which the dump would then overwrite.
+    llvm::sys::path::replace_extension(IRPath, "ll");
+    fptr = createFile(IRPath);
+  } else {
+    std::string File = llvm::sys::path::filename(M.getName()).str();
+    std::string DirectoryName = getDirectoryName(File);
+    std::string FileName = getFileName(M.getName().str());
+    fptr = createFile(DirectoryName + FileName);
+  }
+
+  M.print(*fptr, nullptr, true, false);
+}
|
|
+
|
|
+/// Dumps every function of \p M that has a body and that requiresIRDump()
+/// selects into "<function name>.ll" inside the module's dump directory.
+void AutotuningDump::dumpFunctions(Module &M) {
+  // The per-module directory is named after the module with '/' flattened.
+  std::string ModuleDir = M.getName().str();
+  std::replace(ModuleDir.begin(), ModuleDir.end(), '/', '_');
+  const std::string DirectoryName = getDirectoryName(ModuleDir);
+
+  for (Function &F : M.getFunctionList()) {
+    const bool HasBody = !F.isDeclaration() && !F.empty();
+    if (!HasBody)
+      continue;
+
+    AutoTuningEnabledFunction *AutotuneFunc = &F.getATEFunction();
+    assert(AutotuneFunc);
+    autotuning::Engine.initContainer(AutotuneFunc, "autotuning-dump",
+                                     F.getName(), false);
+
+    // Ask about the whole function rather than a single source location.
+    if (!AutotuneFunc->requiresIRDump(true))
+      continue;
+
+    const std::string FuncName = F.getName().str();
+    auto fptr = createFile(DirectoryName + Twine(FuncName) + ".ll");
+    this->dumpToStream(*fptr, F);
+  }
+}
|
|
+
|
|
+/// Dumps every loop of \p M that requiresIRDump() selects. Loops are visited
+/// in preorder per function, using the LoopInfo supplied by \p GetLI. Each
+/// selected loop is written to
+/// <AutoTuneDirPath>/<function>_loop_<source line>/<config number>.ll.
+void AutotuningDump::dumpLoops(Module &M,
+                               function_ref<LoopInfo &(Function &)> GetLI) {
+  for (Function &F : M) {
+    // Nothing to do for declarations.
+    if (F.isDeclaration() || F.empty())
+      continue;
+
+    LoopInfo &LI = GetLI(F);
+    for (auto &L : LI.getLoopsInPreorder()) {
+      // The parent function is only looked up for loops that are still valid.
+      StringRef ParentName = "";
+      if (!L->isInvalid())
+        if (Function *Parent = L->getHeader()->getParent())
+          ParentName = Parent->getName();
+
+      autotuning::Engine.initContainer(L, "autotuning-dump", ParentName,
+                                       false);
+      if (!L->requiresIRDump())
+        continue;
+
+      std::string FuncName = L->getCodeRegion().getFuncName();
+      unsigned SourceLine = L->getCodeRegion().getSourceLoc().SourceLine;
+      std::string DirectoryName = AutoTuneDirPath + "/" +
+                                  llvm::sys::path::filename(FuncName).str() +
+                                  "_loop_" + std::to_string(SourceLine);
+
+      // The per-loop directory is not created anywhere else in this file, so
+      // create it here; opening the output file would otherwise fail.
+      if (std::error_code EC = sys::fs::create_directories(DirectoryName)) {
+        errs() << "could not create directory: " << DirectoryName << ": "
+               << EC.message() << "\n";
+        continue;
+      }
+
+      std::string FileName = std::to_string(this->getConfigNumber()) + ".ll";
+      auto fptr = createFile(DirectoryName + "/" + FileName);
+      this->dumpToStream(*fptr, *L);
+    }
+  }
+}
|
|
+
|
|
+/// Entry point of the dump: makes sure the output directory exists and then
+/// writes IR for \p M. Incremental compilation always dumps the whole module;
+/// otherwise -autotuning-dump-mode selects module, function or loop dumps.
+///
+/// \param GetLI provides the LoopInfo of a function; only used for loop dumps.
+/// \returns false in every case (the IR is never modified).
+bool AutotuningDump::run(Module &M,
+                         function_ref<LoopInfo &(Function &)> GetLI) {
+  // Change to absolute path.
+  SmallString<256> OutputPath = StringRef(AutoTuneDirPath);
+  sys::fs::make_absolute(OutputPath);
+
+  // Create the output directory if it does not exist yet.
+  if (std::error_code EC = sys::fs::create_directories(OutputPath)) {
+    // Print the message directly. Streaming a temporary llvm::Error does not
+    // mark it as checked, so its destructor aborts in builds that enable
+    // ABI-breaking checks.
+    llvm::errs() << "could not create directory: " << OutputPath << ": "
+                 << EC.message() << "\n";
+    return false;
+  }
+
+  if (IsIncrementalCompilation) {
+    LLVM_DEBUG(
+        dbgs()
+        << "AutotuningDump: IR files writing for incremental compilation.\n");
+    dumpModule(M);
+    return false;
+  }
+
+  switch (AutotuningDumpMode) {
+  case whole_modules:
+    dumpModule(M);
+    break;
+  case functions:
+    dumpFunctions(M);
+    break;
+  case loops:
+    dumpLoops(M, GetLI);
+    break;
+  }
+
+  return false;
+}
|
|
+
|
|
+/// Registers the pass with the global pass registry and records whether the
+/// dump runs as part of incremental compilation.
+AutotuningDumpLegacy::AutotuningDumpLegacy(bool IncrementalCompilation)
+    : ModulePass(ID) {
+  initializeAutotuningDumpLegacyPass(*PassRegistry::getPassRegistry());
+  IsIncrementalCompilation = IncrementalCompilation;
+}
|
|
+
|
|
+/// Runs the dump over \p M when dumping is enabled in the AutoTuner engine.
+/// LoopInfo is obtained per function from LoopInfoWrapperPass.
+bool AutotuningDumpLegacy::runOnModule(Module &M) {
+  if (autotuning::Engine.isDumpEnabled()) {
+    auto LoopInfoFor = [this](Function &F) -> LoopInfo & {
+      return getAnalysis<LoopInfoWrapperPass>(F).getLoopInfo();
+    };
+    return AutotuningDump(IsIncrementalCompilation).run(M, LoopInfoFor);
+  }
+  return false;
+}
|
|
+
|
|
+/// Human-readable name of this pass.
+StringRef AutotuningDumpLegacy::getPassName() const {
+  const StringRef Name = "Autotuning Dump";
+  return Name;
+}
|
|
+
|
|
+/// The pass needs loop information and leaves every analysis intact.
+void AutotuningDumpLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<LoopInfoWrapperPass>();
+  AU.setPreservesAll();
+}
|
|
+
|
|
+// Pass identification; the address of ID identifies the pass.
+char AutotuningDumpLegacy::ID = 0;
+
+// Register the legacy pass under the "autotuning-dump" argument and declare
+// its dependency on LoopInfoWrapperPass.
+INITIALIZE_PASS_BEGIN(AutotuningDumpLegacy, "autotuning-dump",
+                      "Dump IR for Autotuned Code Regions", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(AutotuningDumpLegacy, "autotuning-dump",
+                    "Dump IR for Autotuned Code Regions", false, false)
|
|
+
|
|
+/// Factory for the legacy dump pass; the caller owns the returned pass.
+ModulePass *llvm::createAutotuningDumpPass() {
+  ModulePass *DumpPass = new AutotuningDumpLegacy();
+  return DumpPass;
+}
|
|
+
|
|
+// Unique key identifying this analysis to the new pass manager.
+AnalysisKey AutotuningDumpAnalysis::Key;
+
+/// New pass-manager entry point: runs the dump over \p M when dumping is
+/// enabled in the AutoTuner engine. LoopInfo is taken from the function
+/// analysis manager. The result is always false.
+AutotuningDumpAnalysis::Result
+AutotuningDumpAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
+  if (autotuning::Engine.isDumpEnabled()) {
+    auto &FAM =
+        AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+    auto LoopInfoFor = [&FAM](Function &F) -> LoopInfo & {
+      return FAM.getResult<LoopAnalysis>(F);
+    };
+
+    AutotuningDump Impl(IsIncrementalCompilation);
+    Impl.run(M, LoopInfoFor);
+  }
+  return false;
+}
|
|
+#endif
|
|
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
|
|
index 4a1797c42789..9c6a70f0221f 100644
|
|
--- a/llvm/lib/Analysis/CMakeLists.txt
|
|
+++ b/llvm/lib/Analysis/CMakeLists.txt
|
|
@@ -30,6 +30,7 @@ add_llvm_component_library(LLVMAnalysis
|
|
Analysis.cpp
|
|
AssumeBundleQueries.cpp
|
|
AssumptionCache.cpp
|
|
+ AutotuningDump.cpp
|
|
BasicAliasAnalysis.cpp
|
|
BlockFrequencyInfo.cpp
|
|
BlockFrequencyInfoImpl.cpp
|
|
@@ -153,6 +154,7 @@ add_llvm_component_library(LLVMAnalysis
|
|
${MLLinkDeps}
|
|
|
|
LINK_COMPONENTS
|
|
+ AutoTuner
|
|
BinaryFormat
|
|
Core
|
|
Object
|
|
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
|
|
index e2480d51d372..f6b3c14a0345 100644
|
|
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
|
|
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
|
|
@@ -383,15 +383,27 @@ llvm::shouldInline(CallBase &CB,
|
|
Function *Callee = CB.getCalledFunction();
|
|
Function *Caller = CB.getCaller();
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // Get the code Region to add BaselineConfig values for inline
|
|
+ const autotuning::CodeRegion &CR = CB.ATECallSite.get()->getCodeRegion();
|
|
+ static const std::string ForceInlineParamStr = "ForceInline";
|
|
+#endif
|
|
+
|
|
if (IC.isAlways()) {
|
|
LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC)
|
|
<< ", Call: " << CB << "\n");
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "1"}});
|
|
+#endif
|
|
return IC;
|
|
}
|
|
|
|
if (!IC) {
|
|
LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC)
|
|
<< ", Call: " << CB << "\n");
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "0"}});
|
|
+#endif
|
|
if (IC.isNever()) {
|
|
ORE.emit([&]() {
|
|
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
|
|
@@ -417,6 +429,9 @@ llvm::shouldInline(CallBase &CB,
|
|
LLVM_DEBUG(dbgs() << " NOT Inlining: " << CB
|
|
<< " Cost = " << IC.getCost()
|
|
<< ", outer Cost = " << TotalSecondaryCost << '\n');
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "0"}});
|
|
+#endif
|
|
ORE.emit([&]() {
|
|
return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
|
|
Call)
|
|
@@ -430,6 +445,9 @@ llvm::shouldInline(CallBase &CB,
|
|
|
|
LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) << ", Call: " << CB
|
|
<< '\n');
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ autotuning::Engine.addOpportunity(CR, {{ForceInlineParamStr, "1"}});
|
|
+#endif
|
|
return IC;
|
|
}
|
|
|
|
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
|
|
index a2f46edcf5ef..9f8f57865de2 100644
|
|
--- a/llvm/lib/Analysis/InlineCost.cpp
|
|
+++ b/llvm/lib/Analysis/InlineCost.cpp
|
|
@@ -162,6 +162,14 @@ static cl::opt<bool> DisableGEPConstOperand(
|
|
"disable-gep-const-evaluation", cl::Hidden, cl::init(false),
|
|
cl::desc("Disables evaluation of GetElementPtr with constant operands"));
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+// -auto-tuning-enable-local-callsite-tuning: when set, call sites whose callee
+// has local linkage are tuned as well (off by default).
+static cl::opt<bool> EnableLocalCallSiteTuning(
+    "auto-tuning-enable-local-callsite-tuning", cl::init(false), cl::Hidden,
+    cl::desc("Enable AutoTuning for local callsites "
+             "as well."));
|
|
+#endif
|
|
+
|
|
namespace llvm {
|
|
std::optional<int> getStringFnAttrAsInt(const Attribute &Attr) {
|
|
if (Attr.isValid()) {
|
|
@@ -2990,6 +2998,27 @@ InlineCost llvm::getInlineCost(
|
|
return llvm::InlineCost::getNever(UserDecision->getFailureReason());
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (autotuning::Engine.isEnabled() && Call.getCaller() &&
|
|
+ (!Callee->hasLocalLinkage() || EnableLocalCallSiteTuning)) {
|
|
+ bool ForceInline = false;
|
|
+ bool Found = false;
|
|
+
|
|
+ autotuning::Engine.initContainer(Call.ATECallSite.get(), "inline",
|
|
+ Call.getCaller()->getName(),
|
|
+ /* addOpportunity */ false);
|
|
+
|
|
+ Found = Call.ATECallSite->lookUpParams<bool>("ForceInline", ForceInline);
|
|
+
|
|
+ if (Found) {
|
|
+ if (ForceInline)
|
|
+ return llvm::InlineCost::getAlways("Force inlined by auto-tuning");
|
|
+ else
|
|
+ return llvm::InlineCost::getNever("Force non-inlined by auto-tuning");
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+
|
|
LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
|
|
<< "... (caller:" << Call.getCaller()->getName()
|
|
<< ")\n");
|
|
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
|
|
index 60a72079e864..36aca73ee675 100644
|
|
--- a/llvm/lib/Analysis/LoopInfo.cpp
|
|
+++ b/llvm/lib/Analysis/LoopInfo.cpp
|
|
@@ -37,6 +37,10 @@
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Support/GenericLoopInfoImpl.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#include "llvm/IR/StructuralHash.h"
|
|
+#endif
|
|
using namespace llvm;
|
|
|
|
// Explicitly instantiate methods in LoopInfoImpl.h for IR-level Loops.
|
|
@@ -663,6 +667,54 @@ Loop::LocRange Loop::getLocRange() const {
|
|
return LocRange();
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+/// Hashes the loop by passing the list of its basic blocks to StructuralHash.
+uint64_t Loop::computeStructuralHash() {
+  std::vector<BasicBlock *> LoopBlocks = getBlocks();
+  return StructuralHash(LoopBlocks);
+}
|
|
+
|
|
+/// Builds the autotuning CodeRegion that describes this loop and stores it
+/// with setCodeRegion().
+///
+/// The region is named after the loop header (its IR name, or the operand
+/// label printed by AsmWriter when the header is unnamed), carries the parent
+/// function's name, the loop start location, the number of non-debug
+/// instructions in the loop and the parent function's hotness.
+void Loop::initCodeRegion() {
+  // Look the header up once and derive the parent from it, so that a missing
+  // header is handled the same way for the name and for the function instead
+  // of being checked for one and dereferenced for the other.
+  BasicBlock *Header = getHeader();
+  Function *F = Header ? Header->getParent() : nullptr;
+
+  std::string LoopName = "<unnamed loop>";
+  if (Header) {
+    if (Header->hasName()) {
+      LoopName = Header->getName().str();
+    } else {
+      // Unnamed header: use the label AsmWriter prints for it.
+      std::string Label;
+      llvm::raw_string_ostream RSO(Label);
+      Header->printAsOperand(RSO);
+      LoopName = RSO.str();
+    }
+  }
+
+  // Copy the name by length; StringRef::data() is not guaranteed to be
+  // NUL-terminated.
+  const std::string FuncName = F ? F->getName().str() : std::string();
+
+  // init the CodeRegion
+  autotuning::CodeRegion CR = autotuning::CodeRegion(
+      LoopName, FuncName, autotuning::CodeRegionType::Loop,
+      this->getStartLoc());
+
+  // Compute the number of non-debug IR instructions in this loop.
+  unsigned TotalNumInstrs = 0;
+  for (const BasicBlock *BB : this->getBlocks()) {
+    auto Insts = BB->instructionsWithoutDebug();
+    TotalNumInstrs += std::distance(Insts.begin(), Insts.end());
+  }
+  CR.setSize(TotalNumInstrs);
+
+  // Hotness is inherited from the parent function when there is one;
+  // otherwise the region keeps whatever hotness CodeRegion starts with.
+  if (F)
+    CR.setHotness(F->ATEFunction.getHotness());
+
+  this->setCodeRegion(CR);
+}
|
|
+#endif
|
|
+
|
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
|
LLVM_DUMP_METHOD void Loop::dump() const { print(dbgs()); }
|
|
|
|
diff --git a/llvm/lib/AutoTuner/AutoTuning.cpp b/llvm/lib/AutoTuner/AutoTuning.cpp
|
|
new file mode 100644
|
|
index 000000000000..1f09f06d84a2
|
|
--- /dev/null
|
|
+++ b/llvm/lib/AutoTuner/AutoTuning.cpp
|
|
@@ -0,0 +1,705 @@
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+//===-- AutoTuning.cpp - Auto-Tuning --------------------------------------===//
|
|
+// The LLVM Compiler Infrastructure
|
|
+//
|
|
+// This file is distributed under the University of Illinois Open Source
|
|
+// License. See LICENSE.TXT for details.
|
|
+//
|
|
+//===----------------------------------------------------------------------===//
|
|
+//
|
|
+// This file defines Auto Tuning related functions, models and interfaces.
|
|
+//
|
|
+//===----------------------------------------------------------------------===//
|
|
+
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#include "llvm/ADT/STLExtras.h"
|
|
+#include "llvm/ADT/StringRef.h"
|
|
+#include "llvm/AutoTuner/AutoTuningRemarkManager.h"
|
|
+#include "llvm/Support/CommandLine.h"
|
|
+#include "llvm/Support/Error.h"
|
|
+#include "llvm/Support/Process.h"
|
|
+
|
|
+// Enable debug messages for AutoTuning.
|
|
+#define DEBUG_TYPE "autotuning"
|
|
+
|
|
+using namespace llvm;
|
|
+
|
|
+// defined in 'lib/Remarks/YAMLRemarkParser.cpp'.
+extern cl::opt<bool> OmitAutotuningMetadata;
+
+// -auto-tuning-input: input file for the AutoTuner.
+static cl::opt<std::string> InputFile("auto-tuning-input", cl::Hidden,
+                                      cl::desc("Specify the input file"));
+
+// -auto-tuning-opp: directory that receives the tuning opportunities.
+static cl::opt<std::string> OutputOppDir(
+    "auto-tuning-opp", cl::Hidden,
+    cl::desc("Specify the output directory of tuning opportunities"));
+
+// -auto-tuning-pass-filter: regular expression restricting which passes emit
+// auto-tuning remarks.
+static cl::opt<std::string>
+    RemarksPasses("auto-tuning-pass-filter", cl::Hidden,
+                  cl::desc("Only dump auto-tuning remarks from passes whose "
+                           "names match the given regular expression"),
+                  cl::value_desc("regex"));
+
+// -autotuning-project-dir: project base directory (empty by default).
+static cl::opt<std::string>
+    ProjectDir("autotuning-project-dir", cl::Hidden, cl::init(""),
+               cl::desc("Specify project base dir to make code region name "
+                        "relative to base dir. This operation will only be "
+                        "applied for coarse-grain code regions."));
+
+// -auto-tuning-config-id: configuration number used for this compilation.
+// Required only for the ML guidance feature.
+// NOTE(review): no cl::init is given here, while Container::requiresIRDump
+// treats the value -1 as the baseline configuration - confirm the default.
+static cl::opt<int> CFGNumber(
+    "auto-tuning-config-id", cl::Hidden,
+    cl::desc(
+        "Specify the auto-tuning configuration ID used in this compilation."));
+
+// -auto-tuning-remark-format: serialization format of the remarks.
+static cl::opt<std::string> OutputFormat(
+    "auto-tuning-remark-format", cl::Hidden,
+    cl::desc("The format used for auto-tuning remarks (default: YAML)"),
+    cl::value_desc("format"), cl::init("yaml"));
+
+// -auto-tuning-compile-mode: incremental compilation mode (inactive by
+// default). Not static, so it has external linkage.
+cl::opt<AutoTuningCompileOpt> AutoTuningCompileMode(
+    "auto-tuning-compile-mode", cl::Hidden, cl::init(Inactive),
+    cl::desc("AutoTuner: Choose incremental compilation mode."),
+    cl::values(clEnumVal(Inactive,
+                         "AutoTuner: Disable incremental compilation."),
+               clEnumVal(CoarseGrain, "AutoTuner: Enable incremental "
+                                      "compilation for coarse grain tuning."),
+               clEnumVal(FineGrain, "AutoTuner: Enable incremental compilation "
+                                    "for fine grain tuning."),
+               clEnumVal(Basic, "AutoTuner: Enable incremental compilation for "
+                                "any kind of code region.")));
+
+// -enable-autotuning-dump: turns on the AutoTuningDump pass.
+static cl::opt<bool>
+    EnableAutoTuningDump("enable-autotuning-dump", cl::Hidden, cl::init(false),
+                         cl::desc("Enable AutoTuningDump Pass"));
+
+// -autotuning-thin-lto: AutoTuner is used in ThinLTO mode.
+static cl::opt<bool>
+    ThinLTOTuning("autotuning-thin-lto", cl::Hidden, cl::init(false),
+                  cl::desc("AutoTuner enabled in ThinLTO mode."));
|
|
+
|
|
+namespace autotuning {
|
|
+
|
|
+// -auto-tuning-type-filter: comma-separated list of code region types for
+// which tuning opportunities are dumped.
+static cl::list<CodeRegionType> AutotuningOutputFilter(
+    "auto-tuning-type-filter", cl::Hidden, cl::CommaSeparated,
+    cl::desc(
+        "Select types of code regions to dump auto-tuning opportunities for:"),
+    cl::values(clEnumVal(LLVMParam, "LLVMParam code regions only"),
+               clEnumVal(ProgramParam, "ProgramParam code regions only"),
+               clEnumVal(CallSite, "CallSite code regions only"),
+               clEnumVal(Function, "Function code regions only"),
+               clEnumVal(Loop, "Loop code regions only"),
+               clEnumVal(MachineBasicBlock,
+                         "Machine basic block code regions only"),
+               clEnumVal(Switch, "Switch code regions only"),
+               clEnumVal(Other, "All other types of code regions")));
+
+// -auto-tuning-function-filter: comma-separated function names used to filter
+// code regions.
+static cl::list<std::string> AutotuningFunctionFilter(
+    "auto-tuning-function-filter", cl::Hidden, cl::CommaSeparated,
+    cl::desc("Apply code region filtering based on function names"));
+
+// -auto-tuning-exclude-cold: drop cold code regions (on by default).
+static const cl::opt<bool> ExcludeColdCodeRegion(
+    "auto-tuning-exclude-cold", cl::Hidden, cl::init(true),
+    cl::desc("Use profile data to prune cold code regions from auto-tuning"));
+
+// -auto-tuning-code-region-matching-hash: also compare IR hashes when
+// matching code regions (on by default).
+static const cl::opt<bool> CodeRegionMatchingWithHash(
+    "auto-tuning-code-region-matching-hash", cl::Hidden, cl::init(true),
+    cl::desc("Use IR hashing to match the Code Regions"));
+
+// -auto-tuning-hot-only: keep hot code regions only (off by default).
+static const cl::opt<bool> HotCodeRegionOnly(
+    "auto-tuning-hot-only", cl::Hidden, cl::init(false),
+    cl::desc(
+        "Use profile data to include hot code regions only from auto-tuning"));
+
+// -auto-tuning-size-threshold: minimum size a code region must have to be
+// kept (0 by default).
+static const cl::opt<unsigned>
+    SizeThreshold("auto-tuning-size-threshold", cl::Hidden, cl::init(0),
+                  cl::desc("Prune small code regions from auto-tuning with a "
+                           "size smaller than the threshold"));
|
|
+
|
|
+/// Returns \p Name unchanged, or the placeholder "unnamed" when it is empty.
+static inline const std::string generateName(const std::string &Name) {
+  return Name.empty() ? std::string("unnamed") : Name;
+}
|
|
+
|
|
+//===----------------------------------------------------------------------===//
|
|
+// CodeRegion implementation
|
|
+/// Creates a code region that only carries a type; every other member keeps
+/// its default value.
+CodeRegion::CodeRegion(const CodeRegionType Type)
+    : Type(Type) {
+}
|
|
+
|
|
+/// Creates a code region from a debug location. Empty names are replaced by
+/// "unnamed". The source location is filled in only when \p DL is valid.
+CodeRegion::CodeRegion(const std::string &Name, const std::string &FuncName,
+                       const CodeRegionType &Type, const DebugLoc &DL,
+                       const DynamicOptions DO) {
+  this->Type = Type;
+  this->StringType = getTypeAsString(Type);
+  this->Name = generateName(Name);
+  this->FuncName = generateName(FuncName);
+  this->AutoTunerOptions = DO;
+
+  if (!DL)
+    return;
+  this->Location =
+      SourceLocation{DL->getFilename().str(), DL->getLine(), DL->getColumn()};
+}
|
|
+
|
|
+/// Creates a code region from an already resolved source location. Empty
+/// names are replaced by "unnamed".
+CodeRegion::CodeRegion(const std::string &Name, const std::string &FuncName,
+                       const CodeRegionType &Type,
+                       const SourceLocation &Location,
+                       const DynamicOptions DO) {
+  this->Type = Type;
+  this->StringType = getTypeAsString(Type);
+  this->Name = generateName(Name);
+  this->FuncName = generateName(FuncName);
+  this->Location = Location;
+  this->AutoTunerOptions = DO;
+}
|
|
+
|
|
+/// Creates a code region that additionally records the pass it belongs to and
+/// the invocation number. The remaining members are set up by the constructor
+/// this one delegates to.
+CodeRegion::CodeRegion(const std::string &Name, const std::string &FuncName,
+                       const std::string &PassName, const CodeRegionType &Type,
+                       const SourceLocation &Location,
+                       const unsigned int Invocation)
+    : CodeRegion(Name, FuncName, Type, Location) {
+  this->Invocation = Invocation;
+  this->PassName = generateName(PassName);
+}
|
|
+
|
|
+/// Compares two code regions.
+///
+/// With OmitAutotuningMetadata set, regions match on hash, type and pass
+/// name. Otherwise they match on type, name, pass name, function name and
+/// source location, plus the hash when CodeRegionMatchingWithHash is set.
+/// The invocation number is compared as well while the engine parses input,
+/// and the baseline configuration while it generates output.
+bool CodeRegion::operator==(const CodeRegion &CodeRegion) const {
+  if (OmitAutotuningMetadata) {
+    const bool SameIdentity = (this->getHash() == CodeRegion.getHash()) &&
+                              (this->Type == CodeRegion.getType()) &&
+                              (this->PassName == CodeRegion.getPassName());
+    if (!SameIdentity)
+      return false;
+  } else {
+    const bool SameMetadata = (this->Type == CodeRegion.getType()) &&
+                              (this->Name == CodeRegion.getName()) &&
+                              (this->PassName == CodeRegion.getPassName()) &&
+                              (this->FuncName == CodeRegion.getFuncName()) &&
+                              (this->Location == CodeRegion.getSourceLoc());
+    if (!SameMetadata)
+      return false;
+    if (CodeRegionMatchingWithHash &&
+        !(this->getHash() == CodeRegion.getHash()))
+      return false;
+  }
+
+  if (autotuning::Engine.ParseInput &&
+      !(this->getInvocation() == CodeRegion.getInvocation()))
+    return false;
+
+  if (autotuning::Engine.GenerateOutput &&
+      !(this->getBaselineConfig() == CodeRegion.getBaselineConfig()))
+    return false;
+
+  return true;
+}
|
|
+
|
|
+/// Maps a code region type to the string used for it; any type without a
+/// dedicated spelling is reported as "other".
+std::string CodeRegion::getTypeAsString(CodeRegionType CRType) {
+  if (CRType == autotuning::CodeRegionType::MachineBasicBlock)
+    return "machine_basic_block";
+  if (CRType == autotuning::CodeRegionType::Loop)
+    return "loop";
+  if (CRType == autotuning::CodeRegionType::Function)
+    return "function";
+  if (CRType == autotuning::CodeRegionType::CallSite)
+    return "callsite";
+  if (CRType == autotuning::CodeRegionType::LLVMParam)
+    return "llvm-param";
+  if (CRType == autotuning::CodeRegionType::ProgramParam)
+    return "program-param";
+  if (CRType == autotuning::CodeRegionType::Switch)
+    return "switch";
+  return "other";
+}
|
|
+
|
|
+/// Maps a hotness value to "cold", "hot" or, for anything else, "unknown".
+std::string CodeRegion::getHotnessAsString(HotnessType Hotness) {
+  if (Hotness == autotuning::HotnessType::Cold)
+    return "cold";
+  if (Hotness == autotuning::HotnessType::Hot)
+    return "hot";
+  return "unknown";
+}
|
|
+
|
|
+/// Sets the pass name; an empty name is stored as "unnamed".
+void CodeRegion::setPassName(const std::string &NewPassName) {
+  const std::string Normalized = generateName(NewPassName);
+  this->PassName = Normalized;
+}
|
|
+
|
|
+/* static */
+/// Returns a copy of a code region of type Invalid. The prototype is created
+/// once, on first use.
+autotuning::CodeRegion CodeRegion::getInvalidInstance() {
+  static autotuning::CodeRegion Invalid(autotuning::CodeRegionType::Invalid);
+  return Invalid;
+}
|
|
+
|
|
+/* static */
+/// Returns a copy of a code region of type Empty. The prototype is created
+/// once, on first use.
+autotuning::CodeRegion CodeRegion::getEmptyInstance() {
+  static autotuning::CodeRegion Empty(autotuning::CodeRegionType::Empty);
+  return Empty;
+}
|
|
+
|
|
+//===----------------------------------------------------------------------===//
|
|
+// Container implementation
|
|
+//
|
|
+
|
|
+/// Read-only access to the code region attached to this container.
+const CodeRegion &Container::getCodeRegion() const {
+  return this->CR;
+}
|
|
+
|
|
+/// Replaces the code region attached to this container with a copy of
+/// \p NewCR.
+void Container::setCodeRegion(const CodeRegion &NewCR) {
+  CR = NewCR;
+}
|
|
+
|
|
+/// Looks up the tuning parameter \p ParamsName for this container's code
+/// region in the engine's parameter table.
+///
+/// \param Value receives the parameter value when it is found.
+/// \returns true when the code region has an entry and that entry defines
+/// the parameter.
+template <typename T>
+bool Container::lookUpParams(const std::string &ParamsName, T &Value) const {
+  auto Entry = Engine.ParamTable.find(CR);
+  if (Entry == Engine.ParamTable.end())
+    return false;
+
+  // NOTE(review): the parameter set is copied before it is searched; confirm
+  // whether findByName could operate on the table entry directly.
+  ParameterManager InputParams = Entry->second;
+  if (!InputParams.findByName(ParamsName, Value))
+    return false;
+
+  LLVM_DEBUG(dbgs() << ParamsName << " is set for the CodeRegion: \n"
+                    << "  Name: " << CR.getName() << "\n"
+                    << "  FuncName: " << CR.getFuncName() << "\n"
+                    << "  PassName: " << CR.getPassName() << "\n"
+                    << "  Type: " << CR.getTypeAsString() << "\n"
+                    << "  Hash: " << CR.getHash() << "\n"
+                    << "\n");
+  return true;
+}
|
|
+
|
|
+/// Decides whether IR must be dumped for this container's code region.
+///
+/// For configuration ID -1 the region is searched among the recorded tuning
+/// opportunities, otherwise among the keys of the parameter table. With
+/// \p IsFunctionIR set, a candidate matches on file name and function name;
+/// otherwise it matches on source location.
+bool Container::requiresIRDump(bool IsFunctionIR) const {
+  auto Matches = [&](const CodeRegion &Candidate) -> bool {
+    if (!IsFunctionIR)
+      return CR.getSourceLoc() == Candidate.getSourceLoc();
+    return CR.getFileName() == Candidate.getFileName() &&
+           CR.getFuncName() == Candidate.getFuncName();
+  };
+
+  if (CFGNumber == -1) {
+    // Baseline: look through the tuning opportunities.
+    for (auto &Opp : Engine.TuningOpps)
+      if (Matches(Opp))
+        return true;
+    return false;
+  }
+
+  // Non-baseline: look through the regions that have parameters.
+  for (auto &Entry : Engine.ParamTable)
+    if (Matches(Entry.first))
+      return true;
+  return false;
+}
|
|
+
|
|
+// Explicit instantiations of lookUpParams for the value types listed here:
+// int, bool, std::string and std::vector<std::string>.
+template bool Container::lookUpParams<int>(const std::string &ParamsName,
+                                           int &Value) const;
+
+template bool Container::lookUpParams<bool>(const std::string &ParamsName,
+                                            bool &Value) const;
+
+template bool
+Container::lookUpParams<std::string>(const std::string &ParamsName,
+                                     std::string &Value) const;
+
+template bool Container::lookUpParams<std::vector<std::string>>(
+    const std::string &ParamsName, std::vector<std::string> &Value) const;
|
|
+
|
|
+/// Counts the entries of \p CallSiteLocs that have the same caller and callee
+/// as \p Loc.
+///
+/// Both arguments are taken by const reference: this helper is called once
+/// per element by cleanCallSiteLoc, and passing them by value copied the
+/// whole vector on every call.
+static unsigned int
+count(const SmallVector<CallSiteLocation, 10> &CallSiteLocs,
+      const CallSiteLocation &Loc) {
+  return llvm::count_if(CallSiteLocs, [&Loc](const CallSiteLocation &Other) {
+    return Loc.Caller == Other.Caller && Loc.Callee == Other.Callee;
+  });
+}
|
|
+
|
|
+/// Reports whether -autotuning-thin-lto was requested.
+bool AutoTuningEngine::isThinLTOTuning() const {
+  return ThinLTOTuning;
+}
|
|
+
|
|
+/// Returns the code region type registered for \p PassName in PTTMap.
+///
+/// An unknown pass name is a reachable failure, so it ends compilation with a
+/// fatal error that names the pass. llvm_unreachable would be undefined
+/// behaviour in release builds and would go on to dereference the end
+/// iterator.
+CodeRegionType AutoTuningEngine::convertPassToType(std::string PassName) {
+  auto Search = PTTMap.find(PassName);
+  if (Search == PTTMap.end())
+    llvm::report_fatal_error(
+        llvm::Twine("AutoTuningEngine: Invalid/unsupported optimization pass "
+                    "provided: ") +
+        PassName);
+  return Search->second;
+}
|
|
+
|
|
+/// Appends a copy of \p Loc to the list of known call-site locations.
+void AutoTuningEngine::insertCallSiteLoc(CallSiteLocation Loc) {
+  CallSiteLocs.push_back(Loc);
+}
|
|
+
|
|
+// When a function has several calls to the same callee, inlining makes new
+// calls available; they are added to CallSiteLocs here. The new entry is
+// derived from the entry of the original call and uses
+// "Original Call Line No + New Call Line No" as its line instead of the
+// "DebugLoc Line No". Nothing happens when OldCB has no entry.
+void AutoTuningEngine::updateCallSiteLocs(llvm::CallBase *OldCB,
+                                          llvm::CallBase *NewCB,
+                                          llvm::Function *Callee,
+                                          unsigned int Line) {
+  auto Known = llvm::find_if(CallSiteLocs, [OldCB](const CallSiteLocation &L) {
+    return OldCB == L.CB;
+  });
+  if (Known == CallSiteLocs.end())
+    return;
+
+  // Copy the entry first: appending may reallocate the vector.
+  CallSiteLocation Loc = *Known;
+  Loc.CB = NewCB;
+  Loc.Callee = Callee;
+  Loc.SrcLoc.SourceLine = Loc.SrcLoc.SourceLine + Line;
+  CallSiteLocs.emplace_back(Loc);
+}
|
|
+
|
|
+/// Removes every call-site location whose caller/callee pair occurs exactly
+/// once, keeping only the pairs that appear several times.
+void AutoTuningEngine::cleanCallSiteLoc() {
+  // Each original entry is examined exactly once. Idx only moves past the
+  // entries that are kept, because erasing shifts the next entry into place.
+  unsigned int Idx = 0;
+  for (unsigned int Left = CallSiteLocs.size(); Left != 0; --Left) {
+    CallSiteLocation Loc = CallSiteLocs[Idx];
+    if (count(CallSiteLocs, Loc) == 1)
+      CallSiteLocs.erase(CallSiteLocs.begin() + Idx);
+    else
+      ++Idx;
+  }
+}
|
|
+
|
|
+/// Drops all recorded call-site locations.
+void AutoTuningEngine::clearCallSiteLocs() {
+  CallSiteLocs.clear();
+}
|
|
+
|
|
+/// Returns the source line recorded for call \p CB, or std::nullopt when the
+/// call has no entry. The first matching entry wins.
+std::optional<unsigned int>
+AutoTuningEngine::getCallSiteLoc(llvm::CallBase *CB) {
+  for (const CallSiteLocation &Known : CallSiteLocs)
+    if (CB == Known.CB)
+      return Known.SrcLoc.SourceLine;
+  return std::nullopt;
+}
|
|
+
|
|
+/// Records \p OppCR as a tuning opportunity.
+///
+/// Uninitialized code regions are ignored. Otherwise \p BaselineConfig is
+/// attached to the region and the region is inserted into TuningOpps. If an
+/// equal region is already stored and its hotness is Unknown, the stored
+/// hotness is replaced by the (known) hotness of \p OppCR.
+void AutoTuningEngine::addOpportunity(
+    const CodeRegion &OppCR,
+    std::map<std::string, std::string> BaselineConfig) {
+  if (!OppCR.Initialized)
+    return;
+
+  OppCR.setBaselineConfig(BaselineConfig);
+
+  // First sighting of this region: just store it.
+  if (!TuningOpps.contains(OppCR)) {
+    TuningOpps.insert(OppCR);
+    return;
+  }
+
+  // The region is already known; only a hot/cold verdict can refine it.
+  if (OppCR.getHotness() == Unknown)
+    return;
+
+  auto Existing = find(TuningOpps, OppCR);
+  if (Existing->getHotness() == Unknown)
+    Existing->setHotness(OppCR.getHotness());
+}
|
|
+
|
|
+/// Rebuilds \p CRs so that it only holds the code regions that pass the
+/// command-line filters, then appends the module-level pseudo regions.
+///
+/// A region is dropped when:
+///  - a type filter was given and the region's type is not listed in it;
+///  - a size threshold was given and the region is smaller than it;
+///  - cold regions are excluded and the region is cold;
+///  - only hot regions are requested and the region is not hot.
+///
+/// Afterwards a module-level region of type Other is added when no type filter
+/// was given or when the filter lists Other. LLVMParam and ProgramParam
+/// regions are added only when the filter explicitly lists them.
+void AutoTuningEngine::applyOppFilters(CodeRegions &CRs) {
+  const bool HasTypeFilter = AutotuningOutputFilter.getNumOccurrences() > 0;
+  // True when the given type was named in the type filter.
+  const auto IsRequestedType = [&](CodeRegionType Ty) {
+    return std::find(AutotuningOutputFilter.begin(),
+                     AutotuningOutputFilter.end(),
+                     Ty) != AutotuningOutputFilter.end();
+  };
+
+  CodeRegions Kept;
+  for (CodeRegion CR : CRs) {
+    // Filter out the CodeRegion if its type fails to match any types
+    // specified from the command line.
+    if (HasTypeFilter && !IsRequestedType(CR.getType()))
+      continue;
+
+    if (SizeThreshold.getNumOccurrences() > 0 && CR.getSize() < SizeThreshold)
+      continue;
+
+    if (ExcludeColdCodeRegion && CR.isCold()) {
+      LLVM_DEBUG(dbgs() << "Skip CodeRegion with cold function "
+                        << CR.getFuncName() << "\n");
+      continue;
+    }
+
+    if (HotCodeRegionOnly && !CR.isHot()) {
+      LLVM_DEBUG(dbgs() << "Skip CodeRegion with " << CR.getHotnessAsString()
+                        << " function " << CR.getFuncName() << "\n");
+      continue;
+    }
+
+    Kept.insert(CR);
+    LLVM_DEBUG(dbgs() << "CodeRegion added as an tuning opportunity: \n"
+                      << " Name: " << CR.getName() << "\n"
+                      << " FuncName: " << CR.getFuncName() << "\n"
+                      << " PassName: " << CR.getPassName() << "\n"
+                      << " Type: " << CR.getTypeAsString() << "\n"
+                      << " Size: " << CR.getSize() << "\n"
+                      << " Hotness: " << CR.getHotnessAsString() << "\n"
+                      << " Hash: " << CR.getHash() << "\n"
+                      << " Location: " << CR.getSourceLoc().SourceFilePath
+                      << "; " << CR.getSourceLoc().SourceLine << "; "
+                      << CR.getSourceLoc().SourceColumn << "\n\n");
+  }
+
+  if (!HasTypeFilter || IsRequestedType(Other)) {
+    // An otherwise empty CodeRegion named after ModuleID stands for the
+    // module as a whole, i.e. a module-level tuning opportunity.
+    autotuning::CodeRegion GlobalCR =
+        CodeRegion(ModuleID, "none", "all", Other);
+    GlobalCR.setHash(llvm::hash_combine(ModuleID, Other));
+    Kept.insert(GlobalCR);
+    LLVM_DEBUG(dbgs() << "Module added as an tuning opportunity: \n"
+                      << " Name: " << GlobalCR.getName() << "\n"
+                      << " Hash: " << GlobalCR.getHash() << "\n"
+                      << "\n");
+  }
+
+  // LLVMParam and ProgramParam become tuning opportunities only if they are
+  // explicitly specified with -auto-tuning-type-filter.
+  if (IsRequestedType(LLVMParam))
+    Kept.insert(CodeRegion(ModuleID, "none", "none", LLVMParam));
+
+  if (IsRequestedType(ProgramParam))
+    Kept.insert(CodeRegion(ModuleID, "none", "none", ProgramParam));
+
+  CRs = Kept;
+}
|
|
+
|
|
+/// Tells whether the function \p FuncName passes the function-name filter.
+///
+/// \returns true when no function filter was given on the command line, or
+/// when \p FuncName is equal to one of the filter entries; false otherwise.
+bool AutoTuningEngine::applyFunctionFilter(std::string FuncName) {
+  // Without a filter every function is accepted.
+  if (AutotuningFunctionFilter.getNumOccurrences() == 0)
+    return true;
+
+  return std::find(AutotuningFunctionFilter.begin(),
+                   AutotuningFunctionFilter.end(),
+                   FuncName) != AutotuningFunctionFilter.end();
+}
|
|
+
|
|
+/// (Re)initializes the CodeRegion of \p Container for the pass \p PassName and
+/// optionally records it as a tuning opportunity.
+///
+/// Nothing is done when the engine is disabled, when the pass' code region
+/// type is not tunable (unless opportunities are generated without any type
+/// filter), when \p FuncName is rejected by the function filter, or when the
+/// freshly initialized region has type Invalid. While generating output, the
+/// region is also skipped if it is below the size threshold or rejected by the
+/// hot/cold options.
+///
+/// \param Container      container whose code region is initialized.
+/// \param PassName       name of the pass requesting the initialization.
+/// \param FuncName       name checked against the function filter.
+/// \param AddOpportunity whether to register the region via addOpportunity().
+/// \param Invocation     invocation number stored in the region.
+void AutoTuningEngine::initContainer(Container *Container,
+                                     const std::string &PassName,
+                                     const StringRef FuncName,
+                                     bool AddOpportunity,
+                                     unsigned int Invocation) {
+  if (!Enabled)
+    return;
+
+  // Proceed if the type is tunable, or if opportunities are being generated
+  // with no type filter at all.
+  if (!(isTuningAllowedForType(convertPassToType(PassName)) ||
+        (isGenerateOutput() &&
+         AutotuningOutputFilter.getNumOccurrences() == 0)))
+    return;
+
+  if (!applyFunctionFilter(FuncName.str()))
+    return;
+
+  // A pass may run several times at different places in the pipeline and the
+  // attributes of a Container may differ between those runs. The CodeRegion
+  // is therefore re-initialized on every call so that the opportunity added
+  // below carries the latest information.
+  Container->initCodeRegion();
+  if (Container->CR.getType() == autotuning::CodeRegionType::Invalid)
+    return;
+
+  const uint64_t StructHash = Container->computeStructuralHash();
+  CodeRegion &Region = Container->CR;
+
+  if (GenerateOutput) {
+    if (Region.getSize() < SizeThreshold)
+      return;
+
+    if (ExcludeColdCodeRegion && Region.isCold()) {
+      LLVM_DEBUG(dbgs() << "Skip CodeRegion with cold function "
+                        << Region.getFuncName() << "\n");
+      return;
+    }
+
+    if (HotCodeRegionOnly && !Region.isHot()) {
+      LLVM_DEBUG(dbgs() << "Skip CodeRegion with "
+                        << Region.getHotnessAsString() << " function "
+                        << Region.getFuncName() << "\n");
+      return;
+    }
+  }
+
+  Region.setPassName(PassName);
+  Region.setHash(StructHash);
+  Region.setInvocation(Invocation);
+  Region.Initialized = true;
+
+  if (AddOpportunity)
+    addOpportunity(Region);
+}
|
|
+
|
|
+/// Tells whether \p Pass is listed in OppPassList for the file \p Filename.
+///
+/// \returns false when \p Filename has no entry in OppPassList, otherwise
+/// whether the entry's pass set contains \p Pass.
+bool AutoTuningEngine::shouldRunOptPass(std::string Filename,
+                                        std::string Pass) {
+  const auto Entry = OppPassList.find(Filename);
+  if (Entry == OppPassList.end())
+    return false;
+
+  return Entry->second.count(Pass) != 0;
+}
|
|
+
|
|
+/// Initializes the engine for the module named \p Module.
+///
+/// Steps performed, in order:
+///  1. Decide whether tuning input must be parsed (AUTOTUNE_INPUT environment
+///     variable, whose value replaces InputFile, or an explicit InputFile
+///     option) and whether opportunities must be generated (OutputOppDir).
+///  2. If either is requested, enable the engine and derive ModuleID from the
+///     absolute module path, with ProjectDir stripped when it is a prefix.
+///  3. Populate the pass-name -> CodeRegionType table (PTTMap).
+///  4. Parse the yaml input file when requested.
+///  5. Record the type filter and, when generating output, check that it is
+///     compatible with the selected incremental compilation mode; an
+///     incompatible filter switches the mode to Inactive.
+///  6. Derive the MLEnabled and DumpEnabled flags.
+///
+/// \param Module path of the module being compiled.
+/// \returns the parsing error if the input could not be read, success
+/// otherwise.
+Error AutoTuningEngine::init(const std::string &Module) {
+  // The AUTOTUNE_INPUT environment variable takes precedence over the
+  // command-line input file option.
+  ParseInput = false;
+  if (std::optional<std::string> MaybePath =
+          llvm::sys::Process::GetEnv("AUTOTUNE_INPUT")) {
+    InputFile = *MaybePath;
+    ParseInput = true;
+  } else if (InputFile.getNumOccurrences() > 0) {
+    ParseInput = true;
+  }
+
+  GenerateOutput = OutputOppDir.getNumOccurrences() > 0;
+
+  // Invocation of any of the following command line options
+  // (auto-tuning-input and auto-tuning-opp) or the env variable
+  // AUTOTUNE_INPUT can enable auto-tuning mode.
+  if (ParseInput || GenerateOutput) {
+    Enabled = true;
+    // Generate absolute path and remove the base directory (if available).
+    // A relative path will be used as (coarse-grain) code region name.
+    llvm::SmallString<128> ModuleVec = StringRef(Module);
+    llvm::sys::fs::make_absolute(ModuleVec);
+    if (ProjectDir.size() && ModuleVec.startswith(ProjectDir))
+      ModuleID = ModuleVec.substr(ProjectDir.size()).str();
+    else
+      ModuleID = std::string(ModuleVec);
+  }
+
+  // Initialization of map to be used for pass-name to CodeRegionType
+  // conversion.
+  PTTMap = {{"loop-unroll", Loop},
+            {"loop-vectorize", Loop},
+            {"inline", CallSite},
+            {"machine-scheduler", MachineBasicBlock},
+            {"switch-lowering", Switch},
+            {"autotuning-dump", Function}};
+
+  if (ParseInput) {
+    // Currently we only support yaml format for input.
+    if (Error E = AutoTuningRemarkManager::read(*this, InputFile, "yaml")) {
+      errs() << "Error parsing auto-tuning input.\n";
+      return E;
+    }
+    LLVM_DEBUG(dbgs() << "AutoTuningEngine is initialized.\n"
+                      << " Size of ParamTable: " << this->ParamTable.size()
+                      << "\n");
+    if (LLVMParams.size())
+      LLVM_DEBUG(dbgs() << "AutoTuner: LLVMParams applied.\n");
+    if (ProgramParams.size())
+      LLVM_DEBUG(dbgs() << "AutoTuner: ProgramParams applied.\n");
+  }
+
+  for (auto CRType : AutotuningOutputFilter)
+    CodeRegionFilterTypes.insert(CRType);
+
+  if (GenerateOutput) {
+    // True only when a type filter was given and every type listed in it
+    // satisfies IsAllowed.
+    const auto FilterOnlyHas = [&](auto IsAllowed) {
+      if (AutotuningOutputFilter.getNumOccurrences() == 0)
+        return false;
+      for (auto CRType : AutotuningOutputFilter)
+        if (!IsAllowed(CRType))
+          return false;
+      return true;
+    };
+    // Shared by both modes so that the diagnostic text cannot drift apart
+    // (the FineGrain copy used to print "matchwith").
+    const auto DisableIncrementalCompilation = [&] {
+      AutoTuningCompileMode = Inactive;
+      errs() << "AutoTunerCompile: Code region type filtering does not match"
+                " with incremental compilation option.\n"
+                "Disabling incremental compilation.\n";
+    };
+
+    switch (AutoTuningCompileMode) {
+    case CoarseGrain:
+      // Coarse-grain mode requires a filter made of LLVMParam only.
+      if (!FilterOnlyHas([](auto CRType) { return CRType == LLVMParam; }))
+        DisableIncrementalCompilation();
+      break;
+    case FineGrain:
+      // Fine-grain mode requires a filter made of Loop, CallSite and
+      // Function types only.
+      if (!FilterOnlyHas([](auto CRType) {
+            return CRType == Loop || CRType == CallSite || CRType == Function;
+          }))
+        DisableIncrementalCompilation();
+      break;
+    case Basic:
+    case Inactive:
+      break;
+    default:
+      llvm_unreachable("AutoTuningCompile: Unknown AutoTuner Incremental "
+                       "Compilation mode.\n");
+    }
+  }
+
+  MLEnabled = (CFGNumber.getNumOccurrences() > 0);
+  if (EnableAutoTuningDump || MLEnabled)
+    DumpEnabled = true;
+  return Error::success();
+}
|
|
+
|
|
+/// Returns the configuration number given through CFGNumber.
+///
+/// \returns CFGNumber when isMLEnabled() is true; otherwise a string error
+/// stating that no Autotuner configuration was specified.
+llvm::Expected<int> AutoTuningEngine::getConfigNumber() {
+  if (isMLEnabled())
+    return CFGNumber;
+
+  std::string Message =
+      "No Autotuner configuration specified; ML guidance is unavailable.";
+  return createStringError(inconvertibleErrorCode(), Message);
+}
|
|
+
|
|
+/// Ends an auto-tuning run.
+///
+/// When an opportunity output directory was requested, the collected tuning
+/// opportunities are filtered, dumped (if any remain) and then cleared.
+///
+/// \returns the error reported by the dump, success otherwise. On a dump
+/// error TuningOpps is left as it is.
+Error AutoTuningEngine::finalize() {
+  // Nothing to emit unless an opportunity output directory was requested.
+  if (OutputOppDir.getNumOccurrences() == 0)
+    return Error::success();
+
+  applyOppFilters(TuningOpps);
+
+  if (!TuningOpps.empty()) {
+    if (Error DumpErr = AutoTuningRemarkManager::dump(
+            *this, OutputOppDir, OutputFormat, RemarksPasses)) {
+      errs() << "Error generating auto-tuning opportunities.\n";
+      return DumpErr;
+    }
+  }
+
+  // Forget the collected opportunities so that no stale information is left
+  // behind once auto-tuning ends.
+  TuningOpps.clear();
+  return Error::success();
+}
|
|
+
|
|
+/// Looks up the global (module-level) parameter \p ParamsName.
+///
+/// \param ParamsName name of the parameter to search for in GlobalParams.
+/// \param Value      passed on to GlobalParams.findByName() as the output.
+/// \returns whether GlobalParams.findByName() reported the parameter as found.
+template <typename T>
+bool AutoTuningEngine::lookUpGlobalParams(const std::string &ParamsName,
+                                          T &Value) const {
+  if (!GlobalParams.findByName(ParamsName, Value))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Global Variable " << ParamsName << " is set.\n");
+  return true;
+}
+
+// Explicit instantiations for the value types: int, bool, std::string and
+// std::vector<std::string>.
+template bool
+AutoTuningEngine::lookUpGlobalParams<int>(const std::string &ParamsName,
+                                          int &Value) const;
+template bool
+AutoTuningEngine::lookUpGlobalParams<bool>(const std::string &ParamsName,
+                                           bool &Value) const;
+template bool
+AutoTuningEngine::lookUpGlobalParams<std::string>(const std::string &ParamsName,
+                                                  std::string &Value) const;
+template bool AutoTuningEngine::lookUpGlobalParams<std::vector<std::string>>(
+    const std::string &ParamsName, std::vector<std::string> &Value) const;
|
|
+
|
|
+/// The AutoTuningEngine instance defined in namespace autotuning; it is
+/// referenced from other code as autotuning::Engine.
+AutoTuningEngine Engine;
|
|
+
|
|
+} // namespace autotuning
|
|
+
|
|
+#endif
|
|
diff --git a/llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp b/llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp
|
|
new file mode 100644
|
|
index 000000000000..3e0506e534c4
|
|
--- /dev/null
|
|
+++ b/llvm/lib/AutoTuner/AutoTuningRemarkManager.cpp
|
|
@@ -0,0 +1,299 @@
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+//===- llvm/AutoTuner/AutoTuningRemarkManager.cpp - Remark Manager --------===//
|
|
+//
|
|
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
+// See https://llvm.org/LICENSE.txt for license information.
|
|
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
+//
|
|
+//===----------------------------------------------------------------------===//
|
|
+//
|
|
+// This file contains the implementation for reading (input) and writing
+// (output) remarks for AutoTuning.
|
|
+//
|
|
+//===----------------------------------------------------------------------===//
|
|
+
|
|
+#include "llvm/AutoTuner/AutoTuningRemarkManager.h"
|
|
+#include "llvm/ADT/StringRef.h"
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#include "llvm/AutoTuner/AutoTuningRemarkStreamer.h"
|
|
+#include "llvm/IR/DebugInfoMetadata.h"
|
|
+#include "llvm/IR/LLVMRemarkStreamer.h"
|
|
+#include "llvm/Remarks/Remark.h"
|
|
+#include "llvm/Remarks/RemarkFormat.h"
|
|
+#include "llvm/Remarks/RemarkParser.h"
|
|
+#include "llvm/Remarks/RemarkSerializer.h"
|
|
+#include "llvm/Remarks/RemarkStreamer.h"
|
|
+#include "llvm/Support/CommandLine.h"
|
|
+#include "llvm/Support/Debug.h"
|
|
+#include "llvm/Support/FileSystem.h"
|
|
+#include "llvm/Support/MemoryBuffer.h"
|
|
+#include "llvm/Support/Path.h"
|
|
+#include "llvm/Support/ToolOutputFile.h"
|
|
+
|
|
+// Enable debug messages for AutoTuner.
|
|
+#define DEBUG_TYPE "autotuning"
|
|
+
|
|
+using namespace llvm;
|
|
+using namespace autotuning;
|
|
+
|
|
+// Helper functions.
|
|
+namespace {
|
|
+// Converts the textual code region type found in a remark into the matching
+// CodeRegionType. Returns a StringError for any unrecognized spelling.
+Expected<CodeRegionType> StringToCodeRegionType(const std::string &CRType) {
+  struct NamedType {
+    const char *Name;
+    CodeRegionType Type;
+  };
+  // Every spelling accepted in the input, with the type it denotes.
+  static const NamedType KnownTypes[] = {
+      {"machine_basic_block", autotuning::CodeRegionType::MachineBasicBlock},
+      {"loop", autotuning::CodeRegionType::Loop},
+      {"function", autotuning::CodeRegionType::Function},
+      {"callsite", autotuning::CodeRegionType::CallSite},
+      {"llvm-param", autotuning::CodeRegionType::LLVMParam},
+      {"program-param", autotuning::CodeRegionType::ProgramParam},
+      {"switch", autotuning::CodeRegionType::Switch},
+      {"other", autotuning::CodeRegionType::Other},
+  };
+
+  for (const NamedType &Known : KnownTypes)
+    if (CRType == Known.Name)
+      return Known.Type;
+
+  return make_error<StringError>("Unsupported CodeRegionType:" + CRType,
+                                 inconvertibleErrorCode());
+}
|
|
+
|
|
+// Builds an autotuning::ParameterManager from the arguments of a remark.
+//
+// Each argument is stored under its key with a value whose type is chosen in
+// this order: base-10 integer, boolean ("true"/"false"), vector of strings
+// (when the argument carries a vector value), and finally plain string.
+ParameterManager RemarkToParameterManager(const remarks::Remark &Remark) {
+  ParameterManager Params;
+  for (const remarks::Argument &Arg : Remark.Args) {
+    // getAsInteger() returns false when the text parsed without errors.
+    int AsInt = 0;
+    if (!Arg.Val.getAsInteger(10, AsInt)) {
+      Params.add(Arg.Key.str(), AsInt);
+      continue;
+    }
+
+    if (Arg.Val == "true") {
+      Params.add(Arg.Key.str(), true);
+      continue;
+    }
+
+    if (Arg.Val == "false") {
+      Params.add(Arg.Key.str(), false);
+      continue;
+    }
+
+    // A value of vector type is stored as a vector of strings.
+    if (Arg.VectorVal) {
+      std::vector<std::string> Strings;
+      for (const StringRef &Val : *Arg.VectorVal)
+        Strings.push_back(Val.str());
+      Params.add(Arg.Key.str(), Strings);
+      continue;
+    }
+
+    // Anything else is kept as a string value.
+    Params.add(Arg.Key.str(), Arg.Val);
+  }
+
+  return Params;
+}
|
|
+
|
|
+// Collects the arguments of a remark into a key -> value map of strings. When
+// a key occurs more than once, the value of its last occurrence is kept.
+std::unordered_map<std::string, std::string>
+RemarkToStringMap(const remarks::Remark &Remark) {
+  std::unordered_map<std::string, std::string> Params;
+  for (const remarks::Argument &Arg : Remark.Args)
+    Params.insert_or_assign(Arg.Key.str(), Arg.Val.str());
+  return Params;
+}
|
|
+
|
|
+// Extracts the source location of a remark as an autotuning::SourceLocation.
+// A remark without location yields a default-constructed SourceLocation.
+SourceLocation RemarkToSourceLocation(const remarks::Remark &Remark) {
+  SourceLocation Result;
+  if (!Remark.Loc)
+    return Result;
+
+  const auto &Loc = *Remark.Loc;
+  Result = {Loc.SourceFilePath.str(), Loc.SourceLine, Loc.SourceColumn};
+  return Result;
+}
|
|
+
|
|
+// Builds an autotuning::CodeRegion from a remark.
+//
+// The region takes its name, function name, pass name and source location
+// from the remark and its type from Type, which must hold a value. Hash and
+// invocation are copied only when the remark provides them.
+CodeRegion RemarkToCodeRegion(const remarks::Remark &Remark,
+                              Expected<CodeRegionType> &Type) {
+  SourceLocation Where = RemarkToSourceLocation(Remark);
+  CodeRegion Region(Remark.RemarkName.str(), Remark.FunctionName.str(),
+                    Remark.PassName.str(), Type.get(), Where);
+
+  if (Remark.CodeRegionHash)
+    Region.setHash(*Remark.CodeRegionHash);
+  if (Remark.Invocation)
+    Region.setInvocation(*Remark.Invocation);
+
+  return Region;
+}
|
|
+
|
|
+// Serializes the code regions of CRList as remarks into RemarksFilename.
+//
+// RemarksFormat selects the remark format and RemarksPasses, when not empty,
+// is installed as the pass filter of the streamer. Returns a null pointer when
+// RemarksFilename is empty, the opened output file on success, and an
+// LLVMRemarkSetupFormatError wrapping the cause on any failure.
+Expected<std::unique_ptr<ToolOutputFile>> emitAutoTuningRemarks(
+    const StringRef RemarksFilename, const StringRef RemarksFormat,
+    const StringRef RemarksPasses, const CodeRegions &CRList) {
+  if (RemarksFilename.empty())
+    return nullptr;
+
+  // Parse remark format. Options are yaml, yaml-strtab and bitstream.
+  Expected<remarks::Format> ParsedFormat = remarks::parseFormat(RemarksFormat);
+  if (!ParsedFormat)
+    return make_error<LLVMRemarkSetupFormatError>(ParsedFormat.takeError());
+
+  // YAML output is opened in text mode, every other format without flags.
+  auto OpenFlags = *ParsedFormat == remarks::Format::YAML ? sys::fs::OF_Text
+                                                          : sys::fs::OF_None;
+  std::error_code OpenEC;
+  auto OutFile =
+      std::make_unique<ToolOutputFile>(RemarksFilename, OpenEC, OpenFlags);
+  if (OpenEC)
+    return make_error<LLVMRemarkSetupFormatError>(errorCodeToError(OpenEC));
+
+  // The serializer writes the code regions to the output file.
+  Expected<std::unique_ptr<remarks::RemarkSerializer>> Serializer =
+      remarks::createRemarkSerializer(
+          *ParsedFormat, remarks::SerializerMode::Separate, OutFile->os());
+  if (!Serializer)
+    return make_error<LLVMRemarkSetupFormatError>(Serializer.takeError());
+
+  // The streamers are layered on top of the serializer.
+  remarks::RemarkStreamer BaseStreamer(std::move(*Serializer),
+                                       RemarksFilename);
+  AutoTuningRemarkStreamer Streamer(BaseStreamer);
+
+  if (!RemarksPasses.empty())
+    if (Error FilterErr = Streamer.setFilter(RemarksPasses))
+      return make_error<LLVMRemarkSetupFormatError>(std::move(FilterErr));
+
+  // Emit CodeRegions in Remark format.
+  for (const CodeRegion &CR : CRList)
+    Streamer.emit(CR);
+
+  return std::move(OutFile);
+}
|
|
+} // namespace
|
|
+
|
|
+/// Reads auto-tuning remarks from \p InputFileName and loads them into \p E.
+///
+/// Only remarks of type AutoTuning are considered. Remarks of code region type
+/// Other, LLVMParam or ProgramParam whose name equals the engine's ModuleID
+/// populate GlobalParams, LLVMParams and ProgramParams respectively. Each
+/// remaining remark becomes a CodeRegion -> ParameterManager entry in
+/// ParamTable, and its pass name and type are recorded in OppPassList and
+/// CodeRegionFilterTypes. While ThinLTO tuning is active, CallSite, Loop,
+/// MachineBasicBlock and Function regions are ignored.
+///
+/// \param E             engine that receives the parsed configuration.
+/// \param InputFileName path of the remark file to parse.
+/// \param RemarksFormat format name understood by remarks::parseFormat().
+/// \returns an error if the file cannot be opened, if the format or a remark
+/// cannot be parsed, or if a remark has a missing or unsupported
+/// CodeRegionType; success otherwise.
+llvm::Error AutoTuningRemarkManager::read(AutoTuningEngine &E,
+                                          const std::string &InputFileName,
+                                          const std::string &RemarksFormat) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
+      MemoryBuffer::getFile(InputFileName.c_str());
+  if (std::error_code EC = Buf.getError())
+    return make_error<StringError>(
+        "Can't open file " + InputFileName + ": " + EC.message(), EC);
+  // Parse remark format. Options are yaml, yaml-strtab and bitstream.
+  Expected<remarks::Format> Format = remarks::parseFormat(RemarksFormat);
+  if (!Format)
+    return Format.takeError();
+
+  Expected<std::unique_ptr<remarks::RemarkParser>> MaybeParser =
+      remarks::createRemarkParserFromMeta(*Format, (*Buf)->getBuffer());
+  if (!MaybeParser)
+    return MaybeParser.takeError();
+  remarks::RemarkParser &Parser = **MaybeParser;
+
+  while (true) {
+    Expected<std::unique_ptr<remarks::Remark>> MaybeRemark = Parser.next();
+    if (!MaybeRemark) {
+      // Named Err (not E) so that the engine parameter is not shadowed.
+      Error Err = MaybeRemark.takeError();
+      if (Err.isA<remarks::EndOfFileError>()) {
+        // EOF.
+        consumeError(std::move(Err));
+        break;
+      }
+      return Err;
+    }
+    const remarks::Remark &Remark = **MaybeRemark;
+
+    if (Remark.RemarkType != remarks::Type::AutoTuning)
+      continue;
+
+    if (!Remark.CodeRegionType)
+      return make_error<StringError>("CodeRegionType field is missing.",
+                                     inconvertibleErrorCode());
+    Expected<CodeRegionType> Type =
+        StringToCodeRegionType(Remark.CodeRegionType->str());
+    if (!Type)
+      return Type.takeError();
+    CodeRegionType CRType = Type.get();
+
+    // Module-level remarks (Other / LLVMParam / ProgramParam) carry
+    // parameters instead of describing a code region. They are only applied
+    // when they are meant for the current module.
+    if (CRType == autotuning::Other && Remark.RemarkName == E.ModuleID) {
+      E.GlobalParams = RemarkToParameterManager(Remark);
+      continue;
+    }
+    if (CRType == autotuning::LLVMParam && Remark.RemarkName == E.ModuleID) {
+      E.LLVMParams = RemarkToStringMap(Remark);
+      continue;
+    }
+    if (CRType == autotuning::ProgramParam &&
+        Remark.RemarkName == E.ModuleID) {
+      E.ProgramParams = RemarkToStringMap(Remark);
+      continue;
+    }
+    if (E.isThinLTOTuning() &&
+        (CRType == autotuning::CallSite || CRType == autotuning::Loop ||
+         CRType == autotuning::MachineBasicBlock ||
+         CRType == autotuning::Function)) {
+      LLVM_DEBUG(dbgs() << "AutoTuner does not support tuning of "
+                        << CodeRegion::getTypeAsString(CRType)
+                        << " for thinLTO durning link-time optimization. "
+                           "Ignoring current code region.\n");
+      continue;
+    }
+
+    // Create the CodeRegion and its parameters from the remark and add the
+    // CodeRegion-ParameterManager entry into the lookup table.
+    CodeRegion CR = RemarkToCodeRegion(Remark, Type);
+    ParameterManager ParamManager = RemarkToParameterManager(Remark);
+    E.ParamTable[CR] = ParamManager;
+
+    // OppPassList is keyed by the source path cut at its last '.'.
+    std::string Filename = CR.getSourceLoc().SourceFilePath;
+    size_t Pos = Filename.rfind('.');
+    if (Pos != std::string::npos)
+      Filename.erase(Pos);
+    E.OppPassList[Filename].insert(CR.getPassName());
+    E.CodeRegionFilterTypes.insert(CR.getType());
+  }
+  return Error::success();
+}
|
|
+
|
|
+/// Writes the tuning opportunities of \p E as remarks into \p DirName.
+///
+/// The directory is created if needed. The output file is named after the
+/// file name component of the engine's ModuleID followed by "." and
+/// \p RemarksFormat; if that file already exists, the first unused numeric
+/// suffix (".1", ".2", ...) is appended. No file is written when the engine
+/// has no tuning opportunities.
+///
+/// \param E             engine whose TuningOpps are dumped.
+/// \param DirName       output directory (made absolute before use).
+/// \param RemarksFormat remark format, also used as the file extension.
+/// \param RemarksPasses pass filter forwarded to the remark streamer.
+/// \returns an error if the directory cannot be created or the remarks cannot
+/// be emitted; success otherwise.
+Error AutoTuningRemarkManager::dump(const autotuning::AutoTuningEngine &E,
+                                    const std::string &DirName,
+                                    const std::string &RemarksFormat,
+                                    const std::string &RemarksPasses) {
+  // Change to absolute path.
+  SmallString<256> OutputPath = StringRef(DirName);
+  sys::fs::make_absolute(OutputPath);
+
+  // Make sure the new output directory exists, creating it if necessary.
+  if (std::error_code EC = sys::fs::create_directories(OutputPath)) {
+    return make_error<StringError>("could not create directory: " +
+                                       Twine(OutputPath) + ": " + EC.message(),
+                                   EC);
+  }
+
+  if (E.TuningOpps.empty())
+    return Error::success();
+
+  StringRef ModelFileName = sys::path::filename(E.ModuleID);
+  sys::path::append(OutputPath, ModelFileName + "." + RemarksFormat);
+
+  // Skip over the already existing output files (<name>.<format>,
+  // <name>.<format>.1, ...) and pick the first unused suffix. Suffixes start
+  // from 1.
+  int Suffix = 1;
+  while (sys::fs::exists(OutputPath)) {
+    sys::path::remove_filename(OutputPath);
+    sys::path::append(OutputPath, ModelFileName + "." + RemarksFormat + "." +
+                                      Twine(Suffix));
+    Suffix += 1;
+  }
+
+  Expected<std::unique_ptr<ToolOutputFile>> RemarksFileOrErr =
+      emitAutoTuningRemarks(OutputPath, RemarksFormat, RemarksPasses,
+                            E.TuningOpps);
+  // Named Err (not E) so that the engine parameter is not shadowed.
+  if (Error Err = RemarksFileOrErr.takeError())
+    return Err;
+
+  // Keep the file on disk once it has been written successfully.
+  std::unique_ptr<ToolOutputFile> RemarksFile = std::move(*RemarksFileOrErr);
+  if (RemarksFile)
+    RemarksFile->keep();
+
+  return Error::success();
+}
|
|
+
|
|
+#endif
|
|
diff --git a/llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp b/llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp
|
|
new file mode 100644
|
|
index 000000000000..0516c055a139
|
|
--- /dev/null
|
|
+++ b/llvm/lib/AutoTuner/AutoTuningRemarkStreamer.cpp
|
|
@@ -0,0 +1,55 @@
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+// ===---------- llvm/AutoTuner/AutoTuningRemarkStreamer.cpp --------------===//
|
|
+//
|
|
+// The LLVM Compiler Infrastructure
|
|
+//
|
|
+// This file is distributed under the University of Illinois Open Source
|
|
+// License. See LICENSE.TXT for details.
|
|
+//
|
|
+// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+//
|
|
+// ===---------------------------------------------------------------------===//
|
|
+//
|
|
+// This file contains the implementation of the conversion between AutoTuner
|
|
+// CodeRegions and serializable remarks::Remark objects.
|
|
+//
|
|
+// ===---------------------------------------------------------------------===//
|
|
+
|
|
+#include "llvm/AutoTuner/AutoTuningRemarkStreamer.h"
|
|
+
|
|
+using namespace llvm;
|
|
+
|
|
+// Converts an autotuning::CodeRegion into a serializable remarks::Remark of
+// type AutoTuning. The location is only set when the code region has one.
+remarks::Remark
+AutoTuningRemarkStreamer::toRemark(const autotuning::CodeRegion &CR) {
+  remarks::Remark Result;
+
+  // Generic remark fields.
+  Result.RemarkType = remarks::Type::AutoTuning;
+  Result.RemarkName = CR.getName();
+  Result.PassName = CR.getPassName();
+  Result.FunctionName = CR.getFuncName();
+
+  // AutoTuner-specific fields.
+  Result.CodeRegionType = CR.getTypeAsString();
+  Result.CodeRegionHash = CR.getHash();
+  Result.Invocation = CR.getInvocation();
+  Result.AutoTunerOptions = CR.getAutoTunerOptions();
+  Result.BaselineConfig = CR.getBaselineConfig();
+
+  const autotuning::SourceLocation &Location = CR.getSourceLoc();
+  if (Location)
+    Result.Loc = remarks::RemarkLocation{
+        Location.SourceFilePath, Location.SourceLine, Location.SourceColumn};
+
+  return Result;
+}
|
|
+
|
|
+/// Emits \p CR as a remark through the serializer of the underlying remark
+/// streamer, provided the pass name of \p CR matches the streamer's filter.
+void AutoTuningRemarkStreamer::emit(const autotuning::CodeRegion &CR) {
+  if (RS.matchesFilter(CR.getPassName())) {
+    // Convert the code region first, then hand the remark to the serializer.
+    remarks::Remark Converted = toRemark(CR);
+    RS.getSerializer().emit(Converted);
+  }
+}
|
|
+
|
|
+/// Forwards \p Filter to the underlying remark streamer and returns the
+/// result of its setFilter() call.
+Error AutoTuningRemarkStreamer::setFilter(StringRef Filter) {
+  Error Status = RS.setFilter(Filter);
+  return Status;
+}
|
|
+#endif
|
|
diff --git a/llvm/lib/AutoTuner/CMakeLists.txt b/llvm/lib/AutoTuner/CMakeLists.txt
|
|
new file mode 100644
|
|
index 000000000000..c618474fe5ae
|
|
--- /dev/null
|
|
+++ b/llvm/lib/AutoTuner/CMakeLists.txt
|
|
@@ -0,0 +1,11 @@
|
|
+add_llvm_component_library(LLVMAutoTuner
|
|
+ AutoTuning.cpp
|
|
+ AutoTuningRemarkManager.cpp
|
|
+ AutoTuningRemarkStreamer.cpp
|
|
+
|
|
+ ADDITIONAL_HEADER_DIRS
|
|
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/AutoTuner
|
|
+
|
|
+ LINK_COMPONENTS
|
|
+ Remarks
|
|
+)
|
|
\ No newline at end of file
|
|
diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt
|
|
index 283baa6090eb..966137c0f71f 100644
|
|
--- a/llvm/lib/CMakeLists.txt
|
|
+++ b/llvm/lib/CMakeLists.txt
|
|
@@ -28,6 +28,7 @@ add_subdirectory(Object)
|
|
add_subdirectory(ObjectYAML)
|
|
add_subdirectory(Option)
|
|
add_subdirectory(Remarks)
|
|
+add_subdirectory(AutoTuner)
|
|
add_subdirectory(Debuginfod)
|
|
add_subdirectory(DebugInfo)
|
|
add_subdirectory(DWP)
|
|
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
|
|
index 106571b9061b..9029dc7bb3d9 100644
|
|
--- a/llvm/lib/CodeGen/CMakeLists.txt
|
|
+++ b/llvm/lib/CodeGen/CMakeLists.txt
|
|
@@ -273,6 +273,7 @@ add_llvm_component_library(LLVMCodeGen
|
|
|
|
LINK_COMPONENTS
|
|
Analysis
|
|
+ AutoTuner
|
|
BitReader
|
|
BitWriter
|
|
CodeGenTypes
|
|
diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
|
|
index 5a005ba7b414..9dcb3833ab91 100644
|
|
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
|
|
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
|
|
@@ -29,6 +29,24 @@ using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "calcspillweights"
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
+// Tunable replacements for the fixed spill-weight factors used in this file.
+// The defaults (3, 0.5 and 1.01) equal the constants that are used when
+// ENABLE_AUTOTUNER is not defined.
+
+// Factor applied to what looks like a loop induction variable update.
+static cl::opt<float>
+    LoopWeight("reg-spill-loop-weight", cl::Hidden,
+               cl::desc("Tunable extra weight to what looks like a loop "
+                        "induction variable"),
+               cl::init(3));
+
+// Factor applied to re-materializable live intervals.
+static cl::opt<float>
+    RemaWeight("reg-spill-rematerialize-weight", cl::Hidden,
+               cl::desc("Tunable reduced weight giving re-materialize "
+                        "oppotunities"),
+               cl::init(0.5f));
+
+// Factor applied to the spill weight of hinted registers.
+static cl::opt<float> HintWeight(
+    "reg-spill-hint-weight", cl::Hidden,
+    cl::desc("Tunable weakly boost weight of hinted registers"),
+    cl::init(1.01f));
+#endif
|
|
+
|
|
void VirtRegAuxInfo::calculateSpillWeightsAndHints() {
|
|
LLVM_DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
|
|
<< "********** Function: " << MF.getName() << '\n');
|
|
@@ -252,7 +270,11 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
|
|
|
|
// Give extra weight to what looks like a loop induction variable update.
|
|
if (Writes && IsExiting && LIS.isLiveOutOfMBB(LI, MBB))
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ Weight *= LoopWeight;
|
|
+#else
|
|
Weight *= 3;
|
|
+#endif
|
|
|
|
TotalWeight += Weight;
|
|
}
|
|
@@ -288,7 +310,11 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
|
|
}
|
|
|
|
// Weakly boost the spill weight of hinted registers.
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ TotalWeight *= HintWeight;
|
|
+#else
|
|
TotalWeight *= 1.01F;
|
|
+#endif
|
|
}
|
|
|
|
// If the live interval was already unspillable, leave it that way.
|
|
@@ -315,7 +341,11 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
|
|
// FIXME: this gets much more complicated once we support non-trivial
|
|
// re-materialization.
|
|
if (isRematerializable(LI, LIS, VRM, *MF.getSubtarget().getInstrInfo()))
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ TotalWeight *= RemaWeight;
|
|
+#else
|
|
TotalWeight *= 0.5F;
|
|
+#endif
|
|
|
|
if (IsLocalSplitArtifact)
|
|
return normalize(TotalWeight, Start->distance(*End), NumInstr);
|
|
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
|
|
index 231544494c32..327cd40f86a4 100644
|
|
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
|
|
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
|
|
@@ -37,6 +37,9 @@
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include "llvm/Target/TargetMachine.h"
|
|
#include <algorithm>
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/IR/StructuralHash.h"
|
|
+#endif
|
|
#include <cmath>
|
|
using namespace llvm;
|
|
|
|
@@ -1703,6 +1706,39 @@ MachineBasicBlock::livein_iterator MachineBasicBlock::livein_begin() const {
|
|
return LiveIns.begin();
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+/// Returns the value StructuralHash() computes for this machine basic block.
+uint64_t MachineBasicBlock::computeStructuralHash() {
+  const uint64_t Hash = StructuralHash(*this);
+  return Hash;
+}
|
|
+
|
|
+/// Builds the auto-tuning CodeRegion describing this machine basic block and
+/// stores it with setCodeRegion().
+///
+/// The region is named "%bb.<number>:<name>", belongs to the parent machine
+/// function and has type MachineBasicBlock. Its location is the debug location
+/// of the first instruction when the block is not empty. Its hotness is taken
+/// from the ATEFunction of the parent IR function.
+void MachineBasicBlock::initCodeRegion() {
+  std::string BasicBlockName =
+      ("%bb." + Twine(this->getNumber()) + ":" + this->getName()).str();
+  MachineFunction *MF = this->getParent();
+  // Own a copy of the function name: StringRef::data() is not guaranteed to
+  // be NUL-terminated, so it must not be handed out as a C string.
+  const std::string FuncName = MF->getName().str();
+
+  autotuning::CodeRegion CR;
+  if (!this->empty()) {
+    const DebugLoc &StartLoc = this->front().getDebugLoc();
+    CR = autotuning::CodeRegion(BasicBlockName, FuncName,
+                                autotuning::CodeRegionType::MachineBasicBlock,
+                                StartLoc);
+  } else {
+    CR = autotuning::CodeRegion(BasicBlockName, FuncName,
+                                autotuning::CodeRegionType::MachineBasicBlock);
+  }
+
+  // The size is the distance between the first and the last non-debug
+  // instruction of this MBB.
+  // NOTE(review): this distance does not count the last non-debug instruction
+  // itself and does count debug instructions located in between - confirm
+  // that this is the intended notion of "size".
+  unsigned NumInstrs = std::distance(this->getFirstNonDebugInstr(),
+                                     this->getLastNonDebugInstr());
+  CR.setSize(NumInstrs);
+
+  // Compute hotness.
+  autotuning::HotnessType Hotness = MF->getFunction().ATEFunction.getHotness();
+  CR.setHotness(Hotness);
+
+  this->setCodeRegion(CR);
+}
|
|
+#endif
|
|
+
|
|
MachineBasicBlock::liveout_iterator MachineBasicBlock::liveout_begin() const {
|
|
const MachineFunction &MF = *getParent();
|
|
assert(MF.getProperties().hasProperty(
|
|
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
|
|
index ba5432459d12..caccc9e5fad4 100644
|
|
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
|
|
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
|
|
@@ -569,6 +569,12 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
|
|
for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
|
|
MBB != MBBEnd; ++MBB) {
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // Before visiting this MBB, initialize it for auto-tuning if AutoTuning is
|
|
+ // enabled.
|
|
+ autotuning::Engine.initContainer(&*MBB, DEBUG_TYPE);
|
|
+#endif
|
|
+
|
|
Scheduler.startBlock(&*MBB);
|
|
|
|
#ifndef NDEBUG
|
|
@@ -3244,6 +3250,44 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
|
|
RegionPolicy.ShouldTrackLaneMasks = false;
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // AUTO-TUNING - Look up the MBB-level scheduling direction if AutoTuning is
|
|
+ // enabled.
|
|
+ if (autotuning::Engine.isEnabled()) {
|
|
+ MachineBasicBlock &MBB = *Begin->getParent();
|
|
+
|
|
+ bool NewForceBottomUp = false;
|
|
+ // Look up from xml file, and overwrite values
|
|
+ bool IsForceBottomUpSet =
|
|
+ MBB.lookUpParams<bool>("ForceBottomUp", NewForceBottomUp);
|
|
+
|
|
+ bool NewForceForceTopDown = false;
|
|
+ bool IsForceTopDownSet =
|
|
+ MBB.lookUpParams<bool>("ForceTopDown", NewForceForceTopDown);
|
|
+
|
|
+ assert((!NewForceBottomUp || !NewForceForceTopDown) &&
|
|
+ "BottomUp and TopDown cannot both set to true");
|
|
+
|
|
+ if (IsForceBottomUpSet) {
|
|
+ RegionPolicy.OnlyBottomUp = NewForceBottomUp;
|
|
+ if (RegionPolicy.OnlyBottomUp) {
|
|
+ RegionPolicy.OnlyTopDown = false;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (IsForceTopDownSet) {
|
|
+ RegionPolicy.OnlyTopDown = NewForceForceTopDown;
|
|
+ if (RegionPolicy.OnlyTopDown) {
|
|
+ RegionPolicy.OnlyBottomUp = false;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (IsForceBottomUpSet || IsForceTopDownSet) {
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+
|
|
// Check -misched-topdown/bottomup can force or unforce scheduling direction.
|
|
// e.g. -misched-bottomup=false allows scheduling in both directions.
|
|
assert((!ForceTopDown || !ForceBottomUp) &&
|
|
diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
|
|
index 36a02d5beb4b..d4ac95d534ed 100644
|
|
--- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
|
|
+++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
|
|
@@ -16,6 +16,9 @@
|
|
#include "llvm/CodeGen/MachineJumpTableInfo.h"
|
|
#include "llvm/CodeGen/TargetLowering.h"
|
|
#include "llvm/Target/TargetMachine.h"
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#endif
|
|
|
|
using namespace llvm;
|
|
using namespace SwitchCG;
|
|
@@ -61,7 +64,23 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
|
|
if (!TLI->areJTsAllowed(SI->getParent()->getParent()))
|
|
return;
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries();
|
|
+ // Overwrite MinJumpTableEntries when it is set by Autotuner
|
|
+ if (autotuning::Engine.isEnabled()) {
|
|
+ autotuning::Engine.initContainer(SI->ATESwitchInst.get(),
|
|
+ "switch-lowering");
|
|
+
|
|
+ int NewValue = 0; // the int value is set by lookUpParams()
|
|
+ bool Changed =
|
|
+ SI->ATESwitchInst->lookUpParams<int>("MinJumpTableEntries", NewValue);
|
|
+ if (Changed)
|
|
+ MinJumpTableEntries = NewValue;
|
|
+ }
|
|
+#else
|
|
const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries();
|
|
+#endif
|
|
+
|
|
const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2;
|
|
|
|
// Bail if not enough cases.
|
|
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
|
|
index df753b91ff90..af77e6c2dc4d 100644
|
|
--- a/llvm/lib/IR/AsmWriter.cpp
|
|
+++ b/llvm/lib/IR/AsmWriter.cpp
|
|
@@ -2602,11 +2602,21 @@ public:
|
|
void writeAllAttributeGroups();
|
|
|
|
void printTypeIdentities();
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ void printGlobal(const GlobalVariable *GV, bool PrintDeclarationOnly = false);
|
|
+ void printAlias(const GlobalAlias *GA);
|
|
+ void printIFunc(const GlobalIFunc *GI);
|
|
+ void printComdat(const Comdat *C);
|
|
+ void printRequisiteDeclarations(const Function *F);
|
|
+ void printFunction(const Function *F, bool PrintCompleteIR = false,
|
|
+ bool PrintDeclarationOnly = false);
|
|
+#else
|
|
void printGlobal(const GlobalVariable *GV);
|
|
void printAlias(const GlobalAlias *GA);
|
|
void printIFunc(const GlobalIFunc *GI);
|
|
void printComdat(const Comdat *C);
|
|
void printFunction(const Function *F);
|
|
+#endif
|
|
void printArgument(const Argument *FA, AttributeSet Attrs);
|
|
void printBasicBlock(const BasicBlock *BB);
|
|
void printInstructionLine(const Instruction &I);
|
|
@@ -3593,15 +3603,26 @@ static void maybePrintComdat(formatted_raw_ostream &Out,
|
|
Out << ')';
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+void AssemblyWriter::printGlobal(const GlobalVariable *GV,
|
|
+ bool PrintDeclarationOnly) {
|
|
+ if (GV->isMaterializable() && !PrintDeclarationOnly)
|
|
+#else
|
|
void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
|
|
if (GV->isMaterializable())
|
|
+#endif
|
|
Out << "; Materializable\n";
|
|
|
|
AsmWriterContext WriterCtx(&TypePrinter, &Machine, GV->getParent());
|
|
WriteAsOperandInternal(Out, GV, WriterCtx);
|
|
Out << " = ";
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if ((!GV->hasInitializer() || PrintDeclarationOnly) &&
|
|
+ GV->hasExternalLinkage())
|
|
+#else
|
|
if (!GV->hasInitializer() && GV->hasExternalLinkage())
|
|
+#endif
|
|
Out << "external ";
|
|
|
|
Out << getLinkageNameWithSpace(GV->getLinkage());
|
|
@@ -3619,7 +3640,11 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
|
|
Out << (GV->isConstant() ? "constant " : "global ");
|
|
TypePrinter.print(GV->getValueType(), Out);
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (GV->hasInitializer() && !PrintDeclarationOnly) {
|
|
+#else
|
|
if (GV->hasInitializer()) {
|
|
+#endif
|
|
Out << ' ';
|
|
writeOperand(GV->getInitializer(), false);
|
|
}
|
|
@@ -3769,12 +3794,102 @@ void AssemblyWriter::printTypeIdentities() {
|
|
}
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+/// printRequisiteDeclarations - Print the declarations of type identities,
|
|
+/// global variables, functions, and function attribute groups of a function.
|
|
+void AssemblyWriter::printRequisiteDeclarations(const Function *F) {
|
|
+  // Walk through the instructions and collect global variables & functions.
|
|
+  SmallPtrSet<GlobalVariable *, 8> GVs;
|
|
+  SmallPtrSet<Function *, 8> Functions;
|
|
+  for (const BasicBlock &BB : *F) {
|
|
+    for (const Instruction &I : BB) {
|
|
+      // Record direct callees (call/invoke/callbr); F itself is defined later.
|
|
+      if (const auto *CI = dyn_cast<CallBase>(&I)) {
|
|
+        Function *func = CI->getCalledFunction();
|
|
+        if (func && func != F)
|
|
+          Functions.insert(func);
|
|
+      }
|
|
+      // Check for global variables, directly or through a GEP operand.
|
|
+      for (const Use &U : I.operands()) {
|
|
+        if (GlobalVariable *gv = dyn_cast<GlobalVariable>(U))
|
|
+          GVs.insert(gv);
|
|
+        if (GEPOperator *gepo = dyn_cast<GEPOperator>(&U)) {
|
|
+          if (GlobalVariable *gv =
|
|
+                  dyn_cast<GlobalVariable>(gepo->getPointerOperand()))
|
|
+            GVs.insert(gv);
|
|
+          for (auto it = gepo->idx_begin(), et = gepo->idx_end(); it != et;
|
|
+               ++it) {
|
|
+            if (GlobalVariable *gv = dyn_cast<GlobalVariable>(*it))
|
|
+              GVs.insert(gv);
|
|
+          }
|
|
+        }
|
|
+      }
|
|
+    }
|
|
+  }
|
|
+
|
|
+  // Print type identities.
|
|
+  printTypeIdentities();
|
|
+
|
|
+  // Print global variables as declarations only.
|
|
+  if (!GVs.empty()) {
|
|
+    Out << '\n';
|
|
+    for (auto GVit = GVs.begin(), et = GVs.end(); GVit != et; ++GVit) {
|
|
+      // Make backups of some properties. They may be modified for printing.
|
|
+      GlobalValue::LinkageTypes SavedLinkage = (*GVit)->getLinkage();
|
|
+      GlobalVariable::VisibilityTypes SavedVisibility =
|
|
+          (*GVit)->getVisibility();
|
|
+
|
|
+      // Print local-linkage globals as hidden external ones.
|
|
+      if (!(*GVit)->hasAvailableExternallyLinkage() &&
|
|
+          !((*GVit)->getName() == "llvm.global_ctors") &&
|
|
+          (*GVit)->hasLocalLinkage()) {
|
|
+        (*GVit)->setLinkage(GlobalValue::ExternalLinkage);
|
|
+        (*GVit)->setVisibility(GlobalValue::HiddenVisibility);
|
|
+      }
|
|
+
|
|
+      printGlobal(*GVit, true);
|
|
+      Out << '\n';
|
|
+
|
|
+      // Restore the backups.
|
|
+      (*GVit)->setLinkage(SavedLinkage);
|
|
+      (*GVit)->setVisibility(SavedVisibility);
|
|
+    }
|
|
+    Out << '\n';
|
|
+  }
|
|
+
|
|
+  // Print the collected functions as declarations only.
|
|
+  for (auto FuncIt = Functions.begin(), et = Functions.end(); FuncIt != et;
|
|
+       ++FuncIt) {
|
|
+    Out << '\n';
|
|
+    printFunction(*FuncIt, false, true);
|
|
+  }
|
|
+
|
|
+  // Write attribute groups.
|
|
+  if (!Machine.as_empty()) {
|
|
+    Out << '\n';
|
|
+    writeAllAttributeGroups();
|
|
+  }
|
|
+  Out << '\n';
|
|
+}
|
|
+
|
|
/// printFunction - Print all aspects of a function.
|
|
+void AssemblyWriter::printFunction(const Function *F, bool PrintCompleteIR,
|
|
+ bool PrintDeclarationOnly) {
|
|
+ if (PrintCompleteIR && !PrintDeclarationOnly) {
|
|
+ printRequisiteDeclarations(F);
|
|
+ }
|
|
+ if (AnnotationWriter && !PrintDeclarationOnly)
|
|
+ AnnotationWriter->emitFunctionAnnot(F, Out);
|
|
+
|
|
+ if (F->isMaterializable() && !PrintDeclarationOnly)
|
|
+ Out << "; Materializable\n";
|
|
+#else
|
|
void AssemblyWriter::printFunction(const Function *F) {
|
|
if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out);
|
|
|
|
if (F->isMaterializable())
|
|
Out << "; Materializable\n";
|
|
+#endif
|
|
|
|
const AttributeList &Attrs = F->getAttributes();
|
|
if (Attrs.hasFnAttrs()) {
|
|
@@ -3792,6 +3907,18 @@ void AssemblyWriter::printFunction(const Function *F) {
|
|
Out << "; Function Attrs: " << AttrStr << '\n';
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (!PrintDeclarationOnly)
|
|
+ Machine.incorporateFunction(F);
|
|
+
|
|
+ if (F->isDeclaration() || PrintDeclarationOnly) {
|
|
+ Out << "declare";
|
|
+ if (!PrintDeclarationOnly) {
|
|
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
|
|
+ F->getAllMetadata(MDs);
|
|
+ printMetadataAttachments(MDs, " ");
|
|
+ }
|
|
+#else
|
|
Machine.incorporateFunction(F);
|
|
|
|
if (F->isDeclaration()) {
|
|
@@ -3799,6 +3926,7 @@ void AssemblyWriter::printFunction(const Function *F) {
|
|
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
|
|
F->getAllMetadata(MDs);
|
|
printMetadataAttachments(MDs, " ");
|
|
+#endif
|
|
Out << ' ';
|
|
} else
|
|
Out << "define ";
|
|
@@ -3824,7 +3952,11 @@ void AssemblyWriter::printFunction(const Function *F) {
|
|
Out << '(';
|
|
|
|
// Loop over the arguments, printing them...
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if ((F->isDeclaration() && !IsForDebug) || PrintDeclarationOnly) {
|
|
+#else
|
|
if (F->isDeclaration() && !IsForDebug) {
|
|
+#endif
|
|
// We're only interested in the type here - don't print argument names.
|
|
for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
|
|
// Insert commas as we go... the first arg doesn't get a comma
|
|
@@ -3895,7 +4027,11 @@ void AssemblyWriter::printFunction(const Function *F) {
|
|
writeOperand(F->getPersonalityFn(), /*PrintType=*/true);
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (F->isDeclaration() || PrintDeclarationOnly) {
|
|
+#else
|
|
if (F->isDeclaration()) {
|
|
+#endif
|
|
Out << '\n';
|
|
} else {
|
|
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
|
|
@@ -3913,6 +4049,13 @@ void AssemblyWriter::printFunction(const Function *F) {
|
|
Out << "}\n";
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // Output metadata
|
|
+ if (!Machine.mdn_empty() && PrintCompleteIR && !PrintDeclarationOnly) {
|
|
+ Out << '\n';
|
|
+ writeAllMDNodes();
|
|
+ }
|
|
+#endif
|
|
Machine.purgeFunction();
|
|
}
|
|
|
|
@@ -4591,13 +4734,21 @@ void AssemblyWriter::printUseLists(const Function *F) {
|
|
|
|
void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW,
|
|
bool ShouldPreserveUseListOrder,
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ bool IsForDebug, bool PrintCompleteIR) const {
|
|
+#else
|
|
bool IsForDebug) const {
|
|
+#endif
|
|
SlotTracker SlotTable(this->getParent());
|
|
formatted_raw_ostream OS(ROS);
|
|
AssemblyWriter W(OS, SlotTable, this->getParent(), AAW,
|
|
IsForDebug,
|
|
ShouldPreserveUseListOrder);
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ W.printFunction(this, PrintCompleteIR);
|
|
+#else
|
|
W.printFunction(this);
|
|
+#endif
|
|
}
|
|
|
|
void BasicBlock::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW,
|
|
diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt
|
|
index 217fe703dd4e..d44d1eea9f3e 100644
|
|
--- a/llvm/lib/IR/CMakeLists.txt
|
|
+++ b/llvm/lib/IR/CMakeLists.txt
|
|
@@ -78,6 +78,7 @@ add_llvm_component_library(LLVMCore
|
|
intrinsics_gen
|
|
|
|
LINK_COMPONENTS
|
|
+ AutoTuner
|
|
BinaryFormat
|
|
Demangle
|
|
Remarks
|
|
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
|
|
index 435800d9e5f9..ec2620efac38 100644
|
|
--- a/llvm/lib/IR/Function.cpp
|
|
+++ b/llvm/lib/IR/Function.cpp
|
|
@@ -70,6 +70,10 @@
|
|
#include <cstring>
|
|
#include <string>
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/IR/StructuralHash.h"
|
|
+#endif
|
|
+
|
|
using namespace llvm;
|
|
using ProfileCount = Function::ProfileCount;
|
|
|
|
@@ -1977,6 +1981,36 @@ std::optional<StringRef> Function::getSectionPrefix() const {
|
|
return std::nullopt;
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+uint64_t AutoTuningEnabledFunction::computeStructuralHash() {
|
|
+ return StructuralHash(*(this->Func));
|
|
+}
|
|
+
|
|
+void AutoTuningEnabledFunction::initCodeRegion() {
|
|
+ StringRef FuncName = Func->getName();
|
|
+ StringRef EntryBBName;
|
|
+ autotuning::SourceLocation Loc;
|
|
+
|
|
+ if (!Func->empty())
|
|
+ EntryBBName = Func->front().getName();
|
|
+ else
|
|
+ EntryBBName = StringRef("None");
|
|
+
|
|
+ DISubprogram *SubProgram = Func->getSubprogram();
|
|
+ if (SubProgram)
|
|
+ // Set the column number to 0 because there is no information about
|
|
+ // column number for functions.
|
|
+ Loc = {SubProgram->getFilename().str(), SubProgram->getLine(), 0};
|
|
+
|
|
+ autotuning::CodeRegion CR =
|
|
+ autotuning::CodeRegion(EntryBBName.data(), FuncName.data(),
|
|
+ autotuning::CodeRegionType::Function, Loc);
|
|
+ CR.setSize(Func->getInstructionCount());
|
|
+ CR.setHotness(this->getHotness());
|
|
+ this->setCodeRegion(CR);
|
|
+}
|
|
+#endif
|
|
+
|
|
bool Function::nullPointerIsDefined() const {
|
|
return hasFnAttribute(Attribute::NullPointerIsValid);
|
|
}
|
|
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
|
|
index cb0ac0f8eae6..e614285df07a 100644
|
|
--- a/llvm/lib/IR/Instructions.cpp
|
|
+++ b/llvm/lib/IR/Instructions.cpp
|
|
@@ -45,6 +45,9 @@
|
|
#include <cstdint>
|
|
#include <optional>
|
|
#include <vector>
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/IR/StructuralHash.h"
|
|
+#endif
|
|
|
|
using namespace llvm;
|
|
|
|
@@ -259,6 +262,89 @@ void LandingPadInst::addClause(Constant *Val) {
|
|
getOperandList()[OpNo] = Val;
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+uint64_t AutoTuningEnabledSwitchInst::computeStructuralHash() {
|
|
+ return StructuralHash(*(this->SI));
|
|
+}
|
|
+
|
|
+void AutoTuningEnabledSwitchInst::initCodeRegion() {
|
|
+ std::string SwitchName;
|
|
+ if (this->SI->hasName()) {
|
|
+ SwitchName = this->SI->getName().str();
|
|
+ } else {
|
|
+ std::string Str;
|
|
+ llvm::raw_string_ostream RSO(Str);
|
|
+ this->SI->getCondition()->printAsOperand(RSO);
|
|
+ SwitchName = RSO.str();
|
|
+ }
|
|
+
|
|
+ autotuning::CodeRegion CR = autotuning::CodeRegion(
|
|
+ SwitchName, this->SI->getFunction()->getName().str(),
|
|
+ autotuning::CodeRegionType::Switch, this->SI->getDebugLoc());
|
|
+
|
|
+ unsigned TotalNumInsts = 0;
|
|
+ for (auto Case : SI->cases()) {
|
|
+ const BasicBlock *BB = Case.getCaseSuccessor();
|
|
+ unsigned NumInsts = std::distance(BB->instructionsWithoutDebug().begin(),
|
|
+ BB->instructionsWithoutDebug().end());
|
|
+ TotalNumInsts += NumInsts;
|
|
+ }
|
|
+
|
|
+ CR.setSize(TotalNumInsts);
|
|
+ // Compute hotness.
|
|
+ autotuning::HotnessType Hotness =
|
|
+ this->SI->getFunction()->ATEFunction.getHotness();
|
|
+ CR.setHotness(Hotness);
|
|
+
|
|
+ this->setCodeRegion(CR);
|
|
+}
|
|
+
|
|
+uint64_t AutoTuningEnabledCallSite::computeStructuralHash() {
|
|
+ return StructuralHash(*(this->CB));
|
|
+}
|
|
+
|
|
+void AutoTuningEnabledCallSite::initCodeRegion() {
|
|
+ // Use Caller's name as FuncName and Callee's name as Name of a CodeRegion.
|
|
+ Function *Caller = this->CB->getCaller();
|
|
+ Function *Callee = this->CB->getCalledFunction();
|
|
+ if (Caller == nullptr || Callee == nullptr) {
|
|
+ this->setCodeRegion(autotuning::CodeRegion::getInvalidInstance());
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ autotuning::SourceLocation SrcLoc;
|
|
+ if (this->CB->getDebugLoc()) {
|
|
+ unsigned int SourceLine = this->CB->getDebugLoc()->getLine();
|
|
+ // Use the modified source line number for this call site if inlining has
|
|
+ // produced another call instruction (to the same callee) with the same
|
|
+ // source line number.
|
|
+ std::optional<unsigned int> LineNum = autotuning::Engine.getCallSiteLoc(CB);
|
|
+ if (LineNum)
|
|
+ SourceLine = *LineNum;
|
|
+ SrcLoc = autotuning::SourceLocation{
|
|
+ this->CB->getDebugLoc()->getFilename().str(), SourceLine,
|
|
+ this->CB->getDebugLoc()->getColumn()};
|
|
+ }
|
|
+
|
|
+ // We are using DebugLoc to distinguish between multiple calls to the same
|
|
+ // callee in a function. It may be possible that these multiple calls have
|
|
+ // same DebugLoc either 1) due to inlining of multiple calls (same callee)
|
|
+ // and callee having more calls, or 2) cloned calls added by previous
|
|
+ // optimizations. We are using 'callee name + its parent (basic block) name'
|
|
+ // to solve these problems. Additionally we are using modified line number
|
|
+ // for the issue # 1; this will handle the cases where the multiple calls are
|
|
+ // in the same basic block.
|
|
+ autotuning::CodeRegion CR = autotuning::CodeRegion(
|
|
+ Callee->getName().str() + "-" + this->CB->getParent()->getName().str(),
|
|
+ Caller->getName().data(), autotuning::CodeRegionType::CallSite, SrcLoc,
|
|
+ autotuning::DynamicOptions{{"ForceInline", {0, 1}}});
|
|
+
|
|
+ CR.setSize(Callee->getInstructionCount());
|
|
+ CR.setHotness(Caller->ATEFunction.getHotness());
|
|
+ this->setCodeRegion(CR);
|
|
+}
|
|
+#endif
|
|
+
|
|
//===----------------------------------------------------------------------===//
|
|
// CallBase Implementation
|
|
//===----------------------------------------------------------------------===//
|
|
diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
|
|
index 6ea108d831a1..1583e1c82b3e 100644
|
|
--- a/llvm/lib/IR/StructuralHash.cpp
|
|
+++ b/llvm/lib/IR/StructuralHash.cpp
|
|
@@ -10,9 +10,23 @@
|
|
#include "llvm/IR/Function.h"
|
|
#include "llvm/IR/GlobalVariable.h"
|
|
#include "llvm/IR/Module.h"
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/CodeGen/MachineBasicBlock.h"
|
|
+#include "llvm/IR/InstrTypes.h"
|
|
+#include "llvm/IR/Instructions.h"
|
|
+#include "llvm/Support/CommandLine.h"
|
|
+#endif
|
|
|
|
using namespace llvm;
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+// AutoTuner flag: hash the function IR prior to a call site for its hash.
|
|
+static cl::opt<bool> HashCallSite(
|
|
+ "hash-prior-to-callsite", cl::init(true), cl::Hidden,
|
|
+ cl::desc("Use function IR prior to a call site to compute the hashcode for"
|
|
+ " the call site"));
|
|
+#endif
|
|
+
|
|
namespace {
|
|
|
|
// Basic hashing mechanism to detect structural change to the IR, used to verify
|
|
@@ -21,16 +35,81 @@ namespace {
|
|
|
|
class StructuralHashImpl {
|
|
hash_code Hash;
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ const uint64_t BLOCK_HEADER_HASH = 45798;
|
|
+#endif
|
|
|
|
template <typename T> void hash(const T &V) { Hash = hash_combine(Hash, V); }
|
|
|
|
public:
|
|
StructuralHashImpl() : Hash(4) {}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ void update(const MachineBasicBlock &MBB) {
|
|
+ // Update the structural hash when we encounter a new basic block.
|
|
+ // Prevents CodeRegions with different structures, but many empty
|
|
+ // BasicBlocks to have the same structural hash.
|
|
+ if (const BasicBlock *Block = MBB.getBasicBlock()) {
|
|
+ hash(BLOCK_HEADER_HASH); // Block header
|
|
+ for (auto &Inst : *Block)
|
|
+ hash(Inst.getOpcode());
|
|
+ }
|
|
+ }
|
|
+
|
|
+  void update(const std::vector<BasicBlock *> &BBs) {
|
|
+    // Hash a block-header marker ahead of each block's opcodes so that
|
|
+    // CodeRegions with different structures, but many empty BasicBlocks, do
|
|
+    // not end up with the same structural hash. Null entries are skipped.
|
|
+    for (BasicBlock *BB : BBs) {
|
|
+      if (BB == nullptr)
|
|
+        continue;
|
|
+
|
|
+      hash(BLOCK_HEADER_HASH); // Block header
|
|
+      for (auto &Inst : *BB)
|
|
+        hash(Inst.getOpcode());
|
|
+    }
|
|
+  }
|
|
+
|
|
+  void update(const llvm::CallBase &CB) {
|
|
+    // Hash the caller: its IR up to this call site, or else just its name.
|
|
+    if (HashCallSite) {
|
|
+      update(*CB.getCaller(), std::addressof(CB));
|
|
+    } else {
|
|
+      // Fold the caller's name into the hash in zero-padded 8-byte chunks.
|
|
+      const std::string CallerName = CB.getCaller()->getName().str();
|
|
+      for (size_t Idx = 0; Idx < CallerName.size(); Idx += sizeof(uint64_t)) {
|
|
+        uint64_t Value = 0;
|
|
+        CallerName.copy((char *)&Value, sizeof(uint64_t), Idx);
|
|
+        hash(Value);
|
|
+      }
|
|
+    }
|
|
+    // Indirect calls have no static callee; skip them rather than deref null.
|
|
+    if (const Function *Callee = CB.getCalledFunction())
|
|
+      update(*Callee);
|
|
+  }
|
|
+
|
|
+ void update(const SwitchInst &SI) {
|
|
+ hash(SI.getNumCases());
|
|
+ for (auto Case : SI.cases()) {
|
|
+ hash(BLOCK_HEADER_HASH);
|
|
+ const BasicBlock *BB = Case.getCaseSuccessor();
|
|
+ for (auto &Inst : *BB)
|
|
+ hash(Inst.getOpcode());
|
|
+ }
|
|
+ }
|
|
+
|
|
+ void update(const Function &F, const CallBase *TargetCB = nullptr) {
|
|
+ if (F.isDeclaration())
|
|
+ return;
|
|
+
|
|
+ const Instruction *I =
|
|
+ TargetCB ? (dyn_cast<Instruction>(TargetCB)) : nullptr;
|
|
+#else
|
|
void update(const Function &F) {
|
|
// Declarations don't affect analyses.
|
|
if (F.isDeclaration())
|
|
return;
|
|
+#endif
|
|
|
|
hash(12345); // Function header
|
|
|
|
@@ -44,9 +123,18 @@ public:
|
|
VisitedBBs.insert(BBs[0]);
|
|
while (!BBs.empty()) {
|
|
const BasicBlock *BB = BBs.pop_back_val();
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ hash(BLOCK_HEADER_HASH); // Block header
|
|
+ for (auto &Inst : *BB) {
|
|
+ hash(Inst.getOpcode());
|
|
+ if (I && Inst.isIdenticalTo(I))
|
|
+ return;
|
|
+ }
|
|
+#else
|
|
hash(45798); // Block header
|
|
for (auto &Inst : *BB)
|
|
hash(Inst.getOpcode());
|
|
+#endif
|
|
|
|
const Instruction *Term = BB->getTerminator();
|
|
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
|
|
@@ -79,6 +167,32 @@ public:
|
|
|
|
} // namespace
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+uint64_t llvm::StructuralHash(const MachineBasicBlock &MBB) {
|
|
+ StructuralHashImpl H;
|
|
+ H.update(MBB);
|
|
+ return H.getHash();
|
|
+}
|
|
+
|
|
+uint64_t llvm::StructuralHash(const std::vector<BasicBlock *> BBs) {
|
|
+ StructuralHashImpl H;
|
|
+ H.update(BBs);
|
|
+ return H.getHash();
|
|
+}
|
|
+
|
|
+uint64_t llvm::StructuralHash(const CallBase &CB) {
|
|
+ StructuralHashImpl H;
|
|
+ H.update(CB);
|
|
+ return H.getHash();
|
|
+}
|
|
+
|
|
+uint64_t llvm::StructuralHash(const SwitchInst &SI) {
|
|
+ StructuralHashImpl H;
|
|
+ H.update(SI);
|
|
+ return H.getHash();
|
|
+}
|
|
+#endif
|
|
+
|
|
uint64_t llvm::StructuralHash(const Function &F) {
|
|
StructuralHashImpl H;
|
|
H.update(F);
|
|
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
|
|
index d0cbbcc0e310..a3ccbc6d258f 100644
|
|
--- a/llvm/lib/Passes/PassBuilder.cpp
|
|
+++ b/llvm/lib/Passes/PassBuilder.cpp
|
|
@@ -262,6 +262,11 @@
|
|
#include "llvm/Transforms/Vectorize/VectorCombine.h"
|
|
#include <optional>
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/Analysis/AutotuningDump.h"
|
|
+#include "llvm/Transforms/Scalar/AutoTuningCompile.h"
|
|
+#endif
|
|
+
|
|
using namespace llvm;
|
|
|
|
static const Regex DefaultAliasRegex(
|
|
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
|
|
index 660cb2e974d7..8009e011833c 100644
|
|
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
|
|
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
|
|
@@ -133,6 +133,11 @@
|
|
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
|
|
#include "llvm/Transforms/Vectorize/VectorCombine.h"
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#include "llvm/Transforms/Scalar/AutoTuningCompile.h"
|
|
+#endif
|
|
+
|
|
using namespace llvm;
|
|
|
|
static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
|
|
@@ -289,6 +294,10 @@ PipelineTuningOptions::PipelineTuningOptions() {
|
|
EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+extern cl::opt<AutoTuningCompileOpt> AutoTuningCompileMode;
|
|
+#endif
|
|
+
|
|
namespace llvm {
|
|
extern cl::opt<unsigned> MaxDevirtIterations;
|
|
extern cl::opt<bool> EnableKnowledgeRetention;
|
|
@@ -452,9 +461,17 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
|
|
// attention to it.
|
|
if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
|
|
PGOOpt->Action != PGOOptions::SampleUse)
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ {
|
|
+ if (AutoTuningCompileMode)
|
|
+ LPM2.addPass(AutoTuningCompileLoopPass(autotuning::CompileOptionUnroll));
|
|
+#endif
|
|
LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
|
|
/* OnlyWhenForced= */ !PTO.LoopUnrolling,
|
|
PTO.ForgetAllSCEVInLoopUnroll));
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ }
|
|
+#endif
|
|
|
|
invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
|
|
|
|
@@ -631,9 +648,17 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
|
|
// attention to it.
|
|
if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
|
|
PGOOpt->Action != PGOOptions::SampleUse)
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ {
|
|
+ if (AutoTuningCompileMode)
|
|
+ LPM2.addPass(AutoTuningCompileLoopPass(autotuning::CompileOptionUnroll));
|
|
+#endif
|
|
LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
|
|
/* OnlyWhenForced= */ !PTO.LoopUnrolling,
|
|
PTO.ForgetAllSCEVInLoopUnroll));
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ }
|
|
+#endif
|
|
|
|
invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
|
|
|
|
@@ -1110,6 +1135,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
|
|
if (EnableSyntheticCounts && !PGOOpt)
|
|
MPM.addPass(SyntheticCountsPropagation());
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (AutoTuningCompileMode)
|
|
+ MPM.addPass(AutoTuningCompileModulePass(autotuning::CompileOptionInline));
|
|
+#endif
|
|
+
|
|
if (EnableModuleInliner)
|
|
MPM.addPass(buildModuleInlinerPipeline(Level, Phase));
|
|
else
|
|
@@ -1131,6 +1161,12 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
|
|
/// TODO: Should LTO cause any differences to this set of passes?
|
|
void PassBuilder::addVectorPasses(OptimizationLevel Level,
|
|
FunctionPassManager &FPM, bool IsFullLTO) {
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (AutoTuningCompileMode && !IsFullLTO)
|
|
+ FPM.addPass(
|
|
+ AutoTuningCompileFunctionPass(autotuning::CompileOptionVectorize));
|
|
+#endif
|
|
+
|
|
FPM.addPass(LoopVectorizePass(
|
|
LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
|
|
|
|
@@ -1444,6 +1480,10 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
|
|
return buildO0DefaultPipeline(Level, LTOPreLink);
|
|
|
|
ModulePassManager MPM;
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (AutoTuningCompileMode)
|
|
+ MPM.addPass(AutoTuningCompileModulePass(autotuning::CompileOptionStart));
|
|
+#endif
|
|
|
|
// Convert @llvm.global.annotations to !annotation metadata.
|
|
MPM.addPass(Annotation2MetadataPass());
|
|
@@ -1475,6 +1515,12 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
|
|
|
|
if (LTOPreLink)
|
|
addRequiredLTOPreLinkPasses(MPM);
|
|
+
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (AutoTuningCompileMode)
|
|
+ MPM.addPass(AutoTuningCompileModulePass(autotuning::CompileOptionEnd));
|
|
+#endif
|
|
+
|
|
return MPM;
|
|
}
|
|
|
|
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
|
|
index e10dc995c493..45a539f14b93 100644
|
|
--- a/llvm/lib/Passes/PassRegistry.def
|
|
+++ b/llvm/lib/Passes/PassRegistry.def
|
|
@@ -29,6 +29,10 @@ MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
|
|
MODULE_ANALYSIS("inline-advisor", InlineAdvisorAnalysis())
|
|
MODULE_ANALYSIS("ir-similarity", IRSimilarityAnalysis())
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+MODULE_ANALYSIS("autotuning-dump", AutotuningDumpAnalysis())
|
|
+#endif
|
|
+
|
|
#ifndef MODULE_ALIAS_ANALYSIS
|
|
#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \
|
|
MODULE_ANALYSIS(NAME, CREATE_PASS)
|
|
@@ -127,6 +131,9 @@ MODULE_PASS("sanmd-module", SanitizerBinaryMetadataPass())
|
|
MODULE_PASS("memprof-module", ModuleMemProfilerPass())
|
|
MODULE_PASS("poison-checking", PoisonCheckingPass())
|
|
MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass())
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+MODULE_PASS("autotuning-compile-module", AutoTuningCompileModulePass())
|
|
+#endif
|
|
#undef MODULE_PASS
|
|
|
|
#ifndef MODULE_PASS_WITH_PARAMS
|
|
@@ -430,6 +437,9 @@ FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
|
|
FUNCTION_PASS("tsan", ThreadSanitizerPass())
|
|
FUNCTION_PASS("memprof", MemProfilerPass())
|
|
FUNCTION_PASS("declare-to-assign", llvm::AssignmentTrackingPass())
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+FUNCTION_PASS("autotuning-compile-function", AutoTuningCompileFunctionPass())
|
|
+#endif
|
|
#undef FUNCTION_PASS
|
|
|
|
#ifndef FUNCTION_PASS_WITH_PARAMS
|
|
@@ -614,6 +624,9 @@ LOOP_PASS("guard-widening", GuardWideningPass())
|
|
LOOP_PASS("loop-bound-split", LoopBoundSplitPass())
|
|
LOOP_PASS("loop-reroll", LoopRerollPass())
|
|
LOOP_PASS("loop-versioning-licm", LoopVersioningLICMPass())
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+LOOP_PASS("autotuning-compile-loop", AutoTuningCompileLoopPass())
|
|
+#endif
|
|
#undef LOOP_PASS
|
|
|
|
#ifndef LOOP_PASS_WITH_PARAMS
|
|
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
|
|
index 7eef511928ec..8653027ceed2 100644
|
|
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
|
|
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
|
|
@@ -41,6 +41,10 @@
|
|
#include <unordered_set>
|
|
#include <utility>
|
|
#include <vector>
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#include "llvm/Transforms/Scalar/AutoTuningCompile.h"
|
|
+#endif
|
|
|
|
using namespace llvm;
|
|
|
|
@@ -107,6 +111,10 @@ static cl::opt<bool> PrintOnCrash(
|
|
cl::desc("Print the last form of the IR before crash (use -print-on-crash-path to dump to a file)"),
|
|
cl::Hidden);
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+extern cl::opt<AutoTuningCompileOpt> AutoTuningCompileMode;
|
|
+#endif
|
|
+
|
|
static cl::opt<std::string> OptBisectPrintIRPath(
|
|
"opt-bisect-print-ir-path",
|
|
cl::desc("Print IR to path when opt-bisect-limit is reached"), cl::Hidden);
|
|
@@ -874,6 +882,21 @@ bool OptPassGateInstrumentation::shouldRun(StringRef PassName, Any IR) {
|
|
|
|
void OptPassGateInstrumentation::registerCallbacks(
|
|
PassInstrumentationCallbacks &PIC) {
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // Using AutoTuner OptBisect to change the behavior of compilation pipeline.
|
|
+ // Flag 'opt-bisect-limit' will be preferred if both 'opt-bisect-limit' and
|
|
+ // incremental compilation flags are used.
|
|
+ if (autotuning::Engine.isParseInput() && AutoTuningCompileMode) {
|
|
+ if (!getAutoTuningOptPassGate().isEnabled())
|
|
+ return;
|
|
+
|
|
+ PIC.registerShouldRunOptionalPassCallback([](StringRef PassID, Any IR) {
|
|
+ return isIgnored(PassID) ||
|
|
+ getAutoTuningOptPassGate().checkPass(PassID, getIRName(IR));
|
|
+ });
|
|
+ return;
|
|
+ }
|
|
+#endif
|
|
OptPassGate &PassGate = Context.getOptPassGate();
|
|
if (!PassGate.isEnabled())
|
|
return;
|
|
diff --git a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp
|
|
index b2627196bce6..b1dfa9d0f2cf 100644
|
|
--- a/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp
|
|
+++ b/llvm/lib/Remarks/BitstreamRemarkSerializer.cpp
|
|
@@ -277,6 +277,14 @@ void BitstreamRemarkSerializerHelper::emitRemarkBlock(const Remark &Remark,
|
|
R.push_back(StrTab.add(Remark.RemarkName).first);
|
|
R.push_back(StrTab.add(Remark.PassName).first);
|
|
R.push_back(StrTab.add(Remark.FunctionName).first);
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (Remark.CodeRegionType)
|
|
+ R.push_back(StrTab.add(*Remark.CodeRegionType).first);
|
|
+ if (std::optional<uint64_t> hash = Remark.CodeRegionHash)
|
|
+ R.push_back(*hash);
|
|
+ if (std::optional<unsigned int> Invocation = Remark.Invocation)
|
|
+ R.push_back(*Invocation);
|
|
+#endif
|
|
Bitstream.EmitRecordWithAbbrev(RecordRemarkHeaderAbbrevID, R);
|
|
|
|
if (const std::optional<RemarkLocation> &Loc = Remark.Loc) {
|
|
diff --git a/llvm/lib/Remarks/RemarkStreamer.cpp b/llvm/lib/Remarks/RemarkStreamer.cpp
|
|
index 9f4676ce37ab..d1faf4f1553a 100644
|
|
--- a/llvm/lib/Remarks/RemarkStreamer.cpp
|
|
+++ b/llvm/lib/Remarks/RemarkStreamer.cpp
|
|
@@ -14,6 +14,10 @@
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include <optional>
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/IR/DebugInfoMetadata.h"
|
|
+#endif
|
|
+
|
|
using namespace llvm;
|
|
using namespace llvm::remarks;
|
|
|
|
diff --git a/llvm/lib/Remarks/YAMLRemarkParser.cpp b/llvm/lib/Remarks/YAMLRemarkParser.cpp
|
|
index f5123b0f64ce..baa393c6a619 100644
|
|
--- a/llvm/lib/Remarks/YAMLRemarkParser.cpp
|
|
+++ b/llvm/lib/Remarks/YAMLRemarkParser.cpp
|
|
@@ -17,10 +17,23 @@
|
|
#include "llvm/Support/Endian.h"
|
|
#include "llvm/Support/Path.h"
|
|
#include <optional>
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/Support/CommandLine.h"
|
|
+#endif
|
|
|
|
using namespace llvm;
|
|
using namespace llvm::remarks;
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+// Create code regions without metadata (e.g. debug location, function name,
|
|
+// etc.).
|
|
+// This flag is added here instead of 'lib/AutoTuner/AutoTuning.cpp' to avoid
|
|
+// making LLVMRemarks dependent on LLVMCore.
|
|
+cl::opt<bool> OmitAutotuningMetadata(
|
|
+ "auto-tuning-omit-metadata", cl::Hidden, cl::init(false),
|
|
+ cl::desc("Include only code region hashes and types in opportunity files"));
|
|
+#endif
|
|
+
|
|
char YAMLParseError::ID = 0;
|
|
|
|
static void handleDiagnostic(const SMDiagnostic &Diag, void *Ctx) {
|
|
@@ -235,6 +248,23 @@ YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) {
|
|
TheRemark.FunctionName = *MaybeStr;
|
|
else
|
|
return MaybeStr.takeError();
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+  } else if (KeyName == "CodeRegionType") {
|
|
+    if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
|
|
+      TheRemark.CodeRegionType = *MaybeStr;
|
|
+    else
|
|
+      return MaybeStr.takeError();
|
|
+  } else if (KeyName == "CodeRegionHash") {
|
|
+    if (Expected<uint64_t> MaybeULL = parseUnsignedLL(RemarkField))
|
|
+      TheRemark.CodeRegionHash = *MaybeULL;
|
|
+    else
|
|
+      return MaybeULL.takeError();
|
|
+  } else if (KeyName == "Invocation") { // unsigned field: use 32-bit parser
|
|
+    if (Expected<unsigned> MaybeU = parseUnsigned(RemarkField))
|
|
+      TheRemark.Invocation = *MaybeU;
|
|
+    else
|
|
+      return MaybeU.takeError();
|
|
+#endif
|
|
} else if (KeyName == "Hotness") {
|
|
if (Expected<unsigned> MaybeU = parseUnsigned(RemarkField))
|
|
TheRemark.Hotness = *MaybeU;
|
|
@@ -261,11 +291,35 @@ YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) {
|
|
}
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+  // Check if any of the mandatory fields are missing.
|
|
+  if (TheRemark.RemarkType == Type::AutoTuning) {
|
|
+    // Only CodeRegionType and Pass are checked (CodeRegionHash is not).
|
|
+    if (!TheRemark.CodeRegionType || TheRemark.PassName.empty())
|
|
+      return error("CodeRegionHash, CodeRegionType, or Pass missing.",
|
|
+                   *RemarkEntry.getRoot());
|
|
+
|
|
+    // Name/Function may be absent only with -auto-tuning-omit-metadata.
|
|
+    if (!OmitAutotuningMetadata && TheRemark.RemarkName.empty())
|
|
+      return error("Remark Name expected; enable -auto-tuning-omit-metadata.",
|
|
+                   *RemarkEntry.getRoot());
|
|
+
|
|
+    if (!OmitAutotuningMetadata && TheRemark.FunctionName.empty())
|
|
+      return error(
|
|
+          "Remark Function Name expected; enable -auto-tuning-omit-metadata.",
|
|
+          *RemarkEntry.getRoot());
|
|
+  } else if (TheRemark.RemarkType == Type::Unknown ||
|
|
+             TheRemark.PassName.empty() || TheRemark.RemarkName.empty() ||
|
|
+             TheRemark.FunctionName.empty())
|
|
+    return error("Type, Pass, Name or Function missing.",
|
|
+                 *RemarkEntry.getRoot());
|
|
+#else
|
|
// Check if any of the mandatory fields are missing.
|
|
if (TheRemark.RemarkType == Type::Unknown || TheRemark.PassName.empty() ||
|
|
TheRemark.RemarkName.empty() || TheRemark.FunctionName.empty())
|
|
return error("Type, Pass, Name or Function missing.",
|
|
*RemarkEntry.getRoot());
|
|
+#endif
|
|
|
|
return std::move(Result);
|
|
}
|
|
@@ -277,6 +331,9 @@ Expected<Type> YAMLRemarkParser::parseType(yaml::MappingNode &Node) {
|
|
.Case("!Analysis", remarks::Type::Analysis)
|
|
.Case("!AnalysisFPCommute", remarks::Type::AnalysisFPCommute)
|
|
.Case("!AnalysisAliasing", remarks::Type::AnalysisAliasing)
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ .Case("!AutoTuning", remarks::Type::AutoTuning)
|
|
+#endif
|
|
.Case("!Failure", remarks::Type::Failure)
|
|
.Default(remarks::Type::Unknown);
|
|
if (Type == remarks::Type::Unknown)
|
|
@@ -313,6 +370,31 @@ Expected<StringRef> YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) {
|
|
return Result;
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+Expected<std::vector<StringRef>>
|
|
+YAMLRemarkParser::parseStrVector(yaml::KeyValueNode &Node) {
|
|
+  std::vector<StringRef> Result;
|
|
+  auto *SequenceNode = dyn_cast<yaml::SequenceNode>(Node.getValue());
|
|
+  if (!SequenceNode)
|
|
+    return error("expected a value of sequence type.", Node);
|
|
+
|
|
+  for (yaml::Node &Element : *SequenceNode) {
|
|
+    auto *ScalarNode = dyn_cast<yaml::ScalarNode>(&Element);
|
|
+    if (!ScalarNode)
|
|
+      return error("expected a value of scalar type.", Element);
|
|
+
|
|
+    // Strip at most one single quote from each end. front()/back() assert
|
|
+    // on an empty StringRef, so use the consume_* helpers, which do nothing
|
|
+    // when the quote is absent or the string is (or becomes) empty.
|
|
+    StringRef Str = ScalarNode->getRawValue();
|
|
+    Str.consume_front("'");
|
|
+    Str.consume_back("'");
|
|
+    Result.push_back(Str);
|
|
+  }
|
|
+  return Result;
|
|
+}
|
|
+#endif
|
|
+
|
|
Expected<unsigned> YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) {
|
|
SmallVector<char, 4> Tmp;
|
|
auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
|
|
@@ -324,6 +406,19 @@ Expected<unsigned> YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) {
|
|
return UnsignedValue;
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+Expected<uint64_t> YAMLRemarkParser::parseUnsignedLL(yaml::KeyValueNode &Node) {
|
|
+ SmallVector<char, 4> Tmp;
|
|
+ if (auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue())) {
|
|
+ uint64_t UnsignedValue = 0;
|
|
+ if (Value->getValue(Tmp).getAsInteger(10, UnsignedValue))
|
|
+ return error("expected a value of integer type.", *Value);
|
|
+ return UnsignedValue;
|
|
+ }
|
|
+ return error("expected a value of scalar type.", Node);
|
|
+}
|
|
+#endif
|
|
+
|
|
Expected<RemarkLocation>
|
|
YAMLRemarkParser::parseDebugLoc(yaml::KeyValueNode &Node) {
|
|
auto *DebugLoc = dyn_cast<yaml::MappingNode>(Node.getValue());
|
|
@@ -374,6 +469,9 @@ Expected<Argument> YAMLRemarkParser::parseArg(yaml::Node &Node) {
|
|
|
|
std::optional<StringRef> KeyStr;
|
|
std::optional<StringRef> ValueStr;
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ std::optional<std::vector<StringRef>> ValueStrVector;
|
|
+#endif
|
|
std::optional<RemarkLocation> Loc;
|
|
|
|
for (yaml::KeyValueNode &ArgEntry : *ArgMap) {
|
|
@@ -400,11 +498,27 @@ Expected<Argument> YAMLRemarkParser::parseArg(yaml::Node &Node) {
|
|
if (ValueStr)
|
|
return error("only one string entry is allowed per argument.", ArgEntry);
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // Try to parse the value to a string vector.
|
|
+ if (Expected<std::vector<StringRef>> MaybeStrVector =
|
|
+ parseStrVector(ArgEntry)) {
|
|
+ ValueStrVector = *MaybeStrVector;
|
|
+ ValueStr = "";
|
|
+ } else {
|
|
+ consumeError(MaybeStrVector.takeError());
|
|
+ // Try to parse the value.
|
|
+ if (Expected<StringRef> MaybeStr = parseStr(ArgEntry))
|
|
+ ValueStr = *MaybeStr;
|
|
+ else
|
|
+ return MaybeStr.takeError();
|
|
+ }
|
|
+#else
|
|
// Try to parse the value.
|
|
if (Expected<StringRef> MaybeStr = parseStr(ArgEntry))
|
|
ValueStr = *MaybeStr;
|
|
else
|
|
return MaybeStr.takeError();
|
|
+#endif
|
|
|
|
// Keep the key from the string.
|
|
KeyStr = KeyName;
|
|
@@ -412,10 +526,18 @@ Expected<Argument> YAMLRemarkParser::parseArg(yaml::Node &Node) {
|
|
|
|
if (!KeyStr)
|
|
return error("argument key is missing.", *ArgMap);
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (!ValueStr && !ValueStrVector)
|
|
+#else
|
|
if (!ValueStr)
|
|
+#endif
|
|
return error("argument value is missing.", *ArgMap);
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ return Argument{*KeyStr, *ValueStr, ValueStrVector, Loc};
|
|
+#else
|
|
return Argument{*KeyStr, *ValueStr, Loc};
|
|
+#endif
|
|
}
|
|
|
|
Expected<std::unique_ptr<Remark>> YAMLRemarkParser::next() {
|
|
diff --git a/llvm/lib/Remarks/YAMLRemarkParser.h b/llvm/lib/Remarks/YAMLRemarkParser.h
|
|
index 8ef72e16be74..141f10dd3900 100644
|
|
--- a/llvm/lib/Remarks/YAMLRemarkParser.h
|
|
+++ b/llvm/lib/Remarks/YAMLRemarkParser.h
|
|
@@ -91,6 +91,12 @@ protected:
|
|
Expected<RemarkLocation> parseDebugLoc(yaml::KeyValueNode &Node);
|
|
/// Parse an argument.
|
|
Expected<Argument> parseArg(yaml::Node &Node);
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ /// Parse a vector of strings.
|
|
+ Expected<std::vector<StringRef>> parseStrVector(yaml::KeyValueNode &Node);
|
|
+ /// Parse one value to an unsigned long long.
|
|
+ Expected<uint64_t> parseUnsignedLL(yaml::KeyValueNode &Node);
|
|
+#endif
|
|
};
|
|
|
|
/// YAML with a string table to Remark parser.
|
|
diff --git a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
|
|
index 68285c3dde1b..1bc0f23f9221 100644
|
|
--- a/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
|
|
+++ b/llvm/lib/Remarks/YAMLRemarkSerializer.cpp
|
|
@@ -15,10 +15,45 @@
|
|
#include "llvm/Remarks/Remark.h"
|
|
#include "llvm/Support/FileSystem.h"
|
|
#include <optional>
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/Support/CommandLine.h"
|
|
+#endif
|
|
|
|
using namespace llvm;
|
|
using namespace llvm::remarks;
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+extern cl::opt<bool> OmitAutotuningMetadata;
|
|
+
|
|
+// Use the same keys whether we use a string table or not (respectively, T is an
|
|
+// unsigned or a StringRef).
|
|
+template <typename T>
|
|
+static void mapRemarkHeader(
|
|
+ yaml::IO &io, T PassName, T RemarkName, std::optional<RemarkLocation> RL,
|
|
+ T FunctionName, std::optional<StringRef> CodeRegionType,
|
|
+ std::optional<uint64_t> CodeRegionHash,
|
|
+ std::optional<unsigned int> Invocation,
|
|
+ std::optional<std::map<std::string, std::string>> BaselineConfig,
|
|
+ std::optional<std::map<std::string, std::vector<unsigned int>>>
|
|
+ AutoTunerOptions,
|
|
+ std::optional<uint64_t> Hotness, ArrayRef<Argument> Args) {
|
|
+ io.mapRequired("Pass", PassName);
|
|
+ if (!OmitAutotuningMetadata) {
|
|
+ io.mapRequired("Name", RemarkName);
|
|
+ io.mapOptional("DebugLoc", RL);
|
|
+ io.mapRequired("Function", FunctionName);
|
|
+ }
|
|
+ io.mapOptional("CodeRegionType", CodeRegionType);
|
|
+ io.mapOptional("CodeRegionHash", CodeRegionHash);
|
|
+ io.mapOptional("DynamicConfigs", AutoTunerOptions);
|
|
+ io.mapOptional("BaselineConfig", BaselineConfig);
|
|
+ io.mapOptional("Invocation", Invocation);
|
|
+ if (!OmitAutotuningMetadata) {
|
|
+ io.mapOptional("Hotness", Hotness);
|
|
+ io.mapOptional("Args", Args);
|
|
+ }
|
|
+}
|
|
+#else
|
|
// Use the same keys whether we use a string table or not (respectively, T is an
|
|
// unsigned or a StringRef).
|
|
template <typename T>
|
|
@@ -33,6 +68,7 @@ static void mapRemarkHeader(yaml::IO &io, T PassName, T RemarkName,
|
|
io.mapOptional("Hotness", Hotness);
|
|
io.mapOptional("Args", Args);
|
|
}
|
|
+#endif
|
|
|
|
namespace llvm {
|
|
namespace yaml {
|
|
@@ -53,6 +89,10 @@ template <> struct MappingTraits<remarks::Remark *> {
|
|
else if (io.mapTag("!AnalysisAliasing",
|
|
(Remark->RemarkType == Type::AnalysisAliasing)))
|
|
;
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ else if (io.mapTag("!AutoTuning", (Remark->RemarkType == Type::AutoTuning)))
|
|
+ ;
|
|
+#endif
|
|
else if (io.mapTag("!Failure", (Remark->RemarkType == Type::Failure)))
|
|
;
|
|
else
|
|
@@ -66,14 +106,58 @@ template <> struct MappingTraits<remarks::Remark *> {
|
|
unsigned NameID = StrTab.add(Remark->RemarkName).first;
|
|
unsigned FunctionID = StrTab.add(Remark->FunctionName).first;
|
|
mapRemarkHeader(io, PassID, NameID, Remark->Loc, FunctionID,
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ Remark->CodeRegionType, Remark->CodeRegionHash,
|
|
+ Remark->Invocation, Remark->BaselineConfig,
|
|
+ Remark->AutoTunerOptions, Remark->Hotness, Remark->Args);
|
|
+
|
|
+#else
|
|
Remark->Hotness, Remark->Args);
|
|
+#endif
|
|
} else {
|
|
mapRemarkHeader(io, Remark->PassName, Remark->RemarkName, Remark->Loc,
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ Remark->FunctionName, Remark->CodeRegionType,
|
|
+ Remark->CodeRegionHash, Remark->Invocation,
|
|
+ Remark->BaselineConfig, Remark->AutoTunerOptions,
|
|
+ Remark->Hotness, Remark->Args);
|
|
+#else
|
|
Remark->FunctionName, Remark->Hotness, Remark->Args);
|
|
+#endif
|
|
}
|
|
}
|
|
};
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+// YAML I/O to support dumping 'Values: { key: [...], ... }' in opportunity
|
|
+// files.
|
|
+template <>
|
|
+struct MappingTraits<std::map<std::string, std::vector<unsigned int>>> {
|
|
+ static void mapping(IO &io,
|
|
+ std::map<std::string, std::vector<unsigned int>> &OM) {
|
|
+ assert(io.outputting() && "input not yet implemented");
|
|
+
|
|
+ // Print as an abbreviated dictionary
|
|
+ llvm::yaml::StdMapStringCustomMappingTraitsImpl<
|
|
+ std::vector<unsigned int>>::output(io, OM);
|
|
+ }
|
|
+ // This sets the beginFlowMapping and endFlowMapping
|
|
+ static const bool flow = true;
|
|
+};
|
|
+
|
|
+template <> struct MappingTraits<std::map<std::string, std::string>> {
|
|
+ static void mapping(IO &io, std::map<std::string, std::string> &OM) {
|
|
+ assert(io.outputting() && "input not yet implemented");
|
|
+
|
|
+ // Print as an abbreviated dictionary
|
|
+ llvm::yaml::StdMapStringCustomMappingTraitsImpl<std::string>::output(io,
|
|
+ OM);
|
|
+ }
|
|
+ // This sets the beginFlowMapping and endFlowMapping
|
|
+ static const bool flow = true;
|
|
+};
|
|
+#endif
|
|
+
|
|
template <> struct MappingTraits<RemarkLocation> {
|
|
static void mapping(IO &io, RemarkLocation &RL) {
|
|
assert(io.outputting() && "input not yet implemented");
|
|
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
|
|
index d3efb8b67be5..b66415c0e9a9 100644
|
|
--- a/llvm/lib/Support/CommandLine.cpp
|
|
+++ b/llvm/lib/Support/CommandLine.cpp
|
|
@@ -127,6 +127,9 @@ static inline bool isPrefixedOrGrouping(const Option *O) {
|
|
O->getFormattingFlag() == cl::AlwaysPrefix;
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include <map>
|
|
+#endif
|
|
|
|
namespace {
|
|
|
|
@@ -1470,6 +1473,44 @@ bool cl::ParseCommandLineOptions(int argc, const char *const *argv,
|
|
Errs, LongOptionsUseDoubleDash);
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+bool cl::ParseAutoTunerOptions(
|
|
+ std::unordered_map<std::string, std::string> LLVMParams,
|
|
+ std::unordered_map<std::string, std::string> ProgramParams,
|
|
+ StringRef Overview, raw_ostream *Errs, const char *EnvVar,
|
|
+ bool LongOptionsUseDoubleDash) {
|
|
+ SmallVector<const char *, 20> NewArgv;
|
|
+ BumpPtrAllocator A;
|
|
+ StringSaver Saver(A);
|
|
+ // GlobalParser requires arguments similar to C style command line options
|
|
+ // (int argc, char * argv[]) where argv[0] refers to the program name.
|
|
+ // We are using a fake program name here which is consistent with LLVM.
|
|
+ NewArgv.push_back("AutoTuner (LLVM option parsing)");
|
|
+
|
|
+ for (const auto &I : LLVMParams) {
|
|
+ std::string NewOption = I.first + "=" + I.second;
|
|
+ NewArgv.push_back(Saver.save(NewOption).data());
|
|
+ }
|
|
+
|
|
+ for (const auto &I : ProgramParams) {
|
|
+ std::string NewOption = I.first + "=" + I.second;
|
|
+ NewArgv.push_back(Saver.save(NewOption).data());
|
|
+ }
|
|
+
|
|
+ // Parse options from environment variable.
|
|
+ if (EnvVar) {
|
|
+ if (std::optional<std::string> EnvValue =
|
|
+ sys::Process::GetEnv(StringRef(EnvVar)))
|
|
+ TokenizeGNUCommandLine(*EnvValue, Saver, NewArgv);
|
|
+ }
|
|
+
|
|
+ int NewArgc = static_cast<int>(NewArgv.size());
|
|
+ // Parse all options.
|
|
+ return GlobalParser->ParseCommandLineOptions(NewArgc, &NewArgv[0], Overview,
|
|
+ Errs, LongOptionsUseDoubleDash);
|
|
+}
|
|
+#endif
|
|
+
|
|
/// Reset all options at least once, so that we can parse different options.
|
|
void CommandLineParser::ResetAllOptionOccurrences() {
|
|
// Reset all option values to look like they have never been seen before.
|
|
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
|
|
index 034f1587ae8d..3507d357a4c6 100644
|
|
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
|
|
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
|
|
@@ -57,6 +57,7 @@ add_llvm_component_library(LLVMipo
|
|
LINK_COMPONENTS
|
|
AggressiveInstCombine
|
|
Analysis
|
|
+ AutoTuner
|
|
BitReader
|
|
BitWriter
|
|
Core
|
|
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
|
|
index 3e00aebce372..802667819c44 100644
|
|
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
|
|
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
|
|
@@ -64,6 +64,9 @@
|
|
#include <functional>
|
|
#include <utility>
|
|
#include <vector>
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#endif
|
|
|
|
using namespace llvm;
|
|
|
|
@@ -298,6 +301,27 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
|
|
// be deleted as a batch after inlining.
|
|
SmallVector<Function *, 4> DeadFunctionsInComdats;
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ bool IsAutoTunerEnabled =
|
|
+ autotuning::Engine.isEnabled() &&
|
|
+ autotuning::Engine.isTuningAllowedForType(autotuning::CallSite);
|
|
+ if (IsAutoTunerEnabled) {
|
|
+ SmallVector<std::pair<CallBase *, int>, 16> CallsCopy = Calls;
|
|
+ for (int I = 0; I < (int)CallsCopy.size(); ++I) {
|
|
+ CallBase &CB = *CallsCopy[I].first;
|
|
+ DebugLoc DLoc = CB.getDebugLoc();
|
|
+ if (!CB.getCaller() || !CB.getCalledFunction() || !DLoc)
|
|
+ continue;
|
|
+ autotuning::CallSiteLocation Loc = autotuning::CallSiteLocation{
|
|
+ &CB, CB.getCaller(), CB.getCalledFunction(),
|
|
+ autotuning::SourceLocation{DLoc->getFilename().str(), DLoc->getLine(),
|
|
+ DLoc->getColumn()}};
|
|
+ autotuning::Engine.insertCallSiteLoc(Loc);
|
|
+ }
|
|
+ autotuning::Engine.cleanCallSiteLoc();
|
|
+ }
|
|
+#endif
|
|
+
|
|
// Loop forward over all of the calls. Note that we cannot cache the size as
|
|
// inlining can introduce new calls that need to be processed.
|
|
for (int I = 0; I < (int)Calls.size(); ++I) {
|
|
@@ -412,6 +436,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
|
|
if (NewCallee) {
|
|
if (!NewCallee->isDeclaration()) {
|
|
Calls.push_back({ICB, NewHistoryID});
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (IsAutoTunerEnabled)
|
|
+ if (ICB->getDebugLoc())
|
|
+ autotuning::Engine.updateCallSiteLocs(
|
|
+ CB, ICB, ICB->getCalledFunction(),
|
|
+ ICB->getDebugLoc()->getLine());
|
|
+#endif
|
|
// Continually inlining through an SCC can result in huge compile
|
|
// times and bloated code since we arbitrarily stop at some point
|
|
// when the inliner decides it's not profitable to inline anymore.
|
|
@@ -527,6 +558,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
|
|
FAM.invalidate(F, PreservedAnalyses::none());
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (IsAutoTunerEnabled)
|
|
+ autotuning::Engine.clearCallSiteLocs();
|
|
+#endif
|
|
+
|
|
// We must ensure that we only delete functions with comdats if every function
|
|
// in the comdat is going to be deleted.
|
|
if (!DeadFunctionsInComdats.empty()) {
|
|
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
|
|
index a53baecd4776..9590cf625c64 100644
|
|
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
|
|
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
|
|
@@ -1212,6 +1212,20 @@ bool SampleProfileLoader::inlineHotFunctions(
|
|
}
|
|
}
|
|
}
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (autotuning::Engine.isEnabled()) {
|
|
+ // If a callsite is hot/cold, mark its corresponding callee as
|
|
+ // hot/cold respectively so that auto-tuning engine will be able to
|
|
+ // selectively dump code regions as tuning opportunities.
|
|
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
|
|
+ if (Function *Callee = CI->getCalledFunction()) {
|
|
+ if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
|
|
+ Callee->getATEFunction().setHot();
|
|
+ else
|
|
+ Callee->getATEFunction().setCold();
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
}
|
|
if (Hot || ExternalInlineAdvisor) {
|
|
CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
|
|
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
|
|
index 424f1d433606..955353944b14 100644
|
|
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
|
|
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
|
|
@@ -30,6 +30,7 @@ add_llvm_component_library(LLVMInstrumentation
|
|
|
|
LINK_COMPONENTS
|
|
Analysis
|
|
+ AutoTuner
|
|
Core
|
|
Demangle
|
|
MC
|
|
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
|
|
index 3c8f25d73c62..b9459b59e704 100644
|
|
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
|
|
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
|
|
@@ -2132,6 +2132,10 @@ static bool annotateAllFunctions(
|
|
F->addFnAttr(Attribute::InlineHint);
|
|
LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
|
|
<< "\n");
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (autotuning::Engine.isEnabled())
|
|
+ F->getATEFunction().setHot();
|
|
+#endif
|
|
}
|
|
for (auto &F : ColdFunctions) {
|
|
// Only set when there is no Attribute::Hot set by the user. For Hot
|
|
@@ -2148,6 +2152,10 @@ static bool annotateAllFunctions(
|
|
F->addFnAttr(Attribute::Cold);
|
|
LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
|
|
<< "\n");
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (autotuning::Engine.isEnabled())
|
|
+ F->getATEFunction().setCold();
|
|
+#endif
|
|
}
|
|
return true;
|
|
}
|
|
diff --git a/llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp b/llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp
|
|
new file mode 100644
|
|
index 000000000000..c33cb7cfc256
|
|
--- /dev/null
|
|
+++ b/llvm/lib/Transforms/Scalar/AutoTuningCompile.cpp
|
|
@@ -0,0 +1,334 @@
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+//===--------------- AutoTuningCompile.cpp - Auto-Tuning ------------------===//
|
|
+//
|
|
+// The LLVM Compiler Infrastructure
|
|
+//
|
|
+// This file is distributed under the University of Illinois Open Source
|
|
+// License. See LICENSE.TXT for details.
|
|
+//
|
|
+// Copyright (C) 2017-2022, Huawei Technologies Co., Ltd. All rights reserved.
|
|
+//
|
|
+//===----------------------------------------------------------------------===//
|
|
+//
|
|
+/// \file
|
|
+/// This pass implements incremental compilation for AutoTuner to reduce the
|
|
+/// compilation time for tuning process.
|
|
+/// This pass performs 2 operations.
|
|
+/// 1. Writing module level IR files which can be used in subsequent
|
|
+/// compilations for AutoTuner flow, so the clang frontend need not process
|
|
+/// the source code from scratch.
|
|
+/// 2. Add/Remove attributes for modules and functions to enable/disable
|
|
+/// execution of optimization pass(es). It further reduces the compilation
|
|
+/// time by skipping optimization pass(es) (if feasible).
|
|
+//
|
|
+//===----------------------------------------------------------------------===//
|
|
+
|
|
+#include "llvm/Transforms/Scalar/AutoTuningCompile.h"
|
|
+#include "llvm/Analysis/AutotuningDump.h"
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#include "llvm/InitializePasses.h"
|
|
+#include "llvm/Support/CommandLine.h"
|
|
+#include "llvm/Transforms/Scalar.h"
|
|
+#include <string>
|
|
+
|
|
+// Enable debug messages for AutoTuning Compilation.
|
|
+#define DEBUG_TYPE "autotuning-compile"
|
|
+
|
|
+using namespace llvm;
|
|
+
|
|
+extern cl::opt<AutoTuningCompileOpt> AutoTuningCompileMode;
|
|
+
|
|
+AutoTuningOptPassGate SkipPasses = AutoTuningOptPassGate(true);
|
|
+AutoTuningOptPassGate RunPasses = AutoTuningOptPassGate(false);
|
|
+bool AutoTuningCompileModule::SkipCompilation = false;
|
|
+
|
|
+// Run the incremental-compilation IR dump on M when output is being generated.
|
|
+// Basic/CoarseGrain: only for CompileOptionStart; FineGrain: if opportunities.
|
|
+static void writeFiles(Module &M, std::string Pass) {
|
|
+  if (!autotuning::Engine.isGenerateOutput())
|
|
+    return;
|
|
+  bool ShouldDump = false;
|
|
+  switch (AutoTuningCompileMode) {
|
|
+  case Basic:
|
|
+  case CoarseGrain:
|
|
+    ShouldDump = Pass == autotuning::CompileOptionStart;
|
|
+    break;
|
|
+  case FineGrain:
|
|
+    ShouldDump = autotuning::Engine.hasOpportunities();
|
|
+    break;
|
|
+  default:
|
|
+    llvm_unreachable("AutoTuningCompile: Unknown AutoTuner Incremental "
|
|
+                     "Compilation mode.\n");
|
|
+  }
|
|
+  if (!ShouldDump)
|
|
+    return;
|
|
+  LLVM_DEBUG(dbgs() << "AutoTuningCompile: IR files writing before Pass: "
|
|
+                    << Pass << ".\n");
|
|
+  // Stack-allocated: the previously heap-allocated dumper was never freed.
|
|
+  AutotuningDumpLegacy ATD(/* Incremental Compilation */ true);
|
|
+  ATD.runOnModule(M);
|
|
+}
|
|
+
|
|
+bool AutoTuningOptPassGate::shouldRunPass(const StringRef PassName,
|
|
+ StringRef IRDescription) {
|
|
+ LLVM_DEBUG(dbgs() << "Skip pass '" << PassName
|
|
+ << "': " << (Skip ? "True" : "False") << '\n');
|
|
+ return !Skip;
|
|
+}
|
|
+
|
|
+bool AutoTuningOptPassGate::checkPass(const StringRef PassName,
|
|
+ const StringRef TargetDesc) {
|
|
+ if (PassName.startswith("AutoTuningCompile")) {
|
|
+ LLVM_DEBUG(dbgs() << "Running '" << PassName << "'pass.\n");
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ LLVM_DEBUG(dbgs() << "Skip pass '" << PassName
|
|
+ << "': " << (Skip ? "True" : "False") << '\n');
|
|
+ return !Skip;
|
|
+}
|
|
+
|
|
+AutoTuningCompileModule::AutoTuningCompileModule(std::string Pass) {
|
|
+ this->Pass = Pass;
|
|
+}
|
|
+
|
|
+void AutoTuningCompileModule::writeIRFiles(Module &M) const {
|
|
+ writeFiles(M, Pass);
|
|
+}
|
|
+
|
|
+bool AutoTuningCompileModule::modifyCompilationPipeline(Module &M) const {
|
|
+ bool Changed = false;
|
|
+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: Deciding to enable/disable "
|
|
+ "optimization of module/functions. Pass: "
|
|
+ << Pass << '\n');
|
|
+
|
|
+ StringRef Filename = M.getName();
|
|
+ size_t Pos = Filename.rfind(".ll");
|
|
+ if (Pos == StringRef::npos) {
|
|
+ errs() << "AutoTuningCompile: Source file is not IR (.ll) file. "
|
|
+ "Disabling incremental compilation.\n";
|
|
+ AutoTuningCompileMode = Inactive;
|
|
+ return Changed;
|
|
+ }
|
|
+ Filename = Filename.substr(0, Pos);
|
|
+
|
|
+ switch (AutoTuningCompileMode) {
|
|
+ case Basic:
|
|
+ case CoarseGrain:
|
|
+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: No change in opt pipeline for "
|
|
+ "Basic/CoarseGrain incremental compilation mode.\n");
|
|
+ break;
|
|
+ case FineGrain: {
|
|
+ if (Pass == autotuning::CompileOptionStart) {
|
|
+ M.getContext().setOptPassGate(SkipPasses);
|
|
+ getAutoTuningOptPassGate().setSkip(true);
|
|
+ setSkipCompilation(true);
|
|
+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses enabled.\n");
|
|
+ } else if (getSkipCompilation() &&
|
|
+ (autotuning::Engine.shouldRunOptPass(Filename.str(), Pass) ||
|
|
+ Pass == "end")) {
|
|
+ M.getContext().setOptPassGate(RunPasses);
|
|
+ getAutoTuningOptPassGate().setSkip(false);
|
|
+ setSkipCompilation(false);
|
|
+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses disabled.\n");
|
|
+ } else
|
|
+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: Old decision (SkipPasses = "
|
|
+ << (getSkipCompilation() ? "True" : "False")
|
|
+ << " ) continued.\n");
|
|
+
|
|
+ Changed = true;
|
|
+ break;
|
|
+ }
|
|
+ default:
|
|
+ llvm_unreachable(
|
|
+ "AutoTuningCompile: Unknown AutoTuner Incremental Compilation mode.\n");
|
|
+ }
|
|
+
|
|
+ return Changed;
|
|
+}
|
|
+
|
|
+bool AutoTuningCompileModule::run(Module &M) {
|
|
+ bool Changed = false;
|
|
+ if (AutoTuningCompileMode == Inactive)
|
|
+ return Changed;
|
|
+
|
|
+ if (!autotuning::Engine.isEnabled()) {
|
|
+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: AutoTuner is not enabled.\n");
|
|
+ return Changed;
|
|
+ }
|
|
+
|
|
+ writeIRFiles(M);
|
|
+
|
|
+ if (autotuning::Engine.isParseInput())
|
|
+ Changed |= modifyCompilationPipeline(M);
|
|
+
|
|
+ return Changed;
|
|
+}
|
|
+
|
|
+AutoTuningCompileModuleLegacy::AutoTuningCompileModuleLegacy(std::string Pass)
|
|
+ : ModulePass(AutoTuningCompileModuleLegacy::ID) {
|
|
+ this->Pass = Pass;
|
|
+}
|
|
+
|
|
+bool AutoTuningCompileModuleLegacy::runOnModule(Module &M) {
|
|
+ AutoTuningCompileModule Impl(Pass);
|
|
+ return Impl.run(M);
|
|
+}
|
|
+
|
|
+char AutoTuningCompileModuleLegacy::ID = 0;
|
|
+
|
|
+StringRef AutoTuningCompileModuleLegacy::getPassName() const {
|
|
+ return "AutoTuner Incremental Compilation";
|
|
+}
|
|
+
|
|
+INITIALIZE_PASS(AutoTuningCompileModuleLegacy, "autotuning-compile-module",
|
|
+ "AutoTuner Incremental Compilation", false, false)
|
|
+
|
|
+// Public interface to the AutoTuningCompile pass
|
|
+ModulePass *llvm::createAutoTuningCompileModuleLegacyPass(std::string Pass) {
|
|
+ return new AutoTuningCompileModuleLegacy(Pass);
|
|
+}
|
|
+
|
|
+PreservedAnalyses AutoTuningCompileModulePass::run(Module &M,
|
|
+ ModuleAnalysisManager &) {
|
|
+ AutoTuningCompileModule Impl(Pass);
|
|
+ Impl.run(M);
|
|
+ return PreservedAnalyses::all();
|
|
+}
|
|
+
|
|
+AutoTuningCompileFunction::AutoTuningCompileFunction(std::string Pass) {
|
|
+ this->Pass = Pass;
|
|
+}
|
|
+
|
|
+void AutoTuningCompileFunction::writeIRFiles(Module &M) {
|
|
+ if (IsModuleWritten)
|
|
+ return;
|
|
+ IsModuleWritten = true;
|
|
+ writeFiles(M, Pass);
|
|
+}
|
|
+
|
|
+bool AutoTuningCompileFunction::modifyCompilationPipeline(Function &F) {
|
|
+ bool Changed = false;
|
|
+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: Deciding to enable/disable "
|
|
+ "optimization of module/functions. Pass: "
|
|
+ << Pass << '\n');
|
|
+ Module *M = F.getParent();
|
|
+ StringRef Filename = M->getName();
|
|
+ size_t Pos = Filename.rfind(".ll");
|
|
+ if (Pos == StringRef::npos) {
|
|
+ errs() << "AutoTuningCompile: Source file is not IR (.ll) file. "
|
|
+ "Disabling incremental compilation.\n";
|
|
+ AutoTuningCompileMode = Inactive;
|
|
+ return Changed;
|
|
+ }
|
|
+ Filename = Filename.substr(0, Pos);
|
|
+
|
|
+ switch (AutoTuningCompileMode) {
|
|
+ case Basic:
|
|
+ case CoarseGrain:
|
|
+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: No change in opt pipeline for "
|
|
+ "Basic/CoarseGrain incremental compilation mode.\n");
|
|
+ break;
|
|
+ case FineGrain: {
|
|
+ if (!AutoTuningCompileModule::getSkipCompilation() &&
|
|
+ Pass == autotuning::CompileOptionStart) {
|
|
+ if (!SkipDecision) {
|
|
+ M->getContext().setOptPassGate(SkipPasses);
|
|
+ getAutoTuningOptPassGate().setSkip(true);
|
|
+ SkipDecision = true;
|
|
+ }
|
|
+ AutoTuningCompileModule::setSkipCompilation(true);
|
|
+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses enabled.\n");
|
|
+ } else if (AutoTuningCompileModule::getSkipCompilation() &&
|
|
+ Pass != autotuning::CompileOptionStart &&
|
|
+ (autotuning::Engine.shouldRunOptPass(Filename.str(), Pass) ||
|
|
+ Pass == autotuning::CompileOptionEnd)) {
|
|
+ M->getContext().setOptPassGate(RunPasses);
|
|
+ getAutoTuningOptPassGate().setSkip(false);
|
|
+ SkipDecision = false;
|
|
+ AutoTuningCompileModule::setSkipCompilation(false);
|
|
+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: SkipPasses disabled.\n");
|
|
+ } else
|
|
+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: Old decision (SkipPasses = "
|
|
+ << (AutoTuningCompileModule::getSkipCompilation()
|
|
+ ? "True"
|
|
+ : "False")
|
|
+ << " ) continued.\n");
|
|
+
|
|
+ Changed = true;
|
|
+ break;
|
|
+ }
|
|
+ default:
|
|
+ llvm_unreachable(
|
|
+ "AutoTuningCompile: Unknown AutoTuner Incremental Compilation mode.\n");
|
|
+ }
|
|
+
|
|
+ return Changed;
|
|
+}
|
|
+
|
|
+bool AutoTuningCompileFunction::run(Function &F) {
|
|
+ bool Changed = false;
|
|
+ if (AutoTuningCompileMode == Inactive)
|
|
+ return Changed;
|
|
+
|
|
+ if (!autotuning::Engine.isEnabled()) {
|
|
+ LLVM_DEBUG(dbgs() << "AutoTuningCompile: AutoTuner is not enabled.\n");
|
|
+ return Changed;
|
|
+ }
|
|
+
|
|
+ writeIRFiles(*F.getParent());
|
|
+
|
|
+ if (autotuning::Engine.isParseInput())
|
|
+ Changed |= modifyCompilationPipeline(F);
|
|
+
|
|
+ return Changed;
|
|
+}
|
|
+
|
|
+AutoTuningCompileFunctionLegacy::AutoTuningCompileFunctionLegacy(
|
|
+ std::string Pass)
|
|
+ : FunctionPass(AutoTuningCompileFunctionLegacy::ID) {
|
|
+ this->Pass = Pass;
|
|
+}
|
|
+
|
|
+bool AutoTuningCompileFunctionLegacy::runOnFunction(Function &F) {
|
|
+ AutoTuningCompileFunction Impl(Pass);
|
|
+ return Impl.run(F);
|
|
+}
|
|
+
|
|
+char AutoTuningCompileFunctionLegacy::ID = 0;
|
|
+
|
|
+StringRef AutoTuningCompileFunctionLegacy::getPassName() const {
|
|
+ return "AutoTuner Incremental Compilation";
|
|
+}
|
|
+
|
|
+INITIALIZE_PASS(AutoTuningCompileFunctionLegacy, "autotuning-compile-function",
|
|
+ "AutoTuner Incremental Compilation", false, false)
|
|
+
|
|
+// Public interface to the AutoTuningCompile pass
|
|
+FunctionPass *
|
|
+llvm::createAutoTuningCompileFunctionLegacyPass(std::string Pass) {
|
|
+ return new AutoTuningCompileFunctionLegacy(Pass);
|
|
+}
|
|
+
|
|
+PreservedAnalyses
|
|
+AutoTuningCompileFunctionPass::run(Function &F, FunctionAnalysisManager &AM) {
|
|
+ AutoTuningCompileFunction Impl(Pass);
|
|
+ Impl.run(F);
|
|
+ return PreservedAnalyses::all();
|
|
+}
|
|
+
|
|
+PreservedAnalyses
|
|
+AutoTuningCompileLoopPass::run(Loop &L, LoopAnalysisManager &AM,
|
|
+ LoopStandardAnalysisResults &AR, LPMUpdater &U) {
|
|
+ AutoTuningCompileFunction Impl(Pass);
|
|
+ Function *F = L.getHeader()->getParent();
|
|
+ Impl.run(*F);
|
|
+ return PreservedAnalyses::all();
|
|
+}
|
|
+
|
|
+AutoTuningOptPassGate &llvm::getAutoTuningOptPassGate() {
|
|
+ static AutoTuningOptPassGate AutoTuningGate;
|
|
+ return AutoTuningGate;
|
|
+}
|
|
+
|
|
+#endif
|
|
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
|
|
index eb008c15903a..e5a82ea8f923 100644
|
|
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
|
|
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
|
|
@@ -2,6 +2,7 @@ add_llvm_component_library(LLVMScalarOpts
|
|
ADCE.cpp
|
|
AlignmentFromAssumptions.cpp
|
|
AnnotationRemarks.cpp
|
|
+ AutoTuningCompile.cpp
|
|
BDCE.cpp
|
|
CallSiteSplitting.cpp
|
|
ConstantHoisting.cpp
|
|
@@ -92,6 +93,7 @@ add_llvm_component_library(LLVMScalarOpts
|
|
LINK_COMPONENTS
|
|
AggressiveInstCombine
|
|
Analysis
|
|
+ AutoTuner
|
|
Core
|
|
InstCombine
|
|
Support
|
|
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
|
|
index 335b489d3cb2..feb8932eaae7 100644
|
|
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
|
|
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
|
|
@@ -66,6 +66,9 @@
|
|
#include <string>
|
|
#include <tuple>
|
|
#include <utility>
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#endif
|
|
|
|
using namespace llvm;
|
|
|
|
@@ -173,6 +176,10 @@ static cl::opt<unsigned>
|
|
cl::desc("Default threshold (max size of unrolled "
|
|
"loop), used in all but O3 optimizations"));
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+static const std::string UnrollCountParamStr = "UnrollCount";
|
|
+#endif
|
|
+
|
|
/// A magic value for use with the Threshold parameter to indicate
|
|
/// that the loop unroll should be performed regardless of how much
|
|
/// code expansion would result.
|
|
@@ -893,7 +900,12 @@ bool llvm::computeUnrollCount(
|
|
OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount,
|
|
bool MaxOrZero, unsigned TripMultiple, unsigned LoopSize,
|
|
TargetTransformInfo::UnrollingPreferences &UP,
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound,
|
|
+ unsigned int Invocation) {
|
|
+#else
|
|
TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
|
|
+#endif
|
|
|
|
UnrollCostEstimator UCE(*L, LoopSize);
|
|
|
|
@@ -942,6 +954,43 @@ bool llvm::computeUnrollCount(
|
|
}
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // Priority 2.5 is using Unroll Count set by AutoTuner (if enabled).
|
|
+ if (autotuning::Engine.isEnabled()) {
|
|
+ // Create a code region for current loop. This code region will be added to
|
|
+ // opportunity list once all the relevant information is gathered.
|
|
+ autotuning::Engine.initContainer(L, DEBUG_TYPE,
|
|
+ L->getHeader()->getParent()->getName(),
|
|
+ /* addOpportunity */ false, Invocation);
|
|
+
|
|
+ int NewValue = 0; // the int value is set by lookUpParams()
|
|
+ bool UnrollCountChanged = L->lookUpParams<int>("UnrollCount", NewValue);
|
|
+
|
|
+ if (UnrollCountChanged) {
|
|
+ // Setting the UP.Count with the value suggested by AutoTuner.
|
|
+ // AutoTuner will use UnrollCount = 0, 1, X, Y, Z in case of dynamic
|
|
+ // configuration and UnrollCount = 0, 1, 2, 4, 8 otherwise to find
|
|
+ // optimal configuration. Compiler will unroll the loop with suggested
|
|
+ // UnrollCount except when UnrollCount = 1 where AutoTuner is suggesting
|
|
+ // to try loop peeling.
|
|
+ UP.Count = NewValue;
|
|
+ UP.AllowExpensiveTripCount = true;
|
|
+ UP.Force = true;
|
|
+ UP.Runtime = true;
|
|
+ if (!UP.AllowRemainder && UP.Count != 1)
|
|
+ UP.Count = 0;
|
|
+
|
|
+ // Check for Loop Peeling
|
|
+ if (UP.Count == 1) {
|
|
+ computePeelCount(L, LoopSize, PP, TripCount, DT, SE, AC, UP.Threshold);
|
|
+ UP.Runtime = (PP.PeelCount) ? false : UP.Runtime;
|
|
+ }
|
|
+
|
|
+ return true;
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+
|
|
// 3rd priority is exact full unrolling. This will eliminate all copies
|
|
// of some exit test.
|
|
UP.Count = 0;
|
|
@@ -1119,6 +1168,59 @@ bool llvm::computeUnrollCount(
|
|
return ExplicitUnroll;
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+// Builds the list of "UnrollCount" values the AutoTuner may try for a loop and
|
|
+// attaches it to CR.
|
|
+//
|
|
+// \param UPCount   unroll count chosen by the compiler (UP.Count); 0 if none.
|
|
+// \param TripCount trip count supplied by the caller; when it is 0 a default
|
|
+//                  of 8 is used in its place.
|
|
+// \param CR        code region that receives the options.
|
|
+//
|
|
+// The list always starts with 0 and 1 (1 adds loop peeling as an option). It
|
|
+// is followed by a "full" count -- UPCount if set, otherwise the trip count
|
|
+// capped at 64 (or 8) -- and by at most the two most recently generated
|
|
+// power-of-two candidates around that count. No value is offered twice for
|
|
+// the count of 1.
|
|
+static void
|
|
+computeAutoTunerDynamicUnrollOptions(unsigned UPCount, unsigned TripCount,
|
|
+                                     const autotuning::CodeRegion &CR) {
|
|
+  const unsigned int MaxTuningCount = 64;
|
|
+  // Highest power of two an unsigned int can hold; doubling it wraps to 0.
|
|
+  const unsigned int TopPower = ~(~0U >> 1);
|
|
+
|
|
+  // 0 is always offered, and 1 adds loop peeling as an additional option.
|
|
+  std::vector<unsigned int> DynamicTuningOptions{0, 1};
|
|
+
|
|
+  // Two-slot ring buffer holding the most recent power-of-two candidates.
|
|
+  // Slot is where the next candidate is written; Last is the number of
|
|
+  // candidates recorded so far minus one (-1 while there are none).
|
|
+  unsigned int Recent[2];
|
|
+  unsigned int Slot = 0;
|
|
+  int Last = -1;
|
|
+  unsigned int Power = 2;
|
|
+  // Cleared once Power cannot be doubled again without wrapping around.
|
|
+  bool CanDouble = true;
|
|
+
|
|
+  // Records every power of two in [Power, Bound). Doubling stops at TopPower:
|
|
+  // going further would wrap Power to 0 and, for a Bound above TopPower, the
|
|
+  // loop would never terminate.
|
|
+  auto RecordPowersBelow = [&](unsigned int Bound) {
|
|
+    while (CanDouble && Power < Bound) {
|
|
+      Recent[Slot] = Power;
|
|
+      ++Last;
|
|
+      Slot ^= 1;
|
|
+      if (Power == TopPower)
|
|
+        CanDouble = false;
|
|
+      else
|
|
+        Power *= 2;
|
|
+    }
|
|
+  };
|
|
+
|
|
+  if (!UPCount) {
|
|
+    // No count from the compiler: tune around the (capped) trip count.
|
|
+    TripCount = std::min(TripCount, MaxTuningCount);
|
|
+    const unsigned int FullCount = TripCount ? TripCount : 8;
|
|
+    // 1 is already in the list; do not offer it a second time.
|
|
+    if (FullCount != 1)
|
|
+      DynamicTuningOptions.push_back(FullCount);
|
|
+    RecordPowersBelow(FullCount);
|
|
+  } else {
|
|
+    RecordPowersBelow(UPCount);
|
|
+    if (CanDouble && TripCount != UPCount) {
|
|
+      // Also consider the first power of two above UPCount, provided it is
|
|
+      // representable and, when TripCount is non-zero, stays below it.
|
|
+      if (Power == UPCount) {
|
|
+        if (Power == TopPower)
|
|
+          CanDouble = false;
|
|
+        else
|
|
+          Power *= 2;
|
|
+      }
|
|
+      if (CanDouble && (!TripCount || Power < TripCount)) {
|
|
+        Recent[Slot] = Power;
|
|
+        ++Last;
|
|
+      }
|
|
+    }
|
|
+    // 1 is already in the list; do not offer it a second time.
|
|
+    if (UPCount != 1)
|
|
+      DynamicTuningOptions.push_back(UPCount);
|
|
+  }
|
|
+
|
|
+  // Emit at most the two most recent candidates, slot 1 before slot 0.
|
|
+  for (int I = std::min(1, Last); I >= 0; --I)
|
|
+    DynamicTuningOptions.push_back(Recent[I]);
|
|
+
|
|
+  CR.addAutoTunerOptions("UnrollCount", DynamicTuningOptions);
|
|
+}
|
|
+#endif
|
|
+
|
|
static LoopUnrollResult
|
|
tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
|
|
const TargetTransformInfo &TTI, AssumptionCache &AC,
|
|
@@ -1132,7 +1234,12 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
|
|
std::optional<bool> ProvidedUpperBound,
|
|
std::optional<bool> ProvidedAllowPeeling,
|
|
std::optional<bool> ProvidedAllowProfileBasedPeeling,
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ std::optional<unsigned> ProvidedFullUnrollMaxCount,
|
|
+ unsigned int Invocation = 0) {
|
|
+#else
|
|
std::optional<unsigned> ProvidedFullUnrollMaxCount) {
|
|
+#endif
|
|
|
|
LLVM_DEBUG(dbgs() << "Loop Unroll: F["
|
|
<< L->getHeader()->getParent()->getName() << "] Loop %"
|
|
@@ -1276,11 +1383,28 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
|
|
// computeUnrollCount() decides whether it is beneficial to use upper bound to
|
|
// fully unroll the loop.
|
|
bool UseUpperBound = false;
|
|
+
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ bool IsCountSetExplicitly = computeUnrollCount(
|
|
+ L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount,
|
|
+ MaxOrZero, TripMultiple, LoopSize, UP, PP, UseUpperBound, Invocation);
|
|
+ const autotuning::CodeRegion CR = L->getCodeRegion();
|
|
+ // computeAutoTunerDynamicUnrollOptions() adds the dynamic Unroll values to
|
|
+ // the CodeRegion.
|
|
+ computeAutoTunerDynamicUnrollOptions(UP.Count, TripCount, CR);
|
|
+
|
|
+ if (!UP.Count) {
|
|
+ autotuning::Engine.addOpportunity(
|
|
+ CR, {{UnrollCountParamStr, std::to_string(UP.Count)}});
|
|
+ return LoopUnrollResult::Unmodified;
|
|
+ }
|
|
+#else
|
|
bool IsCountSetExplicitly = computeUnrollCount(
|
|
L, TTI, DT, LI, &AC, SE, EphValues, &ORE, TripCount, MaxTripCount, MaxOrZero,
|
|
TripMultiple, LoopSize, UP, PP, UseUpperBound);
|
|
if (!UP.Count)
|
|
return LoopUnrollResult::Unmodified;
|
|
+#endif
|
|
|
|
if (PP.PeelCount) {
|
|
assert(UP.Count == 1 && "Cannot perform peel and unroll in the same step");
|
|
@@ -1300,8 +1424,16 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
|
|
// we had, so we don't want to unroll or peel again.
|
|
if (PP.PeelProfiledIterations)
|
|
L->setLoopAlreadyUnrolled();
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ autotuning::Engine.addOpportunity(
|
|
+ CR, {{UnrollCountParamStr, std::to_string(UP.Count)}});
|
|
+ return LoopUnrollResult::PartiallyUnrolled;
|
|
+ }
|
|
+ autotuning::Engine.addOpportunity(CR, {{UnrollCountParamStr, "0"}});
|
|
+#else
|
|
return LoopUnrollResult::PartiallyUnrolled;
|
|
}
|
|
+#endif
|
|
return LoopUnrollResult::Unmodified;
|
|
}
|
|
|
|
@@ -1329,8 +1461,18 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
|
|
{UP.Count, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount,
|
|
UP.UnrollRemainder, ForgetAllSCEV},
|
|
LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop);
|
|
+
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (UnrollResult == LoopUnrollResult::Unmodified) {
|
|
+ autotuning::Engine.addOpportunity(CR, {{UnrollCountParamStr, "0"}});
|
|
+ return LoopUnrollResult::Unmodified;
|
|
+ }
|
|
+ autotuning::Engine.addOpportunity(
|
|
+ CR, {{UnrollCountParamStr, std::to_string(UP.Count)}});
|
|
+#else
|
|
if (UnrollResult == LoopUnrollResult::Unmodified)
|
|
return LoopUnrollResult::Unmodified;
|
|
+#endif
|
|
|
|
if (RemainderLoop) {
|
|
std::optional<MDNode *> RemainderLoopID =
|
|
@@ -1379,6 +1521,20 @@ public:
|
|
/// Otherwise, forgetAllLoops and rebuild when needed next.
|
|
bool ForgetAllSCEV;
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+private:
|
|
+ // 'InvocationCounter' keeps track of Invocation of Loop Unroll Pass and
|
|
+ // assigns it to 'Invocation'. So each LoopUnroll Object knows when it is
|
|
+ // being invoked during optimization pipeline. It is used to identify the
|
|
+ // Invocation of a pass if it is invoked multiple times. AutoTuner will use
|
|
+ // this information to generate the Code Regions and apply the suggested
|
|
+ // configuration during the correct invocation of the Loop Unroll Pass.
|
|
+ static unsigned int InvocationCounter;
|
|
+ unsigned int Invocation;
|
|
+
|
|
+public:
|
|
+#endif
|
|
+
|
|
std::optional<unsigned> ProvidedCount;
|
|
std::optional<unsigned> ProvidedThreshold;
|
|
std::optional<bool> ProvidedAllowPartial;
|
|
@@ -1405,6 +1561,9 @@ public:
|
|
ProvidedAllowPeeling(AllowPeeling),
|
|
ProvidedAllowProfileBasedPeeling(AllowProfileBasedPeeling),
|
|
ProvidedFullUnrollMaxCount(ProvidedFullUnrollMaxCount) {
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ Invocation = InvocationCounter++;
|
|
+#endif
|
|
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
|
|
}
|
|
|
|
@@ -1431,7 +1590,12 @@ public:
|
|
/*OnlyFullUnroll*/ false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount,
|
|
ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
|
|
ProvidedUpperBound, ProvidedAllowPeeling,
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount,
|
|
+ Invocation);
|
|
+#else
|
|
ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount);
|
|
+#endif
|
|
|
|
if (Result == LoopUnrollResult::FullyUnrolled)
|
|
LPM.markLoopAsDeleted(*L);
|
|
@@ -1449,6 +1613,9 @@ public:
|
|
getLoopAnalysisUsage(AU);
|
|
}
|
|
};
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+unsigned int LoopUnroll::InvocationCounter = 0;
|
|
+#endif
|
|
|
|
} // end anonymous namespace
|
|
|
|
@@ -1496,6 +1663,11 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
|
|
|
|
std::string LoopName = std::string(L.getName());
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // LoopFullUnrollPass will be invoked first during optimization pipeline.
|
|
+ unsigned int Invocation = 0;
|
|
+#endif
|
|
+
|
|
bool Changed =
|
|
tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE,
|
|
/*BFI*/ nullptr, /*PSI*/ nullptr,
|
|
@@ -1505,7 +1677,12 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
|
|
/*Runtime*/ false, /*UpperBound*/ false,
|
|
/*AllowPeeling*/ true,
|
|
/*AllowProfileBasedPeeling*/ false,
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ /*FullUnrollMaxCount*/ std::nullopt,
|
|
+ /*Invocation*/ Invocation) !=
|
|
+#else
|
|
/*FullUnrollMaxCount*/ std::nullopt) !=
|
|
+#endif
|
|
LoopUnrollResult::Unmodified;
|
|
if (!Changed)
|
|
return PreservedAnalyses::all();
|
|
@@ -1588,6 +1765,11 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
|
|
|
|
bool Changed = false;
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // LoopUnrollPass will be invoked second during optimization pipeline.
|
|
+ unsigned int Invocation = 1;
|
|
+#endif
|
|
+
|
|
// The unroller requires loops to be in simplified form, and also needs LCSSA.
|
|
// Since simplification may add new inner loops, it has to run before the
|
|
// legality and profitability checks. This means running the loop unroller
|
|
@@ -1630,7 +1812,12 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
|
|
/*Count*/ std::nullopt,
|
|
/*Threshold*/ std::nullopt, UnrollOpts.AllowPartial,
|
|
UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling,
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount,
|
|
+ Invocation);
|
|
+#else
|
|
UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount);
|
|
+#endif
|
|
Changed |= Result != LoopUnrollResult::Unmodified;
|
|
|
|
// The parent must not be damaged by unrolling!
|
|
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
|
|
index 37b032e4d7c7..4b140e8d600b 100644
|
|
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
|
|
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
|
|
@@ -64,4 +64,8 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
|
|
initializeStraightLineStrengthReduceLegacyPassPass(Registry);
|
|
initializePlaceBackedgeSafepointsLegacyPassPass(Registry);
|
|
initializeLoopSimplifyCFGLegacyPassPass(Registry);
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ initializeAutoTuningCompileFunctionLegacyPass(Registry);
|
|
+ initializeAutoTuningCompileModuleLegacyPass(Registry);
|
|
+#endif
|
|
}
|
|
diff --git a/llvm/lib/Transforms/Scalar/Sink.cpp b/llvm/lib/Transforms/Scalar/Sink.cpp
|
|
index 8b99f73b850b..b3c60686e252 100644
|
|
--- a/llvm/lib/Transforms/Scalar/Sink.cpp
|
|
+++ b/llvm/lib/Transforms/Scalar/Sink.cpp
|
|
@@ -248,6 +248,11 @@ namespace {
|
|
}
|
|
|
|
bool runOnFunction(Function &F) override {
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (skipFunction(F))
|
|
+ return false;
|
|
+#endif
|
|
+
|
|
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
|
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
|
|
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
|
|
diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
|
|
index a870071f3f64..8616e7b923c0 100644
|
|
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
|
|
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
|
|
@@ -93,6 +93,7 @@ add_llvm_component_library(LLVMTransformUtils
|
|
|
|
LINK_COMPONENTS
|
|
Analysis
|
|
+ AutoTuner
|
|
Core
|
|
Support
|
|
TargetParser
|
|
diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp
|
|
index c36b0533580b..20a4edcb29db 100644
|
|
--- a/llvm/lib/Transforms/Utils/LCSSA.cpp
|
|
+++ b/llvm/lib/Transforms/Utils/LCSSA.cpp
|
|
@@ -491,6 +491,11 @@ char &llvm::LCSSAID = LCSSAWrapperPass::ID;
|
|
|
|
/// Transform \p F into loop-closed SSA form.
|
|
bool LCSSAWrapperPass::runOnFunction(Function &F) {
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (skipFunction(F))
|
|
+ return false;
|
|
+#endif
|
|
+
|
|
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
|
|
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
|
auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
|
|
diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
|
|
index 3e604fdf2e11..2e42e7f1397f 100644
|
|
--- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp
|
|
+++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
|
|
@@ -69,6 +69,9 @@
|
|
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
|
#include "llvm/Transforms/Utils/Local.h"
|
|
#include "llvm/Transforms/Utils/LoopUtils.h"
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#endif
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "loop-simplify"
|
|
@@ -793,6 +796,11 @@ Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
|
|
/// it in any convenient order) inserting preheaders...
|
|
///
|
|
bool LoopSimplify::runOnFunction(Function &F) {
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (autotuning::Engine.isEnabled() && skipFunction(F))
|
|
+ return false;
|
|
+#endif
|
|
+
|
|
bool Changed = false;
|
|
LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
|
|
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
|
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
|
|
index 511dd61308f9..2d2c3e50514b 100644
|
|
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
|
|
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
|
|
@@ -69,6 +69,9 @@
|
|
#include <numeric>
|
|
#include <type_traits>
|
|
#include <vector>
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#endif
|
|
|
|
namespace llvm {
|
|
class DataLayout;
|
|
diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
|
|
index 998dfd956575..f2c5c04abb13 100644
|
|
--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt
|
|
+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
|
|
@@ -21,6 +21,7 @@ add_llvm_component_library(LLVMVectorize
|
|
|
|
LINK_COMPONENTS
|
|
Analysis
|
|
+ AutoTuner
|
|
Core
|
|
Support
|
|
TransformUtils
|
|
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
|
|
index f923f0be6621..f13ce6853666 100644
|
|
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
|
|
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
|
|
@@ -113,6 +113,18 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
|
|
// Populate values with existing loop metadata.
|
|
getHintsFromMetadata();
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (autotuning::Engine.isEnabled()) {
|
|
+ int NewValue = 0;
|
|
+ bool VectorizationInterleaveChanged =
|
|
+ L->lookUpParams<int>("VectorizationInterleave", NewValue);
|
|
+
|
|
+ if (VectorizationInterleaveChanged) {
|
|
+ Interleave.Value = NewValue;
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+
|
|
// force-vector-interleave overrides DisableInterleaving.
|
|
if (VectorizerParams::isInterleaveForced())
|
|
Interleave.Value = VectorizerParams::VectorizationInterleave;
|
|
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
|
|
index b603bbe55dc9..46fab860f5a3 100644
|
|
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
|
|
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
|
|
@@ -10178,6 +10178,22 @@ LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts)
|
|
VectorizeOnlyWhenForced(Opts.VectorizeOnlyWhenForced ||
|
|
!EnableLoopVectorization) {}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+// Builds the interleave counts offered to the AutoTuner and attaches them to
|
|
+// CR under the "VectorizationInterleave" parameter. The list is {1, 2, 4}; if
|
|
+// the given interleave count (IC) is none of these, it replaces the last entry.
|
|
+static void
|
|
+computeAutoTunerDynamicInterleaveOptions(unsigned IC,
|
|
+                                         const autotuning::CodeRegion &CR) {
|
|
+  std::vector<unsigned int> Candidates{1, 2, 4};
|
|
+
|
|
+  // Only swap IC in when it is not already one of the stock choices.
|
|
+  const bool IsStockChoice = IC == 1 || IC == 2 || IC == 4;
|
|
+  if (!IsStockChoice)
|
|
+    Candidates.back() = IC;
|
|
+
|
|
+  CR.addAutoTunerOptions("VectorizationInterleave", Candidates);
|
|
+}
|
|
+#endif
|
|
+
|
|
bool LoopVectorizePass::processLoop(Loop *L) {
|
|
assert((EnableVPlanNativePath || L->isInnermost()) &&
|
|
"VPlan-native path is not enabled. Only process inner loops.");
|
|
@@ -10190,6 +10206,12 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
|
<< L->getHeader()->getParent()->getName() << "' from "
|
|
<< DebugLocStr << "\n");
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // Initialize the loop for auto-tuning but do not add it
|
|
+ // as a tuning opportunity yet.
|
|
+ autotuning::Engine.initContainer(
|
|
+ L, LV_NAME, L->getHeader()->getParent()->getName(), false);
|
|
+#endif
|
|
LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE, TTI);
|
|
|
|
LLVM_DEBUG(
|
|
@@ -10422,6 +10444,18 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
|
InterleaveLoop = false;
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (!VectorizerParams::isInterleaveForced()) {
|
|
+ // Compute the dynamic values for VectorizationInterleave and add it to the
|
|
+ // CodeRegion.
|
|
+ computeAutoTunerDynamicInterleaveOptions(IC, L->getCodeRegion());
|
|
+
|
|
+ // Add the current loop as a tuning opportunity explicitly.
|
|
+ autotuning::Engine.addOpportunity(
|
|
+ L->getCodeRegion(), {{"VectorizationInterleave", std::to_string(IC)}});
|
|
+ }
|
|
+#endif
|
|
+
|
|
// Override IC if user provided an interleave count.
|
|
IC = UserIC > 0 ? UserIC : IC;
|
|
|
|
diff --git a/llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml b/llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml
|
|
new file mode 100644
|
|
index 000000000000..f483a269906a
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/AutotuningDump/Inputs/unroll_template.yaml
|
|
@@ -0,0 +1,8 @@
|
|
+--- !AutoTuning
|
|
+Pass: loop-unroll
|
|
+Name: [name]
|
|
+Function: foo
|
|
+CodeRegionType: loop
|
|
+Args:
|
|
+ - UnrollCount: [number]
|
|
+...
|
|
diff --git a/llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll b/llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll
|
|
new file mode 100644
|
|
index 000000000000..ceb9b4fb2ca6
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/AutotuningDump/create-data-dir.ll
|
|
@@ -0,0 +1,65 @@
|
|
+; UNSUPPORTED: windows
|
|
+; RUN: sed 's#\[number\]#0#g; s#\[name\]#for.body#g' \
|
|
+; RUN: %S/Inputs/unroll_template.yaml > %t.DEFAULT.yaml
|
|
+; RUN: opt --disable-output %s -S -passes='require<autotuning-dump>' \
|
|
+; RUN: -auto-tuning-input=%t.DEFAULT.yaml -auto-tuning-config-id=1
|
|
+; RUN: cat %T/../autotune_datadir/create-data-dir.ll/1.ll | FileCheck %s
|
|
+; RUN: rm -rf %T/../autotune_datadir/*
|
|
+
|
|
+; RUN: cp %t.DEFAULT.yaml %T/../autotune_datadir/config.yaml
|
|
+; RUN: opt %s -S -passes='require<autotuning-dump>' -auto-tuning-config-id=1
|
|
+; RUN: cat %T/../autotune_datadir/create-data-dir.ll/1.ll | FileCheck %s
|
|
+; RUN: rm -rf %T/../autotune_datadir/*
|
|
+
|
|
+; RUN: cp %t.DEFAULT.yaml %T/../autotune_datadir/config.yaml
|
|
+; RUN: opt %s -S -passes='require<autotuning-dump>' -enable-autotuning-dump
|
|
+; RUN: echo -n %T/../autotune_datadir/IR_files/ > %t.filename
|
|
+; RUN: echo -n "create-data-dir.ll/" >> %t.filename
|
|
+; RUN: echo -n %s | sed 's#/#_#g' >> %t.filename
|
|
+; RUN: echo -n ".ll" >> %t.filename
|
|
+; RUN: cat %t.filename | xargs cat | FileCheck %s
|
|
+; RUN: rm -rf %T/../autotune_datadir
|
|
+
|
|
+; ModuleID = 'search.c'
|
|
+source_filename = "search.c"
|
|
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|
+target triple = "aarch64-unknown-linux-gnu"
|
|
+
|
|
+; Function Attrs: argmemonly nofree norecurse nosync nounwind readonly uwtable
|
|
+define dso_local i32 @search(ptr nocapture noundef readonly %Arr, i32 noundef %Value, i32 noundef %Size) {
|
|
+entry:
|
|
+ %cmp5 = icmp sgt i32 %Size, 0
|
|
+ br i1 %cmp5, label %for.body.preheader, label %for.end
|
|
+
|
|
+for.body.preheader: ; preds = %entry
|
|
+ %wide.trip.count = zext i32 %Size to i64
|
|
+ br label %for.body
|
|
+
|
|
+for.body: ; preds = %for.body.preheader, %for.inc
|
|
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
|
|
+ %arrayidx = getelementptr inbounds i32, ptr %Arr, i64 %indvars.iv
|
|
+ %0 = load i32, ptr %arrayidx, align 4
|
|
+ %cmp1 = icmp eq i32 %0, %Value
|
|
+ br i1 %cmp1, label %for.end.loopexit.split.loop.exit, label %for.inc
|
|
+
|
|
+for.inc: ; preds = %for.body
|
|
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
|
+ br i1 %exitcond.not, label %for.end, label %for.body
|
|
+
|
|
+for.end.loopexit.split.loop.exit: ; preds = %for.body
|
|
+ %1 = trunc i64 %indvars.iv to i32
|
|
+ br label %for.end
|
|
+
|
|
+for.end: ; preds = %for.inc, %for.end.loopexit.split.loop.exit, %entry
|
|
+ %Idx.0.lcssa = phi i32 [ 0, %entry ], [ %1, %for.end.loopexit.split.loop.exit ], [ %Size, %for.inc ]
|
|
+ ret i32 %Idx.0.lcssa
|
|
+}
|
|
+
|
|
+; Check that only the loop body is inside the IR file.
|
|
+; CHECK-LABEL: for.body: ; preds =
|
|
+; CHECK-NEXT: %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
|
|
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Arr, i64 %indvars.iv
|
|
+; CHECK-NEXT: %0 = load i32, ptr %arrayidx, align 4
|
|
+; CHECK-NEXT: %cmp1 = icmp eq i32 %0, %Value
|
|
+; CHECK-NEXT: br i1 %cmp1, label %for.end.loopexit.split.loop.exit, label %for.inc
|
|
diff --git a/llvm/test/AutoTuning/AutotuningDump/unroll.ll b/llvm/test/AutoTuning/AutotuningDump/unroll.ll
|
|
new file mode 100644
|
|
index 000000000000..e8243da55fff
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/AutotuningDump/unroll.ll
|
|
@@ -0,0 +1,35 @@
|
|
+; RUN: rm -rf %T.tmp/Output
|
|
+; RUN: mkdir -p %T.tmp/Output
|
|
+; RUN: rm %t.DEFAULT.yaml -rf
|
|
+; RUN: sed 's#\[number\]#0#g; s#\[name\]#for.body#g' %S/Inputs/unroll_template.yaml > %t.DEFAULT.yaml
|
|
+; RUN: env AUTOTUNE_DATADIR=%T.tmp/Output opt %s -S -passes='require<autotuning-dump>' \
|
|
+; RUN: -auto-tuning-input=%t.DEFAULT.yaml -auto-tuning-config-id=1
|
|
+; RUN: env AUTOTUNE_DATADIR=%T.tmp/Output opt %s -S -passes='require<autotuning-dump>' \
|
|
+; RUN: -auto-tuning-input=%t.DEFAULT.yaml -auto-tuning-config-id=2
|
|
+; RUN: cat %T.tmp/Output/unroll.ll/1.ll | FileCheck %s -check-prefix=DEFAULT
|
|
+; RUN: cat %T.tmp/Output/unroll.ll/2.ll | FileCheck %s -check-prefix=DEFAULT
|
|
+; UNSUPPORTED: windows
|
|
+
|
|
+define void @foo(i32* nocapture %a) {
|
|
+entry:
|
|
+ br label %for.body
|
|
+for.body: ; preds = %for.body, %entry
|
|
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
|
+ %0 = load i32, i32* %arrayidx, align 4
|
|
+ %inc = add nsw i32 %0, 1
|
|
+ store i32 %inc, i32* %arrayidx, align 4
|
|
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
|
|
+ br i1 %exitcond, label %for.end, label %for.body
|
|
+for.end: ; preds = %for.body
|
|
+ ret void
|
|
+}
|
|
+; Check that only the loop body is inside the IR file.
|
|
+; DEFAULT-LABEL: for.body: ; preds = %for.body, %entry
|
|
+; DEFAULT-NEXT: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
+; DEFAULT-NEXT: %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
|
|
+; DEFAULT: %exitcond = icmp eq i64 %indvars.iv.next, 64
|
|
+; DEFAULT: br i1 %exitcond, label %for.end, label %for.body
|
|
+
|
|
+; RUN: rm -rf %T.tmp/Output
|
|
diff --git a/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml
|
|
new file mode 100644
|
|
index 000000000000..a5e669c17a71
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/baseline_config.yaml
|
|
@@ -0,0 +1,9 @@
|
|
+!AutoTuning {Args: [{UnrollCount: 0}], CodeRegionHash: 12835463591102937421,
|
|
+ CodeRegionType: loop, Function: test, Invocation: 0, Name: for.body,
|
|
+ Pass: loop-unroll}
|
|
+--- !AutoTuning {Args: [{VectorizationInterleave: 2}],
|
|
+ CodeRegionHash: 12835463591102937421, CodeRegionType: loop, Function: test,
|
|
+ Invocation: 0, Name: for.body, Pass: loop-vectorize}
|
|
+--- !AutoTuning {Args: [{UnrollCount: 0}], CodeRegionHash: 8430337282115614432,
|
|
+ CodeRegionType: loop, Function: test, Invocation: 1, Name: vector.body,
|
|
+ Pass: loop-unroll}
|
|
diff --git a/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml
|
|
new file mode 100644
|
|
index 000000000000..738cf55ffe9a
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/BaselineConfig/Inputs/autotune_datadir/random_config.yaml
|
|
@@ -0,0 +1,9 @@
|
|
+!AutoTuning {Args: [{UnrollCount: 2}], CodeRegionHash: 12835463591102937421,
|
|
+ CodeRegionType: loop, Function: test, Invocation: 0, Name: for.body,
|
|
+ Pass: loop-unroll}
|
|
+--- !AutoTuning {Args: [{VectorizationInterleave: 2}],
|
|
+ CodeRegionHash: 12835463591102937421, CodeRegionType: loop, Function: test,
|
|
+ Invocation: 0, Name: for.body, Pass: loop-vectorize}
|
|
+--- !AutoTuning {Args: [{UnrollCount: 0}], CodeRegionHash: 8430337282115614432,
|
|
+ CodeRegionType: loop, Function: test, Invocation: 1, Name: vector.body,
|
|
+ Pass: loop-unroll}
|
|
diff --git a/llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll b/llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll
|
|
new file mode 100644
|
|
index 000000000000..667a076b2d23
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/BaselineConfig/Inputs/test.ll
|
|
@@ -0,0 +1,117 @@
|
|
+; ModuleID = 'test.c'
|
|
+source_filename = "test.c"
|
|
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|
+target triple = "aarch64-unknown-linux-gnu"
|
|
+
|
|
+@.str = private unnamed_addr constant [12 x i8] c"tmp <= 10.0\00", align 1
|
|
+@.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", align 1
|
|
+@__PRETTY_FUNCTION__.test = private unnamed_addr constant [12 x i8] c"void test()\00", align 1
|
|
+
|
|
+; Function Attrs: nounwind uwtable
|
|
+define dso_local void @test() #0 {
|
|
+entry:
|
|
+ %cs = alloca i32, align 4
|
|
+ %flush = alloca ptr, align 8
|
|
+ %i = alloca i32, align 4
|
|
+ %tmp = alloca double, align 8
|
|
+ call void @llvm.lifetime.start.p0(i64 4, ptr %cs) #5
|
|
+ store i32 16431360, ptr %cs, align 4, !tbaa !6
|
|
+ call void @llvm.lifetime.start.p0(i64 8, ptr %flush) #5
|
|
+ %0 = load i32, ptr %cs, align 4, !tbaa !6
|
|
+ %conv = sext i32 %0 to i64
|
|
+ %call = call noalias ptr @calloc(i64 noundef %conv, i64 noundef 8) #6
|
|
+ store ptr %call, ptr %flush, align 8, !tbaa !10
|
|
+ call void @llvm.lifetime.start.p0(i64 4, ptr %i) #5
|
|
+ call void @llvm.lifetime.start.p0(i64 8, ptr %tmp) #5
|
|
+ store double 0.000000e+00, ptr %tmp, align 8, !tbaa !12
|
|
+ store i32 0, ptr %i, align 4, !tbaa !6
|
|
+ br label %for.cond
|
|
+
|
|
+for.cond: ; preds = %for.inc, %entry
|
|
+ %1 = load i32, ptr %i, align 4, !tbaa !6
|
|
+ %2 = load i32, ptr %cs, align 4, !tbaa !6
|
|
+ %cmp = icmp slt i32 %1, %2
|
|
+ br i1 %cmp, label %for.body, label %for.end
|
|
+
|
|
+for.body: ; preds = %for.cond
|
|
+ %3 = load ptr, ptr %flush, align 8, !tbaa !10
|
|
+ %4 = load i32, ptr %i, align 4, !tbaa !6
|
|
+ %idxprom = sext i32 %4 to i64
|
|
+ %arrayidx = getelementptr inbounds double, ptr %3, i64 %idxprom
|
|
+ %5 = load double, ptr %arrayidx, align 8, !tbaa !12
|
|
+ %6 = load double, ptr %tmp, align 8, !tbaa !12
|
|
+ %add = fadd double %6, %5
|
|
+ store double %add, ptr %tmp, align 8, !tbaa !12
|
|
+ br label %for.inc
|
|
+
|
|
+for.inc: ; preds = %for.body
|
|
+ %7 = load i32, ptr %i, align 4, !tbaa !6
|
|
+ %inc = add nsw i32 %7, 1
|
|
+ store i32 %inc, ptr %i, align 4, !tbaa !6
|
|
+ br label %for.cond, !llvm.loop !14
|
|
+
|
|
+for.end: ; preds = %for.cond
|
|
+ %8 = load double, ptr %tmp, align 8, !tbaa !12
|
|
+ %cmp2 = fcmp ole double %8, 1.000000e+01
|
|
+ br i1 %cmp2, label %if.then, label %if.else
|
|
+
|
|
+if.then: ; preds = %for.end
|
|
+ br label %if.end
|
|
+
|
|
+if.else: ; preds = %for.end
|
|
+ call void @__assert_fail(ptr noundef @.str, ptr noundef @.str.1, i32 noundef 11, ptr noundef @__PRETTY_FUNCTION__.test) #7
|
|
+ unreachable
|
|
+
|
|
+if.end: ; preds = %if.then
|
|
+ %9 = load ptr, ptr %flush, align 8, !tbaa !10
|
|
+ call void @free(ptr noundef %9) #5
|
|
+ call void @llvm.lifetime.end.p0(i64 8, ptr %tmp) #5
|
|
+ call void @llvm.lifetime.end.p0(i64 4, ptr %i) #5
|
|
+ call void @llvm.lifetime.end.p0(i64 8, ptr %flush) #5
|
|
+ call void @llvm.lifetime.end.p0(i64 4, ptr %cs) #5
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
|
|
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
|
|
+
|
|
+; Function Attrs: nounwind allocsize(0,1)
|
|
+declare noalias ptr @calloc(i64 noundef, i64 noundef) #2
|
|
+
|
|
+; Function Attrs: noreturn nounwind
|
|
+declare void @__assert_fail(ptr noundef, ptr noundef, i32 noundef, ptr noundef) #3
|
|
+
|
|
+; Function Attrs: nounwind
|
|
+declare void @free(ptr noundef) #4
|
|
+
|
|
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
|
|
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
|
|
+
|
|
+attributes #0 = { nounwind uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" }
|
|
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
|
|
+attributes #2 = { nounwind allocsize(0,1) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" }
|
|
+attributes #3 = { noreturn nounwind "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" }
|
|
+attributes #4 = { nounwind "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" }
|
|
+attributes #5 = { nounwind }
|
|
+attributes #6 = { nounwind allocsize(0,1) }
|
|
+attributes #7 = { noreturn nounwind }
|
|
+
|
|
+!llvm.module.flags = !{!0, !1, !2, !3, !4}
|
|
+!llvm.ident = !{!5}
|
|
+
|
|
+!0 = !{i32 1, !"wchar_size", i32 4}
|
|
+!1 = !{i32 8, !"PIC Level", i32 2}
|
|
+!2 = !{i32 7, !"PIE Level", i32 2}
|
|
+!3 = !{i32 7, !"uwtable", i32 2}
|
|
+!4 = !{i32 7, !"frame-pointer", i32 1}
|
|
+!5 = !{!"Huawei BiSheng Compiler clang version 18.0.0 (ssh://git@codehub-dg-y.huawei.com:2222/CompilerKernel/BiShengKernel/BiSheng.git 026024071a7fb66b26b65fb81da702cc5f0cf405)"}
|
|
+!6 = !{!7, !7, i64 0}
|
|
+!7 = !{!"int", !8, i64 0}
|
|
+!8 = !{!"omnipotent char", !9, i64 0}
|
|
+!9 = !{!"Simple C/C++ TBAA"}
|
|
+!10 = !{!11, !11, i64 0}
|
|
+!11 = !{!"any pointer", !8, i64 0}
|
|
+!12 = !{!13, !13, i64 0}
|
|
+!13 = !{!"double", !8, i64 0}
|
|
+!14 = distinct !{!14, !15}
|
|
+!15 = !{!"llvm.loop.mustprogress"}
|
|
diff --git a/llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll b/llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll
|
|
new file mode 100644
|
|
index 000000000000..f905208a2f3b
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/BaselineConfig/apply_baseline_config.ll
|
|
@@ -0,0 +1,11 @@
|
|
+; The purpose is to test that the baseline IR is the same as the 1st iteration
|
|
+; of the autotuning process with --use-baseline-config enabled.
|
|
+; RUN: rm -f %t.baseline %t.firstIt_baseline %t.firstIt_random
|
|
+; RUN: opt -O3 %S/Inputs/test.ll -o %t.baseline
|
|
+; RUN: opt -O3 %S/Inputs/test.ll -o %t.firstIt_baseline \
|
|
+; RUN: -auto-tuning-input=%S/Inputs/autotune_datadir/baseline_config.yaml
|
|
+; RUN: cmp %t.firstIt_baseline %t.baseline
|
|
+
|
|
+; RUN: opt -O3 %S/Inputs/test.ll -o %t.firstIt_random \
|
|
+; RUN: -auto-tuning-input=%S/Inputs/autotune_datadir/random_config.yaml
|
|
+; RUN: not cmp %t.firstIt_random %t.baseline
|
|
diff --git a/llvm/test/AutoTuning/BaselineConfig/opp.ll b/llvm/test/AutoTuning/BaselineConfig/opp.ll
|
|
new file mode 100644
|
|
index 000000000000..b2897316fc22
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/BaselineConfig/opp.ll
|
|
@@ -0,0 +1,67 @@
|
|
+; REQUIRES: asserts
|
|
+; RUN: rm %t.callsite_opp -rf
|
|
+; RUN: opt %s -O3 -debug-only=inline -disable-output -S 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=DEFAULT
|
|
+; RUN: opt %s -O3 -auto-tuning-opp=%t.callsite_opp -disable-output -S 2>&1
|
|
+; RUN: FileCheck %s --input-file %t.callsite_opp/opp.ll.yaml -check-prefix=AUTOTUNE
|
|
+
|
|
+@a = global i32 4
|
|
+
|
|
+; Function Attrs: nounwind readnone uwtable
|
|
+define i32 @simpleFunction(i32 %a) #0 {
|
|
+entry:
|
|
+ call void @extern()
|
|
+ %a1 = load volatile i32, i32* @a
|
|
+ %x1 = add i32 %a1, %a1
|
|
+ %a2 = load volatile i32, i32* @a
|
|
+ %x2 = add i32 %x1, %a2
|
|
+ %a3 = load volatile i32, i32* @a
|
|
+ %x3 = add i32 %x2, %a3
|
|
+ %a4 = load volatile i32, i32* @a
|
|
+ %x4 = add i32 %x3, %a4
|
|
+ %a5 = load volatile i32, i32* @a
|
|
+ %x5 = add i32 %x4, %a5
|
|
+ %a6 = load volatile i32, i32* @a
|
|
+ %x6 = add i32 %x5, %a6
|
|
+ %a7 = load volatile i32, i32* @a
|
|
+ %x7 = add i32 %x6, %a6
|
|
+ %a8 = load volatile i32, i32* @a
|
|
+ %x8 = add i32 %x7, %a8
|
|
+ %a9 = load volatile i32, i32* @a
|
|
+ %x9 = add i32 %x8, %a9
|
|
+ %a10 = load volatile i32, i32* @a
|
|
+ %x10 = add i32 %x9, %a10
|
|
+ %a11 = load volatile i32, i32* @a
|
|
+ %x11 = add i32 %x10, %a11
|
|
+ %a12 = load volatile i32, i32* @a
|
|
+ %x12 = add i32 %x11, %a12
|
|
+ %add = add i32 %x12, %a
|
|
+ ret i32 %add
|
|
+}
|
|
+
|
|
+; Function Attrs: nounwind readnone uwtable
|
|
+define i32 @bar(i32 %a) #0 {
|
|
+entry:
|
|
+ %0 = tail call i32 @simpleFunction(i32 6)
|
|
+ ret i32 %0
|
|
+}
|
|
+
|
|
+declare void @extern()
|
|
+
|
|
+attributes #0 = { nounwind readnone uwtable }
|
|
+attributes #1 = { nounwind cold readnone uwtable }
|
|
+
|
|
+
|
|
+; NOTE: Need to make sure the function inlining has the same behaviour as O3 and
|
|
+; 'BaselineConfig'
|
|
+; DEFAULT: Inlining calls in: bar
|
|
+; DEFAULT: Inlining (cost=115, threshold=375), Call: %0 = tail call i32 @simpleFunction(i32 6)
|
|
+
|
|
+; AUTOTUNE: Pass: inline
|
|
+; AUTOTUNE-NEXT: Name: simpleFunction
|
|
+; AUTOTUNE-NEXT: Function: bar
|
|
+; AUTOTUNE-NEXT: CodeRegionType: callsite
|
|
+; AUTOTUNE-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; AUTOTUNE-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] }
|
|
+; AUTOTUNE-NEXT: BaselineConfig: { ForceInline: '1' }
|
|
+; AUTOTUNE-NEXT: Invocation: 0
|
|
diff --git a/llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll b/llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll
|
|
new file mode 100644
|
|
index 000000000000..13acafae6fc4
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/CodeRegionFilter/function-filtering.ll
|
|
@@ -0,0 +1,62 @@
|
|
+; REQUIRES: asserts
|
|
+
|
|
+; RUN: rm -rf %t.filter
|
|
+; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \
|
|
+; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.filter/function-filtering.ll.yaml -check-prefix=DEFAULT
|
|
+
|
|
+; RUN: rm -rf %t.filter
|
|
+; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \
|
|
+; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop \
|
|
+; RUN: -auto-tuning-function-filter=foo --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.filter/function-filtering.ll.yaml -check-prefix=FILTER_FOO
|
|
+
|
|
+; RUN: rm -rf %t.filter
|
|
+; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \
|
|
+; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop \
|
|
+; RUN: -auto-tuning-function-filter=bar --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.filter/function-filtering.ll.yaml -check-prefix=FILTER_BAR
|
|
+
|
|
+; RUN: rm -rf %t.filter
|
|
+; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \
|
|
+; RUN: -auto-tuning-opp=%t.filter -auto-tuning-type-filter=CallSite,Loop \
|
|
+; RUN: -auto-tuning-function-filter=dummy -debug-only=autotuning | \
|
|
+; RUN: FileCheck %s -check-prefix=FILTER_DUMMY
|
|
+
|
|
+define void @foo(i32* nocapture %a) {
|
|
+entry:
|
|
+ br label %for.body
|
|
+
|
|
+for.body: ; preds = %for.body, %entry
|
|
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
|
+ %0 = load i32, i32* %arrayidx, align 4
|
|
+ %inc = add nsw i32 %0, 1
|
|
+ store i32 %inc, i32* %arrayidx, align 4
|
|
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
|
|
+ br i1 %exitcond, label %for.end, label %for.body
|
|
+
|
|
+for.end: ; preds = %for.body
|
|
+ ret void
|
|
+}
|
|
+
|
|
+define void @bar(i32* nocapture %a) {
|
|
+entry:
|
|
+ call void @foo(i32* %a)
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; DEFAULT: --- !AutoTuning
|
|
+; DEFAULT: --- !AutoTuning
|
|
+
|
|
+; FILTER_FOO: --- !AutoTuning
|
|
+; FILTER_FOO: Function: foo
|
|
+; FILTER_FOO-NOT: --- !AutoTuning
|
|
+
|
|
+; FILTER_BAR: --- !AutoTuning
|
|
+; FILTER_BAR: Function: bar
|
|
+; FILTER_BAR-NOT: --- !AutoTuning
|
|
+
|
|
+; FILTER_DUMMY-NOT: --- !AutoTuning
|
|
+; FILTER_DUMMY-NOT: --- !AutoTuning
|
|
diff --git a/llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml b/llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml
|
|
new file mode 100644
|
|
index 000000000000..9c203e58f0ab
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/Error/Inputs/invalid-format.yaml
|
|
@@ -0,0 +1,3 @@
|
|
+<inpus>
|
|
+ <input>this is a xml file</input>
|
|
+</input>
|
|
diff --git a/llvm/test/AutoTuning/Error/Inputs/template.yaml b/llvm/test/AutoTuning/Error/Inputs/template.yaml
|
|
new file mode 100644
|
|
index 000000000000..1f02b52ffb38
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/Error/Inputs/template.yaml
|
|
@@ -0,0 +1,10 @@
|
|
+--- !AutoTuning
|
|
+Pass: pass
|
|
+Name: for.body
|
|
+Function: foo
|
|
+CodeRegionType: loop
|
|
+CodeRegionHash: 0
|
|
+Args:
|
|
+ - UnrollCount: 2
|
|
+ - PassOrder: [test, test2]
|
|
+...
|
|
diff --git a/llvm/test/AutoTuning/Error/file-not-found-error.ll b/llvm/test/AutoTuning/Error/file-not-found-error.ll
|
|
new file mode 100644
|
|
index 000000000000..6a364239a271
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/Error/file-not-found-error.ll
|
|
@@ -0,0 +1,29 @@
|
|
+; RUN: rm %t.non-existing.yaml -rf
|
|
+; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-input=%t.non-existing.yaml 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=ERROR
|
|
+
|
|
+; UNSUPPORTED: windows
|
|
+
|
|
+define void @foo(i32* nocapture %a) {
|
|
+entry:
|
|
+ br label %for.body
|
|
+
|
|
+for.body: ; preds = %for.body, %entry
|
|
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
|
+ %0 = load i32, i32* %arrayidx, align 4
|
|
+ %inc = add nsw i32 %0, 1
|
|
+ store i32 %inc, i32* %arrayidx, align 4
|
|
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
|
|
+ br i1 %exitcond, label %for.end, label %for.body
|
|
+
|
|
+for.end: ; preds = %for.body
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; check if error message is shown properly when input yaml is not found
|
|
+;
|
|
+; ERROR: Error parsing auto-tuning input.
|
|
+; ERROR: No such file or directory
|
|
diff --git a/llvm/test/AutoTuning/Error/invalid-yaml-error.ll b/llvm/test/AutoTuning/Error/invalid-yaml-error.ll
|
|
new file mode 100644
|
|
index 000000000000..bfc8784c4ea4
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/Error/invalid-yaml-error.ll
|
|
@@ -0,0 +1,27 @@
|
|
+; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-input=%S/Inputs/invalid-format.yaml 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=ERROR
|
|
+
|
|
+; UNSUPPORTED: windows
|
|
+
|
|
+define void @foo(i32* nocapture %a) {
|
|
+entry:
|
|
+ br label %for.body
|
|
+
|
|
+for.body: ; preds = %for.body, %entry
|
|
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
|
+ %0 = load i32, i32* %arrayidx, align 4
|
|
+ %inc = add nsw i32 %0, 1
|
|
+ store i32 %inc, i32* %arrayidx, align 4
|
|
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
|
|
+ br i1 %exitcond, label %for.end, label %for.body
|
|
+
|
|
+for.end: ; preds = %for.body
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; check if error message is shown properly when input yaml is in invalid format
|
|
+;
|
|
+; ERROR: error: YAML:1:1: error: document root is not of mapping type.
|
|
diff --git a/llvm/test/AutoTuning/Error/malformed-input-error.ll b/llvm/test/AutoTuning/Error/malformed-input-error.ll
|
|
new file mode 100644
|
|
index 000000000000..0b73c3195503
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/Error/malformed-input-error.ll
|
|
@@ -0,0 +1,136 @@
|
|
+; Check if error messages are shown properly for malformed YAML files.
|
|
+
|
|
+; Missing Pass Field
|
|
+; RUN: rm %t.missing-pass.yaml -rf
|
|
+; RUN: sed 's#Pass: pass##g' %S/Inputs/template.yaml > %t.missing-pass.yaml
|
|
+; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-input=%t.missing-pass.yaml 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=ERROR-FIELD
|
|
+
|
|
+; Missing Pass Value
|
|
+; RUN: rm %t.missing-value-pass.yaml -rf
|
|
+; RUN: sed 's#pass##g' %S/Inputs/template.yaml > %t.missing-value-pass.yaml
|
|
+; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-input=%t.missing-value-pass.yaml 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=ERROR-PASS-VALUE
|
|
+
|
|
+; Missing Name Field
|
|
+; RUN: rm %t.missing-name.yaml -rf
|
|
+; RUN: sed 's#Name: for.body##g' %S/Inputs/template.yaml > %t.missing-name.yaml
|
|
+; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-input=%t.missing-name.yaml 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=ERROR-NAME-FIELD
|
|
+
|
|
+; Missing Name Value
|
|
+; RUN: rm %t.missing-value-name.yaml -rf
|
|
+; RUN: sed 's#for.body##g' %S/Inputs/template.yaml > %t.missing-value-name.yaml
|
|
+; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-input=%t.missing-value-name.yaml 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=ERROR-NAME-VALUE
|
|
+
|
|
+; Missing Function Field
|
|
+; RUN: rm %t.missing-function.yaml -rf
|
|
+; RUN: sed 's#Function: foo##g' %S/Inputs/template.yaml > %t.missing-function.yaml
|
|
+; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' -auto-tuning-input=%t.missing-function.yaml 2>&1 | FileCheck %s -check-prefix=ERROR-FUNCTION-FIELD
|
|
+
|
|
+; Missing Function Value
|
|
+; RUN: rm %t.missing-value-func.yaml -rf
|
|
+; RUN: sed 's#foo##g' %S/Inputs/template.yaml > %t.missing-value-func.yaml
|
|
+; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-input=%t.missing-value-func.yaml 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=ERROR-FUNC-VALUE
|
|
+
|
|
+; Missing CodeRegionType Field
|
|
+; RUN: rm %t.missing-type.yaml -rf
|
|
+; RUN: sed 's#CodeRegionType: loop##g' %S/Inputs/template.yaml > %t.missing-type.yaml
|
|
+; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-input=%t.missing-type.yaml 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=ERROR-CODE-REGION-TYPE-FIELD
|
|
+
|
|
+; Missing CodeRegionType Value
|
|
+; RUN: rm %t.missing-value-type.yaml -rf
|
|
+; RUN: sed 's#loop##g' %S/Inputs/template.yaml > %t.missing-value-type.yaml
|
|
+; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-input=%t.missing-value-type.yaml 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=ERROR-CODE-REGION-TYPE-VALUE
|
|
+
|
|
+; Invalid CodeRegionType Value
|
|
+; RUN: rm %t.invalid-value-type.yaml -rf
|
|
+; RUN: sed 's#loop#error-type#g' %S/Inputs/template.yaml > %t.invalid-value-type.yaml
|
|
+; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-input=%t.invalid-value-type.yaml 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=ERROR-CODE-REGION-TYPE-INVALID
|
|
+
|
|
+; Missing Param Name
|
|
+; RUN: rm %t.missing-param-name.yaml -rf
|
|
+; RUN: sed 's#UnrollCount##g' %S/Inputs/template.yaml > %t.missing-param-name.yaml
|
|
+; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-input=%t.missing-param-name.yaml 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=ERROR-PARAM-NAME
|
|
+
|
|
+; Missing Param Value
|
|
+; RUN: rm %t.missing-value-param.yaml -rf
|
|
+; RUN: sed 's#2##g' %S/Inputs/template.yaml > %t.missing-value-param.yaml
|
|
+; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-input=%t.missing-value-param.yaml 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=ERROR-PARAM-VALUE
|
|
+
|
|
+; Empty Param List
|
|
+; RUN: rm %t.empty-value-param-list.yaml -rf
|
|
+; RUN: sed 's#\[test, test2\]#\[\]#g' %S/Inputs/template.yaml > %t.empty-value-param-list.yaml
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-input=%t.empty-value-param-list.yaml 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=VALID
|
|
+
|
|
+; UNSUPPORTED: windows
|
|
+
|
|
+define void @foo(i32* nocapture %a) {
|
|
+entry:
|
|
+ br label %for.body
|
|
+
|
|
+for.body: ; preds = %for.body, %entry
|
|
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
|
+ %0 = load i32, i32* %arrayidx, align 4
|
|
+ %inc = add nsw i32 %0, 1
|
|
+ store i32 %inc, i32* %arrayidx, align 4
|
|
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
|
|
+ br i1 %exitcond, label %for.end, label %for.body
|
|
+
|
|
+for.end: ; preds = %for.body
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; check if error message is shown properly for malformed YAML input files.
|
|
+;
|
|
+
|
|
+; ERROR-FIELD: error: CodeRegionHash, CodeRegionType, or Pass missing.
|
|
+
|
|
+; ERROR-NAME-FIELD: error: Remark Name expected; enable -autotuning-omit-metadata.
|
|
+
|
|
+; ERROR-FUNCTION-FIELD: error: Remark Function Name expected; enable -autotuning-omit-metadata.
|
|
+
|
|
+; ERROR-PASS-VALUE: error: YAML:2:1: error: expected a value of scalar type.
|
|
+; ERROR-PASS-VALUE: Pass:
|
|
+
|
|
+; ERROR-NAME-VALUE: error: YAML:3:1: error: expected a value of scalar type.
|
|
+; ERROR-NAME-VALUE: Name:
|
|
+
|
|
+; ERROR-FUNC-VALUE: error: YAML:4:1: error: expected a value of scalar type.
|
|
+; ERROR-FUNC-VALUE: Function:
|
|
+
|
|
+; ERROR-CODE-REGION-TYPE-FIELD: CodeRegionHash, CodeRegionType, or Pass missing.
|
|
+
|
|
+; ERROR-CODE-REGION-TYPE-VALUE: error: YAML:5:1: error: expected a value of scalar type.
|
|
+; ERROR-CODE-REGION-TYPE-VALUE: CodeRegionType:
|
|
+
|
|
+; ERROR-CODE-REGION-TYPE-INVALID: Unsupported CodeRegionType:error-type
|
|
+
|
|
+; ERROR-PARAM-NAME: error: YAML:8:5: error: argument key is missing.
|
|
+; ERROR-PARAM-NAME: - : 2
|
|
+
|
|
+; ERROR-PARAM-VALUE: error: YAML:8:5: error: expected a value of scalar type.
|
|
+; ERROR-PARAM-VALUE: - UnrollCount:
|
|
+
|
|
+; VALID-NOT: -auto-tuning-input=(input file) option failed.
|
|
diff --git a/llvm/test/AutoTuning/Error/output-error.ll b/llvm/test/AutoTuning/Error/output-error.ll
|
|
new file mode 100644
|
|
index 000000000000..61ffba50924b
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/Error/output-error.ll
|
|
@@ -0,0 +1,28 @@
|
|
+; RUN: rm %t.opp -rf; touch %t.opp
|
|
+; RUN: not opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-opp=%t.opp 2>&1 | FileCheck %s -check-prefix=ERROR-OPP
|
|
+
|
|
+; UNSUPPORTED: windows
|
|
+
|
|
+define void @foo(i32* nocapture %a) {
|
|
+entry:
|
|
+ br label %for.body
|
|
+
|
|
+for.body: ; preds = %for.body, %entry
|
|
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
|
+ %0 = load i32, i32* %arrayidx, align 4
|
|
+ %inc = add nsw i32 %0, 1
|
|
+ store i32 %inc, i32* %arrayidx, align 4
|
|
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
|
|
+ br i1 %exitcond, label %for.end, label %for.body
|
|
+
|
|
+for.end: ; preds = %for.body
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; check if error message is shown properly when output files cannot be created
|
|
+;
|
|
+; ERROR-OPP: Error generating auto-tuning opportunities.
|
|
+; ERROR-OPP: error: Not a directory
|
|
diff --git a/llvm/test/AutoTuning/Error/valid-input.ll b/llvm/test/AutoTuning/Error/valid-input.ll
|
|
new file mode 100644
|
|
index 000000000000..dae90cdbe408
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/Error/valid-input.ll
|
|
@@ -0,0 +1,27 @@
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-input=%S/Inputs/template.yaml 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=VALID
|
|
+; UNSUPPORTED: windows
|
|
+
|
|
+define void @foo(i32* nocapture %a) {
|
|
+entry:
|
|
+ br label %for.body
|
|
+
|
|
+for.body: ; preds = %for.body, %entry
|
|
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
|
+ %0 = load i32, i32* %arrayidx, align 4
|
|
+ %inc = add nsw i32 %0, 1
|
|
+ store i32 %inc, i32* %arrayidx, align 4
|
|
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
|
|
+ br i1 %exitcond, label %for.end, label %for.body
|
|
+
|
|
+for.end: ; preds = %for.body
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; check that no error message is shown when the input is valid
|
|
+;
|
|
+
|
|
+; VALID-NOT: -auto-tuning-input=(input file) option failed.
|
|
diff --git a/llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml b/llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml
|
|
new file mode 100644
|
|
index 000000000000..a7d390be63e7
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/IncrementalCompilation/Inputs/template.yaml
|
|
@@ -0,0 +1,9 @@
|
|
+--- !AutoTuning
|
|
+Pass: [dummy-pass]
|
|
+CodeRegionType: [dummy-type]
|
|
+Name: foo
|
|
+DebugLoc: { File: [dummy-file], Line: 0, Column: 0 }
|
|
+Function: foo
|
|
+CodeRegionHash: 0
|
|
+Invocation: 0
|
|
+...
|
|
diff --git a/llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll b/llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll
|
|
new file mode 100644
|
|
index 000000000000..b9dc81089d40
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/IncrementalCompilation/inc-compile-parse-input.ll
|
|
@@ -0,0 +1,103 @@
|
|
+; REQUIRES: asserts
|
|
+; RUN: rm %t.output -rf
|
|
+; RUN: rm %t.inc_compile.yaml -rf
|
|
+; RUN: sed 's#\[dummy-pass\]#inline#g' %S/Inputs/template.yaml > %t.temp.yaml
|
|
+; RUN: sed 's#\[dummy-type\]#callsite#g' %t.temp.yaml > %t.temp2.yaml
|
|
+; RUN: sed 's#\[dummy-file\]#%s#g' %t.temp2.yaml > %t.inc_compile.yaml
|
|
+; RUN: opt -O3 %s -auto-tuning-input=%t.inc_compile.yaml \
|
|
+; RUN: -auto-tuning-compile-mode=CoarseGrain -print-after-all \
|
|
+; RUN: -debug-only=autotuning-compile \
|
|
+; RUN: -o %t.output 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=COARSEGRAIN
|
|
+
|
|
+; RUN: rm %t.output -rf
|
|
+; RUN: rm %t.inc_compile.yaml -rf
|
|
+; RUN: sed 's#\[dummy-pass\]#inline#g' %S/Inputs/template.yaml > %t.temp.yaml
|
|
+; RUN: sed 's#\[dummy-type\]#callsite#g' %t.temp.yaml > %t.temp2.yaml
|
|
+; RUN: sed 's#\[dummy-file\]#%s#g' %t.temp2.yaml > %t.inc_compile.yaml
|
|
+; RUN: opt -O3 %s -auto-tuning-input=%t.inc_compile.yaml \
|
|
+; RUN: -auto-tuning-compile-mode=FineGrain -print-after-all \
|
|
+; RUN: -debug-only=autotuning-compile \
|
|
+; RUN: -o %t.output 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefixes=FINEGRAIN-1,FINEGRAIN-INLINE
|
|
+
|
|
+; RUN: rm %t.output -rf
|
|
+; RUN: rm %t.inc_compile.yaml -rf
|
|
+; RUN: sed 's#\[dummy-pass\]#loop-unroll#g' %S/Inputs/template.yaml > %t.temp.yaml
|
|
+; RUN: sed 's#\[dummy-type\]#loop#g' %t.temp.yaml > %t.temp2.yaml
|
|
+; RUN: sed 's#\[dummy-file\]#%s#g' %t.temp2.yaml > %t.inc_compile.yaml
|
|
+; RUN: opt -O3 %s -auto-tuning-input=%t.inc_compile.yaml \
|
|
+; RUN: -auto-tuning-compile-mode=FineGrain -print-after-all \
|
|
+; RUN: -debug-only=autotuning-compile \
|
|
+; RUN: -o %t.output 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefixes=FINEGRAIN-1,FINEGRAIN-2,FINEGRAIN-UNROLL
|
|
+
|
|
+; ModuleID = 'test.c'
|
|
+source_filename = "test.c"
|
|
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|
+target triple = "aarch64-unknown-linux-gnu"
|
|
+
|
|
+; Function Attrs: argmemonly nofree norecurse nosync nounwind uwtable
|
|
+define dso_local i32 @test(i32* nocapture noundef %a, i32* nocapture noundef readonly %b, i32 noundef %size) local_unnamed_addr #0 {
|
|
+entry:
|
|
+ %cmp11 = icmp sgt i32 %size, 0
|
|
+ br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup
|
|
+
|
|
+for.body.preheader: ; preds = %entry
|
|
+ %wide.trip.count = zext i32 %size to i64
|
|
+ br label %for.body
|
|
+
|
|
+for.cond.cleanup: ; preds = %for.body, %entry
|
|
+ ret i32 undef
|
|
+
|
|
+for.body: ; preds = %for.body.preheader, %for.body
|
|
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
|
|
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
|
|
+ %0 = load i32, i32* %arrayidx, align 4
|
|
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
|
+ %1 = load i32, i32* %arrayidx2, align 4
|
|
+ %add = add nsw i32 %1, %0
|
|
+ store i32 %add, i32* %arrayidx2, align 4
|
|
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
|
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
|
+}
|
|
+
|
|
+attributes #0 = { argmemonly nofree norecurse nosync nounwind uwtable "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon,+v8a" }
|
|
+
|
|
+!llvm.dbg.cu = !{!0}
|
|
+!llvm.module.flags = !{!3, !4, !5, !6, !7, !8}
|
|
+!llvm.ident = !{!9}
|
|
+
|
|
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei BiSheng Compiler clang version 12.0.0 (1c7b819ced36)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None)
|
|
+!1 = !DIFile(filename: "test.c", directory: "/home/m00629332/code/autoTuner")
|
|
+!2 = !{}
|
|
+!3 = !{i32 2, !"Debug Info Version", i32 3}
|
|
+!4 = !{i32 1, !"wchar_size", i32 4}
|
|
+!5 = !{i32 1, !"branch-target-enforcement", i32 0}
|
|
+!6 = !{i32 1, !"sign-return-address", i32 0}
|
|
+!7 = !{i32 1, !"sign-return-address-all", i32 0}
|
|
+!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0}
|
|
+!9 = !{!"Huawei BiSheng Compiler clang version 12.0.0 (1c7b819ced36)"}
|
|
+!10 = distinct !DISubprogram(name: "dummy", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
|
+!11 = !DISubroutineType(types: !2)
|
|
+!12 = !DILocation(line: 2, column: 5, scope: !10)
|
|
+
|
|
+; COARSEGRAIN: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: start
|
|
+; COARSEGRAIN-NEXT: AutoTuningCompile: No change in opt pipeline for Basic/CoarseGrain incremental compilation mode.
|
|
+; COARSEGRAIN-NOT: Skip pass {{.*}}: True
|
|
+
|
|
+; FINEGRAIN-1: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: start
|
|
+; FINEGRAIN-1-NEXT: AutoTuningCompile: SkipPasses enabled.
|
|
+; FINEGRAIN-1-NOT: Skip pass {{.*}}: False
|
|
+; FINEGRAIN-1: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: inline
|
|
+; FINEGRAIN-INLINE: AutoTuningCompile: SkipPasses disabled.
|
|
+; FINEGRAIN-INLINE: Skip pass 'InlinerPass': False
|
|
+; FINEGRAIN-INLINE-NEXT: *** IR Dump After InlinerPass
|
|
+; FINEGRAIN-INLINE-NOT: Skip pass {{.*}}: True
|
|
+
|
|
+; FINEGRAIN-2: AutoTuningCompile: Old decision (SkipPasses = True ) continued.
|
|
+; FINEGRAIN-2-NOT: Skip pass {{.*}}: False
|
|
+; FINEGRAIN-2: AutoTuningCompile: Deciding to enable/disable optimization of module/functions. Pass: loop-unroll
|
|
+; FINEGRAIN-UNROLL: AutoTuningCompile: SkipPasses disabled.
|
|
+; FINEGRAIN-UNROLL-NOT: Skip pass {{.*}}: True
|
|
diff --git a/llvm/test/AutoTuning/Inline/Inputs/template.yaml b/llvm/test/AutoTuning/Inline/Inputs/template.yaml
|
|
new file mode 100644
|
|
index 000000000000..e04612183d1f
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/Inline/Inputs/template.yaml
|
|
@@ -0,0 +1,9 @@
|
|
+--- !AutoTuning
|
|
+Pass: inline
|
|
+Name: simpleFunction-entry
|
|
+Function: bar
|
|
+CodeRegionType: callsite
|
|
+CodeRegionHash: 5550568187071847048
|
|
+Args:
|
|
+ - ForceInline: [force-inline]
|
|
+...
|
|
diff --git a/llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml b/llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml
|
|
new file mode 100644
|
|
index 000000000000..9fc88f56d6bc
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/Inline/Inputs/template_no_metadata.yaml
|
|
@@ -0,0 +1,7 @@
|
|
+--- !AutoTuning
|
|
+Pass: inline
|
|
+CodeRegionType: callsite
|
|
+CodeRegionHash: 5550568187071847048
|
|
+Args:
|
|
+ - ForceInline: [force-inline]
|
|
+...
|
|
diff --git a/llvm/test/AutoTuning/Inline/duplicate-calls.ll b/llvm/test/AutoTuning/Inline/duplicate-calls.ll
|
|
new file mode 100644
|
|
index 000000000000..ad32262ad044
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/Inline/duplicate-calls.ll
|
|
@@ -0,0 +1,96 @@
|
|
+; RUN: rm %t.duplicate_calls -rf
|
|
+; RUN: opt %s -S -passes='cgscc(inline)' -auto-tuning-opp=%t.duplicate_calls \
|
|
+; RUN: -auto-tuning-type-filter=CallSite --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.duplicate_calls/duplicate-calls.ll.yaml
|
|
+
|
|
+; ModuleID = 'duplicate-calls.c'
|
|
+source_filename = "duplicate-calls.c"
|
|
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|
+target triple = "aarch64-unknown-linux-gnu"
|
|
+
|
|
+; Function Attrs: nounwind uwtable
|
|
+define dso_local void @bar(i32* nocapture %result, i32* %cfb, i32 %bytes) local_unnamed_addr #0 !dbg !10 {
|
|
+entry:
|
|
+ %call = tail call i32 @test(i32* %cfb, i32 %bytes) #1, !dbg !12
|
|
+ store i32 %call, i32* %result, align 4, !dbg !13, !tbaa !14
|
|
+ ret void, !dbg !18
|
|
+}
|
|
+
|
|
+declare dso_local i32 @test(i32*, i32) local_unnamed_addr #0
|
|
+
|
|
+; Function Attrs: nounwind uwtable
|
|
+define dso_local void @foo(i32* %cfb, i32* readnone %saved, i32* nocapture %result, i32 %bytes) local_unnamed_addr #0 !dbg !19 {
|
|
+entry:
|
|
+ %tobool.not = icmp eq i32* %cfb, null, !dbg !20
|
|
+ br i1 %tobool.not, label %if.else, label %if.then.split, !dbg !20
|
|
+
|
|
+if.then.split: ; preds = %entry
|
|
+ tail call void @bar(i32* %result, i32* nonnull %cfb, i32 %bytes), !dbg !21
|
|
+ br label %return, !dbg !22
|
|
+
|
|
+if.else: ; preds = %entry
|
|
+ %tobool1.not = icmp eq i32* %saved, null, !dbg !23
|
|
+ br i1 %tobool1.not, label %if.else.split, label %return, !dbg !23
|
|
+
|
|
+if.else.split: ; preds = %if.else
|
|
+ tail call void @bar(i32* %result, i32* null, i32 %bytes), !dbg !21
|
|
+ br label %return, !dbg !23
|
|
+
|
|
+return: ; preds = %if.then.split, %if.else.split, %if.else
|
|
+ ret void, !dbg !24
|
|
+}
|
|
+
|
|
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
+attributes #1 = { nounwind }
|
|
+
|
|
+!llvm.dbg.cu = !{!0}
|
|
+!llvm.module.flags = !{!3, !4, !5, !6, !7, !8}
|
|
+!llvm.ident = !{!9}
|
|
+
|
|
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei BiSheng Compiler clang version 12.0.0 (clang-0d5d71fe6c22 flang-8b17fc131076)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None)
|
|
+!1 = !DIFile(filename: "duplicate-calls.c", directory: "/home/m00629332/benchmarks/cBench/source/security_pgp_d/src")
|
|
+!2 = !{}
|
|
+!3 = !{i32 2, !"Debug Info Version", i32 3}
|
|
+!4 = !{i32 1, !"wchar_size", i32 4}
|
|
+!5 = !{i32 1, !"branch-target-enforcement", i32 0}
|
|
+!6 = !{i32 1, !"sign-return-address", i32 0}
|
|
+!7 = !{i32 1, !"sign-return-address-all", i32 0}
|
|
+!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0}
|
|
+!9 = !{!"Huawei BiSheng Compiler clang version 12.0.0 (clang-0d5d71fe6c22 flang-8b17fc131076)"}
|
|
+!10 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 7, type: !11, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
|
+!11 = !DISubroutineType(types: !2)
|
|
+!12 = !DILocation(line: 10, column: 16, scope: !10)
|
|
+!13 = !DILocation(line: 10, column: 14, scope: !10)
|
|
+!14 = !{!15, !15, i64 0}
|
|
+!15 = !{!"int", !16, i64 0}
|
|
+!16 = !{!"omnipotent char", !17, i64 0}
|
|
+!17 = !{!"Simple C/C++ TBAA"}
|
|
+!18 = !DILocation(line: 14, column: 1, scope: !10)
|
|
+!19 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 17, type: !11, scopeLine: 18, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
|
+!20 = !DILocation(line: 22, column: 6, scope: !19)
|
|
+!21 = !DILocation(line: 27, column: 2, scope: !19)
|
|
+!22 = !DILocation(line: 23, column: 3, scope: !19)
|
|
+!23 = !DILocation(line: 24, column: 11, scope: !19)
|
|
+!24 = !DILocation(line: 28, column: 1, scope: !19)
|
|
+
|
|
+; CHECK: --- !AutoTuning
|
|
+; CHECK-NEXT: Pass: inline
|
|
+; CHECK-NEXT: Name: bar-if.then.split
|
|
+; CHECK-NEXT: DebugLoc: { File: duplicate-calls.c, Line: 27, Column: 2 }
|
|
+; CHECK-NEXT: Function: foo
|
|
+; CHECK-NEXT: CodeRegionType: callsite
|
|
+; CHECK-NEXT: CodeRegionHash:
|
|
+; CHECK-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] }
|
|
+; CHECK-NEXT: BaselineConfig: { ForceInline: '1' }
|
|
+; CHECK-NEXT: Invocation: 0
|
|
+; CHECK-NEXT: ...
|
|
+; CHECK-NEXT: --- !AutoTuning
|
|
+; CHECK-NEXT: Pass: inline
|
|
+; CHECK-NEXT: Name: bar-if.else.split
|
|
+; CHECK-NEXT: DebugLoc: { File: duplicate-calls.c, Line: 27, Column: 2 }
|
|
+; CHECK-NEXT: Function: foo
|
|
+; CHECK-NEXT: CodeRegionType: callsite
|
|
+; CHECK-NEXT: CodeRegionHash:
|
|
+; CHECK-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] }
|
|
+; CHECK-NEXT: BaselineConfig: { ForceInline: '1' }
|
|
+; CHECK-NEXT: Invocation: 0
|
|
diff --git a/llvm/test/AutoTuning/Inline/force-inline.ll b/llvm/test/AutoTuning/Inline/force-inline.ll
|
|
new file mode 100644
|
|
index 000000000000..cedfc8df3483
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/Inline/force-inline.ll
|
|
@@ -0,0 +1,84 @@
|
|
+; REQUIRES: asserts
|
|
+; RUN: opt < %s -passes=inline -debug-only=inline -disable-output -S 2>&1 | FileCheck %s -check-prefix=DEFAULT
|
|
+; simpleFunction will be inlined with the default behavior.
|
|
+
|
|
+; RUN: rm %t.force-inline.yaml -rf
|
|
+; RUN: sed 's#\[force-inline\]#true#g' %S/Inputs/template.yaml > %t.force-inline.yaml
|
|
+; RUN: opt %s -passes=inline -debug-only=inline -disable-output -S \
|
|
+; RUN: -auto-tuning-input=%t.force-inline.yaml 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=FORCE-INLINE
|
|
+; Test with ForceInline=true;
|
|
+
|
|
+; RUN: rm %t.force-inline.yaml -rf
|
|
+; RUN: sed 's#\[force-inline\]#true#g' %S/Inputs/template_no_metadata.yaml > %t.force-inline.yaml
|
|
+; RUN: opt %s -passes=inline -S -auto-tuning-input=%t.force-inline.yaml \
|
|
+; RUN: -debug-only=inline -disable-output -auto-tuning-omit-metadata 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=FORCE-INLINE
|
|
+; Test with ForceInline=true;
|
|
+
|
|
+; RUN: rm %t.no-inline.yaml -rf
|
|
+; RUN: sed 's#\[force-inline\]#false#g' %S/Inputs/template.yaml > %t.no-inline.yaml
|
|
+; RUN: opt %s -passes=inline -debug-only=inline -disable-output -S \
|
|
+; RUN: -auto-tuning-input=%t.no-inline.yaml 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=NO-INLINE
|
|
+; Test with ForceInline=false;
|
|
+
|
|
+; RUN: rm %t.no-inline.yaml -rf
|
|
+; RUN: sed 's#\[force-inline\]#false#g' %S/Inputs/template_no_metadata.yaml > %t.no-inline.yaml
|
|
+; RUN: opt %s -passes='cgscc(inline)' -debug-only=inline -disable-output -S \
|
|
+; RUN: -auto-tuning-input=%t.no-inline.yaml -auto-tuning-omit-metadata 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=NO-INLINE
|
|
+; Test with ForceInline=false;
|
|
+
|
|
+@a = global i32 4
|
|
+
|
|
+; Function Attrs: nounwind readnone uwtable
|
|
+define i32 @simpleFunction(i32 %a) #0 {
|
|
+entry:
|
|
+ call void @extern()
|
|
+ %a1 = load volatile i32, i32* @a
|
|
+ %x1 = add i32 %a1, %a1
|
|
+ %a2 = load volatile i32, i32* @a
|
|
+ %x2 = add i32 %x1, %a2
|
|
+ %a3 = load volatile i32, i32* @a
|
|
+ %x3 = add i32 %x2, %a3
|
|
+ %a4 = load volatile i32, i32* @a
|
|
+ %x4 = add i32 %x3, %a4
|
|
+ %a5 = load volatile i32, i32* @a
|
|
+ %x5 = add i32 %x4, %a5
|
|
+ %a6 = load volatile i32, i32* @a
|
|
+ %x6 = add i32 %x5, %a6
|
|
+ %a7 = load volatile i32, i32* @a
|
|
+ %x7 = add i32 %x6, %a6
|
|
+ %a8 = load volatile i32, i32* @a
|
|
+ %x8 = add i32 %x7, %a8
|
|
+ %a9 = load volatile i32, i32* @a
|
|
+ %x9 = add i32 %x8, %a9
|
|
+ %a10 = load volatile i32, i32* @a
|
|
+ %x10 = add i32 %x9, %a10
|
|
+ %a11 = load volatile i32, i32* @a
|
|
+ %x11 = add i32 %x10, %a11
|
|
+ %a12 = load volatile i32, i32* @a
|
|
+ %x12 = add i32 %x11, %a12
|
|
+ %add = add i32 %x12, %a
|
|
+ ret i32 %add
|
|
+}
|
|
+
|
|
+; Function Attrs: nounwind readnone uwtable
|
|
+define i32 @bar(i32 %a) #0 {
|
|
+entry:
|
|
+ %0 = tail call i32 @simpleFunction(i32 6)
|
|
+ ret i32 %0
|
|
+}
|
|
+
|
|
+declare void @extern()
|
|
+
|
|
+attributes #0 = { nounwind readnone uwtable }
|
|
+attributes #1 = { nounwind cold readnone uwtable }
|
|
+
|
|
+; DEFAULT: Inlining (cost=120, threshold=337)
|
|
+; DEFAULT-SAME: simpleFunction
|
|
+; FORCE-INLINE: Inlining (cost=always): Force inlined by auto-tuning
|
|
+; FORCE-INLINE-SAME: simpleFunction
|
|
+; NO-INLINE: NOT Inlining (cost=never): Force non-inlined by auto-tuning
|
|
+; NO-INLINE-SAME: simpleFunction
|
|
diff --git a/llvm/test/AutoTuning/Inline/inline-attribute.ll b/llvm/test/AutoTuning/Inline/inline-attribute.ll
|
|
new file mode 100644
|
|
index 000000000000..50f583d0a51e
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/Inline/inline-attribute.ll
|
|
@@ -0,0 +1,85 @@
|
|
+; RUN: rm %t.inline_opp -rf
|
|
+; RUN: opt %s -S -passes='cgscc(inline)' -auto-tuning-opp=%t.inline_opp -auto-tuning-type-filter=CallSite --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.inline_opp/inline-attribute.ll.yaml -check-prefix=TEST-1
|
|
+; RUN: FileCheck %s --input-file %t.inline_opp/inline-attribute.ll.yaml -check-prefix=TEST-2
|
|
+
|
|
+; ModuleID = 'inline.c'
|
|
+source_filename = "inline.c"
|
|
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|
+target triple = "aarch64-unknown-linux-gnu"
|
|
+
|
|
+; Function Attrs: noinline norecurse nounwind readnone uwtable willreturn
|
|
+define dso_local i32 @mul(i32 %a) local_unnamed_addr #0 !dbg !10 {
|
|
+entry:
|
|
+ %mul = mul nsw i32 %a, %a, !dbg !12
|
|
+ ret i32 %mul, !dbg !13
|
|
+}
|
|
+
|
|
+; Function Attrs: alwaysinline nounwind uwtable
|
|
+define dso_local i32 @add(i32 %a) local_unnamed_addr #1 !dbg !14 {
|
|
+entry:
|
|
+ %add = shl nsw i32 %a, 1, !dbg !15
|
|
+ ret i32 %add, !dbg !16
|
|
+}
|
|
+
|
|
+; Function Attrs: nounwind uwtable
|
|
+define dso_local i32 @inc(i32 %a) local_unnamed_addr #2 !dbg !17 {
|
|
+entry:
|
|
+ %inc = add nsw i32 %a, 1, !dbg !18
|
|
+ ret i32 %inc, !dbg !19
|
|
+}
|
|
+
|
|
+; Function Attrs: nounwind uwtable
|
|
+define dso_local i32 @func(i32 %a) local_unnamed_addr #2 !dbg !20 {
|
|
+entry:
|
|
+ %call = call i32 @add(i32 %a), !dbg !21
|
|
+ %call1 = call i32 @mul(i32 %a), !dbg !22
|
|
+ %add = add nsw i32 %call, %call1, !dbg !23
|
|
+ %call2 = call i32 @inc(i32 %a), !dbg !24
|
|
+ %add3 = add nsw i32 %add, %call2, !dbg !25
|
|
+ ret i32 %add3, !dbg !26
|
|
+}
|
|
+
|
|
+attributes #0 = { noinline norecurse nounwind readnone uwtable willreturn "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
+attributes #1 = { alwaysinline nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
+attributes #2 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
+
|
|
+!llvm.dbg.cu = !{!0}
|
|
+!llvm.module.flags = !{!3, !4, !5, !6, !7, !8}
|
|
+!llvm.ident = !{!9}
|
|
+
|
|
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei Bisheng Compiler clang version 12.0.0 (729941c4adfa)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None)
|
|
+!1 = !DIFile(filename: "test.c", directory: "/home/m00629332/code/autoTuner/ir-hashing")
|
|
+!2 = !{}
|
|
+!3 = !{i32 2, !"Debug Info Version", i32 3}
|
|
+!4 = !{i32 1, !"wchar_size", i32 4}
|
|
+!5 = !{i32 1, !"branch-target-enforcement", i32 0}
|
|
+!6 = !{i32 1, !"sign-return-address", i32 0}
|
|
+!7 = !{i32 1, !"sign-return-address-all", i32 0}
|
|
+!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0}
|
|
+!9 = !{!"Huawei Bisheng Compiler clang version 12.0.0 (729941c4adfa)"}
|
|
+!10 = distinct !DISubprogram(name: "mul", scope: !1, file: !1, line: 2, type: !11, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
|
+!11 = !DISubroutineType(types: !2)
|
|
+!12 = !DILocation(line: 3, column: 13, scope: !10)
|
|
+!13 = !DILocation(line: 3, column: 5, scope: !10)
|
|
+!14 = distinct !DISubprogram(name: "add", scope: !1, file: !1, line: 7, type: !11, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
|
+!15 = !DILocation(line: 8, column: 13, scope: !14)
|
|
+!16 = !DILocation(line: 8, column: 5, scope: !14)
|
|
+!17 = distinct !DISubprogram(name: "inc", scope: !1, file: !1, line: 11, type: !11, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
|
+!18 = !DILocation(line: 12, column: 12, scope: !17)
|
|
+!19 = !DILocation(line: 12, column: 5, scope: !17)
|
|
+!20 = distinct !DISubprogram(name: "func", scope: !1, file: !1, line: 15, type: !11, scopeLine: 15, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
|
+!21 = !DILocation(line: 16, column: 12, scope: !20)
|
|
+!22 = !DILocation(line: 16, column: 19, scope: !20)
|
|
+!23 = !DILocation(line: 16, column: 18, scope: !20)
|
|
+!24 = !DILocation(line: 16, column: 26, scope: !20)
|
|
+!25 = !DILocation(line: 16, column: 25, scope: !20)
|
|
+!26 = !DILocation(line: 16, column: 5, scope: !20)
|
|
+
|
|
+; TEST-1: Pass: inline
|
|
+; TEST-1-NOT: Pass: inline
|
|
+
|
|
+; TEST-2: Name: inc
|
|
+; TEST-2-NEXT: DebugLoc: { File: test.c, Line: 16, Column: 26 }
|
|
+; TEST-2-NEXT: Function: func
|
|
+; TEST-2-NEXT: CodeRegionType: callsite
|
|
diff --git a/llvm/test/AutoTuning/Inline/opp.ll b/llvm/test/AutoTuning/Inline/opp.ll
|
|
new file mode 100644
|
|
index 000000000000..dfe1dac29476
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/Inline/opp.ll
|
|
@@ -0,0 +1,64 @@
|
|
+; RUN: rm %t.callsite_opp -rf
|
|
+; RUN: sed 's#\[number\]#25#g; s#\[func_name\]#ColdFunction#g' %S/Inputs/template.yaml > %t.template25.yaml
|
|
+; RUN: opt %s -passes=inline -S -auto-tuning-opp=%t.callsite_opp -auto-tuning-type-filter=CallSite
|
|
+
|
|
+; RUN: FileCheck %s --input-file %t.callsite_opp/opp.ll.yaml -check-prefix=CALLSITE
|
|
+
|
|
+@a = global i32 4
|
|
+
|
|
+declare void @extern()
|
|
+; Function Attrs: nounwind readnone uwtable
|
|
+define i32 @simpleFunction(i32 %a) #1 {
|
|
+entry:
|
|
+ call void @extern()
|
|
+ %a1 = load volatile i32, i32* @a
|
|
+ %x1 = add i32 %a1, %a1
|
|
+ %a2 = load volatile i32, i32* @a
|
|
+ %x2 = add i32 %x1, %a2
|
|
+ %a3 = load volatile i32, i32* @a
|
|
+ %x3 = add i32 %x2, %a3
|
|
+ %a4 = load volatile i32, i32* @a
|
|
+ %x4 = add i32 %x3, %a4
|
|
+ %a5 = load volatile i32, i32* @a
|
|
+ %x5 = add i32 %x4, %a5
|
|
+ %a6 = load volatile i32, i32* @a
|
|
+ %x6 = add i32 %x5, %a6
|
|
+ %a7 = load volatile i32, i32* @a
|
|
+ %x7 = add i32 %x6, %a6
|
|
+ %a8 = load volatile i32, i32* @a
|
|
+ %x8 = add i32 %x7, %a8
|
|
+ %a9 = load volatile i32, i32* @a
|
|
+ %x9 = add i32 %x8, %a9
|
|
+ %a10 = load volatile i32, i32* @a
|
|
+ %x10 = add i32 %x9, %a10
|
|
+ %a11 = load volatile i32, i32* @a
|
|
+ %x11 = add i32 %x10, %a11
|
|
+ %a12 = load volatile i32, i32* @a
|
|
+ %x12 = add i32 %x11, %a12
|
|
+ %add = add i32 %x12, %a
|
|
+ ret i32 %add
|
|
+}
|
|
+
|
|
+define i32 @bar(i32 %a) #0 {
|
|
+entry:
|
|
+ %0 = tail call i32 @simpleFunction(i32 6)
|
|
+ ret i32 %0
|
|
+}
|
|
+
|
|
+attributes #0 = { nounwind readnone uwtable }
|
|
+attributes #1 = { nounwind cold readnone uwtable }
|
|
+
|
|
+; Check if code regions are properly generated as tuning opportunities.
|
|
+; CALLSITE: --- !AutoTuning
|
|
+; CALLSITE-NEXT: Pass: inline
|
|
+; CALLSITE-NEXT: Name: simpleFunction
|
|
+; CALLSITE-NEXT: Function: bar
|
|
+; CALLSITE-NEXT: CodeRegionType: callsite
|
|
+; CALLSITE-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; CALLSITE-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] }
|
|
+; CALLSITE-NEXT: BaselineConfig: { ForceInline: '1' }
|
|
+; CALLSITE-NEXT: Invocation: 0
|
|
+; CALLSITE-NEXT: ...
|
|
+
|
|
+; Check if external functions are filtered out.
|
|
+; EXTERNAL-NOT: Name: extern
|
|
diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml
|
|
new file mode 100644
|
|
index 000000000000..6dc49a1f7dc2
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/debug_loc_template.yaml
|
|
@@ -0,0 +1,10 @@
|
|
+--- !AutoTuning
|
|
+Pass: loop-unroll
|
|
+Name: for.cond
|
|
+DebugLoc: { File: loop-opp.c, Line: 4, Column: 5 }
|
|
+Function: foo
|
|
+CodeRegionType: loop
|
|
+Args:
|
|
+ - UnrollCount: [number]
|
|
+Invocation: 0
|
|
+...
|
|
diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml
|
|
new file mode 100644
|
|
index 000000000000..4920329dbd4b
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_nest.yaml
|
|
@@ -0,0 +1,10 @@
|
|
+# CodeRegionHash is correct for the first code region only.
|
|
+!AutoTuning {Args: [{UnrollCount: 2}], CodeRegionHash: 8456922293277663707, CodeRegionType: loop,
|
|
+ DebugLoc: {Column: 8, File: loop-nest.c, Line: 10}, Function: loop_nest, Invocation: 0,
|
|
+ Name: for.body6.us, Pass: loop-unroll}
|
|
+--- !AutoTuning {Args: [{UnrollCount: 4}], CodeRegionHash: 8456922293277663707, CodeRegionType: loop,
|
|
+ DebugLoc: {Column: 5, File: loop-nest.c, Line: 9}, Function: loop_nest, Invocation: 0,
|
|
+ Name: for.cond4.preheader.us, Pass: loop-unroll}
|
|
+--- !AutoTuning {Args: [{UnrollCount: 4}], CodeRegionHash: 8456922293277663707, CodeRegionType: loop,
|
|
+ DebugLoc: {Column: 3, File: loop-nest.c, Line: 8}, Function: loop_nest, Invocation: 0,
|
|
+ Name: for.cond1.preheader, Pass: loop-unroll}
|
|
diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml
|
|
new file mode 100644
|
|
index 000000000000..a90cebbce88f
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/loop_peel.yaml
|
|
@@ -0,0 +1,9 @@
|
|
+--- !AutoTuning
|
|
+Pass: loop-unroll
|
|
+Name: loop
|
|
+Function: invariant_backedge_1
|
|
+CodeRegionType: loop
|
|
+Args:
|
|
+ - UnrollCount: [number]
|
|
+Invocation: 0
|
|
+...
|
|
diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml
|
|
new file mode 100644
|
|
index 000000000000..18681a0e2efe
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_raw_template.yaml
|
|
@@ -0,0 +1,10 @@
|
|
+--- !AutoTuning
|
|
+Pass: loop-unroll
|
|
+Name: label %5
|
|
+Function: main
|
|
+CodeRegionType: loop
|
|
+CodeRegionHash: [hash]
|
|
+Args:
|
|
+- UnrollCount: [number]
|
|
+Invocation: 1
|
|
+...
|
|
diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml
|
|
new file mode 100644
|
|
index 000000000000..166f877a232e
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template.yaml
|
|
@@ -0,0 +1,10 @@
|
|
+--- !AutoTuning
|
|
+Pass: loop-unroll
|
|
+Name: [name]
|
|
+Function: foo
|
|
+CodeRegionType: loop
|
|
+CodeRegionHash: [hash]
|
|
+Args:
|
|
+ - UnrollCount: [number]
|
|
+Invocation: 1
|
|
+...
|
|
diff --git a/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml
|
|
new file mode 100644
|
|
index 000000000000..b626473cf782
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/LoopUnroll/Inputs/unroll_template_no_metadata.yaml
|
|
@@ -0,0 +1,8 @@
|
|
+--- !AutoTuning
|
|
+Pass: loop-unroll
|
|
+CodeRegionType: loop
|
|
+CodeRegionHash: [hash]
|
|
+Args:
|
|
+ - UnrollCount: [number]
|
|
+Invocation: 1
|
|
+...
|
|
diff --git a/llvm/test/AutoTuning/LoopUnroll/debug_loc.ll b/llvm/test/AutoTuning/LoopUnroll/debug_loc.ll
|
|
new file mode 100644
|
|
index 000000000000..85dd690d01c5
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/LoopUnroll/debug_loc.ll
|
|
@@ -0,0 +1,161 @@
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' | \
|
|
+; RUN: FileCheck %s -check-prefix=DISABLE
|
|
+
|
|
+; RUN: rm %t.unroll_debug_loc0.yaml -rf
|
|
+; RUN: sed 's#\[number\]#0#g' %S/Inputs/debug_loc_template.yaml > %t.unroll_debug_loc0.yaml
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-input=%t.unroll_debug_loc0.yaml | \
|
|
+; RUN: FileCheck %s -check-prefix=UNROLL0
|
|
+
|
|
+; RUN: rm %t.unroll_debug_loc4.yaml -rf
|
|
+; RUN: sed 's#\[number\]#4#g' %S/Inputs/debug_loc_template.yaml > %t.unroll_debug_loc4.yaml
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-code-region-matching-hash=false \
|
|
+; RUN: -auto-tuning-input=%t.unroll_debug_loc4.yaml | \
|
|
+; RUN: FileCheck %s -check-prefix=UNROLL4
|
|
+
|
|
+; RUN: rm %t.unroll4.yaml -rf
|
|
+; RUN: sed 's#\[number\]#4#g; s#\[name\]#for.cond#g; s#\[hash\]#11552168367013316892#g;'\
|
|
+; RUN: %S/Inputs/unroll_template.yaml > %t.unroll4.yaml
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-code-region-matching-hash=false \
|
|
+; RUN: -auto-tuning-input=%t.unroll4.yaml | \
|
|
+; RUN: FileCheck %s -check-prefix=UNROLL4-MISMATCH
|
|
+
|
|
+; UNSUPPORTED: windows
|
|
+
|
|
+; ModuleID = 'loop-opp.c'
|
|
+source_filename = "loop-opp.c"
|
|
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
+target triple = "x86_64-unknown-linux-gnu"
|
|
+
|
|
+; Function Attrs: noinline nounwind uwtable
|
|
+define i32 @foo(i32* %n) #0 !dbg !6 {
|
|
+entry:
|
|
+ %n.addr = alloca i32*, align 8
|
|
+ %b = alloca i32, align 4
|
|
+ %i = alloca i32, align 4
|
|
+ store i32* %n, i32** %n.addr, align 8
|
|
+ call void @llvm.dbg.declare(metadata i32** %n.addr, metadata !11, metadata !12), !dbg !13
|
|
+ call void @llvm.dbg.declare(metadata i32* %b, metadata !14, metadata !12), !dbg !15
|
|
+ store i32 0, i32* %b, align 4, !dbg !15
|
|
+ call void @llvm.dbg.declare(metadata i32* %i, metadata !16, metadata !12), !dbg !18
|
|
+ store i32 0, i32* %i, align 4, !dbg !18
|
|
+ br label %for.cond, !dbg !19
|
|
+
|
|
+for.cond: ; preds = %for.inc, %entry
|
|
+ %0 = load i32, i32* %i, align 4, !dbg !20
|
|
+ %1 = load i32*, i32** %n.addr, align 8, !dbg !23
|
|
+ %2 = load i32, i32* %1, align 4, !dbg !24
|
|
+ %cmp = icmp slt i32 %0, %2, !dbg !25
|
|
+ br i1 %cmp, label %for.body, label %for.end, !dbg !26
|
|
+
|
|
+for.body: ; preds = %for.cond
|
|
+ %3 = load i32, i32* %b, align 4, !dbg !28
|
|
+ %add = add nsw i32 %3, 1, !dbg !30
|
|
+ store i32 %add, i32* %b, align 4, !dbg !31
|
|
+ br label %for.inc, !dbg !32
|
|
+
|
|
+for.inc: ; preds = %for.body
|
|
+ %4 = load i32, i32* %i, align 4, !dbg !33
|
|
+ %inc = add nsw i32 %4, 1, !dbg !33
|
|
+ store i32 %inc, i32* %i, align 4, !dbg !33
|
|
+ br label %for.cond, !dbg !35, !llvm.loop !36
|
|
+
|
|
+for.end: ; preds = %for.cond
|
|
+ %5 = load i32, i32* %b, align 4, !dbg !39
|
|
+ ret i32 %5, !dbg !40
|
|
+}
|
|
+
|
|
+; Function Attrs: nounwind readnone
|
|
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
|
|
+
|
|
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
+attributes #1 = { nounwind readnone }
|
|
+
|
|
+!llvm.dbg.cu = !{!0}
|
|
+!llvm.module.flags = !{!3, !4}
|
|
+!llvm.ident = !{!5}
|
|
+
|
|
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "" ,isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
|
|
+!1 = !DIFile(filename: "loop-opp.c", directory: "")
|
|
+!2 = !{}
|
|
+!3 = !{i32 2, !"Dwarf Version", i32 4}
|
|
+!4 = !{i32 2, !"Debug Info Version", i32 3}
|
|
+!5 = !{!""}
|
|
+!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
|
|
+!7 = !DISubroutineType(types: !8)
|
|
+!8 = !{!9, !10}
|
|
+!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
|
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64)
|
|
+!11 = !DILocalVariable(name: "n", arg: 1, scope: !6, file: !1, line: 1, type: !10)
|
|
+!12 = !DIExpression()
|
|
+!13 = !DILocation(line: 1, column: 20, scope: !6)
|
|
+!14 = !DILocalVariable(name: "b", scope: !6, file: !1, line: 3, type: !9)
|
|
+!15 = !DILocation(line: 3, column: 9, scope: !6)
|
|
+!16 = !DILocalVariable(name: "i", scope: !17, file: !1, line: 4, type: !9)
|
|
+!17 = distinct !DILexicalBlock(scope: !6, file: !1, line: 4, column: 5)
|
|
+!18 = !DILocation(line: 4, column: 14, scope: !17)
|
|
+!19 = !DILocation(line: 4, column: 10, scope: !17)
|
|
+!20 = !DILocation(line: 4, column: 20, scope: !21)
|
|
+!21 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 1)
|
|
+!22 = distinct !DILexicalBlock(scope: !17, file: !1, line: 4, column: 5)
|
|
+!23 = !DILocation(line: 4, column: 25, scope: !21)
|
|
+!24 = !DILocation(line: 4, column: 24, scope: !21)
|
|
+!25 = !DILocation(line: 4, column: 22, scope: !21)
|
|
+!26 = !DILocation(line: 4, column: 5, scope: !27)
|
|
+!27 = !DILexicalBlockFile(scope: !17, file: !1, discriminator: 1)
|
|
+!28 = !DILocation(line: 6, column: 11, scope: !29)
|
|
+!29 = distinct !DILexicalBlock(scope: !22, file: !1, line: 5, column: 5)
|
|
+!30 = !DILocation(line: 6, column: 12, scope: !29)
|
|
+!31 = !DILocation(line: 6, column: 9, scope: !29)
|
|
+!32 = !DILocation(line: 7, column: 5, scope: !29)
|
|
+!33 = !DILocation(line: 4, column: 28, scope: !34)
|
|
+!34 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 2)
|
|
+!35 = !DILocation(line: 4, column: 5, scope: !34)
|
|
+!36 = distinct !{!36, !37, !38}
|
|
+!37 = !DILocation(line: 4, column: 5, scope: !17)
|
|
+!38 = !DILocation(line: 7, column: 5, scope: !17)
|
|
+!39 = !DILocation(line: 8, column: 12, scope: !6)
|
|
+!40 = !DILocation(line: 8, column: 5, scope: !6)
|
|
+
|
|
+; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled when the auto-tuning feature is disabled, even when
|
|
+; the input remark contains DebugLoc info.
|
|
+;
|
|
+; DISABLE-LABEL: @foo(
|
|
+; DISABLE: for.cond
|
|
+; DISABLE: for.body
|
|
+; DISABLE-NOT: for.body.1
|
|
+; DISABLE: for.inc
|
|
+; DISABLE-NOT: llvm.loop.unroll.disable
|
|
+
|
|
+; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled
|
|
+; when unroll count explicitly set to be 0.
|
|
+;
|
|
+; UNROLL0-LABEL: @foo(
|
|
+; UNROLL0: for.cond
|
|
+; UNROLL0: for.body
|
|
+; UNROLL0-NOT: for.body.1
|
|
+; UNROLL0: for.inc
|
|
+; UNROLL0-NOT: llvm.loop.unroll.disable
|
|
+
|
|
+; Auto-tuning-enabled loop unrolling - check that we can unroll the loop by 4
|
|
+; when explicitly requested.
|
|
+;
|
|
+; UNROLL4-LABEL: @foo(
|
|
+; UNROLL4: for.cond
|
|
+; UNROLL4: for.body
|
|
+; UNROLL4: for.body.1
|
|
+; UNROLL4: for.body.2
|
|
+; UNROLL4: for.body.3
|
|
+; UNROLL4: llvm.loop.unroll.disable
|
|
+
|
|
+; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled
|
|
+; when DebugLoc is missing in the input remark.
|
|
+;
|
|
+; UNROLL4-MISMATCH-LABEL: @foo(
|
|
+; UNROLL4-MISMATCH: for.cond
|
|
+; UNROLL4-MISMATCH: for.body
|
|
+; UNROLL4-MISMATCH-NOT: for.body.1
|
|
+; UNROLL4-MISMATCH: for.inc
|
|
+; UNROLL4-MISMATCH-NOT: llvm.loop.unroll.disable
|
|
diff --git a/llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll b/llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll
|
|
new file mode 100644
|
|
index 000000000000..414c6ff2d1b0
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/LoopUnroll/dynamic_config.ll
|
|
@@ -0,0 +1,56 @@
|
|
+; RUN: rm %t.default_opp -rf
|
|
+; RUN: opt %s -S -auto-tuning-opp=%t.default_opp -auto-tuning-type-filter=Loop \
|
|
+; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.default_opp/dynamic_config.ll.yaml
|
|
+
|
|
+; Function Attrs: nofree norecurse nounwind uwtable
|
|
+define dso_local void @transform(i64* nocapture %W) local_unnamed_addr{
|
|
+entry:
|
|
+ br label %for.body
|
|
+
|
|
+for.body: ; preds = %entry, %for.body
|
|
+ %i.037 = phi i32 [ 16, %entry ], [ %inc, %for.body ]
|
|
+ %sub = add nsw i32 %i.037, -3
|
|
+ %idxprom = sext i32 %sub to i64
|
|
+ %arrayidx = getelementptr inbounds i64, i64* %W, i64 %idxprom
|
|
+ %0 = load i64, i64* %arrayidx, align 8
|
|
+ %sub1 = add nsw i32 %i.037, -6
|
|
+ %idxprom2 = sext i32 %sub1 to i64
|
|
+ %arrayidx3 = getelementptr inbounds i64, i64* %W, i64 %idxprom2
|
|
+ %1 = load i64, i64* %arrayidx3, align 8
|
|
+ %xor = xor i64 %1, %0
|
|
+ %idxprom4 = zext i32 %i.037 to i64
|
|
+ %arrayidx5 = getelementptr inbounds i64, i64* %W, i64 %idxprom4
|
|
+ store i64 %xor, i64* %arrayidx5, align 8
|
|
+ %inc = add nuw nsw i32 %i.037, 1
|
|
+ %cmp = icmp ult i32 %i.037, 79
|
|
+ br i1 %cmp, label %for.body, label %for.body8.preheader
|
|
+
|
|
+for.body8.preheader: ; preds = %for.body
|
|
+ br label %for.body8
|
|
+
|
|
+for.body8: ; preds = %for.body8.preheader, %for.body8
|
|
+ %indvars.iv = phi i64 [ 80, %for.body8.preheader ], [ %indvars.iv.next, %for.body8 ]
|
|
+ %2 = add nsw i64 %indvars.iv, -4
|
|
+ %arrayidx11 = getelementptr inbounds i64, i64* %W, i64 %2
|
|
+ %3 = load i64, i64* %arrayidx11, align 8
|
|
+ %4 = add nsw i64 %indvars.iv, -5
|
|
+ %arrayidx14 = getelementptr inbounds i64, i64* %W, i64 %4
|
|
+ %5 = load i64, i64* %arrayidx14, align 8
|
|
+ %xor15 = xor i64 %5, %3
|
|
+ %arrayidx17 = getelementptr inbounds i64, i64* %W, i64 %indvars.iv
|
|
+ store i64 %xor15, i64* %arrayidx17, align 8
|
|
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
+ %exitcond = icmp ne i64 %indvars.iv.next, 256
|
|
+ br i1 %exitcond, label %for.body8, label %for.end20
|
|
+
|
|
+for.end20: ; preds = %for.body8
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; CHECK: --- !AutoTuning
|
|
+; CHECK: DynamicConfigs: { UnrollCount: [ 0, 1, 64, 16, 32 ]
|
|
+; CHECK: ...
|
|
+; CHECK-NEXT: --- !AutoTuning
|
|
+; CHECK: DynamicConfigs: { UnrollCount: [ 0, 1, 64, 16, 32 ]
|
|
+; CHECK: ...
|
|
diff --git a/llvm/test/AutoTuning/LoopUnroll/loop_nest.ll b/llvm/test/AutoTuning/LoopUnroll/loop_nest.ll
|
|
new file mode 100644
|
|
index 000000000000..7f3e27ca057a
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/LoopUnroll/loop_nest.ll
|
|
@@ -0,0 +1,136 @@
|
|
+; REQUIRES: asserts
|
|
+; CodeRegionHash matches for the first code region only. AutoTuner will find
|
|
+; match for one code region when hash matching is enabled. AutoTuner will find
|
|
+; match for all three code regions when hash matching is disabled.
|
|
+
|
|
+; RUN: rm -rf %t.loop_nest.txt
|
|
+; RUN: opt %s -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -debug-only=autotuning -auto-tuning-input=%S/Inputs/loop_nest.yaml \
|
|
+; RUN: --disable-output &> %t.loop_nest.txt
|
|
+; RUN: grep 'UnrollCount is set' %t.loop_nest.txt | wc -l | \
|
|
+; RUN: FileCheck %s -check-prefix=HASH_MATCHING_ENABLED
|
|
+
|
|
+; RUN: rm -rf %t.loop_nest.txt
|
|
+; RUN: opt %s -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-input=%S/Inputs/loop_nest.yaml -debug-only=autotuning \
|
|
+; RUN: -auto-tuning-code-region-matching-hash=false --disable-output &> %t.loop_nest.txt
|
|
+; RUN: grep 'UnrollCount is set' %t.loop_nest.txt | wc -l | \
|
|
+; RUN: FileCheck %s -check-prefix=HASH_MATCHING_DISABLED
|
|
+
|
|
+; ModuleID = 'loop-nest.c'
|
|
+source_filename = "loop-nest.c"
|
|
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|
+target triple = "aarch64-unknown-linux-gnu"
|
|
+
|
|
+; Function Attrs: nofree norecurse nounwind uwtable
|
|
+define dso_local void @loop_nest(i32 %ni, i32 %nj, i32 %nk, i32 %alpha, i32 %beta, i32** nocapture readonly %A, i32** nocapture readonly %B, i32** nocapture readonly %C) local_unnamed_addr #0 !dbg !10 {
|
|
+entry:
|
|
+ %cmp41 = icmp sgt i32 %ni, 0, !dbg !12
|
|
+ br i1 %cmp41, label %for.cond1.preheader.lr.ph, label %for.end23, !dbg !13
|
|
+
|
|
+for.cond1.preheader.lr.ph: ; preds = %entry
|
|
+ %cmp238 = icmp slt i32 %nk, 1
|
|
+ %cmp536 = icmp slt i32 %nj, 1
|
|
+ %wide.trip.count51 = zext i32 %ni to i64, !dbg !12
|
|
+ %wide.trip.count47 = zext i32 %nk to i64
|
|
+ %wide.trip.count = zext i32 %nj to i64
|
|
+ %brmerge = or i1 %cmp238, %cmp536
|
|
+ br label %for.cond1.preheader, !dbg !13
|
|
+
|
|
+for.cond1.preheader: ; preds = %for.cond1.preheader.lr.ph, %for.inc21
|
|
+ %indvars.iv49 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next50, %for.inc21 ]
|
|
+ br i1 %brmerge, label %for.inc21, label %for.cond4.preheader.us.preheader, !dbg !14
|
|
+
|
|
+for.cond4.preheader.us.preheader: ; preds = %for.cond1.preheader
|
|
+ %arrayidx15 = getelementptr inbounds i32*, i32** %C, i64 %indvars.iv49
|
|
+ %arrayidx = getelementptr inbounds i32*, i32** %A, i64 %indvars.iv49
|
|
+ %.pre = load i32*, i32** %arrayidx, align 8, !tbaa !15
|
|
+ %.pre53 = load i32*, i32** %arrayidx15, align 8, !tbaa !15
|
|
+ br label %for.cond4.preheader.us, !dbg !14
|
|
+
|
|
+for.cond4.preheader.us: ; preds = %for.cond4.preheader.us.preheader, %for.cond4.for.inc18_crit_edge.us
|
|
+ %indvars.iv45 = phi i64 [ 0, %for.cond4.preheader.us.preheader ], [ %indvars.iv.next46, %for.cond4.for.inc18_crit_edge.us ]
|
|
+ %arrayidx8.us = getelementptr inbounds i32, i32* %.pre, i64 %indvars.iv45
|
|
+ %arrayidx10.us = getelementptr inbounds i32*, i32** %B, i64 %indvars.iv45
|
|
+ %0 = load i32*, i32** %arrayidx10.us, align 8, !tbaa !15
|
|
+ br label %for.body6.us, !dbg !19
|
|
+
|
|
+for.body6.us: ; preds = %for.cond4.preheader.us, %for.body6.us
|
|
+ %indvars.iv = phi i64 [ 0, %for.cond4.preheader.us ], [ %indvars.iv.next, %for.body6.us ]
|
|
+ %1 = load i32, i32* %arrayidx8.us, align 4, !dbg !20, !tbaa !21
|
|
+ %mul.us = mul nsw i32 %1, %alpha, !dbg !23
|
|
+ %arrayidx12.us = getelementptr inbounds i32, i32* %0, i64 %indvars.iv, !dbg !24
|
|
+ %2 = load i32, i32* %arrayidx12.us, align 4, !dbg !24, !tbaa !21
|
|
+ %mul13.us = mul nsw i32 %mul.us, %2, !dbg !25
|
|
+ %arrayidx17.us = getelementptr inbounds i32, i32* %.pre53, i64 %indvars.iv, !dbg !26
|
|
+ %3 = load i32, i32* %arrayidx17.us, align 4, !dbg !27, !tbaa !21
|
|
+ %add.us = add nsw i32 %3, %mul13.us, !dbg !27
|
|
+ store i32 %add.us, i32* %arrayidx17.us, align 4, !dbg !27, !tbaa !21
|
|
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !28
|
|
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg !29
|
|
+ br i1 %exitcond.not, label %for.cond4.for.inc18_crit_edge.us, label %for.body6.us, !dbg !19, !llvm.loop !30
|
|
+
|
|
+for.cond4.for.inc18_crit_edge.us: ; preds = %for.body6.us
|
|
+ %indvars.iv.next46 = add nuw nsw i64 %indvars.iv45, 1, !dbg !33
|
|
+ %exitcond48.not = icmp eq i64 %indvars.iv.next46, %wide.trip.count47, !dbg !34
|
|
+ br i1 %exitcond48.not, label %for.inc21, label %for.cond4.preheader.us, !dbg !14, !llvm.loop !35
|
|
+
|
|
+for.inc21: ; preds = %for.cond4.for.inc18_crit_edge.us, %for.cond1.preheader
|
|
+ %indvars.iv.next50 = add nuw nsw i64 %indvars.iv49, 1, !dbg !37
|
|
+ %exitcond52.not = icmp eq i64 %indvars.iv.next50, %wide.trip.count51, !dbg !12
|
|
+ br i1 %exitcond52.not, label %for.end23, label %for.cond1.preheader, !dbg !13, !llvm.loop !38
|
|
+
|
|
+for.end23: ; preds = %for.inc21, %entry
|
|
+ ret void, !dbg !40
|
|
+}
|
|
+
|
|
+attributes #0 = { nofree norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
+
|
|
+!llvm.dbg.cu = !{!0}
|
|
+!llvm.module.flags = !{!3, !4, !5, !6, !7, !8}
|
|
+!llvm.ident = !{!9}
|
|
+
|
|
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei BiSheng Compiler clang version 12.0.0 (clang-a279e099a09a flang-9a86b70390a7)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None)
|
|
+!1 = !DIFile(filename: "loop-nest.c", directory: "/home/m00629332/code/autoTuner")
|
|
+!2 = !{}
|
|
+!3 = !{i32 2, !"Debug Info Version", i32 3}
|
|
+!4 = !{i32 1, !"wchar_size", i32 4}
|
|
+!5 = !{i32 1, !"branch-target-enforcement", i32 0}
|
|
+!6 = !{i32 1, !"sign-return-address", i32 0}
|
|
+!7 = !{i32 1, !"sign-return-address-all", i32 0}
|
|
+!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0}
|
|
+!9 = !{!"Huawei BiSheng Compiler clang version 12.0.0 (clang-a279e099a09a flang-9a86b70390a7)"}
|
|
+!10 = distinct !DISubprogram(name: "loop_nest", scope: !1, file: !1, line: 1, type: !11, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
|
+!11 = !DISubroutineType(types: !2)
|
|
+!12 = !DILocation(line: 8, column: 17, scope: !10)
|
|
+!13 = !DILocation(line: 8, column: 3, scope: !10)
|
|
+!14 = !DILocation(line: 9, column: 5, scope: !10)
|
|
+!15 = !{!16, !16, i64 0}
|
|
+!16 = !{!"any pointer", !17, i64 0}
|
|
+!17 = !{!"omnipotent char", !18, i64 0}
|
|
+!18 = !{!"Simple C/C++ TBAA"}
|
|
+!19 = !DILocation(line: 10, column: 8, scope: !10)
|
|
+!20 = !DILocation(line: 11, column: 23, scope: !10)
|
|
+!21 = !{!22, !22, i64 0}
|
|
+!22 = !{!"int", !17, i64 0}
|
|
+!23 = !DILocation(line: 11, column: 21, scope: !10)
|
|
+!24 = !DILocation(line: 11, column: 33, scope: !10)
|
|
+!25 = !DILocation(line: 11, column: 31, scope: !10)
|
|
+!26 = !DILocation(line: 11, column: 4, scope: !10)
|
|
+!27 = !DILocation(line: 11, column: 12, scope: !10)
|
|
+!28 = !DILocation(line: 10, column: 29, scope: !10)
|
|
+!29 = !DILocation(line: 10, column: 22, scope: !10)
|
|
+!30 = distinct !{!30, !19, !31, !32}
|
|
+!31 = !DILocation(line: 11, column: 39, scope: !10)
|
|
+!32 = !{!"llvm.loop.mustprogress"}
|
|
+!33 = !DILocation(line: 9, column: 26, scope: !10)
|
|
+!34 = !DILocation(line: 9, column: 19, scope: !10)
|
|
+!35 = distinct !{!35, !14, !36, !32}
|
|
+!36 = !DILocation(line: 12, column: 5, scope: !10)
|
|
+!37 = !DILocation(line: 8, column: 24, scope: !10)
|
|
+!38 = distinct !{!38, !13, !39, !32}
|
|
+!39 = !DILocation(line: 13, column: 3, scope: !10)
|
|
+!40 = !DILocation(line: 15, column: 1, scope: !10)
|
|
+
|
|
+; HASH_MATCHING_ENABLED: 1
|
|
+; HASH_MATCHING_DISABLED: 3
|
|
diff --git a/llvm/test/AutoTuning/LoopUnroll/loop_peel.ll b/llvm/test/AutoTuning/LoopUnroll/loop_peel.ll
|
|
new file mode 100644
|
|
index 000000000000..f3839a49b20e
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/LoopUnroll/loop_peel.ll
|
|
@@ -0,0 +1,53 @@
|
|
+; NOTE: This file tests that when UnrollCount = 1 and the compiler sees that
|
|
+; Loop Peeling is beneficial and possible, Loop Peeling is performed.
|
|
+; RUN: rm %t.unroll1.yaml -rf
|
|
+; RUN: sed 's#\[number\]#1#g;' %S/Inputs/loop_peel.yaml > %t.unroll1.yaml
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-code-region-matching-hash=false \
|
|
+; RUN: -auto-tuning-input=%t.unroll1.yaml | FileCheck %s
|
|
+
|
|
+; RUN: rm %t.unroll0.yaml -rf
|
|
+; RUN: sed 's#\[number\]#0#g;' %S/Inputs/loop_peel.yaml > %t.unroll0.yaml
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-code-region-matching-hash=false \
|
|
+; RUN: -auto-tuning-input=%t.unroll0.yaml | FileCheck %s --check-prefix=DISABLE
|
|
+
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-code-region-matching-hash=false \
|
|
+; RUN: -auto-tuning-opp=%t.unroll_opp -auto-tuning-type-filter=Loop --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.unroll_opp/loop_peel.ll.yaml -check-prefix=TEST-1
|
|
+
|
|
+define i32 @invariant_backedge_1(i32 %a, i32 %b) {
|
|
+; CHECK-LABEL: @invariant_backedge_1
|
|
+; CHECK-NOT: %plus = phi
|
|
+; CHECK: loop.peel:
|
|
+; CHECK: loop:
|
|
+; CHECK: %i = phi
|
|
+; CHECK: %sum = phi
|
|
+; DISABLE-LABEL: @invariant_backedge_1
|
|
+; DISABLE-NOT: loop.peel:
|
|
+entry:
|
|
+ br label %loop
|
|
+
|
|
+loop:
|
|
+ %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
|
|
+ %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
|
|
+ %plus = phi i32 [ %a, %entry ], [ %b, %loop ]
|
|
+
|
|
+ %incsum = add i32 %sum, %plus
|
|
+ %inc = add i32 %i, 1
|
|
+ %cmp = icmp slt i32 %i, 1000
|
|
+
|
|
+ br i1 %cmp, label %loop, label %exit
|
|
+
|
|
+exit:
|
|
+ ret i32 %sum
|
|
+}
|
|
+
|
|
+; Check for dynamic values when UnrollCount is set to 1:
|
|
+; TEST-1: Pass: loop-unroll
|
|
+; TEST-1-NEXT: Name: loop
|
|
+; TEST-1-NEXT: Function: invariant_backedge_1
|
|
+; TEST-1-NEXT: CodeRegionType: loop
|
|
+; TEST-1-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; TEST-1-NEXT: DynamicConfigs: { UnrollCount: [ 0, 1, 2 ] }
|
|
diff --git a/llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll b/llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll
|
|
new file mode 100644
|
|
index 000000000000..843b8e28f3d8
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/LoopUnroll/unroll-pragma.ll
|
|
@@ -0,0 +1,129 @@
|
|
+; RUN: rm %t.unroll_opp -rf
|
|
+; RUN: opt %s -S -auto-tuning-opp=%t.unroll_opp -auto-tuning-type-filter=Loop \
|
|
+; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-1
|
|
+; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-2
|
|
+
|
|
+; RUN: rm %t.unroll_opp -rf
|
|
+; RUN: opt %s -S -auto-tuning-opp=%t.unroll_opp -auto-tuning-type-filter=Loop \
|
|
+; RUN: -passes='require<opt-remark-emit>,function(loop-unroll)' --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-1
|
|
+; RUN: FileCheck %s --input-file %t.unroll_opp/unroll-pragma.ll.yaml -check-prefix=TEST-2
|
|
+
|
|
+; This function contains two loops. loop for.body is defined with a pragma
|
|
+; unroll_count(4) and loop for.body9 is without a pragma. AutoTuner will only
|
|
+; consider for.body9 as a tuning opportunity.
|
|
+
|
|
+; ModuleID = 'loop-unroll.c'
|
|
+source_filename = "loop-unroll.c"
|
|
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|
+target triple = "aarch64-unknown-linux-gnu"
|
|
+
|
|
+; Function Attrs: nofree norecurse nounwind uwtable
|
|
+define dso_local void @loop(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* noalias nocapture %c, i32* noalias nocapture %d, i32 %len) local_unnamed_addr #0 !dbg !10 {
|
|
+entry:
|
|
+ %cmp34 = icmp slt i32 0, %len, !dbg !12
|
|
+ br i1 %cmp34, label %for.body.lr.ph, label %for.cond6.preheader, !dbg !13
|
|
+
|
|
+for.body.lr.ph: ; preds = %entry
|
|
+ br label %for.body, !dbg !13
|
|
+
|
|
+for.cond.for.cond6.preheader_crit_edge: ; preds = %for.body
|
|
+ br label %for.cond6.preheader, !dbg !13
|
|
+
|
|
+for.cond6.preheader: ; preds = %for.cond.for.cond6.preheader_crit_edge, %entry
|
|
+ %cmp732 = icmp slt i32 0, %len, !dbg !14
|
|
+ br i1 %cmp732, label %for.body9.lr.ph, label %for.cond.cleanup8, !dbg !15
|
|
+
|
|
+for.body9.lr.ph: ; preds = %for.cond6.preheader
|
|
+ br label %for.body9, !dbg !15
|
|
+
|
|
+for.body: ; preds = %for.body.lr.ph, %for.body
|
|
+ %i.035 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
|
|
+ %idxprom = zext i32 %i.035 to i64, !dbg !16
|
|
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom, !dbg !16
|
|
+ %0 = load i32, i32* %arrayidx, align 4, !dbg !16, !tbaa !17
|
|
+ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %idxprom, !dbg !21
|
|
+ %1 = load i32, i32* %arrayidx2, align 4, !dbg !21, !tbaa !17
|
|
+ %add = add nsw i32 %1, %0, !dbg !22
|
|
+ %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 %idxprom, !dbg !23
|
|
+ store i32 %add, i32* %arrayidx4, align 4, !dbg !24, !tbaa !17
|
|
+ %inc = add nuw nsw i32 %i.035, 1, !dbg !25
|
|
+ %cmp = icmp slt i32 %inc, %len, !dbg !12
|
|
+ br i1 %cmp, label %for.body, label %for.cond.for.cond6.preheader_crit_edge, !dbg !13, !llvm.loop !26
|
|
+
|
|
+for.cond6.for.cond.cleanup8_crit_edge: ; preds = %for.body9
|
|
+ br label %for.cond.cleanup8, !dbg !15
|
|
+
|
|
+for.cond.cleanup8: ; preds = %for.cond6.for.cond.cleanup8_crit_edge, %for.cond6.preheader
|
|
+ ret void, !dbg !30
|
|
+
|
|
+for.body9: ; preds = %for.body9.lr.ph, %for.body9
|
|
+ %i5.033 = phi i32 [ 0, %for.body9.lr.ph ], [ %inc17, %for.body9 ]
|
|
+ %idxprom10 = zext i32 %i5.033 to i64, !dbg !31
|
|
+ %arrayidx11 = getelementptr inbounds i32, i32* %a, i64 %idxprom10, !dbg !31
|
|
+ %2 = load i32, i32* %arrayidx11, align 4, !dbg !31, !tbaa !17
|
|
+ %arrayidx13 = getelementptr inbounds i32, i32* %b, i64 %idxprom10, !dbg !32
|
|
+ %3 = load i32, i32* %arrayidx13, align 4, !dbg !32, !tbaa !17
|
|
+ %mul = mul nsw i32 %3, %2, !dbg !33
|
|
+ %arrayidx15 = getelementptr inbounds i32, i32* %d, i64 %idxprom10, !dbg !34
|
|
+ store i32 %mul, i32* %arrayidx15, align 4, !dbg !35, !tbaa !17
|
|
+ %inc17 = add nuw nsw i32 %i5.033, 1, !dbg !36
|
|
+ %cmp7 = icmp slt i32 %inc17, %len, !dbg !14
|
|
+ br i1 %cmp7, label %for.body9, label %for.cond6.for.cond.cleanup8_crit_edge, !dbg !15, !llvm.loop !37
|
|
+}
|
|
+
|
|
+attributes #0 = { nofree norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
+
|
|
+!llvm.dbg.cu = !{!0}
|
|
+!llvm.module.flags = !{!3, !4, !5, !6, !7, !8}
|
|
+!llvm.ident = !{!9}
|
|
+
|
|
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Huawei Bisheng Compiler clang version 12.0.0 (0261bbf0b2fd)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None)
|
|
+!1 = !DIFile(filename: "loop-unroll.c", directory: "/home/AutoTuner/")
|
|
+!2 = !{}
|
|
+!3 = !{i32 2, !"Debug Info Version", i32 3}
|
|
+!4 = !{i32 1, !"wchar_size", i32 4}
|
|
+!5 = !{i32 1, !"branch-target-enforcement", i32 0}
|
|
+!6 = !{i32 1, !"sign-return-address", i32 0}
|
|
+!7 = !{i32 1, !"sign-return-address-all", i32 0}
|
|
+!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0}
|
|
+!9 = !{!"Huawei Bisheng Compiler clang version 12.0.0 (0261bbf0b2fd)"}
|
|
+!10 = distinct !DISubprogram(name: "a", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
|
+!11 = !DISubroutineType(types: !2)
|
|
+!12 = !DILocation(line: 3, column: 20, scope: !10)
|
|
+!13 = !DILocation(line: 3, column: 5, scope: !10)
|
|
+!14 = !DILocation(line: 7, column: 20, scope: !10)
|
|
+!15 = !DILocation(line: 7, column: 5, scope: !10)
|
|
+!16 = !DILocation(line: 4, column: 16, scope: !10)
|
|
+!17 = !{!18, !18, i64 0}
|
|
+!18 = !{!"int", !19, i64 0}
|
|
+!19 = !{!"omnipotent char", !20, i64 0}
|
|
+!20 = !{!"Simple C/C++ TBAA"}
|
|
+!21 = !DILocation(line: 4, column: 23, scope: !10)
|
|
+!22 = !DILocation(line: 4, column: 21, scope: !10)
|
|
+!23 = !DILocation(line: 4, column: 9, scope: !10)
|
|
+!24 = !DILocation(line: 4, column: 14, scope: !10)
|
|
+!25 = !DILocation(line: 3, column: 28, scope: !10)
|
|
+!26 = distinct !{!26, !13, !27, !28, !29}
|
|
+!27 = !DILocation(line: 5, column: 5, scope: !10)
|
|
+!28 = !{!"llvm.loop.mustprogress"}
|
|
+!29 = !{!"llvm.loop.unroll.count", i32 4}
|
|
+!30 = !DILocation(line: 10, column: 1, scope: !10)
|
|
+!31 = !DILocation(line: 8, column: 16, scope: !10)
|
|
+!32 = !DILocation(line: 8, column: 23, scope: !10)
|
|
+!33 = !DILocation(line: 8, column: 21, scope: !10)
|
|
+!34 = !DILocation(line: 8, column: 9, scope: !10)
|
|
+!35 = !DILocation(line: 8, column: 14, scope: !10)
|
|
+!36 = !DILocation(line: 7, column: 28, scope: !10)
|
|
+!37 = distinct !{!37, !15, !38, !28}
|
|
+!38 = !DILocation(line: 9, column: 5, scope: !10)
|
|
+
|
|
+
|
|
+; TEST-1: Pass: loop-unroll
|
|
+; TEST-1-NOT: Pass: loop-unroll
|
|
+
|
|
+; TEST-2: Name: for.body9
|
|
+; TEST-2-NEXT: DebugLoc: { File: loop-unroll.c, Line: 7, Column: 5 }
|
|
+; TEST-2-NEXT: Function: loop
|
|
+; TEST-2-NEXT: CodeRegionType: loop
|
|
diff --git a/llvm/test/AutoTuning/LoopUnroll/unroll.ll b/llvm/test/AutoTuning/LoopUnroll/unroll.ll
|
|
new file mode 100644
|
|
index 000000000000..ba5c89fffaff
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/LoopUnroll/unroll.ll
|
|
@@ -0,0 +1,101 @@
|
|
+; RUN: opt %s -S -passes=loop-unroll | FileCheck %s -check-prefix=DISABLE
|
|
+
|
|
+; RUN: rm %t.unroll0.yaml -rf
|
|
+; RUN: sed 's#\[number\]#0#g; s#\[name\]#for.body#g; s#\[hash\]#14791762861362113823#g' \
|
|
+; RUN: %S/Inputs/unroll_template.yaml > %t.unroll0.yaml
|
|
+; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll0.yaml \
|
|
+; RUN: -auto-tuning-code-region-matching-hash=false | \
|
|
+; RUN: FileCheck %s -check-prefix=UNROLL0
|
|
+
|
|
+; RUN: rm %t.unroll0.yaml -rf
|
|
+; RUN: sed 's#\[number\]#0#g; s#\[hash\]#14791762861362113823#g' \
|
|
+; RUN: %S/Inputs/unroll_template_no_metadata.yaml > %t.unroll0.yaml
|
|
+; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll0.yaml \
|
|
+; RUN: -auto-tuning-omit-metadata | \
|
|
+; RUN: FileCheck %s -check-prefix=UNROLL0
|
|
+
|
|
+; RUN: rm %t.result1 %t.unroll1.yaml -rf
|
|
+; RUN: sed 's#\[number\]#1#g; s#\[name\]#for.body#g; s#\[hash\]#14791762861362113823#g' \
|
|
+; RUN: %S/Inputs/unroll_template.yaml > %t.unroll1.yaml
|
|
+; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll1.yaml | \
|
|
+; RUN: FileCheck %s -check-prefix=UNROLL1
|
|
+
|
|
+; RUN: rm %t.result1 %t.unroll1.yaml -rf
|
|
+; RUN: sed 's#\[number\]#1#g; s#\[hash\]#14791762861362113823#g' \
|
|
+; RUN: %S/Inputs/unroll_template_no_metadata.yaml > %t.unroll1.yaml
|
|
+; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll1.yaml \
|
|
+; RUN: -auto-tuning-omit-metadata | \
|
|
+; RUN: FileCheck %s -check-prefix=UNROLL1
|
|
+
|
|
+; RUN: rm %t.result4 %t.unroll4.yaml -rf
|
|
+; RUN: sed 's#\[number\]#4#g; s#\[name\]#for.body#g; s#\[hash\]#14791762861362113823#g' \
|
|
+; RUN: %S/Inputs/unroll_template.yaml > %t.unroll4.yaml
|
|
+; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll4.yaml | \
|
|
+; RUN: FileCheck %s -check-prefix=UNROLL4
|
|
+
|
|
+; RUN: rm %t.result4 %t.unroll4.yaml -rf
|
|
+; RUN: sed 's#\[number\]#4#g; s#\[hash\]#14791762861362113823#g' \
|
|
+; RUN: %S/Inputs/unroll_template_no_metadata.yaml > %t.unroll4.yaml
|
|
+; RUN: opt %s -S -passes=loop-unroll -auto-tuning-input=%t.unroll4.yaml \
|
|
+; RUN: -auto-tuning-omit-metadata | \
|
|
+; RUN: FileCheck %s -check-prefix=UNROLL4
|
|
+
|
|
+; UNSUPPORTED: windows
|
|
+
|
|
+define void @foo(i32* nocapture %a) {
|
|
+entry:
|
|
+ br label %for.body
|
|
+
|
|
+for.body: ; preds = %for.body, %entry
|
|
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
|
+ %0 = load i32, i32* %arrayidx, align 4
|
|
+ %inc = add nsw i32 %0, 1
|
|
+ store i32 %inc, i32* %arrayidx, align 4
|
|
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
|
|
+ br i1 %exitcond, label %for.end, label %for.body
|
|
+
|
|
+for.end: ; preds = %for.body
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled when the auto-tuning feature is disabled
|
|
+;
|
|
+; DISABLE-LABEL: @foo(
|
|
+; DISABLE: store i32
|
|
+; DISABLE-NOT: store i32
|
|
+; DISABLE: br i1
|
|
+; DISABLE-NOT: llvm.loop.unroll.disable
|
|
+
|
|
+
|
|
+; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled
|
|
+; when unroll count explicitly set to be 0.
|
|
+;
|
|
+; UNROLL0-LABEL: @foo(
|
|
+; UNROLL0: store i32
|
|
+; UNROLL0-NOT: store i32
|
|
+; UNROLL0: br i1
|
|
+; UNROLL0-NOT: llvm.loop.unroll.disable
|
|
+
|
|
+
|
|
+; Auto-tuning-enabled loop unrolling - Requesting UnrollCount = 1 will perform
|
|
+; Loop Peeling, and if Loop Peeling isn't possible/beneficial then Unroll Count
|
|
+; is unchanged.
|
|
+;
|
|
+; UNROLL1-LABEL: @foo(
|
|
+; UNROLL1: store i32
|
|
+; UNROLL1-NOT: store i32
|
|
+; UNROLL1: br i1
|
|
+; UNROLL1: llvm.loop.unroll.disable
|
|
+
|
|
+; Auto-tuning-enabled loop unrolling - check that we can unroll the loop by 4
|
|
+; when explicitly requested.
|
|
+;
|
|
+; UNROLL4-LABEL: @foo(
|
|
+; UNROLL4: store i32
|
|
+; UNROLL4: store i32
|
|
+; UNROLL4: store i32
|
|
+; UNROLL4: store i32
|
|
+; UNROLL4: br i1
|
|
+; UNROLL4: llvm.loop.unroll.disable
|
|
diff --git a/llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll b/llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll
|
|
new file mode 100644
|
|
index 000000000000..480ccad640ae
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/LoopUnroll/unroll_raw.ll
|
|
@@ -0,0 +1,113 @@
|
|
+; Test loop unrolling using auto-tuning YAML API with IRs generated when ASSERTION=OFF
|
|
+; The IRs generated when ASSERTION=OFF usually only use slot numbers as variable names.
|
|
+
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' | \
|
|
+; RUN: FileCheck %s -check-prefix=DISABLE
|
|
+
|
|
+; RUN: rm %t.result1_raw %t.unroll1_raw.yaml -rf
|
|
+; RUN: sed 's#\[number\]#1#g; s#\[hash\]#18159364858606519094#g' \
|
|
+; RUN: %S/Inputs/unroll_raw_template.yaml > %t.unroll1_raw.yaml
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,function(loop-unroll)' \
|
|
+; RUN: -auto-tuning-input=%t.unroll1_raw.yaml | FileCheck %s -check-prefix=UNROLL1
|
|
+
|
|
+; RUN: rm %t.result2_raw %t.unroll2_raw.yaml -rf
|
|
+; RUN: sed 's#\[number\]#2#g; s#\[hash\]#18159364858606519094#g' \
|
|
+; RUN: %S/Inputs/unroll_raw_template.yaml > %t.unroll2_raw.yaml
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,function(loop-unroll)' \
|
|
+; RUN: -auto-tuning-input=%t.unroll2_raw.yaml | FileCheck %s -check-prefix=UNROLL2
|
|
+
|
|
+; RUN: rm %t.result4_raw %t.unroll4_raw.yaml -rf
|
|
+; RUN: sed 's#\[number\]#4#g; s#\[hash\]#18159364858606519094#g' \
|
|
+; RUN: %S/Inputs/unroll_raw_template.yaml > %t.unroll4_raw.yaml
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,function(loop-unroll)' \
|
|
+; RUN: -auto-tuning-input=%t.unroll4_raw.yaml | FileCheck %s -check-prefix=UNROLL4
|
|
+
|
|
+; UNSUPPORTED: windows
|
|
+
|
|
+; ModuleID = 't.ll'
|
|
+source_filename = "t.ll"
|
|
+
|
|
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
|
|
+
|
|
+define void @test(i32*) {
|
|
+ %2 = alloca i32*, align 8
|
|
+ store i32* %0, i32** %2, align 8
|
|
+ %3 = load i32*, i32** %2, align 8
|
|
+ %4 = load i32, i32* %3, align 4
|
|
+ %5 = add nsw i32 %4, 2
|
|
+ %6 = load i32*, i32** %2, align 8
|
|
+ store i32 %5, i32* %6, align 4
|
|
+ ret void
|
|
+}
|
|
+
|
|
+define i32 @main() {
|
|
+ %1 = alloca i32, align 4
|
|
+ %2 = alloca i32, align 4
|
|
+ store i32 0, i32* %1, align 4
|
|
+ store i32 8, i32* %2, align 4
|
|
+ %3 = load i32, i32* %2, align 4
|
|
+ %4 = icmp sle i32 %3, 88
|
|
+ br i1 %4, label %.lr.ph, label %13
|
|
+
|
|
+.lr.ph: ; preds = %0
|
|
+ br label %5
|
|
+
|
|
+; <label>:5: ; preds = %.lr.ph, %8
|
|
+ call void @test(i32* %2)
|
|
+ %6 = load i32, i32* %2, align 4
|
|
+ %7 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %6)
|
|
+ br label %8
|
|
+
|
|
+; <label>:8: ; preds = %5
|
|
+ %9 = load i32, i32* %2, align 4
|
|
+ %10 = add nsw i32 %9, 8
|
|
+ store i32 %10, i32* %2, align 4
|
|
+ %11 = load i32, i32* %2, align 4
|
|
+ %12 = icmp sle i32 %11, 88
|
|
+ br i1 %12, label %5, label %._crit_edge
|
|
+
|
|
+._crit_edge: ; preds = %8
|
|
+ br label %13
|
|
+
|
|
+; <label>:13: ; preds = %._crit_edge, %0
|
|
+ %14 = load i32, i32* %1, align 4
|
|
+ ret i32 %14
|
|
+}
|
|
+
|
|
+declare i32 @printf(i8*, ...)
|
|
+
|
|
+
|
|
+; Auto-tuning-enabled loop unrolling - check that the loop is not unrolled when the auto-tuning feature is disabled
|
|
+;
|
|
+; DISABLE-LABEL: @main(
|
|
+; DISABLE: call void @test(ptr %2)
|
|
+; DISABLE-NOT: call void @test(ptr %2)
|
|
+; DISABLE-NOT: llvm.loop.unroll.disable
|
|
+
|
|
+
|
|
+; Auto-tuning-enabled loop unrolling - check that we can unroll the loop by 1
|
|
+; when explicitly requested.
|
|
+;
|
|
+; UNROLL1-LABEL: @main(
|
|
+; UNROLL1: call void @test(ptr %2)
|
|
+; UNROLL1-NOT: call void @test(ptr %2)
|
|
+
|
|
+; Auto-tuning-enabled loop unrolling - check that we can unroll the loop by 2
|
|
+; when explicitly requested.
|
|
+;
|
|
+; UNROLL2-LABEL: @main(
|
|
+; UNROLL2: call void @test(ptr %2)
|
|
+; UNROLL2: call void @test(ptr %2)
|
|
+; UNROLL2-NOT: call void @test(ptr %2)
|
|
+; UNROLL2: llvm.loop.unroll.disable
|
|
+
|
|
+
|
|
+; Auto-tuning-enabled loop unrolling - check that we can unroll the loop by 4
|
|
+; when explicitly requested.
|
|
+;
|
|
+; UNROLL4-LABEL: @main(
|
|
+; UNROLL4: call void @test(ptr %2)
|
|
+; UNROLL4: call void @test(ptr %2)
|
|
+; UNROLL4: call void @test(ptr %2)
|
|
+; UNROLL4: call void @test(ptr %2)
|
|
+; UNROLL4: llvm.loop.unroll.disable
|
|
diff --git a/llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template.yaml b/llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template.yaml
|
|
new file mode 100644
|
|
index 000000000000..b65fddf4e23f
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template.yaml
|
|
@@ -0,0 +1,9 @@
|
|
+--- !AutoTuning
|
|
+Pass: loop-vectorize
|
|
+Name: bb4
|
|
+Function: TestFoo
|
|
+CodeRegionType: loop
|
|
+CodeRegionHash: 14229620333597121971
|
|
+Args:
|
|
+- VectorizationInterleave: [number]
|
|
+...
|
|
diff --git a/llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template_no_metadata.yaml b/llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template_no_metadata.yaml
|
|
new file mode 100644
|
|
index 000000000000..87d2fc2587cb
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/LoopVectorize/Inputs/vectorize_template_no_metadata.yaml
|
|
@@ -0,0 +1,7 @@
|
|
+--- !AutoTuning
|
|
+Pass: loop-vectorize
|
|
+CodeRegionType: loop
|
|
+CodeRegionHash: 14229620333597121971
|
|
+Args:
|
|
+- VectorizationInterleave: [number]
|
|
+...
|
|
diff --git a/llvm/test/AutoTuning/LoopVectorize/force-vector-interleave.ll b/llvm/test/AutoTuning/LoopVectorize/force-vector-interleave.ll
|
|
new file mode 100644
|
|
index 000000000000..a1652babd8f4
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/LoopVectorize/force-vector-interleave.ll
|
|
@@ -0,0 +1,88 @@
|
|
+; RUN: rm %t.1 %t.2 %t.1.yaml -rf
|
|
+; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=1 -S -o %t.1
|
|
+; RUN: sed 's#\[number\]#1#g' %S/Inputs/vectorize_template.yaml > %t.1.yaml
|
|
+; RUN: opt %s -passes=loop-vectorize -auto-tuning-input=%t.1.yaml \
|
|
+; RUN: -S -o %t.2 -debug-only=autotuning 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=NUMBER1
|
|
+; RUN: diff %t.1 %t.2
|
|
+
|
|
+; RUN: rm %t.1 %t.2 %t.1.yaml -rf
|
|
+; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=1 -S -o %t.1
|
|
+; RUN: sed 's#\[number\]#1#g' %S/Inputs/vectorize_template_no_metadata.yaml > %t.1.yaml
|
|
+; RUN: opt %s -passes=loop-vectorize -auto-tuning-input=%t.1.yaml \
|
|
+; RUN: -auto-tuning-omit-metadata -S -o %t.2 -debug-only=autotuning 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=NUMBER1
|
|
+; RUN: diff %t.1 %t.2
|
|
+
|
|
+; RUN: rm %t.3 %t.4 %t.2.yaml -rf
|
|
+; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=2 -S -o %t.3
|
|
+; RUN: sed 's#\[number\]#2#g' %S/Inputs/vectorize_template.yaml > %t.2.yaml
|
|
+; RUN: opt %s -passes=loop-vectorize -auto-tuning-input=%t.2.yaml \
|
|
+; RUN: -S -o %t.4 -debug-only=autotuning 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=NUMBER2
|
|
+; RUN: diff %t.3 %t.4
|
|
+
|
|
+; RUN: rm %t.3 %t.4 %t.2.yaml -rf
|
|
+; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=2 -S -o %t.3
|
|
+; RUN: sed 's#\[number\]#2#g' %S/Inputs/vectorize_template_no_metadata.yaml > %t.2.yaml
|
|
+; RUN: opt %s -passes=loop-vectorize -auto-tuning-input=%t.2.yaml \
|
|
+; RUN: -auto-tuning-omit-metadata -S -o %t.4 -debug-only=autotuning 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=NUMBER2
|
|
+; RUN: diff %t.3 %t.4
|
|
+
|
|
+; Compiler should not generate tuning opportunities for AutoTuner if -force-vector-interleave is specified.
|
|
+; RUN: rm %t.interleave_opp -rf
|
|
+; RUN: opt %s -S -passes=loop-vectorize -auto-tuning-opp=%t.interleave_opp \
|
|
+; RUN: -force-vector-interleave=2 --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.interleave_opp/force-vector-interleave.ll.yaml \
|
|
+; RUN: -check-prefix=FORCE-INTERLEAVE
|
|
+
|
|
+; RUN: rm %t.interleave_opp -rf
|
|
+; RUN: opt %s -S -passes=loop-vectorize -auto-tuning-opp=%t.interleave_opp \
|
|
+; RUN: -force-vector-interleave=0 --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.interleave_opp/force-vector-interleave.ll.yaml \
|
|
+; RUN: -check-prefix=FORCE-INTERLEAVE
|
|
+
|
|
+; RUN: rm %t.interleave_opp -rf
|
|
+; RUN: opt %s -S -passes=loop-vectorize -auto-tuning-opp=%t.interleave_opp --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.interleave_opp/force-vector-interleave.ll.yaml \
|
|
+; RUN: -check-prefix=NO-FORCE-INTERLEAVE
|
|
+
|
|
+; REQUIRES: asserts
|
|
+; UNSUPPORTED: windows
|
|
+target datalayout = "e-m:e-i64:64-n32:64"
|
|
+target triple = "powerpc64le-unknown-linux-gnu"
|
|
+
|
|
+define void @TestFoo(i1 %X, i1 %Y) {
|
|
+bb:
|
|
+ br label %.loopexit5.outer
|
|
+
|
|
+.loopexit5.outer:
|
|
+ br label %.lr.ph12
|
|
+
|
|
+.loopexit:
|
|
+ br i1 %X, label %.loopexit5.outer, label %.lr.ph12
|
|
+
|
|
+.lr.ph12:
|
|
+ %f.110 = phi i32* [ %tmp1, %.loopexit ], [ null, %.loopexit5.outer ]
|
|
+ %tmp1 = getelementptr inbounds i32, i32* %f.110, i64 -2
|
|
+ br i1 %Y, label %bb4, label %.loopexit
|
|
+
|
|
+bb4:
|
|
+ %j.27 = phi i32 [ 0, %.lr.ph12 ], [ %tmp7, %bb4 ]
|
|
+ %tmp5 = load i32, i32* %f.110, align 4
|
|
+ %tmp7 = add nsw i32 %j.27, 1
|
|
+ %exitcond = icmp eq i32 %tmp7, 0
|
|
+ br i1 %exitcond, label %.loopexit, label %bb4
|
|
+}
|
|
+
|
|
+; NUMBER1: VectorizationInterleave is set for the CodeRegion:
|
|
+; NUMBER1: Name: bb4
|
|
+; NUMBER1: FuncName: TestFoo
|
|
+; NUMBER2: VectorizationInterleave is set for the CodeRegion:
|
|
+; NUMBER2: Name: bb4
|
|
+; NUMBER2: FuncName: TestFoo
|
|
+
|
|
+; FORCE-INTERLEAVE-NOT: Pass: loop-vectorize
|
|
+; NO-FORCE-INTERLEAVE: Pass: loop-vectorize
|
|
+; NO-FORCE-INTERLEAVE: BaselineConfig: { VectorizationInterleave:
|
|
diff --git a/llvm/test/AutoTuning/MachineScheduler/Inputs/misched_x86_template.yaml b/llvm/test/AutoTuning/MachineScheduler/Inputs/misched_x86_template.yaml
|
|
new file mode 100644
|
|
index 000000000000..34ea66e45a0a
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/MachineScheduler/Inputs/misched_x86_template.yaml
|
|
@@ -0,0 +1,10 @@
|
|
+--- !AutoTuning
|
|
+Pass: machine-scheduler
|
|
+Name: '%bb.1:for.cond.preheader'
|
|
+Function: _preextrapolate_helper
|
|
+CodeRegionType: machine_basic_block
|
|
+CodeRegionHash: 17389215691512956355
|
|
+Args:
|
|
+- ForceBottomUp: [bool1]
|
|
+- ForceTopDown: [bool2]
|
|
+...
|
|
diff --git a/llvm/test/AutoTuning/MachineScheduler/misched_x86_bidirectional.ll b/llvm/test/AutoTuning/MachineScheduler/misched_x86_bidirectional.ll
|
|
new file mode 100644
|
|
index 000000000000..aa4781dad204
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/MachineScheduler/misched_x86_bidirectional.ll
|
|
@@ -0,0 +1,73 @@
|
|
+; RUN: rm %t.bidirectional_result %t.misched_x86_bidirectional.yaml -rf
|
|
+; RUN: sed ' s#\[bool1\]#false#g; s#\[bool2\]#false#g' %S/Inputs/misched_x86_template.yaml > %t.misched_x86_bidirectional.yaml
|
|
+; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \
|
|
+; RUN: -auto-tuning-input=%t.misched_x86_bidirectional.yaml\
|
|
+; RUN: -verify-machineinstrs -debug-only=machine-scheduler 2>&1 \
|
|
+; RUN: | FileCheck %s
|
|
+
|
|
+; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \
|
|
+; RUN: -auto-tuning-input=%t.misched_x86_bidirectional.yaml\
|
|
+; RUN: -verify-machineinstrs -misched-topdown -debug-only=machine-scheduler 2>&1 \
|
|
+; RUN: | FileCheck %s -check-prefix=MIX-WITH-FLAG-TOPDOWN
|
|
+
|
|
+; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \
|
|
+; RUN: -auto-tuning-input=%t.misched_x86_bidirectional.yaml\
|
|
+; RUN: -verify-machineinstrs -misched-bottomup -debug-only=machine-scheduler 2>&1 \
|
|
+; RUN: | FileCheck %s -check-prefix=MIX-WITH-FLAG-BOTTOMUP
|
|
+
|
|
+; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \
|
|
+; RUN: -auto-tuning-input=%t.misched_x86_bidirectional.yaml\
|
|
+; RUN: -verify-machineinstrs -misched-bottomup=false -misched-topdown=false -debug-only=machine-scheduler 2>&1 \
|
|
+; RUN: | FileCheck %s -check-prefix=MIX-WITH-FLAG-BIDIRECTIONAL
|
|
+
|
|
+; REQUIRES: asserts
|
|
+; UNSUPPORTED: windows
|
|
+;
|
|
+; Interesting MachineScheduler cases.
|
|
+
|
|
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
|
|
+
|
|
+define fastcc void @_preextrapolate_helper() nounwind uwtable ssp {
|
|
+entry:
|
|
+ br i1 undef, label %for.cond.preheader, label %if.end
|
|
+
|
|
+for.cond.preheader: ; preds = %entry
|
|
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* null, i64 128, i32 4, i1 false) nounwind
|
|
+ unreachable
|
|
+
|
|
+if.end: ; preds = %entry
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; check if the scheduling policy defined with YAML is applied
|
|
+;
|
|
+; CHECK: _preextrapolate_helper:%bb.1 for.cond.preheader
|
|
+; CHECK: ScheduleDAGMILive::schedule starting
|
|
+; CHECK-NEXT: OnlyTopDown=0 OnlyBottomUp=0
|
|
+
|
|
+
|
|
+
|
|
+; check if the scheduling policies defined with YAML and '-misched-topdown' are applied
|
|
+; MIX-WITH-FLAG-TOPDOWN: _preextrapolate_helper:%bb.0 entry
|
|
+; MIX-WITH-FLAG-TOPDOWN: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-TOPDOWN-NEXT: OnlyTopDown=1 OnlyBottomUp=0
|
|
+; MIX-WITH-FLAG-TOPDOWN: _preextrapolate_helper:%bb.1 for.cond.preheader
|
|
+; MIX-WITH-FLAG-TOPDOWN: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-TOPDOWN-NEXT: OnlyTopDown=0 OnlyBottomUp=0
|
|
+
|
|
+; check if the scheduling policies defined with YAML and '-misched-bottomup' are applied
|
|
+; MIX-WITH-FLAG-BOTTOMUP: _preextrapolate_helper:%bb.0 entry
|
|
+; MIX-WITH-FLAG-BOTTOMUP: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-BOTTOMUP-NEXT: OnlyTopDown=0 OnlyBottomUp=1
|
|
+; MIX-WITH-FLAG-BOTTOMUP: _preextrapolate_helper:%bb.1 for.cond.preheader
|
|
+; MIX-WITH-FLAG-BOTTOMUP: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-BOTTOMUP-NEXT: OnlyTopDown=0 OnlyBottomUp=0
|
|
+
|
|
+; check if the scheduling policies defined with YAML and '-misched-topdown=false' and '-misched-bottomup=false'
|
|
+; are applied
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL: _preextrapolate_helper:%bb.0 entry
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL-NEXT: OnlyTopDown=0 OnlyBottomUp=0
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL: _preextrapolate_helper:%bb.1 for.cond.preheader
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL-NEXT: OnlyTopDown=0 OnlyBottomUp=0
|
|
diff --git a/llvm/test/AutoTuning/MachineScheduler/misched_x86_bottomup.ll b/llvm/test/AutoTuning/MachineScheduler/misched_x86_bottomup.ll
|
|
new file mode 100644
|
|
index 000000000000..c1d6894c3fe2
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/MachineScheduler/misched_x86_bottomup.ll
|
|
@@ -0,0 +1,72 @@
|
|
+; RUN: rm %t.bottomup_result %t.misched_x86_bottomup.yaml -rf
|
|
+; RUN: sed ' s#\[bool1\]#true#g; s#\[bool2\]#false#g' %S/Inputs/misched_x86_template.yaml > %t.misched_x86_bottomup.yaml
|
|
+; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \
|
|
+; RUN: -auto-tuning-input=%t.misched_x86_bottomup.yaml\
|
|
+; RUN: -verify-machineinstrs -debug-only=machine-scheduler 2>&1\
|
|
+; RUN: | FileCheck %s
|
|
+
|
|
+; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \
|
|
+; RUN: -auto-tuning-input=%t.misched_x86_bottomup.yaml\
|
|
+; RUN: -verify-machineinstrs -misched-topdown -debug-only=machine-scheduler 2>&1 \
|
|
+; RUN: | FileCheck %s -check-prefix=MIX-WITH-FLAG-TOPDOWN
|
|
+
|
|
+; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \
|
|
+; RUN: -auto-tuning-input=%t.misched_x86_bottomup.yaml\
|
|
+; RUN: -verify-machineinstrs -misched-bottomup -debug-only=machine-scheduler 2>&1 \
|
|
+; RUN: | FileCheck %s -check-prefix=MIX-WITH-FLAG-BOTTOMUP
|
|
+
|
|
+; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \
|
|
+; RUN: -auto-tuning-input=%t.misched_x86_bottomup.yaml\
|
|
+; RUN: -verify-machineinstrs -misched-bottomup=false -misched-topdown=false -debug-only=machine-scheduler 2>&1 \
|
|
+; RUN: | FileCheck %s -check-prefix=MIX-WITH-FLAG-BIDIRECTIONAL
|
|
+
|
|
+; REQUIRES: asserts
|
|
+; UNSUPPORTED: windows
|
|
+;
|
|
+; Interesting MachineScheduler cases.
|
|
+
|
|
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
|
|
+
|
|
+define fastcc void @_preextrapolate_helper() nounwind uwtable ssp {
|
|
+entry:
|
|
+ br i1 undef, label %for.cond.preheader, label %if.end
|
|
+
|
|
+for.cond.preheader: ; preds = %entry
|
|
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* null, i64 128, i32 4, i1 false) nounwind
|
|
+ unreachable
|
|
+
|
|
+if.end: ; preds = %entry
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; check if the scheduling policy defined with YAML is applied
|
|
+;
|
|
+; CHECK: _preextrapolate_helper:%bb.1 for.cond.preheader
|
|
+; CHECK: ScheduleDAGMILive::schedule starting
|
|
+; CHECK-NEXT: RegionPolicy: ShouldTrackPressure=0 OnlyTopDown=0 OnlyBottomUp=1
|
|
+
|
|
+
|
|
+; check if the scheduling policies defined with YAML and '-misched-topdown' are applied
|
|
+; MIX-WITH-FLAG-TOPDOWN: _preextrapolate_helper:%bb.0 entry
|
|
+; MIX-WITH-FLAG-TOPDOWN: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-TOPDOWN-NEXT: OnlyTopDown=1 OnlyBottomUp=0
|
|
+; MIX-WITH-FLAG-TOPDOWN: _preextrapolate_helper:%bb.1 for.cond.preheader
|
|
+; MIX-WITH-FLAG-TOPDOWN: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-TOPDOWN-NEXT: OnlyTopDown=0 OnlyBottomUp=1
|
|
+
|
|
+; check if the scheduling policies defined with YAML and '-misched-bottomup' are applied
|
|
+; MIX-WITH-FLAG-BOTTOMUP: _preextrapolate_helper:%bb.0 entry
|
|
+; MIX-WITH-FLAG-BOTTOMUP: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-BOTTOMUP-NEXT: OnlyTopDown=0 OnlyBottomUp=1
|
|
+; MIX-WITH-FLAG-BOTTOMUP: _preextrapolate_helper:%bb.1 for.cond.preheader
|
|
+; MIX-WITH-FLAG-BOTTOMUP: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-BOTTOMUP-NEXT: OnlyTopDown=0 OnlyBottomUp=1
|
|
+
|
|
+; check if the scheduling policies defined with YAML and '-misched-topdown=false' and '-misched-bottomup=false'
|
|
+; are applied
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL: _preextrapolate_helper:%bb.0 entry
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL-NEXT: RegionPolicy: ShouldTrackPressure=0 OnlyTopDown=0 OnlyBottomUp=0
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL: _preextrapolate_helper:%bb.1 for.cond.preheader
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL-NEXT: OnlyTopDown=0 OnlyBottomUp=1
|
|
diff --git a/llvm/test/AutoTuning/MachineScheduler/misched_x86_topdown.ll b/llvm/test/AutoTuning/MachineScheduler/misched_x86_topdown.ll
|
|
new file mode 100644
|
|
index 000000000000..53c527e87e41
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/MachineScheduler/misched_x86_topdown.ll
|
|
@@ -0,0 +1,72 @@
|
|
+; RUN: rm %t.topdown_result %t.misched_x86_topdown.yaml -rf
|
|
+; RUN: sed 's#\[bool1\]#false#g; s#\[bool2\]#true#g' %S/Inputs/misched_x86_template.yaml > %t.misched_x86_topdown.yaml
|
|
+; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \
|
|
+; RUN: -auto-tuning-input=%t.misched_x86_topdown.yaml\
|
|
+; RUN: -verify-machineinstrs -debug-only=machine-scheduler 2>&1\
|
|
+; RUN: | FileCheck %s
|
|
+
|
|
+; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \
|
|
+; RUN: -auto-tuning-input=%t.misched_x86_topdown.yaml\
|
|
+; RUN: -verify-machineinstrs -misched-topdown -debug-only=machine-scheduler 2>&1 \
|
|
+; RUN: | FileCheck %s -check-prefix=MIX-WITH-FLAG-TOPDOWN
|
|
+
|
|
+; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \
|
|
+; RUN: -auto-tuning-input=%t.misched_x86_topdown.yaml\
|
|
+; RUN: -verify-machineinstrs -misched-bottomup -debug-only=machine-scheduler 2>&1 \
|
|
+; RUN: | FileCheck %s -check-prefix=MIX-WITH-FLAG-BOTTOMUP
|
|
+
|
|
+; RUN: llc -o - %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \
|
|
+; RUN: -auto-tuning-input=%t.misched_x86_topdown.yaml\
|
|
+; RUN: -verify-machineinstrs -misched-bottomup=false -misched-topdown=false -debug-only=machine-scheduler 2>&1 \
|
|
+; RUN: | FileCheck %s -check-prefix=MIX-WITH-FLAG-BIDIRECTIONAL
|
|
+
|
|
+; REQUIRES: asserts
|
|
+; UNSUPPORTED: windows
|
|
+;
|
|
+; Interesting MachineScheduler cases.
|
|
+
|
|
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
|
|
+
|
|
+define fastcc void @_preextrapolate_helper() nounwind uwtable ssp { ; function under test; its blocks are matched by name in the expected scheduler debug output
|
|
+entry: ; shows up as %bb.0 in the expected output
|
|
+ br i1 undef, label %for.cond.preheader, label %if.end ; two-way branch on an undef condition
|
|
+
|
|
+for.cond.preheader: ; preds = %entry; shows up as %bb.1 in the expected output
|
|
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* null, i64 128, i32 4, i1 false) nounwind ; 128-byte memcpy, legacy 5-operand form (i32 4 is the alignment operand)
|
|
+ unreachable
|
|
+
|
|
+if.end: ; preds = %entry
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; check if the scheduling policy defined with YAML is applied
|
|
+;
|
|
+; CHECK: _preextrapolate_helper:%bb.1 for.cond.preheader
|
|
+; CHECK: ScheduleDAGMILive::schedule starting
|
|
+; CHECK-NEXT: OnlyTopDown=1 OnlyBottomUp=0
|
|
+
|
|
+
|
|
+; check if the scheduling policies defined with YAML and '-misched-topdown' are applied
|
|
+; MIX-WITH-FLAG-TOPDOWN: _preextrapolate_helper:%bb.0 entry
|
|
+; MIX-WITH-FLAG-TOPDOWN: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-TOPDOWN-NEXT: OnlyTopDown=1 OnlyBottomUp=0
|
|
+; MIX-WITH-FLAG-TOPDOWN: _preextrapolate_helper:%bb.1 for.cond.preheader
|
|
+; MIX-WITH-FLAG-TOPDOWN: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-TOPDOWN-NEXT: OnlyTopDown=1 OnlyBottomUp=0
|
|
+
|
|
+; check if the scheduling policies defined with YAML and '-misched-bottomup' are applied
|
|
+; MIX-WITH-FLAG-BOTTOMUP: _preextrapolate_helper:%bb.0 entry
|
|
+; MIX-WITH-FLAG-BOTTOMUP: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-BOTTOMUP-NEXT: OnlyTopDown=0 OnlyBottomUp=1
|
|
+; MIX-WITH-FLAG-BOTTOMUP: _preextrapolate_helper:%bb.1 for.cond.preheader
|
|
+; MIX-WITH-FLAG-BOTTOMUP: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-BOTTOMUP-NEXT: OnlyTopDown=1 OnlyBottomUp=0
|
|
+
|
|
+; check if the scheduling policies defined with YAML and '-misched-topdown=false' and '-misched-bottomup=false'
|
|
+; are applied
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL: _preextrapolate_helper:%bb.0 entry
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL-NEXT: OnlyTopDown=0 OnlyBottomUp=0
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL: _preextrapolate_helper:%bb.1 for.cond.preheader
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL: ScheduleDAGMILive::schedule starting
|
|
+; MIX-WITH-FLAG-BIDIRECTIONAL-NEXT: OnlyTopDown=1 OnlyBottomUp=0
|
|
diff --git a/llvm/test/AutoTuning/MetaData/structural_hash.ll b/llvm/test/AutoTuning/MetaData/structural_hash.ll
|
|
new file mode 100644
|
|
index 000000000000..2d8adca910bc
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/MetaData/structural_hash.ll
|
|
@@ -0,0 +1,234 @@
|
|
+; RUN: rm %t.hash_opp -rf
|
|
+; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \
|
|
+; RUN: -auto-tuning-opp=%t.hash_opp -auto-tuning-type-filter=CallSite --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.hash_opp/structural_hash.ll.yaml -check-prefix=META-CALL1
|
|
+; RUN: FileCheck %s --input-file %t.hash_opp/structural_hash.ll.yaml -check-prefix=META-CALL2
|
|
+; RUN: FileCheck %s --input-file %t.hash_opp/structural_hash.ll.yaml -check-prefix=META-CALL3
|
|
+
|
|
+; RUN: rm %t.hash_opp -rf
|
|
+; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \
|
|
+; RUN: -auto-tuning-type-filter=CallSite -auto-tuning-opp=%t.hash_opp \
|
|
+; RUN: -auto-tuning-omit-metadata --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.hash_opp/structural_hash.ll.yaml -check-prefix=NO-META-CALL
|
|
+
|
|
+; UNSUPPORTED: windows
|
|
+
|
|
+; ModuleID = 'loop_small.cpp'
|
|
+source_filename = "loop_small.cpp"
|
|
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|
+target triple = "aarch64-unknown-linux-gnu"
|
|
+
|
|
+@arr = dso_local global [1000000 x i32] zeroinitializer, align 4, !dbg !0
|
|
+
|
|
+; Function Attrs: nounwind uwtable mustprogress
|
|
+define dso_local void @_Z1fv() #0 !dbg !18 { ; f(): adds 2 to arr[i] for i in [0, 2000)
|
|
+entry:
|
|
+ %i = alloca i32, align 4
|
|
+ call void @llvm.dbg.declare(metadata i32* %i, metadata !21, metadata !DIExpression()), !dbg !23
|
|
+ store i32 0, i32* %i, align 4, !dbg !23 ; i = 0
|
|
+ br label %for.cond, !dbg !24
|
|
+
|
|
+for.cond: ; preds = %for.inc, %entry
|
|
+ %0 = load i32, i32* %i, align 4, !dbg !25
|
|
+ %cmp = icmp slt i32 %0, 2000, !dbg !27 ; loop while i < 2000
|
|
+ br i1 %cmp, label %for.body, label %for.end, !dbg !28
|
|
+
|
|
+for.body: ; preds = %for.cond
|
|
+ %1 = load i32, i32* %i, align 4, !dbg !29
|
|
+ %idxprom = sext i32 %1 to i64, !dbg !31
|
|
+ %arrayidx = getelementptr inbounds [1000000 x i32], [1000000 x i32]* @arr, i64 0, i64 %idxprom, !dbg !31 ; &arr[i]
|
|
+ %2 = load i32, i32* %arrayidx, align 4, !dbg !32
|
|
+ %add = add nsw i32 %2, 2, !dbg !32 ; arr[i] + 2
|
|
+ store i32 %add, i32* %arrayidx, align 4, !dbg !32
|
|
+ br label %for.inc, !dbg !33
|
|
+
|
|
+for.inc: ; preds = %for.body
|
|
+ %3 = load i32, i32* %i, align 4, !dbg !34
|
|
+ %inc = add nsw i32 %3, 1, !dbg !34 ; i + 1
|
|
+ store i32 %inc, i32* %i, align 4, !dbg !34
|
|
+ br label %for.cond, !dbg !35, !llvm.loop !36 ; back edge carrying the loop metadata
|
|
+
|
|
+for.end: ; preds = %for.cond
|
|
+ ret void, !dbg !39
|
|
+}
|
|
+
|
|
+; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
|
|
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
|
|
+
|
|
+; Function Attrs: nounwind uwtable mustprogress
|
|
+define dso_local void @_Z1gv() #0 !dbg !40 { ; g(): increments arr[0] by 1; straight-line code, no loop
|
|
+entry:
|
|
+ %0 = load i32, i32* getelementptr inbounds ([1000000 x i32], [1000000 x i32]* @arr, i64 0, i64 0), align 4, !dbg !41 ; read arr[0]
|
|
+ %inc = add nsw i32 %0, 1, !dbg !41
|
|
+ store i32 %inc, i32* getelementptr inbounds ([1000000 x i32], [1000000 x i32]* @arr, i64 0, i64 0), align 4, !dbg !41 ; write arr[0]
|
|
+ ret void, !dbg !42
|
|
+}
|
|
+
|
|
+; Function Attrs: norecurse nounwind uwtable mustprogress
|
|
+define dso_local i32 @main() #2 !dbg !43 { ; main(): zeroes arr[0..999999], then calls f, g, f; these three calls are the call sites the expectations at the end of the file describe
|
|
+entry:
|
|
+ %retval = alloca i32, align 4
|
|
+ %i = alloca i32, align 4
|
|
+ store i32 0, i32* %retval, align 4 ; return value slot starts at 0 and is not written again in this function
|
|
+ call void @llvm.dbg.declare(metadata i32* %i, metadata !46, metadata !DIExpression()), !dbg !48
|
|
+ store i32 0, i32* %i, align 4, !dbg !48 ; i = 0
|
|
+ br label %for.cond, !dbg !49
|
|
+
|
|
+for.cond: ; preds = %for.inc, %entry
|
|
+ %0 = load i32, i32* %i, align 4, !dbg !50
|
|
+ %cmp = icmp slt i32 %0, 1000000, !dbg !52 ; loop while i < 1000000
|
|
+ br i1 %cmp, label %for.body, label %for.end, !dbg !53
|
|
+
|
|
+for.body: ; preds = %for.cond
|
|
+ %1 = load i32, i32* %i, align 4, !dbg !54
|
|
+ %idxprom = sext i32 %1 to i64, !dbg !55
|
|
+ %arrayidx = getelementptr inbounds [1000000 x i32], [1000000 x i32]* @arr, i64 0, i64 %idxprom, !dbg !55 ; &arr[i]
|
|
+ store i32 0, i32* %arrayidx, align 4, !dbg !56 ; arr[i] = 0
|
|
+ br label %for.inc, !dbg !55
|
|
+
|
|
+for.inc: ; preds = %for.body
|
|
+ %2 = load i32, i32* %i, align 4, !dbg !57
|
|
+ %inc = add nsw i32 %2, 1, !dbg !57 ; i + 1
|
|
+ store i32 %inc, i32* %i, align 4, !dbg !57
|
|
+ br label %for.cond, !dbg !58, !llvm.loop !59 ; back edge carrying the loop metadata
|
|
+
|
|
+for.end: ; preds = %for.cond
|
|
+ call void @_Z1fv(), !dbg !61 ; call site at source line 14, column 2
|
|
+ call void @_Z1gv(), !dbg !62 ; call site at source line 15, column 2
|
|
+ call void @_Z1fv(), !dbg !63 ; call site at source line 16, column 2
|
|
+ %3 = load i32, i32* %retval, align 4, !dbg !64
|
|
+ ret i32 %3, !dbg !64
|
|
+}
|
|
+
|
|
+attributes #0 = { nounwind uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
+attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
|
|
+attributes #2 = { norecurse nounwind uwtable mustprogress "disable-tail-calls"="false" "frame-pointer"="non-leaf" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
+
|
|
+!llvm.dbg.cu = !{!2}
|
|
+!llvm.module.flags = !{!10, !11, !12, !13, !14, !15, !16}
|
|
+!llvm.ident = !{!17}
|
|
+
|
|
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
|
|
+!1 = distinct !DIGlobalVariable(name: "arr", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true)
|
|
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "Huawei Bisheng Compiler clang version 12.0.0 (clang-6d7704116510 flang-6d7704116510)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None)
|
|
+!3 = !DIFile(filename: "loop_small.cpp", directory: "/home/g84189222/boole3/llvm-project/tuneTest")
|
|
+!4 = !{}
|
|
+!5 = !{!0}
|
|
+!6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 32000000, elements: !8)
|
|
+!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
|
+!8 = !{!9}
|
|
+!9 = !DISubrange(count: 1000000)
|
|
+!10 = !{i32 7, !"Dwarf Version", i32 4}
|
|
+!11 = !{i32 2, !"Debug Info Version", i32 3}
|
|
+!12 = !{i32 1, !"wchar_size", i32 4}
|
|
+!13 = !{i32 1, !"branch-target-enforcement", i32 0}
|
|
+!14 = !{i32 1, !"sign-return-address", i32 0}
|
|
+!15 = !{i32 1, !"sign-return-address-all", i32 0}
|
|
+!16 = !{i32 1, !"sign-return-address-with-bkey", i32 0}
|
|
+!17 = !{!"Huawei Bisheng Compiler clang version 12.0.0 (clang-6d7704116510 flang-6d7704116510)"}
|
|
+!18 = distinct !DISubprogram(name: "f", linkageName: "_Z1fv", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !4)
|
|
+!19 = !DISubroutineType(types: !20)
|
|
+!20 = !{null}
|
|
+!21 = !DILocalVariable(name: "i", scope: !22, file: !3, line: 4, type: !7)
|
|
+!22 = distinct !DILexicalBlock(scope: !18, file: !3, line: 4, column: 2)
|
|
+!23 = !DILocation(line: 4, column: 10, scope: !22)
|
|
+!24 = !DILocation(line: 4, column: 6, scope: !22)
|
|
+!25 = !DILocation(line: 4, column: 15, scope: !26)
|
|
+!26 = distinct !DILexicalBlock(scope: !22, file: !3, line: 4, column: 2)
|
|
+!27 = !DILocation(line: 4, column: 16, scope: !26)
|
|
+!28 = !DILocation(line: 4, column: 2, scope: !22)
|
|
+!29 = !DILocation(line: 5, column: 7, scope: !30)
|
|
+!30 = distinct !DILexicalBlock(scope: !26, file: !3, line: 4, column: 27)
|
|
+!31 = !DILocation(line: 5, column: 3, scope: !30)
|
|
+!32 = !DILocation(line: 5, column: 10, scope: !30)
|
|
+!33 = !DILocation(line: 6, column: 2, scope: !30)
|
|
+!34 = !DILocation(line: 4, column: 24, scope: !26)
|
|
+!35 = !DILocation(line: 4, column: 2, scope: !26)
|
|
+!36 = distinct !{!36, !28, !37, !38}
|
|
+!37 = !DILocation(line: 6, column: 2, scope: !22)
|
|
+!38 = !{!"llvm.loop.mustprogress"}
|
|
+!39 = !DILocation(line: 7, column: 1, scope: !18)
|
|
+!40 = distinct !DISubprogram(name: "g", linkageName: "_Z1gv", scope: !3, file: !3, line: 8, type: !19, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !4)
|
|
+!41 = !DILocation(line: 9, column: 8, scope: !40)
|
|
+!42 = !DILocation(line: 10, column: 1, scope: !40)
|
|
+!43 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 12, type: !44, scopeLine: 12, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !4)
|
|
+!44 = !DISubroutineType(types: !45)
|
|
+!45 = !{!7}
|
|
+!46 = !DILocalVariable(name: "i", scope: !47, file: !3, line: 13, type: !7)
|
|
+!47 = distinct !DILexicalBlock(scope: !43, file: !3, line: 13, column: 2)
|
|
+!48 = !DILocation(line: 13, column: 10, scope: !47)
|
|
+!49 = !DILocation(line: 13, column: 6, scope: !47)
|
|
+!50 = !DILocation(line: 13, column: 15, scope: !51)
|
|
+!51 = distinct !DILexicalBlock(scope: !47, file: !3, line: 13, column: 2)
|
|
+!52 = !DILocation(line: 13, column: 16, scope: !51)
|
|
+!53 = !DILocation(line: 13, column: 2, scope: !47)
|
|
+!54 = !DILocation(line: 13, column: 35, scope: !51)
|
|
+!55 = !DILocation(line: 13, column: 31, scope: !51)
|
|
+!56 = !DILocation(line: 13, column: 38, scope: !51)
|
|
+!57 = !DILocation(line: 13, column: 27, scope: !51)
|
|
+!58 = !DILocation(line: 13, column: 2, scope: !51)
|
|
+!59 = distinct !{!59, !53, !60, !38}
|
|
+!60 = !DILocation(line: 13, column: 40, scope: !47)
|
|
+!61 = !DILocation(line: 14, column: 2, scope: !43)
|
|
+!62 = !DILocation(line: 15, column: 2, scope: !43)
|
|
+!63 = !DILocation(line: 16, column: 2, scope: !43)
|
|
+!64 = !DILocation(line: 17, column: 1, scope: !43)
|
|
+
|
|
+; META-CALL1: --- !AutoTuning
|
|
+; META-CALL1: Pass: inline
|
|
+; META-CALL1: Name: _Z1fv
|
|
+; META-CALL1: DebugLoc: { File: loop_small.cpp, Line: 14, Column: 2 }
|
|
+; META-CALL1-NEXT: Function: main
|
|
+; META-CALL1-NEXT: CodeRegionType: callsite
|
|
+; META-CALL1-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; META-CALL1-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] }
|
|
+; META-CALL1-NEXT: BaselineConfig: { ForceInline: '1' }
|
|
+; META-CALL1-NEXT: Invocation: 0
|
|
+; META-CALL1-NEXT: ...
|
|
+; META-CALL2: --- !AutoTuning
|
|
+; META-CALL2: Pass: inline
|
|
+; META-CALL2: Name: _Z1fv
|
|
+; META-CALL2: DebugLoc: { File: loop_small.cpp, Line: 16, Column: 2 }
|
|
+; META-CALL2-NEXT: Function: main
|
|
+; META-CALL2-NEXT: CodeRegionType: callsite
|
|
+; META-CALL2-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; META-CALL2-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] }
|
|
+; META-CALL2-NEXT: BaselineConfig: { ForceInline: '1' }
|
|
+; META-CALL2-NEXT: Invocation: 0
|
|
+; META-CALL2-NEXT: ...
|
|
+; META-CALL3: --- !AutoTuning
|
|
+; META-CALL3: Pass: inline
|
|
+; META-CALL3: Name: _Z1gv
|
|
+; META-CALL3: DebugLoc: { File: loop_small.cpp, Line: 15, Column: 2 }
|
|
+; META-CALL3-NEXT: Function: main
|
|
+; META-CALL3-NEXT: CodeRegionType: callsite
|
|
+; META-CALL3-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; META-CALL3-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] }
|
|
+; META-CALL3-NEXT: BaselineConfig: { ForceInline: '1' }
|
|
+; META-CALL3-NEXT: Invocation: 0
|
|
+; META-CALL3-NEXT: ...
|
|
+
|
|
+; NO-META-CALL: --- !AutoTuning
|
|
+; NO-META-CALL-NEXT: Pass: inline
|
|
+; NO-META-CALL-NEXT: CodeRegionType: callsite
|
|
+; NO-META-CALL-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; NO-META-CALL-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] }
|
|
+; NO-META-CALL-NEXT: BaselineConfig: { ForceInline: '1' }
|
|
+; NO-META-CALL-NEXT: Invocation: 0
|
|
+; NO-META-CALL-NEXT: ...
|
|
+; NO-META-CALL-NEXT: --- !AutoTuning
|
|
+; NO-META-CALL-NEXT: Pass: inline
|
|
+; NO-META-CALL-NEXT: CodeRegionType: callsite
|
|
+; NO-META-CALL-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; NO-META-CALL-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] }
|
|
+; NO-META-CALL-NEXT: BaselineConfig: { ForceInline: '1' }
|
|
+; NO-META-CALL-NEXT: Invocation: 0
|
|
+; NO-META-CALL-NEXT: ...
|
|
+; NO-META-CALL-NEXT: --- !AutoTuning
|
|
+; NO-META-CALL-NEXT: Pass: inline
|
|
+; NO-META-CALL-NEXT: CodeRegionType: callsite
|
|
+; NO-META-CALL-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; NO-META-CALL-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] }
|
|
+; NO-META-CALL-NEXT: BaselineConfig: { ForceInline: '1' }
|
|
+; NO-META-CALL-NEXT: Invocation: 0
|
|
+; NO-META-CALL-NEXT: ...
|
|
diff --git a/llvm/test/AutoTuning/MetaData/write_no_metadata.ll b/llvm/test/AutoTuning/MetaData/write_no_metadata.ll
|
|
new file mode 100644
|
|
index 000000000000..344a3548a74f
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/MetaData/write_no_metadata.ll
|
|
@@ -0,0 +1,191 @@
|
|
+; REQUIRES: x86-registered-target
|
|
+; RUN: rm %t.default_opp -rf
|
|
+; RUN: opt %s -S -auto-tuning-opp=%t.default_opp -auto-tuning-omit-metadata=1 \
|
|
+; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.default_opp/write_no_metadata.ll.yaml -check-prefix=DEFAULT
|
|
+
|
|
+; RUN: rm %t.module_opp -rf
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-opp=%t.module_opp -auto-tuning-type-filter=Other \
|
|
+; RUN: -auto-tuning-omit-metadata=1 --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.module_opp/write_no_metadata.ll.yaml -check-prefix=OTHER
|
|
+
|
|
+; RUN: rm %t.loop_opp -rf
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-opp=%t.loop_opp -auto-tuning-type-filter=Loop \
|
|
+; RUN: -auto-tuning-omit-metadata=1 --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.loop_opp/write_no_metadata.ll.yaml -check-prefix=LOOP
|
|
+
|
|
+; RUN: rm %t.function_opp -rf
|
|
+; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \
|
|
+; RUN: -auto-tuning-opp=%t.function_opp -auto-tuning-type-filter=CallSite \
|
|
+; RUN: -auto-tuning-omit-metadata=1 --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.function_opp/write_no_metadata.ll.yaml -check-prefix=CALLSITE
|
|
+
|
|
+; RUN: rm %t.function_loop_opp -rf
|
|
+; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \
|
|
+; RUN: -auto-tuning-opp=%t.function_loop_opp -auto-tuning-omit-metadata=1 \
|
|
+; RUN: -auto-tuning-type-filter=CallSite,Loop --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.function_loop_opp/write_no_metadata.ll.yaml -check-prefix=CALLSITE-LOOP1
|
|
+; RUN: FileCheck %s --input-file %t.function_loop_opp/write_no_metadata.ll.yaml -check-prefix=CALLSITE-LOOP2
|
|
+
|
|
+; UNSUPPORTED: windows
|
|
+
|
|
+; ModuleID = 'loop-opp.c'
|
|
+source_filename = "loop-opp.c"
|
|
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
+target triple = "x86_64-unknown-linux-gnu"
|
|
+
|
|
+; Function Attrs: noinline nounwind uwtable
|
|
+define i32 @test(i32* %n) #0 !dbg !6 { ; test(n): calls callee(6), then counts b up once per iteration for i in [0, *n) and returns b
|
|
+entry:
|
|
+ call void @callee(i32 6), !dbg !18 ; the only non-intrinsic call in this file; presumably the call site the inline expectations refer to
|
|
+ %n.addr = alloca i32*, align 8
|
|
+ %b = alloca i32, align 4
|
|
+ %i = alloca i32, align 4
|
|
+ store i32* %n, i32** %n.addr, align 8
|
|
+ call void @llvm.dbg.declare(metadata i32** %n.addr, metadata !11, metadata !12), !dbg !13
|
|
+ call void @llvm.dbg.declare(metadata i32* %b, metadata !14, metadata !12), !dbg !15
|
|
+ store i32 0, i32* %b, align 4, !dbg !15 ; b = 0
|
|
+ call void @llvm.dbg.declare(metadata i32* %i, metadata !16, metadata !12), !dbg !18
|
|
+ store i32 0, i32* %i, align 4, !dbg !18 ; i = 0
|
|
+ br label %for.cond, !dbg !19
|
|
+
|
|
+for.cond: ; preds = %for.inc, %entry
|
|
+ %0 = load i32, i32* %i, align 4, !dbg !20
|
|
+ %1 = load i32*, i32** %n.addr, align 8, !dbg !23
|
|
+ %2 = load i32, i32* %1, align 4, !dbg !24 ; *n is re-read on every iteration, so the bound is a runtime value
|
|
+ %cmp = icmp slt i32 %0, %2, !dbg !25 ; loop while i < *n
|
|
+ br i1 %cmp, label %for.body, label %for.end, !dbg !26
|
|
+
|
|
+for.body: ; preds = %for.cond
|
|
+ %3 = load i32, i32* %b, align 4, !dbg !28
|
|
+ %add = add nsw i32 %3, 1, !dbg !30 ; b + 1
|
|
+ store i32 %add, i32* %b, align 4, !dbg !31
|
|
+ br label %for.inc, !dbg !32
|
|
+
|
|
+for.inc: ; preds = %for.body
|
|
+ %4 = load i32, i32* %i, align 4, !dbg !33
|
|
+ %inc = add nsw i32 %4, 1, !dbg !33 ; i + 1
|
|
+ store i32 %inc, i32* %i, align 4, !dbg !33
|
|
+ br label %for.cond, !dbg !35, !llvm.loop !36 ; back edge carrying the loop metadata
|
|
+
|
|
+for.end: ; preds = %for.cond
|
|
+ %5 = load i32, i32* %b, align 4, !dbg !39
|
|
+ ret i32 %5, !dbg !40 ; return b
|
|
+}
|
|
+
|
|
+@a = global i32 4
|
|
+define void @callee(i32 %a) #2 { ; small function called once from @test with the constant 6; returns nothing
|
|
+entry:
|
|
+ %a1 = load volatile i32, i32* @a ; volatile read of the global @a
|
|
+ %x1 = add i32 %a1, %a1
|
|
+ %add = add i32 %x1, %a ; result is not used by any later instruction
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; Function Attrs: nounwind readnone
|
|
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
|
|
+
|
|
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
+attributes #1 = { nounwind readnone }
|
|
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } ; group used by @callee; previously a second definition of #0, which left #2 undefined
|
|
+
|
|
+!llvm.dbg.cu = !{!0}
|
|
+!llvm.module.flags = !{!3, !4}
|
|
+!llvm.ident = !{!5}
|
|
+
|
|
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "" ,isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
|
|
+!1 = !DIFile(filename: "loop-opp.c", directory: "")
|
|
+!2 = !{}
|
|
+!3 = !{i32 2, !"Dwarf Version", i32 4}
|
|
+!4 = !{i32 2, !"Debug Info Version", i32 3}
|
|
+!5 = !{!""}
|
|
+!6 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
|
|
+!7 = !DISubroutineType(types: !8)
|
|
+!8 = !{!9, !10}
|
|
+!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
|
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64)
|
|
+!11 = !DILocalVariable(name: "n", arg: 1, scope: !6, file: !1, line: 1, type: !10)
|
|
+!12 = !DIExpression()
|
|
+!13 = !DILocation(line: 1, column: 20, scope: !6)
|
|
+!14 = !DILocalVariable(name: "b", scope: !6, file: !1, line: 3, type: !9)
|
|
+!15 = !DILocation(line: 3, column: 9, scope: !6)
|
|
+!16 = !DILocalVariable(name: "i", scope: !17, file: !1, line: 4, type: !9)
|
|
+!17 = distinct !DILexicalBlock(scope: !6, file: !1, line: 4, column: 5)
|
|
+!18 = !DILocation(line: 4, column: 14, scope: !17)
|
|
+!19 = !DILocation(line: 4, column: 10, scope: !17)
|
|
+!20 = !DILocation(line: 4, column: 20, scope: !21)
|
|
+!21 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 1)
|
|
+!22 = distinct !DILexicalBlock(scope: !17, file: !1, line: 4, column: 5)
|
|
+!23 = !DILocation(line: 4, column: 25, scope: !21)
|
|
+!24 = !DILocation(line: 4, column: 24, scope: !21)
|
|
+!25 = !DILocation(line: 4, column: 22, scope: !21)
|
|
+!26 = !DILocation(line: 4, column: 5, scope: !27)
|
|
+!27 = !DILexicalBlockFile(scope: !17, file: !1, discriminator: 1)
|
|
+!28 = !DILocation(line: 6, column: 11, scope: !29)
|
|
+!29 = distinct !DILexicalBlock(scope: !22, file: !1, line: 5, column: 5)
|
|
+!30 = !DILocation(line: 6, column: 12, scope: !29)
|
|
+!31 = !DILocation(line: 6, column: 9, scope: !29)
|
|
+!32 = !DILocation(line: 7, column: 5, scope: !29)
|
|
+!33 = !DILocation(line: 4, column: 28, scope: !34)
|
|
+!34 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 2)
|
|
+!35 = !DILocation(line: 4, column: 5, scope: !34)
|
|
+!36 = distinct !{!36, !37, !38}
|
|
+!37 = !DILocation(line: 4, column: 5, scope: !17)
|
|
+!38 = !DILocation(line: 7, column: 5, scope: !17)
|
|
+!39 = !DILocation(line: 8, column: 12, scope: !6)
|
|
+!40 = !DILocation(line: 8, column: 5, scope: !6)
|
|
+
|
|
+; DEFAULT: --- !AutoTuning
|
|
+; DEFAULT-NEXT: Pass: loop-unroll
|
|
+; DEFAULT-NEXT: CodeRegionType: loop
|
|
+; DEFAULT-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; COM: Clang generates dynamic values for UnrollCount, so we match them with a regex
|
|
+; DEFAULT-NEXT: DynamicConfigs: { UnrollCount: [ {{[0-9]+(, [0-9]+)*}} ] }
|
|
+; DEFAULT-NEXT: BaselineConfig: { UnrollCount: '{{[0-9]+}}' }
|
|
+; DEFAULT-NEXT: Invocation: 0
|
|
+; DEFAULT-NEXT: ...
|
|
+; DEFAULT-NEXT: --- !AutoTuning
|
|
+; DEFAULT-NEXT: Pass: all
|
|
+; DEFAULT-NEXT: CodeRegionType: other
|
|
+; COM: Module level hashes can differ based on the filepath so we check a regex
|
|
+; DEFAULT-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; DEFAULT-NEXT: DynamicConfigs: { }
|
|
+; DEFAULT-NEXT: BaselineConfig: { }
|
|
+; DEFAULT-NEXT: Invocation: 0
|
|
+; DEFAULT-NEXT: ...
|
|
+
|
|
+; LOOP: --- !AutoTuning
|
|
+; LOOP-NEXT: Pass: loop-unroll
|
|
+; LOOP-NEXT: CodeRegionType: loop
|
|
+; LOOP-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; COM: Clang generates dynamic values for UnrollCount, so we match them with a regex
|
|
+; LOOP-NEXT: DynamicConfigs: { UnrollCount: [ {{[0-9]+(, [0-9]+)*}} ] }
|
|
+; LOOP-NEXT: BaselineConfig: { UnrollCount: '{{[0-9]+}}' }
|
|
+; LOOP-NEXT: Invocation: 0
|
|
+; LOOP-NEXT: ...
|
|
+
|
|
+; CALLSITE: --- !AutoTuning
|
|
+; CALLSITE-NEXT: Pass: inline
|
|
+; CALLSITE-NEXT: CodeRegionType: callsite
|
|
+; CALLSITE-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; CALLSITE-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] }
|
|
+; CALLSITE-NEXT: BaselineConfig: { ForceInline: '1' }
|
|
+; CALLSITE-NEXT: Invocation: 0
|
|
+; CALLSITE-NEXT: ...
|
|
+
|
|
+; CALLSITE-LOOP1: CodeRegionType: loop
|
|
+; CALLSITE-LOOP1-NOT: CodeRegionType: other
|
|
+; CALLSITE-LOOP2: CodeRegionType: callsite
|
|
+; CALLSITE-LOOP2-NOT: CodeRegionType: other
|
|
+
|
|
+; OTHER: --- !AutoTuning
|
|
+; OTHER-NEXT: Pass: all
|
|
+; OTHER-NEXT: CodeRegionType: other
|
|
+; COM: Module level hashes can differ based on the filepath so we check a regex
|
|
+; OTHER-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; OTHER-NEXT: DynamicConfigs: { }
|
|
+; OTHER-NEXT: BaselineConfig: { }
|
|
+; OTHER-NEXT: Invocation: 0
|
|
+; OTHER-NEXT: ...
|
|
diff --git a/llvm/test/AutoTuning/MetaData/write_with_metadata.ll b/llvm/test/AutoTuning/MetaData/write_with_metadata.ll
|
|
new file mode 100644
|
|
index 000000000000..8b7ee9dcce37
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/MetaData/write_with_metadata.ll
|
|
@@ -0,0 +1,204 @@
|
|
+; REQUIRES: x86-registered-target
|
|
+; RUN: rm -rf %t.default_opp
|
|
+; RUN: opt %s -S -auto-tuning-opp=%t.default_opp -auto-tuning-omit-metadata=0 \
|
|
+; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.default_opp/write_with_metadata.ll.yaml -check-prefix=DEFAULT
|
|
+
|
|
+; RUN: rm -rf %t.module_opp
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-opp=%t.module_opp -auto-tuning-type-filter=Other \
|
|
+; RUN: -auto-tuning-omit-metadata=0 --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.module_opp/write_with_metadata.ll.yaml -check-prefix=OTHER
|
|
+
|
|
+; RUN: rm -rf %t.loop_opp
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-opp=%t.loop_opp -auto-tuning-type-filter=Loop \
|
|
+; RUN: -auto-tuning-omit-metadata=0 --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.loop_opp/write_with_metadata.ll.yaml -check-prefix=LOOP
|
|
+
|
|
+; RUN: rm -rf %t.function_opp
|
|
+; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \
|
|
+; RUN: -auto-tuning-opp=%t.function_opp -auto-tuning-type-filter=CallSite \
|
|
+; RUN: -auto-tuning-omit-metadata=0 --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.function_opp/write_with_metadata.ll.yaml -check-prefix=CALLSITE
|
|
+
|
|
+; RUN: rm -rf %t.function_loop_opp
|
|
+; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \
|
|
+; RUN: -auto-tuning-opp=%t.function_loop_opp -auto-tuning-type-filter=CallSite,Loop \
|
|
+; RUN: -auto-tuning-omit-metadata=0 --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.function_loop_opp/write_with_metadata.ll.yaml -check-prefix=CALLSITE-LOOP1
|
|
+; RUN: FileCheck %s --input-file %t.function_loop_opp/write_with_metadata.ll.yaml -check-prefix=CALLSITE-LOOP2
|
|
+
|
|
+; UNSUPPORTED: windows
|
|
+
|
|
+; ModuleID = 'loop-opp.c'
|
|
+source_filename = "loop-opp.c"
|
|
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
+target triple = "x86_64-unknown-linux-gnu"
|
|
+
|
|
+; Function Attrs: noinline nounwind uwtable
|
|
+define i32 @test(i32* %n) #0 !dbg !6 {
|
|
+entry:
|
|
+ call void @callee(i32 6), !dbg !18
|
|
+ %n.addr = alloca i32*, align 8
|
|
+ %b = alloca i32, align 4
|
|
+ %i = alloca i32, align 4
|
|
+ store i32* %n, i32** %n.addr, align 8
|
|
+ call void @llvm.dbg.declare(metadata i32** %n.addr, metadata !11, metadata !12), !dbg !13
|
|
+ call void @llvm.dbg.declare(metadata i32* %b, metadata !14, metadata !12), !dbg !15
|
|
+ store i32 0, i32* %b, align 4, !dbg !15
|
|
+ call void @llvm.dbg.declare(metadata i32* %i, metadata !16, metadata !12), !dbg !18
|
|
+ store i32 0, i32* %i, align 4, !dbg !18
|
|
+ br label %for.cond, !dbg !19
|
|
+
|
|
+for.cond: ; preds = %for.inc, %entry
|
|
+ %0 = load i32, i32* %i, align 4, !dbg !20
|
|
+ %1 = load i32*, i32** %n.addr, align 8, !dbg !23
|
|
+ %2 = load i32, i32* %1, align 4, !dbg !24
|
|
+ %cmp = icmp slt i32 %0, %2, !dbg !25
|
|
+ br i1 %cmp, label %for.body, label %for.end, !dbg !26
|
|
+
|
|
+for.body: ; preds = %for.cond
|
|
+ %3 = load i32, i32* %b, align 4, !dbg !28
|
|
+ %add = add nsw i32 %3, 1, !dbg !30
|
|
+ store i32 %add, i32* %b, align 4, !dbg !31
|
|
+ br label %for.inc, !dbg !32
|
|
+
|
|
+for.inc: ; preds = %for.body
|
|
+ %4 = load i32, i32* %i, align 4, !dbg !33
|
|
+ %inc = add nsw i32 %4, 1, !dbg !33
|
|
+ store i32 %inc, i32* %i, align 4, !dbg !33
|
|
+ br label %for.cond, !dbg !35, !llvm.loop !36
|
|
+
|
|
+for.end: ; preds = %for.cond
|
|
+ %5 = load i32, i32* %b, align 4, !dbg !39
|
|
+ ret i32 %5, !dbg !40
|
|
+}
|
|
+
|
|
+@a = global i32 4
|
|
+define void @callee(i32 %a) #2 {
|
|
+entry:
|
|
+ %a1 = load volatile i32, i32* @a
|
|
+ %x1 = add i32 %a1, %a1
|
|
+ %add = add i32 %x1, %a
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; Function Attrs: nounwind readnone
|
|
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
|
|
+
|
|
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
+attributes #1 = { nounwind readnone }
|
|
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
+
|
|
+!llvm.dbg.cu = !{!0}
|
|
+!llvm.module.flags = !{!3, !4}
|
|
+!llvm.ident = !{!5}
|
|
+
|
|
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "" ,isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
|
|
+!1 = !DIFile(filename: "loop-opp.c", directory: "")
|
|
+!2 = !{}
|
|
+!3 = !{i32 2, !"Dwarf Version", i32 4}
|
|
+!4 = !{i32 2, !"Debug Info Version", i32 3}
|
|
+!5 = !{!""}
|
|
+!6 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
|
|
+!7 = !DISubroutineType(types: !8)
|
|
+!8 = !{!9, !10}
|
|
+!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
|
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64)
|
|
+!11 = !DILocalVariable(name: "n", arg: 1, scope: !6, file: !1, line: 1, type: !10)
|
|
+!12 = !DIExpression()
|
|
+!13 = !DILocation(line: 1, column: 20, scope: !6)
|
|
+!14 = !DILocalVariable(name: "b", scope: !6, file: !1, line: 3, type: !9)
|
|
+!15 = !DILocation(line: 3, column: 9, scope: !6)
|
|
+!16 = !DILocalVariable(name: "i", scope: !17, file: !1, line: 4, type: !9)
|
|
+!17 = distinct !DILexicalBlock(scope: !6, file: !1, line: 4, column: 5)
|
|
+!18 = !DILocation(line: 4, column: 14, scope: !17)
|
|
+!19 = !DILocation(line: 4, column: 10, scope: !17)
|
|
+!20 = !DILocation(line: 4, column: 20, scope: !21)
|
|
+!21 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 1)
|
|
+!22 = distinct !DILexicalBlock(scope: !17, file: !1, line: 4, column: 5)
|
|
+!23 = !DILocation(line: 4, column: 25, scope: !21)
|
|
+!24 = !DILocation(line: 4, column: 24, scope: !21)
|
|
+!25 = !DILocation(line: 4, column: 22, scope: !21)
|
|
+!26 = !DILocation(line: 4, column: 5, scope: !27)
|
|
+!27 = !DILexicalBlockFile(scope: !17, file: !1, discriminator: 1)
|
|
+!28 = !DILocation(line: 6, column: 11, scope: !29)
|
|
+!29 = distinct !DILexicalBlock(scope: !22, file: !1, line: 5, column: 5)
|
|
+!30 = !DILocation(line: 6, column: 12, scope: !29)
|
|
+!31 = !DILocation(line: 6, column: 9, scope: !29)
|
|
+!32 = !DILocation(line: 7, column: 5, scope: !29)
|
|
+!33 = !DILocation(line: 4, column: 28, scope: !34)
|
|
+!34 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 2)
|
|
+!35 = !DILocation(line: 4, column: 5, scope: !34)
|
|
+!36 = distinct !{!36, !37, !38}
|
|
+!37 = !DILocation(line: 4, column: 5, scope: !17)
|
|
+!38 = !DILocation(line: 7, column: 5, scope: !17)
|
|
+!39 = !DILocation(line: 8, column: 12, scope: !6)
|
|
+!40 = !DILocation(line: 8, column: 5, scope: !6)
|
|
+
|
|
+; DEFAULT: --- !AutoTuning
|
|
+; DEFAULT-NEXT: Pass: loop-unroll
|
|
+; DEFAULT-NEXT: Name: for.cond
|
|
+; DEFAULT-NEXT: DebugLoc: { File: loop-opp.c, Line: 4, Column: 5 }
|
|
+; DEFAULT-NEXT: Function: test
|
|
+; DEFAULT-NEXT: CodeRegionType: loop
|
|
+; DEFAULT-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; DEFAULT-NEXT: DynamicConfigs: { UnrollCount: [ {{[0-9]+(, [0-9]+)*}} ] }
|
|
+; DEFAULT-NEXT: BaselineConfig: { UnrollCount: '{{[0-9]+}}' }
|
|
+; DEFAULT-NEXT: Invocation: 0
|
|
+; DEFAULT-NEXT: ...
|
|
+; DEFAULT-NEXT: --- !AutoTuning
|
|
+; DEFAULT-NEXT: Pass: all
|
|
+; DEFAULT-NEXT: Name:
|
|
+; DEFAULT-SAME: write_with_metadata.ll
|
|
+; DEFAULT-NEXT: Function: none
|
|
+; DEFAULT-NEXT: CodeRegionType: other
|
|
+; COM: Module level hashes can differ based on the filepath so we check a regex
|
|
+; DEFAULT-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; DEFAULT-NEXT: DynamicConfigs: { }
|
|
+; DEFAULT-NEXT: BaselineConfig: { }
|
|
+; DEFAULT-NEXT: Invocation: 0
|
|
+; DEFAULT-NEXT: ...
|
|
+
|
|
+; LOOP: --- !AutoTuning
|
|
+; LOOP-NEXT: Pass: loop-unroll
|
|
+; LOOP-NEXT: Name: for.cond
|
|
+; LOOP-NEXT: DebugLoc: { File: loop-opp.c, Line: 4, Column: 5 }
|
|
+; LOOP-NEXT: Function: test
|
|
+; LOOP-NEXT: CodeRegionType: loop
|
|
+; LOOP-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; LOOP-NEXT: DynamicConfigs: { UnrollCount: [ {{[0-9]+(, [0-9]+)*}} ] }
|
|
+; LOOP-NEXT: BaselineConfig: { UnrollCount: '{{[0-9]+}}' }
|
|
+; LOOP-NEXT: Invocation: 0
|
|
+; LOOP-NEXT: ...
|
|
+
|
|
+; CALLSITE: --- !AutoTuning
|
|
+; CALLSITE-NEXT: Pass: inline
|
|
+; CALLSITE-NEXT: Name: callee
|
|
+; CALLSITE-NEXT: DebugLoc: { File: loop-opp.c, Line: 4, Column: 14 }
|
|
+; CALLSITE-NEXT: Function: test
|
|
+; CALLSITE-NEXT: CodeRegionType: callsite
|
|
+; CALLSITE-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; CALLSITE-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] }
|
|
+; CALLSITE-NEXT: BaselineConfig: { ForceInline: '1' }
|
|
+; CALLSITE-NEXT: Invocation: 0
|
|
+; CALLSITE-NEXT: ...
|
|
+
|
|
+; CALLSITE-LOOP1: CodeRegionType: loop
|
|
+; CALLSITE-LOOP1-NOT: CodeRegionType: other
|
|
+; CALLSITE-LOOP2: CodeRegionType: callsite
|
|
+; CALLSITE-LOOP2-NOT: CodeRegionType: other
|
|
+
|
|
+; OTHER: --- !AutoTuning
|
|
+; OTHER-NEXT: Pass: all
|
|
+; OTHER-NEXT: Name:
|
|
+; OTHER-SAME: write_with_metadata
|
|
+; OTHER-NEXT: Function: none
|
|
+; OTHER-NEXT: CodeRegionType: other
|
|
+; COM: Module level hashes can differ based on the filepath so we check a regex
|
|
+; OTHER-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; OTHER-NEXT: DynamicConfigs: { }
|
|
+; OTHER-NEXT: BaselineConfig: { }
|
|
+; OTHER-NEXT: Invocation: 0
|
|
+; OTHER-NEXT: ...
|
|
diff --git a/llvm/test/AutoTuning/PGO/Inputs/pgo-instr.proftext b/llvm/test/AutoTuning/PGO/Inputs/pgo-instr.proftext
|
|
new file mode 100644
|
|
index 000000000000..6ed79897d78c
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/PGO/Inputs/pgo-instr.proftext
|
|
@@ -0,0 +1,17 @@
|
|
+# IR level Instrumentation Flag
|
|
+:ir
|
|
+hot
|
|
+# Func Hash:
|
|
+12884901887
|
|
+# Num Counters:
|
|
+1
|
|
+# Counter Values:
|
|
+9000
|
|
+
|
|
+cold
|
|
+# Func Hash:
|
|
+12884901887
|
|
+# Num Counters:
|
|
+1
|
|
+# Counter Values:
|
|
+10
|
|
diff --git a/llvm/test/AutoTuning/PGO/Inputs/pgo-sample-cold.prof b/llvm/test/AutoTuning/PGO/Inputs/pgo-sample-cold.prof
|
|
new file mode 100644
|
|
index 000000000000..a1cb2231992e
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/PGO/Inputs/pgo-sample-cold.prof
|
|
@@ -0,0 +1,7 @@
|
|
+main:225715:0
|
|
+ 2.1: 5553
|
|
+ 3: 5391
|
|
+ 3.1: _Z3sumii:0
|
|
+ 0: 0
|
|
+ 1: 0
|
|
+ 2: 0
|
|
diff --git a/llvm/test/AutoTuning/PGO/Inputs/pgo-sample-hot.prof b/llvm/test/AutoTuning/PGO/Inputs/pgo-sample-hot.prof
|
|
new file mode 100644
|
|
index 000000000000..386cdf8a7b5e
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/PGO/Inputs/pgo-sample-hot.prof
|
|
@@ -0,0 +1,7 @@
|
|
+main:225715:0
|
|
+ 2.1: 5553
|
|
+ 3: 5391
|
|
+ 3.1: _Z3sumii:5860
|
|
+ 0: 5279
|
|
+ 1: 5279
|
|
+ 2: 5279
|
|
diff --git a/llvm/test/AutoTuning/PGO/pgo-instr-filters.ll b/llvm/test/AutoTuning/PGO/pgo-instr-filters.ll
|
|
new file mode 100644
|
|
index 000000000000..6b279df18343
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/PGO/pgo-instr-filters.ll
|
|
@@ -0,0 +1,61 @@
|
|
+; RUN: rm -rf %t.default-opp
|
|
+; RUN: llvm-profdata merge %S/Inputs/pgo-instr.proftext -o %t.profdata
|
|
+; RUN: opt %s -passes='pgo-instr-use,inline' -pgo-test-profile-file=%t.profdata -S -auto-tuning-opp=%t.default-opp -auto-tuning-exclude-cold=false --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.default-opp/pgo-instr-filters.ll.yaml -check-prefix=NON-FILTER
|
|
+
|
|
+; RUN: rm -rf %t.filtered-opp
|
|
+; RUN: llvm-profdata merge %S/Inputs/pgo-instr.proftext -o %t.profdata
|
|
+; RUN: opt %s -passes='pgo-instr-use,inline' -pgo-test-profile-file=%t.profdata -S -auto-tuning-opp=%t.filtered-opp -auto-tuning-exclude-cold --disable-output -pgo-instr-old-cfg-hashing=true
|
|
+; RUN: FileCheck %s --input-file %t.filtered-opp/pgo-instr-filters.ll.yaml -check-prefix=EXCLUDE-COLD
|
|
+
|
|
+; RUN: rm -rf %t.filtered-opp
|
|
+; RUN: llvm-profdata merge %S/Inputs/pgo-instr.proftext -o %t.profdata
|
|
+; RUN: opt %s -passes='pgo-instr-use,inline' -pgo-test-profile-file=%t.profdata -S -auto-tuning-opp=%t.filtered-opp -auto-tuning-hot-only --disable-output -pgo-instr-old-cfg-hashing=true
|
|
+; RUN: FileCheck %s --input-file %t.filtered-opp/pgo-instr-filters.ll.yaml -check-prefix=HOT-ONLY
|
|
+
|
|
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
|
+target triple = "x86_64-unknown-linux-gnu"
|
|
+
|
|
+@s = common dso_local local_unnamed_addr global i32 0, align 4
|
|
+
|
|
+define void @cold() {
|
|
+
|
|
+entry:
|
|
+ %0 = tail call i32 @callee(i32 5)
|
|
+ store i32 1, i32* @s, align 4
|
|
+ ret void
|
|
+}
|
|
+
|
|
+define void @hot() {
|
|
+entry:
|
|
+ %0 = load i32, i32* @s, align 4
|
|
+ %1 = tail call i32 @callee(i32 5)
|
|
+ %add = add nsw i32 %0, 4
|
|
+ store i32 %add, i32* @s, align 4
|
|
+ ret void
|
|
+}
|
|
+
|
|
+define void @unknown() {
|
|
+entry:
|
|
+ %0 = tail call i32 @callee(i32 5)
|
|
+ store i32 1, i32* @s, align 4
|
|
+ ret void
|
|
+}
|
|
+
|
|
+define i32 @callee(i32 %a) {
|
|
+entry:
|
|
+ %add = add nsw i32 %a, 4
|
|
+ ret i32 %add
|
|
+}
|
|
+
|
|
+; NON-FILTER-DAG: Function: cold
|
|
+; NON-FILTER-DAG: Function: hot
|
|
+; NON-FILTER-DAG: Function: unknown
|
|
+
|
|
+; EXCLUDE-COLD-NOT: Function: cold
|
|
+; EXCLUDE-COLD-DAG: Function: hot
|
|
+; EXCLUDE-COLD-DAG: Function: unknown
|
|
+
|
|
+; HOT-ONLY-NOT: Function: unknown
|
|
+; HOT-ONLY-NOT: Function: cold
|
|
+; HOT-ONLY-DAG: Function: hot
|
|
diff --git a/llvm/test/AutoTuning/PGO/pgo-sample-filters.ll b/llvm/test/AutoTuning/PGO/pgo-sample-filters.ll
|
|
new file mode 100644
|
|
index 000000000000..aa93299a7079
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/PGO/pgo-sample-filters.ll
|
|
@@ -0,0 +1,138 @@
|
|
+; RUN: rm -rf %t.default-opp
|
|
+; RUN: opt %s -passes='sample-profile,inline' -sample-profile-file=%S/Inputs/pgo-sample-cold.prof -auto-tuning-opp=%t.default-opp -auto-tuning-exclude-cold=false --disable-output -S
|
|
+; RUN: FileCheck %s -check-prefix=NON-FILTER < %t.default-opp/pgo-sample-filters.ll.yaml
|
|
+
|
|
+; Test -auto-tuning-exclude-cold with a cold caller in sample profile.
|
|
+; RUN: rm -rf %t.filtered-opp
|
|
+; RUN: opt %s -passes='sample-profile,inline' -sample-profile-file=%S/Inputs/pgo-sample-cold.prof -auto-tuning-opp=%t.filtered-opp -auto-tuning-exclude-cold --disable-output -S
|
|
+; RUN: FileCheck %s -check-prefix=COLD-PROFILE-EXCLUDE-COLD < %t.filtered-opp/pgo-sample-filters.ll.yaml
|
|
+
|
|
+; Test -auto-tuning-hot-only with a cold caller in sample profile.
|
|
+; RUN: rm -rf %t.filtered-opp
|
|
+; RUN: opt %s -passes='sample-profile,inline' -sample-profile-file=%S/Inputs/pgo-sample-cold.prof -auto-tuning-opp=%t.filtered-opp -auto-tuning-hot-only --disable-output -S
|
|
+; RUN: FileCheck %s -check-prefix=COLD-PROFILE-HOT-ONLY < %t.filtered-opp/pgo-sample-filters.ll.yaml
|
|
+
|
|
+; Test -auto-tuning-exclude-cold with a hot caller in sample profile.
|
|
+; RUN: rm -rf %t.filtered-opp
|
|
+; RUN: opt %s -passes='sample-profile,inline' -sample-profile-file=%S/Inputs/pgo-sample-hot.prof -auto-tuning-opp=%t.filtered-opp -auto-tuning-exclude-cold --disable-output -S
|
|
+; RUN: FileCheck %s -check-prefix=HOT-PROFILE-EXCLUDE-COLD < %t.filtered-opp/pgo-sample-filters.ll.yaml
|
|
+
|
|
+; Test -auto-tuning-hot-only with a hot caller in sample profile.
|
|
+; RUN: rm -rf %t.filtered-opp
|
|
+; RUN: opt %s -passes='sample-profile,inline' -sample-profile-file=%S/Inputs/pgo-sample-hot.prof -auto-tuning-opp=%t.filtered-opp -auto-tuning-hot-only --disable-output -S
|
|
+; RUN: FileCheck %s -check-prefix=HOT-PROFILE-HOT-ONLY < %t.filtered-opp/pgo-sample-filters.ll.yaml
|
|
+
|
|
+
|
|
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
|
|
+
|
|
+define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 {
|
|
+entry:
|
|
+ %0 = tail call i32 @callee(i32 5)
|
|
+ %x.addr = alloca i32, align 4
|
|
+ %y.addr = alloca i32, align 4
|
|
+ store i32 %x, i32* %x.addr, align 4
|
|
+ store i32 %y, i32* %y.addr, align 4
|
|
+ %tmp = load i32, i32* %x.addr, align 4, !dbg !8
|
|
+ %tmp1 = load i32, i32* %y.addr, align 4, !dbg !8
|
|
+ %add = add nsw i32 %tmp, %tmp1, !dbg !8
|
|
+ ret i32 %add, !dbg !8
|
|
+}
|
|
+
|
|
+define i32 @main() #0 !dbg !9 {
|
|
+entry:
|
|
+ %0 = tail call i32 @callee(i32 5)
|
|
+ %retval = alloca i32, align 4
|
|
+ %s = alloca i32, align 4
|
|
+ %i = alloca i32, align 4
|
|
+ store i32 0, i32* %retval
|
|
+ store i32 0, i32* %i, align 4, !dbg !10
|
|
+ br label %while.cond, !dbg !11
|
|
+
|
|
+while.cond: ; preds = %if.end, %entry
|
|
+ %tmp = load i32, i32* %i, align 4, !dbg !12
|
|
+ %inc = add nsw i32 %tmp, 1, !dbg !12
|
|
+ store i32 %inc, i32* %i, align 4, !dbg !12
|
|
+ %cmp = icmp slt i32 %tmp, 400000000, !dbg !12
|
|
+ br i1 %cmp, label %while.body, label %while.end, !dbg !12
|
|
+
|
|
+while.body: ; preds = %while.cond
|
|
+ %tmp1 = load i32, i32* %i, align 4, !dbg !14
|
|
+ %cmp1 = icmp ne i32 %tmp1, 100, !dbg !14
|
|
+ br i1 %cmp1, label %if.then, label %if.else, !dbg !14
|
|
+
|
|
+if.then: ; preds = %while.body
|
|
+ %tmp2 = load i32, i32* %i, align 4, !dbg !16
|
|
+ %tmp3 = load i32, i32* %s, align 4, !dbg !16
|
|
+ %call = call i32 @_Z3sumii(i32 %tmp2, i32 %tmp3), !dbg !16
|
|
+; INLINE-NOT: call i32 @_Z3sumii
|
|
+; NOTINLINE: call i32 @_Z3sumii
|
|
+ store i32 %call, i32* %s, align 4, !dbg !16
|
|
+ br label %if.end, !dbg !16
|
|
+
|
|
+if.else: ; preds = %while.body
|
|
+ store i32 30, i32* %s, align 4, !dbg !18
|
|
+ br label %if.end
|
|
+
|
|
+if.end: ; preds = %if.else, %if.then
|
|
+ br label %while.cond, !dbg !20
|
|
+
|
|
+while.end: ; preds = %while.cond
|
|
+ %tmp4 = load i32, i32* %s, align 4, !dbg !22
|
|
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %tmp4), !dbg !22
|
|
+ ret i32 0, !dbg !23
|
|
+}
|
|
+
|
|
+define i32 @callee(i32 %a) #0 {
|
|
+entry:
|
|
+ %add = add nsw i32 %a, 4
|
|
+ ret i32 %add
|
|
+}
|
|
+
|
|
+declare i32 @printf(i8*, ...)
|
|
+
|
|
+!llvm.dbg.cu = !{!0}
|
|
+!llvm.module.flags = !{!3, !4}
|
|
+!llvm.ident = !{!5}
|
|
+
|
|
+attributes #0 = {"use-sample-profile"}
|
|
+
|
|
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
|
|
+!1 = !DIFile(filename: "calls.cc", directory: ".")
|
|
+!2 = !{}
|
|
+!3 = !{i32 2, !"Dwarf Version", i32 4}
|
|
+!4 = !{i32 1, !"Debug Info Version", i32 3}
|
|
+!5 = !{!"clang version 3.5 "}
|
|
+!6 = distinct !DISubprogram(name: "sum", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
|
|
+!7 = !DISubroutineType(types: !2)
|
|
+!8 = !DILocation(line: 4, scope: !6)
|
|
+!9 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
|
|
+!10 = !DILocation(line: 8, scope: !9)
|
|
+!11 = !DILocation(line: 9, scope: !9)
|
|
+!12 = !DILocation(line: 9, scope: !13)
|
|
+!13 = !DILexicalBlockFile(scope: !9, file: !1, discriminator: 2)
|
|
+!14 = !DILocation(line: 10, scope: !15)
|
|
+!15 = distinct !DILexicalBlock(scope: !9, file: !1, line: 10)
|
|
+!16 = !DILocation(line: 10, scope: !17)
|
|
+!17 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 2)
|
|
+!18 = !DILocation(line: 10, scope: !19)
|
|
+!19 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 4)
|
|
+!20 = !DILocation(line: 10, scope: !21)
|
|
+!21 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 6)
|
|
+!22 = !DILocation(line: 11, scope: !9)
|
|
+!23 = !DILocation(line: 12, scope: !9)
|
|
+
|
|
+; Note that hotness of main is unknown.
|
|
+; NON-FILTER-DAG: Function: _Z3sumii
|
|
+; NON-FILTER-DAG: Function: main
|
|
+
|
|
+; COLD-PROFILE-EXCLUDE-COLD-NOT: Function: _Z3sumii
|
|
+; COLD-PROFILE-EXCLUDE-COLD-DAG: Function: main
|
|
+
|
|
+; COLD-PROFILE-HOT-ONLY-NOT: Function: _Z3sumii
|
|
+; COLD-PROFILE-HOT-ONLY-NOT: Function: main
|
|
+
|
|
+; HOT-PROFILE-EXCLUDE-COLD-DAG: Function: _Z3sumii
|
|
+; HOT-PROFILE-EXCLUDE-COLD-DAG: Function: main
|
|
+
|
|
+; HOT-PROFILE-HOT-ONLY-NOT: Function: main
|
|
+; HOT-PROFILE-HOT-ONLY-DAG: Function: _Z3sumii
|
|
diff --git a/llvm/test/AutoTuning/PassInvocation/Inputs/pass_invocation.yaml b/llvm/test/AutoTuning/PassInvocation/Inputs/pass_invocation.yaml
|
|
new file mode 100644
|
|
index 000000000000..00459fe9e23c
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/PassInvocation/Inputs/pass_invocation.yaml
|
|
@@ -0,0 +1,10 @@
|
|
+--- !AutoTuning
|
|
+Pass: loop-unroll
|
|
+Name: for.body
|
|
+Function: find
|
|
+CodeRegionType: loop
|
|
+CodeRegionHash: 145363925920731080
|
|
+Invocation: [number]
|
|
+Args:
|
|
+ - UnrollCount: 2
|
|
+...
|
|
diff --git a/llvm/test/AutoTuning/PassInvocation/pass_invocation_read.ll b/llvm/test/AutoTuning/PassInvocation/pass_invocation_read.ll
|
|
new file mode 100644
|
|
index 000000000000..6e41507af8b8
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/PassInvocation/pass_invocation_read.ll
|
|
@@ -0,0 +1,64 @@
|
|
+; RUN: rm -rf %t.config.yaml
|
|
+; RUN: sed 's#\[number\]#0#g;' %S/Inputs/pass_invocation.yaml > %t.config.yaml
|
|
+; RUN: opt %s -S -O3 -print-after=loop-unroll-full -print-after=loop-unroll \
|
|
+; RUN: -auto-tuning-code-region-matching-hash=false \
|
|
+; RUN: -auto-tuning-input=%t.config.yaml --disable-output 2>&1 | \
|
|
+; RUN: FileCheck %s --check-prefix=INVOCATION-0
|
|
+
|
|
+; RUN: rm -rf %t.config.yaml
|
|
+; RUN: sed 's#\[number\]#1#g;' %S/Inputs/pass_invocation.yaml > %t.config.yaml
|
|
+; RUN: opt %s -S -O3 -print-after=loop-unroll-full -print-after=loop-unroll \
|
|
+; RUN: -auto-tuning-code-region-matching-hash=false \
|
|
+; RUN: -auto-tuning-input=%t.config.yaml --disable-output 2>&1 | \
|
|
+; RUN: FileCheck %s --check-prefix=INVOCATION-1
|
|
+
|
|
+; Function Attrs: norecurse nounwind readonly uwtable
|
|
+define dso_local i64 @find(i64* nocapture readonly %a, i64 %n, i64 %Value) {
|
|
+entry:
|
|
+ %cmp6.not = icmp eq i64 %n, 0
|
|
+ br i1 %cmp6.not, label %for.end, label %for.body
|
|
+
|
|
+for.body: ; preds = %entry, %for.inc
|
|
+ %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
|
|
+ %arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.07
|
|
+ %0 = load i64, i64* %arrayidx, align 8
|
|
+ %cmp1 = icmp eq i64 %0, %Value
|
|
+ br i1 %cmp1, label %for.end, label %for.inc
|
|
+
|
|
+for.inc: ; preds = %for.body
|
|
+ %inc = add nuw i64 %i.07, 1
|
|
+ %cmp = icmp ult i64 %inc, %n
|
|
+ br i1 %cmp, label %for.body, label %for.end
|
|
+
|
|
+for.end: ; preds = %for.inc, %for.body, %entry
|
|
+ %i.0.lcssa = phi i64 [ 0, %entry ], [ %i.07, %for.body ], [ %inc, %for.inc ]
|
|
+ ret i64 %i.0.lcssa
|
|
+}
|
|
+
|
|
+; INVOCATION-0: *** IR Dump After {{.*}}Unroll
|
|
+; INVOCATION-0: for.body.preheader: ; preds = %entry
|
|
+; INVOCATION-0: for.body: ; preds = %for.inc.1, %for.body.preheader
|
|
+; INVOCATION-0: for.inc: ; preds = %for.body
|
|
+; INVOCATION-0: for.body.1: ; preds = %for.inc
|
|
+; INVOCATION-0: for.inc.1: ; preds = %for.body.1
|
|
+; INVOCATION-0: for.end.loopexit: ; preds = %for.inc.1, %for.body.1, %for.body, %for.inc
|
|
+; INVOCATION-0: *** IR Dump After {{.*}}Unroll
|
|
+; INVOCATION-0: for.body.preheader: ; preds = %entry
|
|
+; INVOCATION-0: for.body: ; preds = %for.body.preheader, %for.inc.1
|
|
+; INVOCATION-0: for.inc: ; preds = %for.body
|
|
+; INVOCATION-0: for.body.1: ; preds = %for.inc
|
|
+; INVOCATION-0: for.inc.1: ; preds = %for.body.1
|
|
+; INVOCATION-0: for.end.loopexit: ; preds = %for.inc.1, %for.body.1, %for.body, %for.inc
|
|
+
|
|
+; INVOCATION-1: *** IR Dump After {{.*}}Unroll
|
|
+; INVOCATION-1: for.body.preheader: ; preds = %entry
|
|
+; INVOCATION-1: for.body: ; preds = %for.body.preheader, %for.inc
|
|
+; INVOCATION-1: for.inc: ; preds = %for.body
|
|
+; INVOCATION-1: for.end.loopexit: ; preds = %for.body, %for.inc
|
|
+; INVOCATION-1: *** IR Dump After {{.*}}Unroll
|
|
+; INVOCATION-1: for.body.preheader: ; preds = %entry
|
|
+; INVOCATION-1: for.body: ; preds = %for.inc.1, %for.body.preheader
|
|
+; INVOCATION-1: for.inc: ; preds = %for.body
|
|
+; INVOCATION-1: for.body.1: ; preds = %for.inc
|
|
+; INVOCATION-1: for.inc.1: ; preds = %for.body.1
|
|
+; INVOCATION-1: for.end.loopexit: ; preds = %for.inc.1, %for.body.1, %for.body, %for.inc
|
|
diff --git a/llvm/test/AutoTuning/PassInvocation/pass_invocation_write.ll b/llvm/test/AutoTuning/PassInvocation/pass_invocation_write.ll
|
|
new file mode 100644
|
|
index 000000000000..81097fdd5afa
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/PassInvocation/pass_invocation_write.ll
|
|
@@ -0,0 +1,67 @@
|
|
+; REQUIRES: aarch64-registered-target
|
|
+; RUN: rm -rf %t.pass_invocation
|
|
+; RUN: opt %s -S -mtriple=aarch64-- -mcpu=tsv110 -auto-tuning-type-filter=Loop \
|
|
+; RUN: -O3 -auto-tuning-opp=%t.pass_invocation --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.pass_invocation/pass_invocation_write.ll.yaml
|
|
+
|
|
+; Function Attrs: nounwind uwtable
|
|
+define dso_local void @sum(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32 %n) {
|
|
+entry:
|
|
+ br label %for.cond
|
|
+
|
|
+for.cond: ; preds = %for.body, %entry
|
|
+ %sum.0 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
|
|
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
|
+ %cmp = icmp slt i32 %i.0, %n
|
|
+ br i1 %cmp, label %for.body, label %for.end
|
|
+
|
|
+for.body: ; preds = %for.cond
|
|
+ %idxprom = sext i32 %i.0 to i64
|
|
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
|
|
+ %0 = load i32, i32* %arrayidx, align 4
|
|
+ %idxprom1 = sext i32 %i.0 to i64
|
|
+ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %idxprom1
|
|
+ %1 = load i32, i32* %arrayidx2, align 4
|
|
+ %mul = mul nsw i32 %0, %1
|
|
+ %conv = sitofp i32 %mul to float
|
|
+ %add = fadd contract float %sum.0, %conv
|
|
+ %inc = add nsw i32 %i.0, 1
|
|
+ br label %for.cond
|
|
+
|
|
+for.end: ; preds = %for.cond
|
|
+ %conv3 = fptosi float %sum.0 to i32
|
|
+ %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 0
|
|
+ store i32 %conv3, i32* %arrayidx4, align 4
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; CHECK: --- !AutoTuning
|
|
+; CHECK-NEXT: Pass: loop-unroll
|
|
+; CHECK-NEXT: Name: for.body
|
|
+; CHECK-NEXT: Function: sum
|
|
+; CHECK-NEXT: CodeRegionType: loop
|
|
+; CHECK-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; CHECK-NEXT: DynamicConfigs: { UnrollCount: [ 0, 1, 8, 4, 2 ] }
|
|
+; CHECK-NEXT: BaselineConfig: { UnrollCount: '0' }
|
|
+; CHECK-NEXT: Invocation: 0
|
|
+; CHECK-NEXT: ...
|
|
+; CHECK-NEXT: --- !AutoTuning
|
|
+; CHECK-NEXT: Pass: loop-vectorize
|
|
+; CHECK-NEXT: Name: for.body
|
|
+; CHECK-NEXT: Function: sum
|
|
+; CHECK-NEXT: CodeRegionType: loop
|
|
+; CHECK-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; CHECK-NEXT: DynamicConfigs: { VectorizationInterleave: [ 1, 2, 4 ] }
|
|
+; CHECK-NEXT: BaselineConfig: { VectorizationInterleave: '2' }
|
|
+; CHECK-NEXT: Invocation: 0
|
|
+; CHECK-NEXT: ...
|
|
+; CHECK-NEXT: --- !AutoTuning
|
|
+; CHECK-NEXT: Pass: loop-unroll
|
|
+; CHECK-NEXT: Name: vector.body
|
|
+; CHECK-NEXT: Function: sum
|
|
+; CHECK-NEXT: CodeRegionType: loop
|
|
+; CHECK-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; CHECK-NEXT: DynamicConfigs: { UnrollCount: [ 0, 1, 8, 4, 2 ] }
|
|
+; CHECK-NEXT: BaselineConfig: { UnrollCount: '0' }
|
|
+; CHECK-NEXT: Invocation: 1
|
|
+; CHECK-NEXT: ...
|
|
diff --git a/llvm/test/AutoTuning/PhaseOrdering/Inputs/template.yaml b/llvm/test/AutoTuning/PhaseOrdering/Inputs/template.yaml
|
|
new file mode 100644
|
|
index 000000000000..065d3cb85b72
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/PhaseOrdering/Inputs/template.yaml
|
|
@@ -0,0 +1,8 @@
|
|
+--- !AutoTuning
|
|
+Pass: all
|
|
+Name: [filename]
|
|
+Function: none
|
|
+CodeRegionType: other
|
|
+Args:
|
|
+ - OptPass: [pass]
|
|
+...
|
|
diff --git a/llvm/test/AutoTuning/PhaseOrdering/pass-order.ll b/llvm/test/AutoTuning/PhaseOrdering/pass-order.ll
|
|
new file mode 100644
|
|
index 000000000000..9d0210b3fdde
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/PhaseOrdering/pass-order.ll
|
|
@@ -0,0 +1,65 @@
|
|
+; Run different orders of opt passes and verify that the order is respected
|
|
+; -------------------------------------------------------------------------
|
|
+; Check to see if the order is correct, trivial case (autotuning disabled)
|
|
+; RUN: opt %s -debug-pass-manager -S 2>&1 | FileCheck %s -check-prefix=DISABLE
|
|
+
|
|
+; One pass:
|
|
+; RUN: rm -rf %t.onepass_order.yaml
|
|
+; RUN: sed 's#\[filename\]#%s#g; s#\[pass\]#\[loop-extract\]#g' \
|
|
+; RUN: %S/Inputs/template.yaml > %t.onepass_order.yaml
|
|
+; RUN: opt %s -debug-pass-manager -S -auto-tuning-input=%t.onepass_order.yaml \
|
|
+; RUN: 2>&1 | FileCheck %s -check-prefix=ONEPASS
|
|
+
|
|
+; Two passes (A->B):
|
|
+; RUN: rm -rf %t.twopass_order.yaml
|
|
+; RUN: sed 's#\[filename\]#%s#g; s#\[pass\]#\[loop-extract,strip\]#g' \
|
|
+; RUN: %S/Inputs/template.yaml > %t.twopass_order.yaml
|
|
+; RUN: opt %s -debug-pass-manager -S -auto-tuning-input=%t.twopass_order.yaml \
|
|
+; RUN: 2>&1 | FileCheck %s -check-prefix=TWOPASS_AB
|
|
+
|
|
+; Two passes (B->A):
|
|
+; RUN: rm -rf %t.twopass_ba_order.yaml
|
|
+; RUN: sed 's#\[filename\]#%s#g; s#\[pass\]#\[strip, loop-extract\]#g' \
|
|
+; RUN: %S/Inputs/template.yaml > %t.twopass_ba_order.yaml
|
|
+; RUN: opt %s -debug-pass-manager -S -auto-tuning-input=%t.twopass_ba_order.yaml \
|
|
+; RUN: 2>&1 | FileCheck %s -check-prefix=TWOPASS_BA
|
|
+
|
|
+; candidate IR that can change based on many optimizations
|
|
+; for now just use the IR in the LoopUnroll test file
|
|
+define void @foo(i32* nocapture %a) {
|
|
+entry:
|
|
+ br label %for.body
|
|
+
|
|
+for.body: ; preds = %for.body, %entry
|
|
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
|
+ %0 = load i32, i32* %arrayidx, align 4
|
|
+ %inc = add nsw i32 %0, 1
|
|
+ store i32 %inc, i32* %arrayidx, align 4
|
|
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
|
|
+ br i1 %exitcond, label %for.end, label %for.body
|
|
+
|
|
+for.end: ; preds = %for.body
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; DISABLE-NOT: Running pass: LoopExtractorPass on [module]
|
|
+; DISABLE-NOT: Running pass: StripSymbolsPass on [module]
|
|
+; DISABLE: Running pass: VerifierPass on [module]
|
|
+; DISABLE: Running pass: PrintModulePass on [module]
|
|
+
|
|
+; ONEPASS-NOT: Running pass: StripSymbolsPass on [module]
|
|
+; ONEPASS: Running pass: LoopExtractorPass on [module]
|
|
+; ONEPASS: Running pass: VerifierPass on [module]
|
|
+; ONEPASS: Running pass: PrintModulePass on [module]
|
|
+
|
|
+; TWOPASS_AB: Running pass: LoopExtractorPass on [module]
|
|
+; TWOPASS_AB: Running pass: StripSymbolsPass on [module]
|
|
+; TWOPASS_AB: Running pass: VerifierPass on [module]
|
|
+; TWOPASS_AB: Running pass: PrintModulePass on [module]
|
|
+
|
|
+; TWOPASS_BA: Running pass: StripSymbolsPass on [module]
|
|
+; TWOPASS_BA: Running pass: LoopExtractorPass on [module]
|
|
+; TWOPASS_BA: Running pass: VerifierPass on [module]
|
|
+; TWOPASS_BA: Running pass: PrintModulePass on [module]
|
|
diff --git a/llvm/test/AutoTuning/SwitchLowering/switch-opp.ll b/llvm/test/AutoTuning/SwitchLowering/switch-opp.ll
|
|
new file mode 100644
|
|
index 000000000000..679549180bf4
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/SwitchLowering/switch-opp.ll
|
|
@@ -0,0 +1,47 @@
|
|
+; RUN: rm %t.switch_opp -rf
|
|
+; RUN: llc %s -auto-tuning-opp=%t.switch_opp -auto-tuning-type-filter=Switch -o /dev/null
|
|
+; RUN: FileCheck %s --input-file %t.switch_opp/switch-opp.ll.yaml
|
|
+
|
|
+; UNSUPPORTED: windows
|
|
+
|
|
+define i32 @test(i32 %arg) #0 {
|
|
+entry:
|
|
+ switch i32 %arg, label %bb5 [
|
|
+ i32 1, label %bb1
|
|
+ i32 2, label %bb2
|
|
+ i32 3, label %bb3
|
|
+ i32 4, label %bb4
|
|
+ ]
|
|
+
|
|
+bb1: ; pred = %entry
|
|
+ br label %bb2
|
|
+
|
|
+bb2: ; pred = %entry, %bb1
|
|
+ %res.0 = phi i32 [ 1, %entry ], [ 2, %bb1 ]
|
|
+ br label %bb3
|
|
+
|
|
+bb3: ; pred = %entry, %bb2
|
|
+ %res.1 = phi i32 [ 0, %entry ], [ %res.0, %bb2 ]
|
|
+ %phitmp = add nsw i32 %res.1, 2
|
|
+ br label %bb4
|
|
+
|
|
+bb4: ; pred = %entry, %bb3
|
|
+ %res.2 = phi i32 [ 1, %entry ], [ %phitmp, %bb3 ]
|
|
+ br label %bb5
|
|
+
|
|
+bb5: ; pred = %entry, %bb4
|
|
+ %res.3 = phi i32 [ 0, %entry ], [ %res.2, %bb4 ]
|
|
+ %0 = add nsw i32 %res.3, 1
|
|
+ ret i32 %0
|
|
+}
|
|
+
|
|
+; CHECK: --- !AutoTuning
|
|
+; CHECK-NEXT: Pass: switch-lowering
|
|
+; CHECK-NEXT: Name: 'i32 %arg'
|
|
+; CHECK-NEXT: Function: test
|
|
+; CHECK-NEXT: CodeRegionType: switch
|
|
+; CHECK-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; CHECK-NEXT: DynamicConfigs: { }
|
|
+; CHECK-NEXT: BaselineConfig: { }
|
|
+; CHECK-NEXT: Invocation: 0
|
|
+; CHECK-NEXT: ...
|
|
diff --git a/llvm/test/AutoTuning/lit.local.cfg b/llvm/test/AutoTuning/lit.local.cfg
|
|
new file mode 100644
|
|
index 000000000000..13b4927257ab
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/lit.local.cfg
|
|
@@ -0,0 +1,2 @@
|
|
+if not config.enable_enable_autotuner:
|
|
+ config.unsupported = True
|
|
diff --git a/llvm/test/AutoTuning/opt-opp.ll b/llvm/test/AutoTuning/opt-opp.ll
|
|
new file mode 100644
|
|
index 000000000000..97f7b1d121cc
|
|
--- /dev/null
|
|
+++ b/llvm/test/AutoTuning/opt-opp.ll
|
|
@@ -0,0 +1,315 @@
|
|
+; REQUIRES: asserts
|
|
+; REQUIRES: x86-registered-target
|
|
+
|
|
+; RUN: rm %t.default_opp -rf
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-opp=%t.default_opp --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.default_opp/opt-opp.ll.yaml -check-prefix=DEFAULT
|
|
+
|
|
+; RUN: rm %t.module_opp -rf
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-opp=%t.module_opp -auto-tuning-type-filter=Other --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.module_opp/opt-opp.ll.yaml -check-prefix=OTHER
|
|
+
|
|
+; RUN: rm %t.loop_opp -rf
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-opp=%t.loop_opp -auto-tuning-type-filter=Loop --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.loop_opp/opt-opp.ll.yaml -check-prefix=LOOP
|
|
+
|
|
+; RUN: rm %t.callsite_opp -rf
|
|
+; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \
|
|
+; RUN: -auto-tuning-opp=%t.callsite_opp -auto-tuning-type-filter=CallSite --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.callsite_opp/opt-opp.ll.yaml -check-prefix=CALLSITE
|
|
+
|
|
+; RUN: rm %t.callsite_loop_opp -rf
|
|
+; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \
|
|
+; RUN: -auto-tuning-opp=%t.callsite_loop_opp -auto-tuning-type-filter=CallSite,Loop --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.callsite_loop_opp/opt-opp.ll.yaml -check-prefix=CALLSITE-LOOP1
|
|
+; RUN: FileCheck %s --input-file %t.callsite_loop_opp/opt-opp.ll.yaml -check-prefix=CALLSITE-LOOP2
|
|
+
|
|
+; RUN: rm %t.llvm_param_opp -rf
|
|
+; RUN: opt %s -S -auto-tuning-opp=%t.llvm_param_opp \
|
|
+; RUN: -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \
|
|
+; RUN: -auto-tuning-type-filter=LLVMParam --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.llvm_param_opp/opt-opp.ll.yaml -check-prefix=LLVMPARAM
|
|
+
|
|
+; RUN: rm %t.program_param_opp -rf
|
|
+; RUN: opt %s -S -passes='function(require<opt-remark-emit>,loop-unroll),cgscc(inline)' \
|
|
+; RUN: -auto-tuning-opp=%t.program_param_opp -auto-tuning-type-filter=ProgramParam --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.program_param_opp/opt-opp.ll.yaml -check-prefix=ProgramPARAM
|
|
+
|
|
+; Test that a numeric suffix (.1) is appended when an opp file with the same name already exists
|
|
+; RUN: rm %t.default_opp -rf
|
|
+; RUN: mkdir %t.default_opp && touch %t.default_opp/opt-opp.ll.yaml
|
|
+; RUN: opt %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -auto-tuning-opp=%t.default_opp --disable-output
|
|
+; RUN: FileCheck %s --input-file %t.default_opp/opt-opp.ll.yaml.1 -check-prefix=DEFAULT
|
|
+
|
|
+; Test that the loop code region is included if its size >= the threshold.
|
|
+; RUN: rm %t.loop.opp -rf
|
|
+; RUN: opt %s -S -auto-tuning-opp=%t.loop.opp -auto-tuning-size-threshold=13 \
|
|
+; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -debug-only=autotuning --disable-output 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=SIZE-LOOP
|
|
+; RUN: FileCheck %s --input-file %t.loop.opp/opt-opp.ll.yaml -check-prefix=SIZE-LOOP-OPP
|
|
+
|
|
+; Test that the loop code region is excluded if its size < the threshold.
|
|
+; RUN: rm %t.loop.opp -rf
|
|
+; RUN: opt %s -S -auto-tuning-opp=%t.loop.opp -auto-tuning-size-threshold=14 \
|
|
+; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
|
|
+; RUN: -debug-only=autotuning --disable-output 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=SIZE-LOOP-FILTERED
|
|
+; RUN: FileCheck %s --input-file %t.loop.opp/opt-opp.ll.yaml -check-prefix=SIZE-LOOP-OPP-FILTERED
|
|
+
|
|
+; Test that the callsite code region is included if its size >= the threshold.
|
|
+; RUN: rm %t.callsite.opp -rf
|
|
+; RUN: opt %s -S -passes=inline -auto-tuning-opp=%t.callsite.opp --disable-output \
|
|
+; RUN: -auto-tuning-size-threshold=2 -debug-only=autotuning 2>&1 | \
|
|
+; RUN: FileCheck %s -check-prefix=SIZE-CALLSITE
|
|
+; RUN: FileCheck %s --input-file %t.callsite.opp/opt-opp.ll.yaml -check-prefix=SIZE-CALLSITE-OPP
|
|
+
|
|
+; Test that the callsite code region is excluded if its size < the threshold.
|
|
+; RUN: rm %t.callsite.opp -rf
|
|
+; RUN: opt %s -S -passes=inline -auto-tuning-opp=%t.callsite.opp \
|
|
+; RUN: -auto-tuning-size-threshold=24 --disable-output -debug-only=autotuning \
|
|
+; RUN: 2>&1 | FileCheck %s -check-prefix=SIZE-CALLSITE-FILTERED
|
|
+; RUN: FileCheck %s --input-file %t.callsite.opp/opt-opp.ll.yaml -check-prefix=SIZE-CALLSITE-OPP-FILTERED
|
|
+
|
|
+; RUN: rm -rf %t.other
|
|
+; RUN: opt %s -S -O3 -auto-tuning-opp=%t.other -auto-tuning-type-filter=Other
|
|
+; RUN: grep "Name: \+'%S/opt-opp.ll'" %t.other/opt-opp.ll.yaml
|
|
+; RUN: not grep "Name: \+opt-opp.ll" %t.other/opt-opp.ll.yaml
|
|
+
|
|
+; RUN: rm -rf %t.other
|
|
+; RUN: opt %s -S -O3 -auto-tuning-opp=%t.other -auto-tuning-type-filter=Other \
|
|
+; RUN: -autotuning-project-dir=%S/
|
|
+; RUN: not grep "Name: \+'%S/opt-opp.ll'" %t.other/opt-opp.ll.yaml
|
|
+; RUN: grep "Name: \+opt-opp.ll" %t.other/opt-opp.ll.yaml
|
|
+
|
|
+; UNSUPPORTED: windows
|
|
+
|
|
+; ModuleID = 'loop-opp.c'
|
|
+source_filename = "loop-opp.c"
|
|
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
+target triple = "x86_64-unknown-linux-gnu"
|
|
+
|
|
+; Function Attrs: noinline nounwind uwtable
|
|
+define i32 @test(i32* %n) #0 !dbg !6 {
|
|
+entry:
|
|
+ call void @callee(i32 6), !dbg !18
|
|
+ %n.addr = alloca i32*, align 8
|
|
+ %b = alloca i32, align 4
|
|
+ %i = alloca i32, align 4
|
|
+ store i32* %n, i32** %n.addr, align 8
|
|
+ call void @llvm.dbg.declare(metadata i32** %n.addr, metadata !11, metadata !12), !dbg !13
|
|
+ call void @llvm.dbg.declare(metadata i32* %b, metadata !14, metadata !12), !dbg !15
|
|
+ store i32 0, i32* %b, align 4, !dbg !15
|
|
+ call void @llvm.dbg.declare(metadata i32* %i, metadata !16, metadata !12), !dbg !18
|
|
+ store i32 0, i32* %i, align 4, !dbg !18
|
|
+ br label %for.cond, !dbg !19
|
|
+
|
|
+for.cond: ; preds = %for.inc, %entry
|
|
+ %0 = load i32, i32* %i, align 4, !dbg !20
|
|
+ %1 = load i32*, i32** %n.addr, align 8, !dbg !23
|
|
+ %2 = load i32, i32* %1, align 4, !dbg !24
|
|
+ %cmp = icmp slt i32 %0, %2, !dbg !25
|
|
+ br i1 %cmp, label %for.body, label %for.end, !dbg !26
|
|
+
|
|
+for.body: ; preds = %for.cond
|
|
+ %3 = load i32, i32* %b, align 4, !dbg !28
|
|
+ %add = add nsw i32 %3, 1, !dbg !30
|
|
+ store i32 %add, i32* %b, align 4, !dbg !31
|
|
+ br label %for.inc, !dbg !32
|
|
+
|
|
+for.inc: ; preds = %for.body
|
|
+ %4 = load i32, i32* %i, align 4, !dbg !33
|
|
+ %inc = add nsw i32 %4, 1, !dbg !33
|
|
+ store i32 %inc, i32* %i, align 4, !dbg !33
|
|
+ br label %for.cond, !dbg !35, !llvm.loop !36
|
|
+
|
|
+for.end: ; preds = %for.cond
|
|
+ %5 = load i32, i32* %b, align 4, !dbg !39
|
|
+ ret i32 %5, !dbg !40
|
|
+}
|
|
+
|
|
+@a = global i32 4
|
|
+define void @callee(i32 %a) #2 {
|
|
+entry:
|
|
+ %a1 = load volatile i32, i32* @a
|
|
+ %x1 = add i32 %a1, %a1
|
|
+ %add = add i32 %x1, %a
|
|
+ ret void
|
|
+}
|
|
+
|
|
+; Function Attrs: nounwind readnone
|
|
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
|
|
+
|
|
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
+attributes #1 = { nounwind readnone }
|
|
+attributes #2 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
+
|
|
+!llvm.dbg.cu = !{!0}
|
|
+!llvm.module.flags = !{!3, !4}
|
|
+!llvm.ident = !{!5}
|
|
+
|
|
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "" ,isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
|
|
+!1 = !DIFile(filename: "loop-opp.c", directory: "")
|
|
+!2 = !{}
|
|
+!3 = !{i32 2, !"Dwarf Version", i32 4}
|
|
+!4 = !{i32 2, !"Debug Info Version", i32 3}
|
|
+!5 = !{!""}
|
|
+!6 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
|
|
+!7 = !DISubroutineType(types: !8)
|
|
+!8 = !{!9, !10}
|
|
+!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
|
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64)
|
|
+!11 = !DILocalVariable(name: "n", arg: 1, scope: !6, file: !1, line: 1, type: !10)
|
|
+!12 = !DIExpression()
|
|
+!13 = !DILocation(line: 1, column: 20, scope: !6)
|
|
+!14 = !DILocalVariable(name: "b", scope: !6, file: !1, line: 3, type: !9)
|
|
+!15 = !DILocation(line: 3, column: 9, scope: !6)
|
|
+!16 = !DILocalVariable(name: "i", scope: !17, file: !1, line: 4, type: !9)
|
|
+!17 = distinct !DILexicalBlock(scope: !6, file: !1, line: 4, column: 5)
|
|
+!18 = !DILocation(line: 4, column: 14, scope: !17)
|
|
+!19 = !DILocation(line: 4, column: 10, scope: !17)
|
|
+!20 = !DILocation(line: 4, column: 20, scope: !21)
|
|
+!21 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 1)
|
|
+!22 = distinct !DILexicalBlock(scope: !17, file: !1, line: 4, column: 5)
|
|
+!23 = !DILocation(line: 4, column: 25, scope: !21)
|
|
+!24 = !DILocation(line: 4, column: 24, scope: !21)
|
|
+!25 = !DILocation(line: 4, column: 22, scope: !21)
|
|
+!26 = !DILocation(line: 4, column: 5, scope: !27)
|
|
+!27 = !DILexicalBlockFile(scope: !17, file: !1, discriminator: 1)
|
|
+!28 = !DILocation(line: 6, column: 11, scope: !29)
|
|
+!29 = distinct !DILexicalBlock(scope: !22, file: !1, line: 5, column: 5)
|
|
+!30 = !DILocation(line: 6, column: 12, scope: !29)
|
|
+!31 = !DILocation(line: 6, column: 9, scope: !29)
|
|
+!32 = !DILocation(line: 7, column: 5, scope: !29)
|
|
+!33 = !DILocation(line: 4, column: 28, scope: !34)
|
|
+!34 = !DILexicalBlockFile(scope: !22, file: !1, discriminator: 2)
|
|
+!35 = !DILocation(line: 4, column: 5, scope: !34)
|
|
+!36 = distinct !{!36, !37, !38}
|
|
+!37 = !DILocation(line: 4, column: 5, scope: !17)
|
|
+!38 = !DILocation(line: 7, column: 5, scope: !17)
|
|
+!39 = !DILocation(line: 8, column: 12, scope: !6)
|
|
+!40 = !DILocation(line: 8, column: 5, scope: !6)
|
|
+
|
|
+; DEFAULT: --- !AutoTuning
|
|
+; DEFAULT-NEXT: Pass: loop-unroll
|
|
+; DEFAULT-NEXT: Name: for.cond
|
|
+; DEFAULT-NEXT: DebugLoc: { File: loop-opp.c, Line: 4, Column: 5 }
|
|
+; DEFAULT-NEXT: Function: test
|
|
+; DEFAULT-NEXT: CodeRegionType: loop
|
|
+; DEFAULT-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; DEFAULT-NEXT: DynamicConfigs: { UnrollCount: [ {{[0-9]+(, [0-9]+)*}} ] }
|
|
+; DEFAULT-NEXT: BaselineConfig: { UnrollCount: '{{[0-9]+}}' }
|
|
+; DEFAULT-NEXT: Invocation: 0
|
|
+; DEFAULT-NEXT: ...
|
|
+; DEFAULT-NEXT: --- !AutoTuning
|
|
+; DEFAULT-NEXT: Pass: all
|
|
+; DEFAULT-NEXT: Name:
|
|
+; DEFAULT-SAME: opt-opp.ll
|
|
+; DEFAULT-NEXT: Function: none
|
|
+; DEFAULT-NEXT: CodeRegionType: other
|
|
+; COM: Module level hashes can differ based on the filepath so we check a regex
|
|
+; DEFAULT-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; DEFAULT-NEXT: DynamicConfigs: { }
|
|
+; DEFAULT-NEXT: BaselineConfig: { }
|
|
+; DEFAULT-NEXT: Invocation: 0
|
|
+; DEFAULT-NEXT: ...
|
|
+
|
|
+; LOOP: --- !AutoTuning
|
|
+; LOOP-NEXT: Pass: loop-unroll
|
|
+; LOOP-NEXT: Name: for.cond
|
|
+; LOOP-NEXT: DebugLoc: { File: loop-opp.c, Line: 4, Column: 5 }
|
|
+; LOOP-NEXT: Function: test
|
|
+; LOOP-NEXT: CodeRegionType: loop
|
|
+; LOOP-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; LOOP-NEXT: DynamicConfigs: { UnrollCount: [ {{[0-9]+(, [0-9]+)*}} ] }
|
|
+; LOOP-NEXT: BaselineConfig: { UnrollCount: '{{[0-9]+}}' }
|
|
+; LOOP-NEXT: Invocation: 0
|
|
+; LOOP-NEXT: ...
|
|
+
|
|
+; CALLSITE: --- !AutoTuning
|
|
+; CALLSITE-NEXT: Pass: inline
|
|
+; CALLSITE-NEXT: Name: callee
|
|
+; CALLSITE-NEXT: DebugLoc: { File: loop-opp.c, Line: 4, Column: 14 }
|
|
+; CALLSITE-NEXT: Function: test
|
|
+; CALLSITE-NEXT: CodeRegionType: callsite
|
|
+; CALLSITE-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; CALLSITE-NEXT: DynamicConfigs: { ForceInline: [ 0, 1 ] }
|
|
+; CALLSITE-NEXT: BaselineConfig: { ForceInline: '1' }
|
|
+; CALLSITE-NEXT: Invocation: 0
|
|
+; CALLSITE-NEXT: ...
|
|
+
|
|
+; CALLSITE-LOOP1: CodeRegionType: loop
|
|
+; CALLSITE-LOOP1-NOT: CodeRegionType: other
|
|
+; CALLSITE-LOOP2: CodeRegionType: callsite
|
|
+; CALLSITE-LOOP2-NOT: CodeRegionType: other
|
|
+
|
|
+; OTHER: --- !AutoTuning
|
|
+; OTHER-NEXT: Pass: all
|
|
+; OTHER-NEXT: Name:
|
|
+; OTHER-SAME: opt-opp.ll
|
|
+; OTHER-NEXT: Function: none
|
|
+; OTHER-NEXT: CodeRegionType: other
|
|
+; COM: Module level hashes can differ based on the filepath so we check a regex
|
|
+; OTHER-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; OTHER-NEXT: DynamicConfigs: { }
|
|
+; OTHER-NEXT: BaselineConfig: { }
|
|
+; OTHER-NEXT: Invocation: 0
|
|
+; OTHER-NEXT: ...
|
|
+
|
|
+; LLVMPARAM: --- !AutoTuning
|
|
+; LLVMPARAM-NEXT: Pass: none
|
|
+; LLVMPARAM-NEXT: Name:
|
|
+; LLVMPARAM-SAME: opt-opp.ll
|
|
+; LLVMPARAM-NEXT: Function: none
|
|
+; LLVMPARAM-NEXT: CodeRegionType: llvm-param
|
|
+; LLVMPARAM-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; LLVMPARAM-NEXT: DynamicConfigs: { }
|
|
+; LLVMPARAM-NEXT: BaselineConfig: { }
|
|
+; LLVMPARAM-NEXT: Invocation: 0
|
|
+; LLVMPARAM-NEXT: ...
|
|
+
|
|
+; ProgramPARAM: --- !AutoTuning
|
|
+; ProgramPARAM-NEXT: Pass: none
|
|
+; ProgramPARAM-NEXT: Name:
|
|
+; ProgramPARAM-SAME: opt-opp.ll
|
|
+; ProgramPARAM-NEXT: Function: none
|
|
+; ProgramPARAM-NEXT: CodeRegionType: program-param
|
|
+; ProgramPARAM-NEXT: CodeRegionHash: {{[0-9]+}}
|
|
+; ProgramPARAM-NEXT: DynamicConfigs: { }
|
|
+; ProgramPARAM-NEXT: BaselineConfig: { }
|
|
+; ProgramPARAM-NEXT: Invocation: 0
|
|
+; ProgramPARAM-NEXT: ...
|
|
+
|
|
+; SIZE-LOOP: PassName: loop-unroll
|
|
+; SIZE-LOOP-NEXT: Type: loop
|
|
+; SIZE-LOOP-NEXT: Size: 13
|
|
+; SIZE-LOOP: Module added as an tuning opportunity
|
|
+
|
|
+; SIZE-LOOP-OPP-DAG: Pass: loop-unroll
|
|
+; SIZE-LOOP-OPP-DAG: Pass: all
|
|
+
|
|
+; SIZE-LOOP-FILTERED-NOT: PassName: loop-unroll
|
|
+; SIZE-LOOP-FILTERED: Module added as an tuning opportunity
|
|
+
|
|
+; SIZE-LOOP-OPP-FILTERED-NOT: Pass: loop-unroll
|
|
+; The "other" code regions should remain as-is.
|
|
+; SIZE-LOOP-OPP-FILTERED: CodeRegionType: other
|
|
+
|
|
+; SIZE-CALLSITE: PassName: inline
|
|
+; SIZE-CALLSITE-NEXT: Type: callsite
|
|
+; SIZE-CALLSITE-NEXT: Size: 4
|
|
+; SIZE-CALLSITE: Module added as an tuning opportunity
|
|
+
|
|
+; SIZE-CALLSITE-OPP-DAG: Pass: inline
|
|
+; SIZE-CALLSITE-OPP-DAG: Pass: all
|
|
+
|
|
+; SIZE-CALLSITE-FILTERED-NOT: PassName: inline
|
|
+; SIZE-CALLSITE-FILTERED: Module added as an tuning opportunity
|
|
+
|
|
+; SIZE-CALLSITE-OPP-FILTERED-NOT: Pass: inline
|
|
+; The "other" code regions should remain as-is.
|
|
+; SIZE-CALLSITE-OPP-FILTERED: CodeRegionType: other
|
|
diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in
|
|
index fc7ab6536309..0e9396e3b014 100644
|
|
--- a/llvm/test/lit.site.cfg.py.in
|
|
+++ b/llvm/test/lit.site.cfg.py.in
|
|
@@ -62,6 +62,7 @@ config.reverse_iteration = @LLVM_ENABLE_REVERSE_ITERATION@
|
|
config.dxil_tests = @LLVM_INCLUDE_DXIL_TESTS@
|
|
config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@
|
|
config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@
|
|
+config.enable_enable_autotuner = @LLVM_ENABLE_AUTOTUNER@
|
|
|
|
import lit.llvm
|
|
lit.llvm.initialize(lit_config, config)
|
|
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
|
|
index 8934130f9913..94b2028b25bc 100644
|
|
--- a/llvm/tools/llc/llc.cpp
|
|
+++ b/llvm/tools/llc/llc.cpp
|
|
@@ -645,6 +645,18 @@ static int compileModule(char **argv, LLVMContext &Context) {
|
|
reportError(EC.message(), SplitDwarfOutputFile);
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (llvm::Error E = autotuning::Engine.init(M->getModuleIdentifier())) {
|
|
+ errs() << "error: " << toString(std::move(E)) << '\n';
|
|
+ return 1;
|
|
+ }
|
|
+ if (autotuning::Engine.isEnabled() && autotuning::Engine.isParseInput() &&
|
|
+ (autotuning::Engine.LLVMParams.size() ||
|
|
+ autotuning::Engine.ProgramParams.size()))
|
|
+ llvm::cl::ParseAutoTunerOptions(autotuning::Engine.LLVMParams,
|
|
+ autotuning::Engine.ProgramParams);
|
|
+#endif
|
|
+
|
|
// Build up all of the passes that we want to do to the module.
|
|
legacy::PassManager PM;
|
|
|
|
@@ -776,6 +788,13 @@ static int compileModule(char **argv, LLVMContext &Context) {
|
|
}
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ if (llvm::Error E = autotuning::Engine.finalize()) {
|
|
+ errs() << "error: " << toString(std::move(E)) << '\n';
|
|
+ return 1;
|
|
+ }
|
|
+#endif
|
|
+
|
|
// Declare success.
|
|
Out->keep();
|
|
if (DwoOut)
|
|
diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp
|
|
index 6ae3f87099af..5ce9e4fee81f 100644
|
|
--- a/llvm/tools/opt/NewPMDriver.cpp
|
|
+++ b/llvm/tools/opt/NewPMDriver.cpp
|
|
@@ -39,6 +39,10 @@
|
|
#include "llvm/Transforms/Scalar/LoopPassManager.h"
|
|
#include "llvm/Transforms/Utils/Debugify.h"
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+#include "llvm/AutoTuner/AutoTuning.h"
|
|
+#endif
|
|
+
|
|
using namespace llvm;
|
|
using namespace opt_tool;
|
|
|
|
@@ -459,6 +463,35 @@ bool llvm::runPassPipeline(
|
|
MPM.addPass(NewPMDebugifyPass(DebugifyMode::OriginalDebugInfo, "",
|
|
&DebugInfoBeforePass));
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ bool Changed = false;
|
|
+ // If autotuning is enabled (for applying configuration), use AutoTuner
|
|
+ // generated pass ordering instead of using passes specified with -passes=...
|
|
+ // with opt tool.
|
|
+ if (autotuning::Engine.isEnabled()) {
|
|
+ std::vector<std::string> PassesList;
|
|
+ Changed = autotuning::Engine.lookUpGlobalParams("OptPass", PassesList);
|
|
+ if (Changed && PassesList.size()) {
|
|
+ std::string PassPipeline = "";
|
|
+ for (auto PassName : PassesList)
|
|
+ PassPipeline.append(PassName + ",");
|
|
+ PassPipeline.pop_back();
|
|
+
|
|
+ if (auto Err = PB.parsePassPipeline(MPM, PassPipeline))
|
|
+ errs() << "AutoTuner: cannot add pass:" << toString(std::move(Err))
|
|
+ << "\n";
|
|
+ }
|
|
+ }
|
|
+ if (!Changed) {
|
|
+ // Add passes according to the -passes options.
|
|
+ if (!PassPipeline.empty()) {
|
|
+ if (auto Err = PB.parsePassPipeline(MPM, PassPipeline)) {
|
|
+ errs() << Arg0 << ": " << toString(std::move(Err)) << "\n";
|
|
+ return false;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+#else
|
|
// Add passes according to the -passes options.
|
|
if (!PassPipeline.empty()) {
|
|
if (auto Err = PB.parsePassPipeline(MPM, PassPipeline)) {
|
|
@@ -466,6 +499,7 @@ bool llvm::runPassPipeline(
|
|
return false;
|
|
}
|
|
}
|
|
+#endif
|
|
|
|
if (VK > VK_NoVerifier)
|
|
MPM.addPass(VerifierPass());
|
|
@@ -539,6 +573,14 @@ bool llvm::runPassPipeline(
|
|
if (DebugifyEach && !DebugifyExport.empty())
|
|
exportDebugifyStats(DebugifyExport, Debugify.getDebugifyStatsMap());
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // AUTO-TUNING - auto-tuning finalization for this module
|
|
+ if (Error E = autotuning::Engine.finalize()) {
|
|
+ errs() << "error: " << toString(std::move(E)) << '\n';
|
|
+ return false;
|
|
+ }
|
|
+#endif
|
|
+
|
|
return true;
|
|
}
|
|
|
|
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
|
|
index 9c20e7784223..1401352647cd 100644
|
|
--- a/llvm/tools/opt/opt.cpp
|
|
+++ b/llvm/tools/opt/opt.cpp
|
|
@@ -456,6 +456,9 @@ int main(int argc, char **argv) {
|
|
initializeWriteBitcodePassPass(Registry);
|
|
initializeReplaceWithVeclibLegacyPass(Registry);
|
|
initializeJMCInstrumenterPass(Registry);
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ initializeAutotuningDumpLegacyPass(Registry);
|
|
+#endif
|
|
|
|
SmallVector<PassPlugin, 1> PluginList;
|
|
PassPlugins.setCallback([&](const std::string &PluginPath) {
|
|
@@ -516,7 +519,11 @@ int main(int argc, char **argv) {
|
|
RemarksFormat, RemarksWithHotness,
|
|
RemarksHotnessThreshold);
|
|
if (Error E = RemarksFileOrErr.takeError()) {
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ errs() << "error: " << toString(std::move(E)) << '\n';
|
|
+#else
|
|
errs() << toString(std::move(E)) << '\n';
|
|
+#endif
|
|
return 1;
|
|
}
|
|
std::unique_ptr<ToolOutputFile> RemarksFile = std::move(*RemarksFileOrErr);
|
|
@@ -641,6 +648,20 @@ int main(int argc, char **argv) {
|
|
M->addModuleFlag(Module::Error, "UnifiedLTO", 1);
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // AUTO-TUNING - auto-tuning initialization for this module
|
|
+ // if the auto-tuning flag is on
|
|
+ if (Error E = autotuning::Engine.init(M->getModuleIdentifier())) {
|
|
+ errs() << "error: " << toString(std::move(E)) << '\n';
|
|
+ return 1;
|
|
+ }
|
|
+ if (autotuning::Engine.isEnabled() && autotuning::Engine.isParseInput() &&
|
|
+ (autotuning::Engine.LLVMParams.size() ||
|
|
+ autotuning::Engine.ProgramParams.size()))
|
|
+ llvm::cl::ParseAutoTunerOptions(autotuning::Engine.LLVMParams,
|
|
+ autotuning::Engine.ProgramParams);
|
|
+#endif
|
|
+
|
|
// Add an appropriate TargetLibraryInfo pass for the module's triple.
|
|
TargetLibraryInfoImpl TLII(ModuleTriple);
|
|
|
|
@@ -778,6 +799,30 @@ int main(int argc, char **argv) {
|
|
Passes.add(TPC);
|
|
}
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // AUTO-TUNING - If auto-tuning is enabled, try to generate passes
|
|
+ // from auto-tuning interface and disable all optimization passes.
|
|
+ if (autotuning::Engine.isEnabled()) {
|
|
+ std::vector<std::string> PassesList;
|
|
+ bool Changed = autotuning::Engine.lookUpGlobalParams("OptPass", PassesList);
|
|
+ if (Changed) {
|
|
+ // disable all optimization passes of all optimization levels
|
|
+ OptLevelO0 = false;
|
|
+ OptLevelO1 = false;
|
|
+ OptLevelO2 = false;
|
|
+ OptLevelOs = false;
|
|
+ OptLevelOz = false;
|
|
+ OptLevelO3 = false;
|
|
+ for (auto const &Value : PassesList) {
|
|
+ const PassInfo *PassInf = (Registry.getPassInfo(StringRef(Value)));
|
|
+ if (PassInf) {
|
|
+ PassList.push_back(PassInf);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+
|
|
// Create a new optimization pass for each one specified on the command line
|
|
for (unsigned i = 0; i < PassList.size(); ++i) {
|
|
const PassInfo *PassInf = PassList[i];
|
|
@@ -878,6 +923,14 @@ int main(int argc, char **argv) {
|
|
if (DebugifyEach && !DebugifyExport.empty())
|
|
exportDebugifyStats(DebugifyExport, Passes.getDebugifyStatsMap());
|
|
|
|
+#if defined(ENABLE_AUTOTUNER)
|
|
+ // AUTO-TUNING - auto-tuning finalization for this module
|
|
+ if (Error E = autotuning::Engine.finalize()) {
|
|
+ errs() << "error: " << toString(std::move(E)) << '\n';
|
|
+ return 1;
|
|
+ }
|
|
+#endif
|
|
+
|
|
// Declare success.
|
|
if (!NoOutput)
|
|
Out->keep();
|
|
--
|
|
2.33.0
|
|
|