[ROCM][NFC] Add option to control SLP vectorization in llvm optimizations (#18865)
We keep SLP vectorization off because it can mask perf issues or create
regressions. However, on ROCM we have noticed that we are
hitting issues in several untested paths in the AMDGPU LLVM backend
because we don't have SLP vectorization enabled.
Here is an example of an issue that we wouldn't hit if SLP vectorization
was turned on:
https://github.com/iree-org/iree/issues/18798
In this PR we are still keeping the existing behavior but providing a
flag to toggle it so that we can do the required benchmarking and
analysis.
Signed-off-by: Nirvedh <nirvedh@gmail.com>
diff --git a/compiler/plugins/target/ROCM/ROCMTarget.cpp b/compiler/plugins/target/ROCM/ROCMTarget.cpp
index 0a2fcc3..37565aa 100644
--- a/compiler/plugins/target/ROCM/ROCMTarget.cpp
+++ b/compiler/plugins/target/ROCM/ROCMTarget.cpp
@@ -61,6 +61,7 @@
int wavesPerEu = 0;
std::string enableROCMUkernels = "none";
bool legacySync = true;
+ bool slpVectorization = false;
/// List of LLVM opt pass pluggins to be loaded during GPU code
/// generation. The pluggins are paths to dynamic libraries that
@@ -108,6 +109,11 @@
"to be passed to the target backend compiler during HIP "
"executable serialization"),
cl::ZeroOrMore, cl::cat(category));
+ binder.opt<bool>(
+ "iree-hip-llvm-slp-vec", slpVectorization, cl::cat(category),
+ cl::desc(
+ "Enable slp vectorization in llvm opt. This can have an impact on "
+ "performance/numerics so its turned off by default currently."));
}
LogicalResult verify(mlir::Builder &builder) const {
@@ -286,7 +292,8 @@
// https://github.com/iree-org/iree/blob/main/compiler/plugins/target/CUDA/CUDATarget.cpp
static void optimizeModule(llvm::Module &module,
llvm::TargetMachine &targetMachine,
- ArrayRef<std::string> passPlugins) {
+ ArrayRef<std::string> passPlugins,
+ bool slpVectorization) {
llvm::LoopAnalysisManager lam;
llvm::FunctionAnalysisManager fam;
llvm::CGSCCAnalysisManager cgam;
@@ -295,7 +302,7 @@
fam.registerPass([&] { return targetMachine.getTargetIRAnalysis(); });
llvm::PipelineTuningOptions pto;
- pto.SLPVectorization = false;
+ pto.SLPVectorization = slpVectorization;
llvm::PassInstrumentationCallbacks pic;
@@ -548,7 +555,8 @@
}
// Run LLVM optimization passes.
- optimizeModule(*llvmModule, *targetMachine, options.passPlugins);
+ optimizeModule(*llvmModule, *targetMachine, options.passPlugins,
+ options.slpVectorization);
if (!serOptions.dumpIntermediatesPath.empty()) {
dumpModuleToPath(serOptions.dumpIntermediatesPath,
serOptions.dumpBaseName, variantOp.getName(),