[ROCM][NFC] Add option to control SLP vectorization in llvm optimizations (#18865) We keep SLP vectorization off because it can mask perf issues or create regressions. However, on ROCm we have noticed that we are hitting issues in several untested paths in the AMDGPU LLVM backend because we don't have SLP vectorization enabled. Here is an example of an issue that we wouldn't hit if SLP vectorization was turned on: https://github.com/iree-org/iree/issues/18798. In this PR we are still keeping the existing behavior, but we provide a flag to toggle it so that we can do the required benchmarking and analysis. Signed-off-by: Nirvedh <nirvedh@gmail.com>

commit: a400cde96289706512ac873591a9711c81edc244 [log] [tgz]
author: Nirvedh Meshram <96096277+nirvedhmeshram@users.noreply.github.com> Wed Oct 23 15:34:20 2024 -0500
committer: GitHub <noreply@github.com> Wed Oct 23 15:34:20 2024 -0500
tree: 9c885fa50192b7b6dc90ec4afa19d4bdbabb90af
parent: 563b3e73c126a56dcabc8d2b17bf6a27347e37ff [diff]
diff --git a/compiler/plugins/target/ROCM/ROCMTarget.cpp b/compiler/plugins/target/ROCM/ROCMTarget.cpp
index 0a2fcc3..37565aa 100644
--- a/compiler/plugins/target/ROCM/ROCMTarget.cpp
+++ b/compiler/plugins/target/ROCM/ROCMTarget.cpp

@@ -61,6 +61,7 @@
   int wavesPerEu = 0;
   std::string enableROCMUkernels = "none";
   bool legacySync = true;
+  bool slpVectorization = false;
 
   /// List of LLVM opt pass pluggins to be loaded during GPU code
   /// generation. The pluggins are paths to dynamic libraries that
@@ -108,6 +109,11 @@
                  "to be passed to the target backend compiler during HIP "
                  "executable serialization"),
         cl::ZeroOrMore, cl::cat(category));
+    binder.opt<bool>(
+        "iree-hip-llvm-slp-vec", slpVectorization, cl::cat(category),
+        cl::desc(
+            "Enable slp vectorization in llvm opt. This can have an impact on "
+            "performance/numerics so its turned off by default currently."));
   }
 
   LogicalResult verify(mlir::Builder &builder) const {
@@ -286,7 +292,8 @@
   // https://github.com/iree-org/iree/blob/main/compiler/plugins/target/CUDA/CUDATarget.cpp
   static void optimizeModule(llvm::Module &module,
                              llvm::TargetMachine &targetMachine,
-                             ArrayRef<std::string> passPlugins) {
+                             ArrayRef<std::string> passPlugins,
+                             bool slpVectorization) {
     llvm::LoopAnalysisManager lam;
     llvm::FunctionAnalysisManager fam;
     llvm::CGSCCAnalysisManager cgam;
@@ -295,7 +302,7 @@
     fam.registerPass([&] { return targetMachine.getTargetIRAnalysis(); });
 
     llvm::PipelineTuningOptions pto;
-    pto.SLPVectorization = false;
+    pto.SLPVectorization = slpVectorization;
 
     llvm::PassInstrumentationCallbacks pic;
 
@@ -548,7 +555,8 @@
       }
 
       // Run LLVM optimization passes.
-      optimizeModule(*llvmModule, *targetMachine, options.passPlugins);
+      optimizeModule(*llvmModule, *targetMachine, options.passPlugins,
+                     options.slpVectorization);
       if (!serOptions.dumpIntermediatesPath.empty()) {
         dumpModuleToPath(serOptions.dumpIntermediatesPath,
                          serOptions.dumpBaseName, variantOp.getName(),
commit	a400cde96289706512ac873591a9711c81edc244	[log] [tgz]
author	Nirvedh Meshram <96096277+nirvedhmeshram@users.noreply.github.com>	Wed Oct 23 15:34:20 2024 -0500
committer	GitHub <noreply@github.com>	Wed Oct 23 15:34:20 2024 -0500
tree	9c885fa50192b7b6dc90ec4afa19d4bdbabb90af
parent	563b3e73c126a56dcabc8d2b17bf6a27347e37ff [diff]