[RISCV] Limit max unroll factor to 8 by default (#10436)

We have been trying to selectively limit unrolling on RISC-V for those
cases that lead to register spilling. Unfortunately, this has become a
never-ending problem, so this PR limits unrolling to 8 by default on
RISC-V while keeping unrolling of transpose operations disabled. That
seems to get rid of register spilling altogether. This sets a good
starting point for further unrolling tuning.
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index bed380f..c7b5af5 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -198,36 +198,34 @@
     auto operandType =
         inputOutputOpOperands[map.index()]->get().getType().cast<ShapedType>();
     int64_t tileSize = getVectorSize(entryPointFn, operandType);
-    // Vectorization of reductions is driven by input tensors and considering
-    // the output's fastest varying dim leads to large unroll factors. We limit
-    // the tile size for this case to 'maxUnrollFactor'.
-    if (linalgOpInfo.isReduction() &&
-        op.isOutputTensor(inputOutputOpOperands[map.index()])) {
-      tileSize = std::min<int64_t>(
-          tileSize, targetMLTransInfo.defaultMaxReductionUnrollFactor);
-    }
 
     minTileSizes[fastestVaryingDim] =
         std::max<int64_t>(minTileSizes[fastestVaryingDim], tileSize);
   }
 
-  // Limit unrolling on transpose operations. For know, we assume the rightmost
-  // non-one tiled dimension is for vectorization and any other non-one
-  // dimension is for unrolling.
-  // TODO(dcaballe): Consider input and output transposes.
-  if (linalgOpInfo.isTranspose()) {
+  // Limit unroll factor. For now, we assume the rightmost non-one tiled
+  // dimension is for vectorization and any other non-one dimension is for
+  // unrolling.
+  auto limitUnrollFactor = [&](int64_t maxUnrollFactor) {
     int vecDim;
     for (vecDim = minTileSizes.size() - 1; vecDim >= 0; --vecDim) {
       if (minTileSizes[vecDim] > 1) {
         break;
       }
     }
-
     for (int unrollDim = vecDim - 1; unrollDim >= 0; --unrollDim) {
       minTileSizes[unrollDim] =
-          std::min<int64_t>(minTileSizes[unrollDim],
-                            targetMLTransInfo.defaultMaxTransposeUnrollFactor);
+          std::min<int64_t>(minTileSizes[unrollDim], maxUnrollFactor);
     }
+  };
+
+  if (linalgOpInfo.isTranspose()) {
+    // Limit unrolling on transpose operations.
+    // TODO(dcaballe): Consider input and output transposes.
+    limitUnrollFactor(targetMLTransInfo.defaultMaxTransposeUnrollFactor);
+  } else {
+    // Limit unrolling to the default target maximum.
+    limitUnrollFactor(targetMLTransInfo.defaultMaxUnrollFactor);
   }
 
   return minTileSizes;
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.cpp
index 75f840f..bf26b16 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.cpp
@@ -15,7 +15,7 @@
 
 struct RISCVTargetMLTransformInfo : TargetMLTransformInfo {
   RISCVTargetMLTransformInfo() {
-    defaultMaxReductionUnrollFactor = 8;
+    defaultMaxUnrollFactor = 8;
     defaultMaxTransposeUnrollFactor = 1;
   }
 };
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.h
index cd1d450..bbdf4d3 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.h
@@ -17,7 +17,7 @@
 /// Holds target specific information to specialize ML transformations.
 // TODO(dcaballe): Move to a Concept-Model implementation when it's worth it.
 struct TargetMLTransformInfo {
-  unsigned defaultMaxReductionUnrollFactor = 8;
+  unsigned defaultMaxUnrollFactor = 8;
   unsigned defaultMaxTransposeUnrollFactor =
       std::numeric_limits<unsigned>::max();