[RISCV] Limit max unroll factor to 8 by default (#10436)
We have been trying to selectively limit unrolling on RISC-V for those
cases that lead to register spilling. Unfortunately, this is becoming a
never-ending problem, so this PR limits unrolling to 8 by default on
RISC-V while keeping unrolling of transpose operations disabled. That
seems to get rid of register spilling altogether. This sets a good
starting point for further unrolling tuning.
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index bed380f..c7b5af5 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -198,36 +198,34 @@
auto operandType =
inputOutputOpOperands[map.index()]->get().getType().cast<ShapedType>();
int64_t tileSize = getVectorSize(entryPointFn, operandType);
- // Vectorization of reductions is driven by input tensors and considering
- // the output's fastest varying dim leads to large unroll factors. We limit
- // the tile size for this case to 'maxUnrollFactor'.
- if (linalgOpInfo.isReduction() &&
- op.isOutputTensor(inputOutputOpOperands[map.index()])) {
- tileSize = std::min<int64_t>(
- tileSize, targetMLTransInfo.defaultMaxReductionUnrollFactor);
- }
minTileSizes[fastestVaryingDim] =
std::max<int64_t>(minTileSizes[fastestVaryingDim], tileSize);
}
- // Limit unrolling on transpose operations. For know, we assume the rightmost
- // non-one tiled dimension is for vectorization and any other non-one
- // dimension is for unrolling.
- // TODO(dcaballe): Consider input and output transposes.
- if (linalgOpInfo.isTranspose()) {
+ // Limit unroll factor. For now, we assume the rightmost non-one tiled
+ // dimension is for vectorization and any other non-one dimension is for
+ // unrolling.
+ auto limitUnrollFactor = [&](int64_t maxUnrollFactor) {
int vecDim;
for (vecDim = minTileSizes.size() - 1; vecDim >= 0; --vecDim) {
if (minTileSizes[vecDim] > 1) {
break;
}
}
-
for (int unrollDim = vecDim - 1; unrollDim >= 0; --unrollDim) {
minTileSizes[unrollDim] =
- std::min<int64_t>(minTileSizes[unrollDim],
- targetMLTransInfo.defaultMaxTransposeUnrollFactor);
+ std::min<int64_t>(minTileSizes[unrollDim], maxUnrollFactor);
}
+ };
+
+ if (linalgOpInfo.isTranspose()) {
+ // Limit unrolling on transpose operations.
+ // TODO(dcaballe): Consider input and output transposes.
+ limitUnrollFactor(targetMLTransInfo.defaultMaxTransposeUnrollFactor);
+ } else {
+ // Limit unrolling to the default target maximum.
+ limitUnrollFactor(targetMLTransInfo.defaultMaxUnrollFactor);
}
return minTileSizes;
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.cpp
index 75f840f..bf26b16 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.cpp
@@ -15,7 +15,7 @@
struct RISCVTargetMLTransformInfo : TargetMLTransformInfo {
RISCVTargetMLTransformInfo() {
- defaultMaxReductionUnrollFactor = 8;
+ defaultMaxUnrollFactor = 8;
defaultMaxTransposeUnrollFactor = 1;
}
};
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.h b/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.h
index cd1d450..bbdf4d3 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.h
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/TargetMLTransformInfo.h
@@ -17,7 +17,7 @@
/// Holds target specific information to specialize ML transformations.
// TODO(dcaballe): Move to a Concept-Model implementation when it's worth it.
struct TargetMLTransformInfo {
- unsigned defaultMaxReductionUnrollFactor = 8;
+ unsigned defaultMaxUnrollFactor = 8;
unsigned defaultMaxTransposeUnrollFactor =
std::numeric_limits<unsigned>::max();