Improve stack allocation check to infer upper bound sizes. (#9026)
This also raises the limit from 16 KB to 32 KB. The default workgroup size is
64, and there are cases where the stack allocation size depends on the
workgroup size, which produces a 64x64xi64 buffer (64 * 64 * 8 bytes = 32 KB).
Thus, 32 KB is a reasonable limit.
diff --git a/iree/compiler/Codegen/LLVMCPU/LLVMCPUCheckIRBeforeLLVMConversion.cpp b/iree/compiler/Codegen/LLVMCPU/LLVMCPUCheckIRBeforeLLVMConversion.cpp
index 7173ab9..e9f4de3 100644
--- a/iree/compiler/Codegen/LLVMCPU/LLVMCPUCheckIRBeforeLLVMConversion.cpp
+++ b/iree/compiler/Codegen/LLVMCPU/LLVMCPUCheckIRBeforeLLVMConversion.cpp
@@ -6,6 +6,7 @@
#include "iree/compiler/Codegen/PassDetail.h"
#include "iree/compiler/Codegen/Passes.h"
+#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/Pass/Pass.h"
namespace mlir {
@@ -21,23 +22,39 @@
void LLVMCPUCheckIRBeforeLLVMConversionPass::runOnOperation() {
auto moduleOp = getOperation();
- int64_t bits = 0;
+ int64_t totalBits = 0;
auto walkResult = moduleOp.walk([&](memref::AllocaOp allocaOp) -> WalkResult {
auto type = allocaOp.getType().cast<ShapedType>();
- if (!type.hasStaticShape()) {
- return allocaOp.emitOpError(
- "expected no stack allocations with dynamic shapes");
+ int64_t size = 1;
+ for (auto dimSize : type.getShape()) {
+ if (dimSize == ShapedType::kDynamicSize) continue;
+ size *= dimSize;
}
- bits += type.getSizeInBits();
+ for (auto operand : allocaOp.dynamicSizes()) {
+ auto ub = linalg::getConstantUpperBoundForIndex(operand);
+ if (failed(ub)) {
+ return allocaOp.emitOpError(
+ "expected no stack allocations without upper bound shapes");
+ }
+ size *= *ub;
+ }
+ size *= type.getElementType().getIntOrFloatBitWidth();
+ if (allocaOp.alignment()) {
+ int64_t alignmentInBits = *allocaOp.alignment() * 8;
+ size = llvm::divideCeil(size, alignmentInBits) * alignmentInBits;
+ }
+ totalBits += size;
return WalkResult::advance();
});
if (walkResult.wasInterrupted()) {
return signalPassFailure();
}
- constexpr int k16KBInBits = 16 * 1024 * 8;
- if (bits >= k16KBInBits) {
+ constexpr int k32KBInBits = 32 * 1024 * 8;
+ if (totalBits > k32KBInBits) {
moduleOp.emitOpError(
- "expected total size of stack allocation is smaller than 16 KB");
+ "expected total size of stack allocation is not greater than 32 KB, "
+ "but got ")
+ << llvm::divideCeil(totalBits, 8) << " bytes";
return signalPassFailure();
}
}
diff --git a/iree/compiler/Codegen/LLVMCPU/test/check_ir_before_llvm_conversion.mlir b/iree/compiler/Codegen/LLVMCPU/test/check_ir_before_llvm_conversion.mlir
index 2765093..fe8b82d 100644
--- a/iree/compiler/Codegen/LLVMCPU/test/check_ir_before_llvm_conversion.mlir
+++ b/iree/compiler/Codegen/LLVMCPU/test/check_ir_before_llvm_conversion.mlir
@@ -1,19 +1,31 @@
// RUN: iree-opt -iree-llvmcpu-check-ir-before-llvm-conversion %s -verify-diagnostics -split-input-file
module {
-func.func @no_dynamic_allocas(%arg0: index) {
- // expected-error @+1 {{expected no stack allocations with dynamic shapes}}
- %0 = memref.alloca(%arg0) : memref<?xf32>
- return
-}
+ func.func @dynamic_allocas(%arg0: index) {
+ // expected-error @+1 {{expected no stack allocations without upper bound shapes}}
+ %0 = memref.alloca(%arg0) : memref<?xf32>
+ return
+ }
}
// -----
-// expected-error @+1 {{expected total size of stack allocation is smaller than 16 KB}}
+// expected-error @+1 {{expected total size of stack allocation is not greater than 32 KB, but got 65536 bytes}}
module {
-func.func @big_allocas(%arg0: index) {
- %0 = memref.alloca() : memref<65536xi32>
- return
+ func.func @static_big_allocas(%arg0: index) {
+ %0 = memref.alloca() : memref<16384xi32>
+ return
+ }
}
+
+// -----
+
+#map = affine_map<(d0) -> (-d0, 16384)>
+// expected-error @+1 {{expected total size of stack allocation is not greater than 32 KB, but got 65536 bytes}}
+module {
+ func.func @dynamic_big_allocas(%arg0: index) {
+ %0 = affine.min #map(%arg0) // bounded above by the constant 16384 in #map
+ %1 = memref.alloca(%0) : memref<?xf32> // 16384 x f32 (4 bytes) = 65536 bytes
+ return
+ }
}