Improve stack allocation check to infer upper bound sizes. (#9026) This also increases the limit to 32 KB. The default workgroup size is 64, and in some cases the stack allocation size depends on the workgroup size, which can produce a 64x64xi64 allocation (64 * 64 * 8 bytes = 32 KB). Thus, 32 KB is a reasonable limit, and an allocation of exactly 32 KB is accepted.

commit: aae314e8bb72115aa00ce2556fbd8ed1559d459d [log] [tgz]
author: Han-Chung Wang <hanchung@google.com> Fri Apr 29 07:29:04 2022 -0700
committer: GitHub <noreply@github.com> Fri Apr 29 07:29:04 2022 -0700
tree: 432a10661a2ec2069db6d50607aa3a54e900f5e7
parent: 2f01d36f6a3f3db4b6978c95e6223862219fe041 [diff]
diff --git a/iree/compiler/Codegen/LLVMCPU/LLVMCPUCheckIRBeforeLLVMConversion.cpp b/iree/compiler/Codegen/LLVMCPU/LLVMCPUCheckIRBeforeLLVMConversion.cpp
index 7173ab9..e9f4de3 100644
--- a/iree/compiler/Codegen/LLVMCPU/LLVMCPUCheckIRBeforeLLVMConversion.cpp
+++ b/iree/compiler/Codegen/LLVMCPU/LLVMCPUCheckIRBeforeLLVMConversion.cpp

@@ -6,6 +6,7 @@
 
 #include "iree/compiler/Codegen/PassDetail.h"
 #include "iree/compiler/Codegen/Passes.h"
+#include "mlir/Dialect/Linalg/Utils/Utils.h"
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {
@@ -21,23 +22,39 @@
 
 void LLVMCPUCheckIRBeforeLLVMConversionPass::runOnOperation() {
   auto moduleOp = getOperation();
-  int64_t bits = 0;
+  int64_t totalBits = 0;
   auto walkResult = moduleOp.walk([&](memref::AllocaOp allocaOp) -> WalkResult {
     auto type = allocaOp.getType().cast<ShapedType>();
-    if (!type.hasStaticShape()) {
-      return allocaOp.emitOpError(
-          "expected no stack allocations with dynamic shapes");
+    int64_t size = 1;
+    for (auto dimSize : type.getShape()) {
+      if (dimSize == ShapedType::kDynamicSize) continue;
+      size *= dimSize;
     }
-    bits += type.getSizeInBits();
+    for (auto operand : allocaOp.dynamicSizes()) {
+      auto ub = linalg::getConstantUpperBoundForIndex(operand);
+      if (failed(ub)) {
+        return allocaOp.emitOpError(
+            "expected no stack allocations without upper bound shapes");
+      }
+      size *= *ub;
+    }
+    size *= type.getElementType().getIntOrFloatBitWidth();
+    if (allocaOp.alignment()) {
+      int64_t alignmentInBits = *allocaOp.alignment() * 8;
+      size = llvm::divideCeil(size, alignmentInBits) * alignmentInBits;
+    }
+    totalBits += size;
     return WalkResult::advance();
   });
   if (walkResult.wasInterrupted()) {
     return signalPassFailure();
   }
-  constexpr int k16KBInBits = 16 * 1024 * 8;
-  if (bits >= k16KBInBits) {
+  constexpr int k32KBInBits = 32 * 1024 * 8;
+  if (totalBits > k32KBInBits) {
     moduleOp.emitOpError(
-        "expected total size of stack allocation is smaller than 16 KB");
+        "expected total size of stack allocation is not greater than 32 KB, "
+        "but got ")
+        << llvm::divideCeil(totalBits, 8) << " bytes";
     return signalPassFailure();
   }
 }

diff --git a/iree/compiler/Codegen/LLVMCPU/test/check_ir_before_llvm_conversion.mlir b/iree/compiler/Codegen/LLVMCPU/test/check_ir_before_llvm_conversion.mlir
index 2765093..fe8b82d 100644
--- a/iree/compiler/Codegen/LLVMCPU/test/check_ir_before_llvm_conversion.mlir
+++ b/iree/compiler/Codegen/LLVMCPU/test/check_ir_before_llvm_conversion.mlir

@@ -1,19 +1,31 @@
 // RUN: iree-opt -iree-llvmcpu-check-ir-before-llvm-conversion %s -verify-diagnostics -split-input-file
 
 module {
-func.func @no_dynamic_allocas(%arg0: index) {
-  // expected-error @+1 {{expected no stack allocations with dynamic shapes}}
-  %0 = memref.alloca(%arg0) : memref<?xf32>
-  return
-}
+  func.func @dynamic_allocas(%arg0: index) {
+    // expected-error @+1 {{expected no stack allocations without upper bound shapes}}
+    %0 = memref.alloca(%arg0) : memref<?xf32>
+    return
+  }
 }
 
 // -----
 
-// expected-error @+1 {{expected total size of stack allocation is smaller than 16 KB}}
+// expected-error @+1 {{expected total size of stack allocation is not greater than 32 KB, but got 65536 bytes}}
 module {
-func.func @big_allocas(%arg0: index) {
-  %0 = memref.alloca() : memref<65536xi32>
-  return
+  func.func @static_big_allocas(%arg0: index) {
+    %0 = memref.alloca() : memref<16384xi32>
+    return
+  }
 }
+
+// -----
+
+#map = affine_map<(d0) -> (-d0, 16384)>
+// expected-error @+1 {{expected total size of stack allocation is not greater than 32 KB, but got 65536 bytes}}
+module {
+  func @dynamic_big_allocas(%arg0: index) {
+    %0 = affine.min #map(%arg0)
+    %1 = memref.alloca(%0) : memref<?xf32>
+    return
+  }
 }
commit	aae314e8bb72115aa00ce2556fbd8ed1559d459d	[log] [tgz]
author	Han-Chung Wang <hanchung@google.com>	Fri Apr 29 07:29:04 2022 -0700
committer	GitHub <noreply@github.com>	Fri Apr 29 07:29:04 2022 -0700
tree	432a10661a2ec2069db6d50607aa3a54e900f5e7
parent	2f01d36f6a3f3db4b6978c95e6223862219fe041 [diff]