Add a test that compiles softmax under aggressive fusion. (#11362)
All the changes needed to enable aggressive fusion on a softmax input have landed. The softmax test now compiles under aggressive fusion, but the generated code makes a stack allocation that exceeds the allowed limit. For now the test is added with the bounds check relaxed (`--iree-llvmcpu-fail-on-out-of-bounds-stack-allocation=false`); the oversized stack allocation itself still needs to be addressed.
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUCheckIRBeforeLLVMConversion.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUCheckIRBeforeLLVMConversion.cpp
index 71f6369..c26b2a9 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUCheckIRBeforeLLVMConversion.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/LLVMCPUCheckIRBeforeLLVMConversion.cpp
@@ -17,8 +17,8 @@
"iree-llvmcpu-stack-allocation-limit",
llvm::cl::desc("maximum allowed stack allocation size in bytes"),
llvm::cl::init(32768));
-static llvm::cl::opt<bool> clFailUnboundDynamicStackAllocation(
- "iree-llvmcpu-fail-unbound-dynamic-stack-allocation",
- llvm::cl::desc("fail if the upper bound of dynamic stack allocation cannot "
- "be solved"),
+static llvm::cl::opt<bool> clFailOnOutOfBoundsStackAllocation(
+ "iree-llvmcpu-fail-on-out-of-bounds-stack-allocation",
+ llvm::cl::desc("fail if a dynamic stack allocation has no solvable upper "
+ "bound or the total stack allocation exceeds the limit"),
llvm::cl::init(true));
@@ -45,7 +45,7 @@
auto ub = linalg::getConstantUpperBoundForIndex(operand);
if (succeeded(ub)) {
size *= *ub;
- } else if (clFailUnboundDynamicStackAllocation) {
+ } else if (clFailOnOutOfBoundsStackAllocation) {
return allocaOp.emitOpError(
"expected no stack allocations without upper bound shapes");
}
@@ -62,7 +62,8 @@
return signalPassFailure();
}
int maxAllocationSizeInBits = clMaxAllocationSizeInBytes * 8;
- if (totalBits > maxAllocationSizeInBits) {
+ if (clFailOnOutOfBoundsStackAllocation &&
+ totalBits > maxAllocationSizeInBits) {
moduleOp.emitOpError(
"expected total size of stack allocation is not greater than ")
<< clMaxAllocationSizeInBytes.getValue() << " bytes, but got "
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/check_ir_before_llvm_conversion_not_fail_unbound.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/check_ir_before_llvm_conversion_not_fail_unbound.mlir
index 78d88e6..adae2c3 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/check_ir_before_llvm_conversion_not_fail_unbound.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/check_ir_before_llvm_conversion_not_fail_unbound.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt --iree-llvmcpu-check-ir-before-llvm-conversion --iree-llvmcpu-fail-unbound-dynamic-stack-allocation=false %s --verify-diagnostics -split-input-file
+// RUN: iree-opt --iree-llvmcpu-check-ir-before-llvm-conversion --iree-llvmcpu-fail-on-out-of-bounds-stack-allocation=false %s --verify-diagnostics -split-input-file | FileCheck %s
module {
func.func @dynamic_allocas(%arg0: index) {
@@ -6,11 +6,11 @@
return
}
}
+// CHECK-LABEL: func @dynamic_allocas(
// -----
#map = affine_map<(d0) -> (-d0, 16384)>
-// expected-error @+1 {{expected total size of stack allocation is not greater than 32768 bytes, but got 65536 bytes}}
module {
func.func @dynamic_big_allocas(%arg0: index, %arg1: index) {
%0 = affine.min #map(%arg0)
@@ -18,13 +18,14 @@
return
}
}
+// CHECK-LABEL: func @dynamic_big_allocas(
// -----
-// expected-error @+1 {{expected total size of stack allocation is not greater than 32768 bytes, but got 65536 bytes}}
module {
func.func @mix_static_and_unbound_dynamic_allocas(%arg0: index) {
%0 = memref.alloca(%arg0) : memref<?x16384xf32>
return
}
}
+// CHECK-LABEL: func @mix_static_and_unbound_dynamic_allocas(
\ No newline at end of file
diff --git a/tests/e2e/regression/BUILD b/tests/e2e/regression/BUILD
index 159c45f..6c1ecf6 100644
--- a/tests/e2e/regression/BUILD
+++ b/tests/e2e/regression/BUILD
@@ -144,3 +144,16 @@
driver = "local-task",
target_backend = "llvm-cpu",
)
+
+iree_check_single_backend_test_suite(
+ name = "aggressive_fusion_test",
+ srcs = [
+ "softmax.mlir",
+ ],
+ compiler_flags = [
+ "--iree-flow-enable-aggressive-fusion",
+ "--iree-llvmcpu-fail-on-out-of-bounds-stack-allocation=false",
+ ],
+ driver = "local-task",
+ target_backend = "llvm-cpu",
+)
diff --git a/tests/e2e/regression/CMakeLists.txt b/tests/e2e/regression/CMakeLists.txt
index 0c06be4..df1a0d4 100644
--- a/tests/e2e/regression/CMakeLists.txt
+++ b/tests/e2e/regression/CMakeLists.txt
@@ -173,4 +173,18 @@
"-iree-flow-demote-f64-to-f32=false"
)
+iree_check_single_backend_test_suite(
+ NAME
+ aggressive_fusion_test
+ SRCS
+ "softmax.mlir"
+ TARGET_BACKEND
+ "llvm-cpu"
+ DRIVER
+ "local-task"
+ COMPILER_FLAGS
+ "--iree-flow-enable-aggressive-fusion"
+ "--iree-llvmcpu-fail-on-out-of-bounds-stack-allocation=false"
+)
+
### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###