Reduce default max tile size for workgroup distribution for Generic ops. (#9478)
Reducing the default max tile size used for workgroup distribution for
generic ops will reduce the amount of stack allocation
needed. (`argmax` computation that is lowered into `generic` op is the
only place we really need an allocation at this point).
Issue #9469
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index 245c7d7..3ef85c5 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -798,7 +798,9 @@
SmallVector<int64_t> minTileSizes =
getMinTilingSizesForEachDim(entryPointFn, genericOp);
- SmallVector<int64_t> maxTileSizes(numLoops, defaultWorkgroupTileSize);
+ // For generic ops we'll use the default divided by 2 to control the stack
+ // allocation limit See #9469 for example.
+ SmallVector<int64_t> maxTileSizes(numLoops, defaultWorkgroupTileSize / 2);
if (llvm::all_of(minTileSizes, [](int64_t vs) { return vs == 1; })) {
// Nothing to vectorize just lower to loops.
return success();
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_x86_64_launch_configuration.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_x86_64_launch_configuration.mlir
index 2066c8e..d5b7d40 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_x86_64_launch_configuration.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/materialize_x86_64_launch_configuration.mlir
@@ -217,7 +217,7 @@
}
}
}
-// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64], [1, 1], [0, 0]]>
+// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[32, 32], [1, 1], [0, 0]]>
// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert>
// CHECK: hal.executable.export public @add
// CHECK-SAME: translation_info = #[[TRANSLATION]]
@@ -275,7 +275,7 @@
}
}
-// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 64, 64, 64], [1, 1, 1, 1], [0, 0, 0, 0]]>
+// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 32, 32, 32], [1, 1, 1, 1], [0, 0, 0, 0]]>
// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert>
// CHECK: hal.executable.export public @add4D
// CHECK-SAME: translation_info = #[[TRANSLATION]]
@@ -316,7 +316,7 @@
}
}
}
-// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 8, 16, 64], [1, 1, 1, 4], [0, 0, 0, 0]]>
+// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 8, 16, 32], [1, 1, 1, 4], [0, 0, 0, 0]]>
// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert>
// CHECK: hal.executable.export public @add_static
// CHECK-SAME: translation_info = #[[TRANSLATION]]
@@ -408,7 +408,7 @@
}
}
-// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64], [1, 1], [0, 0]{{\]}}>
+// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[32, 32], [1, 1], [0, 0]{{\]}}>
// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUBufferOpsTileAndVectorize>
// CHECK: hal.executable.export public @copy_op_dynamic
// CHECK-SAME: translation_info = #[[TRANSLATION]]
@@ -544,7 +544,7 @@
}
}
}
-// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64, 0], [1, 1, 0], [0, 0, 1]{{\]}}>
+// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[32, 32, 0], [1, 1, 0], [0, 0, 1]{{\]}}>
// CHECK: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert>
// CHECK: hal.executable.export public @outs_fusion_fn
// CHECK-SAME: translation_info = #[[TRANSLATION]]
@@ -1088,7 +1088,7 @@
}
}
}
-// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 0, 0, 0, 64, 64, 0, 64], [0, 1, 0, 0, 1, 1, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0]{{\]}}>
+// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 0, 0, 0, 32, 32, 0, 32], [0, 1, 0, 0, 1, 1, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0]{{\]}}>
// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<CPUDoubleTilingExpert>
// CHECK: hal.executable.export public @generic_unit_dims_dynamic
// CHECK-SAME: translation_info = #[[TRANSLATION]]