[Codegen] Rewrite test so LLVMGPUWarpReduction is not used (#21770)
There is some logic (introduced in
https://github.com/iree-org/iree/pull/20310) that relies on finding an
`iree_tensor_ext.dispatch.tensor.store` op for the
LLVMGPUVectorDistribute configuration to kick in. If no such 'beacon' op
is found, we currently fall through to LLVMGPUWarpReduction.
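For illustration only, a rough sketch of the shape of that check (hypothetical helper name and signature; this is not the actual IREE code, it just matches the op by its name string from this PR description):

```cpp
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Operation.h"

// Hypothetical sketch: the configuration logic keys off a store into the
// dispatch tensor. If no such 'beacon' op exists in the function, the
// reduction path falls back to LLVMGPUWarpReduction instead of
// LLVMGPUVectorDistribute.
static bool hasDispatchTensorStore(mlir::func::FuncOp funcOp) {
  bool found = false;
  funcOp.walk([&](mlir::Operation *op) {
    if (op->getName().getStringRef() ==
        "iree_tensor_ext.dispatch.tensor.store") {
      found = true;
      return mlir::WalkResult::interrupt();
    }
    return mlir::WalkResult::advance();
  });
  return found;
}
```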
This PR just rejiggles the test so that the IR is in the expected state
(the state it will be in when the full pipeline is run), so that we're
on the right path to use LLVMGPUVectorDistribute. I can follow this PR
up with a refactoring of the logic to make it more robust.
Signed-off-by: James Newling <james.newling@gmail.com>
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir
index d858ea7..ec81464 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir
@@ -216,19 +216,13 @@
// -----
-module {
- func.func @matmul_dynamic_dim(%11: tensor<?x256xf16>, %12: tensor<256x256xf16>) -> tensor<?x256xf32> {
- %c0 = arith.constant 0 : index
- %cst = arith.constant 0.000000e+00 : f32
- %8 = tensor.dim %11, %c0 : tensor<?x256xf16>
- %13 = tensor.empty(%8) : tensor<?x256xf32>
- %14 = linalg.fill ins(%cst : f32) outs(%13 : tensor<?x256xf32>) -> tensor<?x256xf32>
- %15 = linalg.matmul ins(%11, %12 : tensor<?x256xf16>, tensor<256x256xf16>) outs(%14 : tensor<?x256xf32>) -> tensor<?x256xf32>
- return %15 : tensor<?x256xf32>
- }
+func.func @matmul_dynamic_M(%arg0: tensor<?x256xf32>, %arg1: tensor<256x256xf32>, %arg2: tensor<?x256xf32>, %arg3: !iree_tensor_ext.dispatch.tensor<readwrite:tensor<?x256xf32>>, %arg4 : index) {
+ %0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x256xf32>, tensor<256x256xf32>) outs(%arg2 : tensor<?x256xf32>) -> tensor<?x256xf32>
+ iree_tensor_ext.dispatch.tensor.store %0, %arg3, offsets = [0, 0], sizes = [%arg4, 256], strides = [1, 1] : tensor<?x256xf32> -> !iree_tensor_ext.dispatch.tensor<readwrite:tensor<?x256xf32>>{%arg4}
+ return
}
-// CHECK-LABEL: func.func @matmul_dynamic_dim
+// CHECK-LABEL: func.func @matmul_dynamic_M
// CHECK-SAME: #iree_codegen.translation_info<pipeline = LLVMGPUTileAndFuse workgroup_size = [64, 1, 1] subgroup_size = 64>
// CHECK: linalg.matmul {{.*}}lowering_config = #iree_gpu.lowering_config
// CHECK-SAME: promote_operands = [0, 1]
@@ -236,7 +230,7 @@
// CHECK-SAME: thread = [1, 4, 0]
// CHECK-SAME: workgroup = [1, 256, 0]
-// LATE: LLVMGPUWarpReduction
+// LATE: LLVMGPUVectorDistribute
// -----