[GPU] Do not treat pad as a tilable producer for operand promotion (#18918)

PadOp has no implementation for deriving a thread configuration from
derived_thread_config, so skip marking it for promotion until one is
added. Padded operands are still promoted, but through the generic path
that inserts a linalg.copy carrying the derived thread config.
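
A sketch of the resulting IR (value names, the elided pad body, and the
surrounding ops are illustrative, not actual pass output; the attribute
spelling follows the test below):

  %padded = tensor.pad %a low[0, 0] high[0, 1] { ... }
      : tensor<4x127xf32> to tensor<4x128xf32>
  // No lowering config on the pad itself; the inserted copy carries the
  // derived thread config instead.
  %promoted = linalg.copy {lowering_config = #iree_gpu.derived_thread_config}
      ins(%padded : tensor<4x128xf32>) outs(%init : tensor<4x128xf32>)
      -> tensor<4x128xf32>
  %mm = linalg.matmul ins(%promoted, %b : tensor<4x128xf32>, tensor<128x128xf32>)
      outs(%fill : tensor<4x128xf32>) -> tensor<4x128xf32>
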
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPromoteMatmulOperands.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPromoteMatmulOperands.cpp
index dd498fa..5e50a95 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPromoteMatmulOperands.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPromoteMatmulOperands.cpp
@@ -53,9 +53,15 @@
         return;
       }
     }
-    setLoweringConfig(producer, IREE::GPU::DerivedThreadConfigAttr::get(
-                                    builder.getContext()));
-    return;
+
+    // Thread tile size derivation is only implemented for LinalgOp and
+    // Im2colOp for now.
+    if (isa<linalg::LinalgOp, IREE::LinalgExt::Im2colOp>(
+            producer.getOperation())) {
+      setLoweringConfig(producer, IREE::GPU::DerivedThreadConfigAttr::get(
+                                      builder.getContext()));
+      return;
+    }
   }
 
   auto tensorType = dyn_cast<RankedTensorType>(operand.getType());
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_promote_matmul_operands.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_promote_matmul_operands.mlir
index f05cf7b..643b12c 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_promote_matmul_operands.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_promote_matmul_operands.mlir
@@ -82,3 +82,27 @@
 // CHECK-LABEL: func.func @no_promote_fill
 //   CHECK-NOT:   iree_gpu.derived_thread_config
 //       CHECK: return
+
+// -----
+
+#lowering_config = #iree_gpu.lowering_config<{promote_operands = [0]}>
+
+func.func @promote_pad(%a : tensor<4x127xf32>, %b: tensor<128x128xf32>) -> tensor<4x128xf32> {
+  %cst = arith.constant 0.000000e+00 : f32
+  %empty = tensor.empty() : tensor<4x128xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%empty : tensor<4x128xf32>) -> tensor<4x128xf32>
+  %padded = tensor.pad %a low[0, 0] high[0, 1] {
+  ^bb0(%arg0: index, %arg1: index):
+    tensor.yield %cst : f32
+  } : tensor<4x127xf32> to tensor<4x128xf32>
+  %mm = linalg.matmul {lowering_config = #lowering_config}
+    ins(%padded, %b : tensor<4x128xf32>, tensor<128x128xf32>) outs(%fill : tensor<4x128xf32>) -> tensor<4x128xf32>
+  return %mm : tensor<4x128xf32>
+}
+
+// Verify that the padded operand is promoted with a linalg.copy.
+// CHECK-LABEL: func.func @promote_pad
+//       CHECK:   tensor.pad
+//       CHECK:   linalg.copy
+//  CHECK-SAME:     derived_thread_config
+//       CHECK: return