[GPU] Do not treat pad as a tilable producer for operand promotion (#18918)
PadOp doesn't implement thread tile size derivation for
derived_thread_config, so skip tagging it for promotion until an
implementation is added; unsupported producers instead fall through to
the copy-based promotion path.
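
For reference, a rough sketch (attribute placement illustrative, not
verbatim pass output) of what the old code produced: the
derived_thread_config was attached directly to the pad, even though no
thread tile sizes can later be derived from it:

    %padded = tensor.pad %a low[0, 0] high[0, 1] {
    ^bb0(%i: index, %j: index):
      tensor.yield %cst : f32
    } {lowering_config = #iree_gpu.derived_thread_config}
        : tensor<4x127xf32> to tensor<4x128xf32>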
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPromoteMatmulOperands.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPromoteMatmulOperands.cpp
index dd498fa..5e50a95 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPromoteMatmulOperands.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPromoteMatmulOperands.cpp
@@ -53,9 +53,15 @@
return;
}
}
- setLoweringConfig(producer, IREE::GPU::DerivedThreadConfigAttr::get(
- builder.getContext()));
- return;
+
+  // Thread tile size derivation is currently only implemented for LinalgOp
+  // and Im2colOp; skip other producers such as tensor.pad.
+ if (isa<linalg::LinalgOp, IREE::LinalgExt::Im2colOp>(
+ producer.getOperation())) {
+ setLoweringConfig(producer, IREE::GPU::DerivedThreadConfigAttr::get(
+ builder.getContext()));
+ return;
+ }
}

auto tensorType = dyn_cast<RankedTensorType>(operand.getType());
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_promote_matmul_operands.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_promote_matmul_operands.mlir
index f05cf7b..643b12c 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_promote_matmul_operands.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_promote_matmul_operands.mlir
@@ -82,3 +82,27 @@
// CHECK-LABEL: func.func @no_promote_fill
// CHECK-NOT: iree_gpu.derived_thread_config
// CHECK: return
+
+// -----
+
+#lowering_config = #iree_gpu.lowering_config<{promote_operands = [0]}>
+
+func.func @promote_pad(%a : tensor<4x127xf32>, %b: tensor<128x128xf32>) -> tensor<4x128xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ %empty = tensor.empty() : tensor<4x128xf32>
+ %fill = linalg.fill ins(%cst : f32) outs(%empty : tensor<4x128xf32>) -> tensor<4x128xf32>
+ %padded = tensor.pad %a low[0, 0] high[0, 1] {
+ ^bb0(%arg0: index, %arg1: index):
+ tensor.yield %cst : f32
+ } : tensor<4x127xf32> to tensor<4x128xf32>
+ %mm = linalg.matmul {lowering_config = #lowering_config}
+ ins(%padded, %b : tensor<4x128xf32>, tensor<128x128xf32>) outs(%fill : tensor<4x128xf32>) -> tensor<4x128xf32>
+ return %mm : tensor<4x128xf32>
+}
+
+// Verify that the pad's result is promoted via a linalg.copy tagged with derived_thread_config.
+// CHECK-LABEL: func.func @promote_pad
+// CHECK: tensor.pad
+// CHECK: linalg.copy
+// CHECK-SAME: derived_thread_config
+// CHECK: return
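
For reference, the CHECK lines above correspond to promoted IR of roughly
this shape (a sketch assuming the copy-based fallback materializes a
tensor.empty destination; %empty2 is an illustrative name, not verbatim
pass output):

    // The pad itself keeps no lowering config ...
    %padded = tensor.pad %a low[0, 0] high[0, 1] { ... }
        : tensor<4x127xf32> to tensor<4x128xf32>
    // ... and the promoted copy of its result carries the derived config.
    %empty2 = tensor.empty() : tensor<4x128xf32>
    %promoted = linalg.copy {lowering_config = #iree_gpu.derived_thread_config}
        ins(%padded : tensor<4x128xf32>)
        outs(%empty2 : tensor<4x128xf32>) -> tensor<4x128xf32>
    %mm = linalg.matmul {lowering_config = #lowering_config}
        ins(%promoted, %b : tensor<4x128xf32>, tensor<128x128xf32>)
        outs(%fill : tensor<4x128xf32>) -> tensor<4x128xf32>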