[Codegen] Make amdgpu_distribute_vectors return a handle (#17239)
Now that the op consumes its target handle, make it return a handle to
the target function so that it can be reused. This makes it possible to
use this op when the target passed to a named sequence is itself a function.
diff --git a/.github/workflows/pkgci_regression_test_amdgpu_rocm.yml b/.github/workflows/pkgci_regression_test_amdgpu_rocm.yml
index 1fa6e09..87719c5 100644
--- a/.github/workflows/pkgci_regression_test_amdgpu_rocm.yml
+++ b/.github/workflows/pkgci_regression_test_amdgpu_rocm.yml
@@ -60,7 +60,7 @@
uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
with:
repository: nod-ai/SHARK-TestSuite
- ref: 816a8af832bd8518cf966e92cf5c2929d5c11a0f
+ ref: 072e8b7f3140b31669257e6042dc1f02f2a4e2cc
path: SHARK-TestSuite
submodules: false
- name: Installing external TestSuite Python requirements
@@ -123,7 +123,7 @@
uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
with:
repository: nod-ai/SHARK-TestSuite
- ref: 320edabdfba7f5cf46e5cd88569fca52308d8988
+ ref: 072e8b7f3140b31669257e6042dc1f02f2a4e2cc
path: SHARK-TestSuite
submodules: false
lfs: true
diff --git a/.github/workflows/pkgci_regression_test_amdgpu_vulkan.yml b/.github/workflows/pkgci_regression_test_amdgpu_vulkan.yml
index 9e8ca70..f0dbfd8 100644
--- a/.github/workflows/pkgci_regression_test_amdgpu_vulkan.yml
+++ b/.github/workflows/pkgci_regression_test_amdgpu_vulkan.yml
@@ -57,7 +57,7 @@
uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
with:
repository: nod-ai/SHARK-TestSuite
- ref: 816a8af832bd8518cf966e92cf5c2929d5c11a0f
+ ref: 072e8b7f3140b31669257e6042dc1f02f2a4e2cc
path: SHARK-TestSuite
submodules: false
- name: Installing external TestSuite Python requirements
@@ -106,7 +106,7 @@
uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
with:
repository: nod-ai/SHARK-TestSuite
- ref: 816a8af832bd8518cf966e92cf5c2929d5c11a0f
+ ref: 072e8b7f3140b31669257e6042dc1f02f2a4e2cc
path: SHARK-TestSuite
submodules: false
lfs: true
diff --git a/.github/workflows/pkgci_regression_test_cpu.yml b/.github/workflows/pkgci_regression_test_cpu.yml
index e71d277..95daebc 100644
--- a/.github/workflows/pkgci_regression_test_cpu.yml
+++ b/.github/workflows/pkgci_regression_test_cpu.yml
@@ -57,7 +57,7 @@
uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
with:
repository: nod-ai/SHARK-TestSuite
- ref: 816a8af832bd8518cf966e92cf5c2929d5c11a0f
+ ref: 072e8b7f3140b31669257e6042dc1f02f2a4e2cc
path: SHARK-TestSuite
submodules: false
lfs: true
@@ -122,7 +122,7 @@
uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
with:
repository: nod-ai/SHARK-TestSuite
- ref: 816a8af832bd8518cf966e92cf5c2929d5c11a0f
+ ref: 072e8b7f3140b31669257e6042dc1f02f2a4e2cc
path: SHARK-TestSuite
submodules: false
lfs: true
diff --git a/.github/workflows/pkgci_regression_test_nvidiagpu_cuda.yml b/.github/workflows/pkgci_regression_test_nvidiagpu_cuda.yml
index 92a0b7b..2eeed0f 100644
--- a/.github/workflows/pkgci_regression_test_nvidiagpu_cuda.yml
+++ b/.github/workflows/pkgci_regression_test_nvidiagpu_cuda.yml
@@ -57,7 +57,7 @@
uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
with:
repository: nod-ai/SHARK-TestSuite
- ref: 816a8af832bd8518cf966e92cf5c2929d5c11a0f
+ ref: 072e8b7f3140b31669257e6042dc1f02f2a4e2cc
path: SHARK-TestSuite
submodules: false
- name: Installing external TestSuite Python requirements
diff --git a/.github/workflows/pkgci_regression_test_nvidiagpu_vulkan.yml b/.github/workflows/pkgci_regression_test_nvidiagpu_vulkan.yml
index 4bbe3d3..aba5e91 100644
--- a/.github/workflows/pkgci_regression_test_nvidiagpu_vulkan.yml
+++ b/.github/workflows/pkgci_regression_test_nvidiagpu_vulkan.yml
@@ -57,7 +57,7 @@
uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
with:
repository: nod-ai/SHARK-TestSuite
- ref: 816a8af832bd8518cf966e92cf5c2929d5c11a0f
+ ref: 072e8b7f3140b31669257e6042dc1f02f2a4e2cc
path: SHARK-TestSuite
submodules: false
- name: Installing external TestSuite Python requirements
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp
index e73d935..54ef39a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensions.cpp
@@ -1486,12 +1486,17 @@
if (failed(distributeVectorOps(target, patterns, options))) {
return emitDefaultSilenceableFailure(target);
}
+ // TODO: The consumption of the target handle is only required because the
+ // transform dialect interpreter will crash without it. This op should not
+ // need to invalidate the handle.
+ results.push_back(target);
return DiagnosedSilenceableFailure::success();
}
void transform_dialect::AMDGPUDistributeVectorsOp::getEffects(
SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
transform::consumesHandle(getTarget(), effects);
+ transform::producesHandle(getResult(), effects);
transform::modifiesPayload(effects);
}
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensionsOps.td b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensionsOps.td
index c0fc457..7cd1454 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensionsOps.td
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/TransformExtensions/LLVMGPUExtensionsOps.td
@@ -741,11 +741,11 @@
let arguments = (ins TransformHandleTypeInterface:$target,
UnitAttr:$test_conversion);
- let results = (outs);
+ let results = (outs TransformHandleTypeInterface:$result);
let assemblyFormat = [{
$target (`test_conversion` $test_conversion^)?
- attr-dict `:` type($target)
+ attr-dict `:` functional-type(operands, results)
}];
let cppNamespace = "mlir::iree_compiler::IREE::transform_dialect";
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/amdgpu_contraction_distribution.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/amdgpu_contraction_distribution.mlir
index 8710f76..b46a8d0 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/amdgpu_contraction_distribution.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/amdgpu_contraction_distribution.mlir
@@ -8,7 +8,7 @@
#map2 = affine_map<(d0, d1, d2) -> (d0, d2)>
#map3 = affine_map<(d0, d1, d2) -> (d1, d0)>
-// A: vector<16x16>, layout = layoutA
+// A: vector<16x16>, layout = layoutA
#row_layout = #iree_vector_ext.per_dim_layout<[BATCHX, LANEX], [1, 16]>
#col_layout = #iree_vector_ext.per_dim_layout<[BATCHY, LANEY, VECTORX], [1, 4, 4]>
#layout_a = #iree_vector_ext.layout<#row_layout, #col_layout>
@@ -43,7 +43,7 @@
}
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
%top_level_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
- transform.iree.amdgpu_distribute_vectors %top_level_func test_conversion : !transform.any_op
+ transform.iree.amdgpu_distribute_vectors %top_level_func test_conversion : (!transform.any_op) -> !transform.any_op
transform.yield
}
}
@@ -84,7 +84,7 @@
}
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
%top_level_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
- transform.iree.amdgpu_distribute_vectors %top_level_func test_conversion : !transform.any_op
+ transform.iree.amdgpu_distribute_vectors %top_level_func test_conversion : (!transform.any_op) -> !transform.any_op
transform.yield
}
}
@@ -132,7 +132,7 @@
}
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
%top_level_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
- transform.iree.amdgpu_distribute_vectors %top_level_func test_conversion : !transform.any_op
+ transform.iree.amdgpu_distribute_vectors %top_level_func test_conversion : (!transform.any_op) -> !transform.any_op
transform.yield
}
}
@@ -179,7 +179,7 @@
}
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
%top_level_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
- transform.iree.amdgpu_distribute_vectors %top_level_func test_conversion : !transform.any_op
+ transform.iree.amdgpu_distribute_vectors %top_level_func test_conversion : (!transform.any_op) -> !transform.any_op
transform.yield
}
}
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention_mfma_transform_spec.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention_mfma_transform_spec.mlir
index bb0ce64..ec99893 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention_mfma_transform_spec.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/attention_mfma_transform_spec.mlir
@@ -170,9 +170,8 @@
transform.iree.set_contraction_layout_attributes %contract2, %layout16x16x16 : !transform.any_op, !transform.any_param
%distribute_func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
- transform.iree.amdgpu_distribute_vectors %distribute_func test_conversion : !transform.any_op
+ %distribute_func_2 = transform.iree.amdgpu_distribute_vectors %distribute_func test_conversion : (!transform.any_op) -> !transform.any_op
- %distribute_func_2 = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
transform.apply_patterns to %distribute_func_2 {
transform.apply_patterns.canonicalization
} : !transform.any_op