Integrate LLVM at llvm/llvm-project@7d3a181c (#12047)
Co-authored-by: Diego Caballero <diegocaballero@google.com>
diff --git a/compiler/src/iree/compiler/API2/api_exports.c b/compiler/src/iree/compiler/API2/api_exports.c
index 21ad67f..7fd6604 100644
--- a/compiler/src/iree/compiler/API2/api_exports.c
+++ b/compiler/src/iree/compiler/API2/api_exports.c
@@ -144,6 +144,7 @@
extern void mlirAttributeIsAFloat();
extern void mlirAttributeIsAInteger();
extern void mlirAttributeIsAIntegerSet();
+extern void mlirAttributeIsALocation();
extern void mlirAttributeIsAOpaque();
extern void mlirAttributeIsASparseElements();
extern void mlirAttributeIsAStridedLayout();
@@ -355,7 +356,9 @@
extern void mlirLocationCallSiteGet();
extern void mlirLocationEqual();
extern void mlirLocationFileLineColGet();
+extern void mlirLocationFromAttribute();
extern void mlirLocationFusedGet();
+extern void mlirLocationGetAttribute();
extern void mlirLocationGetContext();
extern void mlirLocationNameGet();
extern void mlirLocationPrint();
@@ -719,6 +722,7 @@
x += (uintptr_t)&mlirAttributeIsAFloat;
x += (uintptr_t)&mlirAttributeIsAInteger;
x += (uintptr_t)&mlirAttributeIsAIntegerSet;
+ x += (uintptr_t)&mlirAttributeIsALocation;
x += (uintptr_t)&mlirAttributeIsAOpaque;
x += (uintptr_t)&mlirAttributeIsASparseElements;
x += (uintptr_t)&mlirAttributeIsAStridedLayout;
@@ -930,7 +934,9 @@
x += (uintptr_t)&mlirLocationCallSiteGet;
x += (uintptr_t)&mlirLocationEqual;
x += (uintptr_t)&mlirLocationFileLineColGet;
+ x += (uintptr_t)&mlirLocationFromAttribute;
x += (uintptr_t)&mlirLocationFusedGet;
+ x += (uintptr_t)&mlirLocationGetAttribute;
x += (uintptr_t)&mlirLocationGetContext;
x += (uintptr_t)&mlirLocationNameGet;
x += (uintptr_t)&mlirLocationPrint;
diff --git a/compiler/src/iree/compiler/API2/api_exports.def b/compiler/src/iree/compiler/API2/api_exports.def
index 4180261..e3316c9 100644
--- a/compiler/src/iree/compiler/API2/api_exports.def
+++ b/compiler/src/iree/compiler/API2/api_exports.def
@@ -136,6 +136,7 @@
mlirAttributeIsAFloat
mlirAttributeIsAInteger
mlirAttributeIsAIntegerSet
+ mlirAttributeIsALocation
mlirAttributeIsAOpaque
mlirAttributeIsASparseElements
mlirAttributeIsAStridedLayout
@@ -347,7 +348,9 @@
mlirLocationCallSiteGet
mlirLocationEqual
mlirLocationFileLineColGet
+ mlirLocationFromAttribute
mlirLocationFusedGet
+ mlirLocationGetAttribute
mlirLocationGetContext
mlirLocationNameGet
mlirLocationPrint
diff --git a/compiler/src/iree/compiler/API2/api_exports.ld b/compiler/src/iree/compiler/API2/api_exports.ld
index 1629cea..435d025 100644
--- a/compiler/src/iree/compiler/API2/api_exports.ld
+++ b/compiler/src/iree/compiler/API2/api_exports.ld
@@ -137,6 +137,7 @@
mlirAttributeIsAFloat;
mlirAttributeIsAInteger;
mlirAttributeIsAIntegerSet;
+ mlirAttributeIsALocation;
mlirAttributeIsAOpaque;
mlirAttributeIsASparseElements;
mlirAttributeIsAStridedLayout;
@@ -348,7 +349,9 @@
mlirLocationCallSiteGet;
mlirLocationEqual;
mlirLocationFileLineColGet;
+ mlirLocationFromAttribute;
mlirLocationFusedGet;
+ mlirLocationGetAttribute;
mlirLocationGetContext;
mlirLocationNameGet;
mlirLocationPrint;
diff --git a/compiler/src/iree/compiler/API2/api_exports.macos.lst b/compiler/src/iree/compiler/API2/api_exports.macos.lst
index 4d05006..b9786ad 100644
--- a/compiler/src/iree/compiler/API2/api_exports.macos.lst
+++ b/compiler/src/iree/compiler/API2/api_exports.macos.lst
@@ -135,6 +135,7 @@
_mlirAttributeIsAFloat
_mlirAttributeIsAInteger
_mlirAttributeIsAIntegerSet
+_mlirAttributeIsALocation
_mlirAttributeIsAOpaque
_mlirAttributeIsASparseElements
_mlirAttributeIsAStridedLayout
@@ -346,7 +347,9 @@
_mlirLocationCallSiteGet
_mlirLocationEqual
_mlirLocationFileLineColGet
+_mlirLocationFromAttribute
_mlirLocationFusedGet
+_mlirLocationGetAttribute
_mlirLocationGetContext
_mlirLocationNameGet
_mlirLocationPrint
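Note: the four export lists above (api_exports.c, .def, .ld, .macos.lst) pick up the location/attribute bridging functions newly exposed through the MLIR C API at this LLVM commit. A minimal usage sketch, assuming the declarations in mlir-c/IR.h and an existing MlirContext named ctx:

    #include "mlir-c/IR.h"

    // Round-trip a location through its attribute form.
    MlirLocation loc = mlirLocationUnknownGet(ctx);
    MlirAttribute locAttr = mlirLocationGetAttribute(loc);  // location -> attribute
    if (mlirAttributeIsALocation(locAttr)) {
      MlirLocation roundTripped = mlirLocationFromAttribute(locAttr);
      (void)roundTripped;
    }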
diff --git a/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp b/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp
index 14f7d9b..8e39dd0 100644
--- a/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/FlattenMemRefSubspanPass.cpp
@@ -849,7 +849,7 @@
// Fold subviews if any new opportunity has been created.
RewritePatternSet foldSubviewPatterns(context);
memref::populateFoldMemRefAliasOpPatterns(foldSubviewPatterns);
- if (failed(applyPatternsAndFoldGreedily(getOperation()->getRegions(),
+ if (failed(applyPatternsAndFoldGreedily(getOperation(),
std::move(foldSubviewPatterns)))) {
return signalPassFailure();
}
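Note: applyPatternsAndFoldGreedily no longer accepts a list of regions; it is anchored on an operation and walks that op's regions itself. A hedged sketch of the new call shape (standalone helper for illustration only, not IREE code):

    #include "mlir/Transforms/GreedyPatternRewriteDriver.h"

    // Apply a pattern set rooted at `root`; the driver visits all of root's
    // regions, replacing the removed region-list overload.
    static mlir::LogicalResult applyLocally(mlir::Operation *root,
                                            mlir::RewritePatternSet &&patterns) {
      return mlir::applyPatternsAndFoldGreedily(root, std::move(patterns));
    }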
diff --git a/compiler/src/iree/compiler/Codegen/Common/RematerializeParallelOps.cpp b/compiler/src/iree/compiler/Codegen/Common/RematerializeParallelOps.cpp
index d140df4..5601a02 100644
--- a/compiler/src/iree/compiler/Codegen/Common/RematerializeParallelOps.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/RematerializeParallelOps.cpp
@@ -28,15 +28,15 @@
for (OpOperand& opOperand : genericOp->getOpOperands()) {
if (!linalg::areElementwiseOpsFusable(&opOperand)) continue;
- FailureOr<Operation*> fusedOp =
+ FailureOr<linalg::ElementwiseOpFusionResult> fusedOp =
linalg::fuseElementwiseOps(rewriter, &opOperand);
if (succeeded(fusedOp)) {
// Forward lowering config.
if (auto loweringAttr = getLoweringConfig(genericOp)) {
- setLoweringConfig(fusedOp.value(), loweringAttr);
+ setLoweringConfig(fusedOp.value().fusedOp, loweringAttr);
}
- auto replacements =
- fusedOp.value()->getResults().take_back(genericOp.getNumResults());
+ auto replacements = fusedOp.value().fusedOp->getResults().take_back(
+ genericOp.getNumResults());
rewriter.replaceOp(genericOp, replacements);
return success();
}
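Note: linalg::fuseElementwiseOps now returns an ElementwiseOpFusionResult rather than a bare Operation*, so callers reach the fused op through its fusedOp member as above. A condensed sketch of the new calling convention (`rewriter`, `fusedOperand`, and `consumerOp` are placeholders for what the pass already has in scope):

    FailureOr<linalg::ElementwiseOpFusionResult> fusion =
        linalg::fuseElementwiseOps(rewriter, fusedOperand);
    if (succeeded(fusion)) {
      Operation *fusedOp = fusion->fusedOp;  // the newly created generic op
      // The consumer's results are the trailing results of the fused op.
      rewriter.replaceOp(consumerOp,
                         fusedOp->getResults().take_back(consumerOp->getNumResults()));
    }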
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/affinemin_canonicalization.mlir b/compiler/src/iree/compiler/Codegen/Common/test/affinemin_canonicalization.mlir
index 61fa94e..63ecadb 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/affinemin_canonicalization.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/affinemin_canonicalization.mlir
@@ -8,12 +8,11 @@
%0 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%id1]
%1 = affine.apply affine_map<()[s0] -> (s0 * 32)>()[%count1]
+ // CHECK-DAG: %[[C32:.*]] = arith.constant 32 : index
+ // CHECK-DAG: %[[C4:.*]] = arith.constant 4 : i64
// CHECK: scf.for
- // CHECK: %[[C32:.*]] = arith.constant 32 : index
// CHECK: scf.for %{{.*}} = %{{.*}} to %[[C32]]
- // CHECK-NEXT: %[[C4:.*]] = arith.constant 4 : index
- // CHECK-NEXT: %[[C4I64:.*]] = arith.index_cast %[[C4:.*]]
- // CHECK-NEXT: memref.store %[[C4I64]], %{{.*}}[] : memref<i64>
+ // CHECK-NEXT: memref.store %[[C4]], %{{.*}}[] : memref<i64>
scf.for %arg0 = %0 to %c1024 step %1 {
%2 = affine.min affine_map<(d0) -> (32, -d0 + 1024)>(%arg0)
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%id2]
@@ -31,9 +30,7 @@
// CHECK: scf.for
// CHECK: %[[MIN:.*]] = affine.min
// CHECK: scf.for %{{.*}} = %{{.*}} to %[[MIN]]
- // CHECK-NEXT: %[[C4:.*]] = arith.constant 4 : index
- // CHECK-NEXT: %[[C4I64:.*]] = arith.index_cast %[[C4:.*]]
- // CHECK-NEXT: memref.store %[[C4I64]], %{{.*}}[] : memref<i64>
+ // CHECK-NEXT: memref.store %[[C4]], %{{.*}}[] : memref<i64>
scf.for %arg0 = %0 to %c1020 step %1 {
%2 = affine.min affine_map<(d0) -> (32, -d0 + 1020)>(%arg0)
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%id2]
@@ -49,9 +46,7 @@
// CHECK: scf.for
// CHECK: %[[MIN:.*]] = affine.min
// CHECK: scf.parallel {{.*}} to (%[[MIN]])
- // CHECK-NEXT: %[[C4:.*]] = arith.constant 4 : index
- // CHECK-NEXT: %[[C4I64:.*]] = arith.index_cast %[[C4:.*]]
- // CHECK-NEXT: memref.store %[[C4I64]], %{{.*}}[] : memref<i64>
+ // CHECK-NEXT: memref.store %[[C4]], %{{.*}}[] : memref<i64>
scf.for %arg0 = %0 to %c1020 step %1 {
%2 = affine.min affine_map<(d0) -> (32, -d0 + 1020)>(%arg0)
%3 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%id2]
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/reductions_codegen_spec.mlir b/compiler/src/iree/compiler/Codegen/Common/test/reductions_codegen_spec.mlir
index 475b2d2..b63d491 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/reductions_codegen_spec.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/reductions_codegen_spec.mlir
@@ -17,7 +17,7 @@
%grid_loop, %outer_tiled = transform.structured.tile_to_foreach_thread_op %fusion_root_1 tile_sizes [1]
( mapping = [#gpu.block<x>] )
- %func = transform.structured.match ops{["func.func"]} in %arg0
+ %func = transform.structured.match ops{["func.func"]} in %arg0 : (!pdl.operation) -> !pdl.operation
%func_1 = transform.iree.apply_patterns %func { bubble_collapse_expand }
// Excessively eager canonicalization results in `fill`s being "fused" due to
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/transform_buffer_opt.mlir b/compiler/src/iree/compiler/Codegen/Common/test/transform_buffer_opt.mlir
index 7b8b3b3..ef59dd5 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/transform_buffer_opt.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/transform_buffer_opt.mlir
@@ -19,7 +19,7 @@
^bb0(%arg0: !pdl.operation):
transform.sequence %arg0 : !pdl.operation failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %0 = transform.structured.match ops{["func.func"]} in %arg1
+ %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%1 = transform.iree.apply_buffer_optimizations %0
}
}
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/transform_dialect_apply_pattern_op.mlir b/compiler/src/iree/compiler/Codegen/Common/test/transform_dialect_apply_pattern_op.mlir
index eb27f03..eda27bb 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/transform_dialect_apply_pattern_op.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/transform_dialect_apply_pattern_op.mlir
@@ -12,7 +12,7 @@
^bb0(%arg0: !pdl.operation):
transform.sequence %arg0 : !pdl.operation failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %0 = transform.structured.match ops{["func.func"]} in %arg1
+ %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation
transform.iree.apply_patterns %0 { canonicalization }
}
}
@@ -54,7 +54,7 @@
transform.sequence failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %0 = transform.structured.match ops{["scf.foreach_thread"]} in %arg1
+ %0 = transform.structured.match ops{["scf.foreach_thread"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%1 = transform.cast %0 : !pdl.operation to !transform.op<"scf.foreach_thread">
transform.iree.share_foreach_thread_operands %1 share_operands = [0] : (!transform.op<"scf.foreach_thread">) -> !transform.op<"scf.foreach_thread">
}
@@ -86,6 +86,6 @@
transform.sequence failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %0 = transform.structured.match ops{["func.func"]} in %arg1
+ %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation
transform.iree.apply_patterns %0 { bubble_collapse_expand }
}
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp b/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp
index 53d284c..c9d7060 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/BufferizationInterfaces.cpp
@@ -172,8 +172,8 @@
return false;
}
- SmallVector<OpResult> getAliasingOpResult(Operation *op, OpOperand &opOperand,
- const AnalysisState &state) const {
+ bufferization::AliasingOpResultList getAliasingOpResults(
+ Operation *op, OpOperand &opOperand, const AnalysisState &state) const {
return {};
}
@@ -264,7 +264,7 @@
SmallVector<Value> newOutputBuffers;
for (OpResult opResult : op->getOpResults()) {
SmallVector<OpOperand *> aliasingOpOperands =
- analysisState.getAliasingOpOperand(opResult);
+ analysisState.getAliasingOpOperands(opResult);
assert(aliasingOpOperands.size() == 1 && "expected 1 OpOperand");
FailureOr<Value> resultBuffer =
getBuffer(rewriter, aliasingOpOperands.front()->get(), options);
@@ -320,10 +320,10 @@
const AnalysisState &state) const {
// Operand is written to if it has an aliasing OpResult.
auto bufferizableOp = cast<BufferizableOpInterface>(op);
- return !bufferizableOp.getAliasingOpResult(opOperand, state).empty();
+ return !bufferizableOp.getAliasingOpResults(opOperand, state).empty();
}
- SmallVector<OpOperand *> getAliasingOpOperand(
+ bufferization::AliasingOpOperandList getAliasingOpOperands(
Operation *op, OpResult opResult, const AnalysisState &state) const {
auto linalgExtOp = cast<IREE::LinalgExt::LinalgExtOp>(op);
@@ -331,8 +331,8 @@
return {linalgExtOp.getOutputOperand(opResult.getResultNumber())};
}
- SmallVector<OpResult> getAliasingOpResult(Operation *op, OpOperand &opOperand,
- const AnalysisState &state) const {
+ bufferization::AliasingOpResultList getAliasingOpResults(
+ Operation *op, OpOperand &opOperand, const AnalysisState &state) const {
auto dspOp = cast<DestinationStyleOpInterface>(op);
// The i-th "out" tensor may alias with the i-th OpResult.
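Note: the bufferizable-op interface hooks were renamed to their plural forms (getAliasingOpOperands / getAliasingOpResults) and now return the corresponding aliasing list types. A hedged sketch of the new hook shape, mirroring the external model above (the element type is whatever bufferization::AliasingOpResultList resolves to at this LLVM commit):

    bufferization::AliasingOpResultList getAliasingOpResults(
        Operation *op, OpOperand &opOperand, const AnalysisState &state) const {
      return {};  // none of this op's operands alias its results
    }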
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
index bfdecd1..0cbac1e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/ConvertToLLVM.cpp
@@ -473,7 +473,7 @@
populateComplexToLLVMConversionPatterns(typeConverter, patterns);
populateMathToLLVMConversionPatterns(typeConverter, patterns);
memref::populateExpandStridedMetadataPatterns(patterns);
- populateMemRefToLLVMConversionPatterns(typeConverter, patterns);
+ populateFinalizeMemRefToLLVMConversionPatterns(typeConverter, patterns);
populateFuncToLLVMConversionPatterns(typeConverter, patterns);
arith::populateArithToLLVMConversionPatterns(typeConverter, patterns);
populateVectorToSCFConversionPatterns(patterns);
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir
index b94a17f..bf02a3a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir
@@ -37,6 +37,6 @@
^bb1(%variant_op: !pdl.operation):
%variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op
%variant_op_3 = transform.iree.bufferize %variant_op_2
- %func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %func
}
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp
index 0606edf..4bd1f00 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToNVVM.cpp
@@ -112,7 +112,7 @@
populateLLVMConversionPatterns(&getContext(), llvmPatterns, converter);
populateMathToLLVMConversionPatterns(converter, llvmPatterns);
memref::populateExpandStridedMetadataPatterns(llvmPatterns);
- populateMemRefToLLVMConversionPatterns(converter, llvmPatterns);
+ populateFinalizeMemRefToLLVMConversionPatterns(converter, llvmPatterns);
populateFuncToLLVMConversionPatterns(converter, llvmPatterns);
cf::populateControlFlowToLLVMConversionPatterns(converter, llvmPatterns);
arith::populateArithToLLVMConversionPatterns(converter, llvmPatterns);
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp
index d2c8f77..1e393a4 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/ConvertToROCDL.cpp
@@ -83,7 +83,7 @@
populateLLVMConversionPatterns(&getContext(), llvmPatterns, converter);
populateMathToLLVMConversionPatterns(converter, llvmPatterns);
memref::populateExpandStridedMetadataPatterns(llvmPatterns);
- populateMemRefToLLVMConversionPatterns(converter, llvmPatterns);
+ populateFinalizeMemRefToLLVMConversionPatterns(converter, llvmPatterns);
populateFuncToLLVMConversionPatterns(converter, llvmPatterns);
cf::populateControlFlowToLLVMConversionPatterns(converter, llvmPatterns);
arith::populateArithToLLVMConversionPatterns(converter, llvmPatterns);
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir
index e1353af..dea32e2 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir
@@ -31,7 +31,7 @@
^bb1(%variant_op: !pdl.operation):
%variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op
%variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op_2
- %func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %func
}
}
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_bufferize_spec.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_bufferize_spec.mlir
index abbb542..e4d0557 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_bufferize_spec.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_bufferize_spec.mlir
@@ -2,6 +2,6 @@
^bb1(%variant_op: !pdl.operation):
%variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op
%variant_op_3 = transform.iree.bufferize %variant_op_2
- %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %memref_func
}
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_foreach_to_gpu_spec.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_foreach_to_gpu_spec.mlir
index 5ad61a5..3aa246f 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_foreach_to_gpu_spec.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_foreach_to_gpu_spec.mlir
@@ -1,20 +1,20 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%variant_op: !pdl.operation):
- %0 = transform.structured.match ops{["linalg.fill"]} in %variant_op
+ %0 = transform.structured.match ops{["linalg.fill"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%foreach_thread, %tiled_fill = transform.structured.tile_to_foreach_thread_op %0 num_threads [5, 1]
( mapping = [#gpu.thread<y>, #gpu.thread<x>] )
- %1 = transform.structured.match ops{["linalg.matmul"]} in %variant_op
+ %1 = transform.structured.match ops{["linalg.matmul"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%foreach_thread_2, %tiled_matmul = transform.structured.tile_to_foreach_thread_op %1 num_threads [7, 9]
( mapping = [#gpu.thread<x>, #gpu.thread<y>] )
%variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op
%variant_op_3 = transform.iree.bufferize %variant_op_2
- %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %memref_func
  // Get the function to apply the transformation to.
- %2 = transform.structured.match ops{["linalg.matmul"]} in %variant_op_3
+ %2 = transform.structured.match ops{["linalg.matmul"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%func = transform.get_closest_isolated_parent %2 : (!pdl.operation) -> !pdl.operation
transform.iree.map_nested_foreach_thread_to_gpu_threads %func { workgroup_size = [10, 11]}
}
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_distribution_spec.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_distribution_spec.mlir
index d44e05c..d52461a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_distribution_spec.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_distribution_spec.mlir
@@ -1,6 +1,6 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %if_op = transform.structured.match ops{["scf.if"]} in %arg1
+ %if_op = transform.structured.match ops{["scf.if"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%warp = transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 }
%isolated = transform.get_closest_isolated_parent %warp : (!pdl.operation) -> !pdl.operation
transform.iree.vector.warp_distribute %isolated
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_warp_execute_on_lane_0_spec.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_warp_execute_on_lane_0_spec.mlir
index e24e76b..26cb9bf 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_warp_execute_on_lane_0_spec.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_warp_execute_on_lane_0_spec.mlir
@@ -1,5 +1,5 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %if_op = transform.structured.match ops{["scf.if"]} in %arg1
+ %if_op = transform.structured.match ops{["scf.if"]} in %arg1 : (!pdl.operation) -> !pdl.operation
transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 }
}
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_vector_to_mma.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_vector_to_mma.mlir
index e8f80fd..f182946 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_vector_to_mma.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_vector_to_mma.mlir
@@ -49,7 +49,7 @@
}
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%variant_op: !pdl.operation):
- %func = transform.structured.match ops{["func.func"]} in %variant_op
+ %func = transform.structured.match ops{["func.func"]} in %variant_op : (!pdl.operation) -> !pdl.operation
transform.iree.vector.vector_to_mma_conversion %func
}
}
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorize.cpp b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorize.cpp
index 5cb2191..7b779bd 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorize.cpp
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/SPIRVVectorize.cpp
@@ -232,8 +232,7 @@
RewritePatternSet patterns(context);
vector::populateVectorMultiReductionLoweringPatterns(
patterns, vector::VectorMultiReductionLowering::InnerParallel);
- if (failed(applyOpPatternsAndFold(reductionOps, std::move(patterns),
- GreedyRewriteStrictness::AnyOp))) {
+ if (failed(applyOpPatternsAndFold(reductionOps, std::move(patterns)))) {
funcOp.emitOpError("vector lowering failed");
return signalPassFailure();
}
diff --git a/compiler/src/iree/compiler/Codegen/TransformDialectStrategies/GPU/Common.cpp b/compiler/src/iree/compiler/Codegen/TransformDialectStrategies/GPU/Common.cpp
index 93b4e3d..fdaef14 100644
--- a/compiler/src/iree/compiler/Codegen/TransformDialectStrategies/GPU/Common.cpp
+++ b/compiler/src/iree/compiler/Codegen/TransformDialectStrategies/GPU/Common.cpp
@@ -158,7 +158,8 @@
// Locally suppress failures for this op only because it doesn't cover the
// `threadIdx.x == 0 && threadIdx.y == 0` case at the moment.
auto sequence = b.create<SequenceOp>(
- TypeRange(), transform::FailurePropagationMode::Suppress, variantH);
+ TypeRange(), transform::FailurePropagationMode::Suppress, variantH,
+ /*extraBindings=*/ValueRange());
{
OpBuilder::InsertionGuard guard(b);
b.createBlock(&sequence.getBody(), sequence.getBody().begin(),
diff --git a/compiler/src/iree/compiler/Codegen/Transforms/AffineMinDistributedSCFCanonicalization.cpp b/compiler/src/iree/compiler/Codegen/Transforms/AffineMinDistributedSCFCanonicalization.cpp
index 34c344d..1d6cb26 100644
--- a/compiler/src/iree/compiler/Codegen/Transforms/AffineMinDistributedSCFCanonicalization.cpp
+++ b/compiler/src/iree/compiler/Codegen/Transforms/AffineMinDistributedSCFCanonicalization.cpp
@@ -191,9 +191,11 @@
// Explicitly walk and apply the pattern locally to avoid more general
// folding on the rest of the IR.
- funcOp.walk([&frozenPatterns](AffineMinOp minOp) {
- (void)applyOpPatternsAndFold(minOp, frozenPatterns);
+ SmallVector<Operation *> minOps;
+ funcOp.walk([&minOps](AffineMinOp minOp) {
+ minOps.push_back(minOp.getOperation());
});
+ (void)applyOpPatternsAndFold(minOps, frozenPatterns);
}
};
} // namespace
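Note: this change and the SPIRVVectorize one above both adapt to applyOpPatternsAndFold operating on a batch of ops: the targets are collected first, then the driver runs once over the whole list. A minimal sketch of the pattern (funcOp and frozenPatterns come from the surrounding pass, as above):

    // Gather every affine.min first, then fold them in a single driver run.
    SmallVector<Operation *> minOps;
    funcOp.walk([&](AffineMinOp minOp) { minOps.push_back(minOp.getOperation()); });
    (void)applyOpPatternsAndFold(minOps, frozenPatterns);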
diff --git a/compiler/src/iree/compiler/Codegen/Utils/LinkingUtils.cpp b/compiler/src/iree/compiler/Codegen/Utils/LinkingUtils.cpp
index 75ae2f3..accf77f 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/LinkingUtils.cpp
+++ b/compiler/src/iree/compiler/Codegen/Utils/LinkingUtils.cpp
@@ -83,7 +83,7 @@
// Private symbols can be safely folded into duplicates or renamed.
if (OperationEquivalence::isEquivalentTo(
targetOp, op, OperationEquivalence::exactValueMatch,
- OperationEquivalence::exactValueMatch,
+ /*markEquivalent=*/nullptr,
OperationEquivalence::Flags::IgnoreLocations)) {
// Optimization: skip over duplicate private symbols.
// We could let CSE do this later, but we may as well check here.
@@ -156,7 +156,7 @@
auto oldAttr = use.getSymbolRef();
auto newAttr = map.lookup(oldAttr);
if (!newAttr) continue;
- auto newDict = use.getUser()->getAttrDictionary().replaceSubElements(
+ auto newDict = use.getUser()->getAttrDictionary().replace(
[&](Attribute attr) -> std::pair<Attribute, WalkResult> {
if (attr == oldAttr) {
// Found old->new replacement.
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp
index fa08d3a..e572627 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/FusionOfTensorOps.cpp
@@ -123,15 +123,15 @@
consumerOp->removeAttr(getConsumerAttributeName());
producerOp->removeAttr(getProducerAttributeName());
- FailureOr<Operation *> fusedOperation =
+ FailureOr<linalg::ElementwiseOpFusionResult> fusedOperation =
linalg::fuseElementwiseOps(rewriter, fusedOperand);
if (failed(fusedOperation)) {
return rewriter.notifyMatchFailure(consumerOp,
"failed to fuse with producer");
}
- assert(fusedOperation.value()->getNumResults() ==
+ assert(fusedOperation.value().fusedOp->getNumResults() ==
producerOp->getNumResults() + consumerOp->getNumResults());
- auto fusedResults = fusedOperation.value()->getResults();
+ auto fusedResults = fusedOperation.value().fusedOp->getResults();
rewriter.replaceOp(producerOp,
fusedResults.take_front(producerOp->getNumResults()));
rewriter.replaceOp(consumerOp,
@@ -190,8 +190,7 @@
RewritePatternSet fusionPatterns(context);
fusionPatterns.insert<FuseElementwiseOpsWithMultipleUses>(context);
linalg::GenericOp::getCanonicalizationPatterns(fusionPatterns, context);
- if (failed(applyPatternsAndFoldGreedily(funcOp->getRegions(),
- std::move(fusionPatterns)))) {
+ if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(fusionPatterns)))) {
return funcOp->emitOpError("multi use producer -> consumer fusion failed");
}
return numCandidates;
@@ -285,8 +284,7 @@
GreedyRewriteConfig rewriteConfig;
rewriteConfig.maxIterations = GreedyRewriteConfig::kNoLimit;
- if (failed(applyPatternsAndFoldGreedily(funcOp->getRegions(),
- std::move(fusionPatterns),
+ if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(fusionPatterns),
rewriteConfig))) {
funcOp->emitError("failed to apply fusion patterns");
return signalPassFailure();
@@ -327,7 +325,7 @@
memref::populateResolveRankedShapeTypeResultDimsPatterns(
collapsingReshapePatterns);
if (failed(applyPatternsAndFoldGreedily(
- funcOp->getRegions(), std::move(collapsingReshapePatterns)))) {
+ funcOp, std::move(collapsingReshapePatterns)))) {
funcOp->emitError("failed to apply collapsing reshape patterns");
return signalPassFailure();
}
@@ -343,7 +341,7 @@
{
RewritePatternSet opFoldingPatterns(&getContext());
tensor::populateFoldTensorEmptyPatterns(opFoldingPatterns);
- if (failed(applyPatternsAndFoldGreedily(funcOp->getRegions(),
+ if (failed(applyPatternsAndFoldGreedily(funcOp,
std::move(opFoldingPatterns)))) {
funcOp->emitError("failed to apply op folding patterns");
return signalPassFailure();
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transform_dialect_dispatch_spec.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transform_dialect_dispatch_spec.mlir
index 87315ba..96f7f09 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transform_dialect_dispatch_spec.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transform_dialect_dispatch_spec.mlir
@@ -1,6 +1,6 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
+ %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%foreach_op, %tiled_op = transform.structured.tile_to_foreach_thread_op %0 num_threads [42, 67]
%dispatch_op = transform.iree.foreach_thread_to_flow %foreach_op
}
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transform_dispatch_region_formation.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transform_dispatch_region_formation.mlir
index b5db680..b952164 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transform_dispatch_region_formation.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transform_dispatch_region_formation.mlir
@@ -17,7 +17,7 @@
^bb0(%arg0: !pdl.operation):
transform.sequence %arg0 : !pdl.operation failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %0 = transform.structured.match ops{["tensor.extract_slice"]} in %arg1
+ %0 = transform.structured.match ops{["tensor.extract_slice"]} in %arg1 : (!pdl.operation) -> !pdl.operation
transform.iree.wrap_in_dispatch_region %0 { generateWorkload = false }
}
}
@@ -48,9 +48,9 @@
^bb0(%arg0: !pdl.operation):
transform.sequence %arg0 : !pdl.operation failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1
+ %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%dispatch_op = transform.iree.wrap_in_dispatch_region %0 { generateWorkload = false }
- %1 = transform.structured.match ops{["test.dummy"]} in %arg1
+ %1 = transform.structured.match ops{["test.dummy"]} in %arg1 : (!pdl.operation) -> !pdl.operation
transform.iree.clone_preceding_op_into_dispatch_region %1 into %dispatch_op
}
}
@@ -81,9 +81,9 @@
^bb0(%arg0: !pdl.operation):
transform.sequence %arg0 : !pdl.operation failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1
+ %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%dispatch_op = transform.iree.wrap_in_dispatch_region %0 { generateWorkload = false }
- %1 = transform.structured.match ops{["tensor.extract_slice"]} in %arg1
+ %1 = transform.structured.match ops{["tensor.extract_slice"]} in %arg1 : (!pdl.operation) -> !pdl.operation
transform.iree.move_preceding_op_into_dispatch_region %1 into %dispatch_op
}
}
@@ -108,7 +108,7 @@
^bb0(%arg0: !pdl.operation):
transform.sequence %arg0 : !pdl.operation failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
+ %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%region_op = transform.iree.wrap_in_dispatch_region %0 { generateWorkload = false }
transform.iree.region_to_workgroups %region_op
}
@@ -143,9 +143,9 @@
^bb0(%arg0: !pdl.operation):
transform.sequence %arg0 : !pdl.operation failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1
+ %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%dispatch_op = transform.iree.wrap_in_dispatch_region %0 { generateWorkload = false }
- %1 = transform.structured.match attributes{"__tagged__"} in %arg1
+ %1 = transform.structured.match attributes{"__tagged__"} in %arg1 : (!pdl.operation) -> !pdl.operation
transform.iree.clone_preceding_op_into_dispatch_region %1 into %dispatch_op
}
}
@@ -176,9 +176,9 @@
^bb0(%arg0: !pdl.operation):
transform.sequence %arg0 : !pdl.operation failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %0 = transform.structured.match ops{["tensor.extract_slice"]} in %arg1
+ %0 = transform.structured.match ops{["tensor.extract_slice"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%dispatch_op = transform.iree.wrap_in_dispatch_region %0 { generateWorkload = false }
- %1 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1
+ %1 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!pdl.operation) -> !pdl.operation
transform.iree.move_succeeding_op_into_dispatch_region %1 into %dispatch_op
}
}
@@ -212,9 +212,9 @@
^bb0(%arg0: !pdl.operation):
transform.sequence %arg0 : !pdl.operation failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %0 = transform.structured.match ops{["test.dummy_op"]} in %arg1
+ %0 = transform.structured.match ops{["test.dummy_op"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%dispatch_op = transform.iree.wrap_in_dispatch_region %0 {generateWorkload=false}
- %1 = transform.structured.match attributes{"__tagged__"} in %arg1
+ %1 = transform.structured.match attributes{"__tagged__"} in %arg1 : (!pdl.operation) -> !pdl.operation
transform.iree.move_succeeding_op_into_dispatch_region %1 into %dispatch_op
}
}
@@ -242,9 +242,9 @@
^bb0(%arg0: !pdl.operation):
transform.sequence %arg0 : !pdl.operation failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %0 = transform.structured.match ops{["tensor.extract_slice"]} in %arg1
+ %0 = transform.structured.match ops{["tensor.extract_slice"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%dispatch_op = transform.iree.wrap_in_dispatch_region %0 {generateWorkload=false}
- %1 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1
+ %1 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!pdl.operation) -> !pdl.operation
transform.iree.clone_succeeding_op_into_dispatch_region %1 into %dispatch_op
}
}
@@ -278,7 +278,7 @@
^bb0(%arg0: !pdl.operation):
transform.sequence %arg0 : !pdl.operation failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1
+ %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%dispatch_op = transform.iree.wrap_in_dispatch_region %0 { generateWorkload = false }
}
}
diff --git a/compiler/src/iree/compiler/Dialect/HAL/IR/HALBase.td b/compiler/src/iree/compiler/Dialect/HAL/IR/HALBase.td
index fb51853..b0c85f2 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/IR/HALBase.td
+++ b/compiler/src/iree/compiler/Dialect/HAL/IR/HALBase.td
@@ -13,7 +13,6 @@
include "iree/compiler/Dialect/Util/IR/UtilTypes.td"
include "mlir/IR/AttrTypeBase.td"
include "mlir/IR/EnumAttr.td"
-include "mlir/IR/SubElementInterfaces.td"
//===----------------------------------------------------------------------===//
// HAL enums
@@ -625,9 +624,7 @@
//===----------------------------------------------------------------------===//
def HAL_DeviceTargetAttr :
- AttrDef<HAL_Dialect, "DeviceTarget", [
- SubElementAttrInterface,
- ]> {
+ AttrDef<HAL_Dialect, "DeviceTarget"> {
let mnemonic = "device.target";
let summary = [{generic device target specification}];
let description = [{
@@ -685,9 +682,7 @@
}
def HAL_ExecutableTargetAttr :
- AttrDef<HAL_Dialect, "ExecutableTarget", [
- DeclareAttrInterfaceMethods<SubElementAttrInterface>,
- ]> {
+ AttrDef<HAL_Dialect, "ExecutableTarget"> {
let mnemonic = "executable.target";
let summary = [{generic executable target specification}];
let description = [{
@@ -902,7 +897,6 @@
def HAL_MatchAnyAttr :
AttrDef<HAL_Dialect, "MatchAny", [
- DeclareAttrInterfaceMethods<SubElementAttrInterface>,
DeclareAttrInterfaceMethods<HAL_MatchAttrInterface>,
]> {
let mnemonic = "match.any";
@@ -923,7 +917,6 @@
def HAL_MatchAllAttr :
AttrDef<HAL_Dialect, "MatchAll", [
- DeclareAttrInterfaceMethods<SubElementAttrInterface>,
DeclareAttrInterfaceMethods<HAL_MatchAttrInterface>,
]> {
let mnemonic = "match.all";
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.cpp
index 2cc5806..7225a9e 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.cpp
@@ -61,7 +61,10 @@
static llvm::StructType *makeEnvironmentType(llvm::LLVMContext &context) {
auto *type = llvm::StructType::getTypeByName(
context, "iree_hal_executable_environment_v0_t");
- assert(type && "environment type must be defined by ConvertToLLVM");
+ if (!type) {
+ type = llvm::StructType::create(context,
+ "iree_hal_executable_environment_v0_t");
+ }
return type;
}
@@ -71,7 +74,10 @@
static llvm::StructType *makeDispatchStateType(llvm::LLVMContext &context) {
auto *type = llvm::StructType::getTypeByName(
context, "iree_hal_executable_dispatch_state_v0_t");
- assert(type && "state type must be defined by ConvertToLLVM");
+ if (!type) {
+ type = llvm::StructType::create(context,
+ "iree_hal_executable_dispatch_state_v0_t");
+ }
return type;
}
@@ -81,7 +87,10 @@
static llvm::StructType *makeWorkgroupStateType(llvm::LLVMContext &context) {
auto *type = llvm::StructType::getTypeByName(
context, "iree_hal_executable_workgroup_state_v0_t");
- assert(type && "state type must be defined by ConvertToLLVM");
+ if (!type) {
+ type = llvm::StructType::create(context,
+ "iree_hal_executable_workgroup_state_v0_t");
+ }
return type;
}
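Note: LibraryBuilder no longer requires ConvertToLLVM to have already defined the executable ABI struct types; when the lookup fails it now creates an opaque named struct instead of asserting. A minimal sketch, assuming an llvm::LLVMContext named context:

    llvm::StructType *type = llvm::StructType::getTypeByName(
        context, "iree_hal_executable_environment_v0_t");
    if (!type) {
      // Creates an opaque (body-less) struct registered under that name.
      type = llvm::StructType::create(context,
                                      "iree_hal_executable_environment_v0_t");
    }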
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/BUILD b/compiler/src/iree/compiler/Dialect/Stream/IR/BUILD
index 3e0fa02..ef91419 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/BUILD
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/BUILD
@@ -32,7 +32,6 @@
"@llvm-project//mlir:InferTypeOpInterfaceTdFiles",
"@llvm-project//mlir:OpBaseTdFiles",
"@llvm-project//mlir:SideEffectInterfacesTdFiles",
- "@llvm-project//mlir:SubElementInterfacesTdFiles",
"@llvm-project//mlir:ViewLikeInterfaceTdFiles",
],
)
diff --git a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamBase.td b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamBase.td
index e99fc9e..b39d6f3 100644
--- a/compiler/src/iree/compiler/Dialect/Stream/IR/StreamBase.td
+++ b/compiler/src/iree/compiler/Dialect/Stream/IR/StreamBase.td
@@ -14,7 +14,6 @@
include "mlir/IR/AttrTypeBase.td"
include "mlir/IR/BuiltinAttributeInterfaces.td"
include "mlir/IR/EnumAttr.td"
-include "mlir/IR/SubElementInterfaces.td"
//===----------------------------------------------------------------------===//
// IREE stream modeling dialect
@@ -250,9 +249,7 @@
}
def Stream_PartitioningConfigAttr :
- AttrDef<Stream_Dialect, "PartitioningConfig", [
- SubElementAttrInterface,
- ]> {
+ AttrDef<Stream_Dialect, "PartitioningConfig"> {
let mnemonic = "partitioning_config";
let summary = [{defines partitioning configuration}];
let description = [{
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/BUILD b/compiler/src/iree/compiler/Dialect/Util/IR/BUILD
index a1c904d..8f89710 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/BUILD
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/BUILD
@@ -37,7 +37,6 @@
"@llvm-project//mlir:InferTypeOpInterfaceTdFiles",
"@llvm-project//mlir:OpBaseTdFiles",
"@llvm-project//mlir:SideEffectInterfacesTdFiles",
- "@llvm-project//mlir:SubElementInterfacesTdFiles",
"@llvm-project//mlir:ViewLikeInterfaceTdFiles",
],
)
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilAttrs.td b/compiler/src/iree/compiler/Dialect/Util/IR/UtilAttrs.td
index d462c0a..e5737ad 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilAttrs.td
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilAttrs.td
@@ -11,7 +11,6 @@
include "iree/compiler/Dialect/Util/IR/UtilInterfaces.td"
include "mlir/IR/AttrTypeBase.td"
include "mlir/IR/OpBase.td"
-include "mlir/IR/SubElementInterfaces.td"
//===----------------------------------------------------------------------===//
// Buffer attributes
@@ -33,7 +32,6 @@
}
def Util_CompositeAttr : AttrDef<Util_Dialect, "Composite", [
- SubElementAttrInterface,
DeclareAttrInterfaceMethods<Util_SerializableAttrInterface, [
"serializeToBuffer",
"serializeToStream",
diff --git a/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.h b/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.h
index f46be35..1915a59 100644
--- a/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.h
+++ b/compiler/src/iree/compiler/Dialect/Util/IR/UtilTypes.h
@@ -16,7 +16,6 @@
#include "mlir/IR/Location.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/PatternMatch.h"
-#include "mlir/IR/SubElementInterfaces.h"
#include "mlir/IR/TypeSupport.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/IR/Types.h"
diff --git a/compiler/src/iree/compiler/InputConversion/MHLO/test/mhlo_to_mhlo_preprocessing.mlir b/compiler/src/iree/compiler/InputConversion/MHLO/test/mhlo_to_mhlo_preprocessing.mlir
index ef1208a..5a0ec47 100644
--- a/compiler/src/iree/compiler/InputConversion/MHLO/test/mhlo_to_mhlo_preprocessing.mlir
+++ b/compiler/src/iree/compiler/InputConversion/MHLO/test/mhlo_to_mhlo_preprocessing.mlir
@@ -32,7 +32,7 @@
// -----
// CHECK: @reorder_broadcast_in_dim_scalar_binary(%[[ARG0:.*]]: tensor<f32>, %[[ARG1:.*]]: tensor<f32>, %[[ARG2:.*]]: tensor<i32>, %[[ARG3:.*]]: tensor<i32>)
-func.func @reorder_broadcast_in_dim_scalar_binary(%arg0: tensor<f32>, %arg1: tensor<f32>, %arg2: tensor<i32>, %arg3: tensor<i32>) -> (tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xi32>, tensor<1x8x8x64xi32>, tensor<1x8x8x64xi32>) {
+func.func @reorder_broadcast_in_dim_scalar_binary(%arg0: tensor<f32>, %arg1: tensor<f32>, %arg2: tensor<i32>, %arg3: tensor<i32>) -> (tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xi32>, tensor<1x8x8x64xi32>, tensor<1x8x8x64xi32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xi32>, tensor<1x8x8x64xi32>, tensor<1x8x8x64xi32>) {
// CHECK: %[[ADD:.*]] = mhlo.add %[[ARG0]], %[[ARG1]] : tensor<f32>
// CHECK: "mhlo.broadcast_in_dim"(%[[ADD]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x8x8x64xf32>
// CHECK: %[[ATAN2:.*]] = mhlo.atan2 %[[ARG0]], %[[ARG1]] : tensor<f32>
@@ -49,12 +49,12 @@
// CHECK: "mhlo.broadcast_in_dim"(%[[POW]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x8x8x64xf32>
// CHECK: %[[REM:.*]] = mhlo.remainder %[[ARG0]], %[[ARG1]] : tensor<f32>
// CHECK: "mhlo.broadcast_in_dim"(%[[REM]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x8x8x64xf32>
- // CHECK: %[[SL:.*]] = mhlo.shift_left %[[ARG0]], %[[ARG1]] : tensor<f32>
- // CHECK: "mhlo.broadcast_in_dim"(%[[SL]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x8x8x64xf32>
- // CHECK: %[[SRA:.*]] = mhlo.shift_right_arithmetic %[[ARG0]], %[[ARG1]] : tensor<f32>
- // CHECK: "mhlo.broadcast_in_dim"(%[[SRA]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x8x8x64xf32>
- // CHECK: %[[SRL:.*]] = mhlo.shift_right_logical %[[ARG0]], %[[ARG1]] : tensor<f32>
- // CHECK: "mhlo.broadcast_in_dim"(%[[SRL]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x8x8x64xf32>
+ // CHECK: %[[SL:.*]] = mhlo.shift_left %[[ARG2]], %[[ARG3]] : tensor<i32>
+ // CHECK: "mhlo.broadcast_in_dim"(%[[SL]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<i32>) -> tensor<1x8x8x64xi32>
+ // CHECK: %[[SRA:.*]] = mhlo.shift_right_arithmetic %[[ARG2]], %[[ARG3]] : tensor<i32>
+ // CHECK: "mhlo.broadcast_in_dim"(%[[SRA]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<i32>) -> tensor<1x8x8x64xi32>
+ // CHECK: %[[SRL:.*]] = mhlo.shift_right_logical %[[ARG2]], %[[ARG3]] : tensor<i32>
+ // CHECK: "mhlo.broadcast_in_dim"(%[[SRL]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<i32>) -> tensor<1x8x8x64xi32>
// CHECK: %[[SUB:.*]] = mhlo.subtract %[[ARG0]], %[[ARG1]] : tensor<f32>
// CHECK: "mhlo.broadcast_in_dim"(%[[SUB]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x8x8x64xf32>
// CHECK: %[[AND:.*]] = mhlo.and %[[ARG2]], %[[ARG3]] : tensor<i32>
@@ -75,14 +75,14 @@
%9 = mhlo.multiply %0, %1 : tensor<1x8x8x64xf32>
%10 = mhlo.power %0, %1 : tensor<1x8x8x64xf32>
%11 = mhlo.remainder %0, %1 : tensor<1x8x8x64xf32>
- %12 = mhlo.shift_left %0, %1 : tensor<1x8x8x64xf32>
- %13 = mhlo.shift_right_arithmetic %0, %1 : tensor<1x8x8x64xf32>
- %14 = mhlo.shift_right_logical %0, %1 : tensor<1x8x8x64xf32>
+ %12 = mhlo.shift_left %2, %3 : tensor<1x8x8x64xi32>
+ %13 = mhlo.shift_right_arithmetic %2, %3 : tensor<1x8x8x64xi32>
+ %14 = mhlo.shift_right_logical %2, %3 : tensor<1x8x8x64xi32>
%15 = mhlo.subtract %0, %1 : tensor<1x8x8x64xf32>
%16 = mhlo.and %2, %3 : tensor<1x8x8x64xi32>
%17 = mhlo.or %2, %3 : tensor<1x8x8x64xi32>
%18 = mhlo.xor %2, %3 : tensor<1x8x8x64xi32>
- return %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15, %16, %17, %18 : tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xi32>, tensor<1x8x8x64xi32>, tensor<1x8x8x64xi32>
+ return %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15, %16, %17, %18 : tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xi32>, tensor<1x8x8x64xi32>, tensor<1x8x8x64xi32>, tensor<1x8x8x64xf32>, tensor<1x8x8x64xi32>, tensor<1x8x8x64xi32>, tensor<1x8x8x64xi32>
}
// -----
diff --git a/compiler/src/iree/compiler/Utils/ModuleUtils.cpp b/compiler/src/iree/compiler/Utils/ModuleUtils.cpp
index 119cf53..9b78231 100644
--- a/compiler/src/iree/compiler/Utils/ModuleUtils.cpp
+++ b/compiler/src/iree/compiler/Utils/ModuleUtils.cpp
@@ -92,7 +92,7 @@
// Private symbols can be safely folded into duplicates or renamed.
if (OperationEquivalence::isEquivalentTo(
targetOp, sourceOp, OperationEquivalence::exactValueMatch,
- OperationEquivalence::exactValueMatch,
+ /*markEquivalent=*/nullptr,
OperationEquivalence::Flags::IgnoreLocations)) {
// Optimization: skip over duplicate private symbols.
// We could let CSE do this later, but we may as well check here.
diff --git a/integrations/tensorflow/WORKSPACE b/integrations/tensorflow/WORKSPACE
index ed58de3..717c320 100644
--- a/integrations/tensorflow/WORKSPACE
+++ b/integrations/tensorflow/WORKSPACE
@@ -7,7 +7,7 @@
load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
-TENSORFLOW_COMMIT = "cf2c76b511f21da0da9f0fae3b8ef768bf11fe99"
+TENSORFLOW_COMMIT = "c7764610dac1883e3649b572698e11a298dffdb6"
git_repository(
name = "org_tensorflow",
diff --git a/integrations/tensorflow/iree_tf_compiler/iree-import-tf-main.cpp b/integrations/tensorflow/iree_tf_compiler/iree-import-tf-main.cpp
index 235a1e4..30ac749 100644
--- a/integrations/tensorflow/iree_tf_compiler/iree-import-tf-main.cpp
+++ b/integrations/tensorflow/iree_tf_compiler/iree-import-tf-main.cpp
@@ -248,7 +248,8 @@
// Run passes.
{
- PassManager pm(&context, PassManager::Nesting::Implicit);
+ PassManager pm(&context, module.get()->getName().getStringRef(),
+ PassManager::Nesting::Implicit);
applyPassManagerCLOptions(pm);
if (prettifyTfDebugInfo) {
diff --git a/integrations/tensorflow/iree_tf_compiler/iree-import-tflite-main.cpp b/integrations/tensorflow/iree_tf_compiler/iree-import-tflite-main.cpp
index a17238a..2d0b6a3 100644
--- a/integrations/tensorflow/iree_tf_compiler/iree-import-tflite-main.cpp
+++ b/integrations/tensorflow/iree_tf_compiler/iree-import-tflite-main.cpp
@@ -162,7 +162,8 @@
}
// Run transformations.
- PassManager pm(&context, PassManager::Nesting::Implicit);
+ PassManager pm(&context, module.get()->getName().getStringRef(),
+ PassManager::Nesting::Implicit);
applyPassManagerCLOptions(pm);
applyDefaultTimingPassManagerCLOptions(pm);
mlir::iree_integrations::TFL::buildTFLImportPassPipeline(pm);
diff --git a/integrations/tensorflow/iree_tf_compiler/iree-import-xla-main.cpp b/integrations/tensorflow/iree_tf_compiler/iree-import-xla-main.cpp
index 5f57245..d020532 100644
--- a/integrations/tensorflow/iree_tf_compiler/iree-import-xla-main.cpp
+++ b/integrations/tensorflow/iree_tf_compiler/iree-import-xla-main.cpp
@@ -296,7 +296,8 @@
}
// Run passes.
- PassManager pm(&context, PassManager::Nesting::Implicit);
+ PassManager pm(&context, module.get()->getName().getStringRef(),
+ PassManager::Nesting::Implicit);
applyPassManagerCLOptions(pm);
applyDefaultTimingPassManagerCLOptions(pm);
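Note: all three importer drivers pick up the new PassManager constructor, which takes the name of the operation the manager is anchored on. A short sketch (module is the parsed OwningOpRef<ModuleOp>, as in the importers above):

    // Anchor the pass manager on the parsed module's op name instead of the
    // implicit builtin.module default.
    mlir::PassManager pm(&context, module.get()->getName().getStringRef(),
                         mlir::PassManager::Nesting::Implicit);
    mlir::applyPassManagerCLOptions(pm);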
diff --git a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
index a49fcca..113e61c 100644
--- a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
+++ b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
@@ -983,7 +983,7 @@
.enableX86Vector(getEnableX86vector())));
// clang-format on
pm.addNestedPass<func::FuncOp>(createConvertMathToLLVMPass());
- pm.addPass(createMemRefToLLVMConversionPass());
+ pm.addPass(createFinalizeMemRefToLLVMConversionPass());
if (getEnableAsync())
pm.addPass(createConvertAsyncToLLVMPass());
pm.addPass(createConvertFuncToLLVMPass());
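Note: the MemRefToLLVM entry points gained a "Finalize" prefix in this LLVM revision; the pattern-populate calls in ConvertToLLVM/ConvertToNVVM/ConvertToROCDL and the pass creation here change in lockstep. A one-line sketch of each renamed entry point:

    // Pattern population (conversion passes above):
    populateFinalizeMemRefToLLVMConversionPatterns(typeConverter, patterns);
    // Pass creation (pipeline assembly here):
    pm.addPass(createFinalizeMemRefToLLVMConversionPass());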
diff --git a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/Passes/TransformInterpreterPassBase.cpp b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/Passes/TransformInterpreterPassBase.cpp
index fca84ef..87d5457 100644
--- a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/Passes/TransformInterpreterPassBase.cpp
+++ b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/Passes/TransformInterpreterPassBase.cpp
@@ -110,7 +110,7 @@
#endif
auto xform = cast<transform::TransformOpInterface>(b.clone(*transform));
auto g = llvm::make_scope_exit([&]() { xform->erase(); });
- if (failed(transform::applyTransforms(target, xform, options)))
+ if (failed(transform::applyTransforms(target, xform, {}, options)))
return failure();
}
return success();
diff --git a/llvm-external-projects/iree-dialects/lib/Transforms/ListenerCSE.cpp b/llvm-external-projects/iree-dialects/lib/Transforms/ListenerCSE.cpp
index e9ba6d7..866b6d8 100644
--- a/llvm-external-projects/iree-dialects/lib/Transforms/ListenerCSE.cpp
+++ b/llvm-external-projects/iree-dialects/lib/Transforms/ListenerCSE.cpp
@@ -44,8 +44,7 @@
return OperationEquivalence::isEquivalentTo(
const_cast<Operation *>(lhsC), const_cast<Operation *>(rhsC),
OperationEquivalence::exactValueMatch,
- OperationEquivalence::ignoreValueEquivalence,
- OperationEquivalence::IgnoreLocations);
+ /*markEquivalent=*/nullptr, OperationEquivalence::IgnoreLocations);
}
// If lhs or rhs does not have a single region with a single block, they
@@ -80,7 +79,8 @@
// Callback to compare if operands of ops in the region of `lhs` and `rhs`
// are equivalent.
- auto mapOperands = [&](Value lhsValue, Value rhsValue) -> LogicalResult {
+ auto checkEquivalent = [&](Value lhsValue,
+ Value rhsValue) -> LogicalResult {
if (lhsValue == rhsValue)
return success();
if (areEquivalentValues.lookup(lhsValue) == rhsValue)
@@ -90,17 +90,15 @@
// Callback to compare if results of ops in the region of `lhs` and `rhs`
// are equivalent.
- auto mapResults = [&](Value lhsResult, Value rhsResult) -> LogicalResult {
+ auto markEquivalent = [&](Value lhsResult, Value rhsResult) {
if (getParent(lhsResult) == lhs && getParent(rhsResult) == rhs) {
- auto insertion = areEquivalentValues.insert({lhsResult, rhsResult});
- return success(insertion.first->second == rhsResult);
+ areEquivalentValues.insert({lhsResult, rhsResult});
}
- return success();
};
return OperationEquivalence::isEquivalentTo(
const_cast<Operation *>(lhsC), const_cast<Operation *>(rhsC),
- mapOperands, mapResults, OperationEquivalence::IgnoreLocations);
+ checkEquivalent, markEquivalent, OperationEquivalence::IgnoreLocations);
}
};
} // namespace
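Note: OperationEquivalence::isEquivalentTo now takes an operand-equivalence callback plus an optional markEquivalent callback that records result equivalences and may be null, as in the LinkingUtils and ModuleUtils changes above. A minimal sketch of the common exact-match call (opA/opB are placeholder operations):

    // Structural comparison with exact value matching; no equivalence map is
    // maintained, so the markEquivalent callback is simply null.
    bool same = mlir::OperationEquivalence::isEquivalentTo(
        opA, opB, mlir::OperationEquivalence::exactValueMatch,
        /*markEquivalent=*/nullptr,
        mlir::OperationEquivalence::Flags::IgnoreLocations);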
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-async.mlir b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-async.mlir
index feb6265..9e2c353 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-async.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-async.mlir
@@ -52,6 +52,6 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%module_op: !pdl.operation):
- %0 = transform.structured.match ops{["scf.foreach_thread"]} in %module_op
+ %0 = transform.structured.match ops{["scf.foreach_thread"]} in %module_op : (!pdl.operation) -> !pdl.operation
%1 = foreach_thread_to_async %0
}
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-scf-for.mlir b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-scf-for.mlir
index ba4f41c..a01da7e 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-scf-for.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-scf-for.mlir
@@ -46,6 +46,6 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%module_op: !pdl.operation):
- %0 = transform.structured.match ops{["scf.foreach_thread"]} in %module_op
+ %0 = transform.structured.match ops{["scf.foreach_thread"]} in %module_op : (!pdl.operation) -> !pdl.operation
%1 = foreach_thread_to_scf_for %0
}
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/roundtrip.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/roundtrip.mlir
index 1b5c2f2..e4f0d12 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/roundtrip.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/roundtrip.mlir
@@ -23,7 +23,7 @@
bufferize
// CHECK: %[[FUNC:.*]] = transform.structured.match ops{["func.func"]} in %arg0
// CHECK: lower_vectors %[[FUNC]] {{.*}} multireduction_lowering = innerreduction
- %6 = transform.structured.match ops{["func.func"]} in %arg0
+ %6 = transform.structured.match ops{["func.func"]} in %arg0 : (!pdl.operation) -> !pdl.operation
transform.vector.lower_vectors %6 multireduction_lowering = "innerreduction"
// CHECK: lower_to_llvm
lower_to_llvm
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/single-tiling-full-script.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/single-tiling-full-script.mlir
index bececf5..62b962f 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/single-tiling-full-script.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/single-tiling-full-script.mlir
@@ -15,13 +15,13 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%module_op: !pdl.operation):
- %0 = transform.structured.match ops{["linalg.matmul"]} in %module_op
+ %0 = transform.structured.match ops{["linalg.matmul"]} in %module_op : (!pdl.operation) -> !pdl.operation
%1, %loops:3 = transform.structured.tile %0 [4, 4, 4]
: (!pdl.operation) -> (!pdl.operation, !pdl.operation, !pdl.operation, !pdl.operation)
%2 = get_closest_isolated_parent %1 : (!pdl.operation) -> !pdl.operation
transform.structured.vectorize %2 { vectorize_padding }
bufferize
- %3 = transform.structured.match ops{["func.func"]} in %module_op
+ %3 = transform.structured.match ops{["func.func"]} in %module_op : (!pdl.operation) -> !pdl.operation
transform.vector.lower_vectors %3 multireduction_lowering = "innerreduction"
lower_to_llvm
}
diff --git a/tests/e2e/linalg_transform/transform_dialect_codegen_spec.mlir b/tests/e2e/linalg_transform/transform_dialect_codegen_spec.mlir
index d5f20ac..f891e49 100644
--- a/tests/e2e/linalg_transform/transform_dialect_codegen_spec.mlir
+++ b/tests/e2e/linalg_transform/transform_dialect_codegen_spec.mlir
@@ -1,6 +1,6 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%variant_op: !pdl.operation):
%variant_op_2 = transform.iree.bufferize %variant_op
- %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_2
+ %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_2 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %memref_func
}
diff --git a/tests/e2e/linalg_transform/transform_dialect_dispatch_spec.mlir b/tests/e2e/linalg_transform/transform_dialect_dispatch_spec.mlir
index f140aff..249a926 100644
--- a/tests/e2e/linalg_transform/transform_dialect_dispatch_spec.mlir
+++ b/tests/e2e/linalg_transform/transform_dialect_dispatch_spec.mlir
@@ -1,6 +1,6 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%arg1: !pdl.operation):
- %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
+ %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%foreach_op, %tiled_op = transform.structured.tile_to_foreach_thread_op %0 num_threads [13, 33]
%dispatch_op = transform.iree.foreach_thread_to_flow %foreach_op
}
diff --git a/tests/transform_dialect/cpu/matmul_codegen_custom_dispatch_formation_spec.mlir b/tests/transform_dialect/cpu/matmul_codegen_custom_dispatch_formation_spec.mlir
index d49290f..3f25087 100644
--- a/tests/transform_dialect/cpu/matmul_codegen_custom_dispatch_formation_spec.mlir
+++ b/tests/transform_dialect/cpu/matmul_codegen_custom_dispatch_formation_spec.mlir
@@ -2,7 +2,7 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%variant_op: !pdl.operation):
- %0 = transform.structured.match ops{["linalg.matmul"]} in %variant_op
+ %0 = transform.structured.match ops{["linalg.matmul"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%foreach_thread, %tiled_generic =
transform.structured.tile_to_foreach_thread_op %0 num_threads [2]
@@ -10,9 +10,9 @@
( mapping = [#gpu.block<x>] )
%1 = transform.iree.bufferize %variant_op
- %memref_func = transform.structured.match ops{["func.func"]} in %1
+ %memref_func = transform.structured.match ops{["func.func"]} in %1 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %memref_func
- %func = transform.structured.match ops{["func.func"]} in %1
+ %func = transform.structured.match ops{["func.func"]} in %1 : (!pdl.operation) -> !pdl.operation
transform.iree.foreach_thread_to_workgroup %func
}
diff --git a/tests/transform_dialect/cpu/matmul_codegen_default_spec.mlir b/tests/transform_dialect/cpu/matmul_codegen_default_spec.mlir
index 3efd0cb..8d33c24 100644
--- a/tests/transform_dialect/cpu/matmul_codegen_default_spec.mlir
+++ b/tests/transform_dialect/cpu/matmul_codegen_default_spec.mlir
@@ -2,7 +2,7 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%variant_op: !pdl.operation):
- %matmul = transform.structured.match ops{["linalg.matmul"]} in %variant_op
+ %matmul = transform.structured.match ops{["linalg.matmul"]} in %variant_op : (!pdl.operation) -> !pdl.operation
// Step 1. Tile to foreach_thread with tile_sizes [2].
// ===================================================
@@ -15,11 +15,11 @@
// =========================================================
%variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op
%variant_op_3 = transform.iree.bufferize %variant_op_2
- %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %memref_func
// Step 3. Post-bufferization mapping workgroup.
// =========================================================
- %func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.foreach_thread_to_workgroup %func
}
diff --git a/tests/transform_dialect/cuda/eltwise_reduction_codegen_spec.mlir b/tests/transform_dialect/cuda/eltwise_reduction_codegen_spec.mlir
index edb8708..beacf03 100644
--- a/tests/transform_dialect/cuda/eltwise_reduction_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/eltwise_reduction_codegen_spec.mlir
@@ -2,11 +2,11 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%variant_op: !pdl.operation):
- %fill = transform.structured.match ops{["linalg.fill"]} in %variant_op
+ %fill = transform.structured.match ops{["linalg.fill"]} in %variant_op : (!pdl.operation) -> !pdl.operation
// Step 1. Split the reduction to get meatier (size(red) / 2)-way parallelism.
// ===========================================================================
- %0 = transform.structured.match ops{["linalg.generic"]} in %variant_op
+ %0 = transform.structured.match ops{["linalg.generic"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%eltwise, %reduction = transform.split_handles %0 in [2] : (!pdl.operation) -> (!pdl.operation, !pdl.operation)
%init_or_alloc_op, %more_parallel_fill_op, %more_parallel_op, %combiner_op =
transform.structured.split_reduction %reduction
@@ -24,32 +24,32 @@
// TODO: bubbling should be a proper transform op, at which point we will be
// able to preserve the handles.
// ===========================================================================
- %func = transform.structured.match ops{["func.func"]} in %variant_op
+ %func = transform.structured.match ops{["func.func"]} in %variant_op : (!pdl.operation) -> !pdl.operation
transform.iree.apply_patterns %func { bubble_collapse_expand }
- %fills = transform.structured.match ops{["linalg.fill"]} in %variant_op
+ %fills = transform.structured.match ops{["linalg.fill"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%fill_2, %more_parallel_fill_2 = transform.split_handles %fills in [2]
: (!pdl.operation) -> (!pdl.operation, !pdl.operation)
- %generics = transform.structured.match ops{["linalg.generic"]} in %variant_op
+ %generics = transform.structured.match ops{["linalg.generic"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%expanded_eltwise, %more_parallel_2, %combiner_2 =
transform.split_handles %generics in [3] : (!pdl.operation) -> (!pdl.operation, !pdl.operation, !pdl.operation)
- %foreach_thread_grid_2 = transform.structured.match ops{["scf.foreach_thread"]} in %variant_op
+ %foreach_thread_grid_2 = transform.structured.match ops{["scf.foreach_thread"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%not_combiner = transform.merge_handles %fill_2, %more_parallel_fill_2, %more_parallel_2, %expanded_eltwise : !pdl.operation
transform.structured.fuse_into_containing_op %not_combiner into %foreach_thread_grid_2
// Step 3. Second level of tiling + fusion parallelizes to threads. Also
// fuse in the leading elementwise.
// ===========================================================================
- %fill_1d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1xf32> in %variant_op
+ %fill_1d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1xf32> in %variant_op : (!pdl.operation) -> !pdl.operation
%foreach_thread_block_combiner_op, %block_combiner_op =
transform.structured.tile_to_foreach_thread_op %combiner_2 tile_sizes [1]
( mapping = [#gpu.thread<z>] )
transform.structured.fuse_into_containing_op %fill_1d into %foreach_thread_block_combiner_op
- %fill_2d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1x2xf32> in %variant_op
- %grid_more_parallel_op = transform.structured.match ops{["linalg.generic"]}
- attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op
- %grid_eltwise_op = transform.structured.match ops{["linalg.generic"]}
- attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>]} in %variant_op
+ %fill_2d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1x2xf32> in %variant_op : (!pdl.operation) -> !pdl.operation
+ %grid_more_parallel_op = transform.structured.match ops{["linalg.generic"]}
+   attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op : (!pdl.operation) -> !pdl.operation
+ %grid_eltwise_op = transform.structured.match ops{["linalg.generic"]}
+   attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>]} in %variant_op : (!pdl.operation) -> !pdl.operation
%foreach_thread_block_more_parallel_op, %block_more_parallel_op =
transform.structured.tile_to_foreach_thread_op %grid_more_parallel_op tile_sizes [1, 1]
( mapping = [#gpu.thread<z>, #gpu.thread<y>] )
@@ -58,7 +58,7 @@
// Step 4. Rank-reduce and vectorize.
// ===========================================================================
- %func_1 = transform.structured.match ops{["func.func"]} in %variant_op
+ %func_1 = transform.structured.match ops{["func.func"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%func_2 = transform.iree.apply_patterns %func_1 { rank_reducing_linalg, rank_reducing_vector }
%func_3 = transform.structured.vectorize %func_2
@@ -66,12 +66,12 @@
// ===========================================================================
%variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op
%variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op_2
- %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %memref_func
// Step 6. Post-bufferization mapping to blocks and threads.
// ===========================================================================
- %func_4 = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %func_4 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%func_5 = transform.iree.foreach_thread_to_workgroup %func_4
%func_6 = transform.iree.map_nested_foreach_thread_to_gpu_threads %func_5
{ workgroup_size = [32, 2, 1] }
@@ -79,7 +79,7 @@
// Step 7. Post-bufferization vector distribution with rank-reduction.
// ===========================================================================
%func_7 = transform.iree.apply_patterns %func_6 { rank_reducing_linalg, rank_reducing_vector }
- %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3
+ %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
// Don't complain about unsupported if (threadIdx.x == 0 && threadIdx.y == 0)
// at this point.
transform.sequence %variant_op_3 : !pdl.operation failures(suppress) {
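The same annotation applies to the multi-line form of the op: when a match carries an `attributes{...}` filter on a continuation line, the functional type still attaches exactly once, after the `in %variant_op` operand that ends the statement, never after the `ops{...}` clause. A sketch of the shape (iterator types abbreviated for brevity):

  %op = transform.structured.match ops{["linalg.generic"]}
    attributes{iterator_types = [#linalg.iterator_type<parallel>]} in %variant_op : (!pdl.operation) -> !pdl.operation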
diff --git a/tests/transform_dialect/cuda/eltwise_reduction_eltwise_codegen_spec.mlir b/tests/transform_dialect/cuda/eltwise_reduction_eltwise_codegen_spec.mlir
index dabe691..43dedf3 100644
--- a/tests/transform_dialect/cuda/eltwise_reduction_eltwise_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/eltwise_reduction_eltwise_codegen_spec.mlir
@@ -2,11 +2,11 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%variant_op: !pdl.operation):
- %fill = transform.structured.match ops{["linalg.fill"]} in %variant_op
+ %fill = transform.structured.match ops{["linalg.fill"]} in %variant_op : (!pdl.operation) -> !pdl.operation
// Step 1. Split the reduction to get meatier (size(red) / 2)-way parallelism.
// ===========================================================================
- %0 = transform.structured.match ops{["linalg.generic"]} in %variant_op
+ %0 = transform.structured.match ops{["linalg.generic"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%leading_eltwise, %reduction, %trailing_eltwise = transform.split_handles %0 in [3]
: (!pdl.operation) -> (!pdl.operation, !pdl.operation, !pdl.operation)
%init_or_alloc_op, %more_parallel_fill_op, %more_parallel_op, %combiner_op =
@@ -26,12 +26,12 @@
// TODO: bubbling should be a proper transform op, at which point we will be
// able to preserve the handles.
// ===========================================================================
- %func = transform.structured.match ops{["func.func"]} in %variant_op
+ %func = transform.structured.match ops{["func.func"]} in %variant_op : (!pdl.operation) -> !pdl.operation
transform.iree.apply_patterns %func { bubble_collapse_expand }
- %fills = transform.structured.match ops{["linalg.fill"]} in %variant_op
+ %fills = transform.structured.match ops{["linalg.fill"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%fill_2, %more_parallel_fill_2 = transform.split_handles %fills in [2]
: (!pdl.operation) -> (!pdl.operation, !pdl.operation)
- %generics = transform.structured.match ops{["linalg.generic"]} in %variant_op
+ %generics = transform.structured.match ops{["linalg.generic"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%expanded_eltwise, %more_parallel_2, %combiner_2, %trailing_eltwise_2 =
transform.split_handles %generics in [4]
: (!pdl.operation) -> (!pdl.operation, !pdl.operation, !pdl.operation, !pdl.operation)
@@ -43,20 +43,20 @@
// Step 3. Second level of tiling + fusion parallelizes to threads. Also
// fuse in the leading and trailing elementwise.
// ===========================================================================
- %fill_1d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1xf32> in %variant_op
+ %fill_1d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1xf32> in %variant_op : (!pdl.operation) -> !pdl.operation
%foreach_thread_trailing_eltwise_op, %block_trailing_eltwise_op =
transform.structured.tile_to_foreach_thread_op %trailing_eltwise_2 tile_sizes [1]
( mapping = [#gpu.thread<z>] )
%block_combiner_op = transform.structured.match ops{["linalg.generic"]}
- attributes {iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op
+ attributes {iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op : (!pdl.operation) -> !pdl.operation
%fill_and_reduction = transform.merge_handles %fill_1d, %block_combiner_op : !pdl.operation
transform.structured.fuse_into_containing_op %fill_and_reduction into %foreach_thread_trailing_eltwise_op
- %fill_2d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1x2xf32> in %variant_op
+ %fill_2d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1x2xf32> in %variant_op : (!pdl.operation) -> !pdl.operation
%grid_more_parallel_op = transform.structured.match ops{["linalg.generic"]}
- attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op
+ attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op : (!pdl.operation) -> !pdl.operation
%grid_eltwise_op = transform.structured.match ops{["linalg.generic"]}
- attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>]} in %variant_op
+ attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>]} in %variant_op : (!pdl.operation) -> !pdl.operation
%foreach_thread_block_more_parallel_op, %block_more_parallel_op =
transform.structured.tile_to_foreach_thread_op %grid_more_parallel_op tile_sizes [1, 1]
( mapping = [#gpu.thread<z>, #gpu.thread<y>] )
@@ -65,7 +65,7 @@
// Step 4. Rank-reduce and vectorize.
// ===========================================================================
- %func_1 = transform.structured.match ops{["func.func"]} in %variant_op
+ %func_1 = transform.structured.match ops{["func.func"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%func_2 = transform.iree.apply_patterns %func_1 { rank_reducing_linalg, rank_reducing_vector }
%func_3 = transform.structured.vectorize %func_2
@@ -73,12 +73,12 @@
// ===========================================================================
%variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op
%variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op_2
- %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %memref_func
// Step 6. Post-bufferization mapping to blocks and threads.
// ===========================================================================
- %func_4 = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %func_4 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%func_5 = transform.iree.foreach_thread_to_workgroup %func_4
%func_6 = transform.iree.map_nested_foreach_thread_to_gpu_threads %func_5
{ workgroup_size = [32, 2, 1] }
@@ -86,7 +86,7 @@
// Step 7. Post-bufferization vector distribution with rank-reduction.
// ===========================================================================
%func_7 = transform.iree.apply_patterns %func_6 { rank_reducing_linalg, rank_reducing_vector, fold_memref_aliases }
- %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3
+ %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
// Don't complain about unsupported if (threadIdx.x == 0 && threadIdx.y == 0)
// at this point.
transform.sequence %variant_op_3 : !pdl.operation failures(suppress) {
diff --git a/tests/transform_dialect/cuda/reduction_codegen_spec.mlir b/tests/transform_dialect/cuda/reduction_codegen_spec.mlir
index 9f1f5ec..a29843d 100644
--- a/tests/transform_dialect/cuda/reduction_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/reduction_codegen_spec.mlir
@@ -2,11 +2,11 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%variant_op: !pdl.operation):
- %fill = transform.structured.match ops{["linalg.fill"]} in %variant_op
+ %fill = transform.structured.match ops{["linalg.fill"]} in %variant_op : (!pdl.operation) -> !pdl.operation
// Step 1. Split the reduction to get meatier (size(red) / 2)-way parallelism.
// ===========================================================================
- %0 = transform.structured.match ops{["linalg.generic"]} in %variant_op
+ %0 = transform.structured.match ops{["linalg.generic"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%init_or_alloc_op, %more_parallel_fill_op, %more_parallel_op, %combiner_op =
transform.structured.split_reduction %0
{ split_factor = 2, insert_split_dimension = 1 }
@@ -21,15 +21,15 @@
// Step 3. Second level of tiling + fusion parallelizes to threads.
// ===========================================================================
- %fill_1d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1xf32> in %variant_op
+ %fill_1d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1xf32> in %variant_op : (!pdl.operation) -> !pdl.operation
%foreach_thread_block_combiner_op, %block_combiner_op =
transform.structured.tile_to_foreach_thread_op %grid_combiner_op tile_sizes [1]
( mapping = [#gpu.thread<z>] )
transform.structured.fuse_into_containing_op %fill_1d into %foreach_thread_block_combiner_op
- %fill_2d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1x2xf32> in %variant_op
+ %fill_2d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1x2xf32> in %variant_op : (!pdl.operation) -> !pdl.operation
%grid_more_parallel_op = transform.structured.match ops{["linalg.generic"]}
- attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op
+ attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op : (!pdl.operation) -> !pdl.operation
%foreach_thread_block_more_parallel_op, %block_more_parallel_op =
transform.structured.tile_to_foreach_thread_op %grid_more_parallel_op tile_sizes [1, 1]
( mapping = [#gpu.thread<z>, #gpu.thread<y>] )
@@ -37,7 +37,7 @@
// Step 4. Rank-reduce and vectorize.
// ===========================================================================
- %func = transform.structured.match ops{["func.func"]} in %variant_op
+ %func = transform.structured.match ops{["func.func"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%func_2 = transform.iree.apply_patterns %func { rank_reducing_linalg, rank_reducing_vector }
%func_3 = transform.structured.vectorize %func_2
@@ -46,12 +46,12 @@
%func_4 = transform.iree.apply_patterns %func_3 { fold_reassociative_reshapes }
%variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op
%variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op_2
- %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %memref_func
// Step 6. Post-bufferization mapping to blocks and threads.
// ===========================================================================
- %func_5 = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %func_5 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%func_6 = transform.iree.foreach_thread_to_workgroup %func_5
%func_7 = transform.iree.map_nested_foreach_thread_to_gpu_threads %func_6
{ workgroup_size = [32, 2, 1] }
@@ -59,7 +59,7 @@
// Step 7. Post-bufferization vector distribution with rank-reduction.
// ===========================================================================
%func_8 = transform.iree.apply_patterns %func_7 { rank_reducing_linalg, rank_reducing_vector, fold_memref_aliases }
- %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3
+ %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
// Don't complain about unsupported if (threadIdx.x == 0 && threadIdx.y == 0)
// at this point.
transform.sequence %variant_op_3 : !pdl.operation failures(suppress) {
diff --git a/tests/transform_dialect/cuda/reduction_eltwise_codegen_spec.mlir b/tests/transform_dialect/cuda/reduction_eltwise_codegen_spec.mlir
index b3df80b..69354fb 100644
--- a/tests/transform_dialect/cuda/reduction_eltwise_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/reduction_eltwise_codegen_spec.mlir
@@ -2,11 +2,11 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%variant_op: !pdl.operation):
- %fill = transform.structured.match ops{["linalg.fill"]} in %variant_op
+ %fill = transform.structured.match ops{["linalg.fill"]} in %variant_op : (!pdl.operation) -> !pdl.operation
// Step 1. Split the reduction to get meatier (size(red) / 2)-way parallelism.
// ===========================================================================
- %0 = transform.structured.match ops{["linalg.generic"]} in %variant_op
+ %0 = transform.structured.match ops{["linalg.generic"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%reduction, %eltwise = transform.split_handles %0 in [2] : (!pdl.operation) -> (!pdl.operation, !pdl.operation)
%init_or_alloc_op, %more_parallel_fill_op, %more_parallel_op, %combiner_op =
transform.structured.split_reduction %reduction
@@ -22,18 +22,18 @@
// Step 3. Second level of tiling + fusion parallelizes to threads.
// ===========================================================================
- %fill_1d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1xf32> in %variant_op
+ %fill_1d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1xf32> in %variant_op : (!pdl.operation) -> !pdl.operation
%eltwise_block_loop, %eltwise_block_op =
transform.structured.tile_to_foreach_thread_op %eltwise_grid_op tile_sizes [1]
( mapping = [#gpu.thread<z>] )
%block_combiner_op = transform.structured.match ops{["linalg.generic"]}
- attributes {iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op
+ attributes {iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op : (!pdl.operation) -> !pdl.operation
%combined_and_fill = transform.merge_handles %fill_1d, %block_combiner_op : !pdl.operation
transform.structured.fuse_into_containing_op %combined_and_fill into %eltwise_block_loop
- %fill_2d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1x2xf32> in %variant_op
+ %fill_2d = transform.structured.match ops{["linalg.fill"]} filter_result_type = tensor<1x2xf32> in %variant_op : (!pdl.operation) -> !pdl.operation
%grid_more_parallel_op = transform.structured.match ops{["linalg.generic"]}
- attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op
+ attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op : (!pdl.operation) -> !pdl.operation
%foreach_thread_block_more_parallel_op, %block_more_parallel_op =
transform.structured.tile_to_foreach_thread_op %grid_more_parallel_op tile_sizes [1, 1]
( mapping = [#gpu.thread<z>, #gpu.thread<y>] )
@@ -41,7 +41,7 @@
// Step 4. Rank-reduce and vectorize.
// ===========================================================================
- %func = transform.structured.match ops{["func.func"]} in %variant_op
+ %func = transform.structured.match ops{["func.func"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%func_2 = transform.iree.apply_patterns %func { rank_reducing_linalg, rank_reducing_vector }
%func_3 = transform.structured.vectorize %func_2
@@ -50,12 +50,12 @@
%func_4 = transform.iree.apply_patterns %func_3 { fold_reassociative_reshapes }
%variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op
%variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op_2
- %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %memref_func
// Step 6. Post-bufferization mapping to blocks and threads.
// ===========================================================================
- %func_5 = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %func_5 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%func_6 = transform.iree.foreach_thread_to_workgroup %func_5
%func_7 = transform.iree.map_nested_foreach_thread_to_gpu_threads %func_6
{ workgroup_size = [32, 2, 1] }
@@ -63,7 +63,7 @@
// Step 7. Post-bufferization vector distribution with rank-reduction.
// ===========================================================================
%func_8 = transform.iree.apply_patterns %func_7 { rank_reducing_linalg, rank_reducing_vector, fold_memref_aliases }
- %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3
+ %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
// Don't complain about unsupported if (threadIdx.x == 0 && threadIdx.y == 0)
// at this point.
transform.sequence %variant_op_3 : !pdl.operation failures(suppress) {
diff --git a/tests/transform_dialect/cuda/reduction_v2_codegen_spec.mlir b/tests/transform_dialect/cuda/reduction_v2_codegen_spec.mlir
index 73b0e56..8224a2e 100644
--- a/tests/transform_dialect/cuda/reduction_v2_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/reduction_v2_codegen_spec.mlir
@@ -2,8 +2,8 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%variant_op: !pdl.operation):
- %fill = transform.structured.match ops{["linalg.fill"]} in %variant_op
- %reduction = transform.structured.match ops{["linalg.generic"]} in %variant_op
+ %fill = transform.structured.match ops{["linalg.fill"]} in %variant_op : (!pdl.operation) -> !pdl.operation
+ %reduction = transform.structured.match ops{["linalg.generic"]} in %variant_op : (!pdl.operation) -> !pdl.operation
// Step 1. First level of tiling + fusion parallelizes to blocks.
// ===========================================================================
@@ -34,7 +34,7 @@
// Step 4. Rank-reduce and vectorize.
// ===========================================================================
- %func = transform.structured.match ops{["func.func"]} in %variant_op
+ %func = transform.structured.match ops{["func.func"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%func_2 = transform.iree.apply_patterns %func { rank_reducing_linalg, rank_reducing_vector }
%func_3 = transform.structured.vectorize %func_2
@@ -42,15 +42,15 @@
// ===========================================================================
%func_4 = transform.iree.apply_patterns %func_3 { fold_reassociative_reshapes }
%variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op
- %func_5 = transform.structured.match ops{["func.func"]} in %variant_op_2
+ %func_5 = transform.structured.match ops{["func.func"]} in %variant_op_2 : (!pdl.operation) -> !pdl.operation
%func_6 = transform.iree.apply_patterns %func_5 { erase_unnecessary_tensor_operands }
%variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op_2
- %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %memref_func
// Step 6. Post-bufferization mapping to blocks and threads.
// ===========================================================================
- %func_7 = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %func_7 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%func_8 = transform.iree.foreach_thread_to_workgroup %func_7
%func_9 = transform.iree.map_nested_foreach_thread_to_gpu_threads %func_8
{ workgroup_size = [32, 1, 1] }
@@ -58,7 +58,7 @@
// Step 7. Post-bufferization vector distribution with rank-reduction.
// ===========================================================================
%func_10 = transform.iree.apply_patterns %func_9 { rank_reducing_linalg, rank_reducing_vector, fold_memref_aliases }
- %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3
+ %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%warp = transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 }
transform.iree.vector.warp_distribute %func_10
}
diff --git a/tests/transform_dialect/cuda/reduction_v3_codegen_spec.mlir b/tests/transform_dialect/cuda/reduction_v3_codegen_spec.mlir
index 6704bb8..867db8e 100644
--- a/tests/transform_dialect/cuda/reduction_v3_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/reduction_v3_codegen_spec.mlir
@@ -2,8 +2,8 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%variant_op: !pdl.operation):
- %fill = transform.structured.match ops{["linalg.fill"]} in %variant_op
- %reduction = transform.structured.match ops{["linalg.generic"]} in %variant_op
+ %fill = transform.structured.match ops{["linalg.fill"]} in %variant_op : (!pdl.operation) -> !pdl.operation
+ %reduction = transform.structured.match ops{["linalg.generic"]} in %variant_op : (!pdl.operation) -> !pdl.operation
// Step 1. First level of tiling + fusion parallelizes to blocks.
// ===========================================================================
@@ -31,7 +31,7 @@
// Step 3. Rank-reduce and vectorize.
// ===========================================================================
- %func = transform.structured.match ops{["func.func"]} in %variant_op
+ %func = transform.structured.match ops{["func.func"]} in %variant_op : (!pdl.operation) -> !pdl.operation
// TODO: masked vectorization on block_more_parallel_op_2 if we want
// vector<4> to work as intended.
%func_2 = transform.iree.apply_patterns %func { rank_reducing_linalg, rank_reducing_vector }
@@ -41,15 +41,15 @@
// ===========================================================================
%func_4 = transform.iree.apply_patterns %func_3 { fold_reassociative_reshapes }
%variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op
- %func_5 = transform.structured.match ops{["func.func"]} in %variant_op_2
+ %func_5 = transform.structured.match ops{["func.func"]} in %variant_op_2 : (!pdl.operation) -> !pdl.operation
%func_6 = transform.iree.apply_patterns %func_5 { erase_unnecessary_tensor_operands }
%variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op_2
- %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %memref_func
// Step 5. Post-bufferization mapping to blocks and threads.
// ===========================================================================
- %func_7 = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %func_7 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%func_8 = transform.iree.foreach_thread_to_workgroup %func_7
%func_9 = transform.iree.map_nested_foreach_thread_to_gpu_threads %func_8
{ workgroup_size = [1024, 1, 1] }
@@ -57,7 +57,7 @@
// Step 6. Post-bufferization vector distribution with rank-reduction.
// ===========================================================================
%func_10 = transform.iree.apply_patterns %func_9 { rank_reducing_linalg, rank_reducing_vector, fold_memref_aliases }
- %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3
+ %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%warp = transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 }
transform.iree.vector.warp_distribute %func_10
}
diff --git a/tests/transform_dialect/cuda/softmax_codegen_spec.mlir b/tests/transform_dialect/cuda/softmax_codegen_spec.mlir
index 6c65303..9ace54f 100644
--- a/tests/transform_dialect/cuda/softmax_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/softmax_codegen_spec.mlir
@@ -4,7 +4,7 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%variant_op: !pdl.operation):
%ops = transform.structured.match ops{["linalg.fill", "linalg.generic"]}
- in %variant_op
+ in %variant_op : (!pdl.operation) -> !pdl.operation
%input_max_fill,
%input_max,
%exps_sum_fill,
@@ -41,7 +41,7 @@
// Step 2. Second level of tiling + fusion parallelizes to threads.
// ================================================================
%tiled_ops = transform.structured.match ops{["linalg.fill", "linalg.generic"]}
- in %variant_op
+ in %variant_op : (!pdl.operation) -> !pdl.operation
%tiled_input_max_fill,
%tiled_input_max,
%tiled_exps_sum_fill,
@@ -70,7 +70,7 @@
// Step 3. Rank-reduce and vectorize.
// ==================================
- %func = transform.structured.match ops{["func.func"]} in %variant_op
+ %func = transform.structured.match ops{["func.func"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%funcx = transform.iree.apply_patterns %func { rank_reducing_linalg, rank_reducing_vector }
transform.structured.vectorize %funcx
@@ -78,21 +78,21 @@
// =========================================================
%variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op
%variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op_2
- %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %memref_func
// Step 5. Post-bufferization mapping to blocks and threads.
// =========================================================
- %func_2 = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %func_2 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%func_3 = transform.iree.foreach_thread_to_workgroup %func_2
transform.iree.map_nested_foreach_thread_to_gpu_threads %func_3
{ workgroup_size = [32, 4, 1] }
// Step 6. Post-bufferization vector distribution with rank-reduction.
// ===================================================================
- %end_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %end_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%end_func_2 = transform.iree.apply_patterns %end_func { rank_reducing_linalg, rank_reducing_vector, fold_memref_aliases }
- %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3
+ %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%warp = transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 }
transform.iree.vector.warp_distribute %end_func_2
}
diff --git a/tests/transform_dialect/cuda/softmax_dispatch_spec.mlir b/tests/transform_dialect/cuda/softmax_dispatch_spec.mlir
index 18282b8..efa5e5d 100644
--- a/tests/transform_dialect/cuda/softmax_dispatch_spec.mlir
+++ b/tests/transform_dialect/cuda/softmax_dispatch_spec.mlir
@@ -4,7 +4,7 @@
transform.structured.canonicalized_sequence failures(propagate){
^bb1(%variant_op: !pdl.operation):
%ops = transform.structured.match ops{["linalg.fill", "linalg.generic"]}
- in %variant_op
+ in %variant_op : (!pdl.operation) -> !pdl.operation
%input_max_fill, %input_max, %exps_sum_fill, %exps, %exps_sum, %div =
transform.split_handles %ops in [6]
@@ -20,7 +20,7 @@
: !pdl.operation
%region_op_2 = transform.iree.move_preceding_op_into_dispatch_region %non_div into %region_op
- %empty = transform.structured.match ops{["tensor.empty"]} in %variant_op
+ %empty = transform.structured.match ops{["tensor.empty"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%region_op_3 = transform.iree.move_preceding_op_into_dispatch_region %empty into %region_op_2
transform.iree.region_to_workgroups %region_op_3
}
diff --git a/tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir b/tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir
index 4b9b92f..46a4fe9 100644
--- a/tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/softmax_partial_codegen_spec.mlir
@@ -7,10 +7,10 @@
// Step 1. First level of tiling + fusion parallelizes to blocks.
// ==============================================================
%root = transform.structured.match interface{LinalgOp}
- attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>]} in %variant_op
- %fill = transform.structured.match ops{["linalg.fill"]} in %variant_op
+ attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>]} in %variant_op : (!pdl.operation) -> !pdl.operation
+ %fill = transform.structured.match ops{["linalg.fill"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%red = transform.structured.match interface{LinalgOp}
- attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op
+ attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op : (!pdl.operation) -> !pdl.operation
%not_root = merge_handles %fill, %red : !pdl.operation
%foreach_thread, %tiled_generic =
transform.iree.tile_to_foreach_thread_and_workgroup_count_region %root tile_sizes [1, 4]
@@ -19,11 +19,11 @@
// Step 2. Second level of tiling + fusion parallelizes to threads.
// ================================================================
- %fill_linalg = transform.structured.match ops{["linalg.fill"]} in %variant_op
+ %fill_linalg = transform.structured.match ops{["linalg.fill"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%reduction_linalg = transform.structured.match ops{["linalg.generic"]}
- attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op
+ attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]} in %variant_op : (!pdl.operation) -> !pdl.operation
%parallel_linalg = transform.structured.match ops{["linalg.generic"]}
- attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>]} in %variant_op
+ attributes{iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>]} in %variant_op : (!pdl.operation) -> !pdl.operation
%foreach_thread_reduction, %tiled_reduction_generic =
transform.structured.tile_to_foreach_thread_op %reduction_linalg tile_sizes [1, 1]
( mapping = [#gpu.thread<z>, #gpu.thread<y>] )
@@ -53,7 +53,7 @@
// Step 3. Rank-reduce and vectorize.
// ==================================
- %func = transform.structured.match ops{["func.func"]} in %variant_op
+ %func = transform.structured.match ops{["func.func"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%funcx = transform.iree.apply_patterns %func { rank_reducing_linalg, rank_reducing_vector }
transform.structured.vectorize %funcx
@@ -61,21 +61,21 @@
// =========================================================
%variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op
%variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op_2
- %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %memref_func
// Step 5. Post-bufferization mapping to blocks and threads.
// =========================================================
- %func_2 = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %func_2 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%func_3 = transform.iree.foreach_thread_to_workgroup %func_2
transform.iree.map_nested_foreach_thread_to_gpu_threads %func_3
{ workgroup_size = [32, 4, 1] }
// Step 6. Post-bufferization vector distribution with rank-reduction.
// ===================================================================
- %end_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %end_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%end_func_2 = transform.iree.apply_patterns %end_func { rank_reducing_linalg, rank_reducing_vector, fold_memref_aliases }
- %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3
+ %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%warp = transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 }
transform.iree.vector.warp_distribute %end_func_2
}
diff --git a/tests/transform_dialect/cuda/softmax_v2_codegen_spec.mlir b/tests/transform_dialect/cuda/softmax_v2_codegen_spec.mlir
index 9fe3573..8e44a16 100644
--- a/tests/transform_dialect/cuda/softmax_v2_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/softmax_v2_codegen_spec.mlir
@@ -4,7 +4,7 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%variant_op: !pdl.operation):
%ops = transform.structured.match ops{["linalg.fill", "linalg.generic"]}
- in %variant_op
+ in %variant_op : (!pdl.operation) -> !pdl.operation
%input_max_fill,
%input_max,
%exps_sum_fill,
@@ -34,7 +34,7 @@
// Step 2. Second level of tiling + fusion parallelizes to threads.
// ================================================================
%tiled_ops = transform.structured.match ops{["linalg.fill", "linalg.generic"]}
- in %variant_op
+ in %variant_op : (!pdl.operation) -> !pdl.operation
%tiled_input_max_fill,
%tiled_input_max,
%tiled_exps_sum_fill,
@@ -61,7 +61,7 @@
// Step 3. Rank-reduce and vectorize.
// ==================================
- %funcx_2 = transform.structured.match ops{["func.func"]} in %variant_op
+ %funcx_2 = transform.structured.match ops{["func.func"]} in %variant_op : (!pdl.operation) -> !pdl.operation
%funcx_3 = transform.iree.apply_patterns %funcx_2 { rank_reducing_linalg, rank_reducing_vector }
transform.structured.vectorize %funcx_3
@@ -69,21 +69,21 @@
// =========================================================
%variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op
%variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op_2
- %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %memref_func
// Step 5. Post-bufferization mapping to blocks and threads.
// =========================================================
- %func_2 = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %func_2 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%func_3 = transform.iree.foreach_thread_to_workgroup %func_2
transform.iree.map_nested_foreach_thread_to_gpu_threads %func_3
{ workgroup_size = [32, 4, 1] }
// Step 6. Post-bufferization vector distribution with rank-reduction.
// ===================================================================
- %end_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %end_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%end_func_2 = transform.iree.apply_patterns %end_func { rank_reducing_linalg, rank_reducing_vector, fold_memref_aliases }
- %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3
+ %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
%warp = transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 }
transform.iree.vector.warp_distribute %end_func_2
}
diff --git a/tests/transform_dialect/cuda/vecadd2d_codegen_spec.mlir b/tests/transform_dialect/cuda/vecadd2d_codegen_spec.mlir
index b193678..f55dac8 100644
--- a/tests/transform_dialect/cuda/vecadd2d_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/vecadd2d_codegen_spec.mlir
@@ -2,21 +2,21 @@
^bb1(%variant_op: !pdl.operation):
// Step 1. Find three linalg.generics and tile to GPU thread blocks.
// ===========================================================================
- %generics = transform.structured.match ops{["linalg.generic"]} in %variant_op
+ %generics = transform.structured.match ops{["linalg.generic"]} in %variant_op : (!pdl.operation) -> !pdl.operation
transform.iree.tile_to_foreach_thread_and_workgroup_count_region %generics
tile_sizes [5, 3] ( mapping = [#gpu.block<z>, #gpu.block<x>])
// Step 2. Rank-reduce, bufferize, and drop the HAL descriptor from memref ops.
// ===========================================================================
- %func = transform.structured.match ops{["func.func"]} in %variant_op
+ %func = transform.structured.match ops{["func.func"]} in %variant_op : (!pdl.operation) -> !pdl.operation
transform.iree.apply_patterns %func { rank_reducing_linalg, rank_reducing_vector }
%variant_op_2 = transform.iree.eliminate_empty_tensors %variant_op
%variant_op_3 = transform.iree.bufferize { target_gpu } %variant_op_2
- %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.erase_hal_descriptor_type_from_memref %memref_func
// Step 3. Map to GPU thread blocks.
// ===========================================================================
- %func_2 = transform.structured.match ops{["func.func"]} in %variant_op_3
+ %func_2 = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!pdl.operation) -> !pdl.operation
transform.iree.foreach_thread_to_workgroup %func_2
}
diff --git a/tests/transform_dialect/cuda/vecadd2d_codegen_spec_partial_tile.mlir b/tests/transform_dialect/cuda/vecadd2d_codegen_spec_partial_tile.mlir
index 75e2dad..84836dd 100644
--- a/tests/transform_dialect/cuda/vecadd2d_codegen_spec_partial_tile.mlir
+++ b/tests/transform_dialect/cuda/vecadd2d_codegen_spec_partial_tile.mlir
@@ -1,6 +1,6 @@
transform.structured.canonicalized_sequence failures(propagate) {
^bb1(%variant_op: !pdl.operation):
- %generics = transform.structured.match ops{["linalg.generic"]} in %variant_op
+ %generics = transform.structured.match ops{["linalg.generic"]} in %variant_op : (!pdl.operation) -> !pdl.operation
// Tile only one dimension, skip the other one.
transform.iree.tile_to_foreach_thread_and_workgroup_count_region %generics
tile_sizes [0, 3] ( mapping = [#gpu.block<z>])
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 603c286..db2aad4 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 603c286334b07f568d39f6706c848f576914f323
+Subproject commit db2aad4f41a2e90ecdbddd054aa752028f54d19c
diff --git a/third_party/mlir-hlo b/third_party/mlir-hlo
index fb7ef6b..b21bc81 160000
--- a/third_party/mlir-hlo
+++ b/third_party/mlir-hlo
@@ -1 +1 @@
-Subproject commit fb7ef6b8707687cdb993f121b53da656c02aface
+Subproject commit b21bc81642e5201349ff18830e9c8574c0646b16