Fix and enable broken tests (#10347)

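This commit fixes #10309.

Summary of changes:
- `transform.structured.canonicalized_sequence` now takes a mandatory
  `failures(...)` clause (failure propagation mode). Definite failures
  always abort the sequence; silenceable failures are returned only in
  `propagate` mode and are silenced otherwise. All existing tests and
  transform specs are updated to spell `failures(propagate)`.
- The `scf.foreach_thread` rewrite in FlowExtensions.cpp now maps output
  block arguments back to their tied operands before looking them up,
  and also registers the shared outputs as tensor operands.
- `dispatch_linalg_transform_dialect.mlir` is re-enabled in the Bazel
  and CMake test lists.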
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir
index 0658d7d..ed99f19 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/transform_dialect_bufferize.mlir
@@ -35,7 +35,7 @@
 
 transform.with_pdl_patterns {
 ^bb0(%arg0: !pdl.operation):
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%variant_op: !pdl.operation):
     transform.iree.bufferize %variant_op
   }
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir
index e1153d8..ad1370a 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_bufferize.mlir
@@ -28,7 +28,7 @@
   }
   transform.with_pdl_patterns {
   ^bb0(%arg0: !pdl.operation):
-    transform.structured.canonicalized_sequence %arg0 {
+    transform.structured.canonicalized_sequence %arg0 failures(propagate) {
     ^bb1(%variant_op: !pdl.operation):
       transform.iree.bufferize { target_gpu } %variant_op
     }
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_bufferize_spec.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_bufferize_spec.mlir
index 562f30d..24c98ce 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_bufferize_spec.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_bufferize_spec.mlir
@@ -1,6 +1,6 @@
 transform.with_pdl_patterns {
 ^bb0(%arg0: !pdl.operation):
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%variant_op: !pdl.operation):
     transform.iree.bufferize %variant_op
   }
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_foreach_to_gpu_spec.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_foreach_to_gpu_spec.mlir
index b6b9b58..c7f5571 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_foreach_to_gpu_spec.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_foreach_to_gpu_spec.mlir
@@ -1,6 +1,6 @@
 transform.with_pdl_patterns {
 ^bb0(%arg0: !pdl.operation):
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%variant_op: !pdl.operation):
     %0 = transform.structured.match ops{["linalg.fill"]} in %variant_op
     %foreach_thread, %tiled_fill = transform.structured.tile_to_foreach_thread_op %0 num_threads [5, 1] (mapped to dims [1, 0, 2])
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_distribution_spec.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_distribution_spec.mlir
index 279b306..3a0135e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_distribution_spec.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_distribution_spec.mlir
@@ -1,6 +1,6 @@
 transform.with_pdl_patterns {
 ^bb0(%arg0: !pdl.operation):
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %if_op = transform.structured.match ops{["scf.if"]} in %arg1
     %warp = transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 }
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_warp_execute_on_lane_0_spec.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_warp_execute_on_lane_0_spec.mlir
index 554993f..e4c1a20 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_warp_execute_on_lane_0_spec.mlir
+++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/transform_dialect_codegen_vector_warp_execute_on_lane_0_spec.mlir
@@ -1,6 +1,6 @@
 transform.with_pdl_patterns {
 ^bb0(%arg0: !pdl.operation):
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %if_op = transform.structured.match ops{["scf.if"]} in %arg1
     transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 }
diff --git a/compiler/src/iree/compiler/Dialect/Flow/TransformExtensions/FlowExtensions.cpp b/compiler/src/iree/compiler/Dialect/Flow/TransformExtensions/FlowExtensions.cpp
index cad0223..c1c0bfc 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/TransformExtensions/FlowExtensions.cpp
+++ b/compiler/src/iree/compiler/Dialect/Flow/TransformExtensions/FlowExtensions.cpp
@@ -82,9 +82,10 @@
 /// DispatchTensorStoreOps.
 /// Ops are inserted just before the `block` terminator.
 static void rewriteParallelInsertSlices(
-    PatternRewriter &rewriter, scf::PerformConcurrentlyOp performConcurrentlyOp,
-    Block &block, ValueRange resultTensorOperands,
-    ValueRange resultTensorsDynamicDims, BlockAndValueMapping tensorToFlowBvm) {
+    PatternRewriter &rewriter, scf::ForeachThreadOp foreachThreadOp,
+    scf::PerformConcurrentlyOp performConcurrentlyOp, Block &block,
+    ValueRange resultTensorOperands, ValueRange resultTensorsDynamicDims,
+    BlockAndValueMapping tensorToFlowBvm) {
   Location loc = performConcurrentlyOp.getLoc();
   int64_t resultIndex = 0;
   for (const Operation &yieldingOp :
@@ -94,11 +95,15 @@
     rewriter.setInsertionPoint(block.getTerminator());
     auto dynamicDims = Util::findVariadicDynamicDims(
         resultIndex, resultTensorOperands, resultTensorsDynamicDims);
+    BlockArgument destBbArg = parallelInsertOp.getDest().cast<BlockArgument>();
+    assert(destBbArg.getOwner()->getParentOp() == foreachThreadOp &&
+           "expected that dest is an output bbArg");
+    Value dest = foreachThreadOp.getTiedOpOperand(destBbArg)->get();
     // clang-format off
     rewriter.create<Flow::DispatchTensorStoreOp>(
         loc,
         parallelInsertOp.getSource(),
-        tensorToFlowBvm.lookup(cast<Value>(parallelInsertOp.getDest())),
+        tensorToFlowBvm.lookup(dest),
         dynamicDims,
         parallelInsertOp.getMixedOffsets(),
         parallelInsertOp.getMixedSizes(),
@@ -114,12 +119,18 @@
 /// dispatchOp as well as a BlockAndValueMapping from tensor operands to the
 /// corresponding Flow dispatch tensor bbArgs.
 static void rewriteExtractSlices(PatternRewriter &rewriter,
+                                 scf::ForeachThreadOp foreachThreadOp,
                                  Flow::DispatchWorkgroupsOp dispatchOp,
                                  ValueRange tensorOperands,
                                  ValueRange tensorDynamicDims,
                                  BlockAndValueMapping tensorToFlowBvm) {
   dispatchOp->walk([&](tensor::ExtractSliceOp extractSliceOp) {
     Value source = extractSliceOp.getSource();
+    if (auto sourceBbArg = source.dyn_cast<BlockArgument>())
+      if (sourceBbArg.getOwner()->getParentOp() ==
+          foreachThreadOp.getOperation())
+        source = foreachThreadOp.getTiedOpOperand(sourceBbArg)->get();
+
     auto it = llvm::find(tensorOperands, source);
     if (it == tensorOperands.end()) return;
     int64_t index = std::distance(tensorOperands.begin(), it);
@@ -251,7 +262,8 @@
   llvm::SetVector<Value> resultTensorOperands, resultTensorsDynamicDims;
   for (const Operation &yieldingOp : performConcurrentlyOp.getYieldingOps()) {
     auto parallelInsertOp = cast<tensor::ParallelInsertSliceOp>(&yieldingOp);
-    Value dest = parallelInsertOp.getDest();
+    BlockArgument destBbArg = parallelInsertOp.getDest().cast<BlockArgument>();
+    Value dest = foreachThreadOp.getTiedOpOperand(destBbArg)->get();
     bool inserted = resultTensorOperands.insert(dest);
     if (!inserted) continue;
     auto dynamicDims =
@@ -283,6 +295,15 @@
     for (int64_t dim : getIndicesOfDynamicDims(tensorType))
       tensorDynamicDims.push_back(rewriter.create<tensor::DimOp>(loc, v, dim));
   }
+  // Also add shared outputs. (These are usually already added as result
+  // tensor operands.)
+  for (Value v : foreachThreadOp.getOutputs()) {
+    auto tensorType = v.getType().cast<RankedTensorType>();
+    if (resultTensorOperands.contains(v)) continue;
+    tensorOperands.push_back(v);
+    for (int64_t dim : getIndicesOfDynamicDims(tensorType))
+      tensorDynamicDims.push_back(rewriter.create<tensor::DimOp>(loc, v, dim));
+  }
 
   // Step 3. Create ordered vectors of operands to pass to the builder and
   // build the dispatchOp. The dispatchOp is created with an empty
@@ -409,11 +430,11 @@
 
   // Step 9. Rewrite tensor::ExtractSlice and ParallelInsert ops to the
   // relevant Flow DispatchTensorLoad/Store version.
-  rewriteParallelInsertSlices(rewriter, performConcurrentlyOp, *block,
-                              resultTensorOperands.getArrayRef(),
+  rewriteParallelInsertSlices(rewriter, foreachThreadOp, performConcurrentlyOp,
+                              *block, resultTensorOperands.getArrayRef(),
                               resultTensorsDynamicDims.getArrayRef(),
                               tensorToFlowBvm);
-  rewriteExtractSlices(rewriter, dispatchOp, allTensorOperands,
+  rewriteExtractSlices(rewriter, foreachThreadOp, dispatchOp, allTensorOperands,
                        allTensorDynamicDims, tensorToFlowBvm);
 
   // Step 10. Perform RAUWIf.
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/BUILD b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/BUILD
index 5eeacbb..cc93982 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/BUILD
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/BUILD
@@ -27,6 +27,7 @@
             "dispatch_linalg_on_tensors_fusion.mlir",
             "dispatch_linalg_on_tensors_fusion_reduction_broadcast_elementwise.mlir",
             "dispatch_linalg_on_tensors_fusion_with_transpose.mlir",
+            "dispatch_linalg_transform_dialect.mlir",
             "expand_tensor_shapes.mlir",
             "export_benchmark_funcs.mlir",
             "infer_numeric_narrowing.mlir",
@@ -50,8 +51,6 @@
         # transform_dialect_dispatch_spec is a an MLIR file that specifies a
         # transformation, it needs to be included as data.
         exclude = [
-            # TODO(#10309): Enable the test.
-            "dispatch_linalg_transform_dialect.mlir",
             "transform_dialect_dispatch_spec.mlir",
         ],
     ),
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt
index 877ca93..a5c9cc9 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/CMakeLists.txt
@@ -25,6 +25,7 @@
     "dispatch_linalg_on_tensors_fusion.mlir"
     "dispatch_linalg_on_tensors_fusion_reduction_broadcast_elementwise.mlir"
     "dispatch_linalg_on_tensors_fusion_with_transpose.mlir"
+    "dispatch_linalg_transform_dialect.mlir"
     "expand_tensor_shapes.mlir"
     "export_benchmark_funcs.mlir"
     "infer_numeric_narrowing.mlir"
diff --git a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transform_dialect_dispatch_spec.mlir b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transform_dialect_dispatch_spec.mlir
index f82f888..2274e22 100644
--- a/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transform_dialect_dispatch_spec.mlir
+++ b/compiler/src/iree/compiler/Dialect/Flow/Transforms/test/transform_dialect_dispatch_spec.mlir
@@ -1,6 +1,6 @@
 transform.with_pdl_patterns {
 ^bb0(%arg0: !pdl.operation):
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
     %foreach_op, %tiled_op = transform.structured.tile_to_foreach_thread_op %0 num_threads [42, 67]
diff --git a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgTransform/StructuredTransformOpsExt.td b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgTransform/StructuredTransformOpsExt.td
index 6401ef8..8c13d22 100644
--- a/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgTransform/StructuredTransformOpsExt.td
+++ b/llvm-external-projects/iree-dialects/include/iree-dialects/Dialect/LinalgTransform/StructuredTransformOpsExt.td
@@ -8,6 +8,7 @@
 #define STRUCTURED_TRANSFORM_OPS_EXT
 
 include "mlir/Dialect/PDL/IR/PDLTypes.td"
+include "mlir/Dialect/Transform/IR/TransformAttrs.td"
 include "mlir/Dialect/Transform/IR/TransformDialect.td"
 include "mlir/Dialect/Transform/IR/TransformInterfaces.td"
 include "mlir/Interfaces/ControlFlowInterfaces.td"
@@ -31,12 +32,13 @@
     after each step in the sequence.
   }];
 
-  let arguments = (ins Optional<PDL_Operation>:$root);
+  let arguments = (ins FailurePropagationMode:$failure_propagation_mode,
+                       Optional<PDL_Operation>:$root);
   let results = (outs Variadic<AnyType>:$results);
   let regions = (region SizedRegion<1>:$body);
 
   let assemblyFormat =
-    "($root^)? attr-dict-with-keyword regions (`:` type($results)^)?";
+    "($root^)? `failures` `(` $failure_propagation_mode `)` attr-dict-with-keyword regions (`:` type($results)^)?";
 
   let extraClassDeclaration = [{
     /// Allow the dialect prefix to be omitted.
@@ -50,7 +52,7 @@
 //===----------------------------------------------------------------------===//
 
 def BufferizeOp : Op<Transform_Dialect, "bufferize",
-    [DeclareOpInterfaceMethods<TransformOpInterface>, 
+    [DeclareOpInterfaceMethods<TransformOpInterface>,
      FunctionalStyleTransformOpTrait,
      MemoryEffectsOpInterface,
      TransformOpInterface]> {
@@ -60,7 +62,7 @@
 }
 
 def LowerVectorsOp : Op<Transform_Dialect, "lower_vectors",
-    [DeclareOpInterfaceMethods<TransformOpInterface>, 
+    [DeclareOpInterfaceMethods<TransformOpInterface>,
      FunctionalStyleTransformOpTrait,
      MemoryEffectsOpInterface]> {
   let description = [{Indicates that the vector operations in the entire
diff --git a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
index ffc93c9..850d9b2 100644
--- a/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
+++ b/llvm-external-projects/iree-dialects/lib/Dialect/LinalgTransform/IR/StructuredTransformOpsExt.cpp
@@ -551,10 +551,17 @@
   for (Operation &transform : getBodyBlock()->without_terminator()) {
     DiagnosedSilenceableFailure result =
         state.applyTransform(cast<transform::TransformOpInterface>(transform));
-    if (!result.succeeded()) {
+    if (result.isDefiniteFailure()) {
       LLVM_DEBUG(DBGS() << "failed: " << transform << "\n");
       return result;
     }
+    if (result.isSilenceableFailure()) {
+      LLVM_DEBUG(DBGS() << "failed silently: " << transform << "\n");
+      if (getFailurePropagationMode() ==
+          transform::FailurePropagationMode::Propagate)
+        return result;
+      (void)result.silence();
+    }
     LLVM_DEBUG(DBGS() << "successfully performed: " << transform << "\n");
 
     if (failed(checkedListenerTransform(performCSE)))
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-async.mlir b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-async.mlir
index b3ad37c..dda41e1 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-async.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-async.mlir
@@ -59,7 +59,7 @@
       %2 = operation "scf.foreach_thread"(%0 : !pdl.range<value>)  -> (%1 : !pdl.range<type>)
       rewrite %2 with "transform.dialect"
     }
-    transform.structured.canonicalized_sequence %arg0 {
+    transform.structured.canonicalized_sequence %arg0 failures(propagate) {
     ^bb1(%arg1: !pdl.operation):
       %0 = pdl_match @match_foreach_thread in %arg1
       %1 = foreach_thread_to_async %0
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-scf-for.mlir b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-scf-for.mlir
index abf454e..d5a3704 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-scf-for.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/foreach-thread-to-scf-for.mlir
@@ -54,7 +54,7 @@
       %2 = operation "scf.foreach_thread"(%0 : !pdl.range<value>)  -> (%1 : !pdl.range<type>)
       rewrite %2 with "transform.dialect"
     }
-    transform.structured.canonicalized_sequence %arg0 {
+    transform.structured.canonicalized_sequence %arg0 failures(propagate) {
     ^bb1(%arg1: !pdl.operation):
       %0 = pdl_match @match_foreach_thread in %arg1
       %1 = foreach_thread_to_scf_for %0
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/fuse-operands.mlir b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/fuse-operands.mlir
index e856c2f..b536936 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/fuse-operands.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/iree_linalg_ext/fuse-operands.mlir
@@ -52,7 +52,7 @@
       %2 = operation "scf.foreach_thread"(%0 : !pdl.range<value>)  -> (%1 : !pdl.range<type>)
       rewrite %2 with "transform.dialect"
     }
-    transform.structured.canonicalized_sequence %arg0 {
+    transform.structured.canonicalized_sequence %arg0 failures(propagate) {
     ^bb1(%arg1: !pdl.operation):
       %0 = pdl_match @match_elemwise in %arg1
       %1, %fusedOps:2 = fuse_producers %0 {operands_to_fuse=[0, 1]}
@@ -113,7 +113,7 @@
       %2 = operation "scf.foreach_thread"(%0 : !pdl.range<value>)  -> (%1 : !pdl.range<type>)
       rewrite %2 with "transform.dialect"
     }
-    transform.structured.canonicalized_sequence %arg0 {
+    transform.structured.canonicalized_sequence %arg0 failures(propagate) {
     ^bb1(%arg1: !pdl.operation):
       %0 = pdl_match @match_elemwise in %arg1
       %1, %fusedOps = fuse_producers %0 {operands_to_fuse=[0]}
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/bufferize.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/bufferize.mlir
index f1bd0b5..0486119 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/bufferize.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/bufferize.mlir
@@ -30,7 +30,7 @@
     rewrite %0 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     bufferize
   }
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/double-tiling.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/double-tiling.mlir
index d7211a7..bfbab68 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/double-tiling.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/double-tiling.mlir
@@ -35,7 +35,7 @@
     rewrite %0 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @pdl_target in %arg1
     %1, %loops1:3 = transform.structured.tile %0 [32, 32, 32] {interchange = [0, 2, 1]}
@@ -77,7 +77,7 @@
     rewrite %0 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @pdl_target in %arg1
     %1, %loops1:3 = transform.structured.tile %0 [32, 32, 32] {interchange = [0, 2, 1]}
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/drop-schedule.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/drop-schedule.mlir
index 5e05630..0c1daa2 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/drop-schedule.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/drop-schedule.mlir
@@ -23,7 +23,7 @@
   }
 
   // CHECK-NOT: canonicalized_sequence
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @pdl_target in %arg1
     transform.structured.tile %0 [4, 4, 4] {pad = false}
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/failure.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/failure.mlir
index add3bd4..127fc96 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/failure.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/failure.mlir
@@ -13,7 +13,7 @@
     rewrite %0 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @some_operation in %arg1
     // Make sure we don't crash on wrong operation type.
@@ -59,7 +59,7 @@
     rewrite %0 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb0(%arg1: !pdl.operation):
     // expected-note @below {{handle}}
     %0 = pdl_match @pdl_target1 in %arg1
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/fuse-and-peel.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/fuse-and-peel.mlir
index 64921fa..62ad6cd 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/fuse-and-peel.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/fuse-and-peel.mlir
@@ -30,7 +30,7 @@
     rewrite %0 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @pdl_target in %arg1
     %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [32, 32], tile_interchange = [0, 1]}
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/fuse.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/fuse.mlir
index 23dfef8..2fd69de 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/fuse.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/fuse.mlir
@@ -27,7 +27,7 @@
     rewrite %0 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @pdl_target in %arg1
     %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [32, 32], tile_interchange = [0, 1]}
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/generalize.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/generalize.mlir
index 7261c79..e8ff91f 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/generalize.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/generalize.mlir
@@ -23,7 +23,7 @@
     rewrite %0 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @pdl_target in %arg1
     transform.structured.generalize %0
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/interchange.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/interchange.mlir
index d88297e..d126809 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/interchange.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/interchange.mlir
@@ -30,7 +30,7 @@
     rewrite %0 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %root {
+  transform.structured.canonicalized_sequence %root failures(propagate) {
   ^bb0(%arg0: !pdl.operation):
     %0 = pdl_match @pdl_target in %arg0
     transform.structured.interchange %0 {iterator_interchange = [1, 0]}
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/invalid.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/invalid.mlir
index ae64416..ab9d12f 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/invalid.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/invalid.mlir
@@ -1,6 +1,6 @@
 // RUN: iree-dialects-opt %s --split-input-file -verify-diagnostics
 
-transform.structured.canonicalized_sequence {
+transform.structured.canonicalized_sequence failures(propagate) {
 ^bb0(%arg0: !pdl.operation):
   %0 = pdl_match @match in %arg0
   // expected-error@below {{expects iterator_interchange to be a permutation, found [1, 1]}}
@@ -9,7 +9,7 @@
 
 // -----
 
-transform.structured.canonicalized_sequence {
+transform.structured.canonicalized_sequence failures(propagate) {
 ^bb0(%arg0: !pdl.operation):
   %0 = pdl_match @match in %arg0
   // expected-error@below {{expected 'tile_sizes' attribute}}
@@ -18,7 +18,7 @@
 
 // -----
 
-transform.structured.canonicalized_sequence {
+transform.structured.canonicalized_sequence failures(propagate) {
 ^bb0(%arg0: !pdl.operation):
   %0 = pdl_match @match in %arg0
   // expected-error@below {{expects interchange to be a permutation, found [1, 1]}}
@@ -27,7 +27,7 @@
 
 // -----
 
-transform.structured.canonicalized_sequence {
+transform.structured.canonicalized_sequence failures(propagate) {
 ^bb0(%arg0: !pdl.operation):
   %0 = pdl_match @match in %arg0
   // expected-error@below {{expects pack_paddings to contain booleans (0/1), found [1, 7]}}
@@ -36,7 +36,7 @@
 
 // -----
 
-transform.structured.canonicalized_sequence {
+transform.structured.canonicalized_sequence failures(propagate) {
 ^bb0(%arg0: !pdl.operation):
   %0 = pdl_match @match in %arg0
   // expected-error@below {{expects hoist_paddings to contain positive integers, found [1, -7]}}
@@ -45,7 +45,7 @@
 
 // -----
 
-transform.structured.canonicalized_sequence {
+transform.structured.canonicalized_sequence failures(propagate) {
 ^bb0(%arg0: !pdl.operation):
   %0 = pdl_match @match in %arg0
   // expected-error@below {{expects transpose_paddings to be a permutation, found [1, 1]}}
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/pad.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/pad.mlir
index 48657f5..858a01d 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/pad.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/pad.mlir
@@ -44,7 +44,7 @@
     rewrite %0 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @pdl_target in %arg1
     %1 = transform.structured.pad %0 {padding_values=[0.0 : f32, 0.0 : f32], padding_dimensions=[1], pack_paddings=[1, 1], hoist_paddings=[1, 0], transpose_paddings=[[1, 0], [0, 1]]}
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/peel.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/peel.mlir
index 326a266..c736220 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/peel.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/peel.mlir
@@ -45,7 +45,7 @@
     rewrite %0 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @pdl_target in %arg1
     transform.loop.peel %0
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/print.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/print.mlir
index 3611548..f11060a 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/print.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/print.mlir
@@ -3,7 +3,7 @@
 // CHECK-LABEL: IR printer: test print
 // CHECK-NEXT:  module
 // CHECK-NEXT:  transform.structured.canonicalized_sequence
-transform.structured.canonicalized_sequence {
+transform.structured.canonicalized_sequence failures(propagate) {
 ^bb0(%arg0: !pdl.operation):
   print {name = "test print"}
 }
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/roundtrip.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/roundtrip.mlir
index bc4cf11..547ab70 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/roundtrip.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/roundtrip.mlir
@@ -1,7 +1,7 @@
 // RUN: iree-dialects-opt %s | FileCheck %s
 
 // CHECK: transform.structured.canonicalized_sequence
-transform.structured.canonicalized_sequence {
+transform.structured.canonicalized_sequence failures(propagate) {
 ^bb0(%arg0: !pdl.operation):
   // CHECK: %[[OPS:.*]] = pdl_match @match1 in %{{.*}}
   %0 = pdl_match @match1 in %arg0
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/scalarize.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/scalarize.mlir
index 127b59f..9fc49c2 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/scalarize.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/scalarize.mlir
@@ -20,7 +20,7 @@
     rewrite %2 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @isa_linalg.matmul in %arg1
     %tiled_linalg_op, %loops:3 = transform.structured.tile %0 [6, 16, 32] {interchange = [1, 0, 2]}
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/selective-targeting.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/selective-targeting.mlir
index ef1aa0d..518dc9f 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/selective-targeting.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/selective-targeting.mlir
@@ -79,7 +79,7 @@
     rewrite %0 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @pdl_target_attrA in %arg1
     transform.structured.tile %0 [4, 4, 4]
@@ -126,7 +126,7 @@
     rewrite %0 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @pdl_target in %arg1
     %1 = get_closest_isolated_parent %0
@@ -153,7 +153,7 @@
   return %1 : tensor<128x128xf32>
 }
 
-transform.structured.canonicalized_sequence {
+transform.structured.canonicalized_sequence failures(propagate) {
 ^bb0(%arg0: !pdl.operation):
   transform.structured.vectorize %arg0
 }
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/single-tiling-full-script.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/single-tiling-full-script.mlir
index 9bb0ec9..45f889a 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/single-tiling-full-script.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/single-tiling-full-script.mlir
@@ -26,7 +26,7 @@
     rewrite %0 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @pdl_target in %arg1
     %1, %loops:3 = transform.structured.tile %0 [4, 4, 4]
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/tile-and-peel.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/tile-and-peel.mlir
index 185ba16..68deb28 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/tile-and-peel.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/tile-and-peel.mlir
@@ -37,7 +37,7 @@
     rewrite %0 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @pdl_target in %arg1
     %linalg_op, %loops:3 = transform.structured.tile %0 [4, 4, 4]
@@ -47,7 +47,7 @@
     // version of %loop#2.
     // Peeling #0 first is currently not possible as it will invalidate all the
     // nested handles.
-    // TODO: extra arguments to specify parts of IR that should not be 
+    // TODO: extra arguments to specify parts of IR that should not be
     // invalidated when we know that the transform updates in-place.
     transform.loop.peel %loops#2
     transform.loop.peel %loops#0
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/tile-interchange.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/tile-interchange.mlir
index 4b0b601..121dc1c 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/tile-interchange.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/tile-interchange.mlir
@@ -30,7 +30,7 @@
     rewrite %2 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @target_pattern in %arg1
     %1, %loops1:3 = transform.structured.tile %0 [3, 5, 14] {interchange = [0, 2, 1]}
@@ -72,7 +72,7 @@
     rewrite %2 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @target_pattern in %arg1
     %1, %loops1:3 = transform.structured.tile %0 [3, 5, 14] {interchange = [2, 1, 0]}
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/tile.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/tile.mlir
index c207310..86b11ec 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/tile.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/tile.mlir
@@ -40,7 +40,7 @@
     rewrite %0 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @pdl_target in %arg1
     %1, %loops:3 = transform.structured.tile %0 [4, 4, 4]
diff --git a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/vectorize-transforms.mlir b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/vectorize-transforms.mlir
index 955f09d..a025273 100644
--- a/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/vectorize-transforms.mlir
+++ b/llvm-external-projects/iree-dialects/test/Dialect/linalg_transform/vectorize-transforms.mlir
@@ -13,7 +13,7 @@
     rewrite %0 with "transform.dialect"
   }
 
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = pdl_match @pdl_target in %arg1
     %1 = get_closest_isolated_parent %0
diff --git a/tests/e2e/linalg_transform/BUILD b/tests/e2e/linalg_transform/BUILD
index 3098af3..79f17c1 100644
--- a/tests/e2e/linalg_transform/BUILD
+++ b/tests/e2e/linalg_transform/BUILD
@@ -4,30 +4,29 @@
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-# load("//build_tools/bazel:iree_lit_test.bzl", "iree_lit_test_suite")
+load("//build_tools/bazel:iree_lit_test.bzl", "iree_lit_test_suite")
 
 package(
     features = ["layering_check"],
     licenses = ["notice"],  # Apache 2.0
 )
 
-# TODO#(10309): Enable the test.
-# iree_lit_test_suite(
-#     name = "check_linalg_transform",
-#     srcs = ["linalg_transform.mlir"],
-#     cfg = "//tests:lit.cfg.py",
-#     # transform_dialect_xxx_spec are MLIR files that specify a transformation,
-#     # they need to be included as data.
-#     data = [
-#         "//tests/e2e/linalg_transform:transform_dialect_codegen_spec.mlir",
-#         "//tests/e2e/linalg_transform:transform_dialect_dispatch_spec.mlir",
-#     ],
-#     tags = [
-#         "hostonly",
-#     ],
-#     tools = [
-#         "//tools:iree-run-mlir",
-#         "@llvm-project//lld",
-#         "@llvm-project//llvm:FileCheck",
-#     ],
-# )
+iree_lit_test_suite(
+    name = "check_linalg_transform",
+    srcs = ["linalg_transform.mlir"],
+    cfg = "//tests:lit.cfg.py",
+    # transform_dialect_xxx_spec are MLIR files that specify a transformation,
+    # they need to be included as data.
+    data = [
+        "//tests/e2e/linalg_transform:transform_dialect_codegen_spec.mlir",
+        "//tests/e2e/linalg_transform:transform_dialect_dispatch_spec.mlir",
+    ],
+    tags = [
+        "hostonly",
+    ],
+    tools = [
+        "//tools:iree-run-mlir",
+        "@llvm-project//lld",
+        "@llvm-project//llvm:FileCheck",
+    ],
+)
diff --git a/tests/e2e/linalg_transform/CMakeLists.txt b/tests/e2e/linalg_transform/CMakeLists.txt
index fd5930e..174db3e 100644
--- a/tests/e2e/linalg_transform/CMakeLists.txt
+++ b/tests/e2e/linalg_transform/CMakeLists.txt
@@ -10,4 +10,20 @@
 
 iree_add_all_subdirs()
 
+iree_lit_test_suite(
+  NAME
+    check_linalg_transform
+  SRCS
+    "linalg_transform.mlir"
+  TOOLS
+    ${IREE_LLD_TARGET}
+    FileCheck
+    iree-run-mlir
+  DATA
+    iree::tests::e2e::linalg_transform::transform_dialect_codegen_spec.mlir
+    iree::tests::e2e::linalg_transform::transform_dialect_dispatch_spec.mlir
+  LABELS
+    "hostonly"
+)
+
 ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/tests/e2e/linalg_transform/transform_dialect_codegen_spec.mlir b/tests/e2e/linalg_transform/transform_dialect_codegen_spec.mlir
index 562f30d..24c98ce 100644
--- a/tests/e2e/linalg_transform/transform_dialect_codegen_spec.mlir
+++ b/tests/e2e/linalg_transform/transform_dialect_codegen_spec.mlir
@@ -1,6 +1,6 @@
 transform.with_pdl_patterns {
 ^bb0(%arg0: !pdl.operation):
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%variant_op: !pdl.operation):
     transform.iree.bufferize %variant_op
   }
diff --git a/tests/e2e/linalg_transform/transform_dialect_dispatch_spec.mlir b/tests/e2e/linalg_transform/transform_dialect_dispatch_spec.mlir
index 1c26c05..633eeb5 100644
--- a/tests/e2e/linalg_transform/transform_dialect_dispatch_spec.mlir
+++ b/tests/e2e/linalg_transform/transform_dialect_dispatch_spec.mlir
@@ -1,6 +1,6 @@
 transform.with_pdl_patterns {
 ^bb0(%arg0: !pdl.operation):
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
     %foreach_op, %tiled_op = transform.structured.tile_to_foreach_thread_op %0 num_threads [13, 33]
diff --git a/tests/transform_dialect/cpu/BUILD b/tests/transform_dialect/cpu/BUILD
index 750e88c..a6387ad 100644
--- a/tests/transform_dialect/cpu/BUILD
+++ b/tests/transform_dialect/cpu/BUILD
@@ -6,36 +6,35 @@
 
 # Tests for end-to-end IREE support of entire models or their close derivatives.
 
-# load("//build_tools/bazel:iree_lit_test.bzl", "iree_lit_test_suite")
+load("//build_tools/bazel:iree_lit_test.bzl", "iree_lit_test_suite")
 
 package(
     features = ["layering_check"],
     licenses = ["notice"],  # Apache 2.0
 )
 
-# TODO(#10309): Enable the test
-# iree_lit_test_suite(
-#     name = "lit",
-#     srcs = ["matmul.mlir"],
-#     cfg = "//tests:lit.cfg.py",
-#     # transform dialect spec files are MLIR files that specify a transformation,
-#     # they need to be included as data.
-#     data = [
-#         "matmul_codegen_spec.mlir",
-#         "matmul_dispatch_spec.mlir",
-#         "matmul_tiled_dispatch_spec.mlir",
-#     ],
-#     tags = [
-#         "noasan",
-#         "nomsan",
-#         "notsan",
-#         "noubsan",
-#     ],
-#     tools = [
-#         "//tools:iree-benchmark-module",
-#         "//tools:iree-compile",
-#         "//tools:iree-opt",
-#         "//tools:iree-run-module",
-#         "@llvm-project//llvm:FileCheck",
-#     ],
-# )
+iree_lit_test_suite(
+    name = "lit",
+    srcs = ["matmul.mlir"],
+    cfg = "//tests:lit.cfg.py",
+    # transform dialect spec files are MLIR files that specify a transformation,
+    # they need to be included as data.
+    data = [
+        "matmul_codegen_spec.mlir",
+        "matmul_dispatch_spec.mlir",
+        "matmul_tiled_dispatch_spec.mlir",
+    ],
+    tags = [
+        "noasan",
+        "nomsan",
+        "notsan",
+        "noubsan",
+    ],
+    tools = [
+        "//tools:iree-benchmark-module",
+        "//tools:iree-compile",
+        "//tools:iree-opt",
+        "//tools:iree-run-module",
+        "@llvm-project//llvm:FileCheck",
+    ],
+)
diff --git a/tests/transform_dialect/cpu/CMakeLists.txt b/tests/transform_dialect/cpu/CMakeLists.txt
index 160ed1c..130bd8e 100644
--- a/tests/transform_dialect/cpu/CMakeLists.txt
+++ b/tests/transform_dialect/cpu/CMakeLists.txt
@@ -10,4 +10,26 @@
 
 iree_add_all_subdirs()
 
+iree_lit_test_suite(
+  NAME
+    lit
+  SRCS
+    "matmul.mlir"
+  TOOLS
+    FileCheck
+    iree-benchmark-module
+    iree-compile
+    iree-opt
+    iree-run-module
+  DATA
+    matmul_codegen_spec.mlir
+    matmul_dispatch_spec.mlir
+    matmul_tiled_dispatch_spec.mlir
+  LABELS
+    "noasan"
+    "nomsan"
+    "notsan"
+    "noubsan"
+)
+
 ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/tests/transform_dialect/cpu/matmul_codegen_spec.mlir b/tests/transform_dialect/cpu/matmul_codegen_spec.mlir
index 24227f8..1435bfb 100644
--- a/tests/transform_dialect/cpu/matmul_codegen_spec.mlir
+++ b/tests/transform_dialect/cpu/matmul_codegen_spec.mlir
@@ -1,17 +1,17 @@
-// RUN: iree-opt %s 
+// RUN: iree-opt %s
 
 transform.with_pdl_patterns {
 ^bb0(%arg0: !pdl.operation):
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%variant_op: !pdl.operation):
     %0 = transform.structured.match ops{["linalg.matmul"]} in %variant_op
 
-    %foreach_thread, %tiled_generic = 
+    %foreach_thread, %tiled_generic =
       transform.structured.tile_to_foreach_thread_op %0 num_threads [2]
-    
-    transform.iree.bufferize %variant_op
-    
-    %func = transform.structured.match ops{["func.func"]} in %variant_op
+
+    %1 = transform.iree.bufferize %variant_op
+
+    %func = transform.structured.match ops{["func.func"]} in %1
     transform.iree.foreach_thread_to_workgroup %func
   }
 }
diff --git a/tests/transform_dialect/cpu/matmul_tiled_dispatch_spec.mlir b/tests/transform_dialect/cpu/matmul_tiled_dispatch_spec.mlir
index 0c33097..80b735d 100644
--- a/tests/transform_dialect/cpu/matmul_tiled_dispatch_spec.mlir
+++ b/tests/transform_dialect/cpu/matmul_tiled_dispatch_spec.mlir
@@ -1,6 +1,6 @@
 transform.with_pdl_patterns {
 ^bb0(%arg0: !pdl.operation):
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
     %foreach_op, %tiled_op = transform.structured.tile_to_foreach_thread_op %0 num_threads [10, 20]
diff --git a/tests/transform_dialect/cuda/BUILD b/tests/transform_dialect/cuda/BUILD
index 0aeaead..5c3321f 100644
--- a/tests/transform_dialect/cuda/BUILD
+++ b/tests/transform_dialect/cuda/BUILD
@@ -7,7 +7,7 @@
 # Tests for end-to-end IREE support of entire models or their close derivatives.
 
 load("//build_tools/bazel:build_defs.oss.bzl", "iree_cmake_extra_content")
-# load("//build_tools/bazel:iree_lit_test.bzl", "iree_lit_test_suite")
+load("//build_tools/bazel:iree_lit_test.bzl", "iree_lit_test_suite")
 
 package(
     features = ["layering_check"],
@@ -23,35 +23,34 @@
     inline = True,
 )
 
-# TODO(#10309): Enable the test
-# iree_lit_test_suite(
-#     name = "lit",
-#     srcs = [
-#         "reduction.mlir",
-#         "softmax.mlir",
-#     ],
-#     cfg = "//tests:lit.cfg.py",
-#     # transform dialect spec files are MLIR files that specify a transformation,
-#     # they need to be included as data.
-#     data = [
-#         "reduction_codegen_spec.mlir",
-#         "reduction_dispatch_spec.mlir",
-#         "softmax_codegen_spec.mlir",
-#         "softmax_dispatch_spec.mlir",
-#     ],
-#     tags = [
-#         # CUDA cuInit fails with sanitizer on.
-#         "noasan",
-#         "nomsan",
-#         "notsan",
-#         "noubsan",
-#         "requires-gpu-nvidia",
-#         "driver=cuda",
-#     ],
-#     tools = [
-#         "//tools:iree-compile",
-#         "//tools:iree-opt",
-#         "//tools:iree-run-module",
-#         "@llvm-project//llvm:FileCheck",
-#     ],
-# )
+iree_lit_test_suite(
+    name = "lit",
+    srcs = [
+        "reduction.mlir",
+        "softmax.mlir",
+    ],
+    cfg = "//tests:lit.cfg.py",
+    # transform dialect spec files are MLIR files that specify a transformation,
+    # they need to be included as data.
+    data = [
+        "reduction_codegen_spec.mlir",
+        "reduction_dispatch_spec.mlir",
+        "softmax_codegen_spec.mlir",
+        "softmax_dispatch_spec.mlir",
+    ],
+    tags = [
+        # CUDA cuInit fails with sanitizer on.
+        "noasan",
+        "nomsan",
+        "notsan",
+        "noubsan",
+        "requires-gpu-nvidia",
+        "driver=cuda",
+    ],
+    tools = [
+        "//tools:iree-compile",
+        "//tools:iree-opt",
+        "//tools:iree-run-module",
+        "@llvm-project//llvm:FileCheck",
+    ],
+)
diff --git a/tests/transform_dialect/cuda/CMakeLists.txt b/tests/transform_dialect/cuda/CMakeLists.txt
index 79331ad..c947020 100644
--- a/tests/transform_dialect/cuda/CMakeLists.txt
+++ b/tests/transform_dialect/cuda/CMakeLists.txt
@@ -14,4 +14,29 @@
   return()
 endif()
 
+iree_lit_test_suite(
+  NAME
+    lit
+  SRCS
+    "reduction.mlir"
+    "softmax.mlir"
+  TOOLS
+    FileCheck
+    iree-compile
+    iree-opt
+    iree-run-module
+  DATA
+    reduction_codegen_spec.mlir
+    reduction_dispatch_spec.mlir
+    softmax_codegen_spec.mlir
+    softmax_dispatch_spec.mlir
+  LABELS
+    "noasan"
+    "nomsan"
+    "notsan"
+    "noubsan"
+    "requires-gpu-nvidia"
+    "driver=cuda"
+)
+
 ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/tests/transform_dialect/cuda/reduction_codegen_spec.mlir b/tests/transform_dialect/cuda/reduction_codegen_spec.mlir
index b4f5707..b2e834d 100644
--- a/tests/transform_dialect/cuda/reduction_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/reduction_codegen_spec.mlir
@@ -1,26 +1,26 @@
-// RUN: iree-opt %s 
+// RUN: iree-opt %s
 
 transform.with_pdl_patterns {
 ^bb0(%arg0: !pdl.operation):
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%variant_op: !pdl.operation):
     %0 = transform.structured.match ops{["linalg.generic"]} in %variant_op
     %fused_fill = transform.structured.match ops{["linalg.fill"]} in %variant_op
     // Note: split by 32 to vector-distribute the tail combiner_op, but
     // split by 2 to vector-distribute the meaty %more_parallel_op
-    %init_or_alloc_op, %fill_op, %more_parallel_op, %combiner_op = 
-      transform.structured.split_reduction %0 
+    %init_or_alloc_op, %fill_op, %more_parallel_op, %combiner_op =
+      transform.structured.split_reduction %0
         { split_factor = 2, insert_split_dimension = 1, use_alloc }
-    
+
     %1 = transform.structured.match ops{["linalg.generic"]} in %variant_op
-    %foreach_thread_1, %tiled_fill = 
+    %foreach_thread_1, %tiled_fill =
       transform.structured.tile_to_foreach_thread_op %fill_op num_threads [4, 2] (mapped to dims [2, 1, 0])
-    %foreach_thread_2, %tiled_more_parallel_op = 
+    %foreach_thread_2, %tiled_more_parallel_op =
        transform.structured.tile_to_foreach_thread_op %more_parallel_op num_threads [4, 2] (mapped to dims [2, 1, 0])
-    %foreach_thread_3, %tiled_combiner_op = 
+    %foreach_thread_3, %tiled_combiner_op =
       transform.structured.tile_to_foreach_thread_op %combiner_op num_threads [4] (mapped to dims [2, 1, 0])
-    %foreach_thread_4, %tiled_fused_fill_op = 
-      transform.structured.tile_to_foreach_thread_op %fused_fill num_threads [4] (mapped to dims [2, 1, 0])      
+    %foreach_thread_4, %tiled_fused_fill_op =
+      transform.structured.tile_to_foreach_thread_op %fused_fill num_threads [4] (mapped to dims [2, 1, 0])
 
     %isolated_handle_1 = transform.get_closest_isolated_parent %foreach_thread_2
     %isolated_handle_2 = transform.structured.vectorize %isolated_handle_1
@@ -28,15 +28,16 @@
 
     %variant_op_2 = transform.iree.bufferize { target_gpu } %variant_op
 
-    %isolated_handle_4 = 
-      transform.iree.foreach_thread_to_gpu_and_translation_info %isolated_handle_3 
+    %funcop = transform.structured.match ops{["func.func"]} in %variant_op_2
+    %isolated_handle_4 =
+      transform.iree.foreach_thread_to_gpu_and_translation_info %funcop
         { workgroup_size = [32, 2, 4] }
-    
+
     // Vector distribution needs to happen on buffers.
     %if_op = transform.structured.match ops{["scf.if"]} in %variant_op_2
     %warp = transform.iree.vector.to_warp_execute_on_lane_0 %if_op { warp_size = 32 }
     transform.iree.vector.warp_distribute %isolated_handle_4
-    
+
     // transform.print { name = "after codegen"}
   }
 }
diff --git a/tests/transform_dialect/cuda/reduction_dispatch_spec.mlir b/tests/transform_dialect/cuda/reduction_dispatch_spec.mlir
index bc74d6c..353342e 100644
--- a/tests/transform_dialect/cuda/reduction_dispatch_spec.mlir
+++ b/tests/transform_dialect/cuda/reduction_dispatch_spec.mlir
@@ -1,8 +1,8 @@
-// RUN: iree-opt %s 
+// RUN: iree-opt %s
 
 transform.with_pdl_patterns {
 ^bb0(%arg0: !pdl.operation):
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
     %0 = transform.structured.match ops{["linalg.generic"]} in %arg1
     %foreach_thread, %tiled_generic = transform.structured.tile_to_foreach_thread_op %0 num_threads [2]
diff --git a/tests/transform_dialect/cuda/softmax_codegen_spec.mlir b/tests/transform_dialect/cuda/softmax_codegen_spec.mlir
index 01c6d47..6582475 100644
--- a/tests/transform_dialect/cuda/softmax_codegen_spec.mlir
+++ b/tests/transform_dialect/cuda/softmax_codegen_spec.mlir
@@ -1,36 +1,36 @@
-// RUN: iree-opt %s 
+// RUN: iree-opt %s
 
 // Codegen
 transform.with_pdl_patterns {
 ^bb0(%arg0: !pdl.operation):
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%variant_op: !pdl.operation):
     // First level of tiling + fusion parallelizes to blocks.
-    // The mapping  to block ids can only happen after bufferization atm 
-    %root = transform.structured.match interface{LinalgOp} 
+    // The mapping to block ids can only happen after bufferization atm
+    %root = transform.structured.match interface{LinalgOp}
       attributes{iterator_types = ["parallel", "parallel", "parallel"]} in %variant_op
     %fill = transform.structured.match ops{["linalg.fill"]} in %variant_op
-    %red = transform.structured.match interface{LinalgOp} 
+    %red = transform.structured.match interface{LinalgOp}
       attributes{iterator_types = ["parallel", "parallel", "reduction"]} in %variant_op
     %not_root = merge_handles %fill, %red
-    %foreach_thread, %tiled_generic = 
+    %foreach_thread, %tiled_generic =
       transform.structured.tile_to_foreach_thread_op %root tile_sizes [1, 4]
     transform.structured.fuse_into_containing_op %not_root into %foreach_thread
-    
+
     // Second level of tiling + fusion parallelizes to threads.
-    // Leaving the reduction untiled on threadIdx.x makes it sequential on 
+    // Leaving the reduction untiled on threadIdx.x makes it sequential on
     // threadIdx.x. After distribution, predication by if (threadIdx.x == 0) is
     // introduced and opportunities for distributing vector ops across warps
     // appear.
     %fill_linalg = transform.structured.match ops{["linalg.fill"]} in %variant_op
-    %reduction_linalg = transform.structured.match ops{["linalg.generic"]} 
+    %reduction_linalg = transform.structured.match ops{["linalg.generic"]}
       attributes{iterator_types = ["parallel", "parallel", "reduction"]} in %variant_op
-    %parallel_linalg = transform.structured.match ops{["linalg.generic"]} 
+    %parallel_linalg = transform.structured.match ops{["linalg.generic"]}
       attributes{iterator_types = ["parallel", "parallel", "parallel"]} in %variant_op
-    %foreach_thread_reduction, %tiled_reduction_generic = 
+    %foreach_thread_reduction, %tiled_reduction_generic =
       transform.structured.tile_to_foreach_thread_op %reduction_linalg tile_sizes [1, 1]
         (mapped to dims [2, 1, 0])
-    // TODO: this fusion currently does not happen properly, this is related to the clone 
+    // TODO: this fusion currently does not happen properly, this is related to the clone
     // behavior when fusing into scf.foreach_thread.
     // Once fixed we'll be able to fuse.
     // Fusion will save us one roundtrip to memory.
@@ -55,7 +55,7 @@
     // to enable the parallel reduction on warps.
     %func = transform.structured.match ops{["func.func"]} in %variant_op
     %func_2 = transform.structured.vectorize %func
-    
+
     // Bufferization is necessary for:
     //   1. lowering scf.foreach_thread to workgroup (block level parallelism)
     //   2. lowering scf.foreach_thread to gpu (thread level parallelism)
@@ -63,10 +63,11 @@
     //      warp_execute_on_lane_0 and later vector distribution.
     %variant_op_2 = transform.iree.bufferize { target_gpu } %variant_op
 
-    %func_3 = transform.iree.foreach_thread_to_workgroup %func_2
-    transform.iree.foreach_thread_to_gpu_and_translation_info %func_3
+    %func_3 = transform.structured.match ops{["func.func"]} in %variant_op_2
+    %func_4 = transform.iree.foreach_thread_to_workgroup %func_3
+    transform.iree.foreach_thread_to_gpu_and_translation_info %func_4
       { workgroup_size = [32, 4, 1] }
-    
+
     %end_func = transform.structured.match ops{["func.func"]} in %variant_op_2
     %end_func_2 = transform.iree.apply_patterns %end_func { rank_reducing }
 
diff --git a/tests/transform_dialect/cuda/softmax_dispatch_spec.mlir b/tests/transform_dialect/cuda/softmax_dispatch_spec.mlir
index c073496..8986ab1 100644
--- a/tests/transform_dialect/cuda/softmax_dispatch_spec.mlir
+++ b/tests/transform_dialect/cuda/softmax_dispatch_spec.mlir
@@ -1,14 +1,14 @@
-// RUN: iree-opt %s 
+// RUN: iree-opt %s
 
 // Dispatch softmax.
 transform.with_pdl_patterns {
 ^bb0(%arg0: !pdl.operation):
-  transform.structured.canonicalized_sequence %arg0 {
+  transform.structured.canonicalized_sequence %arg0 failures(propagate) {
   ^bb1(%arg1: !pdl.operation):
-    %root = transform.structured.match interface{LinalgOp} 
+    %root = transform.structured.match interface{LinalgOp}
       attributes{iterator_types = ["parallel", "parallel", "parallel"]} in %arg1
     %fill = transform.structured.match ops{["linalg.fill"]} in %arg1
-    %red = transform.structured.match interface{LinalgOp} 
+    %red = transform.structured.match interface{LinalgOp}
       attributes{iterator_types = ["parallel", "parallel", "reduction"]} in %arg1
 
     // TODO: this could be replaced by a C++ only version.