[Codegen] Remove deprecated transform.iree.match_callback tests (#24500)

Part of #24466 (sub-task: remove deprecated tests under
`compiler/src/iree/compiler/Codegen/Common/test/`).

Removes 10 lit tests from this directory that depend on the retired
`transform.iree.match_callback` op and its supporting machinery
(`register_match_callbacks`, `take_first`, `emit_remark`), all of which
were provided by `llvm-external-projects/iree-dialects`.

- 7 files use the deprecated op directly
- 3 driver tests (`batch_matmuls.mlir`, `convolutions.mlir`,
`reductions.mlir`) have RUN lines that only invoke the deleted
`*_spec.mlir` files
- `BUILD.bazel` and `CMakeLists.txt` updated to drop matching
srcs/exclude/data entries
- Net diff: 12 files changed, 933 deletions(-), 0 insertions(+)
- No remaining source-tree references to the deleted files

Signed-off-by: Alex-Wengg <hanweng9@gmail.com>
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel
index ee1ee40..dbb4c96 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel
@@ -22,7 +22,6 @@
             "absorb_swizzle_hint_to_alloc.mlir",
             "add_fmfs.mlir",
             "affinemin_canonicalization.mlir",
-            "batch_matmuls.mlir",
             "block_dynamic_dims.mlir",
             "bubble_up_ordinal_ops.mlir",
             "bufferize_copy_only_dispatches.mlir",
@@ -40,7 +39,6 @@
             "convert_unsupported_float_to_int_buffers.mlir",
             "convert_workgroup_forall_to_pcf.mlir",
             "convolution_to_igemm.mlir",
-            "convolutions.mlir",
             "create_dispatch_config.mlir",
             "decompose_affine_ops.mlir",
             "decompose_boundary_pack_unpack_ops.mlir",
@@ -121,13 +119,11 @@
             "reconcile_translation_info.mlir",
             "reconcile_translation_info_linearize.mlir",
             "reconcile_translation_info_pure.mlir",
-            "reductions.mlir",
             "reinsert_swizzle_hints.mlir",
             "rematerialize_parallel_ops.mlir",
             "remove_dead_allocs.mlir",
             "remove_index_hints.mlir",
             "remove_single_iteration_loop.mlir",
-            "repeated_matcher_use.mlir",
             "replace_slow_min_max_ops.mlir",
             "resolve_swizzle_hints.mlir",
             "resolve_workgroup_count_hints.mlir",
@@ -141,8 +137,6 @@
             "transform_copy_operand.mlir",
             "transform_flatten_forall.mlir",
             "transform_hoist_forall.mlir",
-            "transform_match_partial_reduction.mlir",
-            "transform_ops_invalid.mlir",
             "transpose_canonicalization.mlir",
             "type_propagation.mlir",
             "unroll_annotated_loops.mlir",
@@ -158,12 +152,8 @@
         ],
         include = ["*.mlir"],
         exclude = [
-            "batch_matmul_match_spec.mlir",
-            "convolution_match_spec.mlir",
             "external_strategy_spec.mlir",
             "patch_func_ops_spec.mlir",
-            "reductions_codegen_spec.mlir",
-            "reductions_match_spec.mlir",
             "tuning_spec.mlir",
             "tuning_spec_default.mlir",
         ],
@@ -172,12 +162,8 @@
     # transform dialect spec files are MLIR files that specify a transformation,
     # they need to be included as data.
     data = [
-        "batch_matmul_match_spec.mlir",
-        "convolution_match_spec.mlir",
         "external_strategy_spec.mlir",
         "patch_func_ops_spec.mlir",
-        "reductions_codegen_spec.mlir",
-        "reductions_match_spec.mlir",
         "tuning_spec.mlir",
         "tuning_spec_default.mlir",
     ],
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt
index bb52d58..c950ca2 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt
@@ -17,7 +17,6 @@
     "absorb_swizzle_hint_to_alloc.mlir"
     "add_fmfs.mlir"
     "affinemin_canonicalization.mlir"
-    "batch_matmuls.mlir"
     "block_dynamic_dims.mlir"
     "bubble_up_ordinal_ops.mlir"
     "bufferize_copy_only_dispatches.mlir"
@@ -35,7 +34,6 @@
     "convert_unsupported_float_to_int_buffers.mlir"
     "convert_workgroup_forall_to_pcf.mlir"
     "convolution_to_igemm.mlir"
-    "convolutions.mlir"
     "create_dispatch_config.mlir"
     "decompose_affine_ops.mlir"
     "decompose_boundary_pack_unpack_ops.mlir"
@@ -116,13 +114,11 @@
     "reconcile_translation_info.mlir"
     "reconcile_translation_info_linearize.mlir"
     "reconcile_translation_info_pure.mlir"
-    "reductions.mlir"
     "reinsert_swizzle_hints.mlir"
     "rematerialize_parallel_ops.mlir"
     "remove_dead_allocs.mlir"
     "remove_index_hints.mlir"
     "remove_single_iteration_loop.mlir"
-    "repeated_matcher_use.mlir"
     "replace_slow_min_max_ops.mlir"
     "resolve_swizzle_hints.mlir"
     "resolve_workgroup_count_hints.mlir"
@@ -136,8 +132,6 @@
     "transform_copy_operand.mlir"
     "transform_flatten_forall.mlir"
     "transform_hoist_forall.mlir"
-    "transform_match_partial_reduction.mlir"
-    "transform_ops_invalid.mlir"
     "transpose_canonicalization.mlir"
     "type_propagation.mlir"
     "unroll_annotated_loops.mlir"
@@ -154,12 +148,8 @@
     FileCheck
     iree-opt
   DATA
-    batch_matmul_match_spec.mlir
-    convolution_match_spec.mlir
     external_strategy_spec.mlir
     patch_func_ops_spec.mlir
-    reductions_codegen_spec.mlir
-    reductions_match_spec.mlir
     tuning_spec.mlir
     tuning_spec_default.mlir
 )
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/batch_matmul_match_spec.mlir b/compiler/src/iree/compiler/Codegen/Common/test/batch_matmul_match_spec.mlir
deleted file mode 100644
index 6c4e12a..0000000
--- a/compiler/src/iree/compiler/Codegen/Common/test/batch_matmul_match_spec.mlir
+++ /dev/null
@@ -1,15 +0,0 @@
-// RUN: iree-opt %s
-
-module attributes { transform.with_named_sequence } {
-  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
-    transform.iree.register_match_callbacks
-
-    %fill, %batch_matmul =
-      transform.iree.match_callback failures(propagate) "batch_matmul"(%root)
-      : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
-
-    transform.iree.emit_remark "fill" at %fill : !transform.any_op
-    transform.iree.emit_remark "batch matmul" at %batch_matmul : !transform.any_op
-    transform.yield
-  } // @__transform_main
-} // module
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/batch_matmuls.mlir b/compiler/src/iree/compiler/Codegen/Common/test/batch_matmuls.mlir
deleted file mode 100644
index 5f36f93..0000000
--- a/compiler/src/iree/compiler/Codegen/Common/test/batch_matmuls.mlir
+++ /dev/null
@@ -1,73 +0,0 @@
-// RUN: iree-opt %s \
-// RUN: --iree-transform-dialect-interpreter="library-file-name=%p/batch_matmul_match_spec.mlir" \
-// RUN: --split-input-file --verify-diagnostics
-
-!lhs = tensor<128x80x32xf32>
-!rhs = tensor<128x32x320xf32>
-!res = tensor<128x80x320xf32>
-
-func.func @batch_matmul_generic(%arg0: !lhs, %arg1: !rhs) -> !res {
-  %cst = arith.constant 0.000000e+00 : f32
-  %0 = tensor.empty() : !res
-  // expected-remark @below {{fill}}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : !res) -> !res
-  // expected-remark @below {{batch matmul}}
-  %2 = linalg.generic {
-    indexing_maps = [
-      affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>,
-      affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>,
-      affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-    ],
-    iterator_types = ["parallel", "parallel", "parallel", "reduction"]
-  } ins(%arg0, %arg1 : !lhs, !rhs) outs(%1 : !res) {
-  ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
-    %3 = arith.mulf %arg3, %arg4 : f32
-    %4 = arith.addf %arg5, %3 : f32
-    linalg.yield %4 : f32
-  } -> !res
-  return %2 : !res
-}
-
-// -----
-
-!lhs = tensor<128x80x32xf32>
-!rhs = tensor<128x32x320xf32>
-!res = tensor<128x80x320xf32>
-
-func.func @batch_matmul_named(%arg0: !lhs, %arg1: !rhs) -> !res {
-  %cst = arith.constant 0.000000e+00 : f32
-  %0 = tensor.empty() : !res
-  // expected-remark @below {{fill}}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : !res) -> !res
-  // expected-remark @below {{batch matmul}}
-  %2 = linalg.batch_matmul ins(%arg0, %arg1 : !lhs, !rhs) outs(%1 : !res) -> !res
-  return %2 : !res
-}
-
-// -----
-
-!lhs = tensor<80x128x32xf32>
-!rhs = tensor<128x32x320xf32>
-!res = tensor<80x320x128xf32>
-
-func.func @batch_matmul_generic_transposed(%arg0: !lhs, %arg1: !rhs) -> !res {
-  %cst = arith.constant 0.000000e+00 : f32
-  %0 = tensor.empty() : !res
-  // expected-remark @below {{fill}}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : !res) -> !res
-  // expected-remark @below {{batch matmul}}
-  %2 = linalg.generic {
-    indexing_maps = [
-      affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>,
-      affine_map<(d0, d1, d2, d3) -> (d2, d3, d1)>,
-      affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-    ],
-    iterator_types = ["parallel", "parallel", "parallel", "reduction"]
-  } ins(%arg0, %arg1 : !lhs, !rhs) outs(%1 : !res) {
-  ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
-    %3 = arith.mulf %arg3, %arg4 : f32
-    %4 = arith.addf %arg5, %3 : f32
-    linalg.yield %4 : f32
-  } -> !res
-  return %2 : !res
-}
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/convolution_match_spec.mlir b/compiler/src/iree/compiler/Codegen/Common/test/convolution_match_spec.mlir
deleted file mode 100644
index 5e96d6d..0000000
--- a/compiler/src/iree/compiler/Codegen/Common/test/convolution_match_spec.mlir
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: iree-opt %s
-
-module attributes { transform.with_named_sequence } {
-  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
-    transform.iree.register_match_callbacks
-
-    %fill, %convolution, %trailing =
-      transform.iree.match_callback failures(propagate) "convolution"(%root)
-      : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
-
-    transform.iree.emit_remark "fill" at %fill : !transform.any_op
-    transform.iree.emit_remark "convolution" at %convolution : !transform.any_op
-    transform.iree.emit_remark "trailing" at %trailing : !transform.any_op
-    transform.yield
-  }
-}
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/convolutions.mlir b/compiler/src/iree/compiler/Codegen/Common/test/convolutions.mlir
deleted file mode 100644
index 5182aa4..0000000
--- a/compiler/src/iree/compiler/Codegen/Common/test/convolutions.mlir
+++ /dev/null
@@ -1,81 +0,0 @@
-// RUN: iree-opt %s \
-// RUN: --iree-transform-dialect-interpreter="library-file-name=%p/convolution_match_spec.mlir" \
-// RUN: --split-input-file --verify-diagnostics
-
-
-!input_tensor_t = tensor<2x16x130x130xf32>
-!weight_tensor_t = tensor<32x16x3x3xf32>
-!output_tensor_t = tensor<2x32x128x128xf32>
-func.func @conv_2d_nchw_fchw_trailing_eltwise(%in: !input_tensor_t, %wei: !weight_tensor_t,
-                             %out: !output_tensor_t) -> !output_tensor_t {
-  // expected-remark @below {{convolution}}
-  %0 = linalg.conv_2d_nchw_fchw
-    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> }
-     ins(%in, %wei: !input_tensor_t, !weight_tensor_t)
-    outs(%out: !output_tensor_t) -> !output_tensor_t
-
-  %1 = tensor.empty() : !output_tensor_t
-  // expected-remark @below {{trailing}}
-  %2 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
-                     affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
-    iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
-    ins(%0 : !output_tensor_t) outs(%1 : !output_tensor_t) {
-    ^bb0(%arg3: f32, %arg4: f32):
-      %3 = math.sqrt %arg3 : f32
-      linalg.yield %3 : f32
-    } -> !output_tensor_t
-  return %2 : !output_tensor_t
-}
-
-// -----
-
-!input_tensor_t = tensor<2x16x130x130xf32>
-!weight_tensor_t = tensor<32x16x3x3xf32>
-!output_tensor_t = tensor<2x32x128x128xf32>
-func.func @conv_2d_nchw_fchw_fill(%in: !input_tensor_t, %wei: !weight_tensor_t) -> !output_tensor_t {
-
-  %cst = arith.constant 0.000000e+00 : f32
-  %0 = tensor.empty() : !output_tensor_t
-  // expected-remark @below {{fill}}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : !output_tensor_t) -> !output_tensor_t
-
-  // expected-remark @below {{convolution}}
-  %2 = linalg.conv_2d_nchw_fchw
-    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> }
-     ins(%in, %wei: !input_tensor_t, !weight_tensor_t)
-    outs(%1: !output_tensor_t) -> !output_tensor_t
-  return %2 : !output_tensor_t
-}
-
-// -----
-
-!input_tensor_t = tensor<2x130x130x16xf32>
-!weight_tensor_t = tensor<3x3x16x32xf32>
-!output_tensor_t = tensor<2x128x128x32xf32>
-func.func @conv_2d_nhwc_hwcf(%in: !input_tensor_t, %wei: !weight_tensor_t) -> !output_tensor_t {
-
-  %cst = arith.constant 0.000000e+00 : f32
-  %0 = tensor.empty() : !output_tensor_t
-  // expected-remark @below {{fill}}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : !output_tensor_t) -> !output_tensor_t
-
-  // expected-remark @below {{convolution}}
-  %2 = linalg.conv_2d_nhwc_hwcf
-    {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> }
-     ins(%in, %wei: !input_tensor_t, !weight_tensor_t)
-    outs(%1: !output_tensor_t) -> !output_tensor_t
-
-  %3 = tensor.empty() : !output_tensor_t
-  // expected-remark @below {{trailing}}
-  %4 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
-                     affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
-    iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
-    ins(%2 : !output_tensor_t) outs(%3 : !output_tensor_t) {
-    ^bb0(%arg3: f32, %arg4: f32):
-      %5 = math.sqrt %arg3 : f32
-      linalg.yield %5 : f32
-    } -> !output_tensor_t
-  return %4 : !output_tensor_t
-}
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/reductions.mlir b/compiler/src/iree/compiler/Codegen/Common/test/reductions.mlir
deleted file mode 100644
index 4f1b726..0000000
--- a/compiler/src/iree/compiler/Codegen/Common/test/reductions.mlir
+++ /dev/null
@@ -1,274 +0,0 @@
-// RUN: iree-opt %s --iree-transform-dialect-interpreter="library-file-name=%p/reductions_codegen_spec.mlir" --split-input-file | FileCheck %s
-// RUN: iree-opt %s --iree-transform-dialect-interpreter="library-file-name=%p/reductions_match_spec.mlir" --split-input-file --verify-diagnostics
-
-// Check that the same transform script applies to reductions with optional
-// leading and trailing elementwise operations, potentially reordered
-// producers and interleaving operations. This only checks for the matching
-// and the right fusion structure without the surrounding IREE dispatch, which
-// may fuse earlier, to decrease fragility.
-
-!in_tensor_t = tensor<8x64xf32>
-!out_tensor_t = tensor<8xf32>
-
-func.func @reduce(%arg : !in_tensor_t) -> (!out_tensor_t) {
-  %cst = arith.constant -0.000000e+00 : f32
-
-  %0 = tensor.empty() : !out_tensor_t
-  // expected-remark @below {{fill}}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : !out_tensor_t) ->   !out_tensor_t
-  // expected-remark @below {{reduction}}
-  %2 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                     affine_map<(d0, d1) -> (d0)>],
-    iterator_types = ["parallel", "reduction"]}
-    ins(%arg : !in_tensor_t) outs(%1 : !out_tensor_t) {
-      ^bb0(%arg3: f32, %arg4: f32):
-        %3 = arith.addf %arg3, %arg4 : f32
-        linalg.yield %3 : f32
-      } -> !out_tensor_t
-  return %2 : !out_tensor_t
-}
-
-// CHECK-LABEL: @reduce
-// CHECK: scf.forall {{.*}} {
-// CHECK:   scf.forall {{.*}} {
-// CHECK:     linalg.fill
-// CHECK:     linalg.generic {{.*}} iterator_types = ["reduction"]
-// CHECK:   } {mapping = [#gpu.thread<z>, #gpu.thread<y>]}
-// CHECK:   scf.forall {{.*}} {
-// CHECK:     linalg.fill
-// CHECK:     linalg.generic {{.*}} iterator_types = ["reduction"]
-// CHECK:   } {mapping = [#gpu.thread<z>]}
-
-// -----
-
-!in_tensor_t = tensor<8x64xf32>
-!out_tensor_t = tensor<8xf32>
-
-func.func @eltwise_reduce(%arg : !in_tensor_t) -> (!out_tensor_t) {
-  %cst = arith.constant -0.000000e+00 : f32
-
-  %0 = tensor.empty() : !out_tensor_t
-  // expected-remark @below {{fill}}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : !out_tensor_t) ->  !out_tensor_t
-  %2 = tensor.empty() : !in_tensor_t
-  // expected-remark @below {{leading}}
-  %3 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                     affine_map<(d0, d1) -> (d0, d1)>],
-    iterator_types = ["parallel", "parallel"]}
-    ins(%arg : !in_tensor_t) outs(%2 : !in_tensor_t) {
-    ^bb0(%arg3: f32, %arg4: f32):
-      %4 = arith.addf %arg3, %arg3 : f32
-      %5 = arith.addf %4, %4 : f32
-      linalg.yield %5 : f32
-    } -> !in_tensor_t
-
-  // expected-remark @below {{reduction}}
-  %6 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                     affine_map<(d0, d1) -> (d0)>],
-    iterator_types = ["parallel", "reduction"]}
-    ins(%3 : !in_tensor_t) outs(%1 : !out_tensor_t) {
-      ^bb0(%arg3: f32, %arg4: f32):
-        %4 = arith.addf %arg3, %arg4 : f32
-        linalg.yield %4 : f32
-      } -> !out_tensor_t
-
-  return %6 : !out_tensor_t
-}
-
-// CHECK-LABEL: @eltwise_reduce
-// CHECK: scf.forall {{.*}} {
-// CHECK:   scf.forall {{.*}} {
-// CHECK:     linalg.generic {{.*}} iterator_types = ["parallel"]
-// CHECK:     linalg.fill
-// CHECK:     linalg.generic {{.*}} iterator_types = ["reduction"]
-// CHECK:   } {mapping = [#gpu.thread<z>, #gpu.thread<y>]}
-// CHECK:   scf.forall {{.*}} {
-// CHECK:     linalg.fill
-// CHECK:     linalg.generic {{.*}} iterator_types = ["reduction"]
-// CHECK:   } {mapping = [#gpu.thread<z>]}
-
-// -----
-
-!in_tensor_t = tensor<8x64xf32>
-!out_tensor_t = tensor<8xf32>
-
-func.func @reduce_eltwise(%arg : !in_tensor_t) -> (!out_tensor_t) {
-  %cst = arith.constant -0.000000e+00 : f32
-
-  %0 = tensor.empty() : !out_tensor_t
-  // expected-remark @below {{fill}}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : !out_tensor_t) -> !out_tensor_t
-  // expected-remark @below {{reduction}}
-  %5 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                     affine_map<(d0, d1) -> (d0)>],
-    iterator_types = ["parallel", "reduction"]}
-    ins(%arg : !in_tensor_t) outs(%1 : !out_tensor_t) {
-      ^bb0(%arg3: f32, %arg4: f32):
-        %4 = arith.addf %arg3, %arg4 : f32
-        linalg.yield %4 : f32
-      } -> !out_tensor_t
-
-  %6 = tensor.empty() : !out_tensor_t
-  // expected-remark @below {{trailing}}
-  %7 = linalg.generic {
-    indexing_maps = [affine_map<(d0) -> (d0)>,
-                     affine_map<(d0) -> (d0)>],
-    iterator_types = ["parallel"]}
-    ins(%5 : !out_tensor_t) outs(%6 : !out_tensor_t) {
-    ^bb0(%arg3: f32, %arg4: f32):
-      %4 = math.sqrt %arg3 : f32
-      linalg.yield %4 : f32
-    } -> !out_tensor_t
-  return %7 : !out_tensor_t
-}
-
-
-// CHECK-LABEL: @reduce_eltwise
-// CHECK: scf.forall {{.*}} {
-// CHECK:   scf.forall {{.*}} {
-// CHECK:     linalg.fill
-// CHECK:     linalg.generic {{.*}} iterator_types = ["reduction"]
-// CHECK:   } {mapping = [#gpu.thread<z>, #gpu.thread<y>]}
-// CHECK:   scf.forall {{.*}} {
-// CHECK:     linalg.fill
-// CHECK:     linalg.generic {{.*}} iterator_types = ["reduction"]
-// CHECK:       arith.addf
-// CHECK:     linalg.generic {{.*}} iterator_types = []
-// CHECK:       math.sqrt
-// CHECK:   } {mapping = [#gpu.thread<z>]}
-
-// -----
-
-!in_tensor_t = tensor<8x64xf32>
-!out_tensor_t = tensor<8xf32>
-
-func.func @eltwise_reduce_eltwise(%arg : !in_tensor_t) -> (!out_tensor_t) {
-  %cst = arith.constant -0.000000e+00 : f32
-
-  %0 = tensor.empty() : !out_tensor_t
-  // expected-remark @below {{fill}}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : !out_tensor_t) ->  !out_tensor_t
-  %2 = tensor.empty() : !in_tensor_t
-  // expected-remark @below {{leading}}
-  %3 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                     affine_map<(d0, d1) -> (d0, d1)>],
-    iterator_types = ["parallel", "parallel"]}
-    ins(%arg : !in_tensor_t) outs(%2 : !in_tensor_t) {
-    ^bb0(%arg3: f32, %arg4: f32):
-      %4 = arith.addf %arg3, %arg3 : f32
-      %5 = arith.addf %4, %4 : f32
-      linalg.yield %5 : f32
-    } -> !in_tensor_t
-
-  // expected-remark @below {{reduction}}
-  %6 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                     affine_map<(d0, d1) -> (d0)>],
-    iterator_types = ["parallel", "reduction"]}
-    ins(%3 : !in_tensor_t) outs(%1 : !out_tensor_t) {
-      ^bb0(%arg3: f32, %arg4: f32):
-        %4 = arith.addf %arg3, %arg4 : f32
-        linalg.yield %4 : f32
-      } -> !out_tensor_t
-
-  %7 = tensor.empty() : !out_tensor_t
-  // expected-remark @below {{trailing}}
-  %8 = linalg.generic {
-    indexing_maps = [affine_map<(d0) -> (d0)>,
-                     affine_map<(d0) -> (d0)>],
-    iterator_types = ["parallel"]}
-    ins(%6 : !out_tensor_t) outs(%7 : !out_tensor_t) {
-    ^bb0(%arg3: f32, %arg4: f32):
-      %4 = math.sqrt %arg3 : f32
-      linalg.yield %4 : f32
-    } -> !out_tensor_t
-
-
-  return %8 : !out_tensor_t
-}
-
-// CHECK-LABEL: @eltwise_reduce_eltwise
-// CHECK: scf.forall {{.*}} {
-// CHECK:   scf.forall {{.*}} {
-// CHECK:     linalg.generic {{.*}} iterator_types = ["parallel"]
-// CHECK:     linalg.fill
-// CHECK:     linalg.generic {{.*}} iterator_types = ["reduction"]
-// CHECK:   } {mapping = [#gpu.thread<z>, #gpu.thread<y>]}
-// CHECK:   scf.forall {{.*}} {
-// CHECK:     linalg.fill
-// CHECK:     linalg.generic {{.*}} iterator_types = ["reduction"]
-// CHECK:       arith.addf
-// CHECK:     linalg.generic {{.*}} iterator_types = []
-// CHECK:       math.sqrt
-// CHECK:   } {mapping = [#gpu.thread<z>]}
-
-// -----
-
-!in_tensor_t = tensor<8x64xf32>
-!out_tensor_t = tensor<8xf32>
-
-func.func @eltwise_reduce_eltwise_swapped(%arg : !in_tensor_t) -> (!out_tensor_t) {
-  %cst = arith.constant -0.000000e+00 : f32
-
-  %2 = tensor.empty() : !in_tensor_t
-  // expected-remark @below {{leading}}
-  %3 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                     affine_map<(d0, d1) -> (d0, d1)>],
-    iterator_types = ["parallel", "parallel"]}
-    ins(%arg : !in_tensor_t) outs(%2 : !in_tensor_t) {
-    ^bb0(%arg3: f32, %arg4: f32):
-      %4 = arith.addf %arg3, %arg3 : f32
-      %5 = arith.addf %4, %4 : f32
-      linalg.yield %5 : f32
-    } -> !in_tensor_t
-
-  %0 = tensor.empty() : !out_tensor_t
-  // expected-remark @below {{fill}}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : !out_tensor_t) ->  !out_tensor_t
-  // expected-remark @below {{reduction}}
-  %6 = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                     affine_map<(d0, d1) -> (d0)>],
-    iterator_types = ["parallel", "reduction"]}
-    ins(%3 : !in_tensor_t) outs(%1 : !out_tensor_t) {
-      ^bb0(%arg3: f32, %arg4: f32):
-        %4 = arith.addf %arg3, %arg4 : f32
-        linalg.yield %4 : f32
-      } -> !out_tensor_t
-
-  %7 = tensor.empty() : !out_tensor_t
-  // expected-remark @below {{trailing}}
-  %8 = linalg.generic {
-    indexing_maps = [affine_map<(d0) -> (d0)>,
-                     affine_map<(d0) -> (d0)>],
-    iterator_types = ["parallel"]}
-    ins(%6 : !out_tensor_t) outs(%7 : !out_tensor_t) {
-    ^bb0(%arg3: f32, %arg4: f32):
-      %4 = math.sqrt %arg3 : f32
-      linalg.yield %4 : f32
-    } -> !out_tensor_t
-
-
-  return %8 : !out_tensor_t
-}
-
-// CHECK-LABEL: @eltwise_reduce_eltwise_swapped
-// CHECK: scf.forall {{.*}} {
-// CHECK:   scf.forall {{.*}} {
-// CHECK:     linalg.generic {{.*}} iterator_types = ["parallel"]
-// CHECK:     linalg.fill
-// CHECK:     linalg.generic {{.*}} iterator_types = ["reduction"]
-// CHECK:   } {mapping = [#gpu.thread<z>, #gpu.thread<y>]}
-// CHECK:   scf.forall {{.*}} {
-// CHECK:     linalg.fill
-// CHECK:     linalg.generic {{.*}} iterator_types = ["reduction"]
-// CHECK:       arith.addf
-// CHECK:     linalg.generic {{.*}} iterator_types = []
-// CHECK:       math.sqrt
-// CHECK:   } {mapping = [#gpu.thread<z>]}
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/reductions_codegen_spec.mlir b/compiler/src/iree/compiler/Codegen/Common/test/reductions_codegen_spec.mlir
deleted file mode 100644
index 7b6f45b..0000000
--- a/compiler/src/iree/compiler/Codegen/Common/test/reductions_codegen_spec.mlir
+++ /dev/null
@@ -1,77 +0,0 @@
-// RUN: iree-opt %s
-
-module attributes { transform.with_named_sequence } {
-  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
-    transform.iree.register_match_callbacks
-
-    %maybe_leading, %original_fill, %reduction, %maybe_trailing_0 =
-      transform.iree.match_callback failures(propagate) "reduction"(%root)
-      : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
-
-    %_, %more_parallel_fill, %parallel_reduction, %combiner_op =
-      transform.structured.split_reduction %reduction { split_factor = 2, insert_split_dimension = 1 }
-      : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
-
-    // Step 1. Map to a single block by tiling with size 1 and fusing.
-    %fusion_root_1, %fusion_group_1 = transform.iree.take_first %maybe_trailing_0, %combiner_op
-      : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
-    %outer_tiled, %grid_loop = transform.structured.tile_using_forall %fusion_root_1 tile_sizes [1]
-      ( mapping = [#gpu.block<x>] )
-      : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
-
-    %func = transform.structured.match ops{["func.func"]} in %root : (!transform.any_op) -> !transform.any_op
-    transform.apply_patterns to %func {
-      transform.apply_patterns.iree.bubble_expand
-    } : !transform.any_op
-
-    // Excessively eager canonicalization results in `fill`s being "fused" due to
-    // swapping with `extract_slice`, which confuses the fusion operation below.
-    // Wrap fusion into a non-canonicalized sequence.
-    %fused_2, %parallel_reduction_2, %more_parallel_fill_2, %original_fill_2, %maybe_leading_2 =
-      transform.sequence %root : !transform.any_op -> !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op
-      failures(propagate) {
-    ^bb1(%arg1: !transform.any_op):
-      %fused_22, %new_containing_1 = transform.structured.fuse_into_containing_op %fusion_group_1 into %grid_loop : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
-      %parallel_reduction_22, %new_containing_2 = transform.structured.fuse_into_containing_op %parallel_reduction into %grid_loop : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
-      %more_parallel_fill_22, %new_containing_3 = transform.structured.fuse_into_containing_op %more_parallel_fill into %grid_loop : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
-      %original_fill_22, %new_containing_4 = transform.structured.fuse_into_containing_op %original_fill into %grid_loop : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
-      %maybe_leading_22, %new_containing_5 = transform.structured.fuse_into_containing_op %maybe_leading into %grid_loop : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
-
-      transform.yield %fused_22, %parallel_reduction_22, %more_parallel_fill_22, %original_fill_22, %maybe_leading_22
-        : !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op
-    }
-
-    // Step 2. Map reduction to thread X and parallel dimension to other threads.
-    // ===========================================================================
-    %fusion_group_22_full = transform.merge_handles %fused_2, %original_fill_2
-      : !transform.any_op
-    %_1, %block_loop_22 =
-      transform.structured.tile_using_forall %outer_tiled
-      tile_sizes [1] ( mapping = [#gpu.thread<z>] )
-      : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
-    transform.structured.fuse_into_containing_op %fusion_group_22_full into %block_loop_22 : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
-
-
-    %fusion_group_21 = transform.merge_handles %maybe_leading_2, %more_parallel_fill_2
-      : !transform.any_op
-    %_0, %block_loop_21 =
-      transform.structured.tile_using_forall %parallel_reduction_2
-      tile_sizes [1, 1] ( mapping = [#gpu.thread<z>, #gpu.thread<y>] )
-      : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
-    transform.structured.fuse_into_containing_op %fusion_group_21 into %block_loop_21 : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
-
-    // Step 3. Rank-reduce.
-    // ===========================================================================
-    transform.apply_patterns to %func {
-      transform.apply_patterns.iree.fold_reshape_into_tensor_hal_interface
-      transform.apply_patterns.linalg.fold_unit_extent_dims_via_slices
-      transform.apply_patterns.vector.cast_away_vector_leading_one_dim
-    } : !transform.any_op
-
-    // We don't perform any following transformation (vectorization, bufferizaton,
-    // mapping) because this schedule is applied to Linalg-only code without the
-    // surrounding context and because it would make it difficult to detect, e.g.,
-    // lack of fusion.
-    transform.yield
-  } // @__transform_main
-} // module
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/reductions_match_spec.mlir b/compiler/src/iree/compiler/Codegen/Common/test/reductions_match_spec.mlir
deleted file mode 100644
index 7f19631..0000000
--- a/compiler/src/iree/compiler/Codegen/Common/test/reductions_match_spec.mlir
+++ /dev/null
@@ -1,17 +0,0 @@
-// RUN: iree-opt %s
-
-module attributes { transform.with_named_sequence } {
-  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
-    transform.iree.register_match_callbacks
-
-    %leading, %fill, %reduction, %trailing =
-      transform.iree.match_callback failures(propagate) "reduction"(%root)
-      : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
-
-    transform.iree.emit_remark "leading" at %leading : !transform.any_op
-    transform.iree.emit_remark "fill" at %fill : !transform.any_op
-    transform.iree.emit_remark "reduction" at %reduction : !transform.any_op
-    transform.iree.emit_remark "trailing" at %trailing : !transform.any_op
-    transform.yield
-  } // @__transform_main
-} // module
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/repeated_matcher_use.mlir b/compiler/src/iree/compiler/Codegen/Common/test/repeated_matcher_use.mlir
deleted file mode 100644
index 33c3cfb..0000000
--- a/compiler/src/iree/compiler/Codegen/Common/test/repeated_matcher_use.mlir
+++ /dev/null
@@ -1,242 +0,0 @@
-// RUN: iree-opt %s \
-// RUN: --iree-transform-dialect-interpreter \
-// RUN: --split-input-file --verify-diagnostics
-
-module attributes { transform.with_named_sequence } {
-  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
-    transform.iree.register_match_callbacks
-
-    %first, %second =
-      transform.iree.match_callback failures(propagate) "_test_repeated_matcher_use_callback"(%root)
-      : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
-
-    transform.iree.emit_remark "first" at %first : !transform.any_op
-    transform.iree.emit_remark "second" at %second : !transform.any_op
-    transform.yield
-  } // @__transform_main
-} // module
-
-module {
-  func.func private @f1(f32) -> f32
-  func.func private @f2(f32, f32) -> f32
-
-  func.func @foo() -> tensor<10xf32> {
-    %dummy1 = tensor.empty() : tensor<10xf32>
-    %dummy2 = tensor.empty() : tensor<10xf32>
-    %dummy3 = tensor.empty() : tensor<10xf32>
-    %c0 = arith.constant 0.0 : f32
-    %operand = linalg.fill ins(%c0 : f32) outs(%dummy1 : tensor<10xf32>) -> tensor<10xf32>
-
-    // expected-remark @below {{first}}
-    %first = linalg.generic {
-      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
-      iterator_types = ["parallel"]
-    } ins(%operand : tensor<10xf32>)
-      outs(%dummy2 : tensor<10xf32>) {
-    ^bb0(%arg0: f32, %arg1: f32):
-      %0 = func.call @f1(%arg0) : (f32) -> f32
-      linalg.yield %0 : f32
-    } -> tensor<10xf32>
-
-    // expected-remark @below {{second}}
-    %second = linalg.generic {
-      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
-      iterator_types = ["parallel"]
-    } ins(%operand, %first : tensor<10xf32>, tensor<10xf32>)
-      outs(%dummy3 : tensor<10xf32>) {
-    ^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
-      %0 = func.call @f2(%arg0, %arg1) : (f32, f32) -> f32
-      linalg.yield %0 : f32
-    } -> tensor<10xf32>
-    return %second : tensor<10xf32>
-  }
-}
-
-// -----
-
-module attributes { transform.with_named_sequence } {
-  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
-    transform.iree.register_match_callbacks
-
-    // expected-error @+2 {{failed to match}}
-    %first, %second =
-      transform.iree.match_callback failures(propagate) "_test_repeated_matcher_use_callback"(%root)
-      : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
-
-    transform.iree.emit_remark "first" at %first : !transform.any_op
-    transform.iree.emit_remark "second" at %second : !transform.any_op
-    transform.yield
-  } // @__transform_main
-} // module
-
-module {
-  func.func private @f1(f32) -> f32
-  func.func private @f2(f32, f32) -> f32
-
-  func.func @foo() -> tensor<10xf32> {
-    %dummy1 = tensor.empty() : tensor<10xf32>
-    %dummy2 = tensor.empty() : tensor<10xf32>
-    %dummy3 = tensor.empty() : tensor<10xf32>
-    %dummy5 = tensor.empty() : tensor<10xf32>
-    %c0 = arith.constant 0.0 : f32
-    %c5 = arith.constant 5.0 : f32
-    %operand5 = linalg.fill ins(%c5 : f32) outs(%dummy5 : tensor<10xf32>) -> tensor<10xf32>
-    %operand = linalg.fill ins(%c0 : f32) outs(%dummy1 : tensor<10xf32>) -> tensor<10xf32>
-
-    %first = linalg.generic {
-      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
-      iterator_types = ["parallel"]
-    } ins(%operand : tensor<10xf32>)
-      outs(%dummy2 : tensor<10xf32>) {
-    ^bb0(%arg0: f32, %arg1: f32):
-      %0 = func.call @f1(%arg0) : (f32) -> f32
-      linalg.yield %0 : f32
-    } -> tensor<10xf32>
-
-    %second = linalg.generic {
-      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
-      iterator_types = ["parallel"]
-    } ins(%operand5, %first : tensor<10xf32>, tensor<10xf32>)
-      outs(%dummy3 : tensor<10xf32>) {
-    ^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
-      %0 = func.call @f2(%arg0, %arg1) : (f32, f32) -> f32
-      linalg.yield %0 : f32
-    } -> tensor<10xf32>
-    return %second : tensor<10xf32>
-  }
-}
-
-// -----
-
-module attributes { transform.with_named_sequence } {
-  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
-    transform.iree.register_match_callbacks
-
-    // expected-error @+2 {{failed to match}}
-    %first, %second =
-      transform.iree.match_callback failures(propagate) "_test_repeated_matcher_use_callback"(%root)
-      : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
-
-    transform.iree.emit_remark "first" at %first : !transform.any_op
-    transform.iree.emit_remark "second" at %second : !transform.any_op
-    transform.yield
-  } // @__transform_main
-} // module
-
-module {
-  func.func private @f1(f32) -> f32
-  func.func private @f2(f32, f32) -> f32
-
-  func.func @foo() -> tensor<10xf32> {
-    %dummy1 = tensor.empty() : tensor<10xf32>
-    %dummy2 = tensor.empty() : tensor<10xf32>
-    %dummy3 = tensor.empty() : tensor<10xf32>
-    %dummy5 = tensor.empty() : tensor<10xf32>
-    %c0 = arith.constant 0.0 : f32
-    %operand = linalg.fill ins(%c0 : f32) outs(%dummy1 : tensor<10xf32>) -> tensor<10xf32>
-
-    %first = linalg.generic {
-      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
-      iterator_types = ["parallel"]
-    } ins(%operand : tensor<10xf32>)
-      outs(%dummy2 : tensor<10xf32>) {
-    ^bb0(%arg0: f32, %arg1: f32):
-      %0 = func.call @f1(%arg0) : (f32) -> f32
-      linalg.yield %0 : f32
-    } -> tensor<10xf32>
-
-    %second = linalg.generic {
-      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
-      iterator_types = ["parallel"]
-    } ins(%first, %first : tensor<10xf32>, tensor<10xf32>)
-      outs(%dummy3 : tensor<10xf32>) {
-    ^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
-      %0 = func.call @f2(%arg0, %arg1) : (f32, f32) -> f32
-      linalg.yield %0 : f32
-    } -> tensor<10xf32>
-    return %second : tensor<10xf32>
-  }
-}
-
-// -----
-
-module attributes { transform.with_named_sequence } {
-  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
-    transform.iree.register_match_callbacks
-
-    %first, %second =
-      transform.iree.match_callback failures(propagate) "_test_value_matcher_callback"(%root)
-      : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
-
-    transform.iree.emit_remark "first" at %first : !transform.any_op
-    transform.iree.emit_remark "second" at %second : !transform.any_op
-    transform.yield
-  } // @__transform_main
-} // module
-
-module {
-  func.func private @f1(f32) -> f32
-  func.func private @f2(f32, f32) -> f32
-
-  func.func @foo() -> tensor<10xf32> {
-    %dummy1 = tensor.empty() : tensor<10xf32>
-    %dummy2 = tensor.empty() : tensor<10xf32>
-    %dummy3 = tensor.empty() : tensor<10xf32>
-    %c0 = arith.constant 0.0 : f32
-    %operand = linalg.fill ins(%c0 : f32) outs(%dummy1 : tensor<10xf32>) -> tensor<10xf32>
-
-    // expected-remark @below {{first}}
-    %first = linalg.generic {
-      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
-      iterator_types = ["parallel"]
-    } ins(%operand : tensor<10xf32>)
-      outs(%dummy2 : tensor<10xf32>) {
-    ^bb0(%arg0: f32, %arg1: f32):
-      %0 = func.call @f1(%arg0) : (f32) -> f32
-      linalg.yield %0 : f32
-    } -> tensor<10xf32>
-
-    // expected-remark @below {{second}}
-    %second = linalg.generic {
-      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
-      iterator_types = ["parallel"]
-    } ins(%operand, %first : tensor<10xf32>, tensor<10xf32>)
-      outs(%dummy3 : tensor<10xf32>) {
-    ^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
-      %0 = func.call @f2(%arg0, %arg1) : (f32, f32) -> f32
-      linalg.yield %0 : f32
-    } -> tensor<10xf32>
-    return %second : tensor<10xf32>
-  }
-}
-
-// -----
-
-module attributes { transform.with_named_sequence } {
-  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
-    transform.iree.register_match_callbacks
-
-    %0 = transform.iree.match_callback failures(propagate) "_test_shaped_value_matcher_callback"(%root)
-      : (!transform.any_op) -> !transform.any_op
-    transform.iree.emit_remark "matched" at %0 : !transform.any_op
-    transform.yield
-  } // @__transform_main
-} // module
-
-module {
-  func.func @foo(%arg0: tensor<42x10xf32>) -> tensor<10x42xf32> {
-    %init = tensor.empty() : tensor<10x42xf32>
-    // expected-remark @below {{rank: 2}}
-    // expected-remark @below {{dimensions: 10, 42}}
-    // expected-remark @below {{matched}}
-    %0 = linalg.generic {
-      indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>],
-      iterator_types = ["parallel", "parallel"]
-    } ins(%arg0: tensor<42x10xf32>)
-      outs(%init: tensor<10x42xf32>) {
-    ^bb0(%arg1: f32, %arg2: f32):
-      linalg.yield %arg1 : f32
-    } -> tensor<10x42xf32>
-    return %0 : tensor<10x42xf32>
-  }
-}
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/transform_match_partial_reduction.mlir b/compiler/src/iree/compiler/Codegen/Common/test/transform_match_partial_reduction.mlir
deleted file mode 100644
index c05cd67..0000000
--- a/compiler/src/iree/compiler/Codegen/Common/test/transform_match_partial_reduction.mlir
+++ /dev/null
@@ -1,45 +0,0 @@
-// RUN: iree-opt %s --iree-transform-dialect-interpreter --verify-diagnostics --split-input-file
-
-// This can be matched by "reduction_partial" but not by "reduction".
-
-func.func @reduction_with_extra_op_in_func(%arg0: tensor<8x479xf32>) -> (tensor<8xf32>, tensor<32xf32>) {
-  %cst = arith.constant 0.0 : f32
-  %empty = tensor.empty() : tensor<8xf32>
-  // expected-remark @below {{fill}}
-  %fill = linalg.fill ins(%cst : f32) outs(%empty : tensor<8xf32>) -> tensor<8xf32>
-  // expected-remark @below {{reduction}}
-  %result = linalg.generic {
-    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
-                     affine_map<(d0, d1) -> (d0)>],
-    iterator_types = ["parallel", "reduction"]}
-    ins(%arg0 : tensor<8x479xf32>)
-    outs(%fill : tensor<8xf32>) {
-  ^bb0(%in: f32, %out: f32):
-    %6 = arith.addf %in, %out : f32
-    linalg.yield %6 : f32
-  } -> tensor<8xf32>
-
-  %empty2 = tensor.empty() : tensor<32xf32>
-  %fill2 = linalg.fill ins(%cst : f32) outs(%empty2 : tensor<32xf32>) -> tensor<32xf32>
-  return %result, %fill2 : tensor<8xf32>, tensor<32xf32>
-}
-
-module attributes { transform.with_named_sequence } {
-  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
-    transform.iree.register_match_callbacks
-
-    %leading, %fill, %reduction, %trailing =
-      transform.iree.match_callback failures(propagate) "reduction_partial"(%root)
-      : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
-
-    transform.iree.emit_remark "leading" at %leading : !transform.any_op
-    transform.iree.emit_remark "fill" at %fill : !transform.any_op
-    transform.iree.emit_remark "reduction" at %reduction : !transform.any_op
-    transform.iree.emit_remark "trailing" at %trailing : !transform.any_op
-
-    // expected-error @below {{failed to match}}
-    transform.iree.match_callback failures(propagate) "reduction"(%root)
-      : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
-    transform.yield
-  } // @__transform_main
-} // module
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/transform_ops_invalid.mlir b/compiler/src/iree/compiler/Codegen/Common/test/transform_ops_invalid.mlir
deleted file mode 100644
index a955d84..0000000
--- a/compiler/src/iree/compiler/Codegen/Common/test/transform_ops_invalid.mlir
+++ /dev/null
@@ -1,69 +0,0 @@
-// RUN: iree-opt %s --split-input-file --iree-transform-dialect-interpreter --verify-diagnostics
-
-module attributes { transform.with_named_sequence } {
-  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
-    // expected-error @below {{match registry not available}}
-    transform.iree.match_callback failures(suppress) "_test_match_callback"() : () -> ()
-    transform.yield
-  } // @__transform_main
-} // module
-
-
-// -----
-
-module attributes { transform.with_named_sequence } {
-  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
-    transform.iree.register_match_callbacks
-    // expected-error @below {{callback '_non_existing_name_' not found in the registry}}
-    transform.iree.match_callback failures(suppress) "_non_existing_name_"() : () -> ()
-    transform.yield
-  } // @__transform_main
-} // module
-
-
-// -----
-
-module attributes { transform.with_named_sequence } {
-  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
-    transform.iree.register_match_callbacks
-    // expected-error @below {{callback produced a different number of handles than expected}}
-    transform.iree.match_callback failures(suppress) "_test_match_callback"(%root) : (!transform.any_op) -> ()
-    transform.yield
-  } // @__transform_main
-} // module
-
-
-// -----
-
-// Successful match.
-module attributes { transform.with_named_sequence } {
-  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
-    transform.iree.register_match_callbacks
-    transform.iree.match_callback failures(propagate) "_test_match_callback"(%root) : (!transform.any_op) -> (!transform.any_op)
-    transform.yield
-  } // @__transform_main
-} // module
-
-
-// -----
-
-module attributes { transform.with_named_sequence , test.iree_transform_do_not_match } {
-  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
-    transform.iree.register_match_callbacks
-    // expected-error @below {{failed to match}}
-    transform.iree.match_callback failures(propagate) "_test_match_callback"(%root) : (!transform.any_op) -> (!transform.any_op)
-    transform.yield
-  } // @__transform_main
-} // module
-
-
-// -----
-
-// Failed to match, but the op silences such errors.
-module attributes { transform.with_named_sequence, test.iree_transform_do_not_match } {
-  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
-    transform.iree.register_match_callbacks
-    transform.iree.match_callback failures(suppress) "_test_match_callback"(%root) : (!transform.any_op) -> (!transform.any_op)
-    transform.yield
-  } // @__transform_main
-} // module