Remove attention transform dialect e2e tests (#17682)

With https://github.com/iree-org/iree/pull/17681, attention codegen for
llvm-cpu is in a good enough state that we no longer need to rely on a
transform dialect spec (at least for llvm-cpu).

This patch removes the e2e tests for the attention transform dialect spec,
since they add maintenance burden to a path that will probably not be
maintained in the future. Coverage remains through the e2e correctness tests
in e2e/linalg_ext/, which check small test cases, and through pkgci, which
runs the sdxl + attention tests.
diff --git a/tests/transform_dialect/cpu/BUILD.bazel b/tests/transform_dialect/cpu/BUILD.bazel
index aabea9d..669933c 100644
--- a/tests/transform_dialect/cpu/BUILD.bazel
+++ b/tests/transform_dialect/cpu/BUILD.bazel
@@ -14,7 +14,6 @@
 iree_lit_test_suite(
     name = "lit",
     srcs = [
-        "attention.mlir",
         "contraction-packing.mlir",
         "contraction-packing-and-dispatch.mlir",
         # DISABLED: incorrectly assuming default flag values.
@@ -28,7 +27,6 @@
     # transform dialect spec files are MLIR files that specify a transformation,
     # they need to be included as data.
     data = [
-        "attention_codegen_spec.mlir",
         "matmul_codegen_default_spec.mlir",
         "transform_library.mlir",
     ],
diff --git a/tests/transform_dialect/cpu/CMakeLists.txt b/tests/transform_dialect/cpu/CMakeLists.txt
index 4328bf4..9f24b45 100644
--- a/tests/transform_dialect/cpu/CMakeLists.txt
+++ b/tests/transform_dialect/cpu/CMakeLists.txt
@@ -14,7 +14,6 @@
   NAME
     lit
   SRCS
-    "attention.mlir"
     "contraction-packing-and-dispatch.mlir"
     "contraction-packing.mlir"
     "fold_tensor_slice_into_transfer.mlir"
@@ -27,7 +26,6 @@
     iree-opt
     iree-run-module
   DATA
-    attention_codegen_spec.mlir
     matmul_codegen_default_spec.mlir
     transform_library.mlir
   LABELS
diff --git a/tests/transform_dialect/cpu/attention.mlir b/tests/transform_dialect/cpu/attention.mlir
deleted file mode 100644
index 51cbd8e..0000000
--- a/tests/transform_dialect/cpu/attention.mlir
+++ /dev/null
@@ -1,17 +0,0 @@
-func.func @attention() -> tensor<1x4x4xf32> {
-  %init = tensor.empty() : tensor<1x4x4xf32>
-  %query = util.unfoldable_constant dense<1.0> : tensor<1x4x4xf32>
-  %key = util.unfoldable_constant dense<0.5> : tensor<1x4x4xf32>
-  %value = util.unfoldable_constant dense<2.0> : tensor<1x4x4xf32>
-  %scale = arith.constant 1.0 : f32
-  %1 = iree_linalg_ext.attention ins(%query, %key, %value, %scale : tensor<1x4x4xf32>,
-        tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32) outs(%init : tensor<1x4x4xf32>) -> tensor<1x4x4xf32>
-  return %1 : tensor<1x4x4xf32>
-}
-
-// RUN: iree-compile %s --iree-hal-target-backends=llvm-cpu \
-// RUN: --iree-codegen-transform-dialect-library=%p/attention_codegen_spec.mlir@codegen | \
-// RUN: iree-run-module --module=- --function=attention | \
-// RUN: FileCheck %s --check-prefixes=EXEC
-
-// EXEC: 1x4x4xf32={{\[}}[2 2 2 2][2 2 2 2][2 2 2 2][2 2 2 2]{{]}}
diff --git a/tests/transform_dialect/cpu/attention_codegen_spec.mlir b/tests/transform_dialect/cpu/attention_codegen_spec.mlir
deleted file mode 100644
index 3683c49..0000000
--- a/tests/transform_dialect/cpu/attention_codegen_spec.mlir
+++ /dev/null
@@ -1,87 +0,0 @@
-module attributes { transform.with_named_sequence } {
-
-  // Codegen.
-  transform.named_sequence @codegen(
-      %variant_op: !transform.any_op {transform.consumed}) {
-
-    // Get attention op
-    // ==========================================
-    %attention = transform.structured.match ops{["iree_linalg_ext.attention"]} in %variant_op : (!transform.any_op) -> !transform.any_op
-
-    // Tile and distribute to workgroups
-    // ==========================================
-    %tiled_attention, %forall_grid =
-    transform.structured.tile_using_forall %attention num_threads [1]
-      ( mapping = [#gpu.block<x>] )
-      : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
-    transform.iree.populate_workgroup_count_region_using_num_threads_slice %forall_grid
-    : (!transform.any_op) -> ()
-
-    // Tile and decompose attention
-    // ==========================================
-    %attention4 = transform.structured.match ops{["iree_linalg_ext.attention"]} in %variant_op : (!transform.any_op) -> !transform.any_op
-    %acc_fill, %max_fill, %sum_fill, %inner_loop, %final_scaling, %blocked_attention = transform.iree.tile_attention %attention4 :
-      (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
-    %scale_q, %fill_op, %first_matmul, %reduce_max, %partial_softmax, %scale_factor, %update, %reduce_sum, %scale_acc, %second_matmul
-        = transform.iree.decompose_tiled_attention %blocked_attention :
-      (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op,
-                              !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
-
-    // Vectorize function
-    // ==========================================
-    %func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
-    transform.apply_patterns to %func {
-      transform.apply_patterns.iree.fold_reshape_into_tensor_hal_interface
-      transform.apply_patterns.linalg.fold_unit_extent_dims_via_slices
-      transform.apply_patterns.vector.cast_away_vector_leading_one_dim
-    } : !transform.any_op
-    %func_3 = transform.structured.vectorize_children_and_apply_patterns %func : (!transform.any_op) -> !transform.any_op
-    transform.apply_patterns to %func_3 {
-      transform.apply_patterns.iree.fold_fill_into_pad
-      transform.apply_patterns.linalg.tiling_canonicalization
-      transform.apply_patterns.scf.for_loop_canonicalization
-      transform.apply_patterns.canonicalization
-    } : !transform.any_op
-    transform.iree.apply_licm %func_3 : !transform.any_op
-    transform.apply_cse to %func_3 : !transform.any_op
-
-    // Bufferization
-    // ==========================================
-    transform.iree.eliminate_empty_tensors %func_3 : (!transform.any_op) -> ()
-    transform.apply_patterns to %func_3 {
-      transform.apply_patterns.linalg.erase_unnecessary_inputs
-    } : !transform.any_op
-    %func_4 = transform.iree.bufferize %func_3 : (!transform.any_op) -> (!transform.any_op)
-
-    // Step 6. Post-bufferization vector distribution
-    // ===========================================================================
-    transform.iree.forall_to_workgroup %func_4 : (!transform.any_op) -> ()
-    %func_8 = transform.structured.hoist_redundant_vector_transfers %func_4
-    : (!transform.any_op) -> !transform.any_op
-    transform.apply_patterns to %func_8 {
-      transform.apply_patterns.canonicalization
-    } : !transform.any_op
-    transform.apply_cse to %func_8 : !transform.any_op
-    transform.memref.erase_dead_alloc_and_stores %func_8 : (!transform.any_op) -> ()
-
-    // Annotate the exported function as already translated.
-    %none = transform.param.constant #iree_codegen.translation_info<None> -> !transform.any_param
-    transform.annotate %func_8 "translation_info" = %none : !transform.any_op, !transform.any_param
-    transform.yield
-  } // codegen
-
-  // Find `hal.executable.variant`.
-  transform.named_sequence @match_variant_for_codegen(%root: !transform.any_op {transform.readonly})
-    -> !transform.any_op {
-    transform.match.operation_name %root ["hal.executable.variant"] : !transform.any_op
-    transform.yield %root : !transform.any_op
-  }
-
-  // Transform entry-point
-  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.consumed}) {
-    transform.foreach_match in %root
-        @match_variant_for_codegen -> @codegen
-      : (!transform.any_op) -> (!transform.any_op)
-    transform.yield
-  }
-} // module