Remove attention transform dialect e2e tests (#17682)
With https://github.com/iree-org/iree/pull/17681, attention codegen for
llvm-cpu is in a good enough state that we no longer need to rely on a
transform dialect spec (at least for llvm-cpu).
This patch removes the e2e tests for the attention transform dialect spec,
since they add maintenance burden on a path that will probably not be
maintained in the future. Attention correctness is still covered by the e2e
tests in e2e/linalg_ext/, which check small test cases, and by pkgci, which
runs the SDXL + attention tests.
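
For reference, the kind of coverage that remains in e2e/linalg_ext/ looks
roughly like the test being deleted below, minus the transform dialect spec:
the same small iree_linalg_ext.attention case compiled through the default
llvm-cpu codegen path. This is a sketch modeled on the deleted attention.mlir,
not the exact contents of any file in e2e/linalg_ext/:

func.func @attention() -> tensor<1x4x4xf32> {
  %init = tensor.empty() : tensor<1x4x4xf32>
  %query = util.unfoldable_constant dense<1.0> : tensor<1x4x4xf32>
  %key = util.unfoldable_constant dense<0.5> : tensor<1x4x4xf32>
  %value = util.unfoldable_constant dense<2.0> : tensor<1x4x4xf32>
  %scale = arith.constant 1.0 : f32
  // Uniform Q and K give uniform softmax weights, so each output row is just V (2.0).
  %1 = iree_linalg_ext.attention ins(%query, %key, %value, %scale
         : tensor<1x4x4xf32>, tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32)
       outs(%init : tensor<1x4x4xf32>) -> tensor<1x4x4xf32>
  return %1 : tensor<1x4x4xf32>
}

// RUN: iree-compile %s --iree-hal-target-backends=llvm-cpu | \
// RUN:   iree-run-module --module=- --function=attention | \
// RUN:   FileCheck %s --check-prefixes=EXEC

// EXEC: 1x4x4xf32={{\[}}[2 2 2 2][2 2 2 2][2 2 2 2][2 2 2 2]{{]}}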
diff --git a/tests/transform_dialect/cpu/BUILD.bazel b/tests/transform_dialect/cpu/BUILD.bazel
index aabea9d..669933c 100644
--- a/tests/transform_dialect/cpu/BUILD.bazel
+++ b/tests/transform_dialect/cpu/BUILD.bazel
@@ -14,7 +14,6 @@
iree_lit_test_suite(
name = "lit",
srcs = [
- "attention.mlir",
"contraction-packing.mlir",
"contraction-packing-and-dispatch.mlir",
# DISABLED: incorrectly assuming default flag values.
@@ -28,7 +27,6 @@
# transform dialect spec files are MLIR files that specify a transformation,
# they need to be included as data.
data = [
- "attention_codegen_spec.mlir",
"matmul_codegen_default_spec.mlir",
"transform_library.mlir",
],
diff --git a/tests/transform_dialect/cpu/CMakeLists.txt b/tests/transform_dialect/cpu/CMakeLists.txt
index 4328bf4..9f24b45 100644
--- a/tests/transform_dialect/cpu/CMakeLists.txt
+++ b/tests/transform_dialect/cpu/CMakeLists.txt
@@ -14,7 +14,6 @@
NAME
lit
SRCS
- "attention.mlir"
"contraction-packing-and-dispatch.mlir"
"contraction-packing.mlir"
"fold_tensor_slice_into_transfer.mlir"
@@ -27,7 +26,6 @@
iree-opt
iree-run-module
DATA
- attention_codegen_spec.mlir
matmul_codegen_default_spec.mlir
transform_library.mlir
LABELS
diff --git a/tests/transform_dialect/cpu/attention.mlir b/tests/transform_dialect/cpu/attention.mlir
deleted file mode 100644
index 51cbd8e..0000000
--- a/tests/transform_dialect/cpu/attention.mlir
+++ /dev/null
@@ -1,17 +0,0 @@
-func.func @attention() -> tensor<1x4x4xf32> {
- %init = tensor.empty() : tensor<1x4x4xf32>
- %query = util.unfoldable_constant dense<1.0> : tensor<1x4x4xf32>
- %key = util.unfoldable_constant dense<0.5> : tensor<1x4x4xf32>
- %value = util.unfoldable_constant dense<2.0> : tensor<1x4x4xf32>
- %scale = arith.constant 1.0 : f32
- %1 = iree_linalg_ext.attention ins(%query, %key, %value, %scale : tensor<1x4x4xf32>,
- tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32) outs(%init : tensor<1x4x4xf32>) -> tensor<1x4x4xf32>
- return %1 : tensor<1x4x4xf32>
-}
-
-// RUN: iree-compile %s --iree-hal-target-backends=llvm-cpu \
-// RUN: --iree-codegen-transform-dialect-library=%p/attention_codegen_spec.mlir@codegen | \
-// RUN: iree-run-module --module=- --function=attention | \
-// RUN: FileCheck %s --check-prefixes=EXEC
-
-// EXEC: 1x4x4xf32={{\[}}[2 2 2 2][2 2 2 2][2 2 2 2][2 2 2 2]{{]}}
diff --git a/tests/transform_dialect/cpu/attention_codegen_spec.mlir b/tests/transform_dialect/cpu/attention_codegen_spec.mlir
deleted file mode 100644
index 3683c49..0000000
--- a/tests/transform_dialect/cpu/attention_codegen_spec.mlir
+++ /dev/null
@@ -1,87 +0,0 @@
-module attributes { transform.with_named_sequence } {
-
- // Codegen.
- transform.named_sequence @codegen(
- %variant_op: !transform.any_op {transform.consumed}) {
-
- // Get attention op
- // ==========================================
- %attention = transform.structured.match ops{["iree_linalg_ext.attention"]} in %variant_op : (!transform.any_op) -> !transform.any_op
-
- // Tile and distribute to workgroups
- // ==========================================
- %tiled_attention, %forall_grid =
- transform.structured.tile_using_forall %attention num_threads [1]
- ( mapping = [#gpu.block<x>] )
- : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
- transform.iree.populate_workgroup_count_region_using_num_threads_slice %forall_grid
- : (!transform.any_op) -> ()
-
- // Tile and decompose attention
- // ==========================================
- %attention4 = transform.structured.match ops{["iree_linalg_ext.attention"]} in %variant_op : (!transform.any_op) -> !transform.any_op
- %acc_fill, %max_fill, %sum_fill, %inner_loop, %final_scaling, %blocked_attention = transform.iree.tile_attention %attention4 :
- (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
- %scale_q, %fill_op, %first_matmul, %reduce_max, %partial_softmax, %scale_factor, %update, %reduce_sum, %scale_acc, %second_matmul
- = transform.iree.decompose_tiled_attention %blocked_attention :
- (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op,
- !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
-
- // Vectorize function
- // ==========================================
- %func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
- transform.apply_patterns to %func {
- transform.apply_patterns.iree.fold_reshape_into_tensor_hal_interface
- transform.apply_patterns.linalg.fold_unit_extent_dims_via_slices
- transform.apply_patterns.vector.cast_away_vector_leading_one_dim
- } : !transform.any_op
- %func_3 = transform.structured.vectorize_children_and_apply_patterns %func : (!transform.any_op) -> !transform.any_op
- transform.apply_patterns to %func_3 {
- transform.apply_patterns.iree.fold_fill_into_pad
- transform.apply_patterns.linalg.tiling_canonicalization
- transform.apply_patterns.scf.for_loop_canonicalization
- transform.apply_patterns.canonicalization
- } : !transform.any_op
- transform.iree.apply_licm %func_3 : !transform.any_op
- transform.apply_cse to %func_3 : !transform.any_op
-
- // Bufferization
- // ==========================================
- transform.iree.eliminate_empty_tensors %func_3 : (!transform.any_op) -> ()
- transform.apply_patterns to %func_3 {
- transform.apply_patterns.linalg.erase_unnecessary_inputs
- } : !transform.any_op
- %func_4 = transform.iree.bufferize %func_3 : (!transform.any_op) -> (!transform.any_op)
-
- // Step 6. Post-bufferization vector distribution
- // ===========================================================================
- transform.iree.forall_to_workgroup %func_4 : (!transform.any_op) -> ()
- %func_8 = transform.structured.hoist_redundant_vector_transfers %func_4
- : (!transform.any_op) -> !transform.any_op
- transform.apply_patterns to %func_8 {
- transform.apply_patterns.canonicalization
- } : !transform.any_op
- transform.apply_cse to %func_8 : !transform.any_op
- transform.memref.erase_dead_alloc_and_stores %func_8 : (!transform.any_op) -> ()
-
- // Annotate the exported function as already translated.
- %none = transform.param.constant #iree_codegen.translation_info<None> -> !transform.any_param
- transform.annotate %func_8 "translation_info" = %none : !transform.any_op, !transform.any_param
- transform.yield
- } // codegen
-
- // Find `hal.executable.variant`.
- transform.named_sequence @match_variant_for_codegen(%root: !transform.any_op {transform.readonly})
- -> !transform.any_op {
- transform.match.operation_name %root ["hal.executable.variant"] : !transform.any_op
- transform.yield %root : !transform.any_op
- }
-
- // Transform entry-point
- transform.named_sequence @__transform_main(%root: !transform.any_op {transform.consumed}) {
- transform.foreach_match in %root
- @match_variant_for_codegen -> @codegen
- : (!transform.any_op) -> (!transform.any_op)
- transform.yield
- }
-} // module
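
If a custom transform dialect spec is still wanted for local experiments, the
flag the deleted RUN line used, --iree-codegen-transform-dialect-library, can
still be passed to iree-compile, though as noted above this path will probably
not be maintained. A hypothetical invocation (my_attention_spec.mlir is a
placeholder for a user-provided spec exposing a @codegen entry point, not a
file in the tree):

# Hypothetical local run with a user-provided spec; not part of CI.
iree-compile attention.mlir \
  --iree-hal-target-backends=llvm-cpu \
  --iree-codegen-transform-dialect-library=my_attention_spec.mlir@codegen | \
iree-run-module --module=- --function=attention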