Simplify tests/e2e/stablehlo_ops. (#17843)

Forking this from https://github.com/iree-org/iree/pull/17766 to just
look at a single directory.

* Moved Metal and ROCm tests from being exclusively defined in CMake to
being defined (but then no-op'd) in Bazel
* Taught the CMake test function to append
`--iree-rocm-target-chip=${IREE_HIP_TEST_TARGET_CHIP}` to the compiler flags
when the target backend is `rocm` (not happy that this is required though)
* Merged test srcs down to a single `ALL_SRCS` glob for test suites that
work across all configurations
* Enabled previously disabled tests
  * Fixes https://github.com/iree-org/iree/issues/9583
  * Fixes https://github.com/iree-org/iree/issues/12415 (tentatively; these
    tests may have to be disabled on Android/Vulkan again)
diff --git a/build_tools/bazel/iree_check_test.bzl b/build_tools/bazel/iree_check_test.bzl
index beffdc7..e76a8fa 100644
--- a/build_tools/bazel/iree_check_test.bzl
+++ b/build_tools/bazel/iree_check_test.bzl
@@ -9,7 +9,7 @@
 load("//build_tools/bazel:iree_bytecode_module.bzl", "iree_bytecode_module")
 load("//build_tools/bazel:native_binary.bzl", "native_test")
 
-ALL_TARGET_BACKENDS_AND_DRIVERS = [
+DEFAULT_TARGET_BACKENDS_AND_DRIVERS = [
     ("vmvx", "local-task"),
     ("vulkan-spirv", "vulkan"),
     ("llvm-cpu", "local-task"),
@@ -122,6 +122,14 @@
           test suite.
     """
 
+    # Metal backend/driver not supported by Bazel build.
+    if target_backend == "metal-spirv" or driver == "metal":
+        return
+
+    # ROCm/HIP backend/driver not supported by Bazel build.
+    if target_backend == "rocm" or driver == "hip":
+        return
+
     # We haven't implemented this so far because we have been using target_cpu_features so far only
     # for aarch64 targets, for which we use the CMake build. To future people implementing this:
     # target_cpu_features should be a list, and here it should be joined into a comma-separated
@@ -162,7 +170,7 @@
 def iree_check_test_suite(
         name,
         srcs,
-        target_backends_and_drivers = ALL_TARGET_BACKENDS_AND_DRIVERS,
+        target_backends_and_drivers = DEFAULT_TARGET_BACKENDS_AND_DRIVERS,
         compiler_flags = [],
         input_type = None,
         runner_args = [],
@@ -207,9 +215,6 @@
     # could just create a test suite. The latter seems simpler and more readable.
     tests = []
     for backend, driver in target_backends_and_drivers:
-        # CUDA backend/driver not supported by Bazel build.
-        if backend == "cuda" or driver == "cuda":
-            continue
         suite_name = "_".join([name, backend, driver])
         iree_check_single_backend_test_suite(
             name = suite_name,
diff --git a/build_tools/cmake/iree_check_test.cmake b/build_tools/cmake/iree_check_test.cmake
index c538342..fd89caf 100644
--- a/build_tools/cmake/iree_check_test.cmake
+++ b/build_tools/cmake/iree_check_test.cmake
@@ -57,6 +57,10 @@
     ${ARGN}
   )
 
+  # Normalize some variables before using them.
+  string(TOUPPER ${_RULE_TARGET_BACKEND} _UPPERCASE_TARGET_BACKEND)
+  string(REPLACE "-" "_" _NORMALIZED_TARGET_BACKEND ${_UPPERCASE_TARGET_BACKEND})
+
   # ---------------------------------------------------------------------------
   # Bytecode module builds require
   #   1. the compiler, either in the same build or provided in IREE_HOST_BIN_DIR
@@ -92,8 +96,6 @@
   # backends are enabled. We could query the tools in the binary directory for
   # support dynamically if optionality would be useful.
   if(NOT IREE_HOST_BIN_DIR)
-    string(TOUPPER ${_RULE_TARGET_BACKEND} _UPPERCASE_TARGET_BACKEND)
-    string(REPLACE "-" "_" _NORMALIZED_TARGET_BACKEND ${_UPPERCASE_TARGET_BACKEND})
     # TODO(scotttodd): allow plugins to provide external backends here
     if(NOT DEFINED IREE_TARGET_BACKEND_${_NORMALIZED_TARGET_BACKEND})
       message(SEND_ERROR "Unknown backend '${_RULE_TARGET_BACKEND}'. Check IREE_TARGET_BACKEND_* options.")
@@ -101,6 +103,10 @@
     if(NOT IREE_TARGET_BACKEND_${_NORMALIZED_TARGET_BACKEND})
       set(_BYTECODE_MODULE_BUILD_ENABLED FALSE)
     endif()
+    # rocm/hip require a target chip to be specified at compile time that matches the runtime device
+    if(_NORMALIZED_TARGET_BACKEND STREQUAL "ROCM" AND NOT IREE_HIP_TEST_TARGET_CHIP)
+      set(_BYTECODE_MODULE_BUILD_ENABLED FALSE)
+    endif()
   endif()
   # ---------------------------------------------------------------------------
 
@@ -159,6 +165,9 @@
   if(_RULE_TARGET_CPU_FEATURES)
     list(APPEND _BASE_COMPILER_FLAGS "--iree-llvmcpu-target-cpu-features=${_RULE_TARGET_CPU_FEATURES}")
   endif()
+  if(_NORMALIZED_TARGET_BACKEND STREQUAL "ROCM")
+    list(APPEND _BASE_COMPILER_FLAGS "--iree-rocm-target-chip=${IREE_HIP_TEST_TARGET_CHIP}")
+  endif()
 
   if(_BYTECODE_MODULE_BUILD_ENABLED)
     iree_bytecode_module(
@@ -437,6 +446,7 @@
   endif()
 
   if(NOT DEFINED _RULE_TARGET_BACKENDS AND NOT DEFINED _RULE_DRIVERS)
+    # Default backends/drivers.
     set(_RULE_TARGET_BACKENDS "vmvx" "vulkan-spirv" "llvm-cpu")
     set(_RULE_DRIVERS "local-task" "vulkan" "local-task")
   endif()
diff --git a/tests/e2e/stablehlo_ops/BUILD.bazel b/tests/e2e/stablehlo_ops/BUILD.bazel
index 579ca94..6c5d8f0 100644
--- a/tests/e2e/stablehlo_ops/BUILD.bazel
+++ b/tests/e2e/stablehlo_ops/BUILD.bazel
@@ -4,12 +4,6 @@
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-# Tests of end-to-end IREE support for individual ops in the StableHLO dialect.
-# Each test file should have a name matching the corresponding StableHLO op and test only the
-# functionality of that op (though may make use of other ops where necessary). Tests should be
-# written using the IREE Check framework and should always pass on the reference VMVX backend.
-# See https://iree.dev/developers/general/testing-guide/#iree-core-end-to-end-e2e-tests.
-
 load("//build_tools/bazel:enforce_glob.bzl", "enforce_glob")
 load("//build_tools/bazel:iree_check_test.bzl", "iree_check_single_backend_test_suite")
 
@@ -18,76 +12,77 @@
     licenses = ["notice"],  # Apache 2.0
 )
 
+ALL_SRCS = enforce_glob(
+    [
+        "abs.mlir",
+        "add.mlir",
+        "batch_norm_inference.mlir",
+        "bitcast_convert.mlir",
+        "broadcast.mlir",
+        "broadcast_add.mlir",
+        "broadcast_in_dim.mlir",
+        "clamp.mlir",
+        "compare.mlir",
+        "complex.mlir",
+        "concatenate.mlir",
+        "constant.mlir",
+        "convert.mlir",
+        "convolution.mlir",
+        "cosine.mlir",
+        "divide.mlir",
+        "dot.mlir",
+        "dot_bf16.mlir",
+        "dot_general.mlir",
+        "dynamic_slice.mlir",
+        "dynamic_update_slice.mlir",
+        "exponential.mlir",
+        "exponential_fp16.mlir",
+        "exponential_minus_one.mlir",
+        "fft.mlir",
+        "finite.mlir",
+        "floor.mlir",
+        "gather.mlir",
+        "householder.mlir",
+        "iota.mlir",
+        "log.mlir",
+        "log_plus_one.mlir",
+        "maximum.mlir",
+        "minimum.mlir",
+        "multiply.mlir",
+        "negate.mlir",
+        "pad.mlir",
+        "philox.mlir",
+        "pow.mlir",
+        "reduce.mlir",
+        "reduce_window.mlir",
+        "remainder.mlir",
+        "reshape.mlir",
+        "reverse.mlir",
+        "rng_normal.mlir",
+        "rng_uniform.mlir",
+        "round.mlir",
+        "rsqrt.mlir",
+        "scatter.mlir",
+        "scatter_dynamic.mlir",
+        "select.mlir",
+        "sine.mlir",
+        "slice.mlir",
+        "sort.mlir",
+        "sqrt.mlir",
+        "subtract.mlir",
+        "tanh.mlir",
+        "three_fry.mlir",
+        "torch_index_select.mlir",
+        "transpose.mlir",
+        "while.mlir",
+    ],
+    include = ["*.mlir"],
+    exclude = [],
+)
+
 iree_check_single_backend_test_suite(
     name = "check_llvm-cpu_local-task",
-    srcs = enforce_glob(
-        # keep sorted
-        [
-            "abs.mlir",
-            "add.mlir",
-            "batch_norm_inference.mlir",
-            "bitcast_convert.mlir",
-            "broadcast.mlir",
-            "broadcast_add.mlir",
-            "broadcast_in_dim.mlir",
-            "clamp.mlir",
-            "compare.mlir",
-            "complex.mlir",
-            "concatenate.mlir",
-            "constant.mlir",
-            "convert.mlir",
-            "convolution.mlir",
-            "cosine.mlir",
-            "divide.mlir",
-            "dot.mlir",
-            "dot_bf16.mlir",
-            "dot_general.mlir",
-            "dynamic_slice.mlir",
-            "dynamic_update_slice.mlir",
-            "exponential.mlir",
-            "exponential_fp16.mlir",
-            "exponential_minus_one.mlir",
-            "fft.mlir",
-            "finite.mlir",
-            "floor.mlir",
-            "gather.mlir",
-            "householder.mlir",
-            "iota.mlir",
-            "log.mlir",
-            "log_plus_one.mlir",
-            "maximum.mlir",
-            "minimum.mlir",
-            "multiply.mlir",
-            "negate.mlir",
-            "pad.mlir",
-            "philox.mlir",
-            "pow.mlir",
-            "reduce.mlir",
-            "reduce_window.mlir",
-            "remainder.mlir",
-            "reshape.mlir",
-            "reverse.mlir",
-            "rng_normal.mlir",
-            "rng_uniform.mlir",
-            "round.mlir",
-            "rsqrt.mlir",
-            "scatter.mlir",
-            "scatter_dynamic.mlir",
-            "select.mlir",
-            "sine.mlir",
-            "slice.mlir",
-            "sort.mlir",
-            "sqrt.mlir",
-            "subtract.mlir",
-            "tanh.mlir",
-            "three_fry.mlir",
-            "torch_index_select.mlir",
-            "transpose.mlir",
-            "while.mlir",
-        ],
-        include = ["*.mlir"],
-        exclude = [],
-    ),
+    srcs = ALL_SRCS,
     compiler_flags = [
         "--iree-input-demote-f64-to-f32",
     ],
@@ -96,6 +91,25 @@
     target_backend = "llvm-cpu",
 )
 
+# Check host features compilation (LLVM backend with host cpu features).
+iree_check_single_backend_test_suite(
+    name = "check_llvm-cpu-host_local-task",
+    srcs = ALL_SRCS,
+    compiler_flags = [
+        "--iree-input-demote-f64-to-f32",
+        "--iree-llvmcpu-target-cpu-features=host",
+    ],
+    driver = "local-task",
+    input_type = "stablehlo",
+    # Building and testing must be on the same architecture, which doesn't work
+    # with remote execution in general.
+    tags = [
+        "hostonly",
+        "local",
+    ],
+    target_backend = "llvm-cpu",
+)
+
 iree_check_single_backend_test_suite(
     name = "check_vmvx_local-task",
     srcs = enforce_glob(
@@ -203,6 +217,7 @@
             "dynamic_update_slice.mlir",
             "exponential.mlir",
             "exponential_minus_one.mlir",
+            "fft.mlir",
             "finite.mlir",
             "floor.mlir",
             "gather.mlir",
@@ -221,6 +236,7 @@
             "reduce_window.mlir",
             "remainder.mlir",
             "reshape.mlir",
+            "reverse.mlir",
             "rng_normal.mlir",
             "rng_uniform.mlir",
             "round.mlir",
@@ -242,8 +258,6 @@
         include = ["*.mlir"],
         exclude = [
             "exponential_fp16.mlir",
-            "fft.mlir",  # TODO(#9583)
-            "reverse.mlir",  # TODO(#12415): disabled due to miscompilation on Pixel 6.
         ],
     ),
     compiler_flags = [
@@ -254,9 +268,61 @@
     target_backend = "vulkan-spirv",
 )
 
-# Check host features compilation (LLVM backend with host cpu features).
 iree_check_single_backend_test_suite(
-    name = "check_llvm-cpu-host_local-task",
+    name = "check_cuda_graph",
+    srcs = ALL_SRCS,
+    compiler_flags = [
+        "--iree-input-demote-f64-to-f32",
+        # TODO(#13984): memset emulation required for graphs.
+        "--iree-stream-emulate-memset",
+    ],
+    driver = "cuda",
+    input_type = "stablehlo",
+    runner_args = ["--cuda_use_streams=false"],
+    tags = [
+        # CUDA cuInit fails with sanitizer on.
+        "noasan",
+        "nomsan",
+        "notsan",
+        "noubsan",
+        "requires-gpu-nvidia",
+    ],
+    target_backend = "cuda",
+)
+
+iree_check_single_backend_test_suite(
+    name = "check_cuda_stream",
+    srcs = ALL_SRCS,
+    compiler_flags = [
+        "--iree-input-demote-f64-to-f32",
+    ],
+    driver = "cuda",
+    input_type = "stablehlo",
+    runner_args = ["--cuda_use_streams=true"],
+    tags = [
+        # CUDA cuInit fails with sanitizer on.
+        "noasan",
+        "nomsan",
+        "notsan",
+        "noubsan",
+        "requires-gpu-nvidia",
+    ],
+    target_backend = "cuda",
+)
+
+iree_check_single_backend_test_suite(
+    name = "check_rocm_hip_stream",
+    srcs = ALL_SRCS,
+    driver = "hip",
+    input_type = "stablehlo",
+    runner_args = [
+        "--hip_use_streams=true",
+    ],
+    target_backend = "rocm",
+)
+
+iree_check_single_backend_test_suite(
+    name = "check_metal-spirv_metal",
     srcs = enforce_glob(
         # keep sorted
         [
@@ -300,7 +366,6 @@
             "philox.mlir",
             "pow.mlir",
             "reduce.mlir",
-            "reduce_window.mlir",
             "remainder.mlir",
             "reshape.mlir",
             "reverse.mlir",
@@ -323,139 +388,11 @@
             "while.mlir",
         ],
         include = ["*.mlir"],
-        exclude = [],
+        exclude = [
+            "reduce_window.mlir",  # TODO(#15012): fix test crash
+        ],
     ),
-    compiler_flags = [
-        "--iree-input-demote-f64-to-f32",
-        "--iree-llvmcpu-target-cpu-features=host",
-    ],
-    driver = "local-task",
+    driver = "metal",
     input_type = "stablehlo",
-    # Building and testing must be on the same architecture, which doesn't work
-    # with remote execution in general.
-    tags = [
-        "hostonly",
-        "local",
-    ],
-    target_backend = "llvm-cpu",
-)
-
-test_suite(
-    name = "check",
-    tests = [
-        ":check_llvm-cpu-host_local-task",
-        ":check_llvm-cpu_local-task",
-        ":check_vmvx_local-task",
-        ":check_vulkan-spirv_vulkan",
-    ],
-)
-
-CUDA_SRCS = enforce_glob(
-    [
-        "abs.mlir",
-        "add.mlir",
-        "batch_norm_inference.mlir",
-        "bitcast_convert.mlir",
-        "broadcast.mlir",
-        "broadcast_add.mlir",
-        "broadcast_in_dim.mlir",
-        "clamp.mlir",
-        "compare.mlir",
-        "complex.mlir",
-        "concatenate.mlir",
-        "constant.mlir",
-        "convert.mlir",
-        "convolution.mlir",
-        "cosine.mlir",
-        "divide.mlir",
-        "dot.mlir",
-        "dot_bf16.mlir",
-        "dot_general.mlir",
-        "dynamic_slice.mlir",
-        "dynamic_update_slice.mlir",
-        "exponential.mlir",
-        "exponential_fp16.mlir",
-        "exponential_minus_one.mlir",
-        "fft.mlir",
-        "finite.mlir",
-        "floor.mlir",
-        "gather.mlir",
-        "householder.mlir",
-        "iota.mlir",
-        "log.mlir",
-        "log_plus_one.mlir",
-        "maximum.mlir",
-        "minimum.mlir",
-        "multiply.mlir",
-        "negate.mlir",
-        "pad.mlir",
-        "philox.mlir",
-        "pow.mlir",
-        "reduce.mlir",
-        "reduce_window.mlir",
-        "remainder.mlir",
-        "reshape.mlir",
-        "reverse.mlir",
-        "rng_normal.mlir",
-        "rng_uniform.mlir",
-        "round.mlir",
-        "rsqrt.mlir",
-        "scatter.mlir",
-        "scatter_dynamic.mlir",
-        "select.mlir",
-        "sine.mlir",
-        "slice.mlir",
-        "sort.mlir",
-        "sqrt.mlir",
-        "subtract.mlir",
-        "tanh.mlir",
-        "three_fry.mlir",
-        "torch_index_select.mlir",
-        "transpose.mlir",
-        "while.mlir",
-    ],
-    include = ["*.mlir"],
-    exclude = [],
-)
-
-iree_check_single_backend_test_suite(
-    name = "check_cuda_graph",
-    srcs = CUDA_SRCS,
-    compiler_flags = [
-        "--iree-input-demote-f64-to-f32",
-        # TODO(#13984): memset emulation required for graphs.
-        "--iree-stream-emulate-memset",
-    ],
-    driver = "cuda",
-    input_type = "stablehlo",
-    runner_args = ["--cuda_use_streams=false"],
-    tags = [
-        # CUDA cuInit fails with sanitizer on.
-        "noasan",
-        "nomsan",
-        "notsan",
-        "noubsan",
-        "requires-gpu-nvidia",
-    ],
-    target_backend = "cuda",
-)
-
-iree_check_single_backend_test_suite(
-    name = "check_cuda_stream",
-    srcs = CUDA_SRCS,
-    compiler_flags = [
-        "--iree-input-demote-f64-to-f32",
-    ],
-    driver = "cuda",
-    input_type = "stablehlo",
-    runner_args = ["--cuda_use_streams=true"],
-    tags = [
-        # CUDA cuInit fails with sanitizer on.
-        "noasan",
-        "nomsan",
-        "notsan",
-        "noubsan",
-        "requires-gpu-nvidia",
-    ],
-    target_backend = "cuda",
+    target_backend = "metal-spirv",
 )
diff --git a/tests/e2e/stablehlo_ops/CMakeLists.txt b/tests/e2e/stablehlo_ops/CMakeLists.txt
index 4d353ce..9393b3e 100644
--- a/tests/e2e/stablehlo_ops/CMakeLists.txt
+++ b/tests/e2e/stablehlo_ops/CMakeLists.txt
@@ -87,6 +87,85 @@
 
 iree_check_single_backend_test_suite(
   NAME
+    check_llvm-cpu-host_local-task
+  SRCS
+    "abs.mlir"
+    "add.mlir"
+    "batch_norm_inference.mlir"
+    "bitcast_convert.mlir"
+    "broadcast.mlir"
+    "broadcast_add.mlir"
+    "broadcast_in_dim.mlir"
+    "clamp.mlir"
+    "compare.mlir"
+    "complex.mlir"
+    "concatenate.mlir"
+    "constant.mlir"
+    "convert.mlir"
+    "convolution.mlir"
+    "cosine.mlir"
+    "divide.mlir"
+    "dot.mlir"
+    "dot_bf16.mlir"
+    "dot_general.mlir"
+    "dynamic_slice.mlir"
+    "dynamic_update_slice.mlir"
+    "exponential.mlir"
+    "exponential_fp16.mlir"
+    "exponential_minus_one.mlir"
+    "fft.mlir"
+    "finite.mlir"
+    "floor.mlir"
+    "gather.mlir"
+    "householder.mlir"
+    "iota.mlir"
+    "log.mlir"
+    "log_plus_one.mlir"
+    "maximum.mlir"
+    "minimum.mlir"
+    "multiply.mlir"
+    "negate.mlir"
+    "pad.mlir"
+    "philox.mlir"
+    "pow.mlir"
+    "reduce.mlir"
+    "reduce_window.mlir"
+    "remainder.mlir"
+    "reshape.mlir"
+    "reverse.mlir"
+    "rng_normal.mlir"
+    "rng_uniform.mlir"
+    "round.mlir"
+    "rsqrt.mlir"
+    "scatter.mlir"
+    "scatter_dynamic.mlir"
+    "select.mlir"
+    "sine.mlir"
+    "slice.mlir"
+    "sort.mlir"
+    "sqrt.mlir"
+    "subtract.mlir"
+    "tanh.mlir"
+    "three_fry.mlir"
+    "torch_index_select.mlir"
+    "transpose.mlir"
+    "while.mlir"
+  TARGET_BACKEND
+    "llvm-cpu"
+  DRIVER
+    "local-task"
+  COMPILER_FLAGS
+    "--iree-input-demote-f64-to-f32"
+    "--iree-llvmcpu-target-cpu-features=host"
+  INPUT_TYPE
+    "stablehlo"
+  LABELS
+    "hostonly"
+    "local"
+)
+
+iree_check_single_backend_test_suite(
+  NAME
     check_vmvx_local-task
   SRCS
     "abs.mlir"
@@ -185,79 +264,6 @@
     "dynamic_update_slice.mlir"
     "exponential.mlir"
     "exponential_minus_one.mlir"
-    "finite.mlir"
-    "floor.mlir"
-    "gather.mlir"
-    "householder.mlir"
-    "iota.mlir"
-    "log.mlir"
-    "log_plus_one.mlir"
-    "maximum.mlir"
-    "minimum.mlir"
-    "multiply.mlir"
-    "negate.mlir"
-    "pad.mlir"
-    "philox.mlir"
-    "pow.mlir"
-    "reduce.mlir"
-    "reduce_window.mlir"
-    "remainder.mlir"
-    "reshape.mlir"
-    "rng_normal.mlir"
-    "rng_uniform.mlir"
-    "round.mlir"
-    "rsqrt.mlir"
-    "scatter.mlir"
-    "scatter_dynamic.mlir"
-    "select.mlir"
-    "sine.mlir"
-    "slice.mlir"
-    "sort.mlir"
-    "sqrt.mlir"
-    "subtract.mlir"
-    "tanh.mlir"
-    "three_fry.mlir"
-    "torch_index_select.mlir"
-    "transpose.mlir"
-    "while.mlir"
-  TARGET_BACKEND
-    "vulkan-spirv"
-  DRIVER
-    "vulkan"
-  COMPILER_FLAGS
-    "--iree-input-demote-f64-to-f32"
-  INPUT_TYPE
-    "stablehlo"
-)
-
-iree_check_single_backend_test_suite(
-  NAME
-    check_llvm-cpu-host_local-task
-  SRCS
-    "abs.mlir"
-    "add.mlir"
-    "batch_norm_inference.mlir"
-    "bitcast_convert.mlir"
-    "broadcast.mlir"
-    "broadcast_add.mlir"
-    "broadcast_in_dim.mlir"
-    "clamp.mlir"
-    "compare.mlir"
-    "complex.mlir"
-    "concatenate.mlir"
-    "constant.mlir"
-    "convert.mlir"
-    "convolution.mlir"
-    "cosine.mlir"
-    "divide.mlir"
-    "dot.mlir"
-    "dot_bf16.mlir"
-    "dot_general.mlir"
-    "dynamic_slice.mlir"
-    "dynamic_update_slice.mlir"
-    "exponential.mlir"
-    "exponential_fp16.mlir"
-    "exponential_minus_one.mlir"
     "fft.mlir"
     "finite.mlir"
     "floor.mlir"
@@ -296,17 +302,13 @@
     "transpose.mlir"
     "while.mlir"
   TARGET_BACKEND
-    "llvm-cpu"
+    "vulkan-spirv"
   DRIVER
-    "local-task"
+    "vulkan"
   COMPILER_FLAGS
     "--iree-input-demote-f64-to-f32"
-    "--iree-llvmcpu-target-cpu-features=host"
   INPUT_TYPE
     "stablehlo"
-  LABELS
-    "hostonly"
-    "local"
 )
 
 iree_check_single_backend_test_suite(
@@ -476,6 +478,153 @@
     "requires-gpu-nvidia"
 )
 
+iree_check_single_backend_test_suite(
+  NAME
+    check_rocm_hip_stream
+  SRCS
+    "abs.mlir"
+    "add.mlir"
+    "batch_norm_inference.mlir"
+    "bitcast_convert.mlir"
+    "broadcast.mlir"
+    "broadcast_add.mlir"
+    "broadcast_in_dim.mlir"
+    "clamp.mlir"
+    "compare.mlir"
+    "complex.mlir"
+    "concatenate.mlir"
+    "constant.mlir"
+    "convert.mlir"
+    "convolution.mlir"
+    "cosine.mlir"
+    "divide.mlir"
+    "dot.mlir"
+    "dot_bf16.mlir"
+    "dot_general.mlir"
+    "dynamic_slice.mlir"
+    "dynamic_update_slice.mlir"
+    "exponential.mlir"
+    "exponential_fp16.mlir"
+    "exponential_minus_one.mlir"
+    "fft.mlir"
+    "finite.mlir"
+    "floor.mlir"
+    "gather.mlir"
+    "householder.mlir"
+    "iota.mlir"
+    "log.mlir"
+    "log_plus_one.mlir"
+    "maximum.mlir"
+    "minimum.mlir"
+    "multiply.mlir"
+    "negate.mlir"
+    "pad.mlir"
+    "philox.mlir"
+    "pow.mlir"
+    "reduce.mlir"
+    "reduce_window.mlir"
+    "remainder.mlir"
+    "reshape.mlir"
+    "reverse.mlir"
+    "rng_normal.mlir"
+    "rng_uniform.mlir"
+    "round.mlir"
+    "rsqrt.mlir"
+    "scatter.mlir"
+    "scatter_dynamic.mlir"
+    "select.mlir"
+    "sine.mlir"
+    "slice.mlir"
+    "sort.mlir"
+    "sqrt.mlir"
+    "subtract.mlir"
+    "tanh.mlir"
+    "three_fry.mlir"
+    "torch_index_select.mlir"
+    "transpose.mlir"
+    "while.mlir"
+  TARGET_BACKEND
+    "rocm"
+  DRIVER
+    "hip"
+  INPUT_TYPE
+    "stablehlo"
+  RUNNER_ARGS
+    "--hip_use_streams=true"
+)
+
+iree_check_single_backend_test_suite(
+  NAME
+    check_metal-spirv_metal
+  SRCS
+    "abs.mlir"
+    "add.mlir"
+    "batch_norm_inference.mlir"
+    "bitcast_convert.mlir"
+    "broadcast.mlir"
+    "broadcast_add.mlir"
+    "broadcast_in_dim.mlir"
+    "clamp.mlir"
+    "compare.mlir"
+    "complex.mlir"
+    "concatenate.mlir"
+    "constant.mlir"
+    "convert.mlir"
+    "convolution.mlir"
+    "cosine.mlir"
+    "divide.mlir"
+    "dot.mlir"
+    "dot_bf16.mlir"
+    "dot_general.mlir"
+    "dynamic_slice.mlir"
+    "dynamic_update_slice.mlir"
+    "exponential.mlir"
+    "exponential_fp16.mlir"
+    "exponential_minus_one.mlir"
+    "fft.mlir"
+    "finite.mlir"
+    "floor.mlir"
+    "gather.mlir"
+    "householder.mlir"
+    "iota.mlir"
+    "log.mlir"
+    "log_plus_one.mlir"
+    "maximum.mlir"
+    "minimum.mlir"
+    "multiply.mlir"
+    "negate.mlir"
+    "pad.mlir"
+    "philox.mlir"
+    "pow.mlir"
+    "reduce.mlir"
+    "remainder.mlir"
+    "reshape.mlir"
+    "reverse.mlir"
+    "rng_normal.mlir"
+    "rng_uniform.mlir"
+    "round.mlir"
+    "rsqrt.mlir"
+    "scatter.mlir"
+    "scatter_dynamic.mlir"
+    "select.mlir"
+    "sine.mlir"
+    "slice.mlir"
+    "sort.mlir"
+    "sqrt.mlir"
+    "subtract.mlir"
+    "tanh.mlir"
+    "three_fry.mlir"
+    "torch_index_select.mlir"
+    "transpose.mlir"
+    "while.mlir"
+  TARGET_BACKEND
+    "metal-spirv"
+  DRIVER
+    "metal"
+  INPUT_TYPE
+    "stablehlo"
+)
+
 ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
 
 iree_check_single_backend_test_suite(
@@ -553,79 +702,6 @@
 
 iree_check_single_backend_test_suite(
   NAME
-    check_metal-spirv_metal
-  SRCS
-    "abs.mlir"
-    "add.mlir"
-    "batch_norm_inference.mlir"
-    "bitcast_convert.mlir"
-    "broadcast.mlir"
-    "broadcast_add.mlir"
-    "broadcast_in_dim.mlir"
-    "clamp.mlir"
-    "compare.mlir"
-    "complex.mlir"
-    "concatenate.mlir"
-    "constant.mlir"
-    "convert.mlir"
-    "convolution.mlir"
-    "cosine.mlir"
-    "divide.mlir"
-    "dot.mlir"
-    "dot_bf16.mlir"
-    "dot_general.mlir"
-    "dynamic_slice.mlir"
-    "dynamic_update_slice.mlir"
-    "exponential.mlir"
-    "exponential_fp16.mlir"
-    "exponential_minus_one.mlir"
-    "fft.mlir"
-    "finite.mlir"
-    "floor.mlir"
-    "gather.mlir"
-    "iota.mlir"
-    "log.mlir"
-    "log_plus_one.mlir"
-    "maximum.mlir"
-    "minimum.mlir"
-    "multiply.mlir"
-    "negate.mlir"
-    "pad.mlir"
-    "philox.mlir"
-    "pow.mlir"
-    "reduce.mlir"
-    # "reduce_window.mlir"  # TODO(15012): fix test crash
-    "remainder.mlir"
-    "reshape.mlir"
-    "reverse.mlir"
-    "rng_normal.mlir"
-    "rng_uniform.mlir"
-    "round.mlir"
-    "rsqrt.mlir"
-    "scatter.mlir"
-    "scatter_dynamic.mlir"
-    "select.mlir"
-    "sine.mlir"
-    "slice.mlir"
-    "sort.mlir"
-    "sqrt.mlir"
-    "subtract.mlir"
-    "tanh.mlir"
-    "three_fry.mlir"
-    "torch_index_select.mlir"
-    "transpose.mlir"
-    "while.mlir"
-  TARGET_BACKEND
-    "metal-spirv"
-  DRIVER
-    "metal"
-  COMPILER_FLAGS
-    "--iree-input-type=stablehlo"
-)
-
-
-iree_check_single_backend_test_suite(
-  NAME
     check_rocm-rocm
   SRCS
     "abs.mlir"
@@ -696,95 +772,3 @@
   COMPILER_FLAGS
     "--iree-input-type=stablehlo"
 )
-
-if(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx")
-
-unset(IREE_HIP_TEST_COMPILER_FLAGS)
-list(APPEND IREE_HIP_TEST_COMPILER_FLAGS
-  "--iree-rocm-target-chip=${IREE_HIP_TEST_TARGET_CHIP}"
-)
-
-iree_check_single_backend_test_suite(
-  NAME
-    check_hip_stream
-  SRCS
-    "abs.mlir"
-    "add.mlir"
-    "batch_norm_inference.mlir"
-    "bitcast_convert.mlir"
-    "broadcast.mlir"
-    "broadcast_add.mlir"
-    "broadcast_in_dim.mlir"
-    "clamp.mlir"
-    "compare.mlir"
-    "complex.mlir"
-    "concatenate.mlir"
-    "constant.mlir"
-    "convert.mlir"
-    "convolution.mlir"
-    "cosine.mlir"
-    "divide.mlir"
-    "dot.mlir"
-    "dot_bf16.mlir"
-    "dot_general.mlir"
-    "dynamic_slice.mlir"
-    "dynamic_update_slice.mlir"
-    "exponential.mlir"
-    "exponential_fp16.mlir"
-    "exponential_minus_one.mlir"
-    "fft.mlir"
-    "finite.mlir"
-    "floor.mlir"
-    "gather.mlir"
-    "householder.mlir"
-    "iota.mlir"
-    "log.mlir"
-    "log_plus_one.mlir"
-    "maximum.mlir"
-    "minimum.mlir"
-    "multiply.mlir"
-    "negate.mlir"
-    "pad.mlir"
-    "philox.mlir"
-    "pow.mlir"
-    "reduce.mlir"
-    "reduce_window.mlir"
-    "remainder.mlir"
-    "reshape.mlir"
-    "reverse.mlir"
-    "rng_normal.mlir"
-    "rng_uniform.mlir"
-    "round.mlir"
-    "rsqrt.mlir"
-    "scatter.mlir"
-    "scatter_dynamic.mlir"
-    "select.mlir"
-    "sine.mlir"
-    "slice.mlir"
-    "sort.mlir"
-    "sqrt.mlir"
-    "subtract.mlir"
-    "tanh.mlir"
-    "three_fry.mlir"
-    "torch_index_select.mlir"
-    "transpose.mlir"
-    "while.mlir"
-  TARGET_BACKEND
-    "rocm"
-  DRIVER
-    "hip"
-  COMPILER_FLAGS
-    ${IREE_HIP_TEST_COMPILER_FLAGS}
-  INPUT_TYPE
-    "stablehlo"
-  RUNNER_ARGS
-    "--hip_use_streams=true"
-  LABELS
-    "noasan"
-    "nomsan"
-    "notsan"
-    "noubsan"
-    "requires-gpu-amd"
-)
-
-endif(IREE_HIP_TEST_TARGET_CHIP MATCHES "^gfx")