Re-enable CUDA tests. (#8237)

* Remove additional Bazel file and add CUDA config for Turing.
* Add CUDA target and driver option to Turing builds.
* Remove flags to enable CUDA build pending VM updates.
* Disable CUDA in Bazel tests.
diff --git a/build_tools/bazel/iree_check_test.bzl b/build_tools/bazel/iree_check_test.bzl
index b34ef02..a72fe41 100644
--- a/build_tools/bazel/iree_check_test.bzl
+++ b/build_tools/bazel/iree_check_test.bzl
@@ -127,6 +127,9 @@
 
     tests = []
     for src in srcs:
+        # CUDA backend/driver not supported by Bazel build.
+        if target_backend == "cuda" or driver == "cuda":
+            continue
         test_name = "_".join([name, src])
         iree_check_test(
             name = test_name,
@@ -198,6 +201,9 @@
     # could just create a test suite. The latter seems simpler and more readable.
     tests = []
     for backend, driver in target_backends_and_drivers:
+        # CUDA backend/driver not supported by Bazel build.
+        if backend == "cuda" or driver == "cuda":
+            continue
         suite_name = "_".join([name, backend, driver])
         iree_check_single_backend_test_suite(
             name = suite_name,
diff --git a/build_tools/cmake/clean_build.sh b/build_tools/cmake/clean_build.sh
index 543412b..0af2e4e 100755
--- a/build_tools/cmake/clean_build.sh
+++ b/build_tools/cmake/clean_build.sh
@@ -17,4 +17,4 @@
 
 cd ${ROOT_DIR?}
 rm -rf build/
-./build_tools/cmake/rebuild.sh
+./build_tools/cmake/rebuild.sh "$@"
diff --git a/build_tools/cmake/rebuild.sh b/build_tools/cmake/rebuild.sh
index 68e9420..4151aa5 100755
--- a/build_tools/cmake/rebuild.sh
+++ b/build_tools/cmake/rebuild.sh
@@ -50,5 +50,5 @@
   "-DIREE_ENABLE_ASSERTIONS=ON"
 )
 
-"$CMAKE_BIN" "${CMAKE_ARGS[@]?}" ..
+"$CMAKE_BIN" "${CMAKE_ARGS[@]?}" "$@" ..
 "$CMAKE_BIN" --build .
diff --git a/build_tools/kokoro/gcp_ubuntu/cmake-bazel/linux/x86-turing/build.sh b/build_tools/kokoro/gcp_ubuntu/cmake-bazel/linux/x86-turing/build.sh
index a1cff76..0baddcd 100755
--- a/build_tools/kokoro/gcp_ubuntu/cmake-bazel/linux/x86-turing/build.sh
+++ b/build_tools/kokoro/gcp_ubuntu/cmake-bazel/linux/x86-turing/build.sh
@@ -57,6 +57,8 @@
 # TODO(gcmn): It would be nice to be able to build and test as much as possible,
 # so a build failure only prevents building/testing things that depend on it and
 # we can still run the other tests.
+# TODO: Add "-DIREE_TARGET_BACKEND_CUDA=ON -DIREE_HAL_DRIVER_CUDA=ON" once the
+# VMs have been updated with the correct CUDA SDK.
 echo "Configuring CMake"
 "${CMAKE_BIN}" -B "${CMAKE_BUILD_DIR?}" -G Ninja \
    -DIREE_TF_TOOLS_ROOT="${BAZEL_BINDIR?}/iree_tf_compiler/" \
diff --git a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build.sh b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build.sh
index 8363787..28751fe 100755
--- a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build.sh
+++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-turing/build.sh
@@ -33,6 +33,8 @@
 # TODO(gcmn): It would be nice to be able to build and test as much as possible,
 # so a build failure only prevents building/testing things that depend on it and
 # we can still run the other tests.
+# TODO: Add "-DIREE_TARGET_BACKEND_CUDA=ON -DIREE_HAL_DRIVER_CUDA=ON" once the
+# VMs have been updated with the correct CUDA SDK.
 echo "Building with cmake"
 ./build_tools/cmake/clean_build.sh
 
diff --git a/iree/compiler/Dialect/HAL/Target/CUDA/test/BUILD b/iree/compiler/Dialect/HAL/Target/CUDA/test/BUILD
deleted file mode 100644
index ac1f0e0..0000000
--- a/iree/compiler/Dialect/HAL/Target/CUDA/test/BUILD
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright 2021 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-# load("//build_tools/bazel:iree_lit_test.bzl", "iree_lit_test_suite")
-# load("//build_tools/bazel:enforce_glob.bzl", "enforce_glob")
-
-package(
-    default_visibility = ["//visibility:public"],
-    features = ["layering_check"],
-    licenses = ["notice"],  # Apache 2.0
-)
-
-# iree_lit_test_suite(
-#     name = "lit",
-#     srcs = enforce_glob(
-#         ["smoketest.mlir"],
-#         include = ["*.mlir"],
-#     ),
-#     tools = [
-#         "//iree/tools:iree-opt",
-#         "@llvm-project//llvm:FileCheck",
-#     ],
-# )
diff --git a/iree/compiler/Dialect/HAL/Target/CUDA/test/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/CUDA/test/CMakeLists.txt
index 0b6559b..31c1f25 100644
--- a/iree/compiler/Dialect/HAL/Target/CUDA/test/CMakeLists.txt
+++ b/iree/compiler/Dialect/HAL/Target/CUDA/test/CMakeLists.txt
@@ -1,13 +1,11 @@
-################################################################################
-# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
-# iree/compiler/Dialect/HAL/Target/CUDA/test/BUILD                             #
-#                                                                              #
-# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
-# CMake-only content.                                                          #
-#                                                                              #
-# To disable autogeneration for this file entirely, delete this header.        #
-################################################################################
-
 iree_add_all_subdirs()
 
-### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
+iree_lit_test_suite(
+  NAME
+    lit
+  SRCS
+    "smoketest.mlir"
+  TOOLS
+    FileCheck
+    iree::tools::iree-opt
+)
diff --git a/iree/hal/cuda/cts/CMakeLists.txt b/iree/hal/cuda/cts/CMakeLists.txt
index 72c2269..e2d6e72 100644
--- a/iree/hal/cuda/cts/CMakeLists.txt
+++ b/iree/hal/cuda/cts/CMakeLists.txt
@@ -4,27 +4,26 @@
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-# Temporarily disabled pending build system changes.
-# iree_hal_cts_test_suite(
-#   DRIVER_NAME
-#     cuda
-#   DRIVER_REGISTRATION_HDR
-#     "iree/hal/cuda/registration/driver_module.h"
-#   DRIVER_REGISTRATION_FN
-#     "iree_hal_cuda_driver_module_register"
-#   COMPILER_TARGET_BACKEND
-#     "cuda"
-#   EXECUTABLE_FORMAT
-#     "\"PTXE\""
-#   DEPS
-#     iree::hal::cuda::registration
-#   EXCLUDED_TESTS
-#     # This test depends on iree_hal_cuda_stream_command_buffer_update_buffer
-#     # via iree_hal_buffer_view_allocate_buffer, which is not implemented yet.
-#     "command_buffer_dispatch"
-#     # Non-push descriptor sets are not implemented in the CUDA backend yet.
-#     "descriptor_set"
-#     # Semaphores are not implemented in the CUDA backend yet.
-#     "semaphore_submission"
-#     "semaphore"
-# )
+iree_hal_cts_test_suite(
+  DRIVER_NAME
+    cuda
+  DRIVER_REGISTRATION_HDR
+    "iree/hal/cuda/registration/driver_module.h"
+  DRIVER_REGISTRATION_FN
+    "iree_hal_cuda_driver_module_register"
+  COMPILER_TARGET_BACKEND
+    "cuda"
+  EXECUTABLE_FORMAT
+    "\"PTXE\""
+  DEPS
+    iree::hal::cuda::registration
+  EXCLUDED_TESTS
+    # This test depends on iree_hal_cuda_stream_command_buffer_update_buffer
+    # via iree_hal_buffer_view_allocate_buffer, which is not implemented yet.
+    "command_buffer_dispatch"
+    # Non-push descriptor sets are not implemented in the CUDA backend yet.
+    "descriptor_set"
+    # Semaphores are not implemented in the CUDA backend yet.
+    "semaphore_submission"
+    "semaphore"
+)
diff --git a/iree/samples/simple_embedding/BUILD b/iree/samples/simple_embedding/BUILD
index 4914245..21911ee 100644
--- a/iree/samples/simple_embedding/BUILD
+++ b/iree/samples/simple_embedding/BUILD
@@ -273,7 +273,8 @@
     inline = True,
 )
 
-# Temporarily disabled pending build system changes.
+# Disabled because CUDA is not universally available and Bazel does not
+# support configurability in a reasonable or useful way.
 # iree_cmake_extra_content(
 #     content = """
 # if(${IREE_HAL_DRIVER_CUDA} AND (${IREE_TARGET_BACKEND_CUDA} OR DEFINED IREE_HOST_BINARY_ROOT))
@@ -310,14 +311,13 @@
 #     ],
 # )
 
-# Simple embedding is failing in the CI.
-# native_test(
-#     name = "simple_embedding_cuda_test",
-#     tags = [
-#         "driver=cuda",
-#     ],
-#     src = ":simple_embedding_cuda",
-# )
+# # native_test(
+# #     name = "simple_embedding_cuda_test",
+# #     src = ":simple_embedding_cuda",
+# #     tags = [
+# #         "driver=cuda",
+# #     ],
+# # )
 
 # iree_cmake_extra_content(
 #     content = """
diff --git a/iree/test/e2e/cuda_specific/BUILD b/iree/test/e2e/cuda_specific/BUILD
deleted file mode 100644
index a067545..0000000
--- a/iree/test/e2e/cuda_specific/BUILD
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright 2021 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-# Tests for end-to-end IREE support specific to the CUDA backend to be able to
-# incrementally enable features.
-
-# load("//build_tools/bazel:iree_check_test.bzl", "iree_check_single_backend_test_suite")
-
-package(
-    default_visibility = ["//visibility:public"],
-    features = ["layering_check"],
-    licenses = ["notice"],  # Apache 2.0
-)
-
-# Temporarily disabled pending build system changes.
-# iree_check_single_backend_test_suite(
-#     name = "check_cuda",
-#     srcs = [
-#         "dot.mlir",
-#     ],
-#     compiler_flags = ["-iree-input-type=mhlo"],
-#     driver = "cuda",
-#     tags = [
-#         # CUDA cuInit fails with sanitizer on.
-#         "noasan",
-#         "nomsan",
-#         "notsan",
-#         "noubsan",
-#         "requires-gpu-nvidia",
-#     ],
-#     target_backend = "cuda",
-# )
diff --git a/iree/test/e2e/cuda_specific/CMakeLists.txt b/iree/test/e2e/cuda_specific/CMakeLists.txt
index ab173dc..4730d22 100644
--- a/iree/test/e2e/cuda_specific/CMakeLists.txt
+++ b/iree/test/e2e/cuda_specific/CMakeLists.txt
@@ -1,13 +1,29 @@
-################################################################################
-# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
-# iree/test/e2e/cuda_specific/BUILD                                            #
-#                                                                              #
-# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
-# CMake-only content.                                                          #
-#                                                                              #
-# To disable autogeneration for this file entirely, delete this header.        #
-################################################################################
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Tests for end-to-end IREE support specific to the CUDA backend to be able to
+# incrementally enable features.
 
 iree_add_all_subdirs()
 
-### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
+iree_check_single_backend_test_suite(
+  NAME
+    check_cuda
+  SRCS
+    "dot.mlir"
+  TARGET_BACKEND
+    "cuda"
+  DRIVER
+    "cuda"
+  COMPILER_FLAGS
+    "-iree-input-type=mhlo"
+  LABELS
+    "noasan"
+    "nomsan"
+    "notsan"
+    "noubsan"
+    "requires-gpu-nvidia"
+)
diff --git a/iree/test/e2e/linalg_ext_ops/BUILD b/iree/test/e2e/linalg_ext_ops/BUILD
index 343c8b1..824d951 100644
--- a/iree/test/e2e/linalg_ext_ops/BUILD
+++ b/iree/test/e2e/linalg_ext_ops/BUILD
@@ -13,30 +13,29 @@
     licenses = ["notice"],  # Apache 2.0
 )
 
-# Temporarily disabled pending build system changes.
-# iree_check_single_backend_test_suite(
-#     name = "check_cuda",
-#     srcs = enforce_glob(
-#         # keep sorted
-#         [
-#             "reverse.mlir",
-#             "scan.mlir",
-#         ],
-#         include = ["*.mlir"],
-#         exclude = [
-#         ],
-#     ),
-#     driver = "cuda",
-#     tags = [
-#         # CUDA cuInit fails with sanitizer on.
-#         "noasan",
-#         "nomsan",
-#         "notsan",
-#         "noubsan",
-#         "requires-gpu-nvidia",
-#     ],
-#     target_backend = "cuda",
-# )
+iree_check_single_backend_test_suite(
+    name = "check_cuda",
+    srcs = enforce_glob(
+        # keep sorted
+        [
+            "reverse.mlir",
+            "scan.mlir",
+        ],
+        include = ["*.mlir"],
+        exclude = [
+        ],
+    ),
+    driver = "cuda",
+    tags = [
+        # CUDA cuInit fails with sanitizer on.
+        "noasan",
+        "nomsan",
+        "notsan",
+        "noubsan",
+        "requires-gpu-nvidia",
+    ],
+    target_backend = "cuda",
+)
 
 iree_check_single_backend_test_suite(
     name = "check_dylib-llvm-aot_dylib",
diff --git a/iree/test/e2e/linalg_ext_ops/CMakeLists.txt b/iree/test/e2e/linalg_ext_ops/CMakeLists.txt
index 7aed204..3d9d628 100644
--- a/iree/test/e2e/linalg_ext_ops/CMakeLists.txt
+++ b/iree/test/e2e/linalg_ext_ops/CMakeLists.txt
@@ -12,6 +12,24 @@
 
 iree_check_single_backend_test_suite(
   NAME
+    check_cuda
+  SRCS
+    "reverse.mlir"
+    "scan.mlir"
+  TARGET_BACKEND
+    "cuda"
+  DRIVER
+    "cuda"
+  LABELS
+    "noasan"
+    "nomsan"
+    "notsan"
+    "noubsan"
+    "requires-gpu-nvidia"
+)
+
+iree_check_single_backend_test_suite(
+  NAME
     check_dylib-llvm-aot_dylib
   SRCS
     "reverse.mlir"
diff --git a/iree/test/e2e/models/BUILD b/iree/test/e2e/models/BUILD
index d0b1107..ae84e50 100644
--- a/iree/test/e2e/models/BUILD
+++ b/iree/test/e2e/models/BUILD
@@ -74,20 +74,19 @@
     target_backend = "vulkan-spirv",
 )
 
-# Temporarily disabled pending build system changes.
-# iree_check_single_backend_test_suite(
-#     name = "check_cuda_cuda",
-#     timeout = "long",
-#     srcs = CHECK_FRAMEWORK_TESTS,
-#     compiler_flags = ["-iree-input-type=mhlo"],
-#     driver = "cuda",
-#     tags = [
-#         # CUDA cuInit fails with sanitizer on.
-#         "noasan",
-#         "nomsan",
-#         "notsan",
-#         "noubsan",
-#         "requires-gpu-nvidia",
-#     ],
-#     target_backend = "cuda",
-# )
+iree_check_single_backend_test_suite(
+    name = "check_cuda_cuda",
+    timeout = "long",
+    srcs = CHECK_FRAMEWORK_TESTS,
+    compiler_flags = ["-iree-input-type=mhlo"],
+    driver = "cuda",
+    tags = [
+        # CUDA cuInit fails with sanitizer on.
+        "noasan",
+        "nomsan",
+        "notsan",
+        "noubsan",
+        "requires-gpu-nvidia",
+    ],
+    target_backend = "cuda",
+)
diff --git a/iree/test/e2e/models/CMakeLists.txt b/iree/test/e2e/models/CMakeLists.txt
index a668981..a15eb09 100644
--- a/iree/test/e2e/models/CMakeLists.txt
+++ b/iree/test/e2e/models/CMakeLists.txt
@@ -58,4 +58,24 @@
     "-iree-input-type=mhlo"
 )
 
+iree_check_single_backend_test_suite(
+  NAME
+    check_cuda_cuda
+  SRCS
+    "bert_encoder_unrolled_fake_weights.mlir"
+    "mobilenetv3_fake_weights.mlir"
+  TARGET_BACKEND
+    "cuda"
+  DRIVER
+    "cuda"
+  COMPILER_FLAGS
+    "-iree-input-type=mhlo"
+  LABELS
+    "noasan"
+    "nomsan"
+    "notsan"
+    "noubsan"
+    "requires-gpu-nvidia"
+)
+
 ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/test/e2e/regression/BUILD b/iree/test/e2e/regression/BUILD
index cc78f00..bc0e05b 100644
--- a/iree/test/e2e/regression/BUILD
+++ b/iree/test/e2e/regression/BUILD
@@ -91,22 +91,21 @@
     target_backend = "vulkan-spirv",
 )
 
-# Temporarily disabled pending build system changes.
-# iree_check_single_backend_test_suite(
-#     name = "check_regression_cuda",
-#     srcs = BACKEND_TESTS,
-#     compiler_flags = ["-iree-input-type=mhlo"],
-#     driver = "cuda",
-#     tags = [
-#         # CUDA cuInit fails with sanitizer on.
-#         "noasan",
-#         "nomsan",
-#         "notsan",
-#         "noubsan",
-#         "requires-gpu-nvidia",
-#     ],
-#     target_backend = "cuda",
-# )
+iree_check_single_backend_test_suite(
+    name = "check_regression_cuda",
+    srcs = BACKEND_TESTS,
+    compiler_flags = ["-iree-input-type=mhlo"],
+    driver = "cuda",
+    tags = [
+        # CUDA cuInit fails with sanitizer on.
+        "noasan",
+        "nomsan",
+        "notsan",
+        "noubsan",
+        "requires-gpu-nvidia",
+    ],
+    target_backend = "cuda",
+)
 
 py_binary(
     name = "generate_e2e_matmul_tests",
diff --git a/iree/test/e2e/regression/CMakeLists.txt b/iree/test/e2e/regression/CMakeLists.txt
index 9942708..31cc3df 100644
--- a/iree/test/e2e/regression/CMakeLists.txt
+++ b/iree/test/e2e/regression/CMakeLists.txt
@@ -96,6 +96,34 @@
     "-iree-input-type=mhlo"
 )
 
+iree_check_single_backend_test_suite(
+  NAME
+    check_regression_cuda
+  SRCS
+    "dynamic_abs.mlir"
+    "dynamic_add.mlir"
+    "dynamic_dot.mlir"
+    "dynamic_reduce_min.mlir"
+    "dynamic_torch_index_select_high_rank.mlir"
+    "dynamic_torch_index_select_negative.mlir"
+    "dynamic_torch_index_select_scalar.mlir"
+    "dynamic_torch_index_select_vector.mlir"
+    "linalg_ext_ops.mlir"
+    "linalg_ops.mlir"
+  TARGET_BACKEND
+    "cuda"
+  DRIVER
+    "cuda"
+  COMPILER_FLAGS
+    "-iree-input-type=mhlo"
+  LABELS
+    "noasan"
+    "nomsan"
+    "notsan"
+    "noubsan"
+    "requires-gpu-nvidia"
+)
+
 iree_generated_trace_runner_test(
   NAME
     e2e_matmul_direct_i8_small
diff --git a/iree/test/e2e/tensor_ops/BUILD b/iree/test/e2e/tensor_ops/BUILD
index ba43404..1c694b2 100644
--- a/iree/test/e2e/tensor_ops/BUILD
+++ b/iree/test/e2e/tensor_ops/BUILD
@@ -51,30 +51,29 @@
     target_backend = "dylib-llvm-aot",
 )
 
-# Temporarily disabled pending build system changes.
-# iree_check_single_backend_test_suite(
-#     name = "check_cuda",
-#     srcs = enforce_glob(
-#         # keep sorted
-#         [
-#             "extract_slice.mlir",
-#             "tensor_insert_slice.mlir",
-#         ],
-#         include = ["*.mlir"],
-#         exclude = [
-#             "tensor_cast.mlir",
-#         ],
-#     ),
-#     driver = "cuda",
-#     tags = [
-#         "noasan",
-#         "nomsan",
-#         "notsan",
-#         "noubsan",
-#         "requires-gpu-nvidia",
-#     ],
-#     target_backend = "cuda",
-# )
+iree_check_single_backend_test_suite(
+    name = "check_cuda",
+    srcs = enforce_glob(
+        # keep sorted
+        [
+            "extract_slice.mlir",
+            "tensor_insert_slice.mlir",
+        ],
+        include = ["*.mlir"],
+        exclude = [
+            "tensor_cast.mlir",
+        ],
+    ),
+    driver = "cuda",
+    tags = [
+        "noasan",
+        "nomsan",
+        "notsan",
+        "noubsan",
+        "requires-gpu-nvidia",
+    ],
+    target_backend = "cuda",
+)
 
 iree_check_single_backend_test_suite(
     name = "check_vulkan-spirv_vulkan",
diff --git a/iree/test/e2e/tensor_ops/CMakeLists.txt b/iree/test/e2e/tensor_ops/CMakeLists.txt
index 279a5c4..2acc931 100644
--- a/iree/test/e2e/tensor_ops/CMakeLists.txt
+++ b/iree/test/e2e/tensor_ops/CMakeLists.txt
@@ -38,6 +38,24 @@
 
 iree_check_single_backend_test_suite(
   NAME
+    check_cuda
+  SRCS
+    "extract_slice.mlir"
+    "tensor_insert_slice.mlir"
+  TARGET_BACKEND
+    "cuda"
+  DRIVER
+    "cuda"
+  LABELS
+    "noasan"
+    "nomsan"
+    "notsan"
+    "noubsan"
+    "requires-gpu-nvidia"
+)
+
+iree_check_single_backend_test_suite(
+  NAME
     check_vulkan-spirv_vulkan
   SRCS
     "extract_slice.mlir"
diff --git a/iree/test/e2e/xla_ops/BUILD b/iree/test/e2e/xla_ops/BUILD
index 50f0354..20e3b73 100644
--- a/iree/test/e2e/xla_ops/BUILD
+++ b/iree/test/e2e/xla_ops/BUILD
@@ -19,167 +19,166 @@
     licenses = ["notice"],  # Apache 2.0
 )
 
-# Temporarily disabled pending build system changes.
-# iree_check_single_backend_test_suite(
-#     name = "check_cuda_graph",
-#     srcs = enforce_glob(
-#         # keep sorted
-#         [
-#             "abs.mlir",
-#             "add.mlir",
-#             "batch_norm_inference.mlir",
-#             "bitcast_convert.mlir",
-#             "broadcast.mlir",
-#             "broadcast_add.mlir",
-#             "broadcast_in_dim.mlir",
-#             "clamp.mlir",
-#             "compare.mlir",
-#             "concatenate.mlir",
-#             "constant.mlir",
-#             "convert.mlir",
-#             "convolution.mlir",
-#             "cosine.mlir",
-#             "divide.mlir",
-#             "dot.mlir",
-#             "dot_general.mlir",
-#             "dynamic_slice.mlir",
-#             "dynamic_update_slice.mlir",
-#             "exponential.mlir",
-#             "exponential_fp16.mlir",
-#             "exponential_minus_one.mlir",
-#             "fft.mlir",
-#             "finite.mlir",
-#             "floor.mlir",
-#             "gather.mlir",
-#             "iota.mlir",
-#             "log.mlir",
-#             "log_plus_one.mlir",
-#             "maximum.mlir",
-#             "minimum.mlir",
-#             "multiply.mlir",
-#             "negate.mlir",
-#             "pad.mlir",
-#             "pow.mlir",
-#             "reduce.mlir",
-#             "reduce_window.mlir",
-#             "remainder.mlir",
-#             "reshape.mlir",
-#             "reverse.mlir",
-#             "rng_normal.mlir",
-#             "rng_uniform.mlir",
-#             "rsqrt.mlir",
-#             "scatter.mlir",
-#             "scatter_dynamic.mlir",
-#             "select.mlir",
-#             "sine.mlir",
-#             "slice.mlir",
-#             "sort.mlir",
-#             "sqrt.mlir",
-#             "subtract.mlir",
-#             "tanh.mlir",
-#             "torch_index_select.mlir",
-#             "transpose.mlir",
-#             "while.mlir",
-#         ],
-#         include = ["*.mlir"],
-#         exclude = [
-#             "round.mlir",
-#         ],
-#     ),
-#     compiler_flags = ["-iree-input-type=mhlo"],
-#     driver = "cuda",
-#     runner_args = ["--cuda_use_streams=false"],
-#     tags = [
-#         # CUDA cuInit fails with sanitizer on.
-#         "noasan",
-#         "nomsan",
-#         "notsan",
-#         "noubsan",
-#         "requires-gpu-nvidia",
-#     ],
-#     target_backend = "cuda",
-# )
+iree_check_single_backend_test_suite(
+    name = "check_cuda_graph",
+    srcs = enforce_glob(
+        # keep sorted
+        [
+            "abs.mlir",
+            "add.mlir",
+            "batch_norm_inference.mlir",
+            "bitcast_convert.mlir",
+            "broadcast.mlir",
+            "broadcast_add.mlir",
+            "broadcast_in_dim.mlir",
+            "clamp.mlir",
+            "compare.mlir",
+            "concatenate.mlir",
+            "constant.mlir",
+            "convert.mlir",
+            "convolution.mlir",
+            "cosine.mlir",
+            "divide.mlir",
+            "dot.mlir",
+            "dot_general.mlir",
+            "dynamic_slice.mlir",
+            "dynamic_update_slice.mlir",
+            "exponential.mlir",
+            "exponential_fp16.mlir",
+            "exponential_minus_one.mlir",
+            "fft.mlir",
+            "finite.mlir",
+            "floor.mlir",
+            "gather.mlir",
+            "iota.mlir",
+            "log.mlir",
+            "log_plus_one.mlir",
+            "maximum.mlir",
+            "minimum.mlir",
+            "multiply.mlir",
+            "negate.mlir",
+            "pad.mlir",
+            "pow.mlir",
+            "reduce.mlir",
+            "reduce_window.mlir",
+            "remainder.mlir",
+            "reshape.mlir",
+            "reverse.mlir",
+            "rng_normal.mlir",
+            "rng_uniform.mlir",
+            "rsqrt.mlir",
+            "scatter.mlir",
+            "scatter_dynamic.mlir",
+            "select.mlir",
+            "sine.mlir",
+            "slice.mlir",
+            "sort.mlir",
+            "sqrt.mlir",
+            "subtract.mlir",
+            "tanh.mlir",
+            "torch_index_select.mlir",
+            "transpose.mlir",
+            "while.mlir",
+        ],
+        include = ["*.mlir"],
+        exclude = [
+            "round.mlir",
+        ],
+    ),
+    compiler_flags = ["-iree-input-type=mhlo"],
+    driver = "cuda",
+    runner_args = ["--cuda_use_streams=false"],
+    tags = [
+        # CUDA cuInit fails with sanitizer on.
+        "noasan",
+        "nomsan",
+        "notsan",
+        "noubsan",
+        "requires-gpu-nvidia",
+    ],
+    target_backend = "cuda",
+)
 
 # # Run cuda tests using stream command buffer
-# iree_check_single_backend_test_suite(
-#     name = "check_cuda_streams",
-#     srcs = enforce_glob(
-#         # keep sorted
-#         [
-#             "abs.mlir",
-#             "add.mlir",
-#             "batch_norm_inference.mlir",
-#             "bitcast_convert.mlir",
-#             "broadcast.mlir",
-#             "broadcast_add.mlir",
-#             "broadcast_in_dim.mlir",
-#             "clamp.mlir",
-#             "compare.mlir",
-#             "concatenate.mlir",
-#             "constant.mlir",
-#             "convert.mlir",
-#             "convolution.mlir",
-#             "cosine.mlir",
-#             "divide.mlir",
-#             "dot.mlir",
-#             "dot_general.mlir",
-#             "dynamic_slice.mlir",
-#             "dynamic_update_slice.mlir",
-#             "exponential.mlir",
-#             "exponential_fp16.mlir",
-#             "exponential_minus_one.mlir",
-#             "fft.mlir",
-#             "finite.mlir",
-#             "floor.mlir",
-#             "gather.mlir",
-#             "iota.mlir",
-#             "log.mlir",
-#             "log_plus_one.mlir",
-#             "maximum.mlir",
-#             "minimum.mlir",
-#             "multiply.mlir",
-#             "negate.mlir",
-#             "pad.mlir",
-#             "pow.mlir",
-#             "reduce.mlir",
-#             "reduce_window.mlir",
-#             "remainder.mlir",
-#             "reshape.mlir",
-#             "reverse.mlir",
-#             "rng_normal.mlir",
-#             "rng_uniform.mlir",
-#             "rsqrt.mlir",
-#             "scatter.mlir",
-#             "scatter_dynamic.mlir",
-#             "select.mlir",
-#             "sine.mlir",
-#             "slice.mlir",
-#             "sort.mlir",
-#             "sqrt.mlir",
-#             "subtract.mlir",
-#             "tanh.mlir",
-#             "torch_index_select.mlir",
-#             "transpose.mlir",
-#             "while.mlir",
-#         ],
-#         include = ["*.mlir"],
-#         exclude = [
-#             "round.mlir",
-#         ],
-#     ),
-#     compiler_flags = ["-iree-input-type=mhlo"],
-#     driver = "cuda",
-#     runner_args = ["--cuda_use_streams=true"],
-#     tags = [
-#         # CUDA cuInit fails with sanitizer on.
-#         "noasan",
-#         "nomsan",
-#         "notsan",
-#         "noubsan",
-#         "requires-gpu-nvidia",
-#     ],
-#     target_backend = "cuda",
-# )
+iree_check_single_backend_test_suite(
+    name = "check_cuda_streams",
+    srcs = enforce_glob(
+        # keep sorted
+        [
+            "abs.mlir",
+            "add.mlir",
+            "batch_norm_inference.mlir",
+            "bitcast_convert.mlir",
+            "broadcast.mlir",
+            "broadcast_add.mlir",
+            "broadcast_in_dim.mlir",
+            "clamp.mlir",
+            "compare.mlir",
+            "concatenate.mlir",
+            "constant.mlir",
+            "convert.mlir",
+            "convolution.mlir",
+            "cosine.mlir",
+            "divide.mlir",
+            "dot.mlir",
+            "dot_general.mlir",
+            "dynamic_slice.mlir",
+            "dynamic_update_slice.mlir",
+            "exponential.mlir",
+            "exponential_fp16.mlir",
+            "exponential_minus_one.mlir",
+            "fft.mlir",
+            "finite.mlir",
+            "floor.mlir",
+            "gather.mlir",
+            "iota.mlir",
+            "log.mlir",
+            "log_plus_one.mlir",
+            "maximum.mlir",
+            "minimum.mlir",
+            "multiply.mlir",
+            "negate.mlir",
+            "pad.mlir",
+            "pow.mlir",
+            "reduce.mlir",
+            "reduce_window.mlir",
+            "remainder.mlir",
+            "reshape.mlir",
+            "reverse.mlir",
+            "rng_normal.mlir",
+            "rng_uniform.mlir",
+            "rsqrt.mlir",
+            "scatter.mlir",
+            "scatter_dynamic.mlir",
+            "select.mlir",
+            "sine.mlir",
+            "slice.mlir",
+            "sort.mlir",
+            "sqrt.mlir",
+            "subtract.mlir",
+            "tanh.mlir",
+            "torch_index_select.mlir",
+            "transpose.mlir",
+            "while.mlir",
+        ],
+        include = ["*.mlir"],
+        exclude = [
+            "round.mlir",
+        ],
+    ),
+    compiler_flags = ["-iree-input-type=mhlo"],
+    driver = "cuda",
+    runner_args = ["--cuda_use_streams=true"],
+    tags = [
+        # CUDA cuInit fails with sanitizer on.
+        "noasan",
+        "nomsan",
+        "notsan",
+        "noubsan",
+        "requires-gpu-nvidia",
+    ],
+    target_backend = "cuda",
+)
 
 iree_check_single_backend_test_suite(
     name = "check_dylib-llvm-aot_dylib",
diff --git a/iree/test/e2e/xla_ops/CMakeLists.txt b/iree/test/e2e/xla_ops/CMakeLists.txt
index 0b0086e..05236c9 100644
--- a/iree/test/e2e/xla_ops/CMakeLists.txt
+++ b/iree/test/e2e/xla_ops/CMakeLists.txt
@@ -12,6 +12,156 @@
 
 iree_check_single_backend_test_suite(
   NAME
+    check_cuda_graph
+  SRCS
+    "abs.mlir"
+    "add.mlir"
+    "batch_norm_inference.mlir"
+    "bitcast_convert.mlir"
+    "broadcast.mlir"
+    "broadcast_add.mlir"
+    "broadcast_in_dim.mlir"
+    "clamp.mlir"
+    "compare.mlir"
+    "concatenate.mlir"
+    "constant.mlir"
+    "convert.mlir"
+    "convolution.mlir"
+    "cosine.mlir"
+    "divide.mlir"
+    "dot.mlir"
+    "dot_general.mlir"
+    "dynamic_slice.mlir"
+    "dynamic_update_slice.mlir"
+    "exponential.mlir"
+    "exponential_fp16.mlir"
+    "exponential_minus_one.mlir"
+    "fft.mlir"
+    "finite.mlir"
+    "floor.mlir"
+    "gather.mlir"
+    "iota.mlir"
+    "log.mlir"
+    "log_plus_one.mlir"
+    "maximum.mlir"
+    "minimum.mlir"
+    "multiply.mlir"
+    "negate.mlir"
+    "pad.mlir"
+    "pow.mlir"
+    "reduce.mlir"
+    "reduce_window.mlir"
+    "remainder.mlir"
+    "reshape.mlir"
+    "reverse.mlir"
+    "rng_normal.mlir"
+    "rng_uniform.mlir"
+    "rsqrt.mlir"
+    "scatter.mlir"
+    "scatter_dynamic.mlir"
+    "select.mlir"
+    "sine.mlir"
+    "slice.mlir"
+    "sort.mlir"
+    "sqrt.mlir"
+    "subtract.mlir"
+    "tanh.mlir"
+    "torch_index_select.mlir"
+    "transpose.mlir"
+    "while.mlir"
+  TARGET_BACKEND
+    "cuda"
+  DRIVER
+    "cuda"
+  COMPILER_FLAGS
+    "-iree-input-type=mhlo"
+  RUNNER_ARGS
+    "--cuda_use_streams=false"
+  LABELS
+    "noasan"
+    "nomsan"
+    "notsan"
+    "noubsan"
+    "requires-gpu-nvidia"
+)
+
+iree_check_single_backend_test_suite(
+  NAME
+    check_cuda_streams
+  SRCS
+    "abs.mlir"
+    "add.mlir"
+    "batch_norm_inference.mlir"
+    "bitcast_convert.mlir"
+    "broadcast.mlir"
+    "broadcast_add.mlir"
+    "broadcast_in_dim.mlir"
+    "clamp.mlir"
+    "compare.mlir"
+    "concatenate.mlir"
+    "constant.mlir"
+    "convert.mlir"
+    "convolution.mlir"
+    "cosine.mlir"
+    "divide.mlir"
+    "dot.mlir"
+    "dot_general.mlir"
+    "dynamic_slice.mlir"
+    "dynamic_update_slice.mlir"
+    "exponential.mlir"
+    "exponential_fp16.mlir"
+    "exponential_minus_one.mlir"
+    "fft.mlir"
+    "finite.mlir"
+    "floor.mlir"
+    "gather.mlir"
+    "iota.mlir"
+    "log.mlir"
+    "log_plus_one.mlir"
+    "maximum.mlir"
+    "minimum.mlir"
+    "multiply.mlir"
+    "negate.mlir"
+    "pad.mlir"
+    "pow.mlir"
+    "reduce.mlir"
+    "reduce_window.mlir"
+    "remainder.mlir"
+    "reshape.mlir"
+    "reverse.mlir"
+    "rng_normal.mlir"
+    "rng_uniform.mlir"
+    "rsqrt.mlir"
+    "scatter.mlir"
+    "scatter_dynamic.mlir"
+    "select.mlir"
+    "sine.mlir"
+    "slice.mlir"
+    "sort.mlir"
+    "sqrt.mlir"
+    "subtract.mlir"
+    "tanh.mlir"
+    "torch_index_select.mlir"
+    "transpose.mlir"
+    "while.mlir"
+  TARGET_BACKEND
+    "cuda"
+  DRIVER
+    "cuda"
+  COMPILER_FLAGS
+    "-iree-input-type=mhlo"
+  RUNNER_ARGS
+    "--cuda_use_streams=true"
+  LABELS
+    "noasan"
+    "nomsan"
+    "notsan"
+    "noubsan"
+    "requires-gpu-nvidia"
+)
+
+iree_check_single_backend_test_suite(
+  NAME
     check_dylib-llvm-aot_dylib
   SRCS
     "abs.mlir"
diff --git a/iree/tools/CMakeLists.txt b/iree/tools/CMakeLists.txt
index 31046df..833970d 100644
--- a/iree/tools/CMakeLists.txt
+++ b/iree/tools/CMakeLists.txt
@@ -19,31 +19,31 @@
 # Enable compiler targets based on options.
 set(IREE_COMPILER_TARGETS "")
 set(IREE_COMPILER_TARGET_COPTS "")
-if("${IREE_TARGET_BACKEND_DYLIB_LLVM_AOT}" OR "${IREE_TARGET_BACKEND_WASM_LLVM_AOT}")
+if(IREE_TARGET_BACKEND_DYLIB_LLVM_AOT OR IREE_TARGET_BACKEND_WASM_LLVM_AOT)
   list(APPEND IREE_COMPILER_TARGETS iree::compiler::Dialect::HAL::Target::LLVM)
   list(APPEND IREE_COMPILER_TARGET_COPTS "-DIREE_HAVE_LLVMAOT_TARGET")
 endif()
-if("${IREE_TARGET_BACKEND_METAL_SPIRV}")
+if(IREE_TARGET_BACKEND_METAL_SPIRV)
   list(APPEND IREE_COMPILER_TARGETS iree::compiler::Dialect::HAL::Target::MetalSPIRV)
   list(APPEND IREE_COMPILER_TARGET_COPTS "-DIREE_HAVE_METALSPIRV_TARGET")
 endif()
-if("${IREE_TARGET_BACKEND_VMVX}")
+if(IREE_TARGET_BACKEND_VMVX)
   list(APPEND IREE_COMPILER_TARGETS iree::compiler::Dialect::HAL::Target::VMVX)
   list(APPEND IREE_COMPILER_TARGET_COPTS "-DIREE_HAVE_VMVX_TARGET")
 endif()
-if("${IREE_TARGET_BACKEND_VULKAN_SPIRV}")
+if(IREE_TARGET_BACKEND_VULKAN_SPIRV)
   list(APPEND IREE_COMPILER_TARGETS iree::compiler::Dialect::HAL::Target::VulkanSPIRV)
   list(APPEND IREE_COMPILER_TARGET_COPTS "-DIREE_HAVE_VULKANSPIRV_TARGET")
 endif()
-if("${IREE_TARGET_BACKEND_WEBGPU}")
+if(IREE_TARGET_BACKEND_WEBGPU)
   list(APPEND IREE_COMPILER_TARGETS iree::compiler::Dialect::HAL::Target::WebGPU)
   list(APPEND IREE_COMPILER_TARGET_COPTS "-DIREE_HAVE_WEBGPU_TARGET")
 endif()
-# if("${IREE_TARGET_BACKEND_CUDA}")
-#   list(APPEND IREE_COMPILER_TARGETS iree::compiler::Dialect::HAL::Target::CUDA)
-#   list(APPEND IREE_COMPILER_TARGET_COPTS "-DIREE_HAVE_CUDA_TARGET")
-# endif()
-if("${IREE_TARGET_BACKEND_ROCM}")
+if(IREE_TARGET_BACKEND_CUDA)
+  list(APPEND IREE_COMPILER_TARGETS iree::compiler::Dialect::HAL::Target::CUDA)
+  list(APPEND IREE_COMPILER_TARGET_COPTS "-DIREE_HAVE_CUDA_TARGET")
+endif()
+if(IREE_TARGET_BACKEND_ROCM)
   list(APPEND IREE_COMPILER_TARGETS iree::compiler::Dialect::HAL::Target::ROCM)
   list(APPEND IREE_COMPILER_TARGET_COPTS "-DIREE_HAVE_ROCM_TARGET")
 endif()