[cuda] Enable various HAL CTS and e2e single-op tests (#14327)

diff --git a/experimental/cuda2/cts/CMakeLists.txt b/experimental/cuda2/cts/CMakeLists.txt
index 5fe6fe7..c6f15a5 100644
--- a/experimental/cuda2/cts/CMakeLists.txt
+++ b/experimental/cuda2/cts/CMakeLists.txt
@@ -17,17 +17,11 @@
     "\"PTXE\""
   DEPS
     iree::experimental::cuda2::registration
-  INCLUDED_TESTS
-    "allocator"
-    "buffer_mapping"
-    "command_buffer"
-    "command_buffer_dispatch"
-    "command_buffer_push_constants"
-    "executable_cache"
-    "descriptor_set_layout"
-    "driver"
-    "pipeline_layout"
-    "semaphore_submission"
+  EXCLUDED_TESTS
+    # HAL event is unimplemented for now.
+    "event"
+    # HAL semaphore is in the process of being implemented.
+    "semaphore"
   LABELS
     driver=cuda2
     requires-gpu-nvidia
diff --git a/experimental/cuda2/tests/CMakeLists.txt b/experimental/cuda2/tests/CMakeLists.txt
new file mode 100644
index 0000000..33551b5
--- /dev/null
+++ b/experimental/cuda2/tests/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Copyright 2023 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+iree_add_all_subdirs()  # presumably pulls in the sibling stablehlo_ops/ and tosa_ops/ suites — confirm against the helper.
diff --git a/experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt b/experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt
new file mode 100644
index 0000000..a7bb135
--- /dev/null
+++ b/experimental/cuda2/tests/stablehlo_ops/CMakeLists.txt
@@ -0,0 +1,85 @@
+# Copyright 2023 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+iree_check_single_backend_test_suite(
+  NAME
+    check_cuda2_graph
+  SRCS  # Reuses the shared sources under tests/e2e/stablehlo_ops instead of keeping local copies.
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/abs.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/add.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/batch_norm_inference.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/bitcast_convert.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/broadcast.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/broadcast_add.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/broadcast_in_dim.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/clamp.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/compare.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/complex.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/concatenate.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/constant.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/convert.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/convolution.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/cosine.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/divide.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dot.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dot_bf16.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dot_general.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dynamic_slice.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/dynamic_update_slice.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/exponential.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/exponential_fp16.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/exponential_minus_one.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/fft.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/finite.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/floor.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/gather.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/iota.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/log.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/log_plus_one.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/maximum.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/minimum.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/multiply.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/negate.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/pad.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/philox.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/pow.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reduce.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reduce_window.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/remainder.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reshape.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/reverse.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/rng_normal.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/rng_uniform.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/round.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/rsqrt.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/scatter.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/scatter_dynamic.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/select.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/sine.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/slice.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/sort.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/sqrt.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/subtract.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/tanh.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/three_fry.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/torch_index_select.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/transpose.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/stablehlo_ops/while.mlir"
+  TARGET_BACKEND  # The backend name stays "cuda"; only the DRIVER below is "cuda2".
+    "cuda"
+  DRIVER
+    "cuda2"
+  COMPILER_FLAGS
+    "--iree-input-type=stablehlo"
+    # TODO(#13984): We need memset emulation to work around CUDA graph issues for now.
+    "--iree-stream-emulate-memset"
+  LABELS
+    "noasan"
+    "nomsan"
+    "notsan"
+    "noubsan"
+    "requires-gpu-nvidia"
+)
diff --git a/experimental/cuda2/tests/tosa_ops/CMakeLists.txt b/experimental/cuda2/tests/tosa_ops/CMakeLists.txt
new file mode 100644
index 0000000..7b56ffa
--- /dev/null
+++ b/experimental/cuda2/tests/tosa_ops/CMakeLists.txt
@@ -0,0 +1,66 @@
+# Copyright 2023 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+iree_check_single_backend_test_suite(
+  NAME
+    check_cuda2_graph
+  SRCS  # Reuses the shared sources under tests/e2e/tosa_ops instead of keeping local copies.
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/abs.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/add.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/arithmetic_right_shift.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/bitwise_and.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/bitwise_or.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/bitwise_xor.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/ceil.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/clamp.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/clz.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/const.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/equal.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/exp.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/floor.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/fully_connected.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/gather.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/greater.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/greater_equal.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/if.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/log.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/logical_left_shift.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/logical_right_shift.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/logical_right_shift_16.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/matmul.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/max_pool.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/maximum.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/minimum.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/mul.mlir"
+    # "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/mul_shift.mlir"  # Disabled; reason not recorded here — TODO document or re-enable.
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/negate.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/pad.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/reciprocal.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/reduce.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/reshape.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/rsqrt.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/select.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/sigmoid.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/sub.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/table.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/tanh.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/transpose.mlir"
+    "${IREE_SOURCE_DIR}/tests/e2e/tosa_ops/while.mlir"
+  TARGET_BACKEND  # The backend name stays "cuda"; only the DRIVER below is "cuda2".
+    "cuda"
+  DRIVER
+    "cuda2"
+  COMPILER_FLAGS
+    "--iree-input-type=tosa"
+    # TODO(#13984): We need memset emulation to work around CUDA graph issues for now.
+    "--iree-stream-emulate-memset"
+  LABELS
+    "noasan"
+    "nomsan"
+    "notsan"
+    "noubsan"
+    "requires-gpu-nvidia"
+)