Incremental improvements to the HAL CTS. (#4666)

There is more still to do (e.g. https://github.com/google/iree/issues/4643), but this change expands test coverage and starts to organize the tests by complexity and by whether the features they exercise are required or optional.
diff --git a/iree/hal/cts/BUILD b/iree/hal/cts/BUILD
index 627f3bf..778aa97 100644
--- a/iree/hal/cts/BUILD
+++ b/iree/hal/cts/BUILD
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Conformance Test Suite (CTS) for HAL implementations.
-
 package(
     default_visibility = ["//visibility:public"],
     features = ["layering_check"],
@@ -43,6 +41,17 @@
 )
 
 cc_test(
+    name = "buffer_mapping_test",
+    srcs = ["buffer_mapping_test.cc"],
+    deps = [
+        ":cts_test_base",
+        "//iree/hal/testing:driver_registry",
+        "//iree/testing:gtest",
+        "//iree/testing:gtest_main",
+    ],
+)
+
+cc_test(
     name = "command_buffer_test",
     srcs = ["command_buffer_test.cc"],
     deps = [
@@ -118,3 +127,14 @@
         "//iree/testing:gtest_main",
     ],
 )
+
+cc_test(
+    name = "semaphore_submission_test",
+    srcs = ["semaphore_submission_test.cc"],
+    deps = [
+        ":cts_test_base",
+        "//iree/hal/testing:driver_registry",
+        "//iree/testing:gtest",
+        "//iree/testing:gtest_main",
+    ],
+)
diff --git a/iree/hal/cts/CMakeLists.txt b/iree/hal/cts/CMakeLists.txt
index f5c3d94..9287b3b 100644
--- a/iree/hal/cts/CMakeLists.txt
+++ b/iree/hal/cts/CMakeLists.txt
@@ -41,6 +41,18 @@
 
 iree_cc_test(
   NAME
+    buffer_mapping_test
+  SRCS
+    "buffer_mapping_test.cc"
+  DEPS
+    ::cts_test_base
+    iree::hal::testing::driver_registry
+    iree::testing::gtest
+    iree::testing::gtest_main
+)
+
+iree_cc_test(
+  NAME
     command_buffer_test
   SRCS
     "command_buffer_test.cc"
@@ -122,3 +134,15 @@
     iree::testing::gtest
     iree::testing::gtest_main
 )
+
+iree_cc_test(
+  NAME
+    semaphore_submission_test
+  SRCS
+    "semaphore_submission_test.cc"
+  DEPS
+    ::cts_test_base
+    iree::hal::testing::driver_registry
+    iree::testing::gtest
+    iree::testing::gtest_main
+)
diff --git a/iree/hal/cts/README.md b/iree/hal/cts/README.md
new file mode 100644
index 0000000..e5a4b45
--- /dev/null
+++ b/iree/hal/cts/README.md
@@ -0,0 +1,31 @@
+# Conformance Test Suite (CTS) for HAL implementations.
+
+These tests exercise IREE's Hardware Abstraction Layer (HAL) in a way that
+checks for conformance across implementations and devices. The tests themselves
+are structured to help with HAL driver development by using individual features
+in isolation, demonstrating typical full-system usage, and pointing out where
+capabilities are optional.
+
+## On testing for error conditions
+
+In general, error states are only lightly tested because the low-level APIs that
+IREE's HAL is designed to thinly abstract over often assume programmer usage
+will be correct and treat errors as undefined behavior. See the Vulkan spec:
+
+* https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap3.html#introduction-conventions
+* https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap4.html#fundamentals-errors
+
+While the generic tests in the CTS may not be able to check for error conditions
+exhaustively, individual HAL implementations can implement stricter behavior
+or enable higher-level checks like those that the
+[Vulkan Validation Layers](https://github.com/KhronosGroup/Vulkan-ValidationLayers)
+provide.
+
+## Tips for adding new HAL implementations
+
+* Driver (`iree_hal_driver_t`) and device (`iree_hal_device_t`) creation, tested
+  in [driver_test](driver_test.cc), are both prerequisites for all tests.
+* Tests for individual components (e.g.
+  [descriptor_set_layout_test](descriptor_set_layout_test.cc)) are more
+  approachable than tests which use collections of components together (e.g.
+  [command_buffer_test](command_buffer_test.cc)).
diff --git a/iree/hal/cts/allocator_test.cc b/iree/hal/cts/allocator_test.cc
index d1f76a7..01bb339 100644
--- a/iree/hal/cts/allocator_test.cc
+++ b/iree/hal/cts/allocator_test.cc
@@ -15,31 +15,39 @@
 #include "iree/hal/cts/cts_test_base.h"
 #include "iree/hal/testing/driver_registry.h"
 #include "iree/testing/gtest.h"
-#include "iree/testing/status_matchers.h"
 
 namespace iree {
 namespace hal {
 namespace cts {
 
+namespace {
+
+constexpr iree_device_size_t kAllocationSize = 1024;
+
+}  // namespace
+
 class AllocatorTest : public CtsTestBase {};
 
-// Tests for baseline buffer compatibility that all HAL drivers must support.
-TEST_P(AllocatorTest, QueryBufferCompatibility) {
-  iree_host_size_t allocation_size = 1024;
-
+// All allocators must support some baseline capabilities.
+//
+// Certain capabilities or configurations are optional and may vary between
+// driver implementations or target devices, such as:
+//   IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL
+//   IREE_HAL_BUFFER_USAGE_MAPPING
+TEST_P(AllocatorTest, BaselineBufferCompatibility) {
   // Need at least one way to get data between the host and device.
   iree_hal_buffer_compatibility_t transfer_compatibility_host =
       iree_hal_allocator_query_buffer_compatibility(
           device_allocator_,
           IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
           /*allowed_usage=*/IREE_HAL_BUFFER_USAGE_TRANSFER,
-          /*intended_usage=*/IREE_HAL_BUFFER_USAGE_TRANSFER, allocation_size);
+          /*intended_usage=*/IREE_HAL_BUFFER_USAGE_TRANSFER, kAllocationSize);
   iree_hal_buffer_compatibility_t transfer_compatibility_device =
       iree_hal_allocator_query_buffer_compatibility(
           device_allocator_,
           IREE_HAL_MEMORY_TYPE_HOST_VISIBLE | IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,
           /*allowed_usage=*/IREE_HAL_BUFFER_USAGE_TRANSFER,
-          /*intended_usage=*/IREE_HAL_BUFFER_USAGE_TRANSFER, allocation_size);
+          /*intended_usage=*/IREE_HAL_BUFFER_USAGE_TRANSFER, kAllocationSize);
   iree_hal_buffer_compatibility_t required_transfer_compatibility =
       IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE |
       IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER;
@@ -53,22 +61,34 @@
       iree_hal_allocator_query_buffer_compatibility(
           device_allocator_, IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
           /*allowed_usage=*/IREE_HAL_BUFFER_USAGE_DISPATCH,
-          /*intended_usage=*/IREE_HAL_BUFFER_USAGE_DISPATCH, allocation_size);
+          /*intended_usage=*/IREE_HAL_BUFFER_USAGE_DISPATCH, kAllocationSize);
   EXPECT_TRUE(
       iree_all_bits_set(dispatch_compatibility,
                         IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE |
                             IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_DISPATCH));
 }
 
+TEST_P(AllocatorTest, BufferAllowedUsageDeterminesCompatibility) {
+  iree_hal_buffer_compatibility_t compatibility =
+      iree_hal_allocator_query_buffer_compatibility(
+          device_allocator_, IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
+          /*allowed_usage=*/IREE_HAL_BUFFER_USAGE_NONE,
+          /*intended_usage=*/IREE_HAL_BUFFER_USAGE_ALL, kAllocationSize);
+  EXPECT_TRUE(iree_all_bits_set(compatibility,
+                                IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE));
+  EXPECT_FALSE(iree_all_bits_set(compatibility,
+                                 IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER));
+  EXPECT_FALSE(iree_all_bits_set(compatibility,
+                                 IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_DISPATCH));
+}
+
 TEST_P(AllocatorTest, AllocateBuffer) {
-  iree_hal_memory_type_t memory_type =
-      IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
-  iree_hal_buffer_usage_t buffer_usage = IREE_HAL_BUFFER_USAGE_ALL;
-  iree_host_size_t allocation_size = 1024;
+  iree_hal_memory_type_t memory_type = IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
+  iree_hal_buffer_usage_t buffer_usage = IREE_HAL_BUFFER_USAGE_TRANSFER;
 
   iree_hal_buffer_t* buffer;
   IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
-      device_allocator_, memory_type, buffer_usage, allocation_size, &buffer));
+      device_allocator_, memory_type, buffer_usage, kAllocationSize, &buffer));
 
   EXPECT_EQ(device_allocator_, iree_hal_buffer_allocator(buffer));
   // At a mimimum, the requested memory type should be respected.
@@ -78,13 +98,24 @@
   EXPECT_TRUE(
       iree_all_bits_set(iree_hal_buffer_allowed_usage(buffer), buffer_usage));
   EXPECT_GE(iree_hal_buffer_allocation_size(buffer),
-            allocation_size);  // Larger is okay.
+            kAllocationSize);  // Larger is okay.
 
   iree_hal_buffer_release(buffer);
 }
 
-// TODO(scotttodd): iree_hal_allocator_wrap_buffer
-//     * if implemented (skip test if status is "IREE_STATUS_UNAVAILABLE")
+// While empty allocations aren't particularly useful, they can occur in
+// practice so we should at least be able to create them without errors.
+TEST_P(AllocatorTest, AllocateEmptyBuffer) {
+  iree_hal_memory_type_t memory_type = IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
+  iree_hal_buffer_usage_t buffer_usage = IREE_HAL_BUFFER_USAGE_TRANSFER;
+
+  iree_hal_buffer_t* buffer;
+  IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
+      device_allocator_, memory_type, buffer_usage, /*allocation_size=*/0,
+      &buffer));
+
+  iree_hal_buffer_release(buffer);
+}
 
 INSTANTIATE_TEST_SUITE_P(
     AllDrivers, AllocatorTest,
diff --git a/iree/hal/cts/buffer_mapping_test.cc b/iree/hal/cts/buffer_mapping_test.cc
new file mode 100644
index 0000000..19689be
--- /dev/null
+++ b/iree/hal/cts/buffer_mapping_test.cc
@@ -0,0 +1,206 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/hal/cts/cts_test_base.h"
+#include "iree/hal/testing/driver_registry.h"
+#include "iree/testing/gtest.h"
+
+namespace iree {
+namespace hal {
+namespace cts {
+
+using ::testing::ContainerEq;
+
+namespace {
+
+constexpr iree_device_size_t kAllocationSize = 1024;
+
+}  // namespace
+
+class BufferMappingTest : public CtsTestBase {};
+
+// TODO(scotttodd): move this check to SetUp() and skip tests if not supported
+//   or add general support for optional features/tests into the CTS framework?
+TEST_P(BufferMappingTest, AllocatorSupportsBufferMapping) {
+  iree_hal_memory_type_t memory_type = IREE_HAL_MEMORY_TYPE_HOST_VISIBLE;
+  iree_hal_buffer_usage_t buffer_usage = IREE_HAL_BUFFER_USAGE_MAPPING;
+
+  iree_hal_buffer_compatibility_t compatibility =
+      iree_hal_allocator_query_buffer_compatibility(
+          device_allocator_, memory_type,
+          /*allowed_usage=*/buffer_usage,
+          /*intended_usage=*/buffer_usage, kAllocationSize);
+  EXPECT_TRUE(iree_all_bits_set(compatibility,
+                                IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE));
+
+  iree_hal_buffer_t* buffer;
+  IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
+      device_allocator_, memory_type, buffer_usage, kAllocationSize, &buffer));
+
+  EXPECT_EQ(device_allocator_, iree_hal_buffer_allocator(buffer));
+  EXPECT_TRUE(
+      iree_all_bits_set(iree_hal_buffer_memory_type(buffer), memory_type));
+  EXPECT_TRUE(
+      iree_all_bits_set(iree_hal_buffer_allowed_usage(buffer), buffer_usage));
+  EXPECT_GE(iree_hal_buffer_allocation_size(buffer), kAllocationSize);
+
+  iree_hal_buffer_release(buffer);
+}
+
+TEST_P(BufferMappingTest, Zero) {
+  iree_hal_memory_type_t memory_type = IREE_HAL_MEMORY_TYPE_HOST_VISIBLE;
+  iree_hal_buffer_usage_t buffer_usage = IREE_HAL_BUFFER_USAGE_MAPPING;
+
+  iree_hal_buffer_t* buffer;
+  IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
+      device_allocator_, memory_type, buffer_usage, kAllocationSize, &buffer));
+
+  IREE_ASSERT_OK(iree_hal_buffer_zero(buffer, /*byte_offset=*/0,
+                                      /*byte_length=*/kAllocationSize));
+
+  std::vector<uint8_t> reference_buffer(kAllocationSize);
+  std::memset(reference_buffer.data(), 0, kAllocationSize);
+
+  std::vector<uint8_t> actual_data(kAllocationSize);
+  IREE_ASSERT_OK(iree_hal_buffer_read_data(
+      buffer, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+  EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+  iree_hal_buffer_release(buffer);
+}
+
+TEST_P(BufferMappingTest, FillEmpty) {
+  iree_hal_memory_type_t memory_type = IREE_HAL_MEMORY_TYPE_HOST_VISIBLE;
+  iree_hal_buffer_usage_t buffer_usage = IREE_HAL_BUFFER_USAGE_MAPPING;
+
+  iree_hal_buffer_t* buffer;
+  IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
+      device_allocator_, memory_type, buffer_usage, kAllocationSize, &buffer));
+
+  IREE_ASSERT_OK(iree_hal_buffer_zero(buffer, /*byte_offset=*/0,
+                                      /*byte_length=*/kAllocationSize));
+  uint8_t fill_value = 0x07;
+  IREE_ASSERT_OK(iree_hal_buffer_fill(buffer, /*byte_offset=*/0,
+                                      /*byte_length=*/0,  // <---- empty!
+                                      /*pattern=*/&fill_value,
+                                      /*pattern_length=*/sizeof(fill_value)));
+
+  // Note: reference is all zeros, since fill byte length is 0!
+  std::vector<uint8_t> reference_buffer(kAllocationSize);
+  std::memset(reference_buffer.data(), 0, kAllocationSize);
+
+  std::vector<uint8_t> actual_data(kAllocationSize);
+  IREE_ASSERT_OK(iree_hal_buffer_read_data(
+      buffer, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+  EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+  iree_hal_buffer_release(buffer);
+}
+
+TEST_P(BufferMappingTest, Fill) {
+  iree_hal_memory_type_t memory_type = IREE_HAL_MEMORY_TYPE_HOST_VISIBLE;
+  iree_hal_buffer_usage_t buffer_usage = IREE_HAL_BUFFER_USAGE_MAPPING;
+
+  iree_hal_buffer_t* buffer;
+  IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
+      device_allocator_, memory_type, buffer_usage, kAllocationSize, &buffer));
+
+  uint8_t fill_value = 0x07;
+  IREE_ASSERT_OK(iree_hal_buffer_fill(buffer, /*byte_offset=*/0,
+                                      /*byte_length=*/kAllocationSize,
+                                      /*pattern=*/&fill_value,
+                                      /*pattern_length=*/sizeof(fill_value)));
+
+  std::vector<uint8_t> reference_buffer(kAllocationSize);
+  std::memset(reference_buffer.data(), fill_value, kAllocationSize);
+
+  std::vector<uint8_t> actual_data(kAllocationSize);
+  IREE_ASSERT_OK(iree_hal_buffer_read_data(
+      buffer, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+  EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+  iree_hal_buffer_release(buffer);
+}
+
+TEST_P(BufferMappingTest, Write) {
+  iree_hal_memory_type_t memory_type = IREE_HAL_MEMORY_TYPE_HOST_VISIBLE;
+  iree_hal_buffer_usage_t buffer_usage = IREE_HAL_BUFFER_USAGE_MAPPING;
+
+  iree_hal_buffer_t* buffer;
+  IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
+      device_allocator_, memory_type, buffer_usage, kAllocationSize, &buffer));
+
+  uint8_t fill_value = 0x07;
+  std::vector<uint8_t> reference_buffer(kAllocationSize);
+  std::memset(reference_buffer.data(), fill_value, kAllocationSize);
+  IREE_ASSERT_OK(iree_hal_buffer_write_data(buffer, /*target_offset=*/0,
+                                            reference_buffer.data(),
+                                            reference_buffer.size()));
+
+  std::vector<uint8_t> actual_data(kAllocationSize);
+  IREE_ASSERT_OK(iree_hal_buffer_read_data(
+      buffer, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+  EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+  iree_hal_buffer_release(buffer);
+}
+TEST_P(BufferMappingTest, Copy) {
+  iree_hal_memory_type_t memory_type = IREE_HAL_MEMORY_TYPE_HOST_VISIBLE;
+  iree_hal_buffer_usage_t buffer_usage = IREE_HAL_BUFFER_USAGE_MAPPING;
+
+  iree_hal_buffer_t* buffer_a;
+  iree_hal_buffer_t* buffer_b;
+  IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
+      device_allocator_, memory_type, buffer_usage, kAllocationSize,
+      &buffer_a));
+  IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
+      device_allocator_, memory_type, buffer_usage, kAllocationSize,
+      &buffer_b));
+
+  uint8_t fill_value = 0x07;
+  IREE_ASSERT_OK(iree_hal_buffer_fill(buffer_a, /*byte_offset=*/0,
+                                      /*byte_length=*/kAllocationSize,
+                                      /*pattern=*/&fill_value,
+                                      /*pattern_length=*/sizeof(fill_value)));
+  IREE_ASSERT_OK(iree_hal_buffer_copy_data(
+      /*source_buffer=*/buffer_a,
+      /*source_offset=*/0, /*target_buffer=*/buffer_b, /*target_offset=*/0,
+      /*data_length=*/kAllocationSize));
+
+  std::vector<uint8_t> reference_buffer(kAllocationSize);
+  std::memset(reference_buffer.data(), fill_value, kAllocationSize);
+
+  std::vector<uint8_t> actual_data(kAllocationSize);
+  IREE_ASSERT_OK(iree_hal_buffer_read_data(
+      buffer_b, /*source_offset=*/0, actual_data.data(), actual_data.size()));
+  EXPECT_THAT(actual_data, ContainerEq(reference_buffer));
+
+  iree_hal_buffer_release(buffer_a);
+  iree_hal_buffer_release(buffer_b);
+}
+
+// TODO(scotttodd): iree_hal_allocator_wrap_buffer
+// TODO(scotttodd): iree_hal_heap_buffer_wrap
+// TODO(scotttodd): iree_hal_buffer_map_range
+// TODO(scotttodd): revive old tests:
+//   https://github.com/google/iree/blob/440edee8a3190d73dbceb24986eed847cac8bd31/iree/hal/buffer_mapping_test.cc
+
+INSTANTIATE_TEST_SUITE_P(
+    AllDrivers, BufferMappingTest,
+    ::testing::ValuesIn(testing::EnumerateAvailableDrivers()),
+    GenerateTestName());
+
+}  // namespace cts
+}  // namespace hal
+}  // namespace iree
diff --git a/iree/hal/cts/command_buffer_test.cc b/iree/hal/cts/command_buffer_test.cc
index f82c929..b4ee8f5 100644
--- a/iree/hal/cts/command_buffer_test.cc
+++ b/iree/hal/cts/command_buffer_test.cc
@@ -20,6 +20,12 @@
 #include "iree/testing/gtest.h"
 #include "iree/testing/status_matchers.h"
 
+// TODO(scotttodd): split into several tests, for example:
+//     command_buffer_recording_test (recording/lifetime)
+//     command_buffer_dispatch_test
+//     command_buffer_fill_test (filling buffers)
+//     command_buffer_e2e_test (barriers, dispatches)
+
 namespace iree {
 namespace hal {
 namespace cts {
diff --git a/iree/hal/cts/semaphore_submission_test.cc b/iree/hal/cts/semaphore_submission_test.cc
new file mode 100644
index 0000000..5f9c529
--- /dev/null
+++ b/iree/hal/cts/semaphore_submission_test.cc
@@ -0,0 +1,210 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/hal/cts/cts_test_base.h"
+#include "iree/hal/testing/driver_registry.h"
+#include "iree/testing/gtest.h"
+
+namespace iree {
+namespace hal {
+namespace cts {
+
+class SemaphoreSubmissionTest : public CtsTestBase {};
+
+TEST_P(SemaphoreSubmissionTest, SubmitWithNoCommandBuffers) {
+  // No waits, one signal which we immediately wait on after submit.
+  iree_hal_submission_batch_t submission_batch;
+  submission_batch.wait_semaphores.count = 0;
+  submission_batch.wait_semaphores.semaphores = NULL;
+  submission_batch.wait_semaphores.payload_values = NULL;
+  submission_batch.command_buffer_count = 0;
+  submission_batch.command_buffers = NULL;
+  iree_hal_semaphore_t* signal_semaphore;
+  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &signal_semaphore));
+  iree_hal_semaphore_t* signal_semaphore_ptrs[] = {signal_semaphore};
+  submission_batch.signal_semaphores.count =
+      IREE_ARRAYSIZE(signal_semaphore_ptrs);
+  submission_batch.signal_semaphores.semaphores = signal_semaphore_ptrs;
+  uint64_t payload_values[] = {1ull};
+  submission_batch.signal_semaphores.payload_values = payload_values;
+
+  IREE_ASSERT_OK(
+      iree_hal_device_queue_submit(device_, IREE_HAL_COMMAND_CATEGORY_DISPATCH,
+                                   /*queue_affinity=*/0,
+                                   /*batch_count=*/1, &submission_batch));
+  IREE_ASSERT_OK(iree_hal_semaphore_wait_with_deadline(
+      signal_semaphore, 1ull, IREE_TIME_INFINITE_FUTURE));
+
+  iree_hal_semaphore_release(signal_semaphore);
+}
+
+TEST_P(SemaphoreSubmissionTest, SubmitAndSignal) {
+  iree_hal_command_buffer_t* command_buffer;
+  IREE_ASSERT_OK(iree_hal_command_buffer_create(
+      device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+      IREE_HAL_COMMAND_CATEGORY_DISPATCH, &command_buffer));
+
+  IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer));
+  IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
+
+  // No waits, one signal which we immediately wait on after submit.
+  iree_hal_submission_batch_t submission_batch;
+  submission_batch.wait_semaphores.count = 0;
+  submission_batch.wait_semaphores.semaphores = NULL;
+  submission_batch.wait_semaphores.payload_values = NULL;
+  submission_batch.command_buffer_count = 1;
+  submission_batch.command_buffers = &command_buffer;
+  iree_hal_semaphore_t* signal_semaphore;
+  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &signal_semaphore));
+  iree_hal_semaphore_t* signal_semaphore_ptrs[] = {signal_semaphore};
+  submission_batch.signal_semaphores.count =
+      IREE_ARRAYSIZE(signal_semaphore_ptrs);
+  submission_batch.signal_semaphores.semaphores = signal_semaphore_ptrs;
+  uint64_t payload_values[] = {1ull};
+  submission_batch.signal_semaphores.payload_values = payload_values;
+
+  IREE_ASSERT_OK(
+      iree_hal_device_queue_submit(device_, IREE_HAL_COMMAND_CATEGORY_DISPATCH,
+                                   /*queue_affinity=*/0,
+                                   /*batch_count=*/1, &submission_batch));
+  IREE_ASSERT_OK(iree_hal_semaphore_wait_with_deadline(
+      signal_semaphore, 1ull, IREE_TIME_INFINITE_FUTURE));
+
+  iree_hal_command_buffer_release(command_buffer);
+  iree_hal_semaphore_release(signal_semaphore);
+}
+
+TEST_P(SemaphoreSubmissionTest, SubmitWithWait) {
+  // Empty command buffer.
+  iree_hal_command_buffer_t* command_buffer;
+  IREE_ASSERT_OK(iree_hal_command_buffer_create(
+      device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+      IREE_HAL_COMMAND_CATEGORY_DISPATCH, &command_buffer));
+  IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer));
+  IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
+
+  // One wait and one signal semaphore.
+  iree_hal_submission_batch_t submission_batch;
+  iree_hal_semaphore_t* wait_semaphore;
+  iree_hal_semaphore_t* signal_semaphore;
+  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &wait_semaphore));
+  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 100ull, &signal_semaphore));
+  iree_hal_semaphore_t* wait_semaphore_ptrs[] = {wait_semaphore};
+  iree_hal_semaphore_t* signal_semaphore_ptrs[] = {signal_semaphore};
+  uint64_t wait_payload_values[] = {1ull};
+  uint64_t signal_payload_values[] = {101ull};
+  submission_batch.wait_semaphores.count = IREE_ARRAYSIZE(wait_semaphore_ptrs);
+  submission_batch.wait_semaphores.semaphores = wait_semaphore_ptrs;
+  submission_batch.wait_semaphores.payload_values = wait_payload_values;
+  submission_batch.command_buffer_count = 1;
+  submission_batch.command_buffers = &command_buffer;
+  submission_batch.signal_semaphores.count =
+      IREE_ARRAYSIZE(signal_semaphore_ptrs);
+  submission_batch.signal_semaphores.semaphores = signal_semaphore_ptrs;
+  submission_batch.signal_semaphores.payload_values = signal_payload_values;
+
+  IREE_ASSERT_OK(
+      iree_hal_device_queue_submit(device_, IREE_HAL_COMMAND_CATEGORY_DISPATCH,
+                                   /*queue_affinity=*/0,
+                                   /*batch_count=*/1, &submission_batch));
+
+  // Work shouldn't start until the wait semaphore reaches its payload value.
+  uint64_t value;
+  IREE_ASSERT_OK(iree_hal_semaphore_query(signal_semaphore, &value));
+  EXPECT_EQ(100ull, value);
+
+  // Signal the wait semaphore, work should begin and complete.
+  IREE_ASSERT_OK(iree_hal_semaphore_signal(wait_semaphore, 1ull));
+  IREE_ASSERT_OK(iree_hal_semaphore_wait_with_deadline(
+      signal_semaphore, 101ull, IREE_TIME_INFINITE_FUTURE));
+
+  iree_hal_command_buffer_release(command_buffer);
+  iree_hal_semaphore_release(wait_semaphore);
+  iree_hal_semaphore_release(signal_semaphore);
+}
+
+TEST_P(SemaphoreSubmissionTest, SubmitWithMultipleSemaphores) {
+  iree_hal_command_buffer_t* command_buffer;
+  IREE_ASSERT_OK(iree_hal_command_buffer_create(
+      device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
+      IREE_HAL_COMMAND_CATEGORY_DISPATCH, &command_buffer));
+
+  IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer));
+  IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
+
+  iree_hal_submission_batch_t submission_batch;
+  iree_hal_semaphore_t* wait_semaphore_1;
+  iree_hal_semaphore_t* wait_semaphore_2;
+  iree_hal_semaphore_t* signal_semaphore_1;
+  iree_hal_semaphore_t* signal_semaphore_2;
+  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &wait_semaphore_1));
+  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &wait_semaphore_2));
+  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &signal_semaphore_1));
+  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &signal_semaphore_2));
+  iree_hal_semaphore_t* wait_semaphore_ptrs[] = {wait_semaphore_1,
+                                                 wait_semaphore_2};
+  iree_hal_semaphore_t* signal_semaphore_ptrs[] = {signal_semaphore_1,
+                                                   signal_semaphore_2};
+  uint64_t wait_payload_values[] = {1ull, 1ull};
+  uint64_t signal_payload_values[] = {1ull, 1ull};
+  submission_batch.wait_semaphores.count = IREE_ARRAYSIZE(wait_semaphore_ptrs);
+  submission_batch.wait_semaphores.semaphores = wait_semaphore_ptrs;
+  submission_batch.wait_semaphores.payload_values = wait_payload_values;
+  submission_batch.command_buffer_count = 1;
+  submission_batch.command_buffers = &command_buffer;
+  submission_batch.signal_semaphores.count =
+      IREE_ARRAYSIZE(signal_semaphore_ptrs);
+  submission_batch.signal_semaphores.semaphores = signal_semaphore_ptrs;
+  submission_batch.signal_semaphores.payload_values = signal_payload_values;
+
+  IREE_ASSERT_OK(
+      iree_hal_device_queue_submit(device_, IREE_HAL_COMMAND_CATEGORY_DISPATCH,
+                                   /*queue_affinity=*/0,
+                                   /*batch_count=*/1, &submission_batch));
+
+  // Work shouldn't start until all wait semaphores reach their payload values.
+  uint64_t value;
+  IREE_ASSERT_OK(iree_hal_semaphore_query(signal_semaphore_1, &value));
+  EXPECT_EQ(0ull, value);
+  IREE_ASSERT_OK(iree_hal_semaphore_query(signal_semaphore_2, &value));
+  EXPECT_EQ(0ull, value);
+
+  // Signal the wait semaphores, work should begin and complete.
+  IREE_ASSERT_OK(iree_hal_semaphore_signal(wait_semaphore_1, 1ull));
+  IREE_ASSERT_OK(iree_hal_semaphore_signal(wait_semaphore_2, 1ull));
+
+  iree_hal_semaphore_list_t signal_semaphore_list;
+  signal_semaphore_list.count = IREE_ARRAYSIZE(signal_semaphore_ptrs);
+  signal_semaphore_list.semaphores = signal_semaphore_ptrs;
+  uint64_t payload_values[] = {1ull, 1ull};
+  signal_semaphore_list.payload_values = payload_values;
+  IREE_ASSERT_OK(iree_hal_device_wait_semaphores_with_deadline(
+      device_, IREE_HAL_WAIT_MODE_ALL, &signal_semaphore_list,
+      IREE_TIME_INFINITE_FUTURE));
+
+  iree_hal_command_buffer_release(command_buffer);
+  iree_hal_semaphore_release(wait_semaphore_1);
+  iree_hal_semaphore_release(wait_semaphore_2);
+  iree_hal_semaphore_release(signal_semaphore_1);
+  iree_hal_semaphore_release(signal_semaphore_2);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    AllDrivers, SemaphoreSubmissionTest,
+    ::testing::ValuesIn(testing::EnumerateAvailableDrivers()),
+    GenerateTestName());
+
+}  // namespace cts
+}  // namespace hal
+}  // namespace iree
diff --git a/iree/hal/cts/semaphore_test.cc b/iree/hal/cts/semaphore_test.cc
index d2a31fa..c774241 100644
--- a/iree/hal/cts/semaphore_test.cc
+++ b/iree/hal/cts/semaphore_test.cc
@@ -85,6 +85,11 @@
       device_, IREE_HAL_WAIT_MODE_ANY, NULL, IREE_TIME_INFINITE_FUTURE));
   IREE_ASSERT_OK(iree_hal_device_wait_semaphores_with_deadline(
       device_, IREE_HAL_WAIT_MODE_ALL, NULL, IREE_TIME_INFINITE_FUTURE));
+
+  IREE_ASSERT_OK(iree_hal_device_wait_semaphores_with_timeout(
+      device_, IREE_HAL_WAIT_MODE_ANY, NULL, IREE_DURATION_INFINITE));
+  IREE_ASSERT_OK(iree_hal_device_wait_semaphores_with_timeout(
+      device_, IREE_HAL_WAIT_MODE_ALL, NULL, IREE_DURATION_INFINITE));
 }
 
 // Tests waiting on a semaphore that has already been signaled.
@@ -98,6 +103,11 @@
   IREE_ASSERT_OK(iree_hal_semaphore_wait_with_deadline(
       semaphore, 2ull, IREE_TIME_INFINITE_FUTURE));
 
+  IREE_ASSERT_OK(iree_hal_semaphore_wait_with_timeout(semaphore, 1ull,
+                                                      IREE_DURATION_INFINITE));
+  IREE_ASSERT_OK(iree_hal_semaphore_wait_with_timeout(semaphore, 2ull,
+                                                      IREE_DURATION_INFINITE));
+
   iree_hal_semaphore_release(semaphore);
 }
 
@@ -217,184 +227,6 @@
   iree_hal_semaphore_release(b2a);
 }
 
-TEST_P(SemaphoreTest, SubmitWithNoCommandBuffers) {
-  // No waits, one signal which we immediately wait on after submit.
-  iree_hal_submission_batch_t submission_batch;
-  submission_batch.wait_semaphores.count = 0;
-  submission_batch.wait_semaphores.semaphores = NULL;
-  submission_batch.wait_semaphores.payload_values = NULL;
-  submission_batch.command_buffer_count = 0;
-  submission_batch.command_buffers = NULL;
-  iree_hal_semaphore_t* signal_semaphore;
-  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &signal_semaphore));
-  iree_hal_semaphore_t* signal_semaphore_ptrs[] = {signal_semaphore};
-  submission_batch.signal_semaphores.count =
-      IREE_ARRAYSIZE(signal_semaphore_ptrs);
-  submission_batch.signal_semaphores.semaphores = signal_semaphore_ptrs;
-  uint64_t payload_values[] = {1ull};
-  submission_batch.signal_semaphores.payload_values = payload_values;
-
-  IREE_ASSERT_OK(
-      iree_hal_device_queue_submit(device_, IREE_HAL_COMMAND_CATEGORY_DISPATCH,
-                                   /*queue_affinity=*/0,
-                                   /*batch_count=*/1, &submission_batch));
-  IREE_ASSERT_OK(iree_hal_semaphore_wait_with_deadline(
-      signal_semaphore, 1ull, IREE_TIME_INFINITE_FUTURE));
-
-  iree_hal_semaphore_release(signal_semaphore);
-}
-
-TEST_P(SemaphoreTest, SubmitAndSignal) {
-  iree_hal_command_buffer_t* command_buffer;
-  IREE_ASSERT_OK(iree_hal_command_buffer_create(
-      device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
-      IREE_HAL_COMMAND_CATEGORY_DISPATCH, &command_buffer));
-
-  IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer));
-  IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
-
-  // No waits, one signal which we immediately wait on after submit.
-  iree_hal_submission_batch_t submission_batch;
-  submission_batch.wait_semaphores.count = 0;
-  submission_batch.wait_semaphores.semaphores = NULL;
-  submission_batch.wait_semaphores.payload_values = NULL;
-  submission_batch.command_buffer_count = 1;
-  submission_batch.command_buffers = &command_buffer;
-  iree_hal_semaphore_t* signal_semaphore;
-  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &signal_semaphore));
-  iree_hal_semaphore_t* signal_semaphore_ptrs[] = {signal_semaphore};
-  submission_batch.signal_semaphores.count =
-      IREE_ARRAYSIZE(signal_semaphore_ptrs);
-  submission_batch.signal_semaphores.semaphores = signal_semaphore_ptrs;
-  uint64_t payload_values[] = {1ull};
-  submission_batch.signal_semaphores.payload_values = payload_values;
-
-  IREE_ASSERT_OK(
-      iree_hal_device_queue_submit(device_, IREE_HAL_COMMAND_CATEGORY_DISPATCH,
-                                   /*queue_affinity=*/0,
-                                   /*batch_count=*/1, &submission_batch));
-  IREE_ASSERT_OK(iree_hal_semaphore_wait_with_deadline(
-      signal_semaphore, 1ull, IREE_TIME_INFINITE_FUTURE));
-
-  iree_hal_command_buffer_release(command_buffer);
-  iree_hal_semaphore_release(signal_semaphore);
-}
-
-TEST_P(SemaphoreTest, SubmitWithWait) {
-  // Empty command buffer.
-  iree_hal_command_buffer_t* command_buffer;
-  IREE_ASSERT_OK(iree_hal_command_buffer_create(
-      device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
-      IREE_HAL_COMMAND_CATEGORY_DISPATCH, &command_buffer));
-  IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer));
-  IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
-
-  // One wait and one signal semaphore.
-  iree_hal_submission_batch_t submission_batch;
-  iree_hal_semaphore_t* wait_semaphore;
-  iree_hal_semaphore_t* signal_semaphore;
-  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &wait_semaphore));
-  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 100ull, &signal_semaphore));
-  iree_hal_semaphore_t* wait_semaphore_ptrs[] = {wait_semaphore};
-  iree_hal_semaphore_t* signal_semaphore_ptrs[] = {signal_semaphore};
-  uint64_t wait_payload_values[] = {1ull};
-  uint64_t signal_payload_values[] = {101ull};
-  submission_batch.wait_semaphores.count = IREE_ARRAYSIZE(wait_semaphore_ptrs);
-  submission_batch.wait_semaphores.semaphores = wait_semaphore_ptrs;
-  submission_batch.wait_semaphores.payload_values = wait_payload_values;
-  submission_batch.command_buffer_count = 1;
-  submission_batch.command_buffers = &command_buffer;
-  submission_batch.signal_semaphores.count =
-      IREE_ARRAYSIZE(signal_semaphore_ptrs);
-  submission_batch.signal_semaphores.semaphores = signal_semaphore_ptrs;
-  submission_batch.signal_semaphores.payload_values = signal_payload_values;
-
-  IREE_ASSERT_OK(
-      iree_hal_device_queue_submit(device_, IREE_HAL_COMMAND_CATEGORY_DISPATCH,
-                                   /*queue_affinity=*/0,
-                                   /*batch_count=*/1, &submission_batch));
-
-  // Work shouldn't start until the wait semaphore reaches its payload value.
-  uint64_t value;
-  IREE_ASSERT_OK(iree_hal_semaphore_query(signal_semaphore, &value));
-  EXPECT_EQ(100ull, value);
-
-  // Signal the wait semaphore, work should begin and complete.
-  IREE_ASSERT_OK(iree_hal_semaphore_signal(wait_semaphore, 1ull));
-  IREE_ASSERT_OK(iree_hal_semaphore_wait_with_deadline(
-      signal_semaphore, 101ull, IREE_TIME_INFINITE_FUTURE));
-
-  iree_hal_command_buffer_release(command_buffer);
-  iree_hal_semaphore_release(wait_semaphore);
-  iree_hal_semaphore_release(signal_semaphore);
-}
-
-TEST_P(SemaphoreTest, SubmitWithMultipleSemaphores) {
-  iree_hal_command_buffer_t* command_buffer;
-  IREE_ASSERT_OK(iree_hal_command_buffer_create(
-      device_, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT,
-      IREE_HAL_COMMAND_CATEGORY_DISPATCH, &command_buffer));
-
-  IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer));
-  IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
-
-  iree_hal_submission_batch_t submission_batch;
-  iree_hal_semaphore_t* wait_semaphore_1;
-  iree_hal_semaphore_t* wait_semaphore_2;
-  iree_hal_semaphore_t* signal_semaphore_1;
-  iree_hal_semaphore_t* signal_semaphore_2;
-  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &wait_semaphore_1));
-  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &wait_semaphore_2));
-  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &signal_semaphore_1));
-  IREE_ASSERT_OK(iree_hal_semaphore_create(device_, 0ull, &signal_semaphore_2));
-  iree_hal_semaphore_t* wait_semaphore_ptrs[] = {wait_semaphore_1,
-                                                 wait_semaphore_2};
-  iree_hal_semaphore_t* signal_semaphore_ptrs[] = {signal_semaphore_1,
-                                                   signal_semaphore_2};
-  uint64_t wait_payload_values[] = {1ull, 1ull};
-  uint64_t signal_payload_values[] = {1ull, 1ull};
-  submission_batch.wait_semaphores.count = IREE_ARRAYSIZE(wait_semaphore_ptrs);
-  submission_batch.wait_semaphores.semaphores = wait_semaphore_ptrs;
-  submission_batch.wait_semaphores.payload_values = wait_payload_values;
-  submission_batch.command_buffer_count = 1;
-  submission_batch.command_buffers = &command_buffer;
-  submission_batch.signal_semaphores.count =
-      IREE_ARRAYSIZE(signal_semaphore_ptrs);
-  submission_batch.signal_semaphores.semaphores = signal_semaphore_ptrs;
-  submission_batch.signal_semaphores.payload_values = signal_payload_values;
-
-  IREE_ASSERT_OK(
-      iree_hal_device_queue_submit(device_, IREE_HAL_COMMAND_CATEGORY_DISPATCH,
-                                   /*queue_affinity=*/0,
-                                   /*batch_count=*/1, &submission_batch));
-
-  // Work shouldn't start until all wait semaphores reach their payload values.
-  uint64_t value;
-  IREE_ASSERT_OK(iree_hal_semaphore_query(signal_semaphore_1, &value));
-  EXPECT_EQ(0ull, value);
-  IREE_ASSERT_OK(iree_hal_semaphore_query(signal_semaphore_2, &value));
-  EXPECT_EQ(0ull, value);
-
-  // Signal the wait semaphores, work should begin and complete.
-  IREE_ASSERT_OK(iree_hal_semaphore_signal(wait_semaphore_1, 1ull));
-  IREE_ASSERT_OK(iree_hal_semaphore_signal(wait_semaphore_2, 1ull));
-
-  iree_hal_semaphore_list_t signal_semaphore_list;
-  signal_semaphore_list.count = IREE_ARRAYSIZE(signal_semaphore_ptrs);
-  signal_semaphore_list.semaphores = signal_semaphore_ptrs;
-  uint64_t payload_values[] = {1ull, 1ull};
-  signal_semaphore_list.payload_values = payload_values;
-  IREE_ASSERT_OK(iree_hal_device_wait_semaphores_with_deadline(
-      device_, IREE_HAL_WAIT_MODE_ALL, &signal_semaphore_list,
-      IREE_TIME_INFINITE_FUTURE));
-
-  iree_hal_command_buffer_release(command_buffer);
-  iree_hal_semaphore_release(wait_semaphore_1);
-  iree_hal_semaphore_release(wait_semaphore_2);
-  iree_hal_semaphore_release(signal_semaphore_1);
-  iree_hal_semaphore_release(signal_semaphore_2);
-}
-
 INSTANTIATE_TEST_SUITE_P(
     AllDrivers, SemaphoreTest,
     ::testing::ValuesIn(testing::EnumerateAvailableDrivers()),
diff --git a/iree/hal/device.c b/iree/hal/device.c
index 8ad7615..5aeddec 100644
--- a/iree/hal/device.c
+++ b/iree/hal/device.c
@@ -73,7 +73,6 @@
     const iree_hal_semaphore_list_t* semaphore_list,
     iree_duration_t timeout_ns) {
   IREE_ASSERT_ARGUMENT(device);
-  IREE_ASSERT_ARGUMENT(semaphore_list);
   if (!semaphore_list || semaphore_list->count == 0) return iree_ok_status();
   IREE_TRACE_ZONE_BEGIN(z0);
   iree_status_t status = _VTABLE_DISPATCH(device, wait_semaphores_with_timeout)(