diff --git a/experimental/web/sample_webgpu/main.c b/experimental/web/sample_webgpu/main.c
index 6463fd4..8f7d4d4 100644
--- a/experimental/web/sample_webgpu/main.c
+++ b/experimental/web/sample_webgpu/main.c
@@ -794,8 +794,8 @@
     };
     status = iree_hal_device_queue_execute(
         device, IREE_HAL_QUEUE_AFFINITY_ANY, iree_hal_semaphore_list_empty(),
-        signal_semaphores, 1, &transfer_command_buffer,
-        /*binding_tables=*/NULL);
+        signal_semaphores, transfer_command_buffer,
+        iree_hal_buffer_binding_table_empty());
   }
   // TODO(scotttodd): Make this async - pass a wait source to iree_loop_wait_one
   //     1. create iree_hal_fence_t, iree_hal_fence_insert(fance, semaphore)
diff --git a/experimental/webgpu/command_buffer.c b/experimental/webgpu/command_buffer.c
index 9240320..e4ad81b 100644
--- a/experimental/webgpu/command_buffer.c
+++ b/experimental/webgpu/command_buffer.c
@@ -575,9 +575,10 @@
   return iree_ok_status();
 }
 
-static iree_status_t iree_hal_webgpu_command_buffer_discard_buffer(
+static iree_status_t iree_hal_webgpu_command_buffer_advise_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t buffer_ref) {
+    iree_hal_buffer_ref_t buffer_ref, iree_hal_memory_advise_flags_t flags,
+    uint64_t arg0, uint64_t arg1) {
   // No-op: though maybe it'd be a useful addition to the spec as otherwise
   // false dependencies can creep in.
   return iree_ok_status();
@@ -608,7 +609,7 @@
 static iree_status_t iree_hal_webgpu_command_buffer_fill_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
     iree_hal_buffer_ref_t target_ref, const void* pattern,
-    iree_host_size_t pattern_length) {
+    iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
   iree_hal_webgpu_command_buffer_t* command_buffer =
       iree_hal_webgpu_command_buffer_cast(base_command_buffer);
 
@@ -693,7 +694,8 @@
 
 static iree_status_t iree_hal_webgpu_command_buffer_update_buffer(
     iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
-    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref) {
+    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref,
+    iree_hal_update_flags_t flags) {
   iree_hal_webgpu_command_buffer_t* command_buffer =
       iree_hal_webgpu_command_buffer_cast(base_command_buffer);
 
@@ -734,7 +736,8 @@
 
 static iree_status_t iree_hal_webgpu_command_buffer_copy_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref) {
+    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref,
+    iree_hal_copy_flags_t flags) {
   iree_hal_webgpu_command_buffer_t* command_buffer =
       iree_hal_webgpu_command_buffer_cast(base_command_buffer);
 
@@ -1041,7 +1044,7 @@
     .signal_event = iree_hal_webgpu_command_buffer_signal_event,
     .reset_event = iree_hal_webgpu_command_buffer_reset_event,
     .wait_events = iree_hal_webgpu_command_buffer_wait_events,
-    .discard_buffer = iree_hal_webgpu_command_buffer_discard_buffer,
+    .advise_buffer = iree_hal_webgpu_command_buffer_advise_buffer,
     .fill_buffer = iree_hal_webgpu_command_buffer_fill_buffer,
     .update_buffer = iree_hal_webgpu_command_buffer_update_buffer,
     .copy_buffer = iree_hal_webgpu_command_buffer_copy_buffer,
diff --git a/experimental/webgpu/webgpu_device.c b/experimental/webgpu/webgpu_device.c
index 5498caf..c9a2457 100644
--- a/experimental/webgpu/webgpu_device.c
+++ b/experimental/webgpu/webgpu_device.c
@@ -354,7 +354,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_file_t* source_file, uint64_t source_offset,
     iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_read_flags_t flags) {
   // TODO: expose streaming chunk count/size options.
   iree_status_t loop_status = iree_ok_status();
   iree_hal_file_transfer_options_t options = {
@@ -376,7 +376,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
     iree_hal_file_t* target_file, uint64_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_write_flags_t flags) {
   // TODO: expose streaming chunk count/size options.
   iree_status_t loop_status = iree_ok_status();
   iree_hal_file_transfer_options_t options = {
@@ -396,9 +396,8 @@
     iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
     const iree_hal_semaphore_list_t wait_semaphore_list,
     const iree_hal_semaphore_list_t signal_semaphore_list,
-    iree_host_size_t command_buffer_count,
-    iree_hal_command_buffer_t* const* command_buffers,
-    iree_hal_buffer_binding_table_t const* binding_tables) {
+    iree_hal_command_buffer_t* command_buffer,
+    iree_hal_buffer_binding_table_t binding_table) {
   iree_hal_webgpu_device_t* device = iree_hal_webgpu_device_cast(base_device);
 
   // TODO(benvanik): this currently assumes we are synchronizing on semaphores
@@ -410,11 +409,8 @@
                                                     iree_infinite_timeout()));
 
   // TODO(benvanik): propagate errors to semaphores.
-  for (iree_host_size_t i = 0; i < command_buffer_count; i++) {
-    iree_hal_command_buffer_t* command_buffer = command_buffers[i];
-    IREE_RETURN_IF_ERROR(
-        iree_hal_webgpu_command_buffer_issue(command_buffer, device->queue));
-  }
+  IREE_RETURN_IF_ERROR(
+      iree_hal_webgpu_command_buffer_issue(command_buffer, device->queue));
 
   IREE_RETURN_IF_ERROR(iree_hal_semaphore_list_signal(signal_semaphore_list));
 
@@ -473,6 +469,9 @@
         iree_hal_webgpu_device_query_semaphore_compatibility,
     .queue_alloca = iree_hal_webgpu_device_queue_alloca,
     .queue_dealloca = iree_hal_webgpu_device_queue_dealloca,
+    .queue_fill = iree_hal_device_queue_emulated_fill,
+    .queue_update = iree_hal_device_queue_emulated_update,
+    .queue_copy = iree_hal_device_queue_emulated_copy,
     .queue_read = iree_hal_webgpu_device_queue_read,
     .queue_write = iree_hal_webgpu_device_queue_write,
     .queue_execute = iree_hal_webgpu_device_queue_execute,
diff --git a/integrations/pjrt/src/iree_pjrt/common/api_impl.cc b/integrations/pjrt/src/iree_pjrt/common/api_impl.cc
index 694cfa8..a7c5cc7 100644
--- a/integrations/pjrt/src/iree_pjrt/common/api_impl.cc
+++ b/integrations/pjrt/src/iree_pjrt/common/api_impl.cc
@@ -590,8 +590,8 @@
       device_.device(), IREE_HAL_QUEUE_AFFINITY_ANY,
       /*wait_semaphore_list=*/iree_hal_fence_semaphore_list(ready_fence_.get()),
       /*signal_semaphore_list=*/
-      iree_hal_fence_semaphore_list(dst_buffer_ready_fence.get()),
-      /*command_buffer_count=*/1, &transfer_cb, NULL));
+      iree_hal_fence_semaphore_list(dst_buffer_ready_fence.get()), transfer_cb,
+      iree_hal_buffer_binding_table_empty()));
 
   *out_done_event = copy_done_event;
   return iree_ok_status();
@@ -837,7 +837,8 @@
   IREE_CHECK_OK(iree_hal_command_buffer_begin(transfer_cb.get()));
   IREE_RETURN_IF_ERROR(iree_hal_command_buffer_fill_buffer(
       transfer_cb.get(), buffer.get(), /*target_offset=*/0,
-      /*target_size=*/byte_length, data, element_type_byte_size));
+      /*target_size=*/byte_length, data, element_type_byte_size,
+      IREE_HAL_FILL_FLAG_NONE));
   IREE_CHECK_OK(iree_hal_command_buffer_end(transfer_cb.get()));
 
   // Execute the enqueued splat:
@@ -846,8 +847,8 @@
       /*wait_semaphore_list=*/
       {1, &transfer_timeline_, &signal_alloca_complete},
       /*signal_semaphore_list=*/
-      {1, &transfer_timeline_, &signal_copy_complete},
-      /*command_buffer_count=*/1, &transfer_cb, NULL));
+      {1, &transfer_timeline_, &signal_copy_complete}, transfer_cb,
+      iree_hal_buffer_binding_table_empty()));
 
   // Wrap in a buffer view and return:
   iree::vm::ref<iree_hal_buffer_view_t> result_buffer_view;
@@ -1190,8 +1191,8 @@
       /*wait_semaphore_list=*/
       {1, &transfer_timeline_, &signal_alloca_complete},
       /*signal_semaphore_list=*/
-      {1, &transfer_timeline_, &signal_copy_complete},
-      /*command_buffer_count=*/1, &transfer_cb, NULL));
+      {1, &transfer_timeline_, &signal_copy_complete}, transfer_cb,
+      iree_hal_buffer_binding_table_empty()));
 
   // Wrap in a buffer view and return.
   iree::vm::ref<iree_hal_buffer_view_t> result_buffer_view;
diff --git a/integrations/pjrt/src/iree_pjrt/common/iree_helpers.h b/integrations/pjrt/src/iree_pjrt/common/iree_helpers.h
index fce48ba..7577619 100644
--- a/integrations/pjrt/src/iree_pjrt/common/iree_helpers.h
+++ b/integrations/pjrt/src/iree_pjrt/common/iree_helpers.h
@@ -139,8 +139,7 @@
     iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
     const iree_hal_semaphore_list_t wait_semaphore_list,
     const iree_hal_semaphore_list_t signal_semaphore_list,
-    iree_host_size_t command_buffer_count,
-    iree_hal_command_buffer_t* const* command_buffers) {
+    iree_hal_command_buffer_t* command_buffer) {
   if (LOGGING_ENABLED) {
     LogInvoke(__func__, "device=%p, wait={%s}, signal={%s}", device,
               SemaphoreListToString(wait_semaphore_list).c_str(),
@@ -148,8 +147,8 @@
   }
   return HandleStatus(__func__, iree_hal_device_queue_execute(
                                     device, queue_affinity, wait_semaphore_list,
-                                    signal_semaphore_list, command_buffer_count,
-                                    command_buffers, /*binding_tables=*/NULL));
+                                    signal_semaphore_list, command_buffer,
+                                    iree_hal_buffer_binding_table_empty()));
 }
 
 iree_status_t hal_fence_create(iree_host_size_t capacity,
diff --git a/runtime/bindings/python/hal.cc b/runtime/bindings/python/hal.cc
index 7a0e0cd..a25a4ad 100644
--- a/runtime/bindings/python/hal.cc
+++ b/runtime/bindings/python/hal.cc
@@ -499,7 +499,7 @@
       "deallocating memory on queue");
 }
 
-void HalDevice::QueueExecute(py::handle command_buffers,
+void HalDevice::QueueExecute(py::handle command_buffer,
                              py::handle wait_semaphores,
                              py::handle signal_semaphores) {
   iree_hal_semaphore_list_t wait_list;
@@ -548,17 +548,14 @@
   }
 
   // Unpack command buffers.
-  size_t cb_count = py::len(command_buffers);
-  iree_hal_command_buffer_t** cb_list =
-      static_cast<iree_hal_command_buffer_t**>(
-          alloca(sizeof(iree_hal_command_buffer_t*) * cb_count));
-  for (size_t i = 0; i < cb_count; ++i) {
-    cb_list[i] = py::cast<HalCommandBuffer*>(command_buffers[i])->raw_ptr();
-  }
+  iree_hal_command_buffer_t* cb =
+      !command_buffer.is_none()
+          ? py::cast<HalCommandBuffer*>(command_buffer)->raw_ptr()
+          : NULL;
 
   CheckApiStatus(iree_hal_device_queue_execute(
                      raw_ptr(), IREE_HAL_QUEUE_AFFINITY_ANY, wait_list,
-                     signal_list, cb_count, cb_list, /*binding_tables=*/NULL),
+                     signal_list, cb, iree_hal_buffer_binding_table_empty()),
                  "executing command buffers");
 }
 
@@ -619,11 +616,12 @@
         "Source and buffer length must be less than the target buffer length "
         "and it does not. Please check allocations");
   }
-  CheckApiStatus(iree_hal_device_queue_copy(
-                     raw_ptr(), IREE_HAL_QUEUE_AFFINITY_ANY, wait_list,
-                     signal_list, source_buffer.raw_ptr(), 0,
-                     target_buffer.raw_ptr(), 0, source_length),
-                 "Copying buffer on queue");
+  CheckApiStatus(
+      iree_hal_device_queue_copy(
+          raw_ptr(), IREE_HAL_QUEUE_AFFINITY_ANY, wait_list, signal_list,
+          source_buffer.raw_ptr(), 0, target_buffer.raw_ptr(), 0, source_length,
+          IREE_HAL_COPY_FLAG_NONE),
+      "Copying buffer on queue");
 }
 
 py::object HalDevice::CreateDLPackCapsule(HalBufferView& buffer_view,
@@ -1729,7 +1727,8 @@
                     iree_hal_make_buffer_ref(source_buffer.raw_ptr(),
                                              source_offset, resolved_length),
                     iree_hal_make_buffer_ref(target_buffer.raw_ptr(),
-                                             target_offset, resolved_length)),
+                                             target_offset, resolved_length),
+                    IREE_HAL_COPY_FLAG_NONE),
                 "copy command");
             if (end) {
               CheckApiStatus(iree_hal_command_buffer_end(self.raw_ptr()),
@@ -1767,7 +1766,8 @@
                     self.raw_ptr(),
                     iree_hal_make_buffer_ref(target_buffer.raw_ptr(),
                                              target_offset, resolved_length),
-                    pattern_view.buf, pattern_view.len),
+                    pattern_view.buf, pattern_view.len,
+                    IREE_HAL_FILL_FLAG_NONE),
                 "command buffer fill");
             if (end) {
               CheckApiStatus(iree_hal_command_buffer_end(self.raw_ptr()),
diff --git a/runtime/bindings/python/iree/runtime/_binding.pyi b/runtime/bindings/python/iree/runtime/_binding.pyi
index 040b92f..b4ef2ba 100644
--- a/runtime/bindings/python/iree/runtime/_binding.pyi
+++ b/runtime/bindings/python/iree/runtime/_binding.pyi
@@ -185,7 +185,7 @@
     ) -> None: ...
     def queue_execute(
         self,
-        command_buffers: Sequence[HalCommandBuffer],
+        command_buffer: HalCommandBuffer,
         wait_semaphores: HalSemaphoreList,
         signal_semaphores: HalSemaphoreList,
     ) -> None: ...
diff --git a/runtime/bindings/python/tests/hal_test.py b/runtime/bindings/python/tests/hal_test.py
index 21079c9..348a628 100644
--- a/runtime/bindings/python/tests/hal_test.py
+++ b/runtime/bindings/python/tests/hal_test.py
@@ -463,7 +463,7 @@
 
         sem = self.device.create_semaphore(0)
         self.device.queue_execute(
-            [cb], wait_semaphores=[(sem, 0)], signal_semaphores=[(sem, 1)]
+            cb, wait_semaphores=[(sem, 0)], signal_semaphores=[(sem, 1)]
         )
         iree.runtime.HalFence.create_at(sem, 1).wait()
 
@@ -479,7 +479,7 @@
 
         sem = self.device.create_semaphore(0)
         self.device.queue_execute(
-            [cb],
+            cb,
             wait_semaphores=iree.runtime.HalFence.create_at(sem, 0),
             signal_semaphores=iree.runtime.HalFence.create_at(sem, 1),
         )
diff --git a/runtime/src/iree/hal/buffer_transfer.c b/runtime/src/iree/hal/buffer_transfer.c
index bb4780f..d4f30bb 100644
--- a/runtime/src/iree/hal/buffer_transfer.c
+++ b/runtime/src/iree/hal/buffer_transfer.c
@@ -78,7 +78,7 @@
     };
     status = iree_hal_device_queue_execute(
         device, IREE_HAL_QUEUE_AFFINITY_ANY, wait_semaphores, signal_semaphores,
-        1, &command_buffer, /*binding_tables=*/NULL);
+        command_buffer, iree_hal_buffer_binding_table_empty());
   }
   if (iree_status_is_ok(status)) {
     status = iree_hal_semaphore_wait(fence_semaphore, signal_value, timeout);
diff --git a/runtime/src/iree/hal/command_buffer.c b/runtime/src/iree/hal/command_buffer.c
index 44d767a..7f26263 100644
--- a/runtime/src/iree/hal/command_buffer.c
+++ b/runtime/src/iree/hal/command_buffer.c
@@ -405,25 +405,27 @@
   return status;
 }
 
-IREE_API_EXPORT iree_status_t iree_hal_command_buffer_discard_buffer(
-    iree_hal_command_buffer_t* command_buffer,
-    iree_hal_buffer_ref_t buffer_ref) {
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_advise_buffer(
+    iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_ref_t buffer_ref,
+    iree_hal_memory_advise_flags_t flags, uint64_t arg0, uint64_t arg1) {
   IREE_ASSERT_ARGUMENT(command_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
   IF_VALIDATING(command_buffer, {
     IREE_RETURN_AND_END_ZONE_IF_ERROR(
-        z0, iree_hal_command_buffer_discard_buffer_validation(
-                command_buffer, VALIDATION_STATE(command_buffer), buffer_ref));
+        z0, iree_hal_command_buffer_advise_buffer_validation(
+                command_buffer, VALIDATION_STATE(command_buffer), buffer_ref,
+                flags, arg0, arg1));
   });
-  iree_status_t status = _VTABLE_DISPATCH(command_buffer, discard_buffer)(
-      command_buffer, buffer_ref);
+  iree_status_t status = _VTABLE_DISPATCH(command_buffer, advise_buffer)(
+      command_buffer, buffer_ref, flags, arg0, arg1);
   IREE_TRACE_ZONE_END(z0);
   return status;
 }
 
 IREE_API_EXPORT iree_status_t iree_hal_command_buffer_fill_buffer(
     iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_ref_t target_ref,
-    const void* pattern, iree_host_size_t pattern_length) {
+    const void* pattern, iree_host_size_t pattern_length,
+    iree_hal_fill_flags_t flags) {
   IREE_ASSERT_ARGUMENT(command_buffer);
   if (target_ref.length == 0) {
     // No-op fill. All other validation is skipped.
@@ -434,17 +436,18 @@
     IREE_RETURN_AND_END_ZONE_IF_ERROR(
         z0, iree_hal_command_buffer_fill_buffer_validation(
                 command_buffer, VALIDATION_STATE(command_buffer), target_ref,
-                pattern, pattern_length));
+                pattern, pattern_length, flags));
   });
   iree_status_t status = _VTABLE_DISPATCH(command_buffer, fill_buffer)(
-      command_buffer, target_ref, pattern, pattern_length);
+      command_buffer, target_ref, pattern, pattern_length, flags);
   IREE_TRACE_ZONE_END(z0);
   return status;
 }
 
 IREE_API_EXPORT iree_status_t iree_hal_command_buffer_update_buffer(
     iree_hal_command_buffer_t* command_buffer, const void* source_buffer,
-    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref) {
+    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref,
+    iree_hal_update_flags_t flags) {
   IREE_ASSERT_ARGUMENT(command_buffer);
   IREE_ASSERT_ARGUMENT(source_buffer);
   if (target_ref.length == 0) {
@@ -456,17 +459,17 @@
     IREE_RETURN_AND_END_ZONE_IF_ERROR(
         z0, iree_hal_command_buffer_update_buffer_validation(
                 command_buffer, VALIDATION_STATE(command_buffer), source_buffer,
-                source_offset, target_ref));
+                source_offset, target_ref, flags));
   });
   iree_status_t status = _VTABLE_DISPATCH(command_buffer, update_buffer)(
-      command_buffer, source_buffer, source_offset, target_ref);
+      command_buffer, source_buffer, source_offset, target_ref, flags);
   IREE_TRACE_ZONE_END(z0);
   return status;
 }
 
 IREE_API_EXPORT iree_status_t iree_hal_command_buffer_copy_buffer(
     iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_ref_t source_ref,
-    iree_hal_buffer_ref_t target_ref) {
+    iree_hal_buffer_ref_t target_ref, iree_hal_copy_flags_t flags) {
   IREE_ASSERT_ARGUMENT(command_buffer);
   if (target_ref.length == 0) {
     // No-op copy. All other validation is skipped.
@@ -477,10 +480,10 @@
     IREE_RETURN_AND_END_ZONE_IF_ERROR(
         z0, iree_hal_command_buffer_copy_buffer_validation(
                 command_buffer, VALIDATION_STATE(command_buffer), source_ref,
-                target_ref));
+                target_ref, flags));
   });
   iree_status_t status = _VTABLE_DISPATCH(command_buffer, copy_buffer)(
-      command_buffer, source_ref, target_ref);
+      command_buffer, source_ref, target_ref, flags);
   IREE_TRACE_ZONE_END(z0);
   return status;
 }
@@ -589,7 +592,7 @@
 
 IREE_API_EXPORT iree_status_t iree_hal_command_buffer_validate_submission(
     iree_hal_command_buffer_t* command_buffer,
-    const iree_hal_buffer_binding_table_t* binding_table) {
+    iree_hal_buffer_binding_table_t binding_table) {
   IREE_ASSERT_ARGUMENT(command_buffer);
 
   // Validate the command buffer has been recorded properly.
@@ -604,17 +607,16 @@
   // the command buffer was allocated with.
   if (command_buffer->binding_count == 0) {
     return iree_ok_status();
-  } else if (!binding_table) {
+  } else if (binding_table.count == 0) {
     return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                             "indirect command buffer requires at least %u "
                             "bindings but no binding table was provided",
                             command_buffer->binding_count);
-  } else if (binding_table->count < command_buffer->binding_count) {
+  } else if (binding_table.count < command_buffer->binding_count) {
     return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
                             "indirect command buffer requires at least %u "
                             "bindings but only %" PRIhsz " were provided ",
-                            command_buffer->binding_count,
-                            binding_table->count);
+                            command_buffer->binding_count, binding_table.count);
   }
 
   // Validate the binding table against the commands consuming them.
@@ -622,7 +624,7 @@
   // requested on the command buffer.
   IF_VALIDATING(command_buffer, {
     IREE_RETURN_IF_ERROR(iree_hal_command_buffer_binding_table_validation(
-        command_buffer, VALIDATION_STATE(command_buffer), *binding_table));
+        command_buffer, VALIDATION_STATE(command_buffer), binding_table));
   });
 
   return iree_ok_status();
@@ -658,7 +660,16 @@
                                        transfer_command->fill.target_offset,
                                        transfer_command->fill.length),
               transfer_command->fill.pattern,
-              transfer_command->fill.pattern_length);
+              transfer_command->fill.pattern_length, IREE_HAL_FILL_FLAG_NONE);
+          break;
+        case IREE_HAL_TRANSFER_COMMAND_TYPE_UPDATE:
+          status = iree_hal_command_buffer_update_buffer(
+              command_buffer, transfer_command->update.source_buffer,
+              transfer_command->update.source_offset,
+              iree_hal_make_buffer_ref(transfer_command->update.target_buffer,
+                                       transfer_command->update.target_offset,
+                                       transfer_command->update.length),
+              IREE_HAL_UPDATE_FLAG_NONE);
           break;
         case IREE_HAL_TRANSFER_COMMAND_TYPE_COPY:
           status = iree_hal_command_buffer_copy_buffer(
@@ -668,15 +679,8 @@
                                        transfer_command->copy.length),
               iree_hal_make_buffer_ref(transfer_command->copy.target_buffer,
                                        transfer_command->copy.target_offset,
-                                       transfer_command->copy.length));
-          break;
-        case IREE_HAL_TRANSFER_COMMAND_TYPE_UPDATE:
-          status = iree_hal_command_buffer_update_buffer(
-              command_buffer, transfer_command->update.source_buffer,
-              transfer_command->update.source_offset,
-              iree_hal_make_buffer_ref(transfer_command->update.target_buffer,
-                                       transfer_command->update.target_offset,
-                                       transfer_command->update.length));
+                                       transfer_command->copy.length),
+              IREE_HAL_COPY_FLAG_NONE);
           break;
         default:
           status =
diff --git a/runtime/src/iree/hal/command_buffer.h b/runtime/src/iree/hal/command_buffer.h
index 7d2cded..f15f74a 100644
--- a/runtime/src/iree/hal/command_buffer.h
+++ b/runtime/src/iree/hal/command_buffer.h
@@ -214,6 +214,34 @@
   iree_hal_buffer_ref_t buffer_ref;
 } iree_hal_buffer_barrier_t;
 
+// Bitfield indicating advice for implementations managing a buffer.
+typedef uint64_t iree_hal_memory_advise_flags_t;
+enum iree_hal_memory_advise_flag_bits_t {
+  IREE_HAL_MEMORY_ADVISE_FLAG_NONE = 0,
+  // TODO(benvanik): cache control operations (invalidate/flush). arg0/arg1
+  // could be source/target queue affinities.
+  // TODO(benvanik): prefetch and access type hints.
+  // TODO(benvanik): ASAN hints (protect/unprotect).
+};
+
+// Bitfield specifying flags controlling a fill operation.
+typedef uint64_t iree_hal_fill_flags_t;
+enum iree_hal_fill_flag_bits_t {
+  IREE_HAL_FILL_FLAG_NONE = 0,
+};
+
+// Bitfield specifying flags controlling an update operation.
+typedef uint64_t iree_hal_update_flags_t;
+enum iree_hal_update_flag_bits_t {
+  IREE_HAL_UPDATE_FLAG_NONE = 0,
+};
+
+// Bitfield specifying flags controlling a copy operation.
+typedef uint64_t iree_hal_copy_flags_t;
+enum iree_hal_copy_flag_bits_t {
+  IREE_HAL_COPY_FLAG_NONE = 0,
+};
+
 // Specifies the type of collective operation.
 enum iree_hal_collective_kind_e {
   // Gathers N*|element_count| elements of the specified type in |recv_binding|
@@ -391,10 +419,10 @@
     iree_hal_collective_element_type_t element_type);
 
 // Bitfield specifying flags controlling a dispatch operation.
+typedef uint64_t iree_hal_dispatch_flags_t;
 enum iree_hal_dispatch_flag_bits_t {
   IREE_HAL_DISPATCH_FLAG_NONE = 0,
 };
-typedef uint64_t iree_hal_dispatch_flags_t;
 
 // An RGBA color.
 typedef struct iree_hal_label_color_t {
@@ -669,13 +697,12 @@
     iree_host_size_t buffer_barrier_count,
     const iree_hal_buffer_barrier_t* buffer_barriers);
 
-// Hints to the device queue that the given buffer will not be used again.
-// After encoding a discard the buffer contents will be considered undefined.
-// This is because the discard may be used to elide write backs to host memory
-// or aggressively reuse the allocation for other purposes.
-IREE_API_EXPORT iree_status_t iree_hal_command_buffer_discard_buffer(
-    iree_hal_command_buffer_t* command_buffer,
-    iree_hal_buffer_ref_t buffer_ref);
+// Advises the device about the usage of the given buffer.
+// The device may use this information to perform cache management or ignore it
+// entirely.
+IREE_API_EXPORT iree_status_t iree_hal_command_buffer_advise_buffer(
+    iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_ref_t buffer_ref,
+    iree_hal_memory_advise_flags_t flags, uint64_t arg0, uint64_t arg1);
 
 // Fills the target buffer with the given repeating value.
 // Expects that |pattern_length| is one of 1, 2, or 4 and that the offset and
@@ -684,7 +711,8 @@
 // device queue and be allocated with IREE_HAL_BUFFER_USAGE_TRANSFER.
 IREE_API_EXPORT iree_status_t iree_hal_command_buffer_fill_buffer(
     iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_ref_t target_ref,
-    const void* pattern, iree_host_size_t pattern_length);
+    const void* pattern, iree_host_size_t pattern_length,
+    iree_hal_fill_flags_t flags);
 
 // Updates a range of the given target buffer from the source host memory.
 // The source host memory is copied immediately into the command buffer and
@@ -697,7 +725,8 @@
 // device queue and be allocated with IREE_HAL_BUFFER_USAGE_TRANSFER.
 IREE_API_EXPORT iree_status_t iree_hal_command_buffer_update_buffer(
     iree_hal_command_buffer_t* command_buffer, const void* source_buffer,
-    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref);
+    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref,
+    iree_hal_update_flags_t flags);
 
 // Copies a range of one buffer to another.
 // Both buffers must be compatible with the devices owned by this device
@@ -709,7 +738,7 @@
 // copies.
 IREE_API_EXPORT iree_status_t iree_hal_command_buffer_copy_buffer(
     iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_ref_t source_ref,
-    iree_hal_buffer_ref_t target_ref);
+    iree_hal_buffer_ref_t target_ref, iree_hal_copy_flags_t flags);
 
 // Dispatches a collective operation defined by |op| using the given buffers.
 // |param| must be specified for operations that require a root/peer rank
@@ -763,7 +792,7 @@
 // are used by the command buffer are provided they will be ignored.
 IREE_API_EXPORT iree_status_t iree_hal_command_buffer_validate_submission(
     iree_hal_command_buffer_t* command_buffer,
-    const iree_hal_buffer_binding_table_t* binding_table);
+    iree_hal_buffer_binding_table_t binding_table);
 
 //===----------------------------------------------------------------------===//
 // Utilities for command buffer creation
@@ -773,10 +802,10 @@
 typedef enum iree_hal_transfer_command_type_t {
   // iree_hal_command_buffer_fill_buffer
   IREE_HAL_TRANSFER_COMMAND_TYPE_FILL = 0u,
-  // iree_hal_command_buffer_copy_buffer
-  IREE_HAL_TRANSFER_COMMAND_TYPE_COPY = 1u,
   // iree_hal_command_buffer_update_buffer
-  IREE_HAL_TRANSFER_COMMAND_TYPE_UPDATE = 2u,
+  IREE_HAL_TRANSFER_COMMAND_TYPE_UPDATE = 1u,
+  // iree_hal_command_buffer_copy_buffer
+  IREE_HAL_TRANSFER_COMMAND_TYPE_COPY = 2u,
 } iree_hal_transfer_command_type_t;
 
 // Represents a single transfer command within a batch of commands.
@@ -792,14 +821,6 @@
       const void* pattern;
       iree_host_size_t pattern_length;
     } fill;
-    // IREE_HAL_TRANSFER_COMMAND_TYPE_COPY
-    struct {
-      iree_hal_buffer_t* source_buffer;
-      iree_device_size_t source_offset;
-      iree_hal_buffer_t* target_buffer;
-      iree_device_size_t target_offset;
-      iree_device_size_t length;
-    } copy;
     // IREE_HAL_TRANSFER_COMMAND_TYPE_UPDATE
     struct {
       const void* source_buffer;
@@ -808,6 +829,14 @@
       iree_device_size_t target_offset;
       iree_device_size_t length;
     } update;
+    // IREE_HAL_TRANSFER_COMMAND_TYPE_COPY
+    struct {
+      iree_hal_buffer_t* source_buffer;
+      iree_device_size_t source_offset;
+      iree_hal_buffer_t* target_buffer;
+      iree_device_size_t target_offset;
+      iree_device_size_t length;
+    } copy;
   };
 } iree_hal_transfer_command_t;
 
@@ -872,22 +901,25 @@
       iree_host_size_t buffer_barrier_count,
       const iree_hal_buffer_barrier_t* buffer_barriers);
 
-  iree_status_t(IREE_API_PTR* discard_buffer)(
+  iree_status_t(IREE_API_PTR* advise_buffer)(
       iree_hal_command_buffer_t* command_buffer,
-      iree_hal_buffer_ref_t buffer_ref);
+      iree_hal_buffer_ref_t buffer_ref, iree_hal_memory_advise_flags_t flags,
+      uint64_t arg0, uint64_t arg1);
 
   iree_status_t(IREE_API_PTR* fill_buffer)(
       iree_hal_command_buffer_t* command_buffer,
       iree_hal_buffer_ref_t target_ref, const void* pattern,
-      iree_host_size_t pattern_length);
+      iree_host_size_t pattern_length, iree_hal_fill_flags_t flags);
 
   iree_status_t(IREE_API_PTR* update_buffer)(
       iree_hal_command_buffer_t* command_buffer, const void* source_buffer,
-      iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref);
+      iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref,
+      iree_hal_update_flags_t flags);
 
   iree_status_t(IREE_API_PTR* copy_buffer)(
       iree_hal_command_buffer_t* command_buffer,
-      iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref);
+      iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref,
+      iree_hal_copy_flags_t flags);
 
   iree_status_t(IREE_API_PTR* collective)(
       iree_hal_command_buffer_t* command_buffer, iree_hal_channel_t* channel,
diff --git a/runtime/src/iree/hal/command_buffer_validation.c b/runtime/src/iree/hal/command_buffer_validation.c
index 832e652..2d82932 100644
--- a/runtime/src/iree/hal/command_buffer_validation.c
+++ b/runtime/src/iree/hal/command_buffer_validation.c
@@ -331,10 +331,11 @@
   return iree_ok_status();
 }
 
-iree_status_t iree_hal_command_buffer_discard_buffer_validation(
+iree_status_t iree_hal_command_buffer_advise_buffer_validation(
     iree_hal_command_buffer_t* command_buffer,
     iree_hal_command_buffer_validation_state_t* validation_state,
-    iree_hal_buffer_ref_t buffer_ref) {
+    iree_hal_buffer_ref_t buffer_ref, iree_hal_memory_advise_flags_t flags,
+    uint64_t arg0, uint64_t arg1) {
   IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_categories(
       command_buffer, validation_state, IREE_HAL_COMMAND_CATEGORY_TRANSFER));
 
@@ -352,7 +353,7 @@
     iree_hal_command_buffer_t* command_buffer,
     iree_hal_command_buffer_validation_state_t* validation_state,
     iree_hal_buffer_ref_t target_ref, const void* pattern,
-    iree_host_size_t pattern_length) {
+    iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
   IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_categories(
       command_buffer, validation_state, IREE_HAL_COMMAND_CATEGORY_TRANSFER));
 
@@ -392,7 +393,7 @@
     iree_hal_command_buffer_t* command_buffer,
     iree_hal_command_buffer_validation_state_t* validation_state,
     const void* source_buffer, iree_host_size_t source_offset,
-    iree_hal_buffer_ref_t target_ref) {
+    iree_hal_buffer_ref_t target_ref, iree_hal_update_flags_t flags) {
   IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_categories(
       command_buffer, validation_state, IREE_HAL_COMMAND_CATEGORY_TRANSFER));
 
@@ -412,7 +413,8 @@
 iree_status_t iree_hal_command_buffer_copy_buffer_validation(
     iree_hal_command_buffer_t* command_buffer,
     iree_hal_command_buffer_validation_state_t* validation_state,
-    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref) {
+    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref,
+    iree_hal_copy_flags_t flags) {
   IREE_RETURN_IF_ERROR(iree_hal_command_buffer_validate_categories(
       command_buffer, validation_state, IREE_HAL_COMMAND_CATEGORY_TRANSFER));
 
diff --git a/runtime/src/iree/hal/command_buffer_validation.h b/runtime/src/iree/hal/command_buffer_validation.h
index dee7bb4..5bae019 100644
--- a/runtime/src/iree/hal/command_buffer_validation.h
+++ b/runtime/src/iree/hal/command_buffer_validation.h
@@ -99,27 +99,29 @@
     iree_host_size_t buffer_barrier_count,
     const iree_hal_buffer_barrier_t* buffer_barriers);
 
-iree_status_t iree_hal_command_buffer_discard_buffer_validation(
+iree_status_t iree_hal_command_buffer_advise_buffer_validation(
     iree_hal_command_buffer_t* command_buffer,
     iree_hal_command_buffer_validation_state_t* validation_state,
-    iree_hal_buffer_ref_t buffer_ref);
+    iree_hal_buffer_ref_t buffer_ref, iree_hal_memory_advise_flags_t flags,
+    uint64_t arg0, uint64_t arg1);
 
 iree_status_t iree_hal_command_buffer_fill_buffer_validation(
     iree_hal_command_buffer_t* command_buffer,
     iree_hal_command_buffer_validation_state_t* validation_state,
     iree_hal_buffer_ref_t target_ref, const void* pattern,
-    iree_host_size_t pattern_length);
+    iree_host_size_t pattern_length, iree_hal_fill_flags_t flags);
 
 iree_status_t iree_hal_command_buffer_update_buffer_validation(
     iree_hal_command_buffer_t* command_buffer,
     iree_hal_command_buffer_validation_state_t* validation_state,
     const void* source_buffer, iree_host_size_t source_offset,
-    iree_hal_buffer_ref_t target_ref);
+    iree_hal_buffer_ref_t target_ref, iree_hal_update_flags_t flags);
 
 iree_status_t iree_hal_command_buffer_copy_buffer_validation(
     iree_hal_command_buffer_t* command_buffer,
     iree_hal_command_buffer_validation_state_t* validation_state,
-    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref);
+    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref,
+    iree_hal_copy_flags_t flags);
 
 iree_status_t iree_hal_command_buffer_collective_validation(
     iree_hal_command_buffer_t* command_buffer,
diff --git a/runtime/src/iree/hal/cts/command_buffer_copy_buffer_test.h b/runtime/src/iree/hal/cts/command_buffer_copy_buffer_test.h
index 70735c0..2cb1fef 100644
--- a/runtime/src/iree/hal/cts/command_buffer_copy_buffer_test.h
+++ b/runtime/src/iree/hal/cts/command_buffer_copy_buffer_test.h
@@ -69,7 +69,8 @@
       command_buffer, /*source_ref=*/
       iree_hal_make_buffer_ref(host_buffer, 0, kDefaultAllocationSize),
       /*target_ref=*/
-      iree_hal_make_buffer_ref(device_buffer, 0, kDefaultAllocationSize)));
+      iree_hal_make_buffer_ref(device_buffer, 0, kDefaultAllocationSize),
+      IREE_HAL_COPY_FLAG_NONE));
   IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
 
   IREE_ASSERT_OK(SubmitCommandBufferAndWait(command_buffer));
@@ -136,7 +137,7 @@
       command_buffer,
       iree_hal_make_buffer_ref(device_buffer, /*target_offset=*/0,
                                /*length=*/8),
-      &zero_val, /*pattern_length=*/sizeof(zero_val)));
+      &zero_val, /*pattern_length=*/sizeof(zero_val), IREE_HAL_FILL_FLAG_NONE));
   IREE_ASSERT_OK(iree_hal_command_buffer_copy_buffer(
       command_buffer,
       iree_hal_make_buffer_ref(/*source_buffer=*/host_buffer,
@@ -144,7 +145,8 @@
                                /*length=*/kDefaultAllocationSize / 2 - 4),
       iree_hal_make_buffer_ref(/*target_buffer=*/device_buffer,
                                /*target_offset=*/8,
-                               /*length=*/kDefaultAllocationSize / 2 - 4)));
+                               /*length=*/kDefaultAllocationSize / 2 - 4),
+      IREE_HAL_COPY_FLAG_NONE));
   IREE_ASSERT_OK(iree_hal_command_buffer_fill_buffer(
       command_buffer,
       iree_hal_make_buffer_ref(
@@ -153,7 +155,7 @@
           /*length=*/kDefaultAllocationSize -
               (8 + kDefaultAllocationSize / 2 - 4)),
       &zero_val,
-      /*pattern_length=*/sizeof(zero_val)));
+      /*pattern_length=*/sizeof(zero_val), IREE_HAL_FILL_FLAG_NONE));
   IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
 
   IREE_ASSERT_OK(SubmitCommandBufferAndWait(command_buffer));
@@ -223,7 +225,7 @@
       command_buffer,
       iree_hal_make_indirect_buffer_ref(kDeviceBufferSlot, /*offset=*/0,
                                         /*length=*/8),
-      &zero_val, /*pattern_length=*/sizeof(zero_val)));
+      &zero_val, /*pattern_length=*/sizeof(zero_val), IREE_HAL_FILL_FLAG_NONE));
   IREE_ASSERT_OK(iree_hal_command_buffer_copy_buffer(
       command_buffer,
       iree_hal_make_indirect_buffer_ref(
@@ -233,7 +235,8 @@
       iree_hal_make_indirect_buffer_ref(
           kDeviceBufferSlot,
           /*offset=*/8,
-          /*length=*/kDefaultAllocationSize / 2 - 4)));
+          /*length=*/kDefaultAllocationSize / 2 - 4),
+      IREE_HAL_COPY_FLAG_NONE));
   IREE_ASSERT_OK(iree_hal_command_buffer_fill_buffer(
       command_buffer,
       iree_hal_make_indirect_buffer_ref(
@@ -242,7 +245,7 @@
           /*length=*/kDefaultAllocationSize -
               (8 + kDefaultAllocationSize / 2 - 4)),
       &zero_val,
-      /*pattern_length=*/sizeof(zero_val)));
+      /*pattern_length=*/sizeof(zero_val), IREE_HAL_FILL_FLAG_NONE));
   IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer));
 
   const iree_hal_buffer_binding_t bindings[] = {
diff --git a/runtime/src/iree/hal/cts/command_buffer_fill_buffer_test.h b/runtime/src/iree/hal/cts/command_buffer_fill_buffer_test.h
index d5dc0a7..d28040c 100644
--- a/runtime/src/iree/hal/cts/command_buffer_fill_buffer_test.h
+++ b/runtime/src/iree/hal/cts/command_buffer_fill_buffer_test.h
@@ -58,7 +58,7 @@
     IREE_CHECK_OK(iree_hal_command_buffer_fill_buffer(
         command_buffer,
         iree_hal_make_buffer_ref(device_buffer, target_offset, fill_length),
-        pattern, pattern_length));
+        pattern, pattern_length, IREE_HAL_FILL_FLAG_NONE));
     IREE_CHECK_OK(iree_hal_command_buffer_end(command_buffer));
     IREE_CHECK_OK(SubmitCommandBufferAndWait(command_buffer));
 
@@ -94,7 +94,7 @@
     IREE_CHECK_OK(iree_hal_command_buffer_fill_buffer(
         command_buffer,
         iree_hal_make_buffer_ref(device_buffer, target_offset, fill_length),
-        pattern, pattern_length));
+        pattern, pattern_length, IREE_HAL_FILL_FLAG_NONE));
     IREE_CHECK_OK(iree_hal_command_buffer_end(command_buffer));
     IREE_CHECK_OK(SubmitCommandBufferAndWait(command_buffer));
 
diff --git a/runtime/src/iree/hal/cts/command_buffer_update_buffer_test.h b/runtime/src/iree/hal/cts/command_buffer_update_buffer_test.h
index 55f9b64..0ba96e4 100644
--- a/runtime/src/iree/hal/cts/command_buffer_update_buffer_test.h
+++ b/runtime/src/iree/hal/cts/command_buffer_update_buffer_test.h
@@ -43,7 +43,8 @@
   IREE_CHECK_OK(iree_hal_command_buffer_update_buffer(
       command_buffer,
       /*source_buffer=*/source_buffer.data(), /*source_offset=*/0,
-      iree_hal_make_buffer_ref(device_buffer, 0, target_buffer_size)));
+      iree_hal_make_buffer_ref(device_buffer, 0, target_buffer_size),
+      IREE_HAL_UPDATE_FLAG_NONE));
   IREE_CHECK_OK(iree_hal_command_buffer_end(command_buffer));
   IREE_CHECK_OK(SubmitCommandBufferAndWait(command_buffer));
 
@@ -81,7 +82,8 @@
       command_buffer,
       /*source_buffer=*/source_buffer.data(), /*source_offset=*/4,
       iree_hal_make_buffer_ref(device_buffer,
-                               /*target_offset=*/4, /*length=*/8)));
+                               /*target_offset=*/4, /*length=*/8),
+      IREE_HAL_UPDATE_FLAG_NONE));
   IREE_CHECK_OK(iree_hal_command_buffer_end(command_buffer));
   IREE_CHECK_OK(SubmitCommandBufferAndWait(command_buffer));
 
@@ -129,7 +131,8 @@
       command_buffer,
       /*source_buffer=*/source_buffer.data(), /*source_offset=*/4,
       iree_hal_make_buffer_ref(buffer_subspan,
-                               /*target_offset=*/4, /*length=*/4)));
+                               /*target_offset=*/4, /*length=*/4),
+      IREE_HAL_UPDATE_FLAG_NONE));
   IREE_CHECK_OK(iree_hal_command_buffer_end(command_buffer));
   IREE_CHECK_OK(SubmitCommandBufferAndWait(command_buffer));
 
diff --git a/runtime/src/iree/hal/cts/cts_test_base.h b/runtime/src/iree/hal/cts/cts_test_base.h
index 8799817..c388e37 100644
--- a/runtime/src/iree/hal/cts/cts_test_base.h
+++ b/runtime/src/iree/hal/cts/cts_test_base.h
@@ -219,15 +219,6 @@
       iree_hal_command_buffer_t* command_buffer,
       iree_hal_buffer_binding_table_t binding_table =
           iree_hal_buffer_binding_table_empty()) {
-    return SubmitCommandBuffersAndWait(1, &command_buffer, &binding_table);
-  }
-
-  // Submits |command_buffers| to the device and waits for them to complete
-  // before returning.
-  iree_status_t SubmitCommandBuffersAndWait(
-      iree_host_size_t command_buffer_count,
-      iree_hal_command_buffer_t** command_buffers,
-      const iree_hal_buffer_binding_table_t* binding_tables = nullptr) {
     // No wait semaphores.
     iree_hal_semaphore_list_t wait_semaphores = iree_hal_semaphore_list_empty();
 
@@ -244,8 +235,7 @@
 
     iree_status_t status = iree_hal_device_queue_execute(
         device_, IREE_HAL_QUEUE_AFFINITY_ANY, wait_semaphores,
-        signal_semaphores, command_buffer_count, command_buffers,
-        binding_tables);
+        signal_semaphores, command_buffer, binding_table);
     if (iree_status_is_ok(status)) {
       status = iree_hal_semaphore_wait(signal_semaphore, target_payload_value,
                                        iree_infinite_timeout());
diff --git a/runtime/src/iree/hal/cts/event_test.h b/runtime/src/iree/hal/cts/event_test.h
index 01d56e6..c936f59 100644
--- a/runtime/src/iree/hal/cts/event_test.h
+++ b/runtime/src/iree/hal/cts/event_test.h
@@ -85,12 +85,8 @@
       /*buffer_barriers=*/NULL));
   IREE_ASSERT_OK(iree_hal_command_buffer_end(command_buffer_2));
 
-  iree_hal_command_buffer_t* command_buffer_ptrs[] = {
-      command_buffer_1,
-      command_buffer_2,
-  };
-  IREE_ASSERT_OK(SubmitCommandBuffersAndWait(
-      IREE_ARRAYSIZE(command_buffer_ptrs), command_buffer_ptrs));
+  IREE_ASSERT_OK(SubmitCommandBufferAndWait(command_buffer_1));
+  IREE_ASSERT_OK(SubmitCommandBufferAndWait(command_buffer_2));
 
   iree_hal_command_buffer_release(command_buffer_1);
   iree_hal_command_buffer_release(command_buffer_2);
diff --git a/runtime/src/iree/hal/cts/file_test.h b/runtime/src/iree/hal/cts/file_test.h
index 5900977..9c8c8f9 100644
--- a/runtime/src/iree/hal/cts/file_test.h
+++ b/runtime/src/iree/hal/cts/file_test.h
@@ -111,7 +111,7 @@
       iree_hal_fence_semaphore_list(wait_fence),
       iree_hal_fence_semaphore_list(signal_fence), /*source_file=*/file,
       /*source_offset=*/0, /*target_buffer=*/buffer, /*target_offset=*/0,
-      /*length=*/file_size, /*flags=*/0));
+      /*length=*/file_size, IREE_HAL_READ_FLAG_NONE));
 
   IREE_ASSERT_OK(iree_hal_fence_wait(signal_fence, iree_infinite_timeout()));
   iree_hal_fence_release(wait_fence);
diff --git a/runtime/src/iree/hal/cts/semaphore_submission_test.h b/runtime/src/iree/hal/cts/semaphore_submission_test.h
index b745761..0943681 100644
--- a/runtime/src/iree/hal/cts/semaphore_submission_test.h
+++ b/runtime/src/iree/hal/cts/semaphore_submission_test.h
@@ -56,7 +56,7 @@
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_,
       /*queue_affinity=*/0, iree_hal_semaphore_list_empty(), signal_semaphores,
-      1, &command_buffer, /*binding_tables=*/NULL));
+      command_buffer, iree_hal_buffer_binding_table_empty()));
   IREE_ASSERT_OK(
       iree_hal_semaphore_wait(signal_semaphore, 1, iree_infinite_timeout()));
 
@@ -88,8 +88,8 @@
 
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_,
-      /*queue_affinity=*/0, wait_semaphores, signal_semaphores, 1,
-      &command_buffer, /*binding_tables=*/NULL));
+      /*queue_affinity=*/0, wait_semaphores, signal_semaphores, command_buffer,
+      iree_hal_buffer_binding_table_empty()));
 
   // Work shouldn't start until the wait semaphore reaches its payload value.
   CheckSemaphoreValue(signal_semaphore, 100);
@@ -131,8 +131,8 @@
 
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_,
-      /*queue_affinity=*/0, wait_semaphores, signal_semaphores, 1,
-      &command_buffer, /*binding_tables=*/NULL));
+      /*queue_affinity=*/0, wait_semaphores, signal_semaphores, command_buffer,
+      iree_hal_buffer_binding_table_empty()));
 
   // Work shouldn't start until all wait semaphores reach their payload values.
   CheckSemaphoreValue(signal_semaphore_1, 0);
@@ -177,7 +177,8 @@
   // Dispatch the device command buffer to have it wait.
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY, device_wait_semaphores,
-      device_signal_semaphores, 1, &command_buffer, /*binding_tables=*/NULL));
+      device_signal_semaphores, command_buffer,
+      iree_hal_buffer_binding_table_empty()));
 
   // Start another thread and have it wait.
   std::thread thread([&]() {
@@ -241,7 +242,8 @@
   // Dispatch the device command buffer to have it wait.
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY, device_wait_semaphores,
-      device_signal_semaphores, 1, &command_buffer, /*binding_tables=*/NULL));
+      device_signal_semaphores, command_buffer,
+      iree_hal_buffer_binding_table_empty()));
 
   // Start another thread and have it wait.
   std::thread thread([&]() {
@@ -309,7 +311,8 @@
   // Dispatch the device command buffer to have it wait.
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY, device_wait_semaphores,
-      device_signal_semaphores, 1, &command_buffer, /*binding_tables=*/NULL));
+      device_signal_semaphores, command_buffer,
+      iree_hal_buffer_binding_table_empty()));
 
   // Start another thread and have it wait.
   std::thread thread([&]() {
@@ -380,8 +383,8 @@
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY,
       /*wait_semaphore_list=*/semaphore1_list,
-      /*signal_semaphore_list=*/semaphore2_list, 1, &command_buffer2,
-      /*binding_tables=*/NULL));
+      /*signal_semaphore_list=*/semaphore2_list, command_buffer2,
+      iree_hal_buffer_binding_table_empty()));
 
   // Make sure that the intermediate and second semaphores have not advanced
   // since only command_buffer2 is queued.
@@ -394,8 +397,8 @@
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY,
       /*wait_semaphore_list=*/command_buffer1_wait_semaphore_list,
-      /*signal_semaphore_list=*/semaphore1_list, 1, &command_buffer1,
-      /*binding_tables=*/NULL));
+      /*signal_semaphore_list=*/semaphore1_list, command_buffer1,
+      iree_hal_buffer_binding_table_empty()));
 
   // Wait on the intermediate semaphore and check its value.
   IREE_ASSERT_OK(
@@ -449,18 +452,18 @@
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY,
       /*wait_semaphore_list=*/semaphore11_list,
-      /*signal_semaphore_list=*/semaphore22_list, 1, &command_buffer22,
-      /*binding_tables=*/NULL));
+      /*signal_semaphore_list=*/semaphore22_list, command_buffer22,
+      iree_hal_buffer_binding_table_empty()));
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY,
       /*wait_semaphore_list=*/semaphore11_list,
-      /*signal_semaphore_list=*/semaphore21_list, 1, &command_buffer21,
-      /*binding_tables=*/NULL));
+      /*signal_semaphore_list=*/semaphore21_list, command_buffer21,
+      iree_hal_buffer_binding_table_empty()));
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY,
       /*wait_semaphore_list=*/empty_semaphore_list,
-      /*signal_semaphore_list=*/empty_semaphore_list, 1, &command_buffer12,
-      /*binding_tables=*/NULL));
+      /*signal_semaphore_list=*/empty_semaphore_list, command_buffer12,
+      iree_hal_buffer_binding_table_empty()));
 
   // Assert that semaphores have not advance since we have not yet submitted
   // command_buffer11.
@@ -472,8 +475,8 @@
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY,
       /*wait_semaphore_list=*/empty_semaphore_list,
-      /*signal_semaphore_list=*/semaphore11_list, 1, &command_buffer11,
-      /*binding_tables=*/NULL));
+      /*signal_semaphore_list=*/semaphore11_list, command_buffer11,
+      iree_hal_buffer_binding_table_empty()));
 
   // Wait and check that semaphore values have advanced.
   IREE_ASSERT_OK(
@@ -544,14 +547,14 @@
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY,
       /*wait_semaphore_list=*/command_buffer22_semaphore_wait_list,
-      /*signal_semaphore_list=*/command_buffer22_signal_list, 1,
-      &command_buffer22, /*binding_tables=*/NULL));
+      /*signal_semaphore_list=*/command_buffer22_signal_list, command_buffer22,
+      iree_hal_buffer_binding_table_empty()));
   // We submit the command buffers in reverse order.
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY,
       /*wait_semaphore_list=*/command_buffer21_semaphore_wait_list,
-      /*signal_semaphore_list=*/command_buffer21_signal_list, 1,
-      &command_buffer21, /*binding_tables=*/NULL));
+      /*signal_semaphore_list=*/command_buffer21_signal_list, command_buffer21,
+      iree_hal_buffer_binding_table_empty()));
 
   // Semaphores have not advance since we have not yet submitted
   // command_buffer11.
@@ -562,8 +565,8 @@
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY,
       /*wait_semaphore_list=*/command_buffer11_semaphore_wait_list,
-      /*signal_semaphore_list=*/command_buffer11_semaphore_signal_list, 1,
-      &command_buffer11, /*binding_tables=*/NULL));
+      /*signal_semaphore_list=*/command_buffer11_semaphore_signal_list,
+      command_buffer11, iree_hal_buffer_binding_table_empty()));
 
   // Wait and check that semaphore values have advanced.
   IREE_ASSERT_OK(
@@ -620,8 +623,8 @@
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY,
       /*wait_semaphore_list=*/command_buffer2_wait_list,
-      /*signal_semaphore_list=*/command_buffer2_signal_list, 1,
-      &command_buffer2, /*binding_tables=*/NULL));
+      /*signal_semaphore_list=*/command_buffer2_signal_list, command_buffer2,
+      iree_hal_buffer_binding_table_empty()));
 
   // semaphore3 must not have advanced, because it depends on semaphore1 and
   // semaphore2, which have not been signaled yet.
@@ -635,8 +638,8 @@
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY,
       /*wait_semaphore_list=*/command_buffer1_wait_list,
-      /*signal_semaphore_list=*/command_buffer1_signal_list, 1,
-      &command_buffer1, /*binding_tables=*/NULL));
+      /*signal_semaphore_list=*/command_buffer1_signal_list, command_buffer1,
+      iree_hal_buffer_binding_table_empty()));
 
   // semaphore3 must not have advanced still, because it depends on semaphore2,
   // which has not been signaled yet.
@@ -692,8 +695,8 @@
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY,
       /*wait_semaphore_list=*/command_buffer2_wait_list,
-      /*signal_semaphore_list=*/command_buffer2_signal_list, 1,
-      &command_buffer2, /*binding_tables=*/NULL));
+      /*signal_semaphore_list=*/command_buffer2_signal_list, command_buffer2,
+      iree_hal_buffer_binding_table_empty()));
 
   // Semaphores have not advance since we have not yet submitted
   // command_buffer1.
@@ -730,8 +733,8 @@
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY,
       /*wait_semaphore_list=*/command_buffer1_wait_list,
-      /*signal_semaphore_list=*/command_buffer1_signal_list, 1,
-      &command_buffer1, /*binding_tables=*/NULL));
+      /*signal_semaphore_list=*/command_buffer1_signal_list, command_buffer1,
+      iree_hal_buffer_binding_table_empty()));
 
   thread11.join();
   thread12.join();
@@ -780,8 +783,8 @@
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY,
       /*wait_semaphore_list=*/command_buffer_wait_list,
-      /*signal_semaphore_list=*/command_buffer_signal_list, 1, &command_buffer,
-      /*binding_tables=*/NULL));
+      /*signal_semaphore_list=*/command_buffer_signal_list, command_buffer,
+      iree_hal_buffer_binding_table_empty()));
 
   IREE_ASSERT_OK(
       iree_hal_semaphore_wait(semaphore2, semaphore2_signal_value,
@@ -822,8 +825,8 @@
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY,
       /*wait_semaphore_list=*/command_buffer_wait_list,
-      /*signal_semaphore_list=*/command_buffer_signal_list, 1, &command_buffer,
-      /*binding_tables=*/NULL));
+      /*signal_semaphore_list=*/command_buffer_signal_list, command_buffer,
+      iree_hal_buffer_binding_table_empty()));
 
   std::thread signal_thread(
       [&]() { IREE_ASSERT_OK(iree_hal_semaphore_signal(semaphore1, 2)); });
@@ -867,8 +870,8 @@
   IREE_ASSERT_OK(iree_hal_device_queue_execute(
       device_, IREE_HAL_QUEUE_AFFINITY_ANY,
       /*wait_semaphore_list=*/command_buffer_wait_list,
-      /*signal_semaphore_list=*/command_buffer_signal_list, 1, &command_buffer,
-      /*binding_tables=*/NULL));
+      /*signal_semaphore_list=*/command_buffer_signal_list, command_buffer,
+      iree_hal_buffer_binding_table_empty()));
 
   iree_status_t status =
       iree_make_status(IREE_STATUS_CANCELLED, "PropagateFailSignal test.");
diff --git a/runtime/src/iree/hal/device.c b/runtime/src/iree/hal/device.c
index 7ae9abb..f2b7d78 100644
--- a/runtime/src/iree/hal/device.c
+++ b/runtime/src/iree/hal/device.c
@@ -122,13 +122,13 @@
   return status;
 }
 
-IREE_API_EXPORT iree_status_t iree_hal_device_queue_fill(
+IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_fill(
     iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
     const iree_hal_semaphore_list_t wait_semaphore_list,
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
     iree_device_size_t length, const void* pattern,
-    iree_host_size_t pattern_length) {
+    iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
   IREE_ASSERT_ARGUMENT(device);
   IREE_ASSERT_ARGUMENT(target_buffer);
   IREE_ASSERT_ARGUMENT(pattern);
@@ -162,8 +162,8 @@
                                                   &command_buffer));
 
   iree_status_t status = iree_hal_device_queue_execute(
-      device, queue_affinity, wait_semaphore_list, signal_semaphore_list, 1,
-      &command_buffer, /*binding_tables=*/NULL);
+      device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
+      command_buffer, iree_hal_buffer_binding_table_empty());
 
   iree_hal_command_buffer_release(command_buffer);
 
@@ -171,13 +171,123 @@
   return status;
 }
 
-IREE_API_EXPORT iree_status_t iree_hal_device_queue_copy(
+IREE_API_EXPORT iree_status_t iree_hal_device_queue_fill(
+    iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
+    const iree_hal_semaphore_list_t wait_semaphore_list,
+    const iree_hal_semaphore_list_t signal_semaphore_list,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length, const void* pattern,
+    iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
+  IREE_ASSERT_ARGUMENT(device);  // Checks args, then defers to the device.
+  IREE_ASSERT_ARGUMENT(  // Empty wait list, or both of its arrays present.
+      !wait_semaphore_list.count ||
+      (wait_semaphore_list.semaphores && wait_semaphore_list.payload_values));
+  IREE_ASSERT_ARGUMENT(!signal_semaphore_list.count ||  // Same for signals.
+                       (signal_semaphore_list.semaphores &&
+                        signal_semaphore_list.payload_values));
+  IREE_ASSERT_ARGUMENT(pattern);
+  IREE_ASSERT_ARGUMENT(target_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);  // z0 brackets the vtable dispatch below.
+  IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)length);  // Attach |length|.
+  iree_status_t status = _VTABLE_DISPATCH(device, queue_fill)(
+      device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
+      target_buffer, target_offset, length, pattern, pattern_length, flags);
+  IREE_TRACE_ZONE_END(z0);
+  return status;  // Result of the queue_fill dispatch, passed back as-is.
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_update(
+    iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
+    const iree_hal_semaphore_list_t wait_semaphore_list,
+    const iree_hal_semaphore_list_t signal_semaphore_list,
+    const void* source_buffer, iree_host_size_t source_offset,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length, iree_hal_update_flags_t flags) {
+  IREE_ASSERT_ARGUMENT(device);  // Update emulated via a command buffer.
+  IREE_ASSERT_ARGUMENT(source_buffer);
+  IREE_ASSERT_ARGUMENT(target_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);  // Every exit below must end this zone.
+  IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)length);
+
+  // If we are starting execution immediately then we can reduce latency by
+  // allowing inline command buffer execution.
+  iree_hal_command_buffer_mode_t command_buffer_mode =
+      IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT;
+  if (wait_semaphore_list.count == 0) {
+    command_buffer_mode |= IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION;
+  }
+
+  // TODO(benvanik): support splitting the update into multiple chunks to fit
+  // under the max command buffer update size limit. This provisional API is
+  // intended only for updating dispatch parameters today.
+  if (length > UINT16_MAX) {
+    IREE_TRACE_ZONE_END(z0);  // Early-out must still close the trace zone.
+    return iree_make_status(
+        IREE_STATUS_UNIMPLEMENTED, "queue buffer updates currently limited to "
+        "64KB, tried to update %" PRIdsz " bytes", length);
+  }
+
+  iree_hal_transfer_command_t command = {
+      .type = IREE_HAL_TRANSFER_COMMAND_TYPE_UPDATE,
+      .update =
+          {
+              .source_buffer = source_buffer,
+              .source_offset = source_offset,
+              .target_buffer = target_buffer,
+              .target_offset = target_offset,
+              .length = length,
+          },
+  };
+
+  iree_hal_command_buffer_t* command_buffer = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_hal_create_transfer_command_buffer(device, command_buffer_mode,
+                                                  queue_affinity, 1, &command,
+                                                  &command_buffer));
+
+  iree_status_t status = iree_hal_device_queue_execute(
+      device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
+      command_buffer, iree_hal_buffer_binding_table_empty());
+
+  iree_hal_command_buffer_release(command_buffer);  // On success or failure.
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;  // Status of the queue_execute submission.
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_device_queue_update(
+    iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
+    const iree_hal_semaphore_list_t wait_semaphore_list,
+    const iree_hal_semaphore_list_t signal_semaphore_list,
+    const void* source_buffer, iree_host_size_t source_offset,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length, iree_hal_update_flags_t flags) {
+  IREE_ASSERT_ARGUMENT(device);  // Checks args, then defers to the device.
+  IREE_ASSERT_ARGUMENT(  // Empty wait list, or both of its arrays present.
+      !wait_semaphore_list.count ||
+      (wait_semaphore_list.semaphores && wait_semaphore_list.payload_values));
+  IREE_ASSERT_ARGUMENT(!signal_semaphore_list.count ||  // Same for signals.
+                       (signal_semaphore_list.semaphores &&
+                        signal_semaphore_list.payload_values));
+  IREE_ASSERT_ARGUMENT(source_buffer);
+  IREE_ASSERT_ARGUMENT(target_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);  // z0 brackets the vtable dispatch below.
+  IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)length);  // Attach |length|.
+  iree_status_t status = _VTABLE_DISPATCH(device, queue_update)(
+      device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
+      source_buffer, source_offset, target_buffer, target_offset, length,
+      flags);  // Every argument is forwarded unchanged.
+  IREE_TRACE_ZONE_END(z0);
+  return status;  // Result of the queue_update dispatch, passed back as-is.
+}
+
+IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_copy(
     iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
     const iree_hal_semaphore_list_t wait_semaphore_list,
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
     iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
-    iree_device_size_t length) {
+    iree_device_size_t length, iree_hal_copy_flags_t flags) {
   IREE_ASSERT_ARGUMENT(device);
   IREE_ASSERT_ARGUMENT(source_buffer);
   IREE_ASSERT_ARGUMENT(target_buffer);
@@ -211,8 +321,8 @@
                                                   &command_buffer));
 
   iree_status_t status = iree_hal_device_queue_execute(
-      device, queue_affinity, wait_semaphore_list, signal_semaphore_list, 1,
-      &command_buffer, /*binding_tables=*/NULL);
+      device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
+      command_buffer, iree_hal_buffer_binding_table_empty());
 
   iree_hal_command_buffer_release(command_buffer);
 
@@ -220,13 +330,39 @@
   return status;
 }
 
+IREE_API_EXPORT iree_status_t iree_hal_device_queue_copy(
+    iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
+    const iree_hal_semaphore_list_t wait_semaphore_list,
+    const iree_hal_semaphore_list_t signal_semaphore_list,
+    iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length, iree_hal_copy_flags_t flags) {
+  IREE_ASSERT_ARGUMENT(device);  // Checks args, then defers to the device.
+  IREE_ASSERT_ARGUMENT(  // Empty wait list, or both of its arrays present.
+      !wait_semaphore_list.count ||
+      (wait_semaphore_list.semaphores && wait_semaphore_list.payload_values));
+  IREE_ASSERT_ARGUMENT(!signal_semaphore_list.count ||  // Same for signals.
+                       (signal_semaphore_list.semaphores &&
+                        signal_semaphore_list.payload_values));
+  IREE_ASSERT_ARGUMENT(source_buffer);
+  IREE_ASSERT_ARGUMENT(target_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);  // z0 brackets the vtable dispatch below.
+  IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)length);  // Attach |length|.
+  iree_status_t status = _VTABLE_DISPATCH(device, queue_copy)(
+      device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
+      source_buffer, source_offset, target_buffer, target_offset, length,
+      flags);  // Every argument is forwarded unchanged.
+  IREE_TRACE_ZONE_END(z0);
+  return status;  // Result of the queue_copy dispatch, passed back as-is.
+}
+
 IREE_API_EXPORT iree_status_t iree_hal_device_queue_read(
     iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
     const iree_hal_semaphore_list_t wait_semaphore_list,
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_file_t* source_file, uint64_t source_offset,
     iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_read_flags_t flags) {
   IREE_ASSERT_ARGUMENT(device);
   IREE_ASSERT_ARGUMENT(
       !wait_semaphore_list.count ||
@@ -250,7 +386,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
     iree_hal_file_t* target_file, uint64_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_write_flags_t flags) {
   IREE_ASSERT_ARGUMENT(device);
   IREE_ASSERT_ARGUMENT(
       !wait_semaphore_list.count ||
@@ -272,9 +408,8 @@
     iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
     const iree_hal_semaphore_list_t wait_semaphore_list,
     const iree_hal_semaphore_list_t signal_semaphore_list,
-    iree_host_size_t command_buffer_count,
-    iree_hal_command_buffer_t* const* command_buffers,
-    iree_hal_buffer_binding_table_t const* binding_tables) {
+    iree_hal_command_buffer_t* command_buffer,
+    iree_hal_buffer_binding_table_t binding_table) {
   IREE_ASSERT_ARGUMENT(device);
   IREE_ASSERT_ARGUMENT(
       !wait_semaphore_list.count ||
@@ -282,44 +417,39 @@
   IREE_ASSERT_ARGUMENT(!signal_semaphore_list.count ||
                        (signal_semaphore_list.semaphores &&
                         signal_semaphore_list.payload_values));
-  IREE_ASSERT_ARGUMENT(!command_buffer_count || command_buffers);
   IREE_TRACE_ZONE_BEGIN(z0);
 
   // TODO(benvanik): move into devices instead? then a synchronous/inline device
   // could assert the waits are resolved instead of blanket failing on an
   // already-resolved semaphore. This would make using stream-ordered
   // allocations easier.
-  for (iree_host_size_t i = 0; i < command_buffer_count; ++i) {
-    if (wait_semaphore_list.count > 0 &&
-        iree_all_bits_set(
-            iree_hal_command_buffer_mode(command_buffers[i]),
-            IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION)) {
-      // Inline command buffers are not allowed to wait (as they could have
-      // already been executed!). This is a requirement of the API so we
-      // validate it across all backends even if they don't support inline
-      // execution and ignore it.
-      IREE_TRACE_ZONE_END(z0);
-      return iree_make_status(
-          IREE_STATUS_INVALID_ARGUMENT,
-          "inline command buffer submitted with a wait; inline command "
-          "buffers must be ready to execute immediately");
-    }
+  if (wait_semaphore_list.count > 0 && command_buffer &&
+      iree_all_bits_set(iree_hal_command_buffer_mode(command_buffer),
+                        IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION)) {
+    // Inline command buffers are not allowed to wait (as they could have
+    // already been executed!). This is a requirement of the API so we
+    // validate it across all backends even if they don't support inline
+    // execution and ignore it.
+    IREE_TRACE_ZONE_END(z0);
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "inline command buffer submitted with a wait; inline command "
+        "buffers must be ready to execute immediately");
   }
 
   // Validate command buffer bindings against the provided binding tables.
   // This will error out if a binding table is required but not provided or if
   // any binding in the table does not match the requirements of the command
   // buffer as recorded.
-  for (iree_host_size_t i = 0; i < command_buffer_count; ++i) {
+  if (command_buffer) {
     IREE_RETURN_AND_END_ZONE_IF_ERROR(
-        z0,
-        iree_hal_command_buffer_validate_submission(
-            command_buffers[i], binding_tables ? &binding_tables[i] : NULL));
+        z0, iree_hal_command_buffer_validate_submission(command_buffer,
+                                                        binding_table));
   }
 
   iree_status_t status = _VTABLE_DISPATCH(device, queue_execute)(
       device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
-      command_buffer_count, command_buffers, binding_tables);
+      command_buffer, binding_table);
 
   IREE_TRACE_ZONE_END(z0);
   return status;
@@ -331,9 +461,9 @@
     const iree_hal_semaphore_list_t signal_semaphore_list) {
   IREE_ASSERT_ARGUMENT(device);
   IREE_TRACE_ZONE_BEGIN(z0);
-  iree_status_t status =
-      iree_hal_device_queue_execute(device, queue_affinity, wait_semaphore_list,
-                                    signal_semaphore_list, 0, NULL, NULL);
+  iree_status_t status = iree_hal_device_queue_execute(
+      device, queue_affinity, wait_semaphore_list, signal_semaphore_list, NULL,
+      iree_hal_buffer_binding_table_empty());
   IREE_TRACE_ZONE_END(z0);
   return status;
 }
diff --git a/runtime/src/iree/hal/device.h b/runtime/src/iree/hal/device.h
index 82aac60..4511fa7 100644
--- a/runtime/src/iree/hal/device.h
+++ b/runtime/src/iree/hal/device.h
@@ -42,6 +42,7 @@
 // request of the calling application. Note that certain features may disable
 // runtime optimizations or require compilation flags to ensure the required
 // metadata is present in executables.
+typedef uint64_t iree_hal_device_feature_t;
 enum iree_hal_device_feature_bits_t {
   IREE_HAL_DEVICE_FEATURE_NONE = 0u,
 
@@ -67,7 +68,6 @@
   // partial embedded debug information to allow mapping back to source offsets.
   IREE_HAL_DEVICE_FEATURE_SUPPORTS_PROFILING = 1u << 2,
 };
-typedef uint32_t iree_hal_device_feature_t;
 
 // Describes an enumerated HAL device.
 typedef struct iree_hal_device_info_t {
@@ -81,6 +81,7 @@
 
 // Defines what information is captured during profiling.
 // Not all implementations will support all modes.
+typedef uint64_t iree_hal_device_profiling_mode_t;
 enum iree_hal_device_profiling_mode_bits_t {
   IREE_HAL_DEVICE_PROFILING_MODE_NONE = 0u,
 
@@ -98,7 +99,6 @@
   // be used when investigating the performance of an individual dispatch.
   IREE_HAL_DEVICE_PROFILING_MODE_EXECUTABLE_COUNTERS = 1u << 2,
 };
-typedef uint32_t iree_hal_device_profiling_mode_t;
 
 // Controls profiling options.
 typedef struct iree_hal_device_profiling_options_t {
@@ -113,6 +113,7 @@
 } iree_hal_device_profiling_options_t;
 
 // A bitfield indicating compatible semaphore behavior for a device.
+typedef uint64_t iree_hal_semaphore_compatibility_t;
 enum iree_hal_semaphore_compatibility_bits_t {
   // Indicates (in the absence of other bits) the semaphore is not compatible
   // with the device at all. Any attempts to use the semaphore for any usage
@@ -152,7 +153,18 @@
       IREE_HAL_SEMAPHORE_COMPATIBILITY_HOST_SIGNAL |
       IREE_HAL_SEMAPHORE_COMPATIBILITY_DEVICE_SIGNAL,
 };
-typedef uint32_t iree_hal_semaphore_compatibility_t;
+
+// Bitfield specifying flags controlling a file read operation.
+typedef uint64_t iree_hal_read_flags_t;
+enum iree_hal_read_flag_bits_t {
+  IREE_HAL_READ_FLAG_NONE = 0,
+};
+
+// Bitfield specifying flags controlling a file write operation.
+typedef uint64_t iree_hal_write_flags_t;
+enum iree_hal_write_flag_bits_t {
+  IREE_HAL_WRITE_FLAG_NONE = 0,
+};
 
 // Defines how a multi-wait operation treats the results of multiple semaphores.
 typedef enum iree_hal_wait_mode_e {
@@ -300,11 +312,12 @@
     iree_hal_buffer_t* buffer);
 
 // Enqueues a single queue-ordered fill operation.
+// The |target_buffer| must be visible to the device queue performing the fill.
 //
 // WARNING: individual fills have a high overhead and batching should be
 // performed by the caller instead of calling this multiple times. The
 // iree_hal_create_transfer_command_buffer utility makes it easy to create
-// batches of transfer operations (fill, copy, update) and is only a few lines
+// batches of transfer operations (fill, update, copy) and is only a few lines
 // more code.
 IREE_API_EXPORT iree_status_t iree_hal_device_queue_fill(
     iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
@@ -312,14 +325,38 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
     iree_device_size_t length, const void* pattern,
-    iree_host_size_t pattern_length);
+    iree_host_size_t pattern_length, iree_hal_fill_flags_t flags);
 
-// Enqueues a single queue-ordered copy operation.
+// Enqueues a single queue-ordered buffer update operation.
+// The provided |source_buffer| will be captured and need not remain live or
+// unchanged while the operation is queued. The |target_buffer| must be visible
+// to the device queue performing the update.
+//
+// Some implementations may have limits on the size of the update or may perform
+// poorly if the size is larger than an implementation-defined limit. Updates
+// should be kept as small and infrequent as possible.
 //
 // WARNING: individual copies have a high overhead and batching should be
 // performed by the caller instead of calling this multiple times. The
 // iree_hal_create_transfer_command_buffer utility makes it easy to create
-// batches of transfer operations (fill, copy, update) and is only a few lines
+// batches of transfer operations (fill, update, copy) and is only a few lines
+// more code.
+IREE_API_EXPORT iree_status_t iree_hal_device_queue_update(
+    iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
+    const iree_hal_semaphore_list_t wait_semaphore_list,
+    const iree_hal_semaphore_list_t signal_semaphore_list,
+    const void* source_buffer, iree_host_size_t source_offset,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length, iree_hal_update_flags_t flags);
+
+// Enqueues a single queue-ordered copy operation.
+// The |source_buffer| and |target_buffer| must both be visible to the device
+// queue performing the copy.
+//
+// WARNING: individual copies have a high overhead and batching should be
+// performed by the caller instead of calling this multiple times. The
+// iree_hal_create_transfer_command_buffer utility makes it easy to create
+// batches of transfer operations (fill, update, copy) and is only a few lines
 // more code.
 IREE_API_EXPORT iree_status_t iree_hal_device_queue_copy(
     iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
@@ -327,7 +364,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
     iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
-    iree_device_size_t length);
+    iree_device_size_t length, iree_hal_copy_flags_t flags);
 
 // Enqueues a file read operation that streams a segment of the |source_file|
 // defined by the |source_offset| and |length| into the HAL |target_buffer| at
@@ -340,7 +377,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_file_t* source_file, uint64_t source_offset,
     iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
-    iree_device_size_t length, uint32_t flags);
+    iree_device_size_t length, iree_hal_read_flags_t flags);
 
 // Enqueues a file write operation that streams a segment of the HAL
 // |source_buffer| defined by the |source_offset| and |length| into the
@@ -353,14 +390,14 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
     iree_hal_file_t* target_file, uint64_t target_offset,
-    iree_device_size_t length, uint32_t flags);
+    iree_device_size_t length, iree_hal_write_flags_t flags);
 
-// Executes zero or more command buffers on a device queue.
-// The command buffers are executed in order as if they were recorded as one.
+// Executes a command buffer on a device queue.
 // No commands will execute until the wait fence has been reached and the signal
-// fence will be signaled when all commands have completed.
+// fence will be signaled when all commands have completed. If a command buffer
+// is omitted this will act as a barrier.
 //
-// The queue is selected based on the command buffers submitted and the
+// The queue is selected based on the command buffer submitted and the
 // |queue_affinity|. As the number of available queues can vary the
 // |queue_affinity| is used to hash into the available queues for the required
 // categories. For example if 2 queues support transfer commands and the
@@ -369,10 +406,10 @@
 // placed on to the same queue. Note that the exact hashing function is
 // implementation dependent.
 //
-// A list of binding tables matching the list of command buffers must be
-// provided if any command buffer has indirect bindings and may otherwise be
-// NULL. The binding table contents will be captured during the call and need
-// not persist after the call returns.
+// An optional binding table must be provided if the command buffer has indirect
+// bindings and may otherwise be `iree_hal_buffer_binding_table_empty()`. The
+// binding table contents will be captured during the call and need not persist
+// after the call returns.
 //
 // The submission behavior matches Vulkan's vkQueueSubmit, with each submission
 // executing its command buffers in the order they are defined but allowing the
@@ -382,9 +419,8 @@
     iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
     const iree_hal_semaphore_list_t wait_semaphore_list,
     const iree_hal_semaphore_list_t signal_semaphore_list,
-    iree_host_size_t command_buffer_count,
-    iree_hal_command_buffer_t* const* command_buffers,
-    iree_hal_buffer_binding_table_t const* binding_tables);
+    iree_hal_command_buffer_t* command_buffer,
+    iree_hal_buffer_binding_table_t binding_table);
 
 // Enqueues a barrier waiting for |wait_semaphore_list| and signaling
 // |signal_semaphore_list| when reached.
@@ -559,13 +595,37 @@
       const iree_hal_semaphore_list_t signal_semaphore_list,
       iree_hal_buffer_t* buffer);
 
+  iree_status_t(IREE_API_PTR* queue_fill)(
+      iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
+      const iree_hal_semaphore_list_t wait_semaphore_list,
+      const iree_hal_semaphore_list_t signal_semaphore_list,
+      iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+      iree_device_size_t length, const void* pattern,
+      iree_host_size_t pattern_length, iree_hal_fill_flags_t flags);
+
+  iree_status_t(IREE_API_PTR* queue_update)(
+      iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
+      const iree_hal_semaphore_list_t wait_semaphore_list,
+      const iree_hal_semaphore_list_t signal_semaphore_list,
+      const void* source_buffer, iree_host_size_t source_offset,
+      iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+      iree_device_size_t length, iree_hal_update_flags_t flags);
+
+  iree_status_t(IREE_API_PTR* queue_copy)(
+      iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
+      const iree_hal_semaphore_list_t wait_semaphore_list,
+      const iree_hal_semaphore_list_t signal_semaphore_list,
+      iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
+      iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+      iree_device_size_t length, iree_hal_copy_flags_t flags);
+
   iree_status_t(IREE_API_PTR* queue_read)(
       iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
       const iree_hal_semaphore_list_t wait_semaphore_list,
       const iree_hal_semaphore_list_t signal_semaphore_list,
       iree_hal_file_t* source_file, uint64_t source_offset,
       iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
-      iree_device_size_t length, uint32_t flags);
+      iree_device_size_t length, iree_hal_read_flags_t flags);
 
   iree_status_t(IREE_API_PTR* queue_write)(
       iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
@@ -573,15 +633,14 @@
       const iree_hal_semaphore_list_t signal_semaphore_list,
       iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
       iree_hal_file_t* target_file, uint64_t target_offset,
-      iree_device_size_t length, uint32_t flags);
+      iree_device_size_t length, iree_hal_write_flags_t flags);
 
   iree_status_t(IREE_API_PTR* queue_execute)(
       iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
       const iree_hal_semaphore_list_t wait_semaphore_list,
       const iree_hal_semaphore_list_t signal_semaphore_list,
-      iree_host_size_t command_buffer_count,
-      iree_hal_command_buffer_t* const* command_buffers,
-      iree_hal_buffer_binding_table_t const* binding_tables);
+      iree_hal_command_buffer_t* command_buffer,
+      iree_hal_buffer_binding_table_t binding_table);
 
   iree_status_t(IREE_API_PTR* queue_flush)(
       iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity);
@@ -600,6 +659,30 @@
 
 IREE_API_EXPORT void iree_hal_device_destroy(iree_hal_device_t* device);
 
+IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_fill(
+    iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
+    const iree_hal_semaphore_list_t wait_semaphore_list,
+    const iree_hal_semaphore_list_t signal_semaphore_list,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length, const void* pattern,
+    iree_host_size_t pattern_length, iree_hal_fill_flags_t flags);
+
+IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_update(
+    iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
+    const iree_hal_semaphore_list_t wait_semaphore_list,
+    const iree_hal_semaphore_list_t signal_semaphore_list,
+    const void* source_buffer, iree_host_size_t source_offset,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length, iree_hal_update_flags_t flags);
+
+IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_copy(
+    iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
+    const iree_hal_semaphore_list_t wait_semaphore_list,
+    const iree_hal_semaphore_list_t signal_semaphore_list,
+    iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length, iree_hal_copy_flags_t flags);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/runtime/src/iree/hal/drivers/cuda/cuda_device.c b/runtime/src/iree/hal/drivers/cuda/cuda_device.c
index a12043d..2ed014b 100644
--- a/runtime/src/iree/hal/drivers/cuda/cuda_device.c
+++ b/runtime/src/iree/hal/drivers/cuda/cuda_device.c
@@ -1008,7 +1008,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_file_t* source_file, uint64_t source_offset,
     iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_read_flags_t flags) {
   // TODO: expose streaming chunk count/size options.
   iree_status_t loop_status = iree_ok_status();
   iree_hal_file_transfer_options_t options = {
@@ -1029,7 +1029,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
     iree_hal_file_t* target_file, uint64_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_write_flags_t flags) {
   // TODO: expose streaming chunk count/size options.
   iree_status_t loop_status = iree_ok_status();
   iree_hal_file_transfer_options_t options = {
@@ -1053,16 +1053,16 @@
     iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
     const iree_hal_semaphore_list_t wait_semaphore_list,
     const iree_hal_semaphore_list_t signal_semaphore_list,
-    iree_host_size_t command_buffer_count,
-    iree_hal_command_buffer_t* const* command_buffers,
-    iree_hal_buffer_binding_table_t const* binding_tables) {
+    iree_hal_command_buffer_t* command_buffer,
+    iree_hal_buffer_binding_table_t binding_table) {
   iree_hal_cuda_device_t* device = iree_hal_cuda_device_cast(base_device);
   IREE_TRACE_ZONE_BEGIN(z0);
 
   iree_status_t status = iree_hal_deferred_work_queue_enqueue(
       device->work_queue, iree_hal_cuda_device_collect_tracing_context,
       device->tracing_context, wait_semaphore_list, signal_semaphore_list,
-      command_buffer_count, command_buffers, binding_tables);
+      command_buffer ? 1 : 0, command_buffer ? &command_buffer : NULL,
+      &binding_table);
   if (iree_status_is_ok(status)) {
     // Try to advance the deferred work queue.
     status = iree_hal_deferred_work_queue_issue(device->work_queue);
@@ -1129,6 +1129,9 @@
         iree_hal_cuda_device_query_semaphore_compatibility,
     .queue_alloca = iree_hal_cuda_device_queue_alloca,
     .queue_dealloca = iree_hal_cuda_device_queue_dealloca,
+    .queue_fill = iree_hal_device_queue_emulated_fill,
+    .queue_update = iree_hal_device_queue_emulated_update,
+    .queue_copy = iree_hal_device_queue_emulated_copy,
     .queue_read = iree_hal_cuda_device_queue_read,
     .queue_write = iree_hal_cuda_device_queue_write,
     .queue_execute = iree_hal_cuda_device_queue_execute,
diff --git a/runtime/src/iree/hal/drivers/cuda/graph_command_buffer.c b/runtime/src/iree/hal/drivers/cuda/graph_command_buffer.c
index 3449f82..0240a70 100644
--- a/runtime/src/iree/hal/drivers/cuda/graph_command_buffer.c
+++ b/runtime/src/iree/hal/drivers/cuda/graph_command_buffer.c
@@ -477,9 +477,10 @@
   return iree_make_status(IREE_STATUS_UNIMPLEMENTED, "event not yet supported");
 }
 
-static iree_status_t iree_hal_cuda_graph_command_buffer_discard_buffer(
+static iree_status_t iree_hal_cuda_graph_command_buffer_advise_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t buffer_ref) {
+    iree_hal_buffer_ref_t buffer_ref, iree_hal_memory_advise_flags_t flags,
+    uint64_t arg0, uint64_t arg1) {
   // We could mark the memory as invalidated so that if this is a managed buffer
   // CUDA does not try to copy it back to the host.
   return iree_ok_status();
@@ -510,7 +511,7 @@
 static iree_status_t iree_hal_cuda_graph_command_buffer_fill_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
     iree_hal_buffer_ref_t target_ref, const void* pattern,
-    iree_host_size_t pattern_length) {
+    iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
   iree_hal_cuda_graph_command_buffer_t* command_buffer =
       iree_hal_cuda_graph_command_buffer_cast(base_command_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
@@ -562,7 +563,8 @@
 
 static iree_status_t iree_hal_cuda_graph_command_buffer_update_buffer(
     iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
-    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref) {
+    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref,
+    iree_hal_update_flags_t flags) {
   iree_hal_cuda_graph_command_buffer_t* command_buffer =
       iree_hal_cuda_graph_command_buffer_cast(base_command_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
@@ -626,7 +628,8 @@
 
 static iree_status_t iree_hal_cuda_graph_command_buffer_copy_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref) {
+    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref,
+    iree_hal_copy_flags_t flags) {
   iree_hal_cuda_graph_command_buffer_t* command_buffer =
       iree_hal_cuda_graph_command_buffer_cast(base_command_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
@@ -847,7 +850,7 @@
         .signal_event = iree_hal_cuda_graph_command_buffer_signal_event,
         .reset_event = iree_hal_cuda_graph_command_buffer_reset_event,
         .wait_events = iree_hal_cuda_graph_command_buffer_wait_events,
-        .discard_buffer = iree_hal_cuda_graph_command_buffer_discard_buffer,
+        .advise_buffer = iree_hal_cuda_graph_command_buffer_advise_buffer,
         .fill_buffer = iree_hal_cuda_graph_command_buffer_fill_buffer,
         .update_buffer = iree_hal_cuda_graph_command_buffer_update_buffer,
         .copy_buffer = iree_hal_cuda_graph_command_buffer_copy_buffer,
diff --git a/runtime/src/iree/hal/drivers/cuda/stream_command_buffer.c b/runtime/src/iree/hal/drivers/cuda/stream_command_buffer.c
index bc02895..5d6a616 100644
--- a/runtime/src/iree/hal/drivers/cuda/stream_command_buffer.c
+++ b/runtime/src/iree/hal/drivers/cuda/stream_command_buffer.c
@@ -308,9 +308,10 @@
   return iree_make_status(IREE_STATUS_UNIMPLEMENTED, "event not yet supported");
 }
 
-static iree_status_t iree_hal_cuda_stream_command_buffer_discard_buffer(
+static iree_status_t iree_hal_cuda_stream_command_buffer_advise_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t buffer_ref) {
+    iree_hal_buffer_ref_t buffer_ref, iree_hal_memory_advise_flags_t flags,
+    uint64_t arg0, uint64_t arg1) {
   // We could mark the memory as invalidated so that if managed CUDA does not
   // try to copy it back to the host.
   return iree_ok_status();
@@ -319,7 +320,7 @@
 static iree_status_t iree_hal_cuda_stream_command_buffer_fill_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
     iree_hal_buffer_ref_t target_ref, const void* pattern,
-    iree_host_size_t pattern_length) {
+    iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
   iree_hal_cuda_stream_command_buffer_t* command_buffer =
       iree_hal_cuda_stream_command_buffer_cast(base_command_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
@@ -371,7 +372,8 @@
 
 static iree_status_t iree_hal_cuda_stream_command_buffer_update_buffer(
     iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
-    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref) {
+    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref,
+    iree_hal_update_flags_t flags) {
   iree_hal_cuda_stream_command_buffer_t* command_buffer =
       iree_hal_cuda_stream_command_buffer_cast(base_command_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
@@ -413,7 +415,8 @@
 
 static iree_status_t iree_hal_cuda_stream_command_buffer_copy_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref) {
+    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref,
+    iree_hal_copy_flags_t flags) {
   iree_hal_cuda_stream_command_buffer_t* command_buffer =
       iree_hal_cuda_stream_command_buffer_cast(base_command_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
@@ -599,7 +602,7 @@
         .signal_event = iree_hal_cuda_stream_command_buffer_signal_event,
         .reset_event = iree_hal_cuda_stream_command_buffer_reset_event,
         .wait_events = iree_hal_cuda_stream_command_buffer_wait_events,
-        .discard_buffer = iree_hal_cuda_stream_command_buffer_discard_buffer,
+        .advise_buffer = iree_hal_cuda_stream_command_buffer_advise_buffer,
         .fill_buffer = iree_hal_cuda_stream_command_buffer_fill_buffer,
         .update_buffer = iree_hal_cuda_stream_command_buffer_update_buffer,
         .copy_buffer = iree_hal_cuda_stream_command_buffer_copy_buffer,
diff --git a/runtime/src/iree/hal/drivers/hip/graph_command_buffer.c b/runtime/src/iree/hal/drivers/hip/graph_command_buffer.c
index d1ef975..1b6baa3 100644
--- a/runtime/src/iree/hal/drivers/hip/graph_command_buffer.c
+++ b/runtime/src/iree/hal/drivers/hip/graph_command_buffer.c
@@ -486,9 +486,10 @@
   return iree_make_status(IREE_STATUS_UNIMPLEMENTED, "event not yet supported");
 }
 
-static iree_status_t iree_hal_hip_graph_command_buffer_discard_buffer(
+static iree_status_t iree_hal_hip_graph_command_buffer_advise_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t buffer_ref) {
+    iree_hal_buffer_ref_t buffer_ref, iree_hal_memory_advise_flags_t flags,
+    uint64_t arg0, uint64_t arg1) {
   // We could mark the memory as invalidated so that if this is a managed buffer
   // HIP does not try to copy it back to the host.
   return iree_ok_status();
@@ -519,7 +520,7 @@
 static iree_status_t iree_hal_hip_graph_command_buffer_fill_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
     iree_hal_buffer_ref_t target_ref, const void* pattern,
-    iree_host_size_t pattern_length) {
+    iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
   iree_hal_hip_graph_command_buffer_t* command_buffer =
       iree_hal_hip_graph_command_buffer_cast(base_command_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
@@ -570,7 +571,8 @@
 
 static iree_status_t iree_hal_hip_graph_command_buffer_update_buffer(
     iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
-    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref) {
+    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref,
+    iree_hal_update_flags_t flags) {
   iree_hal_hip_graph_command_buffer_t* command_buffer =
       iree_hal_hip_graph_command_buffer_cast(base_command_buffer);
   if (command_buffer->symbols->hipDrvGraphAddMemcpyNode == NULL) {
@@ -640,7 +642,8 @@
 
 static iree_status_t iree_hal_hip_graph_command_buffer_copy_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref) {
+    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref,
+    iree_hal_copy_flags_t flags) {
   iree_hal_hip_graph_command_buffer_t* command_buffer =
       iree_hal_hip_graph_command_buffer_cast(base_command_buffer);
   if (command_buffer->symbols->hipDrvGraphAddMemcpyNode == NULL) {
@@ -856,7 +859,7 @@
         .signal_event = iree_hal_hip_graph_command_buffer_signal_event,
         .reset_event = iree_hal_hip_graph_command_buffer_reset_event,
         .wait_events = iree_hal_hip_graph_command_buffer_wait_events,
-        .discard_buffer = iree_hal_hip_graph_command_buffer_discard_buffer,
+        .advise_buffer = iree_hal_hip_graph_command_buffer_advise_buffer,
         .fill_buffer = iree_hal_hip_graph_command_buffer_fill_buffer,
         .update_buffer = iree_hal_hip_graph_command_buffer_update_buffer,
         .copy_buffer = iree_hal_hip_graph_command_buffer_copy_buffer,
diff --git a/runtime/src/iree/hal/drivers/hip/hip_device.c b/runtime/src/iree/hal/drivers/hip/hip_device.c
index d0e3c55..7f42e8d 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_device.c
+++ b/runtime/src/iree/hal/drivers/hip/hip_device.c
@@ -1006,7 +1006,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_file_t* source_file, uint64_t source_offset,
     iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_read_flags_t flags) {
   // TODO: expose streaming chunk count/size options.
   iree_status_t loop_status = iree_ok_status();
   iree_hal_file_transfer_options_t options = {
@@ -1027,7 +1027,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
     iree_hal_file_t* target_file, uint64_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_write_flags_t flags) {
   // TODO: expose streaming chunk count/size options.
   iree_status_t loop_status = iree_ok_status();
   iree_hal_file_transfer_options_t options = {
@@ -1051,16 +1051,16 @@
     iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
     const iree_hal_semaphore_list_t wait_semaphore_list,
     const iree_hal_semaphore_list_t signal_semaphore_list,
-    iree_host_size_t command_buffer_count,
-    iree_hal_command_buffer_t* const* command_buffers,
-    iree_hal_buffer_binding_table_t const* binding_tables) {
+    iree_hal_command_buffer_t* command_buffer,
+    iree_hal_buffer_binding_table_t binding_table) {
   iree_hal_hip_device_t* device = iree_hal_hip_device_cast(base_device);
   IREE_TRACE_ZONE_BEGIN(z0);
 
   iree_status_t status = iree_hal_deferred_work_queue_enqueue(
       device->work_queue, iree_hal_hip_device_collect_tracing_context,
       device->tracing_context, wait_semaphore_list, signal_semaphore_list,
-      command_buffer_count, command_buffers, binding_tables);
+      command_buffer ? 1 : 0, command_buffer ? &command_buffer : NULL,
+      &binding_table);
   if (iree_status_is_ok(status)) {
     // Try to advance the deferred work queue.
     status = iree_hal_deferred_work_queue_issue(device->work_queue);
@@ -1126,6 +1126,9 @@
         iree_hal_hip_device_query_semaphore_compatibility,
     .queue_alloca = iree_hal_hip_device_queue_alloca,
     .queue_dealloca = iree_hal_hip_device_queue_dealloca,
+    .queue_fill = iree_hal_device_queue_emulated_fill,
+    .queue_update = iree_hal_device_queue_emulated_update,
+    .queue_copy = iree_hal_device_queue_emulated_copy,
     .queue_read = iree_hal_hip_device_queue_read,
     .queue_write = iree_hal_hip_device_queue_write,
     .queue_execute = iree_hal_hip_device_queue_execute,
diff --git a/runtime/src/iree/hal/drivers/hip/stream_command_buffer.c b/runtime/src/iree/hal/drivers/hip/stream_command_buffer.c
index bb94053..ca5e700 100644
--- a/runtime/src/iree/hal/drivers/hip/stream_command_buffer.c
+++ b/runtime/src/iree/hal/drivers/hip/stream_command_buffer.c
@@ -299,9 +299,10 @@
   return iree_make_status(IREE_STATUS_UNIMPLEMENTED, "event not yet supported");
 }
 
-static iree_status_t iree_hal_hip_stream_command_buffer_discard_buffer(
+static iree_status_t iree_hal_hip_stream_command_buffer_advise_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t buffer_ref) {
+    iree_hal_buffer_ref_t buffer_ref, iree_hal_memory_advise_flags_t flags,
+    uint64_t arg0, uint64_t arg1) {
   // We could mark the memory as invalidated so that if managed HIP does not
   // try to copy it back to the host.
   return iree_ok_status();
@@ -310,7 +311,7 @@
 static iree_status_t iree_hal_hip_stream_command_buffer_fill_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
     iree_hal_buffer_ref_t target_ref, const void* pattern,
-    iree_host_size_t pattern_length) {
+    iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
   iree_hal_hip_stream_command_buffer_t* command_buffer =
       iree_hal_hip_stream_command_buffer_cast(base_command_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
@@ -362,7 +363,8 @@
 
 static iree_status_t iree_hal_hip_stream_command_buffer_update_buffer(
     iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
-    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref) {
+    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref,
+    iree_hal_update_flags_t flags) {
   iree_hal_hip_stream_command_buffer_t* command_buffer =
       iree_hal_hip_stream_command_buffer_cast(base_command_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
@@ -404,7 +406,8 @@
 
 static iree_status_t iree_hal_hip_stream_command_buffer_copy_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref) {
+    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref,
+    iree_hal_copy_flags_t flags) {
   iree_hal_hip_stream_command_buffer_t* command_buffer =
       iree_hal_hip_stream_command_buffer_cast(base_command_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
@@ -588,7 +591,7 @@
         .signal_event = iree_hal_hip_stream_command_buffer_signal_event,
         .reset_event = iree_hal_hip_stream_command_buffer_reset_event,
         .wait_events = iree_hal_hip_stream_command_buffer_wait_events,
-        .discard_buffer = iree_hal_hip_stream_command_buffer_discard_buffer,
+        .advise_buffer = iree_hal_hip_stream_command_buffer_advise_buffer,
         .fill_buffer = iree_hal_hip_stream_command_buffer_fill_buffer,
         .update_buffer = iree_hal_hip_stream_command_buffer_update_buffer,
         .copy_buffer = iree_hal_hip_stream_command_buffer_copy_buffer,
diff --git a/runtime/src/iree/hal/drivers/local_sync/sync_device.c b/runtime/src/iree/hal/drivers/local_sync/sync_device.c
index 8445241..7283e58 100644
--- a/runtime/src/iree/hal/drivers/local_sync/sync_device.c
+++ b/runtime/src/iree/hal/drivers/local_sync/sync_device.c
@@ -327,7 +327,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_file_t* source_file, uint64_t source_offset,
     iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_read_flags_t flags) {
   // TODO: expose streaming chunk count/size options.
   iree_status_t loop_status = iree_ok_status();
   iree_hal_file_transfer_options_t options = {
@@ -348,7 +348,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
     iree_hal_file_t* target_file, uint64_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_write_flags_t flags) {
   // TODO: expose streaming chunk count/size options.
   iree_status_t loop_status = iree_ok_status();
   iree_hal_file_transfer_options_t options = {
@@ -363,86 +363,63 @@
   return loop_status;
 }
 
-static iree_status_t iree_hal_sync_device_apply_deferred_command_buffers(
-    iree_hal_sync_device_t* device, iree_host_size_t command_buffer_count,
-    iree_hal_command_buffer_t* const* command_buffers,
-    iree_hal_buffer_binding_table_t const* binding_tables) {
-  // See if there are any deferred command buffers; this saves us work in cases
-  // of pure inline execution.
-  bool any_deferred = false;
-  for (iree_host_size_t i = 0; i < command_buffer_count && !any_deferred; ++i) {
-    any_deferred = iree_hal_deferred_command_buffer_isa(command_buffers[i]);
+static iree_status_t iree_hal_sync_device_apply_deferred_command_buffer(
+    iree_hal_sync_device_t* device, iree_hal_command_buffer_t* command_buffer,
+    iree_hal_buffer_binding_table_t binding_table) {
+  // If the command buffer is absent or not deferred, no-op this call - inline
+  // command buffers have already been issued during recording.
+  if (!command_buffer ||
+      !iree_hal_deferred_command_buffer_isa(command_buffer)) {
+    return iree_ok_status();
   }
-  if (!any_deferred) return iree_ok_status();
 
   // Stack allocate storage for an inline command buffer we'll use to replay
   // the deferred command buffers. We want to reset it between each apply so
   // that we don't get state carrying across.
-  iree_host_size_t max_storage_size = 0;
-  for (iree_host_size_t i = 0; i < command_buffer_count; ++i) {
-    iree_hal_command_buffer_t* command_buffer = command_buffers[i];
-    iree_hal_buffer_binding_table_t binding_table =
-        binding_tables ? binding_tables[i]
-                       : iree_hal_buffer_binding_table_empty();
-    max_storage_size = iree_max(
-        max_storage_size,
-        iree_hal_inline_command_buffer_size(
-            iree_hal_command_buffer_mode(command_buffer) |
-                IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT |
-                IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION |
-                // NOTE: we need to validate if a binding table is provided as
-                // the bindings were not known when it was originally recorded.
-                (iree_hal_buffer_binding_table_is_empty(binding_table)
-                     ? IREE_HAL_COMMAND_BUFFER_MODE_UNVALIDATED
-                     : 0),
-            /*binding_capacity=*/0));
-  }
+  iree_host_size_t storage_size = iree_hal_inline_command_buffer_size(
+      iree_hal_command_buffer_mode(command_buffer) |
+          IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT |
+          IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION |
+          // NOTE: we need to validate if a binding table is provided as
+          // the bindings were not known when it was originally recorded.
+          (iree_hal_buffer_binding_table_is_empty(binding_table)
+               ? IREE_HAL_COMMAND_BUFFER_MODE_UNVALIDATED
+               : 0),
+      /*binding_capacity=*/0);
   iree_byte_span_t storage =
-      iree_make_byte_span(iree_alloca(max_storage_size), max_storage_size);
+      iree_make_byte_span(iree_alloca(storage_size), storage_size);
 
-  // NOTE: we ignore any inline command buffers that may be passed in as they've
-  // already executed during recording. The caller is probably in for a bad time
-  // if they mixed the two modes together!
-  for (iree_host_size_t i = 0; i < command_buffer_count; ++i) {
-    iree_hal_command_buffer_t* command_buffer = command_buffers[i];
-    iree_hal_buffer_binding_table_t binding_table =
-        binding_tables ? binding_tables[i]
-                       : iree_hal_buffer_binding_table_empty();
-    if (iree_hal_deferred_command_buffer_isa(command_buffer)) {
-      // NOTE: we run unvalidated as inline command buffers don't support
-      // binding tables and can be validated entirely while recording.
-      iree_hal_command_buffer_t* inline_command_buffer = NULL;
-      IREE_RETURN_IF_ERROR(iree_hal_inline_command_buffer_initialize(
-          device->device_allocator,
-          iree_hal_command_buffer_mode(command_buffer) |
-              IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT |
-              IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION |
-              // NOTE: we need to validate if a binding table is provided as the
-              // bindings were not known when it was originally recorded.
-              (iree_hal_buffer_binding_table_is_empty(binding_table)
-                   ? IREE_HAL_COMMAND_BUFFER_MODE_UNVALIDATED
-                   : 0),
-          iree_hal_command_buffer_allowed_categories(command_buffer),
-          IREE_HAL_QUEUE_AFFINITY_ANY,
-          /*binding_capacity=*/0, device->host_allocator, storage,
-          &inline_command_buffer));
-      iree_status_t status = iree_hal_deferred_command_buffer_apply(
-          command_buffer, inline_command_buffer, binding_table);
-      iree_hal_inline_command_buffer_deinitialize(inline_command_buffer);
-      IREE_RETURN_IF_ERROR(status);
-    }
-  }
+  // NOTE: validation is only skipped when no binding table is provided; the
+  // mode flags below enable it for bindings unknown at recording time.
+  iree_hal_command_buffer_t* inline_command_buffer = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_inline_command_buffer_initialize(
+      device->device_allocator,
+      iree_hal_command_buffer_mode(command_buffer) |
+          IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT |
+          IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION |
+          // NOTE: we need to validate if a binding table is provided as the
+          // bindings were not known when it was originally recorded.
+          (iree_hal_buffer_binding_table_is_empty(binding_table)
+               ? IREE_HAL_COMMAND_BUFFER_MODE_UNVALIDATED
+               : 0),
+      iree_hal_command_buffer_allowed_categories(command_buffer),
+      IREE_HAL_QUEUE_AFFINITY_ANY,
+      /*binding_capacity=*/0, device->host_allocator, storage,
+      &inline_command_buffer));
 
-  return iree_ok_status();
+  iree_status_t status = iree_hal_deferred_command_buffer_apply(
+      command_buffer, inline_command_buffer, binding_table);
+
+  iree_hal_inline_command_buffer_deinitialize(inline_command_buffer);
+  return status;
 }
 
 static iree_status_t iree_hal_sync_device_queue_execute(
     iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
     const iree_hal_semaphore_list_t wait_semaphore_list,
     const iree_hal_semaphore_list_t signal_semaphore_list,
-    iree_host_size_t command_buffer_count,
-    iree_hal_command_buffer_t* const* command_buffers,
-    iree_hal_buffer_binding_table_t const* binding_tables) {
+    iree_hal_command_buffer_t* command_buffer,
+    iree_hal_buffer_binding_table_t binding_table) {
   iree_hal_sync_device_t* device = iree_hal_sync_device_cast(base_device);
 
   // TODO(#4680): there is some better error handling here needed; we should
@@ -457,8 +434,8 @@
 
   // Run all deferred command buffers - any we could have run inline we already
   // did during recording.
-  IREE_RETURN_IF_ERROR(iree_hal_sync_device_apply_deferred_command_buffers(
-      device, command_buffer_count, command_buffers, binding_tables));
+  IREE_RETURN_IF_ERROR(iree_hal_sync_device_apply_deferred_command_buffer(
+      device, command_buffer, binding_table));
 
   // Signal all semaphores now that batch work has completed.
   IREE_RETURN_IF_ERROR(iree_hal_sync_semaphore_multi_signal(
@@ -526,6 +503,9 @@
         iree_hal_sync_device_query_semaphore_compatibility,
     .queue_alloca = iree_hal_sync_device_queue_alloca,
     .queue_dealloca = iree_hal_sync_device_queue_dealloca,
+    .queue_fill = iree_hal_device_queue_emulated_fill,
+    .queue_update = iree_hal_device_queue_emulated_update,
+    .queue_copy = iree_hal_device_queue_emulated_copy,
     .queue_read = iree_hal_sync_device_queue_read,
     .queue_write = iree_hal_sync_device_queue_write,
     .queue_execute = iree_hal_sync_device_queue_execute,
diff --git a/runtime/src/iree/hal/drivers/local_task/task_command_buffer.c b/runtime/src/iree/hal/drivers/local_task/task_command_buffer.c
index 0e60669..316ff05 100644
--- a/runtime/src/iree/hal/drivers/local_task/task_command_buffer.c
+++ b/runtime/src/iree/hal/drivers/local_task/task_command_buffer.c
@@ -455,12 +455,13 @@
 }
 
 //===----------------------------------------------------------------------===//
-// iree_hal_command_buffer_discard_buffer
+// iree_hal_command_buffer_advise_buffer
 //===----------------------------------------------------------------------===//
 
-static iree_status_t iree_hal_task_command_buffer_discard_buffer(
+static iree_status_t iree_hal_task_command_buffer_advise_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t buffer_ref) {
+    iree_hal_buffer_ref_t buffer_ref, iree_hal_memory_advise_flags_t flags,
+    uint64_t arg0, uint64_t arg1) {
   return iree_ok_status();
 }
 
@@ -509,7 +510,7 @@
 static iree_status_t iree_hal_task_command_buffer_fill_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
     iree_hal_buffer_ref_t target_ref, const void* pattern,
-    iree_host_size_t pattern_length) {
+    iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
   iree_hal_task_command_buffer_t* command_buffer =
       iree_hal_task_command_buffer_cast(base_command_buffer);
 
@@ -567,7 +568,8 @@
 
 static iree_status_t iree_hal_task_command_buffer_update_buffer(
     iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
-    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref) {
+    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref,
+    iree_hal_update_flags_t flags) {
   iree_hal_task_command_buffer_t* command_buffer =
       iree_hal_task_command_buffer_cast(base_command_buffer);
 
@@ -637,7 +639,8 @@
 
 static iree_status_t iree_hal_task_command_buffer_copy_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref) {
+    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref,
+    iree_hal_copy_flags_t flags) {
   iree_hal_task_command_buffer_t* command_buffer =
       iree_hal_task_command_buffer_cast(base_command_buffer);
 
@@ -946,7 +949,7 @@
         .signal_event = iree_hal_task_command_buffer_signal_event,
         .reset_event = iree_hal_task_command_buffer_reset_event,
         .wait_events = iree_hal_task_command_buffer_wait_events,
-        .discard_buffer = iree_hal_task_command_buffer_discard_buffer,
+        .advise_buffer = iree_hal_task_command_buffer_advise_buffer,
         .fill_buffer = iree_hal_task_command_buffer_fill_buffer,
         .update_buffer = iree_hal_task_command_buffer_update_buffer,
         .copy_buffer = iree_hal_task_command_buffer_copy_buffer,
diff --git a/runtime/src/iree/hal/drivers/local_task/task_device.c b/runtime/src/iree/hal/drivers/local_task/task_device.c
index df4bb93..8aa0925 100644
--- a/runtime/src/iree/hal/drivers/local_task/task_device.c
+++ b/runtime/src/iree/hal/drivers/local_task/task_device.c
@@ -415,7 +415,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_file_t* source_file, uint64_t source_offset,
     iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_read_flags_t flags) {
   // TODO: expose streaming chunk count/size options.
   iree_status_t loop_status = iree_ok_status();
   iree_hal_file_transfer_options_t options = {
@@ -436,7 +436,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
     iree_hal_file_t* target_file, uint64_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_write_flags_t flags) {
   // TODO: expose streaming chunk count/size options.
   iree_status_t loop_status = iree_ok_status();
   iree_hal_file_transfer_options_t options = {
@@ -455,14 +455,13 @@
     iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
     const iree_hal_semaphore_list_t wait_semaphore_list,
     const iree_hal_semaphore_list_t signal_semaphore_list,
-    iree_host_size_t command_buffer_count,
-    iree_hal_command_buffer_t* const* command_buffers,
-    iree_hal_buffer_binding_table_t const* binding_tables) {
+    iree_hal_command_buffer_t* command_buffer,
+    iree_hal_buffer_binding_table_t binding_table) {
   iree_hal_task_device_t* device = iree_hal_task_device_cast(base_device);
   // NOTE: today we are not discriminating queues based on command type.
   iree_host_size_t queue_index = iree_hal_task_device_select_queue(
       device, IREE_HAL_COMMAND_CATEGORY_ANY, queue_affinity);
-  if (command_buffer_count == 0) {
+  if (command_buffer == NULL) {
     // Fast-path for barriers (fork/join/sequence).
     return iree_hal_task_queue_submit_barrier(&device->queues[queue_index],
                                               wait_semaphore_list,
@@ -471,9 +470,8 @@
   iree_hal_task_submission_batch_t batch = {
       .wait_semaphores = wait_semaphore_list,
       .signal_semaphores = signal_semaphore_list,
-      .command_buffer_count = command_buffer_count,
-      .command_buffers = command_buffers,
-      .binding_tables = binding_tables,
+      .command_buffer = command_buffer,
+      .binding_table = binding_table,
   };
   return iree_hal_task_queue_submit_commands(&device->queues[queue_index], 1,
                                              &batch);
@@ -540,6 +538,9 @@
         iree_hal_task_device_query_semaphore_compatibility,
     .queue_alloca = iree_hal_task_device_queue_alloca,
     .queue_dealloca = iree_hal_task_device_queue_dealloca,
+    .queue_fill = iree_hal_device_queue_emulated_fill,
+    .queue_update = iree_hal_device_queue_emulated_update,
+    .queue_copy = iree_hal_device_queue_emulated_copy,
     .queue_read = iree_hal_task_device_queue_read,
     .queue_write = iree_hal_task_device_queue_write,
     .queue_execute = iree_hal_task_device_queue_execute,
diff --git a/runtime/src/iree/hal/drivers/local_task/task_queue.c b/runtime/src/iree/hal/drivers/local_task/task_queue.c
index dde76b5..cdd4c22 100644
--- a/runtime/src/iree/hal/drivers/local_task/task_queue.c
+++ b/runtime/src/iree/hal/drivers/local_task/task_queue.c
@@ -193,10 +193,10 @@
   // the submission has completed (or failed).
   iree_hal_resource_set_t* resource_set;
 
-  // Command buffers to be issued in the order they appeared in the submission.
-  iree_host_size_t command_buffer_count;
-  iree_hal_command_buffer_t** command_buffers;
-  iree_hal_buffer_binding_table_t* binding_tables;
+  // Command buffer to be issued.
+  iree_hal_command_buffer_t* command_buffer;
+  // Optional binding table for the command buffer.
+  iree_hal_buffer_binding_table_t binding_table;
 } iree_hal_task_queue_issue_cmd_t;
 
 static iree_status_t iree_hal_task_queue_issue_cmd_deferred(
@@ -265,34 +265,28 @@
   iree_hal_task_queue_issue_cmd_t* cmd = (iree_hal_task_queue_issue_cmd_t*)task;
   IREE_TRACE_ZONE_BEGIN(z0);
 
-  iree_status_t status = iree_ok_status();
-
   // NOTE: it's ok for there to be no command buffers - in that case the
   // submission was purely for synchronization.
-  for (iree_host_size_t i = 0; i < cmd->command_buffer_count; ++i) {
-    iree_hal_command_buffer_t* command_buffer = cmd->command_buffers[i];
-    if (iree_hal_task_command_buffer_isa(command_buffer)) {
-      if (cmd->binding_tables && cmd->binding_tables[i].count > 0) {
+  iree_status_t status = iree_ok_status();
+  if (cmd->command_buffer != NULL) {
+    if (iree_hal_task_command_buffer_isa(cmd->command_buffer)) {
+      if (cmd->binding_table.count > 0) {
         status = iree_make_status(
             IREE_STATUS_UNIMPLEMENTED,
             "task command buffers do not support binding tables yet");
       } else {
         status = iree_hal_task_command_buffer_issue(
-            command_buffer, &cmd->queue->state,
+            cmd->command_buffer, &cmd->queue->state,
             cmd->task.header.completion_task, cmd->arena, pending_submission);
       }
-    } else if (iree_hal_deferred_command_buffer_isa(command_buffer)) {
-      iree_hal_buffer_binding_table_t binding_table =
-          cmd->binding_tables ? cmd->binding_tables[i]
-                              : iree_hal_buffer_binding_table_empty();
+    } else if (iree_hal_deferred_command_buffer_isa(cmd->command_buffer)) {
       status = iree_hal_task_queue_issue_cmd_deferred(
-          cmd, command_buffer, binding_table, pending_submission);
+          cmd, cmd->command_buffer, cmd->binding_table, pending_submission);
     } else {
       status = iree_make_status(
           IREE_STATUS_UNIMPLEMENTED,
           "unsupported command buffer type for task queue submission");
     }
-    if (IREE_UNLIKELY(!iree_status_is_ok(status))) break;
   }
 
   IREE_TRACE_ZONE_END(z0);
@@ -308,21 +302,9 @@
       (iree_hal_task_submission_batch_t*)user_data;
 
   iree_hal_task_queue_issue_cmd_t* cmd = NULL;
-  iree_host_size_t command_buffers_size =
-      batch->command_buffer_count * sizeof(*cmd->command_buffers);
-  iree_host_size_t binding_tables_size = 0;
-  iree_host_size_t binding_table_elements_size = 0;
-  if (batch->binding_tables) {
-    binding_tables_size =
-        batch->command_buffer_count * sizeof(*cmd->binding_tables);
-    for (iree_host_size_t i = 0; i < batch->command_buffer_count; ++i) {
-      binding_table_elements_size += batch->binding_tables[i].count *
-                                     sizeof(*batch->binding_tables[i].bindings);
-    }
-  }
-  iree_host_size_t total_cmd_size = sizeof(*cmd) + command_buffers_size +
-                                    binding_tables_size +
-                                    binding_table_elements_size;
+  iree_host_size_t binding_table_elements_size =
+      batch->binding_table.count * sizeof(*batch->binding_table.bindings);
+  iree_host_size_t total_cmd_size = sizeof(*cmd) + binding_table_elements_size;
   IREE_RETURN_IF_ERROR(
       iree_arena_allocate(arena, total_cmd_size, (void**)&cmd));
   iree_task_call_initialize(
@@ -333,42 +315,30 @@
   cmd->queue = queue;
   cmd->resource_set = resource_set;
 
-  cmd->command_buffer_count = batch->command_buffer_count;
-  cmd->command_buffers =
-      (iree_hal_command_buffer_t**)((uint8_t*)cmd + sizeof(*cmd));
-  memcpy(cmd->command_buffers, batch->command_buffers, command_buffers_size);
+  cmd->command_buffer = batch->command_buffer;
+  cmd->binding_table = iree_hal_buffer_binding_table_empty();
 
   // Binding tables are optional and we only need this extra work if there were
   // any non-empty binding tables provided during submission.
   iree_status_t status = iree_ok_status();
   if (binding_table_elements_size > 0) {
     // Copy over binding tables and all of their contents.
-    cmd->binding_tables =
-        (iree_hal_buffer_binding_table_t*)((uint8_t*)cmd->command_buffers +
-                                           command_buffers_size);
     iree_hal_buffer_binding_t* binding_element_ptr =
-        (iree_hal_buffer_binding_t*)((uint8_t*)cmd->binding_tables +
-                                     binding_tables_size);
-    for (iree_host_size_t i = 0; i < batch->command_buffer_count; ++i) {
-      iree_host_size_t element_count = batch->binding_tables[i].count;
-      cmd->binding_tables[i].count = element_count;
-      cmd->binding_tables[i].bindings = binding_element_ptr;
-      memcpy((void*)cmd->binding_tables[i].bindings,
-             batch->binding_tables[i].bindings,
-             element_count * sizeof(*binding_element_ptr));
-      binding_element_ptr += element_count;
+        (iree_hal_buffer_binding_t*)((uint8_t*)cmd + sizeof(*cmd));
+    const iree_host_size_t element_count = batch->binding_table.count;
+    cmd->binding_table.count = element_count;
+    cmd->binding_table.bindings = binding_element_ptr;
+    memcpy((void*)cmd->binding_table.bindings, batch->binding_table.bindings,
+           element_count * sizeof(*binding_element_ptr));
+    binding_element_ptr += element_count;
 
-      // Bulk insert all bindings into the resource set. This will keep the
-      // referenced buffers live until the issue has completed. Note that if we
-      // fail here we need to clean up the resource set below before returning.
-      status = iree_hal_resource_set_insert_strided(
-          cmd->resource_set, element_count, cmd->binding_tables[i].bindings,
-          offsetof(iree_hal_buffer_binding_t, buffer),
-          sizeof(iree_hal_buffer_binding_t));
-      if (!iree_status_is_ok(status)) break;
-    }
-  } else {
-    cmd->binding_tables = NULL;
+    // Bulk insert all bindings into the resource set. This will keep the
+    // referenced buffers live until the issue has completed. Note that if we
+    // fail here we need to clean up the resource set below before returning.
+    status = iree_hal_resource_set_insert_strided(
+        cmd->resource_set, element_count, cmd->binding_table.bindings,
+        offsetof(iree_hal_buffer_binding_t, buffer),
+        sizeof(iree_hal_buffer_binding_t));
   }
 
   if (iree_status_is_ok(status)) {
@@ -684,9 +654,8 @@
   for (iree_host_size_t i = 0; i < batch_count; ++i) {
     const iree_hal_task_submission_batch_t* batch = &batches[i];
     IREE_RETURN_IF_ERROR(iree_hal_task_queue_submit(
-        queue, batch->wait_semaphores, batch->signal_semaphores,
-        batch->command_buffer_count,
-        (iree_hal_resource_t* const*)batch->command_buffers,
+        queue, batch->wait_semaphores, batch->signal_semaphores, 1,
+        (iree_hal_resource_t* const*)&batch->command_buffer,
         iree_hal_task_queue_issue_cmd_allocate, (void*)batch));
   }
   return iree_ok_status();
diff --git a/runtime/src/iree/hal/drivers/local_task/task_queue.h b/runtime/src/iree/hal/drivers/local_task/task_queue.h
index 0d667ae..91065ff 100644
--- a/runtime/src/iree/hal/drivers/local_task/task_queue.h
+++ b/runtime/src/iree/hal/drivers/local_task/task_queue.h
@@ -22,22 +22,18 @@
 extern "C" {
 #endif  // __cplusplus
 
-// A single batch of command buffers submitted to a device queue.
+// A single command buffer submitted to a device queue.
 // All of the wait semaphores must reach or exceed the given payload values
-// prior to the batch beginning execution. Each command buffer begins execution
-// in the order it is present in the list, though note that the command buffers
-// execute concurrently and require internal synchronization via events if there
-// are any dependencies between them. Only after all command buffers have
+// prior to the batch beginning execution. Only after all commands have
 // completed will the signal semaphores be updated to the provided payload
 // values.
 typedef struct iree_hal_task_submission_batch_t {
   // Semaphores to wait on prior to executing any command buffer.
   iree_hal_semaphore_list_t wait_semaphores;
 
-  // Command buffers to execute, in order, and optional binding tables 1:1.
-  iree_host_size_t command_buffer_count;
-  iree_hal_command_buffer_t* const* command_buffers;
-  iree_hal_buffer_binding_table_t const* binding_tables;
+  // Command buffer to execute and optional binding table.
+  iree_hal_command_buffer_t* command_buffer;
+  iree_hal_buffer_binding_table_t binding_table;
 
   // Semaphores to signal once all command buffers have completed execution.
   iree_hal_semaphore_list_t signal_semaphores;
diff --git a/runtime/src/iree/hal/drivers/metal/direct_command_buffer.m b/runtime/src/iree/hal/drivers/metal/direct_command_buffer.m
index 16bd8c5..f5f4a32 100644
--- a/runtime/src/iree/hal/drivers/metal/direct_command_buffer.m
+++ b/runtime/src/iree/hal/drivers/metal/direct_command_buffer.m
@@ -553,8 +553,9 @@
   return iree_make_status(IREE_STATUS_UNIMPLEMENTED, "event not yet supported");
 }
 
-static iree_status_t iree_hal_metal_command_buffer_discard_buffer(
-    iree_hal_command_buffer_t* base_command_buffer, iree_hal_buffer_ref_t buffer_ref) {
+static iree_status_t iree_hal_metal_command_buffer_advise_buffer(
+    iree_hal_command_buffer_t* base_command_buffer, iree_hal_buffer_ref_t buffer_ref,
+    iree_hal_memory_advise_flags_t flags, uint64_t arg0, uint64_t arg1) {
   // This is a hint to the device and we have nothing to do for Metal.
   return iree_ok_status();
 }
@@ -620,7 +621,7 @@
 
 static iree_status_t iree_hal_metal_command_buffer_prepare_fill_buffer(
     iree_hal_command_buffer_t* base_command_buffer, iree_hal_buffer_ref_t target_ref,
-    const void* pattern, iree_host_size_t pattern_length) {
+    const void* pattern, iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
   iree_hal_metal_command_buffer_t* command_buffer =
       iree_hal_metal_command_buffer_cast(base_command_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
@@ -764,7 +765,8 @@
 
 static iree_status_t iree_hal_metal_command_buffer_prepare_update_buffer(
     iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
-    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref) {
+    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref,
+    iree_hal_update_flags_t flags) {
   iree_hal_metal_command_buffer_t* command_buffer =
       iree_hal_metal_command_buffer_cast(base_command_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
@@ -797,7 +799,7 @@
 
 static iree_status_t iree_hal_metal_command_buffer_prepare_copy_buffer(
     iree_hal_command_buffer_t* base_command_buffer, iree_hal_buffer_ref_t source_ref,
-    iree_hal_buffer_ref_t target_ref) {
+    iree_hal_buffer_ref_t target_ref, iree_hal_copy_flags_t flags) {
   iree_hal_metal_command_buffer_t* command_buffer =
       iree_hal_metal_command_buffer_cast(base_command_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
@@ -1068,7 +1070,7 @@
     .signal_event = iree_hal_metal_command_buffer_signal_event,
     .reset_event = iree_hal_metal_command_buffer_reset_event,
     .wait_events = iree_hal_metal_command_buffer_wait_events,
-    .discard_buffer = iree_hal_metal_command_buffer_discard_buffer,
+    .advise_buffer = iree_hal_metal_command_buffer_advise_buffer,
     .fill_buffer = iree_hal_metal_command_buffer_prepare_fill_buffer,
     .update_buffer = iree_hal_metal_command_buffer_prepare_update_buffer,
     .copy_buffer = iree_hal_metal_command_buffer_prepare_copy_buffer,
diff --git a/runtime/src/iree/hal/drivers/metal/metal_device.m b/runtime/src/iree/hal/drivers/metal/metal_device.m
index 72e09d8..4f8b4fd 100644
--- a/runtime/src/iree/hal/drivers/metal/metal_device.m
+++ b/runtime/src/iree/hal/drivers/metal/metal_device.m
@@ -347,7 +347,7 @@
     const iree_hal_semaphore_list_t wait_semaphore_list,
     const iree_hal_semaphore_list_t signal_semaphore_list, iree_hal_file_t* source_file,
     uint64_t source_offset, iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_read_flags_t flags) {
   // TODO: expose streaming chunk count/size options.
   iree_status_t loop_status = iree_ok_status();
   iree_hal_file_transfer_options_t options = {
@@ -366,7 +366,7 @@
     const iree_hal_semaphore_list_t wait_semaphore_list,
     const iree_hal_semaphore_list_t signal_semaphore_list, iree_hal_buffer_t* source_buffer,
     iree_device_size_t source_offset, iree_hal_file_t* target_file, uint64_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_write_flags_t flags) {
   // TODO: expose streaming chunk count/size options.
   iree_status_t loop_status = iree_ok_status();
   iree_hal_file_transfer_options_t options = {
@@ -415,9 +415,8 @@
 static iree_status_t iree_hal_metal_device_queue_execute(
     iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
     const iree_hal_semaphore_list_t wait_semaphore_list,
-    const iree_hal_semaphore_list_t signal_semaphore_list, iree_host_size_t command_buffer_count,
-    iree_hal_command_buffer_t* const* command_buffers,
-    iree_hal_buffer_binding_table_t const* binding_tables) {
+    const iree_hal_semaphore_list_t signal_semaphore_list,
+    iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_binding_table_t binding_table) {
   iree_hal_metal_device_t* device = iree_hal_metal_device_cast(base_device);
   IREE_TRACE_ZONE_BEGIN(z0);
 
@@ -434,37 +433,30 @@
                                           signal_semaphore_list.semaphores);
   }
 
-  // Translate any deferred command buffers into real Metal command buffers.
+  // Translate deferred command buffers into real Metal command buffers.
   // We do this prior to beginning execution so that if we fail we don't leave the system in an
   // inconsistent state.
-  iree_hal_command_buffer_t** direct_command_buffers = (iree_hal_command_buffer_t**)iree_alloca(
-      command_buffer_count * sizeof(iree_hal_command_buffer_t*));
-  if (iree_status_is_ok(status)) {
-    for (iree_host_size_t i = 0; i < command_buffer_count; ++i) {
-      iree_hal_command_buffer_t* command_buffer = command_buffers[i];
-      iree_hal_command_buffer_t* direct_command_buffer = NULL;
-      if (iree_hal_deferred_command_buffer_isa(command_buffer)) {
-        // Create a temporary command buffer and replay the deferred command buffer with the
-        // binding table provided. Note that any resources used will be retained by the command
-        // buffer so we only need to retain the command buffer itself instead of the binding
-        // tables provided.
-        iree_hal_buffer_binding_table_t binding_table =
-            binding_tables ? binding_tables[i] : iree_hal_buffer_binding_table_empty();
-        @autoreleasepool {
-          status = iree_hal_metal_replay_command_buffer(device, command_buffer, binding_table,
-                                                        &direct_command_buffer);
-        }
-      } else {
-        // Retain the command buffer until the submission has completed.
-        iree_hal_command_buffer_retain(command_buffer);
-        direct_command_buffer = command_buffer;
+  iree_hal_command_buffer_t* direct_command_buffer = NULL;
+  if (iree_status_is_ok(status) && command_buffer) {
+    // NOTE: assigns the function-scope |direct_command_buffer| used by the submission below.
+    if (iree_hal_deferred_command_buffer_isa(command_buffer)) {
+      // Create a temporary command buffer and replay the deferred command buffer with the
+      // binding table provided. Note that any resources used will be retained by the command
+      // buffer so we only need to retain the command buffer itself instead of the binding
+      // table provided.
+      @autoreleasepool {
+        status = iree_hal_metal_replay_command_buffer(device, command_buffer, binding_table,
+                                                      &direct_command_buffer);
       }
-      if (!iree_status_is_ok(status)) break;
-      status = iree_hal_resource_set_insert(resource_set, 1, &direct_command_buffer);
-      if (!iree_status_is_ok(status)) break;
-      iree_hal_command_buffer_release(direct_command_buffer);  // retained in resource set
-      direct_command_buffers[i] = direct_command_buffer;
+    } else {
+      // Retain the command buffer until the submission has completed.
+      iree_hal_command_buffer_retain(command_buffer);
+      direct_command_buffer = command_buffer;
     }
+    if (iree_status_is_ok(status)) {
+      status = iree_hal_resource_set_insert(resource_set, 1, &direct_command_buffer);
+    }
+    iree_hal_command_buffer_release(direct_command_buffer);  // retained in resource set
   }
 
   if (iree_status_is_ok(status)) {
@@ -485,16 +477,14 @@
       // Then commit all recorded compute command buffers, except the last one, which we will patch
       // up with semaphore signaling.
       id<MTLCommandBuffer> signal_command_buffer = nil;
-      for (iree_host_size_t i = 0; i < command_buffer_count; ++i) {
+      if (direct_command_buffer) {
         // NOTE: translation happens above such that we always know these are direct command
         // buffers.
         //
         // TODO(indirect-cmd): support indirect command buffers and switch here, or only use
         // indirect command buffers and assume that instead.
-        iree_hal_command_buffer_t* direct_command_buffer = direct_command_buffers[i];
         id<MTLCommandBuffer> handle =
             iree_hal_metal_direct_command_buffer_handle(direct_command_buffer);
-        if (i + 1 != command_buffer_count) [handle commit];
         signal_command_buffer = handle;
       }
       if (signal_command_buffer == nil) {
@@ -627,6 +617,9 @@
     .query_semaphore_compatibility = iree_hal_metal_device_query_semaphore_compatibility,
     .queue_alloca = iree_hal_metal_device_queue_alloca,
     .queue_dealloca = iree_hal_metal_device_queue_dealloca,
+    .queue_fill = iree_hal_device_queue_emulated_fill,
+    .queue_update = iree_hal_device_queue_emulated_update,
+    .queue_copy = iree_hal_device_queue_emulated_copy,
     .queue_read = iree_hal_metal_device_queue_read,
     .queue_write = iree_hal_metal_device_queue_write,
     .queue_execute = iree_hal_metal_device_queue_execute,
diff --git a/runtime/src/iree/hal/drivers/null/command_buffer.c b/runtime/src/iree/hal/drivers/null/command_buffer.c
index 4f8fe82..16b33e9 100644
--- a/runtime/src/iree/hal/drivers/null/command_buffer.c
+++ b/runtime/src/iree/hal/drivers/null/command_buffer.c
@@ -220,9 +220,10 @@
   return status;
 }
 
-static iree_status_t iree_hal_null_command_buffer_discard_buffer(
+static iree_status_t iree_hal_null_command_buffer_advise_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t buffer_ref) {
+    iree_hal_buffer_ref_t buffer_ref, iree_hal_memory_advise_flags_t flags,
+    uint64_t arg0, uint64_t arg1) {
   iree_hal_null_command_buffer_t* command_buffer =
       iree_hal_null_command_buffer_cast(base_command_buffer);
 
@@ -241,7 +242,7 @@
 static iree_status_t iree_hal_null_command_buffer_fill_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
     iree_hal_buffer_ref_t target_ref, const void* pattern,
-    iree_host_size_t pattern_length) {
+    iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
   iree_hal_null_command_buffer_t* command_buffer =
       iree_hal_null_command_buffer_cast(base_command_buffer);
 
@@ -257,7 +258,8 @@
 
 static iree_status_t iree_hal_null_command_buffer_update_buffer(
     iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
-    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref) {
+    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref,
+    iree_hal_update_flags_t flags) {
   iree_hal_null_command_buffer_t* command_buffer =
       iree_hal_null_command_buffer_cast(base_command_buffer);
 
@@ -275,7 +277,8 @@
 
 static iree_status_t iree_hal_null_command_buffer_copy_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref) {
+    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref,
+    iree_hal_copy_flags_t flags) {
   iree_hal_null_command_buffer_t* command_buffer =
       iree_hal_null_command_buffer_cast(base_command_buffer);
 
@@ -361,7 +364,7 @@
         .signal_event = iree_hal_null_command_buffer_signal_event,
         .reset_event = iree_hal_null_command_buffer_reset_event,
         .wait_events = iree_hal_null_command_buffer_wait_events,
-        .discard_buffer = iree_hal_null_command_buffer_discard_buffer,
+        .advise_buffer = iree_hal_null_command_buffer_advise_buffer,
         .fill_buffer = iree_hal_null_command_buffer_fill_buffer,
         .update_buffer = iree_hal_null_command_buffer_update_buffer,
         .copy_buffer = iree_hal_null_command_buffer_copy_buffer,
diff --git a/runtime/src/iree/hal/drivers/null/device.c b/runtime/src/iree/hal/drivers/null/device.c
index ce12240..1195364 100644
--- a/runtime/src/iree/hal/drivers/null/device.c
+++ b/runtime/src/iree/hal/drivers/null/device.c
@@ -357,13 +357,64 @@
   return status;
 }
 
+static iree_status_t iree_hal_null_device_queue_fill(
+    iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
+    const iree_hal_semaphore_list_t wait_semaphore_list,
+    const iree_hal_semaphore_list_t signal_semaphore_list,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length, const void* pattern,
+    iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
+  // Forwards every argument unchanged to the shared emulated fill.
+  // TODO(null): prefer a native queue fill when available; the emulated path
+  // creates and executes a command buffer (recording/upload/allocation cost).
+  return iree_hal_device_queue_emulated_fill(
+      base_device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
+      target_buffer, target_offset, length, pattern, pattern_length, flags);
+}
+
+static iree_status_t iree_hal_null_device_queue_update(
+    iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
+    const iree_hal_semaphore_list_t wait_semaphore_list,
+    const iree_hal_semaphore_list_t signal_semaphore_list,
+    const void* source_buffer, iree_host_size_t source_offset,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length, iree_hal_update_flags_t flags) {
+  // Forwards every argument unchanged to the shared emulated update.
+  // TODO(null): prefer a native queue update when available. The emulated
+  // path creates and executes a command buffer, paying recording, upload, and
+  // allocation costs. Command buffers have limited capacity for embedded
+  // data, so the emulated version may need to allocate buffers, split the
+  // update into multiple commands, or use other workarounds that a native
+  // implementation would be able to avoid.
+  return iree_hal_device_queue_emulated_update(
+      base_device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
+      source_buffer, source_offset, target_buffer, target_offset, length,
+      flags);
+}
+
+static iree_status_t iree_hal_null_device_queue_copy(
+    iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
+    const iree_hal_semaphore_list_t wait_semaphore_list,
+    const iree_hal_semaphore_list_t signal_semaphore_list,
+    iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
+    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
+    iree_device_size_t length, iree_hal_copy_flags_t flags) {
+  // Forwards every argument unchanged to the shared emulated copy.
+  // TODO(null): prefer a native queue copy when available; the emulated path
+  // creates and executes a command buffer (recording/upload/allocation cost).
+  return iree_hal_device_queue_emulated_copy(
+      base_device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
+      source_buffer, source_offset, target_buffer, target_offset, length,
+      flags);
+}
+
 static iree_status_t iree_hal_null_device_queue_read(
     iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
     const iree_hal_semaphore_list_t wait_semaphore_list,
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_file_t* source_file, uint64_t source_offset,
     iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_read_flags_t flags) {
   // TODO(null): if native support for file operations are available then
   // definitely prefer those over the emulated implementation provided here by
   // default. The implementation performs allocations, creates semaphores, and
@@ -389,7 +440,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
     iree_hal_file_t* target_file, uint64_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_write_flags_t flags) {
   // TODO(null): if native support for file operations are available then
   // definitely prefer those over the emulated implementation provided here by
   // default. The implementation performs allocations, creates semaphores, and
@@ -413,9 +464,8 @@
     iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
     const iree_hal_semaphore_list_t wait_semaphore_list,
     const iree_hal_semaphore_list_t signal_semaphore_list,
-    iree_host_size_t command_buffer_count,
-    iree_hal_command_buffer_t* const* command_buffers,
-    iree_hal_buffer_binding_table_t const* binding_tables) {
+    iree_hal_command_buffer_t* command_buffer,
+    iree_hal_buffer_binding_table_t binding_table) {
   iree_hal_null_device_t* device = iree_hal_null_device_cast(base_device);
 
   // TODO(null): implement a wait, execute, and signal queue operation. The
@@ -423,14 +473,11 @@
   // are to be used when executing and it can be assumed that all resources
   // required for execution are accessible on those queues. If more than one
   // queue is specified the implementation may use any it prefers from the set.
-  // If more than one command buffer is provided it is expected that they are
-  // executed in order on the same queue.
 
-  // TODO(null): optional binding tables matching 1:1 with the command buffers
-  // are provided for any indirect command buffers (those who have a
-  // binding_capacity > 0). The binding tables must be captured by the
-  // implementation as they may be mutated or freed by the caller immediately
-  // after this call returns.
+  // TODO(null): an optional binding table is provided for indirect command
+  // buffers (those that have a binding_capacity > 0). The binding table must be
+  // captured by the implementation as it may be mutated or freed by the
+  // caller immediately after this call returns.
 
   // TODO(null): do this async - callers may be submitting work to multiple
   // devices or queues on the same device from the same thread and blocking here
@@ -552,6 +599,9 @@
         iree_hal_null_device_query_semaphore_compatibility,
     .queue_alloca = iree_hal_null_device_queue_alloca,
     .queue_dealloca = iree_hal_null_device_queue_dealloca,
+    .queue_fill = iree_hal_null_device_queue_fill,
+    .queue_update = iree_hal_null_device_queue_update,
+    .queue_copy = iree_hal_null_device_queue_copy,
     .queue_read = iree_hal_null_device_queue_read,
     .queue_write = iree_hal_null_device_queue_write,
     .queue_execute = iree_hal_null_device_queue_execute,
diff --git a/runtime/src/iree/hal/drivers/vulkan/direct_command_buffer.cc b/runtime/src/iree/hal/drivers/vulkan/direct_command_buffer.cc
index 20782fd..b6f91d4 100644
--- a/runtime/src/iree/hal/drivers/vulkan/direct_command_buffer.cc
+++ b/runtime/src/iree/hal/drivers/vulkan/direct_command_buffer.cc
@@ -495,9 +495,10 @@
   return iree_ok_status();
 }
 
-static iree_status_t iree_hal_vulkan_direct_command_buffer_discard_buffer(
+static iree_status_t iree_hal_vulkan_direct_command_buffer_advise_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t buffer_ref) {
+    iree_hal_buffer_ref_t buffer_ref, iree_hal_memory_advise_flags_t flags,
+    uint64_t arg0, uint64_t arg1) {
   // NOTE: we could use this to prevent queue family transitions.
   return iree_ok_status();
 }
@@ -527,7 +528,7 @@
 static iree_status_t iree_hal_vulkan_direct_command_buffer_fill_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
     iree_hal_buffer_ref_t target_ref, const void* pattern,
-    iree_host_size_t pattern_length) {
+    iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
   iree_hal_vulkan_direct_command_buffer_t* command_buffer =
       iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
   VkBuffer target_device_buffer =
@@ -590,7 +591,8 @@
 
 static iree_status_t iree_hal_vulkan_direct_command_buffer_update_buffer(
     iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
-    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref) {
+    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref,
+    iree_hal_update_flags_t flags) {
   iree_hal_vulkan_direct_command_buffer_t* command_buffer =
       iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
   VkBuffer target_device_buffer =
@@ -631,7 +633,8 @@
 
 static iree_status_t iree_hal_vulkan_direct_command_buffer_copy_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref) {
+    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref,
+    iree_hal_copy_flags_t flags) {
   iree_hal_vulkan_direct_command_buffer_t* command_buffer =
       iree_hal_vulkan_direct_command_buffer_cast(base_command_buffer);
   VkBuffer source_device_buffer =
@@ -817,8 +820,8 @@
         iree_hal_vulkan_direct_command_buffer_signal_event,
         /*.reset_event=*/iree_hal_vulkan_direct_command_buffer_reset_event,
         /*.wait_events=*/iree_hal_vulkan_direct_command_buffer_wait_events,
-        /*.discard_buffer=*/
-        iree_hal_vulkan_direct_command_buffer_discard_buffer,
+        /*.advise_buffer=*/
+        iree_hal_vulkan_direct_command_buffer_advise_buffer,
         /*.fill_buffer=*/iree_hal_vulkan_direct_command_buffer_fill_buffer,
         /*.update_buffer=*/
         iree_hal_vulkan_direct_command_buffer_update_buffer,
diff --git a/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc b/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc
index 3b7192d..6db27bc 100644
--- a/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc
+++ b/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc
@@ -1666,7 +1666,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_file_t* source_file, uint64_t source_offset,
     iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_read_flags_t flags) {
   // TODO: expose streaming chunk count/size options.
   iree_status_t loop_status = iree_ok_status();
   iree_hal_file_transfer_options_t options = {
@@ -1687,7 +1687,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
     iree_hal_file_t* target_file, uint64_t target_offset,
-    iree_device_size_t length, uint32_t flags) {
+    iree_device_size_t length, iree_hal_write_flags_t flags) {
   // TODO: expose streaming chunk count/size options.
   iree_status_t loop_status = iree_ok_status();
   iree_hal_file_transfer_options_t options = {
@@ -1706,9 +1706,8 @@
     iree_hal_device_t* base_device, iree_hal_queue_affinity_t queue_affinity,
     const iree_hal_semaphore_list_t wait_semaphore_list,
     const iree_hal_semaphore_list_t signal_semaphore_list,
-    iree_host_size_t command_buffer_count,
-    iree_hal_command_buffer_t* const* command_buffers,
-    iree_hal_buffer_binding_table_t const* binding_tables) {
+    iree_hal_command_buffer_t* command_buffer,
+    iree_hal_buffer_binding_table_t binding_table) {
   iree_hal_vulkan_device_t* device = iree_hal_vulkan_device_cast(base_device);
 
   // NOTE: today we are not discriminating queues based on command type.
@@ -1720,23 +1719,10 @@
   // buffers on demand here. When we natively support them we'll still need to
   // process the binding table prior to submission but that can be done in a
   // much more lightweight way depending on our concurrency needs.
-  if (IREE_UNLIKELY(command_buffer_count > 32)) {
-    // Guard the stack allocation, yuck.
-    return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
-                            "currently limited to a reasonable number of "
-                            "command buffers per submission");
-  }
-  iree_hal_command_buffer_t** translated_command_buffers =
-      (iree_hal_command_buffer_t**)iree_alloca(
-          sizeof(iree_hal_command_buffer_t*) * command_buffer_count);
+  iree_hal_command_buffer_t* translated_command_buffer = NULL;
   iree_status_t status = iree_ok_status();
-  for (iree_host_size_t i = 0; i < command_buffer_count; ++i) {
-    iree_hal_command_buffer_t* command_buffer = command_buffers[i];
-    if (iree_hal_deferred_command_buffer_isa(command_buffers[i])) {
-      iree_hal_command_buffer_t* translated_command_buffer = NULL;
-      iree_hal_buffer_binding_table_t binding_table =
-          binding_tables ? binding_tables[i]
-                         : iree_hal_buffer_binding_table_empty();
+  if (command_buffer != NULL) {
+    if (iree_hal_deferred_command_buffer_isa(command_buffer)) {
       status = iree_hal_vulkan_device_create_command_buffer(
           base_device,
           iree_hal_command_buffer_mode(command_buffer) |
@@ -1752,9 +1738,8 @@
         status = iree_hal_deferred_command_buffer_apply(
             command_buffer, translated_command_buffer, binding_table);
       }
-      translated_command_buffers[i] = translated_command_buffer;
     } else {
-      translated_command_buffers[i] = command_buffer;
+      translated_command_buffer = command_buffer;
       iree_hal_command_buffer_retain(command_buffer);
     }
   }
@@ -1762,8 +1747,10 @@
   if (iree_status_is_ok(status)) {
     iree_hal_vulkan_submission_batch_t batch = {
         /*.wait_semaphores=*/wait_semaphore_list,
-        /*.command_buffer_count=*/command_buffer_count,
-        /*.command_buffers=*/translated_command_buffers,
+        /*.command_buffer_count=*/
+        (iree_host_size_t)(translated_command_buffer ? 1 : 0),
+        /*.command_buffers=*/
+        translated_command_buffer ? &translated_command_buffer : NULL,
         /*.signal_semaphores=*/signal_semaphore_list,
     };
     status = queue->Submit(1, &batch);
@@ -1777,9 +1764,7 @@
 
   // TODO(indirect-cmd): when async these need to be retained until the
   // submission completes.
-  for (iree_host_size_t i = 0; i < command_buffer_count; ++i) {
-    iree_hal_command_buffer_release(translated_command_buffers[i]);
-  }
+  iree_hal_command_buffer_release(translated_command_buffer);
 
   return status;
 }
@@ -1907,6 +1892,9 @@
     iree_hal_vulkan_device_query_semaphore_compatibility,
     /*.queue_alloca=*/iree_hal_vulkan_device_queue_alloca,
     /*.queue_dealloca=*/iree_hal_vulkan_device_queue_dealloca,
+    /*.queue_fill=*/iree_hal_device_queue_emulated_fill,
+    /*.queue_update=*/iree_hal_device_queue_emulated_update,
+    /*.queue_copy=*/iree_hal_device_queue_emulated_copy,
     /*.queue_read=*/iree_hal_vulkan_device_queue_read,
     /*.queue_write=*/iree_hal_vulkan_device_queue_write,
     /*.queue_execute=*/iree_hal_vulkan_device_queue_execute,
diff --git a/runtime/src/iree/hal/local/inline_command_buffer.c b/runtime/src/iree/hal/local/inline_command_buffer.c
index 7ea8513..a28417f 100644
--- a/runtime/src/iree/hal/local/inline_command_buffer.c
+++ b/runtime/src/iree/hal/local/inline_command_buffer.c
@@ -285,12 +285,13 @@
 }
 
 //===----------------------------------------------------------------------===//
-// iree_hal_command_buffer_discard_buffer
+// iree_hal_command_buffer_advise_buffer
 //===----------------------------------------------------------------------===//
 
-static iree_status_t iree_hal_inline_command_buffer_discard_buffer(
+static iree_status_t iree_hal_inline_command_buffer_advise_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t buffer_ref) {
+    iree_hal_buffer_ref_t buffer_ref, iree_hal_memory_advise_flags_t flags,
+    uint64_t arg0, uint64_t arg1) {
   // Could be treated as a cache invalidation as it indicates we won't be using
   // the existing buffer contents again.
   return iree_ok_status();
@@ -303,7 +304,7 @@
 static iree_status_t iree_hal_inline_command_buffer_fill_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
     iree_hal_buffer_ref_t target_ref, const void* pattern,
-    iree_host_size_t pattern_length) {
+    iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
   return iree_hal_buffer_map_fill(target_ref.buffer, target_ref.offset,
                                   target_ref.length, pattern, pattern_length);
 }
@@ -314,7 +315,8 @@
 
 static iree_status_t iree_hal_inline_command_buffer_update_buffer(
     iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
-    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref) {
+    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref,
+    iree_hal_update_flags_t flags) {
   return iree_hal_buffer_map_write(
       target_ref.buffer, target_ref.offset,
       (const uint8_t*)source_buffer + source_offset, target_ref.length);
@@ -326,7 +328,8 @@
 
 static iree_status_t iree_hal_inline_command_buffer_copy_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref) {
+    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref,
+    iree_hal_copy_flags_t flags) {
   return iree_hal_buffer_map_copy(source_ref.buffer, source_ref.offset,
                                   target_ref.buffer, target_ref.offset,
                                   target_ref.length);
@@ -503,7 +506,7 @@
         .signal_event = iree_hal_inline_command_buffer_signal_event,
         .reset_event = iree_hal_inline_command_buffer_reset_event,
         .wait_events = iree_hal_inline_command_buffer_wait_events,
-        .discard_buffer = iree_hal_inline_command_buffer_discard_buffer,
+        .advise_buffer = iree_hal_inline_command_buffer_advise_buffer,
         .fill_buffer = iree_hal_inline_command_buffer_fill_buffer,
         .update_buffer = iree_hal_inline_command_buffer_update_buffer,
         .copy_buffer = iree_hal_inline_command_buffer_copy_buffer,
diff --git a/runtime/src/iree/hal/utils/debug_allocator.c b/runtime/src/iree/hal/utils/debug_allocator.c
index b0be6d8..6389d8b 100644
--- a/runtime/src/iree/hal/utils/debug_allocator.c
+++ b/runtime/src/iree/hal/utils/debug_allocator.c
@@ -170,7 +170,7 @@
     };
     status = iree_hal_device_queue_execute(
         device, IREE_HAL_QUEUE_AFFINITY_ANY, iree_hal_semaphore_list_empty(),
-        signal_list, 1, &command_buffer, /*binding_tables=*/NULL);
+        signal_list, command_buffer, iree_hal_buffer_binding_table_empty());
   }
 
   if (iree_status_is_ok(status)) {
diff --git a/runtime/src/iree/hal/utils/deferred_command_buffer.c b/runtime/src/iree/hal/utils/deferred_command_buffer.c
index 7206254..939cddc 100644
--- a/runtime/src/iree/hal/utils/deferred_command_buffer.c
+++ b/runtime/src/iree/hal/utils/deferred_command_buffer.c
@@ -18,7 +18,7 @@
   IREE_HAL_CMD_SIGNAL_EVENT,
   IREE_HAL_CMD_RESET_EVENT,
   IREE_HAL_CMD_WAIT_EVENTS,
-  IREE_HAL_CMD_DISCARD_BUFFER,
+  IREE_HAL_CMD_ADVISE_BUFFER,
   IREE_HAL_CMD_FILL_BUFFER,
   IREE_HAL_CMD_UPDATE_BUFFER,
   IREE_HAL_CMD_COPY_BUFFER,
@@ -433,17 +433,21 @@
 }
 
 //===----------------------------------------------------------------------===//
-// IREE_HAL_CMD_DISCARD_BUFFER
+// IREE_HAL_CMD_ADVISE_BUFFER
 //===----------------------------------------------------------------------===//
 
-typedef struct iree_hal_cmd_discard_buffer_t {
+typedef struct iree_hal_cmd_advise_buffer_t {
   iree_hal_cmd_header_t header;
   iree_hal_buffer_ref_t buffer_ref;
-} iree_hal_cmd_discard_buffer_t;
+  iree_hal_memory_advise_flags_t flags;
+  uint64_t arg0;
+  uint64_t arg1;
+} iree_hal_cmd_advise_buffer_t;
 
-static iree_status_t iree_hal_deferred_command_buffer_discard_buffer(
+static iree_status_t iree_hal_deferred_command_buffer_advise_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t buffer_ref) {
+    iree_hal_buffer_ref_t buffer_ref, iree_hal_memory_advise_flags_t flags,
+    uint64_t arg0, uint64_t arg1) {
   iree_hal_deferred_command_buffer_t* command_buffer =
       iree_hal_deferred_command_buffer_cast(base_command_buffer);
   iree_hal_cmd_list_t* cmd_list = &command_buffer->cmd_list;
@@ -451,22 +455,25 @@
     IREE_RETURN_IF_ERROR(iree_hal_resource_set_insert(
         command_buffer->resource_set, 1, &buffer_ref.buffer));
   }
-  iree_hal_cmd_discard_buffer_t* cmd = NULL;
+  iree_hal_cmd_advise_buffer_t* cmd = NULL;
   IREE_RETURN_IF_ERROR(iree_hal_cmd_list_append_command(
-      cmd_list, IREE_HAL_CMD_DISCARD_BUFFER, sizeof(*cmd), (void**)&cmd));
+      cmd_list, IREE_HAL_CMD_ADVISE_BUFFER, sizeof(*cmd), (void**)&cmd));
   cmd->buffer_ref = buffer_ref;
+  cmd->flags = flags;
+  cmd->arg0 = arg0;
+  cmd->arg1 = arg1;
   return iree_ok_status();
 }
 
-static iree_status_t iree_hal_deferred_command_buffer_apply_discard_buffer(
+static iree_status_t iree_hal_deferred_command_buffer_apply_advise_buffer(
     iree_hal_command_buffer_t* target_command_buffer,
     iree_hal_buffer_binding_table_t binding_table,
-    const iree_hal_cmd_discard_buffer_t* cmd) {
+    const iree_hal_cmd_advise_buffer_t* cmd) {
   iree_hal_buffer_ref_t buffer_ref;
   IREE_RETURN_IF_ERROR(iree_hal_buffer_binding_table_resolve_ref(
       binding_table, cmd->buffer_ref, &buffer_ref));
-  return iree_hal_command_buffer_discard_buffer(target_command_buffer,
-                                                buffer_ref);
+  return iree_hal_command_buffer_advise_buffer(
+      target_command_buffer, buffer_ref, cmd->flags, cmd->arg0, cmd->arg1);
 }
 
 //===----------------------------------------------------------------------===//
@@ -478,12 +485,13 @@
   iree_hal_buffer_ref_t target_ref;
   uint64_t pattern;
   iree_host_size_t pattern_length;
+  iree_hal_fill_flags_t flags;
 } iree_hal_cmd_fill_buffer_t;
 
 static iree_status_t iree_hal_deferred_command_buffer_fill_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
     iree_hal_buffer_ref_t target_ref, const void* pattern,
-    iree_host_size_t pattern_length) {
+    iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
   iree_hal_deferred_command_buffer_t* command_buffer =
       iree_hal_deferred_command_buffer_cast(base_command_buffer);
   iree_hal_cmd_list_t* cmd_list = &command_buffer->cmd_list;
@@ -501,6 +509,7 @@
   cmd->target_ref = target_ref;
   memcpy(&cmd->pattern, pattern, pattern_length);
   cmd->pattern_length = pattern_length;
+  cmd->flags = flags;
   return iree_ok_status();
 }
 
@@ -513,7 +522,7 @@
       binding_table, cmd->target_ref, &target_ref));
   return iree_hal_command_buffer_fill_buffer(target_command_buffer, target_ref,
                                              (void**)&cmd->pattern,
-                                             cmd->pattern_length);
+                                             cmd->pattern_length, cmd->flags);
 }
 
 //===----------------------------------------------------------------------===//
@@ -523,12 +532,14 @@
 typedef struct iree_hal_cmd_update_buffer_t {
   iree_hal_cmd_header_t header;
   iree_hal_buffer_ref_t target_ref;
+  iree_hal_update_flags_t flags;
   uint8_t source_buffer[];
 } iree_hal_cmd_update_buffer_t;
 
 static iree_status_t iree_hal_deferred_command_buffer_update_buffer(
     iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
-    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref) {
+    iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref,
+    iree_hal_update_flags_t flags) {
   iree_hal_deferred_command_buffer_t* command_buffer =
       iree_hal_deferred_command_buffer_cast(base_command_buffer);
   iree_hal_cmd_list_t* cmd_list = &command_buffer->cmd_list;
@@ -542,6 +553,7 @@
       sizeof(*cmd) + sizeof(cmd->source_buffer[0]) * target_ref.length,
       (void**)&cmd));
   cmd->target_ref = target_ref;
+  cmd->flags = flags;
   memcpy(cmd->source_buffer, (const uint8_t*)source_buffer + source_offset,
          sizeof(cmd->source_buffer[0]) * target_ref.length);
   return iree_ok_status();
@@ -555,7 +567,7 @@
   IREE_RETURN_IF_ERROR(iree_hal_buffer_binding_table_resolve_ref(
       binding_table, cmd->target_ref, &target_ref));
   return iree_hal_command_buffer_update_buffer(
-      target_command_buffer, cmd->source_buffer, 0, target_ref);
+      target_command_buffer, cmd->source_buffer, 0, target_ref, cmd->flags);
 }
 
 //===----------------------------------------------------------------------===//
@@ -566,11 +578,13 @@
   iree_hal_cmd_header_t header;
   iree_hal_buffer_ref_t source_ref;
   iree_hal_buffer_ref_t target_ref;
+  iree_hal_copy_flags_t flags;
 } iree_hal_cmd_copy_buffer_t;
 
 static iree_status_t iree_hal_deferred_command_buffer_copy_buffer(
     iree_hal_command_buffer_t* base_command_buffer,
-    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref) {
+    iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref,
+    iree_hal_copy_flags_t flags) {
   iree_hal_deferred_command_buffer_t* command_buffer =
       iree_hal_deferred_command_buffer_cast(base_command_buffer);
   iree_hal_cmd_list_t* cmd_list = &command_buffer->cmd_list;
@@ -591,6 +605,7 @@
       cmd_list, IREE_HAL_CMD_COPY_BUFFER, sizeof(*cmd), (void**)&cmd));
   cmd->source_ref = source_ref;
   cmd->target_ref = target_ref;
+  cmd->flags = flags;
   return iree_ok_status();
 }
 
@@ -605,7 +620,7 @@
   IREE_RETURN_IF_ERROR(iree_hal_buffer_binding_table_resolve_ref(
       binding_table, cmd->target_ref, &target_ref));
   return iree_hal_command_buffer_copy_buffer(target_command_buffer, source_ref,
-                                             target_ref);
+                                             target_ref, cmd->flags);
 }
 
 //===----------------------------------------------------------------------===//
@@ -832,8 +847,8 @@
         iree_hal_deferred_command_buffer_apply_reset_event,
     [IREE_HAL_CMD_WAIT_EVENTS] = (iree_hal_cmd_apply_fn_t)
         iree_hal_deferred_command_buffer_apply_wait_events,
-    [IREE_HAL_CMD_DISCARD_BUFFER] = (iree_hal_cmd_apply_fn_t)
-        iree_hal_deferred_command_buffer_apply_discard_buffer,
+    [IREE_HAL_CMD_ADVISE_BUFFER] = (iree_hal_cmd_apply_fn_t)
+        iree_hal_deferred_command_buffer_apply_advise_buffer,
     [IREE_HAL_CMD_FILL_BUFFER] = (iree_hal_cmd_apply_fn_t)
         iree_hal_deferred_command_buffer_apply_fill_buffer,
     [IREE_HAL_CMD_UPDATE_BUFFER] = (iree_hal_cmd_apply_fn_t)
@@ -894,7 +909,7 @@
         .signal_event = iree_hal_deferred_command_buffer_signal_event,
         .reset_event = iree_hal_deferred_command_buffer_reset_event,
         .wait_events = iree_hal_deferred_command_buffer_wait_events,
-        .discard_buffer = iree_hal_deferred_command_buffer_discard_buffer,
+        .advise_buffer = iree_hal_deferred_command_buffer_advise_buffer,
         .fill_buffer = iree_hal_deferred_command_buffer_fill_buffer,
         .update_buffer = iree_hal_deferred_command_buffer_update_buffer,
         .copy_buffer = iree_hal_deferred_command_buffer_copy_buffer,
diff --git a/runtime/src/iree/hal/utils/file_transfer.c b/runtime/src/iree/hal/utils/file_transfer.c
index 2bc8dec..193a2f5 100644
--- a/runtime/src/iree/hal/utils/file_transfer.c
+++ b/runtime/src/iree/hal/utils/file_transfer.c
@@ -524,7 +524,8 @@
         operation->device, operation->queue_affinity, wait_semaphore_list,
         signal_semaphore_list, operation->staging_buffer,
         worker->staging_buffer_offset, operation->buffer,
-        operation->buffer_offset + transfer_offset, transfer_length);
+        operation->buffer_offset + transfer_offset, transfer_length,
+        IREE_HAL_COPY_FLAG_NONE);
   }
 
   // Wait for the copy to complete and tick again if we expect there to be more
@@ -688,7 +689,7 @@
       operation->device, operation->queue_affinity, wait_semaphore_list,
       signal_semaphore_list, operation->buffer,
       operation->buffer_offset + transfer_offset, operation->staging_buffer,
-      worker->staging_buffer_offset, transfer_length);
+      worker->staging_buffer_offset, transfer_length, IREE_HAL_COPY_FLAG_NONE);
 
   // Wait for the copy to complete so we can write it to the file.
   if (iree_status_is_ok(status)) {
@@ -860,7 +861,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_file_t* source_file, uint64_t source_offset,
     iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
-    iree_device_size_t length, uint32_t flags,
+    iree_device_size_t length, iree_hal_read_flags_t flags,
     iree_hal_file_transfer_options_t options) {
   IREE_RETURN_IF_ERROR(
       iree_hal_file_validate_access(source_file, IREE_HAL_MEMORY_ACCESS_READ));
@@ -872,7 +873,7 @@
     return iree_hal_device_queue_copy(
         device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
         storage_buffer, (iree_device_size_t)source_offset, target_buffer,
-        target_offset, length);
+        target_offset, length, IREE_HAL_COPY_FLAG_NONE);
   }
 
   // Allocate full transfer operation.
@@ -900,7 +901,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
     iree_hal_file_t* target_file, uint64_t target_offset,
-    iree_device_size_t length, uint32_t flags,
+    iree_device_size_t length, iree_hal_write_flags_t flags,
     iree_hal_file_transfer_options_t options) {
   // EXPERIMENTAL: assume memory files only today (as that's all we have).
   IREE_RETURN_IF_ERROR(
@@ -913,7 +914,7 @@
     return iree_hal_device_queue_copy(
         device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
         source_buffer, source_offset, storage_buffer,
-        (iree_device_size_t)target_offset, length);
+        (iree_device_size_t)target_offset, length, IREE_HAL_COPY_FLAG_NONE);
   }
 
   // Allocate full transfer operation.
diff --git a/runtime/src/iree/hal/utils/file_transfer.h b/runtime/src/iree/hal/utils/file_transfer.h
index cf85099..ece694b 100644
--- a/runtime/src/iree/hal/utils/file_transfer.h
+++ b/runtime/src/iree/hal/utils/file_transfer.h
@@ -60,7 +60,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_file_t* source_file, uint64_t source_offset,
     iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
-    iree_device_size_t length, uint32_t flags,
+    iree_device_size_t length, iree_hal_read_flags_t flags,
     iree_hal_file_transfer_options_t options);
 
 // EXPERIMENTAL: eventually we'll focus this only on emulating support where
@@ -83,7 +83,7 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
     iree_hal_file_t* target_file, uint64_t target_offset,
-    iree_device_size_t length, uint32_t flags,
+    iree_device_size_t length, iree_hal_write_flags_t flags,
     iree_hal_file_transfer_options_t options);
 
 #ifdef __cplusplus
diff --git a/runtime/src/iree/io/parameter_index_provider.c b/runtime/src/iree/io/parameter_index_provider.c
index 75ed9c5..9e9a5f4 100644
--- a/runtime/src/iree/io/parameter_index_provider.c
+++ b/runtime/src/iree/io/parameter_index_provider.c
@@ -512,7 +512,7 @@
       z0, iree_hal_command_buffer_fill_buffer(
               batch->transfer_command_buffer,
               iree_hal_make_buffer_ref(buffer, buffer_offset, length), pattern,
-              pattern_length));
+              pattern_length, IREE_HAL_FILL_FLAG_NONE));
 
   IREE_TRACE_ZONE_END(z0);
   return iree_ok_status();
@@ -523,7 +523,7 @@
     iree_io_parameter_op_batch_t* batch, iree_hal_file_t* source_file,
     uint64_t source_file_offset, iree_hal_buffer_t* target_buffer,
     iree_device_size_t target_buffer_offset, iree_device_size_t length,
-    uint32_t flags) {
+    iree_hal_read_flags_t flags) {
   IREE_ASSERT_ARGUMENT(batch);
   IREE_ASSERT_ARGUMENT(source_file);
   IREE_ASSERT_ARGUMENT(target_buffer);
@@ -546,7 +546,8 @@
 static iree_status_t iree_io_parameter_op_batch_enqueue_file_write(
     iree_io_parameter_op_batch_t* batch, iree_hal_buffer_t* source_buffer,
     iree_device_size_t source_buffer_offset, iree_hal_file_t* target_file,
-    uint64_t target_file_offset, iree_device_size_t length, uint32_t flags) {
+    uint64_t target_file_offset, iree_device_size_t length,
+    iree_hal_write_flags_t flags) {
   IREE_ASSERT_ARGUMENT(batch);
   IREE_ASSERT_ARGUMENT(source_buffer);
   IREE_ASSERT_ARGUMENT(target_file);
@@ -591,8 +592,8 @@
     if (iree_status_is_ok(status)) {
       status = iree_hal_device_queue_execute(
           batch->device, batch->queue_affinity, step.wait_semaphore_list,
-          step.signal_semaphore_list, 1, &batch->transfer_command_buffer,
-          /*binding_tables=*/NULL);
+          step.signal_semaphore_list, batch->transfer_command_buffer,
+          iree_hal_buffer_binding_table_empty());
     }
     IREE_TRACE_ZONE_END(z_transfer);
   }
diff --git a/runtime/src/iree/modules/check/module.cc b/runtime/src/iree/modules/check/module.cc
index 0dfe9e1..9ffd6cc 100644
--- a/runtime/src/iree/modules/check/module.cc
+++ b/runtime/src/iree/modules/check/module.cc
@@ -207,7 +207,8 @@
     IREE_RETURN_IF_ERROR(iree_hal_command_buffer_copy_buffer(
         command_buffer.get(),
         iree_hal_make_buffer_ref(source_buffer, 0, buffer_length),
-        iree_hal_make_buffer_ref(target_buffer.get(), 0, buffer_length)));
+        iree_hal_make_buffer_ref(target_buffer.get(), 0, buffer_length),
+        IREE_HAL_COPY_FLAG_NONE));
     vm::ref<iree_hal_buffer_view_t> target_view;
     IREE_RETURN_IF_ERROR(iree_hal_buffer_view_create_like(
         target_buffer.get(), source_views[i].get(),
@@ -224,8 +225,8 @@
       semaphore.get(), 1ull, iree_hal_device_host_allocator(device), &fence));
   IREE_RETURN_IF_ERROR(iree_hal_device_queue_execute(
       device, IREE_HAL_QUEUE_AFFINITY_ANY, iree_hal_semaphore_list_empty(),
-      iree_hal_fence_semaphore_list(fence.get()), 1, &command_buffer,
-      /*binding_tables=*/NULL));
+      iree_hal_fence_semaphore_list(fence.get()), command_buffer.get(),
+      iree_hal_buffer_binding_table_empty()));
   IREE_RETURN_IF_ERROR(
       iree_hal_fence_wait(fence.get(), iree_infinite_timeout()));
   return std::move(target_views);
diff --git a/runtime/src/iree/modules/hal/module.c b/runtime/src/iree/modules/hal/module.c
index 38a95c6..2aac43f 100644
--- a/runtime/src/iree/modules/hal/module.c
+++ b/runtime/src/iree/modules/hal/module.c
@@ -834,9 +834,9 @@
       iree_hal_buffer_check_deref_or_null(args->r1, &target_ref.buffer));
   uint32_t pattern = (uint32_t)args->i5;
   uint32_t pattern_length = (uint32_t)args->i6;
-
+  iree_hal_fill_flags_t flags = IREE_HAL_FILL_FLAG_NONE;
   return iree_hal_command_buffer_fill_buffer(command_buffer, target_ref,
-                                             &pattern, pattern_length);
+                                             &pattern, pattern_length, flags);
 }
 
 IREE_VM_ABI_EXPORT(iree_hal_module_command_buffer_update_buffer,  //
@@ -855,13 +855,13 @@
       target_buffer_slot, target_offset, length);
   IREE_RETURN_IF_ERROR(
       iree_hal_buffer_check_deref_or_null(args->r3, &target_ref.buffer));
-
   iree_const_byte_span_t source_span = iree_const_byte_span_empty();
   IREE_RETURN_IF_ERROR(iree_vm_buffer_map_ro(
       source_buffer, source_offset, (iree_host_size_t)length, 1, &source_span));
-
+  iree_hal_update_flags_t flags = IREE_HAL_UPDATE_FLAG_NONE;
   return iree_hal_command_buffer_update_buffer(command_buffer, source_span.data,
-                                               /*source_offset=*/0, target_ref);
+                                               /*source_offset=*/0, target_ref,
+                                               flags);
 }
 
 IREE_VM_ABI_EXPORT(iree_hal_module_command_buffer_copy_buffer,  //
@@ -883,9 +883,9 @@
       iree_hal_buffer_check_deref_or_null(args->r3, &source_ref.buffer));
   IREE_RETURN_IF_ERROR(
       iree_hal_buffer_check_deref_or_null(args->r5, &target_ref.buffer));
-
+  iree_hal_copy_flags_t flags = IREE_HAL_COPY_FLAG_NONE;
   return iree_hal_command_buffer_copy_buffer(command_buffer, source_ref,
-                                             target_ref);
+                                             target_ref, flags);
 }
 
 IREE_VM_ABI_EXPORT(iree_hal_module_command_buffer_collective,  //
@@ -911,7 +911,6 @@
   IREE_RETURN_IF_ERROR(
       iree_hal_buffer_check_deref_or_null(args->r7, &recv_ref.buffer));
   iree_device_size_t element_count = iree_hal_cast_device_size(args->i12);
-
   return iree_hal_command_buffer_collective(command_buffer, channel, op, param,
                                             send_ref, recv_ref, element_count);
 }
@@ -1215,7 +1214,7 @@
   IREE_RETURN_IF_ERROR(iree_hal_buffer_check_deref(args->r6, &target_buffer));
   iree_device_size_t target_offset = iree_hal_cast_device_size(args->i7);
   iree_device_size_t length = iree_hal_cast_device_size(args->i8);
-  uint32_t flags = (uint32_t)args->i9;
+  iree_hal_read_flags_t flags = (iree_hal_read_flags_t)args->i9;
   return iree_hal_device_queue_read(
       device, queue_affinity, iree_hal_fence_semaphore_list(wait_fence),
       iree_hal_fence_semaphore_list(signal_fence), source_file, source_offset,
@@ -1238,7 +1237,7 @@
   IREE_RETURN_IF_ERROR(iree_hal_file_check_deref(args->r6, &target_file));
   uint64_t target_offset = (uint64_t)args->i7;
   iree_device_size_t length = iree_hal_cast_device_size(args->i8);
-  uint32_t flags = (uint32_t)args->i9;
+  iree_hal_write_flags_t flags = (iree_hal_write_flags_t)args->i9;
   return iree_hal_device_queue_write(
       device, queue_affinity, iree_hal_fence_semaphore_list(wait_fence),
       iree_hal_fence_semaphore_list(signal_fence), source_buffer, source_offset,
@@ -1258,10 +1257,15 @@
   iree_hal_command_buffer_t** command_buffers = NULL;
   IREE_VM_ABI_VLA_STACK_DEREF(args, a4_count, a4, iree_hal_command_buffer, 32,
                               &command_buffer_count, &command_buffers);
+  if (command_buffer_count > 1) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "only zero or one command buffer is allowed");
+  }
   return iree_hal_device_queue_execute(
       device, queue_affinity, iree_hal_fence_semaphore_list(wait_fence),
-      iree_hal_fence_semaphore_list(signal_fence), command_buffer_count,
-      command_buffers, /*binding_tables=*/NULL);
+      iree_hal_fence_semaphore_list(signal_fence),
+      command_buffer_count > 0 ? command_buffers[0] : NULL,
+      iree_hal_buffer_binding_table_empty());
 }
 
 IREE_VM_ABI_EXPORT(iree_hal_module_device_queue_execute_indirect,  //
@@ -1313,8 +1317,8 @@
     };
     status = iree_hal_device_queue_execute(
         device, queue_affinity, iree_hal_fence_semaphore_list(wait_fence),
-        iree_hal_fence_semaphore_list(signal_fence), 1, &command_buffer,
-        &binding_table);
+        iree_hal_fence_semaphore_list(signal_fence), command_buffer,
+        binding_table);
   }
 
   // If we had to heap-allocate the binding table storage it must be freed
diff --git a/runtime/src/iree/tooling/function_util.c b/runtime/src/iree/tooling/function_util.c
index a21b6c5..0e2d38d 100644
--- a/runtime/src/iree/tooling/function_util.c
+++ b/runtime/src/iree/tooling/function_util.c
@@ -87,7 +87,8 @@
       iree_hal_make_buffer_ref(source_buffer, 0,
                                iree_hal_buffer_byte_length(source_buffer)),
       iree_hal_make_buffer_ref(target_buffer, 0,
-                               iree_hal_buffer_byte_length(source_buffer)));
+                               iree_hal_buffer_byte_length(source_buffer)),
+      IREE_HAL_COPY_FLAG_NONE);
 
   if (iree_status_is_ok(status)) {
     *out_target_buffer = target_buffer;
@@ -122,8 +123,8 @@
   if (iree_status_is_ok(status)) {
     status = iree_hal_device_queue_execute(
         device, queue_affinity, iree_hal_fence_semaphore_list(wait_fence),
-        iree_hal_fence_semaphore_list(signal_fence), 1, &command_buffer,
-        /*binding_tables=*/NULL);
+        iree_hal_fence_semaphore_list(signal_fence), command_buffer,
+        iree_hal_buffer_binding_table_empty());
   }
 
   if (iree_status_is_ok(status) && needs_wait) {
diff --git a/tools/iree-benchmark-executable-main.c b/tools/iree-benchmark-executable-main.c
index f5cfb4a..d769959 100644
--- a/tools/iree-benchmark-executable-main.c
+++ b/tools/iree-benchmark-executable-main.c
@@ -255,7 +255,8 @@
     ++fence_value;
     IREE_RETURN_IF_ERROR(iree_hal_device_queue_execute(
         args->device, IREE_HAL_QUEUE_AFFINITY_ANY, wait_semaphore_list,
-        signal_semaphore_list, 1, &command_buffer, /*binding_tables=*/NULL));
+        signal_semaphore_list, command_buffer,
+        iree_hal_buffer_binding_table_empty()));
 
     // Block and wait for the submission to complete.
     // Note that this will include round-trip overhead and if the dispatch or
