Revert #19074 and #19082 to fix broken AMDGPU tests. (#19086)

Failing tests from the first PR:
https://github.com/iree-org/iree/actions/runs/11748241815/job/32733175285#step:8:482
Second PR was landed on top of the first, preventing a clean revert of the
first PR without also reverting the second.
diff --git a/runtime/src/iree/hal/drivers/cuda/cuda_device.c b/runtime/src/iree/hal/drivers/cuda/cuda_device.c
index d5ed448..2ed014b 100644
--- a/runtime/src/iree/hal/drivers/cuda/cuda_device.c
+++ b/runtime/src/iree/hal/drivers/cuda/cuda_device.c
@@ -1171,8 +1171,6 @@
             iree_hal_cuda_deferred_work_queue_device_interface_create_stream_command_buffer,
         .submit_command_buffer =
             iree_hal_cuda_deferred_work_queue_device_interface_submit_command_buffer,
-        .async_alloc = NULL,
-        .async_dealloc = NULL,
 };
 
 static const iree_hal_stream_tracing_device_interface_vtable_t
diff --git a/runtime/src/iree/hal/drivers/hip/dynamic_symbol_tables.h b/runtime/src/iree/hal/drivers/hip/dynamic_symbol_tables.h
index 963784d..1a3fc50 100644
--- a/runtime/src/iree/hal/drivers/hip/dynamic_symbol_tables.h
+++ b/runtime/src/iree/hal/drivers/hip/dynamic_symbol_tables.h
@@ -73,7 +73,6 @@
                                hipMemPool_t, hipStream_t)
 IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMallocManaged, hipDeviceptr_t *, size_t,
                                unsigned int)
-IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMallocAsync, void **, size_t, hipStream_t)
 IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMemcpy, void *, const void *, size_t,
                                hipMemcpyKind)
 IREE_HAL_HIP_REQUIRED_PFN_DECL(hipMemcpyAsync, void *, const void *, size_t,
diff --git a/runtime/src/iree/hal/drivers/hip/hip_allocator.c b/runtime/src/iree/hal/drivers/hip/hip_allocator.c
index 041e011..1073ccc 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_allocator.c
+++ b/runtime/src/iree/hal/drivers/hip/hip_allocator.c
@@ -111,10 +111,6 @@
   IREE_TRACE_ZONE_END(z0);
 }
 
-bool iree_hal_hip_allocator_isa(iree_hal_allocator_t* base_value) {
-  return iree_hal_resource_is(base_value, &iree_hal_hip_allocator_vtable);
-}
-
 static iree_allocator_t iree_hal_hip_allocator_host_allocator(
     const iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
   iree_hal_hip_allocator_t* allocator =
@@ -594,53 +590,6 @@
   }
 }
 
-iree_status_t iree_hal_hip_allocator_alloc_async(
-    iree_hal_allocator_t* base_allocator, hipStream_t stream,
-    iree_hal_buffer_t* buffer) {
-  iree_hal_hip_allocator_t* allocator =
-      iree_hal_hip_allocator_cast(base_allocator);
-
-  hipDeviceptr_t ptr = NULL;
-  iree_status_t status = IREE_HIP_RESULT_TO_STATUS(
-      allocator->symbols,
-      hipMallocAsync(&ptr, (size_t)iree_hal_buffer_allocation_size(buffer),
-                     stream),
-      "hipMallocAsync");
-  if (iree_status_is_ok(status)) {
-    iree_hal_hip_buffer_set_device_pointer(buffer, ptr);
-    IREE_TRACE_ALLOC_NAMED(IREE_HAL_HIP_ALLOCATOR_ID, (void*)ptr,
-                           iree_hal_buffer_allocation_size(buffer));
-    IREE_STATISTICS(iree_hal_allocator_statistics_record_alloc(
-        &allocator->statistics, iree_hal_buffer_memory_type(buffer),
-        iree_hal_buffer_allocation_size(buffer)));
-  } else {
-    iree_hal_hip_buffer_set_allocation_empty(buffer);
-  }
-
-  return status;
-}
-
-iree_status_t iree_hal_hip_allocator_free_async(
-    iree_hal_allocator_t* base_allocator, hipStream_t stream,
-    iree_hal_buffer_t* buffer) {
-  iree_hal_hip_allocator_t* allocator =
-      iree_hal_hip_allocator_cast(base_allocator);
-  hipDeviceptr_t device_ptr = iree_hal_hip_buffer_device_pointer(buffer);
-  if (!device_ptr) {
-    return iree_ok_status();
-  }
-
-  IREE_RETURN_IF_ERROR(IREE_HIP_RESULT_TO_STATUS(
-      allocator->symbols, hipFreeAsync(device_ptr, stream), "hipFreeAsync"));
-  iree_hal_hip_buffer_set_allocation_empty(buffer);
-
-  IREE_TRACE_FREE_NAMED(IREE_HAL_HIP_ALLOCATOR_ID, (void*)device_ptr);
-  IREE_STATISTICS(iree_hal_allocator_statistics_record_free(
-      &allocator->statistics, iree_hal_buffer_memory_type(buffer),
-      iree_hal_buffer_allocation_size(buffer)));
-  return iree_ok_status();
-}
-
 static const iree_hal_allocator_vtable_t iree_hal_hip_allocator_vtable = {
     .destroy = iree_hal_hip_allocator_destroy,
     .host_allocator = iree_hal_hip_allocator_host_allocator,
diff --git a/runtime/src/iree/hal/drivers/hip/hip_allocator.h b/runtime/src/iree/hal/drivers/hip/hip_allocator.h
index bb83ea1..6d51060 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_allocator.h
+++ b/runtime/src/iree/hal/drivers/hip/hip_allocator.h
@@ -25,16 +25,6 @@
     hipStream_t stream, iree_hal_hip_memory_pools_t* pools,
     iree_allocator_t host_allocator, iree_hal_allocator_t** out_allocator);
 
-bool iree_hal_hip_allocator_isa(iree_hal_allocator_t* base_value);
-
-iree_status_t iree_hal_hip_allocator_alloc_async(
-    iree_hal_allocator_t* base_allocator, hipStream_t stream,
-    iree_hal_buffer_t* buffer);
-
-iree_status_t iree_hal_hip_allocator_free_async(iree_hal_allocator_t* allocator,
-                                                hipStream_t stream,
-                                                iree_hal_buffer_t* buffer);
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/runtime/src/iree/hal/drivers/hip/hip_buffer.c b/runtime/src/iree/hal/drivers/hip/hip_buffer.c
index 46e768a..fb318ba 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_buffer.c
+++ b/runtime/src/iree/hal/drivers/hip/hip_buffer.c
@@ -11,7 +11,6 @@
 #include <string.h>
 
 #include "iree/base/api.h"
-#include "iree/base/internal/synchronization.h"
 #include "iree/base/tracing.h"
 
 typedef struct iree_hal_hip_buffer_t {
@@ -20,9 +19,6 @@
   void* host_ptr;
   hipDeviceptr_t device_ptr;
   iree_hal_buffer_release_callback_t release_callback;
-  iree_slim_mutex_t device_ptr_lock;
-  iree_notification_t device_ptr_notification;
-  bool empty;
 } iree_hal_hip_buffer_t;
 
 static const iree_hal_buffer_vtable_t iree_hal_hip_buffer_vtable;
@@ -69,9 +65,6 @@
     buffer->host_ptr = host_ptr;
     buffer->device_ptr = device_ptr;
     buffer->release_callback = release_callback;
-    buffer->empty = false;
-    iree_slim_mutex_initialize(&buffer->device_ptr_lock);
-    iree_notification_initialize(&buffer->device_ptr_notification);
     *out_buffer = &buffer->base;
   }
 
@@ -79,26 +72,6 @@
   return status;
 }
 
-void iree_hal_hip_buffer_set_device_pointer(iree_hal_buffer_t* base_buffer,
-                                            hipDeviceptr_t pointer) {
-  iree_hal_hip_buffer_t* buffer = iree_hal_hip_buffer_cast(base_buffer);
-  IREE_ASSERT(buffer->device_ptr == NULL,
-              "Cannot set a device_ptr to a buffer that already has one");
-  iree_slim_mutex_lock(&buffer->device_ptr_lock);
-  buffer->device_ptr = pointer;
-  iree_slim_mutex_unlock(&buffer->device_ptr_lock);
-  iree_notification_post(&buffer->device_ptr_notification, IREE_ALL_WAITERS);
-}
-
-void iree_hal_hip_buffer_set_allocation_empty(iree_hal_buffer_t* base_buffer) {
-  iree_hal_hip_buffer_t* buffer = iree_hal_hip_buffer_cast(base_buffer);
-  iree_slim_mutex_lock(&buffer->device_ptr_lock);
-  buffer->empty = true;
-  buffer->device_ptr = NULL;
-  iree_slim_mutex_unlock(&buffer->device_ptr_lock);
-  iree_notification_post(&buffer->device_ptr_notification, IREE_ALL_WAITERS);
-}
-
 static void iree_hal_hip_buffer_destroy(iree_hal_buffer_t* base_buffer) {
   iree_hal_hip_buffer_t* buffer = iree_hal_hip_buffer_cast(base_buffer);
   iree_allocator_t host_allocator = base_buffer->host_allocator;
@@ -107,8 +80,6 @@
     buffer->release_callback.fn(buffer->release_callback.user_data,
                                 base_buffer);
   }
-  iree_slim_mutex_deinitialize(&buffer->device_ptr_lock);
-  iree_notification_deinitialize(&buffer->device_ptr_notification);
   iree_allocator_free(host_allocator, buffer);
   IREE_TRACE_ZONE_END(z0);
 }
@@ -172,20 +143,10 @@
   return buffer->type;
 }
 
-static bool iree_hal_hip_buffer_has_device_ptr(void* arg) {
-  iree_hal_hip_buffer_t* buffer = (iree_hal_hip_buffer_t*)arg;
-  iree_slim_mutex_lock(&buffer->device_ptr_lock);
-  bool has_ptr_or_error = buffer->device_ptr || buffer->empty;
-  iree_slim_mutex_unlock(&buffer->device_ptr_lock);
-  return has_ptr_or_error;
-}
-
 hipDeviceptr_t iree_hal_hip_buffer_device_pointer(
-    iree_hal_buffer_t* base_buffer) {
-  iree_hal_hip_buffer_t* buffer = iree_hal_hip_buffer_cast(base_buffer);
-  iree_notification_await(&buffer->device_ptr_notification,
-                          iree_hal_hip_buffer_has_device_ptr, buffer,
-                          iree_infinite_timeout());
+    const iree_hal_buffer_t* base_buffer) {
+  const iree_hal_hip_buffer_t* buffer =
+      iree_hal_hip_buffer_const_cast(base_buffer);
   return buffer->device_ptr;
 }
 
diff --git a/runtime/src/iree/hal/drivers/hip/hip_buffer.h b/runtime/src/iree/hal/drivers/hip/hip_buffer.h
index 4264b54..8fe1609 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_buffer.h
+++ b/runtime/src/iree/hal/drivers/hip/hip_buffer.h
@@ -49,16 +49,8 @@
 // Returns the HIP base pointer for the given |buffer|.
 // This is the entire allocated_buffer and must be offset by the buffer
 // byte_offset and byte_length when used.
-hipDeviceptr_t iree_hal_hip_buffer_device_pointer(iree_hal_buffer_t* buffer);
-
-// Sets the HIP base pointer for the given |buffer|.
-// This is the entire allocated_buffer and must be offset by the buffer
-// byte_offset and byte_length when used.
-void iree_hal_hip_buffer_set_device_pointer(iree_hal_buffer_t* buffer,
-                                            hipDeviceptr_t pointer);
-
-// Marks the buffer as having an intentionally empty allocation.
-void iree_hal_hip_buffer_set_allocation_empty(iree_hal_buffer_t* buffer);
+hipDeviceptr_t iree_hal_hip_buffer_device_pointer(
+    const iree_hal_buffer_t* buffer);
 
 // Returns the HIP host pointer for the given |buffer|, if available.
 void* iree_hal_hip_buffer_host_pointer(const iree_hal_buffer_t* buffer);
diff --git a/runtime/src/iree/hal/drivers/hip/hip_device.c b/runtime/src/iree/hal/drivers/hip/hip_device.c
index c409eaa..7f42e8d 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_device.c
+++ b/runtime/src/iree/hal/drivers/hip/hip_device.c
@@ -19,7 +19,6 @@
 #include "iree/hal/drivers/hip/event_semaphore.h"
 #include "iree/hal/drivers/hip/graph_command_buffer.h"
 #include "iree/hal/drivers/hip/hip_allocator.h"
-#include "iree/hal/drivers/hip/hip_buffer.h"
 #include "iree/hal/drivers/hip/memory_pools.h"
 #include "iree/hal/drivers/hip/nop_executable_cache.h"
 #include "iree/hal/drivers/hip/rccl_channel.h"
@@ -88,9 +87,6 @@
   iree_hal_channel_provider_t* channel_provider;
 } iree_hal_hip_device_t;
 
-static iree_hal_hip_device_t* iree_hal_hip_device_cast(
-    iree_hal_device_t* base_value);
-
 static const iree_hal_device_vtable_t iree_hal_hip_device_vtable;
 static const iree_hal_deferred_work_queue_device_interface_vtable_t
     iree_hal_hip_deferred_work_queue_device_interface_vtable;
@@ -170,7 +166,6 @@
   return IREE_HIP_RESULT_TO_STATUS(device_interface->hip_symbols,
                                    hipEventSynchronize((hipEvent_t)event));
 }
-
 static iree_status_t
 iree_hal_hip_deferred_work_queue_device_interface_destroy_native_event(
     iree_hal_deferred_work_queue_device_interface_t* base_device_interface,
@@ -261,45 +256,6 @@
   return status;
 }
 
-static iree_status_t
-iree_hal_hip_deferred_work_queue_device_interface_async_alloc(
-    iree_hal_deferred_work_queue_device_interface_t* base_device_interface,
-    iree_hal_buffer_t* buffer) {
-  iree_hal_hip_deferred_work_queue_device_interface_t* device_interface =
-      (iree_hal_hip_deferred_work_queue_device_interface_t*)
-          base_device_interface;
-  iree_hal_hip_device_t* device =
-      iree_hal_hip_device_cast(device_interface->device);
-  if (device->supports_memory_pools) {
-    return iree_hal_hip_memory_pools_allocate_pointer(
-        &device->memory_pools, buffer, device->hip_dispatch_stream,
-        iree_hal_buffer_allocation_size(buffer));
-  }
-
-  return iree_hal_hip_allocator_alloc_async(
-      iree_hal_device_allocator(device_interface->device),
-      device->hip_dispatch_stream, buffer);
-}
-
-// Asynchronously frees a buffer.
-static iree_status_t
-iree_hal_hip_deferred_work_queue_device_interface_async_dealloc(
-    iree_hal_deferred_work_queue_device_interface_t* base_device_interface,
-    iree_hal_buffer_t* buffer) {
-  iree_hal_hip_deferred_work_queue_device_interface_t* device_interface =
-      (iree_hal_hip_deferred_work_queue_device_interface_t*)
-          base_device_interface;
-  iree_hal_hip_device_t* device =
-      iree_hal_hip_device_cast(device_interface->device);
-  if (device->supports_memory_pools) {
-    return iree_hal_hip_memory_pools_deallocate(
-        &device->memory_pools, device->hip_dispatch_stream, buffer);
-  }
-  return iree_hal_hip_allocator_free_async(
-      iree_hal_device_allocator(device_interface->device),
-      device->hip_dispatch_stream, buffer);
-}
-
 typedef struct iree_hal_hip_tracing_device_interface_t {
   iree_hal_stream_tracing_device_interface_t base;
   hipDevice_t device;
@@ -966,34 +922,6 @@
   return IREE_HAL_SEMAPHORE_COMPATIBILITY_HOST_ONLY;
 }
 
-static iree_status_t iree_hal_hip_device_pepare_async_alloc(
-    iree_hal_hip_device_t* device, iree_hal_buffer_params_t params,
-    iree_device_size_t allocation_size,
-    iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
-  IREE_TRACE_ZONE_BEGIN(z0);
-  IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)allocation_size);
-
-  iree_hal_buffer_params_canonicalize(&params);
-
-  iree_hal_buffer_t* buffer = NULL;
-  iree_status_t status = iree_hal_hip_buffer_wrap(
-      device->device_allocator, params.type, params.access, params.usage,
-      allocation_size, /*byte_offset=*/0,
-      /*byte_length=*/allocation_size, IREE_HAL_HIP_BUFFER_TYPE_ASYNC,
-      /*device_ptr=*/NULL, /*host_ptr=*/NULL,
-      iree_hal_buffer_release_callback_null(), device->host_allocator, &buffer);
-
-  if (iree_status_is_ok(status)) {
-    *out_buffer = buffer;
-  } else if (buffer) {
-    iree_hal_hip_buffer_set_allocation_empty(buffer);
-    iree_hal_buffer_release(buffer);
-  }
-
-  IREE_TRACE_ZONE_END(z0);
-  return status;
-}
-
 // TODO: implement multiple streams; today we only have one and queue_affinity
 //       is ignored.
 // TODO: implement proper semaphores in HIP to ensure ordering and avoid
@@ -1007,46 +935,6 @@
     iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
   iree_hal_hip_device_t* device = iree_hal_hip_device_cast(base_device);
 
-  if (device->supports_memory_pools &&
-      !iree_all_bits_set(params.type, IREE_HAL_MEMORY_TYPE_HOST_VISIBLE)) {
-    iree_hal_buffer_t* buffer = NULL;
-
-    IREE_RETURN_IF_ERROR(iree_hal_hip_memory_pools_prepare_buffer(
-        &device->memory_pools, device->hip_dispatch_stream, pool, params,
-        allocation_size, &buffer));
-
-    iree_status_t status = iree_hal_deferred_work_queue_enqueue_alloc(
-        device->work_queue, wait_semaphore_list, signal_semaphore_list, buffer);
-    if (iree_status_is_ok(status)) {
-      *out_buffer = buffer;
-    } else {
-      iree_hal_hip_buffer_set_allocation_empty(buffer);
-      iree_hal_resource_release(&buffer->resource);
-    }
-    return status;
-  } else if (!iree_all_bits_set(params.type,
-                                IREE_HAL_MEMORY_TYPE_HOST_VISIBLE) &&
-             iree_hal_hip_allocator_isa(
-                 iree_hal_device_allocator(base_device))) {
-    iree_hal_buffer_t* buffer = NULL;
-
-    IREE_RETURN_IF_ERROR(iree_hal_hip_device_pepare_async_alloc(
-        device, params, allocation_size, &buffer));
-
-    iree_status_t status = iree_hal_deferred_work_queue_enqueue_alloc(
-        device->work_queue, wait_semaphore_list, signal_semaphore_list, buffer);
-    if (iree_status_is_ok(status)) {
-      status = iree_hal_deferred_work_queue_issue(device->work_queue);
-    }
-    if (iree_status_is_ok(status)) {
-      *out_buffer = buffer;
-    } else {
-      iree_hal_hip_buffer_set_allocation_empty(buffer);
-      iree_hal_resource_release(&buffer->resource);
-    }
-    return status;
-  }
-
   // NOTE: block on the semaphores here; we could avoid this by properly
   // sequencing device work with semaphores. The HIP HAL is not currently
   // asynchronous.
@@ -1057,9 +945,17 @@
   // exhaustion but the error may be deferred until a later synchronization.
   // If pools are not supported we allocate a buffer as normal from whatever
   // allocator is set on the device.
-  iree_status_t status =
-      iree_hal_allocator_allocate_buffer(iree_hal_device_allocator(base_device),
-                                         params, allocation_size, out_buffer);
+  iree_status_t status = iree_ok_status();
+  if (device->supports_memory_pools &&
+      !iree_all_bits_set(params.type, IREE_HAL_MEMORY_TYPE_HOST_VISIBLE)) {
+    status = iree_hal_hip_memory_pools_allocate(
+        &device->memory_pools, device->hip_dispatch_stream, pool, params,
+        allocation_size, out_buffer);
+  } else {
+    status = iree_hal_allocator_allocate_buffer(
+        iree_hal_device_allocator(base_device), params, allocation_size,
+        out_buffer);
+  }
 
   // Only signal if not returning a synchronous error - synchronous failure
   // indicates that the stream is unchanged (it's not really since we waited
@@ -1080,10 +976,6 @@
     const iree_hal_semaphore_list_t signal_semaphore_list,
     iree_hal_buffer_t* buffer) {
   iree_hal_hip_device_t* device = iree_hal_hip_device_cast(base_device);
-  if (iree_hal_hip_allocator_isa(iree_hal_device_allocator(base_device))) {
-    return iree_hal_deferred_work_queue_enqueue_dealloc(
-        device->work_queue, wait_semaphore_list, signal_semaphore_list, buffer);
-  }
 
   // NOTE: block on the semaphores here; we could avoid this by properly
   // sequencing device work with semaphores. The HIP HAL is not currently
@@ -1276,10 +1168,6 @@
             iree_hal_hip_deferred_work_queue_device_interface_create_stream_command_buffer,
         .submit_command_buffer =
             iree_hal_hip_deferred_work_queue_device_interface_submit_command_buffer,
-        .async_alloc =
-            iree_hal_hip_deferred_work_queue_device_interface_async_alloc,
-        .async_dealloc =
-            iree_hal_hip_deferred_work_queue_device_interface_async_dealloc,
 };
 
 static const iree_hal_stream_tracing_device_interface_vtable_t
diff --git a/runtime/src/iree/hal/drivers/hip/memory_pools.c b/runtime/src/iree/hal/drivers/hip/memory_pools.c
index 0258fa0..89e27fa 100644
--- a/runtime/src/iree/hal/drivers/hip/memory_pools.c
+++ b/runtime/src/iree/hal/drivers/hip/memory_pools.c
@@ -202,41 +202,13 @@
   IREE_TRACE_ZONE_BEGIN(z0);
 
   hipDeviceptr_t device_ptr = iree_hal_hip_buffer_device_pointer(buffer);
-  if (device_ptr) {
-    IREE_HIP_IGNORE_ERROR(pools->hip_symbols, hipFree(device_ptr));
-  }
+  IREE_HIP_IGNORE_ERROR(pools->hip_symbols, hipFree(device_ptr));
   iree_hal_hip_memory_pool_track_free(pools, buffer);
 
   IREE_TRACE_ZONE_END(z0);
 }
 
-iree_status_t iree_hal_hip_memory_pools_allocate_pointer(
-    iree_hal_hip_memory_pools_t* pools, iree_hal_buffer_t* buffer,
-    hipStream_t stream, iree_device_size_t allocation_size) {
-  // TODO: more pools and better selection; this is coarsely deciding between
-  // only device local (variables, constants, transients) and other (staging,
-  // external) but could use more buffer properties (including usage/export
-  // flags) to better isolate the different usage patterns and keep the pools
-  // operating with reasonable limits. We should be using the |pool| arg.
-  hipMemPool_t memory_pool =
-      iree_all_bits_set(iree_hal_buffer_memory_type(buffer),
-                        IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL)
-          ? pools->device_local
-          : pools->other;
-
-  hipDeviceptr_t device_ptr = NULL;
-  IREE_RETURN_IF_ERROR(IREE_HIP_RESULT_TO_STATUS(
-      pools->hip_symbols,
-      hipMallocFromPoolAsync(&device_ptr, (size_t)allocation_size, memory_pool,
-                             stream),
-      "hipMallocFromPoolAsync"));
-
-  iree_hal_hip_buffer_set_device_pointer(buffer, device_ptr);
-  iree_hal_hip_memory_pool_track_alloc(pools, buffer);
-  return iree_ok_status();
-}
-
-iree_status_t iree_hal_hip_memory_pools_prepare_buffer(
+iree_status_t iree_hal_hip_memory_pools_allocate(
     iree_hal_hip_memory_pools_t* pools, hipStream_t stream,
     iree_hal_allocator_pool_t pool, iree_hal_buffer_params_t params,
     iree_device_size_t allocation_size,
@@ -246,27 +218,49 @@
 
   iree_hal_buffer_params_canonicalize(&params);
 
+  // TODO: more pools and better selection; this is coarsely deciding between
+  // only device local (variables, constants, transients) and other (staging,
+  // external) but could use more buffer properties (including usage/export
+  // flags) to better isolate the different usage patterns and keep the pools
+  // operating with reasonable limits. We should be using the |pool| arg.
+  hipMemPool_t memory_pool =
+      iree_all_bits_set(params.type, IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL)
+          ? pools->device_local
+          : pools->other;
+
+  hipDeviceptr_t device_ptr = NULL;
+  iree_status_t status = IREE_HIP_RESULT_TO_STATUS(
+      pools->hip_symbols,
+      hipMallocFromPoolAsync(&device_ptr, (size_t)allocation_size, memory_pool,
+                             stream),
+      "hipMallocFromPoolAsync");
+
+  // Wrap the allocated HIP buffer in a HAL buffer.
   // NOTE: we don't provide a device allocator because we didn't allocate from
   // one and instead we use a release callback to perform the free if the user
   // doesn't dealloca the buffer.
   iree_hal_buffer_t* buffer = NULL;
-  iree_hal_buffer_release_callback_t release_callback = {
-      .fn = iree_hal_hip_async_buffer_release_callback,
-      .user_data = pools,
-  };
-  iree_status_t status = iree_hal_hip_buffer_wrap(
-      /*device_allocator=*/NULL, params.type, params.access, params.usage,
-      allocation_size, /*byte_offset=*/0,
-      /*byte_length=*/allocation_size, IREE_HAL_HIP_BUFFER_TYPE_ASYNC,
-      /*device_ptr*/ NULL, /*host_ptr=*/NULL, release_callback,
-      pools->host_allocator, &buffer);
+  if (iree_status_is_ok(status)) {
+    iree_hal_buffer_release_callback_t release_callback = {
+        .fn = iree_hal_hip_async_buffer_release_callback,
+        .user_data = pools,
+    };
+    status = iree_hal_hip_buffer_wrap(
+        /*device_allocator=*/NULL, params.type, params.access, params.usage,
+        allocation_size, /*byte_offset=*/0,
+        /*byte_length=*/allocation_size, IREE_HAL_HIP_BUFFER_TYPE_ASYNC,
+        device_ptr, /*host_ptr=*/NULL, release_callback, pools->host_allocator,
+        &buffer);
+  }
 
   if (iree_status_is_ok(status)) {
     // Update statistics (note that it may not yet be accurate).
+    iree_hal_hip_memory_pool_track_alloc(pools, buffer);
     *out_buffer = buffer;
   } else if (buffer) {
-    iree_hal_hip_buffer_set_allocation_empty(buffer);
     iree_hal_buffer_release(buffer);
+  } else {
+    IREE_HIP_IGNORE_ERROR(pools->hip_symbols, hipFreeAsync(device_ptr, stream));
   }
 
   IREE_TRACE_ZONE_END(z0);
@@ -288,10 +282,8 @@
   if (iree_hal_hip_buffer_type(buffer) == IREE_HAL_HIP_BUFFER_TYPE_ASYNC) {
     // Try to schedule the buffer for freeing.
     hipDeviceptr_t device_ptr = iree_hal_hip_buffer_device_pointer(buffer);
-    if (device_ptr) {
-      status = IREE_HIP_RESULT_TO_STATUS(
-          pools->hip_symbols, hipFreeAsync(device_ptr, stream), "hipFreeAsync");
-    }
+    status = IREE_HIP_RESULT_TO_STATUS(
+        pools->hip_symbols, hipFreeAsync(device_ptr, stream), "hipFreeAsync");
     if (iree_status_is_ok(status)) {
       // Drop the release callback so that we don't try to double-free the
       // buffer. Note that we only do this if the HIP free succeeded as
diff --git a/runtime/src/iree/hal/drivers/hip/memory_pools.h b/runtime/src/iree/hal/drivers/hip/memory_pools.h
index c505bbe..e39370a 100644
--- a/runtime/src/iree/hal/drivers/hip/memory_pools.h
+++ b/runtime/src/iree/hal/drivers/hip/memory_pools.h
@@ -63,15 +63,9 @@
     iree_hal_hip_memory_pools_t* pools,
     const iree_hal_hip_memory_pooling_params_t* pooling_params);
 
-iree_status_t iree_hal_hip_memory_pools_allocate_pointer(
-    iree_hal_hip_memory_pools_t* pools, iree_hal_buffer_t* buffer,
-    hipStream_t stream, iree_device_size_t allocation_size);
-
-// Prepares a buffer to be allocated from the given pool.
-// It does no actual allocations, they must happen on another thread.
-// Any calls to get the device_ptr from the buffer will
-// result in a wait until the allocation is available.
-iree_status_t iree_hal_hip_memory_pools_prepare_buffer(
+// Asynchronously allocates a buffer from an appropriate pool.
+// The allocation will be stream-ordered on |stream|.
+iree_status_t iree_hal_hip_memory_pools_allocate(
     iree_hal_hip_memory_pools_t* pools, hipStream_t stream,
     iree_hal_allocator_pool_t pool, iree_hal_buffer_params_t params,
     iree_device_size_t allocation_size,
diff --git a/runtime/src/iree/hal/utils/deferred_work_queue.c b/runtime/src/iree/hal/utils/deferred_work_queue.c
index c783bbe..e41fe35 100644
--- a/runtime/src/iree/hal/utils/deferred_work_queue.c
+++ b/runtime/src/iree/hal/utils/deferred_work_queue.c
@@ -19,30 +19,14 @@
 
 // The maximal number of events a command buffer can wait on.
 #define IREE_HAL_MAX_WAIT_EVENT_COUNT 32
-#define IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS 0
 
-#if IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
-IREE_TRACE(static const char* IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_PLOT_NAME =
-               "iree_hal_work_queue_pending");
-IREE_TRACE(static const char* IREE_HAL_DEFERRED_WORKER_QUEUE_READY_PLOT_NAME =
-               "iree_hal_work_queue_ready");
-IREE_TRACE(
-    static const char* IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_ALLOC_PLOT_NAME =
-        "iree_hal_work_queue_pending_alloc");
-IREE_TRACE(static const char*
-               IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_DEALLOC_PLOT_NAME =
-                   "iree_hal_work_queue_pending_dealloc");
-#endif  // IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
 //===----------------------------------------------------------------------===//
 // Queue action
 //===----------------------------------------------------------------------===//
 
 typedef enum iree_hal_deferred_work_queue_action_kind_e {
   IREE_HAL_QUEUE_ACTION_TYPE_EXECUTION,
-  IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_ALLOC,
-  IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_DEALLOC,
-  IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_MAX =
-      IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_DEALLOC,
+  // TODO: Add support for queue alloca and dealloca.
 } iree_hal_deferred_work_queue_action_kind_t;
 
 typedef enum iree_hal_deferred_work_queue_action_state_e {
@@ -83,12 +67,6 @@
       iree_hal_command_buffer_t** command_buffers;
       iree_hal_buffer_binding_table_t* binding_tables;
     } execution;
-    struct {
-      iree_hal_buffer_t* buffer;
-    } alloc;
-    struct {
-      iree_hal_buffer_t* buffer;
-    } dealloc;
   } payload;
 
   // Resource set to retain all associated resources by the payload.
@@ -523,21 +501,6 @@
   IREE_ASSERT_ARGUMENT(out_actions);
   IREE_TRACE_ZONE_BEGIN(z0);
 
-#if IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
-  IREE_TRACE_SET_PLOT_TYPE(IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_PLOT_NAME,
-                           IREE_TRACING_PLOT_TYPE_NUMBER, /*step=*/true,
-                           /*fill=*/true, /*color=*/0);
-  IREE_TRACE_SET_PLOT_TYPE(IREE_HAL_DEFERRED_WORKER_QUEUE_READY_PLOT_NAME,
-                           IREE_TRACING_PLOT_TYPE_NUMBER, /*step=*/true,
-                           /*fill=*/true, /*color=*/0);
-  IREE_TRACE_SET_PLOT_TYPE(
-      IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_ALLOC_PLOT_NAME,
-      IREE_TRACING_PLOT_TYPE_NUMBER, /*step=*/true, /*fill=*/true, /*color=*/0);
-  IREE_TRACE_SET_PLOT_TYPE(
-      IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_DEALLOC_PLOT_NAME,
-      IREE_TRACING_PLOT_TYPE_NUMBER, /*step=*/true, /*fill=*/true, /*color=*/0);
-#endif  //  IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
-
   iree_hal_deferred_work_queue_t* actions = NULL;
   IREE_RETURN_AND_END_ZONE_IF_ERROR(
       z0, iree_allocator_malloc(host_allocator, sizeof(*actions),
@@ -849,140 +812,6 @@
   return status;
 }
 
-static iree_status_t iree_hal_deferred_work_queue_enqueue_buffer_operation(
-    iree_hal_deferred_work_queue_t* actions,
-    iree_hal_deferred_work_queue_action_kind_t kind,
-    const iree_hal_semaphore_list_t wait_semaphore_list,
-    const iree_hal_semaphore_list_t signal_semaphore_list,
-    iree_hal_buffer_t* buffer) {
-  IREE_ASSERT_ARGUMENT(actions);
-  IREE_ASSERT_ARGUMENT(buffer);
-  IREE_TRACE_ZONE_BEGIN(z0);
-
-  // Embed captured tables in the action allocation.
-  iree_hal_deferred_work_queue_action_t* action = NULL;
-  const iree_host_size_t wait_semaphore_list_size =
-      wait_semaphore_list.count * sizeof(*wait_semaphore_list.semaphores) +
-      wait_semaphore_list.count * sizeof(*wait_semaphore_list.payload_values);
-  const iree_host_size_t signal_semaphore_list_size =
-      signal_semaphore_list.count * sizeof(*signal_semaphore_list.semaphores) +
-      signal_semaphore_list.count *
-          sizeof(*signal_semaphore_list.payload_values);
-
-  const iree_host_size_t total_action_size =
-      sizeof(*action) + wait_semaphore_list_size + signal_semaphore_list_size;
-
-  IREE_RETURN_AND_END_ZONE_IF_ERROR(
-      z0, iree_allocator_malloc(actions->host_allocator, total_action_size,
-                                (void**)&action));
-  uint8_t* action_ptr = (uint8_t*)action + sizeof(*action);
-
-  action->owning_actions = actions;
-  action->device_interface = actions->device_interface;
-  action->state = IREE_HAL_QUEUE_ACTION_STATE_ALIVE;
-  action->cleanup_callback = NULL;
-  action->callback_user_data = NULL;
-  action->kind = kind;
-
-  // Initialize scratch fields.
-  action->event_count = 0;
-  action->is_pending = true;
-
-  // Copy wait list for later access.
-  action->wait_semaphore_list.count = wait_semaphore_list.count;
-  action->wait_semaphore_list.semaphores = (iree_hal_semaphore_t**)action_ptr;
-  memcpy(action->wait_semaphore_list.semaphores, wait_semaphore_list.semaphores,
-         wait_semaphore_list.count * sizeof(*wait_semaphore_list.semaphores));
-  action->wait_semaphore_list.payload_values =
-      (uint64_t*)(action_ptr + wait_semaphore_list.count *
-                                   sizeof(*wait_semaphore_list.semaphores));
-  memcpy(
-      action->wait_semaphore_list.payload_values,
-      wait_semaphore_list.payload_values,
-      wait_semaphore_list.count * sizeof(*wait_semaphore_list.payload_values));
-  action_ptr += wait_semaphore_list_size;
-
-  // Copy signal list for later access.
-  action->signal_semaphore_list.count = signal_semaphore_list.count;
-  action->signal_semaphore_list.semaphores = (iree_hal_semaphore_t**)action_ptr;
-  memcpy(
-      action->signal_semaphore_list.semaphores,
-      signal_semaphore_list.semaphores,
-      signal_semaphore_list.count * sizeof(*signal_semaphore_list.semaphores));
-  action->signal_semaphore_list.payload_values =
-      (uint64_t*)(action_ptr + signal_semaphore_list.count *
-                                   sizeof(*signal_semaphore_list.semaphores));
-  memcpy(action->signal_semaphore_list.payload_values,
-         signal_semaphore_list.payload_values,
-         signal_semaphore_list.count *
-             sizeof(*signal_semaphore_list.payload_values));
-  action_ptr += signal_semaphore_list_size;
-
-  // Copy the execution resources for later access.
-  action->payload.alloc.buffer = buffer;
-
-  // Retain all command buffers and semaphores.
-  iree_status_t status = iree_hal_resource_set_allocate(actions->block_pool,
-                                                        &action->resource_set);
-  if (iree_status_is_ok(status)) {
-    status = iree_hal_resource_set_insert(action->resource_set,
-                                          wait_semaphore_list.count,
-                                          wait_semaphore_list.semaphores);
-  }
-  if (iree_status_is_ok(status)) {
-    status = iree_hal_resource_set_insert(action->resource_set,
-                                          signal_semaphore_list.count,
-                                          signal_semaphore_list.semaphores);
-  }
-  if (iree_status_is_ok(status)) {
-    status = iree_hal_resource_set_insert(action->resource_set, 1, &buffer);
-  }
-
-  if (iree_status_is_ok(status)) {
-    // Now everything is okay and we can enqueue the action.
-    iree_slim_mutex_lock(&actions->action_mutex);
-    if (actions->exit_requested) {
-      status = iree_make_status(
-          IREE_STATUS_ABORTED,
-          "can not issue more executions, exit already requested");
-      iree_hal_deferred_work_queue_action_fail_locked(action, status);
-    } else {
-      iree_hal_deferred_work_queue_action_list_push_back(&actions->action_list,
-                                                         action);
-      // One work item is the callback that makes it across from the
-      // completion thread.
-      actions->pending_work_items_count += 1;
-    }
-    iree_slim_mutex_unlock(&actions->action_mutex);
-  } else {
-    iree_hal_resource_set_free(action->resource_set);
-    iree_allocator_free(actions->host_allocator, action);
-  }
-
-  IREE_TRACE_ZONE_END(z0);
-  return status;
-}
-
-iree_status_t iree_hal_deferred_work_queue_enqueue_alloc(
-    iree_hal_deferred_work_queue_t* actions,
-    const iree_hal_semaphore_list_t wait_semaphore_list,
-    const iree_hal_semaphore_list_t signal_semaphore_list,
-    iree_hal_buffer_t* buffer) {
-  return iree_hal_deferred_work_queue_enqueue_buffer_operation(
-      actions, IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_ALLOC, wait_semaphore_list,
-      signal_semaphore_list, buffer);
-}
-
-iree_status_t iree_hal_deferred_work_queue_enqueue_dealloc(
-    iree_hal_deferred_work_queue_t* actions,
-    const iree_hal_semaphore_list_t wait_semaphore_list,
-    const iree_hal_semaphore_list_t signal_semaphore_list,
-    iree_hal_buffer_t* buffer) {
-  return iree_hal_deferred_work_queue_enqueue_buffer_operation(
-      actions, IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_DEALLOC, wait_semaphore_list,
-      signal_semaphore_list, buffer);
-}
-
 // Does not consume |status|.
 static void iree_hal_deferred_work_queue_fail_status_locked(
     iree_hal_deferred_work_queue_t* actions, iree_status_t status) {
@@ -1084,7 +913,7 @@
   IREE_TRACE_ZONE_BEGIN(z0);
   iree_hal_deferred_work_queue_action_t* action =
       (iree_hal_deferred_work_queue_action_t*)user_data;
-  IREE_ASSERT_LE(action->kind, IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_MAX);
+  IREE_ASSERT_EQ(action->kind, IREE_HAL_QUEUE_ACTION_TYPE_EXECUTION);
   IREE_ASSERT_EQ(action->state, IREE_HAL_QUEUE_ACTION_STATE_ALIVE);
   if (IREE_UNLIKELY(!iree_status_is_ok(status))) {
     iree_hal_deferred_work_queue_action_fail(action, status);
@@ -1112,7 +941,7 @@
 // Issues the given kernel dispatch |action| to the GPU.
 static iree_status_t iree_hal_deferred_work_queue_issue_execution(
     iree_hal_deferred_work_queue_action_t* action) {
-  IREE_ASSERT_LE(action->kind, IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_MAX);
+  IREE_ASSERT_EQ(action->kind, IREE_HAL_QUEUE_ACTION_TYPE_EXECUTION);
   IREE_ASSERT_EQ(action->is_pending, false);
   iree_hal_deferred_work_queue_t* actions = action->owning_actions;
   iree_hal_deferred_work_queue_device_interface_t* device_interface =
@@ -1129,75 +958,56 @@
                 device_interface, action->wait_events[i]));
   }
 
-  switch (action->kind) {
-    case IREE_HAL_QUEUE_ACTION_TYPE_EXECUTION: {
-      // Then launch all command buffers to the dispatch stream.
-      IREE_TRACE_ZONE_BEGIN(z_dispatch_command_buffers);
-      IREE_TRACE_ZONE_APPEND_TEXT(z_dispatch_command_buffers,
-                                  "dispatch_command_buffers");
+  // Then launch all command buffers to the dispatch stream.
+  IREE_TRACE_ZONE_BEGIN(z_dispatch_command_buffers);
+  IREE_TRACE_ZONE_APPEND_TEXT(z_dispatch_command_buffers,
+                              "dispatch_command_buffers");
 
-      for (iree_host_size_t i = 0; i < action->payload.execution.count; ++i) {
-        iree_hal_command_buffer_t* command_buffer =
-            action->payload.execution.command_buffers[i];
-        iree_hal_buffer_binding_table_t binding_table =
-            action->payload.execution.binding_tables
-                ? action->payload.execution.binding_tables[i]
-                : iree_hal_buffer_binding_table_empty();
-        if (iree_hal_deferred_command_buffer_isa(command_buffer)) {
-          iree_hal_command_buffer_t* stream_command_buffer = NULL;
-          iree_hal_command_buffer_mode_t mode =
-              iree_hal_command_buffer_mode(command_buffer) |
-              IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT |
-              // NOTE: we need to validate if a binding table is provided as the
-              // bindings were not known when it was originally recorded.
-              (iree_hal_buffer_binding_table_is_empty(binding_table)
-                   ? IREE_HAL_COMMAND_BUFFER_MODE_UNVALIDATED
-                   : 0);
-          IREE_RETURN_AND_END_ZONE_IF_ERROR(
-              z0, device_interface->vtable->create_stream_command_buffer(
-                      device_interface, mode, IREE_HAL_COMMAND_CATEGORY_ANY,
-                      &stream_command_buffer))
-          IREE_RETURN_AND_END_ZONE_IF_ERROR(
-              z0, iree_hal_resource_set_insert(action->resource_set, 1,
-                                               &stream_command_buffer));
-
-          IREE_RETURN_AND_END_ZONE_IF_ERROR(
-              z0, iree_hal_deferred_command_buffer_apply(
-                      command_buffer, stream_command_buffer, binding_table));
-          command_buffer = stream_command_buffer;
-        } else {
-          iree_hal_resource_retain(command_buffer);
-        }
-
-        IREE_RETURN_AND_END_ZONE_IF_ERROR(
-            z0, device_interface->vtable->submit_command_buffer(
-                    device_interface, command_buffer));
-
-        // The stream_command_buffer is going to be retained by
-        // the action->resource_set and deleted after the action
-        // completes.
-        iree_hal_resource_release(command_buffer);
-      }
-
-      IREE_TRACE_ZONE_END(z_dispatch_command_buffers);
-      break;
-    }
-    case IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_ALLOC: {
-      IREE_TRACE_ZONE_APPEND_TEXT(z0, "queue_alloc");
+  for (iree_host_size_t i = 0; i < action->payload.execution.count; ++i) {
+    iree_hal_command_buffer_t* command_buffer =
+        action->payload.execution.command_buffers[i];
+    iree_hal_buffer_binding_table_t binding_table =
+        action->payload.execution.binding_tables
+            ? action->payload.execution.binding_tables[i]
+            : iree_hal_buffer_binding_table_empty();
+    if (iree_hal_deferred_command_buffer_isa(command_buffer)) {
+      iree_hal_command_buffer_t* stream_command_buffer = NULL;
+      iree_hal_command_buffer_mode_t mode =
+          iree_hal_command_buffer_mode(command_buffer) |
+          IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT |
+          // NOTE: we need to validate if a binding table is provided as the
+          // bindings were not known when it was originally recorded.
+          (iree_hal_buffer_binding_table_is_empty(binding_table)
+               ? IREE_HAL_COMMAND_BUFFER_MODE_UNVALIDATED
+               : 0);
       IREE_RETURN_AND_END_ZONE_IF_ERROR(
-          z0, device_interface->vtable->async_alloc(
-                  device_interface, action->payload.alloc.buffer));
-      break;
-    }
-    case IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_DEALLOC: {
-      IREE_TRACE_ZONE_APPEND_TEXT(z0, "queue_dealloc");
+          z0, device_interface->vtable->create_stream_command_buffer(
+                  device_interface, mode, IREE_HAL_COMMAND_CATEGORY_ANY,
+                  &stream_command_buffer))
       IREE_RETURN_AND_END_ZONE_IF_ERROR(
-          z0, device_interface->vtable->async_dealloc(
-                  device_interface, action->payload.dealloc.buffer));
-      break;
+          z0, iree_hal_resource_set_insert(action->resource_set, 1,
+                                           &stream_command_buffer));
+
+      IREE_RETURN_AND_END_ZONE_IF_ERROR(
+          z0, iree_hal_deferred_command_buffer_apply(
+                  command_buffer, stream_command_buffer, binding_table));
+      command_buffer = stream_command_buffer;
+    } else {
+      iree_hal_resource_retain(command_buffer);
     }
+
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, device_interface->vtable->submit_command_buffer(device_interface,
+                                                            command_buffer));
+
+    // The stream_command_buffer is going to be retained by
+    // the action->resource_set and deleted after the action
+    // completes.
+    iree_hal_resource_release(command_buffer);
   }
 
+  IREE_TRACE_ZONE_END(z_dispatch_command_buffers);
+
   iree_hal_deferred_work_queue_native_event_t completion_event = NULL;
   // Last record event signals in the dispatch stream.
   for (iree_host_size_t i = 0; i < action->signal_semaphore_list.count; ++i) {
@@ -1284,12 +1094,6 @@
     return iree_ok_status();
   }
 
-  IREE_TRACE_ZONE_BEGIN(z1);
-#if IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
-  IREE_TRACE(uint32_t num_pending = 0; uint32_t num_pending_alloc = 0;
-             uint32_t num_pending_dealloc = 0; uint32_t num_ready = 0;);
-#endif  //  IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
-
   iree_status_t status = iree_ok_status();
   // Scan through the list and categorize actions into pending and ready lists.
   while (!iree_hal_deferred_work_queue_action_list_is_empty(
@@ -1381,44 +1185,12 @@
     }
 
     if (action->is_pending) {
-#if IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
-      IREE_TRACE({
-        ++num_pending;
-        switch (action->kind) {
-          case IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_ALLOC:
-            ++num_pending_alloc;
-            break;
-          case IREE_HAL_QUEUE_ACTION_TYPE_QUEUE_DEALLOC:
-            ++num_pending_dealloc;
-            break;
-          default:
-            break;
-        }
-      });
-#endif  //  IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
       iree_hal_deferred_work_queue_action_list_push_back(&pending_list, action);
     } else {
-#if IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
-      IREE_TRACE(++num_ready;);
-#endif  //  IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
       iree_hal_deferred_work_queue_action_list_push_back(&ready_list, action);
     }
   }
 
-  IREE_TRACE_ZONE_END(z1);
-#if IREE_HAL_DEFERRED_WORKER_QUEUE_VERBOSE_PLOTS
-  IREE_TRACE_PLOT_VALUE_I64(IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_PLOT_NAME,
-                            num_pending);
-  IREE_TRACE_PLOT_VALUE_I64(IREE_HAL_DEFERRED_WORKER_QUEUE_READY_PLOT_NAME,
-                            num_ready);
-  IREE_TRACE_PLOT_VALUE_I64(
-      IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_ALLOC_PLOT_NAME,
-      num_pending_alloc);
-  IREE_TRACE_PLOT_VALUE_I64(
-      IREE_HAL_DEFERRED_WORKER_QUEUE_PENDING_DEALLOC_PLOT_NAME,
-      num_pending_dealloc);
-#endif
-
   // Preserve pending timepoints.
   actions->action_list = pending_list;
 
@@ -1553,10 +1325,8 @@
       IREE_TRACE_ZONE_END(z1);
     }
 
-    if (entry->callback) {
-      status =
-          iree_status_join(status, entry->callback(status, entry->user_data));
-    }
+    status =
+        iree_status_join(status, entry->callback(status, entry->user_data));
 
     if (IREE_UNLIKELY(entry->created_event)) {
       status = iree_status_join(
diff --git a/runtime/src/iree/hal/utils/deferred_work_queue.h b/runtime/src/iree/hal/utils/deferred_work_queue.h
index c076a8d..3a25886 100644
--- a/runtime/src/iree/hal/utils/deferred_work_queue.h
+++ b/runtime/src/iree/hal/utils/deferred_work_queue.h
@@ -109,24 +109,6 @@
   iree_status_t(IREE_API_PTR* submit_command_buffer)(
       iree_hal_deferred_work_queue_device_interface_t* device_interface,
       iree_hal_command_buffer_t* command_buffer);
-
-  // Asynchronously allocates a pointer and assigns it to the given buffer.
-  //
-  // This is optional, and is only required to be valid if
-  // iree_hal_deferred_work_queue_enqueue_alloc is ever called on the work
-  // queue.
-  iree_status_t(IREE_API_PTR* async_alloc)(
-      iree_hal_deferred_work_queue_device_interface_t* device_interface,
-      iree_hal_buffer_t* buffer);
-
-  // Asynchronously frees a buffer.
-  //
-  // This is optional, and is only required to be valid if
-  // iree_hal_deferred_work_queue_enqueue_dealloc is ever called on the work
-  // queue.
-  iree_status_t(IREE_API_PTR* async_dealloc)(
-      iree_hal_deferred_work_queue_device_interface_t* device_interface,
-      iree_hal_buffer_t* buffer);
 } iree_hal_deferred_work_queue_device_interface_vtable_t;
 
 iree_status_t iree_hal_deferred_work_queue_create(
@@ -152,22 +134,6 @@
     iree_hal_command_buffer_t* const* command_buffers,
     iree_hal_buffer_binding_table_t const* binding_tables);
 
-// Enqueues allocations into the work queue to be executed
-// once all semaphores have been satisfied.
-iree_status_t iree_hal_deferred_work_queue_enqueue_alloc(
-    iree_hal_deferred_work_queue_t* deferred_work_queue,
-    const iree_hal_semaphore_list_t wait_semaphore_list,
-    const iree_hal_semaphore_list_t signal_semaphore_list,
-    iree_hal_buffer_t* buffer);
-
-// Enqueues deallocations into the work queue to be executed
-// once all semaphores have been satisfied.
-iree_status_t iree_hal_deferred_work_queue_enqueue_dealloc(
-    iree_hal_deferred_work_queue_t* deferred_work_queue,
-    const iree_hal_semaphore_list_t wait_semaphore_list,
-    const iree_hal_semaphore_list_t signal_semaphore_list,
-    iree_hal_buffer_t* buffer);
-
 // Attempts to advance the work queue by processing using
 // the current thread, rather than the worker thread.
 iree_status_t iree_hal_deferred_work_queue_issue(