Adding iree_hal_buffer_placement_t info to allocated HAL buffers. (#19160)

This allows users to query for the device, queue affinity, and origin
flags of an allocated buffer so that they can decide whether to copy
buffers when moving across devices/queues, perform synchronous or
asynchronous deallocations, and select queues to perform such
operations. The downside is that prior to this change buffers could be
defined as shared across multiple devices that share a single allocator.
Because buffers can still be shared in this way the term "placement" was
chosen: it does not describe "availability" or "access permissions" and
only indicates the origin of the buffer and where its preferred location
is. Buffer compatibility queries are still the way to determine access
visibility.

The base `iree_hal_buffer_t` structure was cleaned up a bit and though
still ugly is better prepared for removal of the device allocator back
reference. #19159 tracks removing the allocator reference that is
currently only used by the caching allocator due to our lack of dynamic
casts.

Breaking API changes:
* `iree_hal_buffer_subspan` now requires an `iree_allocator_t
host_allocator` that was previously implicit.
* `iree_hal_subspan_buffer_initialize` removed as it was not safe and
shouldn't have been used.
* `iree_hal_deferred_buffer_t` removed as it is not possible to do the
placement checks without an allocated buffer reference. This is used in
a branch for async allocations on CPU but either a different approach is
required there or it can be moved into `iree/hal/local/` where we can
make assertions about universal accessibility.
* `iree_hal_heap_buffer_wrap` and all other buffer creation now requires
a placement.
* `iree_hal_buffer_initialize` used by HAL implementations now requires
a placement.
diff --git a/experimental/web/sample_webgpu/main.c b/experimental/web/sample_webgpu/main.c
index 8f7d4d4..aaf8336 100644
--- a/experimental/web/sample_webgpu/main.c
+++ b/experimental/web/sample_webgpu/main.c
@@ -666,6 +666,10 @@
                             "unable to allocate buffer of size %" PRIdsz,
                             data_length);
   }
+  const iree_hal_buffer_placement_t placement = {
+      .device = device,
+      .queue_affinity = IREE_HAL_QUEUE_AFFINITY_ANY,
+  };
   const iree_hal_buffer_params_t target_params = {
       .usage = IREE_HAL_BUFFER_USAGE_TRANSFER | IREE_HAL_BUFFER_USAGE_MAPPING,
       .type =
@@ -673,8 +677,8 @@
       .access = IREE_HAL_MEMORY_ACCESS_ALL,
   };
   return iree_hal_webgpu_buffer_wrap(
-      device, iree_hal_device_allocator(device), target_params.type,
-      target_params.access, target_params.usage, data_length,
+      placement, target_params.type, target_params.access, target_params.usage,
+      data_length,
       /*byte_offset=*/0,
       /*byte_length=*/data_length, device_buffer_handle,
       iree_allocator_system(), out_buffer);
diff --git a/experimental/webgpu/buffer.c b/experimental/webgpu/buffer.c
index cd83587..564393f 100644
--- a/experimental/webgpu/buffer.c
+++ b/experimental/webgpu/buffer.c
@@ -19,7 +19,7 @@
 
 typedef struct iree_hal_webgpu_buffer_t {
   iree_hal_buffer_t base;
-  iree_hal_device_t* device;  // unowned
+  iree_allocator_t host_allocator;
   WGPUBuffer handle;
   bool is_mapped;
 } iree_hal_webgpu_buffer_t;
@@ -33,14 +33,12 @@
 }
 
 iree_status_t iree_hal_webgpu_buffer_wrap(
-    iree_hal_device_t* device, iree_hal_allocator_t* device_allocator,
-    iree_hal_memory_type_t memory_type, iree_hal_memory_access_t allowed_access,
+    iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type,
+    iree_hal_memory_access_t allowed_access,
     iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
     iree_device_size_t byte_offset, iree_device_size_t byte_length,
     WGPUBuffer handle, iree_allocator_t host_allocator,
     iree_hal_buffer_t** out_buffer) {
-  IREE_ASSERT_ARGUMENT(device);
-  IREE_ASSERT_ARGUMENT(device_allocator);
   IREE_ASSERT_ARGUMENT(handle);
   IREE_ASSERT_ARGUMENT(out_buffer);
   *out_buffer = NULL;
@@ -50,11 +48,11 @@
   iree_status_t status =
       iree_allocator_malloc(host_allocator, sizeof(*buffer), (void**)&buffer);
   if (iree_status_is_ok(status)) {
-    iree_hal_buffer_initialize(host_allocator, device_allocator, &buffer->base,
-                               allocation_size, byte_offset, byte_length,
-                               memory_type, allowed_access, allowed_usage,
+    iree_hal_buffer_initialize(placement, &buffer->base, allocation_size,
+                               byte_offset, byte_length, memory_type,
+                               allowed_access, allowed_usage,
                                &iree_hal_webgpu_buffer_vtable, &buffer->base);
-    buffer->device = device;
+    buffer->host_allocator = host_allocator;
     buffer->handle = handle;
     *out_buffer = &buffer->base;
   }
@@ -65,7 +63,7 @@
 
 static void iree_hal_webgpu_buffer_destroy(iree_hal_buffer_t* base_buffer) {
   iree_hal_webgpu_buffer_t* buffer = iree_hal_webgpu_buffer_cast(base_buffer);
-  iree_allocator_t host_allocator = base_buffer->host_allocator;
+  iree_allocator_t host_allocator = buffer->host_allocator;
   IREE_TRACE_ZONE_BEGIN(z0);
 
   if (buffer->is_mapped) {
@@ -99,7 +97,7 @@
   // Use wgpuBufferMapAsync directly to avoid this emulation.
   iree_hal_webgpu_buffer_t* buffer = iree_hal_webgpu_buffer_cast(base_buffer);
   return iree_hal_buffer_emulated_map_range(
-      buffer->device, base_buffer, mapping_mode, memory_access,
+      buffer->base.placement.device, base_buffer, mapping_mode, memory_access,
       local_byte_offset, local_byte_length, mapping);
 }
 
@@ -109,8 +107,8 @@
   // WebGPU does not allow for synchronous buffer mapping.
   // Use wgpuBufferMapAsync directly to avoid this emulation.
   iree_hal_webgpu_buffer_t* buffer = iree_hal_webgpu_buffer_cast(base_buffer);
-  return iree_hal_buffer_emulated_unmap_range(buffer->device, base_buffer,
-                                              local_byte_offset,
+  return iree_hal_buffer_emulated_unmap_range(buffer->base.placement.device,
+                                              base_buffer, local_byte_offset,
                                               local_byte_length, mapping);
 }
 
diff --git a/experimental/webgpu/buffer.h b/experimental/webgpu/buffer.h
index 056185d..c837753 100644
--- a/experimental/webgpu/buffer.h
+++ b/experimental/webgpu/buffer.h
@@ -19,8 +19,8 @@
 // we start to support pooling.
 
 iree_status_t iree_hal_webgpu_buffer_wrap(
-    iree_hal_device_t* device, iree_hal_allocator_t* device_allocator,
-    iree_hal_memory_type_t memory_type, iree_hal_memory_access_t allowed_access,
+    iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type,
+    iree_hal_memory_access_t allowed_access,
     iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
     iree_device_size_t byte_offset, iree_device_size_t byte_length,
     WGPUBuffer handle, iree_allocator_t host_allocator,
diff --git a/experimental/webgpu/simple_allocator.c b/experimental/webgpu/simple_allocator.c
index d6016a1..ac0cc07 100644
--- a/experimental/webgpu/simple_allocator.c
+++ b/experimental/webgpu/simple_allocator.c
@@ -195,9 +195,14 @@
                             allocation_size);
   }
 
+  const iree_hal_buffer_placement_t placement = {
+      .device = allocator->device,
+      .queue_affinity = params->queue_affinity ? params->queue_affinity
+                                               : IREE_HAL_QUEUE_AFFINITY_ANY,
+      .flags = IREE_HAL_BUFFER_PLACEMENT_FLAG_NONE,
+  };
   iree_status_t status = iree_hal_webgpu_buffer_wrap(
-      allocator->device, base_allocator, params->type, params->access,
-      params->usage, allocation_size,
+      placement, params->type, params->access, params->usage, allocation_size,
       /*byte_offset=*/0,
       /*byte_length=*/allocation_size, buffer_handle, allocator->host_allocator,
       out_buffer);
diff --git a/runtime/src/iree/hal/allocator.h b/runtime/src/iree/hal/allocator.h
index de0ec30..7698b2e 100644
--- a/runtime/src/iree/hal/allocator.h
+++ b/runtime/src/iree/hal/allocator.h
@@ -495,6 +495,8 @@
 IREE_API_EXPORT void iree_hal_allocator_destroy(
     iree_hal_allocator_t* IREE_RESTRICT allocator);
 
+// TODO(#19159): remove iree_hal_allocator_deallocate_buffer when pooling no
+// longer requires the pooling_allocator on iree_hal_buffer_t.
 IREE_API_EXPORT void iree_hal_allocator_deallocate_buffer(
     iree_hal_allocator_t* IREE_RESTRICT allocator,
     iree_hal_buffer_t* IREE_RESTRICT buffer);
diff --git a/runtime/src/iree/hal/allocator_heap.c b/runtime/src/iree/hal/allocator_heap.c
index 2008de4..e7ed9bb 100644
--- a/runtime/src/iree/hal/allocator_heap.c
+++ b/runtime/src/iree/hal/allocator_heap.c
@@ -198,8 +198,8 @@
   IREE_STATISTICS(statistics = &allocator->statistics);
   iree_hal_buffer_t* buffer = NULL;
   IREE_RETURN_IF_ERROR(iree_hal_heap_buffer_create(
-      base_allocator, statistics, &compat_params, allocation_size,
-      allocator->data_allocator, allocator->host_allocator, &buffer));
+      statistics, &compat_params, allocation_size, allocator->data_allocator,
+      allocator->host_allocator, &buffer));
 
   *out_buffer = buffer;
   return iree_ok_status();
@@ -219,6 +219,9 @@
     iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
     iree_hal_buffer_release_callback_t release_callback,
     iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
+  iree_hal_heap_allocator_t* allocator =
+      iree_hal_heap_allocator_cast(base_allocator);
+
   // Coerce options into those required for use by heap-based devices.
   iree_hal_buffer_params_t compat_params = *params;
   iree_device_size_t allocation_size = external_buffer->size;
@@ -243,11 +246,17 @@
                               "external buffer type not supported");
   }
 
+  const iree_hal_buffer_placement_t placement = {
+      .device = NULL,
+      .queue_affinity = compat_params.queue_affinity
+                            ? compat_params.queue_affinity
+                            : IREE_HAL_QUEUE_AFFINITY_ANY,
+      .flags = IREE_HAL_BUFFER_PLACEMENT_FLAG_NONE,
+  };
   return iree_hal_heap_buffer_wrap(
-      base_allocator, compat_params.type, compat_params.access,
-      compat_params.usage, external_buffer->size,
-      iree_make_byte_span(ptr, external_buffer->size), release_callback,
-      out_buffer);
+      placement, compat_params.type, compat_params.access, compat_params.usage,
+      external_buffer->size, iree_make_byte_span(ptr, external_buffer->size),
+      release_callback, allocator->host_allocator, out_buffer);
 }
 
 static iree_status_t iree_hal_heap_allocator_export_buffer(
diff --git a/runtime/src/iree/hal/buffer.c b/runtime/src/iree/hal/buffer.c
index 6b33276..a55f248 100644
--- a/runtime/src/iree/hal/buffer.c
+++ b/runtime/src/iree/hal/buffer.c
@@ -122,49 +122,34 @@
 // Subspan indirection buffer
 //===----------------------------------------------------------------------===//
 
+typedef struct iree_hal_subspan_buffer_t {
+  iree_hal_buffer_t base;
+  iree_allocator_t host_allocator;
+} iree_hal_subspan_buffer_t;
+
 static const iree_hal_buffer_vtable_t iree_hal_subspan_buffer_vtable;
 
-IREE_API_EXPORT void iree_hal_subspan_buffer_initialize(
-    iree_hal_buffer_t* allocated_buffer, iree_device_size_t byte_offset,
-    iree_device_size_t byte_length, iree_hal_allocator_t* device_allocator,
-    iree_allocator_t host_allocator, iree_hal_buffer_t* out_buffer) {
-  IREE_ASSERT_ARGUMENT(allocated_buffer);
-  IREE_ASSERT_ARGUMENT(out_buffer);
-  iree_hal_buffer_initialize(host_allocator, device_allocator, allocated_buffer,
-                             allocated_buffer->allocation_size, byte_offset,
-                             byte_length, allocated_buffer->memory_type,
-                             allocated_buffer->allowed_access,
-                             allocated_buffer->allowed_usage,
-                             &iree_hal_subspan_buffer_vtable, out_buffer);
-}
-
-IREE_API_EXPORT void iree_hal_subspan_buffer_deinitialize(
-    iree_hal_buffer_t* buffer) {
-  IREE_ASSERT_ARGUMENT(buffer);
-  iree_hal_buffer_release(buffer->allocated_buffer);
-  buffer->allocated_buffer = NULL;
-}
-
 IREE_API_EXPORT iree_status_t iree_hal_subspan_buffer_create(
     iree_hal_buffer_t* allocated_buffer, iree_device_size_t byte_offset,
-    iree_device_size_t byte_length, iree_hal_allocator_t* device_allocator,
-    iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer) {
+    iree_device_size_t byte_length, iree_allocator_t host_allocator,
+    iree_hal_buffer_t** out_buffer) {
   IREE_ASSERT_ARGUMENT(allocated_buffer);
   IREE_ASSERT_ARGUMENT(out_buffer);
   *out_buffer = NULL;
   IREE_TRACE_ZONE_BEGIN(z0);
 
-  iree_hal_buffer_t* buffer = NULL;
+  iree_hal_subspan_buffer_t* buffer = NULL;
   iree_status_t status =
       iree_allocator_malloc(host_allocator, sizeof(*buffer), (void**)&buffer);
   if (iree_status_is_ok(status)) {
     iree_hal_buffer_initialize(
-        host_allocator, device_allocator, allocated_buffer,
+        iree_hal_buffer_placement_undefined(), allocated_buffer,
         allocated_buffer->allocation_size, byte_offset, byte_length,
         allocated_buffer->memory_type, allocated_buffer->allowed_access,
         allocated_buffer->allowed_usage, &iree_hal_subspan_buffer_vtable,
-        buffer);
-    *out_buffer = buffer;
+        &buffer->base);
+    buffer->host_allocator = host_allocator;
+    *out_buffer = &buffer->base;
   }
 
   IREE_TRACE_ZONE_END(z0);
@@ -172,11 +157,12 @@
 }
 
 static void iree_hal_subspan_buffer_destroy(iree_hal_buffer_t* base_buffer) {
-  iree_allocator_t host_allocator = base_buffer->host_allocator;
+  iree_hal_subspan_buffer_t* buffer = (iree_hal_subspan_buffer_t*)base_buffer;
+  iree_allocator_t host_allocator = buffer->host_allocator;
   IREE_TRACE_ZONE_BEGIN(z0);
 
   iree_hal_buffer_release(base_buffer->allocated_buffer);
-  iree_allocator_free(host_allocator, base_buffer);
+  iree_allocator_free(host_allocator, buffer);
 
   IREE_TRACE_ZONE_END(z0);
 }
@@ -222,161 +208,18 @@
 };
 
 //===----------------------------------------------------------------------===//
-// iree_hal_deferred_buffer_t
-//===----------------------------------------------------------------------===//
-
-typedef struct iree_hal_deferred_buffer_t {
-  iree_hal_buffer_t base;
-  iree_hal_queue_affinity_t queue_affinity;
-  iree_device_size_t min_alignment;
-} iree_hal_deferred_buffer_t;
-
-static const iree_hal_buffer_vtable_t iree_hal_deferred_buffer_vtable;
-
-IREE_API_EXPORT iree_status_t iree_hal_deferred_buffer_create_reserved(
-    iree_hal_allocator_t* device_allocator, iree_device_size_t allocation_size,
-    iree_device_size_t byte_offset, iree_device_size_t byte_length,
-    iree_hal_buffer_params_t params, iree_allocator_t host_allocator,
-    iree_hal_buffer_t** out_buffer) {
-  IREE_ASSERT_ARGUMENT(out_buffer);
-  *out_buffer = NULL;
-  IREE_TRACE_ZONE_BEGIN(z0);
-
-  iree_hal_deferred_buffer_t* buffer = NULL;
-  IREE_RETURN_AND_END_ZONE_IF_ERROR(
-      z0,
-      iree_allocator_malloc(host_allocator, sizeof(*buffer), (void**)&buffer));
-  iree_hal_buffer_initialize(host_allocator, device_allocator, NULL,
-                             allocation_size, byte_offset, byte_length,
-                             params.type, params.access, params.usage,
-                             &iree_hal_deferred_buffer_vtable, &buffer->base);
-  buffer->queue_affinity = params.queue_affinity;
-  buffer->min_alignment = params.min_alignment;
-  *out_buffer = &buffer->base;
-
-  IREE_TRACE_ZONE_END(z0);
-  return iree_ok_status();
-}
-
-static void iree_hal_deferred_buffer_destroy(iree_hal_buffer_t* base_buffer) {
-  iree_allocator_t host_allocator = base_buffer->host_allocator;
-  IREE_TRACE_ZONE_BEGIN(z0);
-
-  if (base_buffer->allocated_buffer) {
-    iree_hal_buffer_release(base_buffer->allocated_buffer);
-  }
-  iree_allocator_free(host_allocator, base_buffer);
-
-  IREE_TRACE_ZONE_END(z0);
-}
-
-IREE_API_EXPORT iree_status_t
-iree_hal_deferred_buffer_commit(iree_hal_buffer_t* base_buffer) {
-  iree_hal_deferred_buffer_t* buffer = (iree_hal_deferred_buffer_t*)base_buffer;
-  if (IREE_UNLIKELY(base_buffer->allocated_buffer)) {
-    // Already committed - no-op.
-    return iree_ok_status();
-  }
-  IREE_TRACE_ZONE_BEGIN(z0);
-  iree_hal_buffer_params_t params = {
-      .usage = base_buffer->allowed_usage,
-      .access = base_buffer->allowed_access,
-      .type = base_buffer->memory_type,
-      .queue_affinity = buffer->queue_affinity,
-      .min_alignment = buffer->min_alignment,
-  };
-  iree_status_t status = iree_hal_allocator_allocate_buffer(
-      base_buffer->device_allocator, params, base_buffer->allocation_size,
-      &base_buffer->allocated_buffer);
-  IREE_TRACE_ZONE_END(z0);
-  return status;
-}
-
-IREE_API_EXPORT iree_status_t
-iree_hal_deferred_buffer_decommit(iree_hal_buffer_t* buffer) {
-  IREE_TRACE_ZONE_BEGIN(z0);
-  if (IREE_LIKELY(buffer->allocated_buffer)) {
-    iree_hal_buffer_release(buffer->allocated_buffer);
-    buffer->allocated_buffer = NULL;
-  }
-  IREE_TRACE_ZONE_END(z0);
-  return iree_ok_status();
-}
-
-static iree_status_t iree_hal_deferred_buffer_map_range(
-    iree_hal_buffer_t* buffer, iree_hal_mapping_mode_t mapping_mode,
-    iree_hal_memory_access_t memory_access,
-    iree_device_size_t local_byte_offset, iree_device_size_t local_byte_length,
-    iree_hal_buffer_mapping_t* mapping) {
-  if (IREE_UNLIKELY(!buffer->allocated_buffer)) {
-    // Performance warning: this is likely to be happening synchronously in the
-    // caller in an unexpected way. We could FAILED_PRECONDITION if we wanted
-    // to be strict but by doing this on-demand we allow deferred buffers to be
-    // used with callers that may not know that this is a reserved deferred
-    // buffer (particularly useful for outputs/copy targets).
-    IREE_RETURN_IF_ERROR(iree_hal_deferred_buffer_commit(buffer));
-  }
-  return _VTABLE_DISPATCH(buffer->allocated_buffer, map_range)(
-      buffer->allocated_buffer, mapping_mode, memory_access, local_byte_offset,
-      local_byte_length, mapping);
-}
-
-static iree_status_t iree_hal_deferred_buffer_unmap_range(
-    iree_hal_buffer_t* buffer, iree_device_size_t local_byte_offset,
-    iree_device_size_t local_byte_length, iree_hal_buffer_mapping_t* mapping) {
-  if (IREE_UNLIKELY(!buffer->allocated_buffer)) {
-    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
-                            "buffer does not have committed storage");
-  }
-  return _VTABLE_DISPATCH(buffer->allocated_buffer, unmap_range)(
-      buffer->allocated_buffer, local_byte_offset, local_byte_length, mapping);
-}
-
-static iree_status_t iree_hal_deferred_buffer_invalidate_range(
-    iree_hal_buffer_t* buffer, iree_device_size_t local_byte_offset,
-    iree_device_size_t local_byte_length) {
-  if (IREE_UNLIKELY(!buffer->allocated_buffer)) {
-    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
-                            "buffer does not have committed storage");
-  }
-  return _VTABLE_DISPATCH(buffer->allocated_buffer, invalidate_range)(
-      buffer->allocated_buffer, local_byte_offset, local_byte_length);
-}
-
-static iree_status_t iree_hal_deferred_buffer_flush_range(
-    iree_hal_buffer_t* buffer, iree_device_size_t local_byte_offset,
-    iree_device_size_t local_byte_length) {
-  if (IREE_UNLIKELY(!buffer->allocated_buffer)) {
-    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
-                            "buffer does not have committed storage");
-  }
-  return _VTABLE_DISPATCH(buffer->allocated_buffer, flush_range)(
-      buffer->allocated_buffer, local_byte_offset, local_byte_length);
-}
-
-static const iree_hal_buffer_vtable_t iree_hal_deferred_buffer_vtable = {
-    .recycle = iree_hal_buffer_recycle,
-    .destroy = iree_hal_deferred_buffer_destroy,
-    .map_range = iree_hal_deferred_buffer_map_range,
-    .unmap_range = iree_hal_deferred_buffer_unmap_range,
-    .invalidate_range = iree_hal_deferred_buffer_invalidate_range,
-    .flush_range = iree_hal_deferred_buffer_flush_range,
-};
-
-//===----------------------------------------------------------------------===//
 // iree_hal_buffer_t
 //===----------------------------------------------------------------------===//
 
 IREE_API_EXPORT void iree_hal_buffer_initialize(
-    iree_allocator_t host_allocator, iree_hal_allocator_t* device_allocator,
-    iree_hal_buffer_t* allocated_buffer, iree_device_size_t allocation_size,
-    iree_device_size_t byte_offset, iree_device_size_t byte_length,
-    iree_hal_memory_type_t memory_type, iree_hal_memory_access_t allowed_access,
+    iree_hal_buffer_placement_t placement, iree_hal_buffer_t* allocated_buffer,
+    iree_device_size_t allocation_size, iree_device_size_t byte_offset,
+    iree_device_size_t byte_length, iree_hal_memory_type_t memory_type,
+    iree_hal_memory_access_t allowed_access,
     iree_hal_buffer_usage_t allowed_usage,
     const iree_hal_buffer_vtable_t* vtable, iree_hal_buffer_t* buffer) {
   iree_hal_resource_initialize(vtable, &buffer->resource);
-  buffer->host_allocator = host_allocator;
-  buffer->device_allocator = device_allocator;
+  buffer->placement = placement;
   buffer->allocated_buffer = allocated_buffer;
   buffer->allocation_size = allocation_size;
   buffer->byte_offset = byte_offset;
@@ -395,8 +238,8 @@
 IREE_API_EXPORT void iree_hal_buffer_recycle(iree_hal_buffer_t* buffer) {
   if (IREE_LIKELY(buffer)) {
     IREE_TRACE_ZONE_BEGIN(z0);
-    if (buffer->device_allocator) {
-      iree_hal_allocator_deallocate_buffer(buffer->device_allocator, buffer);
+    if (buffer->pooling_allocator) {
+      iree_hal_allocator_deallocate_buffer(buffer->pooling_allocator, buffer);
     } else {
       iree_hal_buffer_destroy(buffer);
     }
@@ -633,7 +476,8 @@
 
 IREE_API_EXPORT iree_status_t iree_hal_buffer_subspan(
     iree_hal_buffer_t* buffer, iree_device_size_t byte_offset,
-    iree_device_size_t byte_length, iree_hal_buffer_t** out_buffer) {
+    iree_device_size_t byte_length, iree_allocator_t host_allocator,
+    iree_hal_buffer_t** out_buffer) {
   IREE_ASSERT_ARGUMENT(buffer);
   IREE_ASSERT_ARGUMENT(out_buffer);
   *out_buffer = NULL;
@@ -657,12 +501,11 @@
       iree_hal_buffer_allocated_buffer(buffer);
   if (allocated_buffer && allocated_buffer != buffer) {
     return iree_hal_buffer_subspan(allocated_buffer, byte_offset, byte_length,
-                                   out_buffer);
+                                   host_allocator, out_buffer);
   }
 
   return iree_hal_subspan_buffer_create(buffer, byte_offset, byte_length,
-                                        /*device_allocator=*/NULL,
-                                        buffer->host_allocator, out_buffer);
+                                        host_allocator, out_buffer);
 }
 
 IREE_API_EXPORT iree_hal_buffer_t* iree_hal_buffer_allocated_buffer(
@@ -677,6 +520,14 @@
   return buffer->allocation_size;
 }
 
+IREE_API_EXPORT iree_hal_buffer_placement_t
+iree_hal_buffer_allocation_placement(const iree_hal_buffer_t* buffer) {
+  IREE_ASSERT_ARGUMENT(buffer);
+  return buffer == buffer->allocated_buffer
+             ? buffer->placement
+             : buffer->allocated_buffer->placement;
+}
+
 IREE_API_EXPORT iree_device_size_t
 iree_hal_buffer_byte_offset(const iree_hal_buffer_t* buffer) {
   IREE_ASSERT_ARGUMENT(buffer);
diff --git a/runtime/src/iree/hal/buffer.h b/runtime/src/iree/hal/buffer.h
index 82c1f47..502a68b 100644
--- a/runtime/src/iree/hal/buffer.h
+++ b/runtime/src/iree/hal/buffer.h
@@ -19,6 +19,7 @@
 #endif  // __cplusplus
 
 typedef struct iree_hal_allocator_t iree_hal_allocator_t;
+typedef struct iree_hal_device_t iree_hal_device_t;
 
 //===----------------------------------------------------------------------===//
 // Types and Enums
@@ -458,6 +459,65 @@
 typedef uint32_t iree_hal_mapping_mode_t;
 
 //===----------------------------------------------------------------------===//
+// iree_hal_buffer_placement_t
+//===----------------------------------------------------------------------===//
+
+// Flags describing the placement of a buffer on a device and its allocation
+// semantics. This information is only valid on allocated buffers and not
+// wrappers that may hold references to them.
+typedef uint32_t iree_hal_buffer_placement_flags_t;
+enum iree_hal_buffer_placement_flag_bits_t {
+  IREE_HAL_BUFFER_PLACEMENT_FLAG_NONE = 0u,
+  // Buffer was allocated with an asynchronous allocation API such as
+  // iree_hal_device_queue_alloca and/or can be deallocated with an asynchronous
+  // deallocation API such as iree_hal_device_queue_dealloca.
+  IREE_HAL_BUFFER_PLACEMENT_FLAG_ASYNCHRONOUS = 1u << 0,
+  // TODO(benvanik): flags for discrete/external to allow for quick export
+  // checks.
+};
+
+// Describes the origin of an allocated buffer.
+// This is used internally to route buffers back to pools and can be used by
+// hosting layers to route deallocations to appropriate devices/queues.
+// This information is generally only valid for allocated buffers (the result of
+// an iree_hal_buffer_allocated_buffer query).
+typedef struct iree_hal_buffer_placement_t {
+  // The device the buffer was allocated from. Unretained.
+  // Only valid for allocated buffers and not any intermediates (subspans, etc).
+  // May be NULL if the buffer is not associated with any particular device such
+  // as a free-floating heap-allocated buffer on the host.
+  iree_hal_device_t* device;
+  // Queues on the device to which the buffer is available. Depending on the
+  // device this may indicate which queues have exclusive access to the buffer
+  // or which queues have optimal access. This may be broader than the original
+  // request if the buffer is able to be accessed by other queues without
+  // penalty. Usage of the buffer for queue read/write or asynchronous
+  // deallocation via iree_hal_device_queue_dealloca is only legal with a queue
+  // affinity that is a subset of this affinity set.
+  iree_hal_queue_affinity_t queue_affinity;
+  // Describes the placement behavior of a buffer on a device and its allocation
+  // semantics.
+  iree_hal_buffer_placement_flags_t flags;
+  uint32_t reserved;
+} iree_hal_buffer_placement_t;
+
+// Returns a placement indicating that the buffer has no direct device it is
+// associated with. Commonly used for free-floating buffer handles such as heap
+// wrapped or allocated buffers that come from outside of the HAL.
+static inline iree_hal_buffer_placement_t iree_hal_buffer_placement_undefined(
+    void) {
+  iree_hal_buffer_placement_t placement = {0};
+  return placement;
+}
+
+// Returns true if the |placement| is undefined and the buffer has no direct
+// device it is associated with.
+static inline bool iree_hal_buffer_placement_is_undefined(
+    const iree_hal_buffer_placement_t placement) {
+  return placement.device == NULL;
+}
+
+//===----------------------------------------------------------------------===//
 // iree_hal_buffer_params_t
 //===----------------------------------------------------------------------===//
 
@@ -699,7 +759,8 @@
 // |out_buffer| must be released by the caller.
 IREE_API_EXPORT iree_status_t iree_hal_buffer_subspan(
     iree_hal_buffer_t* buffer, iree_device_size_t byte_offset,
-    iree_device_size_t byte_length, iree_hal_buffer_t** out_buffer);
+    iree_device_size_t byte_length, iree_allocator_t host_allocator,
+    iree_hal_buffer_t** out_buffer);
 
 // Retains the given |buffer| for the caller.
 IREE_API_EXPORT void iree_hal_buffer_retain(iree_hal_buffer_t* buffer);
@@ -720,6 +781,20 @@
 IREE_API_EXPORT iree_device_size_t
 iree_hal_buffer_allocation_size(const iree_hal_buffer_t* buffer);
 
+// Returns the original placement of the allocated buffer.
+// The placement applies to the entire underlying allocated buffer and not the
+// potential subspan of the |buffer| handle. Many buffer handles may be backed
+// by the same allocation. It's possible for placements to change over the
+// lifetime of a buffer as it is moved across devices but the origin will always
+// accept actions on the buffer such as deallocation.
+//
+// Note that not all buffers have a placement: e.g. host buffers allocated as
+// free-floating objects will have no device assigned.
+// iree_hal_buffer_placement_is_undefined can be used to check for this case
+// explicitly.
+IREE_API_EXPORT iree_hal_buffer_placement_t
+iree_hal_buffer_allocation_placement(const iree_hal_buffer_t* buffer);
+
 // Returns the offset in bytes of the buffer within its allocated_buffer.
 IREE_API_EXPORT iree_device_size_t
 iree_hal_buffer_byte_offset(const iree_hal_buffer_t* buffer);
@@ -916,62 +991,14 @@
 // iree_hal_subspan_buffer_t
 //===----------------------------------------------------------------------===//
 
-// Initializes in-place a subspan buffer stored in |out_buffer|.
-// The reference count of the buffer will be set to 1.
-//
-// This is intended to be used for provably on-stack transient subspans or
-// buffer wrapping where ownership is controlled externally. If the lifetime of
-// the subspan may extend beyond the lifetime of the |out_buffer| storage then
-// iree_hal_subspan_buffer_create must be used instead.
-//
-// iree_hal_subspan_buffer_deinitialize must be used to deinitialize the buffer.
-IREE_API_EXPORT void iree_hal_subspan_buffer_initialize(
-    iree_hal_buffer_t* allocated_buffer, iree_device_size_t byte_offset,
-    iree_device_size_t byte_length, iree_hal_allocator_t* device_allocator,
-    iree_allocator_t host_allocator, iree_hal_buffer_t* out_buffer);
-
-// Deinitializes a subspan buffer that was initialized with
-// iree_hal_subspan_buffer_initialize.
-IREE_API_EXPORT void iree_hal_subspan_buffer_deinitialize(
-    iree_hal_buffer_t* buffer);
-
 // Creates a buffer referencing a subspan of some base allocation.
 // Optionally |device_allocator| can be provided if this subspan references
 // managed buffers that need deallocation callbacks.
 IREE_API_EXPORT iree_status_t iree_hal_subspan_buffer_create(
     iree_hal_buffer_t* allocated_buffer, iree_device_size_t byte_offset,
-    iree_device_size_t byte_length, iree_hal_allocator_t* device_allocator,
-    iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer);
-
-//===----------------------------------------------------------------------===//
-// iree_hal_deferred_buffer_t
-//===----------------------------------------------------------------------===//
-
-// Creates a buffer with the given properties that has no backing storage.
-// The buffer can be passed around/retained/etc with just the reservation and
-// committed/decommitted on demand. All usage of the buffer beyond metadata
-// queries requires that it be committed.
-//
-// WARNING: commit/decommit are thread-compatible. Callers must ensure that no
-// threads try to use the buffer contents before a commit has completed and that
-// no threads still have access to the buffer contents prior to a decommit.
-IREE_API_EXPORT iree_status_t iree_hal_deferred_buffer_create_reserved(
-    iree_hal_allocator_t* device_allocator, iree_device_size_t allocation_size,
-    iree_device_size_t byte_offset, iree_device_size_t byte_length,
-    iree_hal_buffer_params_t params, iree_allocator_t host_allocator,
+    iree_device_size_t byte_length, iree_allocator_t host_allocator,
     iree_hal_buffer_t** out_buffer);
 
-// Commits the backing storage of the |buffer| from its device allocator.
-// Ignored if the buffer is already committed.
-IREE_API_EXPORT iree_status_t
-iree_hal_deferred_buffer_commit(iree_hal_buffer_t* buffer);
-
-// Decommits the backing storage of the |buffer| and returns it to a
-// metadata-only state. No other threads must still have access to the buffer
-// contents.
-IREE_API_EXPORT iree_status_t
-iree_hal_deferred_buffer_decommit(iree_hal_buffer_t* buffer);
-
 //===----------------------------------------------------------------------===//
 // iree_hal_heap_buffer_t
 //===----------------------------------------------------------------------===//
@@ -985,11 +1012,11 @@
 // |out_buffer| must be released by the caller. |data| must be kept live for the
 // lifetime of the wrapping buffer.
 iree_status_t iree_hal_heap_buffer_wrap(
-    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type,
     iree_hal_memory_access_t allowed_access,
     iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
     iree_byte_span_t data, iree_hal_buffer_release_callback_t release_callback,
-    iree_hal_buffer_t** out_buffer);
+    iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer);
 
 //===----------------------------------------------------------------------===//
 // iree_hal_buffer_t implementation details
@@ -1024,41 +1051,81 @@
               "iree_hal_resource_vtable_t expects destroy at offset 0, we want "
               "to recycle instead");
 
+// NOTE: this shared data structure may be a mistake. If vtables were free we
+// would not provide this and would instead rely on each buffer implementation
+// to implement all of the accessor methods. Indirection through vtables has a
+// cost, though, so we hoist the common information that every buffer
+// implementation needs here. Since this adds a fixed cost to every buffer on
+// every implementation we should keep the structure as small as reasonable.
+//
+// NOTE: the internals of this structure are an implementation detail and may
+// change at any time. If there's no API accessor for a field then assume it
+// should not be used except by HAL buffer implementations.
 struct iree_hal_buffer_t {
-  // Frequently accessed:
   iree_hal_resource_t resource;  // must be at 0
+  // Underlying buffer allocation. If this points back at this buffer structure
+  // then the buffer is an allocated buffer itself and otherwise the underlying
+  // allocation is referenced and retained.
   iree_hal_buffer_t* allocated_buffer;
+  // Total size of the buffer allocation in its underlying storage.
+  // This is captured on each buffer including non-allocated buffers so that
+  // internal pooling/suballocation costs can be represented.
   iree_device_size_t allocation_size;
+  // Offset into the underlying allocated buffer this buffer range starts at.
   iree_device_size_t byte_offset;
+  // Length of the buffer range in the underlying allocated buffer. This is the
+  // logical length exposed to users.
   iree_device_size_t byte_length;
 
-  // Rarely accessed:
-  iree_allocator_t host_allocator;
-  iree_hal_allocator_t* device_allocator;
+  // Placement of the buffer on a device/queue set. Captured only for allocated
+  // buffers.
+  iree_hal_buffer_placement_t placement;
+
+  // Hacky back reference to an allocator that should be notified when the
+  // buffer is released. This is a hack to support interception of buffers by
+  // pooling layers and is slated for removal.
+  //
+  // TODO(#19159): remove iree_hal_allocator_deallocate_buffer when pooling no
+  // longer requires the pooling_allocator on iree_hal_buffer_t.
+  iree_hal_allocator_t* pooling_allocator;
+
   // TODO(benvanik): bit pack these; could be ~4 bytes vs 12.
   iree_hal_memory_type_t memory_type;
   iree_hal_buffer_usage_t allowed_usage;
   iree_hal_memory_access_t allowed_access;
 
-  // Implementation-defined flags.
-  uint16_t flags;
+  // Unused padding that more flags or identifiers can be placed in, such as
+  // which implementation pool owns the buffer.
+  uint16_t reserved;
+
+  // Implementation-defined flags used for additional bookkeeping or routing
+  // by the buffer implementation.
+  uint32_t flags;
 };
 
 IREE_API_EXPORT void iree_hal_buffer_initialize(
-    iree_allocator_t host_allocator, iree_hal_allocator_t* device_allocator,
-    iree_hal_buffer_t* allocated_buffer, iree_device_size_t allocation_size,
-    iree_device_size_t byte_offset, iree_device_size_t byte_length,
-    iree_hal_memory_type_t memory_type, iree_hal_memory_access_t allowed_access,
+    iree_hal_buffer_placement_t placement, iree_hal_buffer_t* allocated_buffer,
+    iree_device_size_t allocation_size, iree_device_size_t byte_offset,
+    iree_device_size_t byte_length, iree_hal_memory_type_t memory_type,
+    iree_hal_memory_access_t allowed_access,
     iree_hal_buffer_usage_t allowed_usage,
     const iree_hal_buffer_vtable_t* vtable, iree_hal_buffer_t* buffer);
 
-// Recycles |buffer| by returning it to its allocator (or destroying it).
+// TODO(#19159): remove iree_hal_allocator_deallocate_buffer when pooling no
+// longer requires the pooling_allocator on iree_hal_buffer_t. When buffers can
+// use their normal destroy callback to return themselves to pools then we won't
+// need this extra recycle thunk.
+//
+// Recycles |buffer| by releasing it to the origin it is associated with via the
+// release callback (or destroying it, if none was specified).
 // The |buffer| pointer may remain valid if it is returned to a pool but callers
-// must assume its contents are undefined.
+// must assume its contents are undefined as if it had been freed.
 IREE_API_EXPORT void iree_hal_buffer_recycle(iree_hal_buffer_t* buffer);
 
 // Destroys |buffer| and frees its memory.
-// Implementations should use iree_hal_buffer_recycle in their vtables.
+// Implementations must use iree_hal_buffer_recycle in their vtables for the
+// common iree_hal_resource_t destroy callback; iree_hal_buffer_destroy is only
+// to be used by release callbacks that want to free the buffer.
 IREE_API_EXPORT void iree_hal_buffer_destroy(iree_hal_buffer_t* buffer);
 
 #ifdef __cplusplus
diff --git a/runtime/src/iree/hal/buffer_heap.c b/runtime/src/iree/hal/buffer_heap.c
index e7b763e..93b3827 100644
--- a/runtime/src/iree/hal/buffer_heap.c
+++ b/runtime/src/iree/hal/buffer_heap.c
@@ -34,7 +34,14 @@
   // base.flags has the iree_hal_heap_buffer_storage_mode_t.
   iree_hal_buffer_t base;
 
+  // Host allocator this buffer metadata structure was allocated from. May be
+  // different than the data allocator used for the buffer payload.
+  iree_allocator_t host_allocator;
+
+  // TODO(benvanik): change to a raw pointer as the base.allocation_size is the
+  // same as the data_length.
   iree_byte_span_t data;
+
   union {
     // Used for IREE_HAL_HEAP_BUFFER_STORAGE_MODE_SPLIT.
     iree_allocator_t data_allocator;
@@ -45,9 +52,6 @@
   // Optional statistics shared with the allocator.
   IREE_STATISTICS(iree_hal_heap_allocator_statistics_t* statistics;)
 } iree_hal_heap_buffer_t;
-static_assert(sizeof(iree_hal_heap_buffer_t) <= 128,
-              "header should be <= the minimum buffer alignment so that we "
-              "don't introduce internal waste");
 
 static const iree_hal_buffer_vtable_t iree_hal_heap_buffer_vtable;
 
@@ -111,12 +115,10 @@
 }
 
 iree_status_t iree_hal_heap_buffer_create(
-    iree_hal_allocator_t* allocator,
     iree_hal_heap_allocator_statistics_t* statistics,
     const iree_hal_buffer_params_t* params, iree_device_size_t allocation_size,
     iree_allocator_t data_allocator, iree_allocator_t host_allocator,
     iree_hal_buffer_t** out_buffer) {
-  IREE_ASSERT_ARGUMENT(allocator);
   IREE_ASSERT_ARGUMENT(params);
   IREE_ASSERT_ARGUMENT(out_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
@@ -137,10 +139,11 @@
                                                 host_allocator, &buffer, &data);
 
   if (iree_status_is_ok(status)) {
-    iree_hal_buffer_initialize(host_allocator, allocator, &buffer->base,
-                               allocation_size, 0, allocation_size,
-                               params->type, params->access, params->usage,
-                               &iree_hal_heap_buffer_vtable, &buffer->base);
+    iree_hal_buffer_initialize(
+        iree_hal_buffer_placement_undefined(), &buffer->base, allocation_size,
+        0, allocation_size, params->type, params->access, params->usage,
+        &iree_hal_heap_buffer_vtable, &buffer->base);
+    buffer->host_allocator = host_allocator;
     buffer->data = data;
 
     if (same_allocator) {
@@ -169,12 +172,11 @@
 }
 
 iree_status_t iree_hal_heap_buffer_wrap(
-    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type,
     iree_hal_memory_access_t allowed_access,
     iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
     iree_byte_span_t data, iree_hal_buffer_release_callback_t release_callback,
-    iree_hal_buffer_t** out_buffer) {
-  IREE_ASSERT_ARGUMENT(allocator);
+    iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer) {
   IREE_ASSERT_ARGUMENT(out_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
 
@@ -188,16 +190,15 @@
         (int)IREE_HAL_HEAP_BUFFER_ALIGNMENT, data.data);
   }
 
-  iree_allocator_t host_allocator =
-      iree_hal_allocator_host_allocator(allocator);
   iree_hal_heap_buffer_t* buffer = NULL;
   iree_status_t status =
       iree_allocator_malloc(host_allocator, sizeof(*buffer), (void**)&buffer);
   if (iree_status_is_ok(status)) {
-    iree_hal_buffer_initialize(host_allocator, allocator, &buffer->base,
-                               allocation_size, 0, data.data_length,
-                               memory_type, allowed_access, allowed_usage,
-                               &iree_hal_heap_buffer_vtable, &buffer->base);
+    iree_hal_buffer_initialize(placement, &buffer->base, allocation_size, 0,
+                               data.data_length, memory_type, allowed_access,
+                               allowed_usage, &iree_hal_heap_buffer_vtable,
+                               &buffer->base);
+    buffer->host_allocator = host_allocator;
     buffer->data = data;
 
     // Notify the provided callback when the external data is no longer needed.
@@ -213,7 +214,7 @@
 
 static void iree_hal_heap_buffer_destroy(iree_hal_buffer_t* base_buffer) {
   iree_hal_heap_buffer_t* buffer = (iree_hal_heap_buffer_t*)base_buffer;
-  iree_allocator_t host_allocator = base_buffer->host_allocator;
+  iree_allocator_t host_allocator = buffer->host_allocator;
   IREE_TRACE_ZONE_BEGIN(z0);
 
   IREE_STATISTICS({
diff --git a/runtime/src/iree/hal/buffer_heap_impl.h b/runtime/src/iree/hal/buffer_heap_impl.h
index 6068c28..f358db8 100644
--- a/runtime/src/iree/hal/buffer_heap_impl.h
+++ b/runtime/src/iree/hal/buffer_heap_impl.h
@@ -31,7 +31,6 @@
 // |data_allocator| and |host_allocator| are the same the buffer will be created
 // as a flat slab. |out_buffer| must be released by the caller.
 iree_status_t iree_hal_heap_buffer_create(
-    iree_hal_allocator_t* allocator,
     iree_hal_heap_allocator_statistics_t* statistics,
     const iree_hal_buffer_params_t* params, iree_device_size_t allocation_size,
     iree_allocator_t data_allocator, iree_allocator_t host_allocator,
diff --git a/runtime/src/iree/hal/cts/buffer_mapping_test.h b/runtime/src/iree/hal/cts/buffer_mapping_test.h
index 504f3d5..b78c8a8 100644
--- a/runtime/src/iree/hal/cts/buffer_mapping_test.h
+++ b/runtime/src/iree/hal/cts/buffer_mapping_test.h
@@ -140,8 +140,9 @@
   // Create a subspan.
   iree_device_size_t subspan_length = 8;
   iree_hal_buffer_t* buffer_subspan = NULL;
-  IREE_ASSERT_OK(iree_hal_buffer_subspan(buffer, /*byte_offset=*/4,
-                                         subspan_length, &buffer_subspan));
+  IREE_ASSERT_OK(
+      iree_hal_buffer_subspan(buffer, /*byte_offset=*/4, subspan_length,
+                              iree_allocator_system(), &buffer_subspan));
 
   // Zero part of the subspan.
   IREE_ASSERT_OK(iree_hal_buffer_map_zero(buffer_subspan, /*byte_offset=*/4,
@@ -253,8 +254,9 @@
   // Create a subspan.
   iree_device_size_t subspan_length = 8;
   iree_hal_buffer_t* buffer_subspan = NULL;
-  IREE_ASSERT_OK(iree_hal_buffer_subspan(buffer, /*byte_offset=*/4,
-                                         subspan_length, &buffer_subspan));
+  IREE_ASSERT_OK(
+      iree_hal_buffer_subspan(buffer, /*byte_offset=*/4, subspan_length,
+                              iree_allocator_system(), &buffer_subspan));
 
   // Fill part of the subspan.
   uint8_t fill_value = 0xFF;
@@ -342,8 +344,9 @@
   // Create a subspan.
   iree_device_size_t subspan_length = 8;
   iree_hal_buffer_t* buffer_subspan = NULL;
-  IREE_ASSERT_OK(iree_hal_buffer_subspan(buffer, /*byte_offset=*/4,
-                                         subspan_length, &buffer_subspan));
+  IREE_ASSERT_OK(
+      iree_hal_buffer_subspan(buffer, /*byte_offset=*/4, subspan_length,
+                              iree_allocator_system(), &buffer_subspan));
 
   // Read the entire buffer subspan.
   std::vector<uint8_t> actual_data(subspan_length);
@@ -426,8 +429,9 @@
   // Create a subspan.
   iree_device_size_t subspan_length = 8;
   iree_hal_buffer_t* buffer_subspan = NULL;
-  IREE_ASSERT_OK(iree_hal_buffer_subspan(buffer, /*byte_offset=*/4,
-                                         subspan_length, &buffer_subspan));
+  IREE_ASSERT_OK(
+      iree_hal_buffer_subspan(buffer, /*byte_offset=*/4, subspan_length,
+                              iree_allocator_system(), &buffer_subspan));
 
   // Write over part of the subspan.
   std::vector<uint8_t> fill_buffer{0x11, 0x22, 0x33, 0x44};
diff --git a/runtime/src/iree/hal/cts/command_buffer_update_buffer_test.h b/runtime/src/iree/hal/cts/command_buffer_update_buffer_test.h
index 0ba96e4..861ff57 100644
--- a/runtime/src/iree/hal/cts/command_buffer_update_buffer_test.h
+++ b/runtime/src/iree/hal/cts/command_buffer_update_buffer_test.h
@@ -116,8 +116,9 @@
   // Create a subspan.
   iree_device_size_t subspan_length = 8;
   iree_hal_buffer_t* buffer_subspan;
-  IREE_ASSERT_OK(iree_hal_buffer_subspan(device_buffer, /*byte_offset=*/4,
-                                         subspan_length, &buffer_subspan));
+  IREE_ASSERT_OK(
+      iree_hal_buffer_subspan(device_buffer, /*byte_offset=*/4, subspan_length,
+                              iree_allocator_system(), &buffer_subspan));
 
   iree_hal_command_buffer_t* command_buffer = NULL;
   IREE_CHECK_OK(iree_hal_command_buffer_create(
diff --git a/runtime/src/iree/hal/drivers/cuda/cuda_allocator.c b/runtime/src/iree/hal/drivers/cuda/cuda_allocator.c
index 72332db..fde4abe 100644
--- a/runtime/src/iree/hal/drivers/cuda/cuda_allocator.c
+++ b/runtime/src/iree/hal/drivers/cuda/cuda_allocator.c
@@ -22,6 +22,9 @@
   // must be at offset 0.
   iree_hal_resource_t resource;
 
+  // Parent device that this allocator is associated with. Unowned.
+  iree_hal_device_t* parent_device;
+
   // The device that this allocator allocates memory from.
   CUdevice device;
 
@@ -55,9 +58,11 @@
 }
 
 iree_status_t iree_hal_cuda_allocator_create(
+    iree_hal_device_t* parent_device,
     const iree_hal_cuda_dynamic_symbols_t* cuda_symbols, CUdevice device,
     CUstream stream, iree_hal_cuda_memory_pools_t* pools,
     iree_allocator_t host_allocator, iree_hal_allocator_t** out_allocator) {
+  IREE_ASSERT_ARGUMENT(parent_device);
   IREE_ASSERT_ARGUMENT(cuda_symbols);
   IREE_ASSERT_ARGUMENT(out_allocator);
   IREE_TRACE_ZONE_BEGIN(z0);
@@ -104,6 +109,7 @@
 
   iree_hal_resource_initialize(&iree_hal_cuda_allocator_vtable,
                                &allocator->resource);
+  allocator->parent_device = parent_device;
   allocator->device = device;
   allocator->stream = stream;
   allocator->pools = pools;
@@ -419,8 +425,14 @@
 
   iree_hal_buffer_t* buffer = NULL;
   if (iree_status_is_ok(status)) {
+    const iree_hal_buffer_placement_t placement = {
+        .device = allocator->parent_device,
+        .queue_affinity = params->queue_affinity ? params->queue_affinity
+                                                 : IREE_HAL_QUEUE_AFFINITY_ANY,
+        .flags = IREE_HAL_BUFFER_PLACEMENT_FLAG_NONE,
+    };
     status = iree_hal_cuda_buffer_wrap(
-        base_allocator, compat_params.type, compat_params.access,
+        placement, compat_params.type, compat_params.access,
         compat_params.usage, allocation_size,
         /*byte_offset=*/0,
         /*byte_length=*/allocation_size, buffer_type, device_ptr, host_ptr,
@@ -584,8 +596,14 @@
 
   iree_hal_buffer_t* buffer = NULL;
   if (iree_status_is_ok(status)) {
+    const iree_hal_buffer_placement_t placement = {
+        .device = allocator->parent_device,
+        .queue_affinity = params->queue_affinity ? params->queue_affinity
+                                                 : IREE_HAL_QUEUE_AFFINITY_ANY,
+        .flags = IREE_HAL_BUFFER_PLACEMENT_FLAG_NONE,
+    };
     status = iree_hal_cuda_buffer_wrap(
-        base_allocator, compat_params.type, compat_params.access,
+        placement, compat_params.type, compat_params.access,
         compat_params.usage, external_buffer->size, /*byte_offset=*/0,
         /*byte_length=*/external_buffer->size, buffer_type, device_ptr,
         host_ptr, release_callback,
diff --git a/runtime/src/iree/hal/drivers/cuda/cuda_allocator.h b/runtime/src/iree/hal/drivers/cuda/cuda_allocator.h
index 3b4dbb3..845ef3e 100644
--- a/runtime/src/iree/hal/drivers/cuda/cuda_allocator.h
+++ b/runtime/src/iree/hal/drivers/cuda/cuda_allocator.h
@@ -22,6 +22,7 @@
 // and the pointer must remain valid for the lifetime of the allocator. Pools
 // may not be supported on all devices and can be NULL.
 iree_status_t iree_hal_cuda_allocator_create(
+    iree_hal_device_t* parent_device,
     const iree_hal_cuda_dynamic_symbols_t* cuda_symbols, CUdevice device,
     CUstream stream, iree_hal_cuda_memory_pools_t* pools,
     iree_allocator_t host_allocator, iree_hal_allocator_t** out_allocator);
diff --git a/runtime/src/iree/hal/drivers/cuda/cuda_buffer.c b/runtime/src/iree/hal/drivers/cuda/cuda_buffer.c
index 8f73aa1..ab33c5f 100644
--- a/runtime/src/iree/hal/drivers/cuda/cuda_buffer.c
+++ b/runtime/src/iree/hal/drivers/cuda/cuda_buffer.c
@@ -14,6 +14,7 @@
 
 typedef struct iree_hal_cuda_buffer_t {
   iree_hal_buffer_t base;
+  iree_allocator_t host_allocator;
   iree_hal_cuda_buffer_type_t type;
   void* host_ptr;
   CUdeviceptr device_ptr;
@@ -35,7 +36,7 @@
 }
 
 iree_status_t iree_hal_cuda_buffer_wrap(
-    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type,
     iree_hal_memory_access_t allowed_access,
     iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
     iree_device_size_t byte_offset, iree_device_size_t byte_length,
@@ -56,10 +57,11 @@
   iree_status_t status =
       iree_allocator_malloc(host_allocator, sizeof(*buffer), (void**)&buffer);
   if (iree_status_is_ok(status)) {
-    iree_hal_buffer_initialize(host_allocator, allocator, &buffer->base,
-                               allocation_size, byte_offset, byte_length,
-                               memory_type, allowed_access, allowed_usage,
+    iree_hal_buffer_initialize(placement, &buffer->base, allocation_size,
+                               byte_offset, byte_length, memory_type,
+                               allowed_access, allowed_usage,
                                &iree_hal_cuda_buffer_vtable, &buffer->base);
+    buffer->host_allocator = host_allocator;
     buffer->type = buffer_type;
     buffer->host_ptr = host_ptr;
     buffer->device_ptr = device_ptr;
@@ -73,7 +75,7 @@
 
 static void iree_hal_cuda_buffer_destroy(iree_hal_buffer_t* base_buffer) {
   iree_hal_cuda_buffer_t* buffer = iree_hal_cuda_buffer_cast(base_buffer);
-  iree_allocator_t host_allocator = base_buffer->host_allocator;
+  iree_allocator_t host_allocator = buffer->host_allocator;
   IREE_TRACE_ZONE_BEGIN(z0);
   if (buffer->release_callback.fn) {
     buffer->release_callback.fn(buffer->release_callback.user_data,
diff --git a/runtime/src/iree/hal/drivers/cuda/cuda_buffer.h b/runtime/src/iree/hal/drivers/cuda/cuda_buffer.h
index 3c677ae..16a8ae8 100644
--- a/runtime/src/iree/hal/drivers/cuda/cuda_buffer.h
+++ b/runtime/src/iree/hal/drivers/cuda/cuda_buffer.h
@@ -34,7 +34,7 @@
 
 // Wraps a CUDA allocation in an iree_hal_buffer_t.
 iree_status_t iree_hal_cuda_buffer_wrap(
-    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type,
     iree_hal_memory_access_t allowed_access,
     iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
     iree_device_size_t byte_offset, iree_device_size_t byte_length,
diff --git a/runtime/src/iree/hal/drivers/cuda/cuda_device.c b/runtime/src/iree/hal/drivers/cuda/cuda_device.c
index d5ed448..9c9c7c6 100644
--- a/runtime/src/iree/hal/drivers/cuda/cuda_device.c
+++ b/runtime/src/iree/hal/drivers/cuda/cuda_device.c
@@ -496,13 +496,13 @@
   // Create memory pools first so that we can share them with the allocator.
   if (iree_status_is_ok(status) && device->supports_memory_pools) {
     status = iree_hal_cuda_memory_pools_initialize(
-        cuda_symbols, cu_device, &params->memory_pools, host_allocator,
-        &device->memory_pools);
+        (iree_hal_device_t*)device, cuda_symbols, cu_device,
+        &params->memory_pools, host_allocator, &device->memory_pools);
   }
 
   if (iree_status_is_ok(status)) {
     status = iree_hal_cuda_allocator_create(
-        cuda_symbols, cu_device, dispatch_stream,
+        (iree_hal_device_t*)device, cuda_symbols, cu_device, dispatch_stream,
         device->supports_memory_pools ? &device->memory_pools : NULL,
         host_allocator, &device->device_allocator);
   }
diff --git a/runtime/src/iree/hal/drivers/cuda/memory_pools.c b/runtime/src/iree/hal/drivers/cuda/memory_pools.c
index 1e34422..ac53271 100644
--- a/runtime/src/iree/hal/drivers/cuda/memory_pools.c
+++ b/runtime/src/iree/hal/drivers/cuda/memory_pools.c
@@ -58,16 +58,19 @@
 }
 
 iree_status_t iree_hal_cuda_memory_pools_initialize(
+    iree_hal_device_t* parent_device,
     const iree_hal_cuda_dynamic_symbols_t* cuda_symbols, CUdevice cu_device,
     const iree_hal_cuda_memory_pooling_params_t* pooling_params,
     iree_allocator_t host_allocator,
     iree_hal_cuda_memory_pools_t* IREE_RESTRICT out_pools) {
+  IREE_ASSERT_ARGUMENT(parent_device);
   IREE_ASSERT_ARGUMENT(cuda_symbols);
   IREE_ASSERT_ARGUMENT(pooling_params);
   IREE_ASSERT_ARGUMENT(out_pools);
   IREE_TRACE_ZONE_BEGIN(z0);
 
   memset(out_pools, 0, sizeof(*out_pools));
+  out_pools->parent_device = parent_device;
   out_pools->cuda_symbols = cuda_symbols;
   out_pools->host_allocator = host_allocator;
 
@@ -241,13 +244,19 @@
   // doesn't dealloca the buffer.
   iree_hal_buffer_t* buffer = NULL;
   if (iree_status_is_ok(status)) {
+    const iree_hal_buffer_placement_t placement = {
+        .device = pools->parent_device,
+        .queue_affinity = params.queue_affinity ? params.queue_affinity
+                                                : IREE_HAL_QUEUE_AFFINITY_ANY,
+        .flags = IREE_HAL_BUFFER_PLACEMENT_FLAG_ASYNCHRONOUS,
+    };
     iree_hal_buffer_release_callback_t release_callback = {
         .fn = iree_hal_cuda_async_buffer_release_callback,
         .user_data = pools,
     };
     status = iree_hal_cuda_buffer_wrap(
-        /*device_allocator=*/NULL, params.type, params.access, params.usage,
-        allocation_size, /*byte_offset=*/0,
+        placement, params.type, params.access, params.usage, allocation_size,
+        /*byte_offset=*/0,
         /*byte_length=*/allocation_size, IREE_HAL_CUDA_BUFFER_TYPE_ASYNC,
         device_ptr, /*host_ptr=*/NULL, release_callback, pools->host_allocator,
         &buffer);
diff --git a/runtime/src/iree/hal/drivers/cuda/memory_pools.h b/runtime/src/iree/hal/drivers/cuda/memory_pools.h
index 98917dc..99e4d27 100644
--- a/runtime/src/iree/hal/drivers/cuda/memory_pools.h
+++ b/runtime/src/iree/hal/drivers/cuda/memory_pools.h
@@ -25,6 +25,7 @@
   // Used for any host-visible/host-local memory types.
   CUmemoryPool other;
 
+  iree_hal_device_t* parent_device;
   const iree_hal_cuda_dynamic_symbols_t* cuda_symbols;
   iree_allocator_t host_allocator;
 
@@ -38,6 +39,7 @@
 
 // Initializes |out_pools| by configuring new CUDA memory pools.
 iree_status_t iree_hal_cuda_memory_pools_initialize(
+    iree_hal_device_t* parent_device,
     const iree_hal_cuda_dynamic_symbols_t* cuda_symbols, CUdevice cu_device,
     const iree_hal_cuda_memory_pooling_params_t* pooling_params,
     iree_allocator_t host_allocator,
diff --git a/runtime/src/iree/hal/drivers/hip/hip_allocator.c b/runtime/src/iree/hal/drivers/hip/hip_allocator.c
index 95a04cd..c8bc93f 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_allocator.c
+++ b/runtime/src/iree/hal/drivers/hip/hip_allocator.c
@@ -24,6 +24,9 @@
   // must be at offset 0.
   iree_hal_resource_t resource;
 
+  // Parent device that this allocator is associated with. Unowned.
+  iree_hal_device_t* parent_device;
+
   // The device that this allocator allocates memory from.
   hipDevice_t device;
 
@@ -56,10 +59,12 @@
 }
 
 iree_status_t iree_hal_hip_allocator_create(
+    iree_hal_device_t* parent_device,
     const iree_hal_hip_dynamic_symbols_t* hip_symbols, hipDevice_t device,
     hipCtx_t hip_context, hipStream_t stream,
     iree_hal_hip_memory_pools_t* pools, iree_allocator_t host_allocator,
     iree_hal_allocator_t** out_allocator) {
+  IREE_ASSERT_ARGUMENT(parent_device);
   IREE_ASSERT_ARGUMENT(hip_symbols);
   IREE_ASSERT_ARGUMENT(out_allocator);
   IREE_TRACE_ZONE_BEGIN(z0);
@@ -93,6 +98,7 @@
                                 (void**)&allocator));
   iree_hal_resource_initialize(&iree_hal_hip_allocator_vtable,
                                &allocator->resource);
+  allocator->parent_device = parent_device;
   allocator->device = device;
   allocator->stream = stream;
   allocator->pools = pools;
@@ -408,8 +414,14 @@
 
   iree_hal_buffer_t* buffer = NULL;
   if (iree_status_is_ok(status)) {
+    const iree_hal_buffer_placement_t placement = {
+        .device = allocator->parent_device,
+        .queue_affinity = params->queue_affinity ? params->queue_affinity
+                                                 : IREE_HAL_QUEUE_AFFINITY_ANY,
+        .flags = IREE_HAL_BUFFER_PLACEMENT_FLAG_NONE,
+    };
     status = iree_hal_hip_buffer_wrap(
-        base_allocator, compat_params.type, compat_params.access,
+        placement, compat_params.type, compat_params.access,
         compat_params.usage, allocation_size,
         /*byte_offset=*/0,
         /*byte_length=*/allocation_size, buffer_type, device_ptr, host_ptr,
@@ -556,8 +568,14 @@
 
   iree_hal_buffer_t* buffer = NULL;
   if (iree_status_is_ok(status)) {
+    const iree_hal_buffer_placement_t placement = {
+        .device = allocator->parent_device,
+        .queue_affinity = params->queue_affinity ? params->queue_affinity
+                                                 : IREE_HAL_QUEUE_AFFINITY_ANY,
+        .flags = IREE_HAL_BUFFER_PLACEMENT_FLAG_NONE,
+    };
     status = iree_hal_hip_buffer_wrap(
-        base_allocator, compat_params.type, compat_params.access,
+        placement, compat_params.type, compat_params.access,
         compat_params.usage, external_buffer->size,
         /*byte_offset=*/0,
         /*byte_length=*/external_buffer->size, buffer_type, device_ptr,
diff --git a/runtime/src/iree/hal/drivers/hip/hip_allocator.h b/runtime/src/iree/hal/drivers/hip/hip_allocator.h
index 89ed093..5c19a7a 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_allocator.h
+++ b/runtime/src/iree/hal/drivers/hip/hip_allocator.h
@@ -21,6 +21,7 @@
 // |pools| provides memory pools that may be shared across multiple allocators
 // and the pointer must remain valid for the lifetime of the allocator.
 iree_status_t iree_hal_hip_allocator_create(
+    iree_hal_device_t* parent_device,
     const iree_hal_hip_dynamic_symbols_t* hip_symbols, hipDevice_t device,
     hipCtx_t hip_context, hipStream_t stream,
     iree_hal_hip_memory_pools_t* pools, iree_allocator_t host_allocator,
diff --git a/runtime/src/iree/hal/drivers/hip/hip_buffer.c b/runtime/src/iree/hal/drivers/hip/hip_buffer.c
index 46e768a..a0efa9a 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_buffer.c
+++ b/runtime/src/iree/hal/drivers/hip/hip_buffer.c
@@ -16,6 +16,7 @@
 
 typedef struct iree_hal_hip_buffer_t {
   iree_hal_buffer_t base;
+  iree_allocator_t host_allocator;
   iree_hal_hip_buffer_type_t type;
   void* host_ptr;
   hipDeviceptr_t device_ptr;
@@ -40,7 +41,7 @@
 }
 
 iree_status_t iree_hal_hip_buffer_wrap(
-    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type,
     iree_hal_memory_access_t allowed_access,
     iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
     iree_device_size_t byte_offset, iree_device_size_t byte_length,
@@ -61,10 +62,11 @@
   iree_status_t status =
       iree_allocator_malloc(host_allocator, sizeof(*buffer), (void**)&buffer);
   if (iree_status_is_ok(status)) {
-    iree_hal_buffer_initialize(host_allocator, allocator, &buffer->base,
-                               allocation_size, byte_offset, byte_length,
-                               memory_type, allowed_access, allowed_usage,
+    iree_hal_buffer_initialize(placement, &buffer->base, allocation_size,
+                               byte_offset, byte_length, memory_type,
+                               allowed_access, allowed_usage,
                                &iree_hal_hip_buffer_vtable, &buffer->base);
+    buffer->host_allocator = host_allocator;
     buffer->type = buffer_type;
     buffer->host_ptr = host_ptr;
     buffer->device_ptr = device_ptr;
@@ -101,7 +103,7 @@
 
 static void iree_hal_hip_buffer_destroy(iree_hal_buffer_t* base_buffer) {
   iree_hal_hip_buffer_t* buffer = iree_hal_hip_buffer_cast(base_buffer);
-  iree_allocator_t host_allocator = base_buffer->host_allocator;
+  iree_allocator_t host_allocator = buffer->host_allocator;
   IREE_TRACE_ZONE_BEGIN(z0);
   if (buffer->release_callback.fn) {
     buffer->release_callback.fn(buffer->release_callback.user_data,
diff --git a/runtime/src/iree/hal/drivers/hip/hip_buffer.h b/runtime/src/iree/hal/drivers/hip/hip_buffer.h
index 4264b54..3a18956 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_buffer.h
+++ b/runtime/src/iree/hal/drivers/hip/hip_buffer.h
@@ -34,7 +34,7 @@
 
 // Wraps a HIP allocation in an iree_hal_buffer_t.
 iree_status_t iree_hal_hip_buffer_wrap(
-    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type,
     iree_hal_memory_access_t allowed_access,
     iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
     iree_device_size_t byte_offset, iree_device_size_t byte_length,
diff --git a/runtime/src/iree/hal/drivers/hip/hip_device.c b/runtime/src/iree/hal/drivers/hip/hip_device.c
index bc001d6..c6fe929 100644
--- a/runtime/src/iree/hal/drivers/hip/hip_device.c
+++ b/runtime/src/iree/hal/drivers/hip/hip_device.c
@@ -539,13 +539,14 @@
   // Create memory pools first so that we can share them with the allocator.
   if (iree_status_is_ok(status) && device->supports_memory_pools) {
     status = iree_hal_hip_memory_pools_initialize(
-        symbols, hip_device, context, &params->memory_pools, host_allocator,
-        &device->memory_pools);
+        (iree_hal_device_t*)device, symbols, hip_device, context,
+        &params->memory_pools, host_allocator, &device->memory_pools);
   }
 
   if (iree_status_is_ok(status)) {
     status = iree_hal_hip_allocator_create(
-        symbols, hip_device, context, dispatch_stream,
+        (iree_hal_device_t*)device, symbols, hip_device, context,
+        dispatch_stream,
         device->supports_memory_pools ? &device->memory_pools : NULL,
         host_allocator, &device->device_allocator);
   }
@@ -1001,10 +1002,16 @@
 
   iree_hal_buffer_params_canonicalize(&params);
 
+  const iree_hal_buffer_placement_t placement = {
+      .device = (iree_hal_device_t*)device,
+      .queue_affinity = params.queue_affinity ? params.queue_affinity
+                                              : IREE_HAL_QUEUE_AFFINITY_ANY,
+      .flags = IREE_HAL_BUFFER_PLACEMENT_FLAG_ASYNCHRONOUS,
+  };
   iree_hal_buffer_t* buffer = NULL;
   iree_status_t status = iree_hal_hip_buffer_wrap(
-      device->device_allocator, params.type, params.access, params.usage,
-      allocation_size, /*byte_offset=*/0,
+      placement, params.type, params.access, params.usage, allocation_size,
+      /*byte_offset=*/0,
       /*byte_length=*/allocation_size, IREE_HAL_HIP_BUFFER_TYPE_ASYNC,
       /*device_ptr=*/NULL, /*host_ptr=*/NULL,
       iree_hal_buffer_release_callback_null(), device->host_allocator, &buffer);
diff --git a/runtime/src/iree/hal/drivers/hip/memory_pools.c b/runtime/src/iree/hal/drivers/hip/memory_pools.c
index 93a046b..c5a927f 100644
--- a/runtime/src/iree/hal/drivers/hip/memory_pools.c
+++ b/runtime/src/iree/hal/drivers/hip/memory_pools.c
@@ -59,11 +59,13 @@
 }
 
 iree_status_t iree_hal_hip_memory_pools_initialize(
+    iree_hal_device_t* parent_device,
     const iree_hal_hip_dynamic_symbols_t* hip_symbols, hipDevice_t hip_device,
     hipCtx_t hip_context,
     const iree_hal_hip_memory_pooling_params_t* pooling_params,
     iree_allocator_t host_allocator,
     iree_hal_hip_memory_pools_t* IREE_RESTRICT out_pools) {
+  IREE_ASSERT_ARGUMENT(parent_device);
   IREE_ASSERT_ARGUMENT(hip_symbols);
   IREE_ASSERT_ARGUMENT(pooling_params);
   IREE_ASSERT_ARGUMENT(out_pools);
@@ -72,6 +74,7 @@
       z0, iree_hal_hip_set_context(hip_symbols, hip_context));
 
   memset(out_pools, 0, sizeof(*out_pools));
+  out_pools->parent_device = parent_device;
   out_pools->hip_symbols = hip_symbols;
   out_pools->host_allocator = host_allocator;
   out_pools->hip_context = hip_context;
@@ -267,14 +270,20 @@
   // NOTE: we don't provide a device allocator because we didn't allocate from
   // one and instead we use a release callback to perform the free if the user
   // doesn't dealloca the buffer.
-  iree_hal_buffer_t* buffer = NULL;
+  const iree_hal_buffer_placement_t placement = {
+      .device = pools->parent_device,
+      .queue_affinity = params.queue_affinity ? params.queue_affinity
+                                              : IREE_HAL_QUEUE_AFFINITY_ANY,
+      .flags = IREE_HAL_BUFFER_PLACEMENT_FLAG_ASYNCHRONOUS,
+  };
   iree_hal_buffer_release_callback_t release_callback = {
       .fn = iree_hal_hip_async_buffer_release_callback,
       .user_data = pools,
   };
+  iree_hal_buffer_t* buffer = NULL;
   iree_status_t status = iree_hal_hip_buffer_wrap(
-      /*device_allocator=*/NULL, params.type, params.access, params.usage,
-      allocation_size, /*byte_offset=*/0,
+      placement, params.type, params.access, params.usage, allocation_size,
+      /*byte_offset=*/0,
       /*byte_length=*/allocation_size, IREE_HAL_HIP_BUFFER_TYPE_ASYNC,
       /*device_ptr*/ NULL, /*host_ptr=*/NULL, release_callback,
       pools->host_allocator, &buffer);
diff --git a/runtime/src/iree/hal/drivers/hip/memory_pools.h b/runtime/src/iree/hal/drivers/hip/memory_pools.h
index f95b76d..7d66090 100644
--- a/runtime/src/iree/hal/drivers/hip/memory_pools.h
+++ b/runtime/src/iree/hal/drivers/hip/memory_pools.h
@@ -32,6 +32,7 @@
   // Used for any host-visible/host-local memory types.
   hipMemPool_t other;
 
+  iree_hal_device_t* parent_device;
   const iree_hal_hip_dynamic_symbols_t* hip_symbols;
   hipCtx_t hip_context;
   iree_allocator_t host_allocator;
@@ -46,6 +47,7 @@
 
 // Initializes |out_pools| by configuring new HIP memory pools.
 iree_status_t iree_hal_hip_memory_pools_initialize(
+    iree_hal_device_t* parent_device,
     const iree_hal_hip_dynamic_symbols_t* hip_symbols, hipDevice_t hip_device,
     hipCtx_t hip_context,
     const iree_hal_hip_memory_pooling_params_t* pooling_params,
diff --git a/runtime/src/iree/hal/drivers/metal/direct_allocator.h b/runtime/src/iree/hal/drivers/metal/direct_allocator.h
index bbd4609..6b5b491 100644
--- a/runtime/src/iree/hal/drivers/metal/direct_allocator.h
+++ b/runtime/src/iree/hal/drivers/metal/direct_allocator.h
@@ -26,7 +26,7 @@
 // |out_allocator| must be released by the caller (see
 // iree_hal_allocator_release).
 iree_status_t iree_hal_metal_allocator_create(
-    id<MTLDevice> device,
+    iree_hal_device_t* parent_device, id<MTLDevice> device,
 #if defined(IREE_PLATFORM_MACOS)
     id<MTLCommandQueue> queue,
 #endif  // IREE_PLATFORM_MACOS
diff --git a/runtime/src/iree/hal/drivers/metal/direct_allocator.m b/runtime/src/iree/hal/drivers/metal/direct_allocator.m
index d97886e..b9c6a40 100644
--- a/runtime/src/iree/hal/drivers/metal/direct_allocator.m
+++ b/runtime/src/iree/hal/drivers/metal/direct_allocator.m
@@ -22,6 +22,9 @@
   // Abstract resource used for injecting reference counting and vtable; must be at offset 0.
   iree_hal_resource_t resource;
 
+  // Parent device that this allocator is associated with. Unowned.
+  iree_hal_device_t* parent_device;
+
   // The device that this allocator is attached to.
   id<MTLDevice> device;
   // The command queue that we can use to issue commands to make buffer contents visible to CPU.
@@ -51,7 +54,7 @@
 }
 
 iree_status_t iree_hal_metal_allocator_create(
-    id<MTLDevice> device,
+    iree_hal_device_t* parent_device, id<MTLDevice> device,
 #if defined(IREE_PLATFORM_MACOS)
     id<MTLCommandQueue> queue,
 #endif  // IREE_PLATFORM_MACOS
@@ -273,14 +276,22 @@
     IREE_TRACE_ZONE_END(z0);
     return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, "unable to allocate buffer");
   }
+
+  const iree_hal_buffer_placement_t placement = {
+      .device = allocator->parent_device,
+      .queue_affinity =
+          params->queue_affinity ? params->queue_affinity : IREE_HAL_QUEUE_AFFINITY_ANY,
+      .flags = IREE_HAL_BUFFER_PLACEMENT_FLAG_NONE,
+  };
   iree_hal_buffer_t* buffer = NULL;
   iree_status_t status = iree_hal_metal_buffer_wrap(
+      placement,
 #if defined(IREE_PLATFORM_MACOS)
       allocator->queue,
 #endif  // IREE_PLATFORM_MACOS
-      metal_buffer, base_allocator, compat_params.type, compat_params.access, compat_params.usage,
-      allocation_size, /*byte_offset=*/0, /*byte_length=*/allocation_size,
-      iree_hal_buffer_release_callback_null(), &buffer);  // +1
+      metal_buffer, compat_params.type, compat_params.access, compat_params.usage, allocation_size,
+      /*byte_offset=*/0, /*byte_length=*/allocation_size, iree_hal_buffer_release_callback_null(),
+      allocator->host_allocator, &buffer);  // +1
 
   if (iree_status_is_ok(status)) {
     IREE_TRACE_ALLOC_NAMED(IREE_HAL_METAL_ALLOCATOR_ID, (void*)iree_hal_metal_buffer_handle(buffer),
@@ -336,13 +347,20 @@
     return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, "unable to allocate buffer");
   }
 
-  return iree_hal_metal_buffer_wrap(
+  const iree_hal_buffer_placement_t placement = {
+      .device = allocator->parent_device,
+      .queue_affinity =
+          params->queue_affinity ? params->queue_affinity : IREE_HAL_QUEUE_AFFINITY_ANY,
+      .flags = IREE_HAL_BUFFER_PLACEMENT_FLAG_NONE,
+  };
+  return iree_hal_metal_buffer_wrap(placement,
 #if defined(IREE_PLATFORM_MACOS)
-      allocator->queue,
+                                    allocator->queue,
 #endif  // IREE_PLATFORM_MACOS
-      metal_buffer, base_allocator, params->type, params->access, params->usage,
-      external_buffer->size, /*byte_offset=*/0, /*byte_length=*/external_buffer->size,
-      release_callback, out_buffer);  // +1
+                                    metal_buffer, params->type, params->access, params->usage,
+                                    external_buffer->size,
+                                    /*byte_offset=*/0, /*byte_length=*/external_buffer->size,
+                                    release_callback, allocator->host_allocator, out_buffer);  // +1
 }
 
 static iree_status_t iree_hal_metal_allocator_import_device_buffer(
@@ -352,7 +370,6 @@
     iree_hal_buffer_release_callback_t release_callback,
     iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
   iree_hal_metal_allocator_t* allocator = iree_hal_metal_allocator_cast(base_allocator);
-  (void)allocator;
 
   // Device allocation is an unowned MTLBuffer; we need to retain it to keep it live.
   id<MTLBuffer> metal_buffer =
@@ -363,13 +380,20 @@
 
   // Wrap the externally-provided buffer in a HAL buffer handle that will retain the MTLBuffer until
   // it has been released.
-  return iree_hal_metal_buffer_wrap(
+  const iree_hal_buffer_placement_t placement = {
+      .device = allocator->parent_device,
+      .queue_affinity =
+          params->queue_affinity ? params->queue_affinity : IREE_HAL_QUEUE_AFFINITY_ANY,
+      .flags = IREE_HAL_BUFFER_PLACEMENT_FLAG_NONE,
+  };
+  return iree_hal_metal_buffer_wrap(placement,
 #if defined(IREE_PLATFORM_MACOS)
-      allocator->queue,
+                                    allocator->queue,
 #endif  // IREE_PLATFORM_MACOS
-      metal_buffer, base_allocator, params->type, params->access, params->usage,
-      external_buffer->size, /*byte_offset=*/0, /*byte_length=*/external_buffer->size,
-      release_callback, out_buffer);  // +1
+                                    metal_buffer, params->type, params->access, params->usage,
+                                    external_buffer->size, /*byte_offset=*/0,
+                                    /*byte_length=*/external_buffer->size, release_callback,
+                                    allocator->host_allocator, out_buffer);  // +1
 }
 
 static iree_status_t iree_hal_metal_allocator_import_buffer(
diff --git a/runtime/src/iree/hal/drivers/metal/metal_buffer.h b/runtime/src/iree/hal/drivers/metal/metal_buffer.h
index 1a30fd3..0607dc7 100644
--- a/runtime/src/iree/hal/drivers/metal/metal_buffer.h
+++ b/runtime/src/iree/hal/drivers/metal/metal_buffer.h
@@ -20,15 +20,16 @@
 //
 // |out_buffer| must be released by the caller (see iree_hal_buffer_release).
 iree_status_t iree_hal_metal_buffer_wrap(
+    iree_hal_buffer_placement_t placement,
 #if defined(IREE_PLATFORM_MACOS)
     id<MTLCommandQueue> queue,
 #endif  // IREE_PLATFORM_MACOS
-    id<MTLBuffer> metal_buffer, iree_hal_allocator_t* allocator,
-    iree_hal_memory_type_t memory_type, iree_hal_memory_access_t allowed_access,
+    id<MTLBuffer> metal_buffer, iree_hal_memory_type_t memory_type,
+    iree_hal_memory_access_t allowed_access,
     iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
     iree_device_size_t byte_offset, iree_device_size_t byte_length,
     iree_hal_buffer_release_callback_t release_callback,
-    iree_hal_buffer_t** out_buffer);
+    iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer);
 
 // Returns true if the buffer was wrapped from an external handle instead of
 // allocated by the HAL allocator.
diff --git a/runtime/src/iree/hal/drivers/metal/metal_buffer.m b/runtime/src/iree/hal/drivers/metal/metal_buffer.m
index 2f22c84..b92ee9e 100644
--- a/runtime/src/iree/hal/drivers/metal/metal_buffer.m
+++ b/runtime/src/iree/hal/drivers/metal/metal_buffer.m
@@ -17,6 +17,7 @@
 
 typedef struct iree_hal_metal_buffer_t {
   iree_hal_buffer_t base;
+  iree_allocator_t host_allocator;
   id<MTLBuffer> buffer;
   // The command queue that we can use to issue commands to make buffer contents visible to CPU.
 #if defined(IREE_PLATFORM_MACOS)
@@ -39,25 +40,26 @@
 }
 
 iree_status_t iree_hal_metal_buffer_wrap(
+    iree_hal_buffer_placement_t placement,
 #if defined(IREE_PLATFORM_MACOS)
     id<MTLCommandQueue> queue,
 #endif  // IREE_PLATFORM_MACOS
-    id<MTLBuffer> metal_buffer, iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    id<MTLBuffer> metal_buffer, iree_hal_memory_type_t memory_type,
     iree_hal_memory_access_t allowed_access, iree_hal_buffer_usage_t allowed_usage,
     iree_device_size_t allocation_size, iree_device_size_t byte_offset,
     iree_device_size_t byte_length, iree_hal_buffer_release_callback_t release_callback,
-    iree_hal_buffer_t** out_buffer) {
-  IREE_ASSERT_ARGUMENT(allocator);
+    iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer) {
+  IREE_ASSERT_ARGUMENT(placement.device);
   IREE_ASSERT_ARGUMENT(out_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
 
-  iree_allocator_t host_allocator = iree_hal_allocator_host_allocator(allocator);
   iree_hal_metal_buffer_t* buffer = NULL;
   iree_status_t status = iree_allocator_malloc(host_allocator, sizeof(*buffer), (void**)&buffer);
   if (iree_status_is_ok(status)) {
-    iree_hal_buffer_initialize(host_allocator, allocator, &buffer->base, allocation_size,
-                               byte_offset, byte_length, memory_type, allowed_access, allowed_usage,
+    iree_hal_buffer_initialize(placement, &buffer->base, allocation_size, byte_offset, byte_length,
+                               memory_type, allowed_access, allowed_usage,
                                &iree_hal_metal_buffer_vtable, &buffer->base);
+    buffer->host_allocator = host_allocator;
     buffer->buffer = [metal_buffer retain];  // +1
 #if defined(IREE_PLATFORM_MACOS)
     buffer->queue = queue;
@@ -72,7 +74,7 @@
 
 static void iree_hal_metal_buffer_destroy(iree_hal_buffer_t* base_buffer) {
   iree_hal_metal_buffer_t* buffer = iree_hal_metal_buffer_cast(base_buffer);
-  iree_allocator_t host_allocator = base_buffer->host_allocator;
+  iree_allocator_t host_allocator = buffer->host_allocator;
   IREE_TRACE_ZONE_BEGIN(z0);
   IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)iree_hal_buffer_allocation_size(base_buffer));
 
diff --git a/runtime/src/iree/hal/drivers/metal/metal_device.m b/runtime/src/iree/hal/drivers/metal/metal_device.m
index 4f8b4fd..ef8e2c9 100644
--- a/runtime/src/iree/hal/drivers/metal/metal_device.m
+++ b/runtime/src/iree/hal/drivers/metal/metal_device.m
@@ -122,7 +122,7 @@
       initWithDispatchQueue:device->semaphore_notification_queue];  // +1
   device->capture_manager = NULL;
 
-  iree_status_t status = iree_hal_metal_allocator_create(metal_device,
+  iree_status_t status = iree_hal_metal_allocator_create((iree_hal_device_t*)device, metal_device,
 #if defined(IREE_PLATFORM_MACOS)
                                                          metal_queue,
 #endif  // IREE_PLATFORM_MACOS
diff --git a/runtime/src/iree/hal/drivers/null/buffer.c b/runtime/src/iree/hal/drivers/null/buffer.c
index 6e67652..fc52c02 100644
--- a/runtime/src/iree/hal/drivers/null/buffer.c
+++ b/runtime/src/iree/hal/drivers/null/buffer.c
@@ -12,6 +12,7 @@
 
 typedef struct iree_hal_null_buffer_t {
   iree_hal_buffer_t base;
+  iree_allocator_t host_allocator;
   iree_hal_buffer_release_callback_t release_callback;
 } iree_hal_null_buffer_t;
 
@@ -30,7 +31,7 @@
 }
 
 iree_status_t iree_hal_null_buffer_wrap(
-    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type,
     iree_hal_memory_access_t allowed_access,
     iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
     iree_device_size_t byte_offset, iree_device_size_t byte_length,
@@ -44,10 +45,11 @@
   IREE_RETURN_AND_END_ZONE_IF_ERROR(
       z0,
       iree_allocator_malloc(host_allocator, sizeof(*buffer), (void**)&buffer));
-  iree_hal_buffer_initialize(host_allocator, allocator, &buffer->base,
-                             allocation_size, byte_offset, byte_length,
-                             memory_type, allowed_access, allowed_usage,
+  iree_hal_buffer_initialize(placement, &buffer->base, allocation_size,
+                             byte_offset, byte_length, memory_type,
+                             allowed_access, allowed_usage,
                              &iree_hal_null_buffer_vtable, &buffer->base);
+  buffer->host_allocator = host_allocator;
   buffer->release_callback = release_callback;
 
   // TODO(null): retain or take ownership of provided handles/pointers/etc.
@@ -68,7 +70,7 @@
 
 static void iree_hal_null_buffer_destroy(iree_hal_buffer_t* base_buffer) {
   iree_hal_null_buffer_t* buffer = iree_hal_null_buffer_cast(base_buffer);
-  iree_allocator_t host_allocator = base_buffer->host_allocator;
+  iree_allocator_t host_allocator = buffer->host_allocator;
   IREE_TRACE_ZONE_BEGIN(z0);
 
   // Optionally call a release callback when the buffer is destroyed. Not all
diff --git a/runtime/src/iree/hal/drivers/null/buffer.h b/runtime/src/iree/hal/drivers/null/buffer.h
index edf2e45..4befac5 100644
--- a/runtime/src/iree/hal/drivers/null/buffer.h
+++ b/runtime/src/iree/hal/drivers/null/buffer.h
@@ -16,7 +16,7 @@
 
 // Wraps a {Null} allocation in an iree_hal_buffer_t.
 iree_status_t iree_hal_null_buffer_wrap(
-    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type,
     iree_hal_memory_access_t allowed_access,
     iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
     iree_device_size_t byte_offset, iree_device_size_t byte_length,
diff --git a/runtime/src/iree/hal/drivers/vulkan/base_buffer.h b/runtime/src/iree/hal/drivers/vulkan/base_buffer.h
index ea872b5..7f7d798 100644
--- a/runtime/src/iree/hal/drivers/vulkan/base_buffer.h
+++ b/runtime/src/iree/hal/drivers/vulkan/base_buffer.h
@@ -98,6 +98,7 @@
 // to get access to the API VkBuffer handle.
 typedef struct iree_hal_vulkan_base_buffer_t {
   iree_hal_buffer_t base;
+  iree_allocator_t host_allocator;
   // NOTE: may be VK_NULL_HANDLE if sparse residency is used to back the buffer
   // with multiple device memory allocations.
   VkDeviceMemory device_memory;
diff --git a/runtime/src/iree/hal/drivers/vulkan/native_allocator.cc b/runtime/src/iree/hal/drivers/vulkan/native_allocator.cc
index 2114c5b..47f15d8 100644
--- a/runtime/src/iree/hal/drivers/vulkan/native_allocator.cc
+++ b/runtime/src/iree/hal/drivers/vulkan/native_allocator.cc
@@ -28,6 +28,8 @@
   iree_hal_resource_t resource;
   VkDeviceHandle* logical_device;
   iree_allocator_t host_allocator;
+  // Parent device that this allocator is associated with. Unowned.
+  iree_hal_device_t* parent_device;
 
   // Cached from the API to avoid additional queries in hot paths.
   VkPhysicalDeviceProperties device_props;
@@ -56,9 +58,11 @@
     iree_hal_allocator_t* IREE_RESTRICT base_allocator);
 
 extern "C" iree_status_t iree_hal_vulkan_native_allocator_create(
-    const iree_hal_vulkan_device_options_t* options, VkInstance instance,
+    const iree_hal_vulkan_device_options_t* options,
+    iree_hal_device_t* parent_device, VkInstance instance,
     VkPhysicalDevice physical_device, VkDeviceHandle* logical_device,
     iree_hal_allocator_t** out_allocator) {
+  IREE_ASSERT_ARGUMENT(parent_device);
   IREE_ASSERT_ARGUMENT(instance);
   IREE_ASSERT_ARGUMENT(physical_device);
   IREE_ASSERT_ARGUMENT(logical_device);
@@ -74,6 +78,7 @@
                                &allocator->resource);
   allocator->logical_device = logical_device;
   allocator->host_allocator = host_allocator;
+  allocator->parent_device = parent_device;
 
   const auto& syms = logical_device->syms();
 
@@ -266,6 +271,13 @@
   VkQueue queue = VK_NULL_HANDLE;
   logical_device->syms()->vkGetDeviceQueue(*logical_device, 0, 0, &queue);
 
+  const iree_hal_buffer_placement_t placement = {
+      /*.device=*/allocator->parent_device,
+      /*.queue_affinity=*/params->queue_affinity ? params->queue_affinity
+                                                 : IREE_HAL_QUEUE_AFFINITY_ANY,
+      /*.flags=*/IREE_HAL_BUFFER_PLACEMENT_FLAG_NONE,
+  };
+
   // Ask Vulkan what the implementation requires of the allocation(s) for the
   // buffer. We should in most cases always get the same kind of values but
   // alignment and valid memory types will differ for dense and sparse buffers.
@@ -284,11 +296,12 @@
     // to allocate such buffers (synchronously from raw allocations) but this
     // path is primarily used by large persistent variables and constants.
     return iree_hal_vulkan_sparse_buffer_create_bound_sync(
-        (iree_hal_allocator_t*)allocator, params->type, params->access,
-        params->usage, allocation_size, /*byte_offset=*/0,
+        placement, params->type, params->access, params->usage, allocation_size,
+        /*byte_offset=*/0,
         /*byte_length=*/allocation_size, logical_device, queue, handle,
         requirements, memory_type_index,
-        allocator->device_props_11.maxMemoryAllocationSize, out_buffer);
+        allocator->device_props_11.maxMemoryAllocationSize,
+        allocator->host_allocator, out_buffer);
   }
 
   // Allocate the device memory we'll attach the buffer to.
@@ -321,12 +334,11 @@
       iree_hal_vulkan_native_allocator_native_buffer_release;
   internal_release_callback.user_data = NULL;
   iree_status_t status = iree_hal_vulkan_native_buffer_wrap(
-      (iree_hal_allocator_t*)allocator, params->type, params->access,
-      params->usage, allocation_size,
+      placement, params->type, params->access, params->usage, allocation_size,
       /*byte_offset=*/0,
       /*byte_length=*/allocation_size, logical_device, device_memory, handle,
       internal_release_callback, iree_hal_buffer_release_callback_null(),
-      out_buffer);
+      allocator->host_allocator, out_buffer);
   if (!iree_status_is_ok(status)) {
     logical_device->syms()->vkFreeMemory(*logical_device, device_memory,
                                          logical_device->allocator());
@@ -722,6 +734,12 @@
   }
 
   // Wrap the device memory allocation and buffer handle in our own buffer type.
+  const iree_hal_buffer_placement_t placement = {
+      /*.device=*/allocator->parent_device,
+      /*.queue_affinity=*/params->queue_affinity ? params->queue_affinity
+                                                 : IREE_HAL_QUEUE_AFFINITY_ANY,
+      /*.flags=*/IREE_HAL_BUFFER_PLACEMENT_FLAG_NONE,
+  };
   iree_hal_vulkan_native_buffer_release_callback_t internal_release_callback = {
       0};
   internal_release_callback.fn =
@@ -729,11 +747,12 @@
   internal_release_callback.user_data = NULL;
   iree_hal_buffer_t* buffer = NULL;
   status = iree_hal_vulkan_native_buffer_wrap(
-      (iree_hal_allocator_t*)allocator, params->type, params->access,
-      params->usage, (iree_device_size_t)allocation_size,
+      placement, params->type, params->access, params->usage,
+      (iree_device_size_t)allocation_size,
       /*byte_offset=*/0,
       /*byte_length=*/external_buffer->size, logical_device, device_memory,
-      handle, internal_release_callback, release_callback, &buffer);
+      handle, internal_release_callback, release_callback,
+      allocator->host_allocator, &buffer);
   if (!iree_status_is_ok(status)) {
     logical_device->syms()->vkDestroyBuffer(*logical_device, handle,
                                             logical_device->allocator());
@@ -809,17 +828,24 @@
   }
 
   // Wrap the device memory allocation and buffer handle in our own buffer type.
+  const iree_hal_buffer_placement_t placement = {
+      /*.device=*/allocator->parent_device,
+      /*.queue_affinity=*/params->queue_affinity ? params->queue_affinity
+                                                 : IREE_HAL_QUEUE_AFFINITY_ANY,
+      /*.flags=*/IREE_HAL_BUFFER_PLACEMENT_FLAG_NONE,
+  };
   iree_hal_vulkan_native_buffer_release_callback_t internal_release_callback = {
       0};
   internal_release_callback.fn =
       iree_hal_vulkan_native_allocator_external_device_buffer_release;
   internal_release_callback.user_data = NULL;
   return iree_hal_vulkan_native_buffer_wrap(
-      (iree_hal_allocator_t*)allocator, params->type, params->access,
-      params->usage, (iree_device_size_t)external_buffer->size,
+      placement, params->type, params->access, params->usage,
+      (iree_device_size_t)external_buffer->size,
       /*byte_offset=*/0,
       /*byte_length=*/external_buffer->size, logical_device, device_memory,
-      handle, internal_release_callback, release_callback, out_buffer);
+      handle, internal_release_callback, release_callback,
+      allocator->host_allocator, out_buffer);
 }
 
 static iree_status_t iree_hal_vulkan_native_allocator_import_buffer(
diff --git a/runtime/src/iree/hal/drivers/vulkan/native_allocator.h b/runtime/src/iree/hal/drivers/vulkan/native_allocator.h
index 147f1b9..481b7c0 100644
--- a/runtime/src/iree/hal/drivers/vulkan/native_allocator.h
+++ b/runtime/src/iree/hal/drivers/vulkan/native_allocator.h
@@ -18,7 +18,8 @@
 // Creates a native Vulkan API-based allocator that directly allocates memory
 // from the underlying implementation with no pooling or suballocation.
 iree_status_t iree_hal_vulkan_native_allocator_create(
-    const iree_hal_vulkan_device_options_t* options, VkInstance instance,
+    const iree_hal_vulkan_device_options_t* options,
+    iree_hal_device_t* parent_device, VkInstance instance,
     VkPhysicalDevice physical_device,
     iree::hal::vulkan::VkDeviceHandle* logical_device,
     iree_hal_allocator_t** out_allocator);
diff --git a/runtime/src/iree/hal/drivers/vulkan/native_buffer.cc b/runtime/src/iree/hal/drivers/vulkan/native_buffer.cc
index f0c0ec7..6e71900 100644
--- a/runtime/src/iree/hal/drivers/vulkan/native_buffer.cc
+++ b/runtime/src/iree/hal/drivers/vulkan/native_buffer.cc
@@ -32,7 +32,7 @@
 }
 
 iree_status_t iree_hal_vulkan_native_buffer_wrap(
-    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type,
     iree_hal_memory_access_t allowed_access,
     iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
     iree_device_size_t byte_offset, iree_device_size_t byte_length,
@@ -40,24 +40,23 @@
     VkDeviceMemory device_memory, VkBuffer handle,
     iree_hal_vulkan_native_buffer_release_callback_t internal_release_callback,
     iree_hal_buffer_release_callback_t user_release_callback,
-    iree_hal_buffer_t** out_buffer) {
-  IREE_ASSERT_ARGUMENT(allocator);
+    iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer) {
+  IREE_ASSERT_ARGUMENT(placement.device);
   IREE_ASSERT_ARGUMENT(logical_device);
   IREE_ASSERT_ARGUMENT(handle);
   IREE_ASSERT_ARGUMENT(out_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
   IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)allocation_size);
 
-  iree_allocator_t host_allocator =
-      iree_hal_allocator_host_allocator(allocator);
   iree_hal_vulkan_native_buffer_t* buffer = NULL;
   iree_status_t status =
       iree_allocator_malloc(host_allocator, sizeof(*buffer), (void**)&buffer);
   if (iree_status_is_ok(status)) {
     iree_hal_buffer_initialize(
-        host_allocator, allocator, &buffer->base.base, allocation_size,
-        byte_offset, byte_length, memory_type, allowed_access, allowed_usage,
+        placement, &buffer->base.base, allocation_size, byte_offset,
+        byte_length, memory_type, allowed_access, allowed_usage,
         &iree_hal_vulkan_native_buffer_vtable, &buffer->base.base);
+    buffer->base.host_allocator = host_allocator;
     buffer->base.device_memory = device_memory;
     buffer->base.handle = handle;
     buffer->logical_device = logical_device;
@@ -75,7 +74,7 @@
     iree_hal_buffer_t* base_buffer) {
   iree_hal_vulkan_native_buffer_t* buffer =
       iree_hal_vulkan_native_buffer_cast(base_buffer);
-  iree_allocator_t host_allocator = base_buffer->host_allocator;
+  iree_allocator_t host_allocator = buffer->base.host_allocator;
   IREE_TRACE_ZONE_BEGIN(z0);
   IREE_TRACE_ZONE_APPEND_VALUE_I64(
       z0, (int64_t)iree_hal_buffer_allocation_size(base_buffer));
diff --git a/runtime/src/iree/hal/drivers/vulkan/native_buffer.h b/runtime/src/iree/hal/drivers/vulkan/native_buffer.h
index 7d8a273..0049e2e 100644
--- a/runtime/src/iree/hal/drivers/vulkan/native_buffer.h
+++ b/runtime/src/iree/hal/drivers/vulkan/native_buffer.h
@@ -35,7 +35,7 @@
 // HAL. The provided callback is made when the buffer is destroyed to allow the
 // caller to clean up as appropriate.
 iree_status_t iree_hal_vulkan_native_buffer_wrap(
-    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type,
     iree_hal_memory_access_t allowed_access,
     iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
     iree_device_size_t byte_offset, iree_device_size_t byte_length,
@@ -43,7 +43,7 @@
     VkDeviceMemory device_memory, VkBuffer handle,
     iree_hal_vulkan_native_buffer_release_callback_t internal_release_callback,
     iree_hal_buffer_release_callback_t user_release_callback,
-    iree_hal_buffer_t** out_buffer);
+    iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/runtime/src/iree/hal/drivers/vulkan/sparse_buffer.cc b/runtime/src/iree/hal/drivers/vulkan/sparse_buffer.cc
index 2ed3a8f..b1f2778 100644
--- a/runtime/src/iree/hal/drivers/vulkan/sparse_buffer.cc
+++ b/runtime/src/iree/hal/drivers/vulkan/sparse_buffer.cc
@@ -123,15 +123,15 @@
 }
 
 iree_status_t iree_hal_vulkan_sparse_buffer_create_bound_sync(
-    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type,
     iree_hal_memory_access_t allowed_access,
     iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
     iree_device_size_t byte_offset, iree_device_size_t byte_length,
     iree::hal::vulkan::VkDeviceHandle* logical_device, VkQueue queue,
     VkBuffer handle, VkMemoryRequirements requirements,
     uint32_t memory_type_index, VkDeviceSize max_allocation_size,
-    iree_hal_buffer_t** out_buffer) {
-  IREE_ASSERT_ARGUMENT(allocator);
+    iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer) {
+  IREE_ASSERT_ARGUMENT(placement.device);
   IREE_ASSERT_ARGUMENT(logical_device);
   IREE_ASSERT_ARGUMENT(handle);
   IREE_ASSERT_ARGUMENT(out_buffer);
@@ -151,8 +151,6 @@
       (iree_host_size_t)iree_device_size_ceil_div(requirements.size,
                                                   physical_block_size);
 
-  iree_allocator_t host_allocator =
-      iree_hal_allocator_host_allocator(allocator);
   iree_hal_vulkan_sparse_buffer_t* buffer = NULL;
   iree_host_size_t total_size =
       iree_host_align(sizeof(*buffer), iree_max_align_t) +
@@ -160,9 +158,10 @@
   IREE_RETURN_AND_END_ZONE_IF_ERROR(
       z0, iree_allocator_malloc(host_allocator, total_size, (void**)&buffer));
   iree_hal_buffer_initialize(
-      host_allocator, allocator, &buffer->base.base, allocation_size,
-      byte_offset, byte_length, memory_type, allowed_access, allowed_usage,
+      placement, &buffer->base.base, allocation_size, byte_offset, byte_length,
+      memory_type, allowed_access, allowed_usage,
       &iree_hal_vulkan_sparse_buffer_vtable, &buffer->base.base);
+  buffer->base.host_allocator = host_allocator;
   buffer->base.handle = handle;
   buffer->logical_device = logical_device;
   buffer->physical_block_count = physical_block_count;
@@ -187,7 +186,7 @@
   iree_hal_vulkan_sparse_buffer_t* buffer =
       iree_hal_vulkan_sparse_buffer_cast(base_buffer);
   iree::hal::vulkan::VkDeviceHandle* logical_device = buffer->logical_device;
-  iree_allocator_t host_allocator = base_buffer->host_allocator;
+  iree_allocator_t host_allocator = buffer->base.host_allocator;
   IREE_TRACE_ZONE_BEGIN(z0);
   IREE_TRACE_ZONE_APPEND_VALUE_I64(
       z0, (int64_t)iree_hal_buffer_allocation_size(base_buffer));
diff --git a/runtime/src/iree/hal/drivers/vulkan/sparse_buffer.h b/runtime/src/iree/hal/drivers/vulkan/sparse_buffer.h
index 9ab7d65..a74397c 100644
--- a/runtime/src/iree/hal/drivers/vulkan/sparse_buffer.h
+++ b/runtime/src/iree/hal/drivers/vulkan/sparse_buffer.h
@@ -26,14 +26,14 @@
 // This will eventually be replaced with HAL device APIs for controlling the
 // reserve/commit/decommit/release behavior of the virtual/physical storage.
 iree_status_t iree_hal_vulkan_sparse_buffer_create_bound_sync(
-    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type,
     iree_hal_memory_access_t allowed_access,
     iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
     iree_device_size_t byte_offset, iree_device_size_t byte_length,
     iree::hal::vulkan::VkDeviceHandle* logical_device, VkQueue queue,
     VkBuffer handle, VkMemoryRequirements requirements,
     uint32_t memory_type_index, VkDeviceSize max_allocation_size,
-    iree_hal_buffer_t** out_buffer);
+    iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc b/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc
index 6db27bc..0433942 100644
--- a/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc
+++ b/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc
@@ -755,8 +755,8 @@
   // Create the device memory allocator that will service all buffer
   // allocation requests.
   iree_status_t status = iree_hal_vulkan_native_allocator_create(
-      options, instance, physical_device, logical_device,
-      &device->device_allocator);
+      options, (iree_hal_device_t*)device, instance, physical_device,
+      logical_device, &device->device_allocator);
 
   // Create command pools for each queue family. If we don't have a transfer
   // queue then we'll ignore that one and just use the dispatch pool.
diff --git a/runtime/src/iree/hal/utils/caching_allocator.c b/runtime/src/iree/hal/utils/caching_allocator.c
index 599f416..8b4771c 100644
--- a/runtime/src/iree/hal/utils/caching_allocator.c
+++ b/runtime/src/iree/hal/utils/caching_allocator.c
@@ -730,7 +730,12 @@
       pool, &compat_params, allocation_size, out_buffer));
 
   // Point the buffer back to us for deallocation.
-  (*out_buffer)->device_allocator = base_allocator;
+  //
+  // TODO(#19159): remove iree_hal_allocator_deallocate_buffer when pooling no
+  // longer requires the pooling_allocator on iree_hal_buffer_t. We should
+  // instead be creating a new iree_hal_cached_buffer_t that we return as if it
+  // were an allocated buffer and that can store a reference back to the pool.
+  (*out_buffer)->pooling_allocator = base_allocator;
 
   return iree_ok_status();
 }
@@ -751,6 +756,9 @@
   IREE_ASSERT(pool, "pool to return cached buffer to not found");
   if (!pool) return;
 
+  // TODO(#19159): remove iree_hal_allocator_deallocate_buffer when pooling no
+  // longer requires the pooling_allocator on iree_hal_buffer_t.
+
   // Release back to pool (which may deallocate).
   iree_hal_caching_allocator_pool_release(pool, buffer);
 }
diff --git a/runtime/src/iree/modules/hal/inline/module.c b/runtime/src/iree/modules/hal/inline/module.c
index c3c5dea..a892453 100644
--- a/runtime/src/iree/modules/hal/inline/module.c
+++ b/runtime/src/iree/modules/hal/inline/module.c
@@ -366,7 +366,7 @@
   iree_hal_buffer_t* subspan_buffer = NULL;
   IREE_RETURN_IF_ERROR(
       iree_hal_buffer_subspan(source_buffer, source_offset, length,
-                              &subspan_buffer),
+                              state->host_allocator, &subspan_buffer),
       "invalid subspan of an existing buffer (source_offset=%" PRIdsz
       ", length=%" PRIdsz ")",
       source_offset, length);
@@ -424,7 +424,7 @@
       source_length != iree_hal_buffer_byte_length(source_buffer)) {
     IREE_RETURN_IF_ERROR(
         iree_hal_buffer_subspan(source_buffer, source_offset, source_length,
-                                &subspan_buffer),
+                                state->host_allocator, &subspan_buffer),
         "invalid subspan of an existing buffer (source_offset=%" PRIdsz
         ", length=%" PRIdsz ")",
         source_offset, source_length);
diff --git a/runtime/src/iree/modules/hal/module.c b/runtime/src/iree/modules/hal/module.c
index 3baa27d..24a11db 100644
--- a/runtime/src/iree/modules/hal/module.c
+++ b/runtime/src/iree/modules/hal/module.c
@@ -462,7 +462,7 @@
   iree_hal_buffer_t* subspan_buffer = NULL;
   IREE_RETURN_IF_ERROR(
       iree_hal_buffer_subspan(source_buffer, source_offset, length,
-                              &subspan_buffer),
+                              state->host_allocator, &subspan_buffer),
       "invalid subspan of an existing buffer (source_offset=%" PRIdsz
       ", length=%" PRIdsz ")",
       source_offset, length);
@@ -549,7 +549,7 @@
       source_length != iree_hal_buffer_byte_length(source_buffer)) {
     IREE_RETURN_IF_ERROR(
         iree_hal_buffer_subspan(source_buffer, source_offset, source_length,
-                                &subspan_buffer),
+                                state->host_allocator, &subspan_buffer),
         "invalid subspan of an existing buffer (source_offset=%" PRIdsz
         ", length=%" PRIdsz ")",
         source_offset, source_length);
diff --git a/runtime/src/iree/tooling/function_io.c b/runtime/src/iree/tooling/function_io.c
index d416a0d..c38a4cf 100644
--- a/runtime/src/iree/tooling/function_io.c
+++ b/runtime/src/iree/tooling/function_io.c
@@ -906,18 +906,18 @@
       vm_buffer, 0, iree_vm_buffer_length(vm_buffer), 1, &span));
 
   // Wrap the heap memory in a HAL buffer for read-only access.
-  iree_hal_buffer_release_callback_t release_callback = {
+  const iree_hal_buffer_release_callback_t release_callback = {
       .fn = iree_hal_buffer_release_vm_buffer,
       .user_data = vm_buffer,
   };
   iree_vm_buffer_retain(vm_buffer);
   iree_hal_buffer_t* hal_buffer = NULL;
   iree_status_t status = iree_hal_heap_buffer_wrap(
-      device_allocator, IREE_HAL_MEMORY_TYPE_HOST_LOCAL,
+      iree_hal_buffer_placement_undefined(), IREE_HAL_MEMORY_TYPE_HOST_LOCAL,
       IREE_HAL_MEMORY_ACCESS_READ,
       IREE_HAL_BUFFER_USAGE_TRANSFER_SOURCE | IREE_HAL_BUFFER_USAGE_MAPPING,
       span.data_length, iree_cast_const_byte_span(span), release_callback,
-      &hal_buffer);
+      host_allocator, &hal_buffer);
   iree_vm_buffer_release(vm_buffer);
 
   // Wrap the HAL buffer in a buffer view.
@@ -931,15 +931,14 @@
 }
 
 static iree_status_t iree_tooling_create_buffer_view_empty(
-    iree_hal_allocator_t* device_allocator, iree_allocator_t host_allocator,
-    iree_hal_buffer_view_t** out_buffer_view) {
+    iree_allocator_t host_allocator, iree_hal_buffer_view_t** out_buffer_view) {
   iree_hal_buffer_t* hal_buffer = NULL;
   IREE_RETURN_IF_ERROR(iree_hal_heap_buffer_wrap(
-      device_allocator, IREE_HAL_MEMORY_TYPE_HOST_LOCAL,
+      iree_hal_buffer_placement_undefined(), IREE_HAL_MEMORY_TYPE_HOST_LOCAL,
       IREE_HAL_MEMORY_ACCESS_READ,
       IREE_HAL_BUFFER_USAGE_TRANSFER_SOURCE | IREE_HAL_BUFFER_USAGE_MAPPING, 0,
       iree_byte_span_empty(), iree_hal_buffer_release_callback_null(),
-      &hal_buffer));
+      host_allocator, &hal_buffer));
   iree_status_t status = iree_tooling_create_buffer_view_with_hal_buffer(
       hal_buffer, host_allocator, out_buffer_view);
   iree_hal_buffer_release(hal_buffer);
@@ -953,8 +952,8 @@
   iree_hal_element_type_t element_type = IREE_HAL_ELEMENT_TYPE_NONE;
   switch (value.type) {
     case IREE_VM_VALUE_TYPE_NONE:
-      return iree_tooling_create_buffer_view_empty(
-          device_allocator, host_allocator, out_buffer_view);
+      return iree_tooling_create_buffer_view_empty(host_allocator,
+                                                   out_buffer_view);
     case IREE_VM_VALUE_TYPE_I8:
       byte_length = sizeof(value.i8);
       element_type = IREE_HAL_ELEMENT_TYPE_INT_8;
@@ -1015,8 +1014,8 @@
   if (iree_vm_variant_is_empty(variant)) {
     // Empty value - we need to emit a zero-length value to keep the npy file
     // ordered when there are multiple entries.
-    return iree_tooling_create_buffer_view_empty(
-        device_allocator, host_allocator, out_buffer_view);
+    return iree_tooling_create_buffer_view_empty(host_allocator,
+                                                 out_buffer_view);
   } else if (iree_vm_variant_is_ref(variant)) {
     if (iree_hal_buffer_view_isa(variant.ref)) {
       // Buffer view returned can provide the metadata required.