Replacing iree_hal_allocator_wrap_buffer with _import_buffer. (#8537)

This removes the special case wrapping path that only made sense for
local host devices.
diff --git a/bindings/python/iree/runtime/hal.cc b/bindings/python/iree/runtime/hal.cc
index 2a9f05f..66633e1 100644
--- a/bindings/python/iree/runtime/hal.cc
+++ b/bindings/python/iree/runtime/hal.cc
@@ -442,7 +442,7 @@
       .def(
           "allocate_buffer",
           [](HalAllocator& self, int memory_type, int allowed_usage,
-             iree_host_size_t allocation_size) {
+             iree_device_size_t allocation_size) {
             iree_hal_buffer_params_t params = {0};
             params.type = memory_type;
             params.usage = allowed_usage;
diff --git a/experimental/rocm/rocm_allocator.c b/experimental/rocm/rocm_allocator.c
index d436df1..741fe41 100644
--- a/experimental/rocm/rocm_allocator.c
+++ b/experimental/rocm/rocm_allocator.c
@@ -123,7 +123,7 @@
 static iree_status_t iree_hal_rocm_allocator_allocate_buffer(
     iree_hal_allocator_t* IREE_RESTRICT base_allocator,
     const iree_hal_buffer_params_t* IREE_RESTRICT params,
-    iree_host_size_t allocation_size, iree_const_byte_span_t initial_data,
+    iree_device_size_t allocation_size, iree_const_byte_span_t initial_data,
     iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
   iree_hal_rocm_allocator_t* allocator =
       iree_hal_rocm_allocator_cast(base_allocator);
@@ -216,19 +216,11 @@
   iree_hal_buffer_destroy(base_buffer);
 }
 
-static iree_status_t iree_hal_rocm_allocator_wrap_buffer(
-    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
-    const iree_hal_buffer_params_t* IREE_RESTRICT params, iree_byte_span_t data,
-    iree_allocator_t data_allocator,
-    iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
-  return iree_make_status(IREE_STATUS_UNAVAILABLE,
-                          "wrapping of external buffers not supported");
-}
-
 static iree_status_t iree_hal_rocm_allocator_import_buffer(
     iree_hal_allocator_t* IREE_RESTRICT base_allocator,
     const iree_hal_buffer_params_t* IREE_RESTRICT params,
     iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
+    iree_hal_buffer_release_callback_t release_callback,
     iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
   return iree_make_status(IREE_STATUS_UNAVAILABLE,
                           "importing from external buffers not supported");
@@ -252,7 +244,6 @@
     .query_compatibility = iree_hal_rocm_allocator_query_compatibility,
     .allocate_buffer = iree_hal_rocm_allocator_allocate_buffer,
     .deallocate_buffer = iree_hal_rocm_allocator_deallocate_buffer,
-    .wrap_buffer = iree_hal_rocm_allocator_wrap_buffer,
     .import_buffer = iree_hal_rocm_allocator_import_buffer,
     .export_buffer = iree_hal_rocm_allocator_export_buffer,
 };
diff --git a/iree/hal/allocator.c b/iree/hal/allocator.c
index 7e73ddf..daa418b 100644
--- a/iree/hal/allocator.c
+++ b/iree/hal/allocator.c
@@ -117,7 +117,7 @@
 
 IREE_API_EXPORT iree_status_t iree_hal_allocator_allocate_buffer(
     iree_hal_allocator_t* IREE_RESTRICT allocator,
-    iree_hal_buffer_params_t params, iree_host_size_t allocation_size,
+    iree_hal_buffer_params_t params, iree_device_size_t allocation_size,
     iree_const_byte_span_t initial_data,
     iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
   IREE_ASSERT_ARGUMENT(allocator);
@@ -140,25 +140,11 @@
   IREE_TRACE_ZONE_END(z0);
 }
 
-IREE_API_EXPORT iree_status_t iree_hal_allocator_wrap_buffer(
-    iree_hal_allocator_t* IREE_RESTRICT allocator,
-    iree_hal_buffer_params_t params, iree_byte_span_t data,
-    iree_allocator_t data_allocator, iree_hal_buffer_t** out_buffer) {
-  IREE_ASSERT_ARGUMENT(allocator);
-  IREE_ASSERT_ARGUMENT(out_buffer);
-  *out_buffer = NULL;
-  IREE_TRACE_ZONE_BEGIN(z0);
-  iree_hal_buffer_params_canonicalize(&params);
-  iree_status_t status = _VTABLE_DISPATCH(allocator, wrap_buffer)(
-      allocator, &params, data, data_allocator, out_buffer);
-  IREE_TRACE_ZONE_END(z0);
-  return status;
-}
-
 IREE_API_EXPORT iree_status_t iree_hal_allocator_import_buffer(
     iree_hal_allocator_t* IREE_RESTRICT allocator,
     iree_hal_buffer_params_t params,
     iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
+    iree_hal_buffer_release_callback_t release_callback,
     iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
   IREE_ASSERT_ARGUMENT(allocator);
   IREE_ASSERT_ARGUMENT(external_buffer);
@@ -167,7 +153,7 @@
   IREE_TRACE_ZONE_BEGIN(z0);
   iree_hal_buffer_params_canonicalize(&params);
   iree_status_t status = _VTABLE_DISPATCH(allocator, import_buffer)(
-      allocator, &params, external_buffer, out_buffer);
+      allocator, &params, external_buffer, release_callback, out_buffer);
   IREE_TRACE_ZONE_END(z0);
   return status;
 }
diff --git a/iree/hal/allocator.h b/iree/hal/allocator.h
index 6165e91..2c99da4 100644
--- a/iree/hal/allocator.h
+++ b/iree/hal/allocator.h
@@ -249,6 +249,25 @@
   } handle;
 } iree_hal_external_buffer_t;
 
+typedef void(IREE_API_PTR* iree_hal_buffer_release_fn_t)(
+    void* user_data, iree_hal_buffer_t* buffer);
+
+// A callback issued when a buffer is released.
+typedef struct {
+  // Callback function pointer.
+  iree_hal_buffer_release_fn_t fn;
+  // User data passed to the callback function. Unowned.
+  void* user_data;
+} iree_hal_buffer_release_callback_t;
+
+// Returns a no-op buffer release callback that implies that no cleanup is
+// required.
+static inline iree_hal_buffer_release_callback_t
+iree_hal_buffer_release_callback_null(void) {
+  iree_hal_buffer_release_callback_t callback = {NULL, NULL};
+  return callback;
+}
+
 //===----------------------------------------------------------------------===//
 // Statistics/reporting
 //===----------------------------------------------------------------------===//
@@ -327,8 +346,8 @@
 
 // Allocates a buffer from the allocator.
 // If |initial_data| is provided then the bytes will be copied into the device
-// buffer. To avoid the copy when constant data is used prefer
-// iree_hal_allocator_wrap_buffer when available.
+// buffer. To avoid the copy when device-accessible constant data is used prefer
+// iree_hal_allocator_import_buffer when available.
 //
 // The memory type of the buffer returned may differ from the requested value
 // if the device can provide more functionality; for example, if requesting
@@ -343,28 +362,9 @@
 // use strategy.
 IREE_API_EXPORT iree_status_t iree_hal_allocator_allocate_buffer(
     iree_hal_allocator_t* IREE_RESTRICT allocator,
-    iree_hal_buffer_params_t params, iree_host_size_t allocation_size,
+    iree_hal_buffer_params_t params, iree_device_size_t allocation_size,
     iree_const_byte_span_t initial_data, iree_hal_buffer_t** out_buffer);
 
-// Wraps an existing host allocation in a buffer.
-//
-// iree_hal_allocator_query_compatibility can be used to query whether a
-// buffer can be wrapped when using the given memory type and usage. A
-// compatibility result containing IREE_HAL_BUFFER_COMPATIBILITY_IMPORTABLE
-// means the wrap may succeed however if the pointer/page range is not in a
-// supported mode (no read access, etc) this call may still fail.
-//
-// |data_allocator| will be used to free the memory when the buffer is
-// destroyed. iree_allocator_null() can be passed to indicate the buffer does
-// not own the data.
-//
-// |out_buffer| must be released by the caller.
-// Fails if the allocator cannot access host memory in this way.
-IREE_API_EXPORT iree_status_t iree_hal_allocator_wrap_buffer(
-    iree_hal_allocator_t* IREE_RESTRICT allocator,
-    iree_hal_buffer_params_t params, iree_byte_span_t data,
-    iree_allocator_t data_allocator, iree_hal_buffer_t** out_buffer);
-
 // TODO(benvanik): iree_hal_allocator_query_external_buffer_compatibility to
 // check for support without needing an external buffer already. There's a few
 // usage modes and it'd be nice to have a single function for it to keep the
@@ -377,6 +377,17 @@
 // iree_hal_buffer_t. The returned external buffer may only be usable with the
 // same driver/device.
 //
+// iree_hal_allocator_query_compatibility can be used to query whether a
+// buffer can be imported when using the given memory type and usage. A
+// compatibility result containing IREE_HAL_BUFFER_COMPATIBILITY_IMPORTABLE
+// means the import _may_ succeed; however, if the pointer/page range is not in
+// a supported mode (no read access, etc.) this call will fail with
+// IREE_STATUS_OUT_OF_RANGE.
+//
+// An optional |release_callback| can be provided to allow the caller to listen
+// for when the underlying resource is no longer in use by the HAL. This can
+// be used to perform lifetime management or flushing.
+//
 // |out_buffer| must be released by the caller.
 // Fails with IREE_STATUS_UNAVAILABLE if the allocator cannot import the buffer
 // into the given memory type. This may be due to unavailable device/platform
@@ -385,6 +396,7 @@
     iree_hal_allocator_t* IREE_RESTRICT allocator,
     iree_hal_buffer_params_t params,
     iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
+    iree_hal_buffer_release_callback_t release_callback,
     iree_hal_buffer_t** out_buffer);
 
 // Exports an allocator-owned |buffer| to an external buffer handle.
@@ -446,23 +458,18 @@
   iree_status_t(IREE_API_PTR* allocate_buffer)(
       iree_hal_allocator_t* IREE_RESTRICT allocator,
       const iree_hal_buffer_params_t* IREE_RESTRICT params,
-      iree_host_size_t allocation_size, iree_const_byte_span_t initial_data,
+      iree_device_size_t allocation_size, iree_const_byte_span_t initial_data,
       iree_hal_buffer_t** IREE_RESTRICT out_buffer);
 
   void(IREE_API_PTR* deallocate_buffer)(
       iree_hal_allocator_t* IREE_RESTRICT allocator,
       iree_hal_buffer_t* IREE_RESTRICT buffer);
 
-  iree_status_t(IREE_API_PTR* wrap_buffer)(
-      iree_hal_allocator_t* IREE_RESTRICT allocator,
-      const iree_hal_buffer_params_t* IREE_RESTRICT params,
-      iree_byte_span_t data, iree_allocator_t data_allocator,
-      iree_hal_buffer_t** IREE_RESTRICT out_buffer);
-
   iree_status_t(IREE_API_PTR* import_buffer)(
       iree_hal_allocator_t* IREE_RESTRICT allocator,
       const iree_hal_buffer_params_t* IREE_RESTRICT params,
       iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
+      iree_hal_buffer_release_callback_t release_callback,
       iree_hal_buffer_t** IREE_RESTRICT out_buffer);
 
   iree_status_t(IREE_API_PTR* export_buffer)(
diff --git a/iree/hal/allocator_heap.c b/iree/hal/allocator_heap.c
index a466b4b..7b53c27 100644
--- a/iree/hal/allocator_heap.c
+++ b/iree/hal/allocator_heap.c
@@ -148,7 +148,7 @@
 static iree_status_t iree_hal_heap_allocator_allocate_buffer(
     iree_hal_allocator_t* IREE_RESTRICT base_allocator,
     const iree_hal_buffer_params_t* IREE_RESTRICT params,
-    iree_host_size_t allocation_size, iree_const_byte_span_t initial_data,
+    iree_device_size_t allocation_size, iree_const_byte_span_t initial_data,
     iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
   iree_hal_heap_allocator_t* allocator =
       iree_hal_heap_allocator_cast(base_allocator);
@@ -162,22 +162,11 @@
   IREE_STATISTICS(statistics = &allocator->statistics);
   iree_hal_buffer_t* buffer = NULL;
   IREE_RETURN_IF_ERROR(iree_hal_heap_buffer_create(
-      base_allocator, statistics, compat_params.type, compat_params.access,
-      compat_params.usage, allocation_size, allocator->data_allocator,
-      allocator->host_allocator, &buffer));
+      base_allocator, statistics, &compat_params, allocation_size, initial_data,
+      allocator->data_allocator, allocator->host_allocator, &buffer));
 
-  iree_status_t status = iree_ok_status();
-  if (!iree_const_byte_span_is_empty(initial_data)) {
-    status = iree_hal_buffer_write_data(buffer, 0, initial_data.data,
-                                        initial_data.data_length);
-  }
-
-  if (iree_status_is_ok(status)) {
-    *out_buffer = buffer;
-  } else {
-    iree_hal_buffer_release(buffer);
-  }
-  return status;
+  *out_buffer = buffer;
+  return iree_ok_status();
 }
 
 static void iree_hal_heap_allocator_deallocate_buffer(
@@ -188,23 +177,11 @@
   iree_hal_buffer_destroy(base_buffer);
 }
 
-static iree_status_t iree_hal_heap_allocator_wrap_buffer(
-    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
-    const iree_hal_buffer_params_t* IREE_RESTRICT params, iree_byte_span_t data,
-    iree_allocator_t data_allocator,
-    iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
-  // Coerce options into those required for use by heap-based devices.
-  iree_hal_buffer_params_t compat_params =
-      iree_hal_heap_allocator_make_compatible(params);
-  return iree_hal_heap_buffer_wrap(
-      base_allocator, compat_params.type, compat_params.access,
-      compat_params.usage, data.data_length, data, data_allocator, out_buffer);
-}
-
 static iree_status_t iree_hal_heap_allocator_import_buffer(
     iree_hal_allocator_t* IREE_RESTRICT base_allocator,
     const iree_hal_buffer_params_t* IREE_RESTRICT params,
     iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
+    iree_hal_buffer_release_callback_t release_callback,
     iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
   if (external_buffer->type != IREE_HAL_EXTERNAL_BUFFER_TYPE_HOST_ALLOCATION) {
     return iree_make_status(IREE_STATUS_UNAVAILABLE,
@@ -215,13 +192,12 @@
   iree_hal_buffer_params_t compat_params =
       iree_hal_heap_allocator_make_compatible(params);
 
-  // Wrap; note that the host allocation is unowned.
   return iree_hal_heap_buffer_wrap(
       base_allocator, compat_params.type, compat_params.access,
       compat_params.usage, external_buffer->size,
       iree_make_byte_span(external_buffer->handle.host_allocation.ptr,
                           external_buffer->size),
-      iree_allocator_null(), out_buffer);
+      release_callback, out_buffer);
 }
 
 static iree_status_t iree_hal_heap_allocator_export_buffer(
@@ -257,7 +233,6 @@
     .query_compatibility = iree_hal_heap_allocator_query_compatibility,
     .allocate_buffer = iree_hal_heap_allocator_allocate_buffer,
     .deallocate_buffer = iree_hal_heap_allocator_deallocate_buffer,
-    .wrap_buffer = iree_hal_heap_allocator_wrap_buffer,
     .import_buffer = iree_hal_heap_allocator_import_buffer,
     .export_buffer = iree_hal_heap_allocator_export_buffer,
 };
diff --git a/iree/hal/buffer.h b/iree/hal/buffer.h
index fb23ce0..6af80fa 100644
--- a/iree/hal/buffer.h
+++ b/iree/hal/buffer.h
@@ -117,7 +117,7 @@
                                IREE_HAL_MEMORY_ACCESS_WRITE |
                                IREE_HAL_MEMORY_ACCESS_DISCARD,
 };
-typedef uint32_t iree_hal_memory_access_t;
+typedef uint16_t iree_hal_memory_access_t;
 
 // Bitfield that defines how a buffer is intended to be used.
 // Usage allows the driver to appropriately place the buffer for more
@@ -562,8 +562,11 @@
   iree_hal_allocator_t* device_allocator;
   // TODO(benvanik): bit pack these; could be ~4 bytes vs 12.
   iree_hal_memory_type_t memory_type;
-  iree_hal_memory_access_t allowed_access;
   iree_hal_buffer_usage_t allowed_usage;
+  iree_hal_memory_access_t allowed_access;
+
+  // Implementation-defined flags.
+  uint16_t flags;
 };
 
 IREE_API_EXPORT void iree_hal_buffer_initialize(
diff --git a/iree/hal/buffer_heap.c b/iree/hal/buffer_heap.c
index e2c3ee8..47ec037 100644
--- a/iree/hal/buffer_heap.c
+++ b/iree/hal/buffer_heap.c
@@ -15,43 +15,43 @@
 #include "iree/hal/buffer_heap_impl.h"
 #include "iree/hal/resource.h"
 
+typedef enum iree_hal_heap_buffer_storage_mode_e {
+  // Allocated as a [metadata, data] slab.
+  // The base metadata pointer must be freed with iree_allocator_free_aligned.
+  // The data storage lives in the same slab and is not freed separately.
+  IREE_HAL_HEAP_BUFFER_STORAGE_MODE_SLAB = 0u,
+  // Allocated as split [metadata] and [data].
+  // The base metadata pointer must be freed with iree_allocator_free.
+  // The data storage must be freed with iree_allocator_free_aligned.
+  IREE_HAL_HEAP_BUFFER_STORAGE_MODE_SPLIT = 1u,
+  // Allocated as split [metadata] and an externally-owned [data].
+  // The base metadata pointer must be freed with iree_allocator_free.
+  // A user-provided buffer release callback is notified that the buffer is no
+  // longer referencing the data.
+  IREE_HAL_HEAP_BUFFER_STORAGE_MODE_EXTERNAL = 2u,
+} iree_hal_heap_buffer_storage_mode_t;
+
 typedef struct iree_hal_heap_buffer_t {
+  // base.flags has the iree_hal_heap_buffer_storage_mode_t.
   iree_hal_buffer_t base;
 
   iree_byte_span_t data;
-  iree_allocator_t data_allocator;
+  union {
+    // Used for IREE_HAL_HEAP_BUFFER_STORAGE_MODE_SPLIT.
+    iree_allocator_t data_allocator;
+    // Used for IREE_HAL_HEAP_BUFFER_STORAGE_MODE_EXTERNAL.
+    iree_hal_buffer_release_callback_t release_callback;
+  };
 
   // Optional statistics shared with the allocator.
   IREE_STATISTICS(iree_hal_heap_allocator_statistics_t* statistics;)
 } iree_hal_heap_buffer_t;
+static_assert(sizeof(iree_hal_heap_buffer_t) <= 128,
+              "header should be <= the minimum buffer alignment so that we "
+              "don't introduce internal waste");
 
 static const iree_hal_buffer_vtable_t iree_hal_heap_buffer_vtable;
 
-enum {
-  IREE_HAL_HEAP_BUFFER_DATA_IS_ALIGNED = 1u << 0,
-  IREE_HAL_HEAP_BUFFER_METADATA_IS_ALIGNED = 1u << 1,
-  IREE_HAL_HEAP_BUFFER_FLAG_MASK = IREE_HAL_HEAP_BUFFER_DATA_IS_ALIGNED |
-                                   IREE_HAL_HEAP_BUFFER_METADATA_IS_ALIGNED,
-};
-
-static inline uint8_t* iree_hal_heap_buffer_ptr(
-    const iree_hal_heap_buffer_t* buffer) {
-  return (uint8_t*)((uintptr_t)buffer->data.data &
-                    ~IREE_HAL_HEAP_BUFFER_FLAG_MASK);
-}
-
-static inline bool iree_hal_heap_buffer_data_is_aligned(
-    const iree_hal_heap_buffer_t* buffer) {
-  return iree_any_bit_set((uintptr_t)buffer->data.data,
-                          IREE_HAL_HEAP_BUFFER_DATA_IS_ALIGNED);
-}
-
-static inline bool iree_hal_heap_buffer_metadata_is_aligned(
-    const iree_hal_heap_buffer_t* buffer) {
-  return iree_any_bit_set((uintptr_t)buffer->data.data,
-                          IREE_HAL_HEAP_BUFFER_METADATA_IS_ALIGNED);
-}
-
 // Allocates a buffer with the metadata and storage split.
 // This results in an additional host allocation but allows for user-overridden
 // data storage allocations.
@@ -62,14 +62,13 @@
   // Try allocating the storage first as it's the most likely to fail if OOM.
   // It must be aligned to the minimum buffer alignment.
   out_data->data_length = allocation_size;
-  uintptr_t data_ptr = 0;
+  uint8_t* data_ptr = 0;
   IREE_RETURN_IF_ERROR(iree_allocator_malloc_aligned(
       data_allocator, allocation_size, IREE_HAL_HEAP_BUFFER_ALIGNMENT,
       /*offset=*/0, (void**)&data_ptr));
-  IREE_ASSERT_TRUE(
-      iree_host_size_has_alignment(data_ptr, IREE_HAL_HEAP_BUFFER_ALIGNMENT));
-  data_ptr |= IREE_HAL_HEAP_BUFFER_DATA_IS_ALIGNED;
-  out_data->data = (uint8_t*)data_ptr;
+  IREE_ASSERT_TRUE(iree_host_size_has_alignment(
+      (iree_host_size_t)data_ptr, IREE_HAL_HEAP_BUFFER_ALIGNMENT));
+  out_data->data = data_ptr;
 
   // Allocate the host metadata wrapper with natural alignment.
   iree_status_t status = iree_allocator_malloc(
@@ -104,11 +103,10 @@
 
   // Set bit indicating that we need to free the metadata with
   // iree_allocator_free_aligned.
-  uintptr_t data_ptr = (uintptr_t)buffer + header_size;
-  IREE_ASSERT_TRUE(
-      iree_host_size_has_alignment(data_ptr, IREE_HAL_HEAP_BUFFER_ALIGNMENT));
-  data_ptr |= IREE_HAL_HEAP_BUFFER_METADATA_IS_ALIGNED;
-  *out_data = iree_make_byte_span((uint8_t*)data_ptr, allocation_size);
+  uint8_t* data_ptr = (uint8_t*)buffer + header_size;
+  IREE_ASSERT_TRUE(iree_host_size_has_alignment(
+      (iree_host_size_t)data_ptr, IREE_HAL_HEAP_BUFFER_ALIGNMENT));
+  *out_data = iree_make_byte_span(data_ptr, allocation_size);
 
   return iree_ok_status();
 }
@@ -116,11 +114,11 @@
 iree_status_t iree_hal_heap_buffer_create(
     iree_hal_allocator_t* allocator,
     iree_hal_heap_allocator_statistics_t* statistics,
-    iree_hal_memory_type_t memory_type, iree_hal_memory_access_t allowed_access,
-    iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
-    iree_allocator_t data_allocator, iree_allocator_t host_allocator,
-    iree_hal_buffer_t** out_buffer) {
+    const iree_hal_buffer_params_t* params, iree_device_size_t allocation_size,
+    iree_const_byte_span_t initial_data, iree_allocator_t data_allocator,
+    iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer) {
   IREE_ASSERT_ARGUMENT(allocator);
+  IREE_ASSERT_ARGUMENT(params);
   IREE_ASSERT_ARGUMENT(out_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
 
@@ -141,23 +139,35 @@
 
   if (iree_status_is_ok(status)) {
     iree_hal_buffer_initialize(host_allocator, allocator, &buffer->base,
-                               allocation_size, 0, allocation_size, memory_type,
-                               allowed_access, allowed_usage,
+                               allocation_size, 0, allocation_size,
+                               params->type, params->access, params->usage,
                                &iree_hal_heap_buffer_vtable, &buffer->base);
     buffer->data = data;
-    buffer->data_allocator =
-        same_allocator ? iree_allocator_null() : data_allocator;
+
+    if (same_allocator) {
+      buffer->base.flags = IREE_HAL_HEAP_BUFFER_STORAGE_MODE_SLAB;
+      buffer->data_allocator = iree_allocator_null();
+    } else {
+      buffer->base.flags = IREE_HAL_HEAP_BUFFER_STORAGE_MODE_SPLIT;
+      buffer->data_allocator = data_allocator;
+    }
 
     IREE_STATISTICS({
       if (statistics != NULL) {
         buffer->statistics = statistics;
         iree_slim_mutex_lock(&statistics->mutex);
         iree_hal_allocator_statistics_record_alloc(
-            &statistics->base, memory_type, allocation_size);
+            &statistics->base, params->type, allocation_size);
         iree_slim_mutex_unlock(&statistics->mutex);
       }
     });
 
+    if (!iree_const_byte_span_is_empty(initial_data)) {
+      const iree_device_size_t initial_length =
+          iree_min(initial_data.data_length, allocation_size);
+      memcpy(buffer->data.data, initial_data.data, initial_length);
+    }
+
     *out_buffer = &buffer->base;
   }
 
@@ -165,25 +175,23 @@
   return status;
 }
 
-iree_status_t iree_hal_heap_buffer_wrap(iree_hal_allocator_t* allocator,
-                                        iree_hal_memory_type_t memory_type,
-                                        iree_hal_memory_access_t allowed_access,
-                                        iree_hal_buffer_usage_t allowed_usage,
-                                        iree_device_size_t allocation_size,
-                                        iree_byte_span_t data,
-                                        iree_allocator_t data_allocator,
-                                        iree_hal_buffer_t** out_buffer) {
+iree_status_t iree_hal_heap_buffer_wrap(
+    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_memory_access_t allowed_access,
+    iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
+    iree_byte_span_t data, iree_hal_buffer_release_callback_t release_callback,
+    iree_hal_buffer_t** out_buffer) {
   IREE_ASSERT_ARGUMENT(allocator);
   IREE_ASSERT_ARGUMENT(out_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
 
-  uintptr_t data_ptr = (uintptr_t)data.data & ~IREE_HAL_HEAP_BUFFER_FLAG_MASK;
-  if (!iree_host_size_has_alignment(data_ptr, IREE_HAL_HEAP_BUFFER_ALIGNMENT)) {
+  if (!iree_host_size_has_alignment((uintptr_t)data.data,
+                                    IREE_HAL_HEAP_BUFFER_ALIGNMENT)) {
     IREE_TRACE_ZONE_END(z0);
     return iree_make_status(
-        IREE_STATUS_INVALID_ARGUMENT,
+        IREE_STATUS_OUT_OF_RANGE,
         "imported heap buffer data must be aligned to %d; got %p",
-        (int)IREE_HAL_HEAP_BUFFER_ALIGNMENT, (void*)data_ptr);
+        (int)IREE_HAL_HEAP_BUFFER_ALIGNMENT, data.data);
   }
 
   iree_allocator_t host_allocator =
@@ -197,7 +205,11 @@
                                memory_type, allowed_access, allowed_usage,
                                &iree_hal_heap_buffer_vtable, &buffer->base);
     buffer->data = data;
-    buffer->data_allocator = data_allocator;
+
+    // Notify the provided callback when the external data is no longer needed.
+    buffer->base.flags = IREE_HAL_HEAP_BUFFER_STORAGE_MODE_EXTERNAL;
+    buffer->release_callback = release_callback;
+
     *out_buffer = &buffer->base;
   }
 
@@ -220,15 +232,27 @@
     }
   });
 
-  if (iree_hal_heap_buffer_data_is_aligned(buffer)) {
-    iree_allocator_free_aligned(buffer->data_allocator, buffer->data.data);
-  } else {
-    iree_allocator_free(buffer->data_allocator, buffer->data.data);
-  }
-  if (iree_hal_heap_buffer_metadata_is_aligned(buffer)) {
-    iree_allocator_free_aligned(host_allocator, buffer);
-  } else {
-    iree_allocator_free(host_allocator, buffer);
+  switch (buffer->base.flags) {
+    case IREE_HAL_HEAP_BUFFER_STORAGE_MODE_SLAB: {
+      iree_allocator_free_aligned(host_allocator, buffer);
+      break;
+    }
+    case IREE_HAL_HEAP_BUFFER_STORAGE_MODE_SPLIT: {
+      iree_allocator_free_aligned(buffer->data_allocator, buffer->data.data);
+      iree_allocator_free(host_allocator, buffer);
+      break;
+    }
+    case IREE_HAL_HEAP_BUFFER_STORAGE_MODE_EXTERNAL: {
+      if (buffer->release_callback.fn) {
+        buffer->release_callback.fn(buffer->release_callback.user_data,
+                                    base_buffer);
+      }
+      iree_allocator_free(host_allocator, buffer);
+      break;
+    }
+    default:
+      IREE_ASSERT_UNREACHABLE("unhandled buffer storage mode");
+      break;
+  }
 
   IREE_TRACE_ZONE_END(z0);
@@ -240,8 +264,8 @@
     iree_device_size_t local_byte_offset, iree_device_size_t local_byte_length,
     iree_hal_buffer_mapping_t* mapping) {
   iree_hal_heap_buffer_t* buffer = (iree_hal_heap_buffer_t*)base_buffer;
-  mapping->contents = iree_make_byte_span(
-      iree_hal_heap_buffer_ptr(buffer) + local_byte_offset, local_byte_length);
+  mapping->contents = iree_make_byte_span(buffer->data.data + local_byte_offset,
+                                          local_byte_length);
 
   // If we mapped for discard scribble over the bytes. This is not a mandated
   // behavior but it will make debugging issues easier. Alternatively for
diff --git a/iree/hal/buffer_heap_impl.h b/iree/hal/buffer_heap_impl.h
index 5ade6e6..9481a3d 100644
--- a/iree/hal/buffer_heap_impl.h
+++ b/iree/hal/buffer_heap_impl.h
@@ -33,30 +33,24 @@
 iree_status_t iree_hal_heap_buffer_create(
     iree_hal_allocator_t* allocator,
     iree_hal_heap_allocator_statistics_t* statistics,
-    iree_hal_memory_type_t memory_type, iree_hal_memory_access_t allowed_access,
-    iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
-    iree_allocator_t data_allocator, iree_allocator_t host_allocator,
-    iree_hal_buffer_t** out_buffer);
+    const iree_hal_buffer_params_t* params, iree_device_size_t allocation_size,
+    iree_const_byte_span_t initial_data, iree_allocator_t data_allocator,
+    iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer);
 
 // Wraps an existing host allocation in a buffer.
-// When the buffer is destroyed the provided |data_allocator| will be used to
-// free |data| using iree_allocator_free. Pass iree_allocator_null() to wrap
-// without ownership semantics.
+// When the buffer is destroyed the provided |release_callback| will be called.
 //
-// The buffer must be aligned to at least IREE_HAL_HEAP_BUFFER_ALIGNMENT.
-// Note that it will be freed as a normal unaligned allocation. If we find
-// ourselves wanting to wrap aligned allocations requiring
-// iree_allocator_free_aligned then we'll need a flag to indicate that.
+// The buffer must be aligned to at least IREE_HAL_HEAP_BUFFER_ALIGNMENT and if
+// it is not the call will fail with IREE_STATUS_OUT_OF_RANGE.
 //
-// |out_buffer| must be released by the caller.
-iree_status_t iree_hal_heap_buffer_wrap(iree_hal_allocator_t* allocator,
-                                        iree_hal_memory_type_t memory_type,
-                                        iree_hal_memory_access_t allowed_access,
-                                        iree_hal_buffer_usage_t allowed_usage,
-                                        iree_device_size_t allocation_size,
-                                        iree_byte_span_t data,
-                                        iree_allocator_t data_allocator,
-                                        iree_hal_buffer_t** out_buffer);
+// |out_buffer| must be released by the caller. |data| must be kept live for the
+// lifetime of the wrapping buffer.
+iree_status_t iree_hal_heap_buffer_wrap(
+    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_memory_access_t allowed_access,
+    iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
+    iree_byte_span_t data, iree_hal_buffer_release_callback_t release_callback,
+    iree_hal_buffer_t** out_buffer);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/iree/hal/buffer_view_util.c b/iree/hal/buffer_view_util.c
index d8f8564..0365958 100644
--- a/iree/hal/buffer_view_util.c
+++ b/iree/hal/buffer_view_util.c
@@ -199,67 +199,6 @@
   return status;
 }
 
-IREE_API_EXPORT iree_status_t iree_hal_buffer_view_wrap_heap_buffer(
-    iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    iree_hal_encoding_type_t encoding_type,
-    iree_hal_buffer_params_t buffer_params, iree_byte_span_t data,
-    iree_allocator_t data_allocator, iree_hal_buffer_view_t** out_buffer_view) {
-  IREE_ASSERT_ARGUMENT(allocator);
-  IREE_ASSERT_ARGUMENT(out_buffer_view);
-  IREE_TRACE_ZONE_BEGIN(z0);
-  iree_hal_buffer_params_canonicalize(&buffer_params);
-
-  // NOTE: this will fail if the data cannot be imported into the allocator.
-  iree_hal_buffer_t* buffer = NULL;
-  iree_status_t status = iree_hal_allocator_wrap_buffer(
-      allocator, buffer_params, data, data_allocator, &buffer);
-
-  if (iree_status_is_ok(status)) {
-    status = iree_hal_buffer_view_create(
-        buffer, shape, shape_rank, element_type, encoding_type,
-        iree_hal_allocator_host_allocator(allocator), out_buffer_view);
-  }
-
-  iree_hal_buffer_release(buffer);
-  IREE_TRACE_ZONE_END(z0);
-  return status;
-}
-
-IREE_API_EXPORT iree_status_t iree_hal_buffer_view_wrap_or_clone_heap_buffer(
-    iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    iree_hal_encoding_type_t encoding_type,
-    iree_hal_buffer_params_t buffer_params, iree_byte_span_t data,
-    iree_allocator_t data_allocator, iree_hal_buffer_view_t** out_buffer_view) {
-  IREE_ASSERT_ARGUMENT(allocator);
-  iree_hal_buffer_params_canonicalize(&buffer_params);
-
-  // Not all HAL implementations support wrapping buffers, and of those that do
-  // some may only support it in special situations such as when the buffer is
-  // not DEVICE_VISIBLE. The user application can query whether the wrapping is
-  // possible and decide to use alternative means of upload if it is not; we
-  // make no policy (other than validity) over what's best here.
-  iree_hal_buffer_compatibility_t compatibility =
-      iree_hal_allocator_query_compatibility(
-          allocator,
-          iree_hal_buffer_params_with_usage(buffer_params,
-                                            IREE_HAL_BUFFER_USAGE_MAPPING),
-          (iree_device_size_t)data.data_length);
-  bool wrap_allowed = iree_all_bits_set(
-      compatibility, IREE_HAL_BUFFER_COMPATIBILITY_IMPORTABLE);
-  if (wrap_allowed) {
-    return iree_hal_buffer_view_wrap_heap_buffer(
-        allocator, shape, shape_rank, element_type, encoding_type,
-        buffer_params, data, data_allocator, out_buffer_view);
-  } else {
-    return iree_hal_buffer_view_allocate_buffer(
-        allocator, shape, shape_rank, element_type, encoding_type,
-        buffer_params, iree_make_const_byte_span(data.data, data.data_length),
-        out_buffer_view);
-  }
-}
-
 static iree_status_t iree_hal_buffer_view_generate_buffer_in_situ(
     iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape,
     iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
@@ -320,13 +259,15 @@
     return status;
   }
 
-  // Try to wrap the host allocation to avoid the extra allocation and copy -
-  // this call will either hang on to the memory or do the copy and immediately
-  // free it.
-  return iree_hal_buffer_view_wrap_or_clone_heap_buffer(
+  // Allocate the buffer with the data we just generated.
+  // We could try importing but that may create buffers that are slower to
+  // access and we want users to opt in to that instead.
+  status = iree_hal_buffer_view_allocate_buffer(
       allocator, shape, shape_rank, element_type, encoding_type, buffer_params,
-      iree_make_byte_span(host_ptr, allocation_size), host_allocator,
-      out_buffer_view);
+      iree_make_const_byte_span(host_ptr, allocation_size), out_buffer_view);
+
+  iree_allocator_free(host_allocator, host_ptr);
+  return status;
 }
 
 IREE_API_EXPORT iree_status_t iree_hal_buffer_view_generate_buffer(
diff --git a/iree/hal/buffer_view_util.h b/iree/hal/buffer_view_util.h
index 37ddcd2..a7d7f61 100644
--- a/iree/hal/buffer_view_util.h
+++ b/iree/hal/buffer_view_util.h
@@ -65,41 +65,6 @@
     iree_hal_buffer_params_t buffer_params, iree_const_byte_span_t initial_data,
     iree_hal_buffer_view_t** out_buffer_view);
 
-// Imports a host buffer using |allocator| and wraps it in a buffer view.
-//
-// This is equivalent to:
-//   1. iree_hal_allocator_wrap_buffer
-//   2. iree_hal_buffer_view_create
-//
-// NOTE: not all buffers can be imported and not all allocators support
-// importing. See iree_hal_allocator_wrap_buffer for more information.
-// Fails if the buffer cannot be imported.
-IREE_API_EXPORT iree_status_t iree_hal_buffer_view_wrap_heap_buffer(
-    iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    iree_hal_encoding_type_t encoding_type,
-    iree_hal_buffer_params_t buffer_params, iree_byte_span_t data,
-    iree_allocator_t data_allocator, iree_hal_buffer_view_t** out_buffer_view);
-
-// Tries to import a host buffer using |allocator| and wrap it in a buffer view.
-// If the buffer cannot be imported then a new buffer will be allocated and the
-// source data will be copied into it.
-//
-// This is equivalent to:
-//   if iree_hal_allocator_query_compatibility ok:
-//     1. iree_hal_allocator_wrap_buffer
-//     2. iree_hal_buffer_view_create
-//   else:
-//     1. iree_hal_allocator_allocate_buffer
-//     2. iree_hal_buffer_write_data
-//     3. iree_hal_buffer_view_create
-IREE_API_EXPORT iree_status_t iree_hal_buffer_view_wrap_or_clone_heap_buffer(
-    iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    iree_hal_encoding_type_t encoding_type,
-    iree_hal_buffer_params_t buffer_params, iree_byte_span_t data,
-    iree_allocator_t data_allocator, iree_hal_buffer_view_t** out_buffer_view);
-
 typedef iree_status_t(IREE_API_PTR* iree_hal_buffer_view_generator_callback_t)(
     iree_hal_buffer_mapping_t* mapping, void* user_data);
 
diff --git a/iree/hal/cts/buffer_mapping_test.h b/iree/hal/cts/buffer_mapping_test.h
index 0019c67..d1d8cde 100644
--- a/iree/hal/cts/buffer_mapping_test.h
+++ b/iree/hal/cts/buffer_mapping_test.h
@@ -540,8 +540,6 @@
   iree_hal_buffer_release(buffer);
 }
 
-// TODO(scotttodd): iree_hal_allocator_wrap_buffer
-
 }  // namespace cts
 }  // namespace hal
 }  // namespace iree
diff --git a/iree/hal/cuda/cuda_allocator.c b/iree/hal/cuda/cuda_allocator.c
index bf22231..af94cf2 100644
--- a/iree/hal/cuda/cuda_allocator.c
+++ b/iree/hal/cuda/cuda_allocator.c
@@ -153,7 +153,7 @@
 static iree_status_t iree_hal_cuda_allocator_allocate_buffer(
     iree_hal_allocator_t* IREE_RESTRICT base_allocator,
     const iree_hal_buffer_params_t* IREE_RESTRICT params,
-    iree_host_size_t allocation_size, iree_const_byte_span_t initial_data,
+    iree_device_size_t allocation_size, iree_const_byte_span_t initial_data,
     iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
   iree_hal_cuda_allocator_t* allocator =
       iree_hal_cuda_allocator_cast(base_allocator);
@@ -270,19 +270,11 @@
   iree_hal_buffer_destroy(base_buffer);
 }
 
-static iree_status_t iree_hal_cuda_allocator_wrap_buffer(
-    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
-    const iree_hal_buffer_params_t* IREE_RESTRICT params, iree_byte_span_t data,
-    iree_allocator_t data_allocator,
-    iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
-  return iree_make_status(IREE_STATUS_UNAVAILABLE,
-                          "wrapping of external buffers not supported");
-}
-
 static iree_status_t iree_hal_cuda_allocator_import_buffer(
     iree_hal_allocator_t* IREE_RESTRICT base_allocator,
     const iree_hal_buffer_params_t* IREE_RESTRICT params,
     iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
+    iree_hal_buffer_release_callback_t release_callback,
     iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
   return iree_make_status(IREE_STATUS_UNAVAILABLE,
                           "importing from external buffers not supported");
@@ -306,7 +298,6 @@
     .query_compatibility = iree_hal_cuda_allocator_query_compatibility,
     .allocate_buffer = iree_hal_cuda_allocator_allocate_buffer,
     .deallocate_buffer = iree_hal_cuda_allocator_deallocate_buffer,
-    .wrap_buffer = iree_hal_cuda_allocator_wrap_buffer,
     .import_buffer = iree_hal_cuda_allocator_import_buffer,
     .export_buffer = iree_hal_cuda_allocator_export_buffer,
 };
diff --git a/iree/hal/utils/buffer_transfer.c b/iree/hal/utils/buffer_transfer.c
index ae7dbc7..ce619b1 100644
--- a/iree/hal/utils/buffer_transfer.c
+++ b/iree/hal/utils/buffer_transfer.c
@@ -70,8 +70,7 @@
   if (!source_buffer) {
     // Allocate staging memory with a copy of the host data. We only initialize
     // the portion being transferred.
-    // TODO(benvanik): use wrap_buffer if supported to avoid the
-    // allocation/copy.
+    // TODO(benvanik): use import if supported to avoid the allocation/copy.
     // TODO(benvanik): make this device-local + host-visible? can be better for
     // uploads as we know we are never going to read it back.
     const iree_hal_buffer_params_t source_params = {
@@ -92,8 +91,7 @@
   if (!target_buffer) {
     // Allocate uninitialized staging memory for the transfer target.
     // We only allocate enough for the portion we are transfering.
-    // TODO(benvanik): use wrap_buffer if supported to avoid the
-    // allocation/copy.
+    // TODO(benvanik): use import if supported to avoid the allocation/copy.
     const iree_hal_buffer_params_t target_params = {
         .type = IREE_HAL_MEMORY_TYPE_HOST_LOCAL |
                 IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
diff --git a/iree/hal/vulkan/vma_allocator.cc b/iree/hal/vulkan/vma_allocator.cc
index df3829d..9cc167d 100644
--- a/iree/hal/vulkan/vma_allocator.cc
+++ b/iree/hal/vulkan/vma_allocator.cc
@@ -233,7 +233,7 @@
 static iree_status_t iree_hal_vulkan_vma_allocator_allocate_internal(
     iree_hal_vulkan_vma_allocator_t* IREE_RESTRICT allocator,
     const iree_hal_buffer_params_t* IREE_RESTRICT params,
-    iree_host_size_t allocation_size, iree_const_byte_span_t initial_data,
+    iree_device_size_t allocation_size, iree_const_byte_span_t initial_data,
     VmaAllocationCreateFlags flags,
     iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
   // Guard against the corner case where the requested buffer size is 0. The
@@ -353,7 +353,7 @@
 static iree_status_t iree_hal_vulkan_vma_allocator_allocate_buffer(
     iree_hal_allocator_t* IREE_RESTRICT base_allocator,
     const iree_hal_buffer_params_t* IREE_RESTRICT params,
-    iree_host_size_t allocation_size, iree_const_byte_span_t initial_data,
+    iree_device_size_t allocation_size, iree_const_byte_span_t initial_data,
     iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
   iree_hal_vulkan_vma_allocator_t* allocator =
       iree_hal_vulkan_vma_allocator_cast(base_allocator);
@@ -369,21 +369,13 @@
   iree_hal_buffer_destroy(base_buffer);
 }
 
-static iree_status_t iree_hal_vulkan_vma_allocator_wrap_buffer(
-    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
-    const iree_hal_buffer_params_t* IREE_RESTRICT params, iree_byte_span_t data,
-    iree_allocator_t data_allocator,
-    iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
-  // TODO(#7242): use VK_EXT_external_memory_host to import memory.
-  return iree_make_status(IREE_STATUS_UNAVAILABLE,
-                          "wrapping of external buffers not supported");
-}
-
 static iree_status_t iree_hal_vulkan_vma_allocator_import_buffer(
     iree_hal_allocator_t* IREE_RESTRICT base_allocator,
     const iree_hal_buffer_params_t* IREE_RESTRICT params,
     iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
+    iree_hal_buffer_release_callback_t release_callback,
     iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
+  // TODO(#7242): use VK_EXT_external_memory_host to import memory.
   return iree_make_status(IREE_STATUS_UNAVAILABLE,
                           "importing from external buffers not supported");
 }
@@ -408,7 +400,6 @@
     iree_hal_vulkan_vma_allocator_query_compatibility,
     /*.allocate_buffer=*/iree_hal_vulkan_vma_allocator_allocate_buffer,
     /*.deallocate_buffer=*/iree_hal_vulkan_vma_allocator_deallocate_buffer,
-    /*.wrap_buffer=*/iree_hal_vulkan_vma_allocator_wrap_buffer,
     /*.import_buffer=*/iree_hal_vulkan_vma_allocator_import_buffer,
     /*.export_buffer=*/iree_hal_vulkan_vma_allocator_export_buffer,
 };
diff --git a/iree/modules/hal/module.c b/iree/modules/hal/module.c
index aa817d3..38f2327 100644
--- a/iree/modules/hal/module.c
+++ b/iree/modules/hal/module.c
@@ -264,16 +264,10 @@
   return iree_ok_status();
 }
 
-static iree_status_t iree_hal_module_map_data_ctl(
-    void* self, iree_allocator_command_t command, const void* params,
-    void** inout_ptr) {
-  IREE_ASSERT_EQ(command, IREE_ALLOCATOR_COMMAND_FREE);
-  if (IREE_UNLIKELY(command != IREE_ALLOCATOR_COMMAND_FREE)) {
-    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION);
-  }
-  iree_vm_buffer_t* buffer = (iree_vm_buffer_t*)self;
-  iree_vm_buffer_release(buffer);
-  return iree_ok_status();
+static void iree_hal_module_mapped_buffer_release(void* user_data,
+                                                  iree_hal_buffer_t* buffer) {
+  iree_vm_buffer_t* backing_buffer = (iree_vm_buffer_t*)user_data;
+  iree_vm_buffer_release(backing_buffer);
 }
 
 IREE_VM_ABI_EXPORT(iree_hal_module_allocator_map_byte_buffer,  //
@@ -329,15 +323,19 @@
       .usage = buffer_usage,
       .access = allowed_access,
   };
-  iree_allocator_t buffer_deref_allocator = {
-      .self = source,
-      .ctl = iree_hal_module_map_data_ctl,
+  iree_hal_external_buffer_t external_buffer = {
+      .type = IREE_HAL_EXTERNAL_BUFFER_TYPE_HOST_ALLOCATION,
+      .flags = IREE_HAL_EXTERNAL_BUFFER_FLAG_NONE,
+      .size = length,
+      .handle.host_allocation.ptr = source->data.data + offset,
+  };
+  iree_hal_buffer_release_callback_t release_callback = {
+      .fn = iree_hal_module_mapped_buffer_release,
+      .user_data = source,
   };
   iree_hal_buffer_t* buffer = NULL;
-  iree_status_t status = iree_hal_allocator_wrap_buffer(
-      allocator, params,
-      iree_make_byte_span(source->data.data + offset, length),
-      buffer_deref_allocator, &buffer);
+  iree_status_t status = iree_hal_allocator_import_buffer(
+      allocator, params, &external_buffer, release_callback, &buffer);
   if (iree_status_is_ok(status)) {
     // Mapping succeeded - retain the source buffer that'll be released by
     // iree_hal_module_map_data_ctl when the mapping is no longer used.
diff --git a/iree/runtime/demo/hello_world_explained.c b/iree/runtime/demo/hello_world_explained.c
index 2a981aa..5735963 100644
--- a/iree/runtime/demo/hello_world_explained.c
+++ b/iree/runtime/demo/hello_world_explained.c
@@ -191,9 +191,8 @@
     iree_hal_buffer_view_t* arg0 = NULL;
     if (iree_status_is_ok(status)) {
       static const iree_hal_dim_t arg0_shape[1] = {4};
-      static const float iree_alignas(64)
-          arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};
-      status = iree_hal_buffer_view_wrap_or_clone_heap_buffer(
+      static const float arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};
+      status = iree_hal_buffer_view_allocate_buffer(
           device_allocator,
           // Shape dimensions and rank:
           arg0_shape, IREE_ARRAYSIZE(arg0_shape),
@@ -211,8 +210,7 @@
               .usage = IREE_HAL_BUFFER_USAGE_ALL,
           },
-          // The actual heap buffer to wrap or clone and its allocator:
-          iree_make_byte_span((void*)arg0_data, sizeof(arg0_data)),
-          iree_allocator_null(),
+          // The initial contents copied into the newly allocated buffer:
+          iree_make_const_byte_span(arg0_data, sizeof(arg0_data)),
           // Buffer view + storage are returned and owned by the caller:
           &arg0);
     }
@@ -231,9 +229,8 @@
     iree_hal_buffer_view_t* arg1 = NULL;
     if (iree_status_is_ok(status)) {
       static const iree_hal_dim_t arg1_shape[1] = {4};
-      static const float iree_alignas(64)
-          arg1_data[4] = {10.0f, 100.0f, 1000.0f, 10000.0f};
-      status = iree_hal_buffer_view_wrap_or_clone_heap_buffer(
+      static const float arg1_data[4] = {10.0f, 100.0f, 1000.0f, 10000.0f};
+      status = iree_hal_buffer_view_allocate_buffer(
           device_allocator, arg1_shape, IREE_ARRAYSIZE(arg1_shape),
           IREE_HAL_ELEMENT_TYPE_FLOAT_32,
           IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
@@ -243,8 +240,7 @@
               .access = IREE_HAL_MEMORY_ACCESS_READ,
               .usage = IREE_HAL_BUFFER_USAGE_ALL,
           },
-          iree_make_byte_span((void*)arg1_data, sizeof(arg1_data)),
-          iree_allocator_null(), &arg1);
+          iree_make_const_byte_span(arg1_data, sizeof(arg1_data)), &arg1);
     }
     if (iree_status_is_ok(status)) {
       IREE_IGNORE_ERROR(iree_hal_buffer_view_fprint(
diff --git a/iree/runtime/demo/hello_world_terse.c b/iree/runtime/demo/hello_world_terse.c
index 121a2e5..c82a45c 100644
--- a/iree/runtime/demo/hello_world_terse.c
+++ b/iree/runtime/demo/hello_world_terse.c
@@ -78,9 +78,8 @@
   // %arg0: tensor<4xf32>
   iree_hal_buffer_view_t* arg0 = NULL;
   static const iree_hal_dim_t arg0_shape[1] = {4};
-  static const float iree_alignas(IREE_HAL_HEAP_BUFFER_ALIGNMENT)
-      arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};
-  IREE_CHECK_OK(iree_hal_buffer_view_wrap_or_clone_heap_buffer(
+  static const float arg0_data[4] = {1.0f, 1.1f, 1.2f, 1.3f};
+  IREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer(
       iree_runtime_session_device_allocator(session), arg0_shape,
       IREE_ARRAYSIZE(arg0_shape), IREE_HAL_ELEMENT_TYPE_FLOAT_32,
       IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
@@ -90,8 +89,7 @@
           .access = IREE_HAL_MEMORY_ACCESS_READ,
           .usage = IREE_HAL_BUFFER_USAGE_ALL,
       },
-      iree_make_byte_span((void*)arg0_data, sizeof(arg0_data)),
-      iree_allocator_null(), &arg0));
+      iree_make_const_byte_span(arg0_data, sizeof(arg0_data)), &arg0));
   IREE_CHECK_OK(iree_hal_buffer_view_fprint(
       stdout, arg0, /*max_element_count=*/4096,
       iree_runtime_session_host_allocator(session)));
@@ -103,9 +101,8 @@
   // %arg1: tensor<4xf32>
   iree_hal_buffer_view_t* arg1 = NULL;
   static const iree_hal_dim_t arg1_shape[1] = {4};
-  static const float iree_alignas(IREE_HAL_HEAP_BUFFER_ALIGNMENT)
-      arg1_data[4] = {10.0f, 100.0f, 1000.0f, 10000.0f};
-  IREE_CHECK_OK(iree_hal_buffer_view_wrap_or_clone_heap_buffer(
+  static const float arg1_data[4] = {10.0f, 100.0f, 1000.0f, 10000.0f};
+  IREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer(
       iree_runtime_session_device_allocator(session), arg1_shape,
       IREE_ARRAYSIZE(arg1_shape), IREE_HAL_ELEMENT_TYPE_FLOAT_32,
       IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
@@ -115,8 +112,7 @@
           .access = IREE_HAL_MEMORY_ACCESS_READ,
           .usage = IREE_HAL_BUFFER_USAGE_ALL,
       },
-      iree_make_byte_span((void*)arg1_data, sizeof(arg1_data)),
-      iree_allocator_null(), &arg1));
+      iree_make_const_byte_span(arg1_data, sizeof(arg1_data)), &arg1));
   IREE_CHECK_OK(iree_hal_buffer_view_fprint(
       stdout, arg1, /*max_element_count=*/4096,
       iree_runtime_session_host_allocator(session)));
diff --git a/iree/samples/custom_modules/custom_modules_test.cc b/iree/samples/custom_modules/custom_modules_test.cc
index df2f190..ec5ac19 100644
--- a/iree/samples/custom_modules/custom_modules_test.cc
+++ b/iree/samples/custom_modules/custom_modules_test.cc
@@ -131,19 +131,19 @@
 TEST_F(CustomModulesTest, PrintTensor) {
   // Allocate the buffer we'll be printing.
   static iree_hal_dim_t kShape[] = {2, 4};
-  static float iree_alignas(IREE_HAL_HEAP_BUFFER_ALIGNMENT)
-      kBufferContents[2 * 4] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
+  static const float kBufferContents[2 * 4] = {0.0f, 1.0f, 2.0f, 3.0f,
+                                               4.0f, 5.0f, 6.0f, 7.0f};
   iree_hal_buffer_params_t params = {0};
   params.type =
       IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
   params.usage = IREE_HAL_BUFFER_USAGE_ALL;
   iree_hal_buffer_view_t* buffer_view = nullptr;
-  IREE_ASSERT_OK(iree_hal_buffer_view_wrap_or_clone_heap_buffer(
+  IREE_ASSERT_OK(iree_hal_buffer_view_allocate_buffer(
       hal_allocator_, kShape, IREE_ARRAYSIZE(kShape),
       IREE_HAL_ELEMENT_TYPE_FLOAT_32, IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
       params,
-      iree_make_byte_span((void*)kBufferContents, sizeof(kBufferContents)),
-      iree_allocator_null(), &buffer_view));
+      iree_make_const_byte_span(kBufferContents, sizeof(kBufferContents)),
+      &buffer_view));
 
   // Pass in the tensor as an expanded HAL buffer.
   iree::vm::ref<iree_vm_list_t> inputs;
@@ -179,19 +179,19 @@
 TEST_F(CustomModulesTest, RoundTripTensor) {
   // Allocate the buffer we'll be printing/parsing.
   static iree_hal_dim_t kShape[] = {2, 4};
-  static float iree_alignas(IREE_HAL_HEAP_BUFFER_ALIGNMENT)
-      kBufferContents[2 * 4] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
+  static const float kBufferContents[2 * 4] = {0.0f, 1.0f, 2.0f, 3.0f,
+                                               4.0f, 5.0f, 6.0f, 7.0f};
   iree_hal_buffer_params_t params = {0};
   params.type =
       IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
   params.usage = IREE_HAL_BUFFER_USAGE_ALL;
   iree_hal_buffer_view_t* buffer_view = nullptr;
-  IREE_ASSERT_OK(iree_hal_buffer_view_wrap_or_clone_heap_buffer(
+  IREE_ASSERT_OK(iree_hal_buffer_view_allocate_buffer(
       hal_allocator_, kShape, IREE_ARRAYSIZE(kShape),
       IREE_HAL_ELEMENT_TYPE_FLOAT_32, IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
       params,
-      iree_make_byte_span((void*)kBufferContents, sizeof(kBufferContents)),
-      iree_allocator_null(), &buffer_view));
+      iree_make_const_byte_span(kBufferContents, sizeof(kBufferContents)),
+      &buffer_view));
 
   // Pass in the tensor as an expanded HAL buffer.
   iree::vm::ref<iree_vm_list_t> inputs;
diff --git a/iree/samples/dynamic_shapes/main.c b/iree/samples/dynamic_shapes/main.c
index 8e532ce..e11affa 100644
--- a/iree/samples/dynamic_shapes/main.c
+++ b/iree/samples/dynamic_shapes/main.c
@@ -17,8 +17,6 @@
   iree_hal_buffer_view_t* arg0 = NULL;
   const iree_hal_dim_t arg0_shape[1] = {values_length};
 
-  // TODO(scotttodd): use iree_hal_buffer_view_wrap_or_clone_heap_buffer
-  //   * debugging some apparent memory corruption with the stack-local value
   iree_status_t status = iree_ok_status();
   if (iree_status_is_ok(status)) {
     status = iree_hal_buffer_view_allocate_buffer(
@@ -66,8 +64,6 @@
   iree_hal_buffer_view_t* arg0 = NULL;
   const iree_hal_dim_t arg0_shape[2] = {values_length / 3, 3};
 
-  // TODO(scotttodd): use iree_hal_buffer_view_wrap_or_clone_heap_buffer
-  //   * debugging some apparent memory corruption with the stack-local value
   iree_status_t status = iree_ok_status();
   if (iree_status_is_ok(status)) {
     status = iree_hal_buffer_view_allocate_buffer(
@@ -109,8 +105,6 @@
   iree_hal_buffer_view_t* arg0 = NULL;
   const iree_hal_dim_t arg0_shape[1] = {values_length};
 
-  // TODO(scotttodd): use iree_hal_buffer_view_wrap_or_clone_heap_buffer
-  //   * debugging some apparent memory corruption with the stack-local value
   iree_status_t status = iree_ok_status();
   if (iree_status_is_ok(status)) {
     status = iree_hal_buffer_view_allocate_buffer(
diff --git a/iree/samples/variables_and_state/main.c b/iree/samples/variables_and_state/main.c
index 12cbece..0014ac6 100644
--- a/iree/samples/variables_and_state/main.c
+++ b/iree/samples/variables_and_state/main.c
@@ -42,8 +42,6 @@
   iree_hal_buffer_view_t* arg0 = NULL;
   int arg0_data[1] = {new_value};
 
-  // TODO(scotttodd): use iree_hal_buffer_view_wrap_or_clone_heap_buffer
-  //   * debugging some apparent memory corruption with the stack-local value
   iree_status_t status = iree_ok_status();
   if (iree_status_is_ok(status)) {
     status = iree_hal_buffer_view_allocate_buffer(
@@ -77,8 +75,6 @@
   iree_hal_buffer_view_t* arg0 = NULL;
   int arg0_data[1] = {x};
 
-  // TODO(scotttodd): use iree_hal_buffer_view_wrap_or_clone_heap_buffer
-  //   * debugging some apparent memory corruption with the stack-local value
   iree_status_t status = iree_ok_status();
   if (iree_status_is_ok(status)) {
     status = iree_hal_buffer_view_allocate_buffer(
diff --git a/iree/tools/utils/image_util.c b/iree/tools/utils/image_util.c
index bf6c6f9..bba193f 100644
--- a/iree/tools/utils/image_util.c
+++ b/iree/tools/utils/image_util.c
@@ -135,7 +135,7 @@
     iree_host_size_t element_byte =
         iree_hal_element_dense_byte_count(element_type);
-    // SINT_8 and UINT_8 perform direct buffer wrap.
-    result = iree_hal_buffer_view_wrap_or_clone_heap_buffer(
+    // SINT_8 and UINT_8 copy the pixel data directly into a new buffer.
+    result = iree_hal_buffer_view_allocate_buffer(
         allocator, shape, shape_rank, element_type,
         IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
         (iree_hal_buffer_params_t){
@@ -144,8 +144,8 @@
             .access = IREE_HAL_MEMORY_ACCESS_READ,
             .usage = IREE_HAL_BUFFER_USAGE_ALL,
         },
-        iree_make_byte_span((void*)pixel_data, element_byte * buffer_length),
-        iree_allocator_null(), out_buffer_view);
+        iree_make_const_byte_span(pixel_data, element_byte * buffer_length),
+        out_buffer_view);
   }
   stbi_image_free(pixel_data);
   IREE_TRACE_ZONE_END(z0);