Add an initial_data parameter to iree_hal_allocator_allocate_buffer.
This removes the need for a large number of the mapping/write_data calls.
On implementations where providing initial data is cheap and doing anything
else is extremely expensive (metal/webgpu) this saves us from needing to stage
copies. Existing implementations still do the same thing as before, but they
now at least have the ability to do something better.
diff --git a/bindings/python/iree/runtime/vm.cc b/bindings/python/iree/runtime/vm.cc
index d3d9dfa..9c1afe5 100644
--- a/bindings/python/iree/runtime/vm.cc
+++ b/bindings/python/iree/runtime/vm.cc
@@ -218,16 +218,15 @@
   // TODO(laurenzo): Expand to other layouts as needed.
   // TODO(laurenzo): Wrap and retain original buffer (depends_on_pyobject=true).
   iree_hal_buffer_t* raw_buffer;
-  CheckApiStatus(iree_hal_allocator_allocate_buffer(
-                     device.allocator(),
-                     static_cast<iree_hal_memory_type_t>(
-                         IREE_HAL_MEMORY_TYPE_HOST_LOCAL |
-                         IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE),
-                     IREE_HAL_BUFFER_USAGE_ALL, py_view.len, &raw_buffer),
-                 "Failed to allocate device visible buffer");
   CheckApiStatus(
-      iree_hal_buffer_write_data(raw_buffer, 0, py_view.buf, py_view.len),
-      "Error writing to input buffer");
+      iree_hal_allocator_allocate_buffer(
+          device.allocator(),
+          static_cast<iree_hal_memory_type_t>(
+              IREE_HAL_MEMORY_TYPE_HOST_LOCAL |
+              IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE),
+          IREE_HAL_BUFFER_USAGE_ALL, py_view.len,
+          iree_make_const_byte_span(py_view.buf, py_view.len), &raw_buffer),
+      "Failed to allocate device visible buffer");
 
   // Only capture the reference to the exporting object (incrementing it)
   // once guaranteed successful.
diff --git a/bindings/tflite/tensor.c b/bindings/tflite/tensor.c
index 1fdb16d..c29c018 100644
--- a/bindings/tflite/tensor.c
+++ b/bindings/tflite/tensor.c
@@ -140,7 +140,8 @@
       iree_hal_allocator_allocate_buffer(
           buffer_allocator,
           IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL | IREE_HAL_MEMORY_TYPE_HOST_VISIBLE,
-          IREE_HAL_BUFFER_USAGE_ALL, allocation_size, &tensor->buffer));
+          IREE_HAL_BUFFER_USAGE_ALL, allocation_size,
+          iree_const_byte_span_empty(), &tensor->buffer));
 
   // Map the buffer memory immediately. The tflite API doesn't let us know if
   // this is a buffer the user will actually touch or some state buffer that is
diff --git a/experimental/rocm/rocm_allocator.c b/experimental/rocm/rocm_allocator.c
index bcb80b8..4d2e4da 100644
--- a/experimental/rocm/rocm_allocator.c
+++ b/experimental/rocm/rocm_allocator.c
@@ -128,7 +128,7 @@
 static iree_status_t iree_hal_rocm_allocator_allocate_buffer(
     iree_hal_allocator_t* base_allocator, iree_hal_memory_type_t memory_type,
     iree_hal_buffer_usage_t allowed_usage, iree_host_size_t allocation_size,
-    iree_hal_buffer_t** out_buffer) {
+    iree_const_byte_span_t initial_data, iree_hal_buffer_t** out_buffer) {
   iree_hal_rocm_allocator_t* allocator =
       iree_hal_rocm_allocator_cast(base_allocator);
   // Guard against the corner case where the requested buffer size is 0. The
@@ -172,6 +172,14 @@
         /*byte_offset=*/0,
         /*byte_length=*/allocation_size, device_ptr, host_ptr, out_buffer);
   }
+
+  // NOTE: there may be HIP methods for doing this more efficiently.
+  if (iree_status_is_ok(status) &&
+      !iree_const_byte_span_is_empty(initial_data)) {
+    status = iree_hal_buffer_write_data(*out_buffer, 0, initial_data.data,
+                                        initial_data.data_length);
+  }
+
   if (iree_status_is_ok(status)) {
     IREE_STATISTICS(iree_hal_allocator_statistics_record_alloc(
         &allocator->statistics, memory_type, allocation_size));
diff --git a/iree/base/allocator.h b/iree/base/allocator.h
index ac06123..ea2acf6 100644
--- a/iree/base/allocator.h
+++ b/iree/base/allocator.h
@@ -69,6 +69,17 @@
   return v;
 }
 
+// Returns a byte span with NULL data and zero length.
+static inline iree_byte_span_t iree_byte_span_empty(void) {
+  iree_byte_span_t v = {NULL, 0};
+  return v;
+}
+
+// Returns true if |span| has NULL data or a zero length.
+static inline bool iree_byte_span_is_empty(iree_byte_span_t span) {
+  return span.data == NULL || span.data_length == 0;
+}
+
 // A span of constant bytes (ala std::span of const uint8_t).
 typedef struct iree_const_byte_span_t {
   const uint8_t* data;
@@ -81,6 +92,17 @@
   return v;
 }
 
+// Returns a constant byte span with NULL data and zero length.
+static inline iree_const_byte_span_t iree_const_byte_span_empty(void) {
+  iree_const_byte_span_t v = {NULL, 0};
+  return v;
+}
+
+// Returns true if |span| has NULL data or a zero length.
+static inline bool iree_const_byte_span_is_empty(iree_const_byte_span_t span) {
+  return span.data == NULL || span.data_length == 0;
+}
+
 //===----------------------------------------------------------------------===//
 // Totally shady stack allocation
 //===----------------------------------------------------------------------===//
diff --git a/iree/hal/allocator.c b/iree/hal/allocator.c
index e37a603..c6f4794 100644
--- a/iree/hal/allocator.c
+++ b/iree/hal/allocator.c
@@ -86,13 +86,14 @@
 IREE_API_EXPORT iree_status_t iree_hal_allocator_allocate_buffer(
     iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
     iree_hal_buffer_usage_t allowed_usage, iree_host_size_t allocation_size,
-    iree_hal_buffer_t** out_buffer) {
+    iree_const_byte_span_t initial_data, iree_hal_buffer_t** out_buffer) {
   IREE_ASSERT_ARGUMENT(allocator);
   IREE_ASSERT_ARGUMENT(out_buffer);
   *out_buffer = NULL;
   IREE_TRACE_ZONE_BEGIN(z0);
   iree_status_t status = _VTABLE_DISPATCH(allocator, allocate_buffer)(
-      allocator, memory_type, allowed_usage, allocation_size, out_buffer);
+      allocator, memory_type, allowed_usage, allocation_size, initial_data,
+      out_buffer);
   IREE_TRACE_ZONE_END(z0);
   return status;
 }
diff --git a/iree/hal/allocator.h b/iree/hal/allocator.h
index d5e7a2d..a7d9d1d 100644
--- a/iree/hal/allocator.h
+++ b/iree/hal/allocator.h
@@ -125,6 +125,9 @@
     iree_hal_buffer_usage_t intended_usage, iree_device_size_t allocation_size);
 
 // Allocates a buffer from the allocator.
+// If |initial_data| is provided then the bytes will be copied into the device
+// buffer. To avoid the copy when constant data is used prefer
+// iree_hal_allocator_wrap_buffer when available.
 // Fails if the memory type requested for the given usage cannot be serviced.
 // Callers can use iree_hal_allocator_can_allocate to decide their memory use
 // strategy.
@@ -142,7 +145,7 @@
 IREE_API_EXPORT iree_status_t iree_hal_allocator_allocate_buffer(
     iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
     iree_hal_buffer_usage_t allowed_usage, iree_host_size_t allocation_size,
-    iree_hal_buffer_t** out_buffer);
+    iree_const_byte_span_t initial_data, iree_hal_buffer_t** out_buffer);
 
 // Wraps an existing host allocation in a buffer.
 //
@@ -211,7 +214,7 @@
   iree_status_t(IREE_API_PTR* allocate_buffer)(
       iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
       iree_hal_buffer_usage_t allowed_usage, iree_host_size_t allocation_size,
-      iree_hal_buffer_t** out_buffer);
+      iree_const_byte_span_t initial_data, iree_hal_buffer_t** out_buffer);
 
   iree_status_t(IREE_API_PTR* wrap_buffer)(
       iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
diff --git a/iree/hal/allocator_heap.c b/iree/hal/allocator_heap.c
index 84ce645..7513803 100644
--- a/iree/hal/allocator_heap.c
+++ b/iree/hal/allocator_heap.c
@@ -153,7 +153,7 @@
 static iree_status_t iree_hal_heap_allocator_allocate_buffer(
     iree_hal_allocator_t* base_allocator, iree_hal_memory_type_t memory_type,
     iree_hal_buffer_usage_t allowed_usage, iree_host_size_t allocation_size,
-    iree_hal_buffer_t** out_buffer) {
+    iree_const_byte_span_t initial_data, iree_hal_buffer_t** out_buffer) {
   iree_hal_heap_allocator_t* allocator =
       iree_hal_heap_allocator_cast(base_allocator);
 
@@ -165,10 +165,24 @@
   // Allocate the buffer (both the wrapper and the contents).
   iree_hal_heap_allocator_statistics_t* statistics = NULL;
   IREE_STATISTICS(statistics = &allocator->statistics);
-  return iree_hal_heap_buffer_create(base_allocator, statistics, memory_type,
-                                     allowed_access, allowed_usage,
-                                     allocation_size, allocator->data_allocator,
-                                     allocator->host_allocator, out_buffer);
+  iree_hal_buffer_t* buffer = NULL;
+  IREE_RETURN_IF_ERROR(iree_hal_heap_buffer_create(
+      base_allocator, statistics, memory_type, allowed_access, allowed_usage,
+      allocation_size, allocator->data_allocator, allocator->host_allocator,
+      &buffer));
+
+  iree_status_t status = iree_ok_status();
+  if (!iree_const_byte_span_is_empty(initial_data)) {
+    status = iree_hal_buffer_write_data(buffer, 0, initial_data.data,
+                                        initial_data.data_length);
+  }
+
+  if (iree_status_is_ok(status)) {
+    *out_buffer = buffer;
+  } else {
+    iree_hal_buffer_release(buffer);
+  }
+  return status;
 }
 
 static iree_status_t iree_hal_heap_allocator_wrap_buffer(
diff --git a/iree/hal/buffer_view.c b/iree/hal/buffer_view.c
index 3c32045..57c99eb 100644
--- a/iree/hal/buffer_view.c
+++ b/iree/hal/buffer_view.c
@@ -98,7 +98,7 @@
     iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape,
     iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
     iree_hal_encoding_type_t encoding_type, iree_hal_memory_type_t memory_type,
-    iree_hal_buffer_usage_t allowed_usage,
+    iree_hal_buffer_usage_t allowed_usage, iree_const_byte_span_t initial_data,
     iree_hal_buffer_view_t** out_buffer_view) {
   IREE_ASSERT_ARGUMENT(allocator);
   IREE_ASSERT_ARGUMENT(out_buffer_view);
@@ -110,8 +110,9 @@
 
   iree_hal_buffer_t* buffer = NULL;
   if (iree_status_is_ok(status)) {
-    status = iree_hal_allocator_allocate_buffer(
-        allocator, memory_type, allowed_usage, allocation_size, &buffer);
+    status = iree_hal_allocator_allocate_buffer(allocator, memory_type,
+                                                allowed_usage, allocation_size,
+                                                initial_data, &buffer);
   }
 
   if (iree_status_is_ok(status)) {
@@ -125,40 +126,6 @@
   return status;
 }
 
-IREE_API_EXPORT iree_status_t iree_hal_buffer_view_clone_heap_buffer(
-    iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    iree_hal_encoding_type_t encoding_type, iree_hal_memory_type_t memory_type,
-    iree_hal_buffer_usage_t allowed_usage, iree_const_byte_span_t data,
-    iree_hal_buffer_view_t** out_buffer_view) {
-  IREE_ASSERT_ARGUMENT(allocator);
-  IREE_ASSERT_ARGUMENT(out_buffer_view);
-  IREE_TRACE_ZONE_BEGIN(z0);
-
-  // Allocate the buffer.
-  iree_hal_buffer_view_t* buffer_view = NULL;
-  IREE_RETURN_AND_END_ZONE_IF_ERROR(
-      z0, iree_hal_buffer_view_allocate_buffer(
-              allocator, shape, shape_rank, element_type, encoding_type,
-              memory_type, allowed_usage, &buffer_view));
-
-  // Copy all of the data into it in the worst way possible.
-  // If you find yourself coming here from profiling:
-  //   Don't clone data. Allocate and then populate it in-place.
-  //   -or-
-  //   Schedule sequences of DMA transfers using iree_hal_command_buffer_t.
-  iree_status_t status = iree_hal_buffer_write_data(
-      iree_hal_buffer_view_buffer(buffer_view), 0, data.data, data.data_length);
-
-  if (iree_status_is_ok(status)) {
-    *out_buffer_view = buffer_view;
-  } else {
-    iree_hal_buffer_view_release(buffer_view);
-  }
-  IREE_TRACE_ZONE_END(z0);
-  return status;
-}
-
 IREE_API_EXPORT iree_status_t iree_hal_buffer_view_wrap_heap_buffer(
     iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape,
     iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
@@ -212,7 +179,7 @@
         allocator, shape, shape_rank, element_type, encoding_type, memory_type,
         allowed_access, allowed_usage, data, data_allocator, out_buffer_view);
   } else {
-    return iree_hal_buffer_view_clone_heap_buffer(
+    return iree_hal_buffer_view_allocate_buffer(
         allocator, shape, shape_rank, element_type, encoding_type, memory_type,
         allowed_usage, iree_make_const_byte_span(data.data, data.data_length),
         out_buffer_view);
@@ -572,7 +539,7 @@
       IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
       IREE_HAL_BUFFER_USAGE_TRANSFER | IREE_HAL_BUFFER_USAGE_MAPPING |
           IREE_HAL_BUFFER_USAGE_DISPATCH,
-      buffer_length, &buffer));
+      buffer_length, iree_const_byte_span_empty(), &buffer));
 
   // Parse the elements directly into the buffer.
   iree_hal_buffer_mapping_t buffer_mapping;
diff --git a/iree/hal/buffer_view.h b/iree/hal/buffer_view.h
index 9a0c623..045ac37 100644
--- a/iree/hal/buffer_view.h
+++ b/iree/hal/buffer_view.h
@@ -213,23 +213,7 @@
     iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape,
     iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
     iree_hal_encoding_type_t encoding_type, iree_hal_memory_type_t memory_type,
-    iree_hal_buffer_usage_t allowed_usage,
-    iree_hal_buffer_view_t** out_buffer_view);
-
-// Clones a host buffer using |allocator| and wraps it in a buffer view.
-// This is equivalent to:
-//   1. iree_hal_allocator_allocate_buffer
-//   2. iree_hal_buffer_write_data
-//   3. iree_hal_buffer_view_create
-//
-// Always prefer allocating a device buffer and populating it in place.
-// If cloning multiple buffers it is better to use iree_hal_command_buffer_ts to
-// batch up the memory transfer operations.
-IREE_API_EXPORT iree_status_t iree_hal_buffer_view_clone_heap_buffer(
-    iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape,
-    iree_host_size_t shape_rank, iree_hal_element_type_t element_type,
-    iree_hal_encoding_type_t encoding_type, iree_hal_memory_type_t memory_type,
-    iree_hal_buffer_usage_t allowed_usage, iree_const_byte_span_t data,
+    iree_hal_buffer_usage_t allowed_usage, iree_const_byte_span_t initial_data,
     iree_hal_buffer_view_t** out_buffer_view);
 
 // Imports a host buffer using |allocator| and wraps it in a buffer view.
diff --git a/iree/hal/cts/allocator_test.h b/iree/hal/cts/allocator_test.h
index 506d6ee..d4d4c50 100644
--- a/iree/hal/cts/allocator_test.h
+++ b/iree/hal/cts/allocator_test.h
@@ -85,7 +85,8 @@
 
   iree_hal_buffer_t* buffer;
   IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
-      device_allocator_, memory_type, buffer_usage, kAllocationSize, &buffer));
+      device_allocator_, memory_type, buffer_usage, kAllocationSize,
+      iree_const_byte_span_empty(), &buffer));
 
   // At a mimimum, the requested memory type should be respected.
   // Additional bits may be optionally set depending on the allocator.
@@ -108,7 +109,7 @@
   iree_hal_buffer_t* buffer;
   IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
       device_allocator_, memory_type, buffer_usage, /*allocation_size=*/0,
-      &buffer));
+      iree_const_byte_span_empty(), &buffer));
 
   iree_hal_buffer_release(buffer);
 }
diff --git a/iree/hal/cts/buffer_mapping_test.h b/iree/hal/cts/buffer_mapping_test.h
index 106b24d..f8229c1 100644
--- a/iree/hal/cts/buffer_mapping_test.h
+++ b/iree/hal/cts/buffer_mapping_test.h
@@ -46,7 +46,8 @@
 
   iree_hal_buffer_t* buffer;
   IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
-      device_allocator_, memory_type, buffer_usage, kAllocationSize, &buffer));
+      device_allocator_, memory_type, buffer_usage, kAllocationSize,
+      iree_const_byte_span_empty(), &buffer));
 
   EXPECT_TRUE(
       iree_all_bits_set(iree_hal_buffer_memory_type(buffer), memory_type));
@@ -63,7 +64,8 @@
 
   iree_hal_buffer_t* buffer;
   IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
-      device_allocator_, memory_type, buffer_usage, kAllocationSize, &buffer));
+      device_allocator_, memory_type, buffer_usage, kAllocationSize,
+      iree_const_byte_span_empty(), &buffer));
 
   IREE_ASSERT_OK(iree_hal_buffer_zero(buffer, /*byte_offset=*/0,
                                       /*byte_length=*/kAllocationSize));
@@ -85,7 +87,8 @@
 
   iree_hal_buffer_t* buffer;
   IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
-      device_allocator_, memory_type, buffer_usage, kAllocationSize, &buffer));
+      device_allocator_, memory_type, buffer_usage, kAllocationSize,
+      iree_const_byte_span_empty(), &buffer));
 
   IREE_ASSERT_OK(iree_hal_buffer_zero(buffer, /*byte_offset=*/0,
                                       /*byte_length=*/kAllocationSize));
@@ -113,7 +116,8 @@
 
   iree_hal_buffer_t* buffer;
   IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
-      device_allocator_, memory_type, buffer_usage, kAllocationSize, &buffer));
+      device_allocator_, memory_type, buffer_usage, kAllocationSize,
+      iree_const_byte_span_empty(), &buffer));
 
   uint8_t fill_value = 0x07;
   IREE_ASSERT_OK(iree_hal_buffer_fill(buffer, /*byte_offset=*/0,
@@ -138,7 +142,8 @@
 
   iree_hal_buffer_t* buffer;
   IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
-      device_allocator_, memory_type, buffer_usage, kAllocationSize, &buffer));
+      device_allocator_, memory_type, buffer_usage, kAllocationSize,
+      iree_const_byte_span_empty(), &buffer));
 
   uint8_t fill_value = 0x07;
   std::vector<uint8_t> reference_buffer(kAllocationSize);
@@ -162,10 +167,10 @@
   iree_hal_buffer_t* buffer_b;
   IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
       device_allocator_, memory_type, buffer_usage, kAllocationSize,
-      &buffer_a));
+      iree_const_byte_span_empty(), &buffer_a));
   IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
       device_allocator_, memory_type, buffer_usage, kAllocationSize,
-      &buffer_b));
+      iree_const_byte_span_empty(), &buffer_b));
 
   uint8_t fill_value = 0x07;
   IREE_ASSERT_OK(iree_hal_buffer_fill(buffer_a, /*byte_offset=*/0,
diff --git a/iree/hal/cts/command_buffer_test.h b/iree/hal/cts/command_buffer_test.h
index 60df42d..77fe604 100644
--- a/iree/hal/cts/command_buffer_test.h
+++ b/iree/hal/cts/command_buffer_test.h
@@ -44,7 +44,8 @@
     IREE_CHECK_OK(iree_hal_allocator_allocate_buffer(
         iree_hal_device_allocator(device_),
         IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL | IREE_HAL_MEMORY_TYPE_HOST_VISIBLE,
-        IREE_HAL_BUFFER_USAGE_ALL, buffer_size, &device_buffer));
+        IREE_HAL_BUFFER_USAGE_ALL, buffer_size, iree_const_byte_span_empty(),
+        &device_buffer));
 
     IREE_CHECK_OK(iree_hal_command_buffer_begin(command_buffer));
     // Start with a zero fill on the entire buffer...
@@ -141,26 +142,28 @@
       IREE_HAL_COMMAND_CATEGORY_TRANSFER, IREE_HAL_QUEUE_AFFINITY_ANY,
       &command_buffer));
 
+  uint8_t i8_val = 0x54;
+  std::vector<uint8_t> reference_buffer(kBufferSize);
+  std::memset(reference_buffer.data(), i8_val, kBufferSize);
+
   // Create and fill a host buffer.
   iree_hal_buffer_t* host_buffer;
   IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
       device_allocator_,
       IREE_HAL_MEMORY_TYPE_HOST_VISIBLE | IREE_HAL_MEMORY_TYPE_HOST_CACHED |
           IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
-      IREE_HAL_BUFFER_USAGE_ALL, kBufferSize, &host_buffer));
-  uint8_t i8_val = 0x54;
-  IREE_ASSERT_OK(iree_hal_buffer_fill(host_buffer, /*byte_offset=*/0,
-                                      /*byte_length=*/kBufferSize, &i8_val,
-                                      /*pattern_length=*/sizeof(i8_val)));
-  std::vector<uint8_t> reference_buffer(kBufferSize);
-  std::memset(reference_buffer.data(), i8_val, kBufferSize);
+      IREE_HAL_BUFFER_USAGE_ALL, kBufferSize,
+      iree_make_const_byte_span(reference_buffer.data(),
+                                reference_buffer.size()),
+      &host_buffer));
 
   // Create a device buffer.
   iree_hal_buffer_t* device_buffer;
   IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
       device_allocator_,
       IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL | IREE_HAL_MEMORY_TYPE_HOST_VISIBLE,
-      IREE_HAL_BUFFER_USAGE_ALL, kBufferSize, &device_buffer));
+      IREE_HAL_BUFFER_USAGE_ALL, kBufferSize, iree_const_byte_span_empty(),
+      &device_buffer));
 
   // Copy the host buffer to the device buffer.
   IREE_ASSERT_OK(iree_hal_command_buffer_begin(command_buffer));
@@ -197,23 +200,24 @@
   IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
       device_allocator_,
       IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL | IREE_HAL_MEMORY_TYPE_HOST_VISIBLE,
-      IREE_HAL_BUFFER_USAGE_ALL, kBufferSize, &device_buffer));
+      IREE_HAL_BUFFER_USAGE_ALL, kBufferSize, iree_const_byte_span_empty(),
+      &device_buffer));
+
+  uint8_t i8_val = 0x88;
+  std::vector<uint8_t> reference_buffer(kBufferSize);
+  std::memset(reference_buffer.data() + 8, i8_val, kBufferSize / 2 - 4);
 
   // Create another host buffer with a smaller size.
+  std::vector<uint8_t> host_buffer_data(kBufferSize / 2, i8_val);
   iree_hal_buffer_t* host_buffer;
   IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
       device_allocator_,
       IREE_HAL_MEMORY_TYPE_HOST_VISIBLE | IREE_HAL_MEMORY_TYPE_HOST_CACHED |
           IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
-      IREE_HAL_BUFFER_USAGE_ALL, kBufferSize / 2, &host_buffer));
-
-  // Fill the host buffer.
-  uint8_t i8_val = 0x88;
-  IREE_ASSERT_OK(iree_hal_buffer_fill(host_buffer, /*byte_offset=*/0,
-                                      /*byte_length=*/kBufferSize / 2, &i8_val,
-                                      /*pattern_length=*/sizeof(i8_val)));
-  std::vector<uint8_t> reference_buffer(kBufferSize);
-  std::memset(reference_buffer.data() + 8, i8_val, kBufferSize / 2 - 4);
+      IREE_HAL_BUFFER_USAGE_ALL, kBufferSize / 2,
+      iree_make_const_byte_span(host_buffer_data.data(),
+                                host_buffer_data.size()),
+      &host_buffer));
 
   // Copy the host buffer to the device buffer; zero fill the untouched bytes.
   uint8_t zero_val = 0x0;
diff --git a/iree/hal/cuda/cuda_allocator.c b/iree/hal/cuda/cuda_allocator.c
index ee60000..23d7938 100644
--- a/iree/hal/cuda/cuda_allocator.c
+++ b/iree/hal/cuda/cuda_allocator.c
@@ -156,7 +156,7 @@
 static iree_status_t iree_hal_cuda_allocator_allocate_buffer(
     iree_hal_allocator_t* base_allocator, iree_hal_memory_type_t memory_type,
     iree_hal_buffer_usage_t allowed_usage, iree_host_size_t allocation_size,
-    iree_hal_buffer_t** out_buffer) {
+    iree_const_byte_span_t initial_data, iree_hal_buffer_t** out_buffer) {
   iree_hal_cuda_allocator_t* allocator =
       iree_hal_cuda_allocator_cast(base_allocator);
   // Guard against the corner case where the requested buffer size is 0. The
@@ -215,6 +215,7 @@
           cuMemHostGetDevicePointer(&device_ptr, host_ptr, /*flags=*/0));
     }
   }
+
   if (iree_status_is_ok(status)) {
     status = iree_hal_cuda_buffer_wrap(
         (iree_hal_allocator_t*)allocator, memory_type,
@@ -222,6 +223,15 @@
         /*byte_offset=*/0,
         /*byte_length=*/allocation_size, device_ptr, host_ptr, out_buffer);
   }
+
+  // TODO(thomasraoux): see if there's better ways in CUDA to provide initial
+  // data. This approach (map + write + unmap) is suboptimal.
+  if (iree_status_is_ok(status) &&
+      !iree_const_byte_span_is_empty(initial_data)) {
+    status = iree_hal_buffer_write_data(*out_buffer, 0, initial_data.data,
+                                        initial_data.data_length);
+  }
+
   if (iree_status_is_ok(status)) {
     IREE_STATISTICS(iree_hal_allocator_statistics_record_alloc(
         &allocator->statistics, memory_type, allocation_size));
diff --git a/iree/hal/vulkan/vma_allocator.cc b/iree/hal/vulkan/vma_allocator.cc
index 9e58b07..ed53172 100644
--- a/iree/hal/vulkan/vma_allocator.cc
+++ b/iree/hal/vulkan/vma_allocator.cc
@@ -244,7 +244,8 @@
     iree_hal_vulkan_vma_allocator_t* allocator,
     iree_hal_memory_type_t memory_type, iree_hal_buffer_usage_t allowed_usage,
     iree_hal_memory_access_t allowed_access, iree_host_size_t allocation_size,
-    VmaAllocationCreateFlags flags, iree_hal_buffer_t** out_buffer) {
+    iree_const_byte_span_t initial_data, VmaAllocationCreateFlags flags,
+    iree_hal_buffer_t** out_buffer) {
   // Guard against the corner case where the requested buffer size is 0. The
   // application is unlikely to do anything when requesting a 0-byte buffer; but
   // it can happen in real world use cases. So we should at least not crash.
@@ -324,18 +325,36 @@
                                      &allocation, &allocation_info),
                      "vmaCreateBuffer");
 
-  return iree_hal_vulkan_vma_buffer_wrap(
+  iree_hal_buffer_t* buffer = NULL;
+  iree_status_t status = iree_hal_vulkan_vma_buffer_wrap(
       (iree_hal_allocator_t*)allocator, memory_type, allowed_access,
       allowed_usage, allocation_size,
       /*byte_offset=*/0,
       /*byte_length=*/allocation_size, allocator->vma, handle, allocation,
-      allocation_info, out_buffer);
+      allocation_info, &buffer);
+  if (!iree_status_is_ok(status)) {
+    vmaDestroyBuffer(allocator->vma, handle, allocation);
+    return status;
+  }
+
+  // TODO(benvanik): this approach (map + write + unmap) is suboptimal.
+  if (!iree_const_byte_span_is_empty(initial_data)) {
+    status = iree_hal_buffer_write_data(buffer, 0, initial_data.data,
+                                        initial_data.data_length);
+  }
+
+  if (iree_status_is_ok(status)) {
+    *out_buffer = buffer;
+  } else {
+    iree_hal_buffer_release(buffer);
+  }
+  return status;
 }
 
 static iree_status_t iree_hal_vulkan_vma_allocator_allocate_buffer(
     iree_hal_allocator_t* base_allocator, iree_hal_memory_type_t memory_type,
     iree_hal_buffer_usage_t allowed_usage, iree_host_size_t allocation_size,
-    iree_hal_buffer_t** out_buffer) {
+    iree_const_byte_span_t initial_data, iree_hal_buffer_t** out_buffer) {
   iree_hal_vulkan_vma_allocator_t* allocator =
       iree_hal_vulkan_vma_allocator_cast(base_allocator);
 
@@ -346,6 +365,7 @@
 
   return iree_hal_vulkan_vma_allocator_allocate_internal(
       allocator, memory_type, allowed_usage, allowed_access, allocation_size,
+      initial_data,
       /*flags=*/0, out_buffer);
 }
 
diff --git a/iree/modules/check/check_test.cc b/iree/modules/check/check_test.cc
index 5bcd6a9..142dd6f 100644
--- a/iree/modules/check/check_test.cc
+++ b/iree/modules/check/check_test.cc
@@ -80,18 +80,13 @@
       num_elements *= dim;
     }
     ASSERT_EQ(contents.size(), num_elements);
-    vm::ref<iree_hal_buffer_t> buffer;
-    IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
-        allocator_,
-        static_cast<iree_hal_memory_type_t>(
-            IREE_HAL_MEMORY_TYPE_HOST_LOCAL |
-            IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE),
-        IREE_HAL_BUFFER_USAGE_ALL, contents.size() * sizeof(int32_t), &buffer));
-    IREE_ASSERT_OK(iree_hal_buffer_write_data(
-        buffer.get(), 0, contents.data(), contents.size() * sizeof(int32_t)));
-    IREE_ASSERT_OK(iree_hal_buffer_view_create(
-        buffer.get(), shape.data(), shape.size(), IREE_HAL_ELEMENT_TYPE_INT_32,
-        IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, iree_allocator_system(),
+    IREE_ASSERT_OK(iree_hal_buffer_view_allocate_buffer(
+        allocator_, shape.data(), shape.size(), IREE_HAL_ELEMENT_TYPE_INT_32,
+        IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
+        IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
+        IREE_HAL_BUFFER_USAGE_ALL,
+        iree_make_const_byte_span(contents.data(),
+                                  contents.size() * sizeof(int32_t)),
         &*out_buffer_view));
   }
 
@@ -103,20 +98,14 @@
       num_elements *= dim;
     }
     ASSERT_EQ(contents.size(), num_elements);
-    vm::ref<iree_hal_buffer_t> buffer;
-    IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
-        allocator_,
-        static_cast<iree_hal_memory_type_t>(
-            IREE_HAL_MEMORY_TYPE_HOST_LOCAL |
-            IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE),
-        IREE_HAL_BUFFER_USAGE_ALL, contents.size() * sizeof(uint16_t),
-        &buffer));
-    IREE_ASSERT_OK(iree_hal_buffer_write_data(
-        buffer.get(), 0, contents.data(), contents.size() * sizeof(uint16_t)));
-    IREE_ASSERT_OK(iree_hal_buffer_view_create(
-        buffer.get(), shape.data(), shape.size(),
-        IREE_HAL_ELEMENT_TYPE_FLOAT_16, IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
-        iree_allocator_system(), &*out_buffer_view));
+    IREE_ASSERT_OK(iree_hal_buffer_view_allocate_buffer(
+        allocator_, shape.data(), shape.size(), IREE_HAL_ELEMENT_TYPE_FLOAT_16,
+        IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
+        IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
+        IREE_HAL_BUFFER_USAGE_ALL,
+        iree_make_const_byte_span(contents.data(),
+                                  contents.size() * sizeof(uint16_t)),
+        &*out_buffer_view));
   }
 
   void CreateFloat32BufferView(iree::span<const float> contents,
@@ -127,19 +116,14 @@
       num_elements *= dim;
     }
     ASSERT_EQ(contents.size(), num_elements);
-    vm::ref<iree_hal_buffer_t> buffer;
-    IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
-        allocator_,
-        static_cast<iree_hal_memory_type_t>(
-            IREE_HAL_MEMORY_TYPE_HOST_LOCAL |
-            IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE),
-        IREE_HAL_BUFFER_USAGE_ALL, contents.size() * sizeof(float), &buffer));
-    IREE_ASSERT_OK(iree_hal_buffer_write_data(buffer.get(), 0, contents.data(),
-                                              contents.size() * sizeof(float)));
-    IREE_ASSERT_OK(iree_hal_buffer_view_create(
-        buffer.get(), shape.data(), shape.size(),
-        IREE_HAL_ELEMENT_TYPE_FLOAT_32, IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
-        iree_allocator_system(), &*out_buffer_view));
+    IREE_ASSERT_OK(iree_hal_buffer_view_allocate_buffer(
+        allocator_, shape.data(), shape.size(), IREE_HAL_ELEMENT_TYPE_FLOAT_32,
+        IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
+        IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
+        IREE_HAL_BUFFER_USAGE_ALL,
+        iree_make_const_byte_span(contents.data(),
+                                  contents.size() * sizeof(float)),
+        &*out_buffer_view));
   }
 
   void CreateFloat64BufferView(iree::span<const double> contents,
@@ -150,19 +134,14 @@
       num_elements *= dim;
     }
     ASSERT_EQ(contents.size(), num_elements);
-    vm::ref<iree_hal_buffer_t> buffer;
-    IREE_ASSERT_OK(iree_hal_allocator_allocate_buffer(
-        allocator_,
-        static_cast<iree_hal_memory_type_t>(
-            IREE_HAL_MEMORY_TYPE_HOST_LOCAL |
-            IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE),
-        IREE_HAL_BUFFER_USAGE_ALL, contents.size() * sizeof(double), &buffer));
-    IREE_ASSERT_OK(iree_hal_buffer_write_data(
-        buffer.get(), 0, contents.data(), contents.size() * sizeof(double)));
-    IREE_ASSERT_OK(iree_hal_buffer_view_create(
-        buffer.get(), shape.data(), shape.size(),
-        IREE_HAL_ELEMENT_TYPE_FLOAT_64, IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
-        iree_allocator_system(), &*out_buffer_view));
+    IREE_ASSERT_OK(iree_hal_buffer_view_allocate_buffer(
+        allocator_, shape.data(), shape.size(), IREE_HAL_ELEMENT_TYPE_FLOAT_64,
+        IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
+        IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
+        IREE_HAL_BUFFER_USAGE_ALL,
+        iree_make_const_byte_span(contents.data(),
+                                  contents.size() * sizeof(double)),
+        &*out_buffer_view));
   }
 
   iree_status_t Invoke(const char* function_name) {
diff --git a/iree/modules/hal/module.c b/iree/modules/hal/module.c
index 4a4d6f3..0a6f449 100644
--- a/iree/modules/hal/module.c
+++ b/iree/modules/hal/module.c
@@ -284,7 +284,8 @@
 
   iree_hal_buffer_t* buffer = NULL;
   IREE_RETURN_IF_ERROR(iree_hal_allocator_allocate_buffer(
-      allocator, memory_types, buffer_usage, allocation_size, &buffer));
+      allocator, memory_types, buffer_usage, allocation_size,
+      iree_const_byte_span_empty(), &buffer));
   rets->r0 = iree_hal_buffer_move_ref(buffer);
   return iree_ok_status();
 }
@@ -403,18 +404,14 @@
 
   iree_hal_buffer_t* buffer = NULL;
   IREE_RETURN_IF_ERROR(
-      iree_hal_allocator_allocate_buffer(allocator, memory_types, buffer_usage,
-                                         length, &buffer),
+      iree_hal_allocator_allocate_buffer(
+          allocator, memory_types, buffer_usage, length,
+          iree_make_const_byte_span(source->data.data + offset, length),
+          &buffer),
       "failed to allocate buffer of length %d", length);
 
-  iree_status_t status =
-      iree_hal_buffer_write_data(buffer, 0, source->data.data + offset, length);
-  if (iree_status_is_ok(status)) {
-    rets->r0 = iree_hal_buffer_move_ref(buffer);
-  } else {
-    iree_hal_buffer_release(buffer);
-  }
-  return status;
+  rets->r0 = iree_hal_buffer_move_ref(buffer);
+  return iree_ok_status();
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/iree/samples/dynamic_shapes/main.c b/iree/samples/dynamic_shapes/main.c
index f82737c..4f2594a 100644
--- a/iree/samples/dynamic_shapes/main.c
+++ b/iree/samples/dynamic_shapes/main.c
@@ -21,7 +21,7 @@
   //   * debugging some apparent memory corruption with the stack-local value
   iree_status_t status = iree_ok_status();
   if (iree_status_is_ok(status)) {
-    status = iree_hal_buffer_view_clone_heap_buffer(
+    status = iree_hal_buffer_view_allocate_buffer(
         iree_runtime_session_device_allocator(session), arg0_shape,
         IREE_ARRAYSIZE(arg0_shape), IREE_HAL_ELEMENT_TYPE_SINT_32,
         IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
@@ -43,16 +43,10 @@
     status =
         iree_runtime_call_outputs_pop_front_buffer_view(&call, &buffer_view);
   }
-  iree_hal_buffer_mapping_t buffer_mapping;
   if (iree_status_is_ok(status)) {
-    status = iree_hal_buffer_map_range(iree_hal_buffer_view_buffer(buffer_view),
-                                       IREE_HAL_MEMORY_ACCESS_READ, 0,
-                                       IREE_WHOLE_BUFFER, &buffer_mapping);
+    status = iree_hal_buffer_read_data(iree_hal_buffer_view_buffer(buffer_view),
+                                       0, out_result, sizeof(*out_result));
   }
-  if (iree_status_is_ok(status)) {
-    *out_result = *buffer_mapping.contents.data;
-  }
-  iree_hal_buffer_unmap_range(&buffer_mapping);
   iree_hal_buffer_view_release(buffer_view);
 
   iree_runtime_call_deinitialize(&call);
@@ -73,7 +67,7 @@
   //   * debugging some apparent memory corruption with the stack-local value
   iree_status_t status = iree_ok_status();
   if (iree_status_is_ok(status)) {
-    status = iree_hal_buffer_view_clone_heap_buffer(
+    status = iree_hal_buffer_view_allocate_buffer(
         iree_runtime_session_device_allocator(session), arg0_shape,
         IREE_ARRAYSIZE(arg0_shape), IREE_HAL_ELEMENT_TYPE_SINT_32,
         IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
@@ -113,7 +107,7 @@
   //   * debugging some apparent memory corruption with the stack-local value
   iree_status_t status = iree_ok_status();
   if (iree_status_is_ok(status)) {
-    status = iree_hal_buffer_view_clone_heap_buffer(
+    status = iree_hal_buffer_view_allocate_buffer(
         iree_runtime_session_device_allocator(session), arg0_shape,
         IREE_ARRAYSIZE(arg0_shape), IREE_HAL_ELEMENT_TYPE_SINT_32,
         IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
diff --git a/iree/samples/simple_embedding/simple_embedding.c b/iree/samples/simple_embedding/simple_embedding.c
index db9f2b6..30a4566 100644
--- a/iree/samples/simple_embedding/simple_embedding.c
+++ b/iree/samples/simple_embedding/simple_embedding.c
@@ -66,42 +66,30 @@
   IREE_RETURN_IF_ERROR(iree_vm_context_resolve_function(
       context, iree_make_cstring_view(kMainFunctionName), &main_function));
 
-  // Allocate buffers that can be mapped on the CPU and that can also be used
-  // on the device. Not all devices support this, but the ones we have now do.
-  const int kElementCount = 4;
-  iree_hal_buffer_t* arg0_buffer = NULL;
-  iree_hal_buffer_t* arg1_buffer = NULL;
-  iree_hal_memory_type_t input_memory_type =
-      IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
-  IREE_RETURN_IF_ERROR(iree_hal_allocator_allocate_buffer(
-      iree_hal_device_allocator(device), input_memory_type,
-      IREE_HAL_BUFFER_USAGE_ALL, sizeof(float) * kElementCount, &arg0_buffer));
-  IREE_RETURN_IF_ERROR(iree_hal_allocator_allocate_buffer(
-      iree_hal_device_allocator(device), input_memory_type,
-      IREE_HAL_BUFFER_USAGE_ALL, sizeof(float) * kElementCount, &arg1_buffer));
+  // Initial buffer contents for 4 * 2 = 8.
+  const float kFloat4[] = {4.0f, 4.0f, 4.0f, 4.0f};
+  const float kFloat2[] = {2.0f, 2.0f, 2.0f, 2.0f};
+  const int kElementCount = IREE_ARRAYSIZE(kFloat4);
 
-  // Populate initial values for 4 * 2 = 8.
-  const float kFloat4 = 4.0f;
-  const float kFloat2 = 2.0f;
-  IREE_RETURN_IF_ERROR(iree_hal_buffer_fill(arg0_buffer, 0, IREE_WHOLE_BUFFER,
-                                            &kFloat4, sizeof(float)));
-  IREE_RETURN_IF_ERROR(iree_hal_buffer_fill(arg1_buffer, 0, IREE_WHOLE_BUFFER,
-                                            &kFloat2, sizeof(float)));
-
-  // Wrap buffers in shaped buffer views.
+  // Allocate buffers in device-local memory so that if the device has an
+  // independent address space they live on the fast side of the fence.
   iree_hal_dim_t shape[1] = {kElementCount};
   iree_hal_buffer_view_t* arg0_buffer_view = NULL;
   iree_hal_buffer_view_t* arg1_buffer_view = NULL;
-  IREE_RETURN_IF_ERROR(iree_hal_buffer_view_create(
-      arg0_buffer, shape, IREE_ARRAYSIZE(shape), IREE_HAL_ELEMENT_TYPE_FLOAT_32,
-      IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, iree_allocator_system(),
-      &arg0_buffer_view));
-  IREE_RETURN_IF_ERROR(iree_hal_buffer_view_create(
-      arg1_buffer, shape, IREE_ARRAYSIZE(shape), IREE_HAL_ELEMENT_TYPE_FLOAT_32,
-      IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, iree_allocator_system(),
-      &arg1_buffer_view));
-  iree_hal_buffer_release(arg0_buffer);
-  iree_hal_buffer_release(arg1_buffer);
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_view_allocate_buffer(
+      iree_hal_device_allocator(device), shape, IREE_ARRAYSIZE(shape),
+      IREE_HAL_ELEMENT_TYPE_FLOAT_32, IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
+      IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL | IREE_HAL_MEMORY_TYPE_HOST_VISIBLE,
+      IREE_HAL_BUFFER_USAGE_DISPATCH | IREE_HAL_BUFFER_USAGE_TRANSFER |
+          IREE_HAL_BUFFER_USAGE_MAPPING,
+      iree_make_const_byte_span(kFloat4, sizeof(kFloat4)), &arg0_buffer_view));
+  IREE_RETURN_IF_ERROR(iree_hal_buffer_view_allocate_buffer(
+      iree_hal_device_allocator(device), shape, IREE_ARRAYSIZE(shape),
+      IREE_HAL_ELEMENT_TYPE_FLOAT_32, IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
+      IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL | IREE_HAL_MEMORY_TYPE_HOST_VISIBLE,
+      IREE_HAL_BUFFER_USAGE_DISPATCH | IREE_HAL_BUFFER_USAGE_TRANSFER |
+          IREE_HAL_BUFFER_USAGE_MAPPING,
+      iree_make_const_byte_span(kFloat2, sizeof(kFloat2)), &arg1_buffer_view));
 
   // Setup call inputs with our buffers.
   iree_vm_list_t* inputs = NULL;
diff --git a/iree/samples/static_library/static_library_demo.c b/iree/samples/static_library/static_library_demo.c
index 637d01b..2529281 100644
--- a/iree/samples/static_library/static_library_demo.c
+++ b/iree/samples/static_library/static_library_demo.c
@@ -127,7 +127,7 @@
   iree_hal_memory_type_t input_memory_type =
       IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
   if (iree_status_is_ok(status)) {
-    status = iree_hal_buffer_view_clone_heap_buffer(
+    status = iree_hal_buffer_view_allocate_buffer(
         iree_hal_device_allocator(device), shape, IREE_ARRAYSIZE(shape),
         IREE_HAL_ELEMENT_TYPE_FLOAT_32, IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
         input_memory_type, IREE_HAL_BUFFER_USAGE_ALL,
@@ -136,7 +136,7 @@
         &arg0_buffer_view);
   }
   if (iree_status_is_ok(status)) {
-    status = iree_hal_buffer_view_clone_heap_buffer(
+    status = iree_hal_buffer_view_allocate_buffer(
         iree_hal_device_allocator(device), shape, IREE_ARRAYSIZE(shape),
         IREE_HAL_ELEMENT_TYPE_FLOAT_32, IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
         input_memory_type, IREE_HAL_BUFFER_USAGE_ALL,
diff --git a/iree/samples/variables_and_state/main.c b/iree/samples/variables_and_state/main.c
index 302ff4e..aa08ad0 100644
--- a/iree/samples/variables_and_state/main.c
+++ b/iree/samples/variables_and_state/main.c
@@ -23,16 +23,10 @@
     status =
         iree_runtime_call_outputs_pop_front_buffer_view(&call, &buffer_view);
   }
-  iree_hal_buffer_mapping_t buffer_mapping;
   if (iree_status_is_ok(status)) {
-    status = iree_hal_buffer_map_range(iree_hal_buffer_view_buffer(buffer_view),
-                                       IREE_HAL_MEMORY_ACCESS_READ, 0,
-                                       IREE_WHOLE_BUFFER, &buffer_mapping);
+    status = iree_hal_buffer_read_data(iree_hal_buffer_view_buffer(buffer_view),
+                                       0, out_value, sizeof(*out_value));
   }
-  if (iree_status_is_ok(status)) {
-    *out_value = *buffer_mapping.contents.data;
-  }
-  iree_hal_buffer_unmap_range(&buffer_mapping);
   iree_hal_buffer_view_release(buffer_view);
 
   iree_runtime_call_deinitialize(&call);
@@ -52,7 +46,7 @@
   //   * debugging some apparent memory corruption with the stack-local value
   iree_status_t status = iree_ok_status();
   if (iree_status_is_ok(status)) {
-    status = iree_hal_buffer_view_clone_heap_buffer(
+    status = iree_hal_buffer_view_allocate_buffer(
         iree_runtime_session_device_allocator(session), /*shape=*/NULL,
         /*shape_rank=*/0, IREE_HAL_ELEMENT_TYPE_SINT_32,
         IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
@@ -84,7 +78,7 @@
   //   * debugging some apparent memory corruption with the stack-local value
   iree_status_t status = iree_ok_status();
   if (iree_status_is_ok(status)) {
-    status = iree_hal_buffer_view_clone_heap_buffer(
+    status = iree_hal_buffer_view_allocate_buffer(
         iree_runtime_session_device_allocator(session), /*shape=*/NULL,
         /*shape_rank=*/0, IREE_HAL_ELEMENT_TYPE_SINT_32,
         IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
diff --git a/iree/samples/vulkan/vulkan_inference_gui.cc b/iree/samples/vulkan/vulkan_inference_gui.cc
index 4939b93..bacf56f 100644
--- a/iree/samples/vulkan/vulkan_inference_gui.cc
+++ b/iree/samples/vulkan/vulkan_inference_gui.cc
@@ -71,8 +71,9 @@
   }
 
   // Setup window
-  SDL_WindowFlags window_flags = (SDL_WindowFlags)(
-      SDL_WINDOW_VULKAN | SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI);
+  SDL_WindowFlags window_flags =
+      (SDL_WindowFlags)(SDL_WINDOW_VULKAN | SDL_WINDOW_RESIZABLE |
+                        SDL_WINDOW_ALLOW_HIGHDPI);
   SDL_Window* window = SDL_CreateWindow(
       "IREE Samples - Vulkan Inference GUI", SDL_WINDOWPOS_CENTERED,
       SDL_WINDOWPOS_CENTERED, 1280, 720, window_flags);
@@ -363,8 +364,6 @@
         constexpr int32_t kElementCount = 4;
         iree_hal_allocator_t* allocator =
             iree_hal_device_allocator(iree_vk_device);
-        iree_hal_buffer_t* input0_buffer = nullptr;
-        iree_hal_buffer_t* input1_buffer = nullptr;
         iree_hal_memory_type_t input_memory_type =
             static_cast<iree_hal_memory_type_t>(
                 IREE_HAL_MEMORY_TYPE_HOST_LOCAL |
@@ -372,33 +371,25 @@
         iree_hal_buffer_usage_t input_buffer_usage =
             static_cast<iree_hal_buffer_usage_t>(
                 IREE_HAL_BUFFER_USAGE_ALL | IREE_HAL_BUFFER_USAGE_CONSTANT);
-        IREE_CHECK_OK(iree_hal_allocator_allocate_buffer(
-            allocator, input_memory_type, input_buffer_usage,
-            sizeof(float) * kElementCount, &input0_buffer));
-        IREE_CHECK_OK(iree_hal_allocator_allocate_buffer(
-            allocator, input_memory_type, input_buffer_usage,
-            sizeof(float) * kElementCount, &input1_buffer));
-        IREE_CHECK_OK(iree_hal_buffer_write_data(input0_buffer, 0, &input_x,
-                                                 sizeof(input_x)));
-        IREE_CHECK_OK(iree_hal_buffer_write_data(input1_buffer, 0, &input_y,
-                                                 sizeof(input_y)));
         // Wrap input buffers in buffer views.
         iree_hal_buffer_view_t* input0_buffer_view = nullptr;
         iree_hal_buffer_view_t* input1_buffer_view = nullptr;
-        IREE_CHECK_OK(iree_hal_buffer_view_create(
-            input0_buffer,
+        IREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer(
+            allocator,
             /*shape=*/&kElementCount, /*shape_rank=*/1,
             IREE_HAL_ELEMENT_TYPE_FLOAT_32,
-            IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, iree_allocator_system(),
-            &input0_buffer_view));
-        IREE_CHECK_OK(iree_hal_buffer_view_create(
-            input1_buffer,
+            IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, input_memory_type,
+            input_buffer_usage,
+            iree_make_const_byte_span(&input_x, sizeof(input_x)),
+            &input0_buffer_view));
+        IREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer(
+            allocator,
             /*shape=*/&kElementCount, /*shape_rank=*/1,
             IREE_HAL_ELEMENT_TYPE_FLOAT_32,
-            IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, iree_allocator_system(),
-            &input1_buffer_view));
-        iree_hal_buffer_release(input0_buffer);
-        iree_hal_buffer_release(input1_buffer);
+            IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, input_memory_type,
+            input_buffer_usage,
+            iree_make_const_byte_span(&input_y, sizeof(input_y)),
+            &input1_buffer_view));
         // Marshal inputs through a VM variant list.
         // [wait_semaphore|wait_value|arg0|arg1|signal_semaphore|signal_value]
         vm::ref<iree_vm_list_t> inputs;
diff --git a/iree/tools/iree-e2e-matmul-test.c b/iree/tools/iree-e2e-matmul-test.c
index 4d622a0..8208331 100644
--- a/iree/tools/iree-e2e-matmul-test.c
+++ b/iree/tools/iree-e2e-matmul-test.c
@@ -478,13 +478,15 @@
       iree_hal_buffer_view_element_type(src),
       iree_hal_buffer_view_encoding_type(src),
       IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
-      IREE_HAL_BUFFER_USAGE_ALL, dst);
+      IREE_HAL_BUFFER_USAGE_ALL, iree_const_byte_span_empty(), dst);
 }
 
 // Performs a deep copy of |src| into |dst|. Takes care of allocating |dst|.
 static iree_status_t copy_buffer(iree_hal_allocator_t* hal_allocator,
                                  iree_hal_buffer_view_t* src,
                                  iree_hal_buffer_view_t** dst) {
+  // TODO(benvanik): change this to use iree_hal_buffer_copy_data. Or something.
+  // I can't understand what all this code is doing.
   iree_hal_buffer_mapping_t src_mapping;
   IREE_RETURN_IF_ERROR(iree_hal_buffer_map_range(
       iree_hal_buffer_view_buffer(src), IREE_HAL_MEMORY_ACCESS_READ, 0,
@@ -492,7 +494,7 @@
   iree_const_byte_span_t src_span;
   src_span.data = src_mapping.contents.data;
   src_span.data_length = src_mapping.contents.data_length;
-  return iree_hal_buffer_view_clone_heap_buffer(
+  return iree_hal_buffer_view_allocate_buffer(
       hal_allocator, iree_hal_buffer_view_shape_dims(src),
       iree_hal_buffer_view_shape_rank(src),
       iree_hal_buffer_view_element_type(src),
diff --git a/iree/tools/utils/image_util.c b/iree/tools/utils/image_util.c
index 61e1486..fbd3340 100644
--- a/iree/tools/utils/image_util.c
+++ b/iree/tools/utils/image_util.c
@@ -174,7 +174,8 @@
     result = iree_hal_allocator_allocate_buffer(
         allocator,
         IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
-        IREE_HAL_BUFFER_USAGE_ALL, element_byte * buffer_length, &buffer);
+        IREE_HAL_BUFFER_USAGE_ALL, element_byte * buffer_length,
+        iree_const_byte_span_empty(), &buffer);
   }
   if (iree_status_is_ok(result)) {
     result = iree_hal_buffer_map_range(