Adding Vulkan sparse binding buffer support for native allocations. (#14536)

This creates one logical VkBuffer that is backed by as many aligned
max-size allocations as required. There's a lot we could tweak here and
a lot to optimize but the initial proof of concept here is specifically
for allowing large constant/variable buffers with long lifetimes. Most
implementations don't allow using these buffers with dispatches, though,
due to embarrassingly and arbitrarily small limits on shader storage
buffer access ranges. We'll need device pointers to actually use these
but at least we can allocate them now.

Future changes will add asynchronous binding and sparse residency as
part of the HAL API so that targets supporting constrained virtual
memory management (CPU, CUDA, Vulkan, etc) can have such
virtual/physical remapping exposed for use by the compiler. When that's
implemented the sparse buffer type here will be reworked as a shared
utility implementation using the binding/sparse residency APIs.

In order for this to be used for large constants, host allocation
importing was implemented so that the buffers can be transferred. This
required a change in the HAL APIs exposed to the compiler as what was
there was a hack to approximate the proper import/mapping path but
insufficient for doing it properly. This has been tested with imports of
up to 15GB (and should work beyond that, device memory allowing).

On discrete systems when the module is mmapped we can't import the host
pointer and must instead stage uploads in chunks:

![image](https://github.com/openxla/iree/assets/75337/951568e9-5cdb-4a2a-95c1-05a8d371066c)

If not mmapped we can import the host pointer as a staging source and
avoid the chunk allocation:

![image](https://github.com/openxla/iree/assets/75337/2d87982e-e98f-4e4c-a3d0-e226f72717a6)

On unified memory systems we can (sometimes) directly use the host
buffer and avoid all allocations:

![image](https://github.com/openxla/iree/assets/75337/3eb51285-3270-4b7a-a88c-240ca4312287)

Progress on #14607.
Fixes #7242.
diff --git a/runtime/src/iree/hal/drivers/vulkan/BUILD.bazel b/runtime/src/iree/hal/drivers/vulkan/BUILD.bazel
index 280e432..1d2d8d4 100644
--- a/runtime/src/iree/hal/drivers/vulkan/BUILD.bazel
+++ b/runtime/src/iree/hal/drivers/vulkan/BUILD.bazel
@@ -50,6 +50,8 @@
         "native_semaphore.h",
         "nop_executable_cache.cc",
         "nop_executable_cache.h",
+        "sparse_buffer.cc",
+        "sparse_buffer.h",
         "status_util.c",
         "status_util.h",
         "tracing.cc",
diff --git a/runtime/src/iree/hal/drivers/vulkan/CMakeLists.txt b/runtime/src/iree/hal/drivers/vulkan/CMakeLists.txt
index 86304d5..acdf0f3 100644
--- a/runtime/src/iree/hal/drivers/vulkan/CMakeLists.txt
+++ b/runtime/src/iree/hal/drivers/vulkan/CMakeLists.txt
@@ -51,6 +51,8 @@
     "native_semaphore.h"
     "nop_executable_cache.cc"
     "nop_executable_cache.h"
+    "sparse_buffer.cc"
+    "sparse_buffer.h"
     "status_util.c"
     "status_util.h"
     "tracing.cc"
diff --git a/runtime/src/iree/hal/drivers/vulkan/api.h b/runtime/src/iree/hal/drivers/vulkan/api.h
index 3c8a919..4f775f1 100644
--- a/runtime/src/iree/hal/drivers/vulkan/api.h
+++ b/runtime/src/iree/hal/drivers/vulkan/api.h
@@ -81,6 +81,15 @@
   // NOTE: This affects the pipeline state and in turn may change the code
   // generated by the Vulkan device compiler.
   IREE_HAL_VULKAN_FEATURE_ENABLE_ROBUST_BUFFER_ACCESS = 1u << 3,
+
+  // Enables `sparseBinding` to allow for buffers larger than the maximum
+  // allocation size of the device.
+  IREE_HAL_VULKAN_FEATURE_ENABLE_SPARSE_BINDING = 1u << 4,
+
+  // Enables `sparseBinding`/`sparseResidencyBuffer`/`sparseResidencyAliased`
+  // to allow for queue-ordered virtual memory management.
+  IREE_HAL_VULKAN_FEATURE_ENABLE_SPARSE_RESIDENCY_ALIASED =
+      IREE_HAL_VULKAN_FEATURE_ENABLE_SPARSE_BINDING | (1u << 5),
 };
 typedef uint32_t iree_hal_vulkan_features_t;
 
diff --git a/runtime/src/iree/hal/drivers/vulkan/base_buffer.c b/runtime/src/iree/hal/drivers/vulkan/base_buffer.c
index cc292f8..7ed6986 100644
--- a/runtime/src/iree/hal/drivers/vulkan/base_buffer.c
+++ b/runtime/src/iree/hal/drivers/vulkan/base_buffer.c
@@ -40,13 +40,24 @@
     const VkPhysicalDeviceProperties* device_props,
     const VkPhysicalDeviceMemoryProperties* memory_props,
     const iree_hal_buffer_params_t* IREE_RESTRICT params,
-    uint32_t* out_memory_type_index) {
+    uint32_t allowed_type_indices, uint32_t* out_memory_type_index) {
   *out_memory_type_index = 0;
 
+  iree_hal_memory_type_t requested_type = params->type;
+  if (device_props->deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU) {
+    // Integrated GPUs have tiny device local heaps commonly used for
+    // framebuffers and other bounded resources. We don't currently try to use
+    // them but could for very small transients.
+    if (iree_all_bits_set(requested_type, IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL)) {
+      requested_type &= ~IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL;
+      requested_type |= IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
+    }
+  }
+
   VkMemoryPropertyFlags require_flags = 0;
   VkMemoryPropertyFlags prefer_flags = 0;
-  if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL)) {
-    if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_HOST_VISIBLE)) {
+  if (iree_all_bits_set(requested_type, IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL)) {
+    if (iree_all_bits_set(requested_type, IREE_HAL_MEMORY_TYPE_HOST_VISIBLE)) {
       // Device-local, host-visible.
       require_flags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
       prefer_flags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
@@ -55,7 +66,8 @@
       require_flags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
     }
   } else {
-    if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE)) {
+    if (iree_all_bits_set(requested_type,
+                          IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE)) {
       // Host-local, device-visible.
       require_flags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
     } else {
@@ -63,13 +75,13 @@
       require_flags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
     }
   }
-  if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_HOST_CACHED)) {
+  if (iree_all_bits_set(requested_type, IREE_HAL_MEMORY_TYPE_HOST_CACHED)) {
     require_flags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
   }
-  if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_HOST_COHERENT)) {
+  if (iree_all_bits_set(requested_type, IREE_HAL_MEMORY_TYPE_HOST_COHERENT)) {
     require_flags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
   }
-  if (iree_any_bit_set(params->usage,
+  if (iree_any_bit_set(params->usage,
                        IREE_HAL_BUFFER_USAGE_MAPPING_SCOPED |
                            IREE_HAL_BUFFER_USAGE_MAPPING_PERSISTENT)) {
     require_flags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
@@ -80,7 +92,8 @@
   for (uint32_t i = 0; i < memory_props->memoryTypeCount; ++i) {
     VkMemoryPropertyFlags flags = memory_props->memoryTypes[i].propertyFlags;
     if (!iree_all_bits_set(flags, require_flags) ||
-        !iree_hal_vulkan_is_memory_type_usable(flags)) {
+        !iree_hal_vulkan_is_memory_type_usable(flags) ||
+        !iree_all_bits_set(allowed_type_indices, 1u << i)) {
       // Excluded (required bits missing or memory type is not usable).
       continue;
     }
diff --git a/runtime/src/iree/hal/drivers/vulkan/base_buffer.h b/runtime/src/iree/hal/drivers/vulkan/base_buffer.h
index c9a0d34..ea872b5 100644
--- a/runtime/src/iree/hal/drivers/vulkan/base_buffer.h
+++ b/runtime/src/iree/hal/drivers/vulkan/base_buffer.h
@@ -65,13 +65,14 @@
 } iree_hal_vulkan_memory_types_t;
 
 // Finds the memory type that satisfies the required and preferred buffer
-// |params| and returns it in |out_memory_type_index|. Fails if no memory type
-// satisfies the requirements.
+// |params| and returns it in |out_memory_type_index|. Only memory types present
+// in |allowed_type_indices| will be returned. Fails if no memory type satisfies
+// the requirements.
 iree_status_t iree_hal_vulkan_find_memory_type(
     const VkPhysicalDeviceProperties* device_props,
     const VkPhysicalDeviceMemoryProperties* memory_props,
     const iree_hal_buffer_params_t* IREE_RESTRICT params,
-    uint32_t* out_memory_type_index);
+    uint32_t allowed_type_indices, uint32_t* out_memory_type_index);
 
 // Queries the underlying Vulkan implementation to decide which memory type
 // should be used for particular operations.
diff --git a/runtime/src/iree/hal/drivers/vulkan/dynamic_symbol_tables.h b/runtime/src/iree/hal/drivers/vulkan/dynamic_symbol_tables.h
index 05c1fa3..314e969 100644
--- a/runtime/src/iree/hal/drivers/vulkan/dynamic_symbol_tables.h
+++ b/runtime/src/iree/hal/drivers/vulkan/dynamic_symbol_tables.h
@@ -244,7 +244,7 @@
   DEV_PFN(EXCLUDED, vkGetImageViewHandleNVX)                            \
   DEV_PFN(EXCLUDED, vkGetMemoryFdKHR)                                   \
   DEV_PFN(EXCLUDED, vkGetMemoryFdPropertiesKHR)                         \
-  DEV_PFN(EXCLUDED, vkGetMemoryHostPointerPropertiesEXT)                \
+  DEV_PFN(OPTIONAL, vkGetMemoryHostPointerPropertiesEXT)                \
   DEV_PFN(EXCLUDED, vkGetPastPresentationTimingGOOGLE)                  \
   DEV_PFN(REQUIRED, vkGetPipelineCacheData)                             \
   DEV_PFN(REQUIRED, vkGetQueryPoolResults)                              \
@@ -321,8 +321,8 @@
   INS_PFN(EXCLUDED, vkGetPhysicalDeviceDisplayPlanePropertiesKHR)       \
   INS_PFN(EXCLUDED, vkGetPhysicalDeviceDisplayProperties2KHR)           \
   INS_PFN(EXCLUDED, vkGetPhysicalDeviceDisplayPropertiesKHR)            \
-  INS_PFN(EXCLUDED, vkGetPhysicalDeviceExternalBufferProperties)        \
   INS_PFN(EXCLUDED, vkGetPhysicalDeviceExternalBufferPropertiesKHR)     \
+  INS_PFN(REQUIRED, vkGetPhysicalDeviceExternalBufferProperties)        \
   INS_PFN(EXCLUDED, vkGetPhysicalDeviceExternalFenceProperties)         \
   INS_PFN(EXCLUDED, vkGetPhysicalDeviceExternalFencePropertiesKHR)      \
   INS_PFN(EXCLUDED, vkGetPhysicalDeviceExternalImageFormatPropertiesNV) \
@@ -362,7 +362,7 @@
   INS_PFN(EXCLUDED, vkReleaseDisplayEXT)                                \
   DEV_PFN(EXCLUDED, vkGetQueueCheckpointDataNV)                         \
   DEV_PFN(OPTIONAL, vkQueueBeginDebugUtilsLabelEXT)                     \
-  DEV_PFN(EXCLUDED, vkQueueBindSparse)                                  \
+  DEV_PFN(OPTIONAL, vkQueueBindSparse)                                  \
   DEV_PFN(OPTIONAL, vkQueueEndDebugUtilsLabelEXT)                       \
   DEV_PFN(OPTIONAL, vkQueueInsertDebugUtilsLabelEXT)                    \
   DEV_PFN(EXCLUDED, vkQueuePresentKHR)                                  \
diff --git a/runtime/src/iree/hal/drivers/vulkan/extensibility_util.cc b/runtime/src/iree/hal/drivers/vulkan/extensibility_util.cc
index 3f6729f..12570e4 100644
--- a/runtime/src/iree/hal/drivers/vulkan/extensibility_util.cc
+++ b/runtime/src/iree/hal/drivers/vulkan/extensibility_util.cc
@@ -210,6 +210,9 @@
     } else if (strcmp(extension_name,
                       VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME) == 0) {
       extensions.subgroup_size_control = true;
+    } else if (strcmp(extension_name,
+                      VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME) == 0) {
+      extensions.external_memory_host = true;
     }
   }
   return extensions;
diff --git a/runtime/src/iree/hal/drivers/vulkan/extensibility_util.h b/runtime/src/iree/hal/drivers/vulkan/extensibility_util.h
index 0f4e1cd..bf049e7 100644
--- a/runtime/src/iree/hal/drivers/vulkan/extensibility_util.h
+++ b/runtime/src/iree/hal/drivers/vulkan/extensibility_util.h
@@ -82,6 +82,8 @@
   bool calibrated_timestamps : 1;
   // VK_EXT_subgroup_size_control is enabled.
   bool subgroup_size_control : 1;
+  // VK_EXT_external_memory_host is enabled.
+  bool external_memory_host : 1;
 } iree_hal_vulkan_device_extensions_t;
 
 // Returns a bitfield with all of the provided extension names.
diff --git a/runtime/src/iree/hal/drivers/vulkan/handle_util.h b/runtime/src/iree/hal/drivers/vulkan/handle_util.h
index 5ef7772..2988946 100644
--- a/runtime/src/iree/hal/drivers/vulkan/handle_util.h
+++ b/runtime/src/iree/hal/drivers/vulkan/handle_util.h
@@ -39,11 +39,14 @@
 
 class VkDeviceHandle : public RefObject<VkDeviceHandle> {
  public:
-  VkDeviceHandle(DynamicSymbols* syms,
+  VkDeviceHandle(DynamicSymbols* syms, VkPhysicalDevice physical_device,
+                 iree_hal_vulkan_features_t enabled_features,
                  iree_hal_vulkan_device_extensions_t enabled_extensions,
                  bool owns_device, iree_allocator_t host_allocator,
                  const VkAllocationCallbacks* allocator = nullptr)
       : syms_(add_ref(syms)),
+        physical_device_(physical_device),
+        enabled_features_(enabled_features),
         enabled_extensions_(enabled_extensions),
         owns_device_(owns_device),
         allocator_(allocator),
@@ -53,7 +56,10 @@
   VkDeviceHandle(const VkDeviceHandle&) = delete;
   VkDeviceHandle& operator=(const VkDeviceHandle&) = delete;
   VkDeviceHandle(VkDeviceHandle&& other) noexcept
-      : value_(exchange(other.value_, static_cast<VkDevice>(VK_NULL_HANDLE))),
+      : physical_device_(
+            exchange(other.physical_device_,
+                     static_cast<VkPhysicalDevice>(VK_NULL_HANDLE))),
+        value_(exchange(other.value_, static_cast<VkDevice>(VK_NULL_HANDLE))),
         syms_(std::move(other.syms_)),
         enabled_extensions_(other.enabled_extensions_),
         owns_device_(other.owns_device_),
@@ -68,6 +74,9 @@
     value_ = VK_NULL_HANDLE;
   }
 
+  VkPhysicalDevice physical_device() const noexcept { return physical_device_; }
+  operator VkPhysicalDevice() const noexcept { return physical_device_; }
+
   VkDevice value() const noexcept { return value_; }
   VkDevice* mutable_value() noexcept { return &value_; }
   operator VkDevice() const noexcept { return value_; }
@@ -76,13 +85,19 @@
   const VkAllocationCallbacks* allocator() const noexcept { return allocator_; }
   iree_allocator_t host_allocator() const noexcept { return host_allocator_; }
 
+  iree_hal_vulkan_features_t enabled_features() const {
+    return enabled_features_;
+  }
+
   const iree_hal_vulkan_device_extensions_t& enabled_extensions() const {
     return enabled_extensions_;
   }
 
  private:
+  VkPhysicalDevice physical_device_ = VK_NULL_HANDLE;
   VkDevice value_ = VK_NULL_HANDLE;
   ref_ptr<DynamicSymbols> syms_;
+  iree_hal_vulkan_features_t enabled_features_;
   iree_hal_vulkan_device_extensions_t enabled_extensions_;
   bool owns_device_;
   const VkAllocationCallbacks* allocator_ = nullptr;
diff --git a/runtime/src/iree/hal/drivers/vulkan/native_allocator.cc b/runtime/src/iree/hal/drivers/vulkan/native_allocator.cc
index f27db0d..3970dba 100644
--- a/runtime/src/iree/hal/drivers/vulkan/native_allocator.cc
+++ b/runtime/src/iree/hal/drivers/vulkan/native_allocator.cc
@@ -11,6 +11,7 @@
 #include "iree/hal/drivers/vulkan/base_buffer.h"
 #include "iree/hal/drivers/vulkan/dynamic_symbols.h"
 #include "iree/hal/drivers/vulkan/native_buffer.h"
+#include "iree/hal/drivers/vulkan/sparse_buffer.h"
 #include "iree/hal/drivers/vulkan/status_util.h"
 
 using namespace iree::hal::vulkan;
@@ -26,7 +27,9 @@
 
   // Cached from the API to avoid additional queries in hot paths.
   VkPhysicalDeviceProperties device_props;
+  VkPhysicalDeviceVulkan11Properties device_props_11;
   VkPhysicalDeviceMemoryProperties memory_props;
+  VkDeviceSize min_imported_host_pointer_alignment;
 
   // Used to quickly look up the memory type index used for a particular usage.
   iree_hal_vulkan_memory_types_t memory_types;
@@ -69,10 +72,24 @@
   allocator->host_allocator = host_allocator;
 
   const auto& syms = logical_device->syms();
-  syms->vkGetPhysicalDeviceProperties(physical_device,
-                                      &allocator->device_props);
+
+  VkPhysicalDeviceExternalMemoryHostPropertiesEXT external_memory_props;
+  external_memory_props.sType =
+      VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT;
+  external_memory_props.pNext = NULL;
+  allocator->device_props_11.sType =
+      VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES;
+  allocator->device_props_11.pNext = &external_memory_props;
+  VkPhysicalDeviceProperties2 device_props_2;
+  device_props_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+  device_props_2.pNext = &allocator->device_props_11;
+  syms->vkGetPhysicalDeviceProperties2(physical_device, &device_props_2);
+  allocator->device_props = device_props_2.properties;
   syms->vkGetPhysicalDeviceMemoryProperties(physical_device,
                                             &allocator->memory_props);
+  allocator->min_imported_host_pointer_alignment =
+      external_memory_props.minImportedHostPointerAlignment;
+
   iree_status_t status = iree_hal_vulkan_populate_memory_types(
       &allocator->device_props, &allocator->memory_props,
       &allocator->memory_types);
@@ -133,11 +150,26 @@
       &allocator->memory_types, capacity, heaps, out_count);
 }
 
+// Returns true if a buffer with the given parameters and size should use
+// sparse binding to attach segmented device memory to a single buffer.
+static bool iree_hal_vulkan_buffer_needs_sparse_binding(
+    iree_hal_vulkan_native_allocator_t* allocator,
+    const iree_hal_buffer_params_t* IREE_RESTRICT params,
+    iree_device_size_t allocation_size) {
+  if (allocation_size <= allocator->device_props_11.maxMemoryAllocationSize) {
+    return false;  // fits under the normal allocation limit
+  }
+  return true;
+}
+
 static iree_hal_buffer_compatibility_t
 iree_hal_vulkan_native_allocator_query_buffer_compatibility(
     iree_hal_allocator_t* IREE_RESTRICT base_allocator,
     iree_hal_buffer_params_t* IREE_RESTRICT params,
     iree_device_size_t* IREE_RESTRICT allocation_size) {
+  iree_hal_vulkan_native_allocator_t* allocator =
+      iree_hal_vulkan_native_allocator_cast(base_allocator);
+
   // TODO(benvanik): check to ensure the allocator can serve the memory type.
 
   // All buffers can be allocated on the heap.
@@ -168,6 +200,44 @@
   // act safely even on buffer ranges that are not naturally aligned.
   *allocation_size = iree_host_align(*allocation_size, 4);
 
+  // Sparse allocations are used only when required and supported.
+  const bool use_sparse_allocation =
+      iree_hal_vulkan_buffer_needs_sparse_binding(allocator, params,
+                                                  *allocation_size);
+  if (use_sparse_allocation &&
+      iree_all_bits_set(allocator->logical_device->enabled_features(),
+                        IREE_HAL_VULKAN_FEATURE_ENABLE_SPARSE_BINDING)) {
+    // For now we don't allow import/export of sparsely bound buffers. This is
+    // not a strict Vulkan requirement but it does complicate things as we
+    // cannot get a single VkDeviceMemory handle to use in managing the external
+    // buffer.
+    compatibility &= ~(IREE_HAL_BUFFER_COMPATIBILITY_IMPORTABLE |
+                       IREE_HAL_BUFFER_COMPATIBILITY_EXPORTABLE);
+    if (iree_any_bit_set(params->usage,
+                         IREE_HAL_BUFFER_USAGE_MAPPING_SCOPED |
+                             IREE_HAL_BUFFER_USAGE_MAPPING_PERSISTENT)) {
+      if (iree_all_bits_set(params->usage,
+                            IREE_HAL_BUFFER_USAGE_MAPPING_OPTIONAL)) {
+        // Mapping was optionally requested and sparse buffers can't be mapped
+        // so we strip the request flags.
+        params->usage &=
+            ~(IREE_HAL_BUFFER_USAGE_MAPPING_SCOPED |
+              IREE_HAL_BUFFER_USAGE_MAPPING_PERSISTENT |
+              IREE_HAL_BUFFER_USAGE_MAPPING_OPTIONAL |
+              IREE_HAL_BUFFER_USAGE_MAPPING_ACCESS_RANDOM |
+              IREE_HAL_BUFFER_USAGE_MAPPING_ACCESS_SEQUENTIAL_WRITE);
+      } else {
+        // Mapping required but cannot be serviced with sparse bindings.
+        compatibility = IREE_HAL_BUFFER_COMPATIBILITY_NONE;
+      }
+    }
+  } else if (*allocation_size >
+             allocator->device_props_11.maxMemoryAllocationSize) {
+    // Cannot allocate buffers larger than the max allowed without sparse
+    // binding.
+    compatibility = IREE_HAL_BUFFER_COMPATIBILITY_NONE;
+  }
+
   return compatibility;
 }
 
@@ -181,6 +251,190 @@
                                        logical_device->allocator());
 }
 
+static iree_status_t iree_hal_vulkan_native_allocator_commit_and_wrap(
+    iree_hal_vulkan_native_allocator_t* IREE_RESTRICT allocator,
+    const iree_hal_buffer_params_t* IREE_RESTRICT params,
+    iree_device_size_t allocation_size, bool use_sparse_allocation,
+    VkBuffer handle, iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
+  VkDeviceHandle* logical_device = allocator->logical_device;
+
+  // TODO(benvanik): map queue affinity.
+  VkQueue queue = VK_NULL_HANDLE;
+  logical_device->syms()->vkGetDeviceQueue(*logical_device, 0, 0, &queue);
+
+  // Ask Vulkan what the implementation requires of the allocation(s) for the
+  // buffer. We should in most cases always get the same kind of values but
+  // alignment and valid memory types will differ for dense and sparse buffers.
+  VkMemoryRequirements requirements = {0};
+  logical_device->syms()->vkGetBufferMemoryRequirements(*logical_device, handle,
+                                                        &requirements);
+  uint32_t memory_type_index = 0;
+  IREE_RETURN_IF_ERROR(iree_hal_vulkan_find_memory_type(
+      &allocator->device_props, &allocator->memory_props, params,
+      /*allowed_type_indices=*/requirements.memoryTypeBits,
+      &memory_type_index));
+
+  if (use_sparse_allocation) {
+    // Use sparse allocation for this buffer in order to exceed the maximum
+    // allocation size of the implementation. This is not a very efficient way
+    // to allocate such buffers (synchronously from raw allocations) but this
+    // path is primarily used by large persistent variables and constants.
+    return iree_hal_vulkan_sparse_buffer_create_bound_sync(
+        (iree_hal_allocator_t*)allocator, params->type, params->access,
+        params->usage, allocation_size, /*byte_offset=*/0,
+        /*byte_length=*/allocation_size, logical_device, queue, handle,
+        requirements, memory_type_index,
+        allocator->device_props_11.maxMemoryAllocationSize, out_buffer);
+  }
+
+  // Allocate the device memory we'll attach the buffer to.
+  VkMemoryAllocateInfo allocate_info = {};
+  allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+  allocate_info.pNext = NULL;
+  allocate_info.allocationSize = requirements.size;
+  allocate_info.memoryTypeIndex = memory_type_index;
+  VkDeviceMemory device_memory = VK_NULL_HANDLE;
+  VK_RETURN_IF_ERROR(logical_device->syms()->vkAllocateMemory(
+                         *logical_device, &allocate_info,
+                         logical_device->allocator(), &device_memory),
+                     "vkAllocateMemory");
+
+  // Wrap the device memory allocation and buffer handle in our own buffer type.
+  iree_hal_vulkan_native_buffer_release_callback_t internal_release_callback = {
+      0};
+  internal_release_callback.fn =
+      iree_hal_vulkan_native_allocator_native_buffer_release;
+  internal_release_callback.user_data = NULL;
+  iree_status_t status = iree_hal_vulkan_native_buffer_wrap(
+      (iree_hal_allocator_t*)allocator, params->type, params->access,
+      params->usage, allocation_size,
+      /*byte_offset=*/0,
+      /*byte_length=*/allocation_size, logical_device, device_memory, handle,
+      internal_release_callback, iree_hal_buffer_release_callback_null(),
+      out_buffer);
+  if (!iree_status_is_ok(status)) {
+    logical_device->syms()->vkFreeMemory(*logical_device, device_memory,
+                                         logical_device->allocator());
+    return status;
+  }
+
+  // Bind the memory to the buffer.
+  if (iree_status_is_ok(status)) {
+    status = VK_RESULT_TO_STATUS(
+        logical_device->syms()->vkBindBufferMemory(
+            *logical_device, handle, device_memory, /*memoryOffset=*/0),
+        "vkBindBufferMemory");
+  }
+
+  return status;
+}
+
+static iree_status_t iree_hal_vulkan_native_allocator_create_buffer(
+    VkDeviceHandle* logical_device,
+    const iree_hal_buffer_params_t* IREE_RESTRICT params,
+    iree_device_size_t allocation_size, bool use_sparse_allocation,
+    bool bind_host_memory, VkBuffer* out_handle) {
+  IREE_ASSERT_ARGUMENT(logical_device);
+  IREE_ASSERT_ARGUMENT(params);
+  *out_handle = VK_NULL_HANDLE;
+
+  // Create an initially unbound buffer handle. The buffer is the logical view
+  // into the physical allocation(s) that are bound to it below.
+  VkBufferCreateInfo buffer_create_info = {};
+  buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+  buffer_create_info.pNext = NULL;
+  buffer_create_info.flags = 0;
+  buffer_create_info.size = allocation_size;
+  buffer_create_info.usage = 0;
+  if (iree_all_bits_set(params->usage, IREE_HAL_BUFFER_USAGE_TRANSFER_SOURCE)) {
+    buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+  }
+  if (iree_all_bits_set(params->usage, IREE_HAL_BUFFER_USAGE_TRANSFER_TARGET)) {
+    buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+  }
+  if (iree_any_bit_set(params->usage, IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE)) {
+    buffer_create_info.usage |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+    buffer_create_info.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+    buffer_create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
+  }
+  if (use_sparse_allocation) {
+    // NOTE: residency bits require sparseResidencyBuffer; only binding is
+    buffer_create_info.flags |= VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
+  }
+  buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+  buffer_create_info.queueFamilyIndexCount = 0;
+  buffer_create_info.pQueueFamilyIndices = NULL;
+
+  // If trying to bind to external memory we need to verify we can create a
+  // buffer that can be bound.
+  if (bind_host_memory) {
+    VkPhysicalDeviceExternalBufferInfo external_info;
+    memset(&external_info, 0, sizeof(external_info));
+    external_info.sType =
+        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO;
+    external_info.pNext = NULL;
+    external_info.flags = buffer_create_info.flags;
+    external_info.usage = buffer_create_info.usage;
+    external_info.handleType =
+        VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
+    VkExternalBufferProperties external_props;
+    memset(&external_props, 0, sizeof(external_props));
+    external_props.sType = VK_STRUCTURE_TYPE_EXTERNAL_BUFFER_PROPERTIES;
+    logical_device->syms()->vkGetPhysicalDeviceExternalBufferProperties(
+        logical_device->physical_device(), &external_info, &external_props);
+    if (!iree_all_bits_set(
+            external_props.externalMemoryProperties.externalMemoryFeatures,
+            VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT)) {
+#if IREE_STATUS_MODE
+      iree_bitfield_string_temp_t temp0;
+      iree_string_view_t usage_str =
+          iree_hal_buffer_usage_format(params->usage, &temp0);
+      return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                              "implementation does not support binding "
+                              "imported host memory to buffers for usage=%.*s",
+                              (int)usage_str.size, usage_str.data);
+#else
+      return iree_make_status(IREE_STATUS_UNIMPLEMENTED);
+#endif  // IREE_STATUS_MODE
+    }
+    if (!iree_all_bits_set(
+            external_props.externalMemoryProperties.compatibleHandleTypes,
+            VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT)) {
+#if IREE_STATUS_MODE
+      iree_bitfield_string_temp_t temp0;
+      iree_string_view_t usage_str =
+          iree_hal_buffer_usage_format(params->usage, &temp0);
+      return iree_make_status(
+          IREE_STATUS_UNIMPLEMENTED,
+          "implementation does not support binding external host allocations "
+          "to buffers for usage=%.*s",
+          (int)usage_str.size, usage_str.data);
+#else
+      return iree_make_status(IREE_STATUS_UNIMPLEMENTED);
+#endif  // IREE_STATUS_MODE
+    }
+  }
+
+  VkExternalMemoryBufferCreateInfo external_create_info = {};
+  if (bind_host_memory) {
+    external_create_info.sType =
+        VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO;
+    external_create_info.pNext = NULL;
+    external_create_info.handleTypes =
+        VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
+    buffer_create_info.pNext = &external_create_info;
+  }
+
+  VkBuffer handle = VK_NULL_HANDLE;
+  VK_RETURN_IF_ERROR(logical_device->syms()->vkCreateBuffer(
+                         *logical_device, &buffer_create_info,
+                         logical_device->allocator(), &handle),
+                     "vkCreateBuffer");
+
+  *out_handle = handle;
+  return iree_ok_status();
+}
+
 static iree_status_t iree_hal_vulkan_native_allocator_allocate_internal(
     iree_hal_vulkan_native_allocator_t* IREE_RESTRICT allocator,
     const iree_hal_buffer_params_t* IREE_RESTRICT params,
@@ -191,85 +445,49 @@
   // TODO(benvanik): if on a unified memory system and initial data is present
   // we could set the mapping bit and ensure a much more efficient upload.
 
-  // Allocate the device memory we'll attach the buffer to.
-  VkMemoryAllocateInfo allocate_info = {};
-  allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
-  allocate_info.pNext = NULL;
-  allocate_info.memoryTypeIndex = 0;
-  allocate_info.allocationSize = allocation_size;
-  IREE_RETURN_IF_ERROR(iree_hal_vulkan_find_memory_type(
-      &allocator->device_props, &allocator->memory_props, params,
-      &allocate_info.memoryTypeIndex));
-  VkDeviceMemory device_memory = VK_NULL_HANDLE;
-  VK_RETURN_IF_ERROR(logical_device->syms()->vkAllocateMemory(
-                         *logical_device, &allocate_info,
-                         logical_device->allocator(), &device_memory),
-                     "vkAllocateMemory");
+  // When required and available we allocate buffers using sparse binding.
+  const bool use_sparse_allocation =
+      iree_hal_vulkan_buffer_needs_sparse_binding(allocator, params,
+                                                  allocation_size);
+  if (use_sparse_allocation &&
+      !iree_all_bits_set(allocator->logical_device->enabled_features(),
+                         IREE_HAL_VULKAN_FEATURE_ENABLE_SPARSE_BINDING)) {
+    return iree_make_status(
+        IREE_STATUS_RESOURCE_EXHAUSTED,
+        "sparse binding support is required for buffers larger than %" PRId64
+        " but is not present or enabled on this device",
+        (int64_t)allocator->device_props_11.maxMemoryAllocationSize);
+  }
 
-  // Create an initially unbound buffer handle.
-  VkBufferCreateInfo buffer_create_info = {};
-  buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
-  buffer_create_info.pNext = NULL;
-  buffer_create_info.flags = 0;
-  buffer_create_info.size = allocation_size;
-  buffer_create_info.usage = 0;
-  if (iree_all_bits_set(params->usage, IREE_HAL_BUFFER_USAGE_TRANSFER)) {
-    buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
-    buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
-  }
-  if (iree_all_bits_set(params->usage,
-                        IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE)) {
-    buffer_create_info.usage |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
-    buffer_create_info.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
-    buffer_create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
-  }
-  buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
-  buffer_create_info.queueFamilyIndexCount = 0;
-  buffer_create_info.pQueueFamilyIndices = NULL;
+  // Create an initially unbound buffer handle. The buffer is the logical view
+  // into the physical allocation(s) that are bound to it below.
   VkBuffer handle = VK_NULL_HANDLE;
-  iree_status_t status =
-      VK_RESULT_TO_STATUS(logical_device->syms()->vkCreateBuffer(
-                              *logical_device, &buffer_create_info,
-                              logical_device->allocator(), &handle),
-                          "vkCreateBuffer");
+  IREE_RETURN_IF_ERROR(iree_hal_vulkan_native_allocator_create_buffer(
+      logical_device, params, allocation_size, use_sparse_allocation,
+      /*bind_host_memory=*/false, &handle));
 
-  iree_hal_vulkan_native_buffer_release_callback_t release_callback = {0};
-  release_callback.fn = iree_hal_vulkan_native_allocator_native_buffer_release;
-  release_callback.user_data = NULL;
+  // Commit the backing memory for the buffer and wrap it in a HAL buffer type.
+  // If this fails the buffer may still be set and need to be released below.
+  // If the buffer is not created the handle needs to be cleaned up.
   iree_hal_buffer_t* buffer = NULL;
-  if (iree_status_is_ok(status)) {
-    status = iree_hal_vulkan_native_buffer_wrap(
-        (iree_hal_allocator_t*)allocator, params->type, params->access,
-        params->usage, allocation_size,
-        /*byte_offset=*/0,
-        /*byte_length=*/allocation_size, logical_device, device_memory, handle,
-        release_callback, &buffer);
-  }
+  iree_status_t status = iree_hal_vulkan_native_allocator_commit_and_wrap(
+      allocator, params, allocation_size, use_sparse_allocation, handle,
+      &buffer);
   if (!iree_status_is_ok(status)) {
-    // Early exit after cleaning up the buffer and allocation.
-    // After this point releasing the wrapping buffer will take care of this.
-    if (handle) {
+    // Early exit and make sure to destroy the buffer if we didn't get the
+    // chance to wrap it.
+    if (buffer) {
+      iree_hal_buffer_release(buffer);
+    } else if (handle != VK_NULL_HANDLE) {
       logical_device->syms()->vkDestroyBuffer(*logical_device, handle,
                                               logical_device->allocator());
     }
-    if (device_memory) {
-      logical_device->syms()->vkFreeMemory(*logical_device, device_memory,
-                                           logical_device->allocator());
-    }
     return status;
   }
 
   IREE_TRACE_ALLOC_NAMED(IREE_HAL_VULKAN_NATIVE_ALLOCATOR_ID, (void*)handle,
                          allocation_size);
 
-  // Bind the memory to the buffer.
-  if (iree_status_is_ok(status)) {
-    status = VK_RESULT_TO_STATUS(
-        logical_device->syms()->vkBindBufferMemory(
-            *logical_device, handle, device_memory, /*memoryOffset=*/0),
-        "vkBindBufferMemory");
-  }
-
   if (iree_status_is_ok(status)) {
     iree_hal_allocator_statistics_record_alloc(
         &allocator->statistics, params->type, buffer->allocation_size);
@@ -315,15 +532,278 @@
   iree_hal_buffer_destroy(base_buffer);
 }
 
+// Release callback for buffers imported from external host allocations: we
+// own the VkBuffer and the VkDeviceMemory import wrapper and must destroy
+// both; the underlying host pointer itself remains owned by the caller.
+static void iree_hal_vulkan_native_allocator_external_host_buffer_release(
+    void* user_data, iree::hal::vulkan::VkDeviceHandle* logical_device,
+    VkDeviceMemory device_memory, VkBuffer handle) {
+  const auto& syms = logical_device->syms();
+  // The buffer view goes away before the memory that backs it.
+  if (handle != VK_NULL_HANDLE) {
+    syms->vkDestroyBuffer(*logical_device, handle,
+                          logical_device->allocator());
+  }
+  if (device_memory != VK_NULL_HANDLE) {
+    syms->vkFreeMemory(*logical_device, device_memory,
+                       logical_device->allocator());
+  }
+}
+
+// Aligns |host_ptr| down to the nearest boundary satisfying both the buffer's
+// memory requirements and the device's minimum imported host pointer
+// alignment, growing |size| so the padded range still covers every originally
+// requested byte. Returns the offset from the aligned pointer back to the
+// original pointer (used as the memoryOffset when binding).
+static VkDeviceSize iree_hal_vulkan_native_allocator_align_external_ptr(
+    iree_hal_vulkan_native_allocator_t* allocator,
+    const VkMemoryRequirements* requirements, void** host_ptr,
+    VkDeviceSize* size) {
+  // Both constraints must hold at once so align to their least common
+  // multiple.
+  const VkDeviceSize alignment = (VkDeviceSize)iree_device_size_lcm(
+      (iree_device_size_t)requirements->alignment,
+      (iree_device_size_t)allocator->min_imported_host_pointer_alignment);
+
+  // Round the base address down to the nearest alignment boundary.
+  const VkDeviceSize original_addr = *((VkDeviceSize*)host_ptr);
+  const VkDeviceSize base_addr = original_addr - (original_addr % alignment);
+  IREE_ASSERT(original_addr >= base_addr);
+  const VkDeviceSize pointer_offset = original_addr - base_addr;
+  *host_ptr = (void*)base_addr;
+
+  // Round the end of the range up so the total size is alignment-padded and
+  // still covers all bytes of the original [addr, addr+size) range.
+  const VkDeviceSize original_end = original_addr + *size;
+  IREE_ASSERT(original_end >= base_addr);
+  const VkDeviceSize padded_end =
+      (original_end + alignment - 1) / alignment * alignment;
+  IREE_ASSERT(padded_end >= original_end);
+  const VkDeviceSize padded_size = padded_end - base_addr;
+  IREE_ASSERT(padded_size >= *size);
+  IREE_ASSERT(padded_size % alignment == 0);
+  *size = padded_size;
+
+  return pointer_offset;
+}
+
+// Imports caller-owned host memory as a HAL buffer using
+// VK_EXT_external_memory_host. The pointer is aligned down (and the size
+// padded up) to meet the device import requirements and the resulting HAL
+// buffer subranges so callers still see exactly |external_buffer->size| bytes
+// starting at the original pointer. |release_callback| fires when the HAL
+// buffer is destroyed so the caller can reclaim the host memory.
+static iree_status_t iree_hal_vulkan_native_allocator_import_host_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    const iree_hal_buffer_params_t* IREE_RESTRICT params,
+    iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
+    iree_hal_buffer_release_callback_t release_callback,
+    iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
+  iree_hal_vulkan_native_allocator_t* allocator =
+      iree_hal_vulkan_native_allocator_cast(base_allocator);
+  VkDeviceHandle* logical_device = allocator->logical_device;
+
+  // Extension must be present, though note that the presence of the extension
+  // does not imply that the particular pointer passed can actually be used.
+  if (!logical_device->enabled_extensions().external_memory_host) {
+    return iree_make_status(
+        IREE_STATUS_UNIMPLEMENTED,
+        "external host memory import is not supported on this device");
+  }
+
+  // Query the properties of the pointer to see what memory types it can be
+  // imported with. This can be very expensive as on some platforms it does
+  // a linear scan of the virtual address range to ensure all pages have the
+  // same properties.
+  IREE_TRACE_ZONE_BEGIN_NAMED(z_a, "vkGetMemoryHostPointerPropertiesEXT");
+  VkMemoryHostPointerPropertiesEXT props = {};
+  props.sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT;
+  props.pNext = NULL;
+  iree_status_t status = VK_RESULT_TO_STATUS(
+      logical_device->syms()->vkGetMemoryHostPointerPropertiesEXT(
+          *logical_device,
+          VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+          external_buffer->handle.host_allocation.ptr, &props),
+      "vkGetMemoryHostPointerPropertiesEXT");
+  IREE_TRACE_ZONE_END(z_a);
+  IREE_RETURN_IF_ERROR(status);
+
+  // TODO(benvanik): snoop and adjust parameters: if the returned host ptr
+  // properties memory types contains allocator->memory_types.dispatch_idx then
+  // we can import as device-local! Otherwise we should only allow host-local.
+  // For now we just trust the user to have passed the right thing and otherwise
+  // they'll get validation errors on any misuse.
+
+  // Create the unbound buffer first as we need it to query the requirements the
+  // imported buffer must satisfy.
+  VkBuffer handle = VK_NULL_HANDLE;
+  IREE_RETURN_IF_ERROR(iree_hal_vulkan_native_allocator_create_buffer(
+      logical_device, params, external_buffer->size,
+      /*use_sparse_allocation=*/false,
+      /*bind_host_memory=*/true, &handle));
+
+  // Ask Vulkan what the implementation requires of the allocation(s) for the
+  // buffer. We should in most cases always get the same kind of values but
+  // alignment and valid memory types will differ for dense and sparse buffers.
+  // We also can't trust the memory passed in is even usable.
+  IREE_TRACE_ZONE_BEGIN_NAMED(z_b, "vkGetBufferMemoryRequirements");
+  VkMemoryRequirements requirements = {0};
+  logical_device->syms()->vkGetBufferMemoryRequirements(*logical_device, handle,
+                                                        &requirements);
+  IREE_TRACE_ZONE_END(z_b);
+  // The chosen memory type must be importable for this pointer AND legal for
+  // the buffer, so intersect both masks before searching.
+  uint32_t memory_type_index = 0;
+  status = iree_hal_vulkan_find_memory_type(
+      &allocator->device_props, &allocator->memory_props, params,
+      /*allowed_type_indices=*/
+      (props.memoryTypeBits & requirements.memoryTypeBits), &memory_type_index);
+  if (!iree_status_is_ok(status)) {
+    logical_device->syms()->vkDestroyBuffer(*logical_device, handle,
+                                            logical_device->allocator());
+    return status;
+  }
+
+  // Align the pointer and its size to the requirements of the memory type and
+  // allocator. This may extend the base pointer down to a page boundary and the
+  // size up to a page boundary but we'll subrange so that the buffer still
+  // appears to have the same logical range.
+  void* host_ptr = external_buffer->handle.host_allocation.ptr;
+  VkDeviceSize allocation_size = (VkDeviceSize)external_buffer->size;
+  VkDeviceSize memory_offset =
+      iree_hal_vulkan_native_allocator_align_external_ptr(
+          allocator, &requirements, &host_ptr, &allocation_size);
+
+  // Allocate the device memory we'll attach the buffer to.
+  VkMemoryAllocateInfo allocate_info = {};
+  allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+  allocate_info.pNext = NULL;
+  allocate_info.allocationSize = allocation_size;
+  allocate_info.memoryTypeIndex = memory_type_index;
+  VkImportMemoryHostPointerInfoEXT import_host_ptr_info = {};
+  import_host_ptr_info.sType =
+      VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT;
+  import_host_ptr_info.pNext = NULL;
+  import_host_ptr_info.pHostPointer = host_ptr;
+  import_host_ptr_info.handleType =
+      VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
+  allocate_info.pNext = &import_host_ptr_info;
+  VkDeviceMemory device_memory = VK_NULL_HANDLE;
+  IREE_TRACE_ZONE_BEGIN_NAMED(z_c, "vkAllocateMemory");
+  status = VK_RESULT_TO_STATUS(logical_device->syms()->vkAllocateMemory(
+                                   *logical_device, &allocate_info,
+                                   logical_device->allocator(), &device_memory),
+                               "vkAllocateMemory");
+  IREE_TRACE_ZONE_END(z_c);
+  if (!iree_status_is_ok(status)) {
+    logical_device->syms()->vkDestroyBuffer(*logical_device, handle,
+                                            logical_device->allocator());
+    return status;
+  }
+
+  // Wrap the device memory allocation and buffer handle in our own buffer type.
+  iree_hal_vulkan_native_buffer_release_callback_t internal_release_callback = {
+      0};
+  internal_release_callback.fn =
+      iree_hal_vulkan_native_allocator_external_host_buffer_release;
+  internal_release_callback.user_data = NULL;
+  iree_hal_buffer_t* buffer = NULL;
+  status = iree_hal_vulkan_native_buffer_wrap(
+      (iree_hal_allocator_t*)allocator, params->type, params->access,
+      params->usage, (iree_device_size_t)allocation_size,
+      /*byte_offset=*/0,
+      /*byte_length=*/external_buffer->size, logical_device, device_memory,
+      handle, internal_release_callback, release_callback, &buffer);
+  if (!iree_status_is_ok(status)) {
+    // The wrapper did not take ownership: destroy both Vulkan objects here.
+    logical_device->syms()->vkDestroyBuffer(*logical_device, handle,
+                                            logical_device->allocator());
+    logical_device->syms()->vkFreeMemory(*logical_device, device_memory,
+                                         logical_device->allocator());
+    return status;
+  }
+
+  // Bind the memory to the buffer at a possibly non-zero offset if we had to
+  // align the host pointer down to a page boundary.
+  IREE_TRACE_ZONE_BEGIN_NAMED(z_d, "vkBindBufferMemory");
+  status = VK_RESULT_TO_STATUS(
+      logical_device->syms()->vkBindBufferMemory(*logical_device, handle,
+                                                 device_memory, memory_offset),
+      "vkBindBufferMemory");
+  IREE_TRACE_ZONE_END(z_d);
+
+  if (iree_status_is_ok(status)) {
+    *out_buffer = buffer;
+  } else {
+    // Releasing the wrapper runs the internal release callback above, which
+    // destroys the VkBuffer and frees the imported device memory.
+    iree_hal_buffer_release(buffer);
+  }
+  return status;
+}
+
+// Release callback for buffers wrapping caller-owned VkDeviceMemory: only the
+// VkBuffer view we created is destroyed; the memory stays with the caller.
+static void iree_hal_vulkan_native_allocator_external_device_buffer_release(
+    void* user_data, iree::hal::vulkan::VkDeviceHandle* logical_device,
+    VkDeviceMemory device_memory, VkBuffer handle) {
+  if (handle == VK_NULL_HANDLE) return;  // nothing created, nothing to do
+  logical_device->syms()->vkDestroyBuffer(*logical_device, handle,
+                                          logical_device->allocator());
+}
+
+// Imports an existing caller-owned VkDeviceMemory allocation as a HAL buffer.
+// A new VkBuffer is created as the logical view over the memory and bound at
+// offset 0; the memory itself is not owned and is never freed by us, though
+// |release_callback| is invoked on destruction so the caller can reclaim it.
+static iree_status_t iree_hal_vulkan_native_allocator_import_device_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    const iree_hal_buffer_params_t* IREE_RESTRICT params,
+    iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
+    iree_hal_buffer_release_callback_t release_callback,
+    iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
+  iree_hal_vulkan_native_allocator_t* allocator =
+      iree_hal_vulkan_native_allocator_cast(base_allocator);
+  VkDeviceHandle* logical_device = allocator->logical_device;
+
+  // A 'device allocation' is a VkDeviceMemory. We'll need to wrap a logical
+  // VkBuffer around it for using within the HAL.
+  VkDeviceMemory device_memory =
+      (VkDeviceMemory)external_buffer->handle.device_allocation.ptr;
+  if (IREE_UNLIKELY(!device_memory)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "no device memory handle provided");
+  }
+
+  // Create the logical buffer we can attach the memory to.
+  VkBuffer handle = VK_NULL_HANDLE;
+  IREE_RETURN_IF_ERROR(iree_hal_vulkan_native_allocator_create_buffer(
+      logical_device, params, external_buffer->size,
+      /*use_sparse_allocation=*/false,
+      /*bind_host_memory=*/false, &handle));
+
+  // Bind the memory to the buffer.
+  IREE_TRACE_ZONE_BEGIN_NAMED(z_a, "vkBindBufferMemory");
+  iree_status_t status = VK_RESULT_TO_STATUS(
+      logical_device->syms()->vkBindBufferMemory(
+          *logical_device, handle, device_memory, /*memoryOffset=*/0),
+      "vkBindBufferMemory");
+  IREE_TRACE_ZONE_END(z_a);
+  if (!iree_status_is_ok(status)) {
+    logical_device->syms()->vkDestroyBuffer(*logical_device, handle,
+                                            logical_device->allocator());
+    return status;
+  }
+
+  // Wrap the device memory allocation and buffer handle in our own buffer type.
+  iree_hal_vulkan_native_buffer_release_callback_t internal_release_callback = {
+      0};
+  internal_release_callback.fn =
+      iree_hal_vulkan_native_allocator_external_device_buffer_release;
+  internal_release_callback.user_data = NULL;
+  status = iree_hal_vulkan_native_buffer_wrap(
+      (iree_hal_allocator_t*)allocator, params->type, params->access,
+      params->usage, (iree_device_size_t)external_buffer->size,
+      /*byte_offset=*/0,
+      /*byte_length=*/external_buffer->size, logical_device, device_memory,
+      handle, internal_release_callback, release_callback, out_buffer);
+  if (!iree_status_is_ok(status)) {
+    // FIX: the VkBuffer previously leaked if wrapping failed; the wrapper only
+    // takes ownership of |handle| on success so we must destroy it ourselves.
+    // The caller's device memory is left untouched.
+    logical_device->syms()->vkDestroyBuffer(*logical_device, handle,
+                                            logical_device->allocator());
+  }
+  return status;
+}
+
 static iree_status_t iree_hal_vulkan_native_allocator_import_buffer(
     iree_hal_allocator_t* IREE_RESTRICT base_allocator,
     const iree_hal_buffer_params_t* IREE_RESTRICT params,
     iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
     iree_hal_buffer_release_callback_t release_callback,
     iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
-  // TODO(#7242): use VK_EXT_external_memory_host to import memory.
-  return iree_make_status(IREE_STATUS_UNAVAILABLE,
-                          "importing from external buffers not supported");
+  // Dispatch on the external handle type; each importer validates its own
+  // requirements and produces a HAL buffer wrapping the caller's memory.
+  if (external_buffer->type == IREE_HAL_EXTERNAL_BUFFER_TYPE_HOST_ALLOCATION) {
+    return iree_hal_vulkan_native_allocator_import_host_buffer(
+        base_allocator, params, external_buffer, release_callback, out_buffer);
+  }
+  if (external_buffer->type ==
+      IREE_HAL_EXTERNAL_BUFFER_TYPE_DEVICE_ALLOCATION) {
+    return iree_hal_vulkan_native_allocator_import_device_buffer(
+        base_allocator, params, external_buffer, release_callback, out_buffer);
+  }
+  // Other handle types (DMA-BUF/FD, Win32, etc) are not routed here yet.
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "external buffer type import not implemented");
 }
 
 static iree_status_t iree_hal_vulkan_native_allocator_export_buffer(
diff --git a/runtime/src/iree/hal/drivers/vulkan/native_buffer.cc b/runtime/src/iree/hal/drivers/vulkan/native_buffer.cc
index f7ca492..f0c0ec7 100644
--- a/runtime/src/iree/hal/drivers/vulkan/native_buffer.cc
+++ b/runtime/src/iree/hal/drivers/vulkan/native_buffer.cc
@@ -17,7 +17,8 @@
 typedef struct iree_hal_vulkan_native_buffer_t {
   iree_hal_vulkan_base_buffer_t base;
   iree::hal::vulkan::VkDeviceHandle* logical_device;
-  iree_hal_vulkan_native_buffer_release_callback_t release_callback;
+  iree_hal_vulkan_native_buffer_release_callback_t internal_release_callback;
+  iree_hal_buffer_release_callback_t user_release_callback;
 } iree_hal_vulkan_native_buffer_t;
 
 namespace {
@@ -37,11 +38,11 @@
     iree_device_size_t byte_offset, iree_device_size_t byte_length,
     iree::hal::vulkan::VkDeviceHandle* logical_device,
     VkDeviceMemory device_memory, VkBuffer handle,
-    iree_hal_vulkan_native_buffer_release_callback_t release_callback,
+    iree_hal_vulkan_native_buffer_release_callback_t internal_release_callback,
+    iree_hal_buffer_release_callback_t user_release_callback,
     iree_hal_buffer_t** out_buffer) {
   IREE_ASSERT_ARGUMENT(allocator);
   IREE_ASSERT_ARGUMENT(logical_device);
-  IREE_ASSERT_ARGUMENT(device_memory);
   IREE_ASSERT_ARGUMENT(handle);
   IREE_ASSERT_ARGUMENT(out_buffer);
   IREE_TRACE_ZONE_BEGIN(z0);
@@ -60,7 +61,8 @@
     buffer->base.device_memory = device_memory;
     buffer->base.handle = handle;
     buffer->logical_device = logical_device;
-    buffer->release_callback = release_callback;
+    buffer->internal_release_callback = internal_release_callback;
+    buffer->user_release_callback = user_release_callback;
 
     *out_buffer = &buffer->base.base;
   }
@@ -78,11 +80,15 @@
   IREE_TRACE_ZONE_APPEND_VALUE_I64(
       z0, (int64_t)iree_hal_buffer_allocation_size(base_buffer));
 
-  if (buffer->release_callback.fn) {
-    buffer->release_callback.fn(
-        buffer->release_callback.user_data, buffer->logical_device,
+  if (buffer->internal_release_callback.fn) {
+    buffer->internal_release_callback.fn(
+        buffer->internal_release_callback.user_data, buffer->logical_device,
         buffer->base.device_memory, buffer->base.handle);
   }
+  if (buffer->user_release_callback.fn) {
+    buffer->user_release_callback.fn(buffer->user_release_callback.user_data,
+                                     &buffer->base.base);
+  }
 
   iree_allocator_free(host_allocator, buffer);
 
@@ -96,6 +102,11 @@
     iree_hal_buffer_mapping_t* mapping) {
   iree_hal_vulkan_native_buffer_t* buffer =
       iree_hal_vulkan_native_buffer_cast(base_buffer);
+  if (IREE_UNLIKELY(!buffer->base.device_memory)) {
+    return iree_make_status(
+        IREE_STATUS_FAILED_PRECONDITION,
+        "buffer does not have device memory attached and cannot be mapped");
+  }
   auto* logical_device = buffer->logical_device;
 
   // TODO(benvanik): add upload/download for unmapped buffers.
@@ -137,6 +148,11 @@
     iree_device_size_t local_byte_length, iree_hal_buffer_mapping_t* mapping) {
   iree_hal_vulkan_native_buffer_t* buffer =
       iree_hal_vulkan_native_buffer_cast(base_buffer);
+  if (IREE_UNLIKELY(!buffer->base.device_memory)) {
+    return iree_make_status(
+        IREE_STATUS_FAILED_PRECONDITION,
+        "buffer does not have device memory attached and cannot be mapped");
+  }
   auto* logical_device = buffer->logical_device;
   logical_device->syms()->vkUnmapMemory(*logical_device,
                                         buffer->base.device_memory);
@@ -148,6 +164,11 @@
     iree_device_size_t local_byte_length) {
   iree_hal_vulkan_native_buffer_t* buffer =
       iree_hal_vulkan_native_buffer_cast(base_buffer);
+  if (IREE_UNLIKELY(!buffer->base.device_memory)) {
+    return iree_make_status(
+        IREE_STATUS_FAILED_PRECONDITION,
+        "buffer does not have device memory attached and cannot be mapped");
+  }
   auto* logical_device = buffer->logical_device;
   VkMappedMemoryRange range;
   range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
@@ -166,6 +187,11 @@
     iree_device_size_t local_byte_length) {
   iree_hal_vulkan_native_buffer_t* buffer =
       iree_hal_vulkan_native_buffer_cast(base_buffer);
+  if (IREE_UNLIKELY(!buffer->base.device_memory)) {
+    return iree_make_status(
+        IREE_STATUS_FAILED_PRECONDITION,
+        "buffer does not have device memory attached and cannot be mapped");
+  }
   auto* logical_device = buffer->logical_device;
   VkMappedMemoryRange range;
   range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
diff --git a/runtime/src/iree/hal/drivers/vulkan/native_buffer.h b/runtime/src/iree/hal/drivers/vulkan/native_buffer.h
index 0cb9c9e..7d8a273 100644
--- a/runtime/src/iree/hal/drivers/vulkan/native_buffer.h
+++ b/runtime/src/iree/hal/drivers/vulkan/native_buffer.h
@@ -41,7 +41,8 @@
     iree_device_size_t byte_offset, iree_device_size_t byte_length,
     iree::hal::vulkan::VkDeviceHandle* logical_device,
     VkDeviceMemory device_memory, VkBuffer handle,
-    iree_hal_vulkan_native_buffer_release_callback_t release_callback,
+    iree_hal_vulkan_native_buffer_release_callback_t internal_release_callback,
+    iree_hal_buffer_release_callback_t user_release_callback,
     iree_hal_buffer_t** out_buffer);
 
 #ifdef __cplusplus
diff --git a/runtime/src/iree/hal/drivers/vulkan/registration/driver_module.cc b/runtime/src/iree/hal/drivers/vulkan/registration/driver_module.cc
index a7182c7..c8b1264 100644
--- a/runtime/src/iree/hal/drivers/vulkan/registration/driver_module.cc
+++ b/runtime/src/iree/hal/drivers/vulkan/registration/driver_module.cc
@@ -32,6 +32,12 @@
 
 IREE_FLAG(bool, vulkan_robust_buffer_access, false,
           "Enables the Vulkan 'robustBufferAccess' feature.");
+IREE_FLAG(
+    bool, vulkan_sparse_binding, true,
+    "Enables the Vulkan 'sparseBinding' feature (and others) when available.");
+IREE_FLAG(bool, vulkan_sparse_residency, true,
+          "Enables the Vulkan 'sparseResidencyBuffer' feature (and others) "
+          "when available.");
 
 IREE_FLAG(
     bool, vulkan_dedicated_compute_queue, false,
@@ -80,6 +86,14 @@
     driver_options.requested_features |=
         IREE_HAL_VULKAN_FEATURE_ENABLE_ROBUST_BUFFER_ACCESS;
   }
+  if (FLAG_vulkan_sparse_binding) {
+    driver_options.requested_features |=
+        IREE_HAL_VULKAN_FEATURE_ENABLE_SPARSE_BINDING;
+  }
+  if (FLAG_vulkan_sparse_residency) {
+    driver_options.requested_features |=
+        IREE_HAL_VULKAN_FEATURE_ENABLE_SPARSE_RESIDENCY_ALIASED;
+  }
 
   if (FLAG_vulkan_dedicated_compute_queue) {
     driver_options.device_options.flags |=
diff --git a/runtime/src/iree/hal/drivers/vulkan/sparse_buffer.cc b/runtime/src/iree/hal/drivers/vulkan/sparse_buffer.cc
new file mode 100644
index 0000000..2ed3a8f
--- /dev/null
+++ b/runtime/src/iree/hal/drivers/vulkan/sparse_buffer.cc
@@ -0,0 +1,252 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/hal/drivers/vulkan/sparse_buffer.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+#include "iree/base/api.h"
+#include "iree/hal/drivers/vulkan/base_buffer.h"
+#include "iree/hal/drivers/vulkan/status_util.h"
+
+// A HAL buffer whose backing VkBuffer is sparsely bound: a single logical
+// buffer backed by one or more VkDeviceMemory blocks, each at most the
+// device's maximum allocation size.
+typedef struct iree_hal_vulkan_sparse_buffer_t {
+  iree_hal_vulkan_base_buffer_t base;
+  iree::hal::vulkan::VkDeviceHandle* logical_device;
+  // Number of valid entries in physical_blocks.
+  iree_host_size_t physical_block_count;
+  // Physical allocations bound to consecutive ranges of the buffer, in order;
+  // the final block may be smaller than the rest (flexible array member).
+  VkDeviceMemory physical_blocks[];
+} iree_hal_vulkan_sparse_buffer_t;
+
+namespace {
+extern const iree_hal_buffer_vtable_t iree_hal_vulkan_sparse_buffer_vtable;
+}  // namespace
+
+// Verifies (in debug builds) that |base_value| is a sparse buffer and casts.
+static iree_hal_vulkan_sparse_buffer_t* iree_hal_vulkan_sparse_buffer_cast(
+    iree_hal_buffer_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_vulkan_sparse_buffer_vtable);
+  return reinterpret_cast<iree_hal_vulkan_sparse_buffer_t*>(base_value);
+}
+
+// Synchronously commits physical memory for the sparse buffer |handle|:
+// allocates |physical_block_count| device memory blocks of up to
+// |physical_block_size| bytes from memory type |memory_type_index| (the final
+// block sized to whatever remains of |requirements.size|), binds them to
+// consecutive ranges of the buffer via vkQueueBindSparse on |queue|, and
+// blocks on a fence until the bind completes.
+//
+// Allocated blocks are written to |out_physical_blocks| as they are created.
+// NOTE(review): on failure blocks already allocated are not freed here -
+// presumably the owning buffer's destroy path releases them; confirm before
+// reusing this helper standalone.
+static iree_status_t iree_hal_vulkan_sparse_buffer_commit_sync(
+    iree::hal::vulkan::VkDeviceHandle* logical_device, VkQueue queue,
+    VkBuffer handle, VkMemoryRequirements requirements,
+    uint32_t memory_type_index, VkDeviceSize physical_block_size,
+    iree_host_size_t physical_block_count,
+    VkDeviceMemory out_physical_blocks[]) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)requirements.size);
+  IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)requirements.alignment);
+  IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)physical_block_size);
+  IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)physical_block_count);
+
+  // Allocate all physical blocks; note that the last block may be of partial
+  // size and we'll just allocate whatever remains from the total requested
+  // size.
+  VkMemoryAllocateInfo allocate_info;
+  allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+  allocate_info.pNext = NULL;
+  allocate_info.memoryTypeIndex = memory_type_index;
+  // NOTE(review): the bind list is stack-allocated via alloca; assumes the
+  // block count (size / max allocation size) stays modest - confirm for very
+  // large buffers.
+  VkSparseMemoryBind* binds = (VkSparseMemoryBind*)iree_alloca(
+      sizeof(VkSparseMemoryBind) * physical_block_count);
+  for (iree_host_size_t i = 0; i < physical_block_count; ++i) {
+    if (i < physical_block_count - 1) {
+      allocate_info.allocationSize = physical_block_size;
+    } else {
+      // Final (possibly partial) block covers the remainder of the buffer.
+      allocate_info.allocationSize =
+          requirements.size - physical_block_size * (physical_block_count - 1);
+    }
+    IREE_TRACE_ZONE_BEGIN_NAMED(z1, "vkAllocateMemory");
+    IREE_TRACE_ZONE_APPEND_VALUE_I64(z1, (int64_t)allocate_info.allocationSize);
+    iree_status_t allocate_status = VK_RESULT_TO_STATUS(
+        logical_device->syms()->vkAllocateMemory(
+            *logical_device, &allocate_info, logical_device->allocator(),
+            &out_physical_blocks[i]),
+        "vkAllocateMemory");
+    IREE_TRACE_ZONE_END(z1);
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(z0, allocate_status);
+
+    // Each block backs the [i * block_size, i * block_size + size) range of
+    // the logical buffer, bound at offset 0 within its own allocation.
+    binds[i].resourceOffset = i * physical_block_size;
+    binds[i].size = allocate_info.allocationSize;
+    binds[i].memory = out_physical_blocks[i];
+    binds[i].memoryOffset = 0;
+    binds[i].flags = 0;
+  }
+
+  // Temporary fence for enforcing host-synchronous execution.
+  VkFenceCreateInfo fence_info;
+  fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+  fence_info.pNext = NULL;
+  fence_info.flags = 0;
+  VkFence fence = VK_NULL_HANDLE;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, VK_RESULT_TO_STATUS(logical_device->syms()->vkCreateFence(
+                                  *logical_device, &fence_info,
+                                  logical_device->allocator(), &fence),
+                              "vkCreateFence"));
+
+  IREE_TRACE_ZONE_BEGIN_NAMED(z1, "vkQueueBindSparse");
+
+  // Enqueue sparse binding operation. This will complete asynchronously.
+  VkSparseBufferMemoryBindInfo memory_bind_info;
+  memory_bind_info.buffer = handle;
+  memory_bind_info.bindCount = (uint32_t)physical_block_count;
+  memory_bind_info.pBinds = binds;
+  VkBindSparseInfo bind_info;
+  memset(&bind_info, 0, sizeof(bind_info));
+  bind_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
+  bind_info.pNext = NULL;
+  bind_info.bufferBindCount = 1;
+  bind_info.pBufferBinds = &memory_bind_info;
+  iree_status_t status = VK_RESULT_TO_STATUS(
+      logical_device->syms()->vkQueueBindSparse(queue, 1, &bind_info, fence),
+      "vkQueueBindSparse");
+
+  // If enqueuing succeeded then wait for the binding to finish.
+  if (iree_status_is_ok(status)) {
+    status = VK_RESULT_TO_STATUS(
+        logical_device->syms()->vkWaitForFences(
+            *logical_device, 1, &fence, /*waitAll=*/VK_TRUE, UINT64_MAX),
+        "vkWaitForFences");
+  }
+
+  IREE_TRACE_ZONE_END(z1);
+
+  // Fence is only needed to make the async bind synchronous; always destroy.
+  logical_device->syms()->vkDestroyFence(*logical_device, fence,
+                                         logical_device->allocator());
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Creates a HAL buffer view over the sparse VkBuffer |handle|, committing
+// backing memory as multiple physical blocks of at most |max_allocation_size|
+// bytes (rounded down to a multiple of |requirements.alignment|) and binding
+// them synchronously on |queue|. On success the returned buffer owns the
+// committed blocks; on failure the partially-initialized buffer is destroyed.
+// NOTE(review): assumes max_allocation_size >= requirements.alignment so the
+// computed block size is non-zero - confirm against device limits.
+iree_status_t iree_hal_vulkan_sparse_buffer_create_bound_sync(
+    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_memory_access_t allowed_access,
+    iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
+    iree_device_size_t byte_offset, iree_device_size_t byte_length,
+    iree::hal::vulkan::VkDeviceHandle* logical_device, VkQueue queue,
+    VkBuffer handle, VkMemoryRequirements requirements,
+    uint32_t memory_type_index, VkDeviceSize max_allocation_size,
+    iree_hal_buffer_t** out_buffer) {
+  IREE_ASSERT_ARGUMENT(allocator);
+  IREE_ASSERT_ARGUMENT(logical_device);
+  IREE_ASSERT_ARGUMENT(handle);
+  IREE_ASSERT_ARGUMENT(out_buffer);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)allocation_size);
+
+  // The maximum allocation size reported by Vulkan does not need to be a power
+  // of two or aligned to anything in particular - sparse buffers do require
+  // alignment though and must also be under the limit so here we adjust down to
+  // the maximum aligned value.
+  iree_device_size_t physical_block_size =
+      iree_device_size_floor_div(max_allocation_size, requirements.alignment) *
+      requirements.alignment;
+
+  // ceil-div for the number of blocks as the last block may be partial.
+  iree_host_size_t physical_block_count =
+      (iree_host_size_t)iree_device_size_ceil_div(requirements.size,
+                                                  physical_block_size);
+
+  // Allocate the struct with its trailing flexible array of block handles;
+  // the allocation is assumed zero-initialized so unset blocks stay NULL.
+  iree_allocator_t host_allocator =
+      iree_hal_allocator_host_allocator(allocator);
+  iree_hal_vulkan_sparse_buffer_t* buffer = NULL;
+  iree_host_size_t total_size =
+      iree_host_align(sizeof(*buffer), iree_max_align_t) +
+      sizeof(buffer->physical_blocks[0]) * physical_block_count;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_allocator_malloc(host_allocator, total_size, (void**)&buffer));
+  iree_hal_buffer_initialize(
+      host_allocator, allocator, &buffer->base.base, allocation_size,
+      byte_offset, byte_length, memory_type, allowed_access, allowed_usage,
+      &iree_hal_vulkan_sparse_buffer_vtable, &buffer->base.base);
+  buffer->base.handle = handle;
+  buffer->logical_device = logical_device;
+  buffer->physical_block_count = physical_block_count;
+
+  // Synchronously commit all physical blocks and bind them to the buffer.
+  iree_status_t status = iree_hal_vulkan_sparse_buffer_commit_sync(
+      logical_device, queue, handle, requirements, memory_type_index,
+      physical_block_size, physical_block_count, buffer->physical_blocks);
+
+  if (iree_status_is_ok(status)) {
+    *out_buffer = &buffer->base.base;
+  } else {
+    // Destroying the partially-initialized buffer releases whatever blocks
+    // were committed before the failure.
+    iree_hal_buffer_destroy((iree_hal_buffer_t*)buffer);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Destroys the VkBuffer, frees every committed physical memory block, and
+// releases the host-side buffer struct.
+static void iree_hal_vulkan_sparse_buffer_destroy(
+    iree_hal_buffer_t* base_buffer) {
+  iree_hal_vulkan_sparse_buffer_t* buffer =
+      iree_hal_vulkan_sparse_buffer_cast(base_buffer);
+  iree::hal::vulkan::VkDeviceHandle* logical_device = buffer->logical_device;
+  // Capture the host allocator before freeing |buffer| below.
+  iree_allocator_t host_allocator = base_buffer->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_TRACE_ZONE_APPEND_VALUE_I64(
+      z0, (int64_t)iree_hal_buffer_allocation_size(base_buffer));
+
+  // Destroy buffer prior to freeing physical blocks.
+  if (buffer->base.handle != VK_NULL_HANDLE) {
+    logical_device->syms()->vkDestroyBuffer(
+        *logical_device, buffer->base.handle, logical_device->allocator());
+  }
+  // Blocks that were never committed remain VK_NULL_HANDLE and are skipped,
+  // making this safe to call on a partially-committed buffer.
+  for (iree_host_size_t i = 0; i < buffer->physical_block_count; ++i) {
+    if (buffer->physical_blocks[i] != VK_NULL_HANDLE) {
+      logical_device->syms()->vkFreeMemory(*logical_device,
+                                           buffer->physical_blocks[i],
+                                           logical_device->allocator());
+    }
+  }
+
+  iree_allocator_free(host_allocator, buffer);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Host mapping is unsupported for sparse buffers; always fails.
+static iree_status_t iree_hal_vulkan_sparse_buffer_map_range(
+    iree_hal_buffer_t* base_buffer, iree_hal_mapping_mode_t mapping_mode,
+    iree_hal_memory_access_t memory_access,
+    iree_device_size_t local_byte_offset, iree_device_size_t local_byte_length,
+    iree_hal_buffer_mapping_t* mapping) {
+  return iree_make_status(IREE_STATUS_UNAVAILABLE,
+                          "sparse buffers do not support mapping");
+}
+
+// Unmapping is unsupported (nothing can have been mapped); always fails.
+static iree_status_t iree_hal_vulkan_sparse_buffer_unmap_range(
+    iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset,
+    iree_device_size_t local_byte_length, iree_hal_buffer_mapping_t* mapping) {
+  return iree_make_status(IREE_STATUS_UNAVAILABLE,
+                          "sparse buffers do not support mapping");
+}
+
+// Cache invalidation requires host mapping, which is unsupported; always fails.
+static iree_status_t iree_hal_vulkan_sparse_buffer_invalidate_range(
+    iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset,
+    iree_device_size_t local_byte_length) {
+  return iree_make_status(IREE_STATUS_UNAVAILABLE,
+                          "sparse buffers do not support mapping");
+}
+
+// Cache flushing requires host mapping, which is unsupported; always fails.
+static iree_status_t iree_hal_vulkan_sparse_buffer_flush_range(
+    iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset,
+    iree_device_size_t local_byte_length) {
+  return iree_make_status(IREE_STATUS_UNAVAILABLE,
+                          "sparse buffers do not support mapping");
+}
+
+namespace {
+// Sparse buffers support only destruction; every mapping-related entry point
+// returns UNAVAILABLE.
+const iree_hal_buffer_vtable_t iree_hal_vulkan_sparse_buffer_vtable = {
+    /*.recycle=*/iree_hal_buffer_recycle,
+    /*.destroy=*/iree_hal_vulkan_sparse_buffer_destroy,
+    /*.map_range=*/iree_hal_vulkan_sparse_buffer_map_range,
+    /*.unmap_range=*/iree_hal_vulkan_sparse_buffer_unmap_range,
+    /*.invalidate_range=*/iree_hal_vulkan_sparse_buffer_invalidate_range,
+    /*.flush_range=*/iree_hal_vulkan_sparse_buffer_flush_range,
+};
+}  // namespace
diff --git a/runtime/src/iree/hal/drivers/vulkan/sparse_buffer.h b/runtime/src/iree/hal/drivers/vulkan/sparse_buffer.h
new file mode 100644
index 0000000..9ab7d65
--- /dev/null
+++ b/runtime/src/iree/hal/drivers/vulkan/sparse_buffer.h
@@ -0,0 +1,42 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_DRIVERS_VULKAN_SPARSE_BUFFER_H_
+#define IREE_HAL_DRIVERS_VULKAN_SPARSE_BUFFER_H_
+
+// clang-format off: must be included before all other headers.
+#include "iree/hal/drivers/vulkan/vulkan_headers.h"  // IWYU pragma: export
+// clang-format on
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/drivers/vulkan/handle_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// EXPERIMENTAL: allocates a buffer with fully bound memory and undefined
+// initial contents. Allocation and binding happen synchronously on the
+// calling thread.
+//
+// This will eventually be replaced with HAL device APIs for controlling the
+// reserve/commit/decommit/release behavior of the virtual/physical storage.
+iree_status_t iree_hal_vulkan_sparse_buffer_create_bound_sync(
+    iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type,
+    iree_hal_memory_access_t allowed_access,
+    iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
+    iree_device_size_t byte_offset, iree_device_size_t byte_length,
+    iree::hal::vulkan::VkDeviceHandle* logical_device, VkQueue queue,
+    VkBuffer handle, VkMemoryRequirements requirements,
+    uint32_t memory_type_index, VkDeviceSize max_allocation_size,
+    iree_hal_buffer_t** out_buffer);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // IREE_HAL_DRIVERS_VULKAN_SPARSE_BUFFER_H_
diff --git a/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc b/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc
index 26e198d..7dca24b 100644
--- a/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc
+++ b/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc
@@ -200,6 +200,17 @@
   ADD_EXT(IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_OPTIONAL,
           VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME);
 
+  // VK_KHR_external_memory:
+  // Promoted to core in Vulkan 1.1 and not required but here just in case
+  // tooling wants to see the request.
+  ADD_EXT(IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_OPTIONAL,
+          VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME);
+
+  // VK_EXT_external_memory_host:
+  // Optional to enable import/export of host pointers.
+  ADD_EXT(IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_OPTIONAL,
+          VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
+
   //===--------------------------------------------------------------------===//
   // Vulkan forward-compatibility shims
   //===--------------------------------------------------------------------===//
@@ -820,7 +831,7 @@
 
 iree_status_t iree_hal_vulkan_device_create(
     iree_hal_driver_t* driver, iree_string_view_t identifier,
-    iree_hal_vulkan_features_t enabled_features,
+    iree_hal_vulkan_features_t requested_features,
     const iree_hal_vulkan_device_options_t* options,
     iree_hal_vulkan_syms_t* opaque_syms, VkInstance instance,
     VkPhysicalDevice physical_device, iree_allocator_t host_allocator,
@@ -833,12 +844,12 @@
   iree::Arena arena(128 * 1024);
   iree_hal_vulkan_string_list_t required_extensions;
   IREE_RETURN_IF_ERROR(iree_hal_vulkan_device_query_extensibility_set(
-      enabled_features,
+      requested_features,
       IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_REQUIRED, &arena,
       &required_extensions));
   iree_hal_vulkan_string_list_t optional_extensions;
   IREE_RETURN_IF_ERROR(iree_hal_vulkan_device_query_extensibility_set(
-      enabled_features,
+      requested_features,
       IREE_HAL_VULKAN_EXTENSIBILITY_DEVICE_EXTENSIONS_OPTIONAL, &arena,
       &optional_extensions));
   iree_hal_vulkan_string_list_t enabled_extensions;
@@ -920,7 +931,32 @@
     features2.features.shaderInt64 = VK_TRUE;
   }
 
-  if (iree_all_bits_set(enabled_features,
+  iree_hal_vulkan_features_t enabled_features = 0;
+
+  IREE_TRACE({
+    if (iree_all_bits_set(requested_features,
+                          IREE_HAL_VULKAN_FEATURE_ENABLE_TRACING)) {
+      enabled_features |= IREE_HAL_VULKAN_FEATURE_ENABLE_TRACING;
+    }
+  });
+
+  if (iree_all_bits_set(requested_features,
+                        IREE_HAL_VULKAN_FEATURE_ENABLE_SPARSE_BINDING) &&
+      physical_device_features.sparseBinding) {
+    features2.features.sparseBinding = VK_TRUE;
+    enabled_features |= IREE_HAL_VULKAN_FEATURE_ENABLE_SPARSE_BINDING;
+  }
+  if (iree_all_bits_set(
+          requested_features,
+          IREE_HAL_VULKAN_FEATURE_ENABLE_SPARSE_RESIDENCY_ALIASED) &&
+      physical_device_features.sparseResidencyBuffer &&
+      physical_device_features.sparseResidencyAliased) {
+    features2.features.sparseResidencyBuffer = VK_TRUE;
+    features2.features.sparseResidencyAliased = VK_TRUE;
+    enabled_features |= IREE_HAL_VULKAN_FEATURE_ENABLE_SPARSE_RESIDENCY_ALIASED;
+  }
+
+  if (iree_all_bits_set(requested_features,
                         IREE_HAL_VULKAN_FEATURE_ENABLE_ROBUST_BUFFER_ACCESS)) {
     if (physical_device_features.robustBufferAccess != VK_TRUE) {
       return iree_make_status(
@@ -928,6 +964,7 @@
           "Robust buffer access not supported by physical device");
     }
     features2.features.robustBufferAccess = VK_TRUE;
+    enabled_features |= IREE_HAL_VULKAN_FEATURE_ENABLE_ROBUST_BUFFER_ACCESS;
   }
 
   VkPhysicalDeviceTimelineSemaphoreFeatures semaphore_features;
@@ -959,7 +996,8 @@
   }
 
   auto logical_device = new VkDeviceHandle(
-      instance_syms, enabled_device_extensions,
+      instance_syms, physical_device, enabled_features,
+      enabled_device_extensions,
       /*owns_device=*/true, host_allocator, /*allocator=*/NULL);
 
   iree_status_t status = VK_RESULT_TO_STATUS(
@@ -1033,7 +1071,8 @@
 
   // Wrap the provided VkDevice with a VkDeviceHandle for use within the HAL.
   auto logical_device_handle = new VkDeviceHandle(
-      device_syms.get(), enabled_device_extensions,
+      device_syms.get(), physical_device, enabled_features,
+      enabled_device_extensions,
       /*owns_device=*/false, host_allocator, /*allocator=*/NULL);
   *logical_device_handle->mutable_value() = logical_device;
 
diff --git a/runtime/src/iree/hal/drivers/vulkan/vulkan_device.h b/runtime/src/iree/hal/drivers/vulkan/vulkan_device.h
index 1c734f0..69baee0 100644
--- a/runtime/src/iree/hal/drivers/vulkan/vulkan_device.h
+++ b/runtime/src/iree/hal/drivers/vulkan/vulkan_device.h
@@ -25,7 +25,7 @@
 // existing VkInstances provided by the application).
 iree_status_t iree_hal_vulkan_device_create(
     iree_hal_driver_t* driver, iree_string_view_t identifier,
-    iree_hal_vulkan_features_t enabled_features,
+    iree_hal_vulkan_features_t requested_features,
     const iree_hal_vulkan_device_options_t* options,
     iree_hal_vulkan_syms_t* instance_syms, VkInstance instance,
     VkPhysicalDevice physical_device, iree_allocator_t host_allocator,