Adding native (non-VMA) Vulkan allocator behind a flag. (#14389)
`--vulkan_vma_allocator=false` can be used to disable VMA. When doing so it
is strongly recommended to also use an allocator shim such as
`--device_allocator=caching`, as native Vulkan API allocations are slow and
limited.
diff --git a/runtime/src/iree/hal/allocator.h b/runtime/src/iree/hal/allocator.h
index 2757d8d..2d13fa0 100644
--- a/runtime/src/iree/hal/allocator.h
+++ b/runtime/src/iree/hal/allocator.h
@@ -621,8 +621,8 @@
}
#else
-#define iree_hal_allocator_statistics_record_alloc(...)
-#define iree_hal_allocator_statistics_record_free(...)
+#define iree_hal_allocator_statistics_record_alloc(statistics, ...)
+#define iree_hal_allocator_statistics_record_free(statistics, ...)
#endif // IREE_STATISTICS_ENABLE
#ifdef __cplusplus
diff --git a/runtime/src/iree/hal/drivers/vulkan/BUILD.bazel b/runtime/src/iree/hal/drivers/vulkan/BUILD.bazel
index b65c24c..96e51b7 100644
--- a/runtime/src/iree/hal/drivers/vulkan/BUILD.bazel
+++ b/runtime/src/iree/hal/drivers/vulkan/BUILD.bazel
@@ -36,6 +36,8 @@
"extensibility_util.cc",
"extensibility_util.h",
"handle_util.h",
+ "native_allocator.cc",
+ "native_allocator.h",
"native_buffer.cc",
"native_buffer.h",
"native_event.cc",
diff --git a/runtime/src/iree/hal/drivers/vulkan/CMakeLists.txt b/runtime/src/iree/hal/drivers/vulkan/CMakeLists.txt
index 93cb257..f9893db 100644
--- a/runtime/src/iree/hal/drivers/vulkan/CMakeLists.txt
+++ b/runtime/src/iree/hal/drivers/vulkan/CMakeLists.txt
@@ -37,6 +37,8 @@
"extensibility_util.cc"
"extensibility_util.h"
"handle_util.h"
+ "native_allocator.cc"
+ "native_allocator.h"
"native_buffer.cc"
"native_buffer.h"
"native_event.cc"
diff --git a/runtime/src/iree/hal/drivers/vulkan/api.h b/runtime/src/iree/hal/drivers/vulkan/api.h
index e243899..3c8a919 100644
--- a/runtime/src/iree/hal/drivers/vulkan/api.h
+++ b/runtime/src/iree/hal/drivers/vulkan/api.h
@@ -194,6 +194,11 @@
// IREE execution to run asynchronously with the graphics workloads.
// See: https://gpuopen.com/learn/concurrent-execution-asynchronous-queues/
IREE_HAL_VULKAN_DEVICE_FLAG_DEDICATED_COMPUTE_QUEUE = 1u << 0,
+
+ // Whether to use the VMA allocator instead of native Vulkan API memory
+ // allocations.
+ // NOTE: this is temporary and VMA is slated for removal in the future.
+ IREE_HAL_VULKAN_DEVICE_FLAG_VMA_ALLOCATOR = 1u << 1,
};
typedef uint32_t iree_hal_vulkan_device_flags_t;
@@ -205,7 +210,7 @@
// size of a large heap block allocation. This effectively specifies the
// minimum amount of memory required and will always allocate at least this
// much.
- // NOTE: this is temporary and likely to get removed in the future.
+ // NOTE: this is temporary and VMA is slated for removal in the future.
iree_device_size_t large_heap_block_size;
} iree_hal_vulkan_device_options_t;
@@ -307,10 +312,14 @@
//===----------------------------------------------------------------------===//
// EXPERIMENTAL: until VMA is removed this is doing a shady reinterpret cast.
+//
// TODO(benvanik): make this safer (dyn_cast-like, lookup allocated buffer).
// Returns the backing device memory and logical buffer handle of a HAL buffer
// managed by the Vulkan HAL. Invalid to call on any buffer but a base allocated
// Vulkan HAL buffer.
+//
+// NOTE: |out_memory| will be VK_NULL_HANDLE in cases where sparse residency is
+// used.
IREE_API_EXPORT iree_status_t iree_hal_vulkan_allocated_buffer_handle(
iree_hal_buffer_t* allocated_buffer, VkDeviceMemory* out_memory,
VkBuffer* out_handle);
diff --git a/runtime/src/iree/hal/drivers/vulkan/base_buffer.c b/runtime/src/iree/hal/drivers/vulkan/base_buffer.c
index 4270619..cf8eb12 100644
--- a/runtime/src/iree/hal/drivers/vulkan/base_buffer.c
+++ b/runtime/src/iree/hal/drivers/vulkan/base_buffer.c
@@ -36,6 +36,74 @@
!iree_all_bits_set(flags, VK_MEMORY_PROPERTY_PROTECTED_BIT);
}
+iree_status_t iree_hal_vulkan_find_memory_type(
+    const VkPhysicalDeviceProperties* device_props,
+    const VkPhysicalDeviceMemoryProperties* memory_props,
+    const iree_hal_buffer_params_t* IREE_RESTRICT params,
+    uint32_t* out_memory_type_index) {
+  *out_memory_type_index = 0;
+
+  // Translate the HAL memory type/usage bits into Vulkan property flags. Every
+  // bit in |required| must be present on a candidate memory type while the
+  // bits in |preferred| are only used to rank the candidates that remain.
+  VkMemoryPropertyFlags required = 0;
+  VkMemoryPropertyFlags preferred = 0;
+  if (!iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL)) {
+    // Host-local (whether or not it is also device-visible).
+    required |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+  } else if (iree_all_bits_set(params->type,
+                               IREE_HAL_MEMORY_TYPE_HOST_VISIBLE)) {
+    // Device-local, host-visible.
+    required |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+    preferred |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+  } else {
+    // Device-local only.
+    required |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+  }
+  if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_HOST_CACHED)) {
+    required |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+  }
+  if (iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_HOST_COHERENT)) {
+    required |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+  }
+  if (iree_all_bits_set(params->usage, IREE_HAL_BUFFER_USAGE_MAPPING)) {
+    required |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+  }
+
+  // Select the first usable memory type that carries the most preferred bits.
+  // A score of -1 means that no candidate has been accepted yet.
+  int best_score = -1;
+  uint32_t best_index = 0;
+  for (uint32_t i = 0; i < memory_props->memoryTypeCount; ++i) {
+    const VkMemoryPropertyFlags flags =
+        memory_props->memoryTypes[i].propertyFlags;
+    if (iree_all_bits_set(flags, required) &&
+        iree_hal_vulkan_is_memory_type_usable(flags)) {
+      const int score = iree_math_count_ones_u32(flags & preferred);
+      if (score > best_score) {
+        best_score = score;
+        best_index = i;
+      }
+    }
+  }
+  if (best_score < 0) {
+    // Every memory type was missing required bits or was not usable.
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "no memory type available that satisfies the required flags");
+  }
+
+  *out_memory_type_index = best_index;
+  return iree_ok_status();
+}
+
static void iree_hal_vulkan_populate_dispatch_memory_types(
const VkPhysicalDeviceProperties* device_props,
const VkPhysicalDeviceMemoryProperties* memory_props,
diff --git a/runtime/src/iree/hal/drivers/vulkan/base_buffer.h b/runtime/src/iree/hal/drivers/vulkan/base_buffer.h
index 10fe137..c9a0d34 100644
--- a/runtime/src/iree/hal/drivers/vulkan/base_buffer.h
+++ b/runtime/src/iree/hal/drivers/vulkan/base_buffer.h
@@ -64,6 +64,15 @@
int indices[5];
} iree_hal_vulkan_memory_types_t;
+// Finds a usable memory type that has every property required by the buffer
+// |params|, favoring preferred properties, and returns its index in
+// |out_memory_type_index|. Fails if no memory type meets the requirements.
+iree_status_t iree_hal_vulkan_find_memory_type(
+    const VkPhysicalDeviceProperties* device_props,
+    const VkPhysicalDeviceMemoryProperties* memory_props,
+    const iree_hal_buffer_params_t* IREE_RESTRICT params,
+    uint32_t* out_memory_type_index);
+
// Queries the underlying Vulkan implementation to decide which memory type
// should be used for particular operations.
iree_status_t iree_hal_vulkan_populate_memory_types(
@@ -88,6 +97,8 @@
// to get access to the API VkBuffer handle.
typedef struct iree_hal_vulkan_base_buffer_t {
iree_hal_buffer_t base;
+ // NOTE: may be VK_NULL_HANDLE if sparse residency is used to back the buffer
+ // with multiple device memory allocations.
VkDeviceMemory device_memory;
VkBuffer handle;
} iree_hal_vulkan_base_buffer_t;
diff --git a/runtime/src/iree/hal/drivers/vulkan/native_allocator.cc b/runtime/src/iree/hal/drivers/vulkan/native_allocator.cc
new file mode 100644
index 0000000..70e2ac4
--- /dev/null
+++ b/runtime/src/iree/hal/drivers/vulkan/native_allocator.cc
@@ -0,0 +1,368 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <cstddef>
+#include <cstring>
+
+#include "iree/base/api.h"
+#include "iree/hal/drivers/vulkan/base_buffer.h"
+#include "iree/hal/drivers/vulkan/dynamic_symbols.h"
+#include "iree/hal/drivers/vulkan/native_buffer.h"
+#include "iree/hal/drivers/vulkan/status_util.h"
+
+using namespace iree::hal::vulkan;
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_ALLOCATION_TRACKING
+static const char* IREE_HAL_VULKAN_NATIVE_ALLOCATOR_ID = "Vulkan/Native";
+#endif // IREE_TRACING_FEATURE_ALLOCATION_TRACKING
+
+typedef struct iree_hal_vulkan_native_allocator_t {
+  iree_hal_resource_t resource;  // initialized with the allocator vtable
+  VkDeviceHandle* logical_device;  // stored by create() without retaining
+  iree_hal_device_t* device;  // unretained to avoid cycles
+  iree_allocator_t host_allocator;  // used by destroy() to free this struct
+
+  // Queried once at creation to avoid additional API queries in hot paths.
+  VkPhysicalDeviceProperties device_props;
+  VkPhysicalDeviceMemoryProperties memory_props;
+
+  // Used to quickly look up the memory type index used for a particular usage.
+  iree_hal_vulkan_memory_types_t memory_types;
+
+  IREE_STATISTICS(iree_hal_allocator_statistics_t statistics;)
+} iree_hal_vulkan_native_allocator_t;
+
+namespace {
+extern const iree_hal_allocator_vtable_t
+ iree_hal_vulkan_native_allocator_vtable;
+} // namespace
+
+// Downcasts |base_value| to the native allocator type after checking it
+// against the native allocator vtable with IREE_HAL_ASSERT_TYPE.
+static iree_hal_vulkan_native_allocator_t*
+iree_hal_vulkan_native_allocator_cast(iree_hal_allocator_t* base_value) {
+  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_vulkan_native_allocator_vtable);
+  return reinterpret_cast<iree_hal_vulkan_native_allocator_t*>(base_value);
+}
+
+static void iree_hal_vulkan_native_allocator_destroy(
+ iree_hal_allocator_t* IREE_RESTRICT base_allocator);
+
+extern "C" iree_status_t iree_hal_vulkan_native_allocator_create(
+    const iree_hal_vulkan_device_options_t* options, VkInstance instance,
+    VkPhysicalDevice physical_device, VkDeviceHandle* logical_device,
+    iree_hal_device_t* device, iree_hal_allocator_t** out_allocator) {
+  IREE_ASSERT_ARGUMENT(instance);
+  IREE_ASSERT_ARGUMENT(physical_device);
+  IREE_ASSERT_ARGUMENT(logical_device);
+  IREE_ASSERT_ARGUMENT(device);
+  IREE_ASSERT_ARGUMENT(out_allocator);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // The allocator struct is allocated from the logical device's host
+  // allocator, which is also remembered so that destroy can free it.
+  iree_allocator_t host_allocator = logical_device->host_allocator();
+  iree_hal_vulkan_native_allocator_t* allocator = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_allocator_malloc(host_allocator, sizeof(*allocator),
+                                (void**)&allocator));
+  iree_hal_resource_initialize(&iree_hal_vulkan_native_allocator_vtable,
+                               &allocator->resource);
+  allocator->host_allocator = host_allocator;
+  allocator->logical_device = logical_device;
+  allocator->device = device;
+
+  // Snapshot the physical device and memory properties once up front.
+  logical_device->syms()->vkGetPhysicalDeviceProperties(
+      physical_device, &allocator->device_props);
+  logical_device->syms()->vkGetPhysicalDeviceMemoryProperties(
+      physical_device, &allocator->memory_props);
+
+  // Derive the memory type lookup table from the cached properties.
+  iree_status_t status = iree_hal_vulkan_populate_memory_types(
+      &allocator->device_props, &allocator->memory_props,
+      &allocator->memory_types);
+
+  if (!iree_status_is_ok(status)) {
+    // Nothing is returned to the caller on failure so clean up here.
+    iree_hal_vulkan_native_allocator_destroy(
+        reinterpret_cast<iree_hal_allocator_t*>(allocator));
+  } else {
+    *out_allocator = reinterpret_cast<iree_hal_allocator_t*>(allocator);
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// Frees the allocator struct using the host allocator it was created with.
+static void iree_hal_vulkan_native_allocator_destroy(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
+  iree_hal_vulkan_native_allocator_t* self =
+      iree_hal_vulkan_native_allocator_cast(base_allocator);
+  // Copied out first as the value is stored inside the memory being freed.
+  const iree_allocator_t self_host_allocator = self->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_allocator_free(self_host_allocator, self);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+// Returns the host allocator stored on the allocator at creation.
+static iree_allocator_t iree_hal_vulkan_native_allocator_host_allocator(
+    const iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
+  return reinterpret_cast<const iree_hal_vulkan_native_allocator_t*>(
+             base_allocator)
+      ->host_allocator;
+}
+
+static iree_status_t iree_hal_vulkan_native_allocator_trim(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator) {
+  return iree_ok_status();  // no-op: this allocator keeps no pooled memory
+}
+
+// Copies the allocator's statistics into |out_statistics|. The body is
+// wrapped in IREE_STATISTICS as the statistics field is declared through it.
+static void iree_hal_vulkan_native_allocator_query_statistics(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    iree_hal_allocator_statistics_t* IREE_RESTRICT out_statistics) {
+  IREE_STATISTICS({
+    const iree_hal_vulkan_native_allocator_t* self =
+        iree_hal_vulkan_native_allocator_cast(base_allocator);
+    *out_statistics = self->statistics;
+  });
+}
+
+// Reports the memory heaps by forwarding the properties cached on the
+// allocator to the shared iree_hal_vulkan_query_memory_heaps helper.
+static iree_status_t iree_hal_vulkan_native_allocator_query_memory_heaps(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    iree_host_size_t capacity,
+    iree_hal_allocator_memory_heap_t* IREE_RESTRICT heaps,
+    iree_host_size_t* IREE_RESTRICT out_count) {
+  iree_hal_vulkan_native_allocator_t* self =
+      iree_hal_vulkan_native_allocator_cast(base_allocator);
+  VkPhysicalDeviceProperties* device_props = &self->device_props;
+  VkPhysicalDeviceMemoryProperties* memory_props = &self->memory_props;
+  iree_hal_vulkan_memory_types_t* memory_types = &self->memory_types;
+  return iree_hal_vulkan_query_memory_heaps(
+      device_props, memory_props, memory_types, capacity, heaps, out_count);
+}
+
+// Reports how a buffer with |params| can be used and coerces |params| and
+// |allocation_size| into the values the allocator will actually use.
+static iree_hal_buffer_compatibility_t
+iree_hal_vulkan_native_allocator_query_buffer_compatibility(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    iree_hal_buffer_params_t* IREE_RESTRICT params,
+    iree_device_size_t* IREE_RESTRICT allocation_size) {
+  // TODO(benvanik): check to ensure the allocator can serve the memory type.
+
+  // Every request is reported as allocatable; queue capabilities are added
+  // below based on the requested usage.
+  iree_hal_buffer_compatibility_t result =
+      IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE;
+
+  const bool wants_transfer =
+      iree_any_bit_set(params->usage, IREE_HAL_BUFFER_USAGE_TRANSFER);
+  if (wants_transfer) {
+    result |= IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_TRANSFER;
+  }
+
+  // Dispatch on the queue additionally requires device-visible memory.
+  const bool device_visible =
+      iree_all_bits_set(params->type, IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE);
+  if (device_visible &&
+      iree_any_bit_set(params->usage,
+                       IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE)) {
+    result |= IREE_HAL_BUFFER_COMPATIBILITY_QUEUE_DISPATCH;
+  }
+
+  // The request has been resolved so the optimal bit is cleared.
+  params->type &= ~IREE_HAL_MEMORY_TYPE_OPTIMAL;
+
+  // Zero-byte requests can happen in real world use; bump them to 4 bytes so
+  // that they do not crash. All sizes are then rounded up to a multiple of 4
+  // bytes so shaders operating on 32 bit types can act safely even on buffer
+  // ranges that are not naturally aligned.
+  iree_device_size_t coerced_size = *allocation_size;
+  if (coerced_size == 0) coerced_size = 4;
+  *allocation_size = iree_host_align(coerced_size, 4);
+
+  return result;
+}
+
+// Release callback attached to buffers created by this allocator: destroys
+// the VkBuffer |handle| and then frees the |device_memory| backing it.
+static void iree_hal_vulkan_native_allocator_native_buffer_release(
+    void* user_data, iree::hal::vulkan::VkDeviceHandle* logical_device,
+    VkDeviceMemory device_memory, VkBuffer handle) {
+  IREE_TRACE_FREE_NAMED(IREE_HAL_VULKAN_NATIVE_ALLOCATOR_ID, (void*)handle);
+  const auto& syms = logical_device->syms();
+  syms->vkDestroyBuffer(*logical_device, handle, logical_device->allocator());
+  syms->vkFreeMemory(*logical_device, device_memory,
+                     logical_device->allocator());
+}
+
+// Allocates a new buffer of |allocation_size| bytes backed by its own
+// dedicated device memory allocation and returns it in |out_buffer|.
+//
+// |params| must already have been coerced by the compatibility query. When
+// |initial_data| is non-empty it is copied into the buffer with a blocking
+// device transfer before returning. On failure nothing is returned and all
+// Vulkan objects created along the way are released.
+static iree_status_t iree_hal_vulkan_native_allocator_allocate_internal(
+    iree_hal_vulkan_native_allocator_t* IREE_RESTRICT allocator,
+    const iree_hal_buffer_params_t* IREE_RESTRICT params,
+    iree_device_size_t allocation_size, iree_const_byte_span_t initial_data,
+    iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
+  VkDeviceHandle* logical_device = allocator->logical_device;
+  const auto& syms = logical_device->syms();
+
+  // TODO(benvanik): if on a unified memory system and initial data is present
+  // we could set the mapping bit and ensure a much more efficient upload.
+
+  // Create an initially unbound buffer handle. This must happen before the
+  // memory is allocated so that the implementation can be asked what kind of
+  // memory the buffer requires.
+  VkBufferCreateInfo buffer_create_info = {};
+  buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+  buffer_create_info.pNext = NULL;
+  buffer_create_info.flags = 0;
+  buffer_create_info.size = allocation_size;
+  buffer_create_info.usage = 0;
+  if (iree_all_bits_set(params->usage, IREE_HAL_BUFFER_USAGE_TRANSFER)) {
+    buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+    buffer_create_info.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+  }
+  if (iree_all_bits_set(params->usage,
+                        IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE)) {
+    buffer_create_info.usage |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+    buffer_create_info.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+    buffer_create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
+  }
+  buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+  buffer_create_info.queueFamilyIndexCount = 0;
+  buffer_create_info.pQueueFamilyIndices = NULL;
+  VkBuffer handle = VK_NULL_HANDLE;
+  VK_RETURN_IF_ERROR(
+      syms->vkCreateBuffer(*logical_device, &buffer_create_info,
+                           logical_device->allocator(), &handle),
+      "vkCreateBuffer");
+
+  // Ask the implementation what the buffer requires of its backing memory.
+  // Binding memory that is smaller than the required size or that comes from
+  // a memory type outside of memoryTypeBits is invalid usage.
+  VkMemoryRequirements requirements = {};
+  syms->vkGetBufferMemoryRequirements(*logical_device, handle, &requirements);
+
+  // Restrict the memory type search to the types the buffer may be bound to.
+  // Disallowed types have their property flags cleared in a local copy; the
+  // search always requires at least one property bit so they never match.
+  VkPhysicalDeviceMemoryProperties allowed_memory_props =
+      allocator->memory_props;
+  for (uint32_t i = 0; i < allowed_memory_props.memoryTypeCount; ++i) {
+    if ((requirements.memoryTypeBits & (1u << i)) == 0) {
+      allowed_memory_props.memoryTypes[i].propertyFlags = 0;
+    }
+  }
+
+  // Allocate the device memory we'll attach the buffer to. The implementation
+  // may require more memory than the requested buffer size.
+  VkMemoryAllocateInfo allocate_info = {};
+  allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+  allocate_info.pNext = NULL;
+  allocate_info.memoryTypeIndex = 0;
+  allocate_info.allocationSize =
+      (VkDeviceSize)requirements.size > (VkDeviceSize)allocation_size
+          ? (VkDeviceSize)requirements.size
+          : (VkDeviceSize)allocation_size;
+  iree_status_t status = iree_hal_vulkan_find_memory_type(
+      &allocator->device_props, &allowed_memory_props, params,
+      &allocate_info.memoryTypeIndex);
+  VkDeviceMemory device_memory = VK_NULL_HANDLE;
+  if (iree_status_is_ok(status)) {
+    status = VK_RESULT_TO_STATUS(
+        syms->vkAllocateMemory(*logical_device, &allocate_info,
+                               logical_device->allocator(), &device_memory),
+        "vkAllocateMemory");
+  }
+
+  // Wrap the handles in a HAL buffer; once wrapped the release callback owns
+  // the cleanup of both the buffer handle and the device memory.
+  iree_hal_vulkan_native_buffer_release_callback_t release_callback = {0};
+  release_callback.fn = iree_hal_vulkan_native_allocator_native_buffer_release;
+  release_callback.user_data = NULL;
+  iree_hal_buffer_t* buffer = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_vulkan_native_buffer_wrap(
+        (iree_hal_allocator_t*)allocator, params->type, params->access,
+        params->usage, allocation_size,
+        /*byte_offset=*/0,
+        /*byte_length=*/allocation_size, logical_device, device_memory, handle,
+        release_callback, &buffer);
+  }
+  if (!iree_status_is_ok(status)) {
+    // Early exit after cleaning up the buffer and allocation.
+    // After this point releasing the wrapping buffer will take care of this.
+    if (handle) {
+      syms->vkDestroyBuffer(*logical_device, handle,
+                            logical_device->allocator());
+    }
+    if (device_memory) {
+      syms->vkFreeMemory(*logical_device, device_memory,
+                         logical_device->allocator());
+    }
+    return status;
+  }
+
+  IREE_TRACE_ALLOC_NAMED(IREE_HAL_VULKAN_NATIVE_ALLOCATOR_ID, (void*)handle,
+                         allocation_size);
+
+  // Bind the memory to the buffer.
+  status = VK_RESULT_TO_STATUS(
+      syms->vkBindBufferMemory(*logical_device, handle, device_memory,
+                               /*memoryOffset=*/0),
+      "vkBindBufferMemory");
+
+  // Copy the initial contents into the buffer. This may require staging.
+  if (iree_status_is_ok(status) &&
+      !iree_const_byte_span_is_empty(initial_data)) {
+    status = iree_hal_device_transfer_range(
+        allocator->device,
+        iree_hal_make_host_transfer_buffer_span((void*)initial_data.data,
+                                                initial_data.data_length),
+        0, iree_hal_make_device_transfer_buffer(buffer), 0,
+        initial_data.data_length, IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT,
+        iree_infinite_timeout());
+  }
+
+  if (iree_status_is_ok(status)) {
+    iree_hal_allocator_statistics_record_alloc(
+        &allocator->statistics, params->type, buffer->allocation_size);
+    *out_buffer = buffer;
+  } else {
+    // Releasing the wrapper runs the release callback on the Vulkan handles.
+    iree_hal_buffer_release(buffer);
+  }
+  return status;
+}
+
+// Allocator vtable entry point: coerces the request through the compatibility
+// query and then performs the allocation.
+static iree_status_t iree_hal_vulkan_native_allocator_allocate_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    const iree_hal_buffer_params_t* IREE_RESTRICT params,
+    iree_device_size_t allocation_size, iree_const_byte_span_t initial_data,
+    iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
+  iree_hal_vulkan_native_allocator_t* self =
+      iree_hal_vulkan_native_allocator_cast(base_allocator);
+
+  // The query may rewrite both the params and the size so it operates on a
+  // local copy of the caller's params.
+  iree_hal_buffer_params_t coerced_params = *params;
+  const iree_hal_buffer_compatibility_t compatibility =
+      iree_hal_vulkan_native_allocator_query_buffer_compatibility(
+          base_allocator, &coerced_params, &allocation_size);
+  const bool allocatable = iree_all_bits_set(
+      compatibility, IREE_HAL_BUFFER_COMPATIBILITY_ALLOCATABLE);
+  if (!allocatable) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "allocator cannot allocate a buffer with the given parameters");
+  }
+
+  return iree_hal_vulkan_native_allocator_allocate_internal(
+      self, &coerced_params, allocation_size, initial_data, out_buffer);
+}
+
+static void iree_hal_vulkan_native_allocator_deallocate_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    iree_hal_buffer_t* IREE_RESTRICT base_buffer) {
+  iree_hal_vulkan_native_allocator_t* allocator =
+      iree_hal_vulkan_native_allocator_cast(base_buffer->device_allocator);
+  (void)allocator;  // only read by the statistics macro, which may be empty
+  iree_hal_allocator_statistics_record_free(&allocator->statistics,
+                                            base_buffer->memory_type,
+                                            base_buffer->allocation_size);
+  iree_hal_buffer_destroy(base_buffer);
+}
+
+static iree_status_t iree_hal_vulkan_native_allocator_import_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    const iree_hal_buffer_params_t* IREE_RESTRICT params,
+    iree_hal_external_buffer_t* IREE_RESTRICT external_buffer,
+    iree_hal_buffer_release_callback_t release_callback,
+    iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
+  // TODO(#7242): import via VK_EXT_external_memory_host; always fails for now.
+  return iree_make_status(IREE_STATUS_UNAVAILABLE,
+                          "importing from external buffers not supported");
+}
+
+static iree_status_t iree_hal_vulkan_native_allocator_export_buffer(
+    iree_hal_allocator_t* IREE_RESTRICT base_allocator,
+    iree_hal_buffer_t* IREE_RESTRICT buffer,
+    iree_hal_external_buffer_type_t requested_type,
+    iree_hal_external_buffer_flags_t requested_flags,
+    iree_hal_external_buffer_t* IREE_RESTRICT out_external_buffer) {  // stub
+  return iree_make_status(IREE_STATUS_UNAVAILABLE,
+                          "exporting to external buffers not supported");
+}
+
+namespace {
+const iree_hal_allocator_vtable_t iree_hal_vulkan_native_allocator_vtable = {
+    /*.destroy=*/iree_hal_vulkan_native_allocator_destroy,  // positional init
+    /*.host_allocator=*/iree_hal_vulkan_native_allocator_host_allocator,
+    /*.trim=*/iree_hal_vulkan_native_allocator_trim,
+    /*.query_statistics=*/iree_hal_vulkan_native_allocator_query_statistics,
+    /*.query_memory_heaps=*/iree_hal_vulkan_native_allocator_query_memory_heaps,
+    /*.query_buffer_compatibility=*/
+    iree_hal_vulkan_native_allocator_query_buffer_compatibility,
+    /*.allocate_buffer=*/iree_hal_vulkan_native_allocator_allocate_buffer,
+    /*.deallocate_buffer=*/iree_hal_vulkan_native_allocator_deallocate_buffer,
+    /*.import_buffer=*/iree_hal_vulkan_native_allocator_import_buffer,
+    /*.export_buffer=*/iree_hal_vulkan_native_allocator_export_buffer,
+};
+}  // namespace
diff --git a/runtime/src/iree/hal/drivers/vulkan/native_allocator.h b/runtime/src/iree/hal/drivers/vulkan/native_allocator.h
new file mode 100644
index 0000000..9d6e524
--- /dev/null
+++ b/runtime/src/iree/hal/drivers/vulkan/native_allocator.h
@@ -0,0 +1,30 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_HAL_DRIVERS_VULKAN_NATIVE_ALLOCATOR_H_
+#define IREE_HAL_DRIVERS_VULKAN_NATIVE_ALLOCATOR_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/api.h"
+#include "iree/hal/drivers/vulkan/handle_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+// Creates an allocator that allocates device memory directly through the
+// Vulkan API with no pooling or suballocation; returned in |out_allocator|.
+iree_status_t iree_hal_vulkan_native_allocator_create(
+    const iree_hal_vulkan_device_options_t* options, VkInstance instance,
+    VkPhysicalDevice physical_device,
+    iree::hal::vulkan::VkDeviceHandle* logical_device,
+    iree_hal_device_t* device, iree_hal_allocator_t** out_allocator);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif // IREE_HAL_DRIVERS_VULKAN_NATIVE_ALLOCATOR_H_
diff --git a/runtime/src/iree/hal/drivers/vulkan/registration/driver_module.cc b/runtime/src/iree/hal/drivers/vulkan/registration/driver_module.cc
index 514d302..a7182c7 100644
--- a/runtime/src/iree/hal/drivers/vulkan/registration/driver_module.cc
+++ b/runtime/src/iree/hal/drivers/vulkan/registration/driver_module.cc
@@ -36,6 +36,9 @@
IREE_FLAG(
bool, vulkan_dedicated_compute_queue, false,
"Use a dedicated queue with VK_QUEUE_COMPUTE_BIT for dispatch workloads.");
+IREE_FLAG(bool, vulkan_vma_allocator, true,
+ "Whether to use the VMA allocator instead of native Vulkan API "
+ "memory allocations.");
IREE_FLAG(
int64_t, vulkan_large_heap_block_size, 0,
"Preferred allocator block size for large allocations in bytes. Sets the\n"
@@ -82,6 +85,13 @@
driver_options.device_options.flags |=
IREE_HAL_VULKAN_DEVICE_FLAG_DEDICATED_COMPUTE_QUEUE;
}
+ if (FLAG_vulkan_vma_allocator) {
+ driver_options.device_options.flags |=
+ IREE_HAL_VULKAN_DEVICE_FLAG_VMA_ALLOCATOR;
+ } else {
+ driver_options.device_options.flags &=
+ ~IREE_HAL_VULKAN_DEVICE_FLAG_VMA_ALLOCATOR;
+ }
if (FLAG_vulkan_large_heap_block_size) {
driver_options.device_options.large_heap_block_size =
FLAG_vulkan_large_heap_block_size;
diff --git a/runtime/src/iree/hal/drivers/vulkan/vma_allocator.cc b/runtime/src/iree/hal/drivers/vulkan/vma_allocator.cc
index ad41e98..3a62218 100644
--- a/runtime/src/iree/hal/drivers/vulkan/vma_allocator.cc
+++ b/runtime/src/iree/hal/drivers/vulkan/vma_allocator.cc
@@ -257,6 +257,9 @@
#endif // IREE_STATISTICS_ENABLE
+static void iree_hal_vulkan_vma_allocator_destroy(
+ iree_hal_allocator_t* IREE_RESTRICT base_allocator);
+
iree_status_t iree_hal_vulkan_vma_allocator_create(
const iree_hal_vulkan_device_options_t* options, VkInstance instance,
VkPhysicalDevice physical_device, VkDeviceHandle* logical_device,
@@ -341,7 +344,7 @@
if (iree_status_is_ok(status)) {
*out_allocator = (iree_hal_allocator_t*)allocator;
} else {
- vmaDestroyAllocator(vma);
+ iree_hal_vulkan_vma_allocator_destroy((iree_hal_allocator_t*)allocator);
}
IREE_TRACE_ZONE_END(z0);
diff --git a/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc b/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc
index 0047243..10a6d6d 100644
--- a/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc
+++ b/runtime/src/iree/hal/drivers/vulkan/vulkan_device.cc
@@ -22,6 +22,7 @@
#include "iree/hal/drivers/vulkan/dynamic_symbols.h"
#include "iree/hal/drivers/vulkan/extensibility_util.h"
#include "iree/hal/drivers/vulkan/handle_util.h"
+#include "iree/hal/drivers/vulkan/native_allocator.h"
#include "iree/hal/drivers/vulkan/native_event.h"
#include "iree/hal/drivers/vulkan/native_pipeline_layout.h"
#include "iree/hal/drivers/vulkan/native_semaphore.h"
@@ -513,7 +514,7 @@
IREE_API_EXPORT void iree_hal_vulkan_device_options_initialize(
iree_hal_vulkan_device_options_t* out_options) {
memset(out_options, 0, sizeof(*out_options));
- out_options->flags = 0;
+ out_options->flags = IREE_HAL_VULKAN_DEVICE_FLAG_VMA_ALLOCATOR;
out_options->large_heap_block_size = 64 * 1024 * 1024;
}
@@ -711,9 +712,17 @@
// Create the device memory allocator that will service all buffer
// allocation requests.
- iree_status_t status = iree_hal_vulkan_vma_allocator_create(
- options, instance, physical_device, logical_device,
- (iree_hal_device_t*)device, &device->device_allocator);
+ iree_status_t status = iree_ok_status();
+ if (iree_all_bits_set(options->flags,
+ IREE_HAL_VULKAN_DEVICE_FLAG_VMA_ALLOCATOR)) {
+ status = iree_hal_vulkan_vma_allocator_create(
+ options, instance, physical_device, logical_device,
+ (iree_hal_device_t*)device, &device->device_allocator);
+ } else {
+ status = iree_hal_vulkan_native_allocator_create(
+ options, instance, physical_device, logical_device,
+ (iree_hal_device_t*)device, &device->device_allocator);
+ }
// Create command pools for each queue family. If we don't have a transfer
// queue then we'll ignore that one and just use the dispatch pool.