blob: b1f2778a65dcc45ef7fa7ffa10685bb51248c46f [file] [log] [blame]
// Copyright 2023 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "iree/hal/drivers/vulkan/sparse_buffer.h"
#include <cstddef>
#include <cstdint>
#include <cstring>
#include "iree/base/api.h"
#include "iree/hal/drivers/vulkan/base_buffer.h"
#include "iree/hal/drivers/vulkan/status_util.h"
// Buffer implementation backed by a sparsely-bound VkBuffer whose memory is
// provided by one or more separately allocated VkDeviceMemory blocks.
typedef struct iree_hal_vulkan_sparse_buffer_t {
// Shared base buffer state. Kept as the first member:
// iree_hal_vulkan_sparse_buffer_cast converts an iree_hal_buffer_t* directly
// to a pointer to this struct.
iree_hal_vulkan_base_buffer_t base;
// Device whose symbol table and allocation callbacks are used when the buffer
// is destroyed. Stored as a plain pointer; no reference is taken in this file.
// NOTE(review): presumably must outlive the buffer - confirm with the caller.
iree::hal::vulkan::VkDeviceHandle* logical_device;
// Number of entries in |physical_blocks|.
iree_host_size_t physical_block_count;
// Device memory blocks bound to the buffer, stored inline after the struct.
// Entries equal to VK_NULL_HANDLE are skipped when the buffer is destroyed.
VkDeviceMemory physical_blocks[];
} iree_hal_vulkan_sparse_buffer_t;
namespace {
// Forward declaration of the sparse buffer vtable so that
// iree_hal_vulkan_sparse_buffer_cast and the buffer constructor can refer to
// it before its definition later in this file.
extern const iree_hal_buffer_vtable_t iree_hal_vulkan_sparse_buffer_vtable;
} // namespace
// Converts a generic HAL buffer pointer into the sparse buffer type after
// asserting (via IREE_HAL_ASSERT_TYPE) that it carries the sparse buffer
// vtable.
static iree_hal_vulkan_sparse_buffer_t* iree_hal_vulkan_sparse_buffer_cast(
    iree_hal_buffer_t* base_value) {
  IREE_HAL_ASSERT_TYPE(base_value, &iree_hal_vulkan_sparse_buffer_vtable);
  iree_hal_vulkan_sparse_buffer_t* sparse_buffer =
      (iree_hal_vulkan_sparse_buffer_t*)base_value;
  return sparse_buffer;
}
// Allocates the physical memory blocks backing the sparse buffer |handle| and
// binds them with vkQueueBindSparse on |queue|, blocking the caller until the
// bind operation has completed.
//
// |requirements.size| bytes are covered by |physical_block_count| blocks of
// |physical_block_size| bytes each, except for the final block which only
// holds whatever remains. Every allocated VkDeviceMemory is written to
// |out_physical_blocks| as it is created. Nothing is freed by this function:
// on failure any blocks allocated so far remain in |out_physical_blocks|.
static iree_status_t iree_hal_vulkan_sparse_buffer_commit_sync(
    iree::hal::vulkan::VkDeviceHandle* logical_device, VkQueue queue,
    VkBuffer handle, VkMemoryRequirements requirements,
    uint32_t memory_type_index, VkDeviceSize physical_block_size,
    iree_host_size_t physical_block_count,
    VkDeviceMemory out_physical_blocks[]) {
  IREE_TRACE_ZONE_BEGIN(z0);
  IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)requirements.size);
  IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)requirements.alignment);
  IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)physical_block_size);
  IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)physical_block_count);

  // Scratch bind descriptors, one per physical block.
  VkSparseMemoryBind* block_binds = (VkSparseMemoryBind*)iree_alloca(
      physical_block_count * sizeof(VkSparseMemoryBind));

  for (iree_host_size_t block = 0; block < physical_block_count; ++block) {
    // Byte offset of this block within the buffer's sparse address range.
    const VkDeviceSize block_offset = block * physical_block_size;

    // Every block is full-sized except the last, which takes the remainder of
    // the total required size.
    VkMemoryAllocateInfo block_info;
    block_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    block_info.pNext = NULL;
    block_info.allocationSize = (block + 1 == physical_block_count)
                                    ? requirements.size - block_offset
                                    : physical_block_size;
    block_info.memoryTypeIndex = memory_type_index;

    IREE_TRACE_ZONE_BEGIN_NAMED(z1, "vkAllocateMemory");
    IREE_TRACE_ZONE_APPEND_VALUE_I64(z1, (int64_t)block_info.allocationSize);
    iree_status_t allocate_status = VK_RESULT_TO_STATUS(
        logical_device->syms()->vkAllocateMemory(
            *logical_device, &block_info, logical_device->allocator(),
            &out_physical_blocks[block]),
        "vkAllocateMemory");
    IREE_TRACE_ZONE_END(z1);
    IREE_RETURN_AND_END_ZONE_IF_ERROR(z0, allocate_status);

    // Map the whole allocation at this block's offset within the buffer.
    VkSparseMemoryBind* bind = &block_binds[block];
    bind->resourceOffset = block_offset;
    bind->size = block_info.allocationSize;
    bind->memory = out_physical_blocks[block];
    bind->memoryOffset = 0;
    bind->flags = 0;
  }

  // Fence used only to make the bind operation host-synchronous.
  VkFenceCreateInfo fence_create_info;
  fence_create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
  fence_create_info.pNext = NULL;
  fence_create_info.flags = 0;
  VkFence bind_fence = VK_NULL_HANDLE;
  IREE_RETURN_AND_END_ZONE_IF_ERROR(
      z0, VK_RESULT_TO_STATUS(logical_device->syms()->vkCreateFence(
                                  *logical_device, &fence_create_info,
                                  logical_device->allocator(), &bind_fence),
                              "vkCreateFence"));

  IREE_TRACE_ZONE_BEGIN_NAMED(z1, "vkQueueBindSparse");

  // Submit a single bind operation covering all blocks; it signals the fence
  // once the device has completed it.
  VkSparseBufferMemoryBindInfo buffer_binds;
  buffer_binds.buffer = handle;
  buffer_binds.bindCount = (uint32_t)physical_block_count;
  buffer_binds.pBinds = block_binds;
  VkBindSparseInfo submit_info;
  memset(&submit_info, 0, sizeof(submit_info));
  submit_info.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
  submit_info.pNext = NULL;
  submit_info.bufferBindCount = 1;
  submit_info.pBufferBinds = &buffer_binds;
  iree_status_t status = VK_RESULT_TO_STATUS(
      logical_device->syms()->vkQueueBindSparse(queue, 1, &submit_info,
                                                bind_fence),
      "vkQueueBindSparse");

  // Only wait when the submission was accepted; otherwise the fence would
  // never be signaled.
  if (iree_status_is_ok(status)) {
    status = VK_RESULT_TO_STATUS(
        logical_device->syms()->vkWaitForFences(
            *logical_device, 1, &bind_fence, /*waitAll=*/VK_TRUE, UINT64_MAX),
        "vkWaitForFences");
  }

  IREE_TRACE_ZONE_END(z1);

  // The fence is released on both the success and failure paths.
  logical_device->syms()->vkDestroyFence(*logical_device, bind_fence,
                                         logical_device->allocator());

  IREE_TRACE_ZONE_END(z0);
  return status;
}
// Creates a sparse buffer wrapping |handle| and synchronously allocates and
// binds all of the physical memory needed to back it.
//
// The backing memory is split into blocks no larger than
// |max_allocation_size| rounded down to a multiple of
// |requirements.alignment|; the final block may be smaller. The bind is
// submitted on |queue| and this call blocks until it has completed.
//
// On success |out_buffer| receives the new buffer; destroying that buffer
// destroys |handle| and frees the physical blocks. On failure |out_buffer| is
// set to NULL and an error status is returned. INVALID_ARGUMENT is returned
// when no aligned, non-empty block fits within |max_allocation_size|.
//
// NOTE(review): ownership of |handle| on failure is not uniform. Failures
// before the buffer struct exists (argument validation, host allocation) leave
// |handle| untouched, while a failure during commit destroys the partially
// constructed buffer and with it |handle|. Confirm callers account for this.
iree_status_t iree_hal_vulkan_sparse_buffer_create_bound_sync(
    iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type,
    iree_hal_memory_access_t allowed_access,
    iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size,
    iree_device_size_t byte_offset, iree_device_size_t byte_length,
    iree::hal::vulkan::VkDeviceHandle* logical_device, VkQueue queue,
    VkBuffer handle, VkMemoryRequirements requirements,
    uint32_t memory_type_index, VkDeviceSize max_allocation_size,
    iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer) {
  IREE_ASSERT_ARGUMENT(placement.device);
  IREE_ASSERT_ARGUMENT(logical_device);
  IREE_ASSERT_ARGUMENT(handle);
  IREE_ASSERT_ARGUMENT(out_buffer);
  // Never leave the output indeterminate on a failure path.
  *out_buffer = NULL;
  IREE_TRACE_ZONE_BEGIN(z0);
  IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)allocation_size);

  // A zero alignment, or an alignment larger than the maximum allocation size,
  // would yield a zero block size and divide by zero below.
  if (requirements.alignment == 0 ||
      max_allocation_size < requirements.alignment) {
    IREE_TRACE_ZONE_END(z0);
    return iree_make_status(
        IREE_STATUS_INVALID_ARGUMENT,
        "sparse buffer alignment requirement is zero or exceeds the maximum "
        "allocation size; no aligned physical block can be allocated");
  }

  // The maximum allocation size reported by Vulkan does not need to be a power
  // of two or aligned to anything in particular - sparse buffers do require
  // alignment though and must also be under the limit so here we adjust down to
  // the maximum aligned value.
  iree_device_size_t physical_block_size =
      iree_device_size_floor_div(max_allocation_size, requirements.alignment) *
      requirements.alignment;

  // ceil-div for the number of blocks as the last block may be partial.
  iree_host_size_t physical_block_count =
      (iree_host_size_t)iree_device_size_ceil_div(requirements.size,
                                                  physical_block_size);

  // The block handle array is stored inline after the buffer struct.
  iree_hal_vulkan_sparse_buffer_t* buffer = NULL;
  iree_host_size_t total_size =
      iree_host_align(sizeof(*buffer), iree_max_align_t) +
      sizeof(buffer->physical_blocks[0]) * physical_block_count;
  IREE_RETURN_AND_END_ZONE_IF_ERROR(
      z0, iree_allocator_malloc(host_allocator, total_size, (void**)&buffer));
  iree_hal_buffer_initialize(
      placement, &buffer->base.base, allocation_size, byte_offset, byte_length,
      memory_type, allowed_access, allowed_usage,
      &iree_hal_vulkan_sparse_buffer_vtable, &buffer->base.base);
  buffer->base.host_allocator = host_allocator;
  buffer->base.handle = handle;
  buffer->logical_device = logical_device;
  buffer->physical_block_count = physical_block_count;

  // Synchronously commit all physical blocks and bind them to the buffer.
  iree_status_t status = iree_hal_vulkan_sparse_buffer_commit_sync(
      logical_device, queue, handle, requirements, memory_type_index,
      physical_block_size, physical_block_count, buffer->physical_blocks);

  if (iree_status_is_ok(status)) {
    *out_buffer = &buffer->base.base;
  } else {
    // Destroying the buffer frees any blocks that were allocated before the
    // failure; destroy skips entries that are VK_NULL_HANDLE.
    // NOTE(review): this relies on iree_allocator_malloc returning zeroed
    // memory so that never-allocated entries read as VK_NULL_HANDLE - confirm.
    iree_hal_buffer_destroy((iree_hal_buffer_t*)buffer);
  }

  IREE_TRACE_ZONE_END(z0);
  return status;
}
// Tears down a sparse buffer: destroys the VkBuffer handle (if any), frees
// every non-null physical memory block, and finally releases the host
// allocation holding the buffer struct itself.
static void iree_hal_vulkan_sparse_buffer_destroy(
    iree_hal_buffer_t* base_buffer) {
  iree_hal_vulkan_sparse_buffer_t* sparse_buffer =
      iree_hal_vulkan_sparse_buffer_cast(base_buffer);
  // Capture what is needed up front; the struct is freed at the end.
  iree_allocator_t host_allocator = sparse_buffer->base.host_allocator;
  iree::hal::vulkan::VkDeviceHandle* device = sparse_buffer->logical_device;
  IREE_TRACE_ZONE_BEGIN(z0);
  IREE_TRACE_ZONE_APPEND_VALUE_I64(
      z0, (int64_t)iree_hal_buffer_allocation_size(base_buffer));

  // The buffer goes first so that nothing references the blocks when they are
  // freed below.
  if (sparse_buffer->base.handle != VK_NULL_HANDLE) {
    device->syms()->vkDestroyBuffer(*device, sparse_buffer->base.handle,
                                    device->allocator());
  }

  const iree_host_size_t block_count = sparse_buffer->physical_block_count;
  for (iree_host_size_t block = 0; block < block_count; ++block) {
    VkDeviceMemory block_memory = sparse_buffer->physical_blocks[block];
    if (block_memory == VK_NULL_HANDLE) {
      continue;
    }
    device->syms()->vkFreeMemory(*device, block_memory, device->allocator());
  }

  iree_allocator_free(host_allocator, sparse_buffer);

  IREE_TRACE_ZONE_END(z0);
}
// Mapping is not implemented for sparse buffers: every call fails with
// IREE_STATUS_UNAVAILABLE and none of the arguments are inspected.
static iree_status_t iree_hal_vulkan_sparse_buffer_map_range(
    iree_hal_buffer_t* base_buffer, iree_hal_mapping_mode_t mapping_mode,
    iree_hal_memory_access_t memory_access,
    iree_device_size_t local_byte_offset, iree_device_size_t local_byte_length,
    iree_hal_buffer_mapping_t* mapping) {
  (void)base_buffer;
  (void)mapping_mode;
  (void)memory_access;
  (void)local_byte_offset;
  (void)local_byte_length;
  (void)mapping;
  iree_status_t unsupported = iree_make_status(
      IREE_STATUS_UNAVAILABLE, "sparse buffers do not support mapping");
  return unsupported;
}
// Unmapping is not implemented for sparse buffers: every call fails with
// IREE_STATUS_UNAVAILABLE and none of the arguments are inspected.
static iree_status_t iree_hal_vulkan_sparse_buffer_unmap_range(
    iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset,
    iree_device_size_t local_byte_length, iree_hal_buffer_mapping_t* mapping) {
  (void)base_buffer;
  (void)local_byte_offset;
  (void)local_byte_length;
  (void)mapping;
  iree_status_t unsupported = iree_make_status(
      IREE_STATUS_UNAVAILABLE, "sparse buffers do not support mapping");
  return unsupported;
}
// Range invalidation is not implemented for sparse buffers: every call fails
// with IREE_STATUS_UNAVAILABLE and none of the arguments are inspected.
static iree_status_t iree_hal_vulkan_sparse_buffer_invalidate_range(
    iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset,
    iree_device_size_t local_byte_length) {
  (void)base_buffer;
  (void)local_byte_offset;
  (void)local_byte_length;
  iree_status_t unsupported = iree_make_status(
      IREE_STATUS_UNAVAILABLE, "sparse buffers do not support mapping");
  return unsupported;
}
// Range flushing is not implemented for sparse buffers: every call fails with
// IREE_STATUS_UNAVAILABLE and none of the arguments are inspected.
static iree_status_t iree_hal_vulkan_sparse_buffer_flush_range(
    iree_hal_buffer_t* base_buffer, iree_device_size_t local_byte_offset,
    iree_device_size_t local_byte_length) {
  (void)base_buffer;
  (void)local_byte_offset;
  (void)local_byte_length;
  iree_status_t unsupported = iree_make_status(
      IREE_STATUS_UNAVAILABLE, "sparse buffers do not support mapping");
  return unsupported;
}
namespace {
// Buffer vtable for sparse buffers. Recycling is delegated to the shared
// iree_hal_buffer_recycle and destruction to the sparse-specific destroy
// routine; the map/unmap/invalidate/flush entries all point at stubs that
// return IREE_STATUS_UNAVAILABLE.
// The initializer is positional: the /*.name=*/ comments label each slot and
// the entries must stay in the field order of iree_hal_buffer_vtable_t.
const iree_hal_buffer_vtable_t iree_hal_vulkan_sparse_buffer_vtable = {
/*.recycle=*/iree_hal_buffer_recycle,
/*.destroy=*/iree_hal_vulkan_sparse_buffer_destroy,
/*.map_range=*/iree_hal_vulkan_sparse_buffer_map_range,
/*.unmap_range=*/iree_hal_vulkan_sparse_buffer_unmap_range,
/*.invalidate_range=*/iree_hal_vulkan_sparse_buffer_invalidate_range,
/*.flush_range=*/iree_hal_vulkan_sparse_buffer_flush_range,
};
} // namespace