blob: a12e2ab555abe4e06cbd059b22c97dc7cf55fd71 [file]
// Copyright 2026 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "iree/hal/drivers/vulkan/queue.h"
#include <stdio.h>
#include <string.h>
#include "iree/async/notification.h"
#include "iree/async/operations/file.h"
#include "iree/async/operations/scheduling.h"
#include "iree/async/proactor.h"
#include "iree/base/threading/notification.h"
#include "iree/hal/drivers/vulkan/buffer.h"
#include "iree/hal/drivers/vulkan/command_buffer.h"
#include "iree/hal/drivers/vulkan/executable.h"
#include "iree/hal/drivers/vulkan/sparse_buffer.h"
#include "iree/hal/local/transient_buffer.h"
#include "iree/hal/utils/memory_file.h"
// Sentinels and fixed capacities for the queue-owned caches declared below.
//
// NOTE(review): uses of the DESCRIPTOR_SLOT_* sentinels are not visible in
// this part of the file; presumably ABSENT marks a missing 32-bit slot index
// and RESERVED marks a 64-bit owner epoch claimed before an epoch is known -
// confirm against the descriptor cache acquire/release code.
#define IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_SLOT_ABSENT UINT32_MAX
#define IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_SLOT_RESERVED UINT64_MAX
// Length of the per-submission native_descriptor_leases array.
#define IREE_HAL_VULKAN_QUEUE_BUILTIN_DESCRIPTOR_SET_COUNT_MAX 2
// NOTE(review): presumably the command-buffer counterparts of the descriptor
// slot sentinels above (the command buffer block's |owner_epochs| comment
// mentions RESERVED) - confirm against the command buffer cache code.
#define IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_SLOT_ABSENT UINT32_MAX
#define IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_SLOT_RESERVED UINT64_MAX
// Number of command buffer slots in each command buffer cache block.
#define IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_BLOCK_CAPACITY 64
// NOTE(review): presumably the RESERVED marker for a native replay
// |owner_epoch| - confirm against the native replay cache code.
#define IREE_HAL_VULKAN_QUEUE_NATIVE_REPLAY_OWNER_RESERVED UINT64_MAX
// Descriptor set budget divided by a submission's set count when sizing a
// native descriptor block.
#define IREE_HAL_VULKAN_QUEUE_NATIVE_DESCRIPTOR_BLOCK_SET_CAPACITY 4096
// NOTE(review): the constants below are not used in the visible part of the
// file; their names suggest BDA publication block size/alignment in bytes,
// timestamp query slots per block, and inline dispatch set/binding limits -
// confirm at their use sites.
#define IREE_HAL_VULKAN_QUEUE_BDA_PUBLICATION_BLOCK_SIZE (64ull * 1024ull)
#define IREE_HAL_VULKAN_QUEUE_BDA_PUBLICATION_ALIGNMENT 8ull
#define IREE_HAL_VULKAN_QUEUE_TIMESTAMP_QUERY_BLOCK_CAPACITY (64 * 1024)
#define IREE_HAL_VULKAN_QUEUE_DISPATCH_INLINE_SET_CAPACITY 8
#define IREE_HAL_VULKAN_QUEUE_DISPATCH_INLINE_BINDING_CAPACITY 16
// Signature of a completion action callback.
// |user_data| is the opaque pointer stored alongside the callback and
// |completion_status| is the status handed to the callback.
// NOTE(review): ownership of |completion_status| is not visible here - confirm
// at the call site whether the callback consumes it.
typedef void(IREE_API_PTR* iree_hal_vulkan_queue_completion_action_fn_t)(
void* user_data, iree_status_t completion_status);
// Optional action attached to a queue submission: a callback, the user data
// passed to it, and an optional resource kept alive for the callback.
typedef struct iree_hal_vulkan_queue_completion_action_t {
// Completion callback invoked after the queue epoch retires.
iree_hal_vulkan_queue_completion_action_fn_t fn;
// User data passed to |fn|.
void* user_data;
// Optional resource retained until |fn| has been invoked or cancelled.
iree_hal_resource_t* resource;
} iree_hal_vulkan_queue_completion_action_t;
static iree_hal_vulkan_queue_completion_action_t
iree_hal_vulkan_queue_completion_action_null(void) {
return (iree_hal_vulkan_queue_completion_action_t){0};
}
// One fixed-capacity block of the queue-owned command buffer cache.
// A block pairs a command pool with the command buffers allocated from it and
// tracks per-slot ownership; blocks are chained through |next|.
struct iree_hal_vulkan_queue_command_buffer_block_t {
// Next block in the queue-owned command buffer cache.
iree_hal_vulkan_queue_command_buffer_block_t* next;
// Command pool backing |command_buffers|.
VkCommandPool pool;
// Primary command buffers leased by one-shot queue submissions.
VkCommandBuffer
command_buffers[IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_BLOCK_CAPACITY];
// Owning queue epoch for each command buffer, 0 when free, or RESERVED.
// Indexed in parallel with |command_buffers|.
uint64_t owner_epochs[IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_BLOCK_CAPACITY];
// Whether a free command buffer must be reset before its next recording.
// Indexed in parallel with |command_buffers|.
bool needs_reset[IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_BLOCK_CAPACITY];
// Next slot considered when acquiring from this block.
uint32_t next_slot;
// Number of command buffers whose |owner_epochs| entry is 0.
uint32_t free_count;
};
// Identifies one leased command buffer by its cache block and slot index.
typedef struct iree_hal_vulkan_queue_command_buffer_lease_t {
// Command buffer block containing the leased command buffer.
iree_hal_vulkan_queue_command_buffer_block_t* block;
// Command buffer slot within |block|.
uint32_t slot;
} iree_hal_vulkan_queue_command_buffer_lease_t;
// One fixed-capacity block of the queue-owned descriptor cache.
// A block pairs a descriptor pool with the descriptor sets allocated from it
// and tracks per-slot ownership; blocks are chained through |next|.
// IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_BLOCK_CAPACITY is defined outside this
// part of the file.
struct iree_hal_vulkan_queue_descriptor_block_t {
// Next block in the queue-owned descriptor cache.
iree_hal_vulkan_queue_descriptor_block_t* next;
// Descriptor pool backing |sets|.
VkDescriptorPool pool;
// Descriptor sets leased by built-in queue operations.
VkDescriptorSet sets[IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_BLOCK_CAPACITY];
// Owning queue epoch for each descriptor set, or 0 when free.
// Indexed in parallel with |sets|.
// NOTE(review): IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_SLOT_RESERVED exists;
// confirm whether entries may also hold that value.
uint64_t owner_epochs[IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_BLOCK_CAPACITY];
// Next slot considered when acquiring from this block.
uint32_t next_slot;
// Number of descriptor sets whose |owner_epochs| entry is 0.
uint32_t free_count;
};
// Identifies one leased descriptor set by its cache block and slot index.
typedef struct iree_hal_vulkan_queue_descriptor_lease_t {
// Descriptor block containing the leased descriptor set.
iree_hal_vulkan_queue_descriptor_block_t* block;
// Descriptor set slot within |block|.
uint32_t slot;
} iree_hal_vulkan_queue_descriptor_lease_t;
// One block of the queue-owned native descriptor pool cache.
// A block owns a single descriptor pool and accounts for how much of its
// capacity has been handed out since the last pool reset; blocks are chained
// through |next|.
struct iree_hal_vulkan_queue_native_descriptor_block_t {
// Next block in the queue-owned native descriptor pool cache.
iree_hal_vulkan_queue_native_descriptor_block_t* next;
// Descriptor pool backing all allocations from this block.
VkDescriptorPool pool;
// Total descriptor capacity before the pool must be reset.
iree_hal_vulkan_command_buffer_descriptor_requirements_t capacity;
// Descriptor capacity allocated since the last pool reset.
iree_hal_vulkan_command_buffer_descriptor_requirements_t allocated;
// Number of submissions still using descriptor sets from this block.
// The block reset routine asserts that this is 0.
uint32_t active_lease_count;
};
// Identifies the native descriptor block a submission is leasing from.
typedef struct iree_hal_vulkan_queue_native_descriptor_lease_t {
// Native descriptor block containing this submission's descriptor pool.
iree_hal_vulkan_queue_native_descriptor_block_t* block;
} iree_hal_vulkan_queue_native_descriptor_lease_t;
// One block of the queue-owned BDA publication cache.
// A block owns a mapped buffer from which byte ranges are bump-allocated and
// counts the submissions still holding ranges; blocks are chained through
// |next|.
struct iree_hal_vulkan_queue_bda_publication_block_t {
// Next block in the queue-owned BDA publication cache.
iree_hal_vulkan_queue_bda_publication_block_t* next;
// BDA-capable host-visible buffer backing published tables.
iree_hal_buffer_t* buffer;
// Persistent host mapping of |buffer|.
iree_hal_buffer_mapping_t mapping;
// Device address of |buffer| byte offset zero.
VkDeviceAddress device_address;
// Total byte capacity of |buffer|.
iree_device_size_t capacity;
// Bump allocation position within |buffer|.
iree_device_size_t allocated_length;
// Number of submissions retaining ranges from this block.
uint32_t active_lease_count;
};
// Identifies one leased byte range within a BDA publication block.
typedef struct iree_hal_vulkan_queue_bda_publication_lease_t {
// BDA publication block containing this submission's range.
iree_hal_vulkan_queue_bda_publication_block_t* block;
// Byte offset of the leased range within |block|.
iree_device_size_t offset;
// Byte length of the leased range.
iree_device_size_t length;
} iree_hal_vulkan_queue_bda_publication_lease_t;
// One block of the queue-owned timestamp query cache.
// A block owns a query pool plus host storage for its results, hands out
// query slots by bump allocation, and counts the submissions still holding
// ranges; blocks are chained through |next|.
struct iree_hal_vulkan_queue_timestamp_query_block_t {
// Next block in the queue-owned timestamp query cache.
iree_hal_vulkan_queue_timestamp_query_block_t* next;
// Query pool backing |query_values|.
VkQueryPool pool;
// Host storage used for retired timestamp query results.
uint64_t* query_values;
// Total query slot capacity of |pool|.
uint32_t capacity;
// Bump allocation position within |pool|.
uint32_t allocated_count;
// Number of submissions retaining ranges from this block.
uint32_t active_lease_count;
};
// Identifies one leased range of query slots within a timestamp query block.
typedef struct iree_hal_vulkan_queue_timestamp_query_lease_t {
// Timestamp query block containing the leased range.
iree_hal_vulkan_queue_timestamp_query_block_t* block;
// First query slot in |block|.
uint32_t first_query;
// Number of query slots in the lease.
uint32_t query_count;
} iree_hal_vulkan_queue_timestamp_query_lease_t;
// One cached native replay instance in the queue-owned cache.
// Ties a HAL command buffer to the native Vulkan command buffer recorded for
// it, together with the cache leases and BDA slot snapshot that recording
// depends on; instances are chained through |next|.
struct iree_hal_vulkan_queue_native_replay_t {
// Next native replay instance in the queue-owned cache.
iree_hal_vulkan_queue_native_replay_t* next;
// HAL command buffer whose command program was recorded.
iree_hal_command_buffer_t* command_buffer;
// Recorded native Vulkan command buffer.
VkCommandBuffer native_command_buffer;
// Command-buffer cache lease held for |native_command_buffer|.
iree_hal_vulkan_queue_command_buffer_lease_t command_buffer_lease;
// Stable BDA publication storage baked into |native_command_buffer|.
iree_hal_vulkan_queue_bda_publication_lease_t bda_publication_lease;
// Last resolved BDA slot snapshot for this replay instance.
iree_hal_vulkan_command_buffer_bda_binding_slot_t* bda_binding_slots;
// Number of entries in |bda_binding_slots|.
iree_host_size_t bda_binding_slot_count;
// Whether |bda_binding_slots| matches the current publication contents.
bool bda_binding_slots_valid;
// Queue epoch owning this replay instance, 0 when free, or RESERVED.
uint64_t owner_epoch;
};
// Signature of a native recorder callback stored on a pending submission.
// Receives the owning |queue| and the |submission| to record for, and returns
// a status.
typedef iree_status_t(
IREE_API_PTR* iree_hal_vulkan_queue_record_native_submission_fn_t)(
iree_hal_vulkan_queue_t* queue,
iree_hal_vulkan_queue_pending_submission_t* submission);
// Kind of queue operation a pending submission represents.
// Stored in the pending submission's |kind| field, which selects the active
// member of that submission's payload union.
// NOTE(review): BARRIER and SPARSE_BIND have no same-named union member in the
// pending submission (sparse bind data lives in a separate |sparse_bind|
// struct) - confirm how those kinds are handled at their use sites.
typedef enum iree_hal_vulkan_queue_submission_kind_e {
IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_BARRIER = 0,
IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_HOST_CALL = 1,
IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_FILL = 2,
IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_UPDATE = 3,
IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_COPY = 4,
IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_EXECUTE = 5,
IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_DISPATCH = 6,
IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_ALLOCA = 7,
IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_DEALLOCA = 8,
IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_SPARSE_BIND = 9,
} iree_hal_vulkan_queue_submission_kind_t;
// Lifecycle states of a software-deferred submission.
// Each value names which party currently owns the submission as it moves from
// the deferred list to the ready list or to cancellation. The value is stored
// in the pending submission's atomic |deferred_state| field.
typedef enum iree_hal_vulkan_queue_deferred_state_e {
// Linked on the deferred list and waiting for software dependencies.
IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_PENDING = 0,
// A dependency wait operation is being armed by the submitting thread.
IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_ARMING = 1,
// A dependency callback owns promotion from deferred to ready.
IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_PROMOTING = 2,
// Linked on the ready list for native submission by the completion thread.
IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_READY = 3,
// Cancellation owns the unsubmitted node after unlinking it from deferred.
IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_CANCELLING = 4,
} iree_hal_vulkan_queue_deferred_state_t;
// Kind of memory readiness wait an alloca submission is parked on, if any.
// Stored in the alloca payload's |memory_wait_kind| field.
typedef enum iree_hal_vulkan_queue_alloca_memory_wait_kind_e {
// No active queue-owned memory readiness wait.
IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_NONE = 0,
// Staged backing is reserved but cannot be used until a pool death frontier
// retires.
IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_FRONTIER = 1,
// No reservation is currently available; retry after the pool notification
// advances.
IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_POOL_NOTIFICATION = 2,
} iree_hal_vulkan_queue_alloca_memory_wait_kind_t;
// Bookkeeping for one software-resolved wait of a deferred submission.
// Pairs the async semaphore timepoint with the submission it belongs to and a
// flag that reports when the timepoint callback is done touching the entry.
typedef struct iree_hal_vulkan_queue_wait_entry_t {
// Async semaphore timepoint registered for one software-resolved wait.
iree_async_semaphore_timepoint_t timepoint;
// Deferred submission owning this wait entry.
iree_hal_vulkan_queue_pending_submission_t* submission;
// Set to non-zero after the timepoint callback's final access completes.
iree_atomic_int32_t callback_complete;
} iree_hal_vulkan_queue_wait_entry_t;
// Result of resolving a submission's wait dependencies.
// Holds the native Vulkan wait semaphore infos that were resolved and records
// whether any remaining waits must be deferred to software timepoints.
typedef struct iree_hal_vulkan_queue_wait_resolution_t {
// Native Vulkan wait semaphores resolved from queue last-signal metadata.
VkSemaphoreSubmitInfo* wait_infos;
// Number of populated |wait_infos| entries.
uint32_t wait_info_count;
// Capacity of the |wait_infos| storage.
uint32_t wait_info_capacity;
// Whether unresolved waits require software timepoint deferral.
bool needs_deferral;
} iree_hal_vulkan_queue_wait_resolution_t;
// Outcome of attempting a native submission.
// Reports whether the submission was handed to the native pending queue,
// whether it was parked on a memory readiness wait instead, and any queue
// failure recorded along the way.
typedef struct iree_hal_vulkan_queue_submission_result_t {
// Whether ownership moved to the native pending queue.
bool submitted;
// Submission parked on a queue-owned memory readiness wait.
iree_hal_vulkan_queue_pending_submission_t* memory_wait_submission;
// Sticky queue failure status recorded during native submission.
iree_status_t queue_failure_status;
} iree_hal_vulkan_queue_submission_result_t;
// One queue operation tracked by the queue from capture until it retires.
//
// The struct groups, in order: list linkage and epoch/frontier identity, the
// retained wait/signal semaphore lists, profiling metadata, the native Vulkan
// command buffer and the cache leases it holds, an optional completion action,
// the sparse bind payload, software wait-deferral state, and finally a
// |kind|-selected union of per-operation payloads.
struct iree_hal_vulkan_queue_pending_submission_t {
// Next submission in the queue-owned list currently holding this object.
iree_hal_vulkan_queue_pending_submission_t* next;
// Queue owning this submission.
iree_hal_vulkan_queue_t* queue;
// Queue epoch signaled by the native Vulkan submit.
uint64_t epoch;
// Queue frontier published by this submission.
iree_hal_vulkan_queue_frontier_t frontier;
// Wait semaphores retained until the native submit retires.
iree_hal_semaphore_list_t wait_semaphore_list;
// Signal semaphores retained until completion side effects retire.
iree_hal_semaphore_list_t signal_semaphore_list;
// Submission kind controlling completion-side actions.
iree_hal_vulkan_queue_submission_kind_t kind;
// HAL-native profiling metadata captured for this submission.
struct {
// Recorder active when the submission was captured. Borrowed.
iree_hal_local_profile_recorder_t* recorder;
// Queue metadata scope active when the submission was captured.
iree_hal_local_profile_queue_scope_t scope;
// Queue event type corresponding to |kind|.
iree_hal_profile_queue_event_type_t type;
// Queue event flags accumulated during dependency resolution.
iree_hal_profile_queue_event_flags_t flags;
// Strategy used to satisfy wait dependencies.
iree_hal_profile_queue_dependency_strategy_t dependency_strategy;
// Host timestamp when the queue operation was captured.
iree_time_t submit_host_time_ns;
// Host timestamp when the operation was accepted for native/deferred work.
iree_time_t ready_host_time_ns;
// Profile submission id assigned to this queue operation.
uint64_t submission_id;
// Number of payload operations represented by this queue operation.
uint32_t operation_count;
// Type-specific payload length represented by this queue operation.
uint64_t payload_length;
// Set after the queue event has been appended to the recorder.
bool queue_event_recorded;
// Timestamp query cache lease held by this submission.
iree_hal_vulkan_queue_timestamp_query_lease_t timestamp_query_lease;
// Query pool receiving device timestamps, or VK_NULL_HANDLE.
VkQueryPool query_pool;
// First query slot leased from |query_pool|.
uint32_t first_query;
// Number of query slots leased from |query_pool|.
uint32_t query_count;
// Query index written before native command payloads, or ABSENT.
uint32_t queue_start_query;
// Query index written after native command payloads, or ABSENT.
uint32_t queue_end_query;
// First query slot for per-dispatch timestamp pairs, or ABSENT.
uint32_t dispatch_base_query;
// Number of dispatch commands with per-dispatch timestamp pairs.
uint32_t dispatch_query_count;
// Host storage receiving query results after native completion.
uint64_t* query_values;
// Set after the queue device event has been appended to the recorder.
bool queue_device_event_recorded;
} profile;
// Native Vulkan command buffer submitted for GPU-encoded work.
VkCommandBuffer native_command_buffer;
// Optional late native recorder invoked after wait dependencies resolve.
iree_hal_vulkan_queue_record_native_submission_fn_t record_native_submission;
// Command buffer cache lease held by |native_command_buffer|.
iree_hal_vulkan_queue_command_buffer_lease_t native_command_buffer_lease;
// Descriptor cache leases held by built-in queue operations.
iree_hal_vulkan_queue_descriptor_lease_t native_descriptor_leases
[IREE_HAL_VULKAN_QUEUE_BUILTIN_DESCRIPTOR_SET_COUNT_MAX];
// Number of entries populated in |native_descriptor_leases|.
uint32_t native_descriptor_slot_count;
// Descriptor-pool cache lease held by native command recording.
iree_hal_vulkan_queue_native_descriptor_lease_t native_descriptor_lease;
// BDA publication cache lease held by native command recording.
iree_hal_vulkan_queue_bda_publication_lease_t bda_publication_lease;
// Cached native replay instance used by command-buffer execution.
iree_hal_vulkan_queue_native_replay_t* native_replay;
// Optional action invoked after native queue completion.
iree_hal_vulkan_queue_completion_action_t completion_action;
// Queue-ordered sparse buffer memory binding payload.
// Stored outside the |kind|-selected union below.
struct {
// Sparse buffer receiving the memory binds.
VkBuffer buffer;
// Queue-owned sparse memory bind array.
VkSparseMemoryBind* binds;
// Number of populated entries in |binds|.
uint32_t bind_count;
} sparse_bind;
// Wait entries registered for software-resolved wait-before-signal edges.
iree_hal_vulkan_queue_wait_entry_t* wait_entries;
// Number of entries in |wait_entries|.
iree_host_size_t wait_entry_count;
// Outstanding software wait callbacks before the submission becomes ready.
iree_atomic_int32_t wait_count;
// Lifecycle owner for software-deferred submission promotion/cancellation.
// Holds an iree_hal_vulkan_queue_deferred_state_t value.
iree_atomic_int32_t deferred_state;
// First software wait failure status. Owned by this submission.
iree_atomic_intptr_t wait_failure_status;
// Wakes cancellation or promotion owners waiting for callbacks to quiesce.
iree_notification_t callback_notification;
// Operation payload selected by |kind|.
union {
// Queue-ordered host action payload.
struct {
// Host callback and user data captured from queue_host_call.
iree_hal_host_call_t call;
// User arguments copied from queue_host_call.
uint64_t args[4];
// HAL host-call flags captured from queue_host_call.
iree_hal_host_call_flags_t flags;
} host_call;
// Native buffer fill payload.
struct {
// Target buffer retained until the fill retires.
iree_hal_buffer_t* target_buffer;
// Target byte offset captured from queue_fill.
iree_device_size_t target_offset;
// Number of bytes to fill in the target buffer.
iree_device_size_t length;
// Fill pattern bytes captured from queue_fill.
uint8_t pattern[4];
// Number of bytes in |pattern|.
iree_host_size_t pattern_length;
// HAL fill flags captured from queue_fill.
iree_hal_fill_flags_t flags;
} fill;
// Native buffer update payload.
struct {
// Source bytes copied from the queue_update caller.
void* source_data;
// Target buffer retained until the update retires.
iree_hal_buffer_t* target_buffer;
// Target byte offset captured from queue_update.
iree_device_size_t target_offset;
// Number of bytes to copy into the target buffer.
iree_device_size_t length;
// HAL update flags captured from queue_update.
iree_hal_update_flags_t flags;
} update;
// Host-mediated buffer copy payload.
struct {
// Source buffer retained until the copy retires.
iree_hal_buffer_t* source_buffer;
// Source byte offset captured from queue_copy.
iree_device_size_t source_offset;
// Target buffer retained until the copy retires.
iree_hal_buffer_t* target_buffer;
// Target byte offset captured from queue_copy.
iree_device_size_t target_offset;
// Number of bytes to copy into the target buffer.
iree_device_size_t length;
// HAL copy flags captured from queue_copy.
iree_hal_copy_flags_t flags;
} copy;
// Queue-ordered allocation payload.
struct {
// Transient buffer retained until the alloca retires.
iree_hal_buffer_t* buffer;
// Backing strategy selected by the allocator.
iree_hal_vulkan_queue_alloca_strategy_t strategy;
// Borrowed allocator used by the sparse strategy.
iree_hal_allocator_t* allocator;
// Borrowed pool used by the pool strategy.
iree_hal_pool_t* pool;
// Buffer parameters captured from queue_alloca after normalization.
iree_hal_buffer_params_t params;
// Physical allocation size requested from |pool|.
iree_device_size_t allocation_size;
// HAL allocation flags captured from queue_alloca.
iree_hal_alloca_flags_t flags;
// Pool reservation flags used when probing |pool|.
iree_hal_pool_reserve_flags_t reserve_flags;
// Active memory-readiness wait kind.
iree_hal_vulkan_queue_alloca_memory_wait_kind_t memory_wait_kind;
// Set to non-zero after the memory wait callback's final access
// completes.
iree_atomic_int32_t memory_wait_callback_complete;
// Pool death frontier borrowed while |buffer| keeps the reservation armed.
const iree_async_frontier_t* wait_frontier;
// Tracker waiter storage for |wait_frontier|.
iree_async_frontier_waiter_t frontier_waiter;
// Pool notification borrowed while a notification wait is active.
iree_async_notification_t* pool_notification;
// Notification epoch observed before retrying pool reservation.
uint32_t pool_notification_wait_token;
// Whether an observe scope is held until the wait operation is submitted.
bool pool_notification_observation_held;
// Async wait operations rotated so callbacks can arm another wait.
iree_async_notification_wait_operation_t pool_notification_wait_ops[2];
// Active slot in |pool_notification_wait_ops|.
uint8_t pool_notification_wait_slot;
} alloca;
// Queue-ordered deallocation payload.
struct {
// Transient buffer retained until the dealloca retires.
iree_hal_buffer_t* buffer;
// HAL dealloca flags captured from queue_dealloca.
iree_hal_dealloca_flags_t flags;
} dealloca;
// Recorded command-buffer execution payload.
struct {
// Command buffer retained until execution retires.
iree_hal_command_buffer_t* command_buffer;
// Binding table entries copied into the submission allocation tail.
iree_hal_buffer_binding_t* binding_table_bindings;
// Number of entries populated in |binding_table_bindings|.
iree_host_size_t binding_table_count;
// BDA slot-address cache copied into the submission allocation tail.
iree_hal_vulkan_command_buffer_bda_binding_slot_t* bda_binding_slots;
// Number of entries populated in |bda_binding_slots|.
iree_host_size_t bda_binding_slot_count;
// HAL execute flags captured from queue_execute.
iree_hal_execute_flags_t flags;
} execute;
// Direct dispatch payload.
struct {
// Executable retained until dispatch completion.
iree_hal_executable_t* executable;
// Export ordinal captured from queue_dispatch.
iree_hal_executable_export_ordinal_t export_ordinal;
// Dispatch workgroup configuration captured from queue_dispatch.
iree_hal_dispatch_config_t config;
// Push constant bytes copied into the submission allocation tail.
void* constants_data;
// Number of bytes in |constants_data|.
iree_host_size_t constants_data_length;
// Direct buffer bindings copied into the submission allocation tail.
iree_hal_buffer_ref_t* bindings;
// Number of entries populated in |bindings|.
iree_host_size_t binding_count;
// HAL dispatch flags captured from queue_dispatch.
iree_hal_dispatch_flags_t flags;
} dispatch;
};
};
// Creates a timeline-type semaphore on |logical_device| whose counter starts
// at |initial_value|.
//
// |out_semaphore| is set to VK_NULL_HANDLE before the create call is made.
// Returns the status produced by the semaphore create call.
static iree_status_t iree_hal_vulkan_queue_create_timeline_semaphore(
    const iree_hal_vulkan_device_syms_t* syms, VkDevice logical_device,
    uint64_t initial_value, VkSemaphore* out_semaphore) {
  *out_semaphore = VK_NULL_HANDLE;

  // Extension struct selecting the timeline semaphore type.
  VkSemaphoreTypeCreateInfo type_info = {0};
  type_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO;
  type_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE;
  type_info.initialValue = initial_value;

  // Base create info chained to the type info above.
  VkSemaphoreCreateInfo semaphore_info = {0};
  semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
  semaphore_info.pNext = &type_info;

  return iree_vkCreateSemaphore(IREE_VULKAN_DEVICE(syms), logical_device,
                                &semaphore_info, /*pAllocator=*/NULL,
                                out_semaphore);
}
// Creates a command pool on |logical_device| for |queue_family_index| with the
// TRANSIENT and RESET_COMMAND_BUFFER create flags set.
//
// |out_command_pool| is set to VK_NULL_HANDLE before the create call is made.
// Returns the status produced by the command pool create call.
static iree_status_t iree_hal_vulkan_queue_create_command_pool(
    const iree_hal_vulkan_device_syms_t* syms, VkDevice logical_device,
    uint32_t queue_family_index, VkCommandPool* out_command_pool) {
  *out_command_pool = VK_NULL_HANDLE;

  VkCommandPoolCreateInfo pool_info = {0};
  pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
  pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
                    VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
  pool_info.queueFamilyIndex = queue_family_index;

  return iree_vkCreateCommandPool(IREE_VULKAN_DEVICE(syms), logical_device,
                                  &pool_info, /*pAllocator=*/NULL,
                                  out_command_pool);
}
// Allocates a descriptor cache block and fully populates it.
//
// Creates a descriptor pool sized for
// IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_BLOCK_CAPACITY storage-buffer descriptors
// and sets, then allocates every set up front using the builtin storage buffer
// descriptor set layout. All slots start out free.
//
// On success |out_block| receives the new block. On failure |out_block| stays
// NULL and any pool or host memory created along the way is released.
static iree_status_t iree_hal_vulkan_queue_descriptor_block_create(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_descriptor_block_t** out_block) {
  *out_block = NULL;

  iree_hal_vulkan_queue_descriptor_block_t* new_block = NULL;
  IREE_RETURN_IF_ERROR(iree_allocator_malloc(
      queue->host_allocator, sizeof(*new_block), (void**)&new_block));
  memset(new_block, 0, sizeof(*new_block));
  new_block->free_count = IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_BLOCK_CAPACITY;

  // Pool sized for one storage buffer descriptor per set.
  VkDescriptorPoolSize storage_pool_size = {0};
  storage_pool_size.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
  storage_pool_size.descriptorCount =
      IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_BLOCK_CAPACITY;
  VkDescriptorPoolCreateInfo pool_info = {0};
  pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
  pool_info.maxSets = IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_BLOCK_CAPACITY;
  pool_info.poolSizeCount = 1;
  pool_info.pPoolSizes = &storage_pool_size;

  // Temporary array holding the same layout once per set to allocate.
  VkDescriptorSetLayout* layouts = NULL;
  iree_status_t status = iree_vkCreateDescriptorPool(
      IREE_VULKAN_DEVICE(&queue->syms), queue->logical_device, &pool_info,
      /*pAllocator=*/NULL, &new_block->pool);
  if (iree_status_is_ok(status)) {
    status = iree_allocator_malloc_array(
        queue->host_allocator, IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_BLOCK_CAPACITY,
        sizeof(*layouts), (void**)&layouts);
  }
  if (iree_status_is_ok(status)) {
    uint32_t layout_index = 0;
    while (layout_index < IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_BLOCK_CAPACITY) {
      layouts[layout_index] =
          queue->builtins->storage_buffer_descriptor_set_layout;
      ++layout_index;
    }
    VkDescriptorSetAllocateInfo set_info = {0};
    set_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
    set_info.descriptorPool = new_block->pool;
    set_info.descriptorSetCount =
        IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_BLOCK_CAPACITY;
    set_info.pSetLayouts = layouts;
    status = iree_vkAllocateDescriptorSets(IREE_VULKAN_DEVICE(&queue->syms),
                                           queue->logical_device, &set_info,
                                           new_block->sets);
  }
  // The layout array is only needed for the allocate call.
  iree_allocator_free(queue->host_allocator, layouts);

  if (!iree_status_is_ok(status)) {
    // Unwind whatever was created before the failure.
    if (new_block->pool) {
      iree_vkDestroyDescriptorPool(IREE_VULKAN_DEVICE(&queue->syms),
                                   queue->logical_device, new_block->pool,
                                   /*pAllocator=*/NULL);
    }
    iree_allocator_free(queue->host_allocator, new_block);
    return status;
  }

  *out_block = new_block;
  return status;
}
// Destroys |block|'s descriptor pool (when one was created) and frees the
// block's host memory. A NULL |block| is ignored.
static void iree_hal_vulkan_queue_descriptor_block_destroy(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_descriptor_block_t* block) {
  if (block) {
    if (block->pool) {
      iree_vkDestroyDescriptorPool(IREE_VULKAN_DEVICE(&queue->syms),
                                   queue->logical_device, block->pool,
                                   /*pAllocator=*/NULL);
    }
    iree_allocator_free(queue->host_allocator, block);
  }
}
// Appends |block| to the end of the queue's descriptor cache list.
//
// The block becomes the new tail (and the head when the list was empty), the
// cursor is pointed at it only when no cursor was set, and the block count is
// incremented.
static void iree_hal_vulkan_queue_descriptor_cache_append_block(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_descriptor_block_t* block) {
  iree_hal_vulkan_queue_descriptor_block_t* previous_tail =
      queue->descriptor_cache.tail;
  block->next = NULL;
  if (!previous_tail) {
    queue->descriptor_cache.head = block;
  } else {
    previous_tail->next = block;
  }
  queue->descriptor_cache.tail = block;
  if (!queue->descriptor_cache.cursor) {
    queue->descriptor_cache.cursor = block;
  }
  queue->descriptor_cache.block_count += 1;
}
// Seeds the queue's descriptor cache with one freshly created block.
// Returns the block creation failure unchanged when creation fails; nothing is
// appended in that case.
static iree_status_t iree_hal_vulkan_queue_descriptor_cache_initialize(
    iree_hal_vulkan_queue_t* queue) {
  iree_hal_vulkan_queue_descriptor_block_t* first_block = NULL;
  iree_status_t status =
      iree_hal_vulkan_queue_descriptor_block_create(queue, &first_block);
  if (iree_status_is_ok(status)) {
    iree_hal_vulkan_queue_descriptor_cache_append_block(queue, first_block);
  }
  return status;
}
// Destroys every block in the queue's descriptor cache and zeroes the cache
// bookkeeping.
static void iree_hal_vulkan_queue_descriptor_cache_deinitialize(
    iree_hal_vulkan_queue_t* queue) {
  iree_hal_vulkan_queue_descriptor_block_t* remaining =
      queue->descriptor_cache.head;
  while (remaining) {
    // Advance before destroying: the link lives inside the block being freed.
    iree_hal_vulkan_queue_descriptor_block_t* doomed = remaining;
    remaining = remaining->next;
    iree_hal_vulkan_queue_descriptor_block_destroy(queue, doomed);
  }
  memset(&queue->descriptor_cache, 0, sizeof(queue->descriptor_cache));
}
// Returns true when |requirements| asks for no sets, samplers, uniform
// buffers, or storage buffers.
static bool iree_hal_vulkan_queue_native_descriptor_requirements_are_zero(
    const iree_hal_vulkan_command_buffer_descriptor_requirements_t*
        requirements) {
  if (requirements->set_count != 0) return false;
  if (requirements->sampler_count != 0) return false;
  if (requirements->uniform_buffer_count != 0) return false;
  if (requirements->storage_buffer_count != 0) return false;
  return true;
}
// Returns true when |requirements| fits in what is left of |capacity| after
// |allocated| has been taken out, for every tracked descriptor count.
// Returns false when |allocated| already exceeds |capacity| in any count.
static bool iree_hal_vulkan_queue_native_descriptor_requirements_fit(
    const iree_hal_vulkan_command_buffer_descriptor_requirements_t* capacity,
    const iree_hal_vulkan_command_buffer_descriptor_requirements_t* allocated,
    const iree_hal_vulkan_command_buffer_descriptor_requirements_t*
        requirements) {
  // Reject inconsistent accounting first so that the remaining-capacity
  // subtractions below never see allocated > capacity.
  if (allocated->set_count > capacity->set_count) return false;
  if (allocated->sampler_count > capacity->sampler_count) return false;
  if (allocated->uniform_buffer_count > capacity->uniform_buffer_count) {
    return false;
  }
  if (allocated->storage_buffer_count > capacity->storage_buffer_count) {
    return false;
  }

  // Each requested count must fit in the remaining capacity.
  if (requirements->set_count > capacity->set_count - allocated->set_count) {
    return false;
  }
  if (requirements->sampler_count >
      capacity->sampler_count - allocated->sampler_count) {
    return false;
  }
  if (requirements->uniform_buffer_count >
      capacity->uniform_buffer_count - allocated->uniform_buffer_count) {
    return false;
  }
  if (requirements->storage_buffer_count >
      capacity->storage_buffer_count - allocated->storage_buffer_count) {
    return false;
  }
  return true;
}
// Accumulates each descriptor count of |value| into |inout_value|.
// No overflow checking is performed here.
static void iree_hal_vulkan_queue_native_descriptor_requirements_add(
    iree_hal_vulkan_command_buffer_descriptor_requirements_t* inout_value,
    const iree_hal_vulkan_command_buffer_descriptor_requirements_t* value) {
  inout_value->set_count += value->set_count;
  inout_value->sampler_count += value->sampler_count;
  inout_value->uniform_buffer_count += value->uniform_buffer_count;
  inout_value->storage_buffer_count += value->storage_buffer_count;
}
// Computes the descriptor requirements for dispatching |pipeline|.
//
// |out_requirements| is always zeroed first. When the pipeline has push
// descriptors enabled it stays zero; otherwise the pipeline's set, sampler,
// uniform buffer, and storage buffer counts are copied into it.
// Always returns OK.
static iree_status_t
iree_hal_vulkan_queue_calculate_dispatch_descriptor_requirements(
    const iree_hal_vulkan_pipeline_t* pipeline,
    iree_hal_vulkan_command_buffer_descriptor_requirements_t*
        out_requirements) {
  memset(out_requirements, 0, sizeof(*out_requirements));
  if (!pipeline->push_descriptors.enabled) {
    out_requirements->set_count = pipeline->descriptor_requirements.set_count;
    out_requirements->sampler_count =
        pipeline->descriptor_requirements.sampler_count;
    out_requirements->uniform_buffer_count =
        pipeline->descriptor_requirements.uniform_buffer_count;
    out_requirements->storage_buffer_count =
        pipeline->descriptor_requirements.storage_buffer_count;
  }
  return iree_ok_status();
}
// Sizes a native descriptor block as a whole multiple of |requirements|.
//
// The multiple starts at the block set budget divided by the submission's set
// count (at least 1) and is then lowered so that no per-type descriptor count
// multiplied by it exceeds UINT32_MAX. |out_capacity| is left all-zero when
// |requirements| asks for no sets.
static void iree_hal_vulkan_queue_calculate_native_descriptor_block_capacity(
    const iree_hal_vulkan_command_buffer_descriptor_requirements_t*
        requirements,
    iree_hal_vulkan_command_buffer_descriptor_requirements_t* out_capacity) {
  memset(out_capacity, 0, sizeof(*out_capacity));
  if (requirements->set_count == 0) return;

  // Number of submissions' worth of descriptors the block should hold.
  uint32_t multiple =
      IREE_HAL_VULKAN_QUEUE_NATIVE_DESCRIPTOR_BLOCK_SET_CAPACITY /
      requirements->set_count;
  if (multiple == 0) multiple = 1;

  // Clamp so that each count * multiple below stays within 32 bits.
  if (requirements->uniform_buffer_count != 0) {
    const uint32_t uniform_limit =
        UINT32_MAX / requirements->uniform_buffer_count;
    if (uniform_limit < multiple) multiple = uniform_limit;
  }
  if (requirements->sampler_count != 0) {
    const uint32_t sampler_limit = UINT32_MAX / requirements->sampler_count;
    if (sampler_limit < multiple) multiple = sampler_limit;
  }
  if (requirements->storage_buffer_count != 0) {
    const uint32_t storage_limit =
        UINT32_MAX / requirements->storage_buffer_count;
    if (storage_limit < multiple) multiple = storage_limit;
  }

  out_capacity->set_count = requirements->set_count * multiple;
  out_capacity->sampler_count = requirements->sampler_count * multiple;
  out_capacity->uniform_buffer_count =
      requirements->uniform_buffer_count * multiple;
  out_capacity->storage_buffer_count =
      requirements->storage_buffer_count * multiple;
}
// Allocates a native descriptor block sized from |requirements| and creates
// its descriptor pool.
//
// The pool gets one pool-size entry for each of sampler, uniform buffer, and
// storage buffer whose computed capacity is non-zero. Fails with
// FAILED_PRECONDITION when the computed capacity contains no descriptor sets.
//
// On success |out_block| receives the new block. On failure |out_block| stays
// NULL and the block's host memory is released.
static iree_status_t iree_hal_vulkan_queue_native_descriptor_block_create(
    iree_hal_vulkan_queue_t* queue,
    const iree_hal_vulkan_command_buffer_descriptor_requirements_t*
        requirements,
    iree_hal_vulkan_queue_native_descriptor_block_t** out_block) {
  *out_block = NULL;

  iree_hal_vulkan_command_buffer_descriptor_requirements_t block_capacity;
  iree_hal_vulkan_queue_calculate_native_descriptor_block_capacity(
      requirements, &block_capacity);
  if (block_capacity.set_count == 0) {
    return iree_make_status(
        IREE_STATUS_FAILED_PRECONDITION,
        "Vulkan native descriptor block requires descriptor sets");
  }

  iree_hal_vulkan_queue_native_descriptor_block_t* new_block = NULL;
  IREE_RETURN_IF_ERROR(iree_allocator_malloc(
      queue->host_allocator, sizeof(*new_block), (void**)&new_block));
  memset(new_block, 0, sizeof(*new_block));
  new_block->capacity = block_capacity;

  // Only descriptor types with a non-zero capacity get a pool-size entry.
  VkDescriptorPoolSize pool_sizes[3];
  uint32_t used_pool_size_count = 0;
  if (block_capacity.sampler_count != 0) {
    VkDescriptorPoolSize* entry = &pool_sizes[used_pool_size_count++];
    entry->type = VK_DESCRIPTOR_TYPE_SAMPLER;
    entry->descriptorCount = block_capacity.sampler_count;
  }
  if (block_capacity.uniform_buffer_count != 0) {
    VkDescriptorPoolSize* entry = &pool_sizes[used_pool_size_count++];
    entry->type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
    entry->descriptorCount = block_capacity.uniform_buffer_count;
  }
  if (block_capacity.storage_buffer_count != 0) {
    VkDescriptorPoolSize* entry = &pool_sizes[used_pool_size_count++];
    entry->type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
    entry->descriptorCount = block_capacity.storage_buffer_count;
  }

  VkDescriptorPoolCreateInfo pool_info = {0};
  pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
  pool_info.maxSets = block_capacity.set_count;
  pool_info.poolSizeCount = used_pool_size_count;
  pool_info.pPoolSizes = pool_sizes;

  iree_status_t status = iree_vkCreateDescriptorPool(
      IREE_VULKAN_DEVICE(&queue->syms), queue->logical_device, &pool_info,
      /*pAllocator=*/NULL, &new_block->pool);
  if (!iree_status_is_ok(status)) {
    iree_allocator_free(queue->host_allocator, new_block);
    return status;
  }

  *out_block = new_block;
  return status;
}
// Destroys |block|'s descriptor pool (when one was created) and frees the
// block storage with the queue host allocator. A NULL |block| is ignored.
static void iree_hal_vulkan_queue_native_descriptor_block_destroy(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_native_descriptor_block_t* block) {
  if (block == NULL) {
    return;
  }
  if (block->pool != VK_NULL_HANDLE) {
    iree_vkDestroyDescriptorPool(IREE_VULKAN_DEVICE(&queue->syms),
                                 queue->logical_device, block->pool,
                                 /*pAllocator=*/NULL);
  }
  iree_allocator_free(queue->host_allocator, block);
}
// Links |block| at the tail of the queue's native descriptor cache list.
// The cursor is pointed at |block| only when no cursor is currently set, and
// the cached block count is incremented.
static void iree_hal_vulkan_queue_native_descriptor_cache_append_block(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_native_descriptor_block_t* block) {
  block->next = NULL;

  // Pick the link that must point at the new block: the old tail's next
  // pointer, or the list head when the list is empty.
  iree_hal_vulkan_queue_native_descriptor_block_t** link =
      queue->native_descriptor_cache.tail
          ? &queue->native_descriptor_cache.tail->next
          : &queue->native_descriptor_cache.head;
  *link = block;
  queue->native_descriptor_cache.tail = block;

  if (queue->native_descriptor_cache.cursor == NULL) {
    queue->native_descriptor_cache.cursor = block;
  }
  queue->native_descriptor_cache.block_count =
      queue->native_descriptor_cache.block_count + 1;
}
// Destroys every block in the native descriptor cache list and zeroes the
// cache bookkeeping (head, tail, cursor, and count).
static void iree_hal_vulkan_queue_native_descriptor_cache_deinitialize(
    iree_hal_vulkan_queue_t* queue) {
  iree_hal_vulkan_queue_native_descriptor_block_t* following = NULL;
  for (iree_hal_vulkan_queue_native_descriptor_block_t* current =
           queue->native_descriptor_cache.head;
       current; current = following) {
    // Capture the successor before the node is freed.
    following = current->next;
    iree_hal_vulkan_queue_native_descriptor_block_destroy(queue, current);
  }
  memset(&queue->native_descriptor_cache, 0,
         sizeof(queue->native_descriptor_cache));
}
// Resets |block|'s descriptor pool and clears its allocation accounting.
// Asserts that the block has no active leases. If the Vulkan reset fails the
// error is returned and the accounting is left untouched.
static iree_status_t iree_hal_vulkan_queue_native_descriptor_block_reset(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_native_descriptor_block_t* block) {
  IREE_ASSERT(block->active_lease_count == 0,
              "cannot reset a native descriptor block with active leases");

  IREE_RETURN_IF_ERROR(iree_vkResetDescriptorPool(
      IREE_VULKAN_DEVICE(&queue->syms), queue->logical_device, block->pool,
      /*flags=*/0));

  // Accounting is only cleared once the pool reset has succeeded.
  memset(&block->allocated, 0, sizeof(block->allocated));
  return iree_ok_status();
}
// Records a lease of |requirements| against |block| on behalf of
// |submission|.
//
// Adds |requirements| to the block's allocated totals, bumps its active lease
// count, stores the block in the submission's descriptor lease, and moves the
// cache cursor to the block. |out_descriptor_pool| receives the block's pool.
// No capacity check is made here. Always returns OK.
static iree_status_t
iree_hal_vulkan_queue_acquire_native_descriptor_pool_from_block(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission,
    iree_hal_vulkan_queue_native_descriptor_block_t* block,
    const iree_hal_vulkan_command_buffer_descriptor_requirements_t*
        requirements,
    VkDescriptorPool* out_descriptor_pool) {
  // Block-side accounting.
  iree_hal_vulkan_queue_native_descriptor_requirements_add(&block->allocated,
                                                           requirements);
  block->active_lease_count = block->active_lease_count + 1;

  // Submission and cache bookkeeping.
  submission->native_descriptor_lease.block = block;
  queue->native_descriptor_cache.cursor = block;

  *out_descriptor_pool = block->pool;
  return iree_ok_status();
}
// Attempts to lease descriptor capacity for |requirements| from a block that
// is already in the queue's native descriptor cache.
//
// The scan starts at the cache cursor (or the head when no cursor is set) and
// wraps to the head once after reaching the tail. On success
// |out_descriptor_pool| receives the chosen block's pool and |out_acquired| is
// set to true. Returning OK with |out_acquired| == false means no cached block
// could satisfy the request; no new block is created here.
// NOTE(review): the "_under_lock" suffix presumably refers to
// queue->submission_mutex (the release path takes it before touching the same
// fields) - confirm against callers.
static iree_status_t
iree_hal_vulkan_queue_try_acquire_native_descriptor_pool_under_lock(
iree_hal_vulkan_queue_t* queue,
iree_hal_vulkan_queue_pending_submission_t* submission,
const iree_hal_vulkan_command_buffer_descriptor_requirements_t*
requirements,
VkDescriptorPool* out_descriptor_pool, bool* out_acquired) {
*out_acquired = false;
*out_descriptor_pool = VK_NULL_HANDLE;
// All-zero "allocated" value used to test against a block's total capacity.
const iree_hal_vulkan_command_buffer_descriptor_requirements_t zero = {0};
iree_hal_vulkan_queue_native_descriptor_block_t* first_block =
queue->native_descriptor_cache.cursor
? queue->native_descriptor_cache.cursor
: queue->native_descriptor_cache.head;
iree_hal_vulkan_queue_native_descriptor_block_t* block = first_block;
while (block) {
// Skip blocks whose total capacity could never hold the request.
if (iree_hal_vulkan_queue_native_descriptor_requirements_fit(
&block->capacity, &zero, requirements)) {
// The block is large enough but currently too full: reset it when nothing
// is leasing from it so its full capacity becomes available again.
if (!iree_hal_vulkan_queue_native_descriptor_requirements_fit(
&block->capacity, &block->allocated, requirements) &&
block->active_lease_count == 0) {
IREE_RETURN_IF_ERROR(
iree_hal_vulkan_queue_native_descriptor_block_reset(queue, block));
}
if (iree_hal_vulkan_queue_native_descriptor_requirements_fit(
&block->capacity, &block->allocated, requirements)) {
IREE_RETURN_IF_ERROR(
iree_hal_vulkan_queue_acquire_native_descriptor_pool_from_block(
queue, submission, block, requirements, out_descriptor_pool));
*out_acquired = true;
return iree_ok_status();
}
}
block = block->next;
// Wrap to the head once; updating |first_block| prevents a second wrap.
// NOTE(review): after wrapping the scan runs to the tail again, so blocks
// between the original cursor and the tail are visited twice.
if (!block && first_block != queue->native_descriptor_cache.head) {
block = queue->native_descriptor_cache.head;
first_block = queue->native_descriptor_cache.head;
}
}
return iree_ok_status();
}
// Leases a descriptor pool able to hold |requirements| for |submission|.
//
// When |requirements| is all zero no pool is needed: |out_descriptor_pool| is
// left as VK_NULL_HANDLE and OK is returned. Fails with FAILED_PRECONDITION if
// |submission| already holds a descriptor lease.
//
// Otherwise the cached blocks are tried first. If none fits, exactly one new
// block sized from |requirements| is created and appended, and the lease is
// retried. If even the new block cannot satisfy the request, INTERNAL is
// returned instead of creating further pools: a block that cannot hold the
// requirements it was sized for would fail the same way on every retry.
// The new block stays in the cache in that case and is destroyed with it.
static iree_status_t
iree_hal_vulkan_queue_acquire_native_descriptor_pool_under_lock(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission,
    const iree_hal_vulkan_command_buffer_descriptor_requirements_t*
        requirements,
    VkDescriptorPool* out_descriptor_pool) {
  *out_descriptor_pool = VK_NULL_HANDLE;
  if (iree_hal_vulkan_queue_native_descriptor_requirements_are_zero(
          requirements)) {
    return iree_ok_status();
  }
  if (submission->native_descriptor_lease.block) {
    return iree_make_status(
        IREE_STATUS_FAILED_PRECONDITION,
        "Vulkan queue submission already owns a native descriptor pool");
  }

  // Fast path: reuse capacity from an existing block.
  bool acquired = false;
  IREE_RETURN_IF_ERROR(
      iree_hal_vulkan_queue_try_acquire_native_descriptor_pool_under_lock(
          queue, submission, requirements, out_descriptor_pool, &acquired));
  if (acquired) return iree_ok_status();

  // Slow path: grow the cache by one block sized for this request.
  iree_hal_vulkan_queue_native_descriptor_block_t* block = NULL;
  IREE_RETURN_IF_ERROR(iree_hal_vulkan_queue_native_descriptor_block_create(
      queue, requirements, &block));
  iree_hal_vulkan_queue_native_descriptor_cache_append_block(queue, block);

  IREE_RETURN_IF_ERROR(
      iree_hal_vulkan_queue_try_acquire_native_descriptor_pool_under_lock(
          queue, submission, requirements, out_descriptor_pool, &acquired));
  if (acquired) return iree_ok_status();

  // Retrying would only allocate identical blocks that fail identically.
  return iree_make_status(IREE_STATUS_INTERNAL,
                          "Vulkan native descriptor block could not satisfy "
                          "requirements immediately after allocation");
}
// Drops |submission|'s lease on its native descriptor block, if it has one.
//
// Takes queue->submission_mutex, decrements the block's active lease count,
// points the cache cursor at the block, and clears the submission's lease.
// The block's allocated totals are not modified here.
static void iree_hal_vulkan_queue_release_native_descriptor_pool(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  iree_hal_vulkan_queue_native_descriptor_lease_t* lease =
      &submission->native_descriptor_lease;
  if (lease->block == NULL) {
    return;
  }

  iree_slim_mutex_lock(&queue->submission_mutex);
  IREE_ASSERT(lease->block->active_lease_count > 0,
              "native descriptor block active lease count underflow");
  iree_hal_vulkan_queue_native_descriptor_block_t* released_block =
      lease->block;
  released_block->active_lease_count = released_block->active_lease_count - 1;
  queue->native_descriptor_cache.cursor = released_block;
  lease->block = NULL;
  iree_slim_mutex_unlock(&queue->submission_mutex);
}
// Creates a BDA publication block backed by a persistently mapped buffer.
//
// The block capacity is the larger of |minimum_capacity| and
// IREE_HAL_VULKAN_QUEUE_BDA_PUBLICATION_BLOCK_SIZE, rounded up to
// IREE_HAL_VULKAN_QUEUE_BDA_PUBLICATION_ALIGNMENT. The buffer is allocated
// from queue->device_allocator, mapped for writing, and its device address is
// queried. Fails when the queue has no device allocator, when sizes overflow,
// when the buffer reports a zero device address, or when
// address + capacity would overflow 64 bits. On success |out_block| receives
// the block (not yet linked into the cache); on failure everything acquired
// so far is released and |out_block| stays NULL.
static iree_status_t iree_hal_vulkan_queue_bda_publication_block_create(
    iree_hal_vulkan_queue_t* queue, iree_device_size_t minimum_capacity,
    iree_hal_vulkan_queue_bda_publication_block_t** out_block) {
  *out_block = NULL;
  if (!queue->device_allocator) {
    return iree_make_status(
        IREE_STATUS_FAILED_PRECONDITION,
        "Vulkan BDA publication requires initialized queue staging resources");
  }

  const iree_device_size_t default_capacity =
      (iree_device_size_t)IREE_HAL_VULKAN_QUEUE_BDA_PUBLICATION_BLOCK_SIZE;
  iree_device_size_t capacity = minimum_capacity > default_capacity
                                    ? minimum_capacity
                                    : default_capacity;
  if (!iree_device_size_checked_align(
          capacity, IREE_HAL_VULKAN_QUEUE_BDA_PUBLICATION_ALIGNMENT,
          &capacity)) {
    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
                            "Vulkan BDA publication block size overflows");
  }

  iree_hal_vulkan_queue_bda_publication_block_t* block = NULL;
  IREE_RETURN_IF_ERROR(iree_allocator_malloc(queue->host_allocator,
                                             sizeof(*block), (void**)&block));
  memset(block, 0, sizeof(*block));
  block->capacity = capacity;

  iree_hal_buffer_params_t params = {
      .type = IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE |
              IREE_HAL_MEMORY_TYPE_HOST_VISIBLE,
      .access = IREE_HAL_MEMORY_ACCESS_DISCARD_WRITE,
      .usage = IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE_READ |
               IREE_HAL_BUFFER_USAGE_MAPPING_PERSISTENT |
               IREE_HAL_BUFFER_USAGE_MAPPING_ACCESS_SEQUENTIAL_WRITE,
      .queue_affinity = queue->queue_affinity,
  };

  // Each step runs only if every previous step succeeded.
  iree_status_t status = iree_hal_vulkan_allocator_allocate_direct_buffer(
      queue->device_allocator, &params, capacity, &block->buffer);
  if (iree_status_is_ok(status)) {
    status = iree_hal_buffer_map_range(
        block->buffer, IREE_HAL_MAPPING_MODE_PERSISTENT,
        IREE_HAL_MEMORY_ACCESS_WRITE,
        /*byte_offset=*/0, capacity, &block->mapping);
  }
  if (iree_status_is_ok(status)) {
    status = iree_hal_vulkan_buffer_device_address(block->buffer,
                                                   &block->device_address);
  }
  if (iree_status_is_ok(status)) {
    if (block->device_address == 0) {
      status =
          iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
                           "Vulkan BDA publication buffer has no device address");
    } else if (capacity > UINT64_MAX - block->device_address) {
      status = iree_make_status(
          IREE_STATUS_OUT_OF_RANGE,
          "Vulkan BDA publication buffer device address range overflows");
    }
  }

  if (!iree_status_is_ok(status)) {
    iree_hal_buffer_unmap_range(&block->mapping);
    iree_hal_buffer_release(block->buffer);
    iree_allocator_free(queue->host_allocator, block);
    return status;
  }

  *out_block = block;
  return status;
}
// Unmaps and releases |block|'s buffer, then frees the block storage with the
// queue host allocator. A NULL |block| is ignored.
static void iree_hal_vulkan_queue_bda_publication_block_destroy(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_bda_publication_block_t* block) {
  if (block == NULL) {
    return;
  }
  // The mapping is dropped before the buffer it refers to.
  iree_hal_buffer_unmap_range(&block->mapping);
  iree_hal_buffer_release(block->buffer);
  iree_allocator_free(queue->host_allocator, block);
}
// Links |block| at the tail of the queue's BDA publication cache list.
// The cursor is pointed at |block| only when no cursor is currently set, and
// the cached block count is incremented.
static void iree_hal_vulkan_queue_bda_publication_cache_append_block(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_bda_publication_block_t* block) {
  block->next = NULL;

  // Pick the link that must point at the new block: the old tail's next
  // pointer, or the list head when the list is empty.
  iree_hal_vulkan_queue_bda_publication_block_t** link =
      queue->bda_publication_cache.tail
          ? &queue->bda_publication_cache.tail->next
          : &queue->bda_publication_cache.head;
  *link = block;
  queue->bda_publication_cache.tail = block;

  if (queue->bda_publication_cache.cursor == NULL) {
    queue->bda_publication_cache.cursor = block;
  }
  queue->bda_publication_cache.block_count =
      queue->bda_publication_cache.block_count + 1;
}
// Destroys every block in the BDA publication cache list and zeroes the cache
// bookkeeping (head, tail, cursor, and count).
static void iree_hal_vulkan_queue_bda_publication_cache_deinitialize(
    iree_hal_vulkan_queue_t* queue) {
  iree_hal_vulkan_queue_bda_publication_block_t* following = NULL;
  for (iree_hal_vulkan_queue_bda_publication_block_t* current =
           queue->bda_publication_cache.head;
       current; current = following) {
    // Capture the successor before the node is freed.
    following = current->next;
    iree_hal_vulkan_queue_bda_publication_block_destroy(queue, current);
  }
  memset(&queue->bda_publication_cache, 0,
         sizeof(queue->bda_publication_cache));
}
// Removes |block| from the BDA publication cache list without destroying it.
//
// |previous| must be the node preceding |block|, or NULL when |block| is the
// head. Head, tail, and cursor are repaired as needed: a cursor that pointed
// at |block| moves to its successor, or to the (new) head when there is none.
// The block count is decremented and |block->next| is cleared.
static void iree_hal_vulkan_queue_bda_publication_cache_unlink_under_lock(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_bda_publication_block_t* previous,
    iree_hal_vulkan_queue_bda_publication_block_t* block) {
  iree_hal_vulkan_queue_bda_publication_block_t* successor = block->next;

  // Splice the node out of the chain.
  if (previous != NULL) {
    previous->next = successor;
  } else {
    queue->bda_publication_cache.head = successor;
  }

  if (block == queue->bda_publication_cache.tail) {
    queue->bda_publication_cache.tail = previous;
  }
  if (block == queue->bda_publication_cache.cursor) {
    queue->bda_publication_cache.cursor =
        successor ? successor : queue->bda_publication_cache.head;
  }

  IREE_ASSERT(queue->bda_publication_cache.block_count > 0,
              "BDA publication block count underflow");
  queue->bda_publication_cache.block_count =
      queue->bda_publication_cache.block_count - 1;
  block->next = NULL;
}
// Releases idle BDA publication blocks back to the allocator.
//
// Blocks with active leases are always kept. Among idle blocks, only the first
// one whose capacity does not exceed
// IREE_HAL_VULKAN_QUEUE_BDA_PUBLICATION_BLOCK_SIZE is retained (with its
// allocated length reset to zero); every other idle block is unlinked.
// Unlinking happens while holding queue->submission_mutex; the unlinked blocks
// are destroyed after the mutex has been released.
static void iree_hal_vulkan_queue_bda_publication_cache_trim(
iree_hal_vulkan_queue_t* queue) {
// Private list of unlinked blocks awaiting destruction outside the lock.
iree_hal_vulkan_queue_bda_publication_block_t* destroy_head = NULL;
iree_hal_vulkan_queue_bda_publication_block_t* destroy_tail = NULL;
iree_slim_mutex_lock(&queue->submission_mutex);
bool retained_default_idle_block = false;
// |previous| tracks the last node that stayed in the cache list, as required
// by the unlink helper.
iree_hal_vulkan_queue_bda_publication_block_t* previous = NULL;
iree_hal_vulkan_queue_bda_publication_block_t* block =
queue->bda_publication_cache.head;
while (block) {
// Saved up front because unlinking clears block->next.
iree_hal_vulkan_queue_bda_publication_block_t* next = block->next;
const bool has_active_leases = block->active_lease_count != 0;
const bool should_retain_idle =
!has_active_leases && !retained_default_idle_block &&
block->capacity <= (iree_device_size_t)
IREE_HAL_VULKAN_QUEUE_BDA_PUBLICATION_BLOCK_SIZE;
if (has_active_leases || should_retain_idle) {
if (should_retain_idle) {
block->allocated_length = 0;
retained_default_idle_block = true;
}
previous = block;
} else {
iree_hal_vulkan_queue_bda_publication_cache_unlink_under_lock(
queue, previous, block);
// Append to the destroy list, reusing the block's own next pointer.
if (destroy_tail) {
destroy_tail->next = block;
} else {
destroy_head = block;
}
destroy_tail = block;
}
block = next;
}
// Fall back to the head when no cursor is set after unlinking.
if (!queue->bda_publication_cache.cursor) {
queue->bda_publication_cache.cursor = queue->bda_publication_cache.head;
}
iree_slim_mutex_unlock(&queue->submission_mutex);
while (destroy_head) {
iree_hal_vulkan_queue_bda_publication_block_t* next = destroy_head->next;
destroy_head->next = NULL;
iree_hal_vulkan_queue_bda_publication_block_destroy(queue, destroy_head);
destroy_head = next;
}
}
// Tries to bump-allocate |length| bytes from |block|.
//
// The allocation starts at the block's current allocated length rounded up to
// IREE_HAL_VULKAN_QUEUE_BDA_PUBLICATION_ALIGNMENT. Returns false, leaving the
// block untouched, when the aligned offset overflows or the range does not fit
// within the block capacity. On success |out_offset| receives the aligned
// offset, the allocated length advances past the new range, and the block's
// active lease count is incremented.
static bool iree_hal_vulkan_queue_bda_publication_block_try_allocate(
    iree_hal_vulkan_queue_bda_publication_block_t* block,
    iree_device_size_t length, iree_device_size_t* out_offset) {
  iree_device_size_t start_offset = block->allocated_length;
  if (!iree_device_size_checked_align(
          start_offset, IREE_HAL_VULKAN_QUEUE_BDA_PUBLICATION_ALIGNMENT,
          &start_offset)) {
    return false;
  }
  if (start_offset > block->capacity) {
    return false;
  }
  // Written as a subtraction so the comparison itself cannot overflow.
  const iree_device_size_t remaining = block->capacity - start_offset;
  if (length > remaining) {
    return false;
  }

  block->allocated_length = start_offset + length;
  block->active_lease_count = block->active_lease_count + 1;
  *out_offset = start_offset;
  return true;
}
// Attempts to lease |length| bytes of BDA publication storage from a block
// that is already in the queue cache.
//
// The scan starts at the cache cursor (or the head when no cursor is set) and
// wraps to the head once after reaching the tail. On success the lease (block,
// offset, length) is stored in |submission|, the cursor moves to the chosen
// block, and |out_acquired| is set to true. Returning OK with
// |out_acquired| == false means no cached block had room; no new block is
// created here. This function always returns OK.
// NOTE(review): the "_under_lock" suffix presumably refers to
// queue->submission_mutex (the release and trim paths take it) - confirm
// against callers.
static iree_status_t
iree_hal_vulkan_queue_try_acquire_bda_publication_under_lock(
iree_hal_vulkan_queue_t* queue,
iree_hal_vulkan_queue_pending_submission_t* submission,
iree_device_size_t length, bool* out_acquired) {
*out_acquired = false;
iree_hal_vulkan_queue_bda_publication_block_t* first_block =
queue->bda_publication_cache.cursor ? queue->bda_publication_cache.cursor
: queue->bda_publication_cache.head;
iree_hal_vulkan_queue_bda_publication_block_t* block = first_block;
while (block) {
// Skip blocks that are too small even when empty.
if (length <= block->capacity) {
// An unleased block whose remaining space is too small is rewound to
// offset zero so its whole capacity can be reused.
if (length > block->capacity - block->allocated_length &&
block->active_lease_count == 0) {
block->allocated_length = 0;
}
iree_device_size_t offset = 0;
if (iree_hal_vulkan_queue_bda_publication_block_try_allocate(
block, length, &offset)) {
submission->bda_publication_lease =
(iree_hal_vulkan_queue_bda_publication_lease_t){
.block = block,
.offset = offset,
.length = length,
};
queue->bda_publication_cache.cursor = block;
*out_acquired = true;
return iree_ok_status();
}
}
block = block->next;
// Wrap to the head once; updating |first_block| prevents a second wrap.
// NOTE(review): after wrapping the scan runs to the tail again, so blocks
// between the original cursor and the tail are visited twice.
if (!block && first_block != queue->bda_publication_cache.head) {
block = queue->bda_publication_cache.head;
first_block = queue->bda_publication_cache.head;
}
}
return iree_ok_status();
}
// Leases |length| bytes of BDA publication storage for |submission|.
//
// A zero |length| needs no storage: |out_device_address| is left as 0 and OK
// is returned. Fails with OUT_OF_RANGE when |length| exceeds
// IREE_HOST_SIZE_MAX and with FAILED_PRECONDITION when |submission| already
// holds a publication lease. Cached blocks are tried first; while none has
// room, a new block of at least |length| bytes is created, appended to the
// cache, and the lease is retried. On success |out_device_address| receives
// the device address of the leased range.
static iree_status_t iree_hal_vulkan_queue_acquire_bda_publication_under_lock(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission,
    iree_device_size_t length, VkDeviceAddress* out_device_address) {
  *out_device_address = 0;
  if (length == 0) {
    return iree_ok_status();
  }
  if (length > IREE_HOST_SIZE_MAX) {
    return iree_make_status(
        IREE_STATUS_OUT_OF_RANGE,
        "Vulkan BDA publication length exceeds host addressable size");
  }
  if (submission->bda_publication_lease.block) {
    return iree_make_status(
        IREE_STATUS_FAILED_PRECONDITION,
        "Vulkan queue submission already owns BDA publication storage");
  }

  bool acquired = false;
  IREE_RETURN_IF_ERROR(
      iree_hal_vulkan_queue_try_acquire_bda_publication_under_lock(
          queue, submission, length, &acquired));
  while (!acquired) {
    // Nothing cached has room: grow the cache and try again.
    iree_hal_vulkan_queue_bda_publication_block_t* new_block = NULL;
    IREE_RETURN_IF_ERROR(iree_hal_vulkan_queue_bda_publication_block_create(
        queue, length, &new_block));
    iree_hal_vulkan_queue_bda_publication_cache_append_block(queue, new_block);
    IREE_RETURN_IF_ERROR(
        iree_hal_vulkan_queue_try_acquire_bda_publication_under_lock(
            queue, submission, length, &acquired));
  }

  const iree_hal_vulkan_queue_bda_publication_lease_t* lease =
      &submission->bda_publication_lease;
  *out_device_address = lease->block->device_address + lease->offset;
  return iree_ok_status();
}
// Leases |length| bytes of BDA publication storage without a real submission.
//
// A zero-initialized stack submission is handed to
// iree_hal_vulkan_queue_acquire_bda_publication_under_lock and the resulting
// lease is copied into |out_lease|. On failure |out_lease| remains zeroed and
// |out_device_address| remains 0.
static iree_status_t
iree_hal_vulkan_queue_acquire_bda_publication_lease_under_lock(
    iree_hal_vulkan_queue_t* queue, iree_device_size_t length,
    iree_hal_vulkan_queue_bda_publication_lease_t* out_lease,
    VkDeviceAddress* out_device_address) {
  const iree_hal_vulkan_queue_bda_publication_lease_t empty_lease = {0};
  *out_lease = empty_lease;
  *out_device_address = 0;

  // Stand-in submission that only exists to receive the lease.
  iree_hal_vulkan_queue_pending_submission_t synthetic_submission = {0};
  IREE_RETURN_IF_ERROR(iree_hal_vulkan_queue_acquire_bda_publication_under_lock(
      queue, &synthetic_submission, length, out_device_address));

  *out_lease = synthetic_submission.bda_publication_lease;
  return iree_ok_status();
}
// Returns |lease| to its block; does nothing when the lease has no block.
//
// Decrements the block's active lease count. When that count reaches zero the
// block's allocated length is reset so its capacity can be reused. The cache
// cursor is pointed at the block and |lease| is zeroed.
static void iree_hal_vulkan_queue_release_bda_publication_lease_under_lock(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_bda_publication_lease_t* lease) {
  if (lease->block == NULL) {
    return;
  }
  IREE_ASSERT(lease->block->active_lease_count > 0,
              "BDA publication block active lease count underflow");

  iree_hal_vulkan_queue_bda_publication_block_t* released_block = lease->block;
  released_block->active_lease_count = released_block->active_lease_count - 1;
  if (released_block->active_lease_count == 0) {
    released_block->allocated_length = 0;
  }
  queue->bda_publication_cache.cursor = released_block;
  memset(lease, 0, sizeof(*lease));
}
// Locking wrapper around
// iree_hal_vulkan_queue_release_bda_publication_lease_under_lock: takes
// queue->submission_mutex for the duration of the release. Returns without
// locking when |lease| has no block.
static void iree_hal_vulkan_queue_release_bda_publication_lease(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_bda_publication_lease_t* lease) {
  if (lease->block == NULL) {
    return;
  }
  iree_slim_mutex_lock(&queue->submission_mutex);
  iree_hal_vulkan_queue_release_bda_publication_lease_under_lock(queue, lease);
  iree_slim_mutex_unlock(&queue->submission_mutex);
}
// Releases the BDA publication lease held by |submission|, if any.
static void iree_hal_vulkan_queue_release_bda_publication(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  iree_hal_vulkan_queue_bda_publication_lease_t* submission_lease =
      &submission->bda_publication_lease;
  iree_hal_vulkan_queue_release_bda_publication_lease(queue, submission_lease);
}
static iree_hal_vulkan_command_buffer_bda_publication_t
iree_hal_vulkan_queue_bda_publication_for_lease(
const iree_hal_vulkan_queue_bda_publication_lease_t* lease) {
if (!lease->block) {
return (iree_hal_vulkan_command_buffer_bda_publication_t){0};
}
IREE_ASSERT(lease->length <= IREE_HOST_SIZE_MAX,
"BDA publication lease exceeds host addressable size");
return (iree_hal_vulkan_command_buffer_bda_publication_t){
.host_span = iree_make_byte_span(
lease->block->mapping.contents.data + lease->offset,
(iree_host_size_t)lease->length),
.device_address = lease->block->device_address + lease->offset,
};
}
// Flushes the mapped range covered by |lease|. Leases without a block or with
// zero length have nothing to flush and return OK.
static iree_status_t iree_hal_vulkan_queue_flush_bda_publication_lease(
    const iree_hal_vulkan_queue_bda_publication_lease_t* lease) {
  if (!lease->block) {
    return iree_ok_status();
  }
  if (lease->length == 0) {
    return iree_ok_status();
  }
  return iree_hal_buffer_mapping_flush_range(&lease->block->mapping,
                                             lease->offset, lease->length);
}
// Callback signature for staging ring waiters. |user_data| is the value stored
// on the waiter when it was queued.
typedef void(IREE_API_PTR* iree_hal_vulkan_queue_staging_waiter_fn_t)(
void* user_data);
// Intrusive list node representing a party waiting for a staging slot.
// The storage is provided by the caller of
// iree_hal_vulkan_queue_staging_ring_queue_waiter; the ring only links it.
typedef struct iree_hal_vulkan_queue_staging_waiter_t
iree_hal_vulkan_queue_staging_waiter_t;
struct iree_hal_vulkan_queue_staging_waiter_t {
// Next waiter in the ring-local list.
iree_hal_vulkan_queue_staging_waiter_t* next;
// Callback invoked after staging capacity may be available.
iree_hal_vulkan_queue_staging_waiter_fn_t fn;
// User data passed to |fn|.
void* user_data;
// Resource retained while the waiter is linked. May be NULL.
iree_hal_resource_t* resource;
// Whether this waiter is currently linked on a staging ring.
bool is_queued;
};
// One fixed-size slot of a staging ring. Slots are carved out of the ring's
// single backing buffer at creation time.
typedef struct iree_hal_vulkan_queue_staging_slot_t {
// Slot ordinal in the queue-local staging ring.
uint32_t ordinal;
// Byte offset of this slot within the staging buffer.
iree_device_size_t buffer_offset;
// Host mapped byte span for this slot.
iree_byte_span_t host_span;
// Whether a transfer currently owns this slot.
bool in_use;
} iree_hal_vulkan_queue_staging_slot_t;
// Queue-local pool of equally sized staging slots backed by one persistently
// mapped buffer, plus a list of waiters to notify when a slot is released.
// Allocated as a single block with the slot table stored inline at the end.
struct iree_hal_vulkan_queue_staging_ring_t {
// Mutex protecting slot ownership and waiters.
iree_slim_mutex_t mutex;
// Host allocator used for ring metadata.
iree_allocator_t host_allocator;
// Queue owning this staging ring. Borrowed.
iree_hal_vulkan_queue_t* queue;
// Backing buffer for all slots.
iree_hal_buffer_t* buffer;
// Persistent host mapping of |buffer|.
iree_hal_buffer_mapping_t mapping;
// Byte length of each staging slot.
iree_device_size_t slot_size;
// Number of slots in |slots|.
uint32_t slot_count;
// Head of transfers waiting for capacity.
iree_hal_vulkan_queue_staging_waiter_t* waiter_head;
// Tail link for transfers waiting for capacity. Points at |waiter_head| when
// the list is empty, otherwise at the last waiter's |next| field.
iree_hal_vulkan_queue_staging_waiter_t** waiter_tail;
// Slot table.
iree_hal_vulkan_queue_staging_slot_t slots[];
};
// Creates a staging ring of |slot_count| slots of |slot_size| bytes each.
//
// Rejects zero sizes (INVALID_ARGUMENT) and sizes that overflow host or device
// size arithmetic (OUT_OF_RANGE). The ring metadata and slot table are
// allocated as one host block; a single buffer of slot_size * slot_count bytes
// is allocated from queue->device_allocator and persistently mapped for
// read/write. Each slot records its ordinal, buffer offset, and host span.
// On success |out_ring| receives the ring; on failure all partially acquired
// resources are released and |out_ring| stays NULL.
static iree_status_t iree_hal_vulkan_queue_staging_ring_create(
    iree_hal_vulkan_queue_t* queue, iree_device_size_t slot_size,
    uint32_t slot_count, iree_hal_vulkan_queue_staging_ring_t** out_ring) {
  *out_ring = NULL;

  // Argument validation.
  if (slot_count == 0 || slot_size == 0) {
    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                            "Vulkan staging ring requires non-zero capacity");
  }
  if (slot_size > IREE_HOST_SIZE_MAX) {
    return iree_make_status(
        IREE_STATUS_OUT_OF_RANGE,
        "Vulkan staging ring slot size exceeds host addressable size");
  }
  if (slot_size > IREE_DEVICE_SIZE_MAX / slot_count) {
    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
                            "Vulkan staging ring allocation size overflows");
  }

  // Host metadata size: ring header followed by the inline slot table.
  iree_host_size_t slots_size = 0;
  if (!iree_host_size_checked_mul(slot_count,
                                  sizeof(iree_hal_vulkan_queue_staging_slot_t),
                                  &slots_size)) {
    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
                            "Vulkan staging ring slot table overflows");
  }
  iree_host_size_t total_size = 0;
  if (!iree_host_size_checked_add(sizeof(**out_ring), slots_size,
                                  &total_size)) {
    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
                            "Vulkan staging ring metadata overflows");
  }

  iree_hal_vulkan_queue_staging_ring_t* ring = NULL;
  IREE_RETURN_IF_ERROR(
      iree_allocator_malloc(queue->host_allocator, total_size, (void**)&ring));
  memset(ring, 0, total_size);
  iree_slim_mutex_initialize(&ring->mutex);
  ring->host_allocator = queue->host_allocator;
  ring->queue = queue;
  ring->slot_size = slot_size;
  ring->slot_count = slot_count;
  ring->waiter_tail = &ring->waiter_head;

  // Backing storage shared by all slots.
  const iree_device_size_t allocation_size = slot_size * slot_count;
  iree_hal_buffer_params_t params = {
      .type = IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE |
              IREE_HAL_MEMORY_TYPE_HOST_VISIBLE,
      .access = IREE_HAL_MEMORY_ACCESS_READ | IREE_HAL_MEMORY_ACCESS_WRITE,
      .usage = IREE_HAL_BUFFER_USAGE_TRANSFER_SOURCE |
               IREE_HAL_BUFFER_USAGE_TRANSFER_TARGET |
               IREE_HAL_BUFFER_USAGE_MAPPING_PERSISTENT |
               IREE_HAL_BUFFER_USAGE_MAPPING_ACCESS_RANDOM |
               IREE_HAL_BUFFER_USAGE_MAPPING_ACCESS_SEQUENTIAL_WRITE,
      .queue_affinity = queue->queue_affinity,
  };
  iree_status_t status = iree_hal_allocator_allocate_buffer(
      queue->device_allocator, params, allocation_size, &ring->buffer);
  if (iree_status_is_ok(status)) {
    status = iree_hal_buffer_map_range(
        ring->buffer, IREE_HAL_MAPPING_MODE_PERSISTENT,
        IREE_HAL_MEMORY_ACCESS_READ | IREE_HAL_MEMORY_ACCESS_WRITE,
        /*byte_offset=*/0, allocation_size, &ring->mapping);
  }
  if (!iree_status_is_ok(status)) {
    iree_hal_buffer_unmap_range(&ring->mapping);
    iree_hal_buffer_release(ring->buffer);
    iree_slim_mutex_deinitialize(&ring->mutex);
    iree_allocator_free(queue->host_allocator, ring);
    return status;
  }

  // Carve the mapped buffer into consecutive slots.
  iree_device_size_t slot_offset = 0;
  for (uint32_t i = 0; i < slot_count; ++i) {
    iree_hal_vulkan_queue_staging_slot_t* slot = &ring->slots[i];
    slot->ordinal = i;
    slot->buffer_offset = slot_offset;
    slot->host_span =
        iree_make_byte_span(ring->mapping.contents.data + slot_offset,
                            (iree_host_size_t)slot_size);
    slot_offset += slot_size;
  }

  *out_ring = ring;
  return status;
}
// Tears down |ring|: unmaps and releases the backing buffer, deinitializes the
// mutex, and frees the ring storage with the allocator recorded at creation.
// A NULL |ring| is ignored. Queued waiters are not touched here.
static void iree_hal_vulkan_queue_staging_ring_destroy(
    iree_hal_vulkan_queue_staging_ring_t* ring) {
  if (ring == NULL) {
    return;
  }
  // The mapping is dropped before the buffer it refers to.
  iree_hal_buffer_unmap_range(&ring->mapping);
  iree_hal_buffer_release(ring->buffer);
  iree_slim_mutex_deinitialize(&ring->mutex);
  iree_allocator_free(ring->host_allocator, ring);
}
// Claims the lowest-indexed free slot of |ring|, if any.
//
// Runs under the ring mutex. Returns true and stores the slot in |out_slot|
// when a slot was claimed; otherwise returns false with |out_slot| set to
// NULL. Never blocks waiting for capacity.
static bool iree_hal_vulkan_queue_staging_ring_try_acquire(
    iree_hal_vulkan_queue_staging_ring_t* ring,
    iree_hal_vulkan_queue_staging_slot_t** out_slot) {
  *out_slot = NULL;

  iree_hal_vulkan_queue_staging_slot_t* claimed_slot = NULL;
  iree_slim_mutex_lock(&ring->mutex);
  for (uint32_t i = 0; i < ring->slot_count && !claimed_slot; ++i) {
    iree_hal_vulkan_queue_staging_slot_t* candidate = &ring->slots[i];
    if (candidate->in_use) continue;
    candidate->in_use = true;
    claimed_slot = candidate;
  }
  if (claimed_slot) {
    *out_slot = claimed_slot;
  }
  iree_slim_mutex_unlock(&ring->mutex);

  return claimed_slot != NULL;
}
// Registers |waiter| to be called back when a slot of |ring| is released.
//
// Under the ring mutex: if any slot is free nothing is queued and false is
// returned so the caller can retry acquisition. If every slot is busy, true is
// returned; the waiter is initialized with |fn|, |user_data|, and |resource|
// (retained when non-NULL) and appended to the waiter list unless it is
// already queued, in which case it is left unchanged.
static bool iree_hal_vulkan_queue_staging_ring_queue_waiter(
    iree_hal_vulkan_queue_staging_ring_t* ring,
    iree_hal_vulkan_queue_staging_waiter_t* waiter,
    iree_hal_vulkan_queue_staging_waiter_fn_t fn, void* user_data,
    iree_hal_resource_t* resource) {
  iree_slim_mutex_lock(&ring->mutex);

  // Waiting only makes sense when no slot is available right now.
  bool all_slots_busy = true;
  uint32_t slot_index = 0;
  while (slot_index < ring->slot_count) {
    if (!ring->slots[slot_index].in_use) {
      all_slots_busy = false;
      break;
    }
    ++slot_index;
  }

  if (all_slots_busy && !waiter->is_queued) {
    waiter->fn = fn;
    waiter->user_data = user_data;
    waiter->resource = resource;
    if (waiter->resource) {
      iree_hal_resource_retain(waiter->resource);
    }
    waiter->is_queued = true;

    // Append at the tail of the waiter list.
    waiter->next = NULL;
    *ring->waiter_tail = waiter;
    ring->waiter_tail = &waiter->next;
  }

  iree_slim_mutex_unlock(&ring->mutex);
  return all_slots_busy;
}
// Removes |waiter| from |ring|'s waiter list if it is currently linked there.
//
// The list is searched under the ring mutex. When the waiter is found it is
// unlinked and marked not queued; its retained resource (if any) is released
// after the mutex has been dropped. When the waiter is not in the list the
// function has no effect.
static void iree_hal_vulkan_queue_staging_ring_cancel_waiter(
iree_hal_vulkan_queue_staging_ring_t* ring,
iree_hal_vulkan_queue_staging_waiter_t* waiter) {
bool was_queued = false;
iree_slim_mutex_lock(&ring->mutex);
// |link| always points at the pointer that refers to the current node, so
// unlinking is a single store with no special case for the head.
iree_hal_vulkan_queue_staging_waiter_t** link = &ring->waiter_head;
while (*link) {
if (*link == waiter) {
*link = waiter->next;
// Removing the last node: the tail link must move back to |link|.
if (!waiter->next) ring->waiter_tail = link;
waiter->next = NULL;
waiter->is_queued = false;
was_queued = true;
break;
}
link = &(*link)->next;
}
iree_slim_mutex_unlock(&ring->mutex);
// Release outside the lock; only done when this call actually unlinked it.
if (was_queued && waiter->resource) {
iree_hal_resource_release(waiter->resource);
waiter->resource = NULL;
}
}
// Returns |slot| to |ring| and notifies every queued waiter.
//
// Under the ring mutex the slot is marked free, the whole waiter list is
// detached from the ring, and each detached waiter is marked not queued.
// After the mutex is released each waiter's callback is invoked in list order;
// the resource retained for the waiter is released after its callback returns.
static void iree_hal_vulkan_queue_staging_ring_release(
iree_hal_vulkan_queue_staging_ring_t* ring,
iree_hal_vulkan_queue_staging_slot_t* slot) {
iree_hal_vulkan_queue_staging_waiter_t* waiter_head = NULL;
iree_slim_mutex_lock(&ring->mutex);
slot->in_use = false;
// Take ownership of the entire waiter list and leave the ring's list empty.
waiter_head = ring->waiter_head;
ring->waiter_head = NULL;
ring->waiter_tail = &ring->waiter_head;
for (iree_hal_vulkan_queue_staging_waiter_t* waiter = waiter_head; waiter;
waiter = waiter->next) {
waiter->is_queued = false;
}
iree_slim_mutex_unlock(&ring->mutex);
while (waiter_head) {
iree_hal_vulkan_queue_staging_waiter_t* waiter = waiter_head;
// Advance and clear the link before the callback runs.
waiter_head = waiter->next;
waiter->next = NULL;
// Move the retained resource into a local so it is released only after the
// callback has returned.
iree_hal_resource_t* resource = waiter->resource;
waiter->resource = NULL;
waiter->fn(waiter->user_data);
if (resource) {
iree_hal_resource_release(resource);
}
}
}
// Creates a command buffer block: one command pool on the queue's family plus
// IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_BLOCK_CAPACITY primary command buffers
// allocated from it. All slots start out free.
//
// On success |out_block| receives the block (not yet linked into the cache).
// On failure any created pool is destroyed, the block storage is freed, and
// |out_block| stays NULL.
static iree_status_t iree_hal_vulkan_queue_command_buffer_block_create(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_command_buffer_block_t** out_block) {
  *out_block = NULL;

  iree_hal_vulkan_queue_command_buffer_block_t* block = NULL;
  IREE_RETURN_IF_ERROR(iree_allocator_malloc(queue->host_allocator,
                                             sizeof(*block), (void**)&block));
  memset(block, 0, sizeof(*block));
  block->free_count = IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_BLOCK_CAPACITY;

  iree_status_t status = iree_hal_vulkan_queue_create_command_pool(
      &queue->syms, queue->logical_device, queue->queue_family_index,
      &block->pool);
  if (iree_status_is_ok(status)) {
    VkCommandBufferAllocateInfo allocate_info = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .commandPool = block->pool,
        .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount =
            IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_BLOCK_CAPACITY,
    };
    status = iree_vkAllocateCommandBuffers(
        IREE_VULKAN_DEVICE(&queue->syms), queue->logical_device, &allocate_info,
        block->command_buffers);
  }

  if (!iree_status_is_ok(status)) {
    // Destroying the pool also disposes of any command buffers allocated from
    // it.
    if (block->pool) {
      iree_vkDestroyCommandPool(IREE_VULKAN_DEVICE(&queue->syms),
                                queue->logical_device, block->pool,
                                /*pAllocator=*/NULL);
    }
    iree_allocator_free(queue->host_allocator, block);
    return status;
  }

  *out_block = block;
  return status;
}
// Destroys |block|'s command pool (when one was created) and frees the block
// storage with the queue host allocator. A NULL |block| is ignored.
static void iree_hal_vulkan_queue_command_buffer_block_destroy(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_command_buffer_block_t* block) {
  if (block == NULL) {
    return;
  }
  if (block->pool != VK_NULL_HANDLE) {
    iree_vkDestroyCommandPool(IREE_VULKAN_DEVICE(&queue->syms),
                              queue->logical_device, block->pool,
                              /*pAllocator=*/NULL);
  }
  iree_allocator_free(queue->host_allocator, block);
}
// Links |block| at the tail of the queue's command buffer cache list.
// The cursor is pointed at |block| only when no cursor is currently set, and
// the cached block count is incremented.
static void iree_hal_vulkan_queue_command_buffer_cache_append_block(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_command_buffer_block_t* block) {
  block->next = NULL;

  // Pick the link that must point at the new block: the old tail's next
  // pointer, or the list head when the list is empty.
  iree_hal_vulkan_queue_command_buffer_block_t** link =
      queue->command_buffer_cache.tail
          ? &queue->command_buffer_cache.tail->next
          : &queue->command_buffer_cache.head;
  *link = block;
  queue->command_buffer_cache.tail = block;

  if (queue->command_buffer_cache.cursor == NULL) {
    queue->command_buffer_cache.cursor = block;
  }
  queue->command_buffer_cache.block_count =
      queue->command_buffer_cache.block_count + 1;
}
// Destroys every block in the command buffer cache list and zeroes the cache
// bookkeeping (head, tail, cursor, and count).
static void iree_hal_vulkan_queue_command_buffer_cache_deinitialize(
    iree_hal_vulkan_queue_t* queue) {
  iree_hal_vulkan_queue_command_buffer_block_t* following = NULL;
  for (iree_hal_vulkan_queue_command_buffer_block_t* current =
           queue->command_buffer_cache.head;
       current; current = following) {
    // Capture the successor before the node is freed.
    following = current->next;
    iree_hal_vulkan_queue_command_buffer_block_destroy(queue, current);
  }
  memset(&queue->command_buffer_cache, 0, sizeof(queue->command_buffer_cache));
}
// Resets the command buffer in |slot| of |block| when it is flagged as needing
// a reset, then clears the flag. Slots without the flag are left untouched.
// If the Vulkan reset fails the error is returned and the flag stays set.
static iree_status_t iree_hal_vulkan_queue_reset_command_buffer_cache_slot(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_command_buffer_block_t* block, uint32_t slot) {
  if (block->needs_reset[slot]) {
    IREE_RETURN_IF_ERROR(iree_vkResetCommandBuffer(
        IREE_VULKAN_DEVICE(&queue->syms), block->command_buffers[slot],
        /*flags=*/0));
    block->needs_reset[slot] = false;
  }
  return iree_ok_status();
}
// Attempts to lease a free command buffer slot from a block that is already in
// the queue's command buffer cache.
//
// The scan starts at the cache cursor (or the head when no cursor is set) and
// wraps to the head once after reaching the tail. Within a block, slots are
// probed round-robin starting at the block's |next_slot|; a slot whose owner
// epoch is zero is treated as free. The chosen slot is reset if required,
// marked with IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_SLOT_RESERVED, and recorded
// in |submission| together with its VkCommandBuffer. Returning OK with
// |out_acquired| == false means no cached block had a free slot; no new block
// is created here.
// NOTE(review): the "_under_lock" suffix presumably refers to
// queue->submission_mutex (the unlocked allocate path takes it around this
// call) - confirm against other callers.
static iree_status_t
iree_hal_vulkan_queue_try_acquire_command_buffer_under_lock(
iree_hal_vulkan_queue_t* queue,
iree_hal_vulkan_queue_pending_submission_t* submission,
bool* out_acquired) {
*out_acquired = false;
iree_hal_vulkan_queue_command_buffer_block_t* first_block =
queue->command_buffer_cache.cursor ? queue->command_buffer_cache.cursor
: queue->command_buffer_cache.head;
iree_hal_vulkan_queue_command_buffer_block_t* block = first_block;
while (block) {
// |free_count| lets full blocks be skipped without probing every slot.
if (block->free_count != 0) {
for (uint32_t probe_ordinal = 0;
probe_ordinal < IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_BLOCK_CAPACITY;
++probe_ordinal) {
const uint32_t slot =
(block->next_slot + probe_ordinal) %
IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_BLOCK_CAPACITY;
if (block->owner_epochs[slot] != 0) continue;
// A failed reset returns before any bookkeeping changes, so the slot
// remains free.
IREE_RETURN_IF_ERROR(
iree_hal_vulkan_queue_reset_command_buffer_cache_slot(queue, block,
slot));
block->owner_epochs[slot] =
IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_SLOT_RESERVED;
block->next_slot =
(slot + 1) % IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_BLOCK_CAPACITY;
block->free_count = block->free_count - 1;
submission->native_command_buffer_lease =
(iree_hal_vulkan_queue_command_buffer_lease_t){
.block = block,
.slot = slot,
};
submission->native_command_buffer = block->command_buffers[slot];
queue->command_buffer_cache.cursor = block;
*out_acquired = true;
return iree_ok_status();
}
}
block = block->next;
// Wrap to the head once; updating |first_block| prevents a second wrap.
// NOTE(review): after wrapping the scan runs to the tail again, so blocks
// between the original cursor and the tail are visited twice.
if (!block && first_block != queue->command_buffer_cache.head) {
block = queue->command_buffer_cache.head;
first_block = queue->command_buffer_cache.head;
}
}
return iree_ok_status();
}
// Leases a native command buffer for |submission|, growing the cache when
// needed. For callers that do not hold queue->submission_mutex.
//
// Fails with FAILED_PRECONDITION when |submission| already has a native
// command buffer. Each loop iteration first tries the cached blocks under the
// mutex. If that finds nothing, completions are drained without the mutex
// held; when any were drained the acquisition is retried before allocating.
// Only when nothing was drained is a new block created (also without the
// mutex) and appended under the mutex, after which the loop retries.
static iree_status_t iree_hal_vulkan_queue_allocate_native_command_buffer(
iree_hal_vulkan_queue_t* queue,
iree_hal_vulkan_queue_pending_submission_t* submission) {
if (submission->native_command_buffer) {
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Vulkan queue submission already owns a native command buffer");
}
for (;;) {
bool acquired = false;
iree_slim_mutex_lock(&queue->submission_mutex);
iree_status_t status =
iree_hal_vulkan_queue_try_acquire_command_buffer_under_lock(
queue, submission, &acquired);
iree_slim_mutex_unlock(&queue->submission_mutex);
// Stop on error or success; otherwise fall through to make room.
if (!iree_status_is_ok(status) || acquired) return status;
// NOTE(review): presumably draining retires epochs and frees their slots -
// confirm in iree_hal_vulkan_queue_drain_completions.
const iree_host_size_t drained_count =
iree_hal_vulkan_queue_drain_completions(queue);
if (drained_count != 0) continue;
iree_hal_vulkan_queue_command_buffer_block_t* block = NULL;
IREE_RETURN_IF_ERROR(
iree_hal_vulkan_queue_command_buffer_block_create(queue, &block));
iree_slim_mutex_lock(&queue->submission_mutex);
iree_hal_vulkan_queue_command_buffer_cache_append_block(queue, block);
iree_slim_mutex_unlock(&queue->submission_mutex);
}
}
// Leases a native command buffer for |submission| without taking any lock and
// without draining completions.
//
// Fails with FAILED_PRECONDITION when |submission| already has a native
// command buffer. Cached blocks are tried first; when none has a free slot one
// new block is created, appended, and the acquisition is retried once. A
// second miss is reported as INTERNAL.
static iree_status_t
iree_hal_vulkan_queue_allocate_native_command_buffer_under_lock(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (submission->native_command_buffer) {
    return iree_make_status(
        IREE_STATUS_FAILED_PRECONDITION,
        "Vulkan queue submission already owns a native command buffer");
  }

  bool acquired = false;
  IREE_RETURN_IF_ERROR(
      iree_hal_vulkan_queue_try_acquire_command_buffer_under_lock(
          queue, submission, &acquired));
  if (!acquired) {
    // No cached slot is free: grow the cache by one block and retry.
    iree_hal_vulkan_queue_command_buffer_block_t* new_block = NULL;
    IREE_RETURN_IF_ERROR(
        iree_hal_vulkan_queue_command_buffer_block_create(queue, &new_block));
    iree_hal_vulkan_queue_command_buffer_cache_append_block(queue, new_block);
    IREE_RETURN_IF_ERROR(
        iree_hal_vulkan_queue_try_acquire_command_buffer_under_lock(
            queue, submission, &acquired));
  }
  if (acquired) {
    return iree_ok_status();
  }
  return iree_make_status(IREE_STATUS_INTERNAL,
                          "Vulkan command buffer block had no free slot "
                          "immediately after allocation");
}
// Leases a native command buffer slot that is not tied to a pending
// submission and returns it through |out_lease| and
// |out_native_command_buffer|.
//
// The acquisition helper operates on a submission, so a zero-initialized
// stand-in submission is used to receive the lease and handle. On any failure
// the outputs are left as an absent lease and VK_NULL_HANDLE.
static iree_status_t
iree_hal_vulkan_queue_acquire_native_command_buffer_lease_under_lock(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_command_buffer_lease_t* out_lease,
    VkCommandBuffer* out_native_command_buffer) {
  // Define the outputs up front so every early return leaves them empty.
  const iree_hal_vulkan_queue_command_buffer_lease_t absent_lease = {
      .slot = IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_SLOT_ABSENT,
  };
  *out_lease = absent_lease;
  *out_native_command_buffer = VK_NULL_HANDLE;

  // Stand-in submission that only exists to receive the lease.
  iree_hal_vulkan_queue_pending_submission_t scratch_submission = {0};
  scratch_submission.native_command_buffer_lease.slot =
      IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_SLOT_ABSENT;

  bool has_slot = false;
  IREE_RETURN_IF_ERROR(
      iree_hal_vulkan_queue_try_acquire_command_buffer_under_lock(
          queue, &scratch_submission, &has_slot));
  if (!has_slot) {
    // Miss: append one new block and retry once.
    iree_hal_vulkan_queue_command_buffer_block_t* new_block = NULL;
    IREE_RETURN_IF_ERROR(
        iree_hal_vulkan_queue_command_buffer_block_create(queue, &new_block));
    iree_hal_vulkan_queue_command_buffer_cache_append_block(queue, new_block);
    IREE_RETURN_IF_ERROR(
        iree_hal_vulkan_queue_try_acquire_command_buffer_under_lock(
            queue, &scratch_submission, &has_slot));
    if (!has_slot) {
      return iree_make_status(IREE_STATUS_INTERNAL,
                              "Vulkan command buffer block had no free slot "
                              "immediately after allocation");
    }
  }

  // Transfer what the stand-in received to the caller.
  *out_native_command_buffer = scratch_submission.native_command_buffer;
  *out_lease = scratch_submission.native_command_buffer_lease;
  return iree_ok_status();
}
// Stamps the command buffer slot leased by |submission| with the submission's
// epoch. Does nothing when the submission has no native command buffer or its
// lease holds no slot.
static void iree_hal_vulkan_queue_publish_native_command_buffer_under_lock(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  const iree_hal_vulkan_queue_command_buffer_lease_t* lease =
      &submission->native_command_buffer_lease;
  const bool owns_slot =
      submission->native_command_buffer &&
      lease->slot != IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_SLOT_ABSENT;
  if (owns_slot) {
    lease->block->owner_epochs[lease->slot] = submission->epoch;
  }
}
// Returns the command buffer slot referenced by |lease| to its block and
// resets |lease| to the absent state. No-op when |lease| holds no slot.
//
// The slot's owner epoch is cleared, the slot is flagged in needs_reset, the
// block's free count is incremented, and the cache cursor is pointed at that
// block. |native_command_buffer| is optional; when provided it is cleared to
// VK_NULL_HANDLE.
static void
iree_hal_vulkan_queue_release_native_command_buffer_lease_under_lock(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_command_buffer_lease_t* lease,
    VkCommandBuffer* native_command_buffer) {
  const bool holds_slot =
      lease->slot != IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_SLOT_ABSENT;
  if (!holds_slot) return;

  // Hand the slot back to its block.
  lease->block->needs_reset[lease->slot] = true;
  lease->block->owner_epochs[lease->slot] = 0;
  lease->block->free_count += 1;
  queue->command_buffer_cache.cursor = lease->block;

  // Invalidate the caller's references.
  lease->slot = IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_SLOT_ABSENT;
  lease->block = NULL;
  if (native_command_buffer != NULL) {
    *native_command_buffer = VK_NULL_HANDLE;
  }
}
// Locking wrapper around the _under_lock release: takes
// queue->submission_mutex for the duration of the release.
// The absent-slot check happens before locking so that leases with nothing to
// release never touch the mutex.
static void iree_hal_vulkan_queue_release_native_command_buffer_lease(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_command_buffer_lease_t* lease,
    VkCommandBuffer* native_command_buffer) {
  if (lease->slot != IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_SLOT_ABSENT) {
    iree_slim_mutex_lock(&queue->submission_mutex);
    iree_hal_vulkan_queue_release_native_command_buffer_lease_under_lock(
        queue, lease, native_command_buffer);
    iree_slim_mutex_unlock(&queue->submission_mutex);
  }
}
// Releases the native command buffer lease held by |submission| (if any) and
// clears the submission's native command buffer handle along with it.
static void iree_hal_vulkan_queue_release_native_command_buffer(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  iree_hal_vulkan_queue_command_buffer_lease_t* submission_lease =
      &submission->native_command_buffer_lease;
  VkCommandBuffer* submission_handle = &submission->native_command_buffer;
  iree_hal_vulkan_queue_release_native_command_buffer_lease(
      queue, submission_lease, submission_handle);
}
// Returns true when |submission|'s profile recorder has
// IREE_HAL_DEVICE_PROFILING_DATA_DEVICE_QUEUE_EVENTS enabled.
static bool iree_hal_vulkan_queue_profile_requests_queue_device_event(
    const iree_hal_vulkan_queue_pending_submission_t* submission) {
  const bool wants_queue_events = iree_hal_local_profile_recorder_is_enabled(
      submission->profile.recorder,
      IREE_HAL_DEVICE_PROFILING_DATA_DEVICE_QUEUE_EVENTS);
  return wants_queue_events;
}
// Returns true when |submission|'s profile recorder has
// IREE_HAL_DEVICE_PROFILING_DATA_DISPATCH_EVENTS enabled.
static bool iree_hal_vulkan_queue_profile_requests_dispatch_events(
    const iree_hal_vulkan_queue_pending_submission_t* submission) {
  const bool wants_dispatch_events = iree_hal_local_profile_recorder_is_enabled(
      submission->profile.recorder,
      IREE_HAL_DEVICE_PROFILING_DATA_DISPATCH_EVENTS);
  return wants_dispatch_events;
}
// Builds a BDA binding cache view over the slot storage held in |submission|'s
// execute state. Nothing is copied: the returned struct points at the
// submission's own slot array.
static iree_hal_vulkan_command_buffer_bda_binding_cache_t
iree_hal_vulkan_queue_execute_bda_binding_cache(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  iree_hal_vulkan_command_buffer_bda_binding_cache_t cache_view = {0};
  cache_view.slots = submission->execute.bda_binding_slots;
  cache_view.slot_count = submission->execute.bda_binding_slot_count;
  return cache_view;
}
// Computes the layout of a single allocation that holds an
// iree_hal_vulkan_queue_native_replay_t header followed by a trailing array of
// |bda_binding_slot_count| BDA binding slots.
//
// |out_allocation_size| receives the total allocation size and
// |out_bda_binding_slots_offset| receives the byte offset of the trailing slot
// array from the start of the allocation. The trailing array is aligned to the
// natural alignment of the slot type.
// Returns the status produced by IREE_STRUCT_LAYOUT.
static iree_status_t iree_hal_vulkan_queue_calculate_native_replay_layout(
iree_host_size_t bda_binding_slot_count,
iree_host_size_t* out_allocation_size,
iree_host_size_t* out_bda_binding_slots_offset) {
return IREE_STRUCT_LAYOUT(
iree_sizeof_struct(iree_hal_vulkan_queue_native_replay_t),
out_allocation_size,
IREE_STRUCT_FIELD_ALIGNED(
bda_binding_slot_count,
iree_hal_vulkan_command_buffer_bda_binding_slot_t,
iree_alignof(iree_hal_vulkan_command_buffer_bda_binding_slot_t),
out_bda_binding_slots_offset));
}
// Snapshots the slots of |bda_binding_cache| into |replay| and marks the
// snapshot valid.
// Exactly replay->bda_binding_slot_count entries are copied, so
// |bda_binding_cache| must provide at least that many slots.
static void iree_hal_vulkan_queue_store_native_replay_bda_binding_slots(
    iree_hal_vulkan_queue_native_replay_t* replay,
    const iree_hal_vulkan_command_buffer_bda_binding_cache_t*
        bda_binding_cache) {
  const iree_host_size_t slot_count = replay->bda_binding_slot_count;
  if (slot_count > 0) {
    // Skipped for empty snapshots, where the slot pointer may be NULL.
    const iree_host_size_t snapshot_size =
        slot_count * sizeof(replay->bda_binding_slots[0]);
    memcpy(replay->bda_binding_slots, bda_binding_cache->slots, snapshot_size);
  }
  replay->bda_binding_slots_valid = true;
}
// Returns true when the slot snapshot stored in |replay| is valid and equal to
// the slots in |bda_binding_cache|.
// Two slots are equal when both their device address and their length match.
static bool iree_hal_vulkan_queue_native_replay_bda_binding_slots_match(
    const iree_hal_vulkan_queue_native_replay_t* replay,
    const iree_hal_vulkan_command_buffer_bda_binding_cache_t*
        bda_binding_cache) {
  // An invalid snapshot or a differing slot count can never match.
  if (!replay->bda_binding_slots_valid ||
      replay->bda_binding_slot_count != bda_binding_cache->slot_count) {
    return false;
  }

  const iree_hal_vulkan_command_buffer_bda_binding_slot_t* stored =
      replay->bda_binding_slots;
  const iree_hal_vulkan_command_buffer_bda_binding_slot_t* current =
      bda_binding_cache->slots;
  for (iree_host_size_t i = 0; i < replay->bda_binding_slot_count; ++i) {
    const bool same_slot =
        stored[i].device_address == current[i].device_address &&
        stored[i].length == current[i].length;
    if (!same_slot) return false;
  }
  return true;
}
// Resolves, against |binding_table|, every binding slot that |replay|'s stored
// snapshot records with a nonzero device address.
//
// The resolved slot value itself is discarded here; the resolver is also
// handed |bda_binding_cache| (presumably it records the result there - confirm
// against the resolver). Slots with a zero device address in the snapshot are
// skipped. Returns OUT_OF_RANGE if a slot ordinal does not fit in uint32_t.
static iree_status_t iree_hal_vulkan_queue_resolve_native_replay_bda_slots(
    iree_hal_buffer_binding_table_t binding_table,
    const iree_hal_vulkan_queue_native_replay_t* replay,
    iree_hal_vulkan_command_buffer_bda_binding_cache_t* bda_binding_cache) {
  for (iree_host_size_t ordinal = 0; ordinal < replay->bda_binding_slot_count;
       ++ordinal) {
    const bool was_recorded =
        replay->bda_binding_slots[ordinal].device_address != 0;
    if (!was_recorded) continue;

    // The resolver takes a 32-bit ordinal.
    if (ordinal > UINT32_MAX) {
      return iree_make_status(
          IREE_STATUS_OUT_OF_RANGE,
          "Vulkan native replay BDA binding slot ordinal exceeds uint32_t");
    }

    iree_hal_vulkan_command_buffer_bda_binding_slot_t resolved_slot = {0};
    IREE_RETURN_IF_ERROR(
        iree_hal_vulkan_command_buffer_resolve_bda_binding_table_slot(
            binding_table, (uint32_t)ordinal, bda_binding_cache,
            &resolved_slot));
  }
  return iree_ok_status();
}
// Appends |replay| to the tail of the queue's native replay list and updates
// the cache statistics: current instance count and publication bytes, plus
// their peak values.
static void iree_hal_vulkan_queue_native_replay_cache_append_under_lock(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_native_replay_t* replay) {
  // Pick the link that should point at the new tail: the old tail's next
  // pointer, or the list head when the list is empty.
  iree_hal_vulkan_queue_native_replay_t** tail_link =
      queue->native_replay_cache.tail ? &queue->native_replay_cache.tail->next
                                      : &queue->native_replay_cache.head;
  replay->next = NULL;
  *tail_link = replay;
  queue->native_replay_cache.tail = replay;

  // Current totals.
  queue->native_replay_cache.instance_count += 1;
  queue->native_replay_cache.publication_bytes +=
      replay->bda_publication_lease.length;

  // High-water marks.
  if ((uint64_t)queue->native_replay_cache.instance_count >
      queue->native_replay_cache.peak_instance_count) {
    queue->native_replay_cache.peak_instance_count =
        (uint64_t)queue->native_replay_cache.instance_count;
  }
  if (queue->native_replay_cache.publication_bytes >
      queue->native_replay_cache.peak_publication_bytes) {
    queue->native_replay_cache.peak_publication_bytes =
        queue->native_replay_cache.publication_bytes;
  }
}
// Destroys |replay|: returns its command buffer lease and BDA publication
// lease to the queue, drops its reference on the recorded command buffer, and
// frees the replay allocation. NULL is ignored.
// This variant goes through the locking release helpers; it does not remove
// |replay| from the cache list.
static void iree_hal_vulkan_queue_native_replay_destroy(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_native_replay_t* replay) {
  if (replay != NULL) {
    iree_hal_vulkan_queue_release_native_command_buffer_lease(
        queue, &replay->command_buffer_lease, &replay->native_command_buffer);
    iree_hal_vulkan_queue_release_bda_publication_lease(
        queue, &replay->bda_publication_lease);
    iree_hal_command_buffer_release(replay->command_buffer);
    iree_allocator_free(queue->host_allocator, replay);
  }
}
// Destroys |replay| using the _under_lock release helpers: returns its command
// buffer lease and BDA publication lease, drops its reference on the recorded
// command buffer, and frees the replay allocation. NULL is ignored.
// Does not remove |replay| from the cache list.
static void iree_hal_vulkan_queue_native_replay_destroy_under_lock(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_native_replay_t* replay) {
  if (replay != NULL) {
    iree_hal_vulkan_queue_release_native_command_buffer_lease_under_lock(
        queue, &replay->command_buffer_lease, &replay->native_command_buffer);
    iree_hal_vulkan_queue_release_bda_publication_lease_under_lock(
        queue, &replay->bda_publication_lease);
    iree_hal_command_buffer_release(replay->command_buffer);
    iree_allocator_free(queue->host_allocator, replay);
  }
}
// Destroys every replay in the queue's native replay cache and then zeroes the
// entire cache struct (list pointers, counters, and limits alike).
// Entries are destroyed with the locking destroy variant, so the caller
// presumably must not already hold submission_mutex - confirm.
static void iree_hal_vulkan_queue_native_replay_cache_deinitialize(
    iree_hal_vulkan_queue_t* queue) {
  iree_hal_vulkan_queue_native_replay_t* following = NULL;
  for (iree_hal_vulkan_queue_native_replay_t* current =
           queue->native_replay_cache.head;
       current; current = following) {
    // Read the link first: destroy frees |current|.
    following = current->next;
    iree_hal_vulkan_queue_native_replay_destroy(queue, current);
  }
  memset(&queue->native_replay_cache, 0, sizeof(queue->native_replay_cache));
}
// Transfers ownership of the replay attached to |submission| from the
// "reserved" placeholder to the submission's epoch.
// No-op when the submission has no native replay.
static void iree_hal_vulkan_queue_publish_native_replay_under_lock(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  iree_hal_vulkan_queue_native_replay_t* attached_replay =
      submission->native_replay;
  if (attached_replay) {
    IREE_ASSERT(attached_replay->owner_epoch ==
                    IREE_HAL_VULKAN_QUEUE_NATIVE_REPLAY_OWNER_RESERVED,
                "cached native replay was not reserved before publish");
    attached_replay->owner_epoch = submission->epoch;
  }
}
// Detaches the cached native replay from |submission| and marks it idle
// (owner_epoch == 0) so it can be reused. The replay stays in the cache.
// Also clears the submission's native command buffer handle, which was taken
// from the replay. No-op when the submission has no native replay.
static void iree_hal_vulkan_queue_release_native_replay(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  iree_hal_vulkan_queue_native_replay_t* attached_replay =
      submission->native_replay;
  if (attached_replay) {
    iree_slim_mutex_lock(&queue->submission_mutex);
    // The replay is either published under this submission's epoch or still
    // only reserved (never published).
    IREE_ASSERT(attached_replay->owner_epoch == submission->epoch ||
                    attached_replay->owner_epoch ==
                        IREE_HAL_VULKAN_QUEUE_NATIVE_REPLAY_OWNER_RESERVED,
                "cached native replay owner epoch mismatch");
    submission->native_command_buffer = VK_NULL_HANDLE;
    submission->native_replay = NULL;
    attached_replay->owner_epoch = 0;
    iree_slim_mutex_unlock(&queue->submission_mutex);
  }
}
// Removes |replay| from the queue's native replay list and subtracts it from
// the current instance count and publication byte totals.
// |previous| must be the list entry directly before |replay|, or NULL when
// |replay| is the head. The replay itself is not destroyed.
static void iree_hal_vulkan_queue_native_replay_cache_unlink_under_lock(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_native_replay_t* previous,
    iree_hal_vulkan_queue_native_replay_t* replay) {
  // The link that currently points at |replay|.
  iree_hal_vulkan_queue_native_replay_t** incoming_link =
      previous ? &previous->next : &queue->native_replay_cache.head;
  *incoming_link = replay->next;
  if (replay == queue->native_replay_cache.tail) {
    queue->native_replay_cache.tail = previous;
  }

  queue->native_replay_cache.instance_count -= 1;
  queue->native_replay_cache.publication_bytes -=
      replay->bda_publication_lease.length;

  replay->next = NULL;
}
// Returns whether a replay needing |publication_length| bytes could ever be
// cached: the cache must allow at least one instance and the length must not
// exceed the cache's total publication byte limit.
// Current occupancy is not considered here.
static bool iree_hal_vulkan_queue_can_cache_native_replay_under_lock(
    iree_hal_vulkan_queue_t* queue, iree_device_size_t publication_length) {
  return queue->native_replay_cache.max_instance_count != 0 &&
         publication_length <= queue->native_replay_cache.max_publication_bytes;
}
// Creates a new cached native replay for |command_buffer| and appends it to
// the queue's native replay cache.
//
// Returns OK with |*out_replay| left NULL when the cache limits (instance
// count or publication bytes) leave no room for another entry; that is a
// decline, not an error.
//
// On success |*out_replay| is a replay that:
//   - is marked reserved (owner_epoch is the RESERVED sentinel),
//   - holds a leased native command buffer into which |command_buffer| has
//     been recorded using |binding_table|,
//   - holds a BDA publication lease when |publication_length| is nonzero,
//   - stores a snapshot of |bda_binding_cache|'s slots, and
//   - retains a reference on |command_buffer|.
// On failure everything acquired so far is released and the replay is freed.
static iree_status_t iree_hal_vulkan_queue_create_native_replay_under_lock(
iree_hal_vulkan_queue_t* queue, iree_hal_command_buffer_t* command_buffer,
iree_hal_buffer_binding_table_t binding_table,
iree_hal_vulkan_command_buffer_bda_binding_cache_t* bda_binding_cache,
iree_device_size_t publication_length,
iree_hal_vulkan_queue_native_replay_t** out_replay) {
*out_replay = NULL;
// Decline (without error) when the cache already holds its maximum number of
// instances.
if (queue->native_replay_cache.instance_count >=
queue->native_replay_cache.max_instance_count) {
return iree_ok_status();
}
// Decline when the publication budget is already exceeded or this replay
// would not fit in what remains. The first comparison keeps the subtraction
// in the second from underflowing.
if (queue->native_replay_cache.publication_bytes >
queue->native_replay_cache.max_publication_bytes ||
publication_length > queue->native_replay_cache.max_publication_bytes -
queue->native_replay_cache.publication_bytes) {
return iree_ok_status();
}
// The replay header and its BDA slot snapshot share one allocation.
const iree_host_size_t bda_binding_slot_count = bda_binding_cache->slot_count;
iree_host_size_t bda_binding_slots_offset = 0;
iree_host_size_t allocation_size = 0;
IREE_RETURN_IF_ERROR(iree_hal_vulkan_queue_calculate_native_replay_layout(
bda_binding_slot_count, &allocation_size, &bda_binding_slots_offset));
iree_hal_vulkan_queue_native_replay_t* replay = NULL;
IREE_RETURN_IF_ERROR(iree_allocator_malloc(queue->host_allocator,
allocation_size, (void**)&replay));
memset(replay, 0, allocation_size);
replay->bda_binding_slot_count = bda_binding_slot_count;
// Only point at the trailing array when there is one; otherwise the pointer
// stays NULL from the memset above.
if (bda_binding_slot_count != 0) {
replay->bda_binding_slots =
(iree_hal_vulkan_command_buffer_bda_binding_slot_t*)((uint8_t*)replay +
bda_binding_slots_offset);
}
// Start with an absent lease so the failure path can call destroy safely
// even if no command buffer was leased.
replay->command_buffer_lease.slot =
IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_SLOT_ABSENT;
// Reserved rather than idle (0): the entry is being handed to a caller.
replay->owner_epoch = IREE_HAL_VULKAN_QUEUE_NATIVE_REPLAY_OWNER_RESERVED;
// From here on each step runs only if all previous steps succeeded.
iree_status_t status =
iree_hal_vulkan_queue_acquire_native_command_buffer_lease_under_lock(
queue, &replay->command_buffer_lease, &replay->native_command_buffer);
// Receives the device address of the leased publication range; it is not
// read again in this function.
VkDeviceAddress publication_device_address = 0;
if (iree_status_is_ok(status) && publication_length != 0) {
status = iree_hal_vulkan_queue_acquire_bda_publication_lease_under_lock(
queue, publication_length, &replay->bda_publication_lease,
&publication_device_address);
}
// Derived from the lease unconditionally; only consumed below when status is
// OK and |publication_length| is nonzero.
const iree_hal_vulkan_command_buffer_bda_publication_t publication =
iree_hal_vulkan_queue_bda_publication_for_lease(
&replay->bda_publication_lease);
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_command_buffer_record_native(
command_buffer, &queue->syms, queue->logical_device, queue->builtins,
replay->native_command_buffer, /*usage_flags=*/0, VK_NULL_HANDLE,
binding_table, publication_length != 0 ? &publication : NULL,
bda_binding_cache, /*profile_marker=*/NULL, queue->host_allocator);
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_flush_bda_publication_lease(
&replay->bda_publication_lease);
}
if (iree_status_is_ok(status)) {
// Commit: snapshot the slots used for recording, take a reference on the
// command buffer, and link the replay into the cache.
iree_hal_vulkan_queue_store_native_replay_bda_binding_slots(
replay, bda_binding_cache);
iree_hal_command_buffer_retain(command_buffer);
replay->command_buffer = command_buffer;
iree_hal_vulkan_queue_native_replay_cache_append_under_lock(queue, replay);
queue->native_replay_cache.create_count =
queue->native_replay_cache.create_count + 1;
*out_replay = replay;
} else {
// The replay was never linked into the cache, so destroying it only has to
// return its leases and free the allocation.
iree_hal_vulkan_queue_native_replay_destroy_under_lock(queue, replay);
}
return status;
}
// Tries to serve |submission|'s execute command buffer from the native replay
// cache, creating a new cache entry when no idle one exists.
//
// |*out_acquired| is set to true only when a replay was attached to
// |submission| (native_replay and native_command_buffer are set). An OK
// status with |*out_acquired| == false means the cache was bypassed or
// declined and the caller has to take another path.
//
// The cache is bypassed, with a per-reason counter incremented, when:
//   - the command buffer needs descriptor sets or has descriptor dispatches,
//   - the cache is disabled or |publication_length| exceeds its byte limit,
//   - the command buffer is one-shot, or
//   - queue or dispatch profiling events are requested for the submission.
static iree_status_t iree_hal_vulkan_queue_acquire_native_replay_under_lock(
iree_hal_vulkan_queue_t* queue,
iree_hal_vulkan_queue_pending_submission_t* submission,
iree_hal_vulkan_command_buffer_descriptor_requirements_t
descriptor_requirements,
bool has_descriptor_dispatches, iree_device_size_t publication_length,
bool* out_acquired) {
*out_acquired = false;
if (descriptor_requirements.set_count != 0 || has_descriptor_dispatches) {
queue->native_replay_cache.descriptor_bypass_count =
queue->native_replay_cache.descriptor_bypass_count + 1;
return iree_ok_status();
}
if (!iree_hal_vulkan_queue_can_cache_native_replay_under_lock(
queue, publication_length)) {
queue->native_replay_cache.capacity_bypass_count =
queue->native_replay_cache.capacity_bypass_count + 1;
return iree_ok_status();
}
if (iree_any_bit_set(
iree_hal_command_buffer_mode(submission->execute.command_buffer),
IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT)) {
queue->native_replay_cache.one_shot_bypass_count =
queue->native_replay_cache.one_shot_bypass_count + 1;
return iree_ok_status();
}
if (iree_hal_vulkan_queue_profile_requests_queue_device_event(submission) ||
iree_hal_vulkan_queue_profile_requests_dispatch_events(submission)) {
queue->native_replay_cache.profile_bypass_count =
queue->native_replay_cache.profile_bypass_count + 1;
return iree_ok_status();
}
iree_hal_vulkan_command_buffer_bda_binding_cache_t bda_binding_cache =
iree_hal_vulkan_queue_execute_bda_binding_cache(submission);
iree_hal_buffer_binding_table_t binding_table = {
.count = submission->execute.binding_table_count,
.bindings = submission->execute.binding_table_bindings,
};
bool has_busy_replay = false;
bool replay_publication_current = false;
// Look for an idle (owner_epoch == 0) replay recorded from the same command
// buffer and reserve it. Entries for the same command buffer that are still
// owned are only noted, so that a newly created entry can be counted as a
// fork below.
iree_hal_vulkan_queue_native_replay_t* replay =
queue->native_replay_cache.head;
while (replay) {
if (replay->command_buffer == submission->execute.command_buffer) {
if (replay->owner_epoch == 0) {
queue->native_replay_cache.hit_count =
queue->native_replay_cache.hit_count + 1;
replay->owner_epoch =
IREE_HAL_VULKAN_QUEUE_NATIVE_REPLAY_OWNER_RESERVED;
break;
}
has_busy_replay = true;
}
replay = replay->next;
}
if (!replay) {
// No idle entry: count a miss and try to create one. A NULL replay with an
// OK status means the cache declined because a limit was reached.
queue->native_replay_cache.miss_count =
queue->native_replay_cache.miss_count + 1;
IREE_RETURN_IF_ERROR(iree_hal_vulkan_queue_create_native_replay_under_lock(
queue, submission->execute.command_buffer, binding_table,
&bda_binding_cache, publication_length, &replay));
if (!replay) return iree_ok_status();
// A freshly created replay was recorded with the current binding table, so
// all of the republication steps below are skipped for it.
replay_publication_current = true;
if (has_busy_replay) {
queue->native_replay_cache.fork_count =
queue->native_replay_cache.fork_count + 1;
}
}
const iree_hal_vulkan_command_buffer_bda_publication_t publication =
iree_hal_vulkan_queue_bda_publication_for_lease(
&replay->bda_publication_lease);
iree_status_t status = iree_ok_status();
// Reused entry: resolve the current bindings for the slots the replay was
// recorded with.
if (iree_status_is_ok(status) && !replay_publication_current) {
status = iree_hal_vulkan_queue_resolve_native_replay_bda_slots(
binding_table, replay, &bda_binding_cache);
}
// If the resolved slots equal the stored snapshot the existing publication
// can be reused as-is.
if (iree_status_is_ok(status) && !replay_publication_current) {
replay_publication_current =
iree_hal_vulkan_queue_native_replay_bda_binding_slots_match(
replay, &bda_binding_cache);
if (replay_publication_current && publication_length != 0) {
queue->native_replay_cache.publication_skip_count =
queue->native_replay_cache.publication_skip_count + 1;
}
}
// Otherwise republish the binding tables. The snapshot is invalidated first
// so that a failure below cannot leave a stale snapshot marked valid.
if (iree_status_is_ok(status) && !replay_publication_current) {
replay->bda_binding_slots_valid = false;
status = iree_hal_vulkan_command_buffer_publish_bda_binding_tables(
submission->execute.command_buffer, binding_table,
publication_length != 0 ? &publication : NULL, &bda_binding_cache);
}
if (iree_status_is_ok(status) && !replay_publication_current) {
status = iree_hal_vulkan_queue_flush_bda_publication_lease(
&replay->bda_publication_lease);
}
// Republication succeeded: store the new snapshot (which marks it valid).
if (iree_status_is_ok(status) && !replay_publication_current) {
iree_hal_vulkan_queue_store_native_replay_bda_binding_slots(
replay, &bda_binding_cache);
if (publication_length != 0) {
queue->native_replay_cache.publication_update_count =
queue->native_replay_cache.publication_update_count + 1;
}
}
if (iree_status_is_ok(status)) {
// Attach the (still reserved) replay to the submission.
submission->native_replay = replay;
submission->native_command_buffer = replay->native_command_buffer;
*out_acquired = true;
} else {
// Drop the reservation so the entry is idle again; it stays in the cache.
replay->owner_epoch = 0;
}
return status;
}
// Creates a Vulkan timestamp query pool with |query_count| queries on the
// queue's logical device, using the default Vulkan host allocator.
// |*out_query_pool| is VK_NULL_HANDLE unless creation succeeds.
static iree_status_t iree_hal_vulkan_queue_create_timestamp_query_pool(
    iree_hal_vulkan_queue_t* queue, uint32_t query_count,
    VkQueryPool* out_query_pool) {
  *out_query_pool = VK_NULL_HANDLE;

  VkQueryPoolCreateInfo pool_info = {0};
  pool_info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
  pool_info.queryType = VK_QUERY_TYPE_TIMESTAMP;
  pool_info.queryCount = query_count;

  iree_status_t status = iree_vkCreateQueryPool(
      IREE_VULKAN_DEVICE(&queue->syms), queue->logical_device, &pool_info,
      /*pAllocator=*/NULL, out_query_pool);
  return status;
}
// Allocates a timestamp query block: the block struct, a Vulkan query pool,
// and a host array for query values, all sized to the block's capacity.
//
// The capacity is the larger of |minimum_query_count| and the default
// per-block capacity. On failure nothing is leaked and |*out_block| is NULL.
static iree_status_t iree_hal_vulkan_queue_timestamp_query_block_create(
    iree_hal_vulkan_queue_t* queue, uint32_t minimum_query_count,
    iree_hal_vulkan_queue_timestamp_query_block_t** out_block) {
  *out_block = NULL;

  const uint32_t default_capacity =
      (uint32_t)IREE_HAL_VULKAN_QUEUE_TIMESTAMP_QUERY_BLOCK_CAPACITY;
  const uint32_t capacity = minimum_query_count > default_capacity
                                ? minimum_query_count
                                : default_capacity;

  iree_hal_vulkan_queue_timestamp_query_block_t* new_block = NULL;
  IREE_RETURN_IF_ERROR(iree_allocator_malloc(
      queue->host_allocator, sizeof(*new_block), (void**)&new_block));
  memset(new_block, 0, sizeof(*new_block));
  new_block->capacity = capacity;

  iree_status_t status = iree_hal_vulkan_queue_create_timestamp_query_pool(
      queue, capacity, &new_block->pool);
  if (iree_status_is_ok(status)) {
    status = iree_allocator_malloc_array(queue->host_allocator, capacity,
                                         sizeof(new_block->query_values[0]),
                                         (void**)&new_block->query_values);
  }
  if (iree_status_is_ok(status)) {
    *out_block = new_block;
    return status;
  }

  // Failure: undo whatever was created. Fields that were never set are still
  // zero from the memset above.
  if (new_block->pool) {
    iree_vkDestroyQueryPool(IREE_VULKAN_DEVICE(&queue->syms),
                            queue->logical_device, new_block->pool,
                            /*pAllocator=*/NULL);
  }
  iree_allocator_free(queue->host_allocator, new_block->query_values);
  iree_allocator_free(queue->host_allocator, new_block);
  return status;
}
// Destroys a timestamp query block: its Vulkan query pool (when one exists),
// its host query value array, and the block struct itself. NULL is ignored.
// The block is not removed from any list here.
static void iree_hal_vulkan_queue_timestamp_query_block_destroy(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_timestamp_query_block_t* block) {
  if (block != NULL) {
    if (block->pool) {
      iree_vkDestroyQueryPool(IREE_VULKAN_DEVICE(&queue->syms),
                              queue->logical_device, block->pool,
                              /*pAllocator=*/NULL);
    }
    iree_allocator_free(queue->host_allocator, block->query_values);
    iree_allocator_free(queue->host_allocator, block);
  }
}
// Appends |block| to the tail of the queue's timestamp query block list and
// increments the block count. When the cache has no cursor yet, the new block
// becomes the cursor.
static void iree_hal_vulkan_queue_timestamp_query_cache_append_block(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_timestamp_query_block_t* block) {
  // The link that should point at the new tail: the old tail's next pointer,
  // or the list head when the list is empty.
  iree_hal_vulkan_queue_timestamp_query_block_t** tail_link =
      queue->timestamp_query_cache.tail
          ? &queue->timestamp_query_cache.tail->next
          : &queue->timestamp_query_cache.head;
  block->next = NULL;
  *tail_link = block;
  queue->timestamp_query_cache.tail = block;

  if (queue->timestamp_query_cache.cursor == NULL) {
    queue->timestamp_query_cache.cursor = block;
  }
  queue->timestamp_query_cache.block_count += 1;
}
// Destroys every block in the queue's timestamp query cache and zeroes the
// cache struct. Active leases are not checked.
static void iree_hal_vulkan_queue_timestamp_query_cache_deinitialize(
    iree_hal_vulkan_queue_t* queue) {
  iree_hal_vulkan_queue_timestamp_query_block_t* following = NULL;
  for (iree_hal_vulkan_queue_timestamp_query_block_t* current =
           queue->timestamp_query_cache.head;
       current; current = following) {
    // Read the link first: destroy frees |current|.
    following = current->next;
    iree_hal_vulkan_queue_timestamp_query_block_destroy(queue, current);
  }
  memset(&queue->timestamp_query_cache, 0,
         sizeof(queue->timestamp_query_cache));
}
// Removes |block| from the queue's timestamp query block list and decrements
// the block count. |previous| must be the entry directly before |block|, or
// NULL when |block| is the head.
// If the cursor pointed at |block| it moves to the following block, or to the
// (already updated) head when |block| was last. The block is not destroyed.
static void iree_hal_vulkan_queue_timestamp_query_cache_unlink_under_lock(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_timestamp_query_block_t* previous,
    iree_hal_vulkan_queue_timestamp_query_block_t* block) {
  // The link that currently points at |block|.
  iree_hal_vulkan_queue_timestamp_query_block_t** incoming_link =
      previous ? &previous->next : &queue->timestamp_query_cache.head;
  *incoming_link = block->next;
  if (block == queue->timestamp_query_cache.tail) {
    queue->timestamp_query_cache.tail = previous;
  }

  // Must run after the head update above so that the fallback sees the new
  // head.
  if (block == queue->timestamp_query_cache.cursor) {
    if (block->next) {
      queue->timestamp_query_cache.cursor = block->next;
    } else {
      queue->timestamp_query_cache.cursor = queue->timestamp_query_cache.head;
    }
  }

  IREE_ASSERT(queue->timestamp_query_cache.block_count > 0,
              "timestamp query block count underflow");
  queue->timestamp_query_cache.block_count -= 1;
  block->next = NULL;
}
// Trims the timestamp query cache down to the blocks that are still needed.
//
// Blocks with active leases are always kept. Of the blocks without active
// leases, only the first one whose capacity does not exceed the default block
// capacity is kept, and its allocation counter is rewound to 0; every other
// idle block is removed.
//
// Blocks are unlinked while holding submission_mutex and collected on a local
// list; they are destroyed only after the mutex has been released.
static void iree_hal_vulkan_queue_timestamp_query_cache_trim(
iree_hal_vulkan_queue_t* queue) {
// Local singly-linked list of unlinked blocks awaiting destruction.
iree_hal_vulkan_queue_timestamp_query_block_t* destroy_head = NULL;
iree_hal_vulkan_queue_timestamp_query_block_t* destroy_tail = NULL;
iree_slim_mutex_lock(&queue->submission_mutex);
bool retained_default_idle_block = false;
// |previous| tracks the last block that stayed in the cache list; it is what
// unlink needs as the predecessor of the next removed block.
iree_hal_vulkan_queue_timestamp_query_block_t* previous = NULL;
iree_hal_vulkan_queue_timestamp_query_block_t* block =
queue->timestamp_query_cache.head;
while (block) {
// Captured up front because unlink clears block->next.
iree_hal_vulkan_queue_timestamp_query_block_t* next = block->next;
const bool has_active_leases = block->active_lease_count != 0;
const bool should_retain_idle =
!has_active_leases && !retained_default_idle_block &&
block->capacity <=
(uint32_t)IREE_HAL_VULKAN_QUEUE_TIMESTAMP_QUERY_BLOCK_CAPACITY;
if (has_active_leases || should_retain_idle) {
if (should_retain_idle) {
// No leases reference this block, so its queries can be handed out from
// the start again.
block->allocated_count = 0;
retained_default_idle_block = true;
}
previous = block;
} else {
iree_hal_vulkan_queue_timestamp_query_cache_unlink_under_lock(
queue, previous, block);
// Move the block onto the tail of the destroy list.
if (destroy_tail) {
destroy_tail->next = block;
} else {
destroy_head = block;
}
destroy_tail = block;
}
block = next;
}
// Give the cache a cursor again if unlinking left it without one.
if (!queue->timestamp_query_cache.cursor) {
queue->timestamp_query_cache.cursor = queue->timestamp_query_cache.head;
}
iree_slim_mutex_unlock(&queue->submission_mutex);
// Destroy the removed blocks outside the lock.
while (destroy_head) {
iree_hal_vulkan_queue_timestamp_query_block_t* next = destroy_head->next;
destroy_head->next = NULL;
iree_hal_vulkan_queue_timestamp_query_block_destroy(queue, destroy_head);
destroy_head = next;
}
}
// Tries to carve |query_count| consecutive queries out of |block|.
//
// Queries are handed out by bumping block->allocated_count. When the
// remaining tail is too small the counter is rewound to 0, but only if the
// block has no active leases. On success |*out_first_query| is the index of
// the first query of the range; on failure it is 0.
static bool iree_hal_vulkan_queue_timestamp_query_block_try_allocate(
    iree_hal_vulkan_queue_timestamp_query_block_t* block, uint32_t query_count,
    uint32_t* out_first_query) {
  *out_first_query = 0;

  // Requests larger than the whole block can never be satisfied.
  if (query_count > block->capacity) return false;

  const bool fits_in_tail =
      query_count <= block->capacity - block->allocated_count;
  if (!fits_in_tail) {
    // Rewinding is only possible while nothing references the block.
    if (block->active_lease_count != 0) return false;
    block->allocated_count = 0;
    if (query_count > block->capacity - block->allocated_count) return false;
  }

  *out_first_query = block->allocated_count;
  block->allocated_count += query_count;
  return true;
}
// Tries to lease |query_count| timestamp queries for |submission| from the
// blocks already in the cache. Returns false when no block can satisfy the
// request; no block is created here.
//
// The search starts at the cache cursor (or the head when there is no cursor)
// and runs to the end of the list; if it did not start at the head, a second
// pass runs from the head to the end. On success the block's active lease
// count is incremented, the submission's lease and profile query fields are
// filled in, and the cursor moves to the chosen block.
static bool iree_hal_vulkan_queue_try_acquire_timestamp_queries_under_lock(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission,
    uint32_t query_count) {
  iree_hal_vulkan_queue_timestamp_query_block_t* scan_start =
      queue->timestamp_query_cache.cursor ? queue->timestamp_query_cache.cursor
                                          : queue->timestamp_query_cache.head;
  for (;;) {
    for (iree_hal_vulkan_queue_timestamp_query_block_t* candidate = scan_start;
         candidate; candidate = candidate->next) {
      uint32_t first_query = 0;
      if (!iree_hal_vulkan_queue_timestamp_query_block_try_allocate(
              candidate, query_count, &first_query)) {
        continue;
      }
      candidate->active_lease_count += 1;
      submission->profile.timestamp_query_lease =
          (iree_hal_vulkan_queue_timestamp_query_lease_t){
              .block = candidate,
              .first_query = first_query,
              .query_count = query_count,
          };
      submission->profile.query_pool = candidate->pool;
      submission->profile.first_query = first_query;
      submission->profile.query_count = query_count;
      submission->profile.query_values = candidate->query_values;
      queue->timestamp_query_cache.cursor = candidate;
      return true;
    }
    // A pass that started at the head has covered the whole list.
    if (scan_start == queue->timestamp_query_cache.head) return false;
    scan_start = queue->timestamp_query_cache.head;
  }
}
// Leases |query_count| timestamp queries for |submission|, appending a new
// query block to the cache when the existing blocks cannot satisfy the
// request.
//
// A |query_count| of 0 succeeds without leasing anything. Returns
// FAILED_PRECONDITION when the submission already holds a query lease and
// INTERNAL when a freshly created block still cannot satisfy the request.
static iree_status_t iree_hal_vulkan_queue_acquire_timestamp_queries_under_lock(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission,
    uint32_t query_count) {
  if (query_count == 0) return iree_ok_status();
  if (submission->profile.timestamp_query_lease.block) {
    return iree_make_status(
        IREE_STATUS_FAILED_PRECONDITION,
        "Vulkan queue submission already owns timestamp query slots");
  }

  bool has_queries =
      iree_hal_vulkan_queue_try_acquire_timestamp_queries_under_lock(
          queue, submission, query_count);
  if (!has_queries) {
    // Grow the cache by a block large enough for the request and retry once.
    iree_hal_vulkan_queue_timestamp_query_block_t* new_block = NULL;
    IREE_RETURN_IF_ERROR(iree_hal_vulkan_queue_timestamp_query_block_create(
        queue, query_count, &new_block));
    iree_hal_vulkan_queue_timestamp_query_cache_append_block(queue, new_block);
    has_queries =
        iree_hal_vulkan_queue_try_acquire_timestamp_queries_under_lock(
            queue, submission, query_count);
  }

  if (has_queries) return iree_ok_status();
  return iree_make_status(IREE_STATUS_INTERNAL,
                          "Vulkan timestamp query block had no free slots "
                          "immediately after allocation");
}
// Releases the timestamp query lease held by |submission|, if any.
// Under submission_mutex the block's active lease count is decremented, the
// cache cursor is pointed at that block, and the submission's lease and
// profile query fields are cleared.
static void iree_hal_vulkan_queue_release_timestamp_queries(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  iree_hal_vulkan_queue_timestamp_query_lease_t* query_lease =
      &submission->profile.timestamp_query_lease;
  if (query_lease->block) {
    iree_slim_mutex_lock(&queue->submission_mutex);
    IREE_ASSERT(query_lease->block->active_lease_count > 0,
                "timestamp query block active lease count underflow");
    query_lease->block->active_lease_count -= 1;
    queue->timestamp_query_cache.cursor = query_lease->block;

    // Clear the lease itself.
    query_lease->block = NULL;
    query_lease->first_query = 0;
    query_lease->query_count = 0;

    // Clear the profile fields that mirrored the lease.
    submission->profile.query_values = NULL;
    submission->profile.query_count = 0;
    submission->profile.first_query = 0;
    submission->profile.query_pool = VK_NULL_HANDLE;
    iree_slim_mutex_unlock(&queue->submission_mutex);
  }
}
// Tries to reserve |descriptor_set_count| free descriptor sets for
// |submission| from the blocks already in the descriptor cache.
//
// Works in two phases so that a partial result never changes any state:
//   1. Scan: walk the block list once, starting at the cache cursor (or the
//      head) and wrapping around to the head, collecting free slots
//      (owner epoch 0) as candidates.
//   2. Commit: only if enough candidates were found, mark each slot reserved,
//      record the lease on |submission|, and write the descriptor set handle
//      to |out_descriptor_sets|.
//
// Returns false, with nothing modified, when fewer than
// |descriptor_set_count| free slots exist. |descriptor_set_count| must not
// exceed IREE_HAL_VULKAN_QUEUE_BUILTIN_DESCRIPTOR_SET_COUNT_MAX, which is the
// size of the local candidate array.
static bool iree_hal_vulkan_queue_try_acquire_descriptor_cache_sets_under_lock(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission,
    uint32_t descriptor_set_count, VkDescriptorSet* out_descriptor_sets) {
  IREE_ASSERT(descriptor_set_count <=
                  IREE_HAL_VULKAN_QUEUE_BUILTIN_DESCRIPTOR_SET_COUNT_MAX,
              "descriptor set count exceeds candidate lease capacity");
  iree_hal_vulkan_queue_descriptor_lease_t
      leases[IREE_HAL_VULKAN_QUEUE_BUILTIN_DESCRIPTOR_SET_COUNT_MAX];
  uint32_t lease_count = 0;

  // Phase 1: scan every block at most once.
  iree_hal_vulkan_queue_descriptor_block_t* const start_block =
      queue->descriptor_cache.cursor ? queue->descriptor_cache.cursor
                                     : queue->descriptor_cache.head;
  bool wrapped = false;
  iree_hal_vulkan_queue_descriptor_block_t* block = start_block;
  while (block && lease_count < descriptor_set_count) {
    if (block->free_count != 0) {
      // Probe each slot of the block once, starting at its next_slot hint.
      for (uint32_t probe_ordinal = 0;
           probe_ordinal < IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_BLOCK_CAPACITY &&
           lease_count < descriptor_set_count;
           ++probe_ordinal) {
        const uint32_t slot = (block->next_slot + probe_ordinal) %
                              IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_BLOCK_CAPACITY;
        if (block->owner_epochs[slot] == 0) {
          leases[lease_count++] = (iree_hal_vulkan_queue_descriptor_lease_t){
              .block = block,
              .slot = slot,
          };
        }
      }
    }
    block = block->next;
    if (!block && !wrapped && start_block != queue->descriptor_cache.head) {
      // Reached the tail after starting mid-list: continue from the head.
      block = queue->descriptor_cache.head;
      wrapped = true;
    }
    // Candidates are not marked reserved until the commit phase, so visiting
    // a block a second time would collect the same free slot again. Stop once
    // the wrapped scan arrives back at the block it started from.
    if (wrapped && block == start_block) break;
  }
  if (lease_count < descriptor_set_count) return false;

  // Phase 2: commit the reservation.
  for (uint32_t i = 0; i < descriptor_set_count; ++i) {
    iree_hal_vulkan_queue_descriptor_lease_t lease = leases[i];
    lease.block->owner_epochs[lease.slot] =
        IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_SLOT_RESERVED;
    lease.block->next_slot =
        (lease.slot + 1) % IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_BLOCK_CAPACITY;
    lease.block->free_count = lease.block->free_count - 1;
    submission
        ->native_descriptor_leases[submission->native_descriptor_slot_count++] =
        lease;
    out_descriptor_sets[i] = lease.block->sets[lease.slot];
    queue->descriptor_cache.cursor = lease.block;
  }
  return true;
}
// Reserves |descriptor_set_count| built-in descriptor sets for |submission|
// and writes their handles to |out_descriptor_sets|.
//
// Retries until the reservation succeeds: when the cache has too few free
// sets, completed work is drained first (which may free sets); only if
// nothing was drained is a new descriptor block created and appended.
// submission_mutex is held only around the cache accesses, not while draining
// or creating a block.
//
// Returns OUT_OF_RANGE when |descriptor_set_count| exceeds the built-in limit
// and FAILED_PRECONDITION when the submission already holds descriptor sets.
static iree_status_t iree_hal_vulkan_queue_acquire_descriptor_cache_sets(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission,
    uint32_t descriptor_set_count, VkDescriptorSet* out_descriptor_sets) {
  if (descriptor_set_count == 0) return iree_ok_status();
  if (descriptor_set_count >
      IREE_HAL_VULKAN_QUEUE_BUILTIN_DESCRIPTOR_SET_COUNT_MAX) {
    return iree_make_status(
        IREE_STATUS_OUT_OF_RANGE,
        "Vulkan queue built-in descriptor set count %u exceeds limit %u",
        descriptor_set_count,
        IREE_HAL_VULKAN_QUEUE_BUILTIN_DESCRIPTOR_SET_COUNT_MAX);
  }
  if (submission->native_descriptor_slot_count != 0) {
    return iree_make_status(
        IREE_STATUS_FAILED_PRECONDITION,
        "Vulkan queue submission already owns built-in descriptor sets");
  }

  bool has_sets = false;
  while (!has_sets) {
    iree_slim_mutex_lock(&queue->submission_mutex);
    has_sets =
        iree_hal_vulkan_queue_try_acquire_descriptor_cache_sets_under_lock(
            queue, submission, descriptor_set_count, out_descriptor_sets);
    iree_slim_mutex_unlock(&queue->submission_mutex);
    if (has_sets) break;

    // Prefer reclaiming sets from retired work over growing the cache.
    if (iree_hal_vulkan_queue_drain_completions(queue) != 0) continue;

    iree_hal_vulkan_queue_descriptor_block_t* new_block = NULL;
    IREE_RETURN_IF_ERROR(
        iree_hal_vulkan_queue_descriptor_block_create(queue, &new_block));
    iree_slim_mutex_lock(&queue->submission_mutex);
    iree_hal_vulkan_queue_descriptor_cache_append_block(queue, new_block);
    iree_slim_mutex_unlock(&queue->submission_mutex);
  }
  return iree_ok_status();
}
// Stamps every descriptor slot leased by |submission| with the submission's
// epoch, replacing the value written when the slot was reserved.
// |queue| is unused.
static void iree_hal_vulkan_queue_publish_descriptor_cache_sets_under_lock(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  (void)queue;
  for (uint32_t i = 0; i < submission->native_descriptor_slot_count; ++i) {
    const iree_hal_vulkan_queue_descriptor_lease_t* lease =
        &submission->native_descriptor_leases[i];
    lease->block->owner_epochs[lease->slot] = submission->epoch;
  }
}
// Returns every descriptor set leased by |submission| to the descriptor cache
// and resets the submission's lease count to 0.
// For each held slot the owner epoch is cleared, the block's free count is
// incremented, and the cache cursor is pointed at that block. Takes
// submission_mutex; no-op when the submission holds no leases.
static void iree_hal_vulkan_queue_release_descriptor_cache_sets(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (submission->native_descriptor_slot_count == 0) return;

  iree_slim_mutex_lock(&queue->submission_mutex);
  for (uint32_t i = 0; i < submission->native_descriptor_slot_count; ++i) {
    iree_hal_vulkan_queue_descriptor_lease_t* lease =
        &submission->native_descriptor_leases[i];
    // Entries that hold no slot are skipped.
    if (lease->slot == IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_SLOT_ABSENT) continue;

    lease->block->owner_epochs[lease->slot] = 0;
    lease->block->free_count += 1;
    queue->descriptor_cache.cursor = lease->block;

    lease->slot = IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_SLOT_ABSENT;
    lease->block = NULL;
  }
  submission->native_descriptor_slot_count = 0;
  iree_slim_mutex_unlock(&queue->submission_mutex);
}
// Returns true if any of the queue's pending, deferred, or ready submission
// lists is non-empty. The list heads are sampled under the submission mutex.
static bool iree_hal_vulkan_queue_has_pending(iree_hal_vulkan_queue_t* queue) {
  iree_slim_mutex_lock(&queue->submission_mutex);
  const bool any_list_populated = queue->pending_head != NULL ||
                                  queue->deferred_head != NULL ||
                                  queue->ready_head != NULL;
  iree_slim_mutex_unlock(&queue->submission_mutex);
  return any_list_populated;
}
// Links |submission| into the queue's deferred list. Despite the name, the
// submission becomes the new list head (the list is singly linked and has no
// tail pointer). This function performs no locking.
static void iree_hal_vulkan_queue_append_deferred_submission(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  iree_hal_vulkan_queue_pending_submission_t* previous_head =
      queue->deferred_head;
  queue->deferred_head = submission;
  submission->next = previous_head;
}
// Removes |submission| from the queue's deferred list if it is present.
// Returns true when the submission was found and unlinked (its next pointer is
// cleared) and false when it was not in the list.
static bool iree_hal_vulkan_queue_unlink_deferred_submission(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  // Walk the pointers that reference each node so that unlinking the head and
  // unlinking an interior node are the same operation.
  for (iree_hal_vulkan_queue_pending_submission_t** slot =
           &queue->deferred_head;
       *slot != NULL; slot = &(*slot)->next) {
    if (*slot != submission) continue;
    *slot = submission->next;
    submission->next = NULL;
    return true;
  }
  return false;
}
// Appends |submission| to the tail of the queue's ready list. This function
// performs no locking.
static void iree_hal_vulkan_queue_append_ready_submission(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  submission->next = NULL;
  // The new node hangs off the current tail, or becomes the head when the list
  // is empty.
  iree_hal_vulkan_queue_pending_submission_t** attach_point =
      queue->ready_tail ? &queue->ready_tail->next : &queue->ready_head;
  *attach_point = submission;
  queue->ready_tail = submission;
}
// Removes |submission| from the queue's ready list if it is present, fixing up
// the tail pointer when the removed node was the last one. Returns true when
// the submission was found and unlinked, false otherwise.
static bool iree_hal_vulkan_queue_unlink_ready_submission(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  iree_hal_vulkan_queue_pending_submission_t* predecessor = NULL;
  for (iree_hal_vulkan_queue_pending_submission_t* node = queue->ready_head;
       node != NULL; predecessor = node, node = node->next) {
    if (node != submission) continue;
    if (predecessor) {
      predecessor->next = submission->next;
    } else {
      queue->ready_head = submission->next;
    }
    if (queue->ready_tail == submission) queue->ready_tail = predecessor;
    submission->next = NULL;
    return true;
  }
  return false;
}
// Detaches and returns the head of the queue's ready list, or returns NULL
// when the list is empty. The returned submission's next pointer is cleared.
static iree_hal_vulkan_queue_pending_submission_t*
iree_hal_vulkan_queue_pop_ready_submission(iree_hal_vulkan_queue_t* queue) {
  iree_hal_vulkan_queue_pending_submission_t* popped = queue->ready_head;
  if (popped == NULL) return NULL;
  iree_hal_vulkan_queue_pending_submission_t* successor = popped->next;
  popped->next = NULL;
  queue->ready_head = successor;
  // Popping the last entry empties the list, so the tail must be reset too.
  if (successor == NULL) queue->ready_tail = NULL;
  return popped;
}
// Attempts to install |status| as the queue's failure status and returns a
// clone of whichever status ends up stored.
//
// The install only succeeds if no failure has been stored yet (the slot holds
// 0). On success the queue's slot keeps |status| itself; on failure |status| is
// freed and the previously stored failure is cloned instead. Either way the
// caller gives up |status| and receives a clone.
static iree_status_t iree_hal_vulkan_queue_store_error(
    iree_hal_vulkan_queue_t* queue, iree_status_t status) {
  IREE_ASSERT(!iree_status_is_ok(status),
              "queue failure status must be non-OK");
  intptr_t observed = 0;
  const bool installed = iree_atomic_compare_exchange_strong(
      &queue->failure_status, &observed, (intptr_t)status,
      iree_memory_order_release, iree_memory_order_acquire);
  if (!installed) {
    // An earlier failure is already stored: drop the new status and report the
    // stored one.
    iree_status_free(status);
    return iree_status_clone((iree_status_t)observed);
  }
  return iree_status_clone(status);
}
// Returns OK if the queue has no stored failure status, otherwise a clone of
// the stored failure. The stored status itself is left in place.
static iree_status_t iree_hal_vulkan_queue_check_error(
    iree_hal_vulkan_queue_t* queue) {
  iree_status_t stored_failure = (iree_status_t)iree_atomic_load(
      &queue->failure_status, iree_memory_order_acquire);
  if (iree_status_is_ok(stored_failure)) return iree_ok_status();
  return iree_status_clone(stored_failure);
}
// Atomically increments the queue's wakeup counter and signals the wakeup
// semaphore from the host to the incremented value. Returns the status of the
// vkSignalSemaphore call.
static iree_status_t iree_hal_vulkan_queue_signal_wakeup(
    iree_hal_vulkan_queue_t* queue) {
  // fetch_add yields the value before the increment; the semaphore is signaled
  // to the value after it.
  const uint64_t previous_value = (uint64_t)iree_atomic_fetch_add(
      &queue->wakeup_value, 1, iree_memory_order_acq_rel);
  const uint64_t signal_value = previous_value + 1;
  VkSemaphoreSignalInfo signal_info = {
      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO,
      .pNext = NULL,
      .semaphore = queue->wakeup_semaphore,
      .value = signal_value,
  };
  return iree_vkSignalSemaphore(IREE_VULKAN_DEVICE(&queue->syms),
                                queue->logical_device, &signal_info);
}
// Validates the arguments of a host call request.
//
// Returns INVALID_ARGUMENT when the callback is NULL, when |args| is NULL, or
// when |flags| contains any bit other than NON_BLOCKING, WAIT_ACTIVE, or
// RELAXED; the checks run in that order. Returns OK otherwise.
static iree_status_t iree_hal_vulkan_queue_validate_host_call(
    iree_hal_host_call_t call, const uint64_t args[4],
    iree_hal_host_call_flags_t flags) {
  if (call.fn == NULL) {
    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                            "host_call callback must be non-null");
  }
  if (args == NULL) {
    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                            "host_call args must be non-null");
  }
  const iree_hal_host_call_flags_t supported_flags =
      IREE_HAL_HOST_CALL_FLAG_NON_BLOCKING |
      IREE_HAL_HOST_CALL_FLAG_WAIT_ACTIVE | IREE_HAL_HOST_CALL_FLAG_RELAXED;
  if (iree_any_bit_set(flags, ~supported_flags)) {
    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                            "unsupported host_call flags: 0x%" PRIx64, flags);
  }
  return iree_ok_status();
}
// Validates a semaphore list before it is used by a queue operation.
//
// |usage| is a short label inserted into error messages. Returns:
//   INVALID_ARGUMENT if the list is non-empty but either storage array is NULL;
//   UNIMPLEMENTED if an entry fails iree_hal_vulkan_semaphore_is_local for
//     this queue's device;
//   OUT_OF_RANGE if a payload value exceeds IREE_HAL_SEMAPHORE_MAX_VALUE.
// Entries are checked in order and the first failure is returned.
static iree_status_t iree_hal_vulkan_queue_validate_semaphore_list(
    iree_hal_vulkan_queue_t* queue, iree_hal_semaphore_list_t semaphore_list,
    iree_string_view_t usage) {
  const bool has_entries = semaphore_list.count != 0;
  const bool has_storage =
      semaphore_list.semaphores && semaphore_list.payload_values;
  if (has_entries && !has_storage) {
    return iree_make_status(
        IREE_STATUS_INVALID_ARGUMENT,
        "Vulkan queue %.*s semaphore list storage is NULL for %" PRIhsz
        " entries",
        (int)usage.size, usage.data, semaphore_list.count);
  }

  for (iree_host_size_t index = 0; index < semaphore_list.count; ++index) {
    if (!iree_hal_vulkan_semaphore_is_local(semaphore_list.semaphores[index],
                                            queue->device)) {
      return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
                              "Vulkan queue %.*s semaphore %" PRIhsz
                              " is not a local Vulkan semaphore",
                              (int)usage.size, usage.data, index);
    }
    const uint64_t payload_value = semaphore_list.payload_values[index];
    if (payload_value > IREE_HAL_SEMAPHORE_MAX_VALUE) {
      return iree_make_status(
          IREE_STATUS_OUT_OF_RANGE,
          "Vulkan queue %.*s semaphore value %" PRIu64
          " exceeds the maximum HAL semaphore value %" PRIu64,
          (int)usage.size, usage.data, payload_value,
          (uint64_t)IREE_HAL_SEMAPHORE_MAX_VALUE);
    }
  }
  return iree_ok_status();
}
// Computes the allocation layout of a pending submission: the base
// iree_hal_vulkan_queue_pending_submission_t struct followed by five trailing
// arrays, declared in this order:
//   1. |wait_count| semaphore pointers
//   2. |wait_count| uint64_t payload values
//   3. |signal_count| semaphore pointers
//   4. |signal_count| uint64_t payload values
//   5. |payload_storage_length| bytes aligned to iree_max_align_t
// The total size and the offset of each array are written to the matching out
// parameters. The status produced by IREE_STRUCT_LAYOUT is returned unchanged.
// NOTE(review): presumably IREE_STRUCT_LAYOUT reports arithmetic overflow as a
// failing status - confirm against the macro's definition.
static iree_status_t iree_hal_vulkan_queue_calculate_submission_layout(
iree_host_size_t wait_count, iree_host_size_t signal_count,
iree_host_size_t payload_storage_length, iree_host_size_t* out_total_size,
iree_host_size_t* out_wait_semaphores_offset,
iree_host_size_t* out_wait_payload_values_offset,
iree_host_size_t* out_signal_semaphores_offset,
iree_host_size_t* out_signal_payload_values_offset,
iree_host_size_t* out_payload_storage_offset) {
return IREE_STRUCT_LAYOUT(
iree_sizeof_struct(iree_hal_vulkan_queue_pending_submission_t),
out_total_size,
IREE_STRUCT_FIELD_ALIGNED(wait_count, iree_hal_semaphore_t*,
iree_alignof(iree_hal_semaphore_t*),
out_wait_semaphores_offset),
IREE_STRUCT_FIELD_ALIGNED(wait_count, uint64_t, iree_alignof(uint64_t),
out_wait_payload_values_offset),
IREE_STRUCT_FIELD_ALIGNED(signal_count, iree_hal_semaphore_t*,
iree_alignof(iree_hal_semaphore_t*),
out_signal_semaphores_offset),
IREE_STRUCT_FIELD_ALIGNED(signal_count, uint64_t, iree_alignof(uint64_t),
out_signal_payload_values_offset),
// Raw payload bytes come last.
IREE_STRUCT_FIELD_ALIGNED(payload_storage_length, uint8_t,
iree_max_align_t, out_payload_storage_offset));
}
// Converts a host-size count to the 32-bit count used by profile events,
// saturating at UINT32_MAX instead of truncating.
static uint32_t iree_hal_vulkan_queue_profile_count(iree_host_size_t value) {
  if (value > UINT32_MAX) return UINT32_MAX;
  return (uint32_t)value;
}
// Maps a queue submission kind to the profile queue event type reported for
// it. Barrier and sparse-bind submissions both report as BARRIER events. A
// kind not listed in the switch maps to NONE.
static iree_hal_profile_queue_event_type_t iree_hal_vulkan_queue_profile_type(
    iree_hal_vulkan_queue_submission_kind_t kind) {
  iree_hal_profile_queue_event_type_t event_type =
      IREE_HAL_PROFILE_QUEUE_EVENT_TYPE_NONE;
  switch (kind) {
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_BARRIER:
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_SPARSE_BIND:
      event_type = IREE_HAL_PROFILE_QUEUE_EVENT_TYPE_BARRIER;
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_HOST_CALL:
      event_type = IREE_HAL_PROFILE_QUEUE_EVENT_TYPE_HOST_CALL;
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_FILL:
      event_type = IREE_HAL_PROFILE_QUEUE_EVENT_TYPE_FILL;
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_UPDATE:
      event_type = IREE_HAL_PROFILE_QUEUE_EVENT_TYPE_UPDATE;
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_COPY:
      event_type = IREE_HAL_PROFILE_QUEUE_EVENT_TYPE_COPY;
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_EXECUTE:
      event_type = IREE_HAL_PROFILE_QUEUE_EVENT_TYPE_EXECUTE;
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_DISPATCH:
      event_type = IREE_HAL_PROFILE_QUEUE_EVENT_TYPE_DISPATCH;
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_ALLOCA:
      event_type = IREE_HAL_PROFILE_QUEUE_EVENT_TYPE_ALLOCA;
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_DEALLOCA:
      event_type = IREE_HAL_PROFILE_QUEUE_EVENT_TYPE_DEALLOCA;
      break;
  }
  return event_type;
}
// Returns the operation count reported in profile events for |submission|:
// 0 for barriers, the bind count for sparse binds, and 1 for every other kind.
static uint32_t iree_hal_vulkan_queue_profile_operation_count(
    const iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (submission->kind == IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_BARRIER) {
    return 0;
  }
  if (submission->kind == IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_SPARSE_BIND) {
    return submission->sparse_bind.bind_count;
  }
  return 1;
}
// Returns the payload length in bytes reported in profile events for
// |submission|: the operation length for fill/update/copy, the allocation size
// for alloca, the saturating sum of all bind sizes for sparse binds, and 0 for
// every other kind.
static uint64_t iree_hal_vulkan_queue_profile_payload_length(
    const iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (submission->kind == IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_FILL) {
    return submission->fill.length;
  }
  if (submission->kind == IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_UPDATE) {
    return submission->update.length;
  }
  if (submission->kind == IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_COPY) {
    return submission->copy.length;
  }
  if (submission->kind == IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_ALLOCA) {
    return submission->alloca.allocation_size;
  }
  if (submission->kind != IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_SPARSE_BIND) {
    return 0;
  }

  uint64_t accumulated_size = 0;
  for (uint32_t bind_index = 0;
       bind_index < submission->sparse_bind.bind_count; ++bind_index) {
    const VkDeviceSize bind_size =
        submission->sparse_bind.binds[bind_index].size;
    // Saturate rather than wrap if the sum would exceed 64 bits.
    if (bind_size > UINT64_MAX - accumulated_size) return UINT64_MAX;
    accumulated_size += bind_size;
  }
  return accumulated_size;
}
// Initializes the profiling state of |submission| from the queue's recorder.
//
// Does nothing unless the recorder has at least one of queue events, dispatch
// events, device queue events, or memory events enabled. Otherwise captures
// the recorder, scope, event type, and submit timestamp, and assigns a
// submission id from the shared counter when the queue has one.
static void iree_hal_vulkan_queue_profile_submission_initialize(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  iree_hal_local_profile_recorder_t* profile_recorder =
      queue->profile_recorder;
  const bool wants_profile_state = iree_hal_local_profile_recorder_is_enabled(
      profile_recorder, IREE_HAL_DEVICE_PROFILING_DATA_QUEUE_EVENTS |
                            IREE_HAL_DEVICE_PROFILING_DATA_DISPATCH_EVENTS |
                            IREE_HAL_DEVICE_PROFILING_DATA_DEVICE_QUEUE_EVENTS |
                            IREE_HAL_DEVICE_PROFILING_DATA_MEMORY_EVENTS);
  if (!wants_profile_state) return;

  submission->profile.recorder = profile_recorder;
  submission->profile.scope = queue->profile_scope;
  submission->profile.type =
      iree_hal_vulkan_queue_profile_type(submission->kind);
  submission->profile.submit_host_time_ns = iree_time_now();
  if (queue->profile_submission_counter != NULL) {
    // fetch_add returns the counter value prior to the increment.
    submission->profile.submission_id = (uint64_t)iree_atomic_fetch_add(
        queue->profile_submission_counter, 1, iree_memory_order_relaxed);
  }
}
// Records in |submission|'s profile state how its wait dependencies were
// resolved, based on |resolution|.
//
// No-op when profiling is inactive for the submission or when the strategy has
// already been set to SOFTWARE_DEFER (that value is never overwritten here).
// Otherwise the strategy is, in priority order: NONE with no waits,
// SOFTWARE_DEFER (plus the SOFTWARE_DEFERRED event flag) when deferral is
// needed, DEVICE_BARRIER when any wait infos were produced, else INLINE.
static void iree_hal_vulkan_queue_profile_set_dependency_resolution(
    iree_hal_vulkan_queue_pending_submission_t* submission,
    const iree_hal_vulkan_queue_wait_resolution_t* resolution) {
  if (!submission->profile.recorder) return;
  if (submission->profile.dependency_strategy ==
      IREE_HAL_PROFILE_QUEUE_DEPENDENCY_STRATEGY_SOFTWARE_DEFER) {
    return;
  }

  if (submission->wait_semaphore_list.count == 0) {
    submission->profile.dependency_strategy =
        IREE_HAL_PROFILE_QUEUE_DEPENDENCY_STRATEGY_NONE;
    return;
  }
  if (resolution->needs_deferral) {
    submission->profile.dependency_strategy =
        IREE_HAL_PROFILE_QUEUE_DEPENDENCY_STRATEGY_SOFTWARE_DEFER;
    submission->profile.flags |=
        IREE_HAL_PROFILE_QUEUE_EVENT_FLAG_SOFTWARE_DEFERRED;
    return;
  }
  if (resolution->wait_info_count != 0) {
    submission->profile.dependency_strategy =
        IREE_HAL_PROFILE_QUEUE_DEPENDENCY_STRATEGY_DEVICE_BARRIER;
    return;
  }
  submission->profile.dependency_strategy =
      IREE_HAL_PROFILE_QUEUE_DEPENDENCY_STRATEGY_INLINE;
}
// Unconditionally marks |submission|'s profile state as software-deferred:
// sets the SOFTWARE_DEFERRED event flag and the SOFTWARE_DEFER dependency
// strategy. No-op when profiling is inactive for the submission.
static void iree_hal_vulkan_queue_profile_force_software_defer(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (submission->profile.recorder == NULL) return;
  submission->profile.flags |=
      IREE_HAL_PROFILE_QUEUE_EVENT_FLAG_SOFTWARE_DEFERRED;
  submission->profile.dependency_strategy =
      IREE_HAL_PROFILE_QUEUE_DEPENDENCY_STRATEGY_SOFTWARE_DEFER;
}
// Returns the profile id of the transient buffer targeted by an alloca or
// dealloca submission, or 0 for every other submission kind.
static uint64_t iree_hal_vulkan_queue_profile_allocation_id(
    const iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (submission->kind == IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_ALLOCA) {
    return iree_hal_local_transient_buffer_profile_id(
        submission->alloca.buffer);
  }
  if (submission->kind == IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_DEALLOCA) {
    return iree_hal_local_transient_buffer_profile_id(
        submission->dealloca.buffer);
  }
  return 0;
}
// Returns the profile id of the command buffer executed by |submission|.
// Returns 0 unless the submission is an EXECUTE with a command buffer whose
// profile event type is not DISPATCH.
static uint64_t iree_hal_vulkan_queue_profile_command_buffer_id(
    const iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (submission->kind != IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_EXECUTE) {
    return 0;
  }
  if (!submission->execute.command_buffer) return 0;
  if (submission->profile.type == IREE_HAL_PROFILE_QUEUE_EVENT_TYPE_DISPATCH) {
    return 0;
  }
  return submission->execute.command_buffer->profile_id;
}
// Fills in the profile operation count and payload length of |submission| from
// its kind-specific data. Each metric is only computed while it is still 0, so
// a non-zero value that is already present is kept. No-op when profiling is
// inactive for the submission.
static void iree_hal_vulkan_queue_profile_populate_submission_metrics(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (submission->profile.recorder == NULL) return;
  const bool missing_operation_count =
      submission->profile.operation_count == 0;
  if (missing_operation_count) {
    submission->profile.operation_count =
        iree_hal_vulkan_queue_profile_operation_count(submission);
  }
  const bool missing_payload_length = submission->profile.payload_length == 0;
  if (missing_payload_length) {
    submission->profile.payload_length =
        iree_hal_vulkan_queue_profile_payload_length(submission);
  }
}
// Appends the host-side queue event for |submission| to its profile recorder.
//
// The event is emitted at most once per submission (guarded by
// queue_event_recorded) and only when the recorder has queue events enabled.
// The ready timestamp is captured at the time of this call.
static void iree_hal_vulkan_queue_profile_record_submission(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (!submission->profile.recorder) return;
  if (submission->profile.queue_event_recorded) return;
  if (!iree_hal_local_profile_recorder_is_enabled(
          submission->profile.recorder,
          IREE_HAL_DEVICE_PROFILING_DATA_QUEUE_EVENTS)) {
    return;
  }

  submission->profile.queue_event_recorded = true;
  submission->profile.ready_host_time_ns = iree_time_now();
  iree_hal_vulkan_queue_profile_populate_submission_metrics(submission);

  iree_hal_local_profile_queue_event_info_t queue_event =
      iree_hal_local_profile_queue_event_info_default();
  // Identity of the event.
  queue_event.type = submission->profile.type;
  queue_event.flags = submission->profile.flags;
  queue_event.dependency_strategy = submission->profile.dependency_strategy;
  queue_event.scope = submission->profile.scope;
  queue_event.submission_id = submission->profile.submission_id;
  queue_event.command_buffer_id =
      iree_hal_vulkan_queue_profile_command_buffer_id(submission);
  queue_event.allocation_id =
      iree_hal_vulkan_queue_profile_allocation_id(submission);
  // Host timing.
  queue_event.host_time_ns = submission->profile.submit_host_time_ns;
  queue_event.ready_host_time_ns = submission->profile.ready_host_time_ns;
  // Size metrics.
  queue_event.wait_count = iree_hal_vulkan_queue_profile_count(
      submission->wait_semaphore_list.count);
  queue_event.signal_count = iree_hal_vulkan_queue_profile_count(
      submission->signal_semaphore_list.count);
  queue_event.operation_count = submission->profile.operation_count;
  queue_event.payload_length = submission->profile.payload_length;

  iree_hal_local_profile_recorder_append_queue_event(
      submission->profile.recorder, &queue_event, /*out_event_id=*/NULL);
}
// Returns true if |submission| is of a kind that needs native timestamps when
// device timing is profiled: fill/update/copy with a non-zero length, and all
// execute and dispatch submissions. Every other kind returns false.
static bool iree_hal_vulkan_queue_profile_submission_requires_native_timestamp(
    const iree_hal_vulkan_queue_pending_submission_t* submission) {
  bool requires_timestamp = false;
  switch (submission->kind) {
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_FILL:
      requires_timestamp = submission->fill.length != 0;
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_UPDATE:
      requires_timestamp = submission->update.length != 0;
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_COPY:
      requires_timestamp = submission->copy.length != 0;
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_EXECUTE:
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_DISPATCH:
      requires_timestamp = true;
      break;
    default:
      break;
  }
  return requires_timestamp;
}
// Returns true if a dispatch of |pipeline| at the given location passes the
// recorder's capture filter.
//
// Returns false when the recorder exposes no profiling options or when the
// location (command buffer id, command index, physical device ordinal, queue
// ordinal) does not match. When the filter carries an executable export
// pattern the pipeline name must match it as well.
static bool iree_hal_vulkan_queue_profile_filter_matches_dispatch(
    iree_hal_local_profile_recorder_t* profile_recorder,
    iree_hal_local_profile_queue_scope_t scope, uint64_t command_buffer_id,
    uint32_t command_index, const iree_hal_vulkan_pipeline_t* pipeline) {
  const iree_hal_device_profiling_options_t* profiling_options =
      iree_hal_local_profile_recorder_options(profile_recorder);
  if (profiling_options == NULL) return false;

  const iree_hal_profile_capture_filter_t* capture_filter =
      &profiling_options->capture_filter;
  if (!iree_hal_profile_capture_filter_matches_location(
          capture_filter, command_buffer_id, command_index,
          scope.physical_device_ordinal, scope.queue_ordinal)) {
    return false;
  }

  // Without an export pattern the location match alone decides.
  if (!iree_any_bit_set(
          capture_filter->flags,
          IREE_HAL_PROFILE_CAPTURE_FILTER_FLAG_EXECUTABLE_EXPORT_PATTERN)) {
    return true;
  }
  if (!iree_string_view_match_pattern(
          pipeline->name, capture_filter->executable_export_pattern)) {
    return false;
  }
  return true;
}
// Computes how many dispatches in |submission| should receive profile events.
//
// |out_dispatch_count| is always initialized to 0. It stays 0 when dispatch
// events are not requested, or when the submission is neither a direct
// dispatch nor an execute with a command buffer. A direct dispatch counts as 1
// if its pipeline passes the capture filter. Execute submissions delegate the
// count to the command buffer. Pipeline lookup and command buffer counting
// failures are returned to the caller.
static iree_status_t iree_hal_vulkan_queue_profile_dispatch_count(
    const iree_hal_vulkan_queue_pending_submission_t* submission,
    uint32_t* out_dispatch_count) {
  *out_dispatch_count = 0;
  if (!iree_hal_vulkan_queue_profile_requests_dispatch_events(submission)) {
    return iree_ok_status();
  }

  if (submission->kind == IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_DISPATCH) {
    const iree_hal_vulkan_pipeline_t* dispatch_pipeline = NULL;
    IREE_RETURN_IF_ERROR(iree_hal_vulkan_executable_lookup_pipeline(
        submission->dispatch.executable, submission->dispatch.export_ordinal,
        &dispatch_pipeline));
    // Direct dispatches have no command buffer and no command index.
    const bool passes_filter =
        iree_hal_vulkan_queue_profile_filter_matches_dispatch(
            submission->profile.recorder, submission->profile.scope,
            /*command_buffer_id=*/0, /*command_index=*/UINT32_MAX,
            dispatch_pipeline);
    if (passes_filter) *out_dispatch_count = 1;
    return iree_ok_status();
  }

  const bool is_execute =
      submission->kind == IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_EXECUTE;
  if (!is_execute || !submission->execute.command_buffer) {
    return iree_ok_status();
  }
  return iree_hal_vulkan_command_buffer_count_profiled_dispatches(
      submission->execute.command_buffer, submission->profile.recorder,
      submission->profile.scope,
      iree_hal_vulkan_queue_profile_command_buffer_id(submission),
      out_dispatch_count);
}
// Appends the dispatch profile event for a direct dispatch |submission| using
// the pair of timestamp values at its dispatch base query.
//
// Returns OUT_OF_RANGE unless exactly one dispatch timestamp range was
// reserved, FAILED_PRECONDITION when the pipeline's first workgroup size
// component is 0, and DATA_LOSS when the end tick precedes the start tick.
// Returns OK without appending anything if the pipeline does not pass the
// capture filter.
static iree_status_t iree_hal_vulkan_queue_append_dispatch_profile_event(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (submission->profile.dispatch_query_count != 1) {
    return iree_make_status(
        IREE_STATUS_OUT_OF_RANGE,
        "Vulkan direct dispatch profile event requires exactly one timestamp "
        "range");
  }

  const iree_hal_vulkan_pipeline_t* dispatch_pipeline = NULL;
  IREE_RETURN_IF_ERROR(iree_hal_vulkan_executable_lookup_pipeline(
      submission->dispatch.executable, submission->dispatch.export_ordinal,
      &dispatch_pipeline));
  const bool passes_filter =
      iree_hal_vulkan_queue_profile_filter_matches_dispatch(
          submission->profile.recorder, submission->profile.scope,
          /*command_buffer_id=*/0, /*command_index=*/UINT32_MAX,
          dispatch_pipeline);
  if (!passes_filter) return iree_ok_status();

  if (dispatch_pipeline->workgroup_size[0] == 0) {
    return iree_make_status(
        IREE_STATUS_FAILED_PRECONDITION,
        "Vulkan dispatch profiling requires static SPIR-V LocalSize metadata");
  }

  // The range is two consecutive values: start tick then end tick.
  const uint64_t* tick_range =
      &submission->profile
           .query_values[submission->profile.dispatch_base_query];
  const uint64_t start_tick = tick_range[0];
  const uint64_t end_tick = tick_range[1];
  if (end_tick < start_tick) {
    return iree_make_status(
        IREE_STATUS_DATA_LOSS,
        "Vulkan dispatch profiling timestamp range is not monotonic");
  }

  iree_hal_local_profile_dispatch_event_info_t dispatch_event =
      iree_hal_local_profile_dispatch_event_info_default();
  dispatch_event.scope = submission->profile.scope;
  dispatch_event.submission_id = submission->profile.submission_id;
  dispatch_event.command_buffer_id = 0;
  dispatch_event.command_index = UINT32_MAX;
  dispatch_event.executable_id =
      iree_hal_vulkan_executable_profile_id(submission->dispatch.executable);
  dispatch_event.export_ordinal = submission->dispatch.export_ordinal;
  if (!iree_hal_dispatch_uses_indirect_parameters(
          submission->dispatch.flags)) {
    // Static workgroup counts are copied from the dispatch config.
    memcpy(dispatch_event.workgroup_count,
           submission->dispatch.config.workgroup_count,
           sizeof(dispatch_event.workgroup_count));
  } else {
    dispatch_event.flags |=
        IREE_HAL_PROFILE_DISPATCH_EVENT_FLAG_INDIRECT_PARAMETERS;
  }
  memcpy(dispatch_event.workgroup_size, dispatch_pipeline->workgroup_size,
         sizeof(dispatch_event.workgroup_size));
  dispatch_event.start_tick = start_tick;
  dispatch_event.end_tick = end_tick;
  return iree_hal_local_profile_recorder_append_dispatch_event(
      submission->profile.recorder, &dispatch_event, /*out_event_id=*/NULL);
}
// Records executable metadata for the executable dispatched by |submission|,
// keyed by the executable's profile id. Returns OK without recording when the
// queue's recorder does not have executable metadata enabled.
static iree_status_t iree_hal_vulkan_queue_record_dispatch_profile_metadata(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  iree_hal_local_profile_recorder_t* queue_recorder =
      submission->queue->profile_recorder;
  if (iree_hal_local_profile_recorder_is_enabled(
          queue_recorder,
          IREE_HAL_DEVICE_PROFILING_DATA_EXECUTABLE_METADATA)) {
    return iree_hal_local_profile_recorder_record_executable_with_id(
        queue_recorder, submission->dispatch.executable,
        iree_hal_vulkan_executable_profile_id(
            submission->dispatch.executable));
  }
  return iree_ok_status();
}
// Reserves the timestamp queries |submission| needs for device-side profiling
// and records their indices in the submission's profile state.
//
// Query layout within the reserved range: an optional queue start/end pair
// first, followed by two queries (start/end) per profiled dispatch.
//
// Returns OK without reserving anything when the submission already has a
// query pool, or when neither queue device timestamps nor dispatch timestamps
// are needed. Returns FAILED_PRECONDITION when the submission kind requires
// native timestamps but has no native command buffer, and OUT_OF_RANGE when
// the dispatch query count would overflow uint32_t. Errors from counting
// dispatches or acquiring queries are propagated.
// NOTE(review): the `_under_lock` suffix suggests the caller holds
// queue->submission_mutex - the locked wrapper below does; confirm other
// callers.
static iree_status_t
iree_hal_vulkan_queue_profile_prepare_native_timestamps_under_lock(
iree_hal_vulkan_queue_t* queue,
iree_hal_vulkan_queue_pending_submission_t* submission) {
// Without a native command buffer there is nowhere to write timestamps; that
// is only an error for submission kinds that require them.
if (!submission->native_command_buffer) {
if (!iree_hal_vulkan_queue_profile_submission_requires_native_timestamp(
submission)) {
return iree_ok_status();
}
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Vulkan device timestamp profiling requires native command recording "
"for queue event type %u",
(uint32_t)submission->profile.type);
}
// A query pool is already attached: nothing more to prepare.
if (submission->profile.query_pool) return iree_ok_status();
const bool needs_queue_device_timestamps =
iree_hal_vulkan_queue_profile_requests_queue_device_event(submission) &&
iree_hal_vulkan_queue_profile_submission_requires_native_timestamp(
submission);
uint32_t dispatch_query_count = 0;
IREE_RETURN_IF_ERROR(iree_hal_vulkan_queue_profile_dispatch_count(
submission, &dispatch_query_count));
if (!needs_queue_device_timestamps && dispatch_query_count == 0) {
return iree_ok_status();
}
// Indices below are relative to the start of the reserved range until they
// are rebased on first_query after acquisition.
uint32_t query_count = 0;
uint32_t queue_start_query = IREE_HAL_VULKAN_PROFILE_QUERY_ABSENT;
uint32_t queue_end_query = IREE_HAL_VULKAN_PROFILE_QUERY_ABSENT;
uint32_t dispatch_base_query = IREE_HAL_VULKAN_PROFILE_QUERY_ABSENT;
if (needs_queue_device_timestamps) {
queue_start_query = query_count++;
queue_end_query = query_count++;
}
if (dispatch_query_count != 0) {
// Each dispatch consumes two queries; reject counts whose doubled total
// would not fit in uint32_t.
if (dispatch_query_count > (UINT32_MAX - query_count) / 2) {
return iree_make_status(
IREE_STATUS_OUT_OF_RANGE,
"Vulkan dispatch profile query count exceeds uint32_t");
}
dispatch_base_query = query_count;
query_count += dispatch_query_count * 2;
}
IREE_RETURN_IF_ERROR(
iree_hal_vulkan_queue_acquire_timestamp_queries_under_lock(
queue, submission, query_count));
// Rebase the relative indices onto the first query of the acquired range.
const uint32_t first_query = submission->profile.first_query;
if (queue_start_query != IREE_HAL_VULKAN_PROFILE_QUERY_ABSENT) {
submission->profile.queue_start_query = first_query + queue_start_query;
submission->profile.queue_end_query = first_query + queue_end_query;
}
if (dispatch_base_query != IREE_HAL_VULKAN_PROFILE_QUERY_ABSENT) {
submission->profile.dispatch_base_query = first_query + dispatch_base_query;
submission->profile.dispatch_query_count = dispatch_query_count;
}
return iree_ok_status();
}
// Locked wrapper: holds the queue submission mutex while preparing native
// timestamp queries for |submission| and returns the resulting status.
static iree_status_t iree_hal_vulkan_queue_profile_prepare_native_timestamps(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  iree_status_t prepare_status = iree_ok_status();
  iree_slim_mutex_lock(&queue->submission_mutex);
  prepare_status =
      iree_hal_vulkan_queue_profile_prepare_native_timestamps_under_lock(
          queue, submission);
  iree_slim_mutex_unlock(&queue->submission_mutex);
  return prepare_status;
}
// Records a vkCmdResetQueryPool into |submission|'s native command buffer
// covering the submission's timestamp query range. No-op when the submission
// has no query pool or a query count of 0.
static void iree_hal_vulkan_queue_profile_reset_native_timestamps(
    iree_hal_vulkan_queue_t* queue,
    const iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (!submission->profile.query_pool) return;
  if (submission->profile.query_count == 0) return;
  iree_vkCmdResetQueryPool(
      IREE_VULKAN_DEVICE(&queue->syms), submission->native_command_buffer,
      submission->profile.query_pool, submission->profile.first_query,
      submission->profile.query_count);
}
// Records the queue start timestamp write (top-of-pipe stage) into
// |submission|'s native command buffer. No-op when the submission has no query
// pool or no queue start query assigned.
static void iree_hal_vulkan_queue_profile_write_timestamp_begin(
    iree_hal_vulkan_queue_t* queue,
    const iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (!submission->profile.query_pool) return;
  if (submission->profile.queue_start_query ==
      IREE_HAL_VULKAN_PROFILE_QUERY_ABSENT) {
    return;
  }
  iree_vkCmdWriteTimestamp2(
      IREE_VULKAN_DEVICE(&queue->syms), submission->native_command_buffer,
      VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, submission->profile.query_pool,
      submission->profile.queue_start_query);
}
// Records the queue end timestamp write (bottom-of-pipe stage) into
// |submission|'s native command buffer. No-op when the submission has no query
// pool or no queue end query assigned.
static void iree_hal_vulkan_queue_profile_write_timestamp_end(
    iree_hal_vulkan_queue_t* queue,
    const iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (!submission->profile.query_pool) return;
  if (submission->profile.queue_end_query ==
      IREE_HAL_VULKAN_PROFILE_QUERY_ABSENT) {
    return;
  }
  iree_vkCmdWriteTimestamp2(
      IREE_VULKAN_DEVICE(&queue->syms), submission->native_command_buffer,
      VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT, submission->profile.query_pool,
      submission->profile.queue_end_query);
}
// Widens the queue's clock alignment record so that its minimum/maximum event
// ticks cover [start_tick, end_tick]. The first recorded range initializes the
// bounds. Updates happen under the clock alignment mutex. No-op when the queue
// has no clock alignment state.
static void iree_hal_vulkan_queue_profile_record_device_tick_range(
    iree_hal_vulkan_queue_t* queue, uint64_t start_tick, uint64_t end_tick) {
  iree_hal_vulkan_profile_clock_alignment_t* alignment =
      queue->profile_clock_alignment;
  if (alignment == NULL) return;

  iree_slim_mutex_lock(&alignment->mutex);
  if (!alignment->has_event_ticks) {
    alignment->minimum_event_tick = start_tick;
    alignment->maximum_event_tick = end_tick;
    alignment->has_event_ticks = true;
  } else {
    alignment->minimum_event_tick =
        iree_min(alignment->minimum_event_tick, start_tick);
    alignment->maximum_event_tick =
        iree_max(alignment->maximum_event_tick, end_tick);
  }
  iree_slim_mutex_unlock(&alignment->mutex);
}
// Reads back the 64-bit timestamp query results of |submission| into its
// query_values storage (at the same indices as the queries) and folds every
// tick range into the queue's clock alignment record.
//
// Returns OK immediately when the submission has no query pool. Any result
// other than VK_SUCCESS from vkGetQueryPoolResults is converted to a status.
// Returns DATA_LOSS if a queue or dispatch range ends before it starts.
static iree_status_t iree_hal_vulkan_queue_profile_read_native_timestamps(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (!submission->profile.query_pool) return iree_ok_status();

  const VkResult query_result = iree_vkGetQueryPoolResults_raw(
      &queue->syms, queue->logical_device, submission->profile.query_pool,
      submission->profile.first_query, submission->profile.query_count,
      submission->profile.query_count * sizeof(uint64_t),
      &submission->profile.query_values[submission->profile.first_query],
      sizeof(uint64_t), VK_QUERY_RESULT_64_BIT);
  if (query_result != VK_SUCCESS) {
    return iree_status_from_vk_result(__FILE__, __LINE__, query_result,
                                      "vkGetQueryPoolResults");
  }

  // Whole-submission range, when one was reserved.
  if (submission->profile.queue_start_query !=
      IREE_HAL_VULKAN_PROFILE_QUERY_ABSENT) {
    const uint64_t queue_start_tick =
        submission->profile
            .query_values[submission->profile.queue_start_query];
    const uint64_t queue_end_tick =
        submission->profile.query_values[submission->profile.queue_end_query];
    if (queue_end_tick < queue_start_tick) {
      return iree_make_status(
          IREE_STATUS_DATA_LOSS,
          "Vulkan queue device profiling timestamp range is not monotonic");
    }
    iree_hal_vulkan_queue_profile_record_device_tick_range(
        queue, queue_start_tick, queue_end_tick);
  }

  // Per-dispatch ranges: two consecutive queries each.
  for (uint32_t dispatch_index = 0;
       dispatch_index < submission->profile.dispatch_query_count;
       ++dispatch_index) {
    const uint32_t query_index =
        submission->profile.dispatch_base_query + dispatch_index * 2;
    const uint64_t dispatch_start_tick =
        submission->profile.query_values[query_index];
    const uint64_t dispatch_end_tick =
        submission->profile.query_values[query_index + 1];
    if (dispatch_end_tick < dispatch_start_tick) {
      return iree_make_status(
          IREE_STATUS_DATA_LOSS,
          "Vulkan dispatch profiling timestamp range is not monotonic");
    }
    iree_hal_vulkan_queue_profile_record_device_tick_range(
        queue, dispatch_start_tick, dispatch_end_tick);
  }
  return iree_ok_status();
}
// Emits dispatch profile events for |submission| from its timestamp values.
//
// Returns OK without emitting anything when dispatch events are not requested
// or no dispatch queries were reserved. Returns FAILED_PRECONDITION when
// queries are counted but no base query index was assigned. Direct dispatch
// submissions emit a single event; all other kinds delegate to the executed
// command buffer.
static iree_status_t iree_hal_vulkan_queue_profile_record_dispatch_events(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (!iree_hal_vulkan_queue_profile_requests_dispatch_events(submission)) {
    return iree_ok_status();
  }
  if (submission->profile.dispatch_query_count == 0) {
    return iree_ok_status();
  }
  if (submission->profile.dispatch_base_query ==
      IREE_HAL_VULKAN_PROFILE_QUERY_ABSENT) {
    return iree_make_status(
        IREE_STATUS_FAILED_PRECONDITION,
        "Vulkan dispatch profiling has no dispatch timestamp query range");
  }

  if (submission->kind != IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_DISPATCH) {
    return iree_hal_vulkan_command_buffer_append_dispatch_profile_events(
        submission->execute.command_buffer, submission->profile.recorder,
        submission->profile.scope, submission->profile.submission_id,
        iree_hal_vulkan_queue_profile_command_buffer_id(submission),
        &submission->profile
             .query_values[submission->profile.dispatch_base_query],
        submission->profile.dispatch_query_count);
  }
  return iree_hal_vulkan_queue_append_dispatch_profile_event(submission);
}
// Appends the device-side queue event for |submission|, using the tick values
// read back for its queue start/end queries.
//
// Returns OK without appending when queue device events are not requested,
// when the event was already recorded for this submission, or when no queue
// start query was reserved. Otherwise returns the recorder's append status.
static iree_status_t iree_hal_vulkan_queue_profile_record_queue_device_event(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (!iree_hal_vulkan_queue_profile_requests_queue_device_event(submission)) {
    return iree_ok_status();
  }
  if (submission->profile.queue_device_event_recorded) {
    return iree_ok_status();
  }
  if (submission->profile.queue_start_query ==
      IREE_HAL_VULKAN_PROFILE_QUERY_ABSENT) {
    return iree_ok_status();
  }

  submission->profile.queue_device_event_recorded = true;
  iree_hal_vulkan_queue_profile_populate_submission_metrics(submission);

  iree_hal_local_profile_queue_device_event_info_t device_event =
      iree_hal_local_profile_queue_device_event_info_default();
  device_event.type = submission->profile.type;
  device_event.flags = submission->profile.flags;
  device_event.scope = submission->profile.scope;
  device_event.submission_id = submission->profile.submission_id;
  device_event.command_buffer_id =
      iree_hal_vulkan_queue_profile_command_buffer_id(submission);
  device_event.allocation_id =
      iree_hal_vulkan_queue_profile_allocation_id(submission);
  device_event.operation_count = submission->profile.operation_count;
  device_event.payload_length = submission->profile.payload_length;
  device_event.start_tick =
      submission->profile.query_values[submission->profile.queue_start_query];
  device_event.end_tick =
      submission->profile.query_values[submission->profile.queue_end_query];
  return iree_hal_local_profile_recorder_append_queue_device_event(
      submission->profile.recorder, &device_event, /*out_event_id=*/NULL);
}
// Queries the current statistics of |pool| and copies them into |event|,
// setting the POOL_STATS flag. Leaves |event| untouched when |pool| is NULL.
static void iree_hal_vulkan_queue_profile_populate_memory_event_pool_stats(
    iree_hal_pool_t* pool, iree_hal_profile_memory_event_t* event) {
  if (pool == NULL) return;
  iree_hal_pool_stats_t pool_stats;
  iree_hal_pool_query_stats(pool, &pool_stats);
  event->flags |= IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_POOL_STATS;
  // Byte totals.
  event->pool_bytes_reserved = pool_stats.bytes_reserved;
  event->pool_bytes_free = pool_stats.bytes_free;
  event->pool_bytes_committed = pool_stats.bytes_committed;
  event->pool_budget_limit = pool_stats.budget_limit;
  // Object counts.
  event->pool_reservation_count = pool_stats.reservation_count;
  event->pool_slab_count = pool_stats.slab_count;
}
// Builds and appends a memory profile event associated with |submission|.
//
// No-op unless the submission's recorder has memory events enabled. When a
// |reservation| is provided it overrides |backing_id| with the reservation's
// block handle, supplies the offset, sets the POOL_RESERVATION flag, and - for
// event types other than QUEUE_ALLOCA/QUEUE_DEALLOCA - replaces |length| with
// the reservation length. Pool statistics are attached when |pool| is non-NULL.
// |frontier_entry_count| is saturated to 32 bits and a zero minimum alignment
// is reported as 1.
static void iree_hal_vulkan_queue_profile_record_memory_event(
    iree_hal_vulkan_queue_pending_submission_t* submission,
    iree_hal_profile_memory_event_type_t type,
    iree_hal_profile_memory_event_flags_t flags, uint32_t result,
    iree_hal_pool_t* pool, iree_hal_buffer_params_t params,
    const iree_hal_pool_reservation_t* reservation, uint64_t backing_id,
    iree_device_size_t length, uint64_t frontier_entry_count) {
  if (!iree_hal_local_profile_recorder_is_enabled(
          submission->profile.recorder,
          IREE_HAL_DEVICE_PROFILING_DATA_MEMORY_EVENTS)) {
    return;
  }

  iree_hal_profile_memory_event_t memory_event =
      iree_hal_profile_memory_event_default();
  memory_event.type = type;
  memory_event.flags = flags;
  memory_event.result = result;
  memory_event.allocation_id =
      iree_hal_vulkan_queue_profile_allocation_id(submission);
  // The pool's address doubles as its identifier.
  memory_event.pool_id = (uint64_t)(uintptr_t)pool;
  memory_event.backing_id = backing_id;
  memory_event.submission_id = submission->profile.submission_id;
  memory_event.physical_device_ordinal =
      submission->profile.scope.physical_device_ordinal;
  memory_event.queue_ordinal = submission->profile.scope.queue_ordinal;
  memory_event.frontier_entry_count =
      (uint32_t)iree_min(frontier_entry_count, (uint64_t)UINT32_MAX);
  memory_event.memory_type = params.type;
  memory_event.buffer_usage = params.usage;
  memory_event.length = length;
  if (params.min_alignment) {
    memory_event.alignment = params.min_alignment;
  } else {
    memory_event.alignment = 1;
  }

  if (reservation != NULL) {
    memory_event.flags |= IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_POOL_RESERVATION;
    memory_event.backing_id = reservation->block_handle;
    memory_event.offset = reservation->offset;
    const bool is_queue_alloca_or_dealloca =
        type == IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_QUEUE_ALLOCA ||
        type == IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_QUEUE_DEALLOCA;
    if (!is_queue_alloca_or_dealloca) {
      memory_event.length = reservation->length;
    }
  }

  iree_hal_vulkan_queue_profile_populate_memory_event_pool_stats(
      pool, &memory_event);
  iree_hal_local_profile_recorder_append_memory_event(
      submission->profile.recorder, &memory_event, /*out_event_id=*/NULL);
}
// Returns the memory event flags describing a pool reservation attempt. The
// QUEUE_OPERATION flag is always set; WAIT_FRONTIER is added when the acquire
// result is OK_NEEDS_WAIT, and WAIT_NOTIFICATION when it is EXHAUSTED or
// OVER_BUDGET.
static iree_hal_profile_memory_event_flags_t
iree_hal_vulkan_queue_profile_pool_reserve_event_flags(
    iree_hal_pool_acquire_result_t acquire_result) {
  iree_hal_profile_memory_event_flags_t event_flags =
      IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_QUEUE_OPERATION;
  switch (acquire_result) {
    case IREE_HAL_POOL_ACQUIRE_OK_NEEDS_WAIT:
      event_flags |= IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_WAIT_FRONTIER;
      break;
    case IREE_HAL_POOL_ACQUIRE_EXHAUSTED:
    case IREE_HAL_POOL_ACQUIRE_OVER_BUDGET:
      event_flags |= IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_WAIT_NOTIFICATION;
      break;
    default:
      break;
  }
  return event_flags;
}
// Stores |action| on |submission| and retains the action's resource, if it has
// one. Any action previously stored on the submission is overwritten without
// being released.
static void iree_hal_vulkan_queue_set_completion_action(
    iree_hal_vulkan_queue_pending_submission_t* submission,
    iree_hal_vulkan_queue_completion_action_t action) {
  submission->completion_action = action;
  if (action.resource != NULL) {
    iree_hal_resource_retain(action.resource);
  }
}
// Drops the completion action of |submission| without invoking its callback:
// any resource held by the action is released and the slot is reset to the
// null action.
static void iree_hal_vulkan_queue_release_completion_action(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  iree_hal_vulkan_queue_completion_action_t* slot =
      &submission->completion_action;
  if (slot->resource != NULL) {
    iree_hal_resource_release(slot->resource);
  }
  *slot = iree_hal_vulkan_queue_completion_action_null();
}
// Takes the completion action out of |submission| and runs it once.
//
// The slot is reset to the null action before the callback is invoked, so the
// submission no longer references the action while the callback runs. The
// callback (if any) receives |completion_status|; the action's resource (if
// any) is released afterwards.
static void iree_hal_vulkan_queue_consume_completion_action(
    iree_hal_vulkan_queue_pending_submission_t* submission,
    iree_status_t completion_status) {
  const iree_hal_vulkan_queue_completion_action_t pending =
      submission->completion_action;
  submission->completion_action =
      iree_hal_vulkan_queue_completion_action_null();

  if (pending.fn != NULL) {
    pending.fn(pending.user_data, completion_status);
  }
  if (pending.resource != NULL) {
    iree_hal_resource_release(pending.resource);
  }
}
// Copies |source_list| into caller-provided arrays and retains each semaphore.
//
// |semaphore_storage| and |payload_value_storage| must each have room for
// |source_list.count| entries. |out_list| is set to the empty list when the
// source has no entries; otherwise it references the provided storage.
static void iree_hal_vulkan_queue_capture_semaphore_list(
    iree_hal_semaphore_list_t source_list,
    iree_hal_semaphore_t** semaphore_storage, uint64_t* payload_value_storage,
    iree_hal_semaphore_list_t* out_list) {
  if (source_list.count == 0) {
    *out_list = iree_hal_semaphore_list_empty();
    return;
  }
  for (iree_host_size_t index = 0; index < source_list.count; ++index) {
    iree_hal_semaphore_t* semaphore = source_list.semaphores[index];
    semaphore_storage[index] = semaphore;
    iree_hal_semaphore_retain(semaphore);
    payload_value_storage[index] = source_list.payload_values[index];
  }
  *out_list = (iree_hal_semaphore_list_t){
      .count = source_list.count,
      .semaphores = semaphore_storage,
      .payload_values = payload_value_storage,
  };
}
// Allocates a pending submission with optional trailing payload storage owned
// by the submission allocation.
//
// The submission struct, the captured wait/signal semaphore arrays, and
// |payload_storage_length| bytes of payload all live in one allocation from
// |queue->host_allocator|; offsets into it come from
// iree_hal_vulkan_queue_calculate_submission_layout.
//
// |call|, |args|, and |flags| are only read when |kind| is
// IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_HOST_CALL.
// |out_payload_storage| may be NULL; when provided it is set to the trailing
// payload span, or left empty when |payload_storage_length| is 0.
// On failure |*out_submission| is NULL and nothing is allocated.
static iree_status_t iree_hal_vulkan_queue_pending_submission_create(
iree_hal_vulkan_queue_t* queue,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_vulkan_queue_submission_kind_t kind, iree_hal_host_call_t call,
const uint64_t args[4], iree_hal_host_call_flags_t flags,
iree_host_size_t payload_storage_length,
iree_byte_span_t* out_payload_storage,
iree_hal_vulkan_queue_pending_submission_t** out_submission) {
// Reset outputs first so early error returns leave them well-defined.
*out_submission = NULL;
if (out_payload_storage) {
*out_payload_storage = iree_byte_span_empty();
}
// Compute the total allocation size and the byte offset of each trailing
// region relative to the start of the submission.
iree_host_size_t wait_semaphores_offset = 0;
iree_host_size_t wait_payload_values_offset = 0;
iree_host_size_t signal_semaphores_offset = 0;
iree_host_size_t signal_payload_values_offset = 0;
iree_host_size_t payload_storage_offset = 0;
iree_host_size_t allocation_size = 0;
IREE_RETURN_IF_ERROR(iree_hal_vulkan_queue_calculate_submission_layout(
wait_semaphore_list.count, signal_semaphore_list.count,
payload_storage_length, &allocation_size, &wait_semaphores_offset,
&wait_payload_values_offset, &signal_semaphores_offset,
&signal_payload_values_offset, &payload_storage_offset));
iree_hal_vulkan_queue_pending_submission_t* submission = NULL;
IREE_RETURN_IF_ERROR(iree_allocator_malloc(
queue->host_allocator, allocation_size, (void**)&submission));
// Zero the whole allocation (struct and trailing storage); only fields whose
// "unset" value is non-zero are initialized explicitly below.
memset(submission, 0, allocation_size);
submission->queue = queue;
submission->kind = kind;
// Lease slots and profile queries use explicit ABSENT sentinels instead of 0.
submission->native_command_buffer_lease.slot =
IREE_HAL_VULKAN_QUEUE_COMMAND_BUFFER_SLOT_ABSENT;
for (uint32_t i = 0;
i < IREE_HAL_VULKAN_QUEUE_BUILTIN_DESCRIPTOR_SET_COUNT_MAX; ++i) {
submission->native_descriptor_leases[i].slot =
IREE_HAL_VULKAN_QUEUE_DESCRIPTOR_SLOT_ABSENT;
}
submission->profile.queue_start_query = IREE_HAL_VULKAN_PROFILE_QUERY_ABSENT;
submission->profile.queue_end_query = IREE_HAL_VULKAN_PROFILE_QUERY_ABSENT;
submission->profile.dispatch_base_query =
IREE_HAL_VULKAN_PROFILE_QUERY_ABSENT;
iree_notification_initialize(&submission->callback_notification);
// ALLOCA submissions start with the memory-wait callback marked complete.
// NOTE(review): presumably cleared elsewhere when a wait is armed — confirm.
if (kind == IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_ALLOCA) {
iree_atomic_store(&submission->alloca.memory_wait_callback_complete, 1,
iree_memory_order_relaxed);
}
// HOST_CALL submissions keep their own copy of the call, args, and flags.
if (kind == IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_HOST_CALL) {
submission->host_call.call = call;
memcpy(submission->host_call.args, args,
sizeof(submission->host_call.args));
submission->host_call.flags = flags;
}
// Capture (and retain) both semaphore lists into the trailing storage.
iree_hal_vulkan_queue_capture_semaphore_list(
wait_semaphore_list,
(iree_hal_semaphore_t**)((uint8_t*)submission + wait_semaphores_offset),
(uint64_t*)((uint8_t*)submission + wait_payload_values_offset),
&submission->wait_semaphore_list);
iree_hal_vulkan_queue_capture_semaphore_list(
signal_semaphore_list,
(iree_hal_semaphore_t**)((uint8_t*)submission + signal_semaphores_offset),
(uint64_t*)((uint8_t*)submission + signal_payload_values_offset),
&submission->signal_semaphore_list);
iree_hal_vulkan_queue_profile_submission_initialize(queue, submission);
// Expose the trailing payload bytes only when requested and non-empty.
if (out_payload_storage && payload_storage_length != 0) {
*out_payload_storage = iree_make_byte_span(
(uint8_t*)submission + payload_storage_offset, payload_storage_length);
}
*out_submission = submission;
return iree_ok_status();
}
// Releases everything owned by |submission| and frees its allocation.
//
// This drops the captured semaphore lists, any stored wait failure status,
// the wait entries and sparse bind array, queue-owned leases (descriptor
// sets, descriptor pool, BDA publication, timestamp queries, native replay,
// native command buffer), the completion action (without invoking it), and
// the kind-specific buffers/executables/command buffers. |submission| must not
// be used after this returns.
static void iree_hal_vulkan_queue_pending_submission_destroy(
iree_hal_vulkan_queue_t* queue,
iree_hal_vulkan_queue_pending_submission_t* submission) {
iree_hal_semaphore_list_release(submission->wait_semaphore_list);
iree_hal_semaphore_list_release(submission->signal_semaphore_list);
// Take ownership of any recorded wait failure status and free it.
iree_status_t wait_failure_status = (iree_status_t)iree_atomic_exchange(
&submission->wait_failure_status, 0, iree_memory_order_acquire);
iree_status_free(wait_failure_status);
if (submission->wait_entries) {
iree_allocator_free(queue->host_allocator, submission->wait_entries);
}
// Freed for every submission kind, not only SPARSE_BIND.
iree_allocator_free(queue->host_allocator, submission->sparse_bind.binds);
// Return queue-owned leases and drop the completion action unexecuted.
iree_hal_vulkan_queue_release_descriptor_cache_sets(queue, submission);
iree_hal_vulkan_queue_release_native_descriptor_pool(queue, submission);
iree_hal_vulkan_queue_release_bda_publication(queue, submission);
iree_hal_vulkan_queue_release_timestamp_queries(queue, submission);
iree_hal_vulkan_queue_release_completion_action(submission);
iree_hal_vulkan_queue_release_native_replay(queue, submission);
iree_hal_vulkan_queue_release_native_command_buffer(queue, submission);
// Release the references held by the kind-specific payload.
switch (submission->kind) {
case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_SPARSE_BIND:
break;
case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_FILL:
iree_hal_buffer_release(submission->fill.target_buffer);
break;
case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_UPDATE:
// The update owns a separately allocated copy of its source bytes.
iree_allocator_free(queue->host_allocator,
submission->update.source_data);
iree_hal_buffer_release(submission->update.target_buffer);
break;
case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_COPY:
iree_hal_buffer_release(submission->copy.source_buffer);
iree_hal_buffer_release(submission->copy.target_buffer);
break;
case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_ALLOCA:
// End any pool notification observation still held by this alloca.
if (submission->alloca.pool_notification_observation_held) {
submission->alloca.pool_notification_observation_held = false;
iree_async_notification_end_observe(
submission->alloca.pool_notification);
}
iree_hal_buffer_release(submission->alloca.buffer);
break;
case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_DEALLOCA:
iree_hal_buffer_release(submission->dealloca.buffer);
break;
case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_EXECUTE:
iree_hal_command_buffer_release(submission->execute.command_buffer);
// Binding table buffers are released here unless the execute flags say
// their lifetime is borrowed.
if (submission->execute.binding_table_bindings &&
!iree_any_bit_set(
submission->execute.flags,
IREE_HAL_EXECUTE_FLAG_BORROW_BINDING_TABLE_LIFETIME)) {
for (iree_host_size_t i = 0;
i < submission->execute.binding_table_count; ++i) {
iree_hal_buffer_release(
submission->execute.binding_table_bindings[i].buffer);
}
}
break;
case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_DISPATCH:
iree_hal_executable_release(submission->dispatch.executable);
if (submission->dispatch.bindings) {
for (iree_host_size_t i = 0; i < submission->dispatch.binding_count;
++i) {
iree_hal_buffer_release(submission->dispatch.bindings[i].buffer);
}
}
// Indirect dispatches also reference the workgroup count buffer.
if (iree_hal_dispatch_uses_indirect_parameters(
submission->dispatch.flags)) {
iree_hal_buffer_release(
submission->dispatch.config.workgroup_count_ref.buffer);
}
break;
case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_BARRIER:
case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_HOST_CALL:
break;
}
iree_notification_deinitialize(&submission->callback_notification);
iree_allocator_free(queue->host_allocator, submission);
}
// Appends |submission| to the tail of the queue's singly-linked pending list.
static void iree_hal_vulkan_queue_append_pending_submission(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  submission->next = NULL;
  // The link to patch is the old tail's |next|, or the list head when the
  // list is currently empty.
  iree_hal_vulkan_queue_pending_submission_t** tail_link =
      queue->pending_tail ? &queue->pending_tail->next : &queue->pending_head;
  *tail_link = submission;
  queue->pending_tail = submission;
}
// Detaches the leading run of pending submissions whose epoch is at or below
// |completed_epoch| and returns it as a NULL-terminated list.
//
// Returns NULL when the head of the pending list has not completed yet (or
// the list is empty). Submissions that remain stay on the queue in order.
static iree_hal_vulkan_queue_pending_submission_t*
iree_hal_vulkan_queue_pop_completed_submissions(iree_hal_vulkan_queue_t* queue,
                                                uint64_t completed_epoch) {
  iree_hal_vulkan_queue_pending_submission_t* const first_completed =
      queue->pending_head;
  iree_hal_vulkan_queue_pending_submission_t* last_completed = NULL;
  iree_hal_vulkan_queue_pending_submission_t* cursor = first_completed;
  for (; cursor != NULL && cursor->epoch <= completed_epoch;
       cursor = cursor->next) {
    last_completed = cursor;
  }
  // |cursor| is now the first submission that is still pending (or NULL).
  queue->pending_head = cursor;
  if (last_completed == NULL) {
    return NULL;
  }
  if (cursor == NULL) {
    queue->pending_tail = NULL;
  }
  last_completed->next = NULL;
  return first_completed;
}
// Fails every semaphore in |signal_semaphore_list| with |status|.
//
// |status| is always consumed: it is handed to the semaphore list when there
// is at least one semaphore and freed here when the list is empty.
static void iree_hal_vulkan_queue_fail_signal_list(
    iree_hal_semaphore_list_t signal_semaphore_list, iree_status_t status) {
  if (signal_semaphore_list.count != 0) {
    iree_hal_semaphore_list_fail(signal_semaphore_list, status);
  } else {
    iree_status_free(status);
  }
}
// Signals |signal_semaphore_list| with |frontier|; if signaling reports an
// error, that error is used to fail the same list instead.
static void iree_hal_vulkan_queue_signal_list_or_fail(
    iree_hal_semaphore_list_t signal_semaphore_list,
    const iree_async_frontier_t* frontier) {
  iree_status_t signal_status =
      iree_hal_semaphore_list_signal(signal_semaphore_list, frontier);
  if (iree_status_is_ok(signal_status)) {
    return;
  }
  iree_hal_vulkan_queue_fail_signal_list(signal_semaphore_list, signal_status);
}
// Invokes the host call stored in |submission| and resolves its signal
// semaphores.
//
// - A non-OK |completion_status| fails the signal list with a clone of that
//   status and the call is not invoked.
// - With IREE_HAL_HOST_CALL_FLAG_NON_BLOCKING the signal list is signaled
//   before the call, the call sees an empty signal list, and its returned
//   status is discarded.
// - Otherwise the call sees the real signal list. A deferred result leaves the
//   semaphores untouched, OK signals them, and any other status fails them.
static void iree_hal_vulkan_queue_execute_host_call(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission,
    iree_status_t completion_status) {
  iree_hal_semaphore_list_t signals = submission->signal_semaphore_list;
  const iree_async_frontier_t* frontier =
      iree_async_fixed_frontier_as_const_frontier(&submission->frontier);

  if (!iree_status_is_ok(completion_status)) {
    iree_hal_vulkan_queue_fail_signal_list(
        signals, iree_status_clone(completion_status));
    return;
  }

  iree_hal_host_call_context_t context = {
      .device = (iree_hal_device_t*)queue->device,
      .queue_affinity = queue->queue_affinity,
      .signal_semaphore_list = signals,
  };
  const bool is_nonblocking = iree_any_bit_set(
      submission->host_call.flags, IREE_HAL_HOST_CALL_FLAG_NON_BLOCKING);
  if (is_nonblocking) {
    // Signal up front and hide the semaphores from the callee.
    iree_hal_vulkan_queue_signal_list_or_fail(signals, frontier);
    context.signal_semaphore_list = iree_hal_semaphore_list_empty();
  }

  iree_status_t call_status =
      submission->host_call.call.fn(submission->host_call.call.user_data,
                                    submission->host_call.args, &context);
  if (is_nonblocking || iree_status_is_deferred(call_status)) {
    iree_status_free(call_status);
    return;
  }
  if (iree_status_is_ok(call_status)) {
    iree_hal_vulkan_queue_signal_list_or_fail(signals, frontier);
    return;
  }
  iree_hal_vulkan_queue_fail_signal_list(signals, call_status);
}
// Returns true when |buffer| can be referenced while recording commands: any
// buffer that is not a local transient buffer qualifies, and a transient
// buffer qualifies only while it has a backing buffer.
static bool iree_hal_vulkan_queue_buffer_has_recordable_backing(
    iree_hal_buffer_t* buffer) {
  if (!iree_hal_local_transient_buffer_isa(buffer)) {
    return true;
  }
  return iree_hal_local_transient_buffer_backing_buffer(buffer) != NULL;
}
// Verifies that |buffer| has a backing that commands can be recorded against.
//
// Buffers that are not local transient buffers, and transient buffers with a
// backing buffer, pass. A transient buffer without backing fails with
// INVALID_ARGUMENT when it has been queued for deallocation and with
// FAILED_PRECONDITION otherwise. |usage| names the buffer's role in the error
// message.
static iree_status_t iree_hal_vulkan_queue_validate_recordable_backing(
    iree_hal_buffer_t* buffer, iree_string_view_t usage) {
  const bool is_recordable =
      !iree_hal_local_transient_buffer_isa(buffer) ||
      iree_hal_local_transient_buffer_backing_buffer(buffer) != NULL;
  if (is_recordable) {
    return iree_ok_status();
  }
  if (!iree_hal_local_transient_buffer_is_dealloca_queued(buffer)) {
    return iree_make_status(
        IREE_STATUS_FAILED_PRECONDITION,
        "Vulkan queue %.*s buffer has no recordable device backing",
        (int)usage.size, usage.data);
  }
  return iree_make_status(
      IREE_STATUS_INVALID_ARGUMENT,
      "Vulkan queue %.*s buffer has been queued for deallocation",
      (int)usage.size, usage.data);
}
// Reports in |out_is_native| whether |buffer| resolves to a Vulkan buffer or
// Vulkan sparse buffer allocation.
//
// Buffers without a recordable backing report false with an OK status. An
// error is returned when the buffer is not device-visible or its backing
// cannot be resolved; |out_is_native| is false in that case.
static iree_status_t iree_hal_vulkan_queue_buffer_is_native(
    iree_hal_buffer_t* buffer, bool* out_is_native) {
  *out_is_native = false;
  if (iree_hal_vulkan_queue_buffer_has_recordable_backing(buffer)) {
    IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_memory_type(
        iree_hal_buffer_memory_type(buffer),
        IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE));
    iree_hal_buffer_t* resolved_backing = NULL;
    IREE_RETURN_IF_ERROR(
        iree_hal_vulkan_buffer_resolve_backing(buffer, &resolved_backing));
    iree_hal_buffer_t* allocation =
        iree_hal_buffer_allocated_buffer(resolved_backing);
    bool is_native = iree_hal_vulkan_buffer_isa(allocation);
    if (!is_native) {
      is_native = iree_hal_vulkan_sparse_buffer_isa(allocation);
    }
    *out_is_native = is_native;
  }
  return iree_ok_status();
}
// Widens a 1-, 2-, or 4-byte fill pattern to a 32-bit word by repeating it.
//
// |pattern| must reference at least |pattern_length| readable bytes. Any other
// length fails with INVALID_ARGUMENT and leaves |*out_expanded_pattern| at 0.
static iree_status_t iree_hal_vulkan_queue_expand_fill_pattern(
    const uint8_t* pattern, iree_host_size_t pattern_length,
    uint32_t* out_expanded_pattern) {
  *out_expanded_pattern = 0;
  if (pattern_length == sizeof(uint8_t)) {
    // Multiplying by 0x01010101 replicates the byte into all four lanes.
    *out_expanded_pattern = (uint32_t)pattern[0] * 0x01010101u;
  } else if (pattern_length == sizeof(uint16_t)) {
    uint16_t halfword = 0;
    memcpy(&halfword, pattern, sizeof(halfword));
    *out_expanded_pattern = ((uint32_t)halfword << 16) | (uint32_t)halfword;
  } else if (pattern_length == sizeof(uint32_t)) {
    memcpy(out_expanded_pattern, pattern, sizeof(*out_expanded_pattern));
  } else {
    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                            "Vulkan queue fill pattern length must be 1, 2, "
                            "or 4 bytes (got %" PRIhsz ")",
                            pattern_length);
  }
  return iree_ok_status();
}
// Records the FILL submission into |submission->native_command_buffer|.
//
// The target is resolved to its backing VkBuffer and backing-relative offset.
// When both offset and length are multiples of 4 the fill is one
// vkCmdFillBuffer. Otherwise the built-in pipelines record the unaligned fill
// and vkCmdFillBuffer is used only for the 4-byte aligned interior, if any.
//
// Returns an error when the target lacks TRANSFER_TARGET usage, the backing
// cannot be resolved, the pattern length is unsupported, an unaligned fill is
// requested without built-ins, the range overflows, or a Vulkan call fails.
static iree_status_t iree_hal_vulkan_queue_record_fill_native(
iree_hal_vulkan_queue_t* queue,
iree_hal_vulkan_queue_pending_submission_t* submission) {
IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_usage(
iree_hal_buffer_allowed_usage(submission->fill.target_buffer),
IREE_HAL_BUFFER_USAGE_TRANSFER_TARGET));
iree_hal_buffer_t* target_backing = NULL;
IREE_RETURN_IF_ERROR(iree_hal_vulkan_buffer_resolve_backing(
submission->fill.target_buffer, &target_backing));
VkDeviceMemory target_memory = VK_NULL_HANDLE;
VkBuffer target_handle = VK_NULL_HANDLE;
IREE_RETURN_IF_ERROR(iree_hal_vulkan_buffer_handle(
target_backing, &target_memory, &target_handle));
// Only the VkBuffer handle is needed here.
(void)target_memory;
iree_device_size_t target_offset = 0;
IREE_RETURN_IF_ERROR(iree_hal_vulkan_buffer_resolve_backing_offset(
submission->fill.target_buffer, target_backing,
submission->fill.target_offset, &target_offset));
uint32_t pattern = 0;
IREE_RETURN_IF_ERROR(iree_hal_vulkan_queue_expand_fill_pattern(
submission->fill.pattern, submission->fill.pattern_length, &pattern));
// The unaligned path is taken when the backing offset or the length is not a
// multiple of 4 bytes.
const bool needs_unaligned_fill =
target_offset % sizeof(uint32_t) != 0 ||
submission->fill.length % sizeof(uint32_t) != 0;
if (needs_unaligned_fill && !queue->builtins) {
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Vulkan unaligned native queue fill requires built-in pipelines");
}
// Guard the offset + length sum computed below against wrapping.
if (needs_unaligned_fill &&
submission->fill.length >
IREE_DEVICE_SIZE_MAX - (iree_device_size_t)target_offset) {
return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
"Vulkan native queue fill range overflows");
}
VkDescriptorSet
descriptor_sets[IREE_HAL_VULKAN_QUEUE_BUILTIN_DESCRIPTOR_SET_COUNT_MAX] =
{
VK_NULL_HANDLE,
VK_NULL_HANDLE,
};
uint32_t descriptor_set_count = 0;
// Descriptor sets for the built-in are acquired before recording begins.
if (needs_unaligned_fill) {
descriptor_set_count =
iree_hal_vulkan_builtins_fill_unaligned_descriptor_set_count(
(VkDeviceSize)target_offset, (VkDeviceSize)submission->fill.length);
IREE_RETURN_IF_ERROR(iree_hal_vulkan_queue_acquire_descriptor_cache_sets(
queue, submission, descriptor_set_count, descriptor_sets));
}
VkCommandBufferBeginInfo begin_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
};
IREE_RETURN_IF_ERROR(iree_vkBeginCommandBuffer(
IREE_VULKAN_DEVICE(&queue->syms), submission->native_command_buffer,
&begin_info));
iree_hal_vulkan_queue_profile_reset_native_timestamps(queue, submission);
iree_hal_vulkan_queue_profile_write_timestamp_begin(queue, submission);
VkDeviceSize fill_offset = (VkDeviceSize)target_offset;
VkDeviceSize fill_length = (VkDeviceSize)submission->fill.length;
if (needs_unaligned_fill) {
// NOTE(review): the built-in receives the full range; presumably it writes
// only the unaligned edges — confirm against the built-in implementation.
IREE_RETURN_IF_ERROR(
iree_hal_vulkan_builtins_record_fill_unaligned_descriptor_sets(
queue->builtins, submission->native_command_buffer, descriptor_sets,
descriptor_set_count, target_handle, (VkDeviceSize)target_offset,
(VkDeviceSize)submission->fill.length, submission->fill.pattern,
submission->fill.pattern_length));
// Shrink the vkCmdFillBuffer range to the aligned interior: round the start
// up and the end down to a multiple of 4.
const VkDeviceSize target_end =
(VkDeviceSize)target_offset + (VkDeviceSize)submission->fill.length;
const VkDeviceSize aligned_target_offset =
iree_device_align(target_offset, sizeof(uint32_t));
const VkDeviceSize aligned_target_end =
target_end & ~(VkDeviceSize)(sizeof(uint32_t) - 1);
if (aligned_target_offset >= aligned_target_end) {
// No whole aligned word lies inside the range.
fill_length = 0;
} else {
fill_offset = aligned_target_offset;
fill_length = aligned_target_end - aligned_target_offset;
}
}
if (fill_length != 0) {
iree_vkCmdFillBuffer(IREE_VULKAN_DEVICE(&queue->syms),
submission->native_command_buffer, target_handle,
fill_offset, fill_length, pattern);
}
iree_hal_vulkan_queue_profile_write_timestamp_end(queue, submission);
return iree_vkEndCommandBuffer(IREE_VULKAN_DEVICE(&queue->syms),
submission->native_command_buffer);
}
// Resolves the signal semaphores of a completed FILL submission: they are
// signaled with the submission frontier on success and failed with a clone of
// |completion_status| otherwise.
static void iree_hal_vulkan_queue_execute_fill(
    iree_hal_vulkan_queue_pending_submission_t* submission,
    iree_status_t completion_status) {
  iree_hal_semaphore_list_t signals = submission->signal_semaphore_list;
  const iree_async_frontier_t* submission_frontier =
      iree_async_fixed_frontier_as_const_frontier(&submission->frontier);
  if (!iree_status_is_ok(completion_status)) {
    iree_hal_vulkan_queue_fail_signal_list(
        signals, iree_status_clone(completion_status));
    return;
  }
  iree_hal_vulkan_queue_signal_list_or_fail(signals, submission_frontier);
}
// Records |length| bytes of |source_data| (starting at |source_data_offset|)
// into |target_handle| at |target_offset| as a sequence of vkCmdUpdateBuffer
// commands of at most 65536 bytes each.
//
// A zero |length| records nothing. Returns OUT_OF_RANGE when
// |source_data_offset| + |length| would not fit in iree_host_size_t.
static iree_status_t iree_hal_vulkan_queue_record_update_chunks(
    iree_hal_vulkan_queue_t* queue, VkCommandBuffer native_command_buffer,
    VkBuffer target_handle, VkDeviceSize target_offset, VkDeviceSize length,
    const uint8_t* source_data, iree_host_size_t source_data_offset) {
  if (length == 0) {
    return iree_ok_status();
  }
  if (length > (VkDeviceSize)(IREE_HOST_SIZE_MAX - source_data_offset)) {
    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
                            "Vulkan native queue update source offset "
                            "overflows");
  }
  const VkDeviceSize max_update_length = 65536;
  const uint8_t* source_base = source_data + source_data_offset;
  VkDeviceSize recorded_length = 0;
  while (recorded_length < length) {
    const VkDeviceSize pending_length = length - recorded_length;
    const VkDeviceSize chunk_length =
        iree_min(pending_length, max_update_length);
    iree_vkCmdUpdateBuffer(IREE_VULKAN_DEVICE(&queue->syms),
                           native_command_buffer, target_handle,
                           target_offset + recorded_length, chunk_length,
                           source_base + (iree_host_size_t)recorded_length);
    recorded_length += chunk_length;
  }
  return iree_ok_status();
}
// Records the UPDATE submission into |submission->native_command_buffer|.
//
// The target is resolved to its backing VkBuffer and backing-relative offset.
// When both offset and length are multiples of 4 the whole range is recorded
// as chunked vkCmdUpdateBuffer commands. Otherwise the built-in pipelines
// record the unaligned update and the chunked path is used only for the
// 4-byte aligned interior, if any.
//
// Returns an error when the target lacks TRANSFER_TARGET usage, the backing
// cannot be resolved, an unaligned update is requested without built-ins, the
// range overflows, or a Vulkan call fails.
static iree_status_t iree_hal_vulkan_queue_record_update_native(
iree_hal_vulkan_queue_t* queue,
iree_hal_vulkan_queue_pending_submission_t* submission) {
IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_usage(
iree_hal_buffer_allowed_usage(submission->update.target_buffer),
IREE_HAL_BUFFER_USAGE_TRANSFER_TARGET));
iree_hal_buffer_t* target_backing = NULL;
IREE_RETURN_IF_ERROR(iree_hal_vulkan_buffer_resolve_backing(
submission->update.target_buffer, &target_backing));
VkDeviceMemory target_memory = VK_NULL_HANDLE;
VkBuffer target_handle = VK_NULL_HANDLE;
IREE_RETURN_IF_ERROR(iree_hal_vulkan_buffer_handle(
target_backing, &target_memory, &target_handle));
// Only the VkBuffer handle is needed here.
(void)target_memory;
iree_device_size_t target_offset = 0;
IREE_RETURN_IF_ERROR(iree_hal_vulkan_buffer_resolve_backing_offset(
submission->update.target_buffer, target_backing,
submission->update.target_offset, &target_offset));
// The unaligned path is taken when the backing offset or the length is not a
// multiple of 4 bytes.
const bool needs_unaligned_update =
target_offset % sizeof(uint32_t) != 0 ||
submission->update.length % sizeof(uint32_t) != 0;
if (needs_unaligned_update && !queue->builtins) {
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Vulkan unaligned native queue update requires built-in pipelines");
}
// Guard the offset + length sum computed below against wrapping.
if (needs_unaligned_update &&
submission->update.length >
IREE_DEVICE_SIZE_MAX - (iree_device_size_t)target_offset) {
return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
"Vulkan native queue update range overflows");
}
VkDescriptorSet
descriptor_sets[IREE_HAL_VULKAN_QUEUE_BUILTIN_DESCRIPTOR_SET_COUNT_MAX] =
{
VK_NULL_HANDLE,
VK_NULL_HANDLE,
};
uint32_t descriptor_set_count = 0;
// Descriptor sets for the built-in are acquired before recording begins.
if (needs_unaligned_update) {
descriptor_set_count =
iree_hal_vulkan_builtins_update_unaligned_descriptor_set_count(
(VkDeviceSize)target_offset,
(VkDeviceSize)submission->update.length);
IREE_RETURN_IF_ERROR(iree_hal_vulkan_queue_acquire_descriptor_cache_sets(
queue, submission, descriptor_set_count, descriptor_sets));
}
VkCommandBufferBeginInfo begin_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
};
IREE_RETURN_IF_ERROR(iree_vkBeginCommandBuffer(
IREE_VULKAN_DEVICE(&queue->syms), submission->native_command_buffer,
&begin_info));
iree_hal_vulkan_queue_profile_reset_native_timestamps(queue, submission);
iree_hal_vulkan_queue_profile_write_timestamp_begin(queue, submission);
VkDeviceSize update_offset = (VkDeviceSize)target_offset;
VkDeviceSize update_length = (VkDeviceSize)submission->update.length;
iree_host_size_t source_data_offset = 0;
if (needs_unaligned_update) {
// NOTE(review): the built-in receives the full range and source data;
// presumably it writes only the unaligned edges — confirm against the
// built-in implementation.
IREE_RETURN_IF_ERROR(
iree_hal_vulkan_builtins_record_update_unaligned_descriptor_sets(
queue->builtins, submission->native_command_buffer, descriptor_sets,
descriptor_set_count, target_handle, (VkDeviceSize)target_offset,
(VkDeviceSize)submission->update.length,
submission->update.source_data,
(iree_host_size_t)submission->update.length));
// Shrink the chunked update to the aligned interior: round the start up and
// the end down to a multiple of 4.
const VkDeviceSize target_end =
(VkDeviceSize)target_offset + (VkDeviceSize)submission->update.length;
const VkDeviceSize aligned_target_offset =
iree_device_align(target_offset, sizeof(uint32_t));
const VkDeviceSize aligned_target_end =
target_end & ~(VkDeviceSize)(sizeof(uint32_t) - 1);
if (aligned_target_offset >= aligned_target_end) {
// No whole aligned word lies inside the range.
update_length = 0;
} else {
update_offset = aligned_target_offset;
update_length = aligned_target_end - aligned_target_offset;
// Skip the source bytes that correspond to the unaligned head.
source_data_offset =
(iree_host_size_t)(aligned_target_offset - target_offset);
}
}
// A zero update_length records no vkCmdUpdateBuffer commands.
IREE_RETURN_IF_ERROR(iree_hal_vulkan_queue_record_update_chunks(
queue, submission->native_command_buffer, target_handle, update_offset,
update_length, submission->update.source_data, source_data_offset));
iree_hal_vulkan_queue_profile_write_timestamp_end(queue, submission);
return iree_vkEndCommandBuffer(IREE_VULKAN_DEVICE(&queue->syms),
submission->native_command_buffer);
}
// Reports in |out_can_record_native| whether an update of |length| bytes at
// |target_offset| within |target_buffer| qualifies for native recording.
//
// The answer is true only when |length| is non-zero, no larger than
// IREE_HAL_COMMAND_BUFFER_MAX_UPDATE_SIZE, a multiple of 4, and the resolved
// backing offset is also a multiple of 4. Errors from resolving the backing
// are returned with |out_can_record_native| left false.
static iree_status_t iree_hal_vulkan_queue_can_record_update_native(
    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
    iree_device_size_t length, bool* out_can_record_native) {
  *out_can_record_native = false;
  const bool has_recordable_length =
      length != 0 && length <= IREE_HAL_COMMAND_BUFFER_MAX_UPDATE_SIZE &&
      length % sizeof(uint32_t) == 0;
  if (!has_recordable_length) {
    return iree_ok_status();
  }
  iree_hal_buffer_t* resolved_backing = NULL;
  IREE_RETURN_IF_ERROR(
      iree_hal_vulkan_buffer_resolve_backing(target_buffer, &resolved_backing));
  iree_device_size_t resolved_offset = 0;
  IREE_RETURN_IF_ERROR(iree_hal_vulkan_buffer_resolve_backing_offset(
      target_buffer, resolved_backing, target_offset, &resolved_offset));
  *out_can_record_native = (resolved_offset % sizeof(uint32_t)) == 0;
  return iree_ok_status();
}
// Resolves the signal semaphores of a completed UPDATE submission: they are
// signaled with the submission frontier on success and failed with a clone of
// |completion_status| otherwise.
static void iree_hal_vulkan_queue_execute_update(
    iree_hal_vulkan_queue_pending_submission_t* submission,
    iree_status_t completion_status) {
  iree_hal_semaphore_list_t signals = submission->signal_semaphore_list;
  const iree_async_frontier_t* submission_frontier =
      iree_async_fixed_frontier_as_const_frontier(&submission->frontier);
  if (!iree_status_is_ok(completion_status)) {
    iree_hal_vulkan_queue_fail_signal_list(
        signals, iree_status_clone(completion_status));
    return;
  }
  iree_hal_vulkan_queue_signal_list_or_fail(signals, submission_frontier);
}
// Records a single-region buffer-to-buffer copy into |command_buffer|.
//
// Both buffers are validated for transfer usage and resolved to their backing
// VkBuffer handles and backing-relative offsets before recording begins. The
// command buffer is begun as one-time-submit and ended by this function.
// |submission| is optional; when provided, profiling timestamps are reset and
// written around the copy.
static iree_status_t iree_hal_vulkan_queue_record_copy_native_buffers(
    iree_hal_vulkan_queue_t* queue, iree_hal_buffer_t* source_buffer,
    iree_device_size_t source_offset, iree_hal_buffer_t* target_buffer,
    iree_device_size_t target_offset, iree_device_size_t length,
    VkCommandBuffer command_buffer,
    const iree_hal_vulkan_queue_pending_submission_t* submission) {
  // Usage checks: source first, then target.
  IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_usage(
      iree_hal_buffer_allowed_usage(source_buffer),
      IREE_HAL_BUFFER_USAGE_TRANSFER_SOURCE));
  IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_usage(
      iree_hal_buffer_allowed_usage(target_buffer),
      IREE_HAL_BUFFER_USAGE_TRANSFER_TARGET));

  // Resolve the backing buffers.
  iree_hal_buffer_t* src_backing = NULL;
  IREE_RETURN_IF_ERROR(
      iree_hal_vulkan_buffer_resolve_backing(source_buffer, &src_backing));
  iree_hal_buffer_t* dst_backing = NULL;
  IREE_RETURN_IF_ERROR(
      iree_hal_vulkan_buffer_resolve_backing(target_buffer, &dst_backing));

  // Fetch the Vulkan handles; the device memory handles are not needed.
  VkDeviceMemory src_memory = VK_NULL_HANDLE;
  VkBuffer src_handle = VK_NULL_HANDLE;
  IREE_RETURN_IF_ERROR(
      iree_hal_vulkan_buffer_handle(src_backing, &src_memory, &src_handle));
  (void)src_memory;
  VkDeviceMemory dst_memory = VK_NULL_HANDLE;
  VkBuffer dst_handle = VK_NULL_HANDLE;
  IREE_RETURN_IF_ERROR(
      iree_hal_vulkan_buffer_handle(dst_backing, &dst_memory, &dst_handle));
  (void)dst_memory;

  // Translate the buffer-relative offsets into backing-relative offsets.
  iree_device_size_t src_backing_offset = 0;
  IREE_RETURN_IF_ERROR(iree_hal_vulkan_buffer_resolve_backing_offset(
      source_buffer, src_backing, source_offset, &src_backing_offset));
  iree_device_size_t dst_backing_offset = 0;
  IREE_RETURN_IF_ERROR(iree_hal_vulkan_buffer_resolve_backing_offset(
      target_buffer, dst_backing, target_offset, &dst_backing_offset));

  const VkBufferCopy region = {
      .srcOffset = (VkDeviceSize)src_backing_offset,
      .dstOffset = (VkDeviceSize)dst_backing_offset,
      .size = (VkDeviceSize)length,
  };
  VkCommandBufferBeginInfo begin_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
      .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
  };
  IREE_RETURN_IF_ERROR(iree_vkBeginCommandBuffer(
      IREE_VULKAN_DEVICE(&queue->syms), command_buffer, &begin_info));
  if (submission != NULL) {
    iree_hal_vulkan_queue_profile_reset_native_timestamps(queue, submission);
    iree_hal_vulkan_queue_profile_write_timestamp_begin(queue, submission);
  }
  iree_vkCmdCopyBuffer(IREE_VULKAN_DEVICE(&queue->syms), command_buffer,
                       src_handle, dst_handle, /*regionCount=*/1, &region);
  if (submission != NULL) {
    iree_hal_vulkan_queue_profile_write_timestamp_end(queue, submission);
  }
  return iree_vkEndCommandBuffer(IREE_VULKAN_DEVICE(&queue->syms),
                                 command_buffer);
}
// Records the COPY submission into its own native command buffer, with
// profiling timestamps attributed to |submission|.
static iree_status_t iree_hal_vulkan_queue_record_copy_native(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  iree_hal_buffer_t* source_buffer = submission->copy.source_buffer;
  iree_hal_buffer_t* target_buffer = submission->copy.target_buffer;
  const iree_device_size_t source_offset = submission->copy.source_offset;
  const iree_device_size_t target_offset = submission->copy.target_offset;
  const iree_device_size_t copy_length = submission->copy.length;
  return iree_hal_vulkan_queue_record_copy_native_buffers(
      queue, source_buffer, source_offset, target_buffer, target_offset,
      copy_length, submission->native_command_buffer, submission);
}
// Resolves the signal semaphores of a completed COPY submission: they are
// signaled with the submission frontier on success and failed with a clone of
// |completion_status| otherwise.
static void iree_hal_vulkan_queue_execute_copy(
    iree_hal_vulkan_queue_pending_submission_t* submission,
    iree_status_t completion_status) {
  iree_hal_semaphore_list_t signals = submission->signal_semaphore_list;
  const iree_async_frontier_t* submission_frontier =
      iree_async_fixed_frontier_as_const_frontier(&submission->frontier);
  if (!iree_status_is_ok(completion_status)) {
    iree_hal_vulkan_queue_fail_signal_list(
        signals, iree_status_clone(completion_status));
    return;
  }
  iree_hal_vulkan_queue_signal_list_or_fail(signals, submission_frontier);
}
// Resolves the signal semaphores of a completed command buffer submission:
// they are signaled with the submission frontier on success and failed with a
// clone of |completion_status| otherwise.
static void iree_hal_vulkan_queue_execute_command_buffer(
    iree_hal_vulkan_queue_pending_submission_t* submission,
    iree_status_t completion_status) {
  iree_hal_semaphore_list_t signals = submission->signal_semaphore_list;
  const iree_async_frontier_t* submission_frontier =
      iree_async_fixed_frontier_as_const_frontier(&submission->frontier);
  if (!iree_status_is_ok(completion_status)) {
    iree_hal_vulkan_queue_fail_signal_list(
        signals, iree_status_clone(completion_status));
    return;
  }
  iree_hal_vulkan_queue_signal_list_or_fail(signals, submission_frontier);
}
// Finishes an ALLOCA submission once its completion status is known.
//
// On success the transient buffer is committed, a QUEUE_ALLOCA memory event is
// recorded, and the signal semaphores are signaled with the submission
// frontier. On failure the buffer is decommitted, its reservation is released
// against |alloca.wait_frontier|, QUEUE_ALLOCA (and, when a reservation
// existed, POOL_RELEASE) events carrying the failure code are recorded, and
// the signal semaphores are failed with a clone of |completion_status|.
static void iree_hal_vulkan_queue_complete_alloca(
iree_hal_vulkan_queue_pending_submission_t* submission,
iree_status_t completion_status) {
iree_hal_semaphore_list_t signal_semaphore_list =
submission->signal_semaphore_list;
const iree_async_frontier_t* frontier =
iree_async_fixed_frontier_as_const_frontier(&submission->frontier);
if (iree_status_is_ok(completion_status)) {
iree_hal_local_transient_buffer_commit(submission->alloca.buffer);
iree_hal_pool_t* pool = NULL;
iree_hal_pool_reservation_t reservation;
// |reservation| is only passed on when the query reports one exists.
const bool has_reservation =
iree_hal_local_transient_buffer_query_reservation(
submission->alloca.buffer, &pool, &reservation);
// UINT32_MAX fills the argument slot that carries iree_status_code(...) on
// the failure path below.
// NOTE(review): backing_id is read from |sparse_bind.buffer| on an ALLOCA
// submission — confirm that field is valid for this kind.
iree_hal_vulkan_queue_profile_record_memory_event(
submission, IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_QUEUE_ALLOCA,
IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_QUEUE_OPERATION, UINT32_MAX, pool,
submission->alloca.params, has_reservation ? &reservation : NULL,
(uint64_t)(uintptr_t)submission->sparse_bind.buffer,
submission->alloca.allocation_size, frontier->entry_count);
iree_hal_vulkan_queue_signal_list_or_fail(signal_semaphore_list, frontier);
} else {
iree_hal_pool_t* pool = NULL;
iree_hal_pool_reservation_t reservation;
// Snapshot the reservation before it is released so the events below can
// still describe it.
const bool has_reservation =
iree_hal_local_transient_buffer_query_reservation(
submission->alloca.buffer, &pool, &reservation);
iree_hal_local_transient_buffer_decommit(submission->alloca.buffer);
iree_hal_local_transient_buffer_release_reservation(
submission->alloca.buffer, submission->alloca.wait_frontier);
iree_hal_vulkan_queue_profile_record_memory_event(
submission, IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_QUEUE_ALLOCA,
IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_QUEUE_OPERATION,
iree_status_code(completion_status), pool, submission->alloca.params,
has_reservation ? &reservation : NULL,
(uint64_t)(uintptr_t)submission->sparse_bind.buffer,
submission->alloca.allocation_size,
submission->alloca.wait_frontier
? submission->alloca.wait_frontier->entry_count
: 0);
if (has_reservation) {
iree_hal_vulkan_queue_profile_record_memory_event(
submission, IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_POOL_RELEASE,
IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_QUEUE_OPERATION,
iree_status_code(completion_status), pool, submission->alloca.params,
&reservation, /*backing_id=*/0, submission->alloca.allocation_size,
submission->alloca.wait_frontier
? submission->alloca.wait_frontier->entry_count
: 0);
}
// The wait frontier is no longer referenced by this submission.
submission->alloca.wait_frontier = NULL;
iree_hal_vulkan_queue_fail_signal_list(
signal_semaphore_list, iree_status_clone(completion_status));
}
}
// Finishes a DEALLOCA submission once its completion status is known.
//
// On success the transient buffer is decommitted, its reservation is released
// against the submission frontier, QUEUE_DEALLOCA (and, when a reservation
// existed, POOL_RELEASE) memory events are recorded, and the signal
// semaphores are signaled. On failure the buffer is left as-is, a
// QUEUE_DEALLOCA event carrying the failure code is recorded, and the signal
// semaphores are failed with a clone of |completion_status|.
static void iree_hal_vulkan_queue_complete_dealloca(
iree_hal_vulkan_queue_pending_submission_t* submission,
iree_status_t completion_status) {
iree_hal_semaphore_list_t signal_semaphore_list =
submission->signal_semaphore_list;
const iree_async_frontier_t* frontier =
iree_async_fixed_frontier_as_const_frontier(&submission->frontier);
if (iree_status_is_ok(completion_status)) {
iree_hal_pool_t* pool = NULL;
iree_hal_pool_reservation_t reservation;
// Snapshot the reservation, buffer parameters, and size before the buffer
// is decommitted and its reservation released.
const bool has_reservation =
iree_hal_local_transient_buffer_query_reservation(
submission->dealloca.buffer, &pool, &reservation);
const iree_hal_buffer_params_t params = {
.type = iree_hal_buffer_memory_type(submission->dealloca.buffer),
.access = iree_hal_buffer_allowed_access(submission->dealloca.buffer),
.usage = iree_hal_buffer_allowed_usage(submission->dealloca.buffer),
};
const iree_device_size_t allocation_size =
iree_hal_buffer_allocation_size(submission->dealloca.buffer);
iree_hal_local_transient_buffer_decommit(submission->dealloca.buffer);
iree_hal_local_transient_buffer_release_reservation(
submission->dealloca.buffer, frontier);
// UINT32_MAX fills the argument slot that carries iree_status_code(...) on
// the failure path below.
iree_hal_vulkan_queue_profile_record_memory_event(
submission, IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_QUEUE_DEALLOCA,
IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_QUEUE_OPERATION, UINT32_MAX, pool,
params, has_reservation ? &reservation : NULL, /*backing_id=*/0,
allocation_size, frontier->entry_count);
if (has_reservation) {
iree_hal_vulkan_queue_profile_record_memory_event(
submission, IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_POOL_RELEASE,
IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_QUEUE_OPERATION, UINT32_MAX, pool,
params, &reservation, /*backing_id=*/0, allocation_size,
frontier->entry_count);
}
iree_hal_vulkan_queue_signal_list_or_fail(signal_semaphore_list, frontier);
} else {
// Failure path: no decommit or reservation release is performed here; only
// the failed event is recorded, without pool or reservation details.
const iree_hal_buffer_params_t params = {
.type = iree_hal_buffer_memory_type(submission->dealloca.buffer),
.access = iree_hal_buffer_allowed_access(submission->dealloca.buffer),
.usage = iree_hal_buffer_allowed_usage(submission->dealloca.buffer),
};
iree_hal_vulkan_queue_profile_record_memory_event(
submission, IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_QUEUE_DEALLOCA,
IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_QUEUE_OPERATION,
iree_status_code(completion_status), /*pool=*/NULL, params,
/*reservation=*/NULL, /*backing_id=*/0,
iree_hal_buffer_allocation_size(submission->dealloca.buffer),
/*frontier_entry_count=*/0);
iree_hal_vulkan_queue_fail_signal_list(
signal_semaphore_list, iree_status_clone(completion_status));
}
}
// Finishes a submission whose queue epoch has resolved with
// |completion_status| (borrowed; a private clone is used and freed here).
//
// When the epoch succeeded, profiling readback runs first and any readback
// failure becomes the terminal status handed to the per-kind completion
// handler and the completion action. The frontier tracker is advanced only on
// success, while |queue->last_drained_epoch| is always published.
static void iree_hal_vulkan_queue_complete_submission(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission,
    iree_status_t completion_status) {
  // Each profiling stage only runs while everything before it succeeded.
  iree_status_t terminal_status = iree_status_clone(completion_status);
  if (iree_status_is_ok(terminal_status)) {
    terminal_status =
        iree_hal_vulkan_queue_profile_read_native_timestamps(queue, submission);
  }
  if (iree_status_is_ok(terminal_status)) {
    terminal_status =
        iree_hal_vulkan_queue_profile_record_dispatch_events(submission);
  }
  if (iree_status_is_ok(terminal_status)) {
    terminal_status =
        iree_hal_vulkan_queue_profile_record_queue_device_event(submission);
  }
  const bool succeeded = iree_status_is_ok(terminal_status);

  // Dispatch to the kind-specific completion handler. Handlers borrow
  // |terminal_status|; it is freed at the end of this function.
  switch (submission->kind) {
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_ALLOCA:
      iree_hal_vulkan_queue_complete_alloca(submission, terminal_status);
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_DEALLOCA:
      iree_hal_vulkan_queue_complete_dealloca(submission, terminal_status);
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_HOST_CALL:
      iree_hal_vulkan_queue_execute_host_call(queue, submission,
                                              terminal_status);
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_FILL:
      iree_hal_vulkan_queue_execute_fill(submission, terminal_status);
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_UPDATE:
      iree_hal_vulkan_queue_execute_update(submission, terminal_status);
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_COPY:
      iree_hal_vulkan_queue_execute_copy(submission, terminal_status);
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_EXECUTE:
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_DISPATCH:
      iree_hal_vulkan_queue_execute_command_buffer(submission,
                                                   terminal_status);
      break;
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_BARRIER:
    case IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_SPARSE_BIND:
      // No host-side work: just resolve the signal semaphores.
      if (!succeeded) {
        iree_hal_vulkan_queue_fail_signal_list(
            submission->signal_semaphore_list,
            iree_status_clone(terminal_status));
        break;
      }
      iree_hal_vulkan_queue_signal_list_or_fail(
          submission->signal_semaphore_list,
          iree_async_fixed_frontier_as_const_frontier(&submission->frontier));
      break;
    default:
      iree_hal_vulkan_queue_fail_signal_list(
          submission->signal_semaphore_list,
          iree_make_status(IREE_STATUS_INTERNAL,
                           "unknown Vulkan queue submission kind %u",
                           (uint32_t)submission->kind));
      break;
  }

  iree_hal_vulkan_queue_consume_completion_action(submission, terminal_status);

  if (succeeded && queue->frontier_tracker) {
    iree_async_frontier_tracker_advance(queue->frontier_tracker, queue->axis,
                                        submission->epoch);
  }
  iree_atomic_store(&queue->last_drained_epoch, (int64_t)submission->epoch,
                    iree_memory_order_release);

  iree_status_free(terminal_status);
}
// Fails a submission that was never handed to the Vulkan queue.
//
// Performs the kind-specific rollback (alloca: decommit and release the pool
// reservation; dealloca: abort the pending dealloca), records the matching
// profile events, fails the signal semaphores with |status|, and destroys the
// submission. |status| must be non-OK and is passed on to
// iree_hal_vulkan_queue_fail_signal_list.
static void iree_hal_vulkan_queue_fail_unsubmitted_submission(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission,
    iree_status_t status) {
  IREE_ASSERT(!iree_status_is_ok(status),
              "unsubmitted queue failure status must be non-OK");

  if (submission->kind == IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_ALLOCA) {
    // Snapshot the reservation before releasing it so the profile events can
    // still reference it.
    iree_hal_pool_t* owning_pool = NULL;
    iree_hal_pool_reservation_t held_reservation;
    const bool was_reserved =
        iree_hal_local_transient_buffer_query_reservation(
            submission->alloca.buffer, &owning_pool, &held_reservation);
    iree_hal_local_transient_buffer_decommit(submission->alloca.buffer);
    iree_hal_local_transient_buffer_release_reservation(
        submission->alloca.buffer, submission->alloca.wait_frontier);
    iree_hal_vulkan_queue_profile_record_memory_event(
        submission, IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_QUEUE_ALLOCA,
        IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_QUEUE_OPERATION,
        iree_status_code(status), owning_pool, submission->alloca.params,
        was_reserved ? &held_reservation : NULL,
        (uint64_t)(uintptr_t)submission->sparse_bind.buffer,
        submission->alloca.allocation_size,
        submission->alloca.wait_frontier
            ? submission->alloca.wait_frontier->entry_count
            : 0);
    if (was_reserved) {
      iree_hal_vulkan_queue_profile_record_memory_event(
          submission, IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_POOL_RELEASE,
          IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_QUEUE_OPERATION,
          iree_status_code(status), owning_pool, submission->alloca.params,
          &held_reservation, /*backing_id=*/0,
          submission->alloca.allocation_size,
          submission->alloca.wait_frontier
              ? submission->alloca.wait_frontier->entry_count
              : 0);
    }
    submission->alloca.wait_frontier = NULL;
  } else if (submission->kind ==
             IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_DEALLOCA) {
    iree_hal_local_transient_buffer_abort_dealloca(
        submission->dealloca.buffer);
    const iree_hal_buffer_params_t buffer_params = {
        .type = iree_hal_buffer_memory_type(submission->dealloca.buffer),
        .access = iree_hal_buffer_allowed_access(submission->dealloca.buffer),
        .usage = iree_hal_buffer_allowed_usage(submission->dealloca.buffer),
    };
    iree_hal_vulkan_queue_profile_record_memory_event(
        submission, IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_QUEUE_DEALLOCA,
        IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_QUEUE_OPERATION,
        iree_status_code(status), /*pool=*/NULL, buffer_params,
        /*reservation=*/NULL, /*backing_id=*/0,
        iree_hal_buffer_allocation_size(submission->dealloca.buffer),
        /*frontier_entry_count=*/0);
  }

  // Every kind ends the same way: fail the signals, then tear down.
  iree_hal_vulkan_queue_fail_signal_list(submission->signal_semaphore_list,
                                         status);
  iree_hal_vulkan_queue_pending_submission_destroy(queue, submission);
}
// Translates |wait_semaphore_list| into native Vulkan semaphore waits.
//
// Waits whose semaphore has already reached the requested value are dropped.
// For every remaining wait the producer (axis, epoch) recorded on the
// semaphore is merged into |pending_frontier| and a VkSemaphoreSubmitInfo is
// appended to |resolution->wait_infos|.
//
// A wait cannot be expressed natively when the semaphore has no published
// signal covering the requested value, or when the producer is this queue at
// an epoch that has not been assigned yet. In those cases:
//   - if |allow_software_deferral| is set, |resolution->needs_deferral| is set
//     and OK is returned immediately (waits appended so far stay in
//     |resolution| and merges already applied to |pending_frontier| remain);
//   - otherwise FAILED_PRECONDITION is returned.
//
// Also returns RESOURCE_EXHAUSTED when the frontier merge fails, OUT_OF_RANGE
// when |resolution->wait_info_capacity| is exceeded, and any error from the
// semaphore query/handle lookups.
static iree_status_t iree_hal_vulkan_queue_resolve_waits(
iree_hal_vulkan_queue_t* queue,
const iree_hal_semaphore_list_t wait_semaphore_list,
iree_hal_vulkan_queue_frontier_t* pending_frontier,
bool allow_software_deferral,
iree_hal_vulkan_queue_wait_resolution_t* resolution) {
resolution->wait_info_count = 0;
resolution->needs_deferral = false;
for (iree_host_size_t i = 0; i < wait_semaphore_list.count; ++i) {
iree_hal_semaphore_t* semaphore = wait_semaphore_list.semaphores[i];
const uint64_t minimum_value = wait_semaphore_list.payload_values[i];
uint64_t current_value = 0;
IREE_RETURN_IF_ERROR(iree_hal_semaphore_query(semaphore, &current_value));
// Already satisfied: no device-side wait is needed.
if (current_value >= minimum_value) continue;
iree_hal_vulkan_last_signal_flags_t signal_flags = 0;
iree_async_axis_t producer_axis = 0;
uint64_t producer_epoch = 0;
uint64_t producer_value = 0;
const bool has_last_signal = iree_hal_vulkan_last_signal_load(
iree_hal_vulkan_semaphore_last_signal(semaphore), &signal_flags,
&producer_axis, &producer_epoch, &producer_value);
// The flags are loaded but not consulted here.
(void)signal_flags;
// No published signal reaches the requested value yet.
if (!has_last_signal || producer_value < minimum_value) {
if (allow_software_deferral) {
resolution->needs_deferral = true;
return iree_ok_status();
}
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Vulkan queue wait on semaphore %" PRIhsz
" requires software deferral; only already-signaled waits and waits "
"backed by a published producer epoch are accepted",
i);
}
// The producer is this queue at an epoch that has not been assigned yet.
if (producer_axis == queue->axis &&
producer_epoch >= queue->next_epoch_value) {
if (allow_software_deferral) {
resolution->needs_deferral = true;
return iree_ok_status();
}
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Vulkan queue wait on same-queue future signal requires software "
"deferral; only prior accepted epochs are accepted");
}
// Fold the producer's (axis, epoch) into the frontier being built for this
// submission.
iree_async_single_frontier_t producer_frontier;
iree_async_single_frontier_initialize(&producer_frontier, producer_axis,
producer_epoch);
if (!iree_async_frontier_merge(
iree_async_fixed_frontier_as_frontier(pending_frontier),
IREE_HAL_VULKAN_QUEUE_FRONTIER_CAPACITY,
iree_async_single_frontier_as_const_frontier(&producer_frontier))) {
return iree_make_status(
IREE_STATUS_RESOURCE_EXHAUSTED,
"Vulkan queue frontier capacity exhausted while resolving waits");
}
VkSemaphore wait_handle = VK_NULL_HANDLE;
IREE_RETURN_IF_ERROR(
iree_hal_vulkan_semaphore_handle(semaphore, &wait_handle));
// Bounds check before writing into the caller-provided wait_infos storage.
if (resolution->wait_info_count >= resolution->wait_info_capacity) {
return iree_make_status(
IREE_STATUS_OUT_OF_RANGE,
"too many native Vulkan wait semaphores for queue submission");
}
resolution->wait_infos[resolution->wait_info_count] =
(VkSemaphoreSubmitInfo){
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = wait_handle,
.value = minimum_value,
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.deviceIndex = 0,
};
resolution->wait_info_count = resolution->wait_info_count + 1;
}
return iree_ok_status();
}
// Publishes, on each signal semaphore of |submission|, that the payload value
// will be produced by |queue|'s axis at the submission's epoch with the
// submission's frontier.
static void iree_hal_vulkan_queue_publish_signals(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  const iree_async_frontier_t* published_frontier =
      iree_async_fixed_frontier_as_const_frontier(&submission->frontier);
  iree_host_size_t signal_index = 0;
  while (signal_index < submission->signal_semaphore_list.count) {
    iree_hal_vulkan_semaphore_publish_signal(
        submission->signal_semaphore_list.semaphores[signal_index],
        queue->axis, published_frontier, submission->epoch,
        submission->signal_semaphore_list.payload_values[signal_index]);
    ++signal_index;
  }
}
// Ends the pool notification observation held by an alloca submission, if
// any. Calling it again while no observation is held does nothing.
static void iree_hal_vulkan_queue_alloca_pool_notification_end_observe(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (!submission->alloca.pool_notification_observation_held) return;
  // Clear the flag first so a repeated call falls through the guard above.
  submission->alloca.pool_notification_observation_held = false;
  iree_async_notification_end_observe(submission->alloca.pool_notification);
}
// Returns true when the alloca's transient buffer already reports a non-NULL
// backing buffer.
static bool iree_hal_vulkan_queue_alloca_has_staged_backing(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (iree_hal_local_transient_buffer_backing_buffer(
          submission->alloca.buffer) == NULL) {
    return false;
  }
  return true;
}
// Materializes |reservation| into a backing buffer and stages it on the
// alloca's transient buffer.
//
// When |acquire_result| is OK_NEEDS_WAIT the submission must carry
// IREE_HAL_ALLOCA_FLAG_ALLOW_POOL_WAIT_FRONTIER; otherwise the reservation is
// released and RESOURCE_EXHAUSTED is returned. If materialization fails the
// reservation is released as well. On every failure path a POOL_RELEASE
// profile event is recorded; on success a POOL_MATERIALIZE event is recorded
// and |submission->alloca.wait_frontier| / |memory_wait_kind| are updated to
// reflect whether the memory must be waited on.
static iree_status_t iree_hal_vulkan_queue_stage_alloca_reservation(
    iree_hal_vulkan_queue_pending_submission_t* submission,
    const iree_hal_pool_reservation_t* reservation,
    const iree_async_frontier_t* wait_frontier,
    iree_hal_pool_acquire_result_t acquire_result) {
  const bool must_wait = acquire_result == IREE_HAL_POOL_ACQUIRE_OK_NEEDS_WAIT;

  // Recycled memory that is still in flight is only usable when the caller
  // opted in to waiting on the pool's frontier.
  if (must_wait &&
      !iree_all_bits_set(submission->alloca.flags,
                         IREE_HAL_ALLOCA_FLAG_ALLOW_POOL_WAIT_FRONTIER)) {
    iree_hal_pool_release_reservation(submission->alloca.pool, reservation,
                                      wait_frontier);
    iree_hal_vulkan_queue_profile_record_memory_event(
        submission, IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_POOL_RELEASE,
        IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_QUEUE_OPERATION |
            IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_WAIT_FRONTIER,
        IREE_STATUS_RESOURCE_EXHAUSTED, submission->alloca.pool,
        submission->alloca.params, reservation, /*backing_id=*/0,
        submission->alloca.allocation_size,
        wait_frontier ? wait_frontier->entry_count : 0);
    return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
                            "queue_alloca recycled pool memory requires "
                            "IREE_HAL_ALLOCA_FLAG_ALLOW_POOL_WAIT_FRONTIER");
  }

  // The wait frontier only applies when the pool asked us to wait.
  const iree_async_frontier_t* gated_frontier =
      must_wait ? wait_frontier : NULL;

  iree_hal_buffer_t* backing_buffer = NULL;
  iree_status_t status = iree_hal_pool_materialize_reservation(
      submission->alloca.pool, submission->alloca.params, reservation,
      IREE_HAL_POOL_MATERIALIZE_FLAG_NONE, &backing_buffer);

  if (!iree_status_is_ok(status)) {
    // Materialization failed: return the reservation to the pool.
    iree_hal_pool_release_reservation(submission->alloca.pool, reservation,
                                      gated_frontier);
    iree_hal_profile_memory_event_flags_t release_flags =
        IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_QUEUE_OPERATION;
    if (must_wait) {
      release_flags |= IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_WAIT_FRONTIER;
    }
    iree_hal_vulkan_queue_profile_record_memory_event(
        submission, IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_POOL_RELEASE,
        release_flags, iree_status_code(status), submission->alloca.pool,
        submission->alloca.params, reservation, /*backing_id=*/0,
        submission->alloca.allocation_size,
        gated_frontier ? gated_frontier->entry_count : 0);
    iree_hal_buffer_release(backing_buffer);
    return status;
  }

  iree_hal_local_transient_buffer_attach_reservation(
      submission->alloca.buffer, submission->alloca.pool, reservation);
  iree_hal_local_transient_buffer_stage_backing(submission->alloca.buffer,
                                                backing_buffer);
  submission->alloca.wait_frontier = gated_frontier;
  if (must_wait) {
    submission->alloca.memory_wait_kind =
        IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_FRONTIER;
  } else {
    submission->alloca.memory_wait_kind =
        IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_NONE;
  }
  iree_hal_vulkan_queue_profile_record_memory_event(
      submission, IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_POOL_MATERIALIZE,
      IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_QUEUE_OPERATION, acquire_result,
      submission->alloca.pool, submission->alloca.params, reservation,
      /*backing_id=*/0, submission->alloca.allocation_size,
      wait_frontier ? wait_frontier->entry_count : 0);

  // Drop the local reference obtained from materialization.
  iree_hal_buffer_release(backing_buffer);
  return status;
}
// Allocates a sparse backing buffer for a queue_alloca and stages it on the
// submission's transient buffer.
//
// On success the sparse VkBuffer handle and the bind array produced by the
// allocator are stored in |submission->sparse_bind| and the alloca is marked
// as needing no memory wait. On failure nothing is stored on the submission
// and the bind array is freed here. Returns OUT_OF_RANGE when the bind count
// does not fit the uint32_t field it is stored in.
static iree_status_t iree_hal_vulkan_queue_stage_alloca_sparse_backing(
iree_hal_vulkan_queue_pending_submission_t* submission) {
iree_hal_vulkan_queue_t* queue = submission->queue;
iree_hal_buffer_placement_t placement = {
.device = (iree_hal_device_t*)queue->device,
.queue_affinity = submission->alloca.params.queue_affinity,
.flags = IREE_HAL_BUFFER_PLACEMENT_FLAG_NONE,
};
iree_hal_buffer_t* backing_buffer = NULL;
iree_host_size_t bind_count = 0;
VkSparseMemoryBind* binds = NULL;
// |binds| is allocated from queue->host_allocator by the callee.
iree_status_t status = iree_hal_vulkan_allocator_allocate_queue_sparse_buffer(
submission->alloca.allocator, placement, submission->alloca.params,
submission->alloca.allocation_size,
iree_hal_buffer_byte_length(submission->alloca.buffer),
queue->host_allocator, &backing_buffer, &bind_count, &binds);
VkBuffer sparse_buffer_handle = VK_NULL_HANDLE;
if (iree_status_is_ok(status)) {
// Only the VkBuffer handle is needed; the memory handle is ignored.
VkDeviceMemory sparse_buffer_memory = VK_NULL_HANDLE;
status = iree_hal_vulkan_sparse_buffer_handle(
backing_buffer, &sparse_buffer_memory, &sparse_buffer_handle);
(void)sparse_buffer_memory;
}
// sparse_bind.bind_count is stored as uint32_t below.
if (iree_status_is_ok(status) && bind_count > UINT32_MAX) {
status = iree_make_status(IREE_STATUS_OUT_OF_RANGE,
"too many Vulkan sparse buffer binds for "
"queue_alloca");
}
if (iree_status_is_ok(status)) {
iree_hal_local_transient_buffer_stage_backing(submission->alloca.buffer,
backing_buffer);
submission->sparse_bind.buffer = sparse_buffer_handle;
submission->sparse_bind.binds = binds;
submission->sparse_bind.bind_count = (uint32_t)bind_count;
// The submission now references the array; clear the local so the free
// below does not release it.
binds = NULL;
submission->alloca.memory_wait_kind =
IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_NONE;
}
// Frees the bind array only on failure paths (NULL after a successful
// hand-off).
iree_allocator_free(queue->host_allocator, binds);
// Drop the local reference on every path.
// NOTE(review): presumably stage_backing retains the buffer — confirm.
iree_hal_buffer_release(backing_buffer);
return status;
}
// Arms a pool-notification wait for an alloca whose pool could not satisfy
// the reservation, then retries the acquire once.
//
// Observation of the pool notification begins before the retry.
// NOTE(review): presumably this ordering ensures a release that lands between
// the retry and the eventual wait is not missed — confirm against the
// iree_async_notification contract.
//
// Outcomes:
//   - retry succeeds: observation is ended and the reservation is staged via
//     iree_hal_vulkan_queue_stage_alloca_reservation;
//   - pool still exhausted/over budget: observation stays held and
//     memory_wait_kind is set to POOL_NOTIFICATION;
//   - any error: observation is ended and the error is returned.
// Returns INTERNAL when the pool has no notification or reports an unknown
// acquire result.
static iree_status_t iree_hal_vulkan_queue_prepare_alloca_pool_notification(
iree_hal_vulkan_queue_pending_submission_t* submission,
const iree_async_frontier_t* requester_frontier) {
iree_async_notification_t* notification =
iree_hal_pool_notification(submission->alloca.pool);
if (IREE_UNLIKELY(!notification)) {
return iree_make_status(
IREE_STATUS_INTERNAL,
"queue_alloca exhausted pool did not provide a notification");
}
// Begin observing and keep the wait token on the submission.
submission->alloca.pool_notification = notification;
submission->alloca.pool_notification_wait_token =
iree_async_notification_begin_observe(notification);
submission->alloca.pool_notification_observation_held = true;
iree_hal_pool_reservation_t reservation;
iree_hal_pool_acquire_info_t acquire_info;
iree_hal_pool_acquire_result_t acquire_result =
IREE_HAL_POOL_ACQUIRE_EXHAUSTED;
// Retry the acquire; a min_alignment of 0 is treated as 1.
iree_status_t status = iree_hal_pool_acquire_reservation(
submission->alloca.pool, submission->alloca.allocation_size,
submission->alloca.params.min_alignment
? submission->alloca.params.min_alignment
: 1,
requester_frontier, submission->alloca.reserve_flags, &reservation,
&acquire_info, &acquire_result);
if (iree_status_is_ok(status)) {
// No reservation is reported when the pool could not provide one.
iree_hal_vulkan_queue_profile_record_memory_event(
submission, IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_POOL_RESERVE,
iree_hal_vulkan_queue_profile_pool_reserve_event_flags(acquire_result),
acquire_result, submission->alloca.pool, submission->alloca.params,
acquire_result == IREE_HAL_POOL_ACQUIRE_EXHAUSTED ||
acquire_result == IREE_HAL_POOL_ACQUIRE_OVER_BUDGET
? NULL
: &reservation,
/*backing_id=*/0, submission->alloca.allocation_size,
acquire_info.wait_frontier ? acquire_info.wait_frontier->entry_count
: 0);
switch (acquire_result) {
case IREE_HAL_POOL_ACQUIRE_OK:
case IREE_HAL_POOL_ACQUIRE_OK_FRESH:
case IREE_HAL_POOL_ACQUIRE_OK_NEEDS_WAIT:
// Memory became available: no notification wait is needed.
iree_hal_vulkan_queue_alloca_pool_notification_end_observe(submission);
status = iree_hal_vulkan_queue_stage_alloca_reservation(
submission, &reservation, acquire_info.wait_frontier,
acquire_result);
break;
case IREE_HAL_POOL_ACQUIRE_EXHAUSTED:
case IREE_HAL_POOL_ACQUIRE_OVER_BUDGET:
// Still no memory: leave the observation held for the later wait.
submission->alloca.memory_wait_kind =
IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_POOL_NOTIFICATION;
break;
default:
status = iree_make_status(IREE_STATUS_INTERNAL,
"unrecognized pool acquire result %u",
acquire_result);
break;
}
}
// end_observe is a no-op when the observation was already ended above.
if (!iree_status_is_ok(status)) {
iree_hal_vulkan_queue_alloca_pool_notification_end_observe(submission);
}
return status;
}
// Ensures an alloca submission has backing memory staged or a memory wait
// armed, reporting through |out_needs_memory_wait| whether the submission
// must wait for memory before it can be submitted.
//
// Does no new work when backing is already staged or a memory wait is already
// recorded. Otherwise the sparse strategy stages a sparse buffer directly and
// the pool strategy attempts a reservation, falling back to a pool
// notification wait when the pool is exhausted or over budget.
static iree_status_t iree_hal_vulkan_queue_prepare_alloca_backing(
    iree_hal_vulkan_queue_pending_submission_t* submission,
    const iree_async_frontier_t* requester_frontier,
    bool* out_needs_memory_wait) {
  *out_needs_memory_wait = false;

  // Already prepared by an earlier attempt: just report the recorded state.
  if (iree_hal_vulkan_queue_alloca_has_staged_backing(submission) ||
      submission->alloca.memory_wait_kind !=
          IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_NONE) {
    *out_needs_memory_wait = submission->alloca.memory_wait_kind !=
                             IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_NONE;
    return iree_ok_status();
  }

  switch (submission->alloca.strategy) {
    case IREE_HAL_VULKAN_QUEUE_ALLOCA_STRATEGY_SPARSE:
      IREE_RETURN_IF_ERROR(
          iree_hal_vulkan_queue_stage_alloca_sparse_backing(submission));
      *out_needs_memory_wait = false;
      return iree_ok_status();
    case IREE_HAL_VULKAN_QUEUE_ALLOCA_STRATEGY_POOL:
      break;
    default:
      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                              "unrecognized Vulkan queue_alloca strategy %u",
                              (uint32_t)submission->alloca.strategy);
  }

  // Pool strategy: try to reserve; a min_alignment of 0 is treated as 1.
  iree_hal_pool_reservation_t reservation;
  iree_hal_pool_acquire_info_t acquire_info;
  iree_hal_pool_acquire_result_t acquire_result =
      IREE_HAL_POOL_ACQUIRE_EXHAUSTED;
  IREE_RETURN_IF_ERROR(iree_hal_pool_acquire_reservation(
      submission->alloca.pool, submission->alloca.allocation_size,
      submission->alloca.params.min_alignment
          ? submission->alloca.params.min_alignment
          : 1,
      requester_frontier, submission->alloca.reserve_flags, &reservation,
      &acquire_info, &acquire_result));

  const bool pool_is_full =
      acquire_result == IREE_HAL_POOL_ACQUIRE_EXHAUSTED ||
      acquire_result == IREE_HAL_POOL_ACQUIRE_OVER_BUDGET;
  iree_hal_vulkan_queue_profile_record_memory_event(
      submission, IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_POOL_RESERVE,
      iree_hal_vulkan_queue_profile_pool_reserve_event_flags(acquire_result),
      acquire_result, submission->alloca.pool, submission->alloca.params,
      pool_is_full ? NULL : &reservation,
      /*backing_id=*/0, submission->alloca.allocation_size,
      acquire_info.wait_frontier ? acquire_info.wait_frontier->entry_count : 0);

  iree_status_t status = iree_ok_status();
  if (acquire_result == IREE_HAL_POOL_ACQUIRE_OK ||
      acquire_result == IREE_HAL_POOL_ACQUIRE_OK_FRESH ||
      acquire_result == IREE_HAL_POOL_ACQUIRE_OK_NEEDS_WAIT) {
    status = iree_hal_vulkan_queue_stage_alloca_reservation(
        submission, &reservation, acquire_info.wait_frontier, acquire_result);
  } else if (pool_is_full) {
    status = iree_hal_vulkan_queue_prepare_alloca_pool_notification(
        submission, requester_frontier);
  } else {
    status = iree_make_status(IREE_STATUS_INTERNAL,
                              "unrecognized pool acquire result %u",
                              acquire_result);
  }

  if (iree_status_is_ok(status)) {
    *out_needs_memory_wait = submission->alloca.memory_wait_kind !=
                             IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_NONE;
  }
  return status;
}
// Makes a single, non-waiting attempt to stage backing memory for an alloca.
//
// The queue frontier is snapshotted under |queue->submission_mutex| and used
// as the requester frontier. Returns OK without staging anything when the
// pool is exhausted or over budget; unlike
// iree_hal_vulkan_queue_prepare_alloca_backing no pool notification wait is
// armed here.
static iree_status_t iree_hal_vulkan_queue_try_stage_alloca_backing_now(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  iree_hal_vulkan_queue_frontier_t frontier_snapshot;
  iree_slim_mutex_lock(&queue->submission_mutex);
  frontier_snapshot = queue->frontier;
  iree_slim_mutex_unlock(&queue->submission_mutex);
  const iree_async_frontier_t* requester_frontier =
      iree_async_fixed_frontier_as_const_frontier(&frontier_snapshot);

  switch (submission->alloca.strategy) {
    case IREE_HAL_VULKAN_QUEUE_ALLOCA_STRATEGY_SPARSE:
      return iree_hal_vulkan_queue_stage_alloca_sparse_backing(submission);
    case IREE_HAL_VULKAN_QUEUE_ALLOCA_STRATEGY_POOL:
      break;
    default:
      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                              "unrecognized Vulkan queue_alloca strategy %u",
                              (uint32_t)submission->alloca.strategy);
  }

  // Pool strategy: try to reserve; a min_alignment of 0 is treated as 1.
  iree_hal_pool_reservation_t reservation;
  iree_hal_pool_acquire_info_t acquire_info;
  iree_hal_pool_acquire_result_t acquire_result =
      IREE_HAL_POOL_ACQUIRE_EXHAUSTED;
  IREE_RETURN_IF_ERROR(iree_hal_pool_acquire_reservation(
      submission->alloca.pool, submission->alloca.allocation_size,
      submission->alloca.params.min_alignment
          ? submission->alloca.params.min_alignment
          : 1,
      requester_frontier, submission->alloca.reserve_flags, &reservation,
      &acquire_info, &acquire_result));

  const bool pool_is_full =
      acquire_result == IREE_HAL_POOL_ACQUIRE_EXHAUSTED ||
      acquire_result == IREE_HAL_POOL_ACQUIRE_OVER_BUDGET;
  iree_hal_vulkan_queue_profile_record_memory_event(
      submission, IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_POOL_RESERVE,
      iree_hal_vulkan_queue_profile_pool_reserve_event_flags(acquire_result),
      acquire_result, submission->alloca.pool, submission->alloca.params,
      pool_is_full ? NULL : &reservation,
      /*backing_id=*/0, submission->alloca.allocation_size,
      acquire_info.wait_frontier ? acquire_info.wait_frontier->entry_count : 0);

  // Nothing available right now: not an error, the caller decides what next.
  if (pool_is_full) return iree_ok_status();

  if (acquire_result == IREE_HAL_POOL_ACQUIRE_OK ||
      acquire_result == IREE_HAL_POOL_ACQUIRE_OK_FRESH ||
      acquire_result == IREE_HAL_POOL_ACQUIRE_OK_NEEDS_WAIT) {
    return iree_hal_vulkan_queue_stage_alloca_reservation(
        submission, &reservation, acquire_info.wait_frontier, acquire_result);
  }

  return iree_make_status(IREE_STATUS_INTERNAL,
                          "unrecognized pool acquire result %u",
                          acquire_result);
}
// Allocates the two parallel host arrays (semaphore handles and timeline
// values) that hold |resolution->wait_info_count| sparse-bind waits.
//
// Both outputs are NULL when there are no waits. Storage comes from
// |queue->host_allocator| and must be freed by the caller. If an allocation
// fails the semaphore array is freed and reset to NULL before the error is
// returned. Returns OUT_OF_RANGE when either array size overflows.
static iree_status_t iree_hal_vulkan_queue_allocate_sparse_wait_arrays(
    iree_hal_vulkan_queue_t* queue,
    const iree_hal_vulkan_queue_wait_resolution_t* resolution,
    VkSemaphore** out_wait_semaphores, uint64_t** out_wait_values) {
  *out_wait_semaphores = NULL;
  *out_wait_values = NULL;
  if (resolution->wait_info_count == 0) {
    return iree_ok_status();
  }

  iree_host_size_t semaphore_bytes = 0;
  if (!iree_host_size_checked_mul(resolution->wait_info_count,
                                  sizeof(VkSemaphore), &semaphore_bytes)) {
    return iree_make_status(
        IREE_STATUS_OUT_OF_RANGE,
        "Vulkan sparse bind wait semaphore array overflows host storage");
  }
  iree_host_size_t value_bytes = 0;
  if (!iree_host_size_checked_mul(resolution->wait_info_count,
                                  sizeof(uint64_t), &value_bytes)) {
    return iree_make_status(
        IREE_STATUS_OUT_OF_RANGE,
        "Vulkan sparse bind wait value array overflows host storage");
  }

  iree_status_t status = iree_allocator_malloc(
      queue->host_allocator, semaphore_bytes, (void**)out_wait_semaphores);
  if (iree_status_is_ok(status)) {
    status = iree_allocator_malloc(queue->host_allocator, value_bytes,
                                   (void**)out_wait_values);
  }
  if (iree_status_is_ok(status)) {
    return status;
  }

  // Roll back the semaphore array so the caller never sees a partial result.
  iree_allocator_free(queue->host_allocator, *out_wait_semaphores);
  *out_wait_semaphores = NULL;
  return status;
}
// Submits |submission| through vkQueueBindSparse, waiting on the native waits
// in |resolution| and signaling the queue epoch semaphore with
// |submission->epoch|.
//
// vkQueueBindSparse takes plain VkSemaphore arrays plus a chained
// VkTimelineSemaphoreSubmitInfo rather than VkSemaphoreSubmitInfo, so the
// waits are repacked into temporary host arrays that are freed before
// returning. The Vulkan call itself is made while holding
// |queue->queue_handle_mutex|.
//
// When the submission carries no binds (bind_count == 0) no buffer bind entry
// is passed: Vulkan requires VkSparseBufferMemoryBindInfo::bindCount to be
// greater than 0, and a bind-less vkQueueBindSparse still performs the
// semaphore wait/signal operations.
//
// Returns FAILED_PRECONDITION when the queue family lacks
// VK_QUEUE_SPARSE_BINDING_BIT, or any allocation / Vulkan error.
static iree_status_t iree_hal_vulkan_queue_submit_sparse_bind_under_lock(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission,
    const iree_hal_vulkan_queue_wait_resolution_t* resolution) {
  if (!iree_all_bits_set(queue->queue_flags, VK_QUEUE_SPARSE_BINDING_BIT)) {
    return iree_make_status(
        IREE_STATUS_FAILED_PRECONDITION,
        "Vulkan queue family %u does not support sparse binding",
        queue->queue_family_index);
  }

  VkSemaphore* wait_semaphores = NULL;
  uint64_t* wait_values = NULL;
  iree_status_t status = iree_hal_vulkan_queue_allocate_sparse_wait_arrays(
      queue, resolution, &wait_semaphores, &wait_values);
  if (iree_status_is_ok(status)) {
    // Split each VkSemaphoreSubmitInfo into the parallel arrays.
    for (uint32_t i = 0; i < resolution->wait_info_count; ++i) {
      wait_semaphores[i] = resolution->wait_infos[i].semaphore;
      wait_values[i] = resolution->wait_infos[i].value;
    }
  }

  if (iree_status_is_ok(status)) {
    VkSemaphore signal_semaphore = queue->epoch_semaphore;
    const uint64_t signal_value = submission->epoch;
    VkTimelineSemaphoreSubmitInfo timeline_info = {
        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
        .waitSemaphoreValueCount = resolution->wait_info_count,
        .pWaitSemaphoreValues = wait_values,
        .signalSemaphoreValueCount = 1,
        .pSignalSemaphoreValues = &signal_value,
    };
    // Only describe a buffer bind when there is at least one bind; an entry
    // with bindCount == 0 is invalid usage.
    const bool has_binds = submission->sparse_bind.bind_count != 0;
    VkSparseBufferMemoryBindInfo buffer_bind_info = {
        .buffer = submission->sparse_bind.buffer,
        .bindCount = submission->sparse_bind.bind_count,
        .pBinds = submission->sparse_bind.binds,
    };
    VkBindSparseInfo bind_info = {
        .sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,
        .pNext = &timeline_info,
        .waitSemaphoreCount = resolution->wait_info_count,
        .pWaitSemaphores = wait_semaphores,
        .bufferBindCount = has_binds ? 1u : 0u,
        .pBufferBinds = has_binds ? &buffer_bind_info : NULL,
        .signalSemaphoreCount = 1,
        .pSignalSemaphores = &signal_semaphore,
    };
    iree_slim_mutex_lock(queue->queue_handle_mutex);
    status =
        iree_vkQueueBindSparse(IREE_VULKAN_DEVICE(&queue->syms), queue->queue,
                               /*bindInfoCount=*/1, &bind_info, VK_NULL_HANDLE);
    iree_slim_mutex_unlock(queue->queue_handle_mutex);
  }

  // The temporary arrays are only needed for the duration of the call.
  iree_allocator_free(queue->host_allocator, wait_values);
  iree_allocator_free(queue->host_allocator, wait_semaphores);
  return status;
}
// Returns true when |submission| must go through the sparse-bind submit path:
// either it is an explicit sparse-bind submission or it carries at least one
// sparse bind.
static bool iree_hal_vulkan_queue_submission_uses_sparse_bind(
    const iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (submission->kind == IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_SPARSE_BIND) {
    return true;
  }
  return submission->sparse_bind.bind_count != 0;
}
// Attempts to submit |submission| to the native Vulkan queue.
//
// NOTE(review): per the _under_lock suffix this is presumably called with
// queue->submission_mutex held — confirm against callers.
//
// Possible outcomes (all but the last return OK):
//   - submitted: |out_result->submitted| is true, the submission was appended
//     to the pending list, and the queue frontier and next epoch were
//     advanced;
//   - needs software deferral: |resolution->needs_deferral| is true and
//     nothing was submitted (only possible with |allow_software_deferral|);
//   - alloca waiting for memory: the submission was appended to the deferred
//     list and returned in |out_result->memory_wait_submission|;
//   - failure: a non-OK status is returned. If the failure code is DATA_LOSS
//     it is also stored as the queue error and the result of that store is
//     returned in |out_result->queue_failure_status|.
//
// The epoch and frontier assigned to the submission are tentative until the
// Vulkan submit succeeds; only then are they committed to the queue.
static iree_status_t iree_hal_vulkan_queue_submit_native_under_lock(
iree_hal_vulkan_queue_t* queue,
iree_hal_vulkan_queue_pending_submission_t* submission,
bool allow_software_deferral,
iree_hal_vulkan_queue_wait_resolution_t* resolution,
iree_hal_vulkan_queue_submission_result_t* out_result) {
out_result->submitted = false;
out_result->memory_wait_submission = NULL;
out_result->queue_failure_status = iree_ok_status();
resolution->wait_info_count = 0;
resolution->needs_deferral = false;
// Preconditions: no sticky queue error, a frontier tracker is assigned, the
// queue is not stopping, and sparse binding is supported if required.
iree_status_t status = iree_hal_vulkan_queue_check_error(queue);
if (iree_status_is_ok(status) && !queue->frontier_tracker) {
status = iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
"Vulkan queue frontier is not assigned");
}
if (iree_status_is_ok(status) &&
iree_atomic_load(&queue->stop_requested, iree_memory_order_acquire)) {
status = iree_make_status(IREE_STATUS_CANCELLED,
"Vulkan queue is shutting down");
}
if (iree_status_is_ok(status) &&
iree_hal_vulkan_queue_submission_uses_sparse_bind(submission) &&
!iree_all_bits_set(queue->queue_flags, VK_QUEUE_SPARSE_BINDING_BIT)) {
status = iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Vulkan queue family %u does not support sparse binding",
queue->queue_family_index);
}
if (iree_status_is_ok(status)) {
// Tentatively take the next epoch and start from the queue's current
// frontier; resolve_waits merges producer frontiers into this copy.
submission->epoch = queue->next_epoch_value;
submission->frontier = queue->frontier;
status = iree_hal_vulkan_queue_resolve_waits(
queue, submission->wait_semaphore_list, &submission->frontier,
allow_software_deferral, resolution);
if (iree_status_is_ok(status)) {
iree_hal_vulkan_queue_profile_set_dependency_resolution(submission,
resolution);
}
}
// A wait could not be expressed natively: leave the queue state untouched
// and let the caller defer the submission.
if (iree_status_is_ok(status) && resolution->needs_deferral) {
return iree_ok_status();
}
if (iree_status_is_ok(status) &&
submission->kind == IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_ALLOCA) {
bool needs_memory_wait = false;
status = iree_hal_vulkan_queue_prepare_alloca_backing(
submission,
iree_async_fixed_frontier_as_const_frontier(&submission->frontier),
&needs_memory_wait);
if (iree_status_is_ok(status) && needs_memory_wait) {
// Memory is not available yet: park the submission on the deferred list
// and hand it back so the caller can arm the memory wait.
iree_atomic_store(&submission->deferred_state,
IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_PENDING,
iree_memory_order_release);
iree_atomic_store(&submission->alloca.memory_wait_callback_complete, 1,
iree_memory_order_relaxed);
iree_hal_vulkan_queue_profile_force_software_defer(submission);
iree_hal_vulkan_queue_append_deferred_submission(queue, submission);
iree_hal_vulkan_queue_profile_record_submission(submission);
out_result->memory_wait_submission = submission;
return iree_ok_status();
}
}
// Record the native command buffer on demand if a recorder is provided and
// none has been recorded yet.
if (iree_status_is_ok(status) && submission->record_native_submission &&
!submission->native_command_buffer) {
status = submission->record_native_submission(queue, submission);
}
if (iree_status_is_ok(status)) {
// Add this queue's own (axis, epoch) to the submission frontier.
iree_async_single_frontier_t self_frontier;
iree_async_single_frontier_initialize(&self_frontier, queue->axis,
submission->epoch);
if (!iree_async_frontier_merge(
iree_async_fixed_frontier_as_frontier(&submission->frontier),
IREE_HAL_VULKAN_QUEUE_FRONTIER_CAPACITY,
iree_async_single_frontier_as_const_frontier(&self_frontier))) {
status = iree_make_status(
IREE_STATUS_RESOURCE_EXHAUSTED,
"Vulkan queue frontier capacity exhausted while publishing epoch");
}
}
if (iree_status_is_ok(status)) {
iree_hal_vulkan_queue_publish_descriptor_cache_sets_under_lock(queue,
submission);
iree_hal_vulkan_queue_publish_native_command_buffer_under_lock(submission);
iree_hal_vulkan_queue_publish_native_replay_under_lock(submission);
if (iree_hal_vulkan_queue_submission_uses_sparse_bind(submission)) {
status = iree_hal_vulkan_queue_submit_sparse_bind_under_lock(
queue, submission, resolution);
} else {
// Regular submit: wait on the resolved semaphores, optionally execute the
// native command buffer, then signal the queue epoch semaphore.
VkSemaphoreSubmitInfo epoch_signal_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
.semaphore = queue->epoch_semaphore,
.value = submission->epoch,
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
.deviceIndex = 0,
};
VkCommandBufferSubmitInfo command_buffer_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
.commandBuffer = submission->native_command_buffer,
.deviceMask = 0,
};
VkSubmitInfo2 submit_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
.waitSemaphoreInfoCount = resolution->wait_info_count,
.pWaitSemaphoreInfos = resolution->wait_infos,
.commandBufferInfoCount = submission->native_command_buffer ? 1u : 0u,
.pCommandBufferInfos =
submission->native_command_buffer ? &command_buffer_info : NULL,
.signalSemaphoreInfoCount = 1,
.pSignalSemaphoreInfos = &epoch_signal_info,
};
// The VkQueue handle is guarded by its own mutex.
iree_slim_mutex_lock(queue->queue_handle_mutex);
status =
iree_vkQueueSubmit2(IREE_VULKAN_DEVICE(&queue->syms), queue->queue, 1,
&submit_info, VK_NULL_HANDLE);
iree_slim_mutex_unlock(queue->queue_handle_mutex);
}
if (iree_status_is_ok(status)) {
// Commit: publish signals, adopt the submission frontier as the queue
// frontier, consume the epoch, and track the submission as pending.
iree_hal_vulkan_queue_profile_record_submission(submission);
iree_hal_vulkan_queue_publish_signals(queue, submission);
queue->frontier = submission->frontier;
queue->next_epoch_value = queue->next_epoch_value + 1;
iree_hal_vulkan_queue_append_pending_submission(queue, submission);
out_result->submitted = true;
} else if (iree_status_code(status) == IREE_STATUS_DATA_LOSS) {
// Store a clone of the failure as the queue error; |status| itself is
// still returned to the caller.
out_result->queue_failure_status =
iree_hal_vulkan_queue_store_error(queue, iree_status_clone(status));
}
}
return status;
}
// Returns true once |entry|'s callback_complete flag reads non-zero.
// The flag is loaded with acquire ordering.
static bool iree_hal_vulkan_queue_wait_entry_callback_is_complete(
    const iree_hal_vulkan_queue_wait_entry_t* entry) {
  const bool is_complete =
      iree_atomic_load(&entry->callback_complete, iree_memory_order_acquire) !=
      0;
  return is_complete;
}
// Marks |entry|'s callback as complete and posts the owning submission's
// callback notification to all waiters.
static void iree_hal_vulkan_queue_wait_entry_publish_callback_complete(
iree_hal_vulkan_queue_wait_entry_t* entry) {
// The flag is release-stored before the post so that a woken waiter that
// acquire-loads it observes the completed state.
iree_atomic_store(&entry->callback_complete, 1, iree_memory_order_release);
iree_notification_post(&entry->submission->callback_notification,
IREE_ALL_WAITERS);
}
// Condition function used with iree_notification_await: |user_data| is the
// pending submission. Returns true only when every one of its wait entries
// reports its callback complete; a submission without wait entries is
// trivially complete.
static bool iree_hal_vulkan_queue_wait_callbacks_are_complete(void* user_data) {
  iree_hal_vulkan_queue_pending_submission_t* pending =
      (iree_hal_vulkan_queue_pending_submission_t*)user_data;
  bool all_complete = true;
  // Stop scanning at the first entry whose callback has not completed.
  for (iree_host_size_t index = 0;
       all_complete && index < pending->wait_entry_count; ++index) {
    all_complete = iree_hal_vulkan_queue_wait_entry_callback_is_complete(
        &pending->wait_entries[index]);
  }
  return all_complete;
}
// Records |status| as the submission's wait failure when it is the first
// failure observed. OK statuses are ignored. A failure that loses the
// compare-exchange to an earlier one is freed here; a winning failure is held
// in submission->wait_failure_status.
static void iree_hal_vulkan_queue_submission_record_wait_status(
    iree_hal_vulkan_queue_pending_submission_t* submission,
    iree_status_t status) {
  if (!iree_status_is_ok(status)) {
    intptr_t no_failure = 0;
    const bool stored = iree_atomic_compare_exchange_strong(
        &submission->wait_failure_status, &no_failure, (intptr_t)status,
        iree_memory_order_acq_rel, iree_memory_order_relaxed);
    if (!stored) {
      iree_status_free(status);
    }
  }
}
// Attempts to move the submission's deferred state from PENDING to PROMOTING.
// Returns true only for the caller whose compare-exchange succeeds.
static bool iree_hal_vulkan_queue_submission_claim_promotion(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  int32_t observed_state = IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_PENDING;
  const bool claimed = iree_atomic_compare_exchange_strong(
      &submission->deferred_state, &observed_state,
      IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_PROMOTING, iree_memory_order_acq_rel,
      iree_memory_order_acquire);
  return claimed;
}
// Hands a deferred |submission| over to the queue's ready list and signals the
// queue wakeup, or fails the submission when that is not possible (it was not
// on the deferred list, the queue has a recorded failure, or the wakeup signal
// failed).
static void iree_hal_vulkan_queue_deferred_submission_ready(
iree_hal_vulkan_queue_pending_submission_t* submission) {
iree_hal_vulkan_queue_t* queue = submission->queue;
iree_status_t failure_status = iree_ok_status();
// Under the submission mutex: remove the submission from the deferred list and
// sample the queue's recorded failure status.
iree_slim_mutex_lock(&queue->submission_mutex);
const bool was_deferred =
iree_hal_vulkan_queue_unlink_deferred_submission(queue, submission);
// The loaded status is not freed in this function; it is only cloned below.
failure_status = (iree_status_t)iree_atomic_load(&queue->failure_status,
iree_memory_order_acquire);
// Only a submission that was still on the deferred list of a queue without a
// recorded failure is appended to the ready list and marked READY.
if (was_deferred && iree_status_is_ok(failure_status)) {
iree_hal_vulkan_queue_append_ready_submission(queue, submission);
iree_atomic_store(&submission->deferred_state,
IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_READY,
iree_memory_order_release);
}
iree_slim_mutex_unlock(&queue->submission_mutex);
iree_notification_post(&submission->callback_notification, IREE_ALL_WAITERS);
// The submission was not found on the deferred list: fail it with INTERNAL.
if (!was_deferred) {
iree_hal_vulkan_queue_fail_unsubmitted_submission(
queue, submission,
iree_make_status(IREE_STATUS_INTERNAL,
"Vulkan deferred submission promotion lost queue "
"ownership"));
return;
}
// The queue already has a recorded failure: fail the submission with a clone.
if (!iree_status_is_ok(failure_status)) {
iree_hal_vulkan_queue_fail_unsubmitted_submission(
queue, submission, iree_status_clone(failure_status));
return;
}
// NOTE(review): presumably wakes the completion thread so it drains the ready
// list — confirm against iree_hal_vulkan_queue_signal_wakeup.
iree_status_t wake_status = iree_hal_vulkan_queue_signal_wakeup(queue);
if (!iree_status_is_ok(wake_status)) {
// Record the wake failure on the queue, then try to take the submission back
// off the ready list so it can be failed here.
iree_status_t stored_status =
iree_hal_vulkan_queue_store_error(queue, wake_status);
iree_slim_mutex_lock(&queue->submission_mutex);
const bool recovered_submission =
iree_hal_vulkan_queue_unlink_ready_submission(queue, submission);
iree_slim_mutex_unlock(&queue->submission_mutex);
if (recovered_submission) {
iree_hal_vulkan_queue_fail_unsubmitted_submission(queue, submission,
stored_status);
} else {
// The submission was no longer on the ready list; only the status returned
// by store_error needs releasing.
iree_status_free(stored_status);
}
}
}
// Semaphore timepoint callback for one software-deferred wait.
// |user_data| is the iree_hal_vulkan_queue_wait_entry_t registered for the
// timepoint; |status| is the resolution status of the wait. |timepoint| is
// unused.
static void iree_hal_vulkan_queue_deferred_wait_resolved(
void* user_data, iree_async_semaphore_timepoint_t* timepoint,
iree_status_t status) {
(void)timepoint;
iree_hal_vulkan_queue_wait_entry_t* entry =
(iree_hal_vulkan_queue_wait_entry_t*)user_data;
iree_hal_vulkan_queue_pending_submission_t* submission = entry->submission;
// A non-OK status is recorded as the submission's wait failure (first failure
// wins); OK statuses are ignored by the helper.
iree_hal_vulkan_queue_submission_record_wait_status(submission, status);
// The callback that observes the count dropping from 1 resolved the last
// outstanding wait; it owns promotion only if it also wins the
// PENDING -> PROMOTING claim.
const int32_t previous_count = iree_atomic_fetch_sub(
&submission->wait_count, 1, iree_memory_order_acq_rel);
const bool owns_promotion =
previous_count == 1 &&
iree_hal_vulkan_queue_submission_claim_promotion(submission);
// Completion is published only after the promotion claim has been attempted.
iree_hal_vulkan_queue_wait_entry_publish_callback_complete(entry);
if (!owns_promotion) return;
// The promotion owner blocks until every wait entry has published completion
// before handing the submission to the ready path.
iree_notification_await(&submission->callback_notification,
iree_hal_vulkan_queue_wait_callbacks_are_complete,
submission, iree_infinite_timeout());
iree_hal_vulkan_queue_deferred_submission_ready(submission);
}
// Allocates and initializes the wait entries used to software-defer
// |submission|, one per semaphore in its wait list.
//
// On success the submission's wait_count equals the number of waits, its
// deferred state is PENDING, and every entry points back at the submission
// with callback_complete preset to 1.
// Returns OUT_OF_RANGE when the wait count exceeds INT32_MAX or the entry
// array size overflows, or the allocator's error when allocation fails.
static iree_status_t iree_hal_vulkan_queue_prepare_deferred_submission(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  const iree_host_size_t entry_count = submission->wait_semaphore_list.count;
  // The outstanding-wait counter is a 32-bit signed atomic.
  if (entry_count > INT32_MAX) {
    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
                            "too many software-deferred Vulkan queue waits");
  }

  iree_host_size_t total_size = 0;
  const bool size_fits = iree_host_size_checked_mul(
      entry_count, sizeof(*submission->wait_entries), &total_size);
  if (!size_fits) {
    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
                            "software-deferred Vulkan wait list is too large");
  }

  IREE_RETURN_IF_ERROR(iree_allocator_malloc(
      queue->host_allocator, total_size, (void**)&submission->wait_entries));
  memset(submission->wait_entries, 0, total_size);
  submission->wait_entry_count = entry_count;

  iree_atomic_store(&submission->wait_count, (int32_t)entry_count,
                    iree_memory_order_release);
  iree_atomic_store(&submission->deferred_state,
                    IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_PENDING,
                    iree_memory_order_release);

  // Entries start out flagged complete; the flag is cleared when a timepoint
  // is actually registered for the entry.
  iree_hal_vulkan_queue_wait_entry_t* entry = submission->wait_entries;
  for (iree_host_size_t remaining = entry_count; remaining > 0;
       --remaining, ++entry) {
    entry->submission = submission;
    iree_atomic_store(&entry->callback_complete, 1, iree_memory_order_relaxed);
  }
  return iree_ok_status();
}
// Registers a timepoint on every semaphore in |submission|'s wait list so that
// iree_hal_vulkan_queue_deferred_wait_resolved runs as each wait resolves.
//
// Always returns OK: a registration failure is recorded as the submission's
// wait failure instead of being returned.
static iree_status_t iree_hal_vulkan_queue_start_deferred_submission(
iree_hal_vulkan_queue_pending_submission_t* submission) {
// NOTE(review): the loop bound is read from this local copy, presumably so the
// loop does not touch |submission| after the final timepoint is registered —
// confirm the intended lifetime rules.
iree_hal_semaphore_list_t wait_semaphores = submission->wait_semaphore_list;
for (iree_host_size_t i = 0; i < wait_semaphores.count; ++i) {
iree_hal_vulkan_queue_wait_entry_t* entry = &submission->wait_entries[i];
// Clear the completion flag before the callback can possibly run.
iree_atomic_store(&entry->callback_complete, 0, iree_memory_order_relaxed);
entry->timepoint.callback = iree_hal_vulkan_queue_deferred_wait_resolved;
entry->timepoint.user_data = entry;
iree_status_t status = iree_async_semaphore_acquire_timepoint(
(iree_async_semaphore_t*)wait_semaphores.semaphores[i],
wait_semaphores.payload_values[i], &entry->timepoint);
if (!iree_status_is_ok(status)) {
// Registration failed: record the failure and mark this entry complete.
iree_hal_vulkan_queue_submission_record_wait_status(submission, status);
iree_atomic_store(&entry->callback_complete, 1,
iree_memory_order_release);
// Remove this wait and all later, never-registered waits from the
// outstanding count in one step.
const int32_t unregistered = (int32_t)(wait_semaphores.count - i);
const int32_t previous_count = iree_atomic_fetch_sub(
&submission->wait_count, unregistered, iree_memory_order_acq_rel);
// If that brought the count to zero and this thread wins the promotion
// claim, wait for all callbacks to publish completion and promote here.
if (previous_count == unregistered &&
iree_hal_vulkan_queue_submission_claim_promotion(submission)) {
iree_notification_await(
&submission->callback_notification,
iree_hal_vulkan_queue_wait_callbacks_are_complete, submission,
iree_infinite_timeout());
iree_hal_vulkan_queue_deferred_submission_ready(submission);
}
return iree_ok_status();
}
}
return iree_ok_status();
}
// Returns true once the alloca memory-wait callback-complete flag of
// |submission| reads non-zero. The flag is loaded with acquire ordering.
static bool iree_hal_vulkan_queue_alloca_memory_wait_callback_is_complete(
    const iree_hal_vulkan_queue_pending_submission_t* submission) {
  const bool is_complete =
      iree_atomic_load(&submission->alloca.memory_wait_callback_complete,
                       iree_memory_order_acquire) != 0;
  return is_complete;
}
// void*-taking adapter around
// iree_hal_vulkan_queue_alloca_memory_wait_callback_is_complete so it can be
// used as an iree_notification_await condition. |user_data| is the pending
// submission.
static bool iree_hal_vulkan_queue_alloca_memory_wait_callback_is_complete_thunk(
    void* user_data) {
  const iree_hal_vulkan_queue_pending_submission_t* submission =
      (const iree_hal_vulkan_queue_pending_submission_t*)user_data;
  return iree_hal_vulkan_queue_alloca_memory_wait_callback_is_complete(
      submission);
}
// Marks the alloca memory-wait callback of |submission| as complete and posts
// the submission's callback notification to all waiters.
static void iree_hal_vulkan_queue_alloca_memory_wait_publish_complete(
iree_hal_vulkan_queue_pending_submission_t* submission) {
// The flag is release-stored before the post so that a woken waiter that
// acquire-loads it observes the completed state.
iree_atomic_store(&submission->alloca.memory_wait_callback_complete, 1,
iree_memory_order_release);
iree_notification_post(&submission->callback_notification, IREE_ALL_WAITERS);
}
// Moves the submission's deferred state from PENDING to ARMING.
// Returns FAILED_PRECONDITION, reporting the state actually observed, when
// the submission was not PENDING.
static iree_status_t iree_hal_vulkan_queue_alloca_memory_wait_begin_arming(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  int32_t observed_state = IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_PENDING;
  const bool armed = iree_atomic_compare_exchange_strong(
      &submission->deferred_state, &observed_state,
      IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_ARMING, iree_memory_order_acq_rel,
      iree_memory_order_acquire);
  if (!armed) {
    // On failure the compare-exchange wrote the current state back.
    return iree_make_status(
        IREE_STATUS_FAILED_PRECONDITION,
        "Vulkan alloca memory wait cannot arm from deferred state %d",
        observed_state);
  }
  return iree_ok_status();
}
// Attempts to move the submission's deferred state from ARMING directly to
// PROMOTING. Returns true when the compare-exchange succeeds.
static bool iree_hal_vulkan_queue_alloca_memory_wait_promote_from_arming(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  int32_t observed_state = IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_ARMING;
  const bool promoted = iree_atomic_compare_exchange_strong(
      &submission->deferred_state, &observed_state,
      IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_PROMOTING, iree_memory_order_acq_rel,
      iree_memory_order_acquire);
  return promoted;
}
// Closes the ARMING window opened by
// iree_hal_vulkan_queue_alloca_memory_wait_begin_arming once the wait
// registration has returned |status|.
//
// While the state is ARMING the resolve callback cannot win its
// PENDING -> PROMOTING claim, so this function is responsible for promoting
// the submission if the callback completed in the meantime.
static void iree_hal_vulkan_queue_alloca_memory_wait_finish_arming(
iree_hal_vulkan_queue_pending_submission_t* submission,
iree_status_t status) {
// Registration failed: record the failure and publish completion here.
if (!iree_status_is_ok(status)) {
iree_hal_vulkan_queue_submission_record_wait_status(submission, status);
iree_hal_vulkan_queue_alloca_memory_wait_publish_complete(submission);
}
// Completion already published (by the callback or by the failure path
// above): promote straight from ARMING.
if (iree_hal_vulkan_queue_alloca_memory_wait_callback_is_complete(
submission)) {
if (iree_hal_vulkan_queue_alloca_memory_wait_promote_from_arming(
submission)) {
iree_hal_vulkan_queue_deferred_submission_ready(submission);
}
return;
}
// Wait still outstanding: return the state to PENDING so the callback can
// claim promotion itself.
int32_t expected_state = IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_ARMING;
if (iree_atomic_compare_exchange_strong(
&submission->deferred_state, &expected_state,
IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_PENDING,
iree_memory_order_acq_rel, iree_memory_order_acquire)) {
// Re-check: the callback may have completed between the check above and the
// state change, in which case its own claim attempt saw ARMING and failed.
if (iree_hal_vulkan_queue_alloca_memory_wait_callback_is_complete(
submission) &&
iree_hal_vulkan_queue_submission_claim_promotion(submission)) {
iree_hal_vulkan_queue_deferred_submission_ready(submission);
}
}
}
// Common resolution path for alloca memory waits (frontier and pool
// notification).
//
// On an OK |status| the submission's memory wait kind is reset to NONE. The
// status is then recorded as a wait failure if non-OK, promotion is claimed,
// completion is published, and the submission is handed to the ready path
// only when this caller won the claim.
static void iree_hal_vulkan_queue_alloca_memory_wait_resolved(
    iree_hal_vulkan_queue_pending_submission_t* submission,
    iree_status_t status) {
  if (iree_status_is_ok(status)) {
    switch (submission->alloca.memory_wait_kind) {
      case IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_FRONTIER:
      case IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_POOL_NOTIFICATION:
        // Both active wait kinds are cleared the same way.
        submission->alloca.memory_wait_kind =
            IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_NONE;
        break;
      case IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_NONE:
        break;
    }
  }
  iree_hal_vulkan_queue_submission_record_wait_status(submission, status);
  // The claim is attempted before completion is published.
  const bool claimed =
      iree_hal_vulkan_queue_submission_claim_promotion(submission);
  iree_hal_vulkan_queue_alloca_memory_wait_publish_complete(submission);
  if (!claimed) return;
  iree_hal_vulkan_queue_deferred_submission_ready(submission);
}
// Frontier tracker wait callback: forwards to the common alloca memory-wait
// resolution path. |user_data| is the pending submission.
static void iree_hal_vulkan_queue_alloca_frontier_wait_resolved(
    void* user_data, iree_status_t status) {
  iree_hal_vulkan_queue_pending_submission_t* submission =
      (iree_hal_vulkan_queue_pending_submission_t*)user_data;
  iree_hal_vulkan_queue_alloca_memory_wait_resolved(submission, status);
}
// Completion callback of the pool notification wait operation: forwards to
// the common alloca memory-wait resolution path. |user_data| is the pending
// submission; |operation| and |flags| are unused.
static void iree_hal_vulkan_queue_alloca_pool_notification_wait_resolved(
    void* user_data, iree_async_operation_t* operation, iree_status_t status,
    iree_async_completion_flags_t flags) {
  (void)flags;
  (void)operation;
  iree_hal_vulkan_queue_pending_submission_t* submission =
      (iree_hal_vulkan_queue_pending_submission_t*)user_data;
  iree_hal_vulkan_queue_alloca_memory_wait_resolved(submission, status);
}
// Starts a frontier-tracker wait for an alloca |submission| whose memory wait
// kind is FRONTIER. All outcomes, including failures, are reported through the
// memory-wait resolution/arming helpers rather than a return value.
static void iree_hal_vulkan_queue_start_alloca_frontier_wait(
iree_hal_vulkan_queue_pending_submission_t* submission) {
// Without a frontier there is nothing to wait on: resolve immediately with an
// INTERNAL error.
if (IREE_UNLIKELY(!submission->alloca.wait_frontier)) {
iree_hal_vulkan_queue_alloca_memory_wait_resolved(
submission,
iree_make_status(
IREE_STATUS_INTERNAL,
"queue_alloca waitable pool reservation did not provide a "
"frontier"));
return;
}
// Record a POOL_WAIT profile memory event flagged as a frontier wait.
iree_hal_vulkan_queue_profile_record_memory_event(
submission, IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_POOL_WAIT,
IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_QUEUE_OPERATION |
IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_WAIT_FRONTIER,
UINT32_MAX, submission->alloca.pool, submission->alloca.params,
/*reservation=*/NULL, /*backing_id=*/0,
submission->alloca.allocation_size,
submission->alloca.wait_frontier->entry_count);
// Clear the completion flag before the callback can possibly run.
iree_atomic_store(&submission->alloca.memory_wait_callback_complete, 0,
iree_memory_order_relaxed);
// Enter ARMING (PENDING -> ARMING) before registering the wait; if that
// fails the failure is routed through the normal resolution path.
iree_status_t status =
iree_hal_vulkan_queue_alloca_memory_wait_begin_arming(submission);
if (!iree_status_is_ok(status)) {
iree_hal_vulkan_queue_alloca_memory_wait_resolved(submission, status);
return;
}
status = iree_async_frontier_tracker_wait(
submission->queue->frontier_tracker, submission->alloca.wait_frontier,
iree_hal_vulkan_queue_alloca_frontier_wait_resolved, submission,
&submission->alloca.frontier_waiter);
// Leave ARMING; handles both a failed registration and a callback that has
// already completed.
iree_hal_vulkan_queue_alloca_memory_wait_finish_arming(submission, status);
}
// Starts a pool-notification wait for an alloca |submission| by submitting a
// NOTIFICATION_WAIT operation to the queue's proactor. All outcomes, including
// failures, are reported through the memory-wait resolution/arming helpers
// rather than a return value.
static void iree_hal_vulkan_queue_start_alloca_pool_notification_wait(
iree_hal_vulkan_queue_pending_submission_t* submission) {
// Record a POOL_WAIT profile memory event flagged as a notification wait.
iree_hal_vulkan_queue_profile_record_memory_event(
submission, IREE_HAL_PROFILE_MEMORY_EVENT_TYPE_POOL_WAIT,
IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_QUEUE_OPERATION |
IREE_HAL_PROFILE_MEMORY_EVENT_FLAG_WAIT_NOTIFICATION,
UINT32_MAX, submission->alloca.pool, submission->alloca.params,
/*reservation=*/NULL, /*backing_id=*/0,
submission->alloca.allocation_size,
/*frontier_entry_count=*/0);
// Use the wait operation storage selected by the current slot index.
iree_async_notification_wait_operation_t* wait_op =
&submission->alloca.pool_notification_wait_ops
[submission->alloca.pool_notification_wait_slot];
iree_async_operation_zero(&wait_op->base, sizeof(*wait_op));
iree_async_operation_initialize(
&wait_op->base, IREE_ASYNC_OPERATION_TYPE_NOTIFICATION_WAIT,
IREE_ASYNC_OPERATION_FLAG_NONE,
iree_hal_vulkan_queue_alloca_pool_notification_wait_resolved, submission);
// The wait targets the submission's pool notification using the wait token
// stored on the submission.
wait_op->notification = submission->alloca.pool_notification;
wait_op->wait_flags = IREE_ASYNC_NOTIFICATION_WAIT_FLAG_USE_WAIT_TOKEN;
wait_op->wait_token = submission->alloca.pool_notification_wait_token;
// Clear the completion flag before the callback can possibly run.
iree_atomic_store(&submission->alloca.memory_wait_callback_complete, 0,
iree_memory_order_relaxed);
// Enter ARMING (PENDING -> ARMING) before submitting the operation; if that
// fails the failure is routed through the normal resolution path.
iree_status_t status =
iree_hal_vulkan_queue_alloca_memory_wait_begin_arming(submission);
if (!iree_status_is_ok(status)) {
// NOTE(review): presumably ends the notification observation that produced
// the wait token — confirm against the end_observe helper.
iree_hal_vulkan_queue_alloca_pool_notification_end_observe(submission);
iree_hal_vulkan_queue_alloca_memory_wait_resolved(submission, status);
return;
}
status = iree_async_proactor_submit_one(submission->queue->proactor,
&wait_op->base);
// end_observe is called on this path too, after the submit attempt.
iree_hal_vulkan_queue_alloca_pool_notification_end_observe(submission);
// Leave ARMING; handles both a failed submit and a callback that has already
// completed.
iree_hal_vulkan_queue_alloca_memory_wait_finish_arming(submission, status);
}
// Starts the memory wait selected by |submission|'s alloca.memory_wait_kind.
//
// Returns OK once a frontier or pool-notification wait has been started (the
// wait's own outcome is reported elsewhere), and INTERNAL when the kind is
// NONE or not recognized.
static iree_status_t iree_hal_vulkan_queue_start_alloca_memory_wait(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  if (submission->alloca.memory_wait_kind ==
      IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_FRONTIER) {
    iree_hal_vulkan_queue_start_alloca_frontier_wait(submission);
    return iree_ok_status();
  }
  if (submission->alloca.memory_wait_kind ==
      IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_POOL_NOTIFICATION) {
    // Advance to the other wait-operation slot before starting the wait.
    submission->alloca.pool_notification_wait_slot =
        (uint8_t)((submission->alloca.pool_notification_wait_slot + 1u) & 1u);
    iree_hal_vulkan_queue_start_alloca_pool_notification_wait(submission);
    return iree_ok_status();
  }
  if (submission->alloca.memory_wait_kind ==
      IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_NONE) {
    return iree_make_status(
        IREE_STATUS_INTERNAL,
        "Vulkan alloca submission has no memory wait to start");
  }
  return iree_make_status(IREE_STATUS_INTERNAL,
                          "unrecognized Vulkan alloca memory wait kind %u",
                          submission->alloca.memory_wait_kind);
}
// Allocates an uninitialized array of |wait_count| VkSemaphoreSubmitInfo
// entries from the queue's host allocator.
//
// On success |out_wait_infos| receives the array (NULL when |wait_count| is 0)
// and |out_wait_info_capacity| its entry count. Both outputs are reset to
// NULL/0 before any check, so they are well-defined on failure.
// Returns OUT_OF_RANGE when |wait_count| does not fit in uint32_t or the byte
// size of the array overflows iree_host_size_t, or the allocator's error when
// allocation fails.
static iree_status_t iree_hal_vulkan_queue_allocate_wait_infos(
    iree_hal_vulkan_queue_t* queue, iree_host_size_t wait_count,
    VkSemaphoreSubmitInfo** out_wait_infos, uint32_t* out_wait_info_capacity) {
  *out_wait_infos = NULL;
  *out_wait_info_capacity = 0;
  if (wait_count == 0) return iree_ok_status();
  if (wait_count > UINT32_MAX) {
    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
                            "too many Vulkan queue wait semaphores");
  }
  // A count that fits in uint32_t can still overflow the byte size where
  // iree_host_size_t is 32 bits wide, so the multiply is checked.
  iree_host_size_t wait_infos_size = 0;
  if (!iree_host_size_checked_mul(wait_count, sizeof(**out_wait_infos),
                                  &wait_infos_size)) {
    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
                            "Vulkan queue wait semaphore list is too large");
  }
  IREE_RETURN_IF_ERROR(iree_allocator_malloc(
      queue->host_allocator, wait_infos_size, (void**)out_wait_infos));
  *out_wait_info_capacity = (uint32_t)wait_count;
  return iree_ok_status();
}
// Atomically swaps the submission's recorded wait failure with 0 and returns
// the previous value as a status. A second call returns the 0 (OK) value
// unless a new failure was recorded in between.
static iree_status_t iree_hal_vulkan_queue_submission_take_wait_failure(
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  iree_status_t taken_status = (iree_status_t)iree_atomic_exchange(
      &submission->wait_failure_status, 0, iree_memory_order_acquire);
  return taken_status;
}
// Removes from |queue|'s deferred list every submission whose deferred state
// can be moved from PENDING to CANCELLING and returns them as a singly linked
// list (via |next|) in their original relative order. Submissions in any other
// state stay on the deferred list.
//
// |out_has_deferred_submissions| is set to true when the queue's deferred list
// is still non-empty afterwards. The callers in this file hold
// queue->submission_mutex around the call.
static iree_hal_vulkan_queue_pending_submission_t*
iree_hal_vulkan_queue_take_cancellable_deferred_submissions_under_lock(
iree_hal_vulkan_queue_t* queue, bool* out_has_deferred_submissions) {
iree_hal_vulkan_queue_pending_submission_t* cancellable_head = NULL;
// |cancellable_tail| always points at the link to write the next claimed
// submission into.
iree_hal_vulkan_queue_pending_submission_t** cancellable_tail =
&cancellable_head;
// |link| points at the pointer that references the current list element so
// the element can be unlinked without tracking a previous node.
iree_hal_vulkan_queue_pending_submission_t** link = &queue->deferred_head;
while (*link) {
iree_hal_vulkan_queue_pending_submission_t* submission = *link;
int32_t expected_state = IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_PENDING;
if (iree_atomic_compare_exchange_strong(
&submission->deferred_state, &expected_state,
IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_CANCELLING,
iree_memory_order_acq_rel, iree_memory_order_acquire)) {
// Claimed: splice it out of the deferred list and append it to the result.
// |link| is left unchanged so the successor is examined next.
*link = submission->next;
submission->next = NULL;
*cancellable_tail = submission;
cancellable_tail = &submission->next;
continue;
}
// Not claimable: leave it in place and advance.
link = &submission->next;
}
*out_has_deferred_submissions = queue->deferred_head != NULL;
return cancellable_head;
}
// Cancels any outstanding alloca memory wait on |submission| and does not
// return until the wait's callback-complete flag has been published. No-op
// for non-alloca submissions and when no memory wait is active. On exit the
// memory wait kind is NONE.
static void iree_hal_vulkan_queue_cancel_alloca_memory_wait(
iree_hal_vulkan_queue_pending_submission_t* submission) {
if (submission->kind != IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_ALLOCA) return;
if (submission->alloca.memory_wait_kind ==
IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_NONE) {
return;
}
switch (submission->alloca.memory_wait_kind) {
case IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_FRONTIER:
// Callback already completed: nothing left to cancel.
if (iree_hal_vulkan_queue_alloca_memory_wait_callback_is_complete(
submission)) {
break;
}
// If the tracker reports the wait was not cancelled, block until the
// callback publishes completion; otherwise publish completion here.
// NOTE(review): presumably a false return means the callback is running or
// about to run — confirm against the frontier tracker API.
if (!iree_async_frontier_tracker_cancel_wait(
submission->queue->frontier_tracker,
&submission->alloca.frontier_waiter)) {
iree_notification_await(
&submission->callback_notification,
iree_hal_vulkan_queue_alloca_memory_wait_callback_is_complete_thunk,
submission, iree_infinite_timeout());
} else {
iree_hal_vulkan_queue_alloca_memory_wait_publish_complete(submission);
}
break;
case IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_POOL_NOTIFICATION:
iree_hal_vulkan_queue_alloca_pool_notification_end_observe(submission);
// NOTE(review): the signal presumably wakes the pending notification wait
// operation so that its callback runs — confirm.
if (submission->alloca.pool_notification) {
iree_async_notification_signal(submission->alloca.pool_notification,
INT32_MAX);
}
// Block until the wait callback has published completion.
if (!iree_hal_vulkan_queue_alloca_memory_wait_callback_is_complete(
submission)) {
iree_notification_await(
&submission->callback_notification,
iree_hal_vulkan_queue_alloca_memory_wait_callback_is_complete_thunk,
submission, iree_infinite_timeout());
}
break;
case IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_NONE:
break;
}
submission->alloca.memory_wait_kind =
IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_NONE;
}
// Cancels a deferred |submission| that has already been claimed (its deferred
// state must be CANCELLING) and fails it with |status|, which is passed on to
// iree_hal_vulkan_queue_fail_unsubmitted_submission.
static void iree_hal_vulkan_queue_cancel_deferred_submission(
iree_hal_vulkan_queue_t* queue,
iree_hal_vulkan_queue_pending_submission_t* submission,
iree_status_t status) {
IREE_ASSERT(iree_atomic_load(&submission->deferred_state,
iree_memory_order_acquire) ==
IREE_HAL_VULKAN_QUEUE_DEFERRED_STATE_CANCELLING,
"Vulkan deferred submission must be claimed before "
"cancellation");
// Try to cancel every timepoint whose callback has not completed yet. When
// cancellation succeeds, completion is published here on the callback's
// behalf.
for (iree_host_size_t i = 0; i < submission->wait_entry_count; ++i) {
iree_hal_vulkan_queue_wait_entry_t* entry = &submission->wait_entries[i];
if (iree_hal_vulkan_queue_wait_entry_callback_is_complete(entry)) continue;
if (iree_async_semaphore_cancel_timepoint(entry->timepoint.semaphore,
&entry->timepoint)) {
iree_hal_vulkan_queue_wait_entry_publish_callback_complete(entry);
}
}
// Block until every wait entry has published completion, including those
// whose timepoints were not cancelled above.
iree_notification_await(&submission->callback_notification,
iree_hal_vulkan_queue_wait_callbacks_are_complete,
submission, iree_infinite_timeout());
iree_hal_vulkan_queue_cancel_alloca_memory_wait(submission);
iree_hal_vulkan_queue_fail_unsubmitted_submission(queue, submission, status);
}
// Cancels every submission in the |deferred_head| list, detaching each from
// the list before cancelling it. |status| is not consumed: each cancellation
// receives its own clone.
// Returns true when the list contained at least one submission.
static bool iree_hal_vulkan_queue_cancel_deferred_submission_list(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* deferred_head,
    iree_status_t status) {
  const bool had_submissions = deferred_head != NULL;
  iree_hal_vulkan_queue_pending_submission_t* current = deferred_head;
  while (current) {
    // Capture the successor first; cancellation takes the submission.
    iree_hal_vulkan_queue_pending_submission_t* following = current->next;
    current->next = NULL;
    iree_hal_vulkan_queue_cancel_deferred_submission(
        queue, current, iree_status_clone(status));
    current = following;
  }
  return had_submissions;
}
// Cancels deferred submissions on |queue| until its deferred list is empty.
// Each pass claims the currently cancellable submissions under the submission
// mutex and cancels them with clones of |status| outside the lock. When
// submissions remain but none could be claimed, the thread yields before
// retrying. Frees |status| before returning.
static void iree_hal_vulkan_queue_cancel_deferred_submissions(
    iree_hal_vulkan_queue_t* queue, iree_status_t status) {
  bool more_deferred = false;
  do {
    iree_slim_mutex_lock(&queue->submission_mutex);
    iree_hal_vulkan_queue_pending_submission_t* claimed_head =
        iree_hal_vulkan_queue_take_cancellable_deferred_submissions_under_lock(
            queue, &more_deferred);
    iree_slim_mutex_unlock(&queue->submission_mutex);

    const bool cancelled_any =
        iree_hal_vulkan_queue_cancel_deferred_submission_list(
            queue, claimed_head, status);
    if (more_deferred && !cancelled_any) {
      iree_thread_yield();
    }
  } while (more_deferred);
  iree_status_free(status);
}
// Fails everything queued on |queue| with |status|: pending submissions, ready
// submissions, and deferred submissions. Frees |status| before returning;
// consumers that need their own copy receive a clone.
static void iree_hal_vulkan_queue_fail_pending_submissions(
iree_hal_vulkan_queue_t* queue, iree_status_t status) {
iree_hal_vulkan_queue_pending_submission_t* pending_head = NULL;
iree_hal_vulkan_queue_pending_submission_t* ready_head = NULL;
iree_hal_vulkan_queue_pending_submission_t* deferred_head = NULL;
// Under the submission mutex: detach the pending and ready lists wholesale and
// claim whichever deferred submissions are currently cancellable.
iree_slim_mutex_lock(&queue->submission_mutex);
pending_head = queue->pending_head;
ready_head = queue->ready_head;
bool has_deferred_submissions = false;
deferred_head =
iree_hal_vulkan_queue_take_cancellable_deferred_submissions_under_lock(
queue, &has_deferred_submissions);
queue->pending_head = NULL;
queue->pending_tail = NULL;
queue->ready_head = NULL;
queue->ready_tail = NULL;
iree_slim_mutex_unlock(&queue->submission_mutex);
// Fail this queue's axis on the frontier tracker, when one is attached.
if (queue->frontier_tracker) {
iree_async_frontier_tracker_fail_axis(queue->frontier_tracker, queue->axis,
iree_status_clone(status));
}
// Pending submissions are completed with |status| passed directly (not
// cloned) and then destroyed.
while (pending_head) {
iree_hal_vulkan_queue_pending_submission_t* next = pending_head->next;
pending_head->next = NULL;
iree_hal_vulkan_queue_complete_submission(queue, pending_head, status);
iree_hal_vulkan_queue_pending_submission_destroy(queue, pending_head);
pending_head = next;
}
// Ready submissions were never submitted: fail each with its own clone.
while (ready_head) {
iree_hal_vulkan_queue_pending_submission_t* next = ready_head->next;
ready_head->next = NULL;
iree_hal_vulkan_queue_fail_unsubmitted_submission(
queue, ready_head, iree_status_clone(status));
ready_head = next;
}
// Cancel the deferred submissions claimed above, then keep claiming and
// cancelling until the deferred list is empty, yielding when submissions
// remain but none could be claimed on a pass.
iree_hal_vulkan_queue_cancel_deferred_submission_list(queue, deferred_head,
status);
while (has_deferred_submissions) {
iree_slim_mutex_lock(&queue->submission_mutex);
deferred_head =
iree_hal_vulkan_queue_take_cancellable_deferred_submissions_under_lock(
queue, &has_deferred_submissions);
iree_slim_mutex_unlock(&queue->submission_mutex);
const bool cancelled_submission =
iree_hal_vulkan_queue_cancel_deferred_submission_list(
queue, deferred_head, status);
if (has_deferred_submissions && !cancelled_submission) {
iree_thread_yield();
}
}
iree_status_free(status);
}
// Pops submissions off |queue|'s ready list one at a time and submits each
// natively until the list is empty. A submission that is neither submitted nor
// turned into a memory wait is failed with the error that prevented it.
static void iree_hal_vulkan_queue_drain_ready_submissions(
iree_hal_vulkan_queue_t* queue) {
while (true) {
iree_hal_vulkan_queue_pending_submission_t* submission = NULL;
iree_hal_vulkan_queue_submission_result_t submit_result = {
.submitted = false,
.queue_failure_status = iree_ok_status(),
};
iree_status_t status = iree_ok_status();
iree_slim_mutex_lock(&queue->submission_mutex);
submission = iree_hal_vulkan_queue_pop_ready_submission(queue);
iree_slim_mutex_unlock(&queue->submission_mutex);
// Ready list is empty: done.
if (!submission) return;
// Scratch storage for the wait infos is allocated outside the lock.
VkSemaphoreSubmitInfo* wait_infos = NULL;
uint32_t wait_info_capacity = 0;
status = iree_hal_vulkan_queue_allocate_wait_infos(
queue, submission->wait_semaphore_list.count, &wait_infos,
&wait_info_capacity);
// Pick up any failure recorded while the submission's waits were resolving;
// a non-OK result skips the native submit below.
if (iree_status_is_ok(status)) {
iree_slim_mutex_lock(&queue->submission_mutex);
status = iree_hal_vulkan_queue_submission_take_wait_failure(submission);
iree_slim_mutex_unlock(&queue->submission_mutex);
}
if (iree_status_is_ok(status)) {
iree_slim_mutex_lock(&queue->submission_mutex);
if (wait_info_capacity < submission->wait_semaphore_list.count) {
status = iree_make_status(
IREE_STATUS_OUT_OF_RANGE,
"too many Vulkan queue wait semaphores for ready submission");
} else {
iree_hal_vulkan_queue_wait_resolution_t resolution = {
.wait_infos = wait_infos,
.wait_info_capacity = wait_info_capacity,
};
// Software deferral is disabled for submissions coming off the ready list.
status = iree_hal_vulkan_queue_submit_native_under_lock(
queue, submission, /*allow_software_deferral=*/false, &resolution,
&submit_result);
}
iree_slim_mutex_unlock(&queue->submission_mutex);
}
if (wait_infos) {
iree_allocator_free(queue->host_allocator, wait_infos);
}
// The submit reported a queue-level failure: fail everything queued, handing
// over the failure status.
if (!iree_status_is_ok(submit_result.queue_failure_status)) {
iree_hal_vulkan_queue_fail_pending_submissions(
queue, submit_result.queue_failure_status);
}
// The submit produced a submission that must first wait for memory: start
// that wait and move on to the next ready submission.
if (submit_result.memory_wait_submission) {
iree_status_t memory_wait_status =
iree_hal_vulkan_queue_start_alloca_memory_wait(
submit_result.memory_wait_submission);
if (!iree_status_is_ok(memory_wait_status)) {
iree_hal_vulkan_queue_fail_pending_submissions(queue,
memory_wait_status);
}
iree_status_free(status);
continue;
}
if (submit_result.submitted) {
iree_status_free(status);
continue;
}
// Not submitted: fail the submission. An OK status at this point is
// unexpected and is replaced with an INTERNAL error.
if (iree_status_is_ok(status)) {
status = iree_make_status(
IREE_STATUS_INTERNAL,
"Vulkan ready submission was neither submitted nor failed");
}
iree_hal_vulkan_queue_fail_unsubmitted_submission(queue, submission,
status);
}
}
// Thread entry point for the queue's completion thread. |entry_arg| is the
// iree_hal_vulkan_queue_t. Each iteration drains ready submissions and
// completions, then blocks until the epoch or wakeup semaphore advances.
// Always returns 0.
static int iree_hal_vulkan_queue_completion_thread_main(void* entry_arg) {
iree_hal_vulkan_queue_t* queue = (iree_hal_vulkan_queue_t*)entry_arg;
// Wakeup value last observed by this thread; the wait below targets the next
// value after it.
uint64_t observed_wakeup_value = (uint64_t)iree_atomic_load(
&queue->wakeup_value, iree_memory_order_acquire);
while (true) {
iree_hal_vulkan_queue_drain_ready_submissions(queue);
iree_hal_vulkan_queue_drain_completions(queue);
const bool stop_requested =
iree_atomic_load(&queue->stop_requested, iree_memory_order_acquire) !=
0;
// On stop: cancel all deferred submissions with CANCELLED, then drain once
// more to pick up anything that became ready or completed meanwhile.
if (stop_requested) {
iree_hal_vulkan_queue_cancel_deferred_submissions(
queue, iree_status_from_code(IREE_STATUS_CANCELLED));
iree_hal_vulkan_queue_drain_ready_submissions(queue);
iree_hal_vulkan_queue_drain_completions(queue);
}
// Exit once nothing is pending and either a stop was requested or the queue
// has a recorded failure.
const bool has_pending = iree_hal_vulkan_queue_has_pending(queue);
if (stop_requested && !has_pending) break;
iree_status_t failure_status = (iree_status_t)iree_atomic_load(
&queue->failure_status, iree_memory_order_acquire);
if (!iree_status_is_ok(failure_status) && !has_pending) break;
// Block with no timeout until EITHER the epoch semaphore reaches the epoch
// after the last drained one OR the wakeup semaphore passes the last
// observed wakeup value.
VkSemaphore wait_semaphores[2] = {
queue->epoch_semaphore,
queue->wakeup_semaphore,
};
uint64_t wait_values[2] = {
(uint64_t)iree_atomic_load(&queue->last_drained_epoch,
iree_memory_order_acquire) +
1,
observed_wakeup_value + 1,
};
VkSemaphoreWaitInfo wait_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
.flags = VK_SEMAPHORE_WAIT_ANY_BIT,
.semaphoreCount = IREE_ARRAYSIZE(wait_semaphores),
.pSemaphores = wait_semaphores,
.pValues = wait_values,
};
VkResult result = iree_vkWaitSemaphores_raw(
&queue->syms, queue->logical_device, &wait_info, UINT64_MAX);
if (result == VK_SUCCESS) {
// Refresh the observed wakeup value and go around to drain again.
observed_wakeup_value = (uint64_t)iree_atomic_load(
&queue->wakeup_value, iree_memory_order_acquire);
continue;
}
// The wait itself failed: record the error on the queue and fail everything
// queued. The loop then continues and exits through the checks above once
// nothing is pending.
iree_status_t status = iree_status_from_vk_result(
__FILE__, __LINE__, result, "vkWaitSemaphores");
iree_status_t stored_status =
iree_hal_vulkan_queue_store_error(queue, status);
iree_hal_vulkan_queue_fail_pending_submissions(queue, stored_status);
}
return 0;
}
// Initializes |out_queue| from |params|.
//
// |out_queue| is zeroed first. The enabled dispatch ABIs are verified before
// any resource is created, so a verification failure returns directly. The
// epoch and wakeup timeline semaphores and the descriptor cache are then
// created; if any of those steps fails the queue is deinitialized before the
// error is returned.
iree_status_t iree_hal_vulkan_queue_initialize(
const iree_hal_vulkan_queue_params_t* params,
iree_hal_vulkan_queue_t* out_queue) {
IREE_ASSERT_ARGUMENT(params);
IREE_ASSERT_ARGUMENT(params->device);
IREE_ASSERT_ARGUMENT(params->syms);
IREE_ASSERT_ARGUMENT(params->logical_device);
IREE_ASSERT_ARGUMENT(params->builtins);
IREE_ASSERT_ARGUMENT(params->queue);
IREE_ASSERT_ARGUMENT(params->queue_handle_mutex);
IREE_ASSERT_ARGUMENT(params->proactor);
IREE_ASSERT_ARGUMENT(out_queue);
memset(out_queue, 0, sizeof(*out_queue));
IREE_RETURN_IF_ERROR(
iree_hal_vulkan_dispatch_abis_verify(params->enabled_dispatch_abis));
out_queue->device = params->device;
// The symbol table is copied by value into the queue.
out_queue->syms = *params->syms;
// iree_hal_vulkan_queue_deinitialize returns early while logical_device is
// unset, so this must be assigned before the failure cleanup below can do
// anything.
out_queue->logical_device = params->logical_device;
out_queue->builtins = params->builtins;
out_queue->enabled_dispatch_abis = params->enabled_dispatch_abis;
out_queue->queue = params->queue;
out_queue->queue_flags = params->queue_flags;
out_queue->timestamp_valid_bits = params->timestamp_valid_bits;
out_queue->queue_handle_mutex = params->queue_handle_mutex;
out_queue->proactor = params->proactor;
out_queue->queue_family_index = params->queue_family_index;
out_queue->queue_index = params->queue_index;
out_queue->queue_affinity = params->queue_affinity;
out_queue->role = params->role;
out_queue->host_allocator = params->host_allocator;
// Epoch values start at 1; the epoch semaphore itself is created below with
// an initial value of 0.
out_queue->next_epoch_value = 1;
out_queue->native_replay_cache.max_instance_count =
params->max_cached_bda_replay_instances;
out_queue->native_replay_cache.max_publication_bytes =
params->max_cached_bda_replay_publication_bytes;
out_queue->native_replay_cache.retained_instance_count =
params->retained_cached_bda_replay_instances;
iree_slim_mutex_initialize(&out_queue->submission_mutex);
// The queue frontier starts out with no entries.
iree_async_frontier_initialize(
iree_async_fixed_frontier_as_frontier(&out_queue->frontier),
/*entry_count=*/0);
iree_atomic_store(&out_queue->last_drained_epoch, 0,
iree_memory_order_release);
iree_atomic_store(&out_queue->wakeup_value, 0, iree_memory_order_release);
iree_status_t status = iree_hal_vulkan_queue_create_timeline_semaphore(
&out_queue->syms, out_queue->logical_device, /*initial_value=*/0,
&out_queue->epoch_semaphore);
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_create_timeline_semaphore(
&out_queue->syms, out_queue->logical_device, /*initial_value=*/0,
&out_queue->wakeup_semaphore);
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_descriptor_cache_initialize(out_queue);
}
// Any failure after this point tears down whatever was created.
if (!iree_status_is_ok(status)) {
iree_hal_vulkan_queue_deinitialize(out_queue);
}
return status;
}
// Attaches |device_allocator| to |queue| and creates its upload and download
// staging rings.
//
// Returns FAILED_PRECONDITION when the queue already has a device allocator.
// If either ring fails to create, both ring pointers and the device allocator
// are reset to NULL and the creation error is returned.
iree_status_t iree_hal_vulkan_queue_initialize_staging(
    iree_hal_vulkan_queue_t* queue, iree_hal_allocator_t* device_allocator) {
  IREE_ASSERT_ARGUMENT(queue);
  IREE_ASSERT_ARGUMENT(device_allocator);
  if (queue->device_allocator != NULL) {
    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
                            "Vulkan queue staging is already initialized");
  }

  // The allocator is attached before the rings are created.
  queue->device_allocator = device_allocator;

  iree_status_t status = iree_hal_vulkan_queue_staging_ring_create(
      queue, IREE_HAL_VULKAN_QUEUE_STAGING_SLOT_SIZE,
      IREE_HAL_VULKAN_QUEUE_STAGING_SLOT_COUNT, &queue->upload_staging_ring);
  if (iree_status_is_ok(status)) {
    status = iree_hal_vulkan_queue_staging_ring_create(
        queue, IREE_HAL_VULKAN_QUEUE_STAGING_SLOT_SIZE,
        IREE_HAL_VULKAN_QUEUE_STAGING_SLOT_COUNT,
        &queue->download_staging_ring);
  }
  if (iree_status_is_ok(status)) {
    return status;
  }

  // Roll back in reverse creation order and detach the allocator.
  iree_hal_vulkan_queue_staging_ring_destroy(queue->download_staging_ring);
  queue->download_staging_ring = NULL;
  iree_hal_vulkan_queue_staging_ring_destroy(queue->upload_staging_ring);
  queue->upload_staging_ring = NULL;
  queue->device_allocator = NULL;
  return status;
}
// Releases everything owned by |queue| and zeroes the structure.
// Does nothing when queue->logical_device is unset, which is the case for a
// zeroed queue and for one that has already been deinitialized (the final
// memset clears the field).
void iree_hal_vulkan_queue_deinitialize(iree_hal_vulkan_queue_t* queue) {
if (!queue->logical_device) return;
iree_hal_vulkan_queue_retire_frontier(queue);
// Staging rings are destroyed and the device allocator reference cleared.
iree_hal_vulkan_queue_staging_ring_destroy(queue->download_staging_ring);
queue->download_staging_ring = NULL;
iree_hal_vulkan_queue_staging_ring_destroy(queue->upload_staging_ring);
queue->upload_staging_ring = NULL;
queue->device_allocator = NULL;
// Per-queue caches.
iree_hal_vulkan_queue_descriptor_cache_deinitialize(queue);
iree_hal_vulkan_queue_native_descriptor_cache_deinitialize(queue);
iree_hal_vulkan_queue_native_replay_cache_deinitialize(queue);
iree_hal_vulkan_queue_bda_publication_cache_deinitialize(queue);
iree_hal_vulkan_queue_timestamp_query_cache_deinitialize(queue);
iree_hal_vulkan_queue_command_buffer_cache_deinitialize(queue);
// Each semaphore is destroyed only if it was created.
if (queue->epoch_semaphore) {
iree_vkDestroySemaphore(IREE_VULKAN_DEVICE(&queue->syms),
queue->logical_device, queue->epoch_semaphore,
/*pAllocator=*/NULL);
queue->epoch_semaphore = VK_NULL_HANDLE;
}
if (queue->wakeup_semaphore) {
iree_vkDestroySemaphore(IREE_VULKAN_DEVICE(&queue->syms),
queue->logical_device, queue->wakeup_semaphore,
/*pAllocator=*/NULL);
queue->wakeup_semaphore = VK_NULL_HANDLE;
}
// The queue's recorded failure status, if any, is freed here.
iree_status_t failure_status = (iree_status_t)iree_atomic_load(
&queue->failure_status, iree_memory_order_acquire);
iree_status_free(failure_status);
iree_slim_mutex_deinitialize(&queue->submission_mutex);
memset(queue, 0, sizeof(*queue));
}
// Trims cached resources held by |queue|.
//
// Under the submission mutex the native replay cache list is walked and every
// replay with owner_epoch == 0 is unlinked once the number of earlier
// owner_epoch == 0 replays for the same command buffer has reached
// native_replay_cache.retained_instance_count. Replays with a non-zero
// owner_epoch are always kept. Unlinked replays are chained onto a local list
// and destroyed only after the mutex has been released. The BDA publication
// and timestamp query caches are trimmed afterwards.
void iree_hal_vulkan_queue_trim(iree_hal_vulkan_queue_t* queue) {
// Local singly-linked list of replays removed from the cache.
iree_hal_vulkan_queue_native_replay_t* destroy_head = NULL;
iree_hal_vulkan_queue_native_replay_t* destroy_tail = NULL;
iree_slim_mutex_lock(&queue->submission_mutex);
// |previous| is the last replay that stayed in the cache; it is what the
// unlink helper receives as the predecessor of a removed replay.
iree_hal_vulkan_queue_native_replay_t* previous = NULL;
iree_hal_vulkan_queue_native_replay_t* replay =
queue->native_replay_cache.head;
while (replay) {
// Capture the successor up front: |replay| may be unlinked and re-chained.
iree_hal_vulkan_queue_native_replay_t* next = replay->next;
// Count replays that are still in the cache ahead of |replay|, have
// owner_epoch == 0, and belong to the same command buffer. This rescans the
// list from the head for every replay.
uint32_t retained_idle_count = 0;
for (iree_hal_vulkan_queue_native_replay_t* retained =
queue->native_replay_cache.head;
retained != replay; retained = retained->next) {
if (retained->owner_epoch == 0 &&
retained->command_buffer == replay->command_buffer) {
retained_idle_count = retained_idle_count + 1;
}
}
const bool should_retain =
replay->owner_epoch != 0 ||
retained_idle_count <
queue->native_replay_cache.retained_instance_count;
if (should_retain) {
previous = replay;
} else {
iree_hal_vulkan_queue_native_replay_cache_unlink_under_lock(
queue, previous, replay);
queue->native_replay_cache.trim_count =
queue->native_replay_cache.trim_count + 1;
// Append to the destroy list.
// NOTE(review): the last appended node keeps whatever |next| value the
// unlink helper left in place; the destroy loop below stops only when it
// sees NULL, so this presumably relies on the helper clearing |next| —
// confirm.
if (destroy_tail) {
destroy_tail->next = replay;
} else {
destroy_head = replay;
}
destroy_tail = replay;
}
replay = next;
}
iree_slim_mutex_unlock(&queue->submission_mutex);
// Destroy the unlinked replays outside of the lock.
while (destroy_head) {
iree_hal_vulkan_queue_native_replay_t* next = destroy_head->next;
destroy_head->next = NULL;
iree_hal_vulkan_queue_native_replay_destroy(queue, destroy_head);
destroy_head = next;
}
iree_hal_vulkan_queue_bda_publication_cache_trim(queue);
iree_hal_vulkan_queue_timestamp_query_cache_trim(queue);
}
// Ensures the timestamp query cache of |queue| holds at least one block.
//
// The cache is checked under the submission mutex. When it is empty a new
// block is created without holding the mutex, and the cache is checked again
// under the mutex before the block is appended. If a block showed up in the
// meantime the newly created one is destroyed instead of being appended.
//
// Returns the block creation error if creation fails, OK otherwise.
iree_status_t iree_hal_vulkan_queue_prepare_profile_timestamp_queries(
    iree_hal_vulkan_queue_t* queue) {
  iree_slim_mutex_lock(&queue->submission_mutex);
  const bool already_populated = queue->timestamp_query_cache.head != NULL;
  iree_slim_mutex_unlock(&queue->submission_mutex);
  if (already_populated) {
    return iree_ok_status();
  }

  iree_hal_vulkan_queue_timestamp_query_block_t* new_block = NULL;
  IREE_RETURN_IF_ERROR(iree_hal_vulkan_queue_timestamp_query_block_create(
      queue, IREE_HAL_VULKAN_QUEUE_TIMESTAMP_QUERY_BLOCK_CAPACITY,
      &new_block));

  // Re-check under the lock before publishing the new block.
  iree_slim_mutex_lock(&queue->submission_mutex);
  const bool cache_filled_meanwhile =
      queue->timestamp_query_cache.head != NULL;
  if (!cache_filled_meanwhile) {
    iree_hal_vulkan_queue_timestamp_query_cache_append_block(queue, new_block);
  }
  iree_slim_mutex_unlock(&queue->submission_mutex);

  if (cache_filled_meanwhile) {
    iree_hal_vulkan_queue_timestamp_query_block_destroy(queue, new_block);
  }
  return iree_ok_status();
}
// Converts |value| to int64_t, clamping anything above INT64_MAX to INT64_MAX.
static int64_t iree_hal_vulkan_queue_i64_saturate_u64(uint64_t value) {
  if (value <= (uint64_t)INT64_MAX) {
    return (int64_t)value;
  }
  return INT64_MAX;
}
// Looks up a queue statistic identified by |category| and |key|.
//
// Recognized categories are "vulkan.queue.bda_publication_cache",
// "vulkan.queue.timestamp_query_cache" and "vulkan.queue.native_replay_cache".
// Values are read while holding the submission mutex.
//
// |out_value| is always reset to 0 first. Returns true and stores the value
// when both the category and the key are recognized; returns false otherwise
// (leaving |out_value| at 0).
bool iree_hal_vulkan_queue_query_i64(iree_hal_vulkan_queue_t* queue,
                                     iree_string_view_t category,
                                     iree_string_view_t key,
                                     int64_t* out_value) {
  IREE_ASSERT_ARGUMENT(queue);
  IREE_ASSERT_ARGUMENT(out_value);
  *out_value = 0;

  if (iree_string_view_equal(category,
                             IREE_SV("vulkan.queue.bda_publication_cache"))) {
    bool key_found = true;
    iree_slim_mutex_lock(&queue->submission_mutex);
    if (iree_string_view_equal(key, IREE_SV("block_count"))) {
      *out_value = (int64_t)queue->bda_publication_cache.block_count;
    } else {
      key_found = false;
    }
    iree_slim_mutex_unlock(&queue->submission_mutex);
    return key_found;
  }

  if (iree_string_view_equal(category,
                             IREE_SV("vulkan.queue.timestamp_query_cache"))) {
    bool key_found = true;
    iree_slim_mutex_lock(&queue->submission_mutex);
    if (iree_string_view_equal(key, IREE_SV("block_count"))) {
      *out_value = (int64_t)queue->timestamp_query_cache.block_count;
    } else if (iree_string_view_equal(key, IREE_SV("query_capacity"))) {
      // Sum of the capacity of every block in the cache.
      uint64_t total = 0;
      iree_hal_vulkan_queue_timestamp_query_block_t* cursor =
          queue->timestamp_query_cache.head;
      while (cursor) {
        total += cursor->capacity;
        cursor = cursor->next;
      }
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(total);
    } else if (iree_string_view_equal(key, IREE_SV("allocated_count"))) {
      // Sum of the allocated count of every block in the cache.
      uint64_t total = 0;
      iree_hal_vulkan_queue_timestamp_query_block_t* cursor =
          queue->timestamp_query_cache.head;
      while (cursor) {
        total += cursor->allocated_count;
        cursor = cursor->next;
      }
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(total);
    } else if (iree_string_view_equal(key, IREE_SV("active_lease_count"))) {
      // Sum of the active lease count of every block in the cache.
      uint64_t total = 0;
      iree_hal_vulkan_queue_timestamp_query_block_t* cursor =
          queue->timestamp_query_cache.head;
      while (cursor) {
        total += cursor->active_lease_count;
        cursor = cursor->next;
      }
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(total);
    } else {
      key_found = false;
    }
    iree_slim_mutex_unlock(&queue->submission_mutex);
    return key_found;
  }

  if (iree_string_view_equal(category,
                             IREE_SV("vulkan.queue.native_replay_cache"))) {
    bool key_found = true;
    iree_slim_mutex_lock(&queue->submission_mutex);
    if (iree_string_view_equal(key, IREE_SV("instance_count"))) {
      *out_value = (int64_t)queue->native_replay_cache.instance_count;
    } else if (iree_string_view_equal(key, IREE_SV("max_instance_count"))) {
      *out_value = (int64_t)queue->native_replay_cache.max_instance_count;
    } else if (iree_string_view_equal(key,
                                      IREE_SV("retained_instance_count"))) {
      *out_value = (int64_t)queue->native_replay_cache.retained_instance_count;
    } else if (iree_string_view_equal(key, IREE_SV("publication_bytes"))) {
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(
          queue->native_replay_cache.publication_bytes);
    } else if (iree_string_view_equal(key, IREE_SV("max_publication_bytes"))) {
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(
          queue->native_replay_cache.max_publication_bytes);
    } else if (iree_string_view_equal(key, IREE_SV("peak_instance_count"))) {
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(
          queue->native_replay_cache.peak_instance_count);
    } else if (iree_string_view_equal(key,
                                      IREE_SV("peak_publication_bytes"))) {
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(
          queue->native_replay_cache.peak_publication_bytes);
    } else if (iree_string_view_equal(key, IREE_SV("hit_count"))) {
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(
          queue->native_replay_cache.hit_count);
    } else if (iree_string_view_equal(key, IREE_SV("miss_count"))) {
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(
          queue->native_replay_cache.miss_count);
    } else if (iree_string_view_equal(key, IREE_SV("create_count"))) {
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(
          queue->native_replay_cache.create_count);
    } else if (iree_string_view_equal(key, IREE_SV("fork_count"))) {
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(
          queue->native_replay_cache.fork_count);
    } else if (iree_string_view_equal(key,
                                      IREE_SV("publication_skip_count"))) {
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(
          queue->native_replay_cache.publication_skip_count);
    } else if (iree_string_view_equal(key,
                                      IREE_SV("publication_update_count"))) {
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(
          queue->native_replay_cache.publication_update_count);
    } else if (iree_string_view_equal(key,
                                      IREE_SV("descriptor_bypass_count"))) {
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(
          queue->native_replay_cache.descriptor_bypass_count);
    } else if (iree_string_view_equal(key, IREE_SV("profile_bypass_count"))) {
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(
          queue->native_replay_cache.profile_bypass_count);
    } else if (iree_string_view_equal(key, IREE_SV("one_shot_bypass_count"))) {
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(
          queue->native_replay_cache.one_shot_bypass_count);
    } else if (iree_string_view_equal(key, IREE_SV("capacity_bypass_count"))) {
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(
          queue->native_replay_cache.capacity_bypass_count);
    } else if (iree_string_view_equal(key, IREE_SV("trim_count"))) {
      *out_value = iree_hal_vulkan_queue_i64_saturate_u64(
          queue->native_replay_cache.trim_count);
    } else {
      key_found = false;
    }
    iree_slim_mutex_unlock(&queue->submission_mutex);
    return key_found;
  }

  // Unknown category.
  return false;
}
// Binds |queue| to |axis| of |frontier_tracker| and starts its completion
// thread.
//
// Fails with FAILED_PRECONDITION when the queue already has a frontier
// tracker. The axis is registered with the tracker (with a NULL semaphore)
// before the thread is created; if thread creation fails the axis is retired
// with a CANCELLED status and the queue's tracker/axis fields are reset.
iree_status_t iree_hal_vulkan_queue_assign_frontier(
    iree_hal_vulkan_queue_t* queue,
    iree_async_frontier_tracker_t* frontier_tracker, iree_async_axis_t axis) {
  IREE_ASSERT_ARGUMENT(queue);
  IREE_ASSERT_ARGUMENT(frontier_tracker);
  if (queue->frontier_tracker != NULL) {
    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
                            "Vulkan queue frontier already assigned");
  }

  IREE_RETURN_IF_ERROR(iree_async_frontier_tracker_register_axis(
      frontier_tracker, axis, /*semaphore=*/NULL));
  queue->frontier_tracker = frontier_tracker;
  queue->axis = axis;
  iree_atomic_store(&queue->stop_requested, 0, iree_memory_order_release);

  // Thread name encodes the device and queue index of the axis.
  iree_thread_create_params_t thread_params;
  memset(&thread_params, 0, sizeof(thread_params));
  char thread_name[48] = {0};
  const unsigned device_index = (unsigned)iree_async_axis_device_index(axis);
  const unsigned queue_index = (unsigned)iree_async_axis_queue_index(axis);
  snprintf(thread_name, sizeof(thread_name), "iree-hal-vulkan-d%uq%u-complete",
           device_index, queue_index);
  thread_params.name = iree_make_cstring_view(thread_name);

  iree_status_t status = iree_thread_create(
      iree_hal_vulkan_queue_completion_thread_main, queue, thread_params,
      queue->host_allocator, &queue->completion_thread);
  if (iree_status_is_ok(status)) {
    return status;
  }

  // Thread creation failed: undo the axis registration and assignment.
  iree_async_frontier_tracker_retire_axis(
      frontier_tracker, axis, iree_status_from_code(IREE_STATUS_CANCELLED));
  queue->frontier_tracker = NULL;
  queue->axis = 0;
  return status;
}
// Stops completion processing for |queue| and retires its frontier axis.
//
// No-op when no frontier tracker is assigned. Otherwise requests a stop, wakes
// the completion thread, releases the thread handle, retires the axis with a
// CANCELLED status and clears the tracker/axis fields.
void iree_hal_vulkan_queue_retire_frontier(iree_hal_vulkan_queue_t* queue) {
if (!queue->frontier_tracker) return;
// Publish the stop request before waking the completion thread.
iree_atomic_store(&queue->stop_requested, 1, iree_memory_order_release);
iree_status_t wake_status = iree_hal_vulkan_queue_signal_wakeup(queue);
// Only OK and DATA_LOSS are tolerated as wakeup results here; the status is
// freed either way.
IREE_ASSERT(iree_status_is_ok(wake_status) ||
iree_status_code(wake_status) == IREE_STATUS_DATA_LOSS,
"Vulkan queue completion wakeup failed during teardown");
iree_status_free(wake_status);
if (queue->completion_thread) {
// NOTE(review): presumably releasing the handle waits for the thread to exit
// before the axis is retired below — confirm against iree_thread_release.
iree_thread_release(queue->completion_thread);
queue->completion_thread = NULL;
}
iree_async_frontier_tracker_retire_axis(
queue->frontier_tracker, queue->axis,
iree_status_from_code(IREE_STATUS_CANCELLED));
queue->frontier_tracker = NULL;
queue->axis = 0;
}
// Installs or removes the profiling state of |queue|.
//
// With a non-NULL |profile_recorder| all four profiling fields are stored from
// the arguments. With a NULL |profile_recorder| the recorder, submission
// counter and clock alignment are cleared; |profile_scope| is still stored.
void iree_hal_vulkan_queue_set_profile_recorder(
iree_hal_vulkan_queue_t* queue,
iree_hal_local_profile_recorder_t* profile_recorder,
iree_hal_local_profile_queue_scope_t profile_scope,
iree_atomic_int64_t* submission_counter,
iree_hal_vulkan_profile_clock_alignment_t* clock_alignment) {
// NOTE(review): the recorder pointer is written last when enabling and first
// when disabling, which looks intentional (dependent fields are populated
// whenever the recorder is non-NULL). These are plain stores — confirm what
// synchronization callers provide.
if (profile_recorder) {
queue->profile_scope = profile_scope;
queue->profile_submission_counter = submission_counter;
queue->profile_clock_alignment = clock_alignment;
queue->profile_recorder = profile_recorder;
} else {
queue->profile_recorder = NULL;
queue->profile_scope = profile_scope;
queue->profile_submission_counter = NULL;
queue->profile_clock_alignment = NULL;
}
}
// Submits a fully captured pending |submission| on |queue|.
//
// The local |submission| pointer is cleared once the submission has been
// handed off (submitted natively, appended as a deferred submission, or
// returned as a memory-wait submission). If it is still set at the end it is
// failed through iree_hal_vulkan_queue_fail_unsubmitted_submission with a
// clone of the returned status, so every path of this function disposes of
// |submission|.
//
// Returns the status of wait-info allocation, native submission or deferred
// preparation; an INTERNAL status is produced when those succeed but the
// submission was not handed off. Failures from starting a deferred or
// memory-wait submission are routed to
// iree_hal_vulkan_queue_fail_pending_submissions and not returned.
static iree_status_t iree_hal_vulkan_queue_submit_captured_submission(
iree_hal_vulkan_queue_t* queue,
iree_hal_vulkan_queue_pending_submission_t* submission) {
iree_hal_vulkan_queue_pending_submission_t* deferred_submission = NULL;
iree_hal_vulkan_queue_pending_submission_t* memory_wait_submission = NULL;
iree_hal_vulkan_queue_submission_result_t submit_result = {
.submitted = false,
.queue_failure_status = iree_ok_status(),
};
iree_status_t status = iree_ok_status();
// Wait-info storage is allocated before the submission mutex is taken.
VkSemaphoreSubmitInfo* wait_infos = NULL;
uint32_t wait_info_capacity = 0;
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_allocate_wait_infos(
queue, submission->wait_semaphore_list.count, &wait_infos,
&wait_info_capacity);
}
if (iree_status_is_ok(status)) {
iree_slim_mutex_lock(&queue->submission_mutex);
iree_hal_vulkan_queue_wait_resolution_t resolution = {
.wait_infos = wait_infos,
.wait_info_capacity = wait_info_capacity,
};
status = iree_hal_vulkan_queue_submit_native_under_lock(
queue, submission, /*allow_software_deferral=*/true, &resolution,
&submit_result);
if (iree_status_is_ok(status) && resolution.needs_deferral) {
// The waits could not be resolved natively: queue the submission as a
// deferred submission while still holding the lock.
status =
iree_hal_vulkan_queue_prepare_deferred_submission(queue, submission);
if (iree_status_is_ok(status)) {
iree_hal_vulkan_queue_append_deferred_submission(queue, submission);
iree_hal_vulkan_queue_profile_record_submission(submission);
deferred_submission = submission;
submission = NULL;
}
} else if (submit_result.memory_wait_submission) {
memory_wait_submission = submit_result.memory_wait_submission;
submission = NULL;
} else if (submit_result.submitted) {
submission = NULL;
}
iree_slim_mutex_unlock(&queue->submission_mutex);
}
// Everything below runs without the submission mutex held.
if (wait_infos) {
iree_allocator_free(queue->host_allocator, wait_infos);
}
if (!iree_status_is_ok(submit_result.queue_failure_status)) {
iree_hal_vulkan_queue_fail_pending_submissions(
queue, submit_result.queue_failure_status);
}
if (deferred_submission) {
iree_status_t deferred_status =
iree_hal_vulkan_queue_start_deferred_submission(deferred_submission);
if (!iree_status_is_ok(deferred_status)) {
iree_hal_vulkan_queue_fail_pending_submissions(queue, deferred_status);
}
}
if (memory_wait_submission) {
iree_status_t memory_wait_status =
iree_hal_vulkan_queue_start_alloca_memory_wait(memory_wait_submission);
if (!iree_status_is_ok(memory_wait_status)) {
iree_hal_vulkan_queue_fail_pending_submissions(queue, memory_wait_status);
}
}
// Still holding the submission: it was neither submitted nor deferred.
if (submission) {
if (iree_status_is_ok(status)) {
status = iree_make_status(
IREE_STATUS_INTERNAL,
"Vulkan captured submission was neither submitted nor deferred");
}
iree_hal_vulkan_queue_fail_unsubmitted_submission(
queue, submission, iree_status_clone(status));
}
return status;
}
// Submits a barrier on |queue| that waits on |wait_semaphore_list| and signals
// |signal_semaphore_list|, with |completion_action| attached to the
// submission.
//
// Both semaphore lists are validated before the pending submission is
// created. Once created, the submission is passed to
// iree_hal_vulkan_queue_submit_captured_submission.
static iree_status_t iree_hal_vulkan_queue_submit_barrier_with_action(
    iree_hal_vulkan_queue_t* queue,
    const iree_hal_semaphore_list_t wait_semaphore_list,
    const iree_hal_semaphore_list_t signal_semaphore_list,
    iree_hal_vulkan_queue_completion_action_t completion_action) {
  IREE_ASSERT_ARGUMENT(queue);
  IREE_TRACE_ZONE_BEGIN(z0);

  iree_hal_vulkan_queue_pending_submission_t* barrier = NULL;
  iree_status_t status = iree_hal_vulkan_queue_validate_semaphore_list(
      queue, wait_semaphore_list, IREE_SV("wait"));
  if (iree_status_is_ok(status)) {
    status = iree_hal_vulkan_queue_validate_semaphore_list(
        queue, signal_semaphore_list, IREE_SV("signal"));
  }
  if (iree_status_is_ok(status)) {
    status = iree_hal_vulkan_queue_pending_submission_create(
        queue, wait_semaphore_list, signal_semaphore_list,
        IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_BARRIER,
        (iree_hal_host_call_t){0},
        /*args=*/NULL, IREE_HAL_HOST_CALL_FLAG_NONE,
        /*payload_storage_length=*/0, /*out_payload_storage=*/NULL, &barrier);
  }
  if (iree_status_is_ok(status)) {
    // Attach the action, then hand the submission off.
    iree_hal_vulkan_queue_set_completion_action(barrier, completion_action);
    status = iree_hal_vulkan_queue_submit_captured_submission(queue, barrier);
  }

  IREE_TRACE_ZONE_END(z0);
  return status;
}
// Submits a barrier on |queue| with the null completion action.
//
// Forwards to iree_hal_vulkan_queue_submit_barrier_with_action and returns its
// status.
iree_status_t iree_hal_vulkan_queue_submit_barrier(
    iree_hal_vulkan_queue_t* queue,
    const iree_hal_semaphore_list_t wait_semaphore_list,
    const iree_hal_semaphore_list_t signal_semaphore_list) {
  const iree_hal_vulkan_queue_completion_action_t no_action =
      iree_hal_vulkan_queue_completion_action_null();
  return iree_hal_vulkan_queue_submit_barrier_with_action(
      queue, wait_semaphore_list, signal_semaphore_list, no_action);
}
// Creates the local transient buffer object returned by a queue alloca.
//
// The placement names the queue's device and |params.queue_affinity| and is
// always flagged asynchronous; the indeterminate-lifetime placement flag is
// added when |flags| contains IREE_HAL_ALLOCA_FLAG_INDETERMINATE_LIFETIME.
// The buffer is allocated from the queue's host allocator and returned in
// |out_buffer|.
static iree_status_t iree_hal_vulkan_queue_create_transient_buffer(
    iree_hal_vulkan_queue_t* queue, iree_hal_buffer_params_t params,
    iree_device_size_t allocation_size, iree_device_size_t byte_length,
    iree_hal_alloca_flags_t flags, iree_hal_buffer_t** out_buffer) {
  const bool has_indeterminate_lifetime =
      iree_all_bits_set(flags, IREE_HAL_ALLOCA_FLAG_INDETERMINATE_LIFETIME);
  iree_hal_buffer_placement_t placement = {
      .device = (iree_hal_device_t*)queue->device,
      .queue_affinity = params.queue_affinity,
      .flags = has_indeterminate_lifetime
                   ? (IREE_HAL_BUFFER_PLACEMENT_FLAG_ASYNCHRONOUS |
                      IREE_HAL_BUFFER_PLACEMENT_FLAG_INDETERMINATE_LIFETIME)
                   : IREE_HAL_BUFFER_PLACEMENT_FLAG_ASYNCHRONOUS,
  };
  return iree_hal_local_transient_buffer_create(
      placement, params, allocation_size, byte_length, queue->host_allocator,
      out_buffer);
}
// Enqueues an asynchronous allocation on |queue| and returns the transient
// buffer object in |out_buffer|.
//
// Validation performed before anything is created:
//   - |flags| may only contain INDETERMINATE_LIFETIME and
//     ALLOW_POOL_WAIT_FRONTIER (INVALID_ARGUMENT otherwise).
//   - |allocation_size| must be non-zero (INVALID_ARGUMENT).
//   - The POOL strategy requires |allocation_plan.pool|; the SPARSE strategy
//     requires |allocation_plan.allocator| and a queue with
//     VK_QUEUE_SPARSE_BINDING_BIT; any other strategy is rejected.
//   - Both semaphore lists must pass queue validation.
//
// On success |out_buffer| receives the buffer reference created here and the
// submission holds its own retained reference. On failure |out_buffer| stays
// NULL and the buffer reference created here is released.
iree_status_t iree_hal_vulkan_queue_submit_alloca(
iree_hal_vulkan_queue_t* queue,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_vulkan_queue_alloca_plan_t allocation_plan,
iree_hal_buffer_params_t params, iree_device_size_t allocation_size,
iree_device_size_t byte_length, iree_hal_alloca_flags_t flags,
iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
IREE_ASSERT_ARGUMENT(queue);
IREE_ASSERT_ARGUMENT(out_buffer);
*out_buffer = NULL;
IREE_TRACE_ZONE_BEGIN(z0);
const iree_hal_alloca_flags_t known_flags =
IREE_HAL_ALLOCA_FLAG_INDETERMINATE_LIFETIME |
IREE_HAL_ALLOCA_FLAG_ALLOW_POOL_WAIT_FRONTIER;
iree_status_t status = iree_ok_status();
if (iree_any_bit_set(flags, ~known_flags)) {
status = iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"unsupported Vulkan queue alloca flags: 0x%" PRIx64, flags);
}
if (iree_status_is_ok(status) && allocation_size == 0) {
status = iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"Vulkan queue_alloca size must be non-zero");
}
// Check that the plan carries what its strategy needs.
if (iree_status_is_ok(status)) {
switch (allocation_plan.strategy) {
case IREE_HAL_VULKAN_QUEUE_ALLOCA_STRATEGY_POOL:
if (!allocation_plan.pool) {
status = iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"Vulkan queue_alloca pool strategy requires a pool");
}
break;
case IREE_HAL_VULKAN_QUEUE_ALLOCA_STRATEGY_SPARSE:
if (!allocation_plan.allocator) {
status = iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"Vulkan queue_alloca sparse strategy requires an allocator");
} else if (!iree_all_bits_set(queue->queue_flags,
VK_QUEUE_SPARSE_BINDING_BIT)) {
status = iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Vulkan queue family %u does not support sparse binding",
queue->queue_family_index);
}
break;
// The NONE strategy is rejected together with unknown values.
case IREE_HAL_VULKAN_QUEUE_ALLOCA_STRATEGY_NONE:
default:
status = iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"unrecognized Vulkan queue_alloca strategy "
"%u",
(uint32_t)allocation_plan.strategy);
break;
}
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, wait_semaphore_list, IREE_SV("wait"));
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, signal_semaphore_list, IREE_SV("signal"));
}
// Create the transient buffer object handed back to the caller.
iree_hal_buffer_t* buffer = NULL;
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_create_transient_buffer(
queue, params, allocation_size, byte_length, flags, &buffer);
}
iree_hal_vulkan_queue_pending_submission_t* submission = NULL;
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_pending_submission_create(
queue, wait_semaphore_list, signal_semaphore_list,
IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_ALLOCA, (iree_hal_host_call_t){0},
/*args=*/NULL, IREE_HAL_HOST_CALL_FLAG_NONE,
/*payload_storage_length=*/0, /*out_payload_storage=*/NULL,
&submission);
}
if (iree_status_is_ok(status)) {
// The submission keeps its own reference to the buffer.
submission->alloca.buffer = buffer;
iree_hal_buffer_retain(buffer);
submission->alloca.strategy = allocation_plan.strategy;
submission->alloca.allocator = allocation_plan.allocator;
submission->alloca.pool = allocation_plan.pool;
submission->alloca.params = params;
submission->alloca.allocation_size = allocation_size;
submission->alloca.flags = flags;
// NOTE(review): the reserve flags always allow a wait frontier here,
// independent of whether IREE_HAL_ALLOCA_FLAG_ALLOW_POOL_WAIT_FRONTIER is set
// in |flags| — confirm this is intended.
submission->alloca.reserve_flags =
IREE_HAL_POOL_RESERVE_FLAG_ALLOW_WAIT_FRONTIER;
submission->alloca.memory_wait_kind =
IREE_HAL_VULKAN_QUEUE_ALLOCA_MEMORY_WAIT_NONE;
// Immediate backing staging is only attempted when the submission has wait
// semaphores.
if (wait_semaphore_list.count > 0) {
status =
iree_hal_vulkan_queue_try_stage_alloca_backing_now(queue, submission);
}
}
if (iree_status_is_ok(status)) {
// The callee disposes of the submission on every path, so the local pointer
// is dropped regardless of the returned status.
status =
iree_hal_vulkan_queue_submit_captured_submission(queue, submission);
submission = NULL;
}
// Reached with a live submission only when staging failed above: fail the
// signal semaphores and destroy the submission.
if (submission) {
if (!iree_status_is_ok(status)) {
iree_hal_vulkan_queue_fail_signal_list(submission->signal_semaphore_list,
iree_status_clone(status));
}
iree_hal_vulkan_queue_pending_submission_destroy(queue, submission);
}
if (iree_status_is_ok(status)) {
*out_buffer = buffer;
buffer = NULL;
}
// Drops the reference created here on failure; |buffer| is NULL on success.
iree_hal_buffer_release(buffer);
IREE_TRACE_ZONE_END(z0);
return status;
}
// Enqueues an asynchronous deallocation of the transient |buffer| on |queue|.
//
// Validation:
//   - |flags| may only contain IREE_HAL_DEALLOCA_FLAG_PREFER_ORIGIN
//     (INVALID_ARGUMENT otherwise).
//   - |buffer| must be a local transient buffer (INVALID_ARGUMENT).
//   - iree_hal_local_transient_buffer_begin_dealloca must succeed; when it
//     returns false the call fails with FAILED_PRECONDITION.
//   - Both semaphore lists must pass queue validation.
//
// The dealloca state of |buffer| is rolled back with
// iree_hal_local_transient_buffer_abort_dealloca only when this call itself
// began the dealloca and then failed. A call rejected before (or by)
// begin_dealloca never touches the buffer's dealloca state, so it cannot undo
// a dealloca that another call has already queued.
iree_status_t iree_hal_vulkan_queue_submit_dealloca(
    iree_hal_vulkan_queue_t* queue,
    const iree_hal_semaphore_list_t wait_semaphore_list,
    const iree_hal_semaphore_list_t signal_semaphore_list,
    iree_hal_buffer_t* buffer, iree_hal_dealloca_flags_t flags) {
  IREE_ASSERT_ARGUMENT(queue);
  IREE_ASSERT_ARGUMENT(buffer);
  IREE_TRACE_ZONE_BEGIN(z0);

  const iree_hal_dealloca_flags_t known_flags =
      IREE_HAL_DEALLOCA_FLAG_PREFER_ORIGIN;
  iree_status_t status = iree_ok_status();
  if (iree_any_bit_set(flags, ~known_flags)) {
    status = iree_make_status(
        IREE_STATUS_INVALID_ARGUMENT,
        "unsupported Vulkan queue dealloca flags: 0x%" PRIx64, flags);
  }
  if (iree_status_is_ok(status) &&
      !iree_hal_local_transient_buffer_isa(buffer)) {
    status = iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                              "Vulkan queue_dealloca buffer was not returned "
                              "by Vulkan queue_alloca");
  }

  // Set only when this call performed the begin_dealloca transition; it is
  // the sole condition under which the failure path may abort it.
  bool began_dealloca = false;
  if (iree_status_is_ok(status)) {
    began_dealloca = iree_hal_local_transient_buffer_begin_dealloca(buffer);
    if (!began_dealloca) {
      status = iree_make_status(
          IREE_STATUS_FAILED_PRECONDITION,
          "Vulkan transient buffer has already been queued for deallocation");
    }
  }

  if (iree_status_is_ok(status)) {
    status = iree_hal_vulkan_queue_validate_semaphore_list(
        queue, wait_semaphore_list, IREE_SV("wait"));
  }
  if (iree_status_is_ok(status)) {
    status = iree_hal_vulkan_queue_validate_semaphore_list(
        queue, signal_semaphore_list, IREE_SV("signal"));
  }

  iree_hal_vulkan_queue_pending_submission_t* submission = NULL;
  if (iree_status_is_ok(status)) {
    status = iree_hal_vulkan_queue_pending_submission_create(
        queue, wait_semaphore_list, signal_semaphore_list,
        IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_DEALLOCA,
        (iree_hal_host_call_t){0}, /*args=*/NULL, IREE_HAL_HOST_CALL_FLAG_NONE,
        /*payload_storage_length=*/0, /*out_payload_storage=*/NULL,
        &submission);
  }
  if (iree_status_is_ok(status)) {
    // The submission keeps its own reference to the buffer. The callee
    // disposes of the submission on every path, so the local pointer is
    // dropped regardless of the returned status.
    submission->dealloca.buffer = buffer;
    iree_hal_buffer_retain(buffer);
    submission->dealloca.flags = flags;
    status =
        iree_hal_vulkan_queue_submit_captured_submission(queue, submission);
    submission = NULL;
  }

  // Roll back the dealloca state before the submission (if any) is destroyed,
  // but only if this call is the one that began it.
  if (began_dealloca && (submission != NULL || !iree_status_is_ok(status))) {
    iree_hal_local_transient_buffer_abort_dealloca(buffer);
  }
  if (submission) {
    if (!iree_status_is_ok(status)) {
      iree_hal_vulkan_queue_fail_signal_list(submission->signal_semaphore_list,
                                             iree_status_clone(status));
    }
    iree_hal_vulkan_queue_pending_submission_destroy(queue, submission);
  }

  IREE_TRACE_ZONE_END(z0);
  return status;
}
// Enqueues a sparse memory bind of |binds| onto |buffer| on |queue|.
//
// Validation: |buffer| and |binds| must be non-NULL, the queue must have
// VK_QUEUE_SPARSE_BINDING_BIT, |bind_count| must be in [1, UINT32_MAX], and
// both semaphore lists must pass queue validation.
//
// The bind array is copied into host-allocator storage, so the caller's
// |binds| array is not referenced by the submission. Ownership of the copy
// moves to the submission once it has been created; otherwise the copy is
// freed before returning.
iree_status_t iree_hal_vulkan_queue_submit_sparse_bind(
iree_hal_vulkan_queue_t* queue,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list, VkBuffer buffer,
iree_host_size_t bind_count, const VkSparseMemoryBind* binds) {
IREE_ASSERT_ARGUMENT(queue);
IREE_TRACE_ZONE_BEGIN(z0);
IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)bind_count);
iree_status_t status = iree_ok_status();
if (!buffer) {
status = iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"Vulkan sparse bind buffer is NULL");
}
if (iree_status_is_ok(status) && !binds) {
status = iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"Vulkan sparse bind array is NULL");
}
if (iree_status_is_ok(status) &&
!iree_all_bits_set(queue->queue_flags, VK_QUEUE_SPARSE_BINDING_BIT)) {
status = iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Vulkan queue family %u does not support sparse binding",
queue->queue_family_index);
}
if (iree_status_is_ok(status) && bind_count == 0) {
status = iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"Vulkan sparse bind submission must contain at least one bind");
}
// The count is stored as uint32_t on the submission below.
if (iree_status_is_ok(status) && bind_count > UINT32_MAX) {
status = iree_make_status(IREE_STATUS_OUT_OF_RANGE,
"too many Vulkan sparse buffer binds");
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, wait_semaphore_list, IREE_SV("wait"));
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, signal_semaphore_list, IREE_SV("signal"));
}
// Copy the caller's bind array; the byte size is computed with an
// overflow-checked multiply.
VkSparseMemoryBind* copied_binds = NULL;
if (iree_status_is_ok(status)) {
iree_host_size_t bind_storage_size = 0;
if (iree_host_size_checked_mul(bind_count, sizeof(*binds),
&bind_storage_size)) {
status = iree_allocator_malloc(queue->host_allocator, bind_storage_size,
(void**)&copied_binds);
} else {
status =
iree_make_status(IREE_STATUS_OUT_OF_RANGE,
"Vulkan sparse bind array overflows host storage");
}
}
if (iree_status_is_ok(status)) {
memcpy(copied_binds, binds, bind_count * sizeof(*binds));
}
iree_hal_vulkan_queue_pending_submission_t* submission = NULL;
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_pending_submission_create(
queue, wait_semaphore_list, signal_semaphore_list,
IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_SPARSE_BIND,
(iree_hal_host_call_t){0}, /*args=*/NULL, IREE_HAL_HOST_CALL_FLAG_NONE,
/*payload_storage_length=*/0, /*out_payload_storage=*/NULL,
&submission);
}
if (iree_status_is_ok(status)) {
submission->sparse_bind.buffer = buffer;
submission->sparse_bind.binds = copied_binds;
submission->sparse_bind.bind_count = (uint32_t)bind_count;
// The copy now belongs to the submission; clear the local so the free at the
// end of the function does not release it.
copied_binds = NULL;
// The callee disposes of the submission on every path.
status =
iree_hal_vulkan_queue_submit_captured_submission(queue, submission);
submission = NULL;
}
// Cleanup for a submission that was created but never handed off.
if (submission) {
if (!iree_status_is_ok(status)) {
iree_hal_vulkan_queue_fail_signal_list(submission->signal_semaphore_list,
iree_status_clone(status));
}
iree_hal_vulkan_queue_pending_submission_destroy(queue, submission);
}
// Frees the copy when it was not transferred to a submission.
iree_allocator_free(queue->host_allocator, copied_binds);
IREE_TRACE_ZONE_END(z0);
return status;
}
// Enqueues a fill of |length| bytes at |target_offset| in |target_buffer| with
// the repeating |pattern|.
//
// Validation: |flags| must be IREE_HAL_FILL_FLAG_NONE, |pattern| must be
// non-NULL, |pattern_length| must be 1, 2 or 4, the target range must be valid
// for the buffer, the buffer must be device visible, and both semaphore lists
// must pass queue validation.
//
// The pattern bytes are copied into the submission and the submission retains
// |target_buffer|. A native command buffer is allocated and recorded only when
// |length| is non-zero; a zero-length fill is still submitted.
iree_status_t iree_hal_vulkan_queue_submit_fill(
iree_hal_vulkan_queue_t* queue,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, const void* pattern,
iree_host_size_t pattern_length, iree_hal_fill_flags_t flags) {
IREE_ASSERT_ARGUMENT(queue);
IREE_ASSERT_ARGUMENT(target_buffer);
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status = iree_ok_status();
if (flags != IREE_HAL_FILL_FLAG_NONE) {
status = iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"unsupported Vulkan queue fill flags: 0x%" PRIx64,
flags);
}
if (iree_status_is_ok(status) && !pattern) {
status = iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"Vulkan queue fill pattern is NULL");
}
if (iree_status_is_ok(status) && pattern_length != 1 && pattern_length != 2 &&
pattern_length != 4) {
status = iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"Vulkan queue fill pattern length must be 1, 2, or 4 bytes "
"(got %" PRIhsz ")",
pattern_length);
}
if (iree_status_is_ok(status)) {
status =
iree_hal_buffer_validate_range(target_buffer, target_offset, length);
}
if (iree_status_is_ok(status)) {
status = iree_hal_buffer_validate_memory_type(
iree_hal_buffer_memory_type(target_buffer),
IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE);
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, wait_semaphore_list, IREE_SV("wait"));
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, signal_semaphore_list, IREE_SV("signal"));
}
iree_hal_vulkan_queue_pending_submission_t* submission = NULL;
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_pending_submission_create(
queue, wait_semaphore_list, signal_semaphore_list,
IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_FILL, (iree_hal_host_call_t){0},
/*args=*/NULL, IREE_HAL_HOST_CALL_FLAG_NONE,
/*payload_storage_length=*/0, /*out_payload_storage=*/NULL,
&submission);
}
if (iree_status_is_ok(status)) {
// Capture the fill parameters; the submission keeps its own buffer
// reference.
submission->fill.target_buffer = target_buffer;
iree_hal_buffer_retain(target_buffer);
submission->fill.target_offset = target_offset;
submission->fill.length = length;
// Zero the whole pattern storage first so bytes past |pattern_length| are
// defined.
memset(submission->fill.pattern, 0, sizeof(submission->fill.pattern));
memcpy(submission->fill.pattern, pattern, pattern_length);
submission->fill.pattern_length = pattern_length;
submission->fill.flags = flags;
}
// Backing validation and native recording are skipped for zero-length fills.
if (iree_status_is_ok(status) && length != 0) {
status = iree_hal_vulkan_queue_validate_recordable_backing(
target_buffer, IREE_SV("fill target"));
}
if (iree_status_is_ok(status) && length != 0) {
status =
iree_hal_vulkan_queue_allocate_native_command_buffer(queue, submission);
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_profile_prepare_native_timestamps(
queue, submission);
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_record_fill_native(queue, submission);
}
}
if (iree_status_is_ok(status)) {
// The callee disposes of the submission on every path.
status =
iree_hal_vulkan_queue_submit_captured_submission(queue, submission);
submission = NULL;
}
// A submission that was created but failed before hand-off: fail its signal
// semaphores and destroy it.
if (submission) {
if (!iree_status_is_ok(status)) {
iree_hal_vulkan_queue_fail_signal_list(submission->signal_semaphore_list,
iree_status_clone(status));
}
iree_hal_vulkan_queue_pending_submission_destroy(queue, submission);
}
IREE_TRACE_ZONE_END(z0);
return status;
}
// Enqueues a copy of |length| bytes from |source_buffer| at |source_offset| to
// |target_buffer| at |target_offset|, with |completion_action| attached to the
// submission.
//
// Validation: |flags| must be IREE_HAL_COPY_FLAG_NONE, both ranges must be
// valid for their buffers, both buffers must be device visible, and both
// semaphore lists must pass queue validation.
//
// The submission retains both buffers. A native command buffer is allocated
// and recorded only when |length| is non-zero; a zero-length copy is still
// submitted. |completion_action| is attached only after the submission has
// been created, so a failure before that point leaves it unattached.
static iree_status_t iree_hal_vulkan_queue_submit_copy_with_action(
iree_hal_vulkan_queue_t* queue,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, iree_hal_copy_flags_t flags,
iree_hal_vulkan_queue_completion_action_t completion_action) {
IREE_ASSERT_ARGUMENT(queue);
IREE_ASSERT_ARGUMENT(source_buffer);
IREE_ASSERT_ARGUMENT(target_buffer);
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status = iree_ok_status();
if (flags != IREE_HAL_COPY_FLAG_NONE) {
status = iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"unsupported Vulkan queue copy flags: 0x%" PRIx64,
flags);
}
// Source range and memory type.
if (iree_status_is_ok(status)) {
status =
iree_hal_buffer_validate_range(source_buffer, source_offset, length);
}
if (iree_status_is_ok(status)) {
status = iree_hal_buffer_validate_memory_type(
iree_hal_buffer_memory_type(source_buffer),
IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE);
}
// Target range and memory type.
if (iree_status_is_ok(status)) {
status =
iree_hal_buffer_validate_range(target_buffer, target_offset, length);
}
if (iree_status_is_ok(status)) {
status = iree_hal_buffer_validate_memory_type(
iree_hal_buffer_memory_type(target_buffer),
IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE);
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, wait_semaphore_list, IREE_SV("wait"));
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, signal_semaphore_list, IREE_SV("signal"));
}
iree_hal_vulkan_queue_pending_submission_t* submission = NULL;
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_pending_submission_create(
queue, wait_semaphore_list, signal_semaphore_list,
IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_COPY, (iree_hal_host_call_t){0},
/*args=*/NULL, IREE_HAL_HOST_CALL_FLAG_NONE,
/*payload_storage_length=*/0, /*out_payload_storage=*/NULL,
&submission);
}
if (iree_status_is_ok(status)) {
// Capture the copy parameters; the submission keeps its own references to
// both buffers.
submission->copy.source_buffer = source_buffer;
iree_hal_buffer_retain(source_buffer);
submission->copy.source_offset = source_offset;
submission->copy.target_buffer = target_buffer;
iree_hal_buffer_retain(target_buffer);
submission->copy.target_offset = target_offset;
submission->copy.length = length;
submission->copy.flags = flags;
iree_hal_vulkan_queue_set_completion_action(submission, completion_action);
}
// Backing validation and native recording are skipped for zero-length copies.
if (iree_status_is_ok(status) && length != 0) {
status = iree_hal_vulkan_queue_validate_recordable_backing(
source_buffer, IREE_SV("copy source"));
}
if (iree_status_is_ok(status) && length != 0) {
status = iree_hal_vulkan_queue_validate_recordable_backing(
target_buffer, IREE_SV("copy target"));
}
if (iree_status_is_ok(status) && length != 0) {
status =
iree_hal_vulkan_queue_allocate_native_command_buffer(queue, submission);
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_profile_prepare_native_timestamps(
queue, submission);
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_record_copy_native(queue, submission);
}
}
if (iree_status_is_ok(status)) {
// The callee disposes of the submission on every path.
status =
iree_hal_vulkan_queue_submit_captured_submission(queue, submission);
submission = NULL;
}
// A submission that was created but failed before hand-off: fail its signal
// semaphores and destroy it.
if (submission) {
if (!iree_status_is_ok(status)) {
iree_hal_vulkan_queue_fail_signal_list(submission->signal_semaphore_list,
iree_status_clone(status));
}
iree_hal_vulkan_queue_pending_submission_destroy(queue, submission);
}
IREE_TRACE_ZONE_END(z0);
return status;
}
// Enqueues a buffer-to-buffer copy on |queue| with the null completion action.
//
// Forwards to iree_hal_vulkan_queue_submit_copy_with_action and returns its
// status.
iree_status_t iree_hal_vulkan_queue_submit_copy(
    iree_hal_vulkan_queue_t* queue,
    const iree_hal_semaphore_list_t wait_semaphore_list,
    const iree_hal_semaphore_list_t signal_semaphore_list,
    iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
    iree_device_size_t length, iree_hal_copy_flags_t flags) {
  const iree_hal_vulkan_queue_completion_action_t no_action =
      iree_hal_vulkan_queue_completion_action_null();
  return iree_hal_vulkan_queue_submit_copy_with_action(
      queue, wait_semaphore_list, signal_semaphore_list, source_buffer,
      source_offset, target_buffer, target_offset, length, flags, no_action);
}
// Verifies that the byte range [offset, offset + length) lies within |file|.
//
// A file that reports a length of zero is accepted without any range check.
// Otherwise OUT_OF_RANGE is returned when the end of the range cannot be
// represented in 64 bits or when it lies past the reported file length.
// |operation| is only used to label the error message.
static iree_status_t iree_hal_vulkan_queue_validate_file_range(
    iree_hal_file_t* file, uint64_t offset, iree_device_size_t length,
    iree_string_view_t operation) {
  const uint64_t total_length = iree_hal_file_length(file);
  if (total_length != 0) {
    // Guard the addition below against wrapping around.
    const uint64_t max_length = UINT64_MAX - offset;
    if (length > max_length) {
      return iree_make_status(
          IREE_STATUS_OUT_OF_RANGE,
          "%.*s range [%" PRIu64 ", overflow) exceeds file length %" PRIu64,
          (int)operation.size, operation.data, offset, total_length);
    }
    const uint64_t range_end = offset + (uint64_t)length;
    if (range_end > total_length) {
      return iree_make_status(
          IREE_STATUS_OUT_OF_RANGE,
          "%.*s range [%" PRIu64 ", %" PRIu64 ") exceeds file length %" PRIu64,
          (int)operation.size, operation.data, offset, range_end,
          total_length);
    }
  }
  return iree_ok_status();
}
// Returns true when |file| can feed the staged transfer path: either it is a
// memory file or it exposes a non-NULL async file handle.
static bool iree_hal_vulkan_queue_file_supports_staged_transfer(
    iree_hal_file_t* file) {
  if (iree_hal_memory_file_isa(file)) {
    return true;
  }
  // The async handle is only queried for files that are not memory files.
  return iree_hal_file_async_handle(file) != NULL;
}
// Direction of a staged transfer. The names describe the operation performed
// on the file/host endpoint: READ reads from it, WRITE writes to it. The kind
// also selects the staging ring: READ uses the queue upload ring and any other
// kind uses the download ring.
typedef enum iree_hal_vulkan_staged_transfer_kind_e {
// File or host bytes flow through the upload ring into the target buffer.
IREE_HAL_VULKAN_STAGED_TRANSFER_READ = 0,
// Source buffer bytes flow through the download ring into the file or host.
IREE_HAL_VULKAN_STAGED_TRANSFER_WRITE = 1,
} iree_hal_vulkan_staged_transfer_kind_t;
// Bit flags accepted when creating a staged transfer. Bits outside this set
// are rejected with INVALID_ARGUMENT at creation time.
typedef enum iree_hal_vulkan_staged_transfer_flag_bits_e {
// No special transfer behavior.
IREE_HAL_VULKAN_STAGED_TRANSFER_FLAG_NONE = 0u,
// Capture the host range into transfer-owned storage before returning.
// Only accepted for READ transfers whose host endpoint is host memory; the
// bytes are copied into storage allocated together with the transfer.
IREE_HAL_VULKAN_STAGED_TRANSFER_FLAG_CAPTURE_HOST_RANGE = 1u << 0,
} iree_hal_vulkan_staged_transfer_flag_bits_t;
// Bitfield of iree_hal_vulkan_staged_transfer_flag_bits_t values.
typedef uint32_t iree_hal_vulkan_staged_transfer_flags_t;
// Strategy used to move bytes between the staging slot and the host endpoint.
typedef enum iree_hal_vulkan_staged_transfer_host_kind_e {
// Host memory span feeds or receives staged bytes directly.
// Bytes are moved with memcpy between the span and the staging slot.
IREE_HAL_VULKAN_STAGED_TRANSFER_HOST_MEMORY = 0,
// Proactor-backed file feeds or receives staged bytes asynchronously.
// Bytes are moved with async file read/write operations submitted to the
// queue proactor.
IREE_HAL_VULKAN_STAGED_TRANSFER_HOST_ASYNC_FILE = 1,
} iree_hal_vulkan_staged_transfer_host_kind_t;
// Forward declaration so that chunk records can point back at their owning
// transfer before the transfer struct itself is defined.
typedef struct iree_hal_vulkan_staged_transfer_t
iree_hal_vulkan_staged_transfer_t;
// Bookkeeping for one piece of a staged transfer that occupies one staging
// slot. A chunk record is treated as idle (available for reuse) while |slot|
// is NULL. Chunk records live in the same allocation as their transfer.
typedef struct iree_hal_vulkan_staged_transfer_chunk_t {
// Owning staged transfer.
iree_hal_vulkan_staged_transfer_t* transfer;
// Staging slot owned while this chunk is active.
iree_hal_vulkan_queue_staging_slot_t* slot;
// Byte offset from the beginning of the transfer.
iree_device_size_t transfer_offset;
// Byte length covered by this chunk.
iree_device_size_t length;
// Bytes completed by the current partial async file operation.
// Accumulates across resubmissions until it reaches |length|.
iree_host_size_t file_progress;
// Async read operation storage.
// Re-initialized for every (re)submitted read of this chunk.
iree_async_file_read_operation_t read_op;
// Async write operation storage.
// Re-initialized for every (re)submitted write of this chunk.
iree_async_file_write_operation_t write_op;
} iree_hal_vulkan_staged_transfer_chunk_t;
// State for one transfer between a host endpoint (host memory or an async
// file) and a HAL buffer that is split into chunks, each of which passes
// through a slot of a queue staging ring.
//
// The transfer is reference counted via |resource|. The chunk table (and,
// when the host range is captured, the captured bytes) are stored in the same
// allocation directly after this struct.
struct iree_hal_vulkan_staged_transfer_t {
// Resource retained by queue completion actions and ring waiters.
iree_hal_resource_t resource;
// Host allocator used for transfer metadata and signal-list storage.
iree_allocator_t host_allocator;
// Serializes transfer counters and terminal status ownership.
iree_slim_mutex_t mutex;
// Queue owning the staging rings and internal copy submissions.
iree_hal_vulkan_queue_t* queue;
// Staging ring used by this transfer.
iree_hal_vulkan_queue_staging_ring_t* ring;
// Optional file retaining the host endpoint lifetime.
iree_hal_file_t* file;
// Async file handle used for proactor-backed transfers. Borrowed from |file|.
iree_async_file_t* async_file;
// File byte offset for proactor-backed file transfers.
uint64_t file_offset;
// Host bytes read for uploads or written for downloads.
iree_byte_span_t host_contents;
// Byte offset into |host_contents| for the first requested byte.
iree_host_size_t host_offset;
// User buffer being copied to or from.
iree_hal_buffer_t* buffer;
// User buffer byte offset for the first requested byte.
iree_device_size_t buffer_offset;
// Total requested transfer length.
iree_device_size_t requested_length;
// Number of bytes assigned to chunks.
iree_device_size_t submitted_length;
// Number of bytes fully transferred through all stages.
iree_device_size_t completed_length;
// Number of chunks currently owning a staging slot or in-flight copy.
uint32_t active_chunk_count;
// Number of chunk records in |chunks|.
uint32_t chunk_count;
// Direction of this transfer.
iree_hal_vulkan_staged_transfer_kind_t kind;
// Host-side endpoint strategy used by this transfer.
iree_hal_vulkan_staged_transfer_host_kind_t host_kind;
// Whether terminal completion has started.
// Once set no further chunks are submitted and completion is not repeated.
bool finishing;
// Owned first failure status, or OK if no failure has occurred.
iree_status_t failure_status;
// Waiter queued when all staging slots are temporarily unavailable.
iree_hal_vulkan_queue_staging_waiter_t slot_waiter;
// Cloned signal list published after the transfer completes.
iree_hal_semaphore_list_t signal_semaphore_list;
// Chunk records used to pipeline host copies and GPU copies.
// Points into the trailing storage of this allocation.
iree_hal_vulkan_staged_transfer_chunk_t* chunks;
};
// Forward declarations: the chunk completion helpers defined before these
// functions call back into them.
//
// Submits as many new chunks as staging slots and remaining bytes allow.
static void iree_hal_vulkan_staged_transfer_pump(
iree_hal_vulkan_staged_transfer_t* transfer);
// Completes the transfer once no chunks are active and it has either failed
// or transferred every requested byte.
static void iree_hal_vulkan_staged_transfer_try_finish(
iree_hal_vulkan_staged_transfer_t* transfer);
// Resource destructor for a staged transfer.
//
// Frees any failure status still owned by the transfer, frees the cloned
// signal semaphore list when it is non-empty, releases the user buffer and the
// optional lifetime file, tears down the mutex, and finally frees the single
// allocation holding the transfer, its chunk table, and any captured host
// bytes.
static void iree_hal_vulkan_staged_transfer_destroy(
    iree_hal_resource_t* resource) {
  iree_hal_vulkan_staged_transfer_t* transfer =
      (iree_hal_vulkan_staged_transfer_t*)resource;
  // |failure_status| is owned by the transfer until try_finish takes it. If the
  // transfer is torn down before that hand-off happens the status storage would
  // otherwise be leaked. Freeing an OK status is a no-op.
  iree_status_free(transfer->failure_status);
  transfer->failure_status = iree_ok_status();
  if (!iree_hal_semaphore_list_is_empty(transfer->signal_semaphore_list)) {
    iree_hal_semaphore_list_free(transfer->signal_semaphore_list,
                                 transfer->host_allocator);
  }
  iree_hal_buffer_release(transfer->buffer);
  iree_hal_file_release(transfer->file);
  iree_slim_mutex_deinitialize(&transfer->mutex);
  // Must be last: |transfer| (and |host_allocator| inside it) are invalid after
  // this call.
  iree_allocator_free(transfer->host_allocator, transfer);
}
// Resource vtable for staged transfers; routes destruction to
// iree_hal_vulkan_staged_transfer_destroy.
static const iree_hal_resource_vtable_t iree_hal_vulkan_staged_transfer_vtable =
{
.destroy = iree_hal_vulkan_staged_transfer_destroy,
};
// Records |status| as the failure of |transfer| if it is the first one.
//
// Takes ownership of |status|. An OK status is ignored. When a failure has
// already been recorded the new status is freed instead of replacing it.
static void iree_hal_vulkan_staged_transfer_record_failure(
    iree_hal_vulkan_staged_transfer_t* transfer, iree_status_t status) {
  if (iree_status_is_ok(status)) return;

  // Status to dispose of once the lock has been dropped.
  iree_status_t discarded_status = status;

  iree_slim_mutex_lock(&transfer->mutex);
  const bool is_first_failure = iree_status_is_ok(transfer->failure_status);
  if (is_first_failure) {
    // Ownership moves into the transfer; nothing is left to free.
    transfer->failure_status = status;
    discarded_status = iree_ok_status();
  }
  iree_slim_mutex_unlock(&transfer->mutex);

  iree_status_free(discarded_status);
}
// Propagates a failure to the signal semaphores of |transfer|.
//
// Takes ownership of |status|. An OK status is a no-op. With an empty signal
// list the status is simply freed; otherwise it is handed to
// iree_hal_semaphore_list_fail.
static void iree_hal_vulkan_staged_transfer_fail_signals(
    iree_hal_vulkan_staged_transfer_t* transfer, iree_status_t status) {
  if (iree_status_is_ok(status)) return;
  if (!iree_hal_semaphore_list_is_empty(transfer->signal_semaphore_list)) {
    iree_hal_semaphore_list_fail(transfer->signal_semaphore_list, status);
  } else {
    iree_status_free(status);
  }
}
// Submits a queue barrier with no waits that signals the semaphore list of
// |transfer|. Returns OK without submitting anything when the list is empty.
static iree_status_t iree_hal_vulkan_staged_transfer_submit_signal_barrier(
    iree_hal_vulkan_staged_transfer_t* transfer) {
  iree_status_t status = iree_ok_status();
  if (!iree_hal_semaphore_list_is_empty(transfer->signal_semaphore_list)) {
    status = iree_hal_vulkan_queue_submit_barrier(
        transfer->queue, iree_hal_semaphore_list_empty(),
        transfer->signal_semaphore_list);
  }
  return status;
}
// Performs terminal completion of |transfer| and drops one reference to it.
//
// Takes ownership of |status|. On success the signal barrier is submitted; if
// either the incoming status or the barrier submission failed, the signal
// semaphores are failed with that status instead.
static void iree_hal_vulkan_staged_transfer_complete(
    iree_hal_vulkan_staged_transfer_t* transfer, iree_status_t status) {
  iree_status_t terminal_status = status;
  if (iree_status_is_ok(terminal_status)) {
    terminal_status =
        iree_hal_vulkan_staged_transfer_submit_signal_barrier(transfer);
  }
  if (!iree_status_is_ok(terminal_status)) {
    iree_hal_vulkan_staged_transfer_fail_signals(transfer, terminal_status);
  }
  iree_hal_resource_release(&transfer->resource);
}
// Returns the first chunk record of |transfer| that holds no staging slot, or
// NULL when every record is in use.
static iree_hal_vulkan_staged_transfer_chunk_t*
iree_hal_vulkan_staged_transfer_find_idle_chunk(
    iree_hal_vulkan_staged_transfer_t* transfer) {
  iree_hal_vulkan_staged_transfer_chunk_t* const chunks_end =
      transfer->chunks + transfer->chunk_count;
  for (iree_hal_vulkan_staged_transfer_chunk_t* candidate = transfer->chunks;
       candidate != chunks_end; ++candidate) {
    if (candidate->slot == NULL) return candidate;
  }
  return NULL;
}
// Staging ring waiter callback: |user_data| is the staged transfer that was
// queued as a waiter. Resumes chunk submission and then checks whether the
// transfer can be completed.
static void iree_hal_vulkan_staged_transfer_slot_available(void* user_data) {
  iree_hal_vulkan_staged_transfer_t* const woken_transfer = user_data;
  iree_hal_vulkan_staged_transfer_pump(woken_transfer);
  iree_hal_vulkan_staged_transfer_try_finish(woken_transfer);
}
// Completes |transfer| if it has reached a terminal state.
//
// Under the transfer mutex, completion is claimed only when it has not started
// yet, no chunks are active, and the transfer has either recorded a failure or
// completed every requested byte. The claiming caller takes ownership of the
// recorded failure status, cancels the pending slot waiter, and runs terminal
// completion outside of the lock.
static void iree_hal_vulkan_staged_transfer_try_finish(
    iree_hal_vulkan_staged_transfer_t* transfer) {
  iree_status_t terminal_status = iree_ok_status();

  iree_slim_mutex_lock(&transfer->mutex);
  const bool is_idle =
      !transfer->finishing && transfer->active_chunk_count == 0;
  const bool is_terminal =
      !iree_status_is_ok(transfer->failure_status) ||
      transfer->completed_length == transfer->requested_length;
  const bool claimed = is_idle && is_terminal;
  if (claimed) {
    transfer->finishing = true;
    // Move the failure (if any) out of the transfer.
    terminal_status = transfer->failure_status;
    transfer->failure_status = iree_ok_status();
  }
  iree_slim_mutex_unlock(&transfer->mutex);

  if (!claimed) return;
  iree_hal_vulkan_queue_staging_ring_cancel_waiter(transfer->ring,
                                                   &transfer->slot_waiter);
  iree_hal_vulkan_staged_transfer_complete(transfer, terminal_status);
}
// Retires |chunk| and returns its staging slot to the ring.
//
// When |did_transfer_bytes| is true the chunk length is added to the completed
// byte count of the transfer. The chunk record is reset to idle and the active
// chunk count is decremented under the transfer mutex. After the slot has been
// released the transfer is pumped for more work and checked for completion.
static void iree_hal_vulkan_staged_transfer_chunk_finish(
    iree_hal_vulkan_staged_transfer_chunk_t* chunk, bool did_transfer_bytes) {
  iree_hal_vulkan_staged_transfer_t* owner = chunk->transfer;
  // Remember the slot before the record is cleared so it can be released below.
  iree_hal_vulkan_queue_staging_slot_t* released_slot = chunk->slot;

  iree_slim_mutex_lock(&owner->mutex);
  if (did_transfer_bytes) owner->completed_length += chunk->length;
  chunk->file_progress = 0;
  chunk->length = 0;
  chunk->transfer_offset = 0;
  chunk->slot = NULL;
  owner->active_chunk_count -= 1;
  iree_slim_mutex_unlock(&owner->mutex);

  iree_hal_vulkan_queue_staging_ring_release(owner->ring, released_slot);
  iree_hal_vulkan_staged_transfer_pump(owner);
  iree_hal_vulkan_staged_transfer_try_finish(owner);
}
// Fails |chunk|: records |status| (ownership transferred) on the owning
// transfer and then retires the chunk without crediting any transferred bytes.
static void iree_hal_vulkan_staged_transfer_chunk_fail(
    iree_hal_vulkan_staged_transfer_chunk_t* chunk, iree_status_t status) {
  iree_hal_vulkan_staged_transfer_t* owner = chunk->transfer;
  iree_hal_vulkan_staged_transfer_record_failure(owner, status);
  const bool did_transfer_bytes = false;
  iree_hal_vulkan_staged_transfer_chunk_finish(chunk, did_transfer_bytes);
}
// Forward declarations: the async file completion callbacks below resubmit
// work through these functions, which are defined after them.
//
// Submits the queue copy between the staging slot and the user buffer.
static iree_status_t iree_hal_vulkan_staged_transfer_submit_copy(
iree_hal_vulkan_staged_transfer_chunk_t* chunk);
// Submits an async file read for the not-yet-read remainder of the chunk.
static iree_status_t iree_hal_vulkan_staged_transfer_submit_next_read(
iree_hal_vulkan_staged_transfer_chunk_t* chunk);
// Submits an async file write for the not-yet-written remainder of the chunk.
static iree_status_t iree_hal_vulkan_staged_transfer_submit_next_write(
iree_hal_vulkan_staged_transfer_chunk_t* chunk);
// Completion callback for an async file read issued for a chunk.
//
// |user_data| is the chunk. Takes ownership of |status|. A partial read is
// continued with a follow-up read for the remaining bytes; a successful read
// of zero bytes before the chunk is full is reported as a short read. Once the
// chunk is fully read the staging-to-buffer copy is submitted. Any failure
// fails the chunk. In every path the transfer reference retained for the read
// that just completed is released.
static void iree_hal_vulkan_staged_transfer_read_complete(
    void* user_data, iree_async_operation_t* base_operation,
    iree_status_t status, iree_async_completion_flags_t flags) {
  (void)base_operation;
  (void)flags;
  iree_hal_vulkan_staged_transfer_chunk_t* chunk = user_data;

  if (iree_status_is_ok(status)) {
    const iree_host_size_t requested_bytes = (iree_host_size_t)chunk->length;
    if (chunk->read_op.bytes_read > 0) {
      chunk->file_progress += chunk->read_op.bytes_read;
      if (chunk->file_progress < requested_bytes) {
        // More bytes are needed; the follow-up read holds its own reference.
        status = iree_hal_vulkan_staged_transfer_submit_next_read(chunk);
        if (iree_status_is_ok(status)) {
          iree_hal_resource_release(&chunk->transfer->resource);
          return;
        }
      }
    } else if (chunk->file_progress < requested_bytes) {
      status = iree_make_status(
          IREE_STATUS_OUT_OF_RANGE,
          "short read: requested %" PRIhsz " bytes, got %" PRIhsz,
          requested_bytes, chunk->file_progress);
    }
  }

  if (iree_status_is_ok(status)) {
    status = iree_hal_vulkan_staged_transfer_submit_copy(chunk);
  }
  if (!iree_status_is_ok(status)) {
    iree_hal_vulkan_staged_transfer_chunk_fail(chunk, status);
  }
  iree_hal_resource_release(&chunk->transfer->resource);
}
// Completion callback for an async file write issued for a chunk.
//
// |user_data| is the chunk. Takes ownership of |status|. A partial write is
// continued with a follow-up write for the remaining bytes; a successful write
// of zero bytes before the chunk is fully written is reported as a short
// write. A fully written chunk is retired as transferred and any failure fails
// the chunk. In every path the transfer reference retained for the write that
// just completed is released.
static void iree_hal_vulkan_staged_transfer_write_complete(
    void* user_data, iree_async_operation_t* base_operation,
    iree_status_t status, iree_async_completion_flags_t flags) {
  (void)base_operation;
  (void)flags;
  iree_hal_vulkan_staged_transfer_chunk_t* chunk = user_data;

  if (iree_status_is_ok(status)) {
    const iree_host_size_t requested_bytes = (iree_host_size_t)chunk->length;
    if (chunk->write_op.bytes_written > 0) {
      chunk->file_progress += chunk->write_op.bytes_written;
      if (chunk->file_progress < requested_bytes) {
        // More bytes remain; the follow-up write holds its own reference.
        status = iree_hal_vulkan_staged_transfer_submit_next_write(chunk);
        if (iree_status_is_ok(status)) {
          iree_hal_resource_release(&chunk->transfer->resource);
          return;
        }
      }
    } else if (chunk->file_progress < requested_bytes) {
      status = iree_make_status(
          IREE_STATUS_OUT_OF_RANGE,
          "short write: requested %" PRIhsz " bytes, wrote %" PRIhsz,
          requested_bytes, chunk->file_progress);
    }
  }

  if (!iree_status_is_ok(status)) {
    iree_hal_vulkan_staged_transfer_chunk_fail(chunk, status);
  } else {
    iree_hal_vulkan_staged_transfer_chunk_finish(chunk,
                                                 /*did_transfer_bytes=*/true);
  }
  iree_hal_resource_release(&chunk->transfer->resource);
}
// Submits an async file read covering the bytes of |chunk| that have not been
// read yet (everything past |file_progress|).
//
// The read lands in the staging slot host span at the current progress offset
// and reads from the file at transfer file offset + chunk offset + progress.
// A transfer reference is retained for the in-flight operation and dropped
// again here only if submission fails.
static iree_status_t iree_hal_vulkan_staged_transfer_submit_next_read(
    iree_hal_vulkan_staged_transfer_chunk_t* chunk) {
  iree_hal_vulkan_staged_transfer_t* owner = chunk->transfer;
  iree_async_file_read_operation_t* read_op = &chunk->read_op;
  const iree_host_size_t bytes_left =
      (iree_host_size_t)chunk->length - chunk->file_progress;

  // Reset the operation storage before reuse.
  iree_async_operation_zero(&read_op->base, sizeof(*read_op));
  iree_async_operation_initialize(
      &read_op->base, IREE_ASYNC_OPERATION_TYPE_FILE_READ,
      IREE_ASYNC_OPERATION_FLAG_NONE,
      iree_hal_vulkan_staged_transfer_read_complete, chunk);
  read_op->file = owner->async_file;
  read_op->offset =
      owner->file_offset + chunk->transfer_offset + chunk->file_progress;
  read_op->buffer = iree_async_span_from_ptr(
      chunk->slot->host_span.data + chunk->file_progress, bytes_left);

  iree_hal_resource_retain(&owner->resource);
  iree_status_t status =
      iree_async_proactor_submit_one(owner->queue->proactor, &read_op->base);
  if (iree_status_is_ok(status)) return status;
  // No completion callback is expected to balance the retain above.
  iree_hal_resource_release(&owner->resource);
  return status;
}
// Submits an async file write covering the bytes of |chunk| that have not been
// written yet (everything past |file_progress|).
//
// The write sources from the staging slot host span at the current progress
// offset and targets the file at transfer file offset + chunk offset +
// progress. A transfer reference is retained for the in-flight operation and
// dropped again here only if submission fails.
static iree_status_t iree_hal_vulkan_staged_transfer_submit_next_write(
    iree_hal_vulkan_staged_transfer_chunk_t* chunk) {
  iree_hal_vulkan_staged_transfer_t* owner = chunk->transfer;
  iree_async_file_write_operation_t* write_op = &chunk->write_op;
  const iree_host_size_t bytes_left =
      (iree_host_size_t)chunk->length - chunk->file_progress;

  // Reset the operation storage before reuse.
  iree_async_operation_zero(&write_op->base, sizeof(*write_op));
  iree_async_operation_initialize(
      &write_op->base, IREE_ASYNC_OPERATION_TYPE_FILE_WRITE,
      IREE_ASYNC_OPERATION_FLAG_NONE,
      iree_hal_vulkan_staged_transfer_write_complete, chunk);
  write_op->file = owner->async_file;
  write_op->offset =
      owner->file_offset + chunk->transfer_offset + chunk->file_progress;
  write_op->buffer = iree_async_span_from_ptr(
      chunk->slot->host_span.data + chunk->file_progress, bytes_left);

  iree_hal_resource_retain(&owner->resource);
  iree_status_t status =
      iree_async_proactor_submit_one(owner->queue->proactor, &write_op->base);
  if (iree_status_is_ok(status)) return status;
  // No completion callback is expected to balance the retain above.
  iree_hal_resource_release(&owner->resource);
  return status;
}
// Queue completion action invoked after the copy of a chunk has retired.
//
// |user_data| is the chunk; |completion_status| is cloned rather than
// consumed. For WRITE transfers the staging range is invalidated and then
// either copied into host memory or handed to an async file write (in which
// case the chunk stays active and this function returns early). Otherwise the
// chunk is retired as transferred, or failed when any step reported an error.
static void iree_hal_vulkan_staged_transfer_copy_complete(
    void* user_data, iree_status_t completion_status) {
  iree_hal_vulkan_staged_transfer_chunk_t* chunk = user_data;
  iree_hal_vulkan_staged_transfer_t* transfer = chunk->transfer;
  iree_status_t status = iree_status_clone(completion_status);

  const bool is_download =
      transfer->kind == IREE_HAL_VULKAN_STAGED_TRANSFER_WRITE;
  if (is_download && iree_status_is_ok(status)) {
    status = iree_hal_buffer_mapping_invalidate_range(
        &transfer->ring->mapping, chunk->slot->buffer_offset, chunk->length);
    if (iree_status_is_ok(status)) {
      if (transfer->host_kind == IREE_HAL_VULKAN_STAGED_TRANSFER_HOST_MEMORY) {
        uint8_t* host_target = transfer->host_contents.data +
                               transfer->host_offset + chunk->transfer_offset;
        memcpy(host_target, chunk->slot->host_span.data,
               (iree_host_size_t)chunk->length);
      } else {
        // The file write completion callback finishes or fails the chunk.
        chunk->file_progress = 0;
        status = iree_hal_vulkan_staged_transfer_submit_next_write(chunk);
        if (iree_status_is_ok(status)) return;
      }
    }
  }

  if (!iree_status_is_ok(status)) {
    iree_hal_vulkan_staged_transfer_chunk_fail(chunk, status);
    return;
  }
  iree_hal_vulkan_staged_transfer_chunk_finish(chunk,
                                               /*did_transfer_bytes=*/true);
}
// Submits the queue copy that moves |chunk| between its staging slot and the
// user buffer.
//
// For READ transfers the staging slot is the copy source: host-memory
// transfers first memcpy the host bytes into the slot, and the slot range is
// flushed before the copy is submitted. For other transfers the user buffer is
// the source and the staging slot the target. The copy carries a completion
// action that invokes iree_hal_vulkan_staged_transfer_copy_complete with the
// chunk and references the transfer resource.
static iree_status_t iree_hal_vulkan_staged_transfer_submit_copy(
    iree_hal_vulkan_staged_transfer_chunk_t* chunk) {
  iree_hal_vulkan_staged_transfer_t* transfer = chunk->transfer;
  const bool is_upload = transfer->kind == IREE_HAL_VULKAN_STAGED_TRANSFER_READ;

  if (is_upload) {
    if (transfer->host_kind == IREE_HAL_VULKAN_STAGED_TRANSFER_HOST_MEMORY) {
      const uint8_t* host_source = transfer->host_contents.data +
                                   transfer->host_offset +
                                   chunk->transfer_offset;
      memcpy(chunk->slot->host_span.data, host_source,
             (iree_host_size_t)chunk->length);
    }
    iree_status_t flush_status = iree_hal_buffer_mapping_flush_range(
        &transfer->ring->mapping, chunk->slot->buffer_offset, chunk->length);
    if (!iree_status_is_ok(flush_status)) return flush_status;
  }

  const iree_device_size_t staging_offset = chunk->slot->buffer_offset;
  const iree_device_size_t user_offset =
      transfer->buffer_offset + chunk->transfer_offset;
  iree_hal_buffer_t* source_buffer =
      is_upload ? transfer->ring->buffer : transfer->buffer;
  iree_hal_buffer_t* target_buffer =
      is_upload ? transfer->buffer : transfer->ring->buffer;
  const iree_device_size_t source_offset =
      is_upload ? staging_offset : user_offset;
  const iree_device_size_t target_offset =
      is_upload ? user_offset : staging_offset;

  const iree_hal_vulkan_queue_completion_action_t copy_action = {
      .fn = iree_hal_vulkan_staged_transfer_copy_complete,
      .user_data = chunk,
      .resource = &transfer->resource,
  };
  return iree_hal_vulkan_queue_submit_copy_with_action(
      transfer->queue, iree_hal_semaphore_list_empty(),
      iree_hal_semaphore_list_empty(), source_buffer, source_offset,
      target_buffer, target_offset, chunk->length, IREE_HAL_COPY_FLAG_NONE,
      copy_action);
}
// Starts the first stage of a freshly assigned |chunk|.
//
// File progress is reset. READ transfers backed by an async file begin with a
// file read into the staging slot; every other combination goes straight to
// the queue copy.
static iree_status_t iree_hal_vulkan_staged_transfer_submit_chunk(
    iree_hal_vulkan_staged_transfer_chunk_t* chunk) {
  chunk->file_progress = 0;
  const iree_hal_vulkan_staged_transfer_t* transfer = chunk->transfer;
  const bool reads_async_file =
      transfer->kind == IREE_HAL_VULKAN_STAGED_TRANSFER_READ &&
      transfer->host_kind == IREE_HAL_VULKAN_STAGED_TRANSFER_HOST_ASYNC_FILE;
  return reads_async_file
             ? iree_hal_vulkan_staged_transfer_submit_next_read(chunk)
             : iree_hal_vulkan_staged_transfer_submit_copy(chunk);
}
// Submits new chunks for |transfer| until there is nothing left to submit, no
// staging slot can be obtained, or a submission fails.
//
// Each iteration acquires a staging slot, assigns the next byte range of the
// transfer to an idle chunk record under the transfer mutex, and then starts
// that chunk outside of the lock. Chunk lengths are capped at the ring slot
// size.
static void iree_hal_vulkan_staged_transfer_pump(
iree_hal_vulkan_staged_transfer_t* transfer) {
for (;;) {
// Cheap pre-check so that no slot is acquired when there is nothing to do:
// stop once completion started, a failure was recorded, or every byte has
// already been assigned to a chunk.
iree_slim_mutex_lock(&transfer->mutex);
const bool can_submit_more =
!transfer->finishing && iree_status_is_ok(transfer->failure_status) &&
transfer->submitted_length < transfer->requested_length;
iree_slim_mutex_unlock(&transfer->mutex);
if (!can_submit_more) return;
iree_hal_vulkan_queue_staging_slot_t* slot = NULL;
if (!iree_hal_vulkan_queue_staging_ring_try_acquire(transfer->ring,
&slot)) {
// No slot right now: register as a waiter so that
// iree_hal_vulkan_staged_transfer_slot_available resumes pumping later.
const bool queued = iree_hal_vulkan_queue_staging_ring_queue_waiter(
transfer->ring, &transfer->slot_waiter,
iree_hal_vulkan_staged_transfer_slot_available, transfer,
&transfer->resource);
if (queued) return;
// NOTE(review): a false result presumably means a slot became available
// (or the waiter could not be queued) in the meantime; the loop retries
// the acquire. Confirm against the staging ring implementation.
continue;
}
iree_hal_vulkan_staged_transfer_chunk_t* chunk = NULL;
iree_slim_mutex_lock(&transfer->mutex);
// Re-check under the lock: state may have changed since the pre-check.
if (!transfer->finishing && iree_status_is_ok(transfer->failure_status) &&
transfer->submitted_length < transfer->requested_length) {
chunk = iree_hal_vulkan_staged_transfer_find_idle_chunk(transfer);
if (chunk) {
const iree_device_size_t remaining_length =
transfer->requested_length - transfer->submitted_length;
chunk->transfer = transfer;
chunk->slot = slot;
chunk->transfer_offset = transfer->submitted_length;
chunk->length = iree_min(transfer->ring->slot_size, remaining_length);
transfer->submitted_length += chunk->length;
++transfer->active_chunk_count;
}
}
iree_slim_mutex_unlock(&transfer->mutex);
if (!chunk) {
// The slot is not needed after all; hand it back to the ring.
iree_hal_vulkan_queue_staging_ring_release(transfer->ring, slot);
return;
}
iree_status_t status = iree_hal_vulkan_staged_transfer_submit_chunk(chunk);
if (!iree_status_is_ok(status)) {
// chunk_fail takes ownership of |status|, records the failure, and
// retires the chunk (which itself pumps and checks for completion).
iree_hal_vulkan_staged_transfer_chunk_fail(chunk, status);
return;
}
}
}
// Queue completion action that kicks off a staged transfer.
//
// |user_data| is the transfer; |completion_status| is cloned rather than
// consumed. A failed status fails the signal semaphores of the transfer and
// nothing is started. On success an extra transfer reference is retained (the
// one dropped by terminal completion), chunks are pumped, and the transfer is
// checked for completion.
static void iree_hal_vulkan_staged_transfer_start(
    void* user_data, iree_status_t completion_status) {
  iree_hal_vulkan_staged_transfer_t* transfer = user_data;
  if (iree_status_is_ok(completion_status)) {
    iree_hal_resource_retain(&transfer->resource);
    iree_hal_vulkan_staged_transfer_pump(transfer);
    iree_hal_vulkan_staged_transfer_try_finish(transfer);
  } else {
    iree_hal_vulkan_staged_transfer_fail_signals(
        transfer, iree_status_clone(completion_status));
  }
}
// Allocates and initializes a staged transfer.
//
// Validation, in order: unknown |flags| bits; host capture requested for a
// non-READ transfer; per-|host_kind| requirements (host memory: |length| fits
// in host size, [host_offset, host_offset + length) lies inside
// |host_contents|, non-NULL data for non-empty transfers; async file: no host
// capture, non-NULL |lifetime_file| and |async_file|); the staging ring
// matching |kind| exists; allocation size arithmetic does not overflow.
//
// The transfer, one chunk record per ring slot, and (when capturing) a copy of
// the host range are placed in a single zeroed allocation from the queue host
// allocator. |lifetime_file| and |buffer| are retained and
// |signal_semaphore_list| is cloned. On success |out_transfer| receives the
// new transfer; on failure it is left NULL and nothing is leaked.
static iree_status_t iree_hal_vulkan_staged_transfer_create(
    iree_hal_vulkan_queue_t* queue, iree_hal_vulkan_staged_transfer_kind_t kind,
    iree_hal_vulkan_staged_transfer_host_kind_t host_kind,
    iree_byte_span_t host_contents, iree_host_size_t host_offset,
    iree_async_file_t* async_file, uint64_t file_offset,
    iree_hal_file_t* lifetime_file, iree_hal_buffer_t* buffer,
    iree_device_size_t buffer_offset, iree_device_size_t length,
    iree_hal_vulkan_staged_transfer_flags_t flags,
    const iree_hal_semaphore_list_t signal_semaphore_list,
    iree_hal_vulkan_staged_transfer_t** out_transfer) {
  *out_transfer = NULL;

  // Flag validation.
  const iree_hal_vulkan_staged_transfer_flags_t supported_flags =
      IREE_HAL_VULKAN_STAGED_TRANSFER_FLAG_CAPTURE_HOST_RANGE;
  if (iree_any_bit_set(flags, ~supported_flags)) {
    return iree_make_status(
        IREE_STATUS_INVALID_ARGUMENT,
        "unsupported Vulkan staged transfer flags: 0x%" PRIx32, flags);
  }
  const bool captures_host_range = iree_all_bits_set(
      flags, IREE_HAL_VULKAN_STAGED_TRANSFER_FLAG_CAPTURE_HOST_RANGE);
  if (captures_host_range && kind != IREE_HAL_VULKAN_STAGED_TRANSFER_READ) {
    return iree_make_status(
        IREE_STATUS_INVALID_ARGUMENT,
        "Vulkan staged transfer host capture is only valid for uploads");
  }

  // Host endpoint validation.
  iree_host_size_t host_length = 0;
  if (host_kind == IREE_HAL_VULKAN_STAGED_TRANSFER_HOST_MEMORY) {
    if (length > IREE_HOST_SIZE_MAX) {
      return iree_make_status(
          IREE_STATUS_OUT_OF_RANGE,
          "Vulkan staged transfer length exceeds host addressable size");
    }
    host_length = (iree_host_size_t)length;
    const bool offset_in_range = host_offset <= host_contents.data_length;
    if (!offset_in_range ||
        host_length > host_contents.data_length - host_offset) {
      return iree_make_status(
          IREE_STATUS_OUT_OF_RANGE,
          "Vulkan staged transfer host range exceeds available contents");
    }
    if (length != 0 && !host_contents.data) {
      return iree_make_status(
          IREE_STATUS_INVALID_ARGUMENT,
          "Vulkan staged transfer host range must be non-null");
    }
  } else if (host_kind == IREE_HAL_VULKAN_STAGED_TRANSFER_HOST_ASYNC_FILE) {
    if (captures_host_range) {
      return iree_make_status(
          IREE_STATUS_INVALID_ARGUMENT,
          "Vulkan async file staged transfers cannot capture host ranges");
    }
    if (!lifetime_file || !async_file) {
      return iree_make_status(
          IREE_STATUS_INVALID_ARGUMENT,
          "Vulkan async file staged transfers require an async file handle");
    }
  } else {
    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                            "unsupported Vulkan staged transfer host kind %u",
                            (uint32_t)host_kind);
  }

  // Uploads stage through the upload ring, everything else through the
  // download ring.
  iree_hal_vulkan_queue_staging_ring_t* ring = queue->download_staging_ring;
  if (kind == IREE_HAL_VULKAN_STAGED_TRANSFER_READ) {
    ring = queue->upload_staging_ring;
  }
  if (!ring) {
    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
                            "Vulkan queue staging ring is not initialized");
  }

  // Allocation layout: [transfer][chunk table][captured host bytes].
  iree_host_size_t chunk_table_size = 0;
  if (!iree_host_size_checked_mul(
          ring->slot_count, sizeof(iree_hal_vulkan_staged_transfer_chunk_t),
          &chunk_table_size)) {
    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
                            "Vulkan staged transfer chunk table overflows");
  }
  iree_host_size_t allocation_size = 0;
  if (!iree_host_size_checked_add(sizeof(iree_hal_vulkan_staged_transfer_t),
                                  chunk_table_size, &allocation_size)) {
    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
                            "Vulkan staged transfer allocation overflows");
  }
  const iree_host_size_t captured_length =
      captures_host_range ? host_length : 0;
  if (!iree_host_size_checked_add(allocation_size, captured_length,
                                  &allocation_size)) {
    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
                            "Vulkan staged transfer allocation overflows");
  }

  iree_hal_vulkan_staged_transfer_t* transfer = NULL;
  IREE_RETURN_IF_ERROR(iree_allocator_malloc(
      queue->host_allocator, allocation_size, (void**)&transfer));
  memset(transfer, 0, allocation_size);
  transfer->chunks = (iree_hal_vulkan_staged_transfer_chunk_t*)(transfer + 1);

  // Snapshot the host range into the tail of the allocation and retarget the
  // host span at the snapshot.
  uint8_t* capture_storage = (uint8_t*)transfer->chunks + chunk_table_size;
  if (captures_host_range && captured_length != 0) {
    memcpy(capture_storage, host_contents.data + host_offset, captured_length);
    host_contents = iree_make_byte_span(capture_storage, captured_length);
    host_offset = 0;
  }

  iree_hal_resource_initialize(&iree_hal_vulkan_staged_transfer_vtable,
                               &transfer->resource);
  transfer->host_allocator = queue->host_allocator;
  iree_slim_mutex_initialize(&transfer->mutex);

  // Owning queue and staging resources.
  transfer->queue = queue;
  transfer->ring = ring;
  transfer->chunk_count = ring->slot_count;
  transfer->kind = kind;
  transfer->host_kind = host_kind;

  // Host endpoint.
  transfer->file = lifetime_file;
  iree_hal_file_retain(transfer->file);
  transfer->async_file = async_file;
  transfer->file_offset = file_offset;
  transfer->host_contents = host_contents;
  transfer->host_offset = host_offset;

  // Device endpoint.
  transfer->buffer = buffer;
  iree_hal_buffer_retain(transfer->buffer);
  transfer->buffer_offset = buffer_offset;
  transfer->requested_length = length;

  iree_status_t status = iree_hal_semaphore_list_clone(
      &signal_semaphore_list, transfer->host_allocator,
      &transfer->signal_semaphore_list);
  if (!iree_status_is_ok(status)) {
    // Dropping the only reference destroys the partially built transfer.
    iree_hal_resource_release(&transfer->resource);
    return status;
  }
  *out_transfer = transfer;
  return status;
}
// Creates a staged transfer between |file| and |buffer| and schedules it to
// start once |wait_semaphore_list| is satisfied.
//
// Memory files are transferred through their host contents (with |file_offset|
// as the host offset); files exposing an async handle are transferred with
// proactor file operations; any other file is rejected as UNIMPLEMENTED. The
// transfer is started from the completion action of a barrier submitted with
// the wait list and an empty signal list; |signal_semaphore_list| is owned by
// the transfer. If the barrier submission fails the transfer signals are
// failed with a clone of the returned status. The local transfer reference is
// always released before returning.
static iree_status_t iree_hal_vulkan_queue_submit_staged_transfer(
    iree_hal_vulkan_queue_t* queue,
    const iree_hal_semaphore_list_t wait_semaphore_list,
    const iree_hal_semaphore_list_t signal_semaphore_list,
    iree_hal_vulkan_staged_transfer_kind_t kind, iree_hal_file_t* file,
    uint64_t file_offset, iree_hal_buffer_t* buffer,
    iree_device_size_t buffer_offset, iree_device_size_t length) {
  iree_hal_vulkan_staged_transfer_host_kind_t host_kind =
      IREE_HAL_VULKAN_STAGED_TRANSFER_HOST_MEMORY;
  iree_byte_span_t file_contents = iree_byte_span_empty();
  iree_host_size_t host_offset = 0;
  iree_async_file_t* async_file = NULL;

  if (iree_hal_memory_file_isa(file)) {
    if (file_offset > IREE_HOST_SIZE_MAX) {
      return iree_make_status(
          IREE_STATUS_OUT_OF_RANGE,
          "Vulkan staged transfer host file offset exceeds addressable size");
    }
    host_offset = (iree_host_size_t)file_offset;
    IREE_RETURN_IF_ERROR(iree_hal_memory_file_contents(file, &file_contents));
  } else {
    async_file = iree_hal_file_async_handle(file);
    if (async_file == NULL) {
      return iree_make_status(
          IREE_STATUS_UNIMPLEMENTED,
          "Vulkan staged transfers require a memory file or proactor-backed "
          "async file handle");
    }
    host_kind = IREE_HAL_VULKAN_STAGED_TRANSFER_HOST_ASYNC_FILE;
  }

  iree_hal_vulkan_staged_transfer_t* transfer = NULL;
  IREE_RETURN_IF_ERROR(iree_hal_vulkan_staged_transfer_create(
      queue, kind, host_kind, file_contents, host_offset, async_file,
      file_offset, file, buffer, buffer_offset, length,
      IREE_HAL_VULKAN_STAGED_TRANSFER_FLAG_NONE, signal_semaphore_list,
      &transfer));

  const iree_hal_vulkan_queue_completion_action_t start_action = {
      .fn = iree_hal_vulkan_staged_transfer_start,
      .user_data = transfer,
      .resource = &transfer->resource,
  };
  iree_status_t status = iree_hal_vulkan_queue_submit_barrier_with_action(
      queue, wait_semaphore_list, iree_hal_semaphore_list_empty(),
      start_action);
  if (!iree_status_is_ok(status)) {
    iree_hal_vulkan_staged_transfer_fail_signals(transfer,
                                                 iree_status_clone(status));
  }
  iree_hal_resource_release(&transfer->resource);
  return status;
}
// Uploads |length| host bytes starting at |source_buffer| + |source_offset|
// into |target_buffer| through the upload staging path.
//
// The host bytes are captured into transfer-owned storage while the transfer
// is created (CAPTURE_HOST_RANGE). The transfer is started from the completion
// action of a barrier submitted with |wait_semaphore_list| and an empty signal
// list; |signal_semaphore_list| is owned by the transfer. If the barrier
// submission fails the transfer signals are failed with a clone of the
// returned status. The local transfer reference is always released before
// returning.
static iree_status_t iree_hal_vulkan_queue_submit_staged_update(
    iree_hal_vulkan_queue_t* queue,
    const iree_hal_semaphore_list_t wait_semaphore_list,
    const iree_hal_semaphore_list_t signal_semaphore_list,
    const void* source_buffer, iree_host_size_t source_offset,
    iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
    iree_device_size_t length) {
  // The span already starts at the first requested byte, so the host offset
  // passed to create is zero.
  uint8_t* source_bytes = (uint8_t*)source_buffer + source_offset;
  iree_byte_span_t source_contents =
      iree_make_byte_span(source_bytes, (iree_host_size_t)length);

  iree_hal_vulkan_staged_transfer_t* transfer = NULL;
  IREE_RETURN_IF_ERROR(iree_hal_vulkan_staged_transfer_create(
      queue, IREE_HAL_VULKAN_STAGED_TRANSFER_READ,
      IREE_HAL_VULKAN_STAGED_TRANSFER_HOST_MEMORY, source_contents,
      /*host_offset=*/0, /*async_file=*/NULL, /*file_offset=*/0,
      /*lifetime_file=*/NULL, target_buffer, target_offset, length,
      IREE_HAL_VULKAN_STAGED_TRANSFER_FLAG_CAPTURE_HOST_RANGE,
      signal_semaphore_list, &transfer));

  const iree_hal_vulkan_queue_completion_action_t start_action = {
      .fn = iree_hal_vulkan_staged_transfer_start,
      .user_data = transfer,
      .resource = &transfer->resource,
  };
  iree_status_t status = iree_hal_vulkan_queue_submit_barrier_with_action(
      queue, wait_semaphore_list, iree_hal_semaphore_list_empty(),
      start_action);
  if (!iree_status_is_ok(status)) {
    iree_hal_vulkan_staged_transfer_fail_signals(transfer,
                                                 iree_status_clone(status));
  }
  iree_hal_resource_release(&transfer->resource);
  return status;
}
// Submits a queue update that writes |length| bytes read from host memory at
// |source_buffer| + |source_offset| into |target_buffer| at |target_offset|.
//
// All arguments are validated first. A non-zero-length update whose target
// cannot be recorded natively (as reported by
// iree_hal_vulkan_queue_can_record_update_native) is forwarded to
// iree_hal_vulkan_queue_submit_staged_update. Otherwise a pending submission
// is created, the source bytes are copied into storage held by the submission,
// and (for non-zero lengths) the update is recorded into a native command
// buffer before the submission is handed to
// iree_hal_vulkan_queue_submit_captured_submission.
//
// Any |flags| other than IREE_HAL_UPDATE_FLAG_NONE are rejected with
// INVALID_ARGUMENT.
iree_status_t iree_hal_vulkan_queue_submit_update(
iree_hal_vulkan_queue_t* queue,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
const void* source_buffer, iree_host_size_t source_offset,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, iree_hal_update_flags_t flags) {
IREE_ASSERT_ARGUMENT(queue);
IREE_ASSERT_ARGUMENT(target_buffer);
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status = iree_ok_status();
if (flags != IREE_HAL_UPDATE_FLAG_NONE) {
status = iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"unsupported Vulkan queue update flags: 0x%" PRIx64, flags);
}
// The source bytes are copied on the host below, so |length| must be
// representable as an iree_host_size_t before it is cast.
if (iree_status_is_ok(status) && length > IREE_HOST_SIZE_MAX) {
status = iree_make_status(IREE_STATUS_OUT_OF_RANGE,
"Vulkan queue update length exceeds host size");
}
// Rejects source ranges where source_offset + length would overflow the host
// size type.
if (iree_status_is_ok(status) &&
source_offset > IREE_HOST_SIZE_MAX - (iree_host_size_t)length) {
status = iree_make_status(IREE_STATUS_OUT_OF_RANGE,
"Vulkan queue update source range exceeds host "
"size");
}
// A NULL source is only an error when there are bytes to copy.
if (iree_status_is_ok(status) && length > 0 && !source_buffer) {
status = iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"queue update source buffer must be non-null");
}
if (iree_status_is_ok(status)) {
status =
iree_hal_buffer_validate_range(target_buffer, target_offset, length);
}
if (iree_status_is_ok(status)) {
status = iree_hal_buffer_validate_memory_type(
iree_hal_buffer_memory_type(target_buffer),
IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE);
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, wait_semaphore_list, IREE_SV("wait"));
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, signal_semaphore_list, IREE_SV("signal"));
}
// Backing validation and the native/staged decision are skipped for
// zero-length updates, which record nothing.
if (iree_status_is_ok(status) && length != 0) {
status = iree_hal_vulkan_queue_validate_recordable_backing(
target_buffer, IREE_SV("update target"));
}
bool use_native_update = false;
if (iree_status_is_ok(status) && length != 0) {
status = iree_hal_vulkan_queue_can_record_update_native(
target_buffer, target_offset, length, &use_native_update);
}
// Non-native targets are routed through the staged update path and its status
// is returned directly; nothing below runs in that case.
if (iree_status_is_ok(status) && length != 0 && !use_native_update) {
status = iree_hal_vulkan_queue_submit_staged_update(
queue, wait_semaphore_list, signal_semaphore_list, source_buffer,
source_offset, target_buffer, target_offset, length);
IREE_TRACE_ZONE_END(z0);
return status;
}
iree_hal_vulkan_queue_pending_submission_t* submission = NULL;
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_pending_submission_create(
queue, wait_semaphore_list, signal_semaphore_list,
IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_UPDATE, (iree_hal_host_call_t){0},
/*args=*/NULL, IREE_HAL_HOST_CALL_FLAG_NONE,
/*payload_storage_length=*/0, /*out_payload_storage=*/NULL,
&submission);
}
if (iree_status_is_ok(status)) {
// The submission holds a retained reference to the target buffer and a
// host-allocated copy of the source bytes.
// NOTE(review): presumably both are released by
// iree_hal_vulkan_queue_pending_submission_destroy - confirm.
submission->update.target_buffer = target_buffer;
iree_hal_buffer_retain(target_buffer);
submission->update.target_offset = target_offset;
submission->update.length = length;
submission->update.flags = flags;
if (length > 0) {
status =
iree_allocator_malloc(queue->host_allocator, (iree_host_size_t)length,
&submission->update.source_data);
if (iree_status_is_ok(status)) {
memcpy(submission->update.source_data,
(const uint8_t*)source_buffer + source_offset,
(iree_host_size_t)length);
}
}
}
// Only non-empty updates need a native command buffer and recorded commands.
if (iree_status_is_ok(status) && length != 0) {
status =
iree_hal_vulkan_queue_allocate_native_command_buffer(queue, submission);
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_profile_prepare_native_timestamps(
queue, submission);
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_record_update_native(queue, submission);
}
}
if (iree_status_is_ok(status)) {
status =
iree_hal_vulkan_queue_submit_captured_submission(queue, submission);
// The local pointer is cleared regardless of the returned status so the
// cleanup below never touches a submitted submission.
// NOTE(review): presumably the call takes ownership even on failure -
// confirm.
submission = NULL;
}
// Reached with a live submission only when a step after creation failed:
// the submission's signal semaphores are failed with a clone of the status
// and the submission is destroyed here.
if (submission) {
if (!iree_status_is_ok(status)) {
iree_hal_vulkan_queue_fail_signal_list(submission->signal_semaphore_list,
iree_status_clone(status));
}
iree_hal_vulkan_queue_pending_submission_destroy(queue, submission);
}
IREE_TRACE_ZONE_END(z0);
return status;
}
// Submits a queue read of |length| bytes from |source_file| at |source_offset|
// into |target_buffer| at |target_offset|.
//
// After validation the request is routed to one of:
//   - iree_hal_vulkan_queue_submit_barrier when |length| is zero;
//   - iree_hal_vulkan_queue_submit_copy when the file exposes a storage buffer
//     and both that buffer and the target are reported native by
//     iree_hal_vulkan_queue_buffer_is_native;
//   - iree_hal_vulkan_queue_submit_staged_transfer when the target is native
//     and the file supports staged transfers.
// If none applies the function returns UNAVAILABLE.
//
// Any |flags| other than IREE_HAL_READ_FLAG_NONE are rejected with
// INVALID_ARGUMENT.
iree_status_t iree_hal_vulkan_queue_submit_read(
iree_hal_vulkan_queue_t* queue,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_file_t* source_file, uint64_t source_offset,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, iree_hal_read_flags_t flags) {
IREE_ASSERT_ARGUMENT(queue);
IREE_ASSERT_ARGUMENT(source_file);
IREE_ASSERT_ARGUMENT(target_buffer);
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status = iree_ok_status();
if (flags != IREE_HAL_READ_FLAG_NONE) {
status = iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"unsupported Vulkan queue read flags: 0x%" PRIx64,
flags);
}
if (iree_status_is_ok(status)) {
status =
iree_hal_file_validate_access(source_file, IREE_HAL_MEMORY_ACCESS_READ);
}
if (iree_status_is_ok(status)) {
status =
iree_hal_buffer_validate_range(target_buffer, target_offset, length);
}
if (iree_status_is_ok(status)) {
status = iree_hal_buffer_validate_memory_type(
iree_hal_buffer_memory_type(target_buffer),
IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE);
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_file_range(
source_file, source_offset, length, IREE_SV("read"));
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, wait_semaphore_list, IREE_SV("wait"));
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, signal_semaphore_list, IREE_SV("signal"));
}
// A validated zero-length read moves no data and is submitted as a barrier
// over the same wait/signal semaphores.
if (iree_status_is_ok(status) && length == 0) {
IREE_TRACE_ZONE_END(z0);
return iree_hal_vulkan_queue_submit_barrier(queue, wait_semaphore_list,
signal_semaphore_list);
}
// The storage buffer is only queried once validation has passed; it may be
// NULL, in which case source_storage_is_native stays false.
iree_hal_buffer_t* source_storage_buffer =
iree_status_is_ok(status) ? iree_hal_file_storage_buffer(source_file)
: NULL;
bool source_storage_is_native = false;
if (iree_status_is_ok(status) && source_storage_buffer) {
status = iree_hal_vulkan_queue_buffer_is_native(source_storage_buffer,
&source_storage_is_native);
}
bool target_is_native = false;
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_buffer_is_native(target_buffer,
&target_is_native);
}
// Both sides native: the read becomes a buffer-to-buffer copy from the file's
// storage buffer, provided the file offset fits in iree_device_size_t.
if (iree_status_is_ok(status) && source_storage_is_native &&
target_is_native) {
if (source_offset > IREE_DEVICE_SIZE_MAX) {
status = iree_make_status(
IREE_STATUS_OUT_OF_RANGE,
"read source offset %" PRIu64 " exceeds device size", source_offset);
} else {
IREE_TRACE_ZONE_END(z0);
return iree_hal_vulkan_queue_submit_copy(
queue, wait_semaphore_list, signal_semaphore_list,
source_storage_buffer, (iree_device_size_t)source_offset,
target_buffer, target_offset, length, IREE_HAL_COPY_FLAG_NONE);
}
}
// Otherwise fall back to a staged file transfer when the target is native and
// the file supports it.
if (iree_status_is_ok(status) && target_is_native &&
iree_hal_vulkan_queue_file_supports_staged_transfer(source_file)) {
IREE_TRACE_ZONE_END(z0);
return iree_hal_vulkan_queue_submit_staged_transfer(
queue, wait_semaphore_list, signal_semaphore_list,
IREE_HAL_VULKAN_STAGED_TRANSFER_READ, source_file, source_offset,
target_buffer, target_offset, length);
}
// No route matched; an earlier failure status (if any) is preserved as-is.
if (iree_status_is_ok(status)) {
status = iree_make_status(
IREE_STATUS_UNAVAILABLE,
"Vulkan queue read requires native target buffer storage and either "
"native device-visible file storage or staged file transfer support");
}
IREE_TRACE_ZONE_END(z0);
return status;
}
// Submits a queue write of |length| bytes from |source_buffer| at
// |source_offset| into |target_file| at |target_offset|.
//
// After validation the request is routed to one of:
//   - iree_hal_vulkan_queue_submit_barrier when |length| is zero;
//   - iree_hal_vulkan_queue_submit_copy when the file exposes a storage buffer
//     and both the source and that buffer are reported native by
//     iree_hal_vulkan_queue_buffer_is_native;
//   - iree_hal_vulkan_queue_submit_staged_transfer when the source is native
//     and the file supports staged transfers.
// If none applies the function returns UNAVAILABLE.
//
// Any |flags| other than IREE_HAL_WRITE_FLAG_NONE are rejected with
// INVALID_ARGUMENT.
iree_status_t iree_hal_vulkan_queue_submit_write(
iree_hal_vulkan_queue_t* queue,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
iree_hal_file_t* target_file, uint64_t target_offset,
iree_device_size_t length, iree_hal_write_flags_t flags) {
IREE_ASSERT_ARGUMENT(queue);
IREE_ASSERT_ARGUMENT(source_buffer);
IREE_ASSERT_ARGUMENT(target_file);
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status = iree_ok_status();
if (flags != IREE_HAL_WRITE_FLAG_NONE) {
status = iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"unsupported Vulkan queue write flags: 0x%" PRIx64, flags);
}
if (iree_status_is_ok(status)) {
status = iree_hal_file_validate_access(target_file,
IREE_HAL_MEMORY_ACCESS_WRITE);
}
if (iree_status_is_ok(status)) {
status =
iree_hal_buffer_validate_range(source_buffer, source_offset, length);
}
if (iree_status_is_ok(status)) {
status = iree_hal_buffer_validate_memory_type(
iree_hal_buffer_memory_type(source_buffer),
IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE);
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_file_range(
target_file, target_offset, length, IREE_SV("write"));
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, wait_semaphore_list, IREE_SV("wait"));
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, signal_semaphore_list, IREE_SV("signal"));
}
// A validated zero-length write moves no data and is submitted as a barrier
// over the same wait/signal semaphores.
if (iree_status_is_ok(status) && length == 0) {
IREE_TRACE_ZONE_END(z0);
return iree_hal_vulkan_queue_submit_barrier(queue, wait_semaphore_list,
signal_semaphore_list);
}
bool source_is_native = false;
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_buffer_is_native(source_buffer,
&source_is_native);
}
// The storage buffer is only queried once the steps above have succeeded; it
// may be NULL, in which case target_storage_is_native stays false.
iree_hal_buffer_t* target_storage_buffer =
iree_status_is_ok(status) ? iree_hal_file_storage_buffer(target_file)
: NULL;
bool target_storage_is_native = false;
if (iree_status_is_ok(status) && target_storage_buffer) {
status = iree_hal_vulkan_queue_buffer_is_native(target_storage_buffer,
&target_storage_is_native);
}
// Both sides native: the write becomes a buffer-to-buffer copy into the
// file's storage buffer, provided the file offset fits in iree_device_size_t.
if (iree_status_is_ok(status) && source_is_native &&
target_storage_is_native) {
if (target_offset > IREE_DEVICE_SIZE_MAX) {
status = iree_make_status(
IREE_STATUS_OUT_OF_RANGE,
"write target offset %" PRIu64 " exceeds device size", target_offset);
} else {
IREE_TRACE_ZONE_END(z0);
return iree_hal_vulkan_queue_submit_copy(
queue, wait_semaphore_list, signal_semaphore_list, source_buffer,
source_offset, target_storage_buffer,
(iree_device_size_t)target_offset, length, IREE_HAL_COPY_FLAG_NONE);
}
}
// Otherwise fall back to a staged file transfer when the source is native and
// the file supports it. The file and its offset are passed ahead of the
// buffer and its offset, matching the argument order used by the read path.
if (iree_status_is_ok(status) && source_is_native &&
iree_hal_vulkan_queue_file_supports_staged_transfer(target_file)) {
IREE_TRACE_ZONE_END(z0);
return iree_hal_vulkan_queue_submit_staged_transfer(
queue, wait_semaphore_list, signal_semaphore_list,
IREE_HAL_VULKAN_STAGED_TRANSFER_WRITE, target_file, target_offset,
source_buffer, source_offset, length);
}
// No route matched; an earlier failure status (if any) is preserved as-is.
if (iree_status_is_ok(status)) {
status = iree_make_status(
IREE_STATUS_UNAVAILABLE,
"Vulkan queue write requires native source buffer storage and either "
"native device-visible file storage or staged file transfer support");
}
IREE_TRACE_ZONE_END(z0);
return status;
}
// Checks that |flags| only requests dispatch features this queue implements.
//
// Returns UNIMPLEMENTED when the flags request indirect arguments or custom
// direct arguments, and INVALID_ARGUMENT when any flag outside the supported
// set is present or when static and dynamic indirect workgroup parameters are
// requested together.
static iree_status_t iree_hal_vulkan_queue_validate_dispatch_flags(
    iree_hal_dispatch_flags_t flags) {
  const bool requests_custom_arguments =
      iree_hal_dispatch_uses_indirect_arguments(flags) ||
      iree_any_bit_set(flags, IREE_HAL_DISPATCH_FLAG_CUSTOM_DIRECT_ARGUMENTS);
  if (requests_custom_arguments) {
    return iree_make_status(
        IREE_STATUS_UNIMPLEMENTED,
        "Vulkan queue_dispatch custom dispatch arguments are unsupported");
  }

  // The two indirect parameter modes are individually supported but mutually
  // exclusive.
  const iree_hal_dispatch_flags_t both_indirect_modes =
      IREE_HAL_DISPATCH_FLAG_DYNAMIC_INDIRECT_PARAMETERS |
      IREE_HAL_DISPATCH_FLAG_STATIC_INDIRECT_PARAMETERS;
  const iree_hal_dispatch_flags_t accepted_flags =
      both_indirect_modes | IREE_HAL_DISPATCH_FLAG_ALLOW_INLINE_EXECUTION |
      IREE_HAL_DISPATCH_FLAG_BORROW_RESOURCE_LIFETIMES;

  if (iree_any_bit_set(flags, ~accepted_flags)) {
    return iree_make_status(
        IREE_STATUS_INVALID_ARGUMENT,
        "unsupported Vulkan queue_dispatch flags: 0x%" PRIx64, flags);
  }

  if ((flags & both_indirect_modes) == both_indirect_modes) {
    return iree_make_status(
        IREE_STATUS_INVALID_ARGUMENT,
        "Vulkan queue_dispatch cannot use both static and dynamic indirect "
        "workgroup parameters");
  }

  return iree_ok_status();
}
// Rejects dispatch configurations that override the workgroup size or request
// dynamic workgroup local memory; both return UNIMPLEMENTED.
// |flags| is accepted for signature parity and is not inspected.
static iree_status_t iree_hal_vulkan_queue_validate_dispatch_shape(
    const iree_hal_dispatch_config_t config, iree_hal_dispatch_flags_t flags) {
  (void)flags;

  // Any non-zero dimension counts as a workgroup size override.
  for (int dimension = 0; dimension < 3; ++dimension) {
    if (config.workgroup_size[dimension] == 0) continue;
    return iree_make_status(
        IREE_STATUS_UNIMPLEMENTED,
        "Vulkan queue_dispatch workgroup size overrides are unsupported");
  }

  if (config.dynamic_workgroup_local_memory == 0) {
    return iree_ok_status();
  }
  return iree_make_status(
      IREE_STATUS_UNIMPLEMENTED,
      "Vulkan queue_dispatch dynamic workgroup local memory is unsupported");
}
// Verifies that |buffer| allows at least one of the usage bits in
// IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE.
//
// Returns PERMISSION_DENIED otherwise; when IREE_STATUS_MODE is enabled the
// message lists the allowed and required usage bits.
static iree_status_t iree_hal_vulkan_queue_validate_dispatch_storage_usage(
    iree_hal_buffer_t* buffer) {
  const iree_hal_buffer_usage_t buffer_usage =
      iree_hal_buffer_allowed_usage(buffer);
  if (!iree_any_bit_set(buffer_usage,
                        IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE)) {
#if IREE_STATUS_MODE
    iree_bitfield_string_temp_t allowed_storage;
    iree_bitfield_string_temp_t required_storage;
    iree_string_view_t allowed_text =
        iree_hal_buffer_usage_format(buffer_usage, &allowed_storage);
    iree_string_view_t required_text = iree_hal_buffer_usage_format(
        IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE, &required_storage);
    return iree_make_status(
        IREE_STATUS_PERMISSION_DENIED,
        "requested usage was not specified when the buffer was allocated; buffer "
        "allows %.*s, operation requires one of %.*s",
        (int)allowed_text.size, allowed_text.data, (int)required_text.size,
        required_text.data);
#else
    return iree_status_from_code(IREE_STATUS_PERMISSION_DENIED);
#endif  // IREE_STATUS_MODE
  }
  return iree_ok_status();
}
// Validates a single dispatch |binding| against the |descriptor_type| it will
// be bound as.
//
// The binding must be a direct reference (zero reserved field, zero buffer
// slot, non-NULL buffer) to device-visible memory. Uniform buffers must allow
// uniform-read usage and read access; storage buffers must allow dispatch
// storage usage. Any other descriptor type returns UNIMPLEMENTED. Finally the
// binding offset/length are checked against the buffer's byte length.
static iree_status_t iree_hal_vulkan_queue_validate_dispatch_binding(
    const iree_hal_buffer_ref_t* binding, VkDescriptorType descriptor_type) {
  const bool is_direct_reference =
      binding->reserved == 0 && binding->buffer_slot == 0 && binding->buffer;
  if (!is_direct_reference) {
    return iree_make_status(
        IREE_STATUS_INVALID_ARGUMENT,
        "Vulkan queue_dispatch bindings must be direct non-null buffer "
        "references");
  }

  iree_hal_buffer_t* bound_buffer = binding->buffer;
  IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_memory_type(
      iree_hal_buffer_memory_type(bound_buffer),
      IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE));

  if (descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
    IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_usage(
        iree_hal_buffer_allowed_usage(bound_buffer),
        IREE_HAL_BUFFER_USAGE_DISPATCH_UNIFORM_READ));
    IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_access(
        iree_hal_buffer_allowed_access(bound_buffer),
        IREE_HAL_MEMORY_ACCESS_READ));
  } else if (descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
    IREE_RETURN_IF_ERROR(
        iree_hal_vulkan_queue_validate_dispatch_storage_usage(bound_buffer));
  } else {
    return iree_make_status(
        IREE_STATUS_UNIMPLEMENTED,
        "Vulkan queue_dispatch descriptor type %u is unsupported",
        (uint32_t)descriptor_type);
  }

  // Only the status of the range calculation matters here; the resolved
  // values are discarded.
  iree_device_size_t resolved_offset = 0;
  iree_device_size_t resolved_length = 0;
  return iree_hal_buffer_calculate_range(
      /*base_offset=*/0, iree_hal_buffer_byte_length(bound_buffer),
      binding->offset, binding->length, &resolved_offset, &resolved_length);
}
static iree_status_t
iree_hal_vulkan_queue_validate_dispatch_indirect_parameters(
const iree_hal_buffer_ref_t* workgroup_count_ref) {
const iree_device_size_t workgroup_count_length = sizeof(uint32_t[3]);
if (workgroup_count_ref->reserved != 0 ||
workgroup_count_ref->buffer_slot != 0 || !workgroup_count_ref->buffer) {
return iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"Vulkan queue_dispatch indirect workgroup parameters must use a direct "
"non-null buffer reference");
}
if ((workgroup_count_ref->offset % sizeof(uint32_t)) != 0) {
return iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"Vulkan queue_dispatch indirect workgroup parameter offset must be "
"4-byte aligned");
}
if (workgroup_count_ref->length != IREE_HAL_WHOLE_BUFFER &&
workgroup_count_ref->length < workgroup_count_length) {
return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
"Vulkan queue_dispatch indirect workgroup "
"parameter buffer must contain "
"at least uint32_t[3]");
}
IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_memory_type(
iree_hal_buffer_memory_type(workgroup_count_ref->buffer),
IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE));
IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_usage(
iree_hal_buffer_allowed_usage(workgroup_count_ref->buffer),
IREE_HAL_BUFFER_USAGE_DISPATCH_INDIRECT_PARAMETERS));
IREE_RETURN_IF_ERROR(iree_hal_buffer_validate_access(
iree_hal_buffer_allowed_access(workgroup_count_ref->buffer),
IREE_HAL_MEMORY_ACCESS_READ));
return iree_hal_buffer_validate_range(workgroup_count_ref->buffer,
workgroup_count_ref->offset,
workgroup_count_length);
}
// Validates |bindings| for a dispatch using the descriptor ABI of |pipeline|.
//
// Fails with FAILED_PRECONDITION when the descriptor ABI is not enabled on
// |queue| and with INVALID_ARGUMENT when the binding count differs from the
// pipeline's descriptor binding count. Each binding is then validated against
// the pipeline's descriptor type at the same index; the first failure is
// annotated with the binding index and returned.
static iree_status_t iree_hal_vulkan_queue_validate_dispatch_descriptor(
    iree_hal_vulkan_queue_t* queue, const iree_hal_vulkan_pipeline_t* pipeline,
    const iree_hal_buffer_ref_list_t bindings) {
  IREE_TRACE_ZONE_BEGIN(z0);

  iree_status_t status = iree_ok_status();
  if (!iree_all_bits_set(queue->enabled_dispatch_abis,
                         IREE_HAL_VULKAN_DISPATCH_ABI_DESCRIPTOR)) {
    status = iree_make_status(
        IREE_STATUS_FAILED_PRECONDITION,
        "Vulkan descriptor dispatch ABI is disabled for this queue");
  } else if (bindings.count != pipeline->descriptor_binding_count) {
    status =
        iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                         "Vulkan queue_dispatch provides %" PRIhsz
                         " bindings but descriptor pipeline expects %" PRIhsz,
                         bindings.count, pipeline->descriptor_binding_count);
  }

  if (iree_status_is_ok(status)) {
    for (iree_host_size_t ordinal = 0;
         ordinal < pipeline->descriptor_binding_count; ++ordinal) {
      status = iree_hal_vulkan_queue_validate_dispatch_binding(
          &bindings.values[ordinal],
          pipeline->descriptor_bindings[ordinal].descriptor_type);
      if (iree_status_is_ok(status)) continue;
      // Stop at the first failing binding and record which one it was.
      status =
          iree_status_annotate_f(status, "binding[%" PRIhsz "]", ordinal);
      break;
    }
  }

  IREE_TRACE_ZONE_END(z0);
  return status;
}
// Validates |constants| and |bindings| for a dispatch using the BDA ABI of
// |pipeline|.
//
// Fails with FAILED_PRECONDITION when the BDA ABI is not enabled on |queue|.
// Otherwise the pipeline-level BDA ABI check runs and every binding is
// validated as a storage buffer; the first failing binding is annotated with
// its index and returned.
static iree_status_t iree_hal_vulkan_queue_validate_dispatch_bda(
    iree_hal_vulkan_queue_t* queue, const iree_hal_vulkan_pipeline_t* pipeline,
    iree_const_byte_span_t constants,
    const iree_hal_buffer_ref_list_t bindings) {
  IREE_TRACE_ZONE_BEGIN(z0);

  iree_status_t status = iree_ok_status();
  if (iree_all_bits_set(queue->enabled_dispatch_abis,
                        IREE_HAL_VULKAN_DISPATCH_ABI_BDA)) {
    status = iree_hal_vulkan_pipeline_validate_bda_dispatch_abi(
        pipeline, constants, bindings.count, IREE_SV("Vulkan queue_dispatch"));
  } else {
    status =
        iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
                         "Vulkan BDA dispatch ABI is disabled for this queue");
  }

  if (iree_status_is_ok(status)) {
    for (iree_host_size_t ordinal = 0; ordinal < bindings.count; ++ordinal) {
      status = iree_hal_vulkan_queue_validate_dispatch_binding(
          &bindings.values[ordinal], VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
      if (iree_status_is_ok(status)) continue;
      // Stop at the first failing binding and record which one it was.
      status =
          iree_status_annotate_f(status, "binding[%" PRIhsz "]", ordinal);
      break;
    }
  }

  IREE_TRACE_ZONE_END(z0);
  return status;
}
// Dispatches validation to the routine matching |pipeline|'s dispatch ABI.
//
// Descriptor pipelines validate |bindings| only; BDA pipelines validate both
// |constants| and |bindings|. Any other ABI value returns INVALID_ARGUMENT.
static iree_status_t iree_hal_vulkan_queue_validate_dispatch_abi(
    iree_hal_vulkan_queue_t* queue, const iree_hal_vulkan_pipeline_t* pipeline,
    iree_const_byte_span_t constants,
    const iree_hal_buffer_ref_list_t bindings) {
  if (pipeline->dispatch_abi == IREE_HAL_VULKAN_DISPATCH_ABI_DESCRIPTOR) {
    return iree_hal_vulkan_queue_validate_dispatch_descriptor(queue, pipeline,
                                                              bindings);
  }
  if (pipeline->dispatch_abi == IREE_HAL_VULKAN_DISPATCH_ABI_BDA) {
    return iree_hal_vulkan_queue_validate_dispatch_bda(queue, pipeline,
                                                       constants, bindings);
  }
  return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                          "Vulkan pipeline has invalid dispatch ABI 0x%08x",
                          pipeline->dispatch_abi);
}
// Resolves the dispatch binding at |binding_ordinal| in |submission| into the
// VkBuffer handle, offset, and range written to |out_buffer_info|.
//
// Only uniform and storage buffer descriptors are supported; other types
// return UNIMPLEMENTED. The binding is re-validated, its range is computed
// against the buffer's byte length (an empty range is INVALID_ARGUMENT), and
// the backing buffer must be a Vulkan HAL buffer or sparse buffer
// (FAILED_PRECONDITION otherwise). |out_buffer_info| is only written on
// success.
static iree_status_t iree_hal_vulkan_queue_resolve_dispatch_descriptor_binding(
    const iree_hal_vulkan_queue_pending_submission_t* submission,
    const iree_hal_vulkan_descriptor_binding_t* descriptor_binding,
    iree_host_size_t binding_ordinal, VkDescriptorBufferInfo* out_buffer_info) {
  const VkDescriptorType type = descriptor_binding->descriptor_type;
  const bool is_buffer_descriptor = type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
                                    type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
  if (!is_buffer_descriptor) {
    return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
                            "Vulkan queue_dispatch binding %" PRIhsz
                            " uses unsupported descriptor type %u",
                            binding_ordinal, (uint32_t)type);
  }

  const iree_hal_buffer_ref_t* ref =
      &submission->dispatch.bindings[binding_ordinal];
  IREE_RETURN_IF_ERROR(
      iree_hal_vulkan_queue_validate_dispatch_binding(ref, type));

  // Resolve the referenced subrange relative to the bound buffer.
  iree_device_size_t range_offset = 0;
  iree_device_size_t range_length = 0;
  IREE_RETURN_IF_ERROR(iree_hal_buffer_calculate_range(
      /*base_offset=*/0, iree_hal_buffer_byte_length(ref->buffer), ref->offset,
      ref->length, &range_offset, &range_length));
  if (range_length == 0) {
    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                            "Vulkan queue_dispatch binding %" PRIhsz
                            " resolved to an empty buffer range",
                            binding_ordinal);
  }

  iree_hal_buffer_t* backing = NULL;
  IREE_RETURN_IF_ERROR(
      iree_hal_vulkan_buffer_resolve_backing(ref->buffer, &backing));
  iree_hal_buffer_t* allocated = iree_hal_buffer_allocated_buffer(backing);
  const bool is_vulkan_backed = iree_hal_vulkan_buffer_isa(allocated) ||
                                iree_hal_vulkan_sparse_buffer_isa(allocated);
  if (!is_vulkan_backed) {
    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
                            "Vulkan queue_dispatch binding %" PRIhsz
                            " buffer is not backed by the Vulkan HAL",
                            binding_ordinal);
  }

  // Translate the range offset into an offset within the backing buffer.
  iree_device_size_t offset_in_backing = 0;
  IREE_RETURN_IF_ERROR(iree_hal_vulkan_buffer_resolve_backing_offset(
      ref->buffer, backing, range_offset, &offset_in_backing));

  // Only the VkBuffer handle is needed; the memory handle is ignored.
  VkDeviceMemory ignored_memory = VK_NULL_HANDLE;
  VkBuffer handle = VK_NULL_HANDLE;
  IREE_RETURN_IF_ERROR(
      iree_hal_vulkan_buffer_handle(backing, &ignored_memory, &handle));
  (void)ignored_memory;

  out_buffer_info->buffer = handle;
  out_buffer_info->offset = (VkDeviceSize)offset_in_backing;
  out_buffer_info->range = (VkDeviceSize)range_length;
  return iree_ok_status();
}
// Resolves the dispatch binding at |binding_ordinal| in |submission| to the
// device address of the first byte of its referenced range.
//
// |out_device_address| is zeroed on entry and only set to the resolved address
// on success. Fails when the range is empty, the buffer reports no device
// address, the address or address+length would overflow 64 bits, or - when
// |pipeline| carries a BDA requirement for this ordinal - the address does not
// meet the minimum alignment or the range is shorter than the minimum length.
static iree_status_t iree_hal_vulkan_queue_resolve_dispatch_bda_binding(
    const iree_hal_vulkan_queue_pending_submission_t* submission,
    const iree_hal_vulkan_pipeline_t* pipeline,
    iree_host_size_t binding_ordinal, VkDeviceAddress* out_device_address) {
  *out_device_address = 0;

  const iree_hal_buffer_ref_t* ref =
      &submission->dispatch.bindings[binding_ordinal];
  iree_device_size_t range_offset = 0;
  iree_device_size_t range_length = 0;
  IREE_RETURN_IF_ERROR(iree_hal_buffer_calculate_range(
      /*base_offset=*/0, iree_hal_buffer_byte_length(ref->buffer), ref->offset,
      ref->length, &range_offset, &range_length));
  if (range_length == 0) {
    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                            "Vulkan queue_dispatch binding %" PRIhsz
                            " resolved to an empty buffer range",
                            binding_ordinal);
  }

  VkDeviceAddress base_address = 0;
  IREE_RETURN_IF_ERROR(
      iree_hal_vulkan_buffer_device_address(ref->buffer, &base_address));
  if (base_address == 0) {
    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
                            "Vulkan queue_dispatch binding %" PRIhsz
                            " buffer has no device address",
                            binding_ordinal);
  }

  // Guard both base+offset and (base+offset)+length against wrapping.
  if (range_offset > UINT64_MAX - base_address) {
    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
                            "Vulkan queue_dispatch binding %" PRIhsz
                            " device address overflows",
                            binding_ordinal);
  }
  const VkDeviceAddress resolved_address = base_address + range_offset;
  if (range_length > UINT64_MAX - resolved_address) {
    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
                            "Vulkan queue_dispatch binding %" PRIhsz
                            " device range overflows",
                            binding_ordinal);
  }

  // Bindings beyond the pipeline's requirement table have nothing further to
  // check.
  if (binding_ordinal >= pipeline->bda.binding_requirement_count) {
    *out_device_address = resolved_address;
    return iree_ok_status();
  }

  const iree_hal_vulkan_bda_binding_requirement_t* requirement =
      &pipeline->bda.binding_requirements[binding_ordinal];
  // NOTE(review): the mask test assumes minimum_alignment is a power of two -
  // confirm where requirements are populated.
  const bool is_misaligned =
      requirement->minimum_alignment > 1 &&
      (resolved_address & (requirement->minimum_alignment - 1)) != 0;
  if (is_misaligned) {
    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                            "Vulkan queue_dispatch binding %" PRIhsz
                            " device address 0x%" PRIx64
                            " does not satisfy BDA alignment %u",
                            binding_ordinal, (uint64_t)resolved_address,
                            requirement->minimum_alignment);
  }
  if (range_length < requirement->minimum_length) {
    return iree_make_status(
        IREE_STATUS_OUT_OF_RANGE,
        "Vulkan queue_dispatch binding %" PRIhsz " has length %" PRIdsz
        " but BDA pipeline requires at least %" PRIu64 " bytes",
        binding_ordinal, range_length, requirement->minimum_length);
  }

  *out_device_address = resolved_address;
  return iree_ok_status();
}
// Resolves the indirect workgroup count reference of |submission| into the
// VkBuffer written to |out_handle| and the byte offset written to
// |out_offset|.
//
// Both outputs are reset on entry. The reference is validated first and its
// backing buffer must be a Vulkan HAL buffer or sparse buffer
// (FAILED_PRECONDITION otherwise). |out_handle| is written as soon as the
// handle query succeeds, before the offset is resolved.
static iree_status_t iree_hal_vulkan_queue_resolve_dispatch_indirect_parameters(
    const iree_hal_vulkan_queue_pending_submission_t* submission,
    VkBuffer* out_handle, VkDeviceSize* out_offset) {
  *out_handle = VK_NULL_HANDLE;
  *out_offset = 0;

  const iree_hal_buffer_ref_t* count_ref =
      &submission->dispatch.config.workgroup_count_ref;
  IREE_RETURN_IF_ERROR(
      iree_hal_vulkan_queue_validate_dispatch_indirect_parameters(count_ref));

  iree_hal_buffer_t* backing = NULL;
  IREE_RETURN_IF_ERROR(
      iree_hal_vulkan_buffer_resolve_backing(count_ref->buffer, &backing));
  iree_hal_buffer_t* allocated = iree_hal_buffer_allocated_buffer(backing);
  const bool is_vulkan_backed = iree_hal_vulkan_buffer_isa(allocated) ||
                                iree_hal_vulkan_sparse_buffer_isa(allocated);
  if (!is_vulkan_backed) {
    return iree_make_status(
        IREE_STATUS_FAILED_PRECONDITION,
        "Vulkan queue_dispatch indirect workgroup parameter buffer is not "
        "backed by the Vulkan HAL");
  }

  // Only the VkBuffer handle is needed; the memory handle is ignored.
  VkDeviceMemory ignored_memory = VK_NULL_HANDLE;
  IREE_RETURN_IF_ERROR(
      iree_hal_vulkan_buffer_handle(backing, &ignored_memory, out_handle));
  (void)ignored_memory;

  iree_device_size_t offset_in_backing = 0;
  IREE_RETURN_IF_ERROR(iree_hal_vulkan_buffer_resolve_backing_offset(
      count_ref->buffer, backing, count_ref->offset, &offset_in_backing));
  *out_offset = (VkDeviceSize)offset_in_backing;
  return iree_ok_status();
}
// Records a single compute dispatch for |submission| into its native command
// buffer using the descriptor bindings declared by |pipeline|.
//
// Steps, each skipped once |status| is not OK:
//   1. Allocate the native command buffer and prepare profiling timestamps.
//   2. Compute descriptor requirements, acquire a descriptor pool, and
//      allocate descriptor sets when the set count is non-zero.
//   3. Resolve every pipeline descriptor binding into a VkWriteDescriptorSet.
//   4. Either update the allocated sets or, when the pipeline has push
//      descriptors enabled, push the writes while recording.
//   5. Record bind/push-constant/dispatch commands (direct or indirect) with
//      optional timestamp queries around the dispatch.
//
// Small inline arrays hold sets and descriptor writes; heap fallbacks are used
// when the pipeline exceeds the inline capacities and are freed before return.
//
// NOTE(review): the *_under_lock helpers suggest the caller must hold a queue
// lock - confirm against the call sites.
static iree_status_t iree_hal_vulkan_queue_record_dispatch_descriptor_native(
iree_hal_vulkan_queue_t* queue,
iree_hal_vulkan_queue_pending_submission_t* submission,
const iree_hal_vulkan_pipeline_t* pipeline) {
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status = iree_ok_status();
status = iree_hal_vulkan_queue_allocate_native_command_buffer_under_lock(
queue, submission);
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_profile_prepare_native_timestamps_under_lock(
queue, submission);
}
// Left uninitialized on purpose: every read below is guarded by an OK status,
// which implies the calculation call succeeded.
iree_hal_vulkan_command_buffer_descriptor_requirements_t
descriptor_requirements;
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_calculate_dispatch_descriptor_requirements(
pipeline, &descriptor_requirements);
}
// Descriptor set handles live in inline storage unless the pipeline needs
// more sets than the inline capacity.
VkDescriptorSet inline_descriptor_sets
[IREE_HAL_VULKAN_QUEUE_DISPATCH_INLINE_SET_CAPACITY] = {0};
VkDescriptorSet* descriptor_sets = inline_descriptor_sets;
if (iree_status_is_ok(status) && descriptor_requirements.set_count >
IREE_ARRAYSIZE(inline_descriptor_sets)) {
status = iree_allocator_malloc_array(
queue->host_allocator, descriptor_requirements.set_count,
sizeof(descriptor_sets[0]), (void**)&descriptor_sets);
}
VkDescriptorPool descriptor_pool = VK_NULL_HANDLE;
if (iree_status_is_ok(status) && descriptor_requirements.set_count != 0) {
status = iree_hal_vulkan_queue_acquire_native_descriptor_pool_under_lock(
queue, submission, &descriptor_requirements, &descriptor_pool);
}
if (iree_status_is_ok(status) && descriptor_requirements.set_count != 0) {
VkDescriptorSetAllocateInfo allocate_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.descriptorPool = descriptor_pool,
.descriptorSetCount = descriptor_requirements.set_count,
.pSetLayouts = pipeline->descriptor_set_layouts,
};
status = iree_vkAllocateDescriptorSets(IREE_VULKAN_DEVICE(&queue->syms),
queue->logical_device,
&allocate_info, descriptor_sets);
}
// Buffer infos and descriptor writes share the same inline capacity and the
// same heap-fallback scheme, sized by the pipeline's binding count.
VkDescriptorBufferInfo inline_buffer_infos
[IREE_HAL_VULKAN_QUEUE_DISPATCH_INLINE_BINDING_CAPACITY] = {0};
VkDescriptorBufferInfo* buffer_infos = inline_buffer_infos;
VkWriteDescriptorSet inline_write_infos
[IREE_HAL_VULKAN_QUEUE_DISPATCH_INLINE_BINDING_CAPACITY] = {0};
VkWriteDescriptorSet* write_infos = inline_write_infos;
if (iree_status_is_ok(status) && pipeline->descriptor_binding_count >
IREE_ARRAYSIZE(inline_buffer_infos)) {
status = iree_allocator_malloc_array(
queue->host_allocator, pipeline->descriptor_binding_count,
sizeof(buffer_infos[0]), (void**)&buffer_infos);
}
if (iree_status_is_ok(status) &&
pipeline->descriptor_binding_count > IREE_ARRAYSIZE(inline_write_infos)) {
status = iree_allocator_malloc_array(
queue->host_allocator, pipeline->descriptor_binding_count,
sizeof(write_infos[0]), (void**)&write_infos);
}
// Resolve each binding to a buffer/offset/range and build the matching
// descriptor write. Resolution stops at the first failure.
for (iree_host_size_t i = 0;
iree_status_is_ok(status) && i < pipeline->descriptor_binding_count;
++i) {
const iree_hal_vulkan_descriptor_binding_t* descriptor_binding =
&pipeline->descriptor_bindings[i];
status = iree_hal_vulkan_queue_resolve_dispatch_descriptor_binding(
submission, descriptor_binding, i, &buffer_infos[i]);
if (iree_status_is_ok(status)) {
write_infos[i] = (VkWriteDescriptorSet){
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
// Pushed writes carry no destination set; the set is named by the push
// command recorded below.
.dstSet = pipeline->push_descriptors.enabled
? VK_NULL_HANDLE
: descriptor_sets[descriptor_binding->set_ordinal],
.dstBinding = descriptor_binding->binding,
.dstArrayElement = descriptor_binding->array_element,
.descriptorCount = 1,
.descriptorType = descriptor_binding->descriptor_type,
.pBufferInfo = &buffer_infos[i],
};
}
}
// Allocated sets are written up front; push-descriptor pipelines defer the
// writes to command recording.
if (iree_status_is_ok(status) && pipeline->descriptor_binding_count != 0 &&
!pipeline->push_descriptors.enabled) {
iree_vkUpdateDescriptorSets(
IREE_VULKAN_DEVICE(&queue->syms), queue->logical_device,
(uint32_t)pipeline->descriptor_binding_count, write_infos,
/*descriptorCopyCount=*/0, /*pDescriptorCopies=*/NULL);
}
// The indirect parameter buffer is resolved only when the dispatch flags
// request indirect parameters; a non-null handle selects the indirect
// dispatch command below.
VkBuffer indirect_parameters_handle = VK_NULL_HANDLE;
VkDeviceSize indirect_parameters_offset = 0;
if (iree_status_is_ok(status) &&
iree_hal_dispatch_uses_indirect_parameters(submission->dispatch.flags)) {
status = iree_hal_vulkan_queue_resolve_dispatch_indirect_parameters(
submission, &indirect_parameters_handle, &indirect_parameters_offset);
}
if (iree_status_is_ok(status)) {
VkCommandBufferBeginInfo begin_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
};
status = iree_vkBeginCommandBuffer(IREE_VULKAN_DEVICE(&queue->syms),
submission->native_command_buffer,
&begin_info);
}
if (iree_status_is_ok(status)) {
iree_hal_vulkan_queue_profile_reset_native_timestamps(queue, submission);
iree_hal_vulkan_queue_profile_write_timestamp_begin(queue, submission);
if (pipeline->push_descriptors.enabled &&
pipeline->descriptor_binding_count != 0) {
iree_vkCmdPushDescriptorSetKHR(
IREE_VULKAN_DEVICE(&queue->syms), submission->native_command_buffer,
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->layout,
pipeline->push_descriptors.set_ordinal,
(uint32_t)pipeline->descriptor_binding_count, write_infos);
} else if (descriptor_requirements.set_count != 0) {
iree_vkCmdBindDescriptorSets(
IREE_VULKAN_DEVICE(&queue->syms), submission->native_command_buffer,
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->layout, /*firstSet=*/0,
descriptor_requirements.set_count, descriptor_sets,
/*dynamicOffsetCount=*/0, /*pDynamicOffsets=*/NULL);
}
if (submission->dispatch.constants_data_length != 0) {
iree_vkCmdPushConstants(
IREE_VULKAN_DEVICE(&queue->syms), submission->native_command_buffer,
pipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT, /*offset=*/0,
(uint32_t)submission->dispatch.constants_data_length,
submission->dispatch.constants_data);
}
iree_vkCmdBindPipeline(IREE_VULKAN_DEVICE(&queue->syms),
submission->native_command_buffer,
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->handle);
// Per-dispatch timestamps bracket the dispatch using two consecutive
// queries starting at dispatch_base_query.
if (submission->profile.query_pool &&
submission->profile.dispatch_base_query !=
IREE_HAL_VULKAN_PROFILE_QUERY_ABSENT) {
iree_vkCmdWriteTimestamp2(IREE_VULKAN_DEVICE(&queue->syms),
submission->native_command_buffer,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
submission->profile.query_pool,
submission->profile.dispatch_base_query);
}
if (indirect_parameters_handle) {
iree_vkCmdDispatchIndirect(
IREE_VULKAN_DEVICE(&queue->syms), submission->native_command_buffer,
indirect_parameters_handle, indirect_parameters_offset);
} else {
iree_vkCmdDispatch(IREE_VULKAN_DEVICE(&queue->syms),
submission->native_command_buffer,
submission->dispatch.config.workgroup_count[0],
submission->dispatch.config.workgroup_count[1],
submission->dispatch.config.workgroup_count[2]);
}
if (submission->profile.query_pool &&
submission->profile.dispatch_base_query !=
IREE_HAL_VULKAN_PROFILE_QUERY_ABSENT) {
iree_vkCmdWriteTimestamp2(IREE_VULKAN_DEVICE(&queue->syms),
submission->native_command_buffer,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
submission->profile.query_pool,
submission->profile.dispatch_base_query + 1);
}
iree_hal_vulkan_queue_profile_write_timestamp_end(queue, submission);
status = iree_vkEndCommandBuffer(IREE_VULKAN_DEVICE(&queue->syms),
submission->native_command_buffer);
}
// Release heap fallbacks (if any) regardless of success; inline storage is
// detected by pointer identity.
if (write_infos != inline_write_infos) {
iree_allocator_free(queue->host_allocator, write_infos);
}
if (buffer_infos != inline_buffer_infos) {
iree_allocator_free(queue->host_allocator, buffer_infos);
}
if (descriptor_sets != inline_descriptor_sets) {
iree_allocator_free(queue->host_allocator, descriptor_sets);
}
IREE_TRACE_ZONE_END(z0);
return status;
}
// Records a one-time-submit native command buffer containing a single dispatch
// for a |pipeline| that uses the BDA dispatch ABI.
//
// Steps, in order:
//   1. Sizes the binding table (binding count * per-entry length) and rejects
//      overflow before any resource is acquired.
//   2. Allocates the native command buffer and prepares profiling timestamps.
//   3. Acquires a BDA publication range, writes one resolved device address per
//      binding into its host span, and flushes the lease when non-empty.
//   4. Resolves indirect dispatch parameters when the dispatch flags ask for
//      them.
//   5. Records the command buffer: host-write -> compute-shader-read barrier
//      (only when the binding table is non-empty), root and user push
//      constants, pipeline bind, and the direct or indirect dispatch bracketed
//      by optional timestamp queries.
//
// Returns OUT_OF_RANGE if the binding table length overflows, otherwise the
// first failure reported by any step above, or OK.
//
// NOTE(review): the `_under_lock` helpers presumably require the caller to hold
// the queue submission lock - confirm against the call site.
static iree_status_t iree_hal_vulkan_queue_record_dispatch_bda_native(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission,
    const iree_hal_vulkan_pipeline_t* pipeline) {
  IREE_TRACE_ZONE_BEGIN(z0);

  // Size the binding table up front. Doing this before any fallible call means
  // the early return below can never discard (and leak) a failure status that
  // an earlier step produced.
  const iree_host_size_t binding_count =
      pipeline->bda.binding_count_known ? pipeline->binding_count
                                        : submission->dispatch.binding_count;
  iree_device_size_t binding_table_length = 0;
  if (!iree_device_size_checked_mul((iree_device_size_t)binding_count,
                                    pipeline->bda.binding_table_entry_length,
                                    &binding_table_length)) {
    IREE_TRACE_ZONE_END(z0);
    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
                            "Vulkan BDA binding table length overflows");
  }

  iree_status_t status =
      iree_hal_vulkan_queue_allocate_native_command_buffer_under_lock(
          queue, submission);
  if (iree_status_is_ok(status)) {
    status = iree_hal_vulkan_queue_profile_prepare_native_timestamps_under_lock(
        queue, submission);
  }

  // Acquire the device-visible range that will hold the binding table.
  VkDeviceAddress binding_table_address = 0;
  if (iree_status_is_ok(status)) {
    status = iree_hal_vulkan_queue_acquire_bda_publication_under_lock(
        queue, submission, binding_table_length, &binding_table_address);
  }
  const iree_hal_vulkan_command_buffer_bda_publication_t publication =
      iree_hal_vulkan_queue_bda_publication_for_lease(
          &submission->bda_publication_lease);

  // Publish one 64-bit device address per binding.
  // NOTE(review): this assumes bda.binding_table_entry_length equals
  // sizeof(uint64_t) - confirm where the pipeline metadata is produced.
  if (iree_status_is_ok(status) && binding_count != 0) {
    uint64_t* binding_table = (uint64_t*)publication.host_span.data;
    for (iree_host_size_t i = 0; iree_status_is_ok(status) && i < binding_count;
         ++i) {
      VkDeviceAddress device_address = 0;
      status = iree_hal_vulkan_queue_resolve_dispatch_bda_binding(
          submission, pipeline, i, &device_address);
      if (iree_status_is_ok(status)) {
        binding_table[i] = device_address;
      }
    }
  }
  if (iree_status_is_ok(status) && binding_table_length != 0) {
    status = iree_hal_vulkan_queue_flush_bda_publication_lease(
        &submission->bda_publication_lease);
  }

  // A non-null handle selects the indirect dispatch path below.
  VkBuffer indirect_parameters_handle = VK_NULL_HANDLE;
  VkDeviceSize indirect_parameters_offset = 0;
  if (iree_status_is_ok(status) &&
      iree_hal_dispatch_uses_indirect_parameters(submission->dispatch.flags)) {
    status = iree_hal_vulkan_queue_resolve_dispatch_indirect_parameters(
        submission, &indirect_parameters_handle, &indirect_parameters_offset);
  }

  if (iree_status_is_ok(status)) {
    VkCommandBufferBeginInfo begin_info = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };
    status = iree_vkBeginCommandBuffer(IREE_VULKAN_DEVICE(&queue->syms),
                                       submission->native_command_buffer,
                                       &begin_info);
  }
  if (iree_status_is_ok(status)) {
    iree_hal_vulkan_queue_profile_reset_native_timestamps(queue, submission);

    // Make the host-written binding table visible to compute shader reads.
    if (binding_table_length != 0) {
      VkMemoryBarrier2 memory_barrier = {
          .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
          .srcStageMask = VK_PIPELINE_STAGE_2_HOST_BIT,
          .srcAccessMask = VK_ACCESS_2_HOST_WRITE_BIT,
          .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
          .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT,
      };
      VkDependencyInfo dependency_info = {
          .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
          .memoryBarrierCount = 1,
          .pMemoryBarriers = &memory_barrier,
      };
      iree_vkCmdPipelineBarrier2(IREE_VULKAN_DEVICE(&queue->syms),
                                 submission->native_command_buffer,
                                 &dependency_info);
    }
    iree_hal_vulkan_queue_profile_write_timestamp_begin(queue, submission);

    // The root block carries the binding table address; user constants are
    // pushed separately at the pipeline's constant offset.
    iree_hal_vulkan_bda_dispatch_root_v1_t root = {
        .binding_table_address = binding_table_address,
        .constants_address = 0,
        .binding_base = 0,
        .constant_base = 0,
        .flags = 0,
        .reserved0 = 0,
    };
    iree_vkCmdPushConstants(
        IREE_VULKAN_DEVICE(&queue->syms), submission->native_command_buffer,
        pipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT,
        pipeline->bda.root_push_constant_offset, sizeof(root), &root);
    if (submission->dispatch.constants_data_length != 0) {
      iree_vkCmdPushConstants(
          IREE_VULKAN_DEVICE(&queue->syms), submission->native_command_buffer,
          pipeline->layout, VK_SHADER_STAGE_COMPUTE_BIT,
          pipeline->bda.constant_push_constant_offset,
          (uint32_t)submission->dispatch.constants_data_length,
          submission->dispatch.constants_data);
    }
    iree_vkCmdBindPipeline(IREE_VULKAN_DEVICE(&queue->syms),
                           submission->native_command_buffer,
                           VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->handle);

    // Per-dispatch timestamps: query N before the dispatch, N + 1 after it.
    if (submission->profile.query_pool &&
        submission->profile.dispatch_base_query !=
            IREE_HAL_VULKAN_PROFILE_QUERY_ABSENT) {
      iree_vkCmdWriteTimestamp2(IREE_VULKAN_DEVICE(&queue->syms),
                                submission->native_command_buffer,
                                VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                                submission->profile.query_pool,
                                submission->profile.dispatch_base_query);
    }
    if (indirect_parameters_handle) {
      iree_vkCmdDispatchIndirect(
          IREE_VULKAN_DEVICE(&queue->syms), submission->native_command_buffer,
          indirect_parameters_handle, indirect_parameters_offset);
    } else {
      iree_vkCmdDispatch(IREE_VULKAN_DEVICE(&queue->syms),
                         submission->native_command_buffer,
                         submission->dispatch.config.workgroup_count[0],
                         submission->dispatch.config.workgroup_count[1],
                         submission->dispatch.config.workgroup_count[2]);
    }
    if (submission->profile.query_pool &&
        submission->profile.dispatch_base_query !=
            IREE_HAL_VULKAN_PROFILE_QUERY_ABSENT) {
      iree_vkCmdWriteTimestamp2(IREE_VULKAN_DEVICE(&queue->syms),
                                submission->native_command_buffer,
                                VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                                submission->profile.query_pool,
                                submission->profile.dispatch_base_query + 1);
    }
    iree_hal_vulkan_queue_profile_write_timestamp_end(queue, submission);
    status = iree_vkEndCommandBuffer(IREE_VULKAN_DEVICE(&queue->syms),
                                     submission->native_command_buffer);
  }
  IREE_TRACE_ZONE_END(z0);
  return status;
}
// Records the native command buffer for a dispatch submission.
//
// Looks up the pipeline for the submission's executable/export ordinal and
// forwards to the recorder matching the pipeline's dispatch ABI. Returns the
// lookup failure, the recorder's status, or INVALID_ARGUMENT when the ABI is
// neither descriptor nor BDA.
static iree_status_t iree_hal_vulkan_queue_record_dispatch_native(
    iree_hal_vulkan_queue_t* queue,
    iree_hal_vulkan_queue_pending_submission_t* submission) {
  const iree_hal_vulkan_pipeline_t* pipeline = NULL;
  IREE_RETURN_IF_ERROR(iree_hal_vulkan_executable_lookup_pipeline(
      submission->dispatch.executable, submission->dispatch.export_ordinal,
      &pipeline));

  if (pipeline->dispatch_abi == IREE_HAL_VULKAN_DISPATCH_ABI_DESCRIPTOR) {
    return iree_hal_vulkan_queue_record_dispatch_descriptor_native(
        queue, submission, pipeline);
  }
  if (pipeline->dispatch_abi == IREE_HAL_VULKAN_DISPATCH_ABI_BDA) {
    return iree_hal_vulkan_queue_record_dispatch_bda_native(queue, submission,
                                                            pipeline);
  }

  // Any other ABI value is unsupported.
  return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                          "Vulkan pipeline has invalid dispatch ABI 0x%08x",
                          pipeline->dispatch_abi);
}
// Computes the layout of the payload storage used by an execute submission.
//
// The payload holds |binding_table_count| iree_hal_buffer_binding_t entries
// followed by |bda_binding_slot_count| BDA binding slots, each region aligned
// for its element type. The total byte length and the two region offsets are
// written to the out parameters; the status produced by IREE_STRUCT_LAYOUT is
// returned unchanged.
static iree_status_t iree_hal_vulkan_queue_calculate_execute_payload_layout(
    iree_host_size_t binding_table_count,
    iree_host_size_t bda_binding_slot_count,
    iree_host_size_t* out_payload_storage_length,
    iree_host_size_t* out_binding_table_offset,
    iree_host_size_t* out_bda_binding_slots_offset) {
  iree_status_t layout_status = IREE_STRUCT_LAYOUT(
      0, out_payload_storage_length,
      // Region 0: copied binding table entries.
      IREE_STRUCT_FIELD_ALIGNED(binding_table_count, iree_hal_buffer_binding_t,
                                iree_alignof(iree_hal_buffer_binding_t),
                                out_binding_table_offset),
      // Region 1: BDA binding slots.
      IREE_STRUCT_FIELD_ALIGNED(
          bda_binding_slot_count,
          iree_hal_vulkan_command_buffer_bda_binding_slot_t,
          iree_alignof(iree_hal_vulkan_command_buffer_bda_binding_slot_t),
          out_bda_binding_slots_offset));
  return layout_status;
}
// Computes the layout of the payload storage used by a dispatch submission.
//
// The payload holds |constants|.data_length raw bytes followed by
// |binding_count| iree_hal_buffer_ref_t entries aligned for that type. The
// total byte length and the two region offsets are written to the out
// parameters; the status produced by IREE_STRUCT_LAYOUT is returned unchanged.
static iree_status_t iree_hal_vulkan_queue_calculate_dispatch_payload_layout(
    iree_const_byte_span_t constants, iree_host_size_t binding_count,
    iree_host_size_t* out_payload_storage_length,
    iree_host_size_t* out_constants_data_offset,
    iree_host_size_t* out_bindings_offset) {
  iree_status_t layout_status = IREE_STRUCT_LAYOUT(
      0, out_payload_storage_length,
      // Region 0: copy of the constant bytes.
      IREE_STRUCT_FIELD(constants.data_length, uint8_t,
                        out_constants_data_offset),
      // Region 1: copy of the binding refs.
      IREE_STRUCT_FIELD_ALIGNED(binding_count, iree_hal_buffer_ref_t,
                                iree_alignof(iree_hal_buffer_ref_t),
                                out_bindings_offset));
  return layout_status;
}
// Validates and enqueues a single dispatch of |export_ordinal| in |executable|
// on |queue|.
//
// Validation runs in a fixed order (flags, shape, constants, binding storage,
// executable type, pipeline lookup, constant capacity, dispatch ABI, indirect
// parameters, wait/signal semaphore lists). The first failure is returned and
// nothing is submitted.
//
// After validation a pending submission is created with payload storage that
// receives a copy of |constants| and of the |bindings| ref array. The
// executable, every binding buffer, and (for indirect dispatches) the workgroup
// count buffer are retained.
//
// Returns the status of the last step attempted. If a failure occurs after the
// pending submission exists but before it is handed to
// iree_hal_vulkan_queue_submit_captured_submission, the signal semaphores are
// failed with a clone of the status and the submission is destroyed here.
iree_status_t iree_hal_vulkan_queue_submit_dispatch(
iree_hal_vulkan_queue_t* queue,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_executable_t* executable,
iree_hal_executable_export_ordinal_t export_ordinal,
const iree_hal_dispatch_config_t config, iree_const_byte_span_t constants,
const iree_hal_buffer_ref_list_t bindings,
iree_hal_dispatch_flags_t flags) {
IREE_ASSERT_ARGUMENT(queue);
IREE_ASSERT_ARGUMENT(executable);
IREE_TRACE_ZONE_BEGIN(z0);
const iree_hal_vulkan_pipeline_t* pipeline = NULL;
iree_status_t status = iree_hal_vulkan_queue_validate_dispatch_flags(flags);
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_dispatch_shape(config, flags);
}
// The constant byte length must be a whole number of uint32_t values.
if (iree_status_is_ok(status) &&
constants.data_length % sizeof(uint32_t) != 0) {
status = iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"Vulkan queue_dispatch constants must be 4-byte aligned");
}
if (iree_status_is_ok(status) && constants.data_length != 0 &&
!constants.data) {
status = iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"Vulkan queue_dispatch constant data must be non-null when length is "
"non-zero");
}
// The native recorders narrow the constant length to uint32_t when pushing
// constants, so larger lengths are rejected here.
if (iree_status_is_ok(status) && constants.data_length > UINT32_MAX) {
status = iree_make_status(
IREE_STATUS_OUT_OF_RANGE,
"Vulkan queue_dispatch constants exceed Vulkan limit %u", UINT32_MAX);
}
if (iree_status_is_ok(status) && bindings.count != 0 && !bindings.values) {
status = iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"Vulkan queue_dispatch binding storage is NULL");
}
if (iree_status_is_ok(status) &&
!iree_hal_vulkan_executable_isa(executable)) {
status = iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"Vulkan queue_dispatch executable is not a Vulkan executable");
}
// |pipeline| is only dereferenced below while status is OK, i.e. after this
// lookup succeeded.
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_executable_lookup_pipeline(
executable, export_ordinal, &pipeline);
}
// Callers may provide fewer constant bytes than the pipeline declares, but
// never more.
if (iree_status_is_ok(status) &&
constants.data_length >
(iree_host_size_t)pipeline->constant_count * sizeof(uint32_t)) {
status = iree_make_status(
IREE_STATUS_OUT_OF_RANGE,
"Vulkan queue_dispatch provides %" PRIhsz
" constant bytes but pipeline accepts at most %u",
constants.data_length,
(uint32_t)pipeline->constant_count * (uint32_t)sizeof(uint32_t));
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_dispatch_abi(queue, pipeline,
constants, bindings);
}
if (iree_status_is_ok(status) &&
iree_hal_dispatch_uses_indirect_parameters(flags)) {
status = iree_hal_vulkan_queue_validate_dispatch_indirect_parameters(
&config.workgroup_count_ref);
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, wait_semaphore_list, IREE_SV("wait"));
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, signal_semaphore_list, IREE_SV("signal"));
}
// Compute where the constants copy and the binding ref array live within the
// submission payload storage.
iree_host_size_t dispatch_payload_storage_length = 0;
iree_host_size_t dispatch_constants_data_offset = 0;
iree_host_size_t dispatch_bindings_offset = 0;
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_calculate_dispatch_payload_layout(
constants, bindings.count, &dispatch_payload_storage_length,
&dispatch_constants_data_offset, &dispatch_bindings_offset);
}
iree_byte_span_t dispatch_payload_storage = iree_byte_span_empty();
iree_hal_vulkan_queue_pending_submission_t* submission = NULL;
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_pending_submission_create(
queue, wait_semaphore_list, signal_semaphore_list,
IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_DISPATCH,
(iree_hal_host_call_t){0}, /*args=*/NULL, IREE_HAL_HOST_CALL_FLAG_NONE,
dispatch_payload_storage_length, &dispatch_payload_storage,
&submission);
}
// Capture the dispatch parameters and retain the executable on behalf of the
// submission.
if (iree_status_is_ok(status)) {
submission->dispatch.executable = executable;
iree_hal_executable_retain(executable);
submission->dispatch.export_ordinal = export_ordinal;
submission->dispatch.config = config;
submission->dispatch.constants_data_length = constants.data_length;
submission->dispatch.binding_count = bindings.count;
submission->dispatch.flags = flags;
}
// Indirect dispatches also retain the buffer holding the workgroup count.
if (iree_status_is_ok(status) &&
iree_hal_dispatch_uses_indirect_parameters(flags)) {
iree_hal_buffer_retain(
submission->dispatch.config.workgroup_count_ref.buffer);
}
// Copy the constant bytes into the payload storage.
if (iree_status_is_ok(status) && constants.data_length != 0) {
submission->dispatch.constants_data =
dispatch_payload_storage.data + dispatch_constants_data_offset;
memcpy(submission->dispatch.constants_data, constants.data,
constants.data_length);
}
// Copy the binding refs into the payload storage and retain each buffer.
if (iree_status_is_ok(status) && bindings.count != 0) {
submission->dispatch.bindings =
(iree_hal_buffer_ref_t*)(dispatch_payload_storage.data +
dispatch_bindings_offset);
memcpy(submission->dispatch.bindings, bindings.values,
bindings.count * sizeof(submission->dispatch.bindings[0]));
for (iree_host_size_t i = 0; i < bindings.count; ++i) {
iree_hal_buffer_retain(submission->dispatch.bindings[i].buffer);
}
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_record_dispatch_profile_metadata(submission);
}
if (iree_status_is_ok(status)) {
submission->record_native_submission =
iree_hal_vulkan_queue_record_dispatch_native;
}
// The local pointer is cleared regardless of the submit result so that the
// cleanup below never touches the submission after hand-off.
// NOTE(review): presumably submit_captured_submission takes ownership even
// when it fails - confirm.
if (iree_status_is_ok(status)) {
status =
iree_hal_vulkan_queue_submit_captured_submission(queue, submission);
submission = NULL;
}
// Only reached with a live submission when a step between creation and
// hand-off failed.
if (submission) {
if (!iree_status_is_ok(status)) {
iree_hal_vulkan_queue_fail_signal_list(submission->signal_semaphore_list,
iree_status_clone(status));
}
iree_hal_vulkan_queue_pending_submission_destroy(queue, submission);
}
IREE_TRACE_ZONE_END(z0);
return status;
}
// Records the native command buffer for an execute submission.
//
// Returns OK without doing anything when the HAL command buffer has no native
// commands. Otherwise queries the descriptor pool requirements and BDA
// publication length, then first attempts to acquire a native replay; when one
// is acquired (or any step so far failed) the function returns immediately.
// If no replay was acquired it allocates a native command buffer, prepares
// profiling timestamps, acquires a descriptor pool and BDA publication range
// as required, records the command buffer, and finally flushes the BDA
// publication lease.
//
// NOTE(review): the `_under_lock` helpers presumably require the caller to hold
// the queue submission lock - confirm against the call site.
static iree_status_t iree_hal_vulkan_queue_record_execute_native(
iree_hal_vulkan_queue_t* queue,
iree_hal_vulkan_queue_pending_submission_t* submission) {
if (!iree_hal_vulkan_command_buffer_has_native_commands(
submission->execute.command_buffer)) {
return iree_ok_status();
}
iree_hal_vulkan_command_buffer_descriptor_requirements_t
descriptor_requirements;
iree_status_t status =
iree_hal_vulkan_command_buffer_native_descriptor_pool_requirements(
submission->execute.command_buffer, &descriptor_requirements);
iree_device_size_t bda_publication_length = 0;
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_command_buffer_native_bda_publication_length(
submission->execute.command_buffer, &bda_publication_length);
}
const bool has_descriptor_dispatches =
iree_hal_vulkan_command_buffer_has_descriptor_dispatches(
submission->execute.command_buffer);
// NOTE(review): a successful replay acquisition presumably reuses an existing
// recording so nothing needs to be recorded below - confirm.
bool acquired_native_replay = false;
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_acquire_native_replay_under_lock(
queue, submission, descriptor_requirements, has_descriptor_dispatches,
bda_publication_length, &acquired_native_replay);
}
if (!iree_status_is_ok(status) || acquired_native_replay) {
return status;
}
// No replay available: record a fresh one-time-submit command buffer.
status = iree_hal_vulkan_queue_allocate_native_command_buffer_under_lock(
queue, submission);
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_profile_prepare_native_timestamps_under_lock(
queue, submission);
}
// A descriptor pool is only acquired when at least one set is required.
VkDescriptorPool descriptor_pool = VK_NULL_HANDLE;
if (iree_status_is_ok(status) && descriptor_requirements.set_count != 0) {
status = iree_hal_vulkan_queue_acquire_native_descriptor_pool_under_lock(
queue, submission, &descriptor_requirements, &descriptor_pool);
}
// The publication pointer stays NULL unless a non-empty BDA publication range
// was acquired.
iree_hal_vulkan_command_buffer_bda_publication_t bda_publication = {0};
const iree_hal_vulkan_command_buffer_bda_publication_t* bda_publication_ptr =
NULL;
if (iree_status_is_ok(status) && bda_publication_length != 0) {
status = iree_hal_vulkan_queue_acquire_bda_publication_under_lock(
queue, submission, bda_publication_length,
&bda_publication.device_address);
}
// Replace the whole publication struct with the view derived from the lease.
if (iree_status_is_ok(status) && bda_publication_length != 0) {
bda_publication = iree_hal_vulkan_queue_bda_publication_for_lease(
&submission->bda_publication_lease);
bda_publication_ptr = &bda_publication;
}
if (iree_status_is_ok(status)) {
iree_hal_buffer_binding_table_t binding_table = {
.count = submission->execute.binding_table_count,
.bindings = submission->execute.binding_table_bindings,
};
iree_hal_vulkan_command_buffer_bda_binding_cache_t bda_binding_cache =
iree_hal_vulkan_queue_execute_bda_binding_cache(submission);
iree_hal_vulkan_command_buffer_profile_marker_t profile_marker = {
.query_pool = submission->profile.query_pool,
.first_query = submission->profile.first_query,
.query_count = submission->profile.query_count,
.queue_start_query = submission->profile.queue_start_query,
.queue_end_query = submission->profile.queue_end_query,
.dispatch_base_query = submission->profile.dispatch_base_query,
.dispatch_query_count = submission->profile.dispatch_query_count,
.recorder = submission->profile.recorder,
.scope = submission->profile.scope,
.command_buffer_id =
iree_hal_vulkan_queue_profile_command_buffer_id(submission),
};
// The profile marker is only passed when a query pool is present.
status = iree_hal_vulkan_command_buffer_record_native(
submission->execute.command_buffer, &queue->syms, queue->logical_device,
queue->builtins, submission->native_command_buffer,
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, descriptor_pool,
binding_table, bda_publication_ptr, &bda_binding_cache,
profile_marker.query_pool ? &profile_marker : NULL,
queue->host_allocator);
}
// Flush the lease after recording.
// NOTE(review): presumably recording is what writes the publication contents
// - confirm.
if (iree_status_is_ok(status) && bda_publication_length != 0) {
status = iree_hal_vulkan_queue_flush_bda_publication_lease(
&submission->bda_publication_lease);
}
return status;
}
// Validates and enqueues execution of |command_buffer| on |queue|.
//
// Validation rejects unknown |flags|, a |queue_event_type| of NONE, invalid
// wait/signal semaphore lists, a |binding_table| with fewer entries than the
// command buffer requires, and NULL binding storage when bindings are required.
//
// After validation a pending submission is created with payload storage that
// receives a copy of the first command_buffer->binding_count binding table
// entries plus, when the command buffer reports a non-zero BDA publication
// length, one BDA binding slot per binding. The command buffer is retained.
// The binding buffers are retained unless
// IREE_HAL_EXECUTE_FLAG_BORROW_BINDING_TABLE_LIFETIME is set.
//
// Returns the status of the last step attempted. If a failure occurs after the
// pending submission exists but before it is handed to
// iree_hal_vulkan_queue_submit_captured_submission, the signal semaphores are
// failed with a clone of the status and the submission is destroyed here.
iree_status_t iree_hal_vulkan_queue_submit_execute(
iree_hal_vulkan_queue_t* queue,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_command_buffer_t* command_buffer,
iree_hal_buffer_binding_table_t binding_table,
iree_hal_execute_flags_t flags,
iree_hal_profile_queue_event_type_t queue_event_type) {
IREE_ASSERT_ARGUMENT(queue);
IREE_ASSERT_ARGUMENT(command_buffer);
IREE_TRACE_ZONE_BEGIN(z0);
// Any flag bit outside this mask is rejected.
const iree_hal_execute_flags_t known_flags =
IREE_HAL_EXECUTE_FLAG_BORROW_BINDING_TABLE_LIFETIME;
iree_status_t status = iree_ok_status();
if (iree_any_bit_set(flags, ~known_flags)) {
status = iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"unsupported Vulkan queue execute flags: 0x%" PRIx64, flags);
}
if (iree_status_is_ok(status) &&
queue_event_type == IREE_HAL_PROFILE_QUEUE_EVENT_TYPE_NONE) {
status = iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"Vulkan queue execute requires a concrete profile event type");
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, wait_semaphore_list, IREE_SV("wait"));
}
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_validate_semaphore_list(
queue, signal_semaphore_list, IREE_SV("signal"));
}
// The provided table may be larger than required; only the first
// command_buffer->binding_count entries are captured below.
if (iree_status_is_ok(status) &&
binding_table.count < command_buffer->binding_count) {
status = iree_make_status(
IREE_STATUS_OUT_OF_RANGE,
"indirect Vulkan command buffer requires at least %u bindings but only "
"%" PRIhsz " were provided",
command_buffer->binding_count, binding_table.count);
}
if (iree_status_is_ok(status) && command_buffer->binding_count != 0 &&
!binding_table.bindings) {
status = iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"indirect Vulkan command buffer binding table storage is NULL for %u "
"bindings",
command_buffer->binding_count);
}
iree_device_size_t execute_bda_publication_length = 0;
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_command_buffer_native_bda_publication_length(
command_buffer, &execute_bda_publication_length);
}
// BDA binding slots are only reserved when the command buffer reports a
// non-zero BDA publication length.
const iree_host_size_t execute_bda_binding_slot_count =
execute_bda_publication_length != 0 ? command_buffer->binding_count : 0;
iree_host_size_t execute_payload_storage_length = 0;
iree_host_size_t execute_binding_table_offset = 0;
iree_host_size_t execute_bda_binding_slots_offset = 0;
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_calculate_execute_payload_layout(
command_buffer->binding_count, execute_bda_binding_slot_count,
&execute_payload_storage_length, &execute_binding_table_offset,
&execute_bda_binding_slots_offset);
}
iree_byte_span_t execute_payload_storage = iree_byte_span_empty();
iree_hal_vulkan_queue_pending_submission_t* submission = NULL;
if (iree_status_is_ok(status)) {
status = iree_hal_vulkan_queue_pending_submission_create(
queue, wait_semaphore_list, signal_semaphore_list,
IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_EXECUTE,
(iree_hal_host_call_t){0}, /*args=*/NULL, IREE_HAL_HOST_CALL_FLAG_NONE,
execute_payload_storage_length, &execute_payload_storage, &submission);
}
// Capture the command buffer, flags, and binding table into the submission.
if (iree_status_is_ok(status)) {
submission->execute.command_buffer = command_buffer;
iree_hal_command_buffer_retain(command_buffer);
submission->execute.flags = flags;
submission->profile.type = queue_event_type;
submission->execute.binding_table_count = command_buffer->binding_count;
if (command_buffer->binding_count != 0) {
submission->execute.binding_table_bindings =
(iree_hal_buffer_binding_t*)(execute_payload_storage.data +
execute_binding_table_offset);
memcpy(submission->execute.binding_table_bindings, binding_table.bindings,
command_buffer->binding_count * sizeof(binding_table.bindings[0]));
// With the borrow flag set the buffers are not retained here.
// NOTE(review): presumably the caller then keeps them alive until the
// submission retires - confirm.
if (!iree_any_bit_set(
flags, IREE_HAL_EXECUTE_FLAG_BORROW_BINDING_TABLE_LIFETIME)) {
for (iree_host_size_t i = 0; i < command_buffer->binding_count; ++i) {
iree_hal_buffer_retain(
submission->execute.binding_table_bindings[i].buffer);
}
}
}
submission->execute.bda_binding_slot_count = execute_bda_binding_slot_count;
if (execute_bda_binding_slot_count != 0) {
uint8_t* const bda_binding_slots_storage =
execute_payload_storage.data + execute_bda_binding_slots_offset;
submission->execute.bda_binding_slots =
(iree_hal_vulkan_command_buffer_bda_binding_slot_t*)
bda_binding_slots_storage;
}
}
if (iree_status_is_ok(status)) {
const uint64_t command_buffer_id =
iree_hal_vulkan_queue_profile_command_buffer_id(submission);
status = iree_hal_vulkan_command_buffer_record_profile_metadata(
command_buffer, queue->profile_recorder, submission->profile.scope,
command_buffer_id);
}
// A native recording callback is only installed when the command buffer has
// native commands.
if (iree_status_is_ok(status) &&
iree_hal_vulkan_command_buffer_has_native_commands(command_buffer)) {
submission->record_native_submission =
iree_hal_vulkan_queue_record_execute_native;
}
// The local pointer is cleared regardless of the submit result so that the
// cleanup below never touches the submission after hand-off.
// NOTE(review): presumably submit_captured_submission takes ownership even
// when it fails - confirm.
if (iree_status_is_ok(status)) {
status =
iree_hal_vulkan_queue_submit_captured_submission(queue, submission);
submission = NULL;
}
// Only reached with a live submission when a step between creation and
// hand-off failed.
if (submission) {
if (!iree_status_is_ok(status)) {
iree_hal_vulkan_queue_fail_signal_list(submission->signal_semaphore_list,
iree_status_clone(status));
}
iree_hal_vulkan_queue_pending_submission_destroy(queue, submission);
}
IREE_TRACE_ZONE_END(z0);
return status;
}
// Validates and enqueues a host call on |queue|.
//
// Checks, in order: the host call itself, the wait semaphore list, the signal
// semaphore list, and that the wait semaphore count fits in 32 bits. The first
// failure is returned without creating a submission. Otherwise a host-call
// pending submission (with no payload storage) is created and passed to
// iree_hal_vulkan_queue_submit_captured_submission, whose status is returned.
iree_status_t iree_hal_vulkan_queue_submit_host_call(
    iree_hal_vulkan_queue_t* queue,
    const iree_hal_semaphore_list_t wait_semaphore_list,
    const iree_hal_semaphore_list_t signal_semaphore_list,
    iree_hal_host_call_t call, const uint64_t args[4],
    iree_hal_host_call_flags_t flags) {
  IREE_ASSERT_ARGUMENT(queue);
  IREE_TRACE_ZONE_BEGIN(z0);

  // Validate the call arguments first.
  iree_status_t status =
      iree_hal_vulkan_queue_validate_host_call(call, args, flags);
  if (!iree_status_is_ok(status)) {
    IREE_TRACE_ZONE_END(z0);
    return status;
  }

  // Then both semaphore lists, waits before signals.
  status = iree_hal_vulkan_queue_validate_semaphore_list(
      queue, wait_semaphore_list, IREE_SV("wait"));
  if (!iree_status_is_ok(status)) {
    IREE_TRACE_ZONE_END(z0);
    return status;
  }
  status = iree_hal_vulkan_queue_validate_semaphore_list(
      queue, signal_semaphore_list, IREE_SV("signal"));
  if (!iree_status_is_ok(status)) {
    IREE_TRACE_ZONE_END(z0);
    return status;
  }

  // Reject wait lists whose count does not fit in 32 bits.
  if (wait_semaphore_list.count > UINT32_MAX) {
    status = iree_make_status(IREE_STATUS_OUT_OF_RANGE,
                              "too many Vulkan queue wait semaphores");
    IREE_TRACE_ZONE_END(z0);
    return status;
  }

  // Create the pending submission; host calls carry no payload storage.
  iree_hal_vulkan_queue_pending_submission_t* pending = NULL;
  status = iree_hal_vulkan_queue_pending_submission_create(
      queue, wait_semaphore_list, signal_semaphore_list,
      IREE_HAL_VULKAN_QUEUE_SUBMISSION_KIND_HOST_CALL, call, args, flags,
      /*payload_storage_length=*/0, /*out_payload_storage=*/NULL, &pending);
  if (!iree_status_is_ok(status)) {
    IREE_TRACE_ZONE_END(z0);
    return status;
  }

  status = iree_hal_vulkan_queue_submit_captured_submission(queue, pending);
  IREE_TRACE_ZONE_END(z0);
  return status;
}
// Completes and destroys every pending submission whose epoch has retired.
//
// If the queue already carries a failure status, all pending submissions are
// failed with a clone of it and 0 is returned. If the epoch semaphore counter
// cannot be read, the error is stored on the queue, all pending submissions are
// failed with the stored status, and 0 is returned. Otherwise the completed
// submissions are popped under the submission mutex and then completed with an
// OK status and destroyed outside of it.
//
// Returns the number of submissions completed by this call.
iree_host_size_t iree_hal_vulkan_queue_drain_completions(
    iree_hal_vulkan_queue_t* queue) {
  IREE_ASSERT_ARGUMENT(queue);
  IREE_TRACE_ZONE_BEGIN(z0);

  // A failure already stored on the queue fails everything still pending.
  iree_status_t sticky_status = (iree_status_t)iree_atomic_load(
      &queue->failure_status, iree_memory_order_acquire);
  if (!iree_status_is_ok(sticky_status)) {
    iree_hal_vulkan_queue_fail_pending_submissions(
        queue, iree_status_clone(sticky_status));
    IREE_TRACE_ZONE_END(z0);
    return 0;
  }

  // Read the epoch the device has reached.
  uint64_t retired_epoch = 0;
  const VkResult query_result = iree_vkGetSemaphoreCounterValue_raw(
      &queue->syms, queue->logical_device, queue->epoch_semaphore,
      &retired_epoch);
  if (query_result != VK_SUCCESS) {
    iree_status_t query_status = iree_status_from_vk_result(
        __FILE__, __LINE__, query_result, "vkGetSemaphoreCounterValue");
    iree_status_t queue_status =
        iree_hal_vulkan_queue_store_error(queue, query_status);
    iree_hal_vulkan_queue_fail_pending_submissions(queue, queue_status);
    IREE_TRACE_ZONE_END(z0);
    return 0;
  }

  // Detach the retired submissions while holding the mutex; complete them
  // after it has been released.
  iree_slim_mutex_lock(&queue->submission_mutex);
  iree_hal_vulkan_queue_pending_submission_t* retired_list =
      iree_hal_vulkan_queue_pop_completed_submissions(queue, retired_epoch);
  iree_slim_mutex_unlock(&queue->submission_mutex);

  iree_host_size_t retired_count = 0;
  for (iree_hal_vulkan_queue_pending_submission_t* current = retired_list;
       current != NULL; ++retired_count) {
    // Unlink before completing so the node is standalone when destroyed.
    iree_hal_vulkan_queue_pending_submission_t* following = current->next;
    current->next = NULL;
    iree_hal_vulkan_queue_complete_submission(queue, current,
                                              iree_ok_status());
    iree_hal_vulkan_queue_pending_submission_destroy(queue, current);
    current = following;
  }

  IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)retired_count);
  IREE_TRACE_ZONE_END(z0);
  return retired_count;
}