blob: 7ae9abb4c16968d9fdb5f084613106bf284edb80 [file] [log] [blame]
// Copyright 2020 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "iree/hal/device.h"
#include "iree/hal/allocator.h"
#include "iree/hal/buffer.h"
#include "iree/hal/command_buffer.h"
#include "iree/hal/detail.h"
#include "iree/hal/resource.h"
//===----------------------------------------------------------------------===//
// iree_hal_device_t
//===----------------------------------------------------------------------===//
#define _VTABLE_DISPATCH(device, method_name) \
IREE_HAL_VTABLE_DISPATCH(device, iree_hal_device, method_name)
IREE_HAL_API_RETAIN_RELEASE(device);
IREE_API_EXPORT iree_string_view_t
iree_hal_device_id(iree_hal_device_t* device) {
IREE_ASSERT_ARGUMENT(device);
return _VTABLE_DISPATCH(device, id)(device);
}
IREE_API_EXPORT iree_allocator_t
iree_hal_device_host_allocator(iree_hal_device_t* device) {
IREE_ASSERT_ARGUMENT(device);
return _VTABLE_DISPATCH(device, host_allocator)(device);
}
IREE_API_EXPORT iree_hal_allocator_t* iree_hal_device_allocator(
iree_hal_device_t* device) {
IREE_ASSERT_ARGUMENT(device);
return _VTABLE_DISPATCH(device, device_allocator)(device);
}
IREE_API_EXPORT void iree_hal_device_replace_allocator(
iree_hal_device_t* device, iree_hal_allocator_t* new_allocator) {
IREE_ASSERT_ARGUMENT(device);
_VTABLE_DISPATCH(device, replace_device_allocator)(device, new_allocator);
}
IREE_API_EXPORT void iree_hal_device_replace_channel_provider(
iree_hal_device_t* device, iree_hal_channel_provider_t* new_provider) {
IREE_ASSERT_ARGUMENT(device);
_VTABLE_DISPATCH(device, replace_channel_provider)(device, new_provider);
}
IREE_API_EXPORT
iree_status_t iree_hal_device_trim(iree_hal_device_t* device) {
IREE_ASSERT_ARGUMENT(device);
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status = _VTABLE_DISPATCH(device, trim)(device);
IREE_TRACE_ZONE_END(z0);
return status;
}
IREE_API_EXPORT iree_status_t iree_hal_device_query_i64(
iree_hal_device_t* device, iree_string_view_t category,
iree_string_view_t key, int64_t* out_value) {
IREE_ASSERT_ARGUMENT(device);
IREE_ASSERT_ARGUMENT(out_value);
return _VTABLE_DISPATCH(device, query_i64)(device, category, key, out_value);
}
IREE_API_EXPORT iree_hal_semaphore_compatibility_t
iree_hal_device_query_semaphore_compatibility(iree_hal_device_t* device,
iree_hal_semaphore_t* semaphore) {
IREE_ASSERT_ARGUMENT(device);
IREE_ASSERT_ARGUMENT(semaphore);
return _VTABLE_DISPATCH(device, query_semaphore_compatibility)(device,
semaphore);
}
IREE_API_EXPORT iree_status_t iree_hal_device_queue_alloca(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_allocator_pool_t pool, iree_hal_buffer_params_t params,
iree_device_size_t allocation_size,
iree_hal_buffer_t** IREE_RESTRICT out_buffer) {
IREE_ASSERT_ARGUMENT(device);
IREE_ASSERT_ARGUMENT(
!wait_semaphore_list.count ||
(wait_semaphore_list.semaphores && wait_semaphore_list.payload_values));
IREE_ASSERT_ARGUMENT(!signal_semaphore_list.count ||
(signal_semaphore_list.semaphores &&
signal_semaphore_list.payload_values));
IREE_ASSERT_ARGUMENT(out_buffer);
*out_buffer = NULL;
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status = _VTABLE_DISPATCH(device, queue_alloca)(
device, queue_affinity, wait_semaphore_list, signal_semaphore_list, pool,
params, allocation_size, out_buffer);
IREE_TRACE_ZONE_END(z0);
return status;
}
IREE_API_EXPORT iree_status_t iree_hal_device_queue_dealloca(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* buffer) {
IREE_ASSERT_ARGUMENT(device);
IREE_ASSERT_ARGUMENT(
!wait_semaphore_list.count ||
(wait_semaphore_list.semaphores && wait_semaphore_list.payload_values));
IREE_ASSERT_ARGUMENT(!signal_semaphore_list.count ||
(signal_semaphore_list.semaphores &&
signal_semaphore_list.payload_values));
IREE_ASSERT_ARGUMENT(buffer);
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status = _VTABLE_DISPATCH(device, queue_dealloca)(
device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
buffer);
IREE_TRACE_ZONE_END(z0);
return status;
}
IREE_API_EXPORT iree_status_t iree_hal_device_queue_fill(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, const void* pattern,
iree_host_size_t pattern_length) {
IREE_ASSERT_ARGUMENT(device);
IREE_ASSERT_ARGUMENT(target_buffer);
IREE_ASSERT_ARGUMENT(pattern);
IREE_TRACE_ZONE_BEGIN(z0);
IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)length);
// If we are starting execution immediately then we can reduce latency by
// allowing inline command buffer execution.
iree_hal_command_buffer_mode_t command_buffer_mode =
IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT;
if (wait_semaphore_list.count == 0) {
command_buffer_mode |= IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION;
}
iree_hal_transfer_command_t command = {
.type = IREE_HAL_TRANSFER_COMMAND_TYPE_FILL,
.fill =
{
.target_buffer = target_buffer,
.target_offset = target_offset,
.length = length,
.pattern = pattern,
.pattern_length = pattern_length,
},
};
iree_hal_command_buffer_t* command_buffer = NULL;
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_hal_create_transfer_command_buffer(device, command_buffer_mode,
queue_affinity, 1, &command,
&command_buffer));
iree_status_t status = iree_hal_device_queue_execute(
device, queue_affinity, wait_semaphore_list, signal_semaphore_list, 1,
&command_buffer, /*binding_tables=*/NULL);
iree_hal_command_buffer_release(command_buffer);
IREE_TRACE_ZONE_END(z0);
return status;
}
IREE_API_EXPORT iree_status_t iree_hal_device_queue_copy(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length) {
IREE_ASSERT_ARGUMENT(device);
IREE_ASSERT_ARGUMENT(source_buffer);
IREE_ASSERT_ARGUMENT(target_buffer);
IREE_TRACE_ZONE_BEGIN(z0);
IREE_TRACE_ZONE_APPEND_VALUE_I64(z0, (int64_t)length);
// If we are starting execution immediately then we can reduce latency by
// allowing inline command buffer execution.
iree_hal_command_buffer_mode_t command_buffer_mode =
IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT;
if (wait_semaphore_list.count == 0) {
command_buffer_mode |= IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION;
}
iree_hal_transfer_command_t command = {
.type = IREE_HAL_TRANSFER_COMMAND_TYPE_COPY,
.copy =
{
.source_buffer = source_buffer,
.source_offset = source_offset,
.target_buffer = target_buffer,
.target_offset = target_offset,
.length = length,
},
};
iree_hal_command_buffer_t* command_buffer = NULL;
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_hal_create_transfer_command_buffer(device, command_buffer_mode,
queue_affinity, 1, &command,
&command_buffer));
iree_status_t status = iree_hal_device_queue_execute(
device, queue_affinity, wait_semaphore_list, signal_semaphore_list, 1,
&command_buffer, /*binding_tables=*/NULL);
iree_hal_command_buffer_release(command_buffer);
IREE_TRACE_ZONE_END(z0);
return status;
}
IREE_API_EXPORT iree_status_t iree_hal_device_queue_read(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_file_t* source_file, uint64_t source_offset,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, uint32_t flags) {
IREE_ASSERT_ARGUMENT(device);
IREE_ASSERT_ARGUMENT(
!wait_semaphore_list.count ||
(wait_semaphore_list.semaphores && wait_semaphore_list.payload_values));
IREE_ASSERT_ARGUMENT(!signal_semaphore_list.count ||
(signal_semaphore_list.semaphores &&
signal_semaphore_list.payload_values));
IREE_ASSERT_ARGUMENT(source_file);
IREE_ASSERT_ARGUMENT(target_buffer);
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status = _VTABLE_DISPATCH(device, queue_read)(
device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
source_file, source_offset, target_buffer, target_offset, length, flags);
IREE_TRACE_ZONE_END(z0);
return status;
}
IREE_API_EXPORT iree_status_t iree_hal_device_queue_write(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
iree_hal_file_t* target_file, uint64_t target_offset,
iree_device_size_t length, uint32_t flags) {
IREE_ASSERT_ARGUMENT(device);
IREE_ASSERT_ARGUMENT(
!wait_semaphore_list.count ||
(wait_semaphore_list.semaphores && wait_semaphore_list.payload_values));
IREE_ASSERT_ARGUMENT(!signal_semaphore_list.count ||
(signal_semaphore_list.semaphores &&
signal_semaphore_list.payload_values));
IREE_ASSERT_ARGUMENT(source_buffer);
IREE_ASSERT_ARGUMENT(target_file);
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status = _VTABLE_DISPATCH(device, queue_write)(
device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
source_buffer, source_offset, target_file, target_offset, length, flags);
IREE_TRACE_ZONE_END(z0);
return status;
}
IREE_API_EXPORT iree_status_t iree_hal_device_queue_execute(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_host_size_t command_buffer_count,
iree_hal_command_buffer_t* const* command_buffers,
iree_hal_buffer_binding_table_t const* binding_tables) {
IREE_ASSERT_ARGUMENT(device);
IREE_ASSERT_ARGUMENT(
!wait_semaphore_list.count ||
(wait_semaphore_list.semaphores && wait_semaphore_list.payload_values));
IREE_ASSERT_ARGUMENT(!signal_semaphore_list.count ||
(signal_semaphore_list.semaphores &&
signal_semaphore_list.payload_values));
IREE_ASSERT_ARGUMENT(!command_buffer_count || command_buffers);
IREE_TRACE_ZONE_BEGIN(z0);
// TODO(benvanik): move into devices instead? then a synchronous/inline device
// could assert the waits are resolved instead of blanket failing on an
// already-resolved semaphore. This would make using stream-ordered
// allocations easier.
for (iree_host_size_t i = 0; i < command_buffer_count; ++i) {
if (wait_semaphore_list.count > 0 &&
iree_all_bits_set(
iree_hal_command_buffer_mode(command_buffers[i]),
IREE_HAL_COMMAND_BUFFER_MODE_ALLOW_INLINE_EXECUTION)) {
// Inline command buffers are not allowed to wait (as they could have
// already been executed!). This is a requirement of the API so we
// validate it across all backends even if they don't support inline
// execution and ignore it.
IREE_TRACE_ZONE_END(z0);
return iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"inline command buffer submitted with a wait; inline command "
"buffers must be ready to execute immediately");
}
}
// Validate command buffer bindings against the provided binding tables.
// This will error out if a binding table is required but not provided or if
// any binding in the table does not match the requirements of the command
// buffer as recorded.
for (iree_host_size_t i = 0; i < command_buffer_count; ++i) {
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0,
iree_hal_command_buffer_validate_submission(
command_buffers[i], binding_tables ? &binding_tables[i] : NULL));
}
iree_status_t status = _VTABLE_DISPATCH(device, queue_execute)(
device, queue_affinity, wait_semaphore_list, signal_semaphore_list,
command_buffer_count, command_buffers, binding_tables);
IREE_TRACE_ZONE_END(z0);
return status;
}
IREE_API_EXPORT iree_status_t iree_hal_device_queue_barrier(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list) {
IREE_ASSERT_ARGUMENT(device);
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status =
iree_hal_device_queue_execute(device, queue_affinity, wait_semaphore_list,
signal_semaphore_list, 0, NULL, NULL);
IREE_TRACE_ZONE_END(z0);
return status;
}
IREE_API_EXPORT iree_status_t iree_hal_device_queue_flush(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity) {
IREE_ASSERT_ARGUMENT(device);
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status =
_VTABLE_DISPATCH(device, queue_flush)(device, queue_affinity);
IREE_TRACE_ZONE_END(z0);
return status;
}
IREE_API_EXPORT iree_status_t iree_hal_device_wait_semaphores(
iree_hal_device_t* device, iree_hal_wait_mode_t wait_mode,
const iree_hal_semaphore_list_t semaphore_list, iree_timeout_t timeout) {
IREE_ASSERT_ARGUMENT(device);
if (semaphore_list.count == 0) return iree_ok_status();
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status = _VTABLE_DISPATCH(device, wait_semaphores)(
device, wait_mode, semaphore_list, timeout);
IREE_TRACE_ZONE_END(z0);
return status;
}
IREE_API_EXPORT iree_status_t iree_hal_device_profiling_begin(
iree_hal_device_t* device,
const iree_hal_device_profiling_options_t* options) {
IREE_ASSERT_ARGUMENT(device);
IREE_ASSERT_ARGUMENT(options);
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status =
_VTABLE_DISPATCH(device, profiling_begin)(device, options);
IREE_TRACE_ZONE_END(z0);
return status;
}
IREE_API_EXPORT iree_status_t
iree_hal_device_profiling_flush(iree_hal_device_t* device) {
IREE_ASSERT_ARGUMENT(device);
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status = _VTABLE_DISPATCH(device, profiling_flush)(device);
IREE_TRACE_ZONE_END(z0);
return status;
}
IREE_API_EXPORT iree_status_t
iree_hal_device_profiling_end(iree_hal_device_t* device) {
IREE_ASSERT_ARGUMENT(device);
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status = _VTABLE_DISPATCH(device, profiling_end)(device);
IREE_TRACE_ZONE_END(z0);
return status;
}
//===----------------------------------------------------------------------===//
// iree_hal_device_list_t
//===----------------------------------------------------------------------===//
IREE_API_EXPORT iree_status_t iree_hal_device_list_allocate(
iree_host_size_t capacity, iree_allocator_t host_allocator,
iree_hal_device_list_t** out_list) {
IREE_ASSERT_ARGUMENT(out_list);
*out_list = NULL;
IREE_TRACE_ZONE_BEGIN(z0);
iree_hal_device_list_t* list = NULL;
iree_host_size_t total_size =
sizeof(*list) + capacity * sizeof(list->devices[0]);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_allocator_malloc(host_allocator, total_size, (void**)&list));
list->host_allocator = host_allocator;
list->capacity = capacity;
list->count = 0;
*out_list = list;
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
IREE_API_EXPORT void iree_hal_device_list_free(iree_hal_device_list_t* list) {
if (!list) return;
IREE_TRACE_ZONE_BEGIN(z0);
iree_allocator_t host_allocator = list->host_allocator;
for (iree_host_size_t i = 0; i < list->count; ++i) {
iree_hal_device_release(list->devices[i]);
}
iree_allocator_free(host_allocator, list);
IREE_TRACE_ZONE_END(z0);
}
IREE_API_EXPORT iree_status_t iree_hal_device_list_push_back(
iree_hal_device_list_t* list, iree_hal_device_t* device) {
IREE_ASSERT_ARGUMENT(list);
IREE_ASSERT_ARGUMENT(device);
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status = iree_ok_status();
if (list->count + 1 <= list->capacity) {
iree_hal_device_retain(device);
list->devices[list->count++] = device;
} else {
status = iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
"list capacity %" PRIhsz
" reached; no more devices can be added",
list->capacity);
}
IREE_TRACE_ZONE_END(z0);
return status;
}
IREE_API_EXPORT iree_hal_device_t* iree_hal_device_list_at(
const iree_hal_device_list_t* list, iree_host_size_t i) {
IREE_ASSERT_ARGUMENT(list);
return i < list->count ? list->devices[i] : NULL;
}