// Copyright 2020 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#ifndef IREE_HAL_DEVICE_H_
#define IREE_HAL_DEVICE_H_
#include <stdbool.h>
#include <stdint.h>
#include "iree/base/api.h"
#include "iree/hal/allocator.h"
#include "iree/hal/buffer.h"
#include "iree/hal/channel.h"
#include "iree/hal/channel_provider.h"
#include "iree/hal/command_buffer.h"
#include "iree/hal/event.h"
#include "iree/hal/executable_cache.h"
#include "iree/hal/fence.h"
#include "iree/hal/file.h"
#include "iree/hal/queue.h"
#include "iree/hal/resource.h"
#include "iree/hal/semaphore.h"
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
//===----------------------------------------------------------------------===//
// Types and Enums
//===----------------------------------------------------------------------===//
// An opaque driver-specific handle to identify different devices.
typedef uintptr_t iree_hal_device_id_t;
#define IREE_HAL_DEVICE_ID_DEFAULT 0ull
// Describes features supported by a device.
// These flags indicate the availability of features that may be enabled at the
// request of the calling application. Note that certain features may disable
// runtime optimizations or require compilation flags to ensure the required
// metadata is present in executables.
typedef uint64_t iree_hal_device_feature_t;
enum iree_hal_device_feature_bits_t {
IREE_HAL_DEVICE_FEATURE_NONE = 0u,
// Device supports executable debugging.
// When present executables *may* be compiled with
// IREE_HAL_EXECUTABLE_CACHING_MODE_ENABLE_DEBUGGING and will have usable
// debugging-related methods. Note that executables lacking embedded debugging
// information may still be unable to support disassembly or fine-grained
// breakpoint insertion.
IREE_HAL_DEVICE_FEATURE_SUPPORTS_DEBUGGING = 1u << 0,
// Device supports executable coverage information.
// When present executables *may* be compiled with
// IREE_HAL_EXECUTABLE_CACHING_MODE_ENABLE_COVERAGE and will produce
// coverage buffers during dispatch. Note that input executables must have
// partial embedded debug information to allow mapping back to source offsets.
IREE_HAL_DEVICE_FEATURE_SUPPORTS_COVERAGE = 1u << 1,
// Device supports executable and command queue profiling.
// When present executables *may* be compiled with
// IREE_HAL_EXECUTABLE_CACHING_MODE_ENABLE_PROFILING and will produce
// profiling buffers during dispatch. Note that input executables must have
// partial embedded debug information to allow mapping back to source offsets.
IREE_HAL_DEVICE_FEATURE_SUPPORTS_PROFILING = 1u << 2,
};
// Describes an enumerated HAL device.
typedef struct iree_hal_device_info_t {
// Opaque handle used by drivers. Not valid across driver instances.
iree_hal_device_id_t device_id;
// Stable driver-specific path used to reference the device.
iree_string_view_t path;
// Human-readable name of the device as returned by the API.
iree_string_view_t name;
} iree_hal_device_info_t;
// Defines what information is captured during profiling.
// Not all implementations will support all modes.
typedef uint64_t iree_hal_device_profiling_mode_t;
enum iree_hal_device_profiling_mode_bits_t {
IREE_HAL_DEVICE_PROFILING_MODE_NONE = 0u,
// Capture queue operations such as command buffer submissions and the
// transfer/dispatch commands within them. This gives a high-level overview
// of HAL API usage with minimal overhead.
IREE_HAL_DEVICE_PROFILING_MODE_QUEUE_OPERATIONS = 1u << 0,
// Capture aggregated dispatch performance counters across all commands within
// the profiled range.
IREE_HAL_DEVICE_PROFILING_MODE_DISPATCH_COUNTERS = 1u << 1,
// Capture detailed executable performance counters correlated to source
// locations. This can have a significant performance impact and should only
// be used when investigating the performance of an individual dispatch.
IREE_HAL_DEVICE_PROFILING_MODE_EXECUTABLE_COUNTERS = 1u << 2,
};
// Controls profiling options.
typedef struct iree_hal_device_profiling_options_t {
// Defines what kind of profiling information is captured.
iree_hal_device_profiling_mode_t mode;
// A file system path where profile data will be written if supported by the
// profiling implementation. Depending on the tool this may be a template
// path/prefix used to derive a unique per-capture name or a full path that
// will be overwritten on each capture.
const char* file_path;
} iree_hal_device_profiling_options_t;
// A bitfield indicating compatible semaphore behavior for a device.
typedef uint64_t iree_hal_semaphore_compatibility_t;
enum iree_hal_semaphore_compatibility_bits_t {
// Indicates (in the absence of other bits) that the semaphore is not
// compatible with the device at all. Any attempt to use the semaphore with
// the device will fail.
IREE_HAL_SEMAPHORE_COMPATIBILITY_NONE = 0u,
// Indicates the device can perform a host-side wait on the semaphore.
// The semaphore can be used as part of a submission at the cost of additional
// host-device synchronization.
IREE_HAL_SEMAPHORE_COMPATIBILITY_HOST_WAIT = 1u << 0,
// Indicates the device can perform a device-side wait on the semaphore.
// The device can efficiently pipeline submissions when waiting without
// host (or user-mode) involvement.
IREE_HAL_SEMAPHORE_COMPATIBILITY_DEVICE_WAIT = 1u << 1,
// Indicates the device can perform a host-side signal of the semaphore.
// The semaphore can be used as part of a submission at the cost of additional
// host-device synchronization.
IREE_HAL_SEMAPHORE_COMPATIBILITY_HOST_SIGNAL = 1u << 2,
// Indicates the device can perform a device-side signal of the semaphore.
// The device can efficiently pipeline submissions when signaling without
// host (or user-mode) involvement.
IREE_HAL_SEMAPHORE_COMPATIBILITY_DEVICE_SIGNAL = 1u << 3,
// Semaphore is compatible with host-side emulation. Usage is allowed but will
// prevent the pipelining of submissions on the device side.
IREE_HAL_SEMAPHORE_COMPATIBILITY_HOST_ONLY =
IREE_HAL_SEMAPHORE_COMPATIBILITY_HOST_WAIT |
IREE_HAL_SEMAPHORE_COMPATIBILITY_HOST_SIGNAL,
// Semaphore is compatible for all usage with the device.
IREE_HAL_SEMAPHORE_COMPATIBILITY_ALL =
IREE_HAL_SEMAPHORE_COMPATIBILITY_HOST_WAIT |
IREE_HAL_SEMAPHORE_COMPATIBILITY_DEVICE_WAIT |
IREE_HAL_SEMAPHORE_COMPATIBILITY_HOST_SIGNAL |
IREE_HAL_SEMAPHORE_COMPATIBILITY_DEVICE_SIGNAL,
};
// Bitfield specifying flags controlling a file read operation.
typedef uint64_t iree_hal_read_flags_t;
enum iree_hal_read_flag_bits_t {
IREE_HAL_READ_FLAG_NONE = 0,
};
// Bitfield specifying flags controlling a file write operation.
typedef uint64_t iree_hal_write_flags_t;
enum iree_hal_write_flag_bits_t {
IREE_HAL_WRITE_FLAG_NONE = 0,
};
// Defines how a multi-wait operation treats the results of multiple semaphores.
typedef enum iree_hal_wait_mode_e {
// Waits for all semaphores to reach or exceed their specified values.
IREE_HAL_WAIT_MODE_ALL = 0,
// Waits for one or more semaphores to reach or exceed their specified values.
IREE_HAL_WAIT_MODE_ANY = 1,
} iree_hal_wait_mode_t;
//===----------------------------------------------------------------------===//
// iree_hal_device_t
//===----------------------------------------------------------------------===//
typedef struct iree_hal_device_t iree_hal_device_t;
// Retains the given |device| for the caller.
IREE_API_EXPORT void iree_hal_device_retain(iree_hal_device_t* device);
// Releases the given |device| from the caller.
IREE_API_EXPORT void iree_hal_device_release(iree_hal_device_t* device);
// Returns the device identifier.
// This identifier may vary based on the runtime device type; for example, a
// Vulkan device may return `vulkan-v1.1` or `vulkan-v1.2-spec1`.
IREE_API_EXPORT iree_string_view_t
iree_hal_device_id(iree_hal_device_t* device);
// Returns the host allocator used by the device for allocating host-side
// objects.
IREE_API_EXPORT iree_allocator_t
iree_hal_device_host_allocator(iree_hal_device_t* device);
// Returns a reference to the allocator of the device that can be used for
// allocating buffers.
IREE_API_EXPORT iree_hal_allocator_t* iree_hal_device_allocator(
iree_hal_device_t* device);
// Replaces the default device memory allocator.
// The |new_allocator| will be retained for the lifetime of the device or until
// the allocator is replaced again. The common usage pattern is to shim the
// default allocator with a wrapper:
// // Retain the existing allocator in the new wrapper.
// wrap_allocator(iree_hal_device_allocator(device), &new_allocator);
// // Update the device to use the wrapper for allocations.
// iree_hal_device_replace_allocator(device, new_allocator);
//
// WARNING: this is not thread-safe and must only be performed when the device
// is idle and all buffers that may have been allocated from the existing
// allocator have been released. In general the only safe time to call this is
// immediately after device creation and before any buffers have been allocated.
// Beware: there are no internal checks for this condition!
//
// TODO(benvanik): remove this method and instead allow allocators to be
// composed without the safety caveats. This may take the form of unbound
// allocators that the device can inject the base allocator into. Another
// approach would be to replace the singular allocator with queue-specific pools
// and make the user register those pools explicitly with the implementation
// they desire.
IREE_API_EXPORT void iree_hal_device_replace_allocator(
iree_hal_device_t* device, iree_hal_allocator_t* new_allocator);
// Replaces the current collective channel provider.
// The |new_provider| will be retained for the lifetime of the device or until
// the provider is replaced again.
//
// WARNING: this is not thread-safe and must only be performed when the device
// is idle and all channels that may have been created from the existing
// provider have been released. In general the only safe time to call this is
// immediately after device creation and before any channels have been created.
// Beware: there are no internal checks for this condition!
IREE_API_EXPORT void iree_hal_device_replace_channel_provider(
iree_hal_device_t* device, iree_hal_channel_provider_t* new_provider);
// Trims pools and caches used by the HAL to the minimum required for live
// allocations. This can be used under low-memory conditions or when
// suspending/parking instances.
IREE_API_EXPORT
iree_status_t iree_hal_device_trim(iree_hal_device_t* device);
// Queries a configuration value as an int64_t.
// The |category| and |key| will be provided to the device driver to interpret
// in a device-specific way; if recognized, the value will be converted to an
// int64_t and returned in |out_value|. Fails if the value represented by the
// key is not convertible.
//
// This is roughly equivalent to the `sysconf` linux syscall
// (https://man7.org/linux/man-pages/man3/sysconf.3.html) in that the exact
// set of categories and keys available and their interpretation is
// target-dependent.
//
// Well-known queries (category :: key):
// hal.device.id :: some-pattern-*
// hal.device.feature :: some-pattern-*
// hal.device.architecture :: some-pattern-*
// hal.executable.format :: some-pattern-*
//
// Returned values must remain the same for the lifetime of the device as
// callers may cache them to avoid redundant calls.
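//
// Example (illustrative sketch): probing the device identifier via the
// `hal.device.id` category. This assumes the match-style convention where
// |out_value| is set to 1 when the key pattern matches; the exact categories
// and keys recognized remain device-specific.
//   int64_t id_matches = 0;
//   IREE_RETURN_IF_ERROR(iree_hal_device_query_i64(
//       device, IREE_SV("hal.device.id"), IREE_SV("vulkan*"), &id_matches));
//   if (id_matches) {
//     // The device reports a Vulkan-based identifier.
//   }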
IREE_API_EXPORT iree_status_t iree_hal_device_query_i64(
iree_hal_device_t* device, iree_string_view_t category,
iree_string_view_t key, int64_t* out_value);
// Queries in what ways the given |semaphore| may be used with |device|.
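//
// Example (illustrative sketch): gating device-side waits on the reported
// compatibility. |device| and |semaphore| are assumed to already exist.
//   iree_hal_semaphore_compatibility_t compatibility =
//       iree_hal_device_query_semaphore_compatibility(device, semaphore);
//   if (compatibility & IREE_HAL_SEMAPHORE_COMPATIBILITY_DEVICE_WAIT) {
//     // The semaphore may be placed in wait lists of device submissions.
//   }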
IREE_API_EXPORT iree_hal_semaphore_compatibility_t
iree_hal_device_query_semaphore_compatibility(iree_hal_device_t* device,
iree_hal_semaphore_t* semaphore);
// Reserves and returns a device-local queue-ordered transient buffer.
// The allocation will not be committed until the entire |wait_semaphore_list|
// has been reached. Once the storage is available for use the
// |signal_semaphore_list| will be signaled. The contents of the buffer are
// undefined until signaled, even if all waits have been resolved; callers
// must always wait for the signal.
//
// For optimal performance and minimal memory consumption the returned buffer
// should be deallocated using iree_hal_device_queue_dealloca as soon as
// possible. It's still safe to synchronously release the buffer but the
// lifetime will then be controlled by all potential retainers.
//
// Usage:
// iree_hal_device_queue_alloca(wait(0), signal(1), &buffer);
// iree_hal_device_queue_execute(wait(1), signal(2), commands...);
// iree_hal_device_queue_dealloca(wait(2), signal(3), buffer);
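//
// A more concrete sketch of the same pattern (illustrative only; the params,
// pool, affinity, and semaphore lists shown are assumptions and should be
// chosen to match the actual workload):
//   iree_hal_buffer_params_t params = {
//       .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,
//       .usage = IREE_HAL_BUFFER_USAGE_DEFAULT,
//   };
//   iree_hal_buffer_t* buffer = NULL;
//   IREE_RETURN_IF_ERROR(iree_hal_device_queue_alloca(
//       device, IREE_HAL_QUEUE_AFFINITY_ANY, wait_semaphore_list,
//       alloca_signal_list, IREE_HAL_ALLOCATOR_POOL_DEFAULT, params,
//       allocation_size, &buffer));
//   // ... enqueue work that consumes |buffer|, waiting on |alloca_signal_list|
//   IREE_RETURN_IF_ERROR(iree_hal_device_queue_dealloca(
//       device, IREE_HAL_QUEUE_AFFINITY_ANY, work_signal_list,
//       dealloca_signal_list, buffer));
//   iree_hal_buffer_release(buffer);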
IREE_API_EXPORT iree_status_t iree_hal_device_queue_alloca(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_allocator_pool_t pool, iree_hal_buffer_params_t params,
iree_device_size_t allocation_size,
iree_hal_buffer_t** IREE_RESTRICT out_buffer);
// Deallocates a queue-ordered transient buffer.
// The deallocation will not be made until the entire |wait_semaphore_list| has
// been reached. Once the storage is available for reuse the
// |signal_semaphore_list| will be signaled. After all waits have been resolved
// the contents of the buffer are immediately undefined even if the signal has
// not yet occurred.
//
// Deallocations will only be queue-ordered if the |buffer| was originally
// allocated with iree_hal_device_queue_alloca. Buffers allocated synchronously
// will be ignored by this call and are instead deallocated when the |buffer|
// is released.
IREE_API_EXPORT iree_status_t iree_hal_device_queue_dealloca(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* buffer);
// Enqueues a single queue-ordered fill operation.
// The |target_buffer| must be visible to the device queue performing the fill.
//
// WARNING: individual fills have a high overhead; callers should batch them
// instead of calling this multiple times. The
// iree_hal_create_transfer_command_buffer utility makes it easy to create
// batches of transfer operations (fill, update, copy) and requires only a few
// more lines of code.
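//
// Example (sketch): zero-filling an entire device buffer in queue order.
// |device|, |target_buffer|, and the semaphore lists are assumed to already
// exist; flags are passed as 0 (no special behavior requested).
//   const uint8_t zero_pattern = 0;
//   IREE_RETURN_IF_ERROR(iree_hal_device_queue_fill(
//       device, IREE_HAL_QUEUE_AFFINITY_ANY, wait_semaphore_list,
//       signal_semaphore_list, target_buffer, /*target_offset=*/0,
//       iree_hal_buffer_byte_length(target_buffer), &zero_pattern,
//       sizeof(zero_pattern), /*flags=*/0));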
IREE_API_EXPORT iree_status_t iree_hal_device_queue_fill(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, const void* pattern,
iree_host_size_t pattern_length, iree_hal_fill_flags_t flags);
// Enqueues a single queue-ordered buffer update operation.
// The provided |source_buffer| will be captured and need not remain live or
// unchanged while the operation is queued. The |target_buffer| must be visible
// to the device queue performing the update.
//
// Some implementations may limit the size of the update or may perform poorly
// when the size exceeds an implementation-defined limit. Updates should be
// kept as small and infrequent as possible.
//
// WARNING: individual updates have a high overhead; callers should batch them
// instead of calling this multiple times. The
// iree_hal_create_transfer_command_buffer utility makes it easy to create
// batches of transfer operations (fill, update, copy) and requires only a few
// more lines of code.
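//
// Example (sketch): uploading a small block of host constants. The host
// storage is captured by the call and may be stack-allocated as shown; names
// other than the API itself are placeholders.
//   const uint32_t constants[4] = {0, 1, 2, 3};
//   IREE_RETURN_IF_ERROR(iree_hal_device_queue_update(
//       device, IREE_HAL_QUEUE_AFFINITY_ANY, wait_semaphore_list,
//       signal_semaphore_list, constants, /*source_offset=*/0, target_buffer,
//       /*target_offset=*/0, sizeof(constants), /*flags=*/0));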
IREE_API_EXPORT iree_status_t iree_hal_device_queue_update(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
const void* source_buffer, iree_host_size_t source_offset,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, iree_hal_update_flags_t flags);
// Enqueues a single queue-ordered copy operation.
// The |source_buffer| and |target_buffer| must both be visible to the device
// queue performing the copy.
//
// WARNING: individual copies have a high overhead; callers should batch them
// instead of calling this multiple times. The
// iree_hal_create_transfer_command_buffer utility makes it easy to create
// batches of transfer operations (fill, update, copy) and requires only a few
// more lines of code.
IREE_API_EXPORT iree_status_t iree_hal_device_queue_copy(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, iree_hal_copy_flags_t flags);
// Enqueues a file read operation that streams a segment of the |source_file|
// defined by the |source_offset| and |length| into the HAL |target_buffer| at
// the specified |target_offset|. The |queue_affinity| should be set to where
// the target buffer will be consumed. The source file must have read permission
// and the target buffer must have transfer-target usage.
IREE_API_EXPORT iree_status_t iree_hal_device_queue_read(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_file_t* source_file, uint64_t source_offset,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, iree_hal_read_flags_t flags);
// Enqueues a file write operation that streams a segment of the HAL
// |source_buffer| defined by the |source_offset| and |length| into the
// |target_file| at the specified |target_offset|. The |queue_affinity| should
// be set to where the source buffer was produced. The source buffer must have
// transfer-source usage and the target file must have write permission.
IREE_API_EXPORT iree_status_t iree_hal_device_queue_write(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
iree_hal_file_t* target_file, uint64_t target_offset,
iree_device_size_t length, iree_hal_write_flags_t flags);
// Executes a command buffer on a device queue.
// No commands will execute until the |wait_semaphore_list| has been reached,
// and the |signal_semaphore_list| will be signaled when all commands have
// completed. If the command buffer is omitted this will act as a barrier.
//
// The queue is selected based on the command buffer submitted and the
// |queue_affinity|. As the number of available queues can vary the
// |queue_affinity| is used to hash into the available queues for the required
// categories. For example if 2 queues support transfer commands and the
// affinity is 5 the resulting queue could be index hash(5)=1. The affinity can
// thus be treated as just a way to indicate whether two submissions must be
// placed onto the same queue. Note that the exact hashing function is
// implementation dependent.
//
// An optional binding table must be provided if the command buffer has
// indirect bindings; otherwise it may be `iree_hal_buffer_binding_table_empty()`.
// The
// binding table contents will be captured during the call and need not persist
// after the call returns.
//
// The submission behavior matches Vulkan's vkQueueSubmit, with each submission
// executing its command buffers in the order they are defined but allowing the
// command buffers to complete out-of-order. See:
// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/vkQueueSubmit.html
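//
// Example (sketch): submitting a command buffer that has no indirect bindings;
// |command_buffer| and the semaphore lists are assumed to already exist.
//   IREE_RETURN_IF_ERROR(iree_hal_device_queue_execute(
//       device, IREE_HAL_QUEUE_AFFINITY_ANY, wait_semaphore_list,
//       signal_semaphore_list, command_buffer,
//       iree_hal_buffer_binding_table_empty()));
//   // Optionally flush to encourage the submission to begin immediately:
//   IREE_RETURN_IF_ERROR(
//       iree_hal_device_queue_flush(device, IREE_HAL_QUEUE_AFFINITY_ANY));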
IREE_API_EXPORT iree_status_t iree_hal_device_queue_execute(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_command_buffer_t* command_buffer,
iree_hal_buffer_binding_table_t binding_table);
// Enqueues a barrier waiting for |wait_semaphore_list| and signaling
// |signal_semaphore_list| when reached.
// Equivalent to iree_hal_device_queue_execute with no command buffers.
IREE_API_EXPORT iree_status_t iree_hal_device_queue_barrier(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list);
// Flushes any locally-pending submissions in the queue.
// When submitting many queue operations this can be used to eagerly flush
// earlier submissions while later ones are still being constructed.
IREE_API_EXPORT iree_status_t iree_hal_device_queue_flush(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity);
// Blocks the caller until the semaphores reach or exceed the specified payload
// values or the |timeout| elapses. All semaphores in |semaphore_list| must be
// created from this device (or be imported into it).
//
// |wait_mode| can be used to decide when the wait will proceed; whether *all*
// semaphores in |semaphore_list| must be signaled or whether *any* (one or
// more) can be signaled before an early return.
//
// Returns success if the wait is successful and semaphores have been signaled
// satisfying the |wait_mode|.
//
// Returns IREE_STATUS_DEADLINE_EXCEEDED if the |timeout| elapses without the
// |wait_mode| being satisfied. Note that even on success (particularly with
// IREE_HAL_WAIT_MODE_ANY) only a subset of the semaphores may have been
// signaled; each can be queried to see which ones were.
//
// Returns IREE_STATUS_ABORTED if one or more semaphores have failed. Callers can
// use iree_hal_semaphore_query on the semaphores to find the ones that have
// failed and get the status.
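//
// Example (sketch): blocking until a single semaphore reaches a payload value,
// bounded by a timeout. |device| and |semaphore| are assumed to already exist.
//   iree_hal_semaphore_t* semaphores[1] = {semaphore};
//   uint64_t payload_values[1] = {1ull};
//   const iree_hal_semaphore_list_t semaphore_list = {
//       .count = 1,
//       .semaphores = semaphores,
//       .payload_values = payload_values,
//   };
//   iree_status_t status = iree_hal_device_wait_semaphores(
//       device, IREE_HAL_WAIT_MODE_ALL, semaphore_list,
//       iree_make_timeout_ms(1000));
//   if (iree_status_is_deadline_exceeded(status)) {
//     iree_status_ignore(status);  // timed out; the wait may be retried
//   } else {
//     IREE_RETURN_IF_ERROR(status);
//   }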
IREE_API_EXPORT iree_status_t iree_hal_device_wait_semaphores(
iree_hal_device_t* device, iree_hal_wait_mode_t wait_mode,
const iree_hal_semaphore_list_t semaphore_list, iree_timeout_t timeout);
// Begins a profile capture on |device| with the given |options|.
// This will use an implementation-defined profiling API to capture all
// supported device operations until iree_hal_device_profiling_end is called.
// If the device or the current build configuration does not support profiling,
// this method is a no-op. See implementation-specific device creation APIs and
// driver module registration for more information.
//
// WARNING: the device must be idle before calling this method. Behavior is
// undefined if there are any in-flight or pending queue operations or access
// from another thread while profiling is starting/stopping.
//
// WARNING: profiling in any mode can dramatically increase overhead; some
// modes are expensive enough in both host and device time to invalidate
// performance numbers gathered via other mechanisms (perf/tracy/etc).
// When measuring end-to-end performance use only
// IREE_HAL_DEVICE_PROFILING_MODE_QUEUE_OPERATIONS.
//
// Examples of APIs this maps to (where supported):
// - CPU: perf_event_open/close or vendor APIs
// - CUDA: cuProfilerStart/cuProfilerStop
// - Direct3D: PIXBeginCapture/PIXEndCapture
// - Metal: [MTLCaptureManager startCapture/stopCapture]
// - Vulkan: vkAcquireProfilingLockKHR/vkReleaseProfilingLockKHR +
// RenderDoc StartFrameCapture/EndFrameCapture
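//
// Example (sketch): capturing only queue operations around a workload. The
// workload itself is elided, the device is assumed idle at both ends, and
// passing NULL for |file_path| is assumed to fall back to the tool's default
// output location.
//   const iree_hal_device_profiling_options_t options = {
//       .mode = IREE_HAL_DEVICE_PROFILING_MODE_QUEUE_OPERATIONS,
//       .file_path = NULL,
//   };
//   IREE_RETURN_IF_ERROR(iree_hal_device_profiling_begin(device, &options));
//   // ... submit and wait on the workload being profiled ...
//   IREE_RETURN_IF_ERROR(iree_hal_device_profiling_end(device));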
IREE_API_EXPORT iree_status_t iree_hal_device_profiling_begin(
iree_hal_device_t* device,
const iree_hal_device_profiling_options_t* options);
// Flushes any pending profiling data. May be a no-op.
IREE_API_EXPORT iree_status_t
iree_hal_device_profiling_flush(iree_hal_device_t* device);
// Ends a profile previously started with iree_hal_device_profiling_begin.
// The device must be idle before calling this method.
IREE_API_EXPORT iree_status_t
iree_hal_device_profiling_end(iree_hal_device_t* device);
//===----------------------------------------------------------------------===//
// iree_hal_device_list_t
//===----------------------------------------------------------------------===//
// A fixed-size list of retained devices.
typedef struct iree_hal_device_list_t {
iree_allocator_t host_allocator;
iree_host_size_t capacity;
iree_host_size_t count;
iree_hal_device_t* devices[];
} iree_hal_device_list_t;
// Allocates an empty device list with the given capacity.
IREE_API_EXPORT iree_status_t iree_hal_device_list_allocate(
iree_host_size_t capacity, iree_allocator_t host_allocator,
iree_hal_device_list_t** out_list);
// Frees a device |list|.
IREE_API_EXPORT void iree_hal_device_list_free(iree_hal_device_list_t* list);
// Pushes a |device| onto the |list| and retains it.
IREE_API_EXPORT iree_status_t iree_hal_device_list_push_back(
iree_hal_device_list_t* list, iree_hal_device_t* device);
// Returns the device at index |i| in the |list| or NULL if out of range.
// Callers must retain the device if the returned pointer may outlive the
// |list|.
IREE_API_EXPORT iree_hal_device_t* iree_hal_device_list_at(
const iree_hal_device_list_t* list, iree_host_size_t i);
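// Example usage of the device list API (sketch): building a list of devices
// and borrowing one entry. |device_a| and |host_allocator| are placeholders
// for values obtained elsewhere (e.g. from driver enumeration).
//   iree_hal_device_list_t* list = NULL;
//   IREE_RETURN_IF_ERROR(
//       iree_hal_device_list_allocate(2, host_allocator, &list));
//   iree_status_t status = iree_hal_device_list_push_back(list, device_a);
//   if (iree_status_is_ok(status)) {
//     // Borrowed pointer: retain it if it may outlive |list|.
//     iree_hal_device_t* device = iree_hal_device_list_at(list, 0);
//     (void)device;
//   }
//   iree_hal_device_list_free(list);
//   IREE_RETURN_IF_ERROR(status);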
//===----------------------------------------------------------------------===//
// iree_hal_device_t implementation details
//===----------------------------------------------------------------------===//
typedef struct iree_hal_device_vtable_t {
void(IREE_API_PTR* destroy)(iree_hal_device_t* device);
iree_string_view_t(IREE_API_PTR* id)(iree_hal_device_t* device);
iree_allocator_t(IREE_API_PTR* host_allocator)(iree_hal_device_t* device);
iree_hal_allocator_t*(IREE_API_PTR* device_allocator)(
iree_hal_device_t* device);
void(IREE_API_PTR* replace_device_allocator)(
iree_hal_device_t* device, iree_hal_allocator_t* new_allocator);
void(IREE_API_PTR* replace_channel_provider)(
iree_hal_device_t* device, iree_hal_channel_provider_t* new_provider);
iree_status_t(IREE_API_PTR* trim)(iree_hal_device_t* device);
iree_status_t(IREE_API_PTR* query_i64)(iree_hal_device_t* device,
iree_string_view_t category,
iree_string_view_t key,
int64_t* out_value);
iree_status_t(IREE_API_PTR* create_channel)(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
iree_hal_channel_params_t params, iree_hal_channel_t** out_channel);
iree_status_t(IREE_API_PTR* create_command_buffer)(
iree_hal_device_t* device, iree_hal_command_buffer_mode_t mode,
iree_hal_command_category_t command_categories,
iree_hal_queue_affinity_t queue_affinity,
iree_host_size_t binding_capacity,
iree_hal_command_buffer_t** out_command_buffer);
iree_status_t(IREE_API_PTR* create_event)(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
iree_hal_event_flags_t flags, iree_hal_event_t** out_event);
iree_status_t(IREE_API_PTR* create_executable_cache)(
iree_hal_device_t* device, iree_string_view_t identifier,
iree_loop_t loop, iree_hal_executable_cache_t** out_executable_cache);
iree_status_t(IREE_API_PTR* import_file)(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
iree_hal_memory_access_t access, iree_io_file_handle_t* handle,
iree_hal_external_file_flags_t flags, iree_hal_file_t** out_file);
iree_status_t(IREE_API_PTR* create_semaphore)(
iree_hal_device_t* device, uint64_t initial_value,
iree_hal_semaphore_flags_t flags, iree_hal_semaphore_t** out_semaphore);
iree_hal_semaphore_compatibility_t(
IREE_API_PTR* query_semaphore_compatibility)(
iree_hal_device_t* device, iree_hal_semaphore_t* semaphore);
iree_status_t(IREE_API_PTR* queue_alloca)(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_allocator_pool_t pool, iree_hal_buffer_params_t params,
iree_device_size_t allocation_size,
iree_hal_buffer_t** IREE_RESTRICT out_buffer);
iree_status_t(IREE_API_PTR* queue_dealloca)(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* buffer);
iree_status_t(IREE_API_PTR* queue_fill)(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, const void* pattern,
iree_host_size_t pattern_length, iree_hal_fill_flags_t flags);
iree_status_t(IREE_API_PTR* queue_update)(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
const void* source_buffer, iree_host_size_t source_offset,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, iree_hal_update_flags_t flags);
iree_status_t(IREE_API_PTR* queue_copy)(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, iree_hal_copy_flags_t flags);
iree_status_t(IREE_API_PTR* queue_read)(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_file_t* source_file, uint64_t source_offset,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, iree_hal_read_flags_t flags);
iree_status_t(IREE_API_PTR* queue_write)(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
iree_hal_file_t* target_file, uint64_t target_offset,
iree_device_size_t length, iree_hal_write_flags_t flags);
iree_status_t(IREE_API_PTR* queue_execute)(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_command_buffer_t* command_buffer,
iree_hal_buffer_binding_table_t binding_table);
iree_status_t(IREE_API_PTR* queue_flush)(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity);
iree_status_t(IREE_API_PTR* wait_semaphores)(
iree_hal_device_t* device, iree_hal_wait_mode_t wait_mode,
const iree_hal_semaphore_list_t semaphore_list, iree_timeout_t timeout);
iree_status_t(IREE_API_PTR* profiling_begin)(
iree_hal_device_t* device,
const iree_hal_device_profiling_options_t* options);
iree_status_t(IREE_API_PTR* profiling_flush)(iree_hal_device_t* device);
iree_status_t(IREE_API_PTR* profiling_end)(iree_hal_device_t* device);
} iree_hal_device_vtable_t;
IREE_HAL_ASSERT_VTABLE_LAYOUT(iree_hal_device_vtable_t);
IREE_API_EXPORT void iree_hal_device_destroy(iree_hal_device_t* device);
IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_fill(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, const void* pattern,
iree_host_size_t pattern_length, iree_hal_fill_flags_t flags);
IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_update(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
const void* source_buffer, iree_host_size_t source_offset,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, iree_hal_update_flags_t flags);
IREE_API_EXPORT iree_status_t iree_hal_device_queue_emulated_copy(
iree_hal_device_t* device, iree_hal_queue_affinity_t queue_affinity,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, iree_hal_copy_flags_t flags);
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
#endif // IREE_HAL_DEVICE_H_