| // Copyright 2020 The IREE Authors |
| // |
| // Licensed under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| |
| #ifndef IREE_HAL_DEVICE_H_ |
| #define IREE_HAL_DEVICE_H_ |
| |
| #include <stdbool.h> |
| #include <stdint.h> |
| |
| #include "iree/base/api.h" |
| #include "iree/hal/buffer.h" |
| #include "iree/hal/command_buffer.h" |
| #include "iree/hal/descriptor_set.h" |
| #include "iree/hal/descriptor_set_layout.h" |
| #include "iree/hal/event.h" |
| #include "iree/hal/executable_cache.h" |
| #include "iree/hal/executable_layout.h" |
| #include "iree/hal/resource.h" |
| #include "iree/hal/semaphore.h" |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif // __cplusplus |
| |
| //===----------------------------------------------------------------------===// |
| // Types and Enums |
| //===----------------------------------------------------------------------===// |
| |
| // An opaque driver-specific handle used to identify a device. |
| // The value is pointer-sized (uintptr_t) and only meaningful to the driver that |
| // produced it. |
| typedef uintptr_t iree_hal_device_id_t; |
| |
| // Reserved zero value named as the invalid iree_hal_device_id_t. |
| // NOTE(review): the literal is `unsigned long long` while the typedef is |
| // uintptr_t; the two can differ in width on 32-bit targets - confirm intended. |
| #define IREE_HAL_DEVICE_ID_INVALID 0ull |
| |
| // Describes features supported by a device. |
| // These flags indicate the availability of features that may be enabled at the |
| // request of the calling application. Note that certain features may disable |
| // runtime optimizations or require compilation flags to ensure the required |
| // metadata is present in executables. |
| // |
| // Each non-zero value below occupies its own bit (1u << n). |
| enum iree_hal_device_feature_bits_t { |
| // Empty set: no feature bits. |
| IREE_HAL_DEVICE_FEATURE_NONE = 0u, |
| |
| // Device supports executable debugging. |
| // When present executables *may* be compiled with |
| // IREE_HAL_EXECUTABLE_CACHING_MODE_ENABLE_DEBUGGING and will have usable |
| // debugging related methods. Note that if the input executables do not have |
| // embedded debugging information they still may not be able to perform |
| // disassembly or fine-grained breakpoint insertion. |
| IREE_HAL_DEVICE_FEATURE_SUPPORTS_DEBUGGING = 1u << 0, |
| |
| // Device supports executable coverage information. |
| // When present executables *may* be compiled with |
| // IREE_HAL_EXECUTABLE_CACHING_MODE_ENABLE_COVERAGE and will produce |
| // coverage buffers during dispatch. Note that input executables must have |
| // partial embedded debug information to allow mapping back to source offsets. |
| IREE_HAL_DEVICE_FEATURE_SUPPORTS_COVERAGE = 1u << 1, |
| |
| // Device supports executable and command queue profiling. |
| // When present executables *may* be compiled with |
| // IREE_HAL_EXECUTABLE_CACHING_MODE_ENABLE_PROFILING and will produce |
| // profiling buffers during dispatch. Note that input executables must have |
| // partial embedded debug information to allow mapping back to source offsets. |
| IREE_HAL_DEVICE_FEATURE_SUPPORTS_PROFILING = 1u << 2, |
| }; |
| // Unsigned 32-bit storage type for feature values; presumably holds a bitwise |
| // OR of iree_hal_device_feature_bits_t values (confirm against users). |
| typedef uint32_t iree_hal_device_feature_t; |
| |
| // Describes an enumerated HAL device: a driver-specific id plus a name. |
| typedef struct iree_hal_device_info_t { |
| // Opaque handle used by drivers. Not valid across driver instances. |
| iree_hal_device_id_t device_id; |
| // Name of the device as returned by the API. |
| // NOTE(review): this is a string view; who owns the referenced characters is |
| // not stated here - confirm. |
| iree_string_view_t name; |
| } iree_hal_device_info_t; |
| |
| // A transfer source or destination. |
| // Carries a host memory span and a device buffer pointer. The |
| // iree_hal_make_*_transfer_buffer helpers in this header fill in one of the two |
| // fields and leave the other empty (host_buffer) or NULL (device_buffer). |
| typedef struct iree_hal_transfer_buffer_t { |
| // A host-allocated void* buffer. |
| iree_byte_span_t host_buffer; |
| // A device-allocated buffer (may be of any memory type). |
| iree_hal_buffer_t* device_buffer; |
| } iree_hal_transfer_buffer_t; |
| |
| // Wraps the host memory span |host_buffer| as a transfer source/destination. |
| // The device_buffer field of the result is NULL. |
| static inline iree_hal_transfer_buffer_t iree_hal_make_host_transfer_buffer( |
|     iree_byte_span_t host_buffer) { |
|   // Plain member assignment so the header reads the same as C and C++. |
|   iree_hal_transfer_buffer_t result; |
|   result.host_buffer = host_buffer; |
|   result.device_buffer = NULL; |
|   return result; |
| } |
| |
| // Wraps the host memory at |ptr| of |length| bytes as a transfer |
| // source/destination by building a byte span with iree_make_byte_span. |
| // The device_buffer field of the result is NULL. |
| static inline iree_hal_transfer_buffer_t |
| iree_hal_make_host_transfer_buffer_span(void* ptr, iree_host_size_t length) { |
|   iree_hal_transfer_buffer_t result; |
|   result.host_buffer = iree_make_byte_span(ptr, length); |
|   result.device_buffer = NULL; |
|   return result; |
| } |
| |
| // Wraps |device_buffer| as a transfer source/destination. |
| // The host_buffer field of the result is the empty span returned by |
| // iree_byte_span_empty(). The pointer is stored as-is. |
| static inline iree_hal_transfer_buffer_t iree_hal_make_device_transfer_buffer( |
|     iree_hal_buffer_t* device_buffer) { |
|   iree_hal_transfer_buffer_t result; |
|   result.host_buffer = iree_byte_span_empty(); |
|   result.device_buffer = device_buffer; |
|   return result; |
| } |
| |
| // A list of semaphores and their corresponding payloads. |
| // When signaling each semaphore will be set to the new payload value provided. |
| // When waiting each semaphore must reach or exceed the payload value. |
| typedef struct iree_hal_semaphore_list_t { |
| // Number of entries; presumably the length of both arrays below (confirm). |
| iree_host_size_t count; |
| // The semaphores in the list. |
| iree_hal_semaphore_t** semaphores; |
| // Payload values; presumably matched to |semaphores| by index (confirm). |
| uint64_t* payload_values; |
| } iree_hal_semaphore_list_t; |
| |
| // A single batch of command buffers submitted to a device queue. |
| // All of the wait semaphores must reach or exceed their given payload values |
| // prior to the batch beginning execution. Each command buffer begins execution |
| // in the order it is present in the list, though note that the command buffers |
| // execute concurrently and require internal synchronization via events if there |
| // are any dependencies between them. Only after all command buffers have |
| // completed will the signal semaphores be updated to the provided payload |
| // values. |
| // |
| // Matches Vulkan's VkSubmitInfo: |
| // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkSubmitInfo.html |
| // Note that as the HAL only models timeline semaphores we take the payload |
| // values directly in this struct; see: |
| // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkTimelineSemaphoreSubmitInfo.html |
| typedef struct iree_hal_submission_batch_t { |
| // Semaphores to wait on prior to executing any command buffer. |
| iree_hal_semaphore_list_t wait_semaphores; |
| |
| // Command buffers to execute, in order. |
| // |command_buffer_count| is presumably the number of entries in |
| // |command_buffers| (confirm). |
| iree_host_size_t command_buffer_count; |
| iree_hal_command_buffer_t** command_buffers; |
| |
| // Semaphores to signal once all command buffers have completed execution. |
| iree_hal_semaphore_list_t signal_semaphores; |
| } iree_hal_submission_batch_t; |
| |
| // Defines how a multi-wait operation treats the results of multiple semaphores. |
| // Passed as |wait_mode| to iree_hal_device_wait_semaphores. |
| typedef enum iree_hal_wait_mode_e { |
| // Waits for all semaphores to reach or exceed their specified values. |
| IREE_HAL_WAIT_MODE_ALL = 0, |
| // Waits until at least one semaphore reaches or exceeds its specified value. |
| IREE_HAL_WAIT_MODE_ANY = 1, |
| } iree_hal_wait_mode_t; |
| |
| //===----------------------------------------------------------------------===// |
| // iree_hal_device_t |
| //===----------------------------------------------------------------------===// |
| |
| // Device type, forward-declared only: this header does not define the struct |
| // and every function below takes it by pointer. |
| typedef struct iree_hal_device_t iree_hal_device_t; |
| |
| // Retains the given |device| for the caller. |
| // Counterpart of iree_hal_device_release; presumably each retain is expected to |
| // be balanced by one release (confirm in the implementation). |
| IREE_API_EXPORT void iree_hal_device_retain(iree_hal_device_t* device); |
| |
| // Releases the given |device| from the caller. |
| // Counterpart of iree_hal_device_retain. |
| // NOTE(review): whether the caller may keep using |device| after this call is |
| // not stated here - presumably not unless it holds another retain; confirm. |
| IREE_API_EXPORT void iree_hal_device_release(iree_hal_device_t* device); |
| |
| // Returns the device identifier as a string view. |
| // This identifier may vary based on the runtime device type; for example, a |
| // Vulkan device may return `vulkan-v1.1` or `vulkan-v1.2-spec1`. |
| // NOTE(review): the lifetime of the returned view is not stated here - |
| // presumably tied to |device|; confirm. |
| IREE_API_EXPORT iree_string_view_t |
| iree_hal_device_id(iree_hal_device_t* device); |
| |
| // Returns the host allocator used for objects. |
| // The allocator is returned by value (iree_allocator_t). |
| IREE_API_EXPORT iree_allocator_t |
| iree_hal_device_host_allocator(iree_hal_device_t* device); |
| |
| // Returns a reference to the allocator of the device that can be used for |
| // allocating buffers. |
| // NOTE(review): "reference" does not say whether the result is retained for |
| // the caller - confirm ownership. |
| // NOTE(review): iree_hal_allocator_t is not declared by any header this file |
| // includes by name - presumably provided transitively; confirm. |
| IREE_API_EXPORT iree_hal_allocator_t* iree_hal_device_allocator( |
| iree_hal_device_t* device); |
| |
| // Trims pools and caches used by the HAL to the minimum required for live |
| // allocations. This can be used in low-memory conditions or when |
| // suspending/parking instances. |
| // Returns an iree_status_t; the failure conditions are not described here. |
| IREE_API_EXPORT |
| iree_status_t iree_hal_device_trim(iree_hal_device_t* device); |
| |
| // Queries a configuration value as an int32_t. |
| // The |category| and |key| will be provided to the device driver to interpret |
| // in a device-specific way and if recognized the value will be converted to an |
| // int32_t and returned in |out_value|. Fails if the value represented by the |
| // key is not convertible (overflows a 32-bit integer, not a number, etc). |
| // |
| // This is roughly equivalent to the `sysconf` Linux library call |
| // (https://man7.org/linux/man-pages/man3/sysconf.3.html) in that the exact |
| // set of categories and keys available and their interpretation is |
| // target-dependent. |
| // |
| // Well-known queries (category :: key): |
| // hal.device.id :: some-pattern-* |
| // hal.device.feature :: some-pattern-* |
| // hal.device.architecture :: some-pattern-* |
| // hal.executable.format :: some-pattern-* |
| // |
| // Returned values must remain the same for the lifetime of the device as |
| // callers may cache them to avoid redundant calls. |
| IREE_API_EXPORT iree_status_t iree_hal_device_query_i32( |
| iree_hal_device_t* device, iree_string_view_t category, |
| iree_string_view_t key, int32_t* out_value); |
| |
| // Synchronously transfers the given |source_buffer| to a device-local |
| // buffer returned in |out_target_buffer|. Callers must release the target |
| // buffer when no longer used. If the source buffer is already device-local it |
| // will be returned without an allocation or copy occurring. |
| // |
| // This utility may incur significant overhead and is present for simple tooling |
| // and prototypes; when transferring multiple buffers users should always prefer |
| // asynchronous command buffers submitted to device queues. Note too that the |
| // entire buffer is transferred: if reading back smaller portions it is better |
| // to perform these as ranged transfers to reduce the amount of data that needs |
| // to be moved. |
| // |
| // NOTE(review): |allowed_usage| is not described here; presumably the usage |
| // the returned buffer must support - confirm. |
| IREE_API_EXPORT iree_status_t iree_hal_device_transfer_to_device( |
| iree_hal_device_t* device, iree_hal_buffer_t* source_buffer, |
| iree_hal_buffer_usage_t allowed_usage, |
| iree_hal_buffer_t** out_target_buffer); |
| |
| // Synchronously transfers the given |source_buffer| to a host-local |
| // buffer returned in |out_target_buffer|. Callers must release the target |
| // buffer when no longer used. If the source buffer is already host-local it |
| // will be returned without an allocation or copy occurring. |
| // |
| // This utility may incur significant overhead and is present for simple tooling |
| // and prototypes; when transferring multiple buffers users should always prefer |
| // asynchronous command buffers submitted to device queues. |
| IREE_API_EXPORT iree_status_t iree_hal_device_transfer_to_host( |
| iree_hal_device_t* device, iree_hal_buffer_t* source_buffer, |
| iree_hal_buffer_t** out_target_buffer); |
| |
| // Synchronously copies |data_length| of data from |source| at |source_offset| |
| // into |target| at |target_offset|. |
| // |
| // Supports host->device, device->host, and device->device transfer, |
| // including across devices. This method will never fail based on device |
| // capabilities but may incur some extreme transient allocations and copies in |
| // order to perform the transfer. |
| // |
| // The ordering of the transfer is undefined with respect to queue execution on |
| // the source or target device; some may require full device flushes in order to |
| // perform this operation while others may immediately perform it while there is |
| // still work outstanding. |
| // |
| // It is strongly recommended that buffer operations are performed on transfer |
| // queues; using this synchronous function may incur additional cache flushes |
| // and synchronous blocking behavior and is not supported on all buffer types. |
| // See iree_hal_command_buffer_copy_buffer. |
| // |
| // NOTE(review): |flags| and |timeout| are not described here; |timeout| |
| // presumably bounds how long the call may block - confirm. |
| IREE_API_EXPORT iree_status_t iree_hal_device_transfer_range( |
| iree_hal_device_t* device, iree_hal_transfer_buffer_t source, |
| iree_device_size_t source_offset, iree_hal_transfer_buffer_t target, |
| iree_device_size_t target_offset, iree_device_size_t data_length, |
| iree_hal_transfer_buffer_flags_t flags, iree_timeout_t timeout); |
| |
| // Synchronously executes one or more transfer operations against a queue. |
| // All buffers must be compatible with |device| and ranges must not overlap |
| // (same as with memcpy). |
| // |
| // This is a blocking operation and may incur significant overheads as |
| // internally it issues a command buffer with the transfer operations and waits |
| // for it to complete. Users should do that themselves so that the work can be |
| // issued concurrently and batched effectively. This is only useful as a |
| // fallback for implementations that require it or tools where things like I/O |
| // are transferred without worrying about performance. When submitting other |
| // work it's preferable to use iree_hal_create_transfer_command_buffer and a |
| // normal queue submission that allows for more fine-grained sequencing and |
| // amortizes the submission cost by batching other work. |
| // |
| // The transfer will begin after the optional |wait_semaphore| reaches |
| // |wait_value|. Behavior is undefined if no semaphore is provided and there are |
| // in-flight operations concurrently using the buffer ranges. |
| // Returns only after all transfers have completed and been flushed. |
| // |
| // NOTE(review): |transfer_commands| presumably points to |transfer_count| |
| // entries and |timeout| presumably bounds the blocking wait - confirm. |
| IREE_API_EXPORT iree_status_t iree_hal_device_transfer_and_wait( |
| iree_hal_device_t* device, iree_hal_semaphore_t* wait_semaphore, |
| uint64_t wait_value, iree_host_size_t transfer_count, |
| const iree_hal_transfer_command_t* transfer_commands, |
| iree_timeout_t timeout); |
| |
| // Submits one or more batches of work to a device queue. |
| // |
| // The queue is selected based on the flags set in |command_categories| and the |
| // |queue_affinity|. As the number of available queues can vary the |
| // |queue_affinity| is used to hash into the available queues for the required |
| // categories. For example if 2 queues support transfer commands and the |
| // affinity is 5 the resulting queue could be index hash(5)=1. The affinity can |
| // thus be treated as just a way to indicate whether two submissions must be |
| // placed on to the same queue. Note that the exact hashing function is |
| // implementation dependent. |
| // |
| // The submission behavior matches Vulkan's vkQueueSubmit, with each batch |
| // executing its command buffers in the order they are defined but allowing the |
| // command buffers to complete out-of-order. See: |
| // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/vkQueueSubmit.html |
| // |
| // NOTE(review): |batches| presumably points to |batch_count| entries - confirm. |
| IREE_API_EXPORT iree_status_t iree_hal_device_queue_submit( |
| iree_hal_device_t* device, iree_hal_command_category_t command_categories, |
| iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count, |
| const iree_hal_submission_batch_t* batches); |
| |
| // Submits batches of work and waits until |wait_semaphore| reaches or exceeds |
| // |wait_value|. |
| // |
| // This is equivalent to following iree_hal_device_queue_submit with an |
| // iree_hal_semaphore_wait on |wait_semaphore|/|wait_value| but |
| // may help to reduce overhead by preventing thread wakeups, kernel calls, and |
| // internal tracking. |
| // |
| // See iree_hal_device_queue_submit for more information about the queuing |
| // behavior and iree_hal_semaphore_wait for the waiting behavior. |
| // |
| // NOTE(review): |timeout| presumably bounds only the wait portion - confirm. |
| IREE_API_EXPORT iree_status_t iree_hal_device_submit_and_wait( |
| iree_hal_device_t* device, iree_hal_command_category_t command_categories, |
| iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count, |
| const iree_hal_submission_batch_t* batches, |
| iree_hal_semaphore_t* wait_semaphore, uint64_t wait_value, |
| iree_timeout_t timeout); |
| |
| // Blocks the caller until the semaphores reach or exceed the specified payload |
| // values or the |timeout| elapses. All semaphores in |semaphore_list| must be |
| // created from this device (or be imported into it). |
| // |
| // |wait_mode| can be used to decide when the wait will proceed; whether *all* |
| // semaphores in |semaphore_list| must be signaled or whether *any* (one or |
| // more) can be signaled before an early return. |
| // |
| // Returns success if the wait is successful and semaphores have been signaled |
| // satisfying the |wait_mode|. Note that even on success only a subset of the |
| // semaphores may have been signaled and each can be queried to see which ones. |
| // |
| // Returns IREE_STATUS_DEADLINE_EXCEEDED if the |timeout| elapses without the |
| // |wait_mode| being satisfied. |
| // |
| // Returns IREE_STATUS_ABORTED if one or more semaphores has failed. Callers can |
| // use iree_hal_semaphore_query on the semaphores to find the ones that have |
| // failed and get the status. |
| IREE_API_EXPORT iree_status_t iree_hal_device_wait_semaphores( |
| iree_hal_device_t* device, iree_hal_wait_mode_t wait_mode, |
| const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout); |
| |
| // Blocks the caller until all outstanding requests on all queues have been |
| // completed or the |timeout| elapses. This is equivalent to having waited |
| // on all semaphores outstanding at the time of the call, meaning that if new |
| // work is submitted by another thread it may not be waited on prior to this |
| // call returning. |
| // |
| // Returns success if the device reaches an idle point during the call. |
| // |
| // Returns IREE_STATUS_DEADLINE_EXCEEDED if the |timeout| elapses without the |
| // device having become idle. |
| IREE_API_EXPORT iree_status_t |
| iree_hal_device_wait_idle(iree_hal_device_t* device, iree_timeout_t timeout); |
| |
| //===----------------------------------------------------------------------===// |
| // iree_hal_device_t implementation details |
| //===----------------------------------------------------------------------===// |
| |
| // Table of function pointers describing a device implementation. |
| // |
| // The id, host_allocator, trim, query_i32, transfer_range, queue_submit, |
| // submit_and_wait, wait_semaphores, and wait_idle entries have the same |
| // signatures as the iree_hal_device_* functions of matching name declared in |
| // this header; presumably those functions dispatch through this table (confirm |
| // in the implementation). The create_* entries have no counterpart declared in |
| // this header. |
| typedef struct iree_hal_device_vtable_t { |
| // Same signature as iree_hal_device_destroy. |
| void(IREE_API_PTR* destroy)(iree_hal_device_t* device); |
| |
| iree_string_view_t(IREE_API_PTR* id)(iree_hal_device_t* device); |
| |
| iree_allocator_t(IREE_API_PTR* host_allocator)(iree_hal_device_t* device); |
| // Same signature as iree_hal_device_allocator. |
| iree_hal_allocator_t*(IREE_API_PTR* device_allocator)( |
| iree_hal_device_t* device); |
| |
| iree_status_t(IREE_API_PTR* trim)(iree_hal_device_t* device); |
| |
| iree_status_t(IREE_API_PTR* query_i32)(iree_hal_device_t* device, |
| iree_string_view_t category, |
| iree_string_view_t key, |
| int32_t* out_value); |
| |
| // Object creation entries; each returns its new object through the trailing |
| // out_* parameter. |
| iree_status_t(IREE_API_PTR* create_command_buffer)( |
| iree_hal_device_t* device, iree_hal_command_buffer_mode_t mode, |
| iree_hal_command_category_t command_categories, |
| iree_hal_queue_affinity_t queue_affinity, |
| iree_hal_command_buffer_t** out_command_buffer); |
| |
| iree_status_t(IREE_API_PTR* create_descriptor_set)( |
| iree_hal_device_t* device, iree_hal_descriptor_set_layout_t* set_layout, |
| iree_host_size_t binding_count, |
| const iree_hal_descriptor_set_binding_t* bindings, |
| iree_hal_descriptor_set_t** out_descriptor_set); |
| |
| iree_status_t(IREE_API_PTR* create_descriptor_set_layout)( |
| iree_hal_device_t* device, |
| iree_hal_descriptor_set_layout_usage_type_t usage_type, |
| iree_host_size_t binding_count, |
| const iree_hal_descriptor_set_layout_binding_t* bindings, |
| iree_hal_descriptor_set_layout_t** out_descriptor_set_layout); |
| |
| iree_status_t(IREE_API_PTR* create_event)(iree_hal_device_t* device, |
| iree_hal_event_t** out_event); |
| |
| iree_status_t(IREE_API_PTR* create_executable_cache)( |
| iree_hal_device_t* device, iree_string_view_t identifier, |
| iree_hal_executable_cache_t** out_executable_cache); |
| |
| iree_status_t(IREE_API_PTR* create_executable_layout)( |
| iree_hal_device_t* device, iree_host_size_t push_constants, |
| iree_host_size_t set_layout_count, |
| iree_hal_descriptor_set_layout_t** set_layouts, |
| iree_hal_executable_layout_t** out_executable_layout); |
| |
| iree_status_t(IREE_API_PTR* create_semaphore)( |
| iree_hal_device_t* device, uint64_t initial_value, |
| iree_hal_semaphore_t** out_semaphore); |
| |
| // Transfer, submission, and wait entries. |
| iree_status_t(IREE_API_PTR* transfer_range)( |
| iree_hal_device_t* device, iree_hal_transfer_buffer_t source, |
| iree_device_size_t source_offset, iree_hal_transfer_buffer_t target, |
| iree_device_size_t target_offset, iree_device_size_t data_length, |
| iree_hal_transfer_buffer_flags_t flags, iree_timeout_t timeout); |
| |
| iree_status_t(IREE_API_PTR* queue_submit)( |
| iree_hal_device_t* device, iree_hal_command_category_t command_categories, |
| iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count, |
| const iree_hal_submission_batch_t* batches); |
| |
| iree_status_t(IREE_API_PTR* submit_and_wait)( |
| iree_hal_device_t* device, iree_hal_command_category_t command_categories, |
| iree_hal_queue_affinity_t queue_affinity, iree_host_size_t batch_count, |
| const iree_hal_submission_batch_t* batches, |
| iree_hal_semaphore_t* wait_semaphore, uint64_t wait_value, |
| iree_timeout_t timeout); |
| |
| iree_status_t(IREE_API_PTR* wait_semaphores)( |
| iree_hal_device_t* device, iree_hal_wait_mode_t wait_mode, |
| const iree_hal_semaphore_list_t* semaphore_list, iree_timeout_t timeout); |
| |
| iree_status_t(IREE_API_PTR* wait_idle)(iree_hal_device_t* device, |
| iree_timeout_t timeout); |
| } iree_hal_device_vtable_t; |
| // Macro defined outside this file; presumably a compile-time check of the |
| // vtable struct layout (confirm at its definition). |
| IREE_HAL_ASSERT_VTABLE_LAYOUT(iree_hal_device_vtable_t); |
| |
| // Destroys |device|. |
| // Declared with the implementation details rather than the public API above; |
| // presumably callers use iree_hal_device_release and this runs once the device |
| // is no longer retained (confirm in the implementation). |
| IREE_API_EXPORT void iree_hal_device_destroy(iree_hal_device_t* device); |
| |
| #ifdef __cplusplus |
| } // extern "C" |
| #endif // __cplusplus |
| |
| #endif // IREE_HAL_DEVICE_H_ |