| // Copyright 2019 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // See iree/base/api.h for documentation on the API conventions used. |
| |
| #ifndef IREE_HAL_API_H_ |
| #define IREE_HAL_API_H_ |
| |
| #include <stdbool.h> |
| #include <stdint.h> |
| |
| #include "iree/base/api.h" |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif // __cplusplus |
| |
| //===----------------------------------------------------------------------===// |
| // Types and Enums |
| //===----------------------------------------------------------------------===// |
| |
// Forward declarations of the HAL object types used throughout this API.
// Only the type names are introduced here; the structs are left incomplete in
// this part of the header and are referred to by pointer below.
typedef struct iree_hal_allocator iree_hal_allocator_t;
typedef struct iree_hal_buffer iree_hal_buffer_t;
typedef struct iree_hal_buffer_view iree_hal_buffer_view_t;
typedef struct iree_hal_command_buffer iree_hal_command_buffer_t;
typedef struct iree_hal_descriptor_set iree_hal_descriptor_set_t;
typedef struct iree_hal_descriptor_set_layout iree_hal_descriptor_set_layout_t;
typedef struct iree_hal_device iree_hal_device_t;
typedef struct iree_hal_driver iree_hal_driver_t;
typedef struct iree_hal_driver_registry_s iree_hal_driver_registry_t;
typedef struct iree_hal_executable iree_hal_executable_t;
typedef struct iree_hal_executable_cache iree_hal_executable_cache_t;
typedef struct iree_hal_executable_layout iree_hal_executable_layout_t;
typedef struct iree_hal_semaphore iree_hal_semaphore_t;
| |
| // Reference to a buffer's mapped memory. |
| typedef struct { |
| // Contents of the buffer. Behavior is undefined if an access is performed |
| // whose type was not specified during mapping. |
| iree_byte_span_t contents; |
| |
| // Used internally - do not modify. |
| uint64_t reserved[8]; |
| } iree_hal_mapped_memory_t; |
| |
// Bitfield of properties describing a type of memory.
enum iree_hal_memory_type_e {
  IREE_HAL_MEMORY_TYPE_NONE = 0u,

  // The device allocates the memory lazily and it only exists transiently.
  // Best suited to memory that is used solely within one command buffer.
  // Treat transient buffers as device-local and opaque even when
  // IREE_HAL_MEMORY_TYPE_HOST_VISIBLE is also set: outside of the time they
  // are being evaluated on devices they may have no memory attached.
  //
  // In most cases this is only a hint; an allocation requested with it _may_
  // be identical to one requested without it. Some allocation routines use
  // the hint to control reuse more tightly or to defer wiring the memory.
  IREE_HAL_MEMORY_TYPE_TRANSIENT = 1u << 0,

  // The memory can be mapped for host access with iree_hal_buffer_map.
  IREE_HAL_MEMORY_TYPE_HOST_VISIBLE = 1u << 1,

  // MappedMemory::Flush and MappedMemory::Invalidate (the host cache
  // management commands) are not required to flush host writes to the device
  // or to make device writes visible to the host, respectively.
  IREE_HAL_MEMORY_TYPE_HOST_COHERENT = 1u << 2,

  // The memory is cached on the host. Host accesses to cached memory are
  // faster than to uncached memory, but uncached memory is always host
  // coherent. With cached memory MappedMemory::Flush must be used so the
  // device sees changes made on the host, and Invalidate must be used so the
  // host sees changes made on the device.
  IREE_HAL_MEMORY_TYPE_HOST_CACHED = 1u << 3,

  // The memory can be accessed like normal host-allocated memory.
  IREE_HAL_MEMORY_TYPE_HOST_LOCAL =
      IREE_HAL_MEMORY_TYPE_HOST_VISIBLE | IREE_HAL_MEMORY_TYPE_HOST_COHERENT,

  // The memory is visible to the device for execution. This is not the same
  // as IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL: a device-visible allocation is
  // usable for execution but may need expensive mapping or implicit
  // transfers.
  IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE = 1u << 4,

  // The memory is the most efficient kind for device access. Devices may be
  // able to use memory that is merely IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
  // but that can carry non-trivial performance penalties, whereas
  // device-local memory is guaranteed to be fast for all operations.
  IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL =
      IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE | (1u << 5),
};
typedef uint32_t iree_hal_memory_type_t;
| |
// Bitfield describing how a mapped memory region will be accessed.
enum iree_hal_memory_access_e {
  // The memory is not mapped.
  IREE_HAL_MEMORY_ACCESS_NONE = 0u,
  // The memory will be read.
  // Writing to a buffer that was mapped only for reading may still be
  // possible, but the results are undefined (coherency issues may arise).
  IREE_HAL_MEMORY_ACCESS_READ = 1u << 0,
  // The memory will be written.
  // Reading from a buffer that was mapped only for writing may still be
  // possible, but the results are undefined or incredibly slow (the driver
  // may have mapped it as uncached).
  IREE_HAL_MEMORY_ACCESS_WRITE = 1u << 1,
  // The memory will be discarded prior to mapping.
  // After mapping the previous contents are undefined and must be written to
  // become valid.
  IREE_HAL_MEMORY_ACCESS_DISCARD = 1u << 2,
  // The memory will be discarded and fully overwritten in one operation.
  IREE_HAL_MEMORY_ACCESS_DISCARD_WRITE =
      IREE_HAL_MEMORY_ACCESS_WRITE | IREE_HAL_MEMORY_ACCESS_DISCARD,
  // Modifier usable with any access type: the storage being accessed may
  // alias with other accesses happening concurrently within or across
  // operations. When the flag is absent the access is guaranteed not to alias
  // (like C's `restrict` keyword).
  IREE_HAL_MEMORY_ACCESS_MAY_ALIAS = 1u << 3,
  // Any operation may be performed on the memory.
  IREE_HAL_MEMORY_ACCESS_ALL = IREE_HAL_MEMORY_ACCESS_READ |
                               IREE_HAL_MEMORY_ACCESS_WRITE |
                               IREE_HAL_MEMORY_ACCESS_DISCARD,
};
typedef uint32_t iree_hal_memory_access_t;
| |
// Bitfield declaring the intended uses of a buffer.
// Drivers use it to place the buffer so that the listed kinds of operations
// are more efficient.
enum iree_hal_buffer_usage_e {
  IREE_HAL_BUFFER_USAGE_NONE = 0u,

  // Once defined, the buffer will never be mapped or updated again.
  // Intended for uniform parameter values such as runtime constants for
  // executables. Drivers may then inline the values or represent them more
  // efficiently in command buffers (avoiding memory reads, swapping, etc).
  IREE_HAL_BUFFER_USAGE_CONSTANT = 1u << 0,

  // The buffer may be the source or target of a transfer command
  // (CopyBuffer, UpdateBuffer, etc).
  //
  // When |IREE_HAL_BUFFER_USAGE_MAPPING| is absent, drivers may safely assume
  // the host never needs visibility of the buffer because every access goes
  // through command buffers.
  IREE_HAL_BUFFER_USAGE_TRANSFER = 1u << 1,

  // The host application may map the buffer for reading and writing.
  //
  // Mapping can require placement in special address ranges or system calls
  // to enable visibility, so drivers may use the presence (or absence) of
  // this flag to perform allocation-type setup and avoid initial mapping
  // overhead.
  IREE_HAL_BUFFER_USAGE_MAPPING = 1u << 2,

  // The buffer may be passed to an executable as an input or output.
  // Drivers may use such buffers directly during dispatch.
  IREE_HAL_BUFFER_USAGE_DISPATCH = 1u << 3,

  // The buffer may be used for any operation.
  // Note that IREE_HAL_BUFFER_USAGE_CONSTANT is not part of this mask.
  IREE_HAL_BUFFER_USAGE_ALL = IREE_HAL_BUFFER_USAGE_TRANSFER |
                              IREE_HAL_BUFFER_USAGE_MAPPING |
                              IREE_HAL_BUFFER_USAGE_DISPATCH,
};
typedef uint32_t iree_hal_buffer_usage_t;
| |
// Opaque handle, specific to a driver, that identifies one of its devices.
typedef uintptr_t iree_hal_device_id_t;
| |
| // Describes an enumerated HAL device. |
| typedef struct { |
| // Opaque handle used by drivers. Not valid across driver instances. |
| iree_hal_device_id_t device_id; |
| // Name of the device as returned by the API. |
| iree_string_view_t name; |
| } iree_hal_device_info_t; |
| |
// Opaque handle, specific to a factory, that identifies one of its drivers.
typedef uint64_t iree_hal_driver_id_t;

// Driver ID value that denotes "no valid driver".
#define IREE_HAL_DRIVER_ID_INVALID 0ull
| |
| // Describes a driver providing device enumeration and creation. |
| // The lifetime of memory referenced by this structure (such as strings) is |
| // dependent on where it originated. |
| // |
| // * When using iree_hal_driver_registry_enumerate the driver info is copied |
| // into memory owned by the caller. |
| // * When queried from a live driver with iree_hal_driver_info the memory is |
| // only guaranteed to live for as long as the driver is. |
| // * When enumerating via factories the information may be valid only while the |
| // driver registry lock is held. |
| typedef struct { |
| IREE_API_UNSTABLE |
| |
| // Opaque handle used by factories. Unique across all factories. |
| iree_hal_driver_id_t driver_id; |
| |
| // Canonical name of the driver as used in command lines, documentation, etc. |
| // Examples: 'metal', 'vulkan' |
| iree_string_view_t driver_name; |
| |
| // Full human-readable name of the driver for display. |
| // Examples: 'Vulkan 1.2 (NVIDIA)'. |
| iree_string_view_t full_name; |
| |
| // TODO(benvanik): version information; useful if wanting to expose multiple |
| // versions that may have completely different implementations (like vulkan |
| // 1.0, 1.1, and 1.2) but allow a nice sort/selection process. |
| // TODO(benvanik): triple, feature flags, etc. |
| } iree_hal_driver_info_t; |
| |
// Bitfield selecting the mode of operation of a command buffer.
enum iree_hal_command_buffer_mode_e {
  // The command buffer is submitted a single time and never used afterwards.
  // Knowing the command buffer will not be reused may allow it to be patched
  // in place, reducing overhead.
  IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT = 1u << 0,
};
typedef uint32_t iree_hal_command_buffer_mode_t;
| |
// Bitfield naming the categories of commands in a command queue.
enum iree_hal_command_category_e {
  // Transfer operations (memcpy, etc).
  IREE_HAL_COMMAND_CATEGORY_TRANSFER = 1u << 0,
  // Dispatch operations (dispatch/execute).
  IREE_HAL_COMMAND_CATEGORY_DISPATCH = 1u << 1,
  // Any type of command.
  // This value may prevent optimizations; where possible callers should pass
  // the strictest set they can (for example, only transfer commands so that
  // they get placed on a DMA queue).
  IREE_HAL_COMMAND_CATEGORY_ANY =
      IREE_HAL_COMMAND_CATEGORY_TRANSFER | IREE_HAL_COMMAND_CATEGORY_DISPATCH,
};
typedef uint32_t iree_hal_command_category_t;
| |
// Kind of descriptor stored in a descriptor set.
enum iree_hal_descriptor_type_e {
  IREE_HAL_DESCRIPTOR_TYPE_UNIFORM_BUFFER = 6u,
  IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER = 7u,
  IREE_HAL_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC = 8u,
  IREE_HAL_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC = 9u,
};
typedef uint32_t iree_hal_descriptor_type_t;
| |
| // Specifies a descriptor set binding. |
| // The range specified by [offset, length) will be made available to executables |
| // on the given binding. If the descriptor type is dynamic then the range will |
| // be [offset + dynamic_offset, length). |
| // |
| // The IREE HAL buffer type may internally be offset; such offset is applied |
| // here as if it were the base address of the buffer. Note that the offset will |
| // be applied at the time the binding is recording into the command buffer. |
| // |
| // Maps to VkDescriptorSetBinding. |
| typedef struct { |
| // The binding number of this entry and corresponds to a resource of the |
| // same binding number in the executable interface. |
| int32_t binding; |
| // Buffer bound to the binding number. |
| // May be nullptr if the binding is not used by the executable. |
| iree_hal_buffer_t* buffer; |
| // Offset, in bytes, into the buffer that the binding starts at. |
| // If the descriptor type is dynamic this will be added to the dynamic |
| // offset provided during binding. |
| iree_device_size_t offset; |
| // Length, in bytes, of the buffer that is available to the executable. |
| // This can be IREE_WHOLE_BUFFER, however note that if the entire buffer |
| // contents are larger than supported by the device (~128MiB, usually) this |
| // will fail. If the descriptor type is dynamic this will be used for all |
| // ranges regardless of offset. |
| iree_device_size_t length; |
| } iree_hal_descriptor_set_binding_t; |
| |
// How descriptor sets created with a layout are going to be used.
enum iree_hal_descriptor_set_layout_usage_type_e {
  // The descriptor set is initialized once and never changes afterwards.
  IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_IMMUTABLE = 0u,
  // No descriptor set is ever created; push descriptors are used instead.
  IREE_HAL_DESCRIPTOR_SET_LAYOUT_USAGE_TYPE_PUSH_ONLY = 1u,
};
typedef uint32_t iree_hal_descriptor_set_layout_usage_type_t;
| |
| // Specifies a descriptor set layout binding. |
| // |
| // Maps to VkDescriptorSetLayoutBinding. |
| typedef struct { |
| // The binding number of this entry and corresponds to a resource of the |
| // same binding number in the executable interface. |
| int32_t binding; |
| // Specifies which type of resource descriptors are used for this binding. |
| iree_hal_descriptor_type_t type; |
| // Specifies the memory access performed by the executables. |
| iree_hal_memory_access_t access; |
| } iree_hal_descriptor_set_layout_binding_t; |
| |
// Identifies an executable format; used when querying for format support.
typedef uint32_t iree_hal_executable_format_t;
| |
// Flags controlling how an executable cache prepares executables.
enum iree_hal_executable_caching_mode_e {
  // The cache may keep referencing the provided executable_data after the
  // executable has been prepared. The caller must keep the data valid for the
  // lifetime of the cache. Useful to avoid copies when constant executable
  // data is memory mapped from disk.
  IREE_HAL_EXECUTABLE_CACHING_MODE_ALIAS_PROVIDED_DATA = 1u << 0,
  // The prepared executable may be cached persistently (on disk/etc).
  // Enable for executables that are likely to be used in future runs. This is
  // only a hint as not every cache supports persistent serialization.
  IREE_HAL_EXECUTABLE_CACHING_MODE_ALLOW_PERSISTENT_CACHING = 1u << 1,
  // The cache may optimize the executable as much as it can.
  // Preparation may take significantly longer in exchange for (hopefully)
  // better runtime performance. Avoid for one-shot executables.
  IREE_HAL_EXECUTABLE_CACHING_MODE_ALLOW_OPTIMIZATION = 1u << 2,
  // Turns on Executable debugging methods when the device and executable
  // support them. Certain optimizations may be disabled or extra data may be
  // retained to allow disassembly, stepping, etc.
  //
  // Requires the DeviceFeature::kDebugging feature on the device and the
  // ExecutableFeature::kDebugging feature on executables.
  IREE_HAL_EXECUTABLE_CACHING_MODE_ENABLE_DEBUGGING = 1u << 3,
  // Turns on Executable coverage when the device and executable support it.
  // Depending on the optimization mode the coverage results may be partial
  // (for example, when certain source operations were optimized away).
  //
  // Requires the DeviceFeature::kCoverage feature on the device and the
  // ExecutableFeature::kCoverage feature on executables.
  IREE_HAL_EXECUTABLE_CACHING_MODE_ENABLE_COVERAGE = 1u << 4,
  // Turns on Executable profiling when the device and executable support it.
  // Depending on the optimization mode the profiling results may be partial.
  // Whether results are attributed to the entire executable or to specific
  // operations depends on the implementation.
  //
  // Requires the DeviceFeature::kProfiling feature on the device and the
  // ExecutableFeature::kProfiling feature on executables.
  IREE_HAL_EXECUTABLE_CACHING_MODE_ENABLE_PROFILING = 1u << 5,
  // Default caching mode: persistent caching and optimization allowed.
  IREE_HAL_EXECUTABLE_CACHING_MODE_DEFAULT =
      IREE_HAL_EXECUTABLE_CACHING_MODE_ALLOW_PERSISTENT_CACHING |
      IREE_HAL_EXECUTABLE_CACHING_MODE_ALLOW_OPTIMIZATION,
};
typedef uint32_t iree_hal_executable_caching_mode_t;
| |
// Bitfield naming the execution stages at which a barrier may start or end.
//
// Maps to VkPipelineStageFlagBits.
enum iree_hal_execution_stage_e {
  // Top of the pipeline: commands are initially issued by the device.
  IREE_HAL_EXECUTION_STAGE_COMMAND_ISSUE = 1u << 0,
  // Pipeline stage in which dispatch parameter data is consumed.
  IREE_HAL_EXECUTION_STAGE_COMMAND_PROCESS = 1u << 1,
  // Stage in which dispatch commands execute.
  IREE_HAL_EXECUTION_STAGE_DISPATCH = 1u << 2,
  // Stage in which transfer (copy/clear/fill/etc) commands execute.
  IREE_HAL_EXECUTION_STAGE_TRANSFER = 1u << 3,
  // Last stage of the pipeline: commands are retired on the device.
  IREE_HAL_EXECUTION_STAGE_COMMAND_RETIRE = 1u << 4,
  // Pseudo-stage for reads/writes by the host. Not executed on device.
  IREE_HAL_EXECUTION_STAGE_HOST = 1u << 5,
};
typedef uint32_t iree_hal_execution_stage_t;
| |
// Bitfield describing which scopes access memory and in what way.
//
// Maps to VkAccessFlagBits.
enum iree_hal_access_scope_e {
  // Indirect command data is read as part of an indirect dispatch.
  IREE_HAL_ACCESS_SCOPE_INDIRECT_COMMAND_READ = 1u << 0,
  // The device reads a constant uniform buffer.
  IREE_HAL_ACCESS_SCOPE_CONSTANT_READ = 1u << 1,
  // Dispatch commands read a storage buffer.
  IREE_HAL_ACCESS_SCOPE_DISPATCH_READ = 1u << 2,
  // Dispatch commands write a storage buffer.
  IREE_HAL_ACCESS_SCOPE_DISPATCH_WRITE = 1u << 3,
  // The memory is the source of a transfer operation.
  IREE_HAL_ACCESS_SCOPE_TRANSFER_READ = 1u << 4,
  // The memory is the target of a transfer operation.
  IREE_HAL_ACCESS_SCOPE_TRANSFER_WRITE = 1u << 5,
  // The host reads through mapped memory.
  IREE_HAL_ACCESS_SCOPE_HOST_READ = 1u << 6,
  // The host writes through mapped memory.
  IREE_HAL_ACCESS_SCOPE_HOST_WRITE = 1u << 7,
  // External/non-specific read.
  IREE_HAL_ACCESS_SCOPE_MEMORY_READ = 1u << 8,
  // External/non-specific write.
  IREE_HAL_ACCESS_SCOPE_MEMORY_WRITE = 1u << 9,
};
typedef uint32_t iree_hal_access_scope_t;
| |
| // Defines a global memory barrier. |
| // These are cheaper to encode than buffer-specific barriers but may cause |
| // stalls and bubbles in device pipelines if applied too broadly. Prefer them |
| // over equivalently large sets of buffer-specific barriers (such as when |
| // completely changing execution contexts). |
| // |
| // Maps to VkMemoryBarrier. |
| typedef struct { |
| // All access scopes prior-to the barrier (inclusive). |
| iree_hal_access_scope_t source_scope; |
| // All access scopes following the barrier (inclusive). |
| iree_hal_access_scope_t target_scope; |
| } iree_hal_memory_barrier_t; |
| |
| // Defines a memory barrier that applies to a range of a specific buffer. |
| // Use of these (vs. global memory barriers) provides fine-grained execution |
| // ordering to device command processors and allows for more aggressive |
| // reordering. |
| // |
| // Maps to VkBufferMemoryBarrier. |
| typedef struct { |
| // All access scopes prior-to the barrier (inclusive). |
| iree_hal_access_scope_t source_scope; |
| // All access scopes following the barrier (inclusive). |
| iree_hal_access_scope_t target_scope; |
| // Buffer the barrier is restricted to. |
| // The barrier will apply to the entire physical device allocation. |
| iree_hal_buffer_t* buffer; |
| // Relative offset/length within |buffer| (which may itself be mapped into the |
| // device allocation at an offset). |
| iree_device_size_t offset; |
| iree_device_size_t length; |
| } iree_hal_buffer_barrier_t; |
| |
| // A list of semaphores and their corresponding payloads. |
| // When signaling each semaphore will be set to the new payload value provided. |
| // When waiting each semaphore must reach or exceed the payload value. |
| typedef struct { |
| iree_host_size_t count; |
| iree_hal_semaphore_t** semaphores; |
| uint64_t* payload_values; |
| } iree_hal_semaphore_list_t; |
| |
| // A single batch of command buffers submitted to a device queue. |
| // All of the wait semaphores must reach or exceed the given payload value prior |
| // to the batch beginning execution. Each command buffer begins execution in the |
| // order it is present in the list, though note that the command buffers |
| // execute concurrently and require internal synchronization via events if there |
| // are any dependencies between them. Only after all command buffers have |
| // completed will the signal semaphores be updated to the provided payload |
| // values. |
| // |
| // Matches Vulkan's VkSubmitInfo: |
| // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkSubmitInfo.html |
| // Note that as the HAL only models timeline semaphores we take the payload |
| // values directly in this struct; see: |
| // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkTimelineSemaphoreSubmitInfo.html |
| typedef struct { |
| // Semaphores to wait on prior to executing any command buffer. |
| iree_hal_semaphore_list_t wait_semaphores; |
| |
| // Command buffers to execute, in order. |
| iree_host_size_t command_buffer_count; |
| iree_hal_command_buffer_t** command_buffers; |
| |
| // Semaphores to signal once all command buffers have completed execution. |
| iree_hal_semaphore_list_t signal_semaphores; |
| } iree_hal_submission_batch_t; |
| |
// Selects how a multi-wait operation combines the state of its semaphores.
enum iree_hal_wait_mode_e {
  // Wait until every semaphore has reached or exceeded its specified value.
  IREE_HAL_WAIT_MODE_ALL = 0,
  // Wait until one or more semaphores have reached or exceeded their
  // specified values.
  IREE_HAL_WAIT_MODE_ANY = 1,
};
typedef uint8_t iree_hal_wait_mode_t;
| |
// Keep these in sync with iree/compiler/Dialect/HAL/IR/HALTypes.cpp

// Numerical interpretation of an element; stored in the most significant
// byte of an element type value (see IREE_HAL_ELEMENT_TYPE_VALUE).
enum iree_hal_numerical_type_e {
  IREE_HAL_NUMERICAL_TYPE_UNKNOWN = 0x00u,
  IREE_HAL_NUMERICAL_TYPE_INTEGER_SIGNED = 0x01u,
  IREE_HAL_NUMERICAL_TYPE_INTEGER_UNSIGNED = 0x02u,
  // TODO(benvanik): specialize with semantics from APFloat.
  IREE_HAL_NUMERICAL_TYPE_FLOAT_IEEE = 0x03u,
};
typedef uint8_t iree_hal_numerical_type_t;
| |
// Builds the raw 32-bit element type value: |numerical_type| is shifted into
// bits 24..31 and OR'ed with |bit_count|. Usable in constant expressions such
// as enumerator initializers. |bit_count| is not masked here;
// iree_hal_element_bit_count only reads back the low 8 bits.
#define IREE_HAL_ELEMENT_TYPE_VALUE(numerical_type, bit_count) \
  (((uint32_t)(numerical_type) << 24) | (uint32_t)(bit_count))

// Same as IREE_HAL_ELEMENT_TYPE_VALUE but typed as iree_hal_element_type_t.
#define iree_hal_make_element_type(numerical_type, bit_count) \
  ((iree_hal_element_type_t)(                                 \
      IREE_HAL_ELEMENT_TYPE_VALUE(numerical_type, bit_count)))

// Extracts the numerical type (bits 24..31) of |element_type|.
#define iree_hal_element_numerical_type(element_type) \
  ((iree_hal_numerical_type_t)((uint32_t)(element_type) >> 24))

// Extracts the bit count (low 8 bits) of |element_type| as a size_t.
#define iree_hal_element_bit_count(element_type) \
  ((size_t)((element_type) & 0xFF))

// Number of whole bytes needed to hold one element (bit count rounded up to a
// multiple of 8).
#define iree_hal_element_byte_count(element_type) \
  ((iree_hal_element_bit_count(element_type) + 8 - 1) / 8)
| |
| // Defines the element type of a buffer in a standard format. |
| // |
| // Composed as a 32-bit bitfield to allow for opaque data types. Use |
| // iree_hal_make_element_type to make a bitfield with the appropriate ordering. |
| // |
| // MSB ----------------------------------------------- LSB |
| // [numerical type] [reserved] [reserved] [number of bits] |
| // |
| // clang-format off |
| enum iree_hal_element_type_e { |
| IREE_HAL_ELEMENT_TYPE_NONE = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_UNKNOWN, 0), // NOLINT |
| IREE_HAL_ELEMENT_TYPE_OPAQUE_8 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_UNKNOWN, 8), // NOLINT |
| IREE_HAL_ELEMENT_TYPE_OPAQUE_16 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_UNKNOWN, 16), // NOLINT |
| IREE_HAL_ELEMENT_TYPE_OPAQUE_32 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_UNKNOWN, 32), // NOLINT |
| IREE_HAL_ELEMENT_TYPE_OPAQUE_64 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_UNKNOWN, 64), // NOLINT |
| IREE_HAL_ELEMENT_TYPE_SINT_8 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_SIGNED, 8), // NOLINT |
| IREE_HAL_ELEMENT_TYPE_UINT_8 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_UNSIGNED, 8), // NOLINT |
| IREE_HAL_ELEMENT_TYPE_SINT_16 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_SIGNED, 16), // NOLINT |
| IREE_HAL_ELEMENT_TYPE_UINT_16 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_UNSIGNED, 16), // NOLINT |
| IREE_HAL_ELEMENT_TYPE_SINT_32 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_SIGNED, 32), // NOLINT |
| IREE_HAL_ELEMENT_TYPE_UINT_32 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_UNSIGNED, 32), // NOLINT |
| IREE_HAL_ELEMENT_TYPE_SINT_64 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_SIGNED, 64), // NOLINT |
| IREE_HAL_ELEMENT_TYPE_UINT_64 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_INTEGER_UNSIGNED, 64), // NOLINT |
| IREE_HAL_ELEMENT_TYPE_FLOAT_16 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_FLOAT_IEEE, 16), // NOLINT |
| IREE_HAL_ELEMENT_TYPE_FLOAT_32 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_FLOAT_IEEE, 32), // NOLINT |
| IREE_HAL_ELEMENT_TYPE_FLOAT_64 = IREE_HAL_ELEMENT_TYPE_VALUE(IREE_HAL_NUMERICAL_TYPE_FLOAT_IEEE, 64), // NOLINT |
| }; |
| typedef uint32_t iree_hal_element_type_t; |
| // clang-format on |
| |
// Size of a single dimension of a shape.
typedef int32_t iree_hal_dim_t;
| |
| //===----------------------------------------------------------------------===// |
| // Utilities |
| //===----------------------------------------------------------------------===// |
| |
| // Parses a serialized set of shape dimensions using the canonical shape format |
| // (the same as produced by iree_hal_format_shape). |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_parse_shape( |
| iree_string_view_t value, iree_host_size_t shape_capacity, |
| iree_hal_dim_t* out_shape, iree_host_size_t* out_shape_rank); |
| |
| // Converts shape dimensions into a `4x5x6` format. |
| // |
| // Follows the standard API string formatting rules. See iree/base/api.h. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_format_shape(const iree_hal_dim_t* shape, iree_host_size_t shape_rank, |
| iree_host_size_t buffer_capacity, char* buffer, |
| iree_host_size_t* out_buffer_length); |
| |
| // Parses a serialized iree_hal_element_type_t and sets |out_element_type| if |
| // it is valid. The format is the same as produced by |
| // iree_hal_format_element_type. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_parse_element_type( |
| iree_string_view_t value, iree_hal_element_type_t* out_element_type); |
| |
| // Converts an iree_hal_element_type_t enum value to a canonical string |
| // representation, like `IREE_HAL_ELEMENT_TYPE_FLOAT_16` to `f16`. |
| // |buffer_capacity| defines the size of |buffer| in bytes and |
| // |out_buffer_length| will return the string length in characters. |
| // |
| // Follows the standard API string formatting rules. See iree/base/api.h. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_format_element_type( |
| iree_hal_element_type_t element_type, iree_host_size_t buffer_capacity, |
| char* buffer, iree_host_size_t* out_buffer_length); |
| |
| // Parses a serialized element of |element_type| to its in-memory form. |
| // |data_ptr| must be at least large enough to contain the bytes of the element. |
| // For example, "1.2" of type IREE_HAL_ELEMENT_TYPE_FLOAT32 will write the 4 |
| // byte float value of 1.2 to |data_ptr|. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_parse_element( |
| iree_string_view_t data_str, iree_hal_element_type_t element_type, |
| iree_byte_span_t data_ptr); |
| |
| // Converts a single element of |element_type| to a string. |
| // |
| // |buffer_capacity| defines the size of |buffer| in bytes and |
| // |out_buffer_length| will return the string length in characters. Returns |
| // IREE_STATUS_OUT_OF_RANGE if the buffer capacity is insufficient to hold the |
| // formatted elements and |out_buffer_length| will contain the required size. |
| // |
| // Follows the standard API string formatting rules. See iree/base/api.h. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_format_element( |
| iree_const_byte_span_t data, iree_hal_element_type_t element_type, |
| iree_host_size_t buffer_capacity, char* buffer, |
| iree_host_size_t* out_buffer_length); |
| |
| // Parses a serialized set of elements of the given |element_type|. |
| // The resulting parsed data is written to |data_ptr|, which must be at least |
| // large enough to contain the parsed elements. The format is the same as |
| // produced by iree_hal_format_buffer_elements. Supports additional inputs of |
| // empty to denote a 0 fill and a single element to denote a splat. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_parse_buffer_elements( |
| iree_string_view_t data_str, iree_hal_element_type_t element_type, |
| iree_byte_span_t data_ptr); |
| |
| // Converts a shaped buffer of |element_type| elements to a string. |
| // This will include []'s to denote each dimension, for example for a shape of |
| // 2x3 the elements will be formatted as `[1 2 3][4 5 6]`. |
| // |
| // |max_element_count| can be used to limit the total number of elements printed |
| // when the count may be large. Elided elements will be replaced with `...`. |
| // |
| // |buffer_capacity| defines the size of |buffer| in bytes and |
| // |out_buffer_length| will return the string length in characters. Returns |
| // IREE_STATUS_OUT_OF_RANGE if the buffer capacity is insufficient to hold the |
| // formatted elements and |out_buffer_length| will contain the required size. |
| // |
| // Follows the standard API string formatting rules. See iree/base/api.h. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_format_buffer_elements( |
| iree_const_byte_span_t data, const iree_hal_dim_t* shape, |
| iree_host_size_t shape_rank, iree_hal_element_type_t element_type, |
| iree_host_size_t max_element_count, iree_host_size_t buffer_capacity, |
| char* buffer, iree_host_size_t* out_buffer_length); |
| |
| //===----------------------------------------------------------------------===// |
| // iree::hal::Allocator |
| //===----------------------------------------------------------------------===// |
| |
| // Creates a host-local heap allocator in |out_allocator| that can be used when |
| // buffers are required that will not interact with a real hardware device (such |
| // as those used in file IO or tests). Buffers allocated with this will not be |
| // compatible with real device allocators and will likely incur a copy if used. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_allocator_create_host_local(iree_allocator_t allocator, |
| iree_hal_allocator_t** out_allocator); |
| |
| // Retains the given |allocator| for the caller (see iree_hal_allocator_release). |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_allocator_retain(iree_hal_allocator_t* allocator); |
| |
| // Releases the given |allocator| from the caller (see iree_hal_allocator_retain). |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_allocator_release(iree_hal_allocator_t* allocator); |
| |
| // Calculates the allocation size of a buffer of |shape| and |element_type| into |out_allocation_size|. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_allocator_compute_size( |
| const iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape, |
| iree_host_size_t shape_rank, iree_hal_element_type_t element_type, |
| iree_device_size_t* out_allocation_size); |
| |
| // Calculates a byte offset into a buffer at the given indices. |
| // |shape| (of rank |shape_rank|) and |element_type| describe the buffer, |
| // |indices| holds |indices_count| index values, and the computed offset is |
| // returned in |out_offset|. |
| // |
| // |indices_count| is an iree_host_size_t like every other count parameter in |
| // this API (see iree_hal_allocator_compute_range). |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_allocator_compute_offset( |
| const iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape, |
| iree_host_size_t shape_rank, iree_hal_element_type_t element_type, |
| const iree_hal_dim_t* indices, iree_host_size_t indices_count, |
| iree_device_size_t* out_offset); |
| |
| // Calculates a byte range (|out_start_offset|, |out_length|) into a buffer of the given contiguous range. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_allocator_compute_range( |
| const iree_hal_allocator_t* allocator, const iree_hal_dim_t* shape, |
| iree_host_size_t shape_rank, iree_hal_element_type_t element_type, |
| const iree_hal_dim_t* start_indices, iree_host_size_t indices_count, |
| const iree_hal_dim_t* lengths, iree_host_size_t lengths_count, |
| iree_device_size_t* out_start_offset, iree_device_size_t* out_length); |
| |
| // Allocates a buffer of |allocation_size| from the |allocator|. |
| // Fails if the requested |memory_type| cannot be serviced for |buffer_usage|. |
| // Callers can use iree_hal_allocator_can_allocate to decide their memory use |
| // strategy. |
| // |
| // The memory type of the buffer returned may differ from |memory_type| if the |
| // device can provide more functionality; for example, when requesting |
| // MemoryType::kHostVisible and the memory is really host cached you may get |
| // a buffer back with MemoryType::kHostVisible | MemoryType::kHostCached. The |
| // only requirement is that the buffer satisfies the required bits. |
| // |
| // Fails if it is not possible to allocate and satisfy all placements for the |
| // requested |buffer_usage|. |
| // |out_buffer| must be released by the caller. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_allocator_allocate_buffer( |
| iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type, |
| iree_hal_buffer_usage_t buffer_usage, iree_host_size_t allocation_size, |
| iree_hal_buffer_t** out_buffer); |
| |
| // Wraps the existing host allocation |data| in a buffer. |
| // Ownership of the allocation remains with the caller and the memory must |
| // remain valid for as long as the buffer may be in use. |
| // |
| // Fails if the allocator cannot access host memory in this way. |
| // |out_buffer| must be released by the caller. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_allocator_wrap_buffer( |
| iree_hal_allocator_t* allocator, iree_hal_memory_type_t memory_type, |
| iree_hal_memory_access_t allowed_access, |
| iree_hal_buffer_usage_t buffer_usage, iree_byte_span_t data, |
| iree_hal_buffer_t** out_buffer); |
| |
| //===----------------------------------------------------------------------===// |
| // iree::hal::Buffer |
| //===----------------------------------------------------------------------===// |
| |
| // Returns in |out_buffer| a reference to a subspan of the |buffer|. |
| // If |byte_length| is IREE_WHOLE_BUFFER the remaining bytes in the buffer after |
| // |byte_offset| (possibly 0) will be selected. |
| // |
| // The parent buffer will remain alive for the lifetime of the subspan |
| // returned. If the subspan is a small portion of the parent this may cause |
| // additional memory to remain allocated longer than required. |
| // |
| // Returns the given |buffer| if the requested span covers the entire range. |
| // |out_buffer| must be released by the caller. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_buffer_subspan( |
| iree_hal_buffer_t* buffer, iree_device_size_t byte_offset, |
| iree_device_size_t byte_length, iree_allocator_t allocator, |
| iree_hal_buffer_t** out_buffer); |
| |
| // Retains the given |buffer| for the caller (see iree_hal_buffer_release). |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_buffer_retain(iree_hal_buffer_t* buffer); |
| |
| // Releases the given |buffer| from the caller (see iree_hal_buffer_retain). |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_buffer_release(iree_hal_buffer_t* buffer); |
| |
| // Returns the allocator that |buffer| was allocated from. |
| IREE_API_EXPORT iree_hal_allocator_t* IREE_API_CALL |
| iree_hal_buffer_allocator(const iree_hal_buffer_t* buffer); |
| |
| // Returns the size of |buffer| in bytes. |
| IREE_API_EXPORT iree_device_size_t IREE_API_CALL |
| iree_hal_buffer_byte_length(const iree_hal_buffer_t* buffer); |
| |
| // Sets the |byte_offset|/|byte_length| range of |buffer| to binary zero. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_buffer_zero(iree_hal_buffer_t* buffer, iree_device_size_t byte_offset, |
| iree_device_size_t byte_length); |
| |
| // Sets the |byte_offset|/|byte_length| range of |buffer| to the given |pattern| value. |
| // Only |pattern_length| values of 1, 2, or 4 bytes are supported. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_buffer_fill(iree_hal_buffer_t* buffer, iree_device_size_t byte_offset, |
| iree_device_size_t byte_length, const void* pattern, |
| iree_host_size_t pattern_length); |
| |
| // Reads a |data_length| block of data from |buffer| at |source_offset| into |target_buffer|. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_buffer_read_data( |
| iree_hal_buffer_t* buffer, iree_device_size_t source_offset, |
| void* target_buffer, iree_device_size_t data_length); |
| |
| // Writes a |data_length| block of byte data from |source_buffer| into |buffer| at |target_offset|. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_buffer_write_data( |
| iree_hal_buffer_t* buffer, iree_device_size_t target_offset, |
| const void* source_buffer, iree_device_size_t data_length); |
| |
| // Copies |data_length| of data from |source_buffer| at |source_offset| into |target_buffer| at |target_offset|. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_buffer_copy_data( |
| iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset, |
| iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset, |
| iree_device_size_t data_length); |
| |
| // Maps the given range of |buffer| to be accessed as a host pointer into |out_mapped_memory|. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_buffer_map( |
| iree_hal_buffer_t* buffer, iree_hal_memory_access_t memory_access, |
| iree_device_size_t byte_offset, iree_device_size_t byte_length, |
| iree_hal_mapped_memory_t* out_mapped_memory); |
| |
| // Unmaps the |buffer| as it was previously mapped to |mapped_memory| (see iree_hal_buffer_map). |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_buffer_unmap( |
| iree_hal_buffer_t* buffer, iree_hal_mapped_memory_t* mapped_memory); |
| |
| //===----------------------------------------------------------------------===// |
| // iree::hal::HeapBuffer |
| //===----------------------------------------------------------------------===// |
| |
| // Allocates a zeroed host heap buffer of |allocation_size|. |
| // The buffer contents will be allocated with |contents_allocator| while |
| // |allocator| is used for the iree_hal_buffer_t itself. |
| // |
| // Returns a buffer allocated with malloc that may not be usable by devices |
| // without copies. |memory_type| should be set to |
| // IREE_HAL_MEMORY_TYPE_HOST_LOCAL in most cases. |
| // |out_buffer| must be released by the caller. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_heap_buffer_allocate( |
| iree_hal_memory_type_t memory_type, iree_hal_buffer_usage_t usage, |
| iree_host_size_t allocation_size, iree_allocator_t contents_allocator, |
| iree_allocator_t allocator, iree_hal_buffer_t** out_buffer); |
| |
| // Allocates a host heap buffer with a copy of the given |contents|. |
| // The buffer contents will be allocated with |contents_allocator| while |
| // |allocator| is used for the iree_hal_buffer_t itself. |
| // |
| // Returns a buffer allocated with malloc that may not be usable by devices |
| // without copies. |memory_type| should be set to |
| // IREE_HAL_MEMORY_TYPE_HOST_LOCAL in most cases. |
| // |out_buffer| must be released by the caller. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_heap_buffer_allocate_copy( |
| iree_hal_memory_type_t memory_type, iree_hal_buffer_usage_t usage, |
| iree_hal_memory_access_t allowed_access, iree_byte_span_t contents, |
| iree_allocator_t contents_allocator, iree_allocator_t allocator, |
| iree_hal_buffer_t** out_buffer); |
| |
| // Wraps the existing host heap allocation |contents| in a buffer. |
| // Ownership of the host allocation remains with the caller and the memory |
| // must remain valid for as long as the iree_hal_buffer_t may be in use. |
| // |
| // Returns a buffer allocated with malloc that may not be usable by devices |
| // without copies. |memory_type| should be set to |
| // IREE_HAL_MEMORY_TYPE_HOST_LOCAL in most cases. |
| // |out_buffer| must be released by the caller. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_heap_buffer_wrap( |
| iree_hal_memory_type_t memory_type, iree_hal_memory_access_t allowed_access, |
| iree_hal_buffer_usage_t usage, iree_byte_span_t contents, |
| iree_allocator_t allocator, iree_hal_buffer_t** out_buffer); |
| |
| // TODO(benvanik): add a wrap that takes an allocator just for the buffer. |
| |
| //===----------------------------------------------------------------------===// |
| // iree::hal::BufferView |
| //===----------------------------------------------------------------------===// |
| |
| // Creates a buffer view of the given |buffer| with the given |shape| and |element_type|. |
| // |out_buffer_view| must be released by the caller. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_buffer_view_create( |
| iree_hal_buffer_t* buffer, const iree_hal_dim_t* shape, |
| iree_host_size_t shape_rank, iree_hal_element_type_t element_type, |
| iree_allocator_t allocator, iree_hal_buffer_view_t** out_buffer_view); |
| |
| // Creates a buffer view in |out_buffer_view| referencing a subview of the given |buffer_view|. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_buffer_view_subview( |
| const iree_hal_buffer_view_t* buffer_view, |
| const iree_hal_dim_t* start_indices, iree_host_size_t indices_count, |
| const iree_hal_dim_t* lengths, iree_host_size_t lengths_count, |
| iree_allocator_t allocator, iree_hal_buffer_view_t** out_buffer_view); |
| |
| // Retains the given |buffer_view| for the caller (see iree_hal_buffer_view_release). |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_buffer_view_retain(iree_hal_buffer_view_t* buffer_view); |
| |
| // Releases the given |buffer_view| from the caller (see iree_hal_buffer_view_retain). |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_buffer_view_release(iree_hal_buffer_view_t* buffer_view); |
| |
| // Returns the buffer underlying the |buffer_view|. |
| // The caller must retain the returned buffer if they want to continue using it. |
| IREE_API_EXPORT iree_hal_buffer_t* IREE_API_CALL |
| iree_hal_buffer_view_buffer(const iree_hal_buffer_view_t* buffer_view); |
| |
| // Returns the rank of the shape associated with the |buffer_view|. |
| IREE_API_EXPORT iree_host_size_t IREE_API_CALL |
| iree_hal_buffer_view_shape_rank(const iree_hal_buffer_view_t* buffer_view); |
| |
| // Returns the value of the dimension of the |buffer_view| shape at the given |index|. |
| IREE_API_EXPORT iree_host_size_t IREE_API_CALL iree_hal_buffer_view_shape_dim( |
| const iree_hal_buffer_view_t* buffer_view, iree_host_size_t index); |
| |
| // Returns the dimensions of the |buffer_view| shape in |out_shape| and its rank |
| // in |out_shape_rank|. |rank_capacity| indicates the number of dimensions |
| // available in the |out_shape| buffer. If there is not enough capacity to store |
| // all of the dimensions IREE_STATUS_OUT_OF_RANGE is returned. |
| // |out_shape_rank| can be omitted (presumably by passing NULL; confirm) if the rank is already known. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_buffer_view_shape( |
| const iree_hal_buffer_view_t* buffer_view, iree_host_size_t rank_capacity, |
| iree_hal_dim_t* out_shape, iree_host_size_t* out_shape_rank); |
| |
| // Returns the total number of elements stored in the |buffer_view|. NOTE(review): no IREE_API_CALL here, unlike siblings; confirm. |
| IREE_API_EXPORT iree_host_size_t |
| iree_hal_buffer_view_element_count(const iree_hal_buffer_view_t* buffer_view); |
| |
| // Returns the element type of the elements in the |buffer_view|. |
| IREE_API_EXPORT iree_hal_element_type_t IREE_API_CALL |
| iree_hal_buffer_view_element_type(const iree_hal_buffer_view_t* buffer_view); |
| |
| // Returns the size in bytes of each element in the |buffer_view|. |
| // Note that not all buffers are contiguous or densely packed. |
| IREE_API_EXPORT iree_host_size_t IREE_API_CALL |
| iree_hal_buffer_view_element_size(const iree_hal_buffer_view_t* buffer_view); |
| |
| // Returns the total size in bytes of the specified |buffer_view|. |
| // Note that not all buffers are contiguous or densely packed. |
| IREE_API_EXPORT iree_device_size_t IREE_API_CALL |
| iree_hal_buffer_view_byte_length(const iree_hal_buffer_view_t* buffer_view); |
| |
| // Calculates into |out_offset| a byte offset into the |buffer_view| at the given |indices|. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_buffer_view_compute_offset( |
| const iree_hal_buffer_view_t* buffer_view, const iree_hal_dim_t* indices, |
| iree_host_size_t indices_count, iree_device_size_t* out_offset); |
| |
| // Calculates a byte range (|out_start_offset|, |out_length|) into the |buffer_view| of the given contiguous range. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_buffer_view_compute_range( |
| const iree_hal_buffer_view_t* buffer_view, |
| const iree_hal_dim_t* start_indices, iree_host_size_t indices_count, |
| const iree_hal_dim_t* lengths, iree_host_size_t lengths_count, |
| iree_device_size_t* out_start_offset, iree_device_size_t* out_length); |
| |
| // Parses |value|, a serialized set of buffer elements in the canonical tensor |
| // format (the same as produced by iree_hal_buffer_view_format), into |
| // |out_buffer_view|. The underlying buffer will be allocated with |
| // |buffer_allocator| as a host-local/device-visible buffer. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_buffer_view_parse( |
| iree_string_view_t value, iree_hal_allocator_t* buffer_allocator, |
| iree_allocator_t allocator, iree_hal_buffer_view_t** out_buffer_view); |
| |
| // Converts the elements of |buffer_view| into a fully-specified string-form |
| // format like `2x4xi16=[[1 2][3 4]]`. |
| // |
| // |max_element_count| can be used to limit the total number of elements printed |
| // when the count may be large. Elided elements will be replaced with `...`. |
| // |
| // |buffer_capacity| is the size of |buffer| in bytes and |out_buffer_length| |
| // receives the string length in characters. Returns IREE_STATUS_OUT_OF_RANGE |
| // if the capacity is insufficient to hold the formatted elements, in which |
| // case |out_buffer_length| will contain the required size. |
| // |
| // Follows the standard API string formatting rules. See iree/base/api.h. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_buffer_view_format( |
| const iree_hal_buffer_view_t* buffer_view, |
| iree_host_size_t max_element_count, iree_host_size_t buffer_capacity, |
| char* buffer, iree_host_size_t* out_buffer_length); |
| |
| //===----------------------------------------------------------------------===// |
| // iree::hal::CommandBuffer |
| //===----------------------------------------------------------------------===// |
| |
| // Creates a command buffer in |out_command_buffer| ready to begin recording, |
| // possibly reusing an existing one from the |device| pool. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_command_buffer_create( |
| iree_hal_device_t* device, iree_hal_command_buffer_mode_t mode, |
| iree_hal_command_category_t command_categories, iree_allocator_t allocator, |
| iree_hal_command_buffer_t** out_command_buffer); |
| |
| // Retains the given |command_buffer| for the caller (see iree_hal_command_buffer_release). |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_command_buffer_retain(iree_hal_command_buffer_t* command_buffer); |
| |
| // Releases the given |command_buffer| from the caller (see iree_hal_command_buffer_retain). |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_command_buffer_release(iree_hal_command_buffer_t* command_buffer); |
| |
| // Resets the |command_buffer| and begins recording into it, clearing all |
| // previously recorded contents. |
| // The command buffer must not be in-flight. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_command_buffer_begin(iree_hal_command_buffer_t* command_buffer); |
| |
| // Ends recording into the |command_buffer|. |
| // This must be called prior to submitting the command buffer for execution. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_command_buffer_end(iree_hal_command_buffer_t* command_buffer); |
| |
| // Defines a memory dependency between commands recorded before and after the |
| // barrier. One or more |memory_barriers| or |buffer_barriers| can be specified |
| // to indicate between which stages or buffers the dependencies exist. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_command_buffer_execution_barrier( |
| iree_hal_command_buffer_t* command_buffer, |
| iree_hal_execution_stage_t source_stage_mask, |
| iree_hal_execution_stage_t target_stage_mask, |
| iree_host_size_t memory_barrier_count, |
| const iree_hal_memory_barrier_t* memory_barriers, |
| iree_host_size_t buffer_barrier_count, |
| const iree_hal_buffer_barrier_t* buffer_barriers); |
| |
| // Fills the |target_buffer| with the given repeating |pattern| value. |
| // Expects that |pattern_length| is one of 1, 2, or 4 and that |target_offset| |
| // and |length| are aligned to the natural alignment of the value. |
| // The |target_buffer| must be compatible with the devices owned by this |
| // device queue and be allocated with IREE_HAL_BUFFER_USAGE_TRANSFER. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_command_buffer_fill_buffer( |
| iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_t* target_buffer, |
| iree_device_size_t target_offset, iree_device_size_t length, |
| const void* pattern, iree_host_size_t pattern_length); |
| |
| // Updates a range of the given |target_buffer| from the source host memory. |
| // The source host memory is copied immediately into the command buffer and |
| // occupies command buffer space. It is strongly recommended that large buffer |
| // updates are performed via iree_hal_command_buffer_copy_buffer where there is |
| // the possibility of a zero-copy path. |
| // The |source_buffer| may be released by the caller immediately after this |
| // call returns. |
| // The |target_buffer| must be compatible with the devices owned by this |
| // device queue and be allocated with IREE_HAL_BUFFER_USAGE_TRANSFER. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_command_buffer_update_buffer(iree_hal_command_buffer_t* command_buffer, |
| const void* source_buffer, |
| iree_host_size_t source_offset, |
| iree_hal_buffer_t* target_buffer, |
| iree_device_size_t target_offset, |
| iree_device_size_t length); |
| |
| // Copies a |length| range of |source_buffer| to |target_buffer|. |
| // Both buffers must be compatible with the devices owned by this device |
| // queue and be allocated with IREE_HAL_BUFFER_USAGE_TRANSFER. Though the source |
| // and target buffer may be the same the ranges must not overlap (as with |
| // memcpy). |
| // |
| // This can be used to perform device->host, host->device, and device->device |
| // copies. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_command_buffer_copy_buffer( |
| iree_hal_command_buffer_t* command_buffer, iree_hal_buffer_t* source_buffer, |
| iree_device_size_t source_offset, iree_hal_buffer_t* target_buffer, |
| iree_device_size_t target_offset, iree_device_size_t length); |
| |
| // Pushes an inline set of constant |values| that can be accessed by subsequent |
| // dispatches using a compatible executable layout. |
| // |
| // Push constants are always 4-byte values and treated as opaque, meaning that |
| // they may be bit-casted floats, bit-packed booleans, etc. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_command_buffer_push_constants( |
| iree_hal_command_buffer_t* command_buffer, |
| iree_hal_executable_layout_t* executable_layout, iree_host_size_t offset, |
| const void* values, iree_host_size_t values_length); |
| |
| // Pushes a descriptor set made of |bindings| and associates it with |set|. |
| // This uses an internal ringbuffer inside of the command buffer to avoid the |
| // need for creating and binding descriptor sets and managing their lifetime. |
| // |
| // The descriptor set will remain bound and valid so long as the executable |
| // layouts used by dispatches are compatible (same descriptor layouts and push |
| // constant sizes). |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_command_buffer_push_descriptor_set( |
| iree_hal_command_buffer_t* command_buffer, |
| iree_hal_executable_layout_t* executable_layout, int32_t set, |
| iree_host_size_t binding_count, |
| const iree_hal_descriptor_set_binding_t* bindings); |
| |
| // Binds the |descriptor_set| to the given |set| matching that used in the |
| // executable layout interface. |
| // |
| // The descriptor set will remain bound and valid so long as the executable |
| // layouts used by dispatches are compatible (same descriptor layouts and push |
| // constant sizes). |
| // |
| // If any dynamic descriptor types are defined in the descriptor set layout then |
| // the |dynamic_offsets| must be provided. These offsets will be added to the |
| // base offset of the descriptor layout binding. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_command_buffer_bind_descriptor_set( |
| iree_hal_command_buffer_t* command_buffer, |
| iree_hal_executable_layout_t* executable_layout, int32_t set, |
| iree_hal_descriptor_set_t* descriptor_set, |
| iree_host_size_t dynamic_offset_count, |
| const iree_device_size_t* dynamic_offsets); |
| |
| // Dispatches an execution request for |entry_point| of |executable| with the |
| // given XYZ workgroup counts. The request may execute overlapped with any other |
| // transfer operation or dispatch made within the same barrier-defined sequence. |
| // |
| // The executable specified must be registered for use with the device driver |
| // owning this queue. It must not be unregistered until all requests that use |
| // it have completed. |
| // |
| // Fails if the queue does not support dispatch operations (as indicated by |
| // can_dispatch). |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_command_buffer_dispatch( |
| iree_hal_command_buffer_t* command_buffer, |
| iree_hal_executable_t* executable, int32_t entry_point, |
| uint32_t workgroup_x, uint32_t workgroup_y, uint32_t workgroup_z); |
| |
| // Dispatches an execution request with deferred workgroup counts. |
| // This is the same as iree_hal_command_buffer_dispatch but the workgroup counts |
| // are read from the given |workgroups_buffer| at offset |workgroups_offset| as |
| // 3 uint32_t XYZ values before performing the dispatch. This allows prior |
| // dispatches within the command sequence to populate the workgroup counts. |
| // |
| // The |workgroups_buffer| must have been allocated with |
| // IREE_HAL_BUFFER_USAGE_DISPATCH and be of IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_command_buffer_dispatch_indirect( |
| iree_hal_command_buffer_t* command_buffer, |
| iree_hal_executable_t* executable, int32_t entry_point, |
| iree_hal_buffer_t* workgroups_buffer, iree_device_size_t workgroups_offset); |
| |
| //===----------------------------------------------------------------------===// |
| // iree::hal::DescriptorSet |
| //===----------------------------------------------------------------------===// |
| |
| // Creates a descriptor set in |out_descriptor_set| of the given |set_layout| and |bindings|. |
| // Descriptor sets are immutable and retain their bindings. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_descriptor_set_create( |
| iree_hal_device_t* device, iree_hal_descriptor_set_layout_t* set_layout, |
| iree_host_size_t binding_count, |
| const iree_hal_descriptor_set_binding_t* bindings, |
| iree_allocator_t allocator, iree_hal_descriptor_set_t** out_descriptor_set); |
| |
| // Retains the given |descriptor_set| for the caller. |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_descriptor_set_retain(iree_hal_descriptor_set_t* descriptor_set); |
| |
| // Releases the given |descriptor_set| from the caller. |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_descriptor_set_release(iree_hal_descriptor_set_t* descriptor_set); |
| |
| //===----------------------------------------------------------------------===// |
| // iree::hal::DescriptorSetLayout |
| //===----------------------------------------------------------------------===// |
| |
| // Creates a descriptor set layout in |out_descriptor_set_layout| with the given |bindings|. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_descriptor_set_layout_create( |
| iree_hal_device_t* device, |
| iree_hal_descriptor_set_layout_usage_type_t usage_type, |
| iree_host_size_t binding_count, |
| const iree_hal_descriptor_set_layout_binding_t* bindings, |
| iree_allocator_t allocator, |
| iree_hal_descriptor_set_layout_t** out_descriptor_set_layout); |
| |
| // Retains the given |descriptor_set_layout| for the caller (see iree_hal_descriptor_set_layout_release). |
| IREE_API_EXPORT void IREE_API_CALL iree_hal_descriptor_set_layout_retain( |
| iree_hal_descriptor_set_layout_t* descriptor_set_layout); |
| |
| // Releases the given |descriptor_set_layout| from the caller (see iree_hal_descriptor_set_layout_retain). |
| IREE_API_EXPORT void IREE_API_CALL iree_hal_descriptor_set_layout_release( |
| iree_hal_descriptor_set_layout_t* descriptor_set_layout); |
| |
| //===----------------------------------------------------------------------===// |
| // iree::hal::Device |
| //===----------------------------------------------------------------------===// |
| |
| // Retains the given |device| for the caller (see iree_hal_device_release). |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_device_retain(iree_hal_device_t* device); |
| |
| // Releases the given |device| from the caller (see iree_hal_device_retain). |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_device_release(iree_hal_device_t* device); |
| |
| // Returns a reference to the allocator of the |device| that can be used for |
| // allocating buffers. |
| IREE_API_EXPORT iree_hal_allocator_t* IREE_API_CALL |
| iree_hal_device_allocator(iree_hal_device_t* device); |
| |
| // Returns the identifier of the |device|. |
| // This identifier may vary based on the runtime device type; for example, a |
| // Vulkan device may return `vulkan-v1.1` or `vulkan-v1.2-spec1`. |
| IREE_API_EXPORT iree_string_view_t IREE_API_CALL |
| iree_hal_device_id(iree_hal_device_t* device); |
| |
| // Submits one or more |batches| of work to a device queue. |
| // |
| // The queue is selected based on the flags set in |command_categories| and the |
| // |queue_affinity|. As the number of available queues can vary the |
| // |queue_affinity| is used to hash into the available queues for the required |
| // categories. For example if 2 queues support transfer commands and the |
| // affinity is 5 the resulting queue could be index hash(5)=1. The affinity can |
| // thus be treated as just a way to indicate whether two submissions must be |
| // placed onto the same queue. Note that the exact hashing function is |
| // implementation dependent. |
| // |
| // The submission behavior matches Vulkan's vkQueueSubmit, with each batch |
| // executing its command buffers in the order they are defined but allowing the |
| // command buffers to complete out-of-order. See: |
| // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/vkQueueSubmit.html |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_device_queue_submit( |
| iree_hal_device_t* device, iree_hal_command_category_t command_categories, |
| uint64_t queue_affinity, iree_host_size_t batch_count, |
| const iree_hal_submission_batch_t* batches); |
| |
| // Blocks the caller until the semaphores reach or exceed the specified payload |
| // values or the |deadline_ns| elapses. All semaphores in |semaphore_list| must |
| // be created from this |device| (or be imported into it). |
| // |
| // |wait_mode| can be used to decide when the wait will proceed; whether *all* |
| // semaphores in |semaphore_list| must be signaled or whether *any* (one or |
| // more) can be signaled before an early return. |
| // |
| // Returns success if the wait is successful and semaphores have been signaled |
| // satisfying the |wait_mode|. |
| // |
| // Returns DEADLINE_EXCEEDED if the |deadline_ns| elapses without the |
| // |wait_mode| being satisfied. Note that even on success only a subset of the |
| // semaphores may have been signaled and each can be queried to see which ones. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_device_wait_semaphores_with_deadline( |
| iree_hal_device_t* device, iree_hal_wait_mode_t wait_mode, |
| const iree_hal_semaphore_list_t* semaphore_list, iree_time_t deadline_ns); |
| |
| // Blocks the caller until the semaphores reach or exceed the specified payload |
| // values or the |timeout_ns| elapses. |
| // This is the relative-time version of iree_hal_device_wait_semaphores_with_deadline, |
| // with |timeout_ns| given in nanoseconds relative to the time the call is made. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_device_wait_semaphores_with_timeout( |
| iree_hal_device_t* device, iree_hal_wait_mode_t wait_mode, |
| const iree_hal_semaphore_list_t* semaphore_list, |
| iree_duration_t timeout_ns); |
| |
| //===----------------------------------------------------------------------===// |
| // iree::hal::Driver |
| //===----------------------------------------------------------------------===// |
| |
| // Retains the given |driver| for the caller (see iree_hal_driver_release). |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_driver_retain(iree_hal_driver_t* driver); |
| |
| // Releases the given |driver| from the caller (see iree_hal_driver_retain). |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_driver_release(iree_hal_driver_t* driver); |
| |
| // Queries available devices and returns them as a list in |out_device_infos| |
| // (|out_device_info_count| entries). The provided |allocator| will be used to |
| // allocate the returned list and after the caller is done with it |
| // |out_device_infos| must be freed with that same allocator by the caller. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_driver_query_available_devices( |
| iree_hal_driver_t* driver, iree_allocator_t allocator, |
| iree_hal_device_info_t** out_device_infos, |
| iree_host_size_t* out_device_info_count); |
| |
| // Creates the device with |device_id| as queried with iree_hal_driver_query_available_devices. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_driver_create_device( |
| iree_hal_driver_t* driver, iree_hal_device_id_t device_id, |
| iree_allocator_t allocator, iree_hal_device_t** out_device); |
| |
| // Creates the driver-defined "default" device in |out_device|. This may simply |
| // be the first device enumerated. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_driver_create_default_device(iree_hal_driver_t* driver, |
| iree_allocator_t allocator, |
| iree_hal_device_t** out_device); |
| |
| //===----------------------------------------------------------------------===// |
| // iree_hal_driver_registry_t |
| //===----------------------------------------------------------------------===// |
| |
| // Factory interface used for driver enumeration and creation. |
| // The factory is designed to in many cases live in rodata by not requiring any |
| // real code or processing when the driver is statically known to be available. |
| // When drivers may be dynamically available based on system configuration a |
| // factory can discover them and provide them during enumeration. |
| // |
| // Delay-loaded drivers that may require non-trivial setup time (such as those |
| // implemented in dynamic libraries or over RPC) can be speculatively enumerated |
| // by a factory and then rely on try_create to actually perform the slow |
| // work once the user has explicitly signaled that they are willing to pay the |
| // cost (and deal with the consequences). |
| // |
| // WARNING: this API is unstable until the HAL is fully ported. Do not use. |
| typedef struct { |
| // TODO(benvanik): version field. |
| IREE_API_UNSTABLE |
| |
| // User-defined pointer passed to all functions. |
| void* self; |
| |
| // Queries the list of available drivers provided by the factory, if any. |
| // |out_driver_infos| will be populated with a *reference* to factory data |
| // structures (such as the driver name) that callers may choose to clone if |
| // needed. |
| // |
| // Implementers must make their factory enumeration results immutable for the |
| // duration they are registered, though the behavior of try_create is allowed |
| // to change call-to-call. If a factory needs to mutate its set of enumerated |
| // drivers then it must do so by first unregistering itself and re-registering |
| // only after the changes have been made. |
| // |
| // Called with the driver registry lock held; may be called from any thread. |
| iree_status_t(IREE_API_PTR* enumerate)( |
| void* self, const iree_hal_driver_info_t** out_driver_infos, |
| iree_host_size_t* out_driver_info_count); |
| |
| // Tries to create a driver as previously queried with enumerate. |
| // |driver_id| is the opaque ID returned from enumeration; note that there may |
| // be a significant amount of time between enumeration and creation and the |
| // driver registry lock may have been released in between. |
| // |
| // Delay-loaded drivers may still fail here if - for example - required system |
| // resources are unavailable or permission is denied. |
| // |
| // Called with the driver registry lock held; may be called from any thread. |
| iree_status_t(IREE_API_PTR* try_create)(void* self, |
| iree_hal_driver_id_t driver_id, |
| iree_allocator_t allocator, |
| iree_hal_driver_t** out_driver); |
| } iree_hal_driver_factory_t; |
| |
| // Returns the default per-process driver registry. |
| // In simple applications this is usually where you want to go to register and |
| // create drivers. More sophisticated applications that want tighter control |
| // over the visibility of drivers to certain callers such as when dealing with |
| // requests from multiple users may choose to allocate their own registries and |
| // manage their lifetime as desired. |
| // |
| // TODO(benvanik): remove global registry and make callers manage always. We can |
| // provide helpers to make that easier to do, but there's really no benefit to |
| // having this be global like it is. Alternatively, this can be opt-in thanks to |
| // LTO: if a user doesn't call this then the default registry is never |
| // allocated. |
| IREE_API_EXPORT iree_hal_driver_registry_t* IREE_API_CALL |
| iree_hal_driver_registry_default(void); |
| |
| // Registers |factory| with |registry| to serve future queries/requests for |
| // drivers. See iree_hal_driver_registry_t for more information. |
| // |
| // Thread-safe. The factory is not retained and must be kept alive by the caller |
| // until it is unregistered (or the application terminates). |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_driver_registry_register_factory( |
| iree_hal_driver_registry_t* registry, |
| const iree_hal_driver_factory_t* factory); |
| |
| // Unregisters a driver |factory| from |registry|. |
| // Unregistering a factory only prevents new drivers from being created; |
| // existing drivers may remain live even after unregistering. Factories can |
| // expect that no new drivers will be created via the factory after the call |
| // returns. |
| // |
| // Thread-safe. As the factory is not retained by the registry the caller must |
| // release its memory (if needed) after this call returns. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_driver_registry_unregister_factory( |
| iree_hal_driver_registry_t* registry, |
| const iree_hal_driver_factory_t* factory); |
| |
| // Enumerates all drivers from registered factories and returns them as a list. |
| // |out_driver_infos| receives the list and |out_driver_info_count| its length. |
| // The list is allocated with |allocator| and the caller must free it with that |
| // same allocator once done with it. |
| // |
| // The set of drivers returned should be considered the superset of those that |
| // may be available for successful creation as it's possible that delay-loaded |
| // drivers may fail even if they appear in this list. |
| // |
| // Thread-safe. Note that the factory may be unregistered between the query |
| // completing and any attempt to instantiate the driver. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_driver_registry_enumerate( |
| iree_hal_driver_registry_t* registry, iree_allocator_t allocator, |
| iree_hal_driver_info_t** out_driver_infos, |
| iree_host_size_t* out_driver_info_count); |
| |
| // Attempts to create a driver registered with the driver registry by a specific |
| // ID as returned during enumeration in iree_hal_driver_info_t::driver_id. |
| // This can be used to specify the exact driver to create in cases where there |
| // may be multiple factories providing drivers with the same name. |
| // |
| // Thread-safe. May block the caller if the driver is delay-loaded and needs to |
| // perform additional loading/verification/etc before returning. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_driver_registry_try_create( |
| iree_hal_driver_registry_t* registry, iree_hal_driver_id_t driver_id, |
| iree_allocator_t allocator, iree_hal_driver_t** out_driver); |
| |
| // Attempts to create a driver registered with the given canonical driver name. |
| // Effectively enumerate + find by name + try_create if found. Factories are |
| // searched in most-recently-added order such that it's possible to override |
| // drivers with newer registrations when multiple factories provide the same |
| // driver name. |
| // |
| // Thread-safe. May block the caller if the driver is delay-loaded and needs to |
| // perform additional loading/verification/etc before returning. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_driver_registry_try_create_by_name( |
| iree_hal_driver_registry_t* registry, iree_string_view_t driver_name, |
| iree_allocator_t allocator, iree_hal_driver_t** out_driver); |
| |
| //===----------------------------------------------------------------------===// |
| // iree::hal::Executable |
| //===----------------------------------------------------------------------===// |
| |
| // Retains the given |executable| for the caller. |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_executable_retain(iree_hal_executable_t* executable); |
| |
| // Releases the given |executable| from the caller. |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_executable_release(iree_hal_executable_t* executable); |
| |
| //===----------------------------------------------------------------------===// |
| // iree::hal::ExecutableCache |
| //===----------------------------------------------------------------------===// |
| |
| // Creates an executable cache using the given |identifier|. |
| // The identifier is provided to the backing cache API as a way to partition |
| // caches between different groups of executables (from different modules, etc). |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_executable_cache_create( |
| iree_hal_device_t* device, iree_string_view_t identifier, |
| iree_allocator_t allocator, |
| iree_hal_executable_cache_t** out_executable_cache); |
| |
| // Retains the given |executable_cache| for the caller. |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_executable_cache_retain(iree_hal_executable_cache_t* executable_cache); |
| |
| // Releases the given |executable_cache| from the caller. |
| IREE_API_EXPORT void IREE_API_CALL iree_hal_executable_cache_release( |
| iree_hal_executable_cache_t* executable_cache); |
| |
| // Returns true if the executable cache can prepare the given executable input |
| // format. Preparation may still fail if the particular version or features |
| // required by the executable are not supported. |
| IREE_API_EXPORT bool IREE_API_CALL iree_hal_executable_cache_can_prepare_format( |
| iree_hal_executable_cache_t* executable_cache, |
| iree_hal_executable_format_t format); |
| |
| // Prepares an executable for use. |
| // The provided |executable_data| will be used to either look up a previously |
| // prepared executable in the cache or prepare a new one. |
| // |
| // Depending on the driver, preparation may take a non-trivial amount of time |
| // (such as when JITing/etc). As the cache is internally synchronized callers |
| // can issue preparation requests from multiple threads - even for the same |
| // executables - and calls will block until preparation completes. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_executable_cache_prepare_executable( |
| iree_hal_executable_cache_t* executable_cache, |
| iree_hal_executable_layout_t* executable_layout, |
| iree_hal_executable_caching_mode_t caching_mode, |
| iree_const_byte_span_t executable_data, iree_allocator_t allocator, |
| iree_hal_executable_t** out_executable); |
| |
| //===----------------------------------------------------------------------===// |
| // iree::hal::ExecutableLayout |
| //===----------------------------------------------------------------------===// |
| |
| // Creates an executable layout composed of the descriptor set layouts given in |
| // |set_layouts| (|set_layout_count| entries). The returned layout can be used |
| // by multiple executables with the same compatible resource binding layouts. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_executable_layout_create( |
| iree_hal_device_t* device, iree_host_size_t set_layout_count, |
| iree_hal_descriptor_set_layout_t** set_layouts, |
| iree_host_size_t push_constants, iree_allocator_t allocator, |
| iree_hal_executable_layout_t** out_executable_layout); |
| |
| // Retains the given |executable_layout| for the caller. |
| IREE_API_EXPORT void IREE_API_CALL iree_hal_executable_layout_retain( |
| iree_hal_executable_layout_t* executable_layout); |
| |
| // Releases the given |executable_layout| from the caller. |
| IREE_API_EXPORT void IREE_API_CALL iree_hal_executable_layout_release( |
| iree_hal_executable_layout_t* executable_layout); |
| |
| //===----------------------------------------------------------------------===// |
| // iree::hal::Semaphore |
| //===----------------------------------------------------------------------===// |
| |
| // Creates a semaphore that can be used with command queues owned by |device|. |
| // To use the semaphores with other devices or instances they must first be |
| // exported. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL iree_hal_semaphore_create( |
| iree_hal_device_t* device, uint64_t initial_value, |
| iree_allocator_t allocator, iree_hal_semaphore_t** out_semaphore); |
| |
| // Retains the given |semaphore| for the caller. |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_semaphore_retain(iree_hal_semaphore_t* semaphore); |
| |
| // Releases the given |semaphore| from the caller. |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_semaphore_release(iree_hal_semaphore_t* semaphore); |
| |
| // Queries the current payload of the semaphore and stores the result in |
| // |out_value|. As the payload is monotonically increasing it is guaranteed that |
| // the value is at least equal to the previous result of an |
| // iree_hal_semaphore_query call and coherent with any waits for a |
| // specified value via iree_device_wait_all_semaphores. |
| // |
| // Returns the status at the time the method is called without blocking and as |
| // such is only valid after a semaphore has been signaled. The same failure |
| // status will be returned regardless of when in the timeline the error |
| // occurred. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_semaphore_query(iree_hal_semaphore_t* semaphore, uint64_t* out_value); |
| |
| // Signals the |semaphore| to the given payload value. |
| // The call is ignored if the current payload value exceeds |new_value|. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_semaphore_signal(iree_hal_semaphore_t* semaphore, uint64_t new_value); |
| |
| // Signals the |semaphore| with a failure. The |status| will be returned from |
| // iree_hal_semaphore_query and iree_hal_semaphore_signal for the lifetime |
| // of the semaphore. |
| IREE_API_EXPORT void IREE_API_CALL |
| iree_hal_semaphore_fail(iree_hal_semaphore_t* semaphore, iree_status_t status); |
| |
| // Blocks the caller until the semaphore reaches or exceeds the specified |
| // payload value or the |deadline_ns| elapses. |
| // |
| // Returns success if the wait is successful and the semaphore has met or |
| // exceeded the required payload value. |
| // |
| // Returns DEADLINE_EXCEEDED if the |deadline_ns| elapses without the semaphore |
| // reaching the required value. If an asynchronous failure occurred this will |
| // return the failure status that was set immediately. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_semaphore_wait_with_deadline(iree_hal_semaphore_t* semaphore, |
| uint64_t value, iree_time_t deadline_ns); |
| |
| // Blocks the caller until the semaphore reaches or exceeds the specified |
| // payload value or the |timeout_ns| elapses. |
| // A relative-time version of iree_hal_semaphore_wait_with_deadline using the |
| // relative nanoseconds from the time the call is made. |
| IREE_API_EXPORT iree_status_t IREE_API_CALL |
| iree_hal_semaphore_wait_with_timeout(iree_hal_semaphore_t* semaphore, |
| uint64_t value, |
| iree_duration_t timeout_ns); |
| |
| #ifdef __cplusplus |
| } // extern "C" |
| #endif // __cplusplus |
| |
| #endif // IREE_HAL_API_H_ |