| // Copyright 2020 The IREE Authors |
| // |
| // Licensed under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| |
| #ifndef IREE_HAL_BUFFER_H_ |
| #define IREE_HAL_BUFFER_H_ |
| |
| #include <stdbool.h> |
| #include <stdint.h> |
| |
| #include "iree/base/api.h" |
| #include "iree/hal/queue.h" |
| #include "iree/hal/resource.h" |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif // __cplusplus |
| |
| typedef struct iree_hal_allocator_t iree_hal_allocator_t; |
| typedef struct iree_hal_device_t iree_hal_device_t; |
| |
| //===----------------------------------------------------------------------===// |
| // Types and Enums |
| //===----------------------------------------------------------------------===// |
| |
| // Length sentinel meaning the whole length of the underlying buffer. |
| #define IREE_HAL_WHOLE_BUFFER ((iree_device_size_t)(-1)) |
| |
| // A bitfield specifying properties for a memory type. |
| enum iree_hal_memory_type_bits_t { |
| IREE_HAL_MEMORY_TYPE_NONE = 0u, |
| |
| // The allocator will choose the optimal memory type based on buffer usage. |
| // Allocation will succeed if there is a heap available for the allocator to |
| // place the memory into. |
| // |
| // Additional bits can be provided to restrict the set of memory types that |
| // are chosen. For example, if the user knows that a bulk of the accesses will |
| // happen from device the IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL bit can be set to |
| // force the allocator to place it on device. |
| // |
| // This bit is only used during allocation. |
| IREE_HAL_MEMORY_TYPE_OPTIMAL = 1u << 0, |
| |
| // Memory allocated with this type can be mapped for host access using |
| // iree_hal_buffer_map_range. |
| IREE_HAL_MEMORY_TYPE_HOST_VISIBLE = 1u << 1, |
| |
| // The host cache management commands iree_hal_buffer_mapping_flush_range and |
| // iree_hal_buffer_mapping_invalidate_range are not needed to flush host |
| // writes to the device or make device writes visible to the host, |
| // respectively. |
| IREE_HAL_MEMORY_TYPE_HOST_COHERENT = 1u << 2, |
| |
| // Memory allocated with this type is cached on the host. Host memory |
| // accesses to uncached memory are slower than to cached memory, however |
| // uncached memory is always host coherent. Use |
| // iree_hal_buffer_mapping_flush_range to make host changes visible to the |
| // device and iree_hal_buffer_mapping_invalidate_range to make device changes |
| // visible to the host. |
| IREE_HAL_MEMORY_TYPE_HOST_CACHED = 1u << 3, |
| |
| // Memory is accessible as normal host allocated memory. |
| IREE_HAL_MEMORY_TYPE_HOST_LOCAL = IREE_HAL_MEMORY_TYPE_HOST_VISIBLE | |
| IREE_HAL_MEMORY_TYPE_HOST_COHERENT | |
| (1u << 6), |
| |
| // The allocator will choose the optimal memory type based on buffer usage, |
| // preferring to place the allocation in host-local memory. |
| // |
| // Users should set this when it is known that a bulk of accesses to the |
| // buffer will be done by the host, such as readback/download staging buffers. |
| // It should be expected that device access will be slow. |
| // |
| // This bit is only used during allocation. |
| // Allocations will fail if there is no host-local memory type that can |
| // satisfy all requested usage. |
| IREE_HAL_MEMORY_TYPE_OPTIMAL_FOR_HOST = |
| IREE_HAL_MEMORY_TYPE_OPTIMAL | IREE_HAL_MEMORY_TYPE_HOST_LOCAL, |
| |
| // Memory allocated with this type is visible to the device for execution. |
| // Being device visible does not mean the memory must reside on device (as |
| // does IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL). Though an allocation may be |
| // visible to the device and therefore useable for execution it may require |
| // expensive mapping or implicit transfers. |
| IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE = 1u << 4, |
| |
| // Memory allocated with this type is the most efficient for device access. |
| // Devices may support using memory that is not device local via |
| // IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE but doing so can incur non-trivial |
| // performance penalties. Device local memory, on the other hand, is |
| // guaranteed to be fast for all operations. |
| IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL = |
| IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE | (1u << 5), |
| |
| // The allocator will choose the optimal memory type based on buffer usage, |
| // preferring to place the allocation in device-local memory. |
| // |
| // Users should set this when it is known that a bulk of the accesses to the |
| // buffer will be done by the device, such as device transfer and dispatch |
| // operations or light usage of host -> device upload staging buffers. |
| // It should be expected that host access will be slow. |
| // |
| // This bit is only used during allocation. |
| // Allocations will fail if there is no device-local memory type that can |
| // satisfy all requested usage. |
| IREE_HAL_MEMORY_TYPE_OPTIMAL_FOR_DEVICE = |
| IREE_HAL_MEMORY_TYPE_OPTIMAL | IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL, |
| }; |
| typedef uint32_t iree_hal_memory_type_t; |
| |
| // Parses a memory type bitfield from the string |value| into |out_value|. |
| // See iree_bitfield_parse for usage. |
| IREE_API_EXPORT iree_status_t iree_hal_memory_type_parse( |
| iree_string_view_t value, iree_hal_memory_type_t* out_value); |
| |
| // Formats the memory type bitfield |value| as a string. |
| // See iree_bitfield_format for usage, including the role of |out_temp|. |
| IREE_API_EXPORT iree_string_view_t iree_hal_memory_type_format( |
| iree_hal_memory_type_t value, iree_bitfield_string_temp_t* out_temp); |
| |
| // A bitfield specifying how memory will be accessed in a mapped memory region. |
| enum iree_hal_memory_access_bits_t { |
| // Memory is not mapped. |
| IREE_HAL_MEMORY_ACCESS_NONE = 0u, |
| // Memory will be read. |
| // If a buffer is only mapped for reading it may still be possible to write to |
| // it but the results will be undefined (as it may present coherency issues). |
| IREE_HAL_MEMORY_ACCESS_READ = 1u << 0, |
| // Memory will be written. |
| // If a buffer is only mapped for writing it may still be possible to read |
| // from it but the results will be undefined or incredibly slow (as it may |
| // be mapped by the driver as uncached). |
| IREE_HAL_MEMORY_ACCESS_WRITE = 1u << 1, |
| // Memory will be discarded prior to mapping. |
| // The existing contents will be undefined after mapping and must be written |
| // to ensure validity. |
| IREE_HAL_MEMORY_ACCESS_DISCARD = 1u << 2, |
| // Memory will be discarded and completely overwritten in a single operation. |
| IREE_HAL_MEMORY_ACCESS_DISCARD_WRITE = |
| IREE_HAL_MEMORY_ACCESS_WRITE | IREE_HAL_MEMORY_ACCESS_DISCARD, |
| // A flag that can be applied to any access type to indicate that the buffer |
| // storage being accessed may alias with other accesses occurring concurrently |
| // within or across operations. The lack of the flag indicates that the access |
| // is guaranteed not to alias (like C's `restrict` keyword). |
| IREE_HAL_MEMORY_ACCESS_MAY_ALIAS = 1u << 3, |
| // A flag that can be applied to any access type to indicate that the buffer |
| // storage may not be aligned. |
| IREE_HAL_MEMORY_ACCESS_UNALIGNED = 1u << 4, |
| // Memory access may perform any operation and should not be validated. |
| // Used upon access to bypass access verification at the API boundary and |
| // effectively provides a `void*`. |
| // This should only be used by device-side code where it is known-safe to |
| // bypass the access verification. |
| IREE_HAL_MEMORY_ACCESS_ANY = 1u << 5, |
| // Memory may be read, written, and discarded. |
| // Note that this explicitly includes 'DISCARD', which means that the |
| // mapped memory will have undefined contents. Do not use this access |
| // mode if you intend the existing contents to be accessible. |
| IREE_HAL_MEMORY_ACCESS_ALL = IREE_HAL_MEMORY_ACCESS_READ | |
| IREE_HAL_MEMORY_ACCESS_WRITE | |
| IREE_HAL_MEMORY_ACCESS_DISCARD, |
| }; |
| typedef uint16_t iree_hal_memory_access_t; |
| |
| // Parses a memory access bitfield from the string |value| into |out_value|. |
| // See iree_bitfield_parse for usage. |
| IREE_API_EXPORT iree_status_t iree_hal_memory_access_parse( |
| iree_string_view_t value, iree_hal_memory_access_t* out_value); |
| |
| // Formats the memory access bitfield |value| as a string. |
| // See iree_bitfield_format for usage, including the role of |out_temp|. |
| IREE_API_EXPORT iree_string_view_t iree_hal_memory_access_format( |
| iree_hal_memory_access_t value, iree_bitfield_string_temp_t* out_temp); |
| |
| // Bitfield that defines how a buffer is intended to be used. |
| // Usage allows the driver to appropriately place the buffer for more |
| // efficient operations of the specified types. Validation will fail if a buffer |
| // is used in ways other than those it was declared for. |
| enum iree_hal_buffer_usage_bits_t { |
| IREE_HAL_BUFFER_USAGE_NONE = 0u, |
| |
| // ==== IREE_HAL_BUFFER_USAGE_TRANSFER_* ===================================== |
| |
| // Buffer is used as a source for transfer operations. |
| // Buffer may be a source for: |
| // iree_hal_command_buffer_copy_buffer |
| // |
| // Maps to: |
| // - D3D12_RESOURCE_STATE_COPY_SOURCE |
| // - GPUBufferUsage.COPY_SRC |
| // - VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
| IREE_HAL_BUFFER_USAGE_TRANSFER_SOURCE = 1u << 0, |
| |
| // Buffer is used as a target for transfer operations. |
| // Buffer may be a target for: |
| // iree_hal_command_buffer_fill_buffer |
| // iree_hal_command_buffer_update_buffer |
| // iree_hal_command_buffer_copy_buffer |
| // |
| // Maps to: |
| // - D3D12_RESOURCE_STATE_COPY_DEST |
| // - GPUBufferUsage.COPY_DST |
| // - VK_BUFFER_USAGE_TRANSFER_DST_BIT |
| IREE_HAL_BUFFER_USAGE_TRANSFER_TARGET = 1u << 1, |
| |
| // Buffer contents are transferred using DMA operations. |
| // Buffer may be a source or target for: |
| // iree_hal_command_buffer_fill_buffer |
| // iree_hal_command_buffer_update_buffer |
| // iree_hal_command_buffer_copy_buffer |
| // |
| // Maps to: |
| // - D3D12_RESOURCE_STATE_COPY_SOURCE | D3D12_RESOURCE_STATE_COPY_DEST |
| // - GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST |
| // - VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
| IREE_HAL_BUFFER_USAGE_TRANSFER = IREE_HAL_BUFFER_USAGE_TRANSFER_SOURCE | |
| IREE_HAL_BUFFER_USAGE_TRANSFER_TARGET, |
| |
| // ==== IREE_HAL_BUFFER_USAGE_DISPATCH_* ===================================== |
| |
| // Buffer contents are used for indirect dispatch workgroup parameters. |
| // Dispatch parameters must be aligned on 16-byte boundaries and be of the |
| // form `struct { uint32_t workgroup_xyz[3]; }`. |
| // |
| // Maps to: |
| // - D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT |
| // - GPUBufferUsage.INDIRECT |
| // - MTLResourceUsageRead |
| // - VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT |
| IREE_HAL_BUFFER_USAGE_DISPATCH_INDIRECT_PARAMS = 1u << 8, |
| |
| // Buffer contents are uniformly read by dispatches. |
| // These may occasionally be written as storage buffers in cases of |
| // data-dependent sequences but are largely read-only and may have total size |
| // limitations (~32-64KB visible per binding). |
| // |
| // Uniform buffers can be used in place of push constants or as additional |
| // storage for when push constant resources are exhausted. As push constants |
| // must be recorded into the command buffer any values that may change if the |
| // command buffer were to be reused should be put in a mutable uniform |
| // buffer instead. By using uniform buffers (vs storage buffers) the hardware |
| // can perform better caching and coalescing as they are guaranteed to be |
| // read-only across all workgroups in a dispatch. |
| // |
| // Maps to: |
| // - D3D12_CONSTANT_BUFFER_VIEW_DESC |
| // - GPUBufferUsage.UNIFORM |
| // - MTLResourceUsageRead |
| // - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
| IREE_HAL_BUFFER_USAGE_DISPATCH_UNIFORM_READ = 1u << 9, |
| |
| // Buffer contents are read by dispatches as storage buffers. |
| // Read-only buffers can enable non-local prefetching and replication. |
| IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE_READ = 1u << 10, |
| |
| // Buffer contents are written by dispatches as storage buffers. |
| // Write-only buffers can reduce cache pollution and writeback latency. |
| IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE_WRITE = 1u << 11, |
| |
| // Buffer contents are read and written by dispatches as storage buffers. |
| // Storage buffers allow random read/write access to underlying data using |
| // flexible data formats and alignment. Atomic operations may be allowed |
| // depending on implementation. |
| // |
| // Maps to: |
| // - D3D12_UNORDERED_ACCESS_VIEW_DESC::D3D12_BUFFER_UAV |
| // + D3D12_RESOURCE_STATE_UNORDERED_ACCESS |
| // - GPUBufferUsage.STORAGE |
| // - MTLResourceUsageRead | MTLResourceUsageWrite |
| // - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
| IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE = |
| IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE_READ | |
| IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE_WRITE, |
| |
| // Buffer contents are read by dispatches as images. |
| // Depending on the implementation this may be ignored or treated the same as |
| // IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE_READ. |
| IREE_HAL_BUFFER_USAGE_DISPATCH_IMAGE_READ = 1u << 12, |
| |
| // Buffer contents are written by dispatches as images. |
| // Depending on the implementation this may be ignored or treated the same as |
| // IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE_WRITE. |
| IREE_HAL_BUFFER_USAGE_DISPATCH_IMAGE_WRITE = 1u << 13, |
| |
| // Buffer contents are read and written by dispatches as images. |
| // Depending on the implementation this may be ignored or treated the same as |
| // IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE. If supported then additional |
| // hardware resources may be required to perform the binding. |
| // |
| // Storage buffers are preferred in most cases due to the more flexible data |
| // types and access allowance. On specific hardware images may allow use of |
| // fixed-function sampling hardware and texture caches that are not available |
| // on the storage buffer path. The largest benefit from using images is around |
| // uniform loads (all invocations in a workgroup loading the same location) on |
| // low-end hardware in order to tickle driver compiler optimizations. In |
| // almost all other cases the image path can incur significant additional |
| // latency or correctness hazards especially in data-dependent operations. |
| // |
| // Maps to: |
| // - D3D12_SHADER_RESOURCE_VIEW_DESC::D3D12_BUFFER_SRV |
| // - MTLResourceUsageRead | MTLResourceUsageWrite |
| // - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
| IREE_HAL_BUFFER_USAGE_DISPATCH_IMAGE = |
| IREE_HAL_BUFFER_USAGE_DISPATCH_IMAGE_READ | |
| IREE_HAL_BUFFER_USAGE_DISPATCH_IMAGE_WRITE, |
| |
| // Buffer contents are available for use by all dispatch-related operations. |
| IREE_HAL_BUFFER_USAGE_DISPATCH = |
| IREE_HAL_BUFFER_USAGE_DISPATCH_INDIRECT_PARAMS | |
| IREE_HAL_BUFFER_USAGE_DISPATCH_UNIFORM_READ | |
| IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE | |
| IREE_HAL_BUFFER_USAGE_DISPATCH_IMAGE, |
| |
| // ==== IREE_HAL_BUFFER_USAGE_SHARING_* ====================================== |
| |
| // Buffer can be exported via iree_hal_allocator_export_buffer. |
| // Exported buffers may require special allocation behavior (dedicated |
| // allocations, higher alignment, etc) and may impose lifetime restrictions. |
| IREE_HAL_BUFFER_USAGE_SHARING_EXPORT = 1u << 16, |
| |
| // Buffer can be automatically replicated across peers. |
| // When multiple devices use the same buffer an implementation is allowed to |
| // clone the buffer per-device in order to keep accesses local. |
| // Implementations are free to ignore this flag if doing so would require |
| // additional overhead (managed memory/global locking/etc). |
| IREE_HAL_BUFFER_USAGE_SHARING_REPLICATE = 1u << 17, |
| |
| // Buffer can be used concurrently by multiple queues. |
| // This may incur additional implicit synchronization overhead. |
| // When omitted the buffer is assumed to be exclusive to a queue and may not |
| // be accessible from other queues without explicit transfer operations. |
| IREE_HAL_BUFFER_USAGE_SHARING_CONCURRENT = 1u << 18, |
| |
| // Buffer is immutable once initialized and implementations are allowed to |
| // avoid synchronization/transfers done in cases where the buffer may be |
| // mutable. Implementations are allowed to protect the buffer contents for |
| // read-only access if they support it. |
| IREE_HAL_BUFFER_USAGE_SHARING_IMMUTABLE = 1u << 19, |
| |
| // ==== IREE_HAL_BUFFER_USAGE_MAPPING_* ====================================== |
| |
| // Buffer may be mapped for scoped host access. |
| // Each iree_hal_buffer_map_range must be paired with an |
| // iree_hal_buffer_unmap_range. |
| // |
| // Concurrent access across host and device is not allowed during scoped |
| // mappings and will lead to desynchronization. If concurrent access is |
| // required then persistent mapping can be used (if supported) and otherwise |
| // staging buffers with transfer operations can preserve proper pipelining. |
| IREE_HAL_BUFFER_USAGE_MAPPING_SCOPED = 1u << 24, |
| |
| // Buffer may be mapped for persistent host access. |
| // iree_hal_buffer_map_range _may_ be paired with a matching |
| // iree_hal_buffer_unmap_range but it's not required. |
| // |
| // These mappings can persist for the lifetime of the buffer and allow for |
| // concurrent usage from both host and device. Depending on the memory type |
| // additional synchronization may be required via flushes and invalidation. |
| // Not all implementations support this and of those that do many may place |
| // such allocations in slower or more restrictive memory. |
| IREE_HAL_BUFFER_USAGE_MAPPING_PERSISTENT = 1u << 25, |
| |
| // Buffer is allowed to be mapped only if doing so is cheap. |
| // If omitted and one of the other mapping usage flags is set the allocation |
| // will fail if the buffer cannot be allocated in mapped memory. This can be |
| // used to optimistically request mapping for staging buffers that avoids |
| // additional allocations and copies. Code setting this flag is expected to |
| // handle the transfers when mapping is not used by checking the allocated |
| // buffer usage bits. |
| IREE_HAL_BUFFER_USAGE_MAPPING_OPTIONAL = 1u << 26, |
| |
| // Indicates that the mapped memory will be accessed by the host randomly. |
| // Reads and writes will be non-contiguous or non-temporal and host-cached |
| // memory is strongly preferred. |
| IREE_HAL_BUFFER_USAGE_MAPPING_ACCESS_RANDOM = 1u << 27, |
| |
| // Indicates that the mapped memory will be written sequentially (memcpy/etc). |
| // The host will only write to the mapped memory and do so using a sequential |
| // operation (memcpy, memset, etc). Reads may fail or be *extremely* slow. |
| IREE_HAL_BUFFER_USAGE_MAPPING_ACCESS_SEQUENTIAL_WRITE = 1u << 28, |
| |
| // Buffer may be mapped for scoped random-access host access. |
| // Allocation will fail if mapping is not available; if attempting to |
| // optimistically map in order to avoid staging transfers then add the |
| // IREE_HAL_BUFFER_USAGE_MAPPING_OPTIONAL bit. |
| // |
| // WARNING: mapping can be extremely expensive, use limited hardware |
| // resources, introduce data hazards, and synchronize host and device |
| // execution. Unless an application knows that such issues will not arise |
| // (as in tests where there's never concurrent usage) mapping should be used |
| // judiciously: do not assume mapping is a high-performance technique! |
| // |
| // If an application knows its access characteristics (such as memcpy only) |
| // then prefer specifying the bits directly and including |
| // IREE_HAL_BUFFER_USAGE_MAPPING_ACCESS_SEQUENTIAL_WRITE. Random access (set |
| // by default with this value) can severely harm device performance. |
| IREE_HAL_BUFFER_USAGE_MAPPING = IREE_HAL_BUFFER_USAGE_MAPPING_SCOPED | |
| IREE_HAL_BUFFER_USAGE_MAPPING_ACCESS_RANDOM, |
| |
| // ==== IREE_HAL_BUFFER_USAGE_* helpers ====================================== |
| |
| // Default usage mode covering transfer and dispatch storage access. |
| // Most internal buffers will be allocated for this usage and external buffers |
| // should use this unless specific usage is required (such as mapping). |
| IREE_HAL_BUFFER_USAGE_DEFAULT = |
| IREE_HAL_BUFFER_USAGE_TRANSFER | IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE, |
| }; |
| typedef uint32_t iree_hal_buffer_usage_t; |
| |
| // Parses a buffer usage bitfield from the string |value| into |out_value|. |
| // See iree_bitfield_parse for usage. |
| IREE_API_EXPORT iree_status_t iree_hal_buffer_usage_parse( |
| iree_string_view_t value, iree_hal_buffer_usage_t* out_value); |
| |
| // Formats the buffer usage bitfield |value| as a string. |
| // See iree_bitfield_format for usage, including the role of |out_temp|. |
| IREE_API_EXPORT iree_string_view_t iree_hal_buffer_usage_format( |
| iree_hal_buffer_usage_t value, iree_bitfield_string_temp_t* out_temp); |
| |
| // Result of testing two buffers for overlap. |
| typedef enum iree_hal_buffer_overlap_e { |
| // No overlap between the two buffers. |
| IREE_HAL_BUFFER_OVERLAP_DISJOINT = 0, |
| // Partial overlap between the two buffers. |
| IREE_HAL_BUFFER_OVERLAP_PARTIAL, |
| // Complete overlap between the two buffers (they are the same). |
| IREE_HAL_BUFFER_OVERLAP_COMPLETE, |
| } iree_hal_buffer_overlap_t; |
| |
| typedef uint32_t iree_hal_buffer_compatibility_t; /* NOTE(review): bits are not declared in this section. */ |
| |
| // A bitfield specifying buffer transfer behavior (only DEFAULT exists today). |
| enum iree_hal_transfer_buffer_flag_bits_t { |
| // TODO(benvanik): flags controlling blocking, flushing, invalidation, and |
| // persistence. We may also want to set a bit that causes failure on emulated |
| // transfers that would otherwise be really expensive. |
| IREE_HAL_TRANSFER_BUFFER_FLAG_DEFAULT = 0, |
| }; |
| typedef uint32_t iree_hal_transfer_buffer_flags_t; |
| |
| // Determines buffer mapping behavior. |
| enum iree_hal_mapping_mode_bits_t { |
| // Buffers are mapped as part of a scoped map-access-unmap sequence. |
| // If there are any in-flight operations using the buffer, its contents are |
| // undefined though they may deceivingly still seem correct under certain |
| // implementations. |
| IREE_HAL_MAPPING_MODE_SCOPED = 1u << 0, |
| |
| // Buffers are mapped persistently and concurrently accessible by both the |
| // host and device. Mapping happens once and so long as there are any live |
| // mappings the buffer will remain accessible. Not all implementations or |
| // buffer memory types support this, and even ones that do may not support |
| // coherent cross-device sharing. |
| IREE_HAL_MAPPING_MODE_PERSISTENT = 1u << 1, |
| }; |
| typedef uint32_t iree_hal_mapping_mode_t; |
| |
| //===----------------------------------------------------------------------===// |
| // iree_hal_buffer_placement_t |
| //===----------------------------------------------------------------------===// |
| |
| // Flags describing the placement of a buffer on a device and its allocation |
| // semantics. This information is only valid on allocated buffers and not on |
| // wrappers (subspans, etc) that may hold references to them. |
| typedef uint32_t iree_hal_buffer_placement_flags_t; |
| enum iree_hal_buffer_placement_flag_bits_t { |
| IREE_HAL_BUFFER_PLACEMENT_FLAG_NONE = 0u, |
| // Buffer was allocated with an asynchronous allocation API such as |
| // iree_hal_device_queue_alloca and/or can be deallocated with an asynchronous |
| // deallocation API such as iree_hal_device_queue_dealloca. |
| IREE_HAL_BUFFER_PLACEMENT_FLAG_ASYNCHRONOUS = 1u << 0, |
| // TODO(benvanik): flags for discrete/external to allow for quick export |
| // checks. |
| }; |
| |
| // Describes the origin of an allocated buffer. |
| // This is used internally to route buffers back to pools and can be used by |
| // hosting layers to route deallocations to appropriate devices/queues. |
| // This information is generally only valid for allocated buffers (the result of |
| // an iree_hal_buffer_allocated_buffer query). |
| typedef struct iree_hal_buffer_placement_t { |
| // The device the buffer was allocated from. Unretained. |
| // Only valid for allocated buffers and not any intermediates (subspans, etc). |
| // May be NULL if the buffer is not associated with any particular device (a |
| // free-floating host heap buffer); see iree_hal_buffer_placement_is_undefined. |
| iree_hal_device_t* device; |
| // Queues on the device to which the buffer is available. Depending on the |
| // device this may indicate which queues have exclusive access to the buffer |
| // or which queues have optimal access. This may be broader than the original |
| // request if the buffer is able to be accessed by other queues without |
| // penalty. Usage of the buffer for queue read/write or asynchronous |
| // deallocation via iree_hal_device_queue_dealloca is only legal with a queue |
| // affinity that is a subset of this affinity set. |
| iree_hal_queue_affinity_t queue_affinity; |
| // Describes the placement behavior of a buffer on a device and its allocation |
| // semantics. |
| iree_hal_buffer_placement_flags_t flags; |
| uint32_t reserved; |
| } iree_hal_buffer_placement_t; |
| |
| // Returns a placement indicating that the buffer has no direct device it is |
| // associated with. Commonly used for free-floating buffer handles such as heap |
| // wrapped or allocated buffers that come from outside of the HAL. |
| static inline iree_hal_buffer_placement_t iree_hal_buffer_placement_undefined( |
| void) { |
| iree_hal_buffer_placement_t placement = {0}; |
| return placement; |
| } |
| |
| // Returns true if the |placement| is undefined and the buffer has no direct |
| // device it is associated with. |
| static inline bool iree_hal_buffer_placement_is_undefined( |
| const iree_hal_buffer_placement_t placement) { |
| return placement.device == NULL; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // iree_hal_buffer_params_t |
| //===----------------------------------------------------------------------===// |
| |
| // Parameters defining how a buffer should be allocated. |
| // |
| // Designed to be zero-initialized: any field with a 0 value will be assigned |
| // a default as indicated in the field description. |
| // |
| // For ergonomics when used from C++ w/o named initializers the first field is |
| // the most commonly used so that it can be initialized by location: |
| // some_fn(..., {IREE_HAL_BUFFER_USAGE_FOO}, ...) |
| typedef struct iree_hal_buffer_params_t { |
| // Specifies the usage allowed by HAL APIs and aids in memory placement. |
| // Devices may have different memory types for different usage and require |
| // the intended usage to be declared upon allocation. It's always best to |
| // limit the allowed usage bits to precisely what the actual usage will be to |
| // avoid additional copies, synchronization, and expensive emulation. |
| // |
| // If 0 then the usage will default to IREE_HAL_BUFFER_USAGE_DEFAULT. |
| iree_hal_buffer_usage_t usage; |
| |
| // Specifies the access allowed to the memory via the HAL APIs. |
| // For example, if the IREE_HAL_MEMORY_ACCESS_WRITE bit is not set then any |
| // API call that would write to the memory will fail (such as |
| // iree_hal_command_buffer_update_buffer). This does not limit any untrusted |
| // dispatch or external use of the buffer and should not be treated as a |
| // memory protection mechanism. |
| // |
| // If 0 then the access will be set as IREE_HAL_MEMORY_ACCESS_ALL. |
| iree_hal_memory_access_t access; |
| |
| // Specifies the memory type properties used for selecting a memory space. |
| // This should often be IREE_HAL_MEMORY_TYPE_OPTIMAL to allow the allocator |
| // to place the allocation based on usage bits but can be specified if the |
| // exact memory type must be used for compatibility with external code. |
| // |
| // If 0 then the type will be set as IREE_HAL_MEMORY_TYPE_OPTIMAL. |
| iree_hal_memory_type_t type; |
| |
| // Queue affinity bitmap indicating which queues may access this buffer. |
| // For NUMA devices this can be used to more tightly scope the allocation to |
| // particular device memory and provide better pool placement. When a device |
| // supports peering or replication the affinity bitmap will be used to choose |
| // which subdevices require configuration. |
| // |
| // If 0 then the buffer will be available on any queue as if |
| // IREE_HAL_QUEUE_AFFINITY_ANY was specified. |
| iree_hal_queue_affinity_t queue_affinity; |
| |
| // Minimum alignment, in bytes, of the resulting allocation. |
| // The actual alignment may be any value greater-than-or-equal-to this value. |
| // |
| // If 0 then the alignment will be decided by the allocator based on optimal |
| // device parameters. |
| iree_device_size_t min_alignment; |
| } iree_hal_buffer_params_t; |
| |
| // Fills in defaults, in place, for |params| fields left zero-initialized. |
| // Fields that already hold a non-zero value are left untouched and |
| // min_alignment is never modified here. |
| static inline void iree_hal_buffer_params_canonicalize( |
|     iree_hal_buffer_params_t* params) { |
|   // Each field is independent: only a zero value is replaced by its default. |
|   if (params->usage == 0) { |
|     params->usage = IREE_HAL_BUFFER_USAGE_DEFAULT; |
|   } |
|   if (params->access == 0) { |
|     params->access = IREE_HAL_MEMORY_ACCESS_ALL; |
|   } |
|   if (params->type == 0) { |
|     params->type = IREE_HAL_MEMORY_TYPE_OPTIMAL; |
|   } |
|   if (params->queue_affinity == 0) { |
|     params->queue_affinity = IREE_HAL_QUEUE_AFFINITY_ANY; |
|   } |
| } |
| |
| // Returns a copy of |params| with the given |usage| bits OR'ed in. |
| // If |params| has no usage set (0) it is first given transfer and dispatch |
| // storage usage so the extra bits extend that default instead of replacing it. |
| static inline iree_hal_buffer_params_t iree_hal_buffer_params_with_usage( |
|     const iree_hal_buffer_params_t params, iree_hal_buffer_usage_t usage) { |
|   iree_hal_buffer_usage_t base_usage = params.usage; |
|   if (base_usage == 0) { |
|     base_usage = |
|         IREE_HAL_BUFFER_USAGE_TRANSFER | IREE_HAL_BUFFER_USAGE_DISPATCH_STORAGE; |
|   } |
|   iree_hal_buffer_params_t updated = params; |
|   updated.usage = base_usage | usage; |
|   return updated; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // iree_hal_buffer_mapping_t |
| //===----------------------------------------------------------------------===// |
| |
| // Implementation-specific private mapping data. |
| typedef struct iree_hal_buffer_mapping_impl_t { |
| // Byte offset within the buffer where the mapped data begins. |
| iree_device_size_t byte_offset; |
| // Allowed access bits; used for validation only. |
| iree_hal_memory_access_t allowed_access; |
| // Tracking flags (bitfields totalling 32 bits). |
| uint32_t is_persistent : 1; |
| uint32_t reserved_flags : 31; |
| // Backing implementation data. |
| // For backends that require additional tracking (shadow data structures/etc) |
| // this can be used to store references to them for the duration of the |
| // mapping. |
| uint64_t reserved[1]; |
| } iree_hal_buffer_mapping_impl_t; |
| |
| // Reference to a buffer's mapped memory. |
| typedef struct iree_hal_buffer_mapping_t { |
| // Contents of the buffer. Behavior is undefined if an access is performed |
| // whose type was not specified during mapping. |
| // |
| // The bytes available may be greater than what was requested if platform |
| // alignment rules require it. Only memory defined by the given span may be |
| // accessed. |
| iree_byte_span_t contents; |
| |
| // Buffer providing the backing storage for the mapping. |
| // When mapped with IREE_HAL_MAPPING_MODE_SCOPED the buffer will be retained |
| // until it is unmapped. When mapped with IREE_HAL_MAPPING_MODE_PERSISTENT the |
| // caller is responsible for retaining the buffer. |
| struct iree_hal_buffer_t* buffer; |
| |
| // Used internally - do not modify. |
| // Implementations are allowed to use the reserved fields of |impl| for their |
| // own storage but should otherwise ignore the remaining parts. |
| iree_hal_buffer_mapping_impl_t impl; |
| } iree_hal_buffer_mapping_t; |
| |
| //===----------------------------------------------------------------------===// |
| // iree_hal_buffer_release_callback_t |
| //===----------------------------------------------------------------------===// |
| |
| typedef void(IREE_API_PTR* iree_hal_buffer_release_fn_t)( |
| void* user_data, struct iree_hal_buffer_t* buffer); |
| |
| // A callback issued when a buffer is released. |
| typedef struct { |
| // Function called with |user_data| and the buffer; NULL in the null callback. |
| iree_hal_buffer_release_fn_t fn; |
| // User data passed to the callback function. Unowned. |
| void* user_data; |
| } iree_hal_buffer_release_callback_t; |
| |
| // Returns an empty release callback (both |fn| and |user_data| are NULL), |
| // implying that no cleanup is required. |
| static inline iree_hal_buffer_release_callback_t |
| iree_hal_buffer_release_callback_null(void) { |
| iree_hal_buffer_release_callback_t null_callback; |
| null_callback.fn = NULL; |
| null_callback.user_data = NULL; |
| return null_callback; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // iree_hal_buffer_t |
| //===----------------------------------------------------------------------===// |
| |
| // Allocated memory buffer wrapper type and utilities. |
| // |
| // Buffers are the basic unit of memory used by the inference system. They may |
| // be allocated such that they are accessible from the host (normal C++ code |
| // running on the main CPU), a particular device (such as an accelerator) or |
| // family of devices, or from some mix of all of those. |
| // |
| // The type of memory a buffer is allocated within has implications on its |
| // performance and lifetime. For example if an application attempts to use a |
| // host-allocated buffer (IREE_HAL_MEMORY_TYPE_HOST_LOCAL) on an accelerator |
| // with discrete memory the accelerator may either be unable to access the |
| // memory or take a non-trivial performance hit when attempting to do so |
| // (involving setting up kernel mappings, doing DMA transfers, etc). Likewise, |
| // trying to access a device-allocated buffer |
| // (IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL) may incur similar overhead or not be |
| // possible at all. This may be due to restrictions in the memory visibility, |
| // address spaces, mixed endianness or pointer widths, and other weirdness. |
| // |
| // The memory types (defined by a bitfield of iree_hal_memory_type_t values) |
| // that a particular context (host or device) may use vary from device to device |
| // and must be queried by the application when allocating buffers. It's strongly |
| // recommended that the most specific memory type be set as possible. For |
| // example allocating a buffer with IREE_HAL_MEMORY_TYPE_HOST_COHERENT even when |
| // it will never be used in a way that requires coherency may occupy address |
| // space reservations or memory mapping that would otherwise not be needed. |
| // |
| // As buffers may sometimes not be accessible from the host the base buffer type |
| // does not allow for direct void* access; buffers must instead be manipulated |
| // with utility functions (such as iree_hal_buffer_map_read/_map_write) or be |
| // mapped into a host-accessible address space via iree_hal_buffer_map_range. |
| // Buffers must be unmapped before any command may use them. |
| // |
| // Buffers may equate (roughly) 1:1 with an allocation either from the host heap |
| // or a device. iree_hal_buffer_subspan can be used to reference subspans of |
| // buffers like std::span - though unlike std::span the returned buffer holds |
| // a reference to the parent buffer. |
| typedef struct iree_hal_buffer_t iree_hal_buffer_t; |
| |
| // Returns success iff a buffer with |actual_memory_type| has the expected type. |
| IREE_API_EXPORT iree_status_t iree_hal_buffer_validate_memory_type( |
| iree_hal_memory_type_t actual_memory_type, |
| iree_hal_memory_type_t expected_memory_type); |
| |
| // Returns success iff |allowed_memory_access| allows the required access. |
| IREE_API_EXPORT iree_status_t iree_hal_buffer_validate_access( |
| iree_hal_memory_access_t allowed_memory_access, |
| iree_hal_memory_access_t required_memory_access); |
| |
| // Returns success iff |allowed_usage| allows the given |required_usage| type. |
| IREE_API_EXPORT iree_status_t |
| iree_hal_buffer_validate_usage(iree_hal_buffer_usage_t allowed_usage, |
| iree_hal_buffer_usage_t required_usage); |
| |
| // Returns success iff |byte_offset|/|byte_length| fall within the valid buffer. |
| IREE_API_EXPORT iree_status_t iree_hal_buffer_validate_range( |
| iree_hal_buffer_t* buffer, iree_device_size_t byte_offset, |
| iree_device_size_t byte_length); |
| |
| // Adjusts the |offset| and |length| of a buffer subrange, storing the new |
| // subrange in |out_adjusted_offset|/|out_adjusted_length|. Fails if invalid. |
| IREE_API_EXPORT iree_status_t iree_hal_buffer_calculate_range( |
| iree_device_size_t base_offset, iree_device_size_t max_length, |
| iree_device_size_t offset, iree_device_size_t length, |
| iree_device_size_t* out_adjusted_offset, |
| iree_device_size_t* out_adjusted_length); |
| |
| // Tests whether the given buffer ranges overlap, with support for subspans. |
| // Pass IREE_HAL_WHOLE_BUFFER as |lhs_length| and/or |rhs_length| to use the |
| // length of the respective buffer. |
| IREE_API_EXPORT iree_hal_buffer_overlap_t iree_hal_buffer_test_overlap( |
| iree_hal_buffer_t* lhs_buffer, iree_device_size_t lhs_offset, |
| iree_device_size_t lhs_length, iree_hal_buffer_t* rhs_buffer, |
| iree_device_size_t rhs_offset, iree_device_size_t rhs_length); |
| |
| // Returns in |out_buffer| a reference to a subspan of the |buffer|. |
| // Passing IREE_HAL_WHOLE_BUFFER as |byte_length| selects the bytes remaining |
| // in the buffer after |byte_offset| (possibly 0). |
| // |
| // The parent buffer is kept alive for the lifetime of the returned subspan. |
| // If the subspan is only a small portion this may cause additional memory |
| // to remain allocated longer than required. |
| // |
| // If the requested span covers the entire range the given |buffer| is returned. |
| // |out_buffer| must be released by the caller. |
| IREE_API_EXPORT iree_status_t iree_hal_buffer_subspan( |
| iree_hal_buffer_t* buffer, iree_device_size_t byte_offset, |
| iree_device_size_t byte_length, iree_allocator_t host_allocator, |
| iree_hal_buffer_t** out_buffer); |
| |
| // Retains |buffer| on behalf of the caller. |
| IREE_API_EXPORT void iree_hal_buffer_retain(iree_hal_buffer_t* buffer); |
| |
| // Releases |buffer| on behalf of the caller. |
| IREE_API_EXPORT void iree_hal_buffer_release(iree_hal_buffer_t* buffer); |
| |
| // Returns a pointer to the buffer containing the actual allocation. |
| // |buffer| itself represents a span of the allocated bytes defined by its |
| // byte_offset and byte_length. If |buffer| *is* the allocated buffer then the |
| // returned value is the provided buffer pointer. |
| IREE_API_EXPORT iree_hal_buffer_t* iree_hal_buffer_allocated_buffer( |
| const iree_hal_buffer_t* buffer); |
| |
| // Returns the size in bytes of the resource memory allocation. |
| // The value may be rounded up from the originally requested size or from the |
| // ideal size for the resource based on device restrictions. |
| IREE_API_EXPORT iree_device_size_t |
| iree_hal_buffer_allocation_size(const iree_hal_buffer_t* buffer); |
| |
| // Returns the original placement of the allocated buffer. |
| // The placement describes the entire underlying allocated buffer rather than |
| // the subspan a particular |buffer| handle may cover; many buffer handles may |
| // be backed by the same allocation. Placements may change over the lifetime |
| // of a buffer as it is moved across devices, but the origin will always |
| // accept actions on the buffer such as deallocation. |
| // |
| // Note that not all buffers have a placement: e.g. host buffers allocated as |
| // free-floating objects will have no device assigned. Use |
| // iree_hal_buffer_placement_is_undefined to check for this case |
| // explicitly. |
| IREE_API_EXPORT iree_hal_buffer_placement_t |
| iree_hal_buffer_allocation_placement(const iree_hal_buffer_t* buffer); |
| |
| // Returns the byte offset of |buffer| within its allocated_buffer. |
| IREE_API_EXPORT iree_device_size_t |
| iree_hal_buffer_byte_offset(const iree_hal_buffer_t* buffer); |
| |
| // Returns the size of |buffer| in bytes. |
| IREE_API_EXPORT iree_device_size_t |
| iree_hal_buffer_byte_length(const iree_hal_buffer_t* buffer); |
| |
| // Returns the memory type |buffer| was allocated with. |
| IREE_API_EXPORT |
| iree_hal_memory_type_t iree_hal_buffer_memory_type( |
| const iree_hal_buffer_t* buffer); |
| |
| // Returns the memory access modes allowed on |buffer|. |
| // These may be more strict than the underlying allocation, for example when |
| // the buffer is exposing read-only memory that may be in mutable pages. |
| IREE_API_EXPORT |
| iree_hal_memory_access_t iree_hal_buffer_allowed_access( |
| const iree_hal_buffer_t* buffer); |
| |
| // Returns the buffer usage modes allowed on |buffer|. |
| IREE_API_EXPORT |
| iree_hal_buffer_usage_t iree_hal_buffer_allowed_usage( |
| const iree_hal_buffer_t* buffer); |
| |
| // Sets the |byte_offset|/|byte_length| range of |buffer| to binary zero. |
| // |
| // The buffer must have the IREE_HAL_BUFFER_USAGE_MAPPING bit set. |
| // The byte range in |buffer| will be flushed if needed. |
| // |
| // Prefer performing buffer operations on transfer queues: this synchronous |
| // function may incur additional cache flushes and synchronous blocking |
| // behavior and is not supported on all buffer types. |
| // See iree_hal_command_buffer_fill_buffer. |
| IREE_API_EXPORT iree_status_t iree_hal_buffer_map_zero( |
| iree_hal_buffer_t* buffer, iree_device_size_t byte_offset, |
| iree_device_size_t byte_length); |
| |
| // Sets a range of |buffer| to the value given by |pattern|. |
| // Only |pattern_length| values of 1, 2, or 4 bytes are supported. |
| // |
| // The buffer must have the IREE_HAL_BUFFER_USAGE_MAPPING bit set. |
| // The byte range in |buffer| will be flushed if needed. |
| // |
| // Prefer performing buffer operations on transfer queues: this synchronous |
| // function may incur additional cache flushes and synchronous blocking |
| // behavior and is not supported on all buffer types. |
| // See iree_hal_command_buffer_fill_buffer. |
| IREE_API_EXPORT iree_status_t iree_hal_buffer_map_fill( |
| iree_hal_buffer_t* buffer, iree_device_size_t byte_offset, |
| iree_device_size_t byte_length, const void* pattern, |
| iree_host_size_t pattern_length); |
| |
| // Reads |data_length| bytes from |source_buffer| starting at |source_offset|. |
| // |
| // The buffer must have the IREE_HAL_BUFFER_USAGE_MAPPING bit set. |
| // |
| // Prefer performing buffer operations on transfer queues: this synchronous |
| // function may incur additional cache flushes and synchronous blocking |
| // behavior and is not supported on all buffer types. |
| // See iree_hal_command_buffer_copy_buffer. |
| IREE_API_EXPORT iree_status_t iree_hal_buffer_map_read( |
| iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset, |
| void* target_buffer, iree_device_size_t data_length); |
| |
| // Writes |data_length| bytes into |target_buffer| starting at |target_offset|. |
| // |
| // The buffer must have the IREE_HAL_BUFFER_USAGE_MAPPING bit set. |
| // The byte range in |target_buffer| will be flushed if needed. |
| // |
| // Prefer performing buffer operations on transfer queues: this synchronous |
| // function may incur additional cache flushes and synchronous blocking |
| // behavior and is not supported on all buffer types. |
| // See iree_hal_command_buffer_update_buffer and |
| // iree_hal_command_buffer_copy_buffer. |
| IREE_API_EXPORT iree_status_t iree_hal_buffer_map_write( |
| iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset, |
| const void* source_buffer, iree_device_size_t data_length); |
| |
| // Copies |data_length| bytes from |source_buffer| into the |target_buffer|. |
| // |
| // Both buffers must have the IREE_HAL_BUFFER_USAGE_MAPPING bit set. |
| // The byte range in |target_buffer| will be flushed if needed. The buffers |
| // need not come from the same device. |
| // |
| // Prefer performing buffer operations on transfer queues: this synchronous |
| // function may incur additional cache flushes and synchronous blocking |
| // behavior and is not supported on all buffer types. |
| // See iree_hal_command_buffer_copy_buffer. |
| IREE_API_EXPORT iree_status_t iree_hal_buffer_map_copy( |
| iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset, |
| iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset, |
| iree_device_size_t data_length); |
| |
| // Maps a range of |buffer| for host pointer access via |out_buffer_mapping|. |
| // The byte offset and byte length may be adjusted for device alignment. |
| // The output data pointer will be properly aligned to the start of the data. |
| // Fails if the memory cannot be mapped (invalid access type, invalid range, |
| // or unsupported memory type). |
| // |
| // The buffer must have the IREE_HAL_BUFFER_USAGE_MAPPING bit set. |
| // Unless the buffer is IREE_HAL_MEMORY_TYPE_HOST_COHERENT the caller must |
| // invalidate the byte range they want to access to update the visibility of |
| // the mapped memory. |
| IREE_API_EXPORT iree_status_t iree_hal_buffer_map_range( |
| iree_hal_buffer_t* buffer, iree_hal_mapping_mode_t mapping_mode, |
| iree_hal_memory_access_t memory_access, iree_device_size_t byte_offset, |
| iree_device_size_t byte_length, |
| iree_hal_buffer_mapping_t* out_buffer_mapping); |
| |
| // Prepares to map a range of |buffer| for host pointer access via |
| // |out_buffer_mapping|. The byte offset and byte length may be adjusted for |
| // device alignment. The output data pointer will be properly aligned to the |
| // start of the data. Fails if the memory could not be mapped (invalid access |
| // type, invalid range, or unsupported memory type). |
| // |
| // The buffer must have the IREE_HAL_BUFFER_USAGE_MAPPING bit set. |
| // Unless the buffer is IREE_HAL_MEMORY_TYPE_HOST_COHERENT the caller must |
| // invalidate the byte range they want to access to update the visibility of |
| // the mapped memory. |
| // |
| // This is the first half of a paired operation with |
| // iree_hal_buffer_commit_map_range: callers can prepare a mapping (performing |
| // all of the validation) without actually resolving the host pointer yet. |
| // Once prepared the mapping must be unmapped with |
| // iree_hal_buffer_unmap_range even if it is never committed. |
| // |
| // Mappings may be prepared before the |buffer| has allocated storage. |
| // Committing the mapping requires that storage has been bound for the |
| // duration the mapping will be live. |
| // |
| // Example usage: |
| //   iree_hal_buffer_prepare_map_range(..., &mapping); |
| //   if (maybe) iree_hal_buffer_commit_map_range(..., &mapping); |
| //   iree_hal_buffer_unmap_range(&mapping); |
| IREE_API_EXPORT iree_status_t iree_hal_buffer_prepare_map_range( |
| iree_hal_buffer_t* buffer, iree_hal_mapping_mode_t mapping_mode, |
| iree_hal_memory_access_t memory_access, iree_device_size_t byte_offset, |
| iree_device_size_t byte_length, |
| iree_hal_buffer_mapping_t* out_buffer_mapping); |
| |
| // Commits a mapping prepared with iree_hal_buffer_prepare_map_range. |
| // May still fail for internal reasons, but not for any of those previously |
| // validated during preparation. |
| IREE_API_EXPORT iree_status_t iree_hal_buffer_commit_map_range( |
| iree_hal_buffer_t* buffer, iree_hal_mapping_mode_t mapping_mode, |
| iree_hal_memory_access_t memory_access, |
| iree_hal_buffer_mapping_t* buffer_mapping); |
| |
| // Unmaps the buffer range that was previously mapped to |buffer_mapping|. |
| // |
| // Unless the buffer is IREE_HAL_MEMORY_TYPE_HOST_COHERENT the caller must |
| // flush the byte range they want to make available to other threads/devices. |
| // |
| // May fail. Failure is unlikely for read-only mappings, where the result can |
| // be safely ignored using iree_status_ignore; if writing then users must |
| // check the status to ensure their writes succeeded. |
| IREE_API_EXPORT iree_status_t |
| iree_hal_buffer_unmap_range(iree_hal_buffer_mapping_t* buffer_mapping); |
| |
| // Invalidates a range of non-coherent mapped memory from the host caches. |
| // Afterwards device writes to the given range are guaranteed to be visible |
| // on the host. Call this before reading from non-coherent memory. |
| // |
| // Only needed for memory types lacking IREE_HAL_MEMORY_TYPE_HOST_COHERENT. |
| IREE_API_EXPORT iree_status_t iree_hal_buffer_mapping_invalidate_range( |
| iree_hal_buffer_mapping_t* buffer_mapping, iree_device_size_t byte_offset, |
| iree_device_size_t byte_length); |
| |
| // Flushes a range of non-coherent mapped memory from the host caches. |
| // Afterwards host writes to the given range are guaranteed to be available |
| // for device access. Call this after writing to non-coherent memory. |
| // |
| // Only needed for memory types lacking IREE_HAL_MEMORY_TYPE_HOST_COHERENT. |
| IREE_API_EXPORT iree_status_t iree_hal_buffer_mapping_flush_range( |
| iree_hal_buffer_mapping_t* buffer_mapping, iree_device_size_t byte_offset, |
| iree_device_size_t byte_length); |
| |
| // Calculates a byte subspan range within |buffer_mapping| into |out_span|. |
| // The byte range provided is local to the mapping and the resulting span may |
| // be 0-length. IREE_HAL_WHOLE_BUFFER can be used for |byte_length|. |
| // |
| // The access requirements of the mapping still hold: if the memory is not |
| // host coherent and writeable then the caller must use the |
| // iree_hal_buffer_mapping_invalidate_range and |
| // iree_hal_buffer_mapping_flush_range methods to ensure memory is in the |
| // expected state. |
| IREE_API_EXPORT iree_status_t iree_hal_buffer_mapping_subspan( |
| iree_hal_buffer_mapping_t* buffer_mapping, |
| iree_hal_memory_access_t memory_access, iree_device_size_t byte_offset, |
| iree_device_size_t byte_length, iree_byte_span_t* out_span); |
| |
| //===----------------------------------------------------------------------===// |
| // iree_hal_subspan_buffer_t |
| //===----------------------------------------------------------------------===// |
| |
| // Creates a buffer referencing a subspan of the base |allocated_buffer|. |
| // NOTE(review): this comment used to describe an optional |device_allocator| |
| // argument; the declaration below takes no such parameter. |
| IREE_API_EXPORT iree_status_t iree_hal_subspan_buffer_create( |
| iree_hal_buffer_t* allocated_buffer, iree_device_size_t byte_offset, |
| iree_device_size_t byte_length, iree_allocator_t host_allocator, |
| iree_hal_buffer_t** out_buffer); |
| |
| //===----------------------------------------------------------------------===// |
| // iree_hal_heap_buffer_t |
| //===----------------------------------------------------------------------===// |
| |
| // Wraps the existing host allocation |data| in a buffer. |
| // The provided |release_callback| is called when the buffer is destroyed. |
| // |
| // The buffer must be aligned to at least IREE_HAL_HEAP_BUFFER_ALIGNMENT; if |
| // it is not the call will fail with IREE_STATUS_OUT_OF_RANGE. |
| // |
| // |out_buffer| must be released by the caller. |data| must be kept live for the |
| // lifetime of the wrapping buffer. |
| iree_status_t iree_hal_heap_buffer_wrap( |
| iree_hal_buffer_placement_t placement, iree_hal_memory_type_t memory_type, |
| iree_hal_memory_access_t allowed_access, |
| iree_hal_buffer_usage_t allowed_usage, iree_device_size_t allocation_size, |
| iree_byte_span_t data, iree_hal_buffer_release_callback_t release_callback, |
| iree_allocator_t host_allocator, iree_hal_buffer_t** out_buffer); |
| |
| //===----------------------------------------------------------------------===// |
| // iree_hal_buffer_t implementation details |
| //===----------------------------------------------------------------------===// |
| |
| typedef struct iree_hal_buffer_vtable_t { |
| // Must be iree_hal_buffer_recycle; must stay first (see static_assert below). |
| void(IREE_API_PTR* recycle)(iree_hal_buffer_t* buffer); |
| void(IREE_API_PTR* destroy)(iree_hal_buffer_t* buffer); |
| |
| iree_status_t(IREE_API_PTR* map_range)(iree_hal_buffer_t* buffer, |
| iree_hal_mapping_mode_t mapping_mode, |
| iree_hal_memory_access_t memory_access, |
| iree_device_size_t local_byte_offset, |
| iree_device_size_t local_byte_length, |
| iree_hal_buffer_mapping_t* mapping); |
| |
| iree_status_t(IREE_API_PTR* unmap_range)(iree_hal_buffer_t* buffer, |
| iree_device_size_t local_byte_offset, |
| iree_device_size_t local_byte_length, |
| iree_hal_buffer_mapping_t* mapping); |
| |
| iree_status_t(IREE_API_PTR* invalidate_range)( |
| iree_hal_buffer_t* buffer, iree_device_size_t local_byte_offset, |
| iree_device_size_t local_byte_length); |
| |
| iree_status_t(IREE_API_PTR* flush_range)( |
| iree_hal_buffer_t* buffer, iree_device_size_t local_byte_offset, |
| iree_device_size_t local_byte_length); |
| } iree_hal_buffer_vtable_t; |
| static_assert(offsetof(iree_hal_buffer_vtable_t, recycle) == 0, |
| "iree_hal_resource_vtable_t expects destroy at offset 0, we want " |
| "to recycle instead"); |
| |
| // NOTE: this shared data structure may be a mistake. If vtables were free we |
| // would not provide it and would rely on each buffer implementation to |
| // implement all of the accessor methods. Indirection through vtables costs, |
| // though, so the common information every buffer implementation needs is |
| // hoisted here. Since this adds a fixed cost to every buffer on every |
| // implementation the structure should be kept as small as reasonable. |
| // |
| // NOTE: the internals of this structure are an implementation detail and may |
| // change at any time. If there's no API accessor for a field then assume it |
| // should not be used except by HAL buffer implementations. |
| struct iree_hal_buffer_t { |
| iree_hal_resource_t resource; // must be at 0 |
| // Underlying buffer allocation. If this points back at this buffer structure |
| // then the buffer is itself an allocated buffer; otherwise the underlying |
| // allocation is referenced and retained. |
| iree_hal_buffer_t* allocated_buffer; |
| // Total size of the buffer allocation in its underlying storage. |
| // Captured on every buffer, including non-allocated buffers, so that |
| // internal pooling/suballocation costs can be represented. |
| iree_device_size_t allocation_size; |
| // Offset into the underlying allocated buffer at which this range starts. |
| iree_device_size_t byte_offset; |
| // Length of the buffer range in the underlying allocated buffer. This is the |
| // logical length exposed to users. |
| iree_device_size_t byte_length; |
| |
| // Placement of the buffer on a device/queue set. Captured only for allocated |
| // buffers. |
| iree_hal_buffer_placement_t placement; |
| |
| // Hacky back reference to an allocator that should be notified when the |
| // buffer is released. It exists only to support interception of buffers by |
| // pooling layers and is slated for removal. |
| // |
| // TODO(#19159): remove iree_hal_allocator_deallocate_buffer when pooling no |
| // longer requires the pooling_allocator on iree_hal_buffer_t. |
| iree_hal_allocator_t* pooling_allocator; |
| |
| // TODO(benvanik): bit pack these; could be ~4 bytes vs 12. |
| iree_hal_memory_type_t memory_type; |
| iree_hal_buffer_usage_t allowed_usage; |
| iree_hal_memory_access_t allowed_access; |
| |
| // Unused padding in which more flags or identifiers can be placed, such as |
| // which implementation pool owns the buffer. |
| uint16_t reserved; |
| |
| // Implementation-defined flags used by the buffer implementation for |
| // additional bookkeeping or routing. |
| uint32_t flags; |
| }; |
| |
| // Initializes the common iree_hal_buffer_t state of the caller-provided |
| // |buffer| structure. |
| // NOTE(review): the definition is not visible from this header; the arguments |
| // presumably populate the same-named iree_hal_buffer_t fields and |vtable| is |
| // presumably recorded on the embedded resource - confirm before relying on it. |
| IREE_API_EXPORT void iree_hal_buffer_initialize( |
| iree_hal_buffer_placement_t placement, iree_hal_buffer_t* allocated_buffer, |
| iree_device_size_t allocation_size, iree_device_size_t byte_offset, |
| iree_device_size_t byte_length, iree_hal_memory_type_t memory_type, |
| iree_hal_memory_access_t allowed_access, |
| iree_hal_buffer_usage_t allowed_usage, |
| const iree_hal_buffer_vtable_t* vtable, iree_hal_buffer_t* buffer); |
| |
| // TODO(#19159): remove iree_hal_allocator_deallocate_buffer when pooling no |
| // longer requires the pooling_allocator on iree_hal_buffer_t. Once buffers can |
| // return themselves to pools from their normal destroy callback this extra |
| // recycle thunk will no longer be needed. |
| // |
| // Recycles |buffer| by releasing it to the origin it is associated with via the |
| // release callback (or by destroying it, if none was specified). |
| // The |buffer| pointer may remain valid if it is returned to a pool, but |
| // callers must assume its contents are undefined as if it had been freed. |
| IREE_API_EXPORT void iree_hal_buffer_recycle(iree_hal_buffer_t* buffer); |
| |
| // Destroys |buffer| and frees its memory. |
| // Only release callbacks that want to free the buffer should call this; |
| // implementations must instead use iree_hal_buffer_recycle in their vtables |
| // for the common iree_hal_resource_t destroy callback. |
| IREE_API_EXPORT void iree_hal_buffer_destroy(iree_hal_buffer_t* buffer); |
| |
| #ifdef __cplusplus |
| } // extern "C" |
| #endif // __cplusplus |
| |
| #endif // IREE_HAL_BUFFER_H_ |