blob: a9e596eed9a3e36a564ee81a8c982c185241e995 [file]
// Copyright 2025 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#ifndef IREE_HAL_DRIVERS_AMDGPU_DEVICE_BUFFER_H_
#define IREE_HAL_DRIVERS_AMDGPU_DEVICE_BUFFER_H_
#include "iree/hal/drivers/amdgpu/device/support/common.h"
// Forward declaration: this header only stores pointers to the device-side
// pool and never needs its definition.
typedef struct iree_hal_amdgpu_device_allocator_pool_t
    iree_hal_amdgpu_device_allocator_pool_t;

//===----------------------------------------------------------------------===//
// iree_hal_amdgpu_device_allocation_handle_t
//===----------------------------------------------------------------------===//

// "Fat" identifier for an allocation pool. It carries both the device-side and
// the host-side view of the same pool so that either side can route to its own
// pool implementation directly instead of performing a lookup.
typedef struct iree_hal_amdgpu_device_allocation_pool_id_t {
  // Pool as seen by the device. It lives in the memory space of the device
  // that owns the allocation, which is not necessarily the local device.
  iree_hal_amdgpu_device_allocator_pool_t* device_pool;
  // Opaque token identifying the pool on the host side.
  uint64_t host_pool;
} iree_hal_amdgpu_device_allocation_pool_id_t;
// Handle wrapping a pointer that is allocated dynamically on the device.
//
// Ownership/usage contract:
// - Whoever owns the handle must keep it in device-visible memory and must
//   always reference it through buffer references of type
//   IREE_HAL_AMDGPU_DEVICE_BUFFER_TYPE_HANDLE.
// - The device dereferences the handle to obtain the real pointer before each
//   use; device-side allocs and frees update that pointer in queue-order.
// - The contents are only valid on the device between an alloca/dealloca
//   pair. Client code is trusted not to misuse the handle (for example by
//   freeing it and then using it again).
//
// The on-device allocator is normally the one manipulating the handle, but the
// host or a remote device occasionally has to as well. One example is the user
// dropping the last iree_hal_buffer_t reference, which requires enqueuing a
// device-side deallocation to clean up. Host-side pool growth is also
// optimized: after growing a pool the host may initialize the handle itself,
// avoiding an extra round-trip to requeue the device allocation.
typedef struct iree_hal_amdgpu_device_allocation_handle_t {
  // The allocated pointer, when one has been assigned.
  void* ptr;

  // Identifies the pool that |ptr| resides in.
  iree_hal_amdgpu_device_allocation_pool_id_t pool_id;

  // Allocator-private data; opaque to everyone else.
  struct {
    // TODO(benvanik): block the allocation resides in and other information
    // the allocator needs to avoid lookups when deallocating.
    int reserved;
  } metadata;
} iree_hal_amdgpu_device_allocation_handle_t;
//===----------------------------------------------------------------------===//
// iree_hal_amdgpu_device_buffer_ref_t
//===----------------------------------------------------------------------===//
// Discriminator stored in a buffer reference: says what kind of value the
// reference carries and therefore how it must be resolved to a device pointer.
// The buffer reference structs in this file store it in a 2-bit field, so the
// enumerator values must stay within [0, 3].
typedef uint8_t iree_hal_amdgpu_device_buffer_type_t;
enum iree_hal_amdgpu_device_buffer_type_e {
  // The reference holds an absolute device pointer that can be accessed
  // directly.
  IREE_HAL_AMDGPU_DEVICE_BUFFER_TYPE_PTR = 0u,

  // The reference holds a queue-ordered allocation handle that is only valid
  // at the time the buffer is committed. The handle itself stays valid for the
  // lifetime of the logical buffer and of any resources referencing it, but
  // its pointer must only be resolved between a corresponding
  // alloca/dealloca.
  IREE_HAL_AMDGPU_DEVICE_BUFFER_TYPE_HANDLE = 1u,

  // The reference names a slot in the binding table provided during
  // execution. Only a single level of indirection is allowed (table slots
  // cannot reference other slots - yet).
  IREE_HAL_AMDGPU_DEVICE_BUFFER_TYPE_SLOT = 2u,
};
// The ordinal of a slot in the binding table.
// Buffer references of type IREE_HAL_AMDGPU_DEVICE_BUFFER_TYPE_SLOT carry one
// of these in their `value.slot` member.
typedef uint32_t iree_hal_amdgpu_device_buffer_ordinal_t;
// A reference to a byte range of a buffer that can be bound to a binding slot.
// The |type| field selects which member of |value| is meaningful.
typedef struct iree_hal_amdgpu_device_buffer_ref_t {
  // Byte offset into the buffer at which the binding begins. It is added to
  // the offset specified on each usage of the slot.
  uint64_t offset;

  // How |value| is resolved to a device pointer.
  uint64_t type : 2;  // iree_hal_amdgpu_device_buffer_type_t

  // Number of bytes of the buffer made available to the executable.
  uint64_t length : 62;

  union {
    // Device pointer; used with IREE_HAL_AMDGPU_DEVICE_BUFFER_TYPE_PTR.
    void* ptr;
    // Queue-ordered allocation handle; used with
    // IREE_HAL_AMDGPU_DEVICE_BUFFER_TYPE_HANDLE.
    iree_hal_amdgpu_device_allocation_handle_t* handle;
    // Binding table slot; used with IREE_HAL_AMDGPU_DEVICE_BUFFER_TYPE_SLOT.
    iree_hal_amdgpu_device_buffer_ordinal_t slot;
    // Raw 64-bit view used when setting the value.
    uint64_t bits;
  } value;
} iree_hal_amdgpu_device_buffer_ref_t;

// Size check: 8 (offset) + 8 (type/length bitfields) + 8 (value) bytes.
static_assert(sizeof(iree_hal_amdgpu_device_buffer_ref_t) == 24,
              "binding table entries should be 8 byte aligned");
// A buffer binding whose contents are a uint32_t[3] XYZ workgroup count.
// Compact variant of iree_hal_amdgpu_device_buffer_ref_t sized to fit in our
// tiny packets: the length is always the constant 12 so only the type, offset,
// and value are stored.
typedef struct iree_hal_amdgpu_device_workgroup_count_buffer_ref_t {
  // How |value| is resolved to a device pointer.
  uint64_t type : 2;  // iree_hal_amdgpu_device_buffer_type_t

  // Byte offset into the buffer at which the binding begins. It is added to
  // the offset specified on each usage of the slot.
  uint64_t offset : 62;

  union {
    // Raw device pointer; used with IREE_HAL_AMDGPU_DEVICE_BUFFER_TYPE_PTR.
    void* ptr;
    // Queue-ordered allocation handle; used with
    // IREE_HAL_AMDGPU_DEVICE_BUFFER_TYPE_HANDLE.
    iree_hal_amdgpu_device_allocation_handle_t* handle;
    // Binding table slot; used with IREE_HAL_AMDGPU_DEVICE_BUFFER_TYPE_SLOT.
    iree_hal_amdgpu_device_buffer_ordinal_t slot;
    // Raw 64-bit view used when setting the value.
    uint64_t bits;
  } value;
} iree_hal_amdgpu_device_workgroup_count_buffer_ref_t;

// Size check: 8 (type/offset bitfields) + 8 (value) bytes.
static_assert(sizeof(iree_hal_amdgpu_device_workgroup_count_buffer_ref_t) == 16,
              "binding table entries should be 8 byte aligned and tiny");

// Length, in bytes, of the referenced workgroup count. The argument is not
// evaluated; the length is the fixed size of a uint32_t[3].
#define iree_hal_amdgpu_device_workgroup_count_buffer_ref_length(buffer_ref) \
  (sizeof(uint32_t[3]))
// Describes a buffer binding that contains a single uint64_t value.
// This is a size-optimized version of iree_hal_amdgpu_device_buffer_ref_t so
// that it will fit in our tiny packets. We know the length is a constant 8 and
// only need the offset, type, and value.
typedef struct iree_hal_amdgpu_device_uint64_buffer_ref_t {
  // Type of the buffer reference used to resolve the device pointer.
  uint64_t type : 2;  // iree_hal_amdgpu_device_buffer_type_t
  // Offset, in bytes, into the buffer that the binding starts at.
  // This will be added to the offset specified on each usage of the slot.
  uint64_t offset : 62;
  union {
    // IREE_HAL_AMDGPU_DEVICE_BUFFER_TYPE_PTR: raw device pointer.
    void* ptr;
    // IREE_HAL_AMDGPU_DEVICE_BUFFER_TYPE_HANDLE: queue-ordered allocation
    // handle.
    iree_hal_amdgpu_device_allocation_handle_t* handle;
    // IREE_HAL_AMDGPU_DEVICE_BUFFER_TYPE_SLOT: binding table slot.
    iree_hal_amdgpu_device_buffer_ordinal_t slot;
    // Used for setting the value.
    uint64_t bits;
  } value;
} iree_hal_amdgpu_device_uint64_buffer_ref_t;

// Size check: 8 (type/offset bitfields) + 8 (value) bytes.
static_assert(sizeof(iree_hal_amdgpu_device_uint64_buffer_ref_t) == 16,
              "binding table entries should be 8 byte aligned and tiny");

// Length, in bytes, of the referenced value: always sizeof(uint64_t). The
// argument is not evaluated. The replacement list is fully parenthesized so
// the macro expands to a single primary expression regardless of the
// surrounding tokens, matching the workgroup count variant of this macro.
#define iree_hal_amdgpu_device_uint64_buffer_ref_length(buffer_ref) \
  (sizeof(uint64_t))
#if defined(IREE_AMDGPU_TARGET_DEVICE)
// Resolves a buffer reference to an absolute device pointer.
//
// Parameters:
//   buffer_ref: the reference to resolve, passed by value.
//   binding_table: table of buffer references used for slot lookups. Must be
//     provided if needed and must have sufficient capacity for any slot that
//     may be referenced.
//     NOTE(review): the IREE_AMDGPU_ALIGNAS(64) annotation presumably means
//     the table is expected to be 64-byte aligned - confirm against the macro
//     definition.
//
// All queue-ordered allocations that may be provided via allocation handles
// must be committed prior to attempting to resolve them and must remain
// committed until all commands using the returned device pointer have
// completed.
//
// Returns the resolved device pointer.
void* iree_hal_amdgpu_device_buffer_ref_resolve(
iree_hal_amdgpu_device_buffer_ref_t buffer_ref,
IREE_AMDGPU_ALIGNAS(64)
const iree_hal_amdgpu_device_buffer_ref_t* IREE_AMDGPU_RESTRICT
binding_table);
// Resolves a workgroup count buffer reference to an absolute device pointer.
// This is equivalent to iree_hal_amdgpu_device_buffer_ref_resolve but for a
// fixed-size uint32_t[3] value.
//
// Parameters:
//   buffer_ref: the compact workgroup count reference to resolve, passed by
//     value.
//   binding_table: table of full-size buffer references
//     (iree_hal_amdgpu_device_buffer_ref_t entries) used for slot lookups.
//
// Returns the resolved device pointer. The returned pointer should have 4-byte
// alignment.
void* iree_hal_amdgpu_device_workgroup_count_buffer_ref_resolve(
iree_hal_amdgpu_device_workgroup_count_buffer_ref_t buffer_ref,
IREE_AMDGPU_ALIGNAS(64)
const iree_hal_amdgpu_device_buffer_ref_t* IREE_AMDGPU_RESTRICT
binding_table);
// Resolves a scalar uint64_t buffer reference to an absolute device pointer.
// This is equivalent to iree_hal_amdgpu_device_buffer_ref_resolve but for a
// fixed-size uint64_t value.
//
// Parameters:
//   buffer_ref: the compact uint64_t reference to resolve, passed by value.
//   binding_table: table of full-size buffer references
//     (iree_hal_amdgpu_device_buffer_ref_t entries) used for slot lookups.
//
// Returns the resolved device pointer. The returned pointer should have 8-byte
// alignment.
void* iree_hal_amdgpu_device_uint64_buffer_ref_resolve(
iree_hal_amdgpu_device_uint64_buffer_ref_t buffer_ref,
IREE_AMDGPU_ALIGNAS(64)
const iree_hal_amdgpu_device_buffer_ref_t* IREE_AMDGPU_RESTRICT
binding_table);
#endif // IREE_AMDGPU_TARGET_DEVICE
#endif // IREE_HAL_DRIVERS_AMDGPU_DEVICE_BUFFER_H_