blob: 9c1e59b0c0d0e70a206412e05b22a970866ad145 [file] [log] [blame]
// Copyright 2023 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#ifndef IREE_HAL_DRIVERS_CUDA_MEMORY_POOLS_H_
#define IREE_HAL_DRIVERS_CUDA_MEMORY_POOLS_H_
#include "experimental/cuda2/api.h"
#include "experimental/cuda2/cuda_dynamic_symbols.h"
#include "experimental/cuda2/cuda_headers.h"
#include "iree/base/api.h"
#include "iree/base/internal/atomics.h"
#include "iree/hal/api.h"
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
// Retained CUDA memory pools for various allocation types.
typedef struct iree_hal_cuda2_memory_pools_t {
// Used exclusively for DEVICE_LOCAL allocations.
CUmemoryPool device_local;
// Used for any host-visible/host-local memory types.
CUmemoryPool other;
const iree_hal_cuda2_dynamic_symbols_t* cuda_symbols;
iree_allocator_t host_allocator;
IREE_STATISTICS(struct {
iree_atomic_int64_t device_bytes_allocated;
iree_atomic_int64_t device_bytes_freed;
iree_atomic_int64_t host_bytes_allocated;
iree_atomic_int64_t host_bytes_freed;
} statistics;)
} iree_hal_cuda2_memory_pools_t;
// Initializes |out_pools| by configuring new CUDA memory pools.
iree_status_t iree_hal_cuda2_memory_pools_initialize(
const iree_hal_cuda2_dynamic_symbols_t* cuda_symbols, CUdevice cu_device,
const iree_hal_cuda2_memory_pooling_params_t* pooling_params,
iree_allocator_t host_allocator,
iree_hal_cuda2_memory_pools_t* IREE_RESTRICT out_pools);
// Deinitializes the |pools| and releases the underlying CUDA resources.
void iree_hal_cuda2_memory_pools_deinitialize(
iree_hal_cuda2_memory_pools_t* pools);
// Merges statistics information from |pools| into |statistics|.
void iree_hal_cuda2_memory_pools_merge_statistics(
iree_hal_cuda2_memory_pools_t* pools,
iree_hal_allocator_statistics_t* statistics);
// Trims all memory pools by releasing resources back to the system.
iree_status_t iree_hal_cuda2_memory_pools_trim(
iree_hal_cuda2_memory_pools_t* pools,
const iree_hal_cuda2_memory_pooling_params_t* pooling_params);
// Asynchronously allocates a buffer from an appropriate pool.
// The allocation will be stream-ordered on |stream|.
iree_status_t iree_hal_cuda2_memory_pools_alloca(
iree_hal_cuda2_memory_pools_t* pools, CUstream stream,
iree_hal_allocator_pool_t pool, iree_hal_buffer_params_t params,
iree_device_size_t allocation_size,
iree_hal_buffer_t** IREE_RESTRICT out_buffer);
// Asynchronously deallocates a buffer from its pool.
// The deallocation will be stream-ordered on |stream|.
iree_status_t iree_hal_cuda2_memory_pools_dealloca(
iree_hal_cuda2_memory_pools_t* pools, CUstream stream,
iree_hal_buffer_t* buffer);
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
#endif // IREE_HAL_DRIVERS_CUDA_MEMORY_POOLS_H_