| // Copyright 2023 The IREE Authors |
| // |
| // Licensed under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| |
| #include "experimental/cuda2/event_pool.h" |
| |
| #include <stdbool.h> |
| #include <stddef.h> |
| #include <string.h> |
| |
| #include "experimental/cuda2/cuda_dynamic_symbols.h" |
| #include "experimental/cuda2/cuda_status_util.h" |
| #include "iree/base/api.h" |
| #include "iree/base/internal/atomics.h" |
| #include "iree/base/internal/synchronization.h" |
| #include "iree/hal/api.h" |
| |
| //===----------------------------------------------------------------------===// |
| // iree_hal_cuda2_event_t |
| //===----------------------------------------------------------------------===// |
| |
| struct iree_hal_cuda2_event_t { |
| // The allocator used to create the event. |
| iree_allocator_t host_allocator; |
| // The symbols used to create and destroy CUevent objects. |
| const iree_hal_cuda2_dynamic_symbols_t* symbols; |
| |
| // The event pool that owns this event. This cannot be NULL. |
| iree_hal_cuda2_event_pool_t* pool; |
| // The underlying CUevent object. |
| CUevent cu_event; |
| |
| // A reference count used to manage resource lifetime. Its value range: |
| // * 1 - when inside the event pool and to be acquired; |
| // * >= 1 - when acquired outside of the event pool; |
| // * 0 - when before releasing back to the pool or destruction. |
| iree_atomic_ref_count_t ref_count; |
| }; |
| |
| CUevent iree_hal_cuda2_event_handle(const iree_hal_cuda2_event_t* event) { |
| return event->cu_event; |
| } |
| |
| static inline void iree_hal_cuda2_event_destroy(iree_hal_cuda2_event_t* event) { |
| iree_allocator_t host_allocator = event->host_allocator; |
| const iree_hal_cuda2_dynamic_symbols_t* symbols = event->symbols; |
| IREE_TRACE_ZONE_BEGIN(z0); |
| |
| IREE_ASSERT_REF_COUNT_ZERO(&event->ref_count); |
| IREE_CUDA_IGNORE_ERROR(symbols, cuEventDestroy(event->cu_event)); |
| iree_allocator_free(host_allocator, event); |
| |
| IREE_TRACE_ZONE_END(z0); |
| } |
| |
| static inline iree_status_t iree_hal_cuda2_event_create( |
| const iree_hal_cuda2_dynamic_symbols_t* symbols, |
| iree_hal_cuda2_event_pool_t* pool, iree_allocator_t host_allocator, |
| iree_hal_cuda2_event_t** out_event) { |
| IREE_ASSERT_ARGUMENT(symbols); |
| IREE_ASSERT_ARGUMENT(pool); |
| IREE_ASSERT_ARGUMENT(out_event); |
| *out_event = NULL; |
| IREE_TRACE_ZONE_BEGIN(z0); |
| |
| iree_hal_cuda2_event_t* event = NULL; |
| IREE_RETURN_AND_END_ZONE_IF_ERROR( |
| z0, |
| iree_allocator_malloc(host_allocator, sizeof(*event), (void**)&event)); |
| event->host_allocator = host_allocator; |
| event->symbols = symbols; |
| event->pool = pool; |
| event->cu_event = NULL; |
| iree_atomic_ref_count_init(&event->ref_count); // -> 1 |
| |
| iree_status_t status = IREE_CURESULT_TO_STATUS( |
| symbols, cuEventCreate(&event->cu_event, CU_EVENT_DISABLE_TIMING), |
| "cuEventCreate"); |
| if (iree_status_is_ok(status)) { |
| *out_event = event; |
| } else { |
| iree_atomic_ref_count_dec(&event->ref_count); // -> 0 |
| iree_hal_cuda2_event_destroy(event); |
| } |
| |
| IREE_TRACE_ZONE_END(z0); |
| return status; |
| } |
| |
| void iree_hal_cuda2_event_retain(iree_hal_cuda2_event_t* event) { |
| iree_atomic_ref_count_inc(&event->ref_count); |
| } |
| |
| static void iree_hal_cuda2_event_pool_release( |
| iree_hal_cuda2_event_pool_t* event_pool, iree_host_size_t event_count, |
| iree_hal_cuda2_event_t** events); |
| |
| void iree_hal_cuda2_event_release(iree_hal_cuda2_event_t* event) { |
| if (iree_atomic_ref_count_dec(&event->ref_count) == 1) { |
| // Release back to the pool if the reference count becomes 0. |
| iree_hal_cuda2_event_pool_release(event->pool, 1, &event); |
| } |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // iree_hal_cuda2_event_pool_t |
| //===----------------------------------------------------------------------===// |
| |
| struct iree_hal_cuda2_event_pool_t { |
| // The allocator used to create the event pool. |
| iree_allocator_t host_allocator; |
| // The symbols used to create and destroy CUevent objects. |
| const iree_hal_cuda2_dynamic_symbols_t* symbols; |
| |
| // Guards event related fields in the pool. We don't expect a performant |
| // program to frequently allocate events for synchronization purposes; the |
| // traffic to this pool should be low. So it should be fine to use mutex to |
| // guard here. |
| iree_slim_mutex_t event_mutex; |
| |
| // Maximum number of event objects that will be maintained in the pool. |
| // More events may be allocated at any time, but they will be disposed |
| // directly when they are no longer needed. |
| iree_host_size_t available_capacity IREE_GUARDED_BY(event_mutex); |
| // Total number of currently available event objects. |
| iree_host_size_t available_count IREE_GUARDED_BY(event_mutex); |
| // The list of available_count event objects. |
| iree_hal_cuda2_event_t* available_list[] IREE_GUARDED_BY(event_mutex); |
| }; |
| // + Additional inline allocation for holding events up to the capacity. |
| |
| iree_status_t iree_hal_cuda2_event_pool_allocate( |
| const iree_hal_cuda2_dynamic_symbols_t* symbols, |
| iree_host_size_t available_capacity, iree_allocator_t host_allocator, |
| iree_hal_cuda2_event_pool_t** out_event_pool) { |
| IREE_ASSERT_ARGUMENT(symbols); |
| IREE_ASSERT_ARGUMENT(out_event_pool); |
| *out_event_pool = NULL; |
| IREE_TRACE_ZONE_BEGIN(z0); |
| |
| iree_hal_cuda2_event_pool_t* event_pool = NULL; |
| iree_host_size_t total_size = |
| sizeof(*event_pool) + |
| available_capacity * sizeof(*event_pool->available_list); |
| IREE_RETURN_AND_END_ZONE_IF_ERROR( |
| z0, |
| iree_allocator_malloc(host_allocator, total_size, (void**)&event_pool)); |
| event_pool->host_allocator = host_allocator; |
| event_pool->symbols = symbols; |
| iree_slim_mutex_initialize(&event_pool->event_mutex); |
| event_pool->available_capacity = available_capacity; |
| event_pool->available_count = 0; |
| |
| iree_status_t status = iree_ok_status(); |
| for (iree_host_size_t i = 0; i < available_capacity; ++i) { |
| status = iree_hal_cuda2_event_create( |
| symbols, event_pool, host_allocator, |
| &event_pool->available_list[event_pool->available_count++]); |
| if (!iree_status_is_ok(status)) break; |
| } |
| |
| if (iree_status_is_ok(status)) { |
| *out_event_pool = event_pool; |
| } else { |
| iree_hal_cuda2_event_pool_free(event_pool); |
| } |
| IREE_TRACE_ZONE_END(z0); |
| return status; |
| } |
| |
| void iree_hal_cuda2_event_pool_free(iree_hal_cuda2_event_pool_t* event_pool) { |
| iree_allocator_t host_allocator = event_pool->host_allocator; |
| IREE_TRACE_ZONE_BEGIN(z0); |
| |
| for (iree_host_size_t i = 0; i < event_pool->available_count; ++i) { |
| iree_hal_cuda2_event_t* event = event_pool->available_list[i]; |
| iree_atomic_ref_count_dec(&event->ref_count); // -> 0 |
| iree_hal_cuda2_event_destroy(event); |
| } |
| iree_slim_mutex_deinitialize(&event_pool->event_mutex); |
| iree_allocator_free(host_allocator, event_pool); |
| |
| IREE_TRACE_ZONE_END(z0); |
| } |
| |
| iree_status_t iree_hal_cuda2_event_pool_acquire( |
| iree_hal_cuda2_event_pool_t* event_pool, iree_host_size_t event_count, |
| iree_hal_cuda2_event_t** out_events) { |
| IREE_ASSERT_ARGUMENT(event_pool); |
| if (!event_count) return iree_ok_status(); |
| IREE_ASSERT_ARGUMENT(out_events); |
| IREE_TRACE_ZONE_BEGIN(z0); |
| |
| // We'll try to get what we can from the pool and fall back to initializing |
| // new iree_hal_cuda2_event_t objects. |
| iree_host_size_t remaining_count = event_count; |
| |
| // Try first to grab from the pool. |
| iree_slim_mutex_lock(&event_pool->event_mutex); |
| iree_host_size_t from_pool_count = |
| iree_min(event_pool->available_count, event_count); |
| if (from_pool_count > 0) { |
| iree_host_size_t pool_base_index = |
| event_pool->available_count - from_pool_count; |
| memcpy(out_events, &event_pool->available_list[pool_base_index], |
| from_pool_count * sizeof(*event_pool->available_list)); |
| event_pool->available_count -= from_pool_count; |
| remaining_count -= from_pool_count; |
| } |
| iree_slim_mutex_unlock(&event_pool->event_mutex); |
| |
| // Allocate the rest of the events. |
| if (remaining_count > 0) { |
| IREE_TRACE_ZONE_BEGIN_NAMED(z1, "event-pool-unpooled-acquire"); |
| iree_status_t status = iree_ok_status(); |
| for (iree_host_size_t i = 0; i < remaining_count; ++i) { |
| status = iree_hal_cuda2_event_create(event_pool->symbols, event_pool, |
| event_pool->host_allocator, |
| &out_events[from_pool_count + i]); |
| if (!iree_status_is_ok(status)) { |
| // Must release all events we've acquired so far. |
| iree_hal_cuda2_event_pool_release(event_pool, from_pool_count + i, |
| out_events); |
| IREE_TRACE_ZONE_END(z1); |
| IREE_TRACE_ZONE_END(z0); |
| return status; |
| } |
| } |
| IREE_TRACE_ZONE_END(z1); |
| } |
| |
| IREE_TRACE_ZONE_END(z0); |
| return iree_ok_status(); |
| } |
| |
| static void iree_hal_cuda2_event_pool_release( |
| iree_hal_cuda2_event_pool_t* event_pool, iree_host_size_t event_count, |
| iree_hal_cuda2_event_t** events) { |
| IREE_ASSERT_ARGUMENT(event_pool); |
| if (!event_count) return; |
| IREE_ASSERT_ARGUMENT(events); |
| IREE_TRACE_ZONE_BEGIN(z0); |
| |
| // We'll try to release all we can back to the pool and then deinitialize |
| // the ones that won't fit. |
| iree_host_size_t remaining_count = event_count; |
| |
| // Try first to release to the pool. |
| iree_slim_mutex_lock(&event_pool->event_mutex); |
| iree_host_size_t to_pool_count = |
| iree_min(event_pool->available_capacity - event_pool->available_count, |
| event_count); |
| if (to_pool_count > 0) { |
| for (iree_host_size_t i = 0; i < to_pool_count; ++i) { |
| IREE_ASSERT_REF_COUNT_ZERO(&events[i]->ref_count); |
| iree_hal_cuda2_event_retain(events[i]); // -> 1 |
| } |
| iree_host_size_t pool_base_index = event_pool->available_count; |
| memcpy(&event_pool->available_list[pool_base_index], events, |
| to_pool_count * sizeof(*event_pool->available_list)); |
| event_pool->available_count += to_pool_count; |
| remaining_count -= to_pool_count; |
| } |
| iree_slim_mutex_unlock(&event_pool->event_mutex); |
| |
| // Deallocate the rest of the events. We don't bother resetting them as we are |
| // getting rid of them. |
| if (remaining_count > 0) { |
| IREE_TRACE_ZONE_BEGIN_NAMED(z1, "event-pool-unpooled-release"); |
| for (iree_host_size_t i = 0; i < remaining_count; ++i) { |
| iree_hal_cuda2_event_destroy(events[to_pool_count + i]); |
| } |
| IREE_TRACE_ZONE_END(z1); |
| } |
| IREE_TRACE_ZONE_END(z0); |
| } |