blob: efcf7ec9d8b6dabeb599b1e085739d54ff682d11 [file] [log] [blame]
// Copyright 2023 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#ifndef EXPERIMENTAL_CUDA2_PENDING_QUEUE_ACTIONS_H_
#define EXPERIMENTAL_CUDA2_PENDING_QUEUE_ACTIONS_H_
#include "experimental/cuda2/cuda_dynamic_symbols.h"
#include "iree/base/api.h"
#include "iree/base/internal/arena.h"
#include "iree/hal/api.h"
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
// A data structure to manage pending queue actions (kernel launches and async
// allocations).
//
// This is needed in order to satisfy queue action dependencies. IREE uses HAL
// semaphore as the unified mechanism for synchronization directions including
// host to host, host to device, devie to device, and device to host. Plus, it
// allows wait before signal. These flexible capabilities are not all supported
// by CUevent objects. Therefore, we need supporting data structures to
// implement them on top of CUevent objects. Thus this pending queue actions.
//
// This buffers pending queue actions and their associated resources. It
// provides an API to advance the wait list on demand--queue actions are
// released to the GPU when all their wait semaphores are signaled past the
// desired value, or we can have a CUevent already recorded to some CUDA
// stream to wait on.
//
// Thread-safe; multiple threads may enqueue workloads.
typedef struct iree_hal_cuda2_pending_queue_actions_t
iree_hal_cuda2_pending_queue_actions_t;
// Creates a pending actions queue.
iree_status_t iree_hal_cuda2_pending_queue_actions_create(
const iree_hal_cuda2_dynamic_symbols_t* symbols,
iree_arena_block_pool_t* block_pool, iree_allocator_t host_allocator,
iree_hal_cuda2_pending_queue_actions_t** out_actions);
// Destroys the pending |actions| queue.
void iree_hal_cuda2_pending_queue_actions_destroy(iree_hal_resource_t* actions);
// Enqueues the given list of |command_buffers| that waits on
// |wait_semaphore_list| and signals |signal_semaphore_lsit|.
iree_status_t iree_hal_cuda2_pending_queue_actions_enqueue_execution(
CUstream dispatch_stream, CUstream callback_stream,
iree_hal_cuda2_pending_queue_actions_t* actions,
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_host_size_t command_buffer_count,
iree_hal_command_buffer_t* const* command_buffers);
// Tries to scan the pending actions and release ready ones to the GPU.
iree_status_t iree_hal_cuda2_pending_queue_actions_issue(
iree_hal_cuda2_pending_queue_actions_t* actions);
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
#endif // EXPERIMENTAL_CUDA2_PENDING_QUEUE_ACTIONS_H_