| // Copyright 2020 The IREE Authors |
| // |
| // Licensed under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| |
| #ifndef IREE_TASK_TASK_H_ |
| #define IREE_TASK_TASK_H_ |
| |
| #include <stdbool.h> |
| #include <stddef.h> |
| #include <stdint.h> |
| |
| #include "iree/base/api.h" |
| #include "iree/base/internal/atomic_slist.h" |
| #include "iree/base/internal/atomics.h" |
| #include "iree/base/internal/cpu.h" |
| #include "iree/base/internal/synchronization.h" |
| #include "iree/task/affinity_set.h" |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif // __cplusplus |
| |
| typedef struct iree_task_list_t iree_task_list_t; |
| typedef struct iree_task_pool_t iree_task_pool_t; |
| typedef struct iree_task_scope_t iree_task_scope_t; |
| typedef struct iree_task_submission_t iree_task_submission_t; |
| |
| //============================================================================== |
| // Task header for internal tracking |
| //============================================================================== |
| |
| // Specifies the type of a task and how executors handle it. |
| enum iree_task_type_bits_t { |
| // Task is a no-op (performs no work) and exists for flexibility. |
| IREE_TASK_TYPE_NOP = 0u, |
| |
| // Task will synchronously call a function before continuing. |
| IREE_TASK_TYPE_CALL = 1u, |
| |
| // Task exists only as a barrier to join/fork tasks and has no executable |
| // payload. |
| IREE_TASK_TYPE_BARRIER = 2u, |
| |
| // Task is a fence indicating that a certain point in the task graph has been |
| // reached. All tasks prior to this fence (by way of happens-before |
| // dependencies) are guaranteed to have retired. |
| IREE_TASK_TYPE_FENCE = 3u, |
| |
| // Task is a wait on an external wait handle (fd, HANDLE, etc). |
| // Executors will wait on the handle until it is signaled and meets the |
| // specified condition prior to readying the dependent tasks. |
| IREE_TASK_TYPE_WAIT = 4u, |
| |
| // Task is a 3D grid dispatch of zero or more tiles. |
| // Dispatches are issued when ready by either being split into one shard per |
| // worker that should process the dispatch. |
| // |
| // If IREE_TASK_FLAG_DISPATCH_INDIRECT is set then the dispatch reads the |
| // workgroup count from a buffer immediately prior to fan-out instead of using |
| // the values embedded in the task structure. |
| // |
| // After a dispatch has been issued the IREE_TASK_FLAG_DISPATCH_RETIRE flag is |
| // set to indicate that when the dispatch becomes ready again it will be after |
| // all shards have completed. |
| IREE_TASK_TYPE_DISPATCH = 5u, |
| |
| // Task is one of potentially many shards processing a larger dispatch grid. |
| // Each shard may have a preference as to which parts of grid it will focus |
| // on but is able to otherwise steal any available region directly from the |
| // shared dispatch coordination state. Shards retire once there are no more |
| // tiles remaining in the dispatch grid. |
| IREE_TASK_TYPE_DISPATCH_SHARD = 6u, |
| }; |
| typedef uint8_t iree_task_type_t; |
| |
| enum iree_task_flag_bits_t { |
| IREE_TASK_FLAG_NONE = 0u, |
| |
| // Indicates that a wait task is part of a wait-any operation and the |
| // cancellation flag should be latched by any wait that resolves. |
| IREE_TASK_FLAG_WAIT_ANY = 1u << 0, |
| |
| // The wait handle of the wait task has been acquired and the task can be |
| // waited on with system APIs. |
| IREE_TASK_FLAG_WAIT_EXPORTED = 1u << 1, |
| |
| // The wait handle the task is specified to wait on has resolved and the task |
| // can now be considered complete. |
| IREE_TASK_FLAG_WAIT_COMPLETED = 1u << 2, |
| |
| // The workgroup count for the dispatch is provided by way of a pointer to a |
| // list of 3 uint32_t values that will be sampled immediately prior to |
| // issuing of the dispatch. The contents of the pointer can be safely modified |
| // up until the last dependency has completed and the dispatch is about to be |
| // issued. |
| IREE_TASK_FLAG_DISPATCH_INDIRECT = 1u << 3, |
| |
| // The dispatch has been issued and the task is waiting for one or more |
| // shards to complete. After they complete the dispatch will be readied and |
| // can be retired. |
| // |
| // Though added by the executor after issuing a dispatch users can also set |
| // this to indicate that all dispatch shards for a particular dispatch have |
| // been statically scheduled. Executors will then skip issuing the dispatch |
| // and instead wait until all shards complete, enabling IREE_TASK_TYPE_BARRIER |
| // behavior but without an additional task as dispatches are still required |
| // to store information for shards. |
| IREE_TASK_FLAG_DISPATCH_RETIRE = 1u << 4, |
| |
| // An error occurred at or before the task and it has been aborted. |
| // Aborted tasks may continue to execute if they're already in-flight but must |
| // not begin execution after the flag has been set. |
| // |
| // The actual error that occurred is routed to the parent task scope as it |
| // happens and may be available for querying before all tasks have been |
| // cleaned up. |
| IREE_TASK_FLAG_ABORTED = 1u << 5, |
| }; |
| typedef uint16_t iree_task_flags_t; |
| |
| typedef struct iree_task_t iree_task_t; |
| |
| // A function called to cleanup tasks. |
| // Each task has its associated cleanup function called exactly once. |
| // The provided |status_code| indicates the execution status of the task prior |
| // to cleanup and will usually be IREE_STATUS_OK indicating the task was |
| // successfully issued or IREE_STATUS_ABORTED if the task was discard prior to |
| // issuing. |
| typedef void(IREE_API_PTR* iree_task_cleanup_fn_t)( |
| iree_task_t* task, iree_status_code_t status_code); |
| |
| // A task within the task system that runs on an executor. |
| // Tasks have an iree_task_type_t that defines which parameters are valid and |
| // how the executor is to treat the task. Dependency edges can be defined that |
| // determine the execution order of tasks within the executors. |
| struct iree_alignas(iree_max_align_t) iree_task_t { |
| // Intrusive pointer used to store tasks within iree_task_list_t and |
| // iree_atomic_task_list_t singly-linked lists. This must come first in the |
| // structure so that it is at the appropriate alignment. |
| iree_task_t* next_task; |
| |
| // The scope this task is attributed to. Errors with the task will be |
| // propagated to the scope and errors in the scope will cause pending tasks to |
| // be skipped. |
| iree_task_scope_t* scope; |
| |
| // Optional function to call to cleanup the task on completion. |
| // Will be called after the task has retired or if the task fails to issue |
| // (dependency failed, etc). |
| iree_task_cleanup_fn_t cleanup_fn; |
| |
| // Optional task that will be notified when the task completes. |
| // The task will have its pending_dependency_count decremented and will be |
| // readied for execution when the count reaches 0. |
| iree_task_t* completion_task; |
| |
| // Specifies which workers will be used to execute this task. |
| // Forked tasks will inherit their parent task affinity (possibly with some |
| // task-dependent rules) to partition workloads across workers with knowledge |
| // of the specific work being performed. For example, some dispatches can be |
| // limited to run on certain microarchitectures that workers have affinity |
| // with at the OS scheduler level (such as little.BIG topologies). |
| iree_task_affinity_set_t affinity_set; |
| |
| // Total number of dependent tasks still outstanding. Decremented each time |
| // a dependent task completes. The task is considered ready to execute when |
| // this value reaches 0. |
| iree_atomic_int32_t pending_dependency_count; |
| |
| // Optional pool the task should be returned to after it has resolved. If the |
| // task was allocated as part of a larger data structure (embedded within |
| // an arena for example) then this can be NULL to prevent the task system |
| // from interfering. |
| iree_task_pool_t* pool; |
| |
| // Specifies the type of the task and how the executor handles it. |
| iree_task_type_t type; |
| |
| // Task-specific flag bits. |
| iree_task_flags_t flags; |
| }; |
| static_assert(offsetof(iree_task_t, next_task) == 0, |
| "next_task intrusive pointer must be at offset 0"); |
| static_assert(sizeof(iree_task_t) <= 64, |
| "the task header greatly influences pool sizes due to alignment " |
| "requirements and should be kept tiny"); |
| |
| // Initializes a task header with the given type. |
| // Must be called on all tasks to ensure proper dependency tracking and list |
| // state prior to enqueuing. Only the task header structure is initialized and |
| // any additional data as part of the wrapping task type must be initialized by |
| // the caller. |
| void iree_task_initialize(iree_task_type_t type, iree_task_scope_t* scope, |
| iree_task_t* out_task); |
| |
| // Sets the optional function called when the task completes (whether successful |
| // or not). The cleanup function will receive a status indicating whether the |
| // cleanup is from expected execution as the task retires (IREE_STATUS_OK) |
| // or because it was aborted (IREE_STATUS_ABORTED). |
| void iree_task_set_cleanup_fn(iree_task_t* task, |
| iree_task_cleanup_fn_t cleanup_fn); |
| |
| // Sets up a dependency edge from |task| to |completion_task| such that when |
| // |task| completes |completion_task| will be notified and have its |
| // pending_dependency_count decremented. |
| void iree_task_set_completion_task(iree_task_t* task, |
| iree_task_t* completion_task); |
| |
| // Returns true if the |task| is ready to execute immediately. |
| // Though this is safe to call from any thread the test may have false-negatives |
| // (ready tasks are not returned as ready) due to cross-thread synchronization |
| // latency. Note that tasks may yield themselves during execution and switch |
| // from ready to waiting (such as when an indirect dispatch needs to wait for |
| // all tiles to complete). |
| bool iree_task_is_ready(iree_task_t* task); |
| |
| // Discards the task and any dependent tasks. |
| // Any dependent tasks that need to be discarded will be added to |
| // |discard_worklist| for the caller to continue discarding. |
| void iree_task_discard(iree_task_t* task, iree_task_list_t* discard_worklist); |
| |
| //============================================================================== |
| // IREE_TASK_TYPE_NOP |
| //============================================================================== |
| |
| // Task is a no-op (performs no work) and exists for flexibility. |
| // NOP tasks can be used to link together task lists from multiple threads |
| // where it may otherwise not be ideal to have heavy-weight concurrency |
| // structures. NOP tasks can also be useful for neutering another task type |
| // after it has already been recorded into a list such as when cancellations |
| // occur. |
| typedef iree_alignas(iree_max_align_t) struct { |
| // Task header: implementation detail, do not use. |
| iree_task_t header; |
| } iree_task_nop_t; |
| |
| void iree_task_nop_initialize(iree_task_scope_t* scope, |
| iree_task_nop_t* out_task); |
| |
| //============================================================================== |
| // IREE_TASK_TYPE_CALL |
| //============================================================================== |
| |
| typedef iree_status_t(IREE_API_PTR* iree_task_call_closure_fn_t)( |
| void* user_context, iree_task_t* task, |
| iree_task_submission_t* pending_submission); |
| |
| // A function closure representing the function to call and its arguments. |
| typedef struct iree_task_call_closure_t { |
| // Function called per tile invocation. |
| iree_task_call_closure_fn_t fn; |
| |
| // Opaque pointer to a user-provided data structure. |
| // No lifetime management is performed by the task system and it is required |
| // that users ensure that the memory referenced is live until after the task |
| // has completed. |
| void* user_context; |
| |
| // TODO(benvanik): cleanup function? right now assume arg is never freed. |
| } iree_task_call_closure_t; |
| |
| // Binds a function pointer and the arguments it should be called with. |
| // If the arguments represent pointers they must remain live until the task |
| // has completed execution. |
| static inline iree_task_call_closure_t iree_task_make_call_closure( |
| iree_task_call_closure_fn_t fn, void* user_context) { |
| iree_task_call_closure_t closure = {fn, user_context}; |
| return closure; |
| } |
| |
| // A task that will synchronously call a function from the executor and wait |
| // for it to complete before continuing. |
| // |
| // Memory referenced by closure arguments must be kept valid until the function |
| // executes (in general with the same lifetime as the task itself). |
| typedef iree_alignas(iree_max_align_t) struct { |
| // Task header: implementation detail, do not use. |
| iree_task_t header; |
| |
| // Function closure to call when the task is executed. |
| iree_task_call_closure_t closure; |
| |
| // Resulting status from the call available once all nested tasks have |
| // completed (or would have completed). It's possible for a call to nest |
| // additional work under it and then return a failure; to ensure we don't |
| // discard the root call while the nested tasks are still executing we set the |
| // status here and wait for the nested tasks to complete. We'll try not to |
| // issue work that was enqueued while the call was executing but it's possible |
| // for work to come from other angles and we need to err on the side of |
| // safety. |
| iree_atomic_intptr_t status; |
| } iree_task_call_t; |
| |
| void iree_task_call_initialize(iree_task_scope_t* scope, |
| iree_task_call_closure_t closure, |
| iree_task_call_t* out_task); |
| |
| //============================================================================== |
| // IREE_TASK_TYPE_BARRIER |
| //============================================================================== |
| |
| // A join point for fork/join-style scheduling. |
| // References a set of dependent tasks that will be notified and possibly |
| // readied when the barrier is reached. |
| // |
| // This allows for modeling one-to-many and many-to-many relationships. The base |
| // task dependency system only models one-to-one and should be used if possible |
| // to avoid the additional overhead of a barrier task both in memory and task |
| // indirection/queuing. |
| // |
| // Example: |
| // * [A] -> Barrier -> [C, D] |
| // - A executes |
| // - Barrier is processed after A completes |
| // - C and D execute concurrently (in any order) |
| // |
| // * [A, B] -> Barrier -> [C, D] |
| // - A and B execute concurrently (in any order) |
| // - Barrier is processed after both A and B complete |
| // - C and D execute concurrently |
| // |
| // * [A] -> Barrier -> [B] |
| // - Don't do this and use the base task dependency instead; it'll work, but |
| // it's much better to avoid the additional barrier indirection when |
| // possible. |
| typedef iree_alignas(iree_max_align_t) struct { |
| // Task header: implementation detail, do not use. |
| iree_task_t header; |
| |
| // Number of valid tasks in the dependent_tasks list. |
| iree_host_size_t dependent_task_count; |
| // [0-dependent_task_count] tasks that will be notified when the barrier is |
| // reached. Each task will have its pending_dependency_count decremented and |
| // when the count reaches 0 be added to the ready list. |
| iree_task_t* const* dependent_tasks; |
| } iree_task_barrier_t; |
| |
| void iree_task_barrier_initialize(iree_task_scope_t* scope, |
| iree_host_size_t dependent_task_count, |
| iree_task_t* const* dependent_tasks, |
| iree_task_barrier_t* out_task); |
| |
| void iree_task_barrier_initialize_empty(iree_task_scope_t* scope, |
| iree_task_barrier_t* out_task); |
| |
| void iree_task_barrier_set_dependent_tasks( |
| iree_task_barrier_t* task, iree_host_size_t dependent_task_count, |
| iree_task_t* const* dependent_tasks); |
| |
| //============================================================================== |
| // IREE_TASK_TYPE_FENCE |
| //============================================================================== |
| |
| // A fence indicating that a certain point in the task graph has been reached. |
| // All tasks prior to this fence (by way of happens-before dependencies) are |
| // guaranteed to have retired. |
| // |
| // When all of the dependencies of a fence have retired the fence will notify |
| // the parent scope of the task by decrementing the pending_submissions count |
| // and publishing an idle_notification if it was the last in-flight submission. |
| // |
| // An optional platform primitive may be provided to signal in a way determined |
| // by the primitive type via iree_event_set. |
| typedef iree_alignas(iree_max_align_t) struct { |
| // Task header: implementation detail, do not use. |
| iree_task_t header; |
| |
| // An optional wait primitive to signal when the fence is hit. |
| // If iree_wait_primitive_immediate then the signal will be ignored. |
| iree_wait_primitive_t signal_handle; |
| } iree_task_fence_t; |
| |
| // Initializes a fence in |out_task| that demarcates activity in a |scope|. |
| // An optional unowned |signal_handle| can be provided that will be signaled |
| // with iree_event_set when the fence is reached. |
| void iree_task_fence_initialize(iree_task_scope_t* scope, |
| iree_wait_primitive_t signal_handle, |
| iree_task_fence_t* out_task); |
| |
| //============================================================================== |
| // IREE_TASK_TYPE_WAIT |
| //============================================================================== |
| |
| // A task representing either a delay until a point in time or a wait on a wait |
| // source external to the task system. |
| // |
| // Waits are modeled in the task graph to enable reducing the number of times a |
| // full system wait is required by only beginning the wait when the task |
| // dependencies have completed. Wait sources will be eagerly queried and |
| // exported to wait handles when the task system would otherwise go idle. All |
| // wait sources from all pending wait tasks will be accumulated into a wait set |
| // and waited on in a single syscall. |
| // |
| // Waits will block the completion task until the wait resolves successfully or |
| // the deadline is reached or exceeded. |
| // |
| // Sleeps (where wait_source is iree_wait_source_delay) will delay the |
| // completion task until the delay time is reached or exceeded and will do so |
| // without triggering an IREE_STATUS_DEADLINE_EXCEEDED. |
| // |
| // Wait-all behavior can be modeled with multiple wait tasks joined on one task; |
| // all of the waits must successfully resolve prior to the completion task being |
| // issued. If any wait fails then the scope is failed. |
| // |
| // Wait-any behavior can be modeled with multiple wait tasks joined on one task |
| // as with wait-all but with each sharing a cancellation flag and having the |
| // IREE_TASK_FLAG_WAIT_ANY bit set. If any wait successfully resolves or fails |
| // the flag will be set to cancel all sibling waits. The cancellation flag must |
| // be owned by the completion task to ensure that it is live for the lifetime of |
| // all wait tasks sharing it. In more sophisticated scenarios the cancellation |
| // flag may be owned by anything in the system that can guarantee the lifetime, |
| // enabling cancellation actions from external code. |
| // |
| // Non-failing deadlines can be implemented with a wait-any on one or more wait |
| // sources as well as on a delay task: if the delay task is resolved before any |
| // of the other waits they will be cancelled and the completion task will be |
| // issued without an IREE_STATUS_DEADLINE_EXCEEDED being emitted. |
| typedef iree_alignas(iree_max_align_t) struct { |
| // Task header: implementation detail, do not use. |
| iree_task_t header; |
| |
| // The wait source that the task is waiting on. |
| // May be iree_wait_source_immediate if the wait is neutered or |
| // iree_wait_source_delay if this is a delay (sleep). |
| iree_wait_source_t wait_source; |
| |
| // Deadline for the wait; if this time elapses the wait will be failed with |
| // IREE_STATUS_DEADLINE_EXCEEDED. May be IREE_TIME_INFINITE_FUTURE to indicate |
| // that the wait has no deadline. |
| iree_time_t deadline_ns; |
| |
| // Optional pointer to a shared cancellation flag. |
| // Set to non-zero to have the wait cancel and issue the completion task as if |
| // it had successfully waited. No error will be raised and the completion task |
| // will need to handle the wake. This is used to model wait-any behavior where |
| // multiple waits can be issued but if any one resolves all waits are silently |
| // cancelled. |
| // |
| // The flag memory must remain valid until all waits sharing it have retired. |
| // For a wait-any it would commonly be stored on the completion task to ensure |
| // that no waits tasks will be live when it is cleaned up. |
| // |
| // If omitted no cancellation behavior is enabled. |
| // If specified the wait task will check the flag prior to entering a system |
| // wait scope. Cancellation does not impact waits once the system is entered. |
| // If the IREE_TASK_FLAG_WAIT_ANY bit is set on the task the cancellation flag |
| // will be set to non-zero after it resolves in order to cancel the sibling |
| // waits in the wait-any operation. |
| iree_atomic_int32_t* cancellation_flag; |
| } iree_task_wait_t; |
| |
| // Initializes |out_task| as a wait task on |wait_source|. |
| // The wait will fail with IREE_STATUS_DEADLINE_EXCEEDED if |deadline_ns| is |
| // exceeded prior to the wait resolving. If the wait fails (system error, etc) |
| // the failure will be propagated to the |scope|. |
| void iree_task_wait_initialize(iree_task_scope_t* scope, |
| iree_wait_source_t wait_source, |
| iree_time_t deadline_ns, |
| iree_task_wait_t* out_task); |
| |
| // Initializes |out_task| as a delay until the given |deadline_ns| is reached or |
| // exceeded. The completion task will be issued instead of failing with an |
| // IREE_STATUS_DEADLINE_EXCEEDED. |
| void iree_task_wait_initialize_delay(iree_task_scope_t* scope, |
| iree_time_t deadline_ns, |
| iree_task_wait_t* out_task); |
| |
| // Sets the wait |task| to a cooperative wait-any mode by marking the |
| // IREE_TASK_FLAG_WAIT_ANY bit and storing the |cancellation_flag|. |
| // The cancellation flag must be kept live until after the wait task has |
| // retired. |
| void iree_task_wait_set_wait_any(iree_task_wait_t* task, |
| iree_atomic_int32_t* cancellation_flag); |
| |
| //============================================================================== |
| // IREE_TASK_TYPE_DISPATCH_* structures |
| //============================================================================== |
| |
| // Statistics tracked across an entire dispatch operation. |
| // Each tile contributes to these statistics as they execute to provide an |
| // aggregate set of statistics that can be reported to tracing/user queries. |
| // |
| // We want to keep this structure relatively compact as it does add overhead. |
| // If statistics are used purely for interactive tracing then they can be |
| // piped directly to the tracing tool using IREE_TRACE_* macros. If the |
| // statistics are programmatically queried for benchmarks or reporting then |
| // they belong here where we can efficiently move them around. |
| // |
| // If we find ourselves with a lot of hardware-specific counters (vs more |
| // generic ones like 'l2 cache misses' or 'ipc') then we can sprinkle in some |
| // #ifdefs. |
| typedef struct iree_task_dispatch_statistics_t { |
| // TODO(benvanik): statistics counters. |
| // NOTE: each of these increases the command buffer storage requirements; we |
| // should always guard these with IREE_STATISTICS_ENABLE. |
| iree_atomic_int32_t reserved; |
| } iree_task_dispatch_statistics_t; |
| |
| // Merges statistics from |source| to |target| atomically per-field. |
| // As each field is updated independently and in a relaxed memory order it's |
| // possible for statistics consumers to see a tear. |
| void iree_task_dispatch_statistics_merge( |
| const iree_task_dispatch_statistics_t* source, |
| iree_task_dispatch_statistics_t* target); |
| |
| typedef struct iree_task_tile_storage_t { |
| // TODO(benvanik): coroutine storage. |
| // Ideally we'll be able to have a fixed coroutine storage size per dispatch |
| // (via @llvm.coro.size) such that we can preallocate all of the storage for |
| // a dispatch in one shot. If we need to do dynamic allocation we will need a |
| // ringbuffer or other kind of pool to allocate from on-demand. |
| uint32_t reserved; |
| } iree_task_tile_storage_t; |
| |
| // Per-tile context provided to each dispatch function invocation in the grid. |
| // This information is unique to the tile being dispatched and may contain |
| // specific state about the calling thread/fiber/etc. |
| // |
| // If tile execution is suspended by hitting a coroutine suspend point then the |
| // coroutine state will be stored within the tile context until the tile is |
| // resumed. |
| typedef iree_alignas(iree_max_align_t) struct { |
| // Workgroup ID for the current invocation. |
| uint32_t workgroup_xyz[3]; |
| // Workgroup size for each invocation. |
| uint32_t workgroup_size[3]; |
| // Total workgroup count for the task. Can be used in conjunction with the |
| // per-invocation workgroup_xyz and workgroup_size to compute offsets/indices. |
| uint32_t workgroup_count[3]; |
| // TODO(benvanik): workgroup index to amortize calculating linear offsets. |
| // (like gl_GlobalInvocationID) |
| |
| // Tile-local memory that is pinned to each worker ensuring no cache |
| // thrashing. Aligned to at least the natural pointer size of the machine. |
| // Contents are (today) undefined upon entry. |
| iree_byte_span_t local_memory; |
| |
| // Shared statistics counters for the dispatch shard. |
| iree_task_dispatch_statistics_t* statistics; |
| |
| // Opaque ID of the processor executing the tile. |
| // May be slightly out of date or 0 if the processor could not be queried. |
| iree_cpu_processor_id_t processor_id; |
| } iree_task_tile_context_t; |
| |
| typedef struct iree_task_dispatch_t iree_task_dispatch_t; |
| |
| //============================================================================== |
| // Dispatch function closures |
| //============================================================================== |
| |
| typedef iree_status_t(IREE_API_PTR* iree_task_dispatch_closure_fn_t)( |
| void* user_context, const iree_task_tile_context_t* tile_context, |
| iree_task_submission_t* pending_submission); |
| |
| // A function closure representing the function to call and its arguments. |
| typedef struct iree_task_dispatch_closure_t { |
| // Function called per tile invocation. |
| iree_task_dispatch_closure_fn_t fn; |
| |
| // User-defined argument passed to task functions during invocation. |
| // Opaque pointer-sized values that could point to user data structures or |
| // contain embedded values. No lifetime management is performed by the task |
| // system and it is required that users ensure that the memory referenced is |
| // live until after the task has completed. |
| void* user_context; |
| } iree_task_dispatch_closure_t; |
| |
| // Binds a function pointer and the arguments it should be called with. |
| // If the arguments represent pointers they must remain live until the task |
| // has completed execution. |
| static inline iree_task_dispatch_closure_t iree_task_make_dispatch_closure( |
| iree_task_dispatch_closure_fn_t fn, void* user_context) { |
| iree_task_dispatch_closure_t closure = {fn, user_context}; |
| return closure; |
| } |
| |
| //============================================================================== |
| // IREE_TASK_TYPE_DISPATCH |
| //============================================================================== |
| |
| // An execution request across a tiled grid. |
| // Dispatches are fork points where zero or more dispatch shard tasks are |
| // spawned and processed prior to joining again on the dispatch completion task. |
| // |
| // The total workgroup count defines the [x,y,z] extents of the dispatch grid. |
| // The count may either be embedded directly into the dispatch or provided as a |
| // pointer to the workgroup_count[3] that will be read immediately prior to |
| // forking. If any dimension of the workgroup count is zero then the dispatch is |
| // skipped and the completion task will be readied immediately. |
| // |
| // Example: |
| // dispatch([5, 1, 1]) |
| // forked into shards based on affinity/scheduling parameters: |
| // -> dispatch_shard for core 0, processes [0-1, 1, 1] |
| // -> dispatch_shard for core 1, processes [2-3, 1, 1] |
| // -> dispatch_shard for core 2, processes [4-5, 1, 1] |
| // completion_task run after all shards complete |
| typedef iree_alignas(iree_max_align_t) struct iree_task_dispatch_t { |
| // Task header: implementation detail, do not use. |
| iree_task_t header; |
| |
| // Function closure to call per tile. |
| iree_task_dispatch_closure_t closure; |
| |
| // Workgroup size for each invocation. Passed on to tiles without |
| // modification and not used for scheduling. |
| uint32_t workgroup_size[3]; |
| |
| // 3D workgroup count used to tile the dispatch. |
| // [1,1,1] specifies single invocation of the function. A value of 0 in |
| // any dimension will skip execution of the function. |
| union { |
| // Embedded immutable 3D workgroup count value. |
| uint32_t value[3]; |
| // Pointer to the uint32_t[3] containing the 3D workgroup count. |
| // Sampled immediately prior to execution. |
| const uint32_t* ptr; |
| } workgroup_count; |
| |
| // Optional transient shared memory size in bytes to allocate and pass into |
| // the iree_task_tile_context_t::local_memory of each invocation of the |
| // dispatch closure. |
| uint32_t local_memory_size; |
| |
| // Resulting status from the dispatch available once all workgroups have |
| // completed (or would have completed). If multiple shards processing the |
| // workgroups hit an error the first will be taken and the result ignored. A |
| // dispatch with a non-ok status will mark the parent task scope as failing |
| // when it retires. |
| iree_atomic_intptr_t status; |
| |
| // Statistics storage used for aggregating counters across all shards. |
| iree_task_dispatch_statistics_t statistics; |
| |
| // The total number of tiles in the dispatch bounding tile_index. |
| uint32_t tile_count; |
| |
| // Maximum number of tiles to fetch per tile reservation from the grid. |
| // Bounded by IREE_TASK_DISPATCH_MAX_TILES_PER_SHARD_RESERVATION and a |
| // reasonable number chosen based on the tile and shard counts. |
| uint32_t tiles_per_reservation; |
| |
| // The tail tile index; the next reservation will start from here. |
| // This is used by shards to slice off the work to perform in their inner |
| // loop. Ideally we'd have no destructive interference with other shared data |
| // in this structure but the shared parts (status/statistics) are updated once |
| // per shard instead of once per slice and are less of a concern. |
| iree_atomic_int32_t tile_index; |
| |
| // Incrementing process-lifetime dispatch identifier. |
| IREE_TRACE(int64_t dispatch_id;) |
| } iree_task_dispatch_t; |
| |
| void iree_task_dispatch_initialize(iree_task_scope_t* scope, |
| iree_task_dispatch_closure_t closure, |
| const uint32_t workgroup_size[3], |
| const uint32_t workgroup_count[3], |
| iree_task_dispatch_t* out_task); |
| |
| void iree_task_dispatch_initialize_indirect( |
| iree_task_scope_t* scope, iree_task_dispatch_closure_t closure, |
| const uint32_t workgroup_size[3], const uint32_t* workgroup_count_ptr, |
| iree_task_dispatch_t* out_task); |
| |
| //============================================================================== |
| // IREE_TASK_TYPE_DISPATCH_SHARD |
| //============================================================================== |
| |
| typedef iree_alignas(iree_max_align_t) struct { |
| // Task header: implementation detail, do not use. |
| iree_task_t header; |
| |
| // NOTE: the parent dispatch task this shard is applied to is in the |
| // header.completion_task field. |
| } iree_task_dispatch_shard_t; |
| |
| void iree_task_dispatch_shard_initialize(iree_task_dispatch_t* dispatch_task, |
| iree_task_dispatch_shard_t* out_task); |
| |
| #ifdef __cplusplus |
| } // extern "C" |
| #endif // __cplusplus |
| |
| #endif // IREE_TASK_TASK_H_ |