blob: 9ae22902e50ef92a3314fece673fe4a48d94554c [file] [log] [blame]
// Copyright 2020 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#ifndef IREE_TASK_TASK_H_
#define IREE_TASK_TASK_H_
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include "iree/base/api.h"
#include "iree/base/internal/atomic_slist.h"
#include "iree/base/internal/atomics.h"
#include "iree/base/internal/cpu.h"
#include "iree/base/internal/synchronization.h"
#include "iree/task/affinity_set.h"
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
typedef struct iree_task_list_t iree_task_list_t;
typedef struct iree_task_pool_t iree_task_pool_t;
typedef struct iree_task_scope_t iree_task_scope_t;
typedef struct iree_task_submission_t iree_task_submission_t;
//==============================================================================
// Task header for internal tracking
//==============================================================================
// Specifies the type of a task and how executors handle it.
enum iree_task_type_bits_t {
// Task is a no-op (performs no work) and exists for flexibility.
IREE_TASK_TYPE_NOP = 0u,
// Task will synchronously call a function before continuing.
IREE_TASK_TYPE_CALL = 1u,
// Task exists only as a barrier to join/fork tasks and has no executable
// payload.
IREE_TASK_TYPE_BARRIER = 2u,
// Task is a fence indicating that a certain point in the task graph has been
// reached. All tasks prior to this fence (by way of happens-before
// dependencies) are guaranteed to have retired.
IREE_TASK_TYPE_FENCE = 3u,
// Task is a wait on an external wait handle (fd, HANDLE, etc).
// Executors will wait on the handle until it is signaled and meets the
// specified condition prior to readying the dependent tasks.
IREE_TASK_TYPE_WAIT = 4u,
// Task is a 3D grid dispatch of zero or more tiles.
// Dispatches are issued when ready by either being split into one shard per
// worker that should process the dispatch.
//
// If IREE_TASK_FLAG_DISPATCH_INDIRECT is set then the dispatch reads the
// workgroup count from a buffer immediately prior to fan-out instead of using
// the values embedded in the task structure.
//
// After a dispatch has been issued the IREE_TASK_FLAG_DISPATCH_RETIRE flag is
// set to indicate that when the dispatch becomes ready again it will be after
// all shards have completed.
IREE_TASK_TYPE_DISPATCH = 5u,
// Task is one of potentially many shards processing a larger dispatch grid.
// Each shard may have a preference as to which parts of grid it will focus
// on but is able to otherwise steal any available region directly from the
// shared dispatch coordination state. Shards retire once there are no more
// tiles remaining in the dispatch grid.
IREE_TASK_TYPE_DISPATCH_SHARD = 6u,
};
typedef uint8_t iree_task_type_t;
enum iree_task_flag_bits_t {
IREE_TASK_FLAG_NONE = 0u,
// Indicates that a wait task is part of a wait-any operation and the
// cancellation flag should be latched by any wait that resolves.
IREE_TASK_FLAG_WAIT_ANY = 1u << 0,
// The wait handle of the wait task has been acquired and the task can be
// waited on with system APIs.
IREE_TASK_FLAG_WAIT_EXPORTED = 1u << 1,
// The wait handle the task is specified to wait on has resolved and the task
// can now be considered complete.
IREE_TASK_FLAG_WAIT_COMPLETED = 1u << 2,
// The workgroup count for the dispatch is provided by way of a pointer to a
// list of 3 uint32_t values that will be sampled immediately prior to
// issuing of the dispatch. The contents of the pointer can be safely modified
// up until the last dependency has completed and the dispatch is about to be
// issued.
IREE_TASK_FLAG_DISPATCH_INDIRECT = 1u << 3,
// The dispatch has been issued and the task is waiting for one or more
// shards to complete. After they complete the dispatch will be readied and
// can be retired.
//
// Though added by the executor after issuing a dispatch users can also set
// this to indicate that all dispatch shards for a particular dispatch have
// been statically scheduled. Executors will then skip issuing the dispatch
// and instead wait until all shards complete, enabling IREE_TASK_TYPE_BARRIER
// behavior but without an additional task as dispatches are still required
// to store information for shards.
IREE_TASK_FLAG_DISPATCH_RETIRE = 1u << 4,
// An error occurred at or before the task and it has been aborted.
// Aborted tasks may continue to execute if they're already in-flight but must
// not begin execution after the flag has been set.
//
// The actual error that occurred is routed to the parent task scope as it
// happens and may be available for querying before all tasks have been
// cleaned up.
IREE_TASK_FLAG_ABORTED = 1u << 5,
};
typedef uint16_t iree_task_flags_t;
typedef struct iree_task_t iree_task_t;
// A function called to cleanup tasks.
// Each task has its associated cleanup function called exactly once.
// The provided |status_code| indicates the execution status of the task prior
// to cleanup and will usually be IREE_STATUS_OK indicating the task was
// successfully issued or IREE_STATUS_ABORTED if the task was discard prior to
// issuing.
typedef void(IREE_API_PTR* iree_task_cleanup_fn_t)(
iree_task_t* task, iree_status_code_t status_code);
// A task within the task system that runs on an executor.
// Tasks have an iree_task_type_t that defines which parameters are valid and
// how the executor is to treat the task. Dependency edges can be defined that
// determine the execution order of tasks within the executors.
struct iree_alignas(iree_max_align_t) iree_task_t {
// Intrusive pointer used to store tasks within iree_task_list_t and
// iree_atomic_task_list_t singly-linked lists. This must come first in the
// structure so that it is at the appropriate alignment.
iree_task_t* next_task;
// The scope this task is attributed to. Errors with the task will be
// propagated to the scope and errors in the scope will cause pending tasks to
// be skipped.
iree_task_scope_t* scope;
// Optional function to call to cleanup the task on completion.
// Will be called after the task has retired or if the task fails to issue
// (dependency failed, etc).
iree_task_cleanup_fn_t cleanup_fn;
// Optional task that will be notified when the task completes.
// The task will have its pending_dependency_count decremented and will be
// readied for execution when the count reaches 0.
iree_task_t* completion_task;
// Specifies which workers will be used to execute this task.
// Forked tasks will inherit their parent task affinity (possibly with some
// task-dependent rules) to partition workloads across workers with knowledge
// of the specific work being performed. For example, some dispatches can be
// limited to run on certain microarchitectures that workers have affinity
// with at the OS scheduler level (such as little.BIG topologies).
iree_task_affinity_set_t affinity_set;
// Total number of dependent tasks still outstanding. Decremented each time
// a dependent task completes. The task is considered ready to execute when
// this value reaches 0.
iree_atomic_int32_t pending_dependency_count;
// Optional pool the task should be returned to after it has resolved. If the
// task was allocated as part of a larger data structure (embedded within
// an arena for example) then this can be NULL to prevent the task system
// from interfering.
iree_task_pool_t* pool;
// Specifies the type of the task and how the executor handles it.
iree_task_type_t type;
// Task-specific flag bits.
iree_task_flags_t flags;
};
static_assert(offsetof(iree_task_t, next_task) == 0,
"next_task intrusive pointer must be at offset 0");
static_assert(sizeof(iree_task_t) <= 64,
"the task header greatly influences pool sizes due to alignment "
"requirements and should be kept tiny");
// Initializes a task header with the given type.
// Must be called on all tasks to ensure proper dependency tracking and list
// state prior to enqueuing. Only the task header structure is initialized and
// any additional data as part of the wrapping task type must be initialized by
// the caller.
void iree_task_initialize(iree_task_type_t type, iree_task_scope_t* scope,
iree_task_t* out_task);
// Sets the optional function called when the task completes (whether successful
// or not). The cleanup function will receive a status indicating whether the
// cleanup is from expected execution as the task retires (IREE_STATUS_OK)
// or because it was aborted (IREE_STATUS_ABORTED).
void iree_task_set_cleanup_fn(iree_task_t* task,
iree_task_cleanup_fn_t cleanup_fn);
// Sets up a dependency edge from |task| to |completion_task| such that when
// |task| completes |completion_task| will be notified and have its
// pending_dependency_count decremented.
void iree_task_set_completion_task(iree_task_t* task,
iree_task_t* completion_task);
// Returns true if the |task| is ready to execute immediately.
// Though this is safe to call from any thread the test may have false-negatives
// (ready tasks are not returned as ready) due to cross-thread synchronization
// latency. Note that tasks may yield themselves during execution and switch
// from ready to waiting (such as when an indirect dispatch needs to wait for
// all tiles to complete).
bool iree_task_is_ready(iree_task_t* task);
// Discards the task and any dependent tasks.
// Any dependent tasks that need to be discarded will be added to
// |discard_worklist| for the caller to continue discarding.
void iree_task_discard(iree_task_t* task, iree_task_list_t* discard_worklist);
//==============================================================================
// IREE_TASK_TYPE_NOP
//==============================================================================
// Task is a no-op (performs no work) and exists for flexibility.
// NOP tasks can be used to link together task lists from multiple threads
// where it may otherwise not be ideal to have heavy-weight concurrency
// structures. NOP tasks can also be useful for neutering another task type
// after it has already been recorded into a list such as when cancellations
// occur.
typedef iree_alignas(iree_max_align_t) struct {
// Task header: implementation detail, do not use.
iree_task_t header;
} iree_task_nop_t;
void iree_task_nop_initialize(iree_task_scope_t* scope,
iree_task_nop_t* out_task);
//==============================================================================
// IREE_TASK_TYPE_CALL
//==============================================================================
typedef iree_status_t(IREE_API_PTR* iree_task_call_closure_fn_t)(
void* user_context, iree_task_t* task,
iree_task_submission_t* pending_submission);
// A function closure representing the function to call and its arguments.
typedef struct iree_task_call_closure_t {
// Function called per tile invocation.
iree_task_call_closure_fn_t fn;
// Opaque pointer to a user-provided data structure.
// No lifetime management is performed by the task system and it is required
// that users ensure that the memory referenced is live until after the task
// has completed.
void* user_context;
// TODO(benvanik): cleanup function? right now assume arg is never freed.
} iree_task_call_closure_t;
// Binds a function pointer and the arguments it should be called with.
// If the arguments represent pointers they must remain live until the task
// has completed execution.
static inline iree_task_call_closure_t iree_task_make_call_closure(
iree_task_call_closure_fn_t fn, void* user_context) {
iree_task_call_closure_t closure = {fn, user_context};
return closure;
}
// A task that will synchronously call a function from the executor and wait
// for it to complete before continuing.
//
// Memory referenced by closure arguments must be kept valid until the function
// executes (in general with the same lifetime as the task itself).
typedef iree_alignas(iree_max_align_t) struct {
// Task header: implementation detail, do not use.
iree_task_t header;
// Function closure to call when the task is executed.
iree_task_call_closure_t closure;
// Resulting status from the call available once all nested tasks have
// completed (or would have completed). It's possible for a call to nest
// additional work under it and then return a failure; to ensure we don't
// discard the root call while the nested tasks are still executing we set the
// status here and wait for the nested tasks to complete. We'll try not to
// issue work that was enqueued while the call was executing but it's possible
// for work to come from other angles and we need to err on the side of
// safety.
iree_atomic_intptr_t status;
} iree_task_call_t;
void iree_task_call_initialize(iree_task_scope_t* scope,
iree_task_call_closure_t closure,
iree_task_call_t* out_task);
//==============================================================================
// IREE_TASK_TYPE_BARRIER
//==============================================================================
// A join point for fork/join-style scheduling.
// References a set of dependent tasks that will be notified and possibly
// readied when the barrier is reached.
//
// This allows for modeling one-to-many and many-to-many relationships. The base
// task dependency system only models one-to-one and should be used if possible
// to avoid the additional overhead of a barrier task both in memory and task
// indirection/queuing.
//
// Example:
// * [A] -> Barrier -> [C, D]
// - A executes
// - Barrier is processed after A completes
// - C and D execute concurrently (in any order)
//
// * [A, B] -> Barrier -> [C, D]
// - A and B execute concurrently (in any order)
// - Barrier is processed after both A and B complete
// - C and D execute concurrently
//
// * [A] -> Barrier -> [B]
// - Don't do this and use the base task dependency instead; it'll work, but
// it's much better to avoid the additional barrier indirection when
// possible.
typedef iree_alignas(iree_max_align_t) struct {
// Task header: implementation detail, do not use.
iree_task_t header;
// Number of valid tasks in the dependent_tasks list.
iree_host_size_t dependent_task_count;
// [0-dependent_task_count] tasks that will be notified when the barrier is
// reached. Each task will have its pending_dependency_count decremented and
// when the count reaches 0 be added to the ready list.
iree_task_t* const* dependent_tasks;
} iree_task_barrier_t;
void iree_task_barrier_initialize(iree_task_scope_t* scope,
iree_host_size_t dependent_task_count,
iree_task_t* const* dependent_tasks,
iree_task_barrier_t* out_task);
void iree_task_barrier_initialize_empty(iree_task_scope_t* scope,
iree_task_barrier_t* out_task);
void iree_task_barrier_set_dependent_tasks(
iree_task_barrier_t* task, iree_host_size_t dependent_task_count,
iree_task_t* const* dependent_tasks);
//==============================================================================
// IREE_TASK_TYPE_FENCE
//==============================================================================
// A fence indicating that a certain point in the task graph has been reached.
// All tasks prior to this fence (by way of happens-before dependencies) are
// guaranteed to have retired.
//
// When all of the dependencies of a fence have retired the fence will notify
// the parent scope of the task by decrementing the pending_submissions count
// and publishing an idle_notification if it was the last in-flight submission.
//
// An optional platform primitive may be provided to signal in a way determined
// by the primitive type via iree_event_set.
typedef iree_alignas(iree_max_align_t) struct {
// Task header: implementation detail, do not use.
iree_task_t header;
// An optional wait primitive to signal when the fence is hit.
// If iree_wait_primitive_immediate then the signal will be ignored.
iree_wait_primitive_t signal_handle;
} iree_task_fence_t;
// Initializes a fence in |out_task| that demarcates activity in a |scope|.
// An optional unowned |signal_handle| can be provided that will be signaled
// with iree_event_set when the fence is reached.
void iree_task_fence_initialize(iree_task_scope_t* scope,
iree_wait_primitive_t signal_handle,
iree_task_fence_t* out_task);
//==============================================================================
// IREE_TASK_TYPE_WAIT
//==============================================================================
// A task representing either a delay until a point in time or a wait on a wait
// source external to the task system.
//
// Waits are modeled in the task graph to enable reducing the number of times a
// full system wait is required by only beginning the wait when the task
// dependencies have completed. Wait sources will be eagerly queried and
// exported to wait handles when the task system would otherwise go idle. All
// wait sources from all pending wait tasks will be accumulated into a wait set
// and waited on in a single syscall.
//
// Waits will block the completion task until the wait resolves successfully or
// the deadline is reached or exceeded.
//
// Sleeps (where wait_source is iree_wait_source_delay) will delay the
// completion task until the delay time is reached or exceeded and will do so
// without triggering an IREE_STATUS_DEADLINE_EXCEEDED.
//
// Wait-all behavior can be modeled with multiple wait tasks joined on one task;
// all of the waits must successfully resolve prior to the completion task being
// issued. If any wait fails then the scope is failed.
//
// Wait-any behavior can be modeled with multiple wait tasks joined on one task
// as with wait-all but with each sharing a cancellation flag and having the
// IREE_TASK_FLAG_WAIT_ANY bit set. If any wait successfully resolves or fails
// the flag will be set to cancel all sibling waits. The cancellation flag must
// be owned by the completion task to ensure that it is live for the lifetime of
// all wait tasks sharing it. In more sophisticated scenarios the cancellation
// flag may be owned by anything in the system that can guarantee the lifetime,
// enabling cancellation actions from external code.
//
// Non-failing deadlines can be implemented with a wait-any on one or more wait
// sources as well as on a delay task: if the delay task is resolved before any
// of the other waits they will be cancelled and the completion task will be
// issued without an IREE_STATUS_DEADLINE_EXCEEDED being emitted.
typedef iree_alignas(iree_max_align_t) struct {
// Task header: implementation detail, do not use.
iree_task_t header;
// The wait source that the task is waiting on.
// May be iree_wait_source_immediate if the wait is neutered or
// iree_wait_source_delay if this is a delay (sleep).
iree_wait_source_t wait_source;
// Deadline for the wait; if this time elapses the wait will be failed with
// IREE_STATUS_DEADLINE_EXCEEDED. May be IREE_TIME_INFINITE_FUTURE to indicate
// that the wait has no deadline.
iree_time_t deadline_ns;
// Optional pointer to a shared cancellation flag.
// Set to non-zero to have the wait cancel and issue the completion task as if
// it had successfully waited. No error will be raised and the completion task
// will need to handle the wake. This is used to model wait-any behavior where
// multiple waits can be issued but if any one resolves all waits are silently
// cancelled.
//
// The flag memory must remain valid until all waits sharing it have retired.
// For a wait-any it would commonly be stored on the completion task to ensure
// that no waits tasks will be live when it is cleaned up.
//
// If omitted no cancellation behavior is enabled.
// If specified the wait task will check the flag prior to entering a system
// wait scope. Cancellation does not impact waits once the system is entered.
// If the IREE_TASK_FLAG_WAIT_ANY bit is set on the task the cancellation flag
// will be set to non-zero after it resolves in order to cancel the sibling
// waits in the wait-any operation.
iree_atomic_int32_t* cancellation_flag;
} iree_task_wait_t;
// Initializes |out_task| as a wait task on |wait_source|.
// The wait will fail with IREE_STATUS_DEADLINE_EXCEEDED if |deadline_ns| is
// exceeded prior to the wait resolving. If the wait fails (system error, etc)
// the failure will be propagated to the |scope|.
void iree_task_wait_initialize(iree_task_scope_t* scope,
iree_wait_source_t wait_source,
iree_time_t deadline_ns,
iree_task_wait_t* out_task);
// Initializes |out_task| as a delay until the given |deadline_ns| is reached or
// exceeded. The completion task will be issued instead of failing with an
// IREE_STATUS_DEADLINE_EXCEEDED.
void iree_task_wait_initialize_delay(iree_task_scope_t* scope,
iree_time_t deadline_ns,
iree_task_wait_t* out_task);
// Sets the wait |task| to a cooperative wait-any mode by marking the
// IREE_TASK_FLAG_WAIT_ANY bit and storing the |cancellation_flag|.
// The cancellation flag must be kept live until after the wait task has
// retired.
void iree_task_wait_set_wait_any(iree_task_wait_t* task,
iree_atomic_int32_t* cancellation_flag);
//==============================================================================
// IREE_TASK_TYPE_DISPATCH_* structures
//==============================================================================
// Statistics tracked across an entire dispatch operation.
// Each tile contributes to these statistics as they execute to provide an
// aggregate set of statistics that can be reported to tracing/user queries.
//
// We want to keep this structure relatively compact as it does add overhead.
// If statistics are used purely for interactive tracing then they can be
// piped directly to the tracing tool using IREE_TRACE_* macros. If the
// statistics are programmatically queried for benchmarks or reporting then
// they belong here where we can efficiently move them around.
//
// If we find ourselves with a lot of hardware-specific counters (vs more
// generic ones like 'l2 cache misses' or 'ipc') then we can sprinkle in some
// #ifdefs.
typedef struct iree_task_dispatch_statistics_t {
// TODO(benvanik): statistics counters.
// NOTE: each of these increases the command buffer storage requirements; we
// should always guard these with IREE_STATISTICS_ENABLE.
iree_atomic_int32_t reserved;
} iree_task_dispatch_statistics_t;
// Merges statistics from |source| to |target| atomically per-field.
// As each field is updated independently and in a relaxed memory order it's
// possible for statistics consumers to see a tear.
void iree_task_dispatch_statistics_merge(
const iree_task_dispatch_statistics_t* source,
iree_task_dispatch_statistics_t* target);
typedef struct iree_task_tile_storage_t {
// TODO(benvanik): coroutine storage.
// Ideally we'll be able to have a fixed coroutine storage size per dispatch
// (via @llvm.coro.size) such that we can preallocate all of the storage for
// a dispatch in one shot. If we need to do dynamic allocation we will need a
// ringbuffer or other kind of pool to allocate from on-demand.
uint32_t reserved;
} iree_task_tile_storage_t;
// Per-tile context provided to each dispatch function invocation in the grid.
// This information is unique to the tile being dispatched and may contain
// specific state about the calling thread/fiber/etc.
//
// If tile execution is suspended by hitting a coroutine suspend point then the
// coroutine state will be stored within the tile context until the tile is
// resumed.
typedef iree_alignas(iree_max_align_t) struct {
// Workgroup ID for the current invocation.
uint32_t workgroup_xyz[3];
// Workgroup size for each invocation.
uint32_t workgroup_size[3];
// Total workgroup count for the task. Can be used in conjunction with the
// per-invocation workgroup_xyz and workgroup_size to compute offsets/indices.
uint32_t workgroup_count[3];
// TODO(benvanik): workgroup index to amortize calculating linear offsets.
// (like gl_GlobalInvocationID)
// Tile-local memory that is pinned to each worker ensuring no cache
// thrashing. Aligned to at least the natural pointer size of the machine.
// Contents are (today) undefined upon entry.
iree_byte_span_t local_memory;
// Shared statistics counters for the dispatch shard.
iree_task_dispatch_statistics_t* statistics;
// Opaque ID of the processor executing the tile.
// May be slightly out of date or 0 if the processor could not be queried.
iree_cpu_processor_id_t processor_id;
} iree_task_tile_context_t;
typedef struct iree_task_dispatch_t iree_task_dispatch_t;
//==============================================================================
// Dispatch function closures
//==============================================================================
typedef iree_status_t(IREE_API_PTR* iree_task_dispatch_closure_fn_t)(
void* user_context, const iree_task_tile_context_t* tile_context,
iree_task_submission_t* pending_submission);
// A function closure representing the function to call and its arguments.
typedef struct iree_task_dispatch_closure_t {
// Function called per tile invocation.
iree_task_dispatch_closure_fn_t fn;
// User-defined argument passed to task functions during invocation.
// Opaque pointer-sized values that could point to user data structures or
// contain embedded values. No lifetime management is performed by the task
// system and it is required that users ensure that the memory referenced is
// live until after the task has completed.
void* user_context;
} iree_task_dispatch_closure_t;
// Binds a function pointer and the arguments it should be called with.
// If the arguments represent pointers they must remain live until the task
// has completed execution.
static inline iree_task_dispatch_closure_t iree_task_make_dispatch_closure(
iree_task_dispatch_closure_fn_t fn, void* user_context) {
iree_task_dispatch_closure_t closure = {fn, user_context};
return closure;
}
//==============================================================================
// IREE_TASK_TYPE_DISPATCH
//==============================================================================
// An execution request across a tiled grid.
// Dispatches are fork points where zero or more dispatch shard tasks are
// spawned and processed prior to joining again on the dispatch completion task.
//
// The total workgroup count defines the [x,y,z] extents of the dispatch grid.
// The count may either be embedded directly into the dispatch or provided as a
// pointer to the workgroup_count[3] that will be read immediately prior to
// forking. If any dimension of the workgroup count is zero then the dispatch is
// skipped and the completion task will be readied immediately.
//
// Example:
// dispatch([5, 1, 1])
// forked into shards based on affinity/scheduling parameters:
// -> dispatch_shard for core 0, processes [0-1, 1, 1]
// -> dispatch_shard for core 1, processes [2-3, 1, 1]
// -> dispatch_shard for core 2, processes [4-5, 1, 1]
// completion_task run after all shards complete
typedef iree_alignas(iree_max_align_t) struct iree_task_dispatch_t {
// Task header: implementation detail, do not use.
iree_task_t header;
// Function closure to call per tile.
iree_task_dispatch_closure_t closure;
// Workgroup size for each invocation. Passed on to tiles without
// modification and not used for scheduling.
uint32_t workgroup_size[3];
// 3D workgroup count used to tile the dispatch.
// [1,1,1] specifies single invocation of the function. A value of 0 in
// any dimension will skip execution of the function.
union {
// Embedded immutable 3D workgroup count value.
uint32_t value[3];
// Pointer to the uint32_t[3] containing the 3D workgroup count.
// Sampled immediately prior to execution.
const uint32_t* ptr;
} workgroup_count;
// Optional transient shared memory size in bytes to allocate and pass into
// the iree_task_tile_context_t::local_memory of each invocation of the
// dispatch closure.
uint32_t local_memory_size;
// Resulting status from the dispatch available once all workgroups have
// completed (or would have completed). If multiple shards processing the
// workgroups hit an error the first will be taken and the result ignored. A
// dispatch with a non-ok status will mark the parent task scope as failing
// when it retires.
iree_atomic_intptr_t status;
// Statistics storage used for aggregating counters across all shards.
iree_task_dispatch_statistics_t statistics;
// The total number of tiles in the dispatch bounding tile_index.
uint32_t tile_count;
// Maximum number of tiles to fetch per tile reservation from the grid.
// Bounded by IREE_TASK_DISPATCH_MAX_TILES_PER_SHARD_RESERVATION and a
// reasonable number chosen based on the tile and shard counts.
uint32_t tiles_per_reservation;
// The tail tile index; the next reservation will start from here.
// This is used by shards to slice off the work to perform in their inner
// loop. Ideally we'd have no destructive interference with other shared data
// in this structure but the shared parts (status/statistics) are updated once
// per shard instead of once per slice and are less of a concern.
iree_atomic_int32_t tile_index;
// Incrementing process-lifetime dispatch identifier.
IREE_TRACE(int64_t dispatch_id;)
} iree_task_dispatch_t;
void iree_task_dispatch_initialize(iree_task_scope_t* scope,
iree_task_dispatch_closure_t closure,
const uint32_t workgroup_size[3],
const uint32_t workgroup_count[3],
iree_task_dispatch_t* out_task);
void iree_task_dispatch_initialize_indirect(
iree_task_scope_t* scope, iree_task_dispatch_closure_t closure,
const uint32_t workgroup_size[3], const uint32_t* workgroup_count_ptr,
iree_task_dispatch_t* out_task);
//==============================================================================
// IREE_TASK_TYPE_DISPATCH_SHARD
//==============================================================================
typedef iree_alignas(iree_max_align_t) struct {
// Task header: implementation detail, do not use.
iree_task_t header;
// NOTE: the parent dispatch task this shard is applied to is in the
// header.completion_task field.
} iree_task_dispatch_shard_t;
void iree_task_dispatch_shard_initialize(iree_task_dispatch_t* dispatch_task,
iree_task_dispatch_shard_t* out_task);
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
#endif // IREE_TASK_TASK_H_