// Copyright 2020 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#ifndef IREE_TASK_EXECUTOR_IMPL_H_
#define IREE_TASK_EXECUTOR_IMPL_H_
#include "iree/base/internal/math.h"
#include "iree/base/internal/prng.h"
#include "iree/base/internal/synchronization.h"
#include "iree/base/tracing.h"
#include "iree/task/affinity_set.h"
#include "iree/task/executor.h"
#include "iree/task/list.h"
#include "iree/task/pool.h"
#include "iree/task/post_batch.h"
#include "iree/task/queue.h"
#include "iree/task/tuning.h"
#include "iree/task/worker.h"

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

struct iree_task_executor_t {
iree_atomic_ref_count_t ref_count;
iree_allocator_t allocator;
// Defines how work is selected across queues.
// TODO(benvanik): make mutable; currently always the same reserved value.
iree_task_scheduling_mode_t scheduling_mode;
// State used by the work-stealing operations performed by donated threads.
// This is **NOT SYNCHRONIZED**: we don't care enough about the precise
// selection of workers to mind any tearing of the PRNG state that lives
// inside. Cache write-back order and incidental cache line
// availability/visibility update frequency are like an extra layer of PRNG
// anyway ;)
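//
// Example of sampling a theft victim from this state (a hedged sketch:
// iree_prng_minilcg128_next_uint8 is the helper in iree/base/internal/prng.h
// and the modulo mapping to a worker index is illustrative only):
//   uint8_t bits =
//       iree_prng_minilcg128_next_uint8(&executor->donation_theft_prng);
//   iree_host_size_t victim_index = bits % executor->worker_count;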
iree_prng_minilcg128_state_t donation_theft_prng;
// Pools of transient dispatch tasks shared across all workers.
// Depending on configuration the task pools may allocate after creation
// using the allocator provided upon executor creation.
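//
// Example of acquiring a transient task (a hedged sketch assuming the
// iree_task_pool_acquire/iree_task_pool_release pair in iree/task/pool.h):
//   iree_task_t* task = NULL;
//   IREE_RETURN_IF_ERROR(
//       iree_task_pool_acquire(&executor->dispatch_task_pool, &task));
//   // ... initialize and submit; the task returns to the pool on completion.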
iree_task_pool_t fence_task_pool;
iree_task_pool_t dispatch_task_pool;
// A list of incoming tasks that are ready to execute immediately.
// The list is LIFO and we require that task lists be reversed by the
// submitter so we can use iree_atomic_slist_concat to quickly prepend the
// LIFO list to the atomic slist. By doing this we can construct the task
// lists in LIFO order prior to submission, concat with a pointer swap into
// this list, flush from the list in LIFO order during coordination, and do a
// single LIFO->FIFO conversion while distributing work. What could have been
// half a dozen task list pointer walks and inverted sequential memory access
// becomes one.
//
// Example:
// existing tasks: C B A
// new tasks: 1 2 3
// updated tasks: 3 2 1 C B A
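//
// As a hedged sketch (typed wrapper name assumed from iree/task/list.h), the
// update above is a single call with the submitter-built chain 3->2->1:
//   iree_atomic_task_slist_concat(&executor->incoming_ready_slist,
//                                 /*head=*/task_3, /*tail=*/task_1);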
iree_atomic_task_slist_t incoming_ready_slist;
// A list of incoming wait tasks that need to be waited on. Order doesn't
// really matter here as all tasks will be waited on simultaneously.
iree_atomic_task_slist_t incoming_waiting_slist;
// iree_event_t pool used to acquire system wait handles.
// Many subsystems interacting with the executor will need events to park
// their work in the wait set; sharing the pool across all of them limits the
// number we have outstanding and avoids syscalls to allocate them.
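//
// Example usage by a waiter (a hedged sketch; the acquire/release pair is an
// assumption about the event pool API):
//   iree_event_t event;
//   IREE_RETURN_IF_ERROR(iree_event_pool_acquire(event_pool, 1, &event));
//   // ... insert the event into the wait set and park ...
//   iree_event_pool_release(event_pool, 1, &event);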
iree_event_pool_t* event_pool;
// Guards coordination logic; only one thread at a time may be acting as the
// coordinator.
iree_slim_mutex_t coordinator_mutex;
// A list of wait tasks with external handles that need to be waited on.
// Coordinators can choose to poll/wait on these.
iree_task_list_t waiting_list;
// Guards manipulation and use of the wait_set.
// coordinator_mutex may be held when taking this lock.
iree_slim_mutex_t wait_mutex;
// Wait set containing all the tasks in waiting_list. Coordinator manages
// keeping the waiting_list and wait_set in sync.
iree_wait_set_t* wait_set;
// A bitset indicating which workers are live and usable; all attempts to
// push work onto a particular worker should check this mask first. This
// may change over time either automatically or by user request ("don't use
// these cores for a while, I'm going to be using them", etc).
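//
// Example of the check producers should perform (a hedged sketch; the atomic
// load helper name is assumed from iree/task/affinity_set.h):
//   iree_task_affinity_set_t live_mask = iree_atomic_task_affinity_set_load(
//       &executor->worker_live_mask, iree_memory_order_relaxed);
//   if (live_mask & ((iree_task_affinity_set_t)1ull << worker_index)) {
//     // Worker is live and may have work pushed to it.
//   }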
iree_atomic_task_affinity_set_t worker_live_mask;
// A bitset indicating which workers may be suspended and need to be resumed
// via iree_thread_resume prior to them being able to execute work.
iree_atomic_task_affinity_set_t worker_suspend_mask;
// A bitset indicating which workers are currently idle. Used to bias incoming
// tasks to workers that aren't doing much else. This balances the latency of
// waking an idle worker against the latency of waiting for existing work to
// complete on an already-awake worker.
iree_atomic_task_affinity_set_t worker_idle_mask;
// Specifies how many worker threads there are.
// For now this number is fixed per executor; however, if we wanted to enable
// live join/leave behavior we could change this to a registration mechanism.
iree_host_size_t worker_count;
iree_task_worker_t* workers; // [worker_count]
};

// Merges a submission into the primary FIFO queues.
// Coordinators will fetch items from here as workers demand them but are not
// otherwise notified of the changes until coordination runs again.
//
// May be called from any thread.
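//
// Example from a producer thread (a hedged sketch; the submission helpers are
// assumed from iree/task/submission.h and |task| is any ready-to-run task):
//   iree_task_submission_t submission;
//   iree_task_submission_initialize(&submission);
//   iree_task_submission_enqueue(&submission, task);
//   iree_task_executor_merge_submission(executor, &submission);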
void iree_task_executor_merge_submission(iree_task_executor_t* executor,
iree_task_submission_t* submission);

// Schedules all ready tasks in the |pending_submission| list.
// Only called during coordination and expects the coordinator lock to be held.
void iree_task_executor_schedule_ready_tasks(
iree_task_executor_t* executor, iree_task_submission_t* pending_submission,
iree_task_post_batch_t* post_batch);

// Dispatches tasks in the global submission queue to workers.
// |current_worker| will be NULL if called from a non-worker thread and
// otherwise be the current worker; used to avoid round-tripping through the
// whole system to post to oneself.
//
// If the |current_worker| has no more work remaining and |wait_on_idle| is set
// then the calling thread may wait on any pending wait tasks until one resolves
// or more work is scheduled for the worker.
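//
// Example of a non-worker thread donating itself (a hedged sketch: it blocks
// until work is distributed or a pending wait task resolves):
//   iree_task_executor_coordinate(executor, /*current_worker=*/NULL,
//                                 /*wait_on_idle=*/true);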
void iree_task_executor_coordinate(iree_task_executor_t* executor,
iree_task_worker_t* current_worker,
bool wait_on_idle);

// Tries to steal an entire task from a sibling worker (based on topology).
// Returns a task that is available (has not yet begun processing at all).
// May steal multiple tasks and add them to the |local_task_queue|.
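//
// Example of a worker attempting theft once its local queue drains (a hedged
// sketch; the iree_task_worker_t field names are assumptions for
// illustration):
//   iree_task_t* task = iree_task_executor_try_steal_task(
//       worker->executor, worker->constructive_sharing_mask,
//       /*max_theft_attempts=*/worker->executor->worker_count,
//       &worker->theft_prng, &worker->local_task_queue);
//   if (task) {
//     // Execute |task|; any extra stolen tasks are now in local_task_queue.
//   }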
iree_task_t* iree_task_executor_try_steal_task(
iree_task_executor_t* executor,
iree_task_affinity_set_t constructive_sharing_mask,
uint32_t max_theft_attempts, iree_prng_minilcg128_state_t* theft_prng,
iree_task_queue_t* local_task_queue);

#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus

#endif // IREE_TASK_EXECUTOR_IMPL_H_