blob: 3c19b9ec54722a42fe328db5b2a26046a6e1a951 [file] [log] [blame]
// Copyright 2022 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "iree/base/loop_inline.h"
#include "iree/base/assert.h"
#include "iree/base/tracing.h"
// Forward declaration of the control function installed on the loop handles
// produced by iree_loop_inline_reentrant; it is defined later in this file and
// only enqueues commands into an existing ring.
static iree_status_t iree_loop_inline_reentrant_ctl(void* self,
iree_loop_command_t command,
const void* params,
void** inout_ptr);
// Forward declaration of the error sink used by the run routines below when a
// callback returns a non-OK status; it is defined later in this file.
static void iree_loop_inline_emit_error(iree_loop_t loop, iree_status_t status);
//===----------------------------------------------------------------------===//
// Inline execution of operations
//===----------------------------------------------------------------------===//
// IREE_LOOP_COMMAND_CALL
// Invokes the user callback right away with an OK status. If the callback
// itself reports a failure the failure is handed to
// iree_loop_inline_emit_error.
static void iree_loop_inline_run_call(iree_loop_t loop,
                                      iree_loop_call_params_t params) {
  IREE_TRACE_ZONE_BEGIN(z0);

  // Ideally a tail call (when not tracing).
  iree_status_t callback_status =
      params.callback.fn(params.callback.user_data, loop, iree_ok_status());
  const bool callback_failed = !iree_status_is_ok(callback_status);
  if (callback_failed) {
    iree_loop_inline_emit_error(loop, callback_status);
  }

  IREE_TRACE_ZONE_END(z0);
}
// IREE_LOOP_COMMAND_DISPATCH
// Executes every workgroup of the requested grid one after another (x varies
// fastest, then y, then z) and then issues the completion callback exactly
// once. The callback receives OK if all workgroups succeeded or the status of
// the first workgroup that failed; no further workgroups run after a failure.
static void iree_loop_inline_run_dispatch(iree_loop_t loop,
                                          iree_loop_dispatch_params_t params) {
  IREE_TRACE_ZONE_BEGIN(z0);

  const uint32_t count_x = params.workgroup_count_xyz[0];
  const uint32_t count_y = params.workgroup_count_xyz[1];
  const uint32_t count_z = params.workgroup_count_xyz[2];

  // Every loop level re-checks the running status so that the first failing
  // workgroup unwinds all three loops without issuing any more work.
  iree_status_t workgroup_status = iree_ok_status();
  for (uint32_t z = 0; z < count_z && iree_status_is_ok(workgroup_status);
       ++z) {
    for (uint32_t y = 0; y < count_y && iree_status_is_ok(workgroup_status);
         ++y) {
      for (uint32_t x = 0; x < count_x && iree_status_is_ok(workgroup_status);
           ++x) {
        workgroup_status =
            params.workgroup_fn(params.callback.user_data, loop, x, y, z);
      }
    }
  }

  // Notify completion with either success or the first workgroup error.
  // Ideally a tail call (when not tracing).
  iree_status_t callback_status =
      params.callback.fn(params.callback.user_data, loop, workgroup_status);
  if (!iree_status_is_ok(callback_status)) {
    iree_loop_inline_emit_error(loop, callback_status);
  }

  IREE_TRACE_ZONE_END(z0);
}
// IREE_LOOP_COMMAND_WAIT_UNTIL
// Blocks in iree_wait_until on |params.deadline_ns| and then invokes the
// callback. The callback receives OK when the wait reported success and an
// ABORTED status otherwise.
static void iree_loop_inline_run_wait_until(
    iree_loop_t loop, iree_loop_wait_until_params_t params) {
  IREE_TRACE_ZONE_BEGIN(z0);

  iree_status_t wait_status = iree_ok_status();
  if (!iree_wait_until(params.deadline_ns)) {
    wait_status = iree_make_status(IREE_STATUS_ABORTED,
                                   "sleep was aborted by a signal/alert");
  }

  iree_status_t callback_status =
      params.callback.fn(params.callback.user_data, loop, wait_status);
  if (!iree_status_is_ok(callback_status)) {
    iree_loop_inline_emit_error(loop, callback_status);
  }

  IREE_TRACE_ZONE_END(z0);
}
// IREE_LOOP_COMMAND_WAIT_ONE
// Waits on the single wait source in |params| until |params.deadline_ns| and
// then invokes the callback with the result of that wait, whatever it was.
static void iree_loop_inline_run_wait_one(iree_loop_t loop,
                                          iree_loop_wait_one_params_t params) {
  IREE_TRACE_ZONE_BEGIN(z0);

  // Wait on the source directly; this is usually the most optimal
  // implementation when available and for others may drop down to a system
  // wait primitive.
  iree_status_t wait_status = iree_wait_source_wait_one(
      params.wait_source, iree_make_deadline(params.deadline_ns));

  // The callback always runs, receiving the wait result (success or failure).
  iree_status_t callback_status =
      params.callback.fn(params.callback.user_data, loop, wait_status);
  if (!iree_status_is_ok(callback_status)) {
    iree_loop_inline_emit_error(loop, callback_status);
  }

  IREE_TRACE_ZONE_END(z0);
}
// IREE_LOOP_COMMAND_WAIT_ANY
// Completes as soon as any one of |params.wait_sources| resolves.
//
// All sources are first polled without blocking. The first source found
// signaled (or failed) decides the status passed to the callback. Only when
// every source is still pending do we block, and then only on the first
// source in the list.
// iree_wait_any is a much more efficient (and fair) way but this keeps the
// code working on bare-metal.
static void iree_loop_inline_run_wait_any(
    iree_loop_t loop, iree_loop_wait_multi_params_t params) {
  IREE_TRACE_ZONE_BEGIN(z0);
  iree_timeout_t timeout = iree_make_deadline(params.deadline_ns);

  // DEFERRED doubles as the "nothing has resolved yet" marker for the scan;
  // any branch that resolves the wait must overwrite it before breaking out.
  iree_status_t wait_status = iree_status_from_code(IREE_STATUS_DEFERRED);
  for (iree_host_size_t i = 0; i < params.count; ++i) {
    iree_status_code_t wait_status_code = IREE_STATUS_OK;
    iree_status_t query_status =
        iree_wait_source_query(params.wait_sources[i], &wait_status_code);
    if (iree_status_is_ok(query_status)) {
      if (wait_status_code == IREE_STATUS_OK) {
        // Signaled - can bail early. The success must be recorded here:
        // leaving the DEFERRED marker in place would send us into the blocking
        // wait on wait_sources[0] below even though this source has already
        // resolved.
        wait_status = iree_ok_status();
        break;
      } else if (wait_status_code == IREE_STATUS_DEFERRED) {
        // Not signaled yet - keep scanning.
        continue;
      } else {
        // Wait failed - can bail early.
        wait_status = iree_status_from_code(wait_status_code);
        break;
      }
    } else {
      // Failed to perform the query, which we treat the same as a wait error.
      wait_status = query_status;
      break;
    }
  }

  if (iree_status_is_deferred(wait_status)) {
    // No queries resolved/failed - commit any real wait.
    // We choose the first one to be (somewhat) deterministic but really it
    // should be randomized... or if the user cares they should use a real loop.
    // NOTE(review): this reads wait_sources[0] even when params.count is 0;
    // presumably callers never issue an empty wait-any - confirm.
    wait_status = iree_wait_source_wait_one(params.wait_sources[0], timeout);
  }

  // Callback after wait, whether it succeeded or failed.
  iree_status_t status =
      params.callback.fn(params.callback.user_data, loop, wait_status);
  if (!iree_status_is_ok(status)) {
    iree_loop_inline_emit_error(loop, status);
  }
  IREE_TRACE_ZONE_END(z0);
}
// IREE_LOOP_COMMAND_WAIT_ALL
// Waits on each source of |params.wait_sources| in list order, stopping at the
// first wait that does not return OK, and then invokes the callback with the
// status of the last wait performed (OK when the list is empty).
// iree_wait_all is a much more efficient way but this keeps the code working
// on bare-metal.
static void iree_loop_inline_run_wait_all(
    iree_loop_t loop, iree_loop_wait_multi_params_t params) {
  IREE_TRACE_ZONE_BEGIN(z0);

  // The same deadline-derived timeout is used for every individual wait.
  iree_timeout_t timeout = iree_make_deadline(params.deadline_ns);

  iree_status_t wait_status = iree_ok_status();
  iree_host_size_t source_index = 0;
  while (source_index < params.count && iree_status_is_ok(wait_status)) {
    wait_status =
        iree_wait_source_wait_one(params.wait_sources[source_index], timeout);
    ++source_index;
  }

  // The callback always runs, receiving the wait result (success or failure).
  iree_status_t callback_status =
      params.callback.fn(params.callback.user_data, loop, wait_status);
  if (!iree_status_is_ok(callback_status)) {
    iree_loop_inline_emit_error(loop, callback_status);
  }

  IREE_TRACE_ZONE_END(z0);
}
//===----------------------------------------------------------------------===//
// iree_loop_inline_ring_t
//===----------------------------------------------------------------------===//
// Total number of operation slots in the ringbuffer.
// The usable capacity is always 1 less than this: the ring reports itself full
// while one slot is still unused so that "full" can be distinguished from
// "empty" (read head == write head) using only the two masked head indices.
// This wastes a slot but keeps the implementation simple. If we wanted to drop
// another ~32B of stack space we could make this do the right thing.
#define IREE_LOOP_INLINE_RING_CAPACITY ((uint8_t)8)
// The masking used to wrap the head indices only works for powers of two.
static_assert((IREE_LOOP_INLINE_RING_CAPACITY &
(IREE_LOOP_INLINE_RING_CAPACITY - 1)) == 0,
"ringbuffer capacity must be a power of two");
// Bitmask used to perform a quick mod of the ringbuffer indices.
// Indices must always be wrapped before use, either with a modulo:
// uint8_t physical_idx = logical_idx % IREE_LOOP_INLINE_RING_CAPACITY;
// or with this mask, which is equivalent for a power-of-two capacity and way
// better (though the compiler can usually figure it out):
// uint8_t physical_idx = logical_idx & IREE_LOOP_INLINE_RING_MASK;
#define IREE_LOOP_INLINE_RING_MASK (IREE_LOOP_INLINE_RING_CAPACITY - 1)
// An operation in the inline loop ringbuffer containing all the information
// required to replay it at a future time. All pointers are unowned.
typedef struct iree_loop_inline_op_t {
// Selects which member of |params| below is valid.
iree_loop_command_t command;
union {
// Overlays the start of |params| so that the completion callback can be read
// without switching on |command| (iree_loop_inline_abort_all does this).
// NOTE(review): assumes every params struct begins with its
// iree_loop_callback_t - confirm against the params declarations.
iree_loop_callback_t callback;
// Command-specific parameters, copied in by iree_loop_inline_enqueue.
union {
iree_loop_call_params_t call;
iree_loop_dispatch_params_t dispatch;
iree_loop_wait_until_params_t wait_until;
iree_loop_wait_one_params_t wait_one;
// Shared by both the wait-any and wait-all commands.
iree_loop_wait_multi_params_t wait_multi;
} params;
};
} iree_loop_inline_op_t;
// Returns the size in bytes of the parameter struct that accompanies
// |command|, or 0 when the command is not one handled by the inline loop.
static inline uint8_t iree_loop_params_size(iree_loop_command_t command) {
  uint8_t params_size = 0;
  switch (command) {
    case IREE_LOOP_COMMAND_CALL:
      params_size = (uint8_t)sizeof(iree_loop_call_params_t);
      break;
    case IREE_LOOP_COMMAND_DISPATCH:
      params_size = (uint8_t)sizeof(iree_loop_dispatch_params_t);
      break;
    case IREE_LOOP_COMMAND_WAIT_UNTIL:
      params_size = (uint8_t)sizeof(iree_loop_wait_until_params_t);
      break;
    case IREE_LOOP_COMMAND_WAIT_ONE:
      params_size = (uint8_t)sizeof(iree_loop_wait_one_params_t);
      break;
    // Both multi-wait flavors share one parameter struct.
    case IREE_LOOP_COMMAND_WAIT_ANY:
    case IREE_LOOP_COMMAND_WAIT_ALL:
      params_size = (uint8_t)sizeof(iree_loop_wait_multi_params_t);
      break;
    default:
      params_size = 0;
      break;
  }
  return params_size;
}
// Fixed-size ringbuffer of commands enqueued reentrantly.
// We ensure the size stays small so we don't blow the stack of tiny systems.
// The inline loop is explicitly not designed for multi-program cooperative
// scheduling and well-formed programs shouldn't hit the limit.
//
// NOTE: this structure must be in an initialized state if zeroed.
typedef struct iree_loop_inline_ring_t {
// Operation slots; only those between |read_head| and |write_head| hold
// pending operations.
iree_loop_inline_op_t ops[IREE_LOOP_INLINE_RING_CAPACITY];
// Physical index of the next operation to dequeue.
uint8_t read_head;
// Physical index of the slot the next enqueued operation is written to.
uint8_t write_head;
// Optional (may be NULL) location that receives the first failure reported
// through iree_loop_inline_emit_error.
iree_status_t* status_ptr;
} iree_loop_inline_ring_t;
// The ring is placed either on the stack or inside caller-provided storage
// (see iree_loop_inline_using_storage_ctl) and so must fit within
// IREE_LOOP_INLINE_STORAGE_SIZE bytes.
static_assert(
sizeof(iree_loop_inline_ring_t) <= IREE_LOOP_INLINE_STORAGE_SIZE,
"iree_loop_inline_ring_t needs to be tiny as it's allocated on the stack");
// Builds a loop handle bound to |ring| whose control function is the
// reentrant variant; commands issued through it are queued on |ring|.
static inline iree_loop_t iree_loop_inline_reentrant(
    iree_loop_inline_ring_t* ring) {
  iree_loop_t reentrant_loop = {0};
  reentrant_loop.self = ring;
  reentrant_loop.ctl = iree_loop_inline_reentrant_ctl;
  return reentrant_loop;
}
// Prepares |out_ring| for use: records |status_ptr| as the error destination
// and resets both heads so that the ring is empty.
// The op slots are left untouched; they are only read when the heads mark
// them as holding a pending operation.
static inline void iree_loop_inline_ring_initialize(
    iree_status_t* status_ptr, iree_loop_inline_ring_t* out_ring) {
  out_ring->status_ptr = status_ptr;
  out_ring->write_head = 0;
  out_ring->read_head = 0;
}
// Returns true when |ring| holds no pending operations, which is the case
// whenever the read head has caught up to the write head.
static inline bool iree_loop_inline_ring_is_empty(
    const iree_loop_inline_ring_t* ring) {
  const uint8_t next_read = ring->read_head;
  const uint8_t next_write = ring->write_head;
  return next_read == next_write;
}
// Returns true when |ring| cannot accept another operation. The ring counts
// as full once the number of pending operations (the wrapped distance from
// the read head to the write head) reaches the mask value, i.e. capacity - 1.
static inline bool iree_loop_inline_ring_is_full(
    const iree_loop_inline_ring_t* ring) {
  const int pending_count =
      (ring->write_head - ring->read_head) & IREE_LOOP_INLINE_RING_MASK;
  return pending_count == IREE_LOOP_INLINE_RING_MASK;
}
// Enqueues an operation into |ring|, capacity-permitting.
// |params| is copied into the ringbuffer and need not remain live upon return.
//
// Returns:
//   UNIMPLEMENTED if |command| has no known parameter size (unknown command).
//   RESOURCE_EXHAUSTED if the ring has no free slot.
//   OK once the operation has been stored; |ring| is unmodified otherwise.
static iree_status_t iree_loop_inline_enqueue(iree_loop_inline_ring_t* ring,
                                              iree_loop_command_t command,
                                              const void* params) {
  // The only thing we need to do here is memcpy the params into our ring.
  // Since all the params differ in size we just effectively perform a lookup
  // and do the copy.
  const uint8_t params_size = iree_loop_params_size(command);
  // The branch hint has to wrap the whole comparison; hinting on the raw size
  // would mark the error path as the expected one.
  if (IREE_UNLIKELY(params_size == 0)) {
    return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
                            "unimplemented loop command");
  }

  // Ensure there's space for the new operation.
  if (iree_loop_inline_ring_is_full(ring)) {
    return iree_make_status(
        IREE_STATUS_RESOURCE_EXHAUSTED,
        "inline ringbuffer capacity exceeded; reduce the amount of concurrent "
        "work or use a real loop implementation");
  }

  // Reserve a slot for the new operation.
  uint8_t slot = ring->write_head;
  ring->write_head = (ring->write_head + 1) & IREE_LOOP_INLINE_RING_MASK;

  // Copy the operation in; the params are on the stack and won't be valid after
  // the caller returns.
  ring->ops[slot].command = command;
  memcpy(&ring->ops[slot].params, params, params_size);
  return iree_ok_status();
}
// Removes the operation at the read head of |ring| and executes it.
// |ring| must not be empty. The executed operation receives a reentrant loop
// and may use it to enqueue further operations on |ring|.
static void iree_loop_inline_dequeue_and_run_next(
    iree_loop_inline_ring_t* ring) {
  IREE_ASSERT(!iree_loop_inline_ring_is_empty(ring));

  // Take a by-value snapshot of the next operation and release its slot; the
  // operation we are about to run may overwrite the slot by enqueuing more
  // work.
  const uint8_t head = ring->read_head;
  iree_loop_inline_op_t next_op = ring->ops[head];
  ring->read_head = (uint8_t)((head + 1) & IREE_LOOP_INLINE_RING_MASK);

  // Callbacks get a loop bound to this ring through the reentrant control
  // function so that anything they schedule is queued here instead of
  // starting a new top-level run.
  iree_loop_t reentrant_loop = iree_loop_inline_reentrant(ring);

  // Hand off to the matching execution routine; each of these is ideally a
  // tail call. Unknown commands are silently dropped.
  switch (next_op.command) {
    case IREE_LOOP_COMMAND_CALL:
      iree_loop_inline_run_call(reentrant_loop, next_op.params.call);
      return;
    case IREE_LOOP_COMMAND_DISPATCH:
      iree_loop_inline_run_dispatch(reentrant_loop, next_op.params.dispatch);
      return;
    case IREE_LOOP_COMMAND_WAIT_UNTIL:
      iree_loop_inline_run_wait_until(reentrant_loop,
                                      next_op.params.wait_until);
      return;
    case IREE_LOOP_COMMAND_WAIT_ONE:
      iree_loop_inline_run_wait_one(reentrant_loop, next_op.params.wait_one);
      return;
    case IREE_LOOP_COMMAND_WAIT_ANY:
      iree_loop_inline_run_wait_any(reentrant_loop, next_op.params.wait_multi);
      return;
    case IREE_LOOP_COMMAND_WAIT_ALL:
      iree_loop_inline_run_wait_all(reentrant_loop, next_op.params.wait_multi);
      return;
    default:
      return;
  }
}
// Drains |ring| without executing anything: the completion callback of every
// pending operation is invoked with an ABORTED status, leaving the ring empty.
static void iree_loop_inline_abort_all(iree_loop_inline_ring_t* ring) {
  IREE_TRACE_ZONE_BEGIN(z0);

  while (!iree_loop_inline_ring_is_empty(ring)) {
    // Pull the callback out of the slot at the read head and release the slot
    // before calling out.
    const uint8_t head = ring->read_head;
    iree_loop_callback_t abort_callback = ring->ops[head].callback;
    ring->read_head = (uint8_t)((head + 1) & IREE_LOOP_INLINE_RING_MASK);

    // A null loop is passed so that the callback cannot enqueue more work
    // while we are aborting. There is nothing useful to do with an error
    // returned from the callback so it is dropped.
    iree_status_t callback_status =
        abort_callback.fn(abort_callback.user_data, iree_loop_null(),
                          iree_make_status(IREE_STATUS_ABORTED));
    iree_status_ignore(callback_status);
  }

  IREE_TRACE_ZONE_END(z0);
}
// Handles a failure |status| returned by a callback that ran on |loop|.
// The status is stored in the ring's status slot if there is one and it
// currently holds OK; otherwise it is discarded. Either way all operations
// still pending in the ring are then aborted.
// |loop.self| must be the iree_loop_inline_ring_t the callback ran from.
static void iree_loop_inline_emit_error(iree_loop_t loop,
                                        iree_status_t status) {
  IREE_TRACE_ZONE_BEGIN(z0);
  IREE_TRACE_ZONE_APPEND_TEXT(
      z0, iree_status_code_string(iree_status_code(status)));

  iree_loop_inline_ring_t* ring = (iree_loop_inline_ring_t*)loop.self;
  iree_status_t* sticky_status = ring->status_ptr;
  if (!sticky_status || !iree_status_is_ok(*sticky_status)) {
    // Nowhere to put it, or an earlier failure is already recorded.
    iree_status_ignore(status);
  } else {
    // First failure: keep it.
    *sticky_status = status;
  }

  iree_loop_inline_abort_all(ring);
  IREE_TRACE_ZONE_END(z0);
}
// Executes operations from |ring| one at a time until it is empty.
// |ring| must contain at least one operation on entry. Always returns OK;
// failures raised by operations are reported through
// iree_loop_inline_emit_error, which also empties the ring.
static iree_status_t iree_loop_inline_run_all(iree_loop_inline_ring_t* ring) {
  IREE_TRACE_ZONE_BEGIN(z0);

  for (;;) {
    // Run the next op inline; it may enqueue more ops before returning.
    iree_loop_inline_dequeue_and_run_next(ring);
    if (iree_loop_inline_ring_is_empty(ring)) break;
  }

  IREE_TRACE_ZONE_END(z0);
  return iree_ok_status();
}
//===----------------------------------------------------------------------===//
// iree_loop_inline_ctl functions
//===----------------------------------------------------------------------===//
// Top-level control function of the inline loop using a stack-allocated ring.
// |self| must point at the iree_status_t that receives the first failure.
// DRAIN is a no-op; every other command is enqueued on a fresh ring and the
// ring is run to completion before returning. If the status at |self| is
// already non-OK the command is aborted instead of executed.
// Returns the enqueue error if the command could not be queued, OK otherwise.
IREE_API_EXPORT iree_status_t iree_loop_inline_ctl(void* self,
                                                   iree_loop_command_t command,
                                                   const void* params,
                                                   void** inout_ptr) {
  IREE_ASSERT_ARGUMENT(self);

  // Nothing to drain: when called non-reentrantly there is no pending work.
  if (command == IREE_LOOP_COMMAND_DRAIN) return iree_ok_status();

  // Set up a new execution context on the stack.
  iree_status_t* sticky_status = (iree_status_t*)self;
  iree_loop_inline_ring_t ring;
  iree_loop_inline_ring_initialize(sticky_status, &ring);

  // Queue the initial command; it is dequeued right away but going through
  // the ring keeps the code size smaller.
  IREE_RETURN_IF_ERROR(iree_loop_inline_enqueue(&ring, command, params));

  // A previously recorded failure is sticky: rather than running the command
  // we abort it, mimicking the abort behavior of an actual loop. Inline loops
  // never run work from multiple scopes as they don't persist beyond the loop
  // operation.
  if (!iree_status_is_ok(*sticky_status)) {
    iree_loop_inline_abort_all(&ring);
    return iree_ok_status();
  }

  // Run until the ring is empty or we fail.
  return iree_loop_inline_run_all(&ring);  // tail
}
// Top-level control function of the inline loop using caller-provided
// storage. |self| must point at an iree_loop_inline_storage_t; its opaque
// bytes hold the ring and its status field receives the first failure.
// DRAIN is a no-op; every other command is enqueued on the (re)initialized
// ring and the ring is run to completion before returning. If the storage
// status is already non-OK the command is aborted instead of executed.
// Returns the enqueue error if the command could not be queued, OK otherwise.
IREE_API_EXPORT iree_status_t
iree_loop_inline_using_storage_ctl(void* self, iree_loop_command_t command,
                                   const void* params, void** inout_ptr) {
  // Nothing to drain: when called non-reentrantly there is no pending work.
  if (command == IREE_LOOP_COMMAND_DRAIN) return iree_ok_status();

  // The storage contents are undefined on entry to a top-level call, so the
  // ring living inside them has to be initialized before every use.
  iree_loop_inline_storage_t* storage = (iree_loop_inline_storage_t*)self;
  iree_loop_inline_ring_t* storage_ring =
      (iree_loop_inline_ring_t*)storage->opaque;
  iree_loop_inline_ring_initialize(&storage->status, storage_ring);
  IREE_RETURN_IF_ERROR(iree_loop_inline_enqueue(storage_ring, command, params));

  // A previously recorded failure is sticky: rather than running the command
  // we abort it, mimicking the abort behavior of an actual loop. Inline loops
  // never run work from multiple scopes as they don't persist beyond the loop
  // operation.
  if (!iree_status_is_ok(storage->status)) {
    iree_loop_inline_abort_all(storage_ring);
    return iree_ok_status();
  }

  // Run until the ring is empty or we fail.
  return iree_loop_inline_run_all(storage_ring);  // tail
}
// Control function used by loops handed to callbacks while a ring is being
// run. |self| is the iree_loop_inline_ring_t being run. DRAIN is a no-op;
// every other command is only enqueued here and gets executed later by the
// top-level control call that is running the ring.
static iree_status_t iree_loop_inline_reentrant_ctl(void* self,
                                                    iree_loop_command_t command,
                                                    const void* params,
                                                    void** inout_ptr) {
  if (command != IREE_LOOP_COMMAND_DRAIN) {
    // Queue the command and return to the caller without running it.
    return iree_loop_inline_enqueue((iree_loop_inline_ring_t*)self, command,
                                    params);  // tail
  }
  // When called reentrantly we are already draining, as each top-level op
  // drains the ring, so there is nothing to do.
  return iree_ok_status();
}