// Copyright 2019 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "iree/vm/invocation.h"
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>   // snprintf (tracing invocation-ID names)
#include <stdlib.h>  // malloc (tracing invocation-ID names)
#include <string.h>
#include "iree/base/api.h"
#include "iree/base/internal/debugging.h"
#include "iree/vm/ref.h"
#include "iree/vm/stack.h"
#include "iree/vm/value.h"
//===----------------------------------------------------------------------===//
// Invocation utilities for I/O
//===----------------------------------------------------------------------===//
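//
// The cconv fragments handled below are flat sequences of type characters
// pulled from a function's calling convention string (the IREE_VM_CCONV_TYPE_*
// values: 'v' void, 'i' i32, 'I' i64, 'f' f32, 'F' f64, 'r' ref). As an
// illustrative sketch, a fragment of "irI" describes a tightly-packed buffer
// with no alignment padding:
//   offset 0                         : int32_t
//   offset 4                         : iree_vm_ref_t
//   offset 4 + sizeof(iree_vm_ref_t) : int64_t
// A fragment of "v" denotes zero values, which is why the marshaling routines
// below treat a leading 'v' as an empty I/O list.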
// Releases reference counted values in |storage|.
static void iree_vm_invoke_release_io_refs(iree_string_view_t cconv_fragment,
iree_byte_span_t storage) {
if (!storage.data_length) return;
// NOTE: fragments are bare type-character sequences; the full cconv string's
// leading version character ('0') is never present here.
uint8_t* p = storage.data;
for (iree_host_size_t i = 0; i < cconv_fragment.size; ++i) {
char c = cconv_fragment.data[i];
switch (c) {
default:
IREE_ASSERT_UNREACHABLE("calling convention/FFI mismatch");
break;
case IREE_VM_CCONV_TYPE_VOID:
break;
case IREE_VM_CCONV_TYPE_I32:
case IREE_VM_CCONV_TYPE_F32:
p += sizeof(int32_t);
break;
case IREE_VM_CCONV_TYPE_I64:
case IREE_VM_CCONV_TYPE_F64:
p += sizeof(int64_t);
break;
case IREE_VM_CCONV_TYPE_REF:
iree_vm_ref_release((iree_vm_ref_t*)p);
p += sizeof(iree_vm_ref_t);
break;
}
}
}
// Releases storage for arguments.
static void iree_vm_invoke_release_argument_storage(
iree_string_view_t cconv_fragment, iree_byte_span_t storage,
bool is_heap_alloc, iree_allocator_t host_allocator) {
iree_vm_invoke_release_io_refs(cconv_fragment, storage);
if (is_heap_alloc) {
iree_allocator_free(host_allocator, storage.data);
}
}
// Releases storage for results.
static void iree_vm_invoke_release_result_storage(
iree_string_view_t cconv_fragment, iree_byte_span_t storage,
void* stack_storage, iree_allocator_t host_allocator) {
iree_vm_invoke_release_io_refs(cconv_fragment, storage);
if (storage.data != stack_storage) {
iree_allocator_free(host_allocator, storage.data);
}
}
// Marshals caller arguments from the variant list to the ABI convention.
static iree_status_t iree_vm_invoke_marshal_inputs(
iree_string_view_t cconv_arguments, const iree_vm_list_t* inputs,
iree_byte_span_t arguments) {
// We are 1:1 right now with no variadic args, so do a quick verification on
// the input list.
iree_host_size_t expected_input_count =
cconv_arguments.size > 0
? (cconv_arguments.data[0] == 'v' ? 0 : cconv_arguments.size)
: 0;
if (IREE_UNLIKELY(!inputs)) {
if (IREE_UNLIKELY(expected_input_count > 0)) {
return iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"no input provided to a function that has inputs");
}
return iree_ok_status();
} else if (IREE_UNLIKELY(expected_input_count != iree_vm_list_size(inputs))) {
return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"input list and function mismatch; expected %zu "
"arguments but passed %zu",
expected_input_count, iree_vm_list_size(inputs));
}
uint8_t* p = arguments.data;
for (iree_host_size_t cconv_i = 0, arg_i = 0; cconv_i < cconv_arguments.size;
++cconv_i, ++arg_i) {
switch (cconv_arguments.data[cconv_i]) {
case IREE_VM_CCONV_TYPE_VOID:
break;
case IREE_VM_CCONV_TYPE_I32: {
iree_vm_value_t value;
IREE_RETURN_IF_ERROR(iree_vm_list_get_value_as(
inputs, arg_i, IREE_VM_VALUE_TYPE_I32, &value));
memcpy(p, &value.i32, sizeof(int32_t));
p += sizeof(int32_t);
} break;
case IREE_VM_CCONV_TYPE_I64: {
iree_vm_value_t value;
IREE_RETURN_IF_ERROR(iree_vm_list_get_value_as(
inputs, arg_i, IREE_VM_VALUE_TYPE_I64, &value));
memcpy(p, &value.i64, sizeof(int64_t));
p += sizeof(int64_t);
} break;
case IREE_VM_CCONV_TYPE_F32: {
iree_vm_value_t value;
IREE_RETURN_IF_ERROR(iree_vm_list_get_value_as(
inputs, arg_i, IREE_VM_VALUE_TYPE_F32, &value));
memcpy(p, &value.f32, sizeof(float));
p += sizeof(float);
} break;
case IREE_VM_CCONV_TYPE_F64: {
iree_vm_value_t value;
IREE_RETURN_IF_ERROR(iree_vm_list_get_value_as(
inputs, arg_i, IREE_VM_VALUE_TYPE_F64, &value));
memcpy(p, &value.f64, sizeof(double));
p += sizeof(double);
} break;
case IREE_VM_CCONV_TYPE_REF: {
// TODO(benvanik): see if we can't remove this retain by instead relying
// on the caller still owning the list.
IREE_RETURN_IF_ERROR(
iree_vm_list_get_ref_retain(inputs, arg_i, (iree_vm_ref_t*)p));
p += sizeof(iree_vm_ref_t);
} break;
}
}
return iree_ok_status();
}
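//
// For illustration: marshaling inputs [i32:7, i64:9] against a cconv_arguments
// fragment of "iI" packs a 12-byte |arguments| buffer:
//   bytes 0-3  : int32_t 7
//   bytes 4-11 : int64_t 9
// mirroring the pointer bumps in the loop above.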
// Marshals callee results from the ABI convention to the variant list.
static iree_status_t iree_vm_invoke_marshal_outputs(
iree_string_view_t cconv_results, iree_byte_span_t results,
iree_vm_list_t* outputs) {
iree_host_size_t expected_output_count =
cconv_results.size > 0
? (cconv_results.data[0] == 'v' ? 0 : cconv_results.size)
: 0;
if (IREE_UNLIKELY(!outputs)) {
if (IREE_UNLIKELY(expected_output_count > 0)) {
return iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"no output provided to a function that has outputs");
}
return iree_ok_status();
}
// Resize the output list to hold all results (and kill anything that may
// have been in there).
// TODO(benvanik): list method for resetting to a new size.
IREE_RETURN_IF_ERROR(iree_vm_list_resize(outputs, 0));
IREE_RETURN_IF_ERROR(iree_vm_list_resize(outputs, expected_output_count));
uint8_t* p = results.data;
for (iree_host_size_t cconv_i = 0, arg_i = 0; cconv_i < cconv_results.size;
++cconv_i, ++arg_i) {
switch (cconv_results.data[cconv_i]) {
case IREE_VM_CCONV_TYPE_VOID:
break;
case IREE_VM_CCONV_TYPE_I32: {
iree_vm_value_t value = iree_vm_value_make_i32(*(int32_t*)p);
IREE_RETURN_IF_ERROR(iree_vm_list_set_value(outputs, arg_i, &value));
p += sizeof(int32_t);
} break;
case IREE_VM_CCONV_TYPE_I64: {
iree_vm_value_t value = iree_vm_value_make_i64(*(int64_t*)p);
IREE_RETURN_IF_ERROR(iree_vm_list_set_value(outputs, arg_i, &value));
p += sizeof(int64_t);
} break;
case IREE_VM_CCONV_TYPE_F32: {
iree_vm_value_t value = iree_vm_value_make_f32(*(float*)p);
IREE_RETURN_IF_ERROR(iree_vm_list_set_value(outputs, arg_i, &value));
p += sizeof(float);
} break;
case IREE_VM_CCONV_TYPE_F64: {
iree_vm_value_t value = iree_vm_value_make_f64(*(double*)p);
IREE_RETURN_IF_ERROR(iree_vm_list_set_value(outputs, arg_i, &value));
p += sizeof(double);
} break;
case IREE_VM_CCONV_TYPE_REF: {
IREE_RETURN_IF_ERROR(
iree_vm_list_set_ref_move(outputs, arg_i, (iree_vm_ref_t*)p));
p += sizeof(iree_vm_ref_t);
} break;
}
}
return iree_ok_status();
}
//===----------------------------------------------------------------------===//
// Fiber tracing support
//===----------------------------------------------------------------------===//
// Fibers are tricky things to instrument as tooling support is often lacking.
// We support two major modes (beyond when tracing is entirely disabled):
// IREE_TRACING_FEATURE_FIBERS: use Tracy's native fiber support.
// Does not support concurrent/interleaved coroutines.
// !IREE_TRACING_FEATURE_FIBERS: emulated support by trace stack fiddling.
// Supports concurrent/interleaved coroutines but messes with statistics
// as the trace stack is suspended/resumed and zones get extra counts.
//
// To make concurrent coroutines work when Tracy's fiber support is enabled we
// go from treating each context as a fiber to treating each invocation as one.
// This has the side-effect of creating one fiber per invocation, which can be
// really noisy in benchmarks; the best that can be done there is to disable
// native fiber support.
static iree_vm_invocation_id_t iree_vm_invoke_allocate_id(
iree_vm_context_t* context, const iree_vm_function_t* function) {
#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_FIBERS
if (iree_vm_context_flags(context) & IREE_VM_CONTEXT_FLAG_CONCURRENT) {
// Native Tracy fiber support does not handle interleaved coroutines.
// Instead we'll allocate a unique ID per invocation.
// The string must remain live for the lifetime of the process.
// TODO(benvanik): name it based on the function?
static iree_atomic_int32_t next_invocation_id = IREE_ATOMIC_VAR_INIT(1);
uint32_t invocation_id = iree_atomic_fetch_add_int32(
&next_invocation_id, 1, iree_memory_order_relaxed);
IREE_LEAK_CHECK_DISABLE_PUSH();
char* name = (char*)malloc(32);
snprintf(name, 32, "invoke-%04d", invocation_id - 1);
IREE_LEAK_CHECK_DISABLE_POP();
return (iree_vm_invocation_id_t)name;
} else {
// Non-concurrent (sequential) execution can just reuse the context ID.
return (iree_vm_invocation_id_t)iree_vm_context_id(context);
}
#else
return (iree_vm_invocation_id_t)iree_vm_context_id(context);
#endif // IREE_TRACING_FEATURE_FIBERS
}
#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
static void iree_vm_invoke_fiber_enter(iree_vm_invocation_id_t invocation_id) {
if (!invocation_id) return;
IREE_TRACE_FIBER_ENTER(invocation_id);
}
static void iree_vm_invoke_fiber_reenter(iree_vm_invocation_id_t invocation_id,
iree_vm_stack_t* stack) {
if (!invocation_id) return;
IREE_TRACE_FIBER_ENTER(invocation_id);
iree_vm_stack_resume_trace_zones(stack);
}
static void iree_vm_invoke_fiber_leave(iree_vm_invocation_id_t invocation_id,
iree_vm_stack_t* stack) {
if (!invocation_id) return;
if (stack) iree_vm_stack_suspend_trace_zones(stack);
IREE_TRACE_FIBER_LEAVE();
}
#endif // IREE_TRACING_FEATURE_INSTRUMENTATION
//===----------------------------------------------------------------------===//
// Synchronous invocation
//===----------------------------------------------------------------------===//
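//
// Example usage (a sketch; the context, function name, and I/O lists are
// assumed to have been set up by the caller):
//   iree_vm_function_t function;
//   IREE_RETURN_IF_ERROR(iree_vm_context_resolve_function(
//       context, iree_make_cstring_view("module.entry"), &function));
//   IREE_RETURN_IF_ERROR(iree_vm_invoke(
//       context, function, IREE_VM_INVOCATION_FLAG_NONE, /*policy=*/NULL,
//       inputs, outputs, iree_allocator_system()));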
IREE_API_EXPORT iree_status_t iree_vm_invoke(
iree_vm_context_t* context, iree_vm_function_t function,
iree_vm_invocation_flags_t flags, const iree_vm_invocation_policy_t* policy,
const iree_vm_list_t* inputs, iree_vm_list_t* outputs,
iree_allocator_t host_allocator) {
IREE_TRACE_ZONE_BEGIN(z0);
// Bound the synchronous invocation to the timeout specified by the user
// regardless of what the target of the invocation wants when it waits.
// TODO(benvanik): add a timeout arg to iree_vm_invoke.
// For now we only use the timeouts specified on the wait operations.
iree_timeout_t timeout = iree_infinite_timeout();
iree_time_t deadline_ns = iree_timeout_as_deadline_ns(timeout);
// Allocate an invocation ID for tracing.
iree_vm_invocation_id_t invocation_id =
iree_any_bit_set(flags, IREE_VM_INVOCATION_FLAG_TRACE_INLINE)
? 0
: iree_vm_invoke_allocate_id(context, &function);
(void)invocation_id; // unused when tracing is disabled
// Begin a zone outside the fiber to represent one tick of the loop.
IREE_TRACE_ZONE_BEGIN_NAMED(zi, "iree_vm_invoke_tick");
// Enter the fiber to start attributing zones to the context.
IREE_TRACE(iree_vm_invoke_fiber_enter(invocation_id));
// Perform the initial invocation step, which if synchronous may fully
// complete the invocation before returning. If it yields we'll need to resume
// it, possibly after taking care of pending waits.
iree_vm_invoke_state_t state = {0};
iree_status_t status = iree_vm_begin_invoke(&state, context, function, flags,
policy, inputs, host_allocator);
while (iree_status_is_deferred(status)) {
// Grab the wait frame from the stack holding the wait parameters.
// This is optional: if an invocation yields for cooperative scheduling
// purposes there will not be a wait frame on the stack and we'll just
// resume it below.
iree_vm_stack_frame_t* current_frame =
iree_vm_stack_current_frame(state.stack);
if (IREE_UNLIKELY(!current_frame)) {
// Unbalanced stack.
status = iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
"unbalanced stack after yield");
break; // bail and don't attempt a resume
} else if (current_frame->type == IREE_VM_STACK_FRAME_WAIT) {
// Perform the wait operation synchronously.
// We do this outside of the fiber to match accounting with async
// executors.
IREE_TRACE(iree_vm_invoke_fiber_leave(invocation_id, state.stack));
IREE_TRACE_ZONE_END(zi);
iree_vm_wait_frame_t* wait_frame =
(iree_vm_wait_frame_t*)iree_vm_stack_frame_storage(current_frame);
status = iree_vm_wait_invoke(&state, wait_frame, deadline_ns);
// Restore tick zone and re-enter the fiber for the resume.
IREE_TRACE_ZONE_BEGIN_NAMED(zi_next, "iree_vm_invoke_tick");
zi = zi_next;
IREE_TRACE(iree_vm_invoke_fiber_reenter(invocation_id, state.stack));
if (!iree_status_is_ok(status)) break;
}
// Resume the invocation after its wait completes (if it wasn't just a
// simple yield for cooperation). This may yield again and require another
// tick or complete with OK (or an error).
status = iree_vm_resume_invoke(&state);
}
// If the invoke process itself was successful we can end the invocation
// cleanly and get the invocation status as returned by the target function.
iree_status_t invoke_status = iree_ok_status();
if (iree_status_is_ok(status)) {
status = iree_vm_end_invoke(&state, outputs, &invoke_status);
}
// Otherwise if we failed to invoke we need to tear down the state to release
// all resources retained by the stack.
if (!iree_status_is_ok(status)) {
// Cleanup the invocation state if the end wasn't able to.
// This may leave the context in an unexpected state but the caller is
// expected to tear down everything if this happens.
iree_vm_abort_invoke(&state);
}
// Leave the fiber context now that execution has completed.
IREE_TRACE(iree_vm_invoke_fiber_leave(invocation_id, state.stack));
IREE_TRACE_ZONE_END(zi);
// If we succeeded at invoking, |status| will be OK and |invoke_status| will
// hold the status returned by the invokee. If we failed at invoking,
// |invoke_status| will still be its initial OK value.
IREE_ASSERT(iree_status_is_ok(status) || iree_status_is_ok(invoke_status));
status = !iree_status_is_ok(invoke_status) ? invoke_status : status;
IREE_TRACE_ZONE_END(z0);
return status;
}
//===----------------------------------------------------------------------===//
// Asynchronous invocation
//===----------------------------------------------------------------------===//
// Argument storage larger than this will require a heap allocation.
#define IREE_VM_STACK_MAX_ARGUMENT_ALLOCA_SIZE (iree_host_size_t)(16 * 1024)
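// Example driving loop (a sketch mirroring what iree_vm_invoke above does
// synchronously; a real scheduler would interleave other work between ticks):
//   iree_vm_invoke_state_t state = {0};
//   iree_status_t status = iree_vm_begin_invoke(
//       &state, context, function, flags, /*policy=*/NULL, inputs,
//       host_allocator);
//   while (iree_status_is_deferred(status)) {
//     // ... service the wait frame (if any) via iree_vm_wait_invoke ...
//     status = iree_vm_resume_invoke(&state);
//   }
//   iree_status_t invoke_status = iree_ok_status();
//   if (iree_status_is_ok(status)) {
//     status = iree_vm_end_invoke(&state, outputs, &invoke_status);
//   } else {
//     iree_vm_abort_invoke(&state);
//   }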
// WARNING: this function cannot have any trace markers that span the begin
// call; the begin may yield with zones still open.
IREE_API_EXPORT iree_status_t iree_vm_begin_invoke(
iree_vm_invoke_state_t* state, iree_vm_context_t* context,
iree_vm_function_t function, iree_vm_invocation_flags_t flags,
const iree_vm_invocation_policy_t* policy, const iree_vm_list_t* inputs,
iree_allocator_t host_allocator) {
IREE_ASSERT_ARGUMENT(context);
IREE_TRACE_ZONE_BEGIN(z0);
// Force tracing if specified on the context.
if (iree_vm_context_flags(context) & IREE_VM_CONTEXT_FLAG_TRACE_EXECUTION) {
flags |= IREE_VM_INVOCATION_FLAG_TRACE_EXECUTION;
}
// Grab function metadata used for marshaling inputs/outputs.
iree_vm_function_signature_t signature =
iree_vm_function_signature(&function);
iree_string_view_t cconv_arguments = iree_string_view_empty();
iree_string_view_t cconv_results = iree_string_view_empty();
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_vm_function_call_get_cconv_fragments(
&signature, &cconv_arguments, &cconv_results));
// Allocate argument storage on the native stack. It only needs to survive the
// begin call as it's consumed by the invokee.
iree_byte_span_t arguments = iree_make_byte_span(NULL, 0);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0,
iree_vm_function_call_compute_cconv_fragment_size(
cconv_arguments, /*segment_size_list=*/NULL, &arguments.data_length));
const bool arguments_on_heap =
arguments.data_length > IREE_VM_STACK_MAX_ARGUMENT_ALLOCA_SIZE;
if (!arguments_on_heap) {
// Arguments fit on the native stack without too much worry about
// overflowing. This is the fast path (effectively just an $sp bump).
arguments.data = iree_alloca(arguments.data_length);
} else {
// Couldn't inline, do a heap allocation that we'll keep until this function
// returns.
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_allocator_malloc(host_allocator, arguments.data_length,
(void**)&arguments.data));
}
memset(arguments.data, 0, arguments.data_length);
// Allocate the result storage that will be populated by the invokee. This
// must survive until the end() so we slice it off the bottom of the stack
// storage. This reduces the overall available stack space but not by much,
// and if the stack needs to dynamically grow the inlined storage will still
// be available.
iree_byte_span_t results = iree_make_byte_span(NULL, 0);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_vm_function_call_compute_cconv_fragment_size(
cconv_results, /*segment_size_list=*/NULL, &results.data_length));
iree_host_size_t reserved_storage_size = 0;
if (results.data_length <= sizeof(state->stack_storage) / 4) {
// Results fit in the inlined storage and we can avoid a heap allocation.
// If we exceed the maximum we'll heap allocate below instead of slicing the
// stack storage.
results.data = state->stack_storage;
reserved_storage_size =
iree_host_align(results.data_length, iree_max_align_t);
} else {
// Couldn't inline, do a heap allocation we'll have to hang on to and
// clean up when the invocation state is released.
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_allocator_malloc(host_allocator, results.data_length,
(void**)&results.data));
}
memset(results.data, 0, results.data_length);
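// For example (illustrative numbers, assuming a 16-byte iree_max_align_t):
// 24 bytes of results on the inline path reserve iree_host_align(24, 16) = 32
// bytes at the head of |stack_storage|, leaving the remainder for the stack
// initialized below.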
// Marshal the input arguments into the VM ABI and preallocate the result
// buffer. If marshaling fails we need to cleanup the arguments.
// NOTE: today we don't support variadic arguments through this interface.
iree_status_t status =
iree_vm_invoke_marshal_inputs(cconv_arguments, inputs, arguments);
if (!iree_status_is_ok(status)) {
iree_vm_invoke_release_argument_storage(cconv_arguments, arguments,
arguments_on_heap, host_allocator);
iree_vm_invoke_release_result_storage(cconv_results, results,
state->stack_storage, host_allocator);
IREE_TRACE_ZONE_END(z0);
return status;
}
// Initialize the stack with the inline storage.
// We (probably) sliced off the head of the storage above to use for results
// and perform an offset here to account for that.
iree_vm_stack_t* stack = NULL;
status = iree_vm_stack_initialize(
iree_make_byte_span(state->stack_storage + reserved_storage_size,
sizeof(state->stack_storage) - reserved_storage_size),
flags, iree_vm_context_state_resolver(context), host_allocator, &stack);
if (!iree_status_is_ok(status)) {
iree_vm_invoke_release_argument_storage(cconv_arguments, arguments,
arguments_on_heap, host_allocator);
iree_vm_invoke_release_result_storage(cconv_results, results,
state->stack_storage, host_allocator);
IREE_TRACE_ZONE_END(z0);
return status;
}
// NOTE: at this point the stack must be properly deinitialized if we bail.
// Initialize state now that we are confident we're returning OK.
// If we return a failure the user won't know they have to end() and clean
// these up.
state->context = context;
state->cconv_results = cconv_results;
state->results = results;
iree_vm_context_retain(context);
state->stack = stack;
// NOTE: we must end the zone here as the begin_call will return with
// unbalanced zones if we yield.
IREE_TRACE_ZONE_END(z0);
// Execute the target function until the first yield point is reached or it
// completes. A result of OK indicates successful completion while DEFERRED
// indicates that the invocation needs to be resumed/waited again.
iree_vm_function_call_t call = {
.function = function,
.arguments = arguments,
.results = results,
};
state->status =
function.module->begin_call(function.module->self, stack, call);
// Arguments should no longer be required - they were either consumed by the
// begin_call or need to be cleaned up before we return.
iree_vm_invoke_release_argument_storage(cconv_arguments, call.arguments,
arguments_on_heap, host_allocator);
// The call may have yielded, either for cooperative scheduling purposes or
// for a wait operation (in which case the top of the stack will have a wait
// frame).
if (iree_status_is_deferred(state->status)) {
return iree_status_from_code(IREE_STATUS_DEFERRED);
}
// NOTE: the begin-invoke was ok, but the operation itself may have failed.
return iree_ok_status();
}
// WARNING: this function cannot have any trace markers that span the resume
// call; the resume may yield with zones still open.
IREE_API_EXPORT iree_status_t
iree_vm_resume_invoke(iree_vm_invoke_state_t* state) {
IREE_ASSERT_ARGUMENT(state);
// In a stackless world resuming may pop a stack frame that needs to be
// executed inline. We run here until either all stack frames have been popped
// (indicating the invocation has completed) or we yield/error and want to
// return to the scheduler.
do {
if (iree_status_is_deferred(state->status)) {
// Wait required; top of the stack should be a wait frame.
IREE_ASSERT_EQ(iree_vm_stack_current_frame(state->stack)->type,
IREE_VM_STACK_FRAME_WAIT);
return iree_status_from_code(IREE_STATUS_DEFERRED);
} else if (!iree_status_is_ok(state->status)) {
// Invocation previously failed so return immediately. The user should
// then call end() to get the result. By returning OK here we are telling
// the user the resume operation succeeded.
return iree_ok_status();
}
// Get the current frame of the stack where we will resume execution.
iree_vm_stack_frame_t* resume_frame =
iree_vm_stack_current_frame(state->stack);
if (IREE_UNLIKELY(!resume_frame)) {
return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
"resume called with no parent frame");
}
// Call into the VM to resume the function. It may complete (returning OK),
// defer to be waited/resumed later, or fail.
iree_vm_function_t resume_function = resume_frame->function;
state->status = resume_function.module->resume_call(
resume_function.module->self, state->stack, state->results);
// If the call yielded then return that so the user knows to resume again.
if (iree_status_is_deferred(state->status)) {
return iree_status_from_code(IREE_STATUS_DEFERRED);
}
// Stack resume: if the resume succeeded but the stack is not empty it means
// we've got to resume the parent frame. When we do a full yield up to the
// scheduler and then resume we're calling into the VM stack top from the
// host stack bottom - to have the same behavior as a normal stack pop we've
// got to continue running. To keep the trace cleaner and reduce overhead we
// jump back up and pop the next frame, which also helps us avoid
// introducing latency between pops where otherwise there should be none.
} while (iree_status_is_ok(state->status) &&
iree_vm_stack_current_frame(state->stack) != NULL);
// We're indicating the resume operation was successful, not the result of the
// VM call; the user will call end() to get that.
return iree_ok_status();
}
IREE_API_EXPORT iree_status_t
iree_vm_wait_invoke(iree_vm_invoke_state_t* state,
iree_vm_wait_frame_t* wait_frame, iree_time_t deadline_ns) {
IREE_ASSERT_ARGUMENT(state);
if (IREE_UNLIKELY(!iree_status_is_deferred(state->status))) {
// Can only wait if the invocation is actually waiting.
// We could make this OK and act as a no-op but it can be useful for
// ensuring scheduler implementations don't do extraneous work.
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"wait-invoke attempted on a non-waiting invocation");
}
// Combine the wait-invoke deadline with the one specified by the wait
// operation itself. This allows schedulers to timeslice waits without
// worrying whether user programs request to wait forever.
iree_time_t min_deadline_ns = iree_min(deadline_ns, wait_frame->deadline_ns);
// Perform the wait operation, blocking the calling thread until it completes,
// fails, or hits the min_deadline_ns.
if (wait_frame->wait_type == IREE_VM_WAIT_UNTIL) {
wait_frame->wait_status = iree_wait_until(min_deadline_ns)
? iree_ok_status()
: iree_status_from_code(IREE_STATUS_ABORTED);
} else if (wait_frame->count == 1) {
wait_frame->wait_status = iree_wait_source_wait_one(
wait_frame->wait_sources[0], iree_make_deadline(min_deadline_ns));
} else {
// TODO(benvanik): multi-wait when running synchronously. This is already
// supported by iree_loop_inline_t and maybe we can just reuse that. These
// are not currently emitted by the compiler.
return iree_make_status(
IREE_STATUS_UNIMPLEMENTED,
"multi-wait in synchronous invocations not yet implemented");
}
// Reset status to OK - the next resume will pick back up in the waiter.
iree_status_free(state->status);
state->status = iree_ok_status();
// OK here indicates we performed the wait and not the result of the wait.
return iree_ok_status();
}
IREE_API_EXPORT iree_status_t iree_vm_end_invoke(iree_vm_invoke_state_t* state,
iree_vm_list_t* outputs,
iree_status_t* out_status) {
IREE_ASSERT_ARGUMENT(state);
IREE_ASSERT_ARGUMENT(out_status);
*out_status = iree_ok_status();
// Suspend stack frame tracing zones; if the invocation failed the failing
// frames will still be on the stack and thus also still have their trace
// zones active. If we were to begin a zone here and then deinit the stack
// we'd end up with unbalanced zones.
iree_vm_stack_suspend_trace_zones(state->stack);
IREE_TRACE_ZONE_BEGIN(z0);
// Grab operation status. If this is not OK it's because the operation failed
// or the user is calling this with a wait frame on the stack.
iree_status_t invoke_status = state->status;
if (IREE_UNLIKELY(iree_status_is_deferred(invoke_status))) {
IREE_TRACE_ZONE_END(z0);
return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
"end-invoke attempted on a waiting invocation");
} else if (IREE_UNLIKELY(!iree_status_is_ok(invoke_status))) {
// Annotate failures with the stack trace (if compiled in).
invoke_status = IREE_VM_STACK_ANNOTATE_BACKTRACE_IF_ENABLED(state->stack,
invoke_status);
}
// If the operation succeeded marshal the outputs from the stack buffers into
// the user-provided storage. The outputs list will retain all results.
if (iree_status_is_ok(invoke_status)) {
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_vm_invoke_marshal_outputs(state->cconv_results, state->results,
outputs));
}
// Cleanup the invocation resources.
*out_status = invoke_status; // takes ownership
state->status = iree_ok_status();
iree_vm_abort_invoke(state);
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
IREE_API_EXPORT void iree_vm_abort_invoke(iree_vm_invoke_state_t* state) {
// We expect that the caller has already suspended the stack tracing zones,
// but in failure cases we can end up here and want to ensure that things are
// cleaned up. If we were to begin a zone now the stack deinit would lead to
// unbalanced zones.
if (state->stack) iree_vm_stack_suspend_trace_zones(state->stack);
IREE_TRACE_ZONE_BEGIN(z0);
iree_allocator_t host_allocator = state->stack
? iree_vm_stack_allocator(state->stack)
: iree_allocator_null();
if (state->stack) {
iree_vm_stack_deinitialize(state->stack);
state->stack = NULL;
}
if (!iree_byte_span_is_empty(state->results)) {
iree_vm_invoke_release_result_storage(state->cconv_results, state->results,
state->stack_storage, host_allocator);
state->results = iree_byte_span_empty();
}
if (state->context) {
iree_vm_context_release(state->context);
state->context = NULL;
}
iree_status_free(state->status);
state->status = iree_status_from_code(IREE_STATUS_INTERNAL);
IREE_TRACE_ZONE_END(z0);
}
//===----------------------------------------------------------------------===//
// Loop-based asynchronous invocation
//===----------------------------------------------------------------------===//
static iree_status_t iree_vm_async_begin_invoke(void* user_data,
iree_loop_t loop,
iree_status_t loop_status);
static iree_status_t iree_vm_async_resume_invoke(void* user_data,
iree_loop_t loop,
iree_status_t loop_status);
static iree_status_t iree_vm_async_tick_invoke(
iree_vm_async_invoke_state_t* state, iree_loop_t loop);
static iree_status_t iree_vm_async_end_invoke(
iree_vm_async_invoke_state_t* state, iree_loop_t loop);
static iree_status_t iree_vm_async_complete_invoke(
iree_vm_async_invoke_state_t* state, iree_loop_t loop,
iree_status_t status);
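// Example usage (a sketch; my_callback and my_state_t are hypothetical):
//   static iree_status_t my_callback(void* user_data, iree_loop_t loop,
//                                    iree_status_t status,
//                                    iree_vm_list_t* outputs) {
//     my_state_t* my_state = (my_state_t*)user_data;
//     // Consume |outputs| and |status| here; see iree/vm/invocation.h for
//     // the exact ownership rules.
//     return iree_ok_status();
//   }
//   ...
//   // |state| must remain valid until the callback is issued.
//   iree_vm_async_invoke_state_t* state = ...;
//   IREE_RETURN_IF_ERROR(iree_vm_async_invoke(
//       loop, state, context, function, IREE_VM_INVOCATION_FLAG_NONE,
//       /*policy=*/NULL, inputs, outputs, host_allocator, my_callback,
//       /*user_data=*/my_state));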
IREE_API_EXPORT iree_status_t iree_vm_async_invoke(
iree_loop_t loop, iree_vm_async_invoke_state_t* state,
iree_vm_context_t* context, iree_vm_function_t function,
iree_vm_invocation_flags_t flags, const iree_vm_invocation_policy_t* policy,
iree_vm_list_t* inputs, iree_vm_list_t* outputs,
iree_allocator_t host_allocator,
iree_vm_async_invoke_callback_fn_t callback, void* user_data) {
IREE_ASSERT_ARGUMENT(state);
IREE_ASSERT_ARGUMENT(context);
IREE_TRACE_ZONE_BEGIN(z0);
// Initialize to the pre-begin state.
state->begin_params.context = context;
iree_vm_context_retain(context);
state->begin_params.function = function;
state->begin_params.flags = flags;
state->begin_params.policy = policy;
state->begin_params.inputs = inputs;
iree_vm_list_retain(inputs);
state->deadline_ns = IREE_TIME_INFINITE_FUTURE;
state->host_allocator = host_allocator;
state->outputs = outputs;
iree_vm_list_retain(outputs);
state->callback = callback;
state->user_data = user_data;
// Launch the invocation; if this fails we'll need to cleanup the state we've
// already initialized.
// NOTE: based on the loop type THIS MAY COMPLETE THE INVOCATION IMMEDIATELY.
iree_status_t status = iree_loop_call(loop, IREE_LOOP_PRIORITY_DEFAULT,
iree_vm_async_begin_invoke, state);
if (!iree_status_is_ok(status)) {
iree_vm_list_release(state->outputs);
iree_vm_list_release(state->begin_params.inputs);
iree_vm_context_release(state->begin_params.context);
}
IREE_TRACE_ZONE_END(z0);
return status;
}
// Begins the invocation from the first loop callback.
// The begin_params on the state will have everything we need to initialize the
// call but since we alias with the base invocation state we must be sure to
// copy out the args first.
//
// Note that |status| may indicate a failure already, such as if the loop
// aborted. In that case we need to clean up the state before issuing the user
// callback so they can do the same.
static iree_status_t iree_vm_async_begin_invoke(void* user_data,
iree_loop_t loop,
iree_status_t loop_status) {
IREE_TRACE_ZONE_BEGIN(z0);
iree_vm_async_invoke_state_t* state =
(iree_vm_async_invoke_state_t*)user_data;
// Check to see if the loop has failed before we even begin.
if (IREE_UNLIKELY(!iree_status_is_ok(loop_status))) {
// We release our retained resources here as we don't guarantee they remain
// live by the time the callback runs. This allows callbacks to reuse memory.
iree_vm_list_release(state->outputs);
iree_vm_list_release(state->begin_params.inputs);
iree_vm_context_release(state->begin_params.context);
// Issue user callback notifying them of the failure and pass along the loop
// status; this is likely something like IREE_STATUS_ABORTED.
iree_status_t callback_status =
state->callback(state->user_data, loop, loop_status, NULL);
IREE_TRACE_ZONE_END(z0);
return callback_status;
}
// Pull fields locally so that we can reuse the aliased storage.
// Note that we have ownership of all these and must release them if we fail
// to begin the invocation.
iree_vm_context_t* context = state->begin_params.context;
iree_vm_function_t function = state->begin_params.function;
iree_vm_invocation_flags_t flags = state->begin_params.flags;
const iree_vm_invocation_policy_t* policy = state->begin_params.policy;
iree_vm_list_t* inputs = state->begin_params.inputs;
// Allocate an invocation ID for tracing.
IREE_TRACE({
state->invocation_id =
iree_any_bit_set(flags, IREE_VM_INVOCATION_FLAG_TRACE_INLINE)
? 0
: iree_vm_invoke_allocate_id(context, &function);
});
// Try to begin the invocation. This may fail if the parameters are invalid.
// It may also complete inline if the entire invocation can be handled without
// blocking (in which case the returned status is OK).
IREE_TRACE(iree_vm_invoke_fiber_enter(state->invocation_id));
iree_status_t status =
iree_vm_begin_invoke(&state->base, context, function, flags, policy,
inputs, state->host_allocator);
if (iree_status_is_ok(status) || iree_status_is_deferred(status)) {
// Ownership transferred.
iree_vm_list_release(inputs);
inputs = NULL;
iree_vm_context_release(context);
context = NULL;
}
if (iree_status_is_deferred(status)) {
IREE_TRACE({
iree_vm_invoke_fiber_leave(state->invocation_id, state->base.stack);
});
// Deferred until a wait completes or the next tick.
status = iree_vm_async_tick_invoke(state, loop);
} else if (iree_status_is_ok(status)) {
// Completed synchronously. This is the happy path and lets us complete the
// entire invocation in a single loop operation.
status = iree_vm_async_end_invoke(state, loop);
} else {
IREE_TRACE(iree_vm_invoke_fiber_leave(state->invocation_id, NULL));
// Failed to begin the invocation; release resources and call back.
// We know the state wasn't fully initialized and don't need to clean it up.
iree_vm_list_release(state->outputs);
iree_vm_list_release(inputs);
iree_vm_context_release(context);
status = state->callback(state->user_data, loop, status, NULL);
IREE_TRACE_ZONE_END(z0);
return status;
}
// If we began but failed to tick/end we need to propagate that to the user
// and clean up our state.
if (!iree_status_is_ok(status)) {
status = iree_vm_async_complete_invoke(state, loop, status);
}
IREE_TRACE_ZONE_END(z0);
return status;
}
static iree_status_t iree_vm_async_resume_invoke(void* user_data,
iree_loop_t loop,
iree_status_t loop_status) {
IREE_TRACE_ZONE_BEGIN(z0);
iree_vm_async_invoke_state_t* state =
(iree_vm_async_invoke_state_t*)user_data;
// Resume the invocation and execute the next step.
IREE_TRACE({
iree_vm_invoke_fiber_reenter(state->invocation_id, state->base.stack);
});
iree_status_t status = iree_vm_resume_invoke(&state->base);
if (iree_status_is_deferred(status)) {
IREE_TRACE({
iree_vm_invoke_fiber_leave(state->invocation_id, state->base.stack);
});
// Deferred on a wait or yield. Enqueue waits/a resume.
status = iree_vm_async_tick_invoke(state, loop);
} else if (iree_status_is_ok(status)) {
// Completed synchronously.
status = iree_vm_async_end_invoke(state, loop);
} else {
IREE_TRACE({
iree_vm_invoke_fiber_leave(state->invocation_id, state->base.stack);
});
}
// If we failed to tick/end we need to propagate that to the user and clean up
// our state.
if (!iree_status_is_ok(status)) {
status = iree_vm_async_complete_invoke(state, loop, status);
}
IREE_TRACE_ZONE_END(z0);
return status;
}
static iree_status_t iree_vm_async_wake_invoke(void* user_data,
iree_loop_t loop,
iree_status_t loop_status) {
IREE_TRACE_ZONE_BEGIN(z0);
iree_vm_async_invoke_state_t* state =
(iree_vm_async_invoke_state_t*)user_data;
// If we were aborted then we need to tear everything down.
// TODO(benvanik): maybe allow the failures through to the target? It'd be
// impossible to tell when the loop was in an invalid state if we did. May
// need to rework the loop callback on waits so that we can differentiate.
if (iree_status_is_aborted(loop_status)) {
IREE_TRACE_ZONE_END(z0);
return iree_vm_async_complete_invoke(state, loop, loop_status);
}
// The loop_status we receive here is the result of the wait operation and
// something we need to propagate to the waiter.
iree_vm_stack_frame_t* current_frame =
iree_vm_stack_current_frame(state->base.stack);
iree_vm_wait_frame_t* wait_frame =
(iree_vm_wait_frame_t*)iree_vm_stack_frame_storage(current_frame);
wait_frame->wait_status = loop_status;
IREE_ASSERT(iree_status_is_deferred(state->base.status));
iree_status_free(state->base.status);
state->base.status = iree_ok_status();
IREE_TRACE_ZONE_END(z0);
// Resume the invocation and execute the next step.
// We do this inline instead of enqueuing a resume so that we avoid a needless
// operation in the loop. The invocation may immediately wait again and we
// want to keep the total wait-to-wait latency low.
return iree_vm_async_resume_invoke(user_data, loop, iree_ok_status());
}
static iree_status_t iree_vm_async_tick_invoke(
iree_vm_async_invoke_state_t* state, iree_loop_t loop) {
// Grab the wait frame from the stack holding the wait parameters.
// This is optional: if an invocation yields for cooperative scheduling
// purposes there will not be a wait frame on the stack and we'll just
// resume it below.
iree_vm_stack_frame_t* current_frame =
iree_vm_stack_current_frame(state->base.stack);
if (IREE_UNLIKELY(!current_frame)) {
// Unbalanced stack.
return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
"unbalanced stack after yield");
} else if (current_frame->type == IREE_VM_STACK_FRAME_WAIT) {
// Wait on a wait source.
iree_vm_wait_frame_t* wait_frame =
(iree_vm_wait_frame_t*)iree_vm_stack_frame_storage(current_frame);
// Combine the wait-invoke deadline with the one specified by the wait
// operation itself. This allows schedulers to timeslice waits without
// worrying whether user programs request to wait forever.
iree_timeout_t timeout = iree_make_deadline(
iree_min(state->deadline_ns, wait_frame->deadline_ns));
switch (wait_frame->wait_type) {
default:
case IREE_VM_WAIT_UNTIL:
return iree_loop_wait_until(loop, timeout, iree_vm_async_wake_invoke,
state);
case IREE_VM_WAIT_ANY:
return iree_loop_wait_any(loop, wait_frame->count,
wait_frame->wait_sources, timeout,
iree_vm_async_wake_invoke, state);
case IREE_VM_WAIT_ALL:
return iree_loop_wait_all(loop, wait_frame->count,
wait_frame->wait_sources, timeout,
iree_vm_async_wake_invoke, state);
}
} else {
// Resume from a yield point (cooperative scheduling).
return iree_loop_call(loop, IREE_LOOP_PRIORITY_DEFAULT,
iree_vm_async_resume_invoke, state);
}
}
static iree_status_t iree_vm_async_end_invoke(
iree_vm_async_invoke_state_t* state, iree_loop_t loop) {
// End the invocation and retrieve the results.
iree_status_t invoke_status = iree_ok_status();
IREE_RETURN_IF_ERROR(
iree_vm_end_invoke(&state->base, state->outputs, &invoke_status));
IREE_TRACE({
// We leave the fiber before completing so that the callback is attributed
// to the thread running it instead.
iree_vm_invoke_fiber_leave(state->invocation_id, state->base.stack);
});
return iree_vm_async_complete_invoke(state, loop, invoke_status);
}
static iree_status_t iree_vm_async_complete_invoke(
iree_vm_async_invoke_state_t* state, iree_loop_t loop,
iree_status_t status) {
// Release all resources if we didn't already clean them up.
if (!iree_status_is_ok(status)) {
iree_vm_abort_invoke(&state->base);
iree_vm_list_release(state->outputs);
state->outputs = NULL;
}
// Issue the callback. Pull the fields we need out of the state first as the
// callback may reuse the state storage.
iree_vm_async_invoke_callback_fn_t callback = state->callback;
void* callback_user_data = state->user_data;
iree_vm_list_t* outputs = state->outputs;
return callback(callback_user_data, loop, status, outputs);
}