Adding iree_vm_async_invoke API for loop-based invocation. This reuses the low-level iree_vm_*_invoke APIs to provide a fire-and-forget callback-style interface. Storage is externalized such that it can be embedded in higher-level binding data structures and avoid allocations. The API here is unopinionated with respect to overlapping invocations within the same context. Callers are expected to set the IREE_VM_CONTEXT_FLAG_CONCURRENT if they will overlap (mostly just to make tracing work) but otherwise there's no difference in the implementation between sequential and concurrent invocation execution.
diff --git a/runtime/src/iree/hal/local/loaders/vmvx_module_loader.c b/runtime/src/iree/hal/local/loaders/vmvx_module_loader.c index 3fd16b3..2ffa0a9 100644 --- a/runtime/src/iree/hal/local/loaders/vmvx_module_loader.c +++ b/runtime/src/iree/hal/local/loaders/vmvx_module_loader.c
@@ -119,10 +119,10 @@ // Copy the executable constants into the module state. if (iree_status_is_ok(status)) { - status = - iree_vm_invoke(executable->context, set_function, - IREE_VM_INVOCATION_FLAG_NONE, /*policy=*/NULL, inputs, - /*outputs=*/NULL, executable->base.host_allocator); + status = iree_vm_invoke(executable->context, set_function, + IREE_VM_INVOCATION_FLAG_TRACE_INLINE, + /*policy=*/NULL, inputs, + /*outputs=*/NULL, executable->base.host_allocator); } // Inputs *must* be released here as we allocated it on the stack. @@ -394,11 +394,13 @@ // On-stack stack. We really do abuse the stack too much here. // TODO(benvanik): pass in an iree_arena_t that can be used for this. IREE_VM_INLINE_STACK_INITIALIZE( - stack, IREE_VM_INVOCATION_FLAG_NONE, + stack, IREE_VM_INVOCATION_FLAG_TRACE_INLINE, iree_vm_context_state_resolver(executable->context), executable->base.host_allocator); // Direct call interface. + // This only works because we know the exact signature and that these will + // never block (if they do it'll be handled as if it's an error). iree_vm_function_call_t call; memset(&call, 0, sizeof(call)); call.function = entry_fn;
diff --git a/runtime/src/iree/vm/invocation.c b/runtime/src/iree/vm/invocation.c index a22aa12..62b5b2e 100644 --- a/runtime/src/iree/vm/invocation.c +++ b/runtime/src/iree/vm/invocation.c
@@ -11,6 +11,7 @@ #include <string.h> #include "iree/base/api.h" +#include "iree/base/internal/debugging.h" #include "iree/base/tracing.h" #include "iree/vm/ref.h" #include "iree/vm/stack.h" @@ -179,25 +180,74 @@ } //===----------------------------------------------------------------------===// -// Synchronous invocation +// Fiber tracing support //===----------------------------------------------------------------------===// -static void iree_vm_invoke_fiber_enter(iree_vm_context_t* context) { - IREE_TRACE_FIBER_ENTER(iree_vm_context_id(context)); +// Fibers are tricky things to instrument as tooling support is often lacking. +// We support two major modes (beyond when tracing is entirely disabled): +// IREE_TRACING_FEATURE_FIBERS: use Tracy's native fiber support. +// Does not support concurrent/interleaved coroutines. +// !IREE_TRACING_FEATURE_FIBERS: emulated support by trace stack fiddling. +// Supports concurrent/interleaved coroutines but messes with statistics +// as the trace stack is suspended/resumed and zones get extra counts. +// +// To make concurrent coroutines work when Tracy's fiber support is enabled we +// go from treating each context as a fiber to treating each invocation as one. +// This has the side-effect of creating one fiber per invocation and in +// benchmarks that can be really noisy; best that can be done there is disabling +// native fiber support. +static iree_vm_invocation_id_t iree_vm_invoke_allocate_id( + iree_vm_context_t* context, const iree_vm_function_t* function) { +#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_FIBERS + if (iree_vm_context_flags(context) & IREE_VM_CONTEXT_FLAG_CONCURRENT) { + // Native Tracy fiber support does not handle interleaved coroutines. + // Instead we'll allocate a unique ID per invocation. + // The string must remain live for the lifetime of the process. + // TODO(benvanik): name it based on the function? + static iree_atomic_int32_t next_invocation_id = IREE_ATOMIC_VAR_INIT(1); + uint32_t invocation_id = iree_atomic_fetch_add_int32( + &next_invocation_id, 1, iree_memory_order_seq_cst); + IREE_LEAK_CHECK_DISABLE_PUSH(); + char* name = (char*)malloc(32); + snprintf(name, 32, "invoke-%04d", invocation_id - 1); + IREE_LEAK_CHECK_DISABLE_POP(); + return (iree_vm_invocation_id_t)name; + } else { + // Non-concurrent (sequential) execution can just reuse the context ID. + return (iree_vm_invocation_id_t)iree_vm_context_id(context); + } +#else + return (iree_vm_invocation_id_t)iree_vm_context_id(context); +#endif // IREE_TRACING_FEATURE_FIBERS } -static void iree_vm_invoke_fiber_reenter(iree_vm_context_t* context, +#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION + +static void iree_vm_invoke_fiber_enter(iree_vm_invocation_id_t invocation_id) { + if (!invocation_id) return; + IREE_TRACE_FIBER_ENTER(invocation_id); +} + +static void iree_vm_invoke_fiber_reenter(iree_vm_invocation_id_t invocation_id, iree_vm_stack_t* stack) { - IREE_TRACE_FIBER_ENTER(iree_vm_context_id(context)); + if (!invocation_id) return; + IREE_TRACE_FIBER_ENTER(invocation_id); iree_vm_stack_resume_trace_zones(stack); } -static void iree_vm_invoke_fiber_leave(iree_vm_context_t* context, +static void iree_vm_invoke_fiber_leave(iree_vm_invocation_id_t invocation_id, iree_vm_stack_t* stack) { + if (!invocation_id) return; if (stack) iree_vm_stack_suspend_trace_zones(stack); IREE_TRACE_FIBER_LEAVE(); } +#endif // IREE_TRACING_FEATURE_INSTRUMENTATION + +//===----------------------------------------------------------------------===// +// Synchronous invocation +//===----------------------------------------------------------------------===// + IREE_API_EXPORT iree_status_t iree_vm_invoke( iree_vm_context_t* context, iree_vm_function_t function, iree_vm_invocation_flags_t flags, const iree_vm_invocation_policy_t* policy, @@ -212,10 +262,16 @@ iree_timeout_t timeout = iree_infinite_timeout(); iree_time_t deadline_ns = iree_timeout_as_deadline_ns(timeout); + // Allocate an invocation ID for tracing. + iree_vm_invocation_id_t invocation_id = + iree_any_bit_set(flags, IREE_VM_INVOCATION_FLAG_TRACE_INLINE) + ? 0 + : iree_vm_invoke_allocate_id(context, &function); + // Begin a zone outside the fiber to represent one tick of the loop. IREE_TRACE_ZONE_BEGIN_NAMED(zi, "iree_vm_invoke_tick"); // Enter the fiber to start attributing zones to the context. - iree_vm_invoke_fiber_enter(context); + IREE_TRACE(iree_vm_invoke_fiber_enter(invocation_id)); // Perform the initial invocation step, which if synchronous may fully // complete the invocation before returning. If it yields we'll need to resume @@ -239,7 +295,7 @@ // Perform the wait operation synchronously. // We do this outside of the fiber to match accounting with async // executors. - iree_vm_invoke_fiber_leave(context, state.stack); + IREE_TRACE(iree_vm_invoke_fiber_leave(invocation_id, state.stack)); IREE_TRACE_ZONE_END(zi); iree_vm_wait_frame_t* wait_frame = @@ -249,7 +305,7 @@ // Restore tick zone and re-enter the fiber for the resume. IREE_TRACE_ZONE_BEGIN_NAMED(zi_next, "iree_vm_invoke_tick"); zi = zi_next; - iree_vm_invoke_fiber_reenter(context, state.stack); + IREE_TRACE(iree_vm_invoke_fiber_reenter(invocation_id, state.stack)); if (!iree_status_is_ok(status)) break; } @@ -276,7 +332,7 @@ } // Leave the fiber context now that execution has completed. - iree_vm_invoke_fiber_leave(context, state.stack); + IREE_TRACE(iree_vm_invoke_fiber_leave(invocation_id, state.stack)); IREE_TRACE_ZONE_END(zi); // If we succeeded at invoking the status will be OK and the invoke_status @@ -419,49 +475,52 @@ IREE_API_EXPORT iree_status_t iree_vm_resume_invoke(iree_vm_invoke_state_t* state) { IREE_ASSERT_ARGUMENT(state); - if (iree_status_is_deferred(state->status)) { - // Wait required; top of the stack should be a wait frame. - IREE_ASSERT_EQ(iree_vm_stack_current_frame(state->stack)->type, - IREE_VM_STACK_FRAME_WAIT); - return iree_status_from_code(IREE_STATUS_DEFERRED); - } else if (!iree_status_is_ok(state->status)) { - // Invocation previously failed so return immediately. The user should then - // call end() to get the result. By returning OK here we are telling the - // user the resume operation succeeded. - return iree_ok_status(); - } - // Get the top execution frame of the stack where we will resume execution. - iree_vm_stack_frame_t* resume_frame = iree_vm_stack_top(state->stack); - if (IREE_UNLIKELY(!resume_frame)) { - return iree_make_status(IREE_STATUS_FAILED_PRECONDITION, - "resume called with no parent frame"); - } + // In a stackless world resuming may pop a stack frame that needs to be + // executed inline. We run here until either all stack frames have been popped + // (indicating the invocation has completed) or we yield/error and want to + // return to the scheduler. + do { + if (iree_status_is_deferred(state->status)) { + // Wait required; top of the stack should be a wait frame. + IREE_ASSERT_EQ(iree_vm_stack_current_frame(state->stack)->type, + IREE_VM_STACK_FRAME_WAIT); + return iree_status_from_code(IREE_STATUS_DEFERRED); + } else if (!iree_status_is_ok(state->status)) { + // Invocation previously failed so return immediately. The user should + // then call end() to get the result. By returning OK here we are telling + // the user the resume operation succeeded. + return iree_ok_status(); + } - // Call into the VM to resume the function. It may complete (returning OK), - // defer to be waited/resumed later, or fail. - iree_vm_function_t resume_function = resume_frame->function; - iree_vm_execution_result_t result; - state->status = resume_function.module->resume_call( - resume_function.module->self, state->stack, state->results, &result); + // Get the top execution frame of the stack where we will resume execution. + iree_vm_stack_frame_t* resume_frame = iree_vm_stack_top(state->stack); + if (IREE_UNLIKELY(!resume_frame)) { + return iree_make_status(IREE_STATUS_FAILED_PRECONDITION, + "resume called with no parent frame"); + } - // If the call yielded then return that so the user knows to resume again. - if (iree_status_is_deferred(state->status)) { - return iree_status_from_code(IREE_STATUS_DEFERRED); - } + // Call into the VM to resume the function. It may complete (returning OK), + // defer to be waited/resumed later, or fail. + iree_vm_function_t resume_function = resume_frame->function; + iree_vm_execution_result_t result; + state->status = resume_function.module->resume_call( + resume_function.module->self, state->stack, state->results, &result); - // Stack resume: if the resume succeeded but the stack is not empty it means - // we've got to resume the parent frame. When we do a full yield up to the - // scheduler and then resume we're calling into the VM stack top from the host - // stack bottom - to have the same behavior as a normal stack pop we've got to - // continue running. We signal this as a DEFERRED so the same machinery for - // normal yields can be used. - // TODO(benvanik): use another result type in cases where we want to - // differentiate? - if (iree_status_is_ok(state->status) && - iree_vm_stack_current_frame(state->stack) != NULL) { - return iree_status_from_code(IREE_STATUS_DEFERRED); - } + // If the call yielded then return that so the user knows to resume again. + if (iree_status_is_deferred(state->status)) { + return iree_status_from_code(IREE_STATUS_DEFERRED); + } + + // Stack resume: if the resume succeeded but the stack is not empty it means + // we've got to resume the parent frame. When we do a full yield up to the + // scheduler and then resume we're calling into the VM stack top from the + // host stack bottom - to have the same behavior as a normal stack pop we've + // got to continue running. To keep the trace cleaner and reduce overhead we + // jump back up and pop the next frame, which also helps us avoid + // introducing latency between pops where otherwise there should be none. + } while (iree_status_is_ok(state->status) && + iree_vm_stack_current_frame(state->stack) != NULL); // We're indicating the resume operation was successful, not the result of the // VM call; the user will call end() to get that. @@ -586,3 +645,300 @@ IREE_TRACE_ZONE_END(z0); } + +//===----------------------------------------------------------------------===// +// Loop-based asynchronous invocation +//===----------------------------------------------------------------------===// + +static iree_status_t iree_vm_async_begin_invoke(void* user_data, + iree_loop_t loop, + iree_status_t loop_status); +static iree_status_t iree_vm_async_resume_invoke(void* user_data, + iree_loop_t loop, + iree_status_t loop_status); +static iree_status_t iree_vm_async_tick_invoke( + iree_vm_async_invoke_state_t* state, iree_loop_t loop); +static iree_status_t iree_vm_async_end_invoke( + iree_vm_async_invoke_state_t* state, iree_loop_t loop); +static iree_status_t iree_vm_async_complete_invoke( + iree_vm_async_invoke_state_t* state, iree_loop_t loop, + iree_status_t status); + +IREE_API_EXPORT iree_status_t iree_vm_async_invoke( + iree_loop_t loop, iree_vm_async_invoke_state_t* state, + iree_vm_context_t* context, iree_vm_function_t function, + iree_vm_invocation_flags_t flags, const iree_vm_invocation_policy_t* policy, + iree_vm_list_t* inputs, iree_vm_list_t* outputs, + iree_allocator_t host_allocator, + iree_vm_async_invoke_callback_fn_t callback, void* user_data) { + IREE_ASSERT_ARGUMENT(state); + IREE_ASSERT_ARGUMENT(context); + IREE_TRACE_ZONE_BEGIN(z0); + + // Initialize to the pre-begin state. + state->begin_params.context = context; + iree_vm_context_retain(context); + state->begin_params.function = function; + state->begin_params.flags = flags; + state->begin_params.policy = policy; + state->begin_params.inputs = inputs; + iree_vm_list_retain(inputs); + state->deadline_ns = IREE_TIME_INFINITE_FUTURE; + state->host_allocator = host_allocator; + state->outputs = outputs; + iree_vm_list_retain(outputs); + state->callback = callback; + state->user_data = user_data; + + // Launch the invocation; if this fails we'll need to cleanup the state we've + // already initialized. + // NOTE: based on the loop type THIS MAY COMPLETE THE INVOCATION IMMEDIATELY. + iree_status_t status = iree_loop_call(loop, IREE_LOOP_PRIORITY_DEFAULT, + iree_vm_async_begin_invoke, state); + if (!iree_status_is_ok(status)) { + iree_vm_list_release(state->outputs); + iree_vm_list_release(state->begin_params.inputs); + iree_vm_context_release(state->begin_params.context); + } + + IREE_TRACE_ZONE_END(z0); + return status; +} + +// Begins the invocation from the first loop callback. +// The begin_params on the state will have everything we need to initialize the +// call but since we alias with the base invocation state we must be sure to +// copy out the args first. +// +// Note that |status| may indicate a failure already, such as if the loop +// aborted. In that case we need to clean up the state before issuing the user +// callback so they can do the same. +static iree_status_t iree_vm_async_begin_invoke(void* user_data, + iree_loop_t loop, + iree_status_t loop_status) { + IREE_TRACE_ZONE_BEGIN(z0); + iree_vm_async_invoke_state_t* state = + (iree_vm_async_invoke_state_t*)user_data; + + // Check to see if the loop has failed before we even begin. + if (IREE_UNLIKELY(!iree_status_is_ok(loop_status))) { + // We release our retained resources because we don't guarantee they live to + // the callback. This allows callbacks to reuse memory. + iree_vm_list_release(state->outputs); + iree_vm_list_release(state->begin_params.inputs); + iree_vm_context_release(state->begin_params.context); + + // Issue user callback notifying them of the failure and pass along the loop + // status; this is likely something like IREE_STATUS_ABORTED. + iree_status_t callback_status = + state->callback(state->user_data, loop, loop_status, NULL); + IREE_TRACE_ZONE_END(z0); + return callback_status; + } + + // Pull fields locally so that we can reuse the aliased storage. + // Note that we have ownership of all these and must release them if we fail + // to begin the invocation. + iree_vm_context_t* context = state->begin_params.context; + iree_vm_function_t function = state->begin_params.function; + iree_vm_invocation_flags_t flags = state->begin_params.flags; + const iree_vm_invocation_policy_t* policy = state->begin_params.policy; + iree_vm_list_t* inputs = state->begin_params.inputs; + + // Allocate an invocation ID for tracing. + IREE_TRACE({ + state->invocation_id = + iree_any_bit_set(flags, IREE_VM_INVOCATION_FLAG_TRACE_INLINE) + ? 0 + : iree_vm_invoke_allocate_id(context, &function); + }); + + // Try to begin the invocation. This may fail if the parameters are invalid. + // It may also complete inline if the entire invocation can be handled without + // blocking (in which case begin_status is OK). + IREE_TRACE(iree_vm_invoke_fiber_enter(state->invocation_id)); + iree_status_t status = + iree_vm_begin_invoke(&state->base, context, function, flags, policy, + inputs, state->host_allocator); + if (iree_status_is_ok(status) || iree_status_is_deferred(status)) { + // Ownership transferred. + iree_vm_list_release(inputs); + inputs = NULL; + iree_vm_context_release(context); + context = NULL; + } + if (iree_status_is_deferred(status)) { + IREE_TRACE({ + iree_vm_invoke_fiber_leave(state->invocation_id, state->base.stack); + }); + // Deferred until a wait completes or the next tick. + status = iree_vm_async_tick_invoke(state, loop); + } else if (iree_status_is_ok(status)) { + // Completed synchronously. This is the happy path and lets us complete the + // entire invocation in a single loop operation. + status = iree_vm_async_end_invoke(state, loop); + } else { + IREE_TRACE(iree_vm_invoke_fiber_leave(state->invocation_id, NULL)); + // Failed to begin the invocation; release resources and call back. + // We know the state wasn't fully initialized and don't need to clean it up. + iree_vm_list_release(state->outputs); + iree_vm_list_release(inputs); + iree_vm_context_release(context); + status = state->callback(state->user_data, loop, status, NULL); + IREE_TRACE_ZONE_END(z0); + return status; + } + + // If we began but failed to tick/end we need to propagate that to the user + // and clean up our state. + if (!iree_status_is_ok(status)) { + status = iree_vm_async_complete_invoke(state, loop, status); + } + IREE_TRACE_ZONE_END(z0); + return status; +} + +static iree_status_t iree_vm_async_resume_invoke(void* user_data, + iree_loop_t loop, + iree_status_t loop_status) { + IREE_TRACE_ZONE_BEGIN(z0); + iree_vm_async_invoke_state_t* state = + (iree_vm_async_invoke_state_t*)user_data; + + // Resume the invocation and execute the next step. + IREE_TRACE({ + iree_vm_invoke_fiber_reenter(state->invocation_id, state->base.stack); + }); + iree_status_t status = iree_vm_resume_invoke(&state->base); + if (iree_status_is_deferred(status)) { + IREE_TRACE({ + iree_vm_invoke_fiber_leave(state->invocation_id, state->base.stack); + }); + // Deferred on a wait or yield. Enqueue waits/a resume. + status = iree_vm_async_tick_invoke(state, loop); + } else if (iree_status_is_ok(status)) { + // Completed synchronously. + status = iree_vm_async_end_invoke(state, loop); + } else { + IREE_TRACE({ + iree_vm_invoke_fiber_leave(state->invocation_id, state->base.stack); + }); + } + + // If we failed to tick/end we need to propagate that to the user and clean up + // our state. + if (!iree_status_is_ok(status)) { + status = iree_vm_async_complete_invoke(state, loop, status); + } + IREE_TRACE_ZONE_END(z0); + return status; +} + +static iree_status_t iree_vm_async_wake_invoke(void* user_data, + iree_loop_t loop, + iree_status_t loop_status) { + IREE_TRACE_ZONE_BEGIN(z0); + iree_vm_async_invoke_state_t* state = + (iree_vm_async_invoke_state_t*)user_data; + + // If we were aborted then we need to tear everything down. + // TODO(benvanik): maybe allow the failures through to the target? It'd be + // impossible to tell when the loop was in an invalid state if we did. May + // need to rework the loop callback on waits so that we can differentiate. + if (iree_status_is_aborted(loop_status)) { + IREE_TRACE_ZONE_END(z0); + return iree_vm_async_complete_invoke(state, loop, loop_status); + } + + // The loop_status we receive here is the result of the wait operation and + // something we need to propagate to the waiter. + iree_vm_stack_frame_t* current_frame = + iree_vm_stack_current_frame(state->base.stack); + iree_vm_wait_frame_t* wait_frame = + (iree_vm_wait_frame_t*)iree_vm_stack_frame_storage(current_frame); + wait_frame->wait_status = loop_status; + + IREE_ASSERT(iree_status_is_deferred(state->base.status)); + iree_status_free(state->base.status); + state->base.status = iree_ok_status(); + + IREE_TRACE_ZONE_END(z0); + + // Resume the invocation and execute the next step. + // We do this inline instead of enqueuing a resume so that we avoid a needless + // operation in the loop. The invocation may immediately wait again and we + // want to keep the total wait-to-wait latency low. + return iree_vm_async_resume_invoke(user_data, loop, iree_ok_status()); +} + +static iree_status_t iree_vm_async_tick_invoke( + iree_vm_async_invoke_state_t* state, iree_loop_t loop) { + // Grab the wait frame from the stack holding the wait parameters. + // This is optional: if an invocation yields for cooperative scheduling + // purposes there will not be a wait frame on the stack and we'll just + // resume it below. + iree_vm_stack_frame_t* current_frame = + iree_vm_stack_current_frame(state->base.stack); + if (IREE_UNLIKELY(!current_frame)) { + // Unbalanced stack. + return iree_make_status(IREE_STATUS_FAILED_PRECONDITION, + "unbalanced stack after yield"); + } else if (current_frame->type == IREE_VM_STACK_FRAME_WAIT) { + // Wait on a wait source. + iree_vm_wait_frame_t* wait_frame = + (iree_vm_wait_frame_t*)iree_vm_stack_frame_storage(current_frame); + + // Combine the wait-invoke deadline with the one specified by the wait + // operation itself. This allows schedulers to timeslice waits without + // worrying whether user programs request to wait forever. + iree_timeout_t timeout = iree_make_deadline( + iree_min(state->deadline_ns, wait_frame->deadline_ns)); + switch (wait_frame->wait_type) { + default: + case IREE_VM_WAIT_UNTIL: + return iree_loop_wait_until(loop, timeout, iree_vm_async_wake_invoke, + state); + case IREE_VM_WAIT_ANY: + return iree_loop_wait_any(loop, wait_frame->count, + wait_frame->wait_sources, timeout, + iree_vm_async_wake_invoke, state); + case IREE_VM_WAIT_ALL: + return iree_loop_wait_all(loop, wait_frame->count, + wait_frame->wait_sources, timeout, + iree_vm_async_wake_invoke, state); + } + } else { + // Resume from a yield point (cooperative scheduling). + return iree_loop_call(loop, IREE_LOOP_PRIORITY_DEFAULT, + iree_vm_async_resume_invoke, state); + } +} + +static iree_status_t iree_vm_async_end_invoke( + iree_vm_async_invoke_state_t* state, iree_loop_t loop) { + // End the invocation and retrieve the results. + iree_status_t invoke_status = iree_ok_status(); + IREE_RETURN_IF_ERROR( + iree_vm_end_invoke(&state->base, state->outputs, &invoke_status)); + IREE_TRACE({ + // We leave the fiber before completing so that the callback is attributed + // to the thread running it instead. + iree_vm_invoke_fiber_leave(state->invocation_id, state->base.stack); + }); + return iree_vm_async_complete_invoke(state, loop, invoke_status); +} + +static iree_status_t iree_vm_async_complete_invoke( + iree_vm_async_invoke_state_t* state, iree_loop_t loop, + iree_status_t status) { + // Release all resources if we didn't already clean them up. + if (!iree_status_is_ok(status)) { + iree_vm_abort_invoke(&state->base); + iree_vm_list_release(state->outputs); + state->outputs = NULL; + } + + // Issue callback. + iree_vm_list_t* outputs = state->outputs; + return state->callback(state->user_data, loop, status, outputs); +}
diff --git a/runtime/src/iree/vm/invocation.h b/runtime/src/iree/vm/invocation.h index ca77590..3e4ecaa 100644 --- a/runtime/src/iree/vm/invocation.h +++ b/runtime/src/iree/vm/invocation.h
@@ -178,6 +178,118 @@ IREE_API_EXPORT void iree_vm_abort_invoke(iree_vm_invoke_state_t* state); //===----------------------------------------------------------------------===// +// Loop-based asynchronous invocation +//===----------------------------------------------------------------------===// + +typedef intptr_t iree_vm_invocation_id_t; + +// Callback notifying the caller of an iree_vm_async_invoke that the invocation +// has completed. If successful then |outputs| will contain the results and +// ownership is transferred to the callee. +// +// |status| contains either the result of the invocation process if it failed or +// the result of the invocation itself if it succeeded. +// +// This is executed from within a |loop| context and must not block. Handlers +// are encouraged to defer all processing to another loop operation in order to +// reduce stack utilization. +typedef iree_status_t(IREE_API_PTR* iree_vm_async_invoke_callback_fn_t)( + void* user_data, iree_loop_t loop, iree_status_t status, + iree_vm_list_t* outputs); + +// Storage for iree_vm_async_invoke state. +// This is intended to be embedded within higher-level invocation objects or on +// the heap. When possible (and outputs are provided) the async invocation will +// not allocate any additional memory. +typedef struct iree_vm_async_invoke_state_t { + // Until we begin the invocation we don't need the state storage so we use + // that memory to store the parameters we'll need to begin. Kind of shady, but + // saves some ~128 bytes. Hooray C! 🥴 + union { + struct { + // Retains the context the invocation is running within. + iree_vm_context_t* context; + // Target function. + iree_vm_function_t function; + // Flags controlling invocation behavior. + iree_vm_invocation_flags_t flags; + // TBD. + const iree_vm_invocation_policy_t* policy; + // Optional input storage list used to call the target function. + // Released after the function is entered. + iree_vm_list_t* inputs; + } begin_params; + // Base invoke state used to store the VM stack and resources. + iree_vm_invoke_state_t base; + }; + // ID used for fiber tracing; either unique to the invocation or the context + // based on the context concurrency mode. + iree_vm_invocation_id_t invocation_id; + // TBD: deadline for when the invocation will be aborted. + iree_time_t deadline_ns; + // Allocator used for transient allocations required during invocation. + // If an arena it must remain valid for the duration of the invocation. + iree_allocator_t host_allocator; + // Optional preallocated output storage list that will receive the results. + iree_vm_list_t* outputs; + // Callback issued when the invocation completes. + iree_vm_async_invoke_callback_fn_t callback; + void* user_data; +} iree_vm_async_invoke_state_t; + +// Asynchronously invokes |function| in |context| on the given |loop|. +// The call will return immediately with the invocation pending on the loop. +// Note that the |callback| may be issued before this function returns (such as +// when using an inline loop or one running on another thread). +// +// |state| is opaque storage that must remain live until the callback is issued. +// Callers should either allocate this from the heap to then free in the +// callback or embed the storage within their higher-level invocation data +// structures. +// +// |inputs| will be retained until no longer needed by the invocation and should +// generally not be modified until the callback is issued. |outputs| will be +// retained until the callback is made and must be released by the callback. +// +// The |callback| will receive |user_data| and is guaranteed to be called even +// if the invocation fails due to an internal error. If the loop is aborted due +// to a propagated scope failure the status passed to the callback will be +// IREE_STATUS_ABORTED and no new work can be scheduled to the provided loop. +// +// Multiple invocations to the same context are only allowed to overlap if the +// context was created with the IREE_VM_CONTEXT_FLAG_CONCURRENT flag set. +// +// Usage: +// iree_vm_async_invoke_state_t* state = malloc(...); +// iree_vm_async_invoke( +// loop, // loop to run in +// state, // state storage, must live until callback +// ...function..., // target function +// inputs, outputs, ..., // input values and output storage (if needed) +// callback, state); // user callback and user_data +// ... +// iree_status_t callback( +// void* user_data, // as passed to iree_vm_async_invoke +// iree_loop_t loop, // if needing to schedule continuations +// iree_status_t status, // result of invocation +// iree_vm_list_t* outputs) { // retained output storage w/ result values +// if (iree_status_is_ok(status)) { +// // completed successfully! process outputs: +// do_something_with_outputs(outputs); +// } +// iree_vm_list_release(outputs); // must be released! +// free(user_data); // in this example the state storage +// return iree_ok_status(); // result propagated to loop scope +// } +IREE_API_EXPORT iree_status_t iree_vm_async_invoke( + iree_loop_t loop, iree_vm_async_invoke_state_t* state, + iree_vm_context_t* context, iree_vm_function_t function, + iree_vm_invocation_flags_t flags, const iree_vm_invocation_policy_t* policy, + iree_vm_list_t* inputs, iree_vm_list_t* outputs, + iree_allocator_t host_allocator, + iree_vm_async_invoke_callback_fn_t callback, void* user_data); + +//===----------------------------------------------------------------------===// // Asynchronous stateful invocation //===----------------------------------------------------------------------===//
diff --git a/runtime/src/iree/vm/stack.c b/runtime/src/iree/vm/stack.c index 82decc4..a805a7b 100644 --- a/runtime/src/iree/vm/stack.c +++ b/runtime/src/iree/vm/stack.c
@@ -580,6 +580,9 @@ IREE_TRACE({ frame_header->trace_zone = iree_vm_stack_trace_function_zone_begin(frame_type, function); + if (frame_header->trace_zone) { + IREE_TRACE_ZONE_APPEND_VALUE(frame_header->trace_zone, (uint64_t)stack); + } }); if (out_callee_frame) *out_callee_frame = callee_frame; @@ -719,30 +722,37 @@ } static void iree_vm_stack_resume_trace_zones_recursive( - iree_vm_stack_frame_header_t* frame_header) { + iree_vm_stack_t* stack, iree_vm_stack_frame_header_t* frame_header) { if (frame_header->parent) { // To get bottom->top ordering we recurse into parent frames first. - iree_vm_stack_resume_trace_zones_recursive(frame_header->parent); + iree_vm_stack_resume_trace_zones_recursive(stack, frame_header->parent); } + IREE_ASSERT_EQ(frame_header->trace_zone, 0); if (frame_header->frame.type == IREE_VM_STACK_FRAME_WAIT) { iree_vm_wait_frame_t* wait_frame = (iree_vm_wait_frame_t*)iree_vm_stack_frame_storage( &frame_header->frame); + // TODO(benvanik): find a good way to recover the wait zone; for now we just + // mark it as "?". IREE_ASSERT_EQ(wait_frame->trace_zone, 0); - wait_frame->trace_zone = iree_vm_stack_trace_wait_zone_begin( + IREE_TRACE_ZONE_BEGIN_NAMED(z0, "iree_vm_stack_wait_recover_?"); + wait_frame->trace_zone = z0; + frame_header->trace_zone = iree_vm_stack_trace_wait_zone_begin( wait_frame->wait_type, wait_frame->count); } else { - IREE_ASSERT_EQ(frame_header->trace_zone, 0); frame_header->trace_zone = iree_vm_stack_trace_function_zone_begin( frame_header->frame.type, &frame_header->frame.function); + if (frame_header->trace_zone) { + IREE_TRACE_ZONE_APPEND_VALUE(frame_header->trace_zone, (uint64_t)stack); + } } } IREE_API_EXPORT void iree_vm_stack_resume_trace_zones(iree_vm_stack_t* stack) { // Walking the stack bottom->top only happens in this case and it's not worth // storing additional metadata in order to make it efficient. if (stack->top) { - iree_vm_stack_resume_trace_zones_recursive(stack->top); + iree_vm_stack_resume_trace_zones_recursive(stack, stack->top); } }
diff --git a/runtime/src/iree/vm/stack.h b/runtime/src/iree/vm/stack.h index e8840b6..bb77123 100644 --- a/runtime/src/iree/vm/stack.h +++ b/runtime/src/iree/vm/stack.h
@@ -49,6 +49,10 @@ // functionality is available; specifically: // -DIREE_VM_EXECUTION_TRACING_ENABLE=1 IREE_VM_INVOCATION_FLAG_TRACE_EXECUTION = 1u << 0, + + // Attributes invocation timings to the caller instead of a context or + // invocation-specific fiber. + IREE_VM_INVOCATION_FLAG_TRACE_INLINE = 1u << 1, }; typedef uint32_t iree_vm_invocation_flags_t;