blob: 5ea5b9cc2a4f6c3b9fae7a12beb5b85dc532afab [file] [log] [blame]
// Copyright 2019 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "iree/vm/stack.h"
#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <string.h>
#include "iree/base/alignment.h"
#include "iree/base/api.h"
#include "iree/base/tracing.h"
#include "iree/vm/module.h"
#ifndef NDEBUG
#define VMCHECK(expr) assert(expr)
#else
#define VMCHECK(expr)
#endif // NDEBUG
//===----------------------------------------------------------------------===//
// Stack implementation
//===----------------------------------------------------------------------===//
//
// The stack is (currently) designed to contain enough information to allow us
// to build some nice debugging tools. This means that we try hard to preserve
// all information needed for complete and precise stack dumps as well as
// allowing inspection of both current and previous stack frame registers.
// In the future we may want to toggle these modes such that registers, for
// example, are hidden by the module implementations to allow for more
// optimization opportunity but as a whole we tradeoff minimal memory
// consumption for flexibility and debugging. Given that a single activation
// tensor will usually dwarf the entire size of the stack used for an invocation
// it's generally acceptable :)
//
// Stack frames and storage
// ------------------------
// Frames are stored as a linked list of iree_vm_stack_frame_header_t's
// containing the API-visible stack frame information (such as which function
// the frame is in and it's program counter) and the storage for registers used
// by the frame. As all operations including stack dumps only ever need to
// enumerate the frames in storage order there's no need to be able to randomly
// index into them and the linked list combined with dynamic stack growth gives
// us (practically) unlimited stack depth.
//
// [iree_vm_stack_t]
// +- top -------> [frame 3 header] [registers] ---+
// |
// +--- [frame 2 header] [registers] <--+
// |
// +--> [frame 1 header] [registers] ---+
// |
// NULL <--- [frame 0 header] [registers] <--+
//
// To allow for static stack allocation and make allocating the VM stack on the
// host stack or within an existing data structure the entire stack, including
// all frame storage, can be placed into an existing allocation. This is similar
// to inlined vectors/etc where some storage is available directly in the object
// and only when exceeded will it switch to a dynamic allocation.
//
// Dynamic stack growth
// --------------------
// Though most of the stacks we deal with are rather shallow due to aggressive
// inlining in the compiler it's still possible to spill any reasonably-sized
// static storage allocation. This can be especially true in modules compiled
// with optimizations disabled; for example the debug register allocator may
// expand the required register count for a function from 30 to 3000.
//
// To support these cases the stack can optionally be provided an allocator to
// enable it to grow the stack when the initial storage is exhausted. As we
// store pointers to the stack storage within the storage itself (such as the
// iree_vm_registers_t pointers) this means we need to perform a fixup step
// during reallocation to ensure they are all updated. This also means that the
// pointers to the stack frames are possibly invalidated on every function
// entry and that users of the stack cannot rely on pointer stability during
// execution.
//
// Calling convention
// ------------------
// Callers provide an arguments buffer and results buffer sized appropriately
// for the call and with the arguments buffer populated. Callees will push
// their new stack frame, copy or move the arguments from the caller buffer into
// the callee frame, and then begin execution. Upon return the callee function
// will move return values to the result buffer and pop their stack frame.
//
// By making the actual stack frame setup and teardown callee-controlled we can
// have optimized implementations that treat register storage differently across
// various frames. For example, native modules that store their registers in
// host-machine specific registers can marshal the caller registers in/out of
// the host registers (or stack/etc) without exposing the actual implementation
// to the caller.
//
// Calling into the VM
// -------------------
// Calls from external code into the VM such as via iree_vm_invoke reuse the
// same calling convention as internal-to-internal calls: callees load arguments
// from the caller frame and store results into the caller frame.
//
// Marshaling arguments is easy given that the caller controls these and we can
// trivially map the ordered set of argument types into the VM calling
// convention buffers.
//
// A side-effect (beyond code reuse) is that ref types are retained by the VM
// for the entire lifetime they may be accessible by VM routines. This lets us
// get rich stack traces without needing to hook into external code and lets us
// timeshift via coroutines where we may otherwise not know when the external
// caller will resume a yielded call and actually read back the results.
//
// The overhead of this marshaling is minimal as external functions can always
// use move semantics on the ref objects. Since we are reusing the normal VM
// code paths which are likely still in instruction cache the bulk of the work
// amounts to some small memcpys.
// Multiplier on the capacity of the stack frame storage when growing.
// Since we never shrink stacks it's nice to keep this relative low. If we
// measure a lot of growth happening in normal models we should increase this
// but otherwise leave as small as we can to avoid overallocation.
#define IREE_VM_STACK_GROWTH_FACTOR 2
// A private stack frame header that allows us to walk the linked list of
// frames without exposing their exact structure through the API. This makes it
// easier for us to add/version additional information or hide implementation
// details.
typedef struct iree_vm_stack_frame_header_t {
// Size, in bytes, of the frame header and frame payload including registers.
// Adding this value to the base header pointer will yield the next available
// memory location. Ensure that it does not exceed the total
// frame_storage_capacity.
iree_host_size_t frame_size;
// Pointer to the parent stack frame, usually immediately preceding this one
// in the frame storage. May be NULL.
struct iree_vm_stack_frame_header_t* parent;
// Stack frame type used to determine which fields are valid.
iree_vm_stack_frame_type_t type;
// Size, in bytes, of the additional stack frame data that follows the frame.
iree_host_size_t data_size;
// Function called when the stack frame is left.
iree_vm_stack_frame_cleanup_fn_t frame_cleanup_fn;
// Actual stack frame as visible through the API.
// The registers within the frame will (likely) point to addresses immediately
// following this header in memory.
iree_vm_stack_frame_t frame;
} iree_vm_stack_frame_header_t;
// Core stack storage. This will be mapped either into dynamic memory allocated
// by the member allocator or static memory allocated externally. Static stacks
// cannot grow when storage runs out while dynamic ones will resize their stack.
struct iree_vm_stack_t {
// NOTE: to get better cache hit rates we put the most frequently accessed
// members first.
// Pointer to the current top of the stack.
// This can be used to walk the stack from top to bottom by following the
// |parent| pointers. Note that these pointers are invalidated each time the
// stack grows (if dynamic growth is enabled) and all of the frames will need
// updating.
iree_vm_stack_frame_header_t* top;
// Base pointer to stack storage.
// For statically-allocated stacks this will (likely) point to immediately
// after the iree_vm_stack_t in memory. For dynamically-allocated stacks this
// will (likely) point to heap memory.
iree_host_size_t frame_storage_capacity;
iree_host_size_t frame_storage_size;
void* frame_storage;
// Flags controlling the behavior of the invocation owning this stack.
iree_vm_invocation_flags_t flags;
// True if the stack owns the frame_storage and should free it when it is no
// longer required. Host stack-allocated stacks don't own their storage but
// may transition to owning it on dynamic growth.
bool owns_frame_storage;
// Resolves a module to a module state within a context.
// This will be called on function entry whenever module transitions occur.
iree_vm_state_resolver_t state_resolver;
// Allocator used for dynamic stack allocations. May be the null allocator
// if growth is prohibited.
iree_allocator_t allocator;
};
//===----------------------------------------------------------------------===//
// Stack implementation
//===----------------------------------------------------------------------===//
IREE_API_EXPORT iree_status_t iree_vm_stack_initialize(
iree_byte_span_t storage, iree_vm_invocation_flags_t flags,
iree_vm_state_resolver_t state_resolver, iree_allocator_t allocator,
iree_vm_stack_t** out_stack) {
IREE_ASSERT_ARGUMENT(out_stack);
*out_stack = NULL;
if (storage.data_length < IREE_VM_STACK_MIN_SIZE) {
return iree_make_status(
IREE_STATUS_INVALID_ARGUMENT,
"stack storage under minimum required amount: %zu < %d",
storage.data_length, IREE_VM_STACK_MIN_SIZE);
}
IREE_TRACE_ZONE_BEGIN(z0);
iree_vm_stack_t* stack = (iree_vm_stack_t*)storage.data;
memset(stack, 0, sizeof(iree_vm_stack_t));
stack->owns_frame_storage = false;
stack->flags = flags;
stack->state_resolver = state_resolver;
stack->allocator = allocator;
iree_host_size_t storage_offset =
iree_host_align(sizeof(iree_vm_stack_t), 16);
stack->frame_storage_capacity = storage.data_length - storage_offset;
stack->frame_storage_size = 0;
stack->frame_storage = storage.data + storage_offset;
stack->top = NULL;
*out_stack = stack;
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
IREE_API_EXPORT void iree_vm_stack_deinitialize(iree_vm_stack_t* stack) {
IREE_TRACE_ZONE_BEGIN(z0);
while (stack->top) {
iree_status_ignore(iree_vm_stack_function_leave(stack));
}
if (stack->owns_frame_storage) {
iree_allocator_free(stack->allocator, stack->frame_storage);
}
IREE_TRACE_ZONE_END(z0);
}
IREE_API_EXPORT iree_status_t iree_vm_stack_allocate(
iree_vm_invocation_flags_t flags, iree_vm_state_resolver_t state_resolver,
iree_allocator_t allocator, iree_vm_stack_t** out_stack) {
IREE_TRACE_ZONE_BEGIN(z0);
*out_stack = NULL;
iree_host_size_t storage_size = IREE_VM_STACK_DEFAULT_SIZE;
void* storage = NULL;
iree_status_t status =
iree_allocator_malloc(allocator, storage_size, &storage);
iree_vm_stack_t* stack = NULL;
if (iree_status_is_ok(status)) {
iree_byte_span_t storage_span = iree_make_byte_span(storage, storage_size);
status = iree_vm_stack_initialize(storage_span, flags, state_resolver,
allocator, &stack);
}
*out_stack = stack;
IREE_TRACE_ZONE_END(z0);
return status;
}
IREE_API_EXPORT void iree_vm_stack_free(iree_vm_stack_t* stack) {
IREE_TRACE_ZONE_BEGIN(z0);
iree_allocator_t allocator = stack->allocator;
void* storage = (void*)stack;
iree_vm_stack_deinitialize(stack);
iree_allocator_free(allocator, storage);
IREE_TRACE_ZONE_END(z0);
}
IREE_API_EXPORT iree_vm_invocation_flags_t
iree_vm_stack_invocation_flags(const iree_vm_stack_t* stack) {
return stack->flags;
}
IREE_API_EXPORT iree_vm_stack_frame_t* iree_vm_stack_current_frame(
iree_vm_stack_t* stack) {
return stack->top ? &stack->top->frame : NULL;
}
IREE_API_EXPORT iree_vm_stack_frame_t* iree_vm_stack_parent_frame(
iree_vm_stack_t* stack) {
if (!stack->top) return NULL;
iree_vm_stack_frame_header_t* parent_header = stack->top->parent;
return parent_header ? &parent_header->frame : NULL;
}
IREE_API_EXPORT iree_status_t iree_vm_stack_query_module_state(
iree_vm_stack_t* stack, iree_vm_module_t* module,
iree_vm_module_state_t** out_module_state) {
return stack->state_resolver.query_module_state(stack->state_resolver.self,
module, out_module_state);
}
// Attempts to grow the stack store to hold at least |minimum_capacity|.
// Pointers to existing stack frames will be invalidated and any pointers
// embedded in the stack frame data structures will be updated.
// Fails if dynamic stack growth is disabled or the allocator is OOM.
static iree_status_t iree_vm_stack_grow(iree_vm_stack_t* stack,
iree_host_size_t minimum_capacity) {
if (IREE_UNLIKELY(stack->allocator.ctl == NULL)) {
return iree_make_status(
IREE_STATUS_RESOURCE_EXHAUSTED,
"stack initialized on the host stack and cannot grow");
}
// Ensure we grow at least as much as required.
iree_host_size_t new_capacity = stack->frame_storage_capacity;
do {
new_capacity *= IREE_VM_STACK_GROWTH_FACTOR;
} while (new_capacity < minimum_capacity);
if (new_capacity > IREE_VM_STACK_MAX_SIZE) {
return iree_make_status(
IREE_STATUS_RESOURCE_EXHAUSTED,
"new stack size would exceed maximum size: %zu > %d", new_capacity,
IREE_VM_STACK_MAX_SIZE);
}
IREE_TRACE_ZONE_BEGIN(z0);
// Reallocate the frame storage. 99.9999% chance the new storage pointer will
// differ and we'll need to fix up pointers so we just always do that.
void* old_storage = stack->frame_storage;
void* new_storage = stack->frame_storage;
iree_status_t status;
if (stack->owns_frame_storage) {
// We own the storage already likely from a previous growth operation.
status =
iree_allocator_realloc(stack->allocator, new_capacity, &new_storage);
} else {
// We don't own the original storage so we are going to switch to our own
// newly-allocated storage instead. We need to make sure we copy over the
// existing stack contents.
status =
iree_allocator_malloc(stack->allocator, new_capacity, &new_storage);
if (iree_status_is_ok(status)) {
memcpy(new_storage, old_storage, stack->frame_storage_capacity);
}
}
if (!iree_status_is_ok(status)) {
IREE_TRACE_ZONE_END(z0);
return status;
}
stack->frame_storage = new_storage;
stack->frame_storage_capacity = new_capacity;
stack->owns_frame_storage = true;
#define REBASE_POINTER(type, ptr, old_base, new_base) \
if (ptr) { \
(ptr) = (type)(((uintptr_t)(ptr) - (uintptr_t)(old_base)) + \
(uintptr_t)(new_base)); \
}
// Fixup embedded stack frame pointers.
REBASE_POINTER(iree_vm_stack_frame_header_t*, stack->top, old_storage,
new_storage);
iree_vm_stack_frame_header_t* frame_header = stack->top;
while (frame_header != NULL) {
REBASE_POINTER(iree_vm_stack_frame_header_t*, frame_header->parent,
old_storage, new_storage);
frame_header = frame_header->parent;
}
IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}
IREE_API_EXPORT iree_status_t iree_vm_stack_function_enter(
iree_vm_stack_t* stack, const iree_vm_function_t* function,
iree_vm_stack_frame_type_t frame_type, iree_host_size_t frame_size,
iree_vm_stack_frame_cleanup_fn_t frame_cleanup_fn,
iree_vm_stack_frame_t** out_callee_frame) {
if (out_callee_frame) *out_callee_frame = NULL;
// Allocate stack space and grow stack, if required.
iree_host_size_t header_size = sizeof(iree_vm_stack_frame_header_t);
iree_host_size_t new_top =
stack->frame_storage_size + header_size + frame_size;
if (IREE_UNLIKELY(new_top > stack->frame_storage_capacity)) {
IREE_RETURN_IF_ERROR(iree_vm_stack_grow(stack, new_top));
}
// Try to reuse the same module state if the caller and callee are from the
// same module. Otherwise, query the state from the registered handler.
iree_vm_stack_frame_header_t* caller_frame_header = stack->top;
iree_vm_stack_frame_t* caller_frame =
caller_frame_header ? &caller_frame_header->frame : NULL;
iree_vm_module_state_t* module_state = NULL;
if (caller_frame && caller_frame->function.module == function->module) {
module_state = caller_frame->module_state;
} else if (function->module != NULL) {
IREE_RETURN_IF_ERROR(stack->state_resolver.query_module_state(
stack->state_resolver.self, function->module, &module_state));
}
// Bump pointer and get real stack pointer offsets.
iree_vm_stack_frame_header_t* frame_header =
(iree_vm_stack_frame_header_t*)((uintptr_t)stack->frame_storage +
stack->frame_storage_size);
memset(frame_header, 0, header_size + frame_size);
frame_header->frame_size = header_size + frame_size;
frame_header->parent = stack->top;
frame_header->type = frame_type;
frame_header->data_size = frame_size;
frame_header->frame_cleanup_fn = frame_cleanup_fn;
iree_vm_stack_frame_t* callee_frame = &frame_header->frame;
callee_frame->function = *function;
callee_frame->module_state = module_state;
callee_frame->pc = 0;
callee_frame->depth = caller_frame ? caller_frame->depth + 1 : 0;
stack->frame_storage_size = new_top;
stack->top = frame_header;
IREE_TRACE({
if (frame_type != IREE_VM_STACK_FRAME_NATIVE) {
// TODO(benvanik): cache source location and query from module.
iree_string_view_t function_name = iree_vm_function_name(function);
IREE_TRACE_ZONE_BEGIN_NAMED_DYNAMIC(z0, function_name.data,
function_name.size);
callee_frame->trace_zone = z0;
if (frame_size) {
IREE_TRACE_ZONE_APPEND_VALUE(z0, frame_size);
}
}
});
if (out_callee_frame) *out_callee_frame = callee_frame;
return iree_ok_status();
}
IREE_API_EXPORT iree_status_t
iree_vm_stack_function_leave(iree_vm_stack_t* stack) {
if (IREE_UNLIKELY(!stack->top)) {
return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
"unbalanced stack leave");
}
// Call (optional) frame storage cleanup function.
if (stack->top->frame_cleanup_fn) {
stack->top->frame_cleanup_fn(&stack->top->frame);
}
IREE_TRACE({
if (stack->top->frame.trace_zone) {
IREE_TRACE_ZONE_END(stack->top->frame.trace_zone);
}
});
// Restore the frame pointer to the caller.
stack->frame_storage_size -= stack->top->frame_size;
stack->top = stack->top->parent;
return iree_ok_status();
}
IREE_API_EXPORT iree_status_t iree_vm_stack_format_backtrace(
iree_vm_stack_t* stack, iree_string_builder_t* builder) {
for (iree_vm_stack_frame_header_t* frame = stack->top; frame != NULL;
frame = frame->parent) {
// Stack frame prefix.
const char* type_str;
switch (frame->type) {
default:
type_str = "??";
break;
case IREE_VM_STACK_FRAME_EXTERNAL:
type_str = "external";
break;
case IREE_VM_STACK_FRAME_NATIVE:
type_str = "native";
break;
case IREE_VM_STACK_FRAME_BYTECODE:
type_str = "bytecode";
break;
}
IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
builder, "\n[%*" PRId32 "] %*s ", 2, frame->frame.depth, 8, type_str));
// Common module/function name and PC.
iree_string_view_t module_name =
iree_vm_module_name(frame->frame.function.module);
iree_string_view_t function_name =
iree_vm_function_name(&frame->frame.function);
if (iree_string_view_is_empty(function_name)) {
IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
builder, "%.*s@%d", (int)module_name.size, module_name.data,
(int)frame->frame.function.ordinal));
} else {
IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
builder, "%.*s.%.*s", (int)module_name.size, module_name.data,
(int)function_name.size, function_name.data));
}
IREE_RETURN_IF_ERROR(iree_string_builder_append_format(
builder, ":%" PRIu64 " ", (uint64_t)frame->frame.pc));
iree_vm_module_t* module = frame->frame.function.module;
iree_vm_source_location_t source_location;
iree_status_t status = iree_vm_module_resolve_source_location(
module, &frame->frame, &source_location);
if (iree_status_is_ok(status)) {
status = iree_vm_source_location_format(
&source_location, IREE_VM_SOURCE_LOCATION_FORMAT_FLAG_NONE, builder);
}
if (iree_status_is_unavailable(status)) {
// TODO(benvanik): if this is an import/export we can get that name.
IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(builder, "-"));
} else if (!iree_status_is_ok(status)) {
return status;
}
}
return iree_ok_status();
}
IREE_API_EXPORT iree_status_t iree_vm_stack_annotate_backtrace(
iree_vm_stack_t* stack, iree_status_t base_status) {
iree_string_builder_t builder;
iree_string_builder_initialize(stack->allocator, &builder);
iree_status_t status = iree_vm_stack_format_backtrace(stack, &builder);
if (iree_status_is_ok(status)) {
// TODO(benvanik): don't duplicate the buffer here - we should be attaching
// a payload but that requires additional plumbing.
status = iree_status_annotate_f(base_status, "%.*s",
(int)iree_string_builder_size(&builder),
iree_string_builder_buffer(&builder));
}
iree_string_builder_deinitialize(&builder);
return status;
}