blob: aa4a3c59af49b4246d2e3a2dd031fbb6d910ad0f [file] [log] [blame]
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "bindings/tflite/interpreter.h"
#include "bindings/tflite/model.h"
#include "bindings/tflite/shim.h"
#include "bindings/tflite/tensor.h"
#include "iree/base/internal/call_once.h"
#include "iree/base/tracing.h"
#include "iree/hal/drivers/init.h"
#include "iree/modules/hal/hal_module.h"
//===----------------------------------------------------------------------===//
// HAL / driver support
//===----------------------------------------------------------------------===//
// Once-flag paired with _TfLiteInterpreterRegisterDrivers; both are handed to
// iree_call_once by _TfLiteInterpreterPrepareHAL.
static iree_once_flag _TfLiteInterpreterRegisterDriverFlag =
IREE_ONCE_FLAG_INIT;
// Registers all available HAL drivers with the default driver registry.
// Any error returned by the registration call is explicitly ignored.
static void _TfLiteInterpreterRegisterDrivers(void) {
IREE_IGNORE_ERROR(iree_hal_register_all_available_drivers(
iree_hal_driver_registry_default()));
}
// Populates the HAL members of |interpreter|: the driver, the driver's default
// device, and the HAL module created for that device.
//
// Driver registration is funneled through iree_call_once with a file-level
// flag. Returns the first failing status, annotated with the driver name for
// the driver/device creation steps.
//
// TODO(#3977): if already provided a HAL device in the options use that.
static iree_status_t _TfLiteInterpreterPrepareHAL(
    TfLiteInterpreter* interpreter) {
  iree_call_once(&_TfLiteInterpreterRegisterDriverFlag,
                 _TfLiteInterpreterRegisterDrivers);

  iree_hal_driver_registry_t* registry = iree_hal_driver_registry_default();

  // The enumeration result is currently only needed by the disabled
  // "pick the first driver" selection below; it is released right after the
  // driver creation attempt.
  iree_host_size_t info_count = 0;
  iree_hal_driver_info_t* infos = NULL;
  IREE_RETURN_IF_ERROR(iree_hal_driver_registry_enumerate(
      registry, interpreter->allocator, &infos, &info_count));

  // TODO(benvanik): figure out how we want to emulate device selection; may
  // just say "whatever is first" on a query (i.e. infos[0].driver_name).
  // NOTE: currently the sample file is compiled only with vmvx.
  iree_string_view_t driver_name = iree_make_cstring_view("vmvx");

  // TODO(benvanik): switch to iree_hal_driver_registry_try_create when
  // implemented.
  iree_status_t create_status = iree_hal_driver_registry_try_create_by_name(
      registry, driver_name, interpreter->allocator, &interpreter->driver);
  iree_allocator_free(interpreter->allocator, infos);
  IREE_RETURN_IF_ERROR(create_status, "failed to create driver '%.*s'",
                       (int)driver_name.size, driver_name.data);

  IREE_RETURN_IF_ERROR(
      iree_hal_driver_create_default_device(
          interpreter->driver, interpreter->allocator, &interpreter->device),
      "failed creating the default device for driver '%.*s'",
      (int)driver_name.size, driver_name.data);

  IREE_RETURN_IF_ERROR(iree_hal_module_create(
      interpreter->device, interpreter->allocator, &interpreter->hal_module));

  return iree_ok_status();
}
//===----------------------------------------------------------------------===//
// Model shape function query/mutation utilities
//===----------------------------------------------------------------------===//
// On-stack storage for shape function invocations.
// Avoids all allocations and allows for reuse when running down lists of
// inputs and outputs calling shape functions.
//
// Both list pointers are set up over the inline byte arrays by
// _TfLiteInterpreterShapeFrameInitialize and torn down by
// _TfLiteInterpreterShapeFrameDeinitialize.
typedef struct {
// Inlined list for the !vm.list in the shape function arguments.
// Sized for IREE_BINDINGS_TFLITE_MAX_RANK int32_t dimensions plus 128 bytes;
// NOTE(review): the 128 bytes are presumably headroom for the list header -
// confirm against iree_vm_list_storage_size.
uint8_t
shape_list_storage[128 + sizeof(int32_t) * IREE_BINDINGS_TFLITE_MAX_RANK];
// List initialized over |shape_list_storage|; holds the i32 dimensions.
iree_vm_list_t* shape_list;
// Inlined list for the shape function arguments.
// Backs the two-entry argument list: the I/O index and the shape list.
uint8_t arg_list_storage[128 + sizeof(uintptr_t) * 2];
// List initialized over |arg_list_storage|; passed as the invocation inputs.
iree_vm_list_t* arg_list;
} _TfLiteInterpreterShapeFrame;
// Prepares |frame| for calling shape functions. Whatever the frame held before
// is overwritten.
//
// On success:
//   - frame->shape_list is an i32 list initialized over shape_list_storage
//     and sized for IREE_BINDINGS_TFLITE_MAX_RANK dimensions.
//   - frame->arg_list is a list (no element type) initialized over
//     arg_list_storage and resized to 2 entries; entry 1 references
//     shape_list while entry 0 is written by each apply call.
// Returns the first failing status from the list setup calls.
static iree_status_t _TfLiteInterpreterShapeFrameInitialize(
    _TfLiteInterpreterShapeFrame* frame) {
  // Storage for the shape dimensions exchanged with the module: [int32...].
  iree_vm_type_def_t i32_type =
      iree_vm_type_def_make_value_type(IREE_VM_VALUE_TYPE_I32);
  iree_byte_span_t shape_storage = iree_make_byte_span(
      frame->shape_list_storage, IREE_ARRAYSIZE(frame->shape_list_storage));
  IREE_RETURN_IF_ERROR(iree_vm_list_initialize(shape_storage, &i32_type,
                                               IREE_BINDINGS_TFLITE_MAX_RANK,
                                               &frame->shape_list));

  // Argument list matching (%index : i32, %shape : !vm.list<i32>).
  iree_byte_span_t arg_storage = iree_make_byte_span(
      frame->arg_list_storage, IREE_ARRAYSIZE(frame->arg_list_storage));
  IREE_RETURN_IF_ERROR(iree_vm_list_initialize(
      arg_storage, /*element_type=*/NULL, /*index*/ 1 + /*shape*/ 1,
      &frame->arg_list));
  IREE_RETURN_IF_ERROR(iree_vm_list_resize(frame->arg_list, 2));

  // The shape argument (slot 1) is the same list for every input and output,
  // so it is bound a single time here.
  iree_vm_ref_t shape_ref = {0};
  IREE_RETURN_IF_ERROR(iree_vm_ref_wrap_assign(
      frame->shape_list, iree_vm_list_type_id(), &shape_ref));
  IREE_RETURN_IF_ERROR(
      iree_vm_list_set_ref_retain(frame->arg_list, 1, &shape_ref));

  return iree_ok_status();
}
// Deinitializes an on-stack shape frame.
// Though this does not free the frame memory (it's on the stack, after all) it
// will release any resources that may be retained and is required.
// |frame| must have been set up by _TfLiteInterpreterShapeFrameInitialize.
static void _TfLiteInterpreterShapeFrameDeinitialize(
_TfLiteInterpreterShapeFrame* frame) {
// The argument list (which references the shape list) goes first.
iree_vm_list_deinitialize(frame->arg_list);
iree_vm_list_deinitialize(frame->shape_list);
}
// Copies the shape currently stored in |frame| out to the caller.
//
// |out_shape_rank| receives the number of entries in the frame's shape list
// and |out_shape_dims| receives that many i32 dimensions; the caller must
// provide room for them. Returns the first failure from reading an entry.
static iree_status_t _TfLiteInterpreterShapeFrameReadValue(
    _TfLiteInterpreterShapeFrame* frame, int32_t* out_shape_rank,
    int32_t* out_shape_dims) {
  const int32_t rank = (int32_t)iree_vm_list_size(frame->shape_list);
  *out_shape_rank = rank;

  int32_t dim_index = 0;
  while (dim_index < rank) {
    iree_vm_value_t value;
    IREE_RETURN_IF_ERROR(iree_vm_list_get_value_as(
        frame->shape_list, dim_index, IREE_VM_VALUE_TYPE_I32, &value));
    out_shape_dims[dim_index] = value.i32;
    ++dim_index;
  }

  return iree_ok_status();
}
// Stores a shape into |frame| so that the next shape function call sees it.
//
// The frame's shape list is resized to |shape_rank| entries and then filled
// with the values from |shape_dims|. Returns the first failure from resizing
// or from writing an entry.
static iree_status_t _TfLiteInterpreterShapeFrameWriteValue(
    _TfLiteInterpreterShapeFrame* frame, int32_t shape_rank,
    const int32_t* shape_dims) {
  IREE_RETURN_IF_ERROR(iree_vm_list_resize(frame->shape_list, shape_rank));

  int32_t dim_index = 0;
  while (dim_index < shape_rank) {
    iree_vm_value_t value = iree_vm_value_make_i32(shape_dims[dim_index]);
    IREE_RETURN_IF_ERROR(
        iree_vm_list_set_value(frame->shape_list, dim_index, &value));
    ++dim_index;
  }

  return iree_ok_status();
}
// Calls |apply_fn| in the interpreter's context with the frame's argument
// list as inputs: (|index|, shape list). No outputs list is passed; results
// are exchanged through the frame's shape list.
//
// |frame| must have been initialized so that the argument list has its two
// slots. Returns the failure from storing |index| or from the invocation.
static iree_status_t _TfLiteInterpreterShapeFrameApply(
    _TfLiteInterpreterShapeFrame* frame, TfLiteInterpreter* interpreter,
    iree_vm_function_t apply_fn, int32_t index) {
  // Slot 0 selects which input/output the shape function operates on. A
  // failure here would otherwise invoke the function with a stale index, so
  // it is reported to the caller instead of being ignored.
  iree_vm_value_t index_value = iree_vm_value_make_i32(index);
  IREE_RETURN_IF_ERROR(
      iree_vm_list_set_value(frame->arg_list, 0, &index_value));

  return iree_vm_invoke(interpreter->context, apply_fn,
                        /*policy=*/NULL, frame->arg_list, /*outputs=*/NULL,
                        interpreter->allocator);
}
//===----------------------------------------------------------------------===//
// Shape I/O queries
//===----------------------------------------------------------------------===//
// Asks the module for the shape of every input and stores the answers in the
// interpreter's input tensors; some dimensions may still be dynamic (-1).
// Stops at and returns the first failing query or read.
static iree_status_t _TfLiteInterpreterRefreshInputShapes(
    TfLiteInterpreter* interpreter, _TfLiteInterpreterShapeFrame* frame) {
  // NOTE: we could optimize this more by using iree_vm_invoke_within, but that
  // shouldn't be needed (it's just stack pointer manipulation).
  int32_t input_index = 0;
  while (input_index < interpreter->model->input_count) {
    IREE_RETURN_IF_ERROR(_TfLiteInterpreterShapeFrameApply(
        frame, interpreter, interpreter->model->exports._query_input_shape,
        input_index));

    TfLiteTensor* input = &interpreter->input_tensors[input_index];
    IREE_RETURN_IF_ERROR(_TfLiteInterpreterShapeFrameReadValue(
        frame, &input->shape_rank, input->shape_dims));

    ++input_index;
  }
  return iree_ok_status();
}
// Asks the module for the shape of every output, allowing it to use the
// current input shapes to compute the possibly dynamic values, and stores the
// answers in the interpreter's output tensors.
// Stops at and returns the first failing query or read.
static iree_status_t _TfLiteInterpreterRefreshOutputShapes(
    TfLiteInterpreter* interpreter, _TfLiteInterpreterShapeFrame* frame) {
  // NOTE: we could optimize this more by using iree_vm_invoke_within, but that
  // shouldn't be needed (it's just stack pointer manipulation).
  int32_t output_index = 0;
  while (output_index < interpreter->model->output_count) {
    IREE_RETURN_IF_ERROR(_TfLiteInterpreterShapeFrameApply(
        frame, interpreter, interpreter->model->exports._query_output_shape,
        output_index));

    TfLiteTensor* output = &interpreter->output_tensors[output_index];
    IREE_RETURN_IF_ERROR(_TfLiteInterpreterShapeFrameReadValue(
        frame, &output->shape_rank, output->shape_dims));

    ++output_index;
  }
  return iree_ok_status();
}
// Re-queries the module for all input shapes and then all output shapes.
// Call this after each shape change so that the module can run "shape
// propagation" and compute the new output shapes.
//
// Output shapes are only queried when the input queries succeeded. The
// temporary shape frame is always torn down before returning.
static iree_status_t _TfLiteInterpreterRefreshIOShapes(
    TfLiteInterpreter* interpreter) {
  IREE_TRACE_ZONE_BEGIN(z0);

  _TfLiteInterpreterShapeFrame shape_frame;
  IREE_RETURN_AND_END_ZONE_IF_ERROR(
      z0, _TfLiteInterpreterShapeFrameInitialize(&shape_frame));

  // Inputs first: the module derives output shapes from them.
  iree_status_t status =
      _TfLiteInterpreterRefreshInputShapes(interpreter, &shape_frame);
  if (iree_status_is_ok(status)) {
    status = _TfLiteInterpreterRefreshOutputShapes(interpreter, &shape_frame);
  }

  _TfLiteInterpreterShapeFrameDeinitialize(&shape_frame);

  IREE_TRACE_ZONE_END(z0);
  return status;
}
//===----------------------------------------------------------------------===//
// Creation and static initialization
//===----------------------------------------------------------------------===//
// Returns the number of bytes needed for a single interpreter slab for
// |model|. The slab is laid out as:
//   [TfLiteInterpreter, padded to iree_max_align_t]
//   [input list storage][output list storage]
//   [input TfLiteTensor array][output TfLiteTensor array]
// This must stay in sync with the carving done by _TfLiteInterpreterAllocate.
static iree_host_size_t _TfLiteInterpreterCalculateSize(
    const TfLiteModel* model) {
  iree_vm_type_def_t list_element_type =
      iree_vm_type_def_make_ref_type(iree_hal_buffer_type_id());

  const iree_host_size_t header_size =
      iree_host_align(sizeof(TfLiteInterpreter), iree_max_align_t);
  const iree_host_size_t input_list_size =
      iree_vm_list_storage_size(&list_element_type, model->input_count);
  const iree_host_size_t output_list_size =
      iree_vm_list_storage_size(&list_element_type, model->output_count);
  const iree_host_size_t input_tensors_size =
      sizeof(TfLiteTensor) * model->input_count;
  const iree_host_size_t output_tensors_size =
      sizeof(TfLiteTensor) * model->output_count;

  return header_size + input_list_size + output_list_size +
         input_tensors_size + output_tensors_size;
}
// Allocates one zeroed slab for the interpreter and points its members at the
// regions inside it (list storage followed by the tensor arrays).
//
// The model is retained and the default options are applied. Note that
// |out_interpreter| is assigned as soon as the slab exists, i.e. before the
// list initialization steps that may still fail; TfLiteInterpreterCreate
// relies on this to delete a partially built interpreter.
static iree_status_t _TfLiteInterpreterAllocate(
    const TfLiteModel* model, TfLiteInterpreter** out_interpreter) {
  const iree_host_size_t slab_size = _TfLiteInterpreterCalculateSize(model);
  TfLiteInterpreter* interpreter = NULL;
  IREE_RETURN_IF_ERROR(iree_allocator_malloc(model->allocator, slab_size,
                                             (void**)&interpreter));
  memset(interpreter, 0, slab_size);
  interpreter->allocator = model->allocator;
  _TfLiteInterpreterOptionsSetDefaults(&interpreter->options);
  *out_interpreter = interpreter;

  interpreter->model = (TfLiteModel*)model;
  _TfLiteModelRetain(interpreter->model);

  // Everything after the (aligned) struct is carved off front to back.
  uint8_t* cursor = (uint8_t*)interpreter +
                    iree_host_align(sizeof(*interpreter), iree_max_align_t);

  iree_vm_type_def_t list_element_type =
      iree_vm_type_def_make_ref_type(iree_hal_buffer_type_id());

  const iree_host_size_t input_list_size =
      iree_vm_list_storage_size(&list_element_type, model->input_count);
  IREE_RETURN_IF_ERROR(iree_vm_list_initialize(
      iree_make_byte_span(cursor, input_list_size), &list_element_type,
      model->input_count, &interpreter->input_list));
  cursor += input_list_size;

  const iree_host_size_t output_list_size =
      iree_vm_list_storage_size(&list_element_type, model->output_count);
  IREE_RETURN_IF_ERROR(iree_vm_list_initialize(
      iree_make_byte_span(cursor, output_list_size), &list_element_type,
      model->output_count, &interpreter->output_list));
  cursor += output_list_size;

  // The output tensors directly follow the input tensors.
  interpreter->input_tensors = (TfLiteTensor*)cursor;
  cursor += sizeof(TfLiteTensor) * model->input_count;
  interpreter->output_tensors = (TfLiteTensor*)cursor;

  return iree_ok_status();
}
// Populates the input and output tensor lists with static metadata from the
// model and prepares for allocation/invocation.
static iree_status_t _TfLiteInterpreterPopulateIO(
TfLiteInterpreter* interpreter) {
iree_vm_function_t main_fn = interpreter->model->exports._main;
iree_string_view_t io_names_attr = iree_vm_function_reflection_attr(
&main_fn, iree_make_cstring_view("tfl.io.names"));
iree_string_view_t io_types_attr = iree_vm_function_reflection_attr(
&main_fn, iree_make_cstring_view("tfl.io.types"));
iree_string_view_t io_quant_attr = iree_vm_function_reflection_attr(
&main_fn, iree_make_cstring_view("tfl.io.quant"));
// Setup static tensor metadata.
for (iree_host_size_t i = 0; i < interpreter->model->input_count; ++i) {
TfLiteTensor* tensor = &interpreter->input_tensors[i];
memset(tensor, 0, sizeof(*tensor));
iree_string_view_t io_name_part = iree_string_view_empty();
iree_string_view_split(io_names_attr, ';', &io_name_part, &io_names_attr);
iree_string_view_t io_type_part = iree_string_view_empty();
iree_string_view_split(io_types_attr, ';', &io_type_part, &io_types_attr);
iree_string_view_t io_quant_part = iree_string_view_empty();
iree_string_view_split(io_quant_attr, ';', &io_quant_part, &io_quant_attr);
IREE_RETURN_IF_ERROR(_TfLiteTensorParseNameAttr(tensor, io_name_part,
interpreter->allocator));
IREE_RETURN_IF_ERROR(_TfLiteTensorParseTypeAttr(tensor, io_type_part));
IREE_RETURN_IF_ERROR(_TfLiteTensorParseQuantAttr(tensor, io_quant_part));
}
for (iree_host_size_t i = 0; i < interpreter->model->output_count; ++i) {
TfLiteTensor* tensor = &interpreter->output_tensors[i];
memset(tensor, 0, sizeof(*tensor));
iree_string_view_t io_name_part = iree_string_view_empty();
iree_string_view_split(io_names_attr, ';', &io_name_part, &io_names_attr);
iree_string_view_t io_type_part = iree_string_view_empty();
iree_string_view_split(io_types_attr, ';', &io_type_part, &io_types_attr);
iree_string_view_t io_quant_part = iree_string_view_empty();
iree_string_view_split(io_quant_attr, ';', &io_quant_part, &io_quant_attr);
IREE_RETURN_IF_ERROR(_TfLiteTensorParseNameAttr(tensor, io_name_part,
interpreter->allocator));
IREE_RETURN_IF_ERROR(_TfLiteTensorParseTypeAttr(tensor, io_type_part));
IREE_RETURN_IF_ERROR(_TfLiteTensorParseQuantAttr(tensor, io_quant_part));
}
// Prepare the IO lists we use when calling into the model.
// The actual contents of these cannot be set until
// TfLiteInterpreterAllocateTensors has been called.
IREE_RETURN_IF_ERROR(iree_vm_list_reserve(interpreter->input_list,
interpreter->model->input_count));
IREE_RETURN_IF_ERROR(iree_vm_list_reserve(interpreter->output_list,
interpreter->model->output_count));
return iree_ok_status();
}
// Builds a complete interpreter for |model|: slab allocation, options, VM
// instance, HAL setup, VM context, and static I/O metadata.
//
// |optional_options| may be NULL, in which case the defaults applied during
// allocation are kept. |out_interpreter| is reset to NULL first; once the slab
// is allocated it points at the interpreter even when a later step fails, so
// the caller is responsible for deleting it on error.
static iree_status_t _TfLiteInterpreterCreate(
    const TfLiteModel* model, const TfLiteInterpreterOptions* optional_options,
    TfLiteInterpreter** out_interpreter) {
  *out_interpreter = NULL;

  // We allocate a large majority of the interpreter structures as a single
  // slab. There's still some allocations that we could prevent (like internal
  // VM stuff) but this at least covers half of it.
  IREE_RETURN_IF_ERROR(_TfLiteInterpreterAllocate(model, out_interpreter));
  TfLiteInterpreter* interpreter = *out_interpreter;

  // User-provided options replace the defaults wholesale.
  if (optional_options != NULL) {
    memcpy(&interpreter->options, optional_options,
           sizeof(interpreter->options));
  }

  // The interpreter keeps its own reference to the model's module.
  iree_vm_module_retain(model->module);
  interpreter->user_module = model->module;

  // External contexts could possibly be used to emulate sharing this, but
  // really if a user is running with multiple models the tflite API is
  // insufficient.
  IREE_RETURN_IF_ERROR(
      iree_vm_instance_create(interpreter->allocator, &interpreter->instance));

  IREE_RETURN_IF_ERROR(_TfLiteInterpreterPrepareHAL(interpreter));

  // Context will contain both the user-provided bytecode and the HAL module.
  // If we were to support custom ops we would also have a
  // tflite_resolver_module that we would register to resolve tflite ops into
  // IREE functions that will call custom ops through TfLiteRegistrations.
  IREE_RETURN_IF_ERROR(iree_vm_context_create_with_modules(
      interpreter->instance, interpreter->all_modules,
      IREE_ARRAYSIZE(interpreter->all_modules), interpreter->allocator,
      &interpreter->context));

  // Setup all I/O tensors and buffer views.
  IREE_RETURN_IF_ERROR(_TfLiteInterpreterPopulateIO(interpreter));

  return iree_ok_status();
}
//===----------------------------------------------------------------------===//
// Core API
//===----------------------------------------------------------------------===//
// Creates an interpreter for |model|, optionally configured by
// |optional_options| (may be NULL).
//
// Returns the new interpreter, or NULL when creation failed. On failure the
// status is consumed (its details are not surfaced to the caller) and
// whatever was constructed so far is passed to TfLiteInterpreterDelete.
TFL_CAPI_EXPORT extern TfLiteInterpreter* TfLiteInterpreterCreate(
    const TfLiteModel* model,
    const TfLiteInterpreterOptions* optional_options) {
  IREE_TRACE_ZONE_BEGIN(z0);

  TfLiteInterpreter* result = NULL;
  iree_status_t create_status =
      _TfLiteInterpreterCreate(model, optional_options, &result);
  if (!iree_status_is_ok(iree_status_consume_code(create_status))) {
    IREE_TRACE_MESSAGE(ERROR, "failed interpreter creation");
    TfLiteInterpreterDelete(result);
    result = NULL;
  } else {
    IREE_TRACE_ZONE_APPEND_TEXT(z0, "num_threads=", strlen("num_threads="));
    IREE_TRACE_ZONE_APPEND_VALUE(z0, result->options.num_threads);
  }

  IREE_TRACE_ZONE_END(z0);
  return result;
}
// Creates an interpreter exactly like TfLiteInterpreterCreate: |model| and
// |options| are forwarded unchanged and its result is returned as-is.
TFL_CAPI_EXPORT extern TfLiteInterpreter*
TfLiteInterpreterCreateWithSelectedOps(
const TfLiteModel* model, const TfLiteInterpreterOptions* options) {
// No different from TfLiteInterpreterCreate: we don't have "ops" :)
return TfLiteInterpreterCreate(model, options);
}
// Destroys |interpreter| and releases everything it owns: tensor resources,
// the I/O lists, the VM context/modules/instance, the HAL device and driver,
// and its reference on the model.
//
// Safe to call with NULL (no-op) and with a partially constructed
// interpreter, which is what TfLiteInterpreterCreate passes when creation
// fails part-way: members that were never set up are still zero from the
// slab memset and are skipped or released as NULL.
TFL_CAPI_EXPORT extern void TfLiteInterpreterDelete(
    TfLiteInterpreter* interpreter) {
  // Creation can fail before the slab exists, in which case there is nothing
  // to tear down.
  if (!interpreter) return;

  IREE_TRACE_ZONE_BEGIN(z0);

  // The tensor arrays are only assigned after both lists were initialized.
  if (interpreter->input_tensors) {
    for (iree_host_size_t i = 0; i < interpreter->model->input_count; ++i) {
      _TfLiteTensorReset(&interpreter->input_tensors[i],
                         interpreter->allocator);
    }
  }
  if (interpreter->output_tensors) {
    for (iree_host_size_t i = 0; i < interpreter->model->output_count; ++i) {
      _TfLiteTensorReset(&interpreter->output_tensors[i],
                         interpreter->allocator);
    }
  }

  // List initialization may have failed, leaving either pointer NULL.
  if (interpreter->input_list) {
    iree_vm_list_deinitialize(interpreter->input_list);
  }
  if (interpreter->output_list) {
    iree_vm_list_deinitialize(interpreter->output_list);
  }

  iree_vm_context_release(interpreter->context);
  iree_vm_module_release(interpreter->hal_module);
  iree_vm_module_release(interpreter->user_module);
  // The driver and device references were acquired when preparing the HAL and
  // are owned by the interpreter; drop them after the modules that use them.
  iree_hal_device_release(interpreter->device);
  iree_hal_driver_release(interpreter->driver);
  iree_vm_instance_release(interpreter->instance);
  _TfLiteModelRelease(interpreter->model);

  // Copy the allocator out first: it lives inside the memory being freed.
  iree_allocator_t allocator = interpreter->allocator;
  iree_allocator_free(allocator, interpreter);

  IREE_TRACE_ZONE_END(z0);
}
// Resets the model's variables by invoking the dedicated reset function the
// compiler emits for that purpose.
//
// The function is optional: when the model exports none (no variables) this
// succeeds without doing anything. The resulting status is converted to a
// TfLiteStatus.
TFL_CAPI_EXPORT extern TfLiteStatus TfLiteInterpreterResetVariableTensors(
    TfLiteInterpreter* interpreter) {
  IREE_TRACE_ZONE_BEGIN(z0);

  iree_vm_function_t reset_fn = interpreter->model->exports._reset_variables;
  iree_status_t reset_status = iree_ok_status();
  if (iree_vm_function_is_null(reset_fn)) {
    // Nothing to reset; keep the OK status.
  } else {
    reset_status = iree_vm_invoke(interpreter->context, reset_fn,
                                  /*policy=*/NULL, /*inputs=*/NULL,
                                  /*outputs=*/NULL, interpreter->allocator);
  }

  IREE_TRACE_ZONE_END(z0);
  return _TfLiteStatusFromIREEStatus(reset_status);
}
// Returns the number of input tensors, as recorded on the interpreter's model.
TFL_CAPI_EXPORT extern int32_t TfLiteInterpreterGetInputTensorCount(
const TfLiteInterpreter* interpreter) {
return interpreter->model->input_count;
}
// Returns the input tensor at |input_index|, or NULL when the index is
// negative or not below the model's input count.
TFL_CAPI_EXPORT extern TfLiteTensor* TfLiteInterpreterGetInputTensor(
    const TfLiteInterpreter* interpreter, int32_t input_index) {
  if (input_index >= 0 && input_index < interpreter->model->input_count) {
    return &interpreter->input_tensors[input_index];
  }
  return NULL;
}
// Tells the model that input |input_index| now has the shape given by the
// |input_dims_size| dimensions in |input_dims|.
//
// Returns IREE_STATUS_INVALID_ARGUMENT when the index is out of range, when
// the model exports no resize function, when the rank is negative or exceeds
// IREE_BINDINGS_TFLITE_MAX_RANK, or when dimensions are announced but
// |input_dims| is NULL. Otherwise returns the status of handing the new shape
// to the model's resize function.
//
// The tensor metadata and allocation are not touched here; they are only
// brought up to date by TfLiteInterpreterAllocateTensors.
static iree_status_t _TfLiteInterpreterResizeInputTensor(
    TfLiteInterpreter* interpreter, int32_t input_index, const int* input_dims,
    int32_t input_dims_size) {
  if (input_index < 0 || input_index >= interpreter->model->input_count) {
    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                            "input_index out of range (0 <= %d < %d)",
                            input_index, interpreter->model->input_count);
  }

  if (iree_vm_function_is_null(
          interpreter->model->exports._resize_input_shape)) {
    // TODO(#3975): check if this is a no-op success in tflite.
    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                            "model has no dynamic shapes");
  }

  // The shape frame only has room for IREE_BINDINGS_TFLITE_MAX_RANK dims.
  if (input_dims_size < 0 ||
      input_dims_size > IREE_BINDINGS_TFLITE_MAX_RANK) {
    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                            "input_dims_size out of range (0 <= %d <= %d)",
                            input_dims_size,
                            (int)IREE_BINDINGS_TFLITE_MAX_RANK);
  }
  if (input_dims_size > 0 && !input_dims) {
    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                            "input_dims is NULL but input_dims_size is %d",
                            input_dims_size);
  }

  // Convert the caller's `int` dimensions to the i32 values the frame stores.
  int32_t requested_dims[IREE_BINDINGS_TFLITE_MAX_RANK];
  for (int32_t i = 0; i < input_dims_size; ++i) {
    requested_dims[i] = (int32_t)input_dims[i];
  }

  _TfLiteInterpreterShapeFrame frame;
  IREE_RETURN_IF_ERROR(_TfLiteInterpreterShapeFrameInitialize(&frame));

  // Poke the model and let it update its internal shape using the shape the
  // caller requested (not the shape the tensor currently has).
  // TODO(#3975): return bool to allow model to say it failed.
  iree_status_t status = _TfLiteInterpreterShapeFrameWriteValue(
      &frame, input_dims_size, requested_dims);
  if (iree_status_is_ok(status)) {
    status = _TfLiteInterpreterShapeFrameApply(
        &frame, interpreter, interpreter->model->exports._resize_input_shape,
        input_index);
  }

  // NOTE: the allocation may now not match the requested shape. This is just
  // how the tflite API works unfortunately; until
  // TfLiteInterpreterAllocateTensors it will remain in an indeterminate state.

  _TfLiteInterpreterShapeFrameDeinitialize(&frame);
  return status;
}
// Public entry point for resizing an input tensor.
// Forwards all arguments to _TfLiteInterpreterResizeInputTensor inside a trace
// zone and converts the resulting IREE status to a TfLiteStatus.
TFL_CAPI_EXPORT extern TfLiteStatus TfLiteInterpreterResizeInputTensor(
TfLiteInterpreter* interpreter, int32_t input_index, const int* input_dims,
int32_t input_dims_size) {
IREE_TRACE_ZONE_BEGIN(z0);
iree_status_t status = _TfLiteInterpreterResizeInputTensor(
interpreter, input_index, input_dims, input_dims_size);
IREE_TRACE_ZONE_END(z0);
return _TfLiteStatusFromIREEStatus(status);
}
// Brings tensor storage in line with the current shapes: refreshes all I/O
// shapes from the model, (re)allocates every input tensor as needed and
// rebuilds the input list from the resulting buffers, then discards the
// buffers of all output tensors.
// Returns the first failing status; the input list may be partially filled
// in that case.
static iree_status_t _TfLiteInterpreterAllocateTensors(
    TfLiteInterpreter* interpreter) {
  // NOTE: we could slab allocate like tflite does, but then if any single
  // tensor has any single dimension that is resized the whole thing gets
  // reallocated upon resize. That's no good. Instead, we realloc each tensor
  // if their size has changed.

  // Refresh all shapes from the model. It should have all of the
  // non-data-dependent output shapes.
  IREE_RETURN_IF_ERROR(_TfLiteInterpreterRefreshIOShapes(interpreter));

  // Drop all input tensors we hang on to in the input list. This way we aren't
  // double-allocating during the resize.
  IREE_RETURN_IF_ERROR(iree_vm_list_resize(interpreter->input_list, 0));

  // Reallocate input tensors (if needed) and append each buffer to the list.
  iree_host_size_t input_index = 0;
  while (input_index < interpreter->model->input_count) {
    TfLiteTensor* input = interpreter->input_tensors + input_index;
    IREE_RETURN_IF_ERROR(_TfLiteTensorReallocateIfNeeded(
        input, iree_hal_device_allocator(interpreter->device),
        interpreter->allocator));
    iree_vm_ref_t input_ref = iree_hal_buffer_retain_ref(input->buffer);
    IREE_RETURN_IF_ERROR(
        iree_vm_list_push_ref_move(interpreter->input_list, &input_ref));
    ++input_index;
  }

  // TODO(benvanik): preallocate outputs when we support using them.
  // We could stash the buffer views in interpreter->output_list.
  // For now we just drop them all.
  iree_host_size_t output_index = 0;
  while (output_index < interpreter->model->output_count) {
    _TfLiteTensorDiscardBuffer(interpreter->output_tensors + output_index);
    ++output_index;
  }

  return iree_ok_status();
}
// Public entry point for (re)allocating tensors after shape changes.
// Runs _TfLiteInterpreterAllocateTensors inside a trace zone and converts the
// resulting IREE status to a TfLiteStatus.
//
// With tracing instrumentation enabled, the total input byte size is attached
// to the zone, but only when allocation succeeded: after a failure some input
// tensors may have no buffer to query.
TFL_CAPI_EXPORT extern TfLiteStatus TfLiteInterpreterAllocateTensors(
    TfLiteInterpreter* interpreter) {
  IREE_TRACE_ZONE_BEGIN(z0);

  iree_status_t status = _TfLiteInterpreterAllocateTensors(interpreter);

#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
  if (iree_status_is_ok(status)) {
    iree_device_size_t total_input_size = 0;
    for (iree_host_size_t i = 0; i < interpreter->model->input_count; ++i) {
      total_input_size +=
          iree_hal_buffer_byte_length(interpreter->input_tensors[i].buffer);
    }
    IREE_TRACE_ZONE_APPEND_VALUE(z0, total_input_size);
  }
#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION

  IREE_TRACE_ZONE_END(z0);
  return _TfLiteStatusFromIREEStatus(status);
}
// Runs the model once: invokes its main function with the prepared input
// list, refreshes all I/O shapes, and binds each entry of the output list to
// the matching output tensor.
// Returns the first failing status.
static iree_status_t _TfLiteInterpreterInvoke(TfLiteInterpreter* interpreter) {
  // tflite models only have a single entry point and the IREE converter
  // emits it as '_main'.
  IREE_RETURN_IF_ERROR(
      iree_vm_invoke(interpreter->context, interpreter->model->exports._main,
                     /*policy=*/NULL, interpreter->input_list,
                     interpreter->output_list, interpreter->allocator));

  // Refresh output shapes.
  // TODO(#3975): just use buffer view results or at least just refresh outputs.
  IREE_RETURN_IF_ERROR(_TfLiteInterpreterRefreshIOShapes(interpreter));

  // Map the output buffers.
  // NOTE: we could defer the mapping unless requested and ensure state buffers
  // remain where they currently are for the next invocation.
  iree_host_size_t output_index = 0;
  while (output_index < interpreter->model->output_count) {
    iree_hal_buffer_t* output_buffer =
        (iree_hal_buffer_t*)iree_vm_list_get_ref_deref(
            interpreter->output_list, output_index,
            iree_hal_buffer_get_descriptor());
    IREE_RETURN_IF_ERROR(_TfLiteTensorBind(
        &interpreter->output_tensors[output_index], output_buffer));
    ++output_index;
  }

  return iree_ok_status();
}
// Public entry point for running the model.
// Runs _TfLiteInterpreterInvoke inside a trace zone and converts the
// resulting IREE status to a TfLiteStatus.
//
// With tracing instrumentation enabled, the total output byte size is
// attached to the zone, but only when the invocation succeeded: after a
// failure the output tensors may not be bound to any buffer.
TFL_CAPI_EXPORT extern TfLiteStatus TfLiteInterpreterInvoke(
    TfLiteInterpreter* interpreter) {
  IREE_TRACE_ZONE_BEGIN(z0);

  iree_status_t status = _TfLiteInterpreterInvoke(interpreter);

#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
  if (iree_status_is_ok(status)) {
    iree_device_size_t total_output_size = 0;
    for (iree_host_size_t i = 0; i < interpreter->model->output_count; ++i) {
      total_output_size +=
          iree_hal_buffer_byte_length(interpreter->output_tensors[i].buffer);
    }
    IREE_TRACE_ZONE_APPEND_VALUE(z0, total_output_size);
  }
#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION

  IREE_TRACE_ZONE_END(z0);
  return _TfLiteStatusFromIREEStatus(status);
}
// Returns the number of output tensors, as recorded on the interpreter's
// model.
TFL_CAPI_EXPORT extern int32_t TfLiteInterpreterGetOutputTensorCount(
const TfLiteInterpreter* interpreter) {
return interpreter->model->output_count;
}
// Returns the output tensor at |output_index|, or NULL when the index is
// negative or not below the model's output count.
TFL_CAPI_EXPORT extern const TfLiteTensor* TfLiteInterpreterGetOutputTensor(
    const TfLiteInterpreter* interpreter, int32_t output_index) {
  if (output_index >= 0 && output_index < interpreter->model->output_count) {
    return &interpreter->output_tensors[output_index];
  }
  return NULL;
}