// blob: 6617e72c81ca2285c5fb37e87404e4514f2031b2 [file] [log] [blame]
// Copyright 2022 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include <stdio.h>
#include <string.h>
// IREE APIs:
#include "iree/modules/hal/types.h"
#include "iree/runtime/api.h"
// Custom native module used in the sample.
// Modules may be linked in from native code or other bytecode modules loaded at
// runtime: there's no difference.
#include "module.h"
// NOTE: CHECKs are dangerous but this is a sample; a real application would
// want to handle errors gracefully. We know in this constrained case that
// these won't fail unless something is catastrophically wrong (out of memory,
// solar flares, etc).
// Sample entry point: loads a compiled module (from stdin or a file), registers
// the custom async native module alongside it, invokes the requested entry
// point with a 5-element i32 tensor plus a (wait, signal) fence pair, waits for
// the signal fence, and verifies the returned tensor.
//
// Arguments:
//   argv[1]: path to the .vmfb to load, or "-" to read the module from stdin.
//   argv[2]: name of the exported function to call.
//
// Returns 0 when the output matches the expected values, -1 on a usage error,
// and 1 when the output is missing or does not match. Any failing IREE call
// aborts via IREE_CHECK_OK.
int main(int argc, char** argv) {
  if (argc != 3) {
    fprintf(stderr,
            "Usage:\n"
            "  custom-module-async-run - <entry.point> # read from stdin\n"
            "  custom-module-async-run </path/to/say_hello.vmfb> "
            "<entry.point>\n");
    fprintf(stderr, " (See the README for this sample for details)\n ");
    return -1;
  }

  // Internally IREE does not (in general) use malloc and instead uses the
  // provided allocator to allocate and free memory. Applications can integrate
  // their own allocator as-needed.
  iree_allocator_t host_allocator = iree_allocator_system();

  // Create and configure the instance shared across all sessions.
  iree_runtime_instance_options_t instance_options;
  iree_runtime_instance_options_initialize(&instance_options);
  iree_runtime_instance_options_use_all_available_drivers(&instance_options);
  iree_runtime_instance_t* instance = NULL;
  IREE_CHECK_OK(iree_runtime_instance_create(&instance_options, host_allocator,
                                             &instance));

  // Try to create the device - it should always succeed as it's a CPU device.
  iree_hal_device_t* device = NULL;
  IREE_CHECK_OK(iree_runtime_instance_try_create_default_device(
      instance, iree_make_cstring_view("local-task"), &device));

  // Create one session per loaded module to hold the module state.
  iree_runtime_session_options_t session_options;
  iree_runtime_session_options_initialize(&session_options);
  // Useful to see the VM program flow:
  // session_options.context_flags = IREE_VM_CONTEXT_FLAG_TRACE_EXECUTION;
  iree_runtime_session_t* session = NULL;
  IREE_CHECK_OK(iree_runtime_session_create_with_device(
      instance, &session_options, device,
      iree_runtime_instance_host_allocator(instance), &session));

  // Create the custom module that can be reused across contexts.
  // Our local reference is dropped right after the module is appended to the
  // session.
  iree_vm_module_t* custom_module = NULL;
  IREE_CHECK_OK(iree_custom_module_async_create(
      iree_runtime_instance_vm_instance(instance), device, host_allocator,
      &custom_module));
  IREE_CHECK_OK(iree_runtime_session_append_module(session, custom_module));
  iree_vm_module_release(custom_module);

  // Load the module from stdin or a file on disk.
  const char* module_path = argv[1];
  if (strcmp(module_path, "-") == 0) {
    IREE_CHECK_OK(
        iree_runtime_session_append_bytecode_module_from_stdin(session));
  } else {
    IREE_CHECK_OK(iree_runtime_session_append_bytecode_module_from_file(
        session, module_path));
  }

  // Argument and result lists for the call.
  iree_vm_list_t* inputs = NULL;
  IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 1,
                                    host_allocator, &inputs));
  iree_vm_list_t* outputs = NULL;
  IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 1,
                                    host_allocator, &outputs));

  // Pass in the tensor<?xi32> arg:
  const int32_t input_data[5] = {1, 2, 3, 4, 5};
  const iree_hal_dim_t shape[1] = {IREE_ARRAYSIZE(input_data)};
  iree_hal_buffer_view_t* input_view = NULL;
  IREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer_copy(
      iree_runtime_session_device(session),
      iree_runtime_session_device_allocator(session), IREE_ARRAYSIZE(shape),
      shape, IREE_HAL_ELEMENT_TYPE_INT_32,
      IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR,
      (iree_hal_buffer_params_t){
          .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL,
          .access = IREE_HAL_MEMORY_ACCESS_ALL,
          .usage = IREE_HAL_BUFFER_USAGE_DEFAULT,
      },
      iree_make_const_byte_span(input_data, sizeof(input_data)), &input_view));
  // The view is moved into the list, so it is not released separately below.
  iree_vm_ref_t input_view_ref = iree_hal_buffer_view_move_ref(input_view);
  IREE_CHECK_OK(iree_vm_list_push_ref_move(inputs, &input_view_ref));

  // Create our own timeline and set fences at T=1 and T=2.
  // The timeline starts at T=0; the wait fence (T=1) gates execution and the
  // signal fence (T=2) tells us when the call has completed.
  iree_hal_semaphore_t* semaphore = NULL;
  IREE_CHECK_OK(iree_hal_semaphore_create(
      device, 0ull, IREE_HAL_SEMAPHORE_FLAG_NONE, &semaphore));
  iree_hal_fence_t* fence_t1 = NULL;
  IREE_CHECK_OK(
      iree_hal_fence_create_at(semaphore, 1ull, host_allocator, &fence_t1));
  iree_hal_fence_t* fence_t2 = NULL;
  IREE_CHECK_OK(
      iree_hal_fence_create_at(semaphore, 2ull, host_allocator, &fence_t2));
  // From here on the semaphore is only used through the two fences.
  iree_hal_semaphore_release(semaphore);
  fprintf(stdout, "INITIALIZE T=0\n");
  fflush(stdout);

  // Add the (wait_fence, signal_fence) pair to the function call.
  // The --iree-execution-model=async-external flag adds these required
  // arguments to the functions exported by the module.
  // The fences are retained (not moved) because we still signal/wait on them
  // ourselves below.
  iree_vm_ref_t fence_t1_ref = iree_hal_fence_retain_ref(fence_t1);
  IREE_CHECK_OK(iree_vm_list_push_ref_move(inputs, &fence_t1_ref));
  iree_vm_ref_t fence_t2_ref = iree_hal_fence_retain_ref(fence_t2);
  IREE_CHECK_OK(iree_vm_list_push_ref_move(inputs, &fence_t2_ref));

  // Let the call start executing by signaling the timeline to T=1.
  // TODO(benvanik): fix wait-before-signal on queue-ordered allocations.
  // For now we have to signal to T=1 before invoking the function but that's
  // only temporary. This should be moved down to after the VM invocation
  // returns so that we can show how all of the program execution can be
  // deferred. We could simulate this with another thread that signaled in the
  // future if we wanted.
  IREE_CHECK_OK(iree_hal_fence_signal(fence_t1));
  fprintf(stdout, "SIGNALED T=1\n");
  fflush(stdout);

  // Invoke the target function.
  // This returns after scheduling work - including the custom call. Because of
  // the TODO above the timeline has already been advanced to T=1 at this
  // point, so the scheduled work is not held back by the wait fence.
  iree_string_view_t entry_point = iree_make_cstring_view(argv[2]);
  fprintf(stdout, "VM INVOKE BEGIN %.*s\n", (int)entry_point.size,
          entry_point.data);
  fflush(stdout);
  IREE_CHECK_OK(
      iree_runtime_session_call_by_name(session, entry_point, inputs, outputs));
  fprintf(stdout, "VM INVOKE END\n");
  fflush(stdout);

  // We could go do other things now while the async work progresses. Here we
  // just immediately wait.
  IREE_CHECK_OK(iree_hal_fence_wait(fence_t2, iree_infinite_timeout()));
  fprintf(stdout, "REACHED T=2\n");
  fflush(stdout);

  // Process exit status; flipped to 1 if the result is missing or wrong so
  // that scripts and tests can detect the failure.
  int exit_code = 0;

  // Read back the tensor<?xi32> result. Guard against a NULL view (e.g. the
  // function produced no results or result 0 is not a buffer view) instead of
  // dereferencing it.
  iree_hal_buffer_view_t* output_view =
      iree_vm_list_get_buffer_view_assign(outputs, 0);
  if (!output_view) {
    fprintf(stderr, "ERROR: result 0 is missing or is not a buffer view\n");
    exit_code = 1;
  } else {
    int32_t output_data[5] = {0};
    IREE_CHECK_OK(
        iree_hal_buffer_map_read(iree_hal_buffer_view_buffer(output_view), 0,
                                 output_data, sizeof(output_data)));

    // Expecting (e^2 * 2)^2:
    bool did_match = true;
    for (size_t i = 0; i < IREE_ARRAYSIZE(input_data); ++i) {
      int32_t t0 = input_data[i];
      int32_t t1 = t0 * t0;
      int32_t t2 = t1 * 2;
      int32_t t3 = t2 * t2;
      if (t3 != output_data[i]) {
        // Cast so the arguments match %d even where int32_t is not int.
        fprintf(stdout, "MISMATCH [%zu] expected %d but actual %d\n", i,
                (int)t3, (int)output_data[i]);
        did_match = false;
        break;
      }
    }
    if (did_match) {
      fprintf(stdout, "MATCHED!\n");
    } else {
      exit_code = 1;
    }
  }

  // Release everything we still hold a reference to.
  iree_vm_list_release(inputs);
  iree_vm_list_release(outputs);
  iree_hal_fence_release(fence_t1);
  iree_hal_fence_release(fence_t2);
  iree_runtime_session_release(session);
  iree_hal_device_release(device);
  iree_runtime_instance_release(instance);
  return exit_code;
}