| // Copyright 2022 The IREE Authors |
| // |
| // Licensed under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| |
| #include <stdio.h> |
| |
| // IREE APIs: |
| #include "iree/modules/hal/types.h" |
| #include "iree/runtime/api.h" |
| |
| // Custom native module used in the sample. |
| // Modules may be linked in from native code or other bytecode modules loaded at |
| // runtime: there's no difference. |
| #include "module.h" |
| |
| // NOTE: CHECKs are dangerous but this is a sample; a real application would |
| // want to handle errors gracefully. We know in this constrained case that |
| // these won't fail unless something is catastrophically wrong (out of memory, |
| // solar flares, etc). |
| int main(int argc, char** argv) { |
| if (argc != 3) { |
| fprintf(stderr, |
| "Usage:\n" |
| " custom-module-async-run - <entry.point> # read from stdin\n" |
| " custom-module-async-run </path/to/say_hello.vmfb> " |
| "<entry.point>\n"); |
| fprintf(stderr, " (See the README for this sample for details)\n "); |
| return -1; |
| } |
| |
| // Internally IREE does not (in general) use malloc and instead uses the |
| // provided allocator to allocate and free memory. Applications can integrate |
| // their own allocator as-needed. |
| iree_allocator_t host_allocator = iree_allocator_system(); |
| |
| // Create and configure the instance shared across all sessions. |
| iree_runtime_instance_options_t instance_options; |
| iree_runtime_instance_options_initialize(&instance_options); |
| iree_runtime_instance_options_use_all_available_drivers(&instance_options); |
| iree_runtime_instance_t* instance = NULL; |
| IREE_CHECK_OK(iree_runtime_instance_create(&instance_options, host_allocator, |
| &instance)); |
| |
| // Try to create the device - it should always succeed as it's a CPU device. |
| iree_hal_device_t* device = NULL; |
| IREE_CHECK_OK(iree_runtime_instance_try_create_default_device( |
| instance, iree_make_cstring_view("local-task"), &device)); |
| |
| // Create one session per loaded module to hold the module state. |
| iree_runtime_session_options_t session_options; |
| iree_runtime_session_options_initialize(&session_options); |
| // Useful to see the VM program flow: |
| // session_options.context_flags = IREE_VM_CONTEXT_FLAG_TRACE_EXECUTION; |
| iree_runtime_session_t* session = NULL; |
| IREE_CHECK_OK(iree_runtime_session_create_with_device( |
| instance, &session_options, device, |
| iree_runtime_instance_host_allocator(instance), &session)); |
| |
| // Create the custom module that can be reused across contexts. |
| iree_vm_module_t* custom_module = NULL; |
| IREE_CHECK_OK(iree_custom_module_async_create( |
| iree_runtime_instance_vm_instance(instance), device, host_allocator, |
| &custom_module)); |
| IREE_CHECK_OK(iree_runtime_session_append_module(session, custom_module)); |
| iree_vm_module_release(custom_module); |
| |
| // Load the module from stdin or a file on disk. |
| const char* module_path = argv[1]; |
| if (strcmp(module_path, "-") == 0) { |
| IREE_CHECK_OK( |
| iree_runtime_session_append_bytecode_module_from_stdin(session)); |
| } else { |
| IREE_CHECK_OK(iree_runtime_session_append_bytecode_module_from_file( |
| session, module_path)); |
| } |
| |
| iree_vm_list_t* inputs = NULL; |
| IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 1, |
| host_allocator, &inputs)); |
| iree_vm_list_t* outputs = NULL; |
| IREE_CHECK_OK(iree_vm_list_create(iree_vm_make_undefined_type_def(), 1, |
| host_allocator, &outputs)); |
| |
| // Pass in the tensor<?xi32> arg: |
| const int32_t input_data[5] = {1, 2, 3, 4, 5}; |
| const iree_hal_dim_t shape[1] = {IREE_ARRAYSIZE(input_data)}; |
| iree_hal_buffer_view_t* input_view = NULL; |
| IREE_CHECK_OK(iree_hal_buffer_view_allocate_buffer_copy( |
| iree_runtime_session_device(session), |
| iree_runtime_session_device_allocator(session), IREE_ARRAYSIZE(shape), |
| shape, IREE_HAL_ELEMENT_TYPE_INT_32, |
| IREE_HAL_ENCODING_TYPE_DENSE_ROW_MAJOR, |
| (iree_hal_buffer_params_t){ |
| .type = IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL, |
| .access = IREE_HAL_MEMORY_ACCESS_ALL, |
| .usage = IREE_HAL_BUFFER_USAGE_DEFAULT, |
| }, |
| iree_make_const_byte_span(input_data, sizeof(input_data)), &input_view)); |
| iree_vm_ref_t input_view_ref = iree_hal_buffer_view_move_ref(input_view); |
| IREE_CHECK_OK(iree_vm_list_push_ref_move(inputs, &input_view_ref)); |
| |
| // Create our own timeline and set fences at T=1 and T=2. |
| // We'll pass these in with the timeline at T=0 so that the runtime isn't |
| // allowed to execute anything until we give it the go-ahead. |
| iree_hal_semaphore_t* semaphore = NULL; |
| IREE_CHECK_OK(iree_hal_semaphore_create( |
| device, 0ull, IREE_HAL_SEMAPHORE_FLAG_NONE, &semaphore)); |
| iree_hal_fence_t* fence_t1 = NULL; |
| IREE_CHECK_OK( |
| iree_hal_fence_create_at(semaphore, 1ull, host_allocator, &fence_t1)); |
| iree_hal_fence_t* fence_t2 = NULL; |
| IREE_CHECK_OK( |
| iree_hal_fence_create_at(semaphore, 2ull, host_allocator, &fence_t2)); |
| iree_hal_semaphore_release(semaphore); |
| fprintf(stdout, "INITIALIZE T=0\n"); |
| fflush(stdout); |
| |
| // Add the (wait_fence, signal_fence) pair to the function call. |
| // The --iree-execution-model=async-external flag adds these required |
| // arguments to the functions exported by the module. |
| iree_vm_ref_t fence_t1_ref = iree_hal_fence_retain_ref(fence_t1); |
| IREE_CHECK_OK(iree_vm_list_push_ref_move(inputs, &fence_t1_ref)); |
| iree_vm_ref_t fence_t2_ref = iree_hal_fence_retain_ref(fence_t2); |
| IREE_CHECK_OK(iree_vm_list_push_ref_move(inputs, &fence_t2_ref)); |
| |
| // Let the call start executing by signaling the timeline to T=1. |
| // TODO(benvanik): fix wait-before-signal on queue-ordered allocations. |
| // For now we have to signal to T=1 before invoking the function but that's |
| // only temporary. This should be moved down to after the VM invocation |
| // returns so that we can show how all of the program execution can be |
| // deferred. We could simulate this with another thread that signaled in the |
| // future if we wanted. |
| IREE_CHECK_OK(iree_hal_fence_signal(fence_t1)); |
| fprintf(stdout, "SIGNALED T=1\n"); |
| fflush(stdout); |
| |
| // Invoke the target function. |
| // This will return immediately after scheduling work - including the custom |
| // call - but will not actually execute anything until we say it's OK by |
| // advancing the timeline to T=1. |
| iree_string_view_t entry_point = iree_make_cstring_view(argv[2]); |
| fprintf(stdout, "VM INVOKE BEGIN %.*s\n", (int)entry_point.size, |
| entry_point.data); |
| fflush(stdout); |
| IREE_CHECK_OK( |
| iree_runtime_session_call_by_name(session, entry_point, inputs, outputs)); |
| fprintf(stdout, "VM INVOKE END\n"); |
| fflush(stdout); |
| |
| // We could go do other things now while the async work progresses. Here we |
| // just immediately wait. |
| IREE_CHECK_OK(iree_hal_fence_wait(fence_t2, iree_infinite_timeout())); |
| fprintf(stdout, "REACHED T=2\n"); |
| fflush(stdout); |
| |
| // Read back the tensor<?xi32> result: |
| iree_hal_buffer_view_t* output_view = |
| iree_vm_list_get_buffer_view_assign(outputs, 0); |
| int32_t output_data[5] = {0}; |
| IREE_CHECK_OK( |
| iree_hal_buffer_map_read(iree_hal_buffer_view_buffer(output_view), 0, |
| output_data, sizeof(output_data))); |
| |
| // Expecting (e^2 * 2)^2: |
| bool did_match = true; |
| for (size_t i = 0; i < IREE_ARRAYSIZE(input_data); ++i) { |
| int32_t t0 = input_data[i]; |
| int32_t t1 = t0 * t0; |
| int32_t t2 = t1 * 2; |
| int32_t t3 = t2 * t2; |
| if (t3 != output_data[i]) { |
| fprintf(stdout, "MISMATCH [%zu] expected %d but actual %d\n", i, t3, |
| output_data[i]); |
| did_match = false; |
| break; |
| } |
| } |
| if (did_match) { |
| fprintf(stdout, "MATCHED!\n"); |
| } |
| |
| iree_vm_list_release(inputs); |
| iree_vm_list_release(outputs); |
| iree_hal_fence_release(fence_t1); |
| iree_hal_fence_release(fence_t2); |
| iree_runtime_session_release(session); |
| iree_hal_device_release(device); |
| iree_runtime_instance_release(instance); |
| return 0; |
| } |