IREE Static Library Loader Runtime Demo (dylib-llvm-aot backend) (#6225)

This sample shows how to:

Produce a static library and bytecode module with IREE's compiler
Compile the static library into a program using the static_library_loader
Run the demo with the module using functions exported by the static library
The model compiled into the static library exports a single function simple_mul that returns the multiplication of two tensors. README.md includes full details on building and running the demo.

The sample is enabled through the IREE_BUILD_SAMPLES flag. It is tested and runs on Linux with the dylib-llvm-aot backend.

Fixes #5666
diff --git a/iree/samples/static_library/CMakeLists.txt b/iree/samples/static_library/CMakeLists.txt
new file mode 100644
index 0000000..4c818b8
--- /dev/null
+++ b/iree/samples/static_library/CMakeLists.txt
@@ -0,0 +1,81 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+if(NOT "${IREE_TARGET_BACKEND_DYLIB-LLVM-AOT}"
+  OR NOT "${IREE_HAL_DRIVER_DYLIB}"
+  OR NOT "${IREE_BUILD_COMPILER}")
+  return()  # Quoted so an unset/empty option skips the sample, not errors.
+endif()
+
+# Resolve the iree-translate binary and the arguments used to compile
+# simple_mul.mlir into a bytecode module plus a static library object.
+set(_TRANSLATE_TOOL_EXECUTABLE $<TARGET_FILE:iree_tools_iree-translate>)
+set(_TRANSLATE_ARGS
+  "-iree-mlir-to-vm-bytecode-module"
+  "-iree-hal-target-backends=dylib-llvm-aot"
+  "-iree-llvm-static-library-output-path=simple_mul.o"
+  "${CMAKE_CURRENT_SOURCE_DIR}/simple_mul.mlir"
+  "-o" "simple_mul.vmfb")
+
+# Run iree-translate in the binary dir so its relative outputs match OUTPUT.
+add_custom_command(
+  OUTPUT
+    ${CMAKE_CURRENT_BINARY_DIR}/simple_mul.h
+    ${CMAKE_CURRENT_BINARY_DIR}/simple_mul.o
+    ${CMAKE_CURRENT_BINARY_DIR}/simple_mul.vmfb
+  COMMAND ${_TRANSLATE_TOOL_EXECUTABLE} ${_TRANSLATE_ARGS}
+  DEPENDS ${_TRANSLATE_TOOL_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/simple_mul.mlir"
+  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+  VERBATIM
+)
+
+# Wrap the object file emitted by iree-translate in a static library so the
+# demo binary can link against it.
+add_library(simple_mul STATIC
+  ${CMAKE_CURRENT_BINARY_DIR}/simple_mul.o
+)
+
+# Only a prebuilt object is listed for this target, so name the linker
+# language explicitly.
+set_target_properties(simple_mul PROPERTIES LINKER_LANGUAGE C)
+
+# Note: If you're cross compiling the simple_mul for a different backend, you'll
+# need to run iree-translate manually to produce the desired '.vmfb' and '.h/.o'
+# files. Substitute the 'simple_mul' dependency in iree_cc_binary() below with
+# your own static library and the `simple_mul.vmfb` in the iree_c_embed_data()
+# rule. You can use paths to files, i.e. 'path/to/generated/output.vmfb' in
+# place of CMake targets.
+
+# Embed the simple_mul.vmfb bytecode module as C data (simple_mul_c.c/.h).
+iree_c_embed_data(
+  NAME
+    simple_mul_c
+  IDENTIFIER
+    iree_samples_static_library_simple_mul
+  GENERATED_SRCS
+    simple_mul.vmfb
+  C_FILE_OUTPUT
+    simple_mul_c.c
+  H_FILE_OUTPUT
+    simple_mul_c.h
+  FLATTEN
+  PUBLIC
+  TESTONLY
+)
+
+# Demo executable: links the IREE runtime, the embedded bytecode module
+# (::simple_mul_c) and the generated static library (simple_mul).
+iree_cc_binary(
+  NAME static_library_demo
+  SRCS "static_library_demo.c"
+  DEPS
+    ::simple_mul_c
+    iree::runtime
+    iree::hal::local::loaders::static_library_loader
+    iree::hal::local::task_driver
+    iree::task::api
+    simple_mul
+)
diff --git a/iree/samples/static_library/README.md b/iree/samples/static_library/README.md
new file mode 100644
index 0000000..b989f0a
--- /dev/null
+++ b/iree/samples/static_library/README.md
@@ -0,0 +1,79 @@
+# IREE "Static Library" sample
+
+This sample shows how to:
+1. Produce a static library and bytecode module with IREE's compiler
+2. Compile the static library into a program using the `static_library_loader`
+3. Run the demo with the module using functions exported by the static library
+
+The model compiled into the static library exports a single function
+`simple_mul` that returns the multiplication of two tensors:
+
+```mlir
+func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32>
+{
+  %0 = "mhlo.multiply"(%arg0, %arg1) {name = "mul.1"} : (tensor<4xf32>,
+    tensor<4xf32>) -> tensor<4xf32>
+  return %0 : tensor<4xf32>
+}
+```
+
+## Background
+
+IREE's `static_library_loader` allows applications to inject a set of static
+libraries that can be resolved at runtime by name. This can be particularly
+useful on "bare metal" or embedded systems running IREE that lack operating
+systems or the ability to load shared libraries in binaries.
+
+When static library output is enabled, `iree-translate` produces a separate
+static library to compile into the target program. At runtime, the bytecode
+module tells the VM which static libraries provide the functions exported by
+the model.
+
+## Instructions
+_Note: run the following commands from IREE's github repo root._
+
+1. Configure CMake for building the static library and then the demo. You'll
+need to set the flags for building samples, the compiler, and the
+`dylib-llvm-aot` driver/backend (see
+[here](https://google.github.io/iree/building-from-source/getting-started/)
+for general instructions on building using CMake):
+
+  ```shell
+  cmake -B ../iree-build/ \
+    -DIREE_BUILD_SAMPLES=ON \
+    -DIREE_TARGET_BACKENDS_TO_BUILD=DYLIB-LLVM-AOT \
+    -DIREE_HAL_DRIVERS_TO_BUILD=DYLIB \
+    -DIREE_BUILD_COMPILER=ON \
+    -DCMAKE_BUILD_TYPE=RelWithDebInfo .
+  ```
+
+2. Build the `static_library_demo` CMake target to create the static demo. This
+target has several dependencies that will translate `simple_mul.mlir` into a
+static library (`simple_mul.h` & `simple_mul.o`) as well as a bytecode module
+(`simple_mul.vmfb`) which are finally built into the demo binary:
+
+  ```shell
+  cmake --build ../iree-build/ --target iree_samples_static_library_demo
+  ```
+
+3. Run the sample binary:
+
+  ```shell
+  ../iree-build/iree/samples/static_library/static_library_demo
+
+  # Output: static_library_run passed
+  ```
+
+### Changing compilation options
+
+The steps above build both the compiler for the host (the machine doing the
+compiling) and the demo for the target, using the same options for both. If
+you wish to target a deployment other than the host, you'll need to compile
+the library and demo with different options.
+
+For example, see
+[documentation](https://google.github.io/iree/building-from-source/android/)
+on cross compiling for Android.
+
+Note: separating the target from the host will require modifying dependencies in
+the demo's `CMakeLists.txt`. See included comments for more info.
diff --git a/iree/samples/static_library/simple_mul.mlir b/iree/samples/static_library/simple_mul.mlir
new file mode 100644
index 0000000..2b480bd
--- /dev/null
+++ b/iree/samples/static_library/simple_mul.mlir
@@ -0,0 +1,5 @@
+// Multiplies two 4-element f32 tensors element by element.
+func @simple_mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> {
+  %product = "std.mulf"(%arg0, %arg1) {name = "mul.1"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
+  return %product : tensor<4xf32>
+}
diff --git a/iree/samples/static_library/static_library_demo.c b/iree/samples/static_library/static_library_demo.c
new file mode 100644
index 0000000..22e8127
--- /dev/null
+++ b/iree/samples/static_library/static_library_demo.c
@@ -0,0 +1,222 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// An example of static library loading in IREE. See the README.md for more info.
+// Note: this demo requires artifacts from iree-translate before it will run.
+
+#include <stdio.h>
+
+#include "iree/hal/local/loaders/static_library_loader.h"
+#include "iree/hal/local/task_device.h"
+#include "iree/modules/hal/module.h"
+#include "iree/runtime/api.h"
+#include "iree/samples/static_library/simple_mul_c.h"
+#include "iree/task/api.h"
+#include "iree/vm/bytecode_module.h"
+
+// Compiled static library module here to avoid IO:
+#include "iree/samples/static_library/simple_mul.h"
+
+// Creates a task HAL device named "dylib" whose single executable loader is a
+// static library loader wrapping the compiled simple_mul library. Returns the
+// first failing status; on success |*device| must be released by the caller.
+iree_status_t create_device_with_static_loader(iree_hal_device_t** device) {
+  iree_status_t status = iree_ok_status();
+  // Set parameters for the device created below.
+  iree_hal_task_device_params_t params;
+  iree_hal_task_device_params_initialize(&params);
+
+  // Load the statically embedded library.
+  const iree_hal_executable_library_header_t** static_library =
+      simple_mul_dispatch_0_library_query(
+          IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION, /*reserved=*/NULL);
+  const iree_hal_executable_library_header_t** libraries[1] = {static_library};
+
+  iree_hal_executable_loader_t* library_loader = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_static_library_loader_create(
+        IREE_ARRAYSIZE(libraries), libraries,
+        iree_hal_executable_import_provider_null(), iree_allocator_system(),
+        &library_loader);
+  }
+
+  iree_task_executor_t* executor = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_task_executor_create_from_flags(iree_allocator_system(),
+                                                  &executor);
+  }
+
+  // Create the device, then drop the local executor and loader references.
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_task_device_create(
+        iree_make_cstring_view("dylib"), &params, executor, 1, &library_loader,
+        iree_allocator_system(), device);
+  }
+  iree_task_executor_release(executor);
+  iree_hal_executable_loader_release(library_loader);
+  return status;
+}
+
+// Runs the sample end to end: creates the runtime instance, device and
+// session, loads the embedded bytecode module, invokes module.simple_mul on
+// {4,4,4,4} and {2,2,2,2}, and checks that every result element equals 8.
+iree_status_t Run() {
+  iree_status_t status = iree_ok_status();
+
+  // Instance configuration (this should be shared across sessions).
+  iree_runtime_instance_options_t instance_options;
+  iree_runtime_instance_options_initialize(IREE_API_VERSION_LATEST,
+                                           &instance_options);
+  iree_runtime_instance_options_use_all_available_drivers(&instance_options);
+  iree_runtime_instance_t* instance = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_runtime_instance_create(&instance_options,
+                                          iree_allocator_system(), &instance);
+  }
+
+  // Create dylib device with static loader.
+  iree_hal_device_t* device = NULL;
+  if (iree_status_is_ok(status)) {
+    status = create_device_with_static_loader(&device);
+  }
+  iree_vm_module_t* hal_module = NULL;
+  if (iree_status_is_ok(status)) {
+    status =
+        iree_hal_module_create(device, iree_allocator_system(), &hal_module);
+  }
+
+  // Session configuration (one per loaded module to hold module state).
+  iree_runtime_session_options_t session_options;
+  iree_runtime_session_options_initialize(&session_options);
+  iree_runtime_session_t* session = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_runtime_session_create_with_device(
+        instance, &session_options, device,
+        iree_runtime_instance_host_allocator(instance), &session);
+  }
+
+  // Load bytecode module from the embedded data. Append to the session.
+  const struct iree_file_toc_t* module_file_toc =
+      iree_samples_static_library_simple_mul_create();
+  iree_const_byte_span_t module_data =
+      iree_make_const_byte_span(module_file_toc->data, module_file_toc->size);
+  iree_vm_module_t* bytecode_module = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_vm_bytecode_module_create(module_data, iree_allocator_null(),
+                                            iree_allocator_system(),
+                                            &bytecode_module);
+  }
+  if (iree_status_is_ok(status)) {
+    status = iree_runtime_session_append_module(session, bytecode_module);
+  }
+
+  // Lookup the entry point function call.
+  const char kMainFunctionName[] = "module.simple_mul";
+  iree_runtime_call_t call;
+  memset(&call, 0, sizeof(call));
+  if (iree_status_is_ok(status)) {
+    status = iree_runtime_call_initialize_by_name(
+        session, iree_make_cstring_view(kMainFunctionName), &call);
+  }
+
+  // Populate initial values for 4 * 2 = 8.
+  const int kElementCount = 4;
+  iree_hal_dim_t shape[1] = {kElementCount};
+  iree_hal_buffer_view_t* arg0_buffer_view = NULL;
+  iree_hal_buffer_view_t* arg1_buffer_view = NULL;
+  float kFloat4[] = {4.0f, 4.0f, 4.0f, 4.0f};
+  float kFloat2[] = {2.0f, 2.0f, 2.0f, 2.0f};
+  // Copy the host arrays into buffer views, keeping any failure in |status|.
+  iree_hal_memory_type_t input_memory_type =
+      IREE_HAL_MEMORY_TYPE_HOST_LOCAL | IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE;
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_buffer_view_clone_heap_buffer(
+        iree_hal_device_allocator(device), shape, IREE_ARRAYSIZE(shape),
+        IREE_HAL_ELEMENT_TYPE_FLOAT_32, input_memory_type,
+        IREE_HAL_BUFFER_USAGE_ALL,
+        iree_make_const_byte_span((void*)kFloat4,
+                                  sizeof(float) * kElementCount),
+        &arg0_buffer_view);
+  }
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_buffer_view_clone_heap_buffer(
+        iree_hal_device_allocator(device), shape, IREE_ARRAYSIZE(shape),
+        IREE_HAL_ELEMENT_TYPE_FLOAT_32, input_memory_type,
+        IREE_HAL_BUFFER_USAGE_ALL,
+        iree_make_const_byte_span((void*)kFloat2,
+                                  sizeof(float) * kElementCount),
+        &arg1_buffer_view);
+  }
+
+  // Queue buffer views for input.
+  if (iree_status_is_ok(status)) {
+    status =
+        iree_runtime_call_inputs_push_back_buffer_view(&call, arg0_buffer_view);
+  }
+  iree_hal_buffer_view_release(arg0_buffer_view);
+  if (iree_status_is_ok(status)) {
+    status =
+        iree_runtime_call_inputs_push_back_buffer_view(&call, arg1_buffer_view);
+  }
+  iree_hal_buffer_view_release(arg1_buffer_view);
+
+  // Invoke call.
+  if (iree_status_is_ok(status)) {
+    status = iree_runtime_call_invoke(&call, /*flags=*/0);
+  }
+
+  // Retrieve output buffer view with results from the invocation.
+  iree_hal_buffer_view_t* ret_buffer_view = NULL;
+  if (iree_status_is_ok(status)) {
+    status = iree_runtime_call_outputs_pop_front_buffer_view(&call,
+                                                             &ret_buffer_view);
+  }
+
+  // Read back the results and ensure we got the right values.
+  iree_hal_buffer_mapping_t mapped_memory = {0};
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_buffer_map_range(
+        iree_hal_buffer_view_buffer(ret_buffer_view),
+        IREE_HAL_MEMORY_ACCESS_READ, 0, IREE_WHOLE_BUFFER, &mapped_memory);
+  }
+  if (iree_status_is_ok(status) &&
+      mapped_memory.contents.data_length / sizeof(float) != kElementCount) {
+    status = iree_make_status(IREE_STATUS_UNKNOWN,
+                              "result does not match element count ");
+  }
+  if (iree_status_is_ok(status)) {
+    const float* data = (const float*)mapped_memory.contents.data;
+    for (iree_host_size_t i = 0;
+         i < mapped_memory.contents.data_length / sizeof(float); ++i) {
+      if (data[i] != 8.0f) {
+        status = iree_make_status(IREE_STATUS_UNKNOWN, "result mismatches");
+        break;  // One failure is enough; overwriting would leak the status.
+      }
+    }
+  }
+
+  // Cleanup call and buffers, then device, session, local modules, instance.
+  iree_hal_buffer_unmap_range(&mapped_memory);
+  iree_hal_buffer_view_release(ret_buffer_view);
+  iree_runtime_call_deinitialize(&call);
+  iree_hal_device_release(device);
+  iree_runtime_session_release(session);
+  iree_vm_module_release(bytecode_module);
+  iree_vm_module_release(hal_module);
+  iree_runtime_instance_release(instance);
+  return status;
+}
+
+int main() {  // Exit code 0 on success, -1 (after printing status) on failure.
+  const iree_status_t run_status = Run();
+  if (iree_status_is_ok(run_status)) {
+    printf("static_library_run passed\n");
+    return 0;
+  }
+  iree_status_fprint(stderr, run_status);
+  iree_status_free(run_status);
+  return -1;
+}