Moving existing custom_module sample to custom_module/basic/.
diff --git a/samples/custom_module/basic/CMakeLists.txt b/samples/custom_module/basic/CMakeLists.txt
new file mode 100644
index 0000000..ab87cf4
--- /dev/null
+++ b/samples/custom_module/basic/CMakeLists.txt
@@ -0,0 +1,30 @@
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Sample binary built from the C host (main.c) and the C++ module (module.cc).
+set(_NAME "iree_samples_custom_module_basic_run")
+add_executable(${_NAME}
+  main.c
+  module.cc
+  module.h
+)
+
+set_target_properties(${_NAME} PROPERTIES OUTPUT_NAME "custom-module-basic-run")
+
+# TODO(benvanik): make iree_status_annotate_f always available as a function
+# instead of defining it empty? otherwise optimized builds of the runtime won't
+# export it but external libraries may pull it in.
+target_compile_options(${_NAME} PRIVATE ${IREE_DEFAULT_COPTS})
+
+target_link_libraries(${_NAME}
+  PRIVATE
+    iree_base_base
+    iree_base_internal_file_io
+    iree_vm_vm
+    iree_vm_bytecode_module
+)
+
+add_subdirectory(test)
diff --git a/samples/custom_module/basic/Makefile b/samples/custom_module/basic/Makefile
new file mode 100644
index 0000000..9a0a630
--- /dev/null
+++ b/samples/custom_module/basic/Makefile
@@ -0,0 +1,51 @@
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# This is an example showing a basic makefile that links in the IREE runtime by
+# way of the unified static library. It's recommended that IREE is added as a
+# subproject and cmake is used to add the dependencies (as in the CMakeLists.txt
+# in this directory) but when using other build systems this is easier to adapt.
+#
+# Configure the runtime:
+#   cmake -GNinja -B ../iree-build-runtime/ . \
+#       -DCMAKE_BUILD_TYPE=MinSizeRel \
+#       -DIREE_SIZE_OPTIMIZED=ON
+# Build the runtime:
+#   cmake --build ../iree-build-runtime/ --target iree_runtime_unified
+# Make this binary:
+#   make custom-module-run-min RUNTIME_BUILD_DIR=../iree-build-runtime/
+#
+# Note that if IREE_SIZE_OPTIMIZED is used to build the runtime then the
+# -DNDEBUG and -DIREE_STATUS_MODE=0 are required on any binaries using it.
+
+# Paths are relative to this directory (samples/custom_module/basic/): the repo
+# root is three levels up and the default build directory sits next to it.
+RUNTIME_SRC_DIR ?= ../../../runtime/src/
+RUNTIME_BUILD_DIR ?= ../../../../iree-build/
+
+SRC_FILES := module.h module.cc main.c
+INCLUDE_DIRS := ${RUNTIME_SRC_DIR}
+INCLUDE_FLAGS := $(addprefix -I,${INCLUDE_DIRS})
+LIBRARY_DIRS := \
+		${RUNTIME_BUILD_DIR}/build_tools/third_party/flatcc/ \
+		${RUNTIME_BUILD_DIR}/runtime/src/iree/runtime/
+LINK_LIBRARIES := iree_runtime_unified flatcc_parsing
+LIBRARY_FLAGS := $(addprefix -L,${LIBRARY_DIRS}) $(addprefix -l,${LINK_LIBRARIES})
+CXX_FLAGS := -flto ${INCLUDE_FLAGS} ${LIBRARY_FLAGS}
+MIN_FLAGS := \
+		-s \
+		-Os \
+		-DNDEBUG \
+		-DIREE_STATUS_MODE=0
+
+all: custom-module-run custom-module-run-min
+clean:
+	rm -f custom-module-run custom-module-run-min
+
+custom-module-run: ${SRC_FILES}
+	${CXX} ${SRC_FILES} ${CXX_FLAGS} -o $@
+custom-module-run-min: ${SRC_FILES}
+	${CXX} ${SRC_FILES} ${CXX_FLAGS} ${MIN_FLAGS} -o $@
diff --git a/samples/custom_module/basic/README.md b/samples/custom_module/basic/README.md
new file mode 100644
index 0000000..77d6c28
--- /dev/null
+++ b/samples/custom_module/basic/README.md
@@ -0,0 +1,231 @@
+# Basic custom module sample
+
+This sample shows how to
+
+1. Create a custom module in C++ that can be used with the IREE runtime
+2. Author an MLIR input that uses a custom module including a custom type
+3. Compile that program to an IREE VM bytecode module
+4. Load the compiled program using a low-level VM interface
+5. Call exported functions on the loaded program to exercise the custom module
+
+The custom module is declared in [`module.h`](./module.h), implemented using a
+C++ module wrapper layer in [`module.cc`](./module.cc), and called by example in
+[`main.c`](./main.c).
+
+This document uses terminology that can be found in the documentation of
+[IREE's execution model](https://github.com/iree-org/iree/blob/main/docs/developers/design_docs/execution_model.md).
+See [IREE's extensibility mechanisms](https://iree-org.github.io/iree/extensions/)
+documentation for more information specific to extending IREE and
+alternative approaches to doing so.
+
+## Background
+
+IREE's VM is used to dynamically link modules of various types together at
+runtime (C, C++, IREE's VM bytecode, etc). Via this mechanism any number of
+modules containing exported functions and types that can be used across modules
+can extend IREE's base functionality. In most IREE programs the HAL module is
+used to provide a hardware abstraction layer for execution and both the HAL
+module itself and the types it exposes (`!hal.buffer`, `!hal.executable`, etc)
+are implemented using this mechanism.
+
+## Instructions
+
+1. Build or install the `iree-compile` binary:
+
+    ```
+    python -m pip install iree-compiler
+    ```
+
+    [See here](https://iree-org.github.io/iree/getting-started/)
+    for general instructions on installing the compiler.
+
+2. Compile the [example module](./test/example.mlir) to a .vmfb file:
+
+    ```
+    # This simple sample doesn't use tensors and can be compiled in host-only
+    # mode to avoid the need for the HAL.
+    iree-compile --iree-execution-model=host-only samples/custom_module/basic/test/example.mlir -o=/tmp/example.vmfb
+    ```
+
+3. Build the `iree_samples_custom_module_basic_run` CMake target:
+
+    ```
+    cmake -B ../iree-build/ -DCMAKE_BUILD_TYPE=RelWithDebInfo . \
+        -DCMAKE_C_FLAGS=-DIREE_VM_EXECUTION_TRACING_FORCE_ENABLE=1
+    cmake --build ../iree-build/ --target iree_samples_custom_module_basic_run
+    ```
+    (here we force runtime execution tracing for demonstration purposes)
+
+    [See here](https://iree-org.github.io/iree/building-from-source/getting-started/)
+    for general instructions on building using CMake.
+
+4. Run the example program to call the main function:
+
+   ```
+   ../iree-build/samples/custom_module/basic/custom-module-basic-run \
+       /tmp/example.vmfb example.main
+   ```
+
+## Defining Custom Modules in C++
+
+Modules are exposed to applications and the IREE VM via the `iree_vm_module_t`
+interface. IREE canonically uses C headers to expose module and type functions
+but the implementation of the module can be anything the user is able to work
+with (C, C++, rust, etc).
+
+A C++ wrapper is provided to ease implementation when minimal code size and overhead is not a focus and provides easy definition of exports and marshaling
+of types. Utilities such as `iree::Status` and `iree::vm::ref<T>` add safety for
+managing reference counted resources and can be used within the modules.
+
+General flow:
+
+1. Expose module via a C API ([`module.h`](./module.h)):
+
+```c
+// Ideally all allocations performed by the module should use |allocator|.
+// The returned module in |out_module| should have a ref count of 1 to transfer
+// ownership to the caller.
+iree_status_t iree_table_module_create(iree_allocator_t allocator,
+                                       iree_vm_module_t** out_module);
+```
+
+2. Implement the module using C/C++/etc ([`module.cc`](./module.cc)):
+
+Modules have two parts: a shared module and instantiated state.
+
+The `iree::vm::NativeModule` helper is used to handle the shared module
+declaration and acts as a factory for per-context instantiated state and the
+methods exported by the module:
+
+```c++
+// Any mutable state stored on the module may be accessed from multiple threads
+// if the module is instantiated in multiple contexts and must be thread-safe.
+struct TableModule final : public vm::NativeModule<TableModuleState> {
+  // Each time the module is instantiated this will be called to allocate the
+  // context-specific state. The returned state must only be thread-compatible
+  // as invocations within a context will not be made from multiple threads but
+  // the thread on which they are made may change over time; this means no TLS!
+  StatusOr<std::unique_ptr<TableModuleState>> CreateState(
+      iree_allocator_t allocator) override;
+};
+```
+
+The module implementation is done on the state object so that methods may use
+`this` to access context-local state:
+
+```c++
+struct TableModuleState final {
+  // Local to the context the module was instantiated in and thread-compatible.
+  std::unordered_map<std::string, std::string> mutable_state;
+
+  // Exported functions must return Status or StatusOr. Failures will result in
+  // program termination and will be propagated up to the top-level invoker.
+  // If a module wants to provide non-fatal errors it can return results to the
+  // program: here we return a 0/1 indicating whether the key was found as well
+  // as the result or null.
+  //
+  // MLIR declaration:
+  //   func.func private @table.lookup(!util.buffer) -> (i1, !util.buffer)
+  StatusOr<std::tuple<int32_t, vm::ref<iree_vm_buffer_t>>> Lookup(
+      const vm::ref<iree_vm_buffer_t> key);
+};
+```
+
+Finally the exported methods are registered and marshaling code is expanded:
+
+```c++
+static const vm::NativeFunction<TableModuleState> kTableModuleFunctions[] = {
+    vm::MakeNativeFunction("lookup", &TableModuleState::Lookup),
+};
+extern "C" iree_status_t iree_table_module_create(
+    iree_allocator_t allocator, iree_vm_module_t** out_module) {
+  auto module = std::make_unique<TableModule>(
+      "table", /*version=*/0, allocator,
+      iree::span<const vm::NativeFunction<TableModuleState>>
+      (kTableModuleFunctions));
+  *out_module = module.release()->interface();
+  return iree_ok_status();
+}
+```
+
+## Registering Custom Modules at Runtime
+
+Once a custom module is defined it needs to be provided to any context that it
+is going to be used in. Each context may have its own unique mix of modules and
+it's the hosting application's responsibility to inject the available modules.
+See [`main.c`](./main.c) for an example showing the entire end-to-end lifetime
+of loading a compiled bytecode module and providing a custom module for runtime
+dynamic linking.
+
+Since modules themselves can be reused across contexts it can be a way of
+creating shared caches (requires thread-safety!) that span contexts while the
+module state is context specific and isolated.
+
+Import resolution happens in reverse registration order: the most recently
+registered modules override previous ones. This combined with optional imports
+allows overriding behavior and version compatibility shims (though there is
+still some trickiness involved).
+
+```c
+// Ensure custom types are registered before loading modules that use them.
+// This only needs to be done once per instance.
+IREE_CHECK_OK(iree_custom_module_basic_register_types(instance));
+
+// Create the custom module that can be reused across contexts.
+iree_vm_module_t* custom_module = NULL;
+IREE_CHECK_OK(iree_custom_module_basic_create(instance, allocator,
+                                              &custom_module));
+
+// Create the context for this invocation reusing the loaded modules.
+// Contexts hold isolated state and can be reused for multiple calls.
+// Note that the module order matters: the input user module is dependent on
+// the custom module.
+iree_vm_module_t* modules[] = {custom_module, bytecode_module};
+iree_vm_context_t* context = NULL;
+IREE_CHECK_OK(iree_vm_context_create_with_modules(
+    instance, IREE_VM_CONTEXT_FLAG_NONE, IREE_ARRAYSIZE(modules), modules,
+    allocator, &context));
+```
+
+## Calling Custom Modules from Compiled Programs
+
+The IREE compiler allows for external functions that are resolved at runtime
+using the [MLIR `func` dialect](https://mlir.llvm.org/docs/Dialects/Func/). Some
+optional attributes are used to allow for customization where required but in
+many cases no additional IREE-specific work is required in the compiled program.
+A few advanced features of the VM FFI are not currently exposed via this
+mechanism such as variadic arguments and tuples but the advantage is that users
+need not customize the IREE compiler in order to use their modules.
+
+Prior to passing input programs to the IREE compiler users can insert the
+imported functions as external
+[`func.func`](https://mlir.llvm.org/docs/Dialects/Func/#funcfunc-mlirfuncfuncop)
+ops and calls to those functions using
+[`func.call`](https://mlir.llvm.org/docs/Dialects/Func/#funccall-mlirfunccallop):
+
+```mlir
+// An external function declaration.
+// `custom` is the runtime module and `string.create` is the exported method.
+// This call uses both IREE types (`!util.buffer`) and custom ones not known to
+// the compiler but available at runtime (`!custom.string`).
+func.func private @custom.string.create(!util.buffer) -> !custom.string
+```
+
+```mlir
+// Call the imported function.
+%buffer = util.buffer.constant : !util.buffer = "hello world!"
+%result = func.call @custom.string.create(%buffer) : (!util.buffer) -> !custom.string
+```
+
+Users with custom dialects and ops can use
+[MLIR's dialect conversion](https://mlir.llvm.org/docs/DialectConversion/)
+framework to rewrite their custom ops to this form and perform additional
+marshaling logic. For example, the above could have started as this program
+before the user ran their dialect conversion and passed it in to `iree-compile`:
+
+```mlir
+%result = custom.string.create "hello world!" : !custom.string
+```
+
+See this sample's [`example.mlir`](./test/example.mlir) for examples of features
+such as signature specification and optional import fallback support.
diff --git a/samples/custom_module/basic/main.c b/samples/custom_module/basic/main.c
new file mode 100644
index 0000000..7585033
--- /dev/null
+++ b/samples/custom_module/basic/main.c
@@ -0,0 +1,117 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <stdio.h>
+#include <string.h>
+
+// Low-level IREE VM APIs.
+// The higher-level iree/runtime/api.h can be used for more complete ML-like
+// programs using the hardware abstraction layer (HAL). This simple sample just
+// uses base VM types.
+#include "iree/base/api.h"
+#include "iree/vm/api.h"
+#include "iree/vm/bytecode_module.h"
+
+// HACK: this pokes in to private APIs for IO helpers while we expect
+// applications to bring their own IO.
+#include "iree/base/internal/file_io.h"
+
+// Custom native module used in the sample.
+// Modules may be linked in from native code or other bytecode modules loaded at
+// runtime: there's no difference.
+#include "module.h"
+
+// NOTE: CHECKs are dangerous but this is a sample; a real application would
+// want to handle errors gracefully. We know in this constrained case that
+// these won't fail unless something is catastrophically wrong (out of memory,
+// solar flares, etc).
+int main(int argc, char** argv) {
+  if (argc != 3) {
+    fprintf(
+        stderr,
+        "Usage:\n"
+        "  custom-module-basic-run - <entry.point> # read from stdin\n"
+        "  custom-module-basic-run </path/to/say_hello.vmfb> <entry.point>\n");
+    fprintf(stderr, "  (See the README for this sample for details)\n");
+    return -1;
+  }
+
+  // Internally IREE does not (in general) use malloc and instead uses the
+  // provided allocator to allocate and free memory. Applications can integrate
+  // their own allocator as-needed.
+  iree_allocator_t allocator = iree_allocator_system();
+
+  // Create the root isolated VM instance that we can create contexts within.
+  iree_vm_instance_t* instance = NULL;
+  IREE_CHECK_OK(iree_vm_instance_create(allocator, &instance));
+
+  // Ensure custom types are registered before loading modules that use them.
+  // This only needs to be done once.
+  IREE_CHECK_OK(iree_custom_module_basic_register_types(instance));
+
+  // Create the custom module that can be reused across contexts.
+  iree_vm_module_t* custom_module = NULL;
+  IREE_CHECK_OK(
+      iree_custom_module_basic_create(instance, allocator, &custom_module));
+
+  // Load the module from stdin or a file on disk.
+  // Applications can ship and load modules however they want (such as mapping
+  // them into memory instead of allocating like this). Modules can also be
+  // embedded in the binary but in those cases it makes more sense to use emitc
+  // to avoid the bytecode entirely and have a fully static build (see
+  // samples/emitc_modules/ for some examples).
+  const char* module_path = argv[1];
+  iree_file_contents_t* module_contents = NULL;
+  if (strcmp(module_path, "-") == 0) {
+    IREE_CHECK_OK(iree_stdin_read_contents(allocator, &module_contents));
+  } else {
+    IREE_CHECK_OK(
+        iree_file_read_contents(module_path, allocator, &module_contents));
+  }
+
+  // Load the bytecode module from the vmfb.
+  // This module can be reused across multiple contexts.
+  // Note that we let the module retain the file contents for as long as needed.
+  iree_vm_module_t* bytecode_module = NULL;
+  IREE_CHECK_OK(iree_vm_bytecode_module_create(
+      instance, module_contents->const_buffer,
+      iree_file_contents_deallocator(module_contents), allocator,
+      &bytecode_module));
+
+  // Create the context for this invocation reusing the loaded modules.
+  // Contexts hold isolated state and can be reused for multiple calls.
+  // Note that the module order matters: the input user module is dependent on
+  // the custom module.
+  iree_vm_module_t* modules[] = {custom_module, bytecode_module};
+  iree_vm_context_t* context = NULL;
+  IREE_CHECK_OK(iree_vm_context_create_with_modules(
+      instance, IREE_VM_CONTEXT_FLAG_NONE, IREE_ARRAYSIZE(modules), modules,
+      allocator, &context));
+
+  // Lookup the function by fully-qualified name (module.func).
+  iree_vm_function_t function;
+  IREE_CHECK_OK(iree_vm_context_resolve_function(
+      context, iree_make_cstring_view(argv[2]), &function));
+
+  fprintf(stdout, "INVOKE BEGIN %s\n", argv[2]);
+  fflush(stdout);
+
+  // Synchronously invoke the requested function.
+  // We don't pass in/out anything in these simple examples so the I/O lists
+  // are not needed.
+  IREE_CHECK_OK(iree_vm_invoke(context, function, IREE_VM_INVOCATION_FLAG_NONE,
+                               /*policy=*/NULL, /*inputs=*/NULL,
+                               /*outputs=*/NULL, allocator));
+
+  fprintf(stdout, "INVOKE END\n");
+  fflush(stdout);
+
+  iree_vm_context_release(context);
+  iree_vm_module_release(bytecode_module);
+  iree_vm_module_release(custom_module);
+  iree_vm_instance_release(instance);
+  return 0;
+}
diff --git a/samples/custom_module/basic/module.cc b/samples/custom_module/basic/module.cc
new file mode 100644
index 0000000..6ba12d8
--- /dev/null
+++ b/samples/custom_module/basic/module.cc
@@ -0,0 +1,198 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "module.h"
+
+#include <cstdio>
+
+#include "iree/base/api.h"
+#include "iree/vm/api.h"
+#include "iree/vm/native_module_cc.h"
+
+// NOTE: this module is written in C++ using the native module wrapper and uses
+// template magic to handle marshaling arguments. For a lot of uses this is a
+// much friendlier way of exposing modules to the IREE VM and if performance and
+// code size are not a concern is a fine route to take. Here we do it for
+// brevity but all of the internal IREE modules are implemented in C.
+
+//===----------------------------------------------------------------------===//
+// !custom.string type
+//===----------------------------------------------------------------------===//
+
+// Runtime type descriptor for the !custom.string describing how to manage it
+// and destroy it. The type ID is allocated at runtime and does not need to
+// match the compiler ID.
+static iree_vm_ref_type_descriptor_t iree_custom_string_descriptor = {0};
+
+// The "string" type we use to store and retain string data.
+// This could be arbitrarily complex or simply wrap another user-defined type.
+// The descriptor that is registered at startup defines how to manage the
+// lifetime of the type (such as which destruction function is called, if any).
+// See ref.h for more information and additional utilities.
+typedef struct iree_custom_string_t {
+  // Must be the first field; used to track the reference count of the object.
+  iree_vm_ref_object_t ref_object;
+  // Allocator the object was allocated from; used again to free it on destroy.
+  // Ideally pools and nested allocators would be used to avoid needing to store
+  // the allocator with every object.
+  iree_allocator_t allocator;
+  // Non-NUL-terminated string value; data points just past this struct.
+  iree_string_view_t value;
+} iree_custom_string_t;
+
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_custom_string, iree_custom_string_t);
+
+extern "C" iree_status_t iree_custom_string_create(
+    iree_string_view_t value, iree_allocator_t allocator,
+    iree_custom_string_t** out_string) {
+  IREE_ASSERT_ARGUMENT(out_string);
+  *out_string = NULL;  // Defined output even if the allocation fails.
+  iree_custom_string_t* string = NULL;  // header + characters in one block
+  IREE_RETURN_IF_ERROR(iree_allocator_malloc(
+      allocator, sizeof(*string) + value.size, (void**)&string));
+  string->ref_object.counter = IREE_ATOMIC_VAR_INIT(1);
+  string->allocator = allocator;
+  char* storage = ((char*)string) + sizeof(iree_custom_string_t);
+  if (value.size > 0) memcpy(storage, value.data, value.size);  // skip empty
+  string->value = iree_make_string_view(storage, value.size);
+  *out_string = string;
+  return iree_ok_status();
+}
+
+extern "C" void iree_custom_string_destroy(void* ptr) {
+  iree_allocator_t owner = ((iree_custom_string_t*)ptr)->allocator;
+  iree_allocator_free(owner, ptr);  // struct and characters are one block
+}
+
+extern "C" iree_status_t iree_custom_module_basic_register_types(
+    iree_vm_instance_t* instance) {
+  iree_vm_ref_type_descriptor_t* descriptor = &iree_custom_string_descriptor;
+  // `type` starts zeroed; once set we treat the descriptor as registered.
+  if (descriptor->type) return iree_ok_status();
+  // Describe the type's name, ref-count location, and destructor to the VM.
+  descriptor->type_name = iree_make_cstring_view("custom.string");
+  descriptor->offsetof_counter =
+      offsetof(iree_custom_string_t, ref_object.counter);
+  descriptor->destroy = iree_custom_string_destroy;
+  return iree_vm_ref_register_type(descriptor);
+}
+
+//===----------------------------------------------------------------------===//
+// VM module interface implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+using namespace iree;
+
+// Per-context module state.
+// This can contain "globals" and other arbitrary state.
+//
+// Thread-compatible; the runtime will not issue multiple calls at the same
+// time using the same state. If the implementation uses external threads then
+// it must synchronize itself.
+class CustomModuleState final {
+ public:
+  explicit CustomModuleState(iree_allocator_t allocator)
+      : allocator_(allocator) {}
+  ~CustomModuleState() = default;
+
+  // Creates a new string holding a copy of |data| and logs "CREATE <contents>".
+  // No NUL terminator is required on the input.
+  StatusOr<vm::ref<iree_custom_string_t>> StringCreate(
+      iree_string_view_t data) {
+    vm::ref<iree_custom_string_t> string;
+    IREE_RETURN_IF_ERROR(iree_custom_string_create(data, allocator_, &string));
+    fprintf(stdout, "CREATE %.*s\n", static_cast<int>(string->value.size),
+            string->value.data);
+    fflush(stdout);
+    return std::move(string);
+  }
+
+  // Returns the length of the string in bytes (value.size) and logs it.
+  StatusOr<int64_t> StringLength(const vm::ref<iree_custom_string_t> string) {
+    if (!string) {
+      // Passed in refs may be null; unlike the print methods this is an error.
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, "null string arg");
+    }
+    fprintf(stdout, "LENGTH %.*s = %zu\n", static_cast<int>(string->value.size),
+            string->value.data, string->value.size);
+    fflush(stdout);
+    return static_cast<int64_t>(string->value.size);
+  }
+
+  // Writes "PRINT <contents>" to stdout; a null ref is silently ignored.
+  Status StringPrint(const vm::ref<iree_custom_string_t> string) {
+    if (!string) return OkStatus();  // no-op
+    fprintf(stdout, "PRINT %.*s\n", static_cast<int>(string->value.size),
+            string->value.data);
+    fflush(stdout);
+    return OkStatus();
+  }
+
+  // Forwards to StringPrint; only exported in debug builds (see table below).
+  Status StringDPrint(const vm::ref<iree_custom_string_t> string) {
+    if (!string) return OkStatus();  // no-op
+    return StringPrint(std::move(string));
+  }
+
+ private:
+  // Allocator that the caller requested we use for any allocations we need to
+  // perform during operation.
+  iree_allocator_t allocator_ = iree_allocator_system();
+};
+
+// Function table mapping exported names (sans "custom." prefix) to methods.
+static const vm::NativeFunction<CustomModuleState> kCustomModuleFunctions[] = {
+    vm::MakeNativeFunction("string.create", &CustomModuleState::StringCreate),
+    vm::MakeNativeFunction("string.length", &CustomModuleState::StringLength),
+    vm::MakeNativeFunction("string.print", &CustomModuleState::StringPrint),
+
+#if !defined(NDEBUG)
+    // This is an optional method that we purposefully compile out in release
+    // builds to demonstrate fallback paths. Consuming modules can query as to
+    // whether the function exists at runtime and call fallbacks/change their
+    // behavior.
+    vm::MakeNativeFunction("string.dprint", &CustomModuleState::StringDPrint),
+#endif  // !defined(NDEBUG)
+};
+
+// The module instance that will be allocated and reused across contexts.
+// Any context-specific state must be stored in a state structure such as
+// CustomModuleState.
+//
+// Assumed thread-safe (by construction here, as it's immutable), though if any
+// mutable state is stored here it will need to be synchronized by the
+// implementation.
+class CustomModule final : public vm::NativeModule<CustomModuleState> {
+ public:
+  using vm::NativeModule<CustomModuleState>::NativeModule;
+
+  // Invoked whenever this module is added to a context to build that context's
+  // private state. May be called from any thread.
+  StatusOr<std::unique_ptr<CustomModuleState>> CreateState(
+      iree_allocator_t allocator) override {
+    // Each context gets its own state bound to the requested allocator.
+    return std::make_unique<CustomModuleState>(allocator);
+  }
+};
+
+}  // namespace
+
+// Note that while we are using C++ bindings internally we still expose the
+// module as a C instance. This hides the details of our implementation.
+extern "C" iree_status_t iree_custom_module_basic_create(
+    iree_vm_instance_t* instance, iree_allocator_t allocator,
+    iree_vm_module_t** out_module) {
+  IREE_ASSERT_ARGUMENT(out_module);
+  *out_module = NULL;
+  iree::span<const vm::NativeFunction<CustomModuleState>> functions(
+      kCustomModuleFunctions);
+  auto module = std::make_unique<CustomModule>("custom", /*version=*/0,
+                                               instance, allocator, functions);
+  *out_module = module.release()->interface();
+  return iree_ok_status();
+}
diff --git a/samples/custom_module/basic/module.h b/samples/custom_module/basic/module.h
new file mode 100644
index 0000000..c786f6b
--- /dev/null
+++ b/samples/custom_module/basic/module.h
@@ -0,0 +1,50 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_SAMPLES_CUSTOM_MODULE_BASIC_MODULE_H_
+#define IREE_SAMPLES_CUSTOM_MODULE_BASIC_MODULE_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/vm/api.h"
+
+// A non-NUL-terminated string.
+typedef struct iree_custom_string_t iree_custom_string_t;
+IREE_VM_DECLARE_TYPE_ADAPTERS(iree_custom_string, iree_custom_string_t);
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// Creates a new !custom.string object with a copy of the given |value|.
+// Applications could use this and any other methods we wanted to expose to
+// interop with the loaded VM modules - such as passing in/out the objects.
+// We don't need this for the demo but creating the custom object, appending it
+// to the invocation input list, and then consuming it in the compiled module
+// is straightforward.
+iree_status_t iree_custom_string_create(iree_string_view_t value,
+                                        iree_allocator_t allocator,
+                                        iree_custom_string_t** out_string);
+
+// Registers the custom.string ref type; calls after the first are no-ops.
+iree_status_t iree_custom_module_basic_register_types(
+    iree_vm_instance_t* instance);
+
+// Creates a native custom module that can be reused in multiple contexts.
+// The module itself may hold state that can be shared by all instantiated
+// copies but it will require the module to provide synchronization; usually
+// it's safer to just treat the module as immutable and keep state within the
+// instantiated module states instead.
+iree_status_t iree_custom_module_basic_create(iree_vm_instance_t* instance,
+                                              iree_allocator_t allocator,
+                                              iree_vm_module_t** out_module);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // IREE_SAMPLES_CUSTOM_MODULE_BASIC_MODULE_H_
diff --git a/samples/custom_module/basic/test/CMakeLists.txt b/samples/custom_module/basic/test/CMakeLists.txt
new file mode 100644
index 0000000..ad7361a
--- /dev/null
+++ b/samples/custom_module/basic/test/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Copyright 2022 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+iree_lit_test_suite(
+  NAME
+    lit
+  SRCS
+    "example.mlir"
+  TOOLS
+    FileCheck
+    iree-compile
+    iree_samples_custom_module_basic_run
+  LABELS
+    "hostonly"
+)
diff --git a/samples/custom_module/basic/test/example.mlir b/samples/custom_module/basic/test/example.mlir
new file mode 100644
index 0000000..c3a4a26
--- /dev/null
+++ b/samples/custom_module/basic/test/example.mlir
@@ -0,0 +1,75 @@
+// RUN: iree-compile %s --iree-execution-model=host-only | custom-module-basic-run - example.main | FileCheck %s
+
+module @example {
+  //===--------------------------------------------------------------------===//
+  // Imports
+  //===--------------------------------------------------------------------===//
+  // External function declarations for the methods implemented in the custom
+  // module C++ file. Note that they are prefixed with the `custom.` module
+  // name.
+
+  // Creates a new string with a copy of the given string data.
+  // No NUL terminator is required.
+  func.func private @custom.string.create(!util.buffer) -> !custom.string
+
+  // Returns the length of the string in characters.
+  func.func private @custom.string.length(!custom.string) -> index attributes {
+    // Explicitly force the returned type to be i64 regardless of whether the
+    // VM is in 32 or 64 bit mode and what conversion would make index.
+    vm.signature = (!vm.ref<!custom.string>) -> i64
+  }
+
+  // Prints the contents of the string to stdout.
+  func.func private @custom.string.print(!custom.string)
+
+  // Prints the contents of the string only in debug mode and otherwise prints
+  // "optimized".
+  func.func private @custom.string.dprint(!custom.string) attributes {
+    // Indicates the import is optional and if not present the specified
+    // fallback method will be called instead.
+    vm.fallback = @custom_string_dprint
+  }
+  func.func private @custom_string_dprint(%ignored: !custom.string) {
+    // Called when the import is not available at runtime (in this case when the
+    // runtime is compiled in release mode). This is a silly example but makes
+    // it easier to test.
+    %data = util.buffer.constant : !util.buffer = "optimized"
+    %str = call @custom.string.create(%data) : (!util.buffer) -> !custom.string
+    call @custom.string.print(%str) : (!custom.string) -> ()
+    return
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Sample methods
+  //===--------------------------------------------------------------------===//
+  // Note that there can be any number of publicly-exported methods; this simple
+  // sample just has one to keep things simple.
+
+  // CHECK-LABEL: INVOKE BEGIN example.main
+  func.func @main() {
+    // Create string from a byte buffer encoding the characters.
+    %hello_data = util.buffer.constant : !util.buffer = "hello"
+    // CHECK-NEXT: CREATE hello
+    %hello_str = call @custom.string.create(%hello_data) : (!util.buffer) -> !custom.string
+
+    // Print the string to stdout.
+    // CHECK-NEXT: PRINT hello
+    call @custom.string.print(%hello_str) : (!custom.string) -> ()
+
+    // Query the length of the string.
+    // We don't do anything with it here but just demonstrate how index works.
+    // CHECK-NEXT: LENGTH hello = 5
+    %strlen = call @custom.string.length(%hello_str) : (!custom.string) -> index
+    util.optimization_barrier %strlen : index
+
+    // Print "debug" if the runtime is compiled in debug mode and otherwise
+    // prints "optimized".
+    // CHECK: PRINT {{debug|optimized}}
+    %debug_data = util.buffer.constant : !util.buffer = "debug"
+    %debug_str = call @custom.string.create(%debug_data) : (!util.buffer) -> !custom.string
+    call @custom.string.dprint(%debug_str) : (!custom.string) -> ()
+
+    return
+  }
+  // CHECK-NEXT: INVOKE END
+}