Initial embedded ELF module loader. (#5504)

Enabled with `-iree-llvm-link-embedded` and
`-iree-llvm-target-triple={any}-pc-linux-elf`.

~1000x faster than the system loader on Windows (60-100ms -> 50us)
and 64b + ELF memory usage.

Imports are not supported so it fails on any executable that ends up
using -lm (floorf, etc).
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e3d7b27..9958c07 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -20,12 +20,18 @@
 endif()
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
-project(iree CXX C)
+project(iree ASM C CXX)
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_CXX_STANDARD 14)
 set(IREE_IDE_FOLDER IREE)
 set_property(GLOBAL PROPERTY USE_FOLDERS ON)
 
+if(${MSVC})
+  enable_language(ASM_MASM)
+else()
+  enable_language(ASM)
+endif()
+
 #-------------------------------------------------------------------------------
 # Project component configuration
 #-------------------------------------------------------------------------------
diff --git a/iree/base/api.h b/iree/base/api.h
index f0c1376..88ecc29 100644
--- a/iree/base/api.h
+++ b/iree/base/api.h
@@ -99,6 +99,7 @@
 #ifndef IREE_BASE_API_H_
 #define IREE_BASE_API_H_
 
+#include <assert.h>
 #include <memory.h>
 #include <stdarg.h>
 #include <stdbool.h>
diff --git a/iree/base/target_platform.h b/iree/base/target_platform.h
index 506ab47..89234e9 100644
--- a/iree/base/target_platform.h
+++ b/iree/base/target_platform.h
@@ -49,6 +49,7 @@
 // IREE_PLATFORM_ANDROID_EMULATOR
 // IREE_PLATFORM_APPLE (IOS | MACOS)
 // IREE_PLATFORM_EMSCRIPTEN
+// IREE_PLATFORM_GENERIC
 // IREE_PLATFORM_IOS
 // IREE_PLATFORM_IOS_SIMULATOR
 // IREE_PLATFORM_LINUX
@@ -243,8 +244,9 @@
 //==============================================================================
 
 #if !defined(IREE_PLATFORM_ANDROID) && !defined(IREE_PLATFORM_EMSCRIPTEN) && \
-    !defined(IREE_PLATFORM_IOS) && !defined(IREE_PLATFORM_LINUX) &&          \
-    !defined(IREE_PLATFORM_MACOS) && !defined(IREE_PLATFORM_WINDOWS)
+    !defined(IREE_PLATFORM_GENERIC) && !defined(IREE_PLATFORM_IOS) &&        \
+    !defined(IREE_PLATFORM_LINUX) && !defined(IREE_PLATFORM_MACOS) &&        \
+    !defined(IREE_PLATFORM_WINDOWS)
 #error Unknown platform.
 #endif  // all archs
 
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
index 97ab76d..629a2e5 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
@@ -305,46 +305,65 @@
       linkArtifacts.keepAllFiles();
     }
 
-    FlatbufferBuilder builder;
-    iree_DyLibExecutableDef_start_as_root(builder);
+    if (options_.linkEmbedded) {
+      // Load the linked ELF file and pack into an attr.
+      auto elfFile = linkArtifacts.libraryFile.read();
+      if (!elfFile.hasValue()) {
+        return targetOp.emitError() << "failed to read back dylib temp file at "
+                                    << linkArtifacts.libraryFile.path;
+      }
+      auto bufferAttr = DenseIntElementsAttr::get(
+          VectorType::get({static_cast<int64_t>(elfFile->size())},
+                          IntegerType::get(executableBuilder.getContext(), 8)),
+          std::move(elfFile.getValue()));
 
-    // Embed debug symbols at the end of the flatbuffer by adding first in the
-    // bottoms-up builder.
-    flatbuffers_uint8_vec_ref_t debugDatabaseRef = 0;
-    flatbuffers_string_ref_t debugDatabaseFilenameRef = 0;
-    if (options_.debugSymbols && linkArtifacts.debugFile.outputFile) {
-      debugDatabaseRef = builder.streamUint8Vec([&](raw_ostream &stream) {
-        return linkArtifacts.debugFile.readInto(stream);
-      });
-      debugDatabaseFilenameRef = builder.createString(
-          llvm::sys::path::filename(linkArtifacts.debugFile.path));
-    }
+      // Add the binary to the parent hal.executable.
+      auto executableFormatAttr = executableBuilder.getStringAttr("EX_ELF");
+      executableBuilder.create<IREE::HAL::ExecutableBinaryOp>(
+          targetOp.getLoc(), targetOp.sym_name(), executableFormatAttr,
+          bufferAttr);
+    } else {
+      FlatbufferBuilder builder;
+      iree_DyLibExecutableDef_start_as_root(builder);
 
-    // Embed entire dynamic library output.
-    flatbuffers_uint8_vec_ref_t libraryEmbeddedRef =
-        builder.streamUint8Vec([&](raw_ostream &stream) {
-          return linkArtifacts.libraryFile.readInto(stream);
+      // Embed debug symbols at the end of the flatbuffer by adding first in the
+      // bottoms-up builder.
+      flatbuffers_uint8_vec_ref_t debugDatabaseRef = 0;
+      flatbuffers_string_ref_t debugDatabaseFilenameRef = 0;
+      if (options_.debugSymbols && linkArtifacts.debugFile.outputFile) {
+        debugDatabaseRef = builder.streamUint8Vec([&](raw_ostream &stream) {
+          return linkArtifacts.debugFile.readInto(stream);
         });
-    if (!libraryEmbeddedRef) {
-      return targetOp.emitError() << "failed to read back dylib temp file at "
-                                  << linkArtifacts.libraryFile.path;
+        debugDatabaseFilenameRef = builder.createString(
+            llvm::sys::path::filename(linkArtifacts.debugFile.path));
+      }
+
+      // Embed entire dynamic library output.
+      flatbuffers_uint8_vec_ref_t libraryEmbeddedRef =
+          builder.streamUint8Vec([&](raw_ostream &stream) {
+            return linkArtifacts.libraryFile.readInto(stream);
+          });
+      if (!libraryEmbeddedRef) {
+        return targetOp.emitError() << "failed to read back dylib temp file at "
+                                    << linkArtifacts.libraryFile.path;
+      }
+
+      iree_DyLibExecutableDef_library_embedded_add(builder, libraryEmbeddedRef);
+      iree_DyLibExecutableDef_debug_database_filename_add(
+          builder, debugDatabaseFilenameRef);
+      iree_DyLibExecutableDef_debug_database_embedded_add(builder,
+                                                          debugDatabaseRef);
+      iree_DyLibExecutableDef_end_as_root(builder);
+
+      auto executableFormatAttr = targetTriple.isWasm()
+                                      ? executableBuilder.getStringAttr("WASM")
+                                      : executableBuilder.getStringAttr("DLIB");
+
+      // Add the binary data to the target executable.
+      executableBuilder.create<IREE::HAL::ExecutableBinaryOp>(
+          targetOp.getLoc(), targetOp.sym_name(), executableFormatAttr,
+          builder.getBufferAttr(executableBuilder.getContext()));
     }
-
-    iree_DyLibExecutableDef_library_embedded_add(builder, libraryEmbeddedRef);
-    iree_DyLibExecutableDef_debug_database_filename_add(
-        builder, debugDatabaseFilenameRef);
-    iree_DyLibExecutableDef_debug_database_embedded_add(builder,
-                                                        debugDatabaseRef);
-    iree_DyLibExecutableDef_end_as_root(builder);
-
-    auto executableFormatAttr = targetTriple.isWasm()
-                                    ? executableBuilder.getStringAttr("WASM")
-                                    : executableBuilder.getStringAttr("DLIB");
-
-    // Add the binary data to the target executable.
-    executableBuilder.create<IREE::HAL::ExecutableBinaryOp>(
-        targetOp.getLoc(), targetOp.sym_name(), executableFormatAttr,
-        builder.getBufferAttr(executableBuilder.getContext()));
     return success();
   }
 
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.cpp
index c8ad6d5..efac365 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.cpp
@@ -137,6 +137,13 @@
       llvm::cl::init(llvmTargetOptions.debugSymbols));
   llvmTargetOptions.debugSymbols = clDebugSymbols;
 
+  static llvm::cl::opt<bool> clLinkEmbedded(
+      "iree-llvm-link-embedded",
+      llvm::cl::desc("Links binaries into a platform-agnostic ELF to be loaded "
+                     "by the embedded IREE ELF loader"),
+      llvm::cl::init(llvmTargetOptions.linkEmbedded));
+  llvmTargetOptions.linkEmbedded = clLinkEmbedded;
+
   static llvm::cl::opt<bool> clLinkStatic(
       "iree-llvm-link-static",
       llvm::cl::desc(
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.h b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.h
index 3e85ed3..fc0db2b 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.h
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.h
@@ -49,6 +49,9 @@
   // Sanitizer Kind for CPU Kernels
   SanitizerKind sanitizerKind = SanitizerKind::kNone;
 
+  // Build for the IREE embedded platform-agnostic ELF loader.
+  bool linkEmbedded = false;
+
   // Link any required runtime libraries into the produced binaries statically.
   // This increases resulting binary size but enables the binaries to be used on
   // any machine without requiring matching system libraries to be installed.
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/internal/BUILD b/iree/compiler/Dialect/HAL/Target/LLVM/internal/BUILD
index 5c90504..bed9c4c 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/internal/BUILD
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/internal/BUILD
@@ -22,6 +22,7 @@
     name = "LinkerTools_internal",
     srcs = [
         "AndroidLinkerTool.cpp",
+        "EmbeddedLinkerTool.cpp",
         "LinkerTools.cpp",
         "RiscvLinkerTool.cpp",
         "UnixLinkerTool.cpp",
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/internal/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/LLVM/internal/CMakeLists.txt
index 4f9e79f..994d77d 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/internal/CMakeLists.txt
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/internal/CMakeLists.txt
@@ -15,6 +15,7 @@
     LinkerTools_internal
   SRCS
     "AndroidLinkerTool.cpp"
+    "EmbeddedLinkerTool.cpp"
     "LinkerTools.cpp"
     "RiscvLinkerTool.cpp"
     "UnixLinkerTool.cpp"
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/internal/EmbeddedLinkerTool.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/internal/EmbeddedLinkerTool.cpp
new file mode 100644
index 0000000..36b0e4d
--- /dev/null
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/internal/EmbeddedLinkerTool.cpp
@@ -0,0 +1,159 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/compiler/Dialect/HAL/Target/LLVM/LinkerTool.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FormatVariadic.h"
+
+#define DEBUG_TYPE "llvmaot-linker"
+
+namespace mlir {
+namespace iree_compiler {
+namespace IREE {
+namespace HAL {
+
+// Embedded ELF linker targeting IREE's ELF loader (or Android/Linux).
+// This uses lld exclusively (though it can be overridden) as that lets us
+// ensure we are consistently generating ELFs such that they can be used
+// across our target platforms and with our loader.
+//
+// For consistency we follow the Linux ABI rules on all architectures and
+// limit what we allow:
+// - Triples of the form "{arch}-pc-linux-elf" only.
+// - No builtin libraries are available.
+// - Extra GNU-style symbol lookups are disabled (sysv only) to save binary
+//   size. The loader does not use any hash tables but .hash is mandatory in
+//   the spec and included for compatibility.
+// - No lazy binding; all symbols must be resolved on load.
+// - GNU_RELRO is optional but used here as we don't support lazy binding.
+//
+// We allow debug information to be included in the ELFs however we don't
+// currently have a use for it at runtime. When unstripped we can possibly feed
+// it to tools or use it ourselves to generate backtraces but since all release
+// usage should be stripped nothing relies upon it.
+class EmbeddedLinkerTool : public LinkerTool {
+ public:
+  using LinkerTool::LinkerTool;
+
+  std::string getToolPath() const override {
+    // First check for setting the linker explicitly.
+    auto toolPath = LinkerTool::getToolPath();
+    if (!toolPath.empty()) return toolPath;
+
+    // No explicit linker specified, search the environment for common tools.
+    toolPath = findToolInEnvironment({"ld.lld"});
+    if (!toolPath.empty()) return toolPath;
+
+    llvm::errs() << "LLD (ld.lld) not found on path; specify with the "
+                    "IREE_LLVMAOT_LINKER_PATH environment variable\n";
+    return "";
+  }
+
+  LogicalResult configureModule(
+      llvm::Module *llvmModule,
+      ArrayRef<llvm::Function *> exportedFuncs) override {
+    for (auto &func : *llvmModule) {
+      // Enable frame pointers to ensure that stack unwinding works.
+      func.addFnAttr("frame-pointer", "all");
+
+      // -ffreestanding-like behavior.
+      func.addFnAttr("no-builtins");
+
+      // -fno-plt - prevent PLT on calls to imports.
+      func.addFnAttr("nonlazybind");
+
+      // Our dispatches are all hot - that's kind of the point.
+      // This may favor more aggressive optimizations.
+      func.addFnAttr("hot");
+    }
+    return success();
+  }
+
+  Optional<Artifacts> linkDynamicLibrary(
+      StringRef libraryName, ArrayRef<Artifact> objectFiles) override {
+    Artifacts artifacts;
+
+    // Create the shared object name; if we only have a single input object we
+    // can just reuse that.
+    if (objectFiles.size() == 1) {
+      artifacts.libraryFile =
+          Artifact::createVariant(objectFiles.front().path, "so");
+    } else {
+      artifacts.libraryFile = Artifact::createTemporary(libraryName, "so");
+    }
+    artifacts.libraryFile.close();
+
+    SmallVector<std::string, 8> flags = {
+        getToolPath(),
+        "-o " + artifacts.libraryFile.path,
+    };
+
+    // Avoids including any libc/startup files that initialize the CRT as
+    // we don't use any of that. Our shared libraries must be freestanding.
+    flags.push_back("-nostdlib");  // -nodefaultlibs + -nostartfiles
+
+    // Statically link all dependencies so we don't have any runtime deps.
+    // We cannot have any imports in the module we produce.
+    flags.push_back("-static");
+
+    // Creating a shared library.
+    flags.push_back("-shared");
+
+    // Drop unused sections.
+    flags.push_back("--gc-sections");
+
+    // Hardening (that also makes runtime linking easier):
+    // - bind all import symbols during load
+    // - make all relocations readonly.
+    // See: https://blog.quarkslab.com/clang-hardening-cheat-sheet.html
+    flags.push_back("-z now");
+    flags.push_back("-z relro");
+
+    // Strip local symbols; we only care about the global ones for lookup.
+    // This shrinks the .symtab to a single entry.
+    flags.push_back("--discard-all");
+
+    // Use sysv .hash lookup table only; we have literally a single symbol and
+    // the .gnu.hash overhead is not worth it (either in the ELF or in the
+    // runtime loader).
+    flags.push_back("--hash-style=sysv");
+
+    // Strip debug information (only, no relocations) when not requested.
+    if (!targetOptions.debugSymbols) {
+      flags.push_back("--strip-debug");
+    }
+
+    // Link all input objects. Note that we are not linking whole-archive as
+    // we want to allow dropping of unused codegen outputs.
+    for (auto &objectFile : objectFiles) {
+      flags.push_back(objectFile.path);
+    }
+
+    auto commandLine = llvm::join(flags, " ");
+    if (failed(runLinkCommand(commandLine))) return llvm::None;
+    return artifacts;
+  }
+};
+
+std::unique_ptr<LinkerTool> createEmbeddedLinkerTool(
+    llvm::Triple &targetTriple, LLVMTargetOptions &targetOptions) {
+  return std::make_unique<EmbeddedLinkerTool>(targetTriple, targetOptions);
+}
+
+}  // namespace HAL
+}  // namespace IREE
+}  // namespace iree_compiler
+}  // namespace mlir
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/internal/LinkerTools.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/internal/LinkerTools.cpp
index 73a6f61..b987acf 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/internal/LinkerTools.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/internal/LinkerTools.cpp
@@ -24,6 +24,8 @@
 
 std::unique_ptr<LinkerTool> createAndroidLinkerTool(
     llvm::Triple &targetTriple, LLVMTargetOptions &targetOptions);
+std::unique_ptr<LinkerTool> createEmbeddedLinkerTool(
+    llvm::Triple &targetTriple, LLVMTargetOptions &targetOptions);
 std::unique_ptr<LinkerTool> createRiscvLinkerTool(
     llvm::Triple &targetTriple, LLVMTargetOptions &targetOptions);
 std::unique_ptr<LinkerTool> createUnixLinkerTool(
@@ -36,7 +38,9 @@
 // static
 std::unique_ptr<LinkerTool> LinkerTool::getForTarget(
     llvm::Triple &targetTriple, LLVMTargetOptions &targetOptions) {
-  if (targetTriple.isAndroid()) {
+  if (targetOptions.linkEmbedded) {
+    return createEmbeddedLinkerTool(targetTriple, targetOptions);
+  } else if (targetTriple.isAndroid()) {
     return createAndroidLinkerTool(targetTriple, targetOptions);
   } else if (targetTriple.isOSWindows() ||
              targetTriple.isWindowsMSVCEnvironment()) {
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/internal/UnixLinkerTool.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/internal/UnixLinkerTool.cpp
index e1ebdfc..2ebf686 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/internal/UnixLinkerTool.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/internal/UnixLinkerTool.cpp
@@ -107,7 +107,7 @@
 
     // Strip debug information (only, no relocations) when not requested.
     if (!targetOptions.debugSymbols) {
-      flags.push_back("-Wl,--strip-debug");
+      flags.push_back("--strip-debug");
     }
 
     // Link all input objects. Note that we are not linking whole-archive as
diff --git a/iree/hal/dylib/registration/BUILD b/iree/hal/dylib/registration/BUILD
index 8fbd1fa..565906a 100644
--- a/iree/hal/dylib/registration/BUILD
+++ b/iree/hal/dylib/registration/BUILD
@@ -39,6 +39,7 @@
     deps = [
         "//iree/hal:api",
         "//iree/hal/local:task_driver",
+        "//iree/hal/local/loaders:embedded_library_loader",
         "//iree/hal/local/loaders:legacy_library_loader",
         "@com_google_absl//absl/flags:flag",
     ],
diff --git a/iree/hal/dylib/registration/CMakeLists.txt b/iree/hal/dylib/registration/CMakeLists.txt
index bab9115..2b7953c 100644
--- a/iree/hal/dylib/registration/CMakeLists.txt
+++ b/iree/hal/dylib/registration/CMakeLists.txt
@@ -24,6 +24,7 @@
   DEPS
     absl::flags
     iree::hal::api
+    iree::hal::local::loaders::embedded_library_loader
     iree::hal::local::loaders::legacy_library_loader
     iree::hal::local::task_driver
   DEFINES
diff --git a/iree/hal/dylib/registration/driver_module.cc b/iree/hal/dylib/registration/driver_module.cc
index 1f0a302..f529c71 100644
--- a/iree/hal/dylib/registration/driver_module.cc
+++ b/iree/hal/dylib/registration/driver_module.cc
@@ -17,6 +17,7 @@
 #include <inttypes.h>
 
 #include "absl/flags/flag.h"
+#include "iree/hal/local/loaders/embedded_library_loader.h"
 #include "iree/hal/local/loaders/legacy_library_loader.h"
 #include "iree/hal/local/task_driver.h"
 
@@ -74,10 +75,18 @@
         &topology);
   }
 
-  iree_hal_executable_loader_t* dylib_loader = NULL;
-  iree_status_t status =
-      iree_hal_legacy_library_loader_create(allocator, &dylib_loader);
-  iree_hal_executable_loader_t* loaders[1] = {dylib_loader};
+  iree_status_t status = iree_ok_status();
+
+  iree_hal_executable_loader_t* loaders[2] = {NULL, NULL};
+  iree_host_size_t loader_count = 0;
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_embedded_library_loader_create(allocator,
+                                                     &loaders[loader_count++]);
+  }
+  if (iree_status_is_ok(status)) {
+    status = iree_hal_legacy_library_loader_create(allocator,
+                                                   &loaders[loader_count++]);
+  }
 
   iree_task_executor_t* executor = NULL;
   if (iree_status_is_ok(status)) {
@@ -88,12 +97,14 @@
   if (iree_status_is_ok(status)) {
     status = iree_hal_task_driver_create(
         iree_make_cstring_view("dylib"), &default_params, executor,
-        IREE_ARRAYSIZE(loaders), loaders, allocator, out_driver);
+        loader_count, loaders, allocator, out_driver);
   }
 
   iree_task_executor_release(executor);
   iree_task_topology_deinitialize(&topology);
-  iree_hal_executable_loader_release(dylib_loader);
+  for (iree_host_size_t i = 0; i < loader_count; ++i) {
+    iree_hal_executable_loader_release(loaders[i]);
+  }
   return status;
 }
 
diff --git a/iree/hal/local/elf/BUILD b/iree/hal/local/elf/BUILD
new file mode 100644
index 0000000..228c204
--- /dev/null
+++ b/iree/hal/local/elf/BUILD
@@ -0,0 +1,95 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+#===------------------------------------------------------------------------===#
+# Runtime ELF module loader/linker
+#===------------------------------------------------------------------------===#
+
+cc_library(
+    name = "elf_module",
+    srcs = [
+        "elf_module.c",
+    ],
+    hdrs = [
+        "elf_module.h",
+        "elf_types.h",
+    ],
+    deps = [
+        ":arch",
+        ":platform",
+        "//iree/base:api",
+        "//iree/base:core_headers",
+        "//iree/base:tracing",
+    ],
+)
+
+cc_test(
+    name = "elf_module_test",
+    srcs = ["elf_module_test.cc"],
+    deps = [
+        ":elf_module",
+        "//iree/base:api",
+        "//iree/base:core_headers",
+        "//iree/hal/local:executable_library",
+        "//iree/hal/local/elf/testdata:simple_mul_dispatch",
+        "//iree/testing:gtest",
+        "//iree/testing:gtest_main",
+    ],
+)
+
+#===------------------------------------------------------------------------===#
+# Architecture and platform support
+#===------------------------------------------------------------------------===#
+
+cc_library(
+    name = "arch",
+    srcs = [
+        "arch/aarch64.c",
+        "arch/riscv.c",
+        "arch/x86_64.c",
+        "elf_types.h",
+    ],
+    hdrs = [
+        "arch.h",
+    ],
+    deps = [
+        "//iree/base:api",
+        "//iree/base:core_headers",
+        "//iree/base:tracing",
+    ],
+)
+
+cc_library(
+    name = "platform",
+    srcs = [
+        "platform/apple.c",
+        "platform/generic.c",
+        "platform/linux.c",
+        "platform/windows.c",
+    ],
+    hdrs = [
+        "platform.h",
+    ],
+    deps = [
+        "//iree/base:api",
+        "//iree/base:core_headers",
+        "//iree/base:tracing",
+    ],
+)
diff --git a/iree/hal/local/elf/CMakeLists.txt b/iree/hal/local/elf/CMakeLists.txt
new file mode 100644
index 0000000..3614d27
--- /dev/null
+++ b/iree/hal/local/elf/CMakeLists.txt
@@ -0,0 +1,86 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
+# iree/hal/local/elf/BUILD                                                     #
+#                                                                              #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
+# CMake-only content.                                                          #
+#                                                                              #
+# To disable autogeneration for this file entirely, delete this header.        #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+  NAME
+    elf_module
+  HDRS
+    "elf_module.h"
+    "elf_types.h"
+  SRCS
+    "elf_module.c"
+  DEPS
+    ::arch
+    ::platform
+    iree::base::api
+    iree::base::core_headers
+    iree::base::tracing
+  PUBLIC
+)
+
+iree_cc_test(
+  NAME
+    elf_module_test
+  SRCS
+    "elf_module_test.cc"
+  DEPS
+    ::elf_module
+    iree::base::api
+    iree::base::core_headers
+    iree::hal::local::elf::testdata::simple_mul_dispatch
+    iree::hal::local::executable_library
+    iree::testing::gtest
+    iree::testing::gtest_main
+)
+
+iree_cc_library(
+  NAME
+    arch
+  HDRS
+    "arch.h"
+  SRCS
+    "arch/aarch64.c"
+    "arch/riscv.c"
+    "arch/x86_64.c"
+    "elf_types.h"
+  DEPS
+    iree::base::api
+    iree::base::core_headers
+    iree::base::tracing
+  PUBLIC
+)
+
+iree_cc_library(
+  NAME
+    platform
+  HDRS
+    "platform.h"
+  SRCS
+    "platform/apple.c"
+    "platform/generic.c"
+    "platform/linux.c"
+    "platform/windows.c"
+  DEPS
+    iree::base::api
+    iree::base::core_headers
+    iree::base::tracing
+  PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
+
+# TODO(*): figure out how to make this work on Bazel+Windows.
+if(${MSVC})
+  if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
+    target_sources(iree_hal_local_elf_arch PRIVATE "arch/x86_64_msvc.asm")
+  endif()
+endif()
diff --git a/iree/hal/local/elf/arch.h b/iree/hal/local/elf/arch.h
new file mode 100644
index 0000000..4cef2ec
--- /dev/null
+++ b/iree/hal/local/elf/arch.h
@@ -0,0 +1,64 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef IREE_HAL_LOCAL_ELF_ARCH_H_
+#define IREE_HAL_LOCAL_ELF_ARCH_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/local/elf/elf_types.h"
+
+//==============================================================================
+// ELF machine type/ABI
+//==============================================================================
+
+// Returns true if the reported ELF machine specification is valid.
+bool iree_elf_arch_is_valid(const iree_elf_ehdr_t* ehdr);
+
+//==============================================================================
+// ELF relocations
+//==============================================================================
+
+// State used during relocation.
+typedef struct {
+  // Bias applied to all relative addresses (from the string table, etc) in the
+  // loaded module. This is an offset from the vaddr_base that may not be 0 if
+  // host page granularity was larger than the ELF's defined granularity.
+  uint8_t* vaddr_bias;
+
+  // PT_DYNAMIC table.
+  iree_host_size_t dyn_table_count;
+  const iree_elf_dyn_t* dyn_table;
+} iree_elf_relocation_state_t;
+
+// Applies architecture-specific relocations.
+iree_status_t iree_elf_arch_apply_relocations(
+    iree_elf_relocation_state_t* state);
+
+//==============================================================================
+// Cross-ABI function calls
+//==============================================================================
+
+// TODO(benvanik): add thunk functions (iree_elf_thunk_*) to be used by imports
+// for marshaling from linux ABI in the ELF to host ABI.
+
+// void(*)(void)
+void iree_elf_call_v_v(const void* symbol_ptr);
+
+// void*(*)(int)
+void* iree_elf_call_p_i(const void* symbol_ptr, int a0);
+
+// int(*)(void*, void*)
+int iree_elf_call_i_pp(const void* symbol_ptr, void* a0, void* a1);
+
+#endif  // IREE_HAL_LOCAL_ELF_ARCH_H_
diff --git a/iree/hal/local/elf/arch/aarch64.c b/iree/hal/local/elf/arch/aarch64.c
new file mode 100644
index 0000000..aa6815f
--- /dev/null
+++ b/iree/hal/local/elf/arch/aarch64.c
@@ -0,0 +1,136 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/base/target_platform.h"
+#include "iree/hal/local/elf/arch.h"
+
+#if defined(IREE_ARCH_ARM_64)
+
+// Documentation:
+// https://developer.arm.com/documentation/ihi0056/g/
+
+//==============================================================================
+// ELF machine type/ABI
+//==============================================================================
+
+bool iree_elf_arch_is_valid(const iree_elf_ehdr_t* ehdr) {
+  return ehdr->e_machine == 0xB7;  // EM_AARCH64 / 183
+}
+
+//==============================================================================
+// ELF relocations
+//==============================================================================
+
+enum {
+  IREE_ELF_R_AARCH64_NONE = 0,
+  IREE_ELF_R_AARCH64_ABS64 = 257,
+  IREE_ELF_R_AARCH64_GLOB_DAT = 1025,   // S + A
+  IREE_ELF_R_AARCH64_JUMP_SLOT = 1026,  // S + A
+  IREE_ELF_R_AARCH64_RELATIVE = 1027,   // Delta(S) + A
+};
+
+static iree_status_t iree_elf_arch_aarch64_apply_rela(
+    iree_elf_relocation_state_t* state, iree_host_size_t rela_count,
+    const iree_elf_rela_t* rela_table) {
+  for (iree_host_size_t i = 0; i < rela_count; ++i) {
+    const iree_elf_rela_t* rela = &rela_table[i];
+    uint32_t type = IREE_ELF_R_TYPE(rela->r_info);
+    if (type == 0) continue;
+
+    // TODO(benvanik): support imports by resolving from the import table.
+    iree_elf_addr_t sym_addr = 0;
+    if (IREE_ELF_R_SYM(rela->r_info) != 0) {
+      return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                              "symbol-relative relocations not implemented");
+    }
+
+    iree_elf_addr_t instr_ptr =
+        (iree_elf_addr_t)state->vaddr_bias + rela->r_offset;
+    switch (type) {
+      case IREE_ELF_R_AARCH64_NONE:
+        break;
+      case IREE_ELF_R_AARCH64_ABS64:
+        *(uint64_t*)instr_ptr += (uint64_t)(sym_addr + rela->r_addend);
+        break;
+      case IREE_ELF_R_AARCH64_GLOB_DAT:
+      case IREE_ELF_R_AARCH64_JUMP_SLOT:
+        *(uint64_t*)instr_ptr = (uint64_t)(sym_addr + rela->r_addend);
+        break;
+      case IREE_ELF_R_AARCH64_RELATIVE:
+        *(uint64_t*)instr_ptr = (uint64_t)(state->vaddr_bias + rela->r_addend);
+        break;
+      default:
+        return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                                "unimplemented aarch64 relocation type %08X",
+                                type);
+    }
+  }
+  return iree_ok_status();
+}
+
+iree_status_t iree_elf_arch_apply_relocations(
+    iree_elf_relocation_state_t* state) {
+  // Gather the relevant relocation tables.
+  iree_host_size_t rela_count = 0;
+  const iree_elf_rela_t* rela_table = NULL;
+  for (iree_host_size_t i = 0; i < state->dyn_table_count; ++i) {
+    const iree_elf_dyn_t* dyn = &state->dyn_table[i];
+    switch (dyn->d_tag) {
+      case IREE_ELF_DT_RELA:
+        rela_table =
+            (const iree_elf_rela_t*)(state->vaddr_bias + dyn->d_un.d_ptr);
+        break;
+      case IREE_ELF_DT_RELASZ:
+        rela_count = dyn->d_un.d_val / sizeof(iree_elf_rela_t);
+        break;
+
+      case IREE_ELF_DT_REL:
+      case IREE_ELF_DT_RELSZ:
+        return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                                "unsupported DT_REL relocations");
+      default:
+        // Ignored.
+        break;
+    }
+  }
+  if (!rela_table) rela_count = 0;
+
+  if (rela_count > 0) {
+    IREE_RETURN_IF_ERROR(
+        iree_elf_arch_aarch64_apply_rela(state, rela_count, rela_table));
+  }
+
+  return iree_ok_status();
+}
+
+//==============================================================================
+// Cross-ABI function calls
+//==============================================================================
+
+void iree_elf_call_v_v(const void* symbol_ptr) {
+  typedef void (*ptr_t)(void);
+  ((ptr_t)symbol_ptr)();
+}
+
+void* iree_elf_call_p_i(const void* symbol_ptr, int a0) {
+  typedef void* (*ptr_t)(int);
+  return ((ptr_t)symbol_ptr)(a0);
+}
+
+int iree_elf_call_i_pp(const void* symbol_ptr, void* a0, void* a1) {
+  typedef int (*ptr_t)(void*, void*);
+  return ((ptr_t)symbol_ptr)(a0, a1);
+}
+
+#endif  // IREE_ARCH_ARM_64
diff --git a/iree/hal/local/elf/arch/riscv.c b/iree/hal/local/elf/arch/riscv.c
new file mode 100644
index 0000000..5114c0d
--- /dev/null
+++ b/iree/hal/local/elf/arch/riscv.c
@@ -0,0 +1,179 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/base/target_platform.h"
+#include "iree/hal/local/elf/arch.h"
+
+#if defined(IREE_ARCH_RISCV_32) || defined(IREE_ARCH_RISCV_64)
+
+// Documentation:
+// https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md
+
+//==============================================================================
+// ELF machine type/ABI
+//==============================================================================
+
+bool iree_elf_arch_is_valid(const iree_elf_ehdr_t* ehdr) {
+  return ehdr->e_machine == 0xF3;  // EM_RISCV / 243
+}
+
+//==============================================================================
+// ELF relocations
+//==============================================================================
+
+enum {
+  IREE_ELF_R_RISCV_NONE = 0,
+  IREE_ELF_R_RISCV_32 = 1,
+  IREE_ELF_R_RISCV_64 = 2,
+  IREE_ELF_R_RISCV_RELATIVE = 3,
+  IREE_ELF_R_RISCV_COPY = 4,
+  IREE_ELF_R_RISCV_JUMP_SLOT = 5,
+};
+
+#if defined(IREE_ARCH_RISCV_32)
+static iree_status_t iree_elf_arch_riscv_apply_rela(
+    iree_elf_relocation_state_t* state, iree_host_size_t rela_count,
+    const iree_elf_rela_t* rela_table) {
+  for (iree_host_size_t i = 0; i < rela_count; ++i) {
+    const iree_elf_rela_t* rela = &rela_table[i];
+    uint32_t type = IREE_ELF_R_TYPE(rela->r_info);
+    if (type == 0) continue;
+
+    // TODO(benvanik): support imports by resolving from the import table.
+    iree_elf_addr_t sym_addr = 0;
+    if (IREE_ELF_R_SYM(rela->r_info) != 0) {
+      return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                              "symbol-relative relocations not implemented");
+    }
+
+    iree_elf_addr_t instr_ptr =
+        (iree_elf_addr_t)state->vaddr_bias + rela->r_offset;
+    switch (type) {
+      case IREE_ELF_R_RISCV_NONE:
+        break;
+      case IREE_ELF_R_RISCV_32:
+        *(uint32_t*)instr_ptr = (uint32_t)(sym_addr + rela->r_addend);
+        break;
+      case IREE_ELF_R_RISCV_JUMP_SLOT:
+        *(uint32_t*)instr_ptr = (uint32_t)sym_addr;
+        break;
+      case IREE_ELF_R_RISCV_RELATIVE:
+        *(uint32_t*)instr_ptr = (uint32_t)(state->vaddr_bias + rela->r_addend);
+        break;
+      default:
+        return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                                "unimplemented riscv32 relocation type %08X",
+                                type);
+    }
+  }
+  return iree_ok_status();
+}
+#else   // IREE_ARCH_RISCV_64
+static iree_status_t iree_elf_arch_riscv_apply_rela(
+    iree_elf_relocation_state_t* state, iree_host_size_t rela_count,
+    const iree_elf_rela_t* rela_table) {
+  for (iree_host_size_t i = 0; i < rela_count; ++i) {
+    const iree_elf_rela_t* rela = &rela_table[i];
+    uint32_t type = IREE_ELF_R_TYPE(rela->r_info);
+    if (type == 0) continue;
+
+    // TODO(benvanik): support imports by resolving from the import table.
+    iree_elf_addr_t sym_addr = 0;
+    if (IREE_ELF_R_SYM(rela->r_info) != 0) {
+      return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                              "symbol-relative relocations not implemented");
+    }
+
+    iree_elf_addr_t instr_ptr =
+        (iree_elf_addr_t)state->vaddr_bias + rela->r_offset;
+    switch (type) {
+      case IREE_ELF_R_RISCV_NONE:
+        break;
+      case IREE_ELF_R_RISCV_32:
+        *(uint32_t*)instr_ptr = (uint32_t)(sym_addr + rela->r_addend);
+        break;
+      case IREE_ELF_R_RISCV_64:
+        *(uint64_t*)instr_ptr = (uint64_t)(sym_addr + rela->r_addend);
+        break;
+      case IREE_ELF_R_RISCV_JUMP_SLOT:
+        *(uint64_t*)instr_ptr = (uint64_t)sym_addr;
+        break;
+      case IREE_ELF_R_RISCV_RELATIVE:
+        *(uint64_t*)instr_ptr = (uint64_t)(state->vaddr_bias + rela->r_addend);
+        break;
+      default:
+        return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                                "unimplemented riscv64 relocation type %08X",
+                                type);
+    }
+  }
+  return iree_ok_status();
+}
+#endif  // IREE_ARCH_RISCV_*
+
+iree_status_t iree_elf_arch_apply_relocations(
+    iree_elf_relocation_state_t* state) {
+  // Gather the relevant relocation tables.
+  iree_host_size_t rela_count = 0;
+  const iree_elf_rela_t* rela_table = NULL;
+  for (iree_host_size_t i = 0; i < state->dyn_table_count; ++i) {
+    const iree_elf_dyn_t* dyn = &state->dyn_table[i];
+    switch (dyn->d_tag) {
+      case IREE_ELF_DT_RELA:
+        rela_table =
+            (const iree_elf_rela_t*)(state->vaddr_bias + dyn->d_un.d_ptr);
+        break;
+      case IREE_ELF_DT_RELASZ:
+        rela_count = dyn->d_un.d_val / sizeof(iree_elf_rela_t);
+        break;
+
+      case IREE_ELF_DT_REL:
+      case IREE_ELF_DT_RELSZ:
+        return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                                "unsupported DT_REL relocations");
+      default:
+        // Ignored.
+        break;
+    }
+  }
+  if (!rela_table) rela_count = 0;
+
+  if (rela_count > 0) {
+    IREE_RETURN_IF_ERROR(
+        iree_elf_arch_riscv_apply_rela(state, rela_count, rela_table));
+  }
+
+  return iree_ok_status();
+}
+
+//==============================================================================
+// Cross-ABI function calls
+//==============================================================================
+
+void iree_elf_call_v_v(const void* symbol_ptr) {
+  typedef void (*ptr_t)(void);
+  ((ptr_t)symbol_ptr)();
+}
+
+void* iree_elf_call_p_i(const void* symbol_ptr, int a0) {
+  typedef void* (*ptr_t)(int);
+  return ((ptr_t)symbol_ptr)(a0);
+}
+
+int iree_elf_call_i_pp(const void* symbol_ptr, void* a0, void* a1) {
+  typedef int (*ptr_t)(void*, void*);
+  return ((ptr_t)symbol_ptr)(a0, a1);
+}
+
+#endif  // IREE_ARCH_RISCV_*
diff --git a/iree/hal/local/elf/arch/x86_64.c b/iree/hal/local/elf/arch/x86_64.c
new file mode 100644
index 0000000..a3181fe
--- /dev/null
+++ b/iree/hal/local/elf/arch/x86_64.c
@@ -0,0 +1,203 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/base/target_platform.h"
+#include "iree/hal/local/elf/arch.h"
+
+#if defined(IREE_ARCH_X86_64)
+
+// Documentation:
+// https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
+
+//==============================================================================
+// ELF machine type/ABI
+//==============================================================================
+
+bool iree_elf_arch_is_valid(const iree_elf_ehdr_t* ehdr) {
+  return ehdr->e_machine == 0x3E;  // EM_X86_64 / 62
+}
+
+//==============================================================================
+// ELF relocations
+//==============================================================================
+
+enum {
+  IREE_ELF_R_X86_64_NONE = 0,       // No reloc
+  IREE_ELF_R_X86_64_64 = 1,         // Direct 64 bit
+  IREE_ELF_R_X86_64_PC32 = 2,       // PC relative 32 bit signed
+  IREE_ELF_R_X86_64_GOT32 = 3,      // 32 bit GOT entry
+  IREE_ELF_R_X86_64_PLT32 = 4,      // 32 bit PLT address
+  IREE_ELF_R_X86_64_COPY = 5,       // Copy symbol at runtime
+  IREE_ELF_R_X86_64_GLOB_DAT = 6,   // Create GOT entry
+  IREE_ELF_R_X86_64_JUMP_SLOT = 7,  // Create PLT entry
+  IREE_ELF_R_X86_64_RELATIVE = 8,   // Adjust by program base
+  IREE_ELF_R_X86_64_GOTPCREL = 9,   // 32 bit signed pc relative offset to GOT
+  IREE_ELF_R_X86_64_32 = 10,        // Direct 32 bit zero extended
+  IREE_ELF_R_X86_64_32S = 11,       // Direct 32 bit sign extended
+  IREE_ELF_R_X86_64_16 = 12,        // Direct 16 bit zero extended
+  IREE_ELF_R_X86_64_PC16 = 13,      // 16 bit sign extended pc relative
+  IREE_ELF_R_X86_64_8 = 14,         // Direct 8 bit sign extended
+  IREE_ELF_R_X86_64_PC8 = 15,       // 8 bit sign extended pc relative
+  IREE_ELF_R_X86_64_PC64 = 24,      // Place relative 64-bit signed
+};
+
+static iree_status_t iree_elf_arch_x86_64_apply_rela(
+    iree_elf_relocation_state_t* state, iree_host_size_t rela_count,
+    const iree_elf_rela_t* rela_table) {
+  for (iree_host_size_t i = 0; i < rela_count; ++i) {
+    const iree_elf_rela_t* rela = &rela_table[i];
+    uint32_t type = IREE_ELF_R_TYPE(rela->r_info);
+    if (type == IREE_ELF_R_X86_64_NONE) continue;
+
+    // TODO(benvanik): support imports by resolving from the import table.
+    iree_elf_addr_t sym_addr = 0;
+    if (IREE_ELF_R_SYM(rela->r_info) != 0) {
+      return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                              "symbol-relative relocations not implemented");
+    }
+
+    iree_elf_addr_t instr_ptr =
+        (iree_elf_addr_t)state->vaddr_bias + rela->r_offset;
+    switch (type) {
+      // case IREE_ELF_R_X86_64_NONE: early-exit above
+      case IREE_ELF_R_X86_64_RELATIVE:
+        *(uint64_t*)instr_ptr = (uint64_t)(state->vaddr_bias + rela->r_addend);
+        break;
+      case IREE_ELF_R_X86_64_JUMP_SLOT:
+        *(uint64_t*)instr_ptr = (uint64_t)sym_addr;
+        break;
+      case IREE_ELF_R_X86_64_GLOB_DAT:
+        *(uint64_t*)instr_ptr = (uint64_t)sym_addr;
+        break;
+      case IREE_ELF_R_X86_64_COPY:
+        *(uint64_t*)instr_ptr = (uint64_t)sym_addr;
+        break;
+      case IREE_ELF_R_X86_64_64:
+        *(uint64_t*)instr_ptr = (uint64_t)(sym_addr + rela->r_addend);
+        break;
+      case IREE_ELF_R_X86_64_32:
+        *(uint32_t*)instr_ptr = (uint32_t)(sym_addr + rela->r_addend);
+        break;
+      case IREE_ELF_R_X86_64_32S:
+        *(int32_t*)instr_ptr = (int32_t)(sym_addr + rela->r_addend);
+        break;
+      case IREE_ELF_R_X86_64_PC32:
+        *(uint32_t*)instr_ptr =
+            (uint32_t)(sym_addr + rela->r_addend - instr_ptr);
+        break;
+      default:
+        return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                                "unimplemented x86_64 relocation type %08X",
+                                type);
+    }
+  }
+  return iree_ok_status();
+}
+
+iree_status_t iree_elf_arch_apply_relocations(
+    iree_elf_relocation_state_t* state) {
+  // Gather the relevant relocation tables.
+  iree_host_size_t rela_count = 0;
+  const iree_elf_rela_t* rela_table = NULL;
+  iree_host_size_t plt_rela_count = 0;
+  const iree_elf_rela_t* plt_rela_table = NULL;
+  for (iree_host_size_t i = 0; i < state->dyn_table_count; ++i) {
+    const iree_elf_dyn_t* dyn = &state->dyn_table[i];
+    switch (dyn->d_tag) {
+      case IREE_ELF_DT_RELA:
+        rela_table =
+            (const iree_elf_rela_t*)(state->vaddr_bias + dyn->d_un.d_ptr);
+        break;
+      case IREE_ELF_DT_RELASZ:
+        rela_count = dyn->d_un.d_val / sizeof(iree_elf_rela_t);
+        break;
+
+      case IREE_ELF_DT_PLTREL:
+        // Type of reloc in PLT; we expect DT_RELA right now.
+        if (dyn->d_un.d_val != IREE_ELF_DT_RELA) {
+          return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                                  "unsupported DT_PLTREL != DT_RELA");
+        }
+        break;
+      case IREE_ELF_DT_JMPREL:
+        plt_rela_table =
+            (const iree_elf_rela_t*)(state->vaddr_bias + dyn->d_un.d_ptr);
+        break;
+      case IREE_ELF_DT_PLTRELSZ:
+        plt_rela_count = dyn->d_un.d_val / sizeof(iree_elf_rela_t);
+        break;
+
+      case IREE_ELF_DT_REL:
+      case IREE_ELF_DT_RELSZ:
+        return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                                "unsupported DT_REL relocations");
+
+      default:
+        // Ignored.
+        break;
+    }
+  }
+  if (!rela_table) rela_count = 0;
+  if (!plt_rela_table) plt_rela_count = 0;
+
+  if (rela_count > 0) {
+    IREE_RETURN_IF_ERROR(
+        iree_elf_arch_x86_64_apply_rela(state, rela_count, rela_table));
+  }
+  if (plt_rela_count > 0) {
+    IREE_RETURN_IF_ERROR(
+        iree_elf_arch_x86_64_apply_rela(state, plt_rela_count, plt_rela_table));
+  }
+
+  return iree_ok_status();
+}
+
+//==============================================================================
+// Cross-ABI function calls
+//==============================================================================
+
+// System V AMD64 ABI (used in IREE):
+// https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
+// Arguments:
+//   RDI, RSI, RDX, RCX, R8, R9, [stack]...
+// Results:
+//   RAX, RDX
+//
+// Everything but Windows uses this convention (linux/bsd/mac/etc) and as such
+// we can just use nice little C thunks.
+
+#if defined(IREE_PLATFORM_WINDOWS)
+// Host is using the Microsoft x64 calling convention and we need to translate
+// to the System V AMD64 ABI conventions. Unfortunately MSVC does not support
+// inline assembly and we have to outline the calls in x86_64_msvc.asm.
+#else
+
+void iree_elf_call_v_v(const void* symbol_ptr) {
+  typedef void (*ptr_t)(void);
+  ((ptr_t)symbol_ptr)();
+}
+
+void* iree_elf_call_p_i(const void* symbol_ptr, int a0) {
+  typedef void* (*ptr_t)(int);
+  return ((ptr_t)symbol_ptr)(a0);
+}
+
+int iree_elf_call_i_pp(const void* symbol_ptr, void* a0, void* a1) {
+  typedef int (*ptr_t)(void*, void*);
+  return ((ptr_t)symbol_ptr)(a0, a1);
+}
+
+#endif  // IREE_PLATFORM_WINDOWS
+
+#endif  // IREE_ARCH_X86_64
diff --git a/iree/hal/local/elf/arch/x86_64_msvc.asm b/iree/hal/local/elf/arch/x86_64_msvc.asm
new file mode 100644
index 0000000..1b8ffec
--- /dev/null
+++ b/iree/hal/local/elf/arch/x86_64_msvc.asm
@@ -0,0 +1,167 @@
+; Copyright 2021 Google LLC
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      https://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+; Microsoft x64 calling convention:
+; https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention
+; Arguments:
+;   RCX, RDX, R8, R9, [stack]...
+; Results:
+;   RAX
+; Non-volatile:
+;   RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15, and XMM6-XMM15
+;
+; System V AMD64 ABI (used in IREE):
+; https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
+; Arguments:
+;   RDI, RSI, RDX, RCX, R8, R9, [stack]...
+; Results:
+;   RAX, RDX
+
+; Total size of non-volatile XMM registers.
+_SYSV_INTEROP_STACK_SIZE = 10 * 10h
+
+; Function prolog that saves registers that we may clobber while in code
+; following the SYS-V x64 ABI.
+;
+; This also encodes unwind table information (.xdata/.pdata) that is used by
+; debuggers/backtrace/etc to be able to look through the function on the stack.
+; Though they debugger will be totally confused by the function we call into
+; (it'll be expecting the Microsoft conventions and won't find them) it'll at
+; least let us see the leaf guest function instead of just a bunch of our
+; iree_elf_call_* thunks.
+; Docs suck but we are in black magic territory so it's expected:
+; https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64?view=msvc-160#unwind-helpers-for-masm
+_sysv_interop_prolog MACRO
+  ; Save volatile general purpose registers to the stack.
+  push rbp
+  .pushreg rbp
+  mov rbp, rsp
+  .setframe rbp, 0
+  push rbx
+  .pushreg rbx
+  push rdi
+  .pushreg rdi
+  push rsi
+  .pushreg rsi
+  push r12
+  .pushreg r12
+  push r13
+  .pushreg r13
+  push r14
+  .pushreg r14
+  push r15
+  .pushreg r15
+
+  ; Setup stack space for storing the SIMD registers.
+  ; NOTE: we adjust this by 8 bytes to get on a 16-byte alignment so we can
+  ; use the aligned movaps instruction.
+  sub rsp, _SYSV_INTEROP_STACK_SIZE + 8
+  .allocstack _SYSV_INTEROP_STACK_SIZE + 8
+
+  ; Save volatile SIMD registers to the stack.
+  movaps [rsp + 00h], xmm6
+  .savexmm128 xmm6, 00h
+  movaps [rsp + 10h], xmm7
+  .savexmm128 xmm7, 10h
+  movaps [rsp + 20h], xmm8
+  .savexmm128 xmm8, 20h
+  movaps [rsp + 30h], xmm9
+  .savexmm128 xmm9, 30h
+  movaps [rsp + 40h], xmm10
+  .savexmm128 xmm10, 40h
+  movaps [rsp + 50h], xmm11
+  .savexmm128 xmm11, 50h
+  movaps [rsp + 60h], xmm12
+  .savexmm128 xmm12, 60h
+  movaps [rsp + 70h], xmm13
+  .savexmm128 xmm13, 70h
+  movaps [rsp + 80h], xmm14
+  .savexmm128 xmm14, 80h
+  movaps [rsp + 90h], xmm15
+  .savexmm128 xmm15, 90h
+
+  .endprolog
+ENDM
+
+; Function epilog that restores registers that we may have clobbered while in
+; code following the SYS-V x64 ABI.
+_sysv_interop_epilog MACRO
+  ; Restore volatile SIMD registers from the stack.
+  movaps xmm6, [rsp + 00h]
+  movaps xmm7, [rsp + 10h]
+  movaps xmm8, [rsp + 20h]
+  movaps xmm9, [rsp + 30h]
+  movaps xmm10, [rsp + 40h]
+  movaps xmm11, [rsp + 50h]
+  movaps xmm12, [rsp + 60h]
+  movaps xmm13, [rsp + 70h]
+  movaps xmm14, [rsp + 80h]
+  movaps xmm15, [rsp + 90h]
+  add rsp, _SYSV_INTEROP_STACK_SIZE + 8
+
+  ; Restore volatile general purpose registers from the stack.
+  pop r15
+  pop r14
+  pop r13
+  pop r12
+  pop rsi
+  pop rdi
+  pop rbx
+  leave  ; mov rsp, rbp + pop ebp
+ENDM
+
+_TEXT SEGMENT
+ALIGN 16
+
+; void iree_elf_call_v_v(const void* symbol_ptr)
+iree_elf_call_v_v PROC FRAME
+  _sysv_interop_prolog
+
+  ; RCX = symbol_ptr
+  call rcx
+
+  _sysv_interop_epilog
+  ret
+iree_elf_call_v_v ENDP
+
+; void* iree_elf_call_p_i(const void* symbol_ptr, int a0)
+iree_elf_call_p_i PROC FRAME
+  _sysv_interop_prolog
+
+  ; RCX = symbol_ptr
+  ; RDX = a0
+  mov rdi, rdx
+  call rcx
+
+  _sysv_interop_epilog
+  ret
+iree_elf_call_p_i ENDP
+
+; int iree_elf_call_i_pp(const void* symbol_ptr, void* a0, void* a1)
+iree_elf_call_i_pp PROC FRAME
+  _sysv_interop_prolog
+
+  ; RCX = symbol_ptr
+  ; RDX = a0
+  ; R8 = a1
+  mov rdi, rdx
+  mov rsi, r8
+  call rcx
+
+  _sysv_interop_epilog
+  ret
+iree_elf_call_i_pp ENDP
+
+_TEXT ENDS
+END
diff --git a/iree/hal/local/elf/elf_module.c b/iree/hal/local/elf/elf_module.c
new file mode 100644
index 0000000..c26dd55
--- /dev/null
+++ b/iree/hal/local/elf/elf_module.c
@@ -0,0 +1,637 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/hal/local/elf/elf_module.h"
+
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/local/elf/platform.h"
+
+//==============================================================================
+// Verification and section/info caching
+//==============================================================================
+
+// Fields taken from the ELF headers used only during verification and loading.
+typedef struct {
+  iree_memory_info_t memory_info;
+  const iree_elf_ehdr_t* ehdr;
+  const iree_elf_phdr_t* phdr_table;  // ehdr.e_phnum has count
+  const iree_elf_shdr_t* shdr_table;  // ehdr.e_shnum has count
+
+  const iree_elf_dyn_t* dyn_table;  // PT_DYNAMIC
+  iree_host_size_t dyn_table_count;
+
+  iree_elf_addr_t init;               // DT_INIT
+  const iree_elf_addr_t* init_array;  // DT_INIT_ARRAY
+  iree_host_size_t init_array_count;  // DT_INIT_ARRAYSZ
+} iree_elf_module_load_state_t;
+
+// Verifies the ELF file header and machine class.
+static iree_status_t iree_elf_module_verify_ehdr(
+    iree_const_byte_span_t raw_data) {
+  // Size must be larger than the header we are trying to load.
+  if (raw_data.data_length < sizeof(iree_elf_ehdr_t)) {
+    return iree_make_status(
+        IREE_STATUS_FAILED_PRECONDITION,
+        "ELF data provided (%zu) is smaller than ehdr (%zu)",
+        raw_data.data_length, sizeof(iree_elf_ehdr_t));
+  }
+
+  // Check for ELF identifier.
+  const iree_elf_ehdr_t* ehdr = (const iree_elf_ehdr_t*)raw_data.data;
+  static const iree_elf_byte_t elf_magic[4] = {0x7F, 'E', 'L', 'F'};
+  if (memcmp(ehdr->e_ident, elf_magic, sizeof(elf_magic)) != 0) {
+    return iree_make_status(
+        IREE_STATUS_FAILED_PRECONDITION,
+        "data provided does not contain the ELF identifier");
+  }
+
+  // Check critical identifier bytes before attempting to deal with any more of
+  // the header; the class determines the size of the header fields and the
+  // endianness determines how multi-byte fields are interpreted.
+
+#if defined(IREE_PTR_SIZE_32)
+  if (ehdr->e_ident[IREE_ELF_EI_CLASS] != IREE_ELF_ELFCLASS32) {
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                            "system/ELF class mismatch: expected 32-bit");
+  }
+#elif defined(IREE_PTR_SIZE_64)
+  if (ehdr->e_ident[IREE_ELF_EI_CLASS] != IREE_ELF_ELFCLASS64) {
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                            "system/ELF class mismatch: expected 64-bit");
+  }
+#endif  // IREE_PTR_SIZE_*
+
+#if defined(IREE_ENDIANNESS_LITTLE)
+  if (ehdr->e_ident[IREE_ELF_EI_DATA] != IREE_ELF_ELFDATA2LSB) {
+    return iree_make_status(
+        IREE_STATUS_FAILED_PRECONDITION,
+        "system/ELF endianness mismatch: expected little-endian");
+  }
+#else
+  if (ehdr->e_ident[IREE_ELF_EI_DATA] != IREE_ELF_ELFDATA2MSB) {
+    return iree_make_status(
+        IREE_STATUS_FAILED_PRECONDITION,
+        "system/ELF endianness mismatch: expected big-endian");
+  }
+#endif  // IREE_ENDIANNESS_*
+
+  // ELF version == EV_CURRENT (1) is all we handle.
+  // Check this before other fields as they could change meaning in other
+  // versions.
+  if (ehdr->e_version != 1) {
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                            "ELF version %u unsupported; expected 1");
+  }
+
+  // Ensure we have the right architecture compiled in.
+  if (!iree_elf_arch_is_valid(ehdr)) {
+    return iree_make_status(
+        IREE_STATUS_FAILED_PRECONDITION,
+        "ELF machine specification (%04X) does not match the "
+        "running architecture",
+        (uint32_t)ehdr->e_machine);
+  }
+
+  // We could probably support non-shared object types but no need today and it
+  // allows us to make assumptions about the sections that are present (all
+  // those marked as 'mandatory' in the spec.
+  if (ehdr->e_type != IREE_ELF_ET_DYN) {
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                            "only shared object ELFs are supported");
+  }
+
+  // Sanity checks on entity sizes - they can be larger than what we expect,
+  // but overlaying our structs onto them is not going to work if they are
+  // smaller. For now we aren't doing pointer walks based on dynamic sizes so
+  // we need equality, but if we ever have a reason to do so we could change all
+  // array-style accesses to scale out based on the ehdr values
+  if (ehdr->e_ehsize != sizeof(iree_elf_ehdr_t) ||
+      ehdr->e_phentsize != sizeof(iree_elf_phdr_t) ||
+      ehdr->e_shentsize != sizeof(iree_elf_shdr_t)) {
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                            "ELF entity size mismatch");
+  }
+
+  // Verify the phdr table properties. This doesn't validate each phdr but just
+  // ensures that the table is constructed correctly and within bounds.
+  if (ehdr->e_phoff == 0 || ehdr->e_phnum == 0 ||
+      (ehdr->e_phoff + ehdr->e_phnum * ehdr->e_phentsize) >
+          raw_data.data_length) {
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                            "invalid mandatory phdr table");
+  }
+
+  // Verify the shdr table properties.
+  if (ehdr->e_shoff == 0 || ehdr->e_shnum == 0 ||
+      (ehdr->e_shoff + ehdr->e_shnum * ehdr->e_shentsize) >
+          raw_data.data_length) {
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                            "invalid mandatory shdr table");
+  }
+
+  return iree_ok_status();
+}
+
+// Verifies the phdr table for supported types and in-bounds file references.
+static iree_status_t iree_elf_module_verify_phdr_table(
+    iree_const_byte_span_t raw_data, iree_elf_module_load_state_t* load_state) {
+  for (iree_elf_half_t i = 0; i < load_state->ehdr->e_phnum; ++i) {
+    const iree_elf_phdr_t* phdr = &load_state->phdr_table[i];
+    if (phdr->p_type != IREE_ELF_PT_LOAD) continue;
+    if (phdr->p_offset + phdr->p_filesz > raw_data.data_length) {
+      return iree_make_status(
+          IREE_STATUS_FAILED_PRECONDITION,
+          "phdr reference outside of file extents: %zu-%zu of max %zu",
+          phdr->p_offset, phdr->p_offset + phdr->p_filesz,
+          raw_data.data_length);
+    }
+  }
+  return iree_ok_status();
+}
+
+// Parses the ELF to populate fields used during loading and runtime and verify
+// that the ELF matches our very, very low expectations.
+static iree_status_t iree_elf_module_parse_headers(
+    iree_const_byte_span_t raw_data,
+    iree_elf_module_load_state_t* out_load_state,
+    iree_elf_module_t* out_module) {
+  memset(out_module, 0, sizeof(*out_module));
+  memset(out_load_state, 0, sizeof(*out_load_state));
+
+  // Query the host memory information that we can use to verify we are able to
+  // meet the alignment requirements of the ELF.
+  iree_memory_query_info(&out_load_state->memory_info);
+
+  // Verify the ELF is an ELF and that it's for the current machine.
+  // NOTE: this only verifies the ehdr is as expected and nothing else: the ELF
+  // is still untrusted and may be missing mandatory sections.
+  IREE_RETURN_IF_ERROR(iree_elf_module_verify_ehdr(raw_data));
+
+  // Get the primary tables (locations verified above).
+  const iree_elf_ehdr_t* ehdr = (const iree_elf_ehdr_t*)raw_data.data;
+  const iree_elf_phdr_t* phdr_table =
+      (const iree_elf_phdr_t*)(raw_data.data + ehdr->e_phoff);
+  const iree_elf_shdr_t* shdr_table =
+      (const iree_elf_shdr_t*)(raw_data.data + ehdr->e_shoff);
+  out_load_state->ehdr = ehdr;
+  out_load_state->phdr_table = phdr_table;
+  out_load_state->shdr_table = shdr_table;
+
+  // Verify the phdr table to ensure all bounds are in range of the file.
+  IREE_RETURN_IF_ERROR(
+      iree_elf_module_verify_phdr_table(raw_data, out_load_state));
+
+  return iree_ok_status();
+}
+
+//==============================================================================
+// Allocation and layout
+//==============================================================================
+
+// Calculates the in-memory layout of the ELF module as defined by its segments.
+// Returns a byte range representing the minimum virtual address offset of any
+// segment that can be used to offset the vaddr from the host allocation and the
+// total length of the required range. The alignment will meet the requirements
+// of the ELF but is yet unadjusted for host requirements. The range will have
+// zero length if there are no segments to load (which would be weird).
+static iree_byte_range_t iree_elf_module_calculate_vaddr_range(
+    iree_elf_module_load_state_t* load_state) {
+  // Min/max virtual addresses of any allocated segment.
+  iree_elf_addr_t vaddr_min = IREE_ELF_ADDR_MAX;
+  iree_elf_addr_t vaddr_max = IREE_ELF_ADDR_MIN;
+  for (iree_elf_half_t i = 0; i < load_state->ehdr->e_phnum; ++i) {
+    const iree_elf_phdr_t* phdr = &load_state->phdr_table[i];
+    if (phdr->p_type != IREE_ELF_PT_LOAD) continue;
+    iree_elf_addr_t p_vaddr_min =
+        iree_page_align_start(phdr->p_vaddr, phdr->p_align);
+    iree_elf_addr_t p_vaddr_max =
+        iree_page_align_end(phdr->p_vaddr + phdr->p_memsz, phdr->p_align);
+    vaddr_min = iree_min(vaddr_min, p_vaddr_min);
+    vaddr_max = iree_max(vaddr_max, p_vaddr_max);
+  }
+  if (vaddr_min == IREE_ELF_ADDR_MAX) {
+    // Did not find any segments to load.
+    vaddr_min = IREE_ELF_ADDR_MIN;
+    vaddr_max = IREE_ELF_ADDR_MIN;
+  }
+  iree_byte_range_t byte_range = {
+      .offset = (iree_host_size_t)vaddr_min,
+      .length = (iree_host_size_t)(vaddr_max - vaddr_min),
+  };
+  return byte_range;
+}
+
+// Allocates space for and loads all DT_LOAD segments into the host virtual
+// address space.
+static iree_status_t iree_elf_module_load_segments(
+    iree_const_byte_span_t raw_data, iree_elf_module_load_state_t* load_state,
+    iree_elf_module_t* module) {
+  // Calculate the total internally-aligned vaddr range.
+  iree_byte_range_t vaddr_range =
+      iree_elf_module_calculate_vaddr_range(load_state);
+
+  // Reserve virtual address space in the host memory space. This memory is
+  // uncommitted by default as the ELF may only sparsely use the address space.
+  module->vaddr_size = iree_page_align_end(
+      vaddr_range.length, load_state->memory_info.normal_page_size);
+  IREE_RETURN_IF_ERROR(iree_memory_view_reserve(
+      IREE_MEMORY_VIEW_FLAG_MAY_EXECUTE, module->vaddr_size,
+      (void**)&module->vaddr_base));
+  module->vaddr_bias = module->vaddr_base - vaddr_range.offset;
+
+  // Commit and load all of the segments.
+  for (iree_elf_half_t i = 0; i < load_state->ehdr->e_phnum; ++i) {
+    const iree_elf_phdr_t* phdr = &load_state->phdr_table[i];
+    if (phdr->p_type != IREE_ELF_PT_LOAD) continue;
+
+    // Commit the range of pages used by this segment, initially with write
+    // access so that we can modify the pages.
+    iree_byte_range_t byte_range = {
+        .offset = phdr->p_vaddr,
+        .length = phdr->p_memsz,
+    };
+    IREE_RETURN_IF_ERROR(iree_memory_view_commit_ranges(
+        module->vaddr_bias, 1, &byte_range,
+        IREE_MEMORY_ACCESS_READ | IREE_MEMORY_ACCESS_WRITE));
+
+    // Copy data present in the file.
+    // TODO(benvanik): infra for being able to detect if the source model is in
+    // a mapped file - if it is, we can remap the page and directly reference it
+    // here for read-only segments and setup copy-on-write for writeable ones.
+    // We'd need a way to pass in the underlying mapping and some guarantees on
+    // the lifetime of it. Today we are just always committing above and copying
+    // here because it keeps this all super simple (you know, as simple as an
+    // entire custom ELF loader can be :).
+    if (phdr->p_filesz > 0) {
+      memcpy(module->vaddr_bias + phdr->p_vaddr, raw_data.data + phdr->p_offset,
+             phdr->p_filesz);
+    }
+
+    // NOTE: p_memsz may be larger than p_filesz - if so, the extra memory bytes
+    // must be zeroed. We require that the initial allocation is zeroed anyway
+    // so this is a no-op.
+
+    // NOTE: the pages are still writeable; we need to apply relocations before
+    // we can go back through and remove write access from read-only/executable
+    // pages in iree_elf_module_protect_segments.
+  }
+
+  return iree_ok_status();
+}
+
+// Applies segment memory protection attributes.
+// This will make pages read-only and must only be performed after relocation
+// (which writes to pages of all types). Executable pages will be flushed from
+// the instruction cache.
+static iree_status_t iree_elf_module_protect_segments(
+    iree_elf_module_load_state_t* load_state, iree_elf_module_t* module) {
+  // PT_LOAD segments (the bulk of progbits):
+  for (iree_elf_half_t i = 0; i < load_state->ehdr->e_phnum; ++i) {
+    const iree_elf_phdr_t* phdr = &load_state->phdr_table[i];
+    if (phdr->p_type != IREE_ELF_PT_LOAD) continue;
+
+    // Interpret the access bits and widen to the implicit allowable
+    // permissions. See Table 7-37:
+    // https://docs.oracle.com/cd/E19683-01/816-1386/6m7qcoblk/index.html#chapter6-34713
+    iree_memory_access_t access = 0;
+    if (phdr->p_flags & IREE_ELF_PF_R) access |= IREE_MEMORY_ACCESS_READ;
+    if (phdr->p_flags & IREE_ELF_PF_W) access |= IREE_MEMORY_ACCESS_WRITE;
+    if (phdr->p_flags & IREE_ELF_PF_X) access |= IREE_MEMORY_ACCESS_EXECUTE;
+    if (access & IREE_MEMORY_ACCESS_WRITE) access |= IREE_MEMORY_ACCESS_READ;
+    if (access & IREE_MEMORY_ACCESS_EXECUTE) access |= IREE_MEMORY_ACCESS_READ;
+
+    // We only support R+X (no W).
+    if ((phdr->p_flags & IREE_ELF_PF_X) && (phdr->p_flags & IREE_ELF_PF_W)) {
+      return iree_make_status(IREE_STATUS_PERMISSION_DENIED,
+                              "unable to create a writable executable segment");
+    }
+
+    // Apply new access protection.
+    iree_byte_range_t byte_range = {
+        .offset = phdr->p_vaddr,
+        .length = phdr->p_memsz,
+    };
+    IREE_RETURN_IF_ERROR(iree_memory_view_protect_ranges(module->vaddr_bias, 1,
+                                                         &byte_range, access));
+
+    // Flush the instruction cache if we are going to execute these pages.
+    if (access & IREE_MEMORY_ACCESS_EXECUTE) {
+      iree_memory_view_flush_icache(module->vaddr_bias + phdr->p_vaddr,
+                                    phdr->p_memsz);
+    }
+  }
+
+  // PT_GNU_RELRO: hardening of post-relocation segments.
+  // These may alias with segments above and must be processed afterward.
+  for (iree_elf_half_t i = 0; i < load_state->ehdr->e_phnum; ++i) {
+    const iree_elf_phdr_t* phdr = &load_state->phdr_table[i];
+    if (phdr->p_type != IREE_ELF_PT_GNU_RELRO) continue;
+    iree_byte_range_t byte_range = {
+        .offset = phdr->p_vaddr,
+        .length = phdr->p_memsz,
+    };
+    IREE_RETURN_IF_ERROR(iree_memory_view_protect_ranges(
+        module->vaddr_bias, 1, &byte_range, IREE_MEMORY_ACCESS_READ));
+  }
+
+  return iree_ok_status();
+}
+
+// Unloads the ELF segments from memory and releases the host virtual address
+// space reservation.
+static void iree_elf_module_unload_segments(iree_elf_module_t* module) {
+  // Decommit/unreserve the entire memory space.
+  if (module->vaddr_base != NULL) {
+    iree_memory_view_release(module->vaddr_base, module->vaddr_size);
+  }
+  module->vaddr_base = NULL;
+  module->vaddr_bias = NULL;
+  module->vaddr_size = 0;
+}
+
+//==============================================================================
+// Dynamic library handling
+//==============================================================================
+// NOTE: this happens *after* allocation and loading as the .dynsym and related
+// segments are allocated and loaded in virtual address space.
+
+// Parses, verifies, and populates dynamic symbol related tables for runtime
+// use. These tables are all in allocated memory and use fully rebased virtual
+// addresses.
+static iree_status_t iree_elf_module_parse_dynamic_tables(
+    iree_elf_module_load_state_t* load_state, iree_elf_module_t* module) {
+  // By the spec there must only be one PT_DYNAMIC.
+  // Note that we are getting the one in the loaded virtual address space.
+  const iree_elf_dyn_t* dyn_table = NULL;
+  iree_host_size_t dyn_table_count = 0;
+  for (iree_elf_half_t i = 0; i < load_state->ehdr->e_phnum; ++i) {
+    const iree_elf_phdr_t* phdr = &load_state->phdr_table[i];
+    if (phdr->p_type == IREE_ELF_PT_DYNAMIC) {
+      dyn_table = (const iree_elf_dyn_t*)(module->vaddr_bias + phdr->p_vaddr);
+      dyn_table_count = phdr->p_filesz / sizeof(iree_elf_dyn_t);
+      break;
+    }
+  }
+  if (!dyn_table || !dyn_table_count) {
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                            "no PT_DYNAMIC/.dynamic segment");
+  }
+  load_state->dyn_table = dyn_table;
+  load_state->dyn_table_count = dyn_table_count;
+
+  for (iree_host_size_t i = 0; i < dyn_table_count; ++i) {
+    const iree_elf_dyn_t* dyn = &dyn_table[i];
+    switch (dyn->d_tag) {
+      case IREE_ELF_DT_STRTAB:
+        // .dynstr table for runtime symbol lookup.
+        module->dynstr = (const char*)(module->vaddr_bias + dyn->d_un.d_ptr);
+        break;
+      case IREE_ELF_DT_STRSZ:
+        module->dynstr_size = dyn->d_un.d_val;
+        break;
+
+      case IREE_ELF_DT_SYMTAB:
+        // .dynsym table for runtime symbol lookup.
+        module->dynsym =
+            (const iree_elf_sym_t*)(module->vaddr_bias + dyn->d_un.d_ptr);
+        break;
+      case IREE_ELF_DT_SYMENT:
+        if (dyn->d_un.d_val != sizeof(iree_elf_sym_t)) {
+          return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                                  "DT_SYMENT size mismatch");
+        }
+        break;
+      case IREE_ELF_DT_HASH: {
+        // NOTE: we don't care about the hash table (yet), but it is the only
+        // way to get the total symbol count.
+        const iree_elf_word_t* hash =
+            (const iree_elf_word_t*)(module->vaddr_bias + dyn->d_un.d_ptr);
+        module->dynsym_count = hash[1];  // symbol count, obviously~
+        break;
+      }
+
+      case IREE_ELF_DT_INIT:
+        // .init initializer function (runs before .init_array).
+        load_state->init = dyn->d_un.d_ptr;
+        break;
+      case IREE_ELF_DT_INIT_ARRAY:
+        // .init_array list of initializer functions.
+        load_state->init_array =
+            (const iree_elf_addr_t*)(module->vaddr_bias + dyn->d_un.d_ptr);
+        break;
+      case IREE_ELF_DT_INIT_ARRAYSZ:
+        load_state->init_array_count = dyn->d_un.d_val;
+        break;
+
+      case IREE_ELF_DT_RELENT:
+        if (dyn->d_un.d_val != sizeof(iree_elf_rel_t)) {
+          return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                                  "DT_RELENT size mismatch");
+        }
+        break;
+      case IREE_ELF_DT_RELAENT:
+        if (dyn->d_un.d_val != sizeof(iree_elf_rela_t)) {
+          return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                                  "DT_RELAENT size mismatch");
+        }
+        break;
+
+      default:
+        // Ignored.
+        break;
+    }
+  }
+
+  // Must have .dynsym/.dynstr to perform lookups.
+  if (!module->dynstr || !module->dynstr_size || !module->dynsym ||
+      !module->dynsym_count) {
+    return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                            "missing .dynsym/.dynstr in ELF .dynamic segment");
+  }
+
+  // NOTE: we could try to verify ranges here but no one seems to do that and
+  // it's somewhat annoying. You're loading untrusted code into your memory
+  // space - this is the least of your concerns :)
+
+  return iree_ok_status();
+}
+
+//==============================================================================
+// Relocation
+//==============================================================================
+
+// Applies symbol and address base relocations to the loaded sections.
+static iree_status_t iree_elf_module_apply_relocations(
+    iree_elf_module_load_state_t* load_state, iree_elf_module_t* module) {
+  // Redirect to the architecture-specific handler.
+  iree_elf_relocation_state_t reloc_state;
+  memset(&reloc_state, 0, sizeof(reloc_state));
+  reloc_state.vaddr_bias = module->vaddr_bias;
+  reloc_state.dyn_table = load_state->dyn_table;
+  reloc_state.dyn_table_count = load_state->dyn_table_count;
+  return iree_elf_arch_apply_relocations(&reloc_state);
+}
+
+//==============================================================================
+// Initialization/finalization
+//==============================================================================
+
+// Runs initializers defined within the module, if any.
+// .init is run first and then .init_array is run in array order.
+static iree_status_t iree_elf_module_run_initializers(
+    iree_elf_module_load_state_t* load_state, iree_elf_module_t* module) {
+  if (load_state->init != IREE_ELF_ADDR_MIN) {
+    iree_elf_call_v_v((void*)(module->vaddr_bias + load_state->init));
+  }
+
+  // NOTE: entries with values of 0 or -1 must be ignored.
+  for (iree_host_size_t i = 0; i < load_state->init_array_count; ++i) {
+    iree_elf_addr_t symbol_ptr = load_state->init_array[i];
+    if (symbol_ptr == 0 || symbol_ptr == IREE_ELF_ADDR_MAX) continue;
+    iree_elf_call_v_v((void*)(module->vaddr_bias + symbol_ptr));
+  }
+
+  return iree_ok_status();
+}
+
+static void iree_elf_module_run_finalizers(iree_elf_module_t* module) {
+  // NOT IMPLEMENTED
+  // Android doesn't do this for its loader and nothing we do should ever need
+  // them: we're not doing IO or (hopefully) anything stateful inside of our
+  // HAL executables that has correctness depend on them executing.
+}
+
+//==============================================================================
+// Symbol lookup
+//==============================================================================
+
+// Resolves a global symbol within the module by symbol name.
+// Currently we don't support any hashing as we have a single exported symbol
+// and this is a simple linear scan.
+//
+// If we start to get a few dozen then it may be worth it to implement the sysv
+// style as it is smallest both in code size and ELF binary size. This can be
+// specified using --hash-style=sysv with ld/lld. By default most linkers
+// (including lld, which is what we care about) will use
+// --hash-style=both and emit both `.hash` and `.gnu.hash`, but that's silly for
+// us as ideally we'd have none. If we ever try to use this for larger libraries
+// with many exported symbols (we shouldn't!) we can add support:
+// https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-48031.html
+// https://blogs.oracle.com/solaris/gnu-hash-elf-sections-v2
+static const iree_elf_sym_t* iree_elf_module_lookup_global_symbol(
+    iree_elf_module_t* module, const char* symbol_name) {
+  // NOTE: symtab[0] is always STN_UNDEF so we skip it.
+  // NOTE: symtab has local symbols before global ones and since we are looking
+  // for global symbols we iterate in reverse.
+  for (int i = (int)module->dynsym_count - 1; i > 0; i--) {
+    const iree_elf_sym_t* sym = &module->dynsym[i];
+    iree_elf_byte_t bind = IREE_ELF_ST_BIND(sym->st_info);
+    if (bind != IREE_ELF_STB_GLOBAL && bind != IREE_ELF_STB_WEAK) continue;
+    if (sym->st_name == 0) continue;
+    if (strcmp(module->dynstr + sym->st_name, symbol_name) == 0) {
+      return sym;
+    }
+  }
+  return NULL;
+}
+
+//==============================================================================
+// API
+//==============================================================================
+
+iree_status_t iree_elf_module_initialize_from_memory(
+    iree_const_byte_span_t raw_data,
+    const iree_elf_import_table_t* import_table,
+    iree_allocator_t host_allocator, iree_elf_module_t* out_module) {
+  IREE_ASSERT_ARGUMENT(raw_data.data);
+  IREE_ASSERT_ARGUMENT(out_module);
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Parse the ELF headers and verify that it's something we can handle.
+  // Temporary state required during loading such as references to subtables
+  // within the ELF are tracked here on the stack while persistent fields are
+  // initialized on |out_module|.
+  iree_elf_module_load_state_t load_state;
+  iree_status_t status =
+      iree_elf_module_parse_headers(raw_data, &load_state, out_module);
+
+  // Allocate and load the ELF into memory.
+  iree_memory_jit_context_begin();
+  if (iree_status_is_ok(status)) {
+    status = iree_elf_module_load_segments(raw_data, &load_state, out_module);
+  }
+
+  // Parse required dynamic symbol tables in loaded memory. These are used for
+  // runtime symbol resolution and relocation.
+  if (iree_status_is_ok(status)) {
+    status = iree_elf_module_parse_dynamic_tables(&load_state, out_module);
+  }
+
+  // TODO(benvanik): imports would happen here.
+
+  // Apply relocations to the loaded pages.
+  if (iree_status_is_ok(status)) {
+    status = iree_elf_module_apply_relocations(&load_state, out_module);
+  }
+
+  // Apply final protections to the loaded pages now that relocations have been
+  // performed.
+  if (iree_status_is_ok(status)) {
+    status = iree_elf_module_protect_segments(&load_state, out_module);
+  }
+  iree_memory_jit_context_end();
+
+  // Run initializers prior to returning to the caller.
+  if (iree_status_is_ok(status)) {
+    status = iree_elf_module_run_initializers(&load_state, out_module);
+  }
+
+  if (!iree_status_is_ok(status)) {
+    // On failure gracefully clean up the module by releasing any allocated
+    // memory during the partial initialization.
+    iree_elf_module_deinitialize(out_module);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+void iree_elf_module_deinitialize(iree_elf_module_t* module) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_elf_module_run_finalizers(module);
+  iree_elf_module_unload_segments(module);
+  memset(module, 0, sizeof(*module));
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+iree_status_t iree_elf_module_lookup_export(iree_elf_module_t* module,
+                                            const char* symbol_name,
+                                            void** out_export) {
+  IREE_ASSERT_ARGUMENT(module);
+  IREE_ASSERT_ARGUMENT(out_export);
+  *out_export = NULL;
+
+  const iree_elf_sym_t* sym =
+      iree_elf_module_lookup_global_symbol(module, symbol_name);
+  if (IREE_UNLIKELY(!sym)) {
+    return iree_make_status(
+        IREE_STATUS_NOT_FOUND,
+        "exported symbol with name '%s' not found in module", symbol_name);
+  }
+
+  *out_export = module->vaddr_bias + sym->st_value;
+  return iree_ok_status();
+}
diff --git a/iree/hal/local/elf/elf_module.h b/iree/hal/local/elf/elf_module.h
new file mode 100644
index 0000000..b7fb97b
--- /dev/null
+++ b/iree/hal/local/elf/elf_module.h
@@ -0,0 +1,95 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef IREE_HAL_LOCAL_ELF_ELF_LINKER_H_
+#define IREE_HAL_LOCAL_ELF_ELF_LINKER_H_
+
+#include "iree/base/api.h"
+#include "iree/hal/local/elf/arch.h"
+#include "iree/hal/local/elf/elf_types.h"
+
+//==============================================================================
+// ELF symbol import table
+//==============================================================================
+
+typedef struct {
+  const char* sym_name;
+  void* thunk_ptr;
+} iree_elf_import_t;
+
+typedef struct {
+  iree_host_size_t import_count;
+  const iree_elf_import_t* imports;
+} iree_elf_import_table_t;
+
+// TODO(benvanik): add import declaration macros that setup a unique thunk like
+// IREE_ELF_DEFINE_IMPORT(foo).
+
+//==============================================================================
+// Runtime ELF module loader/linker
+//==============================================================================
+
+// An ELF module mapped directly from memory.
+typedef struct {
+  // Base host virtual address the module is loaded into.
+  uint8_t* vaddr_base;
+  // Total size, in bytes, of the virtual address space reservation.
+  iree_host_size_t vaddr_size;
+
+  // Bias applied to all relative addresses (from the string table, etc) in the
+  // loaded module. This is an offset from the vaddr_base that may not be 0 if
+  // host page granularity was larger than the ELF's defined granularity.
+  uint8_t* vaddr_bias;
+
+  // Dynamic symbol string table (.dynstr).
+  const char* dynstr;            // DT_STRTAB
+  iree_host_size_t dynstr_size;  // DT_STRSZ (bytes)
+
+  // Dynamic symbol table (.dynsym).
+  const iree_elf_sym_t* dynsym;   // DT_SYMTAB
+  iree_host_size_t dynsym_count;  // DT_SYMENT (bytes) / sizeof(iree_elf_sym_t)
+} iree_elf_module_t;
+
+// Initializes an ELF module from the ELF |raw_data| in memory.
+// |raw_data| only needs to remain valid for the initialization of the module
+// and may be discarded afterward.
+//
+// An optional |import_table| may be specified to provide a set of symbols that
+// the module may import. Strong imports will not be resolved from the host
+// system and initialization will fail if any are not present in the provided
+// table.
+//
+// Upon return |out_module| is initialized and ready for use with any present
+// .init initialization functions having been executed. To release memory
+// allocated by the module during loading iree_elf_module_deinitialize must be
+// called to unload when it is safe (no more outstanding pointers into the
+// loaded module, etc).
+iree_status_t iree_elf_module_initialize_from_memory(
+    iree_const_byte_span_t raw_data,
+    const iree_elf_import_table_t* import_table,
+    iree_allocator_t host_allocator, iree_elf_module_t* out_module);
+
+// Deinitializes a |module|, releasing any allocated executable or data pages.
+// Invalidates all symbol pointers previous retrieved from the module and any
+// pointer to data that may have been in the module text or rwdata.
+//
+// NOTE: .fini finalizers will not be executed.
+void iree_elf_module_deinitialize(iree_elf_module_t* module);
+
+// Returns the host pointer of an exported symbol with the given |symbol_name|.
+iree_status_t iree_elf_module_lookup_export(iree_elf_module_t* module,
+                                            const char* symbol_name,
+                                            void** out_export);
+
+#endif  // IREE_HAL_LOCAL_ELF_ELF_LINKER_H_
diff --git a/iree/hal/local/elf/elf_module_test.cc b/iree/hal/local/elf/elf_module_test.cc
new file mode 100644
index 0000000..cde09ae
--- /dev/null
+++ b/iree/hal/local/elf/elf_module_test.cc
@@ -0,0 +1,128 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+#include "iree/hal/local/executable_library.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+extern "C" {
+#include "iree/hal/local/elf/elf_module.h"
+}  // extern "C"
+
+// ELF modules for various platforms embedded in the binary:
+#include "iree/hal/local/elf/testdata/simple_mul_dispatch.h"
+
+namespace {
+
+class ELFModuleTest : public ::testing::Test {
+ protected:
+  const iree_const_byte_span_t GetCurrentPlatformFile() {
+    iree_string_view_t pattern = iree_string_view_empty();
+#if defined(IREE_ARCH_ARM_32)
+    pattern = iree_make_cstring_view("*_armeabi.so");
+#elif defined(IREE_ARCH_ARM_64)
+    pattern = iree_make_cstring_view("*_aarch64.so");
+#elif defined(IREE_ARCH_RISCV_32)
+    pattern = iree_make_cstring_view("*_riscv32.so");
+#elif defined(IREE_ARCH_RISCV_64)
+    pattern = iree_make_cstring_view("*_riscv64.so");
+#elif defined(IREE_ARCH_X86_32)
+    pattern = iree_make_cstring_view("*_x86_32.so");
+#elif defined(IREE_ARCH_X86_64)
+    pattern = iree_make_cstring_view("*_x86_64.so");
+#else
+#warning "No architecture pattern specified; ELF linker will not be tested"
+#endif  // IREE_ARCH_*
+
+    if (!iree_string_view_is_empty(pattern)) {
+      for (size_t i = 0; i < iree::elf::simple_mul_dispatch_size(); ++i) {
+        const auto* file_toc = &iree::elf::simple_mul_dispatch_create()[i];
+        if (iree_string_view_match_pattern(
+                iree_make_cstring_view(file_toc->name), pattern)) {
+          return iree_make_const_byte_span(file_toc->data, file_toc->size);
+        }
+      }
+    }
+    return iree_make_const_byte_span(nullptr, 0);
+  }
+};
+
+TEST_F(ELFModuleTest, Check) {
+  auto file_data = GetCurrentPlatformFile();
+  if (!file_data.data_length) {
+    GTEST_SKIP() << "No ELF file built for this platform";
+    return;
+  }
+
+  iree_elf_import_table_t import_table;
+  memset(&import_table, 0, sizeof(import_table));
+  iree_elf_module_t module;
+  IREE_ASSERT_OK(iree_elf_module_initialize_from_memory(
+      file_data, &import_table, iree_allocator_system(), &module));
+
+  void* query_fn_ptr = NULL;
+  IREE_ASSERT_OK(iree_elf_module_lookup_export(
+      &module, IREE_HAL_EXECUTABLE_LIBRARY_EXPORT_NAME, &query_fn_ptr));
+
+  union {
+    const iree_hal_executable_library_header_t** header;
+    const iree_hal_executable_library_v0_t* v0;
+  } library;
+  library.header =
+      (const iree_hal_executable_library_header_t**)iree_elf_call_p_i(
+          query_fn_ptr, IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION);
+  ASSERT_TRUE(library.header != NULL);
+
+  auto* header = *library.header;
+  ASSERT_EQ(header->version, IREE_HAL_EXECUTABLE_LIBRARY_VERSION_0);
+  ASSERT_STREQ(header->name, "simple_mul_dispatch_0");
+  ASSERT_EQ(1, library.v0->entry_point_count);
+
+  float arg0[4] = {1.0f, 2.0f, 3.0f, 4.0f};
+  float arg1[4] = {100.0f, 200.0f, 300.0f, 400.0f};
+  float ret0[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+  iree_device_size_t binding_lengths[3] = {
+      sizeof(arg0),
+      sizeof(arg1),
+      sizeof(ret0),
+  };
+  void* binding_ptrs[3] = {
+      arg0,
+      arg1,
+      ret0,
+  };
+
+  iree_hal_executable_dispatch_state_v0_t dispatch_state;
+  memset(&dispatch_state, 0, sizeof(dispatch_state));
+  dispatch_state.workgroup_count = {{1, 1, 1}};
+  dispatch_state.workgroup_size = {{1, 1, 1}};
+  dispatch_state.binding_count = 1;
+  dispatch_state.binding_lengths = binding_lengths;
+  dispatch_state.binding_ptrs = binding_ptrs;
+  iree_hal_vec3_t workgroup_id = {{0, 0, 0}};
+  int ret = iree_elf_call_i_pp((const void*)library.v0->entry_points[0],
+                               (void*)&dispatch_state, (void*)&workgroup_id);
+  EXPECT_EQ(0, ret);
+
+  EXPECT_EQ(ret0[0], 100.0f);
+  EXPECT_EQ(ret0[1], 400.0f);
+  EXPECT_EQ(ret0[2], 900.0f);
+  EXPECT_EQ(ret0[3], 1600.0f);
+
+  iree_elf_module_deinitialize(&module);
+}
+
+}  // namespace
diff --git a/iree/hal/local/elf/elf_types.h b/iree/hal/local/elf/elf_types.h
new file mode 100644
index 0000000..29ecf2b
--- /dev/null
+++ b/iree/hal/local/elf/elf_types.h
@@ -0,0 +1,428 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef IREE_HAL_LOCAL_ELF_ELF_TYPES_H_
+#define IREE_HAL_LOCAL_ELF_ELF_TYPES_H_
+
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/base/target_platform.h"
+
+// This file contains the ELF data structures we use in our runtime linker and
+// the definitions to support them. The structure definitions are taken from
+// the System V ABI:
+//   http://www.sco.com/developers/gabi/latest/contents.html
+// LLVM's BinaryFormat ELF headers:
+//   third_party/llvm-project/llvm/include/llvm/BinaryFormat/ELF.h
+// And the Linux specification:
+//   https://linux.die.net/man/5/elf
+//   https://refspecs.linuxbase.org/LSB_3.1.1/LSB-Core-generic/LSB-Core-generic.html
+// (among others)
+//
+// We define both 32-bit and 64-bit variants of the structures as we support
+// both; however we only ever use one at a time based on the target
+// configuration so that we are only including the code for the
+// architecture-native integer width.
+//
+// We purposefully avoid inserting a large number of enums that we never use:
+// this implementation is just to load our own compiled HAL executables and as
+// such we control both the linker configuration used to produce the inputs we
+// load.
+//
+// Code can generally be written using only the iree_elf_* types and IREE_ELF_*
+// macros; if used consistently then only one source code definition is required
+// and it'll get compiled into the appropriate form with no additional
+// configuration.
+
+typedef uint8_t iree_elf32_byte_t;
+typedef uint32_t iree_elf32_addr_t;
+typedef uint16_t iree_elf32_half_t;
+typedef uint32_t iree_elf32_off_t;
+typedef int32_t iree_elf32_sword_t;
+typedef uint32_t iree_elf32_word_t;
+
+typedef uint8_t iree_elf64_byte_t;
+typedef uint64_t iree_elf64_addr_t;
+typedef uint16_t iree_elf64_half_t;
+typedef uint64_t iree_elf64_off_t;
+typedef int32_t iree_elf64_sword_t;
+typedef uint32_t iree_elf64_word_t;
+typedef uint64_t iree_elf64_xword_t;
+typedef int64_t iree_elf64_sxword_t;
+
+enum {
+  IREE_ELF_EI_CLASS = 4,       // IREE_ELF_ELFCLASS*
+  IREE_ELF_EI_DATA = 5,        // IREE_ELF_ELFDATA*
+  IREE_ELF_EI_VERSION = 6,     // File version (1 expected)
+  IREE_ELF_EI_OSABI = 7,       // Operating system/ABI identification
+  IREE_ELF_EI_ABIVERSION = 8,  // ABI version
+  IREE_ELF_EI_PAD = 9,         // Start of padding bytes
+  IREE_ELF_EI_NIDENT = 16,     // Size of e_ident[]
+};
+
+enum {
+  IREE_ELF_ELFCLASSNONE = 0,  // Invalid class
+  IREE_ELF_ELFCLASS32 = 1,    // 32-bit objects
+  IREE_ELF_ELFCLASS64 = 2,    // 64-bit objects
+};
+
+enum {
+  IREE_ELF_ELFDATANONE = 0,  // Invalid data encoding
+  IREE_ELF_ELFDATA2LSB = 1,  // Little-endian
+  IREE_ELF_ELFDATA2MSB = 2,  // Big-endian
+};
+
+enum {
+  IREE_ELF_ET_NONE = 0,  // No file type
+  IREE_ELF_ET_REL = 1,   // Relocatable file
+  IREE_ELF_ET_EXEC = 2,  // Executable file
+  IREE_ELF_ET_DYN = 3,   // Shared object file
+  IREE_ELF_ET_CORE = 4,  // Core file
+};
+
+typedef struct {
+  iree_elf32_byte_t e_ident[IREE_ELF_EI_NIDENT];
+  iree_elf32_half_t e_type;  // IREE_ELF_ET_*
+  iree_elf32_half_t e_machine;
+  iree_elf32_word_t e_version;
+  iree_elf32_addr_t e_entry;
+  iree_elf32_off_t e_phoff;
+  iree_elf32_off_t e_shoff;
+  iree_elf32_word_t e_flags;
+  iree_elf32_half_t e_ehsize;
+  iree_elf32_half_t e_phentsize;
+  iree_elf32_half_t e_phnum;
+  iree_elf32_half_t e_shentsize;
+  iree_elf32_half_t e_shnum;
+  iree_elf32_half_t e_shstrndx;
+} iree_elf32_ehdr_t;
+
+typedef struct {
+  iree_elf64_byte_t e_ident[IREE_ELF_EI_NIDENT];
+  iree_elf64_half_t e_type;  // IREE_ELF_ET_*
+  iree_elf64_half_t e_machine;
+  iree_elf64_word_t e_version;
+  iree_elf64_addr_t e_entry;
+  iree_elf64_off_t e_phoff;
+  iree_elf64_off_t e_shoff;
+  iree_elf64_word_t e_flags;
+  iree_elf64_half_t e_ehsize;
+  iree_elf64_half_t e_phentsize;
+  iree_elf64_half_t e_phnum;
+  iree_elf64_half_t e_shentsize;
+  iree_elf64_half_t e_shnum;
+  iree_elf64_half_t e_shstrndx;
+} iree_elf64_ehdr_t;
+
+enum {
+  IREE_ELF_PT_NULL = 0,
+  IREE_ELF_PT_LOAD = 1,
+  IREE_ELF_PT_DYNAMIC = 2,
+  IREE_ELF_PT_INTERP = 3,
+  IREE_ELF_PT_NOTE = 4,
+  IREE_ELF_PT_SHLIB = 5,
+  IREE_ELF_PT_PHDR = 6,
+  IREE_ELF_PT_GNU_RELRO = 0x6474e552,
+};
+
+enum {
+  IREE_ELF_PF_X = 0x1,  // Execute
+  IREE_ELF_PF_W = 0x2,  // Write
+  IREE_ELF_PF_R = 0x4,  // Read
+};
+
+typedef struct {
+  iree_elf32_word_t p_type;  // IREE_ELF_PT_*
+  iree_elf32_off_t p_offset;
+  iree_elf32_addr_t p_vaddr;
+  iree_elf32_addr_t p_paddr;
+  iree_elf32_word_t p_filesz;
+  iree_elf32_word_t p_memsz;
+  iree_elf32_word_t p_flags;  // IREE_ELF_PF_*
+  iree_elf32_word_t p_align;
+} iree_elf32_phdr_t;
+
+typedef struct {
+  iree_elf64_word_t p_type;   // IREE_ELF_PT_*
+  iree_elf64_word_t p_flags;  // IREE_ELF_PF_*
+  iree_elf64_off_t p_offset;
+  iree_elf64_addr_t p_vaddr;
+  iree_elf64_addr_t p_paddr;
+  iree_elf64_xword_t p_filesz;
+  iree_elf64_xword_t p_memsz;
+  iree_elf64_xword_t p_align;
+} iree_elf64_phdr_t;
+
+// An undefined, missing, irrelevant, or otherwise meaningless section ref.
+#define IREE_ELF_SHN_UNDEF 0
+
+enum {
+  IREE_ELF_SHT_NULL = 0,
+  IREE_ELF_SHT_PROGBITS = 1,
+  IREE_ELF_SHT_SYMTAB = 2,
+  IREE_ELF_SHT_STRTAB = 3,
+  IREE_ELF_SHT_RELA = 4,
+  IREE_ELF_SHT_HASH = 5,
+  IREE_ELF_SHT_DYNAMIC = 6,
+  IREE_ELF_SHT_NOTE = 7,
+  IREE_ELF_SHT_NOBITS = 8,
+  IREE_ELF_SHT_REL = 9,
+  IREE_ELF_SHT_SHLIB = 10,
+  IREE_ELF_SHT_DYNSYM = 11,
+};
+
+enum {
+  IREE_ELF_SHF_WRITE = 0x1,
+  IREE_ELF_SHF_ALLOC = 0x2,
+  IREE_ELF_SHF_EXECINSTR = 0x4,
+  IREE_ELF_SHF_MERGE = 0x10,
+  IREE_ELF_SHF_STRINGS = 0x20,
+  IREE_ELF_SHF_INFO_LINK = 0x40,
+  IREE_ELF_SHF_LINK_ORDER = 0x80,
+  IREE_ELF_SHF_OS_NONCONFORMING = 0x100,
+  IREE_ELF_SHF_GROUP = 0x200
+};
+
+typedef struct {
+  iree_elf32_word_t sh_name;
+  iree_elf32_word_t sh_type;   // IREE_ELF_SHT_*
+  iree_elf32_word_t sh_flags;  // IREE_ELF_SHF_*
+  iree_elf32_addr_t sh_addr;
+  iree_elf32_off_t sh_offset;
+  iree_elf32_word_t sh_size;
+  iree_elf32_word_t sh_link;
+  iree_elf32_word_t sh_info;
+  iree_elf32_word_t sh_addralign;
+  iree_elf32_word_t sh_entsize;
+} iree_elf32_shdr_t;
+
+typedef struct {
+  iree_elf64_word_t sh_name;
+  iree_elf64_word_t sh_type;    // IREE_ELF_SHT_*
+  iree_elf64_xword_t sh_flags;  // IREE_ELF_SHF_*
+  iree_elf64_addr_t sh_addr;
+  iree_elf64_off_t sh_offset;
+  iree_elf64_xword_t sh_size;
+  iree_elf64_word_t sh_link;
+  iree_elf64_word_t sh_info;
+  iree_elf64_xword_t sh_addralign;
+  iree_elf64_xword_t sh_entsize;
+} iree_elf64_shdr_t;
+
+typedef struct {
+  iree_elf32_word_t n_namesz;
+  iree_elf32_word_t n_descsz;
+  iree_elf32_word_t n_type;
+} iree_elf32_nhdr_t;
+
+typedef struct {
+  iree_elf64_word_t n_namesz;
+  iree_elf64_word_t n_descsz;
+  iree_elf64_word_t n_type;
+} iree_elf64_nhdr_t;
+
+#define IREE_ELF_ST_INFO(bind, type) (((bind) << 4) + ((type)&0xF))
+
+#define IREE_ELF_ST_TYPE(info) ((info)&0xF)
+enum {
+  IREE_ELF_STT_NOTYPE = 0,
+  IREE_ELF_STT_OBJECT = 1,
+  IREE_ELF_STT_FUNC = 2,
+  IREE_ELF_STT_SECTION = 3,
+  IREE_ELF_STT_FILE = 4,
+  IREE_ELF_STT_COMMON = 5,
+};
+
+#define IREE_ELF_ST_BIND(info) ((info) >> 4)
+enum {
+  IREE_ELF_STB_LOCAL = 0,   // Local symbol.
+  IREE_ELF_STB_GLOBAL = 1,  // Global symbol (export).
+  IREE_ELF_STB_WEAK = 2,    // Weak symbol (somewhat like global).
+};
+
+#define IREE_ELF_ST_VISIBILITY(o) ((o)&0x3)
+enum {
+  IREE_ELF_STV_DEFAULT = 0,
+  IREE_ELF_STV_INTERNAL = 1,
+  IREE_ELF_STV_HIDDEN = 2,
+  IREE_ELF_STV_PROTECTED = 3,
+};
+
+typedef struct {
+  iree_elf32_word_t st_name;
+  iree_elf32_addr_t st_value;
+  iree_elf32_word_t st_size;
+  iree_elf32_byte_t st_info;
+  iree_elf32_byte_t st_other;
+  iree_elf32_half_t st_shndx;
+} iree_elf32_sym_t;
+
+typedef struct {
+  iree_elf64_word_t st_name;
+  iree_elf64_byte_t st_info;
+  iree_elf64_byte_t st_other;
+  iree_elf64_half_t st_shndx;
+  iree_elf64_addr_t st_value;
+  iree_elf64_xword_t st_size;
+} iree_elf64_sym_t;
+
+enum {
+  IREE_ELF_DT_NULL = 0,                   // (no data)
+  IREE_ELF_DT_NEEDED = 1,                 // d_val
+  IREE_ELF_DT_PLTRELSZ = 2,               // d_val
+  IREE_ELF_DT_PLTGOT = 3,                 // d_ptr
+  IREE_ELF_DT_HASH = 4,                   // d_ptr
+  IREE_ELF_DT_STRTAB = 5,                 // d_ptr
+  IREE_ELF_DT_SYMTAB = 6,                 // d_ptr
+  IREE_ELF_DT_RELA = 7,                   // d_ptr
+  IREE_ELF_DT_RELASZ = 8,                 // d_val
+  IREE_ELF_DT_RELAENT = 9,                // d_val
+  IREE_ELF_DT_STRSZ = 10,                 // d_val
+  IREE_ELF_DT_SYMENT = 11,                // d_val
+  IREE_ELF_DT_INIT = 12,                  // d_ptr
+  IREE_ELF_DT_FINI = 13,                  // d_ptr
+  IREE_ELF_DT_SONAME = 14,                // d_val
+  IREE_ELF_DT_RPATH = 15,                 // d_val
+  IREE_ELF_DT_SYMBOLIC = 16,              // (no data)
+  IREE_ELF_DT_REL = 17,                   // d_ptr
+  IREE_ELF_DT_RELSZ = 18,                 // d_val
+  IREE_ELF_DT_RELENT = 19,                // d_val
+  IREE_ELF_DT_PLTREL = 20,                // d_val
+  IREE_ELF_DT_TEXTREL = 22,               // (no data)
+  IREE_ELF_DT_JMPREL = 23,                // d_ptr
+  IREE_ELF_DT_BIND_NOW = 24,              // (no data)
+  IREE_ELF_DT_INIT_ARRAY = 25,            // d_ptr
+  IREE_ELF_DT_FINI_ARRAY = 26,            // d_ptr
+  IREE_ELF_DT_INIT_ARRAYSZ = 27,          // d_val
+  IREE_ELF_DT_FINI_ARRAYSZ = 28,          // d_val
+  IREE_ELF_DT_RUNPATH = 29,               // d_val
+  IREE_ELF_DT_FLAGS = 30,                 // d_val
+  IREE_ELF_DT_SUNW_RTLDINF = 0x6000000e,  // d_ptr
+  IREE_ELF_DT_CHECKSUM = 0x6ffffdf8,      // d_val
+  IREE_ELF_DT_PLTPADSZ = 0x6ffffdf9,      // d_val
+  IREE_ELF_DT_MOVEENT = 0x6ffffdfa,       // d_val
+  IREE_ELF_DT_MOVESZ = 0x6ffffdfb,        // d_val
+  IREE_ELF_DT_FEATURE_1 = 0x6ffffdfc,     // d_val
+  IREE_ELF_DT_POSFLAG_1 = 0x6ffffdfd,     // d_val
+  IREE_ELF_DT_SYMINSZ = 0x6ffffdfe,       // d_val
+  IREE_ELF_DT_SYMINENT = 0x6ffffdff,      // d_val
+  IREE_ELF_DT_CONFIG = 0x6ffffefa,        // d_ptr
+  IREE_ELF_DT_DEPAUDIT = 0x6ffffefb,      // d_ptr
+  IREE_ELF_DT_AUDIT = 0x6ffffefc,         // d_ptr
+  IREE_ELF_DT_PLTPAD = 0x6ffffefd,        // d_ptr
+  IREE_ELF_DT_MOVETAB = 0x6ffffefe,       // d_ptr
+  IREE_ELF_DT_SYMINFO = 0x6ffffeff,       // d_ptr
+  IREE_ELF_DT_RELACOUNT = 0x6ffffff9,     // d_val
+  IREE_ELF_DT_RELCOUNT = 0x6ffffffa,      // d_val
+  IREE_ELF_DT_FLAGS_1 = 0x6ffffffb,       // d_val
+  IREE_ELF_DT_VERDEF = 0x6ffffffc,        // d_ptr
+  IREE_ELF_DT_VERDEFNUM = 0x6ffffffd,     // d_val
+  IREE_ELF_DT_VERNEED = 0x6ffffffe,       // d_ptr
+  IREE_ELF_DT_VERNEEDNUM = 0x6fffffff,    // d_val
+  IREE_ELF_DT_AUXILIARY = 0x7ffffffd,     // d_val
+  IREE_ELF_DT_USED = 0x7ffffffe,          // d_val
+};
+
+typedef struct {
+  iree_elf32_sword_t d_tag;  // IREE_ELF_DT_*
+  union {
+    iree_elf32_sword_t d_val;
+    iree_elf32_addr_t d_ptr;
+  } d_un;
+} iree_elf32_dyn_t;
+
+typedef struct {
+  iree_elf64_sxword_t d_tag;  // IREE_ELF_DT_*
+  union {
+    iree_elf64_xword_t d_val;
+    iree_elf64_addr_t d_ptr;
+  } d_un;
+} iree_elf64_dyn_t;
+
+typedef struct {
+  iree_elf32_addr_t r_offset;
+  iree_elf32_word_t r_info;
+} iree_elf32_rel_t;
+
+typedef struct {
+  iree_elf64_addr_t r_offset;
+  iree_elf64_xword_t r_info;
+} iree_elf64_rel_t;
+
+typedef struct {
+  iree_elf32_addr_t r_offset;
+  iree_elf32_word_t r_info;
+  iree_elf32_sword_t r_addend;
+} iree_elf32_rela_t;
+
+typedef struct {
+  iree_elf64_addr_t r_offset;
+  iree_elf64_xword_t r_info;
+  iree_elf64_sxword_t r_addend;
+} iree_elf64_rela_t;
+
+#if defined(IREE_PTR_SIZE_32)
+
+#define IREE_ELF_ADDR_MIN 0u
+#define IREE_ELF_ADDR_MAX UINT32_MAX
+
+typedef iree_elf32_byte_t iree_elf_byte_t;
+typedef iree_elf32_addr_t iree_elf_addr_t;
+typedef iree_elf32_half_t iree_elf_half_t;
+typedef iree_elf32_off_t iree_elf_off_t;
+typedef iree_elf32_sword_t iree_elf_sword_t;
+typedef iree_elf32_word_t iree_elf_word_t;
+
+typedef iree_elf32_dyn_t iree_elf_dyn_t;
+typedef iree_elf32_rel_t iree_elf_rel_t;
+typedef iree_elf32_rela_t iree_elf_rela_t;
+typedef iree_elf32_sym_t iree_elf_sym_t;
+typedef iree_elf32_ehdr_t iree_elf_ehdr_t;
+typedef iree_elf32_phdr_t iree_elf_phdr_t;
+typedef iree_elf32_shdr_t iree_elf_shdr_t;
+typedef iree_elf32_nhdr_t iree_elf_nhdr_t;
+
+#define IREE_ELF_R_SYM(x) ((x) >> 8)
+#define IREE_ELF_R_TYPE(x) ((x)&0xFF)
+
+#elif defined(IREE_PTR_SIZE_64)
+
+#define IREE_ELF_ADDR_MIN 0ull
+#define IREE_ELF_ADDR_MAX UINT64_MAX
+
+typedef iree_elf64_byte_t iree_elf_byte_t;
+typedef iree_elf64_addr_t iree_elf_addr_t;
+typedef iree_elf64_half_t iree_elf_half_t;
+typedef iree_elf64_off_t iree_elf_off_t;
+typedef iree_elf64_sword_t iree_elf_sword_t;
+typedef iree_elf64_word_t iree_elf_word_t;
+
+typedef iree_elf64_dyn_t iree_elf_dyn_t;
+typedef iree_elf64_rel_t iree_elf_rel_t;
+typedef iree_elf64_rela_t iree_elf_rela_t;
+typedef iree_elf64_sym_t iree_elf_sym_t;
+typedef iree_elf64_ehdr_t iree_elf_ehdr_t;
+typedef iree_elf64_phdr_t iree_elf_phdr_t;
+typedef iree_elf64_shdr_t iree_elf_shdr_t;
+typedef iree_elf64_nhdr_t iree_elf_nhdr_t;
+
+#define IREE_ELF_R_SYM(i) ((i) >> 32)
+#define IREE_ELF_R_TYPE(i) ((i)&0xFFFFFFFF)
+
+#else
+#error "unsupported ELF N size (only 32/64-bits are defined)"
+#endif  // IREE_PTR_SIZE_*
+
+#endif  // IREE_HAL_LOCAL_ELF_ELF_TYPES_H_
diff --git a/iree/hal/local/elf/platform.h b/iree/hal/local/elf/platform.h
new file mode 100644
index 0000000..119a51c
--- /dev/null
+++ b/iree/hal/local/elf/platform.h
@@ -0,0 +1,165 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef IREE_HAL_LOCAL_ELF_PLATFORM_H_
+#define IREE_HAL_LOCAL_ELF_PLATFORM_H_
+
+#include "iree/base/api.h"
+
+// TODO(benvanik): move some of this to iree/base/internal/. A lot of this code
+// comes from an old partial implementation of memory objects that should be
+// finished. When done it will replace the need for all of these platform files.
+
+//==============================================================================
+// Alignment utilities
+//==============================================================================
+
+// Defines a range of bytes with any arbitrary alignment.
+// Most operations will adjust this range by the allocation granularity, meaning
+// that a range that stradles a page boundary will be specifying multiple pages
+// (such as offset=1, length=4096 with a page size of 4096 indicating 2 pages).
+typedef struct {
+  iree_host_size_t offset;
+  iree_host_size_t length;
+} iree_byte_range_t;
+
+static inline uintptr_t iree_page_align_start(uintptr_t addr,
+                                              iree_host_size_t page_alignment) {
+  return addr & (~(page_alignment - 1));
+}
+
+static inline uintptr_t iree_page_align_end(uintptr_t addr,
+                                            iree_host_size_t page_alignment) {
+  return iree_page_align_start(addr + (page_alignment - 1), page_alignment);
+}
+
+//==============================================================================
+// Memory subsystem information and control
+//==============================================================================
+
+// System platform/environment information defining memory parameters.
+// These can be used to control application behavior (such as whether to enable
+// a JIT if executable pages can be allocated) and allow callers to compute
+// memory ranges based on the variable page size of the platform.
+typedef struct {
+  // The page size and the granularity of page protection and commitment. This
+  // is the page size used by the iree_memory_view_t functions.
+  iree_host_size_t normal_page_size;
+
+  // The granularity for the starting address at which virtual memory can be
+  // allocated.
+  iree_host_size_t normal_page_granularity;
+
+  // The minimum page size and granularity for large pages or 0 if unavailable.
+  // To use large pages the size and alignment must be a multiple of this value
+  // and the IREE_MEMORY_VIEW_FLAG_LARGE_PAGES must be set.
+  iree_host_size_t large_page_granularity;
+
+  // Indicates whether executable pages may be allocated within the process.
+  // Some platforms or release environments have restrictions on whether
+  // executable pages may be allocated from user code (such as iOS).
+  bool can_allocate_executable_pages;
+} iree_memory_info_t;
+
+// Queries the system platform/environment memory information.
+// Callers should cache the results to avoid repeated queries, such as storing
+// the used fields in an allocator upon initialization to reuse during
+// allocations made via the allocator.
+void iree_memory_query_info(iree_memory_info_t* out_info);
+
+// Enter a W^X region where pages will be changed RW->RX or RX->RW and write
+// protection should be suspended. Only effects the calling thread and must be
+// paired with iree_memory_jit_context_end.
+void iree_memory_jit_context_begin();
+
+// Exits a W^X region previously entered with iree_memory_jit_context_begin.
+void iree_memory_jit_context_end();
+
+//==============================================================================
+// Virtual address space manipulation
+//==============================================================================
+
+// Defines which access operations are allowed on a view of memory.
+// Attempts to perform an access not originally allowed when the view was
+// defined may result in process termination/exceptions/sadness on platforms
+// with real MMUs and are generally not detectable: treat limited access as a
+// fail-safe mechanism only.
+typedef enum {
+  // Pages in the view may be read by the process.
+  // Some platforms may not respect this value being unset meaning that reads
+  // will still succeed.
+  IREE_MEMORY_ACCESS_READ = 1u << 0,
+  // Pages in the view may be written by the process.
+  // If unset then writes will result in process termination.
+  IREE_MEMORY_ACCESS_WRITE = 1u << 1,
+  // Pages in the view can be executed as native machine code.
+  // Callers must ensure iree_memory_info_t::can_allocate_executable_pages is
+  // true prior to requesting executable memory as certain platforms or release
+  // environments may not support allocating/using executable pages.
+  IREE_MEMORY_ACCESS_EXECUTE = 1u << 2,
+} iree_memory_access_t;
+
+// Flags used to control the behavior of allocated memory views.
+typedef enum {
+  // TODO(benvanik): pull from memory_object.h.
+  IREE_MEMORY_VIEW_FLAG_NONE = 0u,
+
+  // Indicates that the memory may be used to execute code.
+  // May be used to ask for special privileges (like MAP_JIT on MacOS).
+  IREE_MEMORY_VIEW_FLAG_MAY_EXECUTE = 1u << 10,
+} iree_memory_view_flags_t;
+
+// Reserves a range of virtual address space in the host process.
+// The base alignment will be that of the page granularity as specified
+// (normal or large) in |flags| and |total_length| will be adjusted to match.
+//
+// The resulting range at |out_base_address| will be uncommitted and
+// inaccessible on systems with memory protection. Pages within the range must
+// first be committed with iree_memory_view_commit_ranges and then may have
+// their access permissions changed with iree_memory_view_protect_ranges.
+//
+// Implemented by VirtualAlloc+MEM_RESERVE/mmap+PROT_NONE.
+iree_status_t iree_memory_view_reserve(iree_memory_view_flags_t flags,
+                                       iree_host_size_t total_length,
+                                       void** out_base_address);
+
+// Releases a range of virtual address
+void iree_memory_view_release(void* base_address,
+                              iree_host_size_t total_length);
+
+// Commits pages overlapping the byte ranges defined by |byte_ranges|.
+// Ranges will be adjusted to the page granularity of the view.
+//
+// Implemented by VirtualAlloc+MEM_COMMIT/mmap+!PROT_NONE.
+iree_status_t iree_memory_view_commit_ranges(
+    void* base_address, iree_host_size_t range_count,
+    const iree_byte_range_t* ranges, iree_memory_access_t initial_access);
+
+// Changes the access protection of view byte ranges defined by |byte_ranges|.
+// Ranges will be adjusted to the page granularity of the view.
+//
+// Implemented by VirtualProtect/mprotect:
+//  https://docs.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-virtualprotect
+//  https://man7.org/linux/man-pages/man2/mprotect.2.html
+iree_status_t iree_memory_view_protect_ranges(void* base_address,
+                                              iree_host_size_t range_count,
+                                              const iree_byte_range_t* ranges,
+                                              iree_memory_access_t new_access);
+
+// Flushes the CPU instruction cache for a given range of bytes.
+// May be a no-op depending on architecture, but must be called prior to
+// executing code from any pages that have been written during load.
+void iree_memory_view_flush_icache(void* base_address, iree_host_size_t length);
+
+#endif  // IREE_HAL_LOCAL_ELF_PLATFORM_H_
diff --git a/iree/hal/local/elf/platform/apple.c b/iree/hal/local/elf/platform/apple.c
new file mode 100644
index 0000000..78b7acb
--- /dev/null
+++ b/iree/hal/local/elf/platform/apple.c
@@ -0,0 +1,157 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/local/elf/platform.h"
+
+#if defined(IREE_PLATFORM_APPLE)
+
+// NOTE: because Apple there's some hoop-jumping to get executable code.
+// https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
+// https://keith.github.io/xcode-man-pages/pthread_jit_write_protect_np.3.html
+
+#include <errno.h>
+#include <libkern/OSCacheControl.h>
+#include <mach/vm_statistics.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+//==============================================================================
+// Memory subsystem information and control
+//==============================================================================
+
+void iree_memory_query_info(iree_memory_info_t* out_info) {
+  memset(out_info, 0, sizeof(*out_info));
+
+  int page_size = sysconf(_SC_PAGESIZE);
+  out_info->normal_page_size = page_size;
+  out_info->normal_page_granularity = page_size;
+  out_info->large_page_granularity = (2 * 1024 * 1024);  // What V8 uses.
+
+  out_info->can_allocate_executable_pages = true;
+}
+
+void iree_memory_jit_context_begin() { pthread_jit_write_protect_np(0); }
+
+void iree_memory_jit_context_end() { pthread_jit_write_protect_np(1); }
+
+//==============================================================================
+// Virtual address space manipulation
+//==============================================================================
+
+// This user tag makes it easier to find our pages in vmmap dumps.
+#define IREE_MEMORY_MMAP_FD VM_MAKE_TAG(255)
+
+static int iree_memory_access_to_prot(iree_memory_access_t access) {
+  int prot = 0;
+  if (access & IREE_MEMORY_ACCESS_READ) prot |= PROT_READ;
+  if (access & IREE_MEMORY_ACCESS_WRITE) prot |= PROT_WRITE;
+  if (access & IREE_MEMORY_ACCESS_EXECUTE) prot |= PROT_EXEC;
+  return prot;
+}
+
+iree_status_t iree_memory_view_reserve(iree_memory_view_flags_t flags,
+                                       iree_host_size_t total_length,
+                                       void** out_base_address) {
+  *out_base_address = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  int mmap_prot = PROT_NONE;
+  int mmap_flags = MAP_PRIVATE | MAP_ANON | MAP_NORESERVE;
+  if (flags & IREE_MEMORY_VIEW_FLAG_EXECUTE) {
+    mmap_flags |= MAP_JIT;
+  }
+
+  iree_status_t status = iree_ok_status();
+  void* base_address =
+      mmap(NULL, total_length, mmap_prot, mmap_flags, IREE_MEMORY_MMAP_FD, 0);
+  if (base_address == MAP_FAILED) {
+    status = iree_make_status(iree_status_code_from_errno(errno),
+                              "mmap reservation failed");
+  }
+
+  *out_base_address = base_address;
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+void iree_memory_view_release(void* base_address,
+                              iree_host_size_t total_length) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // NOTE: return value ignored as this is a shutdown path.
+  munmap(base_address, total_length);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+iree_status_t iree_memory_view_commit_ranges(
+    void* base_address, iree_host_size_t range_count,
+    const iree_byte_range_t* ranges, iree_memory_access_t initial_access) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  int mmap_prot = iree_memory_access_to_prot(initial_access);
+  int mmap_flags = MAP_PRIVATE | MAP_ANON | MAP_FIXED;
+
+  iree_status_t status = iree_ok_status();
+  for (iree_host_size_t i = 0; i < range_count; ++i) {
+    void* range_start = (void*)iree_page_align_start(
+        (uintptr_t)base_address + ranges[i].offset, getpagesize());
+    void* result = mmap(range_start, ranges[i].length, mmap_prot, mmap_flags,
+                        IREE_MEMORY_MMAP_FD, 0);
+    if (result == MAP_FAILED) {
+      status = iree_make_status(iree_status_code_from_errno(errno),
+                                "mmap commit failed");
+      break;
+    }
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+iree_status_t iree_memory_view_protect_ranges(void* base_address,
+                                              iree_host_size_t range_count,
+                                              const iree_byte_range_t* ranges,
+                                              iree_memory_access_t new_access) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  int mmap_prot = iree_memory_access_to_prot(new_access);
+
+  iree_status_t status = iree_ok_status();
+  for (iree_host_size_t i = 0; i < range_count; ++i) {
+    void* range_start = (void*)iree_page_align_start(
+        (uintptr_t)base_address + ranges[i].offset, getpagesize());
+    int ret = mprotect(range_start, ranges[i].length, mmap_prot);
+    if (ret != 0) {
+      status = iree_make_status(iree_status_code_from_errno(errno),
+                                "mprotect failed");
+      break;
+    }
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+void sys_icache_invalidate(void* start, size_t len);
+
+void iree_memory_view_flush_icache(void* base_address,
+                                   iree_host_size_t length) {
+  sys_icache_invalidate(base_address, length);
+}
+
+#endif  // IREE_PLATFORM_APPLE
diff --git a/iree/hal/local/elf/platform/generic.c b/iree/hal/local/elf/platform/generic.c
new file mode 100644
index 0000000..1f5fc6c
--- /dev/null
+++ b/iree/hal/local/elf/platform/generic.c
@@ -0,0 +1,113 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/local/elf/platform.h"
+
+#if defined(IREE_PLATFORM_GENERIC)
+
+#include <malloc.h>
+#include <stdlib.h>
+
+//==============================================================================
+// Memory subsystem information and control
+//==============================================================================
+
+// TODO(benvanik): control with a config.h.
+#define IREE_MEMORY_PAGE_SIZE_NORMAL 4096
+#define IREE_MEMORY_PAGE_SIZE_LARGE 4096
+
+void iree_memory_query_info(iree_memory_info_t* out_info) {
+  memset(out_info, 0, sizeof(*out_info));
+
+  out_info->normal_page_size = IREE_MEMORY_PAGE_SIZE_NORMAL;
+  out_info->normal_page_granularity = IREE_MEMORY_PAGE_SIZE_NORMAL;
+  out_info->large_page_granularity = IREE_MEMORY_PAGE_SIZE_LARGE;
+
+  out_info->can_allocate_executable_pages = true;
+}
+
+void iree_memory_jit_context_begin() {}
+
+void iree_memory_jit_context_end() {}
+
+//==============================================================================
+// Virtual address space manipulation
+//==============================================================================
+
+iree_status_t iree_memory_view_reserve(iree_memory_view_flags_t flags,
+                                       iree_host_size_t total_length,
+                                       void** out_base_address) {
+  *out_base_address = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_status_t status = iree_ok_status();
+
+  void* base_address =
+      aligned_alloc(IREE_MEMORY_PAGE_SIZE_NORMAL, total_length);
+  if (base_address == NULL) {
+    status = iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+                              "malloc failed on reservation");
+  }
+
+  *out_base_address = base_address;
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+void iree_memory_view_release(void* base_address,
+                              iree_host_size_t total_length) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  free(base_address);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+iree_status_t iree_memory_view_commit_ranges(
+    void* base_address, iree_host_size_t range_count,
+    const iree_byte_range_t* ranges, iree_memory_access_t initial_access) {
+  // No-op.
+  return iree_ok_status();
+}
+
+iree_status_t iree_memory_view_protect_ranges(void* base_address,
+                                              iree_host_size_t range_count,
+                                              const iree_byte_range_t* ranges,
+                                              iree_memory_access_t new_access) {
+  // No-op.
+  return iree_ok_status();
+}
+
+// IREE_ELF_CLEAR_CACHE can be defined externally to override this default
+// behavior.
+#if !defined(IREE_ELF_CLEAR_CACHE)
+#if defined __has_builtin
+#if __has_builtin(__builtin___clear_cache)
+#define IREE_ELF_CLEAR_CACHE(start, end) __builtin___clear_cache(start, end)
+#endif  // __builtin___clear_cache
+#endif  // __has_builtin
+#endif  // !defined(IREE_ELF_CLEAR_CACHE)
+
+#if !defined(IREE_ELF_CLEAR_CACHE)
+#error "no instruction cache clear implementation"
+#endif  // !defined(IREE_ELF_CLEAR_CACHE)
+
+void iree_memory_view_flush_icache(void* base_address,
+                                   iree_host_size_t length) {
+  IREE_ELF_CLEAR_CACHE(base_address, base_address + length);
+}
+
+#endif  // IREE_PLATFORM_GENERIC
diff --git a/iree/hal/local/elf/platform/linux.c b/iree/hal/local/elf/platform/linux.c
new file mode 100644
index 0000000..5e30563
--- /dev/null
+++ b/iree/hal/local/elf/platform/linux.c
@@ -0,0 +1,163 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/local/elf/platform.h"
+
+#if defined(IREE_PLATFORM_ANDROID) || defined(IREE_PLATFORM_LINUX)
+
+#include <errno.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+//==============================================================================
+// Memory subsystem information and control
+//==============================================================================
+
+void iree_memory_query_info(iree_memory_info_t* out_info) {
+  memset(out_info, 0, sizeof(*out_info));
+
+  int page_size = sysconf(_SC_PAGESIZE);
+  out_info->normal_page_size = page_size;
+  out_info->normal_page_granularity = page_size;
+
+  // Large pages arent't currently used so we aren't introducing the build goo
+  // to detect and use them yet.
+  // https://linux.die.net/man/3/gethugepagesizes
+  // http://manpages.ubuntu.com/manpages/bionic/man3/gethugepagesize.3.html
+  // Would be:
+  //   #include <hugetlbfs.h>
+  //   out_info->large_page_granularity = gethugepagesize();
+  out_info->large_page_granularity = page_size;
+
+  out_info->can_allocate_executable_pages = true;
+}
+
+void iree_memory_jit_context_begin() {}
+
+void iree_memory_jit_context_end() {}
+
+//==============================================================================
+// Virtual address space manipulation
+//==============================================================================
+
+static int iree_memory_access_to_prot(iree_memory_access_t access) {
+  int prot = 0;
+  if (access & IREE_MEMORY_ACCESS_READ) prot |= PROT_READ;
+  if (access & IREE_MEMORY_ACCESS_WRITE) prot |= PROT_WRITE;
+  if (access & IREE_MEMORY_ACCESS_EXECUTE) prot |= PROT_EXEC;
+  return prot;
+}
+
+iree_status_t iree_memory_view_reserve(iree_memory_view_flags_t flags,
+                                       iree_host_size_t total_length,
+                                       void** out_base_address) {
+  *out_base_address = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  int mmap_prot = PROT_NONE;
+  int mmap_flags = MAP_PRIVATE | MAP_ANON | MAP_NORESERVE;
+
+  iree_status_t status = iree_ok_status();
+  void* base_address = mmap(NULL, total_length, mmap_prot, mmap_flags, -1, 0);
+  if (base_address == MAP_FAILED) {
+    status = iree_make_status(iree_status_code_from_errno(errno),
+                              "mmap reservation failed");
+  }
+
+  *out_base_address = base_address;
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+void iree_memory_view_release(void* base_address,
+                              iree_host_size_t total_length) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // NOTE: return value ignored as this is a shutdown path.
+  munmap(base_address, total_length);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+iree_status_t iree_memory_view_commit_ranges(
+    void* base_address, iree_host_size_t range_count,
+    const iree_byte_range_t* ranges, iree_memory_access_t initial_access) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  int mmap_prot = iree_memory_access_to_prot(initial_access);
+  int mmap_flags = MAP_PRIVATE | MAP_ANON | MAP_FIXED;
+
+  iree_status_t status = iree_ok_status();
+  for (iree_host_size_t i = 0; i < range_count; ++i) {
+    void* range_start = (void*)iree_page_align_start(
+        (uintptr_t)base_address + ranges[i].offset, getpagesize());
+    void* result =
+        mmap(range_start, ranges[i].length, mmap_prot, mmap_flags, -1, 0);
+    if (result == MAP_FAILED) {
+      status = iree_make_status(iree_status_code_from_errno(errno),
+                                "mmap commit failed");
+      break;
+    }
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+iree_status_t iree_memory_view_protect_ranges(void* base_address,
+                                              iree_host_size_t range_count,
+                                              const iree_byte_range_t* ranges,
+                                              iree_memory_access_t new_access) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  int mmap_prot = iree_memory_access_to_prot(new_access);
+
+  iree_status_t status = iree_ok_status();
+  for (iree_host_size_t i = 0; i < range_count; ++i) {
+    void* range_start = (void*)iree_page_align_start(
+        (uintptr_t)base_address + ranges[i].offset, getpagesize());
+    int ret = mprotect(range_start, ranges[i].length, mmap_prot);
+    if (ret != 0) {
+      status = iree_make_status(iree_status_code_from_errno(errno),
+                                "mprotect failed");
+      break;
+    }
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+// IREE_ELF_CLEAR_CACHE can be defined externally to override this default
+// behavior.
+#if !defined(IREE_ELF_CLEAR_CACHE)
+#if defined __has_builtin
+#if __has_builtin(__builtin___clear_cache)
+#define IREE_ELF_CLEAR_CACHE(start, end) __builtin___clear_cache(start, end)
+#endif  // __builtin___clear_cache
+#endif  // __has_builtin
+#endif  // !defined(IREE_ELF_CLEAR_CACHE)
+
+#if !defined(IREE_ELF_CLEAR_CACHE)
+#error "no instruction cache clear implementation"
+#endif  // !defined(IREE_ELF_CLEAR_CACHE)
+
+void iree_memory_view_flush_icache(void* base_address,
+                                   iree_host_size_t length) {
+  IREE_ELF_CLEAR_CACHE(base_address, base_address + length);
+}
+
+#endif  // IREE_PLATFORM_*
diff --git a/iree/hal/local/elf/platform/windows.c b/iree/hal/local/elf/platform/windows.c
new file mode 100644
index 0000000..9f68ada
--- /dev/null
+++ b/iree/hal/local/elf/platform/windows.c
@@ -0,0 +1,158 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/local/elf/platform.h"
+
+#if defined(IREE_PLATFORM_WINDOWS)
+
+//==============================================================================
+// Memory subsystem information and control
+//==============================================================================
+
+void iree_memory_query_info(iree_memory_info_t* out_info) {
+  memset(out_info, 0, sizeof(*out_info));
+
+  SYSTEM_INFO system_info;
+  GetSystemInfo(&system_info);
+  out_info->normal_page_size = system_info.dwPageSize;
+  out_info->normal_page_granularity = system_info.dwAllocationGranularity;
+
+  out_info->large_page_granularity = GetLargePageMinimum();
+
+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+  out_info->can_allocate_executable_pages = true;
+#else
+  // The application can define the `codeGeneration` property to enable use of
+  // PAGE_EXECUTE but cannot use PAGE_EXECUTE_READWRITE - it's still possible to
+  // make that work but it requires aliasing views (one with READWRITE and one
+  // with EXECUTE) and I'm not sure if anyone will ever care.
+  out_info->can_allocate_executable_pages = false;
+#endif  // WINAPI_PARTITION_DESKTOP
+}
+
+void iree_memory_jit_context_begin() {}
+
+void iree_memory_jit_context_end() {}
+
+//==============================================================================
+// Virtual address space manipulation
+//==============================================================================
+
+// https://docs.microsoft.com/en-us/windows/win32/memory/memory-protection-constants
+static DWORD iree_memory_access_to_win32_page_flags(
+    iree_memory_access_t access) {
+  DWORD protect = 0;
+  if (access & IREE_MEMORY_ACCESS_EXECUTE) {
+    if (access & IREE_MEMORY_ACCESS_WRITE) {
+      protect |= PAGE_EXECUTE_READWRITE;
+    } else if (access & IREE_MEMORY_ACCESS_READ) {
+      protect |= PAGE_EXECUTE_READ;
+    } else {
+      protect |= PAGE_EXECUTE;
+    }
+  } else if (access & IREE_MEMORY_ACCESS_WRITE) {
+    protect |= PAGE_READWRITE;
+  } else if (access & IREE_MEMORY_ACCESS_READ) {
+    protect |= PAGE_READONLY;
+  } else {
+    protect |= PAGE_NOACCESS;
+  }
+  return protect;
+}
+
+iree_status_t iree_memory_view_reserve(iree_memory_view_flags_t flags,
+                                       iree_host_size_t total_length,
+                                       void** out_base_address) {
+  *out_base_address = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_status_t status = iree_ok_status();
+
+  void* base_address =
+      VirtualAlloc(NULL, total_length, MEM_RESERVE, PAGE_NOACCESS);
+  if (base_address == NULL) {
+    status = iree_make_status(iree_status_code_from_win32_error(GetLastError()),
+                              "VirtualAlloc failed to reserve");
+  }
+
+  *out_base_address = base_address;
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+void iree_memory_view_release(void* base_address,
+                              iree_host_size_t total_length) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  // NOTE: return value ignored as this is a shutdown path.
+  VirtualFree(base_address, 0, MEM_RELEASE);
+  IREE_TRACE_ZONE_END(z0);
+}
+
+iree_status_t iree_memory_view_commit_ranges(
+    void* base_address, iree_host_size_t range_count,
+    const iree_byte_range_t* ranges, iree_memory_access_t initial_access) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  DWORD initial_protect =
+      iree_memory_access_to_win32_page_flags(initial_access);
+
+  iree_status_t status = iree_ok_status();
+  for (iree_host_size_t i = 0; i < range_count; ++i) {
+    if (!VirtualAlloc((uint8_t*)base_address + ranges[i].offset,
+                      ranges[i].length, MEM_COMMIT, initial_protect)) {
+      status =
+          iree_make_status(iree_status_code_from_win32_error(GetLastError()),
+                           "VirtualAlloc failed to commit");
+      break;
+    }
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+iree_status_t iree_memory_view_protect_ranges(void* base_address,
+                                              iree_host_size_t range_count,
+                                              const iree_byte_range_t* ranges,
+                                              iree_memory_access_t new_access) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  DWORD new_protect = iree_memory_access_to_win32_page_flags(new_access);
+
+  iree_status_t status = iree_ok_status();
+  for (iree_host_size_t i = 0; i < range_count; ++i) {
+    uint8_t* range_address = (uint8_t*)base_address + ranges[i].offset;
+    DWORD old_protect = 0;
+    BOOL ret = VirtualProtect(range_address, ranges[i].length, new_protect,
+                              &old_protect);
+    if (!ret) {
+      status =
+          iree_make_status(iree_status_code_from_win32_error(GetLastError()),
+                           "VirtualProtect failed");
+      break;
+    }
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+void iree_memory_view_flush_icache(void* base_address,
+                                   iree_host_size_t length) {
+  FlushInstructionCache(GetCurrentProcess(), base_address, length);
+}
+
+#endif  // IREE_PLATFORM_WINDOWS
diff --git a/iree/hal/local/elf/testdata/BUILD b/iree/hal/local/elf/testdata/BUILD
new file mode 100644
index 0000000..3205b59
--- /dev/null
+++ b/iree/hal/local/elf/testdata/BUILD
@@ -0,0 +1,31 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load("//build_tools/embed_data:build_defs.bzl", "cc_embed_data")
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+cc_embed_data(
+    name = "simple_mul_dispatch",
+    testonly = True,
+    srcs = glob(["simple_mul_dispatch_*.so"]),
+    cc_file_output = "simple_mul_dispatch.cc",
+    cpp_namespace = "iree::elf",
+    flatten = True,
+    h_file_output = "simple_mul_dispatch.h",
+)
diff --git a/iree/hal/local/elf/testdata/CMakeLists.txt b/iree/hal/local/elf/testdata/CMakeLists.txt
new file mode 100644
index 0000000..d4488d5
--- /dev/null
+++ b/iree/hal/local/elf/testdata/CMakeLists.txt
@@ -0,0 +1,30 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
+# iree/hal/local/elf/testdata/BUILD                                            #
+#                                                                              #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
+# CMake-only content.                                                          #
+#                                                                              #
+# To disable autogeneration for this file entirely, delete this header.        #
+################################################################################
+
+iree_add_all_subdirs()
+
+file(GLOB _GLOB_SIMPLE_MUL_DISPATCH_X_SO LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} CONFIGURE_DEPENDS simple_mul_dispatch_*.so)
+iree_cc_embed_data(
+  NAME
+    simple_mul_dispatch
+  SRCS
+    "${_GLOB_SIMPLE_MUL_DISPATCH_X_SO}"
+  CC_FILE_OUTPUT
+    "simple_mul_dispatch.cc"
+  H_FILE_OUTPUT
+    "simple_mul_dispatch.h"
+  TESTONLY
+  CPP_NAMESPACE
+    "iree::elf"
+  FLATTEN
+  PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/hal/local/elf/testdata/generate.sh b/iree/hal/local/elf/testdata/generate.sh
new file mode 100644
index 0000000..18c30d6
--- /dev/null
+++ b/iree/hal/local/elf/testdata/generate.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+set -e
+
+ROOT_DIR=$(git rev-parse --show-toplevel)
+
+# TODO(benvanik): document usage.
+# Basically just copy/paste/rename by hand for now.
+
+CMD=(
+  iree/tools/iree-translate
+    -iree-mlir-to-vm-bytecode-module
+    ${ROOT_DIR}/iree/samples/simple_embedding/simple_embedding_test.mlir
+    -o=${ROOT_DIR}/../iree-tmp/simple_embedding_test_dylib_llvm_aot.vmfb
+
+    -iree-hal-target-backends=dylib-llvm-aot
+    -iree-llvm-link-embedded=true
+    -iree-llvm-debug-symbols=false
+
+    -iree-llvm-target-triple=x86_64-pc-linux-elf
+
+    #-iree-llvm-target-triple=aarch64-pc-linux-elf
+
+    #-iree-llvm-target-triple=riscv32-pc-linux-elf
+    #-iree-llvm-target-cpu=generic-rv32
+    #-iree-llvm-target-cpu-features=+f
+    #-iree-llvm-target-abi=ilp32f
+    #-iree-llvm-target-float-abi=hard
+
+    #-iree-llvm-target-triple=riscv64-pc-linux-elf
+    #-iree-llvm-target-cpu=generic-rv64
+    #-iree-llvm-target-cpu=sifive-u74
+    #-iree-llvm-target-abi=lp64d
+
+    #-iree-llvm-keep-linker-artifacts
+)
+"${CMD[@]}"
diff --git a/iree/hal/local/elf/testdata/simple_mul_dispatch_aarch64.so b/iree/hal/local/elf/testdata/simple_mul_dispatch_aarch64.so
new file mode 100644
index 0000000..030c887
--- /dev/null
+++ b/iree/hal/local/elf/testdata/simple_mul_dispatch_aarch64.so
Binary files differ
diff --git a/iree/hal/local/elf/testdata/simple_mul_dispatch_riscv32.so b/iree/hal/local/elf/testdata/simple_mul_dispatch_riscv32.so
new file mode 100644
index 0000000..d7a6f6f
--- /dev/null
+++ b/iree/hal/local/elf/testdata/simple_mul_dispatch_riscv32.so
Binary files differ
diff --git a/iree/hal/local/elf/testdata/simple_mul_dispatch_riscv64.so b/iree/hal/local/elf/testdata/simple_mul_dispatch_riscv64.so
new file mode 100644
index 0000000..ef53afc
--- /dev/null
+++ b/iree/hal/local/elf/testdata/simple_mul_dispatch_riscv64.so
Binary files differ
diff --git a/iree/hal/local/elf/testdata/simple_mul_dispatch_x86_64.so b/iree/hal/local/elf/testdata/simple_mul_dispatch_x86_64.so
new file mode 100644
index 0000000..c5e1153
--- /dev/null
+++ b/iree/hal/local/elf/testdata/simple_mul_dispatch_x86_64.so
Binary files differ
diff --git a/iree/hal/local/loaders/BUILD b/iree/hal/local/loaders/BUILD
index fa4e1f1..c3e4713 100644
--- a/iree/hal/local/loaders/BUILD
+++ b/iree/hal/local/loaders/BUILD
@@ -24,6 +24,23 @@
 )
 
 cc_library(
+    name = "embedded_library_loader",
+    srcs = ["embedded_library_loader.c"],
+    hdrs = ["embedded_library_loader.h"],
+    defines = [
+        "IREE_HAL_HAVE_ELF_LIBRARY_LOADER=1",
+    ],
+    deps = [
+        "//iree/base:api",
+        "//iree/base:core_headers",
+        "//iree/base:tracing",
+        "//iree/hal:api",
+        "//iree/hal/local",
+        "//iree/hal/local/elf:elf_module",
+    ],
+)
+
+cc_library(
     name = "legacy_library_loader",
     srcs = ["legacy_library_loader.c"],
     hdrs = ["legacy_library_loader.h"],
diff --git a/iree/hal/local/loaders/CMakeLists.txt b/iree/hal/local/loaders/CMakeLists.txt
index f440672..7ca0508 100644
--- a/iree/hal/local/loaders/CMakeLists.txt
+++ b/iree/hal/local/loaders/CMakeLists.txt
@@ -12,6 +12,25 @@
 
 iree_cc_library(
   NAME
+    embedded_library_loader
+  HDRS
+    "embedded_library_loader.h"
+  SRCS
+    "embedded_library_loader.c"
+  DEPS
+    iree::base::api
+    iree::base::core_headers
+    iree::base::tracing
+    iree::hal::api
+    iree::hal::local
+    iree::hal::local::elf::elf_module
+  DEFINES
+    "IREE_HAL_HAVE_ELF_LIBRARY_LOADER=1"
+  PUBLIC
+)
+
+iree_cc_library(
+  NAME
     legacy_library_loader
   HDRS
     "legacy_library_loader.h"
diff --git a/iree/hal/local/loaders/embedded_library_loader.c b/iree/hal/local/loaders/embedded_library_loader.c
new file mode 100644
index 0000000..34b5c32
--- /dev/null
+++ b/iree/hal/local/loaders/embedded_library_loader.c
@@ -0,0 +1,283 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "iree/hal/local/loaders/embedded_library_loader.h"
+
+#include "iree/base/target_platform.h"
+#include "iree/base/tracing.h"
+#include "iree/hal/local/elf/elf_module.h"
+#include "iree/hal/local/local_executable.h"
+
+//===----------------------------------------------------------------------===//
+// iree_hal_elf_executable_t
+//===----------------------------------------------------------------------===//
+
+typedef struct {
+  iree_hal_local_executable_t base;
+
+  // Loaded ELF module.
+  iree_elf_module_t module;
+
+  // Name used for the file field in tracy and debuggers.
+  iree_string_view_t identifier;
+
+  // Queried metadata from the library.
+  union {
+    const iree_hal_executable_library_header_t** header;
+    const iree_hal_executable_library_v0_t* v0;
+  } library;
+} iree_hal_elf_executable_t;
+
+extern const iree_hal_local_executable_vtable_t iree_hal_elf_executable_vtable;
+
+static iree_status_t iree_hal_elf_executable_query_library(
+    iree_hal_elf_executable_t* executable) {
+  // Get the exported symbol used to get the library metadata.
+  iree_hal_executable_library_query_fn_t query_fn = NULL;
+  IREE_RETURN_IF_ERROR(iree_elf_module_lookup_export(
+      &executable->module, IREE_HAL_EXECUTABLE_LIBRARY_EXPORT_NAME,
+      (void**)&query_fn));
+
+  // Query for a compatible version of the library.
+  executable->library.header =
+      (const iree_hal_executable_library_header_t**)iree_elf_call_p_i(
+          query_fn, IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION);
+  if (!executable->library.header) {
+    return iree_make_status(
+        IREE_STATUS_FAILED_PRECONDITION,
+        "executable does not support this version of the runtime (%d)",
+        IREE_HAL_EXECUTABLE_LIBRARY_LATEST_VERSION);
+  }
+  const iree_hal_executable_library_header_t* header =
+      *executable->library.header;
+
+  // Ensure that if the library is built for a particular sanitizer that we also
+  // were compiled with that sanitizer enabled.
+  switch (header->sanitizer) {
+    case IREE_HAL_EXECUTABLE_LIBRARY_SANITIZER_NONE:
+      // Always safe even if the host has a sanitizer enabled; it just means
+      // that we won't be able to catch anything from within the executable,
+      // however checks outside will (often) still trigger when guard pages are
+      // dirtied/etc.
+      break;
+    default:
+      return iree_make_status(IREE_STATUS_UNAVAILABLE,
+                              "executable requires sanitizer but they are not "
+                              "yet supported with embedded libraries: %u",
+                              (uint32_t)header->sanitizer);
+  }
+
+  executable->identifier = iree_make_cstring_view(header->name);
+
+  return iree_ok_status();
+}
+
+static iree_status_t iree_hal_elf_executable_create(
+    iree_const_byte_span_t elf_data, iree_host_size_t executable_layout_count,
+    iree_hal_executable_layout_t* const* executable_layouts,
+    iree_allocator_t host_allocator, iree_hal_executable_t** out_executable) {
+  IREE_ASSERT_ARGUMENT(elf_data.data && elf_data.data_length);
+  IREE_ASSERT_ARGUMENT(!executable_layout_count || executable_layouts);
+  IREE_ASSERT_ARGUMENT(out_executable);
+  *out_executable = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_elf_executable_t* executable = NULL;
+  iree_host_size_t total_size =
+      sizeof(*executable) +
+      executable_layout_count * sizeof(iree_hal_local_executable_layout_t);
+  iree_status_t status =
+      iree_allocator_malloc(host_allocator, total_size, (void**)&executable);
+  if (iree_status_is_ok(status)) {
+    iree_hal_local_executable_layout_t** executable_layouts_ptr =
+        (iree_hal_local_executable_layout_t**)(((uint8_t*)executable) +
+                                               sizeof(*executable));
+    iree_hal_local_executable_initialize(
+        &iree_hal_elf_executable_vtable, executable_layout_count,
+        executable_layouts, executable_layouts_ptr, host_allocator,
+        &executable->base);
+  }
+  if (iree_status_is_ok(status)) {
+    // Attempt to load the ELF module.
+    status = iree_elf_module_initialize_from_memory(
+        elf_data, /*import_table=*/NULL, host_allocator, &executable->module);
+  }
+  if (iree_status_is_ok(status)) {
+    // Query metadata and get the entry point function pointers.
+    status = iree_hal_elf_executable_query_library(executable);
+  }
+  if (iree_status_is_ok(status)) {
+    // Check to make sure that the entry point count matches the layouts
+    // provided.
+    if (executable->library.v0->entry_point_count != executable_layout_count) {
+      return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
+                              "executable provides %u entry points but caller "
+                              "provided %zu; must match",
+                              executable->library.v0->entry_point_count,
+                              executable_layout_count);
+    }
+  }
+
+  if (iree_status_is_ok(status)) {
+    *out_executable = (iree_hal_executable_t*)executable;
+  } else {
+    iree_hal_executable_release((iree_hal_executable_t*)executable);
+  }
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+static void iree_hal_elf_executable_destroy(
+    iree_hal_executable_t* base_executable) {
+  iree_hal_elf_executable_t* executable =
+      (iree_hal_elf_executable_t*)base_executable;
+  iree_allocator_t host_allocator = executable->base.host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_elf_module_deinitialize(&executable->module);
+
+  iree_hal_local_executable_deinitialize(
+      (iree_hal_local_executable_t*)base_executable);
+  iree_allocator_free(host_allocator, executable);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+static iree_status_t iree_hal_elf_executable_issue_call(
+    iree_hal_local_executable_t* base_executable, iree_host_size_t ordinal,
+    const iree_hal_executable_dispatch_state_v0_t* dispatch_state,
+    const iree_hal_vec3_t* workgroup_id) {
+  iree_hal_elf_executable_t* executable =
+      (iree_hal_elf_executable_t*)base_executable;
+  const iree_hal_executable_library_v0_t* library = executable->library.v0;
+
+  if (IREE_UNLIKELY(ordinal >= library->entry_point_count)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "entry point ordinal out of bounds");
+  }
+
+#if IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+  iree_string_view_t entry_point_name = iree_string_view_empty();
+  if (library->entry_point_names != NULL) {
+    entry_point_name =
+        iree_make_cstring_view(library->entry_point_names[ordinal]);
+  }
+  if (iree_string_view_is_empty(entry_point_name)) {
+    entry_point_name = iree_make_cstring_view("unknown_elf_call");
+  }
+  IREE_TRACE_ZONE_BEGIN_EXTERNAL(
+      z0, executable->identifier.data, executable->identifier.size, ordinal,
+      entry_point_name.data, entry_point_name.size, NULL, 0);
+#endif  // IREE_TRACING_FEATURES & IREE_TRACING_FEATURE_INSTRUMENTATION
+
+  int ret = iree_elf_call_i_pp(library->entry_points[ordinal],
+                               (void*)dispatch_state, (void*)workgroup_id);
+
+  IREE_TRACE_ZONE_END(z0);
+
+  return ret == 0 ? iree_ok_status()
+                  : iree_make_status(
+                        IREE_STATUS_INTERNAL,
+                        "executable entry point returned catastrophic error %d",
+                        ret);
+}
+
+const iree_hal_local_executable_vtable_t iree_hal_elf_executable_vtable = {
+    .base =
+        {
+            .destroy = iree_hal_elf_executable_destroy,
+        },
+    .issue_call = iree_hal_elf_executable_issue_call,
+};
+
+//===----------------------------------------------------------------------===//
+// iree_hal_embedded_library_loader_t
+//===----------------------------------------------------------------------===//
+
+typedef struct {
+  iree_hal_executable_loader_t base;
+  iree_allocator_t host_allocator;
+} iree_hal_embedded_library_loader_t;
+
+extern const iree_hal_executable_loader_vtable_t
+    iree_hal_embedded_library_loader_vtable;
+
+iree_status_t iree_hal_embedded_library_loader_create(
+    iree_allocator_t host_allocator,
+    iree_hal_executable_loader_t** out_executable_loader) {
+  IREE_ASSERT_ARGUMENT(out_executable_loader);
+  *out_executable_loader = NULL;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_hal_embedded_library_loader_t* executable_loader = NULL;
+  iree_status_t status = iree_allocator_malloc(
+      host_allocator, sizeof(*executable_loader), (void**)&executable_loader);
+  if (iree_status_is_ok(status)) {
+    iree_hal_executable_loader_initialize(
+        &iree_hal_embedded_library_loader_vtable, &executable_loader->base);
+    executable_loader->host_allocator = host_allocator;
+    *out_executable_loader = (iree_hal_executable_loader_t*)executable_loader;
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+static void iree_hal_embedded_library_loader_destroy(
+    iree_hal_executable_loader_t* base_executable_loader) {
+  iree_hal_embedded_library_loader_t* executable_loader =
+      (iree_hal_embedded_library_loader_t*)base_executable_loader;
+  iree_allocator_t host_allocator = executable_loader->host_allocator;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  iree_allocator_free(host_allocator, executable_loader);
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+static bool iree_hal_embedded_library_loader_query_support(
+    iree_hal_executable_loader_t* base_executable_loader,
+    iree_hal_executable_caching_mode_t caching_mode,
+    iree_string_view_t executable_format) {
+  // TODO(benvanik): runtime configured triple. Ask the ELF loader if it can
+  // handle it.
+  return iree_string_view_equal(executable_format,
+                                iree_make_cstring_view("EX_ELF"));
+}
+
+static iree_status_t iree_hal_embedded_library_loader_try_load(
+    iree_hal_executable_loader_t* base_executable_loader,
+    const iree_hal_executable_spec_t* executable_spec,
+    iree_hal_executable_t** out_executable) {
+  iree_hal_embedded_library_loader_t* executable_loader =
+      (iree_hal_embedded_library_loader_t*)base_executable_loader;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  // Perform the load of the ELF and wrap it in an executable handle.
+  iree_status_t status = iree_hal_elf_executable_create(
+      executable_spec->executable_data,
+      executable_spec->executable_layout_count,
+      executable_spec->executable_layouts, executable_loader->host_allocator,
+      out_executable);
+
+  IREE_TRACE_ZONE_END(z0);
+  return status;
+}
+
+const iree_hal_executable_loader_vtable_t
+    iree_hal_embedded_library_loader_vtable = {
+        .destroy = iree_hal_embedded_library_loader_destroy,
+        .query_support = iree_hal_embedded_library_loader_query_support,
+        .try_load = iree_hal_embedded_library_loader_try_load,
+};
diff --git a/iree/hal/local/loaders/embedded_library_loader.h b/iree/hal/local/loaders/embedded_library_loader.h
new file mode 100644
index 0000000..321f674
--- /dev/null
+++ b/iree/hal/local/loaders/embedded_library_loader.h
@@ -0,0 +1,40 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef IREE_HAL_LOCAL_LOADERS_ELF_LIBRARY_LOADER_H_
+#define IREE_HAL_LOCAL_LOADERS_ELF_LIBRARY_LOADER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "iree/base/api.h"
+#include "iree/hal/local/executable_loader.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// Creates an executable loader that can load minimally-featured ELF dynamic
+// libraries on any platform. This allows us to use a single file format across
+// all operating systems at the cost of some missing debugging/profiling
+// features.
+iree_status_t iree_hal_embedded_library_loader_create(
+    iree_allocator_t host_allocator,
+    iree_hal_executable_loader_t** out_executable_loader);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // IREE_HAL_LOCAL_LOADERS_ELF_LIBRARY_LOADER_H_
diff --git a/iree/hal/local/local_executable_cache.c b/iree/hal/local/local_executable_cache.c
index 40b2d46..9f825db 100644
--- a/iree/hal/local/local_executable_cache.c
+++ b/iree/hal/local/local_executable_cache.c
@@ -107,12 +107,15 @@
   iree_hal_local_executable_cache_t* executable_cache =
       iree_hal_local_executable_cache_cast(base_executable_cache);
   for (iree_host_size_t i = 0; i < executable_cache->loader_count; ++i) {
-    if (iree_hal_executable_loader_query_support(
+    if (!iree_hal_executable_loader_query_support(
             executable_cache->loaders[i], executable_spec->caching_mode,
             executable_spec->executable_format)) {
-      return iree_hal_executable_loader_try_load(
-          executable_cache->loaders[i], executable_spec, out_executable);
+      // Loader definitely can't handle the executable; no use trying so skip.
+      continue;
     }
+    // The loader _may_ handle the executable; if the specific executable is not
+    // supported then the try will fail with IREE_STATUS_CANCELLED and we should
+    // continue trying other loaders.
     iree_status_t status = iree_hal_executable_loader_try_load(
         executable_cache->loaders[i], executable_spec, out_executable);
     if (iree_status_is_ok(status)) {
@@ -122,10 +125,13 @@
       // Error beyond just the try failing due to unsupported formats.
       return status;
     }
+    iree_status_ignore(status);
   }
   return iree_make_status(
       IREE_STATUS_NOT_FOUND,
-      "no executable loader registered for the given file format");
+      "no executable loader registered for the given executable format '%.*s'",
+      (int)executable_spec->executable_format.size,
+      executable_spec->executable_format.data);
 }
 
 static const iree_hal_executable_cache_vtable_t
diff --git a/scripts/check_tabs.sh b/scripts/check_tabs.sh
index f113524..bdb1eac 100755
--- a/scripts/check_tabs.sh
+++ b/scripts/check_tabs.sh
@@ -28,6 +28,7 @@
   "\.pb$"
   "\.fb$"
   "\.jar$"
+  "\.so$"
 )
 
 # Join on |