Splitting librt into two builtins and changing linking style. (#7721)

This is based on the work done in #7561 to better link the bitcode in the
compiler. Instead of generating object files to pass to the linker tool
this now links the bitcode modules directly together before running the
LLVM optimization passes. This gets us LTO-like behavior and makes the
bitcode equivalent to any other LLVM IR we could generate in the compiler.

As part of this the IREE-specific portion has been reworked to compile
both standalone and as part of a host binary. This lets us write tests
and benchmarks for the functions in there and debug them independently
of the compiler.

The new setup will also allow us to specialize for particular archs if we
really need to. For example, if we added MMA-like CPU primitives we could
build them out in here. Each of the Builtins/ files can make a decision
as to which bitcode file to link and what options to set based on the
target configuration being compiled (selecting SIMD variants, etc).

There's still some stuff to work out but this drops our embedded ELFs by
13KB (on x64) and will allow us to add more musl/runtime functions
without bloating binaries. I think getting further will require us to generate
a linker script.

This also prepares us for removing the linker requirement when generating
static libraries as we can include all of the code a user needs in the single
static object we emit.
diff --git a/.gitignore b/.gitignore
index 20f215e..daed71b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,4 +61,4 @@
 docs/website/site/
 
 # Temporary files
-iree/compiler/Dialect/HAL/Target/LLVM/librt/bin/librt.ll
+iree/builtins/**/bin/*.ll
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1a77585..cafce6b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -502,6 +502,7 @@
 add_subdirectory(build_tools/embed_data/)
 
 add_subdirectory(iree/base)
+add_subdirectory(iree/builtins)
 add_subdirectory(iree/hal)
 add_subdirectory(iree/modules)
 add_subdirectory(iree/runtime)
diff --git a/iree/builtins/BUILD b/iree/builtins/BUILD
new file mode 100644
index 0000000..f27d209
--- /dev/null
+++ b/iree/builtins/BUILD
@@ -0,0 +1,11 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)
diff --git a/iree/builtins/CMakeLists.txt b/iree/builtins/CMakeLists.txt
new file mode 100644
index 0000000..25ddd46
--- /dev/null
+++ b/iree/builtins/CMakeLists.txt
@@ -0,0 +1,13 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
+# iree/builtins/BUILD                                                          #
+#                                                                              #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
+# CMake-only content.                                                          #
+#                                                                              #
+# To disable autogeneration for this file entirely, delete this header.        #
+################################################################################
+
+iree_add_all_subdirs()
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/builtins/device/BUILD b/iree/builtins/device/BUILD
new file mode 100644
index 0000000..69662e0
--- /dev/null
+++ b/iree/builtins/device/BUILD
@@ -0,0 +1,21 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+cc_library(
+    name = "device",
+    srcs = [
+        "device_generic.c",
+    ],
+    hdrs = [
+        "device.h",
+    ],
+)
diff --git a/iree/builtins/device/CMakeLists.txt b/iree/builtins/device/CMakeLists.txt
new file mode 100644
index 0000000..9a89402
--- /dev/null
+++ b/iree/builtins/device/CMakeLists.txt
@@ -0,0 +1,23 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
+# iree/builtins/device/BUILD                                                   #
+#                                                                              #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
+# CMake-only content.                                                          #
+#                                                                              #
+# To disable autogeneration for this file entirely, delete this header.        #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+  NAME
+    device
+  HDRS
+    "device.h"
+  SRCS
+    "device_generic.c"
+  PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/builtins/device/README.md b/iree/builtins/device/README.md
new file mode 100644
index 0000000..71fdc6c
--- /dev/null
+++ b/iree/builtins/device/README.md
@@ -0,0 +1,213 @@
+IREE CPU Device Library: `libdevice`
+====================================
+
+This library provides builtin functions to the IREE generated CPU code. It
+covers the role of a compiler runtime library handling things like soft float
+builtin calls produced during code generation and a support library to ease
+implementation of more complex intrinsic-like functionality. The code in this
+library is compiled into bitcode files and embedded inside the IREE compiler
+which then links it into the generated code before emitting the final user
+output.
+
+```
++------------+      +-------+      +-------------------------------+
+| device_*.c | ---> | clang | ---> |+-------------------------------+
++------------+      +-------+      +| libdevice_[arch]_[variant].bc |
+                                    +-------------------------------+
+                                                  |||
+                                                  vvv
+      +------------+      +---------+      +================+
+      | input.mlir | ---> | codegen | ---> | iree-translate |
+      +------------+      +---------+      +================+
+                                                   |
+                      +----------------------------+
+                      v                            v
+         +------------------------+   +----------------------------+
+         | static library (.o/.a) |   | dynamic library (.so/.dll) |
+         +------------------------+   +----------------------------+
+```
+
+Good examples of things this library can provide:
+* float16/half support functions
+* MMA-like intrinsics for architecture-optimized tiled matrix multiplies
+* Atomic intrinsics
+
+Bad examples:
+* A full convolution kernel
+* Anything used in only one particular configuration or target
+* Frequently changing code
+
+### Why Not C++ Passes?
+
+This approach of an external library that is linked in via bitcode is a tradeoff
+that favors a familiar environment for architecture-specific implementations and
+reusable code over custom MLIR passes that directly construct the IR. It will
+always be better from a technical standpoint to directly perform these
+specializations inside compiler passes as all information is available, multiple
+levels of optimization at MLIR `vector` and `llvm` dialect levels can hoist and
+fold aggressively, and specialization is possible using the entire context. It's
+encouraged that work is done there when possible and some of the cases handled
+by this library may end up being done in that environment.
+
+As a reusable library this approach allows for other backends - such as the IREE
+VMVX backend - to share the same optimized implementations. Having standalone
+tests and benchmarks also allows for fast iteration without needing to modify
+the compiler.
+
+The hope is that over time things added here will be moved into the compiler and
+this becomes mostly a lightweight intrinsics library and staging ground for
+experimental features that require quick iteration in C.
+
+## Bitcode Files
+
+The IREE compiler embeds bitcode files and when producing executable libraries
+will select one for linkage based on the specified target machine. As these
+bitcode files can only be produced by a cross-compilation-enabled Clang they are
+built offline and checked into the repository. Future improvements to the
+compiler could also allow for external files to be specified to avoid the need
+to rebuild the compiler; however, for now this keeps things simple and hermetic.
+
+The naming convention is `libdevice_[arch]_[features].bc`, corresponding to the
+source files of `device_[arch].c` with the features specifying conditional
+target CPU features such as extended instruction sets. When no special features
+are required `generic` is used.
+
+For example, the implementations for all ISA variants of AArch64 would be found
+in a `device_aarch64.c` and an implementation for the baseline ISA
+is compiled into `libdevice_aarch64_generic.bc`. When the dot product
+instructions are available (`-march=armv8.2-a+dotprod`) the more specialized
+`libdevice_aarch64_dotprod.bc` bitcode file would be used.
+
+### Updating Bitcode Files
+
+The bitcode files need to be rebuilt whenever the source is modified, new
+variants are added, or new architectures are targeted. The
+[`bin/build.sh`](bin/build.sh) script uses a compatible Clang and LLVM toolchain to
+produce the files in the correct format and location.
+
+Requirements:
+* A modern version of Clang/LLVM (tested with 13)
+* A build of llvm-as with all target architectures linked in
+
+This script could use some usability improvements, but for now a common
+invocation will look like:
+```sh
+LLVM_AS=/usr/bin/llvm-as \
+CLANG=/usr/bin/clang-13 \
+./iree/builtins/device/bin/build.sh
+```
+
+If there are complaints that llvm-as does not support a target architecture then
+the llvm-as included in the IREE CMake distribution should be built and provided
+by way of the `IREE_BUILD_DIR`:
+```sh
+IREE_BUILD_DIR=../iree-build \
+CLANG=/usr/bin/clang-13 \
+./iree/builtins/device/bin/build.sh
+```
+
+After this the newly updated/added bitcode files can be added to git.
+
+### Compiler Bitcode Selection
+
+The logic in the compiler for selecting which bitcode file to use is found in
+[`iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Device.cpp`](/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Device.cpp).
+The `lookupDeviceFile` function uses the `llvm::TargetMachine` to query the
+architecture, CPU features, and other properties to choose the corresponding
+bitcode file. If no matching bitcode file is found a fallback of the WebAssembly
+generic implementation is used as its bitcode is generally portable. It's not
+fast, though, and should only be used for correctness testing during bringup.
+
+### Adding an Architecture/ISA Bitcode File
+
+First copy [`device_generic.c`](device_generic.c) and name it consistently with
+the canonical LLVM architecture (the first part of the target triple, e.g. if
+you pass `--target=aarch64-arm-none-eabi` to Clang you'd name it `aarch64`).
+
+From there guard the new file with the architecture-specific preprocessor guards
+and add the inverse to `device_generic.c` to prevent it from being used when the
+source files are globbed.
+
+To build the new bitcode file add a `make_arch_bc` call to [`bin/build.sh`](bin/build.sh).
+The flags provided are passed directly to Clang and can be used to control the
+compilation environment with the requirement being that the corresponding
+selection logic is updated in `Device.cpp`.
+
+Finally update the [`iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Device.cpp`](/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Device.cpp)
+file in the compiler to select the new bitcode file based on the
+`llvm::TargetMachine` in the same way that it is produced with `make_arch_bc`.
+
+Ergonomic improvements here would allow for function-level multi-versioning such
+that bitcode files per architecture could be used instead of requiring
+per-feature variants of each bitcode file.
+
+## Engineering Requirements
+
+As this library is directly merged into the compiler-generated code there are
+specific restrictions on what can be used, inherited from the IREE executable
+requirements:
+
+* No mutable globals/static variables or thread-local storage
+* No syscalls
+* No libc calls outside of builtins (like memset/memcpy) - _no mallocs_!
+
+Though the primary usage of the library is through the precompiled bitcode files
+that only need to work with Clang, the library may also be built on other
+toolchains such as GCC and MSVC (or older versions of Clang). When standard
+intrinsics are used this will generally not be a problem; however, inline
+assembly may need compiler-specific variants or at least exclusions that fall
+back to generic paths.
+
+### Compile-time Configuration
+
+Preprocessor statements used to control behavior must only use information known
+when the bitcode files are being compiled. This means that if the bitcode file
+being produced is for AArch64 it is safe to use the `__aarch64__` macro.
+Information that is only available after the bitcode file is produced - such as
+in the IREE compiler pipelines - must use link-time configuration.
+
+### Link-time Configuration
+
+As we are producing bitcode files we cannot rely on the C preprocessor for
+changing behavior based on some information only known during linking. In other
+cases we may want to specialize code paths based on knowledge about the context
+in which the kernels are used. To provide this link-time modification ability
+there is support for flags by way of `extern` globals. These globals are either
+specified by the IREE compiler when linking the bitcode or by the hosting
+application when linked statically.
+
+Each flag is defined in `device.h`; for example:
+```c
+extern int libdevice_platform_example_flag;
+```
+
+Any code may then use this flag to condition/control behavior:
+```c
+if (libdevice_platform_example_flag >= 1) {
+  // Do something special.
+}
+```
+
+When linking libdevice statically the flags can be provided by the hosting
+application via compiler defines: `-DLIBDEVICE_PLATFORM_EXAMPLE_FLAG=123`.
+
+When producing bitcode the flags are left symbolic and the IREE compiler
+provides their values:
+```c++
+overridePlatformGlobal(*bitcodeModule, "libdevice_platform_example_flag", 123u);
+```
+
+What flags are useful and how to handle cases where flags are arch-dependent are
+still TBD.
+
+## Testing and Benchmarking
+
+[`tools/libdevice_test.cc`](tools/libdevice_test.cc) provides a gtest runner
+that compares the results of the optimized implementations for the target
+architecture against a reference implementation for correctness.
+
+[`tools/libdevice_benchmark.c`](tools/libdevice_benchmark.c) provides a
+benchmark suite for the optimized implementations of the target architecture.
+
+Both are compiled for the CMake target and can be used to develop
+implementations without the need to rebuild/run the compiler.
diff --git a/iree/builtins/device/bin/BUILD b/iree/builtins/device/bin/BUILD
new file mode 100644
index 0000000..29c7065
--- /dev/null
+++ b/iree/builtins/device/bin/BUILD
@@ -0,0 +1,25 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//build_tools/embed_data:build_defs.bzl", "c_embed_data")
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+c_embed_data(
+    name = "libdevice",
+    srcs = [
+        "libdevice_wasm32_generic.bc",
+        "libdevice_wasm64_generic.bc",
+    ],
+    c_file_output = "libdevice.c",
+    flatten = True,
+    h_file_output = "libdevice.h",
+    identifier = "iree_builtins_libdevice",
+)
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/librt/CMakeLists.txt b/iree/builtins/device/bin/CMakeLists.txt
similarity index 65%
rename from iree/compiler/Dialect/HAL/Target/LLVM/librt/CMakeLists.txt
rename to iree/builtins/device/bin/CMakeLists.txt
index 210b6f2..e3976c8 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/librt/CMakeLists.txt
+++ b/iree/builtins/device/bin/CMakeLists.txt
@@ -1,6 +1,6 @@
 ################################################################################
 # Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
-# iree/compiler/Dialect/HAL/Target/LLVM/librt/BUILD                            #
+# iree/builtins/device/bin/BUILD                                               #
 #                                                                              #
 # Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
 # CMake-only content.                                                          #
@@ -8,38 +8,20 @@
 # To disable autogeneration for this file entirely, delete this header.        #
 ################################################################################
 
-if(NOT "${IREE_TARGET_BACKEND_DYLIB-LLVM-AOT}" AND NOT "${IREE_TARGET_BACKEND_WASM-LLVM-AOT}")
-  return()
-endif()
-
 iree_add_all_subdirs()
 
 iree_c_embed_data(
   NAME
-    librt
+    libdevice
   SRCS
-    "bin/librt.bc"
+    "libdevice_wasm32_generic.bc"
+    "libdevice_wasm64_generic.bc"
   C_FILE_OUTPUT
-    "librt.c"
+    "libdevice.c"
   H_FILE_OUTPUT
-    "librt.h"
+    "libdevice.h"
   IDENTIFIER
-    "iree_compiler_librt"
-  FLATTEN
-  PUBLIC
-)
-
-iree_c_embed_data(
-  NAME
-    librt64
-  SRCS
-    "bin/librt64.bc"
-  C_FILE_OUTPUT
-    "librt64.c"
-  H_FILE_OUTPUT
-    "librt64.h"
-  IDENTIFIER
-    "iree_compiler_librt64"
+    "iree_builtins_libdevice"
   FLATTEN
   PUBLIC
 )
diff --git a/iree/builtins/device/bin/build.sh b/iree/builtins/device/bin/build.sh
new file mode 100644
index 0000000..11f793a
--- /dev/null
+++ b/iree/builtins/device/bin/build.sh
@@ -0,0 +1,67 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Example command line:
+#   LLVM_AS=/usr/bin/llvm-as \
+#   CLANG=/usr/bin/clang-13 \
+#   ./iree/builtins/device/bin/build.sh
+
+set -x
+set -e
+
+CLANG="${CLANG:-clang}"
+# TODO(benvanik): figure out how to get this path from clang itself.
+CLANG_INCLUDE="${CLANG_INCLUDE:-/usr/lib/llvm-13/lib/clang/13.0.0/include/}"
+IREE_SRC_DIR="$(git rev-parse --show-toplevel)"
+IREE_BUILD_DIR="${IREE_BUILD_DIR:-${IREE_SRC_DIR?}/../build}"
+LLVM_AS="${LLVM_AS:-${IREE_BUILD_DIR}/third_party/llvm-project/llvm/bin/llvm-as}"
+
+SCRIPT_DIR="$(realpath `dirname $0`)"
+OUT="${SCRIPT_DIR?}/"
+SRC="${SCRIPT_DIR?}/.."
+
+function make_arch_bc {
+  local ARCH=$1
+  local FEATURES=$2
+  local SOURCE_FILE=$3
+  local FILE_BASENAME="${OUT}/libdevice_${ARCH}_${FEATURES}"
+
+  # Emit readable textual LLVM IR (.ll); args after the third go to Clang.
+  # The .ll is not checked in or used by the compiler.
+  ${CLANG?} \
+      "${@:4}" \
+      -isystem "${CLANG_INCLUDE?}" \
+      -std=c17 \
+      -O3 \
+      -fno-ident \
+      -fvisibility=hidden \
+      -nostdinc \
+      -S \
+      -emit-llvm \
+      -fdiscard-value-names \
+      -DIREE_DEVICE_STANDALONE \
+      -o "${FILE_BASENAME}.ll" \
+      -c \
+      "${SRC}/${SOURCE_FILE}"
+
+  # Blank out IR comments, source_filename, and target datalayout/triple, and
+  # reset every attribute group to `inlinehint`, for more deterministic output.
+  sed -i 's/^;.*$//' "${FILE_BASENAME}.ll"
+  sed -i 's/^source_filename.*$//' "${FILE_BASENAME}.ll"
+  sed -i 's/^target datalayout.*$//' "${FILE_BASENAME}.ll"
+  sed -i 's/^target triple.*$//' "${FILE_BASENAME}.ll"
+  sed -i 's/^\(attributes #[0-9]* = {\).*$/\1 inlinehint }/' "${FILE_BASENAME}.ll"
+
+  # Assemble the cleaned-up IR into the binary bitcode embedded in the compiler.
+  # NOTE: llvm-as reads from stdin so that the filename on the user's system is
+  # not embedded in the bitcode file (which would make it non-deterministic).
+  cat "${FILE_BASENAME}.ll" | ${LLVM_AS} -o="${FILE_BASENAME}.bc"
+}
+
+make_arch_bc "wasm32" "generic" "device_generic.c" \
+    --target=wasm32
+make_arch_bc "wasm64" "generic" "device_generic.c" \
+    --target=wasm64
diff --git a/iree/builtins/device/bin/libdevice_wasm32_generic.bc b/iree/builtins/device/bin/libdevice_wasm32_generic.bc
new file mode 100644
index 0000000..26e2310
--- /dev/null
+++ b/iree/builtins/device/bin/libdevice_wasm32_generic.bc
Binary files differ
diff --git a/iree/builtins/device/bin/libdevice_wasm64_generic.bc b/iree/builtins/device/bin/libdevice_wasm64_generic.bc
new file mode 100644
index 0000000..26e2310
--- /dev/null
+++ b/iree/builtins/device/bin/libdevice_wasm64_generic.bc
Binary files differ
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/librt/src/librt.h b/iree/builtins/device/device.h
similarity index 66%
rename from iree/compiler/Dialect/HAL/Target/LLVM/librt/src/librt.h
rename to iree/builtins/device/device.h
index 0af8b21..4378101 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/librt/src/librt.h
+++ b/iree/builtins/device/device.h
@@ -4,6 +4,9 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+#ifndef IREE_BUILTINS_DEVICE_DEVICE_H_
+#define IREE_BUILTINS_DEVICE_DEVICE_H_
+
 //===----------------------------------------------------------------------===//
 // A simplified libc/libm-alike that is designed to compile to portable LLVM IR.
 //===----------------------------------------------------------------------===//
@@ -31,11 +34,35 @@
 // target-specific module.
 
 //===----------------------------------------------------------------------===//
+// Configuration
+//===----------------------------------------------------------------------===//
+
+// IREE_DEVICE_STANDALONE:
+// Define to have libdevice's implementation of builtins alias the standard
+// names. If undefined then the host toolchain implementations will be used.
+
+//===----------------------------------------------------------------------===//
 // Attributes and metadata
 //===----------------------------------------------------------------------===//
 
 // Tagged on functions that are part of the public API.
-#define LIBRT_EXPORT __attribute__((visibility("hidden")))
+#ifdef __cplusplus
+#define IREE_DEVICE_EXPORT extern "C"
+#else
+#define IREE_DEVICE_EXPORT
+#endif  // __cplusplus
+
+// `restrict` keyword, not supported by some older compilers.
+// We define our own macro in case dependencies use `restrict` differently.
+#if defined(_MSC_VER) && _MSC_VER >= 1900
+#define IREE_DEVICE_RESTRICT __restrict
+#elif defined(_MSC_VER)
+#define IREE_DEVICE_RESTRICT
+#elif defined(__cplusplus)
+#define IREE_DEVICE_RESTRICT __restrict__
+#else
+#define IREE_DEVICE_RESTRICT restrict
+#endif  // _MSC_VER
 
 //===----------------------------------------------------------------------===//
 // stdint.h
@@ -43,6 +70,8 @@
 // https://pubs.opengroup.org/onlinepubs/009604599/basedefs/stdint.h.html
 // NOTE: no size_t/ptrdiff_t/etc (as they are target dependent).
 
+#if !defined(INT8_MIN)
+
 typedef signed char int8_t;
 typedef short int16_t;
 typedef int int32_t;
@@ -65,6 +94,8 @@
 #define UINT32_MAX 0xffffffffui32
 #define UINT64_MAX 0xffffffffffffffffui64
 
+#endif  // !INT8_MIN
+
 //===----------------------------------------------------------------------===//
 // Target-specific queries
 //===----------------------------------------------------------------------===//
@@ -72,4 +103,18 @@
 // here in C before we generate the IR.
 
 // Do not use: here as an example. Remove once we have any other flag.
-extern int librt_platform_example_flag;
+extern int libdevice_platform_example_flag;
+// The value used when not coming from the compiler.
+#define LIBDEVICE_PLATFORM_EXAMPLE_FLAG 0
+
+//===----------------------------------------------------------------------===//
+// Public API
+//===----------------------------------------------------------------------===//
+
+// Converts a 16-bit floating-point value to a 32-bit C `float`.
+IREE_DEVICE_EXPORT float iree_h2f_ieee(short param);
+
+// Converts a 32-bit C `float` value to a 16-bit floating-point value.
+IREE_DEVICE_EXPORT short iree_f2h_ieee(float param);
+
+#endif  // IREE_BUILTINS_DEVICE_DEVICE_H_
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/librt/src/libm.c b/iree/builtins/device/device_generic.c
similarity index 84%
rename from iree/compiler/Dialect/HAL/Target/LLVM/librt/src/libm.c
rename to iree/builtins/device/device_generic.c
index fcb4e28..3d55f71 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/librt/src/libm.c
+++ b/iree/builtins/device/device_generic.c
@@ -4,15 +4,13 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-#include "libm.h"
+#include "device.h"
 
-// https://en.cppreference.com/w/c/numeric/math/fma
-LIBRT_EXPORT float fmaf(float x, float y, float z) {
-  // TODO(*): a real implementation :)
-  return (x * y) + z;
-}
+#if !defined(IREE_DEVICE_STANDALONE)
+int libdevice_platform_example_flag = LIBDEVICE_PLATFORM_EXAMPLE_FLAG;
+#endif  // IREE_DEVICE_STANDALONE
 
-LIBRT_EXPORT float __gnu_h2f_ieee(short param) {
+IREE_DEVICE_EXPORT float iree_h2f_ieee(short param) {
   unsigned short expHalf16 = param & 0x7C00;
   int exp1 = (int)expHalf16;
   unsigned short mantissa16 = param & 0x03FF;
@@ -53,7 +51,7 @@
   return res;
 }
 
-LIBRT_EXPORT short __gnu_f2h_ieee(float param) {
+IREE_DEVICE_EXPORT short iree_f2h_ieee(float param) {
   unsigned int param_bit = *((unsigned int*)(&param));
   int sign = param_bit >> 31;
   int mantissa = param_bit & 0x007FFFFF;
@@ -109,3 +107,15 @@
   }
   return res;
 }
+
+#if defined(IREE_DEVICE_STANDALONE)
+
+IREE_DEVICE_EXPORT float __gnu_h2f_ieee(short param) {
+  return iree_h2f_ieee(param);
+}
+
+IREE_DEVICE_EXPORT short __gnu_f2h_ieee(float param) {
+  return iree_f2h_ieee(param);
+}
+
+#endif  // IREE_DEVICE_STANDALONE
diff --git a/iree/builtins/device/tools/BUILD b/iree/builtins/device/tools/BUILD
new file mode 100644
index 0000000..bbb357c
--- /dev/null
+++ b/iree/builtins/device/tools/BUILD
@@ -0,0 +1,34 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+cc_test(
+    name = "libdevice_benchmark",
+    srcs = ["libdevice_benchmark.c"],
+    deps = [
+        "//iree/base",
+        "//iree/base/internal:flags",
+        "//iree/builtins/device",
+        "//iree/testing:benchmark",
+    ],
+)
+
+cc_test(
+    name = "libdevice_test",
+    srcs = ["libdevice_test.cc"],
+    deps = [
+        "//iree/base",
+        "//iree/base/internal:flags",
+        "//iree/builtins/device",
+        "//iree/testing:gtest",
+        "//iree/testing:gtest_main",
+    ],
+)
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/librt/CMakeLists.txt b/iree/builtins/device/tools/CMakeLists.txt
similarity index 60%
copy from iree/compiler/Dialect/HAL/Target/LLVM/librt/CMakeLists.txt
copy to iree/builtins/device/tools/CMakeLists.txt
index 210b6f2..1f3c2ac 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/librt/CMakeLists.txt
+++ b/iree/builtins/device/tools/CMakeLists.txt
@@ -1,6 +1,6 @@
 ################################################################################
 # Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
-# iree/compiler/Dialect/HAL/Target/LLVM/librt/BUILD                            #
+# iree/builtins/device/tools/BUILD                                             #
 #                                                                              #
 # Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
 # CMake-only content.                                                          #
@@ -8,40 +8,31 @@
 # To disable autogeneration for this file entirely, delete this header.        #
 ################################################################################
 
-if(NOT "${IREE_TARGET_BACKEND_DYLIB-LLVM-AOT}" AND NOT "${IREE_TARGET_BACKEND_WASM-LLVM-AOT}")
-  return()
-endif()
-
 iree_add_all_subdirs()
 
-iree_c_embed_data(
+iree_cc_test(
   NAME
-    librt
+    libdevice_benchmark
   SRCS
-    "bin/librt.bc"
-  C_FILE_OUTPUT
-    "librt.c"
-  H_FILE_OUTPUT
-    "librt.h"
-  IDENTIFIER
-    "iree_compiler_librt"
-  FLATTEN
-  PUBLIC
+    "libdevice_benchmark.c"
+  DEPS
+    iree::base
+    iree::base::internal::flags
+    iree::builtins::device
+    iree::testing::benchmark
 )
 
-iree_c_embed_data(
+iree_cc_test(
   NAME
-    librt64
+    libdevice_test
   SRCS
-    "bin/librt64.bc"
-  C_FILE_OUTPUT
-    "librt64.c"
-  H_FILE_OUTPUT
-    "librt64.h"
-  IDENTIFIER
-    "iree_compiler_librt64"
-  FLATTEN
-  PUBLIC
+    "libdevice_test.cc"
+  DEPS
+    iree::base
+    iree::base::internal::flags
+    iree::builtins::device
+    iree::testing::gtest
+    iree::testing::gtest_main
 )
 
 ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/builtins/device/tools/libdevice_benchmark.c b/iree/builtins/device/tools/libdevice_benchmark.c
new file mode 100644
index 0000000..0814f56
--- /dev/null
+++ b/iree/builtins/device/tools/libdevice_benchmark.c
@@ -0,0 +1,78 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/base/api.h"
+#include "iree/base/internal/flags.h"
+#include "iree/builtins/device/device.h"
+#include "iree/testing/benchmark.h"
+
+// Example flag; not really useful:
+IREE_FLAG(int32_t, batch_count, 64, "Ops to run per benchmark iteration.");
+
+static iree_status_t iree_h2f_ieee_benchmark(
+    const iree_benchmark_def_t* benchmark_def,
+    iree_benchmark_state_t* benchmark_state) {
+  while (iree_benchmark_keep_running(benchmark_state,
+                                     /*batch_count=*/FLAG_batch_count)) {
+    for (int i = 0; i < FLAG_batch_count; ++i) {
+      // TODO(benvanik): iree_do_not_optimize barrier; result is unused here.
+      iree_h2f_ieee(0x3400 + i);
+    }
+  }
+  return iree_ok_status();
+}
+
+static iree_status_t iree_f2h_ieee_benchmark(
+    const iree_benchmark_def_t* benchmark_def,
+    iree_benchmark_state_t* benchmark_state) {
+  while (iree_benchmark_keep_running(benchmark_state,
+                                     /*batch_count=*/FLAG_batch_count)) {
+    for (int i = 0; i < FLAG_batch_count; ++i) {
+      // TODO(benvanik): iree_do_not_optimize barrier; result is unused here.
+      iree_f2h_ieee(0.25f + i);
+    }
+  }
+  return iree_ok_status();
+}
+
+int main(int argc, char** argv) {
+  iree_flags_set_usage(
+      "libdevice_benchmark",
+      "Benchmarks the libdevice implementation of the target machine.\n"
+      "\n");
+
+  iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_UNDEFINED_OK, &argc, &argv);
+  iree_benchmark_initialize(&argc, argv);
+
+  {
+    static const iree_benchmark_def_t benchmark_def = {
+        .flags = IREE_BENCHMARK_FLAG_MEASURE_PROCESS_CPU_TIME |
+                 IREE_BENCHMARK_FLAG_USE_REAL_TIME,
+        .time_unit = IREE_BENCHMARK_UNIT_NANOSECOND,
+        .minimum_duration_ns = 0,
+        .iteration_count = 0,
+        .run = iree_h2f_ieee_benchmark,
+        .user_data = NULL,
+    };
+    iree_benchmark_register(IREE_SV("iree_h2f_ieee"), &benchmark_def);
+  }
+
+  {
+    static const iree_benchmark_def_t benchmark_def = {
+        .flags = IREE_BENCHMARK_FLAG_MEASURE_PROCESS_CPU_TIME |
+                 IREE_BENCHMARK_FLAG_USE_REAL_TIME,
+        .time_unit = IREE_BENCHMARK_UNIT_NANOSECOND,
+        .minimum_duration_ns = 0,
+        .iteration_count = 0,
+        .run = iree_f2h_ieee_benchmark,
+        .user_data = NULL,
+    };
+    iree_benchmark_register(IREE_SV("iree_f2h_ieee"), &benchmark_def);
+  }
+
+  iree_benchmark_run_specified();
+  return 0;
+}
diff --git a/iree/builtins/device/tools/libdevice_test.cc b/iree/builtins/device/tools/libdevice_test.cc
new file mode 100644
index 0000000..adeed0a
--- /dev/null
+++ b/iree/builtins/device/tools/libdevice_test.cc
@@ -0,0 +1,22 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <cstring>
+
+#include "iree/base/api.h"
+#include "iree/builtins/device/device.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+TEST(LibDeviceTest, iree_h2f_ieee) {
+  // Smoke test: mostly ensures the code links; 0x3400 must convert to 0.25f.
+  EXPECT_EQ(0.25f, iree_h2f_ieee(0x3400));
+}
+
+TEST(LibDeviceTest, iree_f2h_ieee) {
+  // Smoke test: mostly ensures the code links; 0.25f must convert to 0x3400.
+  EXPECT_EQ(0x3400, iree_f2h_ieee(0.25f));
+}
diff --git a/iree/builtins/musl/BUILD b/iree/builtins/musl/BUILD
new file mode 100644
index 0000000..f27d209
--- /dev/null
+++ b/iree/builtins/musl/BUILD
@@ -0,0 +1,11 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)
diff --git a/iree/builtins/musl/CMakeLists.txt b/iree/builtins/musl/CMakeLists.txt
new file mode 100644
index 0000000..57fa7a2
--- /dev/null
+++ b/iree/builtins/musl/CMakeLists.txt
@@ -0,0 +1,13 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
+# iree/builtins/musl/BUILD                                                     #
+#                                                                              #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
+# CMake-only content.                                                          #
+#                                                                              #
+# To disable autogeneration for this file entirely, delete this header.        #
+################################################################################
+
+iree_add_all_subdirs()
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/librt/Makefile_musl_64.iree b/iree/builtins/musl/Makefile_wasm32.iree
similarity index 83%
copy from iree/compiler/Dialect/HAL/Target/LLVM/librt/Makefile_musl_64.iree
copy to iree/builtins/musl/Makefile_wasm32.iree
index 5a628dd..b79444b 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/librt/Makefile_musl_64.iree
+++ b/iree/builtins/musl/Makefile_wasm32.iree
@@ -8,6 +8,11 @@
 include $(musldir)/Makefile
 
 IREE_BASE_SRCS = $(addprefix $(srcdir)/, \
+	src/fenv/fenv.c \
+	src/math/ceilf.c \
+	src/math/floorf.c \
+	src/math/fmaf.c \
+	src/math/fmodf.c \
 	src/math/powf.c \
 	src/math/expf.c \
 	src/math/powf_data.c \
@@ -19,7 +24,7 @@
 IREE_BASE_LLS = $(patsubst $(srcdir)/%,%.ll,$(basename $(IREE_BASE_SRCS)))
 IREE_BASE_BCS = $(patsubst $(srcdir)/%,%.bc,$(basename $(IREE_BASE_SRCS)))
 IREE_LL_FILES = $(addprefix obj/, $(IREE_BASE_LLS))
-IREE_CFLAGS=-Xclang -disable-llvm-passes -target wasm64
+IREE_CFLAGS=-Xclang -disable-llvm-passes -fno-ident -fvisibility=hidden -target wasm32
 LL_CMD = $(CC) $(CFLAGS_ALL) $(IREE_CFLAGS) -S -emit-llvm -o $@ -c $<
 
 obj/%.ll: $(musldir)/%.c obj/include/bits/alltypes.h
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/librt/Makefile_musl_64.iree b/iree/builtins/musl/Makefile_wasm64.iree
similarity index 83%
rename from iree/compiler/Dialect/HAL/Target/LLVM/librt/Makefile_musl_64.iree
rename to iree/builtins/musl/Makefile_wasm64.iree
index 5a628dd..5e3d956 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/librt/Makefile_musl_64.iree
+++ b/iree/builtins/musl/Makefile_wasm64.iree
@@ -8,6 +8,11 @@
 include $(musldir)/Makefile
 
 IREE_BASE_SRCS = $(addprefix $(srcdir)/, \
+	src/fenv/fenv.c \
+	src/math/ceilf.c \
+	src/math/floorf.c \
+	src/math/fmaf.c \
+	src/math/fmodf.c \
 	src/math/powf.c \
 	src/math/expf.c \
 	src/math/powf_data.c \
@@ -19,7 +24,7 @@
 IREE_BASE_LLS = $(patsubst $(srcdir)/%,%.ll,$(basename $(IREE_BASE_SRCS)))
 IREE_BASE_BCS = $(patsubst $(srcdir)/%,%.bc,$(basename $(IREE_BASE_SRCS)))
 IREE_LL_FILES = $(addprefix obj/, $(IREE_BASE_LLS))
-IREE_CFLAGS=-Xclang -disable-llvm-passes -target wasm64
+IREE_CFLAGS=-Xclang -disable-llvm-passes -fno-ident -fvisibility=hidden -target wasm64
 LL_CMD = $(CC) $(CFLAGS_ALL) $(IREE_CFLAGS) -S -emit-llvm -o $@ -c $<
 
 obj/%.ll: $(musldir)/%.c obj/include/bits/alltypes.h
diff --git a/iree/builtins/musl/bin/BUILD b/iree/builtins/musl/bin/BUILD
new file mode 100644
index 0000000..4fad08b
--- /dev/null
+++ b/iree/builtins/musl/bin/BUILD
@@ -0,0 +1,25 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//build_tools/embed_data:build_defs.bzl", "c_embed_data")
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+c_embed_data(
+    name = "libmusl",
+    srcs = [
+        "libmusl_wasm32_generic.bc",
+        "libmusl_wasm64_generic.bc",
+    ],
+    c_file_output = "libmusl.c",
+    flatten = True,
+    h_file_output = "libmusl.h",
+    identifier = "iree_builtins_libmusl",
+)
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/librt/CMakeLists.txt b/iree/builtins/musl/bin/CMakeLists.txt
similarity index 65%
copy from iree/compiler/Dialect/HAL/Target/LLVM/librt/CMakeLists.txt
copy to iree/builtins/musl/bin/CMakeLists.txt
index 210b6f2..4705d0a 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/librt/CMakeLists.txt
+++ b/iree/builtins/musl/bin/CMakeLists.txt
@@ -1,6 +1,6 @@
 ################################################################################
 # Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
-# iree/compiler/Dialect/HAL/Target/LLVM/librt/BUILD                            #
+# iree/builtins/musl/bin/BUILD                                                 #
 #                                                                              #
 # Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
 # CMake-only content.                                                          #
@@ -8,38 +8,20 @@
 # To disable autogeneration for this file entirely, delete this header.        #
 ################################################################################
 
-if(NOT "${IREE_TARGET_BACKEND_DYLIB-LLVM-AOT}" AND NOT "${IREE_TARGET_BACKEND_WASM-LLVM-AOT}")
-  return()
-endif()
-
 iree_add_all_subdirs()
 
 iree_c_embed_data(
   NAME
-    librt
+    libmusl
   SRCS
-    "bin/librt.bc"
+    "libmusl_wasm32_generic.bc"
+    "libmusl_wasm64_generic.bc"
   C_FILE_OUTPUT
-    "librt.c"
+    "libmusl.c"
   H_FILE_OUTPUT
-    "librt.h"
+    "libmusl.h"
   IDENTIFIER
-    "iree_compiler_librt"
-  FLATTEN
-  PUBLIC
-)
-
-iree_c_embed_data(
-  NAME
-    librt64
-  SRCS
-    "bin/librt64.bc"
-  C_FILE_OUTPUT
-    "librt64.c"
-  H_FILE_OUTPUT
-    "librt64.h"
-  IDENTIFIER
-    "iree_compiler_librt64"
+    "iree_builtins_libmusl"
   FLATTEN
   PUBLIC
 )
diff --git a/iree/builtins/musl/bin/build.sh b/iree/builtins/musl/bin/build.sh
new file mode 100644
index 0000000..c024088
--- /dev/null
+++ b/iree/builtins/musl/bin/build.sh
@@ -0,0 +1,80 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Example command line:
+#   LLVM_AS=/usr/bin/llvm-as \
+#   LLVM_LINK=/usr/bin/llvm-link \
+#   CLANG=/usr/bin/clang-13 \
+#   ./iree/builtins/musl/bin/build.sh
+
+set -x
+set -e
+
+IREE_SRC_DIR="$(git rev-parse --show-toplevel)"
+IREE_BUILD_DIR="${IREE_BUILD_DIR:-${IREE_SRC_DIR?}/../build}"
+# NOTE: IREE_BUILD_DIR must be resolved before the LLVM tool defaults below.
+CLANG="${CLANG:-clang}"
+CLANGXX="${CLANGXX:-$(which clang++)}"
+LLVM_AS="${LLVM_AS:-${IREE_BUILD_DIR}/third_party/llvm-project/llvm/bin/llvm-as}"
+LLVM_LINK="${LLVM_LINK:-${IREE_BUILD_DIR}/third_party/llvm-project/llvm/bin/llvm-link}"
+LLVM_OPT="${LLVM_OPT:-${IREE_BUILD_DIR}/third_party/llvm-project/llvm/bin/opt}"
+
+SCRIPT_DIR="$(realpath `dirname $0`)"
+OUT="${SCRIPT_DIR?}/"
+SRC="${SCRIPT_DIR?}/.."
+
+function make_arch_bc {
+  local ARCH=$1
+  local FEATURES=$2
+  local FILE_BASENAME="${OUT}/libmusl_${ARCH}_${FEATURES}"
+  local MUSL_MAKEFILE="${SCRIPT_DIR?}/../Makefile_${ARCH}.iree"
+  # Generate LLVM IR (.ll) for ${ARCH} using the musl makefile selected above;
+  # the compile target is set by that makefile, not by this script.
+  MUSL_DIR=${IREE_SRC_DIR?}/third_party/musl
+  cd ${MUSL_DIR}
+  rm -rf obj/
+  CC=${CLANG?} CXX=${CLANGXX?} ./configure
+  MUSL_DIR=${MUSL_DIR} make -f ${MUSL_MAKEFILE} iree
+  MUSL_LL_FILES=`find obj/ -name "*.ll"`
+  cp ${MUSL_LL_FILES?} ${OUT}
+  rm ${MUSL_LL_FILES?}
+  cd ${SCRIPT_DIR?}
+
+  ALL_LL_FILES=`find ${OUT} -name "*.ll"`
+
+  cd ${OUT}
+  # git restore ${FILE_BASENAME}.bc
+  for file in ${ALL_LL_FILES}
+  do
+    # Run full LLVM optimizations.
+    # TODO(benvanik): defer this? Some of these opts may not be portable/safe.
+    ${LLVM_OPT?} ${file} -O3 -S -o ${file}.opt.ll
+
+    # Clang adds a bunch of bad attributes and host-specific information that we
+    # don't want (so we get at least somewhat deterministic builds).
+    sed -i 's/^;.*$//' "${file}.opt.ll"
+    sed -i 's/^source_filename.*$//' "${file}.opt.ll"
+    sed -i 's/^target datalayout.*$//' "${file}.opt.ll"
+    sed -i 's/^target triple.*$//' "${file}.opt.ll"
+    sed -i 's/^\(attributes #[0-9]* = {\).*$/\1 inlinehint }/' "${file}.opt.ll"
+
+    # Generate a binary bitcode file embedded into the compiler binary.
+    # NOTE: we do this from stdin so that the filename on the user's system is not
+    # embedded in the bitcode file (making it non-deterministic).
+    cat ${file}.opt.ll | ${LLVM_AS?} -o=${file}.opt.ll.bc
+    rm ${file}.opt.ll
+  done
+  rm ${ALL_LL_FILES}
+
+  ALL_BC_FILES=`ls *.ll.bc`
+  ${LLVM_LINK?} ${ALL_BC_FILES} -o ${FILE_BASENAME}.bc
+  rm ${ALL_BC_FILES}
+}
+
+# NOTE: the compile target is chosen by Makefile_<arch>.iree; make_arch_bc only
+# reads its first two arguments (arch and feature-set name).
+make_arch_bc "wasm32" "generic"
+make_arch_bc "wasm64" "generic"
diff --git a/iree/builtins/musl/bin/libmusl_wasm32_generic.bc b/iree/builtins/musl/bin/libmusl_wasm32_generic.bc
new file mode 100644
index 0000000..02ecf00
--- /dev/null
+++ b/iree/builtins/musl/bin/libmusl_wasm32_generic.bc
Binary files differ
diff --git a/iree/builtins/musl/bin/libmusl_wasm64_generic.bc b/iree/builtins/musl/bin/libmusl_wasm64_generic.bc
new file mode 100644
index 0000000..3e6adcf
--- /dev/null
+++ b/iree/builtins/musl/bin/libmusl_wasm64_generic.bc
Binary files differ
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/BUILD b/iree/compiler/Dialect/HAL/Target/LLVM/BUILD
index 06dcc92..d85a652 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/BUILD
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/BUILD
@@ -41,8 +41,7 @@
         "//iree/compiler/Codegen/LLVMCPU",
         "//iree/compiler/Codegen/Utils",
         "//iree/compiler/Dialect/HAL/Target",
-        "//iree/compiler/Dialect/HAL/Target/LLVM/librt",
-        "//iree/compiler/Dialect/HAL/Target/LLVM/librt:librt64",
+        "//iree/compiler/Dialect/HAL/Target/LLVM/Builtins",
         "@llvm-project//llvm:AArch64AsmParser",
         "@llvm-project//llvm:AArch64CodeGen",
         "@llvm-project//llvm:ARMAsmParser",
@@ -50,6 +49,7 @@
         "@llvm-project//llvm:BitReader",
         "@llvm-project//llvm:BitWriter",
         "@llvm-project//llvm:Core",
+        "@llvm-project//llvm:Linker",
         "@llvm-project//llvm:RISCVAsmParser",
         "@llvm-project//llvm:RISCVCodeGen",
         "@llvm-project//llvm:Support",
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/BUILD b/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/BUILD
new file mode 100644
index 0000000..95e5b18
--- /dev/null
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/BUILD
@@ -0,0 +1,31 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+cc_library(
+    name = "Builtins",
+    srcs = [
+        "Device.cpp",
+        "Musl.cpp",
+    ],
+    hdrs = [
+        "Device.h",
+        "Musl.h",
+    ],
+    deps = [
+        "//iree/builtins/device/bin:libdevice",
+        "//iree/builtins/musl/bin:libmusl",
+        "@llvm-project//llvm:BitReader",
+        "@llvm-project//llvm:Support",
+        "@llvm-project//llvm:Target",
+        "@llvm-project//mlir:Support",
+    ],
+)
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/CMakeLists.txt
new file mode 100644
index 0000000..7475a06
--- /dev/null
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/CMakeLists.txt
@@ -0,0 +1,32 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
+# iree/compiler/Dialect/HAL/Target/LLVM/Builtins/BUILD                         #
+#                                                                              #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
+# CMake-only content.                                                          #
+#                                                                              #
+# To disable autogeneration for this file entirely, delete this header.        #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+  NAME
+    Builtins
+  HDRS
+    "Device.h"
+    "Musl.h"
+  SRCS
+    "Device.cpp"
+    "Musl.cpp"
+  DEPS
+    LLVMBitReader
+    LLVMSupport
+    LLVMTarget
+    MLIRSupport
+    iree::builtins::device::bin::libdevice
+    iree::builtins::musl::bin::libmusl
+  PUBLIC
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Device.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Device.cpp
new file mode 100644
index 0000000..8672590
--- /dev/null
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Device.cpp
@@ -0,0 +1,92 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Device.h"
+
+#include "iree/builtins/device/bin/libdevice.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "mlir/Support/LLVM.h"
+
+namespace mlir {
+namespace iree_compiler {
+namespace IREE {
+namespace HAL {
+
+static const iree_file_toc_t *lookupDeviceFile(StringRef filename) {
+  // Linear scan of the embedded file table for an exact name match.
+  const auto *toc = iree_builtins_libdevice_create();
+  for (size_t i = 0, e = iree_builtins_libdevice_size(); i != e; ++i)
+    if (filename == toc[i].name) return &toc[i];
+  return nullptr;
+}
+
+// Selects the embedded libdevice bitcode file for |targetMachine| based on its
+// target triple. Returns nullptr when the triple is neither 32-bit nor 64-bit
+// or when the selected file is missing from the embedded table.
+static const iree_file_toc_t *lookupDeviceFile(
+    llvm::TargetMachine *targetMachine) {
+  const auto &triple = targetMachine->getTargetTriple();
+  const bool is32Bit = triple.isArch32Bit();
+  const bool is64Bit = triple.isArch64Bit();
+
+  // NOTE: other arch-specific checks go here.
+
+  if (triple.isWasm()) {
+    // TODO(benvanik): feature detect simd and such.
+    // auto features = targetMachine->getTargetFeatureString();
+    if (is32Bit) return lookupDeviceFile("libdevice_wasm32_generic.bc");
+    if (is64Bit) return lookupDeviceFile("libdevice_wasm64_generic.bc");
+    // Otherwise fall through to the generic path below.
+  }
+
+  // Fallback path using the generic wasm variants as they are largely
+  // machine-agnostic.
+  if (is32Bit) return lookupDeviceFile("libdevice_wasm32_generic.bc");
+  if (is64Bit) return lookupDeviceFile("libdevice_wasm64_generic.bc");
+  // Neither 32-bit nor 64-bit: no matching bitcode.
+  return nullptr;
+}
+
+// Pins |globalName| to the constant |newValue| and makes it module-private.
+// TODO(benvanik): move to a common file so we can reuse it.
+static void overridePlatformGlobal(llvm::Module &module, StringRef globalName,
+                                   uint32_t newValue) {
+  auto *global = module.getNamedGlobal(globalName);
+  if (global == nullptr) return;  // Absent when unused by the module.
+  APInt bits(32, newValue);
+  global->setLinkage(llvm::GlobalValue::PrivateLinkage);
+  global->setDSOLocal(true);
+  global->setConstant(true);
+  global->setInitializer(llvm::ConstantInt::get(global->getValueType(), bits));
+}
+
+llvm::Expected<std::unique_ptr<llvm::Module>> loadDeviceBitcode(
+    llvm::TargetMachine *targetMachine, llvm::LLVMContext &context) {
+  // Pick the embedded bitcode file matching the target architecture.
+  const iree_file_toc_t *toc = lookupDeviceFile(targetMachine);
+  if (toc == nullptr) {
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "no matching architecture bitcode file");
+  }
+
+  // Wrap the embedded bytes in a buffer reference and parse them as bitcode.
+  llvm::StringRef contents(toc->data, toc->size);
+  llvm::MemoryBufferRef bufferRef(contents, toc->name);
+  auto parsed = llvm::parseBitcodeFile(bufferRef, context);
+  if (!parsed) return parsed;
+  std::unique_ptr<llvm::Module> deviceModule = std::move(*parsed);
+
+  // Inject target-specific flags.
+  overridePlatformGlobal(*deviceModule, "libdevice_platform_example_flag", 0u);
+
+  return deviceModule;
+}
+
+}  // namespace HAL
+}  // namespace IREE
+}  // namespace iree_compiler
+}  // namespace mlir
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Device.h b/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Device.h
new file mode 100644
index 0000000..ca3aa2a
--- /dev/null
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Device.h
@@ -0,0 +1,26 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_COMPILER_DIALECT_HAL_TARGET_LLVM_BUILTINS_DEVICE_H_
+#define IREE_COMPILER_DIALECT_HAL_TARGET_LLVM_BUILTINS_DEVICE_H_
+
+#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace mlir {
+namespace iree_compiler {
+namespace IREE {
+namespace HAL {
+// Loads the embedded libdevice bitcode for |targetMachine| into |context|.
+llvm::Expected<std::unique_ptr<llvm::Module>> loadDeviceBitcode(
+    llvm::TargetMachine *targetMachine, llvm::LLVMContext &context);
+
+}  // namespace HAL
+}  // namespace IREE
+}  // namespace iree_compiler
+}  // namespace mlir
+
+#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_LLVM_BUILTINS_DEVICE_H_
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Musl.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Musl.cpp
new file mode 100644
index 0000000..27b50f7
--- /dev/null
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Musl.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Musl.h"
+
+#include "iree/builtins/musl/bin/libmusl.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "mlir/Support/LLVM.h"
+
+namespace mlir {
+namespace iree_compiler {
+namespace IREE {
+namespace HAL {
+
+static const iree_file_toc_t *lookupMuslFile(StringRef filename) {
+  // Linear scan of the embedded file table for an exact name match.
+  const auto *toc = iree_builtins_libmusl_create();
+  for (size_t i = 0, e = iree_builtins_libmusl_size(); i != e; ++i)
+    if (filename == toc[i].name) return &toc[i];
+  return nullptr;
+}
+
+// Selects the embedded libmusl bitcode file for |targetMachine| based on its
+// target triple. Returns nullptr when the triple is neither 32-bit nor 64-bit
+// or when the selected file is missing from the embedded table.
+static const iree_file_toc_t *lookupMuslFile(
+    llvm::TargetMachine *targetMachine) {
+  const auto &triple = targetMachine->getTargetTriple();
+
+  // NOTE: other arch-specific checks go here.
+
+  // Fallback path using the generic wasm variants as they are largely
+  // machine-agnostic.
+  if (triple.isArch32Bit()) return lookupMuslFile("libmusl_wasm32_generic.bc");
+  if (triple.isArch64Bit()) return lookupMuslFile("libmusl_wasm64_generic.bc");
+  // Neither 32-bit nor 64-bit: no matching bitcode.
+  return nullptr;
+}
+
+llvm::Expected<std::unique_ptr<llvm::Module>> loadMuslBitcode(
+    llvm::TargetMachine *targetMachine, llvm::LLVMContext &context) {
+  // Pick the embedded bitcode file matching the target architecture.
+  const iree_file_toc_t *toc = lookupMuslFile(targetMachine);
+  if (toc == nullptr) {
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "no matching architecture bitcode file");
+  }
+
+  // Wrap the embedded bytes in a buffer reference and parse them as bitcode.
+  llvm::StringRef contents(toc->data, toc->size);
+  llvm::MemoryBufferRef bufferRef(contents, toc->name);
+  return llvm::parseBitcodeFile(bufferRef, context);
+}
+
+}  // namespace HAL
+}  // namespace IREE
+}  // namespace iree_compiler
+}  // namespace mlir
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Musl.h b/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Musl.h
new file mode 100644
index 0000000..f3fa37a
--- /dev/null
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Musl.h
@@ -0,0 +1,26 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_COMPILER_DIALECT_HAL_TARGET_LLVM_BUILTINS_MUSL_H_
+#define IREE_COMPILER_DIALECT_HAL_TARGET_LLVM_BUILTINS_MUSL_H_
+
+#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace mlir {
+namespace iree_compiler {
+namespace IREE {
+namespace HAL {
+// Loads the embedded libmusl bitcode for |targetMachine| into |context|.
+llvm::Expected<std::unique_ptr<llvm::Module>> loadMuslBitcode(
+    llvm::TargetMachine *targetMachine, llvm::LLVMContext &context);
+
+}  // namespace HAL
+}  // namespace IREE
+}  // namespace iree_compiler
+}  // namespace mlir
+
+#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_LLVM_BUILTINS_MUSL_H_
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt
index a065fae..f75ba2d 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt
@@ -35,6 +35,7 @@
     LLVMBitReader
     LLVMBitWriter
     LLVMCore
+    LLVMLinker
     LLVMRISCVAsmParser
     LLVMRISCVCodeGen
     LLVMSupport
@@ -51,8 +52,7 @@
     iree::compiler::Codegen::PassHeaders
     iree::compiler::Codegen::Utils
     iree::compiler::Dialect::HAL::Target
-    iree::compiler::Dialect::HAL::Target::LLVM::librt
-    iree::compiler::Dialect::HAL::Target::LLVM::librt::librt64
+    iree::compiler::Dialect::HAL::Target::LLVM::Builtins
   PUBLIC
 )
 
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
index 84999f9..13d0a53 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
@@ -10,17 +10,18 @@
 
 #include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h"
 #include "iree/compiler/Codegen/Passes.h"
+#include "iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Device.h"
+#include "iree/compiler/Dialect/HAL/Target/LLVM/Builtins/Musl.h"
 #include "iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.h"
 #include "iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.h"
 #include "iree/compiler/Dialect/HAL/Target/LLVM/LinkerTool.h"
 #include "iree/compiler/Dialect/HAL/Target/LLVM/StaticLibraryGenerator.h"
-#include "iree/compiler/Dialect/HAL/Target/LLVM/librt/librt.h"
-#include "iree/compiler/Dialect/HAL/Target/LLVM/librt/librt64.h"
 #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h"
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/Bitcode/BitcodeWriter.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Linker/Linker.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/TargetSelect.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
@@ -99,6 +100,37 @@
   return success();
 }
 
+// Verifies builtin bitcode is loaded correctly and appends it to |linker|.
+// The module is retargeted to |targetMachine| (data layout, triple) first.
+// Example:
+//  if (failed(linkBuiltinLibrary(loc, linker, tm, "libfoo", loadLibFoo(...))))
+static LogicalResult linkBuiltinLibrary(
+    Location loc, llvm::Linker &linker, llvm::TargetMachine *targetMachine,
+    StringRef name,
+    llvm::Expected<std::unique_ptr<llvm::Module>> bitcodeModuleValue) {
+  // Ensure the bitcode loaded correctly. It may fail if the LLVM version is
+  // incompatible.
+  if (!bitcodeModuleValue) {
+    return mlir::emitError(loc)
+           << "failed to parse " << name
+           << " bitcode: " << llvm::toString(bitcodeModuleValue.takeError())
+           << " (possible LLVM bitcode incompatibility?)";
+  }
+  auto bitcodeModule = std::move(bitcodeModuleValue.get());
+  bitcodeModule->setDataLayout(targetMachine->createDataLayout());
+  bitcodeModule->setTargetTriple(targetMachine->getTargetTriple().str());
+
+  // Link the bitcode into the base module. This will merge in any required
+  // symbols and override declarations that may exist.
+  if (linker.linkInModule(
+          std::move(bitcodeModule),
+          llvm::Linker::OverrideFromSrc /*| llvm::Linker::LinkOnlyNeeded*/)) {
+    return mlir::emitError(loc) << "failed to link " << name << " bitcode";
+  }
+
+  return success();
+}
+
 class LLVMAOTTargetBackend final : public TargetBackend {
  public:
   explicit LLVMAOTTargetBackend(LLVMTargetOptions options)
@@ -223,9 +255,9 @@
 
     // Build the IREE HAL executable library metadata. The runtime uses this to
     // find the entry point functions and their information.
-    // TODO(benvanik): add a flag for this (adds a few KB/binary).
     LibraryBuilder::Mode libraryBuilderMode =
-        LibraryBuilder::Mode::INCLUDE_REFLECTION_ATTRS;
+        options_.debugSymbols ? LibraryBuilder::Mode::INCLUDE_REFLECTION_ATTRS
+                              : LibraryBuilder::Mode::NONE;
     LibraryBuilder libraryBuilder(llvmModule.get(), libraryBuilderMode,
                                   LibraryBuilder::Version::V_0);
     switch (options_.sanitizerKind) {
@@ -289,8 +321,7 @@
              << "failed to configure LLVM module for target linker";
     }
 
-    // LLVM opt passes that perform code generation optimizations/transformation
-    // similar to what a frontend would do before passing to linking.
+    // Specialize the module to our target machine.
     auto targetMachine = createTargetMachine(options_);
     if (!targetMachine) {
       return mlir::emitError(variantOp.getLoc())
@@ -299,6 +330,33 @@
     }
     llvmModule->setDataLayout(targetMachine->createDataLayout());
     llvmModule->setTargetTriple(targetMachine->getTargetTriple().str());
+
+    // Statically link libraries into our module.
+    // Note that if producing a static library then the symbols we add must be
+    // weak such that we don't trigger ODR issues.
+    llvm::Linker moduleLinker(*llvmModule);
+    if (failed(linkBuiltinLibrary(
+            variantOp.getLoc(), moduleLinker, targetMachine.get(), "libdevice",
+            loadDeviceBitcode(targetMachine.get(), context)))) {
+      return mlir::emitError(variantOp.getLoc())
+             << "failed linking in builtin library for target triple '"
+             << options_.targetTriple << "'";
+    }
+    if (failed(linkBuiltinLibrary(
+            variantOp.getLoc(), moduleLinker, targetMachine.get(), "libmusl",
+            loadMuslBitcode(targetMachine.get(), context)))) {
+      return mlir::emitError(variantOp.getLoc())
+             << "failed linking in builtin library for target triple '"
+             << options_.targetTriple << "'";
+    }
+
+    // Strip any compiler identifiers that may have snuck in. We let the linker
+    // tag the module.
+    auto *llvmIdent = llvmModule->getNamedMetadata("llvm.ident");
+    if (llvmIdent) llvmIdent->clearOperands();
+
+    // LLVM opt passes that perform code generation optimizations/transformation
+    // similar to what a frontend would do.
     if (failed(
             runLLVMIRPasses(options_, targetMachine.get(), llvmModule.get()))) {
       return variantOp.emitError()
@@ -307,6 +365,18 @@
              << options_.targetTriple << "'";
     }
 
+    // Fixup visibility from any symbols we may link in - we want to hide all
+    // but the query entry point.
+    for (auto &func : *llvmModule) {
+      if (func.isDeclaration() ||
+          func.getLinkage() ==
+              llvm::GlobalValue::LinkageTypes::ExternalLinkage) {
+        continue;
+      }
+      func.setDSOLocal(true);
+      func.setLinkage(llvm::GlobalValue::LinkageTypes::PrivateLinkage);
+    }
+
     SmallVector<Artifact> objectFiles;
 
     // Emit the base object file containing the bulk of our code.
@@ -319,7 +389,7 @@
       // object file per library).
       std::string objectData;
       if (failed(runEmitObjFilePasses(targetMachine.get(), llvmModule.get(),
-                                      &objectData))) {
+                                      llvm::CGFT_ObjectFile, &objectData))) {
         return variantOp.emitError()
                << "failed to compile LLVM-IR module to an object file";
       }
@@ -330,35 +400,33 @@
       os.close();
       objectFiles.push_back(std::move(objectFile));
     }
-    // Optionally append additional object files that provide functionality that
-    // may otherwise have been runtime-dynamic (like libc/libm calls).
-    // For now we only do this for embedded uses.
-    if (options_.linkEmbedded) {
-      if (failed(buildLibraryObjects(variantOp.getLoc(), targetMachine.get(),
-                                     objectFiles, context,
-                                     iree_compiler_librt_create(), "librt"))) {
-        return variantOp.emitError() << "failed generating library objects";
-      }
-      if (targetMachine->getTargetTriple().isArch64Bit()) {
-        if (failed(buildLibraryObjects(
-                variantOp.getLoc(), targetMachine.get(), objectFiles, context,
-                iree_compiler_librt64_create(), "librt64"))) {
-          return variantOp.emitError()
-                 << "failed generating 64-bit library objects";
-        }
-      }
-    }
 
-    // If we are keeping artifacts then let's also add the bitcode for easier
-    // debugging (vs just the binary object file).
+    // If we are keeping artifacts then let's also add the bitcode and
+    // assembly listing for easier debugging (vs just the binary object file).
     if (options_.keepLinkerArtifacts) {
-      auto bitcodeFile =
-          Artifact::createVariant(objectFiles.front().path, "bc");
-      auto &os = bitcodeFile.outputFile->os();
-      llvm::WriteBitcodeToFile(*llvmModule, os);
-      os.flush();
-      os.close();
-      bitcodeFile.outputFile->keep();
+      std::string asmData;
+      if (failed(runEmitObjFilePasses(targetMachine.get(), llvmModule.get(),
+                                      llvm::CGFT_AssemblyFile, &asmData))) {
+        return variantOp.emitError()
+               << "failed to compile LLVM-IR module to an assembly file";
+      }
+      {
+        auto asmFile = Artifact::createVariant(objectFiles.front().path, "s");
+        auto &os = asmFile.outputFile->os();
+        os << asmData;
+        os.flush();
+        os.close();
+        asmFile.outputFile->keep();
+      }
+      {
+        auto bitcodeFile =
+            Artifact::createVariant(objectFiles.front().path, "bc");
+        auto &os = bitcodeFile.outputFile->os();
+        llvm::WriteBitcodeToFile(*llvmModule, os);
+        os.flush();
+        os.close();
+        bitcodeFile.outputFile->keep();
+      }
     }
 
     if (!options_.staticLibraryOutput.empty()) {
@@ -570,78 +638,6 @@
         StringAttr::get(context, format), DictionaryAttr::get(context, config));
   }
 
-  static void overridePlatformGlobal(llvm::Module &module, StringRef globalName,
-                                     uint32_t newValue) {
-    // NOTE: the global will not be defined if it is not used in the module.
-    auto *globalValue = module.getNamedGlobal(globalName);
-    if (!globalValue) return;
-    globalValue->setLinkage(llvm::GlobalValue::PrivateLinkage);
-    globalValue->setDSOLocal(true);
-    globalValue->setConstant(true);
-    globalValue->setInitializer(llvm::ConstantInt::get(
-        globalValue->getValueType(), APInt(32, newValue)));
-  }
-
-  // Builds an object file for the librt embedded runtime library.
-  // This is done per link operation so that we can match the precise target
-  // configuration. Since we (mostly) link once per user-level compilation
-  // this is fine today. If in the future we invoke the compiler for thousands
-  // of modules we'd want to (carefully) cache this.
-  LogicalResult buildLibraryObjects(Location loc,
-                                    llvm::TargetMachine *targetMachine,
-                                    SmallVector<Artifact> &objectFiles,
-                                    llvm::LLVMContext &context,
-                                    const iree_file_toc_t *librtFile,
-                                    StringRef fileNamePrefix) {
-    assert(!objectFiles.empty() && "libraries must come after the base object");
-
-    // Load the generic bitcode file contents.
-    std::string bcFileName = fileNamePrefix.str() + ".bc";
-    llvm::MemoryBufferRef bitcodeBufferRef(
-        llvm::StringRef(librtFile->data, librtFile->size), bcFileName);
-    auto bitcodeModuleValue = llvm::parseBitcodeFile(bitcodeBufferRef, context);
-    if (!bitcodeModuleValue) {
-      return mlir::emitError(loc)
-             << "failed to parse librt bitcode: "
-             << llvm::toString(bitcodeModuleValue.takeError());
-    }
-    auto bitcodeModule = std::move(bitcodeModuleValue.get());
-    bitcodeModule->setDataLayout(targetMachine->createDataLayout());
-    bitcodeModule->setTargetTriple(targetMachine->getTargetTriple().str());
-
-    // Inject target-specific flags.
-    // TODO(benvanik): move this entire function to another file that can do
-    // more complex logic cleanly. This is just an example.
-    overridePlatformGlobal(*bitcodeModule, "librt_platform_example_flag", 0u);
-
-    // Run the LLVM passes to optimize it for the current target.
-    if (failed(runLLVMIRPasses(options_, targetMachine, bitcodeModule.get()))) {
-      return mlir::emitError(loc)
-             << "failed to run librt LLVM-IR opt passes targeting '"
-             << options_.targetTriple << "'";
-    }
-
-    // Emit an object file we can pass to the linker.
-    std::string objectData;
-    if (failed(runEmitObjFilePasses(targetMachine, bitcodeModule.get(),
-                                    &objectData))) {
-      return mlir::emitError(loc)
-             << "failed to compile librt LLVM-IR module to an object file";
-    }
-
-    // Write the object file to disk with a similar name to the base file.
-    std::string objectFileName = std::string(".") + fileNamePrefix.str() + ".o";
-    auto objectFile =
-        Artifact::createVariant(objectFiles.front().path, objectFileName);
-    auto &os = objectFile.outputFile->os();
-    os << objectData;
-    os.flush();
-    os.close();
-    objectFiles.push_back(std::move(objectFile));
-
-    return success();
-  }
-
   void initConfiguration() {
     auto targetMachine = createTargetMachine(options_);
 
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp
index 23dd54d..8fc150f 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.cpp
@@ -115,7 +115,9 @@
 }
 
 LogicalResult runEmitObjFilePasses(llvm::TargetMachine *machine,
-                                   llvm::Module *module, std::string *objData) {
+                                   llvm::Module *module,
+                                   llvm::CodeGenFileType fileType,
+                                   std::string *objData) {
   llvm::SmallVector<char, 0> stream_buffer;
   {
     // TODO(ataei): Use non legacy pass mamanger for this.
@@ -124,8 +126,7 @@
         new llvm::TargetLibraryInfoWrapperPass(machine->getTargetTriple()));
     llvm::raw_svector_ostream ostream(stream_buffer);
     if (machine->addPassesToEmitFile(passManager, ostream,
-                                     /*DwoOut=*/nullptr,
-                                     llvm::CGFT_ObjectFile)) {
+                                     /*DwoOut=*/nullptr, fileType)) {
       return failure();
     }
     passManager.run(*module);
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.h b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.h
index e200484..f06233e 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.h
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.h
@@ -30,7 +30,9 @@
 
 // Emits compiled module obj for the target machine.
 LogicalResult runEmitObjFilePasses(llvm::TargetMachine *machine,
-                                   llvm::Module *module, std::string *objData);
+                                   llvm::Module *module,
+                                   llvm::CodeGenFileType fileType,
+                                   std::string *objData);
 
 }  // namespace HAL
 }  // namespace IREE
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.cpp
index 768fb80..ef97c24 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMTargetOptions.cpp
@@ -51,6 +51,11 @@
     // TODO(benvanik): add an option for this.
     targetOptions.optLevel = llvm::OptimizationLevel::O3;
     targetOptions.options.FloatABIType = llvm::FloatABI::Hard;
+
+    // Force `-ffunction-sections -fdata-sections` to strip unused code/data.
+    targetOptions.options.FunctionSections = true;
+    targetOptions.options.DataSections = true;
+    targetOptions.options.UniqueSectionNames = true;
   });
   return targetOptions;
 }
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.cpp
index 8bcd8a4..19b1565 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.cpp
@@ -368,14 +368,13 @@
         llvm::ConstantArray::get(exportAttrsType, exportAttrValues),
         /*Name=*/libraryName + "_attrs");
     // TODO(benvanik): force alignment (16? natural pointer width?)
-
     exportAttrs = llvm::ConstantExpr::getInBoundsGetElementPtr(
         exportAttrsType, global, ArrayRef<llvm::Constant *>{zero, zero});
   }
 
   // iree_hal_executable_export_table_v0_t::names
   llvm::Constant *exportNames =
-      llvm::Constant::getNullValue(i8Type->getPointerTo());
+      llvm::Constant::getNullValue(i8Type->getPointerTo()->getPointerTo());
   if (mode == Mode::INCLUDE_REFLECTION_ATTRS) {
     SmallVector<llvm::Constant *, 4> exportNameValues;
     for (auto dispatch : exports) {
@@ -389,14 +388,13 @@
         llvm::ConstantArray::get(exportNamesType, exportNameValues),
         /*Name=*/libraryName + "_names");
     // TODO(benvanik): force alignment (16? natural pointer width *2?)
-
     exportNames = llvm::ConstantExpr::getInBoundsGetElementPtr(
         exportNamesType, global, ArrayRef<llvm::Constant *>{zero, zero});
   }
 
   // iree_hal_executable_export_table_v0_t::tags
   llvm::Constant *exportTags =
-      llvm::Constant::getNullValue(i8Type->getPointerTo());
+      llvm::Constant::getNullValue(i8Type->getPointerTo()->getPointerTo());
   if (mode == Mode::INCLUDE_REFLECTION_ATTRS) {
     SmallVector<llvm::Constant *, 4> exportTagValues;
     for (auto dispatch : exports) {
@@ -410,7 +408,6 @@
         llvm::ConstantArray::get(exportTagsType, exportTagValues),
         /*Name=*/libraryName + "_tags");
     // TODO(benvanik): force alignment (16? natural pointer width *2?)
-
     exportTags = llvm::ConstantExpr::getInBoundsGetElementPtr(
         exportTagsType, global, ArrayRef<llvm::Constant *>{zero, zero});
   }
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.h b/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.h
index aa2cb9c..c49da0e 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.h
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.h
@@ -35,6 +35,7 @@
  public:
   // Builder mode setting.
   enum class Mode : uint32_t {
+    NONE = 0u,
     // Include entry point names and tags.
     // If not specified then the reflection strings will be excluded to reduce
     // binary size.
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LinkerTool.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LinkerTool.cpp
index b304cc6..202a682 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LinkerTool.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LinkerTool.cpp
@@ -131,7 +131,7 @@
   return "\"" + commandLine + "\"";
 #else
   return commandLine;
-#endif
+#endif  // _MSC_VER
 }
 
 static std::string normalizeToolNameForPlatform(const std::string &toolName) {
@@ -139,7 +139,7 @@
   return toolName + ".exe";
 #else
   return toolName;
-#endif
+#endif  // _MSC_VER
 }
 
 static std::string findToolAtPath(
@@ -159,14 +159,22 @@
   return "";
 }
 
-LogicalResult LinkerTool::runLinkCommand(const std::string &commandLine) {
-  LLVM_DEBUG(llvm::dbgs() << "Running linker command:\n" << commandLine);
-  auto escapedCommandLine = escapeCommandLineComponent(commandLine);
-  int exitCode = system(escapedCommandLine.c_str());
+LogicalResult LinkerTool::runLinkCommand(std::string commandLine,
+                                         StringRef env) {
+  LLVM_DEBUG(llvm::dbgs() << "Running linker command:\n"
+                          << env << " " << commandLine);
+  if (!env.empty()) {
+#if defined(_MSC_VER)
+    commandLine = ("set " + env + " && " + commandLine).str();
+#else
+    commandLine = (env + " " + commandLine).str();
+#endif  // _MSC_VER
+  }
+  int exitCode = system(commandLine.c_str());
   if (exitCode == 0) return success();
   llvm::errs() << "Linking failed; escaped command line returned exit code "
                << exitCode << ":\n\n"
-               << escapedCommandLine << "\n\n";
+               << commandLine << "\n\n";
   return failure();
 }
 
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LinkerTool.h b/iree/compiler/Dialect/HAL/Target/LLVM/LinkerTool.h
index 3c96baa..ce62441 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LinkerTool.h
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LinkerTool.h
@@ -103,7 +103,7 @@
 
  protected:
   // Runs the given command line on the shell, logging failures.
-  LogicalResult runLinkCommand(const std::string& commandLine);
+  LogicalResult runLinkCommand(std::string commandLine, StringRef env = "");
 
   // Returns the path to the first tool in |toolNames| found in the environment,
   // or empty string if no tool was found.
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/internal/EmbeddedLinkerTool.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/internal/EmbeddedLinkerTool.cpp
index e3e793e..1293b77 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/internal/EmbeddedLinkerTool.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/internal/EmbeddedLinkerTool.cpp
@@ -108,6 +108,9 @@
         "-o " + artifacts.libraryFile.path,
     };
 
+    // Hide build info that makes files unreproducible.
+    flags.push_back("--build-id=none");
+
     // Avoids including any libc/startup files that initialize the CRT as
     // we don't use any of that. Our shared libraries must be freestanding.
     flags.push_back("-nostdlib");  // -nodefaultlibs + -nostartfiles
@@ -116,8 +119,10 @@
     // We cannot have any imports in the module we produce.
     flags.push_back("-static");
 
-    // Creating a shared library.
+    // Creating a hermetic shared library.
     flags.push_back("-shared");
+    flags.push_back("--no-undefined");
+    flags.push_back("--no-allow-shlib-undefined");
 
     // Drop unused sections.
     flags.push_back("--gc-sections");
@@ -133,6 +138,14 @@
     // This shrinks the .symtab to a single entry.
     flags.push_back("--discard-all");
 
+    // Identical code folding.
+    flags.push_back("--icf=all");
+
+    // To aid ICF we allow functions and data to be aliased - we never expose
+    // pointers to our internal functions and don't care if they alias.
+    flags.push_back("--ignore-data-address-equality");
+    flags.push_back("--ignore-function-address-equality");
+
     // Use sysv .hash lookup table only; we have literally a single symbol and
     // the .gnu.hash overhead is not worth it (either in the ELF or in the
     // runtime loader).
@@ -149,8 +162,9 @@
       flags.push_back(objectFile.path);
     }
 
-    auto commandLine = llvm::join(flags, " ");
-    if (failed(runLinkCommand(commandLine))) {
+    // LLD inserts its own identifier unless the LLD_VERSION env var is set:
+    // third_party/llvm-project/lld/ELF/SyntheticSections.cpp
+    if (failed(runLinkCommand(llvm::join(flags, " "), "LLD_VERSION=IREE"))) {
       // Ensure we save inputs if we fail so that the user can replicate the
       // command themselves.
       if (targetOptions.keepLinkerArtifacts) {
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/librt/BUILD b/iree/compiler/Dialect/HAL/Target/LLVM/librt/BUILD
deleted file mode 100644
index e6202ca..0000000
--- a/iree/compiler/Dialect/HAL/Target/LLVM/librt/BUILD
+++ /dev/null
@@ -1,40 +0,0 @@
-# Copyright 2021 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-load("//build_tools/embed_data:build_defs.bzl", "c_embed_data")
-load("//iree:build_defs.oss.bzl", "iree_cmake_extra_content")
-
-package(
-    default_visibility = ["//visibility:public"],
-    features = ["layering_check"],
-    licenses = ["notice"],  # Apache 2.0
-)
-
-iree_cmake_extra_content(
-    content = """
-if(NOT "${IREE_TARGET_BACKEND_DYLIB-LLVM-AOT}" AND NOT "${IREE_TARGET_BACKEND_WASM-LLVM-AOT}")
-  return()
-endif()
-""",
-)
-
-c_embed_data(
-    name = "librt",
-    srcs = ["bin/librt.bc"],
-    c_file_output = "librt.c",
-    flatten = True,
-    h_file_output = "librt.h",
-    identifier = "iree_compiler_librt",
-)
-
-c_embed_data(
-    name = "librt64",
-    srcs = ["bin/librt64.bc"],
-    c_file_output = "librt64.c",
-    flatten = True,
-    h_file_output = "librt64.h",
-    identifier = "iree_compiler_librt64",
-)
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/librt/Makefile_musl.iree b/iree/compiler/Dialect/HAL/Target/LLVM/librt/Makefile_musl.iree
deleted file mode 100644
index 4a77b40..0000000
--- a/iree/compiler/Dialect/HAL/Target/LLVM/librt/Makefile_musl.iree
+++ /dev/null
@@ -1,25 +0,0 @@
-# Copyright 2021 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-musldir=${MUSL_DIR}
-include $(musldir)/Makefile
-
-IREE_BASE_SRCS = $(addprefix $(srcdir)/, \
-	src/math/ceilf.c \
-	src/math/floorf.c \
-	src/math/fmodf.c)
-IREE_BASE_LLS = $(patsubst $(srcdir)/%,%.ll,$(basename $(IREE_BASE_SRCS)))
-IREE_BASE_BCS = $(patsubst $(srcdir)/%,%.bc,$(basename $(IREE_BASE_SRCS)))
-IREE_LL_FILES = $(addprefix obj/, $(IREE_BASE_LLS))
-IREE_CFLAGS=-Xclang -disable-llvm-passes -target wasm32
-LL_CMD = $(CC) $(CFLAGS_ALL) $(IREE_CFLAGS) -S -emit-llvm -o $@ -c $<
-
-obj/%.ll: $(musldir)/%.c obj/include/bits/alltypes.h
-	$(LL_CMD)
-
-iree: $(IREE_LL_FILES)
-	$(info $$IREE_BASE_SRCS is [${IREE_BASE_SRCS}])
-	$(info $$IREE_LL_FILES is [${IREE_LL_FILES}])
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/librt/bin/librt.bc b/iree/compiler/Dialect/HAL/Target/LLVM/librt/bin/librt.bc
deleted file mode 100644
index 2f8462d..0000000
--- a/iree/compiler/Dialect/HAL/Target/LLVM/librt/bin/librt.bc
+++ /dev/null
Binary files differ
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/librt/bin/librt64.bc b/iree/compiler/Dialect/HAL/Target/LLVM/librt/bin/librt64.bc
deleted file mode 100644
index 0eb3abe..0000000
--- a/iree/compiler/Dialect/HAL/Target/LLVM/librt/bin/librt64.bc
+++ /dev/null
Binary files differ
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/librt/build.sh b/iree/compiler/Dialect/HAL/Target/LLVM/librt/build.sh
deleted file mode 100755
index ee967a9..0000000
--- a/iree/compiler/Dialect/HAL/Target/LLVM/librt/build.sh
+++ /dev/null
@@ -1,94 +0,0 @@
-# Copyright 2021 The IREE Authors
-#
-# Licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-set -x
-set -e
-
-SCRIPT_DIR="$(realpath `dirname $0`)"
-OUT="${SCRIPT_DIR?}/bin"
-SRC="${SCRIPT_DIR?}/src"
-LL_FILE="${OUT}/librt.ll"
-BC_FILE="${OUT}/librt.bc"
-BC64_FILE="${OUT}/librt64.bc"
-IREE_SRC_DIR="$(git rev-parse --show-toplevel)"
-IREE_BUILD_DIR="${IREE_BUILD_DIR:-${IREE_SRC_DIR?}/../build}"
-CLANG="${CLANG:-$(which clang)}"
-CLANGXX="${CLANGXX:-$(which clang++)}"
-LLVM_AS="${LLVM_AS:-${IREE_BUILD_DIR}/third_party/llvm-project/llvm/bin/llvm-as}"
-LLVM_LINK="${LLVM_DIS:-${IREE_BUILD_DIR}/third_party/llvm-project/llvm/bin/llvm-link}"
-OPT="${OPT:-${IREE_BUILD_DIR}/third_party/llvm-project/llvm/bin/opt}"
-
-function generate_librt_from_musl ()
-{
-  ## Generate the LLVM IR assembly for the required math files from muls
-  MUSL_DIR=${IREE_SRC_DIR?}/third_party/musl
-
-  ## Generate IR with 32-bit target. This is linked by default.
-  cd ${MUSL_DIR}
-  rm -rf obj/
-  CC=${CLANG?} CXX=${CLANGXX?} ./configure
-  MUSL_DIR=${MUSL_DIR} make -f $1 iree
-  MUSL_LL_FILES=`find obj/ -name *.ll`
-  cp ${MUSL_LL_FILES?} ${OUT}
-  rm ${MUSL_LL_FILES?}
-  cd ${SCRIPT_DIR?}
-
-  ALL_LL_FILES=`find ${OUT} -name *.ll`
-
-  cd ${OUT}
-  git restore $2
-  for file in ${ALL_LL_FILES}
-  do
-    ${OPT?} ${file} -O3 -S -o ${file}.opt.ll
-    # Clang adds a bunch of bad attributes and host-specific information that we
-    # don't want (so we get at least somewhat deterministic builds).
-    sed -i 's/^;.*$//' ${file}.opt.ll
-    sed -i 's/^source_filename.*$//' ${file}.opt.ll
-    sed -i 's/^target datalayout.*$//' ${file}.opt.ll
-    sed -i 's/^target triple.*$//' ${file}.opt.ll
-    sed -i 's/^\(attributes #[0-9]* = {\).*$/\1 inlinehint }/' ${file}.opt.ll
-
-    # Generate a binary bitcode file embedded into the compiler binary.
-    # NOTE: we do this from stdin so that the filename on the user's system is not
-    # embedded in the bitcode file (making it non-deterministic).
-    cat ${file}.opt.ll | ${LLVM_AS?} -o=${file}.opt.ll.bc
-    rm ${file}.opt.ll
-  done
-  rm ${ALL_LL_FILES}
-
-  ALL_BC_FILES=`ls *.ll.bc`
-  ${LLVM_LINK?} ${ALL_BC_FILES} -o $2
-  rm ${ALL_BC_FILES}
-}
-
-# Generate an LLVM IR assembly listing so we can easily read the file.
-# This is not checked in or used by the compiler.
-${CLANG?} \
-    -target wasm32 \
-    -std=c17 \
-    -O2 \
-    -Xclang -disable-llvm-passes \
-    -fno-ident \
-    -fvisibility=hidden \
-    -nostdinc \
-    -g0 \
-    -S \
-    -emit-llvm \
-    -fno-verbose-asm \
-    -fdiscard-value-names \
-    -o "${LL_FILE}" \
-    -c \
-    "${SRC}/libm.c"
-
-### Generate the LLVM IR assembly for the required math files from musl
-
-## Generate the librt functions with `wasm32` target to be used on all
-## backends
-generate_librt_from_musl ${SCRIPT_DIR?}/Makefile_musl.iree ${BC_FILE}
-
-## Generate the librty functions with `wasm64` target to be used on 64-bit
-## backends
-generate_librt_from_musl ${SCRIPT_DIR?}/Makefile_musl_64.iree ${BC64_FILE}
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/librt/src/libm.h b/iree/compiler/Dialect/HAL/Target/LLVM/librt/src/libm.h
deleted file mode 100644
index 10dfe76..0000000
--- a/iree/compiler/Dialect/HAL/Target/LLVM/librt/src/libm.h
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2021 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#ifndef IREE_COMPILER_DIALECT_HAL_TARGET_LLVM_LIBRT_SRC_LIBM_H_
-#define IREE_COMPILER_DIALECT_HAL_TARGET_LLVM_LIBRT_SRC_LIBM_H_
-
-#include "librt.h"
-
-// https://en.cppreference.com/w/c/numeric/math/fma
-LIBRT_EXPORT float fmaf(float x, float y, float z);
-
-#endif  // IREE_COMPILER_DIALECT_HAL_TARGET_LLVM_LIBRT_SRC_LIBM_H_
diff --git a/iree/testing/benchmark.h b/iree/testing/benchmark.h
index 91fad25..cc258d5 100644
--- a/iree/testing/benchmark.h
+++ b/iree/testing/benchmark.h
@@ -118,6 +118,9 @@
   // returns false.
   iree_status_t (*run)(const iree_benchmark_def_t* benchmark_def,
                        iree_benchmark_state_t* benchmark_state);
+
+  // User-defined data accessible in the run function.
+  const void* user_data;
 };
 
 // Registers a benchmark with the given definition.