Build tracy by default in the Python runtime packages. (#14639)

This is accomplished by:

* Re-organizing the native dependencies under `iree._runtime_libs` and
`iree._runtime_libs_tracy`, which can co-exist in the same wheel.
* Having `runtime/setup.py` do an additional CMake build for
`_runtime_libs_tracy` if the env var `IREE_RUNTIME_BUILD_TRACY=ON` is set.
* Reworking `setup.py` to fit the new layout and making everything
optional.
* Splitting the package prefixes for the source and built directories.
* Moving the generated version.py to the built `_runtime_libs` package and
trampolining from `iree.runtime.version` to preserve the API.
* Modifying the `_binding.py` loader (loaded at `iree.runtime` init time)
to dynamically switch its backing runtime based on the value of the
environment variable `IREE_PY_RUNTIME`, which can be `default` or
`tracy`.
* Reworking all of the `__main__.py` wrappers (for `iree-run-module` and
such) to use the runtime library path found in the previous step.

This can be improved in the future to eliminate more binary size
duplication by adopting a busybox-like strategy as used on the compiler
(that would also be a good time to enable LTO, clang, and debug symbols).
As-is, this ~doubles the size to about 7MiB, which is acceptable given
the usability benefit. I expect that with such a re-organization we can
get it down to roughly half of its size prior to this change (i.e. about
1/4 of its size after this change).
diff --git a/.github/workflows/build_package.yml b/.github/workflows/build_package.yml
index 97ac2a4..bfd1c16 100644
--- a/.github/workflows/build_package.yml
+++ b/.github/workflows/build_package.yml
@@ -3,7 +3,7 @@
 #   * py-compiler-pkg (`iree-compiler` Python package)
 #     * Linux, macOS, Windows
 #     * All supported Python versions (e.g. 3.8, 3.9, 3.10, 3.11)
-#   * py-runtime-pkg (`iree-runtime[-instrumented]` Python package)
+#   * py-runtime-pkg (`iree-runtime` Python package)
 #     * Linux, macOS, Windows
 #     * All supported Python versions (e.g. 3.8, 3.9, 3.10, 3.11)
 #   * py-tf-compiler-tools-pkg (`iree-tools-[tf, tflite]`, pure Python packages)
@@ -144,7 +144,7 @@
 
       ##########################################################################
       # py-runtime-pkg
-      # Builds the iree-runtime and iree-runtime-instrumented wheels.
+      # Builds the iree-runtime wheels.
       # One step per OS.
       ##########################################################################
 
@@ -153,8 +153,9 @@
         shell: bash
         env:
           package_suffix: ${{ github.event.inputs.package_suffix }}
-          packages: "iree-runtime iree-runtime-instrumented"
+          packages: "iree-runtime"
           output_dir: "${{ github.workspace }}/bindist"
+          IREE_RUNTIME_BUILD_TRACY: "ON"  # Quoted: bare ON is a YAML 1.1 boolean.
         run: |
           ./c/build_tools/python_deploy/build_linux_packages.sh
 
@@ -163,9 +164,10 @@
         shell: bash
         env:
           package_suffix: ${{ github.event.inputs.package_suffix }}
-          packages: "iree-runtime iree-runtime-instrumented"
+          packages: "iree-runtime"
           output_dir: "${{ github.workspace }}/bindist"
           override_python_versions: "3.11"
+          IREE_RUNTIME_BUILD_TRACY: "ON"  # Quoted: bare ON is a YAML 1.1 boolean.
         run: |
           ./c/build_tools/python_deploy/build_macos_packages.sh
 
@@ -174,9 +176,10 @@
         shell: bash
         env:
           package_suffix: ${{ github.event.inputs.package_suffix }}
-          packages: "iree-runtime iree-runtime-instrumented"
+          packages: "iree-runtime"
           output_dir: "${{ github.workspace }}/bindist"
           override_python_versions: "3.11"
+          IREE_RUNTIME_BUILD_TRACY: "ON"  # Quoted: bare ON is a YAML 1.1 boolean.
         run: |
           ./c/build_tools/python_deploy/build_windows_packages.sh
 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c870123..c196361 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -513,11 +513,12 @@
       - name: Build runtime wheels (Linux)
         shell: bash
         env:
-          packages: "iree-runtime iree-runtime-instrumented"
+          packages: "iree-runtime"
           output_dir: "${{ github.workspace }}/bindist"
           # Note when upgrading: Build just one Python version synced to our
           # minimum.
           override_python_versions: cp38-cp38
+          IREE_RUNTIME_BUILD_TRACY: "ON"  # Quoted: bare ON is a YAML 1.1 boolean.
         run: |
           ./build_tools/python_deploy/build_linux_packages.sh
       # Note that it is just a trade-off decision to have this serialized
diff --git a/.gitignore b/.gitignore
index 569d140..42821b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,6 +52,7 @@
 
 # Python deployment artifacts
 *.whl
+*.egg-info
 
 # Android Studio / Gradle files
 .cxx
diff --git a/build_tools/python_deploy/build_linux_packages.sh b/build_tools/python_deploy/build_linux_packages.sh
index f7ad525..a4197bc 100755
--- a/build_tools/python_deploy/build_linux_packages.sh
+++ b/build_tools/python_deploy/build_linux_packages.sh
@@ -17,7 +17,7 @@
 #
 # Build specific Python versions and packages to custom directory:
 #   override_python_versions="cp38-cp38 cp39-cp39" \
-#   packages="iree-runtime iree-runtime-instrumented" \
+#   packages="iree-runtime" \
 #   output_dir="/tmp/wheelhouse" \
 #   ./build_tools/python_deploy/build_linux_packages.sh
 #
@@ -27,7 +27,6 @@
 #
 # Valid packages:
 #   iree-runtime
-#   iree-runtime-instrumented
 #   iree-compiler
 #
 # Note that this script is meant to be run on CI and it will pollute both the
@@ -68,7 +67,7 @@
 manylinux_docker_image="${manylinux_docker_image:-$(uname -m | awk '{print ($1 == "aarch64") ? "quay.io/pypa/manylinux_2_28_aarch64" : "gcr.io/iree-oss/manylinux2014_x86_64-release@sha256:e83893d35be4ce3558c989e9d5ccc4ff88d058bc3e74a83181059cc76e2cf1f8" }')}"
 python_versions="${override_python_versions:-cp38-cp38 cp39-cp39 cp310-cp310 cp311-cp311}"
 output_dir="${output_dir:-${this_dir}/wheelhouse}"
-packages="${packages:-iree-runtime iree-runtime-instrumented iree-compiler}"
+packages="${packages:-iree-runtime iree-compiler}"
 package_suffix="${package_suffix:-}"
 
 function run_on_host() {
@@ -125,12 +124,6 @@
           build_iree_runtime
           run_audit_wheel "iree_runtime${package_suffix}" "${python_version}"
           ;;
-        iree-runtime-instrumented)
-          clean_wheels "iree_runtime_instrumented${package_suffix}" "${python_version}"
-          install_deps "iree_runtime${package_suffix}" "${python_version}"
-          build_iree_runtime_instrumented
-          run_audit_wheel "iree_runtime_instrumented${package_suffix}" "${python_version}"
-          ;;
         iree-compiler)
           clean_wheels "iree_compiler${package_suffix}" "${python_version}"
           install_deps "iree_runtime${package_suffix}" "${python_version}"
@@ -154,12 +147,6 @@
   build_wheel runtime/
 }
 
-function build_iree_runtime_instrumented() {
-  IREE_BUILD_TRACY=ON IREE_ENABLE_RUNTIME_TRACING=ON \
-  IREE_RUNTIME_CUSTOM_PACKAGE_SUFFIX="-instrumented" \
-  build_wheel runtime/
-}
-
 function build_iree_compiler() {
   build_wheel compiler/
 }
diff --git a/build_tools/python_deploy/build_macos_packages.sh b/build_tools/python_deploy/build_macos_packages.sh
index 4198ce3..7c16068 100755
--- a/build_tools/python_deploy/build_macos_packages.sh
+++ b/build_tools/python_deploy/build_macos_packages.sh
@@ -15,7 +15,6 @@
 # MacOS convention is to refer to this as major.minor (i.e. "3.9", "3.10").
 # Valid packages:
 #   iree-runtime
-#   iree-runtime-instrumented
 #   iree-compiler
 
 set -eu -o errtrace
@@ -24,7 +23,7 @@
 repo_root="$(cd $this_dir/../../ && pwd)"
 python_versions="${override_python_versions:-3.11}"
 output_dir="${output_dir:-${this_dir}/wheelhouse}"
-packages="${packages:-iree-runtime iree-runtime-instrumented iree-compiler}"
+packages="${packages:-iree-runtime iree-compiler}"
 
 # Note that this typically is selected to match the version that the official
 # Python distributed is built at.
@@ -58,10 +57,6 @@
           clean_wheels iree_runtime $python_version
           build_iree_runtime
           ;;
-        iree-runtime-instrumented)
-          clean_wheels iree_runtime_instrumented $python_version
-          build_iree_runtime_instrumented
-          ;;
         iree-compiler)
           clean_wheels iree_compiler $python_version
           build_iree_compiler
@@ -85,14 +80,6 @@
   python3 -m pip wheel -v -w $output_dir $repo_root/runtime/
 }
 
-function build_iree_runtime_instrumented() {
-  # TODO: Bundled tracy client on MacOS not yet supported.
-  # Add IREE_BUILD_TRACY=ON once it is.
-  IREE_HAL_DRIVER_VULKAN=ON IREE_ENABLE_RUNTIME_TRACING=ON \
-  IREE_RUNTIME_CUSTOM_PACKAGE_SUFFIX="-instrumented" \
-  python3 -m pip wheel -v -w $output_dir $repo_root/runtime/
-}
-
 function build_iree_compiler() {
   python3 -m pip wheel -v -w $output_dir $repo_root/compiler/
 }
diff --git a/build_tools/python_deploy/build_windows_packages.sh b/build_tools/python_deploy/build_windows_packages.sh
index 7cc1d7a..173a1e3 100644
--- a/build_tools/python_deploy/build_windows_packages.sh
+++ b/build_tools/python_deploy/build_windows_packages.sh
@@ -11,7 +11,6 @@
 #
 # Valid packages:
 #   iree-runtime
-#   iree-runtime-instrumented
 #   iree-compiler
 
 set -eu -o errtrace
@@ -20,7 +19,7 @@
 repo_root="$(cd $this_dir/../../ && pwd)"
 python_versions="${override_python_versions:-3.11}"
 output_dir="${output_dir:-${this_dir}/wheelhouse}"
-packages="${packages:-iree-runtime iree-runtime-instrumented iree-compiler}"
+packages="${packages:-iree-runtime iree-compiler}"
 
 # Canonicalize paths.
 mkdir -p "$output_dir"
@@ -46,10 +45,6 @@
           clean_wheels iree_runtime $python_version
           build_iree_runtime $python_version
           ;;
-        iree-runtime-instrumented)
-          clean_wheels iree_runtime_instrumented $python_version
-          build_iree_runtime_instrumented $python_version
-          ;;
         iree-compiler)
           clean_wheels iree_compiler $python_version
           build_iree_compiler $python_version
@@ -73,13 +68,6 @@
   py -${python_version} -m pip wheel -v -w $output_dir $repo_root/runtime/
 }
 
-function build_iree_runtime_instrumented() {
-  local python_version="$1"
-  IREE_HAL_DRIVER_VULKAN=ON IREE_ENABLE_RUNTIME_TRACING=ON \
-  IREE_RUNTIME_CUSTOM_PACKAGE_SUFFIX="-instrumented" \
-  py -${python_version} -m pip wheel -v -w $output_dir $repo_root/runtime/
-}
-
 function build_iree_compiler() {
   local python_version="$1"
   py -${python_version} -m pip wheel -v -w $output_dir $repo_root/compiler/
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index dd42313..0c360b5 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -7,10 +7,5 @@
 add_subdirectory(src)
 
 if(IREE_BUILD_PYTHON_BINDINGS)
-  # Copy Python packaging files to the build dir so that we can install from
-  # there.
-  configure_file(pyproject.toml pyproject.toml COPYONLY)
-  configure_file(setup.py setup.py @ONLY)
-
   add_subdirectory(bindings/python)
 endif()
diff --git a/runtime/bindings/python/CMakeLists.txt b/runtime/bindings/python/CMakeLists.txt
index 4e447ad..9d45ca0 100644
--- a/runtime/bindings/python/CMakeLists.txt
+++ b/runtime/bindings/python/CMakeLists.txt
@@ -20,14 +20,12 @@
   find_package(nanobind CONFIG REQUIRED)
 endif()
 
-set(_PYTHON_EXTRA_SRCS)
 set(_EXTRA_INSTALL_TOOL_TARGETS)
 set(_TRACY_ENABLED OFF)
 
-if(TARGET IREETracyCaptureServer)
-  message(STATUS "Bundline Tracy CLI tools with Python API")
+if(IREE_BUILD_TRACY)
+  message(STATUS "Bundling Tracy CLI tools with Python API")
   set(_TRACY_ENABLED ON)
-  list(APPEND _PYTHON_EXTRA_SRCS "iree/runtime/scripts/iree-tracy-capture")
   list(APPEND _EXTRA_INSTALL_TOOL_TARGETS "IREETracyCaptureServer")
 endif()
 
@@ -107,7 +105,7 @@
 
 set_target_properties(
   iree_runtime_bindings_python_PyExtRt
-  PROPERTIES OUTPUT_NAME "iree/_runtime"
+  PROPERTIES OUTPUT_NAME "iree/_runtime_libs/_runtime"
 )
 
 iree_py_library(
@@ -116,6 +114,7 @@
   SRCS
     "iree/runtime/__init__.py"
     "iree/runtime/_binding.py"
+    "iree/runtime/_runtime_libs.py"
     "iree/runtime/array_interop.py"
     "iree/runtime/benchmark.py"
     "iree/runtime/flags.py"
@@ -123,12 +122,13 @@
     "iree/runtime/system_api.py"
     "iree/runtime/system_setup.py"
     "iree/runtime/tracing.py"
+    "iree/runtime/version.py"
     "iree/runtime/scripts/iree_benchmark_trace/__main__.py"
     "iree/runtime/scripts/iree_benchmark_module/__main__.py"
     "iree/runtime/scripts/iree_cpuinfo/__main__.py"
     "iree/runtime/scripts/iree_run_trace/__main__.py"
     "iree/runtime/scripts/iree_run_module/__main__.py"
-    ${_PYTHON_EXTRA_SRCS}
+    "iree/runtime/scripts/iree_tracy_capture/__main__.py"
   PYEXT_DEPS
     iree_runtime_bindings_python_PyExtRt
 )
@@ -136,44 +136,44 @@
 iree_symlink_tool(
   TARGET runtime
   FROM_TOOL_TARGET iree-benchmark-module
-  TO_EXE_NAME iree/runtime/iree-benchmark-module
+  TO_EXE_NAME iree/_runtime_libs/iree-benchmark-module
 )
 
 iree_symlink_tool(
   TARGET runtime
   FROM_TOOL_TARGET iree-benchmark-trace
-  TO_EXE_NAME iree/runtime/iree-benchmark-trace
+  TO_EXE_NAME iree/_runtime_libs/iree-benchmark-trace
 )
 
 iree_symlink_tool(
   TARGET runtime
   FROM_TOOL_TARGET iree-cpuinfo
-  TO_EXE_NAME iree/runtime/iree-cpuinfo
+  TO_EXE_NAME iree/_runtime_libs/iree-cpuinfo
 )
 
 iree_symlink_tool(
   TARGET runtime
   FROM_TOOL_TARGET iree-dump-module
-  TO_EXE_NAME iree/runtime/iree-dump-module
+  TO_EXE_NAME iree/_runtime_libs/iree-dump-module
 )
 
 iree_symlink_tool(
   TARGET runtime
   FROM_TOOL_TARGET iree-run-trace
-  TO_EXE_NAME iree/runtime/iree-run-trace
+  TO_EXE_NAME iree/_runtime_libs/iree-run-trace
 )
 
 iree_symlink_tool(
   TARGET runtime
   FROM_TOOL_TARGET iree-run-module
-  TO_EXE_NAME iree/runtime/iree-run-module
+  TO_EXE_NAME iree/_runtime_libs/iree-run-module
 )
 
 if(_TRACY_ENABLED)
   iree_symlink_tool(
     TARGET runtime
     FROM_TOOL_TARGET IREETracyCaptureServer
-    TO_EXE_NAME iree-tracy-capture
+    TO_EXE_NAME iree/_runtime_libs/iree-tracy-capture
   )
 endif()
 
@@ -248,17 +248,6 @@
     "tests/vm_types_test.py"
 )
 
-# TODO: Enable this once the CI bots are updated to install the python3-venv
-# apt package. https://github.com/openxla/iree/issues/9080
-# iree_py_test(
-#   NAME
-#     package_test
-#   SRCS
-#     "tests/package_test.py"
-#   ARGS
-#     "${IREE_BINARY_DIR}/runtime"
-# )
-
 ################################################################################
 # Install
 ################################################################################
@@ -279,7 +268,7 @@
 # _runtime.so -> python_packages/iree_runtime/iree/_runtime.so
 install(
   TARGETS iree_runtime_bindings_python_PyExtRt
-  DESTINATION "${_INSTALL_DIR}/iree"
+  DESTINATION "${_INSTALL_DIR}/iree/_runtime_libs"
   COMPONENT "${_INSTALL_COMPONENT}"
 )
 
@@ -301,7 +290,7 @@
     iree-run-module
     iree-run-trace
     ${_EXTRA_INSTALL_TOOL_TARGETS}
-  DESTINATION \"${_INSTALL_DIR}/iree/runtime\"
+  DESTINATION \"${_INSTALL_DIR}/iree/_runtime_libs\"
   COMPONENT \"${_INSTALL_COMPONENT}\"
 )
 ")
diff --git a/runtime/bindings/python/iree/runtime/_binding.py b/runtime/bindings/python/iree/runtime/_binding.py
index 4cb9d71..2237bb0 100644
--- a/runtime/bindings/python/iree/runtime/_binding.py
+++ b/runtime/bindings/python/iree/runtime/_binding.py
@@ -7,13 +7,12 @@
 
 All code in the runtime should use runtime imports via this module, which
 locates the actual _runtime module based on environment configuration.
-Currently, we only bundle a single runtime library, but in the future, this
-will let us dynamically switch between instrumented, debug, etc by changing
-the way this trampoline functions.
+
+TODO: We could rename this to _runtime since it is a trampoline for
+the _runtime module we load from elsewhere.
 """
 
 import sys
-
-from iree import _runtime
+from ._runtime_libs import _runtime
 
 sys.modules[__name__] = _runtime
diff --git a/runtime/bindings/python/iree/runtime/_runtime_libs.py b/runtime/bindings/python/iree/runtime/_runtime_libs.py
new file mode 100644
index 0000000..d27e300
--- /dev/null
+++ b/runtime/bindings/python/iree/runtime/_runtime_libs.py
@@ -0,0 +1,32 @@
+# Copyright 2023 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""Trampoline to the right iree._runtime_libs* module."""
+
+import os
+import sys
+import warnings
+
+variant = os.getenv("IREE_PY_RUNTIME", "default")
+if variant == "tracy":
+    try:
+        import iree._runtime_libs_tracy as _libs
+    except ModuleNotFoundError as e:
+        raise ModuleNotFoundError(
+            "IREE Tracy runtime requested via IREE_PY_RUNTIME but it is not "
+            "enabled in this build"
+        ) from e
+    print("-- Using Tracy runtime (IREE_PY_RUNTIME=tracy)", file=sys.stderr)
+else:
+    if variant != "default":
+        warnings.warn(
+            f"Unknown value for IREE_PY_RUNTIME env var ({variant}): " f"Using default"
+        )
+        variant = "default"
+    import iree._runtime_libs as _libs
+
+_libs.name = variant
+_libs.library_path = _libs.__path__[0]
+sys.modules[__name__] = _libs
diff --git a/runtime/bindings/python/iree/runtime/scripts/iree_benchmark_module/__main__.py b/runtime/bindings/python/iree/runtime/scripts/iree_benchmark_module/__main__.py
index 528bc2a..0d9eabd 100644
--- a/runtime/bindings/python/iree/runtime/scripts/iree_benchmark_module/__main__.py
+++ b/runtime/bindings/python/iree/runtime/scripts/iree_benchmark_module/__main__.py
@@ -7,12 +7,13 @@
 import os
 import subprocess
 import sys
+from ... import _runtime_libs
 
 
 def main(args=None):
     if args is None:
         args = sys.argv[1:]
-    exe = os.path.join(os.path.dirname(__file__), "..", "..", "iree-benchmark-module")
+    exe = os.path.join(_runtime_libs.library_path, "iree-benchmark-module")
     return subprocess.call(args=[exe] + args)
 
 
diff --git a/runtime/bindings/python/iree/runtime/scripts/iree_benchmark_trace/__main__.py b/runtime/bindings/python/iree/runtime/scripts/iree_benchmark_trace/__main__.py
index ba64bd6..6fbf0e2 100644
--- a/runtime/bindings/python/iree/runtime/scripts/iree_benchmark_trace/__main__.py
+++ b/runtime/bindings/python/iree/runtime/scripts/iree_benchmark_trace/__main__.py
@@ -7,12 +7,13 @@
 import os
 import subprocess
 import sys
+from ... import _runtime_libs
 
 
 def main(args=None):
     if args is None:
         args = sys.argv[1:]
-    exe = os.path.join(os.path.dirname(__file__), "..", "..", "iree-benchmark-trace")
+    exe = os.path.join(_runtime_libs.library_path, "iree-benchmark-trace")
     return subprocess.call(args=[exe] + args)
 
 
diff --git a/runtime/bindings/python/iree/runtime/scripts/iree_cpuinfo/__main__.py b/runtime/bindings/python/iree/runtime/scripts/iree_cpuinfo/__main__.py
index 948e2b5..73e634f 100644
--- a/runtime/bindings/python/iree/runtime/scripts/iree_cpuinfo/__main__.py
+++ b/runtime/bindings/python/iree/runtime/scripts/iree_cpuinfo/__main__.py
@@ -8,11 +8,14 @@
 import subprocess
 import sys
 
+# Note that cpuinfo is only in the default runtime.
+from iree import _runtime_libs
+
 
 def main(args=None):
     if args is None:
         args = sys.argv[1:]
-    exe = os.path.join(os.path.dirname(__file__), "..", "..", "iree-cpuinfo")
+    exe = os.path.join(_runtime_libs.__path__[0], "iree-cpuinfo")
     return subprocess.call(args=[exe] + args)
 
 
diff --git a/runtime/bindings/python/iree/runtime/scripts/iree_dump_module/__main__.py b/runtime/bindings/python/iree/runtime/scripts/iree_dump_module/__main__.py
index 991481d..caa84b5 100644
--- a/runtime/bindings/python/iree/runtime/scripts/iree_dump_module/__main__.py
+++ b/runtime/bindings/python/iree/runtime/scripts/iree_dump_module/__main__.py
@@ -8,11 +8,14 @@
 import subprocess
 import sys
 
+# dump-module is only in the default libs; use __path__ (no trampoline attributes).
+from iree import _runtime_libs
+
 
 def main(args=None):
     if args is None:
         args = sys.argv[1:]
-    exe = os.path.join(os.path.dirname(__file__), "..", "..", "iree-dump-module")
+    exe = os.path.join(_runtime_libs.__path__[0], "iree-dump-module")
     return subprocess.call(args=[exe] + args)
 
 
diff --git a/runtime/bindings/python/iree/runtime/scripts/iree_run_module/__main__.py b/runtime/bindings/python/iree/runtime/scripts/iree_run_module/__main__.py
index 7fdac38..e8c2abe 100644
--- a/runtime/bindings/python/iree/runtime/scripts/iree_run_module/__main__.py
+++ b/runtime/bindings/python/iree/runtime/scripts/iree_run_module/__main__.py
@@ -7,12 +7,13 @@
 import os
 import subprocess
 import sys
+from ... import _runtime_libs
 
 
 def main(args=None):
     if args is None:
         args = sys.argv[1:]
-    exe = os.path.join(os.path.dirname(__file__), "..", "..", "iree-run-module")
+    exe = os.path.join(_runtime_libs.library_path, "iree-run-module")
     return subprocess.call(args=[exe] + args)
 
 
diff --git a/runtime/bindings/python/iree/runtime/scripts/iree_run_trace/__main__.py b/runtime/bindings/python/iree/runtime/scripts/iree_run_trace/__main__.py
index d75a65b..4464d59 100644
--- a/runtime/bindings/python/iree/runtime/scripts/iree_run_trace/__main__.py
+++ b/runtime/bindings/python/iree/runtime/scripts/iree_run_trace/__main__.py
@@ -7,12 +7,13 @@
 import os
 import subprocess
 import sys
+from ... import _runtime_libs
 
 
 def main(args=None):
     if args is None:
         args = sys.argv[1:]
-    exe = os.path.join(os.path.dirname(__file__), "..", "..", "iree-run-trace")
+    exe = os.path.join(_runtime_libs.library_path, "iree-run-trace")
     return subprocess.call(args=[exe] + args)
 
 
diff --git a/runtime/bindings/python/iree/runtime/scripts/iree_tracy_capture/__main__.py b/runtime/bindings/python/iree/runtime/scripts/iree_tracy_capture/__main__.py
index f5e36fa..3571731 100644
--- a/runtime/bindings/python/iree/runtime/scripts/iree_tracy_capture/__main__.py
+++ b/runtime/bindings/python/iree/runtime/scripts/iree_tracy_capture/__main__.py
@@ -10,9 +10,16 @@
 
 
 def main(args=None):
+    try:
+        from iree import _runtime_libs_tracy
+    except ModuleNotFoundError as e:
+        raise ModuleNotFoundError(
+            "This command requires that a tracy runtime is available "
+            "but it was not built for this platform."
+        ) from e
     if args is None:
         args = sys.argv[1:]
-    exe = os.path.join(os.path.dirname(__file__), "..", "..", "iree-tracy-capture")
+    exe = os.path.join(_runtime_libs_tracy.__path__[0], "iree-tracy-capture")
     return subprocess.call(args=[exe] + args)
 
 
diff --git a/runtime/bindings/python/iree/runtime/version.py b/runtime/bindings/python/iree/runtime/version.py
new file mode 100644
index 0000000..8f32d1b
--- /dev/null
+++ b/runtime/bindings/python/iree/runtime/version.py
@@ -0,0 +1,12 @@
+# Copyright 2023 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""Trampoline to the generated version.py.
+
+The generated version.py comes from the selected _runtime_libs
+package.
+"""
+
+from ._runtime_libs.version import *
diff --git a/runtime/setup.py b/runtime/setup.py
index bfb3bb4..fe52434 100644
--- a/runtime/setup.py
+++ b/runtime/setup.py
@@ -22,8 +22,6 @@
 #
 # Select CMake options are available from environment variables:
 #   IREE_HAL_DRIVER_VULKAN
-#   IREE_ENABLE_RUNTIME_TRACING
-#   IREE_BUILD_TRACY
 #   IREE_ENABLE_CPUINFO
 
 import json
@@ -43,6 +41,28 @@
 from setuptools.command.build_py import build_py as _build_py
 
 
+def getenv_bool(key):
+    value = os.getenv(key, "OFF")
+    return value.upper() in ["ON", "1", "TRUE"]
+
+
+def combine_dicts(*ds):
+    result = {}
+    for d in ds:
+        result.update(d)
+    return result
+
+
+ENABLE_TRACY = getenv_bool("IREE_RUNTIME_BUILD_TRACY")
+if ENABLE_TRACY:
+    print("*** Enabling Tracy build (may error if missing deps)", file=sys.stderr)
+else:
+    print(
+        "*** Tracy not enabled (enable with IREE_RUNTIME_BUILD_TRACY=ON)",
+        file=sys.stderr,
+    )
+
+
 def check_pip_version():
     from packaging import version
 
@@ -65,49 +85,32 @@
 
 check_pip_version()
 
-# This file can be run directly from the source tree or it can be CMake
-# configured so it can run from the build tree with an already existing
-# build tree. We detect the difference based on whether the following
-# are expanded by CMake.
-CONFIGURED_SOURCE_DIR = "@IREE_SOURCE_DIR@"
-CONFIGURED_BINARY_DIR = "@IREE_BINARY_DIR@"
-
-IREE_SOURCE_DIR = None
-IREE_BINARY_DIR = None
-
 # We must do the intermediate installation to a fixed location that agrees
 # between what we pass to setup() and cmake. So hard-code it here.
 # Note that setup() needs a relative path (to the setup.py file).
 # We keep the path short ('i' instead of 'install') for platforms like Windows
 # that have file length limits.
 SETUPPY_DIR = os.path.realpath(os.path.dirname(__file__))
-CMAKE_INSTALL_DIR_REL = os.path.join("build", "i")
+CMAKE_INSTALL_DIR_REL = os.path.join("build", "i", "d")
 CMAKE_INSTALL_DIR_ABS = os.path.join(SETUPPY_DIR, CMAKE_INSTALL_DIR_REL)
+CMAKE_TRACY_INSTALL_DIR_REL = os.path.join("build", "i", "t")
+CMAKE_TRACY_INSTALL_DIR_ABS = os.path.join(SETUPPY_DIR, CMAKE_TRACY_INSTALL_DIR_REL)
 
-IS_CONFIGURED = CONFIGURED_SOURCE_DIR[0] != "@"
-if IS_CONFIGURED:
-    IREE_SOURCE_DIR = CONFIGURED_SOURCE_DIR
-    IREE_BINARY_DIR = CONFIGURED_BINARY_DIR
-    print(
-        f"Running setup.py from build tree: "
-        f"SOURCE_DIR = {IREE_SOURCE_DIR} "
-        f"BINARY_DIR = {IREE_BINARY_DIR}",
-        file=sys.stderr,
-    )
-else:
-    IREE_SOURCE_DIR = os.path.join(SETUPPY_DIR, "..")
-    IREE_BINARY_DIR = os.getenv("IREE_RUNTIME_API_CMAKE_BUILD_DIR")
-    if not IREE_BINARY_DIR:
-        # Note that setuptools always builds into a "build" directory that
-        # is a sibling of setup.py, so we just colonize a sub-directory of that
-        # by default.
-        IREE_BINARY_DIR = os.path.join(SETUPPY_DIR, "build", "b")
-    print(
-        f"Running setup.py from source tree: "
-        f"SOURCE_DIR = {IREE_SOURCE_DIR} "
-        f"BINARY_DIR = {IREE_BINARY_DIR}",
-        file=sys.stderr,
-    )
+IREE_SOURCE_DIR = os.path.join(SETUPPY_DIR, "..")
+# Note that setuptools always builds into a "build" directory that
+# is a sibling of setup.py, so we just colonize a sub-directory of that
+# by default.
+BASE_BINARY_DIR = os.getenv("IREE_RUNTIME_API_CMAKE_BUILD_DIR") or os.path.join(
+    SETUPPY_DIR, "build", "b"
+)  # `or` keeps the default when the env var is set but empty (prior behavior).
+IREE_BINARY_DIR = os.path.join(BASE_BINARY_DIR, "d")
+IREE_TRACY_BINARY_DIR = os.path.join(BASE_BINARY_DIR, "t")
+print(
+    f"Running setup.py from source tree: "
+    f"SOURCE_DIR = {IREE_SOURCE_DIR} "
+    f"BINARY_DIR = {IREE_BINARY_DIR}",
+    file=sys.stderr,
+)
 
 # Setup and get version information.
 VERSION_INFO_FILE = os.path.join(IREE_SOURCE_DIR, "version_info.json")
@@ -163,7 +166,7 @@
     PACKAGE_VERSION = f"0.dev0+{git_versions.get('IREE') or '0'}"
 
 
-def maybe_nuke_cmake_cache():
+def maybe_nuke_cmake_cache(cmake_build_dir, cmake_install_dir):
     # From run to run under pip, we can end up with different paths to ninja,
     # which isn't great and will confuse cmake. Detect if the location of
     # ninja changes and force a cache flush.
@@ -180,7 +183,7 @@
     # the CMakeCache.txt file if the path to the Python interpreter changed.
     # Ideally, CMake would let us reconfigure this dynamically... but it does
     # not (and gets very confused).
-    PYTHON_STAMP_FILE = os.path.join(IREE_BINARY_DIR, "python_stamp.txt")
+    PYTHON_STAMP_FILE = os.path.join(cmake_build_dir, "python_stamp.txt")
     if os.path.exists(PYTHON_STAMP_FILE):
         with open(PYTHON_STAMP_FILE, "rt") as f:
             actual_stamp_contents = f.read()
@@ -189,7 +192,7 @@
                 return
 
     # Mismatch or not found. Clean it.
-    cmake_cache_file = os.path.join(IREE_BINARY_DIR, "CMakeCache.txt")
+    cmake_cache_file = os.path.join(cmake_build_dir, "CMakeCache.txt")
     if os.path.exists(cmake_cache_file):
         print("Removing CMakeCache.txt because Python version changed", file=sys.stderr)
         os.remove(cmake_cache_file)
@@ -197,13 +200,13 @@
     # Also clean the install directory. This avoids version specific pileups
     # of binaries that can occur with repeated builds against different
     # Python versions.
-    if os.path.exists(CMAKE_INSTALL_DIR_ABS):
+    if os.path.exists(cmake_install_dir):
         print(
             f"Removing CMake install dir because Python version changed: "
-            f"{CMAKE_INSTALL_DIR_ABS}",
+            f"{cmake_install_dir}",
             file=sys.stderr,
         )
-        shutil.rmtree(CMAKE_INSTALL_DIR_ABS)
+        shutil.rmtree(cmake_install_dir)
 
     # And write.
     with open(PYTHON_STAMP_FILE, "wt") as f:
@@ -234,7 +237,7 @@
         args.append(f"-D{cmake_name}={svalue}")
 
 
-def prepare_installation():
+def build_configuration(cmake_build_dir, cmake_install_dir, extra_cmake_args=()):
     subprocess.check_call(["cmake", "--version"])
     version_py_content = generate_version_py()
     print(f"Generating version.py:\n{version_py_content}", file=sys.stderr)
@@ -242,98 +245,145 @@
     cfg = os.getenv("IREE_CMAKE_BUILD_TYPE", "Release")
     strip_install = cfg == "Release"
 
-    if not IS_CONFIGURED:
-        # Build from source tree.
-        os.makedirs(IREE_BINARY_DIR, exist_ok=True)
-        maybe_nuke_cmake_cache()
-        print(f"CMake build dir: {IREE_BINARY_DIR}", file=sys.stderr)
-        print(f"CMake install dir: {CMAKE_INSTALL_DIR_ABS}", file=sys.stderr)
-        cmake_args = [
-            "-GNinja",
-            "--log-level=VERBOSE",
-            "-DIREE_BUILD_PYTHON_BINDINGS=ON",
-            "-DIREE_BUILD_COMPILER=OFF",
-            "-DIREE_BUILD_SAMPLES=OFF",
-            "-DIREE_BUILD_TESTS=OFF",
-            "-DPython3_EXECUTABLE={}".format(sys.executable),
-            "-DCMAKE_BUILD_TYPE={}".format(cfg),
-            get_env_cmake_option(
-                "IREE_HAL_DRIVER_VULKAN",
-                "OFF" if platform.system() == "Darwin" else "ON",
-            ),
-            get_env_cmake_list("IREE_EXTERNAL_HAL_DRIVERS", ""),
-            get_env_cmake_option("IREE_ENABLE_RUNTIME_TRACING"),
-            get_env_cmake_option("IREE_BUILD_TRACY"),
-            get_env_cmake_option("IREE_ENABLE_CPUINFO", "ON"),
-        ]
-        add_env_cmake_setting(cmake_args, "IREE_TRACING_PROVIDER")
-        add_env_cmake_setting(cmake_args, "IREE_TRACING_PROVIDER_H")
+    # Build from source tree.
+    os.makedirs(cmake_build_dir, exist_ok=True)
+    maybe_nuke_cmake_cache(cmake_build_dir, cmake_install_dir)
+    print(f"CMake build dir: {cmake_build_dir}", file=sys.stderr)
+    print(f"CMake install dir: {cmake_install_dir}", file=sys.stderr)
+    cmake_args = [
+        "-GNinja",
+        "--log-level=VERBOSE",
+        "-DIREE_BUILD_PYTHON_BINDINGS=ON",
+        "-DIREE_BUILD_COMPILER=OFF",
+        "-DIREE_BUILD_SAMPLES=OFF",
+        "-DIREE_BUILD_TESTS=OFF",
+        "-DPython3_EXECUTABLE={}".format(sys.executable),
+        "-DCMAKE_BUILD_TYPE={}".format(cfg),
+        get_env_cmake_option(
+            "IREE_HAL_DRIVER_VULKAN",
+            "OFF" if platform.system() == "Darwin" else "ON",
+        ),
+        get_env_cmake_list("IREE_EXTERNAL_HAL_DRIVERS", ""),
+        get_env_cmake_option("IREE_ENABLE_CPUINFO", "ON"),
+    ] + list(extra_cmake_args)
+    add_env_cmake_setting(cmake_args, "IREE_TRACING_PROVIDER")
+    add_env_cmake_setting(cmake_args, "IREE_TRACING_PROVIDER_H")
 
-        # These usually flow through the environment, but we add them explicitly
-        # so that they show clearly in logs (getting them wrong can have bad
-        # outcomes).
-        add_env_cmake_setting(cmake_args, "CMAKE_OSX_ARCHITECTURES")
-        add_env_cmake_setting(
-            cmake_args, "MACOSX_DEPLOYMENT_TARGET", "CMAKE_OSX_DEPLOYMENT_TARGET"
+    # These usually flow through the environment, but we add them explicitly
+    # so that they show clearly in logs (getting them wrong can have bad
+    # outcomes).
+    add_env_cmake_setting(cmake_args, "CMAKE_OSX_ARCHITECTURES")
+    add_env_cmake_setting(
+        cmake_args, "MACOSX_DEPLOYMENT_TARGET", "CMAKE_OSX_DEPLOYMENT_TARGET"
+    )
+
+    # Only do a from-scratch configure if not already configured.
+    cmake_cache_file = os.path.join(cmake_build_dir, "CMakeCache.txt")
+    if not os.path.exists(cmake_cache_file):
+        print(f"Configuring with: {cmake_args}", file=sys.stderr)
+        subprocess.check_call(
+            ["cmake", IREE_SOURCE_DIR] + cmake_args, cwd=cmake_build_dir
         )
+    else:
+        print(f"Not re-configuring (already configured)", file=sys.stderr)
 
-        # Only do a from-scratch configure if not already configured.
-        cmake_cache_file = os.path.join(IREE_BINARY_DIR, "CMakeCache.txt")
-        if not os.path.exists(cmake_cache_file):
-            print(f"Configuring with: {cmake_args}", file=sys.stderr)
-            subprocess.check_call(
-                ["cmake", IREE_SOURCE_DIR] + cmake_args, cwd=IREE_BINARY_DIR
-            )
-        else:
-            print(f"Not re-configuring (already configured)", file=sys.stderr)
-
-        # Build. Since we have restricted to just the runtime, build everything
-        # so as to avoid fragility with more targeted selection criteria.
-        subprocess.check_call(["cmake", "--build", "."], cwd=IREE_BINARY_DIR)
-        print("Build complete.", file=sys.stderr)
+    # Build. Since we have restricted to just the runtime, build everything
+    # so as to avoid fragility with more targeted selection criteria.
+    subprocess.check_call(["cmake", "--build", "."], cwd=cmake_build_dir)
+    print("Build complete.", file=sys.stderr)
 
     # Install the component we care about.
     install_args = [
-        f"-DCMAKE_INSTALL_PREFIX={CMAKE_INSTALL_DIR_ABS}/",
+        f"-DCMAKE_INSTALL_PREFIX={cmake_install_dir}/",
         f"-DCMAKE_INSTALL_COMPONENT=IreePythonPackage-runtime",
         "-P",
-        os.path.join(IREE_BINARY_DIR, "cmake_install.cmake"),
+        os.path.join(cmake_build_dir, "cmake_install.cmake"),
     ]
     if strip_install:
         install_args.append("-DCMAKE_INSTALL_DO_STRIP=ON")
     print(f"Installing with: {install_args}", file=sys.stderr)
-    subprocess.check_call(["cmake"] + install_args, cwd=IREE_BINARY_DIR)
+    subprocess.check_call(["cmake"] + install_args, cwd=cmake_build_dir)
 
     # Write version.py directly into install dir.
     version_py_file = os.path.join(
-        CMAKE_INSTALL_DIR_ABS,
+        cmake_install_dir,
         "python_packages",
         "iree_runtime",
         "iree",
-        "runtime",
+        "_runtime_libs",
         "version.py",
     )
     os.makedirs(os.path.dirname(version_py_file), exist_ok=True)
     with open(version_py_file, "wt") as f:
         f.write(version_py_content)
 
-    print(f"Installation prepared: {CMAKE_INSTALL_DIR_ABS}", file=sys.stderr)
+    print(f"Installation prepared: {cmake_install_dir}", file=sys.stderr)
 
 
 class CMakeBuildPy(_build_py):
     def run(self):
-        # It is critical that the target directory contain all built extensions,
-        # or else setuptools will helpfully compile an empty binary for us
-        # (this is the **worst** possible thing it could do). We just copy
-        # everything. What's another hundred megs between friends?
-        target_dir = os.path.abspath(self.build_lib)
+        # The super-class handles the pure python build.
+        super().run()
+        self.build_default_configuration()
+        if ENABLE_TRACY:
+            self.build_tracy_configuration()
+
+    def build_default_configuration(self):
+        print("*****************************", file=sys.stderr)
+        print("* Building base runtime     *", file=sys.stderr)
+        print("*****************************", file=sys.stderr)
+        build_configuration(IREE_BINARY_DIR, CMAKE_INSTALL_DIR_ABS, extra_cmake_args=())
+        # We only take the iree._runtime_libs from the default build.
+        target_dir = os.path.join(
+            os.path.abspath(self.build_lib), "iree", "_runtime_libs"
+        )
         print(f"Building in target dir: {target_dir}", file=sys.stderr)
         os.makedirs(target_dir, exist_ok=True)
         print("Copying install to target.", file=sys.stderr)
         if os.path.exists(target_dir):
             shutil.rmtree(target_dir)
         shutil.copytree(
-            os.path.join(CMAKE_INSTALL_DIR_ABS, "python_packages", "iree_runtime"),
+            os.path.join(
+                CMAKE_INSTALL_DIR_ABS,
+                "python_packages",
+                "iree_runtime",
+                "iree",
+                "_runtime_libs",
+            ),
+            target_dir,
+            symlinks=False,
+        )
+        print("Target populated.", file=sys.stderr)
+
+    def build_tracy_configuration(self):
+        print("*****************************", file=sys.stderr)
+        print("* Building tracy runtime    *", file=sys.stderr)
+        print("*****************************", file=sys.stderr)
+        build_configuration(
+            IREE_TRACY_BINARY_DIR,
+            CMAKE_TRACY_INSTALL_DIR_ABS,
+            extra_cmake_args=(
+                "-DIREE_ENABLE_RUNTIME_TRACING=ON",
+                "-DIREE_BUILD_TRACY=ON",
+            ),
+        )
+        # We only take the iree._runtime_libs from the tracy build.
+        target_dir = os.path.join(
+            os.path.abspath(self.build_lib), "iree", "_runtime_libs_tracy"
+        )
+        print(f"Building in target dir: {target_dir}", file=sys.stderr)
+        os.makedirs(target_dir, exist_ok=True)
+        print("Copying install to target.", file=sys.stderr)
+        if os.path.exists(target_dir):
+            shutil.rmtree(target_dir)
+        shutil.copytree(
+            os.path.join(
+                CMAKE_TRACY_INSTALL_DIR_ABS,
+                "python_packages",
+                "iree_runtime",
+                "iree",
+                "_runtime_libs",
+            ),
             target_dir,
             symlinks=False,
         )
@@ -369,15 +419,19 @@
 """
 
 
-prepare_installation()
-
-packages = find_namespace_packages(
-    where=os.path.join(CMAKE_INSTALL_DIR_ABS, "python_packages", "iree_runtime"),
-    include=[
-        "iree._runtime",
-        "iree.runtime",
-        "iree.runtime.*",
-    ],
+packages = (
+    find_namespace_packages(
+        where=os.path.join(IREE_SOURCE_DIR, "runtime", "bindings", "python"),
+        include=[
+            "iree.runtime",
+            "iree.runtime.*",
+        ],
+    )
+    + [
+        # Default libraries.
+        "iree._runtime_libs",
+    ]
+    + (["iree._runtime_libs_tracy"] if ENABLE_TRACY else [])
 )
 print(f"Found runtime packages: {packages}")
 
@@ -392,6 +446,30 @@
 custom_package_suffix = os.getenv("IREE_RUNTIME_CUSTOM_PACKAGE_SUFFIX", "")
 custom_package_prefix = os.getenv("IREE_RUNTIME_CUSTOM_PACKAGE_PREFIX", "")
 
+# We need some directories to exist before setup.
+os.makedirs(
+    os.path.join(
+        CMAKE_INSTALL_DIR_ABS,
+        "python_packages",
+        "iree_runtime",
+        "iree",
+        "_runtime_libs",
+    ),
+    exist_ok=True,
+)
+if ENABLE_TRACY:
+    os.makedirs(
+        os.path.join(
+            CMAKE_TRACY_INSTALL_DIR_ABS,
+            "python_packages",
+            "iree_runtime",
+            "iree",
+            "_runtime_libs",
+        ),
+        exist_ok=True,
+    )
+
+
 setup(
     name=f"{custom_package_prefix}iree-runtime{custom_package_suffix}{PACKAGE_SUFFIX}",
     version=f"{PACKAGE_VERSION}",
@@ -412,35 +490,67 @@
     ],
     url="https://github.com/openxla/iree",
     python_requires=">=3.8",
-    ext_modules=[
-        CMakeExtension("iree._runtime"),
-    ],
+    ext_modules=(
+        [
+            CMakeExtension("iree._runtime_libs._runtime"),
+        ]
+        + (
+            [CMakeExtension("iree._runtime_libs_tracy._runtime")]
+            if ENABLE_TRACY
+            else []
+        )
+    ),
     cmdclass={
         "build": CustomBuild,
         "built_ext": NoopBuildExtension,
         "build_py": CMakeBuildPy,
     },
     zip_safe=False,
-    package_dir={
-        # Note: Must be relative path, so we line this up with the absolute
-        # path built above. Note that this must exist prior to the call.
-        "": f"{CMAKE_INSTALL_DIR_REL}/python_packages/iree_runtime",
-    },
+    package_dir=combine_dicts(
+        {
+            # Note: Must be relative path, so we line this up with the absolute
+            # path built above. Note that this must exist prior to the call.
+            "iree.runtime": "bindings/python/iree/runtime",
+            "iree._runtime_libs": f"{CMAKE_INSTALL_DIR_REL}/python_packages/iree_runtime/iree/_runtime_libs",
+        },
+        {
+            # Note that we do a switcheroo here by populating the
+            # _runtime_libs_tracy package from the tracy-enabled build of
+            # iree._runtime_libs. It is relocatable, and the Python side looks
+            # for this stuff.
+            "iree._runtime_libs_tracy": f"{CMAKE_TRACY_INSTALL_DIR_REL}/python_packages/iree_runtime/iree/_runtime_libs",
+        }
+        if ENABLE_TRACY
+        else {},
+    ),
     packages=packages,
     # Matching the native extension as a data file keeps setuptools from
     # "building" it (i.e. turning it into a static binary).
-    package_data={
-        "": [
-            f"*{sysconfig.get_config_var('EXT_SUFFIX')}",
-            "iree-run-module*",
-            "iree-run-trace*",
-            "iree-benchmark-module*",
-            "iree-benchmark-trace*",
-            "iree-dump-module*",
-            "iree-cpuinfo*",
-            "iree-tracy-capture*",
-        ],
-    },
+    package_data=combine_dicts(
+        {
+            "iree._runtime_libs": [
+                f"*{sysconfig.get_config_var('EXT_SUFFIX')}",
+                "iree-run-module*",
+                "iree-run-trace*",
+                "iree-benchmark-module*",
+                "iree-benchmark-trace*",
+                "iree-dump-module*",
+                "iree-cpuinfo*",
+            ],
+        },
+        {
+            "iree._runtime_libs_tracy": [
+                f"*{sysconfig.get_config_var('EXT_SUFFIX')}",
+                "iree-run-module*",
+                "iree-run-trace*",
+                "iree-benchmark-module*",
+                "iree-benchmark-trace*",
+                "iree-tracy-capture*",
+            ]
+        }
+        if ENABLE_TRACY
+        else {},
+    ),
     entry_points={
         "console_scripts": [
             "iree-run-module = iree.runtime.scripts.iree_run_module.__main__:main",