[Bazel] Add runtime Python bindings to Bazel (#24553)

This is the first PR in a longer series that adds support for building
the Python wheels natively with Bazel.

Depends on https://github.com/llvm/llvm-project/pull/200516
diff --git a/MODULE.bazel b/MODULE.bazel
index 15f7553..02a431f 100644
--- a/MODULE.bazel
+++ b/MODULE.bazel
@@ -60,7 +60,6 @@
     "mpfr",
     "nanobind",
     "pfm",
-    "pybind11",
     "pyyaml",
     "robin_map",
     # Note: vulkan_headers is provided by iree_ext with IREE's custom BUILD.overlay
@@ -97,6 +96,12 @@
     "webgpu_headers",
 )
 
+# The LLVM overlay imports @llvm-raw from its own llvm_repos_extension, which
+# only generates it when the overlay is the root module. IREE is the root
+# module here, so inject IREE's @llvm-raw (created by iree_extension) into the
+# overlay extension to satisfy its use_repo("llvm-raw").
+inject_repo(llvm_repos_ext, "llvm-raw")
+
 # Configure LLVM using the llvm-raw repo (creates llvm-project)
 llvm_configure = use_repo_rule("@llvm-raw//utils/bazel:configure.bzl", "llvm_configure")
 
diff --git a/runtime/bindings/python/BUILD.bazel b/runtime/bindings/python/BUILD.bazel
new file mode 100644
index 0000000..a80fcd6
--- /dev/null
+++ b/runtime/bindings/python/BUILD.bazel
@@ -0,0 +1,136 @@
+# Copyright 2026 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# bazel-to-cmake: skip
+# Both this BUILD.bazel and the matching CMakeLists.txt are written by hand
+
+load("@rules_python//python:defs.bzl", "py_library")
+load("@rules_python//python:pip.bzl", "whl_filegroup")
+load(
+    "//build_tools/bazel:build_defs.oss.bzl",
+    "iree_cc_library",
+    "iree_runtime_cc_binary",
+    "iree_runtime_cc_library",
+)
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+py_library(
+    name = "runtime_files",
+    srcs = glob(["iree/**/*.py"]),
+)
+
+exports_files(glob([
+    "iree/**/*.py",
+    "iree/**/*.pyi",
+]))
+
+whl_filegroup(
+    name = "numpy_includes",
+    pattern = "numpy/_core/include/.*",
+    whl = "@pip//numpy:whl",
+)
+
+iree_cc_library(
+    name = "numpy_headers",
+    hdrs = [":numpy_includes"],
+    includes = ["numpy_includes/numpy/_core/include"],
+    visibility = ["//visibility:private"],
+)
+
+NANOBIND_COPTS = [
+    "-fexceptions",
+    "-frtti",
+]
+
+NANOBIND_FEATURES = [
+    "-use_header_modules",
+]
+
+iree_runtime_cc_library(
+    name = "bindings_python",
+    srcs = [
+        "hal.cc",
+        "invoke.cc",
+        "io.cc",
+        "loop.cc",
+        "numpy_interop.cc",
+        "py_module.cc",
+        "status_utils.cc",
+        "tokenizer.cc",
+        "vm.cc",
+    ],
+    hdrs = [
+        "binding.h",
+        "buffer_interop.h",
+        "hal.h",
+        "invoke.h",
+        "io.h",
+        "local_dlpack.h",
+        "loop.h",
+        "numpy_interop.h",
+        "py_module.h",
+        "status_utils.h",
+        "tokenizer.h",
+        "vm.h",
+    ],
+    copts = NANOBIND_COPTS,
+    features = NANOBIND_FEATURES,
+    deps = [
+        ":numpy_headers",
+        "//runtime/src/iree/async",
+        "//runtime/src/iree/async/util:proactor_pool",
+        "//runtime/src/iree/base",
+        "//runtime/src/iree/base/internal:path",
+        "//runtime/src/iree/base/threading",
+        "//runtime/src/iree/base/tooling:flags",
+        "//runtime/src/iree/hal",
+        "//runtime/src/iree/hal/drivers",
+        "//runtime/src/iree/hal/utils:allocators",
+        "//runtime/src/iree/io:file_handle",
+        "//runtime/src/iree/io:parameter_index",
+        "//runtime/src/iree/io:parameter_index_provider",
+        "//runtime/src/iree/io:parameter_provider",
+        "//runtime/src/iree/io/formats:parser_registry",
+        "//runtime/src/iree/io/formats/irpa",
+        "//runtime/src/iree/modules/hal",
+        "//runtime/src/iree/modules/io/parameters",
+        "//runtime/src/iree/schemas:parameter_archive",
+        "//runtime/src/iree/tokenizer",
+        "//runtime/src/iree/tokenizer/format/huggingface:tokenizer_json",
+        "//runtime/src/iree/tokenizer/format/tiktoken",
+        "//runtime/src/iree/tokenizer/vocab",
+        "//runtime/src/iree/tooling:device_util",
+        "//runtime/src/iree/tooling/modules",
+        "//runtime/src/iree/vm",
+        "//runtime/src/iree/vm/bytecode:module",
+        "@nanobind",
+        "@rules_python//python/cc:current_py_cc_headers",
+    ],
+)
+
+iree_runtime_cc_binary(
+    name = "_runtime.so",
+    srcs = ["initialize_module.cc"],
+    copts = NANOBIND_COPTS,
+    features = NANOBIND_FEATURES,
+    linkopts = select({
+        "@platforms//os:linux": ["-Wl,--export-dynamic"],
+        "//conditions:default": [],
+    }),
+    linkshared = 1,
+    linkstatic = 0,
+    deps = [
+        ":bindings_python",
+        "//runtime/src/iree/base/tooling:flags",
+        "//runtime/src/iree/hal/drivers",
+        "@nanobind",
+    ],
+)
diff --git a/runtime/bindings/python/hal.cc b/runtime/bindings/python/hal.cc
index d64ccfb..e19c857 100644
--- a/runtime/bindings/python/hal.cc
+++ b/runtime/bindings/python/hal.cc
@@ -21,11 +21,10 @@
 #include "./vm.h"
 #include "iree/async/frontier_tracker.h"
 #include "iree/async/util/proactor_pool.h"
+#include "iree/base/api.h"
 #include "iree/base/internal/path.h"
-#include "iree/base/status.h"
 #include "iree/base/threading/numa.h"
 #include "iree/hal/api.h"
-#include "iree/hal/semaphore.h"
 #include "iree/hal/utils/allocators.h"
 #include "iree/modules/hal/module.h"
 #include "iree/tooling/device_util.h"
diff --git a/runtime/bindings/python/hal.h b/runtime/bindings/python/hal.h
index e4cb8d4..dd58c2e 100644
--- a/runtime/bindings/python/hal.h
+++ b/runtime/bindings/python/hal.h
@@ -19,7 +19,7 @@
 #include "./binding.h"
 #include "./status_utils.h"
 #include "./vm.h"
-#include "iree/base/string_view.h"
+#include "iree/base/api.h"
 #include "iree/hal/api.h"
 #include "iree/modules/hal/debugging.h"