Finish wiring vm2 up to the Python bindings.

* The example I have isn't producing the right result (the output is all zeros), so it is commented out. It will be easier to diagnose once this has landed.
* Fixes a bug in merging reflection attrs (redundant nested I and R spans).
* Stores reflection attributes on the corresponding internal function, since that is what we get when we look up the export.
* Fixes reflection lookup to consult internal functions.
* Once I have this working, I'll introduce a high-level Python API to initialize the system, load modules, and invoke functions without all of the verbosity.
* I'm not really happy with how this layers together yet, but considering the number of things in flight, it is OK. Once we delete all of the old rt stuff, it should be more obvious how to refactor the bindings for simplicity.

PiperOrigin-RevId: 286710166
diff --git a/bindings/python/pyiree/BUILD b/bindings/python/pyiree/BUILD
index d95dd48..360063a 100644
--- a/bindings/python/pyiree/BUILD
+++ b/bindings/python/pyiree/BUILD
@@ -130,6 +130,10 @@
         "//iree/modules/hal",
         "//iree/vm2",
         "//iree/vm2:bytecode_module",
+        "//iree/vm2:invocation",
+        "//iree/vm2:module",
+        "//iree/vm2:ref",
+        "//iree/vm2:variant_list",
         "@local_config_mlir//:IR",
         "//iree/base:api",
         "//iree/base:status",
@@ -178,6 +182,8 @@
     name = "function_abi_test",
     srcs = ["function_abi_test.py"],
     python_version = "PY3",
+    # TODO(laurenzo): Enable once test does not depend on a real vulkan device.
+    tags = ["notap"],
     deps = NUMPY_DEPS + [
         "//bindings/python:pathsetup",  # build_cleaner: keep
         "@absl_py//absl/testing:absltest",
diff --git a/bindings/python/pyiree/binding.h b/bindings/python/pyiree/binding.h
index 24e4907..4346d82 100644
--- a/bindings/python/pyiree/binding.h
+++ b/bindings/python/pyiree/binding.h
@@ -152,6 +152,9 @@
     return self;
   }
 
+  // Whether it holds an instance (true when the pointer is not nullptr).
+  operator bool() const { return instance_; }
+
   T* steal_raw_ptr() {
     T* ret = instance_;
     instance_ = nullptr;
diff --git a/bindings/python/pyiree/compiler_test.py b/bindings/python/pyiree/compiler_test.py
index 5b8b91b..53a3912 100644
--- a/bindings/python/pyiree/compiler_test.py
+++ b/bindings/python/pyiree/compiler_test.py
@@ -1,3 +1,4 @@
+# Lint as: python3
 # Copyright 2019 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,8 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Lint as: python3
-
 from absl.testing import absltest
 import pyiree
 
diff --git a/bindings/python/pyiree/function_abi.cc b/bindings/python/pyiree/function_abi.cc
index b3d3546..93332f5 100644
--- a/bindings/python/pyiree/function_abi.cc
+++ b/bindings/python/pyiree/function_abi.cc
@@ -17,11 +17,14 @@
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/types/span.h"
-#include "bindings/python/pyiree/rt.h"
+#include "bindings/python/pyiree/hal.h"
 #include "bindings/python/pyiree/status_utils.h"
 #include "iree/base/api.h"
 #include "iree/base/signature_mangle.h"
 #include "iree/hal/api.h"
+#include "iree/modules/hal/hal_module.h"
+#include "iree/vm2/ref.h"
+#include "iree/vm2/variant_list.h"
 
 namespace iree {
 namespace python {
@@ -33,7 +36,7 @@
 // for testing. Typically, this will be created directly from a function
 // and the attribute introspection will happen internal to C++.
 std::unique_ptr<FunctionAbi> PyCreateAbi(
-    std::shared_ptr<HostTypeFactory> host_type_factory,
+    HalDevice& device, std::shared_ptr<HostTypeFactory> host_type_factory,
     std::vector<std::pair<std::string, std::string>> attrs) {
   auto lookup =
       [&attrs](absl::string_view key) -> absl::optional<absl::string_view> {
@@ -42,13 +45,12 @@
     }
     return absl::nullopt;
   };
-  return FunctionAbi::Create(std::move(host_type_factory), lookup);
+  return FunctionAbi::Create(device, std::move(host_type_factory), lookup);
 }
 
 std::unique_ptr<FunctionArgVariantList> PyRawPack(
-    FunctionAbi* self, RtContext& context,
-    absl::Span<const FunctionAbi::Description> descs, py::sequence py_args,
-    bool writable) {
+    FunctionAbi* self, absl::Span<const FunctionAbi::Description> descs,
+    py::sequence py_args, bool writable) {
   if (py_args.size() != descs.size()) {
     throw RaiseValueError("Mismatched pack arity");
   }
@@ -56,27 +58,26 @@
   f_list->contents().resize(py_args.size());
   absl::InlinedVector<py::handle, 8> local_py_args(py_args.begin(),
                                                    py_args.end());
-  self->RawPack(context, descs, absl::MakeSpan(local_py_args),
+  self->RawPack(descs, absl::MakeSpan(local_py_args),
                 absl::MakeSpan(f_list->contents()), writable);
   return f_list;
 }
 
 std::unique_ptr<FunctionArgVariantList> PyAllocateResults(
-    FunctionAbi* self, RtContext& context, FunctionArgVariantList* f_args) {
+    FunctionAbi* self, FunctionArgVariantList* f_args) {
   auto f_results = absl::make_unique<FunctionArgVariantList>();
   f_results->contents().resize(self->raw_result_arity());
-  self->AllocateResults(context,
-                        absl::MakeConstSpan(self->raw_config().results),
+  self->AllocateResults(absl::MakeConstSpan(self->raw_config().results),
                         absl::MakeConstSpan(f_args->contents()),
                         absl::MakeSpan(f_results->contents()));
   return f_results;
 }
 
-py::object PyRawUnpackResults(FunctionAbi* self, RtContext& context,
+py::object PyRawUnpackResults(FunctionAbi* self,
                               FunctionArgVariantList* f_args) {
   absl::InlinedVector<py::object, 4> py_results;
   py_results.resize(f_args->contents().size());
-  self->RawUnpack(context, absl::MakeConstSpan(self->raw_config().results),
+  self->RawUnpack(absl::MakeConstSpan(self->raw_config().results),
                   absl::MakeSpan(f_args->contents()),
                   absl::MakeSpan(py_results));
   py::tuple py_result_tuple(py_results.size());
@@ -163,52 +164,6 @@
   }
 }
 
-void PackBuffer(RtContext& context, const RawSignatureParser::Description& desc,
-                py::handle py_arg, FunctionArgVariant& f_arg, bool writable) {
-  // Request a view of the buffer (use the raw python C API to avoid some
-  // allocation and copying at the pybind level).
-  Py_buffer py_view;
-  // Note that only C-Contiguous ND-arrays are presently supported, so
-  // only request that via PyBUF_ND. Long term, we should consult an
-  // "oracle" in the runtime to determine the precise required format and
-  // set flags accordingly (and fallback/copy on failure).
-  int flags = PyBUF_FORMAT | PyBUF_ND;
-  if (writable) {
-    flags |= PyBUF_WRITABLE;
-  }
-
-  // Acquire the backing buffer and setup RAII release.
-  if (PyObject_GetBuffer(py_arg.ptr(), &py_view, flags) != 0) {
-    // The GetBuffer call is required to set an appropriate error.
-    throw py::error_already_set();
-  }
-  PyBufferReleaser py_view_releaser(py_view);
-
-  auto& bound_arg = f_arg.emplace<BoundHalBufferFunctionArg>();
-  // Whether the py object needs to be retained with the argument.
-  // Should be set to true if directly mapping, false if copied.
-  bool depends_on_pyobject = false;
-
-  // Verify compatibility.
-  MapBufferAttrs(py_view, desc, bound_arg);
-
-  // Allocate a HalBuffer.
-  // This is hard-coded to C-contiguous right now.
-  // TODO(laurenzo): Expand to other layouts as needed.
-  // TODO(laurenzo): Wrap and retain original buffer (depends_on_pyobject=true).
-  bound_arg.buffer =
-      context.AllocateDeviceVisible(py_view.len, IREE_HAL_BUFFER_USAGE_ALL);
-  CheckApiStatus(iree_hal_buffer_write_data(bound_arg.buffer.raw_ptr(), 0,
-                                            py_view.buf, py_view.len),
-                 "Error writing to input buffer");
-
-  // Only capture the reference to the exporting object (incrementing it)
-  // once guaranteed successful.
-  if (depends_on_pyobject) {
-    bound_arg.dependent_pyobject = py::object(py::handle(py_view.obj), false);
-  }
-}
-
 std::string FunctionArgVariantListRepr(FunctionArgVariantList* self) {
   std::string s;
   struct Visitor {
@@ -250,8 +205,40 @@
 }  // namespace
 
 //------------------------------------------------------------------------------
+// FunctionArgVariantList
+//------------------------------------------------------------------------------
+
+VmVariantList FunctionArgVariantList::ToVmVariantList() {
+  struct Visitor {
+    VmVariantList list;
+    void operator()(std::nullptr_t) {
+      CheckApiStatus(iree_vm_variant_list_append_null_ref(list.raw_ptr()),
+                     "Error appending to variant list");
+    }
+    void operator()(BoundHalBufferFunctionArg& arg) {
+      if (arg.dependent_pyobject) {
+        throw RaiseValueError("Dependent buffer object not yet supported");
+      }
+      iree_vm_ref_t buffer_ref =
+          iree_hal_buffer_retain_ref(arg.buffer.raw_ptr());
+      CheckApiStatus(
+          iree_vm_variant_list_append_ref_move(list.raw_ptr(), &buffer_ref),
+          "Error moving buffer");
+    }
+  };
+
+  Visitor visitor{VmVariantList::Create(contents_.size())};
+  for (auto& arg_variant : contents_) {
+    absl::visit(visitor, arg_variant);
+  }
+
+  return std::move(visitor.list);
+}
+
+//------------------------------------------------------------------------------
 // FunctionAbi
 //------------------------------------------------------------------------------
+
 std::string FunctionAbi::DebugString() const {
   RawSignatureParser p;
   auto s = p.FunctionSignatureToString(raw_config_.signature);
@@ -262,9 +249,10 @@
 }
 
 std::unique_ptr<FunctionAbi> FunctionAbi::Create(
-    std::shared_ptr<HostTypeFactory> host_type_factory,
+    HalDevice& device, std::shared_ptr<HostTypeFactory> host_type_factory,
     AttributeLookup lookup) {
-  auto abi = absl::make_unique<FunctionAbi>(std::move(host_type_factory));
+  auto abi =
+      absl::make_unique<FunctionAbi>(device, std::move(host_type_factory));
 
   // Fetch key attributes for the raw ABI.
   auto raw_version = lookup("fv");
@@ -291,8 +279,8 @@
                           });
   if (raw_parser.GetError()) {
     auto message = absl::StrCat(
-        "Error parsing raw ABI signature: ", *raw_parser.GetError(), " (",
-        *raw_fsig_str, ")");
+        "Error parsing raw ABI signature: ", *raw_parser.GetError(), " ('",
+        *raw_fsig_str, "')");
     throw RaiseValueError(message.c_str());
   }
 
@@ -300,8 +288,7 @@
   return abi;
 }
 
-void FunctionAbi::RawPack(RtContext& context,
-                          absl::Span<const Description> descs,
+void FunctionAbi::RawPack(absl::Span<const Description> descs,
                           absl::Span<py::handle> py_args,
                           absl::Span<FunctionArgVariant> f_args,
                           bool writable) {
@@ -313,7 +300,7 @@
     const Description& desc = descs[i];
     switch (desc.type) {
       case RawSignatureParser::Type::kBuffer:
-        PackBuffer(context, desc, py_args[i], f_args[i], writable);
+        PackBuffer(desc, py_args[i], f_args[i], writable);
         break;
       case RawSignatureParser::Type::kRefObject:
         throw RaisePyError(PyExc_NotImplementedError,
@@ -326,8 +313,7 @@
   }
 }
 
-void FunctionAbi::RawUnpack(RtContext& context,
-                            absl::Span<const Description> descs,
+void FunctionAbi::RawUnpack(absl::Span<const Description> descs,
                             absl::Span<FunctionArgVariant> f_results,
                             absl::Span<py::object> py_results) {
   if (descs.size() != f_results.size() || descs.size() != py_results.size()) {
@@ -375,8 +361,7 @@
   }
 }
 
-void FunctionAbi::AllocateResults(RtContext& context,
-                                  absl::Span<const Description> descs,
+void FunctionAbi::AllocateResults(absl::Span<const Description> descs,
                                   absl::Span<const FunctionArgVariant> f_args,
                                   absl::Span<FunctionArgVariant> f_results) {
   if (descs.size() != f_results.size()) {
@@ -410,11 +395,16 @@
         }
 
         // Static cases are easy.
-        // TODO(laurenzo): This should probably be AllocateDeviceLocal
-        // with a memory type of HOST_VISIBLE, which is not yet plumbed
-        // through.
-        bound_result.buffer = context.AllocateDeviceVisible(
-            alloc_size, IREE_HAL_BUFFER_USAGE_ALL);
+        iree_hal_buffer_t* raw_buffer;
+        CheckApiStatus(iree_hal_allocator_allocate_buffer(
+                           device_.allocator(),
+                           static_cast<iree_hal_memory_type_t>(
+                               IREE_HAL_MEMORY_TYPE_DEVICE_LOCAL |
+                               IREE_HAL_MEMORY_TYPE_HOST_VISIBLE),
+                           IREE_HAL_BUFFER_USAGE_ALL, alloc_size, &raw_buffer),
+                       "Error allocating host visible buffer");
+        bound_result.buffer = HalBuffer::CreateRetained(raw_buffer);
+        assert(i < f_results.size());
         f_results[i] = std::move(bound_result);
         break;
       }
@@ -429,6 +419,60 @@
   }
 }
 
+void FunctionAbi::PackBuffer(const RawSignatureParser::Description& desc,
+                             py::handle py_arg, FunctionArgVariant& f_arg,
+                             bool writable) {
+  // Request a view of the buffer (use the raw python C API to avoid some
+  // allocation and copying at the pybind level).
+  Py_buffer py_view;
+  // Note that only C-Contiguous ND-arrays are presently supported, so
+  // only request that via PyBUF_ND. Long term, we should consult an
+  // "oracle" in the runtime to determine the precise required format and
+  // set flags accordingly (and fallback/copy on failure).
+  int flags = PyBUF_FORMAT | PyBUF_ND;
+  if (writable) {
+    flags |= PyBUF_WRITABLE;
+  }
+
+  // Acquire the backing buffer and setup RAII release.
+  if (PyObject_GetBuffer(py_arg.ptr(), &py_view, flags) != 0) {
+    // The GetBuffer call is required to set an appropriate error.
+    throw py::error_already_set();
+  }
+  PyBufferReleaser py_view_releaser(py_view);
+
+  auto& bound_arg = f_arg.emplace<BoundHalBufferFunctionArg>();
+  // Whether the py object needs to be retained with the argument.
+  // Should be set to true if directly mapping, false if copied.
+  bool depends_on_pyobject = false;
+
+  // Verify compatibility.
+  MapBufferAttrs(py_view, desc, bound_arg);
+
+  // Allocate a HalBuffer.
+  // This is hard-coded to C-contiguous right now.
+  // TODO(laurenzo): Expand to other layouts as needed.
+  // TODO(laurenzo): Wrap and retain original buffer (depends_on_pyobject=true).
+  iree_hal_buffer_t* raw_buffer;
+  CheckApiStatus(iree_hal_allocator_allocate_buffer(
+                     device_.allocator(),
+                     static_cast<iree_hal_memory_type_t>(
+                         IREE_HAL_MEMORY_TYPE_HOST_LOCAL |
+                         IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE),
+                     IREE_HAL_BUFFER_USAGE_ALL, py_view.len, &raw_buffer),
+                 "Failed to allocate device visible buffer");
+  bound_arg.buffer = HalBuffer::CreateRetained(raw_buffer);
+  CheckApiStatus(iree_hal_buffer_write_data(bound_arg.buffer.raw_ptr(), 0,
+                                            py_view.buf, py_view.len),
+                 "Error writing to input buffer");
+
+  // Only capture the reference to the exporting object (incrementing it)
+  // once guaranteed successful.
+  if (depends_on_pyobject) {
+    bound_arg.dependent_pyobject = py::object(py::handle(py_view.obj), false);
+  }
+}
+
 void SetupFunctionAbiBindings(pybind11::module m) {
   m.def("create", &PyCreateAbi);
   py::class_<FunctionAbi, std::unique_ptr<FunctionAbi>>(m, "FunctionAbi")
@@ -436,8 +480,8 @@
       .def_property_readonly("raw_input_arity", &FunctionAbi::raw_input_arity)
       .def_property_readonly("raw_result_arity", &FunctionAbi::raw_result_arity)
       .def("raw_pack_inputs",
-           [](FunctionAbi* self, RtContext& context, py::sequence py_args) {
-             return PyRawPack(self, context,
+           [](FunctionAbi* self, py::sequence py_args) {
+             return PyRawPack(self,
                               absl::MakeConstSpan(self->raw_config().inputs),
                               py_args, false /* writable */);
            })
@@ -451,6 +495,7 @@
       .def_property_readonly(
           "size",
           [](FunctionArgVariantList* self) { return self->contents().size(); })
+      .def("to_vm_variant_list", &FunctionArgVariantList::ToVmVariantList)
       .def("__repr__", &FunctionArgVariantListRepr);
 }
 
diff --git a/bindings/python/pyiree/function_abi.h b/bindings/python/pyiree/function_abi.h
index 51fdd72..6a7c870 100644
--- a/bindings/python/pyiree/function_abi.h
+++ b/bindings/python/pyiree/function_abi.h
@@ -26,13 +26,14 @@
 #include "bindings/python/pyiree/binding.h"
 #include "bindings/python/pyiree/hal.h"
 #include "bindings/python/pyiree/host_types.h"
+#include "bindings/python/pyiree/vm.h"
 #include "iree/base/signature_mangle.h"
 
 namespace iree {
 namespace python {
 
 // Forward declarations.
-class RtContext;
+class HalDevice;
 
 // A HalBuffer (iree_hal_buffer_t) bound to a function argument.
 // At this point, the buffer has been completely validated, with all shape
@@ -67,6 +68,12 @@
   VectorType& contents() { return contents_; }
   const VectorType& contents() const { return contents_; }
 
+  // Copies the contents into an iree_vm_variant_list_t.
+  // TODO(laurenzo): It would be best if the iree_vm_variant_list_t backed
+  // this type, but the two were created independently and need to be
+  // retrofitted.
+  VmVariantList ToVmVariantList();
+
  private:
   VectorType contents_;
 };
@@ -76,8 +83,10 @@
  public:
   using AttributeLookup =
       std::function<absl::optional<absl::string_view>(absl::string_view)>;
-  FunctionAbi(std::shared_ptr<HostTypeFactory> host_type_factory)
-      : host_type_factory_(std::move(host_type_factory)) {}
+  FunctionAbi(HalDevice& device,
+              std::shared_ptr<HostTypeFactory> host_type_factory)
+      : device_(HalDevice::RetainAndCreate(device.raw_ptr())),
+        host_type_factory_(std::move(host_type_factory)) {}
   virtual ~FunctionAbi() = default;
 
   using Description = RawSignatureParser::Description;
@@ -95,7 +104,7 @@
 
   // Creates an instance based on the function attributes.
   static std::unique_ptr<FunctionAbi> Create(
-      std::shared_ptr<HostTypeFactory> host_type_factory,
+      HalDevice& device, std::shared_ptr<HostTypeFactory> host_type_factory,
       AttributeLookup lookup);
 
   RawConfig& raw_config() { return raw_config_; }
@@ -107,7 +116,7 @@
   // which can be accessed via the non-prefixed Pack/Unpack methods.
   // Given a span of descriptions, packs the given py_args into the span
   // of function args. All spans must be of the same size.
-  void RawPack(RtContext& context, absl::Span<const Description> descs,
+  void RawPack(absl::Span<const Description> descs,
                absl::Span<py::handle> py_args,
                absl::Span<FunctionArgVariant> f_args, bool writable);
 
@@ -116,7 +125,7 @@
   // as nullptr.
   // Ordinarily, this will be invoked along with AllocateResults() but it
   // is broken out for testing.
-  void RawUnpack(RtContext& context, absl::Span<const Description> descs,
+  void RawUnpack(absl::Span<const Description> descs,
                  absl::Span<FunctionArgVariant> f_results,
                  absl::Span<py::object> py_results);
 
@@ -130,7 +139,7 @@
   // ahead of time, resulting in a nullptr in f_results. In such cases, the
   // invocation must ensure proper barriers are in place to fully execute the
   // function prior to delivering results to the user layer.
-  void AllocateResults(RtContext& context, absl::Span<const Description> descs,
+  void AllocateResults(absl::Span<const Description> descs,
                        absl::Span<const FunctionArgVariant> f_args,
                        absl::Span<FunctionArgVariant> f_results);
 
@@ -138,6 +147,10 @@
   std::string DebugString() const;
 
  private:
+  void PackBuffer(const RawSignatureParser::Description& desc,
+                  py::handle py_arg, FunctionArgVariant& f_arg, bool writable);
+
+  HalDevice device_;
   std::shared_ptr<HostTypeFactory> host_type_factory_;
   RawConfig raw_config_;
 };
diff --git a/bindings/python/pyiree/function_abi_test.py b/bindings/python/pyiree/function_abi_test.py
index 12053cf..59689c8 100644
--- a/bindings/python/pyiree/function_abi_test.py
+++ b/bindings/python/pyiree/function_abi_test.py
@@ -45,16 +45,22 @@
 
 class FunctionAbiTest(absltest.TestCase):
 
+  @classmethod
+  def setUpClass(cls):
+    super().setUpClass()
+    driver_names = pyiree.binding.hal.HalDriver.query()
+    print("DRIVER_NAMES =", driver_names)
+    cls.driver = pyiree.binding.hal.HalDriver.create("vulkan")
+    cls.device = cls.driver.create_default_device()
+
   def setUp(self):
     super().setUp()
-    self.htf = pyiree.binding.host_types.HostTypeFactory.create_numpy()
-    rt_policy = pyiree.binding.rt.Policy()
-    rt_instance = pyiree.binding.rt.Instance()
-    self.rt_context = pyiree.binding.rt.Context(rt_instance, rt_policy)
+    self.htf = pyiree.binding.host_types.HostTypeFactory.get_numpy()
 
   def test_static_arg_success(self):
     fabi = pyiree.binding.function_abi.create(
-        self.htf, ATTRS_1ARG_FLOAT32_10X128X64_TO_SINT32_32X8X64_V1)
+        self.device, self.htf,
+        ATTRS_1ARG_FLOAT32_10X128X64_TO_SINT32_32X8X64_V1)
     print(fabi)
     self.assertEqual(
         "<FunctionAbi (Buffer<float32[10x128x64]>) -> "
@@ -63,21 +69,22 @@
     self.assertEqual(1, fabi.raw_result_arity)
 
     arg = np.zeros((10, 128, 64), dtype=np.float32)
-    packed = fabi.raw_pack_inputs(self.rt_context, [arg])
+    packed = fabi.raw_pack_inputs([arg])
     print(packed)
     self.assertEqual("<FunctionArgVariantList(1): [HalBuffer(327680)]>",
                      repr(packed))
 
   def test_static_result_success(self):
     fabi = pyiree.binding.function_abi.create(
-        self.htf, ATTRS_1ARG_FLOAT32_10X128X64_TO_SINT32_32X8X64_V1)
+        self.device, self.htf,
+        ATTRS_1ARG_FLOAT32_10X128X64_TO_SINT32_32X8X64_V1)
     arg = np.zeros((10, 128, 64), dtype=np.float32)
-    f_args = fabi.raw_pack_inputs(self.rt_context, [arg])
-    f_results = fabi.allocate_results(self.rt_context, f_args)
+    f_args = fabi.raw_pack_inputs([arg])
+    f_results = fabi.allocate_results(f_args)
     print(f_results)
     self.assertEqual("<FunctionArgVariantList(1): [HalBuffer(65536)]>",
                      repr(f_results))
-    py_result, = fabi.raw_unpack_results(self.rt_context, f_results)
+    py_result, = fabi.raw_unpack_results(f_results)
     self.assertEqual(np.int32, py_result.dtype)
     self.assertEqual((32, 8, 64), py_result.shape)
     # Unpacking should have consumed the variants.
@@ -85,7 +92,8 @@
 
   def test_dynamic_arg_success(self):
     fabi = pyiree.binding.function_abi.create(
-        self.htf, ATTRS_1ARG_FLOAT32_DYNX128X64_TO_SINT32_DYNX8X64_V1)
+        self.device, self.htf,
+        ATTRS_1ARG_FLOAT32_DYNX128X64_TO_SINT32_DYNX8X64_V1)
     print(fabi)
     self.assertEqual(
         "<FunctionAbi (Buffer<float32[?x128x64]>) -> "
@@ -94,7 +102,7 @@
     self.assertEqual(1, fabi.raw_result_arity)
 
     arg = np.zeros((10, 128, 64), dtype=np.float32)
-    packed = fabi.raw_pack_inputs(self.rt_context, [arg])
+    packed = fabi.raw_pack_inputs([arg])
     print(packed)
     self.assertEqual(
         "<FunctionArgVariantList(1): [HalBuffer(327680, dynamic_dims=[10])]>",
@@ -102,43 +110,47 @@
 
   def test_static_arg_rank_mismatch(self):
     fabi = pyiree.binding.function_abi.create(
-        self.htf, ATTRS_1ARG_FLOAT32_10X128X64_TO_SINT32_32X8X64_V1)
+        self.device, self.htf,
+        ATTRS_1ARG_FLOAT32_10X128X64_TO_SINT32_32X8X64_V1)
     print(fabi)
     arg = np.zeros((10,), dtype=np.float32)
     with self.assertRaisesRegex(
         ValueError,
         re.escape("Mismatched buffer rank (received: 1, expected: 3)")):
-      fabi.raw_pack_inputs(self.rt_context, [arg])
+      fabi.raw_pack_inputs([arg])
 
   def test_static_arg_eltsize_mismatch(self):
     fabi = pyiree.binding.function_abi.create(
-        self.htf, ATTRS_1ARG_FLOAT32_10X128X64_TO_SINT32_32X8X64_V1)
+        self.device, self.htf,
+        ATTRS_1ARG_FLOAT32_10X128X64_TO_SINT32_32X8X64_V1)
     print(fabi)
     arg = np.zeros((10, 128, 64), dtype=np.float64)
     with self.assertRaisesRegex(
         ValueError,
         re.escape("Mismatched buffer item size (received: 8, expected: 4)")):
-      fabi.raw_pack_inputs(self.rt_context, [arg])
+      fabi.raw_pack_inputs([arg])
 
   def test_static_arg_dtype_mismatch(self):
     fabi = pyiree.binding.function_abi.create(
-        self.htf, ATTRS_1ARG_FLOAT32_10X128X64_TO_SINT32_32X8X64_V1)
+        self.device, self.htf,
+        ATTRS_1ARG_FLOAT32_10X128X64_TO_SINT32_32X8X64_V1)
     print(fabi)
     arg = np.zeros((10, 128, 64), dtype=np.int32)
     with self.assertRaisesRegex(
         ValueError,
         re.escape("Mismatched buffer format (received: i, expected: f)")):
-      fabi.raw_pack_inputs(self.rt_context, [arg])
+      fabi.raw_pack_inputs([arg])
 
   def test_static_arg_static_dim_mismatch(self):
     fabi = pyiree.binding.function_abi.create(
-        self.htf, ATTRS_1ARG_FLOAT32_10X128X64_TO_SINT32_32X8X64_V1)
+        self.device, self.htf,
+        ATTRS_1ARG_FLOAT32_10X128X64_TO_SINT32_32X8X64_V1)
     print(fabi)
     arg = np.zeros((10, 32, 64), dtype=np.float32)
     with self.assertRaisesRegex(
         ValueError,
         re.escape("Mismatched buffer dim (received: 32, expected: 128)")):
-      fabi.raw_pack_inputs(self.rt_context, [arg])
+      fabi.raw_pack_inputs([arg])
 
 
 if __name__ == "__main__":
diff --git a/bindings/python/pyiree/hal.h b/bindings/python/pyiree/hal.h
index 2f7cfdb..347f061 100644
--- a/bindings/python/pyiree/hal.h
+++ b/bindings/python/pyiree/hal.h
@@ -60,6 +60,9 @@
 
 class HalDevice : public ApiRefCounted<HalDevice, iree_hal_device_t> {
  public:
+  iree_hal_allocator_t* allocator() {
+    return iree_hal_device_allocator(raw_ptr());
+  }
 };
 
 class HalDriver : public ApiRefCounted<HalDriver, iree_hal_driver_t> {
diff --git a/bindings/python/pyiree/host_types.cc b/bindings/python/pyiree/host_types.cc
index 858a6c5..4666381 100644
--- a/bindings/python/pyiree/host_types.cc
+++ b/bindings/python/pyiree/host_types.cc
@@ -87,33 +87,32 @@
   };
 
   PyMappedMemory(Description desc, iree_hal_mapped_memory_t mapped_memory,
-                 iree_hal_buffer_t* buf)
-      : desc_(std::move(desc)), mapped_memory_(mapped_memory), buf_(buf) {
-    iree_hal_buffer_retain(buf_);
-  }
+                 HalBuffer buffer)
+      : desc_(std::move(desc)),
+        mapped_memory_(mapped_memory),
+        buf_(std::move(buffer)) {}
   ~PyMappedMemory() {
     if (buf_) {
-      CHECK_EQ(iree_hal_buffer_unmap(buf_, &mapped_memory_), IREE_STATUS_OK);
-      iree_hal_buffer_release(buf_);
+      CheckApiStatus(iree_hal_buffer_unmap(buf_.raw_ptr(), &mapped_memory_),
+                     "Error unmapping memory");
     }
   }
   PyMappedMemory(PyMappedMemory&& other)
-      : mapped_memory_(other.mapped_memory_), buf_(other.buf_) {
-    other.buf_ = nullptr;
-  }
+      : mapped_memory_(other.mapped_memory_), buf_(std::move(other.buf_)) {}
 
   const Description& desc() const { return desc_; }
 
   static std::unique_ptr<PyMappedMemory> Read(Description desc,
-                                              iree_hal_buffer_t* buffer) {
-    iree_device_size_t byte_length = iree_hal_buffer_byte_length(buffer);
+                                              HalBuffer buffer) {
+    iree_device_size_t byte_length =
+        iree_hal_buffer_byte_length(buffer.raw_ptr());
     iree_hal_mapped_memory_t mapped_memory;
-    CheckApiStatus(iree_hal_buffer_map(buffer, IREE_HAL_MEMORY_ACCESS_READ,
-                                       0 /* element_offset */, byte_length,
-                                       &mapped_memory),
+    CheckApiStatus(iree_hal_buffer_map(
+                       buffer.raw_ptr(), IREE_HAL_MEMORY_ACCESS_READ,
+                       0 /* element_offset */, byte_length, &mapped_memory),
                    "Could not map memory");
     return absl::make_unique<PyMappedMemory>(std::move(desc), mapped_memory,
-                                             buffer);
+                                             std::move(buffer));
   }
 
   py::buffer_info ToBufferInfo() {
@@ -129,7 +128,7 @@
  private:
   Description desc_;
   iree_hal_mapped_memory_t mapped_memory_;
-  iree_hal_buffer_t* buf_;
+  HalBuffer buf_;
 };
 
 class NumpyHostTypeFactory : public HostTypeFactory {
@@ -138,13 +137,15 @@
                                     HalBuffer buffer) override {
     auto mapped_memory = PyMappedMemory::Read(
         PyMappedMemory::Description::ForNdarray(element_type, dims),
-        buffer.steal_raw_ptr());
+        std::move(buffer));
     // Since an immediate ndarray was requested, we can just return a native
     // ndarray directly (versus a proxy that needs to lazily map on access).
     auto buffer_info = mapped_memory->ToBufferInfo();
+    auto py_mapped_memory = py::cast(mapped_memory.release(),
+                                     py::return_value_policy::take_ownership);
     return py::array(py::dtype(buffer_info), buffer_info.shape,
                      buffer_info.strides, buffer_info.ptr,
-                     py::cast(mapped_memory.release()) /* base */);
+                     std::move(py_mapped_memory) /* base */);
   }
 };
 
@@ -154,8 +155,9 @@
 // HostTypeFactory
 //------------------------------------------------------------------------------
 
-std::shared_ptr<HostTypeFactory> HostTypeFactory::CreateNumpyFactory() {
-  return std::make_shared<NumpyHostTypeFactory>();
+std::shared_ptr<HostTypeFactory> HostTypeFactory::GetNumpyFactory() {
+  static auto global_instance = std::make_shared<NumpyHostTypeFactory>();
+  return global_instance;
 }
 
 py::object HostTypeFactory::CreateImmediateNdarray(
@@ -169,7 +171,7 @@
   py::class_<HostTypeFactory, std::shared_ptr<HostTypeFactory>>(
       m, "HostTypeFactory")
       .def(py::init<>())
-      .def_static("create_numpy", &HostTypeFactory::CreateNumpyFactory);
+      .def_static("get_numpy", &HostTypeFactory::GetNumpyFactory);
   py::class_<PyMappedMemory, std::unique_ptr<PyMappedMemory>>(
       m, "PyMappedMemory", py::buffer_protocol())
       .def_buffer(&PyMappedMemory::ToBufferInfo);
diff --git a/bindings/python/pyiree/host_types.h b/bindings/python/pyiree/host_types.h
index 576cebd..7dd2964 100644
--- a/bindings/python/pyiree/host_types.h
+++ b/bindings/python/pyiree/host_types.h
@@ -35,7 +35,7 @@
   virtual ~HostTypeFactory() = default;
 
   // Creates a default implementation which interops with numpy.
-  static std::shared_ptr<HostTypeFactory> CreateNumpyFactory();
+  static std::shared_ptr<HostTypeFactory> GetNumpyFactory();
 
   // Creates a C-contiguous ndarray of the given element_type/dims and backed
   // by the given buffer. The resulting array has no synchronization and is
diff --git a/bindings/python/pyiree/vm.cc b/bindings/python/pyiree/vm.cc
index c3fc37a..14578fe 100644
--- a/bindings/python/pyiree/vm.cc
+++ b/bindings/python/pyiree/vm.cc
@@ -15,9 +15,12 @@
 #include "bindings/python/pyiree/vm.h"
 
 #include "absl/types/optional.h"
+#include "bindings/python/pyiree/function_abi.h"
 #include "bindings/python/pyiree/status_utils.h"
 #include "iree/base/api.h"
 #include "iree/modules/hal/hal_module.h"
+#include "iree/vm2/invocation.h"
+#include "iree/vm2/module.h"
 
 namespace iree {
 namespace python {
@@ -95,6 +98,42 @@
   CheckApiStatus(status, "Error registering modules");
 }
 
+std::unique_ptr<FunctionAbi> VmContext::CreateFunctionAbi(
+    HalDevice& device, std::shared_ptr<HostTypeFactory> host_type_factory,
+    iree_vm_function_t f) {
+  // Collect (key, value) reflection attrs of |f|; NOT_FOUND ends enumeration.
+  using AttrPair = std::pair<iree_string_view_t, iree_string_view_t>;
+  absl::InlinedVector<AttrPair, 4> attrs;
+  for (int index = 0;; ++index) {
+    AttrPair entry = {};
+    auto status = iree_vm_get_function_reflection_attr(
+        f, index, &entry.first, &entry.second);
+    if (status == IREE_STATUS_NOT_FOUND) break;
+    // Every other status is handed to CheckApiStatus.
+    CheckApiStatus(status, "Error getting reflection attr");
+    attrs.push_back(entry);
+  }
+
+  // Linear search by key; yields nullopt when no attr has that key.
+  auto attr_lookup =
+      [&attrs](absl::string_view key) -> absl::optional<absl::string_view> {
+    for (const auto& attr : attrs) {
+      if (absl::string_view(attr.first.data, attr.first.size) != key) continue;
+      return absl::string_view(attr.second.data, attr.second.size);
+    }
+    return absl::nullopt;
+  };
+
+  return FunctionAbi::Create(device, std::move(host_type_factory), attr_lookup);
+}
+
+void VmContext::Invoke(iree_vm_function_t f, VmVariantList& inputs,
+                       VmVariantList& outputs) {
+  auto status = iree_vm_invoke(raw_ptr(), f, nullptr, inputs.raw_ptr(),
+                               outputs.raw_ptr(), IREE_ALLOCATOR_SYSTEM);
+  CheckApiStatus(status, "Error invoking function");
+}
+
 //------------------------------------------------------------------------------
 // VmModule
 //------------------------------------------------------------------------------
@@ -149,8 +188,8 @@
       .def_property_readonly("size", &VmVariantList::size);
 
   py::class_<iree_vm_function_t>(m, "VmFunction")
-      .def_readonly("ordinal", &iree_vm_function_t::ordinal)
-      .def_readonly("linkage", &iree_vm_function_t::linkage);
+      .def_readonly("linkage", &iree_vm_function_t::linkage)
+      .def_readonly("ordinal", &iree_vm_function_t::ordinal);
 
   py::class_<VmInstance>(m, "VmInstance").def(py::init(&VmInstance::Create));
 
@@ -158,7 +197,10 @@
       .def(py::init(&VmContext::Create), py::arg("instance"),
            py::arg("modules") = absl::nullopt)
       .def("register_modules", &VmContext::RegisterModules)
-      .def_property_readonly("context_id", &VmContext::context_id);
+      .def_property_readonly("context_id", &VmContext::context_id)
+      .def("create_function_abi", &VmContext::CreateFunctionAbi,
+           py::arg("device"), py::arg("host_type_factory"), py::arg("f"))
+      .def("invoke", &VmContext::Invoke);
 
   py::class_<VmModule>(m, "VmModule")
       .def_static("from_flatbuffer", &VmModule::FromFlatbufferBlob)
diff --git a/bindings/python/pyiree/vm.h b/bindings/python/pyiree/vm.h
index 8d902c0..10e7508 100644
--- a/bindings/python/pyiree/vm.h
+++ b/bindings/python/pyiree/vm.h
@@ -17,6 +17,7 @@
 
 #include "absl/types/optional.h"
 #include "bindings/python/pyiree/binding.h"
+#include "bindings/python/pyiree/host_types.h"
 #include "bindings/python/pyiree/rt.h"
 #include "iree/base/api.h"
 #include "iree/vm/api.h"
@@ -26,6 +27,8 @@
 namespace iree {
 namespace python {
 
+class FunctionAbi;
+
 //------------------------------------------------------------------------------
 // Retain/release bindings
 //------------------------------------------------------------------------------
@@ -57,6 +60,45 @@
 };
 
 //------------------------------------------------------------------------------
+// VmVariantList
+//------------------------------------------------------------------------------
+
+// Move-only owner of an iree_vm_variant_list_t; frees it in the destructor.
+class VmVariantList {
+ public:
+  VmVariantList() : list_(nullptr) {}
+  ~VmVariantList() {
+    if (!list_) return;
+    CheckApiStatus(iree_vm_variant_list_free(list_), "Error freeing list");
+  }
+
+  // Takes over |other|'s list and leaves |other| holding nullptr.
+  VmVariantList(VmVariantList&& other) noexcept : list_(other.list_) {
+    other.list_ = nullptr;
+  }
+
+  VmVariantList& operator=(const VmVariantList&) = delete;
+  VmVariantList(const VmVariantList&) = delete;
+
+  // Allocates a new list, forwarding |capacity| to iree_vm_variant_list_alloc.
+  static VmVariantList Create(iree_host_size_t capacity) {
+    iree_vm_variant_list_t* list = nullptr;
+    CheckApiStatus(
+        iree_vm_variant_list_alloc(capacity, IREE_ALLOCATOR_SYSTEM, &list),
+        "Error allocating variant list");
+    return VmVariantList(list);
+  }
+
+  iree_host_size_t size() const { return iree_vm_variant_list_size(list_); }
+  iree_vm_variant_list_t* raw_ptr() { return list_; }
+  const iree_vm_variant_list_t* raw_ptr() const { return list_; }
+
+ private:
+  explicit VmVariantList(iree_vm_variant_list_t* list) : list_(list) {}
+  iree_vm_variant_list_t* list_;
+};
+
+//------------------------------------------------------------------------------
 // ApiRefCounted types
 //------------------------------------------------------------------------------
 
@@ -88,6 +130,15 @@
 
   // Unique id for this context.
   int context_id() const { return iree_vm_context_id(raw_ptr()); }
+
+  // Synchronously invokes the given function.
+  void Invoke(iree_vm_function_t f, VmVariantList& inputs,
+              VmVariantList& outputs);
+
+  // Creates a function ABI suitable for marshalling function inputs/results.
+  std::unique_ptr<FunctionAbi> CreateFunctionAbi(
+      HalDevice& device, std::shared_ptr<HostTypeFactory> host_type_factory,
+      iree_vm_function_t f);
 };
 
 class VmInvocation : public ApiRefCounted<VmInvocation, iree_vm_invocation_t> {
@@ -95,42 +146,6 @@
 
 void SetupVmBindings(pybind11::module m);
 
-//------------------------------------------------------------------------------
-// VmVariantList
-//------------------------------------------------------------------------------
-
-class VmVariantList {
- public:
-  VmVariantList() : list_(nullptr) {}
-  ~VmVariantList() {
-    if (list_) {
-      CheckApiStatus(iree_vm_variant_list_free(list_), "Error freeing list");
-    }
-  }
-
-  VmVariantList(VmVariantList&& other) {
-    list_ = other.list_;
-    other.list_ = nullptr;
-  }
-
-  VmVariantList& operator=(const VmVariantList&) = delete;
-  VmVariantList(const VmVariantList&) = delete;
-
-  static VmVariantList Create(iree_host_size_t capacity) {
-    iree_vm_variant_list_t* list;
-    CheckApiStatus(
-        iree_vm_variant_list_alloc(capacity, IREE_ALLOCATOR_SYSTEM, &list),
-        "Error allocating variant list");
-    return VmVariantList(list);
-  }
-
-  iree_host_size_t size() const { return iree_vm_variant_list_size(list_); }
-
- private:
-  VmVariantList(iree_vm_variant_list_t* list) : list_(list) {}
-  iree_vm_variant_list_t* list_;
-};
-
 }  // namespace python
 }  // namespace iree
 
diff --git a/bindings/python/pyiree/vm_test.py b/bindings/python/pyiree/vm_test.py
index 3302907..b7e804f 100644
--- a/bindings/python/pyiree/vm_test.py
+++ b/bindings/python/pyiree/vm_test.py
@@ -16,6 +16,7 @@
 # pylint: disable=unused-variable
 
 from absl.testing import absltest
+import numpy as np
 import pyiree
 
 
@@ -43,6 +44,7 @@
     cls.driver = pyiree.binding.hal.HalDriver.create("vulkan")
     cls.device = cls.driver.create_default_device()
     cls.hal_module = pyiree.binding.vm.create_hal_module(cls.device)
+    cls.htf = pyiree.binding.host_types.HostTypeFactory.get_numpy()
 
   def test_variant_list(self):
     l = pyiree.binding.vm.VmVariantList(5)
@@ -77,6 +79,29 @@
         instance, modules=[self.hal_module, m])
     print(context)
 
+  def test_invoke_function(self):
+    m = create_simple_mul_module()
+    instance = pyiree.binding.vm.VmInstance()
+    context = pyiree.binding.vm.VmContext(
+        instance, modules=[self.hal_module, m])
+    f = m.lookup_function("simple_mul")
+    abi = context.create_function_abi(self.device, self.htf, f)
+    print("INVOKING:", abi)
+    arg0 = np.array([1., 2., 3., 4.], dtype=np.float32)
+    arg1 = np.array([4., 5., 6., 7.], dtype=np.float32)
+    inputs = abi.raw_pack_inputs((arg0, arg1))
+    print("INPUTS:", inputs)
+    allocated_results = abi.allocate_results(inputs)
+    print("ALLOCATED RESULTS:", allocated_results)
+    print("--- INVOKE:")
+    context.invoke(f, inputs.to_vm_variant_list(),
+                   allocated_results.to_vm_variant_list())
+    print("--- DONE.")
+    results = abi.raw_unpack_results(allocated_results)
+    print("RESULTS:", results)
+    # TODO(laurenzo): Results are coming back all zero. Diagnose and uncomment.
+    # np.testing.assert_allclose(results[0], [4., 10., 18., 28.])
+
 
 if __name__ == "__main__":
   absltest.main()