Make iree.compiler.api.Output.map_memory() retain its backing reference. (#15975)

Depending on the precise structure of the code and state of the garbage
collector, this could cause very long-range crashes that appeared to
have nothing to do with the allocation mechanism.

* Adds a retaining finalizer to the map_memory() pointer return.
* Adds a test that grinds two patterns that I found which produce bugs.
* Fixes VmModule::WrapBuffer to close over its Py_buffer. I noticed this
when auditing and it is subtly wrong: it will not break in this specific
case but in cases where the backing memory has to be copied or
re-organized, it could result in a use-after-free.

Fixes #15407
diff --git a/runtime/bindings/python/vm.cc b/runtime/bindings/python/vm.cc
index a5c6859..80beb5c 100644
--- a/runtime/bindings/python/vm.cc
+++ b/runtime/bindings/python/vm.cc
@@ -271,7 +271,28 @@
 VmModule VmModule::WrapBuffer(VmInstance* instance, py::object buffer_obj,
                               py::object destroy_callback, bool close_buffer) {
   IREE_TRACE_SCOPE_NAMED("VmModule::FromAlignedMemory");
-  PyBufferRequest buffer_info(buffer_obj, PyBUF_SIMPLE);
+  // State object that is retained for the life of the module.
+  // It is responsible for keeping the backing resources alive and
+  // holding the user-level destroy callback.
+  // Note that the original buffer_obj is not captured explicitly but
+  // is available as part of the Py_buffer underlying the PyBufferRequest.
+  // Aside from being more efficient, avoiding redundant capture removes
+  // destruction race potential.
+  struct BufferState {
+    BufferState(py::object buffer_obj, py::object destroy_callback,
+                bool close_buffer)
+        : buffer_info(buffer_obj, PyBUF_SIMPLE),
+          destroy_callback(std::move(destroy_callback)),
+          close_buffer(close_buffer) {}
+    PyBufferRequest buffer_info;
+    py::object destroy_callback;
+    bool close_buffer;
+
+    py::handle get_buffer() { return py::handle(buffer_info.view().obj); }
+  };
+  BufferState* state =
+      new BufferState(buffer_obj, destroy_callback, close_buffer);
+  PyBufferRequest& buffer_info = state->buffer_info;
   if (!iree_host_size_has_alignment((uintptr_t)buffer_info.view().buf,
                                     IREE_HAL_HEAP_BUFFER_ALIGNMENT)) {
     std::stringstream err;
@@ -282,33 +303,33 @@
   }
 
   iree_vm_module_t* module = nullptr;
-
-  // Bridge to the C-based deallocator API.
-  struct DeallocateState {
-    DeallocateState(py::object buffer_obj, py::object destroy_callback,
-                    bool close_buffer)
-        : buffer_obj(std::move(buffer_obj)),
-          destroy_callback(std::move(destroy_callback)),
-          close_buffer(close_buffer) {}
-    py::object buffer_obj;
-    py::object destroy_callback;
-    bool close_buffer;
-  };
-  DeallocateState* state =
-      new DeallocateState(buffer_obj, destroy_callback, close_buffer);
   auto ctl_fn = +([](void* self, iree_allocator_command_t command,
                      const void* params, void** inout_ptr) {
     py::gil_scoped_acquire gil;
     assert(command == IREE_ALLOCATOR_COMMAND_FREE);
     try {
-      DeallocateState* state = static_cast<DeallocateState*>(self);
+      // Destruction sequencing is tricky. We must have released the
+      // PyBufferRequest before calling close, so we first get what we
+      // need out of the state into local variables, then delete the state
+      // (releasing the PyBufferRequest), then closing and issuing the
+      // destroy callback. Getting the order wrong will result in an
+      // unrecoverable exception indicating the the buffer cannot be closed
+      // with outstanding mappings.
+      BufferState* state = static_cast<BufferState*>(self);
+      py::object destroy_callback = std::move(state->destroy_callback);
+      py::object buffer_to_close;
       if (state->close_buffer) {
-        state->buffer_obj.attr("close")();
-      }
-      if (!state->destroy_callback.is_none()) {
-        state->destroy_callback();
+        buffer_to_close = py::borrow(state->get_buffer());
       }
       delete state;
+
+      if (buffer_to_close) {
+        buffer_to_close.attr("close")();
+      }
+
+      if (!destroy_callback.is_none()) {
+        destroy_callback();
+      }
     } catch (std::exception& e) {
       // There are many situations where deallocation exceptions can be
       // swallowed, so carp loudly. This is almost always a critical issue