Make iree.compiler.api.Output.map_memory() retain its backing reference. (#15975)
Depending on the precise structure of the code and state of the garbage
collector, this could cause very long-range crashes that appeared to
have nothing to do with the allocation mechanism.
* Adds a retaining finalizer to the map_memory() pointer return.
* Adds a test that grinds two patterns that I found which produce bugs.
* Fixes VmModule::WrapBuffer to close over its Py_buffer. I noticed this
when auditing and it is subtly wrong: it will not break in this specific
case but in cases where the backing memory has to be copied or
re-organized, it could result in a use-after-free.
Fixes #15407
diff --git a/runtime/bindings/python/vm.cc b/runtime/bindings/python/vm.cc
index a5c6859..80beb5c 100644
--- a/runtime/bindings/python/vm.cc
+++ b/runtime/bindings/python/vm.cc
@@ -271,7 +271,28 @@
VmModule VmModule::WrapBuffer(VmInstance* instance, py::object buffer_obj,
py::object destroy_callback, bool close_buffer) {
IREE_TRACE_SCOPE_NAMED("VmModule::FromAlignedMemory");
- PyBufferRequest buffer_info(buffer_obj, PyBUF_SIMPLE);
+ // State object that is retained for the life of the module.
+ // It is responsible for keeping the backing resources alive and
+ // holding the user-level destroy callback.
+ // Note that the original buffer_obj is not captured explicitly but
+ // is available as part of the Py_buffer underlying the PyBufferRequest.
+ // Aside from being more efficient, avoiding redundant capture removes
+ // destruction race potential.
+ struct BufferState {
+ BufferState(py::object buffer_obj, py::object destroy_callback,
+ bool close_buffer)
+ : buffer_info(buffer_obj, PyBUF_SIMPLE),
+ destroy_callback(std::move(destroy_callback)),
+ close_buffer(close_buffer) {}
+ PyBufferRequest buffer_info;
+ py::object destroy_callback;
+ bool close_buffer;
+
+ py::handle get_buffer() { return py::handle(buffer_info.view().obj); }
+ };
+ BufferState* state =
+ new BufferState(buffer_obj, destroy_callback, close_buffer);
+ PyBufferRequest& buffer_info = state->buffer_info;
if (!iree_host_size_has_alignment((uintptr_t)buffer_info.view().buf,
IREE_HAL_HEAP_BUFFER_ALIGNMENT)) {
std::stringstream err;
@@ -282,33 +303,33 @@
}
iree_vm_module_t* module = nullptr;
-
- // Bridge to the C-based deallocator API.
- struct DeallocateState {
- DeallocateState(py::object buffer_obj, py::object destroy_callback,
- bool close_buffer)
- : buffer_obj(std::move(buffer_obj)),
- destroy_callback(std::move(destroy_callback)),
- close_buffer(close_buffer) {}
- py::object buffer_obj;
- py::object destroy_callback;
- bool close_buffer;
- };
- DeallocateState* state =
- new DeallocateState(buffer_obj, destroy_callback, close_buffer);
auto ctl_fn = +([](void* self, iree_allocator_command_t command,
const void* params, void** inout_ptr) {
py::gil_scoped_acquire gil;
assert(command == IREE_ALLOCATOR_COMMAND_FREE);
try {
- DeallocateState* state = static_cast<DeallocateState*>(self);
+ // Destruction sequencing is tricky. We must have released the
+ // PyBufferRequest before calling close, so we first get what we
+ // need out of the state into local variables, then delete the state
+ // (releasing the PyBufferRequest), then closing and issuing the
+ // destroy callback. Getting the order wrong will result in an
+ // unrecoverable exception indicating the the buffer cannot be closed
+ // with outstanding mappings.
+ BufferState* state = static_cast<BufferState*>(self);
+ py::object destroy_callback = std::move(state->destroy_callback);
+ py::object buffer_to_close;
if (state->close_buffer) {
- state->buffer_obj.attr("close")();
- }
- if (!state->destroy_callback.is_none()) {
- state->destroy_callback();
+ buffer_to_close = py::borrow(state->get_buffer());
}
delete state;
+
+ if (buffer_to_close) {
+ buffer_to_close.attr("close")();
+ }
+
+ if (!destroy_callback.is_none()) {
+ destroy_callback();
+ }
} catch (std::exception& e) {
// There are many situations where deallocation exceptions can be
// swallowed, so carp loudly. This is almost always a critical issue