Adds memory mapping and alignment controls to VmModule construction. (#14153)
Adds several new VmModule static constructors:
* mmap: Creates a VmModule from a memory mapped file in a platform
specific way that works on both Posix and Windows.
* wrap_buffer: Creates a VmModule from an aligned Python buffer.
* copy_buffer: Creates a VmModule from an arbitrary Python buffer,
always making a copy into aligned storage.
* from_buffer: Create a VmModule from an arbitrary Python buffer, making
an aligned copy if needed.
The existing `from_flatbuffer` is now just an alias to `from_buffer` and
its behavior is changed so that if given an unaligned buffer, it will be
copied (and a warning issued by default).
Fixes #13887
diff --git a/runtime/bindings/python/vm.cc b/runtime/bindings/python/vm.cc
index 79db95b..7a8e0dd 100644
--- a/runtime/bindings/python/vm.cc
+++ b/runtime/bindings/python/vm.cc
@@ -8,6 +8,7 @@
#include "./status_utils.h"
#include "iree/base/api.h"
+
// TODO: We shouldn't need the HAL API but it is used for direct printing
// summaries of HAL objects in lists. We should have a better way of doing this
// dynamically vs hard depending on a type switch here.
@@ -16,11 +17,85 @@
#include "iree/vm/api.h"
#include "pybind11/numpy.h"
+using namespace pybind11::literals;
+
namespace iree {
namespace python {
namespace {
+static const char kFromBufferDocstring[] =
+ R"(Creates a Vmmodule from a Python buffer.
+
+This is intended as a quick and dirty way to instantiate a VmModule from
+a binary blob. It will implicitly make a copy if alignment is not sufficient.
+
+It is recommended to use one of the other construction methods for maximum
+determinism and efficiency:
+
+* `mmap` : To memory map from a file.
+* `wrap_buffer` : To directly wrap a Python buffer that is known to be
+ aligned properly.
+* `copy_buffer` : To always make a copy of a Python buffer such that it is
+ aligned properly.
+
+This was historically called `from_flatbuffer`. It is recommended that new
+code use `flat_buffer`.
+
+Args:
+ instance: A VmInstance.
+ buffer: An object implementing the Python buffer protocol. Typically a
+ bytes, bytearray, memoryview, etc.
+ warn_if_copy: Raises a warning if alignment is not sufficient to use the
+ buffer directly, resulting in a copy. Defaults to True.
+)";
+
+static const char kCopyBufferDocstring[] =
+ R"(Creates a VmModule by making a copy of a Python buffer.
+
+Args:
+ instance: A VmInstance.
+ buffer: An object implementing the Python buffer protocol. Typically a
+ bytes, bytearray, memoryview, etc.
+)";
+
+static const char kWrapBufferDocstring[] =
+ R"(Creates a VmModule by directly using the backing memory of a Python buffer.
+
+Args:
+ instance: A VmInstance.
+ buffer: An object implementing the Python buffer protocol. Typically a
+ bytes, bytearray, memoryview, etc.
+ destroy_callback: A no-argument callback that is invoked when the backing
+ buffer is no longer in use.
+ close_buffer: Whether to call the `close` method on the `buffer` (prior to
+ invoking `destroy_callback`). Defaults to False.
+
+Raises:
+ ValueError if alignment is not satisfied.
+)";
+
+static const char kMMapDocstring[] =
+ R"(Create a VmModule by mmap'ing a file.
+
+When backed by a file, this is generally the most effective way to create a
+VmModule. Especially for large modules, this will result in the fewest
+copies and the most effective use of the system cache across invocations.
+
+Note that mmap behavior differs between Posix and Windows. Whereas the former
+will allow the backing file to be open before an mmap call and deleted
+immediately after, Windows generally allows neither. For compatibility,
+make sure that the backing file is not open for writing before calling this
+method and that if it needs to be deleted when done, that is done in a
+`destroy_callback`.
+
+Args:
+ instance: A VmInstance.
+ filepath: Path to the file on the file system.
+ destroy_callback: A no-argument callback that is invoked when the backing
+ buffer is no longer in use.
+)";
+
// RAII wrapper for a Py_buffer which calls PyBuffer_Release when it goes
// out of scope.
class PyBufferReleaser {
@@ -150,24 +225,107 @@
return py_module;
}
-VmModule VmModule::FromFlatbufferBlob(VmInstance* instance,
- py::object flatbuffer_blob_object) {
- IREE_TRACE_SCOPE_NAMED("VmModule::FromFlatbufferBlob");
- auto flatbuffer_blob = py::cast<py::buffer>(flatbuffer_blob_object);
- auto buffer_info = flatbuffer_blob.request();
+VmModule VmModule::MMap(VmInstance* instance, std::string filepath,
+ py::object destroy_callback) {
+ IREE_TRACE_SCOPE_NAMED("VmModule::MMap");
+ auto mmap_module = py::module::import("mmap");
+ auto open_func = py::module::import("io").attr("open");
+ auto file_obj = open_func(filepath, "r+b");
+ // The signature of mmap is different on Windows vs others. On others,
+ // we use explicit flags and protection attributes for better control,
+ // triggering off of the presence of the MAP_SHARED flag constant (which
+ // is not present on Windows).
+ py::object mapped_file;
+ if (py::hasattr(mmap_module, "MAP_SHARED")) {
+ // Posix mmap signature.
+ auto flags = py::cast<int64_t>(mmap_module.attr("MAP_SHARED"));
+ // MAP_POPULATE isn't available on all versions/platforms.
+ if (py::hasattr(mmap_module, "MAP_POPULATE")) {
+ flags |= py::cast<int64_t>(mmap_module.attr("MAP_POPULATE"));
+ }
+ auto prot = py::cast<int64_t>(mmap_module.attr("PROT_READ"));
+ mapped_file = mmap_module.attr("mmap")(file_obj.attr("fileno")(), 0,
+ "flags"_a = flags, "prot"_a = prot);
+ } else {
+ // Windows mmap signature.
+ mapped_file =
+ mmap_module.attr("mmap")(file_obj.attr("fileno")(), 0,
+ "access"_a = mmap_module.attr("ACCESS_READ"));
+ }
+ // Backing file can be closed after a successful mmap call.
+ file_obj.attr("close")();
+
+ // MADV_RANDOM is not available on Windows (and possibly others?).
+ if (py::hasattr(mmap_module, "MADV_RANDOM")) {
+ mapped_file.attr("madvise")(mmap_module.attr("MADV_RANDOM"));
+ }
+ return WrapBuffer(instance, std::move(mapped_file),
+ std::move(destroy_callback),
+ /*close_buffer=*/true);
+}
+
+VmModule VmModule::WrapBuffer(VmInstance* instance, py::object buffer_obj,
+ py::object destroy_callback, bool close_buffer) {
+ IREE_TRACE_SCOPE_NAMED("VmModule::FromAlignedMemory");
+ auto py_buffer = py::cast<py::buffer>(buffer_obj);
+ auto buffer_info = py_buffer.request();
+ if (!iree_host_size_has_alignment((uintptr_t)buffer_info.ptr,
+ IREE_HAL_HEAP_BUFFER_ALIGNMENT)) {
+ std::stringstream err;
+ err << "VmModule.from_aligned_memory received an unaligned buffer. ";
+ err << "Got 0x" << (void*)buffer_info.ptr << ", expected alignment ";
+ err << IREE_HAL_HEAP_BUFFER_ALIGNMENT;
+ throw std::invalid_argument(err.str());
+ }
+
iree_vm_module_t* module = nullptr;
// Bridge to the C-based deallocator API.
- PyObject* pyobject_ptr = flatbuffer_blob_object.ptr();
+ struct DeallocateState {
+ DeallocateState(py::object buffer_obj, py::object destroy_callback,
+ bool close_buffer)
+ : buffer_obj(std::move(buffer_obj)),
+ destroy_callback(std::move(destroy_callback)),
+ close_buffer(close_buffer) {}
+ py::object buffer_obj;
+ py::object destroy_callback;
+ bool close_buffer;
+ };
+ DeallocateState* state =
+ new DeallocateState(buffer_obj, destroy_callback, close_buffer);
auto ctl_fn = +([](void* self, iree_allocator_command_t command,
const void* params, void** inout_ptr) {
+ py::gil_scoped_acquire gil;
assert(command == IREE_ALLOCATOR_COMMAND_FREE);
- PyObject* pyobject_ptr = static_cast<PyObject*>(self);
- Py_XDECREF(pyobject_ptr);
+ try {
+ DeallocateState* state = static_cast<DeallocateState*>(self);
+ if (state->close_buffer) {
+ state->buffer_obj.attr("close")();
+ }
+ if (!state->destroy_callback.is_none()) {
+ state->destroy_callback();
+ }
+ delete state;
+ } catch (std::exception& e) {
+ // There are many situations where deallocation exceptions can be
+ // swallowed, so carp loudly. This is almost always a critical issue
+ // that needs to be visible.
+ fprintf(
+ stderr,
+ "error: exception raised while deallocating storage for an "
+ "iree.runtime.VmModule. This is unrecoverable and likely indicates a "
+ "serious problem, minimally resulting in memory leaks: %s",
+ e.what());
+ return iree_make_status(
+ IREE_STATUS_UNKNOWN,
+ "exception raised while deallocating storage for an "
+ "iree.runtime.VmModule. This is unrecoverable and likely indicates a "
+ "serious problem, minimally resulting in memory leaks: %s",
+ e.what());
+ }
return iree_ok_status();
});
- Py_XINCREF(pyobject_ptr);
- iree_allocator_t deallocator{/*self=*/pyobject_ptr, /*ctl=*/ctl_fn};
+ iree_allocator_t deallocator{/*self=*/state, /*ctl=*/ctl_fn};
auto status = iree_vm_bytecode_module_create(
instance->raw_ptr(),
@@ -175,15 +333,73 @@
static_cast<iree_host_size_t>(buffer_info.size)},
deallocator, iree_allocator_system(), &module);
if (!iree_status_is_ok(status)) {
- Py_XDECREF(pyobject_ptr);
+ delete state;
}
- CheckApiStatus(status, "Error creating vm module from FlatBuffer");
+ CheckApiStatus(status, "Error creating vm module from aligned memory");
auto py_module = VmModule::StealFromRawPtr(module);
- py_module.stashed_flatbuffer_blob = flatbuffer_blob_object;
+ // Stash a reference to the flatbuffer at the Python instance level. This
+ // is exposed to the tracing API, allowing it to get at the backing contents.
+ py_module.stashed_flatbuffer_blob = buffer_obj;
return py_module;
}
+VmModule VmModule::CopyBuffer(VmInstance* instance, py::object buffer_obj) {
+ IREE_TRACE_SCOPE_NAMED("VmModule::CopyBuffer");
+ auto alignment =
+ py::cast<uintptr_t>(py::module::import("mmap").attr("PAGESIZE"));
+ auto bytearray_ctor = py::module::import("builtins").attr("bytearray");
+ auto src_buffer = py::cast<py::buffer>(buffer_obj);
+ auto src_buffer_info = src_buffer.request();
+ py::ssize_t src_buffer_size = src_buffer_info.itemsize * src_buffer_info.size;
+
+ // Need to allocate an extra page because there is no control at the Python
+ // level for the alignment it may have.
+ auto dst_buffer =
+ py::cast<py::buffer>(bytearray_ctor(src_buffer_size + alignment));
+ auto dst_buffer_info = dst_buffer.request();
+ void* dst_aligned =
+ (void*)iree_host_align((uintptr_t)dst_buffer_info.ptr, alignment);
+ uintptr_t dst_offset =
+ (uintptr_t)dst_aligned - (uintptr_t)dst_buffer_info.ptr;
+
+ // Now create a memoryview over the unaligned bytearray and slice into that
+ // to get the aligned Python buffer.
+ auto dst_slice = py::slice(dst_offset, dst_offset + src_buffer_size, 1);
+ py::object dst_view = py::memoryview(dst_buffer);
+ py::object dst_view_aligned = dst_view[dst_slice];
+
+ // If any of the indexing math was wrong, Python exceptions will be raised
+ // above, so this is implicitly guarding the memcpy if it is done last.
+ std::memcpy(dst_aligned, src_buffer_info.ptr, src_buffer_size);
+ return WrapBuffer(instance, std::move(dst_view_aligned),
+ /*destroy_callback=*/py::none(),
+ /*close_buffer=*/false);
+}
+
+VmModule VmModule::FromBuffer(VmInstance* instance, py::object buffer_obj,
+ bool warn_if_copy) {
+ IREE_TRACE_SCOPE_NAMED("VmModule::FromBuffer");
+ auto py_buffer = py::cast<py::buffer>(buffer_obj);
+ auto buffer_info = py_buffer.request();
+ if (iree_host_size_has_alignment((uintptr_t)buffer_info.ptr,
+ IREE_HAL_HEAP_BUFFER_ALIGNMENT)) {
+ return WrapBuffer(instance, std::move(buffer_obj),
+ /*destroy_callback=*/py::none(), /*close_buffer=*/false);
+ } else {
+ if (warn_if_copy) {
+ py::module::import("warnings")
+ .attr("warn")(
+ "Making copy of unaligned VmModule buffer. It is recommended to "
+ "make this deterministic by calling `copy_buffer` to always make "
+ "a copy or `mmap` to efficiently load from a file. This warning "
+ "can be silenced by adding `warn_if_copy=False` to "
+ "`from_buffer`");
+ }
+ return CopyBuffer(instance, std::move(buffer_obj));
+ }
+}
+
std::optional<iree_vm_function_t> VmModule::LookupFunction(
const std::string& name, iree_vm_function_linkage_t linkage) {
iree_vm_function_t f;
@@ -661,7 +877,20 @@
py::class_<VmModule>(m, "VmModule")
.def_static("resolve_module_dependency",
&VmModule::ResolveModuleDependency)
- .def_static("from_flatbuffer", &VmModule::FromFlatbufferBlob)
+ .def_static("from_flatbuffer", &VmModule::FromBuffer, py::arg("instance"),
+ py::arg("buffer"), py::arg("warn_if_copy") = true,
+ kFromBufferDocstring)
+ .def_static("from_buffer", &VmModule::FromBuffer, py::arg("instance"),
+ py::arg("buffer"), py::arg("warn_if_copy") = true,
+ kFromBufferDocstring)
+ .def_static("copy_buffer", &VmModule::CopyBuffer, py::arg("instance"),
+ py::arg("buffer"), kCopyBufferDocstring)
+ .def_static("wrap_buffer", &VmModule::WrapBuffer, py::arg("instance"),
+ py::arg("buffer"), py::arg("destroy_callback") = py::none(),
+ py::arg("close_buffer") = false, kWrapBufferDocstring)
+ .def_static("mmap", &VmModule::MMap, py::arg("instance"),
+ py::arg("filepath"), py::arg("destroy_callback") = py::none(),
+ kMMapDocstring)
.def_property_readonly("name", &VmModule::name)
.def_property_readonly("version",
[](VmModule& self) {