Enabling split rodata blobs in VMFB archives. This changes the VMFB files to contain a length prefix so that the end of the module flatbuffer can be calculated and trailing rodata can be located. With this, the module flatbuffer is still limited to 2GB, but any amount of data may follow as all offsets are tracked as 64-bit values. In order to make this work, the polyglot zip emitter had to be reworked so as not to mess with the required rodata alignments. Since big file-like blobs that would show in the zip are now stored outside the flatbuffer, there's also no need to do the crazy embedded zip headers, and we can now put a local file header in front of the module flatbuffer and show that in the zip. A few bugs and compatibility issues were worked out, and now the vmfb files can be directly detected as zips by `file` and other tools, and everything I've tried (`unzip`, `7z`, Windows, etc.) can open them.

commit: 9a286397f776542f9da23b03eaea5be0d5ed870d [log] [tgz]
author: Ben Vanik <ben.vanik@gmail.com> Fri May 20 19:45:23 2022 -0700
committer: Ben Vanik <ben.vanik@gmail.com> Thu May 26 14:22:29 2022 -0700
tree: 70d82d72429adc51d66b2ba123e492868976d287
parent: 41de9e65fafc3439e2759d060a811b8de2fbaa49 [diff]
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/ArchiveWriter.cpp b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/ArchiveWriter.cpp
new file mode 100644
index 0000000..edf5f9f
--- /dev/null
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/ArchiveWriter.cpp

@@ -0,0 +1,682 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/compiler/Dialect/VM/Target/Bytecode/ArchiveWriter.h"
+
+#include "iree/compiler/Dialect/Util/IR/UtilTypes.h"
+#include "iree/schemas/bytecode_module_def_json_printer.h"
+#include "llvm/Support/CRC.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace mlir {
+namespace iree_compiler {
+namespace IREE {
+namespace VM {
+
+// Alignment, in bytes, applied to each segment of the archive.
+// All embedded file contents (FlatBuffers, rodata, etc) are aligned to this
+// boundary; the flush() implementations pad the stream offset up to it.
+static constexpr unsigned kArchiveSegmentAlignment = 64;
+
+//====---------------------------------------------------------------------===//
+// JSONArchiveWriter
+//====---------------------------------------------------------------------===//
+
+JSONArchiveWriter::JSONArchiveWriter(Location loc, llvm::raw_ostream &os)
+    : loc(loc), os(os) {}  // |loc| tags diagnostics; |os| receives the JSON.
+
+JSONArchiveWriter::~JSONArchiveWriter() { os.flush(); }  // flush on teardown
+
+ArchiveWriter::File JSONArchiveWriter::declareFile(
+    std::string fileName, uint64_t fileAlignment, uint64_t fileLength,
+    std::function<LogicalResult(llvm::raw_ostream &os)> write) {
+  // The JSON writer never emits trailing file segments, so |fileAlignment| and
+  // |write| are ignored and nothing is retained: the returned record only
+  // echoes the name and length back to the caller at offset 0.
+  File declared;
+  declared.fileLength = fileLength;
+  declared.relativeOffset = 0;
+  declared.fileName = std::move(fileName);
+  return declared;
+}
+
+LogicalResult JSONArchiveWriter::flush(FlatbufferBuilder &fbb) {
+  // Render the FlatBuffer as pretty-printed JSON without default-valued
+  // fields, reporting a diagnostic at |loc| if the printer gives up.
+  const bool printFailed = failed(
+      fbb.printJsonToStream(/*pretty=*/true,
+                            /*includeDefaults=*/false,
+                            bytecode_module_def_print_json, os));
+  if (!printFailed) {
+    os.flush();
+    return success();
+  }
+  return mlir::emitError(loc)
+         << "failed to print FlatBuffer emitter contents to output "
+            "stream - possibly out of memory, possibly unprintable "
+            "structure";
+}
+
+//====---------------------------------------------------------------------===//
+// FlatArchiveWriter
+//====---------------------------------------------------------------------===//
+
+FlatArchiveWriter::FlatArchiveWriter(Location loc, llvm::raw_ostream &os)
+    : loc(loc), os(os) {}  // |loc| tags diagnostics; |os| receives the archive.
+
+FlatArchiveWriter::~FlatArchiveWriter() { os.flush(); }  // flush on teardown
+
+ArchiveWriter::File FlatArchiveWriter::declareFile(
+    std::string fileName, uint64_t fileAlignment, uint64_t fileLength,
+    std::function<LogicalResult(llvm::raw_ostream &os)> write) {
+  // Reserve the first suitably aligned slot after the previously declared
+  // file and advance the tail past this file's contents.
+  const uint64_t alignedOffset =
+      IREE::Util::align(tailFileOffset, fileAlignment);
+  tailFileOffset = alignedOffset + fileLength;
+
+  // Remember the file so flush() can stream it out later; the caller gets a
+  // copy carrying the reserved relative offset.
+  File reserved;
+  reserved.fileLength = fileLength;
+  reserved.relativeOffset = alignedOffset;
+  reserved.fileName = std::move(fileName);
+  reserved.write = std::move(write);
+  files.push_back(reserved);
+  return reserved;
+}
+
+LogicalResult FlatArchiveWriter::flush(FlatbufferBuilder &fbb) {
+  // The FlatBuffer always leads the archive; declared files trail it.
+  if (failed(fbb.copyToStream(os))) {
+    return mlir::emitError(loc)
+           << "failed to copy FlatBuffer emitter contents to the output stream "
+              "- possibly out of memory or storage";
+  }
+
+  // Zero-fill up to the next segment boundary. Every relative offset handed
+  // out by declareFile() is measured from this aligned position.
+  const uint64_t flatbufferEnd = os.tell();
+  os.write_zeros(IREE::Util::align(flatbufferEnd, kArchiveSegmentAlignment) -
+                 flatbufferEnd);
+  const uint64_t segmentBase = os.tell();
+
+  // Stream each declared file at its reserved location.
+  for (auto &file : files) {
+    // Zero-fill the gap between the current position and where the file
+    // contents were promised to begin.
+    const uint64_t contentsStart =
+        segmentBase + file.relativeOffset + file.prefixLength;
+    os.write_zeros(static_cast<unsigned>(contentsStart - os.tell()));
+
+    // Let the owner serialize the contents at the current offset.
+    if (failed(file.write(os))) {
+      return mlir::emitError(loc)
+             << "failed to write embedded file to the output stream - possibly "
+                "out of memory or storage (file size: "
+             << file.fileLength << ")";
+    }
+  }
+
+  os.flush();
+  return success();
+}
+
+//====---------------------------------------------------------------------===//
+// ZIP data structures
+//====---------------------------------------------------------------------===//
+// These come from the ZIP APPNOTE.TXT:
+// https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
+// ZIP is not a good file format.
+//
+// We always use the 64-bit extended ZIP format (aka zip64 aka v4.5) for
+// simplicity. It's basically the same as basic ZIP but with even more sharp
+// edges: some fields may optionally be sentinel values (UINT32_MAX) to indicate
+// that their actual values are stored in a separate data structure, while in
+// other cases entirely new structures are used. The weirdest one is that the
+// 32-bit zip header must exist with bogus values _as well as_ the 64-bit one.
+
+namespace {  // LLVM endian-aware integers used as fields of the ZIP records.
+using llvm::support::ulittle16_t;
+using llvm::support::ulittle32_t;
+using llvm::support::ulittle64_t;
+}  // namespace
+
+LLVM_PACKED_START
+
+struct ZIPLocalFileHeader {  // Fixed-size record written before each file.
+  ulittle32_t signature;  // 0x04034B50
+  ulittle16_t versionToExtract;    // written as 0x2D (4.5, for zip64)
+  ulittle16_t generalPurposeFlag;  // written as 0
+  ulittle16_t compressionMethod;   // written as 0 (COMP_STORED)
+  ulittle16_t lastModifiedTime;    // DOS time; written as 0
+  ulittle16_t lastModifiedDate;    // DOS date; written as 0x21 (1980-01-01)
+  ulittle32_t crc32;               // CRC32 of the file contents
+  ulittle32_t compressedSize;    // 0xFFFFFFFF: see ZIP64LocalExtraField
+  ulittle32_t uncompressedSize;  // 0xFFFFFFFF: see ZIP64LocalExtraField
+  ulittle16_t fileNameLength;    // bytes of file name following this record
+  ulittle16_t extraFieldLength;  // bytes of extra fields following the name
+  // file name (variable size)
+  // extra field (variable size)
+};
+static_assert(sizeof(ZIPLocalFileHeader) == 30, "bad packing");
+
+struct ZIP64DataDescriptor {  // Streaming-zip file suffix; not written here.
+  ulittle32_t signature;  // 0x08074B50
+  ulittle32_t crc32;
+  ulittle64_t compressedSize;
+  ulittle64_t uncompressedSize;
+};
+static_assert(sizeof(ZIP64DataDescriptor) == 24, "bad packing");
+
+struct ZIPExtraFieldHeader {  // Header that starts every extra field.
+  ulittle16_t id;    // field kind; 0x0001 (zip64) and 0xFECA (padding) are used
+  ulittle16_t size;  // payload bytes following this header
+};
+static_assert(sizeof(ZIPExtraFieldHeader) == 4, "bad packing");
+
+struct ZIP64LocalExtraField {  // 64-bit sizes for a ZIPLocalFileHeader.
+  ZIPExtraFieldHeader header;    // id 0x0001; size excludes this header
+  ulittle64_t uncompressedSize;  // written as the file length
+  ulittle64_t compressedSize;    // written as the file length (stored as-is)
+};
+static_assert(sizeof(ZIP64LocalExtraField) == 20, "bad packing");
+
+struct ZIPCentralDirectoryRecord {  // One central directory entry per file.
+  ulittle32_t signature;  // 0x02014B50
+  ulittle16_t versionMadeBy;        // written as 0x031E
+  ulittle16_t versionToExtract;     // written as 0x2D (4.5, for zip64)
+  ulittle16_t generalPurposeFlags;  // written as 0
+  ulittle16_t compressionMethod;    // written as 0 (COMP_STORED)
+  ulittle16_t lastModifiedTime;     // DOS time; written as 0
+  ulittle16_t lastModifiedDate;     // DOS date; written as 0x21 (1980-01-01)
+  ulittle32_t crc32;                // CRC32 of the file contents
+  ulittle32_t compressedSize;    // 0xFFFFFFFF: see ZIP64CentralExtraField
+  ulittle32_t uncompressedSize;  // 0xFFFFFFFF: see ZIP64CentralExtraField
+  ulittle16_t fileNameLength;    // bytes of file name following this record
+  ulittle16_t extraFieldLength;  // bytes of extra fields following the name
+  ulittle16_t fileCommentLength;       // written as 0 (no comment)
+  ulittle16_t diskStartNumber;         // written as 0
+  ulittle16_t internalFileAttributes;  // written as 0
+  ulittle32_t externalFileAttributes;  // written as 0
+  ulittle32_t localHeaderOffset;  // 0xFFFFFFFF: see ZIP64CentralExtraField
+  // file name (variable size)
+  // extra field (variable size)
+  // file comment (variable size)
+};
+static_assert(sizeof(ZIPCentralDirectoryRecord) == 46, "bad packing");
+
+struct ZIP64CentralExtraField {  // 64-bit values for a central dir record.
+  ZIPExtraFieldHeader header;     // id 0x0001; size excludes this header
+  ulittle64_t uncompressedSize;   // written as the file length
+  ulittle64_t compressedSize;     // written as the file length (stored as-is)
+  ulittle64_t localHeaderOffset;  // stream offset of the ZIPLocalFileHeader
+};
+static_assert(sizeof(ZIP64CentralExtraField) == 28, "bad packing");
+
+struct ZIPEndOfCentralDirectoryRecord {  // Legacy footer; ends the archive.
+  ulittle32_t signature;  // 0x06054B50
+  ulittle16_t diskNumber;       // written as 0
+  ulittle16_t startDiskNumber;  // written as 0
+  ulittle16_t entriesOnDisk;    // 16-bit view of the zip64 entry count
+  ulittle16_t entryCount;       // 16-bit view of the zip64 entry count
+  ulittle32_t directorySize;    // 32-bit view of the zip64 directory size
+  ulittle32_t directoryOffset;  // 0xFFFFFFFF: real offset is in the zip64 record
+  ulittle16_t commentLength;    // written as 0 (no comment)
+  // comment (variable size)
+};
+static_assert(sizeof(ZIPEndOfCentralDirectoryRecord) == 22, "bad packing");
+
+struct ZIPEndOfCentralDirectoryRecord64 {  // zip64 end-of-directory record.
+  ulittle32_t signature;  // 0x06064B50
+  ulittle64_t sizeOfEOCD64Minus12;  // written as sizeof(this record) - 12
+  ulittle16_t versionMadeBy;        // written as 0x2D
+  ulittle16_t versionRequired;      // written as 0x2D (4.5, for zip64)
+  ulittle32_t diskNumber;           // written as 0
+  ulittle32_t startDiskNumber;      // written as 0
+  ulittle64_t entriesOnDisk;        // number of files in the archive
+  ulittle64_t entryCount;           // number of files in the archive
+  ulittle64_t directorySize;    // bytes spanned by the central directory
+  ulittle64_t directoryOffset;  // stream offset where the directory starts
+  // comment (variable size up to EOCD64)
+};
+static_assert(sizeof(ZIPEndOfCentralDirectoryRecord64) == 56, "bad packing");
+
+struct ZIPEndOfCentralDirectoryLocator64 {  // Points at the zip64 EOCD record.
+  ulittle32_t signature;  // 0x07064B50
+  ulittle32_t recordDiskNumber;  // written as 0
+  ulittle64_t recordOffset;  // stream offset of ZIPEndOfCentralDirectoryRecord64
+  ulittle32_t diskCount;     // written as 1
+};
+static_assert(sizeof(ZIPEndOfCentralDirectoryLocator64) == 20, "bad packing");
+
+LLVM_PACKED_END
+
+// A ZIP file reference into the output stream, one per file appended.
+// This records where the contents are and enough information to build the
+// central directory (see appendZIPCentralDirectory).
+struct ZIPFileRef {
+  // Name of the file used within the ZIP archive.
+  std::string fileName;
+  // Offset of the local file header in the stream relative to the stream start.
+  uint64_t headerOffset;
+  // Size, in bytes, of the file as stored; files are written uncompressed.
+  uint64_t totalLength;
+  // CRC32 of the file contents.
+  uint32_t crc32;
+};
+
+// Returns the smallest number of bytes the ZIP local file header for
+// |fileName| can occupy ahead of the file contents: the fixed header, the
+// name, the zip64 sizes extra field, and the header of the (possibly empty)
+// padding extra field. The header may start at any alignment. Callers may grow
+// the real header beyond this with up to 64KB of padding.
+static uint64_t computeMinHeaderLength(StringRef fileName) {
+  const uint64_t fixedLength = sizeof(ZIPLocalFileHeader) +
+                               sizeof(ZIPExtraFieldHeader) +
+                               sizeof(ZIP64LocalExtraField);
+  return fixedLength + fileName.size();
+}
+
+// Appends a ZIP local file header at the current location of |os|.
+// The header is a prefix to the actual file contents. ZIP requires that the
+// payload start immediately after the header with no padding, so any slack
+// needed to align the payload is carried inside the header as an extra field.
+//
+// |filePadding| is the total number of bytes to emit before the payload (the
+// header plus alignment slack) and must be at least
+// computeMinHeaderLength(fileName). |fileLength| and |crc32| describe the
+// payload the caller writes next. Returns the reference needed to later list
+// the file in the central directory.
+static ZIPFileRef appendZIPLocalFileHeader(std::string fileName,
+                                           uint64_t filePadding,
+                                           uint64_t fileLength, uint32_t crc32,
+                                           llvm::raw_ostream &os) {
+  // Capture the header offset that will be recorded in the central directory to
+  // locate this file.
+  uint64_t headerOffset = os.tell();
+
+  // The amount of padding we need to add between the header and the file.
+  // This ensures that once the header is written it'll end immediately on the
+  // alignment boundary required by the file. Guard against underflow: a
+  // too-small |filePadding| would otherwise wrap to a huge value.
+  uint64_t minHeaderLength = computeMinHeaderLength(fileName);
+  assert(filePadding >= minHeaderLength &&
+         "padding must leave room for the ZIP local file header");
+  uint64_t interiorPadding = filePadding - minHeaderLength;
+
+  // The name and extra field lengths are 16-bit fields in the header; larger
+  // values would be silently truncated and produce a corrupt archive.
+  uint64_t extraFieldLength = sizeof(ZIP64LocalExtraField) +
+                              sizeof(ZIPExtraFieldHeader) + interiorPadding;
+  assert(fileName.size() <= 0xFFFFu && "file name too long for a ZIP header");
+  assert(extraFieldLength <= 0xFFFFu &&
+         "alignment padding too large for a ZIP extra field");
+
+  // Append local file header.
+  ZIPLocalFileHeader fileHeader;
+  fileHeader.signature = 0x04034B50u;
+  fileHeader.versionToExtract = 0x2Du;  // 4.5 (for zip64)
+  fileHeader.generalPurposeFlag = 0;
+  fileHeader.compressionMethod = 0;  // COMP_STORED
+  // https://docs.microsoft.com/en-us/windows/win32/api/oleauto/nf-oleauto-dosdatetimetovarianttime
+  fileHeader.lastModifiedTime = 0u;
+  fileHeader.lastModifiedDate = 0x21;  // 1980-01-01
+  fileHeader.crc32 = crc32;
+  fileHeader.compressedSize = 0xFFFFFFFFu;    // in extra field
+  fileHeader.uncompressedSize = 0xFFFFFFFFu;  // in extra field
+  fileHeader.fileNameLength = static_cast<uint16_t>(fileName.size());
+  fileHeader.extraFieldLength = static_cast<uint16_t>(extraFieldLength);
+  os.write(reinterpret_cast<const char *>(&fileHeader), sizeof(fileHeader));
+
+  // File name immediately follows the header with no NUL terminator.
+  os.write(fileName.data(), fileName.size());
+
+  // Interior padding field.
+  // This shouldn't be required if we just pad out the extraFieldLength but some
+  // ZIP tools ignore that field and try to parse each extra field. We do this
+  // before we do the 64-bit size extra field because some ZIP tools are so
+  // poorly written that they only ever look at the last field present for
+  // getting the size. Have I mentioned how terrible of a format ZIP is?
+  ZIPExtraFieldHeader paddingExtra;
+  paddingExtra.id = 0xFECAu;  // 'CAFE'; in the user prefix range
+  paddingExtra.size = static_cast<uint16_t>(interiorPadding);
+  os.write(reinterpret_cast<const char *>(&paddingExtra),
+           sizeof(paddingExtra));
+  os.write_zeros(interiorPadding);
+
+  // Zip64 extension for 64-bit offsets/lengths.
+  // The -1 values above tell the extractor to use the values in this field
+  // instead. For simplicity we always use these regardless of whether we
+  // need to or not - we aren't optimizing for size when in this mode.
+  ZIP64LocalExtraField sizesExtra;
+  sizesExtra.header.id = 0x0001u;
+  sizesExtra.header.size =
+      static_cast<uint16_t>(sizeof(sizesExtra) - sizeof(ZIPExtraFieldHeader));
+  sizesExtra.compressedSize = fileLength;
+  sizesExtra.uncompressedSize = fileLength;
+  os.write(reinterpret_cast<const char *>(&sizesExtra), sizeof(sizesExtra));
+
+  ZIPFileRef fileRef;
+  fileRef.fileName = std::move(fileName);
+  fileRef.headerOffset = headerOffset;
+  fileRef.totalLength = fileLength;
+  fileRef.crc32 = crc32;
+  return fileRef;
+}
+
+// Computes a CRC32 (via llvm::crc32) of all data written and discards the data.
+class null_crc32_ostream : public llvm::raw_ostream {
+ public:
+  explicit null_crc32_ostream(uint32_t &crc32) : crc32(crc32) {
+    SetUnbuffered();
+  }
+
+ private:
+  void write_impl(const char *Ptr, size_t Size) override {
+    crc32 = llvm::crc32(  // fold this chunk into the caller's running CRC
+        crc32, ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(Ptr), Size));
+    pos += Size;  // nothing is stored; only the byte count is tracked
+  }
+  uint64_t current_pos() const override { return pos; }
+  uint32_t &crc32;   // caller-owned accumulator; its initial value is the seed
+  uint64_t pos = 0;  // total bytes passed to write_impl so far
+};
+
+// appendZIPFile implementation for output streams that cannot seek (such as
+// stdout). The local file header carries the CRC32 of the contents and must be
+// written before them, so |write| is invoked twice: once into a discarding
+// stream to learn the CRC32 and once into |os| for real.
+//
+// Returns an empty ZIPFileRef if either invocation of |write| fails.
+static ZIPFileRef appendZIPFileToStream(
+    std::string fileName, uint64_t filePadding, uint64_t fileLength,
+    std::function<LogicalResult(llvm::raw_ostream &os)> write,
+    llvm::raw_ostream &os) {
+  // Pass 1: checksum only. Without seeking we cannot patch the header after
+  // the fact, so the CRC32 has to be known up front.
+  //
+  // Streaming zips (general purpose flag bit 3 plus ZIP64DataDescriptor
+  // suffixes) were tried instead but several tools don't cope with zip64 and
+  // streaming together, and the trailing descriptor complicates file layout.
+  // Write callbacks are expected to be cheap and polyglot output is a debug
+  // feature, so serializing twice is an acceptable cost.
+  uint32_t contentsCrc = 0;
+  null_crc32_ostream checksumOnly(contentsCrc);
+  if (failed(write(checksumOnly))) {
+    return {};
+  }
+
+  // Header (including alignment padding) now that the CRC32 is known.
+  ZIPFileRef ref = appendZIPLocalFileHeader(std::move(fileName), filePadding,
+                                            fileLength, contentsCrc, os);
+
+  // Pass 2: the actual contents, immediately following the header.
+  const uint64_t contentsStart = os.tell();
+  if (failed(write(os))) {
+    return {};
+  }
+  ref.totalLength = os.tell() - contentsStart;
+  assert(ref.totalLength == fileLength && "declared length mismatch");
+
+  return ref;
+}
+
+// Computes a CRC32 (via llvm::crc32) while forwarding data to another ostream.
+class crc32_ostream : public llvm::raw_ostream {
+ public:
+  explicit crc32_ostream(llvm::raw_ostream &impl, uint32_t &crc32)
+      : impl(impl), crc32(crc32) {
+    SetUnbuffered();
+  }
+
+ private:
+  void write_impl(const char *Ptr, size_t Size) override {
+    crc32 = llvm::crc32(  // fold this chunk into the caller's running CRC
+        crc32, ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(Ptr), Size));
+    impl.write(Ptr, Size);  // then pass the same bytes through unchanged
+  }
+  uint64_t current_pos() const override { return impl.tell(); }
+  llvm::raw_ostream &impl;  // destination stream; not owned
+  uint32_t &crc32;  // caller-owned accumulator; its initial value is the seed
+};
+
+// appendZIPFile implementation used when |os| is a file with random access.
+// This allows us to write the header and backpatch the CRC computed while
+// serializing the file contents, so |write| only has to run once.
+//
+// Returns an empty ZIPFileRef if |write| fails; in that case the header already
+// written to |os| keeps its placeholder CRC of 0.
+static ZIPFileRef appendZIPFileToFD(
+    std::string fileName, uint64_t filePadding, uint64_t fileLength,
+    std::function<LogicalResult(llvm::raw_ostream &os)> write,
+    llvm::raw_fd_ostream &os) {
+  // Write the ZIP header and padding up to the start of the file.
+  // We write a dummy CRC we'll patch up after we compute it while serializing
+  // the file contents.
+  auto fileRef = appendZIPLocalFileHeader(std::move(fileName), filePadding,
+                                          fileLength, /*crc32=*/0, os);
+
+  // Stream out the file contents to the output stream, accumulating the CRC
+  // into |fileRef.crc32| as the bytes pass through.
+  uint64_t start = os.tell();
+  {
+    crc32_ostream crcStream(os, fileRef.crc32);
+    if (failed(write(crcStream))) {
+      return {};
+    }
+    crcStream.flush();
+  }
+  fileRef.totalLength = os.tell() - start;
+  assert(fileRef.totalLength == fileLength && "declared length mismatch");
+
+  // Patch the CRC back into the header. It goes through ulittle32_t so the
+  // bytes on disk are little-endian regardless of host byte order, matching
+  // how every other header field is written.
+  ulittle32_t crc32LE;
+  crc32LE = fileRef.crc32;
+  uint64_t end = os.tell();
+  os.seek(fileRef.headerOffset + offsetof(ZIPLocalFileHeader, crc32));
+  os.write(reinterpret_cast<const char *>(&crc32LE), sizeof(crc32LE));
+  os.seek(end);
+
+  return fileRef;
+}
+
+// Appends a file to |os| preceded by its ZIP local file header.
+// |write| streams the file contents; the CRC32 needed by the header and the
+// central directory is captured along the way. Dispatches to the single-pass
+// implementation when |os| can seek and to the two-pass one otherwise.
+static ZIPFileRef appendZIPFile(
+    std::string fileName, uint64_t filePadding, uint64_t fileLength,
+    std::function<LogicalResult(llvm::raw_ostream &os)> write,
+    llvm::raw_ostream &os) {
+  // Only file-descriptor-backed streams that report seek support can have the
+  // header patched after the contents are written.
+  llvm::raw_fd_ostream *seekableOS = nullptr;
+  if (os.get_kind() == llvm::raw_ostream::OStreamKind::OK_FDStream) {
+    auto *fdOS = static_cast<llvm::raw_fd_ostream *>(&os);
+    if (fdOS->supportsSeeking()) {
+      seekableOS = fdOS;
+    }
+  }
+
+  if (!seekableOS) {
+    // No random access: extra work is needed to learn the CRC before the
+    // header is emitted.
+    return appendZIPFileToStream(std::move(fileName), filePadding, fileLength,
+                                 std::move(write), os);
+  }
+
+  // Random access available: contents are serialized once and the CRC is
+  // patched into the header afterwards.
+  return appendZIPFileToFD(std::move(fileName), filePadding, fileLength,
+                           std::move(write), *seekableOS);
+}
+
+// Appends a ZIP central directory to |os| with the references to all of
+// |fileRefs|. Must follow all of the local file headers and be the last thing
+// written, as the legacy end-of-central-directory record has to end the file.
+//
+// Written in order:
+//   - one ZIPCentralDirectoryRecord + name + ZIP64CentralExtraField per file
+//   - ZIPEndOfCentralDirectoryRecord64
+//   - ZIPEndOfCentralDirectoryLocator64 (points at the record above)
+//   - ZIPEndOfCentralDirectoryRecord (legacy 32-bit footer)
+static void appendZIPCentralDirectory(ArrayRef<ZIPFileRef> fileRefs,
+                                      llvm::raw_ostream &os) {
+  // Append the central directory, which contains the local file headers with
+  // some extra junk and references back to where the local headers are in the
+  // file.
+  const uint64_t centralDirectoryStartOffset = os.tell();
+  for (const ZIPFileRef &fileRef : fileRefs) {
+    // Fixed-size header.
+    ZIPCentralDirectoryRecord cdr;
+    cdr.signature = 0x02014B50u;
+    cdr.versionMadeBy = 0x031E;
+    cdr.versionToExtract = 0x2Du;  // 4.5 (for zip64)
+    cdr.generalPurposeFlags = 0;
+    cdr.compressionMethod = 0;  // COMP_STORED
+    // https://docs.microsoft.com/en-us/windows/win32/api/oleauto/nf-oleauto-dosdatetimetovarianttime
+    cdr.lastModifiedTime = 0u;
+    cdr.lastModifiedDate = 0x21;  // 1980-01-01
+    cdr.crc32 = fileRef.crc32;
+    cdr.compressedSize = 0xFFFFFFFFu;    // in extra field
+    cdr.uncompressedSize = 0xFFFFFFFFu;  // in extra field
+    cdr.fileNameLength = static_cast<uint16_t>(fileRef.fileName.size());
+    cdr.extraFieldLength =
+        static_cast<uint16_t>(sizeof(ZIP64CentralExtraField));
+    cdr.fileCommentLength = 0;
+    cdr.diskStartNumber = 0;
+    cdr.internalFileAttributes = 0;
+    cdr.externalFileAttributes = 0;
+    cdr.localHeaderOffset = 0xFFFFFFFFu;  // in extra field
+    os.write(reinterpret_cast<const char *>(&cdr), sizeof(cdr));
+    os.write(fileRef.fileName.data(), fileRef.fileName.size());
+
+    // Zip64 extension for 64-bit offsets/lengths.
+    // The -1 values above tell the extractor to use the values in this field
+    // instead. For simplicity we always use these regardless of whether we
+    // need to or not - we aren't optimizing for size when in this mode.
+    ZIP64CentralExtraField zip64Extra;
+    zip64Extra.header.id = 0x0001u;
+    zip64Extra.header.size =
+        static_cast<uint16_t>(sizeof(zip64Extra) - sizeof(ZIPExtraFieldHeader));
+    zip64Extra.localHeaderOffset = fileRef.headerOffset;
+    zip64Extra.compressedSize = fileRef.totalLength;
+    zip64Extra.uncompressedSize = fileRef.totalLength;
+    os.write(reinterpret_cast<const char *>(&zip64Extra), sizeof(zip64Extra));
+  }
+  const uint64_t centralDirectoryEndOffset = os.tell();
+  const uint64_t entryCount = fileRefs.size();
+  const uint64_t directorySize =
+      centralDirectoryEndOffset - centralDirectoryStartOffset;
+
+  // Append the zip64 end of central directory record. It starts exactly where
+  // the directory entries end.
+  ZIPEndOfCentralDirectoryRecord64 endOfCDR64;
+  endOfCDR64.signature = 0x06064B50u;
+  endOfCDR64.sizeOfEOCD64Minus12 = sizeof(endOfCDR64) - 12;
+  endOfCDR64.versionMadeBy = 0x002Du;
+  endOfCDR64.versionRequired = 0x002Du;  // 4.5 (for zip64)
+  endOfCDR64.diskNumber = 0;
+  endOfCDR64.startDiskNumber = 0;
+  endOfCDR64.entriesOnDisk = entryCount;
+  endOfCDR64.entryCount = entryCount;
+  endOfCDR64.directorySize = directorySize;
+  endOfCDR64.directoryOffset = centralDirectoryStartOffset;
+  os.write(reinterpret_cast<const char *>(&endOfCDR64), sizeof(endOfCDR64));
+
+  // Zip64 end of central directory locator; must sit directly before the
+  // legacy footer and point at the zip64 record written above.
+  ZIPEndOfCentralDirectoryLocator64 locator;
+  locator.signature = 0x07064B50u;
+  locator.recordDiskNumber = 0;
+  locator.recordOffset = centralDirectoryEndOffset;
+  locator.diskCount = 1;
+  os.write(reinterpret_cast<const char *>(&locator), sizeof(locator));
+
+  // Append the final ZIP file footer.
+  // NOTE: this must come at the very end of the file. Even though we have the
+  // EOCD64 record above this is still required for extractors to recognize the
+  // file as a zip file. The offset of -1 will cause incompatible extractors
+  // (like on MS-DOS I guess?) to fail and compatible ones to look for the
+  // locator.
+  //
+  // Values that do not fit the narrow legacy fields are stored as all-ones
+  // sentinels rather than truncated, so readers defer to the zip64 record
+  // instead of trusting a wrapped-around count or size.
+  ZIPEndOfCentralDirectoryRecord endOfCDR;
+  endOfCDR.signature = 0x06054B50u;
+  endOfCDR.diskNumber = 0;
+  endOfCDR.startDiskNumber = 0;
+  const uint16_t legacyEntryCount =
+      static_cast<uint16_t>(entryCount < 0xFFFFu ? entryCount : 0xFFFFu);
+  endOfCDR.entriesOnDisk = legacyEntryCount;
+  endOfCDR.entryCount = legacyEntryCount;
+  endOfCDR.directorySize = static_cast<uint32_t>(
+      directorySize < 0xFFFFFFFFu ? directorySize : 0xFFFFFFFFu);
+  endOfCDR.directoryOffset = 0xFFFFFFFF;
+  endOfCDR.commentLength = 0;
+  os.write(reinterpret_cast<const char *>(&endOfCDR), sizeof(endOfCDR));
+}
+
+ZIPArchiveWriter::ZIPArchiveWriter(Location loc, llvm::raw_ostream &os)
+    : loc(loc), os(os) {}  // |loc| tags diagnostics; |os| receives the archive.
+
+ZIPArchiveWriter::~ZIPArchiveWriter() { os.flush(); }  // flush on teardown
+
+// Each declared file is laid out as a ZIP local file header directly followed
+// by the file bytes. Only the file bytes need |fileAlignment|; the header may
+// start on any byte, and whatever slack is needed to align the contents is
+// accounted for in the header's prefix length.
+//
+//   [header ........ ] [file]
+//   ^-- relativeOffset ^-- aligned (relativeOffset + prefixLength)
+//
+// Note that the offset we record is for the header.
+ArchiveWriter::File ZIPArchiveWriter::declareFile(
+    std::string fileName, uint64_t fileAlignment, uint64_t fileLength,
+    std::function<LogicalResult(llvm::raw_ostream &os)> write) {
+  // The header begins at the current tail; the contents begin at the first
+  // aligned offset that still leaves room for the minimal header.
+  const uint64_t headerStart = tailFileOffset;
+  const uint64_t contentsStart = IREE::Util::align(
+      headerStart + computeMinHeaderLength(fileName), fileAlignment);
+  tailFileOffset = contentsStart + fileLength;
+
+  // Remember the file so flush() can stream it out later; the caller gets a
+  // copy carrying the reserved layout.
+  File reserved;
+  reserved.fileLength = fileLength;
+  reserved.relativeOffset = headerStart;
+  reserved.prefixLength = contentsStart - headerStart;
+  reserved.fileName = std::move(fileName);
+  reserved.write = std::move(write);
+  files.push_back(reserved);
+  return reserved;
+}
+
+// Writes the archive to |os| as a ZIP: the module FlatBuffer from |fbb| first
+// (as "module.fb"), then every declared file at the layout reserved by
+// declareFile(), then the central directory indexing all of them.
+// Returns failure if the FlatBuffer cannot be serialized or if any declared
+// file's write callback fails.
+LogicalResult ZIPArchiveWriter::flush(FlatbufferBuilder &fbb) {
+  SmallVector<ZIPFileRef> fileRefs;
+  fileRefs.reserve(files.size() + 1);
+
+  // Compute padding of the header to ensure the module FlatBuffer ends up with
+  // the proper alignment. The padding is a length measured from the header
+  // start, so the stream's starting offset must be subtracted back out of the
+  // aligned absolute position.
+  auto moduleName = "module.fb";
+  uint64_t startOffset = os.tell();
+  uint64_t moduleHeaderLength = computeMinHeaderLength(moduleName);
+  uint64_t modulePadding = IREE::Util::align(startOffset + moduleHeaderLength,
+                                             kArchiveSegmentAlignment) -
+                           startOffset;
+
+  // Serialize the module FlatBuffer to a binary blob in memory.
+  // Ideally we'd stream out using fbb.copyToStream but we have no way of
+  // computing the size without serializing and we need that for the ZIP header.
+  std::string moduleData;
+  {
+    llvm::raw_string_ostream moduleStream(moduleData);
+    if (failed(fbb.copyToStream(moduleStream))) {
+      return mlir::emitError(loc)
+             << "failed to serialize FlatBuffer emitter "
+                "contents to memory - possibly out of memory";
+    }
+    moduleStream.flush();
+  }
+
+  // Pad out the module data so we can easily compute the relative offsets.
+  auto paddedModuleLength = static_cast<flatbuffers_uoffset_t>(
+      IREE::Util::align(sizeof(flatbuffers_uoffset_t) + moduleData.size(),
+                        kArchiveSegmentAlignment) -
+      sizeof(flatbuffers_uoffset_t));
+
+  // Stream out the FlatBuffer contents: the length prefix is replaced with the
+  // padded length, followed by the serialized data after its original prefix
+  // and zero fill up to the padded length.
+  fileRefs.push_back(appendZIPFile(
+      moduleName, modulePadding, paddedModuleLength,
+      [&](llvm::raw_ostream &os) -> LogicalResult {
+        os.write(reinterpret_cast<char *>(&paddedModuleLength),
+                 sizeof(flatbuffers_uoffset_t));
+        os.write(moduleData.data() + sizeof(flatbuffers_uoffset_t),
+                 moduleData.size() - sizeof(flatbuffers_uoffset_t));
+        os.write_zeros(paddedModuleLength - moduleData.size());
+        return success();
+      },
+      os));
+
+  // Pad out to the start of the external rodata segment.
+  // This ensures we begin writing at an aligned offset; all relative offsets
+  // in the embedded files assume this.
+  uint64_t baseOffset = os.tell();
+  uint64_t basePadding =
+      IREE::Util::align(baseOffset, kArchiveSegmentAlignment) - baseOffset;
+  os.write_zeros(basePadding);
+  baseOffset = os.tell();
+
+  // Flush all declared files.
+  for (auto &file : files) {
+    // Compute padding bytes required to align the file contents.
+    unsigned filePadding = static_cast<unsigned>(
+        baseOffset + file.relativeOffset + file.prefixLength - os.tell());
+
+    // Write file header and payload. appendZIPFile only hands back a file
+    // reference, so a failing write callback is recorded here and turned into
+    // a failure of the whole flush instead of producing a broken archive that
+    // is reported as a success. The callback runs synchronously, so capturing
+    // |file| and the flag by reference is safe and avoids copying the file.
+    bool writeFailed = false;
+    fileRefs.push_back(appendZIPFile(
+        file.fileName, filePadding, file.fileLength,
+        [&](llvm::raw_ostream &os) -> LogicalResult {
+          if (failed(file.write(os))) {
+            writeFailed = true;
+            return mlir::emitError(loc)
+                   << "failed to write embedded file to the output stream - "
+                      "possibly out of memory or storage (file size: "
+                   << file.fileLength << ")";
+          }
+          return success();
+        },
+        os));
+    if (writeFailed) {
+      // The diagnostic has already been emitted by the callback above.
+      return failure();
+    }
+  }
+
+  // Append the central directory containing an index of all the files.
+  appendZIPCentralDirectory(fileRefs, os);
+
+  os.flush();
+  return success();
+}
+
+}  // namespace VM
+}  // namespace IREE
+}  // namespace iree_compiler
+}  // namespace mlir

diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/ArchiveWriter.h b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/ArchiveWriter.h
new file mode 100644
index 0000000..b445ae9
--- /dev/null
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/ArchiveWriter.h

@@ -0,0 +1,149 @@
+// Copyright 2022 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_COMPILER_DIALECT_VM_TARGET_BYTECODE_ARCHIVE_WRITER_H_
+#define IREE_COMPILER_DIALECT_VM_TARGET_BYTECODE_ARCHIVE_WRITER_H_
+
+#include <string>
+
+#include "iree/compiler/Utils/FlatbufferUtils.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "mlir/IR/Location.h"
+
+namespace mlir {
+namespace iree_compiler {
+namespace IREE {
+namespace VM {
+
+// Interface for stateful bytecode module archive serialization.
+//
+// Intended usage:
+//  - all embedded files are declared via declareFile()
+//  - FlatBuffer is generated using the relative offsets of declared files
+//  - flush() writes the FlatBuffer as the archive header
+//  - flush() then writes out the declared files (if supported)
+class ArchiveWriter {
+ public:
+  struct File {
+    // Name of the file when exposed to users; informational only.
+    std::string fileName;
+    // Offset in bytes of the metadata/file from the end of the archive header.
+    uint64_t relativeOffset = 0;
+    // Size of any optional metadata before the file begins, including padding.
+    uint64_t prefixLength = 0;
+    // Size in bytes of the file contents, excluding the prefix above.
+    uint64_t fileLength = 0;
+    // Serializes the file contents to the output stream.
+    std::function<LogicalResult(llvm::raw_ostream &os)> write;
+  };
+
+  virtual ~ArchiveWriter() = default;
+
+  virtual bool supportsFiles() = 0;  // false if declared files are ignored
+
+  // Declares an embedded file in the archive and reserves a location for it.
+  // The relative offset in the returned File stays stable regardless of the
+  // FlatBuffer header length: it is relative to the header end, not offset 0.
+  virtual File declareFile(
+      std::string fileName, uint64_t fileAlignment, uint64_t fileLength,
+      std::function<LogicalResult(llvm::raw_ostream &os)> write) = 0;
+
+  // Writes an in-memory FlatBuffer to the archive as the header and flushes
+  // all archive contents.
+  virtual LogicalResult flush(FlatbufferBuilder &fbb) = 0;
+};
+
+// Textual JSON file archive containing only the FlatBuffer in textual form.
+// Declared files are not emitted (supportsFiles() returns false).
+//
+// Archive structure:
+//   {json text}
+class JSONArchiveWriter : public ArchiveWriter {
+ public:
+  explicit JSONArchiveWriter(Location loc, llvm::raw_ostream &os);
+  ~JSONArchiveWriter() override;
+  bool supportsFiles() override { return false; }
+  File declareFile(
+      std::string fileName, uint64_t fileAlignment, uint64_t fileLength,
+      std::function<LogicalResult(llvm::raw_ostream &os)> write) override;
+  LogicalResult flush(FlatbufferBuilder &fbb) override;
+
+ private:
+  Location loc;
+  llvm::raw_ostream &os;
+};
+
+// Flat file archive containing the FlatBuffer and trailing embedded files.
+// Beyond the length prefix below, only what is in the FlatBuffer is emitted.
+//
+// Archive structure:
+//   [4b flatbuffers_uoffset_t defining module FlatBuffer length]
+//   [module FlatBuffer contents]
+//   [zero padding to 64b alignment]
+//   <<rodata base offset>>
+//   [declared file 0]
+//   [zero padding to 64b alignment]
+//   [declared file 1]
+//   ...
+class FlatArchiveWriter : public ArchiveWriter {
+ public:
+  explicit FlatArchiveWriter(Location loc, llvm::raw_ostream &os);
+  ~FlatArchiveWriter() override;
+  bool supportsFiles() override { return true; }
+  File declareFile(
+      std::string fileName, uint64_t fileAlignment, uint64_t fileLength,
+      std::function<LogicalResult(llvm::raw_ostream &os)> write) override;
+  LogicalResult flush(FlatbufferBuilder &fbb) override;
+
+ private:
+  Location loc;
+  llvm::raw_ostream &os;
+  uint64_t tailFileOffset = 0;  // unpadded; presumably end of last file
+  SmallVector<File> files;  // presumably the declared files - TODO confirm
+};
+
+// Archive file containing .zip-compatible metadata.
+// Allows the archive to be viewed/extracted using widely available tools.
+// This does add a small amount of overhead to the file (N-NN KB depending on
+// alignment requirements) and is mostly useful for debugging. Nothing in the
+// runtime requires this information.
+//
+// Archive structure:
+//  - [zip local file header for module]
+//    [4b flatbuffers_uoffset_t defining module FlatBuffer length]
+//    [module FlatBuffer contents]
+//    [zero padding to 64b alignment]
+//    <<rodata base offset>>
+//  - [zip local file header for file 0]
+//    [declared file 0 contents, aligned]
+//  - [zip local file header for file 1]
+//    ...
+//  - [zip central directory]
+//    [zip locators]
+class ZIPArchiveWriter : public ArchiveWriter {
+ public:
+  explicit ZIPArchiveWriter(Location loc, llvm::raw_ostream &os);
+  ~ZIPArchiveWriter() override;
+  bool supportsFiles() override { return true; }
+  File declareFile(
+      std::string fileName, uint64_t fileAlignment, uint64_t fileLength,
+      std::function<LogicalResult(llvm::raw_ostream &os)> write) override;
+  LogicalResult flush(FlatbufferBuilder &fbb) override;
+
+ private:
+  Location loc;  // used when reporting file write failures
+  llvm::raw_ostream &os;  // stream the archive is written to
+  uint64_t tailFileOffset = 0;  // unpadded; presumably end of last file
+  SmallVector<File> files;  // written out in order by flush()
+};
+
+}  // namespace VM
+}  // namespace IREE
+}  // namespace iree_compiler
+}  // namespace mlir
+
+#endif  // IREE_COMPILER_DIALECT_VM_TARGET_BYTECODE_ARCHIVE_WRITER_H_

diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BUILD b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BUILD
index 1cfa7f7..514a67b 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BUILD
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BUILD

@@ -15,6 +15,8 @@
 iree_compiler_cc_library(
     name = "Bytecode",
     srcs = [
+        "ArchiveWriter.cpp",
+        "ArchiveWriter.h",
         "BytecodeEncoder.cpp",
         "BytecodeEncoder.h",
         "BytecodeModuleTarget.cpp",

diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp
index 8d4ac83..1c2bee9 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp

@@ -16,13 +16,13 @@
 #include "iree/compiler/Dialect/VM/Analysis/ValueLiveness.h"
 #include "iree/compiler/Dialect/VM/IR/VMDialect.h"
 #include "iree/compiler/Dialect/VM/IR/VMOps.h"
+#include "iree/compiler/Dialect/VM/Target/Bytecode/ArchiveWriter.h"
 #include "iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.h"
 #include "iree/compiler/Dialect/VM/Transforms/Passes.h"
 #include "iree/compiler/Dialect/VM/Utils/CallingConvention.h"
 #include "iree/compiler/Utils/FlatbufferUtils.h"
 #include "iree/compiler/Utils/TracingUtils.h"
 #include "iree/schemas/bytecode_module_def_builder.h"
-#include "iree/schemas/bytecode_module_def_json_printer.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/CRC.h"
 #include "llvm/Support/Endian.h"
@@ -48,179 +48,35 @@
 
 using namespace llvm::support;
 
+// All constants are defaulted to 16-byte aligned as that is the maximum
+// (reasonable) alignment of all data types on all platforms. This can be
+// overridden by creators of the rodata with the `alignment` attribute.
+static constexpr int kDefaultRodataAlignment = 16;
+
+// Anything over a few KB should be split out of the FlatBuffer.
+// This limit is rather arbitrary - we could support hundreds of MB of embedded
+// data at the risk of tripping the 31-bit FlatBuffer offset values.
+static constexpr int kMaxEmbeddedDataSize = 4 * 1024;
+
 struct TypeDef {
   Type type;
   std::string full_name;
 };
 
-struct SerializedConstantRef {
-  flatbuffers_uint8_vec_ref_t ref = 0;
-  int64_t totalSize = 0;
-  uint32_t crc32 = 0;
+// Describes one IREE::VM::RodataOp and how its contents are stored.
+// |archiveFile| is unset if the data is to be embedded in the FlatBuffer.
+struct RodataRef {
+  // Source op.
+  IREE::VM::RodataOp rodataOp;
+  // Required alignment computed from the rodata or defaults.
+  uint64_t alignment = kDefaultRodataAlignment;
+  // Total size of the serialized data in bytes.
+  uint64_t totalSize = 0;
+  // Archive file holding the data externally; unset when embedded.
+  Optional<ArchiveWriter::File> archiveFile;
 };
 
-// Serializes a constant attribute to the FlatBuffer as a binary blob.
-// Returns the size in bytes of the serialized value and the flatbuffers offset
-// to the uint8 vec containing the data. If |calculateCRC32| is provided then a
-// CRC32 of the data will be computed and returned as well.
-SerializedConstantRef serializeConstant(Location loc, Attribute valueAttr,
-                                        size_t alignment, bool calculateCRC32,
-                                        FlatbufferBuilder &fbb) {
-  flatcc_builder_start_vector(fbb, 1, alignment, FLATBUFFERS_COUNT_MAX(1));
-
-  auto value = valueAttr.dyn_cast<IREE::Util::SerializableAttrInterface>();
-  assert(value && "expected a serializable rodata value");
-
-  // TODO(benvanik): use fbb.streamUint8Vec + value.serializeToStream.
-  // Right now this will allocate a single slab of the entire storage size and
-  // write the contents into it. streamUint8Vec also does the same thing but
-  // we could extend it with custom fbb storage such that we could reserve the
-  // size in the file and then fix it up after we write it. The complication is
-  // that we need the CRC below and thus have to have the bytes in memory at
-  // some point. An interface member for computeCRC() could be useful as even
-  // though slow it would avoid the need to malloc everything. We could also
-  // switch implementations based on calculateCRC32 - models with GB of params
-  // are probably fine not to have nice hackability :)
-  uint64_t actualSize = value.getStorageSize();
-  if (actualSize > SIZE_MAX) {
-    mlir::emitError(loc) << "constant size " << actualSize
-                         << " exceeds native size_t; unable to serialize";
-    return {};
-  }
-  size_t size = static_cast<size_t>(value.getStorageSize());
-  uint8_t *bytePtr = flatbuffers_uint8_vec_extend(fbb, size);
-  if (failed(value.serializeToBuffer(llvm::support::endianness::little,
-                                     ArrayRef<char>((char *)bytePtr, size)))) {
-    return {};
-  }
-
-  uint8_t *dataPtr =
-      reinterpret_cast<uint8_t *>(flatcc_builder_vector_edit(fbb));
-  size_t totalSize = flatcc_builder_vector_count(fbb);
-  uint32_t crc32Value = 0;
-  if (calculateCRC32) {
-    crc32Value = llvm::crc32(0u, ArrayRef<uint8_t>(dataPtr, totalSize));
-  }
-  return SerializedConstantRef{
-      flatbuffers_uint8_vec_end(fbb),
-      static_cast<int64_t>(totalSize),
-      crc32Value,
-  };
-}
-
-LLVM_PACKED_START
-struct ZIPEndOfCentralDirectoryRecord {
-  ulittle32_t signature;  // 0x06054B50
-  ulittle16_t diskNumber;
-  ulittle16_t startDiskNumber;
-  ulittle16_t entriesOnDisk;
-  ulittle16_t entryCount;
-  ulittle32_t directorySize;
-  ulittle32_t directoryOffset;
-  ulittle16_t commentLength;
-  // comment (variable size)
-};
-static_assert(sizeof(ZIPEndOfCentralDirectoryRecord) == 22, "bad packing");
-struct ZIPEndOfCentralDirectoryRecord64 {
-  ulittle32_t signature;  // 0x06064B50
-  ulittle64_t sizeOfEOCD64Minus12;
-  ulittle16_t versionMadeBy;
-  ulittle16_t versionRequired;
-  ulittle32_t diskNumber;
-  ulittle32_t startDiskNumber;
-  ulittle64_t entriesOnDisk;
-  ulittle64_t entryCount;
-  ulittle64_t directorySize;
-  ulittle64_t directoryOffset;
-  // comment (variable size up to EOCD64)
-};
-static_assert(sizeof(ZIPEndOfCentralDirectoryRecord64) == 56, "bad packing");
-struct ZIPEndOfCentralDirectoryLocator64 {
-  ulittle32_t signature;  // 0x07064B50
-  ulittle32_t recordDiskNumber;
-  ulittle64_t recordOffset;
-  ulittle32_t diskCount;
-};
-static_assert(sizeof(ZIPEndOfCentralDirectoryLocator64) == 20, "bad packing");
-struct ZIPCentralDirectoryRecord {
-  ulittle32_t signature;  // 0x02014B50
-  ulittle16_t versionMadeBy;
-  ulittle16_t versionToExtract;
-  ulittle16_t generalPurposeFlags;
-  ulittle16_t compressionMethod;
-  ulittle16_t lastModifiedTime;
-  ulittle16_t lastModifiedDate;
-  ulittle32_t crc32;
-  ulittle32_t compressedSize;
-  ulittle32_t uncompressedSize;
-  ulittle16_t fileNameLength;
-  ulittle16_t extraFieldLength;
-  ulittle16_t fileCommentLength;
-  ulittle16_t diskStartNumber;
-  ulittle16_t internalFileAttributes;
-  ulittle32_t externalFileAttributes;
-  ulittle32_t localHeaderOffset;
-  // file name (variable size)
-  // extra field (variable size)
-  // file comment (variable size)
-};
-static_assert(sizeof(ZIPCentralDirectoryRecord) == 46, "bad packing");
-struct ZIPLocalFileHeader {
-  ulittle32_t signature;  // 0x04034B50
-  ulittle16_t versionToExtract;
-  ulittle16_t generalPurposeFlag;
-  ulittle16_t compressionMethod;
-  ulittle16_t lastModifiedTime;
-  ulittle16_t lastModifiedDate;
-  ulittle32_t crc32;
-  ulittle32_t compressedSize;
-  ulittle32_t uncompressedSize;
-  ulittle16_t fileNameLength;
-  ulittle16_t extraFieldLength;
-  // file name (variable size)
-  // extra field (variable size)
-};
-static_assert(sizeof(ZIPLocalFileHeader) == 30, "bad packing");
-struct ZIPExtraFieldHeader {
-  ulittle16_t id;
-  ulittle16_t size;
-};
-static_assert(sizeof(ZIPExtraFieldHeader) == 4, "bad packing");
-struct ZIP64LocalExtraField {
-  ZIPExtraFieldHeader header;
-  ulittle64_t uncompressedSize;
-  ulittle64_t compressedSize;
-};
-static_assert(sizeof(ZIP64LocalExtraField) == 20, "bad packing");
-struct ZIP64CentralExtraField {
-  ZIPExtraFieldHeader header;
-  ulittle64_t uncompressedSize;
-  ulittle64_t compressedSize;
-  ulittle64_t localHeaderOffset;
-};
-static_assert(sizeof(ZIP64CentralExtraField) == 28, "bad packing");
-LLVM_PACKED_END
-
-// A ZIP file reference into the flatbuffer output data.
-struct ZIPFileRef {
-  // Offset of the local file header in the flatbuffer. Relative to the end of
-  // the file.
-  flatcc_builder_ref_t localHeaderOffset;
-  // Name of the file used within the ZIP archive.
-  std::string fileName;
-  // Total size, in bytes, of the file uncompressed.
-  uint64_t totalSize;
-  // CRC32 of the file.
-  uint32_t crc32;
-  // Extra field padding (total).
-  uint16_t paddingLength;
-};
-
-// TODO(benvanik): figure out why we need to offset all flatbuffer refs by this
-// value in order to get proper absolute file offsets. The current value used
-// here was derived empirically and is like a combination of the flatbuffer
-// file prefix and some alignment.
-static constexpr int kZIPMagicLocalOffset = 90;
+}  // namespace
 
 // Gets a file extension based on the given |mimeType| that can be used to help
 // applications guess the file type of embedded data.
@@ -238,178 +94,38 @@
       .Default(".bin");
 }
 
-// Appends a ZIP local file header at the current location.
-// The header is a prefix to the actual rodata contents. ZIP requires that the
-// payload start immediately after the header but we have the flatbuffer header
-// there. To skip over the flatbuffer data we pad out the header with a dummy
-// extra data field that lets us control the length.
-//
-//  [zip local file header] + 4 byte suffix length
-//  [flatbuffer vector header] (4 bytes)
-//  [payload]
-static ZIPFileRef appendZIPLocalFileHeader(IREE::VM::RodataOp rodataOp,
-                                           size_t rodataSize, uint32_t crc32,
-                                           FlatbufferBuilder &fbb) {
-  // Use the mime type to map to a file extension.
-  std::string fileName =
-      (rodataOp.getName() +
-       mimeTypeToFileExtension(rodataOp.mime_type().getValueOr("")))
-          .str();
+// Serializes a constant attribute to the FlatBuffer as a binary blob.
+// Returns the FlatBuffers offset to the uint8 vec containing the data, or a
+// null (0) ref if the data could not be serialized.
+static flatbuffers_uint8_vec_ref_t serializeEmbeddedData(
+    Location loc, Attribute valueAttr, uint64_t alignment, uint64_t totalSize,
+    FlatbufferBuilder &fbb) {
+  flatcc_builder_start_vector(fbb, 1, alignment, FLATBUFFERS_COUNT_MAX(1));
 
-  // The data is stored in the flatbuffer prefixed with a vector header of
-  // a 32-bit byte count. We need to ignore this when computing the CRC as we
-  // want only the payload to be visible in the ZIP.
-  size_t vectorPrefixLength = sizeof(uint32_t);
-
-  // header + file name + extra field header
-  size_t totalHeaderLength = sizeof(ZIPLocalFileHeader) + fileName.size() +
-                             sizeof(ZIP64LocalExtraField) +
-                             sizeof(ZIPExtraFieldHeader);
-
-  // Append local file header.
-  auto *header = reinterpret_cast<ZIPLocalFileHeader *>(
-      flatcc_builder_start_struct(fbb, totalHeaderLength, 1));
-  header->signature = 0x04034B50u;
-  header->versionToExtract = 0x2Du;  // 4.5 (for zip64)
-  header->generalPurposeFlag = 0;
-  header->compressionMethod = 0;  // COMP_STORED
-  header->lastModifiedTime = 0;
-  header->lastModifiedDate = 0;
-  header->crc32 = crc32;
-  header->compressedSize = 0xFFFFFFFFu;
-  header->uncompressedSize = 0xFFFFFFFFu;
-  header->fileNameLength = static_cast<uint16_t>(fileName.size());
-  header->extraFieldLength = sizeof(ZIP64LocalExtraField) +
-                             sizeof(ZIPExtraFieldHeader) + vectorPrefixLength;
-  char *fileNamePtr = reinterpret_cast<char *>(header + 1);
-  memcpy(fileNamePtr, fileName.data(), fileName.size());
-
-  auto *zip64Extra =
-      reinterpret_cast<ZIP64LocalExtraField *>(fileNamePtr + fileName.size());
-  zip64Extra->header.id = 0x0001u;
-  zip64Extra->header.size =
-      static_cast<uint16_t>(sizeof(*zip64Extra) - sizeof(ZIPExtraFieldHeader));
-  zip64Extra->compressedSize = static_cast<uint64_t>(rodataSize);
-  zip64Extra->uncompressedSize = static_cast<uint64_t>(rodataSize);
-
-  auto *paddingExtra = reinterpret_cast<ZIPExtraFieldHeader *>(
-      (uint8_t *)zip64Extra + sizeof(*zip64Extra));
-  paddingExtra->id = 0xFECAu;
-  paddingExtra->size = static_cast<uint16_t>(vectorPrefixLength);
-
-  flatcc_builder_ref_t relativeHeaderOffset = flatcc_builder_end_struct(fbb);
-
-  ZIPFileRef fileRef;
-  fileRef.localHeaderOffset = relativeHeaderOffset;
-  fileRef.fileName = std::move(fileName);
-  fileRef.totalSize = static_cast<uint64_t>(rodataSize);
-  fileRef.crc32 = crc32;
-  fileRef.paddingLength = static_cast<uint16_t>(vectorPrefixLength);
-  return fileRef;
-}
-
-// Appends a ZIP central directory to |output| with the references to all of
-// |zipFileRefs| with offsets applied. |startOffset| and |endOffset| define the
-// absolute offsets into |output| of the flatbuffer data.
-//
-// The technique used here is the same as that used in self-extracting archives:
-// byte offset 0 of the file will contain the native format header (like the
-// flatbuffers file identifier) and a ZIP application will need to scan from the
-// back of the file to find the ZIP central directory. This often means that
-// naming the file .zip will not work: most ZIP applications will try to find
-// a PK header at byte 0.
-static void appendZIPCentralDirectory(ArrayRef<ZIPFileRef> zipFileRefs,
-                                      uint64_t startOffset, uint64_t endOffset,
-                                      llvm::raw_ostream &output) {
-  // Append the central directory, which contains the local file headers with
-  // some extra junk and references back to where the local headers are in the
-  // file.
-  uint64_t centralDirectoryStartOffset = output.tell();
-  for (auto zipFileRef : zipFileRefs) {
-    // Fixed-size header.
-    ZIPCentralDirectoryRecord cdr;
-    cdr.signature = 0x02014B50u;
-    cdr.versionMadeBy = 0x031E;
-    cdr.versionToExtract = 0x2Du;  // 4.5 (for zip64)
-    cdr.generalPurposeFlags = 0;
-    cdr.compressionMethod = 0;  // COMP_STORED
-    cdr.lastModifiedTime = 0;
-    cdr.lastModifiedDate = 0;
-    cdr.crc32 = zipFileRef.crc32;
-    cdr.compressedSize = 0xFFFFFFFFu;
-    cdr.uncompressedSize = 0xFFFFFFFFu;
-    cdr.fileNameLength = static_cast<uint16_t>(zipFileRef.fileName.size());
-    cdr.extraFieldLength = sizeof(ZIP64CentralExtraField);
-    cdr.fileCommentLength = 0;
-    cdr.diskStartNumber = 0;
-    cdr.internalFileAttributes = 0;
-    cdr.externalFileAttributes = 0;
-    cdr.localHeaderOffset = 0xFFFFFFFFu;
-    output.write(reinterpret_cast<const char *>(&cdr), sizeof(cdr));
-    output.write(zipFileRef.fileName.data(), zipFileRef.fileName.size());
-
-    // Zip64 extension for 64-bit offsets/lengths.
-    // The -1 values above tell the extractor to use the values in this field
-    // instead. For simplicity we always use these regardless of whether we
-    // need to or not - we aren't optimizing for size when in this mode.
-    ZIP64CentralExtraField zip64Extra;
-    zip64Extra.header.id = 0x0001u;
-    zip64Extra.header.size =
-        static_cast<uint16_t>(sizeof(zip64Extra) - sizeof(ZIPExtraFieldHeader));
-    zip64Extra.localHeaderOffset = static_cast<uint64_t>(
-        endOffset + zipFileRef.localHeaderOffset - kZIPMagicLocalOffset + 2);
-    zip64Extra.compressedSize = zipFileRef.totalSize;
-    zip64Extra.uncompressedSize = zipFileRef.totalSize;
-    output.write(reinterpret_cast<const char *>(&zip64Extra),
-                 sizeof(zip64Extra));
+  if (totalSize > SIZE_MAX) {
+    mlir::emitError(loc) << "constant size " << totalSize
+                         << " exceeds native size_t; unable to serialize";
+    return {};
   }
-  uint64_t centralDirectoryEndOffset = output.tell();
 
-  // Append the central directory record.
-  ZIPEndOfCentralDirectoryRecord64 endOfCDR64;
-  endOfCDR64.signature = 0x06064B50u;
-  endOfCDR64.sizeOfEOCD64Minus12 = sizeof(endOfCDR64) - 12;
-  endOfCDR64.versionMadeBy = 0x002Du;
-  endOfCDR64.versionRequired = 0x002Du;  // 4.5 (for zip64)
-  endOfCDR64.diskNumber = 0;
-  endOfCDR64.startDiskNumber = 0;
-  endOfCDR64.entriesOnDisk = static_cast<uint64_t>(zipFileRefs.size());
-  endOfCDR64.entryCount = static_cast<uint64_t>(zipFileRefs.size());
-  endOfCDR64.directorySize = static_cast<uint64_t>(centralDirectoryEndOffset -
-                                                   centralDirectoryStartOffset);
-  endOfCDR64.directoryOffset =
-      static_cast<uint64_t>(centralDirectoryStartOffset);
-  output.write(reinterpret_cast<const char *>(&endOfCDR64), sizeof(endOfCDR64));
+  auto value = valueAttr.dyn_cast<IREE::Util::SerializableAttrInterface>();
+  assert(value && "expected a serializable rodata value");
 
-  // End of central directory locator; must be at the end of the file.
-  ZIPEndOfCentralDirectoryLocator64 locator;
-  locator.signature = 0x07064B50u;
-  locator.recordDiskNumber = 0;
-  locator.recordOffset = centralDirectoryEndOffset;
-  locator.diskCount = 1;
-  output.write(reinterpret_cast<const char *>(&locator), sizeof(locator));
+  // Reserve memory in the FlatBuffer for the data.
+  uint8_t *bytePtr =
+      flatbuffers_uint8_vec_extend(fbb, static_cast<size_t>(totalSize));
 
-  // Append the final ZIP file footer.
-  // NOTE: this must come at the very end of the file. Even though we have the
-  // EOCD64 record above this is still required for extractors to recognize the
-  // file as a zip file. The offset of -1 will cause incompatible extractors
-  // (like on MS-DOS I guess?) to fail and compatible ones to look for the
-  // locator.
-  ZIPEndOfCentralDirectoryRecord endOfCDR;
-  endOfCDR.signature = 0x06054B50u;
-  endOfCDR.diskNumber = 0;
-  endOfCDR.startDiskNumber = 0;
-  endOfCDR.entriesOnDisk = static_cast<uint16_t>(zipFileRefs.size());
-  endOfCDR.entryCount = static_cast<uint16_t>(zipFileRefs.size());
-  endOfCDR.directorySize = static_cast<uint32_t>(centralDirectoryEndOffset -
-                                                 centralDirectoryStartOffset);
-  endOfCDR.directoryOffset = 0xFFFFFFFF;
-  endOfCDR.commentLength = 0;
-  output.write(reinterpret_cast<const char *>(&endOfCDR), sizeof(endOfCDR));
+  // Serialize the constant into the reserved memory.
+  if (failed(value.serializeToBuffer(
+          llvm::support::endianness::little,
+          ArrayRef<char>(reinterpret_cast<char *>(bytePtr),
+                         static_cast<size_t>(totalSize))))) {
+    return {};
+  }
+
+  return flatbuffers_uint8_vec_end(fbb);
 }
 
-}  // namespace
-
 // Finds all types in the module and builds a type table mapping the index in
 // the vector to the type represented by the type ordinal.
 static std::vector<TypeDef> buildTypeTable(IREE::VM::ModuleOp moduleOp) {
@@ -621,15 +337,13 @@
 // has been packed into the top-level table. This results in a messier function
 // here during serialization but a much more trivial (and cache-friendly)
 // representation at runtime.
-static LogicalResult buildFlatBufferModule(BytecodeTargetOptions targetOptions,
-                                           IREE::VM::ModuleOp moduleOp,
-                                           SmallVector<ZIPFileRef> &zipFileRefs,
-                                           bool emitPolyglotZip,
-                                           FlatbufferBuilder &fbb) {
+static LogicalResult buildFlatBufferModule(
+    BytecodeTargetOptions targetOptions, IREE::VM::ModuleOp moduleOp,
+    MutableArrayRef<RodataRef> rodataRefs, FlatbufferBuilder &fbb) {
   // Start the buffer so that we can begin recording data prior to the root
   // table (which we do at the very end). This does not change the layout of the
   // file and is only used to prime the flatcc builder.
-  iree_vm_BytecodeModuleDef_start_as_root(fbb);
+  iree_vm_BytecodeModuleDef_start_as_root_with_size(fbb);
 
   // Debug database is always populated but conditionally written.
   // This allows us to emit the database to a separate file if we want to strip
@@ -647,11 +361,9 @@
   std::vector<IREE::VM::ImportOp> importFuncOps;
   std::vector<IREE::VM::ExportOp> exportFuncOps;
   std::vector<IREE::VM::FuncOp> internalFuncOps;
-  std::vector<IREE::VM::RodataOp> rodataOps;
   importFuncOps.resize(ordinalCounts.import_funcs());
   exportFuncOps.resize(ordinalCounts.export_funcs());
   internalFuncOps.resize(ordinalCounts.internal_funcs());
-  rodataOps.resize(ordinalCounts.rodatas());
 
   for (auto &op : moduleOp.getBlock().getOperations()) {
     if (auto funcOp = dyn_cast<IREE::VM::FuncOp>(op)) {
@@ -660,56 +372,9 @@
       exportFuncOps[exportOp.ordinal().getValue().getLimitedValue()] = exportOp;
     } else if (auto importOp = dyn_cast<IREE::VM::ImportOp>(op)) {
       importFuncOps[importOp.ordinal().getValue().getLimitedValue()] = importOp;
-    } else if (auto rodataOp = dyn_cast<IREE::VM::RodataOp>(op)) {
-      rodataOps[rodataOp.ordinal().getValue().getLimitedValue()] = rodataOp;
     }
   }
 
-  // Serialize read-only data first so that it ends up at the end of the file.
-  // This is where large things like parameters live and we don't want that to
-  // get paged in until it is needed.
-  //
-  // NOTE: flatbuffers are built bottom-up; after each rodata we serialize we
-  // move *backward* in the file and prepend the next, meaning that if we
-  // were to serialize all rodata we'd have it in the opposite order as we do
-  // in the IR. Though this it isn't required for correctness, enabling file
-  // layout planning by preserving the order in the IR is useful.
-  SmallVector<flatbuffers_uint8_vec_ref_t, 8> rodataContentRefs;
-  rodataContentRefs.reserve(rodataOps.size());
-
-  // All constants are defaulted to 16-byte aligned as that is the maximum
-  // (reasonable) alignment of all data types on all platforms. This can be
-  // overridden by creators of the rodata with the `alignment` attribute.
-  static constexpr int kDefaultRodataAlignment = 16;
-
-  for (auto rodataOp : llvm::reverse(rodataOps)) {
-    // Only include rodata entries in the ZIP if they are file-like. This
-    // prevents all of our string tables from getting included.
-    bool includeInZIP = emitPolyglotZip && rodataOp.mime_type().hasValue();
-
-    // Embed the rodata contents.
-    size_t alignment =
-        rodataOp.alignment()
-            ? static_cast<size_t>(rodataOp.alignment().getValue())
-            : 0;
-    if (alignment == 0) alignment = kDefaultRodataAlignment;
-    auto constantRef =
-        serializeConstant(rodataOp.getLoc(), rodataOp.value(), alignment,
-                          /*calculateCRC32=*/includeInZIP, fbb);
-    if (!constantRef.ref) {
-      return rodataOp.emitOpError() << "failed to encode";
-    }
-    rodataContentRefs.push_back(constantRef.ref);
-
-    // Add the ZIP per-file header.
-    if (includeInZIP) {
-      zipFileRefs.push_back(appendZIPLocalFileHeader(
-          rodataOp, constantRef.totalSize, constantRef.crc32, fbb));
-    }
-  }
-  // List of references needs to be swapped forward (we wrote backward).
-  std::reverse(rodataContentRefs.begin(), rodataContentRefs.end());
-
   // Find all types in the module to build the type table.
   // Note that we don't emit it yet as we want to keep it near the top of the
   // file (which, in FlatBuffers, is written last).
@@ -764,15 +429,38 @@
   auto functionDescriptorsRef = iree_vm_FunctionDescriptor_vec_create(
       fbb, functionDescriptors.data(), functionDescriptors.size());
 
-  // Serialize metadata that should be near the front of the file.
-  auto rodataSegmentRefs = llvm::to_vector<8>(
-      llvm::map_range(rodataContentRefs, [&](auto rodataContentRef) {
-        iree_vm_RodataSegmentDef_start(fbb);
-        iree_vm_RodataSegmentDef_embedded_data_add(fbb, rodataContentRef);
-        return iree_vm_RodataSegmentDef_end(fbb);
-      }));
+  // Serialize embedded read-only data and build the rodata references.
+  //
+  // NOTE: FlatBuffers are built bottom-up; after each rodata we serialize we
+  // move *backward* in the file and prepend the next, meaning that if we
+  // were to serialize all rodata we'd have it in the opposite order as we do
+  // in the IR. Though this isn't required for correctness, enabling file
+  // layout planning by preserving the order in the IR is useful.
+  SmallVector<iree_vm_RodataSegmentDef_ref_t, 8> rodataSegmentRefs;
+  for (auto &rodataRef : llvm::reverse(rodataRefs)) {
+    flatbuffers_uint8_vec_ref_t embedded_ref = 0;
+    if (!rodataRef.archiveFile.hasValue()) {
+      embedded_ref = serializeEmbeddedData(
+          rodataRef.rodataOp.getLoc(), rodataRef.rodataOp.value(),
+          rodataRef.alignment, rodataRef.totalSize, fbb);
+    }
+    iree_vm_RodataSegmentDef_start(fbb);
+    if (rodataRef.archiveFile.hasValue()) {
+      iree_vm_RodataSegmentDef_external_data_offset_add(
+          fbb, rodataRef.archiveFile->relativeOffset +
+                   rodataRef.archiveFile->prefixLength);
+      iree_vm_RodataSegmentDef_external_data_length_add(
+          fbb, rodataRef.archiveFile->fileLength);
+    } else {
+      iree_vm_RodataSegmentDef_embedded_data_add(fbb, embedded_ref);
+    }
+    rodataSegmentRefs.push_back(iree_vm_RodataSegmentDef_end(fbb));
+  }
+  std::reverse(rodataSegmentRefs.begin(), rodataSegmentRefs.end());
+
+  // NOTE: rwdata is currently unused.
   SmallVector<iree_vm_RwdataSegmentDef_ref_t, 8> rwdataSegmentRefs;
-  // NOTE: rwdata current unused.
+
   auto typeRefs =
       llvm::to_vector<8>(llvm::map_range(typeTable, [&](auto typeDef) {
         auto fullNameRef = fbb.createString(typeDef.full_name);
@@ -780,6 +468,7 @@
         iree_vm_TypeDef_full_name_add(fbb, fullNameRef);
         return iree_vm_TypeDef_end(fbb);
       }));
+
   auto importFuncRefs =
       llvm::to_vector<8>(llvm::map_range(importFuncOps, [&](auto importOp) {
         auto fullNameRef = fbb.createString(importOp.getName());
@@ -794,6 +483,7 @@
         iree_vm_ImportFunctionDef_flags_add(fbb, flags);
         return iree_vm_ImportFunctionDef_end(fbb);
       }));
+
   auto exportFuncRefs =
       llvm::to_vector<8>(llvm::map_range(exportFuncOps, [&](auto exportOp) {
         auto localNameRef = fbb.createString(exportOp.export_name());
@@ -861,13 +551,8 @@
 LogicalResult translateModuleToBytecode(IREE::VM::ModuleOp moduleOp,
                                         BytecodeTargetOptions targetOptions,
                                         llvm::raw_ostream &output) {
-  bool emitPolyglotZip =
-      targetOptions.emitPolyglotZip &&
-      targetOptions.outputFormat == BytecodeOutputFormat::kFlatBufferBinary;
   moduleOp.getContext()->getOrLoadDialect<IREE::Util::UtilDialect>();
 
-  uint64_t startOffset = output.tell();
-
   if (failed(canonicalizeModule(targetOptions, moduleOp))) {
     return moduleOp.emitError()
            << "failed to canonicalize vm.module to a serializable form";
@@ -897,6 +582,7 @@
     }
   }
 
+  // Debug-only formats:
   if (targetOptions.outputFormat == BytecodeOutputFormat::kMlirText ||
       targetOptions.outputFormat == BytecodeOutputFormat::kAnnotatedMlirText) {
     // Use the standard MLIR text printer.
@@ -905,52 +591,82 @@
     return success();
   }
 
+  // Set up the output archive builder based on output format.
+  std::unique_ptr<ArchiveWriter> archiveWriter;
+  if (targetOptions.emitPolyglotZip &&
+      targetOptions.outputFormat == BytecodeOutputFormat::kFlatBufferBinary) {
+    archiveWriter =
+        std::make_unique<ZIPArchiveWriter>(moduleOp.getLoc(), output);
+  } else if (targetOptions.outputFormat ==
+             BytecodeOutputFormat::kFlatBufferBinary) {
+    archiveWriter =
+        std::make_unique<FlatArchiveWriter>(moduleOp.getLoc(), output);
+  } else if (targetOptions.outputFormat ==
+             BytecodeOutputFormat::kFlatBufferText) {
+    archiveWriter =
+        std::make_unique<JSONArchiveWriter>(moduleOp.getLoc(), output);
+  } else {
+    assert(false && "unhandled output format combination");
+  }
+
+  // Declare all rodata entries we want to end up as external data first. This
+  // allows us to compute offsets if needed without having had to perform
+  // serialization yet. Note that not all rodata ends up as external data: if
+  // it's small (like strings) we can avoid the extra seeks and keep it more
+  // local by embedding it in the FlatBuffer.
+  std::vector<IREE::VM::RodataOp> rodataOps;
+  rodataOps.resize(moduleOp.ordinal_counts().getValue().rodatas());
+  for (auto rodataOp : moduleOp.getOps<IREE::VM::RodataOp>()) {
+    rodataOps[rodataOp.ordinal().getValue().getLimitedValue()] = rodataOp;
+  }
+  SmallVector<RodataRef> rodataRefs;
+  rodataRefs.resize(rodataOps.size());
+  for (auto &rodataOp : rodataOps) {
+    auto rodataValue =
+        rodataOp.value().dyn_cast<IREE::Util::SerializableAttrInterface>();
+    assert(rodataValue && "expected a serializable rodata value");
+
+    // Split large rodata out of the FlatBuffer to avoid going over 2GB.
+    // We also route any rodata that has a mime type defined so that it's
+    // easier to work with as a user.
+    uint64_t actualSize = rodataValue.getStorageSize();
+    bool storeExternal =
+        archiveWriter->supportsFiles() &&
+        (rodataOp.mime_type().hasValue() || actualSize >= kMaxEmbeddedDataSize);
+
+    RodataRef rodataRef;
+    rodataRef.rodataOp = rodataOp;
+    rodataRef.alignment = rodataOp.alignment() ? rodataOp.alignment().getValue()
+                                               : kDefaultRodataAlignment;
+    rodataRef.totalSize = static_cast<uint64_t>(actualSize);
+    if (storeExternal) {
+      std::string fileName =
+          (rodataOp.getName() +
+           mimeTypeToFileExtension(rodataOp.mime_type().getValueOr("")))
+              .str();
+      rodataRef.archiveFile = archiveWriter->declareFile(
+          fileName, rodataRef.alignment, rodataRef.totalSize,
+          [=](llvm::raw_ostream &os) {
+            return rodataValue.serializeToStream(
+                llvm::support::endianness::little, os);
+          });
+    }
+    rodataRefs[rodataOp.ordinal().getValue().getLimitedValue()] = rodataRef;
+  }
+
   // NOTE: we order things so that all of the metadata is close to the start of
   // the module header in memory. This ensures that when we map the file only
   // the first few pages need to be accessed to get the metadata and the rest
   // can be large bulk data.
   FlatbufferBuilder fbb;
-  SmallVector<ZIPFileRef> zipFileRefs;
-  if (failed(buildFlatBufferModule(targetOptions, moduleOp, zipFileRefs,
-                                   emitPolyglotZip, fbb))) {
-    return moduleOp.emitError()
-           << "failed to build FlatBuffer BytecodeModuleDef";
+  if (failed(buildFlatBufferModule(targetOptions, moduleOp, rodataRefs, fbb))) {
+    return failure();
   }
-
-  switch (targetOptions.outputFormat) {
-    case BytecodeOutputFormat::kFlatBufferBinary:
-      if (failed(fbb.copyToStream(output))) {
-        return moduleOp.emitError()
-               << "failed to copy flatbuffer emitter contents to output stream "
-                  "- possibly out of memory";
-      }
-      break;
-    case BytecodeOutputFormat::kFlatBufferText: {
-      if (failed(fbb.printJsonToStream(/*pretty=*/true,
-                                       /*includeDefaults=*/false,
-                                       bytecode_module_def_print_json,
-                                       output))) {
-        return moduleOp.emitError()
-               << "failed to print flatbuffer emitter contents to output "
-                  "stream - possibly out of memory, possibly unprintable "
-                  "structure";
-      }
-      break;
-    }
-    default:
-      assert(false && "unimplemented output format");
+  if (failed(archiveWriter->flush(fbb))) {
+    return failure();
   }
-  output.flush();
+  archiveWriter.reset();
 
-  if (emitPolyglotZip) {
-    // Append the ZIP central directory to the end of the output.
-    // We have to do this here as we need to have flushed the flatbuffer
-    // contents to the output so that we have their final absolute addresses.
-    uint64_t endOffset = output.tell();
-    appendZIPCentralDirectory(zipFileRefs, startOffset, endOffset, output);
-  }
-
-  output.flush();
   return success();
 }
 

diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/CMakeLists.txt
index 5f4fe20..6484f0b 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/CMakeLists.txt

@@ -16,6 +16,8 @@
   HDRS
     "BytecodeModuleTarget.h"
   SRCS
+    "ArchiveWriter.cpp"
+    "ArchiveWriter.h"
     "BytecodeEncoder.cpp"
     "BytecodeEncoder.h"
     "BytecodeModuleTarget.cpp"

diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/test/constant_encoding.mlir b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/test/constant_encoding.mlir
index 421d8d0..c931f4e 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/test/constant_encoding.mlir
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/test/constant_encoding.mlir

@@ -9,14 +9,14 @@
 
   // CHECK: "rodata_segments": [{
 
-  //      CHECK: "data": [
+  //      CHECK: "embedded_data": [
   // CHECK-NEXT:   1,
   // CHECK-NEXT:   2,
   // CHECK-NEXT:   3
   // CHECK-NEXT: ]
   vm.rodata private @dense_i8s dense<[1, 2, 3]> : tensor<3xi8>
 
-  //      CHECK: "data": [
+  //      CHECK: "embedded_data": [
   // CHECK-NEXT:   0,
   // CHECK-NEXT:   0,
   // CHECK-NEXT:   128,
@@ -32,7 +32,7 @@
   // CHECK-NEXT: ]
   vm.rodata private @dense_float32s dense<[1.000000e+00, 2.000000e+00, 3.000000e+00]> : tensor<3xf32>
 
-  //      CHECK: "data": [
+  //      CHECK: "embedded_data": [
   // CHECK-NEXT:   0,
   // CHECK-NEXT:   0,
   // CHECK-NEXT:   128,
@@ -48,7 +48,7 @@
   // CHECK-NEXT: ]
   vm.rodata private @splat_float32s dense<1.000000e+00> : tensor<3xf32>
 
-  //      CHECK: "data": [
+  //      CHECK: "embedded_data": [
   // CHECK-NEXT:   0,
   // CHECK-NEXT:   60,
   // CHECK-NEXT:   0,

diff --git a/compiler/src/iree/compiler/Translation/test/hal_executable.mlir b/compiler/src/iree/compiler/Translation/test/hal_executable.mlir
index 1dc4f61..25c463e 100644
--- a/compiler/src/iree/compiler/Translation/test/hal_executable.mlir
+++ b/compiler/src/iree/compiler/Translation/test/hal_executable.mlir

@@ -63,4 +63,4 @@
 }
 
 // Just check that there's the expected flatbuffers prefix bytes.
-// CHECK: hal.executable.binary public @vmvx_bytecode_fb attributes {data = dense<"0x08000000
+// CHECK: hal.executable.binary public @vmvx_bytecode_fb attributes {data = dense<{{.+}}> : vector<{{.+}}xi8>, format = "vmvx-bytecode-fb"}

diff --git a/compiler/src/iree/compiler/Utils/FlatbufferUtils.cpp b/compiler/src/iree/compiler/Utils/FlatbufferUtils.cpp
index 3a00885..0da18c5 100644
--- a/compiler/src/iree/compiler/Utils/FlatbufferUtils.cpp
+++ b/compiler/src/iree/compiler/Utils/FlatbufferUtils.cpp

@@ -88,11 +88,14 @@
   return success();
 }
 
-LogicalResult FlatbufferBuilder::printJsonToStream(
-    bool pretty, bool includeDefaults, print_json_fn_t print_json_fn,
-    llvm::raw_ostream &output) {
-  // The printer requires direct access to the flatbuffer bytes so clone here.
+LogicalResult FlatbufferBuilder::printJsonToStream(bool pretty,
+                                                   bool includeDefaults,
+                                                   print_json_fn_t printJsonFn,
+                                                   llvm::raw_ostream &output) {
+  // The printer requires direct access to the FlatBuffer bytes so clone here.
   auto bufferData = cloneBufferIntoContiguousBytes(*this);
+  auto moduleData = ArrayRef<uint8_t>(bufferData.data(), bufferData.size())
+                        .drop_front(sizeof(flatbuffers_uoffset_t));
 
   flatcc_json_printer_t printer;
   flatcc_json_printer_init_dynamic_buffer(&printer, /*buffer_size=*/0);
@@ -102,8 +105,8 @@
 
   // Print into the dynamically-resizing buffer. May fail if OOM.
   int rv =
-      print_json_fn(&printer, reinterpret_cast<const char *>(bufferData.data()),
-                    bufferData.size());
+      printJsonFn(&printer, reinterpret_cast<const char *>(moduleData.data()),
+                  moduleData.size());
   if (rv == -1) {
     flatcc_json_printer_clear(&printer);
     return failure();

diff --git a/compiler/src/iree/compiler/Utils/FlatbufferUtils.h b/compiler/src/iree/compiler/Utils/FlatbufferUtils.h
index f4b003c..9bfffff 100644
--- a/compiler/src/iree/compiler/Utils/FlatbufferUtils.h
+++ b/compiler/src/iree/compiler/Utils/FlatbufferUtils.h

@@ -143,7 +143,7 @@
   // referencing the same bytes; meaning that this can't be used to verify that
   // we are correctly memoizing strings/structures/etc.
   LogicalResult printJsonToStream(bool pretty, bool includeDefaults,
-                                  print_json_fn_t print_json_fn,
+                                  print_json_fn_t printJsonFn,
                                   llvm::raw_ostream &output);
 
  private:

diff --git a/runtime/src/iree/vm/bytecode_module.c b/runtime/src/iree/vm/bytecode_module.c
index c74717c..25f37b2 100644
--- a/runtime/src/iree/vm/bytecode_module.c
+++ b/runtime/src/iree/vm/bytecode_module.c

@@ -15,7 +15,143 @@
 #include "iree/vm/api.h"
 #include "iree/vm/bytecode_module_impl.h"
 
-// Perform an strcmp between a flatbuffers string and an IREE string view.
+// Alignment applied to each segment of the archive.
+// All embedded file contents (FlatBuffers, rodata, etc) are aligned to this
+// boundary.
+#define IREE_VM_ARCHIVE_SEGMENT_ALIGNMENT 64
+
+// ZIP local file header (comes immediately before each file in the archive).
+// In order to find the starting offset of the FlatBuffer in a polyglot archive
+// we need to parse this given the variable-length nature of it (we want to
+// be robust to file name and alignment changes).
+//
+// NOTE: all fields are little-endian.
+// NOTE: we don't care about the actual module size here; since we support
+//       streaming archives trying to recover it would require much more
+//       involved processing (we'd need to reference the central directory).
+//       If we wanted to support users repacking ZIPs we'd probably want to
+//       rewrite everything as we store offsets in the FlatBuffer that are
+//       difficult to update after the archive has been produced.
+#define ZIP_LOCAL_FILE_HEADER_SIGNATURE 0x04034B50u
+#if defined(IREE_COMPILER_MSVC)
+#pragma pack(push, 1)
+#endif  // IREE_COMPILER_MSVC
+typedef struct {
+  uint32_t signature;  // ZIP_LOCAL_FILE_HEADER_SIGNATURE
+  uint16_t version;
+  uint16_t general_purpose_flag;
+  uint16_t compression_method;
+  uint16_t last_modified_time;
+  uint16_t last_modified_date;
+  uint32_t crc32;              // 0 for us
+  uint32_t compressed_size;    // 0 for us
+  uint32_t uncompressed_size;  // 0 for us
+  uint16_t file_name_length;
+  uint16_t extra_field_length;
+  // file name (variable size)
+  // extra field (variable size)
+} IREE_ATTRIBUTE_PACKED zip_local_file_header_t;
+#if defined(IREE_COMPILER_MSVC)
+#pragma pack(pop)
+#endif  // IREE_COMPILER_MSVC
+static_assert(sizeof(zip_local_file_header_t) == 30, "bad packing");
+#if !defined(IREE_ENDIANNESS_LITTLE) || !IREE_ENDIANNESS_LITTLE
+#error "little endian required for zip header parsing"
+#endif  // IREE_ENDIANNESS_LITTLE
+
+// Strips any ZIP local file header from |contents| and stores the remaining
+// range in |out_stripped|.
+static iree_status_t iree_vm_bytecode_module_strip_zip_header(
+    iree_const_byte_span_t contents, iree_const_byte_span_t* out_stripped) {
+  // Ensure there are enough bytes to hold a ZIP local file header. Anything
+  // shorter cannot have a header to strip and is passed through unchanged.
+  if (!contents.data ||
+      contents.data_length < sizeof(zip_local_file_header_t)) {
+    memmove(out_stripped, &contents, sizeof(contents));
+    return iree_ok_status();
+  }
+
+  // Check to see if there's a zip local header signature.
+  // For a compliant zip file this is expected to start at offset 0.
+  const zip_local_file_header_t* header =
+      (const zip_local_file_header_t*)contents.data;
+  if (header->signature != ZIP_LOCAL_FILE_HEADER_SIGNATURE) {
+    // No signature found, probably not a ZIP.
+    memmove(out_stripped, &contents, sizeof(contents));
+    return iree_ok_status();
+  }
+
+  // Compute the starting offset of the file.
+  // Note that we still don't know (or care) if it's the file we want; actual
+  // FlatBuffer verification happens later on.
+  uint32_t offset =
+      sizeof(*header) + header->file_name_length + header->extra_field_length;
+  if (offset > contents.data_length) {
+    // Is a ZIP but doesn't have enough data; error out with something more
+    // useful than the FlatBuffer verification failing later on given that here
+    // we know this isn't a FlatBuffer.
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "archive self-reports as a zip but does not have "
+                            "enough data to contain a module");
+  }
+
+  *out_stripped = iree_make_const_byte_span(contents.data + offset,
+                                            contents.data_length - offset);
+  return iree_ok_status();
+}
+
+IREE_API_EXPORT iree_status_t iree_vm_bytecode_module_parse_header(
+    iree_const_byte_span_t archive_contents,
+    iree_const_byte_span_t* out_flatbuffer_contents,
+    iree_host_size_t* out_rodata_offset) {
+  // Slice off any polyglot zip header we have prior to the base of the module.
+  iree_const_byte_span_t module_contents = iree_const_byte_span_empty();
+  IREE_RETURN_IF_ERROR(iree_vm_bytecode_module_strip_zip_header(
+      archive_contents, &module_contents));
+
+  // Verify there's enough data to safely check the FlatBuffer header.
+  if (!module_contents.data || module_contents.data_length < 16) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "FlatBuffer data is not present or less than 16 bytes (%zu total)",
+        module_contents.data_length);
+  }
+
+  // Read the size prefix from the head of the module contents; this should be
+  // a 4 byte value indicating the total size of the FlatBuffer data.
+  size_t length_prefix = 0;
+  flatbuffers_read_size_prefix((void*)module_contents.data, &length_prefix);
+
+  // Verify the length prefix is within bounds (always <= the remaining module
+  // bytes).
+  size_t length_remaining =
+      module_contents.data_length - sizeof(flatbuffers_uoffset_t);
+  if (length_prefix > length_remaining) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "FlatBuffer length prefix out of bounds (prefix is "
+                            "%zu but only %zu available)",
+                            length_prefix, length_remaining);
+  }
+
+  // Form the range of bytes containing just the FlatBuffer data.
+  iree_const_byte_span_t flatbuffer_contents = iree_make_const_byte_span(
+      module_contents.data + sizeof(flatbuffers_uoffset_t), length_prefix);
+
+  if (out_flatbuffer_contents) {
+    *out_flatbuffer_contents = flatbuffer_contents;
+  }
+  if (out_rodata_offset) {
+    // rodata begins immediately following the FlatBuffer in memory.
+    iree_host_size_t rodata_offset = iree_host_align(
+        (iree_host_size_t)(flatbuffer_contents.data - archive_contents.data) +
+            length_prefix,
+        IREE_VM_ARCHIVE_SEGMENT_ALIGNMENT);
+    *out_rodata_offset = rodata_offset;
+  }
+  return iree_ok_status();
+}
+
+// Perform an strcmp between a FlatBuffers string and an IREE string view.
 static bool iree_vm_flatbuffer_strcmp(flatbuffers_string_t lhs,
                                       iree_string_view_t rhs) {
   size_t lhs_size = flatbuffers_string_len(lhs);
@@ -100,46 +236,27 @@
   return status;
 }
 
-// Computes the total length of the flatbuffer and the base offset for any
-// concatenated rodata.
-static iree_host_size_t iree_vm_bytecode_module_flatbuffer_rodata_offset(
-    iree_const_byte_span_t flatbuffer_data) {
-  if (flatbuffer_data.data_length < sizeof(flatbuffers_uoffset_t)) return 0;
-  size_t external_rodata_offset = 0;
-  flatbuffers_read_size_prefix((void*)flatbuffer_data.data,
-                               &external_rodata_offset);
-  external_rodata_offset += sizeof(flatbuffers_uoffset_t);
-  return iree_host_align(external_rodata_offset, 128);
-}
-
-// Verifies the structure of the flatbuffer so that we can avoid doing so during
+// Verifies the structure of the FlatBuffer so that we can avoid doing so during
 // runtime. There are still some conditions we must be aware of (such as omitted
 // names on functions with internal linkage), however we shouldn't need to
-// bounds check anything within the flatbuffer after this succeeds.
+// bounds check anything within the FlatBuffer after this succeeds.
 static iree_status_t iree_vm_bytecode_module_flatbuffer_verify(
-    iree_const_byte_span_t flatbuffer_data) {
-  if (!flatbuffer_data.data || flatbuffer_data.data_length < 16) {
-    return iree_make_status(
-        IREE_STATUS_INVALID_ARGUMENT,
-        "flatbuffer data is not present or less than 16 bytes (%zu total)",
-        flatbuffer_data.data_length);
-  }
-
+    iree_const_byte_span_t archive_contents,
+    iree_const_byte_span_t flatbuffer_contents,
+    iree_host_size_t archive_rodata_offset) {
   // Run flatcc generated verification. This ensures all pointers are in-bounds
   // and that we can safely walk the file, but not that the actual contents of
-  // the flatbuffer meet our expectations.
+  // the FlatBuffer meet our expectations.
   int verify_ret = iree_vm_BytecodeModuleDef_verify_as_root(
-      flatbuffer_data.data, flatbuffer_data.data_length);
+      flatbuffer_contents.data, flatbuffer_contents.data_length);
   if (verify_ret != flatcc_verify_ok) {
     return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                             "flatbuffer verification failed: %s",
                             flatcc_verify_error_string(verify_ret));
   }
 
-  const iree_host_size_t external_rodata_offset =
-      iree_vm_bytecode_module_flatbuffer_rodata_offset(flatbuffer_data);
   iree_vm_BytecodeModuleDef_table_t module_def =
-      iree_vm_BytecodeModuleDef_as_root(flatbuffer_data.data);
+      iree_vm_BytecodeModuleDef_as_root(flatbuffer_contents.data);
 
   flatbuffers_string_t name = iree_vm_BytecodeModuleDef_name(module_def);
   if (!flatbuffers_string_len(name)) {
@@ -170,13 +287,13 @@
     if (iree_vm_RodataSegmentDef_embedded_data_is_present(segment)) {
       continue;  // embedded data is verified by FlatBuffers
     }
-    uint64_t rodata_offset =
+    uint64_t segment_offset =
         iree_vm_RodataSegmentDef_external_data_offset(segment);
-    uint64_t rodata_length =
+    uint64_t segment_length =
         iree_vm_RodataSegmentDef_external_data_length(segment);
-    uint64_t rodata_end =
-        external_rodata_offset + rodata_offset + rodata_length;
-    if (rodata_end >= flatbuffer_data.data_length) {
+    uint64_t segment_end =
+        archive_rodata_offset + segment_offset + segment_length;
+    if (segment_end > archive_contents.data_length) {
       return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                               "rodata[%zu] external reference out of range", i);
     }
@@ -321,10 +438,11 @@
   iree_vm_bytecode_module_t* module = (iree_vm_bytecode_module_t*)self;
   IREE_TRACE_ZONE_BEGIN(z0);
 
-  iree_allocator_free(module->flatbuffer_allocator,
-                      (void*)module->flatbuffer_data.data);
-  module->flatbuffer_data = iree_make_const_byte_span(NULL, 0);
-  module->flatbuffer_allocator = iree_allocator_null();
+  module->def = NULL;
+  iree_allocator_free(module->archive_allocator,
+                      (void*)module->archive_contents.data);
+  module->archive_contents = iree_const_byte_span_empty();
+  module->archive_allocator = iree_allocator_null();
 
   iree_allocator_free(module->allocator, module);
 
@@ -763,8 +881,6 @@
   iree_vm_bytecode_module_layout_state(module_def, state);
 
   // Setup rodata segments to point directly at the FlatBuffer memory.
-  const iree_host_size_t external_rodata_offset =
-      iree_vm_bytecode_module_flatbuffer_rodata_offset(module->flatbuffer_data);
   iree_vm_RodataSegmentDef_vec_t rodata_segments =
       iree_vm_BytecodeModuleDef_rodata_segments(module_def);
   for (int i = 0; i < state->rodata_ref_count; ++i) {
@@ -781,7 +897,8 @@
       // Data is concatenated with the FlatBuffer at some relative offset.
       // Note that we've already verified the referenced range is in bounds.
       byte_span = iree_make_byte_span(
-          (uint8_t*)module->flatbuffer_data.data + external_rodata_offset +
+          (uint8_t*)module->archive_contents.data +
+              module->archive_rodata_offset +
               iree_vm_RodataSegmentDef_external_data_offset(segment),
           iree_vm_RodataSegmentDef_external_data_length(segment));
     }
@@ -920,16 +1037,22 @@
 }
 
 IREE_API_EXPORT iree_status_t iree_vm_bytecode_module_create(
-    iree_const_byte_span_t flatbuffer_data,
-    iree_allocator_t flatbuffer_allocator, iree_allocator_t allocator,
-    iree_vm_module_t** out_module) {
+    iree_const_byte_span_t archive_contents, iree_allocator_t archive_allocator,
+    iree_allocator_t allocator, iree_vm_module_t** out_module) {
   IREE_TRACE_ZONE_BEGIN(z0);
   IREE_ASSERT_ARGUMENT(out_module);
   *out_module = NULL;
 
+  // Parse and verify the archive header to locate the FlatBuffer.
+  iree_const_byte_span_t flatbuffer_contents = iree_const_byte_span_empty();
+  iree_host_size_t archive_rodata_offset = 0;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_vm_bytecode_module_parse_header(
+              archive_contents, &flatbuffer_contents, &archive_rodata_offset));
+
   IREE_TRACE_ZONE_BEGIN_NAMED(z1, "iree_vm_bytecode_module_flatbuffer_verify");
-  iree_status_t status =
-      iree_vm_bytecode_module_flatbuffer_verify(flatbuffer_data);
+  iree_status_t status = iree_vm_bytecode_module_flatbuffer_verify(
+      archive_contents, flatbuffer_contents, archive_rodata_offset);
   if (!iree_status_is_ok(status)) {
     IREE_TRACE_ZONE_END(z1);
     IREE_TRACE_ZONE_END(z0);
@@ -938,7 +1061,7 @@
   IREE_TRACE_ZONE_END(z1);
 
   iree_vm_BytecodeModuleDef_table_t module_def =
-      iree_vm_BytecodeModuleDef_as_root(flatbuffer_data.data);
+      iree_vm_BytecodeModuleDef_as_root(flatbuffer_contents.data);
   if (!module_def) {
     IREE_TRACE_ZONE_END(z0);
     return iree_make_status(
@@ -968,8 +1091,9 @@
   module->bytecode_data = iree_make_const_byte_span(
       bytecode_data, flatbuffers_uint8_vec_len(bytecode_data));
 
-  module->flatbuffer_data = flatbuffer_data;
-  module->flatbuffer_allocator = flatbuffer_allocator;
+  module->archive_contents = archive_contents;
+  module->archive_allocator = archive_allocator;
+  module->archive_rodata_offset = archive_rodata_offset;
   module->def = module_def;
 
   module->type_count = iree_vm_TypeDef_vec_len(type_defs);

diff --git a/runtime/src/iree/vm/bytecode_module.h b/runtime/src/iree/vm/bytecode_module.h
index ed7bc04..e158d40 100644
--- a/runtime/src/iree/vm/bytecode_module.h
+++ b/runtime/src/iree/vm/bytecode_module.h

@@ -16,14 +16,22 @@
 extern "C" {
 #endif  // __cplusplus
 
-// Creates a VM module from an in-memory ModuleDef FlatBuffer.
-// If a |flatbuffer_allocator| is provided then it will be used to free the
-// |flatbuffer_data| when the module is destroyed and otherwise the ownership of
-// the flatbuffer_data remains with the caller.
+// Creates a VM module from an in-memory ModuleDef FlatBuffer archive.
+// If an |archive_allocator| is provided then it will be used to free the
+// |archive_contents| when the module is destroyed and otherwise the ownership
+// of the memory remains with the caller.
 IREE_API_EXPORT iree_status_t iree_vm_bytecode_module_create(
-    iree_const_byte_span_t flatbuffer_data,
-    iree_allocator_t flatbuffer_allocator, iree_allocator_t allocator,
-    iree_vm_module_t** out_module);
+    iree_const_byte_span_t archive_contents, iree_allocator_t archive_allocator,
+    iree_allocator_t allocator, iree_vm_module_t** out_module);
+
+// Parses the module archive header in |archive_contents|.
+// The subrange containing the FlatBuffer data is returned as well as the
+// offset where external rodata begins. Note that archives may have
+// non-contiguous layouts!
+IREE_API_EXPORT iree_status_t iree_vm_bytecode_module_parse_header(
+    iree_const_byte_span_t archive_contents,
+    iree_const_byte_span_t* out_flatbuffer_contents,
+    iree_host_size_t* out_rodata_offset);
 
 #ifdef __cplusplus
 }  // extern "C"

diff --git a/runtime/src/iree/vm/bytecode_module_impl.h b/runtime/src/iree/vm/bytecode_module_impl.h
index 57b1ac3..f487200 100644
--- a/runtime/src/iree/vm/bytecode_module_impl.h
+++ b/runtime/src/iree/vm/bytecode_module_impl.h

@@ -69,9 +69,16 @@
   // Allocator this module was allocated with and must be freed with.
   iree_allocator_t allocator;
 
-  // Underlying FlatBuffer data and allocator (which may be null).
-  iree_const_byte_span_t flatbuffer_data;
-  iree_allocator_t flatbuffer_allocator;
+  // Underlying archive data and allocator (which may be null).
+  iree_const_byte_span_t archive_contents;
+  iree_allocator_t archive_allocator;
+
+  // Offset into the archive data where external read-only data begins.
+  // This is added to any relative rodata reference in the FlatBuffer to get the
+  // aligned physical offset where content is located.
+  iree_host_size_t archive_rodata_offset;
+
+  // Loaded FlatBuffer module pointing into the archive contents.
   iree_vm_BytecodeModuleDef_table_t def;
 
   // Type table mapping module type IDs to registered VM types.

diff --git a/tools/BUILD b/tools/BUILD
index 231b532..cdbf082 100644
--- a/tools/BUILD
+++ b/tools/BUILD

@@ -85,6 +85,7 @@
         "//runtime/src/iree/base/internal:file_io",
         "//runtime/src/iree/base/internal/flatcc:debugging",
         "//runtime/src/iree/schemas:bytecode_module_def_c_fbs",
+        "//runtime/src/iree/vm:bytecode_module",
     ],
 )
 

diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index d44da6d..af4e5c5 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt

@@ -106,6 +106,7 @@
     iree::base::internal::file_io
     iree::base::internal::flatcc::debugging
     iree::schemas::bytecode_module_def_c_fbs
+    iree::vm::bytecode_module
 )
 
 iree_cc_binary(

diff --git a/tools/iree-dump-module-main.c b/tools/iree-dump-module-main.c
index 70adcbd..c4e7568 100644
--- a/tools/iree-dump-module-main.c
+++ b/tools/iree-dump-module-main.c

@@ -9,6 +9,7 @@
 #include "iree/base/api.h"
 #include "iree/base/internal/file_io.h"
 #include "iree/schemas/bytecode_module_def_json_printer.h"
+#include "iree/vm/bytecode_module.h"
 
 // Today we just print to JSON. We could do something more useful (size
 // analysis, etc), but JSON should be enough.
@@ -22,20 +23,25 @@
     return 1;
   }
 
-  iree_file_contents_t* flatbuffer_contents = NULL;
+  iree_file_contents_t* file_contents = NULL;
   IREE_CHECK_OK(iree_file_read_contents(argv[1], iree_allocator_system(),
-                                        &flatbuffer_contents));
+                                        &file_contents));
+
+  iree_const_byte_span_t flatbuffer_contents = iree_const_byte_span_empty();
+  IREE_CHECK_OK(iree_vm_bytecode_module_parse_header(
+      file_contents->const_buffer, &flatbuffer_contents,
+      /*out_rodata_offset=*/NULL));
 
   // Print direct to stdout.
   flatcc_json_printer_t printer;
   flatcc_json_printer_init(&printer, /*fp=*/NULL);
   flatcc_json_printer_set_skip_default(&printer, true);
-  bytecode_module_def_print_json(
-      &printer, (const char*)flatbuffer_contents->const_buffer.data,
-      flatbuffer_contents->const_buffer.data_length);
+  bytecode_module_def_print_json(&printer,
+                                 (const char*)flatbuffer_contents.data,
+                                 flatbuffer_contents.data_length);
   flatcc_json_printer_clear(&printer);
 
-  iree_file_contents_free(flatbuffer_contents);
+  iree_file_contents_free(file_contents);
 
   return 0;
 }
commit	9a286397f776542f9da23b03eaea5be0d5ed870d	[log] [tgz]
author	Ben Vanik <ben.vanik@gmail.com>	Fri May 20 19:45:23 2022 -0700
committer	Ben Vanik <ben.vanik@gmail.com>	Thu May 26 14:22:29 2022 -0700
tree	70d82d72429adc51d66b2ba123e492868976d287
parent	41de9e65fafc3439e2759d060a811b8de2fbaa49 [diff]