iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp - 3p/openxla/iree - Git at Google

 // Copyright 2019 The IREE Authors
 //
 // Licensed under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 #include "iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.h"

 #include <algorithm>

 #include "iree/compiler/Dialect/Util/IR/UtilOps.h"
 #include "iree/compiler/Dialect/Util/IR/UtilTypes.h"
 #include "iree/compiler/Dialect/Util/Transforms/Passes.h"
 #include "iree/compiler/Dialect/VM/Analysis/RegisterAllocation.h"
 #include "iree/compiler/Dialect/VM/Analysis/ValueLiveness.h"
 #include "iree/compiler/Dialect/VM/IR/VMDialect.h"
 #include "iree/compiler/Dialect/VM/IR/VMOps.h"
 #include "iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.h"
 #include "iree/compiler/Dialect/VM/Target/Bytecode/ConstantEncoder.h"
 #include "iree/compiler/Dialect/VM/Transforms/Passes.h"
 #include "iree/compiler/Dialect/VM/Utils/CallingConvention.h"
 #include "iree/compiler/Utils/FlatbufferUtils.h"
 #include "iree/compiler/Utils/TracingUtils.h"
 #include "iree/schemas/bytecode_module_def_builder.h"
 #include "iree/schemas/bytecode_module_def_json_printer.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Diagnostics.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/IR/Visitors.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/Transforms/Passes.h"
 #include "mlir/Translation.h"

 namespace mlir {
 namespace iree_compiler {
 namespace IREE {
 namespace VM {

 namespace {

 using namespace llvm::support;

 struct TypeDef {
   Type type;
   std::string full_name;
 };

 LLVM_PACKED_START
 struct ZIPEndOfCentralDirectoryRecord {
   ulittle32_t signature;  // 0x06054B50
   ulittle16_t diskNumber;
   ulittle16_t startDiskNumber;
   ulittle16_t entriesOnDisk;
   ulittle16_t entryCount;
   ulittle32_t directorySize;
   ulittle32_t directoryOffset;
   ulittle16_t commentLength;
   // comment (variable size)
 };
 static_assert(sizeof(ZIPEndOfCentralDirectoryRecord) == 22, "bad packing");
 struct ZIPCentralDirectoryRecord {
   ulittle32_t signature;  // 0x02014B50
   ulittle16_t versionMadeBy;
   ulittle16_t versionToExtract;
   ulittle16_t generalPurposeFlags;
   ulittle16_t compressionMethod;
   ulittle16_t lastModifiedTime;
   ulittle16_t lastModifiedDate;
   ulittle32_t crc32;
   ulittle32_t compressedSize;
   ulittle32_t uncompressedSize;
   ulittle16_t fileNameLength;
   ulittle16_t extraFieldLength;
   ulittle16_t fileCommentLength;
   ulittle16_t diskStartNumber;
   ulittle16_t internalFileAttributes;
   ulittle32_t externalFileAttributes;
   ulittle32_t localHeaderOffset;
   // file name (variable size)
   // extra field (variable size)
   // file comment (variable size)
 };
 static_assert(sizeof(ZIPCentralDirectoryRecord) == 46, "bad packing");
 struct ZIPLocalFileHeader {
   ulittle32_t signature;  // 0x04034B50
   ulittle16_t versionToExtract;
   ulittle16_t generalPurposeFlag;
   ulittle16_t compressionMethod;
   ulittle16_t lastModifiedTime;
   ulittle16_t lastModifiedDate;
   ulittle32_t crc32;
   ulittle32_t compressedSize;
   ulittle32_t uncompressedSize;
   ulittle16_t fileNameLength;
   ulittle16_t extraFieldLength;
   // file name (variable size)
   // extra field (variable size)
 };
 static_assert(sizeof(ZIPLocalFileHeader) == 30, "bad packing");
 struct ZIPExtraFieldHeader {
   ulittle16_t id;
   ulittle16_t size;
 };
 static_assert(sizeof(ZIPExtraFieldHeader) == 4, "bad packing");
 LLVM_PACKED_END

 // A ZIP file reference into the flatbuffer output data.
 struct ZIPFileRef {
   // Offset of the local file header in the flatbuffer. Relative to the end of
   // the file.
   flatcc_builder_ref_t localHeaderOffset;
   // Name of the file used within the ZIP archive.
   std::string fileName;
   // Total size, in bytes, of the file uncompressed.
   uint32_t totalSize;
   // CRC32 of the file.
   uint32_t crc32;
   // Extra field padding (total).
   uint16_t paddingLength;
 };

 // TODO(benvanik): figure out why we need to offset all flatbuffer refs by this
 // value in order to get proper absolute file offsets. The current value used
 // here was derived empirically and is like a combination of the flatbuffer
 // file prefix and some alignment.
 static constexpr int kZIPMagicLocalOffset = 90;

 // Gets a file extension based on the given |mimeType| that can be used to help
 // applications guess the file type of embedded data.
 static StringRef mimeTypeToFileExtension(StringRef mimeType) {
   return StringSwitch<StringRef>(mimeType)
       .Case("application/x-flatbuffers", ".fb")
       .Case("application/x-elf", ".so")
       .Default(".bin");
 }

 // Appends a ZIP local file header at the current location.
 // The header is a prefix to the actual rodata contents. ZIP requires that the
 // payload start immediately after the header but we have the flatbuffer header
 // there. To skip over the flatbuffer data we pad out the header with a dummy
 // extra data field that lets us control the length.
 //
 //  [zip local file header] + 4 byte suffix length
 //  [flatbuffer vector header] (4 bytes)
 //  [payload]
 static ZIPFileRef appendZIPLocalFileHeader(IREE::VM::RodataOp rodataOp,
                                            size_t rodataSize, uint32_t crc32,
                                            FlatbufferBuilder &fbb) {
   // Use the mime type to map to a file extension.
   std::string fileName =
       (rodataOp.getName() +
        mimeTypeToFileExtension(rodataOp.mime_type().getValueOr("")))
           .str();

   // The data is stored in the flatbuffer prefixed with a vector header of
   // a 32-bit byte count. We need to ignore this when computing the CRC as we
   // want only the payload to be visible in the ZIP.
   size_t vectorPrefixLength = sizeof(uint32_t);

   // header + file name + extra field header
   size_t totalHeaderLength = sizeof(ZIPLocalFileHeader) + fileName.size() +
                              sizeof(ZIPExtraFieldHeader);

   // Append local file header.
   auto *header = reinterpret_cast<ZIPLocalFileHeader *>(
       flatcc_builder_start_struct(fbb, totalHeaderLength, 1));
   header->signature = 0x04034B50u;
   header->versionToExtract = 0;
   header->generalPurposeFlag = 0;
   header->compressionMethod = 0;  // COMP_STORED
   header->lastModifiedTime = 0;
   header->lastModifiedDate = 0;
   header->crc32 = crc32;
   header->compressedSize = static_cast<uint32_t>(rodataSize);
   header->uncompressedSize = static_cast<uint32_t>(rodataSize);
   header->fileNameLength = static_cast<uint16_t>(fileName.size());
   header->extraFieldLength = sizeof(ZIPExtraFieldHeader) + vectorPrefixLength;
   char *fileNamePtr = reinterpret_cast<char *>(header + 1);
   memcpy(fileNamePtr, fileName.data(), fileName.size());
   auto *extraField =
       reinterpret_cast<ZIPExtraFieldHeader *>(fileNamePtr + fileName.size());
   extraField->id = 0xFECAu;
   extraField->size = static_cast<uint16_t>(vectorPrefixLength);
   flatcc_builder_ref_t relativeHeaderOffset = flatcc_builder_end_struct(fbb);

   ZIPFileRef fileRef;
   fileRef.localHeaderOffset = relativeHeaderOffset;
   fileRef.fileName = std::move(fileName);
   fileRef.totalSize = static_cast<uint32_t>(rodataSize);
   fileRef.crc32 = crc32;
   fileRef.paddingLength = static_cast<uint16_t>(vectorPrefixLength);
   return fileRef;
 }

 // Appends a ZIP central directory to |output| with the references to all of
 // |zipFileRefs| with offsets applied. |startOffset| and |endOffset| define the
 // absolute offsets into |output| of the flatbuffer data.
 //
 // The technique used here is the same as that used in self-extracting archives:
 // byte offset 0 of the file will contain the native format header (like the
 // flatbuffers file identifier) and a ZIP application will need to scan from the
 // back of the file to find the ZIP central directory. This often means that
 // naming the file .zip will not work: most ZIP applications will try to find
 // a PK header at byte 0.
 static void appendZIPCentralDirectory(ArrayRef<ZIPFileRef> zipFileRefs,
                                       uint64_t startOffset, uint64_t endOffset,
                                       llvm::raw_ostream &output) {
   // Append the central directory, which contains the local file headers with
   // some extra junk and references back to where the local headers are in the
   // file.
   uint64_t centralDirectoryStartOffset = output.tell();
   for (auto zipFileRef : zipFileRefs) {
     // Fixed-size header.
     ZIPCentralDirectoryRecord cdr;
     cdr.signature = 0x02014B50u;
     cdr.versionMadeBy = 798;
     cdr.versionToExtract = 20;
     cdr.generalPurposeFlags = 0;
     cdr.compressionMethod = 0;  // COMP_STORED
     cdr.lastModifiedTime = 0;
     cdr.lastModifiedDate = 0;
     cdr.crc32 = zipFileRef.crc32;
     cdr.compressedSize = zipFileRef.totalSize;
     cdr.uncompressedSize = zipFileRef.totalSize;
     cdr.fileNameLength = static_cast<uint16_t>(zipFileRef.fileName.size());
     cdr.extraFieldLength =
         sizeof(ZIPExtraFieldHeader) + zipFileRef.paddingLength;
     cdr.fileCommentLength = 0;
     cdr.diskStartNumber = 0;
     cdr.internalFileAttributes = 0;
     cdr.externalFileAttributes = 0;
     cdr.localHeaderOffset = static_cast<uint32_t>(
         endOffset + zipFileRef.localHeaderOffset - kZIPMagicLocalOffset);
     output.write(reinterpret_cast<const char *>(&cdr), sizeof(cdr));
     output.write(zipFileRef.fileName.data(), zipFileRef.fileName.size());
     ZIPExtraFieldHeader extraField;
     extraField.id = 0xFECAu;
     extraField.size = zipFileRef.paddingLength;
     output.write(reinterpret_cast<const char *>(&extraField),
                  sizeof(extraField));
     output.write_zeros(extraField.size);
   }
   uint64_t centralDirectoryEndOffset = output.tell();

   // Append the final ZIP file footer.
   // NOTE: this must come at the very end of the file.
   ZIPEndOfCentralDirectoryRecord endOfCDR;
   endOfCDR.signature = 0x06054B50u;
   endOfCDR.diskNumber = 0;
   endOfCDR.startDiskNumber = 0;
   endOfCDR.entriesOnDisk = static_cast<uint16_t>(zipFileRefs.size());
   endOfCDR.entryCount = static_cast<uint16_t>(zipFileRefs.size());
   endOfCDR.directorySize = static_cast<uint32_t>(centralDirectoryEndOffset -
                                                  centralDirectoryStartOffset);
   endOfCDR.directoryOffset = static_cast<uint32_t>(centralDirectoryStartOffset);
   endOfCDR.commentLength = 0;
   output.write(reinterpret_cast<const char *>(&endOfCDR), sizeof(endOfCDR));
 }

 }  // namespace

 // Finds all types in the module and builds a type table mapping the index in
 // the vector to the type represented by the type ordinal.
 static std::vector<TypeDef> buildTypeTable(IREE::VM::ModuleOp moduleOp) {
   llvm::DenseMap<Type, std::string> typeMap;
   std::function<void(Type)> tryInsertType;
   tryInsertType = [&](Type type) {
     if (auto refPtrType = type.dyn_cast<IREE::VM::RefType>()) {
       type = refPtrType.getObjectType();
     }
     if (typeMap.count(type)) return;
     std::string str;
     llvm::raw_string_ostream sstream(str);
     type.print(sstream);
     sstream.flush();
     typeMap.try_emplace(type, str);
     if (auto listType = type.dyn_cast<IREE::VM::ListType>()) {
       assert(listType.getElementType());
       tryInsertType(listType.getElementType());
     }
   };
   for (auto funcOp : moduleOp.getBlock().getOps<IREE::VM::FuncOp>()) {
     funcOp.walk([&](Operation *op) {
       for (auto type : op->getOperandTypes()) tryInsertType(type);
       for (auto type : op->getResultTypes()) tryInsertType(type);
     });
   }

   std::vector<TypeDef> table;
   table.reserve(typeMap.size());
   for (const auto &typeString : typeMap) {
     table.push_back(TypeDef{typeString.first, typeString.second});
   }
   llvm::stable_sort(
       table, +[](const TypeDef &lhs, const TypeDef &rhs) {
         // Always sort builtins above custom types.
         if (lhs.full_name[0] != '!' && rhs.full_name[0] == '!') {
           return true;
         } else if (lhs.full_name[0] == '!' && rhs.full_name[0] != '!') {
           return false;
         }
         return lhs.full_name.compare(rhs.full_name) < 0;
       });
   return table;
 }

 // Canonicalizes the module to its final form prior to emission.
 // This verifies that we only have ops we can serialize and performs any of the
 // required transformations (such as debug op stripping).
 static LogicalResult canonicalizeModule(BytecodeTargetOptions targetOptions,
                                         IREE::VM::ModuleOp moduleOp) {
   OwningRewritePatternList patterns(moduleOp.getContext());
   ConversionTarget target(*moduleOp.getContext());
   target.addLegalDialect<IREE::VM::VMDialect>();
   target.addLegalOp<IREE::Util::DoNotOptimizeOp>();

   // Add all VM canonicalization patterns and mark pseudo-ops illegal.
   auto *context = moduleOp.getContext();
   for (auto *op : context->getRegisteredOperations()) {
     // Non-serializable ops must be removed prior to serialization.
     if (op->hasTrait<OpTrait::IREE::VM::PseudoOp>()) {
       op->getCanonicalizationPatterns(patterns, context);
       target.setOpAction(OperationName(op->name, context),
                          ConversionTarget::LegalizationAction::Illegal);
     }

     // Debug ops must not be present when stripping.
     // TODO(benvanik): add RemoveDisabledDebugOp pattern.
     if (op->hasTrait<OpTrait::IREE::VM::DebugOnly>() &&
         targetOptions.stripDebugOps) {
       target.setOpAction(OperationName(op->name, context),
                          ConversionTarget::LegalizationAction::Illegal);
     }
   }

   if (failed(applyFullConversion(moduleOp, target, std::move(patterns)))) {
     return moduleOp.emitError() << "unable to fully apply conversion to module";
   }

   PassManager passManager(context);
   mlir::applyPassManagerCLOptions(passManager);
   mlir::applyDefaultTimingPassManagerCLOptions(passManager);
   passManager.addInstrumentation(std::make_unique<PassTracing>());
   auto &modulePasses = passManager.nest<IREE::VM::ModuleOp>();

   if (targetOptions.optimize) {
     // TODO(benvanik): does this run until it quiesces?
     modulePasses.addPass(mlir::createInlinerPass());
     modulePasses.addPass(mlir::createCSEPass());
     modulePasses.addPass(mlir::createCanonicalizerPass());
   }

   modulePasses.addPass(IREE::Util::createDropCompilerHintsPass());

   // Mark up the module with ordinals for each top-level op (func, etc).
   // This will make it easier to correlate the MLIR textual output to the
   // binary output.
   // We don't want any more modifications after this point as they could
   // invalidate the ordinals.
   modulePasses.addPass(IREE::VM::createOrdinalAllocationPass());

   if (failed(passManager.run(moduleOp->getParentOfType<mlir::ModuleOp>()))) {
     return moduleOp.emitError() << "failed during transform passes";
   }

   return success();
 }

 // Creates a FunctionSignatureDef based on the given function metadata.
 // Some fields are not used on all signature defs and added only when present on
 // the argument objects/attrs.
 static iree_vm_FunctionSignatureDef_ref_t createFunctionSignatureDef(
     FunctionType functionType, llvm::DenseMap<Type, int> &typeTable,
     StringRef callingConvention,
     iree_vm_ReflectionAttrDef_vec_ref_t reflectionAttrsRef,
     FlatbufferBuilder &fbb) {
   auto resultTypesRef = fbb.createInt32Vec(
       llvm::map_range(functionType.getResults(), [&](Type type) {
         if (auto refPtrType = type.dyn_cast<IREE::VM::RefType>()) {
           type = refPtrType.getObjectType();
         }
         return typeTable.lookup(type);
       }));
   auto argumentTypesRef = fbb.createInt32Vec(
       llvm::map_range(functionType.getInputs(), [&](Type type) {
         if (auto refPtrType = type.dyn_cast<IREE::VM::RefType>()) {
           type = refPtrType.getObjectType();
         }
         return typeTable.lookup(type);
       }));

   auto callingConventionRef = fbb.createString(callingConvention);

   // If the signature would be empty then let's avoid writing the empty table.
   if (!argumentTypesRef && !resultTypesRef && !callingConventionRef &&
       !reflectionAttrsRef) {
     return 0;
   }

   iree_vm_FunctionSignatureDef_start(fbb);
   iree_vm_FunctionSignatureDef_argument_types_add(fbb, argumentTypesRef);
   iree_vm_FunctionSignatureDef_result_types_add(fbb, resultTypesRef);
   iree_vm_FunctionSignatureDef_calling_convention_add(fbb,
                                                       callingConventionRef);
   iree_vm_FunctionSignatureDef_reflection_attrs_add(fbb, reflectionAttrsRef);
   return iree_vm_FunctionSignatureDef_end(fbb);
 }

 // Returns a serialized function signature.
 static iree_vm_FunctionSignatureDef_ref_t makeImportFunctionSignatureDef(
     IREE::VM::ImportOp importOp, llvm::DenseMap<Type, int> &typeTable,
     FlatbufferBuilder &fbb) {
   // Generate the signature calling convention string based on types.
   auto cconv = makeImportCallingConventionString(importOp);
   if (!cconv.hasValue()) return {};
   return createFunctionSignatureDef(importOp.getType(), typeTable,
                                     cconv.getValue(), /*reflectionAttrsRef=*/0,
                                     fbb);
 }

 // Returns a serialized function signature.
 static iree_vm_FunctionSignatureDef_ref_t makeExportFunctionSignatureDef(
     IREE::VM::ExportOp exportOp, IREE::VM::FuncOp funcOp,
     llvm::DenseMap<Type, int> &typeTable, FlatbufferBuilder &fbb) {
   // Generate the signature calling convention string based on types.
   auto cconv = makeCallingConventionString(funcOp);
   if (!cconv.hasValue()) return {};
   return createFunctionSignatureDef(funcOp.getType(), typeTable,
                                     cconv.getValue(), /*reflectionAttrsRef=*/0,
                                     fbb);
 }

 // Returns a serialized function signature.
 static iree_vm_FunctionSignatureDef_ref_t makeInternalFunctionSignatureDef(
     IREE::VM::FuncOp funcOp, llvm::DenseMap<Type, int> &typeTable,
     FlatbufferBuilder &fbb) {
   // Generate the signature calling convention string based on types.
   // TODO(benvanik): only do this on exports. The runtime currently looks on
   // internal functions, though, so we have to have it here.
   auto cconv = makeCallingConventionString(funcOp);
   if (!cconv.hasValue()) return {};

   // Reflection attributes.
   // TODO(benvanik): move these to exports (or remove entirely).
   iree_vm_ReflectionAttrDef_vec_ref_t reflectionAttrsRef = 0;
   if (auto reflectionAttrs =
           funcOp->getAttrOfType<DictionaryAttr>("iree.reflection")) {
     SmallVector<iree_vm_ReflectionAttrDef_ref_t, 4> reflectionAttrRefs;
     for (auto reflectionAttr : reflectionAttrs) {
       auto key = reflectionAttr.first.strref();
       auto value = reflectionAttr.second.dyn_cast<StringAttr>();
       if (!value || key.empty()) continue;
       // NOTE: if we actually want to keep these we should dedupe them (as the
       // keys and likely several of the values are shared across all functions).
       auto valueRef = fbb.createString(value.getValue());
       auto keyRef = fbb.createString(key);
       reflectionAttrRefs.push_back(
           iree_vm_ReflectionAttrDef_create(fbb, keyRef, valueRef));
     }
     reflectionAttrsRef = iree_vm_ReflectionAttrDef_vec_create(
         fbb, reflectionAttrRefs.data(), reflectionAttrRefs.size());
   }

   return createFunctionSignatureDef(funcOp.getType(), typeTable,
                                     cconv.getValue(), reflectionAttrsRef, fbb);
 }

 // Builds a complete BytecodeModuleDef FlatBuffer object in |fbb|.
 // The order of the encoding is ordered to ensure that all metadata is at the
 // front of the resulting buffer. Large read-only data and bytecode blobs always
 // fill the end of the file meaning that when memory-mapping the file most will
 // not need to be paged in to do the initial module preparation.
 //
 // To keep the actual BytecodeModuleDef and resulting parsing code simple a lot
 // has been packed into the top-level table. This results in a messier function
 // here during serialization but a much more trivial (and cache-friendly)
 // representation at runtime.
 static LogicalResult buildFlatBufferModule(BytecodeTargetOptions targetOptions,
                                            IREE::VM::ModuleOp moduleOp,
                                            SmallVector<ZIPFileRef> &zipFileRefs,
                                            FlatbufferBuilder &fbb) {
   // Start the buffer so that we can begin recording data prior to the root
   // table (which we do at the very end). This does not change the layout of the
   // file and is only used to prime the flatcc builder.
   iree_vm_BytecodeModuleDef_start_as_root(fbb);

   SymbolTable symbolTable(moduleOp);
   if (!moduleOp.ordinal_counts().hasValue()) {
     return moduleOp.emitError() << "ordinal_counts attribute not found. The "
                                    "OrdinalAllocationPass must be run before.";
   }
   OrdinalCountsAttr ordinalCounts = moduleOp.ordinal_counts().getValue();

   // Find all structural ops in the module.
   std::vector<IREE::VM::ImportOp> importFuncOps;
   std::vector<IREE::VM::ExportOp> exportFuncOps;
   std::vector<IREE::VM::FuncOp> internalFuncOps;
   std::vector<IREE::VM::RodataOp> rodataOps;
   importFuncOps.resize(ordinalCounts.import_funcs());
   exportFuncOps.resize(ordinalCounts.export_funcs());
   internalFuncOps.resize(ordinalCounts.internal_funcs());
   rodataOps.resize(ordinalCounts.rodatas());

   for (auto &op : moduleOp.getBlock().getOperations()) {
     if (auto funcOp = dyn_cast<IREE::VM::FuncOp>(op)) {
       internalFuncOps[funcOp.ordinal().getValue().getLimitedValue()] = funcOp;
     } else if (auto exportOp = dyn_cast<IREE::VM::ExportOp>(op)) {
       exportFuncOps[exportOp.ordinal().getValue().getLimitedValue()] = exportOp;
     } else if (auto importOp = dyn_cast<IREE::VM::ImportOp>(op)) {
       importFuncOps[importOp.ordinal().getValue().getLimitedValue()] = importOp;
     } else if (auto rodataOp = dyn_cast<IREE::VM::RodataOp>(op)) {
       rodataOps[rodataOp.ordinal().getValue().getLimitedValue()] = rodataOp;
     }
   }

   // Serialize read-only data first so that it ends up at the end of the file.
   // This is where large things like parameters live and we don't want that to
   // get paged in until it is needed.
   //
   // NOTE: flatbuffers are built bottom-up; after each rodata we serialize we
   // move *backward* in the file and prepend the next, meaning that if we
   // were to serialize all rodata we'd have it in the opposite order as we do
   // in the IR. Though this it isn't required for correctness, enabling file
   // layout planning by preserving the order in the IR is useful.
   SmallVector<flatbuffers_uint8_vec_ref_t, 8> rodataContentRefs;
   rodataContentRefs.reserve(rodataOps.size());

   // All constants are defaulted to 16-byte aligned as that is the maximum
   // (reasonable) alignment of all data types on all platforms. This can be
   // overridden by creators of the rodata with the `alignment` attribute.
   static constexpr int kDefaultRodataAlignment = 16;

   for (auto rodataOp : llvm::reverse(rodataOps)) {
     // Only include rodata entries in the ZIP if they are file-like. This
     // prevents all of our string tables from getting included.
     bool includeInZIP =
         targetOptions.emitPolyglotZip && rodataOp.mime_type().hasValue();

     // Embed the rodata contents.
     size_t alignment =
         rodataOp.alignment()
             ? static_cast<size_t>(rodataOp.alignment().getValue())
             : 0;
     if (alignment == 0) alignment = kDefaultRodataAlignment;
     auto constantRef =
         serializeConstant(rodataOp.getLoc(), rodataOp.value(), alignment,
                           /*calculateCRC32=*/includeInZIP, fbb);
     if (!constantRef.ref) {
       return rodataOp.emitOpError() << "failed to encode";
     }
     rodataContentRefs.push_back(constantRef.ref);

     // Add the ZIP per-file header.
     if (includeInZIP) {
       zipFileRefs.push_back(appendZIPLocalFileHeader(
           rodataOp, constantRef.totalSize, constantRef.crc32, fbb));
     }
   }
   // List of references needs to be swapped forward (we wrote backward).
   std::reverse(rodataContentRefs.begin(), rodataContentRefs.end());

   // Find all types in the module to build the type table.
   // Note that we don't emit it yet as we want to keep it near the top of the
   // file (which, in FlatBuffers, is written last).
   auto typeTable = buildTypeTable(moduleOp);
   llvm::DenseMap<Type, int> typeOrdinalMap;
   for (auto typeDef : llvm::enumerate(typeTable)) {
     typeOrdinalMap[typeDef.value().type] = typeDef.index();
   }

   // Serialize function bytecode one at a time and then merge at the end.
   SmallVector<std::vector<uint8_t>, 8> bytecodeDataParts;
   SmallVector<iree_vm_FunctionDescriptor_t, 8> functionDescriptors;
   bytecodeDataParts.resize(internalFuncOps.size());
   functionDescriptors.resize(internalFuncOps.size());
   size_t totalBytecodeLength = 0;
   for (auto funcOp : llvm::enumerate(internalFuncOps)) {
     auto encodedFunction = BytecodeEncoder::encodeFunction(
         funcOp.value(), typeOrdinalMap, symbolTable);
     if (!encodedFunction) {
       return funcOp.value().emitError() << "failed to encode function bytecode";
     }
     iree_vm_FunctionDescriptor_assign(
         &functionDescriptors[funcOp.index()], totalBytecodeLength,
         encodedFunction->bytecodeData.size(), encodedFunction->i32RegisterCount,
         encodedFunction->refRegisterCount);
     totalBytecodeLength += encodedFunction->bytecodeData.size();
     bytecodeDataParts[funcOp.index()] =
         std::move(encodedFunction->bytecodeData);
   }
   flatbuffers_uint8_vec_start(fbb);
   uint8_t *bytecodeDataPtr =
       flatbuffers_uint8_vec_extend(fbb, totalBytecodeLength);
   // NOTE: we need to ensure we clear the output data in case we have gaps for
   // alignment (where otherwise uninitialized memory might sneak in and be bad
   // for both security and determinism).
   memset(bytecodeDataPtr, 0, totalBytecodeLength);
   size_t currentBytecodeOffset = 0;
   for (const auto &it : llvm::enumerate(internalFuncOps)) {
     int ordinal = it.index();
     auto data = std::move(bytecodeDataParts[ordinal]);
     std::memcpy(bytecodeDataPtr + currentBytecodeOffset, data.data(),
                 data.size());
     currentBytecodeOffset += data.size();
   }
   auto bytecodeDataRef = flatbuffers_uint8_vec_end(fbb);

   // Encode the function descriptors adjacent to the bytcode data; they are
   // always accessed together. Descriptor 0 is likely within a few hundred bytes
   // of the referenced bytecode data offset 0, and from there we are at least
   // able to hope sequential readahead caching helps; if not, at least we
   // hopefully don't fault on the first function call every time.
   auto functionDescriptorsRef = iree_vm_FunctionDescriptor_vec_create(
       fbb, functionDescriptors.data(), functionDescriptors.size());

   // Serialize metadata that should be near the front of the file.
   auto rodataSegmentRefs = llvm::to_vector<8>(
       llvm::map_range(rodataContentRefs, [&](auto rodataContentRef) {
         iree_vm_RodataSegmentDef_start(fbb);
         iree_vm_RodataSegmentDef_data_add(fbb, rodataContentRef);
         return iree_vm_RodataSegmentDef_end(fbb);
       }));
   SmallVector<iree_vm_RwdataSegmentDef_ref_t, 8> rwdataSegmentRefs;
   // NOTE: rwdata current unused.
   auto typeRefs =
       llvm::to_vector<8>(llvm::map_range(typeTable, [&](auto typeDef) {
         auto fullNameRef = fbb.createString(typeDef.full_name);
         iree_vm_TypeDef_start(fbb);
         iree_vm_TypeDef_full_name_add(fbb, fullNameRef);
         return iree_vm_TypeDef_end(fbb);
       }));
   auto importFuncRefs =
       llvm::to_vector<8>(llvm::map_range(importFuncOps, [&](auto importOp) {
         auto fullNameRef = fbb.createString(importOp.getName());
         auto signatureRef =
             makeImportFunctionSignatureDef(importOp, typeOrdinalMap, fbb);
         iree_vm_ImportFunctionDef_start(fbb);
         iree_vm_ImportFunctionDef_full_name_add(fbb, fullNameRef);
         iree_vm_ImportFunctionDef_signature_add(fbb, signatureRef);
         return iree_vm_ImportFunctionDef_end(fbb);
       }));
   auto exportFuncRefs =
       llvm::to_vector<8>(llvm::map_range(exportFuncOps, [&](auto exportOp) {
         auto localNameRef = fbb.createString(exportOp.export_name());
         auto funcOp =
             symbolTable.lookup<IREE::VM::FuncOp>(exportOp.function_ref());
         auto signatureRef = makeExportFunctionSignatureDef(exportOp, funcOp,
                                                            typeOrdinalMap, fbb);
         iree_vm_ExportFunctionDef_start(fbb);
         iree_vm_ExportFunctionDef_local_name_add(fbb, localNameRef);
         iree_vm_ExportFunctionDef_signature_add(fbb, signatureRef);
         iree_vm_ExportFunctionDef_internal_ordinal_add(
             fbb, funcOp.ordinal().getValue().getLimitedValue());
         return iree_vm_ExportFunctionDef_end(fbb);
       }));
   SmallVector<iree_vm_InternalFunctionDef_ref_t, 8> internalFuncRefs;
   if (!targetOptions.stripSymbols) {
     internalFuncRefs.reserve(internalFuncOps.size());
     for (auto funcOp : internalFuncOps) {
       auto localNameRef = fbb.createString(funcOp.getName());
       auto signatureRef =
           makeInternalFunctionSignatureDef(funcOp, typeOrdinalMap, fbb);
       iree_vm_InternalFunctionDef_start(fbb);
       iree_vm_InternalFunctionDef_local_name_add(fbb, localNameRef);
       iree_vm_InternalFunctionDef_signature_add(fbb, signatureRef);
       internalFuncRefs.push_back(iree_vm_InternalFunctionDef_end(fbb));
     }
   }

   // NOTE: we keep the vectors clustered here so that we can hopefully keep the
   // pages mapped at runtime; vector dereferences in flatbuffers require
   // touching these structs to get length/etc and as such we don't want to be
   // gathering from all over the file (with giant rodata chunks and such
   // inbetween) just to perform a bounds check and deference into another part
   // of the file.
   auto rodataSegmentsRef = fbb.createOffsetVecDestructive(rodataSegmentRefs);
   auto rwdataSegmentsRef = fbb.createOffsetVecDestructive(rwdataSegmentRefs);
   auto internalFuncsRef = fbb.createOffsetVecDestructive(internalFuncRefs);
   auto exportFuncsOffset = fbb.createOffsetVecDestructive(exportFuncRefs);
   auto importFuncsRef = fbb.createOffsetVecDestructive(importFuncRefs);
   auto typesRef = fbb.createOffsetVecDestructive(typeRefs);

   int32_t globalRefs = ordinalCounts.global_refs();
   int32_t globalBytes = ordinalCounts.global_bytes();

   iree_vm_ModuleStateDef_ref_t moduleStateDef = 0;
   if (globalBytes || globalRefs) {
     iree_vm_ModuleStateDef_start(fbb);
     iree_vm_ModuleStateDef_global_bytes_capacity_add(fbb, globalBytes);
     iree_vm_ModuleStateDef_global_ref_count_add(fbb, globalRefs);
     moduleStateDef = iree_vm_ModuleStateDef_end(fbb);
   }

   auto moduleNameRef = fbb.createString(
       moduleOp.sym_name().empty() ? "module" : moduleOp.sym_name());

   iree_vm_BytecodeModuleDef_name_add(fbb, moduleNameRef);
   iree_vm_BytecodeModuleDef_types_add(fbb, typesRef);
   iree_vm_BytecodeModuleDef_imported_functions_add(fbb, importFuncsRef);
   iree_vm_BytecodeModuleDef_exported_functions_add(fbb, exportFuncsOffset);
   iree_vm_BytecodeModuleDef_internal_functions_add(fbb, internalFuncsRef);
   iree_vm_BytecodeModuleDef_module_state_add(fbb, moduleStateDef);
   iree_vm_BytecodeModuleDef_rodata_segments_add(fbb, rodataSegmentsRef);
   iree_vm_BytecodeModuleDef_rwdata_segments_add(fbb, rwdataSegmentsRef);
   iree_vm_BytecodeModuleDef_function_descriptors_add(fbb,
                                                      functionDescriptorsRef);
   iree_vm_BytecodeModuleDef_bytecode_data_add(fbb, bytecodeDataRef);
   iree_vm_BytecodeModuleDef_end_as_root(fbb);

   return success();
 }

 LogicalResult translateModuleToBytecode(IREE::VM::ModuleOp moduleOp,
                                         BytecodeTargetOptions targetOptions,
                                         llvm::raw_ostream &output) {
   uint64_t startOffset = output.tell();

   if (failed(canonicalizeModule(targetOptions, moduleOp))) {
     return moduleOp.emitError()
            << "failed to canonicalize vm.module to a serializable form";
   }

   if (targetOptions.outputFormat == BytecodeOutputFormat::kAnnotatedMlirText) {
     // Run register allocation now and put the info in the IR so it's printed.
     for (auto funcOp : moduleOp.getBlock().getOps<IREE::VM::FuncOp>()) {
       if (!funcOp.empty()) {
         if (failed(ValueLiveness::annotateIR(funcOp))) {
           return funcOp.emitError() << "liveness analysis failed";
         } else if (failed(RegisterAllocation::annotateIR(funcOp))) {
           return funcOp.emitError() << "register allocation failed";
         }
       }
     }
   }

   if (targetOptions.outputFormat == BytecodeOutputFormat::kMlirText ||
       targetOptions.outputFormat == BytecodeOutputFormat::kAnnotatedMlirText) {
     // Use the standard MLIR text printer.
     moduleOp.getOperation()->print(output);
     output << "\n";
     return success();
   }

   // NOTE: we order things so that all of the metadata is close to the start of
   // the module header in memory. This ensures that when we map the file only
   // the first few pages need to be accessed to get the metadata and the rest
   // can be large bulk data.
   FlatbufferBuilder fbb;
   SmallVector<ZIPFileRef> zipFileRefs;
   if (failed(
           buildFlatBufferModule(targetOptions, moduleOp, zipFileRefs, fbb))) {
     return moduleOp.emitError()
            << "failed to build FlatBuffer BytecodeModuleDef";
   }

   switch (targetOptions.outputFormat) {
     case BytecodeOutputFormat::kFlatBufferBinary:
       if (failed(fbb.copyToStream(output))) {
         return moduleOp.emitError()
                << "failed to copy flatbuffer emitter contents to output stream "
                   "- possibly out of memory";
       }
       break;
     case BytecodeOutputFormat::kFlatBufferText: {
       if (failed(fbb.printJsonToStream(/*pretty=*/true,
                                        /*includeDefaults=*/false,
                                        bytecode_module_def_print_json,
                                        output))) {
         return moduleOp.emitError()
                << "failed to print flatbuffer emitter contents to output "
                   "stream - possibly out of memory, possibly unprintable "
                   "structure";
       }
       break;
     }
     default:
       llvm_unreachable("unimplemented output format");
   }
   output.flush();

   if (targetOptions.emitPolyglotZip) {
     // Append the ZIP central directory to the end of the output.
     // We have to do this here as we need to have flushed the flatbuffer
     // contents to the output so that we have their final absolute addresses.
     uint64_t endOffset = output.tell();
     appendZIPCentralDirectory(zipFileRefs, startOffset, endOffset, output);
   }

   output.flush();
   return success();
 }

 LogicalResult translateModuleToBytecode(mlir::ModuleOp outerModuleOp,
                                         BytecodeTargetOptions targetOptions,
                                         llvm::raw_ostream &output) {
   auto moduleOps = outerModuleOp.getOps<IREE::VM::ModuleOp>();
   if (moduleOps.empty()) {
     return outerModuleOp.emitError()
            << "outer module does not contain a vm.module op";
   }
   return translateModuleToBytecode(*moduleOps.begin(), targetOptions, output);
 }

 }  // namespace VM
 }  // namespace IREE
 }  // namespace iree_compiler
 }  // namespace mlir