Add requirement bits to bytecode modules and bump the bytecode major version to 14.
The compiler now emits per-module and per-function requirement bitfields
that indicate extensions needed to execute the bytecode they contain.
This allows for nice error messages on loading when the runtime has
certain extensions omitted:
```
D:\Dev\iree\runtime\src\iree\vm\bytecode\module.c:309: INVALID_ARGUMENT; required module features [EXT_F32] are not available in this runtime configuration; have [] while module requires [EXT_F32]; while invoking native function hal.executable.create; while calling import;
[ 1]   native hal.executable.create:0 -
[ 0] bytecode module.__init:446 D:\Dev\iree/tests/e2e/models/unidirectional_lstm.mlir:0:0
```

While making the breaking change I'm also adding a block_count field
to each function descriptor and a special block opcode that can be
used for verification and disassembly. All functions now have their
signatures embedded (not just exports) to allow for internal function
call verification.
diff --git a/compiler/src/iree/compiler/API2/Internal/Embed.cpp b/compiler/src/iree/compiler/API2/Internal/Embed.cpp
index dcf0a1b..ca3bc23 100644
--- a/compiler/src/iree/compiler/API2/Internal/Embed.cpp
+++ b/compiler/src/iree/compiler/API2/Internal/Embed.cpp
@@ -472,11 +472,13 @@
   auto builtinModule = llvm::dyn_cast<mlir::ModuleOp>(*parsedModule);
   LogicalResult result = failure();
   if (vmModule) {
-    result = translateModuleToBytecode(vmModule, session.bytecodeTargetOptions,
+    result = translateModuleToBytecode(vmModule, session.vmTargetOptions,
+                                       session.bytecodeTargetOptions,
                                        *output.outputStream);
   } else if (builtinModule) {
-    result = translateModuleToBytecode(
-        builtinModule, session.bytecodeTargetOptions, *output.outputStream);
+    result = translateModuleToBytecode(builtinModule, session.vmTargetOptions,
+                                       session.bytecodeTargetOptions,
+                                       *output.outputStream);
   } else {
     (*parsedModule)->emitError() << "expected a vm.module or builtin.module";
   }
diff --git a/compiler/src/iree/compiler/API2/Internal/MLIRInterop.cpp b/compiler/src/iree/compiler/API2/Internal/MLIRInterop.cpp
index 95317b3..ad09f51 100644
--- a/compiler/src/iree/compiler/API2/Internal/MLIRInterop.cpp
+++ b/compiler/src/iree/compiler/API2/Internal/MLIRInterop.cpp
@@ -137,11 +137,13 @@
   mlir::detail::CallbackOstream output(dataCallback, dataUserObject);
   if (auto op = llvm::dyn_cast<mlir::ModuleOp>(moduleOpCpp)) {
     result = iree_compiler::IREE::VM::translateModuleToBytecode(
-        op, optionsCpp->vmBytecodeTargetOptions, output);
+        op, optionsCpp->vmTargetOptions, optionsCpp->vmBytecodeTargetOptions,
+        output);
   } else if (auto op = llvm::dyn_cast<iree_compiler::IREE::VM::ModuleOp>(
                  moduleOpCpp)) {
     result = iree_compiler::IREE::VM::translateModuleToBytecode(
-        op, optionsCpp->vmBytecodeTargetOptions, output);
+        op, optionsCpp->vmTargetOptions, optionsCpp->vmBytecodeTargetOptions,
+        output);
   } else {
     emitError(moduleOpCpp->getLoc()) << "expected a supported module operation";
     result = failure();
diff --git a/compiler/src/iree/compiler/ConstEval/Runtime.cpp b/compiler/src/iree/compiler/ConstEval/Runtime.cpp
index 51d191f..d14198a 100644
--- a/compiler/src/iree/compiler/ConstEval/Runtime.cpp
+++ b/compiler/src/iree/compiler/ConstEval/Runtime.cpp
@@ -295,9 +295,10 @@
 LogicalResult InMemoryCompiledBinary::translateFromModule(
     mlir::ModuleOp moduleOp) {
   llvm::raw_string_ostream os(binary);
-  iree_compiler::IREE::VM::BytecodeTargetOptions targetOptions;
+  iree_compiler::IREE::VM::TargetOptions vmOptions;
+  iree_compiler::IREE::VM::BytecodeTargetOptions bytecodeOptions;
   if (failed(iree_compiler::IREE::VM::translateModuleToBytecode(
-          moduleOp, targetOptions, os))) {
+          moduleOp, vmOptions, bytecodeOptions, os))) {
     return failure();
   }
   os.flush();
diff --git a/compiler/src/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp b/compiler/src/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp
index 031e7d4..c701689 100644
--- a/compiler/src/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp
@@ -70,16 +70,22 @@
         context, b.getStringAttr(deviceID()), configAttr);
   }
 
+  IREE::VM::TargetOptions getTargetOptions(
+      IREE::HAL::ExecutableTargetAttr targetAttr) {
+    // TODO(benvanik): derive from a vm target triple; |targetAttr| is unused.
+    auto vmOptions = IREE::VM::TargetOptions::FromFlags::get();
+    vmOptions.f32Extension = true;
+    vmOptions.optimizeForStackSize = false;
+    return vmOptions;
+  }
+
   void buildTranslationPassPipeline(IREE::HAL::ExecutableVariantOp variantOp,
                                     OpPassManager &passManager) override {
     IREE::VMVX::buildVMVXTransformPassPipeline(passManager);
 
     OpPassManager &nestedModulePM = passManager.nest<ModuleOp>();
 
-    // TODO(benvanik): derive these from a vm target triple.
-    auto vmOptions = IREE::VM::TargetOptions::FromFlags::get();
-    vmOptions.f32Extension = true;
-    vmOptions.optimizeForStackSize = false;
+    auto vmOptions = getTargetOptions(variantOp.getTargetAttr());
     IREE::VM::buildVMTransformPassPipeline(nestedModulePM, vmOptions);
   }
 
@@ -107,10 +113,15 @@
     // Serialize the VM module to bytes and embed it directly.
     SmallVector<char> moduleData;
     {
-      IREE::VM::BytecodeTargetOptions bytecodeOptions;
+      auto vmOptions = getTargetOptions(variantOp.getTargetAttr());
+      // TODO(benvanik): plumb this through somewhere? these options are mostly
+      // about output format stuff such as debug information so it's probably
+      // fine to share.
+      auto bytecodeOptions = IREE::VM::BytecodeTargetOptions::FromFlags::get();
       llvm::raw_svector_ostream stream(moduleData);
       if (failed(translateModuleToBytecode(variantOp.getInnerModule(),
-                                           bytecodeOptions, stream))) {
+                                           vmOptions, bytecodeOptions,
+                                           stream))) {
         return variantOp.emitOpError()
                << "failed to serialize VM bytecode module";
       }
diff --git a/compiler/src/iree/compiler/Dialect/VM/IR/VMOpcodesCore.td b/compiler/src/iree/compiler/Dialect/VM/IR/VMOpcodesCore.td
index b170040..b271b5c 100644
--- a/compiler/src/iree/compiler/Dialect/VM/IR/VMOpcodesCore.td
+++ b/compiler/src/iree/compiler/Dialect/VM/IR/VMOpcodesCore.td
@@ -22,7 +22,7 @@
 // but we are a long way out to stabilizing this format :)
 //
 // Some opcodes require an extension prefix to indicate that runtime support
-// is optional. An op with the ExtI64 trait will require VM_OPC_ExtI64, for
+// is optional. An op with the ExtF64 trait will require VM_OPC_ExtF64, for
 // example. Ops that bridge extension sets have a canonical form that may
 // require multiple prefix codes (for example, the i64<->f64 extensions).
 
@@ -34,7 +34,7 @@
                       VM_OPC prefix = ?,
                       list<VM_OPC> cases> :
     IntEnumAttr<I8, name, description, cases> {
-  let cppNamespace = "IREE::VM";
+  let cppNamespace = "::mlir::iree_compiler::IREE::VM";
   let returnType = cppNamespace # "::" # name;
   let underlyingType = "uint8_t";
   let convertFromStorage = "static_cast<" # returnType # ">($_self.getInt())";
@@ -47,6 +47,8 @@
   string opcodeEnumTag = enumTag;
 }
 
+// Next available opcode: 0x7A
+
 // Globals:
 def VM_OPC_GlobalLoadI32         : VM_OPC<0x00, "GlobalLoadI32">;
 def VM_OPC_GlobalStoreI32        : VM_OPC<0x01, "GlobalStoreI32">;
@@ -170,6 +172,7 @@
 def VM_OPC_CmpNZRef              : VM_OPC<0x55, "CmpNZRef">;
 
 // Control flow:
+def VM_OPC_Block                 : VM_OPC<0x79, "Block">;
 def VM_OPC_Branch                : VM_OPC<0x56, "Branch">;
 def VM_OPC_CondBranch            : VM_OPC<0x57, "CondBranch">;
 def VM_OPC_Call                  : VM_OPC<0x58, "Call">;
@@ -358,6 +361,8 @@
     VM_OPC_BufferCopy,
     VM_OPC_BufferCompare,
 
+    VM_OPC_Block,
+
     // Extension opcodes (0xE0-0xFF):
     VM_OPC_PrefixExtF32,  // VM_ExtF32OpcodeAttr
     VM_OPC_PrefixExtF64,  // VM_ExtF64OpcodeAttr
diff --git a/compiler/src/iree/compiler/Dialect/VM/IR/VMTraits.h b/compiler/src/iree/compiler/Dialect/VM/IR/VMTraits.h
index 70faec2..7a0c981 100644
--- a/compiler/src/iree/compiler/Dialect/VM/IR/VMTraits.h
+++ b/compiler/src/iree/compiler/Dialect/VM/IR/VMTraits.h
@@ -42,15 +42,6 @@
 };
 
 template <typename ConcreteType>
-class ExtI64 : public OpTrait::TraitBase<ConcreteType, ExtI64> {
- public:
-  static LogicalResult verifyTrait(Operation *op) {
-    // TODO(benvanik): verify i64 ext is supported.
-    return success();
-  }
-};
-
-template <typename ConcreteType>
 class ExtF32 : public OpTrait::TraitBase<ConcreteType, ExtF32> {
  public:
   static LogicalResult verifyTrait(Operation *op) {
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BUILD b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BUILD
index 43b609a..9cf32a1 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BUILD
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BUILD
@@ -31,6 +31,7 @@
         "//compiler/src/iree/compiler/Dialect/Util/IR",
         "//compiler/src/iree/compiler/Dialect/Util/Transforms",
         "//compiler/src/iree/compiler/Dialect/VM/Analysis",
+        "//compiler/src/iree/compiler/Dialect/VM/Conversion",
         "//compiler/src/iree/compiler/Dialect/VM/IR",
         "//compiler/src/iree/compiler/Dialect/VM/Transforms",
         "//compiler/src/iree/compiler/Dialect/VM/Utils:CallingConvention",
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.cpp b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.cpp
index 85e1d4e..9dedbdb 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.cpp
@@ -9,6 +9,7 @@
 #include "iree/compiler/Dialect/Util/IR/UtilTypes.h"
 #include "iree/compiler/Dialect/VM/Analysis/RegisterAllocation.h"
 #include "iree/compiler/Dialect/VM/IR/VMDialect.h"
+#include "iree/compiler/Dialect/VM/IR/VMTypes.h"
 #include "llvm/ADT/STLExtras.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Diagnostics.h"
@@ -32,7 +33,7 @@
 
   LogicalResult beginBlock(Block *block) override {
     blockOffsets_[block] = bytecode_.size();
-    return success();
+    return writeUint8(static_cast<uint8_t>(Opcode::Block));
   }
 
   LogicalResult endBlock(Block *block) override { return success(); }
@@ -389,6 +390,7 @@
 
   debugDatabase.addFunctionSourceMap(funcOp, sourceMap);
 
+  size_t finalLength = encoder.getOffset();
   if (failed(encoder.ensureAlignment(8))) {
     funcOp.emitError() << "failed to pad function";
     return std::nullopt;
@@ -399,6 +401,8 @@
     return std::nullopt;
   }
   result.bytecodeData = bytecodeData.value();
+  result.bytecodeLength = finalLength;
+  result.blockCount = funcOp.getBlocks().size();
   result.i32RegisterCount = registerAllocation.getMaxI32RegisterOrdinal() + 1;
   result.refRegisterCount = registerAllocation.getMaxRefRegisterOrdinal() + 1;
   return result;
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.h b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.h
index f1e7eeb..8463980 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeEncoder.h
@@ -18,8 +18,13 @@
 namespace VM {
 
 struct EncodedBytecodeFunction {
-  // Encoded bytecode data for the function body.
+  // Encoded bytecode data for the function body including padding.
   std::vector<uint8_t> bytecodeData;
+  // Precise size of the bytecode in bytes, excluding alignment padding.
+  size_t bytecodeLength = 0;
+
+  // Total number of blocks including the entry block.
+  uint16_t blockCount = 0;
 
   // Total i32 register slots required for execution.
   // Note that larger types also use these slots (i64=2xi32).
@@ -32,7 +37,7 @@
 class BytecodeEncoder : public VMFuncEncoder {
  public:
   // Matches IREE_VM_BYTECODE_VERSION_MAJOR.
-  static constexpr uint32_t kVersionMajor = 13;
+  static constexpr uint32_t kVersionMajor = 14;
   // Matches IREE_VM_BYTECODE_VERSION_MINOR.
   static constexpr uint32_t kVersionMinor = 0;
   static constexpr uint32_t kVersion = (kVersionMajor << 16) | kVersionMinor;
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp
index 31cbebe..2ab87eb 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.cpp
@@ -179,8 +179,9 @@
 // Canonicalizes the module to its final form prior to emission.
 // This verifies that we only have ops we can serialize and performs any of the
 // required transformations (such as debug op stripping).
-static LogicalResult canonicalizeModule(BytecodeTargetOptions targetOptions,
-                                        IREE::VM::ModuleOp moduleOp) {
+static LogicalResult canonicalizeModule(
+    IREE::VM::BytecodeTargetOptions bytecodeOptions,
+    IREE::VM::ModuleOp moduleOp) {
   RewritePatternSet patterns(moduleOp.getContext());
   ConversionTarget target(*moduleOp.getContext());
   target.addLegalDialect<IREE::VM::VMDialect>();
@@ -198,7 +199,7 @@
     // Debug ops must not be present when stripping.
     // TODO(benvanik): add RemoveDisabledDebugOp pattern.
     if (op.hasTrait<OpTrait::IREE::VM::DebugOnly>() &&
-        targetOptions.stripDebugOps) {
+        bytecodeOptions.stripDebugOps) {
       target.setOpAction(op, ConversionTarget::LegalizationAction::Illegal);
     }
   }
@@ -219,7 +220,7 @@
   modulePasses.addPass(IREE::VM::createGlobalInitializationPass());
   modulePasses.addPass(IREE::VM::createDropEmptyModuleInitializersPass());
 
-  if (targetOptions.optimize) {
+  if (bytecodeOptions.optimize) {
     // TODO(benvanik): run this as part of a fixed-point iteration.
     modulePasses.addPass(mlir::createInlinerPass());
     modulePasses.addPass(mlir::createCSEPass());
@@ -274,9 +275,9 @@
 }
 
 // Returns a serialized function signature.
-static iree_vm_FunctionSignatureDef_ref_t makeExportFunctionSignatureDef(
-    IREE::VM::ExportOp exportOp, IREE::VM::FuncOp funcOp,
-    llvm::DenseMap<Type, int> &typeTable, FlatbufferBuilder &fbb) {
+static iree_vm_FunctionSignatureDef_ref_t makeFunctionSignatureDef(
+    IREE::VM::FuncOp funcOp, llvm::DenseMap<Type, int> &typeTable,
+    FlatbufferBuilder &fbb) {
   // Generate the signature calling convention string based on types.
   auto cconv = makeCallingConventionString(funcOp);
   if (!cconv.has_value()) return {};
@@ -314,6 +315,20 @@
                                     cconv.value(), /*attrsRef=*/0, fbb);
 }
 
+// Returns the OR of ExtF32/ExtF64 feature bits for |rootOp| and nested ops.
+static iree_vm_FeatureBits_enum_t findRequiredFeatures(Operation *rootOp) {
+  iree_vm_FeatureBits_enum_t result = 0;
+  rootOp->walk([&](Operation *op) {
+    if (op->hasTrait<OpTrait::IREE::VM::ExtF32>()) {
+      result |= iree_vm_FeatureBits_EXT_F32;
+    }
+    if (op->hasTrait<OpTrait::IREE::VM::ExtF64>()) {
+      result |= iree_vm_FeatureBits_EXT_F64;
+    }
+  });
+  return result;
+}
+
 // Builds a complete BytecodeModuleDef FlatBuffer object in |fbb|.
 // The order of the encoding is ordered to ensure that all metadata is at the
 // front of the resulting buffer. Large read-only data and bytecode blobs always
@@ -325,8 +340,10 @@
 // here during serialization but a much more trivial (and cache-friendly)
 // representation at runtime.
 static LogicalResult buildFlatBufferModule(
-    BytecodeTargetOptions targetOptions, IREE::VM::ModuleOp moduleOp,
-    MutableArrayRef<RodataRef> rodataRefs, FlatbufferBuilder &fbb) {
+    IREE::VM::TargetOptions vmOptions,
+    IREE::VM::BytecodeTargetOptions bytecodeOptions,
+    IREE::VM::ModuleOp moduleOp, MutableArrayRef<RodataRef> rodataRefs,
+    FlatbufferBuilder &fbb) {
   // Start the buffer so that we can begin recording data prior to the root
   // table (which we do at the very end). This does not change the layout of the
   // file and is only used to prime the flatcc builder.
@@ -376,20 +393,23 @@
   SmallVector<iree_vm_FunctionDescriptor_t, 8> functionDescriptors;
   bytecodeDataParts.resize(internalFuncOps.size());
   functionDescriptors.resize(internalFuncOps.size());
+  iree_vm_FeatureBits_enum_t moduleRequirements = 0;
   size_t totalBytecodeLength = 0;
-  for (auto funcOp : llvm::enumerate(internalFuncOps)) {
+  for (auto [i, funcOp] : llvm::enumerate(internalFuncOps)) {
     auto encodedFunction = BytecodeEncoder::encodeFunction(
-        funcOp.value(), typeOrdinalMap, symbolTable, debugDatabase);
+        funcOp, typeOrdinalMap, symbolTable, debugDatabase);
     if (!encodedFunction) {
-      return funcOp.value().emitError() << "failed to encode function bytecode";
+      return funcOp.emitError() << "failed to encode function bytecode";
     }
+    auto funcRequirements = findRequiredFeatures(funcOp);
+    moduleRequirements |= funcRequirements;
     iree_vm_FunctionDescriptor_assign(
-        &functionDescriptors[funcOp.index()], totalBytecodeLength,
-        encodedFunction->bytecodeData.size(), encodedFunction->i32RegisterCount,
-        encodedFunction->refRegisterCount);
+        &functionDescriptors[i], totalBytecodeLength,
+        encodedFunction->bytecodeLength, funcRequirements,
+        /*reserved=*/0u, encodedFunction->blockCount,
+        encodedFunction->i32RegisterCount, encodedFunction->refRegisterCount);
     totalBytecodeLength += encodedFunction->bytecodeData.size();
-    bytecodeDataParts[funcOp.index()] =
-        std::move(encodedFunction->bytecodeData);
+    bytecodeDataParts[i] = std::move(encodedFunction->bytecodeData);
   }
   flatbuffers_uint8_vec_start(fbb);
   uint8_t *bytecodeDataPtr =
@@ -399,8 +419,7 @@
   // for both security and determinism).
   memset(bytecodeDataPtr, 0, totalBytecodeLength);
   size_t currentBytecodeOffset = 0;
-  for (const auto &it : llvm::enumerate(internalFuncOps)) {
-    int ordinal = it.index();
+  for (const auto &[ordinal, _] : llvm::enumerate(internalFuncOps)) {
     auto data = std::move(bytecodeDataParts[ordinal]);
     std::memcpy(bytecodeDataPtr + currentBytecodeOffset, data.data(),
                 data.size());
@@ -450,16 +469,18 @@
   // NOTE: rwdata is currently unused.
   SmallVector<iree_vm_RwdataSegmentDef_ref_t, 8> rwdataSegmentRefs;
 
+  auto signatureRefs =
+      llvm::to_vector<8>(llvm::map_range(internalFuncOps, [&](auto funcOp) {
+        return makeFunctionSignatureDef(funcOp, typeOrdinalMap, fbb);
+      }));
+
   auto exportFuncRefs =
       llvm::to_vector<8>(llvm::map_range(exportFuncOps, [&](auto exportOp) {
         auto localNameRef = fbb.createString(exportOp.getExportName());
         auto funcOp =
             symbolTable.lookup<IREE::VM::FuncOp>(exportOp.getFunctionRef());
-        auto signatureRef = makeExportFunctionSignatureDef(exportOp, funcOp,
-                                                           typeOrdinalMap, fbb);
         iree_vm_ExportFunctionDef_start(fbb);
         iree_vm_ExportFunctionDef_local_name_add(fbb, localNameRef);
-        iree_vm_ExportFunctionDef_signature_add(fbb, signatureRef);
         iree_vm_ExportFunctionDef_internal_ordinal_add(
             fbb, funcOp.getOrdinal()->getLimitedValue());
         return iree_vm_ExportFunctionDef_end(fbb);
@@ -516,7 +537,8 @@
   // of the file.
   auto rodataSegmentsRef = fbb.createOffsetVecDestructive(rodataSegmentRefs);
   auto rwdataSegmentsRef = fbb.createOffsetVecDestructive(rwdataSegmentRefs);
-  auto exportFuncsOffset = fbb.createOffsetVecDestructive(exportFuncRefs);
+  auto signaturesRef = fbb.createOffsetVecDestructive(signatureRefs);
+  auto exportFuncsRef = fbb.createOffsetVecDestructive(exportFuncRefs);
   auto importFuncsRef = fbb.createOffsetVecDestructive(importFuncRefs);
   auto dependenciesRef = fbb.createOffsetVecDestructive(dependencyRefs);
   auto typesRef = fbb.createOffsetVecDestructive(typeRefs);
@@ -533,21 +555,35 @@
   }
 
   iree_vm_DebugDatabaseDef_ref_t debugDatabaseRef = 0;
-  if (!targetOptions.stripSourceMap) {
+  if (!bytecodeOptions.stripSourceMap) {
     debugDatabaseRef = debugDatabase.build(fbb);
   }
 
   auto moduleNameRef = fbb.createString(
       moduleOp.getSymName().empty() ? "module" : moduleOp.getSymName());
 
+  // TODO(benvanik): let moduleRequirements be a subset of function requirements
+  // so that we can multi-version. For now the moduleRequirements will be the OR
+  // of all functions.
+  iree_vm_FeatureBits_enum_t allowedFeatures = 0;
+  if (vmOptions.f32Extension) allowedFeatures |= iree_vm_FeatureBits_EXT_F32;
+  if (vmOptions.f64Extension) allowedFeatures |= iree_vm_FeatureBits_EXT_F64;
+  if ((moduleRequirements & allowedFeatures) != moduleRequirements) {
+    return moduleOp.emitError()
+           << "module uses features not allowed by flags (requires "
+           << moduleRequirements << ", allowed " << allowedFeatures << ")";
+  }
+
   iree_vm_BytecodeModuleDef_name_add(fbb, moduleNameRef);
   iree_vm_BytecodeModuleDef_version_add(fbb,
                                         moduleOp.getVersion().value_or(0u));
+  iree_vm_BytecodeModuleDef_requirements_add(fbb, moduleRequirements);
   // TODO(benvanik): iree_vm_BytecodeModuleDef_attrs_add
   iree_vm_BytecodeModuleDef_types_add(fbb, typesRef);
   iree_vm_BytecodeModuleDef_dependencies_add(fbb, dependenciesRef);
   iree_vm_BytecodeModuleDef_imported_functions_add(fbb, importFuncsRef);
-  iree_vm_BytecodeModuleDef_exported_functions_add(fbb, exportFuncsOffset);
+  iree_vm_BytecodeModuleDef_exported_functions_add(fbb, exportFuncsRef);
+  iree_vm_BytecodeModuleDef_function_signatures_add(fbb, signaturesRef);
   iree_vm_BytecodeModuleDef_module_state_add(fbb, moduleStateDef);
   iree_vm_BytecodeModuleDef_rodata_segments_add(fbb, rodataSegmentsRef);
   iree_vm_BytecodeModuleDef_rwdata_segments_add(fbb, rwdataSegmentsRef);
@@ -562,28 +598,30 @@
   return success();
 }
 
-LogicalResult translateModuleToBytecode(IREE::VM::ModuleOp moduleOp,
-                                        BytecodeTargetOptions targetOptions,
-                                        llvm::raw_ostream &output) {
+LogicalResult translateModuleToBytecode(
+    IREE::VM::ModuleOp moduleOp, IREE::VM::TargetOptions vmOptions,
+    IREE::VM::BytecodeTargetOptions bytecodeOptions,
+    llvm::raw_ostream &output) {
   moduleOp.getContext()->getOrLoadDialect<IREE::Util::UtilDialect>();
 
-  if (failed(canonicalizeModule(targetOptions, moduleOp))) {
+  if (failed(canonicalizeModule(bytecodeOptions, moduleOp))) {
     return moduleOp.emitError()
            << "failed to canonicalize vm.module to a serializable form";
   }
 
   // Dump VM assembly source listing to a file and annotate IR locations.
-  if (!targetOptions.sourceListing.empty()) {
+  if (!bytecodeOptions.sourceListing.empty()) {
     OpPrintingFlags printFlags;
     printFlags.elideLargeElementsAttrs(8192);
-    if (failed(mlir::generateLocationsFromIR(targetOptions.sourceListing, "vm",
-                                             moduleOp, printFlags))) {
+    if (failed(mlir::generateLocationsFromIR(bytecodeOptions.sourceListing,
+                                             "vm", moduleOp, printFlags))) {
       return moduleOp.emitError() << "failed to write source listing to '"
-                                  << targetOptions.sourceListing << "'";
+                                  << bytecodeOptions.sourceListing << "'";
     }
   }
 
-  if (targetOptions.outputFormat == BytecodeOutputFormat::kAnnotatedMlirText) {
+  if (bytecodeOptions.outputFormat ==
+      BytecodeOutputFormat::kAnnotatedMlirText) {
     // Run register allocation now and put the info in the IR so it's printed.
     for (auto funcOp : moduleOp.getBlock().getOps<IREE::VM::FuncOp>()) {
       if (!funcOp.empty()) {
@@ -597,8 +635,9 @@
   }
 
   // Debug-only formats:
-  if (targetOptions.outputFormat == BytecodeOutputFormat::kMlirText ||
-      targetOptions.outputFormat == BytecodeOutputFormat::kAnnotatedMlirText) {
+  if (bytecodeOptions.outputFormat == BytecodeOutputFormat::kMlirText ||
+      bytecodeOptions.outputFormat ==
+          BytecodeOutputFormat::kAnnotatedMlirText) {
     // Use the standard MLIR text printer.
     moduleOp.getOperation()->print(output);
     output << "\n";
@@ -607,15 +646,15 @@
 
   // Set up the output archive builder based on output format.
   std::unique_ptr<ArchiveWriter> archiveWriter;
-  if (targetOptions.emitPolyglotZip &&
-      targetOptions.outputFormat == BytecodeOutputFormat::kFlatBufferBinary) {
+  if (bytecodeOptions.emitPolyglotZip &&
+      bytecodeOptions.outputFormat == BytecodeOutputFormat::kFlatBufferBinary) {
     archiveWriter =
         std::make_unique<ZIPArchiveWriter>(moduleOp.getLoc(), output);
-  } else if (targetOptions.outputFormat ==
+  } else if (bytecodeOptions.outputFormat ==
              BytecodeOutputFormat::kFlatBufferBinary) {
     archiveWriter =
         std::make_unique<FlatArchiveWriter>(moduleOp.getLoc(), output);
-  } else if (targetOptions.outputFormat ==
+  } else if (bytecodeOptions.outputFormat ==
              BytecodeOutputFormat::kFlatBufferText) {
     archiveWriter =
         std::make_unique<JSONArchiveWriter>(moduleOp.getLoc(), output);
@@ -673,7 +712,8 @@
   // the first few pages need to be accessed to get the metadata and the rest
   // can be large bulk data.
   FlatbufferBuilder fbb;
-  if (failed(buildFlatBufferModule(targetOptions, moduleOp, rodataRefs, fbb))) {
+  if (failed(buildFlatBufferModule(vmOptions, bytecodeOptions, moduleOp,
+                                   rodataRefs, fbb))) {
     return failure();
   }
   if (failed(archiveWriter->flush(fbb))) {
@@ -684,15 +724,17 @@
   return success();
 }
 
-LogicalResult translateModuleToBytecode(mlir::ModuleOp outerModuleOp,
-                                        BytecodeTargetOptions targetOptions,
-                                        llvm::raw_ostream &output) {
+LogicalResult translateModuleToBytecode(
+    mlir::ModuleOp outerModuleOp, IREE::VM::TargetOptions vmOptions,
+    IREE::VM::BytecodeTargetOptions bytecodeOptions,
+    llvm::raw_ostream &output) {
   auto moduleOps = outerModuleOp.getOps<IREE::VM::ModuleOp>();
   if (moduleOps.empty()) {
     return outerModuleOp.emitError()
            << "outer module does not contain a vm.module op";
   }
-  return translateModuleToBytecode(*moduleOps.begin(), targetOptions, output);
+  return translateModuleToBytecode(*moduleOps.begin(), vmOptions,
+                                   bytecodeOptions, output);
 }
 
 void BytecodeTargetOptions::bindOptions(OptionsBinder &binder) {
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.h b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.h
index 6d4e884..aeed3c1 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.h
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/BytecodeModuleTarget.h
@@ -7,6 +7,7 @@
 #ifndef IREE_COMPILER_DIALECT_VM_TARGET_BYTECODE_BYTECODEMODULETARGET_H_
 #define IREE_COMPILER_DIALECT_VM_TARGET_BYTECODE_BYTECODEMODULETARGET_H_
 
+#include "iree/compiler/Dialect/VM/Conversion/TargetOptions.h"
 #include "iree/compiler/Dialect/VM/IR/VMOps.h"
 #include "iree/compiler/Utils/OptionUtils.h"
 #include "llvm/Support/raw_ostream.h"
@@ -65,12 +66,12 @@
 // serialized module format.
 //
 // Exposed via the --iree-vm-ir-to-bytecode-module translation.
-LogicalResult translateModuleToBytecode(IREE::VM::ModuleOp moduleOp,
-                                        BytecodeTargetOptions targetOptions,
-                                        llvm::raw_ostream &output);
-LogicalResult translateModuleToBytecode(mlir::ModuleOp outerModuleOp,
-                                        BytecodeTargetOptions targetOptions,
-                                        llvm::raw_ostream &output);
+LogicalResult translateModuleToBytecode(
+    IREE::VM::ModuleOp moduleOp, IREE::VM::TargetOptions vmOptions,
+    IREE::VM::BytecodeTargetOptions bytecodeOptions, llvm::raw_ostream &output);
+LogicalResult translateModuleToBytecode(
+    mlir::ModuleOp outerModuleOp, IREE::VM::TargetOptions vmOptions,
+    IREE::VM::BytecodeTargetOptions bytecodeOptions, llvm::raw_ostream &output);
 
 }  // namespace VM
 }  // namespace IREE
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/CMakeLists.txt b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/CMakeLists.txt
index 6484f0b..a5092db 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/CMakeLists.txt
@@ -34,6 +34,7 @@
     iree::compiler::Dialect::Util::IR
     iree::compiler::Dialect::Util::Transforms
     iree::compiler::Dialect::VM::Analysis
+    iree::compiler::Dialect::VM::Conversion
     iree::compiler::Dialect::VM::IR
     iree::compiler::Dialect::VM::Transforms
     iree::compiler::Dialect::VM::Utils::CallingConvention
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/TranslationRegistration.cpp b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/TranslationRegistration.cpp
index b654328..35879f8 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/TranslationRegistration.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/TranslationRegistration.cpp
@@ -20,7 +20,8 @@
       "Translates a vm.module to a bytecode module",
       [](mlir::ModuleOp moduleOp, llvm::raw_ostream &output) {
         return translateModuleToBytecode(
-            moduleOp, BytecodeTargetOptions::FromFlags::get(), output);
+            moduleOp, TargetOptions::FromFlags::get(),
+            BytecodeTargetOptions::FromFlags::get(), output);
       });
 }
 
diff --git a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/test/module_encoding_smoke.mlir b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/test/module_encoding_smoke.mlir
index 7cbdc65..3ebcb22 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/test/module_encoding_smoke.mlir
+++ b/compiler/src/iree/compiler/Dialect/VM/Target/Bytecode/test/module_encoding_smoke.mlir
@@ -4,27 +4,42 @@
 // CHECK: "name": "simple_module"
 vm.module @simple_module {
   // CHECK: "types": [{
-  // CHECK: "full_name": "i32"
+  // CHECK: "full_name": "f32"
 
   // CHECK: "exported_functions":
   // CHECK: "local_name": "func"
   vm.export @func
 
+  // CHECK: "function_signatures":
+  // CHECK-NEXT: "calling_convention": "0f_f"
+
   // CHECK: "function_descriptors":
 
   // CHECK-NEXT: {
   // CHECK-NEXT:   "bytecode_offset": 0
-  // CHECK-NEXT:   "bytecode_length": 8
+  // CHECK-NEXT:   "bytecode_length": 14
+  // CHECK-NEXT:   "requirements": "EXT_F32"
+  // CHECK-NEXT:   "reserved": 0
+  // CHECK-NEXT:   "block_count": 1
   // CHECK-NEXT:   "i32_register_count": 1
   // CHECK-NEXT:   "ref_register_count": 0
   // CHECK-NEXT: }
-  vm.func @func(%arg0 : i32) -> i32 {
-    vm.return %arg0 : i32
+  vm.func @func(%arg0 : f32) -> f32 {
+    %0 = vm.add.f32 %arg0, %arg0 : f32
+    vm.return %0 : f32
   }
 
   //      CHECK: "bytecode_data": [
-  // CHECK-NEXT:   90,
+  // CHECK-NEXT:   121,
+  // CHECK-NEXT:   224,
+  // CHECK-NEXT:   10,
   // CHECK-NEXT:   0,
+  // CHECK-NEXT:   0,
+  // CHECK-NEXT:   0,
+  // CHECK-NEXT:   0,
+  // CHECK-NEXT:   0,
+  // CHECK-NEXT:   0,
+  // CHECK-NEXT:   90,
   // CHECK-NEXT:   1,
   // CHECK-NEXT:   0,
   // CHECK-NEXT:   0,
diff --git a/compiler/src/iree/compiler/Dialect/VM/Transforms/Passes.cpp b/compiler/src/iree/compiler/Dialect/VM/Transforms/Passes.cpp
index 26d6d21..53baa1c 100644
--- a/compiler/src/iree/compiler/Dialect/VM/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Dialect/VM/Transforms/Passes.cpp
@@ -69,7 +69,7 @@
   FunctionLikeNest(passManager)
       .addPass(mlir::createSCFForLoopCanonicalizationPass);
 
-  // This pass is sketchy as it can pessimizes tight loops due to affine
+  // This pass is sketchy as it can pessimize tight loops due to affine
   // treating all indices as signed and the unsigned conversion pass not being
   // able to handle that. The scf.for canonicalization does a decent job of
   // removing trivial loops above and this catches the rest. It inserts nasty
diff --git a/runtime/src/iree/schemas/bytecode_module_def.fbs b/runtime/src/iree/schemas/bytecode_module_def.fbs
index 259ba91..a281e97 100644
--- a/runtime/src/iree/schemas/bytecode_module_def.fbs
+++ b/runtime/src/iree/schemas/bytecode_module_def.fbs
@@ -10,6 +10,14 @@
 file_identifier "IREE";
 file_extension "vmfb";
 
+// Available runtime features supported by the active VM implementation.
+enum FeatureBits:uint32 (bit_flags) {
+  // 32-bit floating point extension.
+  EXT_F32 = 0,  // 1u << 0
+  // 64-bit floating point extension.
+  EXT_F64 = 1,  // 1u << 1
+}
+
 // Arbitrary key/value reflection attribute.
 table AttrDef {
   key:string;
@@ -82,24 +90,10 @@
   // Local name of the function (excluding the module namespace).
   local_name:string;
 
-  // Signature of the function expected used for verifying that imports match.
-  signature:FunctionSignatureDef;
-
   // Ordinal in the internal_functions table that implements this function.
   internal_ordinal:int32;
 }
 
-// Defines a bytecode function.
-table InternalFunctionDef {
-  // Local name of the function or empty if the names have been stripped.
-  // The full name of the function when referenced from external modules will
-  // include the BytecodeModuleDef.name prefix.
-  local_name:string;
-
-  // Signature of the function used for reflection.
-  signature:FunctionSignatureDef;
-}
-
 table UncompressedDataDef {
 }
 
@@ -146,7 +140,17 @@
   bytecode_offset:int32;
   bytecode_length:int32;
 
-  // TODO(benvanik): remove counts and embed directly in bytecode.
+  // Features required in order to execute this function.
+  // Note that this may be a superset of the module-level features if some
+  // functions are conditionally executed based on runtime-derived capabilities.
+  requirements:FeatureBits;
+
+  // Unused, must be 0.
+  reserved:int16;
+
+  // Total number of blocks in the function.
+  block_count:int16;
+
   // Total number of i32 registers used by the function.
   i32_register_count:int16;
   // Total number of ref registers used by the function.
@@ -239,6 +243,11 @@
   // resolve.
   version:uint32;
 
+  // Features required to load and run bytecode from the module.
+  // Some functions may have additional requirements for when there are multiple
+  // variants selected based on runtime feature detection.
+  requirements:FeatureBits = 0;
+
   // Module-level attributes, if any.
   attrs:[AttrDef];
 
@@ -254,6 +263,9 @@
   // Exported function definitions used to resolve imports.
   exported_functions:[ExportFunctionDef];
 
+  // All local function signatures (internal and exports).
+  function_signatures:[FunctionSignatureDef];
+
   // Read-only data segments (like non-code .text).
   // May optionally be compressed and decompressed by the loader.
   rodata_segments:[RodataSegmentDef];
diff --git a/runtime/src/iree/vm/bytecode/disassembler.c b/runtime/src/iree/vm/bytecode/disassembler.c
index 6b0f5bf..1b38ec7 100644
--- a/runtime/src/iree/vm/bytecode/disassembler.c
+++ b/runtime/src/iree/vm/bytecode/disassembler.c
@@ -1515,6 +1515,12 @@
     // Control flow
     //===------------------------------------------------------------------===//
 
+    DISASM_OP(CORE, Block) {
+      IREE_RETURN_IF_ERROR(
+          iree_string_builder_append_string(b, IREE_SV("<block>")));
+      break;
+    }
+
     DISASM_OP(CORE, Branch) {
       int32_t block_pc = VM_ParseBranchTarget("dest");
       const iree_vm_register_remap_list_t* remap_list =
diff --git a/runtime/src/iree/vm/bytecode/dispatch.c b/runtime/src/iree/vm/bytecode/dispatch.c
index 94c987d..592adb3 100644
--- a/runtime/src/iree/vm/bytecode/dispatch.c
+++ b/runtime/src/iree/vm/bytecode/dispatch.c
@@ -1638,6 +1638,9 @@
     // Control flow
     //===------------------------------------------------------------------===//
 
+    // No-op in the interpreter.
+    DISPATCH_OP(CORE, Block, {});
+
     DISPATCH_OP(CORE, Branch, {
       int32_t block_pc = VM_DecBranchTarget("dest");
       const iree_vm_register_remap_list_t* remap_list =
diff --git a/runtime/src/iree/vm/bytecode/generated/op_table.h b/runtime/src/iree/vm/bytecode/generated/op_table.h
index b1f3a0b..3ad3c5b 100644
--- a/runtime/src/iree/vm/bytecode/generated/op_table.h
+++ b/runtime/src/iree/vm/bytecode/generated/op_table.h
@@ -128,7 +128,7 @@
   IREE_VM_OP_CORE_CtlzI64 = 0x76,
   IREE_VM_OP_CORE_AbsI32 = 0x77,
   IREE_VM_OP_CORE_AbsI64 = 0x78,
-  IREE_VM_OP_CORE_RSV_0x79,
+  IREE_VM_OP_CORE_Block = 0x79,
   IREE_VM_OP_CORE_RSV_0x7A,
   IREE_VM_OP_CORE_RSV_0x7B,
   IREE_VM_OP_CORE_RSV_0x7C,
@@ -387,7 +387,7 @@
     OPC(0x76, CtlzI64) \
     OPC(0x77, AbsI32) \
     OPC(0x78, AbsI64) \
-    RSV(0x79) \
+    OPC(0x79, Block) \
     RSV(0x7A) \
     RSV(0x7B) \
     RSV(0x7C) \
diff --git a/runtime/src/iree/vm/bytecode/module.c b/runtime/src/iree/vm/bytecode/module.c
index b122f73..83739a8 100644
--- a/runtime/src/iree/vm/bytecode/module.c
+++ b/runtime/src/iree/vm/bytecode/module.c
@@ -236,6 +236,33 @@
   return status;
 }
 
+// clang-format off
+static const iree_bitfield_string_mapping_t iree_vm_bytecode_feature_mappings[] = {
+  {iree_vm_FeatureBits_EXT_F32, IREE_SVL("EXT_F32")},
+  {iree_vm_FeatureBits_EXT_F64, IREE_SVL("EXT_F64")},
+};
+// clang-format on
+
+// Formats a VM feature bitfield as a string.
+// See iree_bitfield_format for usage.
+static iree_string_view_t iree_vm_bytecode_features_format(
+    iree_vm_FeatureBits_enum_t value, iree_bitfield_string_temp_t* out_temp) {
+  return iree_bitfield_format_inline(
+      value, IREE_ARRAYSIZE(iree_vm_bytecode_feature_mappings),
+      iree_vm_bytecode_feature_mappings, out_temp);
+}
+
+static iree_vm_FeatureBits_enum_t iree_vm_bytecode_available_features(void) {
+  iree_vm_FeatureBits_enum_t result = 0;
+#if IREE_VM_EXT_F32_ENABLE
+  result |= iree_vm_FeatureBits_EXT_F32;
+#endif  // IREE_VM_EXT_F32_ENABLE
+#if IREE_VM_EXT_F64_ENABLE
+  result |= iree_vm_FeatureBits_EXT_F64;
+#endif  // IREE_VM_EXT_F64_ENABLE
+  return result;
+}
+
 // Verifies the structure of the FlatBuffer so that we can avoid doing so during
 // runtime. There are still some conditions we must be aware of (such as omitted
 // names on functions with internal linkage), however we shouldn't need to
@@ -258,6 +285,33 @@
   iree_vm_BytecodeModuleDef_table_t module_def =
       iree_vm_BytecodeModuleDef_as_root(flatbuffer_contents.data);
 
+  const iree_vm_FeatureBits_enum_t available_features =
+      iree_vm_bytecode_available_features();
+  const iree_vm_FeatureBits_enum_t required_features =
+      iree_vm_BytecodeModuleDef_requirements(module_def);
+  if (!iree_all_bits_set(available_features, required_features)) {
+#if IREE_STATUS_MODE
+    const iree_vm_FeatureBits_enum_t needed_features =
+        required_features & ~available_features;
+    iree_bitfield_string_temp_t temp0, temp1, temp2;
+    iree_string_view_t available_features_str =
+        iree_vm_bytecode_features_format(available_features, &temp0);
+    iree_string_view_t required_features_str =
+        iree_vm_bytecode_features_format(required_features, &temp1);
+    iree_string_view_t needed_features_str =
+        iree_vm_bytecode_features_format(needed_features, &temp2);
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "required module features [%.*s] are not available in this runtime "
+        "configuration; have [%.*s] while module requires [%.*s]",
+        (int)needed_features_str.size, needed_features_str.data,
+        (int)available_features_str.size, available_features_str.data,
+        (int)required_features_str.size, required_features_str.data);
+#else
+    return iree_status_from_code(IREE_STATUS_INVALID_ARGUMENT);
+#endif  // IREE_STATUS_MODE
+  }
+
   flatbuffers_string_t name = iree_vm_BytecodeModuleDef_name(module_def);
   if (!flatbuffers_string_len(name)) {
     return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
@@ -317,9 +371,20 @@
       iree_vm_BytecodeModuleDef_imported_functions(module_def);
   iree_vm_ExportFunctionDef_vec_t exported_functions =
       iree_vm_BytecodeModuleDef_exported_functions(module_def);
+  iree_vm_FunctionSignatureDef_vec_t function_signatures =
+      iree_vm_BytecodeModuleDef_function_signatures(module_def);
   iree_vm_FunctionDescriptor_vec_t function_descriptors =
       iree_vm_BytecodeModuleDef_function_descriptors(module_def);
 
+  if (iree_vm_FunctionSignatureDef_vec_len(function_signatures) !=
+      iree_vm_FunctionDescriptor_vec_len(function_descriptors)) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "function signature and descriptor table length mismatch (%zu vs %zu)",
+        iree_vm_FunctionSignatureDef_vec_len(function_signatures),
+        iree_vm_FunctionDescriptor_vec_len(function_descriptors));
+  }
+
   for (size_t i = 0; i < iree_vm_ImportFunctionDef_vec_len(imported_functions);
        ++i) {
     iree_vm_ImportFunctionDef_table_t import_def =
@@ -362,6 +427,16 @@
     }
   }
 
+  for (size_t i = 0;
+       i < iree_vm_FunctionSignatureDef_vec_len(function_signatures); ++i) {
+    iree_vm_FunctionSignatureDef_table_t function_signature =
+        iree_vm_FunctionSignatureDef_vec_at(function_signatures, i);
+    if (!function_signature) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "function_signatures[%zu] missing body", i);
+    }
+  }
+
   // Verify that we can properly handle the bytecode embedded in the module.
   // We require that major versions match and allow loading of older minor
   // versions (we keep changes backwards-compatible).
@@ -427,7 +502,8 @@
     iree_vm_ExportFunctionDef_table_t function_def =
         iree_vm_ExportFunctionDef_vec_at(exported_functions, function.ordinal);
     ordinal = iree_vm_ExportFunctionDef_internal_ordinal(function_def);
-    signature_def = iree_vm_ExportFunctionDef_signature(function_def);
+    signature_def = iree_vm_FunctionSignatureDef_vec_at(
+        iree_vm_BytecodeModuleDef_function_signatures(module->def), ordinal);
   } else {
     // TODO(benvanik): support querying the internal functions, which could be
     // useful for debugging. Or maybe we just drop them forever?
@@ -653,12 +729,16 @@
     iree_vm_ExportFunctionDef_table_t export_def =
         iree_vm_ExportFunctionDef_vec_at(exported_functions, ordinal);
     name = iree_vm_ExportFunctionDef_local_name(export_def);
-    signature = iree_vm_ExportFunctionDef_signature(export_def);
+    signature = iree_vm_FunctionSignatureDef_vec_at(
+        iree_vm_BytecodeModuleDef_function_signatures(module->def),
+        iree_vm_ExportFunctionDef_internal_ordinal(export_def));
   } else if (linkage == IREE_VM_FUNCTION_LINKAGE_INTERNAL) {
 #if IREE_VM_BACKTRACE_ENABLE
     name = iree_vm_bytecode_module_lookup_internal_function_name(module->def,
                                                                  ordinal);
 #endif  // IREE_VM_BACKTRACE_ENABLE
+    signature = iree_vm_FunctionSignatureDef_vec_at(
+        iree_vm_BytecodeModuleDef_function_signatures(module->def), ordinal);
   }
 
   if (out_function) {
@@ -692,6 +772,8 @@
   iree_vm_bytecode_module_t* module = (iree_vm_bytecode_module_t*)self;
   iree_vm_ExportFunctionDef_vec_t exported_functions =
       iree_vm_BytecodeModuleDef_exported_functions(module->def);
+  iree_vm_FunctionSignatureDef_vec_t function_signatures =
+      iree_vm_BytecodeModuleDef_function_signatures(module->def);
 
   if (ordinal >= iree_vm_ExportFunctionDef_vec_len(exported_functions)) {
     return iree_make_status(
@@ -703,7 +785,9 @@
   iree_vm_ExportFunctionDef_table_t function_def =
       iree_vm_ExportFunctionDef_vec_at(exported_functions, ordinal);
   iree_vm_FunctionSignatureDef_table_t signature_def =
-      iree_vm_ExportFunctionDef_signature(function_def);
+      iree_vm_FunctionSignatureDef_vec_at(
+          function_signatures,
+          iree_vm_ExportFunctionDef_internal_ordinal(function_def));
   if (!signature_def) {
     return iree_make_status(
         IREE_STATUS_NOT_FOUND,
diff --git a/runtime/src/iree/vm/bytecode/module_impl.h b/runtime/src/iree/vm/bytecode/module_impl.h
index 6090f94..2b9728d 100644
--- a/runtime/src/iree/vm/bytecode/module_impl.h
+++ b/runtime/src/iree/vm/bytecode/module_impl.h
@@ -33,7 +33,7 @@
 // Major bytecode version; mismatches on this will fail in either direction.
 // This allows coarse versioning of completely incompatible versions.
 // Matches BytecodeEncoder::kVersionMajor in the compiler.
-#define IREE_VM_BYTECODE_VERSION_MAJOR 13
+#define IREE_VM_BYTECODE_VERSION_MAJOR 14
 // Minor bytecode version; lower versions are allowed to enable newer runtimes
 // to load older serialized files when there are backwards-compatible changes.
 // Higher versions are disallowed as they occur when new ops are added that
diff --git a/tests/compiler_driver/smoketest.mlir b/tests/compiler_driver/smoketest.mlir
index 9f21cb3..b7fc954 100644
--- a/tests/compiler_driver/smoketest.mlir
+++ b/tests/compiler_driver/smoketest.mlir
@@ -10,7 +10,10 @@
 // CHECK: "function_descriptors":
 // CHECK-NEXT: {
 // CHECK-NEXT:   "bytecode_offset": 0
-// CHECK-NEXT:   "bytecode_length": 8
+// CHECK-NEXT:   "bytecode_length": 6
+// CHECK-NEXT:   "requirements": 0
+// CHECK-NEXT:   "reserved": 0
+// CHECK-NEXT:   "block_count": 1
 // CHECK-NEXT:   "i32_register_count": 1
 // CHECK-NEXT:   "ref_register_count": 0
 // CHECK-NEXT: }
@@ -19,11 +22,12 @@
 }
 
 // CHECK: "bytecode_data": [
+// CHECK-NEXT:   121,
 // CHECK-NEXT:   90,
-// CHECK-NEXT:   0,
 // CHECK-NEXT:   1,
 // CHECK-NEXT:   0,
 // CHECK-NEXT:   0,
+// CHECK-NEXT:   0,
 }
 
 // -----
diff --git a/tools/iree-run-mlir-main.cc b/tools/iree-run-mlir-main.cc
index 1a252e8..d7830fb 100644
--- a/tools/iree-run-mlir-main.cc
+++ b/tools/iree-run-mlir-main.cc
@@ -285,12 +285,14 @@
   // NOTE: if we have an output file specified then we could compile into that
   // for greater efficiency. Today we assume that users aren't passing multi-GB
   // models through this tool (or if they are they have the memory to run them).
+  auto vm_options =
+      mlir::iree_compiler::IREE::VM::TargetOptions::FromFlags::get();
   auto bytecode_options =
       mlir::iree_compiler::IREE::VM::BytecodeTargetOptions::FromFlags::get();
   std::string binary_contents;
   llvm::raw_string_ostream binary_output(binary_contents);
   if (failed(mlir::iree_compiler::IREE::VM::translateModuleToBytecode(
-          mlir_module.get(), bytecode_options, binary_output))) {
+          mlir_module.get(), vm_options, bytecode_options, binary_output))) {
     return iree_make_status(
         IREE_STATUS_INTERNAL,
         "serialization to flatbuffer bytecode (binary) failed");
@@ -305,7 +307,7 @@
     std::string text_contents;
     llvm::raw_string_ostream text_output(text_contents);
     if (failed(mlir::iree_compiler::IREE::VM::translateModuleToBytecode(
-            mlir_module.get(), bytecode_options, text_output))) {
+            mlir_module.get(), vm_options, bytecode_options, text_output))) {
       return iree_make_status(IREE_STATUS_INTERNAL,
                               "serialization to annotated MLIR (text) failed");
     }
@@ -318,7 +320,7 @@
     std::string text_contents;
     llvm::raw_string_ostream text_output(text_contents);
     if (failed(mlir::iree_compiler::IREE::VM::translateModuleToBytecode(
-            mlir_module.get(), bytecode_options, text_output))) {
+            mlir_module.get(), vm_options, bytecode_options, text_output))) {
       return iree_make_status(
           IREE_STATUS_INTERNAL,
           "serialization to flatbuffer bytecode (text) failed");