Adding bytecode verification.
This moves many of the interpreter checks into an ahead-of-time bytecode
verifier. This allows us to share the same verification with JITs
and to disable it entirely for code size reasons by using the
`-DIREE_VM_BYTECODE_VERIFICATION_ENABLE=0` compiler flag.

Verification is fairly exhaustive but may still need some additions.
It is significantly better than before, though, so even if this is not the
final form it is a good step. Simpler verification (and dispatch) will come
with a pending reshuffling of the bytecode into instruction classes.
diff --git a/build_tools/cmake/iree_copts.cmake b/build_tools/cmake/iree_copts.cmake
index 4e16665..1956b04 100644
--- a/build_tools/cmake/iree_copts.cmake
+++ b/build_tools/cmake/iree_copts.cmake
@@ -402,6 +402,7 @@
       "-DIREE_HAL_MODULE_STRING_UTIL_ENABLE=0"
       "-DIREE_HAL_COMMAND_BUFFER_VALIDATION_ENABLE=0"
       "-DIREE_VM_BACKTRACE_ENABLE=0"
+      "-DIREE_VM_BYTECODE_VERIFICATION_ENABLE=0"
       "-DIREE_VM_EXT_F32_ENABLE=0"
       "-DIREE_VM_EXT_F64_ENABLE=0"
   )
diff --git a/runtime/bindings/tflite/interpreter.c b/runtime/bindings/tflite/interpreter.c
index 4183b34..0aaeb92 100644
--- a/runtime/bindings/tflite/interpreter.c
+++ b/runtime/bindings/tflite/interpreter.c
@@ -589,7 +589,7 @@
   // remain where they currently are for the next invocation.
   for (iree_host_size_t i = 0; i < interpreter->model->output_count; ++i) {
     iree_hal_buffer_t* buffer = (iree_hal_buffer_t*)iree_vm_list_get_ref_deref(
-        interpreter->output_list, i, iree_hal_buffer_get_descriptor());
+        interpreter->output_list, i, &iree_hal_buffer_descriptor);
     TfLiteTensor* tensor = &interpreter->output_tensors[i];
     IREE_RETURN_IF_ERROR(_TfLiteTensorBind(tensor, buffer));
   }
diff --git a/runtime/src/iree/base/alignment.h b/runtime/src/iree/base/alignment.h
index 7be7ebe..a1179f1 100644
--- a/runtime/src/iree/base/alignment.h
+++ b/runtime/src/iree/base/alignment.h
@@ -16,6 +16,7 @@
 #include <stdint.h>
 #include <string.h>
 
+#include "iree/base/attributes.h"
 #include "iree/base/config.h"
 #include "iree/base/target_platform.h"
 
@@ -107,6 +108,37 @@
 #define iree_sizeof_struct(t) iree_host_align(sizeof(t), iree_max_align_t)
 
 //===----------------------------------------------------------------------===//
+// Alignment intrinsics
+//===----------------------------------------------------------------------===//
+
+#if IREE_HAVE_BUILTIN(__builtin_unreachable) || defined(__GNUC__)
+#define IREE_BUILTIN_UNREACHABLE() __builtin_unreachable()
+#elif defined(IREE_COMPILER_MSVC)
+#define IREE_BUILTIN_UNREACHABLE() __assume(false)
+#else
+#define IREE_BUILTIN_UNREACHABLE() ((void)0)
+#endif  // IREE_HAVE_BUILTIN(__builtin_unreachable) || defined(__GNUC__)
+
+#if !defined(__cplusplus)
+#define IREE_DECLTYPE(v) __typeof__(v)
+#else
+#define IREE_DECLTYPE(v) decltype(v)
+#endif  // __cplusplus
+
+#if IREE_HAVE_BUILTIN(__builtin_assume_aligned) || defined(__GNUC__)
+// NOTE: gcc only assumes on the result so we have to reset ptr.
+#define IREE_BUILTIN_ASSUME_ALIGNED(ptr, size) \
+  (ptr = (IREE_DECLTYPE(ptr))(__builtin_assume_aligned((void*)(ptr), (size))))
+#elif 0  // defined(IREE_COMPILER_MSVC)
+#define IREE_BUILTIN_ASSUME_ALIGNED(ptr, size) \
+  (__assume((((uintptr_t)(ptr)) & ((1 << (size))) - 1)) == 0)
+#else
+#define IREE_BUILTIN_ASSUME_ALIGNED(ptr, size) \
+  ((((uintptr_t)(ptr) % (size)) == 0) ? (ptr)  \
+                                      : (IREE_BUILTIN_UNREACHABLE(), (ptr)))
+#endif  // IREE_HAVE_BUILTIN(__builtin_assume_aligned) || defined(__GNUC__)
+
+//===----------------------------------------------------------------------===//
 // Alignment-safe memory accesses
 //===----------------------------------------------------------------------===//
 
diff --git a/runtime/src/iree/base/attributes.h b/runtime/src/iree/base/attributes.h
index bd396a9..de74591 100644
--- a/runtime/src/iree/base/attributes.h
+++ b/runtime/src/iree/base/attributes.h
@@ -17,12 +17,18 @@
 // Any call annotated with this will be relatively stable.
 // Calls without this are considered private to the IREE implementation and
 // should not be relied upon.
-#ifdef __cplusplus
+#if defined(__cplusplus)
 #define IREE_API_EXPORT extern "C"
 #else
 #define IREE_API_EXPORT
 #endif  // __cplusplus
 
+#if defined(__cplusplus)
+#define IREE_API_EXPORT_VARIABLE extern "C"
+#else
+#define IREE_API_EXPORT_VARIABLE extern
+#endif  // __cplusplus
+
 // Denotes a function pointer that is exposed as part of the IREE API.
 // Example:
 //   iree_status_t(IREE_API_PTR* some_callback)(int value);
@@ -33,13 +39,23 @@
 //===----------------------------------------------------------------------===//
 
 // Queries for [[attribute]] identifiers in modern compilers.
-#ifdef __has_attribute
+#if defined(__has_attribute)
 #define IREE_HAVE_ATTRIBUTE(x) __has_attribute(x)
 #else
 #define IREE_HAVE_ATTRIBUTE(x) 0
 #endif  // __has_attribute
 
 //===----------------------------------------------------------------------===//
+// IREE_HAVE_BUILTIN
+//===----------------------------------------------------------------------===//
+
+#if defined(__has_builtin)
+#define IREE_HAVE_BUILTIN(x) __has_builtin(x)
+#else
+#define IREE_HAVE_BUILTIN(x) 0
+#endif  // __has_builtin
+
+//===----------------------------------------------------------------------===//
 // IREE_PRINTF_ATTRIBUTE
 //===----------------------------------------------------------------------===//
 
diff --git a/runtime/src/iree/base/config.h b/runtime/src/iree/base/config.h
index 8490c16..480ac5a 100644
--- a/runtime/src/iree/base/config.h
+++ b/runtime/src/iree/base/config.h
@@ -225,10 +225,10 @@
 // Enables disassembly of vm bytecode functions and stderr dumping of execution.
 // Increases code size quite, lowers VM performance, and is generally unsafe;
 // include only when debugging or running on trusted inputs.
-#ifdef NDEBUG
-#define IREE_VM_EXECUTION_TRACING_ENABLE 0
-#else
+#ifndef NDEBUG
 #define IREE_VM_EXECUTION_TRACING_ENABLE 1
+#else
+#define IREE_VM_EXECUTION_TRACING_ENABLE 0
 #endif  // NDEBUG
 #endif  // !IREE_VM_EXECUTION_TRACING_ENABLE
 
@@ -256,7 +256,14 @@
 // moderate performance improvement (~10-20%) on very heavy VMVX workloads but
 // adds 20-30KB to the binary size.
 #define IREE_VM_BYTECODE_DISPATCH_COMPUTED_GOTO_ENABLE 0
-#endif  // IREE_VM_BYTECODE_DISPATCH_COMPUTED_GOTO_ENABLE
+#endif  // !IREE_VM_BYTECODE_DISPATCH_COMPUTED_GOTO_ENABLE
+
+#if !defined(IREE_VM_BYTECODE_VERIFICATION_ENABLE)
+// Enables verification ensuring input bytecode is well-formed.
+// This increases binary size but should be left on in all cases where untrusted
+// inputs can be provided. Module metadata is always verified.
+#define IREE_VM_BYTECODE_VERIFICATION_ENABLE 1
+#endif  // !IREE_VM_BYTECODE_VERIFICATION_ENABLE
 
 #if !defined(IREE_VM_EXT_F32_ENABLE)
 // Enables the 32-bit floating-point instruction extension.
@@ -273,6 +280,6 @@
 #if !defined(IREE_VM_UBSAN_CHECKABLE_ENABLE)
 // Exposes VMVX kernels to UBSAN checking, else disable UBSAN checking.
 #define IREE_VM_UBSAN_CHECKABLE_ENABLE 0
-#endif  // IREE_VM_UBSAN_CHECKABLE_ENABLE
+#endif  // !IREE_VM_UBSAN_CHECKABLE_ENABLE
 
 #endif  // IREE_BASE_CONFIG_H_
diff --git a/runtime/src/iree/base/internal/debugging.h b/runtime/src/iree/base/internal/debugging.h
index 0bf232c..dec24ac 100644
--- a/runtime/src/iree/base/internal/debugging.h
+++ b/runtime/src/iree/base/internal/debugging.h
@@ -35,7 +35,7 @@
 // We implement this directly in the header with ALWAYS_INLINE so that the
 // stack doesn't get all messed up.
 IREE_ATTRIBUTE_ALWAYS_INLINE static inline void iree_debug_break(void) {
-#if defined(IREE_COMPILER_HAS_BUILTIN_DEBUG_TRAP)
+#if IREE_HAVE_BUILTIN(__builtin_debugtrap)
   __builtin_debugtrap();
 #elif defined(IREE_PLATFORM_WINDOWS)
   __debugbreak();
@@ -50,7 +50,7 @@
 #else
   // NOTE: this is unrecoverable and debugging cannot continue.
   __builtin_trap();
-#endif  // IREE_COMPILER_HAS_BUILTIN_DEBUG_TRAP
+#endif  // __builtin_debugtrap
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/runtime/src/iree/base/target_platform.h b/runtime/src/iree/base/target_platform.h
index 6d4b659..1abd987 100644
--- a/runtime/src/iree/base/target_platform.h
+++ b/runtime/src/iree/base/target_platform.h
@@ -202,16 +202,6 @@
 #endif  // defined(__has_feature)
 
 //==============================================================================
-// IREE_COMPILER_HAS_BUILTIN_DEBUG_TRAP
-//==============================================================================
-
-#if defined __has_builtin
-#if __has_builtin(__builtin_debugtrap)
-#define IREE_COMPILER_HAS_BUILTIN_DEBUG_TRAP 1
-#endif
-#endif
-
-//==============================================================================
 // IREE_PLATFORM_*
 //==============================================================================
 
diff --git a/runtime/src/iree/modules/hal/types.c b/runtime/src/iree/modules/hal/types.c
index 7109b8b..ba24f9e 100644
--- a/runtime/src/iree/modules/hal/types.c
+++ b/runtime/src/iree/modules/hal/types.c
@@ -7,22 +7,28 @@
 #include "iree/modules/hal/types.h"
 
 //===----------------------------------------------------------------------===//
-// Type registration
+// Type wrappers
 //===----------------------------------------------------------------------===//
 
-static iree_vm_ref_type_descriptor_t iree_hal_allocator_descriptor = {0};
-static iree_vm_ref_type_descriptor_t iree_hal_buffer_descriptor = {0};
-static iree_vm_ref_type_descriptor_t iree_hal_buffer_view_descriptor = {0};
-static iree_vm_ref_type_descriptor_t iree_hal_channel_descriptor = {0};
-static iree_vm_ref_type_descriptor_t iree_hal_command_buffer_descriptor = {0};
-static iree_vm_ref_type_descriptor_t iree_hal_descriptor_set_layout_descriptor =
-    {0};
-static iree_vm_ref_type_descriptor_t iree_hal_device_descriptor = {0};
-static iree_vm_ref_type_descriptor_t iree_hal_event_descriptor = {0};
-static iree_vm_ref_type_descriptor_t iree_hal_executable_descriptor = {0};
-static iree_vm_ref_type_descriptor_t iree_hal_fence_descriptor = {0};
-static iree_vm_ref_type_descriptor_t iree_hal_pipeline_layout_descriptor = {0};
-static iree_vm_ref_type_descriptor_t iree_hal_semaphore_descriptor = {0};
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_allocator, iree_hal_allocator_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_buffer, iree_hal_buffer_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_buffer_view, iree_hal_buffer_view_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_channel, iree_hal_channel_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_command_buffer,
+                             iree_hal_command_buffer_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_descriptor_set_layout,
+                             iree_hal_descriptor_set_layout_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_device, iree_hal_device_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_event, iree_hal_event_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_executable, iree_hal_executable_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_pipeline_layout,
+                             iree_hal_pipeline_layout_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_fence, iree_hal_fence_t);
+IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_semaphore, iree_hal_semaphore_t);
+
+//===----------------------------------------------------------------------===//
+// Type registration
+//===----------------------------------------------------------------------===//
 
 #define IREE_VM_REGISTER_HAL_C_TYPE(type, name, destroy_fn, descriptor)   \
   descriptor.type_name = iree_make_cstring_view(name);                    \
@@ -112,26 +118,6 @@
   return iree_ok_status();
 }
 
-//===----------------------------------------------------------------------===//
-// Type wrappers
-//===----------------------------------------------------------------------===//
-
-IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_allocator, iree_hal_allocator_t);
-IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_buffer, iree_hal_buffer_t);
-IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_buffer_view, iree_hal_buffer_view_t);
-IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_channel, iree_hal_channel_t);
-IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_command_buffer,
-                             iree_hal_command_buffer_t);
-IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_descriptor_set_layout,
-                             iree_hal_descriptor_set_layout_t);
-IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_device, iree_hal_device_t);
-IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_event, iree_hal_event_t);
-IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_executable, iree_hal_executable_t);
-IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_pipeline_layout,
-                             iree_hal_pipeline_layout_t);
-IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_fence, iree_hal_fence_t);
-IREE_VM_DEFINE_TYPE_ADAPTERS(iree_hal_semaphore, iree_hal_semaphore_t);
-
 //===--------------------------------------------------------------------===//
 // Utilities
 //===--------------------------------------------------------------------===//
@@ -139,7 +125,7 @@
 IREE_API_EXPORT iree_hal_buffer_view_t* iree_vm_list_get_buffer_view_assign(
     const iree_vm_list_t* list, iree_host_size_t i) {
   return (iree_hal_buffer_view_t*)iree_vm_list_get_ref_deref(
-      list, i, iree_hal_buffer_view_get_descriptor());
+      list, i, &iree_hal_buffer_view_descriptor);
 }
 
 IREE_API_EXPORT iree_hal_buffer_view_t* iree_vm_list_get_buffer_view_retain(
diff --git a/runtime/src/iree/task/topology.h b/runtime/src/iree/task/topology.h
index 59edd7b..7a1f568 100644
--- a/runtime/src/iree/task/topology.h
+++ b/runtime/src/iree/task/topology.h
@@ -58,7 +58,7 @@
   uint8_t group_index;
 
   // A name assigned to executor workers used for logging/tracing.
-  char name[15];
+  char name[32 - /*group_index*/ 1];
 
   // Processor index in the cpuinfo set.
   uint32_t processor_index;
diff --git a/runtime/src/iree/vm/buffer.c b/runtime/src/iree/vm/buffer.c
index 1882126..0cb2974 100644
--- a/runtime/src/iree/vm/buffer.c
+++ b/runtime/src/iree/vm/buffer.c
@@ -12,8 +12,6 @@
 #include "iree/base/tracing.h"
 #include "iree/vm/instance.h"
 
-static iree_vm_ref_type_descriptor_t iree_vm_buffer_descriptor = {0};
-
 IREE_VM_DEFINE_TYPE_ADAPTERS(iree_vm_buffer, iree_vm_buffer_t);
 
 static iree_status_t iree_vm_buffer_map(const iree_vm_buffer_t* buffer,
@@ -141,7 +139,7 @@
 
   // Try to map the source buffer first; no use continuing if we can't read the
   // data to clone.
-  iree_const_byte_span_t source_span;
+  iree_const_byte_span_t source_span = iree_const_byte_span_empty();
   IREE_RETURN_AND_END_ZONE_IF_ERROR(
       z0, iree_vm_buffer_map_ro(source_buffer, source_offset, length, 1,
                                 &source_span));
@@ -192,10 +190,10 @@
     iree_host_size_t length) {
   IREE_ASSERT_ARGUMENT(source_buffer);
   IREE_ASSERT_ARGUMENT(target_buffer);
-  iree_const_byte_span_t source_span;
+  iree_const_byte_span_t source_span = iree_const_byte_span_empty();
   IREE_RETURN_IF_ERROR(iree_vm_buffer_map_ro(source_buffer, source_offset,
                                              length, 1, &source_span));
-  iree_byte_span_t target_span;
+  iree_byte_span_t target_span = iree_byte_span_empty();
   IREE_RETURN_IF_ERROR(iree_vm_buffer_map_rw(target_buffer, target_offset,
                                              length, 1, &target_span));
   memcpy(target_span.data, source_span.data, length);
@@ -208,10 +206,10 @@
     iree_host_size_t length, bool* out_result) {
   IREE_ASSERT_ARGUMENT(lhs_buffer);
   IREE_ASSERT_ARGUMENT(rhs_buffer);
-  iree_const_byte_span_t lhs_span;
+  iree_const_byte_span_t lhs_span = iree_const_byte_span_empty();
   IREE_RETURN_IF_ERROR(
       iree_vm_buffer_map_ro(lhs_buffer, lhs_offset, length, 1, &lhs_span));
-  iree_const_byte_span_t rhs_span;
+  iree_const_byte_span_t rhs_span = iree_const_byte_span_empty();
   IREE_RETURN_IF_ERROR(
       iree_vm_buffer_map_ro(rhs_buffer, rhs_offset, length, 1, &rhs_span));
   *out_result = memcmp(lhs_span.data, rhs_span.data, length) == 0;
@@ -230,7 +228,7 @@
     iree_host_size_t element_count, iree_host_size_t element_length,
     const void* value) {
   IREE_ASSERT_ARGUMENT(target_buffer);
-  iree_byte_span_t span;
+  iree_byte_span_t span = iree_byte_span_empty();
   IREE_RETURN_IF_ERROR(iree_vm_buffer_map_rw(
       target_buffer, target_offset * element_length,
       element_count * element_length, element_length, &span));
@@ -274,7 +272,7 @@
     void* target_ptr, iree_host_size_t element_count,
     iree_host_size_t element_length) {
   IREE_ASSERT_ARGUMENT(source_buffer);
-  iree_const_byte_span_t source_span;
+  iree_const_byte_span_t source_span = iree_const_byte_span_empty();
   IREE_RETURN_IF_ERROR(iree_vm_buffer_map_ro(
       source_buffer, source_offset * element_length,
       element_count * element_length, element_length, &source_span));
@@ -288,7 +286,7 @@
     iree_host_size_t element_length) {
   IREE_ASSERT_ARGUMENT(source_ptr);
   IREE_ASSERT_ARGUMENT(target_buffer);
-  iree_byte_span_t target_span;
+  iree_byte_span_t target_span = iree_byte_span_empty();
   IREE_RETURN_IF_ERROR(iree_vm_buffer_map_rw(
       target_buffer, target_offset * element_length,
       element_count * element_length, element_length, &target_span));
@@ -301,7 +299,6 @@
     // Already registered.
     return iree_ok_status();
   }
-
   iree_vm_buffer_descriptor.destroy = iree_vm_buffer_destroy;
   iree_vm_buffer_descriptor.offsetof_counter =
       offsetof(iree_vm_buffer_t, ref_object.counter);
diff --git a/runtime/src/iree/vm/bytecode/BUILD b/runtime/src/iree/vm/bytecode/BUILD
index 54257a3..69bd228 100644
--- a/runtime/src/iree/vm/bytecode/BUILD
+++ b/runtime/src/iree/vm/bytecode/BUILD
@@ -22,48 +22,30 @@
 iree_runtime_cc_library(
     name = "module",
     srcs = [
+        "archive.c",
         "disassembler.c",
         "disassembler.h",
         "dispatch.c",
         "dispatch_util.h",
-        "generated/op_table.h",
         "module.c",
         "module_impl.h",
+        "verifier.c",
+        "verifier.h",
     ],
     hdrs = [
+        "archive.h",
         "module.h",
     ],
     deps = [
         "//runtime/src/iree/base",
-        "//runtime/src/iree/base:core_headers",
         "//runtime/src/iree/base:tracing",
         "//runtime/src/iree/base/internal",
-        "//runtime/src/iree/base/internal/flatcc:parsing",
-        "//runtime/src/iree/schemas:bytecode_module_def_c_fbs",
         "//runtime/src/iree/vm",
         "//runtime/src/iree/vm:ops",
+        "//runtime/src/iree/vm/bytecode/utils",
     ],
 )
 
-# TODO(#357): Add a script to update op_table.h.
-# iree_gentbl_cc_library(
-#     name = "op_table_gen",
-#     tbl_outs = [
-#         (["--gen-iree-vm-op-table-defs"], "op_table.h"),
-#     ],
-#     tblgen = "//tools:iree-tblgen",
-#     td_file = "//compiler/src/iree/compiler/Dialect/VM/IR:VMOps.td",
-#     deps = [
-#         "//compiler/src/iree/compiler/Dialect/Util/IR:td_files",
-#         "//compiler/src/iree/compiler/Dialect/VM/IR:td_files",
-#         "@llvm-project//mlir:CallInterfacesTdFiles",
-#         "@llvm-project//mlir:ControlFlowInterfacesTdFiles",
-#         "@llvm-project//mlir:FunctionInterfacesTdFiles",
-#         "@llvm-project//mlir:OpBaseTdFiles",
-#         "@llvm-project//mlir:SideEffectInterfacesTdFiles",
-#     ],
-# )
-
 iree_cmake_extra_content(
     content = """
 if(IREE_BUILD_COMPILER)
diff --git a/runtime/src/iree/vm/bytecode/CMakeLists.txt b/runtime/src/iree/vm/bytecode/CMakeLists.txt
index 7e5a59b..08dc9b7 100644
--- a/runtime/src/iree/vm/bytecode/CMakeLists.txt
+++ b/runtime/src/iree/vm/bytecode/CMakeLists.txt
@@ -14,23 +14,24 @@
   NAME
     module
   HDRS
+    "archive.h"
     "module.h"
   SRCS
+    "archive.c"
     "disassembler.c"
     "disassembler.h"
     "dispatch.c"
     "dispatch_util.h"
-    "generated/op_table.h"
     "module.c"
     "module_impl.h"
+    "verifier.c"
+    "verifier.h"
   DEPS
     iree::base
-    iree::base::core_headers
     iree::base::internal
-    iree::base::internal::flatcc::parsing
     iree::base::tracing
-    iree::schemas::bytecode_module_def_c_fbs
     iree::vm
+    iree::vm::bytecode::utils
     iree::vm::ops
   PUBLIC
 )
diff --git a/runtime/src/iree/vm/bytecode/archive.c b/runtime/src/iree/vm/bytecode/archive.c
new file mode 100644
index 0000000..6ee7cdc
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode/archive.c
@@ -0,0 +1,140 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/bytecode/archive.h"
+
+#include "iree/vm/bytecode/utils/isa.h"
+
+// ZIP local file header (comes immediately before each file in the archive).
+// In order to find the starting offset of the FlatBuffer in a polyglot archive
+// we need to parse this given the variable-length nature of it (we want to
+// be robust to file name and alignment changes).
+//
+// NOTE: all fields are little-endian.
+// NOTE: we don't care about the actual module size here; since we support
+//       streaming archives trying to recover it would require much more
+//       involved processing (we'd need to reference the central directory).
+//       If we wanted to support users repacking ZIPs we'd probably want to
+//       rewrite everything as we store offsets in the FlatBuffer that are
+//       difficult to update after the archive has been produced.
+#define ZIP_LOCAL_FILE_HEADER_SIGNATURE 0x04034B50u
+#if defined(IREE_COMPILER_MSVC)
+#pragma pack(push, 1)
+#endif  // IREE_COMPILER_MSVC
+typedef struct {
+  uint32_t signature;  // ZIP_LOCAL_FILE_HEADER_SIGNATURE
+  uint16_t version;
+  uint16_t general_purpose_flag;
+  uint16_t compression_method;
+  uint16_t last_modified_time;
+  uint16_t last_modified_date;
+  uint32_t crc32;              // 0 for us
+  uint32_t compressed_size;    // 0 for us
+  uint32_t uncompressed_size;  // 0 for us
+  uint16_t file_name_length;
+  uint16_t extra_field_length;
+  // file name (variable size)
+  // extra field (variable size)
+} IREE_ATTRIBUTE_PACKED zip_local_file_header_t;
+#if defined(IREE_COMPILER_MSVC)
+#pragma pack(pop)
+#endif  // IREE_COMPILER_MSVC
+static_assert(sizeof(zip_local_file_header_t) == 30, "bad packing");
+#if !defined(IREE_ENDIANNESS_LITTLE) || !IREE_ENDIANNESS_LITTLE
+#error "little endian required for zip header parsing"
+#endif  // IREE_ENDIANNESS_LITTLE
+
+// Strips any ZIP local file header from |contents| and stores the remaining
+// range in |out_stripped|.
+static iree_status_t iree_vm_bytecode_module_strip_zip_header(
+    iree_const_byte_span_t contents, iree_const_byte_span_t* out_stripped) {
+  // Ensure there are enough bytes to hold a complete local file header.
+  // Shorter inputs cannot carry a ZIP header and pass through unchanged.
+  if (!contents.data ||
+      contents.data_length < sizeof(zip_local_file_header_t)) {
+    memmove(out_stripped, &contents, sizeof(contents));
+    return iree_ok_status();
+  }
+
+  // Check to see if there's a zip local header signature.
+  // For a compliant zip file this is expected to start at offset 0.
+  const zip_local_file_header_t* header =
+      (const zip_local_file_header_t*)contents.data;
+  if (header->signature != ZIP_LOCAL_FILE_HEADER_SIGNATURE) {
+    // No signature found, probably not a ZIP.
+    memmove(out_stripped, &contents, sizeof(contents));
+    return iree_ok_status();
+  }
+
+  // Compute the starting offset of the file.
+  // Note that we still don't know (or care) if it's the file we want; actual
+  // FlatBuffer verification happens later on.
+  uint32_t offset =
+      sizeof(*header) + header->file_name_length + header->extra_field_length;
+  if (offset > contents.data_length) {
+    // Is a ZIP but doesn't have enough data; error out with something more
+    // useful than the FlatBuffer verification failing later on given that here
+    // we know this isn't a FlatBuffer.
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "archive self-reports as a zip but does not have "
+                            "enough data to contain a module");
+  }
+
+  *out_stripped = iree_make_const_byte_span(contents.data + offset,
+                                            contents.data_length - offset);
+  return iree_ok_status();
+}
+
+IREE_API_EXPORT iree_status_t iree_vm_bytecode_archive_parse_header(
+    iree_const_byte_span_t archive_contents,
+    iree_const_byte_span_t* out_flatbuffer_contents,
+    iree_host_size_t* out_rodata_offset) {
+  // Slice off any polyglot zip header we have prior to the base of the module.
+  iree_const_byte_span_t module_contents = iree_const_byte_span_empty();
+  IREE_RETURN_IF_ERROR(iree_vm_bytecode_module_strip_zip_header(
+      archive_contents, &module_contents));
+
+  // Verify there's enough data to safely check the FlatBuffer header.
+  if (!module_contents.data || module_contents.data_length < 16) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "FlatBuffer data is not present or less than 16 bytes (%zu total)",
+        module_contents.data_length);
+  }
+
+  // Read the size prefix from the head of the module contents; this should be
+  // a 4 byte value indicating the total size of the FlatBuffer data.
+  size_t length_prefix = 0;
+  flatbuffers_read_size_prefix((void*)module_contents.data, &length_prefix);
+
+  // Verify the length prefix is within bounds (always <= the remaining module
+  // bytes).
+  size_t length_remaining =
+      module_contents.data_length - sizeof(flatbuffers_uoffset_t);
+  if (length_prefix > length_remaining) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "FlatBuffer length prefix out of bounds (prefix is "
+                            "%zu but only %zu available)",
+                            length_prefix, length_remaining);
+  }
+
+  // Form the range of bytes containing just the FlatBuffer data.
+  iree_const_byte_span_t flatbuffer_contents = iree_make_const_byte_span(
+      module_contents.data + sizeof(flatbuffers_uoffset_t), length_prefix);
+
+  if (out_flatbuffer_contents) {
+    *out_flatbuffer_contents = flatbuffer_contents;
+  }
+  if (out_rodata_offset) {
+    // rodata begins immediately following the FlatBuffer in memory.
+    iree_host_size_t rodata_offset = iree_host_align(
+        (iree_host_size_t)(flatbuffer_contents.data - archive_contents.data) +
+            length_prefix,
+        IREE_VM_ARCHIVE_SEGMENT_ALIGNMENT);
+    *out_rodata_offset = rodata_offset;
+  }
+  return iree_ok_status();
+}
diff --git a/runtime/src/iree/vm/bytecode/archive.h b/runtime/src/iree/vm/bytecode/archive.h
new file mode 100644
index 0000000..ec4b395
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode/archive.h
@@ -0,0 +1,35 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_BYTECODE_ARCHIVE_H_
+#define IREE_VM_BYTECODE_ARCHIVE_H_
+
+#include "iree/base/api.h"
+#include "iree/vm/api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// Alignment applied to each segment of the archive.
+// All embedded file contents (FlatBuffers, rodata, etc) are aligned to this
+// boundary.
+#define IREE_VM_ARCHIVE_SEGMENT_ALIGNMENT 64
+
+// Parses the module archive header in |archive_contents|.
+// The subrange containing the FlatBuffer data is returned as well as the
+// offset where external rodata begins. Note that archives may have
+// non-contiguous layouts!
+IREE_API_EXPORT iree_status_t iree_vm_bytecode_archive_parse_header(
+    iree_const_byte_span_t archive_contents,
+    iree_const_byte_span_t* out_flatbuffer_contents,
+    iree_host_size_t* out_rodata_offset);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // IREE_VM_BYTECODE_ARCHIVE_H_
diff --git a/runtime/src/iree/vm/bytecode/disassembler.c b/runtime/src/iree/vm/bytecode/disassembler.c
index 1b38ec7..e0fbab0 100644
--- a/runtime/src/iree/vm/bytecode/disassembler.c
+++ b/runtime/src/iree/vm/bytecode/disassembler.c
@@ -8,7 +8,6 @@
 
 #include <inttypes.h>
 
-#include "iree/base/config.h"
 #include "iree/vm/ops.h"
 
 #define BEGIN_DISASM_PREFIX(op_name, ext) \
@@ -64,39 +63,39 @@
 #define VM_ParseBranchOperands(operands_name) \
   VM_DecBranchOperandsImpl(bytecode_data, &pc)
 #define VM_ParseOperandRegI32(name) \
-  OP_I16(0) & regs->i32_mask;       \
-  pc += kRegSize;
-#define VM_ParseOperandRegI64(name)  \
-  OP_I16(0) & (regs->i32_mask & ~1); \
-  pc += kRegSize;
+  OP_I16(0);                        \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+#define VM_ParseOperandRegI64(name) \
+  OP_I16(0);                        \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
 #define VM_ParseOperandRegF32(name) \
-  OP_I16(0) & regs->i32_mask;       \
-  pc += kRegSize;
-#define VM_ParseOperandRegF64(name)  \
-  OP_I16(0) & (regs->i32_mask & ~1); \
-  pc += kRegSize;
+  OP_I16(0);                        \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+#define VM_ParseOperandRegF64(name) \
+  OP_I16(0);                        \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
 #define VM_ParseOperandRegRef(name, out_is_move)                    \
-  OP_I16(0) & regs->ref_mask;                                       \
+  OP_I16(0) & IREE_REF_REGISTER_MASK;                               \
   *(out_is_move) = 0; /*= OP_I16(0) & IREE_REF_REGISTER_MOVE_BIT;*/ \
-  pc += kRegSize;
+  pc += IREE_REGISTER_ORDINAL_SIZE;
 #define VM_ParseVariadicOperands(name) \
   VM_DecVariadicOperandsImpl(bytecode_data, &pc)
 #define VM_ParseResultRegI32(name) \
-  OP_I16(0) & regs->i32_mask;      \
-  pc += kRegSize;
-#define VM_ParseResultRegI64(name)   \
-  OP_I16(0) & (regs->i32_mask & ~1); \
-  pc += kRegSize;
+  OP_I16(0);                       \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+#define VM_ParseResultRegI64(name) \
+  OP_I16(0);                       \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
 #define VM_ParseResultRegF32(name) \
-  OP_I16(0) & regs->i32_mask;      \
-  pc += kRegSize;
-#define VM_ParseResultRegF64(name)   \
-  OP_I16(0) & (regs->i32_mask & ~1); \
-  pc += kRegSize;
+  OP_I16(0);                       \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+#define VM_ParseResultRegF64(name) \
+  OP_I16(0);                       \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
 #define VM_ParseResultRegRef(name, out_is_move)                     \
-  OP_I16(0) & regs->ref_mask;                                       \
+  OP_I16(0) & IREE_REF_REGISTER_MASK;                               \
   *(out_is_move) = 0; /*= OP_I16(0) & IREE_REF_REGISTER_MOVE_BIT;*/ \
-  pc += kRegSize;
+  pc += IREE_REGISTER_ORDINAL_SIZE;
 #define VM_ParseVariadicResults(name) VM_ParseVariadicOperands(name)
 
 #define EMIT_REG_NAME(reg)                \
@@ -1320,6 +1319,7 @@
     DISASM_OP_CORE_BINARY_I32(RemI32S, "vm.rem.i32.s");
     DISASM_OP_CORE_BINARY_I32(RemI32U, "vm.rem.i32.u");
     DISASM_OP_CORE_TERNARY_I32(FMAI32, "vm.fma.i32");
+    DISASM_OP_CORE_UNARY_I32(AbsI32, "vm.abs.i32");
     DISASM_OP_CORE_UNARY_I32(NotI32, "vm.not.i32");
     DISASM_OP_CORE_BINARY_I32(AndI32, "vm.and.i32");
     DISASM_OP_CORE_BINARY_I32(OrI32, "vm.or.i32");
@@ -1334,6 +1334,7 @@
     DISASM_OP_CORE_BINARY_I64(RemI64S, "vm.rem.i64.s");
     DISASM_OP_CORE_BINARY_I64(RemI64U, "vm.rem.i64.u");
     DISASM_OP_CORE_TERNARY_I64(FMAI64, "vm.fma.i64");
+    DISASM_OP_CORE_UNARY_I64(AbsI64, "vm.abs.i64");
     DISASM_OP_CORE_UNARY_I64(NotI64, "vm.not.i64");
     DISASM_OP_CORE_BINARY_I64(AndI64, "vm.and.i64");
     DISASM_OP_CORE_BINARY_I64(OrI64, "vm.or.i64");
diff --git a/runtime/src/iree/vm/bytecode/disassembler.h b/runtime/src/iree/vm/bytecode/disassembler.h
index 0f3dd49..36c00ce 100644
--- a/runtime/src/iree/vm/bytecode/disassembler.h
+++ b/runtime/src/iree/vm/bytecode/disassembler.h
@@ -9,7 +9,7 @@
 
 #include <stdio.h>
 
-#include "iree/base/string_builder.h"
+#include "iree/base/api.h"
 #include "iree/vm/api.h"
 #include "iree/vm/bytecode/dispatch_util.h"
 #include "iree/vm/bytecode/module_impl.h"
diff --git a/runtime/src/iree/vm/bytecode/dispatch.c b/runtime/src/iree/vm/bytecode/dispatch.c
index 592adb3..220f9c5 100644
--- a/runtime/src/iree/vm/bytecode/dispatch.c
+++ b/runtime/src/iree/vm/bytecode/dispatch.c
@@ -28,7 +28,7 @@
 // no swapping hazards (such as 0->1,1->0). The register allocator in the
 // compiler should ensure this is the case when it can occur.
 static void iree_vm_bytecode_dispatch_remap_branch_registers(
-    const iree_vm_registers_t regs,
+    int32_t* IREE_RESTRICT regs_i32, iree_vm_ref_t* IREE_RESTRICT regs_ref,
     const iree_vm_register_remap_list_t* IREE_RESTRICT remap_list) {
   for (int i = 0; i < remap_list->size; ++i) {
     // TODO(benvanik): change encoding to avoid this branching.
@@ -37,10 +37,10 @@
     uint16_t dst_reg = remap_list->pairs[i].dst_reg;
     if (src_reg & IREE_REF_REGISTER_TYPE_BIT) {
       iree_vm_ref_retain_or_move(src_reg & IREE_REF_REGISTER_MOVE_BIT,
-                                 &regs.ref[src_reg & regs.ref_mask],
-                                 &regs.ref[dst_reg & regs.ref_mask]);
+                                 &regs_ref[src_reg & IREE_REF_REGISTER_MASK],
+                                 &regs_ref[dst_reg & IREE_REF_REGISTER_MASK]);
     } else {
-      regs.i32[dst_reg & regs.i32_mask] = regs.i32[src_reg & regs.i32_mask];
+      regs_i32[dst_reg] = regs_i32[src_reg];
     }
   }
 }
@@ -49,14 +49,14 @@
 // This can be used to eagerly release resources we don't need and reduces
 // memory consumption if used effectively prior to yields/waits.
 static void iree_vm_bytecode_dispatch_discard_registers(
-    const iree_vm_registers_t regs,
+    iree_vm_ref_t* IREE_RESTRICT regs_ref,
     const iree_vm_register_list_t* IREE_RESTRICT reg_list) {
   for (int i = 0; i < reg_list->size; ++i) {
     // TODO(benvanik): change encoding to avoid this branching.
     uint16_t reg = reg_list->registers[i];
     if ((reg & (IREE_REF_REGISTER_TYPE_BIT | IREE_REF_REGISTER_MOVE_BIT)) ==
         (IREE_REF_REGISTER_TYPE_BIT | IREE_REF_REGISTER_MOVE_BIT)) {
-      iree_vm_ref_release(&regs.ref[reg & regs.ref_mask]);
+      iree_vm_ref_release(&regs_ref[reg & IREE_REF_REGISTER_MASK]);
     }
   }
 }
@@ -65,45 +65,36 @@
 // Stack management
 //===----------------------------------------------------------------------===//
 
-static iree_vm_registers_t iree_vm_bytecode_get_register_storage(
+static inline iree_vm_registers_t iree_vm_bytecode_get_register_storage(
     iree_vm_stack_frame_t* frame) {
   const iree_vm_bytecode_frame_storage_t* stack_storage =
       (iree_vm_bytecode_frame_storage_t*)iree_vm_stack_frame_storage(frame);
-
-  // Masks indicate the valid bits of any register value within the range we
-  // have allocated in the storage. So for 4 registers we'd expect a 0b11 mask.
-  iree_vm_registers_t registers;
-  memset(&registers, 0, sizeof(registers));
-  registers.i32_mask = (uint16_t)(stack_storage->i32_register_count
-                                      ? stack_storage->i32_register_count - 1
-                                      : 0);
-  registers.ref_mask = (uint16_t)(stack_storage->ref_register_count
-                                      ? stack_storage->ref_register_count - 1
-                                      : 0);
-
-  // Register storage immediately follows the stack storage header.
-  registers.i32 =
-      (int32_t*)((uintptr_t)stack_storage + stack_storage->i32_register_offset);
-  registers.ref = (iree_vm_ref_t*)((uintptr_t)stack_storage +
-                                   stack_storage->ref_register_offset);
-
-  return registers;
+  return (iree_vm_registers_t){
+      .i32 = (int32_t*)((uintptr_t)stack_storage +
+                        stack_storage->i32_register_offset),
+      .ref = (iree_vm_ref_t*)((uintptr_t)stack_storage +
+                              stack_storage->ref_register_offset),
+  };
 }
 
 // Releases any remaining refs held in the frame storage.
 static void iree_vm_bytecode_stack_frame_cleanup(iree_vm_stack_frame_t* frame) {
-  iree_vm_registers_t regs = iree_vm_bytecode_get_register_storage(frame);
   // TODO(benvanik): allow the VM to elide this when it's known that there are
   // no more live registers.
-  for (uint16_t i = 0; i <= regs.ref_mask; ++i) {
-    iree_vm_ref_t* ref = &regs.ref[i];
+  const iree_vm_bytecode_frame_storage_t* stack_storage =
+      (iree_vm_bytecode_frame_storage_t*)iree_vm_stack_frame_storage(frame);
+  iree_vm_ref_t* refs = (iree_vm_ref_t*)((uintptr_t)stack_storage +
+                                         stack_storage->ref_register_offset);
+  for (uint16_t i = 0; i < stack_storage->ref_register_count; ++i) {
+    iree_vm_ref_t* ref = &refs[i];
     if (ref->ptr) iree_vm_ref_release(ref);
   }
 }
 
 static iree_status_t iree_vm_bytecode_function_enter(
     iree_vm_stack_t* stack, const iree_vm_function_t function,
-    iree_string_view_t cconv_results, iree_vm_stack_frame_t** out_callee_frame,
+    iree_string_view_t cconv_results,
+    iree_vm_stack_frame_t* IREE_RESTRICT* out_callee_frame,
     iree_vm_registers_t* out_callee_registers) {
   iree_vm_bytecode_module_t* module =
       (iree_vm_bytecode_module_t*)function.module->self;
@@ -118,26 +109,11 @@
   // bounds check register access. This lets us allocate the entire frame
   // (header, frame, and register storage) as a single pointer bump below.
 
-  // Round up register counts to the nearest power of 2 (if not already).
-  // This let's us use bit masks on register accesses to do bounds checking
-  // instead of more complex logic. The cost of these extra registers is only at
-  // worst 2x the required cost: so not large when thinking about the normal
-  // size of data used in an IREE app for tensors.
-  //
-  // Note that to allow the masking to work as a guard we need to ensure we at
-  // least allocate 1 register; this way an i32[reg & mask] will always point at
-  // valid memory even if mask == 0.
-  uint32_t i32_register_count = iree_math_round_up_to_pow2_u32(
-      VMMAX(1, target_descriptor->i32_register_count));
-  uint32_t ref_register_count = iree_math_round_up_to_pow2_u32(
-      VMMAX(1, target_descriptor->ref_register_count));
-  if (IREE_UNLIKELY(i32_register_count > IREE_I32_REGISTER_MASK) ||
-      IREE_UNLIKELY(ref_register_count > IREE_REF_REGISTER_MASK)) {
-    // Register count overflow. A valid compiler should never produce files that
-    // hit this.
-    return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
-                            "register count overflow");
-  }
+  // We've verified all register storage prior to execution.
+  uint32_t i32_register_count = target_descriptor->i32_register_count;
+  uint32_t ref_register_count = target_descriptor->ref_register_count;
+  IREE_ASSERT_LE(i32_register_count, IREE_I32_REGISTER_MASK);
+  IREE_ASSERT_LE(ref_register_count, IREE_REF_REGISTER_MASK);
 
   // We need to align the ref register start to the natural machine
   // alignment in case the compiler is expecting that (it makes it easier to
@@ -162,8 +138,8 @@
           *out_callee_frame);
   stack_storage->cconv_results = cconv_results;
   stack_storage->i32_register_count = i32_register_count;
-  stack_storage->ref_register_count = ref_register_count;
   stack_storage->i32_register_offset = header_size;
+  stack_storage->ref_register_count = ref_register_count;
   stack_storage->ref_register_offset = header_size + i32_register_size;
   *out_callee_registers =
       iree_vm_bytecode_get_register_storage(*out_callee_frame);
@@ -181,7 +157,8 @@
 static iree_status_t iree_vm_bytecode_external_enter(
     iree_vm_stack_t* stack, const iree_vm_function_t function,
     iree_string_view_t cconv_arguments, iree_byte_span_t arguments,
-    iree_string_view_t cconv_results, iree_vm_stack_frame_t** out_callee_frame,
+    iree_string_view_t cconv_results,
+    iree_vm_stack_frame_t* IREE_RESTRICT* out_callee_frame,
     iree_vm_registers_t* out_callee_registers) {
   // Enter the bytecode function and allocate registers.
   IREE_RETURN_IF_ERROR(iree_vm_bytecode_function_enter(
@@ -199,23 +176,21 @@
       case IREE_VM_CCONV_TYPE_I32:
       case IREE_VM_CCONV_TYPE_F32: {
         uint16_t dst_reg = i32_reg++;
-        memcpy(&callee_registers.i32[dst_reg & callee_registers.i32_mask], p,
-               sizeof(int32_t));
+        memcpy(&callee_registers.i32[dst_reg], p, sizeof(int32_t));
         p += sizeof(int32_t);
       } break;
       case IREE_VM_CCONV_TYPE_I64:
       case IREE_VM_CCONV_TYPE_F64: {
         uint16_t dst_reg = i32_reg;
         i32_reg += 2;
-        memcpy(&callee_registers.i32[dst_reg & callee_registers.i32_mask], p,
-               sizeof(int64_t));
+        memcpy(&callee_registers.i32[dst_reg], p, sizeof(int64_t));
         p += sizeof(int64_t);
       } break;
       case IREE_VM_CCONV_TYPE_REF: {
         uint16_t dst_reg = ref_reg++;
         iree_vm_ref_move(
             (iree_vm_ref_t*)p,
-            &callee_registers.ref[dst_reg & callee_registers.ref_mask]);
+            &callee_registers.ref[dst_reg & IREE_REF_REGISTER_MASK]);
         p += sizeof(iree_vm_ref_t);
       } break;
     }
@@ -248,22 +223,18 @@
         break;
       case IREE_VM_CCONV_TYPE_I32:
       case IREE_VM_CCONV_TYPE_F32: {
-        memcpy(p, &callee_registers->i32[src_reg & callee_registers->i32_mask],
-               sizeof(int32_t));
+        memcpy(p, &callee_registers->i32[src_reg], sizeof(int32_t));
         p += sizeof(int32_t);
       } break;
       case IREE_VM_CCONV_TYPE_I64:
       case IREE_VM_CCONV_TYPE_F64: {
-        memcpy(
-            p,
-            &callee_registers->i32[src_reg & (callee_registers->i32_mask & ~1)],
-            sizeof(int64_t));
+        memcpy(p, &callee_registers->i32[src_reg], sizeof(int64_t));
         p += sizeof(int64_t);
       } break;
       case IREE_VM_CCONV_TYPE_REF: {
         iree_vm_ref_retain_or_move(
             src_reg & IREE_REF_REGISTER_MOVE_BIT,
-            &callee_registers->ref[src_reg & callee_registers->ref_mask],
+            &callee_registers->ref[src_reg & IREE_REF_REGISTER_MASK],
             (iree_vm_ref_t*)p);
         p += sizeof(iree_vm_ref_t);
       } break;
@@ -281,7 +252,7 @@
     iree_vm_stack_t* stack, iree_vm_module_t* module, int32_t function_ordinal,
     const iree_vm_register_list_t* IREE_RESTRICT src_reg_list,
     const iree_vm_register_list_t* IREE_RESTRICT dst_reg_list,
-    iree_vm_stack_frame_t** out_callee_frame,
+    iree_vm_stack_frame_t* IREE_RESTRICT* out_callee_frame,
     iree_vm_registers_t* out_callee_registers) {
   // Stash the destination register list for result values on the caller.
   iree_vm_bytecode_frame_storage_t* caller_storage =
@@ -317,15 +288,15 @@
     uint16_t src_reg = src_reg_list->registers[i];
     if (src_reg & IREE_REF_REGISTER_TYPE_BIT) {
       uint16_t dst_reg = ref_reg_offset++;
-      memset(&dst_regs->ref[dst_reg & dst_regs->ref_mask], 0,
+      memset(&dst_regs->ref[dst_reg & IREE_REF_REGISTER_MASK], 0,
              sizeof(iree_vm_ref_t));
-      iree_vm_ref_retain_or_move(src_reg & IREE_REF_REGISTER_MOVE_BIT,
-                                 &src_regs.ref[src_reg & src_regs.ref_mask],
-                                 &dst_regs->ref[dst_reg & dst_regs->ref_mask]);
+      iree_vm_ref_retain_or_move(
+          src_reg & IREE_REF_REGISTER_MOVE_BIT,
+          &src_regs.ref[src_reg & IREE_REF_REGISTER_MASK],
+          &dst_regs->ref[dst_reg & IREE_REF_REGISTER_MASK]);
     } else {
       uint16_t dst_reg = i32_reg_offset++;
-      dst_regs->i32[dst_reg & dst_regs->i32_mask] =
-          src_regs.i32[src_reg & src_regs.i32_mask];
+      dst_regs->i32[dst_reg] = src_regs.i32[src_reg];
     }
   }
 
@@ -339,7 +310,7 @@
     iree_vm_stack_t* stack, iree_vm_stack_frame_t* callee_frame,
     const iree_vm_registers_t callee_registers,
     const iree_vm_register_list_t* IREE_RESTRICT src_reg_list,
-    iree_vm_stack_frame_t** out_caller_frame,
+    iree_vm_stack_frame_t* IREE_RESTRICT* out_caller_frame,
     iree_vm_registers_t* out_caller_registers) {
   // Remaps registers from source to destination across frames.
   // Registers from the |src_regs| will be copied/moved to |dst_regs| with the
@@ -357,7 +328,7 @@
           caller_frame);
   const iree_vm_register_list_t* dst_reg_list =
       caller_storage->return_registers;
-  VMCHECK(src_reg_list->size <= dst_reg_list->size);
+  IREE_ASSERT_LE(src_reg_list->size, dst_reg_list->size);
   if (IREE_UNLIKELY(src_reg_list->size > dst_reg_list->size)) {
     return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
                             "src/dst reg count mismatch on internal return");
@@ -372,11 +343,10 @@
     if (src_reg & IREE_REF_REGISTER_TYPE_BIT) {
       iree_vm_ref_retain_or_move(
           src_reg & IREE_REF_REGISTER_MOVE_BIT,
-          &callee_registers.ref[src_reg & callee_registers.ref_mask],
-          &caller_registers.ref[dst_reg & caller_registers.ref_mask]);
+          &callee_registers.ref[src_reg & IREE_REF_REGISTER_MASK],
+          &caller_registers.ref[dst_reg & IREE_REF_REGISTER_MASK]);
     } else {
-      caller_registers.i32[dst_reg & caller_registers.i32_mask] =
-          callee_registers.i32[src_reg & callee_registers.i32_mask];
+      caller_registers.i32[dst_reg] = callee_registers.i32[src_reg];
     }
   }
 
@@ -401,29 +371,25 @@
         break;
       case IREE_VM_CCONV_TYPE_I32:
       case IREE_VM_CCONV_TYPE_F32: {
-        memcpy(p,
-               &caller_registers.i32[src_reg_list->registers[reg_i++] &
-                                     caller_registers.i32_mask],
+        memcpy(p, &caller_registers.i32[src_reg_list->registers[reg_i++]],
                sizeof(int32_t));
         p += sizeof(int32_t);
       } break;
       case IREE_VM_CCONV_TYPE_I64:
       case IREE_VM_CCONV_TYPE_F64: {
-        memcpy(p,
-               &caller_registers.i32[src_reg_list->registers[reg_i++] &
-                                     (caller_registers.i32_mask & ~1)],
+        memcpy(p, &caller_registers.i32[src_reg_list->registers[reg_i++]],
                sizeof(int64_t));
         p += sizeof(int64_t);
       } break;
       case IREE_VM_CCONV_TYPE_REF: {
         uint16_t src_reg = src_reg_list->registers[reg_i++];
         iree_vm_ref_assign(
-            &caller_registers.ref[src_reg & caller_registers.ref_mask],
+            &caller_registers.ref[src_reg & IREE_REF_REGISTER_MASK],
             (iree_vm_ref_t*)p);
         p += sizeof(iree_vm_ref_t);
       } break;
       case IREE_VM_CCONV_TYPE_SPAN_START: {
-        VMCHECK(segment_size_list);
+        IREE_ASSERT(segment_size_list);
         int32_t span_count = segment_size_list->registers[seg_i];
         memcpy(p, &span_count, sizeof(int32_t));
         p += sizeof(int32_t);
@@ -448,23 +414,21 @@
               case IREE_VM_CCONV_TYPE_I32:
               case IREE_VM_CCONV_TYPE_F32: {
                 memcpy(p,
-                       &caller_registers.i32[src_reg_list->registers[reg_i++] &
-                                             caller_registers.i32_mask],
+                       &caller_registers.i32[src_reg_list->registers[reg_i++]],
                        sizeof(int32_t));
                 p += sizeof(int32_t);
               } break;
               case IREE_VM_CCONV_TYPE_I64:
               case IREE_VM_CCONV_TYPE_F64: {
                 memcpy(p,
-                       &caller_registers.i32[src_reg_list->registers[reg_i++] &
-                                             (caller_registers.i32_mask & ~1)],
+                       &caller_registers.i32[src_reg_list->registers[reg_i++]],
                        sizeof(int64_t));
                 p += sizeof(int64_t);
               } break;
               case IREE_VM_CCONV_TYPE_REF: {
                 uint16_t src_reg = src_reg_list->registers[reg_i++];
                 iree_vm_ref_assign(
-                    &caller_registers.ref[src_reg & caller_registers.ref_mask],
+                    &caller_registers.ref[src_reg & IREE_REF_REGISTER_MASK],
                     (iree_vm_ref_t*)p);
                 p += sizeof(iree_vm_ref_t);
               } break;
@@ -481,7 +445,7 @@
     iree_vm_stack_t* stack, const iree_vm_function_call_t call,
     iree_string_view_t cconv_results,
     const iree_vm_register_list_t* IREE_RESTRICT dst_reg_list,
-    iree_vm_stack_frame_t** out_caller_frame,
+    iree_vm_stack_frame_t* IREE_RESTRICT* out_caller_frame,
     iree_vm_registers_t* out_caller_registers) {
   // Call external function.
   iree_status_t call_status =
@@ -517,21 +481,18 @@
         break;
       case IREE_VM_CCONV_TYPE_I32:
       case IREE_VM_CCONV_TYPE_F32:
-        memcpy(&caller_registers.i32[dst_reg & caller_registers.i32_mask], p,
-               sizeof(int32_t));
+        memcpy(&caller_registers.i32[dst_reg], p, sizeof(int32_t));
         p += sizeof(int32_t);
         break;
       case IREE_VM_CCONV_TYPE_I64:
       case IREE_VM_CCONV_TYPE_F64:
-        memcpy(
-            &caller_registers.i32[dst_reg & (caller_registers.i32_mask & ~1)],
-            p, sizeof(int64_t));
+        memcpy(&caller_registers.i32[dst_reg], p, sizeof(int64_t));
         p += sizeof(int64_t);
         break;
       case IREE_VM_CCONV_TYPE_REF:
         iree_vm_ref_move(
             (iree_vm_ref_t*)p,
-            &caller_registers.ref[dst_reg & caller_registers.ref_mask]);
+            &caller_registers.ref[dst_reg & IREE_REF_REGISTER_MASK]);
         p += sizeof(iree_vm_ref_t);
         break;
     }
@@ -546,26 +507,27 @@
     uint32_t import_ordinal, const iree_vm_bytecode_import_t** out_import) {
   *out_import = NULL;
 
+  // Ordinal has been checked as in-bounds during verification.
   import_ordinal &= 0x7FFFFFFFu;
-  if (IREE_UNLIKELY(import_ordinal >= module_state->import_count)) {
-    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                            "import ordinal %u out of range", import_ordinal);
-  }
+  IREE_ASSERT(import_ordinal < module_state->import_count);
 
   const iree_vm_bytecode_import_t* import =
       &module_state->import_table[import_ordinal];
   if (!import->function.module) {
+#if IREE_STATUS_MODE
     iree_vm_function_t decl_function;
     IREE_RETURN_IF_ERROR(iree_vm_module_lookup_function_by_ordinal(
         iree_vm_stack_current_frame(stack)->function.module,
         IREE_VM_FUNCTION_LINKAGE_IMPORT_OPTIONAL, import_ordinal,
         &decl_function));
     iree_string_view_t import_name = iree_vm_function_name(&decl_function);
-    (void)import_name;
     return iree_make_status(IREE_STATUS_NOT_FOUND,
                             "optional import `%.*s` (ordinal %u) not resolved",
                             (int)import_name.size, import_name.data,
                             import_ordinal);
+#else
+    return iree_make_status(IREE_STATUS_NOT_FOUND);
+#endif  // IREE_STATUS_MODE
   }
 
   *out_import = import;
@@ -580,7 +542,7 @@
     uint32_t import_ordinal, const iree_vm_registers_t caller_registers,
     const iree_vm_register_list_t* IREE_RESTRICT src_reg_list,
     const iree_vm_register_list_t* IREE_RESTRICT dst_reg_list,
-    iree_vm_stack_frame_t** out_caller_frame,
+    iree_vm_stack_frame_t* IREE_RESTRICT* out_caller_frame,
     iree_vm_registers_t* out_caller_registers) {
   // Prepare |call| by looking up the import information.
   const iree_vm_bytecode_import_t* import = NULL;
@@ -617,7 +579,7 @@
     const iree_vm_register_list_t* IREE_RESTRICT segment_size_list,
     const iree_vm_register_list_t* IREE_RESTRICT src_reg_list,
     const iree_vm_register_list_t* IREE_RESTRICT dst_reg_list,
-    iree_vm_stack_frame_t** out_caller_frame,
+    iree_vm_stack_frame_t* IREE_RESTRICT* out_caller_frame,
     iree_vm_registers_t* out_caller_registers) {
   // Prepare |call| by looking up the import information.
   const iree_vm_bytecode_import_t* import = NULL;
@@ -692,9 +654,10 @@
 }
 
 static iree_status_t iree_vm_bytecode_dispatch(
-    iree_vm_stack_t* stack, iree_vm_bytecode_module_t* module,
-    iree_vm_stack_frame_t* current_frame, iree_vm_registers_t regs,
-    iree_byte_span_t call_results) {
+    iree_vm_stack_t* IREE_RESTRICT stack,
+    iree_vm_bytecode_module_t* IREE_RESTRICT module,
+    iree_vm_stack_frame_t* IREE_RESTRICT current_frame,
+    iree_vm_registers_t regs, iree_byte_span_t call_results) {
   // When required emit the dispatch tables here referencing the labels we are
   // defining below.
   DEFINE_DISPATCH_TABLES();
@@ -712,8 +675,13 @@
       module->bytecode_data.data +
       module->function_descriptor_table[current_frame->function.ordinal]
           .bytecode_offset;
-  iree_vm_source_offset_t pc = current_frame->pc;
 
+  int32_t* IREE_RESTRICT regs_i32 = regs.i32;
+  IREE_BUILTIN_ASSUME_ALIGNED(regs_i32, 16);
+  iree_vm_ref_t* IREE_RESTRICT regs_ref = regs.ref;
+  IREE_BUILTIN_ASSUME_ALIGNED(regs_ref, 16);
+
+  iree_vm_source_offset_t pc = current_frame->pc;
   BEGIN_DISPATCH_CORE() {
     //===------------------------------------------------------------------===//
     // Globals
@@ -721,13 +689,7 @@
 
     DISPATCH_OP(CORE, GlobalLoadI32, {
       uint32_t byte_offset = VM_DecGlobalAttr("global");
-      if (IREE_UNLIKELY(byte_offset >=
-                        module_state->rwdata_storage.data_length)) {
-        return iree_make_status(
-            IREE_STATUS_OUT_OF_RANGE,
-            "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
-            module_state->rwdata_storage.data_length);
-      }
+      IREE_ASSERT(byte_offset + 4 <= module_state->rwdata_storage.data_length);
       int32_t* value = VM_DecResultRegI32("value");
       const int32_t global_value =
           vm_global_load_i32(module_state->rwdata_storage.data, byte_offset);
@@ -736,13 +698,7 @@
 
     DISPATCH_OP(CORE, GlobalStoreI32, {
       uint32_t byte_offset = VM_DecGlobalAttr("global");
-      if (IREE_UNLIKELY(byte_offset >=
-                        module_state->rwdata_storage.data_length)) {
-        return iree_make_status(
-            IREE_STATUS_OUT_OF_RANGE,
-            "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
-            module_state->rwdata_storage.data_length);
-      }
+      IREE_ASSERT(byte_offset + 4 <= module_state->rwdata_storage.data_length);
       int32_t value = VM_DecOperandRegI32("value");
       vm_global_store_i32(module_state->rwdata_storage.data, byte_offset,
                           value);
@@ -750,7 +706,7 @@
 
     DISPATCH_OP(CORE, GlobalLoadIndirectI32, {
       uint32_t byte_offset = VM_DecOperandRegI32("global");
-      if (IREE_UNLIKELY(byte_offset >=
+      if (IREE_UNLIKELY((uint64_t)byte_offset + 4 >  // widen: no u32 wrap
                         module_state->rwdata_storage.data_length)) {
         return iree_make_status(
             IREE_STATUS_OUT_OF_RANGE,
@@ -765,7 +721,7 @@
 
     DISPATCH_OP(CORE, GlobalStoreIndirectI32, {
       uint32_t byte_offset = VM_DecOperandRegI32("global");
-      if (IREE_UNLIKELY(byte_offset >=
+      if (IREE_UNLIKELY((uint64_t)byte_offset + 4 >  // widen: no u32 wrap
                         module_state->rwdata_storage.data_length)) {
         return iree_make_status(
             IREE_STATUS_OUT_OF_RANGE,
@@ -779,13 +735,7 @@
 
     DISPATCH_OP(CORE, GlobalLoadI64, {
       uint32_t byte_offset = VM_DecGlobalAttr("global");
-      if (IREE_UNLIKELY(byte_offset >=
-                        module_state->rwdata_storage.data_length)) {
-        return iree_make_status(
-            IREE_STATUS_OUT_OF_RANGE,
-            "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
-            module_state->rwdata_storage.data_length);
-      }
+      IREE_ASSERT(byte_offset + 8 <= module_state->rwdata_storage.data_length);
       int64_t* value = VM_DecResultRegI64("value");
       const int64_t global_value =
           vm_global_load_i64(module_state->rwdata_storage.data, byte_offset);
@@ -794,13 +744,7 @@
 
     DISPATCH_OP(CORE, GlobalStoreI64, {
       uint32_t byte_offset = VM_DecGlobalAttr("global");
-      if (IREE_UNLIKELY(byte_offset >=
-                        module_state->rwdata_storage.data_length)) {
-        return iree_make_status(
-            IREE_STATUS_OUT_OF_RANGE,
-            "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
-            module_state->rwdata_storage.data_length);
-      }
+      IREE_ASSERT(byte_offset + 8 <= module_state->rwdata_storage.data_length);
       int64_t value = VM_DecOperandRegI64("value");
       vm_global_store_i64(module_state->rwdata_storage.data, byte_offset,
                           value);
@@ -808,7 +752,7 @@
 
     DISPATCH_OP(CORE, GlobalLoadIndirectI64, {
       uint32_t byte_offset = VM_DecOperandRegI32("global");
-      if (IREE_UNLIKELY(byte_offset >=
+      if (IREE_UNLIKELY((uint64_t)byte_offset + 8 >  // widen: no u32 wrap
                         module_state->rwdata_storage.data_length)) {
         return iree_make_status(
             IREE_STATUS_OUT_OF_RANGE,
@@ -823,7 +767,7 @@
 
     DISPATCH_OP(CORE, GlobalStoreIndirectI64, {
       uint32_t byte_offset = VM_DecOperandRegI32("global");
-      if (IREE_UNLIKELY(byte_offset >=
+      if (IREE_UNLIKELY((uint64_t)byte_offset + 8 >  // widen: no u32 wrap
                         module_state->rwdata_storage.data_length)) {
         return iree_make_status(
             IREE_STATUS_OUT_OF_RANGE,
@@ -837,12 +781,7 @@
 
     DISPATCH_OP(CORE, GlobalLoadRef, {
       uint32_t global = VM_DecGlobalAttr("global");
-      if (IREE_UNLIKELY(global >= module_state->global_ref_count)) {
-        return iree_make_status(
-            IREE_STATUS_OUT_OF_RANGE,
-            "global ref ordinal out of range: %d (table=%zu)", global,
-            module_state->global_ref_count);
-      }
+      IREE_ASSERT(global < module_state->global_ref_count);
       const iree_vm_type_def_t* type_def = VM_DecTypeOf("value");
       bool result_is_move;
       iree_vm_ref_t* result = VM_DecResultRegRef("value", &result_is_move);
@@ -853,12 +792,7 @@
 
     DISPATCH_OP(CORE, GlobalStoreRef, {
       uint32_t global = VM_DecGlobalAttr("global");
-      if (IREE_UNLIKELY(global >= module_state->global_ref_count)) {
-        return iree_make_status(
-            IREE_STATUS_OUT_OF_RANGE,
-            "global ref ordinal out of range: %d (table=%zu)", global,
-            module_state->global_ref_count);
-      }
+      IREE_ASSERT(global < module_state->global_ref_count);
       const iree_vm_type_def_t* type_def = VM_DecTypeOf("value");
       bool value_is_move;
       iree_vm_ref_t* value = VM_DecOperandRegRef("value", &value_is_move);
@@ -933,12 +867,7 @@
 
     DISPATCH_OP(CORE, ConstRefRodata, {
       uint32_t rodata_ordinal = VM_DecRodataAttr("rodata");
-      if (IREE_UNLIKELY(rodata_ordinal >= module_state->rodata_ref_count)) {
-        return iree_make_status(
-            IREE_STATUS_OUT_OF_RANGE,
-            "rodata ref ordinal out of range: %d (table=%zu)", rodata_ordinal,
-            module_state->rodata_ref_count);
-      }
+      IREE_ASSERT(rodata_ordinal < module_state->rodata_ref_count);
       bool result_is_move;
       iree_vm_ref_t* result = VM_DecResultRegRef("value", &result_is_move);
       IREE_RETURN_IF_ERROR(iree_vm_ref_wrap_retain(
@@ -1444,7 +1373,7 @@
           VM_DecVariadicOperands("values");
       int32_t* result = VM_DecResultRegI32("result");
       if (index >= 0 && index < value_reg_list->size) {
-        *result = regs.i32[value_reg_list->registers[index] & regs.i32_mask];
+        *result = regs_i32[value_reg_list->registers[index]];
       } else {
         *result = default_value;
       }
@@ -1457,8 +1386,7 @@
           VM_DecVariadicOperands("values");
       int64_t* result = VM_DecResultRegI64("result");
       if (index >= 0 && index < value_reg_list->size) {
-        *result =
-            regs.i32[value_reg_list->registers[index] & (regs.i32_mask & ~1)];
+        *result = *(int64_t*)&regs_i32[value_reg_list->registers[index]];
       } else {
         *result = default_value;
       }
@@ -1477,8 +1405,8 @@
       if (index >= 0 && index < value_reg_list->size) {
         bool is_move =
             value_reg_list->registers[index] & IREE_REF_REGISTER_MOVE_BIT;
-        iree_vm_ref_t* new_value =
-            &regs.ref[value_reg_list->registers[index] & regs.ref_mask];
+        iree_vm_ref_t* new_value = &regs_ref[value_reg_list->registers[index] &
+                                             IREE_REF_REGISTER_MASK];
         IREE_RETURN_IF_ERROR(iree_vm_ref_retain_or_move_checked(
             is_move, new_value, type_def->ref_type, result));
       } else {
@@ -1645,8 +1573,11 @@
       int32_t block_pc = VM_DecBranchTarget("dest");
       const iree_vm_register_remap_list_t* remap_list =
           VM_DecBranchOperands("operands");
-      pc = block_pc;
-      iree_vm_bytecode_dispatch_remap_branch_registers(regs, remap_list);
+      pc = block_pc + IREE_VM_BLOCK_MARKER_SIZE;  // skip block marker
+      if (IREE_UNLIKELY(remap_list->size > 0)) {
+        iree_vm_bytecode_dispatch_remap_branch_registers(regs_i32, regs_ref,
+                                                         remap_list);
+      }
     });
 
     DISPATCH_OP(CORE, CondBranch, {
@@ -1658,12 +1589,17 @@
       const iree_vm_register_remap_list_t* false_remap_list =
           VM_DecBranchOperands("false_operands");
       if (condition) {
-        pc = true_block_pc;
-        iree_vm_bytecode_dispatch_remap_branch_registers(regs, true_remap_list);
+        pc = true_block_pc + IREE_VM_BLOCK_MARKER_SIZE;  // skip block marker
+        if (IREE_UNLIKELY(true_remap_list->size > 0)) {
+          iree_vm_bytecode_dispatch_remap_branch_registers(regs_i32, regs_ref,
+                                                           true_remap_list);
+        }
       } else {
-        pc = false_block_pc;
-        iree_vm_bytecode_dispatch_remap_branch_registers(regs,
-                                                         false_remap_list);
+        pc = false_block_pc + IREE_VM_BLOCK_MARKER_SIZE;  // skip block marker
+        if (IREE_UNLIKELY(false_remap_list->size > 0)) {
+          iree_vm_bytecode_dispatch_remap_branch_registers(regs_i32, regs_ref,
+                                                           false_remap_list);
+        }
       }
     });
 
@@ -1692,6 +1628,10 @@
         bytecode_data =
             module->bytecode_data.data +
             module->function_descriptor_table[function_ordinal].bytecode_offset;
+        regs_i32 = regs.i32;
+        IREE_BUILTIN_ASSUME_ALIGNED(regs_i32, 16);
+        regs_ref = regs.ref;
+        IREE_BUILTIN_ASSUME_ALIGNED(regs_ref, 16);
         pc = current_frame->pc;
       }
     });
@@ -1747,6 +1687,10 @@
           module->bytecode_data.data +
           module->function_descriptor_table[current_frame->function.ordinal]
               .bytecode_offset;
+      regs_i32 = regs.i32;
+      IREE_BUILTIN_ASSUME_ALIGNED(regs_i32, 16);
+      regs_ref = regs.ref;
+      IREE_BUILTIN_ASSUME_ALIGNED(regs_ref, 16);
       pc = current_frame->pc;
     });
 
@@ -1765,10 +1709,7 @@
       uint32_t function_ordinal = VM_DecFuncAttr("import");
       int32_t* result = VM_DecResultRegI32("result");
       uint32_t import_ordinal = function_ordinal & 0x7FFFFFFFu;
-      if (IREE_UNLIKELY(import_ordinal >= module_state->import_count)) {
-        return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                                "import ordinal out of range");
-      }
+      IREE_ASSERT(import_ordinal < module_state->import_count);
       const iree_vm_bytecode_import_t* import =
           &module_state->import_table[import_ordinal];
       *result = import->function.module != NULL ? 1 : 0;
@@ -1784,8 +1725,10 @@
       int32_t block_pc = VM_DecBranchTarget("dest");
       const iree_vm_register_remap_list_t* remap_list =
           VM_DecBranchOperands("operands");
-      iree_vm_bytecode_dispatch_remap_branch_registers(regs, remap_list);
-      current_frame->pc = block_pc;
+      iree_vm_bytecode_dispatch_remap_branch_registers(regs_i32, regs_ref,
+                                                       remap_list);
+      current_frame->pc =
+          block_pc + IREE_VM_BLOCK_MARKER_SIZE;  // skip block marker
 
       // Return magic status code indicating a yield.
       // This isn't an error, though callers not supporting coroutines will
@@ -1803,7 +1746,7 @@
       const iree_vm_register_list_t* src_reg_list =
           VM_DecVariadicOperands("operands");
       // TODO(benvanik): trace (if enabled).
-      iree_vm_bytecode_dispatch_discard_registers(regs, src_reg_list);
+      iree_vm_bytecode_dispatch_discard_registers(regs_ref, src_reg_list);
     });
 
     DISPATCH_OP(CORE, Print, {
@@ -1812,7 +1755,7 @@
       const iree_vm_register_list_t* src_reg_list =
           VM_DecVariadicOperands("operands");
       // TODO(benvanik): print.
-      iree_vm_bytecode_dispatch_discard_registers(regs, src_reg_list);
+      iree_vm_bytecode_dispatch_discard_registers(regs_ref, src_reg_list);
     });
 
     DISPATCH_OP(CORE, Break, {
@@ -1820,7 +1763,8 @@
       int32_t block_pc = VM_DecBranchTarget("dest");
       const iree_vm_register_remap_list_t* remap_list =
           VM_DecBranchOperands("operands");
-      iree_vm_bytecode_dispatch_remap_branch_registers(regs, remap_list);
+      iree_vm_bytecode_dispatch_remap_branch_registers(regs_i32, regs_ref,
+                                                       remap_list);
       pc = block_pc;
     });
 
@@ -1832,8 +1776,9 @@
       int32_t block_pc = VM_DecBranchTarget("dest");
       const iree_vm_register_remap_list_t* remap_list =
           VM_DecBranchOperands("operands");
-      iree_vm_bytecode_dispatch_remap_branch_registers(regs, remap_list);
-      pc = block_pc;
+      iree_vm_bytecode_dispatch_remap_branch_registers(regs_i32, regs_ref,
+                                                       remap_list);
+      pc = block_pc + IREE_VM_BLOCK_MARKER_SIZE;  // skip block marker
     });
 
     //===------------------------------------------------------------------===//
@@ -1848,13 +1793,8 @@
 
       DISPATCH_OP(EXT_F32, GlobalLoadF32, {
         uint32_t byte_offset = VM_DecGlobalAttr("global");
-        if (IREE_UNLIKELY(byte_offset >=
-                          module_state->rwdata_storage.data_length)) {
-          return iree_make_status(
-              IREE_STATUS_OUT_OF_RANGE,
-              "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
-              module_state->rwdata_storage.data_length);
-        }
+        IREE_ASSERT(byte_offset + 4 <=
+                    module_state->rwdata_storage.data_length);
         float* value = VM_DecResultRegF32("value");
         const float global_value =
             vm_global_load_f32(module_state->rwdata_storage.data, byte_offset);
@@ -1863,13 +1803,8 @@
 
       DISPATCH_OP(EXT_F32, GlobalStoreF32, {
         uint32_t byte_offset = VM_DecGlobalAttr("global");
-        if (IREE_UNLIKELY(byte_offset >=
-                          module_state->rwdata_storage.data_length)) {
-          return iree_make_status(
-              IREE_STATUS_OUT_OF_RANGE,
-              "global byte_offset out of range: %d (rwdata=%zu)", byte_offset,
-              module_state->rwdata_storage.data_length);
-        }
+        IREE_ASSERT(byte_offset + 4 <=
+                    module_state->rwdata_storage.data_length);
         float value = VM_DecOperandRegF32("value");
         vm_global_store_f32(module_state->rwdata_storage.data, byte_offset,
                             value);
@@ -1877,7 +1812,7 @@
 
       DISPATCH_OP(EXT_F32, GlobalLoadIndirectF32, {
         uint32_t byte_offset = VM_DecOperandRegI32("global");
-        if (IREE_UNLIKELY(byte_offset >=
+        if (IREE_UNLIKELY((uint64_t)byte_offset + 4 >  // widen: avoid u32 wrap
                           module_state->rwdata_storage.data_length)) {
           return iree_make_status(
               IREE_STATUS_OUT_OF_RANGE,
@@ -1892,7 +1827,7 @@
 
       DISPATCH_OP(EXT_F32, GlobalStoreIndirectF32, {
         uint32_t byte_offset = VM_DecOperandRegI32("global");
-        if (IREE_UNLIKELY(byte_offset >=
+        if (IREE_UNLIKELY((uint64_t)byte_offset + 4 >  // widen: avoid u32 wrap
                           module_state->rwdata_storage.data_length)) {
           return iree_make_status(
               IREE_STATUS_OUT_OF_RANGE,
@@ -1970,8 +1905,7 @@
             VM_DecVariadicOperands("values");
         float* result = VM_DecResultRegF32("result");
         if (index >= 0 && index < value_reg_list->size) {
-          *result = *((float*)&regs.i32[value_reg_list->registers[index] &
-                                        (regs.i32_mask & ~1)]);
+          *result = *((float*)&regs_i32[value_reg_list->registers[index]]);
         } else {
           *result = default_value;
         }
diff --git a/runtime/src/iree/vm/bytecode/dispatch_util.h b/runtime/src/iree/vm/bytecode/dispatch_util.h
index 503a372..d460df2 100644
--- a/runtime/src/iree/vm/bytecode/dispatch_util.h
+++ b/runtime/src/iree/vm/bytecode/dispatch_util.h
@@ -10,11 +10,9 @@
 #include <assert.h>
 #include <string.h>
 
-#include "iree/base/alignment.h"
-#include "iree/base/config.h"
-#include "iree/base/target_platform.h"
-#include "iree/vm/bytecode/generated/op_table.h"
+#include "iree/base/api.h"
 #include "iree/vm/bytecode/module_impl.h"
+#include "iree/vm/bytecode/utils/isa.h"
 
 //===----------------------------------------------------------------------===//
 // Shared data structures
@@ -63,14 +61,8 @@
 
 // Pointers to typed register storage.
 typedef struct iree_vm_registers_t {
-  // Ordinal mask defining which ordinal bits are valid. All i32 indexing must
-  // be ANDed with this mask.
-  uint16_t i32_mask;
   // 16-byte aligned i32 register array.
   int32_t* i32;
-  // Ordinal mask defining which ordinal bits are valid. All ref indexing must
-  // be ANDed with this mask.
-  uint16_t ref_mask;
   // Naturally aligned ref register array.
   iree_vm_ref_t* ref;
 } iree_vm_registers_t;
@@ -86,30 +78,14 @@
   // will be stored by callees upon return.
   const iree_vm_register_list_t* return_registers;
 
-  // Counts of each register type rounded up to the next power of two.
-  iree_host_size_t i32_register_count;
-  iree_host_size_t ref_register_count;
-
-  // Relative byte offsets from the head of this struct.
-  iree_host_size_t i32_register_offset;
-  iree_host_size_t ref_register_offset;
+  // Counts of each register type and their relative byte offsets from the head
+  // of this struct.
+  uint32_t i32_register_count;
+  uint32_t i32_register_offset;
+  uint32_t ref_register_count;
+  uint32_t ref_register_offset;
 } iree_vm_bytecode_frame_storage_t;
 
-// Interleaved src-dst register sets for branch register remapping.
-// This structure is an overlay for the bytecode that is serialized in a
-// matching format.
-typedef struct iree_vm_register_remap_list_t {
-  uint16_t size;
-  struct pair {
-    uint16_t src_reg;
-    uint16_t dst_reg;
-  } pairs[];
-} iree_vm_register_remap_list_t;
-static_assert(iree_alignof(iree_vm_register_remap_list_t) == 2,
-              "Expecting byte alignment (to avoid padding)");
-static_assert(offsetof(iree_vm_register_remap_list_t, pairs) == 2,
-              "Expect no padding in the struct");
-
 // Maps a type ID to a type def with clamping for out of bounds values.
 static inline const iree_vm_type_def_t* iree_vm_map_type(
     iree_vm_bytecode_module_t* module, int32_t type_id) {
@@ -147,27 +123,6 @@
 #define IREE_DISPATCH_MODE_SWITCH 1
 #endif  // IREE_VM_BYTECODE_DISPATCH_COMPUTED_GOTO_ENABLE
 
-#ifndef NDEBUG
-#define VMCHECK(expr) assert(expr)
-#else
-#define VMCHECK(expr)
-#endif  // NDEBUG
-
-//===----------------------------------------------------------------------===//
-// Bytecode data reading with little-/big-endian support
-//===----------------------------------------------------------------------===//
-
-static const int kRegSize = sizeof(uint16_t);
-
-// Bytecode data access macros for reading values of a given type from a byte
-// offset within the current function.
-#define OP_I8(i) iree_unaligned_load_le((uint8_t*)&bytecode_data[pc + (i)])
-#define OP_I16(i) iree_unaligned_load_le((uint16_t*)&bytecode_data[pc + (i)])
-#define OP_I32(i) iree_unaligned_load_le((uint32_t*)&bytecode_data[pc + (i)])
-#define OP_I64(i) iree_unaligned_load_le((uint64_t*)&bytecode_data[pc + (i)])
-#define OP_F32(i) iree_unaligned_load_le((float*)&bytecode_data[pc + (i)])
-#define OP_F64(i) iree_unaligned_load_le((double*)&bytecode_data[pc + (i)])
-
 //===----------------------------------------------------------------------===//
 // Utilities matching the tablegen op encoding scheme
 //===----------------------------------------------------------------------===//
@@ -177,9 +132,6 @@
 // Each macro will increment the pc by the number of bytes read and as such must
 // be called in the same order the values are encoded.
 
-#define VM_AlignPC(pc, alignment) \
-  (pc) = ((pc) + ((alignment)-1)) & ~((alignment)-1)
-
 #define VM_DecConstI8(name) \
   OP_I8(0);                 \
   ++pc;
@@ -195,7 +147,6 @@
 #define VM_DecConstF64(name) \
   OP_F64(0);                 \
   pc += 8;
-#define VM_DecOpcode(opcode) VM_DecConstI8(#opcode)
 #define VM_DecFuncAttr(name) VM_DecConstI32(name)
 #define VM_DecGlobalAttr(name) VM_DecConstI32(name)
 #define VM_DecRodataAttr(name) VM_DecConstI32(name)
@@ -216,58 +167,62 @@
   VM_DecBranchOperandsImpl(bytecode_data, &pc)
 static inline const iree_vm_register_remap_list_t* VM_DecBranchOperandsImpl(
     const uint8_t* IREE_RESTRICT bytecode_data, iree_vm_source_offset_t* pc) {
-  VM_AlignPC(*pc, kRegSize);
+  VM_AlignPC(*pc, IREE_REGISTER_ORDINAL_SIZE);
   const iree_vm_register_remap_list_t* list =
       (const iree_vm_register_remap_list_t*)&bytecode_data[*pc];
-  *pc = *pc + kRegSize + list->size * 2 * kRegSize;
+  *pc = *pc + IREE_REGISTER_ORDINAL_SIZE +
+        list->size * 2 * IREE_REGISTER_ORDINAL_SIZE;
   return list;
 }
-#define VM_DecOperandRegI32(name)      \
-  regs.i32[OP_I16(0) & regs.i32_mask]; \
-  pc += kRegSize;
-#define VM_DecOperandRegI64(name)                           \
-  *((int64_t*)&regs.i32[OP_I16(0) & (regs.i32_mask & ~1)]); \
-  pc += kRegSize;
+#define VM_DecOperandRegI32(name) \
+  regs_i32[OP_I16(0)];            \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+#define VM_DecOperandRegI64(name)    \
+  *((int64_t*)&regs_i32[OP_I16(0)]); \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
 #define VM_DecOperandRegI64HostSize(name) \
   (iree_host_size_t) VM_DecOperandRegI64(name)
-#define VM_DecOperandRegF32(name)                  \
-  *((float*)&regs.i32[OP_I16(0) & regs.i32_mask]); \
-  pc += kRegSize;
-#define VM_DecOperandRegF64(name)                          \
-  *((double*)&regs.i32[OP_I16(0) & (regs.i32_mask & ~1)]); \
-  pc += kRegSize;
+#define VM_DecOperandRegF32(name)  \
+  *((float*)&regs_i32[OP_I16(0)]); \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+#define VM_DecOperandRegF64(name)   \
+  *((double*)&regs_i32[OP_I16(0)]); \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
 #define VM_DecOperandRegRef(name, out_is_move)                      \
-  &regs.ref[OP_I16(0) & regs.ref_mask];                             \
+  &regs_ref[OP_I16(0) & IREE_REF_REGISTER_MASK];                    \
   *(out_is_move) = 0; /*= OP_I16(0) & IREE_REF_REGISTER_MOVE_BIT;*/ \
-  pc += kRegSize;
+  pc += IREE_REGISTER_ORDINAL_SIZE;
 #define VM_DecVariadicOperands(name) \
   VM_DecVariadicOperandsImpl(bytecode_data, &pc)
 static inline const iree_vm_register_list_t* VM_DecVariadicOperandsImpl(
     const uint8_t* IREE_RESTRICT bytecode_data, iree_vm_source_offset_t* pc) {
-  VM_AlignPC(*pc, kRegSize);
+  VM_AlignPC(*pc, IREE_REGISTER_ORDINAL_SIZE);
   const iree_vm_register_list_t* list =
       (const iree_vm_register_list_t*)&bytecode_data[*pc];
-  *pc = *pc + kRegSize + list->size * kRegSize;
+  *pc = *pc + IREE_REGISTER_ORDINAL_SIZE +
+        list->size * IREE_REGISTER_ORDINAL_SIZE;
   return list;
 }
-#define VM_DecResultRegI32(name)        \
-  &regs.i32[OP_I16(0) & regs.i32_mask]; \
-  pc += kRegSize;
-#define VM_DecResultRegI64(name)                           \
-  ((int64_t*)&regs.i32[OP_I16(0) & (regs.i32_mask & ~1)]); \
-  pc += kRegSize;
-#define VM_DecResultRegF32(name)                  \
-  ((float*)&regs.i32[OP_I16(0) & regs.i32_mask]); \
-  pc += kRegSize;
-#define VM_DecResultRegF64(name)                          \
-  ((double*)&regs.i32[OP_I16(0) & (regs.i32_mask & ~1)]); \
-  pc += kRegSize;
+#define VM_DecResultRegI32(name) \
+  &regs_i32[OP_I16(0)];          \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+#define VM_DecResultRegI64(name)    \
+  ((int64_t*)&regs_i32[OP_I16(0)]); \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+#define VM_DecResultRegF32(name)  \
+  ((float*)&regs_i32[OP_I16(0)]); \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+#define VM_DecResultRegF64(name)   \
+  ((double*)&regs_i32[OP_I16(0)]); \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
 #define VM_DecResultRegRef(name, out_is_move)                       \
-  &regs.ref[OP_I16(0) & regs.ref_mask];                             \
+  &regs_ref[OP_I16(0) & IREE_REF_REGISTER_MASK];                    \
   *(out_is_move) = 0; /*= OP_I16(0) & IREE_REF_REGISTER_MOVE_BIT;*/ \
-  pc += kRegSize;
+  pc += IREE_REGISTER_ORDINAL_SIZE;
 #define VM_DecVariadicResults(name) VM_DecVariadicOperands(name)
 
+#define IREE_VM_BLOCK_MARKER_SIZE 1
+
 //===----------------------------------------------------------------------===//
 // Dispatch table structure
 //===----------------------------------------------------------------------===//
@@ -276,13 +231,6 @@
 // doesn't support it, though, and there may be other targets (like wasm) that
 // can only handle the switch-based approach.
 
-// Bytecode data -offset used when looking for the start of the currently
-// dispatched instruction: `instruction_start = pc - OFFSET`
-#define VM_PC_OFFSET_CORE 1
-#define VM_PC_OFFSET_EXT_I32 2
-#define VM_PC_OFFSET_EXT_F32 2
-#define VM_PC_OFFSET_EXT_F64 2
-
 #if defined(IREE_DISPATCH_MODE_COMPUTED_GOTO)
 
 // Dispatch table mapping 1:1 with bytecode ops.
@@ -334,20 +282,20 @@
 
 #define DISPATCH_UNHANDLED_CORE()                                           \
   _dispatch_unhandled : {                                                   \
-    VMCHECK(0);                                                             \
+    IREE_ASSERT(0);                                                         \
     return iree_make_status(IREE_STATUS_UNIMPLEMENTED, "unhandled opcode"); \
   }
 #define UNHANDLED_DISPATCH_PREFIX(op_name, ext)                    \
   _dispatch_CORE_##op_name : {                                     \
-    VMCHECK(0);                                                    \
+    IREE_ASSERT(0);                                                \
     return iree_make_status(IREE_STATUS_UNIMPLEMENTED,             \
                             "unhandled dispatch extension " #ext); \
   }
 
-#define DISPATCH_OP(ext, op_name, body)                          \
-  _dispatch_##ext##_##op_name:;                                  \
-  IREE_DISPATCH_TRACE_INSTRUCTION(VM_PC_OFFSET_##ext, #op_name); \
-  body;                                                          \
+#define DISPATCH_OP(ext, op_name, body)                               \
+  _dispatch_##ext##_##op_name:;                                       \
+  IREE_DISPATCH_TRACE_INSTRUCTION(IREE_VM_PC_OFFSET_##ext, #op_name); \
+  body;                                                               \
   goto* kDispatchTable_CORE[bytecode_data[pc++]];
 
 #define BEGIN_DISPATCH_PREFIX(op_name, ext)                                   \
@@ -367,23 +315,25 @@
 
 #define DEFINE_DISPATCH_TABLES()
 
-#define DISPATCH_UNHANDLED_CORE()                      \
-  default: {                                           \
-    VMCHECK(0);                                        \
-    return iree_make_status(IREE_STATUS_UNIMPLEMENTED, \
-                            "unhandled core opcode");  \
+#define DISPATCH_UNHANDLED_CORE()                         \
+  default: {                                              \
+    IREE_ASSERT(0);                                       \
+    IREE_BUILTIN_UNREACHABLE(); /* ok because verified */ \
+    return iree_make_status(IREE_STATUS_UNIMPLEMENTED,    \
+                            "unhandled core opcode");     \
   }
 #define UNHANDLED_DISPATCH_PREFIX(op_name, ext)                    \
   case IREE_VM_OP_CORE_##op_name: {                                \
-    VMCHECK(0);                                                    \
+    IREE_ASSERT(0);                                                \
+    IREE_BUILTIN_UNREACHABLE(); /* ok because verified */          \
     return iree_make_status(IREE_STATUS_UNIMPLEMENTED,             \
                             "unhandled dispatch extension " #ext); \
   }
 
-#define DISPATCH_OP(ext, op_name, body)                            \
-  case IREE_VM_OP_##ext##_##op_name: {                             \
-    IREE_DISPATCH_TRACE_INSTRUCTION(VM_PC_OFFSET_##ext, #op_name); \
-    body;                                                          \
+#define DISPATCH_OP(ext, op_name, body)                                 \
+  case IREE_VM_OP_##ext##_##op_name: {                                  \
+    IREE_DISPATCH_TRACE_INSTRUCTION(IREE_VM_PC_OFFSET_##ext, #op_name); \
+    body;                                                               \
   } break;
 
 #define BEGIN_DISPATCH_PREFIX(op_name, ext) \
diff --git a/runtime/src/iree/vm/bytecode/module.c b/runtime/src/iree/vm/bytecode/module.c
index 83739a8..4d88f4c 100644
--- a/runtime/src/iree/vm/bytecode/module.c
+++ b/runtime/src/iree/vm/bytecode/module.c
@@ -10,146 +10,10 @@
 #include <stdint.h>
 #include <string.h>
 
-#include "iree/base/api.h"
 #include "iree/base/tracing.h"
-#include "iree/vm/api.h"
+#include "iree/vm/bytecode/archive.h"
 #include "iree/vm/bytecode/module_impl.h"
-
-// Alignment applied to each segment of the archive.
-// All embedded file contents (FlatBuffers, rodata, etc) are aligned to this
-// boundary.
-#define IREE_VM_ARCHIVE_SEGMENT_ALIGNMENT 64
-
-// ZIP local file header (comes immediately before each file in the archive).
-// In order to find the starting offset of the FlatBuffer in a polyglot archive
-// we need to parse this given the variable-length nature of it (we want to
-// be robust to file name and alignment changes).
-//
-// NOTE: all fields are little-endian.
-// NOTE: we don't care about the actual module size here; since we support
-//       streaming archives trying to recover it would require much more
-//       involved processing (we'd need to reference the central directory).
-//       If we wanted to support users repacking ZIPs we'd probably want to
-//       rewrite everything as we store offsets in the FlatBuffer that are
-//       difficult to update after the archive has been produced.
-#define ZIP_LOCAL_FILE_HEADER_SIGNATURE 0x04034B50u
-#if defined(IREE_COMPILER_MSVC)
-#pragma pack(push, 1)
-#endif  // IREE_COMPILER_MSVC
-typedef struct {
-  uint32_t signature;  // ZIP_LOCAL_FILE_HEADER_SIGNATURE
-  uint16_t version;
-  uint16_t general_purpose_flag;
-  uint16_t compression_method;
-  uint16_t last_modified_time;
-  uint16_t last_modified_date;
-  uint32_t crc32;              // 0 for us
-  uint32_t compressed_size;    // 0 for us
-  uint32_t uncompressed_size;  // 0 for us
-  uint16_t file_name_length;
-  uint16_t extra_field_length;
-  // file name (variable size)
-  // extra field (variable size)
-} IREE_ATTRIBUTE_PACKED zip_local_file_header_t;
-#if defined(IREE_COMPILER_MSVC)
-#pragma pack(pop)
-#endif  // IREE_COMPILER_MSVC
-static_assert(sizeof(zip_local_file_header_t) == 30, "bad packing");
-#if !defined(IREE_ENDIANNESS_LITTLE) || !IREE_ENDIANNESS_LITTLE
-#error "little endian required for zip header parsing"
-#endif  // IREE_ENDIANNESS_LITTLE
-
-// Strips any ZIP local file header from |contents| and stores the remaining
-// range in |out_stripped|.
-static iree_status_t iree_vm_bytecode_module_strip_zip_header(
-    iree_const_byte_span_t contents, iree_const_byte_span_t* out_stripped) {
-  // Ensure there's at least some bytes we can check for the header.
-  // Since we're only looking to strip zip stuff here we can check on that.
-  if (!contents.data ||
-      contents.data_length < sizeof(zip_local_file_header_t)) {
-    memmove(out_stripped, &contents, sizeof(contents));
-    return iree_ok_status();
-  }
-
-  // Check to see if there's a zip local header signature.
-  // For a compliant zip file this is expected to start at offset 0.
-  const zip_local_file_header_t* header =
-      (const zip_local_file_header_t*)contents.data;
-  if (header->signature != ZIP_LOCAL_FILE_HEADER_SIGNATURE) {
-    // No signature found, probably not a ZIP.
-    memmove(out_stripped, &contents, sizeof(contents));
-    return iree_ok_status();
-  }
-
-  // Compute the starting offset of the file.
-  // Note that we still don't know (or care) if it's the file we want; actual
-  // FlatBuffer verification happens later on.
-  uint32_t offset =
-      sizeof(*header) + header->file_name_length + header->extra_field_length;
-  if (offset > contents.data_length) {
-    // Is a ZIP but doesn't have enough data; error out with something more
-    // useful than the FlatBuffer verification failing later on given that here
-    // we know this isn't a FlatBuffer.
-    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                            "archive self-reports as a zip but does not have "
-                            "enough data to contain a module");
-  }
-
-  *out_stripped = iree_make_const_byte_span(contents.data + offset,
-                                            contents.data_length - offset);
-  return iree_ok_status();
-}
-
-IREE_API_EXPORT iree_status_t iree_vm_bytecode_module_parse_header(
-    iree_const_byte_span_t archive_contents,
-    iree_const_byte_span_t* out_flatbuffer_contents,
-    iree_host_size_t* out_rodata_offset) {
-  // Slice off any polyglot zip header we have prior to the base of the module.
-  iree_const_byte_span_t module_contents = iree_const_byte_span_empty();
-  IREE_RETURN_IF_ERROR(iree_vm_bytecode_module_strip_zip_header(
-      archive_contents, &module_contents));
-
-  // Verify there's enough data to safely check the FlatBuffer header.
-  if (!module_contents.data || module_contents.data_length < 16) {
-    return iree_make_status(
-        IREE_STATUS_INVALID_ARGUMENT,
-        "FlatBuffer data is not present or less than 16 bytes (%zu total)",
-        module_contents.data_length);
-  }
-
-  // Read the size prefix from the head of the module contents; this should be
-  // a 4 byte value indicating the total size of the FlatBuffer data.
-  size_t length_prefix = 0;
-  flatbuffers_read_size_prefix((void*)module_contents.data, &length_prefix);
-
-  // Verify the length prefix is within bounds (always <= the remaining module
-  // bytes).
-  size_t length_remaining =
-      module_contents.data_length - sizeof(flatbuffers_uoffset_t);
-  if (length_prefix > length_remaining) {
-    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                            "FlatBuffer length prefix out of bounds (prefix is "
-                            "%zu but only %zu available)",
-                            length_prefix, length_remaining);
-  }
-
-  // Form the range of bytes containing just the FlatBuffer data.
-  iree_const_byte_span_t flatbuffer_contents = iree_make_const_byte_span(
-      module_contents.data + sizeof(flatbuffers_uoffset_t), length_prefix);
-
-  if (out_flatbuffer_contents) {
-    *out_flatbuffer_contents = flatbuffer_contents;
-  }
-  if (out_rodata_offset) {
-    // rodata begins immediately following the FlatBuffer in memory.
-    iree_host_size_t rodata_offset = iree_host_align(
-        (iree_host_size_t)(flatbuffer_contents.data - archive_contents.data) +
-            length_prefix,
-        IREE_VM_ARCHIVE_SEGMENT_ALIGNMENT);
-    *out_rodata_offset = rodata_offset;
-  }
-  return iree_ok_status();
-}
+#include "iree/vm/bytecode/verifier.h"
 
 // Perform an strcmp between a FlatBuffers string and an IREE string view.
 static bool iree_vm_flatbuffer_strcmp(flatbuffers_string_t lhs,
@@ -236,252 +100,6 @@
   return status;
 }
 
-// clang-format off
-static const iree_bitfield_string_mapping_t iree_vm_bytecode_feature_mappings[] = {
-  {iree_vm_FeatureBits_EXT_F32, IREE_SVL("EXT_F32")},
-  {iree_vm_FeatureBits_EXT_F64, IREE_SVL("EXT_F64")},
-};
-// clang-format on
-
-// Formats a buffer usage bitfield as a string.
-// See iree_bitfield_format for usage.
-static iree_string_view_t iree_vm_bytecode_features_format(
-    iree_vm_FeatureBits_enum_t value, iree_bitfield_string_temp_t* out_temp) {
-  return iree_bitfield_format_inline(
-      value, IREE_ARRAYSIZE(iree_vm_bytecode_feature_mappings),
-      iree_vm_bytecode_feature_mappings, out_temp);
-}
-
-static iree_vm_FeatureBits_enum_t iree_vm_bytecode_available_features(void) {
-  iree_vm_FeatureBits_enum_t result = 0;
-#if IREE_VM_EXT_F32_ENABLE
-  result |= iree_vm_FeatureBits_EXT_F32;
-#endif  // IREE_VM_EXT_F32_ENABLE
-#if IREE_VM_EXT_F64_ENABLE
-  result |= iree_vm_FeatureBits_EXT_F64;
-#endif  // IREE_VM_EXT_F64_ENABLE
-  return result;
-}
-
-// Verifies the structure of the FlatBuffer so that we can avoid doing so during
-// runtime. There are still some conditions we must be aware of (such as omitted
-// names on functions with internal linkage), however we shouldn't need to
-// bounds check anything within the FlatBuffer after this succeeds.
-static iree_status_t iree_vm_bytecode_module_flatbuffer_verify(
-    iree_const_byte_span_t archive_contents,
-    iree_const_byte_span_t flatbuffer_contents,
-    iree_host_size_t archive_rodata_offset) {
-  // Run flatcc generated verification. This ensures all pointers are in-bounds
-  // and that we can safely walk the file, but not that the actual contents of
-  // the FlatBuffer meet our expectations.
-  int verify_ret = iree_vm_BytecodeModuleDef_verify_as_root(
-      flatbuffer_contents.data, flatbuffer_contents.data_length);
-  if (verify_ret != flatcc_verify_ok) {
-    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                            "FlatBuffer verification failed: %s",
-                            flatcc_verify_error_string(verify_ret));
-  }
-
-  iree_vm_BytecodeModuleDef_table_t module_def =
-      iree_vm_BytecodeModuleDef_as_root(flatbuffer_contents.data);
-
-  const iree_vm_FeatureBits_enum_t available_features =
-      iree_vm_bytecode_available_features();
-  const iree_vm_FeatureBits_enum_t required_features =
-      iree_vm_BytecodeModuleDef_requirements(module_def);
-  if (!iree_all_bits_set(available_features, required_features)) {
-#if IREE_STATUS_MODE
-    const iree_vm_FeatureBits_enum_t needed_features =
-        required_features & ~available_features;
-    iree_bitfield_string_temp_t temp0, temp1, temp2;
-    iree_string_view_t available_features_str =
-        iree_vm_bytecode_features_format(available_features, &temp0);
-    iree_string_view_t required_features_str =
-        iree_vm_bytecode_features_format(required_features, &temp1);
-    iree_string_view_t needed_features_str =
-        iree_vm_bytecode_features_format(needed_features, &temp2);
-    return iree_make_status(
-        IREE_STATUS_INVALID_ARGUMENT,
-        "required module features [%.*s] are not available in this runtime "
-        "configuration; have [%.*s] while module requires [%.*s]",
-        (int)needed_features_str.size, needed_features_str.data,
-        (int)available_features_str.size, available_features_str.data,
-        (int)required_features_str.size, required_features_str.data);
-#else
-    return iree_status_from_code(IREE_STATUS_INVALID_ARGUMENT);
-#endif  // IREE_STATUS_MODE
-  }
-
-  flatbuffers_string_t name = iree_vm_BytecodeModuleDef_name(module_def);
-  if (!flatbuffers_string_len(name)) {
-    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                            "module missing name field");
-  }
-
-  iree_vm_TypeDef_vec_t types = iree_vm_BytecodeModuleDef_types(module_def);
-  for (size_t i = 0; i < iree_vm_TypeDef_vec_len(types); ++i) {
-    iree_vm_TypeDef_table_t type_def = iree_vm_TypeDef_vec_at(types, i);
-    if (!type_def) {
-      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                              "types[%zu] missing body", i);
-    }
-    flatbuffers_string_t full_name = iree_vm_TypeDef_full_name(type_def);
-    if (flatbuffers_string_len(full_name) <= 0) {
-      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                              "types[%zu] missing name", i);
-    }
-  }
-
-  iree_vm_RodataSegmentDef_vec_t rodata_segments =
-      iree_vm_BytecodeModuleDef_rodata_segments(module_def);
-  for (size_t i = 0; i < iree_vm_RodataSegmentDef_vec_len(rodata_segments);
-       ++i) {
-    iree_vm_RodataSegmentDef_table_t segment =
-        iree_vm_RodataSegmentDef_vec_at(rodata_segments, i);
-    if (iree_vm_RodataSegmentDef_embedded_data_is_present(segment)) {
-      continue;  // embedded data is verified by FlatBuffers
-    }
-    uint64_t segment_offset =
-        iree_vm_RodataSegmentDef_external_data_offset(segment);
-    uint64_t segment_length =
-        iree_vm_RodataSegmentDef_external_data_length(segment);
-    uint64_t segment_end =
-        archive_rodata_offset + segment_offset + segment_length;
-    if (segment_end > archive_contents.data_length) {
-      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                              "rodata[%zu] external reference out of range", i);
-    }
-  }
-
-  iree_vm_ModuleDependencyDef_vec_t dependencies =
-      iree_vm_BytecodeModuleDef_dependencies(module_def);
-  for (size_t i = 0; i < iree_vm_ModuleDependencyDef_vec_len(dependencies);
-       ++i) {
-    iree_vm_ModuleDependencyDef_table_t dependency_def =
-        iree_vm_ModuleDependencyDef_vec_at(dependencies, i);
-    flatbuffers_string_t module_name =
-        iree_vm_ModuleDependencyDef_name(dependency_def);
-    if (flatbuffers_string_len(module_name) == 0) {
-      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                              "dependencies[%zu] has no module name", i);
-    }
-  }
-
-  iree_vm_ImportFunctionDef_vec_t imported_functions =
-      iree_vm_BytecodeModuleDef_imported_functions(module_def);
-  iree_vm_ExportFunctionDef_vec_t exported_functions =
-      iree_vm_BytecodeModuleDef_exported_functions(module_def);
-  iree_vm_FunctionSignatureDef_vec_t function_signatures =
-      iree_vm_BytecodeModuleDef_function_signatures(module_def);
-  iree_vm_FunctionDescriptor_vec_t function_descriptors =
-      iree_vm_BytecodeModuleDef_function_descriptors(module_def);
-
-  if (iree_vm_FunctionSignatureDef_vec_len(function_signatures) !=
-      iree_vm_FunctionDescriptor_vec_len(function_descriptors)) {
-    return iree_make_status(
-        IREE_STATUS_INVALID_ARGUMENT,
-        "function signature and descriptor table length mismatch (%zu vs %zu)",
-        iree_vm_FunctionSignatureDef_vec_len(function_signatures),
-        iree_vm_FunctionDescriptor_vec_len(function_descriptors));
-  }
-
-  for (size_t i = 0; i < iree_vm_ImportFunctionDef_vec_len(imported_functions);
-       ++i) {
-    iree_vm_ImportFunctionDef_table_t import_def =
-        iree_vm_ImportFunctionDef_vec_at(imported_functions, i);
-    if (!import_def) {
-      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                              "imports[%zu] missing body", i);
-    }
-    flatbuffers_string_t full_name =
-        iree_vm_ImportFunctionDef_full_name(import_def);
-    if (!flatbuffers_string_len(full_name)) {
-      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                              "imports[%zu] missing full_name", i);
-    }
-  }
-
-  for (size_t i = 0; i < iree_vm_ExportFunctionDef_vec_len(exported_functions);
-       ++i) {
-    iree_vm_ExportFunctionDef_table_t export_def =
-        iree_vm_ExportFunctionDef_vec_at(exported_functions, i);
-    if (!export_def) {
-      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                              "exports[%zu] missing body", i);
-    }
-    flatbuffers_string_t local_name =
-        iree_vm_ExportFunctionDef_local_name(export_def);
-    if (!flatbuffers_string_len(local_name)) {
-      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                              "exports[%zu] missing local_name", i);
-    }
-    iree_host_size_t internal_ordinal =
-        iree_vm_ExportFunctionDef_internal_ordinal(export_def);
-    if (internal_ordinal >=
-        iree_vm_FunctionDescriptor_vec_len(function_descriptors)) {
-      return iree_make_status(
-          IREE_STATUS_INVALID_ARGUMENT,
-          "exports[%zu] internal_ordinal out of bounds (0 < %zu < %zu)", i,
-          internal_ordinal,
-          iree_vm_FunctionDescriptor_vec_len(function_descriptors));
-    }
-  }
-
-  for (size_t i = 0;
-       i < iree_vm_FunctionSignatureDef_vec_len(function_signatures); ++i) {
-    iree_vm_FunctionSignatureDef_table_t function_signature =
-        iree_vm_FunctionSignatureDef_vec_at(function_signatures, i);
-    if (!function_signature) {
-      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                              "function_signatures[%zu] missing body", i);
-    }
-  }
-
-  // Verify that we can properly handle the bytecode embedded in the module.
-  // We require that major versions match and allow loading of older minor
-  // versions (we keep changes backwards-compatible).
-  const uint32_t bytecode_version =
-      iree_vm_BytecodeModuleDef_bytecode_version(module_def);
-  const uint32_t bytecode_version_major = bytecode_version >> 16;
-  const uint32_t bytecode_version_minor = bytecode_version & 0xFFFF;
-  if ((bytecode_version_major != IREE_VM_BYTECODE_VERSION_MAJOR) ||
-      (bytecode_version_minor > IREE_VM_BYTECODE_VERSION_MINOR)) {
-    return iree_make_status(
-        IREE_STATUS_INVALID_ARGUMENT,
-        "bytecode version mismatch; runtime supports %d.%d, module has %d.%d",
-        IREE_VM_BYTECODE_VERSION_MAJOR, IREE_VM_BYTECODE_VERSION_MINOR,
-        bytecode_version_major, bytecode_version_minor);
-  }
-
-  flatbuffers_uint8_vec_t bytecode_data =
-      iree_vm_BytecodeModuleDef_bytecode_data(module_def);
-  for (size_t i = 0;
-       i < iree_vm_FunctionDescriptor_vec_len(function_descriptors); ++i) {
-    iree_vm_FunctionDescriptor_struct_t function_descriptor =
-        iree_vm_FunctionDescriptor_vec_at(function_descriptors, i);
-    if (function_descriptor->bytecode_offset < 0 ||
-        function_descriptor->bytecode_offset +
-                function_descriptor->bytecode_length >
-            flatbuffers_uint8_vec_len(bytecode_data)) {
-      return iree_make_status(
-          IREE_STATUS_INVALID_ARGUMENT,
-          "functions[%zu] descriptor bytecode span out of range (0 < %d < %zu)",
-          i, function_descriptor->bytecode_offset,
-          flatbuffers_uint8_vec_len(bytecode_data));
-    }
-    if (function_descriptor->i32_register_count > IREE_I32_REGISTER_COUNT ||
-        function_descriptor->ref_register_count > IREE_REF_REGISTER_COUNT) {
-      return iree_make_status(
-          IREE_STATUS_INVALID_ARGUMENT,
-          "functions[%zu] descriptor register count out of range", i);
-    }
-
-    // TODO(benvanik): run bytecode verifier on contents.
-  }
-
-  return iree_ok_status();
-}
-
 static iree_status_t iree_vm_bytecode_map_internal_ordinal(
     iree_vm_bytecode_module_t* module, iree_vm_function_t function,
     uint16_t* out_ordinal,
@@ -1214,7 +832,7 @@
   iree_const_byte_span_t flatbuffer_contents = iree_const_byte_span_empty();
   iree_host_size_t archive_rodata_offset = 0;
   IREE_RETURN_AND_END_ZONE_IF_ERROR(
-      z0, iree_vm_bytecode_module_parse_header(
+      z0, iree_vm_bytecode_archive_parse_header(
               archive_contents, &flatbuffer_contents, &archive_rodata_offset));
 
   IREE_TRACE_ZONE_BEGIN_NAMED(z1, "iree_vm_bytecode_module_flatbuffer_verify");
@@ -1294,7 +912,23 @@
   module->interface.begin_call = iree_vm_bytecode_module_begin_call;
   module->interface.resume_call = iree_vm_bytecode_module_resume_call;
 
-  *out_module = &module->interface;
+  // Verify functions in the module now that we've verified the metadata that we
+  // need to do so.
+  iree_status_t verify_status = iree_ok_status();
+#if IREE_VM_BYTECODE_VERIFICATION_ENABLE
+  for (uint16_t i = 0; i < module->function_descriptor_count; ++i) {
+    IREE_TRACE_ZONE_BEGIN_NAMED(z1, "iree_vm_bytecode_function_verify");
+    verify_status = iree_vm_bytecode_function_verify(module, i, allocator);
+    IREE_TRACE_ZONE_END(z1);
+    if (!iree_status_is_ok(verify_status)) break;
+  }
+#endif  // IREE_VM_BYTECODE_VERIFICATION_ENABLE
+  if (iree_status_is_ok(verify_status)) {
+    *out_module = &module->interface;
+  } else {
+    iree_allocator_free(allocator, module);
+  }
+
   IREE_TRACE_ZONE_END(z0);
-  return iree_ok_status();
+  return verify_status;
 }
diff --git a/runtime/src/iree/vm/bytecode/module.h b/runtime/src/iree/vm/bytecode/module.h
index 93ebd82..997762f 100644
--- a/runtime/src/iree/vm/bytecode/module.h
+++ b/runtime/src/iree/vm/bytecode/module.h
@@ -7,8 +7,6 @@
 #ifndef IREE_VM_BYTECODE_MODULE_H_
 #define IREE_VM_BYTECODE_MODULE_H_
 
-#include <stdint.h>
-
 #include "iree/base/api.h"
 #include "iree/vm/api.h"
 
@@ -25,15 +23,6 @@
     iree_allocator_t archive_allocator, iree_allocator_t allocator,
     iree_vm_module_t** out_module);
 
-// Parses the module archive header in |archive_contents|.
-// The subrange containing the FlatBuffer data is returned as well as the
-// offset where external rodata begins. Note that archives may have
-// non-contiguous layouts!
-IREE_API_EXPORT iree_status_t iree_vm_bytecode_module_parse_header(
-    iree_const_byte_span_t archive_contents,
-    iree_const_byte_span_t* out_flatbuffer_contents,
-    iree_host_size_t* out_rodata_offset);
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/runtime/src/iree/vm/bytecode/module_impl.h b/runtime/src/iree/vm/bytecode/module_impl.h
index 2b9728d..70c2f3f 100644
--- a/runtime/src/iree/vm/bytecode/module_impl.h
+++ b/runtime/src/iree/vm/bytecode/module_impl.h
@@ -10,48 +10,14 @@
 #include <stdint.h>
 #include <string.h>
 
-// VC++ does not have C11's stdalign.h.
-#if !defined(_MSC_VER)
-#include <stdalign.h>
-#endif  // _MSC_VER
-
 #include "iree/base/api.h"
 #include "iree/vm/api.h"
-
-// NOTE: include order matters:
-#include "iree/base/internal/flatcc/parsing.h"
-#include "iree/schemas/bytecode_module_def_reader.h"
-#include "iree/schemas/bytecode_module_def_verifier.h"
+#include "iree/vm/bytecode/utils/isa.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif  // __cplusplus
 
-#define VMMAX(a, b) (((a) > (b)) ? (a) : (b))
-#define VMMIN(a, b) (((a) < (b)) ? (a) : (b))
-
-// Major bytecode version; mismatches on this will fail in either direction.
-// This allows coarse versioning of completely incompatible versions.
-// Matches BytecodeEncoder::kVersionMajor in the compiler.
-#define IREE_VM_BYTECODE_VERSION_MAJOR 14
-// Minor bytecode version; lower versions are allowed to enable newer runtimes
-// to load older serialized files when there are backwards-compatible changes.
-// Higher versions are disallowed as they occur when new ops are added that
-// otherwise cannot be executed by older runtimes.
-// Matches BytecodeEncoder::kVersionMinor in the compiler.
-#define IREE_VM_BYTECODE_VERSION_MINOR 0
-
-// Maximum register count per bank.
-// This determines the bits required to reference registers in the VM bytecode.
-#define IREE_I32_REGISTER_COUNT 0x7FFF
-#define IREE_REF_REGISTER_COUNT 0x7FFF
-
-#define IREE_I32_REGISTER_MASK 0x7FFF
-
-#define IREE_REF_REGISTER_TYPE_BIT 0x8000
-#define IREE_REF_REGISTER_MOVE_BIT 0x4000
-#define IREE_REF_REGISTER_MASK 0x3FFF
-
 // A loaded bytecode module.
 typedef struct iree_vm_bytecode_module_t {
   // Interface routing to the bytecode module functions.
diff --git a/runtime/src/iree/vm/bytecode/utils/BUILD b/runtime/src/iree/vm/bytecode/utils/BUILD
new file mode 100644
index 0000000..c8ec3be
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode/utils/BUILD
@@ -0,0 +1,66 @@
+# Copyright 2023 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("//build_tools/bazel:build_defs.oss.bzl", "iree_runtime_cc_library", "iree_runtime_cc_test")
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+iree_runtime_cc_library(
+    name = "utils",
+    srcs = [
+        "block_list.c",
+        "features.c",
+    ],
+    hdrs = [
+        "block_list.h",
+        "features.h",
+        "generated/op_table.h",
+        "isa.h",
+    ],
+    deps = [
+        "//runtime/src/iree/base",
+        "//runtime/src/iree/base:tracing",
+        "//runtime/src/iree/base/internal",
+        "//runtime/src/iree/base/internal/flatcc:parsing",
+        "//runtime/src/iree/schemas:bytecode_module_def_c_fbs",
+        "//runtime/src/iree/vm",
+    ],
+)
+
+# TODO(#357): Add a script to update op_table.h.
+# iree_gentbl_cc_library(
+#     name = "op_table_gen",
+#     tbl_outs = [
+#         (["--gen-iree-vm-op-table-defs"], "op_table.h"),
+#     ],
+#     tblgen = "//tools:iree-tblgen",
+#     td_file = "//compiler/src/iree/compiler/Dialect/VM/IR:VMOps.td",
+#     deps = [
+#         "//compiler/src/iree/compiler/Dialect/Util/IR:td_files",
+#         "//compiler/src/iree/compiler/Dialect/VM/IR:td_files",
+#         "@llvm-project//mlir:CallInterfacesTdFiles",
+#         "@llvm-project//mlir:ControlFlowInterfacesTdFiles",
+#         "@llvm-project//mlir:FunctionInterfacesTdFiles",
+#         "@llvm-project//mlir:OpBaseTdFiles",
+#         "@llvm-project//mlir:SideEffectInterfacesTdFiles",
+#     ],
+# )
+
+iree_runtime_cc_test(
+    name = "block_list_test",
+    srcs = ["block_list_test.cc"],
+    deps = [
+        ":utils",
+        "//runtime/src/iree/base",
+        "//runtime/src/iree/testing:gtest",
+        "//runtime/src/iree/testing:gtest_main",
+        "//runtime/src/iree/vm",
+    ],
+)
diff --git a/runtime/src/iree/vm/bytecode/utils/CMakeLists.txt b/runtime/src/iree/vm/bytecode/utils/CMakeLists.txt
new file mode 100644
index 0000000..fa539dc
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode/utils/CMakeLists.txt
@@ -0,0 +1,47 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
+# runtime/src/iree/vm/bytecode/utils/BUILD                                     #
+#                                                                              #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
+# CMake-only content.                                                          #
+#                                                                              #
+# To disable autogeneration for this file entirely, delete this header.        #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+  NAME
+    utils
+  HDRS
+    "block_list.h"
+    "features.h"
+    "generated/op_table.h"
+    "isa.h"
+  SRCS
+    "block_list.c"
+    "features.c"
+  DEPS
+    iree::base
+    iree::base::internal
+    iree::base::internal::flatcc::parsing
+    iree::base::tracing
+    iree::schemas::bytecode_module_def_c_fbs
+    iree::vm
+  PUBLIC
+)
+
+iree_cc_test(
+  NAME
+    block_list_test
+  SRCS
+    "block_list_test.cc"
+  DEPS
+    ::utils
+    iree::base
+    iree::testing::gtest
+    iree::testing::gtest_main
+    iree::vm
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/runtime/src/iree/vm/bytecode/utils/block_list.c b/runtime/src/iree/vm/bytecode/utils/block_list.c
new file mode 100644
index 0000000..d892893
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode/utils/block_list.c
@@ -0,0 +1,178 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/bytecode/utils/block_list.h"
+
+#include "iree/base/tracing.h"
+
+iree_status_t iree_vm_bytecode_block_list_initialize(
+    uint32_t capacity, iree_allocator_t allocator,
+    iree_vm_bytecode_block_list_t* out_block_list) {
+  IREE_TRACE_ZONE_BEGIN(z0);
+  IREE_ASSERT_ARGUMENT(out_block_list);
+
+  // In case we fail present an empty list.
+  out_block_list->capacity = 0;
+  out_block_list->count = 0;
+  out_block_list->values = NULL;
+
+  // Configure storage either inline if it fits or as a heap allocation.
+  if (capacity > IREE_ARRAYSIZE(out_block_list->inline_storage)) {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_allocator_malloc(allocator,
+                                  sizeof(out_block_list->values[0]) * capacity,
+                                  (void**)&out_block_list->values));
+  } else {
+    out_block_list->values = out_block_list->inline_storage;
+  }
+
+  // Reset state and clear only the blocks we are using.
+  out_block_list->capacity = capacity;
+  out_block_list->count = 0;
+  memset(out_block_list->values, 0,
+         sizeof(out_block_list->values[0]) * capacity);
+
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+void iree_vm_bytecode_block_list_deinitialize(
+    iree_vm_bytecode_block_list_t* block_list, iree_allocator_t allocator) {
+  if (!block_list) return;
+  IREE_TRACE_ZONE_BEGIN(z0);
+
+  if (block_list->values != block_list->inline_storage) {
+    iree_allocator_free(allocator, block_list->values);
+  }
+  block_list->capacity = 0;
+  block_list->count = 0;
+  block_list->values = NULL;
+
+  IREE_TRACE_ZONE_END(z0);
+}
+
+iree_status_t iree_vm_bytecode_block_list_insert(
+    iree_vm_bytecode_block_list_t* block_list, uint32_t pc,
+    iree_vm_bytecode_block_t** out_block) {
+  IREE_ASSERT_ARGUMENT(block_list);
+  *out_block = NULL;
+
+  if (IREE_UNLIKELY(pc >= IREE_VM_PC_BLOCK_MAX)) {
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+                            "block pc %08X greater than max %08X", pc,
+                            IREE_VM_PC_BLOCK_MAX);
+  }
+
+  // Try to find the block or the next block greater than it in the list in case
+  // we need to insert.
+  iree_host_size_t ordinal = 0;
+  iree_status_t status =
+      iree_vm_bytecode_block_list_find(block_list, pc, &ordinal);
+  if (iree_status_is_ok(status)) {
+    // Block found.
+    *out_block = &block_list->values[ordinal];
+    return iree_ok_status();
+  }
+  status = iree_status_ignore(status);
+
+  // Not found, need to insert at ordinal point at the next greatest pc.
+  if (IREE_UNLIKELY(block_list->count + 1 > block_list->capacity)) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "invalid descriptor block count %u; found at least %u blocks",
+        block_list->capacity, block_list->count + 1);
+  }
+
+  // Shift list up and declare new block.
+  if (ordinal != block_list->count) {
+    memmove(&block_list->values[ordinal + 1], &block_list->values[ordinal],
+            (block_list->count - ordinal) * sizeof(block_list->values[0]));
+  }
+  iree_vm_bytecode_block_t* block = &block_list->values[ordinal];
+  block->defined = 0;
+  block->reserved = 0;
+  block->pc = pc;
+
+  ++block_list->count;
+  *out_block = block;
+  return iree_ok_status();
+}
+
+// Finds the ordinal of the block with the given |pc| within the block list.
+// Note that these ordinals will change with each insertion and this is
+// generally only safe to use after the list has been completed.
+// If NOT_FOUND then |out_ordinal| will contain the index into the list of where
+// the block would be inserted.
+iree_status_t iree_vm_bytecode_block_list_find(
+    const iree_vm_bytecode_block_list_t* block_list, uint32_t pc,
+    iree_host_size_t* out_ordinal) {
+  IREE_ASSERT_ARGUMENT(block_list);
+  *out_ordinal = 0;
+  int low = 0;
+  int high = (int)block_list->count - 1;
+  while (low <= high) {
+    const int mid = low + (high - low) / 2;
+    const uint32_t mid_pc = block_list->values[mid].pc;
+    if (mid_pc < pc) {
+      low = mid + 1;
+    } else if (mid_pc > pc) {
+      high = mid - 1;
+    } else {
+      // Found; early exit.
+      *out_ordinal = mid;
+      return iree_ok_status();
+    }
+  }
+  // Not found; return the next highest slot. Note that this may be off the
+  // end of the list if the search pc is greater than all current values.
+  *out_ordinal = low;
+  return iree_status_from_code(IREE_STATUS_NOT_FOUND);
+}
+
+iree_status_t iree_vm_bytecode_block_list_verify(
+    const iree_vm_bytecode_block_list_t* block_list,
+    iree_const_byte_span_t bytecode_data) {
+  IREE_ASSERT_ARGUMENT(block_list);
+
+  // Ensure we have as many blocks as expected.
+  if (block_list->count != block_list->capacity) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "invalid descriptor block count %u; found %u blocks",
+        block_list->capacity, block_list->count);
+  }
+
+  for (uint32_t i = 0; i < block_list->count; ++i) {
+    const iree_vm_bytecode_block_t* block = &block_list->values[i];
+
+    // Ensure all blocks are defined.
+    if (!block->defined) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "block at pc %08X not defined in bytecode",
+                              block->pc);
+    }
+
+    // Ensure each block pc is in bounds - we need 1 byte for the marker and
+    // at least 1 byte following it (a marker in the last byte is rejected).
+    if (block->pc + 1 >= bytecode_data.data_length) {
+      return iree_make_status(
+          IREE_STATUS_INVALID_ARGUMENT,
+          "block at pc %08X (%u) out of bytecode data range %" PRIhsz,
+          block->pc, block->pc, bytecode_data.data_length);
+    }
+
+    // Ensure each block has a block opcode at its target.
+    uint8_t opc = bytecode_data.data[block->pc];
+    if (opc != IREE_VM_OP_CORE_Block) {
+      return iree_make_status(
+          IREE_STATUS_INVALID_ARGUMENT,
+          "block at pc %08X does not start with a block marker opcode",
+          block->pc);
+    }
+  }
+
+  return iree_ok_status();
+}
diff --git a/runtime/src/iree/vm/bytecode/utils/block_list.h b/runtime/src/iree/vm/bytecode/utils/block_list.h
new file mode 100644
index 0000000..65e6960
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode/utils/block_list.h
@@ -0,0 +1,90 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_BYTECODE_UTILS_BLOCK_LIST_H_
+#define IREE_VM_BYTECODE_UTILS_BLOCK_LIST_H_
+
+#include "iree/base/api.h"
+#include "iree/vm/api.h"
+#include "iree/vm/bytecode/utils/isa.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// Reserved inline storage capacity for blocks in the block list.
+// If more blocks than this are requested we'll heap allocate instead.
+// Most programs today end up with a small number of blocks (sometimes 2-3 for
+// the entire program).
+#define IREE_VM_BYTECODE_INLINE_BLOCK_LIST_CAPACITY (32)
+
+// A tracked block within the block list.
+// Blocks may be either declared (observed as a branch target) or defined
+// (observed within the bytecode stream).
+typedef struct iree_vm_bytecode_block_t {
+  // Set only if the block definition has been seen.
+  uint32_t defined : 1;
+  uint32_t reserved : 7;
+  // Program counter of the block within the function.
+  uint32_t pc : 24;
+} iree_vm_bytecode_block_t;
+
+// A sorted list of blocks.
+// Allows for single-pass verification
+typedef struct iree_vm_bytecode_block_list_t {
+  // Available capacity in |values| and the total expected block count.
+  uint32_t capacity;
+  // Current count of blocks in the |values| list.
+  uint32_t count;
+  // List of blocks sorted by program counter.
+  // Will either point to inline_storage or be a heap allocation.
+  iree_vm_bytecode_block_t* values;
+  // Inlined storage for reasonable block counts to avoid the heap alloc.
+  iree_vm_bytecode_block_t
+      inline_storage[IREE_VM_BYTECODE_INLINE_BLOCK_LIST_CAPACITY];
+} iree_vm_bytecode_block_list_t;
+
+// Initializes a block list with the expected count of |capacity|.
+// The same |allocator| must be passed to
+// iree_vm_bytecode_block_list_deinitialize; the expectation is that the hosting
+// data structure already has a reference to the allocator.
+iree_status_t iree_vm_bytecode_block_list_initialize(
+    uint32_t capacity, iree_allocator_t allocator,
+    iree_vm_bytecode_block_list_t* out_block_list);
+
+// Deinitializes a block list using |allocator| if any heap allocations were
+// required (must be the same as passed to
+// iree_vm_bytecode_block_list_initialize).
+void iree_vm_bytecode_block_list_deinitialize(
+    iree_vm_bytecode_block_list_t* block_list, iree_allocator_t allocator);
+
+// Looks up a block at |pc| in the |block_list|. If not found the block is
+// inserted as declared. Returns the block. Fails if capacity is exceeded.
+// The returned |out_block| pointer is only valid until the next insertion.
+iree_status_t iree_vm_bytecode_block_list_insert(
+    iree_vm_bytecode_block_list_t* block_list, uint32_t pc,
+    iree_vm_bytecode_block_t** out_block);
+
+// Finds the ordinal of the block with the given |pc| within the block list.
+// Note that these ordinals will change with each insertion and this is
+// generally only safe to use after the list has been completed.
+// If NOT_FOUND then |out_ordinal| will contain the index into the list of where
+// the block would be inserted.
+iree_status_t iree_vm_bytecode_block_list_find(
+    const iree_vm_bytecode_block_list_t* block_list, uint32_t pc,
+    iree_host_size_t* out_ordinal);
+
+// Verifies that all blocks in the block list were defined and have proper
+// tracking in |bytecode_data|.
+iree_status_t iree_vm_bytecode_block_list_verify(
+    const iree_vm_bytecode_block_list_t* block_list,
+    iree_const_byte_span_t bytecode_data);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // IREE_VM_BYTECODE_UTILS_BLOCK_LIST_H_
diff --git a/runtime/src/iree/vm/bytecode/utils/block_list_test.cc b/runtime/src/iree/vm/bytecode/utils/block_list_test.cc
new file mode 100644
index 0000000..bba61c8
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode/utils/block_list_test.cc
@@ -0,0 +1,313 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/bytecode/utils/block_list.h"
+
+#include "iree/base/api.h"
+#include "iree/testing/gtest.h"
+#include "iree/testing/status_matchers.h"
+
+namespace {
+
+using iree::Status;
+using iree::StatusCode;
+using iree::testing::status::IsOkAndHolds;
+using iree::testing::status::StatusIs;
+using testing::ElementsAre;
+using testing::Eq;
+
+// Tests usage on empty lists.
+TEST(BlockListTest, Empty) {
+  iree_allocator_t allocator = iree_allocator_system();
+  iree_vm_bytecode_block_list_t block_list;
+  IREE_ASSERT_OK(
+      iree_vm_bytecode_block_list_initialize(0u, allocator, &block_list));
+
+  // Try finding with an empty list. Note that we expect the ordinal to be valid
+  // even though we can't insert anything.
+  iree_host_size_t ordinal = 0;
+  EXPECT_THAT(
+      Status(iree_vm_bytecode_block_list_find(&block_list, 0u, &ordinal)),
+      StatusIs(StatusCode::kNotFound));
+  EXPECT_EQ(ordinal, 0);
+  EXPECT_THAT(
+      Status(iree_vm_bytecode_block_list_find(&block_list, 123u, &ordinal)),
+      StatusIs(StatusCode::kNotFound));
+  EXPECT_EQ(ordinal, 0);
+
+  // No blocks inserted to verify.
+  IREE_EXPECT_OK(iree_vm_bytecode_block_list_verify(
+      &block_list, iree_const_byte_span_empty()));
+
+  iree_vm_bytecode_block_list_deinitialize(&block_list, allocator);
+}
+
+// Valid IR usage for 3 blocks. Note that we insert them out of order: 1 2 0.
+// These should be stored inline in the block list struct.
+TEST(BlockListTest, Valid) {
+  iree_allocator_t allocator = iree_allocator_system();
+  iree_vm_bytecode_block_list_t block_list;
+  IREE_ASSERT_OK(
+      iree_vm_bytecode_block_list_initialize(3u, allocator, &block_list));
+
+  // Try finding blocks before anything has been inserted.
+  iree_host_size_t ordinal = 0;
+  EXPECT_THAT(
+      Status(iree_vm_bytecode_block_list_find(&block_list, 0u, &ordinal)),
+      StatusIs(StatusCode::kNotFound));
+  EXPECT_EQ(ordinal, 0);
+  EXPECT_THAT(
+      Status(iree_vm_bytecode_block_list_find(&block_list, 123u, &ordinal)),
+      StatusIs(StatusCode::kNotFound));
+  EXPECT_EQ(ordinal, 0);
+
+  iree_vm_bytecode_block_t* block = NULL;
+
+  // Declare block 1 (inserted as undefined) and then mark it defined.
+  block = NULL;
+  IREE_ASSERT_OK(iree_vm_bytecode_block_list_insert(&block_list, 1u, &block));
+  EXPECT_EQ(block_list.count, 1);
+  EXPECT_EQ(block->defined, 0);
+  EXPECT_EQ(block->pc, 1u);
+  block->defined = 1;
+
+  // Declare block 2 (inserted as undefined) and then mark it defined.
+  block = NULL;
+  IREE_ASSERT_OK(iree_vm_bytecode_block_list_insert(&block_list, 2u, &block));
+  EXPECT_EQ(block_list.count, 2);
+  EXPECT_EQ(block->defined, 0);
+  EXPECT_EQ(block->pc, 2u);
+  block->defined = 1;
+
+  // Declare block 0 (inserted as undefined) and then mark it defined.
+  block = NULL;
+  IREE_ASSERT_OK(iree_vm_bytecode_block_list_insert(&block_list, 0u, &block));
+  EXPECT_EQ(block_list.count, 3);
+  EXPECT_EQ(block->defined, 0);
+  EXPECT_EQ(block->pc, 0u);
+  block->defined = 1;
+
+  // Re-insert block 1. Should be a no-op as it is already in the list.
+  block = NULL;
+  IREE_ASSERT_OK(iree_vm_bytecode_block_list_insert(&block_list, 1u, &block));
+  EXPECT_EQ(block_list.count, 3);
+  EXPECT_EQ(block->defined, 1);
+  EXPECT_EQ(block->pc, 1u);
+
+  // Find each block and ensure the ordinals follow sorted pc order.
+  IREE_EXPECT_OK(iree_vm_bytecode_block_list_find(&block_list, 0u, &ordinal));
+  EXPECT_EQ(ordinal, 0);
+  IREE_EXPECT_OK(iree_vm_bytecode_block_list_find(&block_list, 1u, &ordinal));
+  EXPECT_EQ(ordinal, 1);
+  IREE_EXPECT_OK(iree_vm_bytecode_block_list_find(&block_list, 2u, &ordinal));
+  EXPECT_EQ(ordinal, 2);
+
+  // Verify blocks are all defined and have block markers.
+  std::vector<uint8_t> bytecode_data = {
+      IREE_VM_OP_CORE_Block, IREE_VM_OP_CORE_Block, IREE_VM_OP_CORE_Block,
+      IREE_VM_OP_CORE_AbsI32,  // need at least one byte after the last marker
+  };
+  IREE_EXPECT_OK(iree_vm_bytecode_block_list_verify(
+      &block_list,
+      iree_make_const_byte_span(bytecode_data.data(), bytecode_data.size())));
+
+  iree_vm_bytecode_block_list_deinitialize(&block_list, allocator);
+}
+
+// Tests that a declared block that was never defined errors on verification.
+TEST(BlockListTest, Undefined) {
+  iree_allocator_t allocator = iree_allocator_system();
+  iree_vm_bytecode_block_list_t block_list;
+  IREE_ASSERT_OK(
+      iree_vm_bytecode_block_list_initialize(1u, allocator, &block_list));
+
+  // Declare the block.
+  iree_vm_bytecode_block_t* block = NULL;
+  IREE_ASSERT_OK(iree_vm_bytecode_block_list_insert(&block_list, 0u, &block));
+
+  // Fail verification because it hasn't been defined.
+  std::vector<uint8_t> bytecode_data = {
+      IREE_VM_OP_CORE_Block,
+  };
+  EXPECT_THAT(
+      Status(iree_vm_bytecode_block_list_verify(
+          &block_list, iree_make_const_byte_span(bytecode_data.data(),
+                                                 bytecode_data.size()))),
+      StatusIs(StatusCode::kInvalidArgument));
+
+  iree_vm_bytecode_block_list_deinitialize(&block_list, allocator);
+}
+
+// Tests adding fewer blocks than expected by the capacity.
+TEST(BlockListTest, Underflow) {
+  iree_allocator_t allocator = iree_allocator_system();
+  iree_vm_bytecode_block_list_t block_list;
+  IREE_ASSERT_OK(
+      iree_vm_bytecode_block_list_initialize(2u, allocator, &block_list));
+
+  // Declaring; OK (count = 1, capacity = 2).
+  iree_vm_bytecode_block_t* block = NULL;
+  IREE_ASSERT_OK(iree_vm_bytecode_block_list_insert(&block_list, 0u, &block));
+
+  // Defining; OK (no change in count).
+  IREE_ASSERT_OK(iree_vm_bytecode_block_list_insert(&block_list, 0u, &block));
+  block->defined = 1;
+
+  // Fail verification because we're missing a block.
+  std::vector<uint8_t> bytecode_data = {
+      IREE_VM_OP_CORE_Block,
+      IREE_VM_OP_CORE_AbsI32,  // need at least one op in a block
+  };
+  EXPECT_THAT(
+      Status(iree_vm_bytecode_block_list_verify(
+          &block_list, iree_make_const_byte_span(bytecode_data.data(),
+                                                 bytecode_data.size()))),
+      StatusIs(StatusCode::kInvalidArgument));
+
+  iree_vm_bytecode_block_list_deinitialize(&block_list, allocator);
+}
+
+// Tests adding more blocks than allowed by the capacity.
+TEST(BlockListTest, Overflow) {
+  iree_allocator_t allocator = iree_allocator_system();
+  iree_vm_bytecode_block_list_t block_list;
+  IREE_ASSERT_OK(
+      iree_vm_bytecode_block_list_initialize(1u, allocator, &block_list));
+
+  // Declaring; OK (count = 1, capacity = 1).
+  iree_vm_bytecode_block_t* block = NULL;
+  IREE_ASSERT_OK(iree_vm_bytecode_block_list_insert(&block_list, 0u, &block));
+  EXPECT_EQ(block_list.count, 1);
+
+  // Defining; OK (no change in count).
+  IREE_ASSERT_OK(iree_vm_bytecode_block_list_insert(&block_list, 0u, &block));
+  block->defined = 1;
+
+  // Error: too many blocks.
+  EXPECT_THAT(
+      Status(iree_vm_bytecode_block_list_insert(&block_list, 1u, &block)),
+      StatusIs(StatusCode::kInvalidArgument));
+  EXPECT_EQ(block_list.count, 1);
+
+  iree_vm_bytecode_block_list_deinitialize(&block_list, allocator);
+}
+
+// Tests adding any blocks to an expected-empty list.
+TEST(BlockListTest, OverflowEmpty) {
+  iree_allocator_t allocator = iree_allocator_system();
+  iree_vm_bytecode_block_list_t block_list;
+  IREE_ASSERT_OK(
+      iree_vm_bytecode_block_list_initialize(0u, allocator, &block_list));
+
+  // Error: too many blocks.
+  iree_vm_bytecode_block_t* block = NULL;
+  EXPECT_THAT(
+      Status(iree_vm_bytecode_block_list_insert(&block_list, 0u, &block)),
+      StatusIs(StatusCode::kInvalidArgument));
+  EXPECT_EQ(block_list.count, 0);
+
+  iree_vm_bytecode_block_list_deinitialize(&block_list, allocator);
+}
+
+// Tests a block that is missing its marker in the bytecode.
+TEST(BlockListTest, MissingMarker) {
+  iree_allocator_t allocator = iree_allocator_system();
+  iree_vm_bytecode_block_list_t block_list;
+  IREE_ASSERT_OK(
+      iree_vm_bytecode_block_list_initialize(1u, allocator, &block_list));
+
+  iree_vm_bytecode_block_t* block = NULL;
+  IREE_ASSERT_OK(iree_vm_bytecode_block_list_insert(&block_list, 0u, &block));
+  block->defined = 1;
+  // Two bytes so pc 0 passes the bounds check and only the marker check fails.
+  std::vector<uint8_t> bytecode_data = {
+      IREE_VM_OP_CORE_AbsI32, IREE_VM_OP_CORE_AbsI32,  // *not* block markers
+  };
+  EXPECT_THAT(
+      Status(iree_vm_bytecode_block_list_verify(
+          &block_list, iree_make_const_byte_span(bytecode_data.data(),
+                                                 bytecode_data.size()))),
+      StatusIs(StatusCode::kInvalidArgument));
+
+  iree_vm_bytecode_block_list_deinitialize(&block_list, allocator);
+}
+
+// Tests a block with a pc outside of the bytecode range.
+TEST(BlockListTest, OutOfBoundsPC) {
+  iree_allocator_t allocator = iree_allocator_system();
+  iree_vm_bytecode_block_list_t block_list;
+  IREE_ASSERT_OK(
+      iree_vm_bytecode_block_list_initialize(1u, allocator, &block_list));
+
+  iree_vm_bytecode_block_t* block = NULL;
+  IREE_ASSERT_OK(iree_vm_bytecode_block_list_insert(&block_list, 1u, &block));
+  block->defined = 1;
+
+  std::vector<uint8_t> bytecode_data = {
+      IREE_VM_OP_CORE_AbsI32,  // 1 byte of data; pc 1 is out of range
+  };
+  EXPECT_THAT(
+      Status(iree_vm_bytecode_block_list_verify(
+          &block_list, iree_make_const_byte_span(bytecode_data.data(),
+                                                 bytecode_data.size()))),
+      StatusIs(StatusCode::kInvalidArgument));
+
+  iree_vm_bytecode_block_list_deinitialize(&block_list, allocator);
+}
+
+// Tests inserting a block with a PC outside of what we can track. This should
+// be really rare in practice.
+TEST(BlockListTest, OverMaxPC) {
+  iree_allocator_t allocator = iree_allocator_system();
+  iree_vm_bytecode_block_list_t block_list;
+  IREE_ASSERT_OK(
+      iree_vm_bytecode_block_list_initialize(1u, allocator, &block_list));
+
+  iree_vm_bytecode_block_t* block = NULL;
+  EXPECT_THAT(Status(iree_vm_bytecode_block_list_insert(
+                  &block_list, IREE_VM_PC_BLOCK_MAX + 1, &block)),
+              StatusIs(StatusCode::kOutOfRange));
+
+  iree_vm_bytecode_block_list_deinitialize(&block_list, allocator);
+}
+
+// Tests adding a lot of blocks such that we trigger a heap storage allocation.
+TEST(BlockListTest, HeapStorage) {
+  uint32_t count = IREE_VM_BYTECODE_INLINE_BLOCK_LIST_CAPACITY * 8;
+  iree_allocator_t allocator = iree_allocator_system();
+  iree_vm_bytecode_block_list_t block_list;
+  IREE_ASSERT_OK(
+      iree_vm_bytecode_block_list_initialize(count, allocator, &block_list));
+
+  // Declare all blocks in reverse, for fun.
+  for (uint32_t i = 0; i < count; ++i) {
+    iree_vm_bytecode_block_t* block = NULL;
+    IREE_ASSERT_OK(
+        iree_vm_bytecode_block_list_insert(&block_list, count - i - 1, &block));
+  }
+
+  // Ensure sorted.
+  for (uint32_t i = 0; i < count; ++i) {
+    EXPECT_EQ(block_list.values[i].pc, i);
+  }
+
+  // Define all blocks forward.
+  for (uint32_t i = 0; i < count; ++i) {
+    iree_vm_bytecode_block_t* block = NULL;
+    IREE_ASSERT_OK(iree_vm_bytecode_block_list_insert(&block_list, i, &block));
+    block->defined = 1;
+  }
+
+  // Fake block data (+1 trailing block op for padding) to verify.
+  std::vector<uint8_t> bytecode_data(count + 1, IREE_VM_OP_CORE_Block);
+  IREE_EXPECT_OK(iree_vm_bytecode_block_list_verify(
+      &block_list,
+      iree_make_const_byte_span(bytecode_data.data(), bytecode_data.size())));
+
+  iree_vm_bytecode_block_list_deinitialize(&block_list, allocator);
+}
+
+}  // namespace
diff --git a/runtime/src/iree/vm/bytecode/utils/features.c b/runtime/src/iree/vm/bytecode/utils/features.c
new file mode 100644
index 0000000..6a333f9
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode/utils/features.c
@@ -0,0 +1,60 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/bytecode/utils/features.h"
+
+// clang-format off
+static const iree_bitfield_string_mapping_t iree_vm_bytecode_feature_mappings[] = {
+  {iree_vm_FeatureBits_EXT_F32, IREE_SVL("EXT_F32")},
+  {iree_vm_FeatureBits_EXT_F64, IREE_SVL("EXT_F64")},
+};
+// clang-format on
+
+iree_string_view_t iree_vm_bytecode_features_format(
+    iree_vm_FeatureBits_enum_t value, iree_bitfield_string_temp_t* out_temp) {
+  const size_t count = IREE_ARRAYSIZE(iree_vm_bytecode_feature_mappings);
+  return iree_bitfield_format_inline(
+      value, count, iree_vm_bytecode_feature_mappings, out_temp);
+}
+
+iree_vm_FeatureBits_enum_t iree_vm_bytecode_available_features(void) {
+  iree_vm_FeatureBits_enum_t enabled = 0;  // one bit per compiled-in extension
+#if IREE_VM_EXT_F32_ENABLE
+  enabled |= iree_vm_FeatureBits_EXT_F32;
+#endif  // IREE_VM_EXT_F32_ENABLE
+#if IREE_VM_EXT_F64_ENABLE
+  enabled |= iree_vm_FeatureBits_EXT_F64;
+#endif  // IREE_VM_EXT_F64_ENABLE
+  return enabled;
+}
+
+iree_status_t iree_vm_check_feature_mismatch(
+    const char* file, int line, iree_vm_FeatureBits_enum_t required_features,
+    iree_vm_FeatureBits_enum_t available_features) {
+  if (IREE_LIKELY(iree_all_bits_set(available_features, required_features))) {
+    return iree_ok_status();
+  }
+#if IREE_STATUS_MODE
+  // Format the available, required, and missing sets into separate temps so
+  // that all three strings can appear in one message.
+  iree_bitfield_string_temp_t have_temp, want_temp, missing_temp;
+  const iree_string_view_t have =
+      iree_vm_bytecode_features_format(available_features, &have_temp);
+  const iree_string_view_t want =
+      iree_vm_bytecode_features_format(required_features, &want_temp);
+  const iree_string_view_t missing = iree_vm_bytecode_features_format(
+      required_features & ~available_features, &missing_temp);
+  return iree_make_status_with_location(
+      file, line, IREE_STATUS_INVALID_ARGUMENT,
+      "required module features [%.*s] are not available in this runtime "
+      "configuration; have [%.*s] while module requires [%.*s]",
+      (int)missing.size, missing.data,
+      (int)have.size, have.data,
+      (int)want.size, want.data);
+#else
+  return iree_status_from_code(IREE_STATUS_INVALID_ARGUMENT);
+#endif  // IREE_STATUS_MODE
+}
diff --git a/runtime/src/iree/vm/bytecode/utils/features.h b/runtime/src/iree/vm/bytecode/utils/features.h
new file mode 100644
index 0000000..6545803
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode/utils/features.h
@@ -0,0 +1,37 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_BYTECODE_UTILS_FEATURES_H_
+#define IREE_VM_BYTECODE_UTILS_FEATURES_H_
+
+#include "iree/base/api.h"
+#include "iree/vm/api.h"
+#include "iree/vm/bytecode/utils/isa.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // __cplusplus
+
+// Formats a VM bytecode feature bitfield as a string.
+// See iree_bitfield_format for usage.
+iree_string_view_t iree_vm_bytecode_features_format(
+    iree_vm_FeatureBits_enum_t value, iree_bitfield_string_temp_t* out_temp);
+
+// Returns the features available in this build of the runtime.
+iree_vm_FeatureBits_enum_t iree_vm_bytecode_available_features(void);
+
+// Returns a pretty status reported at |file|/|line| when one or more features
+// from |required_features| is missing from |available_features|.
+// Returns OK if all required features are available.
+iree_status_t iree_vm_check_feature_mismatch(
+    const char* file, int line, iree_vm_FeatureBits_enum_t required_features,
+    iree_vm_FeatureBits_enum_t available_features);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
+
+#endif  // IREE_VM_BYTECODE_UTILS_FEATURES_H_
diff --git a/runtime/src/iree/vm/bytecode/generated/.clang-format b/runtime/src/iree/vm/bytecode/utils/generated/.clang-format
similarity index 100%
rename from runtime/src/iree/vm/bytecode/generated/.clang-format
rename to runtime/src/iree/vm/bytecode/utils/generated/.clang-format
diff --git a/runtime/src/iree/vm/bytecode/generated/op_table.h b/runtime/src/iree/vm/bytecode/utils/generated/op_table.h
similarity index 100%
rename from runtime/src/iree/vm/bytecode/generated/op_table.h
rename to runtime/src/iree/vm/bytecode/utils/generated/op_table.h
diff --git a/runtime/src/iree/vm/bytecode/utils/isa.h b/runtime/src/iree/vm/bytecode/utils/isa.h
new file mode 100644
index 0000000..d24300c
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode/utils/isa.h
@@ -0,0 +1,103 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_BYTECODE_UTILS_ISA_H_
+#define IREE_VM_BYTECODE_UTILS_ISA_H_
+
+#include "iree/base/api.h"
+#include "iree/vm/api.h"
+#include "iree/vm/bytecode/utils/generated/op_table.h"  // IWYU pragma: export
+
+// NOTE: include order matters:
+#include "iree/base/internal/flatcc/parsing.h"          // IWYU pragma: export
+#include "iree/schemas/bytecode_module_def_reader.h"    // IWYU pragma: export
+#include "iree/schemas/bytecode_module_def_verifier.h"  // IWYU pragma: export
+
+//===----------------------------------------------------------------------===//
+// Misc utilities
+//===----------------------------------------------------------------------===//
+
+#define VMMAX(a, b) (((a) > (b)) ? (a) : (b))
+#define VMMIN(a, b) (((a) < (b)) ? (a) : (b))
+
+#define VM_AlignPC(pc, alignment) \
+  (pc) = ((pc) + ((alignment)-1)) & ~((alignment)-1)
+
+//===----------------------------------------------------------------------===//
+// Bytecode versioning
+//===----------------------------------------------------------------------===//
+
+// Major bytecode version; mismatches on this will fail in either direction.
+// This allows coarse versioning of completely incompatible versions.
+// Matches BytecodeEncoder::kVersionMajor in the compiler.
+#define IREE_VM_BYTECODE_VERSION_MAJOR 14
+// Minor bytecode version; lower versions are allowed to enable newer runtimes
+// to load older serialized files when there are backwards-compatible changes.
+// Higher versions are disallowed as they occur when new ops are added that
+// otherwise cannot be executed by older runtimes.
+// Matches BytecodeEncoder::kVersionMinor in the compiler.
+#define IREE_VM_BYTECODE_VERSION_MINOR 0
+
+//===----------------------------------------------------------------------===//
+// Bytecode structural constants
+//===----------------------------------------------------------------------===//
+
+// Size of a register ordinal in the bytecode.
+#define IREE_REGISTER_ORDINAL_SIZE sizeof(uint16_t)
+
+// Maximum register count per bank.
+// This determines the bits required to reference registers in the VM bytecode.
+#define IREE_I32_REGISTER_COUNT 0x7FFF
+#define IREE_REF_REGISTER_COUNT 0x3FFF
+
+#define IREE_I32_REGISTER_MASK 0x7FFF
+
+#define IREE_REF_REGISTER_TYPE_BIT 0x8000
+#define IREE_REF_REGISTER_MOVE_BIT 0x4000
+#define IREE_REF_REGISTER_MASK 0x3FFF
+
+// Maximum program counter offset within a single block.
+// This is just so that we can steal bits for flags and such. 16MB (today)
+// should be more than enough for a single basic block. If not then we should
+// compress better!
+#define IREE_VM_PC_BLOCK_MAX 0x00FFFFFFu
+
+// Bytecode data -offset used when looking for the start of the currently
+// dispatched instruction: `instruction_start = pc - OFFSET`
+#define IREE_VM_PC_OFFSET_CORE 1
+#define IREE_VM_PC_OFFSET_EXT_I32 2
+#define IREE_VM_PC_OFFSET_EXT_F32 2
+#define IREE_VM_PC_OFFSET_EXT_F64 2
+
+// Interleaved src-dst register sets for branch register remapping.
+// This structure is an overlay for the bytecode that is serialized in a
+// matching format.
+typedef struct iree_vm_register_remap_list_t {
+  uint16_t size;
+  struct pair {
+    uint16_t src_reg;
+    uint16_t dst_reg;
+  } pairs[];
+} iree_vm_register_remap_list_t;
+static_assert(iree_alignof(iree_vm_register_remap_list_t) == 2,
+              "Expecting 2-byte alignment (to avoid padding)");
+static_assert(offsetof(iree_vm_register_remap_list_t, pairs) == 2,
+              "Expect no padding in the struct");
+
+//===----------------------------------------------------------------------===//
+// Bytecode data reading with little-/big-endian support
+//===----------------------------------------------------------------------===//
+
+// Bytecode data access macros for reading values of a given type from a byte
+// offset within the current function.
+#define OP_I8(i) iree_unaligned_load_le((uint8_t*)&bytecode_data[pc + (i)])
+#define OP_I16(i) iree_unaligned_load_le((uint16_t*)&bytecode_data[pc + (i)])
+#define OP_I32(i) iree_unaligned_load_le((uint32_t*)&bytecode_data[pc + (i)])
+#define OP_I64(i) iree_unaligned_load_le((uint64_t*)&bytecode_data[pc + (i)])
+#define OP_F32(i) iree_unaligned_load_le((float*)&bytecode_data[pc + (i)])
+#define OP_F64(i) iree_unaligned_load_le((double*)&bytecode_data[pc + (i)])
+
+#endif  // IREE_VM_BYTECODE_UTILS_ISA_H_
diff --git a/runtime/src/iree/vm/bytecode/verifier.c b/runtime/src/iree/vm/bytecode/verifier.c
new file mode 100644
index 0000000..f26c4f3
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode/verifier.c
@@ -0,0 +1,1863 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/vm/bytecode/verifier.h"
+
+#include "iree/base/internal/math.h"
+#include "iree/vm/bytecode/utils/block_list.h"
+#include "iree/vm/bytecode/utils/features.h"
+
+//===----------------------------------------------------------------------===//
+// Module metadata verification
+//===----------------------------------------------------------------------===//
+
+iree_status_t iree_vm_bytecode_module_flatbuffer_verify(
+    iree_const_byte_span_t archive_contents,
+    iree_const_byte_span_t flatbuffer_contents,
+    iree_host_size_t archive_rodata_offset) {
+  // Run flatcc generated verification. This ensures all pointers are in-bounds
+  // and that we can safely walk the file, but not that the actual contents of
+  // the FlatBuffer meet our expectations.
+  int verify_ret = iree_vm_BytecodeModuleDef_verify_as_root(
+      flatbuffer_contents.data, flatbuffer_contents.data_length);
+  if (verify_ret != flatcc_verify_ok) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "FlatBuffer verification failed: %s",
+                            flatcc_verify_error_string(verify_ret));
+  }
+
+  iree_vm_BytecodeModuleDef_table_t module_def =
+      iree_vm_BytecodeModuleDef_as_root(flatbuffer_contents.data);
+
+  const iree_vm_FeatureBits_enum_t available_features =
+      iree_vm_bytecode_available_features();
+  const iree_vm_FeatureBits_enum_t required_features =
+      iree_vm_BytecodeModuleDef_requirements(module_def);
+  IREE_RETURN_IF_ERROR(iree_vm_check_feature_mismatch(
+      __FILE__, __LINE__, required_features, available_features));
+
+  flatbuffers_string_t name = iree_vm_BytecodeModuleDef_name(module_def);
+  if (!flatbuffers_string_len(name)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "module missing name field");
+  }
+
+  iree_vm_TypeDef_vec_t types = iree_vm_BytecodeModuleDef_types(module_def);
+  for (size_t i = 0; i < iree_vm_TypeDef_vec_len(types); ++i) {
+    iree_vm_TypeDef_table_t type_def = iree_vm_TypeDef_vec_at(types, i);
+    if (!type_def) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "types[%zu] missing body", i);
+    }
+    flatbuffers_string_t full_name = iree_vm_TypeDef_full_name(type_def);
+    if (flatbuffers_string_len(full_name) <= 0) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "types[%zu] missing name", i);
+    }
+  }
+
+  iree_vm_RodataSegmentDef_vec_t rodata_segments =
+      iree_vm_BytecodeModuleDef_rodata_segments(module_def);
+  for (size_t i = 0; i < iree_vm_RodataSegmentDef_vec_len(rodata_segments);
+       ++i) {
+    iree_vm_RodataSegmentDef_table_t segment =
+        iree_vm_RodataSegmentDef_vec_at(rodata_segments, i);
+    if (iree_vm_RodataSegmentDef_embedded_data_is_present(segment)) {
+      continue;  // embedded data is verified by FlatBuffers
+    }
+    // Compare by subtraction: offset + length sums from the file could wrap.
+    uint64_t limit = archive_contents.data_length;
+    uint64_t offset = iree_vm_RodataSegmentDef_external_data_offset(segment);
+    uint64_t length = iree_vm_RodataSegmentDef_external_data_length(segment);
+    if (archive_rodata_offset > limit ||
+        offset > limit - archive_rodata_offset ||
+        length > limit - archive_rodata_offset - offset) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "rodata[%zu] external reference out of range", i);
+    }
+  }
+
+  iree_vm_ModuleDependencyDef_vec_t dependencies =
+      iree_vm_BytecodeModuleDef_dependencies(module_def);
+  for (size_t i = 0; i < iree_vm_ModuleDependencyDef_vec_len(dependencies);
+       ++i) {
+    iree_vm_ModuleDependencyDef_table_t dependency_def =
+        iree_vm_ModuleDependencyDef_vec_at(dependencies, i);
+    flatbuffers_string_t module_name =
+        iree_vm_ModuleDependencyDef_name(dependency_def);
+    if (flatbuffers_string_len(module_name) == 0) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "dependencies[%zu] has no module name", i);
+    }
+  }
+
+  iree_vm_ImportFunctionDef_vec_t imported_functions =
+      iree_vm_BytecodeModuleDef_imported_functions(module_def);
+  iree_vm_ExportFunctionDef_vec_t exported_functions =
+      iree_vm_BytecodeModuleDef_exported_functions(module_def);
+  iree_vm_FunctionSignatureDef_vec_t function_signatures =
+      iree_vm_BytecodeModuleDef_function_signatures(module_def);
+  iree_vm_FunctionDescriptor_vec_t function_descriptors =
+      iree_vm_BytecodeModuleDef_function_descriptors(module_def);
+  const size_t function_descriptor_count =
+      iree_vm_FunctionDescriptor_vec_len(function_descriptors);
+
+  for (size_t i = 0;
+       i < iree_vm_FunctionSignatureDef_vec_len(function_signatures); ++i) {
+    iree_vm_FunctionSignatureDef_table_t function_signature =
+        iree_vm_FunctionSignatureDef_vec_at(function_signatures, i);
+    if (!function_signature) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "function_signatures[%zu] missing body", i);
+    }
+  }
+
+  for (size_t i = 0; i < iree_vm_ImportFunctionDef_vec_len(imported_functions);
+       ++i) {
+    iree_vm_ImportFunctionDef_table_t import_def =
+        iree_vm_ImportFunctionDef_vec_at(imported_functions, i);
+    if (!import_def) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "imports[%zu] missing body", i);
+    }
+    flatbuffers_string_t full_name =
+        iree_vm_ImportFunctionDef_full_name(import_def);
+    if (!flatbuffers_string_len(full_name)) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "imports[%zu] missing full_name", i);
+    }
+  }
+
+  if (iree_vm_FunctionSignatureDef_vec_len(function_signatures) !=
+      function_descriptor_count) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "function signature and descriptor table length mismatch (%zu vs %zu)",
+        iree_vm_FunctionSignatureDef_vec_len(function_signatures),
+        function_descriptor_count);
+  }
+
+  for (size_t i = 0; i < iree_vm_ExportFunctionDef_vec_len(exported_functions);
+       ++i) {
+    iree_vm_ExportFunctionDef_table_t export_def =
+        iree_vm_ExportFunctionDef_vec_at(exported_functions, i);
+    if (!export_def) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "exports[%zu] missing body", i);
+    }
+    flatbuffers_string_t local_name =
+        iree_vm_ExportFunctionDef_local_name(export_def);
+    if (!flatbuffers_string_len(local_name)) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "exports[%zu] missing local_name", i);
+    }
+    iree_host_size_t internal_ordinal =
+        iree_vm_ExportFunctionDef_internal_ordinal(export_def);
+    if (internal_ordinal >= function_descriptor_count) {
+      return iree_make_status(
+          IREE_STATUS_INVALID_ARGUMENT,
+          "exports[%zu] internal_ordinal out of bounds (0 < %zu < %zu)", i,
+          internal_ordinal, function_descriptor_count);
+    }
+  }
+
+  // Verify that we can properly handle the bytecode embedded in the module.
+  // We require that major versions match and allow loading of older minor
+  // versions (we keep changes backwards-compatible).
+  const uint32_t bytecode_version =
+      iree_vm_BytecodeModuleDef_bytecode_version(module_def);
+  const uint32_t bytecode_version_major = bytecode_version >> 16;
+  const uint32_t bytecode_version_minor = bytecode_version & 0xFFFF;
+  if ((bytecode_version_major != IREE_VM_BYTECODE_VERSION_MAJOR) ||
+      (bytecode_version_minor > IREE_VM_BYTECODE_VERSION_MINOR)) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "bytecode version mismatch; runtime supports %d.%d, module has %u.%u",
+        IREE_VM_BYTECODE_VERSION_MAJOR, IREE_VM_BYTECODE_VERSION_MINOR,
+        bytecode_version_major, bytecode_version_minor);
+  }
+
+  flatbuffers_uint8_vec_t bytecode_data =
+      iree_vm_BytecodeModuleDef_bytecode_data(module_def);
+  for (size_t i = 0; i < function_descriptor_count; ++i) {
+    iree_vm_FunctionDescriptor_struct_t function_descriptor =
+        iree_vm_FunctionDescriptor_vec_at(function_descriptors, i);
+    if (function_descriptor->block_count == 0) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "functions[%zu] descriptor block count is 0; "
+                              "functions must have at least 1 block",
+                              i);
+    }
+    if (function_descriptor->bytecode_length == 0) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "functions[%zu] descriptor bytecode reports 0 "
+                              "length; functions must have at least one block",
+                              i);
+    }
+    // Reject negative fields and sum in 64 bits so the range test cannot wrap.
+    if (function_descriptor->bytecode_offset < 0 ||
+        function_descriptor->bytecode_length < 0 ||
+        (uint64_t)function_descriptor->bytecode_offset +
+                (uint64_t)function_descriptor->bytecode_length >
+            flatbuffers_uint8_vec_len(bytecode_data)) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "functions[%zu] descriptor bytecode span out of "
+                              "range (0 < %d < %" PRIhsz ")",
+                              i, function_descriptor->bytecode_offset,
+                              flatbuffers_uint8_vec_len(bytecode_data));
+    }
+    if (function_descriptor->i32_register_count > IREE_I32_REGISTER_COUNT ||
+        function_descriptor->ref_register_count > IREE_REF_REGISTER_COUNT) {
+      return iree_make_status(
+          IREE_STATUS_INVALID_ARGUMENT,
+          "functions[%zu] descriptor register count out of range", i);
+    }
+  }
+
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// Function verification
+//===----------------------------------------------------------------------===//
+
+// State used during verification of a function.
+typedef struct iree_vm_bytecode_verify_state_t {
+  // Within a block (encountered a block marker and not yet a terminator).
+  uint32_t in_block : 1;
+
+  // Per-bank register counts, rounded up to a power of two when verifying.
+  uint32_t i32_register_count;
+  uint32_t ref_register_count;
+
+  // Parsed argument and result cconv fragments.
+  iree_string_view_t cconv_arguments;
+  iree_string_view_t cconv_results;
+
+  // All block branch points.
+  iree_vm_bytecode_block_list_t block_list;
+
+  // Quick lookups of flatbuffer properties (vectors and module state sizes).
+  const iree_vm_ImportFunctionDef_vec_t imported_functions;
+  const iree_vm_ExportFunctionDef_vec_t exported_functions;
+  const iree_vm_FunctionSignatureDef_vec_t function_signatures;
+  const iree_vm_FunctionDescriptor_vec_t function_descriptors;
+  iree_host_size_t rodata_storage_size;
+  iree_host_size_t rodata_ref_count;
+  iree_host_size_t rwdata_storage_size;
+  iree_host_size_t global_ref_count;
+} iree_vm_bytecode_verify_state_t;
+
+// Parses the cconv fragments from the given |signature_def|.
+static iree_status_t iree_vm_bytecode_function_get_cconv_fragments(
+    iree_vm_FunctionSignatureDef_table_t signature_def,
+    iree_string_view_t* out_arguments, iree_string_view_t* out_results);
+
+// Verifies that the function has storage for the declared arguments.
+static iree_status_t iree_vm_bytecode_function_verify_arguments(
+    const iree_vm_bytecode_verify_state_t* verify_state);
+
+// Verifies a single operation at |pc| in the function |bytecode_data|.
+// Returns an error if the op is invalid and otherwise sets |out_next_pc| to the
+// program counter immediately following the op (which may be the end of data!).
+static iree_status_t iree_vm_bytecode_function_verify_bytecode_op(
+    iree_vm_bytecode_module_t* module,
+    iree_vm_bytecode_verify_state_t* verify_state,
+    iree_vm_FunctionSignatureDef_table_t function_signature,
+    iree_vm_FunctionDescriptor_struct_t function_descriptor,
+    iree_const_byte_span_t bytecode_data, uint32_t pc, uint32_t max_pc,
+    uint32_t* out_next_pc);
+
+// NOTE: by the time this is called we have the module created and can assume
+// all information on it has been verified. The only thing this verifies is
+// function bytecode and capabilities!
+iree_status_t iree_vm_bytecode_function_verify(
+    iree_vm_bytecode_module_t* module, uint16_t function_ordinal,
+    iree_allocator_t scratch_allocator) {
+  if (function_ordinal >= module->function_descriptor_count) {
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+                            "invalid function ordinal");
+  }
+  iree_vm_FunctionSignatureDef_table_t function_signature_def =
+      iree_vm_FunctionSignatureDef_vec_at(
+          iree_vm_BytecodeModuleDef_function_signatures(module->def),
+          function_ordinal);
+  const iree_vm_FunctionDescriptor_t* function_descriptor =
+      &module->function_descriptor_table[function_ordinal];
+
+  const iree_vm_FeatureBits_enum_t available_features =
+      iree_vm_bytecode_available_features();
+  const iree_vm_FeatureBits_enum_t required_features =
+      function_descriptor->requirements;
+  IREE_RETURN_IF_ERROR(iree_vm_check_feature_mismatch(
+      __FILE__, __LINE__, required_features, available_features));
+
+  if (function_descriptor->block_count == 0) {
+    return iree_make_status(
+        IREE_STATUS_OUT_OF_RANGE,
+        "no blocks defined; functions must have at least one block");
+  }
+
+  // State used during verification.
+  iree_vm_bytecode_verify_state_t verify_state = {
+      .in_block = 0,
+      .imported_functions =
+          iree_vm_BytecodeModuleDef_imported_functions(module->def),
+      .exported_functions =
+          iree_vm_BytecodeModuleDef_exported_functions(module->def),
+      .function_signatures =
+          iree_vm_BytecodeModuleDef_function_signatures(module->def),
+      .function_descriptors =
+          iree_vm_BytecodeModuleDef_function_descriptors(module->def),
+      .rodata_storage_size = 0,
+      .rodata_ref_count = 0,
+      .rwdata_storage_size = 0,
+      .global_ref_count = 0,
+  };
+
+  // NOTE: these must be consistent with iree_vm_bytecode_module_layout_state.
+  verify_state.rodata_storage_size = 0;
+  verify_state.rodata_ref_count = iree_vm_RodataSegmentDef_vec_len(
+      iree_vm_BytecodeModuleDef_rodata_segments(module->def));
+  iree_vm_ModuleStateDef_table_t module_state_def =
+      iree_vm_BytecodeModuleDef_module_state(module->def);
+  if (module_state_def) {
+    verify_state.rwdata_storage_size =
+        iree_vm_ModuleStateDef_global_bytes_capacity(module_state_def);
+    verify_state.global_ref_count =
+        iree_vm_ModuleStateDef_global_ref_count(module_state_def);
+  }
+
+  // Ensure the register storage (rounded up to the next power of 2) won't
+  // exceed the maximum allowed registers.
+  verify_state.i32_register_count = iree_math_round_up_to_pow2_u32(
+      VMMAX(1, function_descriptor->i32_register_count));
+  verify_state.ref_register_count = iree_math_round_up_to_pow2_u32(
+      VMMAX(1, function_descriptor->ref_register_count));
+  if (IREE_UNLIKELY(verify_state.i32_register_count > IREE_I32_REGISTER_MASK) ||
+      IREE_UNLIKELY(verify_state.ref_register_count > IREE_REF_REGISTER_MASK)) {
+    // Register count overflow. A valid compiler should never produce files that
+    // hit this.
+    return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
+                            "register count overflow");
+  }
+
+  // Grab the cconv fragments declaring the arguments/results of the function.
+  verify_state.cconv_arguments = iree_string_view_empty();
+  verify_state.cconv_results = iree_string_view_empty();
+  IREE_RETURN_IF_ERROR(iree_vm_bytecode_function_get_cconv_fragments(
+      function_signature_def, &verify_state.cconv_arguments,
+      &verify_state.cconv_results));
+
+  // Verify there is storage for passed arguments.
+  IREE_RETURN_IF_ERROR(
+      iree_vm_bytecode_function_verify_arguments(&verify_state));
+
+  // NOTE: module verification ensures the function descriptor has a valid
+  // bytecode range so we can assume that's true here.
+  IREE_ASSERT(function_descriptor->bytecode_length > 0);
+  IREE_ASSERT(function_descriptor->bytecode_offset >= 0);
+  IREE_ASSERT(function_descriptor->bytecode_offset +
+                  function_descriptor->bytecode_length <=
+              module->bytecode_data.data_length);
+  iree_const_byte_span_t bytecode_data = iree_make_const_byte_span(
+      module->bytecode_data.data + function_descriptor->bytecode_offset,
+      function_descriptor->bytecode_length);
+  const uint32_t max_pc = (uint32_t)function_descriptor->bytecode_length;
+
+  // Reserve the block list. As we walk the bytecode we'll declare/define blocks
+  // and then afterward verify all were found.
+  IREE_ASSERT(function_descriptor->block_count > 0);
+  IREE_RETURN_IF_ERROR(iree_vm_bytecode_block_list_initialize(
+      function_descriptor->block_count, scratch_allocator,
+      &verify_state.block_list));
+
+  // Perform bytecode verification with a single-pass walk of all function
+  // bytecode; each verified op yields the pc of the next op to visit.
+  iree_status_t status = iree_ok_status();
+  for (uint32_t pc = 0; pc < bytecode_data.data_length - 1;) {
+    uint32_t start_pc = pc;
+    status = iree_vm_bytecode_function_verify_bytecode_op(
+        module, &verify_state, function_signature_def, function_descriptor,
+        bytecode_data, start_pc, max_pc, &pc);
+    if (!iree_status_is_ok(status)) {
+#if IREE_STATUS_MODE
+      // To get a useful source location we have to ask the main module; the
+      // base function table may only contain public symbols and not any
+      // internal ones.
+      iree_string_view_t module_name = iree_vm_module_name(&module->interface);
+      iree_vm_function_t function = {0};
+      iree_status_ignore(iree_vm_module_lookup_function_by_ordinal(
+          &module->interface, IREE_VM_FUNCTION_LINKAGE_INTERNAL,
+          function_ordinal, &function));
+      iree_string_view_t function_name = iree_vm_function_name(&function);
+      if (!iree_string_view_is_empty(function_name)) {
+        status = iree_status_annotate_f(status, "at %.*s.%.*s+%08X",
+                                        (int)module_name.size, module_name.data,
+                                        (int)function_name.size,
+                                        function_name.data, start_pc);
+      } else {
+        status = iree_status_annotate_f(status, "at %.*s@%u+%08X",
+                                        (int)module_name.size, module_name.data,
+                                        function_ordinal, start_pc);
+      }
+#endif  // IREE_STATUS_MODE
+      break;
+    }
+  }
+
+  // Ensure the walk did not end inside a block that lacks a terminator.
+  if (iree_status_is_ok(status) && verify_state.in_block) {
+    status = iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "function missing terminator in the last block");
+  }
+
+  // Verify all blocks are defined and have proper markers.
+  if (iree_status_is_ok(status)) {
+    status = iree_vm_bytecode_block_list_verify(&verify_state.block_list,
+                                                bytecode_data);
+  }
+
+  iree_vm_bytecode_block_list_deinitialize(&verify_state.block_list,
+                                           scratch_allocator);
+
+  return status;
+}
+
+//===----------------------------------------------------------------------===//
+// Utilities matching the tablegen op encoding scheme
+//===----------------------------------------------------------------------===//
+// These utilities match the VM_Enc* statements in VMBase.td 1:1, allowing us
+// to have the inverse of the encoding which make things easier to read.
+//
+// Each macro will increment the pc by the number of bytes read and as such must
+// be called in the same order the values are encoded.
+
+// Bails with OUT_OF_RANGE if |pc| is beyond |max_pc| (|pc| == |max_pc| is ok).
+#define IREE_VM_VERIFY_PC_RANGE(pc, max_pc)                                  \
+  if (IREE_UNLIKELY((pc) > (max_pc))) {                                      \
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,                        \
+                            "bytecode data overrun trying to parsing op at " \
+                            "%08X (%u) of %u available bytes",               \
+                            (uint32_t)(pc), (uint32_t)(pc),                  \
+                            (uint32_t)(max_pc));                             \
+  }
+
+// Bails if the function doesn't have the given |required_features| declared.
+#define IREE_VM_VERIFY_REQUIREMENT(required_features)                         \
+  if (IREE_UNLIKELY(!iree_all_bits_set(function_descriptor->requirements,     \
+                                       (required_features)))) {               \
+    return iree_vm_check_feature_mismatch(__FILE__, __LINE__,                 \
+                                          (required_features),                \
+                                          function_descriptor->requirements); \
+  }
+
+// Bails if the register ordinal for the given register type is out of bounds.
+#define IREE_VM_VERIFY_REG_ORDINAL(name)                            \
+  IREE_VM_VERIFY_PC_RANGE(pc + IREE_REGISTER_ORDINAL_SIZE, max_pc); \
+  const uint32_t name = OP_I16(0);
+// Verifies that |ordinal| names a single 32-bit value register:
+//  - fails with INVALID_ARGUMENT if IREE_REF_REGISTER_TYPE_BIT is set
+//  - fails with OUT_OF_RANGE if |ordinal| >= verify_state->i32_register_count
+// |category| must be a string literal; it is concatenated into the error
+// message as the register kind (for example "i32"). Reads `verify_state` from
+// the enclosing scope and evaluates |ordinal| more than once.
+#define IREE_VM_VERIFY_REG_ORDINAL_X32(ordinal, category)                      \
+  if (IREE_UNLIKELY(((ordinal)&IREE_REF_REGISTER_TYPE_BIT) != 0)) {            \
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,                      \
+                            category                                           \
+                            " register required but ref register %u provided", \
+                            (ordinal));                                        \
+  } else if (IREE_UNLIKELY((ordinal) >= verify_state->i32_register_count)) {   \
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,                          \
+                            category " register ordinal %u out of range %u",   \
+                            (ordinal), verify_state->i32_register_count);      \
+  }
+// Verifies that |ordinal| names a 64-bit value occupying two consecutive
+// 32-bit register slots (|ordinal| and |ordinal| + 1):
+//  - fails with INVALID_ARGUMENT if IREE_REF_REGISTER_TYPE_BIT is set
+//  - fails with INVALID_ARGUMENT if |ordinal| is odd (not 8-byte aligned)
+//  - fails with OUT_OF_RANGE if |ordinal| + 1 >=
+//    verify_state->i32_register_count
+// |category| must be a string literal; it is concatenated into the error
+// message as the register kind (for example "i64"). Reads `verify_state` from
+// the enclosing scope and evaluates |ordinal| more than once.
+// The macro argument is parenthesized at every use so that expression
+// arguments keep their intended precedence.
+#define IREE_VM_VERIFY_REG_ORDINAL_X64(ordinal, category)                      \
+  if (IREE_UNLIKELY(((ordinal)&IREE_REF_REGISTER_TYPE_BIT) != 0)) {            \
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,                      \
+                            category                                           \
+                            " register required but ref register %u provided", \
+                            (ordinal));                                        \
+  } else if (IREE_UNLIKELY(((ordinal)&1) != 0)) {                              \
+    return iree_make_status(                                                   \
+        IREE_STATUS_INVALID_ARGUMENT,                                          \
+        category " register ordinal %u not 8-byte aligned", (ordinal));        \
+  } else if (IREE_UNLIKELY((ordinal) + 1 >=                                    \
+                           verify_state->i32_register_count)) {                \
+    return iree_make_status(                                                   \
+        IREE_STATUS_OUT_OF_RANGE,                                              \
+        category " register ordinal %u:%u out of range %u", (ordinal),         \
+        (ordinal) + 1, verify_state->i32_register_count);                      \
+  }
+// Typed wrappers over the X32/X64 ordinal checks that only differ in the
+// category string used in error messages. i32/f32 use the single-slot check;
+// i64/f64 use the aligned two-slot check.
+#define IREE_VM_VERIFY_REG_I32(ordinal) \
+  IREE_VM_VERIFY_REG_ORDINAL_X32(ordinal, "i32");
+#define IREE_VM_VERIFY_REG_I64(ordinal) \
+  IREE_VM_VERIFY_REG_ORDINAL_X64(ordinal, "i64");
+#define IREE_VM_VERIFY_REG_F32(ordinal) \
+  IREE_VM_VERIFY_REG_ORDINAL_X32(ordinal, "f32");
+#define IREE_VM_VERIFY_REG_F64(ordinal) \
+  IREE_VM_VERIFY_REG_ORDINAL_X64(ordinal, "f64");
+// Verifies that |ordinal| names a ref register:
+//  - fails with INVALID_ARGUMENT if IREE_REF_REGISTER_TYPE_BIT is not set
+//  - fails with OUT_OF_RANGE if the ordinal masked with
+//    IREE_REF_REGISTER_MASK is >= verify_state->ref_register_count
+// The error messages print the unmasked |ordinal|. Reads `verify_state` from
+// the enclosing scope and evaluates |ordinal| more than once.
+#define IREE_VM_VERIFY_REG_REF(ordinal)                                      \
+  if (IREE_UNLIKELY(((ordinal)&IREE_REF_REGISTER_TYPE_BIT) == 0)) {          \
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,                    \
+                            "ref register required but non-ref %u provided", \
+                            (ordinal));                                      \
+  } else if (IREE_UNLIKELY(((ordinal)&IREE_REF_REGISTER_MASK) >=             \
+                           verify_state->ref_register_count)) {              \
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,                        \
+                            "ref register ordinal %u out of range %u",       \
+                            (ordinal), verify_state->ref_register_count);    \
+  }
+// Hook for registers whose kind (value vs ref) is not fixed by the op.
+// It distinguishes the two cases via IREE_REF_REGISTER_TYPE_BIT but both
+// branches are currently empty, so no verification is performed here.
+// NOTE(review): ordinals passed through this macro are not range-checked by
+// it; confirm they are validated elsewhere or add bounds checks per branch.
+#define IREE_VM_VERIFY_REG_ANY(ordinal)                             \
+  if (IREE_UNLIKELY(((ordinal)&IREE_REF_REGISTER_TYPE_BIT) == 0)) { \
+  } else {                                                          \
+  }
+
+// Inline constant verification.
+// Each macro checks that the constant's bytes are available at `pc`, declares
+// a local |name| holding the decoded value (marked used to silence unused
+// variable warnings) and advances `pc` past the constant.
+// The local's type matches the decoded width/kind: storing an i64 in a
+// uint32_t would truncate it, and converting an out-of-range or NaN
+// floating-point value to an integer type is undefined behavior.
+#define VM_VerifyConstI8(name)             \
+  IREE_VM_VERIFY_PC_RANGE(pc + 1, max_pc); \
+  uint8_t name = OP_I8(0);                 \
+  (void)(name);                            \
+  ++pc;
+#define VM_VerifyConstI32(name)            \
+  IREE_VM_VERIFY_PC_RANGE(pc + 4, max_pc); \
+  uint32_t name = OP_I32(0);               \
+  (void)(name);                            \
+  pc += 4;
+#define VM_VerifyConstI64(name)            \
+  IREE_VM_VERIFY_PC_RANGE(pc + 8, max_pc); \
+  uint64_t name = OP_I64(0);               \
+  (void)(name);                            \
+  pc += 8;
+#define VM_VerifyConstF32(name)            \
+  IREE_VM_VERIFY_PC_RANGE(pc + 4, max_pc); \
+  float name = OP_F32(0);                  \
+  (void)(name);                            \
+  pc += 4;
+#define VM_VerifyConstF64(name)            \
+  IREE_VM_VERIFY_PC_RANGE(pc + 8, max_pc); \
+  double name = OP_F64(0);                 \
+  (void)(name);                            \
+  pc += 8;
+
+// Function attribute handling.
+// VM_VerifyFuncAttr decodes a 32-bit function ordinal into a local |name|.
+// The top bit (0x80000000) marks the ordinal as an import:
+// VM_IsImportOrdinal tests it and VM_UnmaskImportOrdinal clears it in place
+// (|name| must be a modifiable lvalue).
+// VM_VerifyImportOrdinal bails with INVALID_ARGUMENT if |name| is not a valid
+// index into verify_state->imported_functions.
+#define VM_VerifyFuncAttr(name) VM_VerifyConstI32(name)
+#define VM_IsImportOrdinal(name) (((name)&0x80000000u) != 0)
+#define VM_UnmaskImportOrdinal(name) name &= ~0x80000000u
+#define VM_VerifyImportOrdinal(name)                                          \
+  if (IREE_UNLIKELY((name) >= iree_vm_ImportFunctionDef_vec_len(              \
+                                  verify_state->imported_functions))) {       \
+    return iree_make_status(                                                  \
+        IREE_STATUS_INVALID_ARGUMENT, "import ordinal %u out of range %zu",   \
+        name,                                                                 \
+        iree_vm_ImportFunctionDef_vec_len(verify_state->imported_functions)); \
+  }
+// Bails with INVALID_ARGUMENT if |name| is not a valid index into
+// verify_state->function_descriptors.
+// The whole comparison must sit inside IREE_UNLIKELY: wrapping only |name|
+// would compare the branch hint's result with the table length instead of
+// the ordinal itself.
+#define VM_VerifyFunctionOrdinal(name)                                      \
+  if (IREE_UNLIKELY((name) >= iree_vm_FunctionDescriptor_vec_len(           \
+                                  verify_state->function_descriptors))) {   \
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,                   \
+                            "function ordinal %u out of range %zu", (name), \
+                            iree_vm_FunctionDescriptor_vec_len(             \
+                                verify_state->function_descriptors));       \
+  }
+// VM_VerifyGlobalAttr decodes a 32-bit global attribute (byte offset or ref
+// ordinal) into a local |name|.
+// VM_VerifyRwdataOffset bails with OUT_OF_RANGE unless the byte range
+// [|name|, |name| + |access_length|) fits in
+// verify_state->rwdata_storage_size. The end offset is computed in 64 bits so
+// that offsets near UINT32_MAX cannot wrap around and slip past the check.
+#define VM_VerifyGlobalAttr(name) VM_VerifyConstI32(name)
+#define VM_VerifyRwdataOffset(name, access_length)                    \
+  if (IREE_UNLIKELY(((uint64_t)(name) + (uint64_t)(access_length)) >  \
+                    (uint64_t)verify_state->rwdata_storage_size)) {   \
+    return iree_make_status(                                          \
+        IREE_STATUS_OUT_OF_RANGE,                                     \
+        "global byte_offset out of range: %u (rwdata=%zu)", (name),   \
+        verify_state->rwdata_storage_size);                           \
+  }
+// Bails with OUT_OF_RANGE if the global ref ordinal |name| is not below
+// verify_state->global_ref_count. The ordinal is unsigned and printed as such.
+#define VM_VerifyGlobalRefOrdinal(name)                                        \
+  if (IREE_UNLIKELY((name) >= verify_state->global_ref_count)) {               \
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,                          \
+                            "global ref ordinal out of range: %u (table=%zu)", \
+                            (name), verify_state->global_ref_count);           \
+  }
+// VM_VerifyRodataAttr decodes a 32-bit rodata ordinal into a local |name|.
+#define VM_VerifyRodataAttr(name) VM_VerifyConstI32(name)
+// Bails with OUT_OF_RANGE if the rodata ordinal |name| is not below
+// verify_state->rodata_ref_count. The ordinal is unsigned and printed as such.
+#define VM_VerifyRodataOrdinal(name)                                           \
+  if (IREE_UNLIKELY((name) >= verify_state->rodata_ref_count)) {               \
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,                          \
+                            "rodata ref ordinal out of range: %u (table=%zu)", \
+                            (name), verify_state->rodata_ref_count);           \
+  }
+// Decodes a 32-bit type id at `pc`, bails with OUT_OF_RANGE if it is not below
+// module->type_count, and declares two locals: `|name|_id` (the raw id) and
+// |name| (a pointer to the matching entry of module->type_table). Advances
+// `pc` by 4. The id is unsigned and printed as such.
+#define VM_VerifyType(name)                                                 \
+  IREE_VM_VERIFY_PC_RANGE(pc + 4, max_pc);                                  \
+  uint32_t name##_id = OP_I32(0);                                           \
+  if (IREE_UNLIKELY(name##_id >= module->type_count)) {                     \
+    return iree_make_status(IREE_STATUS_OUT_OF_RANGE,                       \
+                            "type id ordinal out of range: %u (table=%zu)", \
+                            name##_id, module->type_count);                 \
+  }                                                                         \
+  const iree_vm_type_def_t* name = &module->type_table[name##_id];          \
+  (void)(name);                                                             \
+  pc += 4;
+#define VM_VerifyTypeOf(name) VM_VerifyType(name)
+// Integer/float attribute aliases over the inline constant verifiers.
+#define VM_VerifyIntAttr32(name) VM_VerifyConstI32(name)
+#define VM_VerifyIntAttr64(name) VM_VerifyConstI64(name)
+#define VM_VerifyFloatAttr32(name) VM_VerifyConstF32(name)
+#define VM_VerifyFloatAttr64(name) VM_VerifyConstF64(name)
+// Verifies an inline string attribute laid out as a 16-bit length followed by
+// that many bytes. Checks that both the length field and the payload are
+// within `max_pc`, points |out_str| at the payload inside `bytecode_data`
+// (no copy is made) and advances `pc` past the string. |name| is unused.
+#define VM_VerifyStrAttr(name, out_str)                      \
+  IREE_VM_VERIFY_PC_RANGE(pc + 2, max_pc);                   \
+  (out_str)->size = (iree_host_size_t)OP_I16(0);             \
+  IREE_VM_VERIFY_PC_RANGE(pc + 2 + (out_str)->size, max_pc); \
+  (out_str)->data = (const char*)&bytecode_data[pc + 2];     \
+  pc += 2 + (out_str)->size;
+
+// Decodes a 32-bit branch target into a local `|name|_pc` and records it in
+// verify_state->block_list via iree_vm_bytecode_block_list_insert, declaring
+// a local block pointer |name| that receives the insert's output. Propagates
+// any error returned by the insert.
+#define VM_VerifyBranchTarget(name)                        \
+  VM_VerifyConstI32(name##_pc);                            \
+  iree_vm_bytecode_block_t* name = NULL;                   \
+  IREE_RETURN_IF_ERROR(iree_vm_bytecode_block_list_insert( \
+      &verify_state->block_list, name##_pc, &name));
+// Verifies a branch operand remap list at `pc`.
+// Aligns `pc` to IREE_REGISTER_ORDINAL_SIZE, checks that the size field is in
+// range, declares a local |name| pointing at the list inside `bytecode_data`,
+// then checks that `size` src/dst pairs (two ordinals each) are in range and
+// advances `pc` past them. Each src/dst register is then passed to
+// IREE_VM_VERIFY_REG_ANY.
+// NOTE: the loop declares a variable named `i` in its own scope.
+#define VM_VerifyBranchOperands(name)                                         \
+  VM_AlignPC(pc, IREE_REGISTER_ORDINAL_SIZE);                                 \
+  IREE_VM_VERIFY_PC_RANGE(pc + IREE_REGISTER_ORDINAL_SIZE, max_pc);           \
+  const iree_vm_register_remap_list_t* name =                                 \
+      (const iree_vm_register_remap_list_t*)&bytecode_data[pc];               \
+  pc += IREE_REGISTER_ORDINAL_SIZE;                                           \
+  IREE_VM_VERIFY_PC_RANGE(pc + (name)->size * 2 * IREE_REGISTER_ORDINAL_SIZE, \
+                          max_pc);                                            \
+  pc += (name)->size * 2 * IREE_REGISTER_ORDINAL_SIZE;                        \
+  for (uint16_t i = 0; i < name->size; ++i) {                                 \
+    IREE_VM_VERIFY_REG_ANY(name->pairs[i].src_reg);                           \
+    IREE_VM_VERIFY_REG_ANY(name->pairs[i].dst_reg);                           \
+  }
+
+// Operand register verification.
+// Each macro decodes one register ordinal into a local `|name|_ordinal`,
+// verifies it against the register kind in the macro name and advances `pc`
+// by IREE_REGISTER_ORDINAL_SIZE.
+#define VM_VerifyOperandRegI32(name)          \
+  IREE_VM_VERIFY_REG_ORDINAL(name##_ordinal); \
+  IREE_VM_VERIFY_REG_I32(name##_ordinal);     \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+#define VM_VerifyOperandRegI64(name)          \
+  IREE_VM_VERIFY_REG_ORDINAL(name##_ordinal); \
+  IREE_VM_VERIFY_REG_I64(name##_ordinal);     \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+// Host-size operands are verified as i64 registers.
+#define VM_VerifyOperandRegI64HostSize(name) VM_VerifyOperandRegI64(name)
+#define VM_VerifyOperandRegF32(name)          \
+  IREE_VM_VERIFY_REG_ORDINAL(name##_ordinal); \
+  IREE_VM_VERIFY_REG_F32(name##_ordinal);     \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+// Decodes one f64 operand register ordinal into a local `|name|_ordinal`,
+// verifies it as a 64-bit register (aligned, with both 32-bit slots in range)
+// and advances `pc` by IREE_REGISTER_ORDINAL_SIZE.
+#define VM_VerifyOperandRegF64(name)          \
+  IREE_VM_VERIFY_REG_ORDINAL(name##_ordinal); \
+  IREE_VM_VERIFY_REG_F64(name##_ordinal);     \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+// Decodes one ref operand register ordinal into a local `|name|_ordinal`,
+// verifies it with IREE_VM_VERIFY_REG_REF and advances `pc` by
+// IREE_REGISTER_ORDINAL_SIZE.
+#define VM_VerifyOperandRegRef(name)          \
+  IREE_VM_VERIFY_REG_ORDINAL(name##_ordinal); \
+  IREE_VM_VERIFY_REG_REF(name##_ordinal);     \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+// Verifies the storage of a variadic register list at `pc`.
+// Aligns `pc` to IREE_REGISTER_ORDINAL_SIZE, checks that the size field is in
+// range, declares a local |name| pointing at the list inside `bytecode_data`,
+// then checks that `size` ordinals are in range and advances `pc` past them.
+// The ordinals themselves are not checked here; see the typed variants.
+#define VM_VerifyVariadicOperands(name)                                   \
+  VM_AlignPC(pc, IREE_REGISTER_ORDINAL_SIZE);                             \
+  IREE_VM_VERIFY_PC_RANGE(pc + IREE_REGISTER_ORDINAL_SIZE, max_pc);       \
+  const iree_vm_register_list_t* name =                                   \
+      (const iree_vm_register_list_t*)&bytecode_data[pc];                 \
+  pc += IREE_REGISTER_ORDINAL_SIZE;                                       \
+  IREE_VM_VERIFY_PC_RANGE(pc + (name)->size * IREE_REGISTER_ORDINAL_SIZE, \
+                          max_pc);                                        \
+  pc += (name)->size * IREE_REGISTER_ORDINAL_SIZE;
+// Typed variadic operand lists: verify the list storage with
+// VM_VerifyVariadicOperands and then check every register in the list against
+// the register kind in the macro name.
+#define VM_VerifyVariadicOperandsI32(name)            \
+  VM_VerifyVariadicOperands(name);                    \
+  for (uint16_t __i = 0; __i < (name)->size; ++__i) { \
+    IREE_VM_VERIFY_REG_I32((name)->registers[__i]);   \
+  }
+#define VM_VerifyVariadicOperandsI64(name)            \
+  VM_VerifyVariadicOperands(name);                    \
+  for (uint16_t __i = 0; __i < (name)->size; ++__i) { \
+    IREE_VM_VERIFY_REG_I64((name)->registers[__i]);   \
+  }
+#define VM_VerifyVariadicOperandsF32(name)            \
+  VM_VerifyVariadicOperands(name);                    \
+  for (uint16_t __i = 0; __i < (name)->size; ++__i) { \
+    IREE_VM_VERIFY_REG_F32((name)->registers[__i]);   \
+  }
+#define VM_VerifyVariadicOperandsF64(name)            \
+  VM_VerifyVariadicOperands(name);                    \
+  for (uint16_t __i = 0; __i < (name)->size; ++__i) { \
+    IREE_VM_VERIFY_REG_F64((name)->registers[__i]);   \
+  }
+// NOTE: |type_def| is accepted but not used by the expansion; only the
+// ref-ness and range of each register are checked.
+#define VM_VerifyVariadicOperandsRef(name, type_def)  \
+  VM_VerifyVariadicOperands(name);                    \
+  for (uint16_t __i = 0; __i < (name)->size; ++__i) { \
+    IREE_VM_VERIFY_REG_REF((name)->registers[__i]);   \
+  }
+// Registers of unknown kind go through IREE_VM_VERIFY_REG_ANY.
+#define VM_VerifyVariadicOperandsAny(name)            \
+  VM_VerifyVariadicOperands(name);                    \
+  for (uint16_t __i = 0; __i < (name)->size; ++__i) { \
+    IREE_VM_VERIFY_REG_ANY((name)->registers[__i]);   \
+  }
+// Result register verification.
+// Each macro decodes one register ordinal into a local `|name|_ordinal`,
+// verifies it against the register kind in the macro name and advances `pc`
+// by IREE_REGISTER_ORDINAL_SIZE. The expansions mirror the operand variants.
+#define VM_VerifyResultRegI32(name)           \
+  IREE_VM_VERIFY_REG_ORDINAL(name##_ordinal); \
+  IREE_VM_VERIFY_REG_I32(name##_ordinal);     \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+#define VM_VerifyResultRegI64(name)           \
+  IREE_VM_VERIFY_REG_ORDINAL(name##_ordinal); \
+  IREE_VM_VERIFY_REG_I64(name##_ordinal);     \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+#define VM_VerifyResultRegF32(name)           \
+  IREE_VM_VERIFY_REG_ORDINAL(name##_ordinal); \
+  IREE_VM_VERIFY_REG_F32(name##_ordinal);     \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+#define VM_VerifyResultRegF64(name)           \
+  IREE_VM_VERIFY_REG_ORDINAL(name##_ordinal); \
+  IREE_VM_VERIFY_REG_F64(name##_ordinal);     \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+#define VM_VerifyResultRegRef(name)           \
+  IREE_VM_VERIFY_REG_ORDINAL(name##_ordinal); \
+  IREE_VM_VERIFY_REG_REF(name##_ordinal);     \
+  pc += IREE_REGISTER_ORDINAL_SIZE;
+// Variadic results of unknown kind share the operand list verification.
+#define VM_VerifyVariadicResultsAny(name) VM_VerifyVariadicOperandsAny(name)
+
+// Shorthand for core ops with a uniform encoding: N operand registers of one
+// integer width followed by a single result register of the same width.
+// Each expands to a VERIFY_OP case for IREE_VM_OP_CORE_<op_name>.
+
+// One i32 operand, one i32 result.
+#define VERIFY_OP_CORE_UNARY_I32(op_name) \
+  VERIFY_OP(CORE, op_name, {              \
+    VM_VerifyOperandRegI32(operand);      \
+    VM_VerifyResultRegI32(result);        \
+  });
+
+// One i64 operand, one i64 result.
+#define VERIFY_OP_CORE_UNARY_I64(op_name) \
+  VERIFY_OP(CORE, op_name, {              \
+    VM_VerifyOperandRegI64(operand);      \
+    VM_VerifyResultRegI64(result);        \
+  });
+
+// Two i32 operands, one i32 result.
+#define VERIFY_OP_CORE_BINARY_I32(op_name) \
+  VERIFY_OP(CORE, op_name, {               \
+    VM_VerifyOperandRegI32(lhs);           \
+    VM_VerifyOperandRegI32(rhs);           \
+    VM_VerifyResultRegI32(result);         \
+  });
+
+// Two i64 operands, one i64 result.
+#define VERIFY_OP_CORE_BINARY_I64(op_name) \
+  VERIFY_OP(CORE, op_name, {               \
+    VM_VerifyOperandRegI64(lhs);           \
+    VM_VerifyOperandRegI64(rhs);           \
+    VM_VerifyResultRegI64(result);         \
+  });
+
+// Three i32 operands, one i32 result.
+#define VERIFY_OP_CORE_TERNARY_I32(op_name) \
+  VERIFY_OP(CORE, op_name, {                \
+    VM_VerifyOperandRegI32(a);              \
+    VM_VerifyOperandRegI32(b);              \
+    VM_VerifyOperandRegI32(c);              \
+    VM_VerifyResultRegI32(result);          \
+  });
+
+// Three i64 operands, one i64 result.
+#define VERIFY_OP_CORE_TERNARY_I64(op_name) \
+  VERIFY_OP(CORE, op_name, {                \
+    VM_VerifyOperandRegI64(a);              \
+    VM_VerifyOperandRegI64(b);              \
+    VM_VerifyOperandRegI64(c);              \
+    VM_VerifyResultRegI64(result);          \
+  });
+
+// Shorthand for f32 extension ops with N f32 operand registers and one f32
+// result register. Each expands to a VERIFY_OP case for
+// IREE_VM_OP_EXT_F32_<op_name>.
+
+// One f32 operand, one f32 result.
+#define VERIFY_OP_EXT_F32_UNARY_F32(op_name) \
+  VERIFY_OP(EXT_F32, op_name, {              \
+    VM_VerifyOperandRegF32(operand);         \
+    VM_VerifyResultRegF32(result);           \
+  });
+
+// Two f32 operands, one f32 result.
+#define VERIFY_OP_EXT_F32_BINARY_F32(op_name) \
+  VERIFY_OP(EXT_F32, op_name, {               \
+    VM_VerifyOperandRegF32(lhs);              \
+    VM_VerifyOperandRegF32(rhs);              \
+    VM_VerifyResultRegF32(result);            \
+  });
+
+// Three f32 operands, one f32 result.
+#define VERIFY_OP_EXT_F32_TERNARY_F32(op_name) \
+  VERIFY_OP(EXT_F32, op_name, {                \
+    VM_VerifyOperandRegF32(a);                 \
+    VM_VerifyOperandRegF32(b);                 \
+    VM_VerifyOperandRegF32(c);                 \
+    VM_VerifyResultRegF32(result);             \
+  });
+
+// Shorthand for f64 extension ops with N f64 operand registers and one f64
+// result register. Each expands to a VERIFY_OP case for
+// IREE_VM_OP_EXT_F64_<op_name>.
+
+// One f64 operand, one f64 result.
+#define VERIFY_OP_EXT_F64_UNARY_F64(op_name) \
+  VERIFY_OP(EXT_F64, op_name, {              \
+    VM_VerifyOperandRegF64(operand);         \
+    VM_VerifyResultRegF64(result);           \
+  });
+
+// Two f64 operands, one f64 result.
+#define VERIFY_OP_EXT_F64_BINARY_F64(op_name) \
+  VERIFY_OP(EXT_F64, op_name, {               \
+    VM_VerifyOperandRegF64(lhs);              \
+    VM_VerifyOperandRegF64(rhs);              \
+    VM_VerifyResultRegF64(result);            \
+  });
+
+// Three f64 operands, one f64 result.
+#define VERIFY_OP_EXT_F64_TERNARY_F64(op_name) \
+  VERIFY_OP(EXT_F64, op_name, {                \
+    VM_VerifyOperandRegF64(a);                 \
+    VM_VerifyOperandRegF64(b);                 \
+    VM_VerifyOperandRegF64(c);                 \
+    VM_VerifyResultRegF64(result);             \
+  });
+
+//===----------------------------------------------------------------------===//
+// Call verification
+//===----------------------------------------------------------------------===//
+
+// Splits the calling convention string stored in |signature_def| into its
+// argument and result fragments.
+//
+// The flatbuffer string is wrapped (not copied) in a temporary
+// iree_vm_function_signature_t and handed to
+// iree_vm_function_call_get_cconv_fragments, whose status is returned as-is.
+// |out_arguments| and |out_results| are populated by that call.
+static iree_status_t iree_vm_bytecode_function_get_cconv_fragments(
+    iree_vm_FunctionSignatureDef_table_t signature_def,
+    iree_string_view_t* out_arguments, iree_string_view_t* out_results) {
+  flatbuffers_string_t calling_convention =
+      iree_vm_FunctionSignatureDef_calling_convention(signature_def);
+  const iree_string_view_t calling_convention_view = iree_make_string_view(
+      calling_convention, flatbuffers_string_len(calling_convention));
+  iree_vm_function_signature_t wrapped_signature = {
+      .calling_convention = calling_convention_view,
+  };
+  return iree_vm_function_call_get_cconv_fragments(
+      &wrapped_signature, out_arguments, out_results);
+}
+
+// Counts the register slots required to hold every value described by
+// |cconv_fragment|.
+//
+// 32-bit values (i32/f32) take one 32-bit slot. 64-bit values (i64/f64) take
+// two slots and start on an even slot, so a padding slot is counted when the
+// running total is odd. Refs are counted separately. Void entries take no
+// storage.
+//
+// Returns INVALID_ARGUMENT for variadic spans (not allowed on internal
+// functions) and for unrecognized characters; the outputs are only written on
+// success.
+static iree_status_t iree_vm_bytecode_function_count_cconv_regs(
+    iree_string_view_t cconv_fragment, iree_host_size_t* out_i32_count,
+    iree_host_size_t* out_ref_count) {
+  iree_host_size_t value_slots = 0;
+  iree_host_size_t ref_slots = 0;
+  for (iree_host_size_t char_index = 0; char_index < cconv_fragment.size;
+       ++char_index) {
+    const char type_char = cconv_fragment.data[char_index];
+    switch (type_char) {
+      case IREE_VM_CCONV_TYPE_VOID:
+        break;
+      case IREE_VM_CCONV_TYPE_I32:
+      case IREE_VM_CCONV_TYPE_F32:
+        value_slots += 1;
+        break;
+      case IREE_VM_CCONV_TYPE_I64:
+      case IREE_VM_CCONV_TYPE_F64:
+        // One padding slot when currently odd, then the two value slots.
+        value_slots += (value_slots & 1) + 2;
+        break;
+      case IREE_VM_CCONV_TYPE_REF:
+        ref_slots += 1;
+        break;
+      case IREE_VM_CCONV_TYPE_SPAN_START:
+        return iree_make_status(
+            IREE_STATUS_INVALID_ARGUMENT,
+            "internal functions cannot accept variadic arguments");
+      default:
+        return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                                "unsupported cconv fragment char '%c'",
+                                type_char);
+    }
+  }
+  *out_i32_count = value_slots;
+  *out_ref_count = ref_slots;
+  return iree_ok_status();
+}
+
+// Verifies that the function's declared register counts can hold both its
+// arguments and its results as described by the calling convention fragments
+// in |verify_state|.
+//
+// Arguments and results are counted independently; each count must fit in
+// the corresponding register bank on its own. Returns INVALID_ARGUMENT when
+// either bank is too small, or any error from counting the fragments.
+static iree_status_t iree_vm_bytecode_function_verify_arguments(
+    const iree_vm_bytecode_verify_state_t* verify_state) {
+  iree_host_size_t argument_i32_count = 0;
+  iree_host_size_t argument_ref_count = 0;
+  IREE_RETURN_IF_ERROR(iree_vm_bytecode_function_count_cconv_regs(
+      verify_state->cconv_arguments, &argument_i32_count,
+      &argument_ref_count));
+
+  iree_host_size_t result_i32_count = 0;
+  iree_host_size_t result_ref_count = 0;
+  IREE_RETURN_IF_ERROR(iree_vm_bytecode_function_count_cconv_regs(
+      verify_state->cconv_results, &result_i32_count, &result_ref_count));
+
+  // The larger of the argument/result requirements bounds each bank.
+  const iree_host_size_t required_i32_count =
+      argument_i32_count > result_i32_count ? argument_i32_count
+                                            : result_i32_count;
+  const iree_host_size_t required_ref_count =
+      argument_ref_count > result_ref_count ? argument_ref_count
+                                            : result_ref_count;
+  if (verify_state->i32_register_count < required_i32_count ||
+      verify_state->ref_register_count < required_ref_count) {
+    return iree_make_status(
+        IREE_STATUS_INVALID_ARGUMENT,
+        "insufficient register storage for function arguments/results");
+  }
+  return iree_ok_status();
+}
+
+// Verifies that entry |reg_i| of |reg_list| exists and names a register that
+// is valid for the calling convention type |cconv_type|.
+//
+// i32/f32 use the single-slot value check, i64/f64 the aligned two-slot value
+// check and refs the ref check. Returns OUT_OF_RANGE if |reg_i| is past the
+// end of the list and INVALID_ARGUMENT for unsupported type characters; the
+// register checks return their own statuses.
+static iree_status_t iree_vm_bytecode_function_verify_cconv_register(
+    const iree_vm_bytecode_verify_state_t* verify_state, char cconv_type,
+    const iree_vm_register_list_t* IREE_RESTRICT reg_list, int reg_i) {
+  // The list must actually contain the requested entry.
+  if (reg_i >= reg_list->size) {
+    return iree_make_status(
+        IREE_STATUS_OUT_OF_RANGE,
+        "register list underflow (have %u, trying to access %u)",
+        reg_list->size, reg_i);
+  }
+
+  // NOTE: the verify macros return from this function on failure.
+  if (cconv_type == IREE_VM_CCONV_TYPE_I32 ||
+      cconv_type == IREE_VM_CCONV_TYPE_F32) {
+    IREE_VM_VERIFY_REG_ORDINAL_X32(reg_list->registers[reg_i], "i32/f32");
+  } else if (cconv_type == IREE_VM_CCONV_TYPE_I64 ||
+             cconv_type == IREE_VM_CCONV_TYPE_F64) {
+    IREE_VM_VERIFY_REG_ORDINAL_X64(reg_list->registers[reg_i], "i64/f64");
+  } else if (cconv_type == IREE_VM_CCONV_TYPE_REF) {
+    IREE_VM_VERIFY_REG_REF(reg_list->registers[reg_i]);
+  } else {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "unsupported cconv fragment char '%c'",
+                            cconv_type);
+  }
+  return iree_ok_status();
+}
+
+// Verifies that the registers in |reg_list| match the types described by
+// |cconv_fragment|, consuming one register per non-void value.
+//
+// |segment_size_list| supplies the element count for each variadic span and
+// may be NULL when the fragment contains no spans; a span encountered with a
+// NULL list fails with INVALID_ARGUMENT.
+//
+// Three cursors are maintained:
+//   i     - position in |cconv_fragment|
+//   seg_i - index into |segment_size_list|; advances once per outer loop
+//           iteration (a whole span counts as one iteration)
+//   reg_i - index of the next register to consume from |reg_list|
+//
+// NOTE(review): registers left over in |reg_list| after the fragment has been
+// fully consumed are not rejected here.
+static iree_status_t iree_vm_bytecode_function_verify_cconv_registers(
+    const iree_vm_bytecode_verify_state_t* verify_state,
+    iree_string_view_t cconv_fragment,
+    const iree_vm_register_list_t* IREE_RESTRICT segment_size_list,
+    const iree_vm_register_list_t* IREE_RESTRICT reg_list) {
+  for (uint16_t i = 0, seg_i = 0, reg_i = 0; i < cconv_fragment.size;
+       ++i, ++seg_i) {
+    switch (cconv_fragment.data[i]) {
+      case IREE_VM_CCONV_TYPE_VOID:
+        break;
+      case IREE_VM_CCONV_TYPE_I32:
+      case IREE_VM_CCONV_TYPE_F32:
+      case IREE_VM_CCONV_TYPE_I64:
+      case IREE_VM_CCONV_TYPE_F64:
+      case IREE_VM_CCONV_TYPE_REF: {
+        // Plain value: verify and consume the next register.
+        IREE_RETURN_IF_ERROR(iree_vm_bytecode_function_verify_cconv_register(
+            verify_state, cconv_fragment.data[i], reg_list, reg_i++));
+      } break;
+      case IREE_VM_CCONV_TYPE_SPAN_START: {
+        if (!segment_size_list) {
+          return iree_make_status(
+              IREE_STATUS_INVALID_ARGUMENT,
+              "function is variadic but no segment size list provided");
+        } else if (seg_i >= segment_size_list->size) {
+          return iree_make_status(
+              IREE_STATUS_OUT_OF_RANGE,
+              "segment size list underflow (have %u, trying to access %u)",
+              segment_size_list->size, seg_i);
+        }
+        // Number of times the span's element tuple repeats.
+        uint16_t span_count = segment_size_list->registers[seg_i];
+        if (!span_count) {
+          // No items; skip the span.
+          // Leaves i on the span end marker (or at the end of the fragment);
+          // the outer loop increment then steps past it.
+          do {
+            ++i;
+          } while (i < cconv_fragment.size &&
+                   cconv_fragment.data[i] != IREE_VM_CCONV_TYPE_SPAN_END);
+          continue;
+        }
+        // Re-walk the span's element types once per repetition; each pass
+        // restarts at the first element and stops at the span end marker.
+        uint16_t span_start_i = i + 1;
+        for (uint16_t j = 0; j < span_count; ++j) {
+          for (i = span_start_i;
+               i < cconv_fragment.size &&
+               cconv_fragment.data[i] != IREE_VM_CCONV_TYPE_SPAN_END;
+               ++i) {
+            // TODO(benvanik): share with switch above.
+            switch (cconv_fragment.data[i]) {
+              case IREE_VM_CCONV_TYPE_VOID:
+                break;
+              case IREE_VM_CCONV_TYPE_I32:
+              case IREE_VM_CCONV_TYPE_F32:
+              case IREE_VM_CCONV_TYPE_I64:
+              case IREE_VM_CCONV_TYPE_F64:
+              case IREE_VM_CCONV_TYPE_REF: {
+                IREE_RETURN_IF_ERROR(
+                    iree_vm_bytecode_function_verify_cconv_register(
+                        verify_state, cconv_fragment.data[i], reg_list,
+                        reg_i++));
+              } break;
+              default:
+                // Nested spans and unknown characters are rejected.
+                return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                                        "unsupported cconv fragment char '%c'",
+                                        cconv_fragment.data[i]);
+            }
+          }
+        }
+      } break;
+      default:
+        return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                                "unsupported cconv fragment char '%c'",
+                                cconv_fragment.data[i]);
+    }
+  }
+  return iree_ok_status();
+}
+
+// Verifies the registers of a call against the callee signature.
+//
+// The calling convention of |signature_def| is split into argument and result
+// fragments. |src_reg_list| is checked against the arguments, using
+// |segment_size_list| for variadic spans, and |dst_reg_list| is checked
+// against the results with no segment sizes, so a span in the results fails.
+// The first failing status is returned.
+static iree_status_t iree_vm_bytecode_function_verify_call(
+    const iree_vm_bytecode_verify_state_t* verify_state,
+    iree_vm_FunctionSignatureDef_table_t signature_def,
+    const iree_vm_register_list_t* IREE_RESTRICT segment_size_list,
+    const iree_vm_register_list_t* IREE_RESTRICT src_reg_list,
+    const iree_vm_register_list_t* IREE_RESTRICT dst_reg_list) {
+  // Split the callee calling convention into its two halves.
+  iree_string_view_t argument_fragment = iree_string_view_empty();
+  iree_string_view_t result_fragment = iree_string_view_empty();
+  IREE_RETURN_IF_ERROR(iree_vm_bytecode_function_get_cconv_fragments(
+      signature_def, &argument_fragment, &result_fragment));
+
+  // Source registers feed the callee arguments.
+  IREE_RETURN_IF_ERROR(iree_vm_bytecode_function_verify_cconv_registers(
+      verify_state, argument_fragment, segment_size_list, src_reg_list));
+
+  // Destination registers receive the callee results.
+  IREE_RETURN_IF_ERROR(iree_vm_bytecode_function_verify_cconv_registers(
+      verify_state, result_fragment, /*segment_size_list=*/NULL,
+      dst_reg_list));
+
+  return iree_ok_status();
+}
+
+//===----------------------------------------------------------------------===//
+// Bytecode verification
+//===----------------------------------------------------------------------===//
+
+// Emits one `case` of the opcode switch for IREE_VM_OP_<ext>_<op_name> whose
+// body is |body| followed by a `break`.
+#define VERIFY_OP(ext, op_name, body)  \
+  case IREE_VM_OP_##ext##_##op_name: { \
+    body;                              \
+  } break;
+
+// Opens the `case` for a prefix opcode that introduces an extension opcode
+// set. Checks that the extension opcode byte is available, that the function
+// declares the |ext| feature requirement, and then opens a nested `switch` on
+// the extension opcode byte (advancing `pc` past it). Must be closed with
+// END_VERIFY_PREFIX.
+#define BEGIN_VERIFY_PREFIX(op_name, ext)    \
+  case IREE_VM_OP_CORE_##op_name: {          \
+    IREE_VM_VERIFY_PC_RANGE(pc + 1, max_pc); \
+    IREE_VM_VERIFY_REQUIREMENT(ext);         \
+    switch (bytecode_data[pc++]) {
+// Closes the nested `switch` and `case` opened by BEGIN_VERIFY_PREFIX.
+// Extension opcodes without a case fail with INVALID_ARGUMENT.
+#define END_VERIFY_PREFIX()                               \
+  default:                                                \
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, \
+                            "unhandled ext opcode");      \
+    }                                                     \
+    break;                                                \
+    }
+// Emits the `case` for a prefix opcode whose extension has no verifier cases
+// here: always returns the status from iree_vm_check_feature_mismatch for
+// |ext| against the function's declared requirements.
+#define UNHANDLED_VERIFY_PREFIX(op_name, ext)                                 \
+  case IREE_VM_OP_CORE_##op_name: {                                           \
+    return iree_vm_check_feature_mismatch(__FILE__, __LINE__, ext,            \
+                                          function_descriptor->requirements); \
+  }
+
+static iree_status_t iree_vm_bytecode_function_verify_bytecode_op(
+    iree_vm_bytecode_module_t* module,
+    iree_vm_bytecode_verify_state_t* verify_state,
+    iree_vm_FunctionSignatureDef_table_t function_signature,
+    iree_vm_FunctionDescriptor_struct_t function_descriptor,
+    iree_const_byte_span_t function_bytecode, uint32_t start_pc,
+    uint32_t max_pc, uint32_t* out_next_pc) {
+  *out_next_pc = 0;
+  uint32_t pc = start_pc;
+  const uint8_t* bytecode_data = function_bytecode.data;
+
+  // NOTE: we keep this as simple as possible so that we can one day auto
+  // generate it from tblgen, which has all the encodings in a similar form
+  // such that we could do string substitution to get the verifier macros.
+
+  // All ops except for Block must be inside of a block. We hoist that check
+  // here before switching on the opcode.
+  IREE_VM_VERIFY_PC_RANGE(pc + 1, max_pc);
+  if (verify_state->in_block == 0) {
+    // If not in a block then the next opcode must be a block.
+    if (bytecode_data[pc] != IREE_VM_OP_CORE_Block) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "op at pc %08X is not in a block", pc);
+    }
+  } else {
+    // If in a block then the next opcode must not be a block.
+    if (bytecode_data[pc] == IREE_VM_OP_CORE_Block) {
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "op at pc %08X is a block while still in a block",
+                              pc);
+    }
+  }
+
+  // Get primary opcode. All ops have at least 1 byte.
+  switch (bytecode_data[pc++]) {
+    //===------------------------------------------------------------------===//
+    // Globals
+    //===------------------------------------------------------------------===//
+
+    VERIFY_OP(CORE, GlobalLoadI32, {
+      VM_VerifyGlobalAttr(byte_offset);
+      VM_VerifyRwdataOffset(byte_offset, 4);
+      VM_VerifyResultRegI32(value);
+    });
+
+    VERIFY_OP(CORE, GlobalStoreI32, {
+      VM_VerifyGlobalAttr(byte_offset);
+      VM_VerifyRwdataOffset(byte_offset, 4);
+      VM_VerifyOperandRegI32(value);
+    });
+
+    VERIFY_OP(CORE, GlobalLoadIndirectI32, {
+      VM_VerifyOperandRegI32(byte_offset);
+      // NOTE: we have to verify the offset at runtime.
+      VM_VerifyResultRegI32(value);
+    });
+
+    VERIFY_OP(CORE, GlobalStoreIndirectI32, {
+      VM_VerifyOperandRegI32(byte_offset);
+      // NOTE: we have to verify the offset at runtime.
+      VM_VerifyOperandRegI32(value);
+    });
+
+    VERIFY_OP(CORE, GlobalLoadI64, {
+      VM_VerifyGlobalAttr(byte_offset);
+      VM_VerifyRwdataOffset(byte_offset, 8);
+      VM_VerifyResultRegI64(value);
+    });
+
+    VERIFY_OP(CORE, GlobalStoreI64, {
+      VM_VerifyGlobalAttr(byte_offset);
+      VM_VerifyRwdataOffset(byte_offset, 8);
+      VM_VerifyOperandRegI64(value);
+    });
+
+    VERIFY_OP(CORE, GlobalLoadIndirectI64, {
+      VM_VerifyOperandRegI32(byte_offset);
+      // NOTE: we have to verify the offset at runtime.
+      VM_VerifyResultRegI64(value);
+    });
+
+    VERIFY_OP(CORE, GlobalStoreIndirectI64, {
+      VM_VerifyOperandRegI32(byte_offset);
+      // NOTE: we have to verify the offset at runtime.
+      VM_VerifyOperandRegI64(value);
+    });
+
+    VERIFY_OP(CORE, GlobalLoadRef, {
+      VM_VerifyGlobalAttr(global);
+      VM_VerifyGlobalRefOrdinal(global);
+      VM_VerifyTypeOf(type_def);
+      VM_VerifyResultRegRef(value);
+    });
+
+    VERIFY_OP(CORE, GlobalStoreRef, {
+      VM_VerifyGlobalAttr(global);
+      VM_VerifyGlobalRefOrdinal(global);
+      VM_VerifyTypeOf(type_def);
+      VM_VerifyOperandRegRef(value);
+    });
+
+    VERIFY_OP(CORE, GlobalLoadIndirectRef, {
+      VM_VerifyOperandRegI32(global);
+      // NOTE: we have to verify the ordinal at runtime.
+      VM_VerifyTypeOf(type_def);
+      VM_VerifyResultRegRef(value);
+    });
+
+    VERIFY_OP(CORE, GlobalStoreIndirectRef, {
+      VM_VerifyOperandRegI32(global);
+      // NOTE: we have to verify the ordinal at runtime.
+      VM_VerifyTypeOf(type_def);
+      VM_VerifyOperandRegRef(value);
+    });
+
+    //===------------------------------------------------------------------===//
+    // Constants
+    //===------------------------------------------------------------------===//
+
+    VERIFY_OP(CORE, ConstI32, {
+      VM_VerifyIntAttr32(value);
+      VM_VerifyResultRegI32(result);
+    });
+
+    VERIFY_OP(CORE, ConstI32Zero, { VM_VerifyResultRegI32(result); });
+
+    VERIFY_OP(CORE, ConstI64, {
+      VM_VerifyIntAttr64(value);
+      VM_VerifyResultRegI64(result);
+    });
+
+    VERIFY_OP(CORE, ConstI64Zero, { VM_VerifyResultRegI64(result); });
+
+    VERIFY_OP(CORE, ConstRefZero, { VM_VerifyResultRegRef(result); });
+
+    VERIFY_OP(CORE, ConstRefRodata, {
+      VM_VerifyRodataAttr(rodata);
+      VM_VerifyRodataOrdinal(rodata);
+      VM_VerifyResultRegRef(value);
+    });
+
+    //===------------------------------------------------------------------===//
+    // Buffers
+    //===------------------------------------------------------------------===//
+
+    VERIFY_OP(CORE, BufferAlloc, {
+      VM_VerifyOperandRegI64HostSize(length);
+      VM_VerifyResultRegRef(result);
+    });
+
+    VERIFY_OP(CORE, BufferClone, {
+      VM_VerifyOperandRegRef(source);
+      VM_VerifyOperandRegI64HostSize(offset);
+      VM_VerifyOperandRegI64HostSize(length);
+      VM_VerifyResultRegRef(result);
+    });
+
+    VERIFY_OP(CORE, BufferLength, {
+      VM_VerifyOperandRegRef(buffer);
+      VM_VerifyResultRegI64(result);
+    });
+
+    VERIFY_OP(CORE, BufferCopy, {
+      VM_VerifyOperandRegRef(source_buffer);
+      VM_VerifyOperandRegI64HostSize(source_offset);
+      VM_VerifyOperandRegRef(target_buffer);
+      VM_VerifyOperandRegI64HostSize(target_offset);
+      VM_VerifyOperandRegI64HostSize(length);
+    });
+
+    VERIFY_OP(CORE, BufferCompare, {
+      VM_VerifyOperandRegRef(lhs_buffer);
+      VM_VerifyOperandRegI64HostSize(lhs_offset);
+      VM_VerifyOperandRegRef(rhs_buffer);
+      VM_VerifyOperandRegI64HostSize(rhs_offset);
+      VM_VerifyOperandRegI64HostSize(length);
+      VM_VerifyResultRegI32(result);
+    });
+
+    VERIFY_OP(CORE, BufferFillI8, {
+      VM_VerifyOperandRegRef(target_buffer);
+      VM_VerifyOperandRegI64HostSize(target_offset);
+      VM_VerifyOperandRegI64HostSize(length);
+      VM_VerifyOperandRegI32(value);
+    });
+    VERIFY_OP(CORE, BufferFillI16, {
+      VM_VerifyOperandRegRef(target_buffer);
+      VM_VerifyOperandRegI64HostSize(target_offset);
+      VM_VerifyOperandRegI64HostSize(length);
+      VM_VerifyOperandRegI32(value);
+    });
+    VERIFY_OP(CORE, BufferFillI32, {
+      VM_VerifyOperandRegRef(target_buffer);
+      VM_VerifyOperandRegI64HostSize(target_offset);
+      VM_VerifyOperandRegI64HostSize(length);
+      VM_VerifyOperandRegI32(value);
+    });
+    VERIFY_OP(CORE, BufferFillI64, {
+      VM_VerifyOperandRegRef(target_buffer);
+      VM_VerifyOperandRegI64HostSize(target_offset);
+      VM_VerifyOperandRegI64HostSize(length);
+      VM_VerifyOperandRegI64(value);
+    });
+
+    VERIFY_OP(CORE, BufferLoadI8U, {
+      VM_VerifyOperandRegRef(source_buffer);
+      VM_VerifyOperandRegI64HostSize(source_offset);
+      VM_VerifyResultRegI32(result);
+    });
+    VERIFY_OP(CORE, BufferLoadI8S, {
+      VM_VerifyOperandRegRef(source_buffer);
+      VM_VerifyOperandRegI64HostSize(source_offset);
+      VM_VerifyResultRegI32(result);
+    });
+    VERIFY_OP(CORE, BufferLoadI16U, {
+      VM_VerifyOperandRegRef(source_buffer);
+      VM_VerifyOperandRegI64HostSize(source_offset);
+      VM_VerifyResultRegI32(result);
+    });
+    VERIFY_OP(CORE, BufferLoadI16S, {
+      VM_VerifyOperandRegRef(source_buffer);
+      VM_VerifyOperandRegI64HostSize(source_offset);
+      VM_VerifyResultRegI32(result);
+    });
+    VERIFY_OP(CORE, BufferLoadI32, {
+      VM_VerifyOperandRegRef(source_buffer);
+      VM_VerifyOperandRegI64HostSize(source_offset);
+      VM_VerifyResultRegI32(result);
+    });
+    VERIFY_OP(CORE, BufferLoadI64, {
+      VM_VerifyOperandRegRef(source_buffer);
+      VM_VerifyOperandRegI64HostSize(source_offset);
+      VM_VerifyResultRegI64(result);
+    });
+
+    VERIFY_OP(CORE, BufferStoreI8, {
+      VM_VerifyOperandRegRef(target_buffer);
+      VM_VerifyOperandRegI64HostSize(target_offset);
+      VM_VerifyOperandRegI32(value);
+    });
+    VERIFY_OP(CORE, BufferStoreI16, {
+      VM_VerifyOperandRegRef(target_buffer);
+      VM_VerifyOperandRegI64HostSize(target_offset);
+      VM_VerifyOperandRegI32(value);
+    });
+    VERIFY_OP(CORE, BufferStoreI32, {
+      VM_VerifyOperandRegRef(target_buffer);
+      VM_VerifyOperandRegI64HostSize(target_offset);
+      VM_VerifyOperandRegI32(value);
+    });
+    VERIFY_OP(CORE, BufferStoreI64, {
+      VM_VerifyOperandRegRef(target_buffer);
+      VM_VerifyOperandRegI64HostSize(target_offset);
+      VM_VerifyOperandRegI64(value);
+    });
+
+    //===------------------------------------------------------------------===//
+    // Lists
+    //===------------------------------------------------------------------===//
+
+    VERIFY_OP(CORE, ListAlloc, {
+      VM_VerifyTypeOf(element_type);
+      VM_VerifyOperandRegI32(initial_capacity);
+      VM_VerifyResultRegRef(result);
+    });
+
+    VERIFY_OP(CORE, ListReserve, {
+      VM_VerifyOperandRegRef(list);
+      VM_VerifyOperandRegI32(minimum_capacity);
+    });
+
+    VERIFY_OP(CORE, ListSize, {
+      VM_VerifyOperandRegRef(list);
+      VM_VerifyResultRegI32(result);
+    });
+
+    VERIFY_OP(CORE, ListResize, {
+      VM_VerifyOperandRegRef(list);
+      VM_VerifyOperandRegI32(new_size);
+    });
+
+    VERIFY_OP(CORE, ListGetI32, {
+      VM_VerifyOperandRegRef(list);
+      VM_VerifyOperandRegI32(index);
+      VM_VerifyResultRegI32(result);
+    });
+
+    VERIFY_OP(CORE, ListSetI32, {
+      VM_VerifyOperandRegRef(list);
+      VM_VerifyOperandRegI32(index);
+      VM_VerifyOperandRegI32(raw_value);
+    });
+
+    VERIFY_OP(CORE, ListGetI64, {
+      VM_VerifyOperandRegRef(list);
+      VM_VerifyOperandRegI32(index);
+      VM_VerifyResultRegI64(result);
+    });
+
+    VERIFY_OP(CORE, ListSetI64, {
+      VM_VerifyOperandRegRef(list);
+      VM_VerifyOperandRegI32(index);
+      VM_VerifyOperandRegI64(value);
+    });
+
+    VERIFY_OP(CORE, ListGetRef, {
+      VM_VerifyOperandRegRef(list);
+      VM_VerifyOperandRegI32(index);
+      VM_VerifyTypeOf(type_def);
+      VM_VerifyResultRegRef(result);
+    });
+
+    VERIFY_OP(CORE, ListSetRef, {
+      VM_VerifyOperandRegRef(list);
+      VM_VerifyOperandRegI32(index);
+      VM_VerifyOperandRegRef(value);
+    });
+
+    //===------------------------------------------------------------------===//
+    // Conditional assignment
+    //===------------------------------------------------------------------===//
+
+    VERIFY_OP(CORE, SelectI32, {
+      VM_VerifyOperandRegI32(condition);
+      VM_VerifyOperandRegI32(true_value);
+      VM_VerifyOperandRegI32(false_value);
+      VM_VerifyResultRegI32(result);
+    });
+
+    VERIFY_OP(CORE, SelectI64, {
+      VM_VerifyOperandRegI32(condition);
+      VM_VerifyOperandRegI64(true_value);
+      VM_VerifyOperandRegI64(false_value);
+      VM_VerifyResultRegI64(result);
+    });
+
+    VERIFY_OP(CORE, SelectRef, {
+      VM_VerifyOperandRegI32(condition);
+      // TODO(benvanik): remove the type_id and use either LHS/RHS (if both are
+      // null then output is always null so no need to know the type).
+      VM_VerifyTypeOf(true_value_type_def);
+      VM_VerifyOperandRegRef(true_value);
+      VM_VerifyOperandRegRef(false_value);
+      VM_VerifyResultRegRef(result);
+    });
+
+    VERIFY_OP(CORE, SwitchI32, {
+      VM_VerifyOperandRegI32(index);
+      VM_VerifyIntAttr32(default_value);
+      VM_VerifyVariadicOperandsI32(values);
+      VM_VerifyResultRegI32(result);
+    });
+
+    VERIFY_OP(CORE, SwitchI64, {
+      VM_VerifyOperandRegI32(index);
+      VM_VerifyIntAttr64(default_value);
+      VM_VerifyVariadicOperandsI64(values);
+      VM_VerifyResultRegI64(result);
+    });
+
+    VERIFY_OP(CORE, SwitchRef, {
+      VM_VerifyOperandRegI32(index);
+      VM_VerifyTypeOf(type_def);
+      VM_VerifyOperandRegRef(default_value);
+      VM_VerifyVariadicOperandsRef(values, type_def);
+      VM_VerifyResultRegRef(result);
+    });
+
+    //===------------------------------------------------------------------===//
+    // Native integer arithmetic
+    //===------------------------------------------------------------------===//
+
+    VERIFY_OP_CORE_BINARY_I32(AddI32);
+    VERIFY_OP_CORE_BINARY_I32(SubI32);
+    VERIFY_OP_CORE_BINARY_I32(MulI32);
+    VERIFY_OP_CORE_BINARY_I32(DivI32S);
+    VERIFY_OP_CORE_BINARY_I32(DivI32U);
+    VERIFY_OP_CORE_BINARY_I32(RemI32S);
+    VERIFY_OP_CORE_BINARY_I32(RemI32U);
+    VERIFY_OP_CORE_TERNARY_I32(FMAI32);
+    VERIFY_OP_CORE_UNARY_I32(AbsI32);
+    VERIFY_OP_CORE_UNARY_I32(NotI32);
+    VERIFY_OP_CORE_BINARY_I32(AndI32);
+    VERIFY_OP_CORE_BINARY_I32(OrI32);
+    VERIFY_OP_CORE_BINARY_I32(XorI32);
+    VERIFY_OP_CORE_UNARY_I32(CtlzI32);
+
+    VERIFY_OP_CORE_BINARY_I64(AddI64);
+    VERIFY_OP_CORE_BINARY_I64(SubI64);
+    VERIFY_OP_CORE_BINARY_I64(MulI64);
+    VERIFY_OP_CORE_BINARY_I64(DivI64S);
+    VERIFY_OP_CORE_BINARY_I64(DivI64U);
+    VERIFY_OP_CORE_BINARY_I64(RemI64S);
+    VERIFY_OP_CORE_BINARY_I64(RemI64U);
+    VERIFY_OP_CORE_TERNARY_I64(FMAI64);
+    VERIFY_OP_CORE_UNARY_I64(AbsI64);
+    VERIFY_OP_CORE_UNARY_I64(NotI64);
+    VERIFY_OP_CORE_BINARY_I64(AndI64);
+    VERIFY_OP_CORE_BINARY_I64(OrI64);
+    VERIFY_OP_CORE_BINARY_I64(XorI64);
+    VERIFY_OP_CORE_UNARY_I64(CtlzI64);
+
+    //===------------------------------------------------------------------===//
+    // Casting and type conversion/emulation
+    //===------------------------------------------------------------------===//
+
+    // NOTE: these all operate on 32-bit registers.
+    VERIFY_OP_CORE_UNARY_I32(TruncI32I8);
+    VERIFY_OP_CORE_UNARY_I32(TruncI32I16);
+    VERIFY_OP_CORE_UNARY_I32(ExtI8I32S);
+    VERIFY_OP_CORE_UNARY_I32(ExtI8I32U);
+    VERIFY_OP_CORE_UNARY_I32(ExtI16I32S);
+    VERIFY_OP_CORE_UNARY_I32(ExtI16I32U);
+
+    // NOTE: 64-bit ones are actually changing register widths.
+    VERIFY_OP(CORE, TruncI64I32, {
+      VM_VerifyOperandRegI64(operand);
+      VM_VerifyResultRegI32(result);
+    });
+    VERIFY_OP(CORE, ExtI32I64S, {
+      VM_VerifyOperandRegI32(operand);
+      VM_VerifyResultRegI64(result);
+    });
+    VERIFY_OP(CORE, ExtI32I64U, {
+      VM_VerifyOperandRegI32(operand);
+      VM_VerifyResultRegI64(result);
+    });
+
+    //===------------------------------------------------------------------===//
+    // Native bitwise shifts and rotates
+    //===------------------------------------------------------------------===//
+
+#define VERIFY_OP_CORE_SHIFT_I32(op_name) \
+  VERIFY_OP(CORE, op_name, {              \
+    VM_VerifyOperandRegI32(operand);      \
+    VM_VerifyOperandRegI32(amount);       \
+    VM_VerifyResultRegI32(result);        \
+  });
+
+    VERIFY_OP_CORE_SHIFT_I32(ShlI32);
+    VERIFY_OP_CORE_SHIFT_I32(ShrI32S);
+    VERIFY_OP_CORE_SHIFT_I32(ShrI32U);
+
+#define VERIFY_OP_CORE_SHIFT_I64(op_name) \
+  VERIFY_OP(CORE, op_name, {              \
+    VM_VerifyOperandRegI64(operand);      \
+    VM_VerifyOperandRegI32(amount);       \
+    VM_VerifyResultRegI64(result);        \
+  });
+
+    VERIFY_OP_CORE_SHIFT_I64(ShlI64);
+    VERIFY_OP_CORE_SHIFT_I64(ShrI64S);
+    VERIFY_OP_CORE_SHIFT_I64(ShrI64U);
+
+    //===------------------------------------------------------------------===//
+    // Comparison ops
+    //===------------------------------------------------------------------===//
+
+    VERIFY_OP_CORE_BINARY_I32(CmpEQI32);
+    VERIFY_OP_CORE_BINARY_I32(CmpNEI32);
+    VERIFY_OP_CORE_BINARY_I32(CmpLTI32S);
+    VERIFY_OP_CORE_BINARY_I32(CmpLTI32U);
+    VERIFY_OP_CORE_UNARY_I32(CmpNZI32);
+
+#define VERIFY_OP_CORE_CMP_I64(op_name) \
+  VERIFY_OP(CORE, op_name, {            \
+    VM_VerifyOperandRegI64(lhs);        \
+    VM_VerifyOperandRegI64(rhs);        \
+    VM_VerifyResultRegI32(result);      \
+  });
+
+    VERIFY_OP_CORE_CMP_I64(CmpEQI64);
+    VERIFY_OP_CORE_CMP_I64(CmpNEI64);
+    VERIFY_OP_CORE_CMP_I64(CmpLTI64S);
+    VERIFY_OP_CORE_CMP_I64(CmpLTI64U);
+    VERIFY_OP(CORE, CmpNZI64, {
+      VM_VerifyOperandRegI64(operand);
+      VM_VerifyResultRegI32(result);
+    });
+
+    VERIFY_OP(CORE, CmpEQRef, {
+      VM_VerifyOperandRegRef(lhs);
+      VM_VerifyOperandRegRef(rhs);
+      VM_VerifyResultRegI32(result);
+    });
+    VERIFY_OP(CORE, CmpNERef, {
+      VM_VerifyOperandRegRef(lhs);
+      VM_VerifyOperandRegRef(rhs);
+      VM_VerifyResultRegI32(result);
+    });
+    VERIFY_OP(CORE, CmpNZRef, {
+      VM_VerifyOperandRegRef(operand);
+      VM_VerifyResultRegI32(result);
+    });
+
+    //===------------------------------------------------------------------===//
+    // Control flow
+    //===------------------------------------------------------------------===//
+
+    VERIFY_OP(CORE, Block, {
+      // Define the new block in the block list. It may already be declared from
+      // a prior branch.
+      iree_vm_bytecode_block_t* block = NULL;
+      IREE_RETURN_IF_ERROR(iree_vm_bytecode_block_list_insert(
+          &verify_state->block_list, pc - 1, &block));
+      block->defined = 1;
+      verify_state->in_block = 1;
+    });
+
+    VERIFY_OP(CORE, Branch, {
+      VM_VerifyBranchTarget(dest_pc);
+      VM_VerifyBranchOperands(operands);
+      verify_state->in_block = 0;  // terminator
+    });
+
+    VERIFY_OP(CORE, CondBranch, {
+      VM_VerifyOperandRegI32(condition);
+      VM_VerifyBranchTarget(true_dest_pc);
+      VM_VerifyBranchOperands(true_operands);
+      VM_VerifyBranchTarget(false_dest_pc);
+      VM_VerifyBranchOperands(false_operands);
+      verify_state->in_block = 0;  // terminator
+    });
+
+    VERIFY_OP(CORE, Call, {
+      VM_VerifyFuncAttr(callee_ordinal);
+      VM_VerifyVariadicOperandsAny(operands);
+      VM_VerifyVariadicResultsAny(results);
+      if (VM_IsImportOrdinal(callee_ordinal)) {  // import callee
+        VM_UnmaskImportOrdinal(callee_ordinal);
+        VM_VerifyImportOrdinal(callee_ordinal);
+        iree_vm_ImportFunctionDef_table_t import_def =
+            iree_vm_ImportFunctionDef_vec_at(verify_state->imported_functions,
+                                             callee_ordinal);
+        IREE_RETURN_IF_ERROR(
+            iree_vm_bytecode_function_verify_call(
+                verify_state, iree_vm_ImportFunctionDef_signature(import_def),
+                /*segment_sizes=*/NULL, operands, results),
+            "call to import '%s'",
+            iree_vm_ImportFunctionDef_full_name(import_def));
+      } else {  // internal callee: checked against this module's signatures
+        VM_VerifyFunctionOrdinal(callee_ordinal);
+        IREE_RETURN_IF_ERROR(
+            iree_vm_bytecode_function_verify_call(
+                verify_state,
+                iree_vm_FunctionSignatureDef_vec_at(
+                    verify_state->function_signatures, callee_ordinal),
+                /*segment_sizes=*/NULL, operands, results),
+            "call to internal function %u", callee_ordinal);
+      }
+    });
+
+    VERIFY_OP(CORE, CallVariadic, {
+      VM_VerifyFuncAttr(callee_ordinal);
+      VM_VerifyVariadicOperands(segment_sizes);
+      VM_VerifyVariadicOperandsAny(operands);
+      VM_VerifyVariadicResultsAny(results);
+      if (IREE_UNLIKELY(!VM_IsImportOrdinal(callee_ordinal))) {
+        // Variadic calls are only supported for imports; reject any others.
+        return iree_make_status(
+            IREE_STATUS_FAILED_PRECONDITION,
+            "variadic calls only supported for import callees");
+      }
+      VM_UnmaskImportOrdinal(callee_ordinal);
+      VM_VerifyImportOrdinal(callee_ordinal);
+      iree_vm_ImportFunctionDef_table_t import_def =
+          iree_vm_ImportFunctionDef_vec_at(verify_state->imported_functions,
+                                           callee_ordinal);
+      IREE_RETURN_IF_ERROR(
+          iree_vm_bytecode_function_verify_call(
+              verify_state, iree_vm_ImportFunctionDef_signature(import_def),
+              segment_sizes, operands, results),
+          "variadic call to import '%s'",
+          iree_vm_ImportFunctionDef_full_name(import_def));
+    });
+
+    VERIFY_OP(CORE, Return, {
+      VM_VerifyVariadicOperandsAny(operands);
+      IREE_RETURN_IF_ERROR(iree_vm_bytecode_function_verify_cconv_registers(
+          verify_state, verify_state->cconv_results, /*segment_sizes=*/NULL,
+          operands));
+      verify_state->in_block = 0;  // terminator
+    });
+
+    VERIFY_OP(CORE, Fail, {
+      VM_VerifyOperandRegI32(status);
+      iree_string_view_t message;
+      VM_VerifyStrAttr(message, &message);
+      verify_state->in_block = 0;  // terminator
+    });
+
+    VERIFY_OP(CORE, ImportResolved, {
+      VM_VerifyFuncAttr(import_ordinal);
+      if (IREE_UNLIKELY(!VM_IsImportOrdinal(import_ordinal))) {
+        return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                                "function ordinal %u is not an import ordinal",
+                                import_ordinal);
+      }
+      VM_UnmaskImportOrdinal(import_ordinal);
+      VM_VerifyImportOrdinal(import_ordinal);
+      VM_VerifyResultRegI32(result);
+    });
+
+    //===------------------------------------------------------------------===//
+    // Async/fiber ops
+    //===------------------------------------------------------------------===//
+
+    VERIFY_OP(CORE, Yield, {
+      VM_VerifyBranchTarget(dest_pc);
+      VM_VerifyBranchOperands(operands);
+      verify_state->in_block = 0;  // terminator
+    });
+
+    //===------------------------------------------------------------------===//
+    // Debugging
+    //===------------------------------------------------------------------===//
+
+    VERIFY_OP(CORE, Trace, {
+      iree_string_view_t event_name;
+      VM_VerifyStrAttr(event_name, &event_name);
+      VM_VerifyVariadicOperandsAny(operands);
+    });
+
+    VERIFY_OP(CORE, Print, {
+      iree_string_view_t event_name;
+      VM_VerifyStrAttr(event_name, &event_name);
+      VM_VerifyVariadicOperandsAny(operands);
+    });
+
+    VERIFY_OP(CORE, Break, {
+      VM_VerifyBranchTarget(dest_pc);
+      VM_VerifyBranchOperands(operands);
+      verify_state->in_block = 0;  // terminator
+    });
+
+    VERIFY_OP(CORE, CondBreak, {
+      VM_VerifyOperandRegI32(condition);
+      VM_VerifyBranchTarget(dest);
+      VM_VerifyBranchOperands(operands);
+      verify_state->in_block = 0;  // terminator
+    });
+
+    //===------------------------------------------------------------------===//
+    // Extension trampolines
+    //===------------------------------------------------------------------===//
+
+#if IREE_VM_EXT_F32_ENABLE
+    BEGIN_VERIFY_PREFIX(PrefixExtF32, iree_vm_FeatureBits_EXT_F32)
+
+    //===----------------------------------------------------------------===//
+    // ExtF32: Globals
+    //===----------------------------------------------------------------===//
+
+    VERIFY_OP(EXT_F32, GlobalLoadF32, {
+      VM_VerifyGlobalAttr(byte_offset);
+      VM_VerifyRwdataOffset(byte_offset, 4);
+      VM_VerifyResultRegF32(value);
+    });
+
+    VERIFY_OP(EXT_F32, GlobalStoreF32, {
+      VM_VerifyGlobalAttr(byte_offset);
+      VM_VerifyRwdataOffset(byte_offset, 4);
+      VM_VerifyOperandRegF32(value);
+    });
+
+    VERIFY_OP(EXT_F32, GlobalLoadIndirectF32, {
+      VM_VerifyOperandRegI32(byte_offset);
+      // NOTE: we have to verify the offset at runtime.
+      VM_VerifyResultRegF32(value);
+    });
+
+    VERIFY_OP(EXT_F32, GlobalStoreIndirectF32, {
+      VM_VerifyOperandRegI32(byte_offset);
+      // NOTE: we have to verify the offset at runtime.
+      VM_VerifyOperandRegF32(value);
+    });
+
+    //===----------------------------------------------------------------===//
+    // ExtF32: Constants
+    //===----------------------------------------------------------------===//
+
+    VERIFY_OP(EXT_F32, ConstF32, {
+      VM_VerifyFloatAttr32(value);
+      VM_VerifyResultRegF32(result);
+    });
+
+    VERIFY_OP(EXT_F32, ConstF32Zero, { VM_VerifyResultRegF32(result); });
+
+    //===----------------------------------------------------------------===//
+    // ExtF32: Lists
+    //===----------------------------------------------------------------===//
+
+    VERIFY_OP(EXT_F32, ListGetF32, {
+      VM_VerifyOperandRegRef(list);
+      VM_VerifyOperandRegI32(index);
+      VM_VerifyResultRegF32(result);
+    });
+
+    VERIFY_OP(EXT_F32, ListSetF32, {
+      VM_VerifyOperandRegRef(list);
+      VM_VerifyOperandRegI32(index);
+      VM_VerifyOperandRegF32(value);
+    });
+
+    //===----------------------------------------------------------------===//
+    // ExtF32: Conditional assignment
+    //===----------------------------------------------------------------===//
+
+    VERIFY_OP(EXT_F32, SelectF32, {
+      VM_VerifyOperandRegI32(condition);
+      VM_VerifyOperandRegF32(true_value);
+      VM_VerifyOperandRegF32(false_value);
+      VM_VerifyResultRegF32(result);
+    });
+
+    VERIFY_OP(EXT_F32, SwitchF32, {
+      VM_VerifyOperandRegI32(index);
+      VM_VerifyFloatAttr32(default_value);
+      VM_VerifyVariadicOperandsF32(values);
+      VM_VerifyResultRegF32(result);
+    });
+
+    //===----------------------------------------------------------------===//
+    // ExtF32: Native floating-point arithmetic
+    //===----------------------------------------------------------------===//
+
+    VERIFY_OP_EXT_F32_BINARY_F32(AddF32);
+    VERIFY_OP_EXT_F32_BINARY_F32(SubF32);
+    VERIFY_OP_EXT_F32_BINARY_F32(MulF32);
+    VERIFY_OP_EXT_F32_BINARY_F32(DivF32);
+    VERIFY_OP_EXT_F32_BINARY_F32(RemF32);
+    VERIFY_OP_EXT_F32_TERNARY_F32(FMAF32);
+    VERIFY_OP_EXT_F32_UNARY_F32(AbsF32);
+    VERIFY_OP_EXT_F32_UNARY_F32(NegF32);
+    VERIFY_OP_EXT_F32_UNARY_F32(CeilF32);
+    VERIFY_OP_EXT_F32_UNARY_F32(FloorF32);
+    VERIFY_OP_EXT_F32_UNARY_F32(RoundF32);
+
+    VERIFY_OP_EXT_F32_UNARY_F32(AtanF32);
+    VERIFY_OP_EXT_F32_BINARY_F32(Atan2F32);
+    VERIFY_OP_EXT_F32_UNARY_F32(CosF32);
+    VERIFY_OP_EXT_F32_UNARY_F32(SinF32);
+    VERIFY_OP_EXT_F32_UNARY_F32(ExpF32);
+    VERIFY_OP_EXT_F32_UNARY_F32(Exp2F32);
+    VERIFY_OP_EXT_F32_UNARY_F32(ExpM1F32);
+    VERIFY_OP_EXT_F32_UNARY_F32(LogF32);
+    VERIFY_OP_EXT_F32_UNARY_F32(Log10F32);
+    VERIFY_OP_EXT_F32_UNARY_F32(Log1pF32);
+    VERIFY_OP_EXT_F32_UNARY_F32(Log2F32);
+    VERIFY_OP_EXT_F32_BINARY_F32(PowF32);
+    VERIFY_OP_EXT_F32_UNARY_F32(RsqrtF32);
+    VERIFY_OP_EXT_F32_UNARY_F32(SqrtF32);
+    VERIFY_OP_EXT_F32_UNARY_F32(TanhF32);
+    VERIFY_OP_EXT_F32_UNARY_F32(ErfF32);
+
+    //===----------------------------------------------------------------===//
+    // ExtF32: Casting and type conversion/emulation
+    //===----------------------------------------------------------------===//
+
+    VERIFY_OP(EXT_F32, CastSI32F32, {
+      VM_VerifyOperandRegI32(operand);
+      VM_VerifyResultRegF32(result);
+    });
+    VERIFY_OP(EXT_F32, CastUI32F32, {
+      VM_VerifyOperandRegI32(operand);
+      VM_VerifyResultRegF32(result);
+    });
+    VERIFY_OP(EXT_F32, CastF32SI32, {
+      VM_VerifyOperandRegF32(operand);
+      VM_VerifyResultRegI32(result);
+    });
+    VERIFY_OP(EXT_F32, CastF32UI32, {
+      VM_VerifyOperandRegF32(operand);
+      VM_VerifyResultRegI32(result);
+    });
+    VERIFY_OP(EXT_F32, BitcastI32F32, {
+      VM_VerifyOperandRegI32(operand);
+      VM_VerifyResultRegF32(result);
+    });
+    VERIFY_OP(EXT_F32, BitcastF32I32, {
+      VM_VerifyOperandRegF32(operand);
+      VM_VerifyResultRegI32(result);
+    });
+
+    //===----------------------------------------------------------------===//
+    // ExtF32: Comparison ops
+    //===----------------------------------------------------------------===//
+
+#define VERIFY_OP_EXT_F32_CMP_F32(op_name) \
+  VERIFY_OP(EXT_F32, op_name, {            \
+    VM_VerifyOperandRegF32(lhs);           \
+    VM_VerifyOperandRegF32(rhs);           \
+    VM_VerifyResultRegI32(result);         \
+  });
+
+    VERIFY_OP_EXT_F32_CMP_F32(CmpEQF32O);
+    VERIFY_OP_EXT_F32_CMP_F32(CmpEQF32U);
+    VERIFY_OP_EXT_F32_CMP_F32(CmpNEF32O);
+    VERIFY_OP_EXT_F32_CMP_F32(CmpNEF32U);
+    VERIFY_OP_EXT_F32_CMP_F32(CmpLTF32O);
+    VERIFY_OP_EXT_F32_CMP_F32(CmpLTF32U);
+    VERIFY_OP_EXT_F32_CMP_F32(CmpLTEF32O);
+    VERIFY_OP_EXT_F32_CMP_F32(CmpLTEF32U);
+    VERIFY_OP(EXT_F32, CmpNaNF32, {
+      VM_VerifyOperandRegF32(operand);
+      VM_VerifyResultRegI32(result);
+    });
+
+    //===----------------------------------------------------------------===//
+    // ExtF32: Buffers
+    //===----------------------------------------------------------------===//
+
+    VERIFY_OP(EXT_F32, BufferFillF32, {
+      VM_VerifyOperandRegRef(target_buffer);
+      VM_VerifyOperandRegI64HostSize(target_offset);
+      VM_VerifyOperandRegI64HostSize(length);
+      VM_VerifyOperandRegF32(value);
+    });
+
+    VERIFY_OP(EXT_F32, BufferLoadF32, {
+      VM_VerifyOperandRegRef(source_buffer);
+      VM_VerifyOperandRegI64HostSize(source_offset);
+      VM_VerifyResultRegF32(result);
+    });
+
+    VERIFY_OP(EXT_F32, BufferStoreF32, {
+      VM_VerifyOperandRegRef(target_buffer);
+      VM_VerifyOperandRegI64HostSize(target_offset);
+      VM_VerifyOperandRegF32(value);
+    });
+
+    END_VERIFY_PREFIX();
+#else
+    UNHANDLED_VERIFY_PREFIX(PrefixExtF32, iree_vm_FeatureBits_EXT_F32);
+#endif  // IREE_VM_EXT_F32_ENABLE
+
+    VERIFY_OP(CORE, PrefixExtF64, {
+      IREE_VM_VERIFY_REQUIREMENT(iree_vm_FeatureBits_EXT_F64);
+      return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                              "EXT_F64 not yet implemented");
+    });
+
+    default:
+      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                              "unrecognized opcode %u", bytecode_data[pc - 1]);
+  }
+
+  *out_next_pc = pc;
+  return iree_ok_status();
+}
diff --git a/runtime/src/iree/vm/bytecode/verifier.h b/runtime/src/iree/vm/bytecode/verifier.h
new file mode 100644
index 0000000..ae7f1af
--- /dev/null
+++ b/runtime/src/iree/vm/bytecode/verifier.h
@@ -0,0 +1,38 @@
+// Copyright 2023 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_VM_BYTECODE_VERIFIER_H_
+#define IREE_VM_BYTECODE_VERIFIER_H_
+
+#include "iree/base/api.h"
+#include "iree/vm/api.h"
+#include "iree/vm/bytecode/module_impl.h"
+
+// Verifies the structure of the FlatBuffer so that we can avoid doing so during
+// runtime. There are still some conditions we must be aware of (such as omitted
+// names on functions with internal linkage), however we shouldn't need to
+// bounds check anything within the FlatBuffer after this succeeds.
+iree_status_t iree_vm_bytecode_module_flatbuffer_verify(
+    iree_const_byte_span_t archive_contents,
+    iree_const_byte_span_t flatbuffer_contents,
+    iree_host_size_t archive_rodata_offset);
+
+// Verifies the bytecode contained within the given |function_ordinal|.
+// Assumes that all information on |module| has been verified and only function
+// information requires verification.
+//
+// NOTE: verification only checks that the function is well-formed and not that
+// it is correct or will execute successfully. The only thing this tries to
+// guarantee is that executing the bytecode won't cause a crash.
+//
+// If verification requires transient allocations for tracking they will be made
+// from |scratch_allocator|. No allocation will live outside of the function and
+// callers may provide stack-based arenas.
+iree_status_t iree_vm_bytecode_function_verify(
+    iree_vm_bytecode_module_t* module, uint16_t function_ordinal,
+    iree_allocator_t scratch_allocator);
+
+#endif  // IREE_VM_BYTECODE_VERIFIER_H_
diff --git a/runtime/src/iree/vm/list.c b/runtime/src/iree/vm/list.c
index d7bdef5..0dafa55 100644
--- a/runtime/src/iree/vm/list.c
+++ b/runtime/src/iree/vm/list.c
@@ -62,8 +62,6 @@
   void* storage;
 };
 
-static iree_vm_ref_type_descriptor_t iree_vm_list_descriptor = {0};
-
 IREE_VM_DEFINE_TYPE_ADAPTERS(iree_vm_list, iree_vm_list_t);
 
 static void iree_vm_list_retain_range(iree_vm_list_t* list,
diff --git a/runtime/src/iree/vm/list_test.cc b/runtime/src/iree/vm/list_test.cc
index e9b1d08..f20bc06 100644
--- a/runtime/src/iree/vm/list_test.cc
+++ b/runtime/src/iree/vm/list_test.cc
@@ -23,7 +23,6 @@
  private:
   float data_ = 1.0f;
 };
-static iree_vm_ref_type_descriptor_t test_a_descriptor = {0};
 IREE_VM_DECLARE_TYPE_ADAPTERS(test_a, A);
 IREE_VM_DEFINE_TYPE_ADAPTERS(test_a, A);
 
@@ -35,7 +34,6 @@
  private:
   int data_ = 2;
 };
-static iree_vm_ref_type_descriptor_t test_b_descriptor = {0};
 IREE_VM_DECLARE_TYPE_ADAPTERS(test_b, B);
 IREE_VM_DEFINE_TYPE_ADAPTERS(test_b, B);
 
diff --git a/runtime/src/iree/vm/module.c b/runtime/src/iree/vm/module.c
index 30b0619..0be0bfa 100644
--- a/runtime/src/iree/vm/module.c
+++ b/runtime/src/iree/vm/module.c
@@ -70,14 +70,14 @@
               break;
             default:
               return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
-                                      "unsupported cconv span type %c",
+                                      "unsupported cconv span type '%c'",
                                       cconv_fragment.data[i]);
           }
         }
       } break;
       default:
         return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
-                                "unsupported cconv type %c",
+                                "unsupported cconv type '%c'",
                                 cconv_fragment.data[i]);
     }
   }
@@ -159,7 +159,7 @@
               break;
             default:
               return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
-                                      "unsupported cconv span type %c",
+                                      "unsupported cconv span type '%c'",
                                       cconv_fragment.data[i]);
           }
         }
@@ -167,7 +167,7 @@
       } break;
       default:
         return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
-                                "unsupported cconv type %c",
+                                "unsupported cconv type '%c'",
                                 cconv_fragment.data[i]);
     }
   }
@@ -317,6 +317,7 @@
 IREE_API_EXPORT iree_string_view_t
 iree_vm_function_name(const iree_vm_function_t* function) {
   IREE_ASSERT_ARGUMENT(function);
+  if (!function->module) return iree_string_view_empty();
   iree_string_view_t name;
   iree_status_t status = function->module->get_function(
       function->module->self, function->linkage, function->ordinal,
diff --git a/runtime/src/iree/vm/ref.h b/runtime/src/iree/vm/ref.h
index aa6e7fe..c2d1678 100644
--- a/runtime/src/iree/vm/ref.h
+++ b/runtime/src/iree/vm/ref.h
@@ -239,23 +239,27 @@
 
 // TODO(benvanik): make these macros standard/document them.
 #define IREE_VM_DECLARE_TYPE_ADAPTERS(name, T)                              \
+  IREE_API_EXPORT_VARIABLE iree_vm_ref_type_descriptor_t name##_descriptor; \
+  static inline iree_vm_ref_type_t name##_type_id(void) {                   \
+    return name##_descriptor.type;                                          \
+  }                                                                         \
+  static inline bool name##_isa(const iree_vm_ref_t ref) {                  \
+    return name##_descriptor.type == ref.type;                              \
+  }                                                                         \
   IREE_API_EXPORT iree_vm_ref_t name##_retain_ref(T* value);                \
   IREE_API_EXPORT iree_vm_ref_t name##_move_ref(T* value);                  \
-  IREE_API_EXPORT T* name##_deref(const iree_vm_ref_t ref);                 \
+  static inline T* name##_deref(const iree_vm_ref_t ref) {                  \
+    return IREE_LIKELY(name##_isa(ref)) ? (T*)ref.ptr : NULL;               \
+  }                                                                         \
   IREE_API_EXPORT iree_status_t name##_check_deref(const iree_vm_ref_t ref, \
                                                    T** out_ptr);            \
   IREE_API_EXPORT iree_status_t name##_check_deref_or_null(                 \
       const iree_vm_ref_t ref, T** out_ptr);                                \
-  IREE_API_EXPORT const iree_vm_ref_type_descriptor_t*                      \
-      name##_get_descriptor();                                              \
-  static inline bool name##_isa(const iree_vm_ref_t ref) {                  \
-    return name##_get_descriptor()->type == ref.type;                       \
-  }                                                                         \
-  IREE_API_EXPORT iree_vm_ref_type_t name##_type_id();                      \
   IREE_VM_DECLARE_CC_TYPE_LOOKUP(name, T)
 
 // TODO(benvanik): make these macros standard/document them.
 #define IREE_VM_DEFINE_TYPE_ADAPTERS(name, T)                               \
+  iree_vm_ref_type_descriptor_t name##_descriptor = {0};                    \
   IREE_API_EXPORT iree_vm_ref_t name##_retain_ref(T* value) {               \
     iree_vm_ref_t ref = {0};                                                \
     iree_vm_ref_wrap_retain(value, name##_descriptor.type, &ref);           \
@@ -266,13 +270,6 @@
     iree_vm_ref_wrap_assign(value, name##_descriptor.type, &ref);           \
     return ref;                                                             \
   }                                                                         \
-  IREE_API_EXPORT T* name##_deref(const iree_vm_ref_t ref) {                \
-    if (IREE_UNLIKELY(ref.type != ref.type) ||                              \
-        IREE_UNLIKELY(ref.type == IREE_VM_REF_TYPE_NULL)) {                 \
-      return NULL;                                                          \
-    }                                                                       \
-    return (T*)ref.ptr;                                                     \
-  }                                                                         \
   IREE_API_EXPORT iree_status_t name##_check_deref(const iree_vm_ref_t ref, \
                                                    T** out_ptr) {           \
     IREE_RETURN_IF_ERROR(iree_vm_ref_check(ref, name##_descriptor.type));   \
@@ -288,13 +285,6 @@
       *out_ptr = NULL;                                                      \
     }                                                                       \
     return iree_ok_status();                                                \
-  }                                                                         \
-  IREE_API_EXPORT const iree_vm_ref_type_descriptor_t*                      \
-      name##_get_descriptor() {                                             \
-    return &name##_descriptor;                                              \
-  }                                                                         \
-  IREE_API_EXPORT iree_vm_ref_type_t name##_type_id() {                     \
-    return name##_descriptor.type;                                          \
   }
 
 // Optional C++ iree::vm::ref<T> wrapper.
diff --git a/runtime/src/iree/vm/ref_cc.h b/runtime/src/iree/vm/ref_cc.h
index c43494a..f026fa9 100644
--- a/runtime/src/iree/vm/ref_cc.h
+++ b/runtime/src/iree/vm/ref_cc.h
@@ -473,16 +473,16 @@
 // dynamic type registration mechanism and that can be wrapped in an
 // iree_vm_ref_t.
 
-#define IREE_VM_DECLARE_CC_TYPE_LOOKUP(name, T)         \
-  namespace iree {                                      \
-  namespace vm {                                        \
-  template <>                                           \
-  struct ref_type_descriptor<T> {                       \
-    static const iree_vm_ref_type_descriptor_t* get() { \
-      return name##_get_descriptor();                   \
-    }                                                   \
-  };                                                    \
-  }                                                     \
+#define IREE_VM_DECLARE_CC_TYPE_LOOKUP(name, T)                \
+  namespace iree {                                             \
+  namespace vm {                                               \
+  template <>                                                  \
+  struct ref_type_descriptor<T> {                              \
+    static inline const iree_vm_ref_type_descriptor_t* get() { \
+      return &name##_descriptor;                               \
+    }                                                          \
+  };                                                           \
+  }                                                            \
   }
 
 #define IREE_VM_REGISTER_CC_TYPE(type, name, descriptor)  \
diff --git a/runtime/src/iree/vm/stack.c b/runtime/src/iree/vm/stack.c
index 6b39ed2..3d1b3a2 100644
--- a/runtime/src/iree/vm/stack.c
+++ b/runtime/src/iree/vm/stack.c
@@ -16,12 +16,6 @@
 #include "iree/base/tracing.h"
 #include "iree/vm/module.h"
 
-#ifndef NDEBUG
-#define VMCHECK(expr) assert(expr)
-#else
-#define VMCHECK(expr)
-#endif  // NDEBUG
-
 //===----------------------------------------------------------------------===//
 // Stack implementation
 //===----------------------------------------------------------------------===//
@@ -548,7 +542,7 @@
     iree_vm_stack_t* stack, const iree_vm_function_t* function,
     iree_vm_stack_frame_type_t frame_type, iree_host_size_t frame_size,
     iree_vm_stack_frame_cleanup_fn_t frame_cleanup_fn,
-    iree_vm_stack_frame_t** out_callee_frame) {
+    iree_vm_stack_frame_t* IREE_RESTRICT* out_callee_frame) {
   if (out_callee_frame) *out_callee_frame = NULL;
 
   // Allocate stack space and grow stack, if required.
diff --git a/runtime/src/iree/vm/stack.h b/runtime/src/iree/vm/stack.h
index f3aa557..7cb3253 100644
--- a/runtime/src/iree/vm/stack.h
+++ b/runtime/src/iree/vm/stack.h
@@ -307,7 +307,7 @@
     iree_vm_stack_t* stack, const iree_vm_function_t* function,
     iree_vm_stack_frame_type_t frame_type, iree_host_size_t frame_size,
     iree_vm_stack_frame_cleanup_fn_t frame_cleanup_fn,
-    iree_vm_stack_frame_t** out_callee_frame);
+    iree_vm_stack_frame_t* IREE_RESTRICT* out_callee_frame);
 
 // Leaves the current stack frame.
 IREE_API_EXPORT iree_status_t
diff --git a/samples/custom_module/basic/module.cc b/samples/custom_module/basic/module.cc
index 6ba12d8..9f846d0 100644
--- a/samples/custom_module/basic/module.cc
+++ b/samples/custom_module/basic/module.cc
@@ -22,11 +22,6 @@
 // !custom.string type
 //===----------------------------------------------------------------------===//
 
-// Runtime type descriptor for the !custom.string describing how to manage it
-// and destroy it. The type ID is allocated at runtime and does not need to
-// match the compiler ID.
-static iree_vm_ref_type_descriptor_t iree_custom_string_descriptor = {0};
-
 // The "string" type we use to store and retain string data.
 // This could be arbitrarily complex or simply wrap another user-defined type.
 // The descriptor that is registered at startup defines how to manage the
@@ -43,6 +38,9 @@
   iree_string_view_t value;
 } iree_custom_string_t;
 
+// Defines the runtime type descriptor for !custom.string, which describes how
+// to manage and destroy it. The type ID is allocated at runtime and does not
+// need to match the compiler ID.
 IREE_VM_DEFINE_TYPE_ADAPTERS(iree_custom_string, iree_custom_string_t);
 
 extern "C" iree_status_t iree_custom_string_create(
diff --git a/samples/simple_embedding/simple_embedding.c b/samples/simple_embedding/simple_embedding.c
index 3fa0c87..94272de 100644
--- a/samples/simple_embedding/simple_embedding.c
+++ b/samples/simple_embedding/simple_embedding.c
@@ -127,7 +127,7 @@
   // Get the result buffers from the invocation.
   iree_hal_buffer_view_t* ret_buffer_view =
       (iree_hal_buffer_view_t*)iree_vm_list_get_ref_deref(
-          outputs, 0, iree_hal_buffer_view_get_descriptor());
+          outputs, 0, &iree_hal_buffer_view_descriptor);
   if (ret_buffer_view == NULL) {
     return iree_make_status(IREE_STATUS_NOT_FOUND,
                             "can't find return buffer view");
diff --git a/tools/iree-dump-module-main.c b/tools/iree-dump-module-main.c
index 9f4a9a0..c769270 100644
--- a/tools/iree-dump-module-main.c
+++ b/tools/iree-dump-module-main.c
@@ -9,6 +9,7 @@
 #include "iree/base/api.h"
 #include "iree/base/internal/file_io.h"
 #include "iree/schemas/bytecode_module_def_json_printer.h"
+#include "iree/vm/bytecode/archive.h"
 #include "iree/vm/bytecode/module.h"
 
 // Today we just print to JSON. We could do something more useful (size
@@ -28,7 +29,7 @@
                                         &file_contents));
 
   iree_const_byte_span_t flatbuffer_contents = iree_const_byte_span_empty();
-  IREE_CHECK_OK(iree_vm_bytecode_module_parse_header(
+  IREE_CHECK_OK(iree_vm_bytecode_archive_parse_header(
       file_contents->const_buffer, &flatbuffer_contents,
       /*out_rodata_offset=*/NULL));