Add `!input`/`!output`/`!blackboard` support to iree-run-trace.
This allows for trace-wide input and output tensors that are handled
just as they are in iree-run-module. Traces can reference inputs/outputs
in place of any call argument and can set/push outputs in place of any
result. A blackboard is provided for storing temporary values used
within the trace.

Fixes #12525.
Fixes #12526.
diff --git a/build_tools/scripts/run_yamllint.sh b/build_tools/scripts/run_yamllint.sh
index a465af8..979fe0d 100755
--- a/build_tools/scripts/run_yamllint.sh
+++ b/build_tools/scripts/run_yamllint.sh
@@ -21,6 +21,8 @@
 declare -a excluded_files_patterns=(
   "/third_party/"
   "^third_party/"
+  "/tools/test/"
+  "^tools/test/"
 )
 
 # Join on |
diff --git a/runtime/src/iree/tooling/trace_replay.c b/runtime/src/iree/tooling/trace_replay.c
index 64fe7a3..93eadba 100644
--- a/runtime/src/iree/tooling/trace_replay.c
+++ b/runtime/src/iree/tooling/trace_replay.c
@@ -41,11 +41,31 @@
 
   out_replay->driver_registry = driver_registry;
 
-  return iree_ok_status();
+  iree_status_t status = iree_ok_status();
+  if (iree_status_is_ok(status)) {
+    status = iree_vm_list_create(NULL, 8u, host_allocator, &out_replay->inputs);
+  }
+  if (iree_status_is_ok(status)) {
+    status =
+        iree_vm_list_create(NULL, 8u, host_allocator, &out_replay->outputs);
+  }
+  if (iree_status_is_ok(status)) {
+    status =
+        iree_vm_list_create(NULL, 8u, host_allocator, &out_replay->blackboard);
+  }
+
+  if (!iree_status_is_ok(status)) {
+    iree_trace_replay_deinitialize(out_replay,
+                                   IREE_TRACE_REPLAY_SHUTDOWN_QUIET);
+  }
+  return status;
 }
 
 void iree_trace_replay_deinitialize(iree_trace_replay_t* replay,
                                     iree_trace_replay_shutdown_flags_t flags) {
+  iree_vm_list_release(replay->inputs);
+  iree_vm_list_release(replay->outputs);
+  iree_vm_list_release(replay->blackboard);
   iree_vm_context_release(replay->context);
   iree_vm_instance_release(replay->instance);
 
@@ -117,11 +137,11 @@
   IREE_RETURN_IF_ERROR(iree_yaml_mapping_find(document, module_node,
                                               IREE_SV("name"), &name_node));
   if (iree_yaml_string_equal(name_node, IREE_SV("hal"))) {
-    yaml_node_t* driver_node = NULL;
+    yaml_node_t* device_node = NULL;
     IREE_RETURN_IF_ERROR(iree_yaml_mapping_try_find(
-        document, module_node, IREE_SV("driver"), &driver_node));
+        document, module_node, IREE_SV("device"), &device_node));
     IREE_RETURN_IF_ERROR(iree_trace_replay_create_device(
-        replay, driver_node, replay->host_allocator, &replay->device));
+        replay, device_node, replay->host_allocator, &replay->device));
     IREE_RETURN_IF_ERROR(iree_hal_module_create(
         replay->instance, replay->device, IREE_HAL_MODULE_FLAG_NONE,
         replay->host_allocator, &module));
@@ -354,13 +374,123 @@
 }
 
 //===----------------------------------------------------------------------===//
-// Input
+// List I/O macros
 //===----------------------------------------------------------------------===//
 
-static iree_status_t iree_trace_replay_parse_item(iree_trace_replay_t* replay,
-                                                  yaml_document_t* document,
-                                                  yaml_node_t* value_node,
-                                                  iree_vm_list_t* target_list);
+// Loads the variant at the ordinal named by |value_node| from |list| into
+// |out_result|; |list| is the replay-global inputs, outputs, or blackboard.
+// If |is_move| is true then the value is consumed from |list| and the original
+// value in the list is reset to NULL; otherwise the list keeps its entry.
+// Fails with INVALID_ARGUMENT if the ordinal cannot be parsed as an int32.
+//
+// ```yaml
+// !input.get 0
+// !input.take 1
+// !output.get 2
+// ```
+static iree_status_t iree_trace_replay_parse_list_get_macro(
+    iree_trace_replay_t* replay, yaml_document_t* document,
+    yaml_node_t* value_node, iree_vm_list_t* list, bool is_move,
+    iree_vm_variant_t* out_result) {
+  iree_string_view_t ordinal_str = iree_yaml_node_as_string(value_node);
+  int32_t ordinal = 0;
+  if (!iree_string_view_atoi_int32(ordinal_str, &ordinal)) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "failed to parse I/O ordinal from `%.*s`",
+                            (int)ordinal_str.size, ordinal_str.data);
+  }
+  // Load into a local so |out_result| is only written on success.
+  iree_vm_variant_t loaded = iree_vm_variant_empty();
+  iree_status_t status =
+      is_move ? iree_vm_list_get_variant_move(list, ordinal, &loaded)
+              : iree_vm_list_get_variant_retain(list, ordinal, &loaded);
+  IREE_RETURN_IF_ERROR(status);
+  *out_result = loaded;
+  return iree_ok_status();
+}
+
+// Parses a list load macro referencing a replay-global |list|.
+//
+// ```yaml
+// [!input.]get 2   # copies the value at index 2, leaving it in the list
+// [!input.]take 2  # moves the value at index 2 out, clearing the entry
+// [!input.]pop     # moves the last value out and shrinks the list by one
+// ```
+static iree_status_t iree_trace_replay_parse_list_load_macro(
+    iree_trace_replay_t* replay, yaml_document_t* document,
+    yaml_node_t* value_node, iree_string_view_t name, iree_vm_list_t* list,
+    iree_vm_variant_t* out_result) {
+  const bool is_take = iree_string_view_equal(name, IREE_SV("take"));
+  if (is_take || iree_string_view_equal(name, IREE_SV("get"))) {
+    return iree_trace_replay_parse_list_get_macro(
+        replay, document, value_node, list, /*is_move=*/is_take, out_result);
+  } else if (iree_string_view_equal(name, IREE_SV("pop"))) {
+    const iree_host_size_t count = iree_vm_list_size(list);
+    if (count == 0) {
+      return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
+                              "cannot pop from an empty list");
+    }
+    IREE_RETURN_IF_ERROR(
+        iree_vm_list_get_variant_move(list, count - 1, out_result));
+    return iree_vm_list_resize(list, count - 1);
+  }
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "unsupported list load macro: `%.*s`", (int)name.size,
+                          name.data);
+}
+
+// Stores |variant| at the ordinal named by |value_node| in |list|, growing
+// |list| first when the ordinal is past its end. Negative ordinals fail.
+//
+// Example: `!output.set 2`
+static iree_status_t iree_trace_replay_parse_list_set_macro(
+    iree_trace_replay_t* replay, yaml_document_t* document,
+    yaml_node_t* value_node, iree_vm_list_t* list, iree_vm_variant_t variant) {
+  iree_string_view_t value_str = iree_yaml_node_as_string(value_node);
+  int32_t ordinal = 0;
+  if (!iree_string_view_atoi_int32(value_str, &ordinal) || ordinal < 0) {
+    return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
+                            "failed to parse I/O ordinal from `%.*s`",
+                            (int)value_str.size, value_str.data);
+  }
+  // Non-negative (checked above); avoids a signed/unsigned comparison below.
+  const iree_host_size_t index = (iree_host_size_t)ordinal;
+  if (iree_vm_list_size(list) <= index) {
+    IREE_RETURN_IF_ERROR(iree_vm_list_resize(list, index + 1));
+  }
+  return iree_vm_list_set_variant_retain(list, index, &variant);
+}
+
+// Dispatches a list store macro |name| that writes |variant| into |list|.
+// Names other than `set` and `push` fail with UNIMPLEMENTED.
+//
+// ```yaml
+// [!output.]set 2  # stores |variant| at index 2 of the list
+// [!output.]push   # appends |variant| to the end of the list
+// ```
+static iree_status_t iree_trace_replay_parse_list_store_macro(
+    iree_trace_replay_t* replay, yaml_document_t* document,
+    yaml_node_t* value_node, iree_string_view_t name, iree_vm_list_t* list,
+    iree_vm_variant_t variant) {
+  if (iree_string_view_equal(name, IREE_SV("push"))) {
+    return iree_vm_list_push_variant_retain(list, &variant);
+  }
+  if (iree_string_view_equal(name, IREE_SV("set"))) {
+    return iree_trace_replay_parse_list_set_macro(replay, document, value_node,
+                                                  list, variant);
+  }
+  return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
+                          "unsupported list store macro: `%.*s`",
+                          (int)name.size, name.data);
+}
+
+//===----------------------------------------------------------------------===//
+// YAML value parsing
+//===----------------------------------------------------------------------===//
+
+static iree_status_t iree_trace_replay_parse_item(
+    iree_trace_replay_t* replay, yaml_document_t* document,
+    yaml_node_t* value_node, iree_vm_variant_t* out_result);
 static iree_status_t iree_trace_replay_parse_item_sequence(
     iree_trace_replay_t* replay, yaml_document_t* document,
     yaml_node_t* sequence_node, iree_vm_list_t* target_list);
@@ -372,7 +502,7 @@
 // ```
 static iree_status_t iree_trace_replay_parse_scalar(
     iree_trace_replay_t* replay, yaml_document_t* document,
-    yaml_node_t* value_node, iree_vm_list_t* target_list) {
+    yaml_node_t* value_node, iree_vm_variant_t* out_result) {
   yaml_node_t* data_node = NULL;
   IREE_RETURN_IF_ERROR(iree_yaml_mapping_try_find(document, value_node,
                                                   IREE_SV("i8"), &data_node));
@@ -384,10 +514,9 @@
           IREE_STATUS_INVALID_ARGUMENT, "failed to parse i8 value: '%.*s'",
           (int)data_node->data.scalar.length, data_node->data.scalar.value);
     }
-    iree_vm_variant_t variant = iree_vm_variant_empty();
-    variant.type.value_type = IREE_VM_VALUE_TYPE_I8;
-    variant.i8 = (int8_t)value;
-    return iree_vm_list_push_variant_move(target_list, &variant);
+    *out_result =
+        iree_vm_make_variant_value(iree_vm_value_make_i8((int8_t)value));
+    return iree_ok_status();
   }
   IREE_RETURN_IF_ERROR(iree_yaml_mapping_try_find(document, value_node,
                                                   IREE_SV("i16"), &data_node));
@@ -399,69 +528,66 @@
           IREE_STATUS_INVALID_ARGUMENT, "failed to parse i16 value: '%.*s'",
           (int)data_node->data.scalar.length, data_node->data.scalar.value);
     }
-    iree_vm_variant_t variant = iree_vm_variant_empty();
-    variant.type.value_type = IREE_VM_VALUE_TYPE_I16;
-    variant.i16 = (int16_t)value;
-    return iree_vm_list_push_variant_move(target_list, &variant);
+    *out_result =
+        iree_vm_make_variant_value(iree_vm_value_make_i16((int16_t)value));
+    return iree_ok_status();
   }
   IREE_RETURN_IF_ERROR(iree_yaml_mapping_try_find(document, value_node,
                                                   IREE_SV("i32"), &data_node));
   if (data_node) {
-    iree_vm_variant_t variant = iree_vm_variant_empty();
-    variant.type.value_type = IREE_VM_VALUE_TYPE_I32;
+    int32_t value = 0;
     if (!iree_string_view_atoi_int32(iree_yaml_node_as_string(data_node),
-                                     &variant.i32)) {
+                                     &value)) {
       return iree_make_status(
           IREE_STATUS_INVALID_ARGUMENT, "failed to parse i32 value: '%.*s'",
           (int)data_node->data.scalar.length, data_node->data.scalar.value);
     }
-    return iree_vm_list_push_variant_move(target_list, &variant);
+    *out_result = iree_vm_make_variant_value(iree_vm_value_make_i32(value));
+    return iree_ok_status();
   }
   IREE_RETURN_IF_ERROR(iree_yaml_mapping_try_find(document, value_node,
                                                   IREE_SV("i64"), &data_node));
   if (data_node) {
-    iree_vm_variant_t variant = iree_vm_variant_empty();
-    variant.type.value_type = IREE_VM_VALUE_TYPE_I64;
+    int64_t value = 0;
     if (!iree_string_view_atoi_int64(iree_yaml_node_as_string(data_node),
-                                     &variant.i64)) {
+                                     &value)) {
       return iree_make_status(
           IREE_STATUS_INVALID_ARGUMENT, "failed to parse i64 value: '%.*s'",
           (int)data_node->data.scalar.length, data_node->data.scalar.value);
     }
-    return iree_vm_list_push_variant_move(target_list, &variant);
+    *out_result = iree_vm_make_variant_value(iree_vm_value_make_i64(value));
+    return iree_ok_status();
   }
   IREE_RETURN_IF_ERROR(iree_yaml_mapping_try_find(document, value_node,
                                                   IREE_SV("f32"), &data_node));
   if (data_node) {
-    iree_vm_variant_t variant = iree_vm_variant_empty();
-    variant.type.value_type = IREE_VM_VALUE_TYPE_F32;
-    if (!iree_string_view_atof(iree_yaml_node_as_string(data_node),
-                               &variant.f32)) {
+    float value = 0.0f;
+    if (!iree_string_view_atof(iree_yaml_node_as_string(data_node), &value)) {
       return iree_make_status(
           IREE_STATUS_INVALID_ARGUMENT, "failed to parse f32 value: '%.*s'",
           (int)data_node->data.scalar.length, data_node->data.scalar.value);
     }
-    return iree_vm_list_push_variant_move(target_list, &variant);
+    *out_result = iree_vm_make_variant_value(iree_vm_value_make_f32(value));
+    return iree_ok_status();
   }
   IREE_RETURN_IF_ERROR(iree_yaml_mapping_try_find(document, value_node,
                                                   IREE_SV("f64"), &data_node));
   if (data_node) {
-    iree_vm_variant_t variant = iree_vm_variant_empty();
-    variant.type.value_type = IREE_VM_VALUE_TYPE_F64;
-    if (!iree_string_view_atod(iree_yaml_node_as_string(data_node),
-                               &variant.f64)) {
+    double value = 0.0;
+    if (!iree_string_view_atod(iree_yaml_node_as_string(data_node), &value)) {
       return iree_make_status(
           IREE_STATUS_INVALID_ARGUMENT, "failed to parse f64 value: '%.*s'",
           (int)data_node->data.scalar.length, data_node->data.scalar.value);
     }
-    return iree_vm_list_push_variant_move(target_list, &variant);
+    *out_result = iree_vm_make_variant_value(iree_vm_value_make_f64(value));
+    return iree_ok_status();
   }
   return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
                           "(%zu): unimplemented scalar type parser",
                           value_node->start_mark.line);
 }
 
-// Parses a !vm.list and appends it to |target_list|.
+// Parses a !vm.list into |out_result|.
 //
 // ```yaml
 // items:
@@ -472,7 +598,7 @@
 // ```
 static iree_status_t iree_trace_replay_parse_vm_list(
     iree_trace_replay_t* replay, yaml_document_t* document,
-    yaml_node_t* value_node, iree_vm_list_t* target_list) {
+    yaml_node_t* value_node, iree_vm_variant_t* out_result) {
   if (value_node->type != YAML_MAPPING_NODE) {
     return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
                             "(%zu): expected sequence node for type",
@@ -494,10 +620,8 @@
   }
 
   if (iree_status_is_ok(status)) {
-    iree_vm_ref_t list_ref = iree_vm_list_move_ref(list);
-    status = iree_vm_list_push_ref_move(target_list, &list_ref);
-  }
-  if (!iree_status_is_ok(status)) {
+    *out_result = iree_vm_make_variant_ref_assign(iree_vm_list_move_ref(list));
+  } else {
     iree_vm_list_release(list);
   }
   return status;
@@ -733,7 +857,7 @@
   }
 }
 
-// Parses a !hal.buffer and appends it to |target_list|.
+// Parses a !hal.buffer into |out_result|.
 //
 // ```yaml
 // shape:
@@ -742,7 +866,7 @@
 // ```
 static iree_status_t iree_trace_replay_parse_hal_buffer(
     iree_trace_replay_t* replay, yaml_document_t* document,
-    yaml_node_t* value_node, iree_vm_list_t* target_list) {
+    yaml_node_t* value_node, iree_vm_variant_t* out_result) {
   yaml_node_t* shape_node = NULL;
   IREE_RETURN_IF_ERROR(iree_yaml_mapping_try_find(
       document, value_node, IREE_SV("shape"), &shape_node));
@@ -779,13 +903,12 @@
       },
       allocation_size, iree_const_byte_span_empty(), &buffer));
 
-  iree_vm_ref_t buffer_ref = iree_hal_buffer_move_ref(buffer);
-  iree_status_t status = iree_vm_list_push_ref_move(target_list, &buffer_ref);
-  iree_vm_ref_release(&buffer_ref);
-  return status;
+  *out_result =
+      iree_vm_make_variant_ref_assign(iree_hal_buffer_move_ref(buffer));
+  return iree_ok_status();
 }
 
-// Parses a !hal.buffer_view and appends it to |target_list|.
+// Parses a !hal.buffer_view into |out_result|.
 //
 // ```yaml
 // shape:
@@ -796,7 +919,7 @@
 // ```
 static iree_status_t iree_trace_replay_parse_hal_buffer_view(
     iree_trace_replay_t* replay, yaml_document_t* document,
-    yaml_node_t* value_node, iree_vm_list_t* target_list) {
+    yaml_node_t* value_node, iree_vm_variant_t* out_result) {
   yaml_node_t* shape_node = NULL;
   IREE_RETURN_IF_ERROR(iree_yaml_mapping_try_find(
       document, value_node, IREE_SV("shape"), &shape_node));
@@ -863,14 +986,12 @@
         iree_const_byte_span_empty(), &buffer_view));
   }
 
-  iree_vm_ref_t buffer_view_ref = iree_hal_buffer_view_move_ref(buffer_view);
-  iree_status_t status =
-      iree_vm_list_push_ref_move(target_list, &buffer_view_ref);
-  iree_vm_ref_release(&buffer_view_ref);
-  return status;
+  *out_result = iree_vm_make_variant_ref_assign(
+      iree_hal_buffer_view_move_ref(buffer_view));
+  return iree_ok_status();
 }
 
-// Parses a !hal.buffer in tensor form and appends it to |target_list|.
+// Parses a !hal.buffer in tensor form into |out_result|.
 // The tensor form is used to size and initialize the buffer but then the
 // metadata is thrown away.
 //
@@ -879,35 +1000,35 @@
 // ```
 static iree_status_t iree_trace_replay_parse_inline_hal_buffer(
     iree_trace_replay_t* replay, yaml_document_t* document,
-    yaml_node_t* value_node, iree_vm_list_t* target_list) {
+    yaml_node_t* value_node, iree_vm_variant_t* out_result) {
   iree_hal_buffer_view_t* buffer_view = NULL;
   IREE_RETURN_IF_ERROR(iree_hal_buffer_view_parse(
       iree_yaml_node_as_string(value_node),
       iree_hal_device_allocator(replay->device), &buffer_view));
-  iree_vm_ref_t buffer_ref =
-      iree_hal_buffer_retain_ref(iree_hal_buffer_view_buffer(buffer_view));
-  iree_status_t status = iree_vm_list_push_ref_move(target_list, &buffer_ref);
+  *out_result = iree_vm_make_variant_ref_assign(
+      iree_hal_buffer_retain_ref(iree_hal_buffer_view_buffer(buffer_view)));
   iree_hal_buffer_view_release(buffer_view);
-  return status;
+  return iree_ok_status();
 }
 
-// Parses a !hal.buffer_view in tensor form and appends it to |target_list|.
+// Parses a !hal.buffer_view in tensor form into |out_result|.
 //
 // ```yaml
 // !hal.buffer_view 4xf32=[0 1 2 3]
 // ```
 static iree_status_t iree_trace_replay_parse_inline_hal_buffer_view(
     iree_trace_replay_t* replay, yaml_document_t* document,
-    yaml_node_t* value_node, iree_vm_list_t* target_list) {
+    yaml_node_t* value_node, iree_vm_variant_t* out_result) {
   iree_hal_buffer_view_t* buffer_view = NULL;
   IREE_RETURN_IF_ERROR(iree_hal_buffer_view_parse(
       iree_yaml_node_as_string(value_node),
       iree_hal_device_allocator(replay->device), &buffer_view));
-  iree_vm_ref_t buffer_view_ref = iree_hal_buffer_view_move_ref(buffer_view);
-  return iree_vm_list_push_ref_move(target_list, &buffer_view_ref);
+  *out_result = iree_vm_make_variant_ref_assign(
+      iree_hal_buffer_view_move_ref(buffer_view));
+  return iree_ok_status();
 }
 
-// Parses a typed item from |value_node| and appends it to |target_list|.
+// Parses a typed item from |value_node| into |out_result|.
 //
 // ```yaml
 // type: vm.list
@@ -919,16 +1040,25 @@
 // ```yaml
 // !hal.buffer_view 4xf32=[0 1 2 3]
 // ```
-static iree_status_t iree_trace_replay_parse_item(iree_trace_replay_t* replay,
-                                                  yaml_document_t* document,
-                                                  yaml_node_t* value_node,
-                                                  iree_vm_list_t* target_list) {
-  if (strcmp(value_node->tag, "!hal.buffer") == 0) {
+static iree_status_t iree_trace_replay_parse_item(
+    iree_trace_replay_t* replay, yaml_document_t* document,
+    yaml_node_t* value_node, iree_vm_variant_t* out_result) {
+  iree_string_view_t tag = iree_make_cstring_view(value_node->tag);
+  if (iree_string_view_consume_prefix(&tag, IREE_SV("!input."))) {
+    return iree_trace_replay_parse_list_load_macro(
+        replay, document, value_node, tag, replay->inputs, out_result);
+  } else if (iree_string_view_consume_prefix(&tag, IREE_SV("!output."))) {
+    return iree_trace_replay_parse_list_load_macro(
+        replay, document, value_node, tag, replay->outputs, out_result);
+  } else if (iree_string_view_consume_prefix(&tag, IREE_SV("!blackboard."))) {
+    return iree_trace_replay_parse_list_load_macro(
+        replay, document, value_node, tag, replay->blackboard, out_result);
+  } else if (strcmp(value_node->tag, "!hal.buffer") == 0) {
     return iree_trace_replay_parse_inline_hal_buffer(replay, document,
-                                                     value_node, target_list);
+                                                     value_node, out_result);
   } else if (strcmp(value_node->tag, "!hal.buffer_view") == 0) {
     return iree_trace_replay_parse_inline_hal_buffer_view(
-        replay, document, value_node, target_list);
+        replay, document, value_node, out_result);
   }
 
   yaml_node_t* type_node = NULL;
@@ -936,20 +1066,20 @@
                                               IREE_SV("type"), &type_node));
   iree_string_view_t type = iree_yaml_node_as_string(type_node);
   if (iree_string_view_equal(type, IREE_SV("null"))) {
-    iree_vm_variant_t null_value = iree_vm_variant_empty();
-    return iree_vm_list_push_variant_move(target_list, &null_value);
+    *out_result = iree_vm_variant_empty();
+    return iree_ok_status();
   } else if (iree_string_view_equal(type, IREE_SV("value"))) {
     return iree_trace_replay_parse_scalar(replay, document, value_node,
-                                          target_list);
+                                          out_result);
   } else if (iree_string_view_equal(type, IREE_SV("vm.list"))) {
     return iree_trace_replay_parse_vm_list(replay, document, value_node,
-                                           target_list);
+                                           out_result);
   } else if (iree_string_view_equal(type, IREE_SV("hal.buffer"))) {
     return iree_trace_replay_parse_hal_buffer(replay, document, value_node,
-                                              target_list);
+                                              out_result);
   } else if (iree_string_view_equal(type, IREE_SV("hal.buffer_view"))) {
     return iree_trace_replay_parse_hal_buffer_view(replay, document, value_node,
-                                                   target_list);
+                                                   out_result);
   }
   return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
                           "unimplemented type parser: '%.*s'", (int)type.size,
@@ -963,8 +1093,13 @@
   for (yaml_node_item_t* item = sequence_node->data.sequence.items.start;
        item != sequence_node->data.sequence.items.top; ++item) {
     yaml_node_t* item_node = yaml_document_get_node(document, *item);
+    iree_vm_variant_t variant = iree_vm_variant_empty();
     IREE_RETURN_IF_ERROR(
-        iree_trace_replay_parse_item(replay, document, item_node, target_list));
+        iree_trace_replay_parse_item(replay, document, item_node, &variant));
+    iree_status_t status =
+        iree_vm_list_push_variant_move(target_list, &variant);
+    iree_vm_variant_reset(&variant);
+    IREE_RETURN_IF_ERROR(status);
   }
   return iree_ok_status();
 }
@@ -973,70 +1108,36 @@
 // Output
 //===----------------------------------------------------------------------===//
 
-static iree_status_t iree_trace_replay_print_item(
-    iree_vm_variant_t* value, iree_allocator_t host_allocator);
-
-static iree_status_t iree_trace_replay_print_scalar(iree_vm_variant_t* value) {
-  switch (value->type.value_type) {
-    case IREE_VM_VALUE_TYPE_I8:
-      fprintf(stdout, "i8=%" PRIi8, value->i8);
-      break;
-    case IREE_VM_VALUE_TYPE_I16:
-      fprintf(stdout, "i16=%" PRIi16, value->i16);
-      break;
-    case IREE_VM_VALUE_TYPE_I32:
-      fprintf(stdout, "i32=%" PRIi32, value->i32);
-      break;
-    case IREE_VM_VALUE_TYPE_I64:
-      fprintf(stdout, "i64=%" PRIi64, value->i64);
-      break;
-    case IREE_VM_VALUE_TYPE_F32:
-      fprintf(stdout, "f32=%G", value->f32);
-      break;
-    case IREE_VM_VALUE_TYPE_F64:
-      fprintf(stdout, "f64=%G", value->f64);
-      break;
-    default:
-      fprintf(stdout, "?");
-      break;
+// Applies a result item to |variant|; only store macros have an effect.
+static iree_status_t iree_trace_replay_parse_result_item(
+    iree_trace_replay_t* replay, yaml_document_t* document,
+    yaml_node_t* item_node, iree_vm_variant_t variant) {
+  iree_string_view_t tag = iree_make_cstring_view(item_node->tag);
+  if (iree_string_view_consume_prefix(&tag, IREE_SV("!output."))) {
+    return iree_trace_replay_parse_list_store_macro(
+        replay, document, item_node, tag, replay->outputs, variant);
+  } else if (iree_string_view_consume_prefix(&tag, IREE_SV("!blackboard."))) {
+    return iree_trace_replay_parse_list_store_macro(
+        replay, document, item_node, tag, replay->blackboard, variant);
   }
+  // NOTE: we ignore other types currently; we could parse them and compare
+  // against the |source_list| values or something.
   return iree_ok_status();
 }
 
-static iree_status_t iree_trace_replay_print_vm_list(
-    iree_vm_list_t* list, iree_allocator_t host_allocator) {
-  for (iree_host_size_t i = 0; i < iree_vm_list_size(list); ++i) {
+// Applies each result item in order to the matching |source_list| value.
+static iree_status_t iree_trace_replay_parse_result_item_sequence(
+    iree_trace_replay_t* replay, yaml_document_t* document,
+    yaml_node_t* sequence_node, iree_vm_list_t* source_list) {
+  iree_host_size_t i = 0;
+  for (yaml_node_item_t* item = sequence_node->data.sequence.items.start;
+       item != sequence_node->data.sequence.items.top; ++item, ++i) {
     iree_vm_variant_t variant = iree_vm_variant_empty();
-    IREE_RETURN_IF_ERROR(iree_vm_list_get_variant_assign(list, i, &variant),
-                         "variant %zu not present", i);
     IREE_RETURN_IF_ERROR(
-        iree_trace_replay_print_item(&variant, host_allocator));
-    fprintf(stdout, "\n");
-  }
-  return iree_ok_status();
-}
-
-static iree_status_t iree_trace_replay_print_item(
-    iree_vm_variant_t* value, iree_allocator_t host_allocator) {
-  if (iree_vm_variant_is_value(*value)) {
-    IREE_RETURN_IF_ERROR(iree_trace_replay_print_scalar(value));
-  } else if (iree_vm_variant_is_ref(*value)) {
-    if (iree_hal_buffer_view_isa(value->ref)) {
-      iree_hal_buffer_view_t* buffer_view =
-          iree_hal_buffer_view_deref(value->ref);
-      IREE_RETURN_IF_ERROR(iree_hal_buffer_view_fprint(
-          stdout, buffer_view,
-          /*max_element_count=*/1024, host_allocator));
-    } else if (iree_vm_list_isa(value->ref)) {
-      iree_vm_list_t* list = iree_vm_list_deref(value->ref);
-      IREE_RETURN_IF_ERROR(
-          iree_trace_replay_print_vm_list(list, host_allocator));
-    } else {
-      // TODO(benvanik): a way for ref types to describe themselves.
-      fprintf(stdout, "(no printer)");
-    }
-  } else {
-    fprintf(stdout, "(null)");
+        iree_vm_list_get_variant_assign(source_list, i, &variant));
+    yaml_node_t* item_node = yaml_document_get_node(document, *item);
+    IREE_RETURN_IF_ERROR(iree_trace_replay_parse_result_item(
+        replay, document, item_node, variant));
   }
   return iree_ok_status();
 }
@@ -1085,12 +1186,31 @@
   return status;
 }
 
-iree_status_t iree_trace_replay_event_call(iree_trace_replay_t* replay,
-                                           yaml_document_t* document,
-                                           yaml_node_t* event_node,
-                                           iree_vm_list_t** out_output_list) {
+iree_status_t iree_trace_replay_event_call_finish(iree_trace_replay_t* replay,
+                                                  yaml_document_t* document,
+                                                  yaml_node_t* event_node,
+                                                  iree_vm_function_t function,
+                                                  iree_vm_list_t* output_list) {
   IREE_TRACE_ZONE_BEGIN(z0);
-  if (out_output_list) *out_output_list = NULL;
+
+  yaml_node_t* results_node = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_yaml_mapping_try_find(document, event_node, IREE_SV("results"),
+                                     &results_node));
+  if (results_node) {
+    IREE_RETURN_AND_END_ZONE_IF_ERROR(
+        z0, iree_trace_replay_parse_result_item_sequence(
+                replay, document, results_node, output_list));
+  }
+
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
+
+iree_status_t iree_trace_replay_event_call(
+    iree_trace_replay_t* replay, yaml_document_t* document,
+    yaml_node_t* event_node, const iree_trace_replay_call_hooks_t* hooks) {
+  IREE_TRACE_ZONE_BEGIN(z0);
 
   iree_vm_function_t function;
   iree_vm_list_t* input_list = NULL;
@@ -1098,62 +1218,91 @@
       z0, iree_trace_replay_event_call_prepare(replay, document, event_node,
                                                &function, &input_list));
 
-  // Invoke the function to produce outputs.
+  iree_status_t status = iree_ok_status();
+  if (hooks && hooks->before) {
+    status = hooks->before(hooks->user_data, replay, document, event_node,
+                           function, input_list);
+  }
+
   iree_vm_list_t* output_list = NULL;
-  iree_status_t status =
-      iree_vm_list_create(/*element_type=*/NULL, /*initial_capacity=*/8,
-                          replay->host_allocator, &output_list);
   if (iree_status_is_ok(status)) {
-    status = iree_vm_invoke(replay->context, function,
-                            IREE_VM_INVOCATION_FLAG_NONE, /*policy=*/NULL,
-                            input_list, output_list, replay->host_allocator);
+    status = iree_vm_list_create(/*element_type=*/NULL, /*initial_capacity=*/8,
+                                 replay->host_allocator, &output_list);
+  }
+
+  // Invoke the function to produce outputs.
+  iree_status_t call_status = iree_ok_status();
+  if (iree_status_is_ok(status)) {
+    call_status = iree_vm_invoke(
+        replay->context, function, IREE_VM_INVOCATION_FLAG_NONE,
+        /*policy=*/NULL, input_list, output_list, replay->host_allocator);
   }
   iree_vm_list_release(input_list);
 
-  if (iree_status_is_ok(status) && out_output_list) {
-    *out_output_list = output_list;
-  } else {
-    iree_vm_list_release(output_list);
+  if (!iree_status_is_ok(status)) {
+    // Setup failed before the call ran: keep that error and skip the hooks.
+  } else if (!iree_status_is_ok(call_status)) {
+    status = (hooks && hooks->error)
+                 ? hooks->error(hooks->user_data, replay, document, event_node,
+                                function, call_status)
+                 : call_status;
+  } else if (hooks && hooks->after) {
+    status = hooks->after(hooks->user_data, replay, document, event_node,
+                          function, output_list);
   }
+
+  if (iree_status_is_ok(status)) {
+    status = iree_trace_replay_event_call_finish(replay, document, event_node,
+                                                 function, output_list);
+  }
+  iree_vm_list_release(output_list);
+
   IREE_TRACE_ZONE_END(z0);
   return status;
 }
 
-static iree_status_t iree_trace_replay_event_call_stdout(
+//===----------------------------------------------------------------------===//
+// Blackboard management
+//===----------------------------------------------------------------------===//
+
+static iree_status_t iree_trace_replay_event_blackboard_clear(
     iree_trace_replay_t* replay, yaml_document_t* document,
     yaml_node_t* event_node) {
-  yaml_node_t* function_node = NULL;
-  IREE_RETURN_IF_ERROR(iree_yaml_mapping_find(
-      document, event_node, IREE_SV("function"), &function_node));
-  iree_string_view_t function_name = iree_yaml_node_as_string(function_node);
-  fprintf(stdout, "--- CALL[%.*s] ---\n", (int)function_name.size,
-          function_name.data);
+  IREE_TRACE_ZONE_BEGIN(z0);
+  iree_vm_list_clear(replay->blackboard);
+  IREE_TRACE_ZONE_END(z0);
+  return iree_ok_status();
+}
 
-  // Prepare to call the function.
-  iree_vm_function_t function;
-  iree_vm_list_t* input_list = NULL;
-  IREE_RETURN_IF_ERROR(iree_trace_replay_event_call_prepare(
-      replay, document, event_node, &function, &input_list));
+static iree_status_t iree_trace_replay_event_blackboard_assign(
+    iree_trace_replay_t* replay, yaml_document_t* document,
+    yaml_node_t* event_node) {
+  IREE_TRACE_ZONE_BEGIN(z0);
 
-  // Invoke the function to produce outputs.
-  iree_vm_list_t* output_list = NULL;
+  yaml_node_t* from_node = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_yaml_mapping_find(document, event_node, IREE_SV("from"),
+                                 &from_node));
+  yaml_node_t* to_node = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0,
+      iree_yaml_mapping_find(document, event_node, IREE_SV("to"), &to_node));
+
+  iree_vm_list_t* list = NULL;
+  IREE_RETURN_AND_END_ZONE_IF_ERROR(
+      z0, iree_vm_list_create(/*element_type=*/NULL, 8u, replay->host_allocator,
+                              &list));
+
   iree_status_t status =
-      iree_vm_list_create(/*element_type=*/NULL, /*initial_capacity=*/8,
-                          replay->host_allocator, &output_list);
+      iree_trace_replay_parse_item_sequence(replay, document, from_node, list);
   if (iree_status_is_ok(status)) {
-    status = iree_vm_invoke(replay->context, function,
-                            IREE_VM_INVOCATION_FLAG_NONE, /*policy=*/NULL,
-                            input_list, output_list, replay->host_allocator);
+    status = iree_trace_replay_parse_result_item_sequence(replay, document,
+                                                          to_node, list);
   }
-  iree_vm_list_release(input_list);
 
-  // Print the outputs.
-  if (iree_status_is_ok(status)) {
-    status =
-        iree_trace_replay_print_vm_list(output_list, replay->host_allocator);
-  }
-  iree_vm_list_release(output_list);
+  iree_vm_list_release(list);
 
+  IREE_TRACE_ZONE_END(z0);
   return status;
 }
 
@@ -1176,8 +1325,15 @@
     return iree_trace_replay_event_context_load(replay, document, event_node);
   } else if (iree_yaml_string_equal(type_node, IREE_SV("module_load"))) {
     return iree_trace_replay_event_module_load(replay, document, event_node);
+  } else if (iree_yaml_string_equal(type_node, IREE_SV("blackboard_clear"))) {
+    return iree_trace_replay_event_blackboard_clear(replay, document,
+                                                    event_node);
+  } else if (iree_yaml_string_equal(type_node, IREE_SV("assign"))) {
+    return iree_trace_replay_event_blackboard_assign(replay, document,
+                                                     event_node);
   } else if (iree_yaml_string_equal(type_node, IREE_SV("call"))) {
-    return iree_trace_replay_event_call_stdout(replay, document, event_node);
+    return iree_trace_replay_event_call(replay, document, event_node,
+                                        &replay->call_hooks);
   }
   return iree_make_status(
       IREE_STATUS_UNIMPLEMENTED, "(%zu): unhandled type '%.*s'",
diff --git a/runtime/src/iree/tooling/trace_replay.h b/runtime/src/iree/tooling/trace_replay.h
index db2de14..ddc240c 100644
--- a/runtime/src/iree/tooling/trace_replay.h
+++ b/runtime/src/iree/tooling/trace_replay.h
@@ -16,12 +16,37 @@
 extern "C" {
 #endif  // __cplusplus
 
+typedef struct iree_trace_replay_t iree_trace_replay_t;
+
 enum iree_trace_replay_shutdown_flag_bits_e {
   IREE_TRACE_REPLAY_SHUTDOWN_QUIET = 0u,
   IREE_TRACE_REPLAY_SHUTDOWN_PRINT_STATISTICS = 1 << 0u,
 };
 typedef uint32_t iree_trace_replay_shutdown_flags_t;
 
+// Optional set of callbacks issued around a replayed `call` event.
+// Callbacks the caller does not need may be omitted.
+typedef struct iree_trace_replay_call_hooks_t {
+  // Opaque user context passed as the first argument of each callback.
+  void* user_data;
+  // Issued before the call begins; |input_list| holds the call inputs.
+  iree_status_t (*before)(void* user_data, iree_trace_replay_t* replay,
+                          yaml_document_t* document, yaml_node_t* event_node,
+                          iree_vm_function_t function,
+                          iree_vm_list_t* input_list);
+  // Issued after a successful call; |output_list| holds the call outputs.
+  iree_status_t (*after)(void* user_data, iree_trace_replay_t* replay,
+                         yaml_document_t* document, yaml_node_t* event_node,
+                         iree_vm_function_t function,
+                         iree_vm_list_t* output_list);
+  // Issued only when the call itself fails, not when the replay operation
+  // fails. |status| is the status returned from the call and its ownership is
+  // transferred to the hook.
+  iree_status_t (*error)(void* user_data, iree_trace_replay_t* replay,
+                         yaml_document_t* document, yaml_node_t* event_node,
+                         iree_vm_function_t function, iree_status_t status);
+} iree_trace_replay_call_hooks_t;
+
 typedef struct iree_trace_replay_t {
   iree_allocator_t host_allocator;
   iree_string_view_t root_path;
@@ -33,8 +58,21 @@
   iree_host_size_t device_uri_count;
   const iree_string_view_t* device_uris;
 
+  // Context used within the replay, modules registered on-demand.
   iree_vm_context_t* context;
+
+  // Active HAL device if any. Will be initialized on the first HAL module load.
   iree_hal_device_t* device;
+
+  // Optional inputs available via `!input.get`/`!input.take`.
+  iree_vm_list_t* inputs;
+  // Optional outputs populated via `!output.set`/`!output.push`.
+  iree_vm_list_t* outputs;
+  // Blackboard used to track state within the trace.
+  iree_vm_list_t* blackboard;
+
+  // Optional call hooks allowing reflection of calls and their I/O.
+  iree_trace_replay_call_hooks_t call_hooks;
 } iree_trace_replay_t;
 
 // Initializes a trace replay context.
@@ -82,12 +120,11 @@
     iree_vm_list_t** out_input_list);
 
 // Replays a `call` event against the replay context.
-// Optionally |out_output_list| can be populated with a caller-owned set of
-// outputs from the call.
-iree_status_t iree_trace_replay_event_call(iree_trace_replay_t* replay,
-                                           yaml_document_t* document,
-                                           yaml_node_t* event_node,
-                                           iree_vm_list_t** out_output_list);
+// Optionally |hooks| may be specified to inspect the inputs and outputs of the
+// call operation.
+iree_status_t iree_trace_replay_event_call(
+    iree_trace_replay_t* replay, yaml_document_t* document,
+    yaml_node_t* event_node, const iree_trace_replay_call_hooks_t* hooks);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/tools/BUILD b/tools/BUILD
index a6bdb90..fff940c 100644
--- a/tools/BUILD
+++ b/tools/BUILD
@@ -183,6 +183,7 @@
         "//runtime/src/iree/hal",
         "//runtime/src/iree/tooling:device_util",
         "//runtime/src/iree/tooling:trace_replay",
+        "//runtime/src/iree/tooling:vm_util",
         "//runtime/src/iree/tooling:yaml_util",
         "//runtime/src/iree/vm",
         "@com_github_yaml_libyaml//:yaml",
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 7d37b6c..ea8d47c 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -161,6 +161,7 @@
     iree::modules::hal
     iree::tooling::device_util
     iree::tooling::trace_replay
+    iree::tooling::vm_util
     iree::tooling::yaml_util
     iree::vm
     yaml
diff --git a/tools/iree-run-trace-main.c b/tools/iree-run-trace-main.c
index 57a2053..56c66ca 100644
--- a/tools/iree-run-trace-main.c
+++ b/tools/iree-run-trace-main.c
@@ -14,6 +14,7 @@
 #include "iree/hal/api.h"
 #include "iree/tooling/device_util.h"
 #include "iree/tooling/trace_replay.h"
+#include "iree/tooling/vm_util.h"
 #include "iree/tooling/yaml_util.h"
 #include "iree/vm/api.h"
 
@@ -22,6 +23,102 @@
 IREE_FLAG(bool, print_statistics, false,
           "Prints runtime statistics to stderr on exit.");
 
+IREE_FLAG(bool, print_calls, false, "Prints all I/O for each call to stdout.");
+IREE_FLAG(bool, print_call_inputs, false,
+          "Prints all inputs for each call before they are made to stdout.");
+IREE_FLAG(bool, print_call_outputs, false,
+          "Prints all outputs for each call after they are made to stdout.");
+
+IREE_FLAG_LIST(
+    string, input,
+    "An input (a) value or (b) buffer of the format:\n"
+    "  (a) scalar value\n"
+    "     value\n"
+    "     e.g.: --input=\"3.14\"\n"
+    "  (b) buffer:\n"
+    "     [shape]xtype=[value]\n"
+    "     e.g.: --input=\"2x2xi32=1 2 3 4\"\n"
+    "Optionally, brackets may be used to separate the element values:\n"
+    "  2x2xi32=[[1 2][3 4]]\n"
+    "Raw binary files can be read to provide buffer contents:\n"
+    "  2x2xi32=@some/file.bin\n"
+    "\n"
+    "Numpy npy files from numpy.save can be read to provide 1+ values:\n"
+    "  @some.npy\n"
+    "\n"
+    "Each occurrence of the flag indicates an input in the order they were\n"
+    "specified on the command line.");
+
+IREE_FLAG_LIST(
+    string, output,
+    "Specifies how to handle an output from the invocation:\n"
+    "  `` (empty): ignore output\n"
+    "     e.g.: --output=\n"
+    "  `-`: print textual form to stdout\n"
+    "     e.g.: --output=-\n"
+    "  `@file.npy`: create/overwrite a numpy npy file and write buffer view\n"
+    "     e.g.: --output=@file.npy\n"
+    "  `+file.npy`: create/append a numpy npy file and write buffer view\n"
+    "     e.g.: --output=+file.npy\n"
+    "\n"
+    "Numpy npy files can be read in Python using numpy.load, for example an\n"
+    "invocation producing two outputs can be concatenated as:\n"
+    "    --output=@file.npy --output=+file.npy\n"
+    "And then loaded in Python by reading from the same file:\n"
+    "  with open('file.npy', 'rb') as f:\n"
+    "    print(numpy.load(f))\n"
+    "    print(numpy.load(f))\n"
+    "\n"
+    "Each occurrence of the flag indicates an output in the order they were\n"
+    "specified on the command line.");
+
+IREE_FLAG_LIST(string, expected_output,
+               "An expected function output following the same format as "
+               "--input. When present the results of the "
+               "invocation will be compared against these values and the "
+               "tool will return non-zero if any differ. If the value of a "
+               "particular output is not of interest provide `(ignored)`.");
+
+IREE_FLAG(int32_t, output_max_element_count, 1024,
+          "Prints up to the maximum number of elements of output tensors, "
+          "eliding the remainder.");
+
+static iree_status_t iree_trace_replay_call_before(void* user_data,
+                                                   iree_trace_replay_t* replay,
+                                                   yaml_document_t* document,
+                                                   yaml_node_t* event_node,
+                                                   iree_vm_function_t function,
+                                                   iree_vm_list_t* input_list) {
+  // Nothing to do unless input printing was requested via flags.
+  if (!FLAG_print_calls && !FLAG_print_call_inputs) return iree_ok_status();
+
+  // Emit a header naming the callee followed by each call argument.
+  const iree_string_view_t callee = iree_vm_function_name(&function);
+  fprintf(stdout, "--- CALL[%.*s] ---\n", (int)callee.size, callee.data);
+  return iree_tooling_variant_list_fprint(
+      IREE_SV("arg"), input_list,
+      (iree_host_size_t)FLAG_output_max_element_count, stdout);
+}
+
+static iree_status_t iree_trace_replay_call_after(void* user_data,
+                                                  iree_trace_replay_t* replay,
+                                                  yaml_document_t* document,
+                                                  yaml_node_t* event_node,
+                                                  iree_vm_function_t function,
+                                                  iree_vm_list_t* output_list) {
+  // Nothing to do unless output printing was requested via flags.
+  if (!FLAG_print_calls && !FLAG_print_call_outputs) return iree_ok_status();
+  // The header is only needed here when the input-printing flags are unset.
+  const bool needs_header = !(FLAG_print_calls || FLAG_print_call_inputs);
+  if (needs_header) {
+    const iree_string_view_t callee = iree_vm_function_name(&function);
+    fprintf(stdout, "--- CALL[%.*s] ---\n", (int)callee.size, callee.data);
+  }
+  return iree_tooling_variant_list_fprint(
+      IREE_SV("result"), output_list,
+      (iree_host_size_t)FLAG_output_max_element_count, stdout);
+}
+
 // Runs the trace in |file| using |root_path| as the base for any path lookups
 // required for external files referenced in |file|.
 static iree_status_t iree_run_trace_file(iree_string_view_t root_path,
@@ -34,6 +131,11 @@
                            : IREE_VM_CONTEXT_FLAG_NONE,
       iree_hal_available_driver_registry(), iree_allocator_system(), &replay));
 
+  // Hook into all calls processed during the trace.
+  replay.call_hooks.user_data = NULL;
+  replay.call_hooks.before = iree_trace_replay_call_before;
+  replay.call_hooks.after = iree_trace_replay_call_after;
+
   // Query device overrides, if any. When omitted the devices from the trace
   // file will be used.
   // TODO(#5724): remove this and instead provide a device set on initialize.
@@ -51,24 +153,59 @@
   }
   yaml_parser_set_input_file(&parser, file);
 
+  bool have_parsed_inputs = false;
   iree_status_t status = iree_ok_status();
   for (bool document_eof = false; !document_eof;) {
+    // Parse the subdocument event.
     yaml_document_t document;
     if (!yaml_parser_load(&parser, &document)) {
       status = iree_status_from_yaml_parser_error(&parser);
       break;
     }
+
+    // Execute the event or handle EOF (empty document).
     yaml_node_t* event_node = yaml_document_get_root_node(&document);
     if (event_node) {
       status = iree_trace_replay_event(&replay, &document, event_node);
     } else {
       document_eof = true;
     }
+
+    // Reclaim subdocument resources before moving on to the next.
     yaml_document_delete(&document);
     if (!iree_status_is_ok(status)) break;
+
+    // If the event created a device and we haven't yet performed our input
+    // loading we can do that now before processing subsequent events.
+    if (!have_parsed_inputs && replay.device) {
+      status = iree_tooling_parse_into_variant_list(
+          iree_hal_device_allocator(replay.device), FLAG_input_list().values,
+          FLAG_input_list().count, replay.host_allocator, replay.inputs);
+      have_parsed_inputs = true;
+    }
+    if (!iree_status_is_ok(status)) break;
   }
 
   yaml_parser_delete(&parser);
+
+  // Optionally process outputs; failures must still reach deinitialize below.
+  if (iree_status_is_ok(status)) {
+    if (FLAG_output_list().count == 0) {
+      status = iree_status_annotate(
+          iree_tooling_variant_list_fprint(
+              IREE_SV("output"), replay.outputs,
+              (iree_host_size_t)FLAG_output_max_element_count, stdout),
+          IREE_SV("printing results"));
+    } else {
+      status = iree_status_annotate(
+          iree_tooling_output_variant_list(
+              replay.outputs, FLAG_output_list().values,
+              FLAG_output_list().count,
+              (iree_host_size_t)FLAG_output_max_element_count, stdout),
+          IREE_SV("outputting results"));
+    }
+  }
+
   iree_trace_replay_deinitialize(
       &replay, FLAG_print_statistics
                    ? IREE_TRACE_REPLAY_SHUTDOWN_PRINT_STATISTICS
@@ -97,6 +234,114 @@
 }
 
 int main(int argc, char** argv) {
+  iree_flags_set_usage(
+      "iree-run-trace",
+      "Executes a YAML trace file containing a sequence of context operations\n"
+      "and calls represented as subdocuments.\n"
+      "\n"
+      "Example loading a bytecode module and calling a function:\n"
+      "\n"
+      "```yaml\n"
+      "type: context_load\n"
+      "---\n"
+      "type: module_load\n"
+      "module:\n"
+      "  type: builtin\n"
+      "  name: hal\n"
+      "---\n"
+      "type: module_load\n"
+      "module:\n"
+      "  type: bytecode\n"
+      "  path: ../build/some_module.vmfb\n"
+      "---\n"
+      "type: call\n"
+      "function: module.mul\n"
+      "args:\n"
+      "- !input.take 0\n"
+      "- !input.take 1\n"
+      "results:\n"
+      "- !output.push\n"
+      "- !output.push\n"
+      "```\n"
+      "\n"
+      "This can be invoked like iree-run-module specifying inputs/outputs:\n"
+      "  iree-run-trace trace.yml    \\\n"
+      "      --device=local-sync     \\\n"
+      "      --input=4xf32=0,1,2,3   \\\n"
+      "      --input=@input1.npy     \\\n"
+      "      --output=@outputs.npy   \\\n"
+      "      --output=+outputs.npy\n"
+      "\n"
+      "In addition to `--input=`/`--output=` flag access a user-defined\n"
+      "blackboard exists for preserving temporary values used within the\n"
+      "trace. Blackboard slots are defined by ordinal and they can be used\n"
+      "in any context an input/output can be, `!blackboard.get` instead of\n"
+      "`!input.get` and `!blackboard.set` instead of `!output.set`.\n"
+      "\n"
+      "--- Events ---\n"
+      "\n"
+      "`type: context_load`\n"
+      "Loads an empty VM context with no modules registered.\n"
+      "\n"
+      "`type: module_load`\n"
+      "Loads a module into the current context. Modules may either be\n"
+      "`builtin` (compiled into the binary) or dynamically-loaded `bytecode`.\n"
+      "\n"
+      "`type: blackboard_clear`\n"
+      "Clears the contents of the blackboard and resets it to 0 elements.\n"
+      "\n"
+      "`type: assign`\n"
+      "Assigns sources from a `from` sequence to targets in a `to` sequence.\n"
+      "Equivalent to an identity function call and can be used to move\n"
+      "between inputs, outputs, and the blackboard.\n"
+      "\n"
+      "`type: call`\n"
+      "Invokes a function in the context by fully-qualified `function` name.\n"
+      "Uses arguments from an `args` sequence and produces results into a\n"
+      "`results` sequence.\n"
+      "\n"
+      "--- Sources ---\n"
+      "\n"
+      "`type: null`\n"
+      "A null ref value.\n"
+      "\n"
+      "`!hal.buffer_view 4xf32=0,1,2,3`\n"
+      "A constant iree_hal_buffer_view_t/!hal.buffer_view value using the\n"
+      "same formatting as iree-run-module's `--input=` flag.\n"
+      "\n"
+      "`!hal.buffer 4xf32=0,1,2,3`\n"
+      "An initialized iree_hal_buffer_t/!hal.buffer without the wrapping view\n"
+      "metadata.\n"
+      "\n"
+      "`!input.get ORDINAL` / `!input.take ORDINAL`\n"
+      "Returns a reference to `--input=` flag at ORDINAL. Note that a single\n"
+      "npy file may expand to multiple inputs. The `take` variant transfers\n"
+      "ownership and clears the slot in the list and is recommended to avoid\n"
+      "keeping unneeded inputs around for the duration of the trace.\n"
+      "\n"
+      "`!output.get ORDINAL` / `!output.take ORDINAL`\n"
+      "Returns a reference to the `--output=` flag at ORDINAL. These are\n"
+      "initially empty until assigned by the trace.\n"
+      "\n"
+      "`!blackboard.get ORDINAL` / `!blackboard.take ORDINAL`\n"
+      "Returns a reference to the blackboard slot ORDINAL. The blackboard is\n"
+      "initially empty and slots must be assigned in order to define them.\n"
+      "The `take` variant transfers ownership and clears the slot in the\n"
+      "blackboard and is recommended to avoid keeping large resources live\n"
+      "in the blackboard longer than they need to be.\n"
+      "\n"
+      "--- Targets ---\n"
+      "\n"
+      "`!output.set ORDINAL` / `!output.push`\n"
+      "Sets the `--output=` flag result value at ORDINAL or pushes it to the\n"
+      "back of the output list. Outputs can either be dumped to files or by\n"
+      "default printed to stdout.\n"
+      "\n"
+      "`!blackboard.set ORDINAL` / `!blackboard.push`\n"
+      "Sets the value of the blackboard slot ORDINAL or pushes it to the back\n"
+      "of the blackboard list. Blackboard values will be retained until they\n"
+      "are consumed via `!blackboard.take` or the blackboard is cleared.\n"
+      "\n");
   iree_flags_parse_checked(IREE_FLAGS_PARSE_MODE_DEFAULT, &argc, &argv);
   if (argc <= 1) {
     fprintf(stderr,
diff --git a/tools/test/BUILD b/tools/test/BUILD
index f7bf4f8..af0a150 100644
--- a/tools/test/BUILD
+++ b/tools/test/BUILD
@@ -28,6 +28,7 @@
             "iree-run-module.mlir",
             "iree-run-module-expected.mlir",
             "iree-run-module-outputs.mlir",
+            "iree-run-trace.mlir",
             "multiple_args.mlir",
             "multiple_exported_functions.mlir",
             "null_values.mlir",
@@ -39,6 +40,7 @@
     cfg = "//tools:lit.cfg.py",
     data = [
         "echo_npy.py",
+        "iree-run-trace.yml",
     ],
     tags = [
         "driver=local-task",
@@ -51,6 +53,7 @@
         "//tools:iree-opt",
         "//tools:iree-run-mlir",
         "//tools:iree-run-module",
+        "//tools:iree-run-trace",
         "@llvm-project//lld",
         "@llvm-project//llvm:FileCheck",
         "@llvm-project//llvm:not",
diff --git a/tools/test/CMakeLists.txt b/tools/test/CMakeLists.txt
index 7c92d9d..305345a 100644
--- a/tools/test/CMakeLists.txt
+++ b/tools/test/CMakeLists.txt
@@ -24,6 +24,7 @@
     "iree-run-module-expected.mlir"
     "iree-run-module-outputs.mlir"
     "iree-run-module.mlir"
+    "iree-run-trace.mlir"
     "multiple_args.mlir"
     "multiple_exported_functions.mlir"
     "null_values.mlir"
@@ -37,9 +38,11 @@
     iree-opt
     iree-run-mlir
     iree-run-module
+    iree-run-trace
     not
   DATA
     echo_npy.py
+    iree-run-trace.yml
   LABELS
     "driver=local-task"
     "driver=vulkan"
diff --git a/tools/test/iree-run-trace.mlir b/tools/test/iree-run-trace.mlir
new file mode 100644
index 0000000..ad898e7
--- /dev/null
+++ b/tools/test/iree-run-trace.mlir
@@ -0,0 +1,22 @@
+// Tests iree-run-trace usage by running two calls of @mul and passing the
+// result between them. The outputs of both calls are produced as outputs from
+// the trace and both are written to a .npy file for processing. Inputs can
+// also come from an .npy file. See iree-run-module usage for more information
+// on the `--input=` and `--output=` flags.
+
+// RUN: (iree-compile --iree-hal-target-backends=vmvx %s | \
+// RUN:  iree-run-trace %S/iree-run-trace.yml \
+// RUN:                 --device=local-sync \
+// RUN:                 --input=4xf32=4,4,4,4 \
+// RUN:                 --output=@%t \
+// RUN:                 --output=+%t) && \
+// RUN:  python3 %S/echo_npy.py %t | \
+// RUN: FileCheck %s
+
+//      CHECK{LITERAL}: [ 0. 4. 8. 12.]
+// CHECK-NEXT{LITERAL}: [ 0. 12. 24. 36.]
+
+func.func @mul(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>) -> tensor<4xf32> {
+  %0 = arith.mulf %arg0, %arg1 : tensor<4xf32>
+  return %0 : tensor<4xf32>
+}
diff --git a/tools/test/iree-run-trace.yml b/tools/test/iree-run-trace.yml
new file mode 100644
index 0000000..b267dc2
--- /dev/null
+++ b/tools/test/iree-run-trace.yml
@@ -0,0 +1,80 @@
+# Copyright 2023 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Tests loading and executing a bytecode module and issuing a few calls showing
+# how to take input, produce output, and support temporary values within the
+# trace session. See iree-run-trace.mlir for how to compile the module and
+# invoke the iree-run-trace tool.
+
+# Prepare the VM context for use; effectively a reset.
+# API: iree_vm_context_create
+type: context_load
+
+---
+
+# Load the builtin HAL module used to execute the program.
+# API: iree_hal_module_create
+type: module_load
+module:
+  type: builtin
+  name: hal
+
+---
+
+# Load the compiled bytecode module.
+# API: iree_vm_bytecode_module_create
+type: module_load
+module:
+  type: bytecode
+  name: module
+  # The test pulls the .vmfb from stdin but you can also reference relative or
+  # absolute file paths:
+  #   path: ../iree-tmp/iree-run-trace.vmfb
+  path: <stdin>
+
+---
+
+# Call #0 of @mul.
+# API: iree_vm_invoke
+type: call
+function: module.mul
+args:
+# arg[0]: the first `--input=` buffer. !input.get would retain the input for
+# other calls to use but otherwise prefer taking ownership.
+- !input.take 0
+# arg[1]: constant value defined inline.
+- !hal.buffer_view 4xf32=0,1,2,3
+results:
+# result[0]: store in blackboard slot 4 for later use.
+- !blackboard.set 4
+
+---
+
+# Assigns one or more source values to a set of target values.
+# Effectively: outputs.push(retain(blackboard[4]))
+type: assign
+from:
+# from[0]: retain blackboard slot 4, leaving it for later use.
+- !blackboard.get 4
+to:
+# to[0]: push on to the trace output list. --output= can save off the results
+# and otherwise they are printed to stdout.
+- !output.push
+
+---
+
+# Call #1 of @mul.
+# API: iree_vm_invoke
+type: call
+function: module.mul
+args:
+# arg[0]: take the previously-stored value in blackboard slot 4.
+- !blackboard.take 4
+# arg[1]: another constant.
+- !hal.buffer_view 4xf32=3,3,3,3
+results:
+# result[0]: push on to the trace output list.
+- !output.push