diff --git a/benchmarks/benchmark.h b/benchmarks/benchmark.h
index 59383fb..16c1c85 100644
--- a/benchmarks/benchmark.h
+++ b/benchmarks/benchmark.h
@@ -17,11 +17,15 @@
 #ifndef BENCHMARKS_BENCHMARK_H_
 #define BENCHMARKS_BENCHMARK_H_
 
+#define ML_RUN_INDICATOR_IO 16
+#define ML_TOGGLE_PER_INF_IO 17
+
 typedef struct {
   uint32_t return_code;
   uint32_t iterations;
   uint64_t cycles;
   uint32_t mismatch_count;
+  uint32_t gpio_toggle_per_inference;
 } BenchmarkOutputHeader;
 
-#endif // #ifndef BENCHMARKS_BENCHMARK_H_
+#endif  // BENCHMARKS_BENCHMARK_H_
diff --git a/benchmarks/benchmark_kelvin.cc b/benchmarks/benchmark_kelvin.cc
index 079f567..7e72835 100644
--- a/benchmarks/benchmark_kelvin.cc
+++ b/benchmarks/benchmark_kelvin.cc
@@ -68,6 +68,7 @@
     .iterations = 0,
     .cycles = 0,
     .mismatch_count = 0,
+    .gpio_toggle_per_inference = 0,
 };
 
 // This includes all ops currently used in the Kelvin model suite. More can be
@@ -167,6 +168,8 @@
 
   // TODO(michaelbrooks): Possibly set/verify test data?
   for (int i = 0; i < iterations; ++i) {
+    output_header.gpio_toggle_per_inference =
+        !output_header.gpio_toggle_per_inference;
 #if (TEST_DATA_INPUT == 1)
     memcpy(tflite::GetTensorData<uint8_t>(input), g_benchmark_input,
            input->bytes);
diff --git a/benchmarks/benchmark_sec.c b/benchmarks/benchmark_sec.c
index be05531..af5c8dd 100644
--- a/benchmarks/benchmark_sec.c
+++ b/benchmarks/benchmark_sec.c
@@ -28,12 +28,13 @@
 #include "sw/device/lib/testing/test_framework/check.h"
 #include "sw/device/lib/testing/test_framework/ottf_test_config.h"
 #include "sw/device/lib/testing/test_framework/test_util.h"
+/* clang-format off */
+#include "benchmarks/benchmark.h"
+/* clang-format on */
 
 #define STRINGIZE(x) #x
 #define STR(x) STRINGIZE(x)
 
-#define TRIGGER_GPIO 16
-
 // In order to include the model data generate from Bazel, include the header
 // using the name passed as a macro. For some reason this binary (vs Kelvin)
 // adds space when concatinating so use the model format -smc_bin.h.
@@ -65,7 +66,9 @@
       CHECK_DIF_OK(dif_tlul_mailbox_irq_acknowledge(&tlul_mailbox,
                                                     kDifTlulMailboxIrqRtirq));
       CHECK_DIF_OK(dif_tlul_mailbox_read_message(&tlul_mailbox, &rx));
-      CHECK_DIF_OK(dif_gpio_write(&gpio, TRIGGER_GPIO, rx));
+      // rx carries the GPIO pin in bits 31:16 and the pin level in bits 15:0.
+      const uint32_t pin = rx >> 16;
+      CHECK_DIF_OK(dif_gpio_write(&gpio, pin, rx & 0xFFFF));
       break;
     }
     default:
@@ -88,18 +91,27 @@
   CHECK_DIF_OK(dif_smc_ctrl_init(
       mmio_region_from_addr(TOP_MATCHA_SMC_CTRL_BASE_ADDR), &smc_ctrl));
 
-// PinMux: J52.5 for Sparrow (IOR7) :: PMOD3.7 on Nexus (IOD4)
+// PinMux: Total inference GPIO: J52.5/CS on Sparrow (IOR7) :: PMOD3.7 on Nexus
+//         (IOD4).
+//         Per inference GPIO: J52.7/SCK on Sparrow (IOA1) :: PMOD3.8 on Nexus
+//         (IOD5).
 #if defined(MATCHA_SPARROW)
   CHECK_DIF_OK(dif_pinmux_output_select(&pinmux, kTopMatchaPinmuxMioOutIor7,
                                         kTopMatchaPinmuxOutselGpioGpio16));
+  CHECK_DIF_OK(dif_pinmux_output_select(&pinmux, kTopMatchaPinmuxMioOutIoa1,
+                                        kTopMatchaPinmuxOutselGpioGpio17));
 #else
   CHECK_DIF_OK(dif_pinmux_output_select(&pinmux, kTopMatchaPinmuxMioOutIod4,
                                         kTopMatchaPinmuxOutselGpioGpio16));
+  CHECK_DIF_OK(dif_pinmux_output_select(&pinmux, kTopMatchaPinmuxMioOutIod5,
+                                        kTopMatchaPinmuxOutselGpioGpio17));
 #endif
   CHECK_DIF_OK(
       dif_gpio_init(mmio_region_from_addr(TOP_MATCHA_GPIO_BASE_ADDR), &gpio));
-  CHECK_DIF_OK(
-      dif_gpio_output_set_enabled(&gpio, TRIGGER_GPIO, kDifToggleEnabled));
+  CHECK_DIF_OK(dif_gpio_output_set_enabled(&gpio, ML_RUN_INDICATOR_IO,
+                                           kDifToggleEnabled));
+  CHECK_DIF_OK(dif_gpio_output_set_enabled(&gpio, ML_TOGGLE_PER_INF_IO,
+                                           kDifToggleEnabled));
 
   LOG_INFO("Loading Kelvin binary");
   spi_flash_init();
diff --git a/benchmarks/benchmark_smc.c b/benchmarks/benchmark_smc.c
index 887f00c..f290350 100644
--- a/benchmarks/benchmark_smc.c
+++ b/benchmarks/benchmark_smc.c
@@ -119,12 +119,18 @@
   CHECK_DIF_OK(dif_tlul_mailbox_init(
       mmio_region_from_addr(TOP_MATCHA_TLUL_MAILBOX_SMC_BASE_ADDR),
       &tlul_mailbox));
-  {
-    uint32_t msg = 1;
-    CHECK_DIF_OK(dif_tlul_mailbox_send_message(&tlul_mailbox, &msg));
-  }
+
+  BenchmarkOutputHeader* output_header_ptr =
+      (BenchmarkOutputHeader*)((TOP_MATCHA_ML_TOP_DMEM_BASE_ADDR +
+                                TOP_MATCHA_RAM_ML_DMEM_SIZE_BYTES) -
+                               0x40);
 
   LOG_INFO("========== Begin Benchmark (%s) ==========", STR(BENCHMARK_NAME));
+  {
+    // Mailbox word layout: GPIO pin in bits 31:16, pin level in bits 15:0.
+    uint32_t tx = (ML_RUN_INDICATOR_IO << 16) | 1u;
+    CHECK_DIF_OK(dif_tlul_mailbox_send_message(&tlul_mailbox, &tx));
+  }
 
   // start kelvin and pulse GPIO for data logger (Kibble)
   ml_top_finish_done = false;
@@ -133,17 +139,21 @@
   CHECK_DIF_OK(dif_ml_top_release_ctrl_en(&ml_top));
 
   // wfi
+  uint32_t gpio_toggle_per_inference_prev = 0;
+  uint32_t tx;
   while (!ml_top_finish_done) {
-    wait_for_interrupt();
+    // Busy-poll (no WFI); volatile read because Kelvin updates this word.
+    uint32_t gpio_toggle_per_inference =
+        *(volatile uint32_t*)&output_header_ptr->gpio_toggle_per_inference;
+    if (gpio_toggle_per_inference != gpio_toggle_per_inference_prev) {
+      tx = ML_TOGGLE_PER_INF_IO << 16 | gpio_toggle_per_inference;
+      gpio_toggle_per_inference_prev = gpio_toggle_per_inference;
+      CHECK_DIF_OK(dif_tlul_mailbox_send_message(&tlul_mailbox, &tx));
+    }
   }
   uint64_t timer_finish;
   CHECK_DIF_OK(dif_rv_timer_counter_read(&rv_timer, 0, &timer_finish));
 
-  BenchmarkOutputHeader* output_header_ptr =
-      (BenchmarkOutputHeader*)((TOP_MATCHA_ML_TOP_DMEM_BASE_ADDR +
-                                TOP_MATCHA_RAM_ML_DMEM_SIZE_BYTES) -
-                              0x40);
-
   if (output_header_ptr->return_code) {
     LOG_FATAL("Kelvin returned an error: %d", output_header_ptr->return_code);
     test_status_set(kTestStatusFailed);
@@ -156,8 +166,9 @@
 
   // End Test Pulse
   {
-    uint32_t msg = 0;
-    CHECK_DIF_OK(dif_tlul_mailbox_send_message(&tlul_mailbox, &msg));
+    // Send level 0 for the run-indicator pin (pin in bits 31:16, level 15:0).
+    uint32_t tx = (ML_RUN_INDICATOR_IO << 16) | 0u;
+    CHECK_DIF_OK(dif_tlul_mailbox_send_message(&tlul_mailbox, &tx));
   }
 
   LOG_INFO("Iterations: %d", iterations);
