Toggle GPIO for Each Inference of a Benchmark

Toggle a second GPIO (GPIO17) on every inference of a benchmark. This is
in addition to the GPIO (GPIO16) that stays high for the entire duration
of the benchmark. The per-inference signal can be used to better evaluate
inference power and ML core operation.

Change-Id: Ib6e592b564df83dfd0855e28074f5eea80e7c4d9
diff --git a/benchmarks/benchmark.h b/benchmarks/benchmark.h
index 59383fb..16c1c85 100644
--- a/benchmarks/benchmark.h
+++ b/benchmarks/benchmark.h
@@ -17,11 +17,15 @@
 #ifndef BENCHMARKS_BENCHMARK_H_
 #define BENCHMARKS_BENCHMARK_H_
 
+#define ML_RUN_INDICATOR_IO 16   // GPIO held high for the whole benchmark run.
+#define ML_TOGGLE_PER_INF_IO 17  // GPIO that flips on every inference.
+
 typedef struct {
   uint32_t return_code;
   uint32_t iterations;
   uint64_t cycles;
   uint32_t mismatch_count;
+  uint32_t gpio_toggle_per_inference;  // Flipped by Kelvin at each inference.
 } BenchmarkOutputHeader;
 
-#endif // #ifndef BENCHMARKS_BENCHMARK_H_
+#endif  // BENCHMARKS_BENCHMARK_H_
diff --git a/benchmarks/benchmark_kelvin.cc b/benchmarks/benchmark_kelvin.cc
index 079f567..7e72835 100644
--- a/benchmarks/benchmark_kelvin.cc
+++ b/benchmarks/benchmark_kelvin.cc
@@ -68,6 +68,7 @@
     .iterations = 0,
     .cycles = 0,
     .mismatch_count = 0,
+    .gpio_toggle_per_inference = 0,
 };
 
 // This includes all ops currently used in the Kelvin model suite. More can be
@@ -167,6 +168,8 @@
 
   // TODO(michaelbrooks): Possibly set/verify test data?
   for (int i = 0; i < iterations; ++i) {
+    output_header.gpio_toggle_per_inference =
+        !output_header.gpio_toggle_per_inference;
 #if (TEST_DATA_INPUT == 1)
     memcpy(tflite::GetTensorData<uint8_t>(input), g_benchmark_input,
            input->bytes);
diff --git a/benchmarks/benchmark_sec.c b/benchmarks/benchmark_sec.c
index be05531..af5c8dd 100644
--- a/benchmarks/benchmark_sec.c
+++ b/benchmarks/benchmark_sec.c
@@ -28,12 +28,13 @@
 #include "sw/device/lib/testing/test_framework/check.h"
 #include "sw/device/lib/testing/test_framework/ottf_test_config.h"
 #include "sw/device/lib/testing/test_framework/test_util.h"
+/* clang-format off */
+#include "benchmarks/benchmark.h"
+/* clang-format on */
 
 #define STRINGIZE(x) #x
 #define STR(x) STRINGIZE(x)
 
-#define TRIGGER_GPIO 16
-
 // In order to include the model data generate from Bazel, include the header
 // using the name passed as a macro. For some reason this binary (vs Kelvin)
 // adds space when concatinating so use the model format -smc_bin.h.
@@ -65,7 +66,9 @@
       CHECK_DIF_OK(dif_tlul_mailbox_irq_acknowledge(&tlul_mailbox,
                                                     kDifTlulMailboxIrqRtirq));
       CHECK_DIF_OK(dif_tlul_mailbox_read_message(&tlul_mailbox, &rx));
-      CHECK_DIF_OK(dif_gpio_write(&gpio, TRIGGER_GPIO, rx));
+      uint32_t pin = rx >> 16;
+      uint32_t value = rx & 0xFFFF;
+      CHECK_DIF_OK(dif_gpio_write(&gpio, pin, value));
       break;
     }
     default:
@@ -88,18 +91,27 @@
   CHECK_DIF_OK(dif_smc_ctrl_init(
       mmio_region_from_addr(TOP_MATCHA_SMC_CTRL_BASE_ADDR), &smc_ctrl));
 
-// PinMux: J52.5 for Sparrow (IOR7) :: PMOD3.7 on Nexus (IOD4)
+// PinMux: Total inference GPIO J52.5/CS  Sparrow (IOR7) :: PMOD3.7 on Nexus
+// (IOD4)
+//           Per inference GPIO J52.7/SCK Sparrow (IOA1) :: PMOD3.8 on Nexus
+//           (IOD5)
 #if defined(MATCHA_SPARROW)
   CHECK_DIF_OK(dif_pinmux_output_select(&pinmux, kTopMatchaPinmuxMioOutIor7,
                                         kTopMatchaPinmuxOutselGpioGpio16));
+  CHECK_DIF_OK(dif_pinmux_output_select(&pinmux, kTopMatchaPinmuxMioOutIoa1,
+                                        kTopMatchaPinmuxOutselGpioGpio17));
 #else
   CHECK_DIF_OK(dif_pinmux_output_select(&pinmux, kTopMatchaPinmuxMioOutIod4,
                                         kTopMatchaPinmuxOutselGpioGpio16));
+  CHECK_DIF_OK(dif_pinmux_output_select(&pinmux, kTopMatchaPinmuxMioOutIod5,
+                                        kTopMatchaPinmuxOutselGpioGpio17));
 #endif
   CHECK_DIF_OK(
       dif_gpio_init(mmio_region_from_addr(TOP_MATCHA_GPIO_BASE_ADDR), &gpio));
-  CHECK_DIF_OK(
-      dif_gpio_output_set_enabled(&gpio, TRIGGER_GPIO, kDifToggleEnabled));
+  CHECK_DIF_OK(dif_gpio_output_set_enabled(&gpio, ML_RUN_INDICATOR_IO,
+                                           kDifToggleEnabled));
+  CHECK_DIF_OK(dif_gpio_output_set_enabled(&gpio, ML_TOGGLE_PER_INF_IO,
+                                           kDifToggleEnabled));
 
   LOG_INFO("Loading Kelvin binary");
   spi_flash_init();
diff --git a/benchmarks/benchmark_smc.c b/benchmarks/benchmark_smc.c
index 887f00c..f290350 100644
--- a/benchmarks/benchmark_smc.c
+++ b/benchmarks/benchmark_smc.c
@@ -119,12 +119,18 @@
   CHECK_DIF_OK(dif_tlul_mailbox_init(
       mmio_region_from_addr(TOP_MATCHA_TLUL_MAILBOX_SMC_BASE_ADDR),
       &tlul_mailbox));
-  {
-    uint32_t msg = 1;
-    CHECK_DIF_OK(dif_tlul_mailbox_send_message(&tlul_mailbox, &msg));
-  }
+
+  BenchmarkOutputHeader* output_header_ptr =
+      (BenchmarkOutputHeader*)((TOP_MATCHA_ML_TOP_DMEM_BASE_ADDR +
+                                TOP_MATCHA_RAM_ML_DMEM_SIZE_BYTES) -
+                               0x40);
 
   LOG_INFO("========== Begin Benchmark (%s) ==========", STR(BENCHMARK_NAME));
+  {
+    uint32_t value = 1;
+    uint32_t tx = ML_RUN_INDICATOR_IO << 16 | value;
+    CHECK_DIF_OK(dif_tlul_mailbox_send_message(&tlul_mailbox, &tx));
+  }
 
   // start kelvin and pulse GPIO for data logger (Kibble)
   ml_top_finish_done = false;
@@ -133,17 +139,21 @@
   CHECK_DIF_OK(dif_ml_top_release_ctrl_en(&ml_top));
 
   // wfi
+  uint32_t gpio_toggle_per_inference_prev = 0;  // Last value forwarded.
+  uint32_t tx;  // Mailbox word: pin in bits 31:16, level in bits 15:0.
   while (!ml_top_finish_done) {
-    wait_for_interrupt();
+    uint32_t gpio_toggle_per_inference =
+        *(volatile uint32_t*)&output_header_ptr->gpio_toggle_per_inference;
+    if (gpio_toggle_per_inference != gpio_toggle_per_inference_prev) {
+      tx = ML_TOGGLE_PER_INF_IO << 16 | gpio_toggle_per_inference;
+      gpio_toggle_per_inference_prev = gpio_toggle_per_inference;
+      CHECK_DIF_OK(dif_tlul_mailbox_send_message(&tlul_mailbox, &tx));
+    }
+    // No wait_for_interrupt(): keep polling the header while Kelvin runs.
   }
   uint64_t timer_finish;
   CHECK_DIF_OK(dif_rv_timer_counter_read(&rv_timer, 0, &timer_finish));
 
-  BenchmarkOutputHeader* output_header_ptr =
-      (BenchmarkOutputHeader*)((TOP_MATCHA_ML_TOP_DMEM_BASE_ADDR +
-                                TOP_MATCHA_RAM_ML_DMEM_SIZE_BYTES) -
-                              0x40);
-
   if (output_header_ptr->return_code) {
     LOG_FATAL("Kelvin returned an error: %d", output_header_ptr->return_code);
     test_status_set(kTestStatusFailed);
@@ -156,8 +166,9 @@
 
   // End Test Pulse
   {
-    uint32_t msg = 0;
-    CHECK_DIF_OK(dif_tlul_mailbox_send_message(&tlul_mailbox, &msg));
+    uint32_t value = 0;
+    uint32_t tx = ML_RUN_INDICATOR_IO << 16 | value;
+    CHECK_DIF_OK(dif_tlul_mailbox_send_message(&tlul_mailbox, &tx));
   }
 
   LOG_INFO("Iterations: %d", iterations);