Add benchmark suite to sw/kelvin

Adds benchmark libraries and Bazel rules to easily run simulator
(ISS and HW) and FPGA benchmarks. Example usage can be found in
ml-models-public.

Change-Id: I9098d7893ccb093ef05b56e03e63fb79b8357201
diff --git a/benchmarks/BUILD b/benchmarks/BUILD
new file mode 100644
index 0000000..5a14073
--- /dev/null
+++ b/benchmarks/BUILD
@@ -0,0 +1,25 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+package(default_visibility = ["//visibility:public"])
+
+# Result-header library; public via the package default_visibility above.
+cc_library(
+    name = "benchmark_header",
+    hdrs = ["benchmark.h"],
+)
+
+# Export the benchmark sources and headers so that macros in other
+# repositories can reference them by label.
+exports_files(glob(["*.c", "*.cc", "*.h"]))
diff --git a/benchmarks/benchmark.h b/benchmarks/benchmark.h
new file mode 100644
index 0000000..e36020c
--- /dev/null
+++ b/benchmarks/benchmark.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BENCHMARKS_BENCHMARK_H_
+#define BENCHMARKS_BENCHMARK_H_
+
+#include <stdint.h>
+
+// Result record that the Kelvin benchmark binary fills in and that the SMC
+// benchmark binary reads back from the end of ML DMEM once Kelvin finishes.
+typedef struct {
+  uint32_t return_code;  // Return value of the Kelvin program; 0 on success.
+  uint32_t iterations;   // Number of timed inference iterations.
+  uint64_t cycles;       // Total cycle count across all timed iterations.
+} BenchmarkOutputHeader;
+
+#endif  // BENCHMARKS_BENCHMARK_H_
diff --git a/benchmarks/benchmark_kelvin.cc b/benchmarks/benchmark_kelvin.cc
new file mode 100644
index 0000000..766d3d6
--- /dev/null
+++ b/benchmarks/benchmark_kelvin.cc
@@ -0,0 +1,149 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+
+#include "crt/kelvin.h"
+#include "crt/log.h"
+#include "benchmarks/benchmark.h"
+#include "tensorflow/lite/micro/micro_interpreter.h"
+#include "tensorflow/lite/micro/micro_log.h"
+#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
+#include "tensorflow/lite/schema/schema_generated.h"
+
+#define STRINGIZE(x) #x
+#define STR(x) STRINGIZE(x)
+
+// In order to include the model data generated by Bazel, include the header
+// using the name passed as a macro.
+#define MODEL_HEADER_DIRECTORY benchmarks/
+#define MODEL_HEADER_TYPE _model.h
+#define MODEL_HEADER STR(MODEL_HEADER_DIRECTORY BENCHMARK_NAME MODEL_HEADER_TYPE)
+#include MODEL_HEADER
+
+namespace {
+constexpr int kTensorArenaSize = 1024 * 1024;  // Size in bytes of the arena given to the interpreter.
+uint8_t g_tensor_arena[kTensorArenaSize] __attribute__((aligned(64)));  // 64-byte-aligned tensor arena.
+
+__attribute__((section(".model_output_header"))) BenchmarkOutputHeader output_header = {
+    .return_code = 0, // Set by kelvin_start based on return value in main.
+    .iterations = 0,  // Written by main() once the timed loop has finished.
+    .cycles = 0,      // Written by main() once the timed loop has finished.
+};
+
+inline uint64_t mcycle_read(void) {  // Returns the 64-bit cycle count assembled from mcycleh:mcycle.
+  uint32_t cycle_low = 0;     // Low word, read from `mcycle`.
+  uint32_t cycle_high = 0;    // High word, first `mcycleh` read.
+  uint32_t cycle_high_2 = 0;  // High word re-read, used to detect a change between the reads.
+  asm volatile(
+      "1:"
+      "  csrr %0, mcycleh;"  // Read `mcycleh`.
+      "  csrr %1, mcycle;"   // Read `mcycle`.
+      "  csrr %2, mcycleh;"  // Read `mcycleh` again.
+      "  bne  %0, %2, 1b;"   // High word changed while reading: retry from label 1.
+      : "=r"(cycle_high), "=r"(cycle_low), "=r"(cycle_high_2)
+      :);
+  return static_cast<uint64_t>(cycle_high) << 32 | cycle_low;
+}
+
+// Builds (in place, without a copy) a resolver with all ops used in the Kelvin model suite.
+constexpr int kAllOpsNum = 22;  // Number of Add*() calls below; bump it when adding ops.
+std::unique_ptr<tflite::MicroMutableOpResolver<kAllOpsNum>> GetAllOpsResolver() {
+  auto resolver = std::make_unique<tflite::MicroMutableOpResolver<kAllOpsNum>>();
+  resolver->AddAveragePool2D();
+  resolver->AddMaxPool2D();
+  resolver->AddConv2D();
+  resolver->AddConcatenation();
+  resolver->AddDepthwiseConv2D();
+  resolver->AddDequantize();
+  resolver->AddQuantize();
+  resolver->AddReshape();
+  resolver->AddSoftmax();
+  resolver->AddCallOnce();
+  resolver->AddVarHandle();
+  resolver->AddReadVariable();
+  resolver->AddAssignVariable();
+  resolver->AddLogistic();
+  resolver->AddStridedSlice();
+  resolver->AddFullyConnected();
+  resolver->AddPad();
+  resolver->AddLeakyRelu();
+  resolver->AddSplit();
+  resolver->AddTransposeConv();
+  resolver->AddAdd();
+  resolver->AddSub();
+  return resolver;
+}
+
+void _print64(const char* header, uint64_t number) {  // Logs `number` as hex, high word first.
+  const uint32_t upper_word = static_cast<uint32_t>(number >> 32);
+  const uint32_t lower_word = static_cast<uint32_t>(number);
+  LOG_INFO("%s: 0x%08lx%08lx", header, upper_word, lower_word);
+}
+
+constexpr int kSuccess = 0;           // main(): benchmark ran to completion.
+constexpr int kAllocatonFailed = -1;  // main(): AllocateTensors() did not return kTfLiteOk.
+constexpr int kInvokeFailed = -2;     // main(): an Invoke() call did not return kTfLiteOk.
+} // namespace
+
+// Runs one warm-up inference, then times ITERATIONS inferences and publishes the cycle counts.
+int main(int argc, char **argv) {
+  static_assert(ITERATIONS > 0, "ITERATIONS must be positive: it divides the cycle total");
+  std::unique_ptr<tflite::MicroMutableOpResolver<kAllOpsNum>> resolver = GetAllOpsResolver();
+  const auto* model = tflite::GetModel(g_benchmark_model_data);
+
+  uint8_t variable_arena[2048];  // The whole buffer is handed to the allocator below.
+  tflite::MicroAllocator *variable_allocator =
+      tflite::MicroAllocator::Create(variable_arena, sizeof(variable_arena));
+  tflite::MicroResourceVariables *resource_variables =
+      tflite::MicroResourceVariables::Create(variable_allocator, 20);
+  std::unique_ptr<tflite::MicroInterpreter> interpreter = std::make_unique<tflite::MicroInterpreter>(
+      model, *resolver, g_tensor_arena, kTensorArenaSize, resource_variables);
+
+  // Run inference outside of benchmark to initialize model.
+  if (interpreter->AllocateTensors() != kTfLiteOk) {
+    return kAllocatonFailed;
+  }
+  TfLiteTensor* input = interpreter->input(0);
+
+  // Set input tensor to zero for first inference, subsequent runs
+  // will run on output tensor data (since the memory is shared).
+  memset(tflite::GetTensorData<uint8_t>(input), 0, input->bytes);
+  if (interpreter->Invoke() != kTfLiteOk) {
+    return kInvokeFailed;
+  }
+
+  LOG_INFO("========== Begin Benchmark (%s) ==========", STR(BENCHMARK_NAME));
+  uint64_t begin = mcycle_read();
+
+  // TODO(michaelbrooks): Possibly set/verify test data?
+  for (int i = 0; i < ITERATIONS; ++i) {
+    // A failed inference makes the cycle count meaningless, so stop right away.
+    if (interpreter->Invoke() != kTfLiteOk) return kInvokeFailed;
+  }
+  const uint64_t num_cycles = mcycle_read() - begin;
+  // Stores benchmark information in output header for other cores to access.
+  output_header.iterations = ITERATIONS;
+  output_header.cycles = num_cycles;
+
+  // If running on a simulator, print cycle information.
+  uint64_t average_cycles = num_cycles / ITERATIONS;
+  LOG_INFO("Iterations: %ld", output_header.iterations);
+  _print64("Total Cycles", output_header.cycles);  // _print64 adds the ": " separator itself.
+  _print64("Average Cycles per Iteration", average_cycles);
+  LOG_INFO("========== End Benchmark ==========");
+  return kSuccess;
+}
diff --git a/benchmarks/benchmark_sec.c b/benchmarks/benchmark_sec.c
new file mode 100644
index 0000000..f5b6e72
--- /dev/null
+++ b/benchmarks/benchmark_sec.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hw/top_matcha/sw/autogen/top_matcha.h"
+#include "sw/device/lib/arch/device.h"
+#include "sw/device/lib/dif/dif_gpio.h"
+#include "sw/device/lib/dif/dif_pinmux.h"
+#include "sw/device/lib/dif/dif_rv_plic.h"
+#include "sw/device/lib/dif/dif_smc_ctrl.h"
+#include "sw/device/lib/dif/dif_uart.h"
+#include "sw/device/lib/runtime/hart.h"
+#include "sw/device/lib/runtime/irq.h"
+#include "sw/device/lib/spi_flash.h"
+#include "sw/device/lib/testing/test_framework/check.h"
+#include "sw/device/lib/testing/test_framework/ottf_test_config.h"
+#include "sw/device/lib/testing/test_framework/test_util.h"
+
+#define STRINGIZE(x) #x
+#define STR(x) STRINGIZE(x)
+
+// In order to include the model data generated by Bazel, include the header
+// using the name passed as a macro. For some reason this binary (vs Kelvin)
+// adds a space when concatenating, so use the model format -smc_bin.h.
+#define MODEL_HEADER_DIRECTORY benchmarks
+#define MODEL_HEADER_TYPE smc_bin.h
+#define MODEL_HEADER STR(MODEL_HEADER_DIRECTORY/BENCHMARK_NAME-MODEL_HEADER_TYPE)
+#include MODEL_HEADER
+
+static dif_pinmux_t pinmux;      // Pinmux handle, initialized in _ottf_main().
+static dif_smc_ctrl_t smc_ctrl;  // SMC_CTRL handle passed to dif_smc_ctrl_set_en().
+static dif_uart_t uart;          // UART0 handle, set up on non-DV platforms.
+
+OTTF_DEFINE_TEST_CONFIG();
+
+void _ottf_main(void) {  // Secure-core entry: stage the images, then enable the SMC.
+  // Initialize the UART to enable logging for non-DV simulation platforms.
+  if (kDeviceType != kDeviceSimDV) {
+    init_uart(TOP_MATCHA_UART0_BASE_ADDR, &uart);
+  }
+  LOG_INFO("Benchmark Main (SEC)");
+  CHECK_DIF_OK(dif_pinmux_init(
+      mmio_region_from_addr(TOP_MATCHA_PINMUX_AON_BASE_ADDR), &pinmux));
+  CHECK_DIF_OK(dif_smc_ctrl_init(
+      mmio_region_from_addr(TOP_MATCHA_SMC_CTRL_BASE_ADDR), &smc_ctrl));
+
+  LOG_INFO("Loading Kelvin binary");
+  spi_flash_init();
+  CHECK_DIF_OK(load_file_from_tar(  // NOTE(review): args look like DMEM start and end bound - confirm.
+      "kelvin.bin", (void*)TOP_MATCHA_ML_TOP_DMEM_BASE_ADDR,
+      (TOP_MATCHA_ML_TOP_DMEM_BASE_ADDR + TOP_MATCHA_RAM_ML_DMEM_SIZE_BYTES)));
+
+  if (kDeviceType == kDeviceFpgaNexus) {  // Only the FPGA Nexus build copies the SMC image here.
+    LOG_INFO("Loading SMC binary");
+    memcpy((void*)TOP_MATCHA_RAM_SMC_BASE_ADDR, smc_bin, smc_bin_len);
+  }
+  CHECK_DIF_OK(dif_smc_ctrl_set_en(&smc_ctrl));  // Presumably starts the SMC core - confirm.
+  irq_global_ctrl(true);
+  irq_external_ctrl(true);
+
+  while (true) {  // Nothing left to do on this core; idle in WFI forever.
+    wait_for_interrupt();
+  }
+  __builtin_unreachable();
+}
diff --git a/benchmarks/benchmark_smc.c b/benchmarks/benchmark_smc.c
new file mode 100644
index 0000000..1c9f5d1
--- /dev/null
+++ b/benchmarks/benchmark_smc.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+#include <limits.h>
+
+#include "benchmarks/benchmark.h"
+#include "hw/top_matcha/sw/autogen/top_matcha.h"
+#include "sw/device/lib/base/math.h"
+#include "sw/device/lib/dif/dif_ml_top.h"
+#include "sw/device/lib/dif/dif_rv_plic.h"
+#include "sw/device/lib/runtime/hart.h"
+#include "sw/device/lib/runtime/irq.h"
+#include "sw/device/lib/runtime/log.h"
+#include "sw/device/lib/runtime/print.h"
+#include "sw/device/lib/testing/test_framework/check.h"
+#include "sw/device/lib/testing/test_framework/ottf_test_config.h"
+#include "sw/device/lib/testing/test_framework/status.h"
+#include "sw/device/lib/testing/test_framework/test_util.h"
+
+#define STRINGIZE(x) #x
+#define STR(x) STRINGIZE(x)
+
+OTTF_DEFINE_TEST_CONFIG();
+
+static dif_rv_plic_t plic_smc;  // PLIC handle shared by _ottf_main() and the ISR.
+static dif_uart_t smc_uart;     // SMC UART handle, set up on non-DV platforms.
+static dif_ml_top_t ml_top;     // ML_TOP handle shared by _ottf_main() and the ISR.
+
+volatile bool ml_top_finish_done = false;  // Set by the ISR on the ML_TOP finish interrupt.
+
+void _print64(const char* header, uint64_t number) {  // Logs `number` as hex, high word first.
+  const uint32_t upper_word = (uint32_t)(number >> 32);
+  const uint32_t lower_word = (uint32_t)number;
+  LOG_INFO("%s: 0x%08lx%08lx", header, upper_word, lower_word);
+}
+
+void ottf_external_isr(void) {  // External IRQ handler: claim, dispatch, complete.
+  dif_rv_plic_irq_id_t interrupt_id;
+  CHECK_DIF_OK(dif_rv_plic_irq_claim(&plic_smc, kTopMatchaPlicTargetIbex0Smc,
+                                     &interrupt_id));
+
+  top_matcha_plic_peripheral_smc_t peripheral_id =
+      top_matcha_plic_interrupt_for_peripheral_smc[interrupt_id];
+  switch (peripheral_id) {  // Only ML_TOP interrupts are expected here.
+    case kTopMatchaPlicPeripheralMlTop: {
+      switch (interrupt_id) {
+        case kTopMatchaPlicIrqIdMlTopFinish:
+          ml_top_finish_done = true;  // Lets the wait loop in _ottf_main() exit.
+          break;
+        default:
+          CHECK(false, "Unhandled ML_TOP interrupt");
+      }
+      CHECK_DIF_OK(dif_ml_top_reset_ctrl_en(&ml_top));  // Presumably halts Kelvin again - confirm.
+      CHECK_DIF_OK(dif_ml_top_irq_acknowledge_all(&ml_top));
+      break;
+    }
+    default:
+      CHECK(false, "Unhandled peripheral! %d", peripheral_id);
+  }
+
+  CHECK_DIF_OK(dif_rv_plic_irq_complete(&plic_smc, kTopMatchaPlicTargetIbex0Smc,
+                                        interrupt_id));
+}
+
+// SMC entry point: starts Kelvin, waits for its finish interrupt, then logs the results.
+void _ottf_main(void) {
+  // Initialize the SMC UART to enable logging for non-DV simulation platforms.
+  if (kDeviceType != kDeviceSimDV) {
+    init_uart(TOP_MATCHA_SMC_UART_BASE_ADDR, &smc_uart);
+  }
+
+  CHECK_DIF_OK(dif_rv_plic_init(
+      mmio_region_from_addr(TOP_MATCHA_RV_PLIC_SMC_BASE_ADDR), &plic_smc));
+  CHECK_DIF_OK(dif_ml_top_init(
+      mmio_region_from_addr(TOP_MATCHA_ML_TOP_CORE_BASE_ADDR), &ml_top));
+  CHECK_DIF_OK(dif_ml_top_irq_set_enabled(&ml_top, kDifMlTopIrqFinish,
+                                          kDifToggleEnabled));
+  CHECK_DIF_OK(dif_rv_plic_irq_set_priority(
+      &plic_smc, kTopMatchaPlicIrqIdMlTopFinish, kDifRvPlicMaxPriority));
+  CHECK_DIF_OK(dif_rv_plic_irq_set_enabled(
+      &plic_smc, kTopMatchaPlicIrqIdMlTopFinish, kTopMatchaPlicTargetIbex0Smc,
+      kDifToggleEnabled));
+  irq_global_ctrl(true);
+  irq_external_ctrl(true);
+
+  LOG_INFO("========== Begin Benchmark (%s) ==========", STR(BENCHMARK_NAME));
+
+  // Start Kelvin; the ISR sets ml_top_finish_done when the finish IRQ fires.
+  ml_top_finish_done = false;
+  CHECK_DIF_OK(dif_ml_top_release_ctrl_en(&ml_top));
+
+  // Sleep between interrupts until the ISR reports that Kelvin is done.
+  while (!ml_top_finish_done) {
+    wait_for_interrupt();
+  }
+
+  // Results sit 0x40 bytes below the end of ML DMEM; volatile because another core wrote them.
+  const volatile BenchmarkOutputHeader* output_header_ptr =
+      (const volatile BenchmarkOutputHeader*)((TOP_MATCHA_ML_TOP_DMEM_BASE_ADDR +
+                                               TOP_MATCHA_RAM_ML_DMEM_SIZE_BYTES) - 0x40);
+  if (output_header_ptr->return_code) {
+    LOG_FATAL("Kelvin returned an error: %d", output_header_ptr->return_code);
+  }
+  uint32_t iterations = output_header_ptr->iterations;
+  uint64_t cycles = output_header_ptr->cycles;
+  // Kelvin leaves iterations at 0 when it fails early; never pass 0 as the divisor.
+  uint64_t average_cycles = iterations ? udiv64_slow(cycles, iterations, NULL) : 0;
+  LOG_INFO("Iterations: %ld", iterations);
+  _print64("Total Cycles", cycles);
+  _print64("Average Cycles per Iteration", average_cycles);
+  LOG_INFO("========== End Benchmark ==========");
+  while (true) {
+    wait_for_interrupt();
+  }
+}
diff --git a/benchmarks/benchmarks.bzl b/benchmarks/benchmarks.bzl
new file mode 100644
index 0000000..1b3b704
--- /dev/null
+++ b/benchmarks/benchmarks.bzl
@@ -0,0 +1,147 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Rules to run Kelvin benchmarks"""
+
+load("@kelvin_sw//build_tools/bazel:kelvin.bzl","kelvin_binary", "kelvin_test", "generate_cc_arrays")
+load("@matcha//rules:matcha.bzl", "bin_to_c_file", "matcha_extflash_tar", "sec_flash_binary", "smc_flash_binary", "NEXUS_CORE_TARGETS")
+
+def kelvin_benchmark_simulator(
+        name,
+        model,
+        iterations,
+        hw_test_size = "medium",
+        hw_test_tags = [],
+        iss_test_size = "small",
+        iss_test_tags = [],
+        **kwargs):  # NOTE(review): kwargs are accepted but never forwarded below.
+        """Defines kelvin_test `name`, which benchmarks `model` over `iterations` timed runs."""
+        bin_to_c_file(
+            name = "{}_model".format(name),
+            srcs = [model],
+            var_name = "g_benchmark_model_data",  # Symbol read by benchmark_kelvin.cc.
+        )
+
+        # Test to run in simulator and MPACT.
+        kelvin_test(
+            name = "{}".format(name),
+            srcs = ["@kelvin_sw//benchmarks:benchmark_kelvin.cc"],
+            hdrs = ["@kelvin_sw//benchmarks:benchmark.h", "{}_model.h".format(name)],
+            copts = ["-DITERATIONS={}".format(iterations), "-DBENCHMARK_NAME={}".format(name)],
+            deps = [
+                "@kelvin_sw//crt",
+                "@kelvin_sw//benchmarks:benchmark_header",
+                "@tflite-micro//tensorflow/lite/micro:micro_framework",
+                "@tflite-micro//tensorflow/lite/micro:system_setup",
+            ],
+            hw_test_size = hw_test_size,
+            hw_test_tags = hw_test_tags,
+            iss_test_size = iss_test_size,
+            iss_test_tags = iss_test_tags,
+        )
+
+def kelvin_benchmark_fpga(
+        name,
+        model,
+        iterations,
+        **kwargs):  # NOTE(review): kwargs are accepted but never forwarded below.
+        """Defines the SMC, SEC and Kelvin binaries plus the extflash tar for FPGA benchmark `name`."""
+        bin_to_c_file(
+            name = "{}_model".format(name),
+            srcs = [model],
+            var_name = "g_benchmark_model_data",  # Symbol read by benchmark_kelvin.cc.
+        )
+
+        # Creation of binaries for running on FPGA
+        smc_flash_binary(
+            name = "{}_smc".format(name),
+            srcs = [
+                "@kelvin_sw//benchmarks:benchmark_smc.c",
+                "@kelvin_sw//benchmarks:benchmark.h",
+            ],
+            copts = ["-DBENCHMARK_NAME={}".format(name)],
+            per_device_deps = {
+                "fpga_nexus": [NEXUS_CORE_TARGETS.get("smc")],
+            },
+            deps = [
+                "@matcha//sw/device/lib/dif:ml_top",
+                "@matcha//sw/device/tests:test_lib_smc",
+                "@matcha//sw/device/lib/dif:i2s",
+                "@matcha//sw/device/lib/dif:tlul_mailbox",
+                "@kelvin_sw//benchmarks:benchmark_header",
+                "@lowrisc_opentitan//sw/device/lib/dif:rv_plic",
+            ],
+        )
+
+        bin_to_c_file(
+            name = "{}-smc_bin".format(name),  # Hyphenated to match MODEL_HEADER in benchmark_sec.c.
+            srcs = ["{}_smc_fpga_nexus_bin".format(name)],
+            var_name = "smc_bin",  # Symbol copied into SMC RAM by benchmark_sec.c.
+        )
+
+        sec_flash_binary(
+            name = "{}_sec".format(name),
+            srcs = [
+                "@kelvin_sw//benchmarks:benchmark_sec.c",
+                "{}-smc_bin.h".format(name),
+                "@kelvin_sw//benchmarks:benchmark.h",
+            ],
+            copts = ["-DBENCHMARK_NAME={}".format(name)],
+            per_device_deps = {
+                "fpga_nexus": [NEXUS_CORE_TARGETS.get("secure_core")],
+            },
+            deps = [
+                "@matcha//sw/device/lib:spi_flash",
+                "@matcha//sw/device/tests:test_lib",
+                "@matcha//sw/device/lib/dif:smc_ctrl",
+                "@matcha//sw/device/lib/dif:tlul_mailbox",
+                "@kelvin_sw//benchmarks:benchmark_header",
+                "@lowrisc_opentitan//sw/device/lib/dif:rv_plic",
+            ],
+        )
+
+        kelvin_binary(
+            name = "{}_kelvin".format(name),
+            srcs = [
+                "@kelvin_sw//benchmarks:benchmark_kelvin.cc",
+            ],
+            copts = ["-DITERATIONS={}".format(iterations), "-DBENCHMARK_NAME={}".format(name)],
+            hdrs = [
+                "@kelvin_sw//benchmarks:benchmark.h",
+                "{}_model.h".format(name),
+            ],
+            deps = [
+                "@kelvin_sw//benchmarks:benchmark_header",
+                "@tflite-micro//tensorflow/lite/micro:micro_framework",
+                "@tflite-micro//tensorflow/lite/micro:system_setup",
+            ],
+        )
+
+        matcha_extflash_tar(
+            name = "{}_extflash".format(name),
+            kelvin_binary = ":{}_kelvin.bin".format(name),
+            sc_binary = ":{}_sec_fpga_nexus_bin".format(name),
+        )
+
+        # Create a filegroup with all FPGA targets.
+        native.filegroup(
+            name = "{}".format(name),
+            srcs = [
+                ":{}_sec".format(name),
+                ":{}-smc_bin".format(name),
+                ":{}_kelvin".format(name),
+                ":{}_extflash".format(name),
+            ],
+            output_group = "fpga_files",
+        )
diff --git a/crt/BUILD b/crt/BUILD
index b3c18ce..8fad457 100644
--- a/crt/BUILD
+++ b/crt/BUILD
@@ -49,6 +49,7 @@
     hdrs = [
         "kelvin.h",
         "kelvin_intrinsics.h",
+        "log.h",
         "printf_traits.h",
     ],
 )