Merge changes I1f0ec9d3,I55e0cf22

* changes:
  Lint cleanup benchmark_kelvin.cc
  Fix carried-over benchmark mismatch count
diff --git a/benchmarks/benchmark_kelvin.cc b/benchmarks/benchmark_kelvin.cc
index fdec54a..079f567 100644
--- a/benchmarks/benchmark_kelvin.cc
+++ b/benchmarks/benchmark_kelvin.cc
@@ -16,10 +16,10 @@
 
 #include <memory>
 
-#include "crt/kelvin.h"
-#include "crt/log.h"
 #include "benchmarks/benchmark.h"
 #include "benchmarks/cycle_count.h"
+#include "crt/kelvin.h"
+#include "crt/log.h"
 #include "tensorflow/lite/micro/micro_interpreter.h"
 #include "tensorflow/lite/micro/micro_log.h"
 #include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
@@ -34,20 +34,23 @@
 
 // In order to include the model data generated from Bazel, include the header
 // using the name passed as a macro.
-#define MODEL_HEADER_DIRECTORY BENCHMARK_PATH/
+#define MODEL_HEADER_DIRECTORY BENCHMARK_PATH
 #define MODEL_HEADER_TYPE _model.h
-#define MODEL_HEADER STR(MODEL_HEADER_DIRECTORY BENCHMARK_NAME MODEL_HEADER_TYPE)
+#define MODEL_HEADER \
+  STR(MODEL_HEADER_DIRECTORY BENCHMARK_NAME MODEL_HEADER_TYPE)
 #include MODEL_HEADER
 
 #if (TEST_DATA_INPUT == 1)
 #define TEST_DATA_INPUT_HEADER_TYPE _input.h
-#define TEST_DATA_INPUT_HEADER STR(MODEL_HEADER_DIRECTORY BENCHMARK_NAME TEST_DATA_INPUT_HEADER_TYPE)
+#define TEST_DATA_INPUT_HEADER \
+  STR(MODEL_HEADER_DIRECTORY BENCHMARK_NAME TEST_DATA_INPUT_HEADER_TYPE)
 #include TEST_DATA_INPUT_HEADER
 #endif
 
 #if (TEST_DATA_OUTPUT == 1)
 #define TEST_DATA_OUTPUT_HEADER_TYPE _output.h
-#define TEST_DATA_OUTPUT_HEADER STR(MODEL_HEADER_DIRECTORY BENCHMARK_NAME TEST_DATA_OUTPUT_HEADER_TYPE)
+#define TEST_DATA_OUTPUT_HEADER \
+  STR(MODEL_HEADER_DIRECTORY BENCHMARK_NAME TEST_DATA_OUTPUT_HEADER_TYPE)
 #include TEST_DATA_OUTPUT_HEADER
 #endif
 
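
Why the two-level STR indirection is needed: a single-level stringize would yield the literal macro names rather than their values. A minimal, self-contained sketch of the pattern (the BENCHMARK_NAME value here is hypothetical; the real one is passed via the Bazel copts further down, and the real STR presumably lives in benchmarks/benchmark.h):

#include <cstdio>

// Two-level stringification: the indirection forces the argument's
// macros to expand before the # operator turns them into a string.
#define STR_INNER(x) #x
#define STR(x) STR_INNER(x)

#define BENCHMARK_NAME conv_model  // hypothetical; normally set via -D

int main() {
  std::printf("%s\n", STR_INNER(BENCHMARK_NAME));  // prints: BENCHMARK_NAME
  std::printf("%s\n", STR(BENCHMARK_NAME));        // prints: conv_model
  return 0;
}
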
@@ -59,16 +62,19 @@
 #endif
 uint8_t g_tensor_arena[kTensorArenaSize] __attribute__((aligned(64)));
 
-__attribute__((section(".model_output_header"))) BenchmarkOutputHeader output_header = {
-    .return_code = 0, // Set by kelvin_start based on return value in main.
+__attribute__((
+    section(".model_output_header"))) BenchmarkOutputHeader output_header = {
+    .return_code = 0,  // Set by kelvin_start based on return value in main.
     .iterations = 0,
     .cycles = 0,
     .mismatch_count = 0,
 };
 
-// This includes all ops currently used in the Kelvin model suite. More can be added.
+// This includes all ops currently used in the Kelvin model suite. More can be
+// added.
 constexpr int kAllOpsNum = 28;
-std::unique_ptr<tflite::MicroMutableOpResolver<kAllOpsNum>> GetAllOpsResolver() {
+std::unique_ptr<tflite::MicroMutableOpResolver<kAllOpsNum>>
+GetAllOpsResolver() {
   tflite::MicroMutableOpResolver<kAllOpsNum> resolver;
   resolver.AddAveragePool2D();
   resolver.AddMaxPool2D();
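
The output_header above is the producer side of a one-way channel: benchmark_kelvin.cc writes it into the .model_output_header section and benchmark_smc.c reads it back through output_header_ptr. A sketch of the shared struct, inferred from the designated initializer above (the actual definition presumably lives in benchmarks/benchmark.h; the field types here are assumptions):

#include <cstdint>

// Assumed mirror of the header shared between the Kelvin core (writer)
// and the SMC (reader); fields follow the designated initializer above.
struct BenchmarkOutputHeader {
  uint32_t return_code;     // set by kelvin_start from main()'s return value
  uint32_t iterations;      // number of timed Invoke() calls
  uint64_t cycles;          // total mcycle delta across all iterations
  uint32_t mismatch_count;  // output bytes differing from the reference
};
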
@@ -110,28 +116,32 @@
 constexpr int kSuccess = 0;
 constexpr int kAllocatonFailed = -1;
 constexpr int kInvokeFailed = -2;
-} // namespace
+}  // namespace
 
-
-int main(int argc, char **argv) {
-  std::unique_ptr<tflite::MicroMutableOpResolver<kAllOpsNum>> resolver = GetAllOpsResolver();
+int main(int argc, char** argv) {
+  std::unique_ptr<tflite::MicroMutableOpResolver<kAllOpsNum>> resolver =
+      GetAllOpsResolver();
 
   const auto* model = tflite::GetModel(g_benchmark_model_data);
 
   uint8_t variable_arena[2048];
-  tflite::MicroAllocator *variable_allocator =
+  tflite::MicroAllocator* variable_allocator =
       tflite::MicroAllocator::Create(variable_arena, 1024);
-  tflite::MicroResourceVariables *resource_variables =
+  tflite::MicroResourceVariables* resource_variables =
       tflite::MicroResourceVariables::Create(variable_allocator, 20);
 #if (PROFILE == 1)
   tflite::MicroProfiler profiler;
-  std::unique_ptr<tflite::MicroInterpreter> interpreter = std::make_unique<tflite::MicroInterpreter>(
-      model, *resolver.get(), g_tensor_arena, kTensorArenaSize, resource_variables, &profiler);
+  std::unique_ptr<tflite::MicroInterpreter> interpreter =
+      std::make_unique<tflite::MicroInterpreter>(
+          model, *resolver.get(), g_tensor_arena, kTensorArenaSize,
+          resource_variables, &profiler);
   // For a profiled model, just run a single iteration
   const int iterations = 1;
 #else
-  std::unique_ptr<tflite::MicroInterpreter> interpreter = std::make_unique<tflite::MicroInterpreter>(
-      model, *resolver.get(), g_tensor_arena, kTensorArenaSize, resource_variables);
+  std::unique_ptr<tflite::MicroInterpreter> interpreter =
+      std::make_unique<tflite::MicroInterpreter>(
+          model, *resolver.get(), g_tensor_arena, kTensorArenaSize,
+          resource_variables);
   const int iterations = ITERATIONS;
 #endif
 
@@ -142,7 +152,8 @@
   TfLiteTensor* input = interpreter->input(0);
 
 #if (TEST_DATA_INPUT == 1)
-  memcpy(tflite::GetTensorData<uint8_t>(input), g_benchmark_input, input->bytes);
+  memcpy(tflite::GetTensorData<uint8_t>(input), g_benchmark_input,
+         input->bytes);
 #else
   memset(tflite::GetTensorData<uint8_t>(input), 0, input->bytes);
 #endif
@@ -157,15 +168,17 @@
   // TODO(michaelbrooks): Possibly set/verify test data?
   for (int i = 0; i < iterations; ++i) {
 #if (TEST_DATA_INPUT == 1)
-  memcpy(tflite::GetTensorData<uint8_t>(input), g_benchmark_input, input->bytes);
+    memcpy(tflite::GetTensorData<uint8_t>(input), g_benchmark_input,
+           input->bytes);
 #else
-  memset(tflite::GetTensorData<uint8_t>(input), 0, input->bytes);
+    memset(tflite::GetTensorData<uint8_t>(input), 0, input->bytes);
 #endif
     interpreter->Invoke();
   }
   uint64_t end = mcycle_read();
   uint64_t num_cycles = end - begin;
 
+  output_header.mismatch_count = 0;
 #if (TEST_DATA_OUTPUT == 1)
   TfLiteTensor* output = interpreter->output(0);
   int mismatch_count = 0;
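
The unconditional `output_header.mismatch_count = 0;` added above is the substance of the fix: the comparison below it is compiled only when TEST_DATA_OUTPUT is 1, so without the runtime reset a count from a previous benchmark run could carry over in the header section. A sketch of how the guarded comparison presumably continues past the visible end of this hunk (g_benchmark_output and the byte-wise loop are assumptions based on the surrounding code, not the verbatim implementation):

#if (TEST_DATA_OUTPUT == 1)
  TfLiteTensor* output = interpreter->output(0);
  int mismatch_count = 0;
  // Assumed continuation: byte-wise comparison against the reference
  // output included via TEST_DATA_OUTPUT_HEADER above.
  const uint8_t* actual = tflite::GetTensorData<uint8_t>(output);
  for (size_t j = 0; j < output->bytes; ++j) {
    if (actual[j] != g_benchmark_output[j]) {
      ++mismatch_count;
    }
  }
  output_header.mismatch_count = mismatch_count;
#endif
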
diff --git a/benchmarks/benchmark_smc.c b/benchmarks/benchmark_smc.c
index 7edaf46..757ffe8 100644
--- a/benchmarks/benchmark_smc.c
+++ b/benchmarks/benchmark_smc.c
@@ -140,13 +140,15 @@
   uint64_t average_cycles = udiv64_slow(cycles, iterations, NULL);
   uint64_t wall_time_us = timer_finish - timer_start;
   uint64_t average_wall_time_us = udiv64_slow(wall_time_us, iterations, NULL);
-  uint32_t mismatch_count = output_header_ptr->mismatch_count;
   LOG_INFO("Iterations: %d", iterations);
   _print64("Total Cycles", cycles);
   _print64("Average Cycles per Iteration", average_cycles);
   _print64("Wall time (us)", wall_time_us);
   _print64("Wall time per Iteration (us)", average_wall_time_us);
+#if (TEST_DATA_OUTPUT == 1)
+  uint32_t mismatch_count = output_header_ptr->mismatch_count;
   LOG_INFO("Mismatch count: %d", mismatch_count);
+#endif
   LOG_INFO("========== End Benchmark ==========");
   while (true) {
     wait_for_interrupt();
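
The udiv64_slow calls above suggest the SMC core has no usable hardware 64-bit divide, so per-iteration averages are computed in software. A restoring shift-and-subtract sketch of what such a helper typically looks like (the signature is inferred from the call sites; the third argument is taken to be an optional remainder out-parameter, and the real implementation may differ):

#include <cstdint>

// Illustrative restoring (shift-and-subtract) 64-bit division, matching
// the assumed signature of udiv64_slow(num, den, rem_out).
static uint64_t udiv64_slow(uint64_t num, uint64_t den, uint64_t* rem_out) {
  uint64_t quot = 0;
  uint64_t rem = 0;
  if (den != 0) {
    for (int i = 63; i >= 0; --i) {
      rem = (rem << 1) | ((num >> i) & 1);  // bring down the next bit
      if (rem >= den) {
        rem -= den;
        quot |= uint64_t{1} << i;
      }
    }
  }
  if (rem_out != nullptr) {
    *rem_out = rem;  // division by zero yields quot == 0, rem == 0 here
  }
  return quot;
}
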
diff --git a/benchmarks/benchmarks.bzl b/benchmarks/benchmarks.bzl
index c941f22..c622c99 100644
--- a/benchmarks/benchmarks.bzl
+++ b/benchmarks/benchmarks.bzl
@@ -25,7 +25,7 @@
         test_data_output = None,
         profile = False,
         kelvin_binary_info = None,
-        benchmark_path = "benchmarks",
+        benchmark_path = "benchmarks/",
         hw_test_size = "medium",
         hw_test_tags = [],
         iss_test_size = "small",
@@ -103,7 +103,7 @@
         test_data = None,
         profile = False,
         kelvin_binary_info = None,
-        benchmark_path = "benchmarks",
+        benchmark_path = "benchmarks/",
         **kwargs):
     _kelvin_benchmark_device(
         name = name,
@@ -124,7 +124,7 @@
         test_data = None,
         profile = False,
         kelvin_binary_info = None,
-        benchmark_path = "benchmarks",
+        benchmark_path = "benchmarks/",
         **kwargs):
     _kelvin_benchmark_device(
         name = name,
@@ -145,7 +145,7 @@
         test_data = None,
         profile = False,
         kelvin_binary_info = None,
-        benchmark_path = "benchmarks",
+        benchmark_path = "benchmarks/",
         **kwargs):
     kelvin_benchmark_asic(
         name = "{}_asic".format(name),
@@ -188,7 +188,7 @@
         test_data_output = None,
         profile = False,
         kelvin_binary_info = None,
-        benchmark_path = "benchmarks",
+        benchmark_path = "benchmarks/",
         arena_size_bytes = 1536 * 1024,  # 1.5MB
         tags = [],
         **kwargs):
@@ -199,7 +199,10 @@
             "@kelvin_sw//benchmarks:benchmark_smc.c",
             "@kelvin_sw//benchmarks:benchmark.h",
         ],
-        copts = ["-DBENCHMARK_NAME={}".format(name)],
+        copts = [
+            "-DBENCHMARK_NAME={}".format(name),
+            "-DTEST_DATA_OUTPUT={}".format(1 if test_data_output else 0),
+        ],
         per_device_deps = {
             device_type: device_deps("smc").get(device_type),
         },
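
The new -DTEST_DATA_OUTPUT copt makes the guard in benchmark_smc.c explicit on every build. This matters because an identifier that is undefined inside an #if expression silently evaluates to 0, so a missing define is indistinguishable from an explicit 0; a small illustration (the file name and build lines are hypothetical):

// guard_demo.cc (hypothetical)
//   c++ -DTEST_DATA_OUTPUT=1 guard_demo.cc && ./a.out  -> mismatch line printed
//   c++ -DTEST_DATA_OUTPUT=0 guard_demo.cc && ./a.out  -> line skipped
//   c++ guard_demo.cc && ./a.out                       -> also skipped:
//       the undefined identifier evaluates to 0 inside the #if.
#include <cstdio>

int main() {
#if (TEST_DATA_OUTPUT == 1)
  std::printf("Mismatch count: 0\n");
#endif
  std::printf("========== End Benchmark ==========\n");
  return 0;
}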