Add support for checking model output in benchmarks

- Rename `test_data` to `test_data_input`.
- Add `test_data_output`, which contains the golden output. The
  benchmarked model's output tensor is compared against it byte by
  byte, and the number of mismatched bytes is reported in the new
  `mismatch_count` field of the output header (see the usage sketch
  below).
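
A rough sketch of the intended usage (the macro name
`kelvin_benchmark` and the file names are illustrative placeholders;
the real macro definitions live in benchmarks/benchmarks.bzl, and
their `def` lines fall outside the hunks in this change):

    kelvin_benchmark(
        name = "conv_model",
        model = "conv_model.tflite",
        iterations = 100,
        # Copied into the model's input tensor before each invocation.
        test_data_input = "conv_model_input.bin",
        # Golden output; differing bytes are counted into mismatch_count.
        test_data_output = "conv_model_expected_output.bin",
    )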

Change-Id: I4c6e8111fcfe6a236d1876b5a8ae6b0ee859839f
diff --git a/benchmarks/benchmark.h b/benchmarks/benchmark.h
index e36020c..59383fb 100644
--- a/benchmarks/benchmark.h
+++ b/benchmarks/benchmark.h
@@ -21,6 +21,7 @@
   uint32_t return_code;
   uint32_t iterations;
   uint64_t cycles;
+  uint32_t mismatch_count;
 } BenchmarkOutputHeader;
 
 #endif // #ifndef BENCHMARKS_BENCHMARK_H_
diff --git a/benchmarks/benchmark_kelvin.cc b/benchmarks/benchmark_kelvin.cc
index 1871619..7756b34 100644
--- a/benchmarks/benchmark_kelvin.cc
+++ b/benchmarks/benchmark_kelvin.cc
@@ -39,10 +39,16 @@
 #define MODEL_HEADER STR(MODEL_HEADER_DIRECTORY BENCHMARK_NAME MODEL_HEADER_TYPE)
 #include MODEL_HEADER
 
-#if (TEST_DATA == 1)
-#define TEST_DATA_HEADER_TYPE _test_data.h
-#define TEST_DATA_HEADER STR(MODEL_HEADER_DIRECTORY BENCHMARK_NAME TEST_DATA_HEADER_TYPE)
-#include TEST_DATA_HEADER
+#if (TEST_DATA_INPUT == 1)
+#define TEST_DATA_INPUT_HEADER_TYPE _input.h
+#define TEST_DATA_INPUT_HEADER STR(MODEL_HEADER_DIRECTORY BENCHMARK_NAME TEST_DATA_INPUT_HEADER_TYPE)
+#include TEST_DATA_INPUT_HEADER
+#endif
+
+#if (TEST_DATA_OUTPUT == 1)
+#define TEST_DATA_OUTPUT_HEADER_TYPE _output.h
+#define TEST_DATA_OUTPUT_HEADER STR(MODEL_HEADER_DIRECTORY BENCHMARK_NAME TEST_DATA_OUTPUT_HEADER_TYPE)
+#include TEST_DATA_OUTPUT_HEADER
 #endif
 
 namespace {
@@ -53,6 +59,7 @@
     .return_code = 0, // Set by kelvin_start based on return value in main.
     .iterations = 0,
     .cycles = 0,
+    .mismatch_count = 0,
 };
 
 // This includes all ops currently used in the Kelvin model suite. More can be added.
@@ -127,8 +134,8 @@
   }
   TfLiteTensor* input = interpreter->input(0);
 
-#if (TEST_DATA == 1)
-  memcpy(tflite::GetTensorData<uint8_t>(input), g_benchmark_test_data, input->bytes);
+#if (TEST_DATA_INPUT == 1)
+  memcpy(tflite::GetTensorData<uint8_t>(input), g_benchmark_input, input->bytes);
 #else
   memset(tflite::GetTensorData<uint8_t>(input), 0, input->bytes);
 #endif
@@ -142,8 +149,8 @@
 
   // TODO(michaelbrooks): Possibly set/verify test data?
   for (int i = 0; i < iterations; ++i) {
-#if (TEST_DATA == 1)
-  memcpy(tflite::GetTensorData<uint8_t>(input), g_benchmark_test_data, input->bytes);
+#if (TEST_DATA_INPUT == 1)
+  memcpy(tflite::GetTensorData<uint8_t>(input), g_benchmark_input, input->bytes);
 #else
   memset(tflite::GetTensorData<uint8_t>(input), 0, input->bytes);
 #endif
@@ -152,6 +159,20 @@
   uint64_t end = mcycle_read();
   uint64_t num_cycles = end - begin;
 
+#if (TEST_DATA_OUTPUT == 1)
+  TfLiteTensor* output = interpreter->output(0);
+  int mismatch_count = 0;
+  for (size_t i = 0; i < output->bytes; ++i) {
+    // Any byte that differs from the golden output counts as a mismatch.
+    int8_t vx = tflite::GetTensorData<int8_t>(output)[i];
+    int8_t vy = (int8_t)g_benchmark_output[i];
+    if (vx != vy) {
+      mismatch_count += 1;
+    }
+  }
+  output_header.mismatch_count = mismatch_count;
+#endif
+
 #if (PROFILE == 1)
   profiler.LogCsv();
 #endif
@@ -165,6 +186,9 @@
   LOG_INFO("Iterations: %ld", output_header.iterations);
   _print64("Total Cycles: ", output_header.cycles);
   _print64("Average Cycles per Iteration: ", average_cycles);
+#if (TEST_DATA_OUTPUT == 1)
+  LOG_INFO("Mismatch_count: %d", mismatch_count);
+#endif
   LOG_INFO("========== End Benchmark ==========");
   return kSuccess;
 }
diff --git a/benchmarks/benchmark_smc.c b/benchmarks/benchmark_smc.c
index 2915dca..7edaf46 100644
--- a/benchmarks/benchmark_smc.c
+++ b/benchmarks/benchmark_smc.c
@@ -140,11 +140,13 @@
   uint64_t average_cycles = udiv64_slow(cycles, iterations, NULL);
   uint64_t wall_time_us = timer_finish - timer_start;
   uint64_t average_wall_time_us = udiv64_slow(wall_time_us, iterations, NULL);
+  uint32_t mismatch_count = output_header_ptr->mismatch_count;
   LOG_INFO("Iterations: %d", iterations);
   _print64("Total Cycles", cycles);
   _print64("Average Cycles per Iteration", average_cycles);
   _print64("Wall time (us)", wall_time_us);
   _print64("Wall time per Iteration (us)", average_wall_time_us);
+  LOG_INFO("Mismatch count: %d", mismatch_count);
   LOG_INFO("========== End Benchmark ==========");
   while (true) {
     wait_for_interrupt();
diff --git a/benchmarks/benchmarks.bzl b/benchmarks/benchmarks.bzl
index 3414e7d..5f919f8 100644
--- a/benchmarks/benchmarks.bzl
+++ b/benchmarks/benchmarks.bzl
@@ -21,7 +21,8 @@
         name,
         model,
         iterations,
-        test_data = None,
+        test_data_input = None,
+        test_data_output = None,
         profile = False,
         kelvin_binary_info = None,
         benchmark_path = "benchmarks",
@@ -51,14 +52,22 @@
             )
             kelvin_headers.append(model_header_name)
 
-            if test_data:
-                test_data_header_name = "{}_test_data".format(name)
+            if test_data_input:
+                input_header_name = "{}_input".format(name)
                 bin_to_c_file(
-                    name = test_data_header_name,
-                    srcs = [test_data],
-                    var_name = "g_benchmark_test_data",
+                    name = input_header_name,
+                    srcs = [test_data_input],
+                    var_name = "g_benchmark_input",
                 )
-                kelvin_headers.append(test_data_header_name)
+                kelvin_headers.append(input_header_name)
+            if test_data_output:
+                output_header_name = "{}_output".format(name)
+                bin_to_c_file(
+                    name = output_header_name,
+                    srcs = [test_data_output],
+                    var_name = "g_benchmark_output",
+                )
+                kelvin_headers.append(output_header_name)
 
             # Test to run in simulator and MPACT.
             kelvin_test(
@@ -68,7 +77,8 @@
                 copts = [
                     "-DITERATIONS={}".format(iterations),
                     "-DBENCHMARK_NAME={}".format(name),
-                    "-DTEST_DATA={}".format(1 if test_data else 0),
+                    "-DTEST_DATA_INPUT={}".format(1 if test_data_input else 0),
+                    "-DTEST_DATA_OUTPUT={}".format(1 if test_data_output else 0),
                     "-DPROFILE={}".format(1 if profile else 0),
                     "-DBENCHMARK_PATH={}".format(benchmark_path),
                 ],
@@ -175,7 +185,8 @@
         model,
         device_type,
         iterations,
-        test_data = None,
+        test_data_input = None,
+        test_data_output = None,
         profile = False,
         kelvin_binary_info = None,
         benchmark_path = "benchmarks",
@@ -254,14 +265,22 @@
             )
             kelvin_headers.append(model_header_name)
 
-            if test_data:
-                test_data_header_name = "{}_test_data".format(name)
+            if test_data_input:
+                input_header_name = "{}_input".format(name)
                 bin_to_c_file(
-                    name = test_data_header_name,
-                    srcs = [test_data],
-                    var_name = "g_benchmark_test_data",
+                    name = input_header_name,
+                    srcs = [test_data_input],
+                    var_name = "g_benchmark_input",
                 )
-                kelvin_headers.append(test_data_header_name)
+                kelvin_headers.append(input_header_name)
+            if test_data_output:
+                output_header_name = "{}_output".format(name)
+                bin_to_c_file(
+                    name = output_header_name,
+                    srcs = [test_data_output],
+                    var_name = "g_benchmark_output",
+                )
+                kelvin_headers.append(output_header_name)
 
             kelvin_binary(
                 name = "{}_kelvin".format(name),
@@ -271,7 +290,8 @@
                 copts = [
                     "-DITERATIONS={}".format(iterations),
                     "-DBENCHMARK_NAME={}".format(name),
-                    "-DTEST_DATA={}".format(1 if test_data else 0),
+                    "-DTEST_DATA_INPUT={}".format(1 if test_data_input else 0),
+                    "-DTEST_DATA_OUTPUT={}".format(1 if test_data_output else 0),
                     "-DPROFILE={}".format(1 if profile else 0),
                     "-DBENCHMARK_PATH={}".format(benchmark_path),
                 ],
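
For the device-targeted macro in benchmarks/benchmarks.bzl (its `def`
line also falls outside the hunk context, so `kelvin_device_benchmark`
below is a placeholder name), the renamed and new attributes are
passed the same way:

    kelvin_device_benchmark(
        name = "conv_model_fpga",
        model = "conv_model.tflite",
        device_type = "fpga",  # hypothetical device identifier
        iterations = 100,
        test_data_input = "conv_model_input.bin",
        test_data_output = "conv_model_expected_output.bin",
    )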