Add reference outputs for soundstream

- Add reference outputs for our input data, for both the encoder and
  the decoder. Each model's output is compared against its reference
  after every invocation.

Change-Id: I2346b0770dcbc2bfbcc0392c68404b4914e8300b
diff --git a/examples/tflm/soundstream/BUILD b/examples/tflm/soundstream/BUILD
index 1188d98..0b79a83 100644
--- a/examples/tflm/soundstream/BUILD
+++ b/examples/tflm/soundstream/BUILD
@@ -1,25 +1,30 @@
 load("//build_tools/bazel:kelvin.bzl", "generate_cc_arrays", "kelvin_binary")
+
 package(default_visibility = ["//visibility:public"])
 
 kelvin_binary(
     name = "soundstream",
     srcs = [
-        "soundstream.cc",
+        "best_of_times_s16_decoded.cc",
+        "best_of_times_s16_encoded.cc",
         "best_of_times_s16_wav.cc",
         "decoder_non_stream_q16x8_b64_io_int16_tflite.cc",
         "encoder_non_stream_q16x8_b64_io_int16_tflite.cc",
+        "soundstream.cc",
     ],
     hdrs = [
+        "best_of_times_s16_decoded.h",
+        "best_of_times_s16_encoded.h",
         "best_of_times_s16_wav.h",
         "decoder_non_stream_q16x8_b64_io_int16_tflite.h",
         "encoder_non_stream_q16x8_b64_io_int16_tflite.h",
     ],
+    tags = ["manual"],
     deps = [
         "//crt:crt_header",
         "@tflite-micro//tensorflow/lite/micro:micro_framework",
         "@tflite-micro//tensorflow/lite/micro:system_setup",
     ],
-    tags = ["manual"],
 )
 
 generate_cc_arrays(
@@ -28,6 +33,7 @@
     out = "decoder_non_stream_q16x8_b64_io_int16_tflite.cc",
     tags = ["manual"],
 )
+
 generate_cc_arrays(
     name = "decoder_non_stream_q16x8_b64_io_int16_tflite_h",
     src = "@ml-models//:quant_models/_decoder_non_stream_q16x8_b64_io_int16.tflite",
@@ -41,6 +47,7 @@
     out = "encoder_non_stream_q16x8_b64_io_int16_tflite.cc",
     tags = ["manual"],
 )
+
 generate_cc_arrays(
     name = "encoder_non_stream_q16x8_b64_io_int16_tflite_h",
     src = "@ml-models//:quant_models/_encoder_non_stream_q16x8_b64_io_int16.tflite",
@@ -53,8 +60,33 @@
     src = "best_of_times_s16.wav",
     out = "best_of_times_s16_wav.cc",
 )
+
 generate_cc_arrays(
     name = "best_of_times_s16_wav_h",
     src = "best_of_times_s16.wav",
     out = "best_of_times_s16_wav.h",
 )
+
+generate_cc_arrays(
+    name = "best_of_times_s16_encoded_cc",
+    src = "best_of_times_s16_encoded.raw",
+    out = "best_of_times_s16_encoded.cc",
+)
+
+generate_cc_arrays(
+    name = "best_of_times_s16_encoded_h",
+    src = "best_of_times_s16_encoded.raw",
+    out = "best_of_times_s16_encoded.h",
+)
+
+generate_cc_arrays(
+    name = "best_of_times_s16_decoded_cc",
+    src = "best_of_times_s16_decoded.raw",
+    out = "best_of_times_s16_decoded.cc",
+)
+
+generate_cc_arrays(
+    name = "best_of_times_s16_decoded_h",
+    src = "best_of_times_s16_decoded.raw",
+    out = "best_of_times_s16_decoded.h",
+)
diff --git a/examples/tflm/soundstream/best_of_times_s16_decoded.raw b/examples/tflm/soundstream/best_of_times_s16_decoded.raw
new file mode 100644
index 0000000..9ce7cd9
--- /dev/null
+++ b/examples/tflm/soundstream/best_of_times_s16_decoded.raw
Binary files differ
diff --git a/examples/tflm/soundstream/best_of_times_s16_encoded.raw b/examples/tflm/soundstream/best_of_times_s16_encoded.raw
new file mode 100644
index 0000000..94c79da
--- /dev/null
+++ b/examples/tflm/soundstream/best_of_times_s16_encoded.raw
Binary files differ
diff --git a/examples/tflm/soundstream/soundstream.cc b/examples/tflm/soundstream/soundstream.cc
index b2327fd..7f500a9 100644
--- a/examples/tflm/soundstream/soundstream.cc
+++ b/examples/tflm/soundstream/soundstream.cc
@@ -2,6 +2,8 @@
 // Licensed under the Apache License, Version 2.0, see LICENSE for details.
 // SPDX-License-Identifier: Apache-2.0
 
+#include "examples/tflm/soundstream/best_of_times_s16_decoded.h"
+#include "examples/tflm/soundstream/best_of_times_s16_encoded.h"
 #include "examples/tflm/soundstream/best_of_times_s16_wav.h"
 #include "examples/tflm/soundstream/decoder_non_stream_q16x8_b64_io_int16_tflite.h"
 #include "examples/tflm/soundstream/encoder_non_stream_q16x8_b64_io_int16_tflite.h"
@@ -73,9 +75,12 @@
 
   TfLiteTensor *encoder_input = encoder_interpreter->input(0);
   TfLiteTensor *encoder_output = encoder_interpreter->output(0);
+  TfLiteTensor *decoder_input = decoder_interpreter->input(0);
+  TfLiteTensor *decoder_output = decoder_interpreter->output(0);
 
   int invocation_count =
-      g_best_of_times_s16_audio_data_size / encoder_input->bytes;
+      (g_best_of_times_s16_audio_data_size * sizeof(int16_t)) /
+      encoder_input->bytes;
   for (int i = 0; i < invocation_count; ++i) {
     MicroPrintf("Invocation %d of %d", i, invocation_count);
     memcpy(encoder_input->data.uint8,
@@ -87,8 +92,13 @@
       MicroPrintf("Failed to invoke encoder");
       return -1;
     }
+    if (memcmp(encoder_output->data.uint8,
+               g_best_of_times_s16_encoded + (i * encoder_output->bytes),
+               encoder_output->bytes)) {
+      MicroPrintf("Encoder output mismatches reference");
+      return -1;
+    }
 
-    TfLiteTensor *decoder_input = decoder_interpreter->input(0);
     memcpy(decoder_input->data.uint8, encoder_output->data.uint8,
            decoder_input->bytes);
     invoke_status = decoder_interpreter->Invoke();
@@ -96,6 +106,12 @@
       MicroPrintf("Failed to invoke decoder");
       return -1;
     }
+    if (memcmp(decoder_output->data.uint8,
+               g_best_of_times_s16_decoded + (i * decoder_output->bytes),
+               decoder_output->bytes)) {
+      MicroPrintf("Decoder output mismatches reference");
+      return -1;
+    }
   }
 
   return 0;