Add reference outputs for soundstream
- Add reference outputs for our input data, for both the encoder and
  decoder sides. Each model's output is compared against its reference
  after every invocation.
Change-Id: I2346b0770dcbc2bfbcc0392c68404b4914e8300b
diff --git a/examples/tflm/soundstream/BUILD b/examples/tflm/soundstream/BUILD
index 1188d98..0b79a83 100644
--- a/examples/tflm/soundstream/BUILD
+++ b/examples/tflm/soundstream/BUILD
@@ -1,25 +1,30 @@
load("//build_tools/bazel:kelvin.bzl", "generate_cc_arrays", "kelvin_binary")
+
package(default_visibility = ["//visibility:public"])
kelvin_binary(
name = "soundstream",
srcs = [
- "soundstream.cc",
+ "best_of_times_s16_decoded.cc",
+ "best_of_times_s16_encoded.cc",
"best_of_times_s16_wav.cc",
"decoder_non_stream_q16x8_b64_io_int16_tflite.cc",
"encoder_non_stream_q16x8_b64_io_int16_tflite.cc",
+ "soundstream.cc",
],
hdrs = [
+ "best_of_times_s16_decoded.h",
+ "best_of_times_s16_encoded.h",
"best_of_times_s16_wav.h",
"decoder_non_stream_q16x8_b64_io_int16_tflite.h",
"encoder_non_stream_q16x8_b64_io_int16_tflite.h",
],
+ tags = ["manual"],
deps = [
"//crt:crt_header",
"@tflite-micro//tensorflow/lite/micro:micro_framework",
"@tflite-micro//tensorflow/lite/micro:system_setup",
],
- tags = ["manual"],
)
generate_cc_arrays(
@@ -28,6 +33,7 @@
out = "decoder_non_stream_q16x8_b64_io_int16_tflite.cc",
tags = ["manual"],
)
+
generate_cc_arrays(
name = "decoder_non_stream_q16x8_b64_io_int16_tflite_h",
src = "@ml-models//:quant_models/_decoder_non_stream_q16x8_b64_io_int16.tflite",
@@ -41,6 +47,7 @@
out = "encoder_non_stream_q16x8_b64_io_int16_tflite.cc",
tags = ["manual"],
)
+
generate_cc_arrays(
name = "encoder_non_stream_q16x8_b64_io_int16_tflite_h",
src = "@ml-models//:quant_models/_encoder_non_stream_q16x8_b64_io_int16.tflite",
@@ -53,8 +60,33 @@
src = "best_of_times_s16.wav",
out = "best_of_times_s16_wav.cc",
)
+
generate_cc_arrays(
name = "best_of_times_s16_wav_h",
src = "best_of_times_s16.wav",
out = "best_of_times_s16_wav.h",
)
+
+generate_cc_arrays(
+ name = "best_of_times_s16_encoded_cc",
+ src = "best_of_times_s16_encoded.raw",
+ out = "best_of_times_s16_encoded.cc",
+)
+
+generate_cc_arrays(
+ name = "best_of_times_s16_encoded_h",
+ src = "best_of_times_s16_encoded.raw",
+ out = "best_of_times_s16_encoded.h",
+)
+
+generate_cc_arrays(
+ name = "best_of_times_s16_decoded_cc",
+ src = "best_of_times_s16_decoded.raw",
+ out = "best_of_times_s16_decoded.cc",
+)
+
+generate_cc_arrays(
+ name = "best_of_times_s16_decoded_h",
+ src = "best_of_times_s16_decoded.raw",
+ out = "best_of_times_s16_decoded.h",
+)
diff --git a/examples/tflm/soundstream/best_of_times_s16_decoded.raw b/examples/tflm/soundstream/best_of_times_s16_decoded.raw
new file mode 100644
index 0000000..9ce7cd9
--- /dev/null
+++ b/examples/tflm/soundstream/best_of_times_s16_decoded.raw
Binary files differ
diff --git a/examples/tflm/soundstream/best_of_times_s16_encoded.raw b/examples/tflm/soundstream/best_of_times_s16_encoded.raw
new file mode 100644
index 0000000..94c79da
--- /dev/null
+++ b/examples/tflm/soundstream/best_of_times_s16_encoded.raw
Binary files differ
diff --git a/examples/tflm/soundstream/soundstream.cc b/examples/tflm/soundstream/soundstream.cc
index b2327fd..7f500a9 100644
--- a/examples/tflm/soundstream/soundstream.cc
+++ b/examples/tflm/soundstream/soundstream.cc
@@ -2,6 +2,8 @@
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
+#include "examples/tflm/soundstream/best_of_times_s16_decoded.h"
+#include "examples/tflm/soundstream/best_of_times_s16_encoded.h"
#include "examples/tflm/soundstream/best_of_times_s16_wav.h"
#include "examples/tflm/soundstream/decoder_non_stream_q16x8_b64_io_int16_tflite.h"
#include "examples/tflm/soundstream/encoder_non_stream_q16x8_b64_io_int16_tflite.h"
@@ -73,9 +75,12 @@
TfLiteTensor *encoder_input = encoder_interpreter->input(0);
TfLiteTensor *encoder_output = encoder_interpreter->output(0);
+ TfLiteTensor *decoder_input = decoder_interpreter->input(0);
+ TfLiteTensor *decoder_output = decoder_interpreter->output(0);
int invocation_count =
- g_best_of_times_s16_audio_data_size / encoder_input->bytes;
+ (g_best_of_times_s16_audio_data_size * sizeof(int16_t)) /
+ encoder_input->bytes;
for (int i = 0; i < invocation_count; ++i) {
MicroPrintf("Invocation %d of %d", i, invocation_count);
memcpy(encoder_input->data.uint8,
@@ -87,8 +92,13 @@
MicroPrintf("Failed to invoke encoder");
return -1;
}
+ if (memcmp(encoder_output->data.uint8,
+ g_best_of_times_s16_encoded + (i * encoder_output->bytes),
+ encoder_output->bytes)) {
+ MicroPrintf("Encoder output mismatches reference");
+ return -1;
+ }
- TfLiteTensor *decoder_input = decoder_interpreter->input(0);
memcpy(decoder_input->data.uint8, encoder_output->data.uint8,
decoder_input->bytes);
invoke_status = decoder_interpreter->Invoke();
@@ -96,6 +106,12 @@
MicroPrintf("Failed to invoke decoder");
return -1;
}
+ if (memcmp(decoder_output->data.uint8,
+ g_best_of_times_s16_decoded + (i * decoder_output->bytes),
+ decoder_output->bytes)) {
+ MicroPrintf("Decoder output mismatches reference");
+ return -1;
+ }
}
return 0;