Add support for SoundStream "streaming" version
- Extend the soundstream library to let the consumer select streaming or
non-streaming variants of soundstream at construction time.
- Add build targets for streaming variants of e2e/encoder/decoder
samples (and new reference outputs, in the e2e case).
Change-Id: I45aedc11ada72390974a50f149fd08aac1ece071
diff --git a/examples/tflm/soundstream/BUILD b/examples/tflm/soundstream/BUILD
index b3a41f3..2cb2d2d 100644
--- a/examples/tflm/soundstream/BUILD
+++ b/examples/tflm/soundstream/BUILD
@@ -6,11 +6,15 @@
name = "soundstream",
srcs = [
"decoder.cc",
- "encoder.cc",
"decoder_non_stream_q16x8_b64_io_int16_tflite.cc",
- "encoder_non_stream_q16x8_b64_io_int16_tflite.cc",
"decoder_non_stream_q16x8_b64_io_int16_tflite.h",
+ "decoder_streaming_q16x8_b64_io_int16_tflite.cc",
+ "decoder_streaming_q16x8_b64_io_int16_tflite.h",
+ "encoder.cc",
+ "encoder_non_stream_q16x8_b64_io_int16_tflite.cc",
"encoder_non_stream_q16x8_b64_io_int16_tflite.h",
+ "encoder_streaming_q16x8_b64_io_int16_tflite.cc",
+ "encoder_streaming_q16x8_b64_io_int16_tflite.h",
],
hdrs = [
"decoder.h",
@@ -19,11 +23,12 @@
tags = ["manual"],
deps = [
"@tflite-micro//tensorflow/lite/micro:micro_framework",
+ "@tflite-micro//tensorflow/lite/micro:recording_allocators",
],
)
kelvin_binary(
- name = "soundstream_decoder",
+ name = "soundstream_decoder_non_streaming",
srcs = [
"soundstream_decoder.cc",
],
@@ -31,13 +36,24 @@
deps = [
":soundstream",
"//crt:crt_header",
- "@tflite-micro//tensorflow/lite/micro:micro_framework",
- "@tflite-micro//tensorflow/lite/micro:system_setup",
],
)
kelvin_binary(
- name = "soundstream_encoder",
+ name = "soundstream_decoder_streaming",
+ srcs = [
+ "soundstream_decoder.cc",
+ ],
+ copts = ["-DSTREAMING"],
+ tags = ["manual"],
+ deps = [
+ ":soundstream",
+ "//crt:crt_header",
+ ],
+)
+
+kelvin_binary(
+ name = "soundstream_encoder_non_streaming",
srcs = [
"soundstream_encoder.cc",
],
@@ -45,13 +61,24 @@
deps = [
":soundstream",
"//crt:crt_header",
- "@tflite-micro//tensorflow/lite/micro:micro_framework",
- "@tflite-micro//tensorflow/lite/micro:system_setup",
],
)
kelvin_binary(
- name = "soundstream_e2e",
+ name = "soundstream_encoder_streaming",
+ srcs = [
+ "soundstream_encoder.cc",
+ ],
+ copts = ["-DSTREAMING"],
+ tags = ["manual"],
+ deps = [
+ ":soundstream",
+ "//crt:crt_header",
+ ],
+)
+
+kelvin_binary(
+ name = "soundstream_e2e_non_streaming",
srcs = [
"best_of_times_s16_decoded.cc",
"best_of_times_s16_encoded.cc",
@@ -67,8 +94,27 @@
deps = [
":soundstream",
"//crt:crt_header",
- "@tflite-micro//tensorflow/lite/micro:micro_framework",
- "@tflite-micro//tensorflow/lite/micro:system_setup",
+ ],
+)
+
+kelvin_binary(
+ name = "soundstream_e2e_streaming",
+ srcs = [
+ "best_of_times_s16_decoded_streaming.cc",
+ "best_of_times_s16_encoded_streaming.cc",
+ "best_of_times_s16_wav.cc",
+ "soundstream_e2e.cc",
+ ],
+ hdrs = [
+ "best_of_times_s16_decoded_streaming.h",
+ "best_of_times_s16_encoded_streaming.h",
+ "best_of_times_s16_wav.h",
+ ],
+ copts = ["-DSTREAMING"],
+ tags = ["manual"],
+ deps = [
+ ":soundstream",
+ "//crt:crt_header",
],
)
@@ -101,6 +147,34 @@
)
generate_cc_arrays(
+ name = "decoder_streaming_q16x8_b64_io_int16_tflite_cc",
+ src = "@ml-models//:quant_models/_decoder_streaming_q16x8_b64_io_int16.tflite",
+ out = "decoder_streaming_q16x8_b64_io_int16_tflite.cc",
+ tags = ["manual"],
+)
+
+generate_cc_arrays(
+ name = "decoder_streaming_q16x8_b64_io_int16_tflite_h",
+ src = "@ml-models//:quant_models/_decoder_streaming_q16x8_b64_io_int16.tflite",
+ out = "decoder_streaming_q16x8_b64_io_int16_tflite.h",
+ tags = ["manual"],
+)
+
+generate_cc_arrays(
+ name = "encoder_streaming_q16x8_b64_io_int16_tflite_cc",
+ src = "@ml-models//:quant_models/_encoder_streaming_q16x8_b64_io_int16.tflite",
+ out = "encoder_streaming_q16x8_b64_io_int16_tflite.cc",
+ tags = ["manual"],
+)
+
+generate_cc_arrays(
+ name = "encoder_streaming_q16x8_b64_io_int16_tflite_h",
+ src = "@ml-models//:quant_models/_encoder_streaming_q16x8_b64_io_int16.tflite",
+ out = "encoder_streaming_q16x8_b64_io_int16_tflite.h",
+ tags = ["manual"],
+)
+
+generate_cc_arrays(
name = "best_of_times_s16_wav_cc",
src = "best_of_times_s16.wav",
out = "best_of_times_s16_wav.cc",
@@ -135,3 +209,27 @@
src = "best_of_times_s16_decoded.raw",
out = "best_of_times_s16_decoded.h",
)
+
+generate_cc_arrays(
+ name = "best_of_times_s16_encoded_streaming_cc",
+ src = "best_of_times_s16_encoded_streaming.raw",
+ out = "best_of_times_s16_encoded_streaming.cc",
+)
+
+generate_cc_arrays(
+ name = "best_of_times_s16_encoded_streaming_h",
+ src = "best_of_times_s16_encoded_streaming.raw",
+ out = "best_of_times_s16_encoded_streaming.h",
+)
+
+generate_cc_arrays(
+ name = "best_of_times_s16_decoded_streaming_cc",
+ src = "best_of_times_s16_decoded_streaming.raw",
+ out = "best_of_times_s16_decoded_streaming.cc",
+)
+
+generate_cc_arrays(
+ name = "best_of_times_s16_decoded_streaming_h",
+ src = "best_of_times_s16_decoded_streaming.raw",
+ out = "best_of_times_s16_decoded_streaming.h",
+)
diff --git a/examples/tflm/soundstream/best_of_times_s16_decoded_streaming.raw b/examples/tflm/soundstream/best_of_times_s16_decoded_streaming.raw
new file mode 100644
index 0000000..91f655e
--- /dev/null
+++ b/examples/tflm/soundstream/best_of_times_s16_decoded_streaming.raw
Binary files differ
diff --git a/examples/tflm/soundstream/best_of_times_s16_encoded_streaming.raw b/examples/tflm/soundstream/best_of_times_s16_encoded_streaming.raw
new file mode 100644
index 0000000..7956e3c
--- /dev/null
+++ b/examples/tflm/soundstream/best_of_times_s16_encoded_streaming.raw
Binary files differ
diff --git a/examples/tflm/soundstream/decoder.cc b/examples/tflm/soundstream/decoder.cc
index 2a0fc4c..6aa939a 100644
--- a/examples/tflm/soundstream/decoder.cc
+++ b/examples/tflm/soundstream/decoder.cc
@@ -1,37 +1,96 @@
+// Copyright 2023 Google LLC
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
#include "examples/tflm/soundstream/decoder.h"
#include "examples/tflm/soundstream/decoder_non_stream_q16x8_b64_io_int16_tflite.h"
+#include "examples/tflm/soundstream/decoder_streaming_q16x8_b64_io_int16_tflite.h"
namespace kelvin::soundstream::decoder {
-std::optional<Decoder> Setup(uint8_t* tensor_arena) {
- auto* model =
- tflite::GetModel(g__decoder_non_stream_q16x8_b64_io_int16_model_data);
- if (model->version() != TFLITE_SCHEMA_VERSION) {
- return {};
+
+// Number of operators CreateResolver() registers for the non-streaming model.
+constexpr unsigned int kNonStreamingOpCount = 11;
+// Number of operators CreateResolver() registers for the streaming model: the
+// 11 non-streaming operators plus 5 streaming-only ones.
+constexpr unsigned int kStreamingOpCount = 16;
+// Count passed to tflite::MicroResourceVariables::Create().
+// Not sure how to get a good upper bound on this one, so arbitrarily chosen.
+constexpr unsigned int kStreamingVariablesCount = 40;
+
+template <bool kStreaming>
+class DecoderImpl : public Decoder {
+ public:
+ // Builds a DecoderImpl for the flatbuffer at `model_data`, using
+ // `tensor_arena` (of `tensor_arena_size` bytes) as interpreter memory.
+ // Returns nullptr if the model's schema version differs from
+ // TFLITE_SCHEMA_VERSION or if AllocateTensors() fails; otherwise returns a
+ // pointer obtained from `new`, which the caller is expected to own.
+ static Decoder* Setup(const uint8_t* model_data, uint8_t* tensor_arena,
+ size_t tensor_arena_size) {
+ auto* model = tflite::GetModel(model_data);
+ if (model->version() != TFLITE_SCHEMA_VERSION) {
+ return nullptr;
+ }
+
+ DecoderImpl* d = new DecoderImpl(model, tensor_arena, tensor_arena_size);
+
+ TfLiteStatus allocate_status = d->interpreter()->AllocateTensors();
+ if (allocate_status != kTfLiteOk) {
+ // NOTE(review): `d` is not released on this path — confirm whether that
+ // is acceptable for callers.
+ MicroPrintf("Failed to allocate decoder's tensors");
+ return nullptr;
+ }
+ return d;
}
+ // Implements Decoder::interpreter(): exposes the interpreter owned by this
+ // object. `override` makes the compiler verify the signature against the
+ // pure virtual declared in decoder.h.
+ tflite::MicroInterpreter* interpreter() override { return &interpreter_; }
- Decoder d;
- d.resolver = std::make_unique<tflite::MicroMutableOpResolver<11>>();
- d.resolver->AddReshape();
- d.resolver->AddPad();
- d.resolver->AddConv2D();
- d.resolver->AddLeakyRelu();
- d.resolver->AddSplit();
- d.resolver->AddTransposeConv();
- d.resolver->AddStridedSlice();
- d.resolver->AddConcatenation();
- d.resolver->AddDepthwiseConv2D();
- d.resolver->AddAdd();
- d.resolver->AddQuantize();
+ private:
+ // Private: instances are created only through Setup().
+ // The initializer list follows the member declaration order (resolver_,
+ // allocator_, variables_, interpreter_), so interpreter_ is built last from
+ // the three members before it. variables_ is created with
+ // kStreamingVariablesCount for both the streaming and non-streaming variant.
+ // NOTE(review): allocator_ and variables_ are held in std::unique_ptr while
+ // the member comments say they are created in the arena — confirm that
+ // releasing them through delete is intended.
+ DecoderImpl(const tflite::Model* model, uint8_t* tensor_arena,
+ size_t tensor_arena_size)
+ : resolver_(CreateResolver()),
+ allocator_(tflite::RecordingMicroAllocator::Create(tensor_arena,
+ tensor_arena_size)),
+ variables_(tflite::MicroResourceVariables::Create(
+ allocator_.get(), kStreamingVariablesCount)),
+ interpreter_(model, resolver_, allocator_.get(), variables_.get()) {}
- d.interpreter = std::make_unique<tflite::MicroInterpreter>(
- model, *d.resolver, tensor_arena, kTensorArenaSizeBytes);
-
- TfLiteStatus allocate_status = d.interpreter->AllocateTensors();
- if (allocate_status != kTfLiteOk) {
- MicroPrintf("Failed to allocate decoder's tensors");
- return {};
+ // Resolver capacity for this variant: kStreamingOpCount when kStreaming is
+ // true, kNonStreamingOpCount otherwise. Each value equals the number of
+ // Add*() calls CreateResolver() makes for that variant.
+ static constexpr int kOpCount =
+ kStreaming ? kStreamingOpCount : kNonStreamingOpCount;
+ // Returns a resolver populated with the operators for the selected variant.
+ // The first eleven registrations are made for both variants; the five under
+ // `if (kStreaming)` are added only for the streaming variant.
+ static inline tflite::MicroMutableOpResolver<kOpCount> CreateResolver() {
+ tflite::MicroMutableOpResolver<kOpCount> resolver;
+ resolver.AddReshape();
+ resolver.AddPad();
+ resolver.AddConv2D();
+ resolver.AddLeakyRelu();
+ resolver.AddSplit();
+ resolver.AddTransposeConv();
+ resolver.AddStridedSlice();
+ resolver.AddConcatenation();
+ resolver.AddDepthwiseConv2D();
+ resolver.AddAdd();
+ resolver.AddQuantize();
+ // Streaming-only operators.
+ if (kStreaming) {
+ resolver.AddCallOnce();
+ resolver.AddVarHandle();
+ resolver.AddReadVariable();
+ resolver.AddAssignVariable();
+ resolver.AddSub();
+ }
+ return resolver;
}
- return d;
+ const tflite::MicroMutableOpResolver<kOpCount> resolver_;
+ // Created in the arena
+ std::unique_ptr<tflite::RecordingMicroAllocator> allocator_;
+ // Created in the arena
+ std::unique_ptr<tflite::MicroResourceVariables> variables_;
+ tflite::MicroInterpreter interpreter_;
+};
+
+// The streaming and non-streaming decoders are constructed by two distinct
+// functions so that the compiler can drop whichever one is unused. Perhaps
+// with LTO they could be merged into one.
+//
+// Builds the streaming decoder in `tensor_arena` (`tensor_arena_size` bytes).
+// The returned pointer is null when DecoderImpl<true>::Setup() fails.
+std::unique_ptr<Decoder> SetupStreaming(uint8_t* tensor_arena,
+ size_t tensor_arena_size) {
+ Decoder* const streaming_decoder = DecoderImpl<true>::Setup(
+ g__decoder_streaming_q16x8_b64_io_int16_model_data, tensor_arena,
+ tensor_arena_size);
+ return std::unique_ptr<Decoder>(streaming_decoder);
}
+// Builds the non-streaming decoder in `tensor_arena` (`tensor_arena_size`
+// bytes). The returned pointer is null when DecoderImpl<false>::Setup() fails.
+std::unique_ptr<Decoder> Setup(uint8_t* tensor_arena,
+ size_t tensor_arena_size) {
+ Decoder* const non_streaming_decoder = DecoderImpl<false>::Setup(
+ g__decoder_non_stream_q16x8_b64_io_int16_model_data, tensor_arena,
+ tensor_arena_size);
+ return std::unique_ptr<Decoder>(non_streaming_decoder);
+}
+
} // namespace kelvin::soundstream::decoder
diff --git a/examples/tflm/soundstream/decoder.h b/examples/tflm/soundstream/decoder.h
index b21c93d..337f402 100644
--- a/examples/tflm/soundstream/decoder.h
+++ b/examples/tflm/soundstream/decoder.h
@@ -1,20 +1,31 @@
+// Copyright 2023 Google LLC
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
#ifndef EXAMPLES_TFLM_SOUNDSTREAM_DECODER_H_
#define EXAMPLES_TFLM_SOUNDSTREAM_DECODER_H_
#include <cstddef>
#include <memory>
-#include <optional>
#include "tensorflow/lite/micro/micro_interpreter.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
+#include "tensorflow/lite/micro/recording_micro_allocator.h"
namespace kelvin::soundstream::decoder {
+// RecordingMicroAllocator on desktop recorded 94512 bytes of allocation.
constexpr size_t kTensorArenaSizeBytes = 96 * 1024;
-struct Decoder {
- std::unique_ptr<tflite::MicroInterpreter> interpreter;
- std::unique_ptr<tflite::MicroMutableOpResolver<11>> resolver;
+// RecordingMicroAllocator on desktop recorded 143296 bytes of allocation.
+constexpr size_t kTensorArenaStreamingSizeBytes = 168 * 1024;
+
+// Abstract interface to a constructed SoundStream decoder. Instances are
+// obtained from Setup() or SetupStreaming().
+// NOTE(review): no virtual destructor is declared, yet the factories return
+// std::unique_ptr<Decoder> — confirm that destroying an implementation through
+// this base type is safe.
+class Decoder {
+ public:
+ // Returns the interpreter used to run the decoder model.
+ virtual tflite::MicroInterpreter* interpreter() = 0;
+};
-std::optional<Decoder> Setup(uint8_t* tensor_arena);
+
+std::unique_ptr<Decoder> Setup(uint8_t* tensor_arena, size_t tensor_arena_size);
+std::unique_ptr<Decoder> SetupStreaming(uint8_t* tensor_arena,
+ size_t tensor_arena_size);
} // namespace kelvin::soundstream::decoder
#endif // EXAMPLES_TFLM_SOUNDSTREAM_DECODER_H_
diff --git a/examples/tflm/soundstream/encoder.cc b/examples/tflm/soundstream/encoder.cc
index d31d517..6d3c985 100644
--- a/examples/tflm/soundstream/encoder.cc
+++ b/examples/tflm/soundstream/encoder.cc
@@ -1,32 +1,95 @@
+// Copyright 2023 Google LLC
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
#include "examples/tflm/soundstream/encoder.h"
#include "examples/tflm/soundstream/encoder_non_stream_q16x8_b64_io_int16_tflite.h"
+#include "examples/tflm/soundstream/encoder_streaming_q16x8_b64_io_int16_tflite.h"
+#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
+#include "tensorflow/lite/micro/recording_micro_allocator.h"
namespace kelvin::soundstream::encoder {
-std::optional<Encoder> Setup(uint8_t* tensor_arena) {
- auto* model =
- tflite::GetModel(g__encoder_non_stream_q16x8_b64_io_int16_model_data);
- if (model->version() != TFLITE_SCHEMA_VERSION) {
- return {};
+
+// Number of operators CreateResolver() registers for the non-streaming model.
+constexpr unsigned int kNonStreamingOpCount = 6;
+// Number of operators CreateResolver() registers for the streaming model: the
+// 6 non-streaming operators plus 7 streaming-only ones.
+constexpr unsigned int kStreamingOpCount = 13;
+// Count passed to tflite::MicroResourceVariables::Create().
+// Not sure how to get a good upper bound on this one, so arbitrarily chosen.
+constexpr unsigned int kStreamingVariablesCount = 40;
+
+template <bool kStreaming>
+class EncoderImpl : public Encoder {
+ public:
+ // Builds an EncoderImpl for the flatbuffer at `model_data`, using
+ // `tensor_arena` (of `tensor_arena_size` bytes) as interpreter memory.
+ // Returns nullptr if the model's schema version differs from
+ // TFLITE_SCHEMA_VERSION or if AllocateTensors() fails; otherwise returns a
+ // pointer obtained from `new`, which the caller is expected to own.
+ static Encoder* Setup(const uint8_t* model_data, uint8_t* tensor_arena,
+ size_t tensor_arena_size) {
+ auto* model = tflite::GetModel(model_data);
+ if (model->version() != TFLITE_SCHEMA_VERSION) {
+ return nullptr;
+ }
+
+ EncoderImpl* e = new EncoderImpl(model, tensor_arena, tensor_arena_size);
+
+ TfLiteStatus allocate_status = e->interpreter()->AllocateTensors();
+ if (allocate_status != kTfLiteOk) {
+ // NOTE(review): `e` is not released on this path — confirm whether that
+ // is acceptable for callers.
+ MicroPrintf("Failed to allocate encoder's tensors");
+ return nullptr;
+ }
+ return e;
}
+ // Implements Encoder::interpreter(): exposes the interpreter owned by this
+ // object. `override` makes the compiler verify the signature against the
+ // pure virtual declared in encoder.h.
+ tflite::MicroInterpreter* interpreter() override { return &interpreter_; }
- Encoder e;
- e.resolver = std::make_unique<tflite::MicroMutableOpResolver<6>>();
- e.resolver->AddReshape();
- e.resolver->AddPad();
- e.resolver->AddConv2D();
- e.resolver->AddLeakyRelu();
- e.resolver->AddDepthwiseConv2D();
- e.resolver->AddAdd();
+ private:
+ // Private: instances are created only through Setup().
+ // The initializer list follows the member declaration order (resolver_,
+ // allocator_, variables_, interpreter_), so interpreter_ is built last from
+ // the three members before it. variables_ is created with
+ // kStreamingVariablesCount for both the streaming and non-streaming variant.
+ // NOTE(review): allocator_ and variables_ are held in std::unique_ptr while
+ // the member comments say they are created in the arena — confirm that
+ // releasing them through delete is intended.
+ EncoderImpl(const tflite::Model* model, uint8_t* tensor_arena,
+ size_t tensor_arena_size)
+ : resolver_(CreateResolver()),
+ allocator_(tflite::RecordingMicroAllocator::Create(tensor_arena,
+ tensor_arena_size)),
+ variables_(tflite::MicroResourceVariables::Create(
+ allocator_.get(), kStreamingVariablesCount)),
+ interpreter_(model, resolver_, allocator_.get(), variables_.get()) {}
- e.interpreter = std::make_unique<tflite::MicroInterpreter>(
- model, *e.resolver, tensor_arena, kTensorArenaSizeBytes);
-
- TfLiteStatus allocate_status = e.interpreter->AllocateTensors();
- if (allocate_status != kTfLiteOk) {
- MicroPrintf("Failed to allocate encoder's tensors");
- return {};
+ // Resolver capacity for this variant: kStreamingOpCount when kStreaming is
+ // true, kNonStreamingOpCount otherwise. Each value equals the number of
+ // Add*() calls CreateResolver() makes for that variant.
+ static constexpr int kOpCount =
+ kStreaming ? kStreamingOpCount : kNonStreamingOpCount;
+ // Returns a resolver populated with the operators for the selected variant.
+ // The first six registrations are made for both variants; the seven under
+ // `if (kStreaming)` are added only for the streaming variant.
+ static inline tflite::MicroMutableOpResolver<kOpCount> CreateResolver() {
+ tflite::MicroMutableOpResolver<kOpCount> resolver;
+ resolver.AddReshape();
+ resolver.AddPad();
+ resolver.AddConv2D();
+ resolver.AddLeakyRelu();
+ resolver.AddDepthwiseConv2D();
+ resolver.AddAdd();
+ // Streaming-only operators.
+ if (kStreaming) {
+ resolver.AddCallOnce();
+ resolver.AddVarHandle();
+ resolver.AddReadVariable();
+ resolver.AddConcatenation();
+ resolver.AddStridedSlice();
+ resolver.AddAssignVariable();
+ resolver.AddQuantize();
+ }
+ return resolver;
}
- return e;
+ const tflite::MicroMutableOpResolver<kOpCount> resolver_;
+ // Created in the arena
+ std::unique_ptr<tflite::RecordingMicroAllocator> allocator_;
+ // Created in the arena
+ std::unique_ptr<tflite::MicroResourceVariables> variables_;
+ tflite::MicroInterpreter interpreter_;
+};
+
+// The streaming and non-streaming encoders are constructed by two distinct
+// functions so that the compiler can drop whichever one is unused. Perhaps
+// with LTO they could be merged into one.
+//
+// Builds the streaming encoder in `tensor_arena` (`tensor_arena_size` bytes).
+// The returned pointer is null when EncoderImpl<true>::Setup() fails.
+std::unique_ptr<Encoder> SetupStreaming(uint8_t* tensor_arena,
+ size_t tensor_arena_size) {
+ Encoder* const streaming_encoder = EncoderImpl<true>::Setup(
+ g__encoder_streaming_q16x8_b64_io_int16_model_data, tensor_arena,
+ tensor_arena_size);
+ return std::unique_ptr<Encoder>(streaming_encoder);
}
+// Builds the non-streaming encoder in `tensor_arena` (`tensor_arena_size`
+// bytes). The returned pointer is null when EncoderImpl<false>::Setup() fails.
+std::unique_ptr<Encoder> Setup(uint8_t* tensor_arena,
+ size_t tensor_arena_size) {
+ Encoder* const non_streaming_encoder = EncoderImpl<false>::Setup(
+ g__encoder_non_stream_q16x8_b64_io_int16_model_data, tensor_arena,
+ tensor_arena_size);
+ return std::unique_ptr<Encoder>(non_streaming_encoder);
+}
+
} // namespace kelvin::soundstream::encoder
diff --git a/examples/tflm/soundstream/encoder.h b/examples/tflm/soundstream/encoder.h
index 3cb74f9..267d553 100644
--- a/examples/tflm/soundstream/encoder.h
+++ b/examples/tflm/soundstream/encoder.h
@@ -1,20 +1,28 @@
+// Copyright 2023 Google LLC
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
#ifndef EXAMPLES_TFLM_SOUNDSTREAM_ENCODER_H_
#define EXAMPLES_TFLM_SOUNDSTREAM_ENCODER_H_
#include <cstddef>
#include <memory>
-#include <optional>
#include "tensorflow/lite/micro/micro_interpreter.h"
-#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
namespace kelvin::soundstream::encoder {
+// RecordingMicroAllocator on desktop recorded 90064 bytes of allocation.
constexpr size_t kTensorArenaSizeBytes = 96 * 1024;
+// RecordingMicroAllocator on desktop recorded 147328 bytes of allocation.
+constexpr size_t kTensorArenaStreamingSizeBytes = 168 * 1024;
+
+// Abstract interface to a constructed SoundStream encoder. Instances are
+// obtained from Setup() or SetupStreaming().
+// NOTE(review): no virtual destructor is declared, yet the factories return
+// std::unique_ptr<Encoder> — confirm that destroying an implementation through
+// this base type is safe.
struct Encoder {
- std::unique_ptr<tflite::MicroInterpreter> interpreter;
- std::unique_ptr<tflite::MicroMutableOpResolver<6>> resolver;
+ // Returns the interpreter used to run the encoder model.
+ virtual tflite::MicroInterpreter* interpreter() = 0;
};
-std::optional<Encoder> Setup(uint8_t* tensor_arena);
+
+std::unique_ptr<Encoder> Setup(uint8_t* tensor_arena, size_t tensor_arena_size);
+std::unique_ptr<Encoder> SetupStreaming(uint8_t* tensor_arena,
+ size_t tensor_arena_size);
} // namespace kelvin::soundstream::encoder
#endif // EXAMPLES_TFLM_SOUNDSTREAM_ENCODER_H_
diff --git a/examples/tflm/soundstream/soundstream_decoder.cc b/examples/tflm/soundstream/soundstream_decoder.cc
index 77f0f94..9a5a739 100644
--- a/examples/tflm/soundstream/soundstream_decoder.cc
+++ b/examples/tflm/soundstream/soundstream_decoder.cc
@@ -16,23 +16,34 @@
};
namespace {
-uint8_t
- decoder_tensor_arena[kelvin::soundstream::decoder::kTensorArenaSizeBytes]
- __attribute__((aligned(64)));
+// Arena size is chosen at compile time: the streaming constant when STREAMING
+// is defined, the non-streaming constant otherwise.
+#if defined(STREAMING)
+constexpr size_t tensor_arena_size =
+ kelvin::soundstream::decoder::kTensorArenaStreamingSizeBytes;
+#else
+constexpr size_t tensor_arena_size =
+ kelvin::soundstream::decoder::kTensorArenaSizeBytes;
+#endif
+// Statically allocated, 64-byte-aligned buffer handed to the decoder's
+// Setup()/SetupStreaming() call in main().
+uint8_t decoder_tensor_arena[tensor_arena_size] __attribute__((aligned(64)));
} // namespace
int main(int argc, char **argv) {
- auto decoder = kelvin::soundstream::decoder::Setup(decoder_tensor_arena);
+#if defined(STREAMING)
+ auto decoder = kelvin::soundstream::decoder::SetupStreaming(
+ decoder_tensor_arena, tensor_arena_size);
+#else
+ auto decoder = kelvin::soundstream::decoder::Setup(decoder_tensor_arena,
+ tensor_arena_size);
+#endif
if (!decoder) {
MicroPrintf("Unable to construct decoder");
return -1;
}
- TfLiteTensor *decoder_input = decoder->interpreter->input(0);
- TfLiteTensor *decoder_output = decoder->interpreter->output(0);
+ TfLiteTensor *decoder_input = decoder->interpreter()->input(0);
+ TfLiteTensor *decoder_output = decoder->interpreter()->output(0);
memset(decoder_input->data.uint8, 0, decoder_input->bytes);
- TfLiteStatus invoke_status = decoder->interpreter->Invoke();
+ TfLiteStatus invoke_status = decoder->interpreter()->Invoke();
if (invoke_status != kTfLiteOk) {
MicroPrintf("Failed to invoke decoder");
return -1;
diff --git a/examples/tflm/soundstream/soundstream_e2e.cc b/examples/tflm/soundstream/soundstream_e2e.cc
index 4c43060..9ba8913 100644
--- a/examples/tflm/soundstream/soundstream_e2e.cc
+++ b/examples/tflm/soundstream/soundstream_e2e.cc
@@ -2,38 +2,69 @@
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
-#include "examples/tflm/soundstream/best_of_times_s16_decoded.h"
-#include "examples/tflm/soundstream/best_of_times_s16_encoded.h"
#include "examples/tflm/soundstream/best_of_times_s16_wav.h"
#include "examples/tflm/soundstream/decoder.h"
#include "examples/tflm/soundstream/encoder.h"
+#if defined(STREAMING)
+#include "examples/tflm/soundstream/best_of_times_s16_decoded_streaming.h"
+#include "examples/tflm/soundstream/best_of_times_s16_encoded_streaming.h"
+const unsigned char *reference_decoded = g_best_of_times_s16_decoded_streaming;
+const unsigned char *reference_encoded = g_best_of_times_s16_encoded_streaming;
+#else
+#include "examples/tflm/soundstream/best_of_times_s16_decoded.h"
+#include "examples/tflm/soundstream/best_of_times_s16_encoded.h"
+const unsigned char *reference_decoded = g_best_of_times_s16_decoded;
+const unsigned char *reference_encoded = g_best_of_times_s16_encoded;
+#endif
+
namespace {
-uint8_t
- encoder_tensor_arena[kelvin::soundstream::encoder::kTensorArenaSizeBytes]
+// Arena sizes are chosen at compile time: the streaming constants when
+// STREAMING is defined, the non-streaming constants otherwise.
+#if defined(STREAMING)
+constexpr size_t decoder_tensor_arena_size =
+ kelvin::soundstream::decoder::kTensorArenaStreamingSizeBytes;
+constexpr size_t encoder_tensor_arena_size =
+ kelvin::soundstream::encoder::kTensorArenaStreamingSizeBytes;
+#else
+constexpr size_t decoder_tensor_arena_size =
+ kelvin::soundstream::decoder::kTensorArenaSizeBytes;
+constexpr size_t encoder_tensor_arena_size =
+ kelvin::soundstream::encoder::kTensorArenaSizeBytes;
+#endif
+// Statically allocated, 64-byte-aligned buffers; the encoder and the decoder
+// each get their own arena in main().
+uint8_t encoder_tensor_arena[encoder_tensor_arena_size]
__attribute__((aligned(64)));
-uint8_t
- decoder_tensor_arena[kelvin::soundstream::decoder::kTensorArenaSizeBytes]
+uint8_t decoder_tensor_arena[decoder_tensor_arena_size]
__attribute__((aligned(64)));
} // namespace
int main(int argc, char **argv) {
- auto encoder = kelvin::soundstream::encoder::Setup(encoder_tensor_arena);
+#if defined(STREAMING)
+ auto encoder = kelvin::soundstream::encoder::SetupStreaming(
+ encoder_tensor_arena, encoder_tensor_arena_size);
+#else
+ auto encoder = kelvin::soundstream::encoder::Setup(encoder_tensor_arena,
+ encoder_tensor_arena_size);
+#endif
if (!encoder) {
MicroPrintf("Unable to construct encoder");
return -1;
}
- auto decoder = kelvin::soundstream::decoder::Setup(decoder_tensor_arena);
+#if defined(STREAMING)
+ auto decoder = kelvin::soundstream::decoder::SetupStreaming(
+ decoder_tensor_arena, decoder_tensor_arena_size);
+#else
+ auto decoder = kelvin::soundstream::decoder::Setup(decoder_tensor_arena,
+ decoder_tensor_arena_size);
+#endif
if (!decoder) {
MicroPrintf("Unable to construct decoder");
return -1;
}
- TfLiteTensor *encoder_input = encoder->interpreter->input(0);
- TfLiteTensor *encoder_output = encoder->interpreter->output(0);
- TfLiteTensor *decoder_input = decoder->interpreter->input(0);
- TfLiteTensor *decoder_output = decoder->interpreter->output(0);
+ TfLiteTensor *encoder_input = encoder->interpreter()->input(0);
+ TfLiteTensor *encoder_output = encoder->interpreter()->output(0);
+ TfLiteTensor *decoder_input = decoder->interpreter()->input(0);
+ TfLiteTensor *decoder_output = decoder->interpreter()->output(0);
int invocation_count =
(g_best_of_times_s16_audio_data_size * sizeof(int16_t)) /
@@ -44,13 +75,13 @@
g_best_of_times_s16_audio_data +
((i * encoder_input->bytes) / sizeof(int16_t)),
encoder_input->bytes);
- TfLiteStatus invoke_status = encoder->interpreter->Invoke();
+ TfLiteStatus invoke_status = encoder->interpreter()->Invoke();
if (invoke_status != kTfLiteOk) {
MicroPrintf("Failed to invoke encoder");
return -1;
}
if (memcmp(encoder_output->data.uint8,
- g_best_of_times_s16_encoded + (i * encoder_output->bytes),
+ reference_encoded + (i * encoder_output->bytes),
encoder_output->bytes)) {
MicroPrintf("Encoder output mismatches reference");
return -1;
@@ -58,13 +89,13 @@
memcpy(decoder_input->data.uint8, encoder_output->data.uint8,
decoder_input->bytes);
- invoke_status = decoder->interpreter->Invoke();
+ invoke_status = decoder->interpreter()->Invoke();
if (invoke_status != kTfLiteOk) {
MicroPrintf("Failed to invoke decoder");
return -1;
}
if (memcmp(decoder_output->data.uint8,
- g_best_of_times_s16_decoded + (i * decoder_output->bytes),
+ reference_decoded + (i * decoder_output->bytes),
decoder_output->bytes)) {
MicroPrintf("Decoder output mismatches reference");
return -1;
diff --git a/examples/tflm/soundstream/soundstream_encoder.cc b/examples/tflm/soundstream/soundstream_encoder.cc
index bc7fc25..6c60748 100644
--- a/examples/tflm/soundstream/soundstream_encoder.cc
+++ b/examples/tflm/soundstream/soundstream_encoder.cc
@@ -16,23 +16,34 @@
};
namespace {
-uint8_t
- encoder_tensor_arena[kelvin::soundstream::encoder::kTensorArenaSizeBytes]
- __attribute__((aligned(64)));
+// Arena size is chosen at compile time: the streaming constant when STREAMING
+// is defined, the non-streaming constant otherwise.
+#if defined(STREAMING)
+constexpr size_t tensor_arena_size =
+ kelvin::soundstream::encoder::kTensorArenaStreamingSizeBytes;
+#else
+constexpr size_t tensor_arena_size =
+ kelvin::soundstream::encoder::kTensorArenaSizeBytes;
+#endif
+// Statically allocated, 64-byte-aligned buffer handed to the encoder's
+// Setup()/SetupStreaming() call in main().
+uint8_t tensor_arena[tensor_arena_size] __attribute__((aligned(64)));
} // namespace
int main(int argc, char **argv) {
- auto encoder = kelvin::soundstream::encoder::Setup(encoder_tensor_arena);
+#if defined(STREAMING)
+ auto encoder = kelvin::soundstream::encoder::SetupStreaming(
+ tensor_arena, tensor_arena_size);
+#else
+ auto encoder =
+ kelvin::soundstream::encoder::Setup(tensor_arena, tensor_arena_size);
+#endif
if (!encoder) {
MicroPrintf("Unable to construct encoder");
return -1;
}
- TfLiteTensor *encoder_input = encoder->interpreter->input(0);
- TfLiteTensor *encoder_output = encoder->interpreter->output(0);
+ TfLiteTensor *encoder_input = encoder->interpreter()->input(0);
+ TfLiteTensor *encoder_output = encoder->interpreter()->output(0);
memset(encoder_input->data.uint8, 0, encoder_input->bytes);
- TfLiteStatus invoke_status = encoder->interpreter->Invoke();
+ TfLiteStatus invoke_status = encoder->interpreter()->Invoke();
if (invoke_status != kTfLiteOk) {
MicroPrintf("Failed to invoke encoder");
return -1;