Merge remote-tracking branch 'spacebeaker/upstream' into master
Change-Id: Ieb5c6acc9bbafad3813c83058a9e40089fee2e3c
diff --git a/tensorflow/extra_rules.bzl b/tensorflow/extra_rules.bzl
index 4a111dc..29e0bda 100644
--- a/tensorflow/extra_rules.bzl
+++ b/tensorflow/extra_rules.bzl
@@ -1,5 +1,7 @@
def tflm_kernel_friends():
- return []
+ return [
+ "public",
+ ]
def tflm_audio_frontend_friends():
return []
@@ -32,3 +34,7 @@
def xtensa_vision_p6_config():
"""Config setting for all Vision P6 based cores."""
return "//tensorflow/lite/micro/kernels:xtensa_vision_p6_default"
+
+def kelvin_config():
+ """Config setting for Kelvin-based cores."""
+ return "//tensorflow/lite/micro/kernels:kelvin_default"
diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD
index 1753465..58ea22d 100644
--- a/tensorflow/lite/micro/BUILD
+++ b/tensorflow/lite/micro/BUILD
@@ -334,8 +334,11 @@
hdrs = [
"micro_time.h",
],
- copts = micro_copts() + ["-DTF_LITE_USE_CTIME"],
- deps = ["//tensorflow/lite/c:common"],
+ copts = micro_copts(),
+ deps = [
+ "//tensorflow/lite/c:common",
+ "@kelvin_sw//benchmarks:cycle_count",
+ ],
)
cc_library(
diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD
index f2ccb06..22254d9 100644
--- a/tensorflow/lite/micro/kernels/BUILD
+++ b/tensorflow/lite/micro/kernels/BUILD
@@ -8,7 +8,9 @@
"xtensa_hifi_3z_config",
"xtensa_hifi_5_config",
"xtensa_vision_p6_config",
+ "kelvin_config",
)
+load("@bazel_skylib//lib:selects.bzl", "selects")
package(
features = [
@@ -33,6 +35,10 @@
packages = tflm_kernel_friends(),
)
+exports_files(
+ glob(["*_test.cc"])
+)
+
####################################
# C++ libraries
####################################
@@ -66,6 +72,9 @@
hdrs = [
"conv_test.h",
],
+ visibility = [
+ "//visibility:public",
+ ],
copts = micro_copts(),
deps = [
":kernel_runner",
@@ -191,6 +200,9 @@
"-DVISION_P6=1",
]
+KELVIN_COPTS = [
+]
+
tflm_kernel_cc_library(
name = "micro_ops",
srcs = [
@@ -331,6 +343,7 @@
xtensa_hifi_3z_config(): glob(["xtensa/**/*.h"]),
xtensa_hifi_5_config(): glob(["xtensa/**/*.h"]),
xtensa_vision_p6_config(): glob(["xtensa/**/*.h"]),
+ kelvin_config(): glob(["kelvin/**/*.h"]),
"//conditions:default": [],
}),
accelerated_srcs = {
@@ -339,6 +352,7 @@
xtensa_hifi_3z_config(): glob(["xtensa/**/*.cc"]),
xtensa_hifi_5_config(): glob(["xtensa/**/*.cc"]),
xtensa_vision_p6_config(): glob(["xtensa/**/*.cc"]),
+ kelvin_config(): glob(["kelvin/**/*.cc"]),
},
copts = micro_copts() + select({
xtensa_fusion_f1_config(): HIFI4_COPTS,
@@ -346,6 +360,7 @@
xtensa_hifi_3z_config(): HIFI4_COPTS,
xtensa_hifi_5_config(): HIFI5_COPTS,
xtensa_vision_p6_config(): VP6_COPTS,
+ kelvin_config(): KELVIN_COPTS,
"//conditions:default": [],
}),
visibility = [
@@ -382,6 +397,7 @@
xtensa_hifi_3z_config(): ["//third_party/xtensa/nnlib_hifi4:nnlib_hifi4_lib"],
xtensa_hifi_5_config(): ["//third_party/xtensa/nnlib_hifi5:nnlib_hifi5_lib"],
xtensa_vision_p6_config(): ["//third_party/xtensa/xi_tflmlib_vision_p6:xi_tflmlib_vision_p6_lib"],
+ kelvin_config(): ["@kelvin_sw//tflm/opt:opt"],
"//conditions:default": [],
}),
)
@@ -1515,3 +1531,22 @@
":optimized_kernels": "xtensa_vision_p6",
},
)
+
+config_setting(
+ name = "kelvin_default1",
+ values = {
+ "platforms": "@kelvin_sw//platforms/riscv32:kelvin",
+ },
+)
+
+config_setting(
+ name = "kelvin_default2",
+ values = {
+ "platforms": "//platforms/riscv32:kelvin",
+ },
+)
+
+selects.config_setting_group(
+ name = "kelvin_default",
+ match_any = [":kelvin_default1", ":kelvin_default2"],
+)
diff --git a/tensorflow/lite/micro/kernels/add_test.cc b/tensorflow/lite/micro/kernels/add_test.cc
index 6e8b40c..bdf0224 100644
--- a/tensorflow/lite/micro/kernels/add_test.cc
+++ b/tensorflow/lite/micro/kernels/add_test.cc
@@ -256,16 +256,42 @@
TF_LITE_MICRO_TEST(QuantizedAddNoActivationInt8) {
const float scales[] = {0.25, 0.5, 1.0};
const int zero_points[] = {-10, 4, 13};
- int inout_shape[] = {4, 1, 2, 2, 1};
- const float input1_values[] = {-2.01, -1.01, -0.01, 0.98};
- const float input2_values[] = {1.01, 1.99, 2.99, 4.02};
- const float golden_values[] = {-1, 1, 3, 5};
+ int inout_shape[] = {4, 1, 7, 6, 1};
+ // clang-format off
+ const float input1_values[] = {
+ -2.01, -1.01, -0.01, 0.98, -2.01, -1.01,
+ -0.01, 0.98, -2.01, -1.01, -0.01, 0.98,
+ -2.01, -1.01, -0.01, 0.98, -2.01, -1.01,
+ -0.01, 0.98, -2.01, -1.01, -0.01, 0.98,
+ -2.01, -1.01, -0.01, 0.98, -2.01, -1.01,
+ -0.01, 0.98, -2.01, -1.01, -0.01, 0.98,
+ -2.0, 0.2, 0.7, 0.8, 1.1, 2.0
+ };
+ const float input2_values[] = {
+ 1.01, 1.99, 2.99, 4.02, 1.01, 1.99,
+ 2.99, 4.02, 1.01, 1.99, 2.99, 4.02,
+ 1.01, 1.99, 2.99, 4.02, 1.01, 1.99,
+ 2.99, 4.02, 1.01, 1.99, 2.99, 4.02,
+ 1.01, 1.99, 2.99, 4.02, 1.01, 1.99,
+ 2.99, 4.02, 1.01, 1.99, 2.99, 4.02,
+ 0.1, 0.2, 0.3, 0.5, 1.1, 0.1
+ };
+ const float golden_values[] = {
+ -1, 1, 3, 5, -1, 1,
+ 3, 5, -1, 1, 3, 5,
+ -1, 1, 3, 5, -1, 1,
+ 3, 5, -1, 1, 3, 5,
+ -1, 1, 3, 5, -1, 1,
+ 3, 5, -1, 1, 3, 5,
+ -1.9, 0.4, 1.0, 1.3, 2.2, 2.1
+ };
+ // clang-format on
- constexpr int kOutputDimsCount = 4;
- int8_t input1_quantized[kOutputDimsCount];
- int8_t input2_quantized[kOutputDimsCount];
- int8_t golden_quantized[kOutputDimsCount];
- int8_t output[kOutputDimsCount];
+ constexpr int kOutputDimsCount = 42;
+ int8_t input1_quantized[kOutputDimsCount] __attribute__((aligned(64)));
+ int8_t input2_quantized[kOutputDimsCount] __attribute__((aligned(64)));
+ int8_t golden_quantized[kOutputDimsCount] __attribute__((aligned(64)));
+ int8_t output[kOutputDimsCount] __attribute__((aligned(64)));
tflite::testing::TestAddQuantized(
inout_shape, input1_values, input1_quantized, scales[0], zero_points[0],
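The test now covers a 1x7x6x1 tensor so a vectorized add sees both full vector strips and a ragged tail (the last six values deliberately break the repeating pattern), and the buffers are 64-byte aligned; that the alignment matches Kelvin's vector access width is an assumption, not stated in the patch. A standard-C++ spelling of the same alignment, as a sketch:

    // alignas is the portable equivalent of __attribute__((aligned(64))).
    alignas(64) int8_t output[42];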
diff --git a/tensorflow/lite/micro/kernels/kelvin/add.cc b/tensorflow/lite/micro/kernels/kelvin/add.cc
new file mode 100644
index 0000000..8c33716
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/kelvin/add.cc
@@ -0,0 +1,177 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tensorflow/lite/kernels/internal/reference/add.h"
+
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "tensorflow/lite/micro/kernels/add.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/micro_log.h"
+#include "tflm/opt/opt.h"
+
+namespace tflite {
+namespace {
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+ TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+ return context->AllocatePersistentBuffer(context, sizeof(OpDataAdd));
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+ return AddPrepare(context, node);
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+ TFLITE_DCHECK(node->user_data != nullptr);
+ const OpDataAdd* data = static_cast<const OpDataAdd*>(node->user_data);
+
+ const TfLiteEvalTensor* input1 =
+ tflite::micro::GetEvalInput(context, node, kAddInputTensor1);
+ const TfLiteEvalTensor* input2 =
+ tflite::micro::GetEvalInput(context, node, kAddInputTensor2);
+ TfLiteEvalTensor* output =
+ tflite::micro::GetEvalOutput(context, node, kAddOutputTensor);
+
+ if (output->type == kTfLiteFloat32) {
+ tflite::ArithmeticParams op_params;
+ SetActivationParams(data->output_activation_min_f32,
+ data->output_activation_max_f32, &op_params);
+ if (data->requires_broadcast) {
+ reference_ops::BroadcastAdd4DSlow(
+ op_params, tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorData<float>(input1),
+ tflite::micro::GetTensorShape(input2),
+ tflite::micro::GetTensorData<float>(input2),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<float>(output));
+ } else {
+ reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorData<float>(input1),
+ tflite::micro::GetTensorShape(input2),
+ tflite::micro::GetTensorData<float>(input2),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<float>(output));
+ }
+ } else if (output->type == kTfLiteInt32) {
+ tflite::ArithmeticParams op_params;
+ SetActivationParams(std::numeric_limits<int32_t>::lowest(),
+ std::numeric_limits<int32_t>::max(), &op_params);
+ if (data->requires_broadcast) {
+ reference_ops::BroadcastAdd4DSlow(
+ op_params, tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorData<int32_t>(input1),
+ tflite::micro::GetTensorShape(input2),
+ tflite::micro::GetTensorData<int32_t>(input2),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int32_t>(output));
+ } else {
+ kelvin::opt::ElementwiseAddS32(
+ op_params, tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorData<int32_t>(input1),
+ tflite::micro::GetTensorShape(input2),
+ tflite::micro::GetTensorData<int32_t>(input2),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int32_t>(output));
+ }
+ } else if (output->type == kTfLiteInt16) {
+ tflite::ArithmeticParams op_params;
+ op_params.left_shift = data->left_shift;
+ op_params.input1_offset = data->input1_offset;
+ op_params.input1_multiplier = data->input1_multiplier;
+ op_params.input1_shift = data->input1_shift;
+ op_params.input2_offset = data->input2_offset;
+ op_params.input2_multiplier = data->input2_multiplier;
+ op_params.input2_shift = data->input2_shift;
+ op_params.output_offset = data->output_offset;
+ op_params.output_multiplier = data->output_multiplier;
+ op_params.output_shift = data->output_shift;
+ SetActivationParams(data->output_activation_min,
+ data->output_activation_max, &op_params);
+
+ bool need_broadcast = reference_ops::ProcessBroadcastShapes(
+ tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorShape(input2), &op_params);
+
+ if (need_broadcast) {
+ reference_ops::BroadcastAdd4DSlow(
+ op_params, tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorData<int16_t>(input1),
+ tflite::micro::GetTensorShape(input2),
+ tflite::micro::GetTensorData<int16_t>(input2),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int16_t>(output));
+ } else {
+ kelvin::opt::ElementwiseAddS16(
+ op_params, tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorData<int16_t>(input1),
+ tflite::micro::GetTensorShape(input2),
+ tflite::micro::GetTensorData<int16_t>(input2),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int16_t>(output));
+ }
+ } else if (output->type == kTfLiteInt8) {
+ tflite::ArithmeticParams op_params;
+ op_params.left_shift = data->left_shift;
+ op_params.input1_offset = data->input1_offset;
+ op_params.input1_multiplier = data->input1_multiplier;
+ op_params.input1_shift = data->input1_shift;
+ op_params.input2_offset = data->input2_offset;
+ op_params.input2_multiplier = data->input2_multiplier;
+ op_params.input2_shift = data->input2_shift;
+ op_params.output_offset = data->output_offset;
+ op_params.output_multiplier = data->output_multiplier;
+ op_params.output_shift = data->output_shift;
+ SetActivationParams(data->output_activation_min,
+ data->output_activation_max, &op_params);
+
+ bool need_broadcast = reference_ops::ProcessBroadcastShapes(
+ tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorShape(input2), &op_params);
+
+ if (need_broadcast) {
+ reference_integer_ops::BroadcastAdd4DSlow(
+ op_params, tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorData<int8_t>(input1),
+ tflite::micro::GetTensorShape(input2),
+ tflite::micro::GetTensorData<int8_t>(input2),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int8_t>(output));
+ } else {
+ kelvin::opt::ElementwiseAddS8(
+ op_params, tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorData<int8_t>(input1),
+ tflite::micro::GetTensorShape(input2),
+ tflite::micro::GetTensorData<int8_t>(input2),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int8_t>(output));
+ }
+ } else {
+ MicroPrintf("Unsupported output type: %s", TfLiteTypeGetName(output->type));
+ return kTfLiteError;
+ }
+
+ return kTfLiteOk;
+}
+
+} // namespace
+
+TFLMRegistration Register_ADD() {
+ return tflite::micro::RegisterOp(Init, Prepare, Eval);
+}
+
+} // namespace tflite
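Because the kelvin/ sources replace the portable add.cc via the accelerated_srcs select in the BUILD change above, callers need no changes: the usual resolver wiring binds to this file's Register_ADD(). A minimal sketch, assuming the standard MicroMutableOpResolver API:

    #include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

    // With kelvin_config() active, AddAdd() defaults to the Register_ADD()
    // defined in this translation unit instead of the reference kernel.
    tflite::MicroMutableOpResolver<1> MakeResolver() {
      tflite::MicroMutableOpResolver<1> resolver;
      resolver.AddAdd();
      return resolver;
    }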
diff --git a/tensorflow/lite/micro/kernels/kelvin/conv.cc b/tensorflow/lite/micro/kernels/kelvin/conv.cc
new file mode 100644
index 0000000..d8fb8a1
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/kelvin/conv.cc
@@ -0,0 +1,183 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tensorflow/lite/kernels/internal/reference/conv.h"
+
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/conv.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/micro_log.h"
+#include "tflm/opt/opt.h"
+
+namespace tflite {
+namespace {
+
+constexpr int kFilterHeightIndex = 1;
+constexpr int kFilterWidthIndex = 2;
+constexpr int kFilterInputChannelIndex = 3;
+constexpr int kInputChannelIndex = 3;
+constexpr int kOutputChannelIndex = 3;
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+ TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+ return context->AllocatePersistentBuffer(context, sizeof(OpDataConv));
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+ const TfLiteEvalTensor* input =
+ tflite::micro::GetEvalInput(context, node, kConvInputTensor);
+ const TfLiteEvalTensor* filter =
+ tflite::micro::GetEvalInput(context, node, kConvWeightsTensor);
+ const TfLiteEvalTensor* bias =
+ (NumInputs(node) == 3)
+ ? tflite::micro::GetEvalInput(context, node, kConvBiasTensor)
+ : nullptr;
+ TfLiteEvalTensor* output =
+ tflite::micro::GetEvalOutput(context, node, kConvOutputTensor);
+
+ TFLITE_DCHECK(node->builtin_data != nullptr);
+ const auto& params =
+ *(reinterpret_cast<TfLiteConvParams*>(node->builtin_data));
+ TFLITE_DCHECK(node->user_data != nullptr);
+ const auto& data = *(static_cast<const OpDataConv*>(node->user_data));
+
+ TF_LITE_ENSURE_EQ(context, input->type, output->type);
+ TF_LITE_ENSURE_MSG(
+ context,
+ input->type == filter->type ||
+ (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8) ||
+ (input->type == kTfLiteInt8 && filter->type == kTfLiteInt4),
+ "Hybrid models are not supported on TFLite Micro.");
+
+ switch (input->type) { // Already know in/out types are same.
+ case kTfLiteFloat32: {
+ tflite::reference_ops::Conv(
+ ConvParamsFloat(params, data), tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<float>(input),
+ tflite::micro::GetTensorShape(filter),
+ tflite::micro::GetTensorData<float>(filter),
+ tflite::micro::GetTensorShape(bias),
+ tflite::micro::GetOptionalTensorData<float>(bias),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<float>(output),
+ tflite::micro::GetTensorShape(nullptr), nullptr);
+ break;
+ }
+ case kTfLiteInt16: {
+ const auto params_q = ConvParamsQuantized(params, data);
+ bool opt = !(params_q.padding_values.width > 0 ||
+ params_q.padding_values.height > 0 ||
+ params_q.dilation_width_factor > 1 ||
+ params_q.dilation_height_factor > 1);
+ switch (bias->type) {
+ case kTfLiteInt32: {
+ const auto fn = opt ? kelvin::opt::ConvS16B32
+ : reference_integer_ops::ConvPerChannel<int32_t>;
+ fn(params_q, data.per_channel_output_multiplier,
+ data.per_channel_output_shift,
+ tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<int16_t>(input),
+ tflite::micro::GetTensorShape(filter),
+ tflite::micro::GetTensorData<int8_t>(filter),
+ tflite::micro::GetTensorShape(bias),
+ tflite::micro::GetOptionalTensorData<std::int32_t>(bias),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int16_t>(output));
+ break;
+ }
+ case kTfLiteInt64: {
+ const auto fn = opt ? kelvin::opt::ConvS16B64
+ : reference_integer_ops::ConvPerChannel<int64_t>;
+ fn(params_q, data.per_channel_output_multiplier,
+ data.per_channel_output_shift,
+ tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<int16_t>(input),
+ tflite::micro::GetTensorShape(filter),
+ tflite::micro::GetTensorData<int8_t>(filter),
+ tflite::micro::GetTensorShape(bias),
+ tflite::micro::GetOptionalTensorData<std::int64_t>(bias),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int16_t>(output));
+ break;
+ }
+ default:
+ MicroPrintf("Bias type %s (%d) not supported.",
+ TfLiteTypeGetName(bias->type), bias->type);
+ return kTfLiteError;
+ }
+ break;
+ }
+ case kTfLiteInt8: {
+ switch (filter->type) {
+ case kTfLiteInt4: {
+ int8_t* unpacked_filter_data = reinterpret_cast<int8_t*>(
+ context->GetScratchBuffer(context, data.filter_buffer_index));
+ tflite::tensor_utils::UnpackDenseInt4IntoInt8(
+ tflite::micro::GetTensorData<int8_t>(filter),
+ tflite::micro::GetTensorShape(filter).FlatSize(),
+ unpacked_filter_data);
+ reference_integer_ops::ConvPerChannel(
+ ConvParamsQuantized(params, data),
+ data.per_channel_output_multiplier, data.per_channel_output_shift,
+ tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<int8_t>(input),
+ tflite::micro::GetTensorShape(filter), unpacked_filter_data,
+ tflite::micro::GetTensorShape(bias),
+ tflite::micro::GetOptionalTensorData<int32_t>(bias),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int8_t>(output));
+ break;
+ }
+ case kTfLiteInt8: {
+ const auto params_q = ConvParamsQuantized(params, data);
+ kelvin::opt::ConvS8(
+ params_q, data.per_channel_output_multiplier,
+ data.per_channel_output_shift,
+ tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<int8_t>(input),
+ tflite::micro::GetTensorShape(filter),
+ tflite::micro::GetTensorData<int8_t>(filter),
+ tflite::micro::GetTensorShape(bias),
+ tflite::micro::GetOptionalTensorData<int32_t>(bias),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int8_t>(output));
+ break;
+ }
+ default:
+ MicroPrintf("Weight type %s (%d) not supported.",
+ TfLiteTypeGetName(filter->type), filter->type);
+ return kTfLiteError;
+ }
+ break;
+ }
+ default:
+ MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type),
+ input->type);
+ return kTfLiteError;
+ }
+ return kTfLiteOk;
+}
+
+} // namespace
+
+TFLMRegistration Register_CONV_2D() {
+ return tflite::micro::RegisterOp(Init, ConvPrepare, Eval);
+}
+
+} // namespace tflite
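The int16 branch chooses between the Kelvin kernel and the per-channel reference at Eval time, exploiting the fact that both share a signature so a single function pointer covers either case. A stripped-down sketch of that dispatch pattern; OptConv and RefConv are hypothetical stand-ins for kelvin::opt::ConvS16B32 and reference_integer_ops::ConvPerChannel<int32_t>:

    #include <cstdint>

    static void OptConv(const int16_t* in, int16_t* out, int n) {
      for (int i = 0; i < n; ++i) out[i] = in[i];  // placeholder body
    }
    static void RefConv(const int16_t* in, int16_t* out, int n) {
      for (int i = 0; i < n; ++i) out[i] = in[i];  // placeholder body
    }

    // Mirrors `const auto fn = opt ? ... : ...;` in Eval: pick the kernel
    // once, then call through the pointer without repeating the long
    // argument list.
    static void RunConv(bool no_padding_or_dilation, const int16_t* in,
                        int16_t* out, int n) {
      const auto fn = no_padding_or_dilation ? OptConv : RefConv;
      fn(in, out, n);
    }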
diff --git a/tensorflow/lite/micro/kernels/kelvin/depthwise_conv.cc b/tensorflow/lite/micro/kernels/kelvin/depthwise_conv.cc
new file mode 100644
index 0000000..f8d9307
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/kelvin/depthwise_conv.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"
+#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/depthwise_conv.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/micro_log.h"
+#include "tflm/opt/opt.h"
+
+namespace tflite {
+namespace {
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+ TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+ return context->AllocatePersistentBuffer(context, sizeof(OpDataConv));
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+ TFLITE_DCHECK(node->user_data != nullptr);
+ TFLITE_DCHECK(node->builtin_data != nullptr);
+
+ auto& params =
+ *(reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data));
+ const OpDataConv& data = *(static_cast<const OpDataConv*>(node->user_data));
+
+ TfLiteEvalTensor* output =
+ tflite::micro::GetEvalOutput(context, node, kDepthwiseConvOutputTensor);
+ const TfLiteEvalTensor* input =
+ tflite::micro::GetEvalInput(context, node, kDepthwiseConvInputTensor);
+ const TfLiteEvalTensor* filter =
+ tflite::micro::GetEvalInput(context, node, kDepthwiseConvWeightsTensor);
+ const TfLiteEvalTensor* bias =
+ (NumInputs(node) == 3)
+ ? tflite::micro::GetEvalInput(context, node, kDepthwiseConvBiasTensor)
+ : nullptr;
+
+ switch (input->type) { // Already know in/out types are same.
+ case kTfLiteFloat32: {
+ tflite::reference_ops::DepthwiseConv(
+ DepthwiseConvParamsFloat(params, data),
+ tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<float>(input),
+ tflite::micro::GetTensorShape(filter),
+ tflite::micro::GetTensorData<float>(filter),
+ tflite::micro::GetTensorShape(bias),
+ tflite::micro::GetOptionalTensorData<float>(bias),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<float>(output));
+ break;
+ }
+ case kTfLiteInt8: {
+ switch (filter->type) {
+ case kTfLiteInt4: {
+ int8_t* unpacked_filter_data = static_cast<int8_t*>(
+ context->GetScratchBuffer(context, data.filter_buffer_index));
+ tflite::tensor_utils::UnpackDenseInt4IntoInt8(
+ tflite::micro::GetTensorData<int8_t>(filter),
+ tflite::micro::GetTensorShape(filter).FlatSize(),
+ unpacked_filter_data);
+ reference_integer_ops::DepthwiseConvPerChannel(
+ DepthwiseConvParamsQuantized(params, data),
+ data.per_channel_output_multiplier, data.per_channel_output_shift,
+ tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<int8_t>(input),
+ tflite::micro::GetTensorShape(filter), unpacked_filter_data,
+ tflite::micro::GetTensorShape(bias),
+ tflite::micro::GetOptionalTensorData<int32_t>(bias),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int8_t>(output));
+ break;
+ }
+ case kTfLiteInt8: {
+ tflite::DepthwiseParams dw_params =
+ DepthwiseConvParamsQuantized(params, data);
+ kelvin::opt::DepthwiseConvS8(
+ dw_params, data.per_channel_output_multiplier,
+ data.per_channel_output_shift,
+ tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<int8_t>(input),
+ tflite::micro::GetTensorShape(filter),
+ tflite::micro::GetTensorData<int8_t>(filter),
+ tflite::micro::GetTensorShape(bias),
+ tflite::micro::GetOptionalTensorData<int32_t>(bias),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int8_t>(output));
+ break;
+ }
+ default:
+ MicroPrintf("Filter type %s (%d) for input type %s not supported.",
+ TfLiteTypeGetName(filter->type), filter->type,
+ TfLiteTypeGetName(input->type));
+ return kTfLiteError;
+ }
+ break;
+ }
+ case kTfLiteInt16: {
+ switch (filter->type) {
+ case kTfLiteInt8: {
+ tflite::DepthwiseParams dw_params =
+ DepthwiseConvParamsQuantized(params, data);
+ kelvin::opt::DepthwiseConvS16(
+ dw_params, data.per_channel_output_multiplier,
+ data.per_channel_output_shift,
+ tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<int16_t>(input),
+ tflite::micro::GetTensorShape(filter),
+ tflite::micro::GetTensorData<int8_t>(filter),
+ tflite::micro::GetTensorShape(bias),
+ tflite::micro::GetOptionalTensorData<int64_t>(bias),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int16_t>(output));
+ break;
+ }
+ default:
+ MicroPrintf("Filter type %s (%d) for input type %s not supported.",
+ TfLiteTypeGetName(filter->type), filter->type,
+ TfLiteTypeGetName(input->type));
+ return kTfLiteError;
+ }
+ break;
+ }
+ default:
+ MicroPrintf("Input type %s (%d) not supported.",
+ TfLiteTypeGetName(input->type), input->type);
+ return kTfLiteError;
+ }
+ return kTfLiteOk;
+}
+
+} // namespace
+
+TFLMRegistration Register_DEPTHWISE_CONV_2D() {
+ return tflite::micro::RegisterOp(Init, DepthwiseConvPrepare, Eval);
+}
+
+} // namespace tflite
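Both conv kernels unpack int4 filters into a scratch buffer and then run the ordinary int8 path. A hedged sketch of the packing convention, assuming TFLite's low-nibble-first layout for UnpackDenseInt4IntoInt8:

    #include <cstdint>

    // Each byte holds two signed 4-bit weights, first value in the low
    // nibble (assumed layout).
    static void UnpackInt4(const int8_t* packed, int num_values, int8_t* out) {
      for (int i = 0; i < num_values; ++i) {
        const int8_t byte = packed[i / 2];
        int8_t nibble = (i % 2 == 0) ? (byte & 0x0F) : ((byte >> 4) & 0x0F);
        if (nibble > 7) nibble -= 16;  // sign-extend two's complement
        out[i] = nibble;
      }
    }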
diff --git a/tensorflow/lite/micro/kernels/kelvin/leaky_relu.cc b/tensorflow/lite/micro/kernels/kelvin/leaky_relu.cc
new file mode 100644
index 0000000..fafcfed
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/kelvin/leaky_relu.cc
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h"
+
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/leaky_relu.h"
+#include "tensorflow/lite/micro/micro_log.h"
+#include "tflm/opt/opt.h"
+
+namespace tflite {
+
+namespace {
+void* LeakyReluInit(TfLiteContext* context, const char* buffer, size_t length) {
+ TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+ return context->AllocatePersistentBuffer(context, sizeof(LeakyReluOpData));
+}
+
+TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
+ const TfLiteEvalTensor* input =
+ tflite::micro::GetEvalInput(context, node, kInputTensor);
+ TfLiteEvalTensor* output =
+ tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+ const LeakyReluOpData& data = *static_cast<LeakyReluOpData*>(node->user_data);
+
+  // Kelvin's vector ISA is used for the Int8 and Int16 paths;
+  // Float32 falls back to the reference op.
+ switch (input->type) {
+ case kTfLiteFloat32: {
+ LeakyReluParams op_params = {};
+ const auto* params =
+ static_cast<TfLiteLeakyReluParams*>(node->builtin_data);
+
+ op_params.alpha = params->alpha;
+ reference_ops::LeakyRelu(op_params, tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<float>(input),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<float>(output));
+ return kTfLiteOk;
+ } break;
+ case kTfLiteInt8: {
+ LeakyReluParams op_params = {};
+ op_params.input_offset = data.input_zero_point;
+ op_params.output_offset = data.output_zero_point;
+ op_params.output_multiplier_alpha = data.output_multiplier_alpha;
+ op_params.output_shift_alpha = data.output_shift_alpha;
+ op_params.output_multiplier_identity = data.output_multiplier_identity;
+ op_params.output_shift_identity = data.output_shift_identity;
+ kelvin::opt::LeakyReluS8(op_params, tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<int8_t>(input),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int8_t>(output));
+ return kTfLiteOk;
+ } break;
+ case kTfLiteInt16: {
+ LeakyReluParams op_params = {};
+ op_params.input_offset = data.input_zero_point;
+ op_params.output_offset = data.output_zero_point;
+ op_params.output_multiplier_alpha = data.output_multiplier_alpha;
+ op_params.output_shift_alpha = data.output_shift_alpha;
+ op_params.output_multiplier_identity = data.output_multiplier_identity;
+ op_params.output_shift_identity = data.output_shift_identity;
+ kelvin::opt::LeakyReluS16(op_params, tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<int16_t>(input),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int16_t>(output));
+ return kTfLiteOk;
+ } break;
+ default:
+ MicroPrintf(
+ "Only float32, int8, int16 are supported by LEAKY_RELU, got %s.",
+ TfLiteTypeGetName(input->type));
+ return kTfLiteError;
+ }
+
+ return kTfLiteError;
+}
+
+} // namespace
+
+TFLMRegistration Register_LEAKY_RELU() {
+ return tflite::micro::RegisterOp(LeakyReluInit, LeakyReluPrepare,
+ LeakyReluEval);
+}
+
+} // namespace tflite
diff --git a/tensorflow/lite/micro/kernels/kelvin/logistic.cc b/tensorflow/lite/micro/kernels/kelvin/logistic.cc
new file mode 100644
index 0000000..974ef12
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/kelvin/logistic.cc
@@ -0,0 +1,112 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h"
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/logistic.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/kernels/op_macros.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/logistic.h"
+#include "tensorflow/lite/micro/micro_log.h"
+#include "tflm/opt/opt.h"
+
+namespace tflite {
+namespace {
+
+void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) {
+ TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+ return context->AllocatePersistentBuffer(context, sizeof(OpDataLogistic));
+}
+
+TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
+ const TfLiteEvalTensor* input =
+ tflite::micro::GetEvalInput(context, node, kLogisticInputTensor);
+ TfLiteEvalTensor* output =
+ tflite::micro::GetEvalOutput(context, node, kLogisticOutputTensor);
+
+ TFLITE_DCHECK(node->user_data != nullptr);
+ OpDataLogistic* data = static_cast<OpDataLogistic*>(node->user_data);
+
+ if (input->type == kTfLiteFloat32) {
+ switch (output->type) {
+ case kTfLiteFloat32: {
+ reference_ops::Logistic(tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<float>(input),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<float>(output));
+ return kTfLiteOk;
+ }
+ default:
+ MicroPrintf("Input %s, output %s not supported.",
+ TfLiteTypeGetName(input->type),
+ TfLiteTypeGetName(output->type));
+ return kTfLiteError;
+ }
+ } else if (input->type == kTfLiteInt16) {
+ switch (output->type) {
+ case kTfLiteInt16: {
+ reference_integer_ops::Logistic(
+ data->input_multiplier, data->input_left_shift,
+ NumElements(input->dims),
+ tflite::micro::GetTensorData<int16_t>(input),
+ tflite::micro::GetTensorData<int16_t>(output));
+ return kTfLiteOk;
+ }
+ default:
+ MicroPrintf("Input %s, output %s not supported.",
+ TfLiteTypeGetName(input->type),
+ TfLiteTypeGetName(output->type));
+ return kTfLiteError;
+ }
+ } else if (input->type == kTfLiteInt8) {
+ switch (output->type) {
+ case kTfLiteInt8: {
+ kelvin::opt::LogisticS8(
+ data->input_zero_point, data->input_range_radius,
+ data->input_multiplier, data->input_left_shift,
+ NumElements(input->dims),
+ tflite::micro::GetTensorData<int8_t>(input),
+ tflite::micro::GetTensorData<int8_t>(output));
+ return kTfLiteOk;
+ }
+ default:
+ MicroPrintf("Input %s, output %s not supported.",
+ TfLiteTypeGetName(input->type),
+ TfLiteTypeGetName(output->type));
+ return kTfLiteError;
+ }
+ } else {
+ // TODO(b/141211002): Also support other data types once we have supported
+ // temporary tensors in TFLM.
+ MicroPrintf("Input %s, output %s not supported.",
+ TfLiteTypeGetName(input->type),
+ TfLiteTypeGetName(output->type));
+ return kTfLiteError;
+ }
+ return kTfLiteOk;
+}
+
+} // namespace
+
+TFLMRegistration Register_LOGISTIC() {
+ return tflite::micro::RegisterOp(LogisticInit, LogisticPrepare, LogisticEval);
+}
+} // namespace tflite
diff --git a/tensorflow/lite/micro/kernels/kelvin/mul.cc b/tensorflow/lite/micro/kernels/kelvin/mul.cc
new file mode 100644
index 0000000..e006e9b
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/kelvin/mul.cc
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
+
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/mul.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/mul.h"
+#include "tensorflow/lite/micro/memory_helpers.h"
+#include "tensorflow/lite/micro/micro_log.h"
+#include "tflm/opt/opt.h"
+
+namespace tflite {
+
+TfLiteStatus MulEval(TfLiteContext* context, TfLiteNode* node) {
+ TFLITE_DCHECK(node->builtin_data != nullptr);
+ auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
+
+ TFLITE_DCHECK(node->user_data != nullptr);
+ const OpDataMul* data = static_cast<const OpDataMul*>(node->user_data);
+
+ const TfLiteEvalTensor* input1 =
+ tflite::micro::GetEvalInput(context, node, kMulInput1Tensor);
+ const TfLiteEvalTensor* input2 =
+ tflite::micro::GetEvalInput(context, node, kMulInput2Tensor);
+ TfLiteEvalTensor* output =
+ tflite::micro::GetEvalOutput(context, node, kMulOutputTensor);
+
+ if (output->type == kTfLiteFloat32) {
+ EvalMulFloatReference(context, node, params, data, input1, input2, output);
+ } else if (output->type == kTfLiteInt32) {
+ EvalMulQuantizedReference(context, node, data, input1, input2, output);
+ } else if (output->type == kTfLiteInt16) {
+ tflite::ArithmeticParams op_params = {};
+ op_params.quantized_activation_min = data->output_activation_min;
+ op_params.quantized_activation_max = data->output_activation_max;
+ op_params.input1_offset = -data->input1_zero_point;
+ op_params.input2_offset = -data->input2_zero_point;
+ op_params.output_offset = data->output_zero_point;
+ op_params.output_multiplier = data->output_multiplier;
+ op_params.output_shift = data->output_shift;
+
+ bool need_broadcast = reference_ops::ProcessBroadcastShapes(
+ tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorShape(input2), &op_params);
+
+ if (need_broadcast) {
+ reference_integer_ops::BroadcastMul4DSlow(
+ op_params, tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorData<int16_t>(input1),
+ tflite::micro::GetTensorShape(input2),
+ tflite::micro::GetTensorData<int16_t>(input2),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int16_t>(output));
+ } else {
+ kelvin::opt::MulS16(op_params, tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorData<int16_t>(input1),
+ tflite::micro::GetTensorShape(input2),
+ tflite::micro::GetTensorData<int16_t>(input2),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int16_t>(output));
+ }
+ } else if (output->type == kTfLiteInt8) {
+ tflite::ArithmeticParams op_params = {};
+ op_params.quantized_activation_min = data->output_activation_min;
+ op_params.quantized_activation_max = data->output_activation_max;
+ op_params.input1_offset = -data->input1_zero_point;
+ op_params.input2_offset = -data->input2_zero_point;
+ op_params.output_offset = data->output_zero_point;
+ op_params.output_multiplier = data->output_multiplier;
+ op_params.output_shift = data->output_shift;
+
+ bool need_broadcast = reference_ops::ProcessBroadcastShapes(
+ tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorShape(input2), &op_params);
+ if (need_broadcast) {
+ reference_integer_ops::BroadcastMul4DSlow(
+ op_params, tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorData<int8_t>(input1),
+ tflite::micro::GetTensorShape(input2),
+ tflite::micro::GetTensorData<int8_t>(input2),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int8_t>(output));
+ } else {
+ kelvin::opt::MulS8(op_params, tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorData<int8_t>(input1),
+ tflite::micro::GetTensorShape(input2),
+ tflite::micro::GetTensorData<int8_t>(input2),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int8_t>(output));
+ }
+ } else {
+ MicroPrintf("Unsupported output type: %s", TfLiteTypeGetName(output->type));
+ return kTfLiteError;
+ }
+
+ return kTfLiteOk;
+}
+
+TFLMRegistration Register_MUL() {
+ return tflite::micro::RegisterOp(MulInit, MulPrepare, MulEval);
+}
+
+} // namespace tflite
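The quantized paths carry negated zero points (op_params.input1_offset = -data->input1_zero_point), so the kernel can add the offset instead of subtracting. A hedged sketch of the per-element int8 math, using plain floating point where the real kernel uses the fixed-point output_multiplier/output_shift pair:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // effective_scale stands for s1*s2/s_out; the input offsets are the
    // negated input zero points, output_offset the output zero point, as
    // set up in MulEval above.
    static int8_t MulOneElement(int8_t q1, int8_t q2, int32_t input1_offset,
                                int32_t input2_offset, int32_t output_offset,
                                double effective_scale) {
      const int32_t raw = (q1 + input1_offset) * (q2 + input2_offset);
      const int32_t rescaled =
          static_cast<int32_t>(std::lround(raw * effective_scale)) +
          output_offset;
      // The real kernel clamps to the activation range; [-128, 127] here.
      return static_cast<int8_t>(std::min(127, std::max(-128, rescaled)));
    }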
diff --git a/tensorflow/lite/micro/kernels/kelvin/pooling.cc b/tensorflow/lite/micro/kernels/kelvin/pooling.cc
new file mode 100644
index 0000000..94fc6f2
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/kelvin/pooling.cc
@@ -0,0 +1,129 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tensorflow/lite/kernels/internal/reference/pooling.h"
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/pooling.h"
+#include "tensorflow/lite/micro/micro_log.h"
+#include "tflm/opt/opt.h"
+
+namespace tflite {
+
+namespace {
+
+TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
+ TFLITE_DCHECK(node->builtin_data != nullptr);
+ auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
+
+ TFLITE_DCHECK(node->user_data != nullptr);
+ const OpDataPooling* data =
+ static_cast<const OpDataPooling*>(node->user_data);
+
+ const TfLiteEvalTensor* input =
+ micro::GetEvalInput(context, node, kPoolingInputTensor);
+ TfLiteEvalTensor* output =
+ micro::GetEvalOutput(context, node, kPoolingOutputTensor);
+
+ // Inputs and outputs share the same type, guaranteed by the converter.
+ switch (input->type) {
+ case kTfLiteFloat32:
+ AveragePoolingEvalFloat(context, node, params, data, input, output);
+ break;
+ case kTfLiteInt8:
+ AveragePoolingEvalQuantized<int8_t>(context, node, params, data, input,
+ output);
+ break;
+ case kTfLiteInt16:
+ AveragePoolingEvalQuantized<int16_t>(context, node, params, data, input,
+ output);
+ break;
+ default:
+ MicroPrintf("Input type %s is not currently supported",
+ TfLiteTypeGetName(input->type));
+ return kTfLiteError;
+ }
+ return kTfLiteOk;
+}
+
+TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) {
+ TFLITE_DCHECK(node->builtin_data != nullptr);
+ auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
+
+ TFLITE_DCHECK(node->user_data != nullptr);
+ const OpDataPooling* data =
+ static_cast<const OpDataPooling*>(node->user_data);
+
+ const TfLiteEvalTensor* input =
+ micro::GetEvalInput(context, node, kPoolingInputTensor);
+ TfLiteEvalTensor* output =
+ micro::GetEvalOutput(context, node, kPoolingOutputTensor);
+
+ tflite::PoolParams op_params;
+ op_params.stride_height = params->stride_height;
+ op_params.stride_width = params->stride_width;
+ op_params.filter_height = params->filter_height;
+ op_params.filter_width = params->filter_width;
+ op_params.padding_values.height = data->padding.height;
+ op_params.padding_values.width = data->padding.width;
+ op_params.quantized_activation_min = data->activation_min;
+ op_params.quantized_activation_max = data->activation_max;
+ op_params.float_activation_min = data->activation_min_f32;
+ op_params.float_activation_max = data->activation_max_f32;
+
+ switch (input->type) {
+ case kTfLiteFloat32:
+ reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<float>(input),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<float>(output));
+ break;
+ case kTfLiteInt8:
+ kelvin::opt::MaxPoolS8(
+ op_params, tflite::micro::GetTensorShape(input), input->data.int8,
+ tflite::micro::GetTensorShape(output), output->data.int8);
+ break;
+ case kTfLiteInt16:
+ kelvin::opt::MaxPoolS16(
+ op_params, tflite::micro::GetTensorShape(input), input->data.i16,
+ tflite::micro::GetTensorShape(output), output->data.i16);
+ break;
+ default:
+ MicroPrintf("Type %s not currently supported.",
+ TfLiteTypeGetName(input->type));
+ return kTfLiteError;
+ }
+ return kTfLiteOk;
+}
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+ TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+ return context->AllocatePersistentBuffer(context, sizeof(OpDataPooling));
+}
+
+} // namespace
+
+TFLMRegistration Register_AVERAGE_POOL_2D() {
+ return tflite::micro::RegisterOp(Init, PoolingPrepare, AverageEval);
+}
+
+TFLMRegistration Register_MAX_POOL_2D() {
+ return tflite::micro::RegisterOp(Init, PoolingPrepare, MaxEval);
+}
+
+} // namespace tflite
diff --git a/tensorflow/lite/micro/kernels/kelvin/reshape.cc b/tensorflow/lite/micro/kernels/kelvin/reshape.cc
new file mode 100644
index 0000000..76e3e52
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/kelvin/reshape.cc
@@ -0,0 +1,116 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/memory_helpers.h"
+#include "tensorflow/lite/micro/micro_utils.h"
+#include "tflm/opt/opt.h"
+
+namespace tflite {
+namespace {
+
+constexpr int kInputTensor = 0;
+constexpr int kOutputTensor = 0;
+
+TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
+ MicroContext* micro_context = GetMicroContext(context);
+
+ TfLiteTensor* input =
+ micro_context->AllocateTempInputTensor(node, kInputTensor);
+ TF_LITE_ENSURE(context, input != nullptr);
+ TfLiteTensor* output =
+ micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+ TF_LITE_ENSURE(context, output != nullptr);
+ // Tensorflow's Reshape allows one of the shape components to have the
+ // special -1 value, meaning it will be calculated automatically based on the
+ // input. Here we calculate what that dimension should be so that the number
+  // of output elements is the same as the number of input elements.
+ int num_input_elements = NumElements(input);
+ TfLiteIntArray* output_shape = output->dims;
+
+ if (NumInputs(node) == 1 && // Legacy scalar supported with params.
+ output_shape->size == 1 && output_shape->data[0] == 0) {
+ // Legacy tflite models use a shape parameter of [0] to indicate scalars,
+ // so adjust accordingly. TODO(b/111614235): Allow zero-sized buffers during
+ // toco conversion.
+ output_shape->size = 0;
+ }
+
+ int num_output_elements = 1;
+ int stretch_dim = -1;
+ for (int i = 0; i < output_shape->size; ++i) {
+ int value = output_shape->data[i];
+ if (value == -1) {
+ TF_LITE_ENSURE_EQ(context, stretch_dim, -1);
+ stretch_dim = i;
+ } else {
+ num_output_elements *= value;
+ }
+ }
+ if (stretch_dim != -1) {
+ TfLiteEvalTensor* output_eval =
+ tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+ TF_LITE_ENSURE_STATUS(tflite::micro::CreateWritableTensorDimsWithCopy(
+ context, output, output_eval));
+ output_shape = output->dims; // output tensor dims were moved
+ output_shape->data[stretch_dim] = num_input_elements / num_output_elements;
+ num_output_elements *= output_shape->data[stretch_dim];
+ }
+
+ TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
+ TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements);
+
+ micro_context->DeallocateTempTfLiteTensor(input);
+ micro_context->DeallocateTempTfLiteTensor(output);
+ return kTfLiteOk;
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+ TF_LITE_ENSURE(context, NumInputs(node) == 1 || NumInputs(node) == 2);
+ TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+ TF_LITE_ENSURE_EQ(context, ReshapeOutput(context, node), kTfLiteOk);
+ return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+ const TfLiteEvalTensor* input =
+ tflite::micro::GetEvalInput(context, node, kInputTensor);
+ TfLiteEvalTensor* output =
+ tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+
+ // TODO(b/162522304): storing input bytes in OpData increases some models
+ // significantly, possibly due to alignment issues.
+ size_t input_bytes;
+ TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(input->type, &input_bytes));
+ input_bytes *= ElementCount(*input->dims);
+
+ // Do nothing for in-place reshape.
+ if (input->data.raw != output->data.raw) {
+ // Otherwise perform reshape with copy.
+ kelvin::opt::Memcpy(output->data.raw, input->data.raw, input_bytes);
+ }
+ return kTfLiteOk;
+}
+
+} // namespace
+
+TFLMRegistration Register_RESHAPE() {
+ return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
+}
+
+} // namespace tflite
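ReshapeOutput infers the -1 ("stretch") dimension so the element counts match. The arithmetic in isolation, as a sketch:

    #include <cassert>

    // The product of the known output dims must divide the input element
    // count; the -1 slot receives the quotient. E.g. reshaping 24 elements
    // to {2, -1, 3} infers 4 for the middle dimension.
    static int InferStretchDim(int num_input_elements, const int* dims,
                               int rank, int stretch_index) {
      int known = 1;
      for (int i = 0; i < rank; ++i) {
        if (i != stretch_index) known *= dims[i];
      }
      assert(known != 0 && num_input_elements % known == 0);
      return num_input_elements / known;
    }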
diff --git a/tensorflow/lite/micro/kernels/kelvin/resize_nearest_neighbor.cc b/tensorflow/lite/micro/kernels/kelvin/resize_nearest_neighbor.cc
new file mode 100644
index 0000000..5b700ae
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/kelvin/resize_nearest_neighbor.cc
@@ -0,0 +1,124 @@
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h"
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/kernels/op_macros.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/micro_log.h"
+#include "tflm/opt/opt.h"
+
+namespace tflite {
+
+namespace {
+
+constexpr int kInputTensor = 0;
+constexpr int kSizeTensor = 1;
+constexpr int kOutputTensor = 0;
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+ MicroContext* micro_context = GetMicroContext(context);
+
+ TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
+ TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+ TfLiteTensor* input =
+ micro_context->AllocateTempInputTensor(node, kInputTensor);
+ TfLiteTensor* size =
+ micro_context->AllocateTempInputTensor(node, kSizeTensor);
+ TfLiteTensor* output =
+ micro_context->AllocateTempOutputTensor(node, kOutputTensor);
+
+ // Our current implementations rely on the input being 4D,
+ // and the size being 1D tensor with exactly 2 elements.
+ TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
+ TF_LITE_ENSURE_EQ(context, NumDimensions(size), 1);
+ TF_LITE_ENSURE_EQ(context, size->type, kTfLiteInt32);
+ TF_LITE_ENSURE_EQ(context, size->dims->data[0], 2);
+
+ output->type = input->type;
+
+ if (!IsConstantTensor(size)) {
+ MicroPrintf("Dynamic tensors are unsupported in tfmicro.");
+ return kTfLiteError;
+ }
+
+ micro_context->DeallocateTempTfLiteTensor(input);
+ micro_context->DeallocateTempTfLiteTensor(size);
+ micro_context->DeallocateTempTfLiteTensor(output);
+
+ return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+ auto* params =
+ reinterpret_cast<TfLiteResizeNearestNeighborParams*>(node->builtin_data);
+
+ const TfLiteEvalTensor* input =
+ tflite::micro::GetEvalInput(context, node, kInputTensor);
+ const TfLiteEvalTensor* size =
+ tflite::micro::GetEvalInput(context, node, kSizeTensor);
+ TfLiteEvalTensor* output =
+ tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+
+ tflite::ResizeNearestNeighborParams op_params;
+ op_params.align_corners = params->align_corners;
+ op_params.half_pixel_centers = false;
+
+ if (output->type == kTfLiteFloat32) {
+ reference_ops::ResizeNearestNeighbor(
+ op_params, tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<int32_t>(input),
+ tflite::micro::GetTensorShape(size),
+ tflite::micro::GetTensorData<int32_t>(size),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int32_t>(output));
+ } else if (output->type == kTfLiteInt8) {
+ kelvin::opt::ResizeNearestNeighborS8(
+ op_params, tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<int8_t>(input),
+ tflite::micro::GetTensorShape(size),
+ tflite::micro::GetTensorData<int32_t>(size),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int8_t>(output));
+ } else if (output->type == kTfLiteInt16) {
+ reference_ops::ResizeNearestNeighbor(
+ op_params, tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<int16_t>(input),
+ tflite::micro::GetTensorShape(size),
+ tflite::micro::GetTensorData<int32_t>(size),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int16_t>(output));
+ } else {
+ MicroPrintf("Output tensor type %s (%d) not supported.",
+ TfLiteTypeGetName(output->type), output->type);
+
+ return kTfLiteError;
+ }
+
+ return kTfLiteOk;
+}
+
+} // namespace
+
+TFLMRegistration Register_RESIZE_NEAREST_NEIGHBOR() {
+ return tflite::micro::RegisterOp(nullptr, Prepare, Eval);
+}
+
+} // namespace tflite
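Two notes on Eval above: the float32 branch reuses the int32_t instantiation, which works because nearest-neighbor resize only relocates elements and never does arithmetic on them, so any 4-byte type suffices; and half_pixel_centers is pinned to false, so the index mapping all three branches rely on reduces to the usual scale-and-clamp. A hedged sketch of that mapping (the real helper lives in the reference implementation):

    #include <algorithm>
    #include <cmath>

    static int NearestIndex(int out_index, int in_size, int out_size,
                            bool align_corners) {
      const float scale =
          (align_corners && out_size > 1)
              ? (in_size - 1) / static_cast<float>(out_size - 1)
              : in_size / static_cast<float>(out_size);
      const int in_index = static_cast<int>(
          align_corners ? std::round(out_index * scale)
                        : std::floor(out_index * scale));
      return std::min(in_index, in_size - 1);
    }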
diff --git a/tensorflow/lite/micro/kernels/pack.cc b/tensorflow/lite/micro/kernels/pack.cc
index f254329..0cfd91b 100644
--- a/tensorflow/lite/micro/kernels/pack.cc
+++ b/tensorflow/lite/micro/kernels/pack.cc
@@ -85,6 +85,9 @@
return PackImpl<int8_t>(context, node, output, data->values_count,
data->axis);
}
+ case kTfLiteInt16: {
+      return PackImpl<int16_t>(context, node, output, data->values_count,
+                               data->axis);
+ }
case kTfLiteInt32: {
return PackImpl<int32_t>(context, node, output, data->values_count,
data->axis);
diff --git a/tensorflow/lite/micro/kernels/testdata/BUILD b/tensorflow/lite/micro/kernels/testdata/BUILD
index 0c7822d..c93bc7d 100644
--- a/tensorflow/lite/micro/kernels/testdata/BUILD
+++ b/tensorflow/lite/micro/kernels/testdata/BUILD
@@ -16,6 +16,7 @@
name = "conv_test_data",
srcs = ["conv_test_data.cc"],
hdrs = ["conv_test_data.h"],
+ visibility = ["//visibility:public"],
deps = ["//tensorflow/lite/c:common"],
)
diff --git a/tensorflow/lite/micro/kernels/transpose.cc b/tensorflow/lite/micro/kernels/transpose.cc
index fd17e89..915def5 100644
--- a/tensorflow/lite/micro/kernels/transpose.cc
+++ b/tensorflow/lite/micro/kernels/transpose.cc
@@ -97,6 +97,12 @@
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
break;
+ case kTfLiteInt16:
+ reference_ops::Transpose(params, tflite::micro::GetTensorShape(input),
+ tflite::micro::GetTensorData<int16_t>(input),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int16_t>(output));
+ break;
case kTfLiteInt8:
reference_ops::Transpose(params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input),
diff --git a/tensorflow/lite/micro/micro_time.cc b/tensorflow/lite/micro/micro_time.cc
index 2d74fdb..d543820 100644
--- a/tensorflow/lite/micro/micro_time.cc
+++ b/tensorflow/lite/micro/micro_time.cc
@@ -26,6 +26,8 @@
#include "tensorflow/lite/micro/micro_time.h"
+#include "benchmarks/cycle_count.h"
+
#if defined(TF_LITE_USE_CTIME)
#include <ctime>
#endif
@@ -34,17 +36,10 @@
#if !defined(TF_LITE_USE_CTIME)
-// Reference implementation of the ticks_per_second() function that's required
-// for a platform to support Tensorflow Lite for Microcontrollers profiling.
-// This returns 0 by default because timing is an optional feature that builds
-// without errors on platforms that do not need it.
+// ticks_per_second() currently returns 0 because a real tick rate is not
+// available on simulator targets.
uint32_t ticks_per_second() { return 0; }
-// Reference implementation of the GetCurrentTimeTicks() function that's
-// required for a platform to support Tensorflow Lite for Microcontrollers
-// profiling. This returns 0 by default because timing is an optional feature
-// that builds without errors on platforms that do not need it.
-uint32_t GetCurrentTimeTicks() { return 0; }
+uint32_t GetCurrentTimeTicks() { return static_cast<uint32_t>(mcycle_read()); }
#else // defined(TF_LITE_USE_CTIME)
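mcycle_read() is provided by @kelvin_sw//benchmarks:cycle_count and is not shown in this patch; on RISC-V it plausibly reads the mcycle CSR. A guess at its shape, for orientation only:

    #include <cstdint>

    // Hypothetical reader of the RV32 mcycle CSR (low 32 bits only, which
    // is all GetCurrentTimeTicks() keeps after its uint32_t cast anyway).
    static inline uint32_t ReadMcycle() {
      uint32_t cycles;
      asm volatile("csrr %0, mcycle" : "=r"(cycles));
      return cycles;
    }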
diff --git a/tensorflow/lite/micro/tools/BUILD b/tensorflow/lite/micro/tools/BUILD
index a85a7ba..e3c6f0c 100644
--- a/tensorflow/lite/micro/tools/BUILD
+++ b/tensorflow/lite/micro/tools/BUILD
@@ -5,7 +5,7 @@
load("//tensorflow:extra_rules.bzl", "tflm_application_friends")
package(
- default_visibility = ["//:__subpackages__"],
+ default_visibility = ["//visibility:public"],
licenses = ["notice"],
)
diff --git a/tensorflow/lite/micro/tools/generate_cc_arrays.py b/tensorflow/lite/micro/tools/generate_cc_arrays.py
index 16d72c1..2a77b4d 100644
--- a/tensorflow/lite/micro/tools/generate_cc_arrays.py
+++ b/tensorflow/lite/micro/tools/generate_cc_arrays.py
@@ -92,9 +92,12 @@
data_1d = data.flatten()
out_string = ','.join([str(x) for x in data_1d])
return [len(data_1d), out_string]
-
else:
- raise ValueError('input file must be .tflite, .bmp, .wav or .csv')
+ with open(input_fname, 'rb') as input_file:
+ buffer = input_file.read()
+ size = len(buffer)
+ out_string = bytes_to_hexstring(buffer)
+ return [size, out_string]
def get_array_name(input_fname):
@@ -117,6 +120,8 @@
return [base_array_name + '_test_data', 'float']
elif input_fname.endswith('npy'):
return [base_array_name + '_test_data', 'float']
+ else:
+ return [base_array_name, 'unsigned char']
def main():
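With this change, any file type other than the recognized ones is embedded verbatim as a byte array, with get_array_name defaulting the element type to unsigned char. The emitted source plausibly takes this shape (names and qualifiers are illustrative; the script's templates decide the real ones):

    // Illustrative only: a 4-byte input file rendered by the fallback path.
    const unsigned int g_some_binary_size = 4;
    const unsigned char g_some_binary[] = {0x4c, 0x4b, 0x00, 0x01};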
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index b799523..597dc14 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -35,6 +35,8 @@
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip",
"https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip",
],
+        patch_file = "@tflite-micro//third_party/gemmlowp:pthread.patch",
)
tf_http_archive(
diff --git a/third_party/gemmlowp/BUILD b/third_party/gemmlowp/BUILD
new file mode 100644
index 0000000..c9776cc
--- /dev/null
+++ b/third_party/gemmlowp/BUILD
@@ -0,0 +1,6 @@
+package(
+ default_visibility = ["//visibility:public"],
+ licenses = ["notice"],
+)
+
+exports_files(glob(["*.patch"]))
diff --git a/third_party/gemmlowp/pthread.patch b/third_party/gemmlowp/pthread.patch
new file mode 100644
index 0000000..547dd52
--- /dev/null
+++ b/third_party/gemmlowp/pthread.patch
@@ -0,0 +1,13 @@
+diff --git a/flags.bzl b/flags.bzl
+index e35fe9e..e26a448 100644
+--- a/flags.bzl
++++ b/flags.bzl
+@@ -4,7 +4,7 @@ LIB_COPTS = []
+ LIB_LINKOPTS = select({
+ ":android": [],
+ ":windows": [],
+- "//conditions:default": ["-lpthread"],
++ "//conditions:default": [],
+ })
+
+ BIN_LINKOPTS = LIB_LINKOPTS
\ No newline at end of file
diff --git a/third_party/ruy/BUILD b/third_party/ruy/BUILD
index 518fea8..8fabe49 100644
--- a/third_party/ruy/BUILD
+++ b/third_party/ruy/BUILD
@@ -4,3 +4,5 @@
default_visibility = ["//visibility:public"],
licenses = ["notice"],
)
+
+exports_files(glob(["*.patch"]))
diff --git a/third_party/ruy/pthread.patch b/third_party/ruy/pthread.patch
new file mode 100644
index 0000000..c8ddf4d
--- /dev/null
+++ b/third_party/ruy/pthread.patch
@@ -0,0 +1,11 @@
+diff --git a/ruy/build_defs.oss.bzl b/ruy/build_defs.oss.bzl
+index e405b41..1d7612b 100644
+--- a/ruy/build_defs.oss.bzl
++++ b/ruy/build_defs.oss.bzl
+@@ -11,5 +11,5 @@ def ruy_linkopts_thread_standard_library():
+ # https://github.com/abseil/abseil-cpp/blob/1112609635037a32435de7aa70a9188dcb591458/absl/base/BUILD.bazel#L155
+ return select({
+ "@bazel_tools//src/conditions:windows": [],
+- "//conditions:default": ["-pthread"],
++ "//conditions:default": [],
+ })
\ No newline at end of file
diff --git a/third_party/ruy/workspace.bzl b/third_party/ruy/workspace.bzl
index 5076962..1671ab5 100644
--- a/third_party/ruy/workspace.bzl
+++ b/third_party/ruy/workspace.bzl
@@ -12,4 +12,5 @@
"https://github.com/google/ruy/archive/54774a7a2cf85963777289193629d4bd42de4a59.zip",
],
build_file = "//third_party/ruy:BUILD",
+ patch_file = "@tflite-micro//third_party/ruy:pthread.patch",
)