Merge remote-tracking branch 'spacebeaker/upstream' into master

Change-Id: Ieb5c6acc9bbafad3813c83058a9e40089fee2e3c
diff --git a/tensorflow/extra_rules.bzl b/tensorflow/extra_rules.bzl index 4a111dc..29e0bda 100644 --- a/tensorflow/extra_rules.bzl +++ b/tensorflow/extra_rules.bzl
@@ -1,5 +1,7 @@ def tflm_kernel_friends(): - return [] + return [ + "public", + ] def tflm_audio_frontend_friends(): return [] @@ -32,3 +34,7 @@ def xtensa_vision_p6_config(): """Config setting for all Vision P6 based cores.""" return "//tensorflow/lite/micro/kernels:xtensa_vision_p6_default" + +def kelvin_config(): + """Config setting for Kelvin-based cores.""" + return "//tensorflow/lite/micro/kernels:kelvin_default"
diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD index 1753465..58ea22d 100644 --- a/tensorflow/lite/micro/BUILD +++ b/tensorflow/lite/micro/BUILD
@@ -334,8 +334,11 @@ hdrs = [ "micro_time.h", ], - copts = micro_copts() + ["-DTF_LITE_USE_CTIME"], - deps = ["//tensorflow/lite/c:common"], + copts = micro_copts(), + deps = [ + "//tensorflow/lite/c:common", + "@kelvin_sw//benchmarks:cycle_count", + ], ) cc_library(
diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index f2ccb06..22254d9 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD
@@ -8,7 +8,9 @@ "xtensa_hifi_3z_config", "xtensa_hifi_5_config", "xtensa_vision_p6_config", + "kelvin_config", ) +load("@bazel_skylib//lib:selects.bzl", "selects") package( features = [ @@ -33,6 +35,10 @@ packages = tflm_kernel_friends(), ) +exports_files( + glob(["*_test.cc"]) +) + #################################### # C++ libraries #################################### @@ -66,6 +72,9 @@ hdrs = [ "conv_test.h", ], + visibility = [ + "//visibility:public", + ], copts = micro_copts(), deps = [ ":kernel_runner", @@ -191,6 +200,9 @@ "-DVISION_P6=1", ] +KELVIN_COPTS = [ +] + tflm_kernel_cc_library( name = "micro_ops", srcs = [ @@ -331,6 +343,7 @@ xtensa_hifi_3z_config(): glob(["xtensa/**/*.h"]), xtensa_hifi_5_config(): glob(["xtensa/**/*.h"]), xtensa_vision_p6_config(): glob(["xtensa/**/*.h"]), + kelvin_config(): glob(["kelvin/**/*.h"]), "//conditions:default": [], }), accelerated_srcs = { @@ -339,6 +352,7 @@ xtensa_hifi_3z_config(): glob(["xtensa/**/*.cc"]), xtensa_hifi_5_config(): glob(["xtensa/**/*.cc"]), xtensa_vision_p6_config(): glob(["xtensa/**/*.cc"]), + kelvin_config(): glob(["kelvin/**/*.cc"]), }, copts = micro_copts() + select({ xtensa_fusion_f1_config(): HIFI4_COPTS, @@ -346,6 +360,7 @@ xtensa_hifi_3z_config(): HIFI4_COPTS, xtensa_hifi_5_config(): HIFI5_COPTS, xtensa_vision_p6_config(): VP6_COPTS, + kelvin_config(): KELVIN_COPTS, "//conditions:default": [], }), visibility = [ @@ -382,6 +397,7 @@ xtensa_hifi_3z_config(): ["//third_party/xtensa/nnlib_hifi4:nnlib_hifi4_lib"], xtensa_hifi_5_config(): ["//third_party/xtensa/nnlib_hifi5:nnlib_hifi5_lib"], xtensa_vision_p6_config(): ["//third_party/xtensa/xi_tflmlib_vision_p6:xi_tflmlib_vision_p6_lib"], + kelvin_config(): ["@kelvin_sw//tflm/opt:opt"], "//conditions:default": [], }), ) @@ -1515,3 +1531,22 @@ ":optimized_kernels": "xtensa_vision_p6", }, ) + +config_setting( + name = "kelvin_default1", + values = { + "platforms": "@kelvin_sw//platforms/riscv32:kelvin", + }, +) + +config_setting( + name = "kelvin_default2", + values = { + "platforms": "//platforms/riscv32:kelvin", + }, +) + +selects.config_setting_group( + name = "kelvin_default", + match_any = [":kelvin_default1", ":kelvin_default2"], +)
diff --git a/tensorflow/lite/micro/kernels/add_test.cc b/tensorflow/lite/micro/kernels/add_test.cc index 6e8b40c..bdf0224 100644 --- a/tensorflow/lite/micro/kernels/add_test.cc +++ b/tensorflow/lite/micro/kernels/add_test.cc
@@ -256,16 +256,42 @@ TF_LITE_MICRO_TEST(QuantizedAddNoActivationInt8) { const float scales[] = {0.25, 0.5, 1.0}; const int zero_points[] = {-10, 4, 13}; - int inout_shape[] = {4, 1, 2, 2, 1}; - const float input1_values[] = {-2.01, -1.01, -0.01, 0.98}; - const float input2_values[] = {1.01, 1.99, 2.99, 4.02}; - const float golden_values[] = {-1, 1, 3, 5}; + int inout_shape[] = {4, 1, 7, 6, 1}; + // clang-format off + const float input1_values[] = { + -2.01, -1.01, -0.01, 0.98, -2.01, -1.01, + -0.01, 0.98, -2.01, -1.01, -0.01, 0.98, + -2.01, -1.01, -0.01, 0.98, -2.01, -1.01, + -0.01, 0.98, -2.01, -1.01, -0.01, 0.98, + -2.01, -1.01, -0.01, 0.98, -2.01, -1.01, + -0.01, 0.98, -2.01, -1.01, -0.01, 0.98, + -2.0, 0.2, 0.7, 0.8, 1.1, 2.0 + }; + const float input2_values[] = { + 1.01, 1.99, 2.99, 4.02, 1.01, 1.99, + 2.99, 4.02, 1.01, 1.99, 2.99, 4.02, + 1.01, 1.99, 2.99, 4.02, 1.01, 1.99, + 2.99, 4.02, 1.01, 1.99, 2.99, 4.02, + 1.01, 1.99, 2.99, 4.02, 1.01, 1.99, + 2.99, 4.02, 1.01, 1.99, 2.99, 4.02, + 0.1, 0.2, 0.3, 0.5, 1.1, 0.1 + }; + const float golden_values[] = { + -1, 1, 3, 5, -1, 1, + 3, 5, -1, 1, 3, 5, + -1, 1, 3, 5, -1, 1, + 3, 5, -1, 1, 3, 5, + -1, 1, 3, 5, -1, 1, + 3, 5, -1, 1, 3, 5, + -1.9, 0.4, 1.0, 1.3, 2.2, 2.1 + }; + // clang-format on - constexpr int kOutputDimsCount = 4; - int8_t input1_quantized[kOutputDimsCount]; - int8_t input2_quantized[kOutputDimsCount]; - int8_t golden_quantized[kOutputDimsCount]; - int8_t output[kOutputDimsCount]; + constexpr int kOutputDimsCount = 42; + int8_t input1_quantized[kOutputDimsCount] __attribute__((aligned(64))); + int8_t input2_quantized[kOutputDimsCount] __attribute__((aligned(64))); + int8_t golden_quantized[kOutputDimsCount] __attribute__((aligned(64))); + int8_t output[kOutputDimsCount] __attribute__((aligned(64))); tflite::testing::TestAddQuantized( inout_shape, input1_values, input1_quantized, scales[0], zero_points[0],
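Note on the test change above: the original 2x2 pattern is kept but tiled out to a 7x6 = 42-element tensor, and the buffers gain 64-byte alignment, so a vectorized Add kernel is exercised on whole vector registers plus a final row that is not a multiple of the vector width. The golden values rely on the usual affine int8 quantization applied by the test helpers; a minimal scalar sketch of that mapping (the helper name is illustrative, not from this diff):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Affine quantization: q = round(x / scale) + zero_point, clamped to int8.
// With scale 0.25 and zero_point -10, the input -2.01 maps to
// round(-8.04) + (-10) = -18.
int8_t QuantizeToInt8(float x, float scale, int zero_point) {
  int q = static_cast<int>(std::round(x / scale)) + zero_point;
  return static_cast<int8_t>(std::min(127, std::max(-128, q)));
}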
diff --git a/tensorflow/lite/micro/kernels/kelvin/add.cc b/tensorflow/lite/micro/kernels/kelvin/add.cc new file mode 100644 index 0000000..8c33716 --- /dev/null +++ b/tensorflow/lite/micro/kernels/kelvin/add.cc
@@ -0,0 +1,177 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tensorflow/lite/kernels/internal/reference/add.h" + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h" +#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "tensorflow/lite/micro/kernels/add.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/micro_log.h" +#include "tflm/opt/opt.h" + +namespace tflite { +namespace { + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataAdd)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + return AddPrepare(context, node); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataAdd* data = static_cast<const OpDataAdd*>(node->user_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kAddInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kAddInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kAddOutputTensor); + + if (output->type == kTfLiteFloat32) { + tflite::ArithmeticParams op_params; + SetActivationParams(data->output_activation_min_f32, + data->output_activation_max_f32, &op_params); + if (data->requires_broadcast) { + reference_ops::BroadcastAdd4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData<float>(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData<float>(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<float>(output)); + } else { + reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData<float>(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData<float>(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<float>(output)); + } + } else if (output->type == kTfLiteInt32) { + tflite::ArithmeticParams op_params; + SetActivationParams(std::numeric_limits<int32_t>::lowest(), + std::numeric_limits<int32_t>::max(), &op_params); + if (data->requires_broadcast) { + reference_ops::BroadcastAdd4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData<int32_t>(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData<int32_t>(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int32_t>(output)); + } else { + kelvin::opt::ElementwiseAddS32( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData<int32_t>(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData<int32_t>(input2), + 
tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int32_t>(output)); + } + } else if (output->type == kTfLiteInt16) { + tflite::ArithmeticParams op_params; + op_params.left_shift = data->left_shift; + op_params.input1_offset = data->input1_offset; + op_params.input1_multiplier = data->input1_multiplier; + op_params.input1_shift = data->input1_shift; + op_params.input2_offset = data->input2_offset; + op_params.input2_multiplier = data->input2_multiplier; + op_params.input2_shift = data->input2_shift; + op_params.output_offset = data->output_offset; + op_params.output_multiplier = data->output_multiplier; + op_params.output_shift = data->output_shift; + SetActivationParams(data->output_activation_min, + data->output_activation_max, &op_params); + + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); + + if (need_broadcast) { + reference_ops::BroadcastAdd4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData<int16_t>(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData<int16_t>(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int16_t>(output)); + } else { + kelvin::opt::ElementwiseAddS16( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData<int16_t>(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData<int16_t>(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int16_t>(output)); + } + } else if (output->type == kTfLiteInt8) { + tflite::ArithmeticParams op_params; + op_params.left_shift = data->left_shift; + op_params.input1_offset = data->input1_offset; + op_params.input1_multiplier = data->input1_multiplier; + op_params.input1_shift = data->input1_shift; + op_params.input2_offset = data->input2_offset; + op_params.input2_multiplier = data->input2_multiplier; + op_params.input2_shift = data->input2_shift; + op_params.output_offset = data->output_offset; + op_params.output_multiplier = data->output_multiplier; + op_params.output_shift = data->output_shift; + SetActivationParams(data->output_activation_min, + data->output_activation_max, &op_params); + + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); + + if (need_broadcast) { + reference_integer_ops::BroadcastAdd4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData<int8_t>(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData<int8_t>(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int8_t>(output)); + } else { + kelvin::opt::ElementwiseAddS8( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData<int8_t>(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData<int8_t>(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int8_t>(output)); + } + } else { + MicroPrintf("Unsupported output type: %s", TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace + +TFLMRegistration Register_ADD() { + return tflite::micro::RegisterOp(Init, Prepare, Eval); +} + +} // namespace tflite
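For the non-broadcast int8/int16 paths the kernel dispatches to kelvin::opt::ElementwiseAddS8/S16, whose bodies live in @kelvin_sw//tflm/opt and are not part of this diff. Whatever the vector implementation does, it has to reproduce the reference arithmetic driven by the op_params filled in above; a scalar sketch, with the gemmlowp rounding-doubling multiply abbreviated to a plain 64-bit multiply-and-shift:

#include <algorithm>
#include <cstdint>

// Mirrors the ArithmeticParams fields the int8 Add path actually uses.
struct AddParams {
  int32_t input1_offset, input2_offset, output_offset;
  int32_t input1_multiplier, input2_multiplier, output_multiplier;
  int left_shift, input1_shift, input2_shift, output_shift;
  int32_t quantized_activation_min, quantized_activation_max;
};

// Approximates MultiplyByQuantizedMultiplier: v * multiplier * 2^shift / 2^31.
static int32_t Rescale(int32_t v, int32_t multiplier, int shift) {
  return static_cast<int32_t>(
      (static_cast<int64_t>(v) * multiplier) >> (31 - shift));
}

int8_t AddS8Scalar(int8_t a, int8_t b, const AddParams& p) {
  const int32_t a32 = (a + p.input1_offset) * (1 << p.left_shift);
  const int32_t b32 = (b + p.input2_offset) * (1 << p.left_shift);
  const int32_t sum = Rescale(a32, p.input1_multiplier, p.input1_shift) +
                      Rescale(b32, p.input2_multiplier, p.input2_shift);
  const int32_t out =
      Rescale(sum, p.output_multiplier, p.output_shift) + p.output_offset;
  return static_cast<int8_t>(
      std::min(p.quantized_activation_max,
               std::max(p.quantized_activation_min, out)));
}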
diff --git a/tensorflow/lite/micro/kernels/kelvin/conv.cc b/tensorflow/lite/micro/kernels/kelvin/conv.cc new file mode 100644 index 0000000..d8fb8a1 --- /dev/null +++ b/tensorflow/lite/micro/kernels/kelvin/conv.cc
@@ -0,0 +1,183 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tensorflow/lite/kernels/internal/reference/conv.h" + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" +#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/conv.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/micro_log.h" +#include "tflm/opt/opt.h" + +namespace tflite { +namespace { + +constexpr int kFilterHeightIndex = 1; +constexpr int kFilterWidthIndex = 2; +constexpr int kFilterInputChannelIndex = 3; +constexpr int kInputChannelIndex = 3; +constexpr int kOutputChannelIndex = 3; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataConv)); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kConvInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kConvWeightsTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 3) + ? tflite::micro::GetEvalInput(context, node, kConvBiasTensor) + : nullptr; + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kConvOutputTensor); + + TFLITE_DCHECK(node->builtin_data != nullptr); + const auto& params = + *(reinterpret_cast<TfLiteConvParams*>(node->builtin_data)); + TFLITE_DCHECK(node->user_data != nullptr); + const auto& data = *(static_cast<const OpDataConv*>(node->user_data)); + + TF_LITE_ENSURE_EQ(context, input->type, output->type); + TF_LITE_ENSURE_MSG( + context, + input->type == filter->type || + (input->type == kTfLiteInt16 && filter->type == kTfLiteInt8) || + (input->type == kTfLiteInt8 && filter->type == kTfLiteInt4), + "Hybrid models are not supported on TFLite Micro."); + + switch (input->type) { // Already know in/out types are same. + case kTfLiteFloat32: { + tflite::reference_ops::Conv( + ConvParamsFloat(params, data), tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<float>(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData<float>(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData<float>(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<float>(output), + tflite::micro::GetTensorShape(nullptr), nullptr); + break; + } + case kTfLiteInt16: { + const auto params_q = ConvParamsQuantized(params, data); + bool opt = !(params_q.padding_values.width > 0 || + params_q.padding_values.height > 0 || + params_q.dilation_width_factor > 1 || + params_q.dilation_height_factor > 1); + switch (bias->type) { + case kTfLiteInt32: { + const auto fn = opt ? 
kelvin::opt::ConvS16B32 + : reference_integer_ops::ConvPerChannel<int32_t>; + fn(params_q, data.per_channel_output_multiplier, + data.per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<int16_t>(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData<int8_t>(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData<std::int32_t>(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int16_t>(output)); + break; + } + case kTfLiteInt64: { + const auto fn = opt ? kelvin::opt::ConvS16B64 + : reference_integer_ops::ConvPerChannel<int64_t>; + fn(params_q, data.per_channel_output_multiplier, + data.per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<int16_t>(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData<int8_t>(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData<std::int64_t>(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int16_t>(output)); + break; + } + default: + MicroPrintf("Bias type %s (%d) not supported.", + TfLiteTypeGetName(bias->type), bias->type); + return kTfLiteError; + } + break; + } + case kTfLiteInt8: { + switch (filter->type) { + case kTfLiteInt4: { + int8_t* unpacked_filter_data = reinterpret_cast<int8_t*>( + context->GetScratchBuffer(context, data.filter_buffer_index)); + tflite::tensor_utils::UnpackDenseInt4IntoInt8( + tflite::micro::GetTensorData<int8_t>(filter), + tflite::micro::GetTensorShape(filter).FlatSize(), + unpacked_filter_data); + reference_integer_ops::ConvPerChannel( + ConvParamsQuantized(params, data), + data.per_channel_output_multiplier, data.per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<int8_t>(input), + tflite::micro::GetTensorShape(filter), unpacked_filter_data, + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData<int32_t>(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int8_t>(output)); + break; + } + case kTfLiteInt8: { + const auto params_q = ConvParamsQuantized(params, data); + kelvin::opt::ConvS8( + params_q, data.per_channel_output_multiplier, + data.per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<int8_t>(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData<int8_t>(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData<int32_t>(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int8_t>(output)); + break; + } + default: + MicroPrintf("Weight type %s (%d) not supported.", + TfLiteTypeGetName(filter->type), filter->type); + return kTfLiteError; + } + break; + } + default: + MicroPrintf("Type %s (%d) not supported.", TfLiteTypeGetName(input->type), + input->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TFLMRegistration Register_CONV_2D() { + return tflite::micro::RegisterOp(Init, ConvPrepare, Eval); +} + +} // namespace tflite
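Two details in conv.cc are easy to miss. The int16 path gates acceleration on `opt`: any padding or dilation falls back to the reference kernel, and the ternary only compiles because kelvin::opt::ConvS16B32/B64 share the exact prototype of reference_integer_ops::ConvPerChannel<int32_t>/<int64_t>, so the deduced function-pointer type enforces signature compatibility at build time. (It also switches on bias->type, which means the int16 path assumes a bias tensor is present.) A self-contained illustration of the dispatch pattern with hypothetical stand-in kernels:

#include <cstdio>

// Hypothetical fast/slow kernels with an identical prototype; a mismatch in
// either signature would make the conditional operator fail to compile.
static void FastKernel(const int* in, int n) { std::printf("fast:%d\n", n); }
static void SlowKernel(const int* in, int n) { std::printf("slow:%d\n", n); }

void Dispatch(const int* in, int n, bool can_accelerate) {
  // Deduces to void (*)(const int*, int); both arms must convert to it.
  const auto fn = can_accelerate ? FastKernel : SlowKernel;
  fn(in, n);
}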
diff --git a/tensorflow/lite/micro/kernels/kelvin/depthwise_conv.cc b/tensorflow/lite/micro/kernels/kelvin/depthwise_conv.cc new file mode 100644 index 0000000..f8d9307 --- /dev/null +++ b/tensorflow/lite/micro/kernels/kelvin/depthwise_conv.cc
@@ -0,0 +1,155 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h" + +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h" +#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/depthwise_conv.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/micro_log.h" +#include "tflm/opt/opt.h" + +namespace tflite { +namespace { + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataConv)); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + auto& params = + *(reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data)); + const OpDataConv& data = *(static_cast<const OpDataConv*>(node->user_data)); + + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kDepthwiseConvOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kDepthwiseConvInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kDepthwiseConvWeightsTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 3) + ? tflite::micro::GetEvalInput(context, node, kDepthwiseConvBiasTensor) + : nullptr; + + switch (input->type) { // Already know in/out types are same. 
+ case kTfLiteFloat32: { + tflite::reference_ops::DepthwiseConv( + DepthwiseConvParamsFloat(params, data), + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<float>(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData<float>(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData<float>(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<float>(output)); + break; + } + case kTfLiteInt8: { + switch (filter->type) { + case kTfLiteInt4: { + int8_t* unpacked_filter_data = static_cast<int8_t*>( + context->GetScratchBuffer(context, data.filter_buffer_index)); + tflite::tensor_utils::UnpackDenseInt4IntoInt8( + tflite::micro::GetTensorData<int8_t>(filter), + tflite::micro::GetTensorShape(filter).FlatSize(), + unpacked_filter_data); + reference_integer_ops::DepthwiseConvPerChannel( + DepthwiseConvParamsQuantized(params, data), + data.per_channel_output_multiplier, data.per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<int8_t>(input), + tflite::micro::GetTensorShape(filter), unpacked_filter_data, + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData<int32_t>(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int8_t>(output)); + break; + } + case kTfLiteInt8: { + tflite::DepthwiseParams dw_params = + DepthwiseConvParamsQuantized(params, data); + kelvin::opt::DepthwiseConvS8( + dw_params, data.per_channel_output_multiplier, + data.per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<int8_t>(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData<int8_t>(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData<int32_t>(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int8_t>(output)); + break; + } + default: + MicroPrintf("Filter type %s (%d) for input type %s not supported.", + TfLiteTypeGetName(filter->type), filter->type, + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + break; + } + case kTfLiteInt16: { + switch (filter->type) { + case kTfLiteInt8: { + tflite::DepthwiseParams dw_params = + DepthwiseConvParamsQuantized(params, data); + kelvin::opt::DepthwiseConvS16( + dw_params, data.per_channel_output_multiplier, + data.per_channel_output_shift, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<int16_t>(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData<int8_t>(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetOptionalTensorData<int64_t>(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int16_t>(output)); + break; + } + default: + MicroPrintf("Filter type %s (%d) for input type %s not supported.", + TfLiteTypeGetName(filter->type), filter->type, + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + break; + } + default: + MicroPrintf("Input type %s (%d) not supported.", + TfLiteTypeGetName(input->type), input->type); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TFLMRegistration Register_DEPTHWISE_CONV_2D() { + return tflite::micro::RegisterOp(Init, DepthwiseConvPrepare, Eval); +} + +} // namespace tflite
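The int8 branch above keeps the reference kernel for int4-packed filters: the weights are first expanded through UnpackDenseInt4IntoInt8 into a scratch buffer and only then convolved. A scalar sketch of that unpacking, assuming TFLite's low-nibble-first layout:

#include <cstdint>

// Dense int4 unpacking: each byte holds two signed 4-bit values, low nibble
// first; each nibble is sign-extended into an int8.
void UnpackInt4(const int8_t* packed, int num_elements, int8_t* unpacked) {
  for (int i = 0; i < num_elements; ++i) {
    const uint8_t byte = static_cast<uint8_t>(packed[i / 2]);
    const uint8_t nibble = (i % 2) ? (byte >> 4) : (byte & 0x0F);
    // Shift the nibble into the top 4 bits, then arithmetic-shift back down
    // to sign-extend (e.g. 0xF -> -1).
    unpacked[i] = static_cast<int8_t>(nibble << 4) >> 4;
  }
}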
diff --git a/tensorflow/lite/micro/kernels/kelvin/leaky_relu.cc b/tensorflow/lite/micro/kernels/kelvin/leaky_relu.cc new file mode 100644 index 0000000..fafcfed --- /dev/null +++ b/tensorflow/lite/micro/kernels/kelvin/leaky_relu.cc
@@ -0,0 +1,100 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tensorflow/lite/kernels/internal/reference/leaky_relu.h" + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/leaky_relu.h" +#include "tensorflow/lite/micro/micro_log.h" +#include "tflm/opt/opt.h" + +namespace tflite { + +namespace { +void* LeakyReluInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(LeakyReluOpData)); +} + +TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + const LeakyReluOpData& data = *static_cast<LeakyReluOpData*>(node->user_data); + + // Kelvin's vector ISA is used to implement Int8 and Int16. + // Float32 uses the reference op. + switch (input->type) { + case kTfLiteFloat32: { + LeakyReluParams op_params = {}; + const auto* params = + static_cast<TfLiteLeakyReluParams*>(node->builtin_data); + + op_params.alpha = params->alpha; + reference_ops::LeakyRelu(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<float>(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<float>(output)); + return kTfLiteOk; + } break; + case kTfLiteInt8: { + LeakyReluParams op_params = {}; + op_params.input_offset = data.input_zero_point; + op_params.output_offset = data.output_zero_point; + op_params.output_multiplier_alpha = data.output_multiplier_alpha; + op_params.output_shift_alpha = data.output_shift_alpha; + op_params.output_multiplier_identity = data.output_multiplier_identity; + op_params.output_shift_identity = data.output_shift_identity; + kelvin::opt::LeakyReluS8(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<int8_t>(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int8_t>(output)); + return kTfLiteOk; + } break; + case kTfLiteInt16: { + LeakyReluParams op_params = {}; + op_params.input_offset = data.input_zero_point; + op_params.output_offset = data.output_zero_point; + op_params.output_multiplier_alpha = data.output_multiplier_alpha; + op_params.output_shift_alpha = data.output_shift_alpha; + op_params.output_multiplier_identity = data.output_multiplier_identity; + op_params.output_shift_identity = data.output_shift_identity; + kelvin::opt::LeakyReluS16(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<int16_t>(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int16_t>(output)); + return kTfLiteOk; + } break; + default: + MicroPrintf( + "Only float32, int8, int16 are supported by LEAKY_RELU, got %s.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; 
+ } + + return kTfLiteError; +} + +} // namespace + +TFLMRegistration Register_LEAKY_RELU() { + return tflite::micro::RegisterOp(LeakyReluInit, LeakyReluPrepare, + LeakyReluEval); +} + +} // namespace tflite
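kelvin::opt::LeakyReluS8/S16 receive the same LeakyReluParams as the reference op, which encode alpha as a second multiplier/shift pair: non-negative centered inputs take the identity rescale, negative ones the alpha rescale. The arithmetic the vector kernels must match, in scalar form with the fixed-point rounding abbreviated:

#include <algorithm>
#include <cstdint>

int8_t LeakyReluS8Scalar(int8_t q, int32_t input_offset, int32_t output_offset,
                         int32_t mul_identity, int shift_identity,
                         int32_t mul_alpha, int shift_alpha) {
  const int32_t x = q - input_offset;
  // Pick the rescale pair by sign: identity for x >= 0, alpha otherwise.
  const int32_t mul = (x >= 0) ? mul_identity : mul_alpha;
  const int shift = (x >= 0) ? shift_identity : shift_alpha;
  const int32_t y =
      static_cast<int32_t>((static_cast<int64_t>(x) * mul) >> (31 - shift)) +
      output_offset;
  return static_cast<int8_t>(std::min(127, std::max(-128, y)));
}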
diff --git a/tensorflow/lite/micro/kernels/kelvin/logistic.cc b/tensorflow/lite/micro/kernels/kelvin/logistic.cc new file mode 100644 index 0000000..974ef12 --- /dev/null +++ b/tensorflow/lite/micro/kernels/kelvin/logistic.cc
@@ -0,0 +1,112 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h" + +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/common.h" +#include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/kernels/internal/reference/logistic.h" +#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/logistic.h" +#include "tensorflow/lite/micro/micro_log.h" +#include "tflm/opt/opt.h" + +namespace tflite { +namespace { + +void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataLogistic)); +} + +TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kLogisticInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kLogisticOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + OpDataLogistic* data = static_cast<OpDataLogistic*>(node->user_data); + + if (input->type == kTfLiteFloat32) { + switch (output->type) { + case kTfLiteFloat32: { + reference_ops::Logistic(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<float>(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<float>(output)); + return kTfLiteOk; + } + default: + MicroPrintf("Input %s, output %s not supported.", + TfLiteTypeGetName(input->type), + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + } else if (input->type == kTfLiteInt16) { + switch (output->type) { + case kTfLiteInt16: { + reference_integer_ops::Logistic( + data->input_multiplier, data->input_left_shift, + NumElements(input->dims), + tflite::micro::GetTensorData<int16_t>(input), + tflite::micro::GetTensorData<int16_t>(output)); + return kTfLiteOk; + } + default: + MicroPrintf("Input %s, output %s not supported.", + TfLiteTypeGetName(input->type), + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + } else if (input->type == kTfLiteInt8) { + switch (output->type) { + case kTfLiteInt8: { + kelvin::opt::LogisticS8( + data->input_zero_point, data->input_range_radius, + data->input_multiplier, data->input_left_shift, + NumElements(input->dims), + tflite::micro::GetTensorData<int8_t>(input), + tflite::micro::GetTensorData<int8_t>(output)); + return kTfLiteOk; + } + default: + MicroPrintf("Input %s, output %s not supported.", + TfLiteTypeGetName(input->type), + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + } else { + // 
TODO(b/141211002): Also support other data types once we have supported + // temporary tensors in TFLM. + MicroPrintf("Input %s, output %s not supported.", + TfLiteTypeGetName(input->type), + TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + return kTfLiteOk; +} + +} // namespace + +TFLMRegistration Register_LOGISTIC() { + return tflite::micro::RegisterOp(LogisticInit, LogisticPrepare, LogisticEval); +} +} // namespace tflite
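Only the int8 logistic is routed to kelvin::opt::LogisticS8; float32 and int16 keep the reference kernels. Part of the int8 contract is the saturation band implied by input_range_radius: centered inputs beyond the radius map straight to the int8 extremes, so only the inner band pays for the fixed-point sigmoid. A sketch of just that gating (the in-band evaluation is omitted):

#include <cstdint>

int8_t LogisticSaturationGate(int8_t q, int32_t zero_point,
                              int32_t range_radius) {
  const int32_t x = static_cast<int32_t>(q) - zero_point;
  if (x <= -range_radius) return -128;  // sigmoid ~ 0 at the quantized floor
  if (x >= range_radius) return 127;    // sigmoid ~ 1 at the quantized ceiling
  return 0;  // placeholder: in-band values go through the fixed-point sigmoid
}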
diff --git a/tensorflow/lite/micro/kernels/kelvin/mul.cc b/tensorflow/lite/micro/kernels/kelvin/mul.cc new file mode 100644 index 0000000..e006e9b --- /dev/null +++ b/tensorflow/lite/micro/kernels/kelvin/mul.cc
@@ -0,0 +1,122 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h" + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/kernels/internal/reference/mul.h" +#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/mul.h" +#include "tensorflow/lite/micro/memory_helpers.h" +#include "tensorflow/lite/micro/micro_log.h" +#include "tflm/opt/opt.h" + +namespace tflite { + +TfLiteStatus MulEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataMul* data = static_cast<const OpDataMul*>(node->user_data); + + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kMulInput1Tensor); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kMulInput2Tensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kMulOutputTensor); + + if (output->type == kTfLiteFloat32) { + EvalMulFloatReference(context, node, params, data, input1, input2, output); + } else if (output->type == kTfLiteInt32) { + EvalMulQuantizedReference(context, node, data, input1, input2, output); + } else if (output->type == kTfLiteInt16) { + tflite::ArithmeticParams op_params = {}; + op_params.quantized_activation_min = data->output_activation_min; + op_params.quantized_activation_max = data->output_activation_max; + op_params.input1_offset = -data->input1_zero_point; + op_params.input2_offset = -data->input2_zero_point; + op_params.output_offset = data->output_zero_point; + op_params.output_multiplier = data->output_multiplier; + op_params.output_shift = data->output_shift; + + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); + + if (need_broadcast) { + reference_integer_ops::BroadcastMul4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData<int16_t>(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData<int16_t>(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int16_t>(output)); + } else { + kelvin::opt::MulS16(op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData<int16_t>(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData<int16_t>(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int16_t>(output)); + } + } else if (output->type == kTfLiteInt8) { + tflite::ArithmeticParams op_params = 
{}; + op_params.quantized_activation_min = data->output_activation_min; + op_params.quantized_activation_max = data->output_activation_max; + op_params.input1_offset = -data->input1_zero_point; + op_params.input2_offset = -data->input2_zero_point; + op_params.output_offset = data->output_zero_point; + op_params.output_multiplier = data->output_multiplier; + op_params.output_shift = data->output_shift; + + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); + if (need_broadcast) { + reference_integer_ops::BroadcastMul4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData<int8_t>(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData<int8_t>(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int8_t>(output)); + } else { + kelvin::opt::MulS8(op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData<int8_t>(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData<int8_t>(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int8_t>(output)); + } + } else { + MicroPrintf("Unsupported output type: %s", TfLiteTypeGetName(output->type)); + return kTfLiteError; + } + + return kTfLiteOk; +} + +TFLMRegistration Register_MUL() { + return tflite::micro::RegisterOp(MulInit, MulPrepare, MulEval); +} + +} // namespace tflite
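As with Add, broadcasting falls back to the reference BroadcastMul4DSlow and only the elementwise case reaches kelvin::opt::MulS8/S16. Note that op_params store the negated zero points (input1_offset = -input1_zero_point), so the kernel adds the offsets. A scalar outline, rounding abbreviated:

#include <algorithm>
#include <cstdint>

int8_t MulS8Scalar(int8_t a, int8_t b, int32_t in1_offset, int32_t in2_offset,
                   int32_t out_offset, int32_t out_multiplier, int out_shift,
                   int32_t act_min, int32_t act_max) {
  // Centered product fits easily in int32: |255 * 255| < 2^16.
  const int32_t prod = (a + in1_offset) * (b + in2_offset);
  int32_t y = static_cast<int32_t>(
                  (static_cast<int64_t>(prod) * out_multiplier) >>
                  (31 - out_shift)) +
              out_offset;
  y = std::min(act_max, std::max(act_min, y));
  return static_cast<int8_t>(y);
}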
diff --git a/tensorflow/lite/micro/kernels/kelvin/pooling.cc b/tensorflow/lite/micro/kernels/kelvin/pooling.cc new file mode 100644 index 0000000..94fc6f2 --- /dev/null +++ b/tensorflow/lite/micro/kernels/kelvin/pooling.cc
@@ -0,0 +1,129 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tensorflow/lite/kernels/internal/reference/pooling.h" + +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/pooling.h" +#include "tensorflow/lite/micro/micro_log.h" +#include "tflm/opt/opt.h" + +namespace tflite { + +namespace { + +TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataPooling* data = + static_cast<const OpDataPooling*>(node->user_data); + + const TfLiteEvalTensor* input = + micro::GetEvalInput(context, node, kPoolingInputTensor); + TfLiteEvalTensor* output = + micro::GetEvalOutput(context, node, kPoolingOutputTensor); + + // Inputs and outputs share the same type, guaranteed by the converter. + switch (input->type) { + case kTfLiteFloat32: + AveragePoolingEvalFloat(context, node, params, data, input, output); + break; + case kTfLiteInt8: + AveragePoolingEvalQuantized<int8_t>(context, node, params, data, input, + output); + break; + case kTfLiteInt16: + AveragePoolingEvalQuantized<int16_t>(context, node, params, data, input, + output); + break; + default: + MicroPrintf("Input type %s is not currently supported", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast<TfLitePoolParams*>(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpDataPooling* data = + static_cast<const OpDataPooling*>(node->user_data); + + const TfLiteEvalTensor* input = + micro::GetEvalInput(context, node, kPoolingInputTensor); + TfLiteEvalTensor* output = + micro::GetEvalOutput(context, node, kPoolingOutputTensor); + + tflite::PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data->padding.width; + op_params.quantized_activation_min = data->activation_min; + op_params.quantized_activation_max = data->activation_max; + op_params.float_activation_min = data->activation_min_f32; + op_params.float_activation_max = data->activation_max_f32; + + switch (input->type) { + case kTfLiteFloat32: + reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<float>(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<float>(output)); + break; + case kTfLiteInt8: + kelvin::opt::MaxPoolS8( + op_params, 
tflite::micro::GetTensorShape(input), input->data.int8, + tflite::micro::GetTensorShape(output), output->data.int8); + break; + case kTfLiteInt16: + kelvin::opt::MaxPoolS16( + op_params, tflite::micro::GetTensorShape(input), input->data.i16, + tflite::micro::GetTensorShape(output), output->data.i16); + break; + default: + MicroPrintf("Type %s not currently supported.", + TfLiteTypeGetName(input->type)); + return kTfLiteError; + } + return kTfLiteOk; +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpDataPooling)); +} + +} // namespace + +TFLMRegistration Register_AVERAGE_POOL_2D() { + return tflite::micro::RegisterOp(Init, PoolingPrepare, AverageEval); +} + +TFLMRegistration Register_MAX_POOL_2D() { + return tflite::micro::RegisterOp(Init, PoolingPrepare, MaxEval); +} + +} // namespace tflite
diff --git a/tensorflow/lite/micro/kernels/kelvin/reshape.cc b/tensorflow/lite/micro/kernels/kelvin/reshape.cc new file mode 100644 index 0000000..76e3e52 --- /dev/null +++ b/tensorflow/lite/micro/kernels/kelvin/reshape.cc
@@ -0,0 +1,116 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/memory_helpers.h" +#include "tensorflow/lite/micro/micro_utils.h" +#include "tflm/opt/opt.h" + +namespace tflite { +namespace { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TF_LITE_ENSURE(context, input != nullptr); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + TF_LITE_ENSURE(context, output != nullptr); + // Tensorflow's Reshape allows one of the shape components to have the + // special -1 value, meaning it will be calculated automatically based on the + // input. Here we calculate what that dimension should be so that the number + // of output elements is the same as the number of input elements. + int num_input_elements = NumElements(input); + TfLiteIntArray* output_shape = output->dims; + + if (NumInputs(node) == 1 && // Legacy scalar supported with params. + output_shape->size == 1 && output_shape->data[0] == 0) { + // Legacy tflite models use a shape parameter of [0] to indicate scalars, + // so adjust accordingly. TODO(b/111614235): Allow zero-sized buffers during + // toco conversion.
+ output_shape->size = 0; + } + + int num_output_elements = 1; + int stretch_dim = -1; + for (int i = 0; i < output_shape->size; ++i) { + int value = output_shape->data[i]; + if (value == -1) { + TF_LITE_ENSURE_EQ(context, stretch_dim, -1); + stretch_dim = i; + } else { + num_output_elements *= value; + } + } + if (stretch_dim != -1) { + TfLiteEvalTensor* output_eval = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + TF_LITE_ENSURE_STATUS(tflite::micro::CreateWritableTensorDimsWithCopy( + context, output, output_eval)); + output_shape = output->dims; // output tensor dims were moved + output_shape->data[stretch_dim] = num_input_elements / num_output_elements; + num_output_elements *= output_shape->data[stretch_dim]; + } + + TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); + TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements); + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(output); + return kTfLiteOk; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE(context, NumInputs(node) == 1 || NumInputs(node) == 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TF_LITE_ENSURE_EQ(context, ReshapeOutput(context, node), kTfLiteOk); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + // TODO(b/162522304): storing input bytes in OpData increases some models + // significantly, possibly due to alignment issues. + size_t input_bytes; + TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(input->type, &input_bytes)); + input_bytes *= ElementCount(*input->dims); + + // Do nothing for in-place reshape. + if (input->data.raw != output->data.raw) { + // Otherwise perform reshape with copy. + kelvin::opt::Memcpy(output->data.raw, input->data.raw, input_bytes); + } + return kTfLiteOk; +} + +} // namespace + +TFLMRegistration Register_RESHAPE() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite
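The stretch-dimension logic above infers the single -1 entry so that input and output element counts match: reshaping a 24-element tensor to {4, -1} must yield {4, 6}. A compact sketch of that inference:

// Returns the inferred size for the -1 axis, or -1 if no axis is stretched.
int InferStretchDim(int num_input_elements, const int* shape, int rank) {
  int known = 1;
  int stretch = -1;
  for (int i = 0; i < rank; ++i) {
    if (shape[i] == -1) stretch = i;  // at most one -1 is legal
    else known *= shape[i];
  }
  return (stretch >= 0) ? num_input_elements / known : -1;
}
// e.g. int dims[] = {4, -1}; InferStretchDim(24, dims, 2) == 6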
diff --git a/tensorflow/lite/micro/kernels/kelvin/resize_nearest_neighbor.cc b/tensorflow/lite/micro/kernels/kelvin/resize_nearest_neighbor.cc new file mode 100644 index 0000000..5b700ae --- /dev/null +++ b/tensorflow/lite/micro/kernels/kelvin/resize_nearest_neighbor.cc
@@ -0,0 +1,124 @@ +/* Copyright 2024 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h" + +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/micro_log.h" +#include "tflm/opt/opt.h" + +namespace tflite { + +namespace { + +constexpr int kInputTensor = 0; +constexpr int kSizeTensor = 1; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + MicroContext* micro_context = GetMicroContext(context); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input = + micro_context->AllocateTempInputTensor(node, kInputTensor); + TfLiteTensor* size = + micro_context->AllocateTempInputTensor(node, kSizeTensor); + TfLiteTensor* output = + micro_context->AllocateTempOutputTensor(node, kOutputTensor); + + // Our current implementations rely on the input being 4D, + // and the size being 1D tensor with exactly 2 elements. 
+ TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4); + TF_LITE_ENSURE_EQ(context, NumDimensions(size), 1); + TF_LITE_ENSURE_EQ(context, size->type, kTfLiteInt32); + TF_LITE_ENSURE_EQ(context, size->dims->data[0], 2); + + output->type = input->type; + + if (!IsConstantTensor(size)) { + MicroPrintf("Dynamic tensors are unsupported in tfmicro."); + return kTfLiteError; + } + + micro_context->DeallocateTempTfLiteTensor(input); + micro_context->DeallocateTempTfLiteTensor(size); + micro_context->DeallocateTempTfLiteTensor(output); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast<TfLiteResizeNearestNeighborParams*>(node->builtin_data); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* size = + tflite::micro::GetEvalInput(context, node, kSizeTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + tflite::ResizeNearestNeighborParams op_params; + op_params.align_corners = params->align_corners; + op_params.half_pixel_centers = false; + + if (output->type == kTfLiteFloat32) { + reference_ops::ResizeNearestNeighbor( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<int32_t>(input), + tflite::micro::GetTensorShape(size), + tflite::micro::GetTensorData<int32_t>(size), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int32_t>(output)); + } else if (output->type == kTfLiteInt8) { + kelvin::opt::ResizeNearestNeighborS8( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<int8_t>(input), + tflite::micro::GetTensorShape(size), + tflite::micro::GetTensorData<int32_t>(size), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int8_t>(output)); + } else if (output->type == kTfLiteInt16) { + reference_ops::ResizeNearestNeighbor( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<int16_t>(input), + tflite::micro::GetTensorShape(size), + tflite::micro::GetTensorData<int32_t>(size), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int16_t>(output)); + } else { + MicroPrintf("Output tensor type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace + +TFLMRegistration Register_RESIZE_NEAREST_NEIGHBOR() { + return tflite::micro::RegisterOp(nullptr, Prepare, Eval); +} + +} // namespace tflite
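The float32 branch instantiating the kernel with int32_t data is deliberate, not a typo: resize-nearest-neighbor only relocates elements and never does arithmetic on them, so any type of the same width works, and a single int32 instantiation serves the float case. A sketch of why that is safe:

#include <cstdint>
#include <cstring>

// Same width means a bit-for-bit copy through int32_t preserves the float.
static_assert(sizeof(float) == sizeof(int32_t), "copy-equivalent widths");

void CopyElementAsInt32(const void* src, void* dst) {
  int32_t bits;
  std::memcpy(&bits, src, sizeof(bits));  // read the raw 32 bits
  std::memcpy(dst, &bits, sizeof(bits));  // write them back unchanged
}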
diff --git a/tensorflow/lite/micro/kernels/pack.cc b/tensorflow/lite/micro/kernels/pack.cc index f254329..0cfd91b 100644 --- a/tensorflow/lite/micro/kernels/pack.cc +++ b/tensorflow/lite/micro/kernels/pack.cc
@@ -85,6 +85,9 @@ return PackImpl<int8_t>(context, node, output, data->values_count, data->axis); } + case kTfLiteInt16: { + return PackImpl<int16_t>(context, node, output, data->values_count, data->axis); + } case kTfLiteInt32: { return PackImpl<int32_t>(context, node, output, data->values_count, data->axis);
diff --git a/tensorflow/lite/micro/kernels/testdata/BUILD b/tensorflow/lite/micro/kernels/testdata/BUILD index 0c7822d..c93bc7d 100644 --- a/tensorflow/lite/micro/kernels/testdata/BUILD +++ b/tensorflow/lite/micro/kernels/testdata/BUILD
@@ -16,6 +16,7 @@ name = "conv_test_data", srcs = ["conv_test_data.cc"], hdrs = ["conv_test_data.h"], + visibility = ["//visibility:public"], deps = ["//tensorflow/lite/c:common"], )
diff --git a/tensorflow/lite/micro/kernels/transpose.cc b/tensorflow/lite/micro/kernels/transpose.cc index fd17e89..915def5 100644 --- a/tensorflow/lite/micro/kernels/transpose.cc +++ b/tensorflow/lite/micro/kernels/transpose.cc
@@ -97,6 +97,12 @@ tflite::micro::GetTensorShape(output), tflite::micro::GetTensorData<float>(output)); break; + case kTfLiteInt16: + reference_ops::Transpose(params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData<int16_t>(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData<int16_t>(output)); + break; case kTfLiteInt8: reference_ops::Transpose(params, tflite::micro::GetTensorShape(input), tflite::micro::GetTensorData<int8_t>(input),
diff --git a/tensorflow/lite/micro/micro_time.cc b/tensorflow/lite/micro/micro_time.cc index 2d74fdb..d543820 100644 --- a/tensorflow/lite/micro/micro_time.cc +++ b/tensorflow/lite/micro/micro_time.cc
@@ -26,6 +26,8 @@ #include "tensorflow/lite/micro/micro_time.h" + +#include "benchmarks/cycle_count.h" + #if defined(TF_LITE_USE_CTIME) #include <ctime> #endif @@ -34,17 +36,10 @@ #if !defined(TF_LITE_USE_CTIME) -// Reference implementation of the ticks_per_second() function that's required -// for a platform to support Tensorflow Lite for Microcontrollers profiling. -// This returns 0 by default because timing is an optional feature that builds -// without errors on platforms that do not need it. +// ticks_per_second() is currently disabled, as it won't work for simulator targets. uint32_t ticks_per_second() { return 0; } -// Reference implementation of the GetCurrentTimeTicks() function that's -// required for a platform to support Tensorflow Lite for Microcontrollers -// profiling. This returns 0 by default because timing is an optional feature -// that builds without errors on platforms that do not need it. -uint32_t GetCurrentTimeTicks() { return 0; } +uint32_t GetCurrentTimeTicks() { return static_cast<uint32_t>(mcycle_read()); } #else // defined(TF_LITE_USE_CTIME)
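GetCurrentTimeTicks() now forwards to mcycle_read() from @kelvin_sw//benchmarks:cycle_count (the dependency added to the micro_time BUILD target earlier in this change). Its definition is not in this diff; on RV32 a typical implementation reads the mcycle/mcycleh CSR pair with a rollover guard, along these lines (a sketch, not the kelvin_sw source):

#include <cstdint>

// Plausible RV32 cycle-counter read. mcycleh is re-read to guard against the
// low word rolling over between the two CSR reads.
static inline uint64_t mcycle_read_sketch() {
  uint32_t hi0, lo, hi1;
  do {
    asm volatile("csrr %0, mcycleh" : "=r"(hi0));
    asm volatile("csrr %0, mcycle" : "=r"(lo));
    asm volatile("csrr %0, mcycleh" : "=r"(hi1));
  } while (hi0 != hi1);
  return (static_cast<uint64_t>(hi1) << 32) | lo;
}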
diff --git a/tensorflow/lite/micro/tools/BUILD b/tensorflow/lite/micro/tools/BUILD index a85a7ba..e3c6f0c 100644 --- a/tensorflow/lite/micro/tools/BUILD +++ b/tensorflow/lite/micro/tools/BUILD
@@ -5,7 +5,7 @@ load("//tensorflow:extra_rules.bzl", "tflm_application_friends") package( - default_visibility = ["//:__subpackages__"], + default_visibility = ["//visibility:public"], licenses = ["notice"], )
diff --git a/tensorflow/lite/micro/tools/generate_cc_arrays.py b/tensorflow/lite/micro/tools/generate_cc_arrays.py index 16d72c1..2a77b4d 100644 --- a/tensorflow/lite/micro/tools/generate_cc_arrays.py +++ b/tensorflow/lite/micro/tools/generate_cc_arrays.py
@@ -92,9 +92,12 @@ data_1d = data.flatten() out_string = ','.join([str(x) for x in data_1d]) return [len(data_1d), out_string] - else: - raise ValueError('input file must be .tflite, .bmp, .wav or .csv') + with open(input_fname, 'rb') as input_file: + buffer = input_file.read() + size = len(buffer) + out_string = bytes_to_hexstring(buffer) + return [size, out_string] def get_array_name(input_fname): @@ -117,6 +120,8 @@ return [base_array_name + '_test_data', 'float'] elif input_fname.endswith('npy'): return [base_array_name + '_test_data', 'float'] + else: + return [base_array_name, 'unsigned char'] def main():
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index b799523..597dc14 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl
@@ -35,6 +35,8 @@ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip", "https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip", ], + patch_file = + "@tflite-micro//third_party/gemmlowp:pthread.patch", ) tf_http_archive(
diff --git a/third_party/gemmlowp/BUILD b/third_party/gemmlowp/BUILD new file mode 100644 index 0000000..c9776cc --- /dev/null +++ b/third_party/gemmlowp/BUILD
@@ -0,0 +1,6 @@ +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], +) + +exports_files(glob(["*.patch"]))
diff --git a/third_party/gemmlowp/pthread.patch b/third_party/gemmlowp/pthread.patch new file mode 100644 index 0000000..547dd52 --- /dev/null +++ b/third_party/gemmlowp/pthread.patch
@@ -0,0 +1,13 @@ +diff --git a/flags.bzl b/flags.bzl +index e35fe9e..e26a448 100644 +--- a/flags.bzl ++++ b/flags.bzl +@@ -4,7 +4,7 @@ LIB_COPTS = [] + LIB_LINKOPTS = select({ + ":android": [], + ":windows": [], +- "//conditions:default": ["-lpthread"], ++ "//conditions:default": [], + }) + + BIN_LINKOPTS = LIB_LINKOPTS \ No newline at end of file
diff --git a/third_party/ruy/BUILD b/third_party/ruy/BUILD index 518fea8..8fabe49 100644 --- a/third_party/ruy/BUILD +++ b/third_party/ruy/BUILD
@@ -4,3 +4,5 @@ default_visibility = ["//visibility:public"], licenses = ["notice"], ) + +exports_files(glob(["*.patch"]))
diff --git a/third_party/ruy/pthread.patch b/third_party/ruy/pthread.patch new file mode 100644 index 0000000..c8ddf4d --- /dev/null +++ b/third_party/ruy/pthread.patch
@@ -0,0 +1,11 @@ +diff --git a/ruy/build_defs.oss.bzl b/ruy/build_defs.oss.bzl +index e405b41..1d7612b 100644 +--- a/ruy/build_defs.oss.bzl ++++ b/ruy/build_defs.oss.bzl +@@ -11,5 +11,5 @@ def ruy_linkopts_thread_standard_library(): + # https://github.com/abseil/abseil-cpp/blob/1112609635037a32435de7aa70a9188dcb591458/absl/base/BUILD.bazel#L155 + return select({ + "@bazel_tools//src/conditions:windows": [], +- "//conditions:default": ["-pthread"], ++ "//conditions:default": [], + }) \ No newline at end of file
diff --git a/third_party/ruy/workspace.bzl b/third_party/ruy/workspace.bzl index 5076962..1671ab5 100644 --- a/third_party/ruy/workspace.bzl +++ b/third_party/ruy/workspace.bzl
@@ -12,4 +12,5 @@ "https://github.com/google/ruy/archive/54774a7a2cf85963777289193629d4bd42de4a59.zip", ], build_file = "//third_party/ruy:BUILD", + patch_file = "@tflite-micro//third_party/ruy:pthread.patch", )