blob: 9a647a09cdb767e7b438ec474b6cd264661af9df [file] [log] [blame]
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "tensorflow/lite/kernels/internal/reference/add.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
#include "tensorflow/lite/micro/kernels/add.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/micro_log.h"
#include "tflm/opt/opt.h"
namespace tflite {
namespace {
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpDataAdd));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return AddPrepare(context, node);
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
const OpDataAdd* data = static_cast<const OpDataAdd*>(node->user_data);
const TfLiteEvalTensor* input1 =
tflite::micro::GetEvalInput(context, node, kAddInputTensor1);
const TfLiteEvalTensor* input2 =
tflite::micro::GetEvalInput(context, node, kAddInputTensor2);
TfLiteEvalTensor* output =
tflite::micro::GetEvalOutput(context, node, kAddOutputTensor);
if (output->type == kTfLiteFloat32) {
tflite::ArithmeticParams op_params;
SetActivationParams(data->output_activation_min_f32,
data->output_activation_max_f32, &op_params);
if (data->requires_broadcast) {
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
} else {
reference_ops::Add(op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<float>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<float>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<float>(output));
}
} else if (output->type == kTfLiteInt32) {
tflite::ArithmeticParams op_params;
SetActivationParams(std::numeric_limits<int32_t>::lowest(),
std::numeric_limits<int32_t>::max(), &op_params);
if (data->requires_broadcast) {
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int32_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int32_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int32_t>(output));
} else {
kelvin::opt::ElementwiseAddS32(
tflite::micro::GetTensorData<int32_t>(input1),
tflite::micro::GetTensorData<int32_t>(input2),
tflite::micro::GetTensorData<int32_t>(output),
op_params.quantized_activation_min,
op_params.quantized_activation_max,
MatchingElementsSize(tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorShape(output)));
}
return kTfLiteOk;
} else if (output->type == kTfLiteInt16) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (need_broadcast) {
reference_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int16_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
} else {
kelvin::opt::ElementwiseAddS16(
tflite::micro::GetTensorData<int16_t>(input1),
tflite::micro::GetTensorData<int16_t>(input2),
op_params.input1_offset, op_params.input1_multiplier,
op_params.input1_shift, op_params.input2_offset,
op_params.input2_multiplier, op_params.input2_shift,
op_params.left_shift, tflite::micro::GetTensorData<int16_t>(output),
op_params.output_offset, op_params.output_multiplier,
op_params.output_shift, op_params.quantized_activation_min,
op_params.quantized_activation_max,
MatchingElementsSize(tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorShape(output)));
}
return kTfLiteOk;
} else if (output->type == kTfLiteInt8) {
tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset;
op_params.input1_multiplier = data->input1_multiplier;
op_params.input1_shift = data->input1_shift;
op_params.input2_offset = data->input2_offset;
op_params.input2_multiplier = data->input2_multiplier;
op_params.input2_shift = data->input2_shift;
op_params.output_offset = data->output_offset;
op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
bool need_broadcast = reference_ops::ProcessBroadcastShapes(
tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2), &op_params);
if (need_broadcast) {
reference_integer_ops::BroadcastAdd4DSlow(
op_params, tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorData<int8_t>(input2),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
} else {
kelvin::opt::ElementwiseAddS8(
tflite::micro::GetTensorData<int8_t>(input1),
tflite::micro::GetTensorData<int8_t>(input2), op_params.input1_offset,
op_params.input1_multiplier, op_params.input1_shift,
op_params.input2_offset, op_params.input2_multiplier,
op_params.input2_shift, op_params.left_shift,
tflite::micro::GetTensorData<int8_t>(output), op_params.output_offset,
op_params.output_multiplier, op_params.output_shift,
op_params.quantized_activation_min,
op_params.quantized_activation_max,
MatchingElementsSize(tflite::micro::GetTensorShape(input1),
tflite::micro::GetTensorShape(input2),
tflite::micro::GetTensorShape(output)));
}
} else {
MicroPrintf("Unsupported output type: %s", TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TFLMRegistration Register_ADD() {
return tflite::micro::RegisterOp(Init, Prepare, Eval);
}
} // namespace tflite