tflite-micro: add elementwise multiplication kernel for Kelvin

Add a MUL kernel for the Kelvin target. Float and int32 outputs use the
TFLM reference implementations. For int8 and int16 outputs, the kernel
dispatches to the Kelvin-optimized kelvin::opt::MulS8 / MulS16 routines
when the input shapes match elementwise, and falls back to the
reference broadcast kernel otherwise.
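
A minimal usage sketch (assuming the standard TFLM resolver API; the
Kelvin kernel backs tflite::Register_MUL() when this kernel directory
is selected at build time):

  tflite::MicroMutableOpResolver<1> resolver;
  resolver.AddMul();  // resolves to Register_MUL()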

Change-Id: I14946324e5a460f0c345b8c5528f577310ee532f
diff --git a/tensorflow/lite/micro/kernels/kelvin/mul.cc b/tensorflow/lite/micro/kernels/kelvin/mul.cc
new file mode 100644
index 0000000..e006e9b
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/kelvin/mul.cc
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
+
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/mul.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/mul.h"
+#include "tensorflow/lite/micro/memory_helpers.h"
+#include "tensorflow/lite/micro/micro_log.h"
+#include "tflm/opt/opt.h"
+
+namespace tflite {
+
+TfLiteStatus MulEval(TfLiteContext* context, TfLiteNode* node) {
+ TFLITE_DCHECK(node->builtin_data != nullptr);
+ auto* params = reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
+
+ TFLITE_DCHECK(node->user_data != nullptr);
+ const OpDataMul* data = static_cast<const OpDataMul*>(node->user_data);
+
+ const TfLiteEvalTensor* input1 =
+ tflite::micro::GetEvalInput(context, node, kMulInput1Tensor);
+ const TfLiteEvalTensor* input2 =
+ tflite::micro::GetEvalInput(context, node, kMulInput2Tensor);
+ TfLiteEvalTensor* output =
+ tflite::micro::GetEvalOutput(context, node, kMulOutputTensor);
+
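+  // Dispatch on the output type: float and int32 outputs use the TFLM
+  // reference kernels; int8/int16 outputs use the Kelvin-optimized kernels
+  // when no broadcasting is required.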
+ if (output->type == kTfLiteFloat32) {
+ EvalMulFloatReference(context, node, params, data, input1, input2, output);
+ } else if (output->type == kTfLiteInt32) {
+ EvalMulQuantizedReference(context, node, data, input1, input2, output);
+ } else if (output->type == kTfLiteInt16) {
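+    // Pack the quantization parameters. The input offsets are negated zero
+    // points because the kernels add the offset to each raw input value.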
+ tflite::ArithmeticParams op_params = {};
+ op_params.quantized_activation_min = data->output_activation_min;
+ op_params.quantized_activation_max = data->output_activation_max;
+ op_params.input1_offset = -data->input1_zero_point;
+ op_params.input2_offset = -data->input2_zero_point;
+ op_params.output_offset = data->output_zero_point;
+ op_params.output_multiplier = data->output_multiplier;
+ op_params.output_shift = data->output_shift;
+
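+    // ProcessBroadcastShapes returns true when the input shapes differ and
+    // broadcasting is needed; it also fills op_params' broadcast fields.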
+ bool need_broadcast = reference_ops::ProcessBroadcastShapes(
+ tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorShape(input2), &op_params);
+
+ if (need_broadcast) {
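+      // Shapes differ: fall back to the reference broadcast kernel.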
+ reference_integer_ops::BroadcastMul4DSlow(
+ op_params, tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorData<int16_t>(input1),
+ tflite::micro::GetTensorShape(input2),
+ tflite::micro::GetTensorData<int16_t>(input2),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int16_t>(output));
+ } else {
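+      // Shapes match elementwise: use the Kelvin-optimized kernel.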
+ kelvin::opt::MulS16(op_params, tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorData<int16_t>(input1),
+ tflite::micro::GetTensorShape(input2),
+ tflite::micro::GetTensorData<int16_t>(input2),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int16_t>(output));
+ }
+ } else if (output->type == kTfLiteInt8) {
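+    // The int8 path mirrors the int16 path above.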
+ tflite::ArithmeticParams op_params = {};
+ op_params.quantized_activation_min = data->output_activation_min;
+ op_params.quantized_activation_max = data->output_activation_max;
+ op_params.input1_offset = -data->input1_zero_point;
+ op_params.input2_offset = -data->input2_zero_point;
+ op_params.output_offset = data->output_zero_point;
+ op_params.output_multiplier = data->output_multiplier;
+ op_params.output_shift = data->output_shift;
+
+ bool need_broadcast = reference_ops::ProcessBroadcastShapes(
+ tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorShape(input2), &op_params);
+ if (need_broadcast) {
+ reference_integer_ops::BroadcastMul4DSlow(
+ op_params, tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorData<int8_t>(input1),
+ tflite::micro::GetTensorShape(input2),
+ tflite::micro::GetTensorData<int8_t>(input2),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int8_t>(output));
+ } else {
+ kelvin::opt::MulS8(op_params, tflite::micro::GetTensorShape(input1),
+ tflite::micro::GetTensorData<int8_t>(input1),
+ tflite::micro::GetTensorShape(input2),
+ tflite::micro::GetTensorData<int8_t>(input2),
+ tflite::micro::GetTensorShape(output),
+ tflite::micro::GetTensorData<int8_t>(output));
+ }
+ } else {
+ MicroPrintf("Unsupported output type: %s", TfLiteTypeGetName(output->type));
+ return kTfLiteError;
+ }
+
+ return kTfLiteOk;
+}
+
+TFLMRegistration Register_MUL() {
+ return tflite::micro::RegisterOp(MulInit, MulPrepare, MulEval);
+}
+
+} // namespace tflite