sw/kelvin: clean up function definitions

Try to use the same function signatures as the reference implementations. This makes the code cleaner and more extensible.

Change-Id: Ie79cad76340194297f7a7ed8812679993966e020
diff --git a/tflm/opt/elementwise_add_s16.cc b/tflm/opt/elementwise_add_s16.cc
index 106742b..bb445c3 100644
--- a/tflm/opt/elementwise_add_s16.cc
+++ b/tflm/opt/elementwise_add_s16.cc
@@ -20,16 +20,28 @@
 
 namespace kelvin::opt {
 
-void ElementwiseAddS16(const int16_t* input1, const int16_t* input2,
-                       const int32_t input1_offset, const int32_t input1_mult,
-                       const int32_t input1_shift, const int32_t input2_offset,
-                       const int32_t input2_mult, const int32_t input2_shift,
-                       const int32_t left_shift, int16_t* output,
-                       const int32_t output_offset, const int32_t output_mult,
-                       const int32_t output_shift,
-                       const int32_t output_activation_min,
-                       const int32_t output_activation_max,
-                       const int32_t block_size) {
+void ElementwiseAddS16(const tflite::ArithmeticParams& params,
+                       const tflite::RuntimeShape& input1_shape,
+                       const int16_t* input1,
+                       const tflite::RuntimeShape& input2_shape,
+                       const int16_t* input2,
+                       const tflite::RuntimeShape& output_shape,
+                       int16_t* output) {
+  const int32_t input1_offset = params.input1_offset;
+  const int32_t input1_mult = params.input1_multiplier;
+  const int32_t input1_shift = params.input1_shift;
+  const int32_t input2_offset = params.input2_offset;
+  const int32_t input2_mult = params.input2_multiplier;
+  const int32_t input2_shift = params.input2_shift;
+  const int32_t left_shift = params.left_shift;
+  const int32_t output_offset = params.output_offset;
+  const int32_t output_mult = params.output_multiplier;
+  const int32_t output_shift = params.output_shift;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+  const int block_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
   int blocks = block_size;
   int vl;
   getmaxvl_h(vl);
diff --git a/tflm/opt/elementwise_add_s32.cc b/tflm/opt/elementwise_add_s32.cc
index ab2b3d1..e83d1eb 100644
--- a/tflm/opt/elementwise_add_s32.cc
+++ b/tflm/opt/elementwise_add_s32.cc
@@ -18,10 +18,18 @@
 #include "tflm/opt/opt.h"
 
 namespace kelvin::opt {
-void ElementwiseAddS32(const int32_t* input1, const int32_t* input2,
-                       int32_t* output, const int32_t output_activation_min,
-                       const int32_t output_activation_max,
-                       const int32_t block_size) {
+void ElementwiseAddS32(const tflite::ArithmeticParams& params,
+                       const tflite::RuntimeShape& input1_shape,
+                       const int32_t* input1,
+                       const tflite::RuntimeShape& input2_shape,
+                       const int32_t* input2,
+                       const tflite::RuntimeShape& output_shape,
+                       int32_t* output) {
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+  const int block_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
   int blocks = block_size;
   int vl;
   getmaxvl_w_m(vl);
diff --git a/tflm/opt/elementwise_add_s8.cc b/tflm/opt/elementwise_add_s8.cc
index e664769..9d24449 100644
--- a/tflm/opt/elementwise_add_s8.cc
+++ b/tflm/opt/elementwise_add_s8.cc
@@ -20,16 +20,28 @@
 
 namespace kelvin::opt {
 
-void ElementwiseAddS8(const int8_t* input1, const int8_t* input2,
-                      const int32_t input1_offset, const int32_t input1_mult,
-                      const int32_t input1_shift, const int32_t input2_offset,
-                      const int32_t input2_mult, const int32_t input2_shift,
-                      const int32_t left_shift, int8_t* output,
-                      const int32_t output_offset, const int32_t output_mult,
-                      const int32_t output_shift,
-                      const int32_t output_activation_min,
-                      const int32_t output_activation_max,
-                      const int32_t block_size) {
+void ElementwiseAddS8(const tflite::ArithmeticParams& params,
+                      const tflite::RuntimeShape& input1_shape,
+                      const int8_t* input1,
+                      const tflite::RuntimeShape& input2_shape,
+                      const int8_t* input2,
+                      const tflite::RuntimeShape& output_shape,
+                      int8_t* output) {
+  const int32_t input1_offset = params.input1_offset;
+  const int32_t input1_mult = params.input1_multiplier;
+  const int32_t input1_shift = params.input1_shift;
+  const int32_t input2_offset = params.input2_offset;
+  const int32_t input2_mult = params.input2_multiplier;
+  const int32_t input2_shift = params.input2_shift;
+  const int32_t left_shift = params.left_shift;
+  const int32_t output_offset = params.output_offset;
+  const int32_t output_mult = params.output_multiplier;
+  const int32_t output_shift = params.output_shift;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+  const int block_size =
+      MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
   int blocks = block_size;
 
   const int32_t input1_shift_mul = 1 << LEFT_SHIFT(input1_shift);
diff --git a/tflm/opt/leaky_relu_s16.cc b/tflm/opt/leaky_relu_s16.cc
index 7427a6c..c750f84 100644
--- a/tflm/opt/leaky_relu_s16.cc
+++ b/tflm/opt/leaky_relu_s16.cc
@@ -21,13 +21,17 @@
 #include "tflm/opt/util.h"
 
 namespace kelvin::opt {
-void LeakyReluS16(const int16_t* input, int16_t* output,
-                  const int32_t block_size, const int32_t input_zero_point,
-                  const int32_t output_zero_point,
-                  const int32_t output_multiplier_alpha,
-                  const int32_t output_shift_alpha,
-                  const int32_t output_multiplier_identity,
-                  const int32_t output_shift_identity) {
+void LeakyReluS16(const tflite::LeakyReluParams &params,
+                  const tflite::RuntimeShape &input_shape, const int16_t *input,
+                  const tflite::RuntimeShape &output_shape, int16_t *output) {
+  const int32_t input_zero_point = params.input_offset;
+  const int32_t output_zero_point = params.output_offset;
+  const int32_t output_multiplier_alpha = params.output_multiplier_alpha;
+  const int32_t output_shift_alpha = params.output_shift_alpha;
+  const int32_t output_multiplier_identity = params.output_multiplier_identity;
+  const int32_t output_shift_identity = params.output_shift_identity;
+  const int block_size = MatchingFlatSize(input_shape, output_shape);
+
   constexpr int32_t quantized_output_min = std::numeric_limits<int16_t>::min();
   constexpr int32_t quantized_output_max = std::numeric_limits<int16_t>::max();
   int32_t right_shift_identity = std::min(output_shift_identity, 0L);
diff --git a/tflm/opt/leaky_relu_s8.cc b/tflm/opt/leaky_relu_s8.cc
index 8b30d19..8e43100 100644
--- a/tflm/opt/leaky_relu_s8.cc
+++ b/tflm/opt/leaky_relu_s8.cc
@@ -22,19 +22,24 @@
 
 namespace kelvin::opt {
 
-void LeakyReluS8(const int8_t* input, int8_t* output, const int32_t block_size,
-                 const int32_t input_zero_point,
-                 const int32_t output_zero_point,
-                 const int32_t output_multiplier_alpha,
-                 const int32_t output_shift_alpha,
-                 const int32_t output_multiplier_identity,
-                 const int32_t output_shift_identity) {
+void LeakyReluS8(const tflite::LeakyReluParams& params,
+                 const tflite::RuntimeShape& input_shape, const int8_t* input,
+                 const tflite::RuntimeShape& output_shape, int8_t* output) {
+  const int32_t input_zero_point = params.input_offset;
+  const int32_t output_zero_point = params.output_offset;
+  const int32_t output_multiplier_alpha = params.output_multiplier_alpha;
+  const int32_t output_shift_alpha = params.output_shift_alpha;
+  const int32_t output_multiplier_identity = params.output_multiplier_identity;
+  const int32_t output_shift_identity = params.output_shift_identity;
+  const int block_size = MatchingFlatSize(input_shape, output_shape);
+
   constexpr int32_t quantized_output_min = std::numeric_limits<int16_t>::min();
   constexpr int32_t quantized_output_max = std::numeric_limits<int16_t>::max();
   int32_t right_shift_identity = std::min(output_shift_identity, 0L);
   int32_t left_shift_identity = std::max(output_shift_identity, 0L);
   int32_t right_shift_alpha = std::min(output_shift_alpha, 0L);
   int32_t left_shift_alpha = std::max(output_shift_alpha, 0L);
+
   int blocks = block_size;
   int vl;
   getmaxvl_b(vl);
diff --git a/tflm/opt/opt.h b/tflm/opt/opt.h
index e5169b2..65d6262 100644
--- a/tflm/opt/opt.h
+++ b/tflm/opt/opt.h
@@ -25,44 +25,36 @@
 
 namespace kelvin::opt {
 void* Memcpy(void* dst, const void* src, size_t n);
-void ElementwiseAddS8(const int8_t* input1, const int8_t* input2,
-                      const int32_t input1_offset, const int32_t input1_mult,
-                      const int32_t input1_shift, const int32_t input2_offset,
-                      const int32_t input2_mult, const int32_t input2_shift,
-                      const int32_t left_shift, int8_t* output,
-                      const int32_t output_offset, const int32_t output_mult,
-                      const int32_t output_shift,
-                      const int32_t output_activation_min,
-                      const int32_t output_activation_max,
-                      const int32_t block_size);
-void ElementwiseAddS16(const int16_t* input1, const int16_t* input2,
-                       const int32_t input1_offset, const int32_t input1_mult,
-                       const int32_t input1_shift, const int32_t input2_offset,
-                       const int32_t input2_mult, const int32_t input2_shift,
-                       const int32_t left_shift, int16_t* output,
-                       const int32_t output_offset, const int32_t output_mult,
-                       const int32_t output_shift,
-                       const int32_t output_activation_min,
-                       const int32_t output_activation_max,
-                       const int32_t block_size);
-void ElementwiseAddS32(const int32_t* input1, const int32_t* input2,
-                       int32_t* output, const int32_t output_activation_min,
-                       const int32_t output_activation_max,
-                       const int32_t block_size);
-void LeakyReluS8(const int8_t* input, int8_t* output, const int32_t block_size,
-                 const int32_t input_zero_point,
-                 const int32_t output_zero_point,
-                 const int32_t output_multiplier_alpha,
-                 const int32_t output_shift_alpha,
-                 const int32_t output_multiplier_identity,
-                 const int32_t output_shift_identity);
-void LeakyReluS16(const int16_t* input, int16_t* output,
-                  const int32_t block_size, const int32_t input_zero_point,
-                  const int32_t output_zero_point,
-                  const int32_t output_multiplier_alpha,
-                  const int32_t output_shift_alpha,
-                  const int32_t output_multiplier_identity,
-                  const int32_t output_shift_identity);
+void ElementwiseAddS8(const tflite::ArithmeticParams& params,
+                      const tflite::RuntimeShape& input1_shape,
+                      const int8_t* input1_data,
+                      const tflite::RuntimeShape& input2_shape,
+                      const int8_t* input2_data,
+                      const tflite::RuntimeShape& output_shape,
+                      int8_t* output_data);
+void ElementwiseAddS16(const tflite::ArithmeticParams& params,
+                       const tflite::RuntimeShape& input1_shape,
+                       const int16_t* input1_data,
+                       const tflite::RuntimeShape& input2_shape,
+                       const int16_t* input2_data,
+                       const tflite::RuntimeShape& output_shape,
+                       int16_t* output_data);
+void ElementwiseAddS32(const tflite::ArithmeticParams& params,
+                       const tflite::RuntimeShape& input1_shape,
+                       const int32_t* input1_data,
+                       const tflite::RuntimeShape& input2_shape,
+                       const int32_t* input2_data,
+                       const tflite::RuntimeShape& output_shape,
+                       int32_t* output_data);
+void LeakyReluS8(const tflite::LeakyReluParams& params,
+                 const tflite::RuntimeShape& input_shape,
+                 const int8_t* input_data,
+                 const tflite::RuntimeShape& output_shape, int8_t* output_data);
+void LeakyReluS16(const tflite::LeakyReluParams& params,
+                  const tflite::RuntimeShape& input_shape,
+                  const int16_t* input_data,
+                  const tflite::RuntimeShape& output_shape,
+                  int16_t* output_data);
 void ConvS16B32(const tflite::ConvParams& params,
                 const int32_t* output_multiplier, const int32_t* output_shift,
                 const tflite::RuntimeShape& input_shape,
@@ -109,26 +101,12 @@
                 int32_t input_multiplier, int32_t input_left_shift,
                 int32_t input_size, const int8_t* input_data,
                 int8_t* output_data);
-void KelvinResizeNearestNeighbor(
+void ResizeNearestNeighborS8(
     const tflite::ResizeNearestNeighborParams& op_params,
     const tflite::RuntimeShape& unextended_input_shape,
     const int8_t* input_data, const tflite::RuntimeShape& output_size_shape,
     const int32_t* output_size_data,
     const tflite::RuntimeShape& unextended_output_shape, int8_t* output_data);
-void KelvinResizeNN2x(const tflite::ResizeNearestNeighborParams& op_params,
-                      const tflite::RuntimeShape& input_shape,
-                      const tflite::RuntimeShape& output_shape,
-                      const int32_t input_height, const int32_t input_width,
-                      const int32_t output_height, const int32_t output_width,
-                      const int8_t* input_data, int8_t* output_data);
-void KelvinResizeNNGeneric(const tflite::ResizeNearestNeighborParams& op_params,
-                           const tflite::RuntimeShape& input_shape,
-                           const tflite::RuntimeShape& output_shape,
-                           const int32_t input_height,
-                           const int32_t input_width,
-                           const int32_t output_height,
-                           const int32_t output_width, const int8_t* input_data,
-                           int8_t* output_data);
 
 }  // namespace kelvin::opt
 
diff --git a/tflm/opt/resize_nearest_neighbor_s8.cc b/tflm/opt/resize_nearest_neighbor_s8.cc
index 1f5cafb..8da7ee9 100644
--- a/tflm/opt/resize_nearest_neighbor_s8.cc
+++ b/tflm/opt/resize_nearest_neighbor_s8.cc
@@ -21,12 +21,12 @@
 #include "tensorflow/lite/kernels/internal/types.h"
 #include "tflm/opt/opt.h"
 
-inline int32_t KelvinGetNearestNeighbor(const int input_value,
-                                        const int32_t input_size,
-                                        const int32_t output_size,
-                                        const bool align_corners,
-                                        const bool half_pixel_centers,
-                                        const float scale, const float offset) {
+namespace kelvin::opt {
+namespace {
+int32_t GetNearestNeighbor(const int input_value, const int32_t input_size,
+                           const int32_t output_size, const bool align_corners,
+                           const bool half_pixel_centers, const float scale,
+                           const float offset) {
   int32_t output_value = std::min(
       align_corners
           ? static_cast<int32_t>(
@@ -39,14 +39,12 @@
   return output_value;
 }
 
-namespace kelvin::opt {
-
-void KelvinResizeNN2x(const tflite::ResizeNearestNeighborParams& op_params,
-                      const tflite::RuntimeShape& input_shape,
-                      const tflite::RuntimeShape& output_shape,
-                      const int32_t input_height, const int32_t input_width,
-                      const int32_t output_height, const int32_t output_width,
-                      const int8_t* input_data, int8_t* output_data) {
+void ResizeNN2x(const tflite::ResizeNearestNeighborParams& op_params,
+                const tflite::RuntimeShape& input_shape,
+                const tflite::RuntimeShape& output_shape,
+                const int32_t input_height, const int32_t input_width,
+                const int32_t output_height, const int32_t output_width,
+                const int8_t* input_data, int8_t* output_data) {
   int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
   int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
   const int col_offset = input_shape.Dims(3);
@@ -54,7 +52,6 @@
   const int batch_offset = input_shape.Dims(1) * row_offset;
 
   const int8_t* input_ptr = input_data;
-  const int8_t* input_tmp_ptr = input_data;
   int8_t* output_ptr = output_data;
 
   for (int b = 0; b < batches; ++b) {
@@ -83,14 +80,12 @@
   }
 }
 
-void KelvinResizeNNGeneric(const tflite::ResizeNearestNeighborParams& op_params,
-                           const tflite::RuntimeShape& input_shape,
-                           const tflite::RuntimeShape& output_shape,
-                           const int32_t input_height,
-                           const int32_t input_width,
-                           const int32_t output_height,
-                           const int32_t output_width, const int8_t* input_data,
-                           int8_t* output_data) {
+void ResizeNNGeneric(const tflite::ResizeNearestNeighborParams& op_params,
+                     const tflite::RuntimeShape& input_shape,
+                     const tflite::RuntimeShape& output_shape,
+                     const int32_t input_height, const int32_t input_width,
+                     const int32_t output_height, const int32_t output_width,
+                     const int8_t* input_data, int8_t* output_data) {
   int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
   int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
   const int col_offset = input_shape.Dims(3);
@@ -113,12 +108,12 @@
 
   for (int b = 0; b < batches; ++b) {
     for (int y = 0; y < output_height; ++y) {
-      int32_t in_y = KelvinGetNearestNeighbor(
+      int32_t in_y = GetNearestNeighbor(
           y, input_height, output_height, op_params.align_corners,
           op_params.half_pixel_centers, y_scale, offset);
       const int8_t* y_input_ptr = input_ptr + in_y * row_offset;
       for (int x = 0; x < output_width; ++x) {
-        int32_t in_x = KelvinGetNearestNeighbor(
+        int32_t in_x = GetNearestNeighbor(
             x, input_width, output_width, op_params.align_corners,
             op_params.half_pixel_centers, x_scale, offset);
         const int8_t* x_input_ptr = y_input_ptr + in_x * col_offset;
@@ -130,8 +125,9 @@
     input_ptr += batch_offset;
   }
 }
+}  // namespace
 
-void KelvinResizeNearestNeighbor(
+void ResizeNearestNeighborS8(
     const tflite::ResizeNearestNeighborParams& op_params,
     const tflite::RuntimeShape& unextended_input_shape,
     const int8_t* input_data, const tflite::RuntimeShape& output_size_shape,
@@ -153,14 +149,13 @@
   int32_t output_width = output_size_data[1];
 
   if (output_height == 2 * input_height && output_width == 2 * input_width) {
-    KelvinResizeNN2x(op_params, input_shape, output_shape, input_height,
-                     input_width, output_height, output_width, input_data,
-                     output_data);
+    ResizeNN2x(op_params, input_shape, output_shape, input_height, input_width,
+               output_height, output_width, input_data, output_data);
 
   } else {
-    KelvinResizeNNGeneric(op_params, input_shape, output_shape, input_height,
-                          input_width, output_height, output_width, input_data,
-                          output_data);
+    ResizeNNGeneric(op_params, input_shape, output_shape, input_height,
+                    input_width, output_height, output_width, input_data,
+                    output_data);
   }
 }