sw/kelvin: clean up function definitions
Use the same function signatures as the reference implementations. This makes the code cleaner and easier to extend.
Change-Id: Ie79cad76340194297f7a7ed8812679993966e020
diff --git a/tflm/opt/elementwise_add_s16.cc b/tflm/opt/elementwise_add_s16.cc
index 106742b..bb445c3 100644
--- a/tflm/opt/elementwise_add_s16.cc
+++ b/tflm/opt/elementwise_add_s16.cc
@@ -20,16 +20,28 @@
namespace kelvin::opt {
-void ElementwiseAddS16(const int16_t* input1, const int16_t* input2,
- const int32_t input1_offset, const int32_t input1_mult,
- const int32_t input1_shift, const int32_t input2_offset,
- const int32_t input2_mult, const int32_t input2_shift,
- const int32_t left_shift, int16_t* output,
- const int32_t output_offset, const int32_t output_mult,
- const int32_t output_shift,
- const int32_t output_activation_min,
- const int32_t output_activation_max,
- const int32_t block_size) {
+void ElementwiseAddS16(const tflite::ArithmeticParams& params,
+ const tflite::RuntimeShape& input1_shape,
+ const int16_t* input1,
+ const tflite::RuntimeShape& input2_shape,
+ const int16_t* input2,
+ const tflite::RuntimeShape& output_shape,
+ int16_t* output) {
+ const int32_t input1_offset = params.input1_offset;
+ const int32_t input1_mult = params.input1_multiplier;
+ const int32_t input1_shift = params.input1_shift;
+ const int32_t input2_offset = params.input2_offset;
+ const int32_t input2_mult = params.input2_multiplier;
+ const int32_t input2_shift = params.input2_shift;
+ const int32_t left_shift = params.left_shift;
+ const int32_t output_offset = params.output_offset;
+ const int32_t output_mult = params.output_multiplier;
+ const int32_t output_shift = params.output_shift;
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
+ const int block_size =
+ MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
int blocks = block_size;
int vl;
getmaxvl_h(vl);
diff --git a/tflm/opt/elementwise_add_s32.cc b/tflm/opt/elementwise_add_s32.cc
index ab2b3d1..e83d1eb 100644
--- a/tflm/opt/elementwise_add_s32.cc
+++ b/tflm/opt/elementwise_add_s32.cc
@@ -18,10 +18,18 @@
#include "tflm/opt/opt.h"
namespace kelvin::opt {
-void ElementwiseAddS32(const int32_t* input1, const int32_t* input2,
- int32_t* output, const int32_t output_activation_min,
- const int32_t output_activation_max,
- const int32_t block_size) {
+void ElementwiseAddS32(const tflite::ArithmeticParams& params,
+ const tflite::RuntimeShape& input1_shape,
+ const int32_t* input1,
+ const tflite::RuntimeShape& input2_shape,
+ const int32_t* input2,
+ const tflite::RuntimeShape& output_shape,
+ int32_t* output) {
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
+ const int block_size =
+ MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
int blocks = block_size;
int vl;
getmaxvl_w_m(vl);
diff --git a/tflm/opt/elementwise_add_s8.cc b/tflm/opt/elementwise_add_s8.cc
index e664769..9d24449 100644
--- a/tflm/opt/elementwise_add_s8.cc
+++ b/tflm/opt/elementwise_add_s8.cc
@@ -20,16 +20,28 @@
namespace kelvin::opt {
-void ElementwiseAddS8(const int8_t* input1, const int8_t* input2,
- const int32_t input1_offset, const int32_t input1_mult,
- const int32_t input1_shift, const int32_t input2_offset,
- const int32_t input2_mult, const int32_t input2_shift,
- const int32_t left_shift, int8_t* output,
- const int32_t output_offset, const int32_t output_mult,
- const int32_t output_shift,
- const int32_t output_activation_min,
- const int32_t output_activation_max,
- const int32_t block_size) {
+void ElementwiseAddS8(const tflite::ArithmeticParams& params,
+ const tflite::RuntimeShape& input1_shape,
+ const int8_t* input1,
+ const tflite::RuntimeShape& input2_shape,
+ const int8_t* input2,
+ const tflite::RuntimeShape& output_shape,
+ int8_t* output) {
+ const int32_t input1_offset = params.input1_offset;
+ const int32_t input1_mult = params.input1_multiplier;
+ const int32_t input1_shift = params.input1_shift;
+ const int32_t input2_offset = params.input2_offset;
+ const int32_t input2_mult = params.input2_multiplier;
+ const int32_t input2_shift = params.input2_shift;
+ const int32_t left_shift = params.left_shift;
+ const int32_t output_offset = params.output_offset;
+ const int32_t output_mult = params.output_multiplier;
+ const int32_t output_shift = params.output_shift;
+ const int32_t output_activation_min = params.quantized_activation_min;
+ const int32_t output_activation_max = params.quantized_activation_max;
+ const int block_size =
+ MatchingElementsSize(input1_shape, input2_shape, output_shape);
+
int blocks = block_size;
const int32_t input1_shift_mul = 1 << LEFT_SHIFT(input1_shift);
diff --git a/tflm/opt/leaky_relu_s16.cc b/tflm/opt/leaky_relu_s16.cc
index 7427a6c..c750f84 100644
--- a/tflm/opt/leaky_relu_s16.cc
+++ b/tflm/opt/leaky_relu_s16.cc
@@ -21,13 +21,17 @@
#include "tflm/opt/util.h"
namespace kelvin::opt {
-void LeakyReluS16(const int16_t* input, int16_t* output,
- const int32_t block_size, const int32_t input_zero_point,
- const int32_t output_zero_point,
- const int32_t output_multiplier_alpha,
- const int32_t output_shift_alpha,
- const int32_t output_multiplier_identity,
- const int32_t output_shift_identity) {
+void LeakyReluS16(const tflite::LeakyReluParams &params,
+ const tflite::RuntimeShape &input_shape, const int16_t *input,
+ const tflite::RuntimeShape &output_shape, int16_t *output) {
+ const int32_t input_zero_point = params.input_offset;
+ const int32_t output_zero_point = params.output_offset;
+ const int32_t output_multiplier_alpha = params.output_multiplier_alpha;
+ const int32_t output_shift_alpha = params.output_shift_alpha;
+ const int32_t output_multiplier_identity = params.output_multiplier_identity;
+ const int32_t output_shift_identity = params.output_shift_identity;
+ const int block_size = MatchingFlatSize(input_shape, output_shape);
+
constexpr int32_t quantized_output_min = std::numeric_limits<int16_t>::min();
constexpr int32_t quantized_output_max = std::numeric_limits<int16_t>::max();
int32_t right_shift_identity = std::min(output_shift_identity, 0L);
diff --git a/tflm/opt/leaky_relu_s8.cc b/tflm/opt/leaky_relu_s8.cc
index 8b30d19..8e43100 100644
--- a/tflm/opt/leaky_relu_s8.cc
+++ b/tflm/opt/leaky_relu_s8.cc
@@ -22,19 +22,24 @@
namespace kelvin::opt {
-void LeakyReluS8(const int8_t* input, int8_t* output, const int32_t block_size,
- const int32_t input_zero_point,
- const int32_t output_zero_point,
- const int32_t output_multiplier_alpha,
- const int32_t output_shift_alpha,
- const int32_t output_multiplier_identity,
- const int32_t output_shift_identity) {
+void LeakyReluS8(const tflite::LeakyReluParams& params,
+ const tflite::RuntimeShape& input_shape, const int8_t* input,
+ const tflite::RuntimeShape& output_shape, int8_t* output) {
+ const int32_t input_zero_point = params.input_offset;
+ const int32_t output_zero_point = params.output_offset;
+ const int32_t output_multiplier_alpha = params.output_multiplier_alpha;
+ const int32_t output_shift_alpha = params.output_shift_alpha;
+ const int32_t output_multiplier_identity = params.output_multiplier_identity;
+ const int32_t output_shift_identity = params.output_shift_identity;
+ const int block_size = MatchingFlatSize(input_shape, output_shape);
+
constexpr int32_t quantized_output_min = std::numeric_limits<int16_t>::min();
constexpr int32_t quantized_output_max = std::numeric_limits<int16_t>::max();
int32_t right_shift_identity = std::min(output_shift_identity, 0L);
int32_t left_shift_identity = std::max(output_shift_identity, 0L);
int32_t right_shift_alpha = std::min(output_shift_alpha, 0L);
int32_t left_shift_alpha = std::max(output_shift_alpha, 0L);
+
int blocks = block_size;
int vl;
getmaxvl_b(vl);
diff --git a/tflm/opt/opt.h b/tflm/opt/opt.h
index e5169b2..65d6262 100644
--- a/tflm/opt/opt.h
+++ b/tflm/opt/opt.h
@@ -25,44 +25,36 @@
namespace kelvin::opt {
void* Memcpy(void* dst, const void* src, size_t n);
-void ElementwiseAddS8(const int8_t* input1, const int8_t* input2,
- const int32_t input1_offset, const int32_t input1_mult,
- const int32_t input1_shift, const int32_t input2_offset,
- const int32_t input2_mult, const int32_t input2_shift,
- const int32_t left_shift, int8_t* output,
- const int32_t output_offset, const int32_t output_mult,
- const int32_t output_shift,
- const int32_t output_activation_min,
- const int32_t output_activation_max,
- const int32_t block_size);
-void ElementwiseAddS16(const int16_t* input1, const int16_t* input2,
- const int32_t input1_offset, const int32_t input1_mult,
- const int32_t input1_shift, const int32_t input2_offset,
- const int32_t input2_mult, const int32_t input2_shift,
- const int32_t left_shift, int16_t* output,
- const int32_t output_offset, const int32_t output_mult,
- const int32_t output_shift,
- const int32_t output_activation_min,
- const int32_t output_activation_max,
- const int32_t block_size);
-void ElementwiseAddS32(const int32_t* input1, const int32_t* input2,
- int32_t* output, const int32_t output_activation_min,
- const int32_t output_activation_max,
- const int32_t block_size);
-void LeakyReluS8(const int8_t* input, int8_t* output, const int32_t block_size,
- const int32_t input_zero_point,
- const int32_t output_zero_point,
- const int32_t output_multiplier_alpha,
- const int32_t output_shift_alpha,
- const int32_t output_multiplier_identity,
- const int32_t output_shift_identity);
-void LeakyReluS16(const int16_t* input, int16_t* output,
- const int32_t block_size, const int32_t input_zero_point,
- const int32_t output_zero_point,
- const int32_t output_multiplier_alpha,
- const int32_t output_shift_alpha,
- const int32_t output_multiplier_identity,
- const int32_t output_shift_identity);
+void ElementwiseAddS8(const tflite::ArithmeticParams& params,
+ const tflite::RuntimeShape& input1_shape,
+ const int8_t* input1_data,
+ const tflite::RuntimeShape& input2_shape,
+ const int8_t* input2_data,
+ const tflite::RuntimeShape& output_shape,
+ int8_t* output_data);
+void ElementwiseAddS16(const tflite::ArithmeticParams& params,
+ const tflite::RuntimeShape& input1_shape,
+ const int16_t* input1_data,
+ const tflite::RuntimeShape& input2_shape,
+ const int16_t* input2_data,
+ const tflite::RuntimeShape& output_shape,
+ int16_t* output_data);
+void ElementwiseAddS32(const tflite::ArithmeticParams& params,
+ const tflite::RuntimeShape& input1_shape,
+ const int32_t* input1_data,
+ const tflite::RuntimeShape& input2_shape,
+ const int32_t* input2_data,
+ const tflite::RuntimeShape& output_shape,
+ int32_t* output_data);
+void LeakyReluS8(const tflite::LeakyReluParams& params,
+ const tflite::RuntimeShape& input_shape,
+ const int8_t* input_data,
+ const tflite::RuntimeShape& output_shape, int8_t* output_data);
+void LeakyReluS16(const tflite::LeakyReluParams& params,
+ const tflite::RuntimeShape& input_shape,
+ const int16_t* input_data,
+ const tflite::RuntimeShape& output_shape,
+ int16_t* output_data);
void ConvS16B32(const tflite::ConvParams& params,
const int32_t* output_multiplier, const int32_t* output_shift,
const tflite::RuntimeShape& input_shape,
@@ -109,26 +101,12 @@
int32_t input_multiplier, int32_t input_left_shift,
int32_t input_size, const int8_t* input_data,
int8_t* output_data);
-void KelvinResizeNearestNeighbor(
+void ResizeNearestNeighborS8(
const tflite::ResizeNearestNeighborParams& op_params,
const tflite::RuntimeShape& unextended_input_shape,
const int8_t* input_data, const tflite::RuntimeShape& output_size_shape,
const int32_t* output_size_data,
const tflite::RuntimeShape& unextended_output_shape, int8_t* output_data);
-void KelvinResizeNN2x(const tflite::ResizeNearestNeighborParams& op_params,
- const tflite::RuntimeShape& input_shape,
- const tflite::RuntimeShape& output_shape,
- const int32_t input_height, const int32_t input_width,
- const int32_t output_height, const int32_t output_width,
- const int8_t* input_data, int8_t* output_data);
-void KelvinResizeNNGeneric(const tflite::ResizeNearestNeighborParams& op_params,
- const tflite::RuntimeShape& input_shape,
- const tflite::RuntimeShape& output_shape,
- const int32_t input_height,
- const int32_t input_width,
- const int32_t output_height,
- const int32_t output_width, const int8_t* input_data,
- int8_t* output_data);
} // namespace kelvin::opt
diff --git a/tflm/opt/resize_nearest_neighbor_s8.cc b/tflm/opt/resize_nearest_neighbor_s8.cc
index 1f5cafb..8da7ee9 100644
--- a/tflm/opt/resize_nearest_neighbor_s8.cc
+++ b/tflm/opt/resize_nearest_neighbor_s8.cc
@@ -21,12 +21,12 @@
#include "tensorflow/lite/kernels/internal/types.h"
#include "tflm/opt/opt.h"
-inline int32_t KelvinGetNearestNeighbor(const int input_value,
- const int32_t input_size,
- const int32_t output_size,
- const bool align_corners,
- const bool half_pixel_centers,
- const float scale, const float offset) {
+namespace kelvin::opt {
+namespace {
+int32_t GetNearestNeighbor(const int input_value, const int32_t input_size,
+ const int32_t output_size, const bool align_corners,
+ const bool half_pixel_centers, const float scale,
+ const float offset) {
int32_t output_value = std::min(
align_corners
? static_cast<int32_t>(
@@ -39,14 +39,12 @@
return output_value;
}
-namespace kelvin::opt {
-
-void KelvinResizeNN2x(const tflite::ResizeNearestNeighborParams& op_params,
- const tflite::RuntimeShape& input_shape,
- const tflite::RuntimeShape& output_shape,
- const int32_t input_height, const int32_t input_width,
- const int32_t output_height, const int32_t output_width,
- const int8_t* input_data, int8_t* output_data) {
+void ResizeNN2x(const tflite::ResizeNearestNeighborParams& op_params,
+ const tflite::RuntimeShape& input_shape,
+ const tflite::RuntimeShape& output_shape,
+ const int32_t input_height, const int32_t input_width,
+ const int32_t output_height, const int32_t output_width,
+ const int8_t* input_data, int8_t* output_data) {
int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
const int col_offset = input_shape.Dims(3);
@@ -54,7 +52,6 @@
const int batch_offset = input_shape.Dims(1) * row_offset;
const int8_t* input_ptr = input_data;
- const int8_t* input_tmp_ptr = input_data;
int8_t* output_ptr = output_data;
for (int b = 0; b < batches; ++b) {
@@ -83,14 +80,12 @@
}
}
-void KelvinResizeNNGeneric(const tflite::ResizeNearestNeighborParams& op_params,
- const tflite::RuntimeShape& input_shape,
- const tflite::RuntimeShape& output_shape,
- const int32_t input_height,
- const int32_t input_width,
- const int32_t output_height,
- const int32_t output_width, const int8_t* input_data,
- int8_t* output_data) {
+void ResizeNNGeneric(const tflite::ResizeNearestNeighborParams& op_params,
+ const tflite::RuntimeShape& input_shape,
+ const tflite::RuntimeShape& output_shape,
+ const int32_t input_height, const int32_t input_width,
+ const int32_t output_height, const int32_t output_width,
+ const int8_t* input_data, int8_t* output_data) {
int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
const int col_offset = input_shape.Dims(3);
@@ -113,12 +108,12 @@
for (int b = 0; b < batches; ++b) {
for (int y = 0; y < output_height; ++y) {
- int32_t in_y = KelvinGetNearestNeighbor(
+ int32_t in_y = GetNearestNeighbor(
y, input_height, output_height, op_params.align_corners,
op_params.half_pixel_centers, y_scale, offset);
const int8_t* y_input_ptr = input_ptr + in_y * row_offset;
for (int x = 0; x < output_width; ++x) {
- int32_t in_x = KelvinGetNearestNeighbor(
+ int32_t in_x = GetNearestNeighbor(
x, input_width, output_width, op_params.align_corners,
op_params.half_pixel_centers, x_scale, offset);
const int8_t* x_input_ptr = y_input_ptr + in_x * col_offset;
@@ -130,8 +125,9 @@
input_ptr += batch_offset;
}
}
+} // namespace
-void KelvinResizeNearestNeighbor(
+void ResizeNearestNeighborS8(
const tflite::ResizeNearestNeighborParams& op_params,
const tflite::RuntimeShape& unextended_input_shape,
const int8_t* input_data, const tflite::RuntimeShape& output_size_shape,
@@ -153,14 +149,13 @@
int32_t output_width = output_size_data[1];
if (output_height == 2 * input_height && output_width == 2 * input_width) {
- KelvinResizeNN2x(op_params, input_shape, output_shape, input_height,
- input_width, output_height, output_width, input_data,
- output_data);
+ ResizeNN2x(op_params, input_shape, output_shape, input_height, input_width,
+ output_height, output_width, input_data, output_data);
} else {
- KelvinResizeNNGeneric(op_params, input_shape, output_shape, input_height,
- input_width, output_height, output_width, input_data,
- output_data);
+ ResizeNNGeneric(op_params, input_shape, output_shape, input_height,
+ input_width, output_height, output_width, input_data,
+ output_data);
}
}