Kelvin: add int8 ResizeNearestNeighbor kernel
* Specialized fast path for scale == 2 and depth % 32 == 0
* Verified (mismatch = 0)
Change-Id: Ibff24f419ef5e01eaf9ed4f8670ff91af2794c56
diff --git a/tests/tflm/BUILD b/tests/tflm/BUILD
index 33a91a9..183451f 100644
--- a/tests/tflm/BUILD
+++ b/tests/tflm/BUILD
@@ -134,3 +134,20 @@
"@tflite-micro//tensorflow/lite/micro/testing:micro_test",
],
)
+
+# Builds the upstream tflite-micro resize_nearest_neighbor_test.cc as a
+# kelvin_test target, linked against the Kelvin CRT and the tflite-micro
+# test helpers listed in deps.
+kelvin_test(
+ name = "resize_nearest_neighbor_test",
+ srcs = [
+ "@tflite-micro//tensorflow/lite/micro/kernels:resize_nearest_neighbor_test.cc",
+ ],
+ hw_test_size = "large",
+ deps = [
+ "//crt",
+ "@tflite-micro//tensorflow/lite/c:common",
+ "@tflite-micro//tensorflow/lite/kernels/internal:tensor",
+ "@tflite-micro//tensorflow/lite/micro:micro_utils",
+ "@tflite-micro//tensorflow/lite/micro:test_helpers",
+ "@tflite-micro//tensorflow/lite/micro/kernels:kernel_runner",
+ "@tflite-micro//tensorflow/lite/micro/testing:micro_test",
+ ],
+)
diff --git a/tflm/opt/BUILD b/tflm/opt/BUILD
index a23b7df..b9c0d76 100644
--- a/tflm/opt/BUILD
+++ b/tflm/opt/BUILD
@@ -35,6 +35,7 @@
"logistic_s8.cc",
"max_pool_s8.cc",
"memcpy.cc",
+ "resize_nearest_neighbor_s8.cc",
],
hdrs = [
"conv_s8.h",
diff --git a/tflm/opt/opt.h b/tflm/opt/opt.h
index 4fb9c51..e5169b2 100644
--- a/tflm/opt/opt.h
+++ b/tflm/opt/opt.h
@@ -109,6 +109,26 @@
int32_t input_multiplier, int32_t input_left_shift,
int32_t input_size, const int8_t* input_data,
int8_t* output_data);
+// Int8 nearest-neighbor resize entry point. Extends the input and output
+// shapes to 4 dimensions, reads the target height and width from
+// output_size_data[0] and output_size_data[1], and then hands off to either
+// KelvinResizeNN2x or KelvinResizeNNGeneric depending on the tensor sizes.
+void KelvinResizeNearestNeighbor(
+ const tflite::ResizeNearestNeighborParams& op_params,
+ const tflite::RuntimeShape& unextended_input_shape,
+ const int8_t* input_data, const tflite::RuntimeShape& output_size_shape,
+ const int32_t* output_size_data,
+ const tflite::RuntimeShape& unextended_output_shape, int8_t* output_data);
+// Upscale-by-two kernel: each input pixel's channel data is written to a 2x2
+// group of output pixels (two adjacent columns in two adjacent output rows).
+// Channel data is moved with the vld_b_x / vst_b_x intrinsics in steps of 32.
+void KelvinResizeNN2x(const tflite::ResizeNearestNeighborParams& op_params,
+ const tflite::RuntimeShape& input_shape,
+ const tflite::RuntimeShape& output_shape,
+ const int32_t input_height, const int32_t input_width,
+ const int32_t output_height, const int32_t output_width,
+ const int8_t* input_data, int8_t* output_data);
+// General nearest-neighbor resize: for every output pixel, derives the source
+// row and column from scale factors computed from the input/output sizes,
+// op_params.align_corners and op_params.half_pixel_centers, then copies the
+// pixel's channel bytes with kelvin::opt::Memcpy.
+void KelvinResizeNNGeneric(const tflite::ResizeNearestNeighborParams& op_params,
+ const tflite::RuntimeShape& input_shape,
+ const tflite::RuntimeShape& output_shape,
+ const int32_t input_height,
+ const int32_t input_width,
+ const int32_t output_height,
+ const int32_t output_width, const int8_t* input_data,
+ int8_t* output_data);
} // namespace kelvin::opt
diff --git a/tflm/opt/resize_nearest_neighbor_s8.cc b/tflm/opt/resize_nearest_neighbor_s8.cc
new file mode 100644
index 0000000..1f5cafb
--- /dev/null
+++ b/tflm/opt/resize_nearest_neighbor_s8.cc
@@ -0,0 +1,167 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <algorithm>
+#include <cmath>
+
+#include "crt/kelvin.h"
+#include "tensorflow/lite/kernels/internal/cppmath.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+#include "tflm/opt/opt.h"
+
+// Maps an output coordinate to the input coordinate it samples from.
+//
+// The position is computed as (input_value + offset) * scale, rounded with
+// tflite::TfLiteRound when align_corners is set and floored otherwise. The
+// result is capped at input_size - 1 and, when half_pixel_centers is set,
+// also raised to at least 0. output_size is accepted but not used.
+inline int32_t KelvinGetNearestNeighbor(const int input_value,
+                                        const int32_t input_size,
+                                        const int32_t output_size,
+                                        const bool align_corners,
+                                        const bool half_pixel_centers,
+                                        const float scale, const float offset) {
+  const float scaled_position = (input_value + offset) * scale;
+
+  int32_t nearest;
+  if (align_corners) {
+    nearest = static_cast<int32_t>(tflite::TfLiteRound(scaled_position));
+  } else {
+    nearest = static_cast<int32_t>(std::floor(scaled_position));
+  }
+
+  // Never index past the last input element.
+  nearest = std::min(nearest, input_size - 1);
+  if (half_pixel_centers) {
+    nearest = std::max(static_cast<int32_t>(0), nearest);
+  }
+  return nearest;
+}
+
+namespace kelvin::opt {
+
+// Nearest-neighbor upscale by exactly 2x in height and width for int8 data.
+//
+// Every input pixel (all `depth` channels) is replicated into a 2x2 group of
+// output pixels: columns 2x and 2x+1 of output rows 2y and 2y+1.
+//
+// Params:
+//   op_params      - resize parameters; only forwarded to the generic kernel
+//                    when the vector path cannot be used.
+//   input_shape    - 4-D input shape; dims 0 and 3 must match output_shape.
+//   output_shape   - 4-D output shape.
+//   input_height/input_width   - spatial size of the input.
+//   output_height/output_width - spatial size of the output; the caller is
+//                    expected to pass exactly twice the input size.
+//   input_data / output_data   - source and destination buffers.
+void KelvinResizeNN2x(const tflite::ResizeNearestNeighborParams& op_params,
+                      const tflite::RuntimeShape& input_shape,
+                      const tflite::RuntimeShape& output_shape,
+                      const int32_t input_height, const int32_t input_width,
+                      const int32_t output_height, const int32_t output_width,
+                      const int8_t* input_data, int8_t* output_data) {
+  const int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
+
+  // The vector loop below advances through the channels 32 at a time and has
+  // no tail handling, so a depth that is not a multiple of 32 would read past
+  // the source pixel and spill stores into neighbouring output pixels (and
+  // past the end of the buffers). Use the scalar-safe kernel in that case.
+  if (depth % 32 != 0) {
+    KelvinResizeNNGeneric(op_params, input_shape, output_shape, input_height,
+                          input_width, output_height, output_width, input_data,
+                          output_data);
+    return;
+  }
+
+  const int col_offset = input_shape.Dims(3);
+  const int row_offset = input_shape.Dims(2) * col_offset;
+  const int batch_offset = input_shape.Dims(1) * row_offset;
+
+  const int output_row_stride = output_width * depth;
+  const int output_batch_stride = output_height * output_row_stride;
+
+  const int8_t* input_ptr = input_data;
+  int8_t* output_ptr = output_data;
+
+  for (int b = 0; b < batches; ++b) {
+    for (int y = 0; y < input_height; ++y) {
+      const int8_t* input_row_ptr = input_ptr + y * input_width * depth;
+      // One input row feeds two consecutive output rows.
+      int8_t* output_row_ptr0 = output_ptr + 2 * y * output_row_stride;
+      int8_t* output_row_ptr1 = output_row_ptr0 + output_row_stride;
+
+      for (int x = 0; x < input_width; ++x) {
+        const int8_t* input_col_ptr = input_row_ptr + x * depth;
+        int8_t* output_col_ptr0 = output_row_ptr0 + 2 * x * depth;
+        int8_t* output_col_ptr1 = output_row_ptr1 + 2 * x * depth;
+
+        for (int channel = 0; channel < depth; channel += 32) {
+          // Load one chunk of channels and store it to all four output
+          // pixels of the 2x2 group.
+          vld_b_x(v0, input_col_ptr + channel);
+          vst_b_x(v0, output_col_ptr0 + channel);
+          vst_b_x(v0, output_col_ptr0 + depth + channel);
+          vst_b_x(v0, output_col_ptr1 + channel);
+          vst_b_x(v0, output_col_ptr1 + depth + channel);
+        }
+      }
+    }
+    input_ptr += batch_offset;
+    // Advance the destination as well; otherwise every batch after the first
+    // would overwrite the output of batch 0.
+    output_ptr += output_batch_stride;
+  }
+}
+
+// General int8 nearest-neighbor resize.
+//
+// For each output pixel the source row and column are obtained from
+// KelvinGetNearestNeighbor using per-axis scale factors; the pixel's `depth`
+// channel bytes are then copied with kelvin::opt::Memcpy. Output pixels are
+// written contiguously in batch, row, column order.
+//
+// The scale along an axis is (in - 1) / (out - 1) when op_params.align_corners
+// is set and the output extent is greater than 1, and in / out otherwise.
+// A 0.5 offset is applied to output coordinates when
+// op_params.half_pixel_centers is set.
+void KelvinResizeNNGeneric(const tflite::ResizeNearestNeighborParams& op_params,
+                           const tflite::RuntimeShape& input_shape,
+                           const tflite::RuntimeShape& output_shape,
+                           const int32_t input_height,
+                           const int32_t input_width,
+                           const int32_t output_height,
+                           const int32_t output_width, const int8_t* input_data,
+                           int8_t* output_data) {
+  const int32_t batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int32_t depth = MatchingDim(input_shape, 3, output_shape, 3);
+
+  // Element strides of the input tensor.
+  const int pixel_stride = input_shape.Dims(3);
+  const int row_stride = input_shape.Dims(2) * pixel_stride;
+  const int batch_stride = input_shape.Dims(1) * row_stride;
+
+  const bool align_corners = op_params.align_corners;
+  const bool half_pixel_centers = op_params.half_pixel_centers;
+  const float offset = half_pixel_centers ? 0.5f : 0.0f;
+
+  float y_scale;
+  if (align_corners && output_height > 1) {
+    y_scale = (input_height - 1) / static_cast<float>(output_height - 1);
+  } else {
+    y_scale = input_height / static_cast<float>(output_height);
+  }
+
+  float x_scale;
+  if (align_corners && output_width > 1) {
+    x_scale = (input_width - 1) / static_cast<float>(output_width - 1);
+  } else {
+    x_scale = input_width / static_cast<float>(output_width);
+  }
+
+  const int8_t* batch_src = input_data;
+  int8_t* dst = output_data;
+
+  for (int b = 0; b < batches; ++b, batch_src += batch_stride) {
+    for (int y = 0; y < output_height; ++y) {
+      const int32_t in_y = KelvinGetNearestNeighbor(
+          y, input_height, output_height, align_corners, half_pixel_centers,
+          y_scale, offset);
+      const int8_t* row_src = batch_src + in_y * row_stride;
+
+      for (int x = 0; x < output_width; ++x, dst += depth) {
+        const int32_t in_x = KelvinGetNearestNeighbor(
+            x, input_width, output_width, align_corners, half_pixel_centers,
+            x_scale, offset);
+        kelvin::opt::Memcpy(dst, row_src + in_x * pixel_stride,
+                            depth * sizeof(int8_t));
+      }
+    }
+  }
+}
+
+// Int8 nearest-neighbor resize entry point.
+//
+// Extends the (at most 4-D) input and output shapes to 4 dimensions, reads
+// the requested output height and width from output_size_data[0] and
+// output_size_data[1], and dispatches to:
+//   * KelvinResizeNN2x when the output is exactly twice the input in both
+//     height and width and the channel count is a multiple of 32;
+//   * KelvinResizeNNGeneric in every other case.
+//
+// Params:
+//   op_params               - align_corners / half_pixel_centers settings.
+//   unextended_input_shape  - input shape, up to 4 dimensions.
+//   input_data              - input tensor data.
+//   output_size_shape       - shape of the size tensor; must hold 2 elements.
+//   output_size_data        - {output_height, output_width}.
+//   unextended_output_shape - output shape, up to 4 dimensions.
+//   output_data             - destination buffer.
+void KelvinResizeNearestNeighbor(
+    const tflite::ResizeNearestNeighborParams& op_params,
+    const tflite::RuntimeShape& unextended_input_shape,
+    const int8_t* input_data, const tflite::RuntimeShape& output_size_shape,
+    const int32_t* output_size_data,
+    const tflite::RuntimeShape& unextended_output_shape, int8_t* output_data) {
+  TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+
+  const tflite::RuntimeShape input_shape =
+      tflite::RuntimeShape::ExtendedShape(4, unextended_input_shape);
+  const tflite::RuntimeShape output_shape =
+      tflite::RuntimeShape::ExtendedShape(4, unextended_output_shape);
+
+  const int32_t input_height = input_shape.Dims(1);
+  const int32_t input_width = input_shape.Dims(2);
+  const int32_t depth = input_shape.Dims(3);
+
+  TFLITE_DCHECK_EQ(output_size_shape.FlatSize(), 2);
+  const int32_t output_height = output_size_data[0];
+  const int32_t output_width = output_size_data[1];
+
+  const bool is_exact_2x = output_height == 2 * input_height &&
+                           output_width == 2 * input_width;
+  // The 2x kernel moves channels 32 at a time without tail handling, so it is
+  // only safe when the channel count is a multiple of 32.
+  const bool depth_fits_vector_path = (depth % 32) == 0;
+
+  if (is_exact_2x && depth_fits_vector_path) {
+    KelvinResizeNN2x(op_params, input_shape, output_shape, input_height,
+                     input_width, output_height, output_width, input_data,
+                     output_data);
+  } else {
+    KelvinResizeNNGeneric(op_params, input_shape, output_shape, input_height,
+                          input_width, output_height, output_width, input_data,
+                          output_data);
+  }
+}
+
+} // namespace kelvin::opt