Support functions for LeakyRelu
- Add int8 and int16 helper methods for implementing a LeakyRelu kernel.
Change-Id: Id017f614fa6638273eb63f50e1196ca6da44ca51
diff --git a/tests/tflm/BUILD b/tests/tflm/BUILD
index 71f70ab..05c196b 100644
--- a/tests/tflm/BUILD
+++ b/tests/tflm/BUILD
@@ -1,4 +1,5 @@
load("//build_tools/bazel:kelvin.bzl", "kelvin_test")
+
package(default_visibility = ["//visibility:public"])
kelvin_test(
@@ -10,10 +11,26 @@
"//crt:crt_header",
"@tflite-micro//tensorflow/lite/c:common",
"@tflite-micro//tensorflow/lite/kernels/internal:tensor",
- "@tflite-micro//tensorflow/lite/micro/kernels:kernel_runner",
- "@tflite-micro//tensorflow/lite/micro/testing:micro_test",
"@tflite-micro//tensorflow/lite/micro:micro_utils",
"@tflite-micro//tensorflow/lite/micro:test_helpers",
+ "@tflite-micro//tensorflow/lite/micro/kernels:kernel_runner",
+ "@tflite-micro//tensorflow/lite/micro/testing:micro_test",
+ ],
+)
+
+kelvin_test(
+ name = "leaky_relu_test",
+ srcs = [
+ "@tflite-micro//tensorflow/lite/micro/kernels:leaky_relu_test.cc",
+ ],
+ deps = [
+ "//crt:crt_header",
+ "@tflite-micro//tensorflow/lite/c:common",
+ "@tflite-micro//tensorflow/lite/kernels/internal:tensor",
+ "@tflite-micro//tensorflow/lite/micro:micro_utils",
+ "@tflite-micro//tensorflow/lite/micro:test_helpers",
+ "@tflite-micro//tensorflow/lite/micro/kernels:kernel_runner",
+ "@tflite-micro//tensorflow/lite/micro/testing:micro_test",
],
)
@@ -26,9 +43,9 @@
"//crt:crt_header",
"@tflite-micro//tensorflow/lite/c:common",
"@tflite-micro//tensorflow/lite/kernels/internal:tensor",
- "@tflite-micro//tensorflow/lite/micro/kernels:kernel_runner",
- "@tflite-micro//tensorflow/lite/micro/testing:micro_test",
"@tflite-micro//tensorflow/lite/micro:micro_utils",
"@tflite-micro//tensorflow/lite/micro:test_helpers",
+ "@tflite-micro//tensorflow/lite/micro/kernels:kernel_runner",
+ "@tflite-micro//tensorflow/lite/micro/testing:micro_test",
],
)
diff --git a/tflm/opt/BUILD b/tflm/opt/BUILD
index 2ba3900..8da2593 100644
--- a/tflm/opt/BUILD
+++ b/tflm/opt/BUILD
@@ -3,18 +3,20 @@
cc_library(
name = "opt",
srcs = [
- "elementwise_add_s8.cc",
"elementwise_add_s16.cc",
"elementwise_add_s32.cc",
+ "elementwise_add_s8.cc",
+ "leaky_relu_s16.cc",
+ "leaky_relu_s8.cc",
"memcpy.cc",
],
hdrs = [
"opt.h",
"util.h",
],
+ target_compatible_with = ["@kelvin_sw//platforms/cpu:kelvin"],
deps = [
"//crt:crt_header",
],
alwayslink = True,
- target_compatible_with = ["@kelvin_sw//platforms/cpu:kelvin"],
)
diff --git a/tflm/opt/leaky_relu_s16.cc b/tflm/opt/leaky_relu_s16.cc
new file mode 100644
index 0000000..e3ac66d
--- /dev/null
+++ b/tflm/opt/leaky_relu_s16.cc
@@ -0,0 +1,73 @@
+// Copyright 2023 Google LLC
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+#include <limits>
+
+#include "crt/kelvin.h"
+#include "tflm/opt/opt.h"
+#include "tflm/opt/util.h"
+
+namespace kelvin::opt {
+// Quantized LeakyRelu over `block_size` int16 values: subtract the input zero
+// point, rescale with the `identity` multiplier/shift where that value is >= 0
+// and with the `alpha` pair where it is < 0, add the output zero point, and
+// clamp to the int16 range. A non-positive `block_size` is a no-op.
+void leaky_relu_s16(const int16_t* input, int16_t* output,
+                    const int32_t block_size, const int32_t input_zero_point,
+                    const int32_t output_zero_point,
+                    const int32_t output_multiplier_alpha,
+                    const int32_t output_shift_alpha,
+                    const int32_t output_multiplier_identity,
+                    const int32_t output_shift_identity) {
+  constexpr int32_t quantized_output_min = std::numeric_limits<int16_t>::min();
+  constexpr int32_t quantized_output_max = std::numeric_limits<int16_t>::max();
+  // Split each signed shift into a left part (>= 0) and a right part (<= 0).
+  // <int32_t> is explicit so the calls compile whether int32_t is int or long.
+  int32_t right_shift_identity = std::min<int32_t>(output_shift_identity, 0);
+  int32_t left_shift_identity = std::max<int32_t>(output_shift_identity, 0);
+  int32_t right_shift_alpha = std::min<int32_t>(output_shift_alpha, 0);
+  int32_t left_shift_alpha = std::max<int32_t>(output_shift_alpha, 0);
+  int blocks = block_size;
+  int vl;
+  getmaxvl_h(vl);
+  while (blocks > 0) {
+    int count = std::min(blocks, vl);
+    // Load `count` inputs, widen to 32 bits, then remove the input offset.
+    vld_h_lp_xx(v0, input, count);
+    vaddw_w_vx(v0, v0, 0);
+    vsub_w_vx_m(vm0, vm0, input_zero_point);
+
+    // Candidate result for inputs >= 0: rescale with the identity parameters.
+    vsll_w_vx_m(vm2, vm0, left_shift_identity);
+    vdmulh_w_r_vx_m(vm2, vm2, output_multiplier_identity);
+    vsha_w_vx_m(vm2, vm2, RIGHT_SHIFT(right_shift_identity));
+    vadd_w_vx_m(vm2, vm2, output_zero_point);
+    vmax_w_vx_m(vm2, vm2, quantized_output_min);
+    vmin_w_vx_m(vm2, vm2, quantized_output_max);
+
+    // Candidate result for inputs < 0: rescale with the alpha parameters.
+    vsll_w_vx_m(vm1, vm0, left_shift_alpha);
+    vdmulh_w_r_vx_m(vm1, vm1, output_multiplier_alpha);
+    vsha_w_vx_m(vm1, vm1, RIGHT_SHIFT(right_shift_alpha));
+    vadd_w_vx_m(vm1, vm1, output_zero_point);
+    vmax_w_vx_m(vm1, vm1, quantized_output_min);
+    vmin_w_vx_m(vm1, vm1, quantized_output_max);
+
+    // Select per lane: build boolean masks for inputs >= 0 (vm3) and < 0
+    // (vm0), multiply each candidate by its mask, and sum the two products.
+    vge_w_vx_m(vm3, vm0, 0);
+    vlt_w_vx_m(vm0, vm0, 0);
+    vmul_w_vv_m(vm2, vm2, vm3);
+    vmul_w_vv_m(vm0, vm1, vm0);
+    vadd_w_vv_m(vm0, vm0, vm2);
+
+    // Narrow/swizzle back to 16 bits, and store `count` results to output.
+    vsrans_h_vx(v0, v0, 0);
+    vst_h_lp_xx(v0, output, count);
+
+    blocks -= count;
+  }
+}
+
+} // namespace kelvin::opt
diff --git a/tflm/opt/leaky_relu_s8.cc b/tflm/opt/leaky_relu_s8.cc
new file mode 100644
index 0000000..15b9218
--- /dev/null
+++ b/tflm/opt/leaky_relu_s8.cc
@@ -0,0 +1,76 @@
+// Copyright 2023 Google LLC
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+#include <limits>
+
+#include "crt/kelvin.h"
+#include "tflm/opt/opt.h"
+#include "tflm/opt/util.h"
+
+namespace kelvin::opt {
+
+// Quantized LeakyRelu over `block_size` int8 values: subtract the input zero
+// point, rescale with the `identity` multiplier/shift where that value is >= 0
+// and with the `alpha` pair where it is < 0, add the output zero point, and
+// clamp to the int8 range. A non-positive `block_size` is a no-op.
+void leaky_relu_s8(const int8_t* input, int8_t* output,
+                   const int32_t block_size, const int32_t input_zero_point,
+                   const int32_t output_zero_point,
+                   const int32_t output_multiplier_alpha,
+                   const int32_t output_shift_alpha,
+                   const int32_t output_multiplier_identity,
+                   const int32_t output_shift_identity) {
+  constexpr int32_t quantized_output_min = std::numeric_limits<int8_t>::min();
+  constexpr int32_t quantized_output_max = std::numeric_limits<int8_t>::max();
+  // Split each signed shift into a left part (>= 0) and a right part (<= 0).
+  // <int32_t> is explicit so the calls compile whether int32_t is int or long.
+  int32_t right_shift_identity = std::min<int32_t>(output_shift_identity, 0);
+  int32_t left_shift_identity = std::max<int32_t>(output_shift_identity, 0);
+  int32_t right_shift_alpha = std::min<int32_t>(output_shift_alpha, 0);
+  int32_t left_shift_alpha = std::max<int32_t>(output_shift_alpha, 0);
+  int blocks = block_size;
+  int vl;
+  getmaxvl_b(vl);
+  while (blocks > 0) {
+    int count = std::min(blocks, vl);
+    // Load, widen 8 -> 16 -> 32 bits (now we can use vm0), remove input offset.
+    vld_b_lp_xx(v0, input, count);
+    vaddw_h_vx(v0, v0, 0);
+    vaddw_w_vx(v2, v1, 0);
+    vaddw_w_vx(v0, v0, 0);
+    vsub_w_vx_m(vm0, vm0, input_zero_point);
+
+    // Candidate result for inputs >= 0: rescale with the identity parameters.
+    vsll_w_vx_m(vm2, vm0, left_shift_identity);
+    vdmulh_w_r_vx_m(vm2, vm2, output_multiplier_identity);
+    vsha_w_vx_m(vm2, vm2, RIGHT_SHIFT(right_shift_identity));
+    vadd_w_vx_m(vm2, vm2, output_zero_point);
+    vmax_w_vx_m(vm2, vm2, quantized_output_min);
+    vmin_w_vx_m(vm2, vm2, quantized_output_max);
+
+    // Candidate result for inputs < 0: rescale with the alpha parameters.
+    vsll_w_vx_m(vm1, vm0, left_shift_alpha);
+    vdmulh_w_r_vx_m(vm1, vm1, output_multiplier_alpha);
+    vsha_w_vx_m(vm1, vm1, RIGHT_SHIFT(right_shift_alpha));
+    vadd_w_vx_m(vm1, vm1, output_zero_point);
+    vmax_w_vx_m(vm1, vm1, quantized_output_min);
+    vmin_w_vx_m(vm1, vm1, quantized_output_max);
+
+    // Select per lane: build boolean masks for inputs >= 0 (vm3) and < 0
+    // (vm0), multiply each candidate by its mask, and sum the two products.
+    vge_w_vx_m(vm3, vm0, 0);
+    vlt_w_vx_m(vm0, vm0, 0);
+    vmul_w_vv_m(vm2, vm2, vm3);
+    vmul_w_vv_m(vm0, vm1, vm0);
+    vadd_w_vv_m(vm0, vm0, vm2);
+
+    // Narrow/swizzle back to 8 bits, and store `count` results to output.
+    vsraqs_b_vx(v0, v0, 0);
+    vst_b_lp_xx(v0, output, count);
+
+    blocks -= count;
+  }
+}
+
+} // namespace kelvin::opt
diff --git a/tflm/opt/opt.h b/tflm/opt/opt.h
index 12075ab..6009ee3 100644
--- a/tflm/opt/opt.h
+++ b/tflm/opt/opt.h
@@ -31,6 +31,20 @@
int32_t* output, const int32_t output_activation_min,
const int32_t output_activation_max,
const int32_t block_size);
+void leaky_relu_s8(const int8_t* input, int8_t* output,
+ const int32_t block_size, const int32_t input_zero_point,
+ const int32_t output_zero_point,
+ const int32_t output_multiplier_alpha,  // multiplier/shift pair used for
+ const int32_t output_shift_alpha,  // inputs < 0 (after zero-point removal)
+ const int32_t output_multiplier_identity,  // multiplier/shift pair used for
+ const int32_t output_shift_identity);  // inputs >= 0
+void leaky_relu_s16(const int16_t* input, int16_t* output,
+ const int32_t block_size, const int32_t input_zero_point,
+ const int32_t output_zero_point,
+ const int32_t output_multiplier_alpha,  // multiplier/shift pair used for
+ const int32_t output_shift_alpha,  // inputs < 0 (after zero-point removal)
+ const int32_t output_multiplier_identity,  // multiplier/shift pair used for
+ const int32_t output_shift_identity);  // inputs >= 0
} // namespace kelvin::opt
#endif // TFLM_OPT_OPT_H_