| // Copyright 2023 Google LLC |
| // Licensed under the Apache License, Version 2.0, see LICENSE for details. |
| // SPDX-License-Identifier: Apache-2.0 |
| |
| #include "tests/cv/diff.h" |
| |
| #include <cstdint> |
| |
| #include "crt/kelvin.h" |
| |
// Branch-prediction hints. Each macro evaluates to the truthiness of `cond`
// (normalised to 0 or 1 by the double negation) while telling the compiler
// which outcome is expected: 1 for likely(), 0 for unlikely().
#define likely(cond) (__builtin_expect(!!(cond), 1))
#define unlikely(cond) (__builtin_expect(!!(cond), 0))
| |
| namespace kelvin::cv { |
| |
| void diff(int num_cols, const uint16_t* input0_row, const uint16_t* input1_row, |
| uint16_t* output_row) { |
| int vl; |
| int n = num_cols; |
| do { |
| getvl_h_x_m(vl, n); |
| n -= vl; |
| vld_h_lp_xx_m(vm1, input0_row, vl); |
| vld_h_lp_xx_m(vm2, input1_row, vl); |
| vsub_h_vv_m(vm0, vm1, vm2); |
| vst_h_lp_xx_m(vm0, output_row, vl); |
| } while (n > 0); |
| } |
| |
| void diffp(int num_cols, const uint16_t* input0_row, const uint16_t* input1_row, |
| uint16_t* output_row) { |
| int vl_0, vl_1; |
| int n = num_cols; |
| |
| // [0] load |
| getvl_h_x_m(vl_0, n); |
| n -= vl_0; |
| vld_h_lp_xx_m(v4, input0_row, vl_0); |
| vld_h_lp_xx_m(v8, input1_row, vl_0); |
| |
| while (true) { |
| // [1] load |
| getvl_h_x_m(vl_1, n); |
| n -= vl_1; |
| vld_h_lp_xx_m(v20, input0_row, vl_1); |
| vld_h_lp_xx_m(v24, input1_row, vl_1); |
| |
| // [0] store |
| vsub_h_vv_m(v0, v4, v8); |
| vst_h_lp_xx_m(v0, output_row, vl_0); |
| if (unlikely(!vl_1)) break; |
| |
| // [0] load |
| getvl_h_x_m(vl_0, n); |
| n -= vl_0; |
| vld_h_lp_xx_m(v4, input0_row, vl_0); |
| vld_h_lp_xx_m(v8, input1_row, vl_0); |
| |
| // [1] store |
| vsub_h_vv_m(v16, v20, v24); |
| vst_h_lp_xx_m(v16, output_row, vl_1); |
| if (unlikely(!vl_0)) break; |
| } |
| } |
| |
| void diff4(int num_cols, int stride, const uint16_t* input0_row, |
| const uint16_t* input1_row, uint16_t* output_row) { |
| int vl; |
| int n = num_cols; |
| do { |
| getvl_h_x(vl, n); |
| n -= vl; |
| vld_h_tp_xx_m(v4, input0_row, stride); |
| vld_h_tp_xx_m(v8, input1_row, stride); |
| vsub_h_vv_m(v0, v4, v8); |
| vst_h_tp_xx_m(v0, output_row, stride); |
| } while (n > 0); |
| } |
| |
| void diff4p(int num_cols, int stride, const uint16_t* input0_row, |
| const uint16_t* input1_row, uint16_t* output_row) { |
| int vl_0, vl_1; |
| int n = num_cols; |
| |
| // [0] load |
| getvl_h_x(vl_0, n); |
| n -= vl_0; |
| vld_h_tp_xx_m(v4, input0_row, stride); |
| vld_h_tp_xx_m(v8, input1_row, stride); |
| |
| while (true) { |
| // [1] load |
| getvl_h_x(vl_1, n); |
| n -= vl_1; |
| if (likely(vl_1)) { |
| vld_h_tp_xx_m(v20, input0_row, stride); |
| vld_h_tp_xx_m(v24, input1_row, stride); |
| } |
| |
| // [0] store |
| vsub_h_vv_m(v0, v4, v8); |
| vst_h_tp_xx_m(v0, output_row, stride); |
| if (unlikely(!vl_1)) break; |
| |
| // [0] load |
| getvl_h_x(vl_0, n); |
| n -= vl_0; |
| if (likely(vl_0)) { |
| vld_h_tp_xx_m(v4, input0_row, stride); |
| vld_h_tp_xx_m(v8, input1_row, stride); |
| } |
| |
| // [1] store |
| vsub_h_vv_m(v16, v20, v24); |
| vst_h_tp_xx_m(v16, output_row, stride); |
| if (unlikely(!vl_0)) break; |
| } |
| } |
| |
| }; // namespace kelvin::cv |