// blob: db39296ce5a763aadbf85274f3027e1ab480ebbf [file] [log] [blame]
// Copyright 2023 Google LLC
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
#include "tests/cv/diff.h"
#include <cstdint>
#include "crt/kelvin.h"
// Branch-prediction hints built on the compiler's __builtin_expect.
// `!!(x)` collapses any scalar condition to 0 or 1 before it is compared with
// the expected value, so the macros can wrap integers as well as booleans.
// likely(x): the condition is expected to be true.
#define likely(x) __builtin_expect(!!(x), 1)
// unlikely(x): the condition is expected to be false.
#define unlikely(x) __builtin_expect(!!(x), 0)
namespace kelvin::cv {
// Subtracts one row of 16-bit values from another, chunk by chunk, and stores
// the result in output_row.
//
//   num_cols    number of 16-bit elements to process.
//   input0_row  first operand row (loaded into vm1).
//   input1_row  second operand row (loaded into vm2).
//   output_row  destination row (written from vm0).
//
// The loop body always executes at least once, even when num_cols <= 0.
//
// NOTE(review): the intrinsics are defined in crt/kelvin.h, not in this file.
// Judging by their names, getvl_h_x_m yields the element count for this pass
// (bounded by `remaining`), the `lp` load/store forms transfer that many
// elements and advance the pointer, and vsub_h_vv_m computes vm1 - vm2, i.e.
// input0 - input1 -- confirm against the Kelvin ISA reference.
void diff(int num_cols, const uint16_t* input0_row, const uint16_t* input1_row,
          uint16_t* output_row) {
  int remaining = num_cols;
  int chunk;
  for (;;) {
    // Ask for the size of this pass and account for it straight away.
    getvl_h_x_m(chunk, remaining);
    remaining -= chunk;

    // Load both operands, subtract, and store the result.
    vld_h_lp_xx_m(vm1, input0_row, chunk);
    vld_h_lp_xx_m(vm2, input1_row, chunk);
    vsub_h_vv_m(vm0, vm1, vm2);
    vst_h_lp_xx_m(vm0, output_row, chunk);

    if (remaining <= 0) break;
  }
}
// Variant of the row difference that overlaps loads with subtract/store work.
//
//   num_cols    number of 16-bit elements to process.
//   input0_row  first operand row.
//   input1_row  second operand row.
//   output_row  destination row.
//
// Two register sets are used alternately:
//   set A: v4, v8  -> v0   (length len_a)
//   set B: v20, v24 -> v16 (length len_b)
// The loads for the next chunk are issued before the subtract/store of the
// current chunk. The loop ends as soon as the look-ahead length comes back as
// zero, after the chunk already in flight has been stored.
//
// NOTE(review): the look-ahead loads are issued even when their length is 0;
// presumably a zero-length `lp` load transfers nothing -- confirm against
// crt/kelvin.h. The statement order below is significant; do not reorder.
void diffp(int num_cols, const uint16_t* input0_row, const uint16_t* input1_row,
           uint16_t* output_row) {
  int remaining = num_cols;
  int len_a;
  int len_b;

  // Prologue: fetch the first chunk into set A.
  getvl_h_x_m(len_a, remaining);
  remaining -= len_a;
  vld_h_lp_xx_m(v4, input0_row, len_a);
  vld_h_lp_xx_m(v8, input1_row, len_a);

  for (;;) {
    // Look ahead: fetch the following chunk into set B.
    getvl_h_x_m(len_b, remaining);
    remaining -= len_b;
    vld_h_lp_xx_m(v20, input0_row, len_b);
    vld_h_lp_xx_m(v24, input1_row, len_b);

    // Retire set A: subtract and store.
    vsub_h_vv_m(v0, v4, v8);
    vst_h_lp_xx_m(v0, output_row, len_a);
    if (unlikely(len_b == 0)) break;  // nothing was fetched into set B

    // Look ahead: fetch the following chunk into set A.
    getvl_h_x_m(len_a, remaining);
    remaining -= len_a;
    vld_h_lp_xx_m(v4, input0_row, len_a);
    vld_h_lp_xx_m(v8, input1_row, len_a);

    // Retire set B: subtract and store.
    vsub_h_vv_m(v16, v20, v24);
    vst_h_lp_xx_m(v16, output_row, len_b);
    if (unlikely(len_a == 0)) break;  // nothing was fetched into set A
  }
}
void diff4(int num_cols, int stride, const uint16_t* input0_row,
const uint16_t* input1_row, uint16_t* output_row) {
int vl;
int n = num_cols;
do {
getvl_h_x(vl, n);
n -= vl;
vld_h_tp_xx_m(v4, input0_row, stride);
vld_h_tp_xx_m(v8, input1_row, stride);
vsub_h_vv_m(v0, v4, v8);
vst_h_tp_xx_m(v0, output_row, stride);
} while (n > 0);
}
// Variant of the strided row difference that overlaps loads with
// subtract/store work.
//
//   num_cols    number of 16-bit elements to process per row.
//   stride      passed unchanged to every strided load and store.
//   input0_row  first operand.
//   input1_row  second operand.
//   output_row  destination.
//
// Two register sets are used alternately:
//   set A: v4, v8  -> v0   (count len_a)
//   set B: v20, v24 -> v16 (count len_b)
// The look-ahead loads are skipped when the look-ahead count is zero, and the
// loop then exits after storing the chunk already in flight. The first pair
// of loads and the first store are unconditional.
//
// NOTE(review): the `tp` loads take no length argument, so the guards
// presumably exist to avoid touching memory past the end of the rows --
// confirm against crt/kelvin.h. The statement order below is significant; do
// not reorder.
void diff4p(int num_cols, int stride, const uint16_t* input0_row,
            const uint16_t* input1_row, uint16_t* output_row) {
  int remaining = num_cols;
  int len_a;
  int len_b;

  // Prologue: fetch the first chunk into set A.
  getvl_h_x(len_a, remaining);
  remaining -= len_a;
  vld_h_tp_xx_m(v4, input0_row, stride);
  vld_h_tp_xx_m(v8, input1_row, stride);

  for (;;) {
    // Look ahead: fetch the following chunk into set B, if there is one.
    getvl_h_x(len_b, remaining);
    remaining -= len_b;
    if (likely(len_b != 0)) {
      vld_h_tp_xx_m(v20, input0_row, stride);
      vld_h_tp_xx_m(v24, input1_row, stride);
    }

    // Retire set A: subtract and store.
    vsub_h_vv_m(v0, v4, v8);
    vst_h_tp_xx_m(v0, output_row, stride);
    if (unlikely(len_b == 0)) break;  // set B was not loaded

    // Look ahead: fetch the following chunk into set A, if there is one.
    getvl_h_x(len_a, remaining);
    remaining -= len_a;
    if (likely(len_a != 0)) {
      vld_h_tp_xx_m(v4, input0_row, stride);
      vld_h_tp_xx_m(v8, input1_row, stride);
    }

    // Retire set B: subtract and store.
    vsub_h_vv_m(v16, v20, v24);
    vst_h_tp_xx_m(v16, output_row, stride);
    if (unlikely(len_a == 0)) break;  // set A was not loaded
  }
}
}; // namespace kelvin::cv