Add the option to optimize RISP4ML using RVV
This change adds the option of using RISC-V vector (RVV) instructions to reduce the number of instructions executed by the RISP4ML toolchain.
Summary:
- All 6 blocks (BLC, DG, WBG, Demosaic, Gamma and Downscaler) have been optimized.
- Numerical correctness has been verified. All unit tests passed (after
necessary rewriting).
- Significant saving in instruction count: reduced from 40% to 1%.
Change-Id: I6abc8f253f5a74044985ab64797e5de5a76364a2
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 05fe4cc..c227663 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -31,6 +31,7 @@
add_link_options("LINKER:--defsym=__stack_size__=${STACK_SIZE}")
set(SPRINGBOK_LINKER_SCRIPT "$ENV{ROOTDIR}/sw/vec/springbok/springbok.ld" CACHE PATH "Springbok linker script path (default: springbok.ld)")
set(BUILD_WITH_SPRINGBOK ON CACHE BOOL "Build the target with springbok BSP (default: ON)")
+set(BUILD_ISP_WITH_RVV ON CACHE BOOL "Build the ISP pipeline with RVV (default: ON)")
#-------------------------------------------------------------------------------
# IREE-specific settings
diff --git a/samples/risp4ml/isp_stages/CMakeLists.txt b/samples/risp4ml/isp_stages/CMakeLists.txt
index bc2c798..7d47d93 100644
--- a/samples/risp4ml/isp_stages/CMakeLists.txt
+++ b/samples/risp4ml/isp_stages/CMakeLists.txt
@@ -1,10 +1,36 @@
+#-------------------------------------------------------------------------------
+# Build libraries and unit tests for RISP4ML blocks.
+# Use different source files for RVV and non-RVV versions
+#-------------------------------------------------------------------------------
+
+set(BLC_SRC "blc.c")
+set(DEMOSAIC_SRC "demosaic.c")
+set(DG_SRC "dg.c")
+set(DOWNSCALE_SRC "downscale.c")
+set(GAMMA_SRC "gamma.c")
+set(WBG_SRC "wbg.c")
+set(DOWNSCALE_TEST_SRC "downscale_test.cc")
+set(GAMMA_TEST_SRC "gamma_test.cc")
+
+if (${BUILD_ISP_WITH_RVV})
+ set(BLC_SRC "blc_rvv.c")
+ set(DEMOSAIC_SRC "demosaic_rvv.c")
+ set(DG_SRC "dg_rvv.c")
+ set(DOWNSCALE_SRC "downscale_rvv.c")
+ set(GAMMA_SRC "gamma_rvv.c")
+ set(WBG_SRC "wbg_rvv.c")
+ set(DOWNSCALE_TEST_SRC "downscale_rvv_test.cc")
+ set(GAMMA_TEST_SRC "gamma_rvv_test.cc")
+ add_definitions(-DISP_WITH_RVV)
+endif()
+
iree_cc_library(
NAME
blc
HDRS
"blc.h"
SRCS
- "blc.c"
+ ${BLC_SRC}
DEPS
samples::risp4ml::common::image
samples::risp4ml::common::utils
@@ -16,7 +42,7 @@
HDRS
"demosaic.h"
SRCS
- "demosaic.c"
+ ${DEMOSAIC_SRC}
DEPS
samples::risp4ml::common::image
samples::risp4ml::common::utils
@@ -28,7 +54,7 @@
HDRS
"dg.h"
SRCS
- "dg.c"
+ ${DG_SRC}
DEPS
samples::risp4ml::common::image
samples::risp4ml::common::utils
@@ -40,7 +66,7 @@
HDRS
"downscale.h"
SRCS
- "downscale.c"
+ ${DOWNSCALE_SRC}
DEPS
samples::risp4ml::common::image
samples::risp4ml::common::utils
@@ -52,7 +78,7 @@
HDRS
"gamma.h"
SRCS
- "gamma.c"
+ ${GAMMA_SRC}
DEPS
samples::risp4ml::common::image
samples::risp4ml::common::utils
@@ -64,7 +90,7 @@
HDRS
"wbg.h"
SRCS
- "wbg.c"
+ ${WBG_SRC}
DEPS
samples::risp4ml::common::image
samples::risp4ml::common::utils
@@ -100,7 +126,7 @@
NAME
downscale_test
SRCS
- "downscale_test.cc"
+ ${DOWNSCALE_TEST_SRC}
DEPS
::downscale
pw_unit_test
@@ -112,7 +138,7 @@
NAME
gamma_test
SRCS
- "gamma_test.cc"
+ ${GAMMA_TEST_SRC}
DEPS
::gamma
pw_unit_test
diff --git a/samples/risp4ml/isp_stages/blc_rvv.c b/samples/risp4ml/isp_stages/blc_rvv.c
new file mode 100644
index 0000000..21c53fe
--- /dev/null
+++ b/samples/risp4ml/isp_stages/blc_rvv.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <riscv_vector.h>
+
+#include "samples/risp4ml/common/utils.h"
+#include "samples/risp4ml/isp_stages/blc.h"
+
+static BlcParams blc_params = {.enable = true,
+ .offsets = {2048, 2048, 2048, 2048}};
+
+void set_blc_params(BlcParams* params) { blc_params = *params; }
+
+void blc_process(Image* img) {
+ if (!blc_params.enable) return;
+
+ size_t vl;
+ size_t n = img->height * img->width * img->num_channels;
+ uint16_t offset = blc_params.offsets[0];
+ vuint16m8_t vx; // auxiliary variable
+ uint16_t* x;
+ for (size_t i = 0; i < n; i += vl) {
+ x = img->data + i;
+ vl = vsetvl_e16m8(n - i);
+ vx = vle16_v_u16m8(x, vl); // load
+ vx = vssubu(vx, offset, vl); // subtract
+ vse16(x, vx, vl); // save
+ }
+}
diff --git a/samples/risp4ml/isp_stages/demosaic_rvv.c b/samples/risp4ml/isp_stages/demosaic_rvv.c
new file mode 100644
index 0000000..2ed9d25
--- /dev/null
+++ b/samples/risp4ml/isp_stages/demosaic_rvv.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <riscv_vector.h>
+
+#include "samples/risp4ml/common/utils.h"
+#include "samples/risp4ml/isp_stages/demosaic.h"
+
+#define kRgbColorChannels 3
+
+static DemosaicParams demosaic_params = {.enable = true};
+
+void set_demosaic_params(DemosaicParams* params) { demosaic_params = *params; }
+
+// Basic bilinear demosaic
+void demosaic_process(Image* input, Image* output) {
+ if (!demosaic_params.enable) {
+ return;
+ }
+
+ const pixel_type_t* line_in[kRgbColorChannels];
+ pixel_type_t* line_out[kRgbColorChannels];
+ int x_offset[kRgbColorChannels];
+ const uint16_t boundary[kRgbColorChannels] = {0, input->width - 2,
+ input->width - 1};
+
+ size_t vl;
+ // auxiliary variables
+ vuint16m4_t vx, vy;
+ vuint32m8_t vz;
+
+ for (uint16_t y = 0; y < input->height; ++y) {
+ line_in[0] = (y) ? image_row(input, 0, y - 1) : image_row(input, 0, 1);
+ line_in[1] = image_row(input, 0, y);
+ line_in[2] = (y < input->height - 1)
+ ? image_row(input, 0, y + 1)
+ : image_row(input, 0, input->height - 2);
+
+ line_out[1] = image_row(output, 1, y);
+ if ((y & 0x1) == 0) {
+ line_out[0] = image_row(output, 0, y);
+ line_out[2] = image_row(output, 2, y);
+ } else {
+ line_out[0] = image_row(output, 2, y);
+ line_out[2] = image_row(output, 0, y);
+ }
+
+ // x at boundary
+ for (uint8_t i = 0; i < 3; ++i) {
+ uint16_t x = boundary[i];
+ for (uint16_t c = 0; c < kRgbColorChannels; ++c) {
+ x_offset[c] = BayerMirrorBoundary(x - 1 + c, input->width);
+ }
+
+ BayerIndex bayer_index = GetBayerIndex(kBayerType, x, y);
+ switch (bayer_index) {
+ case (kB):
+ case (kR): {
+ line_out[0][x] = line_in[1][x_offset[1]];
+ line_out[1][x] = (line_in[0][x_offset[1]] + line_in[2][x_offset[1]] +
+ line_in[1][x_offset[0]] + line_in[1][x_offset[2]]) /
+ 4;
+ line_out[2][x] = (line_in[0][x_offset[0]] + line_in[0][x_offset[2]] +
+ line_in[2][x_offset[0]] + line_in[2][x_offset[2]]) /
+ 4;
+ }; break;
+ case (kGb):
+ case (kGr): {
+ line_out[0][x] =
+ (line_in[1][x_offset[0]] + line_in[1][x_offset[2]]) / 2;
+ line_out[1][x] = line_in[1][x_offset[1]];
+ line_out[2][x] =
+ (line_in[0][x_offset[1]] + line_in[2][x_offset[1]]) / 2;
+ }; break;
+ default:
+ break;
+ }
+ }
+
+ // x not at boundary: vector instructions
+ for (uint8_t n = 1; n <= 2; n++) {
+ for (uint16_t x = n; x < input->width - 2; x += 2 * vl) {
+ x_offset[0] = x - 1;
+ x_offset[1] = x;
+ x_offset[2] = x + 1;
+ ptrdiff_t stride = 2 * sizeof(uint16_t);
+ size_t avl = (input->width - 1 - x) / 2;
+ vl = vsetvl_e16m4(avl);
+
+ if (n + (y & 0x1) == 2) { // kR or kB
+ // ch0
+ vx = vlse16_v_u16m4(line_in[1] + x_offset[1], stride, vl); // load
+ vsse16(line_out[0] + x, stride, vx, vl); // save
+ // ch1
+ vx = vlse16_v_u16m4(line_in[0] + x_offset[1], stride, vl); // load
+ vy = vlse16_v_u16m4(line_in[2] + x_offset[1], stride, vl); // load
+ vz = vwaddu_vv(vx, vy, vl); // add
+ vy = vlse16_v_u16m4(line_in[1] + x_offset[0], stride, vl); // load
+ vz = vwaddu_wv(vz, vy, vl); // add
+ vy = vlse16_v_u16m4(line_in[1] + x_offset[2], stride, vl); // load
+ vz = vwaddu_wv(vz, vy, vl); // add
+ vx = vnsrl(vz, 2, vl); // 1/4
+ vsse16(line_out[1] + x, stride, vx, vl); // save
+ // ch2
+ vx = vlse16_v_u16m4(line_in[0] + x_offset[0], stride, vl); // load
+ vy = vlse16_v_u16m4(line_in[0] + x_offset[2], stride, vl); // load
+ vz = vwaddu_vv(vx, vy, vl); // add
+ vy = vlse16_v_u16m4(line_in[2] + x_offset[0], stride, vl); // load
+ vz = vwaddu_wv(vz, vy, vl); // add
+ vy = vlse16_v_u16m4(line_in[2] + x_offset[2], stride, vl); // load
+ vz = vwaddu_wv(vz, vy, vl); // add
+ vx = vnsrl(vz, 2, vl); // 1/4
+ vsse16(line_out[2] + x, stride, vx, vl); // save
+      } else {  // kGr or kGb
+ // ch0
+ vx = vlse16_v_u16m4(line_in[1] + x_offset[0], stride, vl); // load
+ vy = vlse16_v_u16m4(line_in[1] + x_offset[2], stride, vl); // load
+ vz = vwaddu_vv(vx, vy, vl); // add
+ vx = vnsrl(vz, 1, vl); // 1/2
+ vsse16(line_out[0] + x, stride, vx, vl); // save
+ // ch1
+ vx = vlse16_v_u16m4(line_in[1] + x_offset[1], stride, vl); // load
+ vsse16(line_out[1] + x, stride, vx, vl); // save
+ // ch2
+ vx = vlse16_v_u16m4(line_in[0] + x_offset[1], stride, vl); // load
+ vy = vlse16_v_u16m4(line_in[2] + x_offset[1], stride, vl); // load
+ vz = vwaddu_vv(vx, vy, vl); // add
+ vx = vnsrl(vz, 1, vl); // 1/2
+ vsse16(line_out[2] + x, stride, vx, vl); // save
+ }
+ }
+ }
+ }
+}
diff --git a/samples/risp4ml/isp_stages/dg_rvv.c b/samples/risp4ml/isp_stages/dg_rvv.c
new file mode 100644
index 0000000..8cfa816
--- /dev/null
+++ b/samples/risp4ml/isp_stages/dg_rvv.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <riscv_vector.h>
+
+#include "samples/risp4ml/common/utils.h"
+#include "samples/risp4ml/isp_stages/dg.h"
+
+static const uint16_t kDgFractional = kRawPipelineFraction;
+static const uint16_t kDgUnityGain = 1 << kDgFractional;
+static DgParams dg_params = {
+ .enable = true,
+ .gains = {kDgUnityGain, kDgUnityGain, kDgUnityGain, kDgUnityGain}};
+
+void set_dg_params(DgParams* params) { dg_params = *params; }
+
+void dg_process(Image* img) {
+ if (!dg_params.enable) return;
+
+ size_t vl;
+ size_t n = img->height * img->width * img->num_channels;
+ uint16_t gain = dg_params.gains[0];
+ uint32_t offset = 1 << (kDgFractional - 1);
+ uint32_t max_val = kRawPipelineMaxVal << kDgFractional;
+ // auxiliary variables
+ vuint16m4_t vx;
+ vuint32m8_t vy;
+ uint16_t* x;
+ for (size_t i = 0; i < n; i += vl) {
+ x = img->data + i;
+ vl = vsetvl_e16m4(n - i);
+ vx = vle16_v_u16m4(x, vl); // load
+ vy = vwmulu(vx, gain, vl); // multiply
+ vy = vadd(vy, offset, vl); // add
+ vy = vminu(vy, max_val, vl); // clamp
+ vx = vnsrl(vy, kDgFractional, vl); // bit shift
+ vse16(x, vx, vl); // save
+ }
+}
diff --git a/samples/risp4ml/isp_stages/downscale.h b/samples/risp4ml/isp_stages/downscale.h
index d166610..0ca46bd 100644
--- a/samples/risp4ml/isp_stages/downscale.h
+++ b/samples/risp4ml/isp_stages/downscale.h
@@ -28,13 +28,14 @@
// scale_precision is the number of fractional bits used for scale factors and
// initial offsets
uint32_t scale_precision;
+ uint32_t scale_fixed_one;
+#ifndef ISP_WITH_RVV
// interpolate_precision is the number of fractional bits used for
// interpolation weights
uint32_t interpolate_precision;
// interpolate_shift is the shift for pixel value before interpolation to
// avoid rounding error.
uint32_t interpolate_shift;
- uint32_t scale_fixed_one;
uint32_t scale_fraction_mask;
uint32_t weight_shift;
@@ -51,6 +52,7 @@
// output pixel from the first input pixel in each direction respectively
uint32_t ver_initial_offset;
uint32_t hor_initial_offset;
+#endif // ! ISP_WITH_RVV
} DownscaleParams;
void set_downscale_param(DownscaleParams* params);
diff --git a/samples/risp4ml/isp_stages/downscale_rvv.c b/samples/risp4ml/isp_stages/downscale_rvv.c
new file mode 100644
index 0000000..454bd6c
--- /dev/null
+++ b/samples/risp4ml/isp_stages/downscale_rvv.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <riscv_vector.h>
+
+#include "samples/risp4ml/common/utils.h"
+#include "samples/risp4ml/isp_stages/downscale.h"
+
+static const uint16_t kScalePrecision = 10;
+static const uint32_t kScaleFixedOne = (1 << kScalePrecision);
+
+static DownscaleParams params = {
+ .enable = true,
+ .scale_precision = kScalePrecision,
+ .scale_fixed_one = kScaleFixedOne,
+};
+
+void set_downscale_param(DownscaleParams* in_params) { params = *in_params; }
+void set_downscale_factor(Image* input, ImageU8* output) { return; }
+
+// Basic bilinear downscale
+// Implementation based on:
+// https://chao-ji.github.io/jekyll/update/2018/07/19/BilinearResize.html
+// Resamples image using bilinear interpolation.
+// 'output' is modified by this function to store the output image.
+void downscale_process(Image* input, ImageU8* output) {
+ if (!params.enable) {
+ return;
+ }
+
+ uint32_t input_width = input->width;
+ uint32_t input_w_1 = input->width - 1;
+ uint32_t input_h_1 = input->height - 1;
+ uint32_t w_1 = output->width - 1;
+ uint32_t h_1 = output->height - 1;
+
+ size_t vl;
+ size_t n = output->height * output->width;
+ // auxiliary variables
+ vuint32m8_t vx, vy, vz, vid, vp, vq;
+  // neighboring x & y coordinates
+ vuint32m8_t vx_l, vy_l, vx_h, vy_h;
+ // weights of neighbors
+ vuint32m8_t vx_weight, vy_weight, vx_weight_1minus, vy_weight_1minus;
+ // neighboring data points
+ vuint32m8_t va, vb, vc, vd;
+ vuint32m8_t vo; // 32bit output
+ vuint16m4_t vo_16b; // 16bit output
+ vuint8m2_t vo_8b; // 8bit output
+
+ for (uint16_t c = 0; c < output->num_channels; ++c) {
+ pixel_type_t* in = image_row(input, c, 0);
+ uint8_t* out = output->data + c;
+
+ for (size_t i = 0; i < n; i += vl) {
+ vl = vsetvl_e16m4(n - i);
+ vid = vid_v_u32m8(vl);
+ vid = vadd(vid, i, vl);
+
+ vy = vdivu(vid, output->width, vl);
+ vx = vremu(vid, output->width, vl);
+
+ // find neighbors
+ vx_l = vmul(vx, input_w_1, vl);
+ vx_l = vdivu(vx_l, w_1, vl);
+ vx_h = vadd(vx_l, 1, vl);
+ vx_h = vminu(vx_h, input_w_1, vl); // clamp
+
+ vy_l = vmul(vy, input_h_1, vl);
+ vy_l = vdivu(vy_l, h_1, vl);
+ vy_h = vadd(vy_l, 1, vl);
+ vy_h = vminu(vy_h, input_h_1, vl); // clamp
+
+ // load a, b, c, d
+ vz = vmul(vy_l, input_width, vl);
+ vz = vadd(vz, vx_l, vl);
+ vz = vsll(vz, 1, vl); // *2
+ vo_16b = vluxei32(in, vz, vl);
+ va = vwaddu_vx(vo_16b, 0, vl);
+
+ vz = vmul(vy_l, input_width, vl);
+ vz = vadd(vz, vx_h, vl);
+ vz = vsll(vz, 1, vl); // *2
+ vo_16b = vluxei32(in, vz, vl);
+ vb = vwaddu_vx(vo_16b, 0, vl);
+
+ vz = vmul(vy_h, input_width, vl);
+ vz = vadd(vz, vx_l, vl);
+ vz = vsll(vz, 1, vl); // *2
+ vo_16b = vluxei32(in, vz, vl);
+ vc = vwaddu_vx(vo_16b, 0, vl);
+
+ vz = vmul(vy_h, input_width, vl);
+ vz = vadd(vz, vx_h, vl);
+ vz = vsll(vz, 1, vl); // *2
+ vo_16b = vluxei32(in, vz, vl);
+ vd = vwaddu_vx(vo_16b, 0, vl);
+
+ // weights
+ vp = vmul(vx, input_w_1, vl);
+ vq = vmul(vx_l, w_1, vl);
+ vp = vssubu(vp, vq, vl);
+ vp = vsll(vp, params.scale_precision, vl);
+ vx_weight = vdivu(vp, w_1, vl);
+
+ vp = vmul(vy, input_h_1, vl);
+ vq = vmul(vy_l, h_1, vl);
+ vp = vssubu(vp, vq, vl);
+ vp = vsll(vp, params.scale_precision, vl);
+ vy_weight = vdivu(vp, h_1, vl);
+
+ vx_weight_1minus = vrsub(vx_weight, params.scale_fixed_one, vl);
+ vy_weight_1minus = vrsub(vy_weight, params.scale_fixed_one, vl);
+
+ // resized
+ vo = vmul(va, vx_weight_1minus, vl);
+ vo = vsrl(vo, params.scale_precision, vl);
+ vo = vmul(vo, vy_weight_1minus, vl);
+
+ vp = vmul(vb, vx_weight, vl);
+ vp = vsrl(vp, params.scale_precision, vl);
+ vp = vmul(vp, vy_weight_1minus, vl);
+ vo = vadd(vo, vp, vl);
+
+ vp = vmul(vc, vx_weight_1minus, vl);
+ vp = vsrl(vp, params.scale_precision, vl);
+ vp = vmul(vp, vy_weight, vl);
+ vo = vadd(vo, vp, vl);
+
+ vp = vmul(vd, vx_weight, vl);
+ vp = vsrl(vp, params.scale_precision, vl);
+ vp = vmul(vp, vy_weight, vl);
+ vo = vadd(vo, vp, vl);
+
+ // bit shift
+ vo_16b = vnsrl(vo, params.scale_precision, vl);
+ vo_8b = vnsrl(vo_16b, kRawPipelineBpp - kPipeOutputBpp, vl);
+
+ // save
+ vsse8(out + i * output->num_channels, output->num_channels, vo_8b, vl);
+ }
+ }
+}
diff --git a/samples/risp4ml/isp_stages/downscale_rvv_test.cc b/samples/risp4ml/isp_stages/downscale_rvv_test.cc
new file mode 100644
index 0000000..f38e8b8
--- /dev/null
+++ b/samples/risp4ml/isp_stages/downscale_rvv_test.cc
@@ -0,0 +1,210 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Use separate test for RVV as algorithm is implemented differently
+// Implementation based on:
+// https://chao-ji.github.io/jekyll/update/2018/07/19/BilinearResize.html
+
+#include <climits>
+#include <cmath>
+
+#include "pw_unit_test/framework.h"
+#include "samples/risp4ml/common/constants.h"
+#include "samples/risp4ml/common/test_utils.h"
+#include "samples/risp4ml/isp_stages/downscale.h"
+
+static constexpr uint16_t kScalePrecision = 10;
+static constexpr uint32_t kScaleFixedOne = (1 << kScalePrecision);
+static constexpr float kOutBitsShift = 1 << (kRawPipelineBpp - kPipeOutputBpp);
+
+class DownscaleRvvTest : public ::testing::Test {
+ protected:
+ void setup(uint16_t in_ch, uint16_t in_height, uint16_t in_width,
+ uint16_t out_ch, uint16_t out_height, uint16_t out_width) {
+ in_ = image_new(in_ch, in_height, in_width);
+ out_ = imageu8_new(out_ch, out_height, out_width);
+ params_.enable = true;
+ params_.scale_precision = kScalePrecision;
+ params_.scale_fixed_one = kScaleFixedOne;
+ }
+ void TearDown() override {
+ image_delete(in_);
+ imageu8_delete(out_);
+ }
+ ImageU8* imageu8_new(uint16_t num_channels, uint16_t height, uint16_t width);
+ void imageu8_delete(ImageU8* image) {
+ if (image) {
+ if (image->data) free(image->data);
+ free(image);
+ }
+ }
+ pixel_type_t imageu8_pixel_val(ImageU8* image, uint16_t c, uint16_t y,
+ uint16_t x) {
+ const uint32_t stride_c = 1;
+ const uint16_t stride_y = image->num_channels * image->width;
+ const uint16_t stride_x = image->num_channels;
+
+ return *(image->data + c * stride_c + y * stride_y + x * stride_x);
+ }
+ float ExpectedOut(uint16_t y, uint16_t x);
+ void ScaleRampImageTest(uint16_t output_width, uint16_t output_height,
+ uint16_t input_width = 640,
+ uint16_t input_height = 480);
+ struct BilinearScaleTestValues {
+ uint16_t output_width;
+ uint16_t output_height;
+ uint16_t input_width;
+ uint16_t input_height;
+ };
+
+ Image* in_;
+ ImageU8* out_;
+ DownscaleParams params_;
+};
+
+ImageU8* DownscaleRvvTest::imageu8_new(uint16_t num_channels, uint16_t height,
+ uint16_t width) {
+ ImageU8* image = (ImageU8*)malloc(sizeof(ImageU8));
+ if (image) {
+ image->num_channels = num_channels;
+ image->height = height;
+ image->width = width;
+ uint32_t num_pixels = width * height * num_channels;
+ image->data = (uint8_t*)malloc(num_pixels * sizeof(uint8_t));
+ }
+ return image;
+}
+
+float DownscaleRvvTest::ExpectedOut(uint16_t y, uint16_t x) {
+ float x_ratio = ((float)in_->width - 1) / (out_->width - 1);
+ float y_ratio = ((float)in_->height - 1) / (out_->height - 1);
+
+ uint32_t x_l = (uint32_t)(x_ratio * x);
+ uint32_t x_h = (x_l == in_->width - 1) ? x_l : x_l + 1;
+ uint32_t y_l = y_ratio * y;
+ uint32_t y_h = y_l == in_->height - 1 ? y_l : y_l + 1;
+ float x_weight = (x_ratio * x) - x_l;
+ float y_weight = (y_ratio * y) - y_l;
+
+ pixel_type_t a = image_pixel_val(in_, 0, y_l, x_l);
+ pixel_type_t b = image_pixel_val(in_, 0, y_l, x_h);
+ pixel_type_t c = image_pixel_val(in_, 0, y_h, x_l);
+ pixel_type_t d = image_pixel_val(in_, 0, y_h, x_h);
+
+ float expected_out = a * (1 - x_weight) * (1 - y_weight) +
+ b * x_weight * (1 - y_weight) +
+ c * y_weight * (1 - x_weight) + d * x_weight * y_weight;
+
+ expected_out = floorf(expected_out / kOutBitsShift);
+
+ return expected_out;
+}
+
+// Helper function for 2D ramp tests; verifies the image is downscaled
+// correctly.
+void DownscaleRvvTest::ScaleRampImageTest(uint16_t output_width,
+ uint16_t output_height,
+ uint16_t input_width,
+ uint16_t input_height) {
+ constexpr int kTolerance = 1; // Tolerance for rounding error.
+ setup(1, input_height, input_width, 1, output_height, output_width);
+
+ // Fill in_ images as 2D ramp whose values are increased from the
+ // top-left corner to the bottom-right corner.
+ for (uint16_t y = 0; y < input_height; ++y) {
+ for (uint16_t x = 0; x < input_width; ++x) {
+ *image_pixel(in_, 0, y, x) = (y * input_width + x) % (1024);
+ }
+ }
+
+ set_downscale_param(¶ms_);
+ downscale_process(in_, out_);
+
+ for (uint16_t y = 0; y < output_height; ++y) {
+ for (uint16_t x = 0; x < output_width; ++x) {
+ float expected_out = ExpectedOut(y, x);
+ float diff =
+ std::abs((float)imageu8_pixel_val(out_, 0, y, x) - expected_out);
+ ASSERT_LE(diff, kTolerance);
+ }
+ }
+}
+
+TEST_F(DownscaleRvvTest, NoScaleTest) {
+ constexpr uint16_t kOutputWidth = 128;
+ constexpr uint16_t kInputHeight = 96;
+ setup(1, kInputHeight, kOutputWidth, 1, kInputHeight, kOutputWidth);
+
+ // Generate random image.
+ InitImageRandom(in_, 0, USHRT_MAX);
+
+ set_downscale_param(¶ms_);
+ downscale_process(in_, out_);
+
+ // Verify the out_ image is identical to the in_ image.
+ for (uint16_t y = 0; y < kInputHeight; ++y) {
+ for (uint16_t x = 0; x < kOutputWidth; ++x) {
+ ASSERT_EQ(imageu8_pixel_val(out_, 0, y, x),
+ static_cast<pixel_type_t>(
+ floorf(image_pixel_val(in_, 0, y, x) >>
+ (kRawPipelineBpp - kPipeOutputBpp))));
+ }
+ }
+}
+
+TEST_F(DownscaleRvvTest, DownscaleRvvTest) {
+ std::vector<BilinearScaleTestValues> tests = {
+ {8, 12, 64, 64}, {320, 240, 640, 480}, {80, 60, 640, 480},
+ {220, 95, 640, 480}, {415, 125, 640, 480}, {122, 13, 200, 100}};
+
+ for (const auto& test : tests) {
+ ScaleRampImageTest(test.output_width, test.output_height, test.input_width,
+ test.input_height);
+ }
+}
+
+TEST_F(DownscaleRvvTest, Trivial3DTest) {
+ constexpr uint16_t kChannels = 3;
+ constexpr uint16_t kInputHeight = 5;
+ constexpr uint16_t kInputWidth = 5;
+
+ constexpr uint16_t kVerScale = 2;
+ constexpr uint16_t kHorScale = 2;
+
+ constexpr uint16_t kOutputHeight = (kInputHeight - 1) / kVerScale + 1;
+ constexpr uint16_t kOutputWidth = (kInputWidth - 1) / kHorScale + 1;
+
+ setup(kChannels, kInputHeight, kInputWidth, kChannels, kOutputHeight,
+ kOutputWidth);
+
+ for (uint16_t c = 0; c < kChannels; ++c) {
+ for (uint16_t y = 0; y < kInputHeight; ++y) {
+ for (uint16_t x = 0; x < kInputWidth; ++x) {
+ *image_pixel(in_, c, y, x) = ((y * kInputWidth + x) * 10 + c) << 8;
+ }
+ }
+ }
+
+ set_downscale_param(¶ms_);
+ downscale_process(in_, out_);
+
+ // for exact integer ratios out_ is just downsampled in_
+ for (uint16_t c = 0; c < kChannels; ++c) {
+ for (uint16_t y = 0; y < kOutputHeight; ++y) {
+ for (uint16_t x = 0; x < kOutputWidth; ++x) {
+ ASSERT_EQ(imageu8_pixel_val(out_, c, y, x),
+ image_pixel_val(in_, c, y * kVerScale, x * kHorScale) >> 8);
+ }
+ }
+ }
+}
diff --git a/samples/risp4ml/isp_stages/gamma.h b/samples/risp4ml/isp_stages/gamma.h
index c3044ac..24b42cd 100644
--- a/samples/risp4ml/isp_stages/gamma.h
+++ b/samples/risp4ml/isp_stages/gamma.h
@@ -23,7 +23,11 @@
extern "C" {
#endif // __cplusplus
+#ifndef ISP_WITH_RVV
#define kGammaNumberPoints 81
+#else
+#define kGammaNumberPoints 2049
+#endif
typedef struct {
bool enable;
diff --git a/samples/risp4ml/isp_stages/gamma_rvv.c b/samples/risp4ml/isp_stages/gamma_rvv.c
new file mode 100644
index 0000000..a0f5503
--- /dev/null
+++ b/samples/risp4ml/isp_stages/gamma_rvv.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <riscv_vector.h>
+
+#include "samples/risp4ml/common/utils.h"
+#include "samples/risp4ml/isp_stages/gamma.h"
+
+#define kRgbColorChannels 3
+
+static const uint16_t kRgbPipelineBpp = 16; // bits per pixel in the RGB pipeline
+static const uint16_t kRgbPipelineMaxVal = (1 << kRgbPipelineBpp) - 1; // 65535
+static const uint16_t kGammaShiftBits = 5; // log2 of LUT bin width
+static const uint16_t kGammaSpacing = (1 << kGammaShiftBits); // 32 input values per LUT bin
+static GammaParams gamma_params = {
+ .enable = true,
+ .lut = {
+ 0, 413, 827, 1240, 1654, 2067, 2481, 2884, 3255, 3600,
+ 3923, 4228, 4518, 4793, 5056, 5309, 5552, 5786, 6013, 6232,
+ 6444, 6651, 6851, 7047, 7237, 7423, 7605, 7783, 7956, 8127,
+ 8294, 8457, 8618, 8776, 8931, 9083, 9233, 9380, 9525, 9668,
+ 9809, 9948, 10084, 10219, 10352, 10484, 10613, 10741, 10868, 10992,
+ 11116, 11238, 11358, 11478, 11595, 11712, 11828, 11942, 12055, 12167,
+ 12278, 12387, 12496, 12604, 12711, 12816, 12921, 13025, 13128, 13230,
+ 13331, 13432, 13531, 13630, 13728, 13825, 13922, 14017, 14112, 14207,
+ 14300, 14393, 14485, 14577, 14668, 14758, 14848, 14937, 15026, 15113,
+ 15201, 15288, 15374, 15459, 15545, 15629, 15713, 15797, 15880, 15963,
+ 16045, 16126, 16208, 16288, 16368, 16448, 16528, 16607, 16685, 16763,
+ 16841, 16918, 16995, 17071, 17147, 17223, 17298, 17373, 17448, 17522,
+ 17596, 17669, 17742, 17815, 17887, 17959, 18031, 18102, 18173, 18244,
+ 18315, 18385, 18454, 18524, 18593, 18662, 18731, 18799, 18867, 18935,
+ 19002, 19069, 19136, 19203, 19269, 19335, 19401, 19466, 19531, 19596,
+ 19661, 19726, 19790, 19854, 19918, 19981, 20045, 20108, 20170, 20233,
+ 20295, 20357, 20419, 20481, 20543, 20604, 20665, 20726, 20786, 20847,
+ 20907, 20967, 21027, 21086, 21145, 21205, 21264, 21322, 21381, 21439,
+ 21498, 21556, 21613, 21671, 21728, 21786, 21843, 21900, 21956, 22013,
+ 22069, 22126, 22182, 22238, 22293, 22349, 22404, 22459, 22514, 22569,
+ 22624, 22679, 22733, 22787, 22841, 22895, 22949, 23003, 23056, 23110,
+ 23163, 23216, 23269, 23321, 23374, 23426, 23479, 23531, 23583, 23635,
+ 23687, 23738, 23790, 23841, 23892, 23943, 23994, 24045, 24096, 24146,
+ 24197, 24247, 24297, 24347, 24397, 24447, 24497, 24546, 24596, 24645,
+ 24694, 24743, 24792, 24841, 24890, 24938, 24987, 25035, 25083, 25132,
+ 25180, 25228, 25275, 25323, 25371, 25418, 25466, 25513, 25560, 25607,
+ 25654, 25701, 25747, 25794, 25841, 25887, 25933, 25980, 26026, 26072,
+ 26118, 26163, 26209, 26255, 26300, 26346, 26391, 26436, 26481, 26527,
+ 26571, 26616, 26661, 26706, 26750, 26795, 26839, 26884, 26928, 26972,
+ 27016, 27060, 27104, 27148, 27191, 27235, 27278, 27322, 27365, 27408,
+ 27452, 27495, 27538, 27581, 27623, 27666, 27709, 27751, 27794, 27836,
+ 27879, 27921, 27963, 28005, 28047, 28089, 28131, 28173, 28215, 28256,
+ 28298, 28339, 28381, 28422, 28464, 28505, 28546, 28587, 28628, 28669,
+ 28710, 28750, 28791, 28832, 28872, 28913, 28953, 28994, 29034, 29074,
+ 29114, 29154, 29194, 29234, 29274, 29314, 29353, 29393, 29433, 29472,
+ 29512, 29551, 29590, 29630, 29669, 29708, 29747, 29786, 29825, 29864,
+ 29903, 29941, 29980, 30019, 30057, 30096, 30134, 30173, 30211, 30249,
+ 30287, 30326, 30364, 30402, 30440, 30477, 30515, 30553, 30591, 30628,
+ 30666, 30704, 30741, 30779, 30816, 30853, 30891, 30928, 30965, 31002,
+ 31039, 31076, 31113, 31150, 31187, 31223, 31260, 31297, 31333, 31370,
+ 31406, 31443, 31479, 31516, 31552, 31588, 31624, 31660, 31696, 31733,
+ 31768, 31804, 31840, 31876, 31912, 31948, 31983, 32019, 32054, 32090,
+ 32125, 32161, 32196, 32232, 32267, 32302, 32337, 32372, 32407, 32442,
+ 32477, 32512, 32547, 32582, 32617, 32652, 32686, 32721, 32756, 32790,
+ 32825, 32859, 32894, 32928, 32962, 32997, 33031, 33065, 33099, 33133,
+ 33167, 33202, 33235, 33269, 33303, 33337, 33371, 33405, 33438, 33472,
+ 33506, 33539, 33573, 33606, 33640, 33673, 33707, 33740, 33773, 33807,
+ 33840, 33873, 33906, 33939, 33972, 34005, 34038, 34071, 34104, 34137,
+ 34170, 34203, 34235, 34268, 34301, 34333, 34366, 34398, 34431, 34463,
+ 34496, 34528, 34560, 34593, 34625, 34657, 34689, 34722, 34754, 34786,
+ 34818, 34850, 34882, 34914, 34946, 34977, 35009, 35041, 35073, 35104,
+ 35136, 35168, 35199, 35231, 35262, 35294, 35325, 35357, 35388, 35419,
+ 35451, 35482, 35513, 35545, 35576, 35607, 35638, 35669, 35700, 35731,
+ 35762, 35793, 35824, 35855, 35886, 35916, 35947, 35978, 36008, 36039,
+ 36070, 36100, 36131, 36161, 36192, 36222, 36253, 36283, 36314, 36344,
+ 36374, 36405, 36435, 36465, 36495, 36525, 36555, 36586, 36616, 36646,
+ 36676, 36706, 36735, 36765, 36795, 36825, 36855, 36885, 36914, 36944,
+ 36974, 37003, 37033, 37063, 37092, 37122, 37151, 37181, 37210, 37239,
+ 37269, 37298, 37328, 37357, 37386, 37415, 37444, 37474, 37503, 37532,
+ 37561, 37590, 37619, 37648, 37677, 37706, 37735, 37764, 37793, 37821,
+ 37850, 37879, 37908, 37937, 37965, 37994, 38023, 38051, 38080, 38108,
+ 38137, 38165, 38194, 38222, 38251, 38279, 38307, 38336, 38364, 38392,
+ 38421, 38449, 38477, 38505, 38533, 38561, 38590, 38618, 38646, 38674,
+ 38702, 38730, 38758, 38785, 38813, 38841, 38869, 38897, 38925, 38952,
+ 38980, 39008, 39036, 39063, 39091, 39118, 39146, 39174, 39201, 39229,
+ 39256, 39284, 39311, 39338, 39366, 39393, 39421, 39448, 39475, 39502,
+ 39530, 39557, 39584, 39611, 39638, 39666, 39693, 39720, 39747, 39774,
+ 39801, 39828, 39855, 39882, 39909, 39935, 39962, 39989, 40016, 40043,
+ 40070, 40096, 40123, 40150, 40176, 40203, 40230, 40256, 40283, 40310,
+ 40336, 40363, 40389, 40416, 40442, 40468, 40495, 40521, 40548, 40574,
+ 40600, 40627, 40653, 40679, 40705, 40732, 40758, 40784, 40810, 40836,
+ 40862, 40888, 40914, 40941, 40967, 40993, 41019, 41044, 41070, 41096,
+ 41122, 41148, 41174, 41200, 41226, 41251, 41277, 41303, 41329, 41354,
+ 41380, 41406, 41431, 41457, 41483, 41508, 41534, 41559, 41585, 41610,
+ 41636, 41661, 41687, 41712, 41737, 41763, 41788, 41814, 41839, 41864,
+ 41889, 41915, 41940, 41965, 41990, 42016, 42041, 42066, 42091, 42116,
+ 42141, 42166, 42191, 42216, 42241, 42266, 42291, 42316, 42341, 42366,
+ 42391, 42416, 42441, 42466, 42490, 42515, 42540, 42565, 42590, 42614,
+ 42639, 42664, 42688, 42713, 42738, 42762, 42787, 42812, 42836, 42861,
+ 42885, 42910, 42934, 42959, 42983, 43008, 43032, 43056, 43081, 43105,
+ 43129, 43154, 43178, 43202, 43227, 43251, 43275, 43299, 43324, 43348,
+ 43372, 43396, 43420, 43444, 43469, 43493, 43517, 43541, 43565, 43589,
+ 43613, 43637, 43661, 43685, 43709, 43733, 43756, 43780, 43804, 43828,
+ 43852, 43876, 43899, 43923, 43947, 43971, 43994, 44018, 44042, 44066,
+ 44089, 44113, 44137, 44160, 44184, 44207, 44231, 44254, 44278, 44302,
+ 44325, 44348, 44372, 44395, 44419, 44442, 44466, 44489, 44512, 44536,
+ 44559, 44582, 44606, 44629, 44652, 44676, 44699, 44722, 44745, 44769,
+ 44792, 44815, 44838, 44861, 44884, 44907, 44931, 44954, 44977, 45000,
+ 45023, 45046, 45069, 45092, 45115, 45138, 45161, 45184, 45206, 45229,
+ 45252, 45275, 45298, 45321, 45344, 45366, 45389, 45412, 45435, 45457,
+ 45480, 45503, 45526, 45548, 45571, 45594, 45616, 45639, 45662, 45684,
+ 45707, 45729, 45752, 45774, 45797, 45819, 45842, 45864, 45887, 45909,
+ 45932, 45954, 45977, 45999, 46021, 46044, 46066, 46088, 46111, 46133,
+ 46155, 46178, 46200, 46222, 46244, 46267, 46289, 46311, 46333, 46355,
+ 46378, 46400, 46422, 46444, 46466, 46488, 46510, 46532, 46554, 46576,
+ 46599, 46621, 46643, 46665, 46686, 46708, 46730, 46752, 46774, 46796,
+ 46818, 46840, 46862, 46884, 46905, 46927, 46949, 46971, 46993, 47014,
+ 47036, 47058, 47080, 47101, 47123, 47145, 47167, 47188, 47210, 47231,
+ 47253, 47275, 47296, 47318, 47339, 47361, 47383, 47404, 47426, 47447,
+ 47469, 47490, 47512, 47533, 47555, 47576, 47597, 47619, 47640, 47662,
+ 47683, 47704, 47726, 47747, 47768, 47790, 47811, 47832, 47854, 47875,
+ 47896, 47917, 47939, 47960, 47981, 48002, 48023, 48045, 48066, 48087,
+ 48108, 48129, 48150, 48171, 48192, 48213, 48234, 48256, 48277, 48298,
+ 48319, 48340, 48361, 48382, 48403, 48423, 48444, 48465, 48486, 48507,
+ 48528, 48549, 48570, 48591, 48612, 48632, 48653, 48674, 48695, 48716,
+ 48736, 48757, 48778, 48799, 48819, 48840, 48861, 48881, 48902, 48923,
+ 48943, 48964, 48985, 49005, 49026, 49047, 49067, 49088, 49108, 49129,
+ 49149, 49170, 49191, 49211, 49232, 49252, 49273, 49293, 49313, 49334,
+ 49354, 49375, 49395, 49416, 49436, 49456, 49477, 49497, 49517, 49538,
+ 49558, 49578, 49599, 49619, 49639, 49660, 49680, 49700, 49720, 49741,
+ 49761, 49781, 49801, 49821, 49842, 49862, 49882, 49902, 49922, 49942,
+ 49962, 49982, 50003, 50023, 50043, 50063, 50083, 50103, 50123, 50143,
+ 50163, 50183, 50203, 50223, 50243, 50263, 50283, 50303, 50323, 50343,
+ 50362, 50382, 50402, 50422, 50442, 50462, 50482, 50501, 50521, 50541,
+ 50561, 50581, 50600, 50620, 50640, 50660, 50680, 50699, 50719, 50739,
+ 50758, 50778, 50798, 50817, 50837, 50857, 50876, 50896, 50916, 50935,
+ 50955, 50974, 50994, 51014, 51033, 51053, 51072, 51092, 51111, 51131,
+ 51150, 51170, 51189, 51209, 51228, 51248, 51267, 51287, 51306, 51325,
+ 51345, 51364, 51384, 51403, 51422, 51442, 51461, 51480, 51500, 51519,
+ 51538, 51558, 51577, 51596, 51616, 51635, 51654, 51673, 51693, 51712,
+ 51731, 51750, 51769, 51789, 51808, 51827, 51846, 51865, 51884, 51904,
+ 51923, 51942, 51961, 51980, 51999, 52018, 52037, 52056, 52075, 52094,
+ 52113, 52132, 52151, 52170, 52189, 52208, 52227, 52246, 52265, 52284,
+ 52303, 52322, 52341, 52360, 52379, 52398, 52417, 52436, 52454, 52473,
+ 52492, 52511, 52530, 52549, 52568, 52586, 52605, 52624, 52643, 52662,
+ 52680, 52699, 52718, 52737, 52755, 52774, 52793, 52811, 52830, 52849,
+ 52867, 52886, 52905, 52923, 52942, 52961, 52979, 52998, 53017, 53035,
+ 53054, 53072, 53091, 53109, 53128, 53147, 53165, 53184, 53202, 53221,
+ 53239, 53258, 53276, 53295, 53313, 53332, 53350, 53369, 53387, 53405,
+ 53424, 53442, 53461, 53479, 53497, 53516, 53534, 53553, 53571, 53589,
+ 53608, 53626, 53644, 53663, 53681, 53699, 53718, 53736, 53754, 53772,
+ 53791, 53809, 53827, 53845, 53864, 53882, 53900, 53918, 53936, 53955,
+ 53973, 53991, 54009, 54027, 54045, 54064, 54082, 54100, 54118, 54136,
+ 54154, 54172, 54190, 54208, 54226, 54244, 54263, 54281, 54299, 54317,
+ 54335, 54353, 54371, 54389, 54407, 54425, 54443, 54461, 54479, 54496,
+ 54514, 54532, 54550, 54568, 54586, 54604, 54622, 54640, 54658, 54676,
+ 54693, 54711, 54729, 54747, 54765, 54783, 54800, 54818, 54836, 54854,
+ 54872, 54889, 54907, 54925, 54943, 54960, 54978, 54996, 55014, 55031,
+ 55049, 55067, 55084, 55102, 55120, 55138, 55155, 55173, 55191, 55208,
+ 55226, 55243, 55261, 55279, 55296, 55314, 55331, 55349, 55367, 55384,
+ 55402, 55419, 55437, 55454, 55472, 55490, 55507, 55525, 55542, 55560,
+ 55577, 55595, 55612, 55629, 55647, 55664, 55682, 55699, 55717, 55734,
+ 55752, 55769, 55786, 55804, 55821, 55839, 55856, 55873, 55891, 55908,
+ 55925, 55943, 55960, 55977, 55995, 56012, 56029, 56047, 56064, 56081,
+ 56098, 56116, 56133, 56150, 56168, 56185, 56202, 56219, 56236, 56254,
+ 56271, 56288, 56305, 56322, 56340, 56357, 56374, 56391, 56408, 56425,
+ 56443, 56460, 56477, 56494, 56511, 56528, 56545, 56562, 56579, 56597,
+ 56614, 56631, 56648, 56665, 56682, 56699, 56716, 56733, 56750, 56767,
+ 56784, 56801, 56818, 56835, 56852, 56869, 56886, 56903, 56920, 56937,
+ 56954, 56971, 56988, 57004, 57021, 57038, 57055, 57072, 57089, 57106,
+ 57123, 57140, 57156, 57173, 57190, 57207, 57224, 57241, 57257, 57274,
+ 57291, 57308, 57325, 57341, 57358, 57375, 57392, 57408, 57425, 57442,
+ 57459, 57475, 57492, 57509, 57526, 57542, 57559, 57576, 57592, 57609,
+ 57626, 57642, 57659, 57676, 57692, 57709, 57726, 57742, 57759, 57776,
+ 57792, 57809, 57825, 57842, 57859, 57875, 57892, 57908, 57925, 57941,
+ 57958, 57975, 57991, 58008, 58024, 58041, 58057, 58074, 58090, 58107,
+ 58123, 58140, 58156, 58173, 58189, 58206, 58222, 58238, 58255, 58271,
+ 58288, 58304, 58321, 58337, 58353, 58370, 58386, 58403, 58419, 58435,
+ 58452, 58468, 58484, 58501, 58517, 58533, 58550, 58566, 58582, 58599,
+ 58615, 58631, 58648, 58664, 58680, 58697, 58713, 58729, 58745, 58762,
+ 58778, 58794, 58810, 58827, 58843, 58859, 58875, 58891, 58908, 58924,
+ 58940, 58956, 58972, 58989, 59005, 59021, 59037, 59053, 59069, 59085,
+ 59102, 59118, 59134, 59150, 59166, 59182, 59198, 59214, 59230, 59246,
+ 59263, 59279, 59295, 59311, 59327, 59343, 59359, 59375, 59391, 59407,
+ 59423, 59439, 59455, 59471, 59487, 59503, 59519, 59535, 59551, 59567,
+ 59583, 59599, 59615, 59631, 59647, 59663, 59678, 59694, 59710, 59726,
+ 59742, 59758, 59774, 59790, 59806, 59822, 59837, 59853, 59869, 59885,
+ 59901, 59917, 59933, 59948, 59964, 59980, 59996, 60012, 60027, 60043,
+ 60059, 60075, 60091, 60106, 60122, 60138, 60154, 60169, 60185, 60201,
+ 60217, 60232, 60248, 60264, 60280, 60295, 60311, 60327, 60342, 60358,
+ 60374, 60389, 60405, 60421, 60436, 60452, 60468, 60483, 60499, 60515,
+ 60530, 60546, 60561, 60577, 60593, 60608, 60624, 60640, 60655, 60671,
+ 60686, 60702, 60717, 60733, 60749, 60764, 60780, 60795, 60811, 60826,
+ 60842, 60857, 60873, 60888, 60904, 60919, 60935, 60950, 60966, 60981,
+ 60997, 61012, 61028, 61043, 61059, 61074, 61089, 61105, 61120, 61136,
+ 61151, 61167, 61182, 61197, 61213, 61228, 61244, 61259, 61274, 61290,
+ 61305, 61320, 61336, 61351, 61366, 61382, 61397, 61412, 61428, 61443,
+ 61458, 61474, 61489, 61504, 61520, 61535, 61550, 61566, 61581, 61596,
+ 61611, 61627, 61642, 61657, 61672, 61688, 61703, 61718, 61733, 61749,
+ 61764, 61779, 61794, 61809, 61825, 61840, 61855, 61870, 61885, 61901,
+ 61916, 61931, 61946, 61961, 61976, 61991, 62007, 62022, 62037, 62052,
+ 62067, 62082, 62097, 62112, 62128, 62143, 62158, 62173, 62188, 62203,
+ 62218, 62233, 62248, 62263, 62278, 62293, 62308, 62323, 62338, 62353,
+ 62369, 62384, 62399, 62414, 62429, 62444, 62459, 62474, 62489, 62504,
+ 62518, 62533, 62548, 62563, 62578, 62593, 62608, 62623, 62638, 62653,
+ 62668, 62683, 62698, 62713, 62728, 62743, 62757, 62772, 62787, 62802,
+ 62817, 62832, 62847, 62862, 62876, 62891, 62906, 62921, 62936, 62951,
+ 62966, 62980, 62995, 63010, 63025, 63040, 63054, 63069, 63084, 63099,
+ 63114, 63128, 63143, 63158, 63173, 63188, 63202, 63217, 63232, 63247,
+ 63261, 63276, 63291, 63306, 63320, 63335, 63350, 63364, 63379, 63394,
+ 63408, 63423, 63438, 63453, 63467, 63482, 63497, 63511, 63526, 63541,
+ 63555, 63570, 63585, 63599, 63614, 63628, 63643, 63658, 63672, 63687,
+ 63702, 63716, 63731, 63745, 63760, 63774, 63789, 63804, 63818, 63833,
+ 63847, 63862, 63876, 63891, 63906, 63920, 63935, 63949, 63964, 63978,
+ 63993, 64007, 64022, 64036, 64051, 64065, 64080, 64094, 64109, 64123,
+ 64138, 64152, 64167, 64181, 64196, 64210, 64224, 64239, 64253, 64268,
+ 64282, 64297, 64311, 64326, 64340, 64354, 64369, 64383, 64398, 64412,
+ 64426, 64441, 64455, 64470, 64484, 64498, 64513, 64527, 64541, 64556,
+ 64570, 64584, 64599, 64613, 64627, 64642, 64656, 64670, 64685, 64699,
+ 64713, 64728, 64742, 64756, 64770, 64785, 64799, 64813, 64828, 64842,
+ 64856, 64870, 64885, 64899, 64913, 64927, 64942, 64956, 64970, 64984,
+ 64999, 65013, 65027, 65041, 65055, 65070, 65084, 65098, 65112, 65126,
+ 65141, 65155, 65169, 65183, 65197, 65211, 65226, 65240, 65254, 65268,
+ 65282, 65296, 65310, 65325, 65339, 65353, 65367, 65381, 65395, 65409,
+ 65423, 65437, 65452, 65466, 65480, 65494, 65508, 65522, 65535}};
+
+void set_gamma_params(GammaParams* params) { gamma_params = *params; }
+
+void gamma_process(Image* img) { // in-place LUT gamma with linear interpolation, RVV-vectorized
+ if (!gamma_params.enable) return; // bypass when stage is disabled
+
+ size_t vl;
+ size_t n = img->height * img->width * img->num_channels; // total samples
+ // auxiliary variables
+ vuint16m8_t vx, vy;
+ vuint16m8_t v_offset;
+ vbool2_t mask;
+ uint16_t* x;
+
+ for (size_t i = 0; i < n; i += vl) {
+ x = img->data + i;
+
+ vl = vsetvl_e16m8(n - i);
+ vx = vle16_v_u16m8(x, vl); // load
+ mask = vmsne(vx, kRgbPipelineMaxVal,
+ vl); // mask to exclude kRgbPipelineMaxVal (those lanes keep their value via masked store)
+
+ vy = vsrl(vx, kGammaShiftBits, vl); // 1/32: pixel -> LUT bin index
+ vy = vsll(vy, 1, vl); // *2: element index -> byte offset, as vluxei16 indices are byte offsets
+ v_offset = vand(vx, kGammaSpacing - 1, vl); // offset within bin
+
+ vx = vluxei16(gamma_params.lut, vy, vl); // left bin endpoint
+ vy = vluxei16(gamma_params.lut + 1, vy, vl); // right bin endpoint
+
+ vy = vsub(vy, vx, vl); // right - left
+ vy = vmul(vy, v_offset, vl); // multiply offset_within_bin (max delta*31 fits in 16 bits)
+ vy = vsrl(vy, kGammaShiftBits, vl); // 1/32
+ vx = vadd(vx, vy, vl); // add interpolated delta to left endpoint
+
+ vse16(mask, x, vx, vl); // save (masked: max-valued pixels stay 65535)
+ }
+}
diff --git a/samples/risp4ml/isp_stages/gamma_rvv_test.cc b/samples/risp4ml/isp_stages/gamma_rvv_test.cc
new file mode 100644
index 0000000..e379e6d
--- /dev/null
+++ b/samples/risp4ml/isp_stages/gamma_rvv_test.cc
@@ -0,0 +1,141 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Use separate test for RVV as gamma LUT is generated differently
+#include <cmath>
+
+#include "pw_unit_test/framework.h"
+#include "samples/risp4ml/isp_stages/gamma.h"
+
+static constexpr uint16_t kRgbPipelineBpp = 16; // must match gamma_rvv.c
+static constexpr uint16_t kPostGammaPipelineBpp = 16; // no bit-depth reduction after gamma
+static constexpr uint16_t kRgbPipelineMaxVal = (1 << kRgbPipelineBpp) - 1; // 65535
+static constexpr uint16_t kGammaShiftBits = 5; // log2 of LUT bin width
+static constexpr uint16_t kGammaSpacing = (1 << kGammaShiftBits); // 32 input values per LUT bin
+
+class GammaRvvTest : public ::testing::Test {
+ protected:
+ void setup(uint16_t width) { // 3-channel, 2-row image; every pixel value == its column x
+ in_ = image_new(3, 2, width);
+ for (uint16_t c = 0; c < in_->num_channels; ++c) {
+ for (uint16_t y = 0; y < in_->height; ++y) {
+ for (uint16_t x = 0; x < in_->width; ++x) {
+ *image_pixel(in_, c, y, x) = x;
+ }
+ }
+ }
+ out_ = image_new(3, 2, width);
+ const uint32_t num_bytes =
+ in_->num_channels * in_->height * in_->width * sizeof(pixel_type_t);
+ memcpy(out_->data, in_->data, num_bytes); // out_ starts as a copy; gamma runs in place on out_
+ }
+ void TearDown() override {
+ image_delete(in_);
+ image_delete(out_);
+ }
+ void CreateLinearGamma(GammaParams* params) { // identity LUT: lut[i] == i * kGammaSpacing
+ params->enable = true;
+ for (int n = 0; n <= kRgbPipelineMaxVal; n += kGammaSpacing) { // fills indices 0..2047
+ params->lut[n / kGammaSpacing] = n;
+ }
+ params->lut[kGammaNumberPoints - 1] = kRgbPipelineMaxVal; // pin right endpoint
+ }
+ float sRgb_gamma(float in_) { // sRGB transfer function (linear segment + power segment)
+ return (in_ < 0.0031308f) ? 12.92f * in_
+ : 1.055f * std::pow(in_, 1.0f / 2.4f) - 0.055f;
+ }
+ void CreateRgbGamma(GammaParams* params) { // sRGB LUT sampled every kGammaSpacing values
+ params->enable = true;
+ params->lut[0] = 0;
+
+ for (int n = 0; n <= kRgbPipelineMaxVal; n += kGammaSpacing) {
+ params->lut[n / kGammaSpacing] =
+ (1 << kRgbPipelineBpp) *
+ sRgb_gamma((float)n / (1 << kRgbPipelineBpp)); // NOTE(review): relies on result < 65536 before the uint16 narrowing — confirm
+ }
+ params->lut[kGammaNumberPoints - 1] = kRgbPipelineMaxVal; // pin right endpoint
+ }
+
+ Image* in_;
+ Image* out_;
+};
+
+TEST_F(GammaRvvTest, Bypass) { // disabled stage must leave pixels untouched
+ setup((1 << 15) - 1);
+
+ GammaParams params;
+ CreateRgbGamma(&params); // fixed: was HTML-entity-garbled "¶ms"
+ params.enable = false;
+
+ set_gamma_params(&params); // fixed: was HTML-entity-garbled "¶ms"
+
+ gamma_process(out_);
+
+ for (uint16_t c = 0; c < in_->num_channels; ++c) {
+ for (uint16_t y = 0; y < in_->height; ++y) {
+ for (uint16_t x = 0; x < in_->width; ++x) {
+ pixel_type_t expected_val =
+ x >> (kRgbPipelineBpp - kPostGammaPipelineBpp); // shift is 0 here, so expected == x
+ ASSERT_EQ(expected_val, image_pixel_val(out_, c, y, x));
+ }
+ }
+ }
+}
+
+TEST_F(GammaRvvTest, Linear) { // identity LUT: output must equal input
+ setup((1 << 15) - 2);
+
+ GammaParams params;
+ CreateLinearGamma(&params); // fixed: was HTML-entity-garbled "¶ms"
+
+ set_gamma_params(&params); // fixed: was HTML-entity-garbled "¶ms"
+
+ gamma_process(out_);
+
+ for (uint16_t c = 0; c < in_->num_channels; ++c) {
+ for (uint16_t y = 0; y < in_->height; ++y) {
+ for (uint16_t x = 0; x < in_->width; ++x) {
+ ASSERT_EQ(image_pixel_val(out_, c, y, x),
+ image_pixel_val(in_, c, y, x)); // fixed: compare channel c, not hard-coded channel 0
+ }
+ }
+ }
+}
+
+TEST_F(GammaRvvTest, sRgbLUT) { // interpolated LUT output within 3% of exact sRGB curve
+ setup((1 << 15) - 1);
+
+ constexpr float kToleranceRatio = 0.03;
+
+ GammaParams params;
+ CreateRgbGamma(&params); // fixed: was HTML-entity-garbled "¶ms"
+ set_gamma_params(&params); // fixed: was HTML-entity-garbled "¶ms"
+
+ gamma_process(out_);
+
+ for (uint16_t c = 0; c < in_->num_channels; ++c) {
+ for (uint16_t y = 0; y < in_->height; ++y) {
+ for (uint16_t x = 0; x < in_->width; ++x) {
+ pixel_type_t expected_val =
+ (pixel_type_t)((1 << kRgbPipelineBpp) *
+ sRgb_gamma(static_cast<float>(x) /
+ (1 << kRgbPipelineBpp))); // reference value from the exact curve
+ float tolerance = ceilf(kToleranceRatio * expected_val);
+ float diff = std::abs((float)expected_val -
+ (float)image_pixel_val(out_, c, y, x));
+ ASSERT_LE(diff, tolerance);
+ }
+ }
+ }
+}
diff --git a/samples/risp4ml/isp_stages/wbg_rvv.c b/samples/risp4ml/isp_stages/wbg_rvv.c
new file mode 100644
index 0000000..a628f66
--- /dev/null
+++ b/samples/risp4ml/isp_stages/wbg_rvv.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <riscv_vector.h>
+
+#include "samples/risp4ml/common/utils.h"
+#include "samples/risp4ml/isp_stages/wbg.h"
+
+#define kBayerColorChannels 4
+#define MAX(a, b) (((a) > (b)) ? (a) : (b)) // args evaluated twice; only used with plain variables below
+
+static const uint16_t kWbgFractional = kRawPipelineFraction; // fractional bits of the fixed-point gains
+static const uint16_t kWbgUnityGain = 1 << kWbgFractional; // gain of 1.0 in fixed point
+static WbgParams wbg_params = {
+ .enable = true,
+ .fixed = false, // false: recompute gains per frame via gray-world
+ .gains = {kWbgUnityGain, kWbgUnityGain, kWbgUnityGain, kWbgUnityGain}};
+
+void set_wbg_params(WbgParams* params) { wbg_params = *params; }
+
+static void compute_wbg_gain(Image* img) {
+ // Calculate the white-balance gain values using the "gray world" algorithm
+ uint32_t size = img->num_channels * img->height * img->width;
+ uint64_t sum_of_reds = 0;
+ // will use only one of the greens for scaling, since the difference between
+ // the two green sensor pixels is negligible
+ uint64_t sum_of_greens = 0;
+ uint64_t sum_of_blues = 0;
+ size_t vl;
+ // auxiliary variables
+ vuint16m8_t vx;
+ vuint32m1_t vy;
+
+ for (uint16_t y = 0; y < img->height; ++y) {
+ pixel_type_t* line = image_row(img, 0, y);
+ for (uint8_t n = 0; n < 2; n++) { // n selects even/odd column phase of the Bayer row
+ for (uint16_t x = n; x < img->width; x += 2 * vl) { // vl is set in the body before the increment runs
+ size_t avl = (img->width + 1 - x) / 2; // ceil((width - x) / 2) stride-2 elements remain
+ vl = vsetvl_e16m8(avl);
+ vx = vlse16_v_u16m8(line + x, 2 * sizeof(uint16_t), vl); // load (stride 2: one Bayer phase)
+
+ vy = vmv_v_x_u32m1(0, vl); // init
+ vy = vwredsumu(vy, vx, vy, vl); // sum
+ uint32_t sum = vmv_x(vy);
+ if ((y & 0x1) == 0 && n == 0) { // even row, even col — presumably R of an RGGB mosaic; confirm sensor order
+ sum_of_reds += sum;
+ } else if ((y & 0x1) == 1 && n == 1) { // odd row, odd col — presumably B
+ sum_of_blues += sum;
+ } else { // the two green phases
+ sum_of_greens += sum;
+ }
+ }
+ }
+ }
+
+ // scale values to green channel
+ float average_red = 4.0 * sum_of_reds / size; // each color covers 1/4 (greens: 1/2) of the samples
+ float average_green = 2.0 * sum_of_greens / size;
+ float average_blue = 4.0 * sum_of_blues / size;
+
+ float max_average = MAX(MAX(average_red, average_green), average_blue);
+
+ // Convert the float value to fixed point representation, i.e. 0xFF.FF
+ uint32_t red_wb = FloatToFixedPoint(max_average / average_red, // NOTE(review): an all-zero channel gives inf here — confirm input guarantees
+ kRawPipelineInteger, kRawPipelineFraction,
+ /*bool is_signed*/ false);
+ uint32_t green_wb = FloatToFixedPoint(
+ max_average / average_green, kRawPipelineInteger, kRawPipelineFraction,
+ /*bool is_signed*/ false);
+ uint32_t blue_wb = FloatToFixedPoint(
+ max_average / average_blue, kRawPipelineInteger, kRawPipelineFraction,
+ /*bool is_signed*/ false);
+
+ wbg_params.gains[0] = red_wb;
+ wbg_params.gains[1] = green_wb;
+ wbg_params.gains[2] = green_wb;
+ wbg_params.gains[3] = blue_wb;
+}
+
+void wbg_process(Image* img) { // applies per-phase white-balance gains in place, RVV-vectorized
+ if (!wbg_params.enable) return;
+ if (!wbg_params.fixed) { // recompute gains from this frame unless fixed gains were requested
+ compute_wbg_gain(img);
+ }
+
+ size_t vl;
+ uint32_t offset = 1 << (kWbgFractional - 1); // 0.5 in fixed point, for round-to-nearest
+ uint32_t max_val = kRawPipelineMaxVal << kWbgFractional; // clamp ceiling before the final shift
+ uint16_t gain;
+ // auxiliary variables
+ vuint16m4_t vx;
+ vuint32m8_t vy;
+ for (uint16_t y = 0; y < img->height; ++y) {
+ pixel_type_t* line = image_row(img, 0, y);
+ for (uint8_t n = 0; n < 2; n++) { // n selects even/odd column phase of the Bayer row
+ gain = (y & 0x1) ? wbg_params.gains[2 + n] : wbg_params.gains[n]; // gains[] is row-major 2x2 Bayer
+ for (uint16_t x = n; x < img->width; x += 2 * vl) { // vl is set in the body before the increment runs
+ size_t avl = (img->width + 1 - x) / 2; // stride-2 elements remaining in this phase
+ vl = vsetvl_e16m4(avl);
+ vx = vlse16_v_u16m4(line + x, 2 * sizeof(uint16_t), vl); // load
+ vy = vwmulu(vx, gain, vl); // multiply (widening, 16x16 -> 32 bits)
+ vy = vadd(vy, offset, vl); // add rounding offset
+ vy = vminu(vy, max_val, vl); // clamp
+ vx = vnsrl(vy, kWbgFractional, vl); // bit shift (narrowing back to 16 bits)
+ vsse16(line + x, 2 * sizeof(uint16_t), vx, vl); // save
+ }
+ }
+ }
+}