Add the option to optimize RISP4ML using RVV

This change adds the option of using RISC-V vector (RVV) instructions to reduce the instruction count of the RISP4ML toolchain.

Summary:
- All 6 blocks (BLC, DG, WBG, Demosaic, Gamma and Downscaler) have been optimized.
- Numerical correctness has been verified; all unit tests pass (after the
  necessary rewrites).
- Significant saving: 40% -> 1%
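
All blocks share the same strip-mined loop over the pixel buffer. A minimal
sketch mirroring blc_process below (p, n, and offset stand in for the image
data pointer, element count, and black-level offset):

    size_t vl;
    for (size_t i = 0; i < n; i += vl) {
      vl = vsetvl_e16m8(n - i);                   // elements this iteration
      vuint16m8_t vx = vle16_v_u16m8(p + i, vl);  // load
      vx = vssubu(vx, offset, vl);                // saturating subtract
      vse16(p + i, vx, vl);                       // store
    }

The optimization is controlled by the new CMake option BUILD_ISP_WITH_RVV
(default ON); configuring with -DBUILD_ISP_WITH_RVV=OFF selects the original
scalar sources.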

Change-Id: I6abc8f253f5a74044985ab64797e5de5a76364a2
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 05fe4cc..c227663 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -31,6 +31,7 @@
 add_link_options("LINKER:--defsym=__stack_size__=${STACK_SIZE}")
 set(SPRINGBOK_LINKER_SCRIPT "$ENV{ROOTDIR}/sw/vec/springbok/springbok.ld" CACHE PATH "Springbok linker script path (default: springbok.ld)")
 set(BUILD_WITH_SPRINGBOK ON CACHE BOOL "Build the target with springbok BSP (default: ON)")
+set(BUILD_ISP_WITH_RVV ON CACHE BOOL "Build the ISP pipeline with RVV (default: ON)")
 
 #-------------------------------------------------------------------------------
 # IREE-specific settings
diff --git a/samples/risp4ml/isp_stages/CMakeLists.txt b/samples/risp4ml/isp_stages/CMakeLists.txt
index bc2c798..7d47d93 100644
--- a/samples/risp4ml/isp_stages/CMakeLists.txt
+++ b/samples/risp4ml/isp_stages/CMakeLists.txt
@@ -1,10 +1,36 @@
+#-------------------------------------------------------------------------------
+# Build libraries and unit tests for RISP4ML blocks.
+# Use different source files for RVV and non-RVV versions
+#-------------------------------------------------------------------------------
+
+set(BLC_SRC "blc.c")
+set(DEMOSAIC_SRC "demosaic.c")
+set(DG_SRC "dg.c")
+set(DOWNSCALE_SRC "downscale.c")
+set(GAMMA_SRC "gamma.c")
+set(WBG_SRC "wbg.c")
+set(DOWNSCALE_TEST_SRC "downscale_test.cc")
+set(GAMMA_TEST_SRC "gamma_test.cc")
+
+if (${BUILD_ISP_WITH_RVV})
+  set(BLC_SRC "blc_rvv.c")
+  set(DEMOSAIC_SRC "demosaic_rvv.c")
+  set(DG_SRC "dg_rvv.c")
+  set(DOWNSCALE_SRC "downscale_rvv.c")
+  set(GAMMA_SRC "gamma_rvv.c")
+  set(WBG_SRC "wbg_rvv.c")
+  set(DOWNSCALE_TEST_SRC "downscale_rvv_test.cc")
+  set(GAMMA_TEST_SRC "gamma_rvv_test.cc")
+  add_definitions(-DISP_WITH_RVV)
+endif()
+
 iree_cc_library(
   NAME
     blc
   HDRS
     "blc.h"
   SRCS
-    "blc.c"
+    ${BLC_SRC}
   DEPS
     samples::risp4ml::common::image
     samples::risp4ml::common::utils
@@ -16,7 +42,7 @@
   HDRS
     "demosaic.h"
   SRCS
-    "demosaic.c"
+    ${DEMOSAIC_SRC}
   DEPS
     samples::risp4ml::common::image
     samples::risp4ml::common::utils
@@ -28,7 +54,7 @@
   HDRS
     "dg.h"
   SRCS
-    "dg.c"
+    ${DG_SRC}
   DEPS
     samples::risp4ml::common::image
     samples::risp4ml::common::utils
@@ -40,7 +66,7 @@
   HDRS
     "downscale.h"
   SRCS
-    "downscale.c"
+    ${DOWNSCALE_SRC}
   DEPS
     samples::risp4ml::common::image
     samples::risp4ml::common::utils
@@ -52,7 +78,7 @@
   HDRS
     "gamma.h"
   SRCS
-    "gamma.c"
+    ${GAMMA_SRC}
   DEPS
     samples::risp4ml::common::image
     samples::risp4ml::common::utils
@@ -64,7 +90,7 @@
   HDRS
     "wbg.h"
   SRCS
-    "wbg.c"
+    ${WBG_SRC}
   DEPS
     samples::risp4ml::common::image
     samples::risp4ml::common::utils
@@ -100,7 +126,7 @@
   NAME
     downscale_test
   SRCS
-    "downscale_test.cc"
+    ${DOWNSCALE_TEST_SRC}
   DEPS
     ::downscale
     pw_unit_test
@@ -112,7 +138,7 @@
   NAME
     gamma_test
   SRCS
-    "gamma_test.cc"
+    ${GAMMA_TEST_SRC}
   DEPS
     ::gamma
     pw_unit_test
diff --git a/samples/risp4ml/isp_stages/blc_rvv.c b/samples/risp4ml/isp_stages/blc_rvv.c
new file mode 100644
index 0000000..21c53fe
--- /dev/null
+++ b/samples/risp4ml/isp_stages/blc_rvv.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <riscv_vector.h>
+
+#include "samples/risp4ml/common/utils.h"
+#include "samples/risp4ml/isp_stages/blc.h"
+
+static BlcParams blc_params = {.enable = true,
+                               .offsets = {2048, 2048, 2048, 2048}};
+
+void set_blc_params(BlcParams* params) { blc_params = *params; }
+
+void blc_process(Image* img) {
+  if (!blc_params.enable) return;
+
+  size_t vl;
+  size_t n = img->height * img->width * img->num_channels;
+  uint16_t offset = blc_params.offsets[0];
+  vuint16m8_t vx;  // auxiliary variable
+  uint16_t* x;
+  for (size_t i = 0; i < n; i += vl) {
+    x = img->data + i;
+    vl = vsetvl_e16m8(n - i);
+    vx = vle16_v_u16m8(x, vl);    // load
+    vx = vssubu(vx, offset, vl);  // subtract
+    vse16(x, vx, vl);             // save
+  }
+}
diff --git a/samples/risp4ml/isp_stages/demosaic_rvv.c b/samples/risp4ml/isp_stages/demosaic_rvv.c
new file mode 100644
index 0000000..2ed9d25
--- /dev/null
+++ b/samples/risp4ml/isp_stages/demosaic_rvv.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <riscv_vector.h>
+
+#include "samples/risp4ml/common/utils.h"
+#include "samples/risp4ml/isp_stages/demosaic.h"
+
+#define kRgbColorChannels 3
+
+static DemosaicParams demosaic_params = {.enable = true};
+
+void set_demosaic_params(DemosaicParams* params) { demosaic_params = *params; }
+
+// Basic bilinear demosaic
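+// Boundary columns (x = 0, width-2, width-1) are handled with scalar code;
+// interior columns of each Bayer phase sit two pixels apart, so they are
+// processed with strided vector loads/stores (stride = 2 pixels), widening
+// adds for the neighbor sums, and narrowing shifts for the /2 and /4 averages.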
+void demosaic_process(Image* input, Image* output) {
+  if (!demosaic_params.enable) {
+    return;
+  }
+
+  const pixel_type_t* line_in[kRgbColorChannels];
+  pixel_type_t* line_out[kRgbColorChannels];
+  int x_offset[kRgbColorChannels];
+  const uint16_t boundary[kRgbColorChannels] = {0, input->width - 2,
+                                                input->width - 1};
+
+  size_t vl;
+  // auxiliary variables
+  vuint16m4_t vx, vy;
+  vuint32m8_t vz;
+
+  for (uint16_t y = 0; y < input->height; ++y) {
+    line_in[0] = (y) ? image_row(input, 0, y - 1) : image_row(input, 0, 1);
+    line_in[1] = image_row(input, 0, y);
+    line_in[2] = (y < input->height - 1)
+                     ? image_row(input, 0, y + 1)
+                     : image_row(input, 0, input->height - 2);
+
+    line_out[1] = image_row(output, 1, y);
+    if ((y & 0x1) == 0) {
+      line_out[0] = image_row(output, 0, y);
+      line_out[2] = image_row(output, 2, y);
+    } else {
+      line_out[0] = image_row(output, 2, y);
+      line_out[2] = image_row(output, 0, y);
+    }
+
+    // x at boundary
+    for (uint8_t i = 0; i < 3; ++i) {
+      uint16_t x = boundary[i];
+      for (uint16_t c = 0; c < kRgbColorChannels; ++c) {
+        x_offset[c] = BayerMirrorBoundary(x - 1 + c, input->width);
+      }
+
+      BayerIndex bayer_index = GetBayerIndex(kBayerType, x, y);
+      switch (bayer_index) {
+        case (kB):
+        case (kR): {
+          line_out[0][x] = line_in[1][x_offset[1]];
+          line_out[1][x] = (line_in[0][x_offset[1]] + line_in[2][x_offset[1]] +
+                            line_in[1][x_offset[0]] + line_in[1][x_offset[2]]) /
+                           4;
+          line_out[2][x] = (line_in[0][x_offset[0]] + line_in[0][x_offset[2]] +
+                            line_in[2][x_offset[0]] + line_in[2][x_offset[2]]) /
+                           4;
+        }; break;
+        case (kGb):
+        case (kGr): {
+          line_out[0][x] =
+              (line_in[1][x_offset[0]] + line_in[1][x_offset[2]]) / 2;
+          line_out[1][x] = line_in[1][x_offset[1]];
+          line_out[2][x] =
+              (line_in[0][x_offset[1]] + line_in[2][x_offset[1]]) / 2;
+        }; break;
+        default:
+          break;
+      }
+    }
+
+    // x not at boundary: vector instructions
+    for (uint8_t n = 1; n <= 2; n++) {
+      for (uint16_t x = n; x < input->width - 2; x += 2 * vl) {
+        x_offset[0] = x - 1;
+        x_offset[1] = x;
+        x_offset[2] = x + 1;
+        ptrdiff_t stride = 2 * sizeof(uint16_t);
+        size_t avl = (input->width - 1 - x) / 2;
+        vl = vsetvl_e16m4(avl);
+
+        if (n + (y & 0x1) == 2) {  // kR or kB
+          // ch0
+          vx = vlse16_v_u16m4(line_in[1] + x_offset[1], stride, vl);  // load
+          vsse16(line_out[0] + x, stride, vx, vl);                    // save
+          // ch1
+          vx = vlse16_v_u16m4(line_in[0] + x_offset[1], stride, vl);  // load
+          vy = vlse16_v_u16m4(line_in[2] + x_offset[1], stride, vl);  // load
+          vz = vwaddu_vv(vx, vy, vl);                                 // add
+          vy = vlse16_v_u16m4(line_in[1] + x_offset[0], stride, vl);  // load
+          vz = vwaddu_wv(vz, vy, vl);                                 // add
+          vy = vlse16_v_u16m4(line_in[1] + x_offset[2], stride, vl);  // load
+          vz = vwaddu_wv(vz, vy, vl);                                 // add
+          vx = vnsrl(vz, 2, vl);                                      // 1/4
+          vsse16(line_out[1] + x, stride, vx, vl);                    // save
+          // ch2
+          vx = vlse16_v_u16m4(line_in[0] + x_offset[0], stride, vl);  // load
+          vy = vlse16_v_u16m4(line_in[0] + x_offset[2], stride, vl);  // load
+          vz = vwaddu_vv(vx, vy, vl);                                 // add
+          vy = vlse16_v_u16m4(line_in[2] + x_offset[0], stride, vl);  // load
+          vz = vwaddu_wv(vz, vy, vl);                                 // add
+          vy = vlse16_v_u16m4(line_in[2] + x_offset[2], stride, vl);  // load
+          vz = vwaddu_wv(vz, vy, vl);                                 // add
+          vx = vnsrl(vz, 2, vl);                                      // 1/4
+          vsse16(line_out[2] + x, stride, vx, vl);                    // save
+        } else {  // kGr or kGb
+          // ch0
+          vx = vlse16_v_u16m4(line_in[1] + x_offset[0], stride, vl);  // load
+          vy = vlse16_v_u16m4(line_in[1] + x_offset[2], stride, vl);  // load
+          vz = vwaddu_vv(vx, vy, vl);                                 // add
+          vx = vnsrl(vz, 1, vl);                                      // 1/2
+          vsse16(line_out[0] + x, stride, vx, vl);                    // save
+          // ch1
+          vx = vlse16_v_u16m4(line_in[1] + x_offset[1], stride, vl);  // load
+          vsse16(line_out[1] + x, stride, vx, vl);                    // save
+          // ch2
+          vx = vlse16_v_u16m4(line_in[0] + x_offset[1], stride, vl);  // load
+          vy = vlse16_v_u16m4(line_in[2] + x_offset[1], stride, vl);  // load
+          vz = vwaddu_vv(vx, vy, vl);                                 // add
+          vx = vnsrl(vz, 1, vl);                                      // 1/2
+          vsse16(line_out[2] + x, stride, vx, vl);                    // save
+        }
+      }
+    }
+  }
+}
diff --git a/samples/risp4ml/isp_stages/dg_rvv.c b/samples/risp4ml/isp_stages/dg_rvv.c
new file mode 100644
index 0000000..8cfa816
--- /dev/null
+++ b/samples/risp4ml/isp_stages/dg_rvv.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <riscv_vector.h>
+
+#include "samples/risp4ml/common/utils.h"
+#include "samples/risp4ml/isp_stages/dg.h"
+
+static const uint16_t kDgFractional = kRawPipelineFraction;
+static const uint16_t kDgUnityGain = 1 << kDgFractional;
+static DgParams dg_params = {
+    .enable = true,
+    .gains = {kDgUnityGain, kDgUnityGain, kDgUnityGain, kDgUnityGain}};
+
+void set_dg_params(DgParams* params) { dg_params = *params; }
+
+void dg_process(Image* img) {
+  if (!dg_params.enable) return;
+
+  size_t vl;
+  size_t n = img->height * img->width * img->num_channels;
+  uint16_t gain = dg_params.gains[0];
+  uint32_t offset = 1 << (kDgFractional - 1);
+  uint32_t max_val = kRawPipelineMaxVal << kDgFractional;
+  // auxiliary variables
+  vuint16m4_t vx;
+  vuint32m8_t vy;
+  uint16_t* x;
+  for (size_t i = 0; i < n; i += vl) {
+    x = img->data + i;
+    vl = vsetvl_e16m4(n - i);
+    vx = vle16_v_u16m4(x, vl);          // load
+    vy = vwmulu(vx, gain, vl);          // multiply
+    vy = vadd(vy, offset, vl);          // add
+    vy = vminu(vy, max_val, vl);        // clamp
+    vx = vnsrl(vy, kDgFractional, vl);  // bit shift
+    vse16(x, vx, vl);                   // save
+  }
+}
diff --git a/samples/risp4ml/isp_stages/downscale.h b/samples/risp4ml/isp_stages/downscale.h
index d166610..0ca46bd 100644
--- a/samples/risp4ml/isp_stages/downscale.h
+++ b/samples/risp4ml/isp_stages/downscale.h
@@ -28,13 +28,14 @@
   // scale_precision is the number of fractional bits used for scale factors and
   // initial offsets
   uint32_t scale_precision;
+  uint32_t scale_fixed_one;
+#ifndef ISP_WITH_RVV
   // interpolate_precision is the number of fractional bits used for
   // interpolation weights
   uint32_t interpolate_precision;
   // interpolate_shift is the shift for pixel value before interpolation to
   // avoid rounding error.
   uint32_t interpolate_shift;
-  uint32_t scale_fixed_one;
   uint32_t scale_fraction_mask;
   uint32_t weight_shift;
 
@@ -51,6 +52,7 @@
   // output pixel from the first input pixel in each direction respectively
   uint32_t ver_initial_offset;
   uint32_t hor_initial_offset;
+#endif // ! ISP_WITH_RVV
 } DownscaleParams;
 
 void set_downscale_param(DownscaleParams* params);
diff --git a/samples/risp4ml/isp_stages/downscale_rvv.c b/samples/risp4ml/isp_stages/downscale_rvv.c
new file mode 100644
index 0000000..454bd6c
--- /dev/null
+++ b/samples/risp4ml/isp_stages/downscale_rvv.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <riscv_vector.h>
+
+#include "samples/risp4ml/common/utils.h"
+#include "samples/risp4ml/isp_stages/downscale.h"
+
+static const uint16_t kScalePrecision = 10;
+static const uint32_t kScaleFixedOne = (1 << kScalePrecision);
+
+static DownscaleParams params = {
+    .enable = true,
+    .scale_precision = kScalePrecision,
+    .scale_fixed_one = kScaleFixedOne,
+};
+
+void set_downscale_param(DownscaleParams* in_params) { params = *in_params; }
+void set_downscale_factor(Image* input, ImageU8* output) { return; }
+
+// Basic bilinear downscale
+// Implementation based on:
+// https://chao-ji.github.io/jekyll/update/2018/07/19/BilinearResize.html
+// Resamples image using bilinear interpolation.
+// 'output' is modified by this function to store the output image.
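+// Fixed-point scheme: for every output pixel the four neighboring input
+// pixels a, b, c, d are gathered with indexed loads, the horizontal and
+// vertical weights wx, wy are computed in Q(scale_precision) (Q10 by
+// default), and
+//   ((a*(1-wx)>>p)*(1-wy) + (b*wx>>p)*(1-wy) +
+//    (c*(1-wx)>>p)*wy     + (d*wx>>p)*wy) >> p
+// is narrowed from the raw bit depth to the 8-bit pipeline output.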
+void downscale_process(Image* input, ImageU8* output) {
+  if (!params.enable) {
+    return;
+  }
+
+  uint32_t input_width = input->width;
+  uint32_t input_w_1 = input->width - 1;
+  uint32_t input_h_1 = input->height - 1;
+  uint32_t w_1 = output->width - 1;
+  uint32_t h_1 = output->height - 1;
+
+  size_t vl;
+  size_t n = output->height * output->width;
+  // auxiliary variables
+  vuint32m8_t vx, vy, vz, vid, vp, vq;
+  // neighboring x & y coordinates
+  vuint32m8_t vx_l, vy_l, vx_h, vy_h;
+  // weights of neighbors
+  vuint32m8_t vx_weight, vy_weight, vx_weight_1minus, vy_weight_1minus;
+  // neighboring data points
+  vuint32m8_t va, vb, vc, vd;
+  vuint32m8_t vo;      // 32bit output
+  vuint16m4_t vo_16b;  // 16bit output
+  vuint8m2_t vo_8b;    // 8bit output
+
+  for (uint16_t c = 0; c < output->num_channels; ++c) {
+    pixel_type_t* in = image_row(input, c, 0);
+    uint8_t* out = output->data + c;
+
+    for (size_t i = 0; i < n; i += vl) {
+      vl = vsetvl_e16m4(n - i);
+      vid = vid_v_u32m8(vl);
+      vid = vadd(vid, i, vl);
+
+      vy = vdivu(vid, output->width, vl);
+      vx = vremu(vid, output->width, vl);
+
+      // find neighbors
+      vx_l = vmul(vx, input_w_1, vl);
+      vx_l = vdivu(vx_l, w_1, vl);
+      vx_h = vadd(vx_l, 1, vl);
+      vx_h = vminu(vx_h, input_w_1, vl);  // clamp
+
+      vy_l = vmul(vy, input_h_1, vl);
+      vy_l = vdivu(vy_l, h_1, vl);
+      vy_h = vadd(vy_l, 1, vl);
+      vy_h = vminu(vy_h, input_h_1, vl);  // clamp
+
+      // load a, b, c, d
+      vz = vmul(vy_l, input_width, vl);
+      vz = vadd(vz, vx_l, vl);
+      vz = vsll(vz, 1, vl);  // *2
+      vo_16b = vluxei32(in, vz, vl);
+      va = vwaddu_vx(vo_16b, 0, vl);
+
+      vz = vmul(vy_l, input_width, vl);
+      vz = vadd(vz, vx_h, vl);
+      vz = vsll(vz, 1, vl);  // *2
+      vo_16b = vluxei32(in, vz, vl);
+      vb = vwaddu_vx(vo_16b, 0, vl);
+
+      vz = vmul(vy_h, input_width, vl);
+      vz = vadd(vz, vx_l, vl);
+      vz = vsll(vz, 1, vl);  // *2
+      vo_16b = vluxei32(in, vz, vl);
+      vc = vwaddu_vx(vo_16b, 0, vl);
+
+      vz = vmul(vy_h, input_width, vl);
+      vz = vadd(vz, vx_h, vl);
+      vz = vsll(vz, 1, vl);  // *2
+      vo_16b = vluxei32(in, vz, vl);
+      vd = vwaddu_vx(vo_16b, 0, vl);
+
+      // weights
+      vp = vmul(vx, input_w_1, vl);
+      vq = vmul(vx_l, w_1, vl);
+      vp = vssubu(vp, vq, vl);
+      vp = vsll(vp, params.scale_precision, vl);
+      vx_weight = vdivu(vp, w_1, vl);
+
+      vp = vmul(vy, input_h_1, vl);
+      vq = vmul(vy_l, h_1, vl);
+      vp = vssubu(vp, vq, vl);
+      vp = vsll(vp, params.scale_precision, vl);
+      vy_weight = vdivu(vp, h_1, vl);
+
+      vx_weight_1minus = vrsub(vx_weight, params.scale_fixed_one, vl);
+      vy_weight_1minus = vrsub(vy_weight, params.scale_fixed_one, vl);
+
+      // resized
+      vo = vmul(va, vx_weight_1minus, vl);
+      vo = vsrl(vo, params.scale_precision, vl);
+      vo = vmul(vo, vy_weight_1minus, vl);
+
+      vp = vmul(vb, vx_weight, vl);
+      vp = vsrl(vp, params.scale_precision, vl);
+      vp = vmul(vp, vy_weight_1minus, vl);
+      vo = vadd(vo, vp, vl);
+
+      vp = vmul(vc, vx_weight_1minus, vl);
+      vp = vsrl(vp, params.scale_precision, vl);
+      vp = vmul(vp, vy_weight, vl);
+      vo = vadd(vo, vp, vl);
+
+      vp = vmul(vd, vx_weight, vl);
+      vp = vsrl(vp, params.scale_precision, vl);
+      vp = vmul(vp, vy_weight, vl);
+      vo = vadd(vo, vp, vl);
+
+      // bit shift
+      vo_16b = vnsrl(vo, params.scale_precision, vl);
+      vo_8b = vnsrl(vo_16b, kRawPipelineBpp - kPipeOutputBpp, vl);
+
+      // save
+      vsse8(out + i * output->num_channels, output->num_channels, vo_8b, vl);
+    }
+  }
+}
diff --git a/samples/risp4ml/isp_stages/downscale_rvv_test.cc b/samples/risp4ml/isp_stages/downscale_rvv_test.cc
new file mode 100644
index 0000000..f38e8b8
--- /dev/null
+++ b/samples/risp4ml/isp_stages/downscale_rvv_test.cc
@@ -0,0 +1,210 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Use a separate test for RVV since the algorithm is implemented differently.
+// Implementation based on:
+// https://chao-ji.github.io/jekyll/update/2018/07/19/BilinearResize.html
+
+#include <climits>
+#include <cmath>
+
+#include "pw_unit_test/framework.h"
+#include "samples/risp4ml/common/constants.h"
+#include "samples/risp4ml/common/test_utils.h"
+#include "samples/risp4ml/isp_stages/downscale.h"
+
+static constexpr uint16_t kScalePrecision = 10;
+static constexpr uint32_t kScaleFixedOne = (1 << kScalePrecision);
+static constexpr float kOutBitsShift = 1 << (kRawPipelineBpp - kPipeOutputBpp);
+
+class DownscaleRvvTest : public ::testing::Test {
+ protected:
+  void setup(uint16_t in_ch, uint16_t in_height, uint16_t in_width,
+             uint16_t out_ch, uint16_t out_height, uint16_t out_width) {
+    in_ = image_new(in_ch, in_height, in_width);
+    out_ = imageu8_new(out_ch, out_height, out_width);
+    params_.enable = true;
+    params_.scale_precision = kScalePrecision;
+    params_.scale_fixed_one = kScaleFixedOne;
+  }
+  void TearDown() override {
+    image_delete(in_);
+    imageu8_delete(out_);
+  }
+  ImageU8* imageu8_new(uint16_t num_channels, uint16_t height, uint16_t width);
+  void imageu8_delete(ImageU8* image) {
+    if (image) {
+      if (image->data) free(image->data);
+      free(image);
+    }
+  }
+  pixel_type_t imageu8_pixel_val(ImageU8* image, uint16_t c, uint16_t y,
+                                 uint16_t x) {
+    const uint32_t stride_c = 1;
+    const uint16_t stride_y = image->num_channels * image->width;
+    const uint16_t stride_x = image->num_channels;
+
+    return *(image->data + c * stride_c + y * stride_y + x * stride_x);
+  }
+  float ExpectedOut(uint16_t y, uint16_t x);
+  void ScaleRampImageTest(uint16_t output_width, uint16_t output_height,
+                          uint16_t input_width = 640,
+                          uint16_t input_height = 480);
+  struct BilinearScaleTestValues {
+    uint16_t output_width;
+    uint16_t output_height;
+    uint16_t input_width;
+    uint16_t input_height;
+  };
+
+  Image* in_;
+  ImageU8* out_;
+  DownscaleParams params_;
+};
+
+ImageU8* DownscaleRvvTest::imageu8_new(uint16_t num_channels, uint16_t height,
+                                       uint16_t width) {
+  ImageU8* image = (ImageU8*)malloc(sizeof(ImageU8));
+  if (image) {
+    image->num_channels = num_channels;
+    image->height = height;
+    image->width = width;
+    uint32_t num_pixels = width * height * num_channels;
+    image->data = (uint8_t*)malloc(num_pixels * sizeof(uint8_t));
+  }
+  return image;
+}
+
+float DownscaleRvvTest::ExpectedOut(uint16_t y, uint16_t x) {
+  float x_ratio = ((float)in_->width - 1) / (out_->width - 1);
+  float y_ratio = ((float)in_->height - 1) / (out_->height - 1);
+
+  uint32_t x_l = (uint32_t)(x_ratio * x);
+  uint32_t x_h = (x_l == in_->width - 1) ? x_l : x_l + 1;
+  uint32_t y_l = y_ratio * y;
+  uint32_t y_h = y_l == in_->height - 1 ? y_l : y_l + 1;
+  float x_weight = (x_ratio * x) - x_l;
+  float y_weight = (y_ratio * y) - y_l;
+
+  pixel_type_t a = image_pixel_val(in_, 0, y_l, x_l);
+  pixel_type_t b = image_pixel_val(in_, 0, y_l, x_h);
+  pixel_type_t c = image_pixel_val(in_, 0, y_h, x_l);
+  pixel_type_t d = image_pixel_val(in_, 0, y_h, x_h);
+
+  float expected_out = a * (1 - x_weight) * (1 - y_weight) +
+                       b * x_weight * (1 - y_weight) +
+                       c * y_weight * (1 - x_weight) + d * x_weight * y_weight;
+
+  expected_out = floorf(expected_out / kOutBitsShift);
+
+  return expected_out;
+}
+
+// Helper for 2D ramp tests: checks the downscaled image against the bilinear reference.
+void DownscaleRvvTest::ScaleRampImageTest(uint16_t output_width,
+                                          uint16_t output_height,
+                                          uint16_t input_width,
+                                          uint16_t input_height) {
+  constexpr int kTolerance = 1;  // Tolerance for rounding error.
+  setup(1, input_height, input_width, 1, output_height, output_width);
+
+  // Fill the in_ image with a 2D ramp whose values increase from the
+  // top-left corner to the bottom-right corner.
+  for (uint16_t y = 0; y < input_height; ++y) {
+    for (uint16_t x = 0; x < input_width; ++x) {
+      *image_pixel(in_, 0, y, x) = (y * input_width + x) % (1024);
+    }
+  }
+
+  set_downscale_param(&params_);
+  downscale_process(in_, out_);
+
+  for (uint16_t y = 0; y < output_height; ++y) {
+    for (uint16_t x = 0; x < output_width; ++x) {
+      float expected_out = ExpectedOut(y, x);
+      float diff =
+          std::abs((float)imageu8_pixel_val(out_, 0, y, x) - expected_out);
+      ASSERT_LE(diff, kTolerance);
+    }
+  }
+}
+
+TEST_F(DownscaleRvvTest, NoScaleTest) {
+  constexpr uint16_t kOutputWidth = 128;
+  constexpr uint16_t kInputHeight = 96;
+  setup(1, kInputHeight, kOutputWidth, 1, kInputHeight, kOutputWidth);
+
+  // Generate random image.
+  InitImageRandom(in_, 0, USHRT_MAX);
+
+  set_downscale_param(&params_);
+  downscale_process(in_, out_);
+
+  // Verify the out_ image equals the in_ image shifted to the output bit depth.
+  for (uint16_t y = 0; y < kInputHeight; ++y) {
+    for (uint16_t x = 0; x < kOutputWidth; ++x) {
+      ASSERT_EQ(imageu8_pixel_val(out_, 0, y, x),
+                static_cast<pixel_type_t>(
+                    floorf(image_pixel_val(in_, 0, y, x) >>
+                           (kRawPipelineBpp - kPipeOutputBpp))));
+    }
+  }
+}
+
+TEST_F(DownscaleRvvTest, DownscaleRvvTest) {
+  std::vector<BilinearScaleTestValues> tests = {
+      {8, 12, 64, 64},     {320, 240, 640, 480}, {80, 60, 640, 480},
+      {220, 95, 640, 480}, {415, 125, 640, 480}, {122, 13, 200, 100}};
+
+  for (const auto& test : tests) {
+    ScaleRampImageTest(test.output_width, test.output_height, test.input_width,
+                       test.input_height);
+  }
+}
+
+TEST_F(DownscaleRvvTest, Trivial3DTest) {
+  constexpr uint16_t kChannels = 3;
+  constexpr uint16_t kInputHeight = 5;
+  constexpr uint16_t kInputWidth = 5;
+
+  constexpr uint16_t kVerScale = 2;
+  constexpr uint16_t kHorScale = 2;
+
+  constexpr uint16_t kOutputHeight = (kInputHeight - 1) / kVerScale + 1;
+  constexpr uint16_t kOutputWidth = (kInputWidth - 1) / kHorScale + 1;
+
+  setup(kChannels, kInputHeight, kInputWidth, kChannels, kOutputHeight,
+        kOutputWidth);
+
+  for (uint16_t c = 0; c < kChannels; ++c) {
+    for (uint16_t y = 0; y < kInputHeight; ++y) {
+      for (uint16_t x = 0; x < kInputWidth; ++x) {
+        *image_pixel(in_, c, y, x) = ((y * kInputWidth + x) * 10 + c) << 8;
+      }
+    }
+  }
+
+  set_downscale_param(&params_);
+  downscale_process(in_, out_);
+
+  // For exact integer scale ratios, out_ is simply a downsampled copy of in_.
+  for (uint16_t c = 0; c < kChannels; ++c) {
+    for (uint16_t y = 0; y < kOutputHeight; ++y) {
+      for (uint16_t x = 0; x < kOutputWidth; ++x) {
+        ASSERT_EQ(imageu8_pixel_val(out_, c, y, x),
+                  image_pixel_val(in_, c, y * kVerScale, x * kHorScale) >> 8);
+      }
+    }
+  }
+}
diff --git a/samples/risp4ml/isp_stages/gamma.h b/samples/risp4ml/isp_stages/gamma.h
index c3044ac..24b42cd 100644
--- a/samples/risp4ml/isp_stages/gamma.h
+++ b/samples/risp4ml/isp_stages/gamma.h
@@ -23,7 +23,11 @@
 extern "C" {
 #endif  // __cplusplus
 
+#ifndef ISP_WITH_RVV
 #define kGammaNumberPoints 81
+#else
+#define kGammaNumberPoints 2049
+#endif
 
 typedef struct {
   bool enable;
diff --git a/samples/risp4ml/isp_stages/gamma_rvv.c b/samples/risp4ml/isp_stages/gamma_rvv.c
new file mode 100644
index 0000000..a0f5503
--- /dev/null
+++ b/samples/risp4ml/isp_stages/gamma_rvv.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <riscv_vector.h>
+
+#include "samples/risp4ml/common/utils.h"
+#include "samples/risp4ml/isp_stages/gamma.h"
+
+#define kRgbColorChannels 3
+
+static const uint16_t kRgbPipelineBpp = 16;
+static const uint16_t kRgbPipelineMaxVal = (1 << kRgbPipelineBpp) - 1;
+static const uint16_t kGammaShiftBits = 5;
+static const uint16_t kGammaSpacing = (1 << kGammaShiftBits);
+static GammaParams gamma_params = {
+    .enable = true,
+    .lut = {
+        0,     413,   827,   1240,  1654,  2067,  2481,  2884,  3255,  3600,
+        3923,  4228,  4518,  4793,  5056,  5309,  5552,  5786,  6013,  6232,
+        6444,  6651,  6851,  7047,  7237,  7423,  7605,  7783,  7956,  8127,
+        8294,  8457,  8618,  8776,  8931,  9083,  9233,  9380,  9525,  9668,
+        9809,  9948,  10084, 10219, 10352, 10484, 10613, 10741, 10868, 10992,
+        11116, 11238, 11358, 11478, 11595, 11712, 11828, 11942, 12055, 12167,
+        12278, 12387, 12496, 12604, 12711, 12816, 12921, 13025, 13128, 13230,
+        13331, 13432, 13531, 13630, 13728, 13825, 13922, 14017, 14112, 14207,
+        14300, 14393, 14485, 14577, 14668, 14758, 14848, 14937, 15026, 15113,
+        15201, 15288, 15374, 15459, 15545, 15629, 15713, 15797, 15880, 15963,
+        16045, 16126, 16208, 16288, 16368, 16448, 16528, 16607, 16685, 16763,
+        16841, 16918, 16995, 17071, 17147, 17223, 17298, 17373, 17448, 17522,
+        17596, 17669, 17742, 17815, 17887, 17959, 18031, 18102, 18173, 18244,
+        18315, 18385, 18454, 18524, 18593, 18662, 18731, 18799, 18867, 18935,
+        19002, 19069, 19136, 19203, 19269, 19335, 19401, 19466, 19531, 19596,
+        19661, 19726, 19790, 19854, 19918, 19981, 20045, 20108, 20170, 20233,
+        20295, 20357, 20419, 20481, 20543, 20604, 20665, 20726, 20786, 20847,
+        20907, 20967, 21027, 21086, 21145, 21205, 21264, 21322, 21381, 21439,
+        21498, 21556, 21613, 21671, 21728, 21786, 21843, 21900, 21956, 22013,
+        22069, 22126, 22182, 22238, 22293, 22349, 22404, 22459, 22514, 22569,
+        22624, 22679, 22733, 22787, 22841, 22895, 22949, 23003, 23056, 23110,
+        23163, 23216, 23269, 23321, 23374, 23426, 23479, 23531, 23583, 23635,
+        23687, 23738, 23790, 23841, 23892, 23943, 23994, 24045, 24096, 24146,
+        24197, 24247, 24297, 24347, 24397, 24447, 24497, 24546, 24596, 24645,
+        24694, 24743, 24792, 24841, 24890, 24938, 24987, 25035, 25083, 25132,
+        25180, 25228, 25275, 25323, 25371, 25418, 25466, 25513, 25560, 25607,
+        25654, 25701, 25747, 25794, 25841, 25887, 25933, 25980, 26026, 26072,
+        26118, 26163, 26209, 26255, 26300, 26346, 26391, 26436, 26481, 26527,
+        26571, 26616, 26661, 26706, 26750, 26795, 26839, 26884, 26928, 26972,
+        27016, 27060, 27104, 27148, 27191, 27235, 27278, 27322, 27365, 27408,
+        27452, 27495, 27538, 27581, 27623, 27666, 27709, 27751, 27794, 27836,
+        27879, 27921, 27963, 28005, 28047, 28089, 28131, 28173, 28215, 28256,
+        28298, 28339, 28381, 28422, 28464, 28505, 28546, 28587, 28628, 28669,
+        28710, 28750, 28791, 28832, 28872, 28913, 28953, 28994, 29034, 29074,
+        29114, 29154, 29194, 29234, 29274, 29314, 29353, 29393, 29433, 29472,
+        29512, 29551, 29590, 29630, 29669, 29708, 29747, 29786, 29825, 29864,
+        29903, 29941, 29980, 30019, 30057, 30096, 30134, 30173, 30211, 30249,
+        30287, 30326, 30364, 30402, 30440, 30477, 30515, 30553, 30591, 30628,
+        30666, 30704, 30741, 30779, 30816, 30853, 30891, 30928, 30965, 31002,
+        31039, 31076, 31113, 31150, 31187, 31223, 31260, 31297, 31333, 31370,
+        31406, 31443, 31479, 31516, 31552, 31588, 31624, 31660, 31696, 31733,
+        31768, 31804, 31840, 31876, 31912, 31948, 31983, 32019, 32054, 32090,
+        32125, 32161, 32196, 32232, 32267, 32302, 32337, 32372, 32407, 32442,
+        32477, 32512, 32547, 32582, 32617, 32652, 32686, 32721, 32756, 32790,
+        32825, 32859, 32894, 32928, 32962, 32997, 33031, 33065, 33099, 33133,
+        33167, 33202, 33235, 33269, 33303, 33337, 33371, 33405, 33438, 33472,
+        33506, 33539, 33573, 33606, 33640, 33673, 33707, 33740, 33773, 33807,
+        33840, 33873, 33906, 33939, 33972, 34005, 34038, 34071, 34104, 34137,
+        34170, 34203, 34235, 34268, 34301, 34333, 34366, 34398, 34431, 34463,
+        34496, 34528, 34560, 34593, 34625, 34657, 34689, 34722, 34754, 34786,
+        34818, 34850, 34882, 34914, 34946, 34977, 35009, 35041, 35073, 35104,
+        35136, 35168, 35199, 35231, 35262, 35294, 35325, 35357, 35388, 35419,
+        35451, 35482, 35513, 35545, 35576, 35607, 35638, 35669, 35700, 35731,
+        35762, 35793, 35824, 35855, 35886, 35916, 35947, 35978, 36008, 36039,
+        36070, 36100, 36131, 36161, 36192, 36222, 36253, 36283, 36314, 36344,
+        36374, 36405, 36435, 36465, 36495, 36525, 36555, 36586, 36616, 36646,
+        36676, 36706, 36735, 36765, 36795, 36825, 36855, 36885, 36914, 36944,
+        36974, 37003, 37033, 37063, 37092, 37122, 37151, 37181, 37210, 37239,
+        37269, 37298, 37328, 37357, 37386, 37415, 37444, 37474, 37503, 37532,
+        37561, 37590, 37619, 37648, 37677, 37706, 37735, 37764, 37793, 37821,
+        37850, 37879, 37908, 37937, 37965, 37994, 38023, 38051, 38080, 38108,
+        38137, 38165, 38194, 38222, 38251, 38279, 38307, 38336, 38364, 38392,
+        38421, 38449, 38477, 38505, 38533, 38561, 38590, 38618, 38646, 38674,
+        38702, 38730, 38758, 38785, 38813, 38841, 38869, 38897, 38925, 38952,
+        38980, 39008, 39036, 39063, 39091, 39118, 39146, 39174, 39201, 39229,
+        39256, 39284, 39311, 39338, 39366, 39393, 39421, 39448, 39475, 39502,
+        39530, 39557, 39584, 39611, 39638, 39666, 39693, 39720, 39747, 39774,
+        39801, 39828, 39855, 39882, 39909, 39935, 39962, 39989, 40016, 40043,
+        40070, 40096, 40123, 40150, 40176, 40203, 40230, 40256, 40283, 40310,
+        40336, 40363, 40389, 40416, 40442, 40468, 40495, 40521, 40548, 40574,
+        40600, 40627, 40653, 40679, 40705, 40732, 40758, 40784, 40810, 40836,
+        40862, 40888, 40914, 40941, 40967, 40993, 41019, 41044, 41070, 41096,
+        41122, 41148, 41174, 41200, 41226, 41251, 41277, 41303, 41329, 41354,
+        41380, 41406, 41431, 41457, 41483, 41508, 41534, 41559, 41585, 41610,
+        41636, 41661, 41687, 41712, 41737, 41763, 41788, 41814, 41839, 41864,
+        41889, 41915, 41940, 41965, 41990, 42016, 42041, 42066, 42091, 42116,
+        42141, 42166, 42191, 42216, 42241, 42266, 42291, 42316, 42341, 42366,
+        42391, 42416, 42441, 42466, 42490, 42515, 42540, 42565, 42590, 42614,
+        42639, 42664, 42688, 42713, 42738, 42762, 42787, 42812, 42836, 42861,
+        42885, 42910, 42934, 42959, 42983, 43008, 43032, 43056, 43081, 43105,
+        43129, 43154, 43178, 43202, 43227, 43251, 43275, 43299, 43324, 43348,
+        43372, 43396, 43420, 43444, 43469, 43493, 43517, 43541, 43565, 43589,
+        43613, 43637, 43661, 43685, 43709, 43733, 43756, 43780, 43804, 43828,
+        43852, 43876, 43899, 43923, 43947, 43971, 43994, 44018, 44042, 44066,
+        44089, 44113, 44137, 44160, 44184, 44207, 44231, 44254, 44278, 44302,
+        44325, 44348, 44372, 44395, 44419, 44442, 44466, 44489, 44512, 44536,
+        44559, 44582, 44606, 44629, 44652, 44676, 44699, 44722, 44745, 44769,
+        44792, 44815, 44838, 44861, 44884, 44907, 44931, 44954, 44977, 45000,
+        45023, 45046, 45069, 45092, 45115, 45138, 45161, 45184, 45206, 45229,
+        45252, 45275, 45298, 45321, 45344, 45366, 45389, 45412, 45435, 45457,
+        45480, 45503, 45526, 45548, 45571, 45594, 45616, 45639, 45662, 45684,
+        45707, 45729, 45752, 45774, 45797, 45819, 45842, 45864, 45887, 45909,
+        45932, 45954, 45977, 45999, 46021, 46044, 46066, 46088, 46111, 46133,
+        46155, 46178, 46200, 46222, 46244, 46267, 46289, 46311, 46333, 46355,
+        46378, 46400, 46422, 46444, 46466, 46488, 46510, 46532, 46554, 46576,
+        46599, 46621, 46643, 46665, 46686, 46708, 46730, 46752, 46774, 46796,
+        46818, 46840, 46862, 46884, 46905, 46927, 46949, 46971, 46993, 47014,
+        47036, 47058, 47080, 47101, 47123, 47145, 47167, 47188, 47210, 47231,
+        47253, 47275, 47296, 47318, 47339, 47361, 47383, 47404, 47426, 47447,
+        47469, 47490, 47512, 47533, 47555, 47576, 47597, 47619, 47640, 47662,
+        47683, 47704, 47726, 47747, 47768, 47790, 47811, 47832, 47854, 47875,
+        47896, 47917, 47939, 47960, 47981, 48002, 48023, 48045, 48066, 48087,
+        48108, 48129, 48150, 48171, 48192, 48213, 48234, 48256, 48277, 48298,
+        48319, 48340, 48361, 48382, 48403, 48423, 48444, 48465, 48486, 48507,
+        48528, 48549, 48570, 48591, 48612, 48632, 48653, 48674, 48695, 48716,
+        48736, 48757, 48778, 48799, 48819, 48840, 48861, 48881, 48902, 48923,
+        48943, 48964, 48985, 49005, 49026, 49047, 49067, 49088, 49108, 49129,
+        49149, 49170, 49191, 49211, 49232, 49252, 49273, 49293, 49313, 49334,
+        49354, 49375, 49395, 49416, 49436, 49456, 49477, 49497, 49517, 49538,
+        49558, 49578, 49599, 49619, 49639, 49660, 49680, 49700, 49720, 49741,
+        49761, 49781, 49801, 49821, 49842, 49862, 49882, 49902, 49922, 49942,
+        49962, 49982, 50003, 50023, 50043, 50063, 50083, 50103, 50123, 50143,
+        50163, 50183, 50203, 50223, 50243, 50263, 50283, 50303, 50323, 50343,
+        50362, 50382, 50402, 50422, 50442, 50462, 50482, 50501, 50521, 50541,
+        50561, 50581, 50600, 50620, 50640, 50660, 50680, 50699, 50719, 50739,
+        50758, 50778, 50798, 50817, 50837, 50857, 50876, 50896, 50916, 50935,
+        50955, 50974, 50994, 51014, 51033, 51053, 51072, 51092, 51111, 51131,
+        51150, 51170, 51189, 51209, 51228, 51248, 51267, 51287, 51306, 51325,
+        51345, 51364, 51384, 51403, 51422, 51442, 51461, 51480, 51500, 51519,
+        51538, 51558, 51577, 51596, 51616, 51635, 51654, 51673, 51693, 51712,
+        51731, 51750, 51769, 51789, 51808, 51827, 51846, 51865, 51884, 51904,
+        51923, 51942, 51961, 51980, 51999, 52018, 52037, 52056, 52075, 52094,
+        52113, 52132, 52151, 52170, 52189, 52208, 52227, 52246, 52265, 52284,
+        52303, 52322, 52341, 52360, 52379, 52398, 52417, 52436, 52454, 52473,
+        52492, 52511, 52530, 52549, 52568, 52586, 52605, 52624, 52643, 52662,
+        52680, 52699, 52718, 52737, 52755, 52774, 52793, 52811, 52830, 52849,
+        52867, 52886, 52905, 52923, 52942, 52961, 52979, 52998, 53017, 53035,
+        53054, 53072, 53091, 53109, 53128, 53147, 53165, 53184, 53202, 53221,
+        53239, 53258, 53276, 53295, 53313, 53332, 53350, 53369, 53387, 53405,
+        53424, 53442, 53461, 53479, 53497, 53516, 53534, 53553, 53571, 53589,
+        53608, 53626, 53644, 53663, 53681, 53699, 53718, 53736, 53754, 53772,
+        53791, 53809, 53827, 53845, 53864, 53882, 53900, 53918, 53936, 53955,
+        53973, 53991, 54009, 54027, 54045, 54064, 54082, 54100, 54118, 54136,
+        54154, 54172, 54190, 54208, 54226, 54244, 54263, 54281, 54299, 54317,
+        54335, 54353, 54371, 54389, 54407, 54425, 54443, 54461, 54479, 54496,
+        54514, 54532, 54550, 54568, 54586, 54604, 54622, 54640, 54658, 54676,
+        54693, 54711, 54729, 54747, 54765, 54783, 54800, 54818, 54836, 54854,
+        54872, 54889, 54907, 54925, 54943, 54960, 54978, 54996, 55014, 55031,
+        55049, 55067, 55084, 55102, 55120, 55138, 55155, 55173, 55191, 55208,
+        55226, 55243, 55261, 55279, 55296, 55314, 55331, 55349, 55367, 55384,
+        55402, 55419, 55437, 55454, 55472, 55490, 55507, 55525, 55542, 55560,
+        55577, 55595, 55612, 55629, 55647, 55664, 55682, 55699, 55717, 55734,
+        55752, 55769, 55786, 55804, 55821, 55839, 55856, 55873, 55891, 55908,
+        55925, 55943, 55960, 55977, 55995, 56012, 56029, 56047, 56064, 56081,
+        56098, 56116, 56133, 56150, 56168, 56185, 56202, 56219, 56236, 56254,
+        56271, 56288, 56305, 56322, 56340, 56357, 56374, 56391, 56408, 56425,
+        56443, 56460, 56477, 56494, 56511, 56528, 56545, 56562, 56579, 56597,
+        56614, 56631, 56648, 56665, 56682, 56699, 56716, 56733, 56750, 56767,
+        56784, 56801, 56818, 56835, 56852, 56869, 56886, 56903, 56920, 56937,
+        56954, 56971, 56988, 57004, 57021, 57038, 57055, 57072, 57089, 57106,
+        57123, 57140, 57156, 57173, 57190, 57207, 57224, 57241, 57257, 57274,
+        57291, 57308, 57325, 57341, 57358, 57375, 57392, 57408, 57425, 57442,
+        57459, 57475, 57492, 57509, 57526, 57542, 57559, 57576, 57592, 57609,
+        57626, 57642, 57659, 57676, 57692, 57709, 57726, 57742, 57759, 57776,
+        57792, 57809, 57825, 57842, 57859, 57875, 57892, 57908, 57925, 57941,
+        57958, 57975, 57991, 58008, 58024, 58041, 58057, 58074, 58090, 58107,
+        58123, 58140, 58156, 58173, 58189, 58206, 58222, 58238, 58255, 58271,
+        58288, 58304, 58321, 58337, 58353, 58370, 58386, 58403, 58419, 58435,
+        58452, 58468, 58484, 58501, 58517, 58533, 58550, 58566, 58582, 58599,
+        58615, 58631, 58648, 58664, 58680, 58697, 58713, 58729, 58745, 58762,
+        58778, 58794, 58810, 58827, 58843, 58859, 58875, 58891, 58908, 58924,
+        58940, 58956, 58972, 58989, 59005, 59021, 59037, 59053, 59069, 59085,
+        59102, 59118, 59134, 59150, 59166, 59182, 59198, 59214, 59230, 59246,
+        59263, 59279, 59295, 59311, 59327, 59343, 59359, 59375, 59391, 59407,
+        59423, 59439, 59455, 59471, 59487, 59503, 59519, 59535, 59551, 59567,
+        59583, 59599, 59615, 59631, 59647, 59663, 59678, 59694, 59710, 59726,
+        59742, 59758, 59774, 59790, 59806, 59822, 59837, 59853, 59869, 59885,
+        59901, 59917, 59933, 59948, 59964, 59980, 59996, 60012, 60027, 60043,
+        60059, 60075, 60091, 60106, 60122, 60138, 60154, 60169, 60185, 60201,
+        60217, 60232, 60248, 60264, 60280, 60295, 60311, 60327, 60342, 60358,
+        60374, 60389, 60405, 60421, 60436, 60452, 60468, 60483, 60499, 60515,
+        60530, 60546, 60561, 60577, 60593, 60608, 60624, 60640, 60655, 60671,
+        60686, 60702, 60717, 60733, 60749, 60764, 60780, 60795, 60811, 60826,
+        60842, 60857, 60873, 60888, 60904, 60919, 60935, 60950, 60966, 60981,
+        60997, 61012, 61028, 61043, 61059, 61074, 61089, 61105, 61120, 61136,
+        61151, 61167, 61182, 61197, 61213, 61228, 61244, 61259, 61274, 61290,
+        61305, 61320, 61336, 61351, 61366, 61382, 61397, 61412, 61428, 61443,
+        61458, 61474, 61489, 61504, 61520, 61535, 61550, 61566, 61581, 61596,
+        61611, 61627, 61642, 61657, 61672, 61688, 61703, 61718, 61733, 61749,
+        61764, 61779, 61794, 61809, 61825, 61840, 61855, 61870, 61885, 61901,
+        61916, 61931, 61946, 61961, 61976, 61991, 62007, 62022, 62037, 62052,
+        62067, 62082, 62097, 62112, 62128, 62143, 62158, 62173, 62188, 62203,
+        62218, 62233, 62248, 62263, 62278, 62293, 62308, 62323, 62338, 62353,
+        62369, 62384, 62399, 62414, 62429, 62444, 62459, 62474, 62489, 62504,
+        62518, 62533, 62548, 62563, 62578, 62593, 62608, 62623, 62638, 62653,
+        62668, 62683, 62698, 62713, 62728, 62743, 62757, 62772, 62787, 62802,
+        62817, 62832, 62847, 62862, 62876, 62891, 62906, 62921, 62936, 62951,
+        62966, 62980, 62995, 63010, 63025, 63040, 63054, 63069, 63084, 63099,
+        63114, 63128, 63143, 63158, 63173, 63188, 63202, 63217, 63232, 63247,
+        63261, 63276, 63291, 63306, 63320, 63335, 63350, 63364, 63379, 63394,
+        63408, 63423, 63438, 63453, 63467, 63482, 63497, 63511, 63526, 63541,
+        63555, 63570, 63585, 63599, 63614, 63628, 63643, 63658, 63672, 63687,
+        63702, 63716, 63731, 63745, 63760, 63774, 63789, 63804, 63818, 63833,
+        63847, 63862, 63876, 63891, 63906, 63920, 63935, 63949, 63964, 63978,
+        63993, 64007, 64022, 64036, 64051, 64065, 64080, 64094, 64109, 64123,
+        64138, 64152, 64167, 64181, 64196, 64210, 64224, 64239, 64253, 64268,
+        64282, 64297, 64311, 64326, 64340, 64354, 64369, 64383, 64398, 64412,
+        64426, 64441, 64455, 64470, 64484, 64498, 64513, 64527, 64541, 64556,
+        64570, 64584, 64599, 64613, 64627, 64642, 64656, 64670, 64685, 64699,
+        64713, 64728, 64742, 64756, 64770, 64785, 64799, 64813, 64828, 64842,
+        64856, 64870, 64885, 64899, 64913, 64927, 64942, 64956, 64970, 64984,
+        64999, 65013, 65027, 65041, 65055, 65070, 65084, 65098, 65112, 65126,
+        65141, 65155, 65169, 65183, 65197, 65211, 65226, 65240, 65254, 65268,
+        65282, 65296, 65310, 65325, 65339, 65353, 65367, 65381, 65395, 65409,
+        65423, 65437, 65452, 65466, 65480, 65494, 65508, 65522, 65535}};
+
+void set_gamma_params(GammaParams* params) { gamma_params = *params; }
+
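+// The RVV version uses a dense 2049-entry LUT sampled every kGammaSpacing
+// (32) codes of the 16-bit range.  Each pixel x is mapped to
+//   lut[x >> 5] + (((lut[(x >> 5) + 1] - lut[x >> 5]) * (x & 31)) >> 5),
+// i.e. linear interpolation between adjacent LUT entries; pixels already at
+// kRgbPipelineMaxVal are left unchanged via the store mask.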
+void gamma_process(Image* img) {
+  if (!gamma_params.enable) return;
+
+  size_t vl;
+  size_t n = img->height * img->width * img->num_channels;
+  // auxiliary variables
+  vuint16m8_t vx, vy;
+  vuint16m8_t v_offset;
+  vbool2_t mask;
+  uint16_t* x;
+
+  for (size_t i = 0; i < n; i += vl) {
+    x = img->data + i;
+
+    vl = vsetvl_e16m8(n - i);
+    vx = vle16_v_u16m8(x, vl);  // load
+    mask = vmsne(vx, kRgbPipelineMaxVal,
+                 vl);  // mask to exclude kRgbPipelineMaxVal
+
+    vy = vsrl(vx, kGammaShiftBits, vl);          // 1/32
+    vy = vsll(vy, 1, vl);                        // *2
+    v_offset = vand(vx, kGammaSpacing - 1, vl);  // offset within bin
+
+    vx = vluxei16(gamma_params.lut, vy, vl);      // left
+    vy = vluxei16(gamma_params.lut + 1, vy, vl);  // right
+
+    vy = vsub(vy, vx, vl);               // right - left
+    vy = vmul(vy, v_offset, vl);         // multiply offset_within_bin
+    vy = vsrl(vy, kGammaShiftBits, vl);  // 1/32
+    vx = vadd(vx, vy, vl);               // add
+
+    vse16(mask, x, vx, vl);  // save
+  }
+}
diff --git a/samples/risp4ml/isp_stages/gamma_rvv_test.cc b/samples/risp4ml/isp_stages/gamma_rvv_test.cc
new file mode 100644
index 0000000..e379e6d
--- /dev/null
+++ b/samples/risp4ml/isp_stages/gamma_rvv_test.cc
@@ -0,0 +1,141 @@
+// Copyright 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Use a separate test for RVV since the gamma LUT is generated differently.
+#include <cmath>
+
+#include "pw_unit_test/framework.h"
+#include "samples/risp4ml/isp_stages/gamma.h"
+
+static constexpr uint16_t kRgbPipelineBpp = 16;
+static constexpr uint16_t kPostGammaPipelineBpp = 16;
+static constexpr uint16_t kRgbPipelineMaxVal = (1 << kRgbPipelineBpp) - 1;
+static constexpr uint16_t kGammaShiftBits = 5;
+static constexpr uint16_t kGammaSpacing = (1 << kGammaShiftBits);
+
+class GammaRvvTest : public ::testing::Test {
+ protected:
+  void setup(uint16_t width) {
+    in_ = image_new(3, 2, width);
+    for (uint16_t c = 0; c < in_->num_channels; ++c) {
+      for (uint16_t y = 0; y < in_->height; ++y) {
+        for (uint16_t x = 0; x < in_->width; ++x) {
+          *image_pixel(in_, c, y, x) = x;
+        }
+      }
+    }
+    out_ = image_new(3, 2, width);
+    const uint32_t num_bytes =
+        in_->num_channels * in_->height * in_->width * sizeof(pixel_type_t);
+    memcpy(out_->data, in_->data, num_bytes);
+  }
+  void TearDown() override {
+    image_delete(in_);
+    image_delete(out_);
+  }
+  void CreateLinearGamma(GammaParams* params) {
+    params->enable = true;
+    for (int n = 0; n <= kRgbPipelineMaxVal; n += kGammaSpacing) {
+      params->lut[n / kGammaSpacing] = n;
+    }
+    params->lut[kGammaNumberPoints - 1] = kRgbPipelineMaxVal;
+  }
+  float sRgb_gamma(float in_) {
+    return (in_ < 0.0031308f) ? 12.92f * in_
+                              : 1.055f * std::pow(in_, 1.0f / 2.4f) - 0.055f;
+  }
+  void CreateRgbGamma(GammaParams* params) {
+    params->enable = true;
+    params->lut[0] = 0;
+
+    for (int n = 0; n <= kRgbPipelineMaxVal; n += kGammaSpacing) {
+      params->lut[n / kGammaSpacing] =
+          (1 << kRgbPipelineBpp) *
+          sRgb_gamma((float)n / (1 << kRgbPipelineBpp));
+    }
+    params->lut[kGammaNumberPoints - 1] = kRgbPipelineMaxVal;
+  }
+
+  Image* in_;
+  Image* out_;
+};
+
+TEST_F(GammaRvvTest, Bypass) {
+  setup((1 << 15) - 1);
+
+  GammaParams params;
+  CreateRgbGamma(&params);
+  params.enable = false;
+
+  set_gamma_params(&params);
+
+  gamma_process(out_);
+
+  for (uint16_t c = 0; c < in_->num_channels; ++c) {
+    for (uint16_t y = 0; y < in_->height; ++y) {
+      for (uint16_t x = 0; x < in_->width; ++x) {
+        pixel_type_t expected_val =
+            x >> (kRgbPipelineBpp - kPostGammaPipelineBpp);
+        ASSERT_EQ(expected_val, image_pixel_val(out_, c, y, x));
+      }
+    }
+  }
+}
+
+TEST_F(GammaRvvTest, Linear) {
+  setup((1 << 15) - 2);
+
+  GammaParams params;
+  CreateLinearGamma(&params);
+
+  set_gamma_params(&params);
+
+  gamma_process(out_);
+
+  for (uint16_t c = 0; c < in_->num_channels; ++c) {
+    for (uint16_t y = 0; y < in_->height; ++y) {
+      for (uint16_t x = 0; x < in_->width; ++x) {
+        ASSERT_EQ(image_pixel_val(out_, 0, y, x),
+                  image_pixel_val(in_, 0, y, x));
+      }
+    }
+  }
+}
+
+TEST_F(GammaRvvTest, sRgbLUT) {
+  setup((1 << 15) - 1);
+
+  constexpr float kToleranceRatio = 0.03;
+
+  GammaParams params;
+  CreateRgbGamma(&params);
+  set_gamma_params(&params);
+
+  gamma_process(out_);
+
+  for (uint16_t c = 0; c < in_->num_channels; ++c) {
+    for (uint16_t y = 0; y < in_->height; ++y) {
+      for (uint16_t x = 0; x < in_->width; ++x) {
+        pixel_type_t expected_val =
+            (pixel_type_t)((1 << kRgbPipelineBpp) *
+                           sRgb_gamma(static_cast<float>(x) /
+                                      (1 << kRgbPipelineBpp)));
+        float tolerance = ceilf(kToleranceRatio * expected_val);
+        float diff = std::abs((float)expected_val -
+                              (float)image_pixel_val(out_, c, y, x));
+        ASSERT_LE(diff, tolerance);
+      }
+    }
+  }
+}
diff --git a/samples/risp4ml/isp_stages/wbg_rvv.c b/samples/risp4ml/isp_stages/wbg_rvv.c
new file mode 100644
index 0000000..a628f66
--- /dev/null
+++ b/samples/risp4ml/isp_stages/wbg_rvv.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2022 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <riscv_vector.h>
+
+#include "samples/risp4ml/common/utils.h"
+#include "samples/risp4ml/isp_stages/wbg.h"
+
+#define kBayerColorChannels 4
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+
+static const uint16_t kWbgFractional = kRawPipelineFraction;
+static const uint16_t kWbgUnityGain = 1 << kWbgFractional;
+static WbgParams wbg_params = {
+    .enable = true,
+    .fixed = false,
+    .gains = {kWbgUnityGain, kWbgUnityGain, kWbgUnityGain, kWbgUnityGain}};
+
+void set_wbg_params(WbgParams* params) { wbg_params = *params; }
+
+static void compute_wbg_gain(Image* img) {
+  // Calculate the white-balance gain values using the "gray world" algorithm
+  uint32_t size = img->num_channels * img->height * img->width;
+  uint64_t sum_of_reds = 0;
+  // will use only one of the greens for scaling, since the difference between
+  // the two green sensor pixels is negligible
+  uint64_t sum_of_greens = 0;
+  uint64_t sum_of_blues = 0;
+  size_t vl;
+  // auxiliary variables
+  vuint16m8_t vx;
+  vuint32m1_t vy;
+
+  for (uint16_t y = 0; y < img->height; ++y) {
+    pixel_type_t* line = image_row(img, 0, y);
+    for (uint8_t n = 0; n < 2; n++) {
+      for (uint16_t x = n; x < img->width; x += 2 * vl) {
+        size_t avl = (img->width + 1 - x) / 2;
+        vl = vsetvl_e16m8(avl);
+        vx = vlse16_v_u16m8(line + x, 2 * sizeof(uint16_t), vl);  // load
+
+        vy = vmv_v_x_u32m1(0, vl);       // init
+        vy = vwredsumu(vy, vx, vy, vl);  // sum
+        uint32_t sum = vmv_x(vy);
+        if ((y & 0x1) == 0 && n == 0) {
+          sum_of_reds += sum;
+        } else if ((y & 0x1) == 1 && n == 1) {
+          sum_of_blues += sum;
+        } else {
+          sum_of_greens += sum;
+        }
+      }
+    }
+  }
+
+  // scale values to green channel
+  float average_red = 4.0 * sum_of_reds / size;
+  float average_green = 2.0 * sum_of_greens / size;
+  float average_blue = 4.0 * sum_of_blues / size;
+
+  float max_average = MAX(MAX(average_red, average_green), average_blue);
+
+  // Convert the float value to fixed point representation, i.e. 0xFF.FF
+  uint32_t red_wb = FloatToFixedPoint(max_average / average_red,
+                                      kRawPipelineInteger, kRawPipelineFraction,
+                                      /*bool is_signed*/ false);
+  uint32_t green_wb = FloatToFixedPoint(
+      max_average / average_green, kRawPipelineInteger, kRawPipelineFraction,
+      /*bool is_signed*/ false);
+  uint32_t blue_wb = FloatToFixedPoint(
+      max_average / average_blue, kRawPipelineInteger, kRawPipelineFraction,
+      /*bool is_signed*/ false);
+
+  wbg_params.gains[0] = red_wb;
+  wbg_params.gains[1] = green_wb;
+  wbg_params.gains[2] = green_wb;
+  wbg_params.gains[3] = blue_wb;
+}
+
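+// Applies the per-phase white-balance gain: strided loads pick up one Bayer
+// phase at a time; each value is widened, multiplied by the Q-format gain,
+// rounded, clamped to the pipeline maximum, and narrowed back to 16 bits.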
+void wbg_process(Image* img) {
+  if (!wbg_params.enable) return;
+  if (!wbg_params.fixed) {
+    compute_wbg_gain(img);
+  }
+
+  size_t vl;
+  uint32_t offset = 1 << (kWbgFractional - 1);
+  uint32_t max_val = kRawPipelineMaxVal << kWbgFractional;
+  uint16_t gain;
+  // auxiliary variables
+  vuint16m4_t vx;
+  vuint32m8_t vy;
+  for (uint16_t y = 0; y < img->height; ++y) {
+    pixel_type_t* line = image_row(img, 0, y);
+    for (uint8_t n = 0; n < 2; n++) {
+      gain = (y & 0x1) ? wbg_params.gains[2 + n] : wbg_params.gains[n];
+      for (uint16_t x = n; x < img->width; x += 2 * vl) {
+        size_t avl = (img->width + 1 - x) / 2;
+        vl = vsetvl_e16m4(avl);
+        vx = vlse16_v_u16m4(line + x, 2 * sizeof(uint16_t), vl);  // load
+        vy = vwmulu(vx, gain, vl);                                // multiply
+        vy = vadd(vy, offset, vl);                                // add
+        vy = vminu(vy, max_val, vl);                              // clamp
+        vx = vnsrl(vy, kWbgFractional, vl);                       // bit shift
+        vsse16(line + x, 2 * sizeof(uint16_t), vx, vl);           // save
+      }
+    }
+  }
+}