Add tests for RVV reduction operations

- Tests vredmin, vredmax, vredsum.
- Also uses vmv to move from vector->scalar, so some additional coverage
  for that.

Change-Id: I6f6bd52cc0bbb012279d4635a34d25cc0727f67b
diff --git a/tests/cocotb/BUILD b/tests/cocotb/BUILD
index 1f6dc48..8489fb4 100644
--- a/tests/cocotb/BUILD
+++ b/tests/cocotb/BUILD
@@ -184,8 +184,8 @@
         "//tests/cocotb/rvv:rvv_add.elf",
         "//tests/cocotb/rvv:vstart_store.elf"] + [
         "//tests/cocotb/rvv/arithmetics:rvv_{}_{}_m1.elf".format(TEST_OP, DTYPE)
-        for DTYPE in ["int8", "int16", "int32", "uint8", "uint16", "uint32"]
-        for TEST_OP in ["add", "sub", "mul", "div"]
+            for DTYPE in ["int8", "int16", "int32", "uint8", "uint16", "uint32"]
+            for TEST_OP in ["add", "sub", "mul", "div", "redsum", "redmin", "redmax"]
         ]
 
 verilator_cocotb_test(
@@ -343,3 +343,4 @@
     data = RVV_TEST_BINARY_TARGETS,
     size = "large",
 )
+
diff --git a/tests/cocotb/rvv/arithmetics/BUILD b/tests/cocotb/rvv/arithmetics/BUILD
index bde953f..38d8ed7 100644
--- a/tests/cocotb/rvv/arithmetics/BUILD
+++ b/tests/cocotb/rvv/arithmetics/BUILD
@@ -14,10 +14,12 @@
 
 load("//rules:kelvin_v2.bzl", "kelvin_v2_binary")
 load("//rules:utils.bzl", "template_rule")
-load(":rvv_arithmetic.bzl", "rvv_arithmetic_test")
+load("//tests/cocotb/rvv/arithmetics:rvv_arithmetic.bzl", "rvv_arithmetic_test", "rvv_reduction_test")
 package(default_visibility = ["//visibility:public"])
 
 MATH_OPS = ["add", "sub", "mul","div"]
+REDUCTION_OPS = ["redsum", "redmin", "redmax"]
+
 # tuple format DTYPE (sew, sign, dtype, vl)
 DTYPES = [
     ("8", "i", "int8", "16"),
@@ -27,7 +29,8 @@
     ("16", "u", "uint16", "8"),
     ("32", "u", "uint32", "4"),
 ]
-OP_TYPE_PAIRS = [(op, sew, sign, dtype, vl) for op in MATH_OPS for (sew, sign, dtype, vl) in DTYPES]
+MATH_OP_TYPE_PAIRS = [(op, sew, sign, dtype, vl) for op in MATH_OPS for (sew, sign, dtype, vl) in DTYPES]
+REDUCTION_OP_TYPE_PAIRS = [(op, sew, sign, dtype, vl) for op in REDUCTION_OPS for (sew, sign, dtype, vl) in DTYPES]
 
 # Division has different op code for signed and usigned
 template_rule(
@@ -42,7 +45,24 @@
             "in_data_size": "16",
             "out_data_size": "16",
         }
-        for (op, sew, sign, dtype, vl) in OP_TYPE_PAIRS
+        for (op, sew, sign, dtype, vl) in MATH_OP_TYPE_PAIRS
+    }
+)
+
+template_rule(
+    rvv_reduction_test,
+    {
+        "template_{}_{}_m1".format(op, dtype): {
+            "dtype": dtype,
+            "sew": sew,
+            "sign": sign,
+            "num_operands": vl,
+            # redmin and redmax have different operators for signed/unsigned
+            "reduction_op": op + "u" if ((op == "redmin" or op == "redmax") and dtype[0] == "u") else op,
+            "in_data_size": "16",
+            "out_data_size": "16",
+        }
+        for (op, sew, sign, dtype, vl) in REDUCTION_OP_TYPE_PAIRS
     }
 )
 
@@ -52,6 +72,6 @@
         "rvv_{}_{}_m1".format(op, dtype): {
             "srcs": ["template_{}_{}_m1".format(op, dtype)],
         }
-        for (op, _, _, dtype, _) in OP_TYPE_PAIRS
+        for (op, _, _, dtype, _) in MATH_OP_TYPE_PAIRS + REDUCTION_OP_TYPE_PAIRS
     }
-)
\ No newline at end of file
+)
diff --git a/tests/cocotb/rvv/arithmetics/rvv_arithmetic.bzl b/tests/cocotb/rvv/arithmetics/rvv_arithmetic.bzl
index 6d6a6ce..ff8ca27 100644
--- a/tests/cocotb/rvv/arithmetics/rvv_arithmetic.bzl
+++ b/tests/cocotb/rvv/arithmetics/rvv_arithmetic.bzl
@@ -6,6 +6,12 @@
         **kwargs
     )
 
+def rvv_reduction_test(**kwargs):
+    rvv_reduction_template(
+        source_file = "{name}.cc".format(**kwargs),
+        **kwargs
+    )
+
 def rvv_arithmetic_template_impl(ctx):
     ctx.actions.expand_template(
         template = ctx.file._template,
@@ -21,6 +27,21 @@
         },
     )
 
+def rvv_reduction_template_impl(ctx):
+    ctx.actions.expand_template(
+        template = ctx.file._template,
+        output = ctx.outputs.source_file,
+        substitutions = {
+            "{DTYPE}" : ctx.attr.dtype,
+            "{IN_DATA_SIZE}" : ctx.attr.in_data_size,
+            "{OUT_DATA_SIZE}" : ctx.attr.out_data_size,
+            "{REDUCTION_OP}" : ctx.attr.reduction_op,
+            "{NUM_OPERANDS}" : ctx.attr.num_operands,
+            "{SEW}" : ctx.attr.sew,
+            "{SIGN}" : ctx.attr.sign,
+        },
+    )
+
 rvv_arithmetic_template = rule(
     implementation = rvv_arithmetic_template_impl,
     attrs = {
@@ -37,4 +58,22 @@
         ),
         "source_file": attr.output(mandatory=True)
     }
+)
+
+rvv_reduction_template = rule(
+    implementation = rvv_reduction_template_impl,
+    attrs = {
+        "dtype" : attr.string(mandatory = True),
+        "in_data_size" : attr.string(mandatory = True),
+        "out_data_size" : attr.string(mandatory = True),
+        "reduction_op" : attr.string(mandatory = True),
+        "num_operands" : attr.string(mandatory = True),
+        "sew" : attr.string(mandatory = True),
+        "sign" : attr.string(mandatory = True),
+        "_template": attr.label(
+            default = ":rvv_reduction_template.cc",
+            allow_single_file = True,
+        ),
+        "source_file": attr.output(mandatory=True)
+    }
 )
\ No newline at end of file
diff --git a/tests/cocotb/rvv/arithmetics/rvv_reduction_template.cc b/tests/cocotb/rvv/arithmetics/rvv_reduction_template.cc
new file mode 100644
index 0000000..780e3e4
--- /dev/null
+++ b/tests/cocotb/rvv/arithmetics/rvv_reduction_template.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <riscv_vector.h>
+
+
+{DTYPE}_t in_buf_1[{IN_DATA_SIZE}] __attribute__((section(".data"))) __attribute__((aligned(16)));
+{DTYPE}_t scalar_input __attribute__((section(".data"))) __attribute__((aligned(16)));
+{DTYPE}_t out_buf __attribute__((section(".data"))) __attribute__((aligned(16)));
+
+void {REDUCTION_OP}_{SIGN}{SEW}_m1(const {DTYPE}_t* in_buf_1, const {DTYPE}_t scalar_input, {DTYPE}_t* out_buf){
+
+    v{DTYPE}m1_t input_v1 = __riscv_vle{SEW}_v_{SIGN}{SEW}m1(in_buf_1, {NUM_OPERANDS});
+    v{DTYPE}m1_t input_s1 = __riscv_vmv_v_x_{SIGN}{SEW}m1(scalar_input, {NUM_OPERANDS});
+    v{DTYPE}m1_t {REDUCTION_OP}_result = __riscv_v{REDUCTION_OP}_vs_{SIGN}{SEW}m1_{SIGN}{SEW}m1(input_v1, input_s1, {NUM_OPERANDS});
+    *out_buf = __riscv_vmv_x_s_{SIGN}{SEW}m1_{SIGN}{SEW}({REDUCTION_OP}_result);
+}
+
+
+int main(int argc, char **argv) {
+  {REDUCTION_OP}_{SIGN}{SEW}_m1(in_buf_1, scalar_input, &out_buf);
+  return 0;
+}
\ No newline at end of file
diff --git a/tests/cocotb/rvv_arithmetic_cocotb_test.py b/tests/cocotb/rvv_arithmetic_cocotb_test.py
index 7bc5f90..d8eb73d 100644
--- a/tests/cocotb/rvv_arithmetic_cocotb_test.py
+++ b/tests/cocotb/rvv_arithmetic_cocotb_test.py
@@ -16,6 +16,7 @@
 import tqdm
 import re
 import numpy as np
+import os
 
 from bazel_tools.tools.python.runfiles import runfiles
 from kelvin_test_utils.sim_test_fixture import Fixture
@@ -34,9 +35,15 @@
         orig_settings = np.seterr(divide='ignore')
         divide_output = np.divide(x, y)
         np.seterr(**orig_settings)
-
         return divide_output
-    return 0 # todo raise error
+    elif symbol == 'redsum':
+        return y[0] + np.add.reduce(x)
+    elif symbol == 'redmin':
+        return np.min(np.concatenate((x, y)))
+    elif symbol == 'redmax':
+        return np.max(np.concatenate((x, y)))
+    raise ValueError(f"Unsupported math symbol: {symbol}")
+
 
 async def arithmetic_m1_vanilla_ops_test(dut,
                           dtypes,
@@ -63,6 +70,7 @@
     fixture = await Fixture.Create(dut)
     with tqdm.tqdm(m1_vanilla_op_elfs) as t:
         for elf_name in tqdm.tqdm(m1_vanilla_op_elfs):
+            t.set_postfix({"binary": os.path.basename(elf_name)})
             elf_path = r.Rlocation("kelvin_hw/tests/cocotb/rvv/arithmetics/" + elf_name)
             await fixture.load_elf_and_lookup_symbols(
                 elf_path,
@@ -107,3 +115,65 @@
                           dtypes = ["int8", "int16", "int32", "uint8", "uint16", "uint32"],
                           math_ops = ["add", "sub", "mul", "div"],
                           num_bytes = 16)
+
+
+async def reduction_m1_vanilla_ops_test(dut,
+                          dtypes,
+                          math_ops: str,
+                          num_bytes: int):
+
+    """RVV reduction test template.
+
+    Each test performs a reduction op loading `in_buf_1` and storing the output to `out_buf`.
+    """
+    str_to_np_type ={
+        "int8": np.int8,
+        "int16": np.int16,
+        "int32": np.int32,
+        "uint8": np.uint8,
+        "uint16": np.uint16,
+        "uint32": np.uint32,
+    }
+    m1_vanilla_op_elfs = [f"rvv_{math_op}_{dtype}_m1.elf" for math_op in math_ops for dtype in dtypes]
+    pattern_extract = re.compile("rvv_(.*)_(.*)_m1.elf")
+
+    r = runfiles.Create()
+    fixture = await Fixture.Create(dut)
+    with tqdm.tqdm(m1_vanilla_op_elfs) as t:
+        for elf_name in tqdm.tqdm(m1_vanilla_op_elfs):
+            t.set_postfix({"binary": os.path.basename(elf_name)})
+            elf_path = r.Rlocation(f"kelvin_hw/tests/cocotb/rvv/arithmetics/{elf_name}")
+            await fixture.load_elf_and_lookup_symbols(
+                elf_path,
+                ['in_buf_1', 'scalar_input', 'out_buf'],
+            )
+            math_op, dtype = pattern_extract.match(elf_name).groups()
+            np_type = str_to_np_type[dtype]
+            itemsize = np.dtype(np_type).itemsize
+            num_values = int(num_bytes / np.dtype(np_type).itemsize)
+            min_value = np.iinfo(np_type).min
+            max_value = np.iinfo(np_type).max + 1  # One above.
+            input_1 = np.random.randint(min_value, max_value, num_values, dtype=np_type)
+            input_2 = np.random.randint(min_value, max_value, 1, dtype=np_type)
+            expected_output = np.asarray(_get_math_result(input_1, input_2, math_op), dtype=np_type)
+
+            await fixture.write('in_buf_1', input_1)
+            await fixture.write('scalar_input', input_2)
+            await fixture.write('out_buf', np.zeros(1, dtype=np_type))
+            await fixture.run_to_halt()
+
+            actual_output = (await fixture.read('out_buf', itemsize)).view(np_type)
+            debug_msg = str({
+                'input_1': input_1,
+                'input_2': input_2,
+                'expected': expected_output,
+                'actual': actual_output,
+            })
+            assert (actual_output == expected_output).all(), debug_msg
+
+@cocotb.test()
+async def reduction_m1_vanilla_ops(dut):
+    await reduction_m1_vanilla_ops_test(dut = dut,
+                          dtypes = ["int8", "int16", "int32", "uint8", "uint16", "uint32"],
+                          math_ops = ["redsum", "redmin", "redmax"],
+                          num_bytes = 16)