Add vmax_vv unit test

Change-Id: I9e458713ef543b9b30ed73066aa22c52cc91cf59
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 2583463..8aa2b11 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -64,6 +64,7 @@
     vmax_test
   SRCS
     vmax_vx_test.cpp
+    vmax_vv_test.cpp
   LINKOPTS
    -Xlinker --defsym=__itcm_length__=256K
   TIMEOUT
diff --git a/tests/vmax_vv_test.cpp b/tests/vmax_vv_test.cpp
new file mode 100644
index 0000000..24daa7c
--- /dev/null
+++ b/tests/vmax_vv_test.cpp
@@ -0,0 +1,144 @@
+#include <limits.h>
+#include <riscv_vector.h>
+#include <springbok.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <bit>
+#include <tuple>
+
+#include "pw_unit_test/framework.h"
+#include "test_v_helpers.h"
+
+namespace vmax_vv_test {
+namespace {
+
+using namespace test_v_helpers;
+
+uint8_t test_vector_1[MAXVL_BYTES];
+uint8_t test_vector_2[MAXVL_BYTES];
+
+template <typename T>
+static std::tuple<int, int> vmax_vv_test_setup(VLMUL lmul, int32_t avl) {
+  // Clear all vector registers
+  zero_vector_registers();
+
+  // Initialize test_vector_1 and determine vl, vlmax
+  uint32_t bw = std::__bit_width(sizeof(T));
+  VSEW sew = static_cast<VSEW>(bw - 1);
+  int vlmax = get_vsetvlmax_intrinsic(sew, lmul);
+  if (avl > vlmax) {
+    avl = vlmax;
+  }
+  memset(test_vector_1, 0, MAXVL_BYTES);
+  memset(test_vector_2, 0, MAXVL_BYTES);
+  int vl = set_vsetvl_intrinsic(sew, lmul, avl);
+  EXPECT_EQ(avl, vl);
+
+  return std::make_tuple(vlmax, vl);
+}
+
+class VmaxVvTest : public ::testing::Test {
+ protected:
+  void SetUp() override { zero_vector_registers(); }
+  void TearDown() override { zero_vector_registers(); }
+};
+
+// Below is a non-macro version of the test for more convenient debugging.
+// Remove the "DISABLED_" prefix to enable this test for debugging.
+TEST_F(VmaxVvTest, DISABLED_vmax_vv_demo) {
+  for (int i = 0; i < AVL_COUNT; i++) {
+    int32_t avl = AVLS[i];
+    int vlmax;
+    int vl;
+    std::tie(vlmax, vl) = vmax_vv_test_setup<int16_t>(VLMUL::LMUL_M1, avl);
+    if (avl > vlmax) {
+      continue;
+    }
+    int16_t *ptr_vec_1 = reinterpret_cast<int16_t *>(test_vector_1);
+    int16_t *ptr_vec_2 = reinterpret_cast<int16_t *>(test_vector_2);
+
+    const int16_t start_value = INT16_MAX;
+
+    // set up values for vs2 of vmax.vv
+    for (int idx = 0; idx < avl; idx++) {
+      // restrict values to valid int8_t range
+      ptr_vec_1[idx] = (int16_t)(idx + start_value);
+    }
+
+    __asm__ volatile("vle16.v v8, (%0)" : : "r"(ptr_vec_1));
+
+    // set up values for vs2 of vmax.vv
+    for (int idx = 0; idx < avl; idx++) {
+      // offset sequence for ptr_vec_2
+      ptr_vec_2[idx] = (int16_t)(idx - start_value);
+    }
+
+    __asm__ volatile("vle16.v v16, (%0)" : : "r"(ptr_vec_2));
+
+    __asm__ volatile("vmax.vv v24, v16, v8");
+
+    // emulate operation in C
+    for (int idx = 0; idx < vl; idx++) {
+      ptr_vec_1[idx] =
+          (ptr_vec_1[idx] > ptr_vec_2[idx]) ? ptr_vec_1[idx] : ptr_vec_2[idx];
+    }
+
+    __asm__ volatile("vse16.v v24, (%0)" : : "r"(ptr_vec_2));
+    assert_vec_elem_eq<int16_t>(vlmax, test_vector_1, test_vector_2);
+  }
+}
+
+#define DEFINE_TEST_VMAX_VV(_SEW_, _LMUL_, START_VALUE)                        \
+  TEST_F(VmaxVvTest, vmax_vv##_SEW_##m##_LMUL_) {                              \
+    for (int i = 0; i < AVL_COUNT; i++) {                                      \
+      int32_t avl = AVLS[i];                                                   \
+      int vlmax;                                                               \
+      int vl;                                                                  \
+      std::tie(vlmax, vl) =                                                    \
+          vmax_vv_test_setup<int##_SEW_##_t>(VLMUL::LMUL_M##_LMUL_, avl);      \
+      if (avl > vlmax) {                                                       \
+        continue;                                                              \
+      }                                                                        \
+      int##_SEW_##_t *ptr_vec_1 =                                              \
+          reinterpret_cast<int##_SEW_##_t *>(test_vector_1);                   \
+      int##_SEW_##_t *ptr_vec_2 =                                              \
+          reinterpret_cast<int##_SEW_##_t *>(test_vector_2);                   \
+      const int##_SEW_##_t start_value = START_VALUE;                          \
+      for (int idx = 0; idx < avl; idx++) {                                    \
+        ptr_vec_1[idx] = (int##_SEW_##_t)(idx + start_value);                  \
+      }                                                                        \
+      for (int idx = 0; idx < avl; idx++) {                                    \
+        ptr_vec_1[idx] = (int##_SEW_##_t)(idx - start_value);                  \
+      }                                                                        \
+      __asm__ volatile("vle" #_SEW_ ".v v8, (%0)" : : "r"(ptr_vec_1));         \
+      __asm__ volatile("vle" #_SEW_ ".v v16, (%0)" : : "r"(ptr_vec_2));        \
+      __asm__ volatile("vmax.vv v24, v16, v8");                                \
+      for (int idx = 0; idx < vl; idx++) {                                     \
+        ptr_vec_1[idx] = (ptr_vec_1[idx] > ptr_vec_2[idx]) ? ptr_vec_1[idx]    \
+                                                           : ptr_vec_2[idx];   \
+      }                                                                        \
+      __asm__ volatile("vse" #_SEW_ ".v v24, (%0)" : : "r"(ptr_vec_2));        \
+      assert_vec_elem_eq<int##_SEW_##_t>(vlmax, test_vector_1, test_vector_2); \
+    }                                                                          \
+  }
+
+// TODO(gkielian): modify macro to permit more than one test per sew/lmul pair
+DEFINE_TEST_VMAX_VV(8, 1, INT8_MAX)
+DEFINE_TEST_VMAX_VV(8, 2, INT8_MIN)
+DEFINE_TEST_VMAX_VV(8, 4, 120)
+DEFINE_TEST_VMAX_VV(8, 8, 2)
+
+DEFINE_TEST_VMAX_VV(16, 1, INT16_MAX)
+DEFINE_TEST_VMAX_VV(16, 2, INT16_MIN)
+DEFINE_TEST_VMAX_VV(16, 4, 16000)
+DEFINE_TEST_VMAX_VV(16, 8, 2)
+
+DEFINE_TEST_VMAX_VV(32, 1, INT32_MAX)
+DEFINE_TEST_VMAX_VV(32, 2, INT32_MIN)
+DEFINE_TEST_VMAX_VV(32, 4, 1000000000)
+DEFINE_TEST_VMAX_VV(32, 8, 2)
+
+}  // namespace
+}  // namespace vmax_vv_test
+