Add test for vmv; currently only vmv_v_v_i is covered.

* Move example AVLs into test_v_helpers.
* Add both intrinsic and inline-asm variants of the test.
* Clang intrinsic vle8_v_i8m1 uses vs1r.v, which is unsupported in tlib,
  so the intrinsic tests are disabled for now.
* Fix formatting in test_v_helpers.

Change-Id: I1355d6cbdb2921fc688ff4fd17ad04b855d9dbc5
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index afbd117..4642da1 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -39,3 +39,15 @@
   TIMEOUT
     500
 )
+
+vec_cc_test(
+  NAME
+    vmv_test
+  SRCS
+    vmv_test.cpp
+  DEPS
+    springbok
+  LINKOPTS
+    -T${LINKER_SCRIPT}
+    -Xlinker --defsym=__itcm_length__=256K
+)
diff --git a/tests/include/test_v_helpers.h b/tests/include/test_v_helpers.h
index e8ac315..2ba95a0 100644
--- a/tests/include/test_v_helpers.h
+++ b/tests/include/test_v_helpers.h
@@ -1,38 +1,48 @@
 #ifndef TEST_V_HELPERS_H
 #define TEST_V_HELPERS_H
 
+#include <stdint.h>
+
 namespace test_v_helpers {
 
+const uint32_t AVLS[] = {1,    4,    3,     2,     16,    8,    5,    17,
+                         32,   36,   64,    55,    100,   321,  256,  128,
+                         512,  623,  1024,  1100,  1543,  2048, 3052, 4096,
+                         5555, 8192, 10241, 16384, 24325, 32768};
+const int32_t AVL_COUNT = sizeof(AVLS) / sizeof(AVLS[0]);
+
 enum VSEW {
   SEW_E8 = 0,
   SEW_E16 = 1,
   SEW_E32 = 2,
-/* // SEW limited to E32
-  SEW_E64 = 3,
-  SEW_E128 = 4,
-  SEW_E256 = 5,
-  SEW_E512 = 6,
-  SEW_E1024 = 7,
-*/
+  /* // SEW limited to E32
+    SEW_E64 = 3,
+    SEW_E128 = 4,
+    SEW_E256 = 5,
+    SEW_E512 = 6,
+    SEW_E1024 = 7,
+  */
 };
 
 enum VLMUL {
 
-/* // Fractional LMUL not supported by our intrinsic compiler
-  LMUL_MF8 = 5,
-  LMUL_MF4 = 6,
-  LMUL_MF2 = 7,
-*/
+  /* // Fractional LMUL not supported by our intrinsic compiler
+    LMUL_MF8 = 5,
+    LMUL_MF4 = 6,
+    LMUL_MF2 = 7,
+  */
   LMUL_M1 = 0,
   LMUL_M2 = 1,
   LMUL_M4 = 2,
   LMUL_M8 = 3,
 };
 
-uint32_t get_vtype(VSEW sew, VLMUL lmul, bool tail_agnostic, bool mask_agnostic);
+uint32_t get_vtype(VSEW sew, VLMUL lmul, bool tail_agnostic,
+                   bool mask_agnostic);
 
 // vsetvl  rd, rs1, rs2      # rd = new vl, rs1 = AVL, rs2 = new vtype value
-uint32_t set_vsetvl(VSEW sew, VLMUL lmul, uint32_t avl, bool tail_agnostic, bool mask_agnostic);
+uint32_t set_vsetvl(VSEW sew, VLMUL lmul, uint32_t avl, bool tail_agnostic,
+                    bool mask_agnostic);
 
 int set_vsetvl_intrinsic(VSEW sew, VLMUL lmul, uint32_t avl);
 
@@ -40,5 +50,5 @@
 
 int set_vsetvli(VSEW sew, VLMUL lmul, uint32_t avl);
 
-}
+}  // namespace test_v_helpers
 #endif
diff --git a/tests/test_v_helpers.cpp b/tests/test_v_helpers.cpp
index ed8d5f2..9b12c15 100644
--- a/tests/test_v_helpers.cpp
+++ b/tests/test_v_helpers.cpp
@@ -1,32 +1,29 @@
-#include <riscv_vector.h>
 #include "test_v_helpers.h"
 
+#include <riscv_vector.h>
 
 namespace test_v_helpers {
 
 uint32_t get_vtype(VSEW sew, VLMUL lmul, bool tail_agnostic,
                    bool mask_agnostic) {
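+  // vtype bit layout: vlmul in [2:0], vsew in [5:3], vta at [6], vma at [7].
+  // e.g. SEW_E32 (2) at LMUL_M1 (0) with ta=ma=0 yields 2 << 3 = 0x10.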
-  return (static_cast<int>(lmul) & 0x7) |
-          (static_cast<int>(sew) & 0x7) << 3 |
-          (tail_agnostic & 0x1) << 6 |
-          (mask_agnostic & 0x1) << 7;
+  return (static_cast<int>(lmul) & 0x7) | (static_cast<int>(sew) & 0x7) << 3 |
+         (tail_agnostic & 0x1) << 6 | (mask_agnostic & 0x1) << 7;
 }
 
-uint32_t set_vsetvl(VSEW sew, VLMUL lmul, uint32_t avl, bool tail_agnostic, bool mask_agnostic) {
+uint32_t set_vsetvl(VSEW sew, VLMUL lmul, uint32_t avl, bool tail_agnostic,
+                    bool mask_agnostic) {
   uint32_t vtype = get_vtype(sew, lmul, tail_agnostic, mask_agnostic);
   uint32_t vl;
-  __asm__ volatile(
-    "vsetvl %[VL], %[AVL], %[VTYPE]"
-    : [VL] "=r" (vl)
-    : [AVL] "r" (avl), [VTYPE] "r" (vtype)
-  );
+  __asm__ volatile("vsetvl %[VL], %[AVL], %[VTYPE]"
+                   : [VL] "=r"(vl)
+                   : [AVL] "r"(avl), [VTYPE] "r"(vtype));
   return vl;
 }
 
 int set_vsetvl_intrinsic(VSEW sew, VLMUL lmul, uint32_t avl) {
-  switch(lmul) {
+  switch (lmul) {
     case VLMUL::LMUL_M1:
-      switch(sew) {
+      switch (sew) {
         case VSEW::SEW_E8:
           return vsetvl_e8m1(avl);
           break;
@@ -42,7 +39,7 @@
       }
       break;
     case VLMUL::LMUL_M2:
-      switch(sew) {
+      switch (sew) {
         case VSEW::SEW_E8:
           return vsetvl_e8m2(avl);
           break;
@@ -57,7 +54,7 @@
           break;
       }
     case VLMUL::LMUL_M4:
-      switch(sew) {
+      switch (sew) {
         case VSEW::SEW_E8:
           return vsetvl_e8m4(avl);
           break;
@@ -73,7 +70,7 @@
       }
       break;
     case VLMUL::LMUL_M8:
-      switch(sew) {
+      switch (sew) {
         case VSEW::SEW_E8:
           return vsetvl_e8m8(avl);
           break;
@@ -95,9 +92,9 @@
 }
 
 int get_vsetvlmax_intrinsic(VSEW sew, VLMUL lmul) {
-  switch(lmul) {
+  switch (lmul) {
     case VLMUL::LMUL_M1:
-      switch(sew) {
+      switch (sew) {
         case VSEW::SEW_E8:
           return vsetvlmax_e8m1();
           break;
@@ -113,7 +110,7 @@
       }
       break;
     case VLMUL::LMUL_M2:
-      switch(sew) {
+      switch (sew) {
         case VSEW::SEW_E8:
           return vsetvlmax_e8m2();
           break;
@@ -128,7 +125,7 @@
           break;
       }
     case VLMUL::LMUL_M4:
-      switch(sew) {
+      switch (sew) {
         case VSEW::SEW_E8:
           return vsetvlmax_e8m4();
           break;
@@ -144,7 +141,7 @@
       }
       break;
     case VLMUL::LMUL_M8:
-      switch(sew) {
+      switch (sew) {
         case VSEW::SEW_E8:
           return vsetvlmax_e8m8();
           break;
@@ -167,113 +164,89 @@
 
 int set_vsetvli(VSEW sew, VLMUL lmul, uint32_t avl) {
   uint32_t vl = 0;
-  switch(lmul) {
+  switch (lmul) {
     case VLMUL::LMUL_M1:
-      switch(sew) {
-          case VSEW::SEW_E8:
-            __asm__ volatile(
-              "vsetvli %[VL], %[AVL], e8, m1, tu, mu"
-              : [VL] "=r" (vl)
-              : [AVL] "r" (avl)
-            );
-            break;
-          case VSEW::SEW_E16:
-            __asm__ volatile(
-              "vsetvli %[VL], %[AVL], e16, m1, tu, mu"
-              : [VL] "=r" (vl)
-              : [AVL] "r" (avl)
-            );
-            break;
-          case VSEW::SEW_E32:
-            __asm__ volatile(
-              "vsetvli %[VL], %[AVL], e32, m1, tu, mu"
-              : [VL] "=r" (vl)
-              : [AVL] "r" (avl)
-            );
-            break;
-          default:
-              return 0;
+      switch (sew) {
+        case VSEW::SEW_E8:
+          __asm__ volatile("vsetvli %[VL], %[AVL], e8, m1, tu, mu"
+                           : [VL] "=r"(vl)
+                           : [AVL] "r"(avl));
+          break;
+        case VSEW::SEW_E16:
+          __asm__ volatile("vsetvli %[VL], %[AVL], e16, m1, tu, mu"
+                           : [VL] "=r"(vl)
+                           : [AVL] "r"(avl));
+          break;
+        case VSEW::SEW_E32:
+          __asm__ volatile("vsetvli %[VL], %[AVL], e32, m1, tu, mu"
+                           : [VL] "=r"(vl)
+                           : [AVL] "r"(avl));
+          break;
+        default:
+          return 0;
       }
       break;
     case VLMUL::LMUL_M2:
-      switch(sew) {
-          case VSEW::SEW_E8:
-            __asm__ volatile(
-              "vsetvli %[VL], %[AVL], e8, m2, tu, mu"
-              : [VL] "=r" (vl)
-              : [AVL] "r" (avl)
-            );
-            break;
-          case VSEW::SEW_E16:
-            __asm__ volatile(
-              "vsetvli %[VL], %[AVL], e16, m2, tu, mu"
-              : [VL] "=r" (vl)
-              : [AVL] "r" (avl)
-            );
-            break;
-          case VSEW::SEW_E32:
-            __asm__ volatile(
-              "vsetvli %[VL], %[AVL], e32, m2, tu, mu"
-              : [VL] "=r" (vl)
-              : [AVL] "r" (avl)
-            );
-            break;
-          default:
-              return 0;
+      switch (sew) {
+        case VSEW::SEW_E8:
+          __asm__ volatile("vsetvli %[VL], %[AVL], e8, m2, tu, mu"
+                           : [VL] "=r"(vl)
+                           : [AVL] "r"(avl));
+          break;
+        case VSEW::SEW_E16:
+          __asm__ volatile("vsetvli %[VL], %[AVL], e16, m2, tu, mu"
+                           : [VL] "=r"(vl)
+                           : [AVL] "r"(avl));
+          break;
+        case VSEW::SEW_E32:
+          __asm__ volatile("vsetvli %[VL], %[AVL], e32, m2, tu, mu"
+                           : [VL] "=r"(vl)
+                           : [AVL] "r"(avl));
+          break;
+        default:
+          return 0;
       }
       break;
     case VLMUL::LMUL_M4:
-      switch(sew) {
-          case VSEW::SEW_E8:
-            __asm__ volatile(
-              "vsetvli %[VL], %[AVL], e8, m4, tu, mu"
-              : [VL] "=r" (vl)
-              : [AVL] "r" (avl)
-            );
-            break;
-          case VSEW::SEW_E16:
-            __asm__ volatile(
-              "vsetvli %[VL], %[AVL], e16, m4, tu, mu"
-              : [VL] "=r" (vl)
-              : [AVL] "r" (avl)
-            );
-            break;
-          case VSEW::SEW_E32:
-            __asm__ volatile(
-              "vsetvli %[VL], %[AVL], e32, m4, tu, mu"
-              : [VL] "=r" (vl)
-              : [AVL] "r" (avl)
-            );
-            break;
-          default:
-              return 0;
+      switch (sew) {
+        case VSEW::SEW_E8:
+          __asm__ volatile("vsetvli %[VL], %[AVL], e8, m4, tu, mu"
+                           : [VL] "=r"(vl)
+                           : [AVL] "r"(avl));
+          break;
+        case VSEW::SEW_E16:
+          __asm__ volatile("vsetvli %[VL], %[AVL], e16, m4, tu, mu"
+                           : [VL] "=r"(vl)
+                           : [AVL] "r"(avl));
+          break;
+        case VSEW::SEW_E32:
+          __asm__ volatile("vsetvli %[VL], %[AVL], e32, m4, tu, mu"
+                           : [VL] "=r"(vl)
+                           : [AVL] "r"(avl));
+          break;
+        default:
+          return 0;
       }
       break;
     case VLMUL::LMUL_M8:
-      switch(sew) {
-          case VSEW::SEW_E8:
-            __asm__ volatile(
-              "vsetvli %[VL], %[AVL], e8, m8, tu, mu"
-              : [VL] "=r" (vl)
-              : [AVL] "r" (avl)
-            );
-            break;
-          case VSEW::SEW_E16:
-            __asm__ volatile(
-              "vsetvli %[VL], %[AVL], e16, m8, tu, mu"
-              : [VL] "=r" (vl)
-              : [AVL] "r" (avl)
-            );
-            break;
-          case VSEW::SEW_E32:
-            __asm__ volatile(
-              "vsetvli %[VL], %[AVL], e32, m8, tu, mu"
-              : [VL] "=r" (vl)
-              : [AVL] "r" (avl)
-            );
-            break;
-          default:
-              return 0;
+      switch (sew) {
+        case VSEW::SEW_E8:
+          __asm__ volatile("vsetvli %[VL], %[AVL], e8, m8, tu, mu"
+                           : [VL] "=r"(vl)
+                           : [AVL] "r"(avl));
+          break;
+        case VSEW::SEW_E16:
+          __asm__ volatile("vsetvli %[VL], %[AVL], e16, m8, tu, mu"
+                           : [VL] "=r"(vl)
+                           : [AVL] "r"(avl));
+          break;
+        case VSEW::SEW_E32:
+          __asm__ volatile("vsetvli %[VL], %[AVL], e32, m8, tu, mu"
+                           : [VL] "=r"(vl)
+                           : [AVL] "r"(avl));
+          break;
+        default:
+          return 0;
       }
       break;
     default:
@@ -282,4 +255,4 @@
   return vl;
 }
 
-}
+}  // namespace test_v_helpers
diff --git a/tests/vmv_test.cpp b/tests/vmv_test.cpp
new file mode 100644
index 0000000..804baff
--- /dev/null
+++ b/tests/vmv_test.cpp
@@ -0,0 +1,173 @@
+#include <riscv_vector.h>
+#include <springbok.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>  // memset
+
+#include <bit>
+#include <tuple>
+
+#include "pw_unit_test/framework.h"
+#include "test_v_helpers.h"
+
+namespace vmv_test {
+namespace {
+
+const int VLEN = 512;  // Vector register length in bits.
+// Worst-case buffer size: VLEN / 8 bytes per register, times 8 registers at
+// LMUL=8.
+const int MAXVL_BYTES = VLEN / 8 * 8;
+
+using namespace test_v_helpers;
+
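+// Shared source/destination buffers, sized for the largest register group.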
+uint8_t test_vector_1[MAXVL_BYTES];
+uint8_t test_vector_2[MAXVL_BYTES];
+
+static void zero_vector_registers() {
+  // Clear all vector registers
+  int vlmax = get_vsetvlmax_intrinsic(VSEW::SEW_E32, VLMUL::LMUL_M8);
+  set_vsetvl_intrinsic(VSEW::SEW_E32, VLMUL::LMUL_M8, vlmax);
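+  // At LMUL=8 each vmv.v.i below clears a whole 8-register group.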
+  __asm__ volatile("vmv.v.i v0, 0");
+  __asm__ volatile("vmv.v.i v8, 0");
+  __asm__ volatile("vmv.v.i v16, 0");
+  __asm__ volatile("vmv.v.i v24, 0");
+}
+
+template <typename T>
+static std::tuple<int, int> vmv_v_v_i_test_setup(VLMUL lmul, uint32_t avl) {
+  // Clear all vector registers
+  zero_vector_registers();
+
+  // Initialize test_vector1 and determine vl, vlmax
+  // Map element size (1/2/4 bytes) to the SEW encoding: bit_width(1) - 1 = 0
+  // (E8), bit_width(2) - 1 = 1 (E16), bit_width(4) - 1 = 2 (E32).
+  uint32_t bw = std::bit_width(sizeof(T));
+  VSEW sew = static_cast<VSEW>(bw - 1);
+  int vlmax = get_vsetvlmax_intrinsic(sew, lmul);
+  if (avl > static_cast<uint32_t>(vlmax)) {
+    avl = vlmax;
+  }
+  T *ptr_vec_1 = reinterpret_cast<T *>(test_vector_1);
+  memset(test_vector_1, 0, MAXVL_BYTES);
+  memset(test_vector_2, 0, MAXVL_BYTES);
+  int vl = set_vsetvl_intrinsic(sew, lmul, avl);
+  EXPECT_EQ(avl, static_cast<uint32_t>(vl));
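+  // Fill the source with an index ramp (narrow T wraps, which is fine).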
+  for (int idx = 0; idx < vl; idx++) {
+    ptr_vec_1[idx] = idx;
+  }
+  return std::make_tuple(vlmax, vl);
+}
+
+template <typename T>
+static void vmv_check(int avl) {
+  T *ptr_vec_1 = reinterpret_cast<T *>(test_vector_1);
+  T *ptr_vec_2 = reinterpret_cast<T *>(test_vector_2);
+  for (int idx = 0; idx < avl; idx++) {
+    ASSERT_EQ(ptr_vec_1[idx], ptr_vec_2[idx]);
+  }
+}
+
+TEST(VmvTest, vmv_demo) {
+  for (int i = 0; i < AVL_COUNT; i++) {
+    uint32_t avl = AVLS[i];
+    int vlmax;
+    int vl;
+    std::tie(vlmax, vl) = vmv_v_v_i_test_setup<uint8_t>(VLMUL::LMUL_M1, avl);
+    if (avl > static_cast<uint32_t>(vlmax)) {
+      continue;
+    }
+
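+    // Load vl bytes into v0, copy v0 -> v1, and store back out. Only vl
+    // elements move; the zeroed buffer tails keep the vlmax-wide check valid.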
+    __asm__ volatile("vle8.v v0, (%0)" : : "r"(test_vector_1) : "memory");
+    __asm__ volatile("vmv.v.v v1, v0");
+    __asm__ volatile("vse8.v v1, (%0)" : : "r"(test_vector_2) : "memory");
+    vmv_check<uint8_t>(vlmax);
+  }
+}
+
+// TODO(henryherman): clang vle intrinsic uses vs1r.v (unsupported in renode)
+TEST(VmvTest, DISABLED_intrinsic_vmv_demo) {
+  for (int i = 0; i < AVL_COUNT; i++) {
+    uint32_t avl = AVLS[i];
+    int vlmax;
+    int vl;
+    std::tie(vlmax, vl) = vmv_v_v_i_test_setup<uint8_t>(VLMUL::LMUL_M1, avl);
+    if (avl > static_cast<uint32_t>(vlmax)) {
+      continue;
+    }
+    vint8m1_t vec1 =
+        vle8_v_i8m1(reinterpret_cast<int8_t *>(test_vector_1), vl);
+    vint8m1_t vec2 = vmv_v_v_i8m1(vec1, vl);
+    int8_t *ptr_vec_2 = reinterpret_cast<int8_t *>(test_vector_2);
+    vse8_v_i8m1(ptr_vec_2, vec2, vl);
+    vmv_check<uint8_t>(vlmax);
+  }
+}
+
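+// Stamps out one intrinsic-based vmv.v.v test per SEW/LMUL combination;
+// disabled until the vle intrinsics work under tlib (see TODO above).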
+#define DEFINE_TEST_VMV_V_V_I_INTRINSIC(_SEW_, _LMUL_)                         \
+  TEST(DISABLED_VmvTest, intrinsic_vmv_v_v_i##_SEW_##m##_LMUL_) {              \
+    for (int i = 0; i < AVL_COUNT; i++) {                                      \
+      uint32_t avl = AVLS[i];                                                  \
+      int vlmax;                                                               \
+      int vl;                                                                  \
+      std::tie(vlmax, vl) =                                                    \
+          vmv_v_v_i_test_setup<uint##_SEW_##_t>(VLMUL::LMUL_M##_LMUL_, avl);   \
+      if (avl > static_cast<uint32_t>(vlmax)) {                               \
+        continue;                                                              \
+      }                                                                        \
+      vint##_SEW_##m##_LMUL_##_t vec1 = vle##_SEW_##_v_i##_SEW_##m##_LMUL_(   \
+          reinterpret_cast<int##_SEW_##_t *>(test_vector_1), vl);             \
+      vint##_SEW_##m##_LMUL_##_t vec2 = vmv_v_v_i##_SEW_##m##_LMUL_(vec1, vl); \
+      int##_SEW_##_t *ptr_vec_2 =                                             \
+          reinterpret_cast<int##_SEW_##_t *>(test_vector_2);                  \
+      vse##_SEW_##_v_i##_SEW_##m##_LMUL_(ptr_vec_2, vec2, vl);                \
+      vmv_check<uint##_SEW_##_t>(vlmax);                                       \
+    }                                                                          \
+  }
+
+DEFINE_TEST_VMV_V_V_I_INTRINSIC(8, 1)
+DEFINE_TEST_VMV_V_V_I_INTRINSIC(8, 2)
+DEFINE_TEST_VMV_V_V_I_INTRINSIC(8, 4)
+DEFINE_TEST_VMV_V_V_I_INTRINSIC(8, 8)
+
+DEFINE_TEST_VMV_V_V_I_INTRINSIC(16, 1)
+DEFINE_TEST_VMV_V_V_I_INTRINSIC(16, 2)
+DEFINE_TEST_VMV_V_V_I_INTRINSIC(16, 4)
+DEFINE_TEST_VMV_V_V_I_INTRINSIC(16, 8)
+
+DEFINE_TEST_VMV_V_V_I_INTRINSIC(32, 1)
+DEFINE_TEST_VMV_V_V_I_INTRINSIC(32, 2)
+DEFINE_TEST_VMV_V_V_I_INTRINSIC(32, 4)
+DEFINE_TEST_VMV_V_V_I_INTRINSIC(32, 8)
+
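+// Stamps out one inline-asm vmv.v.v test per SEW/LMUL combination.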
+#define DEFINE_TEST_VMV_V_V_I(_SEW_, _LMUL_)                                 \
+  TEST(VmvTest, vmv_v_v_i##_SEW_##m##_LMUL_) {                               \
+    for (int i = 0; i < AVL_COUNT; i++) {                                    \
+      uint32_t avl = AVLS[i];                                                \
+      int vlmax;                                                             \
+      int vl;                                                                \
+      std::tie(vlmax, vl) =                                                  \
+          vmv_v_v_i_test_setup<uint##_SEW_##_t>(VLMUL::LMUL_M##_LMUL_, avl); \
+      if (avl > static_cast<uint32_t>(vlmax)) {                             \
+        continue;                                                            \
+      }                                                                      \
+      uint##_SEW_##_t *ptr_vec_1 =                                           \
+          reinterpret_cast<uint##_SEW_##_t *>(test_vector_1);                \
+      uint##_SEW_##_t *ptr_vec_2 =                                           \
+          reinterpret_cast<uint##_SEW_##_t *>(test_vector_2);                \
+      __asm__ volatile("vle" #_SEW_ ".v v0, (%0)"                           \
+                       : : "r"(ptr_vec_1) : "memory");                      \
+      __asm__ volatile("vmv.v.v v8, v0");                                   \
+      __asm__ volatile("vse" #_SEW_ ".v v8, (%0)"                           \
+                       : : "r"(ptr_vec_2) : "memory");                      \
+      vmv_check<uint##_SEW_##_t>(vlmax);                                     \
+    }                                                                        \
+  }
+
+DEFINE_TEST_VMV_V_V_I(8, 1)
+DEFINE_TEST_VMV_V_V_I(8, 2)
+DEFINE_TEST_VMV_V_V_I(8, 4)
+DEFINE_TEST_VMV_V_V_I(8, 8)
+
+DEFINE_TEST_VMV_V_V_I(16, 1)
+DEFINE_TEST_VMV_V_V_I(16, 2)
+DEFINE_TEST_VMV_V_V_I(16, 4)
+DEFINE_TEST_VMV_V_V_I(16, 8)
+
+DEFINE_TEST_VMV_V_V_I(32, 1)
+DEFINE_TEST_VMV_V_V_I(32, 2)
+DEFINE_TEST_VMV_V_V_I(32, 4)
+DEFINE_TEST_VMV_V_V_I(32, 8)
+}  // namespace
+}  // namespace vmv_test
diff --git a/tests/vsetvl_test.cpp b/tests/vsetvl_test.cpp
index b31a553..b824144 100644
--- a/tests/vsetvl_test.cpp
+++ b/tests/vsetvl_test.cpp
@@ -13,11 +13,6 @@
 
 const uint64_t VLEN = 512u;
 
-uint32_t AVLS[] = {1,    2,    3,     4,     5,     8,    16,   17,
-                   32,   36,   55,    64,    100,   128,  256,  321,
-                   512,  623,  1024,  1100,  1543,  2048, 3052, 4096,
-                   5555, 8192, 10241, 16384, 24325, 32768};
-const int32_t AVL_COUNT = sizeof(AVLS) / sizeof(AVLS[0]);
 
 #define MIN(X, Y) ((X) < (Y) ? (X) : (Y))