Add vmv.s.x test

This test writes a scalar register value into element vd[0].

This is important for the vector reduction operations (vred*)
where the result of the operation (e.g. sum of all elements)
is placed into the 0th element of vd, i.e. vd[0].

The complication of this operation arises when the scalar reg value (which
is always e32) is saved over a vector element of a different SEW.

For e32 > SEW, there must be proper truncation, and for e32 < SEW
there must be proper sign extension.

Because this operation has a unique direction (scalar to vector), this
commit creates a custom test instead of using the typical softrvv approach.

Note: there is an inverse operation, `vmv.x.s`, which places vd[0] back
into a scalar register; it will be added in a separate commit.

Change-Id: Ib35b3f56d2ca3e05ea0e98705bf90ebe59a5ff0e
diff --git a/softrvv/include/softrvv.h b/softrvv/include/softrvv.h
index 5595634..d82e7a0 100644
--- a/softrvv/include/softrvv.h
+++ b/softrvv/include/softrvv.h
@@ -10,20 +10,21 @@
 #include "softrvv_vdiv.h"
 #include "softrvv_vmax.h"
 #include "softrvv_vmin.h"
-#include "softrvv_vmul_vmulh.h"
 #include "softrvv_vmseq.h"
-#include "softrvv_vmsgt.h"
 #include "softrvv_vmsge.h"
+#include "softrvv_vmsgt.h"
 #include "softrvv_vmsle.h"
 #include "softrvv_vmslt.h"
 #include "softrvv_vmsne.h"
+#include "softrvv_vmul_vmulh.h"
+#include "softrvv_vmv_s_x.h"
 #include "softrvv_vnsra.h"
 #include "softrvv_vnsrl.h"
 #include "softrvv_vor.h"
 #include "softrvv_vrem.h"
 #include "softrvv_vsext_vzext.h"
-#include "softrvv_vsra.h"
 #include "softrvv_vsll.h"
+#include "softrvv_vsra.h"
 #include "softrvv_vsrl.h"
 #include "softrvv_vsub.h"
 #include "softrvv_vwadd.h"
diff --git a/softrvv/include/softrvv_vmv_s_x.h b/softrvv/include/softrvv_vmv_s_x.h
new file mode 100644
index 0000000..5e249a0
--- /dev/null
+++ b/softrvv/include/softrvv_vmv_s_x.h
@@ -0,0 +1,15 @@
+#ifndef SOFTRVV_VMV_S_X_H
+#define SOFTRVV_VMV_S_X_H
+
+#include <stddef.h>
+
+namespace softrvv {
+
+template <typename T1, typename T2>  // T1: vd element type; T2: scalar type
+void vmv_s_x(T1 *vd, const T2 *rs1) {
+  vd[0] = static_cast<T1>(*rs1);  // convert *rs1 to T1; only vd[0] is written
+}
+
+}  // namespace softrvv
+
+#endif  // SOFTRVV_VMV_S_X_H
diff --git a/softrvv/tests/CMakeLists.txt b/softrvv/tests/CMakeLists.txt
index fd87980..62cbc9a 100644
--- a/softrvv/tests/CMakeLists.txt
+++ b/softrvv/tests/CMakeLists.txt
@@ -292,3 +292,15 @@
   LINKOPTS
    -Xlinker --defsym=__itcm_length__=128K
 )
+
+vec_cc_test(
+  NAME
+    softrvv_vmv_s_x
+  SRCS
+    softrvv_vmv_s_x_test.cpp
+  DEPS
+    softrvv
+  LINKOPTS
+   -Xlinker --defsym=__itcm_length__=128K
+)
+
diff --git a/softrvv/tests/softrvv_vmv_s_x_test.cpp b/softrvv/tests/softrvv_vmv_s_x_test.cpp
new file mode 100644
index 0000000..fd10f46
--- /dev/null
+++ b/softrvv/tests/softrvv_vmv_s_x_test.cpp
@@ -0,0 +1,55 @@
+#include <limits.h>
+#include <riscv_vector.h>
+#include <springbok.h>
+#include <stdio.h>
+
+#include "pw_unit_test/framework.h"
+#include "softrvv.h"
+
+namespace softrvv_vmv_s_x_test {
+namespace {
+
+int8_t dest_e8[] = {-5, 3, 0};  // destinations: only element 0 may change
+int16_t dest_e16[] = {-5, 3, 0};
+int32_t dest_e32[] = {-5, 3, 0};
+
+// scalar sources; each is converted to the element type and written to vd[0]
+int32_t rs1[] = {INT32_MIN, -1, 1, INT32_MAX};
+
+const uint32_t kAVL = sizeof(dest_e8) / sizeof(dest_e8[0]);
+const int32_t num_vx_tests = sizeof(rs1) / sizeof(rs1[0]);
+
+int8_t ref_vx_e8[num_vx_tests][kAVL] = {{static_cast<int8_t>(INT32_MIN), 3, 0},
+                                        {-1, 3, 0},
+                                        {1, 3, 0},
+                                        {static_cast<int8_t>(INT32_MAX), 3, 0}};
+
+int16_t ref_vx_e16[num_vx_tests][kAVL] = {
+    {static_cast<int16_t>(INT32_MIN), 3, 0},
+    {-1, 3, 0},
+    {1, 3, 0},
+    {static_cast<int16_t>(INT32_MAX), 3, 0}};
+
+int32_t ref_vx_e32[num_vx_tests][kAVL] = {
+    {INT32_MIN, 3, 0}, {-1, 3, 0}, {1, 3, 0}, {INT32_MAX, 3, 0}};
+
+class SoftRvvVmvsxTest : public ::testing::Test {
+ protected:
+  void SetUp() override {}
+};
+
+TEST_F(SoftRvvVmvsxTest, S_X) {
+  for (int32_t i = 0; i < num_vx_tests; i++) {  // one pass per rs1 value
+    softrvv::vmv_s_x<int8_t, int32_t>(dest_e8, &rs1[i]);
+    ASSERT_EQ(memcmp(dest_e8, &ref_vx_e8[i], sizeof(dest_e8)), 0);
+
+    softrvv::vmv_s_x<int16_t, int32_t>(dest_e16, &rs1[i]);
+    ASSERT_EQ(memcmp(dest_e16, &ref_vx_e16[i], sizeof(dest_e16)), 0);
+
+    softrvv::vmv_s_x<int32_t, int32_t>(dest_e32, &rs1[i]);
+    ASSERT_EQ(memcmp(dest_e32, &ref_vx_e32[i], sizeof(dest_e32)), 0);
+  }
+}
+
+}  // namespace
+}  // namespace softrvv_vmv_s_x_test
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 58a22df..fc3db8d 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -387,6 +387,17 @@
 
 vec_cc_test(
   NAME
+    vmv_s_x_test
+  SRCS
+    vmv_s_x_test.cpp
+  LINKOPTS
+   -Xlinker --defsym=__itcm_length__=128K
+  TIMEOUT
+    40
+)
+
+vec_cc_test(
+  NAME
     vlnr_vsnr_test
   SRCS
     vlnr_vsnr_test.cpp
diff --git a/tests/vmv_s_x_test.cpp b/tests/vmv_s_x_test.cpp
new file mode 100644
index 0000000..f7993cc
--- /dev/null
+++ b/tests/vmv_s_x_test.cpp
@@ -0,0 +1,123 @@
+#include <limits.h>
+#include <riscv_vector.h>
+#include <springbok.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <bit>
+#include <tuple>
+#include <type_traits>
+
+#include "pw_unit_test/framework.h"
+#include "test_v_helpers.h"
+
+namespace vmv_s_x_test {
+namespace {
+
+using namespace test_v_helpers;
+
+uint8_t test_vector_1[MAXVL_BYTES];
+uint8_t reference_vector_1[MAXVL_BYTES];
+
+class VmvsxTest : public ::testing::Test {
+ protected:
+  void SetUp() override { zero_vector_registers(); }
+  void TearDown() override { zero_vector_registers(); }
+};
+
+// Checks vmv.s.x on element type T for every tested LMUL/AVL combination.
+template <typename T>
+void vmv_s_x_test(const int32_t test_value) {
+  for (int lmul = LMUL_M1; lmul <= LMUL_M8; lmul++) {
+    // TODO(gkielian): increase upper bound to LMUL_MF2 after support is added
+    // TODO(gkielian): skip lmul==4 (unused) on adding fractional lmul to range
+
+    for (int32_t i = 0; i < AVL_COUNT; i++) {
+      int32_t avl = AVLS[i];
+      int32_t vlmax;
+      int32_t vl;
+
+      // reset vector elements to zero and perform appropriate vsetvl
+      std::tie(vlmax, vl) = vector_test_setup<T>(
+          static_cast<VLMUL>(lmul), avl, {test_vector_1, reference_vector_1});
+      if (avl > vlmax) {
+        continue;
+      }
+
+      // recast test and ref vector to appropriate sew
+      T *test_ptr_vec = reinterpret_cast<T *>(test_vector_1);
+      T *ref_ptr_vec = reinterpret_cast<T *>(reference_vector_1);
+
+      // populate the ref vector; it doubles as the initial contents of v8
+      fill_random_vector<T>(ref_ptr_vec, avl);
+
+      // load ref data into v8; "memory" clobber: the asm reads/writes buffers
+      switch (sizeof(T)) {
+        case sizeof(int8_t):
+          __asm__ volatile("vle8.v v8, (%0)" : : "r"(ref_ptr_vec) : "memory");
+          break;
+        case sizeof(int16_t):
+          __asm__ volatile("vle16.v v8, (%0)" : : "r"(ref_ptr_vec) : "memory");
+          break;
+        case sizeof(int32_t):
+          __asm__ volatile("vle32.v v8, (%0)" : : "r"(ref_ptr_vec) : "memory");
+          break;
+        default:
+          EXPECT_TRUE(false);
+          break;
+      }
+
+      // write test_value into v8[0], truncated to the current SEW; a register
+      // operand replaces the hard-coded x5, which was never declared clobbered
+      __asm__ volatile("vmv.s.x v8, %0" : : "r"(test_value));
+
+      switch (sizeof(T)) {
+        case sizeof(int8_t):
+          __asm__ volatile("vse8.v v8, (%0)" : : "r"(test_ptr_vec) : "memory");
+          break;
+        case sizeof(int16_t):
+          __asm__ volatile("vse16.v v8, (%0)" : : "r"(test_ptr_vec) : "memory");
+          break;
+        case sizeof(int32_t):
+          __asm__ volatile("vse32.v v8, (%0)" : : "r"(test_ptr_vec) : "memory");
+          break;
+        default:
+          EXPECT_TRUE(false);
+          break;
+      }
+
+      // perform operations on reference vector
+      ref_ptr_vec[0] = static_cast<T>(test_value);
+
+      assert_vec_elem_eq<T>(vlmax, test_ptr_vec, ref_ptr_vec);
+    }
+  }
+}
+
+TEST_F(VmvsxTest, vmv_s_x_test_e8) {
+  vmv_s_x_test<int8_t>(INT32_MIN);
+  vmv_s_x_test<int8_t>(INT32_MAX);
+  vmv_s_x_test<int8_t>(INT8_MIN);
+  vmv_s_x_test<int8_t>(INT8_MAX);
+  vmv_s_x_test<int8_t>(0);
+}
+
+TEST_F(VmvsxTest, vmv_s_x_test_e16) {
+  vmv_s_x_test<int16_t>(INT32_MIN);
+  vmv_s_x_test<int16_t>(INT32_MAX);
+  vmv_s_x_test<int16_t>(INT8_MIN);
+  vmv_s_x_test<int16_t>(INT8_MAX);
+  vmv_s_x_test<int16_t>(0);
+}
+
+TEST_F(VmvsxTest, vmv_s_x_test_e32) {
+  vmv_s_x_test<int32_t>(INT32_MIN);
+  vmv_s_x_test<int32_t>(INT32_MAX);
+  vmv_s_x_test<int32_t>(INT8_MIN);
+  vmv_s_x_test<int32_t>(INT8_MAX);
+  vmv_s_x_test<int32_t>(0);
+}
+
+}  // namespace
+}  // namespace vmv_s_x_test
+