Add vector_vadd_vsub_tests.

* Break the test into VV, VX, and VI based on operands so it fits in ITCM.

Change-Id: I3c0e9dfba1a272116e6b509241156e255a33ab7f
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3a6f209..556308a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,3 +16,4 @@
 add_subdirectory(hello_vec)
 add_subdirectory(vector_tests)
 add_subdirectory(vector_load_tests)
+add_subdirectory(vector_vadd_vsub_tests)
\ No newline at end of file
diff --git a/springbok/CMakeLists.txt b/springbok/CMakeLists.txt
index 2abc1af..5cc98c4 100644
--- a/springbok/CMakeLists.txt
+++ b/springbok/CMakeLists.txt
@@ -22,7 +22,7 @@
     -Wall
     -Werror
     -std=c11
-    -O3
+    -Os
     -g3
     -ggdb
     -ffreestanding
diff --git a/vector_vadd_vsub_tests/CMakeLists.txt b/vector_vadd_vsub_tests/CMakeLists.txt
new file mode 100644
index 0000000..1b7af84
--- /dev/null
+++ b/vector_vadd_vsub_tests/CMakeLists.txt
@@ -0,0 +1,43 @@
+cmake_minimum_required(VERSION 3.10)
+
+set (OPERAND_TYPES VV VX VI)
+
+foreach(OPERAND_TYPE ${OPERAND_TYPES})
+project(vector_vadd_vsub_tests_${OPERAND_TYPE})
+
+string(TOLOWER ${OPERAND_TYPE} OP_SUFFIX)
+
+set(TARGET vector_vadd_vsub_tests_${OP_SUFFIX})
+set(ELF ${TARGET}.elf)
+
+add_executable(${ELF} vector_vadd_vsub_tests.c)
+
+target_include_directories(${ELF} PUBLIC include)
+
+set_target_properties(${ELF} PROPERTIES LINK_DEPENDS "${LINKER_SCRIPT}")
+
+target_link_libraries(${ELF} vector_tests)
+
+set_target_properties(
+	${ELF}
+	PROPERTIES
+	LINK_FLAGS
+	"-T${LINKER_SCRIPT} \
+	 -specs=nano.specs \
+	 -Wl,--gc-sections \
+	 -Wl,--print-memory-usage \
+	 -Wl,-Map=${PROJECT_NAME}.map")
+
+target_compile_options(${ELF} PUBLIC
+	-nostdlib
+	-ffreestanding
+	-ffunction-sections
+	-Wall
+	-Werror
+    -Wno-unused-value
+	-std=gnu11
+	-O3
+	-g
+    -D=TEST_${OPERAND_TYPE})
+
+endforeach()
\ No newline at end of file
diff --git a/vector_vadd_vsub_tests/include/vector_vadd_vsub_tests.h b/vector_vadd_vsub_tests/include/vector_vadd_vsub_tests.h
new file mode 100644
index 0000000..3fd35f4
--- /dev/null
+++ b/vector_vadd_vsub_tests/include/vector_vadd_vsub_tests.h
@@ -0,0 +1,15 @@
+#ifndef SHODAN_SW_VECTOR_TESTS_VECTOR_VADD_VSUB_TESTS_H_
+#define SHODAN_SW_VECTOR_TESTS_VECTOR_VADD_VSUB_TESTS_H_
+
+#include "test_vector.h"
+#include "common_vector_test.h"
+
+void test_vector_vadd_vv(void);
+void test_vector_vsub_vv(void);
+void test_vector_vadd_vx(void);
+void test_vector_vsub_vx(void);
+void test_vector_vadd_vi(void);
+void test_vector_vrsub_vx(void);
+void test_vector_vrsub_vi(void);
+
+#endif
diff --git a/vector_vadd_vsub_tests/vector_vadd_vsub_tests.c b/vector_vadd_vsub_tests/vector_vadd_vsub_tests.c
new file mode 100644
index 0000000..30895f3
--- /dev/null
+++ b/vector_vadd_vsub_tests/vector_vadd_vsub_tests.c
@@ -0,0 +1,455 @@
+#include <assert.h>
+#include <string.h>
+
+#include "vector_vadd_vsub_tests.h"
+#include <springbok.h>
+
+
+static void randomize_array(void *, size_t, size_t);
+static void check_array_equality(void *, void *, size_t);
+static void calculate_expected_output(void *, void *, void *, uint32_t, uint8_t,
+                                      const char *, const char *);
+
+#define SETUP_TEST(VTYPE, BUFFER_SIZE, VL_DST) \
+  do {                                         \
+    VSET(BUFFER_SIZE, VTYPE, m1);              \
+    COPY_SCALAR_REG("t0", VL_DST);             \
+    LOG_INFO(#VL_DST " = %lu", VL_DST);         \
+  } while (0)
+
+#define MAKE_TEST(VTYPE, OPERATION, SUBOPERATION, DATATYPE)                             \
+  void                                                                                  \
+      run_operations_test_vector_##OPERATION##_##SUBOPERATION##_##VTYPE##_##DATATYPE(   \
+          void *output, void *input1, void *input2) {                                   \
+    __asm__ volatile("vl" #VTYPE ".v v1, (%0)" ::"r"(input1));                          \
+    if (strequal(#SUBOPERATION, "vv")) {                                                \
+      __asm__ volatile("vl" #VTYPE ".v v2, (%0)" ::"r"(input2));                        \
+      if (strequal(#OPERATION, "vadd")) {                                               \
+        __asm__ volatile("vadd.vv v3, v1, v2");                                         \
+      }                                                                                 \
+      if (strequal(#OPERATION, "vsub")) {                                               \
+        __asm__ volatile("vsub.vv v3, v1, v2");                                         \
+      }                                                                                 \
+    } else if (strequal(#SUBOPERATION, "vx")) {                                         \
+      uint32_t unsigned_scalar = *((uint32_t *)input2);                                 \
+      int32_t signed_scalar = *((int32_t *)input2);                                     \
+      if (strequal(#OPERATION, "vadd")) {                                               \
+        if (*(#DATATYPE) == 'u') {                                                      \
+          __asm__ volatile(                                                             \
+              "vadd.vx v3, v1, %[RS1]" ::[RS1] "r"(unsigned_scalar));                   \
+        } else {                                                                        \
+          __asm__ volatile(                                                             \
+              "vadd.vx v3, v1, %[RS1]" ::[RS1] "r"(signed_scalar));                     \
+        }                                                                               \
+      }                                                                                 \
+      if (strequal(#OPERATION, "vsub")) {                                               \
+        if (*(#DATATYPE) == 'u') {                                                      \
+          __asm__ volatile(                                                             \
+              "vsub.vx v3, v1, %[RS1]" ::[RS1] "r"(unsigned_scalar));                   \
+        } else {                                                                        \
+          __asm__ volatile(                                                             \
+              "vsub.vx v3, v1, %[RS1]" ::[RS1] "r"(signed_scalar));                     \
+        }                                                                               \
+      }                                                                                 \
+      if (strequal(#OPERATION, "vrsub")) {                                              \
+        if (*(#DATATYPE) == 'u') {                                                      \
+          __asm__ volatile(                                                             \
+              "vrsub.vx v3, v1, %[RS1]" ::[RS1] "r"(unsigned_scalar));                  \
+        } else {                                                                        \
+          __asm__ volatile(                                                             \
+              "vrsub.vx v3, v1, %[RS1]" ::[RS1] "r"(signed_scalar));                    \
+        }                                                                               \
+      }                                                                                 \
+    } else if (strequal(#SUBOPERATION, "vi")) {                                         \
+      int32_t input2_ = *((int32_t *)input2);                                           \
+      switch (input2_) {                                                                \
+        case -16:                                                                       \
+          if (strequal(#OPERATION, "vadd"))                                             \
+            __asm__ volatile("vadd.vi v3, v1, -16");                                    \
+          if (strequal(#OPERATION, "vrsub"))                                            \
+            __asm__ volatile("vrsub.vi v3, v1, -16");                                   \
+          break;                                                                        \
+        case 15:                                                                        \
+          if (strequal(#OPERATION, "vadd"))                                             \
+            __asm__ volatile("vadd.vi v3, v1, 15");                                     \
+          if (strequal(#OPERATION, "vrsub"))                                            \
+            __asm__ volatile("vrsub.vi v3, v1, 15");                                    \
+          break;                                                                        \
+        default:                                                                        \
+          assert(("unhandled intermediate for " #OPERATION ".vi", false));                \
+      }                                                                                 \
+    }                                                                                   \
+    __asm__ volatile("vs" #VTYPE ".v v3, (%0)" ::"r"(output));                          \
+  }                                                                                     \
+                                                                                        \
+  void test_vector_##OPERATION##_##SUBOPERATION##_##VTYPE##_##DATATYPE(void) {          \
+    LOG_INFO("%s", __FUNCTION__);                                                       \
+    uint32_t allocation_size = 256;                                                     \
+    DATATYPE input1[allocation_size];                                                   \
+    DATATYPE input2[allocation_size]; /* for vadd.vx/vsub.vx treat input2[0]            \
+                                         as the scalar */                               \
+    uint32_t unsigned_scalar = (uint32_t)random64();                                    \
+    int32_t signed_scalar = *((int32_t *)&unsigned_scalar);                             \
+    DATATYPE observed_output[allocation_size];                                          \
+    DATATYPE expected_output[allocation_size];                                          \
+    uint32_t new_vl;                                                                    \
+    SETUP_TEST(VTYPE, allocation_size, new_vl);                                         \
+    randomize_array(input1, new_vl, 8 * sizeof(DATATYPE));                              \
+    randomize_array(input2, new_vl, 8 * sizeof(DATATYPE));                              \
+    if (strequal(#SUBOPERATION, "vi")) {                                                \
+      int32_t immediate_inputs[] = {-16, 15};                                           \
+      for (int i = 0;                                                                   \
+           i < sizeof(immediate_inputs) / sizeof(*immediate_inputs); i++) {             \
+        run_operations_test_vector_##OPERATION##_##SUBOPERATION##_##VTYPE##_##DATATYPE( \
+            observed_output, input1, &immediate_inputs[i]);                             \
+        calculate_expected_output(                                                      \
+            expected_output, input1, &immediate_inputs[i], new_vl,                      \
+            sizeof(DATATYPE), #DATATYPE, #OPERATION "." #SUBOPERATION);                 \
+        check_array_equality(observed_output, expected_output,                          \
+                             new_vl * sizeof(DATATYPE));                                \
+      }                                                                                 \
+    } else if (strequal(#SUBOPERATION, "vx")) {                                         \
+      run_operations_test_vector_##OPERATION##_##SUBOPERATION##_##VTYPE##_##DATATYPE(   \
+          observed_output, input1,                                                      \
+          (*(#DATATYPE) == 'u') ? (void *)&unsigned_scalar                              \
+                                : (void *)&signed_scalar);                              \
+      calculate_expected_output(                                                        \
+          expected_output, input1,                                                      \
+          (*(#DATATYPE) == 'u') ? (void *)&unsigned_scalar                              \
+                                : (void *)&signed_scalar,                               \
+          new_vl, sizeof(DATATYPE), #DATATYPE, #OPERATION "." #SUBOPERATION);           \
+      check_array_equality(observed_output, expected_output,                            \
+                           new_vl * sizeof(DATATYPE));                                  \
+    } else {                                                                            \
+      run_operations_test_vector_##OPERATION##_##SUBOPERATION##_##VTYPE##_##DATATYPE(   \
+          observed_output, input1, input2);                                             \
+      calculate_expected_output(expected_output, input1, input2, new_vl,                \
+                                sizeof(DATATYPE), #DATATYPE,                            \
+                                #OPERATION "." #SUBOPERATION);                          \
+      check_array_equality(expected_output, observed_output,                            \
+                           new_vl * sizeof(DATATYPE));                                  \
+    }                                                                                   \
+  }
+
+bool test_vector(void) {
+  LOG_INFO("%s", __FUNCTION__);
+  for (int i = 0; i < 100; i++) {
+#ifdef TEST_VV
+    test_vector_vadd_vv();
+    test_vector_vsub_vv();
+#endif
+#ifdef TEST_VX
+    test_vector_vadd_vx();
+    test_vector_vsub_vx();
+    test_vector_vrsub_vx();
+#endif
+#ifdef TEST_VI
+    test_vector_vadd_vi();
+    test_vector_vrsub_vi();
+#endif
+  }
+  return true;
+}
+
+MAKE_TEST(e8, vadd, vv, uint8_t);
+MAKE_TEST(e8, vadd, vv, int8_t);
+MAKE_TEST(e16, vadd, vv, uint16_t);
+MAKE_TEST(e16, vadd, vv, int16_t);
+MAKE_TEST(e32, vadd, vv, uint32_t);
+MAKE_TEST(e32, vadd, vv, int32_t);
+MAKE_TEST(e64, vadd, vv, uint64_t);
+MAKE_TEST(e64, vadd, vv, int64_t);
+void test_vector_vadd_vv(void) {
+  // TODO(julianmb): test signed + unsigned
+  test_vector_vadd_vv_e8_uint8_t();
+  test_vector_vadd_vv_e8_int8_t();
+  test_vector_vadd_vv_e16_uint16_t();
+  test_vector_vadd_vv_e16_int16_t();
+  test_vector_vadd_vv_e32_uint32_t();
+  test_vector_vadd_vv_e32_int32_t();
+  test_vector_vadd_vv_e64_uint64_t();
+  test_vector_vadd_vv_e64_int64_t();
+}
+
+MAKE_TEST(e8, vsub, vv, uint8_t);
+MAKE_TEST(e8, vsub, vv, int8_t);
+MAKE_TEST(e16, vsub, vv, uint16_t);
+MAKE_TEST(e16, vsub, vv, int16_t);
+MAKE_TEST(e32, vsub, vv, uint32_t);
+MAKE_TEST(e32, vsub, vv, int32_t);
+MAKE_TEST(e64, vsub, vv, uint64_t);
+MAKE_TEST(e64, vsub, vv, int64_t);
+void test_vector_vsub_vv(void) {
+  // TODO(julianmb): test signed + unsigned
+  test_vector_vsub_vv_e8_uint8_t();
+  test_vector_vsub_vv_e8_int8_t();
+  test_vector_vsub_vv_e16_uint16_t();
+  test_vector_vsub_vv_e16_int16_t();
+  test_vector_vsub_vv_e32_uint32_t();
+  test_vector_vsub_vv_e32_int32_t();
+  test_vector_vsub_vv_e64_uint64_t();
+  test_vector_vsub_vv_e64_int64_t();
+}
+
+MAKE_TEST(e8, vadd, vx, uint8_t);
+MAKE_TEST(e8, vadd, vx, int8_t);
+MAKE_TEST(e16, vadd, vx, uint16_t);
+MAKE_TEST(e16, vadd, vx, int16_t);
+MAKE_TEST(e32, vadd, vx, uint32_t);
+MAKE_TEST(e32, vadd, vx, int32_t);
+MAKE_TEST(e64, vadd, vx, uint64_t);
+MAKE_TEST(e64, vadd, vx, int64_t);
+void test_vector_vadd_vx(void) {
+  LOG_INFO("%s", __FUNCTION__);
+  // TODO(julianmb): test signed + unsigned
+  test_vector_vadd_vx_e8_uint8_t();
+  test_vector_vadd_vx_e8_int8_t();
+  test_vector_vadd_vx_e16_uint16_t();
+  test_vector_vadd_vv_e16_int16_t();
+  test_vector_vadd_vx_e32_uint32_t();
+  test_vector_vadd_vx_e32_int32_t();
+  // test_vector_vadd_vx_e64_uint64_t();
+  test_vector_vadd_vx_e64_int64_t();
+}
+
+MAKE_TEST(e8, vsub, vx, uint8_t);
+MAKE_TEST(e8, vsub, vx, int8_t);
+MAKE_TEST(e16, vsub, vx, uint16_t);
+MAKE_TEST(e16, vsub, vx, int16_t);
+MAKE_TEST(e32, vsub, vx, uint32_t);
+MAKE_TEST(e32, vsub, vx, int32_t);
+MAKE_TEST(e64, vsub, vx, uint64_t);
+MAKE_TEST(e64, vsub, vx, int64_t);
+void test_vector_vsub_vx(void) {
+  LOG_INFO("%s", __FUNCTION__);
+  // TODO(julianmb): test signed + unsigned
+  test_vector_vsub_vx_e8_uint8_t();
+  test_vector_vsub_vx_e8_int8_t();
+  test_vector_vsub_vx_e16_uint16_t();
+  test_vector_vsub_vv_e16_int16_t();
+  test_vector_vsub_vx_e32_uint32_t();
+  test_vector_vsub_vx_e32_int32_t();
+  // test_vector_vsub_vx_e64_uint64_t();
+  test_vector_vsub_vx_e64_int64_t();
+}
+
+MAKE_TEST(e8, vadd, vi, uint8_t);
+MAKE_TEST(e8, vadd, vi, int8_t);
+MAKE_TEST(e16, vadd, vi, uint16_t);
+MAKE_TEST(e16, vadd, vi, int16_t);
+MAKE_TEST(e32, vadd, vi, uint32_t);
+MAKE_TEST(e32, vadd, vi, int32_t);
+MAKE_TEST(e64, vadd, vi, uint64_t);
+MAKE_TEST(e64, vadd, vi, int64_t);
+void test_vector_vadd_vi(void) {
+  LOG_INFO("%s", __FUNCTION__);
+  // TODO(julianmb): test signed + unsigned
+  test_vector_vadd_vi_e8_uint8_t();
+  test_vector_vadd_vi_e8_int8_t();
+  test_vector_vadd_vi_e16_uint16_t();
+  test_vector_vadd_vi_e16_int16_t();
+  test_vector_vadd_vi_e32_uint32_t();
+  test_vector_vadd_vi_e32_int32_t();
+  test_vector_vadd_vi_e64_uint64_t();
+  test_vector_vadd_vi_e64_int64_t();
+}
+
+MAKE_TEST(e8, vrsub, vx, uint8_t);
+MAKE_TEST(e8, vrsub, vx, int8_t);
+MAKE_TEST(e16, vrsub, vx, uint16_t);
+MAKE_TEST(e16, vrsub, vx, int16_t);
+MAKE_TEST(e32, vrsub, vx, uint32_t);
+MAKE_TEST(e32, vrsub, vx, int32_t);
+MAKE_TEST(e64, vrsub, vx, uint64_t);
+MAKE_TEST(e64, vrsub, vx, int64_t);
+void test_vector_vrsub_vx(void) {
+  LOG_INFO("%s", __FUNCTION__);
+  // TODO(julianmb): test signed + unsigned
+  test_vector_vrsub_vx_e8_uint8_t();
+  test_vector_vrsub_vx_e8_int8_t();
+  test_vector_vrsub_vx_e16_uint16_t();
+  test_vector_vrsub_vx_e16_int16_t();
+  test_vector_vrsub_vx_e32_uint32_t();
+  test_vector_vrsub_vx_e32_int32_t();
+  // test_vector_vrsub_vx_e64_uint64_t();
+  test_vector_vrsub_vx_e64_int64_t();
+}
+
+MAKE_TEST(e8, vrsub, vi, uint8_t);
+MAKE_TEST(e8, vrsub, vi, int8_t);
+MAKE_TEST(e16, vrsub, vi, uint16_t);
+MAKE_TEST(e16, vrsub, vi, int16_t);
+MAKE_TEST(e32, vrsub, vi, uint32_t);
+MAKE_TEST(e32, vrsub, vi, int32_t);
+MAKE_TEST(e64, vrsub, vi, uint64_t);
+MAKE_TEST(e64, vrsub, vi, int64_t);
+void test_vector_vrsub_vi(void) {
+  LOG_INFO("%s", __FUNCTION__);
+  // TODO(julianmb): test signed + unsigned
+  test_vector_vrsub_vi_e8_uint8_t();
+  test_vector_vrsub_vi_e8_int8_t();
+  test_vector_vrsub_vi_e16_uint16_t();
+  test_vector_vrsub_vi_e16_int16_t();
+  test_vector_vrsub_vi_e32_uint32_t();
+  test_vector_vrsub_vi_e32_int32_t();
+  test_vector_vrsub_vi_e64_uint64_t();
+  test_vector_vrsub_vi_e64_int64_t();
+}
+
+static void randomize_array(void *array, size_t length, size_t element_size) {
+  uint8_t *array8 = (uint8_t *)array;
+  for (int i = 0; i < ((length * element_size) >> 3); i++) {
+    array8[i] = (uint8_t)random64();
+  }
+}
+
+static void check_array_equality(void *array1, void *array2,
+                                 size_t byte_count) {
+  assert(memcmp(array1, array2, byte_count) == 0);
+}
+
+#define VADD_VV_EXPECTED_OUTPUT(DTYPE)                              \
+  do {                                                              \
+    if (strequal("vadd.vv", operation_str)) {                       \
+      *((DTYPE *)output) = *((DTYPE *)input1) + *((DTYPE *)input2); \
+    }                                                               \
+  } while (0)
+
+#define VSUB_VV_EXPECTED_OUTPUT(DTYPE)                              \
+  do {                                                              \
+    if (strequal("vsub.vv", operation_str)) {                       \
+      *((DTYPE *)output) = *((DTYPE *)input1) - *((DTYPE *)input2); \
+    }                                                               \
+  } while (0)
+
+#define VADD_VX_EXPECTED_OUTPUT(DTYPE)                                   \
+  do {                                                                   \
+    if (strequal("vadd.vx", operation_str) && *(#DTYPE) == 'u') {        \
+      *((DTYPE *)output) = *((DTYPE *)input1) + *((uint32_t *)input2);   \
+    } else if (strequal("vadd.vx", operation_str) && *(#DTYPE) == 'i') { \
+      *((DTYPE *)output) = *((DTYPE *)input1) + *((int32_t *)input2);    \
+    }                                                                    \
+  } while (0)
+
+#define VSUB_VX_EXPECTED_OUTPUT(DTYPE)                                   \
+  do {                                                                   \
+    if (strequal("vsub.vx", operation_str) && *(#DTYPE) == 'u') {        \
+      *((DTYPE *)output) = *((DTYPE *)input1) - *((uint32_t *)input2);   \
+    } else if (strequal("vsub.vx", operation_str) && *(#DTYPE) == 'i') { \
+      *((DTYPE *)output) = *((DTYPE *)input1) - *((int32_t *)input2);    \
+    }                                                                    \
+  } while (0)
+
+#define VADD_VI_EXPECTED_OUTPUT(DTYPE)                                \
+  do {                                                                \
+    if (strequal("vadd.vi", operation_str)) {                         \
+      *((DTYPE *)output) = *((DTYPE *)input1) + *((int32_t *)input2); \
+    }                                                                 \
+  } while (0)
+
+#define VRSUB_VX_EXPECTED_OUTPUT(DTYPE)                                   \
+  do {                                                                    \
+    if (strequal("vrsub.vx", operation_str) && *(#DTYPE) == 'u') {        \
+      *((DTYPE *)output) = *((uint32_t *)input2) - *((DTYPE *)input1);    \
+    } else if (strequal("vrsub.vx", operation_str) && *(#DTYPE) == 'i') { \
+      *((DTYPE *)output) = *((int32_t *)input2) - *((DTYPE *)input1);     \
+    }                                                                     \
+  } while (0)
+
+#define VRSUB_VI_EXPECTED_OUTPUT(DTYPE)                               \
+  do {                                                                \
+    if (strequal("vrsub.vi", operation_str)) {                        \
+      *((DTYPE *)output) = *((int32_t *)input2) - *((DTYPE *)input1); \
+    }                                                                 \
+  } while (0)
+
+static void calculate_expected_output(void *output, void *input1, void *input2,
+                                      uint32_t len, uint8_t sizeof_datatype,
+                                      const char *datatype_str,
+                                      const char *operation_str) {
+  uint8_t output_inc = sizeof_datatype;
+  uint8_t input1_inc = sizeof_datatype;
+  uint8_t input2_inc = sizeof_datatype;
+  uint8_t input2_inc_mul =
+      strequal("vadd.vv", operation_str) || strequal("vsub.vv", operation_str)
+          ? 1
+          : 0;
+
+  for (int i = 0; i < len; i++) {
+    if (strequal("uint8_t", datatype_str)) {
+      VADD_VV_EXPECTED_OUTPUT(uint8_t);
+      VSUB_VV_EXPECTED_OUTPUT(uint8_t);
+      VADD_VX_EXPECTED_OUTPUT(uint8_t);
+      VSUB_VX_EXPECTED_OUTPUT(uint8_t);
+      VADD_VI_EXPECTED_OUTPUT(uint8_t);
+      VRSUB_VX_EXPECTED_OUTPUT(uint8_t);
+      VRSUB_VI_EXPECTED_OUTPUT(uint8_t);
+    }
+    if (strequal("int8_t", datatype_str)) {
+      VADD_VV_EXPECTED_OUTPUT(int8_t);
+      VSUB_VV_EXPECTED_OUTPUT(int8_t);
+      VADD_VX_EXPECTED_OUTPUT(int8_t);
+      VSUB_VX_EXPECTED_OUTPUT(int8_t);
+      VADD_VI_EXPECTED_OUTPUT(int8_t);
+      VRSUB_VX_EXPECTED_OUTPUT(int8_t);
+      VRSUB_VI_EXPECTED_OUTPUT(int8_t);
+    }
+    if (strequal("uint16_t", datatype_str)) {
+      VADD_VV_EXPECTED_OUTPUT(uint16_t);
+      VSUB_VV_EXPECTED_OUTPUT(uint16_t);
+      VADD_VX_EXPECTED_OUTPUT(uint16_t);
+      VSUB_VX_EXPECTED_OUTPUT(uint16_t);
+      VADD_VI_EXPECTED_OUTPUT(uint16_t);
+      VRSUB_VX_EXPECTED_OUTPUT(uint16_t);
+      VRSUB_VI_EXPECTED_OUTPUT(uint16_t);
+    }
+    if (strequal("int16_t", datatype_str)) {
+      VADD_VV_EXPECTED_OUTPUT(int16_t);
+      VSUB_VV_EXPECTED_OUTPUT(int16_t);
+      VADD_VX_EXPECTED_OUTPUT(int16_t);
+      VSUB_VX_EXPECTED_OUTPUT(int16_t);
+      VADD_VI_EXPECTED_OUTPUT(int16_t);
+      VRSUB_VX_EXPECTED_OUTPUT(int16_t);
+      VRSUB_VI_EXPECTED_OUTPUT(int16_t);
+    }
+    if (strequal("uint32_t", datatype_str)) {
+      VADD_VV_EXPECTED_OUTPUT(uint32_t);
+      VSUB_VV_EXPECTED_OUTPUT(uint32_t);
+      VADD_VX_EXPECTED_OUTPUT(uint32_t);
+      VSUB_VX_EXPECTED_OUTPUT(uint32_t);
+      VADD_VI_EXPECTED_OUTPUT(uint32_t);
+      VRSUB_VX_EXPECTED_OUTPUT(uint32_t);
+      VRSUB_VI_EXPECTED_OUTPUT(uint32_t);
+    }
+    if (strequal("int32_t", datatype_str)) {
+      VADD_VV_EXPECTED_OUTPUT(int32_t);
+      VSUB_VV_EXPECTED_OUTPUT(int32_t);
+      VADD_VX_EXPECTED_OUTPUT(int32_t);
+      VSUB_VX_EXPECTED_OUTPUT(int32_t);
+      VADD_VI_EXPECTED_OUTPUT(int32_t);
+      VRSUB_VX_EXPECTED_OUTPUT(int32_t);
+      VRSUB_VI_EXPECTED_OUTPUT(int32_t);
+    }
+    if (strequal("uint64_t", datatype_str)) {
+      VADD_VV_EXPECTED_OUTPUT(uint64_t);
+      VSUB_VV_EXPECTED_OUTPUT(uint64_t);
+      VADD_VX_EXPECTED_OUTPUT(uint64_t);
+      VSUB_VX_EXPECTED_OUTPUT(uint64_t);
+      VADD_VI_EXPECTED_OUTPUT(uint64_t);
+      VRSUB_VX_EXPECTED_OUTPUT(uint64_t);
+      VRSUB_VI_EXPECTED_OUTPUT(uint64_t);
+    }
+    if (strequal("int64_t", datatype_str)) {
+      VADD_VV_EXPECTED_OUTPUT(int64_t);
+      VSUB_VV_EXPECTED_OUTPUT(int64_t);
+      VADD_VX_EXPECTED_OUTPUT(int64_t);
+      VSUB_VX_EXPECTED_OUTPUT(int64_t);
+      VADD_VI_EXPECTED_OUTPUT(int64_t);
+      VRSUB_VX_EXPECTED_OUTPUT(int64_t);
+      VRSUB_VI_EXPECTED_OUTPUT(int64_t);
+    }
+    output += output_inc;
+    input1 += input1_inc;
+    input2 += input2_inc_mul * input2_inc;
+  }
+}