diff --git a/crt/BUILD b/crt/BUILD
index 72a00c5..acb27d3 100644
--- a/crt/BUILD
+++ b/crt/BUILD
@@ -7,3 +7,11 @@
     "kelvin_gloss.cc",
     "kelvin_start.S",
 ])
+
+cc_library(
+    name = "crt_header",  # Header-only target: no srcs, just the two headers.
+    hdrs = [
+        "kelvin.h",  # Includes "crt/kelvin_intrinsics.h" itself.
+        "kelvin_intrinsics.h",
+    ],
+)
diff --git a/crt/CPPLINT.cfg b/crt/CPPLINT.cfg
new file mode 100644
index 0000000..a253cf5
--- /dev/null
+++ b/crt/CPPLINT.cfg
@@ -0,0 +1,3 @@
+set noparent
+
+exclude_files=kelvin_intrinsics.h
diff --git a/crt/kelvin.h b/crt/kelvin.h
new file mode 100644
index 0000000..cbf5578
--- /dev/null
+++ b/crt/kelvin.h
@@ -0,0 +1,82 @@
+// Copyright 2023 Google LLC
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Kelvin helper header
+
+#ifndef CRT_KELVIN_H_
+#define CRT_KELVIN_H_
+
+#include <math.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <type_traits>
+#define __volatile_always__ volatile  // Expands to plain `volatile`.
+
+// Intrinsics helpers: stringify the operands into "FN A0, A1, ...\n" asm text.
+#define ARGS_F_A(FN, A0) FN " " #A0 "\n"
+#define ARGS_F_A_A(FN, A0, A1) FN " " #A0 ", " #A1 "\n"
+#define ARGS_F_A_A_A(FN, A0, A1, A2) FN " " #A0 ", " #A1 ", " #A2 "\n"
+#define ARGS_F_A_A_A_A(FN, A0, A1, A2, A3) \
+  FN " " #A0 ", " #A1 ", " #A2 ", " #A3 "\n"
+
+#include "crt/kelvin_intrinsics.h"
+
+#define vm0 v0  // vmN is an alias for v(4*N): vm0 -> v0, vm1 -> v4, ...
+#define vm1 v4
+#define vm2 v8
+#define vm3 v12
+#define vm4 v16
+#define vm5 v20
+#define vm6 v24
+#define vm7 v28
+#define vm8 v32
+#define vm9 v36
+#define vm10 v40
+#define vm11 v44
+#define vm12 v48
+#define vm13 v52
+#define vm14 v56
+#define vm15 v60
+
+// True when any of the four bytes of `data` is \0, i.e. the string ends here.
+static inline bool WordHasZero(uint32_t data) {
+  return !((data & 0xff000000u) && (data & 0x00ff0000u) &&
+           (data & 0x0000ff00u) && (data & 0x000000ffu));
+}
+
+template <typename T>
+static inline void PrintArg(const T arg) {  // Logs one printf() argument.
+  if (std::is_same<T, const uint8_t *>::value ||
+      std::is_same<T, const char *>::value) {
+    klog(arg);  // Constant string: pass its address straight to `klog`.
+  } else if (std::is_same<T, uint8_t *>::value ||
+             std::is_same<T, char *>::value) {
+    const char *p_str = reinterpret_cast<const char *>(arg);
+    uint32_t data = 0;
+    do {  // Mutable string: log 4 bytes at a time up to the word with the \0.
+      memcpy(&data, p_str, sizeof(data));  // Alignment- and aliasing-safe.
+      p_str += sizeof(data);
+      clog(data);
+    } while (!WordHasZero(data));
+  } else {  // scalar argument.
+    slog(arg);
+  }
+}
+
+// Lightweight printf stand-in built on the Kelvin logging intrinsics.
+// Every argument is handed to `PrintArg` first, strictly left to right: the
+// pack is expanded inside a braced-init-list (C++11), whose initializers are
+// evaluated in order. The format string is then issued with `flog`.
+template <typename... Types>
+static inline void printf(const char *format, Types... args) {
+  // The leading 0 keeps the array well-formed for an empty argument pack, so
+  // no explicit size check is needed.
+  const int in_order[] = {0, (static_cast<void>(PrintArg(args)), 0)...};
+  static_cast<void>(in_order);  // Only the side effects above are wanted.
+  flog(format);
+}
+
+#endif  // CRT_KELVIN_H_
diff --git a/crt/kelvin_intrinsics.h b/crt/kelvin_intrinsics.h
new file mode 100644
index 0000000..23af097
--- /dev/null
+++ b/crt/kelvin_intrinsics.h
@@ -0,0 +1,1229 @@
+// Copyright 2023 Google LLC
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Kelvin instruction intrinsics
+
+#ifndef CRT_KELVIN_INTRINSICS_H_
+#define CRT_KELVIN_INTRINSICS_H_
+// clang-format off
+#define eexit()                     __asm__ __volatile_always__("eexit");  // NOTE(review): eexit/eyield/ectxsw/mpause expand with their own trailing ';', so `eexit();` adds an empty statement (matters in brace-less if/else).
+#define eyield()                    __asm__ __volatile_always__("eyield");
+#define ectxsw()                    __asm__ __volatile_always__("ectxsw");
+#define mpause()                    __asm__ __volatile_always__("mpause");
+#define flog(s)                     __asm__ __volatile_always__(ARGS_F_A("flog", %0) : : "r"(s) : "memory")  // s is a string address (format string); "memory" keeps earlier stores to that buffer ahead of the asm.
+#define slog(s)                     __asm__ __volatile_always__(ARGS_F_A("slog", %0) : : "r"(s))  // s is passed by value in a scalar register.
+#define clog(s)                     __asm__ __volatile_always__(ARGS_F_A("clog", %0) : : "r"(s))  // s is passed by value in a scalar register.
+#define klog(s)                     __asm__ __volatile_always__(ARGS_F_A("klog", %0) : : "r"(s) : "memory")  // s is a string address; "memory" keeps earlier stores to that buffer ahead of the asm.
+#define flushall()                  __asm__ __volatile__("flushall");  // NOTE(review): expansion ends in ';' (flushat's does not).
+#define flushat(s)                  __asm__ __volatile__(ARGS_F_A("flushat", %0) : : "r"(s))
+#define getmaxvl_b(d)               __asm__ __volatile__(ARGS_F_A("getmaxvl.b", %0) : "=r"(d) : )  // getmaxvl*/getvl*: d is the "=r" output; s and t are scalar register inputs.
+#define getmaxvl_h(d)               __asm__ __volatile__(ARGS_F_A("getmaxvl.h", %0) : "=r"(d) : )
+#define getmaxvl_w(d)               __asm__ __volatile__(ARGS_F_A("getmaxvl.w", %0) : "=r"(d) : )
+#define getvl_b_x(d, s)             __asm__ __volatile__(ARGS_F_A_A("getvl.b.x", %0, %1) : "=r"(d) : "r"(s))
+#define getvl_h_x(d, s)             __asm__ __volatile__(ARGS_F_A_A("getvl.h.x", %0, %1) : "=r"(d) : "r"(s))
+#define getvl_w_x(d, s)             __asm__ __volatile__(ARGS_F_A_A("getvl.w.x", %0, %1) : "=r"(d) : "r"(s))
+#define getvl_b_xx(d, s, t)         __asm__ __volatile__(ARGS_F_A_A_A("getvl.b.xx", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
+#define getvl_h_xx(d, s, t)         __asm__ __volatile__(ARGS_F_A_A_A("getvl.h.xx", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
+#define getvl_w_xx(d, s, t)         __asm__ __volatile__(ARGS_F_A_A_A("getvl.w.xx", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
+#define getmaxvl_b_m(d)             __asm__ __volatile__(ARGS_F_A("getmaxvl.b.m", %0) : "=r"(d) : )
+#define getmaxvl_h_m(d)             __asm__ __volatile__(ARGS_F_A("getmaxvl.h.m", %0) : "=r"(d) : )
+#define getmaxvl_w_m(d)             __asm__ __volatile__(ARGS_F_A("getmaxvl.w.m", %0) : "=r"(d) : )
+#define getvl_b_x_m(d, s)           __asm__ __volatile__(ARGS_F_A_A("getvl.b.x.m", %0, %1) : "=r"(d) : "r"(s))
+#define getvl_h_x_m(d, s)           __asm__ __volatile__(ARGS_F_A_A("getvl.h.x.m", %0, %1) : "=r"(d) : "r"(s))
+#define getvl_w_x_m(d, s)           __asm__ __volatile__(ARGS_F_A_A("getvl.w.x.m", %0, %1) : "=r"(d) : "r"(s))
+#define getvl_b_xx_m(d, s, t)       __asm__ __volatile__(ARGS_F_A_A_A("getvl.b.xx.m", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
+#define getvl_h_xx_m(d, s, t)       __asm__ __volatile__(ARGS_F_A_A_A("getvl.h.xx.m", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
+#define getvl_w_xx_m(d, s, t)       __asm__ __volatile__(ARGS_F_A_A_A("getvl.w.xx.m", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
+// 111 Load/Store. Vd is pasted into the asm text as a vector register name; s, t (and f for vdup_s_f) are scalar register operands. Forms whose suffix contains p (p, lp, sp, tp) also write the updated s back ("=r"(s) tied to "0"(s)). All vld/vst/vstq forms carry a "memory" clobber.
+#define vld_b_x(Vd, s)              __asm__ __volatile_always__(ARGS_F_A_A("vld.b.x", Vd, %0) : : "r"(s) : "memory")
+#define vld_b_p_x(Vd, s)            __asm__ __volatile_always__(ARGS_F_A_A("vld.b.p.x", Vd, %0) : "=r"(s) : "0"(s) : "memory")
+#define vld_b_l_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vld.b.l.xx", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vld_b_s_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vld.b.s.xx", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vld_b_p_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vld.b.p.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_b_lp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vld.b.lp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_b_sp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vld.b.sp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_b_tp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vld.b.tp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_b_x_m(Vd, s)            __asm__ __volatile_always__(ARGS_F_A_A("vld.b.x.m", Vd, %0) : : "r"(s) : "memory")
+#define vld_b_p_x_m(Vd, s)          __asm__ __volatile_always__(ARGS_F_A_A("vld.b.p.x.m", Vd, %0) : "=r"(s) : "0"(s) : "memory")
+#define vld_b_l_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vld.b.l.xx.m", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vld_b_s_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vld.b.s.xx.m", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vld_b_p_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vld.b.p.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_b_lp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vld.b.lp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_b_sp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vld.b.sp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_b_tp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vld.b.tp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_h_x(Vd, s)              __asm__ __volatile_always__(ARGS_F_A_A("vld.h.x", Vd, %0) : : "r"(s) : "memory")
+#define vld_h_p_x(Vd, s)            __asm__ __volatile_always__(ARGS_F_A_A("vld.h.p.x", Vd, %0) : "=r"(s) : "0"(s) : "memory")
+#define vld_h_l_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vld.h.l.xx", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vld_h_s_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vld.h.s.xx", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vld_h_p_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vld.h.p.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_h_lp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vld.h.lp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_h_sp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vld.h.sp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_h_tp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vld.h.tp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_h_x_m(Vd, s)            __asm__ __volatile_always__(ARGS_F_A_A("vld.h.x.m", Vd, %0) : : "r"(s) : "memory")
+#define vld_h_p_x_m(Vd, s)          __asm__ __volatile_always__(ARGS_F_A_A("vld.h.p.x.m", Vd, %0) : "=r"(s) : "0"(s) : "memory")
+#define vld_h_l_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vld.h.l.xx.m", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vld_h_s_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vld.h.s.xx.m", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vld_h_p_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vld.h.p.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_h_lp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vld.h.lp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_h_sp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vld.h.sp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_h_tp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vld.h.tp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_w_x(Vd, s)              __asm__ __volatile_always__(ARGS_F_A_A("vld.w.x", Vd, %0) : : "r"(s) : "memory")
+#define vld_w_p_x(Vd, s)            __asm__ __volatile_always__(ARGS_F_A_A("vld.w.p.x", Vd, %0) : "=r"(s) : "0"(s) : "memory")
+#define vld_w_l_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vld.w.l.xx", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vld_w_s_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vld.w.s.xx", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vld_w_p_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vld.w.p.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_w_lp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vld.w.lp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_w_sp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vld.w.sp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_w_tp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vld.w.tp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_w_x_m(Vd, s)            __asm__ __volatile_always__(ARGS_F_A_A("vld.w.x.m", Vd, %0) : : "r"(s) : "memory")
+#define vld_w_p_x_m(Vd, s)          __asm__ __volatile_always__(ARGS_F_A_A("vld.w.p.x.m", Vd, %0) : "=r"(s) : "0"(s) : "memory")
+#define vld_w_l_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vld.w.l.xx.m", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vld_w_s_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vld.w.s.xx.m", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vld_w_p_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vld.w.p.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_w_lp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vld.w.lp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_w_sp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vld.w.sp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vld_w_tp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vld.w.tp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_b_x(Vd, s)              __asm__ __volatile_always__(ARGS_F_A_A("vst.b.x", Vd, %0) : : "r"(s) : "memory")
+#define vst_b_p_x(Vd, s)            __asm__ __volatile_always__(ARGS_F_A_A("vst.b.p.x", Vd, %0) : "=r"(s) : "0"(s) : "memory")
+#define vst_b_l_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vst.b.l.xx", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vst_b_s_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vst.b.s.xx", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vst_b_p_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vst.b.p.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_b_lp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vst.b.lp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_b_sp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vst.b.sp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_b_tp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vst.b.tp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_b_x_m(Vd, s)            __asm__ __volatile_always__(ARGS_F_A_A("vst.b.x.m", Vd, %0) : : "r"(s) : "memory")
+#define vst_b_p_x_m(Vd, s)          __asm__ __volatile_always__(ARGS_F_A_A("vst.b.p.x.m", Vd, %0) : "=r"(s) : "0"(s) : "memory")
+#define vst_b_l_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vst.b.l.xx.m", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vst_b_s_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vst.b.s.xx.m", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vst_b_p_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vst.b.p.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_b_lp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vst.b.lp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_b_sp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vst.b.sp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_b_tp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vst.b.tp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_h_x(Vd, s)              __asm__ __volatile_always__(ARGS_F_A_A("vst.h.x", Vd, %0) : : "r"(s) : "memory")
+#define vst_h_p_x(Vd, s)            __asm__ __volatile_always__(ARGS_F_A_A("vst.h.p.x", Vd, %0) : "=r"(s) : "0"(s) : "memory")
+#define vst_h_l_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vst.h.l.xx", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vst_h_s_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vst.h.s.xx", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vst_h_p_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vst.h.p.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_h_lp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vst.h.lp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_h_sp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vst.h.sp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_h_tp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vst.h.tp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_h_x_m(Vd, s)            __asm__ __volatile_always__(ARGS_F_A_A("vst.h.x.m", Vd, %0) : : "r"(s) : "memory")
+#define vst_h_p_x_m(Vd, s)          __asm__ __volatile_always__(ARGS_F_A_A("vst.h.p.x.m", Vd, %0) : "=r"(s) : "0"(s) : "memory")
+#define vst_h_l_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vst.h.l.xx.m", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vst_h_s_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vst.h.s.xx.m", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vst_h_p_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vst.h.p.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_h_lp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vst.h.lp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_h_sp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vst.h.sp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_h_tp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vst.h.tp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_w_x(Vd, s)              __asm__ __volatile_always__(ARGS_F_A_A("vst.w.x", Vd, %0) : : "r"(s) : "memory")
+#define vst_w_p_x(Vd, s)            __asm__ __volatile_always__(ARGS_F_A_A("vst.w.p.x", Vd, %0) : "=r"(s) : "0"(s) : "memory")
+#define vst_w_l_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vst.w.l.xx", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vst_w_s_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vst.w.s.xx", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vst_w_p_xx(Vd, s, t)        __asm__ __volatile_always__(ARGS_F_A_A_A("vst.w.p.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_w_lp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vst.w.lp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_w_sp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vst.w.sp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_w_tp_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vst.w.tp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_w_x_m(Vd, s)            __asm__ __volatile_always__(ARGS_F_A_A("vst.w.x.m", Vd, %0) : : "r"(s) : "memory")
+#define vst_w_p_x_m(Vd, s)          __asm__ __volatile_always__(ARGS_F_A_A("vst.w.p.x.m", Vd, %0) : "=r"(s) : "0"(s) : "memory")
+#define vst_w_l_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vst.w.l.xx.m", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vst_w_s_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vst.w.s.xx.m", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vst_w_p_xx_m(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vst.w.p.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_w_lp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vst.w.lp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_w_sp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vst.w.sp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vst_w_tp_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vst.w.tp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vdup_b_x(Vd, t)             __asm__ __volatile__(ARGS_F_A_A("vdup.b.x", Vd, %0) : : "r"(t))
+#define vdup_b_x_m(Vd, t)           __asm__ __volatile__(ARGS_F_A_A("vdup.b.x.m", Vd, %0) : : "r"(t))
+#define vdup_h_x(Vd, t)             __asm__ __volatile__(ARGS_F_A_A("vdup.h.x", Vd, %0) : : "r"(t))
+#define vdup_h_x_m(Vd, t)           __asm__ __volatile__(ARGS_F_A_A("vdup.h.x.m", Vd, %0) : : "r"(t))
+#define vdup_w_x(Vd, t)             __asm__ __volatile__(ARGS_F_A_A("vdup.w.x", Vd, %0) : : "r"(t))
+#define vdup_w_x_m(Vd, t)           __asm__ __volatile__(ARGS_F_A_A("vdup.w.x.m", Vd, %0) : : "r"(t))
+#define vdup_s_f(Vd, f)             __asm__ __volatile__(ARGS_F_A_A("vdup.s.f", Vd, %0) : : "f"(f))
+#define vdup_s_f_m(Vd, f)           __asm__ __volatile__(ARGS_F_A_A("vdup.s.f.m", Vd, %0) : : "f"(f))
+#define vcget(Vd)                   __asm__ __volatile__(ARGS_F_A("vcget", Vd) : : )
+#define vstq_b_s_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vstq.b.s.xx", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vstq_b_sp_xx(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vstq.b.sp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vstq_b_s_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vstq.b.s.xx.m", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vstq_b_sp_xx_m(Vd, s, t)    __asm__ __volatile_always__(ARGS_F_A_A_A("vstq.b.sp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vstq_h_s_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vstq.h.s.xx", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vstq_h_sp_xx(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vstq.h.sp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vstq_h_s_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vstq.h.s.xx.m", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vstq_h_sp_xx_m(Vd, s, t)    __asm__ __volatile_always__(ARGS_F_A_A_A("vstq.h.sp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vstq_w_s_xx(Vd, s, t)       __asm__ __volatile_always__(ARGS_F_A_A_A("vstq.w.s.xx", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vstq_w_sp_xx(Vd, s, t)      __asm__ __volatile_always__(ARGS_F_A_A_A("vstq.w.sp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+#define vstq_w_s_xx_m(Vd, s, t)     __asm__ __volatile_always__(ARGS_F_A_A_A("vstq.w.s.xx.m", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
+#define vstq_w_sp_xx_m(Vd, s, t)    __asm__ __volatile_always__(ARGS_F_A_A_A("vstq.w.sp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+// 000 Arithmetic
+#define vadd_b_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vadd.b.vv", Vd, Vs, Vt))
+#define vadd_b_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vadd.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vadd_b_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vadd.b.vv.m", Vd, Vs, Vt))
+#define vadd_b_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vadd.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vadd_h_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vadd.h.vv", Vd, Vs, Vt))
+#define vadd_h_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vadd.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vadd_h_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vadd.h.vv.m", Vd, Vs, Vt))
+#define vadd_h_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vadd.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vadd_w_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vadd.w.vv", Vd, Vs, Vt))
+#define vadd_w_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vadd.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vadd_w_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vadd.w.vv.m", Vd, Vs, Vt))
+#define vadd_w_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vadd.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsub_b_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vsub.b.vv", Vd, Vs, Vt))
+#define vsub_b_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vsub.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vsub_b_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsub.b.vv.m", Vd, Vs, Vt))
+#define vsub_b_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsub.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsub_h_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vsub.h.vv", Vd, Vs, Vt))
+#define vsub_h_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vsub.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vsub_h_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsub.h.vv.m", Vd, Vs, Vt))
+#define vsub_h_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsub.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsub_w_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vsub.w.vv", Vd, Vs, Vt))
+#define vsub_w_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vsub.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vsub_w_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsub.w.vv.m", Vd, Vs, Vt))
+#define vsub_w_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsub.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vrsub_b_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vrsub.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vrsub_b_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vrsub.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vrsub_h_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vrsub.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vrsub_h_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vrsub.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vrsub_w_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vrsub.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vrsub_w_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vrsub.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define veq_b_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("veq.b.vv", Vd, Vs, Vt))
+#define veq_b_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("veq.b.vx", Vd, Vs, %0) : : "r"(t))
+#define veq_b_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("veq.b.vv.m", Vd, Vs, Vt))
+#define veq_b_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("veq.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define veq_h_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("veq.h.vv", Vd, Vs, Vt))
+#define veq_h_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("veq.h.vx", Vd, Vs, %0) : : "r"(t))
+#define veq_h_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("veq.h.vv.m", Vd, Vs, Vt))
+#define veq_h_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("veq.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define veq_w_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("veq.w.vv", Vd, Vs, Vt))
+#define veq_w_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("veq.w.vx", Vd, Vs, %0) : : "r"(t))
+#define veq_w_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("veq.w.vv.m", Vd, Vs, Vt))
+#define veq_w_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("veq.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vne_b_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("vne.b.vv", Vd, Vs, Vt))
+#define vne_b_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vne.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vne_b_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vne.b.vv.m", Vd, Vs, Vt))
+#define vne_b_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vne.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vne_h_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("vne.h.vv", Vd, Vs, Vt))
+#define vne_h_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vne.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vne_h_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vne.h.vv.m", Vd, Vs, Vt))
+#define vne_h_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vne.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vne_w_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("vne.w.vv", Vd, Vs, Vt))
+#define vne_w_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vne.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vne_w_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vne.w.vv.m", Vd, Vs, Vt))
+#define vne_w_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vne.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vlt_b_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("vlt.b.vv", Vd, Vs, Vt))
+#define vlt_b_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vlt.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vlt_b_u_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vlt.b.u.vv", Vd, Vs, Vt))
+#define vlt_b_u_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vlt.b.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vlt_b_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vlt.b.vv.m", Vd, Vs, Vt))
+#define vlt_b_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vlt.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vlt_b_u_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vlt.b.u.vv.m", Vd, Vs, Vt))
+#define vlt_b_u_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vlt.b.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vlt_h_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("vlt.h.vv", Vd, Vs, Vt))
+#define vlt_h_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vlt.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vlt_h_u_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vlt.h.u.vv", Vd, Vs, Vt))
+#define vlt_h_u_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vlt.h.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vlt_h_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vlt.h.vv.m", Vd, Vs, Vt))
+#define vlt_h_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vlt.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vlt_h_u_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vlt.h.u.vv.m", Vd, Vs, Vt))
+#define vlt_h_u_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vlt.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vlt_w_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("vlt.w.vv", Vd, Vs, Vt))
+#define vlt_w_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vlt.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vlt_w_u_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vlt.w.u.vv", Vd, Vs, Vt))
+#define vlt_w_u_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vlt.w.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vlt_w_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vlt.w.vv.m", Vd, Vs, Vt))
+#define vlt_w_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vlt.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vlt_w_u_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vlt.w.u.vv.m", Vd, Vs, Vt))
+#define vlt_w_u_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vlt.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vle_b_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("vle.b.vv", Vd, Vs, Vt))  // vle.*: each macro emits one asm statement with Vd/Vs/Vt stringized as operands. Presumably "<=" compare -- confirm in ISA spec.
+#define vle_b_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vle.b.vx", Vd, Vs, %0) : : "r"(t))  // _vx: t is the sole asm operand (%0, "r" input); no outputs or clobbers are declared.
+#define vle_b_u_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vle.b.u.vv", Vd, Vs, Vt))  // .u: presumably the unsigned form -- confirm.
+#define vle_b_u_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vle.b.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vle_b_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vle.b.vv.m", Vd, Vs, Vt))  // _m: selects the ".m" mnemonic; presumably the 4-register-group form (cf. vm0..vm15 in kelvin.h) -- confirm.
+#define vle_b_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vle.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vle_b_u_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vle.b.u.vv.m", Vd, Vs, Vt))
+#define vle_b_u_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vle.b.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vle_h_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("vle.h.vv", Vd, Vs, Vt))  // .h/.w groups mirror the .b group; .b/.h/.w presumably 8/16/32-bit elements -- confirm.
+#define vle_h_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vle.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vle_h_u_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vle.h.u.vv", Vd, Vs, Vt))
+#define vle_h_u_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vle.h.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vle_h_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vle.h.vv.m", Vd, Vs, Vt))
+#define vle_h_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vle.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vle_h_u_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vle.h.u.vv.m", Vd, Vs, Vt))
+#define vle_h_u_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vle.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vle_w_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("vle.w.vv", Vd, Vs, Vt))
+#define vle_w_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vle.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vle_w_u_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vle.w.u.vv", Vd, Vs, Vt))
+#define vle_w_u_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vle.w.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vle_w_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vle.w.vv.m", Vd, Vs, Vt))
+#define vle_w_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vle.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vle_w_u_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vle.w.u.vv.m", Vd, Vs, Vt))
+#define vle_w_u_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vle.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vgt_b_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("vgt.b.vv", Vd, Vs, Vt))  // vgt.*: each macro emits one asm statement with Vd/Vs/Vt stringized as operands. Presumably ">" compare -- confirm in ISA spec.
+#define vgt_b_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vgt.b.vx", Vd, Vs, %0) : : "r"(t))  // _vx: t is the sole asm operand (%0, "r" input); no outputs or clobbers are declared.
+#define vgt_b_u_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vgt.b.u.vv", Vd, Vs, Vt))  // .u: presumably the unsigned form -- confirm.
+#define vgt_b_u_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vgt.b.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vgt_b_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vgt.b.vv.m", Vd, Vs, Vt))  // _m: selects the ".m" mnemonic; presumably the 4-register-group form (cf. vm0..vm15 in kelvin.h) -- confirm.
+#define vgt_b_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vgt.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vgt_b_u_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vgt.b.u.vv.m", Vd, Vs, Vt))
+#define vgt_b_u_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vgt.b.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vgt_h_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("vgt.h.vv", Vd, Vs, Vt))  // .h/.w groups mirror the .b group; .b/.h/.w presumably 8/16/32-bit elements -- confirm.
+#define vgt_h_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vgt.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vgt_h_u_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vgt.h.u.vv", Vd, Vs, Vt))
+#define vgt_h_u_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vgt.h.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vgt_h_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vgt.h.vv.m", Vd, Vs, Vt))
+#define vgt_h_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vgt.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vgt_h_u_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vgt.h.u.vv.m", Vd, Vs, Vt))
+#define vgt_h_u_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vgt.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vgt_w_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("vgt.w.vv", Vd, Vs, Vt))
+#define vgt_w_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vgt.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vgt_w_u_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vgt.w.u.vv", Vd, Vs, Vt))
+#define vgt_w_u_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vgt.w.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vgt_w_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vgt.w.vv.m", Vd, Vs, Vt))
+#define vgt_w_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vgt.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vgt_w_u_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vgt.w.u.vv.m", Vd, Vs, Vt))
+#define vgt_w_u_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vgt.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vge_b_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("vge.b.vv", Vd, Vs, Vt))  // vge.*: each macro emits one asm statement with Vd/Vs/Vt stringized as operands. Presumably ">=" compare -- confirm in ISA spec.
+#define vge_b_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vge.b.vx", Vd, Vs, %0) : : "r"(t))  // _vx: t is the sole asm operand (%0, "r" input); no outputs or clobbers are declared.
+#define vge_b_u_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vge.b.u.vv", Vd, Vs, Vt))  // .u: presumably the unsigned form -- confirm.
+#define vge_b_u_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vge.b.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vge_b_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vge.b.vv.m", Vd, Vs, Vt))  // _m: selects the ".m" mnemonic; presumably the 4-register-group form (cf. vm0..vm15 in kelvin.h) -- confirm.
+#define vge_b_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vge.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vge_b_u_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vge.b.u.vv.m", Vd, Vs, Vt))
+#define vge_b_u_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vge.b.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vge_h_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("vge.h.vv", Vd, Vs, Vt))  // .h/.w groups mirror the .b group; .b/.h/.w presumably 8/16/32-bit elements -- confirm.
+#define vge_h_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vge.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vge_h_u_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vge.h.u.vv", Vd, Vs, Vt))
+#define vge_h_u_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vge.h.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vge_h_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vge.h.vv.m", Vd, Vs, Vt))
+#define vge_h_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vge.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vge_h_u_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vge.h.u.vv.m", Vd, Vs, Vt))
+#define vge_h_u_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vge.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vge_w_vv(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("vge.w.vv", Vd, Vs, Vt))
+#define vge_w_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vge.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vge_w_u_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vge.w.u.vv", Vd, Vs, Vt))
+#define vge_w_u_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vge.w.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vge_w_vv_m(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vge.w.vv.m", Vd, Vs, Vt))
+#define vge_w_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vge.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vge_w_u_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vge.w.u.vv.m", Vd, Vs, Vt))
+#define vge_w_u_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vge.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vabsd_b_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vabsd.b.vv", Vd, Vs, Vt))  // vabsd.*: each macro emits one asm statement with Vd/Vs/Vt stringized as operands. Presumably absolute difference -- confirm in ISA spec.
+#define vabsd_b_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vabsd.b.vx", Vd, Vs, %0) : : "r"(t))  // _vx: t is the sole asm operand (%0, "r" input); no outputs or clobbers are declared.
+#define vabsd_b_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vabsd.b.u.vv", Vd, Vs, Vt))  // .u: presumably the unsigned form -- confirm.
+#define vabsd_b_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vabsd.b.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vabsd_b_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vabsd.b.vv.m", Vd, Vs, Vt))  // _m: selects the ".m" mnemonic; presumably the 4-register-group form (cf. vm0..vm15 in kelvin.h) -- confirm.
+#define vabsd_b_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vabsd.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vabsd_b_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vabsd.b.u.vv.m", Vd, Vs, Vt))
+#define vabsd_b_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vabsd.b.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vabsd_h_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vabsd.h.vv", Vd, Vs, Vt))  // .h/.w groups mirror the .b group; .b/.h/.w presumably 8/16/32-bit elements -- confirm.
+#define vabsd_h_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vabsd.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vabsd_h_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vabsd.h.u.vv", Vd, Vs, Vt))
+#define vabsd_h_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vabsd.h.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vabsd_h_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vabsd.h.vv.m", Vd, Vs, Vt))
+#define vabsd_h_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vabsd.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vabsd_h_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vabsd.h.u.vv.m", Vd, Vs, Vt))
+#define vabsd_h_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vabsd.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vabsd_w_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vabsd.w.vv", Vd, Vs, Vt))
+#define vabsd_w_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vabsd.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vabsd_w_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vabsd.w.u.vv", Vd, Vs, Vt))
+#define vabsd_w_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vabsd.w.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vabsd_w_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vabsd.w.vv.m", Vd, Vs, Vt))
+#define vabsd_w_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vabsd.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vabsd_w_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vabsd.w.u.vv.m", Vd, Vs, Vt))
+#define vabsd_w_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vabsd.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmax_b_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vmax.b.vv", Vd, Vs, Vt))  // vmax.*: each macro emits one asm statement with Vd/Vs/Vt stringized as operands. Presumably element-wise maximum -- confirm in ISA spec.
+#define vmax_b_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vmax.b.vx", Vd, Vs, %0) : : "r"(t))  // _vx: t is the sole asm operand (%0, "r" input); no outputs or clobbers are declared.
+#define vmax_b_u_vv(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vmax.b.u.vv", Vd, Vs, Vt))  // .u: presumably the unsigned form -- confirm.
+#define vmax_b_u_vx(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmax.b.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vmax_b_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vmax.b.vv.m", Vd, Vs, Vt))  // _m: selects the ".m" mnemonic; presumably the 4-register-group form (cf. vm0..vm15 in kelvin.h) -- confirm.
+#define vmax_b_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmax.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmax_b_u_vv_m(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vmax.b.u.vv.m", Vd, Vs, Vt))
+#define vmax_b_u_vx_m(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vmax.b.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmax_h_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vmax.h.vv", Vd, Vs, Vt))  // .h/.w groups mirror the .b group; .b/.h/.w presumably 8/16/32-bit elements -- confirm.
+#define vmax_h_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vmax.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vmax_h_u_vv(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vmax.h.u.vv", Vd, Vs, Vt))
+#define vmax_h_u_vx(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmax.h.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vmax_h_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vmax.h.vv.m", Vd, Vs, Vt))
+#define vmax_h_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmax.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmax_h_u_vv_m(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vmax.h.u.vv.m", Vd, Vs, Vt))
+#define vmax_h_u_vx_m(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vmax.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmax_w_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vmax.w.vv", Vd, Vs, Vt))
+#define vmax_w_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vmax.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vmax_w_u_vv(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vmax.w.u.vv", Vd, Vs, Vt))
+#define vmax_w_u_vx(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmax.w.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vmax_w_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vmax.w.vv.m", Vd, Vs, Vt))
+#define vmax_w_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmax.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmax_w_u_vv_m(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vmax.w.u.vv.m", Vd, Vs, Vt))
+#define vmax_w_u_vx_m(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vmax.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmin_b_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vmin.b.vv", Vd, Vs, Vt))  // vmin.*: each macro emits one asm statement with Vd/Vs/Vt stringized as operands. Presumably element-wise minimum -- confirm in ISA spec.
+#define vmin_b_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vmin.b.vx", Vd, Vs, %0) : : "r"(t))  // _vx: t is the sole asm operand (%0, "r" input); no outputs or clobbers are declared.
+#define vmin_b_u_vv(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vmin.b.u.vv", Vd, Vs, Vt))  // .u: presumably the unsigned form -- confirm.
+#define vmin_b_u_vx(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmin.b.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vmin_b_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vmin.b.vv.m", Vd, Vs, Vt))  // _m: selects the ".m" mnemonic; presumably the 4-register-group form (cf. vm0..vm15 in kelvin.h) -- confirm.
+#define vmin_b_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmin.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmin_b_u_vv_m(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vmin.b.u.vv.m", Vd, Vs, Vt))
+#define vmin_b_u_vx_m(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vmin.b.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmin_h_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vmin.h.vv", Vd, Vs, Vt))  // .h/.w groups mirror the .b group; .b/.h/.w presumably 8/16/32-bit elements -- confirm.
+#define vmin_h_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vmin.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vmin_h_u_vv(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vmin.h.u.vv", Vd, Vs, Vt))
+#define vmin_h_u_vx(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmin.h.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vmin_h_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vmin.h.vv.m", Vd, Vs, Vt))
+#define vmin_h_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmin.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmin_h_u_vv_m(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vmin.h.u.vv.m", Vd, Vs, Vt))
+#define vmin_h_u_vx_m(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vmin.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmin_w_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vmin.w.vv", Vd, Vs, Vt))
+#define vmin_w_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vmin.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vmin_w_u_vv(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vmin.w.u.vv", Vd, Vs, Vt))
+#define vmin_w_u_vx(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmin.w.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vmin_w_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vmin.w.vv.m", Vd, Vs, Vt))
+#define vmin_w_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmin.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmin_w_u_vv_m(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vmin.w.u.vv.m", Vd, Vs, Vt))
+#define vmin_w_u_vx_m(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vmin.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vadd3_b_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vadd3.b.vv", Vd, Vs, Vt))  // vadd3.*: each macro emits one asm statement with Vd/Vs/Vt stringized as operands. Presumably a three-input add -- confirm in ISA spec.
+#define vadd3_b_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vadd3.b.vx", Vd, Vs, %0) : : "r"(t))  // _vx: t is the sole asm operand (%0, "r" input); no outputs or clobbers are declared.
+#define vadd3_b_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vadd3.b.vv.m", Vd, Vs, Vt))  // _m: selects the ".m" mnemonic; presumably the 4-register-group form (cf. vm0..vm15 in kelvin.h) -- confirm.
+#define vadd3_b_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vadd3.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vadd3_h_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vadd3.h.vv", Vd, Vs, Vt))  // .h/.w groups mirror the .b group; .b/.h/.w presumably 8/16/32-bit elements -- confirm.
+#define vadd3_h_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vadd3.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vadd3_h_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vadd3.h.vv.m", Vd, Vs, Vt))
+#define vadd3_h_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vadd3.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vadd3_w_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vadd3.w.vv", Vd, Vs, Vt))
+#define vadd3_w_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vadd3.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vadd3_w_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vadd3.w.vv.m", Vd, Vs, Vt))
+#define vadd3_w_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vadd3.w.vx.m", Vd, Vs, %0) : : "r"(t))
+// 100 Arithmetic2
+#define vadds_b_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vadds.b.vv", Vd, Vs, Vt))  // vadds.*: each macro emits one asm statement with Vd/Vs/Vt stringized as operands. Presumably saturating add -- confirm in ISA spec.
+#define vadds_b_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vadds.b.vx", Vd, Vs, %0) : : "r"(t))  // _vx: t is the sole asm operand (%0, "r" input); no outputs or clobbers are declared.
+#define vadds_b_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vadds.b.u.vv", Vd, Vs, Vt))  // .u: presumably the unsigned form -- confirm.
+#define vadds_b_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vadds.b.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vadds_b_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vadds.b.vv.m", Vd, Vs, Vt))  // _m: selects the ".m" mnemonic; presumably the 4-register-group form (cf. vm0..vm15 in kelvin.h) -- confirm.
+#define vadds_b_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vadds.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vadds_b_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vadds.b.u.vv.m", Vd, Vs, Vt))
+#define vadds_b_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vadds.b.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vadds_h_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vadds.h.vv", Vd, Vs, Vt))  // .h/.w groups mirror the .b group; .b/.h/.w presumably 8/16/32-bit elements -- confirm.
+#define vadds_h_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vadds.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vadds_h_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vadds.h.u.vv", Vd, Vs, Vt))
+#define vadds_h_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vadds.h.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vadds_h_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vadds.h.vv.m", Vd, Vs, Vt))
+#define vadds_h_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vadds.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vadds_h_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vadds.h.u.vv.m", Vd, Vs, Vt))
+#define vadds_h_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vadds.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vadds_w_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vadds.w.vv", Vd, Vs, Vt))
+#define vadds_w_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vadds.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vadds_w_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vadds.w.u.vv", Vd, Vs, Vt))
+#define vadds_w_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vadds.w.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vadds_w_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vadds.w.vv.m", Vd, Vs, Vt))
+#define vadds_w_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vadds.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vadds_w_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vadds.w.u.vv.m", Vd, Vs, Vt))
+#define vadds_w_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vadds.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsubs_b_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vsubs.b.vv", Vd, Vs, Vt))  // vsubs.*: each macro emits one asm statement with Vd/Vs/Vt stringized as operands. Presumably saturating subtract -- confirm in ISA spec.
+#define vsubs_b_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vsubs.b.vx", Vd, Vs, %0) : : "r"(t))  // _vx: t is the sole asm operand (%0, "r" input); no outputs or clobbers are declared.
+#define vsubs_b_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vsubs.b.u.vv", Vd, Vs, Vt))  // .u: presumably the unsigned form -- confirm.
+#define vsubs_b_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vsubs.b.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vsubs_b_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vsubs.b.vv.m", Vd, Vs, Vt))  // _m: selects the ".m" mnemonic; presumably the 4-register-group form (cf. vm0..vm15 in kelvin.h) -- confirm.
+#define vsubs_b_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vsubs.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsubs_b_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vsubs.b.u.vv.m", Vd, Vs, Vt))
+#define vsubs_b_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vsubs.b.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsubs_h_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vsubs.h.vv", Vd, Vs, Vt))  // .h/.w groups mirror the .b group; .b/.h/.w presumably 8/16/32-bit elements -- confirm.
+#define vsubs_h_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vsubs.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vsubs_h_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vsubs.h.u.vv", Vd, Vs, Vt))
+#define vsubs_h_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vsubs.h.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vsubs_h_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vsubs.h.vv.m", Vd, Vs, Vt))
+#define vsubs_h_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vsubs.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsubs_h_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vsubs.h.u.vv.m", Vd, Vs, Vt))
+#define vsubs_h_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vsubs.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsubs_w_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vsubs.w.vv", Vd, Vs, Vt))
+#define vsubs_w_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vsubs.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vsubs_w_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vsubs.w.u.vv", Vd, Vs, Vt))
+#define vsubs_w_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vsubs.w.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vsubs_w_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vsubs.w.vv.m", Vd, Vs, Vt))
+#define vsubs_w_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vsubs.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsubs_w_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vsubs.w.u.vv.m", Vd, Vs, Vt))
+#define vsubs_w_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vsubs.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vaddw_h_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vaddw.h.vv", Vd, Vs, Vt))  // vaddw.*: each macro emits one asm statement with Vd/Vs/Vt stringized as operands. Presumably widening add -- confirm in ISA spec.
+#define vaddw_h_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vaddw.h.vx", Vd, Vs, %0) : : "r"(t))  // _vx: t is the sole asm operand (%0, "r" input); no outputs or clobbers are declared.
+#define vaddw_h_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vaddw.h.u.vv", Vd, Vs, Vt))  // .u: presumably the unsigned form -- confirm.
+#define vaddw_h_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vaddw.h.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vaddw_h_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vaddw.h.vv.m", Vd, Vs, Vt))  // _m: selects the ".m" mnemonic; presumably the 4-register-group form (cf. vm0..vm15 in kelvin.h) -- confirm.
+#define vaddw_h_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vaddw.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vaddw_h_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vaddw.h.u.vv.m", Vd, Vs, Vt))
+#define vaddw_h_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vaddw.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vaddw_w_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vaddw.w.vv", Vd, Vs, Vt))  // .w group mirrors the .h group (no .b forms are defined); .h/.w presumably 16/32-bit elements -- confirm.
+#define vaddw_w_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vaddw.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vaddw_w_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vaddw.w.u.vv", Vd, Vs, Vt))
+#define vaddw_w_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vaddw.w.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vaddw_w_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vaddw.w.vv.m", Vd, Vs, Vt))
+#define vaddw_w_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vaddw.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vaddw_w_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vaddw.w.u.vv.m", Vd, Vs, Vt))
+#define vaddw_w_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vaddw.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsubw_h_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vsubw.h.vv", Vd, Vs, Vt))  // vsubw.*: each macro emits one asm statement with Vd/Vs/Vt stringized as operands. Presumably widening subtract -- confirm in ISA spec.
+#define vsubw_h_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vsubw.h.vx", Vd, Vs, %0) : : "r"(t))  // _vx: t is the sole asm operand (%0, "r" input); no outputs or clobbers are declared.
+#define vsubw_h_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vsubw.h.u.vv", Vd, Vs, Vt))  // .u: presumably the unsigned form -- confirm.
+#define vsubw_h_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vsubw.h.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vsubw_h_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vsubw.h.vv.m", Vd, Vs, Vt))  // _m: selects the ".m" mnemonic; presumably the 4-register-group form (cf. vm0..vm15 in kelvin.h) -- confirm.
+#define vsubw_h_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vsubw.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsubw_h_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vsubw.h.u.vv.m", Vd, Vs, Vt))
+#define vsubw_h_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vsubw.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsubw_w_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vsubw.w.vv", Vd, Vs, Vt))  // .w group mirrors the .h group (no .b forms are defined); .h/.w presumably 16/32-bit elements -- confirm.
+#define vsubw_w_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vsubw.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vsubw_w_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vsubw.w.u.vv", Vd, Vs, Vt))
+#define vsubw_w_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vsubw.w.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vsubw_w_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vsubw.w.vv.m", Vd, Vs, Vt))
+#define vsubw_w_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vsubw.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsubw_w_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vsubw.w.u.vv.m", Vd, Vs, Vt))
+#define vsubw_w_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vsubw.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vacc_h_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vacc.h.vv", Vd, Vs, Vt))  // vacc.*: each macro emits one asm statement with Vd/Vs/Vt stringized as operands. Presumably an accumulate operation -- confirm in ISA spec.
+#define vacc_h_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vacc.h.vx", Vd, Vs, %0) : : "r"(t))  // _vx: t is the sole asm operand (%0, "r" input); no outputs or clobbers are declared.
+#define vacc_h_u_vv(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vacc.h.u.vv", Vd, Vs, Vt))  // .u: presumably the unsigned form -- confirm.
+#define vacc_h_u_vx(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vacc.h.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vacc_h_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vacc.h.vv.m", Vd, Vs, Vt))  // _m: selects the ".m" mnemonic; presumably the 4-register-group form (cf. vm0..vm15 in kelvin.h) -- confirm.
+#define vacc_h_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vacc.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vacc_h_u_vv_m(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vacc.h.u.vv.m", Vd, Vs, Vt))
+#define vacc_h_u_vx_m(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vacc.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vacc_w_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vacc.w.vv", Vd, Vs, Vt))  // .w group mirrors the .h group (no .b forms are defined); .h/.w presumably 16/32-bit elements -- confirm.
+#define vacc_w_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vacc.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vacc_w_u_vv(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vacc.w.u.vv", Vd, Vs, Vt))
+#define vacc_w_u_vx(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vacc.w.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vacc_w_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vacc.w.vv.m", Vd, Vs, Vt))
+#define vacc_w_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vacc.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vacc_w_u_vv_m(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vacc.w.u.vv.m", Vd, Vs, Vt))
+#define vacc_w_u_vx_m(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vacc.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vpadd_h_v(Vd, Vs)           __asm__ __volatile__(ARGS_F_A_A("vpadd.h.v", Vd, Vs))  // vpadd.*: one asm statement per macro; _v forms pass only Vd, Vs (ARGS_F_A_A). Presumably pairwise add -- confirm in ISA spec.
+#define vpadd_h_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vpadd.h.vv", Vd, Vs, Vt))  // _vv: Vd/Vs/Vt are stringized into the asm operand text.
+#define vpadd_h_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vpadd.h.vx", Vd, Vs, %0) : : "r"(t))  // _vx: t is the sole asm operand (%0, "r" input); no outputs or clobbers are declared.
+#define vpadd_h_u_v(Vd, Vs)         __asm__ __volatile__(ARGS_F_A_A("vpadd.h.u.v", Vd, Vs))  // .u: presumably the unsigned form -- confirm.
+#define vpadd_h_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vpadd.h.u.vv", Vd, Vs, Vt))
+#define vpadd_h_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vpadd.h.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vpadd_h_v_m(Vd, Vs)         __asm__ __volatile__(ARGS_F_A_A("vpadd.h.v.m", Vd, Vs))  // _m: selects the ".m" mnemonic; presumably the 4-register-group form (cf. vm0..vm15 in kelvin.h) -- confirm.
+#define vpadd_h_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vpadd.h.vv.m", Vd, Vs, Vt))
+#define vpadd_h_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vpadd.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vpadd_h_u_v_m(Vd, Vs)       __asm__ __volatile__(ARGS_F_A_A("vpadd.h.u.v.m", Vd, Vs))
+#define vpadd_h_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vpadd.h.u.vv.m", Vd, Vs, Vt))
+#define vpadd_h_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vpadd.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vpadd_w_v(Vd, Vs)           __asm__ __volatile__(ARGS_F_A_A("vpadd.w.v", Vd, Vs))  // .w group mirrors the .h group (no .b forms are defined); .h/.w presumably 16/32-bit elements -- confirm.
+#define vpadd_w_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vpadd.w.vv", Vd, Vs, Vt))
+#define vpadd_w_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vpadd.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vpadd_w_u_v(Vd, Vs)         __asm__ __volatile__(ARGS_F_A_A("vpadd.w.u.v", Vd, Vs))
+#define vpadd_w_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vpadd.w.u.vv", Vd, Vs, Vt))
+#define vpadd_w_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vpadd.w.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vpadd_w_v_m(Vd, Vs)         __asm__ __volatile__(ARGS_F_A_A("vpadd.w.v.m", Vd, Vs))
+#define vpadd_w_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vpadd.w.vv.m", Vd, Vs, Vt))
+#define vpadd_w_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vpadd.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vpadd_w_u_v_m(Vd, Vs)       __asm__ __volatile__(ARGS_F_A_A("vpadd.w.u.v.m", Vd, Vs))
+#define vpadd_w_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vpadd.w.u.vv.m", Vd, Vs, Vt))
+#define vpadd_w_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vpadd.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vpsub_h_v(Vd, Vs)           __asm__ __volatile__(ARGS_F_A_A("vpsub.h.v", Vd, Vs))  // vpsub.*: one asm statement per macro; _v forms pass only Vd, Vs (ARGS_F_A_A). Presumably pairwise subtract -- confirm in ISA spec.
+#define vpsub_h_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vpsub.h.vv", Vd, Vs, Vt))  // _vv: Vd/Vs/Vt are stringized into the asm operand text.
+#define vpsub_h_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vpsub.h.vx", Vd, Vs, %0) : : "r"(t))  // _vx: t is the sole asm operand (%0, "r" input); no outputs or clobbers are declared.
+#define vpsub_h_u_v(Vd, Vs)         __asm__ __volatile__(ARGS_F_A_A("vpsub.h.u.v", Vd, Vs))  // .u: presumably the unsigned form -- confirm.
+#define vpsub_h_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vpsub.h.u.vv", Vd, Vs, Vt))
+#define vpsub_h_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vpsub.h.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vpsub_h_v_m(Vd, Vs)         __asm__ __volatile__(ARGS_F_A_A("vpsub.h.v.m", Vd, Vs))  // _m: selects the ".m" mnemonic; presumably the 4-register-group form (cf. vm0..vm15 in kelvin.h) -- confirm.
+#define vpsub_h_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vpsub.h.vv.m", Vd, Vs, Vt))
+#define vpsub_h_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vpsub.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vpsub_h_u_v_m(Vd, Vs)       __asm__ __volatile__(ARGS_F_A_A("vpsub.h.u.v.m", Vd, Vs))
+#define vpsub_h_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vpsub.h.u.vv.m", Vd, Vs, Vt))
+#define vpsub_h_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vpsub.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vpsub_w_v(Vd, Vs)           __asm__ __volatile__(ARGS_F_A_A("vpsub.w.v", Vd, Vs))  // .w group mirrors the .h group (no .b forms are defined); .h/.w presumably 16/32-bit elements -- confirm.
+#define vpsub_w_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vpsub.w.vv", Vd, Vs, Vt))
+#define vpsub_w_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vpsub.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vpsub_w_u_v(Vd, Vs)         __asm__ __volatile__(ARGS_F_A_A("vpsub.w.u.v", Vd, Vs))
+#define vpsub_w_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vpsub.w.u.vv", Vd, Vs, Vt))
+#define vpsub_w_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vpsub.w.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vpsub_w_v_m(Vd, Vs)         __asm__ __volatile__(ARGS_F_A_A("vpsub.w.v.m", Vd, Vs))
+#define vpsub_w_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vpsub.w.vv.m", Vd, Vs, Vt))
+#define vpsub_w_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vpsub.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vpsub_w_u_v_m(Vd, Vs)       __asm__ __volatile__(ARGS_F_A_A("vpsub.w.u.v.m", Vd, Vs))
+#define vpsub_w_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vpsub.w.u.vv.m", Vd, Vs, Vt))
+#define vpsub_w_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vpsub.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_b_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.vv", Vd, Vs, Vt))  // vhadd.b.*: each macro emits one asm statement with Vd/Vs/Vt stringized as operands. Presumably halving add on 8-bit elements -- confirm in ISA spec.
+#define vhadd_b_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.vx", Vd, Vs, %0) : : "r"(t))  // _vx: t is the sole asm operand (%0, "r" input); no outputs or clobbers are declared.
+#define vhadd_b_1_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.1.vv", Vd, Vs, Vt))  // _1: selects the ".1" mnemonic variant; meaning not visible here (NOTE(review): possibly the rounding form) -- confirm.
+#define vhadd_b_1_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhadd_b_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.u.vv", Vd, Vs, Vt))  // .u: presumably the unsigned form -- confirm.
+#define vhadd_b_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vhadd_b_u_1_vv(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.u.1.vv", Vd, Vs, Vt))
+#define vhadd_b_u_1_vx(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.u.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhadd_b_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.vv.m", Vd, Vs, Vt))  // _m: selects the ".m" mnemonic; presumably the 4-register-group form (cf. vm0..vm15 in kelvin.h) -- confirm.
+#define vhadd_b_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_b_1_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.1.vv.m", Vd, Vs, Vt))
+#define vhadd_b_1_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_b_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.u.vv.m", Vd, Vs, Vt))
+#define vhadd_b_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_b_u_1_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.u.1.vv.m", Vd, Vs, Vt))  // Name overflows the alignment column; a separating space is kept before the replacement list.
+#define vhadd_b_u_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.u.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_h_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.vv", Vd, Vs, Vt))  // vhadd.h.*: each macro emits one asm statement with Vd/Vs/Vt stringized as operands. Presumably halving add on 16-bit elements -- confirm in ISA spec.
+#define vhadd_h_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.vx", Vd, Vs, %0) : : "r"(t))  // _vx: t is the sole asm operand (%0, "r" input); no outputs or clobbers are declared.
+#define vhadd_h_1_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.1.vv", Vd, Vs, Vt))  // _1: selects the ".1" mnemonic variant; meaning not visible here (NOTE(review): possibly the rounding form) -- confirm.
+#define vhadd_h_1_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhadd_h_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.u.vv", Vd, Vs, Vt))  // .u: presumably the unsigned form -- confirm.
+#define vhadd_h_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vhadd_h_u_1_vv(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.u.1.vv", Vd, Vs, Vt))
+#define vhadd_h_u_1_vx(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.u.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhadd_h_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.vv.m", Vd, Vs, Vt))  // _m: selects the ".m" mnemonic; presumably the 4-register-group form (cf. vm0..vm15 in kelvin.h) -- confirm.
+#define vhadd_h_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_h_1_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.1.vv.m", Vd, Vs, Vt))
+#define vhadd_h_1_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_h_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.u.vv.m", Vd, Vs, Vt))
+#define vhadd_h_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_h_u_1_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.u.1.vv.m", Vd, Vs, Vt))  // Name overflows the alignment column; a separating space is kept before the replacement list.
+#define vhadd_h_u_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.u.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_w_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.vv", Vd, Vs, Vt))
+#define vhadd_w_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vhadd_w_1_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.1.vv", Vd, Vs, Vt))
+#define vhadd_w_1_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhadd_w_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.u.vv", Vd, Vs, Vt))
+#define vhadd_w_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vhadd_w_u_1_vv(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.u.1.vv", Vd, Vs, Vt))
+#define vhadd_w_u_1_vx(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.u.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhadd_w_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.vv.m", Vd, Vs, Vt))
+#define vhadd_w_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_w_1_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.1.vv.m", Vd, Vs, Vt))
+#define vhadd_w_1_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_w_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.u.vv.m", Vd, Vs, Vt))
+#define vhadd_w_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_w_u_1_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.u.1.vv.m", Vd, Vs, Vt))
+#define vhadd_w_u_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.u.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_b_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.vv", Vd, Vs, Vt))  // vhsub wrappers: each macro emits one inline-asm instruction; _vx forms pass scalar t as "r" operand %0.
+#define vhsub_b_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_b_1_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.1.vv", Vd, Vs, Vt))
+#define vhsub_b_1_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_b_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.u.vv", Vd, Vs, Vt))
+#define vhsub_b_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_b_u_1_vv(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.u.1.vv", Vd, Vs, Vt))
+#define vhsub_b_u_1_vx(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.u.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_b_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.vv.m", Vd, Vs, Vt))
+#define vhsub_b_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_b_1_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.1.vv.m", Vd, Vs, Vt))
+#define vhsub_b_1_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_b_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.u.vv.m", Vd, Vs, Vt))
+#define vhsub_b_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_b_u_1_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.u.1.vv.m", Vd, Vs, Vt))
+#define vhsub_b_u_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.u.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_h_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.vv", Vd, Vs, Vt))
+#define vhsub_h_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_h_1_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.1.vv", Vd, Vs, Vt))
+#define vhsub_h_1_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_h_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.u.vv", Vd, Vs, Vt))
+#define vhsub_h_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_h_u_1_vv(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.u.1.vv", Vd, Vs, Vt))
+#define vhsub_h_u_1_vx(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.u.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_h_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.vv.m", Vd, Vs, Vt))
+#define vhsub_h_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_h_1_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.1.vv.m", Vd, Vs, Vt))
+#define vhsub_h_1_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_h_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.u.vv.m", Vd, Vs, Vt))
+#define vhsub_h_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_h_u_1_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.u.1.vv.m", Vd, Vs, Vt))
+#define vhsub_h_u_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.u.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_w_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.vv", Vd, Vs, Vt))
+#define vhsub_w_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_w_1_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.1.vv", Vd, Vs, Vt))
+#define vhsub_w_1_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_w_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.u.vv", Vd, Vs, Vt))
+#define vhsub_w_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_w_u_1_vv(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.u.1.vv", Vd, Vs, Vt))
+#define vhsub_w_u_1_vx(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.u.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_w_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.vv.m", Vd, Vs, Vt))
+#define vhsub_w_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_w_1_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.1.vv.m", Vd, Vs, Vt))
+#define vhsub_w_1_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_w_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.u.vv.m", Vd, Vs, Vt))
+#define vhsub_w_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_w_u_1_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.u.1.vv.m", Vd, Vs, Vt))
+#define vhsub_w_u_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.u.1.vx.m", Vd, Vs, %0) : : "r"(t))
+// 001 Logical
+#define vand_vv(Vd, Vs, Vt)         __asm__ __volatile__(ARGS_F_A_A_A("vand.vv", Vd, Vs, Vt))  // vand wrappers: _vv forms name three vector registers; _{b,h,w}_vx forms pass scalar t as "r" operand %0.
+#define vand_b_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vand.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vand_vv_m(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vand.vv.m", Vd, Vs, Vt))
+#define vand_b_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vand.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vand_h_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vand.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vand_h_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vand.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vand_w_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vand.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vand_w_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vand.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vor_vv(Vd, Vs, Vt)          __asm__ __volatile__(ARGS_F_A_A_A("vor.vv", Vd, Vs, Vt))  // vor wrappers: _vv forms name three vector registers; _{b,h,w}_vx forms pass scalar t as "r" operand %0.
+#define vor_b_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vor.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vor_vv_m(Vd, Vs, Vt)        __asm__ __volatile__(ARGS_F_A_A_A("vor.vv.m", Vd, Vs, Vt))
+#define vor_b_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vor.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vor_h_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vor.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vor_h_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vor.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vor_w_vx(Vd, Vs, t)         __asm__ __volatile__(ARGS_F_A_A_A("vor.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vor_w_vx_m(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vor.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vxor_vv(Vd, Vs, Vt)         __asm__ __volatile__(ARGS_F_A_A_A("vxor.vv", Vd, Vs, Vt))  // vxor wrappers: _vv forms name three vector registers; _{b,h,w}_vx forms pass scalar t as "r" operand %0.
+#define vxor_b_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vxor.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vxor_vv_m(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vxor.vv.m", Vd, Vs, Vt))
+#define vxor_b_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vxor.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vxor_h_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vxor.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vxor_h_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vxor.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vxor_w_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vxor.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vxor_w_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vxor.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vnot_v(Vd, Vs)              __asm__ __volatile__(ARGS_F_A_A("vnot.v", Vd, Vs))  // vnot wrappers: two-operand forms; register names Vd, Vs are stringized into the asm text.
+#define vnot_v_m(Vd, Vs)            __asm__ __volatile__(ARGS_F_A_A("vnot.v.m", Vd, Vs))
+#define vrev_vv(Vd, Vs, Vt)         __asm__ __volatile__(ARGS_F_A_A_A("vrev.vv", Vd, Vs, Vt))  // vrev wrappers: each macro emits one inline-asm instruction; _vx forms pass scalar t as "r" operand %0.
+#define vrev_vx(Vd, Vs, t)          __asm__ __volatile__(ARGS_F_A_A_A("vrev.vx", Vd, Vs, %0) : : "r"(t))
+#define vrev_vv_m(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vrev.vv.m", Vd, Vs, Vt))
+#define vrev_vx_m(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vrev.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vror_vv(Vd, Vs, Vt)         __asm__ __volatile__(ARGS_F_A_A_A("vror.vv", Vd, Vs, Vt))  // vror wrappers: each macro emits one inline-asm instruction; _vx forms pass scalar t as "r" operand %0.
+#define vror_vx(Vd, Vs, t)          __asm__ __volatile__(ARGS_F_A_A_A("vror.vx", Vd, Vs, %0) : : "r"(t))
+#define vror_vv_m(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vror.vv.m", Vd, Vs, Vt))
+#define vror_vx_m(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vror.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vclb_b_v(Vd, Vs)            __asm__ __volatile__(ARGS_F_A_A("vclb.b.v", Vd, Vs))  // vclb wrappers: two-operand forms; register names Vd, Vs are stringized into the asm text.
+#define vclb_b_v_m(Vd, Vs)          __asm__ __volatile__(ARGS_F_A_A("vclb.b.v.m", Vd, Vs))
+#define vclb_h_v(Vd, Vs)            __asm__ __volatile__(ARGS_F_A_A("vclb.h.v", Vd, Vs))
+#define vclb_h_v_m(Vd, Vs)          __asm__ __volatile__(ARGS_F_A_A("vclb.h.v.m", Vd, Vs))
+#define vclb_w_v(Vd, Vs)            __asm__ __volatile__(ARGS_F_A_A("vclb.w.v", Vd, Vs))
+#define vclb_w_v_m(Vd, Vs)          __asm__ __volatile__(ARGS_F_A_A("vclb.w.v.m", Vd, Vs))
+#define vclz_b_v(Vd, Vs)            __asm__ __volatile__(ARGS_F_A_A("vclz.b.v", Vd, Vs))  // vclz wrappers: two-operand forms; register names Vd, Vs are stringized into the asm text.
+#define vclz_b_v_m(Vd, Vs)          __asm__ __volatile__(ARGS_F_A_A("vclz.b.v.m", Vd, Vs))
+#define vclz_h_v(Vd, Vs)            __asm__ __volatile__(ARGS_F_A_A("vclz.h.v", Vd, Vs))
+#define vclz_h_v_m(Vd, Vs)          __asm__ __volatile__(ARGS_F_A_A("vclz.h.v.m", Vd, Vs))
+#define vclz_w_v(Vd, Vs)            __asm__ __volatile__(ARGS_F_A_A("vclz.w.v", Vd, Vs))
+#define vclz_w_v_m(Vd, Vs)          __asm__ __volatile__(ARGS_F_A_A("vclz.w.v.m", Vd, Vs))
+#define vcpop_b_v(Vd, Vs)           __asm__ __volatile__(ARGS_F_A_A("vcpop.b.v", Vd, Vs))  // vcpop wrappers: two-operand forms; register names Vd, Vs are stringized into the asm text.
+#define vcpop_b_v_m(Vd, Vs)         __asm__ __volatile__(ARGS_F_A_A("vcpop.b.v.m", Vd, Vs))
+#define vcpop_h_v(Vd, Vs)           __asm__ __volatile__(ARGS_F_A_A("vcpop.h.v", Vd, Vs))
+#define vcpop_h_v_m(Vd, Vs)         __asm__ __volatile__(ARGS_F_A_A("vcpop.h.v.m", Vd, Vs))
+#define vcpop_w_v(Vd, Vs)           __asm__ __volatile__(ARGS_F_A_A("vcpop.w.v", Vd, Vs))
+#define vcpop_w_v_m(Vd, Vs)         __asm__ __volatile__(ARGS_F_A_A("vcpop.w.v.m", Vd, Vs))
+#define vmv_v(Vd, Vs)               __asm__ __volatile__(ARGS_F_A_A("vmv.v", Vd, Vs))  // vmv wrappers: two-operand forms; register names Vd, Vs are stringized into the asm text.
+#define vmv_v_m(Vd, Vs)             __asm__ __volatile__(ARGS_F_A_A("vmv.v.m", Vd, Vs))
+#define vmvp_vv(Vd, Vs, Vt)         __asm__ __volatile__(ARGS_F_A_A_A("vmvp.vv", Vd, Vs, Vt))  // vmvp wrappers: _vv forms name three vector registers; _{b,h,w}_vx forms pass scalar t as "r" operand %0.
+#define vmvp_b_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vmvp.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vmvp_vv_m(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vmvp.vv.m", Vd, Vs, Vt))
+#define vmvp_b_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmvp.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmvp_h_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vmvp.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vmvp_h_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmvp.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmvp_w_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vmvp.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vmvp_w_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmvp.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define acset_v(Vd, Vs)             __asm__ __volatile__(ARGS_F_A_A("acset.v", Vd, Vs))  // acset/actr/adwinit wrappers: two-operand forms with no scalar operand and no .m variant defined here.
+#define actr_v(Vd, Vs)              __asm__ __volatile__(ARGS_F_A_A("actr.v", Vd, Vs))
+#define adwinit_v(Vd, Vs)           __asm__ __volatile__(ARGS_F_A_A("adwinit.v", Vd, Vs))
+// 010 Shift
+#define vsll_b_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vsll.b.vv", Vd, Vs, Vt))  // vsll wrappers: each macro emits one inline-asm instruction; _vx forms pass scalar t as "r" operand %0.
+#define vsll_b_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vsll.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vsll_b_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsll.b.vv.m", Vd, Vs, Vt))
+#define vsll_b_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsll.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsll_h_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vsll.h.vv", Vd, Vs, Vt))
+#define vsll_h_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vsll.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vsll_h_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsll.h.vv.m", Vd, Vs, Vt))
+#define vsll_h_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsll.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsll_w_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vsll.w.vv", Vd, Vs, Vt))
+#define vsll_w_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vsll.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vsll_w_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsll.w.vv.m", Vd, Vs, Vt))
+#define vsll_w_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsll.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsra_b_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vsra.b.vv", Vd, Vs, Vt))  // vsra wrappers: each macro emits one inline-asm instruction; _vx forms pass scalar t as "r" operand %0.
+#define vsra_b_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vsra.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vsra_b_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsra.b.vv.m", Vd, Vs, Vt))
+#define vsra_b_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsra.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsra_h_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vsra.h.vv", Vd, Vs, Vt))
+#define vsra_h_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vsra.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vsra_h_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsra.h.vv.m", Vd, Vs, Vt))
+#define vsra_h_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsra.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsra_w_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vsra.w.vv", Vd, Vs, Vt))
+#define vsra_w_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vsra.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vsra_w_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsra.w.vv.m", Vd, Vs, Vt))
+#define vsra_w_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsra.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsrl_b_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vsrl.b.vv", Vd, Vs, Vt))  // vsrl wrappers: each macro emits one inline-asm instruction; _vx forms pass scalar t as "r" operand %0.
+#define vsrl_b_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vsrl.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vsrl_b_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsrl.b.vv.m", Vd, Vs, Vt))
+#define vsrl_b_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsrl.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsrl_h_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vsrl.h.vv", Vd, Vs, Vt))
+#define vsrl_h_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vsrl.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vsrl_h_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsrl.h.vv.m", Vd, Vs, Vt))
+#define vsrl_h_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsrl.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsrl_w_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vsrl.w.vv", Vd, Vs, Vt))
+#define vsrl_w_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vsrl.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vsrl_w_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsrl.w.vv.m", Vd, Vs, Vt))
+#define vsrl_w_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsrl.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsha_b_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vsha.b.vv", Vd, Vs, Vt))  // vsha wrappers: each macro emits one inline-asm instruction; _vx forms pass scalar t as "r" operand %0.
+#define vsha_b_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vsha.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vsha_b_r_vv(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsha.b.r.vv", Vd, Vs, Vt))
+#define vsha_b_r_vx(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsha.b.r.vx", Vd, Vs, %0) : : "r"(t))
+#define vsha_b_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsha.b.vv.m", Vd, Vs, Vt))
+#define vsha_b_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsha.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsha_b_r_vv_m(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vsha.b.r.vv.m", Vd, Vs, Vt))
+#define vsha_b_r_vx_m(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vsha.b.r.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsha_h_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vsha.h.vv", Vd, Vs, Vt))
+#define vsha_h_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vsha.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vsha_h_r_vv(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsha.h.r.vv", Vd, Vs, Vt))
+#define vsha_h_r_vx(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsha.h.r.vx", Vd, Vs, %0) : : "r"(t))
+#define vsha_h_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsha.h.vv.m", Vd, Vs, Vt))
+#define vsha_h_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsha.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsha_h_r_vv_m(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vsha.h.r.vv.m", Vd, Vs, Vt))
+#define vsha_h_r_vx_m(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vsha.h.r.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsha_w_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vsha.w.vv", Vd, Vs, Vt))
+#define vsha_w_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vsha.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vsha_w_r_vv(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsha.w.r.vv", Vd, Vs, Vt))
+#define vsha_w_r_vx(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsha.w.r.vx", Vd, Vs, %0) : : "r"(t))
+#define vsha_w_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsha.w.vv.m", Vd, Vs, Vt))
+#define vsha_w_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsha.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsha_w_r_vv_m(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vsha.w.r.vv.m", Vd, Vs, Vt))
+#define vsha_w_r_vx_m(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vsha.w.r.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vshl_b_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vshl.b.vv", Vd, Vs, Vt))  // vshl wrappers: each macro emits one inline-asm instruction; _vx forms pass scalar t as "r" operand %0.
+#define vshl_b_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vshl.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vshl_b_r_vv(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vshl.b.r.vv", Vd, Vs, Vt))
+#define vshl_b_r_vx(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vshl.b.r.vx", Vd, Vs, %0) : : "r"(t))
+#define vshl_b_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vshl.b.vv.m", Vd, Vs, Vt))
+#define vshl_b_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vshl.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vshl_b_r_vv_m(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vshl.b.r.vv.m", Vd, Vs, Vt))
+#define vshl_b_r_vx_m(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vshl.b.r.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vshl_h_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vshl.h.vv", Vd, Vs, Vt))
+#define vshl_h_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vshl.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vshl_h_r_vv(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vshl.h.r.vv", Vd, Vs, Vt))
+#define vshl_h_r_vx(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vshl.h.r.vx", Vd, Vs, %0) : : "r"(t))
+#define vshl_h_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vshl.h.vv.m", Vd, Vs, Vt))
+#define vshl_h_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vshl.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vshl_h_r_vv_m(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vshl.h.r.vv.m", Vd, Vs, Vt))
+#define vshl_h_r_vx_m(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vshl.h.r.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vshl_w_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vshl.w.vv", Vd, Vs, Vt))
+#define vshl_w_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vshl.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vshl_w_r_vv(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vshl.w.r.vv", Vd, Vs, Vt))
+#define vshl_w_r_vx(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vshl.w.r.vx", Vd, Vs, %0) : : "r"(t))
+#define vshl_w_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vshl.w.vv.m", Vd, Vs, Vt))
+#define vshl_w_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vshl.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vshl_w_r_vv_m(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vshl.w.r.vv.m", Vd, Vs, Vt))
+#define vshl_w_r_vx_m(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vshl.w.r.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsrans_b_vv(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsrans.b.vv", Vd, Vs, Vt))  // vsrans wrappers: only .b and .h mnemonics are defined here; _vx forms pass scalar t as "r" operand %0.
+#define vsrans_b_vx(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsrans.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vsrans_b_r_vv(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vsrans.b.r.vv", Vd, Vs, Vt))
+#define vsrans_b_r_vx(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vsrans.b.r.vx", Vd, Vs, %0) : : "r"(t))
+#define vsrans_b_vv_m(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vsrans.b.vv.m", Vd, Vs, Vt))
+#define vsrans_b_vx_m(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vsrans.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsrans_b_r_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vsrans.b.r.vv.m", Vd, Vs, Vt))
+#define vsrans_b_r_vx_m(Vd, Vs, t)  __asm__ __volatile__(ARGS_F_A_A_A("vsrans.b.r.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsrans_h_vv(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsrans.h.vv", Vd, Vs, Vt))
+#define vsrans_h_vx(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsrans.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vsrans_h_r_vv(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vsrans.h.r.vv", Vd, Vs, Vt))
+#define vsrans_h_r_vx(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vsrans.h.r.vx", Vd, Vs, %0) : : "r"(t))
+#define vsrans_h_vv_m(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vsrans.h.vv.m", Vd, Vs, Vt))
+#define vsrans_h_vx_m(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vsrans.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsrans_h_r_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vsrans.h.r.vv.m", Vd, Vs, Vt))
+#define vsrans_h_r_vx_m(Vd, Vs, t)  __asm__ __volatile__(ARGS_F_A_A_A("vsrans.h.r.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsransu_b_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vsransu.b.vv", Vd, Vs, Vt))  // vsransu wrappers: only .b and .h mnemonics are defined here; _vx forms pass scalar t as "r" operand %0.
+#define vsransu_b_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vsransu.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vsransu_b_r_vv(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vsransu.b.r.vv", Vd, Vs, Vt))
+#define vsransu_b_r_vx(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vsransu.b.r.vx", Vd, Vs, %0) : : "r"(t))
+#define vsransu_b_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vsransu.b.vv.m", Vd, Vs, Vt))
+#define vsransu_b_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vsransu.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsransu_b_r_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vsransu.b.r.vv.m", Vd, Vs, Vt))
+#define vsransu_b_r_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsransu.b.r.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsransu_h_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vsransu.h.vv", Vd, Vs, Vt))
+#define vsransu_h_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vsransu.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vsransu_h_r_vv(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vsransu.h.r.vv", Vd, Vs, Vt))
+#define vsransu_h_r_vx(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vsransu.h.r.vx", Vd, Vs, %0) : : "r"(t))
+#define vsransu_h_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vsransu.h.vv.m", Vd, Vs, Vt))
+#define vsransu_h_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vsransu.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsransu_h_r_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vsransu.h.r.vv.m", Vd, Vs, Vt))
+#define vsransu_h_r_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsransu.h.r.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsraqs_b_vv(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vsraqs.b.vv", Vd, Vs, Vt))  // vsraqs wrappers: only .b mnemonics are defined here; _vx forms pass scalar t as "r" operand %0.
+#define vsraqs_b_vx(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vsraqs.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vsraqs_b_r_vv(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vsraqs.b.r.vv", Vd, Vs, Vt))
+#define vsraqs_b_r_vx(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vsraqs.b.r.vx", Vd, Vs, %0) : : "r"(t))
+#define vsraqs_b_vv_m(Vd, Vs, Vt)   __asm__ __volatile__(ARGS_F_A_A_A("vsraqs.b.vv.m", Vd, Vs, Vt))
+#define vsraqs_b_vx_m(Vd, Vs, t)    __asm__ __volatile__(ARGS_F_A_A_A("vsraqs.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsraqs_b_r_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vsraqs.b.r.vv.m", Vd, Vs, Vt))
+#define vsraqs_b_r_vx_m(Vd, Vs, t)  __asm__ __volatile__(ARGS_F_A_A_A("vsraqs.b.r.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsraqsu_b_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vsraqsu.b.vv", Vd, Vs, Vt))  // vsraqsu wrappers: only .b mnemonics are defined here; _vx forms pass scalar t as "r" operand %0.
+#define vsraqsu_b_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vsraqsu.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vsraqsu_b_r_vv(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vsraqsu.b.r.vv", Vd, Vs, Vt))
+#define vsraqsu_b_r_vx(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vsraqsu.b.r.vx", Vd, Vs, %0) : : "r"(t))
+#define vsraqsu_b_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vsraqsu.b.vv.m", Vd, Vs, Vt))
+#define vsraqsu_b_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vsraqsu.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vsraqsu_b_r_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vsraqsu.b.r.vv.m", Vd, Vs, Vt))
+#define vsraqsu_b_r_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsraqsu.b.r.vx.m", Vd, Vs, %0) : : "r"(t))
+// 011 Mul/Div
+#define vmul_b_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vmul.b.vv", Vd, Vs, Vt))  // vmul wrappers: each macro emits one inline-asm instruction; _vx forms pass scalar t as "r" operand %0.
+#define vmul_b_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vmul.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vmul_b_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vmul.b.vv.m", Vd, Vs, Vt))
+#define vmul_b_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmul.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmul_h_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vmul.h.vv", Vd, Vs, Vt))
+#define vmul_h_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vmul.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vmul_h_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vmul.h.vv.m", Vd, Vs, Vt))
+#define vmul_h_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmul.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmul_w_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vmul.w.vv", Vd, Vs, Vt))
+#define vmul_w_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vmul.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vmul_w_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vmul.w.vv.m", Vd, Vs, Vt))
+#define vmul_w_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vmul.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmuls_b_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vmuls.b.vv", Vd, Vs, Vt))  // vmuls wrappers: each macro emits one inline-asm instruction; _vx forms pass scalar t as "r" operand %0.
+#define vmuls_b_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vmuls.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vmuls_b_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vmuls.b.u.vv", Vd, Vs, Vt))
+#define vmuls_b_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vmuls.b.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vmuls_b_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vmuls.b.vv.m", Vd, Vs, Vt))
+#define vmuls_b_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vmuls.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmuls_b_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vmuls.b.u.vv.m", Vd, Vs, Vt))
+#define vmuls_b_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vmuls.b.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmuls_h_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vmuls.h.vv", Vd, Vs, Vt))
+#define vmuls_h_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vmuls.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vmuls_h_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vmuls.h.u.vv", Vd, Vs, Vt))
+#define vmuls_h_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vmuls.h.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vmuls_h_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vmuls.h.vv.m", Vd, Vs, Vt))
+#define vmuls_h_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vmuls.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmuls_h_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vmuls.h.u.vv.m", Vd, Vs, Vt))
+#define vmuls_h_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vmuls.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmuls_w_vv(Vd, Vs, Vt)      __asm__ __volatile__(ARGS_F_A_A_A("vmuls.w.vv", Vd, Vs, Vt))
+#define vmuls_w_vx(Vd, Vs, t)       __asm__ __volatile__(ARGS_F_A_A_A("vmuls.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vmuls_w_u_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vmuls.w.u.vv", Vd, Vs, Vt))
+#define vmuls_w_u_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vmuls.w.u.vx", Vd, Vs, %0) : : "r"(t))
+#define vmuls_w_vv_m(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vmuls.w.vv.m", Vd, Vs, Vt))
+#define vmuls_w_vx_m(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vmuls.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmuls_w_u_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vmuls.w.u.vv.m", Vd, Vs, Vt))
+#define vmuls_w_u_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vmuls.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vmulw_b_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.b.vv", vd, vs, vt))  // vmulw.{b,h,w}[.u].{vv,vx}[.m]: one volatile asm per macro; .vx forms pass xt in a general register ("r", %0).
+#define vmulw_b_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.b.vx", vd, vs, %0) : : "r"(xt))
+#define vmulw_b_u_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.b.u.vv", vd, vs, vt))
+#define vmulw_b_u_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.b.u.vx", vd, vs, %0) : : "r"(xt))
+#define vmulw_b_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.b.vv.m", vd, vs, vt))
+#define vmulw_b_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.b.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmulw_b_u_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.b.u.vv.m", vd, vs, vt))
+#define vmulw_b_u_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.b.u.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmulw_h_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.h.vv", vd, vs, vt))
+#define vmulw_h_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.h.vx", vd, vs, %0) : : "r"(xt))
+#define vmulw_h_u_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.h.u.vv", vd, vs, vt))
+#define vmulw_h_u_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.h.u.vx", vd, vs, %0) : : "r"(xt))
+#define vmulw_h_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.h.vv.m", vd, vs, vt))
+#define vmulw_h_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.h.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmulw_h_u_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.h.u.vv.m", vd, vs, vt))
+#define vmulw_h_u_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.h.u.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmulw_w_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.w.vv", vd, vs, vt))
+#define vmulw_w_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.w.vx", vd, vs, %0) : : "r"(xt))
+#define vmulw_w_u_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.w.u.vv", vd, vs, vt))
+#define vmulw_w_u_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.w.u.vx", vd, vs, %0) : : "r"(xt))
+#define vmulw_w_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.w.vv.m", vd, vs, vt))
+#define vmulw_w_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.w.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmulw_w_u_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.w.u.vv.m", vd, vs, vt))
+#define vmulw_w_u_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulw.w.u.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmulh_b_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.b.vv", vd, vs, vt))  // vmulh.{b,h,w}[.r].{vv,vx}[.m]: one volatile asm per macro; .vx forms pass xt in a general register ("r", %0).
+#define vmulh_b_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.b.vx", vd, vs, %0) : : "r"(xt))
+#define vmulh_b_r_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.b.r.vv", vd, vs, vt))
+#define vmulh_b_r_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.b.r.vx", vd, vs, %0) : : "r"(xt))
+#define vmulh_b_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.b.vv.m", vd, vs, vt))
+#define vmulh_b_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.b.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmulh_b_r_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.b.r.vv.m", vd, vs, vt))
+#define vmulh_b_r_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.b.r.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmulh_h_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.h.vv", vd, vs, vt))
+#define vmulh_h_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.h.vx", vd, vs, %0) : : "r"(xt))
+#define vmulh_h_r_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.h.r.vv", vd, vs, vt))
+#define vmulh_h_r_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.h.r.vx", vd, vs, %0) : : "r"(xt))
+#define vmulh_h_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.h.vv.m", vd, vs, vt))
+#define vmulh_h_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.h.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmulh_h_r_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.h.r.vv.m", vd, vs, vt))
+#define vmulh_h_r_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.h.r.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmulh_w_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.w.vv", vd, vs, vt))
+#define vmulh_w_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.w.vx", vd, vs, %0) : : "r"(xt))
+#define vmulh_w_r_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.w.r.vv", vd, vs, vt))
+#define vmulh_w_r_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.w.r.vx", vd, vs, %0) : : "r"(xt))
+#define vmulh_w_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.w.vv.m", vd, vs, vt))
+#define vmulh_w_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.w.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmulh_w_r_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.w.r.vv.m", vd, vs, vt))
+#define vmulh_w_r_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulh.w.r.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmulhu_b_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.b.vv", vd, vs, vt))  // vmulhu.{b,h,w}[.r].{vv,vx}[.m]: one volatile asm per macro; .vx forms pass xt in a general register ("r", %0).
+#define vmulhu_b_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.b.vx", vd, vs, %0) : : "r"(xt))
+#define vmulhu_b_r_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.b.r.vv", vd, vs, vt))
+#define vmulhu_b_r_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.b.r.vx", vd, vs, %0) : : "r"(xt))
+#define vmulhu_b_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.b.vv.m", vd, vs, vt))
+#define vmulhu_b_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.b.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmulhu_b_r_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.b.r.vv.m", vd, vs, vt))
+#define vmulhu_b_r_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.b.r.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmulhu_h_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.h.vv", vd, vs, vt))
+#define vmulhu_h_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.h.vx", vd, vs, %0) : : "r"(xt))
+#define vmulhu_h_r_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.h.r.vv", vd, vs, vt))
+#define vmulhu_h_r_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.h.r.vx", vd, vs, %0) : : "r"(xt))
+#define vmulhu_h_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.h.vv.m", vd, vs, vt))
+#define vmulhu_h_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.h.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmulhu_h_r_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.h.r.vv.m", vd, vs, vt))
+#define vmulhu_h_r_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.h.r.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmulhu_w_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.w.vv", vd, vs, vt))
+#define vmulhu_w_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.w.vx", vd, vs, %0) : : "r"(xt))
+#define vmulhu_w_r_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.w.r.vv", vd, vs, vt))
+#define vmulhu_w_r_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.w.r.vx", vd, vs, %0) : : "r"(xt))
+#define vmulhu_w_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.w.vv.m", vd, vs, vt))
+#define vmulhu_w_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.w.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmulhu_w_r_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.w.r.vv.m", vd, vs, vt))
+#define vmulhu_w_r_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmulhu.w.r.vx.m", vd, vs, %0) : : "r"(xt))
+#define vdmulh_b_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.b.vv", vd, vs, vt))  // vdmulh.{b,h,w}[.r|.rn].{vv,vx}[.m]: one volatile asm per macro; .vx forms pass xt in a general register ("r", %0).
+#define vdmulh_b_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.b.vx", vd, vs, %0) : : "r"(xt))
+#define vdmulh_b_r_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.b.r.vv", vd, vs, vt))
+#define vdmulh_b_r_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.b.r.vx", vd, vs, %0) : : "r"(xt))
+#define vdmulh_b_rn_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.b.rn.vv", vd, vs, vt))
+#define vdmulh_b_rn_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.b.rn.vx", vd, vs, %0) : : "r"(xt))
+#define vdmulh_b_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.b.vv.m", vd, vs, vt))
+#define vdmulh_b_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.b.vx.m", vd, vs, %0) : : "r"(xt))
+#define vdmulh_b_r_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.b.r.vv.m", vd, vs, vt))
+#define vdmulh_b_r_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.b.r.vx.m", vd, vs, %0) : : "r"(xt))
+#define vdmulh_b_rn_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.b.rn.vv.m", vd, vs, vt))
+#define vdmulh_b_rn_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.b.rn.vx.m", vd, vs, %0) : : "r"(xt))
+#define vdmulh_h_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.h.vv", vd, vs, vt))
+#define vdmulh_h_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.h.vx", vd, vs, %0) : : "r"(xt))
+#define vdmulh_h_r_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.h.r.vv", vd, vs, vt))
+#define vdmulh_h_r_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.h.r.vx", vd, vs, %0) : : "r"(xt))
+#define vdmulh_h_rn_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.h.rn.vv", vd, vs, vt))
+#define vdmulh_h_rn_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.h.rn.vx", vd, vs, %0) : : "r"(xt))
+#define vdmulh_h_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.h.vv.m", vd, vs, vt))
+#define vdmulh_h_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.h.vx.m", vd, vs, %0) : : "r"(xt))
+#define vdmulh_h_r_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.h.r.vv.m", vd, vs, vt))
+#define vdmulh_h_r_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.h.r.vx.m", vd, vs, %0) : : "r"(xt))
+#define vdmulh_h_rn_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.h.rn.vv.m", vd, vs, vt))
+#define vdmulh_h_rn_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.h.rn.vx.m", vd, vs, %0) : : "r"(xt))
+#define vdmulh_w_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.w.vv", vd, vs, vt))
+#define vdmulh_w_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.w.vx", vd, vs, %0) : : "r"(xt))
+#define vdmulh_w_r_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.w.r.vv", vd, vs, vt))
+#define vdmulh_w_r_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.w.r.vx", vd, vs, %0) : : "r"(xt))
+#define vdmulh_w_rn_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.w.rn.vv", vd, vs, vt))
+#define vdmulh_w_rn_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.w.rn.vx", vd, vs, %0) : : "r"(xt))
+#define vdmulh_w_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.w.vv.m", vd, vs, vt))
+#define vdmulh_w_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.w.vx.m", vd, vs, %0) : : "r"(xt))
+#define vdmulh_w_r_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.w.r.vv.m", vd, vs, vt))
+#define vdmulh_w_r_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.w.r.vx.m", vd, vs, %0) : : "r"(xt))
+#define vdmulh_w_rn_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.w.rn.vv.m", vd, vs, vt))
+#define vdmulh_w_rn_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vdmulh.w.rn.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmacc_b_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmacc.b.vv", vd, vs, vt))  // vmacc.{b,h,w}.{vv,vx}[.m]: one volatile asm per macro; .vx forms pass xt in a general register ("r", %0).
+#define vmacc_b_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmacc.b.vx", vd, vs, %0) : : "r"(xt))
+#define vmacc_b_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmacc.b.vv.m", vd, vs, vt))
+#define vmacc_b_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmacc.b.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmacc_h_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmacc.h.vv", vd, vs, vt))
+#define vmacc_h_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmacc.h.vx", vd, vs, %0) : : "r"(xt))
+#define vmacc_h_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmacc.h.vv.m", vd, vs, vt))
+#define vmacc_h_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmacc.h.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmacc_w_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmacc.w.vv", vd, vs, vt))
+#define vmacc_w_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmacc.w.vx", vd, vs, %0) : : "r"(xt))
+#define vmacc_w_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmacc.w.vv.m", vd, vs, vt))
+#define vmacc_w_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmacc.w.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmadd_b_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmadd.b.vv", vd, vs, vt))  // vmadd.{b,h,w}.{vv,vx}[.m]: one volatile asm per macro; .vx forms pass xt in a general register ("r", %0).
+#define vmadd_b_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmadd.b.vx", vd, vs, %0) : : "r"(xt))
+#define vmadd_b_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmadd.b.vv.m", vd, vs, vt))
+#define vmadd_b_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmadd.b.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmadd_h_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmadd.h.vv", vd, vs, vt))
+#define vmadd_h_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmadd.h.vx", vd, vs, %0) : : "r"(xt))
+#define vmadd_h_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmadd.h.vv.m", vd, vs, vt))
+#define vmadd_h_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmadd.h.vx.m", vd, vs, %0) : : "r"(xt))
+#define vmadd_w_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmadd.w.vv", vd, vs, vt))
+#define vmadd_w_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmadd.w.vx", vd, vs, %0) : : "r"(xt))
+#define vmadd_w_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vmadd.w.vv.m", vd, vs, vt))
+#define vmadd_w_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vmadd.w.vx.m", vd, vs, %0) : : "r"(xt))
+// 110 Shuffle
+#define vsliden_b_1_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.b.1.vv", vd, vs, vt))  // vsliden.{b,h,w}.{1..4}.{vv,vx}: one volatile asm per macro; .vx forms pass xt in a general register ("r", %0).
+#define vsliden_b_1_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.b.1.vx", vd, vs, %0) : : "r"(xt))
+#define vsliden_b_2_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.b.2.vv", vd, vs, vt))
+#define vsliden_b_2_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.b.2.vx", vd, vs, %0) : : "r"(xt))
+#define vsliden_b_3_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.b.3.vv", vd, vs, vt))
+#define vsliden_b_3_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.b.3.vx", vd, vs, %0) : : "r"(xt))
+#define vsliden_b_4_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.b.4.vv", vd, vs, vt))
+#define vsliden_b_4_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.b.4.vx", vd, vs, %0) : : "r"(xt))
+#define vsliden_h_1_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.h.1.vv", vd, vs, vt))
+#define vsliden_h_1_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.h.1.vx", vd, vs, %0) : : "r"(xt))
+#define vsliden_h_2_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.h.2.vv", vd, vs, vt))
+#define vsliden_h_2_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.h.2.vx", vd, vs, %0) : : "r"(xt))
+#define vsliden_h_3_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.h.3.vv", vd, vs, vt))
+#define vsliden_h_3_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.h.3.vx", vd, vs, %0) : : "r"(xt))
+#define vsliden_h_4_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.h.4.vv", vd, vs, vt))
+#define vsliden_h_4_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.h.4.vx", vd, vs, %0) : : "r"(xt))
+#define vsliden_w_1_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.w.1.vv", vd, vs, vt))
+#define vsliden_w_1_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.w.1.vx", vd, vs, %0) : : "r"(xt))
+#define vsliden_w_2_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.w.2.vv", vd, vs, vt))
+#define vsliden_w_2_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.w.2.vx", vd, vs, %0) : : "r"(xt))
+#define vsliden_w_3_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.w.3.vv", vd, vs, vt))
+#define vsliden_w_3_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.w.3.vx", vd, vs, %0) : : "r"(xt))
+#define vsliden_w_4_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.w.4.vv", vd, vs, vt))
+#define vsliden_w_4_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.w.4.vx", vd, vs, %0) : : "r"(xt))
+#define vslidevn_b_1_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.b.1.vv.m", vd, vs, vt))  // vslidevn.{b,h,w}.{1..4}.{vv,vx}.m: one volatile asm per macro; .vx forms pass xt in a general register ("r", %0).
+#define vslidevn_b_1_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.b.1.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevn_b_2_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.b.2.vv.m", vd, vs, vt))
+#define vslidevn_b_2_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.b.2.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevn_b_3_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.b.3.vv.m", vd, vs, vt))
+#define vslidevn_b_3_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.b.3.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevn_b_4_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.b.4.vv.m", vd, vs, vt))
+#define vslidevn_b_4_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.b.4.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevn_h_1_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.h.1.vv.m", vd, vs, vt))
+#define vslidevn_h_1_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.h.1.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevn_h_2_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.h.2.vv.m", vd, vs, vt))
+#define vslidevn_h_2_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.h.2.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevn_h_3_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.h.3.vv.m", vd, vs, vt))
+#define vslidevn_h_3_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.h.3.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevn_h_4_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.h.4.vv.m", vd, vs, vt))
+#define vslidevn_h_4_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.h.4.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevn_w_1_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.w.1.vv.m", vd, vs, vt))
+#define vslidevn_w_1_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.w.1.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevn_w_2_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.w.2.vv.m", vd, vs, vt))
+#define vslidevn_w_2_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.w.2.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevn_w_3_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.w.3.vv.m", vd, vs, vt))
+#define vslidevn_w_3_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.w.3.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevn_w_4_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.w.4.vv.m", vd, vs, vt))
+#define vslidevn_w_4_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevn.w.4.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehn_b_1_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.b.1.vv.m", vd, vs, vt))  // vslidehn.{b,h,w}.{1..4}.{vv,vx}.m: one volatile asm per macro; .vx forms pass xt in a general register ("r", %0).
+#define vslidehn_b_1_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.b.1.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehn_b_2_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.b.2.vv.m", vd, vs, vt))
+#define vslidehn_b_2_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.b.2.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehn_b_3_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.b.3.vv.m", vd, vs, vt))
+#define vslidehn_b_3_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.b.3.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehn_b_4_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.b.4.vv.m", vd, vs, vt))
+#define vslidehn_b_4_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.b.4.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehn_h_1_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.h.1.vv.m", vd, vs, vt))
+#define vslidehn_h_1_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.h.1.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehn_h_2_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.h.2.vv.m", vd, vs, vt))
+#define vslidehn_h_2_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.h.2.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehn_h_3_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.h.3.vv.m", vd, vs, vt))
+#define vslidehn_h_3_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.h.3.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehn_h_4_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.h.4.vv.m", vd, vs, vt))
+#define vslidehn_h_4_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.h.4.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehn_w_1_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.w.1.vv.m", vd, vs, vt))
+#define vslidehn_w_1_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.w.1.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehn_w_2_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.w.2.vv.m", vd, vs, vt))
+#define vslidehn_w_2_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.w.2.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehn_w_3_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.w.3.vv.m", vd, vs, vt))
+#define vslidehn_w_3_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.w.3.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehn_w_4_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.w.4.vv.m", vd, vs, vt))
+#define vslidehn_w_4_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehn.w.4.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidep_b_1_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.b.1.vv", vd, vs, vt))  // vslidep.{b,h,w}.{1..4}.{vv,vx}: one volatile asm per macro; .vx forms pass xt in a general register ("r", %0).
+#define vslidep_b_1_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.b.1.vx", vd, vs, %0) : : "r"(xt))
+#define vslidep_b_2_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.b.2.vv", vd, vs, vt))
+#define vslidep_b_2_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.b.2.vx", vd, vs, %0) : : "r"(xt))
+#define vslidep_b_3_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.b.3.vv", vd, vs, vt))
+#define vslidep_b_3_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.b.3.vx", vd, vs, %0) : : "r"(xt))
+#define vslidep_b_4_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.b.4.vv", vd, vs, vt))
+#define vslidep_b_4_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.b.4.vx", vd, vs, %0) : : "r"(xt))
+#define vslidep_h_1_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.h.1.vv", vd, vs, vt))
+#define vslidep_h_1_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.h.1.vx", vd, vs, %0) : : "r"(xt))
+#define vslidep_h_2_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.h.2.vv", vd, vs, vt))
+#define vslidep_h_2_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.h.2.vx", vd, vs, %0) : : "r"(xt))
+#define vslidep_h_3_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.h.3.vv", vd, vs, vt))
+#define vslidep_h_3_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.h.3.vx", vd, vs, %0) : : "r"(xt))
+#define vslidep_h_4_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.h.4.vv", vd, vs, vt))
+#define vslidep_h_4_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.h.4.vx", vd, vs, %0) : : "r"(xt))
+#define vslidep_w_1_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.w.1.vv", vd, vs, vt))
+#define vslidep_w_1_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.w.1.vx", vd, vs, %0) : : "r"(xt))
+#define vslidep_w_2_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.w.2.vv", vd, vs, vt))
+#define vslidep_w_2_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.w.2.vx", vd, vs, %0) : : "r"(xt))
+#define vslidep_w_3_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.w.3.vv", vd, vs, vt))
+#define vslidep_w_3_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.w.3.vx", vd, vs, %0) : : "r"(xt))
+#define vslidep_w_4_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.w.4.vv", vd, vs, vt))
+#define vslidep_w_4_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.w.4.vx", vd, vs, %0) : : "r"(xt))
+#define vslidevp_b_1_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.b.1.vv.m", vd, vs, vt))  // vslidevp.{b,h,w}.{1..4}.{vv,vx}.m: one volatile asm per macro; .vx forms pass xt in a general register ("r", %0).
+#define vslidevp_b_1_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.b.1.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevp_b_2_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.b.2.vv.m", vd, vs, vt))
+#define vslidevp_b_2_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.b.2.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevp_b_3_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.b.3.vv.m", vd, vs, vt))
+#define vslidevp_b_3_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.b.3.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevp_b_4_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.b.4.vv.m", vd, vs, vt))
+#define vslidevp_b_4_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.b.4.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevp_h_1_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.h.1.vv.m", vd, vs, vt))
+#define vslidevp_h_1_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.h.1.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevp_h_2_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.h.2.vv.m", vd, vs, vt))
+#define vslidevp_h_2_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.h.2.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevp_h_3_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.h.3.vv.m", vd, vs, vt))
+#define vslidevp_h_3_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.h.3.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevp_h_4_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.h.4.vv.m", vd, vs, vt))
+#define vslidevp_h_4_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.h.4.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevp_w_1_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.w.1.vv.m", vd, vs, vt))
+#define vslidevp_w_1_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.w.1.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevp_w_2_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.w.2.vv.m", vd, vs, vt))
+#define vslidevp_w_2_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.w.2.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevp_w_3_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.w.3.vv.m", vd, vs, vt))
+#define vslidevp_w_3_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.w.3.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidevp_w_4_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.w.4.vv.m", vd, vs, vt))
+#define vslidevp_w_4_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidevp.w.4.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehp_b_1_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.b.1.vv.m", vd, vs, vt))  // vslidehp.{b,h,w}.{1..4}.{vv,vx}.m: one volatile asm per macro; .vx forms pass xt in a general register ("r", %0).
+#define vslidehp_b_1_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.b.1.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehp_b_2_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.b.2.vv.m", vd, vs, vt))
+#define vslidehp_b_2_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.b.2.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehp_b_3_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.b.3.vv.m", vd, vs, vt))
+#define vslidehp_b_3_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.b.3.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehp_b_4_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.b.4.vv.m", vd, vs, vt))
+#define vslidehp_b_4_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.b.4.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehp_h_1_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.h.1.vv.m", vd, vs, vt))
+#define vslidehp_h_1_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.h.1.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehp_h_2_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.h.2.vv.m", vd, vs, vt))
+#define vslidehp_h_2_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.h.2.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehp_h_3_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.h.3.vv.m", vd, vs, vt))
+#define vslidehp_h_3_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.h.3.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehp_h_4_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.h.4.vv.m", vd, vs, vt))
+#define vslidehp_h_4_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.h.4.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehp_w_1_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.w.1.vv.m", vd, vs, vt))
+#define vslidehp_w_1_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.w.1.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehp_w_2_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.w.2.vv.m", vd, vs, vt))
+#define vslidehp_w_2_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.w.2.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehp_w_3_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.w.3.vv.m", vd, vs, vt))
+#define vslidehp_w_3_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.w.3.vx.m", vd, vs, %0) : : "r"(xt))
+#define vslidehp_w_4_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.w.4.vv.m", vd, vs, vt))
+#define vslidehp_w_4_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vslidehp.w.4.vx.m", vd, vs, %0) : : "r"(xt))
+#define vsel_b_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsel.b.vv", vd, vs, vt))  // vsel.{b,h,w}.{vv,vx}[.m]: one volatile asm per macro; .vx forms pass xt in a general register ("r", %0).
+#define vsel_b_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsel.b.vx", vd, vs, %0) : : "r"(xt))
+#define vsel_b_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsel.b.vv.m", vd, vs, vt))
+#define vsel_b_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsel.b.vx.m", vd, vs, %0) : : "r"(xt))
+#define vsel_h_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsel.h.vv", vd, vs, vt))
+#define vsel_h_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsel.h.vx", vd, vs, %0) : : "r"(xt))
+#define vsel_h_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsel.h.vv.m", vd, vs, vt))
+#define vsel_h_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsel.h.vx.m", vd, vs, %0) : : "r"(xt))
+#define vsel_w_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsel.w.vv", vd, vs, vt))
+#define vsel_w_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsel.w.vx", vd, vs, %0) : : "r"(xt))
+#define vsel_w_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vsel.w.vv.m", vd, vs, vt))
+#define vsel_w_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vsel.w.vx.m", vd, vs, %0) : : "r"(xt))
+#define vevn_b_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vevn.b.vv", vd, vs, vt))  // vevn.{b,h,w}.{vv,vx}[.m]: one volatile asm per macro; .vx forms pass xt in a general register ("r", %0).
+#define vevn_b_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vevn.b.vx", vd, vs, %0) : : "r"(xt))
+#define vevn_b_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vevn.b.vv.m", vd, vs, vt))
+#define vevn_b_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vevn.b.vx.m", vd, vs, %0) : : "r"(xt))
+#define vevn_h_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vevn.h.vv", vd, vs, vt))
+#define vevn_h_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vevn.h.vx", vd, vs, %0) : : "r"(xt))
+#define vevn_h_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vevn.h.vv.m", vd, vs, vt))
+#define vevn_h_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vevn.h.vx.m", vd, vs, %0) : : "r"(xt))
+#define vevn_w_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vevn.w.vv", vd, vs, vt))
+#define vevn_w_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vevn.w.vx", vd, vs, %0) : : "r"(xt))
+#define vevn_w_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vevn.w.vv.m", vd, vs, vt))
+#define vevn_w_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vevn.w.vx.m", vd, vs, %0) : : "r"(xt))
+#define vodd_b_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vodd.b.vv", vd, vs, vt))  // vodd.{b,h,w}.{vv,vx}[.m]: one volatile asm per macro; .vx forms pass xt in a general register ("r", %0).
+#define vodd_b_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vodd.b.vx", vd, vs, %0) : : "r"(xt))
+#define vodd_b_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vodd.b.vv.m", vd, vs, vt))
+#define vodd_b_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vodd.b.vx.m", vd, vs, %0) : : "r"(xt))
+#define vodd_h_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vodd.h.vv", vd, vs, vt))
+#define vodd_h_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vodd.h.vx", vd, vs, %0) : : "r"(xt))
+#define vodd_h_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vodd.h.vv.m", vd, vs, vt))
+#define vodd_h_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vodd.h.vx.m", vd, vs, %0) : : "r"(xt))
+#define vodd_w_vv(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vodd.w.vv", vd, vs, vt))
+#define vodd_w_vx(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vodd.w.vx", vd, vs, %0) : : "r"(xt))
+#define vodd_w_vv_m(vd, vs, vt) __asm__ __volatile__(ARGS_F_A_A_A("vodd.w.vv.m", vd, vs, vt))
+#define vodd_w_vx_m(vd, vs, xt) __asm__ __volatile__(ARGS_F_A_A_A("vodd.w.vx.m", vd, vs, %0) : : "r"(xt))
+#define vevnodd_b_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vevnodd.b.vv", Vd, Vs, Vt))
+#define vevnodd_b_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vevnodd.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vevnodd_b_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vevnodd.b.vv.m", Vd, Vs, Vt))
+#define vevnodd_b_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vevnodd.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vevnodd_h_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vevnodd.h.vv", Vd, Vs, Vt))
+#define vevnodd_h_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vevnodd.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vevnodd_h_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vevnodd.h.vv.m", Vd, Vs, Vt))
+#define vevnodd_h_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vevnodd.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vevnodd_w_vv(Vd, Vs, Vt)    __asm__ __volatile__(ARGS_F_A_A_A("vevnodd.w.vv", Vd, Vs, Vt))
+#define vevnodd_w_vx(Vd, Vs, t)     __asm__ __volatile__(ARGS_F_A_A_A("vevnodd.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vevnodd_w_vv_m(Vd, Vs, Vt)  __asm__ __volatile__(ARGS_F_A_A_A("vevnodd.w.vv.m", Vd, Vs, Vt))
+#define vevnodd_w_vx_m(Vd, Vs, t)   __asm__ __volatile__(ARGS_F_A_A_A("vevnodd.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vdupg_v(Vd, Vs)             __asm__ __volatile__(ARGS_F_A_A("vdupg.v", Vd, Vs))
+#define vzip_b_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vzip.b.vv", Vd, Vs, Vt))
+#define vzip_b_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vzip.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vzip_b_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vzip.b.vv.m", Vd, Vs, Vt))
+#define vzip_b_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vzip.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vzip_h_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vzip.h.vv", Vd, Vs, Vt))
+#define vzip_h_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vzip.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vzip_h_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vzip.h.vv.m", Vd, Vs, Vt))
+#define vzip_h_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vzip.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vzip_w_vv(Vd, Vs, Vt)       __asm__ __volatile__(ARGS_F_A_A_A("vzip.w.vv", Vd, Vs, Vt))
+#define vzip_w_vx(Vd, Vs, t)        __asm__ __volatile__(ARGS_F_A_A_A("vzip.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vzip_w_vv_m(Vd, Vs, Vt)     __asm__ __volatile__(ARGS_F_A_A_A("vzip.w.vv.m", Vd, Vs, Vt))
+#define vzip_w_vx_m(Vd, Vs, t)      __asm__ __volatile__(ARGS_F_A_A_A("vzip.w.vx.m", Vd, Vs, %0) : : "r"(t))
+// 3arg: four-operand forms; a scalar `t` is passed as a general-purpose register input.
+#define vfmadd4_s_vvv(Vd, Vs, Vt, Vr)   __asm__ __volatile__(ARGS_F_A_A_A_A("vfmadd4.s.vvv", Vd, Vs, Vt, Vr))
+#define vfmadd4_s_vfv(Vd, Vs, t, Vr)    __asm__ __volatile__(ARGS_F_A_A_A_A("vfmadd4.s.vfv", Vd, Vs, %0, Vr) : : "r"(t))
+#define vfmadd4_s_vvv_m(Vd, Vs, Vt, Vr) __asm__ __volatile__(ARGS_F_A_A_A_A("vfmadd4.s.vvv.m", Vd, Vs, Vt, Vr))
+#define vfmadd4_s_vfv_m(Vd, Vs, t, Vr)  __asm__ __volatile__(ARGS_F_A_A_A_A("vfmadd4.s.vfv.m", Vd, Vs, %0, Vr) : : "r"(t))
+#define vadd3_w_vvv(Vd, Vs, Vt, Vr)     __asm__ __volatile__(ARGS_F_A_A_A_A("vadd3.w.vvv", Vd, Vs, Vt, Vr))
+#define vadd3_w_vxv(Vd, Vs, t, Vr)      __asm__ __volatile__(ARGS_F_A_A_A_A("vadd3.w.vxv", Vd, Vs, %0, Vr) : : "r"(t))
+#define vadd3_w_vvv_m(Vd, Vs, Vt, Vr)   __asm__ __volatile__(ARGS_F_A_A_A_A("vadd3.w.vvv.m", Vd, Vs, Vt, Vr))
+#define vadd3_w_vxv_m(Vd, Vs, t, Vr)    __asm__ __volatile__(ARGS_F_A_A_A_A("vadd3.w.vxv.m", Vd, Vs, %0, Vr) : : "r"(t))
+#define aconv_vxv(Vd, Vs, t, Vr)        __asm__ __volatile__(ARGS_F_A_A_A_A("aconv.vxv", Vd, Vs, %0, Vr) : : "r"(t))
+#define vconv_vxv(Vd, Vs, t, Vr)        __asm__ __volatile__(ARGS_F_A_A_A_A("vconv.vxv", Vd, Vs, %0, Vr) : : "r"(t))
+#define adwconv_vxv(Vd, Vs, t, Vr)      __asm__ __volatile__(ARGS_F_A_A_A_A("adwconv.vxv", Vd, Vs, %0, Vr) : : "r"(t))
+#define vdwconv_vxv(Vd, Vs, t, Vr)      __asm__ __volatile__(ARGS_F_A_A_A_A("vdwconv.vxv", Vd, Vs, %0, Vr) : : "r"(t))
+// Scalar: one instruction each; `d` must be an lvalue (bound as the "=r" output), `s`/`t` are register inputs.
+#define clb_x(d, s)                 __asm__ __volatile__(ARGS_F_A_A("clb", %0, %1) : "=r"(d) : "r"(s))
+#define clz_x(d, s)                 __asm__ __volatile__(ARGS_F_A_A("clz", %0, %1) : "=r"(d) : "r"(s))
+#define ctz_x(d, s)                 __asm__ __volatile__(ARGS_F_A_A("ctz", %0, %1) : "=r"(d) : "r"(s))
+#define cpop_x(d, s)                __asm__ __volatile__(ARGS_F_A_A("cpop", %0, %1) : "=r"(d) : "r"(s))
+#define min_xx(d, s, t)             __asm__ __volatile__(ARGS_F_A_A_A("min", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
+#define max_xx(d, s, t)             __asm__ __volatile__(ARGS_F_A_A_A("max", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
+#define minu_xx(d, s, t)            __asm__ __volatile__(ARGS_F_A_A_A("minu", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
+#define maxu_xx(d, s, t)            __asm__ __volatile__(ARGS_F_A_A_A("maxu", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
+// clang-format on
+#endif  // CRT_KELVIN_INTRINSICS_H_
diff --git a/tests/kelvin_isa/BUILD b/tests/kelvin_isa/BUILD
new file mode 100644
index 0000000..610ad28
--- /dev/null
+++ b/tests/kelvin_isa/BUILD
@@ -0,0 +1,17 @@
+load("//build_tools/bazel:kelvin.bzl", "kelvin_binary")
+
+kelvin_binary(
+    name = "getvl_test",
+    srcs = [
+        "getvl_test.cc",
+    ],
+    hdrs = [
+        "kelvin_test.h",
+    ],
+    copts = [
+        "-Wno-address",
+    ],
+    deps = [
+        "//crt:crt_header",
+    ],
+)
diff --git a/tests/kelvin_isa/getvl_test.cc b/tests/kelvin_isa/getvl_test.cc
new file mode 100644
index 0000000..bae225e
--- /dev/null
+++ b/tests/kelvin_isa/getvl_test.cc
@@ -0,0 +1,141 @@
+// Copyright 2023 Google LLC
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "tests/kelvin_isa/kelvin_test.h"
+
+// clang-format off
+// Runs getvl mnemonic `op` (string literal) on `in` and exits unless the result is
+// min(caller's vlb/vlh/vlw, in). Mnemonics are matched by content via strcmp.
+#define TEST_GETVL_X(op, in)                                                       \
+  {                                                                                \
+    int ref, dut;                                                                  \
+    if (strcmp(op, "getvl.b.x") == 0 || strcmp(op, "getvl.b.x.m") == 0) {          \
+      ref = std::min(vlb, in);                                                     \
+    } else if (strcmp(op, "getvl.h.x") == 0 || strcmp(op, "getvl.h.x.m") == 0) {   \
+      ref = std::min(vlh, in);                                                     \
+    } else if (strcmp(op, "getvl.w.x") == 0 || strcmp(op, "getvl.w.x.m") == 0) {   \
+      ref = std::min(vlw, in);                                                     \
+    } else {                                                                       \
+      printf("**error(%d)[unknown getvl]\n", __LINE__);                            \
+      exit(-1);                                                                    \
+    }                                                                              \
+    __asm__ __volatile_always__(ARGS_F_A_A(op, %0, %1) : "=r"(dut) : "r"(in));     \
+    if (ref != dut) {                                                              \
+      printf("**error(%d)[%s] %d : %d %d\n", __LINE__, op, in, ref, dut);          \
+      exit(-1);                                                                    \
+    }                                                                              \
+  }
+// Two-operand variant: runs `op` (string literal) on `in0`, `in1` and exits unless the
+// result is min(caller's vlb/vlh/vlw, in0, in1). Mnemonics are matched via strcmp.
+#define TEST_GETVL_XX(op, in0, in1)                                                \
+  {                                                                                \
+    int ref, dut;                                                                  \
+    if (strcmp(op, "getvl.b.xx") == 0 || strcmp(op, "getvl.b.xx.m") == 0) {        \
+      ref = std::min(vlb, std::min(in0, in1));                                     \
+    } else if (strcmp(op, "getvl.h.xx") == 0 || strcmp(op, "getvl.h.xx.m") == 0) { \
+      ref = std::min(vlh, std::min(in0, in1));                                     \
+    } else if (strcmp(op, "getvl.w.xx") == 0 || strcmp(op, "getvl.w.xx.m") == 0) { \
+      ref = std::min(vlw, std::min(in0, in1));                                     \
+    } else {                                                                       \
+      printf("**error(%d)[unknown getvl]\n", __LINE__);                            \
+      exit(-1);                                                                    \
+    }                                                                              \
+    __asm__ __volatile_always__(ARGS_F_A_A_A(op, %0, %1, %2)                       \
+                                : "=r"(dut) : "r"(in0), "r"(in1));                 \
+    if (ref != dut) {                                                              \
+      printf("**error(%d)[%s] %d %d : %d %d\n", __LINE__, op, in0, in1, ref, dut); \
+      exit(-1);                                                                    \
+    }                                                                              \
+  }
+// clang-format on
+
+int main() {
+  const int pad = 3;  // Loops run `pad` requests past the maximum vl to exercise clamping.
+  int vlb, vlh, vlw;  // Filled in by the getmaxvl_* macros before first use.
+  // ---------------------------------------------------------------------------
+  // Test baseline (non-stripmine) lengths: getmaxvl.w must equal VLENW, with h = 2x and b = 4x.
+  getmaxvl_w(vlw);
+  getmaxvl_h(vlh);
+  getmaxvl_b(vlb);
+  if (vlw != VLENW) {
+    printf("**error(%d)[%s] %d\n", __LINE__, "getmaxvl.w", vlw);
+    exit(-1);
+  }
+  if (vlh != vlw * 2) {
+    printf("**error(%d)[%s] %d\n", __LINE__, "getmaxvl.h", vlh);
+    exit(-1);
+  }
+  if (vlb != vlw * 4) {
+    printf("**error(%d)[%s] %d\n", __LINE__, "getmaxvl.b", vlb);
+    exit(-1);
+  }
+  for (int i = 0; i < vlb + pad; ++i) {
+    TEST_GETVL_X("getvl.b.x", i);
+  }
+  for (int i = 0; i < vlh + pad; ++i) {
+    TEST_GETVL_X("getvl.h.x", i);
+  }
+  for (int i = 0; i < vlw + pad; ++i) {
+    TEST_GETVL_X("getvl.w.x", i);
+  }
+  for (int i = 0; i < vlb + pad; ++i) {
+    for (int j = 0; j < vlb + pad; ++j) {
+      TEST_GETVL_XX("getvl.b.xx", i, j);
+    }
+  }
+  for (int i = 0; i < vlh + pad; ++i) {
+    for (int j = 0; j < vlh + pad; ++j) {
+      TEST_GETVL_XX("getvl.h.xx", i, j);
+    }
+  }
+  for (int i = 0; i < vlw + pad; ++i) {
+    for (int j = 0; j < vlw + pad; ++j) {
+      TEST_GETVL_XX("getvl.w.xx", i, j);
+    }
+  }
+  // ---------------------------------------------------------------------------
+  // Test stripmine (.m) variants: getmaxvl.w.m must be 4x the baseline word length.
+  int vlw_p = vlw;  // Baseline word length, kept for the 4x check below.
+  getmaxvl_w_m(vlw);
+  getmaxvl_h_m(vlh);
+  getmaxvl_b_m(vlb);
+  if (vlw != 4 * vlw_p) {
+    printf("**error(%d)[%s] %d\n", __LINE__, "getmaxvl.w.m", vlw);
+    exit(-1);
+  }
+  if (vlh != vlw * 2) {
+    printf("**error(%d)[%s] %d\n", __LINE__, "getmaxvl.h.m", vlh);
+    exit(-1);
+  }
+  if (vlb != vlw * 4) {
+    printf("**error(%d)[%s] %d\n", __LINE__, "getmaxvl.b.m", vlb);
+    exit(-1);
+  }
+  for (int i = 0; i < vlb + pad; ++i) {
+    TEST_GETVL_X("getvl.b.x.m", i);
+  }
+  for (int i = 0; i < vlh + pad; ++i) {
+    TEST_GETVL_X("getvl.h.x.m", i);
+  }
+  for (int i = 0; i < vlw + pad; ++i) {
+    TEST_GETVL_X("getvl.w.x.m", i);
+  }
+  for (int i = 0; i < vlb + pad; ++i) {
+    for (int j = 0; j < vlb + pad; ++j) {
+      TEST_GETVL_XX("getvl.b.xx.m", i, j);
+    }
+  }
+  for (int i = 0; i < vlh + pad; ++i) {
+    for (int j = 0; j < vlh + pad; ++j) {
+      TEST_GETVL_XX("getvl.h.xx.m", i, j);
+    }
+  }
+  for (int i = 0; i < vlw + pad; ++i) {
+    for (int j = 0; j < vlw + pad; ++j) {
+      TEST_GETVL_XX("getvl.w.xx.m", i, j);
+    }
+  }
+  return 0;
+}
diff --git a/tests/kelvin_isa/kelvin_test.h b/tests/kelvin_isa/kelvin_test.h
new file mode 100644
index 0000000..b877ec6
--- /dev/null
+++ b/tests/kelvin_isa/kelvin_test.h
@@ -0,0 +1,18 @@
+// Copyright 2023 Google LLC
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Kelvin ISA test common header
+
+#ifndef TESTS_KELVIN_ISA_KELVIN_TEST_H_
+#define TESTS_KELVIN_ISA_KELVIN_TEST_H_
+
+#include "crt/kelvin.h"
+
+// Vector register geometry; bounds the storage needed for parameterized machine load/store.
+constexpr int VLEN = 256;  // SIMD register width in bits. Must match the HW parameter.
+constexpr int VLENB = VLEN / 8;  // VLEN counted in 8-bit elements.
+constexpr int VLENH = VLEN / 16;  // VLEN counted in 16-bit elements.
+constexpr int VLENW = VLEN / 32;  // VLEN counted in 32-bit elements.
+
+#endif  // TESTS_KELVIN_ISA_KELVIN_TEST_H_
