Merge "Fix kelvin intrinsic macros"
diff --git a/crt/kelvin_intrinsics.h b/crt/kelvin_intrinsics.h
index 23af097..db22707 100644
--- a/crt/kelvin_intrinsics.h
+++ b/crt/kelvin_intrinsics.h
@@ -2,21 +2,26 @@
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
+// clang-format off
+
// Kelvin instruction intrinsics
#ifndef CRT_KELVIN_INTRINSICS_H_
#define CRT_KELVIN_INTRINSICS_H_
-// clang-format off
+
#define eexit() __asm__ __volatile_always__("eexit");
#define eyield() __asm__ __volatile_always__("eyield");
#define ectxsw() __asm__ __volatile_always__("ectxsw");
#define mpause() __asm__ __volatile_always__("mpause");
+
#define flog(s) __asm__ __volatile_always__(ARGS_F_A("flog", %0) : : "r"(s))
#define slog(s) __asm__ __volatile_always__(ARGS_F_A("slog", %0) : : "r"(s))
#define clog(s) __asm__ __volatile_always__(ARGS_F_A("clog", %0) : : "r"(s))
#define klog(s) __asm__ __volatile_always__(ARGS_F_A("klog", %0) : : "r"(s))
+
#define flushall() __asm__ __volatile__("flushall");
#define flushat(s) __asm__ __volatile__(ARGS_F_A("flushat", %0) : : "r"(s))
+
#define getmaxvl_b(d) __asm__ __volatile__(ARGS_F_A("getmaxvl.b", %0) : "=r"(d) : )
#define getmaxvl_h(d) __asm__ __volatile__(ARGS_F_A("getmaxvl.h", %0) : "=r"(d) : )
#define getmaxvl_w(d) __asm__ __volatile__(ARGS_F_A("getmaxvl.w", %0) : "=r"(d) : )
@@ -26,6 +31,7 @@
#define getvl_b_xx(d, s, t) __asm__ __volatile__(ARGS_F_A_A_A("getvl.b.xx", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
#define getvl_h_xx(d, s, t) __asm__ __volatile__(ARGS_F_A_A_A("getvl.h.xx", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
#define getvl_w_xx(d, s, t) __asm__ __volatile__(ARGS_F_A_A_A("getvl.w.xx", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
+
#define getmaxvl_b_m(d) __asm__ __volatile__(ARGS_F_A("getmaxvl.b.m", %0) : "=r"(d) : )
#define getmaxvl_h_m(d) __asm__ __volatile__(ARGS_F_A("getmaxvl.h.m", %0) : "=r"(d) : )
#define getmaxvl_w_m(d) __asm__ __volatile__(ARGS_F_A("getmaxvl.w.m", %0) : "=r"(d) : )
@@ -35,6 +41,7 @@
#define getvl_b_xx_m(d, s, t) __asm__ __volatile__(ARGS_F_A_A_A("getvl.b.xx.m", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
#define getvl_h_xx_m(d, s, t) __asm__ __volatile__(ARGS_F_A_A_A("getvl.h.xx.m", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
#define getvl_w_xx_m(d, s, t) __asm__ __volatile__(ARGS_F_A_A_A("getvl.w.xx.m", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
+
// 111 Load/Store
#define vld_b_x(Vd, s) __asm__ __volatile_always__(ARGS_F_A_A("vld.b.x", Vd, %0) : : "r"(s) : "memory")
#define vld_b_p_x(Vd, s) __asm__ __volatile_always__(ARGS_F_A_A("vld.b.p.x", Vd, %0) : "=r"(s) : "0"(s) : "memory")
@@ -138,8 +145,6 @@
#define vdup_h_x_m(Vd, t) __asm__ __volatile__(ARGS_F_A_A("vdup.h.x.m", Vd, %0) : : "r"(t))
#define vdup_w_x(Vd, t) __asm__ __volatile__(ARGS_F_A_A("vdup.w.x", Vd, %0) : : "r"(t))
#define vdup_w_x_m(Vd, t) __asm__ __volatile__(ARGS_F_A_A("vdup.w.x.m", Vd, %0) : : "r"(t))
-#define vdup_s_f(Vd, f) __asm__ __volatile__(ARGS_F_A_A("vdup.s.f", Vd, %0) : : "f"(f))
-#define vdup_s_f_m(Vd, f) __asm__ __volatile__(ARGS_F_A_A("vdup.s.f.m", Vd, %0) : : "f"(f))
#define vcget(Vd) __asm__ __volatile__(ARGS_F_A("vcget", Vd) : : )
#define vstq_b_s_xx(Vd, s, t) __asm__ __volatile_always__(ARGS_F_A_A_A("vstq.b.s.xx", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
#define vstq_b_sp_xx(Vd, s, t) __asm__ __volatile_always__(ARGS_F_A_A_A("vstq.b.sp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
@@ -153,6 +158,7 @@
#define vstq_w_sp_xx(Vd, s, t) __asm__ __volatile_always__(ARGS_F_A_A_A("vstq.w.sp.xx", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
#define vstq_w_s_xx_m(Vd, s, t) __asm__ __volatile_always__(ARGS_F_A_A_A("vstq.w.s.xx.m", Vd, %0, %1) : : "r"(s), "r"(t) : "memory")
#define vstq_w_sp_xx_m(Vd, s, t) __asm__ __volatile_always__(ARGS_F_A_A_A("vstq.w.sp.xx.m", Vd, %0, %1) : "=r"(s) : "r"(t), "0"(s) : "memory")
+
// 000 Arithmetic
#define vadd_b_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vadd.b.vv", Vd, Vs, Vt))
#define vadd_b_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vadd.b.vx", Vd, Vs, %0) : : "r"(t))
@@ -388,6 +394,7 @@
#define vadd3_w_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vadd3.w.vx", Vd, Vs, %0) : : "r"(t))
#define vadd3_w_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vadd3.w.vv.m", Vd, Vs, Vt))
#define vadd3_w_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vadd3.w.vx.m", Vd, Vs, %0) : : "r"(t))
+
// 100 Arithmetic2
#define vadds_b_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vadds.b.vv", Vd, Vs, Vt))
#define vadds_b_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vadds.b.vx", Vd, Vs, %0) : : "r"(t))
@@ -535,100 +542,101 @@
#define vpsub_w_u_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vpsub.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
#define vhadd_b_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.vv", Vd, Vs, Vt))
#define vhadd_b_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.vx", Vd, Vs, %0) : : "r"(t))
-#define vhadd_b_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.1.vv", Vd, Vs, Vt))
-#define vhadd_b_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhadd_b_r_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.r.vv", Vd, Vs, Vt))
+#define vhadd_b_r_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.r.vx", Vd, Vs, %0) : : "r"(t))
#define vhadd_b_u_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.u.vv", Vd, Vs, Vt))
#define vhadd_b_u_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.u.vx", Vd, Vs, %0) : : "r"(t))
-#define vhadd_b_u_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.u.1.vv", Vd, Vs, Vt))
-#define vhadd_b_u_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.u.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhadd_b_ur_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.ur.vv", Vd, Vs, Vt))
+#define vhadd_b_ur_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.ur.vx", Vd, Vs, %0) : : "r"(t))
#define vhadd_b_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.vv.m", Vd, Vs, Vt))
#define vhadd_b_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.vx.m", Vd, Vs, %0) : : "r"(t))
-#define vhadd_b_1_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.1.vv.m", Vd, Vs, Vt))
-#define vhadd_b_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_b_r_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.r.vv.m", Vd, Vs, Vt))
+#define vhadd_b_r_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.r.vx.m", Vd, Vs, %0) : : "r"(t))
#define vhadd_b_u_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.u.vv.m", Vd, Vs, Vt))
#define vhadd_b_u_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.u.vx.m", Vd, Vs, %0) : : "r"(t))
-#define vhadd_b_u_1_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.u.1.vv.m", Vd, Vs, Vt))
-#define vhadd_b_u_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.u.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_b_ur_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.ur.vv.m", Vd, Vs, Vt))
+#define vhadd_b_ur_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.b.ur.vx.m", Vd, Vs, %0) : : "r"(t))
#define vhadd_h_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.vv", Vd, Vs, Vt))
#define vhadd_h_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.vx", Vd, Vs, %0) : : "r"(t))
-#define vhadd_h_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.1.vv", Vd, Vs, Vt))
-#define vhadd_h_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhadd_h_r_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.r.vv", Vd, Vs, Vt))
+#define vhadd_h_r_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.r.vx", Vd, Vs, %0) : : "r"(t))
#define vhadd_h_u_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.u.vv", Vd, Vs, Vt))
#define vhadd_h_u_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.u.vx", Vd, Vs, %0) : : "r"(t))
-#define vhadd_h_u_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.u.1.vv", Vd, Vs, Vt))
-#define vhadd_h_u_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.u.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhadd_h_ur_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.ur.vv", Vd, Vs, Vt))
+#define vhadd_h_ur_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.ur.vx", Vd, Vs, %0) : : "r"(t))
#define vhadd_h_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.vv.m", Vd, Vs, Vt))
#define vhadd_h_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.vx.m", Vd, Vs, %0) : : "r"(t))
-#define vhadd_h_1_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.1.vv.m", Vd, Vs, Vt))
-#define vhadd_h_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_h_r_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.r.vv.m", Vd, Vs, Vt))
+#define vhadd_h_r_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.r.vx.m", Vd, Vs, %0) : : "r"(t))
#define vhadd_h_u_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.u.vv.m", Vd, Vs, Vt))
#define vhadd_h_u_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
-#define vhadd_h_u_1_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.u.1.vv.m", Vd, Vs, Vt))
-#define vhadd_h_u_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.u.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_h_ur_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.ur.vv.m", Vd, Vs, Vt))
+#define vhadd_h_ur_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.h.ur.vx.m", Vd, Vs, %0) : : "r"(t))
#define vhadd_w_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.vv", Vd, Vs, Vt))
#define vhadd_w_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.vx", Vd, Vs, %0) : : "r"(t))
-#define vhadd_w_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.1.vv", Vd, Vs, Vt))
-#define vhadd_w_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhadd_w_r_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.r.vv", Vd, Vs, Vt))
+#define vhadd_w_r_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.r.vx", Vd, Vs, %0) : : "r"(t))
#define vhadd_w_u_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.u.vv", Vd, Vs, Vt))
#define vhadd_w_u_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.u.vx", Vd, Vs, %0) : : "r"(t))
-#define vhadd_w_u_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.u.1.vv", Vd, Vs, Vt))
-#define vhadd_w_u_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.u.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhadd_w_ur_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.ur.vv", Vd, Vs, Vt))
+#define vhadd_w_ur_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.ur.vx", Vd, Vs, %0) : : "r"(t))
#define vhadd_w_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.vv.m", Vd, Vs, Vt))
#define vhadd_w_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.vx.m", Vd, Vs, %0) : : "r"(t))
-#define vhadd_w_1_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.1.vv.m", Vd, Vs, Vt))
-#define vhadd_w_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_w_r_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.r.vv.m", Vd, Vs, Vt))
+#define vhadd_w_r_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.r.vx.m", Vd, Vs, %0) : : "r"(t))
#define vhadd_w_u_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.u.vv.m", Vd, Vs, Vt))
#define vhadd_w_u_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
-#define vhadd_w_u_1_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.u.1.vv.m", Vd, Vs, Vt))
-#define vhadd_w_u_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.u.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhadd_w_ur_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.ur.vv.m", Vd, Vs, Vt))
+#define vhadd_w_ur_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhadd.w.ur.vx.m", Vd, Vs, %0) : : "r"(t))
#define vhsub_b_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.vv", Vd, Vs, Vt))
#define vhsub_b_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.vx", Vd, Vs, %0) : : "r"(t))
-#define vhsub_b_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.1.vv", Vd, Vs, Vt))
-#define vhsub_b_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_b_r_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.r.vv", Vd, Vs, Vt))
+#define vhsub_b_r_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.r.vx", Vd, Vs, %0) : : "r"(t))
#define vhsub_b_u_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.u.vv", Vd, Vs, Vt))
#define vhsub_b_u_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.u.vx", Vd, Vs, %0) : : "r"(t))
-#define vhsub_b_u_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.u.1.vv", Vd, Vs, Vt))
-#define vhsub_b_u_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.u.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_b_ur_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.ur.vv", Vd, Vs, Vt))
+#define vhsub_b_ur_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.ur.vx", Vd, Vs, %0) : : "r"(t))
#define vhsub_b_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.vv.m", Vd, Vs, Vt))
#define vhsub_b_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.vx.m", Vd, Vs, %0) : : "r"(t))
-#define vhsub_b_1_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.1.vv.m", Vd, Vs, Vt))
-#define vhsub_b_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_b_r_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.r.vv.m", Vd, Vs, Vt))
+#define vhsub_b_r_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.r.vx.m", Vd, Vs, %0) : : "r"(t))
#define vhsub_b_u_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.u.vv.m", Vd, Vs, Vt))
#define vhsub_b_u_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.u.vx.m", Vd, Vs, %0) : : "r"(t))
-#define vhsub_b_u_1_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.u.1.vv.m", Vd, Vs, Vt))
-#define vhsub_b_u_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.u.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_b_ur_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.ur.vv.m", Vd, Vs, Vt))
+#define vhsub_b_ur_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.b.ur.vx.m", Vd, Vs, %0) : : "r"(t))
#define vhsub_h_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.vv", Vd, Vs, Vt))
#define vhsub_h_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.vx", Vd, Vs, %0) : : "r"(t))
-#define vhsub_h_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.1.vv", Vd, Vs, Vt))
-#define vhsub_h_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_h_r_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.r.vv", Vd, Vs, Vt))
+#define vhsub_h_r_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.r.vx", Vd, Vs, %0) : : "r"(t))
#define vhsub_h_u_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.u.vv", Vd, Vs, Vt))
#define vhsub_h_u_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.u.vx", Vd, Vs, %0) : : "r"(t))
-#define vhsub_h_u_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.u.1.vv", Vd, Vs, Vt))
-#define vhsub_h_u_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.u.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_h_ur_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.ur.vv", Vd, Vs, Vt))
+#define vhsub_h_ur_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.ur.vx", Vd, Vs, %0) : : "r"(t))
#define vhsub_h_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.vv.m", Vd, Vs, Vt))
#define vhsub_h_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.vx.m", Vd, Vs, %0) : : "r"(t))
-#define vhsub_h_1_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.1.vv.m", Vd, Vs, Vt))
-#define vhsub_h_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_h_r_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.r.vv.m", Vd, Vs, Vt))
+#define vhsub_h_r_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.r.vx.m", Vd, Vs, %0) : : "r"(t))
#define vhsub_h_u_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.u.vv.m", Vd, Vs, Vt))
#define vhsub_h_u_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.u.vx.m", Vd, Vs, %0) : : "r"(t))
-#define vhsub_h_u_1_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.u.1.vv.m", Vd, Vs, Vt))
-#define vhsub_h_u_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.u.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_h_ur_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.ur.vv.m", Vd, Vs, Vt))
+#define vhsub_h_ur_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.h.ur.vx.m", Vd, Vs, %0) : : "r"(t))
#define vhsub_w_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.vv", Vd, Vs, Vt))
#define vhsub_w_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.vx", Vd, Vs, %0) : : "r"(t))
-#define vhsub_w_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.1.vv", Vd, Vs, Vt))
-#define vhsub_w_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_w_r_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.r.vv", Vd, Vs, Vt))
+#define vhsub_w_r_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.r.vx", Vd, Vs, %0) : : "r"(t))
#define vhsub_w_u_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.u.vv", Vd, Vs, Vt))
#define vhsub_w_u_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.u.vx", Vd, Vs, %0) : : "r"(t))
-#define vhsub_w_u_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.u.1.vv", Vd, Vs, Vt))
-#define vhsub_w_u_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.u.1.vx", Vd, Vs, %0) : : "r"(t))
+#define vhsub_w_ur_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.ur.vv", Vd, Vs, Vt))
+#define vhsub_w_ur_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.ur.vx", Vd, Vs, %0) : : "r"(t))
#define vhsub_w_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.vv.m", Vd, Vs, Vt))
#define vhsub_w_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.vx.m", Vd, Vs, %0) : : "r"(t))
-#define vhsub_w_1_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.1.vv.m", Vd, Vs, Vt))
-#define vhsub_w_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_w_r_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.r.vv.m", Vd, Vs, Vt))
+#define vhsub_w_r_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.r.vx.m", Vd, Vs, %0) : : "r"(t))
#define vhsub_w_u_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.u.vv.m", Vd, Vs, Vt))
#define vhsub_w_u_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.u.vx.m", Vd, Vs, %0) : : "r"(t))
-#define vhsub_w_u_1_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.u.1.vv.m", Vd, Vs, Vt))
-#define vhsub_w_u_1_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.u.1.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vhsub_w_ur_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.ur.vv.m", Vd, Vs, Vt))
+#define vhsub_w_ur_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vhsub.w.ur.vx.m", Vd, Vs, %0) : : "r"(t))
+
// 001 Logical
#define vand_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vand.vv", Vd, Vs, Vt))
#define vand_b_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vand.b.vx", Vd, Vs, %0) : : "r"(t))
@@ -656,14 +664,30 @@
#define vxor_w_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vxor.w.vx.m", Vd, Vs, %0) : : "r"(t))
#define vnot_v(Vd, Vs) __asm__ __volatile__(ARGS_F_A_A("vnot.v", Vd, Vs))
#define vnot_v_m(Vd, Vs) __asm__ __volatile__(ARGS_F_A_A("vnot.v.m", Vd, Vs))
-#define vrev_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vrev.vv", Vd, Vs, Vt))
-#define vrev_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vrev.vx", Vd, Vs, %0) : : "r"(t))
-#define vrev_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vrev.vv.m", Vd, Vs, Vt))
-#define vrev_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vrev.vx.m", Vd, Vs, %0) : : "r"(t))
-#define vror_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vror.vv", Vd, Vs, Vt))
-#define vror_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vror.vx", Vd, Vs, %0) : : "r"(t))
-#define vror_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vror.vv.m", Vd, Vs, Vt))
-#define vror_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vror.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vrev_b_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vrev.b.vv", Vd, Vs, Vt))
+#define vrev_b_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vrev.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vrev_b_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vrev.b.vv.m", Vd, Vs, Vt))
+#define vrev_b_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vrev.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vrev_h_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vrev.h.vv", Vd, Vs, Vt))
+#define vrev_h_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vrev.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vrev_h_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vrev.h.vv.m", Vd, Vs, Vt))
+#define vrev_h_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vrev.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vrev_w_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vrev.w.vv", Vd, Vs, Vt))
+#define vrev_w_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vrev.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vrev_w_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vrev.w.vv.m", Vd, Vs, Vt))
+#define vrev_w_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vrev.w.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vror_b_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vror.b.vv", Vd, Vs, Vt))
+#define vror_b_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vror.b.vx", Vd, Vs, %0) : : "r"(t))
+#define vror_b_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vror.b.vv.m", Vd, Vs, Vt))
+#define vror_b_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vror.b.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vror_h_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vror.h.vv", Vd, Vs, Vt))
+#define vror_h_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vror.h.vx", Vd, Vs, %0) : : "r"(t))
+#define vror_h_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vror.h.vv.m", Vd, Vs, Vt))
+#define vror_h_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vror.h.vx.m", Vd, Vs, %0) : : "r"(t))
+#define vror_w_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vror.w.vv", Vd, Vs, Vt))
+#define vror_w_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vror.w.vx", Vd, Vs, %0) : : "r"(t))
+#define vror_w_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vror.w.vv.m", Vd, Vs, Vt))
+#define vror_w_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vror.w.vx.m", Vd, Vs, %0) : : "r"(t))
#define vclb_b_v(Vd, Vs) __asm__ __volatile__(ARGS_F_A_A("vclb.b.v", Vd, Vs))
#define vclb_b_v_m(Vd, Vs) __asm__ __volatile__(ARGS_F_A_A("vclb.b.v.m", Vd, Vs))
#define vclb_h_v(Vd, Vs) __asm__ __volatile__(ARGS_F_A_A("vclb.h.v", Vd, Vs))
@@ -695,6 +719,7 @@
#define acset_v(Vd, Vs) __asm__ __volatile__(ARGS_F_A_A("acset.v", Vd, Vs))
#define actr_v(Vd, Vs) __asm__ __volatile__(ARGS_F_A_A("actr.v", Vd, Vs))
#define adwinit_v(Vd, Vs) __asm__ __volatile__(ARGS_F_A_A("adwinit.v", Vd, Vs))
+
// 010 Shift
#define vsll_b_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vsll.b.vv", Vd, Vs, Vt))
#define vsll_b_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsll.b.vx", Vd, Vs, %0) : : "r"(t))
@@ -828,6 +853,7 @@
#define vsraqsu_b_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsraqsu.b.vx.m", Vd, Vs, %0) : : "r"(t))
#define vsraqsu_b_r_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vsraqsu.b.r.vv.m", Vd, Vs, Vt))
#define vsraqsu_b_r_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsraqsu.b.r.vx.m", Vd, Vs, %0) : : "r"(t))
+
// 011 Mul/Div
#define vmul_b_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vmul.b.vv", Vd, Vs, Vt))
#define vmul_b_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vmul.b.vx", Vd, Vs, %0) : : "r"(t))
@@ -997,31 +1023,8 @@
#define vmadd_w_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vmadd.w.vx", Vd, Vs, %0) : : "r"(t))
#define vmadd_w_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vmadd.w.vv.m", Vd, Vs, Vt))
#define vmadd_w_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vmadd.w.vx.m", Vd, Vs, %0) : : "r"(t))
+
// 110 Shuffle
-#define vsliden_b_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.b.1.vv", Vd, Vs, Vt))
-#define vsliden_b_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.b.1.vx", Vd, Vs, %0) : : "r"(t))
-#define vsliden_b_2_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.b.2.vv", Vd, Vs, Vt))
-#define vsliden_b_2_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.b.2.vx", Vd, Vs, %0) : : "r"(t))
-#define vsliden_b_3_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.b.3.vv", Vd, Vs, Vt))
-#define vsliden_b_3_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.b.3.vx", Vd, Vs, %0) : : "r"(t))
-#define vsliden_b_4_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.b.4.vv", Vd, Vs, Vt))
-#define vsliden_b_4_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.b.4.vx", Vd, Vs, %0) : : "r"(t))
-#define vsliden_h_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.h.1.vv", Vd, Vs, Vt))
-#define vsliden_h_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.h.1.vx", Vd, Vs, %0) : : "r"(t))
-#define vsliden_h_2_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.h.2.vv", Vd, Vs, Vt))
-#define vsliden_h_2_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.h.2.vx", Vd, Vs, %0) : : "r"(t))
-#define vsliden_h_3_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.h.3.vv", Vd, Vs, Vt))
-#define vsliden_h_3_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.h.3.vx", Vd, Vs, %0) : : "r"(t))
-#define vsliden_h_4_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.h.4.vv", Vd, Vs, Vt))
-#define vsliden_h_4_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.h.4.vx", Vd, Vs, %0) : : "r"(t))
-#define vsliden_w_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.w.1.vv", Vd, Vs, Vt))
-#define vsliden_w_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.w.1.vx", Vd, Vs, %0) : : "r"(t))
-#define vsliden_w_2_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.w.2.vv", Vd, Vs, Vt))
-#define vsliden_w_2_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.w.2.vx", Vd, Vs, %0) : : "r"(t))
-#define vsliden_w_3_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.w.3.vv", Vd, Vs, Vt))
-#define vsliden_w_3_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.w.3.vx", Vd, Vs, %0) : : "r"(t))
-#define vsliden_w_4_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.w.4.vv", Vd, Vs, Vt))
-#define vsliden_w_4_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vsliden.w.4.vx", Vd, Vs, %0) : : "r"(t))
#define vslidevn_b_1_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vslidevn.b.1.vv.m", Vd, Vs, Vt))
#define vslidevn_b_1_vx_m(Vd, Vs, t)__asm__ __volatile__(ARGS_F_A_A_A("vslidevn.b.1.vx.m", Vd, Vs, %0) : : "r"(t))
#define vslidevn_b_2_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vslidevn.b.2.vv.m", Vd, Vs, Vt))
@@ -1070,30 +1073,6 @@
#define vslidehn_w_3_vx_m(Vd, Vs, t)__asm__ __volatile__(ARGS_F_A_A_A("vslidehn.w.3.vx.m", Vd, Vs, %0) : : "r"(t))
#define vslidehn_w_4_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vslidehn.w.4.vv.m", Vd, Vs, Vt))
#define vslidehn_w_4_vx_m(Vd, Vs, t)__asm__ __volatile__(ARGS_F_A_A_A("vslidehn.w.4.vx.m", Vd, Vs, %0) : : "r"(t))
-#define vslidep_b_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.b.1.vv", Vd, Vs, Vt))
-#define vslidep_b_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.b.1.vx", Vd, Vs, %0) : : "r"(t))
-#define vslidep_b_2_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.b.2.vv", Vd, Vs, Vt))
-#define vslidep_b_2_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.b.2.vx", Vd, Vs, %0) : : "r"(t))
-#define vslidep_b_3_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.b.3.vv", Vd, Vs, Vt))
-#define vslidep_b_3_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.b.3.vx", Vd, Vs, %0) : : "r"(t))
-#define vslidep_b_4_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.b.4.vv", Vd, Vs, Vt))
-#define vslidep_b_4_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.b.4.vx", Vd, Vs, %0) : : "r"(t))
-#define vslidep_h_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.h.1.vv", Vd, Vs, Vt))
-#define vslidep_h_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.h.1.vx", Vd, Vs, %0) : : "r"(t))
-#define vslidep_h_2_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.h.2.vv", Vd, Vs, Vt))
-#define vslidep_h_2_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.h.2.vx", Vd, Vs, %0) : : "r"(t))
-#define vslidep_h_3_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.h.3.vv", Vd, Vs, Vt))
-#define vslidep_h_3_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.h.3.vx", Vd, Vs, %0) : : "r"(t))
-#define vslidep_h_4_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.h.4.vv", Vd, Vs, Vt))
-#define vslidep_h_4_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.h.4.vx", Vd, Vs, %0) : : "r"(t))
-#define vslidep_w_1_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.w.1.vv", Vd, Vs, Vt))
-#define vslidep_w_1_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.w.1.vx", Vd, Vs, %0) : : "r"(t))
-#define vslidep_w_2_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.w.2.vv", Vd, Vs, Vt))
-#define vslidep_w_2_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.w.2.vx", Vd, Vs, %0) : : "r"(t))
-#define vslidep_w_3_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.w.3.vv", Vd, Vs, Vt))
-#define vslidep_w_3_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.w.3.vx", Vd, Vs, %0) : : "r"(t))
-#define vslidep_w_4_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.w.4.vv", Vd, Vs, Vt))
-#define vslidep_w_4_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vslidep.w.4.vx", Vd, Vs, %0) : : "r"(t))
#define vslidevp_b_1_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vslidevp.b.1.vv.m", Vd, Vs, Vt))
#define vslidevp_b_1_vx_m(Vd, Vs, t)__asm__ __volatile__(ARGS_F_A_A_A("vslidevp.b.1.vx.m", Vd, Vs, %0) : : "r"(t))
#define vslidevp_b_2_vv_m(Vd, Vs, Vt)__asm__ __volatile__(ARGS_F_A_A_A("vslidevp.b.2.vv.m", Vd, Vs, Vt))
@@ -1190,7 +1169,6 @@
#define vevnodd_w_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vevnodd.w.vx", Vd, Vs, %0) : : "r"(t))
#define vevnodd_w_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vevnodd.w.vv.m", Vd, Vs, Vt))
#define vevnodd_w_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vevnodd.w.vx.m", Vd, Vs, %0) : : "r"(t))
-#define vdupg_v(Vd, Vs) __asm__ __volatile__(ARGS_F_A_A("vdupg.v", Vd, Vs))
#define vzip_b_vv(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vzip.b.vv", Vd, Vs, Vt))
#define vzip_b_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vzip.b.vx", Vd, Vs, %0) : : "r"(t))
#define vzip_b_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vzip.b.vv.m", Vd, Vs, Vt))
@@ -1203,27 +1181,10 @@
#define vzip_w_vx(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vzip.w.vx", Vd, Vs, %0) : : "r"(t))
#define vzip_w_vv_m(Vd, Vs, Vt) __asm__ __volatile__(ARGS_F_A_A_A("vzip.w.vv.m", Vd, Vs, Vt))
#define vzip_w_vx_m(Vd, Vs, t) __asm__ __volatile__(ARGS_F_A_A_A("vzip.w.vx.m", Vd, Vs, %0) : : "r"(t))
+
// 3arg
-#define vfmadd4_s_vvv(Vd, Vs, Vt, Vr)__asm__ __volatile__(ARGS_F_A_A_A_A("vfmadd4.s.vvv", Vd, Vs, Vt, Vr))
-#define vfmadd4_s_vfv(Vd, Vs, t, Vr)__asm__ __volatile__(ARGS_F_A_A_A_A("vfmadd4.s.vfv", Vd, Vs, %0, Vr) : : "r"(t))
-#define vfmadd4_s_vvv_m(Vd, Vs, Vt, Vr)__asm__ __volatile__(ARGS_F_A_A_A_A("vfmadd4.s.vvv.m", Vd, Vs, Vt, Vr))
-#define vfmadd4_s_vfv_m(Vd, Vs, t, Vr)__asm__ __volatile__(ARGS_F_A_A_A_A("vfmadd4.s.vfv.m", Vd, Vs, %0, Vr) : : "r"(t))
-#define vadd3_w_vvv(Vd, Vs, Vt, Vr) __asm__ __volatile__(ARGS_F_A_A_A_A("vadd3.w.vvv", Vd, Vs, Vt, Vr))
-#define vadd3_w_vxv(Vd, Vs, t, Vr) __asm__ __volatile__(ARGS_F_A_A_A_A("vadd3.w.vxv", Vd, Vs, %0, Vr) : : "r"(t))
-#define vadd3_w_vvv_m(Vd, Vs, Vt, Vr)__asm__ __volatile__(ARGS_F_A_A_A_A("vadd3.w.vvv.m", Vd, Vs, Vt, Vr))
-#define vadd3_w_vxv_m(Vd, Vs, t, Vr)__asm__ __volatile__(ARGS_F_A_A_A_A("vadd3.w.vxv.m", Vd, Vs, %0, Vr) : : "r"(t))
#define aconv_vxv(Vd, Vs, t, Vr) __asm__ __volatile__(ARGS_F_A_A_A_A("aconv.vxv", Vd, Vs, %0, Vr) : : "r"(t))
-#define vconv_vxv(Vd, Vs, t, Vr) __asm__ __volatile__(ARGS_F_A_A_A_A("vconv.vxv", Vd, Vs, %0, Vr) : : "r"(t))
#define adwconv_vxv(Vd, Vs, t, Vr) __asm__ __volatile__(ARGS_F_A_A_A_A("adwconv.vxv", Vd, Vs, %0, Vr) : : "r"(t))
#define vdwconv_vxv(Vd, Vs, t, Vr) __asm__ __volatile__(ARGS_F_A_A_A_A("vdwconv.vxv", Vd, Vs, %0, Vr) : : "r"(t))
-// Scalar
-#define clb_x(d, s) __asm__ __volatile__(ARGS_F_A_A("clb", %0, %1) : "=r"(d) : "r"(s))
-#define clz_x(d, s) __asm__ __volatile__(ARGS_F_A_A("clz", %0, %1) : "=r"(d) : "r"(s))
-#define ctz_x(d, s) __asm__ __volatile__(ARGS_F_A_A("ctz", %0, %1) : "=r"(d) : "r"(s))
-#define cpop_x(d, s) __asm__ __volatile__(ARGS_F_A_A("cpop", %0, %1) : "=r"(d) : "r"(s))
-#define min_xx(d, s, t) __asm__ __volatile__(ARGS_F_A_A_A("min", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
-#define max_xx(d, s, t) __asm__ __volatile__(ARGS_F_A_A_A("max", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
-#define minu_xx(d, s, t) __asm__ __volatile__(ARGS_F_A_A_A("minu", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
-#define maxu_xx(d, s, t) __asm__ __volatile__(ARGS_F_A_A_A("maxu", %0, %1, %2) : "=r"(d) : "r"(s), "r"(t))
-// clang-format on
#endif // CRT_KELVIN_INTRINSICS_H_
+// clang-format on