Support vv variant for vpsub instructions

PiperOrigin-RevId: 559141082
diff --git a/sim/kelvin_arith.bin_fmt b/sim/kelvin_arith.bin_fmt
index d8ac28e..8b95dbc 100644
--- a/sim/kelvin_arith.bin_fmt
+++ b/sim/kelvin_arith.bin_fmt
@@ -421,12 +421,20 @@
   vpsub_h_v_m   : KelvinV2ArgsType : func2 == 0b00'1110, vs2 == 0, sz == 0b01, m == 0b01, func1 == 0b100, form == 0b10;
   vpsub_w_v     : KelvinV2ArgsType : func2 == 0b00'1110, vs2 == 0, sz == 0b10, m == 0b00, func1 == 0b100, form == 0b10;
   vpsub_w_v_m   : KelvinV2ArgsType : func2 == 0b00'1110, vs2 == 0, sz == 0b10, m == 0b01, func1 == 0b100, form == 0b10;
+  vpsub_h_vv     : KelvinV2ArgsType : func2 == 0b00'1110, sz == 0b01, m == 0b00, func1 == 0b100, form == 0b00;
+  vpsub_h_vv_m   : KelvinV2ArgsType : func2 == 0b00'1110, sz == 0b01, m == 0b01, func1 == 0b100, form == 0b00;
+  vpsub_w_vv     : KelvinV2ArgsType : func2 == 0b00'1110, sz == 0b10, m == 0b00, func1 == 0b100, form == 0b00;
+  vpsub_w_vv_m   : KelvinV2ArgsType : func2 == 0b00'1110, sz == 0b10, m == 0b01, func1 == 0b100, form == 0b00;
 
   //vpsubu
   vpsub_h_u_v     : KelvinV2ArgsType : func2 == 0b00'1111, vs2 == 0, sz == 0b01, m == 0b00, func1 == 0b100, form == 0b10;
   vpsub_h_u_v_m   : KelvinV2ArgsType : func2 == 0b00'1111, vs2 == 0, sz == 0b01, m == 0b01, func1 == 0b100, form == 0b10;
   vpsub_w_u_v     : KelvinV2ArgsType : func2 == 0b00'1111, vs2 == 0, sz == 0b10, m == 0b00, func1 == 0b100, form == 0b10;
   vpsub_w_u_v_m   : KelvinV2ArgsType : func2 == 0b00'1111, vs2 == 0, sz == 0b10, m == 0b01, func1 == 0b100, form == 0b10;
+  vpsub_h_u_vv     : KelvinV2ArgsType : func2 == 0b00'1111, sz == 0b01, m == 0b00, func1 == 0b100, form == 0b00;
+  vpsub_h_u_vv_m   : KelvinV2ArgsType : func2 == 0b00'1111, sz == 0b01, m == 0b01, func1 == 0b100, form == 0b00;
+  vpsub_w_u_vv     : KelvinV2ArgsType : func2 == 0b00'1111, sz == 0b10, m == 0b00, func1 == 0b100, form == 0b00;
+  vpsub_w_u_vv_m   : KelvinV2ArgsType : func2 == 0b00'1111, sz == 0b10, m == 0b01, func1 == 0b100, form == 0b00;
 
   //vhadd
   vhadd_b_vv     : KelvinV2ArgsType : func2 == 0b01'0000, sz == 0b00, m == 0b00, func1 == 0b100, form == 0b00;
diff --git a/sim/kelvin_arith.isa b/sim/kelvin_arith.isa
index d245f79..8faf3e7 100644
--- a/sim/kelvin_arith.isa
+++ b/sim/kelvin_arith.isa
@@ -1130,6 +1130,18 @@
     vpsub_w_v_m{: vs1 : vd},
       disasm: "vpsub.w.v.m", "%vd, %vs1",
       semfunc: "absl::bind_front(&KelvinVPsub<int32_t, int16_t>, /*strip_mine*/ true)";
+    vpsub_h_vv{: vs1, vs2 : vd},
+      disasm: "vpsub.h.vv", "%vd, %vs1, %vs2",
+      semfunc: "absl::bind_front(&KelvinVPsub<int16_t, int8_t>, /*strip_mine*/ false)";
+    vpsub_h_vv_m{: vs1, vs2 : vd},
+      disasm: "vpsub.h.vv.m", "%vd, %vs1, %vs2",
+      semfunc: "absl::bind_front(&KelvinVPsub<int16_t, int8_t>, /*strip_mine*/ true)";
+    vpsub_w_vv{: vs1, vs2 : vd},
+      disasm: "vpsub.w.vv", "%vd, %vs1, %vs2",
+      semfunc: "absl::bind_front(&KelvinVPsub<int32_t, int16_t>, /*strip_mine*/ false)";
+    vpsub_w_vv_m{: vs1, vs2 : vd},
+      disasm: "vpsub.w.vv.m", "%vd, %vs1, %vs2",
+      semfunc: "absl::bind_front(&KelvinVPsub<int32_t, int16_t>, /*strip_mine*/ true)";
 
     //vpsubu
     vpsub_h_u_v{: vs1 : vd},
@@ -1144,6 +1156,18 @@
     vpsub_w_u_v_m{: vs1 : vd},
       disasm: "vpsub.w.u.v.m", "%vd, %vs1",
       semfunc: "absl::bind_front(&KelvinVPsub<uint32_t, uint16_t>, /*strip_mine*/ true)";
+    vpsub_h_u_vv{: vs1, vs2 : vd},
+      disasm: "vpsub.h.u.vv", "%vd, %vs1, %vs2",
+      semfunc: "absl::bind_front(&KelvinVPsub<uint16_t, uint8_t>, /*strip_mine*/ false)";
+    vpsub_h_u_vv_m{: vs1, vs2 : vd},
+      disasm: "vpsub.h.u.vv.m", "%vd, %vs1, %vs2",
+      semfunc: "absl::bind_front(&KelvinVPsub<uint16_t, uint8_t>, /*strip_mine*/ true)";
+    vpsub_w_u_vv{: vs1, vs2 : vd},
+      disasm: "vpsub.w.u.vv", "%vd, %vs1, %vs2",
+      semfunc: "absl::bind_front(&KelvinVPsub<uint32_t, uint16_t>, /*strip_mine*/ false)";
+    vpsub_w_u_vv_m{: vs1, vs2 : vd},
+      disasm: "vpsub.w.u.vv.m", "%vd, %vs1, %vs2",
+      semfunc: "absl::bind_front(&KelvinVPsub<uint32_t, uint16_t>, /*strip_mine*/ true)";
 
     //vhadd
     vhadd_b_vv{: vs1, vs2 : vd},
diff --git a/sim/kelvin_encoding.cc b/sim/kelvin_encoding.cc
index f3a9f54..21dbce1 100644
--- a/sim/kelvin_encoding.cc
+++ b/sim/kelvin_encoding.cc
@@ -388,8 +388,9 @@
     return true;
   }
 
-  // Func1 VPadd with ".vv" form needs 2x destination registers.
-  if ((func1 == 0b100) && (func2_ignore_unsigned == 0b001100)) {
+  // Func1 VPadd and VPsub with ".vv" form need 2x destination registers.
+  if ((func1 == 0b100) && (func2_ignore_unsigned == 0b001100 ||
+                           func2_ignore_unsigned == 0b001110)) {
     auto form = encoding::kelvin_v2_args_type::ExtractForm(inst_word_);
     if (form == 0b00) {
       return true;
diff --git a/sim/kelvin_vector_instructions.cc b/sim/kelvin_vector_instructions.cc
index b18f44a..d914bed 100644
--- a/sim/kelvin_vector_instructions.cc
+++ b/sim/kelvin_vector_instructions.cc
@@ -527,13 +527,24 @@
 template <typename Td, typename Ts>
 void KelvinVPsub(bool strip_mine, Instruction *inst) {
   // Subtracts lane pairs.
-  KelvinBinaryVectorOp<true /* halftype */, false /* widen_dst */, Td, Ts, Ts>(
-      inst, false /* scalar */, strip_mine,
-      std::function<Td(Ts, Ts)>([](Ts vs1, Ts vs2) -> Td {
-        return static_cast<Td>(vs1) - static_cast<Td>(vs2);
-      }),
-      SourceArgGetter<Ts, Td, Ts, Ts>(PackedBinaryOpGetArg1<Td, Ts, Ts>),
-      SourceArgGetter<Ts, Td, Ts, Ts>(PackedBinaryOpGetArg2<Td, Ts, Ts>));
+  if (inst->SourcesSize() == 2) {  // .vv
+    KelvinBinaryVectorOp<true /* halftype */, true /* widen_dst */, Td, Ts, Ts>(
+        inst, false /* scalar */, strip_mine,
+        std::function<Td(Ts, Ts)>([](Ts vs1, Ts vs2) -> Td {
+          return static_cast<Td>(vs1) - static_cast<Td>(vs2);
+        }),
+        SourceArgGetter<Ts, Td, Ts, Ts>(PackedBinaryOpGetArg1<Td, Ts, Ts>),
+        SourceArgGetter<Ts, Td, Ts, Ts>(PackedBinaryOpGetArg2<Td, Ts, Ts>));
+  } else {
+    KelvinBinaryVectorOp<true /* halftype */, false /* widen_dst */, Td, Ts,
+                         Ts>(
+        inst, false /* scalar */, strip_mine,
+        std::function<Td(Ts, Ts)>([](Ts vs1, Ts vs2) -> Td {
+          return static_cast<Td>(vs1) - static_cast<Td>(vs2);
+        }),
+        SourceArgGetter<Ts, Td, Ts, Ts>(PackedBinaryOpGetArg1<Td, Ts, Ts>),
+        SourceArgGetter<Ts, Td, Ts, Ts>(PackedBinaryOpGetArg2<Td, Ts, Ts>));
+  }
 }
 template void KelvinVPsub<int16_t, int8_t>(bool, Instruction *);
 template void KelvinVPsub<int32_t, int16_t>(bool, Instruction *);
diff --git a/sim/test/kelvin_encoding_test.cc b/sim/test/kelvin_encoding_test.cc
index d962321..b7fa6c3 100644
--- a/sim/test/kelvin_encoding_test.cc
+++ b/sim/test/kelvin_encoding_test.cc
@@ -473,6 +473,20 @@
   EXPECT_EQ(v_dest->size(), 2);
   delete v_dest;
 
+  // Test vpadd.w.vv
+  constexpr uint32_t kVPAdd = 0b001100'000001'000000'10'001000'0'100'00;
+  v_dest = EncodeOpHelper<RV32VectorDestOperand>(kVPAdd, OpcodeEnum::kVpaddWVv,
+                                                 DestOpEnum::kVd);
+  EXPECT_EQ(v_dest->size(), 2);
+  delete v_dest;
+
+  // Test vpsub.h.u.vv
+  constexpr uint32_t kVPSub = 0b001111'000001'000000'01'001000'0'100'00;
+  v_dest = EncodeOpHelper<RV32VectorDestOperand>(kVPSub, OpcodeEnum::kVpsubHUVv,
+                                                 DestOpEnum::kVd);
+  EXPECT_EQ(v_dest->size(), 2);
+  delete v_dest;
+
   // Test vzip.h.vv
   v_dest = EncodeOpHelper<RV32VectorDestOperand>(
       SetSz(kVEvnoddBase, 0b1) | (0b100 << 26 /* vzip */), OpcodeEnum::kVzipHVv,
diff --git a/sim/test/kelvin_vector_instructions_test.cc b/sim/test/kelvin_vector_instructions_test.cc
index 5abf01d..5c75baa 100644
--- a/sim/test/kelvin_vector_instructions_test.cc
+++ b/sim/test/kelvin_vector_instructions_test.cc
@@ -715,6 +715,7 @@
   static void KelvinOp(bool strip_mine, Instruction *inst) {
     KelvinVPsub<Vd, Vs2>(strip_mine, inst);
   }
+  static constexpr auto kArgsGetter = PairwiseOpArgsGetter<Vs1>;
 };
 
 TEST_F(KelvinVectorInstructionsTest, VPsub) {
@@ -727,6 +728,16 @@
                                      uint32_t, uint16_t, uint16_t>("VPsubOp");
 }
 
+TEST_F(KelvinVectorInstructionsTest, VPsubVV) {
+  KelvinPairwiseVectorBinaryOpHelper<VPsubOp, int16_t, int8_t, int8_t, int32_t,
+                                     int16_t, int16_t>("VPsubOp");
+}
+
+TEST_F(KelvinVectorInstructionsTest, VPsubuVV) {
+  KelvinPairwiseVectorBinaryOpHelper<VPsubOp, uint16_t, uint8_t, uint8_t,
+                                     uint32_t, uint16_t, uint16_t>("VPsubOp");
+}
+
 // Vector halving addition.
 template <typename Vd, typename Vs1, typename Vs2>
 struct VHaddOp {