Support vv variant for vpadd instructions

PiperOrigin-RevId: 558925623
diff --git a/sim/kelvin_arith.bin_fmt b/sim/kelvin_arith.bin_fmt
index b679386..d8ac28e 100644
--- a/sim/kelvin_arith.bin_fmt
+++ b/sim/kelvin_arith.bin_fmt
@@ -401,12 +401,20 @@
   vpadd_h_v_m   : KelvinV2ArgsType : func2 == 0b00'1100, vs2 == 0, sz == 0b01, m == 0b01, func1 == 0b100, form == 0b10;
   vpadd_w_v     : KelvinV2ArgsType : func2 == 0b00'1100, vs2 == 0, sz == 0b10, m == 0b00, func1 == 0b100, form == 0b10;
   vpadd_w_v_m   : KelvinV2ArgsType : func2 == 0b00'1100, vs2 == 0, sz == 0b10, m == 0b01, func1 == 0b100, form == 0b10;
+  vpadd_h_vv     : KelvinV2ArgsType : func2 == 0b00'1100, sz == 0b01, m == 0b00, func1 == 0b100, form == 0b00;
+  vpadd_h_vv_m   : KelvinV2ArgsType : func2 == 0b00'1100, sz == 0b01, m == 0b01, func1 == 0b100, form == 0b00;
+  vpadd_w_vv     : KelvinV2ArgsType : func2 == 0b00'1100, sz == 0b10, m == 0b00, func1 == 0b100, form == 0b00;
+  vpadd_w_vv_m   : KelvinV2ArgsType : func2 == 0b00'1100, sz == 0b10, m == 0b01, func1 == 0b100, form == 0b00;
 
   //vpaddu
   vpadd_h_u_v     : KelvinV2ArgsType : func2 == 0b00'1101, vs2 == 0, sz == 0b01, m == 0b00, func1 == 0b100, form == 0b10;
   vpadd_h_u_v_m   : KelvinV2ArgsType : func2 == 0b00'1101, vs2 == 0, sz == 0b01, m == 0b01, func1 == 0b100, form == 0b10;
   vpadd_w_u_v     : KelvinV2ArgsType : func2 == 0b00'1101, vs2 == 0, sz == 0b10, m == 0b00, func1 == 0b100, form == 0b10;
   vpadd_w_u_v_m   : KelvinV2ArgsType : func2 == 0b00'1101, vs2 == 0, sz == 0b10, m == 0b01, func1 == 0b100, form == 0b10;
+  vpadd_h_u_vv     : KelvinV2ArgsType : func2 == 0b00'1101, sz == 0b01, m == 0b00, func1 == 0b100, form == 0b00;
+  vpadd_h_u_vv_m   : KelvinV2ArgsType : func2 == 0b00'1101, sz == 0b01, m == 0b01, func1 == 0b100, form == 0b00;
+  vpadd_w_u_vv     : KelvinV2ArgsType : func2 == 0b00'1101, sz == 0b10, m == 0b00, func1 == 0b100, form == 0b00;
+  vpadd_w_u_vv_m   : KelvinV2ArgsType : func2 == 0b00'1101, sz == 0b10, m == 0b01, func1 == 0b100, form == 0b00;
 
   //vpsub
   vpsub_h_v     : KelvinV2ArgsType : func2 == 0b00'1110, vs2 == 0, sz == 0b01, m == 0b00, func1 == 0b100, form == 0b10;
diff --git a/sim/kelvin_arith.isa b/sim/kelvin_arith.isa
index 0b4ad8f..d245f79 100644
--- a/sim/kelvin_arith.isa
+++ b/sim/kelvin_arith.isa
@@ -1078,6 +1078,18 @@
     vpadd_w_v_m{: vs1 : vd},
       disasm: "vpadd.w.v.m", "%vd, %vs1",
       semfunc: "absl::bind_front(&KelvinVPadd<int32_t, int16_t>, /*strip_mine*/ true)";
+    vpadd_h_vv{: vs1, vs2 : vd},
+      disasm: "vpadd.h.vv", "%vd, %vs1, %vs2",
+      semfunc: "absl::bind_front(&KelvinVPadd<int16_t, int8_t>, /*strip_mine*/ false)";
+    vpadd_h_vv_m{: vs1, vs2 : vd},
+      disasm: "vpadd.h.vv.m", "%vd, %vs1, %vs2",
+      semfunc: "absl::bind_front(&KelvinVPadd<int16_t, int8_t>, /*strip_mine*/ true)";
+    vpadd_w_vv{: vs1, vs2 : vd},
+      disasm: "vpadd.w.vv", "%vd, %vs1, %vs2",
+      semfunc: "absl::bind_front(&KelvinVPadd<int32_t, int16_t>, /*strip_mine*/ false)";
+    vpadd_w_vv_m{: vs1, vs2 : vd},
+      disasm: "vpadd.w.vv.m", "%vd, %vs1, %vs2",
+      semfunc: "absl::bind_front(&KelvinVPadd<int32_t, int16_t>, /*strip_mine*/ true)";
 
    //vpaddu
     vpadd_h_u_v{: vs1 : vd},
@@ -1092,6 +1104,18 @@
     vpadd_w_u_v_m{: vs1 : vd},
       disasm: "vpadd.w.u.v.m", "%vd, %vs1",
       semfunc: "absl::bind_front(&KelvinVPadd<uint32_t, uint16_t>, /*strip_mine*/ true)";
+    vpadd_h_u_vv{: vs1, vs2 : vd},
+      disasm: "vpadd.h.u.vv", "%vd, %vs1, %vs2",
+      semfunc: "absl::bind_front(&KelvinVPadd<uint16_t, uint8_t>, /*strip_mine*/ false)";
+    vpadd_h_u_vv_m{: vs1, vs2 : vd},
+      disasm: "vpadd.h.u.vv.m", "%vd, %vs1, %vs2",
+      semfunc: "absl::bind_front(&KelvinVPadd<uint16_t, uint8_t>, /*strip_mine*/ true)";
+    vpadd_w_u_vv{: vs1, vs2 : vd},
+      disasm: "vpadd.w.u.vv", "%vd, %vs1, %vs2",
+      semfunc: "absl::bind_front(&KelvinVPadd<uint32_t, uint16_t>, /*strip_mine*/ false)";
+    vpadd_w_u_vv_m{: vs1, vs2 : vd},
+      disasm: "vpadd.w.u.vv.m", "%vd, %vs1, %vs2",
+      semfunc: "absl::bind_front(&KelvinVPadd<uint32_t, uint16_t>, /*strip_mine*/ true)";
 
    //vpsub
     vpsub_h_v{: vs1 : vd},
diff --git a/sim/kelvin_encoding.cc b/sim/kelvin_encoding.cc
index 6f4d1dc..f3a9f54 100644
--- a/sim/kelvin_encoding.cc
+++ b/sim/kelvin_encoding.cc
@@ -384,10 +384,18 @@
   }
 
   // Func1 0b110 VEvnodd and VZip needs 2x destination registers.
-  if ((func1 == 0b110) && (func2 == 0b011010 || func2 == 0b011100)) {
+  if ((func1 == 0b110) && (func2 == 0b011100 || func2 == 0b011010)) {
     return true;
   }
 
+  // Func1 0b100 VPadd/VPaddu in ".vv" form needs 2x destination registers.
+  if ((func1 == 0b100) && (func2_ignore_unsigned == 0b001100)) {
+    auto form = encoding::kelvin_v2_args_type::ExtractForm(inst_word_);
+    if (form == 0b00) {
+      return true;
+    }
+  }
+
   return false;
 }
 
diff --git a/sim/kelvin_vector_instructions.cc b/sim/kelvin_vector_instructions.cc
index c979a41..b18f44a 100644
--- a/sim/kelvin_vector_instructions.cc
+++ b/sim/kelvin_vector_instructions.cc
@@ -482,7 +482,7 @@
   auto elts_per_register = vector_size_in_bytes / sizeof(Vs1);
   auto src_element_index = op_index * elts_per_register +
                            dst_element_index * sizeof(Vd) / sizeof(Vs1);
-  return GetInstructionSource<Vs1>(inst, 0, src_element_index);
+  return GetInstructionSource<Vs1>(inst, dst_reg_index, src_element_index);
 }
 
 template <typename Vd, typename Vs1, typename Vs2>
@@ -494,19 +494,30 @@
   auto elts_per_register = vector_size_in_bytes / sizeof(Vs2);
   auto src_element_index = op_index * elts_per_register +
                            dst_element_index * sizeof(Vd) / sizeof(Vs2) + 1;
-  return GetInstructionSource<Vs2>(inst, 0, src_element_index);
+  return GetInstructionSource<Vs2>(inst, dst_reg_index, src_element_index);
 }
 
 template <typename Td, typename Ts>
 void KelvinVPadd(bool strip_mine, Instruction *inst) {
   // Adds lane pairs.
-  KelvinBinaryVectorOp<true /* halftype */, false /* widen_dst */, Td, Ts, Ts>(
-      inst, false /* scalar */, strip_mine,
-      std::function<Td(Ts, Ts)>([](Ts vs1, Ts vs2) -> Td {
-        return static_cast<Td>(vs1) + static_cast<Td>(vs2);
-      }),
-      SourceArgGetter<Ts, Td, Ts, Ts>(PackedBinaryOpGetArg1<Td, Ts, Ts>),
-      SourceArgGetter<Ts, Td, Ts, Ts>(PackedBinaryOpGetArg2<Td, Ts, Ts>));
+  if (inst->SourcesSize() == 2) {  // .vv
+    KelvinBinaryVectorOp<true /* halftype */, true /* widen_dst */, Td, Ts, Ts>(
+        inst, false /* scalar */, strip_mine,
+        std::function<Td(Ts, Ts)>([](Ts vs1, Ts vs2) -> Td {
+          return static_cast<Td>(vs1) + static_cast<Td>(vs2);
+        }),
+        SourceArgGetter<Ts, Td, Ts, Ts>(PackedBinaryOpGetArg1<Td, Ts, Ts>),
+        SourceArgGetter<Ts, Td, Ts, Ts>(PackedBinaryOpGetArg2<Td, Ts, Ts>));
+  } else {
+    KelvinBinaryVectorOp<true /* halftype */, false /* widen_dst */, Td, Ts,
+                         Ts>(
+        inst, false /* scalar */, strip_mine,
+        std::function<Td(Ts, Ts)>([](Ts vs1, Ts vs2) -> Td {
+          return static_cast<Td>(vs1) + static_cast<Td>(vs2);
+        }),
+        SourceArgGetter<Ts, Td, Ts, Ts>(PackedBinaryOpGetArg1<Td, Ts, Ts>),
+        SourceArgGetter<Ts, Td, Ts, Ts>(PackedBinaryOpGetArg2<Td, Ts, Ts>));
+  }
 }
 template void KelvinVPadd<int16_t, int8_t>(bool, Instruction *);
 template void KelvinVPadd<int32_t, int16_t>(bool, Instruction *);
diff --git a/sim/test/kelvin_vector_instructions_test.cc b/sim/test/kelvin_vector_instructions_test.cc
index ac1b1d0..5abf01d 100644
--- a/sim/test/kelvin_vector_instructions_test.cc
+++ b/sim/test/kelvin_vector_instructions_test.cc
@@ -164,6 +164,31 @@
 
   template <template <typename, typename, typename> class F, typename TD,
             typename TS1, typename TS2>
+  void KelvinPairwiseVectorBinaryOpHelper(absl::string_view name) {
+    const auto name_with_type = absl::StrCat(name, KelvinTestTypeSuffix<TD>());
+
+    // Vector OP single vector.
+    BinaryOpTestHelper<TD, TS1, TS2>(
+        absl::bind_front(F<TD, TS1, TS2>::KelvinOp, kNonStripmine),
+        absl::StrCat(name_with_type, "VV"), kNonScalar, kNonStripmine,
+        F<TD, TS1, TS2>::Op, F<TD, TS1, TS2>::kArgsGetter, false, false, true);
+
+    // Vector OP single vector stripmined.
+    BinaryOpTestHelper<TD, TS1, TS2>(
+        absl::bind_front(F<TD, TS1, TS2>::KelvinOp, kIsStripmine),
+        absl::StrCat(name_with_type, "VVM"), kNonScalar, kIsStripmine,
+        F<TD, TS1, TS2>::Op, F<TD, TS1, TS2>::kArgsGetter, false, false, true);
+  }
+
+  template <template <typename, typename, typename> class F, typename TD,
+            typename TS1, typename TS2, typename TNext1, typename... TNext>
+  void KelvinPairwiseVectorBinaryOpHelper(absl::string_view name) {
+    KelvinPairwiseVectorBinaryOpHelper<F, TD, TS1, TS2>(name);
+    KelvinPairwiseVectorBinaryOpHelper<F, TNext1, TNext...>(name);
+  }
+
+  template <template <typename, typename, typename> class F, typename TD,
+            typename TS1, typename TS2>
   void KelvinVectorVXBinaryOpHelper(absl::string_view name) {
     const auto name_with_type = absl::StrCat(name, KelvinTestTypeSuffix<TD>());
 
@@ -633,6 +658,22 @@
                              uint32_t, uint16_t>("VAccuOp");
 }
 
+// Selects adjacent element pairs from vs1 (dest_reg_sub_index == 0) or vs2.
+template <typename T>
+static std::pair<T, T> PairwiseOpArgsGetter(
+    int num_ops, int op_num, int dest_reg_sub_index, int element_index,
+    int vd_size, bool widen_dst, int src1_widen_factor, int vs1_size,
+    const std::vector<T> &vs1_value, int vs2_size, bool s2_scalar,
+    const std::vector<T> &vs2_value, T rs2_value, bool halftype_op,
+    bool vmvp_op) {
+  int start_index = (op_num * vs1_size) + (2 * element_index);
+  if (dest_reg_sub_index == 0) {
+    return {vs1_value[start_index], vs1_value[start_index + 1]};
+  }
+
+  return {vs2_value[start_index], vs2_value[start_index + 1]};
+}
+
 // Vector packed add
 template <typename Vd, typename Vs1, typename Vs2>
 struct VPaddOp {
@@ -642,6 +683,7 @@
   static void KelvinOp(bool strip_mine, Instruction *inst) {
     KelvinVPadd<Vd, Vs2>(strip_mine, inst);
   }
+  static constexpr auto kArgsGetter = PairwiseOpArgsGetter<Vs1>;
 };
 
 TEST_F(KelvinVectorInstructionsTest, VPadd) {
@@ -654,6 +696,16 @@
                                      uint32_t, uint16_t, uint16_t>("VPaddOp");
 }
 
+TEST_F(KelvinVectorInstructionsTest, VPaddVV) {
+  KelvinPairwiseVectorBinaryOpHelper<VPaddOp, int16_t, int8_t, int8_t, int32_t,
+                                     int16_t, int16_t>("VPaddVVOp");
+}
+
+TEST_F(KelvinVectorInstructionsTest, VPadduVV) {
+  KelvinPairwiseVectorBinaryOpHelper<VPaddOp, uint16_t, uint8_t, uint8_t,
+                                     uint32_t, uint16_t, uint16_t>("VPaddVVOp");
+}
+
 // Vector packed sub
 template <typename Vd, typename Vs1, typename Vs2>
 struct VPsubOp {