Add depthwise convolution init instruction

Add `adwinit`, which prepares the vector registers for depthwise convolution.

PiperOrigin-RevId: 556853240
diff --git a/sim/kelvin_format.bin_fmt b/sim/kelvin_format.bin_fmt
index 812187c..e84324e 100644
--- a/sim/kelvin_format.bin_fmt
+++ b/sim/kelvin_format.bin_fmt
@@ -114,4 +114,6 @@
     unsigned form[2]; // .vv==0b00, .vx==0b10, .xx==0b11
   overlays:  // For accumulation register support.
     unsigned vs1_low3[3] = vs1[2..0];
+    unsigned vs1_low2[2] = vs1[1..0];
+    unsigned vd_low2[2] = vd[1..0];
 };
diff --git a/sim/kelvin_memory.bin_fmt b/sim/kelvin_memory.bin_fmt
index 0776b69..37b8154 100644
--- a/sim/kelvin_memory.bin_fmt
+++ b/sim/kelvin_memory.bin_fmt
@@ -116,7 +116,8 @@
   // vcget
   vcget           : KelvinV2ArgsType : func2 == 0b01'0100, vs2 == 0, vs1 == 0, vd == 48, func1 == 0b111, form == 0b11;
 
-  // acset / actr
+  // acset / actr / adwinit
   acset           : KelvinV2ArgsType : func2 == 0b01'0000, vs2 == 0, m == 0, vd == 48;
   actr            : KelvinV2ArgsType : func2 == 0b01'0001, vs2 == 0, vs1_low3 == 0, m == 0, vd == 48;
+  adwinit         : KelvinV2ArgsType : func2 == 0b01'0010, vs2 == 0, vs1_low2 == 0, sz == 0b00, m == 0, vd_low2 == 0;
 };
diff --git a/sim/kelvin_memory.isa b/sim/kelvin_memory.isa
index 51e6119..88a380e 100644
--- a/sim/kelvin_memory.isa
+++ b/sim/kelvin_memory.isa
@@ -487,12 +487,15 @@
       disasm: "vcget", "%vd",
       semfunc: "absl::bind_front(&KelvinVcGet)";
 
-    // acset/actr
+    // acset/actr/adwinit
     acset{: vs1 : vd},
       disasm: "acset.v", "%vd, %vs1",
       semfunc: "absl::bind_front(&KelvinAcSet, /* is_transpose */ false)";
     actr{: vs1 : vd},
       disasm: "actr.v", "%vd, %vs1",
       semfunc: "absl::bind_front(&KelvinAcSet, /* is_transpose */ true)";
+    adwinit{: vs1 : vd},
+      disasm: "adwinit.v", "%vd, %vs1",
+      semfunc: "&KelvinADwInit";
   }
 }
diff --git a/sim/kelvin_vector_memory_instructions.cc b/sim/kelvin_vector_memory_instructions.cc
index 9385359..19f3d42 100644
--- a/sim/kelvin_vector_memory_instructions.cc
+++ b/sim/kelvin_vector_memory_instructions.cc
@@ -318,4 +318,26 @@
   }
 }
 
+// Copies the first quarter of each of four `vs1` registers (`inst` Source(0))
+// into the matching `vd` registers (Destination(0)) to prepare for depthwise
+// convolution. Due to compiler encoding, this op is typeless: data is 8-bit.
+void KelvinADwInit(const mpact::sim::generic::Instruction *inst) {
+  auto *state = static_cast<KelvinState *>(inst->state());
+  // Only set a quarter of each register (vector length in bytes / 4) to
+  // prepare for double-widening in depthwise convolution.
+  const uint32_t init_n = state->vector_length() / (8 * 4);
+  constexpr int kInitSize = 4;
+  auto vs = static_cast<RV32VectorSourceOperand *>(inst->Source(0));
+  auto vd = static_cast<RV32VectorDestinationOperand *>(inst->Destination(0));
+  for (int op_index = 0; op_index < kInitSize; ++op_index) {
+    auto source_span = vs->GetRegister(op_index)->data_buffer()->Get<uint8_t>();
+    DataBuffer *dest_db = vd->AllocateDataBuffer(op_index);
+    absl::Span<uint8_t> dest_span = dest_db->Get<uint8_t>();
+    for (uint32_t i = 0; i < init_n; ++i) {  // unsigned, matching init_n
+      dest_span[i] = source_span[i];
+    }
+    dest_db->Submit();
+  }
+}
+
 }  // namespace kelvin::sim
diff --git a/sim/kelvin_vector_memory_instructions.h b/sim/kelvin_vector_memory_instructions.h
index 9b141aa..ff53da4 100644
--- a/sim/kelvin_vector_memory_instructions.h
+++ b/sim/kelvin_vector_memory_instructions.h
@@ -30,6 +30,8 @@
 void KelvinAcSet(bool is_transpose,
                  const mpact::sim::generic::Instruction *inst);
 
+void KelvinADwInit(const mpact::sim::generic::Instruction *inst);
+
 }  // namespace kelvin::sim
 
 #endif  // SIM_KELVIN_VECTOR_MEMORY_INSTRUCTIONS_H_
diff --git a/sim/test/kelvin_vector_memory_instructions_test.cc b/sim/test/kelvin_vector_memory_instructions_test.cc
index 53d7a89..3445d6f 100644
--- a/sim/test/kelvin_vector_memory_instructions_test.cc
+++ b/sim/test/kelvin_vector_memory_instructions_test.cc
@@ -24,6 +24,7 @@
 
 // Semantic functions.
 using kelvin::sim::KelvinAcSet;
+using kelvin::sim::KelvinADwInit;
 using kelvin::sim::KelvinGetVl;
 using kelvin::sim::KelvinVcGet;
 using kelvin::sim::KelvinVLd;
@@ -453,7 +454,7 @@
       }
     }
   }
-  void AcSetTestHelper(bool is_transpose, bool expected_fail = false) {
+  void AcSetTestHelper(bool is_transpose) {
     constexpr int kVd = 48;
     constexpr int kVs = 16;
     const uint32_t kVLenInWord = state_->vector_length() / 32;
@@ -509,4 +510,46 @@
   AcSetTestHelper(/*is_transpose=*/true);
 }
 
+TEST_F(KelvinAccumulateInstructionTest, ADwInit) {
+  constexpr int kVd = 16;
+  constexpr int kVs = 32;
+  const uint32_t kVLenInByte = state_->vector_length() / 8;
+  constexpr int kInitLength = 4;
+  // Fill `kInitLength` consecutive vs and vd registers with random bytes.
+  std::vector<uint8_t> vs_value(kVLenInByte * kInitLength);
+  auto vs_span = absl::Span<uint8_t>(vs_value);
+  FillArrayWithRandomValues<uint8_t>(vs_span);
+  std::vector<uint8_t> vd_value(kVLenInByte * kInitLength);
+  auto vd_span = absl::Span<uint8_t>(vd_value);
+  FillArrayWithRandomValues<uint8_t>(vd_span);
+  for (int i = 0; i < kInitLength; ++i) {
+    auto vd_name = absl::StrCat("v", kVd + i);
+    auto vs_name = absl::StrCat("v", kVs + i);
+    SetVectorRegisterValues<uint8_t>(
+        {{vs_name, vs_span.subspan(kVLenInByte * i, kVLenInByte)},
+         {vd_name, vd_span.subspan(kVLenInByte * i, kVLenInByte)}});
+  }
+  auto instruction = CreateInstruction();
+  AppendVectorRegisterOperands(instruction.get(), kVLenInByte,
+                               1 /* src1_widen_factor */, kVs, {},
+                               false /* widen_dst */, {kVd});
+  instruction->set_semantic_function(&KelvinADwInit);
+  instruction->Execute();
+  // The first quarter of each `vd` register must now match its `vs` register.
+  for (int i = 0; i < kInitLength; ++i) {
+    auto vreg_num = kVd + i;
+    auto test_vreg = vreg_[vreg_num];
+    auto vreg_span = test_vreg->data_buffer()->Get<uint8_t>();
+    auto vref_num = kVs + i;
+    auto ref_vreg = vreg_[vref_num];
+    auto ref_span = ref_vreg->data_buffer()->Get<uint8_t>();
+    for (size_t element_index = 0; element_index < ref_span.size() / 4;
+         element_index++) {
+      EXPECT_EQ(vreg_span[element_index], ref_span[element_index])
+          << absl::StrCat("vreg[", vreg_num, "][", element_index, "] != ref[",
+                          vref_num, "][", element_index, "]");
+    }
+  }
+}
+
 }  // namespace