Add convolution accumulation register support

Kelvin has a set of instructions specifically for convolution and depth-wise convolution, and it has a special accumulation register to support them.

Add the instructions to read (vcget) and update (acset/actr) the accumulation register.

PiperOrigin-RevId: 555295471
diff --git a/sim/kelvin_format.bin_fmt b/sim/kelvin_format.bin_fmt
index 17f2114..812187c 100644
--- a/sim/kelvin_format.bin_fmt
+++ b/sim/kelvin_format.bin_fmt
@@ -112,4 +112,6 @@
     unsigned m[1];
     unsigned func1[3];
     unsigned form[2]; // .vv==0b00, .vx==0b10, .xx==0b11
+  overlays:  // For accumulation register support.
+    unsigned vs1_low3[3] = vs1[2..0];
 };
diff --git a/sim/kelvin_memory.bin_fmt b/sim/kelvin_memory.bin_fmt
index 626d03f..0776b69 100644
--- a/sim/kelvin_memory.bin_fmt
+++ b/sim/kelvin_memory.bin_fmt
@@ -112,4 +112,11 @@
   vstq_b_sp_xx_m  : KelvinV2ArgsType : func2 == 0b01'1110, vs2 != 0, sz == 0b00, m == 0b01, func1 == 0b111, form == 0b11;
   vstq_h_sp_xx_m  : KelvinV2ArgsType : func2 == 0b01'1110, vs2 != 0, sz == 0b01, m == 0b01, func1 == 0b111, form == 0b11;
   vstq_w_sp_xx_m  : KelvinV2ArgsType : func2 == 0b01'1110, vs2 != 0, sz == 0b10, m == 0b01, func1 == 0b111, form == 0b11;
+
+  // vcget
+  vcget           : KelvinV2ArgsType : func2 == 0b01'0100, vs2 == 0, vs1 == 0, vd == 48, func1 == 0b111, form == 0b11;
+
+  // acset / actr
+  acset           : KelvinV2ArgsType : func2 == 0b01'0000, vs2 == 0, m == 0, vd == 48;
+  actr            : KelvinV2ArgsType : func2 == 0b01'0001, vs2 == 0, vs1_low3 == 0, m == 0, vd == 48;
 };
diff --git a/sim/kelvin_memory.isa b/sim/kelvin_memory.isa
index 5ecb15a..b09fc13 100644
--- a/sim/kelvin_memory.isa
+++ b/sim/kelvin_memory.isa
@@ -481,5 +481,18 @@
     vstq_w_sp_xx_m{: vd, vs1, vs2 : vs1},
     disasm: "vstq.w.s.xx.m", "%vd, %vs1, %vs2",
     semfunc: "absl::bind_front(&KelvinVStQ<int32_t>, /*strip_mine*/ true)";
+
+    // vcget
+    vcget{: : vd},
+      disasm: "vcget", "%vd",
+      semfunc: "absl::bind_front(&KelvinVcGet)";
+
+    // acset/actr
+    acset{: vs1 : vd},
+      disasm: "acset.v", "%vd, %vs1",
+      semfunc: "absl::bind_front(&KelvinAcSet, /* is_transpose */ false)";
+    actr{: vs1 : vd},
+      disasm: "actr.v", "%vd, %vs1",
+      semfunc: "absl::bind_front(&KelvinAcSet, /* is_transpose */ true)";
   }
 }
diff --git a/sim/kelvin_state.cc b/sim/kelvin_state.cc
index 4aa02e2..2b1dbbc 100644
--- a/sim/kelvin_state.cc
+++ b/sim/kelvin_state.cc
@@ -19,6 +19,9 @@
     mpact::sim::util::AtomicMemoryOpInterface *atomic_memory)
     : mpact::sim::riscv::RiscVState(id, xlen, memory, atomic_memory) {
   set_vector_register_width(kVectorRegisterWidth);
+  for (int i = 0; i < acc_register_.size(); ++i) {
+    acc_register_.at(i).fill(0);
+  }
 }
 
 KelvinState::KelvinState(absl::string_view id,
diff --git a/sim/kelvin_state.h b/sim/kelvin_state.h
index abac4a2..42088cc 100644
--- a/sim/kelvin_state.h
+++ b/sim/kelvin_state.h
@@ -2,6 +2,7 @@
 #define SIM_KELVIN_STATE_H_
 
 #include <any>
+#include <array>
 #include <cstdint>
 #include <string>
 #include <utility>
@@ -19,6 +20,11 @@
 // https://spacebeaker.googlesource.com/shodan/experimental-kelvin/+/refs/heads/master/tools/iss/iss.cc#18.
 inline constexpr uint32_t kVectorLengthInBits = 256;
 
+template <typename T>
+using AccArrayTemplate = std::array<T, kVectorLengthInBits / 32>;
+
+using AccArrayType = AccArrayTemplate<uint32_t>;
+
 class KelvinState : public mpact::sim::riscv::RiscVState {
  public:
   KelvinState(absl::string_view id, mpact::sim::riscv::RiscVXlen xlen,
@@ -39,6 +45,9 @@
   void set_vector_length(uint32_t length) { vector_length_ = length; }
   uint32_t vector_length() const { return vector_length_; }
 
+  void SetAccRegister(uint32_t *data, int index);
+  AccArrayType *acc_vec(int index) { return &(acc_register_.at(index)); }
+
   void SetLogArgs(std::any data) { log_args_.emplace_back(std::move(data)); }
   std::string *clog_string() { return &clog_string_; }
   void PrintLog(absl::string_view format_string);
@@ -59,6 +68,9 @@
   std::string clog_string_;
   // Extra state handlers
   std::vector<absl::AnyInvocable<bool(const Instruction *)>> on_mpause_;
+
+  // Convolution accumulation register, set to be uint32[VLENW][VLENW].
+  AccArrayTemplate<AccArrayType> acc_register_;
 };
 
 }  // namespace kelvin::sim
diff --git a/sim/kelvin_vector_memory_instructions.cc b/sim/kelvin_vector_memory_instructions.cc
index 4072892..0941750 100644
--- a/sim/kelvin_vector_memory_instructions.cc
+++ b/sim/kelvin_vector_memory_instructions.cc
@@ -8,6 +8,7 @@
 #include "sim/kelvin_state.h"
 #include "absl/types/span.h"
 #include "riscv/riscv_register.h"
+#include "riscv/riscv_state.h"
 #include "mpact/sim/generic/data_buffer.h"
 #include "mpact/sim/generic/instruction.h"
 
@@ -279,4 +280,45 @@
 template void KelvinGetVl<int16_t>(bool, bool, bool, const Instruction *);
 template void KelvinGetVl<int32_t>(bool, bool, bool, const Instruction *);
 
+// Copy each row of the convolution accumulation register into the destination
+// vector registers (v48..55 in HW), zeroing each row once it has been read.
+void KelvinVcGet(const mpact::sim::generic::Instruction *inst) {
+  auto vd = static_cast<RV32VectorDestinationOperand *>(inst->Destination(0));
+  auto *state = static_cast<KelvinState *>(inst->state());
+  const uint32_t kVecLenInWord = state->vector_length() / 32;
+  for (int op_index = 0; op_index < kVecLenInWord; ++op_index) {
+    DataBuffer *dest_db = vd->AllocateDataBuffer(op_index);
+    absl::Span<uint32_t> dest_span = dest_db->Get<uint32_t>();
+    auto *acc_vec = state->acc_vec(op_index);
+    for (int i = 0; i < dest_span.size(); ++i) {
+      dest_span[i] = acc_vec->data()[i];
+    }
+    acc_vec->fill(0);  // Reading a row clears it.
+    dest_db->Submit();
+  }
+}
+
+// Copy the content of the source vector registers (vs) into the convolution
+// accumulation register, transposed (acc[i][op] = vs[op][i]) if is_transpose.
+// In HW, vs has to be 16-register aligned, and vd has to be set to v48.
+void KelvinAcSet(bool is_transpose,
+                 const mpact::sim::generic::Instruction *inst) {
+  auto vs = static_cast<RV32VectorSourceOperand *>(inst->Source(0));
+  auto *state = static_cast<KelvinState *>(inst->state());
+  const uint32_t kVecLenInWord = state->vector_length() / 32;
+  for (int op_index = 0; op_index < kVecLenInWord; ++op_index) {
+    auto source_span =
+        vs->GetRegister(op_index)->data_buffer()->Get<uint32_t>();
+    for (int i = 0; i < source_span.size(); ++i) {
+      if (is_transpose) {
+        auto *acc_vec = state->acc_vec(i);
+        acc_vec->at(op_index) = source_span[i];  // acc[i][op_index]
+      } else {
+        auto *acc_vec = state->acc_vec(op_index);
+        acc_vec->at(i) = source_span[i];  // acc[op_index][i]
+      }
+    }
+  }
+}
+
 }  // namespace kelvin::sim
diff --git a/sim/kelvin_vector_memory_instructions.h b/sim/kelvin_vector_memory_instructions.h
index f62a9c5..9b141aa 100644
--- a/sim/kelvin_vector_memory_instructions.h
+++ b/sim/kelvin_vector_memory_instructions.h
@@ -25,6 +25,11 @@
 void KelvinGetVl(bool strip_mine, bool is_rs1, bool is_rs2,
                  const mpact::sim::generic::Instruction *inst);
 
+void KelvinVcGet(const mpact::sim::generic::Instruction *inst);
+
+void KelvinAcSet(bool is_transpose,
+                 const mpact::sim::generic::Instruction *inst);
+
 }  // namespace kelvin::sim
 
 #endif  // SIM_KELVIN_VECTOR_MEMORY_INSTRUCTIONS_H_
diff --git a/sim/test/BUILD b/sim/test/BUILD
index 7cf1e1d..41d756f 100644
--- a/sim/test/BUILD
+++ b/sim/test/BUILD
@@ -122,6 +122,7 @@
         "//sim:kelvin_instructions",
         "@com_google_absl//absl/functional:bind_front",
         "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:span",
         "@com_google_googletest//:gtest_main",
         "@com_google_mpact-sim//mpact/sim/generic:instruction",
     ],
diff --git a/sim/test/kelvin_vector_memory_instructions_test.cc b/sim/test/kelvin_vector_memory_instructions_test.cc
index 4b4fc82..53d7a89 100644
--- a/sim/test/kelvin_vector_memory_instructions_test.cc
+++ b/sim/test/kelvin_vector_memory_instructions_test.cc
@@ -13,6 +13,7 @@
 #include "absl/functional/bind_front.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/string_view.h"
+#include "absl/types/span.h"
 #include "mpact/sim/generic/instruction.h"
 
 // This file contains the tests for testing kelvin vector memory instructions.
@@ -22,7 +23,9 @@
 using mpact::sim::generic::Instruction;
 
 // Semantic functions.
+using kelvin::sim::KelvinAcSet;
 using kelvin::sim::KelvinGetVl;
+using kelvin::sim::KelvinVcGet;
 using kelvin::sim::KelvinVLd;
 using kelvin::sim::KelvinVLdRegWrite;
 using kelvin::sim::KelvinVSt;
@@ -417,4 +420,93 @@
   GetVlTestHelper<int8_t, int16_t, int32_t>();
 }
 
+class KelvinAccumulateInstructionTest
+    : public kelvin::sim::test::KelvinVectorInstructionsTestBase {
+ public:
+  void VcGetTestHelper() {
+    constexpr int kVd = 48;
+    const uint32_t kVLenInWord = state_->vector_length() / 32;
+    // Set v48..55 with random values.
+    std::vector<uint32_t> vd_value(kVLenInWord * kVLenInWord);
+    auto vd_span = absl::Span<uint32_t>(vd_value);
+    FillArrayWithRandomValues<uint32_t>(vd_span);
+    for (int i = 0; i < kVLenInWord; ++i) {
+      auto vd_name = absl::StrCat("v", kVd + i);
+      SetVectorRegisterValues<uint32_t>(
+          {{vd_name, vd_span.subspan(kVLenInWord * i, kVLenInWord)}});
+    }
+    auto instruction = CreateInstruction();
+    AppendVectorRegisterOperands(instruction.get(), kVLenInWord,
+                                 1 /* src1_widen_factor */, {}, {},
+                                 false /* widen_dst */, {kVd});
+    instruction->set_semantic_function(&KelvinVcGet);
+    instruction->Execute();
+    // vcget should overwrite v48..55 with the still-zeroed accumulator.
+    for (int i = 0; i < kVLenInWord; ++i) {
+      auto vreg_num = kVd + i;
+      auto test_vreg = vreg_[vreg_num];
+      auto vreg_span = test_vreg->data_buffer()->Get<uint32_t>();
+      for (int element_index = 0; element_index < kVLenInWord;
+           element_index++) {
+        EXPECT_EQ(vreg_span[element_index], 0)
+            << absl::StrCat("vreg[", vreg_num, "][", element_index, "] != 0");
+      }
+    }
+  }
+  void AcSetTestHelper(bool is_transpose, bool expected_fail = false) {
+    constexpr int kVd = 48;
+    constexpr int kVs = 16;
+    const uint32_t kVLenInWord = state_->vector_length() / 32;
+    // Set v16..23, 48..55 with random values.
+    std::vector<uint32_t> vd_value(kVLenInWord * kVLenInWord);
+    auto vd_span = absl::Span<uint32_t>(vd_value);
+    FillArrayWithRandomValues<uint32_t>(vd_span);
+    for (int i = 0; i < kVLenInWord; ++i) {
+      auto vd_name = absl::StrCat("v", kVd + i);
+      auto vs_name = absl::StrCat("v", kVs + i);
+      SetVectorRegisterValues<uint32_t>(
+          {{vd_name, vd_span.subspan(kVLenInWord * i, kVLenInWord)}});
+      SetVectorRegisterValues<uint32_t>(
+          {{vs_name, vd_span.subspan(kVLenInWord * i, kVLenInWord)}});
+    }
+    auto instruction = CreateInstruction();
+    AppendVectorRegisterOperands(instruction.get(), kVLenInWord,
+                                 1 /* src1_widen_factor */, kVs, {},
+                                 false /* widen_dst */, {kVd});
+    instruction->set_semantic_function(
+        absl::bind_front(&KelvinAcSet, is_transpose));
+    instruction->Execute();
+    // Resulting acc_register_ should match `vs` content
+    for (int i = 0; i < kVLenInWord; ++i) {
+      auto vreg_num = kVs + i;
+      auto test_vreg = vreg_[vreg_num];
+      auto vreg_span = test_vreg->data_buffer()->Get<uint32_t>();
+      for (int element_index = 0; element_index < kVLenInWord;
+           element_index++) {
+        if (is_transpose) {
+          auto *acc_vec = state_->acc_vec(element_index);
+          EXPECT_EQ(vreg_span[element_index], acc_vec->at(i))
+              << absl::StrCat("vreg[", vreg_num, "][", element_index,
+                              "] != acc[", element_index, "][", i, "]");
+        } else {
+          auto *acc_vec = state_->acc_vec(i);
+          EXPECT_EQ(vreg_span[element_index], acc_vec->at(element_index))
+              << absl::StrCat("vreg[", vreg_num, "][", element_index,
+                              "] != acc[", i, "][", element_index, "]");
+        }
+      }
+    }
+  }
+};
+
+TEST_F(KelvinAccumulateInstructionTest, VcGet) { VcGetTestHelper(); }
+
+TEST_F(KelvinAccumulateInstructionTest, AcSet) {
+  AcSetTestHelper(/*is_transpose=*/false);  // Row-by-row copy.
+}
+
+TEST_F(KelvinAccumulateInstructionTest, AcTr) {
+  AcSetTestHelper(/*is_transpose=*/true);  // Transposed copy.
+}
+
 }  // namespace