Add convolution accumulation register support
Kelvin has a set of instructions specifically for convolution and depth-wise convolution, and it has a special accumulation register to support them.
Add the instructions to update the accumulation register.
PiperOrigin-RevId: 555295471
diff --git a/sim/kelvin_format.bin_fmt b/sim/kelvin_format.bin_fmt
index 17f2114..812187c 100644
--- a/sim/kelvin_format.bin_fmt
+++ b/sim/kelvin_format.bin_fmt
@@ -112,4 +112,6 @@
unsigned m[1];
unsigned func1[3];
unsigned form[2]; // .vv==0b00, .vx==0b10, .xx==0b11
+ overlays: // For accumulation register support.
+ unsigned vs1_low3[3] = vs1[2..0];
};
diff --git a/sim/kelvin_memory.bin_fmt b/sim/kelvin_memory.bin_fmt
index 626d03f..0776b69 100644
--- a/sim/kelvin_memory.bin_fmt
+++ b/sim/kelvin_memory.bin_fmt
@@ -112,4 +112,11 @@
vstq_b_sp_xx_m : KelvinV2ArgsType : func2 == 0b01'1110, vs2 != 0, sz == 0b00, m == 0b01, func1 == 0b111, form == 0b11;
vstq_h_sp_xx_m : KelvinV2ArgsType : func2 == 0b01'1110, vs2 != 0, sz == 0b01, m == 0b01, func1 == 0b111, form == 0b11;
vstq_w_sp_xx_m : KelvinV2ArgsType : func2 == 0b01'1110, vs2 != 0, sz == 0b10, m == 0b01, func1 == 0b111, form == 0b11;
+
+ // vcget
+ vcget : KelvinV2ArgsType : func2 == 0b01'0100, vs2 == 0, vs1 == 0, vd == 48, func1 == 0b111, form == 0b11;
+
+ // acset / actr
+ acset : KelvinV2ArgsType : func2 == 0b01'0000, vs2 == 0, m == 0, vd == 48;
+ actr : KelvinV2ArgsType : func2 == 0b01'0001, vs2 == 0, vs1_low3 == 0, m == 0, vd == 48;
};
diff --git a/sim/kelvin_memory.isa b/sim/kelvin_memory.isa
index 5ecb15a..b09fc13 100644
--- a/sim/kelvin_memory.isa
+++ b/sim/kelvin_memory.isa
@@ -481,5 +481,18 @@
vstq_w_sp_xx_m{: vd, vs1, vs2 : vs1},
disasm: "vstq.w.s.xx.m", "%vd, %vs1, %vs2",
semfunc: "absl::bind_front(&KelvinVStQ<int32_t>, /*strip_mine*/ true)";
+
+ // vcget
+ vcget{: : vd},
+ disasm: "vcget", "%vd",
+ semfunc: "absl::bind_front(&KelvinVcGet)";
+
+ // acset/actr
+ acset{: vs1 : vd},
+ disasm: "acset.v", "%vd, %vs1",
+ semfunc: "absl::bind_front(&KelvinAcSet, /* is_transpose */ false)";
+ actr{: vs1 : vd},
+ disasm: "actr.v", "%vd, %vs1",
+ semfunc: "absl::bind_front(&KelvinAcSet, /* is_transpose */ true)";
}
}
diff --git a/sim/kelvin_state.cc b/sim/kelvin_state.cc
index 4aa02e2..2b1dbbc 100644
--- a/sim/kelvin_state.cc
+++ b/sim/kelvin_state.cc
@@ -19,6 +19,9 @@
mpact::sim::util::AtomicMemoryOpInterface *atomic_memory)
: mpact::sim::riscv::RiscVState(id, xlen, memory, atomic_memory) {
set_vector_register_width(kVectorRegisterWidth);
+ for (int i = 0; i < acc_register_.size(); ++i) {
+ acc_register_.at(i).fill(0);
+ }
}
KelvinState::KelvinState(absl::string_view id,
diff --git a/sim/kelvin_state.h b/sim/kelvin_state.h
index abac4a2..42088cc 100644
--- a/sim/kelvin_state.h
+++ b/sim/kelvin_state.h
@@ -2,6 +2,7 @@
#define SIM_KELVIN_STATE_H_
#include <any>
+#include <array>
#include <cstdint>
#include <string>
#include <utility>
@@ -19,6 +20,11 @@
// https://spacebeaker.googlesource.com/shodan/experimental-kelvin/+/refs/heads/master/tools/iss/iss.cc#18.
inline constexpr uint32_t kVectorLengthInBits = 256;
+template <typename T>
+using AccArrayTemplate = std::array<T, kVectorLengthInBits / 32>;
+
+using AccArrayType = AccArrayTemplate<uint32_t>;
+
class KelvinState : public mpact::sim::riscv::RiscVState {
public:
KelvinState(absl::string_view id, mpact::sim::riscv::RiscVXlen xlen,
@@ -39,6 +45,9 @@
void set_vector_length(uint32_t length) { vector_length_ = length; }
uint32_t vector_length() const { return vector_length_; }
+ void SetAccRegister(uint32_t *data, int index);
+ AccArrayType *acc_vec(int index) { return &(acc_register_.at(index)); }
+
void SetLogArgs(std::any data) { log_args_.emplace_back(std::move(data)); }
std::string *clog_string() { return &clog_string_; }
void PrintLog(absl::string_view format_string);
@@ -59,6 +68,9 @@
std::string clog_string_;
// Extra state handlers
std::vector<absl::AnyInvocable<bool(const Instruction *)>> on_mpause_;
+
+ // Convolution accumulation register, set to be uint32[VLENW][VLENW].
+ AccArrayTemplate<AccArrayType> acc_register_;
};
} // namespace kelvin::sim
diff --git a/sim/kelvin_vector_memory_instructions.cc b/sim/kelvin_vector_memory_instructions.cc
index 4072892..0941750 100644
--- a/sim/kelvin_vector_memory_instructions.cc
+++ b/sim/kelvin_vector_memory_instructions.cc
@@ -8,6 +8,7 @@
#include "sim/kelvin_state.h"
#include "absl/types/span.h"
#include "riscv/riscv_register.h"
+#include "riscv/riscv_state.h"
#include "mpact/sim/generic/data_buffer.h"
#include "mpact/sim/generic/instruction.h"
@@ -279,4 +280,45 @@
template void KelvinGetVl<int16_t>(bool, bool, bool, const Instruction *);
template void KelvinGetVl<int32_t>(bool, bool, bool, const Instruction *);
+// Copy the convolution accumulation registers into vector registers, zeroing
+// each accumulator row once it has been read. In HW, vd is fixed to v48..55.
+void KelvinVcGet(const mpact::sim::generic::Instruction *inst) {
+ auto vd = static_cast<RV32VectorDestinationOperand *>(inst->Destination(0));
+ auto *state = static_cast<KelvinState *>(inst->state());
+ const uint32_t kVecLenInWord = state->vector_length() / 32;
+ for (int op_index = 0; op_index < kVecLenInWord; ++op_index) {
+ DataBuffer *dest_db = vd->AllocateDataBuffer(op_index);
+ absl::Span<uint32_t> dest_span = dest_db->Get<uint32_t>();
+ auto *acc_vec = state->acc_vec(op_index);
+ for (int i = 0; i < dest_span.size(); ++i) {
+ dest_span[i] = acc_vec->data()[i];
+ }
+ acc_vec->fill(0);
+ dest_db->Submit();
+ }
+}
+
+// Copy the general vector registers starting at vs into the convolution
+// accumulation register; when is_transpose is set (actr) the data is written
+// transposed. In HW, vs has to be 16-register aligned and vd has to be v48.
+void KelvinAcSet(bool is_transpose,
+ const mpact::sim::generic::Instruction *inst) {
+ auto vs = static_cast<RV32VectorSourceOperand *>(inst->Source(0));
+ auto *state = static_cast<KelvinState *>(inst->state());
+ const uint32_t kVecLenInWord = state->vector_length() / 32;
+ for (int op_index = 0; op_index < kVecLenInWord; ++op_index) {
+ auto source_span =
+ vs->GetRegister(op_index)->data_buffer()->Get<uint32_t>();
+ for (int i = 0; i < source_span.size(); ++i) {
+ if (is_transpose) {
+ auto *acc_vec = state->acc_vec(i);
+ acc_vec->at(op_index) = source_span[i];
+ } else {
+ auto *acc_vec = state->acc_vec(op_index);
+ acc_vec->at(i) = source_span[i];
+ }
+ }
+ }
+}
+
} // namespace kelvin::sim
diff --git a/sim/kelvin_vector_memory_instructions.h b/sim/kelvin_vector_memory_instructions.h
index f62a9c5..9b141aa 100644
--- a/sim/kelvin_vector_memory_instructions.h
+++ b/sim/kelvin_vector_memory_instructions.h
@@ -25,6 +25,11 @@
void KelvinGetVl(bool strip_mine, bool is_rs1, bool is_rs2,
const mpact::sim::generic::Instruction *inst);
+void KelvinVcGet(const mpact::sim::generic::Instruction *inst);
+
+void KelvinAcSet(bool is_transpose,
+ const mpact::sim::generic::Instruction *inst);
+
} // namespace kelvin::sim
#endif // SIM_KELVIN_VECTOR_MEMORY_INSTRUCTIONS_H_
diff --git a/sim/test/BUILD b/sim/test/BUILD
index 7cf1e1d..41d756f 100644
--- a/sim/test/BUILD
+++ b/sim/test/BUILD
@@ -122,6 +122,7 @@
"//sim:kelvin_instructions",
"@com_google_absl//absl/functional:bind_front",
"@com_google_absl//absl/strings",
+ "@com_google_absl//absl/types:span",
"@com_google_googletest//:gtest_main",
"@com_google_mpact-sim//mpact/sim/generic:instruction",
],
diff --git a/sim/test/kelvin_vector_memory_instructions_test.cc b/sim/test/kelvin_vector_memory_instructions_test.cc
index 4b4fc82..53d7a89 100644
--- a/sim/test/kelvin_vector_memory_instructions_test.cc
+++ b/sim/test/kelvin_vector_memory_instructions_test.cc
@@ -13,6 +13,7 @@
#include "absl/functional/bind_front.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
+#include "absl/types/span.h"
#include "mpact/sim/generic/instruction.h"
// This file contains the tests for testing kelvin vector memory instructions.
@@ -22,7 +23,9 @@
using mpact::sim::generic::Instruction;
// Semantic functions.
+using kelvin::sim::KelvinAcSet;
using kelvin::sim::KelvinGetVl;
+using kelvin::sim::KelvinVcGet;
using kelvin::sim::KelvinVLd;
using kelvin::sim::KelvinVLdRegWrite;
using kelvin::sim::KelvinVSt;
@@ -417,4 +420,93 @@
GetVlTestHelper<int8_t, int16_t, int32_t>();
}
+class KelvinAccumulateInstructionTest
+ : public kelvin::sim::test::KelvinVectorInstructionsTestBase {
+ public:
+ void VcGetTestHelper() {
+ constexpr int kVd = 48;
+ const uint32_t kVLenInWord = state_->vector_length() / 32;
+ // Set v48..55 with random values.
+ std::vector<uint32_t> vd_value(kVLenInWord * kVLenInWord);
+ auto vd_span = absl::Span<uint32_t>(vd_value);
+ FillArrayWithRandomValues<uint32_t>(vd_span);
+ for (int i = 0; i < kVLenInWord; ++i) {
+ auto vd_name = absl::StrCat("v", kVd + i);
+ SetVectorRegisterValues<uint32_t>(
+ {{vd_name, vd_span.subspan(kVLenInWord * i, kVLenInWord)}});
+ }
+ auto instruction = CreateInstruction();
+ AppendVectorRegisterOperands(instruction.get(), kVLenInWord,
+ 1 /* src1_widen_factor */, {}, {},
+ false /* widen_dst */, {kVd});
+ instruction->set_semantic_function(&KelvinVcGet);
+ instruction->Execute();
+ // Resulting v48..55 should all have 0 values
+ for (int i = 0; i < kVLenInWord; ++i) {
+ auto vreg_num = kVd + i;
+ auto test_vreg = vreg_[vreg_num];
+ auto vreg_span = test_vreg->data_buffer()->Get<uint32_t>();
+ for (int element_index = 0; element_index < kVLenInWord;
+ element_index++) {
+ EXPECT_EQ(vreg_span[element_index], 0)
+ << absl::StrCat("vreg[", vreg_num, "][", element_index, "] != 0");
+ }
+ }
+ }
+ void AcSetTestHelper(bool is_transpose, bool expected_fail = false) {
+ constexpr int kVd = 48;
+ constexpr int kVs = 16;
+ const uint32_t kVLenInWord = state_->vector_length() / 32;
+ // Set v16..23 (kVs) and v48..55 (kVd) with random values.
+ std::vector<uint32_t> vd_value(kVLenInWord * kVLenInWord);
+ auto vd_span = absl::Span<uint32_t>(vd_value);
+ FillArrayWithRandomValues<uint32_t>(vd_span);
+ for (int i = 0; i < kVLenInWord; ++i) {
+ auto vd_name = absl::StrCat("v", kVd + i);
+ auto vs_name = absl::StrCat("v", kVs + i);
+ SetVectorRegisterValues<uint32_t>(
+ {{vd_name, vd_span.subspan(kVLenInWord * i, kVLenInWord)}});
+ SetVectorRegisterValues<uint32_t>(
+ {{vs_name, vd_span.subspan(kVLenInWord * i, kVLenInWord)}});
+ }
+ auto instruction = CreateInstruction();
+ AppendVectorRegisterOperands(instruction.get(), kVLenInWord,
+ 1 /* src1_widen_factor */, kVs, {},
+ false /* widen_dst */, {kVd});
+ instruction->set_semantic_function(
+ absl::bind_front(&KelvinAcSet, is_transpose));
+ instruction->Execute();
+ // Resulting acc_register_ should match `vs` content
+ for (int i = 0; i < kVLenInWord; ++i) {
+ auto vreg_num = kVs + i;
+ auto test_vreg = vreg_[vreg_num];
+ auto vreg_span = test_vreg->data_buffer()->Get<uint32_t>();
+ for (int element_index = 0; element_index < kVLenInWord;
+ element_index++) {
+ if (is_transpose) {
+ auto *acc_vec = state_->acc_vec(element_index);
+ EXPECT_EQ(vreg_span[element_index], acc_vec->at(i))
+ << absl::StrCat("vreg[", vreg_num, "][", element_index,
+ "] != acc[", element_index, "][", i, "]");
+ } else {
+ auto *acc_vec = state_->acc_vec(i);
+ EXPECT_EQ(vreg_span[element_index], acc_vec->at(element_index))
+ << absl::StrCat("vreg[", vreg_num, "][", element_index,
+ "] != acc[", i, "][", element_index, "]");
+ }
+ }
+ }
+ }
+};
+
+TEST_F(KelvinAccumulateInstructionTest, VcGet) { VcGetTestHelper(); }
+
+TEST_F(KelvinAccumulateInstructionTest, AcSet) {
+ AcSetTestHelper(/*is_transpose=*/false);
+}
+
+TEST_F(KelvinAccumulateInstructionTest, AcTr) {
+ AcSetTestHelper(/*is_transpose=*/true);
+}
+
} // namespace