Add convolution accumulation register support. Kelvin has a set of instructions specifically for convolution and depth-wise convolution, and it has a special accumulation register to support them. Add the instructions to read (vcget) and update (acset/actr) the accumulation register. PiperOrigin-RevId: 555295471
diff --git a/sim/kelvin_format.bin_fmt b/sim/kelvin_format.bin_fmt index 17f2114..812187c 100644 --- a/sim/kelvin_format.bin_fmt +++ b/sim/kelvin_format.bin_fmt
@@ -112,4 +112,6 @@ unsigned m[1]; unsigned func1[3]; unsigned form[2]; // .vv==0b00, .vx==0b10, .xx==0b11 + overlays: // For accumulation register support. + unsigned vs1_low3[3] = vs1[2..0]; };
diff --git a/sim/kelvin_memory.bin_fmt b/sim/kelvin_memory.bin_fmt index 626d03f..0776b69 100644 --- a/sim/kelvin_memory.bin_fmt +++ b/sim/kelvin_memory.bin_fmt
@@ -112,4 +112,11 @@ vstq_b_sp_xx_m : KelvinV2ArgsType : func2 == 0b01'1110, vs2 != 0, sz == 0b00, m == 0b01, func1 == 0b111, form == 0b11; vstq_h_sp_xx_m : KelvinV2ArgsType : func2 == 0b01'1110, vs2 != 0, sz == 0b01, m == 0b01, func1 == 0b111, form == 0b11; vstq_w_sp_xx_m : KelvinV2ArgsType : func2 == 0b01'1110, vs2 != 0, sz == 0b10, m == 0b01, func1 == 0b111, form == 0b11; + + // vcget + vcget : KelvinV2ArgsType : func2 == 0b01'0100, vs2 == 0, vs1 == 0, vd == 48, func1 == 0b111, form == 0b11; + + // acset / actr + acset : KelvinV2ArgsType : func2 == 0b01'0000, vs2 == 0, m == 0, vd == 48; + actr : KelvinV2ArgsType : func2 == 0b01'0001, vs2 == 0, vs1_low3 == 0, m == 0, vd == 48; };
diff --git a/sim/kelvin_memory.isa b/sim/kelvin_memory.isa index 5ecb15a..b09fc13 100644 --- a/sim/kelvin_memory.isa +++ b/sim/kelvin_memory.isa
@@ -481,5 +481,18 @@ vstq_w_sp_xx_m{: vd, vs1, vs2 : vs1}, disasm: "vstq.w.s.xx.m", "%vd, %vs1, %vs2", semfunc: "absl::bind_front(&KelvinVStQ<int32_t>, /*strip_mine*/ true)"; + + // vcget + vcget{: : vd}, + disasm: "vcget", "%vd", + semfunc: "absl::bind_front(&KelvinVcGet)"; + + // acset/actr + acset{: vs1 : vd}, + disasm: "acset.v", "%vd, %vs1", + semfunc: "absl::bind_front(&KelvinAcSet, /* is_transpose */ false)"; + actr{: vs1 : vd}, + disasm: "actr.v", "%vd, %vs1", + semfunc: "absl::bind_front(&KelvinAcSet, /* is_transpose */ true)"; } }
diff --git a/sim/kelvin_state.cc b/sim/kelvin_state.cc index 4aa02e2..2b1dbbc 100644 --- a/sim/kelvin_state.cc +++ b/sim/kelvin_state.cc
@@ -19,6 +19,9 @@ mpact::sim::util::AtomicMemoryOpInterface *atomic_memory) : mpact::sim::riscv::RiscVState(id, xlen, memory, atomic_memory) { set_vector_register_width(kVectorRegisterWidth); + for (int i = 0; i < acc_register_.size(); ++i) { + acc_register_.at(i).fill(0); + } } KelvinState::KelvinState(absl::string_view id,
diff --git a/sim/kelvin_state.h b/sim/kelvin_state.h index abac4a2..42088cc 100644 --- a/sim/kelvin_state.h +++ b/sim/kelvin_state.h
@@ -2,6 +2,7 @@ #define SIM_KELVIN_STATE_H_ #include <any> +#include <array> #include <cstdint> #include <string> #include <utility> @@ -19,6 +20,11 @@ // https://spacebeaker.googlesource.com/shodan/experimental-kelvin/+/refs/heads/master/tools/iss/iss.cc#18. inline constexpr uint32_t kVectorLengthInBits = 256; +template <typename T> +using AccArrayTemplate = std::array<T, kVectorLengthInBits / 32>; + +using AccArrayType = AccArrayTemplate<uint32_t>; + class KelvinState : public mpact::sim::riscv::RiscVState { public: KelvinState(absl::string_view id, mpact::sim::riscv::RiscVXlen xlen, @@ -39,6 +45,9 @@ void set_vector_length(uint32_t length) { vector_length_ = length; } uint32_t vector_length() const { return vector_length_; } + void SetAccRegister(uint32_t *data, int index); + AccArrayType *acc_vec(int index) { return &(acc_register_.at(index)); } + void SetLogArgs(std::any data) { log_args_.emplace_back(std::move(data)); } std::string *clog_string() { return &clog_string_; } void PrintLog(absl::string_view format_string); @@ -59,6 +68,9 @@ std::string clog_string_; // Extra state handlers std::vector<absl::AnyInvocable<bool(const Instruction *)>> on_mpause_; + + // Convolution accumulation register, set to be uint32[VLENW][VLENW]. + AccArrayTemplate<AccArrayType> acc_register_; }; } // namespace kelvin::sim
diff --git a/sim/kelvin_vector_memory_instructions.cc b/sim/kelvin_vector_memory_instructions.cc index 4072892..0941750 100644 --- a/sim/kelvin_vector_memory_instructions.cc +++ b/sim/kelvin_vector_memory_instructions.cc
@@ -8,6 +8,7 @@ #include "sim/kelvin_state.h" #include "absl/types/span.h" #include "riscv/riscv_register.h" +#include "riscv/riscv_state.h" #include "mpact/sim/generic/data_buffer.h" #include "mpact/sim/generic/instruction.h" @@ -279,4 +280,45 @@ template void KelvinGetVl<int16_t>(bool, bool, bool, const Instruction *); template void KelvinGetVl<int32_t>(bool, bool, bool, const Instruction *); +// Copy the convolution accumulation registers into the general vector registers, then clear each accumulation row after it is copied. In HW, +// the destination is set to be v48..55. +void KelvinVcGet(const mpact::sim::generic::Instruction *inst) { + auto vd = static_cast<RV32VectorDestinationOperand *>(inst->Destination(0)); + auto *state = static_cast<KelvinState *>(inst->state()); + const uint32_t kVecLenInWord = state->vector_length() / 32; + for (int op_index = 0; op_index < kVecLenInWord; ++op_index) { + DataBuffer *dest_db = vd->AllocateDataBuffer(op_index); + absl::Span<uint32_t> dest_span = dest_db->Get<uint32_t>(); + auto *acc_vec = state->acc_vec(op_index); + for (int i = 0; i < dest_span.size(); ++i) { + dest_span[i] = acc_vec->data()[i]; + } + acc_vec->fill(0); + dest_db->Submit(); + } +} + +// Copy the content of the general vector registers into the convolution +// accumulation register; when `is_transpose` is true, source element [op_index][i] is written to acc[i][op_index] instead of acc[op_index][i]. In HW, vs has to be 16-register aligned, and vd has +// to be set to v48. NOTE(review): the actr encoding only checks that the low 3 bits of vs1 are zero -- confirm the required alignment. 
+void KelvinAcSet(bool is_transpose, + const mpact::sim::generic::Instruction *inst) { + auto vs = static_cast<RV32VectorSourceOperand *>(inst->Source(0)); + auto *state = static_cast<KelvinState *>(inst->state()); + const uint32_t kVecLenInWord = state->vector_length() / 32; + for (int op_index = 0; op_index < kVecLenInWord; ++op_index) { + auto source_span = + vs->GetRegister(op_index)->data_buffer()->Get<uint32_t>(); + for (int i = 0; i < source_span.size(); ++i) { + if (is_transpose) { + auto *acc_vec = state->acc_vec(i); + acc_vec->at(op_index) = source_span[i]; + } else { + auto *acc_vec = state->acc_vec(op_index); + acc_vec->at(i) = source_span[i]; + } + } + } +} + } // namespace kelvin::sim
diff --git a/sim/kelvin_vector_memory_instructions.h b/sim/kelvin_vector_memory_instructions.h index f62a9c5..9b141aa 100644 --- a/sim/kelvin_vector_memory_instructions.h +++ b/sim/kelvin_vector_memory_instructions.h
@@ -25,6 +25,11 @@ void KelvinGetVl(bool strip_mine, bool is_rs1, bool is_rs2, const mpact::sim::generic::Instruction *inst); +void KelvinVcGet(const mpact::sim::generic::Instruction *inst); + +void KelvinAcSet(bool is_transpose, + const mpact::sim::generic::Instruction *inst); + } // namespace kelvin::sim #endif // SIM_KELVIN_VECTOR_MEMORY_INSTRUCTIONS_H_
diff --git a/sim/test/BUILD b/sim/test/BUILD index 7cf1e1d..41d756f 100644 --- a/sim/test/BUILD +++ b/sim/test/BUILD
@@ -122,6 +122,7 @@ "//sim:kelvin_instructions", "@com_google_absl//absl/functional:bind_front", "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", "@com_google_googletest//:gtest_main", "@com_google_mpact-sim//mpact/sim/generic:instruction", ],
diff --git a/sim/test/kelvin_vector_memory_instructions_test.cc b/sim/test/kelvin_vector_memory_instructions_test.cc index 4b4fc82..53d7a89 100644 --- a/sim/test/kelvin_vector_memory_instructions_test.cc +++ b/sim/test/kelvin_vector_memory_instructions_test.cc
@@ -13,6 +13,7 @@ #include "absl/functional/bind_front.h" #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" +#include "absl/types/span.h" #include "mpact/sim/generic/instruction.h" // This file contains the tests for testing kelvin vector memory instructions. @@ -22,7 +23,9 @@ using mpact::sim::generic::Instruction; // Semantic functions. +using kelvin::sim::KelvinAcSet; using kelvin::sim::KelvinGetVl; +using kelvin::sim::KelvinVcGet; using kelvin::sim::KelvinVLd; using kelvin::sim::KelvinVLdRegWrite; using kelvin::sim::KelvinVSt; @@ -417,4 +420,93 @@ GetVlTestHelper<int8_t, int16_t, int32_t>(); } +class KelvinAccumulateInstructionTest + : public kelvin::sim::test::KelvinVectorInstructionsTestBase { + public: + void VcGetTestHelper() { + constexpr int kVd = 48; + const uint32_t kVLenInWord = state_->vector_length() / 32; + // Set v48..55 with random values. + std::vector<uint32_t> vd_value(kVLenInWord * kVLenInWord); + auto vd_span = absl::Span<uint32_t>(vd_value); + FillArrayWithRandomValues<uint32_t>(vd_span); + for (int i = 0; i < kVLenInWord; ++i) { + auto vd_name = absl::StrCat("v", kVd + i); + SetVectorRegisterValues<uint32_t>( + {{vd_name, vd_span.subspan(kVLenInWord * i, kVLenInWord)}}); + } + auto instruction = CreateInstruction(); + AppendVectorRegisterOperands(instruction.get(), kVLenInWord, + 1 /* src1_widen_factor */, {}, {}, + false /* widen_dst */, {kVd}); + instruction->set_semantic_function(&KelvinVcGet); + instruction->Execute(); + // Resulting v48..55 should all have 0 values: vcget overwrites them with the accumulation register, which this test never writes. + for (int i = 0; i < kVLenInWord; ++i) { + auto vreg_num = kVd + i; + auto test_vreg = vreg_[vreg_num]; + auto vreg_span = test_vreg->data_buffer()->Get<uint32_t>(); + for (int element_index = 0; element_index < kVLenInWord; + element_index++) { + EXPECT_EQ(vreg_span[element_index], 0) + << absl::StrCat("vreg[", vreg_num, "][", element_index, "] != 0"); + } + } + } + void AcSetTestHelper(bool is_transpose, bool expected_fail = false) { + constexpr int 
kVd = 48; + constexpr int kVs = 16; + const uint32_t kVLenInWord = state_->vector_length() / 32; + // Set v16..23 (vs) and v48..55 (vd) with random values. + std::vector<uint32_t> vd_value(kVLenInWord * kVLenInWord); + auto vd_span = absl::Span<uint32_t>(vd_value); + FillArrayWithRandomValues<uint32_t>(vd_span); + for (int i = 0; i < kVLenInWord; ++i) { + auto vd_name = absl::StrCat("v", kVd + i); + auto vs_name = absl::StrCat("v", kVs + i); + SetVectorRegisterValues<uint32_t>( + {{vd_name, vd_span.subspan(kVLenInWord * i, kVLenInWord)}}); + SetVectorRegisterValues<uint32_t>( + {{vs_name, vd_span.subspan(kVLenInWord * i, kVLenInWord)}}); + } + auto instruction = CreateInstruction(); + AppendVectorRegisterOperands(instruction.get(), kVLenInWord, + 1 /* src1_widen_factor */, kVs, {}, + false /* widen_dst */, {kVd}); + instruction->set_semantic_function( + absl::bind_front(&KelvinAcSet, is_transpose)); + instruction->Execute(); + // Resulting acc_register_ should match `vs` content (compared transposed when is_transpose is set) + for (int i = 0; i < kVLenInWord; ++i) { + auto vreg_num = kVs + i; + auto test_vreg = vreg_[vreg_num]; + auto vreg_span = test_vreg->data_buffer()->Get<uint32_t>(); + for (int element_index = 0; element_index < kVLenInWord; + element_index++) { + if (is_transpose) { + auto *acc_vec = state_->acc_vec(element_index); + EXPECT_EQ(vreg_span[element_index], acc_vec->at(i)) + << absl::StrCat("vreg[", vreg_num, "][", element_index, + "] != acc[", element_index, "][", i, "]"); + } else { + auto *acc_vec = state_->acc_vec(i); + EXPECT_EQ(vreg_span[element_index], acc_vec->at(element_index)) + << absl::StrCat("vreg[", vreg_num, "][", element_index, + "] != acc[", i, "][", element_index, "]"); + } + } + } + } +}; + +TEST_F(KelvinAccumulateInstructionTest, VcGet) { VcGetTestHelper(); } + +TEST_F(KelvinAccumulateInstructionTest, AcSet) { + AcSetTestHelper(/*is_transpose=*/false); +} + +TEST_F(KelvinAccumulateInstructionTest, AcTr) { + AcSetTestHelper(/*is_transpose=*/true); +} + } // namespace