Kelvin dwconv instructions.
PiperOrigin-RevId: 565407892
diff --git a/sim/kelvin_conv.bin_fmt b/sim/kelvin_conv.bin_fmt
index 9526fdd..627262d 100644
--- a/sim/kelvin_conv.bin_fmt
+++ b/sim/kelvin_conv.bin_fmt
@@ -1,4 +1,8 @@
instruction group KelvinVectorConvInst[32] : KelvinV3ArgsType {
// vconv
aconv_vxv : KelvinV3ArgsType : func3_hi == 0b10, func3_lo == 0b00, vd == 48, vs1_low4 == 0, vs2 != 0, vs3_low3 == 0, m == 0, form == 0b101;
+
+ // vdwconv: adwconv and vdwconv share one encoding and differ only in vs2_hi1 (1 = adwconv, 0 = vdwconv).
+ adwconv_vxv : KelvinV3ArgsType : func3_hi == 0b10, func3_lo == 0b10, vd_low2 == 0b00, vs2_hi1 == 0b1, m == 0, form == 0b101;
+ vdwconv_vxv : KelvinV3ArgsType : func3_hi == 0b10, func3_lo == 0b10, vd_low2 == 0b00, vs2_hi1 == 0b0, m == 0, form == 0b101;
};
diff --git a/sim/kelvin_encoding.cc b/sim/kelvin_encoding.cc
index 5b68af7..5597833 100644
--- a/sim/kelvin_encoding.cc
+++ b/sim/kelvin_encoding.cc
@@ -212,6 +212,11 @@
absl::StrCat(mpact::sim::riscv::RiscVState::kXregPrefix, reg_num),
mpact::sim::riscv::kXRegisterAliases[reg_num]);
}
+ if (opcode_ == OpcodeEnum::kAdwconvVxv ||
+ opcode_ == OpcodeEnum::kVdwconvVxv) {
+ return GetVectorRegisterSourceOp<mpact::sim::riscv::RVVectorRegister>(
+ state_, reg_num, /*strip_mine*/ false, /*widen_factor*/ 9);
+ }
if (opcode_ == OpcodeEnum::kAdwinit) {
// Borrow the strip_mine setting to set 4x registers.
strip_mine = true;
@@ -295,7 +300,8 @@
[this](int latency) -> DestinationOperandInterface * {
auto reg_num = encoding::kelvin_v2_args_type::ExtractVd(inst_word_);
bool strip_mine = encoding::kelvin_v2_args_type::ExtractM(inst_word_);
- if (opcode_ == OpcodeEnum::kVcget || opcode_ == OpcodeEnum::kAdwinit) {
+ if (opcode_ == OpcodeEnum::kVcget || opcode_ == OpcodeEnum::kAdwinit ||
+ opcode_ == OpcodeEnum::kVdwconvVxv) {
// Borrow the strip_mine setting to set 4x/8x registers although it is
// not part of the encoding.
strip_mine = true;
diff --git a/sim/kelvin_format.bin_fmt b/sim/kelvin_format.bin_fmt
index a8bc126..bf34e6f 100644
--- a/sim/kelvin_format.bin_fmt
+++ b/sim/kelvin_format.bin_fmt
@@ -130,6 +130,9 @@
unsigned func3_lo[2];
unsigned form[3]; // .vvv=0b001, .vxv=0b101.
overlays:
+ unsigned vs2_hi1[1] = vs2[5];
+ unsigned rs2[5] = vs2[4..0];
unsigned vs1_low4[4] = vs1[3..0];
unsigned vs3_low3[3] = vs3[2..0];
+ unsigned vd_low2[2] = vd[1..0];
};
diff --git a/sim/kelvin_mul.isa b/sim/kelvin_mul.isa
index cb2fa73..7d9ef03 100644
--- a/sim/kelvin_mul.isa
+++ b/sim/kelvin_mul.isa
@@ -537,5 +537,13 @@
aconv_vxv{: vs1, vs2, vs3 : vd},
disasm: "aconv.vxv", "%vd, %vs1, %vs2, %vs3",
semfunc: "&KelvinVConv";
+ // adwconv
+ adwconv_vxv{: vs1, rs2, vs3 : vd},
+ disasm: "adwconv.vxv", "%vd, %vs1, %rs2, %vs3",
+ semfunc: "absl::bind_front(&KelvinVDwconv, /*write_acc*/ false)";
+ // vdwconv: same semantic function as adwconv, but with write_acc set so the accumulator is also written to vd.
+ vdwconv_vxv{: vs1, rs2, vs3 : vd},
+ disasm: "vdwconv.vxv", "%vd, %vs1, %rs2, %vs3",
+ semfunc: "absl::bind_front(&KelvinVDwconv, /*write_acc*/ true)";
}
}
diff --git a/sim/kelvin_state.h b/sim/kelvin_state.h
index 52d271b..2c09e99 100644
--- a/sim/kelvin_state.h
+++ b/sim/kelvin_state.h
@@ -29,6 +29,8 @@
using AccArrayType = AccArrayTemplate<uint32_t>;
+using DwAccArray = std::array<uint32_t, 32>;
+
class KelvinState : public mpact::sim::riscv::RiscVState {
public:
KelvinState(absl::string_view id, mpact::sim::riscv::RiscVXlen xlen,
@@ -52,6 +54,10 @@
AccArrayType *acc_vec(int index) { return &(acc_register_[index]); }
AccArrayTemplate<AccArrayType> acc_register() const { return acc_register_; }
+ uint32_t *dw_acc_vec(int i) { return &depthwise_acc_register_[i]; }
+ DwAccArray &dw_acc_register() { return depthwise_acc_register_; }
+ const DwAccArray &dw_acc_register() const { return depthwise_acc_register_; }
+
void SetLogArgs(std::any data) { log_args_.emplace_back(std::move(data)); }
std::string *clog_string() { return &clog_string_; }
void PrintLog(absl::string_view format_string);
@@ -75,6 +81,9 @@
// Convolution accumulation register, set to be uint32[VLENW][VLENW].
AccArrayTemplate<AccArrayType> acc_register_;
+
+ // Depthwise convolution accumulation register.
+ DwAccArray depthwise_acc_register_;
};
} // namespace kelvin::sim
diff --git a/sim/kelvin_vector_convolution_instructions.cc b/sim/kelvin_vector_convolution_instructions.cc
index 4557362..9f4d006 100644
--- a/sim/kelvin_vector_convolution_instructions.cc
+++ b/sim/kelvin_vector_convolution_instructions.cc
@@ -13,11 +13,17 @@
#include "mpact/sim/generic/type_helpers.h"
namespace kelvin::sim {
+namespace {
+constexpr int kVectorLenInByte = kVectorLengthInBits / 8;
+constexpr int kVectorLenInWord = kVectorLenInByte / sizeof(uint32_t);
+constexpr int kDwRegisterProducts = 3;
+} // namespace
+using ::mpact::sim::generic::DataBuffer;
using ::mpact::sim::generic::operator*; // NOLINT: is used below (clang error).
-
-using mpact::sim::generic::GetInstructionSource;
-using mpact::sim::riscv::RV32VectorSourceOperand;
+using ::mpact::sim::generic::GetInstructionSource;
+using ::mpact::sim::riscv::RV32VectorDestinationOperand;
+using ::mpact::sim::riscv::RV32VectorSourceOperand;
// Implement the 3-arg vector convolution (im2col + matmul)
// vs1 (narrow) represents the starting register of 8 vector registers
@@ -26,8 +32,6 @@
// `vd` is not used in the op.
void KelvinVConv(Instruction *inst) {
auto state = static_cast<KelvinState *>(inst->state());
- constexpr int kVectorLenInByte = kVectorLengthInBits / 8;
- constexpr int kVectorLenInWord = kVectorLenInByte / sizeof(uint32_t);
vconv_cmd_t conv_cmd;
auto reg_data = GetInstructionSource<uint32_t>(inst, 1, 0);
@@ -111,4 +115,128 @@
}
}
+// Accumulates 3 32-element 8bit*8bit Hadamard products into the depthwise
+// accumulator. vs1 is the starting register of 9 vector activation registers,
+// of which three are selected by the command's regbase and sparsity fields.
+// vs3 (wide) is the starting register of a group of 3 vector registers.
+// rs2 stores the convolution command; an invalid sparsity traps and returns
+// before the accumulator or `vd` is touched. `vd` is written iff |write_acc|.
+void KelvinVDwconv(bool write_acc, Instruction *inst) {
+  KelvinState *state = static_cast<KelvinState *>(inst->state());
+  uint32_t reg_data = GetInstructionSource<uint32_t>(inst, 1, 0);
+  vdwconv_u8_t dwconv_cmd;
+  memcpy(&dwconv_cmd, &reg_data, sizeof(dwconv_cmd));
+
+  int vs1_idx[3];  // Register indices relative to vs1.
+  switch (dwconv_cmd.regbase) {  // 4-bit field: all 16 values are handled.
+    case 0:
+    case 1:
+    case 2:
+    case 3:
+    case 4:
+    case 5:
+    case 6:
+      vs1_idx[0] = dwconv_cmd.regbase;
+      vs1_idx[1] = dwconv_cmd.regbase + 1;
+      vs1_idx[2] = dwconv_cmd.regbase + 2;
+      break;
+    case 7:
+      vs1_idx[0] = 1;
+      vs1_idx[1] = 0;
+      vs1_idx[2] = 2;
+      break;
+    case 8:
+    case 9:
+    case 10:
+    case 11:
+      vs1_idx[0] = (2 * dwconv_cmd.regbase) - 15;
+      vs1_idx[1] = vs1_idx[0] + 1;
+      vs1_idx[2] = 0;
+      break;
+    case 12:
+    case 13:
+    case 14:
+    case 15:
+      vs1_idx[0] = (2 * dwconv_cmd.regbase) - 22;
+      vs1_idx[1] = 0;
+      vs1_idx[2] = 1;
+      break;
+  }
+
+  auto vs1 = static_cast<RV32VectorSourceOperand *>(inst->Source(0));
+  absl::Span<uint32_t> vs10_span =
+      vs1->GetRegister(vs1_idx[0])->data_buffer()->Get<uint32_t>();
+  absl::Span<uint32_t> vs11_span =
+      vs1->GetRegister(vs1_idx[1])->data_buffer()->Get<uint32_t>();
+  absl::Span<uint32_t> vs12_span =
+      vs1->GetRegister(vs1_idx[2])->data_buffer()->Get<uint32_t>();
+  uint32_t a_data[kDwRegisterProducts * kVectorLenInWord];
+  switch (dwconv_cmd.sparsity) {
+    case 0:
+      memcpy(a_data, vs10_span.data(), 8 * sizeof(uint32_t));
+      memcpy(a_data + 8, vs11_span.data(), 8 * sizeof(uint32_t));
+      memcpy(a_data + 16, vs12_span.data(), 8 * sizeof(uint32_t));
+      break;
+    case 1:
+      a_data[0] = vs10_span[7];
+      memcpy(a_data + 1, vs11_span.data(), 7 * sizeof(uint32_t));
+      memcpy(a_data + 8, vs11_span.data(), 8 * sizeof(uint32_t));
+      memcpy(a_data + 16, vs11_span.data() + 1, 7 * sizeof(uint32_t));
+      a_data[23] = vs12_span[0];
+      break;
+    case 2:
+      memcpy(a_data, vs10_span.data(), 8 * sizeof(uint32_t));
+      memcpy(a_data + 8, vs10_span.data() + 1, 7 * sizeof(uint32_t));
+      a_data[15] = vs11_span[0];
+      memcpy(a_data + 16, vs10_span.data() + 2, 6 * sizeof(uint32_t));
+      a_data[22] = vs11_span[0];
+      a_data[23] = vs11_span[1];
+      break;
+    default:  // Invalid state enum
+      state->Trap(/*is_interrupt=*/false, /*trap_value=*/0,
+                  *mpact::sim::riscv::ExceptionCode::kIllegalInstruction,
+                  /*epc=*/inst->address(), inst);
+      return;  // a_data is uninitialized here; do not accumulate or write vd.
+  }
+
+  auto vs3 = static_cast<RV32VectorSourceOperand *>(inst->Source(2));
+  int32_t *acc = reinterpret_cast<int32_t *>(state->dw_acc_vec(0));
+
+  for (int r = 0; r < kDwRegisterProducts; r++) {
+    absl::Span<uint8_t> a_span = absl::Span<uint8_t>(
+        reinterpret_cast<uint8_t *>(a_data + (r * kVectorLenInWord)),
+        kVectorLenInByte);
+    absl::Span<uint8_t> b_span =
+        vs3->GetRegister(r)->data_buffer()->Get<uint8_t>();
+
+    for (int i = 0; i < kVectorLenInByte; i++) {
+      int32_t a =
+          dwconv_cmd.sdata1 ? static_cast<int8_t>(a_span[i]) : a_span[i];
+      int32_t b =
+          dwconv_cmd.sdata2 ? static_cast<int8_t>(b_span[i]) : b_span[i];
+      a += dwconv_cmd.sbias1;
+      b += dwconv_cmd.sbias2;
+
+      constexpr static int interleave[4] = {0, 2, 1, 3};
+      int acc_reg = interleave[(i & 0b11)];
+      int reg_offset = i >> 2;
+      acc[kVectorLenInWord * acc_reg + reg_offset] += a * b;
+    }
+  }
+
+  if (!write_acc) {
+    return;
+  }
+
+  auto vd = static_cast<RV32VectorDestinationOperand *>(inst->Destination(0));
+  for (int i = 0; i < 4; i++) {
+    DataBuffer *dest_db = vd->AllocateDataBuffer(i);
+    absl::Span<uint32_t> dest_span = dest_db->Get<uint32_t>();
+    for (int j = 0; j < kVectorLenInWord; j++) {
+      dest_span[j] = acc[i * kVectorLenInWord + j];
+    }
+    dest_db->Submit();
+  }
+}
+
} // namespace kelvin::sim
diff --git a/sim/kelvin_vector_convolution_instructions.h b/sim/kelvin_vector_convolution_instructions.h
index 6d08f98..23d01c7 100644
--- a/sim/kelvin_vector_convolution_instructions.h
+++ b/sim/kelvin_vector_convolution_instructions.h
@@ -20,8 +20,22 @@
uint32_t sdata2 : 1; // 0
} vconv_cmd_t;
+// Command structure for the depthwise convolution instruction.
+typedef struct KelvinVDwconvCmd {
+ uint32_t mode : 2; // 1:0 (not read by KelvinVDwconv)
+ uint32_t sparsity : 2; // 3:2, activation selection pattern; 3 is invalid and traps
+ uint32_t regbase : 4; // 7:4, selects which three of the 9 vs1 registers are used
+ uint32_t rsvd : 4; // 11:8, reserved
+ int32_t sbias1 : 9; // 20:12, signed bias added to each vs1 element
+ uint32_t sdata1 : 1; // 21, 1 = vs1 elements are treated as signed 8-bit
+ int32_t sbias2 : 9; // 30:22, signed bias added to each vs3 element
+ uint32_t sdata2 : 1; // 31, 1 = vs3 elements are treated as signed 8-bit
+} vdwconv_u8_t;
+
void KelvinVConv(Instruction *inst);
+void KelvinVDwconv(bool write_acc, Instruction *inst);
+
} // namespace kelvin::sim
#endif // SIM_KELVIN_VECTOR_CONVOLUTION_INSTRUCTIONS_H_
diff --git a/sim/kelvin_vector_memory_instructions.cc b/sim/kelvin_vector_memory_instructions.cc
index fd1f099..84069a8 100644
--- a/sim/kelvin_vector_memory_instructions.cc
+++ b/sim/kelvin_vector_memory_instructions.cc
@@ -379,6 +379,12 @@
auto vd = static_cast<RV32VectorDestinationOperand *>(inst->Destination(0));
for (int op_index = 0; op_index < kInitSize; ++op_index) {
auto source_span = vs->GetRegister(op_index)->data_buffer()->Get<uint8_t>();
+ uint8_t *dwacc_span =
+ reinterpret_cast<uint8_t *>(state->dw_acc_vec(8 * op_index));
+ for (int i = 0; i < 32; i++) {
+ dwacc_span[i] = source_span[i];
+ }
+
DataBuffer *dest_db = vd->AllocateDataBuffer(op_index);
absl::Span<uint8_t> dest_span = dest_db->Get<uint8_t>();
for (int i = 0; i < init_n; ++i) {
diff --git a/sim/test/BUILD b/sim/test/BUILD
index 7db1c7b..816ee4d 100644
--- a/sim/test/BUILD
+++ b/sim/test/BUILD
@@ -136,6 +136,7 @@
deps = [
":kelvin_vector_instructions_test_base",
"//sim:kelvin_instructions",
+ "@com_google_absl//absl/functional:bind_front",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/types:span",
"@com_google_googletest//:gtest_main",
diff --git a/sim/test/kelvin_encoding_test.cc b/sim/test/kelvin_encoding_test.cc
index 96c4500..8e5285d 100644
--- a/sim/test/kelvin_encoding_test.cc
+++ b/sim/test/kelvin_encoding_test.cc
@@ -100,6 +100,7 @@
constexpr uint32_t kVAccsBase = 0b001010'000001'000000'00'001000'0'100'00;
constexpr uint32_t kVAddBase = 0b000000'000000'000001'00'000010'0'000'00;
constexpr uint32_t kAconvBase = 0b001000'000001'010000'10'110000'0'00'101;
+constexpr uint32_t kVdwconvBase = 0b001000'000001'010000'10'110000'0'10'101;
class KelvinEncodingTest : public testing::Test {
protected:
@@ -497,6 +498,12 @@
kVAddBase, OpcodeEnum::kVaddBVv, SourceOpEnum::kVs1);
EXPECT_EQ(v_src->size(), 1);
delete v_src;
+
+ // Test vdwconv.vxv
+ v_src = EncodeOpHelper<RV32VectorSourceOperand>(
+ kVdwconvBase, OpcodeEnum::kVdwconvVxv, SourceOpEnum::kVs1);
+ EXPECT_EQ(v_src->size(), 9);
+ delete v_src;
}
TEST_F(KelvinEncodingTest, KelvinWideningVd) {
@@ -588,6 +595,12 @@
DestOpEnum::kVd);
EXPECT_EQ(v_dest->size(), 8);
delete v_dest;
+
+ // Test vdwconv
+ v_dest = EncodeOpHelper<RV32VectorDestOperand>(
+ kVdwconvBase, OpcodeEnum::kVdwconvVxv, DestOpEnum::kVd);
+ EXPECT_EQ(v_dest->size(), 4);
+ delete v_dest;
}
TEST_F(KelvinEncodingTest, KelvinEncodeVs3) {
diff --git a/sim/test/kelvin_vector_convolution_instructions_test.cc b/sim/test/kelvin_vector_convolution_instructions_test.cc
index e064785..80c98c1 100644
--- a/sim/test/kelvin_vector_convolution_instructions_test.cc
+++ b/sim/test/kelvin_vector_convolution_instructions_test.cc
@@ -4,11 +4,13 @@
#include <cstdint>
#include <cstring>
#include <functional>
+#include <type_traits>
#include <vector>
#include "sim/test/kelvin_vector_instructions_test_base.h"
#include "sim/test/testfiles/kelvin_vector_convolution_testdata.h"
#include "googletest/include/gtest/gtest.h"
+#include "absl/functional/bind_front.h"
#include "absl/strings/str_cat.h"
#include "absl/types/span.h"
#include "riscv/riscv_state.h"
@@ -20,10 +22,210 @@
// Semantic functions.
using kelvin::sim::KelvinVConv;
+using kelvin::sim::KelvinVDwconv;
class KelvinVectorConvolutionInstructionsTest
: public kelvin::sim::test::KelvinVectorInstructionsTestBase {
protected:
+  // Write [offset, offset+31] into `reg`, accounting for internal dwconv swizzle
+  template <typename T>
+  void SetRegisterAscending(int reg, T offset) {
+    std::vector<T> data(32);
+    for (uint32_t i = 0; i < data.size(); i++) {
+      uint32_t lane = 0;  // Byte lane in a 32-bit word; avoids shadowing `reg`.
+      switch ((i >> 3) & 0b11) {
+        case 0:
+          lane = 0;
+          break;
+        case 1:
+          lane = 2;
+          break;
+        case 2:
+          lane = 1;
+          break;
+        case 3:
+          lane = 3;
+          break;
+      }
+      uint32_t pos = i & 0b111;
+      uint32_t target = (pos << 2) | lane;
+      data[target] = i + offset;
+    }
+
+    auto reg_name = absl::StrCat("v", reg);
+    SetVectorRegisterValues<T>({{reg_name, absl::Span<T>(data)}});
+  }
+
+ template <typename T>
+ void SetRegisterConstant(int reg, T val) {
+ std::vector<T> data(32, val);
+ auto reg_name = absl::StrCat("v", reg);
+ SetVectorRegisterValues<T>({{reg_name, absl::Span<T>(data)}});
+ }
+
+ void ResetDwAccumulator() { state_->dw_acc_register().fill(0); }
+
+ template <bool kWriteAcc = true>
+ void ExecuteDwconv(bool expect_fail = false) {
+ constexpr int kVs1 = 0;
+ constexpr int kVs3 = 16;
+ constexpr int kVd = 48;
+ InstructionPtr instruction = CreateInstruction();
+ instruction->set_semantic_function(
+ absl::bind_front(KelvinVDwconv, kWriteAcc));
+ AppendVectorRegisterOperands(instruction.get(), 1, 9, kVs1, {},
+ false /* widen_dst*/, {});
+ AppendRegisterOperands(instruction.get(), {kelvin::sim::test::kRs2Name},
+ {});
+ AppendVectorRegisterOperands(instruction.get(), 1, 3, kVs3, {},
+ false /* widen_dst*/, {});
+ if (kWriteAcc) {
+ std::vector<kelvin::sim::test::RegisterBase *> reg_vec;
+ for (int i = 0; i < 4; i++) {
+ auto reg_name = absl::StrCat("v", kVd + i);
+ reg_vec.push_back(
+ state_->GetRegister<kelvin::sim::test::RVVectorRegister>(reg_name)
+ .first);
+ }
+ auto *op = new kelvin::sim::test::RV32VectorDestinationOperand(
+ absl::Span<kelvin::sim::test::RegisterBase *>(reg_vec), 0,
+ absl::StrCat("v", kVd));
+ instruction->AppendDestination(op);
+ }
+
+ execution_fail_ = false;
+ state_->set_on_trap(trap_call_back_);
+ instruction->Execute();
+ EXPECT_EQ(expect_fail, execution_fail_);
+ }
+
+ template <bool kWriteAcc = true>
+ void TestAccumulatorAndRegisters(
+ std::function<void(int /*index*/, int32_t /*value*/)> f) {
+ constexpr int kVd = 48;
+
+ // Check internal accumulator.
+ auto acc_vec = state_->dw_acc_register();
+ for (int i = 0; i < 32; i++) {
+ f(i, acc_vec[i]);
+ }
+
+ // Check Registers
+ if (kWriteAcc) {
+ for (int r = 0; r < 4; r++) {
+ auto reg = state_
+ ->GetRegister<kelvin::sim::test::RVVectorRegister>(
+ absl::StrCat("v", kVd + r))
+ .first;
+ auto reg_data = reg->data_buffer()->Get<int32_t>();
+ for (int elem = 0; elem < 8; elem++) {
+ int i = (r * 8) + elem;
+ int32_t value = reg_data[elem];
+ f(i, value);
+ }
+ }
+ }
+ }
+
+ void DepthwiseConvolutionBiasTestHelper(uint32_t sbias1, uint32_t sbias2) {
+ constexpr int kVs1 = 0;
+ constexpr int kVs3 = 16;
+
+ kelvin::sim::vdwconv_u8_t dwconv_cmd;
+ memset(&dwconv_cmd, 0, sizeof(dwconv_cmd));
+ dwconv_cmd.sdata1 = 1;
+ dwconv_cmd.sdata2 = 1;
+ dwconv_cmd.sbias1 = sbias1;
+ dwconv_cmd.sbias2 = sbias2;
+ uint32_t vdwconv_cmd_value;
+ memcpy(&vdwconv_cmd_value, &dwconv_cmd, sizeof(vdwconv_cmd_value));
+ SetRegisterValues<uint32_t>(
+ {{kelvin::sim::test::kRs2Name, vdwconv_cmd_value}});
+
+ ResetDwAccumulator();
+ SetRegisterAscending<int8_t>(kVs1, 1 - sbias1);
+ SetRegisterConstant<int8_t>(kVs1 + 1, -sbias1);
+ SetRegisterConstant<int8_t>(kVs1 + 2, -sbias1);
+
+ SetRegisterConstant<int8_t>(kVs3, 1 - sbias2);
+ SetRegisterConstant<int8_t>(kVs3 + 1, -sbias2);
+ SetRegisterConstant<int8_t>(kVs3 + 2, -sbias2);
+ ExecuteDwconv();
+
+ TestAccumulatorAndRegisters(
+ [](int i, int32_t value) { EXPECT_EQ(i + 1, value); });
+ }
+
+ template <typename T, bool kWriteAcc = true>
+ void DepthwiseConvolutionRegbaseTestHelper(int regbase, int prev, int curr,
+ int next) {
+ constexpr int kVs1 = 0;
+ constexpr int kVs3 = 16;
+
+ kelvin::sim::vdwconv_u8_t dwconv_cmd;
+ memset(&dwconv_cmd, 0, sizeof(dwconv_cmd));
+ dwconv_cmd.regbase = regbase;
+ if (std::is_signed<T>::value) {
+ dwconv_cmd.sdata1 = 1;
+ dwconv_cmd.sdata2 = 1;
+ }
+ uint32_t vdwconv_cmd_value;
+ memcpy(&vdwconv_cmd_value, &dwconv_cmd, sizeof(vdwconv_cmd_value));
+ SetRegisterValues<uint32_t>(
+ {{kelvin::sim::test::kRs2Name, vdwconv_cmd_value}});
+
+ // Test prev reg
+ {
+ ResetDwAccumulator();
+
+ SetRegisterAscending<T>(kVs1 + prev, 1);
+ SetRegisterConstant<T>(kVs1 + curr, 0);
+ SetRegisterConstant<T>(kVs1 + next, 0);
+
+ SetRegisterConstant<T>(kVs3, 1);
+ SetRegisterConstant<T>(kVs3 + 1, 0);
+ SetRegisterConstant<T>(kVs3 + 2, 0);
+
+ ExecuteDwconv<kWriteAcc>();
+ TestAccumulatorAndRegisters<kWriteAcc>(
+ [](int i, int32_t value) { EXPECT_EQ(i + 1, value); });
+ }
+
+ // Test curr reg
+ {
+ ResetDwAccumulator();
+
+ SetRegisterConstant<T>(kVs1 + prev, 0);
+ SetRegisterAscending<T>(kVs1 + curr, 1);
+ SetRegisterConstant<T>(kVs1 + next, 0);
+
+ SetRegisterConstant<T>(kVs3, 0);
+ SetRegisterConstant<T>(kVs3 + 1, 2);
+ SetRegisterConstant<T>(kVs3 + 2, 0);
+
+ ExecuteDwconv<kWriteAcc>();
+ TestAccumulatorAndRegisters<kWriteAcc>(
+ [](int i, int32_t value) { EXPECT_EQ(2 * (i + 1), value); });
+ }
+
+ // Test next reg
+ {
+ ResetDwAccumulator();
+
+ SetRegisterConstant<T>(kVs1 + prev, 0);
+ SetRegisterConstant<T>(kVs1 + curr, 0);
+ SetRegisterAscending<T>(kVs1 + next, 1);
+
+ SetRegisterConstant<T>(kVs3, 0);
+ SetRegisterConstant<T>(kVs3 + 1, 0);
+ SetRegisterConstant<T>(kVs3 + 2, 3);
+
+ ExecuteDwconv<kWriteAcc>();
+ TestAccumulatorAndRegisters<kWriteAcc>(
+ [](int i, int32_t value) { EXPECT_EQ(3 * (i + 1), value); });
+ }
+ }
+
void ConvolutionTestHelper(const kelvin::sim::vconv_cmd_t vconv_cmd,
bool expect_fail = false) {
constexpr int kVs1 = 0;
@@ -161,4 +363,231 @@
.sdata2 = true};
ConvolutionTestHelper(vconv_cmd, true);
}
+
+TEST_F(KelvinVectorConvolutionInstructionsTest, VDwconvRegbase) {
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, true>(0, 0, 1, 2);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, true>(1, 1, 2, 3);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, true>(2, 2, 3, 4);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, true>(3, 3, 4, 5);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, true>(4, 4, 5, 6);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, true>(5, 5, 6, 7);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, true>(6, 6, 7, 8);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, true>(7, 1, 0, 2);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, true>(8, 1, 2, 0);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, true>(9, 3, 4, 0);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, true>(10, 5, 6, 0);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, true>(11, 7, 8, 0);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, true>(12, 2, 0, 1);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, true>(13, 4, 0, 1);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, true>(14, 6, 0, 1);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, true>(15, 8, 0, 1);
+
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, true>(0, 0, 1, 2);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, true>(1, 1, 2, 3);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, true>(2, 2, 3, 4);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, true>(3, 3, 4, 5);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, true>(4, 4, 5, 6);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, true>(5, 5, 6, 7);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, true>(6, 6, 7, 8);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, true>(7, 1, 0, 2);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, true>(8, 1, 2, 0);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, true>(9, 3, 4, 0);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, true>(10, 5, 6, 0);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, true>(11, 7, 8, 0);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, true>(12, 2, 0, 1);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, true>(13, 4, 0, 1);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, true>(14, 6, 0, 1);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, true>(15, 8, 0, 1);
+
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, false>(0, 0, 1, 2);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, false>(1, 1, 2, 3);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, false>(2, 2, 3, 4);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, false>(3, 3, 4, 5);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, false>(4, 4, 5, 6);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, false>(5, 5, 6, 7);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, false>(6, 6, 7, 8);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, false>(7, 1, 0, 2);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, false>(8, 1, 2, 0);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, false>(9, 3, 4, 0);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, false>(10, 5, 6, 0);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, false>(11, 7, 8, 0);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, false>(12, 2, 0, 1);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, false>(13, 4, 0, 1);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, false>(14, 6, 0, 1);
+ DepthwiseConvolutionRegbaseTestHelper<uint8_t, false>(15, 8, 0, 1);
+
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, false>(0, 0, 1, 2);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, false>(1, 1, 2, 3);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, false>(2, 2, 3, 4);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, false>(3, 3, 4, 5);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, false>(4, 4, 5, 6);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, false>(5, 5, 6, 7);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, false>(6, 6, 7, 8);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, false>(7, 1, 0, 2);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, false>(8, 1, 2, 0);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, false>(9, 3, 4, 0);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, false>(10, 5, 6, 0);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, false>(11, 7, 8, 0);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, false>(12, 2, 0, 1);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, false>(13, 4, 0, 1);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, false>(14, 6, 0, 1);
+ DepthwiseConvolutionRegbaseTestHelper<int8_t, false>(15, 8, 0, 1);
+}
+
+TEST_F(KelvinVectorConvolutionInstructionsTest, VDwconvSignBiases) {
+ DepthwiseConvolutionBiasTestHelper(2, 0);
+ DepthwiseConvolutionBiasTestHelper(0, 3);
+ DepthwiseConvolutionBiasTestHelper(5, 5);
+}
+
+TEST_F(KelvinVectorConvolutionInstructionsTest, VDwconvSparsity1) {
+ constexpr int kVs1 = 0;
+ constexpr int kVs3 = 16;
+
+ kelvin::sim::vdwconv_u8_t dwconv_cmd;
+ memset(&dwconv_cmd, 0, sizeof(dwconv_cmd));
+ dwconv_cmd.regbase = 0;
+ dwconv_cmd.sparsity = 1;
+ uint32_t vdwconv_cmd_value;
+ memcpy(&vdwconv_cmd_value, &dwconv_cmd, sizeof(vdwconv_cmd_value));
+ SetRegisterValues<uint32_t>(
+ {{kelvin::sim::test::kRs2Name, vdwconv_cmd_value}});
+
+ {
+ ResetDwAccumulator();
+
+ SetRegisterConstant<uint8_t>(kVs1, 42);
+ SetRegisterAscending<uint8_t>(kVs1 + 1, 1);
+ SetRegisterConstant<uint8_t>(kVs1 + 2, 0);
+
+ SetRegisterConstant<uint8_t>(kVs3, 1);
+ SetRegisterConstant<uint8_t>(kVs3 + 1, 0);
+ SetRegisterConstant<uint8_t>(kVs3 + 2, 0);
+
+ ExecuteDwconv();
+ TestAccumulatorAndRegisters([](int i, int32_t value) {
+ EXPECT_EQ((i % 8 == 0 ? 42 : i), value)
+ << "Incorrect value at index " << i;
+ });
+ }
+
+ {
+ ResetDwAccumulator();
+
+ SetRegisterConstant<uint8_t>(kVs1, 0);
+ SetRegisterAscending<uint8_t>(kVs1 + 1, 1);
+ SetRegisterConstant<uint8_t>(kVs1 + 2, 0);
+
+ SetRegisterConstant<uint8_t>(kVs3, 0);
+ SetRegisterConstant<uint8_t>(kVs3 + 1, 1);
+ SetRegisterConstant<uint8_t>(kVs3 + 2, 0);
+
+ ExecuteDwconv();
+ TestAccumulatorAndRegisters([](int i, int32_t value) {
+ EXPECT_EQ(i + 1, value) << "Incorrect value at index " << i;
+ });
+ }
+
+ {
+ ResetDwAccumulator();
+
+ SetRegisterConstant<uint8_t>(kVs1, 0);
+ SetRegisterAscending<uint8_t>(kVs1 + 1, 0);
+ SetRegisterConstant<uint8_t>(kVs1 + 2, 42);
+
+ SetRegisterConstant<uint8_t>(kVs3, 0);
+ SetRegisterConstant<uint8_t>(kVs3 + 1, 0);
+ SetRegisterConstant<uint8_t>(kVs3 + 2, 1);
+
+ ExecuteDwconv();
+ TestAccumulatorAndRegisters([](int i, int32_t value) {
+ EXPECT_EQ((i % 8 == 7 ? 42 : i + 1), value)
+ << "Incorrect value at index " << i;
+ });
+ }
+}
+
+TEST_F(KelvinVectorConvolutionInstructionsTest, VDwconvSparsity2) {
+ constexpr int kVs1 = 0;
+ constexpr int kVs3 = 16;
+
+ kelvin::sim::vdwconv_u8_t dwconv_cmd;
+ memset(&dwconv_cmd, 0, sizeof(dwconv_cmd));
+ dwconv_cmd.regbase = 0;
+ dwconv_cmd.sparsity = 2;
+ uint32_t vdwconv_cmd_value;
+ memcpy(&vdwconv_cmd_value, &dwconv_cmd, sizeof(vdwconv_cmd_value));
+ SetRegisterValues<uint32_t>(
+ {{kelvin::sim::test::kRs2Name, vdwconv_cmd_value}});
+
+ {
+ ResetDwAccumulator();
+
+ SetRegisterAscending<uint8_t>(kVs1, 1);
+ SetRegisterConstant<uint8_t>(kVs1 + 1, 0);
+ SetRegisterConstant<uint8_t>(kVs1 + 2, 0);
+
+ SetRegisterConstant<uint8_t>(kVs3, 1);
+ SetRegisterConstant<uint8_t>(kVs3 + 1, 0);
+ SetRegisterConstant<uint8_t>(kVs3 + 2, 0);
+
+ ExecuteDwconv();
+ TestAccumulatorAndRegisters([](int i, int32_t value) {
+ EXPECT_EQ(i + 1, value) << "Incorrect value at index " << i;
+ });
+ }
+
+ {
+ ResetDwAccumulator();
+
+ SetRegisterAscending<uint8_t>(kVs1, 0);
+ SetRegisterConstant<uint8_t>(kVs1 + 1, 42);
+ SetRegisterConstant<uint8_t>(kVs1 + 2, 0);
+
+ SetRegisterConstant<uint8_t>(kVs3, 0);
+ SetRegisterConstant<uint8_t>(kVs3 + 1, 1);
+ SetRegisterConstant<uint8_t>(kVs3 + 2, 0);
+
+ ExecuteDwconv();
+ TestAccumulatorAndRegisters([](int i, int32_t value) {
+ EXPECT_EQ((i % 8 == 7 ? 42 : i + 1), value)
+ << "Incorrect value at index " << i;
+ });
+ }
+
+ {
+ ResetDwAccumulator();
+
+ SetRegisterAscending<uint8_t>(kVs1, 0);
+ SetRegisterConstant<uint8_t>(kVs1 + 1, 42);
+ SetRegisterConstant<uint8_t>(kVs1 + 2, 0);
+
+ SetRegisterConstant<uint8_t>(kVs3, 0);
+ SetRegisterConstant<uint8_t>(kVs3 + 1, 0);
+ SetRegisterConstant<uint8_t>(kVs3 + 2, 1);
+
+ ExecuteDwconv();
+ TestAccumulatorAndRegisters([](int i, int32_t value) {
+ if (i % 8 >= 6) {
+ EXPECT_EQ(42, value) << "Incorrect value at index " << i;
+ } else {
+ EXPECT_EQ(i + 2, value) << "Incorrect value at index " << i;
+ }
+ });
+ }
+}
+
+TEST_F(KelvinVectorConvolutionInstructionsTest, VDwconvSparsity3) {
+ // Sparsity value of 3 is invalid.
+ kelvin::sim::vdwconv_u8_t dwconv_cmd;
+ memset(&dwconv_cmd, 0, sizeof(dwconv_cmd));
+ dwconv_cmd.regbase = 0;
+ dwconv_cmd.sparsity = 3;
+ uint32_t vdwconv_cmd_value;
+ memcpy(&vdwconv_cmd_value, &dwconv_cmd, sizeof(vdwconv_cmd_value));
+ SetRegisterValues<uint32_t>(
+ {{kelvin::sim::test::kRs2Name, vdwconv_cmd_value}});
+ ExecuteDwconv(/* expect_fail */ true);
+}
+
} // namespace
diff --git a/sim/test/kelvin_vector_memory_instructions_test.cc b/sim/test/kelvin_vector_memory_instructions_test.cc
index 524b3a4..4191c84 100644
--- a/sim/test/kelvin_vector_memory_instructions_test.cc
+++ b/sim/test/kelvin_vector_memory_instructions_test.cc
@@ -616,11 +616,17 @@
auto vref_num = kVs + i;
auto ref_vreg = vreg_[vref_num];
auto ref_span = ref_vreg->data_buffer()->Get<uint8_t>();
+
+ uint8_t *dwacc_span =
+ reinterpret_cast<uint8_t *>(state_->dw_acc_vec(8 * i));
for (int element_index = 0; element_index < ref_span.size() / 4;
element_index++) {
EXPECT_EQ(vreg_span[element_index], ref_span[element_index])
<< absl::StrCat("vreg[", vreg_num, "][", element_index, "] != ref[",
vref_num, "][", element_index, "]");
+ EXPECT_EQ(dwacc_span[element_index], ref_span[element_index])
+ << absl::StrCat("dwacc_span[", vreg_num, "][", element_index,
+ "] != ref[", vref_num, "][", element_index, "]");
}
}
}