Add depthwise convolution init instruction
Adds the `adwinit` instruction, which prepares registers for depthwise convolution.
PiperOrigin-RevId: 556853240
diff --git a/sim/kelvin_format.bin_fmt b/sim/kelvin_format.bin_fmt
index 812187c..e84324e 100644
--- a/sim/kelvin_format.bin_fmt
+++ b/sim/kelvin_format.bin_fmt
@@ -114,4 +114,6 @@
unsigned form[2]; // .vv==0b00, .vx==0b10, .xx==0b11
overlays: // For accumulation register support.
unsigned vs1_low3[3] = vs1[2..0];
+ unsigned vs1_low2[2] = vs1[1..0];
+ unsigned vd_low2[2] = vd[1..0];
};
diff --git a/sim/kelvin_memory.bin_fmt b/sim/kelvin_memory.bin_fmt
index 0776b69..37b8154 100644
--- a/sim/kelvin_memory.bin_fmt
+++ b/sim/kelvin_memory.bin_fmt
@@ -116,7 +116,8 @@
// vcget
vcget : KelvinV2ArgsType : func2 == 0b01'0100, vs2 == 0, vs1 == 0, vd == 48, func1 == 0b111, form == 0b11;
- // acset / actr
+ // acset / actr / adwinit
acset : KelvinV2ArgsType : func2 == 0b01'0000, vs2 == 0, m == 0, vd == 48;
actr : KelvinV2ArgsType : func2 == 0b01'0001, vs2 == 0, vs1_low3 == 0, m == 0, vd == 48;
+ adwinit : KelvinV2ArgsType : func2 == 0b01'0010, vs2 == 0, vs1_low2 == 0, sz == 0b00, m == 0, vd_low2 == 0;
};
diff --git a/sim/kelvin_memory.isa b/sim/kelvin_memory.isa
index 51e6119..88a380e 100644
--- a/sim/kelvin_memory.isa
+++ b/sim/kelvin_memory.isa
@@ -487,12 +487,15 @@
disasm: "vcget", "%vd",
semfunc: "absl::bind_front(&KelvinVcGet)";
- // acset/actr
+ // acset/actr/adwinit
acset{: vs1 : vd},
disasm: "acset.v", "%vd, %vs1",
semfunc: "absl::bind_front(&KelvinAcSet, /* is_transpose */ false)";
actr{: vs1 : vd},
disasm: "actr.v", "%vd, %vs1",
semfunc: "absl::bind_front(&KelvinAcSet, /* is_transpose */ true)";
+ adwinit{: vs1 : vd},
+ disasm: "adwinit.v", "%vd, %vs1",
+ semfunc: "&KelvinADwInit";
}
}
diff --git a/sim/kelvin_vector_memory_instructions.cc b/sim/kelvin_vector_memory_instructions.cc
index 9385359..19f3d42 100644
--- a/sim/kelvin_vector_memory_instructions.cc
+++ b/sim/kelvin_vector_memory_instructions.cc
@@ -318,4 +318,26 @@
}
}
+// Copies the first quarter of each of four source registers (`vs1` banks) into
+// the matching `vd` banks to prepare for depthwise convolution. Due to compiler
+// encoding, this op is typeless; `vs1` and `vd` content is treated as 8-bit.
+void KelvinADwInit(const mpact::sim::generic::Instruction *inst) {
+ auto *state = static_cast<KelvinState *>(inst->state());
+ // Only the first quarter of each register (init_n bytes) is copied, to prepare
+ // for double-widening in depthwise convolution; the other bytes are not set.
+ const uint32_t init_n = state->vector_length() / (8 * 4);
+ constexpr int kInitSize = 4;
+ auto vs = static_cast<RV32VectorSourceOperand *>(inst->Source(0));
+ auto vd = static_cast<RV32VectorDestinationOperand *>(inst->Destination(0));
+ for (int op_index = 0; op_index < kInitSize; ++op_index) {
+ auto source_span = vs->GetRegister(op_index)->data_buffer()->Get<uint8_t>();
+ DataBuffer *dest_db = vd->AllocateDataBuffer(op_index);
+ absl::Span<uint8_t> dest_span = dest_db->Get<uint8_t>();
+ for (int i = 0; i < init_n; ++i) {
+ dest_span[i] = source_span[i];
+ }
+ dest_db->Submit();
+ }
+}
+
} // namespace kelvin::sim
diff --git a/sim/kelvin_vector_memory_instructions.h b/sim/kelvin_vector_memory_instructions.h
index 9b141aa..ff53da4 100644
--- a/sim/kelvin_vector_memory_instructions.h
+++ b/sim/kelvin_vector_memory_instructions.h
@@ -30,6 +30,8 @@
void KelvinAcSet(bool is_transpose,
const mpact::sim::generic::Instruction *inst);
+void KelvinADwInit(const mpact::sim::generic::Instruction *inst);
+
} // namespace kelvin::sim
#endif // SIM_KELVIN_VECTOR_MEMORY_INSTRUCTIONS_H_
diff --git a/sim/test/kelvin_vector_memory_instructions_test.cc b/sim/test/kelvin_vector_memory_instructions_test.cc
index 53d7a89..3445d6f 100644
--- a/sim/test/kelvin_vector_memory_instructions_test.cc
+++ b/sim/test/kelvin_vector_memory_instructions_test.cc
@@ -24,6 +24,7 @@
// Semantic functions.
using kelvin::sim::KelvinAcSet;
+using kelvin::sim::KelvinADwInit;
using kelvin::sim::KelvinGetVl;
using kelvin::sim::KelvinVcGet;
using kelvin::sim::KelvinVLd;
@@ -453,7 +454,7 @@
}
}
}
- void AcSetTestHelper(bool is_transpose, bool expected_fail = false) {
+ void AcSetTestHelper(bool is_transpose) {
constexpr int kVd = 48;
constexpr int kVs = 16;
const uint32_t kVLenInWord = state_->vector_length() / 32;
@@ -509,4 +510,46 @@
AcSetTestHelper(/*is_transpose=*/true);
}
+TEST_F(KelvinAccumulateInstructionTest, ADwInit) {
+ constexpr int kVd = 16;
+ constexpr int kVs = 32;
+ const uint32_t kVLenInByte = state_->vector_length() / 8;
+ constexpr int kInitLength = 4;
+ // Fill the kInitLength vs and vd registers with independent random bytes.
+ std::vector<uint8_t> vs_value(kVLenInByte * kInitLength);
+ auto vs_span = absl::Span<uint8_t>(vs_value);
+ FillArrayWithRandomValues<uint8_t>(vs_span);
+ std::vector<uint8_t> vd_value(kVLenInByte * kInitLength);
+ auto vd_span = absl::Span<uint8_t>(vd_value);
+ FillArrayWithRandomValues<uint8_t>(vd_span);
+ for (int i = 0; i < kInitLength; ++i) {
+ auto vd_name = absl::StrCat("v", kVd + i);
+ auto vs_name = absl::StrCat("v", kVs + i);
+ SetVectorRegisterValues<uint8_t>(
+ {{vs_name, vs_span.subspan(kVLenInByte * i, kVLenInByte)},
+ {vd_name, vd_span.subspan(kVLenInByte * i, kVLenInByte)}});
+ }
+ auto instruction = CreateInstruction();
+ AppendVectorRegisterOperands(instruction.get(), kVLenInByte,
+ 1 /* src1_widen_factor */, kVs, {},
+ false /* widen_dst */, {kVd});
+ instruction->set_semantic_function(&KelvinADwInit);
+ instruction->Execute();
+ // Only the first quarter of each `vd` register is expected to match `vs`.
+ for (int i = 0; i < kInitLength; ++i) {
+ auto vreg_num = kVd + i;
+ auto test_vreg = vreg_[vreg_num];
+ auto vreg_span = test_vreg->data_buffer()->Get<uint8_t>();
+ auto vref_num = kVs + i;
+ auto ref_vreg = vreg_[vref_num];
+ auto ref_span = ref_vreg->data_buffer()->Get<uint8_t>();
+ for (int element_index = 0; element_index < ref_span.size() / 4;
+ element_index++) {
+ EXPECT_EQ(vreg_span[element_index], ref_span[element_index])
+ << absl::StrCat("vreg[", vreg_num, "][", element_index, "] != ref[",
+ vref_num, "][", element_index, "]");
+ }
+ }
+}
+
} // namespace