Add convolution instruction support
Add simulation support for the `aconv` instruction.
PiperOrigin-RevId: 556952596
diff --git a/sim/BUILD b/sim/BUILD
index 8437844..53942c9 100644
--- a/sim/BUILD
+++ b/sim/BUILD
@@ -28,11 +28,13 @@
name = "kelvin_instructions",
srcs = [
"kelvin_instructions.cc",
+ "kelvin_vector_convolution_instructions.cc",
"kelvin_vector_instructions.cc",
"kelvin_vector_memory_instructions.cc",
],
hdrs = [
"kelvin_instructions.h",
+ "kelvin_vector_convolution_instructions.h",
"kelvin_vector_instructions.h",
"kelvin_vector_memory_instructions.h",
],
@@ -74,6 +76,7 @@
includes = [
"kelvin_arith.bin_fmt",
"kelvin_base.bin_fmt",
+ "kelvin_conv.bin_fmt",
"kelvin_format.bin_fmt",
"kelvin_memory.bin_fmt",
"kelvin_mul.bin_fmt",
diff --git a/sim/kelvin.bin_fmt b/sim/kelvin.bin_fmt
index 39703dc..c6e7d98 100644
--- a/sim/kelvin.bin_fmt
+++ b/sim/kelvin.bin_fmt
@@ -7,6 +7,7 @@
}
KelvinInst;
KelvinVectorArithInst;
+ KelvinVectorConvInst;
KelvinVectorMemoryInst;
KelvinVectorMulInst;
KelvinVectorShiftInst;
@@ -15,6 +16,7 @@
#include "sim/kelvin_format.bin_fmt"
#include "sim/kelvin_arith.bin_fmt"
#include "sim/kelvin_base.bin_fmt"
+#include "sim/kelvin_conv.bin_fmt"
#include "sim/kelvin_memory.bin_fmt"
#include "sim/kelvin_mul.bin_fmt"
#include "sim/kelvin_shift.bin_fmt"
diff --git a/sim/kelvin.isa b/sim/kelvin.isa
index db1a680..0b046da 100644
--- a/sim/kelvin.isa
+++ b/sim/kelvin.isa
@@ -19,7 +19,8 @@
// Combining all kelvin instruction sets.
slot kelvin : riscv32i, riscv32m, zicsr, zfencei, privileged, kelvin_arith,
- kelvin_log, kelvin_memory, kelvin_mul, kelvin_shift, kelvin_vector_memory {
+ kelvin_conv, kelvin_log, kelvin_memory, kelvin_mul, kelvin_shift,
+ kelvin_vector_memory {
includes {
#include "sim/kelvin_instructions.h"
}
diff --git a/sim/kelvin_conv.bin_fmt b/sim/kelvin_conv.bin_fmt
new file mode 100644
index 0000000..48b0634
--- /dev/null
+++ b/sim/kelvin_conv.bin_fmt
@@ -0,0 +1,4 @@
+instruction group KelvinVectorConvInst[32] : KelvinV3ArgsType {
+ // vconv
+ aconv_vxv : KelvinV3ArgsType : func3_hi == 0b10, func3_lo == 0b00, vd == 48, vs1_low3 == 0, vs2 != 0, vs3_low3 == 0, m == 0, form == 0b101;
+};
diff --git a/sim/kelvin_encoding.cc b/sim/kelvin_encoding.cc
index 1f1f544..53047f0 100644
--- a/sim/kelvin_encoding.cc
+++ b/sim/kelvin_encoding.cc
@@ -239,15 +239,22 @@
});
source_op_getters_.emplace(
// vst and vstq use `vd` field as the source for the vector store.
+ // convolution instructions also use `vd` as one of the sources.
static_cast<int>(SourceOpEnum::kVd),
[this]() -> SourceOperandInterface * {
auto reg_num = encoding::kelvin_v2_args_type::ExtractVd(inst_word_);
bool strip_mine = encoding::kelvin_v2_args_type::ExtractM(inst_word_);
- if (opcode_ < OpcodeEnum::kVstBLXx || opcode_ > OpcodeEnum::kVstqWSpXxM)
- return nullptr;
return GetVectorRegisterSourceOp<mpact::sim::riscv::RVVectorRegister>(
state_, reg_num, strip_mine, 1 /* widen_factor */);
});
+ source_op_getters_.emplace(
+ // Used by convolution instructions.
+ static_cast<int>(SourceOpEnum::kVs3),
+ [this]() -> SourceOperandInterface * {
+ auto reg_num = encoding::kelvin_v3_args_type::ExtractVs3(inst_word_);
+ return GetVectorRegisterSourceOp<mpact::sim::riscv::RVVectorRegister>(
+ state_, reg_num, false /* strip_mine */, 1 /* widen_factor */);
+ });
source_op_getters_.insert(std::make_pair(
static_cast<int>(SourceOpEnum::kNone), []() { return nullptr; }));
}
@@ -316,6 +323,7 @@
decode_functions;
decode_functions.push_back(encoding::DecodeKelvinInst);
decode_functions.push_back(encoding::DecodeKelvinVectorArithInst);
+ decode_functions.push_back(encoding::DecodeKelvinVectorConvInst);
decode_functions.push_back(encoding::DecodeKelvinVectorMemoryInst);
decode_functions.push_back(encoding::DecodeKelvinVectorMulInst);
decode_functions.push_back(encoding::DecodeKelvinVectorShiftInst);
diff --git a/sim/kelvin_format.bin_fmt b/sim/kelvin_format.bin_fmt
index e84324e..7efefdf 100644
--- a/sim/kelvin_format.bin_fmt
+++ b/sim/kelvin_format.bin_fmt
@@ -117,3 +117,19 @@
unsigned vs1_low2[2] = vs1[1..0];
unsigned vd_low2[2] = vd[1..0];
};
+
+// Kelvin 3 args vector format.
+format KelvinV3ArgsType[32] {
+ fields:
+ unsigned vs3[6];
+ unsigned vs2[6];
+ unsigned vs1[6];
+ unsigned func3_hi[2];
+ unsigned vd[6];
+ unsigned m[1];
+ unsigned func3_lo[2];
+ unsigned form[3]; // .vvv=0b001, .vxv=0b101.
+ overlays:
+ unsigned vs1_low3[3] = vs1[2..0];
+ unsigned vs3_low3[3] = vs3[2..0];
+};
diff --git a/sim/kelvin_mul.isa b/sim/kelvin_mul.isa
index f1456ac..cb2fa73 100644
--- a/sim/kelvin_mul.isa
+++ b/sim/kelvin_mul.isa
@@ -9,7 +9,6 @@
// Mul/Div instructions (func1 011)
slot kelvin_mul {
includes {
- #include "sim/kelvin_instructions.h"
#include "sim/kelvin_vector_instructions.h"
#include "absl/functional/bind_front.h"
}
@@ -525,3 +524,18 @@
semfunc: "absl::bind_front(&KelvinVMadd<int32_t>, /*scalar*/ true, /*strip_mine*/ true)";
}
}
+
+slot kelvin_conv {
+ includes {
+ #include "sim/kelvin_vector_convolution_instructions.h"
+ #include "absl/functional/bind_front.h"
+ }
+ default size = 4;
+ default latency = global_latency;
+ opcodes {
+ // vconv
+ aconv_vxv{: vs1, vs2, vs3 : vd},
+ disasm: "aconv.vxv", "%vd, %vs1, %vs2, %vs3",
+ semfunc: "&KelvinVConv";
+ }
+}
diff --git a/sim/kelvin_state.h b/sim/kelvin_state.h
index ceff045..0cfada0 100644
--- a/sim/kelvin_state.h
+++ b/sim/kelvin_state.h
@@ -45,8 +45,8 @@
void set_vector_length(uint32_t length) { vector_length_ = length; }
uint32_t vector_length() const { return vector_length_; }
- void SetAccRegister(uint32_t *data, int index);
AccArrayType *acc_vec(int index) { return &(acc_register_.at(index)); }
+ AccArrayTemplate<AccArrayType> acc_register() const { return acc_register_; }
void SetLogArgs(std::any data) { log_args_.emplace_back(std::move(data)); }
std::string *clog_string() { return &clog_string_; }
diff --git a/sim/kelvin_vector_convolution_instructions.cc b/sim/kelvin_vector_convolution_instructions.cc
new file mode 100644
index 0000000..2f7fbf1
--- /dev/null
+++ b/sim/kelvin_vector_convolution_instructions.cc
@@ -0,0 +1,111 @@
+#include "sim/kelvin_vector_convolution_instructions.h"
+
+#include <array>
+#include <cstdint>
+#include <cstring>
+
+#include "sim/kelvin_state.h"
+#include "absl/types/span.h"
+#include "riscv/riscv_register.h"
+#include "riscv/riscv_state.h"
+#include "mpact/sim/generic/data_buffer.h"
+#include "mpact/sim/generic/instruction.h"
+
+namespace kelvin::sim {
+
+using mpact::sim::generic::GetInstructionSource;
+using mpact::sim::riscv::RV32VectorSourceOperand;
+
+// Semantic function for the 3-arg vector convolution (im2col + matmul).
+//   Source 0 (vs1, narrow): start of a vector register group; its bytes are
+//     copied into a local array before the computation.
+//   Source 1 (xs2): 32-bit scalar holding the command word (vconv_cmd_t).
+//   Source 2 (vs3, wide): vector register group; register 0 of the group is
+//     paired with command index `start`.
+// `vd` is not used in the op. An invalid command raises an illegal
+// instruction trap and returns before the accumulator is modified.
+void KelvinVConv(Instruction *inst) {
+  auto state = static_cast<KelvinState *>(inst->state());
+  constexpr int kVectorLenInByte = kVectorLengthInBits / 8;
+  constexpr int kVectorLenInWord = kVectorLenInByte / sizeof(uint32_t);
+
+  // Unpack the scalar command word into its bitfields.
+  vconv_cmd_t conv_cmd;
+  auto reg_data = GetInstructionSource<uint32_t>(inst, 1, 0);
+  memcpy(&conv_cmd, &reg_data, sizeof(conv_cmd));
+
+  // Validate the command: only the 8-bit mode (0) is supported, and the range
+  // must satisfy start <= stop < kVectorLenInWord (which also bounds start).
+  if (conv_cmd.mode != 0 || conv_cmd.start > conv_cmd.stop ||
+      conv_cmd.stop >= kVectorLenInWord) {
+    state->Trap(/*is_interrupt=*/false, /*trap_value=*/0,
+                *mpact::sim::riscv::ExceptionCode::kIllegalInstruction,
+                /*epc=*/inst->address(), inst);
+    return;
+  }
+
+  // Read the narrow source.
+  auto vs1 = static_cast<RV32VectorSourceOperand *>(inst->Source(0));
+  auto vs3 = static_cast<RV32VectorSourceOperand *>(inst->Source(2));
+  AccArrayTemplate<std::array<uint8_t, kVectorLenInByte>> vec_narrow;
+  for (int vec_idx = 0; vec_idx < vec_narrow.size(); ++vec_idx) {
+    auto source_span = vs1->GetRegister(vec_idx)->data_buffer()->Get<uint8_t>();
+    for (int j = 0; j < vec_narrow[vec_idx].size(); ++j) {
+      vec_narrow[vec_idx][j] = source_span[j];
+    }
+  }
+
+  // Work on a local copy of the accumulator; it is written back at the end.
+  auto accumulator = state->acc_register();
+
+  // Sign-extend the 9-bit bias fields to 32 bits.
+  int32_t sbias1 = (static_cast<int32_t>(conv_cmd.sbias1) << 23) >> 23;
+  int32_t sbias2 = (static_cast<int32_t>(conv_cmd.sbias2) << 23) >> 23;
+
+  // Multiply-Accumulate of conv(8x32xi8, 8x32xi8) -> 8x8xi32.
+  // Internally they are broken into 4 groups for accumulation to handle the
+  // double-widening data without extra interleaving steps. Also, the operation
+  // has both im2col and matmul in one shot (image data in `vs1`, filter/kernel
+  // in `vs3`), so for the typical matmul, the input re-shuffling is required.
+  //
+  // Note the output of this op CANNOT be used directly, because it is still
+  // in the double-widening format. It is expected to be followed by some
+  // double-reduction instructions to read the 8-bit data back in order.
+  constexpr int kInterleave[] = {0, 2, 1, 3};  // (ee, oe, eo, oo)
+  constexpr int kQuadBase = 4;                 // For double-widening.
+  constexpr int kQuadMask = kQuadBase - 1;
+  // Each k in [start, stop] consumes one wide register and the 4 bytes at
+  // offset 4 * k of every narrow row.
+  for (int k = conv_cmd.start; k <= conv_cmd.stop; ++k) {
+    auto wide_source_span =
+        vs3->GetRegister(k - conv_cmd.start)->data_buffer()->Get<uint8_t>();
+    for (int i = 0; i < vec_narrow.size(); ++i) {
+      for (int j = 0; j < wide_source_span.size(); ++j) {
+        // data1 (narrow) is transposed and broadcast: byte j of the wide
+        // register pairs with the narrow byte at the same offset in its word.
+        uint8_t n = vec_narrow[i][kQuadBase * k + (j & kQuadMask)];
+        int32_t sdata1 = conv_cmd.sdata1 ? static_cast<int8_t>(n) : n;
+        uint8_t w = wide_source_span[j];
+        int32_t sdata2 = conv_cmd.sdata2 ? static_cast<int8_t>(w) : w;
+        // Locate the accumulator cell: the wide word index selects the
+        // interleaved row inside i's group of 4 rows, while i's position in
+        // that group selects the block of columns.
+        const int rbase = i & ~kQuadMask;
+        const int rquad = i & kQuadMask;
+        const int word = j / kQuadBase;
+        const int idx_i = rbase + kInterleave[word & kQuadMask];
+        const int idx_j =
+            rquad * (accumulator.size() / kQuadBase) + (word / kQuadBase);
+        accumulator[idx_i][idx_j] += (sdata1 + sbias1) * (sdata2 + sbias2);
+      }
+    }
+  }
+
+  // Write the results back to the accumulation register.
+  for (int i = 0; i < accumulator.size(); ++i) {
+    auto acc_array = state->acc_vec(i);
+    *acc_array = accumulator[i];
+  }
+}
+
+} // namespace kelvin::sim
diff --git a/sim/kelvin_vector_convolution_instructions.h b/sim/kelvin_vector_convolution_instructions.h
new file mode 100644
index 0000000..6d08f98
--- /dev/null
+++ b/sim/kelvin_vector_convolution_instructions.h
@@ -0,0 +1,27 @@
+#ifndef SIM_KELVIN_VECTOR_CONVOLUTION_INSTRUCTIONS_H_
+#define SIM_KELVIN_VECTOR_CONVOLUTION_INSTRUCTIONS_H_
+
+#include <cstdint>
+
+#include "mpact/sim/generic/instruction.h"
+
+namespace kelvin::sim {
+
+using mpact::sim::generic::Instruction;
+
+// Command structure for the convolution instruction. NOTE(review): bit ranges below depend on ABI-defined bitfield order; confirm.
+typedef struct KelvinVConvCmd {
+ uint32_t mode : 2; // 31:30, KelvinVConv traps unless this is 0 (8-bit mode)
+ uint32_t start : 5; // 29:25, first index processed (must be <= stop)
+ uint32_t stop : 5; // 24:20, last index processed (inclusive)
+ uint32_t sbias1 : 9; // 19:11, 9-bit signed bias added to each vs1 element
+ uint32_t sdata1 : 1; // 10, when set vs1 elements are read as signed 8-bit
+ uint32_t sbias2 : 9; // 9:1, 9-bit signed bias added to each vs3 element
+ uint32_t sdata2 : 1; // 0, when set vs3 elements are read as signed 8-bit
+} vconv_cmd_t;
+
+void KelvinVConv(Instruction *inst);
+
+} // namespace kelvin::sim
+
+#endif // SIM_KELVIN_VECTOR_CONVOLUTION_INSTRUCTIONS_H_
diff --git a/sim/test/BUILD b/sim/test/BUILD
index 41d756f..73a0b56 100644
--- a/sim/test/BUILD
+++ b/sim/test/BUILD
@@ -111,6 +111,27 @@
)
cc_test(
+ name = "kelvin_vector_convolution_instructions_test",
+ srcs = [
+ "kelvin_vector_convolution_instructions_test.cc",
+ "testfiles/kelvin_vector_convolution_testdata.h",
+ ],
+ copts = [
+ "-Werror",
+ "-Wvla-extension",
+ ],
+ deps = [
+ ":kelvin_vector_instructions_test_base",
+ "//sim:kelvin_instructions",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/types:span",
+ "@com_google_googletest//:gtest_main",
+ "@com_google_mpact-riscv//riscv:riscv_state",
+ "@com_google_mpact-sim//mpact/sim/generic:instruction",
+ ],
+)
+
+cc_test(
name = "kelvin_vector_memory_instructions_test",
srcs = ["kelvin_vector_memory_instructions_test.cc"],
copts = [
diff --git a/sim/test/kelvin_vector_convolution_instructions_test.cc b/sim/test/kelvin_vector_convolution_instructions_test.cc
new file mode 100644
index 0000000..d02aef2
--- /dev/null
+++ b/sim/test/kelvin_vector_convolution_instructions_test.cc
@@ -0,0 +1,164 @@
+#include "sim/kelvin_vector_convolution_instructions.h"
+
+#include <array>
+#include <cstdint>
+#include <cstring>
+#include <functional>
+#include <vector>
+
+#include "sim/test/kelvin_vector_instructions_test_base.h"
+#include "sim/test/testfiles/kelvin_vector_convolution_testdata.h"
+#include "googletest/include/gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "absl/types/span.h"
+#include "riscv/riscv_state.h"
+#include "mpact/sim/generic/instruction.h"
+
+namespace {
+
+using mpact::sim::generic::Instruction;
+
+// Semantic functions.
+using kelvin::sim::KelvinVConv;
+
+class KelvinVectorConvolutionInstructionsTest // Fixture exercising KelvinVConv.
+ : public kelvin::sim::test::KelvinVectorInstructionsTestBase {
+ protected:
+ void ConvolutionTestHelper(const kelvin::sim::vconv_cmd_t vconv_cmd,
+ bool expect_fail = false) {
+ constexpr int kVs1 = 0;
+ constexpr int kVs3 = 16;
+ constexpr int kVd = 48;
+ const uint32_t kVLenInByte = state_->vector_length() / 8;
+ const uint32_t kVLenInWord = state_->vector_length() / 32;
+ // Load kVConvIn1 / kVConvIn2 into the register groups starting at kVs1 / kVs3.
+ std::vector<uint8_t> vs1_value(kVLenInWord * kVLenInByte);
+ auto vs1_span = absl::Span<uint8_t>(vs1_value);
+ memcpy(vs1_span.data(), kVConvIn1, sizeof(kVConvIn1));
+ std::vector<uint8_t> vs3_value(kVLenInWord * kVLenInByte);
+ auto vs3_span = absl::Span<uint8_t>(vs3_value);
+ memcpy(vs3_span.data(), kVConvIn2, sizeof(kVConvIn2));
+ for (int i = 0; i < kVLenInWord; ++i) {
+ auto vs1_name = absl::StrCat("v", kVs1 + i);
+ auto vs3_name = absl::StrCat("v", kVs3 + i);
+ SetVectorRegisterValues<uint8_t>(
+ {{vs1_name, vs1_span.subspan(i * kVLenInByte, kVLenInByte)},
+ {vs3_name, vs3_span.subspan(i * kVLenInByte, kVLenInByte)}});
+ }
+ uint32_t vconv_cmd_value;
+ memcpy(&vconv_cmd_value, &vconv_cmd, sizeof(vconv_cmd_value));
+ SetRegisterValues<uint32_t>({{kelvin::sim::test::kRs2Name,
+ static_cast<uint32_t>(vconv_cmd_value)}});
+
+ // Zero the first kVLenInWord accumulator entries before executing.
+ for (int i = 0; i < kVLenInWord; ++i) {
+ auto acc_vec = state_->acc_vec(i);
+ acc_vec->fill(0);
+ }
+
+ // Build two VConv instructions; the second one swaps the vs1/vs3 source groups.
+ std::array<InstructionPtr, 2> instructions = {CreateInstruction(),
+ CreateInstruction()};
+ instructions[0]->set_semantic_function(KelvinVConv);
+ AppendVectorRegisterOperands(instructions[0].get(), kVLenInWord,
+ 1 /* src1_widen_factor*/, kVs1, {},
+ false /* widen_dst*/, {kVd});
+ AppendRegisterOperands(instructions[0].get(), {kelvin::sim::test::kRs2Name},
+ {});
+ AppendVectorRegisterOperands(instructions[0].get(), kVLenInWord,
+ 1 /* src3_widen_factor*/, kVs3, {},
+ false /* widen_dst*/, {});
+
+ instructions[1]->set_semantic_function(KelvinVConv);
+ AppendVectorRegisterOperands(instructions[1].get(), kVLenInWord,
+ 1 /* src1_widen_factor*/, kVs3, {},
+ false /* widen_dst*/, {kVd});
+ AppendRegisterOperands(instructions[1].get(), {kelvin::sim::test::kRs2Name},
+ {});
+ AppendVectorRegisterOperands(instructions[1].get(), kVLenInWord,
+ 1 /* src3_widen_factor*/, kVs1, {},
+ false /* widen_dst*/, {});
+ execution_fail_ = false;
+ state_->set_on_trap(trap_call_back_);
+ instructions[0]->Execute();
+ if (expect_fail) {
+ EXPECT_TRUE(execution_fail_);
+ return;
+ }
+ instructions[1]->Execute();
+ EXPECT_FALSE(execution_fail_);
+ auto result_acc = state_->acc_register();
+ for (int i = 0; i < result_acc.size(); ++i) {
+ for (int j = 0; j < result_acc[i].size(); ++j) {
+ EXPECT_EQ(result_acc[i][j], kVConvOutRef[i][j])
+ << absl::StrCat("acc[", i, "][", j, "] != Ref[", i, "][", j, "]");
+ }
+ }
+ }
+
+ private:
+ bool execution_fail_; // Set by trap_call_back_ on an illegal-instruction trap.
+ std::function<bool(bool, uint64_t, uint64_t, uint64_t, const Instruction *)>
+ trap_call_back_ = [this](bool is_interrupt, uint64_t trap_value,
+ uint64_t exception_code, uint64_t epc,
+ const Instruction *instruction) {
+ auto code =
+ static_cast<mpact::sim::riscv::ExceptionCode>(exception_code);
+ if (code == mpact::sim::riscv::ExceptionCode::kIllegalInstruction) {
+ this->execution_fail_ = true;
+ return true;
+ }
+ return false;
+ };
+};
+
+TEST_F(KelvinVectorConvolutionInstructionsTest, VConv) {
+ // Set the convolution to have 8 filters (starting from index 0), with the
+ // data bias of 86 (unsigned) and the filter bias of 188 (signed).
+ kelvin::sim::vconv_cmd_t vconv_cmd{.mode = 0,
+ .start = 0,
+ .stop = 7,
+ .sbias1 = 86,
+ .sdata1 = false,
+ .sbias2 = 188,
+ .sdata2 = true};
+ ConvolutionTestHelper(vconv_cmd);
+}
+
+TEST_F(KelvinVectorConvolutionInstructionsTest, VConvWrongMode) {
+ // Set the convolution to work on 16-bit input/filter (illegal setting).
+ kelvin::sim::vconv_cmd_t vconv_cmd{.mode = 1,
+ .start = 0,
+ .stop = 7,
+ .sbias1 = 86,
+ .sdata1 = false,
+ .sbias2 = 188,
+ .sdata2 = true};
+ ConvolutionTestHelper(vconv_cmd, true);
+}
+
+TEST_F(KelvinVectorConvolutionInstructionsTest, VConvTooLargeStop) {
+ // Set the convolution to work on 9 filters (too many filters).
+ kelvin::sim::vconv_cmd_t vconv_cmd{.mode = 0,
+ .start = 0,
+ .stop = 8,
+ .sbias1 = 86,
+ .sdata1 = false,
+ .sbias2 = 188,
+ .sdata2 = true};
+ ConvolutionTestHelper(vconv_cmd, true);
+}
+
+TEST_F(KelvinVectorConvolutionInstructionsTest, VConvWrongStop) {
+ // Set the convolution to start from filter 7 and to stop at filter 5 (reverse
+ // order).
+ kelvin::sim::vconv_cmd_t vconv_cmd{.mode = 0,
+ .start = 7,
+ .stop = 5,
+ .sbias1 = 86,
+ .sdata1 = false,
+ .sbias2 = 188,
+ .sdata2 = true};
+ ConvolutionTestHelper(vconv_cmd, true);
+}
+} // namespace
diff --git a/sim/test/testfiles/kelvin_vector_convolution_testdata.h b/sim/test/testfiles/kelvin_vector_convolution_testdata.h
new file mode 100644
index 0000000..0cb2779
--- /dev/null
+++ b/sim/test/testfiles/kelvin_vector_convolution_testdata.h
@@ -0,0 +1,146 @@
+// Test input/reference vectors for the vector convolution instruction
+// functions.
+#ifndef SIM_TEST_TESTFILES_KELVIN_VECTOR_CONVOLUTION_TESTDATA_H_
+#define SIM_TEST_TESTFILES_KELVIN_VECTOR_CONVOLUTION_TESTDATA_H_
+
+#include <cstdint>
+
+// clang-format off
+constexpr uint8_t kVConvIn1[8][32] = {
+ {
+ 0xea, 0xe6, 0xaa, 0xb0, 0x5e, 0x46, 0x43, 0x2c,
+ 0x58, 0x69, 0xd4, 0x25, 0xf0, 0xe9, 0x74, 0xd5,
+ 0x34, 0x16, 0x4c, 0x86, 0xa8, 0x0c, 0xac, 0xa8,
+ 0x9f, 0x99, 0x5d, 0xe0, 0x1a, 0x93, 0x65, 0x88,
+ },
+ {
+ 0x4a, 0x3a, 0x57, 0x8b, 0x50, 0x6c, 0x1b, 0x37,
+ 0x76, 0x8c, 0x72, 0x55, 0xb3, 0xce, 0xf3, 0x50,
+ 0x74, 0x51, 0xb2, 0xb9, 0xb9, 0x76, 0xc4, 0x94,
+ 0x29, 0x52, 0x16, 0xa8, 0x68, 0xa7, 0x8e, 0xe2,
+ },
+ {
+ 0x61, 0xe8, 0xb8, 0xae, 0x43, 0x0a, 0xbe, 0xfa,
+ 0x0b, 0x32, 0x7a, 0x92, 0x44, 0x3a, 0x60, 0xd3,
+ 0x11, 0xc0, 0xb3, 0x8c, 0x7e, 0x5b, 0x9c, 0xe9,
+ 0x03, 0x94, 0x7f, 0x10, 0x38, 0xd5, 0xc6, 0xeb,
+ },
+ {
+ 0x5c, 0x79, 0x44, 0xe9, 0xfb, 0x32, 0x00, 0xf0,
+ 0x79, 0xed, 0x92, 0x77, 0x93, 0x45, 0xc3, 0x63,
+ 0x33, 0x7e, 0x07, 0xc5, 0x07, 0x76, 0x9d, 0xf0,
+ 0x4e, 0x6a, 0x67, 0xcc, 0xca, 0xab, 0xd4, 0x24,
+ },
+ {
+ 0x82, 0x86, 0xbb, 0xd9, 0x4b, 0xda, 0x2e, 0xbb,
+ 0x8d, 0xbb, 0x82, 0x0e, 0x53, 0xaa, 0xb8, 0xfa,
+ 0xa1, 0x75, 0x6e, 0x4a, 0x3c, 0xba, 0xa1, 0x88,
+ 0x88, 0x6c, 0x74, 0x5b, 0x09, 0x83, 0x98, 0xdc,
+ },
+ {
+ 0x91, 0x9a, 0x9f, 0xcd, 0x87, 0xd7, 0x74, 0x90,
+ 0xa5, 0x25, 0xcf, 0x56, 0x5d, 0x41, 0x56, 0xc2,
+ 0x0d, 0xdd, 0xd8, 0x2c, 0x59, 0x1d, 0x1c, 0x66,
+ 0x06, 0xe9, 0xd3, 0x51, 0x83, 0x16, 0x65, 0x56,
+ },
+ {
+ 0xf6, 0x95, 0x7c, 0x47, 0xf5, 0x56, 0x53, 0x58,
+ 0x87, 0x9c, 0xde, 0xac, 0x76, 0x78, 0x71, 0x86,
+ 0x5c, 0xdb, 0x5f, 0x0d, 0xc4, 0x5f, 0x48, 0x61,
+ 0x48, 0x6f, 0x77, 0x26, 0x68, 0xf1, 0x39, 0x58,
+ },
+ {
+ 0x32, 0x36, 0x68, 0x29, 0x67, 0xb8, 0x7c, 0xdd,
+ 0xb9, 0x17, 0xb0, 0xec, 0x2e, 0xcc, 0xa5, 0x54,
+ 0x76, 0x56, 0xc8, 0x0b, 0x77, 0xa1, 0xef, 0xf5,
+ 0xcf, 0xd6, 0x84, 0x7a, 0x84, 0x0f, 0x4e, 0x16,
+ },
+};
+
+constexpr uint8_t kVConvIn2[8][32] = {
+ {
+ 0x33, 0xc9, 0xb8, 0xa1, 0xea, 0x2d, 0x2c, 0x18,
+ 0x92, 0x98, 0x8e, 0x19, 0xf2, 0xd8, 0x55, 0x92,
+ 0x26, 0x4a, 0x8d, 0x3e, 0xb0, 0x01, 0x81, 0x6c,
+ 0x3e, 0xcc, 0x8f, 0x9b, 0xf9, 0xde, 0x94, 0x9e,
+ },
+ {
+ 0x7f, 0xa9, 0xa8, 0x3a, 0xa3, 0xef, 0xb7, 0xb5,
+ 0x44, 0x93, 0xa1, 0xf7, 0x09, 0x7b, 0xb6, 0x6f,
+ 0x98, 0x9e, 0xaa, 0x60, 0xb4, 0xe2, 0x9b, 0x93,
+ 0x4b, 0x8f, 0xa7, 0xe4, 0x96, 0xe6, 0xcd, 0x93,
+ },
+ {
+ 0x63, 0xfd, 0x8e, 0xd2, 0xfd, 0xe0, 0x13, 0x5b,
+ 0xd7, 0x5f, 0xa0, 0x56, 0x02, 0x29, 0x4e, 0xfa,
+ 0x9b, 0x30, 0xa5, 0xdb, 0xb4, 0xe7, 0xb9, 0x52,
+ 0x05, 0xda, 0x57, 0xa8, 0xbd, 0x65, 0xfe, 0xf0,
+ },
+ {
+ 0x1c, 0x22, 0x48, 0x3f, 0x5c, 0xae, 0x08, 0x8c,
+ 0x40, 0xd4, 0x97, 0xeb, 0xb1, 0x92, 0x50, 0xd4,
+ 0x66, 0xac, 0x58, 0x20, 0x3c, 0x92, 0xc0, 0x5c,
+ 0x50, 0x89, 0x42, 0x93, 0x7b, 0x8b, 0x0a, 0x33,
+ },
+ {
+ 0x3e, 0x98, 0x0c, 0x1a, 0xcd, 0x6c, 0xd5, 0x26,
+ 0xad, 0x94, 0xd0, 0x6a, 0xbd, 0x19, 0x02, 0x42,
+ 0xc1, 0x69, 0x20, 0x94, 0xc2, 0x74, 0xb7, 0xbf,
+ 0x9f, 0x45, 0xd5, 0x6f, 0x22, 0x92, 0xbe, 0x88,
+ },
+ {
+ 0x4c, 0xbc, 0xed, 0x2f, 0x9e, 0xe5, 0x27, 0xf9,
+ 0x0e, 0xd3, 0xb5, 0x74, 0x83, 0x5f, 0x5a, 0xa2,
+ 0xaf, 0x0f, 0x49, 0x0a, 0xe3, 0x1b, 0xc7, 0xd1,
+ 0xd1, 0x51, 0xa9, 0x86, 0x75, 0xb3, 0xbc, 0xe9,
+ },
+ {
+ 0x9f, 0x2a, 0xe9, 0xfc, 0x4f, 0x10, 0x57, 0x7d,
+ 0x14, 0xec, 0x5f, 0x39, 0xa3, 0x0e, 0x92, 0x62,
+ 0x86, 0xad, 0xae, 0x38, 0x95, 0x7f, 0xde, 0x30,
+ 0x9d, 0xbf, 0xe5, 0xb2, 0xb3, 0xb6, 0xf1, 0x42,
+ },
+ {
+ 0x0c, 0xd7, 0xd4, 0xa8, 0xae, 0x1a, 0xe6, 0x3c,
+ 0xf0, 0xd7, 0x4f, 0x36, 0x37, 0xeb, 0x1d, 0xe9,
+ 0xc7, 0xe4, 0x82, 0xe4, 0x44, 0x1c, 0x6a, 0xa1,
+ 0x96, 0xfe, 0x2e, 0x9d, 0x36, 0xb4, 0x1c, 0x03,
+ },
+};
+
+constexpr uint32_t kVConvOutRef[8][8] = {
+ {
+ 0x002706b4, 0x00284ddd, 0x00279af7, 0x00293b6a,
+ 0x00265a51, 0x0028f490, 0x0024222a, 0x0025a738,
+ },
+ {
+ 0x00260af1, 0x002687fd, 0x0026d33e, 0x0028700b,
+ 0x002701d0, 0x00273546, 0x0025002d, 0x00254eaa,
+ },
+ {
+ 0x002b2935, 0x00262aea, 0x002ca80a, 0x00275a99,
+ 0x002cc3e7, 0x0027e665, 0x00288ed7, 0x00254319,
+ },
+ {
+ 0x0028690a, 0x0023f3b1, 0x002aed22, 0x0025a60b,
+ 0x002ad4e5, 0x0024a4c3, 0x00283b5b, 0x0021ce1e,
+ },
+ {
+ 0x0025e1a3, 0x002700e9, 0x00255faa, 0x00270fbc,
+ 0x00257a3b, 0x0026dd70, 0x0025c9d7, 0x0026fcc3,
+ },
+ {
+ 0x0025a025, 0x00258f27, 0x0024d04f, 0x00261465,
+ 0x00244b49, 0x00269500, 0x0025ddb5, 0x0025ca26,
+ },
+ {
+ 0x00299efe, 0x0025a702, 0x002a766b, 0x00269ec7,
+ 0x002acd15, 0x0025366f, 0x002a4de2, 0x002568bf,
+ },
+ {
+ 0x0029429d, 0x0023c5ba, 0x002a03f7, 0x00240475,
+ 0x0028fac3, 0x0023f507, 0x0029f7e5, 0x0023008b,
+ },
+};
+// clang-format on
+#endif // SIM_TEST_TESTFILES_KELVIN_VECTOR_CONVOLUTION_TESTDATA_H_