Add convolution instruction support

Add simulation support for the `aconv` instruction.

PiperOrigin-RevId: 556952596
diff --git a/sim/BUILD b/sim/BUILD index 8437844..53942c9 100644 --- a/sim/BUILD +++ b/sim/BUILD
@@ -28,11 +28,13 @@ name = "kelvin_instructions", srcs = [ "kelvin_instructions.cc", + "kelvin_vector_convolution_instructions.cc", "kelvin_vector_instructions.cc", "kelvin_vector_memory_instructions.cc", ], hdrs = [ "kelvin_instructions.h", + "kelvin_vector_convolution_instructions.h", "kelvin_vector_instructions.h", "kelvin_vector_memory_instructions.h", ], @@ -74,6 +76,7 @@ includes = [ "kelvin_arith.bin_fmt", "kelvin_base.bin_fmt", + "kelvin_conv.bin_fmt", "kelvin_format.bin_fmt", "kelvin_memory.bin_fmt", "kelvin_mul.bin_fmt",
diff --git a/sim/kelvin.bin_fmt b/sim/kelvin.bin_fmt index 39703dc..c6e7d98 100644 --- a/sim/kelvin.bin_fmt +++ b/sim/kelvin.bin_fmt
@@ -7,6 +7,7 @@ } KelvinInst; KelvinVectorArithInst; + KelvinVectorConvInst; KelvinVectorMemoryInst; KelvinVectorMulInst; KelvinVectorShiftInst; @@ -15,6 +16,7 @@ #include "sim/kelvin_format.bin_fmt" #include "sim/kelvin_arith.bin_fmt" #include "sim/kelvin_base.bin_fmt" +#include "sim/kelvin_conv.bin_fmt" #include "sim/kelvin_memory.bin_fmt" #include "sim/kelvin_mul.bin_fmt" #include "sim/kelvin_shift.bin_fmt"
diff --git a/sim/kelvin.isa b/sim/kelvin.isa index db1a680..0b046da 100644 --- a/sim/kelvin.isa +++ b/sim/kelvin.isa
@@ -19,7 +19,8 @@ // Combining all kelvin instruction sets. slot kelvin : riscv32i, riscv32m, zicsr, zfencei, privileged, kelvin_arith, - kelvin_log, kelvin_memory, kelvin_mul, kelvin_shift, kelvin_vector_memory { + kelvin_conv, kelvin_log, kelvin_memory, kelvin_mul, kelvin_shift, + kelvin_vector_memory { includes { #include "sim/kelvin_instructions.h" }
diff --git a/sim/kelvin_conv.bin_fmt b/sim/kelvin_conv.bin_fmt new file mode 100644 index 0000000..48b0634 --- /dev/null +++ b/sim/kelvin_conv.bin_fmt
@@ -0,0 +1,4 @@
+instruction group KelvinVectorConvInst[32] : KelvinV3ArgsType {
+  // aconv.vxv: vd is fixed at 48, vs1/vs3 have their low 3 bits clear, vs2 != 0.
+  aconv_vxv : KelvinV3ArgsType : func3_hi == 0b10, func3_lo == 0b00, vd == 48, vs1_low3 == 0, vs2 != 0, vs3_low3 == 0, m == 0, form == 0b101;
+};
diff --git a/sim/kelvin_encoding.cc b/sim/kelvin_encoding.cc index 1f1f544..53047f0 100644 --- a/sim/kelvin_encoding.cc +++ b/sim/kelvin_encoding.cc
@@ -239,15 +239,22 @@ }); source_op_getters_.emplace( // vst and vstq use `vd` field as the source for the vector store. + // convolution instructions also use `vd` as one of the sources. static_cast<int>(SourceOpEnum::kVd), [this]() -> SourceOperandInterface * { auto reg_num = encoding::kelvin_v2_args_type::ExtractVd(inst_word_); bool strip_mine = encoding::kelvin_v2_args_type::ExtractM(inst_word_); - if (opcode_ < OpcodeEnum::kVstBLXx || opcode_ > OpcodeEnum::kVstqWSpXxM) - return nullptr; return GetVectorRegisterSourceOp<mpact::sim::riscv::RVVectorRegister>( state_, reg_num, strip_mine, 1 /* widen_factor */); }); + source_op_getters_.emplace( + // Used by convolution instructions. + static_cast<int>(SourceOpEnum::kVs3), + [this]() -> SourceOperandInterface * { + auto reg_num = encoding::kelvin_v3_args_type::ExtractVs3(inst_word_); + return GetVectorRegisterSourceOp<mpact::sim::riscv::RVVectorRegister>( + state_, reg_num, false /* strip_mine */, 1 /* widen_factor */); + }); source_op_getters_.insert(std::make_pair( static_cast<int>(SourceOpEnum::kNone), []() { return nullptr; })); } @@ -316,6 +323,7 @@ decode_functions; decode_functions.push_back(encoding::DecodeKelvinInst); decode_functions.push_back(encoding::DecodeKelvinVectorArithInst); + decode_functions.push_back(encoding::DecodeKelvinVectorConvInst); decode_functions.push_back(encoding::DecodeKelvinVectorMemoryInst); decode_functions.push_back(encoding::DecodeKelvinVectorMulInst); decode_functions.push_back(encoding::DecodeKelvinVectorShiftInst);
diff --git a/sim/kelvin_format.bin_fmt b/sim/kelvin_format.bin_fmt index e84324e..7efefdf 100644 --- a/sim/kelvin_format.bin_fmt +++ b/sim/kelvin_format.bin_fmt
@@ -117,3 +117,19 @@ unsigned vs1_low2[2] = vs1[1..0]; unsigned vd_low2[2] = vd[1..0]; }; + +// Kelvin 3 args vector format. +format KelvinV3ArgsType[32] { + fields: + unsigned vs3[6]; + unsigned vs2[6]; + unsigned vs1[6]; + unsigned func3_hi[2]; + unsigned vd[6]; + unsigned m[1]; + unsigned func3_lo[2]; + unsigned form[3]; // .vvv=0b001, .vxv=0b101. + overlays: + unsigned vs1_low3[3] = vs1[2..0]; + unsigned vs3_low3[3] = vs3[2..0]; +};
diff --git a/sim/kelvin_mul.isa b/sim/kelvin_mul.isa index f1456ac..cb2fa73 100644 --- a/sim/kelvin_mul.isa +++ b/sim/kelvin_mul.isa
@@ -9,7 +9,6 @@ // Mul/Div instructions (func1 011) slot kelvin_mul { includes { - #include "sim/kelvin_instructions.h" #include "sim/kelvin_vector_instructions.h" #include "absl/functional/bind_front.h" } @@ -525,3 +524,18 @@ semfunc: "absl::bind_front(&KelvinVMadd<int32_t>, /*scalar*/ true, /*strip_mine*/ true)"; } } + +slot kelvin_conv { + includes { + #include "sim/kelvin_vector_convolution_instructions.h" + #include "absl/functional/bind_front.h" + } + default size = 4; + default latency = global_latency; + opcodes { + // vconv + aconv_vxv{: vs1, vs2, vs3 : vd}, + disasm: "aconv.vxv", "%vd, %vs1, %vs2, %vs3", + semfunc: "&KelvinVConv"; + } +}
diff --git a/sim/kelvin_state.h b/sim/kelvin_state.h index ceff045..0cfada0 100644 --- a/sim/kelvin_state.h +++ b/sim/kelvin_state.h
@@ -45,8 +45,8 @@ void set_vector_length(uint32_t length) { vector_length_ = length; } uint32_t vector_length() const { return vector_length_; } - void SetAccRegister(uint32_t *data, int index); AccArrayType *acc_vec(int index) { return &(acc_register_.at(index)); } + AccArrayTemplate<AccArrayType> acc_register() const { return acc_register_; } void SetLogArgs(std::any data) { log_args_.emplace_back(std::move(data)); } std::string *clog_string() { return &clog_string_; }
diff --git a/sim/kelvin_vector_convolution_instructions.cc b/sim/kelvin_vector_convolution_instructions.cc new file mode 100644 index 0000000..2f7fbf1 --- /dev/null +++ b/sim/kelvin_vector_convolution_instructions.cc
@@ -0,0 +1,123 @@
+#include "sim/kelvin_vector_convolution_instructions.h"
+
+#include <array>
+#include <cstdint>
+#include <cstring>
+
+#include "sim/kelvin_state.h"
+#include "absl/types/span.h"
+#include "riscv/riscv_register.h"
+#include "riscv/riscv_state.h"
+#include "mpact/sim/generic/data_buffer.h"
+#include "mpact/sim/generic/instruction.h"
+
+namespace kelvin::sim {
+
+using mpact::sim::generic::GetInstructionSource;
+using mpact::sim::riscv::RV32VectorSourceOperand;
+
+namespace {
+
+// Sign-extends the low 9 bits of `value` (two's complement) to int32_t. Uses
+// xor/subtract so the result does not depend on shifting signed values.
+constexpr int32_t SignExtend9(uint32_t value) {
+  return static_cast<int32_t>((value & 0x1ffu) ^ 0x100u) - 0x100;
+}
+
+}  // namespace
+
+// Implements the 3-arg vector convolution (im2col + matmul).
+//   Source 0 (vs1, narrow): starting register of 8 vector registers.
+//   Source 1 (xs2): scalar register storing the convolution command.
+//   Source 2 (vs3, wide): starting register of a group of up to 8 vector
+//     registers; one register is read per index in [start, stop].
+// `vd` is not used in the op: the products are added into the accumulation
+// register kept in KelvinState.
+// An illegal-instruction trap is raised, and the function returns without
+// updating the accumulator, when the command requests a mode other than 0
+// (8-bit), when start > stop, or when start/stop reach the number of 32-bit
+// words in a vector register.
+void KelvinVConv(Instruction *inst) {
+  auto *state = static_cast<KelvinState *>(inst->state());
+  constexpr int kVectorLenInByte = kVectorLengthInBits / 8;
+  constexpr int kVectorLenInWord = kVectorLenInByte / sizeof(uint32_t);
+
+  vconv_cmd_t conv_cmd;
+  const auto reg_data = GetInstructionSource<uint32_t>(inst, 1, 0);
+  memcpy(&conv_cmd, &reg_data, sizeof(conv_cmd));
+
+  // Examine the content of the cmd: only the 8-bit mode (mode == 0) is
+  // supported and [start, stop] must be an ordered range of word indices.
+  // Because start <= stop is required, bounding `stop` also bounds `start`.
+  const bool valid_cmd = conv_cmd.mode == 0 &&
+                         conv_cmd.start <= conv_cmd.stop &&
+                         conv_cmd.stop < kVectorLenInWord;
+  if (!valid_cmd) {
+    state->Trap(/*is_interrupt=*/false, /*trap_value=*/0,
+                *mpact::sim::riscv::ExceptionCode::kIllegalInstruction,
+                /*epc=*/inst->address(), inst);
+    return;
+  }
+
+  // Read the narrow source.
+  auto *vs1 = static_cast<RV32VectorSourceOperand *>(inst->Source(0));
+  auto *vs3 = static_cast<RV32VectorSourceOperand *>(inst->Source(2));
+  AccArrayTemplate<std::array<uint8_t, kVectorLenInByte>> vec_narrow;
+  const int narrow_rows = static_cast<int>(vec_narrow.size());
+  for (int vec_idx = 0; vec_idx < narrow_rows; ++vec_idx) {
+    auto source_span = vs1->GetRegister(vec_idx)->data_buffer()->Get<uint8_t>();
+    for (int j = 0; j < kVectorLenInByte; ++j) {
+      vec_narrow[vec_idx][j] = source_span[j];
+    }
+  }
+
+  // Prepare the accumulator. acc_register() returns a copy, so the products
+  // are summed locally and written back once at the end.
+  auto accumulator = state->acc_register();
+  const int acc_rows = static_cast<int>(accumulator.size());
+
+  // Convert the biases to 9-bit signed values.
+  const int32_t sbias1 = SignExtend9(conv_cmd.sbias1);
+  const int32_t sbias2 = SignExtend9(conv_cmd.sbias2);
+
+  // Multiply-Accumulate of conv(8x32xi8, 8x32xi8) -> 8x8xi32.
+  // Internally they are broken into 4 groups for accumulation to handle the
+  // double-widening data without extra interleaving steps. Also, the operation
+  // has both im2col and matmul in one shot (image data in `vs1`, filter/kernel
+  // in `vs3`), so for the typical matmul, the input re-shuffling is required.
+  //
+  // Note the output of this op CANNOT be used directly, because it is still
+  // in the double-widening format. It is expected to be followed by some
+  // double-reduction instructions to read the 8-bit data back in order.
+  constexpr int kInterleave[] = {0, 2, 1, 3};  // (ee, oe, eo, oo)
+  constexpr int kQuadBase = 4;                 // For double-widening.
+  constexpr int kQuadMask = kQuadBase - 1;
+  for (int k = conv_cmd.start; k <= conv_cmd.stop; ++k) {
+    auto wide_source_span =
+        vs3->GetRegister(k - conv_cmd.start)->data_buffer()->Get<uint8_t>();
+    const int wide_len = static_cast<int>(wide_source_span.size());
+    for (int i = 0; i < narrow_rows; ++i) {
+      // The accumulator row group and column block depend only on `i`.
+      const int rbase = i & ~kQuadMask;
+      const int rquad = i & kQuadMask;
+      for (int j = 0; j < wide_len; ++j) {
+        // data1 (narrow) is transposed and broadcasted.
+        const uint8_t n = vec_narrow[i][kQuadBase * k + (j & kQuadMask)];
+        const int32_t sdata1 = conv_cmd.sdata1 ? static_cast<int8_t>(n) : n;
+        const uint8_t w = wide_source_span[j];
+        const int32_t sdata2 = conv_cmd.sdata2 ? static_cast<int8_t>(w) : w;
+        const int word = j / kQuadBase;
+        const int idx_i = rbase + kInterleave[word & kQuadMask];
+        const int idx_j = rquad * (acc_rows / kQuadBase) + (word / kQuadBase);
+        accumulator[idx_i][idx_j] += (sdata1 + sbias1) * (sdata2 + sbias2);
+      }
+    }
+  }
+
+  // Write the results back to the accumulation register.
+  for (int i = 0; i < acc_rows; ++i) {
+    *state->acc_vec(i) = accumulator[i];
+  }
+}
+
+}  // namespace kelvin::sim
diff --git a/sim/kelvin_vector_convolution_instructions.h b/sim/kelvin_vector_convolution_instructions.h new file mode 100644 index 0000000..6d08f98 --- /dev/null +++ b/sim/kelvin_vector_convolution_instructions.h
@@ -0,0 +1,40 @@
+#ifndef SIM_KELVIN_VECTOR_CONVOLUTION_INSTRUCTIONS_H_
+#define SIM_KELVIN_VECTOR_CONVOLUTION_INSTRUCTIONS_H_
+
+#include <cstdint>
+
+#include "mpact/sim/generic/instruction.h"
+
+namespace kelvin::sim {
+
+using mpact::sim::generic::Instruction;
+
+// Command structure for the convolution instruction. KelvinVConv fills it by
+// copying the raw 32-bit command register value over the struct.
+//
+// NOTE(review): the bit ranges in the trailing comments are only correct if
+// the compiler allocates bit-fields starting from the most significant bit.
+// The allocation order is implementation-defined, and common little-endian
+// ABIs place the first declared field in the least significant bits. Confirm
+// the declaration order against the hardware command encoding.
+typedef struct KelvinVConvCmd {
+  uint32_t mode : 2;    // 31:30, operating mode; KelvinVConv accepts only 0.
+  uint32_t start : 5;   // 29:25, first index of the range to process.
+  uint32_t stop : 5;    // 24:20, last index of the range (inclusive).
+  uint32_t sbias1 : 9;  // 19:11, 9-bit signed bias added to the vs1 data.
+  uint32_t sdata1 : 1;  // 10, treat the vs1 data as signed when set.
+  uint32_t sbias2 : 9;  // 9:1, 9-bit signed bias added to the vs3 data.
+  uint32_t sdata2 : 1;  // 0, treat the vs3 data as signed when set.
+} vconv_cmd_t;
+
+// The command is exchanged with a 32-bit register through memcpy, so the
+// bit-fields must pack into exactly one 32-bit word.
+static_assert(sizeof(vconv_cmd_t) == sizeof(uint32_t),
+              "vconv_cmd_t must be exactly 32 bits wide");
+
+// Semantic function of the `aconv` convolution instruction.
+void KelvinVConv(Instruction *inst);
+
+}  // namespace kelvin::sim
+
+#endif  // SIM_KELVIN_VECTOR_CONVOLUTION_INSTRUCTIONS_H_
diff --git a/sim/test/BUILD b/sim/test/BUILD index 41d756f..73a0b56 100644 --- a/sim/test/BUILD +++ b/sim/test/BUILD
@@ -111,6 +111,27 @@ ) cc_test( + name = "kelvin_vector_convolution_instructions_test", + srcs = [ + "kelvin_vector_convolution_instructions_test.cc", + "testfiles/kelvin_vector_convolution_testdata.h", + ], + copts = [ + "-Werror", + "-Wvla-extension", + ], + deps = [ + ":kelvin_vector_instructions_test_base", + "//sim:kelvin_instructions", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", + "@com_google_googletest//:gtest_main", + "@com_google_mpact-riscv//riscv:riscv_state", + "@com_google_mpact-sim//mpact/sim/generic:instruction", + ], +) + +cc_test( name = "kelvin_vector_memory_instructions_test", srcs = ["kelvin_vector_memory_instructions_test.cc"], copts = [
diff --git a/sim/test/kelvin_vector_convolution_instructions_test.cc b/sim/test/kelvin_vector_convolution_instructions_test.cc new file mode 100644 index 0000000..d02aef2 --- /dev/null +++ b/sim/test/kelvin_vector_convolution_instructions_test.cc
@@ -0,0 +1,176 @@
+#include "sim/kelvin_vector_convolution_instructions.h"
+
+#include <array>
+#include <cstdint>
+#include <cstring>
+#include <functional>
+#include <vector>
+
+#include "sim/test/kelvin_vector_instructions_test_base.h"
+#include "sim/test/testfiles/kelvin_vector_convolution_testdata.h"
+#include "googletest/include/gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "absl/types/span.h"
+#include "riscv/riscv_state.h"
+#include "mpact/sim/generic/instruction.h"
+
+namespace {
+
+using mpact::sim::generic::Instruction;
+
+// Semantic functions.
+using kelvin::sim::KelvinVConv;
+
+// Fixture that runs KelvinVConv on the shared test vectors and compares the
+// accumulation register with the reference output.
+class KelvinVectorConvolutionInstructionsTest
+    : public kelvin::sim::test::KelvinVectorInstructionsTestBase {
+ protected:
+  // Loads kVConvIn1/kVConvIn2 into the vs1/vs3 register groups, stores
+  // `vconv_cmd` in the rs2 register, clears the accumulator, and executes
+  // KelvinVConv. With `expect_fail` set, only checks that the first execution
+  // raised an illegal-instruction trap; otherwise runs a second instruction
+  // with vs1 and vs3 swapped and compares the accumulator with kVConvOutRef.
+  void ConvolutionTestHelper(const kelvin::sim::vconv_cmd_t vconv_cmd,
+                             bool expect_fail = false) {
+    constexpr int kVs1 = 0;
+    constexpr int kVs3 = 16;
+    constexpr int kVd = 48;
+    const uint32_t kVLenInByte = state_->vector_length() / 8;
+    const uint32_t kVLenInWord = state_->vector_length() / 32;
+    // Set vs1 and vs3 from the test vectors, kVLenInByte bytes per register.
+    std::vector<uint8_t> vs1_value(kVLenInWord * kVLenInByte);
+    auto vs1_span = absl::Span<uint8_t>(vs1_value);
+    memcpy(vs1_span.data(), kVConvIn1, sizeof(kVConvIn1));
+    std::vector<uint8_t> vs3_value(kVLenInWord * kVLenInByte);
+    auto vs3_span = absl::Span<uint8_t>(vs3_value);
+    memcpy(vs3_span.data(), kVConvIn2, sizeof(kVConvIn2));
+    for (int i = 0; i < kVLenInWord; ++i) {
+      auto vs1_name = absl::StrCat("v", kVs1 + i);
+      auto vs3_name = absl::StrCat("v", kVs3 + i);
+      SetVectorRegisterValues<uint8_t>(
+          {{vs1_name, vs1_span.subspan(i * kVLenInByte, kVLenInByte)},
+           {vs3_name, vs3_span.subspan(i * kVLenInByte, kVLenInByte)}});
+    }
+    uint32_t vconv_cmd_value;
+    memcpy(&vconv_cmd_value, &vconv_cmd, sizeof(vconv_cmd_value));
+    SetRegisterValues<uint32_t>({{kelvin::sim::test::kRs2Name,
+                                  static_cast<uint32_t>(vconv_cmd_value)}});
+
+    // Reset accumulation register.
+    for (int i = 0; i < kVLenInWord; ++i) {
+      auto acc_vec = state_->acc_vec(i);
+      acc_vec->fill(0);
+    }
+
+    // Build two VConv instructions; the second one swaps vs1 and vs3.
+    // Operands are appended in source order: vector group, rs2, vector group.
+    std::array<InstructionPtr, 2> instructions = {CreateInstruction(),
+                                                  CreateInstruction()};
+    instructions[0]->set_semantic_function(KelvinVConv);
+    AppendVectorRegisterOperands(instructions[0].get(), kVLenInWord,
+                                 1 /* src1_widen_factor*/, kVs1, {},
+                                 false /* widen_dst*/, {kVd});
+    AppendRegisterOperands(instructions[0].get(), {kelvin::sim::test::kRs2Name},
+                           {});
+    AppendVectorRegisterOperands(instructions[0].get(), kVLenInWord,
+                                 1 /* src3_widen_factor*/, kVs3, {},
+                                 false /* widen_dst*/, {});
+
+    instructions[1]->set_semantic_function(KelvinVConv);
+    AppendVectorRegisterOperands(instructions[1].get(), kVLenInWord,
+                                 1 /* src1_widen_factor*/, kVs3, {},
+                                 false /* widen_dst*/, {kVd});
+    AppendRegisterOperands(instructions[1].get(), {kelvin::sim::test::kRs2Name},
+                           {});
+    AppendVectorRegisterOperands(instructions[1].get(), kVLenInWord,
+                                 1 /* src3_widen_factor*/, kVs1, {},
+                                 false /* widen_dst*/, {});
+    // Clear the failure flag and install the trap handler before executing.
+    execution_fail_ = false;
+    state_->set_on_trap(trap_call_back_);
+    instructions[0]->Execute();
+    if (expect_fail) {
+      EXPECT_TRUE(execution_fail_);
+      return;
+    }
+    instructions[1]->Execute();
+    EXPECT_FALSE(execution_fail_);
+    auto result_acc = state_->acc_register();
+    for (int i = 0; i < result_acc.size(); ++i) {
+      for (int j = 0; j < result_acc[i].size(); ++j) {
+        EXPECT_EQ(result_acc[i][j], kVConvOutRef[i][j])
+            << absl::StrCat("acc[", i, "][", j, "] != Ref[", i, "][", j, "]");
+      }
+    }
+  }
+
+ private:
+  // Set by trap_call_back_ when an illegal-instruction trap is observed.
+  bool execution_fail_;
+  // Trap handler: records illegal-instruction traps in execution_fail_ and
+  // returns true for them; returns false for any other exception code.
+  std::function<bool(bool, uint64_t, uint64_t, uint64_t, const Instruction *)>
+      trap_call_back_ = [this](bool is_interrupt, uint64_t trap_value,
+                               uint64_t exception_code, uint64_t epc,
+                               const Instruction *instruction) {
+        auto code =
+            static_cast<mpact::sim::riscv::ExceptionCode>(exception_code);
+        if (code == mpact::sim::riscv::ExceptionCode::kIllegalInstruction) {
+          this->execution_fail_ = true;
+          return true;
+        }
+        return false;
+      };
+};
+
+TEST_F(KelvinVectorConvolutionInstructionsTest, VConv) {
+  // Set the convolution to have 8 filters (starting from index 0), with the
+  // data bias of 86 (unsigned) and the filter bias of 188 (signed).
+  kelvin::sim::vconv_cmd_t vconv_cmd{.mode = 0,
+                                     .start = 0,
+                                     .stop = 7,
+                                     .sbias1 = 86,
+                                     .sdata1 = false,
+                                     .sbias2 = 188,
+                                     .sdata2 = true};
+  ConvolutionTestHelper(vconv_cmd);
+}
+
+TEST_F(KelvinVectorConvolutionInstructionsTest, VConvWrongMode) {
+  // Set the convolution to work on 16-bit input/filter (illegal setting).
+  kelvin::sim::vconv_cmd_t vconv_cmd{.mode = 1,
+                                     .start = 0,
+                                     .stop = 7,
+                                     .sbias1 = 86,
+                                     .sdata1 = false,
+                                     .sbias2 = 188,
+                                     .sdata2 = true};
+  ConvolutionTestHelper(vconv_cmd, true);
+}
+
+TEST_F(KelvinVectorConvolutionInstructionsTest, VConvTooLargeStop) {
+  // Set the convolution to work on 9 filters (too many filters).
+  kelvin::sim::vconv_cmd_t vconv_cmd{.mode = 0,
+                                     .start = 0,
+                                     .stop = 8,
+                                     .sbias1 = 86,
+                                     .sdata1 = false,
+                                     .sbias2 = 188,
+                                     .sdata2 = true};
+  ConvolutionTestHelper(vconv_cmd, true);
+}
+
+TEST_F(KelvinVectorConvolutionInstructionsTest, VConvWrongStop) {
+  // Set the convolution to start from filter 7 and to stop at filter 5 (reverse
+  // order).
+  kelvin::sim::vconv_cmd_t vconv_cmd{.mode = 0,
+                                     .start = 7,
+                                     .stop = 5,
+                                     .sbias1 = 86,
+                                     .sdata1 = false,
+                                     .sbias2 = 188,
+                                     .sdata2 = true};
+  ConvolutionTestHelper(vconv_cmd, true);
+}
+}  // namespace
diff --git a/sim/test/testfiles/kelvin_vector_convolution_testdata.h b/sim/test/testfiles/kelvin_vector_convolution_testdata.h new file mode 100644 index 0000000..0cb2779 --- /dev/null +++ b/sim/test/testfiles/kelvin_vector_convolution_testdata.h
@@ -0,0 +1,146 @@ +// Test input/reference vectors for the vector convolution instruction +// functions. +#ifndef SIM_TEST_TESTFILES_KELVIN_VECTOR_CONVOLUTION_TESTDATA_H_ +#define SIM_TEST_TESTFILES_KELVIN_VECTOR_CONVOLUTION_TESTDATA_H_ + +#include <cstdint> + +// clang-format off +constexpr uint8_t kVConvIn1[8][32] = { + { + 0xea, 0xe6, 0xaa, 0xb0, 0x5e, 0x46, 0x43, 0x2c, + 0x58, 0x69, 0xd4, 0x25, 0xf0, 0xe9, 0x74, 0xd5, + 0x34, 0x16, 0x4c, 0x86, 0xa8, 0x0c, 0xac, 0xa8, + 0x9f, 0x99, 0x5d, 0xe0, 0x1a, 0x93, 0x65, 0x88, + }, + { + 0x4a, 0x3a, 0x57, 0x8b, 0x50, 0x6c, 0x1b, 0x37, + 0x76, 0x8c, 0x72, 0x55, 0xb3, 0xce, 0xf3, 0x50, + 0x74, 0x51, 0xb2, 0xb9, 0xb9, 0x76, 0xc4, 0x94, + 0x29, 0x52, 0x16, 0xa8, 0x68, 0xa7, 0x8e, 0xe2, + }, + { + 0x61, 0xe8, 0xb8, 0xae, 0x43, 0x0a, 0xbe, 0xfa, + 0x0b, 0x32, 0x7a, 0x92, 0x44, 0x3a, 0x60, 0xd3, + 0x11, 0xc0, 0xb3, 0x8c, 0x7e, 0x5b, 0x9c, 0xe9, + 0x03, 0x94, 0x7f, 0x10, 0x38, 0xd5, 0xc6, 0xeb, + }, + { + 0x5c, 0x79, 0x44, 0xe9, 0xfb, 0x32, 0x00, 0xf0, + 0x79, 0xed, 0x92, 0x77, 0x93, 0x45, 0xc3, 0x63, + 0x33, 0x7e, 0x07, 0xc5, 0x07, 0x76, 0x9d, 0xf0, + 0x4e, 0x6a, 0x67, 0xcc, 0xca, 0xab, 0xd4, 0x24, + }, + { + 0x82, 0x86, 0xbb, 0xd9, 0x4b, 0xda, 0x2e, 0xbb, + 0x8d, 0xbb, 0x82, 0x0e, 0x53, 0xaa, 0xb8, 0xfa, + 0xa1, 0x75, 0x6e, 0x4a, 0x3c, 0xba, 0xa1, 0x88, + 0x88, 0x6c, 0x74, 0x5b, 0x09, 0x83, 0x98, 0xdc, + }, + { + 0x91, 0x9a, 0x9f, 0xcd, 0x87, 0xd7, 0x74, 0x90, + 0xa5, 0x25, 0xcf, 0x56, 0x5d, 0x41, 0x56, 0xc2, + 0x0d, 0xdd, 0xd8, 0x2c, 0x59, 0x1d, 0x1c, 0x66, + 0x06, 0xe9, 0xd3, 0x51, 0x83, 0x16, 0x65, 0x56, + }, + { + 0xf6, 0x95, 0x7c, 0x47, 0xf5, 0x56, 0x53, 0x58, + 0x87, 0x9c, 0xde, 0xac, 0x76, 0x78, 0x71, 0x86, + 0x5c, 0xdb, 0x5f, 0x0d, 0xc4, 0x5f, 0x48, 0x61, + 0x48, 0x6f, 0x77, 0x26, 0x68, 0xf1, 0x39, 0x58, + }, + { + 0x32, 0x36, 0x68, 0x29, 0x67, 0xb8, 0x7c, 0xdd, + 0xb9, 0x17, 0xb0, 0xec, 0x2e, 0xcc, 0xa5, 0x54, + 0x76, 0x56, 0xc8, 0x0b, 0x77, 0xa1, 0xef, 0xf5, + 0xcf, 0xd6, 0x84, 0x7a, 0x84, 0x0f, 0x4e, 0x16, + }, +}; + 
+constexpr uint8_t kVConvIn2[8][32] = { + { + 0x33, 0xc9, 0xb8, 0xa1, 0xea, 0x2d, 0x2c, 0x18, + 0x92, 0x98, 0x8e, 0x19, 0xf2, 0xd8, 0x55, 0x92, + 0x26, 0x4a, 0x8d, 0x3e, 0xb0, 0x01, 0x81, 0x6c, + 0x3e, 0xcc, 0x8f, 0x9b, 0xf9, 0xde, 0x94, 0x9e, + }, + { + 0x7f, 0xa9, 0xa8, 0x3a, 0xa3, 0xef, 0xb7, 0xb5, + 0x44, 0x93, 0xa1, 0xf7, 0x09, 0x7b, 0xb6, 0x6f, + 0x98, 0x9e, 0xaa, 0x60, 0xb4, 0xe2, 0x9b, 0x93, + 0x4b, 0x8f, 0xa7, 0xe4, 0x96, 0xe6, 0xcd, 0x93, + }, + { + 0x63, 0xfd, 0x8e, 0xd2, 0xfd, 0xe0, 0x13, 0x5b, + 0xd7, 0x5f, 0xa0, 0x56, 0x02, 0x29, 0x4e, 0xfa, + 0x9b, 0x30, 0xa5, 0xdb, 0xb4, 0xe7, 0xb9, 0x52, + 0x05, 0xda, 0x57, 0xa8, 0xbd, 0x65, 0xfe, 0xf0, + }, + { + 0x1c, 0x22, 0x48, 0x3f, 0x5c, 0xae, 0x08, 0x8c, + 0x40, 0xd4, 0x97, 0xeb, 0xb1, 0x92, 0x50, 0xd4, + 0x66, 0xac, 0x58, 0x20, 0x3c, 0x92, 0xc0, 0x5c, + 0x50, 0x89, 0x42, 0x93, 0x7b, 0x8b, 0x0a, 0x33, + }, + { + 0x3e, 0x98, 0x0c, 0x1a, 0xcd, 0x6c, 0xd5, 0x26, + 0xad, 0x94, 0xd0, 0x6a, 0xbd, 0x19, 0x02, 0x42, + 0xc1, 0x69, 0x20, 0x94, 0xc2, 0x74, 0xb7, 0xbf, + 0x9f, 0x45, 0xd5, 0x6f, 0x22, 0x92, 0xbe, 0x88, + }, + { + 0x4c, 0xbc, 0xed, 0x2f, 0x9e, 0xe5, 0x27, 0xf9, + 0x0e, 0xd3, 0xb5, 0x74, 0x83, 0x5f, 0x5a, 0xa2, + 0xaf, 0x0f, 0x49, 0x0a, 0xe3, 0x1b, 0xc7, 0xd1, + 0xd1, 0x51, 0xa9, 0x86, 0x75, 0xb3, 0xbc, 0xe9, + }, + { + 0x9f, 0x2a, 0xe9, 0xfc, 0x4f, 0x10, 0x57, 0x7d, + 0x14, 0xec, 0x5f, 0x39, 0xa3, 0x0e, 0x92, 0x62, + 0x86, 0xad, 0xae, 0x38, 0x95, 0x7f, 0xde, 0x30, + 0x9d, 0xbf, 0xe5, 0xb2, 0xb3, 0xb6, 0xf1, 0x42, + }, + { + 0x0c, 0xd7, 0xd4, 0xa8, 0xae, 0x1a, 0xe6, 0x3c, + 0xf0, 0xd7, 0x4f, 0x36, 0x37, 0xeb, 0x1d, 0xe9, + 0xc7, 0xe4, 0x82, 0xe4, 0x44, 0x1c, 0x6a, 0xa1, + 0x96, 0xfe, 0x2e, 0x9d, 0x36, 0xb4, 0x1c, 0x03, + }, +}; + +constexpr uint32_t kVConvOutRef[8][8] = { + { + 0x002706b4, 0x00284ddd, 0x00279af7, 0x00293b6a, + 0x00265a51, 0x0028f490, 0x0024222a, 0x0025a738, + }, + { + 0x00260af1, 0x002687fd, 0x0026d33e, 0x0028700b, + 0x002701d0, 0x00273546, 0x0025002d, 0x00254eaa, + }, + { + 0x002b2935, 
0x00262aea, 0x002ca80a, 0x00275a99, + 0x002cc3e7, 0x0027e665, 0x00288ed7, 0x00254319, + }, + { + 0x0028690a, 0x0023f3b1, 0x002aed22, 0x0025a60b, + 0x002ad4e5, 0x0024a4c3, 0x00283b5b, 0x0021ce1e, + }, + { + 0x0025e1a3, 0x002700e9, 0x00255faa, 0x00270fbc, + 0x00257a3b, 0x0026dd70, 0x0025c9d7, 0x0026fcc3, + }, + { + 0x0025a025, 0x00258f27, 0x0024d04f, 0x00261465, + 0x00244b49, 0x00269500, 0x0025ddb5, 0x0025ca26, + }, + { + 0x00299efe, 0x0025a702, 0x002a766b, 0x00269ec7, + 0x002acd15, 0x0025366f, 0x002a4de2, 0x002568bf, + }, + { + 0x0029429d, 0x0023c5ba, 0x002a03f7, 0x00240475, + 0x0028fac3, 0x0023f507, 0x0029f7e5, 0x0023008b, + }, +}; +// clang-format on +#endif // SIM_TEST_TESTFILES_KELVIN_VECTOR_CONVOLUTION_TESTDATA_H_