Add convolution instruction support

Add simulation support for the `aconv` instruction.

PiperOrigin-RevId: 556952596
diff --git a/sim/BUILD b/sim/BUILD
index 8437844..53942c9 100644
--- a/sim/BUILD
+++ b/sim/BUILD
@@ -28,11 +28,13 @@
     name = "kelvin_instructions",
     srcs = [
         "kelvin_instructions.cc",
+        "kelvin_vector_convolution_instructions.cc",
         "kelvin_vector_instructions.cc",
         "kelvin_vector_memory_instructions.cc",
     ],
     hdrs = [
         "kelvin_instructions.h",
+        "kelvin_vector_convolution_instructions.h",
         "kelvin_vector_instructions.h",
         "kelvin_vector_memory_instructions.h",
     ],
@@ -74,6 +76,7 @@
     includes = [
         "kelvin_arith.bin_fmt",
         "kelvin_base.bin_fmt",
+        "kelvin_conv.bin_fmt",
         "kelvin_format.bin_fmt",
         "kelvin_memory.bin_fmt",
         "kelvin_mul.bin_fmt",
diff --git a/sim/kelvin.bin_fmt b/sim/kelvin.bin_fmt
index 39703dc..c6e7d98 100644
--- a/sim/kelvin.bin_fmt
+++ b/sim/kelvin.bin_fmt
@@ -7,6 +7,7 @@
   }
   KelvinInst;
   KelvinVectorArithInst;
+  KelvinVectorConvInst;
   KelvinVectorMemoryInst;
   KelvinVectorMulInst;
   KelvinVectorShiftInst;
@@ -15,6 +16,7 @@
 #include "sim/kelvin_format.bin_fmt"
 #include "sim/kelvin_arith.bin_fmt"
 #include "sim/kelvin_base.bin_fmt"
+#include "sim/kelvin_conv.bin_fmt"
 #include "sim/kelvin_memory.bin_fmt"
 #include "sim/kelvin_mul.bin_fmt"
 #include "sim/kelvin_shift.bin_fmt"
diff --git a/sim/kelvin.isa b/sim/kelvin.isa
index db1a680..0b046da 100644
--- a/sim/kelvin.isa
+++ b/sim/kelvin.isa
@@ -19,7 +19,8 @@
 
 // Combining all kelvin instruction sets.
 slot kelvin : riscv32i, riscv32m, zicsr, zfencei, privileged, kelvin_arith,
-    kelvin_log, kelvin_memory, kelvin_mul, kelvin_shift, kelvin_vector_memory {
+    kelvin_conv, kelvin_log, kelvin_memory, kelvin_mul, kelvin_shift,
+    kelvin_vector_memory {
   includes {
     #include "sim/kelvin_instructions.h"
   }
diff --git a/sim/kelvin_conv.bin_fmt b/sim/kelvin_conv.bin_fmt
new file mode 100644
index 0000000..48b0634
--- /dev/null
+++ b/sim/kelvin_conv.bin_fmt
@@ -0,0 +1,4 @@
+instruction group KelvinVectorConvInst[32] : KelvinV3ArgsType {
+  // vconv: aconv.vxv needs vd == 48, vs1/vs3 multiples of 8 and vs2 != 0.
+  aconv_vxv      : KelvinV3ArgsType : func3_hi == 0b10, func3_lo == 0b00, vd == 48, vs1_low3 == 0, vs2 != 0, vs3_low3 == 0, m == 0, form == 0b101;
+};
diff --git a/sim/kelvin_encoding.cc b/sim/kelvin_encoding.cc
index 1f1f544..53047f0 100644
--- a/sim/kelvin_encoding.cc
+++ b/sim/kelvin_encoding.cc
@@ -239,15 +239,22 @@
       });
   source_op_getters_.emplace(
       // vst and vstq use `vd` field as the source for the vector store.
+      // convolution instructions also use `vd` as one of the sources.
       static_cast<int>(SourceOpEnum::kVd),
       [this]() -> SourceOperandInterface * {
         auto reg_num = encoding::kelvin_v2_args_type::ExtractVd(inst_word_);
         bool strip_mine = encoding::kelvin_v2_args_type::ExtractM(inst_word_);
-        if (opcode_ < OpcodeEnum::kVstBLXx || opcode_ > OpcodeEnum::kVstqWSpXxM)
-          return nullptr;
         return GetVectorRegisterSourceOp<mpact::sim::riscv::RVVectorRegister>(
             state_, reg_num, strip_mine, 1 /* widen_factor */);
       });
+  source_op_getters_.emplace(
+      // Used by convolution instructions.
+      static_cast<int>(SourceOpEnum::kVs3),
+      [this]() -> SourceOperandInterface * {
+        auto reg_num = encoding::kelvin_v3_args_type::ExtractVs3(inst_word_);
+        return GetVectorRegisterSourceOp<mpact::sim::riscv::RVVectorRegister>(
+            state_, reg_num, false /* strip_mine */, 1 /* widen_factor */);
+      });
   source_op_getters_.insert(std::make_pair(
       static_cast<int>(SourceOpEnum::kNone), []() { return nullptr; }));
 }
@@ -316,6 +323,7 @@
       decode_functions;
   decode_functions.push_back(encoding::DecodeKelvinInst);
   decode_functions.push_back(encoding::DecodeKelvinVectorArithInst);
+  decode_functions.push_back(encoding::DecodeKelvinVectorConvInst);
   decode_functions.push_back(encoding::DecodeKelvinVectorMemoryInst);
   decode_functions.push_back(encoding::DecodeKelvinVectorMulInst);
   decode_functions.push_back(encoding::DecodeKelvinVectorShiftInst);
diff --git a/sim/kelvin_format.bin_fmt b/sim/kelvin_format.bin_fmt
index e84324e..7efefdf 100644
--- a/sim/kelvin_format.bin_fmt
+++ b/sim/kelvin_format.bin_fmt
@@ -117,3 +117,19 @@
     unsigned vs1_low2[2] = vs1[1..0];
     unsigned vd_low2[2] = vd[1..0];
 };
+
+// Kelvin 3-arg vector format: 32 bits with 6-bit vs3, vs2, vs1 and vd fields.
+format KelvinV3ArgsType[32] {
+  fields:
+    unsigned vs3[6];
+    unsigned vs2[6];
+    unsigned vs1[6];
+    unsigned func3_hi[2];
+    unsigned vd[6];
+    unsigned m[1];
+    unsigned func3_lo[2];
+    unsigned form[3];  // .vvv=0b001, .vxv=0b101.
+  overlays:
+    unsigned vs1_low3[3] = vs1[2..0];
+    unsigned vs3_low3[3] = vs3[2..0];
+};
diff --git a/sim/kelvin_mul.isa b/sim/kelvin_mul.isa
index f1456ac..cb2fa73 100644
--- a/sim/kelvin_mul.isa
+++ b/sim/kelvin_mul.isa
@@ -9,7 +9,6 @@
 // Mul/Div instructions (func1 011)
 slot kelvin_mul {
   includes {
-    #include "sim/kelvin_instructions.h"
     #include "sim/kelvin_vector_instructions.h"
     #include "absl/functional/bind_front.h"
   }
@@ -525,3 +524,18 @@
       semfunc: "absl::bind_front(&KelvinVMadd<int32_t>, /*scalar*/ true, /*strip_mine*/ true)";
   }
 }
+
+slot kelvin_conv {
+  includes {
+    #include "sim/kelvin_vector_convolution_instructions.h"
+    #include "absl/functional/bind_front.h"
+  }
+  default size = 4;
+  default latency = global_latency;
+  opcodes {
+    // vconv: sources are vs1, vs2 and vs3; vd is the listed destination.
+    aconv_vxv{: vs1, vs2, vs3 : vd},
+      disasm: "aconv.vxv", "%vd, %vs1, %vs2, %vs3",
+      semfunc: "&KelvinVConv";
+  }
+}
diff --git a/sim/kelvin_state.h b/sim/kelvin_state.h
index ceff045..0cfada0 100644
--- a/sim/kelvin_state.h
+++ b/sim/kelvin_state.h
@@ -45,8 +45,8 @@
   void set_vector_length(uint32_t length) { vector_length_ = length; }
   uint32_t vector_length() const { return vector_length_; }
 
-  void SetAccRegister(uint32_t *data, int index);
   AccArrayType *acc_vec(int index) { return &(acc_register_.at(index)); }
+  AccArrayTemplate<AccArrayType> acc_register() const { return acc_register_; }
 
   void SetLogArgs(std::any data) { log_args_.emplace_back(std::move(data)); }
   std::string *clog_string() { return &clog_string_; }
diff --git a/sim/kelvin_vector_convolution_instructions.cc b/sim/kelvin_vector_convolution_instructions.cc
new file mode 100644
index 0000000..2f7fbf1
--- /dev/null
+++ b/sim/kelvin_vector_convolution_instructions.cc
@@ -0,0 +1,111 @@
+#include "sim/kelvin_vector_convolution_instructions.h"
+
+#include <array>
+#include <cstdint>
+#include <cstring>
+
+#include "sim/kelvin_state.h"
+#include "absl/types/span.h"
+#include "riscv/riscv_register.h"
+#include "riscv/riscv_state.h"
+#include "mpact/sim/generic/data_buffer.h"
+#include "mpact/sim/generic/instruction.h"
+
+namespace kelvin::sim {
+
+using mpact::sim::generic::GetInstructionSource;
+using mpact::sim::riscv::RV32VectorSourceOperand;
+
+// Implements the 3-arg vector convolution `aconv.vxv` (im2col + matmul).
+//   Source 0 (vs1, narrow): first of a group of 8 vector registers.
+//   Source 1 (xs2): 32-bit convolution command, decoded as `vconv_cmd_t`.
+//   Source 2 (vs3, wide): first of a group of up to 8 vector registers;
+//     registers 0..(stop - start) of the group are consumed.
+// The products are added into the accumulator held by KelvinState; `vd` is
+// not used in the op. A malformed command raises an illegal-instruction trap
+// and returns before the accumulator is touched.
+void KelvinVConv(Instruction *inst) {
+  auto state = static_cast<KelvinState *>(inst->state());
+  constexpr int kVectorLenInByte = kVectorLengthInBits / 8;
+  constexpr int kVectorLenInWord = kVectorLenInByte / sizeof(uint32_t);
+
+  // The command is the raw xs2 value reinterpreted as bit-fields, so the field
+  // positions follow the host compiler's bit-field layout.
+  vconv_cmd_t conv_cmd;
+  auto reg_data = GetInstructionSource<uint32_t>(inst, 1, 0);
+  memcpy(&conv_cmd, &reg_data, sizeof(conv_cmd));
+
+  // Validate the command once: only the 8-bit mode (mode == 0) is supported,
+  // and start <= stop must both lie below kVectorLenInWord.
+  const bool bad_mode = conv_cmd.mode != 0;
+  const bool bad_range = conv_cmd.start > conv_cmd.stop ||
+                         conv_cmd.start >= kVectorLenInWord ||
+                         conv_cmd.stop >= kVectorLenInWord;
+  if (bad_mode || bad_range) {
+    state->Trap(/*is_interrupt=*/false, /*trap_value=*/0,
+                *mpact::sim::riscv::ExceptionCode::kIllegalInstruction,
+                /*epc=*/inst->address(), inst);
+    return;
+  }
+
+  // Read the narrow source: one byte row per register of the vs1 group.
+  auto vs1 = static_cast<RV32VectorSourceOperand *>(inst->Source(0));
+  auto vs3 = static_cast<RV32VectorSourceOperand *>(inst->Source(2));
+  AccArrayTemplate<std::array<uint8_t, kVectorLenInByte>> vec_narrow;
+  for (int vec_idx = 0; vec_idx < vec_narrow.size(); ++vec_idx) {
+    auto source_span = vs1->GetRegister(vec_idx)->data_buffer()->Get<uint8_t>();
+    for (int j = 0; j < vec_narrow[vec_idx].size(); ++j) {
+      vec_narrow[vec_idx][j] = source_span[j];
+    }
+  }
+
+  // Work on a local copy of the accumulator; it is written back at the end.
+  auto accumulator = state->acc_register();
+
+  // Sign-extend the 9-bit bias fields to 32 bits.
+  int32_t sbias1 = (static_cast<int32_t>(conv_cmd.sbias1) << 23) >> 23;
+  int32_t sbias2 = (static_cast<int32_t>(conv_cmd.sbias2) << 23) >> 23;
+
+  // Multiply-Accumulate of conv(8x32xi8,  8x32xi8) -> 8x8xi32.
+  // Internally they are broken into 4 groups for accumulation to handle the
+  // double-widening data without extra interleaving steps. Also, the operation
+  // has both im2col and matmul in one shot (image data in `vs1`, filter/kernel
+  // in `vs3`), so for the typical matmul, the input re-shuffling is required.
+  //
+  // Note the output of this op CANNOT be used directly, because it is still
+  // in the double-widening format. It is expected to be followed by some
+  // double-reduction instructions to read the 8-bit data back in order.
+  constexpr int kInterleave[] = {0, 2, 1, 3};  // (ee, oe, eo, oo)
+  constexpr int kQuadBase = 4;                 // For double-widening.
+  constexpr int kQuadMask = kQuadBase - 1;
+  const int quad_stride = accumulator.size() / kQuadBase;
+  for (int k = conv_cmd.start; k <= conv_cmd.stop; ++k) {
+    // The k-th wide register is taken relative to the start of the vs3 group.
+    auto wide_source_span =
+        vs3->GetRegister(k - conv_cmd.start)->data_buffer()->Get<uint8_t>();
+    for (int i = 0; i < vec_narrow.size(); ++i) {
+      // Accumulator row base and column quadrant depend only on `i`.
+      const int rbase = i & ~kQuadMask;
+      const int rquad = i & kQuadMask;
+      for (int j = 0; j < wide_source_span.size(); ++j) {
+        // data1 (narrow) is transposed and broadcasted.
+        uint8_t n = vec_narrow[i][kQuadBase * k + (j & kQuadMask)];
+        int32_t sdata1 = conv_cmd.sdata1 ? static_cast<int8_t>(n) : n;
+        uint8_t w = wide_source_span[j];
+        int32_t sdata2 = conv_cmd.sdata2 ? static_cast<int8_t>(w) : w;
+        const int word = j / kQuadBase;
+        const int idx_i = rbase + kInterleave[word & kQuadMask];
+        const int idx_j = rquad * quad_stride + (word / kQuadBase);
+        accumulator[idx_i][idx_j] += (sdata1 + sbias1) * (sdata2 + sbias2);
+      }
+    }
+  }
+
+  // Write the results back to the accumulation register. The local copy's
+  // size is used so the state's accumulator is not copied on every iteration.
+  for (int i = 0; i < accumulator.size(); ++i) {
+    *state->acc_vec(i) = accumulator[i];
+  }
+}
+
+}  // namespace kelvin::sim
diff --git a/sim/kelvin_vector_convolution_instructions.h b/sim/kelvin_vector_convolution_instructions.h
new file mode 100644
index 0000000..6d08f98
--- /dev/null
+++ b/sim/kelvin_vector_convolution_instructions.h
@@ -0,0 +1,27 @@
+#ifndef SIM_KELVIN_VECTOR_CONVOLUTION_INSTRUCTIONS_H_
+#define SIM_KELVIN_VECTOR_CONVOLUTION_INSTRUCTIONS_H_
+
+#include <cstdint>
+
+#include "mpact/sim/generic/instruction.h"
+
+namespace kelvin::sim {
+
+using mpact::sim::generic::Instruction;
+
+// aconv command (from xs2). NOTE(review): bit ranges assume MSB-first packing.
+typedef struct KelvinVConvCmd {
+  uint32_t mode : 2;    // 31:30
+  uint32_t start : 5;   // 29:25
+  uint32_t stop : 5;    // 24:20
+  uint32_t sbias1 : 9;  // 19:11
+  uint32_t sdata1 : 1;  // 10
+  uint32_t sbias2 : 9;  // 9:1
+  uint32_t sdata2 : 1;  // 0
+} vconv_cmd_t;
+
+void KelvinVConv(Instruction *inst);
+
+}  // namespace kelvin::sim
+
+#endif  // SIM_KELVIN_VECTOR_CONVOLUTION_INSTRUCTIONS_H_
diff --git a/sim/test/BUILD b/sim/test/BUILD
index 41d756f..73a0b56 100644
--- a/sim/test/BUILD
+++ b/sim/test/BUILD
@@ -111,6 +111,27 @@
 )
 
 cc_test(
+    name = "kelvin_vector_convolution_instructions_test",
+    srcs = [
+        "kelvin_vector_convolution_instructions_test.cc",
+        "testfiles/kelvin_vector_convolution_testdata.h",
+    ],
+    copts = [
+        "-Werror",
+        "-Wvla-extension",
+    ],
+    deps = [
+        ":kelvin_vector_instructions_test_base",
+        "//sim:kelvin_instructions",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:span",
+        "@com_google_googletest//:gtest_main",
+        "@com_google_mpact-riscv//riscv:riscv_state",
+        "@com_google_mpact-sim//mpact/sim/generic:instruction",
+    ],
+)
+
+cc_test(
     name = "kelvin_vector_memory_instructions_test",
     srcs = ["kelvin_vector_memory_instructions_test.cc"],
     copts = [
diff --git a/sim/test/kelvin_vector_convolution_instructions_test.cc b/sim/test/kelvin_vector_convolution_instructions_test.cc
new file mode 100644
index 0000000..d02aef2
--- /dev/null
+++ b/sim/test/kelvin_vector_convolution_instructions_test.cc
@@ -0,0 +1,164 @@
+#include "sim/kelvin_vector_convolution_instructions.h"
+
+#include <array>
+#include <cstdint>
+#include <cstring>
+#include <functional>
+#include <vector>
+
+#include "sim/test/kelvin_vector_instructions_test_base.h"
+#include "sim/test/testfiles/kelvin_vector_convolution_testdata.h"
+#include "googletest/include/gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "absl/types/span.h"
+#include "riscv/riscv_state.h"
+#include "mpact/sim/generic/instruction.h"
+
+namespace {
+
+using mpact::sim::generic::Instruction;
+
+// Semantic functions.
+using kelvin::sim::KelvinVConv;
+
+class KelvinVectorConvolutionInstructionsTest
+    : public kelvin::sim::test::KelvinVectorInstructionsTestBase {
+ protected:
+  void ConvolutionTestHelper(const kelvin::sim::vconv_cmd_t vconv_cmd,
+                             bool expect_fail = false) {
+    constexpr int kVs1 = 0;
+    constexpr int kVs3 = 16;
+    constexpr int kVd = 48;
+    const uint32_t kVLenInByte = state_->vector_length() / 8;
+    const uint32_t kVLenInWord = state_->vector_length() / 32;
+    // Fill the vs1 and vs3 register groups from the test vectors.
+    std::vector<uint8_t> vs1_value(kVLenInWord * kVLenInByte);
+    auto vs1_span = absl::Span<uint8_t>(vs1_value);
+    memcpy(vs1_span.data(), kVConvIn1, sizeof(kVConvIn1));
+    std::vector<uint8_t> vs3_value(kVLenInWord * kVLenInByte);
+    auto vs3_span = absl::Span<uint8_t>(vs3_value);
+    memcpy(vs3_span.data(), kVConvIn2, sizeof(kVConvIn2));
+    for (int i = 0; i < kVLenInWord; ++i) {
+      auto vs1_name = absl::StrCat("v", kVs1 + i);
+      auto vs3_name = absl::StrCat("v", kVs3 + i);
+      SetVectorRegisterValues<uint8_t>(
+          {{vs1_name, vs1_span.subspan(i * kVLenInByte, kVLenInByte)},
+           {vs3_name, vs3_span.subspan(i * kVLenInByte, kVLenInByte)}});
+    }
+    uint32_t vconv_cmd_value;
+    memcpy(&vconv_cmd_value, &vconv_cmd, sizeof(vconv_cmd_value));
+    SetRegisterValues<uint32_t>({{kelvin::sim::test::kRs2Name,
+                                  static_cast<uint32_t>(vconv_cmd_value)}});
+
+    // Zero the accumulation register rows before executing.
+    for (int i = 0; i < kVLenInWord; ++i) {
+      auto acc_vec = state_->acc_vec(i);
+      acc_vec->fill(0);
+    }
+
+    // Two VConv instructions; the second one swaps the vs1 and vs3 groups.
+    std::array<InstructionPtr, 2> instructions = {CreateInstruction(),
+                                                  CreateInstruction()};
+    instructions[0]->set_semantic_function(KelvinVConv);
+    AppendVectorRegisterOperands(instructions[0].get(), kVLenInWord,
+                                 1 /* src1_widen_factor*/, kVs1, {},
+                                 false /* widen_dst*/, {kVd});
+    AppendRegisterOperands(instructions[0].get(), {kelvin::sim::test::kRs2Name},
+                           {});
+    AppendVectorRegisterOperands(instructions[0].get(), kVLenInWord,
+                                 1 /* src3_widen_factor*/, kVs3, {},
+                                 false /* widen_dst*/, {});
+
+    instructions[1]->set_semantic_function(KelvinVConv);
+    AppendVectorRegisterOperands(instructions[1].get(), kVLenInWord,
+                                 1 /* src1_widen_factor*/, kVs3, {},
+                                 false /* widen_dst*/, {kVd});
+    AppendRegisterOperands(instructions[1].get(), {kelvin::sim::test::kRs2Name},
+                           {});
+    AppendVectorRegisterOperands(instructions[1].get(), kVLenInWord,
+                                 1 /* src3_widen_factor*/, kVs1, {},
+                                 false /* widen_dst*/, {});
+    execution_fail_ = false;
+    state_->set_on_trap(trap_call_back_);
+    instructions[0]->Execute();
+    if (expect_fail) {
+      EXPECT_TRUE(execution_fail_);
+      return;
+    }
+    instructions[1]->Execute();
+    EXPECT_FALSE(execution_fail_);
+    auto result_acc = state_->acc_register();
+    for (int i = 0; i < result_acc.size(); ++i) {
+      for (int j = 0; j < result_acc[i].size(); ++j) {
+        EXPECT_EQ(result_acc[i][j], kVConvOutRef[i][j])
+            << absl::StrCat("acc[", i, "][", j, "] != Ref[", i, "][", j, "]");
+      }
+    }
+  }
+
+ private:
+  bool execution_fail_;
+  std::function<bool(bool, uint64_t, uint64_t, uint64_t, const Instruction *)>
+      trap_call_back_ = [this](bool is_interrupt, uint64_t trap_value,
+                               uint64_t exception_code, uint64_t epc,
+                               const Instruction *instruction) {
+        auto code =
+            static_cast<mpact::sim::riscv::ExceptionCode>(exception_code);
+        if (code == mpact::sim::riscv::ExceptionCode::kIllegalInstruction) {
+          this->execution_fail_ = true;
+          return true;
+        }
+        return false;
+      };
+};
+
+TEST_F(KelvinVectorConvolutionInstructionsTest, VConv) {
+  // Valid command: filters 0 through 7, unsigned data biased by 86 and signed
+  // filter values biased by 188. The helper checks the accumulator contents.
+  const kelvin::sim::vconv_cmd_t cmd{.mode = 0,
+                                     .start = 0,
+                                     .stop = 7,
+                                     .sbias1 = 86,
+                                     .sdata1 = false,
+                                     .sbias2 = 188,
+                                     .sdata2 = true};
+  ConvolutionTestHelper(cmd);
+}
+
+TEST_F(KelvinVectorConvolutionInstructionsTest, VConvWrongMode) {
+  // mode = 1 asks for 16-bit input/filter, which is illegal: expect a trap.
+  const kelvin::sim::vconv_cmd_t cmd{.mode = 1,
+                                     .start = 0,
+                                     .stop = 7,
+                                     .sbias1 = 86,
+                                     .sdata1 = false,
+                                     .sbias2 = 188,
+                                     .sdata2 = true};
+  ConvolutionTestHelper(cmd, /*expect_fail=*/true);
+}
+
+TEST_F(KelvinVectorConvolutionInstructionsTest, VConvTooLargeStop) {
+  // stop = 8 selects 9 filters, one more than allowed: expect a trap.
+  const kelvin::sim::vconv_cmd_t cmd{.mode = 0,
+                                     .start = 0,
+                                     .stop = 8,
+                                     .sbias1 = 86,
+                                     .sdata1 = false,
+                                     .sbias2 = 188,
+                                     .sdata2 = true};
+  ConvolutionTestHelper(cmd, /*expect_fail=*/true);
+}
+
+TEST_F(KelvinVectorConvolutionInstructionsTest, VConvWrongStop) {
+  // The range is reversed (start at filter 7, stop at filter 5), which is
+  // illegal: expect a trap.
+  const kelvin::sim::vconv_cmd_t cmd{.mode = 0,
+                                     .start = 7,
+                                     .stop = 5,
+                                     .sbias1 = 86,
+                                     .sdata1 = false,
+                                     .sbias2 = 188,
+                                     .sdata2 = true};
+  ConvolutionTestHelper(cmd, /*expect_fail=*/true);
+}
+}  // namespace
diff --git a/sim/test/testfiles/kelvin_vector_convolution_testdata.h b/sim/test/testfiles/kelvin_vector_convolution_testdata.h
new file mode 100644
index 0000000..0cb2779
--- /dev/null
+++ b/sim/test/testfiles/kelvin_vector_convolution_testdata.h
@@ -0,0 +1,146 @@
+// Test input/reference vectors for the vector convolution instruction
+// functions.
+#ifndef SIM_TEST_TESTFILES_KELVIN_VECTOR_CONVOLUTION_TESTDATA_H_
+#define SIM_TEST_TESTFILES_KELVIN_VECTOR_CONVOLUTION_TESTDATA_H_
+
+#include <cstdint>
+
+// clang-format off
+constexpr uint8_t kVConvIn1[8][32] = {
+    {
+        0xea, 0xe6, 0xaa, 0xb0, 0x5e, 0x46, 0x43, 0x2c,
+        0x58, 0x69, 0xd4, 0x25, 0xf0, 0xe9, 0x74, 0xd5,
+        0x34, 0x16, 0x4c, 0x86, 0xa8, 0x0c, 0xac, 0xa8,
+        0x9f, 0x99, 0x5d, 0xe0, 0x1a, 0x93, 0x65, 0x88,
+    },
+    {
+        0x4a, 0x3a, 0x57, 0x8b, 0x50, 0x6c, 0x1b, 0x37,
+        0x76, 0x8c, 0x72, 0x55, 0xb3, 0xce, 0xf3, 0x50,
+        0x74, 0x51, 0xb2, 0xb9, 0xb9, 0x76, 0xc4, 0x94,
+        0x29, 0x52, 0x16, 0xa8, 0x68, 0xa7, 0x8e, 0xe2,
+    },
+    {
+        0x61, 0xe8, 0xb8, 0xae, 0x43, 0x0a, 0xbe, 0xfa,
+        0x0b, 0x32, 0x7a, 0x92, 0x44, 0x3a, 0x60, 0xd3,
+        0x11, 0xc0, 0xb3, 0x8c, 0x7e, 0x5b, 0x9c, 0xe9,
+        0x03, 0x94, 0x7f, 0x10, 0x38, 0xd5, 0xc6, 0xeb,
+    },
+    {
+        0x5c, 0x79, 0x44, 0xe9, 0xfb, 0x32, 0x00, 0xf0,
+        0x79, 0xed, 0x92, 0x77, 0x93, 0x45, 0xc3, 0x63,
+        0x33, 0x7e, 0x07, 0xc5, 0x07, 0x76, 0x9d, 0xf0,
+        0x4e, 0x6a, 0x67, 0xcc, 0xca, 0xab, 0xd4, 0x24,
+    },
+    {
+        0x82, 0x86, 0xbb, 0xd9, 0x4b, 0xda, 0x2e, 0xbb,
+        0x8d, 0xbb, 0x82, 0x0e, 0x53, 0xaa, 0xb8, 0xfa,
+        0xa1, 0x75, 0x6e, 0x4a, 0x3c, 0xba, 0xa1, 0x88,
+        0x88, 0x6c, 0x74, 0x5b, 0x09, 0x83, 0x98, 0xdc,
+    },
+    {
+        0x91, 0x9a, 0x9f, 0xcd, 0x87, 0xd7, 0x74, 0x90,
+        0xa5, 0x25, 0xcf, 0x56, 0x5d, 0x41, 0x56, 0xc2,
+        0x0d, 0xdd, 0xd8, 0x2c, 0x59, 0x1d, 0x1c, 0x66,
+        0x06, 0xe9, 0xd3, 0x51, 0x83, 0x16, 0x65, 0x56,
+    },
+    {
+        0xf6, 0x95, 0x7c, 0x47, 0xf5, 0x56, 0x53, 0x58,
+        0x87, 0x9c, 0xde, 0xac, 0x76, 0x78, 0x71, 0x86,
+        0x5c, 0xdb, 0x5f, 0x0d, 0xc4, 0x5f, 0x48, 0x61,
+        0x48, 0x6f, 0x77, 0x26, 0x68, 0xf1, 0x39, 0x58,
+    },
+    {
+        0x32, 0x36, 0x68, 0x29, 0x67, 0xb8, 0x7c, 0xdd,
+        0xb9, 0x17, 0xb0, 0xec, 0x2e, 0xcc, 0xa5, 0x54,
+        0x76, 0x56, 0xc8, 0x0b, 0x77, 0xa1, 0xef, 0xf5,
+        0xcf, 0xd6, 0x84, 0x7a, 0x84, 0x0f, 0x4e, 0x16,
+    },
+};
+
+constexpr uint8_t kVConvIn2[8][32] = {
+    {
+        0x33, 0xc9, 0xb8, 0xa1, 0xea, 0x2d, 0x2c, 0x18,
+        0x92, 0x98, 0x8e, 0x19, 0xf2, 0xd8, 0x55, 0x92,
+        0x26, 0x4a, 0x8d, 0x3e, 0xb0, 0x01, 0x81, 0x6c,
+        0x3e, 0xcc, 0x8f, 0x9b, 0xf9, 0xde, 0x94, 0x9e,
+    },
+    {
+        0x7f, 0xa9, 0xa8, 0x3a, 0xa3, 0xef, 0xb7, 0xb5,
+        0x44, 0x93, 0xa1, 0xf7, 0x09, 0x7b, 0xb6, 0x6f,
+        0x98, 0x9e, 0xaa, 0x60, 0xb4, 0xe2, 0x9b, 0x93,
+        0x4b, 0x8f, 0xa7, 0xe4, 0x96, 0xe6, 0xcd, 0x93,
+    },
+    {
+        0x63, 0xfd, 0x8e, 0xd2, 0xfd, 0xe0, 0x13, 0x5b,
+        0xd7, 0x5f, 0xa0, 0x56, 0x02, 0x29, 0x4e, 0xfa,
+        0x9b, 0x30, 0xa5, 0xdb, 0xb4, 0xe7, 0xb9, 0x52,
+        0x05, 0xda, 0x57, 0xa8, 0xbd, 0x65, 0xfe, 0xf0,
+    },
+    {
+        0x1c, 0x22, 0x48, 0x3f, 0x5c, 0xae, 0x08, 0x8c,
+        0x40, 0xd4, 0x97, 0xeb, 0xb1, 0x92, 0x50, 0xd4,
+        0x66, 0xac, 0x58, 0x20, 0x3c, 0x92, 0xc0, 0x5c,
+        0x50, 0x89, 0x42, 0x93, 0x7b, 0x8b, 0x0a, 0x33,
+    },
+    {
+        0x3e, 0x98, 0x0c, 0x1a, 0xcd, 0x6c, 0xd5, 0x26,
+        0xad, 0x94, 0xd0, 0x6a, 0xbd, 0x19, 0x02, 0x42,
+        0xc1, 0x69, 0x20, 0x94, 0xc2, 0x74, 0xb7, 0xbf,
+        0x9f, 0x45, 0xd5, 0x6f, 0x22, 0x92, 0xbe, 0x88,
+    },
+    {
+        0x4c, 0xbc, 0xed, 0x2f, 0x9e, 0xe5, 0x27, 0xf9,
+        0x0e, 0xd3, 0xb5, 0x74, 0x83, 0x5f, 0x5a, 0xa2,
+        0xaf, 0x0f, 0x49, 0x0a, 0xe3, 0x1b, 0xc7, 0xd1,
+        0xd1, 0x51, 0xa9, 0x86, 0x75, 0xb3, 0xbc, 0xe9,
+    },
+    {
+        0x9f, 0x2a, 0xe9, 0xfc, 0x4f, 0x10, 0x57, 0x7d,
+        0x14, 0xec, 0x5f, 0x39, 0xa3, 0x0e, 0x92, 0x62,
+        0x86, 0xad, 0xae, 0x38, 0x95, 0x7f, 0xde, 0x30,
+        0x9d, 0xbf, 0xe5, 0xb2, 0xb3, 0xb6, 0xf1, 0x42,
+    },
+    {
+        0x0c, 0xd7, 0xd4, 0xa8, 0xae, 0x1a, 0xe6, 0x3c,
+        0xf0, 0xd7, 0x4f, 0x36, 0x37, 0xeb, 0x1d, 0xe9,
+        0xc7, 0xe4, 0x82, 0xe4, 0x44, 0x1c, 0x6a, 0xa1,
+        0x96, 0xfe, 0x2e, 0x9d, 0x36, 0xb4, 0x1c, 0x03,
+    },
+};
+
+constexpr uint32_t kVConvOutRef[8][8] = {
+    {
+        0x002706b4, 0x00284ddd, 0x00279af7, 0x00293b6a,
+        0x00265a51, 0x0028f490, 0x0024222a, 0x0025a738,
+    },
+    {
+        0x00260af1, 0x002687fd, 0x0026d33e, 0x0028700b,
+        0x002701d0, 0x00273546, 0x0025002d, 0x00254eaa,
+    },
+    {
+        0x002b2935, 0x00262aea, 0x002ca80a, 0x00275a99,
+        0x002cc3e7, 0x0027e665, 0x00288ed7, 0x00254319,
+    },
+    {
+        0x0028690a, 0x0023f3b1, 0x002aed22, 0x0025a60b,
+        0x002ad4e5, 0x0024a4c3, 0x00283b5b, 0x0021ce1e,
+    },
+    {
+        0x0025e1a3, 0x002700e9, 0x00255faa, 0x00270fbc,
+        0x00257a3b, 0x0026dd70, 0x0025c9d7, 0x0026fcc3,
+    },
+    {
+        0x0025a025, 0x00258f27, 0x0024d04f, 0x00261465,
+        0x00244b49, 0x00269500, 0x0025ddb5, 0x0025ca26,
+    },
+    {
+        0x00299efe, 0x0025a702, 0x002a766b, 0x00269ec7,
+        0x002acd15, 0x0025366f, 0x002a4de2, 0x002568bf,
+    },
+    {
+        0x0029429d, 0x0023c5ba, 0x002a03f7, 0x00240475,
+        0x0028fac3, 0x0023f507, 0x0029f7e5, 0x0023008b,
+    },
+};
+// clang-format on
+#endif  // SIM_TEST_TESTFILES_KELVIN_VECTOR_CONVOLUTION_TESTDATA_H_