Support for segmented loads.
Change-Id: Iabe9a16f4a44e5c3815a1af994d3c6a66cb1a0cd
diff --git a/hdl/chisel/src/kelvin/scalar/Decode.scala b/hdl/chisel/src/kelvin/scalar/Decode.scala
index 8b0715d..5f93edc 100644
--- a/hdl/chisel/src/kelvin/scalar/Decode.scala
+++ b/hdl/chisel/src/kelvin/scalar/Decode.scala
@@ -636,6 +636,7 @@
io.lsu(i).bits.pc := io.inst(i).bits.addr
if (p.enableRvv) {
io.lsu(i).bits.elemWidth.get := io.inst(i).bits.inst(14,12)
+ io.lsu(i).bits.nfields.get := io.inst(i).bits.inst(31,29)
}
// -------------------------------------------------------------------------
@@ -1105,6 +1106,7 @@
io.lsu.bits.pc := io.inst.bits.addr
if (p.enableRvv) {
io.lsu.bits.elemWidth.get := io.inst.bits.inst(14,12)
+ io.lsu.bits.nfields.get := io.inst.bits.inst(31,29)
}
// MLU opcode.
diff --git a/hdl/chisel/src/kelvin/scalar/Lsu.scala b/hdl/chisel/src/kelvin/scalar/Lsu.scala
index 94bd30b..e3e390f 100644
--- a/hdl/chisel/src/kelvin/scalar/Lsu.scala
+++ b/hdl/chisel/src/kelvin/scalar/Lsu.scala
@@ -140,10 +140,11 @@
val op = LsuOp()
val pc = UInt(32.W)
val elemWidth = Option.when(p.enableRvv) { UInt(3.W) }
+ val nfields = Option.when(p.enableRvv) { UInt(3.W) }
override def toPrintable: Printable = {
cf"LsuCmd(store -> ${store}, addr -> 0x${addr}%x, op -> ${op}, " +
- cf"pc -> 0x${pc}%x, elemWidth -> ${elemWidth})"
+ cf"pc -> 0x${pc}%x, elemWidth -> ${elemWidth}, nfields -> ${nfields})"
}
}
@@ -156,6 +157,7 @@
val data = UInt(32.W) // Doubles as rs2
val elemWidth = Option.when(p.enableRvv) { UInt(3.W) }
val lmul = Option.when(p.enableRvv) { UInt(4.W) }
+ val nfields = Option.when(p.enableRvv) { UInt(3.W) }
override def toPrintable: Printable = {
cf"LsuUOp(store -> ${store}, rd -> ${rd}, op -> ${op}, " +
@@ -185,6 +187,7 @@
}
if (p.enableRvv) {
result.elemWidth.get := cmd.elemWidth.get
+ result.nfields.get := cmd.nfields.get
// Treat fractional LMULs as LMUL=1
val effectiveLmul = Mux(rvvState.get.bits.lmul(2),
0.U(2.W), rvvState.get.bits.lmul(1, 0))
@@ -253,14 +256,18 @@
val pendingVector = Bool()
val pendingWriteback = Bool()
val lmul = UInt(4.W)
- val stride = UInt(32.W)
+ val elemStride = UInt(32.W) // Stride between lanes in a vector
+ val segmentStride = UInt(32.W) // Stride between base addr between segments
val elemWidth = UInt(3.W)
+ val nfields = UInt(3.W)
+ val segment = UInt(3.W)
// If the slot has no pending tasks and can accept a new operation
def slotIdle(): Bool = {
!(pendingVector || // Awaiting data from RVV Core
active.reduce(_||_) || // Active transaction
pendingWriteback || // Send result back to regfile
+ (nfields =/= segment) || // No pending segments
(LsuOp.isVector(op) && (lmul =/= 0.U)) // More operations in progress
)
}
@@ -298,17 +305,20 @@
result.pendingWriteback := pendingWriteback
result.lmul := lmul
result.baseAddr := baseAddr
- result.stride := stride
+ result.elemStride := elemStride
+ result.segmentStride := segmentStride
+
+
+ val segmentBaseAddr = baseAddr + (segmentStride * segment)
result.addrs := MuxCase(addrs, Seq(
op.isOneOf(LsuOp.VLOAD_UNIT, LsuOp.VSTORE_UNIT) ->
- VecInit((0 until bytesPerSlot).map(i => baseAddr + i.U)),
+ ComputeStridedAddrs(bytesPerSlot, segmentBaseAddr, elemStride, elemWidth),
op.isOneOf(LsuOp.VLOAD_STRIDED, LsuOp.VSTORE_STRIDED) ->
- ComputeStridedAddrs(bytesPerSlot, baseAddr, stride, elemWidth),
+ ComputeStridedAddrs(bytesPerSlot, segmentBaseAddr, elemStride, elemWidth),
op.isOneOf(LsuOp.VLOAD_OINDEXED, LsuOp.VLOAD_UINDEXED,
LsuOp.VSTORE_OINDEXED, LsuOp.VSTORE_UINDEXED) ->
ComputeIndexedAddrs(bytesPerSlot, baseAddr, rvv2lsu.idx.bits.data,
elemWidth)
- // TODO(derekjchow): Support segmented
))
result.elemWidth := elemWidth
@@ -317,6 +327,9 @@
result.active := Mux(updated && LsuOp.isVector(op) && rvv2lsu.mask.valid,
VecInit(rvv2lsu.mask.bits.asBools), active)
result.pendingVector := pendingVector && !updated
+ result.nfields := nfields
+ result.segment := segment
+
result
}
@@ -349,8 +362,11 @@
i => active(i) & ~lineActive(i))
result.data := VecInit((0 until bytesPerSlot).map(
i => Mux(lineActive(i), gatheredData(i), data(i))))
- result.stride := stride
+ result.elemStride := elemStride
+ result.segmentStride := segmentStride
result.elemWidth := elemWidth
+ result.nfields := nfields
+ result.segment := segment
result
}
@@ -370,35 +386,51 @@
result.addrs := addrs
result.active := active
result.data := data
- result.stride := stride
+ result.elemStride := elemStride
+ result.segmentStride := segmentStride
result.elemWidth := elemWidth
+ result.nfields := nfields
val vectorWriteback = writeback && LsuOp.isVector(op)
+
+ val segmentNext = MuxCase(segment, Seq(
+ // Final segment, No next LMUL: don't reset segment
+ (vectorWriteback && (segment === nfields) && (lmul === 1.U)) -> segment,
+ // Final segment, next LMUL: reset segment
+ (vectorWriteback && (segment === nfields)) -> 0.U,
+ // Next segment
+ vectorWriteback -> (segment + 1.U),
+ ))
+ result.segment := segmentNext
+
+ val lmulUpdate = vectorWriteback && (segment === nfields)
val lmulNext = MuxCase(lmul, Seq(
+ // Don't decrease below 0!
(lmul === 0.U) -> 0.U,
- vectorWriteback -> (lmul - 1.U),
+ // Move to next LMUL if final segment
+ (lmulUpdate) -> (lmul - 1.U),
))
result.lmul := lmulNext
result.pendingVector := MuxCase(pendingVector, Seq(
(!writeback) -> pendingWriteback,
- (!LsuOp.isVector(op)) -> false.B, // No vector update for non-vector
- (lmulNext =/= 0.U) -> true.B, // Next LMUL
- (lmulNext === 0.U) -> false.B, // Final LMUL
+ (!LsuOp.isVector(op)) -> false.B, // No vector update for non-vector
+ (lmulNext =/= 0.U) -> true.B, // Next LMUL
+ (lmulNext === 0.U) -> false.B, // Final LMUL
))
result.pendingWriteback := MuxCase(pendingWriteback, Seq(
(!writeback) -> pendingWriteback,
- (!LsuOp.isVector(op)) -> false.B, // One writeback for non-vector ops
- (lmulNext =/= 0.U) -> true.B, // Next LMUL
- (lmulNext === 0.U) -> false.B, // Final LMUL
+ (!LsuOp.isVector(op)) -> false.B, // One writeback for non-vector ops
+ (lmulNext =/= 0.U) -> true.B, // Next LMUL
+ (lmulNext === 0.U) -> false.B, // Final LMUL
))
result.baseAddr := MuxCase(baseAddr, Seq(
- !writeback -> baseAddr,
- // For Unit Updates
- op.isOneOf(LsuOp.VLOAD_UNIT, LsuOp.VSTORE_UNIT) -> (baseAddr + 16.U),
- // For Strided Updates
- op.isOneOf(LsuOp.VLOAD_STRIDED, LsuOp.VSTORE_STRIDED) ->
- (baseAddr + (stride*16.U)),
+ (!writeback || !lmulUpdate) -> baseAddr,
+ // For Unit and strided updates
+ op.isOneOf(LsuOp.VLOAD_UNIT, LsuOp.VSTORE_UNIT,
+ LsuOp.VLOAD_STRIDED, LsuOp.VSTORE_STRIDED) ->
+ (baseAddr + (nfields * 16.U) + 16.U),
+ // Indexed don't have base addr changed.
))
result.rd := rd + 1.U // Move to next vector register
@@ -427,8 +459,11 @@
result.addrs := addrs
result.data := data
result.lmul := lmul
- result.stride := stride
+ result.elemStride := elemStride
+ result.segmentStride := segmentStride
result.elemWidth := elemWidth
+ result.nfields := nfields
+ result.segment := segment
result
}
@@ -456,6 +491,8 @@
cf" $i: ${active(i)}, 0x${addrs(i)}%x, 0x${data(i)}%x\n")
cf"store: $store\n op: ${op}\n pendingVector: ${pendingVector}\n" +
cf" pendingWriteback: ${pendingWriteback}\n lmul: ${lmul}\n" +
+ cf" nfields: ${nfields}\n segment: ${segment}\n" +
+ cf" elemWidth: 0b${elemWidth}%b elemStride: ${elemStride}\n" +
lines.reduce(_+_)
}
}
@@ -471,7 +508,13 @@
result.rd := uop.rd
result.store := uop.store
result.pc := uop.pc
- result.lmul := uop.lmul.getOrElse(0.U)
+ if (p.enableRvv) {
+ result.lmul := uop.lmul.getOrElse(0.U)
+ result.nfields := Mux(LsuOp.isVector(uop.op), uop.nfields.get, 0.U)
+ result.segment := 0.U
+ } else {
+ result.lmul := 0.U
+ }
// All vector ops require writeback. Lsu needs to inform RVV core store uop
// has completed.
@@ -494,7 +537,18 @@
uop.op.isOneOf(LsuOp.VLOAD_STRIDED, LsuOp.VSTORE_STRIDED),
ComputeStridedAddrs(bytesPerSlot, uop.addr, uop.data, uop.elemWidth.getOrElse(0.U(3.W))),
VecInit((0 until bytesPerSlot).map(i => uop.addr + i.U)))
- result.stride := uop.data
+
+ val unitStride = MuxCase(1.U, Seq(
+ (uop.elemWidth.get === "b000".U) -> 1.U, // 1-byte elements
+ (uop.elemWidth.get === "b101".U) -> 2.U, // 2-byte elements
+ (uop.elemWidth.get === "b110".U) -> 4.U, // 4-byte elements
+ ))
+
+ result.segmentStride := unitStride
+ result.elemStride := Mux(
+ uop.op.isOneOf(LsuOp.VLOAD_UNIT, LsuOp.VSTORE_UNIT),
+ unitStride + (uop.nfields.get * unitStride),
+ uop.data)
result.data(0) := uop.data(7, 0)
result.data(1) := uop.data(15, 8)
diff --git a/tests/cocotb/BUILD b/tests/cocotb/BUILD
index d274166..45f2ab4 100644
--- a/tests/cocotb/BUILD
+++ b/tests/cocotb/BUILD
@@ -316,10 +316,20 @@
],
data = [
'//tests/cocotb/rvv/load_store:load8_indexed_m1.elf',
+ '//tests/cocotb/rvv/load_store:load8_segment2_unit_m1.elf',
+ '//tests/cocotb/rvv/load_store:load16_segment2_unit_m1.elf',
+ '//tests/cocotb/rvv/load_store:load32_segment2_unit_m1.elf',
+ '//tests/cocotb/rvv/load_store:load8_segment2_unit_m2.elf',
+ '//tests/cocotb/rvv/load_store:load16_segment2_unit_m2.elf',
+ '//tests/cocotb/rvv/load_store:load32_segment2_unit_m2.elf',
+ '//tests/cocotb/rvv/load_store:load8_segment2_stride6_m1.elf',
+ '//tests/cocotb/rvv/load_store:load16_segment2_stride6_m1.elf',
'//tests/cocotb/rvv/load_store:load8_stride2_m1.elf',
'//tests/cocotb/rvv/load_store:load8_stride2_m1_partial.elf',
'//tests/cocotb/rvv/load_store:load8_stride2_mf4.elf',
'//tests/cocotb/rvv/load_store:load_store8_unit_m2.elf',
+ '//tests/cocotb/rvv/load_store:load_store16_unit_m2.elf',
+ '//tests/cocotb/rvv/load_store:load_store32_unit_m2.elf',
'//tests/cocotb/rvv/load_store:load16_stride4_m1.elf',
'//tests/cocotb/rvv/load_store:load16_stride4_m1_partial.elf',
'//tests/cocotb/rvv/load_store:load16_stride4_mf2.elf',
diff --git a/tests/cocotb/rvv/load_store/BUILD b/tests/cocotb/rvv/load_store/BUILD
index d19281c..2664908 100644
--- a/tests/cocotb/rvv/load_store/BUILD
+++ b/tests/cocotb/rvv/load_store/BUILD
@@ -23,6 +23,30 @@
"load8_indexed_m1": {
"srcs": ["load8_indexed_m1.cc"],
},
+ "load8_segment2_unit_m1": {
+ "srcs": ["load8_segment2_unit_m1.cc"],
+ },
+ "load16_segment2_unit_m1": {
+ "srcs": ["load16_segment2_unit_m1.cc"],
+ },
+ "load32_segment2_unit_m1": {
+ "srcs": ["load32_segment2_unit_m1.cc"],
+ },
+ "load8_segment2_unit_m2": {
+ "srcs": ["load8_segment2_unit_m2.cc"],
+ },
+ "load16_segment2_unit_m2": {
+ "srcs": ["load16_segment2_unit_m2.cc"],
+ },
+ "load32_segment2_unit_m2": {
+ "srcs": ["load32_segment2_unit_m2.cc"],
+ },
+ "load8_segment2_stride6_m1": {
+ "srcs": ["load8_segment2_stride6_m1.cc"],
+ },
+ "load16_segment2_stride6_m1": {
+ "srcs": ["load16_segment2_stride6_m1.cc"],
+ },
"load8_stride2_m1": {
"srcs": ["load8_stride2_m1.cc"],
},
@@ -35,6 +59,12 @@
"load_store8_unit_m2": {
"srcs": ["load_store8_unit_m2.cc"],
},
+ "load_store16_unit_m2": {
+ "srcs": ["load_store16_unit_m2.cc"],
+ },
+ "load_store32_unit_m2": {
+ "srcs": ["load_store32_unit_m2.cc"],
+ },
"load16_stride4_m1": {
"srcs": ["load16_stride4_m1.cc"],
},
diff --git a/tests/cocotb/rvv/load_store/load16_segment2_stride6_m1.cc b/tests/cocotb/rvv/load_store/load16_segment2_stride6_m1.cc
new file mode 100644
index 0000000..97e461e
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/load16_segment2_stride6_m1.cc
@@ -0,0 +1,35 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+uint16_t in_buf[128] __attribute__((section(".data")));
+uint16_t out_buf[32] __attribute__((section(".data")));
+
+__attribute__((used, retain)) void test_intrinsic(const uint16_t *x,
+ uint16_t *y) {
+ vuint16m1x2_t v = __riscv_vlsseg2e16_v_u16m1x2(in_buf, 6, 8);
+
+ vuint16m2_t vv = __riscv_vcreate_v_u16m1_u16m2(
+ __riscv_vget_v_u16m1x2_u16m1(v, 0),
+ __riscv_vget_v_u16m1x2_u16m1(v, 1));
+
+ __riscv_vse16_v_u16m2(y, vv, /*vl=*/16);
+}
+
+int main(int argc, char **argv) {
+ test_intrinsic(in_buf, out_buf);
+ return 0;
+}
diff --git a/tests/cocotb/rvv/load_store/load16_segment2_unit_m1.cc b/tests/cocotb/rvv/load_store/load16_segment2_unit_m1.cc
new file mode 100644
index 0000000..f7f8d03
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/load16_segment2_unit_m1.cc
@@ -0,0 +1,36 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+// Enough space for m2.
+uint16_t in_buf[32] __attribute__((section(".data")));
+uint16_t out_buf[32] __attribute__((section(".data")));
+
+__attribute__((used, retain)) void test_intrinsic(const uint16_t *x,
+ uint16_t *y) {
+ vuint16m1x2_t v = __riscv_vlseg2e16_v_u16m1x2(in_buf, 16);
+
+ vuint16m2_t vv = __riscv_vcreate_v_u16m1_u16m2(
+ __riscv_vget_v_u16m1x2_u16m1(v, 0),
+ __riscv_vget_v_u16m1x2_u16m1(v, 1));
+
+ __riscv_vse16_v_u16m2(y, vv, /*vl=*/16);
+}
+
+int main(int argc, char **argv) {
+ test_intrinsic(in_buf, out_buf);
+ return 0;
+}
diff --git a/tests/cocotb/rvv/load_store/load16_segment2_unit_m2.cc b/tests/cocotb/rvv/load_store/load16_segment2_unit_m2.cc
new file mode 100644
index 0000000..98c2c7b
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/load16_segment2_unit_m2.cc
@@ -0,0 +1,36 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+// Enough space for m4.
+uint16_t in_buf[64] __attribute__((section(".data")));
+uint16_t out_buf[64] __attribute__((section(".data")));
+
+__attribute__((used, retain)) void test_intrinsic(const uint16_t *x,
+ uint16_t *y) {
+ vuint16m2x2_t v = __riscv_vlseg2e16_v_u16m2x2(in_buf, 32);
+
+ vuint16m4_t vv = __riscv_vcreate_v_u16m2_u16m4(
+ __riscv_vget_v_u16m2x2_u16m2(v, 0),
+ __riscv_vget_v_u16m2x2_u16m2(v, 1));
+
+ __riscv_vse16_v_u16m4(y, vv, /*vl=*/32);
+}
+
+int main(int argc, char **argv) {
+ test_intrinsic(in_buf, out_buf);
+ return 0;
+}
diff --git a/tests/cocotb/rvv/load_store/load32_segment2_unit_m1.cc b/tests/cocotb/rvv/load_store/load32_segment2_unit_m1.cc
new file mode 100644
index 0000000..bfb3a7b
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/load32_segment2_unit_m1.cc
@@ -0,0 +1,36 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+// Enough space for m2.
+uint32_t in_buf[16] __attribute__((section(".data")));
+uint32_t out_buf[16] __attribute__((section(".data")));
+
+__attribute__((used, retain)) void test_intrinsic(const uint32_t *x,
+ uint32_t *y) {
+ vuint32m1x2_t v = __riscv_vlseg2e32_v_u32m1x2(in_buf, 8);
+
+ vuint32m2_t vv = __riscv_vcreate_v_u32m1_u32m2(
+ __riscv_vget_v_u32m1x2_u32m1(v, 0),
+ __riscv_vget_v_u32m1x2_u32m1(v, 1));
+
+ __riscv_vse32_v_u32m2(y, vv, /*vl=*/8);
+}
+
+int main(int argc, char **argv) {
+ test_intrinsic(in_buf, out_buf);
+ return 0;
+}
diff --git a/tests/cocotb/rvv/load_store/load32_segment2_unit_m2.cc b/tests/cocotb/rvv/load_store/load32_segment2_unit_m2.cc
new file mode 100644
index 0000000..e88f774
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/load32_segment2_unit_m2.cc
@@ -0,0 +1,36 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+// Enough space for m4.
+uint32_t in_buf[32] __attribute__((section(".data")));
+uint32_t out_buf[32] __attribute__((section(".data")));
+
+__attribute__((used, retain)) void test_intrinsic(const uint32_t *x,
+ uint32_t *y) {
+ vuint32m2x2_t v = __riscv_vlseg2e32_v_u32m2x2(in_buf, 16);
+
+ vuint32m4_t vv = __riscv_vcreate_v_u32m2_u32m4(
+ __riscv_vget_v_u32m2x2_u32m2(v, 0),
+ __riscv_vget_v_u32m2x2_u32m2(v, 1));
+
+ __riscv_vse32_v_u32m4(y, vv, /*vl=*/16);
+}
+
+int main(int argc, char **argv) {
+ test_intrinsic(in_buf, out_buf);
+ return 0;
+}
diff --git a/tests/cocotb/rvv/load_store/load8_segment2_stride6_m1.cc b/tests/cocotb/rvv/load_store/load8_segment2_stride6_m1.cc
new file mode 100644
index 0000000..bafc188
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/load8_segment2_stride6_m1.cc
@@ -0,0 +1,35 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+uint8_t in_buf[256] __attribute__((section(".data")));
+uint8_t out_buf[64] __attribute__((section(".data")));
+
+__attribute__((used, retain)) void test_intrinsic(const uint8_t *x,
+ uint8_t *y) {
+ vuint8m1x2_t v = __riscv_vlsseg2e8_v_u8m1x2(in_buf, 6, 16);
+
+ vuint8m2_t vv = __riscv_vcreate_v_u8m1_u8m2(
+ __riscv_vget_v_u8m1x2_u8m1(v, 0),
+ __riscv_vget_v_u8m1x2_u8m1(v, 1));
+
+ __riscv_vse8_v_u8m2(y, vv, /*vl=*/32);
+}
+
+int main(int argc, char **argv) {
+ test_intrinsic(in_buf, out_buf);
+ return 0;
+}
diff --git a/tests/cocotb/rvv/load_store/load8_segment2_unit_m1.cc b/tests/cocotb/rvv/load_store/load8_segment2_unit_m1.cc
new file mode 100644
index 0000000..6d187d9
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/load8_segment2_unit_m1.cc
@@ -0,0 +1,36 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+// Enough space for m2.
+uint8_t in_buf[64] __attribute__((section(".data")));
+uint8_t out_buf[64] __attribute__((section(".data")));
+
+__attribute__((used, retain)) void test_intrinsic(const uint8_t *x,
+ uint8_t *y) {
+ vuint8m1x2_t v = __riscv_vlseg2e8_v_u8m1x2(in_buf, 32);
+
+ vuint8m2_t vv = __riscv_vcreate_v_u8m1_u8m2(
+ __riscv_vget_v_u8m1x2_u8m1(v, 0),
+ __riscv_vget_v_u8m1x2_u8m1(v, 1));
+
+ __riscv_vse8_v_u8m2(y, vv, /*vl=*/32);
+}
+
+int main(int argc, char **argv) {
+ test_intrinsic(in_buf, out_buf);
+ return 0;
+}
diff --git a/tests/cocotb/rvv/load_store/load8_segment2_unit_m2.cc b/tests/cocotb/rvv/load_store/load8_segment2_unit_m2.cc
new file mode 100644
index 0000000..97da9ae
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/load8_segment2_unit_m2.cc
@@ -0,0 +1,36 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+// Enough space for m4.
+uint8_t in_buf[128] __attribute__((section(".data")));
+uint8_t out_buf[128] __attribute__((section(".data")));
+
+__attribute__((used, retain)) void test_intrinsic(const uint8_t *x,
+ uint8_t *y) {
+ vuint8m2x2_t v = __riscv_vlseg2e8_v_u8m2x2(in_buf, 64);
+
+ vuint8m4_t vv = __riscv_vcreate_v_u8m2_u8m4(
+ __riscv_vget_v_u8m2x2_u8m2(v, 0),
+ __riscv_vget_v_u8m2x2_u8m2(v, 1));
+
+ __riscv_vse8_v_u8m4(y, vv, /*vl=*/64);
+}
+
+int main(int argc, char **argv) {
+ test_intrinsic(in_buf, out_buf);
+ return 0;
+}
diff --git a/tests/cocotb/rvv/load_store/load_store16_unit_m2.cc b/tests/cocotb/rvv/load_store/load_store16_unit_m2.cc
new file mode 100644
index 0000000..c73813c
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/load_store16_unit_m2.cc
@@ -0,0 +1,31 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+// Enough space for m2.
+uint16_t in_buf[32] __attribute__((section(".data")));
+uint16_t out_buf[32] __attribute__((section(".data")));
+
+__attribute__((used, retain)) void test_intrinsic(const uint16_t *x,
+ uint16_t *y) {
+ vuint16m2_t v = __riscv_vle16_v_u16m2(x, /*vl=*/16);
+ __riscv_vse16_v_u16m2(y, v, /*vl=*/16);
+}
+
+int main(int argc, char **argv) {
+ test_intrinsic(in_buf, out_buf);
+ return 0;
+}
diff --git a/tests/cocotb/rvv/load_store/load_store32_unit_m2.cc b/tests/cocotb/rvv/load_store/load_store32_unit_m2.cc
new file mode 100644
index 0000000..e327d12
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/load_store32_unit_m2.cc
@@ -0,0 +1,31 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+// Enough space for m2.
+uint32_t in_buf[16] __attribute__((section(".data")));
+uint32_t out_buf[16] __attribute__((section(".data")));
+
+__attribute__((used, retain)) void test_intrinsic(const uint32_t *x,
+ uint32_t *y) {
+ vuint32m2_t v = __riscv_vle32_v_u32m2(x, /*vl=*/8);
+ __riscv_vse32_v_u32m2(y, v, /*vl=*/8);
+}
+
+int main(int argc, char **argv) {
+ test_intrinsic(in_buf, out_buf);
+ return 0;
+}
diff --git a/tests/cocotb/rvv_load_store_test.py b/tests/cocotb/rvv_load_store_test.py
index 640d1f2..7fadb4a 100644
--- a/tests/cocotb/rvv_load_store_test.py
+++ b/tests/cocotb/rvv_load_store_test.py
@@ -240,6 +240,129 @@
pattern = list(range(0, 32)),
)
+
+@cocotb.test()
+async def load_store16_unit_m2(dut):
+ await vector_load_store(
+ dut=dut,
+ elf_name='load_store16_unit_m2.elf',
+ dtype=np.uint16,
+ in_size=32,
+ out_size=32,
+ pattern=list(range(0, 16)),
+ )
+
+
+@cocotb.test()
+async def load_store32_unit_m2(dut):
+ await vector_load_store(
+ dut=dut,
+ elf_name='load_store32_unit_m2.elf',
+ dtype=np.uint32,
+ in_size=16,
+ out_size=16,
+ pattern=list(range(0, 8)),
+ )
+
+@cocotb.test()
+async def load8_segment2_unit_m1(dut):
+ await vector_load_store(
+ dut=dut,
+ elf_name='load8_segment2_unit_m1.elf',
+ dtype=np.uint8,
+ in_size=64,
+ out_size=64,
+ pattern=(list(range(0, 32, 2)) + list(range(1, 32, 2))),
+ )
+
+
+@cocotb.test()
+async def load16_segment2_unit_m1(dut):
+ await vector_load_store(
+ dut=dut,
+ elf_name='load16_segment2_unit_m1.elf',
+ dtype=np.uint16,
+ in_size=32,
+ out_size=32,
+ pattern=(list(range(0, 16, 2)) + list(range(1, 16, 2))),
+ )
+
+
+@cocotb.test()
+async def load32_segment2_unit_m1(dut):
+ await vector_load_store(
+ dut=dut,
+ elf_name='load32_segment2_unit_m1.elf',
+ dtype=np.uint32,
+ in_size=16,
+ out_size=16,
+ pattern=(list(range(0, 8, 2)) + list(range(1, 8, 2))),
+ )
+
+
+@cocotb.test()
+async def load8_segment2_unit_m2(dut):
+ await vector_load_store(
+ dut=dut,
+ elf_name='load8_segment2_unit_m2.elf',
+ dtype=np.uint8,
+ in_size=128,
+ out_size=128,
+ pattern=(list(range(0, 32, 2)) + list(range(1, 32, 2)) +
+ list(range(32, 64, 2)) + list(range(33, 64, 2))),
+ )
+
+
+@cocotb.test()
+async def load16_segment2_unit_m2(dut):
+ await vector_load_store(
+ dut=dut,
+ elf_name='load16_segment2_unit_m2.elf',
+ dtype=np.uint16,
+ in_size=64,
+ out_size=64,
+ pattern=(list(range(0, 16, 2)) + list(range(1, 16, 2)) +
+ list(range(16, 32, 2)) + list(range(17, 32, 2))),
+ )
+
+
+@cocotb.test()
+async def load32_segment2_unit_m2(dut):
+ await vector_load_store(
+ dut=dut,
+ elf_name='load32_segment2_unit_m2.elf',
+ dtype=np.uint32,
+ in_size=32,
+ out_size=32,
+ pattern=(list(range(0, 8, 2)) + list(range(1, 8, 2)) +
+ list(range(8, 16, 2)) + list(range(9, 16, 2))),
+ )
+
+
+@cocotb.test()
+async def load8_segment2_stride6_m1(dut):
+ await vector_load_store(
+ dut=dut,
+ elf_name='load8_segment2_stride6_m1.elf',
+ dtype=np.uint8,
+ in_size=256,
+ out_size=64,
+ pattern=([i * 6 for i in range(16)] + [i * 6 + 1 for i in range(16)]),
+ )
+
+
+@cocotb.test()
+async def load16_segment2_stride6_m1(dut):
+ await vector_load_store(
+ dut=dut,
+ elf_name='load16_segment2_stride6_m1.elf',
+ dtype=np.uint16,
+ in_size=128,
+ out_size=32,
+ pattern=([i * 3 for i in range(8)] + [i * 3 + 1 for i in range(8)]),
+ )
+
+
@cocotb.test()
async def load8_indexed_m1(dut):
await vector_load_indexed(