Add vsseg*e16/32 tests
Change-Id: I7d38628c389743c1d0bf335e3cfdc7518ddeb20d
diff --git a/tests/cocotb/BUILD b/tests/cocotb/BUILD
index 7a09d29..ee7bec8 100644
--- a/tests/cocotb/BUILD
+++ b/tests/cocotb/BUILD
@@ -192,6 +192,8 @@
"load8_indexed_m1",
"store8_indexed_m1",
"store8_seg_unit",
+ "store16_seg_unit",
+ "store32_seg_unit",
"load_store8_test",
]
# END_TESTCASES_FOR_rvv_load_store_test
diff --git a/tests/cocotb/rvv/load_store/BUILD b/tests/cocotb/rvv/load_store/BUILD
index cf3dae6..c5a79d3 100644
--- a/tests/cocotb/rvv/load_store/BUILD
+++ b/tests/cocotb/rvv/load_store/BUILD
@@ -83,6 +83,12 @@
"store8_seg_unit": {
"srcs": ["store8_seg_unit.cc"],
},
+ "store16_seg_unit": {
+ "srcs": ["store16_seg_unit.cc"],
+ },
+ "store32_seg_unit": {
+ "srcs": ["store32_seg_unit.cc"],
+ },
},
)
@@ -110,5 +116,7 @@
":load32_stride8_m1_partial.elf",
":store8_indexed_m1.elf",
":store8_seg_unit",
+ ":store16_seg_unit",
+ ":store32_seg_unit",
],
)
\ No newline at end of file
diff --git a/tests/cocotb/rvv/load_store/store16_seg_unit.cc b/tests/cocotb/rvv/load_store/store16_seg_unit.cc
new file mode 100644
index 0000000..1e06608
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/store16_seg_unit.cc
@@ -0,0 +1,203 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+namespace {
+// Double sized so we can check trailing regions are not read/written.
+constexpr size_t buf_size = 128;
+} // namespace
+
+size_t vl __attribute__((section(".data"))) = 8;
+// These instructions don't differentiate signed/unsigned so we only need to
+// test one. The types come from intrinsic level.
+uint16_t in_buf[buf_size] __attribute__((section(".data")));
+uint16_t out_buf[buf_size] __attribute__((section(".data")));
+
+extern "C" {
+// Segment 2
+__attribute__((used, retain)) void vsseg2e16_v_u16mf2x2() {
+ auto data =
+ __riscv_vcreate_v_u16mf2x2(__riscv_vle16_v_u16mf2(in_buf, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl, vl));
+ __riscv_vsseg2e16_v_u16mf2x2(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg2e16_v_u16m1x2() {
+ auto data = __riscv_vcreate_v_u16m1x2(__riscv_vle16_v_u16m1(in_buf, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl, vl));
+ __riscv_vsseg2e16_v_u16m1x2(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg2e16_v_u16m2x2() {
+ auto data = __riscv_vcreate_v_u16m2x2(__riscv_vle16_v_u16m2(in_buf, vl),
+ __riscv_vle16_v_u16m2(in_buf + vl, vl));
+ __riscv_vsseg2e16_v_u16m2x2(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg2e16_v_u16m4x2() {
+ auto data = __riscv_vcreate_v_u16m4x2(__riscv_vle16_v_u16m4(in_buf, vl),
+ __riscv_vle16_v_u16m4(in_buf + vl, vl));
+ __riscv_vsseg2e16_v_u16m4x2(out_buf, data, vl);
+}
+
+// Segment 3
+__attribute__((used, retain)) void vsseg3e16_v_u16mf2x3() {
+ auto data =
+ __riscv_vcreate_v_u16mf2x3(__riscv_vle16_v_u16mf2(in_buf, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 2, vl));
+ __riscv_vsseg3e16_v_u16mf2x3(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg3e16_v_u16m1x3() {
+ auto data = __riscv_vcreate_v_u16m1x3(
+ __riscv_vle16_v_u16m1(in_buf, vl), __riscv_vle16_v_u16m1(in_buf + vl, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 2, vl));
+ __riscv_vsseg3e16_v_u16m1x3(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg3e16_v_u16m2x3() {
+ auto data = __riscv_vcreate_v_u16m2x3(
+ __riscv_vle16_v_u16m2(in_buf, vl), __riscv_vle16_v_u16m2(in_buf + vl, vl),
+ __riscv_vle16_v_u16m2(in_buf + vl * 2, vl));
+ __riscv_vsseg3e16_v_u16m2x3(out_buf, data, vl);
+}
+
+// Segment 4
+__attribute__((used, retain)) void vsseg4e16_v_u16mf2x4() {
+ auto data =
+ __riscv_vcreate_v_u16mf2x4(__riscv_vle16_v_u16mf2(in_buf, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 2, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 3, vl));
+ __riscv_vsseg4e16_v_u16mf2x4(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg4e16_v_u16m1x4() {
+ auto data = __riscv_vcreate_v_u16m1x4(
+ __riscv_vle16_v_u16m1(in_buf, vl), __riscv_vle16_v_u16m1(in_buf + vl, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 2, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 3, vl));
+ __riscv_vsseg4e16_v_u16m1x4(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg4e16_v_u16m2x4() {
+ auto data = __riscv_vcreate_v_u16m2x4(
+ __riscv_vle16_v_u16m2(in_buf, vl), __riscv_vle16_v_u16m2(in_buf + vl, vl),
+ __riscv_vle16_v_u16m2(in_buf + vl * 2, vl),
+ __riscv_vle16_v_u16m2(in_buf + vl * 3, vl));
+ __riscv_vsseg4e16_v_u16m2x4(out_buf, data, vl);
+}
+
+// Segment 5
+__attribute__((used, retain)) void vsseg5e16_v_u16mf2x5() {
+ auto data =
+ __riscv_vcreate_v_u16mf2x5(__riscv_vle16_v_u16mf2(in_buf, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 2, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 3, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 4, vl));
+ __riscv_vsseg5e16_v_u16mf2x5(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg5e16_v_u16m1x5() {
+ auto data = __riscv_vcreate_v_u16m1x5(
+ __riscv_vle16_v_u16m1(in_buf, vl), __riscv_vle16_v_u16m1(in_buf + vl, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 2, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 3, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 4, vl));
+ __riscv_vsseg5e16_v_u16m1x5(out_buf, data, vl);
+}
+
+// Segment 6
+__attribute__((used, retain)) void vsseg6e16_v_u16mf2x6() {
+ auto data =
+ __riscv_vcreate_v_u16mf2x6(__riscv_vle16_v_u16mf2(in_buf, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 2, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 3, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 4, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 5, vl));
+ __riscv_vsseg6e16_v_u16mf2x6(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg6e16_v_u16m1x6() {
+ auto data = __riscv_vcreate_v_u16m1x6(
+ __riscv_vle16_v_u16m1(in_buf, vl), __riscv_vle16_v_u16m1(in_buf + vl, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 2, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 3, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 4, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 5, vl));
+ __riscv_vsseg6e16_v_u16m1x6(out_buf, data, vl);
+}
+
+// Segment 7
+__attribute__((used, retain)) void vsseg7e16_v_u16mf2x7() {
+ auto data =
+ __riscv_vcreate_v_u16mf2x7(__riscv_vle16_v_u16mf2(in_buf, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 2, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 3, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 4, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 5, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 6, vl));
+ __riscv_vsseg7e16_v_u16mf2x7(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg7e16_v_u16m1x7() {
+ auto data = __riscv_vcreate_v_u16m1x7(
+ __riscv_vle16_v_u16m1(in_buf, vl), __riscv_vle16_v_u16m1(in_buf + vl, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 2, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 3, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 4, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 5, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 6, vl));
+ __riscv_vsseg7e16_v_u16m1x7(out_buf, data, vl);
+}
+
+// Segment 8
+__attribute__((used, retain)) void vsseg8e16_v_u16mf2x8() {
+ auto data =
+ __riscv_vcreate_v_u16mf2x8(__riscv_vle16_v_u16mf2(in_buf, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 2, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 3, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 4, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 5, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 6, vl),
+ __riscv_vle16_v_u16mf2(in_buf + vl * 7, vl));
+ __riscv_vsseg8e16_v_u16mf2x8(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg8e16_v_u16m1x8() {
+ auto data = __riscv_vcreate_v_u16m1x8(
+ __riscv_vle16_v_u16m1(in_buf, vl), __riscv_vle16_v_u16m1(in_buf + vl, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 2, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 3, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 4, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 5, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 6, vl),
+ __riscv_vle16_v_u16m1(in_buf + vl * 7, vl));
+ __riscv_vsseg8e16_v_u16m1x8(out_buf, data, vl);
+}
+}
+
+void (*impl)() __attribute__((section(".data"))) = &vsseg2e16_v_u16m1x2;
+
+int main(int argc, char** argv) {
+ impl();
+ return 0;
+}
diff --git a/tests/cocotb/rvv/load_store/store32_seg_unit.cc b/tests/cocotb/rvv/load_store/store32_seg_unit.cc
new file mode 100644
index 0000000..c30da64
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/store32_seg_unit.cc
@@ -0,0 +1,133 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+namespace {
+// Double sized so we can check trailing regions are not read/written.
+constexpr size_t buf_size = 64;
+} // namespace
+
+size_t vl __attribute__((section(".data"))) = 4;
+// These instructions don't differentiate signed/unsigned so we only need to
+// test one. The types come from intrinsic level.
+uint32_t in_buf[buf_size] __attribute__((section(".data")));
+uint32_t out_buf[buf_size] __attribute__((section(".data")));
+
+extern "C" {
+// Segment 2
+__attribute__((used, retain)) void vsseg2e32_v_u32m1x2() {
+ auto data = __riscv_vcreate_v_u32m1x2(__riscv_vle32_v_u32m1(in_buf, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl, vl));
+ __riscv_vsseg2e32_v_u32m1x2(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg2e32_v_u32m2x2() {
+ auto data = __riscv_vcreate_v_u32m2x2(__riscv_vle32_v_u32m2(in_buf, vl),
+ __riscv_vle32_v_u32m2(in_buf + vl, vl));
+ __riscv_vsseg2e32_v_u32m2x2(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg2e32_v_u32m4x2() {
+ auto data = __riscv_vcreate_v_u32m4x2(__riscv_vle32_v_u32m4(in_buf, vl),
+ __riscv_vle32_v_u32m4(in_buf + vl, vl));
+ __riscv_vsseg2e32_v_u32m4x2(out_buf, data, vl);
+}
+
+// Segment 3
+__attribute__((used, retain)) void vsseg3e32_v_u32m1x3() {
+ auto data = __riscv_vcreate_v_u32m1x3(
+ __riscv_vle32_v_u32m1(in_buf, vl), __riscv_vle32_v_u32m1(in_buf + vl, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 2, vl));
+ __riscv_vsseg3e32_v_u32m1x3(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg3e32_v_u32m2x3() {
+ auto data = __riscv_vcreate_v_u32m2x3(
+ __riscv_vle32_v_u32m2(in_buf, vl), __riscv_vle32_v_u32m2(in_buf + vl, vl),
+ __riscv_vle32_v_u32m2(in_buf + vl * 2, vl));
+ __riscv_vsseg3e32_v_u32m2x3(out_buf, data, vl);
+}
+
+// Segment 4
+__attribute__((used, retain)) void vsseg4e32_v_u32m1x4() {
+ auto data = __riscv_vcreate_v_u32m1x4(
+ __riscv_vle32_v_u32m1(in_buf, vl), __riscv_vle32_v_u32m1(in_buf + vl, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 2, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 3, vl));
+ __riscv_vsseg4e32_v_u32m1x4(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg4e32_v_u32m2x4() {
+ auto data = __riscv_vcreate_v_u32m2x4(
+ __riscv_vle32_v_u32m2(in_buf, vl), __riscv_vle32_v_u32m2(in_buf + vl, vl),
+ __riscv_vle32_v_u32m2(in_buf + vl * 2, vl),
+ __riscv_vle32_v_u32m2(in_buf + vl * 3, vl));
+ __riscv_vsseg4e32_v_u32m2x4(out_buf, data, vl);
+}
+
+// Segment 5
+__attribute__((used, retain)) void vsseg5e32_v_u32m1x5() {
+ auto data = __riscv_vcreate_v_u32m1x5(
+ __riscv_vle32_v_u32m1(in_buf, vl), __riscv_vle32_v_u32m1(in_buf + vl, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 2, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 3, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 4, vl));
+ __riscv_vsseg5e32_v_u32m1x5(out_buf, data, vl);
+}
+
+// Segment 6
+__attribute__((used, retain)) void vsseg6e32_v_u32m1x6() {
+ auto data = __riscv_vcreate_v_u32m1x6(
+ __riscv_vle32_v_u32m1(in_buf, vl), __riscv_vle32_v_u32m1(in_buf + vl, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 2, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 3, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 4, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 5, vl));
+ __riscv_vsseg6e32_v_u32m1x6(out_buf, data, vl);
+}
+
+// Segment 7
+__attribute__((used, retain)) void vsseg7e32_v_u32m1x7() {
+ auto data = __riscv_vcreate_v_u32m1x7(
+ __riscv_vle32_v_u32m1(in_buf, vl), __riscv_vle32_v_u32m1(in_buf + vl, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 2, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 3, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 4, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 5, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 6, vl));
+ __riscv_vsseg7e32_v_u32m1x7(out_buf, data, vl);
+}
+
+// Segment 8
+__attribute__((used, retain)) void vsseg8e32_v_u32m1x8() {
+ auto data = __riscv_vcreate_v_u32m1x8(
+ __riscv_vle32_v_u32m1(in_buf, vl), __riscv_vle32_v_u32m1(in_buf + vl, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 2, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 3, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 4, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 5, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 6, vl),
+ __riscv_vle32_v_u32m1(in_buf + vl * 7, vl));
+ __riscv_vsseg8e32_v_u32m1x8(out_buf, data, vl);
+}
+}
+
+void (*impl)() __attribute__((section(".data"))) = &vsseg2e32_v_u32m1x2;
+
+int main(int argc, char** argv) {
+ impl();
+ return 0;
+}
diff --git a/tests/cocotb/rvv_load_store_test.py b/tests/cocotb/rvv_load_store_test.py
index b22f8b9..2f8d9d3 100644
--- a/tests/cocotb/rvv_load_store_test.py
+++ b/tests/cocotb/rvv_load_store_test.py
@@ -389,8 +389,8 @@
'in_size': vl * n_segs * 2,
'out_size': vl * n_segs * 2,
'pattern':[
- i * n_segs + j
- for j in range(n_segs) for i in range(vl)]
+ elem * n_segs + seg
+ for seg in range(n_segs) for elem in range(vl)]
}
await vector_load_store_v2(
@@ -469,8 +469,8 @@
'in_size': vl * n_segs * 2,
'out_size': vl * n_segs * 2,
'pattern':[
- i * n_segs + j
- for j in range(n_segs) for i in range(vl)]
+ elem * n_segs + seg
+ for seg in range(n_segs) for elem in range(vl)]
}
await vector_load_store_v2(
@@ -535,8 +535,8 @@
'in_size': vl * n_segs * 2,
'out_size': vl * n_segs * 2,
'pattern':[
- i * n_segs + j
- for j in range(n_segs) for i in range(vl)]
+ elem * n_segs + seg
+ for seg in range(n_segs) for elem in range(vl)]
}
await vector_load_store_v2(
@@ -627,7 +627,9 @@
'vl': vl,
'in_size': vl * n_segs * 2,
'out_size': vl * n_segs * 2,
- 'pattern': [i * vl + j for j in range(vl) for i in range(n_segs)]
+ 'pattern': [
+ seg * vl + elem
+ for elem in range(vl) for seg in range(n_segs)]
}
await vector_load_store_v2(
@@ -697,6 +699,125 @@
@cocotb.test()
+async def store16_seg_unit(dut):
+ """Test vsseg*e16 usage accessible from intrinsics."""
+ def make_test_case(impl: str, vl: int, n_segs: int):
+ return {
+ 'impl': impl,
+ 'vl': vl,
+ 'in_size': vl * n_segs * 2,
+ 'out_size': vl * n_segs * 2,
+ 'pattern': [
+ seg * vl + elem
+ for elem in range(vl) for seg in range(n_segs)]
+ }
+
+ await vector_load_store_v2(
+ dut = dut,
+ elf_name = 'store16_seg_unit.elf',
+ cases = [
+ # Seg 2
+ make_test_case('vsseg2e16_v_u16mf2x2', vl=4, n_segs=2),
+ make_test_case('vsseg2e16_v_u16mf2x2', vl=3, n_segs=2),
+ make_test_case('vsseg2e16_v_u16m1x2', vl=8, n_segs=2),
+ make_test_case('vsseg2e16_v_u16m1x2', vl=7, n_segs=2),
+ make_test_case('vsseg2e16_v_u16m2x2', vl=16, n_segs=2),
+ make_test_case('vsseg2e16_v_u16m2x2', vl=15, n_segs=2),
+ make_test_case('vsseg2e16_v_u16m4x2', vl=32, n_segs=2),
+ make_test_case('vsseg2e16_v_u16m4x2', vl=31, n_segs=2),
+ # Seg 3
+ make_test_case('vsseg3e16_v_u16mf2x3', vl=4, n_segs=3),
+ make_test_case('vsseg3e16_v_u16mf2x3', vl=3, n_segs=3),
+ make_test_case('vsseg3e16_v_u16m1x3', vl=8, n_segs=3),
+ make_test_case('vsseg3e16_v_u16m1x3', vl=7, n_segs=3),
+ make_test_case('vsseg3e16_v_u16m2x3', vl=16, n_segs=3),
+ make_test_case('vsseg3e16_v_u16m2x3', vl=15, n_segs=3),
+ # Seg 4
+ make_test_case('vsseg4e16_v_u16mf2x4', vl=4, n_segs=4),
+ make_test_case('vsseg4e16_v_u16mf2x4', vl=3, n_segs=4),
+ make_test_case('vsseg4e16_v_u16m1x4', vl=8, n_segs=4),
+ make_test_case('vsseg4e16_v_u16m1x4', vl=7, n_segs=4),
+ make_test_case('vsseg4e16_v_u16m2x4', vl=16, n_segs=4),
+ make_test_case('vsseg4e16_v_u16m2x4', vl=15, n_segs=4),
+ # Seg 5
+ make_test_case('vsseg5e16_v_u16mf2x5', vl=4, n_segs=5),
+ make_test_case('vsseg5e16_v_u16mf2x5', vl=3, n_segs=5),
+ make_test_case('vsseg5e16_v_u16m1x5', vl=8, n_segs=5),
+ make_test_case('vsseg5e16_v_u16m1x5', vl=7, n_segs=5),
+ # Seg 6
+ make_test_case('vsseg6e16_v_u16mf2x6', vl=4, n_segs=6),
+ make_test_case('vsseg6e16_v_u16mf2x6', vl=3, n_segs=6),
+ make_test_case('vsseg6e16_v_u16m1x6', vl=8, n_segs=6),
+ make_test_case('vsseg6e16_v_u16m1x6', vl=7, n_segs=6),
+ # Seg 7
+ make_test_case('vsseg7e16_v_u16mf2x7', vl=4, n_segs=7),
+ make_test_case('vsseg7e16_v_u16mf2x7', vl=3, n_segs=7),
+ make_test_case('vsseg7e16_v_u16m1x7', vl=8, n_segs=7),
+ make_test_case('vsseg7e16_v_u16m1x7', vl=7, n_segs=7),
+ # Seg 8
+ make_test_case('vsseg8e16_v_u16mf2x8', vl=4, n_segs=8),
+ make_test_case('vsseg8e16_v_u16mf2x8', vl=3, n_segs=8),
+ make_test_case('vsseg8e16_v_u16m1x8', vl=8, n_segs=8),
+ make_test_case('vsseg8e16_v_u16m1x8', vl=7, n_segs=8),
+ ],
+ dtype = np.uint16,
+ )
+
+
+
+@cocotb.test()
+async def store32_seg_unit(dut):
+ """Test vsseg*e32 usage accessible from intrinsics."""
+ def make_test_case(impl: str, vl: int, n_segs: int):
+ return {
+ 'impl': impl,
+ 'vl': vl,
+ 'in_size': vl * n_segs * 2,
+ 'out_size': vl * n_segs * 2,
+ 'pattern': [
+ seg * vl + elem
+ for elem in range(vl) for seg in range(n_segs)]
+ }
+
+ await vector_load_store_v2(
+ dut = dut,
+ elf_name = 'store32_seg_unit.elf',
+ cases = [
+ # Seg 2
+ make_test_case('vsseg2e32_v_u32m1x2', vl=4, n_segs=2),
+ make_test_case('vsseg2e32_v_u32m1x2', vl=3, n_segs=2),
+ make_test_case('vsseg2e32_v_u32m2x2', vl=8, n_segs=2),
+ make_test_case('vsseg2e32_v_u32m2x2', vl=7, n_segs=2),
+ make_test_case('vsseg2e32_v_u32m4x2', vl=16, n_segs=2),
+ make_test_case('vsseg2e32_v_u32m4x2', vl=15, n_segs=2),
+ # Seg 3
+ make_test_case('vsseg3e32_v_u32m1x3', vl=4, n_segs=3),
+ make_test_case('vsseg3e32_v_u32m1x3', vl=3, n_segs=3),
+ make_test_case('vsseg3e32_v_u32m2x3', vl=8, n_segs=3),
+ make_test_case('vsseg3e32_v_u32m2x3', vl=7, n_segs=3),
+ # Seg 4
+ make_test_case('vsseg4e32_v_u32m1x4', vl=4, n_segs=4),
+ make_test_case('vsseg4e32_v_u32m1x4', vl=3, n_segs=4),
+ make_test_case('vsseg4e32_v_u32m2x4', vl=8, n_segs=4),
+ make_test_case('vsseg4e32_v_u32m2x4', vl=7, n_segs=4),
+ # Seg 5
+ make_test_case('vsseg5e32_v_u32m1x5', vl=4, n_segs=5),
+ make_test_case('vsseg5e32_v_u32m1x5', vl=3, n_segs=5),
+ # Seg 6
+ make_test_case('vsseg6e32_v_u32m1x6', vl=4, n_segs=6),
+ make_test_case('vsseg6e32_v_u32m1x6', vl=3, n_segs=6),
+ # Seg 7
+ make_test_case('vsseg7e32_v_u32m1x7', vl=4, n_segs=7),
+ make_test_case('vsseg7e32_v_u32m1x7', vl=3, n_segs=7),
+ # Seg 8
+ make_test_case('vsseg8e32_v_u32m1x8', vl=4, n_segs=8),
+ make_test_case('vsseg8e32_v_u32m1x8', vl=3, n_segs=8),
+ ],
+ dtype = np.uint32,
+ )
+
+
+@cocotb.test()
async def load_store8_test(dut):
"""Testbench to test RVV load."""
fixture = await Fixture.Create(dut)