Add vsseg*e16/32 tests Change-Id: I7d38628c389743c1d0bf335e3cfdc7518ddeb20d
diff --git a/tests/cocotb/BUILD b/tests/cocotb/BUILD index 7a09d29..ee7bec8 100644 --- a/tests/cocotb/BUILD +++ b/tests/cocotb/BUILD
@@ -192,6 +192,8 @@ "load8_indexed_m1", "store8_indexed_m1", "store8_seg_unit", + "store16_seg_unit", + "store32_seg_unit", "load_store8_test", ] # END_TESTCASES_FOR_rvv_load_store_test
diff --git a/tests/cocotb/rvv/load_store/BUILD b/tests/cocotb/rvv/load_store/BUILD index cf3dae6..c5a79d3 100644 --- a/tests/cocotb/rvv/load_store/BUILD +++ b/tests/cocotb/rvv/load_store/BUILD
@@ -83,6 +83,12 @@ "store8_seg_unit": { "srcs": ["store8_seg_unit.cc"], }, + "store16_seg_unit": { + "srcs": ["store16_seg_unit.cc"], + }, + "store32_seg_unit": { + "srcs": ["store32_seg_unit.cc"], + }, }, ) @@ -110,5 +116,7 @@ ":load32_stride8_m1_partial.elf", ":store8_indexed_m1.elf", ":store8_seg_unit", + ":store16_seg_unit", + ":store32_seg_unit", ], ) \ No newline at end of file
diff --git a/tests/cocotb/rvv/load_store/store16_seg_unit.cc b/tests/cocotb/rvv/load_store/store16_seg_unit.cc new file mode 100644 index 0000000..1e06608 --- /dev/null +++ b/tests/cocotb/rvv/load_store/store16_seg_unit.cc
@@ -0,0 +1,203 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <riscv_vector.h> +#include <stdint.h> + +namespace { +// Double sized so we can check trailing regions are not read/written. +constexpr size_t buf_size = 128; +} // namespace + +size_t vl __attribute__((section(".data"))) = 8; +// These instructions don't differentiate signed/unsigned so we only need to +// test one. The types come from intrinsic level. +uint16_t in_buf[buf_size] __attribute__((section(".data"))); +uint16_t out_buf[buf_size] __attribute__((section(".data"))); + +extern "C" { +// Segment 2 +__attribute__((used, retain)) void vsseg2e16_v_u16mf2x2() { + auto data = + __riscv_vcreate_v_u16mf2x2(__riscv_vle16_v_u16mf2(in_buf, vl), + __riscv_vle16_v_u16mf2(in_buf + vl, vl)); + __riscv_vsseg2e16_v_u16mf2x2(out_buf, data, vl); +} + +__attribute__((used, retain)) void vsseg2e16_v_u16m1x2() { + auto data = __riscv_vcreate_v_u16m1x2(__riscv_vle16_v_u16m1(in_buf, vl), + __riscv_vle16_v_u16m1(in_buf + vl, vl)); + __riscv_vsseg2e16_v_u16m1x2(out_buf, data, vl); +} + +__attribute__((used, retain)) void vsseg2e16_v_u16m2x2() { + auto data = __riscv_vcreate_v_u16m2x2(__riscv_vle16_v_u16m2(in_buf, vl), + __riscv_vle16_v_u16m2(in_buf + vl, vl)); + __riscv_vsseg2e16_v_u16m2x2(out_buf, data, vl); +} + +__attribute__((used, retain)) void vsseg2e16_v_u16m4x2() { + auto data = __riscv_vcreate_v_u16m4x2(__riscv_vle16_v_u16m4(in_buf, vl), + __riscv_vle16_v_u16m4(in_buf + vl, vl)); + __riscv_vsseg2e16_v_u16m4x2(out_buf, data, vl); +} + +// Segment 3 +__attribute__((used, retain)) void vsseg3e16_v_u16mf2x3() { + auto data = + __riscv_vcreate_v_u16mf2x3(__riscv_vle16_v_u16mf2(in_buf, vl), + __riscv_vle16_v_u16mf2(in_buf + vl, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 2, vl)); + __riscv_vsseg3e16_v_u16mf2x3(out_buf, data, vl); +} + +__attribute__((used, retain)) void vsseg3e16_v_u16m1x3() { + auto data = __riscv_vcreate_v_u16m1x3( + __riscv_vle16_v_u16m1(in_buf, vl), __riscv_vle16_v_u16m1(in_buf + vl, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 2, vl)); + __riscv_vsseg3e16_v_u16m1x3(out_buf, data, vl); +} + +__attribute__((used, retain)) void vsseg3e16_v_u16m2x3() { + auto data = __riscv_vcreate_v_u16m2x3( + __riscv_vle16_v_u16m2(in_buf, vl), __riscv_vle16_v_u16m2(in_buf + vl, vl), + __riscv_vle16_v_u16m2(in_buf + vl * 2, vl)); + __riscv_vsseg3e16_v_u16m2x3(out_buf, data, vl); +} + +// Segment 4 +__attribute__((used, retain)) void vsseg4e16_v_u16mf2x4() { + auto data = + __riscv_vcreate_v_u16mf2x4(__riscv_vle16_v_u16mf2(in_buf, vl), + __riscv_vle16_v_u16mf2(in_buf + vl, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 2, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 3, vl)); + __riscv_vsseg4e16_v_u16mf2x4(out_buf, data, vl); +} + +__attribute__((used, retain)) void vsseg4e16_v_u16m1x4() { + auto data = __riscv_vcreate_v_u16m1x4( + __riscv_vle16_v_u16m1(in_buf, vl), __riscv_vle16_v_u16m1(in_buf + vl, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 2, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 3, vl)); + __riscv_vsseg4e16_v_u16m1x4(out_buf, data, vl); +} + +__attribute__((used, retain)) void vsseg4e16_v_u16m2x4() { + auto data = __riscv_vcreate_v_u16m2x4( + __riscv_vle16_v_u16m2(in_buf, vl), __riscv_vle16_v_u16m2(in_buf + vl, vl), + __riscv_vle16_v_u16m2(in_buf + vl * 2, vl), + __riscv_vle16_v_u16m2(in_buf + vl * 3, vl)); + __riscv_vsseg4e16_v_u16m2x4(out_buf, data, vl); +} + +// Segment 5 +__attribute__((used, retain)) void vsseg5e16_v_u16mf2x5() { + auto data = + __riscv_vcreate_v_u16mf2x5(__riscv_vle16_v_u16mf2(in_buf, vl), + __riscv_vle16_v_u16mf2(in_buf + vl, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 2, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 3, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 4, vl)); + __riscv_vsseg5e16_v_u16mf2x5(out_buf, data, vl); +} + +__attribute__((used, retain)) void vsseg5e16_v_u16m1x5() { + auto data = __riscv_vcreate_v_u16m1x5( + __riscv_vle16_v_u16m1(in_buf, vl), __riscv_vle16_v_u16m1(in_buf + vl, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 2, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 3, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 4, vl)); + __riscv_vsseg5e16_v_u16m1x5(out_buf, data, vl); +} + +// Segment 6 +__attribute__((used, retain)) void vsseg6e16_v_u16mf2x6() { + auto data = + __riscv_vcreate_v_u16mf2x6(__riscv_vle16_v_u16mf2(in_buf, vl), + __riscv_vle16_v_u16mf2(in_buf + vl, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 2, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 3, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 4, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 5, vl)); + __riscv_vsseg6e16_v_u16mf2x6(out_buf, data, vl); +} + +__attribute__((used, retain)) void vsseg6e16_v_u16m1x6() { + auto data = __riscv_vcreate_v_u16m1x6( + __riscv_vle16_v_u16m1(in_buf, vl), __riscv_vle16_v_u16m1(in_buf + vl, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 2, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 3, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 4, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 5, vl)); + __riscv_vsseg6e16_v_u16m1x6(out_buf, data, vl); +} + +// Segment 7 +__attribute__((used, retain)) void vsseg7e16_v_u16mf2x7() { + auto data = + __riscv_vcreate_v_u16mf2x7(__riscv_vle16_v_u16mf2(in_buf, vl), + __riscv_vle16_v_u16mf2(in_buf + vl, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 2, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 3, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 4, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 5, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 6, vl)); + __riscv_vsseg7e16_v_u16mf2x7(out_buf, data, vl); +} + +__attribute__((used, retain)) void vsseg7e16_v_u16m1x7() { + auto data = __riscv_vcreate_v_u16m1x7( + __riscv_vle16_v_u16m1(in_buf, vl), __riscv_vle16_v_u16m1(in_buf + vl, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 2, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 3, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 4, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 5, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 6, vl)); + __riscv_vsseg7e16_v_u16m1x7(out_buf, data, vl); +} + +// Segment 8 +__attribute__((used, retain)) void vsseg8e16_v_u16mf2x8() { + auto data = + __riscv_vcreate_v_u16mf2x8(__riscv_vle16_v_u16mf2(in_buf, vl), + __riscv_vle16_v_u16mf2(in_buf + vl, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 2, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 3, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 4, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 5, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 6, vl), + __riscv_vle16_v_u16mf2(in_buf + vl * 7, vl)); + __riscv_vsseg8e16_v_u16mf2x8(out_buf, data, vl); +} + +__attribute__((used, retain)) void vsseg8e16_v_u16m1x8() { + auto data = __riscv_vcreate_v_u16m1x8( + __riscv_vle16_v_u16m1(in_buf, vl), __riscv_vle16_v_u16m1(in_buf + vl, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 2, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 3, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 4, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 5, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 6, vl), + __riscv_vle16_v_u16m1(in_buf + vl * 7, vl)); + __riscv_vsseg8e16_v_u16m1x8(out_buf, data, vl); +} +} + +void (*impl)() __attribute__((section(".data"))) = &vsseg2e16_v_u16m1x2; + +int main(int argc, char** argv) { + impl(); + return 0; +}
diff --git a/tests/cocotb/rvv/load_store/store32_seg_unit.cc b/tests/cocotb/rvv/load_store/store32_seg_unit.cc new file mode 100644 index 0000000..c30da64 --- /dev/null +++ b/tests/cocotb/rvv/load_store/store32_seg_unit.cc
@@ -0,0 +1,133 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <riscv_vector.h> +#include <stdint.h> + +namespace { +// Double sized so we can check trailing regions are not read/written. +constexpr size_t buf_size = 64; +} // namespace + +size_t vl __attribute__((section(".data"))) = 4; +// These instructions don't differentiate signed/unsigned so we only need to +// test one. The types come from intrinsic level. +uint32_t in_buf[buf_size] __attribute__((section(".data"))); +uint32_t out_buf[buf_size] __attribute__((section(".data"))); + +extern "C" { +// Segment 2 +__attribute__((used, retain)) void vsseg2e32_v_u32m1x2() { + auto data = __riscv_vcreate_v_u32m1x2(__riscv_vle32_v_u32m1(in_buf, vl), + __riscv_vle32_v_u32m1(in_buf + vl, vl)); + __riscv_vsseg2e32_v_u32m1x2(out_buf, data, vl); +} + +__attribute__((used, retain)) void vsseg2e32_v_u32m2x2() { + auto data = __riscv_vcreate_v_u32m2x2(__riscv_vle32_v_u32m2(in_buf, vl), + __riscv_vle32_v_u32m2(in_buf + vl, vl)); + __riscv_vsseg2e32_v_u32m2x2(out_buf, data, vl); +} + +__attribute__((used, retain)) void vsseg2e32_v_u32m4x2() { + auto data = __riscv_vcreate_v_u32m4x2(__riscv_vle32_v_u32m4(in_buf, vl), + __riscv_vle32_v_u32m4(in_buf + vl, vl)); + __riscv_vsseg2e32_v_u32m4x2(out_buf, data, vl); +} + +// Segment 3 +__attribute__((used, retain)) void vsseg3e32_v_u32m1x3() { + auto data = __riscv_vcreate_v_u32m1x3( + __riscv_vle32_v_u32m1(in_buf, vl), __riscv_vle32_v_u32m1(in_buf + vl, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 2, vl)); + __riscv_vsseg3e32_v_u32m1x3(out_buf, data, vl); +} + +__attribute__((used, retain)) void vsseg3e32_v_u32m2x3() { + auto data = __riscv_vcreate_v_u32m2x3( + __riscv_vle32_v_u32m2(in_buf, vl), __riscv_vle32_v_u32m2(in_buf + vl, vl), + __riscv_vle32_v_u32m2(in_buf + vl * 2, vl)); + __riscv_vsseg3e32_v_u32m2x3(out_buf, data, vl); +} + +// Segment 4 +__attribute__((used, retain)) void vsseg4e32_v_u32m1x4() { + auto data = __riscv_vcreate_v_u32m1x4( + __riscv_vle32_v_u32m1(in_buf, vl), __riscv_vle32_v_u32m1(in_buf + vl, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 2, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 3, vl)); + __riscv_vsseg4e32_v_u32m1x4(out_buf, data, vl); +} + +__attribute__((used, retain)) void vsseg4e32_v_u32m2x4() { + auto data = __riscv_vcreate_v_u32m2x4( + __riscv_vle32_v_u32m2(in_buf, vl), __riscv_vle32_v_u32m2(in_buf + vl, vl), + __riscv_vle32_v_u32m2(in_buf + vl * 2, vl), + __riscv_vle32_v_u32m2(in_buf + vl * 3, vl)); + __riscv_vsseg4e32_v_u32m2x4(out_buf, data, vl); +} + +// Segment 5 +__attribute__((used, retain)) void vsseg5e32_v_u32m1x5() { + auto data = __riscv_vcreate_v_u32m1x5( + __riscv_vle32_v_u32m1(in_buf, vl), __riscv_vle32_v_u32m1(in_buf + vl, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 2, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 3, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 4, vl)); + __riscv_vsseg5e32_v_u32m1x5(out_buf, data, vl); +} + +// Segment 6 +__attribute__((used, retain)) void vsseg6e32_v_u32m1x6() { + auto data = __riscv_vcreate_v_u32m1x6( + __riscv_vle32_v_u32m1(in_buf, vl), __riscv_vle32_v_u32m1(in_buf + vl, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 2, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 3, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 4, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 5, vl)); + __riscv_vsseg6e32_v_u32m1x6(out_buf, data, vl); +} + +// Segment 7 +__attribute__((used, retain)) void vsseg7e32_v_u32m1x7() { + auto data = __riscv_vcreate_v_u32m1x7( + __riscv_vle32_v_u32m1(in_buf, vl), __riscv_vle32_v_u32m1(in_buf + vl, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 2, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 3, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 4, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 5, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 6, vl)); + __riscv_vsseg7e32_v_u32m1x7(out_buf, data, vl); +} + +// Segment 8 +__attribute__((used, retain)) void vsseg8e32_v_u32m1x8() { + auto data = __riscv_vcreate_v_u32m1x8( + __riscv_vle32_v_u32m1(in_buf, vl), __riscv_vle32_v_u32m1(in_buf + vl, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 2, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 3, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 4, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 5, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 6, vl), + __riscv_vle32_v_u32m1(in_buf + vl * 7, vl)); + __riscv_vsseg8e32_v_u32m1x8(out_buf, data, vl); +} +} + +void (*impl)() __attribute__((section(".data"))) = &vsseg2e32_v_u32m1x2; + +int main(int argc, char** argv) { + impl(); + return 0; +}
diff --git a/tests/cocotb/rvv_load_store_test.py b/tests/cocotb/rvv_load_store_test.py index b22f8b9..2f8d9d3 100644 --- a/tests/cocotb/rvv_load_store_test.py +++ b/tests/cocotb/rvv_load_store_test.py
@@ -389,8 +389,8 @@ 'in_size': vl * n_segs * 2, 'out_size': vl * n_segs * 2, 'pattern':[ - i * n_segs + j - for j in range(n_segs) for i in range(vl)] + elem * n_segs + seg + for seg in range(n_segs) for elem in range(vl)] } await vector_load_store_v2( @@ -469,8 +469,8 @@ 'in_size': vl * n_segs * 2, 'out_size': vl * n_segs * 2, 'pattern':[ - i * n_segs + j - for j in range(n_segs) for i in range(vl)] + elem * n_segs + seg + for seg in range(n_segs) for elem in range(vl)] } await vector_load_store_v2( @@ -535,8 +535,8 @@ 'in_size': vl * n_segs * 2, 'out_size': vl * n_segs * 2, 'pattern':[ - i * n_segs + j - for j in range(n_segs) for i in range(vl)] + elem * n_segs + seg + for seg in range(n_segs) for elem in range(vl)] } await vector_load_store_v2( @@ -627,7 +627,9 @@ 'vl': vl, 'in_size': vl * n_segs * 2, 'out_size': vl * n_segs * 2, - 'pattern': [i * vl + j for j in range(vl) for i in range(n_segs)] + 'pattern': [ + seg * vl + elem + for elem in range(vl) for seg in range(n_segs)] } await vector_load_store_v2( @@ -697,6 +699,125 @@ @cocotb.test() +async def store16_seg_unit(dut): + """Test vsseg*e16 usage accessible from intrinsics.""" + def make_test_case(impl: str, vl: int, n_segs: int): + return { + 'impl': impl, + 'vl': vl, + 'in_size': vl * n_segs * 2, + 'out_size': vl * n_segs * 2, + 'pattern': [ + seg * vl + elem + for elem in range(vl) for seg in range(n_segs)] + } + + await vector_load_store_v2( + dut = dut, + elf_name = 'store16_seg_unit.elf', + cases = [ + # Seg 2 + make_test_case('vsseg2e16_v_u16mf2x2', vl=4, n_segs=2), + make_test_case('vsseg2e16_v_u16mf2x2', vl=3, n_segs=2), + make_test_case('vsseg2e16_v_u16m1x2', vl=8, n_segs=2), + make_test_case('vsseg2e16_v_u16m1x2', vl=7, n_segs=2), + make_test_case('vsseg2e16_v_u16m2x2', vl=16, n_segs=2), + make_test_case('vsseg2e16_v_u16m2x2', vl=15, n_segs=2), + make_test_case('vsseg2e16_v_u16m4x2', vl=32, n_segs=2), + make_test_case('vsseg2e16_v_u16m4x2', vl=31, n_segs=2), + # Seg 3 + make_test_case('vsseg3e16_v_u16mf2x3', vl=4, n_segs=3), + make_test_case('vsseg3e16_v_u16mf2x3', vl=3, n_segs=3), + make_test_case('vsseg3e16_v_u16m1x3', vl=8, n_segs=3), + make_test_case('vsseg3e16_v_u16m1x3', vl=7, n_segs=3), + make_test_case('vsseg3e16_v_u16m2x3', vl=16, n_segs=3), + make_test_case('vsseg3e16_v_u16m2x3', vl=15, n_segs=3), + # Seg 4 + make_test_case('vsseg4e16_v_u16mf2x4', vl=4, n_segs=4), + make_test_case('vsseg4e16_v_u16mf2x4', vl=3, n_segs=4), + make_test_case('vsseg4e16_v_u16m1x4', vl=8, n_segs=4), + make_test_case('vsseg4e16_v_u16m1x4', vl=7, n_segs=4), + make_test_case('vsseg4e16_v_u16m2x4', vl=16, n_segs=4), + make_test_case('vsseg4e16_v_u16m2x4', vl=15, n_segs=4), + # Seg 5 + make_test_case('vsseg5e16_v_u16mf2x5', vl=4, n_segs=5), + make_test_case('vsseg5e16_v_u16mf2x5', vl=3, n_segs=5), + make_test_case('vsseg5e16_v_u16m1x5', vl=8, n_segs=5), + make_test_case('vsseg5e16_v_u16m1x5', vl=7, n_segs=5), + # Seg 6 + make_test_case('vsseg6e16_v_u16mf2x6', vl=4, n_segs=6), + make_test_case('vsseg6e16_v_u16mf2x6', vl=3, n_segs=6), + make_test_case('vsseg6e16_v_u16m1x6', vl=8, n_segs=6), + make_test_case('vsseg6e16_v_u16m1x6', vl=7, n_segs=6), + # Seg 7 + make_test_case('vsseg7e16_v_u16mf2x7', vl=4, n_segs=7), + make_test_case('vsseg7e16_v_u16mf2x7', vl=3, n_segs=7), + make_test_case('vsseg7e16_v_u16m1x7', vl=8, n_segs=7), + make_test_case('vsseg7e16_v_u16m1x7', vl=7, n_segs=7), + # Seg 8 + make_test_case('vsseg8e16_v_u16mf2x8', vl=4, n_segs=8), + make_test_case('vsseg8e16_v_u16mf2x8', vl=3, n_segs=8), + make_test_case('vsseg8e16_v_u16m1x8', vl=8, n_segs=8), + make_test_case('vsseg8e16_v_u16m1x8', vl=7, n_segs=8), + ], + dtype = np.uint16, + ) + + + +@cocotb.test() +async def store32_seg_unit(dut): + """Test vsseg*e32 usage accessible from intrinsics.""" + def make_test_case(impl: str, vl: int, n_segs: int): + return { + 'impl': impl, + 'vl': vl, + 'in_size': vl * n_segs * 2, + 'out_size': vl * n_segs * 2, + 'pattern': [ + seg * vl + elem + for elem in range(vl) for seg in range(n_segs)] + } + + await vector_load_store_v2( + dut = dut, + elf_name = 'store32_seg_unit.elf', + cases = [ + # Seg 2 + make_test_case('vsseg2e32_v_u32m1x2', vl=4, n_segs=2), + make_test_case('vsseg2e32_v_u32m1x2', vl=3, n_segs=2), + make_test_case('vsseg2e32_v_u32m2x2', vl=8, n_segs=2), + make_test_case('vsseg2e32_v_u32m2x2', vl=7, n_segs=2), + make_test_case('vsseg2e32_v_u32m4x2', vl=16, n_segs=2), + make_test_case('vsseg2e32_v_u32m4x2', vl=15, n_segs=2), + # Seg 3 + make_test_case('vsseg3e32_v_u32m1x3', vl=4, n_segs=3), + make_test_case('vsseg3e32_v_u32m1x3', vl=3, n_segs=3), + make_test_case('vsseg3e32_v_u32m2x3', vl=8, n_segs=3), + make_test_case('vsseg3e32_v_u32m2x3', vl=7, n_segs=3), + # Seg 4 + make_test_case('vsseg4e32_v_u32m1x4', vl=4, n_segs=4), + make_test_case('vsseg4e32_v_u32m1x4', vl=3, n_segs=4), + make_test_case('vsseg4e32_v_u32m2x4', vl=8, n_segs=4), + make_test_case('vsseg4e32_v_u32m2x4', vl=7, n_segs=4), + # Seg 5 + make_test_case('vsseg5e32_v_u32m1x5', vl=4, n_segs=5), + make_test_case('vsseg5e32_v_u32m1x5', vl=3, n_segs=5), + # Seg 6 + make_test_case('vsseg6e32_v_u32m1x6', vl=4, n_segs=6), + make_test_case('vsseg6e32_v_u32m1x6', vl=3, n_segs=6), + # Seg 7 + make_test_case('vsseg7e32_v_u32m1x7', vl=4, n_segs=7), + make_test_case('vsseg7e32_v_u32m1x7', vl=3, n_segs=7), + # Seg 8 + make_test_case('vsseg8e32_v_u32m1x8', vl=4, n_segs=8), + make_test_case('vsseg8e32_v_u32m1x8', vl=3, n_segs=8), + ], + dtype = np.uint32, + ) + + +@cocotb.test() async def load_store8_test(dut): """Testbench to test RVV load.""" fixture = await Fixture.Create(dut)