Add segmented store8 tests

seg 2 to seg 8.
m2 and m4 currently fail.

Change-Id: I8ab3dee832de91651b949bb8763b47066a20fe8a
diff --git a/tests/cocotb/BUILD b/tests/cocotb/BUILD
index 499258f..43b8c76 100644
--- a/tests/cocotb/BUILD
+++ b/tests/cocotb/BUILD
@@ -195,6 +195,7 @@
     "load16_segment2_stride6_m1",
     "load8_indexed_m1",
     "store8_indexed_m1",
+    "store8_seg_unit",
     "load_store8_test",
 ]
 # END_TESTCASES_FOR_rvv_load_store_test
diff --git a/tests/cocotb/rvv/load_store/BUILD b/tests/cocotb/rvv/load_store/BUILD
index 31e659f..0131047 100644
--- a/tests/cocotb/rvv/load_store/BUILD
+++ b/tests/cocotb/rvv/load_store/BUILD
@@ -89,6 +89,9 @@
         "store8_indexed_m1": {
             "srcs": ["store8_indexed_m1.cc"],
         },
+        "store8_seg_unit": {
+            "srcs": ["store8_seg_unit.cc"],
+        },
     },
 )
 
@@ -118,5 +121,6 @@
         ":load32_stride8_m1.elf",
         ":load32_stride8_m1_partial.elf",
         ":store8_indexed_m1.elf",
+        ":store8_seg_unit",
     ],
 )
\ No newline at end of file
diff --git a/tests/cocotb/rvv/load_store/store8_seg_unit.cc b/tests/cocotb/rvv/load_store/store8_seg_unit.cc
new file mode 100644
index 0000000..399de73
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/store8_seg_unit.cc
@@ -0,0 +1,259 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+namespace {
+// Double sized so we can check trailing regions are not read/written.
+constexpr size_t buf_size = 256;
+}  // namespace
+
+size_t vl __attribute__((section(".data"))) = 64;
+// These instructions don't differentiate signed/unsigned so we only need to
+// test one. The types come from intrinsic level.
+uint8_t in_buf[buf_size] __attribute__((section(".data")));
+uint8_t out_buf[buf_size] __attribute__((section(".data")));
+
+extern "C" {
+// Segment 2
+__attribute__((used, retain)) void vsseg2e8_v_u8mf4x2() {
+  auto data = __riscv_vcreate_v_u8mf4x2(__riscv_vle8_v_u8mf4(in_buf, vl),
+                                        __riscv_vle8_v_u8mf4(in_buf + vl, vl));
+  __riscv_vsseg2e8_v_u8mf4x2(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg2e8_v_u8mf2x2() {
+  auto data = __riscv_vcreate_v_u8mf2x2(__riscv_vle8_v_u8mf2(in_buf, vl),
+                                        __riscv_vle8_v_u8mf2(in_buf + vl, vl));
+  __riscv_vsseg2e8_v_u8mf2x2(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg2e8_v_u8m1x2() {
+  auto data = __riscv_vcreate_v_u8m1x2(__riscv_vle8_v_u8m1(in_buf, vl),
+                                       __riscv_vle8_v_u8m1(in_buf + vl, vl));
+  __riscv_vsseg2e8_v_u8m1x2(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg2e8_v_u8m2x2() {
+  auto data = __riscv_vcreate_v_u8m2x2(__riscv_vle8_v_u8m2(in_buf, vl),
+                                       __riscv_vle8_v_u8m2(in_buf + vl, vl));
+  __riscv_vsseg2e8_v_u8m2x2(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg2e8_v_u8m4x2() {
+  auto data = __riscv_vcreate_v_u8m4x2(__riscv_vle8_v_u8m4(in_buf, vl),
+                                       __riscv_vle8_v_u8m4(in_buf + vl, vl));
+  __riscv_vsseg2e8_v_u8m4x2(out_buf, data, vl);
+}
+
+// Segment 3
+__attribute__((used, retain)) void vsseg3e8_v_u8mf4x3() {
+  auto data = __riscv_vcreate_v_u8mf4x3(
+      __riscv_vle8_v_u8mf4(in_buf, vl), __riscv_vle8_v_u8mf4(in_buf + vl, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 2, vl));
+  __riscv_vsseg3e8_v_u8mf4x3(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg3e8_v_u8mf2x3() {
+  auto data = __riscv_vcreate_v_u8mf2x3(
+      __riscv_vle8_v_u8mf2(in_buf, vl), __riscv_vle8_v_u8mf2(in_buf + vl, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 2, vl));
+  __riscv_vsseg3e8_v_u8mf2x3(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg3e8_v_u8m1x3() {
+  auto data = __riscv_vcreate_v_u8m1x3(
+      __riscv_vle8_v_u8m1(in_buf, vl), __riscv_vle8_v_u8m1(in_buf + vl, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 2, vl));
+  __riscv_vsseg3e8_v_u8m1x3(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg3e8_v_u8m2x3() {
+  auto data = __riscv_vcreate_v_u8m2x3(
+      __riscv_vle8_v_u8m2(in_buf, vl), __riscv_vle8_v_u8m2(in_buf + vl, vl),
+      __riscv_vle8_v_u8m2(in_buf + vl * 2, vl));
+  __riscv_vsseg3e8_v_u8m2x3(out_buf, data, vl);
+}
+
+// Segment 4
+__attribute__((used, retain)) void vsseg4e8_v_u8mf4x4() {
+  auto data = __riscv_vcreate_v_u8mf4x4(
+      __riscv_vle8_v_u8mf4(in_buf, vl), __riscv_vle8_v_u8mf4(in_buf + vl, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 2, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 3, vl));
+  __riscv_vsseg4e8_v_u8mf4x4(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg4e8_v_u8mf2x4() {
+  auto data = __riscv_vcreate_v_u8mf2x4(
+      __riscv_vle8_v_u8mf2(in_buf, vl), __riscv_vle8_v_u8mf2(in_buf + vl, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 2, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 3, vl));
+  __riscv_vsseg4e8_v_u8mf2x4(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg4e8_v_u8m1x4() {
+  auto data = __riscv_vcreate_v_u8m1x4(
+      __riscv_vle8_v_u8m1(in_buf, vl), __riscv_vle8_v_u8m1(in_buf + vl, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 2, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 3, vl));
+  __riscv_vsseg4e8_v_u8m1x4(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg4e8_v_u8m2x4() {
+  auto data = __riscv_vcreate_v_u8m2x4(
+      __riscv_vle8_v_u8m2(in_buf, vl), __riscv_vle8_v_u8m2(in_buf + vl, vl),
+      __riscv_vle8_v_u8m2(in_buf + vl * 2, vl),
+      __riscv_vle8_v_u8m2(in_buf + vl * 3, vl));
+  __riscv_vsseg4e8_v_u8m2x4(out_buf, data, vl);
+}
+
+// Segment 5
+__attribute__((used, retain)) void vsseg5e8_v_u8mf4x5() {
+  auto data = __riscv_vcreate_v_u8mf4x5(
+      __riscv_vle8_v_u8mf4(in_buf, vl), __riscv_vle8_v_u8mf4(in_buf + vl, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 2, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 3, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 4, vl));
+  __riscv_vsseg5e8_v_u8mf4x5(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg5e8_v_u8mf2x5() {
+  auto data = __riscv_vcreate_v_u8mf2x5(
+      __riscv_vle8_v_u8mf2(in_buf, vl), __riscv_vle8_v_u8mf2(in_buf + vl, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 2, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 3, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 4, vl));
+  __riscv_vsseg5e8_v_u8mf2x5(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg5e8_v_u8m1x5() {
+  auto data = __riscv_vcreate_v_u8m1x5(
+      __riscv_vle8_v_u8m1(in_buf, vl), __riscv_vle8_v_u8m1(in_buf + vl, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 2, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 3, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 4, vl));
+  __riscv_vsseg5e8_v_u8m1x5(out_buf, data, vl);
+}
+
+// Segment 6
+__attribute__((used, retain)) void vsseg6e8_v_u8mf4x6() {
+  auto data = __riscv_vcreate_v_u8mf4x6(
+      __riscv_vle8_v_u8mf4(in_buf, vl), __riscv_vle8_v_u8mf4(in_buf + vl, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 2, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 3, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 4, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 5, vl));
+  __riscv_vsseg6e8_v_u8mf4x6(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg6e8_v_u8mf2x6() {
+  auto data = __riscv_vcreate_v_u8mf2x6(
+      __riscv_vle8_v_u8mf2(in_buf, vl), __riscv_vle8_v_u8mf2(in_buf + vl, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 2, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 3, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 4, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 5, vl));
+  __riscv_vsseg6e8_v_u8mf2x6(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg6e8_v_u8m1x6() {
+  auto data = __riscv_vcreate_v_u8m1x6(
+      __riscv_vle8_v_u8m1(in_buf, vl), __riscv_vle8_v_u8m1(in_buf + vl, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 2, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 3, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 4, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 5, vl));
+  __riscv_vsseg6e8_v_u8m1x6(out_buf, data, vl);
+}
+
+// Segment 7
+__attribute__((used, retain)) void vsseg7e8_v_u8mf4x7() {
+  auto data = __riscv_vcreate_v_u8mf4x7(
+      __riscv_vle8_v_u8mf4(in_buf, vl), __riscv_vle8_v_u8mf4(in_buf + vl, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 2, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 3, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 4, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 5, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 6, vl));
+  __riscv_vsseg7e8_v_u8mf4x7(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg7e8_v_u8mf2x7() {
+  auto data = __riscv_vcreate_v_u8mf2x7(
+      __riscv_vle8_v_u8mf2(in_buf, vl), __riscv_vle8_v_u8mf2(in_buf + vl, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 2, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 3, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 4, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 5, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 6, vl));
+  __riscv_vsseg7e8_v_u8mf2x7(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg7e8_v_u8m1x7() {
+  auto data = __riscv_vcreate_v_u8m1x7(
+      __riscv_vle8_v_u8m1(in_buf, vl), __riscv_vle8_v_u8m1(in_buf + vl, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 2, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 3, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 4, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 5, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 6, vl));
+  __riscv_vsseg7e8_v_u8m1x7(out_buf, data, vl);
+}
+
+// Segment 8
+__attribute__((used, retain)) void vsseg8e8_v_u8mf4x8() {
+  auto data = __riscv_vcreate_v_u8mf4x8(
+      __riscv_vle8_v_u8mf4(in_buf, vl), __riscv_vle8_v_u8mf4(in_buf + vl, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 2, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 3, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 4, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 5, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 6, vl),
+      __riscv_vle8_v_u8mf4(in_buf + vl * 7, vl));
+  __riscv_vsseg8e8_v_u8mf4x8(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg8e8_v_u8mf2x8() {
+  auto data = __riscv_vcreate_v_u8mf2x8(
+      __riscv_vle8_v_u8mf2(in_buf, vl), __riscv_vle8_v_u8mf2(in_buf + vl, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 2, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 3, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 4, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 5, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 6, vl),
+      __riscv_vle8_v_u8mf2(in_buf + vl * 7, vl));
+  __riscv_vsseg8e8_v_u8mf2x8(out_buf, data, vl);
+}
+
+__attribute__((used, retain)) void vsseg8e8_v_u8m1x8() {
+  auto data = __riscv_vcreate_v_u8m1x8(
+      __riscv_vle8_v_u8m1(in_buf, vl), __riscv_vle8_v_u8m1(in_buf + vl, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 2, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 3, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 4, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 5, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 6, vl),
+      __riscv_vle8_v_u8m1(in_buf + vl * 7, vl));
+  __riscv_vsseg8e8_v_u8m1x8(out_buf, data, vl);
+}
+}
+
+void (*impl)() __attribute__((section(".data"))) = &vsseg2e8_v_u8m1x2;
+
+int main(int argc, char** argv) {
+  impl();
+  return 0;
+}
diff --git a/tests/cocotb/rvv_load_store_test.py b/tests/cocotb/rvv_load_store_test.py
index 6fb4963..4395db2 100644
--- a/tests/cocotb/rvv_load_store_test.py
+++ b/tests/cocotb/rvv_load_store_test.py
@@ -61,6 +61,56 @@
 
     assert (actual_outputs == expected_outputs).all(), debug_msg
 
+async def vector_load_store_v2(
+        dut,
+        elf_name: str,
+        cases: list[dict],  # keys: impl, vl, in_size, out_size, pattern.
+        dtype,
+):
+    """RVV load-store test template.
+
+    Each test performs some kind of patterned copy from `in_buf` to `out_buf`.
+    """
+    fixture = await Fixture.Create(dut)
+    r = runfiles.Create()
+    await fixture.load_elf_and_lookup_symbols(
+        r.Rlocation('kelvin_hw/tests/cocotb/rvv/load_store/' + elf_name),
+        ['impl', 'vl', 'in_buf', 'out_buf'] + [c['impl'] for c in cases],
+    )
+
+    min_value = np.iinfo(dtype).min
+    max_value = np.iinfo(dtype).max + 1  # One above.
+    rng = np.random.default_rng()
+    for c in cases:
+        impl = c['impl']
+        vl = c['vl']
+        in_size = c['in_size']
+        out_size = c['out_size']
+        pattern = c['pattern']
+
+        input_data = rng.integers(min_value, max_value, in_size, dtype=dtype)
+        expected_outputs = input_data[pattern]
+        sbz = np.zeros(out_size - len(pattern), dtype=dtype)
+        expected_outputs = np.concat((expected_outputs, sbz))
+
+        await fixture.write_ptr('impl', impl)
+        await fixture.write_word('vl', vl)
+        await fixture.write('in_buf', input_data)
+        await fixture.write('out_buf', np.zeros([out_size], dtype=dtype))
+
+        await fixture.run_to_halt()
+
+        actual_outputs = (await fixture.read(
+            'out_buf', out_size * np.dtype(dtype).itemsize)).view(dtype)
+
+        debug_msg = str({
+            'impl': impl,
+            'input': input_data,
+            'expected': expected_outputs,
+            'actual': actual_outputs,
+        })
+        assert (actual_outputs == expected_outputs).all(), debug_msg
+
 async def vector_load_indexed(
         dut,
         elf_name: str,
@@ -443,6 +493,85 @@
         dtype = np.uint8,
     )
 
+
+@cocotb.test()
+async def store8_seg_unit(dut):
+    """Test vsseg*e8  usage accessible from intrinsics."""
+    def make_test_case(impl: str, vl: int, n_segs: int):
+        return {
+            'impl': impl,
+            'vl': vl,
+            'in_size': vl * n_segs * 2,
+            'out_size': vl * n_segs * 2,
+            'pattern': [i * vl + j for j in range(vl) for i in range(n_segs)]
+        }
+
+    await vector_load_store_v2(
+        dut = dut,
+        elf_name = 'store8_seg_unit.elf',
+        cases = [
+            # Seg 2
+            make_test_case('vsseg2e8_v_u8mf4x2', vl=4, n_segs=2),
+            make_test_case('vsseg2e8_v_u8mf4x2', vl=3, n_segs=2),
+            make_test_case('vsseg2e8_v_u8mf2x2', vl=8, n_segs=2),
+            make_test_case('vsseg2e8_v_u8mf2x2', vl=7, n_segs=2),
+            make_test_case('vsseg2e8_v_u8m1x2', vl=16, n_segs=2),
+            make_test_case('vsseg2e8_v_u8m1x2', vl=15, n_segs=2),
+            # make_test_case('vsseg2e8_v_u8m2x2', vl=32, n_segs=2),
+            # make_test_case('vsseg2e8_v_u8m2x2', vl=31, n_segs=2),
+            # make_test_case('vsseg2e8_v_u8m4x2', vl=64, n_segs=2),
+            # make_test_case('vsseg2e8_v_u8m4x2', vl=63, n_segs=2),
+            # Seg 3
+            make_test_case('vsseg3e8_v_u8mf4x3', vl=4, n_segs=3),
+            make_test_case('vsseg3e8_v_u8mf4x3', vl=3, n_segs=3),
+            make_test_case('vsseg3e8_v_u8mf2x3', vl=8, n_segs=3),
+            make_test_case('vsseg3e8_v_u8mf2x3', vl=7, n_segs=3),
+            make_test_case('vsseg3e8_v_u8m1x3', vl=16, n_segs=3),
+            make_test_case('vsseg3e8_v_u8m1x3', vl=15, n_segs=3),
+            # make_test_case('vsseg3e8_v_u8m2x3', vl=32, n_segs=3),
+            # make_test_case('vsseg3e8_v_u8m2x3', vl=31, n_segs=3),
+            # Seg 4
+            make_test_case('vsseg4e8_v_u8mf4x4', vl=4, n_segs=4),
+            make_test_case('vsseg4e8_v_u8mf4x4', vl=3, n_segs=4),
+            make_test_case('vsseg4e8_v_u8mf2x4', vl=8, n_segs=4),
+            make_test_case('vsseg4e8_v_u8mf2x4', vl=7, n_segs=4),
+            make_test_case('vsseg4e8_v_u8m1x4', vl=16, n_segs=4),
+            make_test_case('vsseg4e8_v_u8m1x4', vl=15, n_segs=4),
+            # make_test_case('vsseg4e8_v_u8m2x4', vl=32, n_segs=4),
+            # make_test_case('vsseg4e8_v_u8m2x4', vl=31, n_segs=4),
+            # Seg 5
+            make_test_case('vsseg5e8_v_u8mf4x5', vl=4, n_segs=5),
+            make_test_case('vsseg5e8_v_u8mf4x5', vl=3, n_segs=5),
+            make_test_case('vsseg5e8_v_u8mf2x5', vl=8, n_segs=5),
+            make_test_case('vsseg5e8_v_u8mf2x5', vl=7, n_segs=5),
+            make_test_case('vsseg5e8_v_u8m1x5', vl=16, n_segs=5),
+            make_test_case('vsseg5e8_v_u8m1x5', vl=15, n_segs=5),
+            # Seg 6
+            make_test_case('vsseg6e8_v_u8mf4x6', vl=4, n_segs=6),
+            make_test_case('vsseg6e8_v_u8mf4x6', vl=3, n_segs=6),
+            make_test_case('vsseg6e8_v_u8mf2x6', vl=8, n_segs=6),
+            make_test_case('vsseg6e8_v_u8mf2x6', vl=7, n_segs=6),
+            make_test_case('vsseg6e8_v_u8m1x6', vl=16, n_segs=6),
+            make_test_case('vsseg6e8_v_u8m1x6', vl=15, n_segs=6),
+            # Seg 7
+            make_test_case('vsseg7e8_v_u8mf4x7', vl=4, n_segs=7),
+            make_test_case('vsseg7e8_v_u8mf4x7', vl=3, n_segs=7),
+            make_test_case('vsseg7e8_v_u8mf2x7', vl=8, n_segs=7),
+            make_test_case('vsseg7e8_v_u8mf2x7', vl=7, n_segs=7),
+            make_test_case('vsseg7e8_v_u8m1x7', vl=16, n_segs=7),
+            make_test_case('vsseg7e8_v_u8m1x7', vl=15, n_segs=7),
+            # Seg 8
+            make_test_case('vsseg8e8_v_u8mf4x8', vl=4, n_segs=8),
+            make_test_case('vsseg8e8_v_u8mf4x8', vl=3, n_segs=8),
+            make_test_case('vsseg8e8_v_u8mf2x8', vl=8, n_segs=8),
+            make_test_case('vsseg8e8_v_u8mf2x8', vl=7, n_segs=8),
+            make_test_case('vsseg8e8_v_u8m1x8', vl=16, n_segs=8),
+            make_test_case('vsseg8e8_v_u8m1x8', vl=15, n_segs=8),
+        ],
+        dtype = np.uint8,
+    )
+
+
 @cocotb.test()
 async def load_store8_test(dut):
     """Testbench to test RVV load."""