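Finish intrinsic coverage for indexed stores

Add the store8_index16, store8_index32 and store16_index32 tests. Also
compute index_max in the indexed-store test helper from the output buffer
size in bytes (out_size times the data element size) instead of from the
element count, and adjust the out_buf sizes of the existing
store16_index16, store32_index8, store32_index16 and store32_index32 tests.
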
Change-Id: I2b655b4f7a0588eaf6a6b553b9a43818a16382f1
diff --git a/tests/cocotb/BUILD b/tests/cocotb/BUILD
index e33dc35..6c0da8b 100644
--- a/tests/cocotb/BUILD
+++ b/tests/cocotb/BUILD
@@ -222,8 +222,11 @@
     "store_strided_all_vtypes_test",
     "store8_index8",
     "store8_index8_seg",
+    "store8_index16",
+    "store8_index32",
     "store16_index8",
     "store16_index16",
+    "store16_index32",
     "store32_index8",
     "store32_index16",
     "store32_index32",
diff --git a/tests/cocotb/rvv/load_store/BUILD b/tests/cocotb/rvv/load_store/BUILD
index f129395..a2aa111 100644
--- a/tests/cocotb/rvv/load_store/BUILD
+++ b/tests/cocotb/rvv/load_store/BUILD
@@ -134,12 +134,21 @@
         "store8_index8_seg": {
             "srcs": ["store8_index8_seg.cc"],
         },
+        "store8_index16": {
+            "srcs": ["store8_index16.cc"],
+        },
+        "store8_index32": {
+            "srcs": ["store8_index32.cc"],
+        },
         "store16_index8": {
             "srcs": ["store16_index8.cc"],
         },
         "store16_index16": {
             "srcs": ["store16_index16.cc"],
         },
+        "store16_index32": {
+            "srcs": ["store16_index32.cc"],
+        },
         "store32_index8": {
             "srcs": ["store32_index8.cc"],
         },
@@ -211,8 +220,11 @@
         ":load32_stride8_m1_partial.elf",
         ":store8_index8.elf",
         ":store8_index8_seg.elf",
+        ":store8_index16.elf",
+        ":store8_index32.elf",
         ":store16_index8.elf",
         ":store16_index16.elf",
+        ":store16_index32.elf",
         ":store32_index8.elf",
         ":store32_index16.elf",
         ":store32_index32.elf",
diff --git a/tests/cocotb/rvv/load_store/store16_index16.cc b/tests/cocotb/rvv/load_store/store16_index16.cc
index 6487165..dcffd60 100644
--- a/tests/cocotb/rvv/load_store/store16_index16.cc
+++ b/tests/cocotb/rvv/load_store/store16_index16.cc
@@ -17,7 +17,7 @@
 
 namespace {
 constexpr size_t scatter_count = 64;
-constexpr size_t buf_size = 16000;
+constexpr size_t buf_size = 15000;
 }  // namespace
 
 size_t vl __attribute__((section(".data"))) = 8;
diff --git a/tests/cocotb/rvv/load_store/store16_index32.cc b/tests/cocotb/rvv/load_store/store16_index32.cc
new file mode 100644
index 0000000..fb199b4
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/store16_index32.cc
@@ -0,0 +1,104 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+namespace {
+constexpr size_t scatter_count = 64;
+constexpr size_t buf_size = 15000;
+}  // namespace
+
+size_t vl __attribute__((section(".data"))) = 8;
+// Indices are always unsigned.
+uint32_t index_buf[scatter_count] __attribute__((section(".data")));
+// These instructions don't differentiate signed/unsigned so we only need to
+// test one. The types come from intrinsic level.
+uint16_t in_buf[scatter_count] __attribute__((section(".data")));
+uint16_t out_buf[buf_size] __attribute__((section(".data")));
+
+extern "C" {
+// Unordered
+__attribute__((used, retain)) void vsuxei32_v_u16mf2() {
+  auto indices = __riscv_vle32_v_u32m1(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle16_v_u16mf2(in_buf, vl);
+  __riscv_vsuxei32_v_u16mf2(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsuxei32_v_u16m1() {
+  auto indices = __riscv_vle32_v_u32m2(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle16_v_u16m1(in_buf, vl);
+  __riscv_vsuxei32_v_u16m1(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsuxei32_v_u16m2() {
+  auto indices = __riscv_vle32_v_u32m4(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle16_v_u16m2(in_buf, vl);
+  __riscv_vsuxei32_v_u16m2(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsuxei32_v_u16m4() {
+  auto indices = __riscv_vle32_v_u32m8(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle16_v_u16m4(in_buf, vl);
+  __riscv_vsuxei32_v_u16m4(out_buf, indices, data, vl);
+}
+
+// Ordered
+__attribute__((used, retain)) void vsoxei32_v_u16mf2() {
+  auto indices = __riscv_vle32_v_u32m1(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle16_v_u16mf2(in_buf, vl);
+  __riscv_vsoxei32_v_u16mf2(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsoxei32_v_u16m1() {
+  auto indices = __riscv_vle32_v_u32m2(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle16_v_u16m1(in_buf, vl);
+  __riscv_vsoxei32_v_u16m1(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsoxei32_v_u16m2() {
+  auto indices = __riscv_vle32_v_u32m4(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle16_v_u16m2(in_buf, vl);
+  __riscv_vsoxei32_v_u16m2(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsoxei32_v_u16m4() {
+  auto indices = __riscv_vle32_v_u32m8(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle16_v_u16m4(in_buf, vl);
+  __riscv_vsoxei32_v_u16m4(out_buf, indices, data, vl);
+}
+}
+
+void (*impl)() __attribute__((section(".data"))) = &vsuxei32_v_u16m1;
+
+int main(int argc, char** argv) {
+  impl();
+  return 0;
+}
diff --git a/tests/cocotb/rvv/load_store/store32_index16.cc b/tests/cocotb/rvv/load_store/store32_index16.cc
index 4c081f3..5b3f1e2 100644
--- a/tests/cocotb/rvv/load_store/store32_index16.cc
+++ b/tests/cocotb/rvv/load_store/store32_index16.cc
@@ -17,7 +17,7 @@
 
 namespace {
 constexpr size_t scatter_count = 32;
-constexpr size_t buf_size = 4000;
+constexpr size_t buf_size = 7000;
 }  // namespace
 
 size_t vl __attribute__((section(".data"))) = 4;
diff --git a/tests/cocotb/rvv/load_store/store32_index32.cc b/tests/cocotb/rvv/load_store/store32_index32.cc
index 07001f5..01ce3b8 100644
--- a/tests/cocotb/rvv/load_store/store32_index32.cc
+++ b/tests/cocotb/rvv/load_store/store32_index32.cc
@@ -17,7 +17,7 @@
 
 namespace {
 constexpr size_t scatter_count = 32;
-constexpr size_t buf_size = 8000;
+constexpr size_t buf_size = 7000;
 }  // namespace
 
 size_t vl __attribute__((section(".data"))) = 8;
diff --git a/tests/cocotb/rvv/load_store/store32_index8.cc b/tests/cocotb/rvv/load_store/store32_index8.cc
index 465994f..19d5546 100644
--- a/tests/cocotb/rvv/load_store/store32_index8.cc
+++ b/tests/cocotb/rvv/load_store/store32_index8.cc
@@ -17,7 +17,7 @@
 
 namespace {
 constexpr size_t scatter_count = 32;
-constexpr size_t buf_size = 257;
+constexpr size_t buf_size = 512;
 }  // namespace
 
 size_t vl __attribute__((section(".data"))) = 4;
diff --git a/tests/cocotb/rvv/load_store/store8_index16.cc b/tests/cocotb/rvv/load_store/store8_index16.cc
new file mode 100644
index 0000000..412e3e1
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/store8_index16.cc
@@ -0,0 +1,120 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+namespace {
+constexpr size_t scatter_count = 128;
+constexpr size_t buf_size = 30000;
+}  // namespace
+
+size_t vl __attribute__((section(".data"))) = 16;
+// Indices are always unsigned.
+uint16_t index_buf[scatter_count] __attribute__((section(".data")));
+// These instructions don't differentiate signed/unsigned so we only need to
+// test one. The types come from intrinsic level.
+uint8_t in_buf[scatter_count] __attribute__((section(".data")));
+uint8_t out_buf[buf_size] __attribute__((section(".data")));
+
+extern "C" {
+// Unordered
+__attribute__((used, retain)) void vsuxei16_v_u8mf4() {
+  auto indices = __riscv_vle16_v_u16mf2(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8mf4(in_buf, vl);
+  __riscv_vsuxei16_v_u8mf4(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsuxei16_v_u8mf2() {
+  auto indices = __riscv_vle16_v_u16m1(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8mf2(in_buf, vl);
+  __riscv_vsuxei16_v_u8mf2(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsuxei16_v_u8m1() {
+  auto indices = __riscv_vle16_v_u16m2(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8m1(in_buf, vl);
+  __riscv_vsuxei16_v_u8m1(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsuxei16_v_u8m2() {
+  auto indices = __riscv_vle16_v_u16m4(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8m2(in_buf, vl);
+  __riscv_vsuxei16_v_u8m2(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsuxei16_v_u8m4() {
+  auto indices = __riscv_vle16_v_u16m8(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8m4(in_buf, vl);
+  __riscv_vsuxei16_v_u8m4(out_buf, indices, data, vl);
+}
+
+// Ordered
+__attribute__((used, retain)) void vsoxei16_v_u8mf4() {
+  auto indices = __riscv_vle16_v_u16mf2(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8mf4(in_buf, vl);
+  __riscv_vsoxei16_v_u8mf4(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsoxei16_v_u8mf2() {
+  auto indices = __riscv_vle16_v_u16m1(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8mf2(in_buf, vl);
+  __riscv_vsoxei16_v_u8mf2(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsoxei16_v_u8m1() {
+  auto indices = __riscv_vle16_v_u16m2(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8m1(in_buf, vl);
+  __riscv_vsoxei16_v_u8m1(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsoxei16_v_u8m2() {
+  auto indices = __riscv_vle16_v_u16m4(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8m2(in_buf, vl);
+  __riscv_vsoxei16_v_u8m2(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsoxei16_v_u8m4() {
+  auto indices = __riscv_vle16_v_u16m8(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8m4(in_buf, vl);
+  __riscv_vsoxei16_v_u8m4(out_buf, indices, data, vl);
+}
+}
+
+void (*impl)() __attribute__((section(".data"))) = &vsuxei16_v_u8m1;
+
+int main(int argc, char** argv) {
+  impl();
+  return 0;
+}
diff --git a/tests/cocotb/rvv/load_store/store8_index32.cc b/tests/cocotb/rvv/load_store/store8_index32.cc
new file mode 100644
index 0000000..2cd6d35
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/store8_index32.cc
@@ -0,0 +1,104 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+namespace {
+constexpr size_t scatter_count = 128;
+constexpr size_t buf_size = 30000;
+}  // namespace
+
+size_t vl __attribute__((section(".data"))) = 16;
+// Indices are always unsigned.
+uint32_t index_buf[scatter_count] __attribute__((section(".data")));
+// These instructions don't differentiate signed/unsigned so we only need to
+// test one. The types come from intrinsic level.
+uint8_t in_buf[scatter_count] __attribute__((section(".data")));
+uint8_t out_buf[buf_size] __attribute__((section(".data")));
+
+extern "C" {
+// Unordered
+__attribute__((used, retain)) void vsuxei32_v_u8mf4() {
+  auto indices = __riscv_vle32_v_u32m1(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8mf4(in_buf, vl);
+  __riscv_vsuxei32_v_u8mf4(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsuxei32_v_u8mf2() {
+  auto indices = __riscv_vle32_v_u32m2(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8mf2(in_buf, vl);
+  __riscv_vsuxei32_v_u8mf2(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsuxei32_v_u8m1() {
+  auto indices = __riscv_vle32_v_u32m4(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8m1(in_buf, vl);
+  __riscv_vsuxei32_v_u8m1(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsuxei32_v_u8m2() {
+  auto indices = __riscv_vle32_v_u32m8(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8m2(in_buf, vl);
+  __riscv_vsuxei32_v_u8m2(out_buf, indices, data, vl);
+}
+
+// Ordered
+__attribute__((used, retain)) void vsoxei32_v_u8mf4() {
+  auto indices = __riscv_vle32_v_u32m1(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8mf4(in_buf, vl);
+  __riscv_vsoxei32_v_u8mf4(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsoxei32_v_u8mf2() {
+  auto indices = __riscv_vle32_v_u32m2(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8mf2(in_buf, vl);
+  __riscv_vsoxei32_v_u8mf2(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsoxei32_v_u8m1() {
+  auto indices = __riscv_vle32_v_u32m4(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8m1(in_buf, vl);
+  __riscv_vsoxei32_v_u8m1(out_buf, indices, data, vl);
+}
+
+__attribute__((used, retain)) void vsoxei32_v_u8m2() {
+  auto indices = __riscv_vle32_v_u32m8(index_buf, vl);
+  // TODO(davidgao): Remove once compiler bug is eliminated
+  asm volatile("" ::: "vtype");
+  auto data = __riscv_vle8_v_u8m2(in_buf, vl);
+  __riscv_vsoxei32_v_u8m2(out_buf, indices, data, vl);
+}
+}
+
+void (*impl)() __attribute__((section(".data"))) = &vsuxei32_v_u8m1;
+
+int main(int argc, char** argv) {
+  impl();
+  return 0;
+}
diff --git a/tests/cocotb/rvv_load_store_test.py b/tests/cocotb/rvv_load_store_test.py
index 424a247..c3c2f79 100644
--- a/tests/cocotb/rvv_load_store_test.py
+++ b/tests/cocotb/rvv_load_store_test.py
@@ -214,7 +214,9 @@
 
         struct_bytes = np.dtype(data_dtype).itemsize * segments
         # Don't go beyond the buffer.
-        index_max = min(np.iinfo(index_dtype).max, out_size)
+        index_max = min(
+            np.iinfo(index_dtype).max,
+            out_size * np.dtype(data_dtype).itemsize)
         assert vl * struct_bytes <= index_max
         index_max = index_max - struct_bytes
         # TODO(davidgao): currently assuming the vl is supported.
@@ -2229,6 +2231,88 @@
 
 
 @cocotb.test()
+async def store8_index16(dut):
+    """Test vs*xei16_v_u8 usage accessible from intrinsics."""
+    def make_test_case(impl: str, vl: int):
+        return {
+            'impl': impl,
+            'vl': vl,
+            'segments': 1,
+            'out_size': 30000,
+        }
+
+    await vector_store_segmented_indexed(
+        dut = dut,
+        elf_name = 'store8_index16.elf',
+        cases = [
+            # Unordered
+            make_test_case('vsuxei16_v_u8mf4', vl = 4),
+            make_test_case('vsuxei16_v_u8mf4', vl = 3),
+            make_test_case('vsuxei16_v_u8mf2', vl = 8),
+            make_test_case('vsuxei16_v_u8mf2', vl = 7),
+            make_test_case('vsuxei16_v_u8m1', vl = 16),
+            make_test_case('vsuxei16_v_u8m1', vl = 15),
+            make_test_case('vsuxei16_v_u8m2', vl = 32),
+            make_test_case('vsuxei16_v_u8m2', vl = 31),
+            make_test_case('vsuxei16_v_u8m4', vl = 64),
+            make_test_case('vsuxei16_v_u8m4', vl = 63),
+            # Ordered
+            make_test_case('vsoxei16_v_u8mf4', vl = 4),
+            make_test_case('vsoxei16_v_u8mf4', vl = 3),
+            make_test_case('vsoxei16_v_u8mf2', vl = 8),
+            make_test_case('vsoxei16_v_u8mf2', vl = 7),
+            make_test_case('vsoxei16_v_u8m1', vl = 16),
+            make_test_case('vsoxei16_v_u8m1', vl = 15),
+            make_test_case('vsoxei16_v_u8m2', vl = 32),
+            make_test_case('vsoxei16_v_u8m2', vl = 31),
+            make_test_case('vsoxei16_v_u8m4', vl = 64),
+            make_test_case('vsoxei16_v_u8m4', vl = 63),
+        ],
+        data_dtype = np.uint8,
+        index_dtype = np.uint16,
+    )
+
+
+@cocotb.test()
+async def store8_index32(dut):
+    """Test vs*xei32_v_u8 usage accessible from intrinsics."""
+    def make_test_case(impl: str, vl: int):
+        return {
+            'impl': impl,
+            'vl': vl,
+            'segments': 1,
+            'out_size': 30000,
+        }
+
+    await vector_store_segmented_indexed(
+        dut = dut,
+        elf_name = 'store8_index32.elf',
+        cases = [
+            # Unordered
+            make_test_case('vsuxei32_v_u8mf4', vl = 4),
+            make_test_case('vsuxei32_v_u8mf4', vl = 3),
+            make_test_case('vsuxei32_v_u8mf2', vl = 8),
+            make_test_case('vsuxei32_v_u8mf2', vl = 7),
+            make_test_case('vsuxei32_v_u8m1', vl = 16),
+            make_test_case('vsuxei32_v_u8m1', vl = 15),
+            make_test_case('vsuxei32_v_u8m2', vl = 32),
+            make_test_case('vsuxei32_v_u8m2', vl = 31),
+            # Ordered
+            make_test_case('vsoxei32_v_u8mf4', vl = 4),
+            make_test_case('vsoxei32_v_u8mf4', vl = 3),
+            make_test_case('vsoxei32_v_u8mf2', vl = 8),
+            make_test_case('vsoxei32_v_u8mf2', vl = 7),
+            make_test_case('vsoxei32_v_u8m1', vl = 16),
+            make_test_case('vsoxei32_v_u8m1', vl = 15),
+            make_test_case('vsoxei32_v_u8m2', vl = 32),
+            make_test_case('vsoxei32_v_u8m2', vl = 31),
+        ],
+        data_dtype = np.uint8,
+        index_dtype = np.uint32,
+    )
+
+
+@cocotb.test()
 async def store16_index8(dut):
     """Test vs*xei8_v_u16 usage accessible from intrinsics."""
     def make_test_case(impl: str, vl: int):
@@ -2279,7 +2363,7 @@
             'impl': impl,
             'vl': vl,
             'segments': 1,
-            'out_size': 16000,
+            'out_size': 15000,
         }
 
     await vector_store_segmented_indexed(
@@ -2315,6 +2399,45 @@
 
 
 @cocotb.test()
+async def store16_index32(dut):
+    """Test vs*xei32_v_u16 usage accessible from intrinsics."""
+    def make_test_case(impl: str, vl: int):
+        return {
+            'impl': impl,
+            'vl': vl,
+            'segments': 1,
+            'out_size': 15000,
+        }
+
+    await vector_store_segmented_indexed(
+        dut = dut,
+        elf_name = 'store16_index32.elf',
+        cases = [
+            # Unordered
+            make_test_case('vsuxei32_v_u16mf2', vl = 4),
+            make_test_case('vsuxei32_v_u16mf2', vl = 3),
+            make_test_case('vsuxei32_v_u16m1', vl = 8),
+            make_test_case('vsuxei32_v_u16m1', vl = 7),
+            make_test_case('vsuxei32_v_u16m2', vl = 16),
+            make_test_case('vsuxei32_v_u16m2', vl = 15),
+            make_test_case('vsuxei32_v_u16m4', vl = 32),
+            make_test_case('vsuxei32_v_u16m4', vl = 31),
+            # Ordered
+            make_test_case('vsoxei32_v_u16mf2', vl = 4),
+            make_test_case('vsoxei32_v_u16mf2', vl = 3),
+            make_test_case('vsoxei32_v_u16m1', vl = 8),
+            make_test_case('vsoxei32_v_u16m1', vl = 7),
+            make_test_case('vsoxei32_v_u16m2', vl = 16),
+            make_test_case('vsoxei32_v_u16m2', vl = 15),
+            make_test_case('vsoxei32_v_u16m4', vl = 32),
+            make_test_case('vsoxei32_v_u16m4', vl = 31),
+        ],
+        data_dtype = np.uint16,
+        index_dtype = np.uint32,
+    )
+
+
+@cocotb.test()
 async def store32_index8(dut):
     """Test vs*xei8_v_u32 usage accessible from intrinsics."""
     def make_test_case(impl: str, vl: int):
@@ -2322,7 +2445,7 @@
             'impl': impl,
             'vl': vl,
             'segments': 1,
-            'out_size': 257,
+            'out_size': 512,
         }
 
     await vector_store_segmented_indexed(
@@ -2361,7 +2484,7 @@
             'impl': impl,
             'vl': vl,
             'segments': 1,
-            'out_size': 4000,
+            'out_size': 7000,
         }
 
     await vector_store_segmented_indexed(
@@ -2400,7 +2523,7 @@
             'impl': impl,
             'vl': vl,
             'segments': 1,
-            'out_size': 8000,
+            'out_size': 7000,
         }
 
     await vector_store_segmented_indexed(