Test load16 index8 segmented. The timeout of the load-store test suite is extended to "eternal" because the suite now has many more cases and frequently times out, blocking submission. Change-Id: I6df5a482ccbaf9e6613b02970722d84f248719f5
diff --git a/tests/cocotb/BUILD b/tests/cocotb/BUILD index c319045..1af24cf 100644 --- a/tests/cocotb/BUILD +++ b/tests/cocotb/BUILD
@@ -184,6 +184,7 @@ "load8_stride2_m1_partial", "load8_stride2_mf4", "load16_index8", + "load16_index8_seg", "load16_index16_seg", "load16_seg_unit", "load16_stride4_m1", @@ -397,6 +398,7 @@ ], "data": ["//tests/cocotb/rvv/load_store:rvv_load_store_tests"], "size": "large", + "timeout": "eternal", }, vcs_data = ["//tests/cocotb/rvv/load_store:rvv_load_store_tests"] + [":coverage_exclude.cfg"], vcs_build_args = VCS_BUILD_ARGS,
diff --git a/tests/cocotb/rvv/load_store/BUILD b/tests/cocotb/rvv/load_store/BUILD index 4ab7ca0..9eb1c18 100644 --- a/tests/cocotb/rvv/load_store/BUILD +++ b/tests/cocotb/rvv/load_store/BUILD
@@ -74,6 +74,9 @@ "load16_index8": { "srcs": ["load16_index8.cc"], }, + "load16_index8_seg": { + "srcs": ["load16_index8_seg.cc"], + }, "load16_index16_seg": { "srcs": ["load16_index16_seg.cc"], }, @@ -158,6 +161,7 @@ ":load_store16_unit_m2.elf", ":load_store32_unit_m2.elf", ":load16_index8.elf", + ":load16_index8_seg.elf", ":load16_index16_seg.elf", ":load16_seg_unit.elf", ":load16_stride4_m1.elf",
diff --git a/tests/cocotb/rvv/load_store/load16_index8_seg.cc b/tests/cocotb/rvv/load_store/load16_index8_seg.cc new file mode 100644 index 0000000..7fb9b5b --- /dev/null +++ b/tests/cocotb/rvv/load_store/load16_index8_seg.cc
@@ -0,0 +1,593 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <riscv_vector.h> +#include <stdint.h> + +namespace { +constexpr size_t lut_size = 271; +// Double sized so we can check trailing regions are not read/written. +constexpr size_t buf_size = 128; +} // namespace + +size_t vl __attribute__((section(".data"))) = 8; +// Indices are always unsigned. +uint8_t index_buf[buf_size] __attribute__((section(".data"))); +// These instructions don't differentiate signed/unsigned so we only need to +// test one. The types come from intrinsic level. 
+uint16_t in_buf[lut_size] __attribute__((section(".data"))); +uint16_t out_buf[buf_size] __attribute__((section(".data"))); + +extern "C" { +// Unordered, segment 2 +__attribute__((used, retain)) void vluxseg2ei8_v_u16mf2x2() { + auto indices = __riscv_vle8_v_u8mf4(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vluxseg2ei8_v_u16mf2x2(in_buf, indices, vl); + __riscv_vse16_v_u16mf2(out_buf, __riscv_vget_v_u16mf2x2_u16mf2(data, 0), vl); + __riscv_vse16_v_u16mf2(out_buf + vl, __riscv_vget_v_u16mf2x2_u16mf2(data, 1), + vl); +} + +__attribute__((used, retain)) void vluxseg2ei8_v_u16m1x2() { + auto indices = __riscv_vle8_v_u8mf2(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vluxseg2ei8_v_u16m1x2(in_buf, indices, vl); + __riscv_vse16_v_u16m1(out_buf, __riscv_vget_v_u16m1x2_u16m1(data, 0), vl); + __riscv_vse16_v_u16m1(out_buf + vl, __riscv_vget_v_u16m1x2_u16m1(data, 1), + vl); +} + +__attribute__((used, retain)) void vluxseg2ei8_v_u16m2x2() { + auto indices = __riscv_vle8_v_u8m1(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vluxseg2ei8_v_u16m2x2(in_buf, indices, vl); + __riscv_vse16_v_u16m2(out_buf, __riscv_vget_v_u16m2x2_u16m2(data, 0), vl); + __riscv_vse16_v_u16m2(out_buf + vl, __riscv_vget_v_u16m2x2_u16m2(data, 1), + vl); +} + +__attribute__((used, retain)) void vluxseg2ei8_v_u16m4x2() { + auto indices = __riscv_vle8_v_u8m2(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vluxseg2ei8_v_u16m4x2(in_buf, indices, vl); + __riscv_vse16_v_u16m4(out_buf, __riscv_vget_v_u16m4x2_u16m4(data, 0), vl); + __riscv_vse16_v_u16m4(out_buf + vl, __riscv_vget_v_u16m4x2_u16m4(data, 1), + vl); +} + +// Unordered, segment 3 +__attribute__((used, retain)) void 
vluxseg3ei8_v_u16mf2x3() { + auto indices = __riscv_vle8_v_u8mf4(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vluxseg3ei8_v_u16mf2x3(in_buf, indices, vl); + __riscv_vse16_v_u16mf2(out_buf, __riscv_vget_v_u16mf2x3_u16mf2(data, 0), vl); + __riscv_vse16_v_u16mf2(out_buf + vl, __riscv_vget_v_u16mf2x3_u16mf2(data, 1), + vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 2, + __riscv_vget_v_u16mf2x3_u16mf2(data, 2), vl); +} + +__attribute__((used, retain)) void vluxseg3ei8_v_u16m1x3() { + auto indices = __riscv_vle8_v_u8mf2(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vluxseg3ei8_v_u16m1x3(in_buf, indices, vl); + __riscv_vse16_v_u16m1(out_buf, __riscv_vget_v_u16m1x3_u16m1(data, 0), vl); + __riscv_vse16_v_u16m1(out_buf + vl, __riscv_vget_v_u16m1x3_u16m1(data, 1), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 2, __riscv_vget_v_u16m1x3_u16m1(data, 2), + vl); +} + +__attribute__((used, retain)) void vluxseg3ei8_v_u16m2x3() { + auto indices = __riscv_vle8_v_u8m1(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vluxseg3ei8_v_u16m2x3(in_buf, indices, vl); + __riscv_vse16_v_u16m2(out_buf, __riscv_vget_v_u16m2x3_u16m2(data, 0), vl); + __riscv_vse16_v_u16m2(out_buf + vl, __riscv_vget_v_u16m2x3_u16m2(data, 1), + vl); + __riscv_vse16_v_u16m2(out_buf + vl * 2, __riscv_vget_v_u16m2x3_u16m2(data, 2), + vl); +} + +// Unordered, segment 4 +__attribute__((used, retain)) void vluxseg4ei8_v_u16mf2x4() { + auto indices = __riscv_vle8_v_u8mf4(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vluxseg4ei8_v_u16mf2x4(in_buf, indices, vl); + __riscv_vse16_v_u16mf2(out_buf, __riscv_vget_v_u16mf2x4_u16mf2(data, 0), vl); + __riscv_vse16_v_u16mf2(out_buf + vl, 
__riscv_vget_v_u16mf2x4_u16mf2(data, 1), + vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 2, + __riscv_vget_v_u16mf2x4_u16mf2(data, 2), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 3, + __riscv_vget_v_u16mf2x4_u16mf2(data, 3), vl); +} + +__attribute__((used, retain)) void vluxseg4ei8_v_u16m1x4() { + auto indices = __riscv_vle8_v_u8mf2(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vluxseg4ei8_v_u16m1x4(in_buf, indices, vl); + __riscv_vse16_v_u16m1(out_buf, __riscv_vget_v_u16m1x4_u16m1(data, 0), vl); + __riscv_vse16_v_u16m1(out_buf + vl, __riscv_vget_v_u16m1x4_u16m1(data, 1), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 2, __riscv_vget_v_u16m1x4_u16m1(data, 2), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 3, __riscv_vget_v_u16m1x4_u16m1(data, 3), + vl); +} + +__attribute__((used, retain)) void vluxseg4ei8_v_u16m2x4() { + auto indices = __riscv_vle8_v_u8m1(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vluxseg4ei8_v_u16m2x4(in_buf, indices, vl); + __riscv_vse16_v_u16m2(out_buf, __riscv_vget_v_u16m2x4_u16m2(data, 0), vl); + __riscv_vse16_v_u16m2(out_buf + vl, __riscv_vget_v_u16m2x4_u16m2(data, 1), + vl); + __riscv_vse16_v_u16m2(out_buf + vl * 2, __riscv_vget_v_u16m2x4_u16m2(data, 2), + vl); + __riscv_vse16_v_u16m2(out_buf + vl * 3, __riscv_vget_v_u16m2x4_u16m2(data, 3), + vl); +} + +// Unordered, segment 5 +__attribute__((used, retain)) void vluxseg5ei8_v_u16mf2x5() { + auto indices = __riscv_vle8_v_u8mf4(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vluxseg5ei8_v_u16mf2x5(in_buf, indices, vl); + __riscv_vse16_v_u16mf2(out_buf, __riscv_vget_v_u16mf2x5_u16mf2(data, 0), vl); + __riscv_vse16_v_u16mf2(out_buf + vl, __riscv_vget_v_u16mf2x5_u16mf2(data, 1), + vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 2, + 
__riscv_vget_v_u16mf2x5_u16mf2(data, 2), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 3, + __riscv_vget_v_u16mf2x5_u16mf2(data, 3), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 4, + __riscv_vget_v_u16mf2x5_u16mf2(data, 4), vl); +} + +__attribute__((used, retain)) void vluxseg5ei8_v_u16m1x5() { + auto indices = __riscv_vle8_v_u8mf2(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vluxseg5ei8_v_u16m1x5(in_buf, indices, vl); + __riscv_vse16_v_u16m1(out_buf, __riscv_vget_v_u16m1x5_u16m1(data, 0), vl); + __riscv_vse16_v_u16m1(out_buf + vl, __riscv_vget_v_u16m1x5_u16m1(data, 1), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 2, __riscv_vget_v_u16m1x5_u16m1(data, 2), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 3, __riscv_vget_v_u16m1x5_u16m1(data, 3), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 4, __riscv_vget_v_u16m1x5_u16m1(data, 4), + vl); +} + +// Unordered, segment 6 +__attribute__((used, retain)) void vluxseg6ei8_v_u16mf2x6() { + auto indices = __riscv_vle8_v_u8mf4(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vluxseg6ei8_v_u16mf2x6(in_buf, indices, vl); + __riscv_vse16_v_u16mf2(out_buf, __riscv_vget_v_u16mf2x6_u16mf2(data, 0), vl); + __riscv_vse16_v_u16mf2(out_buf + vl, __riscv_vget_v_u16mf2x6_u16mf2(data, 1), + vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 2, + __riscv_vget_v_u16mf2x6_u16mf2(data, 2), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 3, + __riscv_vget_v_u16mf2x6_u16mf2(data, 3), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 4, + __riscv_vget_v_u16mf2x6_u16mf2(data, 4), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 5, + __riscv_vget_v_u16mf2x6_u16mf2(data, 5), vl); +} + +__attribute__((used, retain)) void vluxseg6ei8_v_u16m1x6() { + auto indices = __riscv_vle8_v_u8mf2(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = 
__riscv_vluxseg6ei8_v_u16m1x6(in_buf, indices, vl); + __riscv_vse16_v_u16m1(out_buf, __riscv_vget_v_u16m1x6_u16m1(data, 0), vl); + __riscv_vse16_v_u16m1(out_buf + vl, __riscv_vget_v_u16m1x6_u16m1(data, 1), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 2, __riscv_vget_v_u16m1x6_u16m1(data, 2), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 3, __riscv_vget_v_u16m1x6_u16m1(data, 3), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 4, __riscv_vget_v_u16m1x6_u16m1(data, 4), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 5, __riscv_vget_v_u16m1x6_u16m1(data, 5), + vl); +} + +// Unordered, segment 7 +__attribute__((used, retain)) void vluxseg7ei8_v_u16mf2x7() { + auto indices = __riscv_vle8_v_u8mf4(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vluxseg7ei8_v_u16mf2x7(in_buf, indices, vl); + __riscv_vse16_v_u16mf2(out_buf, __riscv_vget_v_u16mf2x7_u16mf2(data, 0), vl); + __riscv_vse16_v_u16mf2(out_buf + vl, __riscv_vget_v_u16mf2x7_u16mf2(data, 1), + vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 2, + __riscv_vget_v_u16mf2x7_u16mf2(data, 2), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 3, + __riscv_vget_v_u16mf2x7_u16mf2(data, 3), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 4, + __riscv_vget_v_u16mf2x7_u16mf2(data, 4), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 5, + __riscv_vget_v_u16mf2x7_u16mf2(data, 5), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 6, + __riscv_vget_v_u16mf2x7_u16mf2(data, 6), vl); +} + +__attribute__((used, retain)) void vluxseg7ei8_v_u16m1x7() { + auto indices = __riscv_vle8_v_u8mf2(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vluxseg7ei8_v_u16m1x7(in_buf, indices, vl); + __riscv_vse16_v_u16m1(out_buf, __riscv_vget_v_u16m1x7_u16m1(data, 0), vl); + __riscv_vse16_v_u16m1(out_buf + vl, __riscv_vget_v_u16m1x7_u16m1(data, 1), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 2, 
__riscv_vget_v_u16m1x7_u16m1(data, 2), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 3, __riscv_vget_v_u16m1x7_u16m1(data, 3), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 4, __riscv_vget_v_u16m1x7_u16m1(data, 4), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 5, __riscv_vget_v_u16m1x7_u16m1(data, 5), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 6, __riscv_vget_v_u16m1x7_u16m1(data, 6), + vl); +} + +// Unordered, segment 8 +__attribute__((used, retain)) void vluxseg8ei8_v_u16mf2x8() { + auto indices = __riscv_vle8_v_u8mf4(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vluxseg8ei8_v_u16mf2x8(in_buf, indices, vl); + __riscv_vse16_v_u16mf2(out_buf, __riscv_vget_v_u16mf2x8_u16mf2(data, 0), vl); + __riscv_vse16_v_u16mf2(out_buf + vl, __riscv_vget_v_u16mf2x8_u16mf2(data, 1), + vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 2, + __riscv_vget_v_u16mf2x8_u16mf2(data, 2), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 3, + __riscv_vget_v_u16mf2x8_u16mf2(data, 3), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 4, + __riscv_vget_v_u16mf2x8_u16mf2(data, 4), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 5, + __riscv_vget_v_u16mf2x8_u16mf2(data, 5), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 6, + __riscv_vget_v_u16mf2x8_u16mf2(data, 6), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 7, + __riscv_vget_v_u16mf2x8_u16mf2(data, 7), vl); +} + +__attribute__((used, retain)) void vluxseg8ei8_v_u16m1x8() { + auto indices = __riscv_vle8_v_u8mf2(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vluxseg8ei8_v_u16m1x8(in_buf, indices, vl); + __riscv_vse16_v_u16m1(out_buf, __riscv_vget_v_u16m1x8_u16m1(data, 0), vl); + __riscv_vse16_v_u16m1(out_buf + vl, __riscv_vget_v_u16m1x8_u16m1(data, 1), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 2, __riscv_vget_v_u16m1x8_u16m1(data, 2), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 3, 
__riscv_vget_v_u16m1x8_u16m1(data, 3), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 4, __riscv_vget_v_u16m1x8_u16m1(data, 4), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 5, __riscv_vget_v_u16m1x8_u16m1(data, 5), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 6, __riscv_vget_v_u16m1x8_u16m1(data, 6), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 7, __riscv_vget_v_u16m1x8_u16m1(data, 7), + vl); +} + +// Ordered, segment 2 +__attribute__((used, retain)) void vloxseg2ei8_v_u16mf2x2() { + auto indices = __riscv_vle8_v_u8mf4(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vloxseg2ei8_v_u16mf2x2(in_buf, indices, vl); + __riscv_vse16_v_u16mf2(out_buf, __riscv_vget_v_u16mf2x2_u16mf2(data, 0), vl); + __riscv_vse16_v_u16mf2(out_buf + vl, __riscv_vget_v_u16mf2x2_u16mf2(data, 1), + vl); +} + +__attribute__((used, retain)) void vloxseg2ei8_v_u16m1x2() { + auto indices = __riscv_vle8_v_u8mf2(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vloxseg2ei8_v_u16m1x2(in_buf, indices, vl); + __riscv_vse16_v_u16m1(out_buf, __riscv_vget_v_u16m1x2_u16m1(data, 0), vl); + __riscv_vse16_v_u16m1(out_buf + vl, __riscv_vget_v_u16m1x2_u16m1(data, 1), + vl); +} + +__attribute__((used, retain)) void vloxseg2ei8_v_u16m2x2() { + auto indices = __riscv_vle8_v_u8m1(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vloxseg2ei8_v_u16m2x2(in_buf, indices, vl); + __riscv_vse16_v_u16m2(out_buf, __riscv_vget_v_u16m2x2_u16m2(data, 0), vl); + __riscv_vse16_v_u16m2(out_buf + vl, __riscv_vget_v_u16m2x2_u16m2(data, 1), + vl); +} + +__attribute__((used, retain)) void vloxseg2ei8_v_u16m4x2() { + auto indices = __riscv_vle8_v_u8m2(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = 
__riscv_vloxseg2ei8_v_u16m4x2(in_buf, indices, vl); + __riscv_vse16_v_u16m4(out_buf, __riscv_vget_v_u16m4x2_u16m4(data, 0), vl); + __riscv_vse16_v_u16m4(out_buf + vl, __riscv_vget_v_u16m4x2_u16m4(data, 1), + vl); +} + +// Ordered, segment 3 +__attribute__((used, retain)) void vloxseg3ei8_v_u16mf2x3() { + auto indices = __riscv_vle8_v_u8mf4(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vloxseg3ei8_v_u16mf2x3(in_buf, indices, vl); + __riscv_vse16_v_u16mf2(out_buf, __riscv_vget_v_u16mf2x3_u16mf2(data, 0), vl); + __riscv_vse16_v_u16mf2(out_buf + vl, __riscv_vget_v_u16mf2x3_u16mf2(data, 1), + vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 2, + __riscv_vget_v_u16mf2x3_u16mf2(data, 2), vl); +} + +__attribute__((used, retain)) void vloxseg3ei8_v_u16m1x3() { + auto indices = __riscv_vle8_v_u8mf2(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vloxseg3ei8_v_u16m1x3(in_buf, indices, vl); + __riscv_vse16_v_u16m1(out_buf, __riscv_vget_v_u16m1x3_u16m1(data, 0), vl); + __riscv_vse16_v_u16m1(out_buf + vl, __riscv_vget_v_u16m1x3_u16m1(data, 1), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 2, __riscv_vget_v_u16m1x3_u16m1(data, 2), + vl); +} + +__attribute__((used, retain)) void vloxseg3ei8_v_u16m2x3() { + auto indices = __riscv_vle8_v_u8m1(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vloxseg3ei8_v_u16m2x3(in_buf, indices, vl); + __riscv_vse16_v_u16m2(out_buf, __riscv_vget_v_u16m2x3_u16m2(data, 0), vl); + __riscv_vse16_v_u16m2(out_buf + vl, __riscv_vget_v_u16m2x3_u16m2(data, 1), + vl); + __riscv_vse16_v_u16m2(out_buf + vl * 2, __riscv_vget_v_u16m2x3_u16m2(data, 2), + vl); +} + +// Ordered, segment 4 +__attribute__((used, retain)) void vloxseg4ei8_v_u16mf2x4() { + auto indices = __riscv_vle8_v_u8mf4(index_buf, vl); + // TODO(davidgao): 
Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vloxseg4ei8_v_u16mf2x4(in_buf, indices, vl); + __riscv_vse16_v_u16mf2(out_buf, __riscv_vget_v_u16mf2x4_u16mf2(data, 0), vl); + __riscv_vse16_v_u16mf2(out_buf + vl, __riscv_vget_v_u16mf2x4_u16mf2(data, 1), + vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 2, + __riscv_vget_v_u16mf2x4_u16mf2(data, 2), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 3, + __riscv_vget_v_u16mf2x4_u16mf2(data, 3), vl); +} + +__attribute__((used, retain)) void vloxseg4ei8_v_u16m1x4() { + auto indices = __riscv_vle8_v_u8mf2(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vloxseg4ei8_v_u16m1x4(in_buf, indices, vl); + __riscv_vse16_v_u16m1(out_buf, __riscv_vget_v_u16m1x4_u16m1(data, 0), vl); + __riscv_vse16_v_u16m1(out_buf + vl, __riscv_vget_v_u16m1x4_u16m1(data, 1), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 2, __riscv_vget_v_u16m1x4_u16m1(data, 2), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 3, __riscv_vget_v_u16m1x4_u16m1(data, 3), + vl); +} + +__attribute__((used, retain)) void vloxseg4ei8_v_u16m2x4() { + auto indices = __riscv_vle8_v_u8m1(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vloxseg4ei8_v_u16m2x4(in_buf, indices, vl); + __riscv_vse16_v_u16m2(out_buf, __riscv_vget_v_u16m2x4_u16m2(data, 0), vl); + __riscv_vse16_v_u16m2(out_buf + vl, __riscv_vget_v_u16m2x4_u16m2(data, 1), + vl); + __riscv_vse16_v_u16m2(out_buf + vl * 2, __riscv_vget_v_u16m2x4_u16m2(data, 2), + vl); + __riscv_vse16_v_u16m2(out_buf + vl * 3, __riscv_vget_v_u16m2x4_u16m2(data, 3), + vl); +} + +// Ordered, segment 5 +__attribute__((used, retain)) void vloxseg5ei8_v_u16mf2x5() { + auto indices = __riscv_vle8_v_u8mf4(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = 
__riscv_vloxseg5ei8_v_u16mf2x5(in_buf, indices, vl); + __riscv_vse16_v_u16mf2(out_buf, __riscv_vget_v_u16mf2x5_u16mf2(data, 0), vl); + __riscv_vse16_v_u16mf2(out_buf + vl, __riscv_vget_v_u16mf2x5_u16mf2(data, 1), + vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 2, + __riscv_vget_v_u16mf2x5_u16mf2(data, 2), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 3, + __riscv_vget_v_u16mf2x5_u16mf2(data, 3), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 4, + __riscv_vget_v_u16mf2x5_u16mf2(data, 4), vl); +} + +__attribute__((used, retain)) void vloxseg5ei8_v_u16m1x5() { + auto indices = __riscv_vle8_v_u8mf2(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vloxseg5ei8_v_u16m1x5(in_buf, indices, vl); + __riscv_vse16_v_u16m1(out_buf, __riscv_vget_v_u16m1x5_u16m1(data, 0), vl); + __riscv_vse16_v_u16m1(out_buf + vl, __riscv_vget_v_u16m1x5_u16m1(data, 1), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 2, __riscv_vget_v_u16m1x5_u16m1(data, 2), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 3, __riscv_vget_v_u16m1x5_u16m1(data, 3), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 4, __riscv_vget_v_u16m1x5_u16m1(data, 4), + vl); +} + +// Ordered, segment 6 +__attribute__((used, retain)) void vloxseg6ei8_v_u16mf2x6() { + auto indices = __riscv_vle8_v_u8mf4(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vloxseg6ei8_v_u16mf2x6(in_buf, indices, vl); + __riscv_vse16_v_u16mf2(out_buf, __riscv_vget_v_u16mf2x6_u16mf2(data, 0), vl); + __riscv_vse16_v_u16mf2(out_buf + vl, __riscv_vget_v_u16mf2x6_u16mf2(data, 1), + vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 2, + __riscv_vget_v_u16mf2x6_u16mf2(data, 2), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 3, + __riscv_vget_v_u16mf2x6_u16mf2(data, 3), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 4, + __riscv_vget_v_u16mf2x6_u16mf2(data, 4), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 5, + 
__riscv_vget_v_u16mf2x6_u16mf2(data, 5), vl); +} + +__attribute__((used, retain)) void vloxseg6ei8_v_u16m1x6() { + auto indices = __riscv_vle8_v_u8mf2(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vloxseg6ei8_v_u16m1x6(in_buf, indices, vl); + __riscv_vse16_v_u16m1(out_buf, __riscv_vget_v_u16m1x6_u16m1(data, 0), vl); + __riscv_vse16_v_u16m1(out_buf + vl, __riscv_vget_v_u16m1x6_u16m1(data, 1), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 2, __riscv_vget_v_u16m1x6_u16m1(data, 2), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 3, __riscv_vget_v_u16m1x6_u16m1(data, 3), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 4, __riscv_vget_v_u16m1x6_u16m1(data, 4), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 5, __riscv_vget_v_u16m1x6_u16m1(data, 5), + vl); +} + +// Ordered, segment 7 +__attribute__((used, retain)) void vloxseg7ei8_v_u16mf2x7() { + auto indices = __riscv_vle8_v_u8mf4(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vloxseg7ei8_v_u16mf2x7(in_buf, indices, vl); + __riscv_vse16_v_u16mf2(out_buf, __riscv_vget_v_u16mf2x7_u16mf2(data, 0), vl); + __riscv_vse16_v_u16mf2(out_buf + vl, __riscv_vget_v_u16mf2x7_u16mf2(data, 1), + vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 2, + __riscv_vget_v_u16mf2x7_u16mf2(data, 2), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 3, + __riscv_vget_v_u16mf2x7_u16mf2(data, 3), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 4, + __riscv_vget_v_u16mf2x7_u16mf2(data, 4), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 5, + __riscv_vget_v_u16mf2x7_u16mf2(data, 5), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 6, + __riscv_vget_v_u16mf2x7_u16mf2(data, 6), vl); +} + +__attribute__((used, retain)) void vloxseg7ei8_v_u16m1x7() { + auto indices = __riscv_vle8_v_u8mf2(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = 
__riscv_vloxseg7ei8_v_u16m1x7(in_buf, indices, vl); + __riscv_vse16_v_u16m1(out_buf, __riscv_vget_v_u16m1x7_u16m1(data, 0), vl); + __riscv_vse16_v_u16m1(out_buf + vl, __riscv_vget_v_u16m1x7_u16m1(data, 1), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 2, __riscv_vget_v_u16m1x7_u16m1(data, 2), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 3, __riscv_vget_v_u16m1x7_u16m1(data, 3), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 4, __riscv_vget_v_u16m1x7_u16m1(data, 4), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 5, __riscv_vget_v_u16m1x7_u16m1(data, 5), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 6, __riscv_vget_v_u16m1x7_u16m1(data, 6), + vl); +} + +// Ordered, segment 8 +__attribute__((used, retain)) void vloxseg8ei8_v_u16mf2x8() { + auto indices = __riscv_vle8_v_u8mf4(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vloxseg8ei8_v_u16mf2x8(in_buf, indices, vl); + __riscv_vse16_v_u16mf2(out_buf, __riscv_vget_v_u16mf2x8_u16mf2(data, 0), vl); + __riscv_vse16_v_u16mf2(out_buf + vl, __riscv_vget_v_u16mf2x8_u16mf2(data, 1), + vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 2, + __riscv_vget_v_u16mf2x8_u16mf2(data, 2), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 3, + __riscv_vget_v_u16mf2x8_u16mf2(data, 3), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 4, + __riscv_vget_v_u16mf2x8_u16mf2(data, 4), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 5, + __riscv_vget_v_u16mf2x8_u16mf2(data, 5), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 6, + __riscv_vget_v_u16mf2x8_u16mf2(data, 6), vl); + __riscv_vse16_v_u16mf2(out_buf + vl * 7, + __riscv_vget_v_u16mf2x8_u16mf2(data, 7), vl); +} + +__attribute__((used, retain)) void vloxseg8ei8_v_u16m1x8() { + auto indices = __riscv_vle8_v_u8mf2(index_buf, vl); + // TODO(davidgao): Remove once compiler bug is eliminated + asm volatile("" ::: "vtype"); + auto data = __riscv_vloxseg8ei8_v_u16m1x8(in_buf, indices, vl); + __riscv_vse16_v_u16m1(out_buf, 
__riscv_vget_v_u16m1x8_u16m1(data, 0), vl); + __riscv_vse16_v_u16m1(out_buf + vl, __riscv_vget_v_u16m1x8_u16m1(data, 1), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 2, __riscv_vget_v_u16m1x8_u16m1(data, 2), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 3, __riscv_vget_v_u16m1x8_u16m1(data, 3), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 4, __riscv_vget_v_u16m1x8_u16m1(data, 4), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 5, __riscv_vget_v_u16m1x8_u16m1(data, 5), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 6, __riscv_vget_v_u16m1x8_u16m1(data, 6), + vl); + __riscv_vse16_v_u16m1(out_buf + vl * 7, __riscv_vget_v_u16m1x8_u16m1(data, 7), + vl); +} +} + +void (*impl)() __attribute__((section(".data"))) = &vluxseg2ei8_v_u16m1x2; + +int main(int argc, char** argv) { + impl(); + return 0; +}
diff --git a/tests/cocotb/rvv_load_store_test.py b/tests/cocotb/rvv_load_store_test.py index b6f3b86..bc79775 100644 --- a/tests/cocotb/rvv_load_store_test.py +++ b/tests/cocotb/rvv_load_store_test.py
@@ -948,7 +948,7 @@ make_test_case('vluxseg3ei16_v_u8m1x3', vl=15, n_segs=3), # make_test_case('vluxseg3ei16_v_u8m2x3', vl=32, n_segs=3), # make_test_case('vluxseg3ei16_v_u8m2x3', vl=31, n_segs=3), - # # Unordered, segment 4 + # Unordered, segment 4 make_test_case('vluxseg4ei16_v_u8mf4x4', vl=4, n_segs=4), make_test_case('vluxseg4ei16_v_u8mf4x4', vl=3, n_segs=4), make_test_case('vluxseg4ei16_v_u8mf2x4', vl=8, n_segs=4), @@ -957,35 +957,35 @@ make_test_case('vluxseg4ei16_v_u8m1x4', vl=15, n_segs=4), # make_test_case('vluxseg4ei16_v_u8m2x4', vl=32, n_segs=4), # make_test_case('vluxseg4ei16_v_u8m2x4', vl=31, n_segs=4), - # # Unordered, segment 5 + # Unordered, segment 5 make_test_case('vluxseg5ei16_v_u8mf4x5', vl=4, n_segs=5), make_test_case('vluxseg5ei16_v_u8mf4x5', vl=3, n_segs=5), make_test_case('vluxseg5ei16_v_u8mf2x5', vl=8, n_segs=5), make_test_case('vluxseg5ei16_v_u8mf2x5', vl=7, n_segs=5), # make_test_case('vluxseg5ei16_v_u8m1x5', vl=16, n_segs=5), # make_test_case('vluxseg5ei16_v_u8m1x5', vl=15, n_segs=5), - # # Unordered, segment 6 + # Unordered, segment 6 make_test_case('vluxseg6ei16_v_u8mf4x6', vl=4, n_segs=6), make_test_case('vluxseg6ei16_v_u8mf4x6', vl=3, n_segs=6), make_test_case('vluxseg6ei16_v_u8mf2x6', vl=8, n_segs=6), make_test_case('vluxseg6ei16_v_u8mf2x6', vl=7, n_segs=6), # make_test_case('vluxseg6ei16_v_u8m1x6', vl=16, n_segs=6), # make_test_case('vluxseg6ei16_v_u8m1x6', vl=15, n_segs=6), - # # Unordered, segment 7 + # Unordered, segment 7 make_test_case('vluxseg7ei16_v_u8mf4x7', vl=4, n_segs=7), make_test_case('vluxseg7ei16_v_u8mf4x7', vl=3, n_segs=7), make_test_case('vluxseg7ei16_v_u8mf2x7', vl=8, n_segs=7), make_test_case('vluxseg7ei16_v_u8mf2x7', vl=7, n_segs=7), # make_test_case('vluxseg7ei16_v_u8m1x7', vl=16, n_segs=7), # make_test_case('vluxseg7ei16_v_u8m1x7', vl=15, n_segs=7), - # # Unordered, segment 8 + # Unordered, segment 8 make_test_case('vluxseg8ei16_v_u8mf4x8', vl=4, n_segs=8), make_test_case('vluxseg8ei16_v_u8mf4x8', vl=3, 
n_segs=8), make_test_case('vluxseg8ei16_v_u8mf2x8', vl=8, n_segs=8), make_test_case('vluxseg8ei16_v_u8mf2x8', vl=7, n_segs=8), # make_test_case('vluxseg8ei16_v_u8m1x8', vl=16, n_segs=8), # make_test_case('vluxseg8ei16_v_u8m1x8', vl=15, n_segs=8), - # # Ordered, segment 2 + # Ordered, segment 2 make_test_case('vloxseg2ei16_v_u8mf4x2', vl=4, n_segs=2), make_test_case('vloxseg2ei16_v_u8mf4x2', vl=3, n_segs=2), make_test_case('vloxseg2ei16_v_u8mf2x2', vl=8, n_segs=2), @@ -996,7 +996,7 @@ make_test_case('vloxseg2ei16_v_u8m2x2', vl=31, n_segs=2), # make_test_case('vloxseg2ei16_v_u8m4x2', vl=64, n_segs=2), # make_test_case('vloxseg2ei16_v_u8m4x2', vl=63, n_segs=2), - # # Ordered, segment 3 + # Ordered, segment 3 make_test_case('vloxseg3ei16_v_u8mf4x3', vl=4, n_segs=3), make_test_case('vloxseg3ei16_v_u8mf4x3', vl=3, n_segs=3), make_test_case('vloxseg3ei16_v_u8mf2x3', vl=8, n_segs=3), @@ -1005,7 +1005,7 @@ make_test_case('vloxseg3ei16_v_u8m1x3', vl=15, n_segs=3), # make_test_case('vloxseg3ei16_v_u8m2x3', vl=32, n_segs=3), # make_test_case('vloxseg3ei16_v_u8m2x3', vl=31, n_segs=3), - # # Ordered, segment 4 + # Ordered, segment 4 make_test_case('vloxseg4ei16_v_u8mf4x4', vl=4, n_segs=4), make_test_case('vloxseg4ei16_v_u8mf4x4', vl=3, n_segs=4), make_test_case('vloxseg4ei16_v_u8mf2x4', vl=8, n_segs=4), @@ -1014,28 +1014,28 @@ make_test_case('vloxseg4ei16_v_u8m1x4', vl=15, n_segs=4), # make_test_case('vloxseg4ei16_v_u8m2x4', vl=32, n_segs=4), # make_test_case('vloxseg4ei16_v_u8m2x4', vl=31, n_segs=4), - # # Ordered, segment 5 + # Ordered, segment 5 make_test_case('vloxseg5ei16_v_u8mf4x5', vl=4, n_segs=5), make_test_case('vloxseg5ei16_v_u8mf4x5', vl=3, n_segs=5), make_test_case('vloxseg5ei16_v_u8mf2x5', vl=8, n_segs=5), make_test_case('vloxseg5ei16_v_u8mf2x5', vl=7, n_segs=5), # make_test_case('vloxseg5ei16_v_u8m1x5', vl=16, n_segs=5), # make_test_case('vloxseg5ei16_v_u8m1x5', vl=15, n_segs=5), - # # Ordered, segment 6 + # Ordered, segment 6 
make_test_case('vloxseg6ei16_v_u8mf4x6', vl=4, n_segs=6), make_test_case('vloxseg6ei16_v_u8mf4x6', vl=3, n_segs=6), make_test_case('vloxseg6ei16_v_u8mf2x6', vl=8, n_segs=6), make_test_case('vloxseg6ei16_v_u8mf2x6', vl=7, n_segs=6), # make_test_case('vloxseg6ei16_v_u8m1x6', vl=16, n_segs=6), # make_test_case('vloxseg6ei16_v_u8m1x6', vl=15, n_segs=6), - # # Ordered, segment 7 + # Ordered, segment 7 make_test_case('vloxseg7ei16_v_u8mf4x7', vl=4, n_segs=7), make_test_case('vloxseg7ei16_v_u8mf4x7', vl=3, n_segs=7), make_test_case('vloxseg7ei16_v_u8mf2x7', vl=8, n_segs=7), make_test_case('vloxseg7ei16_v_u8mf2x7', vl=7, n_segs=7), # make_test_case('vloxseg7ei16_v_u8m1x7', vl=16, n_segs=7), # make_test_case('vloxseg7ei16_v_u8m1x7', vl=15, n_segs=7), - # # Ordered, segment 8 + # Ordered, segment 8 make_test_case('vloxseg8ei16_v_u8mf4x8', vl=4, n_segs=8), make_test_case('vloxseg8ei16_v_u8mf4x8', vl=3, n_segs=8), make_test_case('vloxseg8ei16_v_u8mf2x8', vl=8, n_segs=8), @@ -1211,6 +1211,114 @@ @cocotb.test() +async def load16_index8_seg(dut): + """Test vl*xseg*ei8_v_u16 usage accessible from intrinsics.""" + def make_test_case(impl: str, vl: int, n_segs: int): + return { + 'impl': impl, + 'vl': vl, + 'segments': n_segs, + 'in_bytes': 271, + 'out_size': vl * n_segs * 2, + } + + await vector_load_segmented_indexed( + dut = dut, + elf_name = 'load16_index8_seg.elf', + cases = [ + # Unordered, segment 2 + make_test_case('vluxseg2ei8_v_u16mf2x2', vl=4, n_segs=2), + make_test_case('vluxseg2ei8_v_u16mf2x2', vl=3, n_segs=2), + make_test_case('vluxseg2ei8_v_u16m1x2', vl=8, n_segs=2), + make_test_case('vluxseg2ei8_v_u16m1x2', vl=7, n_segs=2), + make_test_case('vluxseg2ei8_v_u16m2x2', vl=16, n_segs=2), + make_test_case('vluxseg2ei8_v_u16m2x2', vl=15, n_segs=2), + make_test_case('vluxseg2ei8_v_u16m4x2', vl=32, n_segs=2), + make_test_case('vluxseg2ei8_v_u16m4x2', vl=31, n_segs=2), + # Unordered, segment 3 + make_test_case('vluxseg3ei8_v_u16mf2x3', vl=4, n_segs=3), + 
make_test_case('vluxseg3ei8_v_u16mf2x3', vl=5, n_segs=3), + make_test_case('vluxseg3ei8_v_u16m1x3', vl=7, n_segs=3), + make_test_case('vluxseg3ei8_v_u16m1x3', vl=8, n_segs=3), + make_test_case('vluxseg3ei8_v_u16m2x3', vl=16, n_segs=3), + make_test_case('vluxseg3ei8_v_u16m2x3', vl=15, n_segs=3), + # Unordered, segment 4 + make_test_case('vluxseg4ei8_v_u16mf2x4', vl=4, n_segs=4), + make_test_case('vluxseg4ei8_v_u16mf2x4', vl=5, n_segs=4), + make_test_case('vluxseg4ei8_v_u16m1x4', vl=7, n_segs=4), + make_test_case('vluxseg4ei8_v_u16m1x4', vl=8, n_segs=4), + make_test_case('vluxseg4ei8_v_u16m2x4', vl=16, n_segs=4), + make_test_case('vluxseg4ei8_v_u16m2x4', vl=15, n_segs=4), + # Unordered, segment 5 + make_test_case('vluxseg5ei8_v_u16mf2x5', vl=4, n_segs=5), + make_test_case('vluxseg5ei8_v_u16mf2x5', vl=5, n_segs=5), + make_test_case('vluxseg5ei8_v_u16m1x5', vl=7, n_segs=5), + make_test_case('vluxseg5ei8_v_u16m1x5', vl=8, n_segs=5), + # Unordered, segment 6 + make_test_case('vluxseg6ei8_v_u16mf2x6', vl=4, n_segs=6), + make_test_case('vluxseg6ei8_v_u16mf2x6', vl=5, n_segs=6), + make_test_case('vluxseg6ei8_v_u16m1x6', vl=7, n_segs=6), + make_test_case('vluxseg6ei8_v_u16m1x6', vl=8, n_segs=6), + # Unordered, segment 7 + make_test_case('vluxseg7ei8_v_u16mf2x7', vl=4, n_segs=7), + make_test_case('vluxseg7ei8_v_u16mf2x7', vl=5, n_segs=7), + make_test_case('vluxseg7ei8_v_u16m1x7', vl=7, n_segs=7), + make_test_case('vluxseg7ei8_v_u16m1x7', vl=8, n_segs=7), + # Unordered, segment 8 + make_test_case('vluxseg8ei8_v_u16mf2x8', vl=4, n_segs=8), + make_test_case('vluxseg8ei8_v_u16mf2x8', vl=5, n_segs=8), + make_test_case('vluxseg8ei8_v_u16m1x8', vl=7, n_segs=8), + make_test_case('vluxseg8ei8_v_u16m1x8', vl=8, n_segs=8), + # Ordered, segment 2 + make_test_case('vloxseg2ei8_v_u16mf2x2', vl=4, n_segs=2), + make_test_case('vloxseg2ei8_v_u16mf2x2', vl=5, n_segs=2), + make_test_case('vloxseg2ei8_v_u16m1x2', vl=7, n_segs=2), + make_test_case('vloxseg2ei8_v_u16m1x2', vl=8, n_segs=2), + 
make_test_case('vloxseg2ei8_v_u16m2x2', vl=16, n_segs=2), + make_test_case('vloxseg2ei8_v_u16m2x2', vl=15, n_segs=2), + make_test_case('vloxseg2ei8_v_u16m4x2', vl=32, n_segs=2), + make_test_case('vloxseg2ei8_v_u16m4x2', vl=31, n_segs=2), + # Ordered, segment 3 + make_test_case('vloxseg3ei8_v_u16mf2x3', vl=4, n_segs=3), + make_test_case('vloxseg3ei8_v_u16mf2x3', vl=5, n_segs=3), + make_test_case('vloxseg3ei8_v_u16m1x3', vl=7, n_segs=3), + make_test_case('vloxseg3ei8_v_u16m1x3', vl=8, n_segs=3), + make_test_case('vloxseg3ei8_v_u16m2x3', vl=16, n_segs=3), + make_test_case('vloxseg3ei8_v_u16m2x3', vl=15, n_segs=3), + # Ordered, segment 4 + make_test_case('vloxseg4ei8_v_u16mf2x4', vl=4, n_segs=4), + make_test_case('vloxseg4ei8_v_u16mf2x4', vl=5, n_segs=4), + make_test_case('vloxseg4ei8_v_u16m1x4', vl=7, n_segs=4), + make_test_case('vloxseg4ei8_v_u16m1x4', vl=8, n_segs=4), + make_test_case('vloxseg4ei8_v_u16m2x4', vl=16, n_segs=4), + make_test_case('vloxseg4ei8_v_u16m2x4', vl=15, n_segs=4), + # Ordered, segment 5 + make_test_case('vloxseg5ei8_v_u16mf2x5', vl=4, n_segs=5), + make_test_case('vloxseg5ei8_v_u16mf2x5', vl=5, n_segs=5), + make_test_case('vloxseg5ei8_v_u16m1x5', vl=7, n_segs=5), + make_test_case('vloxseg5ei8_v_u16m1x5', vl=8, n_segs=5), + # Ordered, segment 6 + make_test_case('vloxseg6ei8_v_u16mf2x6', vl=4, n_segs=6), + make_test_case('vloxseg6ei8_v_u16mf2x6', vl=5, n_segs=6), + make_test_case('vloxseg6ei8_v_u16m1x6', vl=7, n_segs=6), + make_test_case('vloxseg6ei8_v_u16m1x6', vl=8, n_segs=6), + # Ordered, segment 7 + make_test_case('vloxseg7ei8_v_u16mf2x7', vl=4, n_segs=7), + make_test_case('vloxseg7ei8_v_u16mf2x7', vl=5, n_segs=7), + make_test_case('vloxseg7ei8_v_u16m1x7', vl=7, n_segs=7), + make_test_case('vloxseg7ei8_v_u16m1x7', vl=8, n_segs=7), + # Ordered, segment 8 + make_test_case('vloxseg8ei8_v_u16mf2x8', vl=4, n_segs=8), + make_test_case('vloxseg8ei8_v_u16mf2x8', vl=5, n_segs=8), + make_test_case('vloxseg8ei8_v_u16m1x8', vl=7, n_segs=8), + 
make_test_case('vloxseg8ei8_v_u16m1x8', vl=8, n_segs=8), + ], + dtype = np.uint16, + index_dtype = np.uint8, + ) + + +@cocotb.test() async def load16_index16_seg(dut): """Test vl*xseg*ei16_v_u16 usage accessible from intrinsics.""" def make_test_case(impl: str, vl: int, n_segs: int):