Add indexed load/store support.
Change-Id: I684024c6f41cab613c4cf8003d8ad548c8a26bc7
diff --git a/hdl/chisel/src/kelvin/scalar/Lsu.scala b/hdl/chisel/src/kelvin/scalar/Lsu.scala
index 60ae397..e034011 100644
--- a/hdl/chisel/src/kelvin/scalar/Lsu.scala
+++ b/hdl/chisel/src/kelvin/scalar/Lsu.scala
@@ -187,6 +187,30 @@
}
}
+object ComputeIndexedAddrs {
+ def apply(bytesPerSlot: Int,
+ baseAddr: UInt,
+ indices: UInt,
+ elemWidth: UInt): Vec[UInt] = {
+ val indices8 = UIntToVec(indices, 8)
+ val indices16 = UIntToVec(indices, 16)
+ val indices32 = UIntToVec(indices, 32)
+
+ MuxCase(VecInit.fill(bytesPerSlot)(0.U(32.W)), Seq(
+ // elemWidth validation is done at decode time.
+ // 8-bit indices. Each byte has its own offset.
+ (elemWidth === "b000".U) -> VecInit((0 until bytesPerSlot).map(
+ i => (baseAddr + indices8(i))(31, 0))),
+ // 16-bit indices. Each 2-byte element has an offset.
+ (elemWidth === "b101".U) -> VecInit((0 until bytesPerSlot).map(
+ i => (baseAddr + indices16(i >> 1))(31, 0) + (i & 1).U)),
+ // 32-bit indices. Each 4-byte element has an offset.
+ (elemWidth === "b110".U) -> VecInit((0 until bytesPerSlot).map(
+ i => (baseAddr + indices32(i >> 2))(31, 0) + (i & 3).U))
+ ))
+ }
+}
+
// bytesPerSlot is the number of bytes in a vector register
// bytesPerLine is the number of bytes in the AXI bus
class LsuSlot(bytesPerSlot: Int, bytesPerLine: Int) extends Bundle {
@@ -253,8 +277,11 @@
op.isOneOf(LsuOp.VLOAD_UNIT, LsuOp.VSTORE_UNIT) ->
VecInit((0 until bytesPerSlot).map(i => baseAddr + i.U)),
op.isOneOf(LsuOp.VLOAD_STRIDED, LsuOp.VSTORE_STRIDED) ->
- ComputeStridedAddrs(bytesPerSlot, baseAddr, stride, elemWidth)
- // TODO(derekjchow): Support indexed
+ ComputeStridedAddrs(bytesPerSlot, baseAddr, stride, elemWidth),
+ op.isOneOf(LsuOp.VLOAD_OINDEXED, LsuOp.VLOAD_UINDEXED,
+ LsuOp.VSTORE_OINDEXED, LsuOp.VSTORE_UINDEXED) ->
+ ComputeIndexedAddrs(bytesPerSlot, baseAddr, rvv2lsu.idx.bits.data,
+ elemWidth)
// TODO(derekjchow): Support segmented
))
result.elemWidth := elemWidth
diff --git a/tests/cocotb/BUILD b/tests/cocotb/BUILD
index 989df39..d274166 100644
--- a/tests/cocotb/BUILD
+++ b/tests/cocotb/BUILD
@@ -315,6 +315,7 @@
"//kelvin_test_utils:sim_test_fixture",
],
data = [
+ '//tests/cocotb/rvv/load_store:load8_indexed_m1.elf',
'//tests/cocotb/rvv/load_store:load8_stride2_m1.elf',
'//tests/cocotb/rvv/load_store:load8_stride2_m1_partial.elf',
'//tests/cocotb/rvv/load_store:load8_stride2_mf4.elf',
@@ -324,6 +325,7 @@
'//tests/cocotb/rvv/load_store:load16_stride4_mf2.elf',
'//tests/cocotb/rvv/load_store:load32_stride8_m1.elf',
'//tests/cocotb/rvv/load_store:load32_stride8_m1_partial.elf',
+ '//tests/cocotb/rvv/load_store:store8_indexed_m1.elf',
],
size = "large",
)
diff --git a/tests/cocotb/rvv/load_store/BUILD b/tests/cocotb/rvv/load_store/BUILD
index f6c3aad..d19281c 100644
--- a/tests/cocotb/rvv/load_store/BUILD
+++ b/tests/cocotb/rvv/load_store/BUILD
@@ -20,6 +20,9 @@
template_rule(
kelvin_v2_binary,
{
+ "load8_indexed_m1": {
+ "srcs": ["load8_indexed_m1.cc"],
+ },
"load8_stride2_m1": {
"srcs": ["load8_stride2_m1.cc"],
},
@@ -47,5 +50,8 @@
"load32_stride8_m1_partial": {
"srcs": ["load32_stride8_m1_partial.cc"],
},
+ "store8_indexed_m1": {
+ "srcs": ["store8_indexed_m1.cc"],
+ },
},
)
diff --git a/tests/cocotb/rvv/load_store/load8_indexed_m1.cc b/tests/cocotb/rvv/load_store/load8_indexed_m1.cc
new file mode 100644
index 0000000..8c6c9a7
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/load8_indexed_m1.cc
@@ -0,0 +1,28 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+uint8_t input_indices[128] __attribute__((section(".data")));
+uint8_t input_data[4096] __attribute__((section(".data")));
+uint8_t output_data[128] __attribute__((section(".data")));
+
+int main(int argc, char **argv) {
+ vuint8m1_t indices = __riscv_vle8_v_u8m1(input_indices, /*vl=*/16);
+ vuint8m1_t data = __riscv_vloxei8_v_u8m1(input_data, indices, /*vl=*/16);
+ __riscv_vse8_v_u8m1(output_data, data, /*vl=*/16);
+
+ return 0;
+}
diff --git a/tests/cocotb/rvv/load_store/store8_indexed_m1.cc b/tests/cocotb/rvv/load_store/store8_indexed_m1.cc
new file mode 100644
index 0000000..221ff76
--- /dev/null
+++ b/tests/cocotb/rvv/load_store/store8_indexed_m1.cc
@@ -0,0 +1,28 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <riscv_vector.h>
+#include <stdint.h>
+
+uint8_t input_indices[128] __attribute__((section(".data")));
+uint8_t input_data[128] __attribute__((section(".data")));
+uint8_t output_data[4096] __attribute__((section(".data")));
+
+int main(int argc, char **argv) {
+ vuint8m1_t indices = __riscv_vle8_v_u8m1(input_indices, /*vl=*/16);
+ vuint8m1_t data = __riscv_vle8_v_u8m1(input_data, /*vl=*/16);
+ __riscv_vsoxei8_v_u8m1(output_data, indices, data, /*vl=*/16);
+
+ return 0;
+}
diff --git a/tests/cocotb/rvv_load_store_test.py b/tests/cocotb/rvv_load_store_test.py
index 6e4b481..640d1f2 100644
--- a/tests/cocotb/rvv_load_store_test.py
+++ b/tests/cocotb/rvv_load_store_test.py
@@ -59,6 +59,88 @@
assert (actual_outputs == expected_outputs).all(), debug_msg
+async def vector_load_indexed(
+ dut,
+ elf_name: str,
+ dtype,
+):
+ """RVV load-store test template for indexed loads.
+
+ Each test performs a gather operation and writes the result to an output.
+ """
+ fixture = await Fixture.Create(dut)
+ await fixture.load_elf_and_lookup_symbols(
+ '../tests/cocotb/rvv/load_store/' + elf_name,
+ ['input_indices', 'input_data', 'output_data'],
+ )
+
+ indices_count = 16 // np.dtype(dtype).itemsize
+ in_data_count = 4096 // np.dtype(dtype).itemsize
+ out_data_count = 16 // np.dtype(dtype).itemsize
+
+ min_value = np.iinfo(dtype).min
+ max_value = np.iinfo(dtype).max + 1 # One above.
+ rng = np.random.default_rng()
+ input_data = rng.integers(min_value, max_value, in_data_count, dtype=dtype)
+ input_indices = rng.integers(
+ 0, min(max_value, in_data_count-1), indices_count, dtype=dtype)
+
+ expected_outputs = np.take(input_data, input_indices)
+
+ await fixture.write('input_data', input_data)
+ await fixture.write('input_indices', input_indices)
+ await fixture.write('output_data', np.zeros([out_data_count], dtype=dtype))
+
+ await fixture.run_to_halt()
+
+ actual_outputs = (await fixture.read(
+ 'output_data', out_data_count * np.dtype(dtype).itemsize)).view(dtype)
+
+ assert (actual_outputs == expected_outputs).all()
+
+async def vector_store_indexed(
+ dut,
+ elf_name: str,
+ dtype,
+):
+ """RVV load-store test template for indexed stores.
+
+ Each test loads indices and data and performs a scatter operation.
+ """
+ fixture = await Fixture.Create(dut)
+ await fixture.load_elf_and_lookup_symbols(
+ '../tests/cocotb/rvv/load_store/' + elf_name,
+ ['input_indices', 'input_data', 'output_data'],
+ )
+
+ indices_count = 16 // np.dtype(dtype).itemsize
+ in_data_count = 16 // np.dtype(dtype).itemsize
+ out_data_count = 4096 // np.dtype(dtype).itemsize
+
+ min_value = np.iinfo(dtype).min
+ max_value = np.iinfo(dtype).max + 1 # One above.
+ rng = np.random.default_rng()
+ input_data = rng.integers(min_value, max_value, in_data_count, dtype=dtype)
+ input_indices = rng.integers(
+ 0, min(max_value, out_data_count-1), indices_count, dtype=dtype)
+ original_outputs = rng.integers(
+ min_value, max_value, out_data_count, dtype=dtype)
+
+ await fixture.write('input_data', input_data)
+ await fixture.write('input_indices', input_indices)
+ await fixture.write('output_data', original_outputs)
+
+ expected_outputs = np.copy(original_outputs)
+ for idx, data in zip(input_indices, input_data):
+ expected_outputs[idx] = data
+
+ await fixture.run_to_halt()
+
+ actual_outputs = (await fixture.read(
+ 'output_data', out_data_count * np.dtype(dtype).itemsize)).view(dtype)
+
+ assert (actual_outputs == expected_outputs).all()
+
@cocotb.test()
async def load8_stride2_m1(dut):
await vector_load_store(
@@ -157,3 +239,19 @@
out_size = 64,
pattern = list(range(0, 32)),
)
+
+@cocotb.test()
+async def load8_indexed_m1(dut):
+ await vector_load_indexed(
+ dut = dut,
+ elf_name = 'load8_indexed_m1.elf',
+ dtype = np.uint8,
+ )
+
+@cocotb.test()
+async def store8_indexed_m1(dut):
+ await vector_store_indexed(
+ dut = dut,
+ elf_name = 'store8_indexed_m1.elf',
+ dtype = np.uint8,
+ )