Add whole register into Lsu. Change-Id: Iee07259b7c63aca7748a231ccc89a6f77bbfb4a3
diff --git a/hdl/chisel/src/kelvin/scalar/Lsu.scala b/hdl/chisel/src/kelvin/scalar/Lsu.scala index 755309a..75cd112 100644 --- a/hdl/chisel/src/kelvin/scalar/Lsu.scala +++ b/hdl/chisel/src/kelvin/scalar/Lsu.scala
@@ -163,6 +163,16 @@ } } + def isWholeRegister(): Bool = { + if (p.enableRvv) { + (umop.get === "b01000".U) && + op.isOneOf(LsuOp.VLOAD_UNIT, LsuOp.VSTORE_UNIT) + } else { + false.B + } + + } + override def toPrintable: Printable = { cf"LsuCmd(store -> ${store}, addr -> 0x${addr}%x, op -> ${op}, " + cf"pc -> 0x${pc}%x, elemWidth -> ${elemWidth}, nfields -> ${nfields})" @@ -215,9 +225,23 @@ result.elemWidth.get := cmd.elemWidth.get // If mask operation, always make LMUL=1. result.lmul.get := Mux(cmd.isMaskOperation(), 0.U, rvvState.get.bits.lmul) + result.lmul.get := MuxCase(rvvState.get.bits.lmul, Seq( + cmd.isMaskOperation() -> 0.U, + // Section 7.9 of RVV Spec: "The nf field encoders how many vector + // registers to load and store". + cmd.isWholeRegister() -> MuxCase(0.U, Seq( + (cmd.nfields.get === 0.U) -> 0.U, // NF1 -> LMUL1 + (cmd.nfields.get === 1.U) -> 1.U, // NF2 -> LMUL2 + (cmd.nfields.get === 3.U) -> 2.U, // NF4 -> LMUL4 + (cmd.nfields.get === 7.U) -> 3.U, // NF8 -> LMUL8 + )), + )) // If mask operation, force fields to zero - result.nfields.get := Mux(cmd.isMaskOperation(), 0.U, cmd.nfields.get) + result.nfields.get := MuxCase(cmd.nfields.get, Seq( + cmd.isMaskOperation() -> 0.U, + cmd.isWholeRegister() -> 0.U, + )) result.sew.get := rvvState.get.bits.sew }
diff --git a/tests/cocotb/BUILD b/tests/cocotb/BUILD index 21b6f84..dedacec 100644 --- a/tests/cocotb/BUILD +++ b/tests/cocotb/BUILD
@@ -174,6 +174,7 @@ # BEGIN_TESTCASES_FOR_rvv_load_store_test RVV_LOAD_STORE_TESTCASES = [ "load_store_bits", + "load_store_whole_register_test", "load_unit_masked", "load_unit_all_vtypes_test", "load_strided_all_vtypes_test",
diff --git a/tests/cocotb/rvv/load_store/BUILD b/tests/cocotb/rvv/load_store/BUILD index 73d5979..727c38d 100644 --- a/tests/cocotb/rvv/load_store/BUILD +++ b/tests/cocotb/rvv/load_store/BUILD
@@ -23,6 +23,9 @@ "load_store_bits": { "srcs": ["load_store_bits.cc"], }, + "load_store_whole_register": { + "srcs": ["load_store_whole_register.cc"], + }, "load_unit_masked": { "srcs": ["load_unit_masked.cc"], }, @@ -165,6 +168,7 @@ name = "rvv_load_store_tests", srcs = [ ":load_store_bits.elf", + ":load_store_whole_register.elf", ":load_unit_masked.elf", ":load_unit_vtype.elf", ":load_stride_vtype.elf",
diff --git a/tests/cocotb/rvv/load_store/load_store_whole_register.cc b/tests/cocotb/rvv/load_store/load_store_whole_register.cc new file mode 100644 index 0000000..afdd9cf --- /dev/null +++ b/tests/cocotb/rvv/load_store/load_store_whole_register.cc
@@ -0,0 +1,73 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <riscv_vector.h> +#include <stdint.h> + +namespace { +constexpr size_t buf_size = 128; +} + +size_t vl __attribute__((section(".data"))) = buf_size; +size_t vtype __attribute__((section(".data"))) = buf_size; +uint8_t load_data[buf_size] __attribute__((section(".data"))); +uint8_t store_data[buf_size] __attribute__((section(".data"))); + +extern "C" { +#define CREATE_LOAD_FN(name, n_registers) \ +__attribute__((used, retain)) void name() { \ + size_t eight_vl = 8*__riscv_vlenb(); \ + asm("vsetvli zero, %[eight_vl], e8, m8, ta, ma;" \ + "vmv.v.i v8, 0;" \ + "vsetvl zero, %[vl], %[vtype];" \ + "vl" #n_registers "r.v v8, %[load_data];" \ + "vs8r.v v8, %[store_data];" \ + : [store_data] "=m"(store_data) \ + : [eight_vl] "r"(eight_vl), \ + [vl] "r"(vl), \ + [vtype] "r"(vtype), \ + [load_data] "m"(load_data) \ + : "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", \ + "vl", "vtype"); \ +} + +#define CREATE_STORE_FN(name, n_registers) \ +__attribute__((used, retain)) void name() { \ + asm("vsetvl zero, %[vl], %[vtype];" \ + "vl8r.v v8, %[load_data];" \ + "vs" #n_registers "r.v v8, %[store_data];" \ + : [store_data] "=m"(store_data) \ + : [vl] "r"(vl), \ + [vtype] "r"(vtype), \ + [load_data] "m"(load_data) \ + : "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", \ + "vl", "vtype"); \ +} + +CREATE_LOAD_FN(test_vl1r, 1) +CREATE_LOAD_FN(test_vl2r, 2) +CREATE_LOAD_FN(test_vl4r, 4) +CREATE_LOAD_FN(test_vl8r, 8) +CREATE_STORE_FN(test_vs1r, 1) +CREATE_STORE_FN(test_vs2r, 2) +CREATE_STORE_FN(test_vs4r, 4) +CREATE_STORE_FN(test_vs8r, 8) +} + +void (*impl)() __attribute__((section(".data"))) = &test_vl1r; + +int main(int argc, char** argv) { + impl(); + return 0; +}
diff --git a/tests/cocotb/rvv_load_store_test.py b/tests/cocotb/rvv_load_store_test.py index d83d0c3..64610c9 100644 --- a/tests/cocotb/rvv_load_store_test.py +++ b/tests/cocotb/rvv_load_store_test.py
@@ -2816,3 +2816,58 @@ store_data = (await fixture.read('store_data', 8192)) assert (expected_store_data == store_data).all() + +@cocotb.test() +async def load_store_whole_register_test(dut): + """Testbench to test RVV strided/segmented store, with all vtypes.""" + fixture = await Fixture.Create(dut) + r = runfiles.Create() + functions = [ + # Name, store, n_registers + ("test_vl1r", False, 1), + ("test_vl2r", False, 2), + ("test_vl4r", False, 4), + ("test_vl8r", False, 8), + ("test_vs1r", True, 1), + ("test_vs2r", True, 2), + ("test_vs4r", True, 4), + ("test_vs8r", True, 8), + ] + + await fixture.load_elf_and_lookup_symbols( + r.Rlocation('kelvin_hw/tests/cocotb/rvv/load_store/load_store_whole_register.elf'), + ['vl', 'vtype', 'stride', 'load_data', 'store_data', 'impl'] + + list(f[0] for f in functions), + ) + + vlenb = 16 + with tqdm.tqdm(functions) as t: + for (function, store, n_registers) in t: + for sew in SEWS: + for lmul, _ in SEW_TO_LMULS_AND_VLMAXS[sew]: + t.set_postfix({ + 'function': function, + 'sew': sew, + 'lmul': lmul, + }) + + await fixture.write_ptr('impl', function) + vtype = construct_vtype(1, 1, sew, lmul) + await fixture.write_word('vtype', vtype) + await fixture.write_word('vl', 1) + + load_data = np.random.randint(0, 255, 128, dtype=np.uint8) + await fixture.write('load_data', load_data) + await fixture.write('store_data', 0xFF*np.ones(128, dtype=np.uint8)) + + await fixture.run_to_halt() + + store_data = (await fixture.read('store_data', 128)) + + data_written = vlenb * n_registers + assert (load_data[0:data_written] == + store_data[0:data_written]).all() + if store: + assert(store_data[data_written:] == 0xFF).all() + else: + assert(store_data[data_written:] == 0x00).all()