[otbn] Fix BN.LID / BN.SID in ISS
Now, BN.LID / BN.SID load and store 256-bit little-endian values (so
byte 0 is the LSB; byte 31 is the MSB). As before (and as expected by
the RISC-V base ISA), the LW / SW instructions load and store 32-bit
little-endian values. Before this change, the code was a mess of
reversed() calls. After lots of debug prints, I think it's finally
both consistent and right!
This patch also updates the post-increment behaviour as discussed in
issue #3587 and expands the documentation a bit.
Signed-off-by: Rupert Swarbrick <rswarbrick@lowrisc.org>
diff --git a/hw/ip/otbn/data/bignum-insns.yml b/hw/ip/otbn/data/bignum-insns.yml
index a61ffe1..c2af187 100644
--- a/hw/ip/otbn/data/bignum-insns.yml
+++ b/hw/ip/otbn/data/bignum-insns.yml
@@ -779,15 +779,17 @@
syntax: |
<grd>[<grd_inc>], <offset>(<grs1>[<grs1_inc>])
doc: |
- Calculates a byte memory address by adding the offset to the value in the GPR `grs1`.
- The value from this memory address is then copied into the WDR pointed to by the value in GPR `grd`.
+ Load a WLEN-bit little-endian value from data memory.
+
+ The load address is `offset` plus the value in the GPR `grs1`.
+ The loaded value is stored into the WDR given by the bottom 5 bits of the GPR `grd`.
After the operation, either the value in the GPR `grs1`, or the value in `grd` can be optionally incremented.
- - If `grs1_inc` is set, the value in `grs1` is incremented by the value WLEN/8 (one word).
- - If `grd_inc` is set, the value in `grd` is incremented by the value 1.
+ - If `grs1_inc` is set, the value in `grs1` is incremented by value WLEN/8 (one word).
+ - If `grd_inc` is set, `grd` is updated to be `(*grd + 1) & 0x1f`.
- The memory address must be aligned to WLEN bytes.
+ The memory address must be aligned to WLEN bits.
Any address that is unaligned or is above the top of memory will result in an error (with error code `ErrCodeBadDataAddr`).
cycles: 2
decode: |
@@ -809,7 +811,7 @@
if grs1_inc:
GPR[rs1] = GPR[rs1] + (WLEN / 8)
if grd_inc:
- GPR[rd] = GPR[rd] + 1
+ GPR[rd] = (GPR[rd] + 1) & 0x1f
lsu:
type: mem-load
target: [offset, grs1]
@@ -851,15 +853,17 @@
syntax: |
<grs2>[<grs2_inc>], <offset>(<grs1>[<grs1_inc>])
doc: |
- Calculates a byte memory address by adding the offset to the value in the GPR `grs1`.
- The value from the WDR pointed to by `grs2` is then copied into the memory.
+ Store a WDR to memory as a WLEN-bit little-endian value.
+
+ The store address is `offset` plus the value in the GPR `grs1`.
+ The value to store is taken from the WDR given by the bottom 5 bits of the GPR `grs2`.
After the operation, either the value in the GPR `grs1`, or the value in `grs2` can be optionally incremented.
- If `grs1_inc` is set, the value in `grs1` is incremented by the value WLEN/8 (one word).
- - If `grs2_inc` is set, the value in `grs2` is incremented by the value 1.
+ - If `grs2_inc` is set, the value in `grs2` is updated to be `(*grs2 + 1) & 0x1f`.
- The memory address must be aligned to WLEN bytes.
+ The memory address must be aligned to WLEN bits.
Any address that is unaligned or is above the top of memory will result in an error (with error code `ErrCodeBadDataAddr`).
decode: |
rs1 = UInt(grs1)
@@ -880,7 +884,7 @@
if grs1_inc:
GPR[rs1] = GPR[rs1] + (WLEN / 8)
if grs2_inc:
- GPR[rs2] = GPR[rs2] + 1
+ GPR[rs2] = (GPR[rs2] + 1) & 0x1f
lsu:
type: mem-store
target: [offset, grs1]
diff --git a/hw/ip/otbn/dv/otbnsim/sim/dmem.py b/hw/ip/otbn/dv/otbnsim/sim/dmem.py
index a6bbaec..c97b0fe 100644
--- a/hw/ip/otbn/dv/otbnsim/sim/dmem.py
+++ b/hw/ip/otbn/dv/otbnsim/sim/dmem.py
@@ -61,7 +61,17 @@
self.trace = [] # type: List[Trace]
def _get_u32s(self, idx: int) -> List[int]:
- '''Return the value at idx as 8 uint32's in little-endian order'''
+ '''Return the value at idx as 8 uint32's
+
+ These are ordered by increasing address, so the first word from
+ _get_u32s(0) will correspond to bytes at addresses 0x0 through 0x3.
+
+ These uint32's are the words that get loaded by word accesses to
+ memory. Since these accesses are also little-endian, the byte at memory
+ address 0x0 (which holds the LSB for the first 256-bit value) will also
+ be the LSB of the first u32 returned by _get_u32s(0).
+
+ '''
assert 0 <= idx < len(self.data)
word = self.data[idx]
@@ -73,7 +83,8 @@
ret = []
w32_mask = (1 << 32) - 1
for subidx in range(8):
- ret.append((word >> (32 * (7 - subidx))) & w32_mask)
+ ret.append((word >> (32 * subidx)) & w32_mask)
+
return ret
def _set_u32s(self, idx: int, u32s: List[int]) -> None:
@@ -92,12 +103,7 @@
self.data[idx] = u256
def load_le_words(self, data: bytes) -> None:
- '''Replace the start of memory with data
-
- This data should be in the form of little-endian 32-bit words. These
- words are themselves packed little-endian into 256-bit words.
-
- '''
+ '''Replace the start of memory with data'''
if len(data) > 32 * len(self.data):
raise ValueError('Trying to load {} bytes of data, but DMEM '
'is only {} bytes long.'
@@ -112,9 +118,6 @@
for idx32, u32 in enumerate(struct.iter_unpack('<I', data)):
acc.append(u32[0])
if len(acc) == 8:
- # Reverse acc here, because we're also little-endian moving
- # from u32 to u256
- acc.reverse()
self._set_u32s(idx32 // 8, acc)
acc = []
@@ -131,18 +134,16 @@
'''
u32s = [] # type: List[int]
for idx in range(len(self.data)):
- # As in load_le_words, we also have to reverse each set of 8 words
- # because we're little-endian at this scale too.
- u32s += reversed(self._get_u32s(idx))
+ u32s += self._get_u32s(idx)
return struct.pack('<{}I'.format(len(u32s)), *u32s)
def load_u256(self, addr: int) -> int:
- '''Read an unsigned wide word from memory. addr should be aligned.'''
+ '''Read a u256 little-endian value from an aligned address'''
assert 0 == addr % 32
return self.data[addr // 32]
def store_u256(self, addr: int, value: int) -> None:
- '''Write an unsigned wide word to memory. addr should be aligned.'''
+ '''Write a u256 little-endian value to an aligned address'''
assert 0 == addr % 32
assert 0 <= value < (1 << 256)
self.trace.append(TraceDmemStore(addr, value, True))
diff --git a/hw/ip/otbn/dv/otbnsim/sim/insn.py b/hw/ip/otbn/dv/otbnsim/sim/insn.py
index 4f048f1..d290b63 100644
--- a/hw/ip/otbn/dv/otbnsim/sim/insn.py
+++ b/hw/ip/otbn/dv/otbnsim/sim/insn.py
@@ -798,7 +798,7 @@
state.wdrs.get_reg(wrd).write_unsigned(value)
if self.grd_inc:
- new_grd_val = (grd_val + 1) & ((1 << 32) - 1)
+ new_grd_val = (grd_val + 1) & 0x1f
state.gprs.get_reg(self.grd).write_unsigned(new_grd_val)
if self.grs1_inc:
@@ -832,7 +832,7 @@
state.gprs.get_reg(self.grs1).write_unsigned(new_grs1_val)
if self.grs2_inc:
- new_grs2_val = (grs2_val + 1) & ((1 << 32) - 1)
+ new_grs2_val = (grs2_val + 1) & 0x1f
state.gprs.get_reg(self.grs2).write_unsigned(new_grs2_val)