[otbn] Store bytes to memory in BN.SID

Here, model.state.wreg[wrs] is a Register object (from riscvmodel).
Coercing it with int() yields a two's complement signed integer. For
load/store instructions, we definitely don't want a signed value. In
fact, we really want the raw bytes, so BN.SID now reads the register
as an unsigned value, splits it into bytes (least significant first)
and stores those. The calculation of "value" in BNSID.execute() could
be moved into the Register class.

Signed-off-by: Rupert Swarbrick <rswarbrick@lowrisc.org>
diff --git a/hw/ip/otbn/dv/otbnsim/sim/insn.py b/hw/ip/otbn/dv/otbnsim/sim/insn.py
index 4660df0..1a13ec5 100644
--- a/hw/ip/otbn/dv/otbnsim/sim/insn.py
+++ b/hw/ip/otbn/dv/otbnsim/sim/insn.py
@@ -747,10 +747,14 @@
         self.grs1_inc = op_vals['grs1_inc']
 
     def execute(self, model: OTBNModel) -> None:
-        wrs = int(model.state.intreg[self.grs2])
+        idx = int(model.state.intreg[self.grs2])
         addr = int(model.state.intreg[self.grs1] + int(self.offset))
-        word = int(model.state.wreg[wrs])
-        model.store_wlen_word_to_memory(addr, word)
+
+        wrs = model.state.wreg[idx]
+        uval = wrs.unsigned()
+        value = bytes((uval >> (8 * idx)) & 255
+                      for idx in range(wrs.bits // 8))
+        model.store_bytes_to_memory(addr, value)
 
         if self.grs2_inc:
             model.state.intreg[self.grs2] += 1
diff --git a/hw/ip/otbn/dv/otbnsim/sim/model.py b/hw/ip/otbn/dv/otbnsim/sim/model.py
index 8598045..445863d 100644
--- a/hw/ip/otbn/dv/otbnsim/sim/model.py
+++ b/hw/ip/otbn/dv/otbnsim/sim/model.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from random import getrandbits
+import struct
 from typing import List, Optional, Tuple, cast
 
 from attrdict import AttrDict  # type: ignore
@@ -325,15 +326,12 @@
             word += cast(int, self.state.memory.lw(addr + byte_off)) << bit_off
         return word
 
-    def store_wlen_word_to_memory(self, addr: int, word: int) -> None:
+    def store_bytes_to_memory(self, addr: int, value: bytes) -> None:
         assert 0 <= addr
-        assert 0 <= word
-        assert (word >> 256) == 0
+        assert 0 == len(value) & 3
 
-        mask32 = (1 << 32) - 1
-        for byte_off in range(0, 32, 4):
-            bit_off = byte_off * 8
-            self.state.memory.sw(addr + byte_off, (word >> bit_off) & mask32)
+        for word_idx, word in enumerate(struct.iter_unpack('<I', value)):
+            self.state.memory.sw(addr + 4 * word_idx, word[0])
 
     @staticmethod
     def add_with_carry(a: int, b: int, carry_in: int) -> Tuple[int, int]: