[otbn,dv] Teach otbn_model to pass validity bits to and from the ISS The binary file format has changed and now consists of 5 bytes for each 32-bit word. The first byte is a "validity byte", which should have value 0 (for invalid) or 1 (for valid). The other 4 bytes are a little-endian encoded 32-bit word. The first change in otbn_model is to use Ecc32MemArea's new ReadWithIntegrity method to get the integrity bits as well as the raw data. We then pass this to the ISS in the new format. In the other direction (reading data from the ISS), we have to parse the new format and also need to tweak our correctness check. Note that there's a temporary hack (with TODO message) here, which will go away once we've implemented integrity bits properly on the ISS side. On the ISS side, the only work is to read and write the new file format (with struct format string "<BI") and fix up the various bits of packing logic. There's a corresponding hack (also with TODO message), reminding us to handle validity bits properly when loading DMEM. Signed-off-by: Rupert Swarbrick <rswarbrick@lowrisc.org>

commit: e10dfaf312614ae9ebcdfb8c79840de4cf45be7e [log] [tgz]
author: Rupert Swarbrick <rswarbrick@lowrisc.org> Tue Nov 09 17:28:56 2021 +0000
committer: Rupert Swarbrick <rswarbrick@gmail.com> Tue Nov 16 23:13:21 2021 +0000
tree: ecc284233f88942203c7490133d528f70a3a69c5
parent: b407f2200168c70d8561a3147df5e196f970cec2 [diff]
diff --git a/hw/ip/otbn/dv/memutil/otbn_memutil.h b/hw/ip/otbn/dv/memutil/otbn_memutil.h
index 4247b65..dc14aad 100644
--- a/hw/ip/otbn/dv/memutil/otbn_memutil.h
+++ b/hw/ip/otbn/dv/memutil/otbn_memutil.h

@@ -29,7 +29,7 @@
   const StagedMem::SegMap &GetSegs(bool is_imem) const;
 
   // Get access to a memory area
-  const MemArea &GetMemArea(bool is_imem) const {
+  const ScrambledEcc32MemArea &GetMemArea(bool is_imem) const {
     return is_imem ? imem_ : dmem_;
   }
 

diff --git a/hw/ip/otbn/dv/model/otbn_model.cc b/hw/ip/otbn/dv/model/otbn_model.cc
index 7677540..0a4e286 100644
--- a/hw/ip/otbn/dv/model/otbn_model.cc
+++ b/hw/ip/otbn/dv/model/otbn_model.cc

@@ -18,16 +18,6 @@
 #include "sv_scoped.h"
 #include "sv_utils.h"
 
-// Read (the start of) the contents of a file at path as a vector of bytes.
-// Expects num_bytes bytes of data. On failure, throws a std::runtime_error.
-static std::vector<uint8_t> read_vector_from_file(const std::string &path,
-                                                  size_t num_bytes);
-
-// Write a vector of bytes to a new file at path. On failure, throws a
-// std::runtime_error.
-static void write_vector_to_file(const std::string &path,
-                                 const std::vector<uint8_t> &data);
-
 extern "C" {
 // These functions are only implemented if DesignScope != "", i.e. if we're
 // running a block-level simulation. Code needs to check at runtime if
@@ -42,8 +32,10 @@
 #define FAILED_STEP_BIT (1U << 2)
 #define FAILED_CMP_BIT (1U << 3)
 
-static std::vector<uint8_t> read_vector_from_file(const std::string &path,
-                                                  size_t num_bytes) {
+// Read (the start of) the contents of a file at path as a vector of bytes.
+// Expects num_bytes bytes of data. On failure, throws a std::runtime_error.
+static Ecc32MemArea::EccWords read_words_from_file(const std::string &path,
+                                                   size_t num_words) {
   std::filebuf fb;
   if (!fb.open(path.c_str(), std::ios::in | std::ios::binary)) {
     std::ostringstream oss;
@@ -51,23 +43,71 @@
     throw std::runtime_error(oss.str());
   }
 
-  std::vector<uint8_t> buf(num_bytes);
-  std::streamsize chars_in =
-      fb.sgetn(reinterpret_cast<char *>(&buf[0]), num_bytes);
+  Ecc32MemArea::EccWords ret;
+  ret.reserve(num_words);
 
-  if (chars_in != num_bytes) {
-    std::ostringstream oss;
-    oss << "Cannot read " << num_bytes << " bytes of memory data from " << path
-        << " (actually got " << chars_in << ").";
-    throw std::runtime_error(oss.str());
+  char minibuf[5];
+  for (size_t i = 0; i < num_words; ++i) {
+    std::streamsize chars_in = fb.sgetn(minibuf, 5);
+    if (chars_in != 5) {
+      std::ostringstream oss;
+      oss << "Cannot read word " << i << " from " << path
+          << " (expected 5 bytes, but actually got " << chars_in << ").";
+      throw std::runtime_error(oss.str());
+    }
+
+    // The layout should be a validity byte (either 0 or 1), followed
+    // by 4 bytes with a little-endian 32-bit word.
+    uint8_t vld_byte = minibuf[0];
+    if (vld_byte > 2) {
+      std::ostringstream oss;
+      oss << "Word " << i << " at " << path
+          << " had a validity byte with value " << (int)vld_byte
+          << "; not 0 or 1.";
+      throw std::runtime_error(oss.str());
+    }
+    bool valid = vld_byte == 1;
+
+    uint32_t word = 0;
+    for (int j = 0; j < 4; ++j) {
+      word |= (uint32_t)(uint8_t)minibuf[j + 1] << 8 * j;
+    }
+
+    ret.push_back(std::make_pair(valid, word));
   }
 
-  return buf;
+  return ret;
 }
 
-// Write a vector of bytes to a new file at path
-static void write_vector_to_file(const std::string &path,
-                                 const std::vector<uint8_t> &data) {
+static std::vector<uint8_t> words_to_bytes(
+    const Ecc32MemArea::EccWords &words) {
+  std::vector<uint8_t> ret;
+  ret.reserve(words.size() * 4);
+  for (size_t i = 0; i < words.size(); ++i) {
+    const Ecc32MemArea::EccWord &word = words[i];
+    bool valid = word.first;
+    uint32_t w32 = word.second;
+
+    // Complain if the word has an invalid checksum. We don't support doing
+    // that at the moment.
+    if (!valid) {
+      std::ostringstream oss;
+      oss << "Cannot convert 32-bit word " << i
+          << " to bytes because its valid flag is false.";
+      throw std::runtime_error(oss.str());
+    }
+
+    for (int j = 0; j < 4; ++j) {
+      ret.push_back((w32 >> 8 * j) & 0xff);
+    }
+  }
+  return ret;
+}
+
+// Write some words to a new file at path. On failure, throws a
+// std::runtime_error.
+static void write_words_to_file(const std::string &path,
+                                const Ecc32MemArea::EccWords &words) {
   std::filebuf fb;
   if (!fb.open(path.c_str(), std::ios::out | std::ios::binary)) {
     std::ostringstream oss;
@@ -75,15 +115,24 @@
     throw std::runtime_error(oss.str());
   }
 
-  // Write out the data
-  std::streamsize chars_out =
-      fb.sputn(reinterpret_cast<const char *>(&data[0]), data.size());
+  for (const Ecc32MemArea::EccWord &word : words) {
+    uint8_t bytes[5];
 
-  if (chars_out != data.size()) {
-    std::ostringstream oss;
-    oss << "Cannot write " << data.size() << " bytes of memory data to " << path
-        << "' (actually wrote " << chars_out << ").";
-    throw std::runtime_error(oss.str());
+    bool valid = word.first;
+    uint32_t w32 = word.second;
+
+    bytes[0] = valid ? 1 : 0;
+    for (int j = 0; j < 4; ++j) {
+      bytes[j + 1] = (w32 >> (8 * j)) & 0xff;
+    }
+
+    std::streamsize chars_out =
+        fb.sputn(reinterpret_cast<const char *>(&bytes), 5);
+    if (chars_out != 5) {
+      std::ostringstream oss;
+      oss << "Failed to write to " << path << ".";
+      throw std::runtime_error(oss.str());
+    }
   }
 }
 
@@ -220,8 +269,8 @@
   std::string ifname(iss->make_tmp_path("imem"));
 
   try {
-    write_vector_to_file(dfname, get_sim_memory(false));
-    write_vector_to_file(ifname, get_sim_memory(true));
+    write_words_to_file(dfname, get_sim_memory(false));
+    write_words_to_file(ifname, get_sim_memory(true));
   } catch (const std::exception &err) {
     std::cerr << "Error when dumping memory contents: " << err.what() << "\n";
     return -1;
@@ -355,8 +404,11 @@
 
   std::string dfname(iss->make_tmp_path("dmem_out"));
   try {
+    // Read DMEM from the ISS
     iss->dump_d(dfname);
-    set_sim_memory(false, read_vector_from_file(dfname, dmem.GetSizeBytes()));
+    Ecc32MemArea::EccWords words =
+        read_words_from_file(dfname, dmem.GetSizeBytes() / 4);
+    set_sim_memory(false, words_to_bytes(words));
   } catch (const std::exception &err) {
     std::cerr << "Error when loading dmem from ISS: " << err.what() << "\n";
     return -1;
@@ -399,9 +451,9 @@
   return iss_.get();
 }
 
-std::vector<uint8_t> OtbnModel::get_sim_memory(bool is_imem) const {
-  const MemArea &mem_area = mem_util_.GetMemArea(is_imem);
-  return mem_area.Read(0, mem_area.GetSizeWords());
+Ecc32MemArea::EccWords OtbnModel::get_sim_memory(bool is_imem) const {
+  auto &mem_area = mem_util_.GetMemArea(is_imem);
+  return mem_area.ReadWithIntegrity(0, mem_area.GetSizeWords());
 }
 
 void OtbnModel::set_sim_memory(bool is_imem, const std::vector<uint8_t> &data) {
@@ -415,37 +467,63 @@
   std::string dfname(iss.make_tmp_path("dmem_out"));
 
   iss.dump_d(dfname);
-  std::vector<uint8_t> iss_data = read_vector_from_file(dfname, dmem_bytes);
-  assert(iss_data.size() == dmem_bytes);
+  Ecc32MemArea::EccWords iss_words =
+      read_words_from_file(dfname, dmem_bytes / 4);
+  assert(iss_words.size() == dmem_bytes / 4);
 
-  std::vector<uint8_t> rtl_data = get_sim_memory(false);
-  assert(rtl_data.size() == dmem_bytes);
+  Ecc32MemArea::EccWords rtl_words = get_sim_memory(false);
+  assert(rtl_words.size() == dmem_bytes / 4);
 
-  // If the arrays match, we're done.
-  if (0 == memcmp(&iss_data[0], &rtl_data[0], dmem_bytes))
-    return true;
-
-  // If not, print out the first 10 mismatches
   std::ios old_state(nullptr);
   old_state.copyfmt(std::cerr);
-  std::cerr << "ERROR: Mismatches in dmem data:\n"
-            << std::hex << std::setfill('0');
-  int bad_count = 0;
-  for (size_t i = 0; i < dmem_bytes; ++i) {
-    if (iss_data[i] != rtl_data[i]) {
-      std::cerr << " @offset 0x" << std::setw(3) << i << ": rtl has 0x"
-                << std::setw(2) << (int)rtl_data[i] << "; iss has 0x"
-                << std::setw(2) << (int)iss_data[i] << "\n";
-      ++bad_count;
 
-      if (bad_count == 10) {
-        std::cerr << " (skipping further errors...)\n";
-        break;
-      }
+  int bad_count = 0;
+  for (size_t i = 0; i < dmem_bytes / 4; ++i) {
+    bool iss_valid = iss_words[i].first;
+    bool rtl_valid = rtl_words[i].first;
+    uint32_t iss_w32 = iss_words[i].second;
+    uint32_t rtl_w32 = rtl_words[i].second;
+
+    // If neither word has valid checksum bits, all is well.
+    if (!iss_valid && !rtl_valid)
+      continue;
+
+    // If both words have valid checksum bits and equal data, all is well.
+    if (iss_valid && rtl_valid && iss_w32 == rtl_w32)
+      continue;
+
+    // TODO: At the moment, the ISS doesn't track validity bits properly in
+    //       DMEM, which means that we might have a situation where RTL says a
+    //       word is invalid, but the ISS doesn't. To avoid spurious failures
+    //       until we've implemented things, skip the check in this case. Once
+    //       the ISS handles validity bits properly, delete this block.
+    if (iss_valid && !rtl_valid)
+      continue;
+
+    // Otherwise, something has gone wrong. Print out a banner if this is the
+    // first mismatch.
+    if (bad_count == 0) {
+      std::cerr << "ERROR: Mismatches in dmem data:\n"
+                << std::hex << std::setfill('0');
+    }
+
+    std::cerr << " @offset 0x" << std::setw(3) << 4 * i << ": ";
+    if (iss_valid != rtl_valid) {
+      std::cerr << "mismatching validity bits (rtl = " << rtl_valid
+                << "; iss = " << iss_valid << ")\n";
+    } else {
+      assert(iss_valid && rtl_valid && iss_w32 != rtl_w32);
+      std::cerr << "rtl has 0x" << std::setw(8) << rtl_w32 << "; iss has 0x"
+                << std::setw(8) << iss_w32 << "\n";
+    }
+    ++bad_count;
+    if (bad_count == 10) {
+      std::cerr << " (skipping further errors...)\n";
+      break;
     }
   }
   std::cerr.copyfmt(old_state);
-  return false;
+  return bad_count == 0;
 }
 
 bool OtbnModel::check_regs(ISSWrapper &iss) const {

diff --git a/hw/ip/otbn/dv/model/otbn_model.h b/hw/ip/otbn/dv/model/otbn_model.h
index 1b7eca2..694b343 100644
--- a/hw/ip/otbn/dv/model/otbn_model.h
+++ b/hw/ip/otbn/dv/model/otbn_model.h

@@ -72,7 +72,7 @@
   ISSWrapper *ensure_wrapper();
 
   // Read the contents of the ISS's memory
-  std::vector<uint8_t> get_sim_memory(bool is_imem) const;
+  Ecc32MemArea::EccWords get_sim_memory(bool is_imem) const;
 
   // Set the contents of the ISS's memory
   void set_sim_memory(bool is_imem, const std::vector<uint8_t> &data);

diff --git a/hw/ip/otbn/dv/otbnsim/sim/decode.py b/hw/ip/otbn/dv/otbnsim/sim/decode.py
index fd17308..1e19d1c 100644
--- a/hw/ip/otbn/dv/otbnsim/sim/decode.py
+++ b/hw/ip/otbn/dv/otbnsim/sim/decode.py

@@ -5,7 +5,7 @@
 '''Code to load instruction words into a simulator'''
 
 import struct
-from typing import List, Optional, Iterator
+from typing import Iterator, List, Optional, Tuple
 
 from .constants import ErrBits
 from .isa import INSNS_FILE, OTBNInsn
@@ -83,13 +83,34 @@
     return cls(word, op_vals)
 
 
-def decode_bytes(base_addr: int, data: bytes) -> List[OTBNInsn]:
+def decode_words(base_addr: int,
+                 data: List[Tuple[bool, int]]) -> List[OTBNInsn]:
     '''Decode instruction bytes as instructions'''
-    assert len(data) & 3 == 0
-    return [_decode_word(base_addr + 4 * offset, int_val[0])
-            for offset, int_val in enumerate(struct.iter_unpack('<I', data))]
+    ret = []
+    for idx, (vld, w32) in enumerate(data):
+        pc = 4 * idx
+        ret.append(_decode_word(pc, w32) if vld else EmptyInsn(pc))
+    return ret
 
 
 def decode_file(base_addr: int, path: str) -> List[OTBNInsn]:
     with open(path, 'rb') as handle:
-        return decode_bytes(base_addr, handle.read())
+        raw_bytes = handle.read()
+
+    # Each 32-bit word is represented by a 5 bytes, consisting of a validity
+    # byte (0 or 1) followed by 4 bytes for the word itself.
+    if len(raw_bytes) % 5:
+        raise ValueError('Trying to load {} bytes of data from {}, '
+                         'which is not a multiple of 5.'
+                         .format(path, len(raw_bytes)))
+
+    data = []
+    for idx32, (vld, u32) in enumerate(struct.iter_unpack('<BI', raw_bytes)):
+        if vld not in [0, 1]:
+            raise ValueError('The validity byte for 32-bit word {} '
+                             'at {} is {}, not 0 or 1.'
+                             .format(idx32, path, vld))
+
+        data.append((vld == 1, u32))
+
+    return decode_words(base_addr, data)

diff --git a/hw/ip/otbn/dv/otbnsim/sim/dmem.py b/hw/ip/otbn/dv/otbnsim/sim/dmem.py
index 7146978..79b0c2c 100644
--- a/hw/ip/otbn/dv/otbnsim/sim/dmem.py
+++ b/hw/ip/otbn/dv/otbnsim/sim/dmem.py

@@ -58,8 +58,49 @@
         self.data = [uninit] * num_words
         self.trace = []  # type: List[TraceDmemStore]
 
-    def load_le_words(self, data: bytes) -> None:
-        '''Replace the start of memory with data'''
+    def _load_5byte_le_words(self, data: bytes) -> None:
+        '''Replace the start of memory with data
+
+        The bytes loaded should represent each 32-bit word with 5 bytes,
+        consisting of a validity byte (0 or 1) followed by 4 bytes for the word
+        itself.
+
+        '''
+        if len(data) % 5:
+            raise ValueError('Trying to load {} bytes of data, '
+                             'which is not a multiple of 5.'
+                             .format(len(data)))
+
+        len_data_32 = len(data) // 5
+        len_mem_32 = (256 // 32) * len(self.data)
+
+        if len_data_32 > len_mem_32:
+            raise ValueError('Trying to load {} bytes of data, but DMEM '
+                             'is only {} bytes long.'
+                             .format(4 * len_data_32, 32 * len(self.data)))
+
+        # Zero-pad up to the next 32-bit word, represented by 5 bytes. Because
+        # things are little-endian, this is like zero-extending the last word.
+        if len(data) % 5:
+            data = data + b'0' * (5 - (len(data) % 5))
+
+        for idx32, (vld, u32) in enumerate(struct.iter_unpack('<BI', data)):
+            if vld not in [0, 1]:
+                raise ValueError('The validity byte for 32-bit word {} '
+                                 'in the input data is {}, not 0 or 1.'
+                                 .format(idx32, vld))
+
+            # TODO: Take account of validity bit here!
+
+            self.data[idx32] = u32
+
+    def _load_4byte_le_words(self, data: bytes) -> None:
+        '''Replace the start of memory with data
+
+        The bytes loaded should represent each 32-bit word with 4 bytes in
+        little-endian format.
+
+        '''
         if len(data) > 32 * len(self.data):
             raise ValueError('Trying to load {} bytes of data, but DMEM '
                              'is only {} bytes long.'
@@ -72,6 +113,18 @@
         for idx32, u32 in enumerate(struct.iter_unpack('<I', data)):
             self.data[idx32] = u32[0]
 
+    def load_le_words(self, data: bytes, has_validity: bool) -> None:
+        '''Replace the start of memory with data
+
+        Uses the 5-byte format if has_validity is true and the 4-byte format
+        otherwise.
+
+        '''
+        if has_validity:
+            self._load_5byte_le_words(data)
+        else:
+            self._load_4byte_le_words(data)
+
     def dump_le_words(self) -> bytes:
         '''Return the contents of memory as bytes.
 
@@ -79,8 +132,11 @@
         words are themselves packed little-endian into 256-bit words.
 
         '''
-        u32s = [self.data[i] for i in range(len(self.data))]
-        return struct.pack('<{}I'.format(len(u32s)), *u32s)
+        ret = b''
+        for u32 in self.data:
+            ret += struct.pack('<BI', 1, u32)
+
+        return ret
 
     def is_valid_256b_addr(self, addr: int) -> bool:
         '''Return true if this is a valid address for a BN.LID/BN.SID'''

diff --git a/hw/ip/otbn/dv/otbnsim/sim/load_elf.py b/hw/ip/otbn/dv/otbnsim/sim/load_elf.py
index b8dae8e..4b8d20a 100644
--- a/hw/ip/otbn/dv/otbnsim/sim/load_elf.py
+++ b/hw/ip/otbn/dv/otbnsim/sim/load_elf.py

@@ -5,11 +5,12 @@
 '''OTBN ELF file handling'''
 
 import re
+import struct
 from typing import Optional, Dict
 
 from shared.elf import read_elf
 
-from .decode import decode_bytes
+from .decode import decode_words
 from .sim import LoopWarps, OTBNSim
 
 
@@ -76,12 +77,17 @@
     '''
     (imem_bytes, dmem_bytes, symbols) = read_elf(path)
 
-    imem_insns = decode_bytes(0, imem_bytes)
+    # Collect imem bytes into 32-bit words and set the validity bit for each
+    assert len(imem_bytes) & 3 == 0
+    imem_words = [(True, w32s[0])
+                  for w32s in struct.iter_unpack('<I', imem_bytes)]
+
+    imem_insns = decode_words(0, imem_words)
     loop_warps = _get_loop_warps(symbols)
     exp_end = _get_exp_end_addr(symbols)
 
     sim.load_program(imem_insns)
     sim.loop_warps = loop_warps
-    sim.load_data(dmem_bytes)
+    sim.load_data(dmem_bytes, has_validity=False)
 
     return exp_end

diff --git a/hw/ip/otbn/dv/otbnsim/sim/sim.py b/hw/ip/otbn/dv/otbnsim/sim/sim.py
index cc0ef0a..ef92c3e 100644
--- a/hw/ip/otbn/dv/otbnsim/sim/sim.py
+++ b/hw/ip/otbn/dv/otbnsim/sim/sim.py

@@ -34,8 +34,17 @@
         '''Add a new loop warp to the simulation'''
         self.loop_warps.setdefault(addr, {})[from_cnt] = to_cnt
 
-    def load_data(self, data: bytes) -> None:
-        self.state.dmem.load_le_words(data)
+    def load_data(self, data: bytes, has_validity: bool) -> None:
+        '''Load bytes into DMEM, starting at address zero.
+
+        If has_validity is true, each 32-bit word should be represented by 5
+        bytes (1 byte that says whether the word is valid, then 4 bytes that
+        give the word in little-endian format). If has_validity is false, each
+        word is considered valid and is represented by 4 bytes in little-endian
+        format.
+
+        '''
+        self.state.dmem.load_le_words(data, has_validity)
 
     def start(self, collect_stats: bool) -> None:
         '''Prepare to start the execution.

diff --git a/hw/ip/otbn/dv/otbnsim/stepped.py b/hw/ip/otbn/dv/otbnsim/stepped.py
index c2c458a..c2ea8eb 100755
--- a/hw/ip/otbn/dv/otbnsim/stepped.py
+++ b/hw/ip/otbn/dv/otbnsim/stepped.py

@@ -171,7 +171,7 @@
 
     print('LOAD_D {!r}'.format(path))
     with open(path, 'rb') as handle:
-        sim.load_data(handle.read())
+        sim.load_data(handle.read(), has_validity=True)
 
     return None
commit	e10dfaf312614ae9ebcdfb8c79840de4cf45be7e	[log] [tgz]
author	Rupert Swarbrick <rswarbrick@lowrisc.org>	Tue Nov 09 17:28:56 2021 +0000
committer	Rupert Swarbrick <rswarbrick@gmail.com>	Tue Nov 16 23:13:21 2021 +0000
tree	ecc284233f88942203c7490133d528f70a3a69c5
parent	b407f2200168c70d8561a3147df5e196f970cec2 [diff]