[prim] Increase maximum width for prim_util_memload to 312

Signed-off-by: Greg Chadwick <gac@lowrisc.org>
diff --git a/hw/dv/verilator/cpp/dpi_memutil.cc b/hw/dv/verilator/cpp/dpi_memutil.cc
index 7e348af..0b6e2a2 100644
--- a/hw/dv/verilator/cpp/dpi_memutil.cc
+++ b/hw/dv/verilator/cpp/dpi_memutil.cc
@@ -245,12 +245,13 @@
   // be caught at this function's callsite.
   SVScoped scoped(m.location.data());
 
-  // This "mini buffer" is used to transfer each write to SystemVerilog. It's
-  // not massively efficient, but doing so ensures that we pass 256 bits (32
-  // bytes) of initialised data each time. This is for simutil_set_mem (defined
-  // in prim_util_memload.svh), whose "val" argument has SystemVerilog type bit
-  // [255:0].
-  uint8_t minibuf[32];
+  // This "mini buffer" is used to transfer each write to SystemVerilog.
+  // `simutil_set_mem` takes a fixed 312 bit vector but only uses the bits
+  // required for the RAM width (e.g. elements 3 - 0 of `minibuf` for a 32-bit
+  // wide RAM). The simulator may still read bits it does not use, and DPI
+  // passes the vector as whole 32-bit svBitVecVal words, so allocate 312 bits
+  // rounded up to a word boundary (10 words = 40 bytes) to avoid OOB access.
+  uint8_t minibuf[40];
   memset(minibuf, 0, sizeof minibuf);
   assert(m.width_byte <= sizeof minibuf);
 
diff --git a/hw/ip/otbn/dv/model/otbn_model.cc b/hw/ip/otbn/dv/model/otbn_model.cc
index 0cc4b5f..351a704 100644
--- a/hw/ip/otbn/dv/model/otbn_model.cc
+++ b/hw/ip/otbn/dv/model/otbn_model.cc
@@ -104,10 +104,10 @@
   SVScoped scoped(scope);
 
   // simutil_get_mem passes data as a packed array of svBitVecVal words. It
-  // only works for memories of size at most 256 bits, so we can just allocate
-  // 256/8 = 32 bytes as 32/sizeof(svBitVecVal) words on the stack.
-  assert(word_size <= 256 / 8);
-  svBitVecVal buf[256 / 8 / sizeof(svBitVecVal)];
+  // only works for memories of size at most 312 bits. That is 39 bytes, which
+  // we round up to a whole number of svBitVecVal words (10) on the stack.
+  assert(word_size <= 312 / 8);
+  svBitVecVal buf[(312 / 8 + sizeof(svBitVecVal) - 1) / sizeof(svBitVecVal)];
 
   std::vector<uint8_t> ret;
 
@@ -135,8 +135,8 @@
   assert(num_words * word_size == data.size());
 
   // See get_sim_memory for why this array is sized like this.
-  assert(word_size <= 256 / 8);
-  svBitVecVal buf[256 / 8 / sizeof(svBitVecVal)];
+  assert(word_size <= 312 / 8);
+  svBitVecVal buf[(312 / 8 + sizeof(svBitVecVal) - 1) / sizeof(svBitVecVal)];
 
   for (size_t w = 0; w < num_words; w++) {
     const uint8_t *p = &data[w * word_size];
diff --git a/hw/ip/prim/rtl/prim_util_memload.svh b/hw/ip/prim/rtl/prim_util_memload.svh
index 2141c7c..d4537e4 100644
--- a/hw/ip/prim/rtl/prim_util_memload.svh
+++ b/hw/ip/prim/rtl/prim_util_memload.svh
@@ -13,7 +13,11 @@
  * - A parameter `Width` giving the memory width (word size) in bit.
  * - A parameter `Depth` giving the memory depth in words.
  * - A parameter `MemInitFile` with a file path of a VMEM file to be loaded into
-*    the memory if not empty.
+ *   the memory if not empty.
+ *
+ * Note this works with memories up to a maximum width of 312 bits. Should this maximum width be
+ * increased, all of the `simutil_set_mem` and `simutil_get_mem` call sites must be found (e.g.
+ * using git grep) and adjusted appropriately.
  */
 
 `ifndef SYNTHESIS
@@ -29,10 +33,10 @@
   // Returns 1 (true) for success, 0 (false) for errors.
   export "DPI-C" function simutil_set_mem;
 
-  function int simutil_set_mem(input int index, input bit [255:0] val);
+  function int simutil_set_mem(input int index, input bit [311:0] val);
 
-    // Function will only work for memories <= 256 bits
-    if (Width > 256) begin
+    // Function will only work for memories <= 312 bits
+    if (Width > 312) begin
       return 0;
     end
 
@@ -47,10 +51,10 @@
   // Function for getting a specific element in |mem|
   export "DPI-C" function simutil_get_mem;
 
-  function int simutil_get_mem(input int index, output bit [255:0] val);
+  function int simutil_get_mem(input int index, output bit [311:0] val);
 
-    // Function will only work for memories <= 256 bits
-    if (Width > 256) begin
+    // Function will only work for memories <= 312 bits
+    if (Width > 312) begin
       return 0;
     end