[otbn] Fix wrapping for grs1 increment with bn.lid/bn.sid

Because grs1 has a 14-bit signed immediate added to it, we can have a
valid address even when grs1 is really big. For example, suppose that
x21 is 0x1000 in

    bn.lid   x10, -2144(x21++)

Then we should write x21 = 0x1020 (rather than the 0x20 that we were
writing).

Similarly, if x21 is 0xffffffc0 (2^32 - 64) then

    bn.lid   x10, 64(x21++)

loads from the valid address of 0 and should increment x21 to
0xffffffe0.

Getting this right implies a 27-bit adder, plus extra muxing for the
top bits of increment_out. To avoid that, we could e.g. change the
spec to only operate on the lower 16 bits of the base address (which
would then need an 11-bit adder and no muxing on most of the upper
bits: much more manageable!)

Signed-off-by: Rupert Swarbrick <rswarbrick@lowrisc.org>
diff --git a/hw/ip/otbn/rtl/otbn_controller.sv b/hw/ip/otbn/rtl/otbn_controller.sv
index dfd4aec..2585bac 100644
--- a/hw/ip/otbn/rtl/otbn_controller.sv
+++ b/hw/ip/otbn/rtl/otbn_controller.sv
@@ -143,9 +143,9 @@
 
   // Computed increments for indirect register index and memory address in BN.LID/BN.SID/BN.MOVR
   // instructions.
-  logic [4:0]               rf_base_rd_data_a_inc;
-  logic [4:0]               rf_base_rd_data_b_inc;
-  logic [DmemAddrWidth-1:0] rf_base_rd_data_a_wlen_word_inc;
+  logic [4:0]  rf_base_rd_data_a_inc;
+  logic [4:0]  rf_base_rd_data_b_inc;
+  logic [26:0] rf_base_rd_data_a_wlen_word_inc;
 
   // Output of mux taking the above increments as inputs and choosing one to write back to base
   // register file with appropriate zero extension and padding to give a 32-bit result.
@@ -335,7 +335,9 @@
   // addresses in BN.LID/BN.SID/BN.MOVR instructions.
   assign rf_base_rd_data_a_inc           = rf_base_rd_data_a_i[4:0] + 1'b1;
   assign rf_base_rd_data_b_inc           = rf_base_rd_data_b_i[4:0] + 1'b1;
-  assign rf_base_rd_data_a_wlen_word_inc = {rf_base_rd_data_a_i[DmemAddrWidth-1:5] + 1'b1, 5'b0};
+  // We can avoid a full 32-bit adder here because the offset is 32-bit aligned, so we know the
+  // load/store address will only be valid if rf_base_rd_data_a_i[4:0] is zero.
+  assign rf_base_rd_data_a_wlen_word_inc = rf_base_rd_data_a_i[31:5] + 27'h1;
 
   // Choose increment to write back to base register file, only one increment can be written as
   // there is only one write port. Note that where an instruction is incrementing the indirect
@@ -353,7 +355,7 @@
         increment_out = {27'b0, rf_base_rd_data_b_inc};
       end
       insn_dec_bignum_i.a_wlen_word_inc: begin
-        increment_out = {{32-DmemAddrWidth{1'b0}}, rf_base_rd_data_a_wlen_word_inc};
+        increment_out = {rf_base_rd_data_a_wlen_word_inc, 5'b0};
       end
       default: begin
         // Whenever increment_out is written back to the register file, exactly one of the