[otbn] Changes to indirect register usage and increments

* If a value greater than 31 is used for an indirect register reference
  an `illegal_insn` error is raised.
* Increment no longer wraps

Fixes #4808

Signed-off-by: Greg Chadwick <gac@lowrisc.org>
diff --git a/hw/ip/otbn/data/bignum-insns.yml b/hw/ip/otbn/data/bignum-insns.yml
index af51560..f26e75b 100644
--- a/hw/ip/otbn/data/bignum-insns.yml
+++ b/hw/ip/otbn/data/bignum-insns.yml
@@ -594,11 +594,12 @@
     After the operation, either the value in the GPR `grs1`, or the value in `grd` can be optionally incremented.
     Specifying both `grd_inc` and `grs1_inc` results in an error (with error code `ErrCodeIllegalInsn`).
 
-    - If `grd_inc` is set, `grd` is updated to be `(*grd + 1) & 0x1f`.
+    - If `grd_inc` is set, `grd` is updated to be `*grd + 1`.
     - If `grs1_inc` is set, the value in `grs1` is incremented by value WLEN/8 (one word).
 
     The memory address must be aligned to WLEN bits.
     Any address that is unaligned or is above the top of memory results in an error (setting bit `bad_data_addr` in `ERR_BITS`).
+    Any `*grd` value greater than 31 before executing the instruction results in an error (setting bit `illegal_insn` in `ERR_BITS`) and no load or optional increment occurring.
     This instruction takes 2 cycles.
   lsu:
     type: mem-load
@@ -653,10 +654,11 @@
     Specifying both `grs1_inc` and `grs2_inc` results in an error (with error code `ErrCodeIllegalInsn`).
 
     - If `grs1_inc` is set, the value in `grs1` is incremented by the value WLEN/8 (one word).
-    - If `grs2_inc` is set, the value in `grs2` is updated to be `(*grs2 + 1) & 0x1f`.
+    - If `grs2_inc` is set, the value in `grs2` is updated to be `*grs2 + 1`.
 
     The memory address must be aligned to WLEN bits.
     Any address that is unaligned or is above the top of memory results in an error (setting bit `bad_data_addr` in `ERR_BITS`).
+    Any `*grs2` value greater than 31 before executing the instruction results in an error (setting bit `illegal_insn` in `ERR_BITS`) and no store or optional increment occurring.
   lsu:
     type: mem-store
     target: [offset, grs1]
@@ -709,8 +711,10 @@
     After the operation, either the value in the GPR `grd`, or the value in `grs` can be optionally incremented.
     Specifying both `grd_inc` and `grs_inc` results in an error (with error code `ErrCodeIllegalInsn`).
 
-    - If `grd_inc` is set, `grd` is updated to be `(*grd + 1) & 0x1f`.
-    - If `grs_inc` is set, `grs` is updated to be `(*grs + 1) & 0x1f`.
+    - If `grd_inc` is set, `grd` is updated to be `*grd + 1`.
+    - If `grs_inc` is set, `grs` is updated to be `*grs + 1`.
+
+    Any `*grd` or `*grs` value greater than 31 results in an error (setting bit `illegal_insn` in `ERR_BITS`)
   encoding:
     scheme: bnmovr
     mapping:
diff --git a/hw/ip/otbn/dv/otbnsim/sim/insn.py b/hw/ip/otbn/dv/otbnsim/sim/insn.py
index 81a7c36..2d88482 100644
--- a/hw/ip/otbn/dv/otbnsim/sim/insn.py
+++ b/hw/ip/otbn/dv/otbnsim/sim/insn.py
@@ -10,6 +10,7 @@
                   RV32ImmShift, insn_for_mnemonic, logical_byte_shift,
                   extract_quarter_word)
 from .state import OTBNState
+from .err_bits import ILLEGAL_INSN
 
 
 class ADD(RV32RegReg):
@@ -865,17 +866,20 @@
         addr = (grs1_val + self.offset) & ((1 << 32) - 1)
         grd_val = state.gprs.get_reg(self.grd).read_unsigned()
 
-        wrd = grd_val & 0x1f
-        value = state.dmem.load_u256(addr)
-        state.wdrs.get_reg(wrd).write_unsigned(value)
+        if grd_val > 31:
+            state.stop_at_end_of_cycle(ILLEGAL_INSN)
+        else:
+            wrd = grd_val & 0x1f
+            value = state.dmem.load_u256(addr)
+            state.wdrs.get_reg(wrd).write_unsigned(value)
 
-        if self.grd_inc:
-            new_grd_val = (grd_val + 1) & 0x1f
-            state.gprs.get_reg(self.grd).write_unsigned(new_grd_val)
+            if self.grd_inc:
+                new_grd_val = grd_val + 1
+                state.gprs.get_reg(self.grd).write_unsigned(new_grd_val)
 
-        if self.grs1_inc:
-            new_grs1_val = (grs1_val + 32) & ((1 << 32) - 1)
-            state.gprs.get_reg(self.grs1).write_unsigned(new_grs1_val)
+            if self.grs1_inc:
+                new_grs1_val = (grs1_val + 32) & ((1 << 32) - 1)
+                state.gprs.get_reg(self.grs1).write_unsigned(new_grs1_val)
 
 
 class BNSID(OTBNInsn):
@@ -897,18 +901,22 @@
         addr = (grs1_val + self.offset) & ((1 << 32) - 1)
 
         grs2_val = state.gprs.get_reg(self.grs2).read_unsigned()
-        wrs = grs2_val & 0x1f
-        wrs_val = state.wdrs.get_reg(wrs).read_unsigned()
 
-        state.dmem.store_u256(addr, wrs_val)
+        if grs2_val > 31:
+            state.stop_at_end_of_cycle(ILLEGAL_INSN)
+        else:
+            wrs = grs2_val & 0x1f
+            wrs_val = state.wdrs.get_reg(wrs).read_unsigned()
 
-        if self.grs1_inc:
-            new_grs1_val = (grs1_val + 32) & ((1 << 32) - 1)
-            state.gprs.get_reg(self.grs1).write_unsigned(new_grs1_val)
+            state.dmem.store_u256(addr, wrs_val)
 
-        if self.grs2_inc:
-            new_grs2_val = (grs2_val + 1) & 0x1f
-            state.gprs.get_reg(self.grs2).write_unsigned(new_grs2_val)
+            if self.grs1_inc:
+                new_grs1_val = (grs1_val + 32) & ((1 << 32) - 1)
+                state.gprs.get_reg(self.grs1).write_unsigned(new_grs1_val)
+
+            if self.grs2_inc:
+                new_grs2_val = grs2_val + 1
+                state.gprs.get_reg(self.grs2).write_unsigned(new_grs2_val)
 
 
 class BNMOV(OTBNInsn):
@@ -941,19 +949,24 @@
         grd_val = state.gprs.get_reg(self.grd).read_unsigned()
         grs_val = state.gprs.get_reg(self.grs).read_unsigned()
 
-        wrd = grd_val & 0x1f
-        wrs = grs_val & 0x1f
+        if grd_val > 31:
+            state.stop_at_end_of_cycle(ILLEGAL_INSN)
+        elif grs_val > 31:
+            state.stop_at_end_of_cycle(ILLEGAL_INSN)
+        else:
+            wrd = grd_val & 0x1f
+            wrs = grs_val & 0x1f
 
-        value = state.wdrs.get_reg(wrs).read_unsigned()
-        state.wdrs.get_reg(wrd).write_unsigned(value)
+            value = state.wdrs.get_reg(wrs).read_unsigned()
+            state.wdrs.get_reg(wrd).write_unsigned(value)
 
-        if self.grd_inc:
-            new_grd_val = (grd_val + 1) & 0x1f
-            state.gprs.get_reg(self.grd).write_unsigned(new_grd_val)
+            if self.grd_inc:
+                new_grd_val = grd_val + 1
+                state.gprs.get_reg(self.grd).write_unsigned(new_grd_val)
 
-        if self.grs_inc:
-            new_grs_val = (grs_val + 1) & 0x1f
-            state.gprs.get_reg(self.grs).write_unsigned(new_grs_val)
+            if self.grs_inc:
+                new_grs_val = grs_val + 1
+                state.gprs.get_reg(self.grs).write_unsigned(new_grs_val)
 
 
 class BNWSRR(OTBNInsn):
diff --git a/hw/ip/otbn/dv/otbnsim/test/simple/insns/bnlid.exp b/hw/ip/otbn/dv/otbnsim/test/simple/insns/bnlid.exp
index b13ea1b..614f72f 100644
--- a/hw/ip/otbn/dv/otbnsim/test/simple/insns/bnlid.exp
+++ b/hw/ip/otbn/dv/otbnsim/test/simple/insns/bnlid.exp
@@ -4,6 +4,6 @@
 
 w0 = 0x1f1e1d1c1b1a191817161514131211100f0e0d0c0b0a09080706050403020100
 w1 = 0x3f3e3d3c3b3a393837363534333231302f2e2d2c2b2a29282726252423222120
-w2 = 0x5f5e5d5c5b5a595857565554535251504f4e4d4c4b4a49484746454443424140
+w31 = 0x5f5e5d5c5b5a595857565554535251504f4e4d4c4b4a49484746454443424140
 x2 = 2
-x3 = 3
+x3 = 32
diff --git a/hw/ip/otbn/dv/otbnsim/test/simple/insns/bnlid.s b/hw/ip/otbn/dv/otbnsim/test/simple/insns/bnlid.s
index c4556ee..b978254 100644
--- a/hw/ip/otbn/dv/otbnsim/test/simple/insns/bnlid.s
+++ b/hw/ip/otbn/dv/otbnsim/test/simple/insns/bnlid.s
@@ -20,13 +20,19 @@
   bn.lid    x2++, 32(x0)
 
   /*
-    Check wrapping and incrementing on the grd side. Set x3 to 32+2 and load
-    with increment from address 0x40. We should get data from 0x40 in w2 and x3
-    should equal 3
+    Check incrementing on the grd side. Set x3 to 31 and load with increment
+    from address 0x40. We should get data from 0x40 in w31 and x3 should equal
+    32.
   */
-  addi      x3, x0, 34
+  addi      x3, x0, 31
   bn.lid    x3++, 64(x0)
 
+  /*
+    Check error due to <grd> greater than 31. No register writes should occur,
+    in particular x3 must remain 32.
+  */
+  bn.lid    x3++, 128(x0)
+
   ecall
 
 
@@ -64,3 +70,12 @@
   .word 0x57565554
   .word 0x5b5a5958
   .word 0x5f5e5d5c
+  /* Address 0x80: */
+  .word 0x63626160
+  .word 0x67666564
+  .word 0x6b6a6968
+  .word 0x6f6e6d6c
+  .word 0x73727170
+  .word 0x77767574
+  .word 0x7b7a7978
+  .word 0x7f7e7d7c
diff --git a/hw/ip/otbn/rtl/otbn_controller.sv b/hw/ip/otbn/rtl/otbn_controller.sv
index c0465c6..8296ccd 100644
--- a/hw/ip/otbn/rtl/otbn_controller.sv
+++ b/hw/ip/otbn/rtl/otbn_controller.sv
@@ -151,8 +151,8 @@
 
   // Computed increments for indirect register index and memory address in BN.LID/BN.SID/BN.MOVR
   // instructions.
-  logic [4:0]  rf_base_rd_data_a_inc;
-  logic [4:0]  rf_base_rd_data_b_inc;
+  logic [5:0]  rf_base_rd_data_a_inc;
+  logic [5:0]  rf_base_rd_data_b_inc;
   logic [26:0] rf_base_rd_data_a_wlen_word_inc;
 
   // Output of mux taking the above increments as inputs and choosing one to write back to base
@@ -176,6 +176,8 @@
   logic imem_addr_err, loop_err, ispr_err;
   logic dmem_addr_err, dmem_addr_unaligned_base, dmem_addr_unaligned_bignum, dmem_addr_overflow;
 
+  logic rf_a_indirect_err, rf_b_indirect_err, rf_d_indirect_err, rf_indirect_err;
+
   // Stall a cycle on loads to allow load data writeback to happen the following cycle. Stall not
   // required on stores as there is no response to deal with.
   // TODO: Possibility of error response on store? Probably still don't need to stall in that case
@@ -285,7 +287,7 @@
   assign err_bits_o.fatal_reg     = 1'b0;
   assign err_bits_o.fatal_imem    = insn_fetch_err_i;
   assign err_bits_o.fatal_dmem    = lsu_rdata_err_i;
-  assign err_bits_o.illegal_insn  = insn_illegal_i | ispr_err;
+  assign err_bits_o.illegal_insn  = insn_illegal_i | ispr_err | rf_indirect_err;
   assign err_bits_o.bad_data_addr = dmem_addr_err;
   assign err_bits_o.loop          = loop_err;
   assign err_bits_o.call_stack    = rf_base_call_stack_err_i;
@@ -358,13 +360,13 @@
   always_comb begin
     unique case (1'b1)
       insn_dec_bignum_i.a_inc: begin
-        increment_out = {27'b0, rf_base_rd_data_a_inc};
+        increment_out = {26'b0, rf_base_rd_data_a_inc};
       end
       insn_dec_bignum_i.b_inc: begin
-        increment_out = {27'b0, rf_base_rd_data_b_inc};
+        increment_out = {26'b0, rf_base_rd_data_b_inc};
       end
       insn_dec_bignum_i.d_inc: begin
-        increment_out = {27'b0, rf_base_rd_data_b_inc};
+        increment_out = {26'b0, rf_base_rd_data_b_inc};
       end
       insn_dec_bignum_i.a_wlen_word_inc: begin
         increment_out = {rf_base_rd_data_a_wlen_word_inc, 5'b0};
@@ -374,7 +376,7 @@
         // increment selector signals is high. To prevent the automatic inference of latches in
         // case nothing is written back (rf_wdata_sel != RfWdSelIncr) and to save logic, we choose
         // a valid output as default.
-        increment_out = {27'b0, rf_base_rd_data_a_inc};
+        increment_out = {26'b0, rf_base_rd_data_a_inc};
       end
     endcase
   end
@@ -433,7 +435,7 @@
   // only available in the stall state.
   assign rf_base_wr_en_o = insn_valid_i & insn_dec_base_i.rf_we &
       ~(insn_dec_shared_i.ld_insn & (state_q != OtbnStateStall));
-  assign rf_base_wr_commit_o = insn_executing;
+  assign rf_base_wr_commit_o = insn_executing & ~err;
 
   always_comb begin
     unique case (insn_dec_base_i.rf_wdata_sel)
@@ -489,8 +491,8 @@
     // By default write nothing
     rf_bignum_wr_en_o = 2'b00;
 
-    // Only write if executing instruction wants a bignum rf write
-    if (insn_executing && insn_dec_bignum_i.rf_we) begin
+    // Only write if executing instruction wants a bignum rf write and there is no error
+    if (insn_executing && insn_dec_bignum_i.rf_we && !err) begin
       if (insn_dec_bignum_i.mac_en && insn_dec_bignum_i.mac_shift_out) begin
         // Special handling for BN.MULQACC.SO, only enable upper or lower half depending on
         // mac_wr_hw_sel_upper.
@@ -513,6 +515,7 @@
   assign rf_bignum_wr_addr_o = insn_dec_bignum_i.rf_d_indirect ? rf_base_rd_data_b_i[4:0] :
                                                                  insn_dec_bignum_i.d;
 
+
   // For the shift-out variant of BN.MULQACC the bottom half of the MAC result is written to one
   // half of a desintation register specified by the instruction (mac_wr_hw_sel_upper). The bottom
   // half of the MAC result must be placed in the appropriate half of the write data (the RF only
@@ -536,6 +539,13 @@
     endcase
   end
 
+  assign rf_a_indirect_err = insn_dec_bignum_i.rf_a_indirect & (|rf_base_rd_data_a_i[31:5]);
+  assign rf_b_indirect_err = insn_dec_bignum_i.rf_b_indirect & (|rf_base_rd_data_b_i[31:5]);
+  assign rf_d_indirect_err = insn_dec_bignum_i.rf_d_indirect & (|rf_base_rd_data_b_i[31:5]);
+
+  assign rf_indirect_err =
+      insn_valid_i & (rf_a_indirect_err | rf_b_indirect_err | rf_d_indirect_err);
+
   // CSR/WSR/ISPR handling
   // ISPRs (Internal Special Purpose Registers) are the internal registers. CSRs and WSRs are the
   // ISA visible versions of those registers in the base and bignum ISAs respectively.