[otbn] Widen prefetch_loop_end_addr to avoid overflow

There's an amusing bug that you can trigger if you have something like

   loopi  123, 1025

The problem is that a loop body of 1025 instructions works out as
4096 + 4 bytes (1025 * 4 = 4100), so the correct value of
prefetch_loop_end_addr is something like old_addr + 4096 + 4.

Unfortunately, 4096 bytes is the size of IMEM, so the sum does not fit
in ImemAddrWidth bits and we were truncating it to just old_addr + 4.
This meant that the prefetch stage thought that the following
instruction was at the end of the loop and predicted a back edge.
Eventually, we failed the NoAddressMismatch assertion in
otbn_instruction_fetch.sv.

The fix is to pass one extra bit in the address, just like we already
do with the check in the loop controller itself.

Signed-off-by: Rupert Swarbrick <rswarbrick@lowrisc.org>
diff --git a/hw/ip/otbn/rtl/otbn_controller.sv b/hw/ip/otbn/rtl/otbn_controller.sv
index dab447d..5e2fd16 100644
--- a/hw/ip/otbn/rtl/otbn_controller.sv
+++ b/hw/ip/otbn/rtl/otbn_controller.sv
@@ -145,7 +145,7 @@
   output logic                     prefetch_en_o,
   output logic                     prefetch_loop_active_o,
   output logic [31:0]              prefetch_loop_iterations_o,
-  output logic [ImemAddrWidth-1:0] prefetch_loop_end_addr_o,
+  output logic [ImemAddrWidth:0]   prefetch_loop_end_addr_o,
   output logic [ImemAddrWidth-1:0] prefetch_loop_jump_addr_o
 );
   otbn_state_e state_q, state_d;
diff --git a/hw/ip/otbn/rtl/otbn_core.sv b/hw/ip/otbn/rtl/otbn_core.sv
index afbff04..0f72f08 100644
--- a/hw/ip/otbn/rtl/otbn_core.sv
+++ b/hw/ip/otbn/rtl/otbn_core.sv
@@ -211,7 +211,7 @@
   logic                     prefetch_en;
   logic                     prefetch_loop_active;
   logic [31:0]              prefetch_loop_iterations;
-  logic [ImemAddrWidth-1:0] prefetch_loop_end_addr;
+  logic [ImemAddrWidth:0]   prefetch_loop_end_addr;
   logic [ImemAddrWidth-1:0] prefetch_loop_jump_addr;
 
   // Start stop control start OTBN execution when requested and deals with any pre start or post
diff --git a/hw/ip/otbn/rtl/otbn_instruction_fetch.sv b/hw/ip/otbn/rtl/otbn_instruction_fetch.sv
index f31ac83..8f05df6 100644
--- a/hw/ip/otbn/rtl/otbn_instruction_fetch.sv
+++ b/hw/ip/otbn/rtl/otbn_instruction_fetch.sv
@@ -40,7 +40,7 @@
   input logic                     prefetch_en_i,
   input logic                     prefetch_loop_active_i,
   input logic [31:0]              prefetch_loop_iterations_i,
-  input logic [ImemAddrWidth-1:0] prefetch_loop_end_addr_i,
+  input logic [ImemAddrWidth:0]   prefetch_loop_end_addr_i,
   input logic [ImemAddrWidth-1:0] prefetch_loop_jump_addr_i
 );
 
@@ -115,7 +115,8 @@
       // timing consistent regardless of taken/not-taken.
       // This also applies to jumps, this avoids the need to calculate the jump address here.
       insn_prefetch = 1'b0;
-    end else if (insn_prefetch_addr == prefetch_loop_end_addr_i && prefetch_loop_active_i &&
+    end else if ({1'b0, insn_prefetch_addr} == prefetch_loop_end_addr_i &&
+                 prefetch_loop_active_i &&
                  prefetch_loop_iterations_i > 32'd1) begin
       // When in a loop prefetch the loop beginning when execution reaches the end.
       imem_addr_o = prefetch_loop_jump_addr_i;
diff --git a/hw/ip/otbn/rtl/otbn_loop_controller.sv b/hw/ip/otbn/rtl/otbn_loop_controller.sv
index e7a099b..855ae92 100644
--- a/hw/ip/otbn/rtl/otbn_loop_controller.sv
+++ b/hw/ip/otbn/rtl/otbn_loop_controller.sv
@@ -27,7 +27,7 @@
 
   output                     prefetch_loop_active_o,
   output [31:0]              prefetch_loop_iterations_o,
-  output [ImemAddrWidth-1:0] prefetch_loop_end_addr_o,
+  output [ImemAddrWidth:0]   prefetch_loop_end_addr_o,
   output [ImemAddrWidth-1:0] prefetch_loop_jump_addr_o,
 
   input jump_or_branch_i,
@@ -211,7 +211,7 @@
   // Forward info about loop state for next cycle to prefetch stage
   assign prefetch_loop_active_o     = loop_active_d;
   assign prefetch_loop_iterations_o = current_loop_d.loop_iterations;
-  assign prefetch_loop_end_addr_o   = current_loop_d.loop_end[ImemAddrWidth-1:0];
+  assign prefetch_loop_end_addr_o   = current_loop_d.loop_end;
   assign prefetch_loop_jump_addr_o  = current_loop_d.loop_start;
 
   `ASSERT(NoLoopStackPushAndPop, !(loop_stack_push && loop_stack_pop))